コード例 #1
0
def get_champ():
    """
    Collect base-stat information for every champion.

    Scrapes the main wiki page for the champion roster, then spawns one
    worker thread per champion (bounded by my_globals.thread_max) running
    get_champ_info, which fills the shared dictionary.

    :return: None; results are stored in my_globals.champion_info
    """

    champ_url = []           # Wiki href for each champion
    all_champ_threads = []   # Every worker thread we spawn

    # Fetch main wiki page
    http_pool = urllib3.PoolManager()
    main_url = my_tools.get_web_page(page_name='League_of_Legends_Wiki',
                                     http_pool=http_pool)

    # Parse the HTML page for champion names
    with my_globals.bs4_lock:
        champions_html = BeautifulSoup(markup=main_url, features='lxml')
    champ_roster_ol = champions_html.find(class_="champion_roster")
    champ_roster_li = champ_roster_ol.find_all('a')

    # Collect the wiki link for each champion
    for champ_roster_name in champ_roster_li:
        champ_url.append(champ_roster_name.get('href').strip())

    my_tools.log_status("Getting champion info for;")

    for champ in champ_url:
        # Poll until a thread slot is free (thread_max caps concurrency)
        while my_globals.thread_count >= my_globals.thread_max:
            time.sleep(2)

        # Signal a new thread is being created
        with my_globals.counter_lock:
            my_globals.thread_count += 1

        # Create and start a worker thread for the current champion
        thread = my_globals.threading.Thread(target=get_champ_info,
                                             args=(champ, http_pool),
                                             name=champ)
        all_champ_threads.append(thread)
        thread.start()

    # Wait for all threads to finish processing
    for thread in all_champ_threads:
        thread.join()
コード例 #2
0
def main():
    """Entry point: fetch the current patch, scrape ability data, and log
    the total runtime as M:SS."""

    # Start time of program
    start_time = time.time()

    # Get current patch
    my_tools.get_patch()

    # Processes stat for each champion
    # my_champs.get_champ()
    # my_champs.champ_google_sheets()
    #
    # # Process all item information
    # my_items.get_item(my_globals.home_directory)
    # my_items.item_google_sheets()

    # Process all champion abilities
    my_abilities.get_abilities()

    # End time of program
    end_time = time.time()

    # divmod gives exact minutes/seconds; the previous round(total/60)
    # could round minutes up (95 s logged as "2:35" instead of "1:35")
    total_time = end_time - start_time
    minutes, seconds = divmod(round(total_time), 60)

    # Log the duration of the program; zero-pad single-digit seconds
    # (the old '> 10' test also mis-padded exactly 10 s as ':010')
    my_tools.log_status('M:S')
    my_tools.log_status(f'{minutes}:{seconds:02}')
コード例 #3
0
def get_ability_info(champ, chrome):
    """
    Scrape and parse one champion's ability page.

    Writes a dict of button -> {'name': ..., '<effect>': '<value>', ...}
    into my_globals.ability_info (champion name with apostrophes replaced
    by underscores as the key), then decrements the shared thread counter.

    :param champ: Champion being processed (readable name)
    :param chrome: Selenium browser to load ability boxes
    :return: None; results are stored in my_globals.ability_info
    """

    # Log which champion is currently being processed
    my_tools.log_status(champ)

    # Open champion page; fetch and parse both run under the shared
    # selenium lock, so only one thread drives the browser at a time
    with my_globals.selenium_lock:
        ability_url = my_tools.get_web_page(page_name=champ,
                                            path='/Abilities',
                                            browser=chrome)
        abilities_html = BeautifulSoup(markup=ability_url, features='lxml')

    # Use regex to find each skill box (div class looks like "skill skill_q")
    passive_html = abilities_html.find_all(
        'div', {'class': re.compile('skill skill.*')})

    # Hold the current abilities for this champion
    current_abilities = {}
    cnt_test = 0  # DEBUG ONLY: counter for singling out one skill box

    # Loop through each ability box
    # 0 = passive
    # 1 = q
    # 2 = w
    # 3 = e
    # 4 = r or q2
    # 5 = w2
    # 6 = e2
    # 7 = r
    for ability in passive_html:
        # DEBUG ONLY: both branches just advance the counter; re-enable the
        # commented 'continue' to restrict processing to a single box
        if cnt_test != 3:  # and cnt_test != 6: #DEBUG ONLY
            cnt_test += 1
            # continue
        else:
            cnt_test += 1

        # Get the button name (second CSS class, e.g. "skill_q" -> "q")
        button = ability.get('class')[1].split('_', 1)[1]
        if button == 'innate':
            button = 'passive'

        # Detect if already has ability key (eg. for Jayce/Rek'sai);
        # a repeated button is stored under e.g. 'q2'
        try:
            if current_abilities[button]:
                button += '2'
        except KeyError:
            pass

        # Create entry for current button
        current_abilities[button] = {}

        # Get name for current ability from the header element's id
        # (wiki ids use '_' for spaces and '.27' for apostrophes)
        ability = ability.contents[1].contents[2]
        current_abilities[button]['name'] = ability.get('id').replace(
            '_', ' ').replace('.27', '\'')

        # Get string for all ability stat
        info = ability.contents[1].contents[1].contents[0].contents[2]

        # Split up the list based on stats ("LABEL: value" pairs, so each
        # entry is a value followed by the next entry's label)
        all_stats = info.text.split(':')

        # Detect if there is an html element (eg. image) in the text and remove it
        image_start = [stat for stat in all_stats if '<' in stat]
        for illegal in image_start:
            all_stats.remove(illegal)

        # Remove the remaining part of the tag: keep only the text after
        # the last '>' in each contaminated entry
        image_end = [stat for stat in all_stats if '>' in stat]
        for illegal in image_end:
            location = [pos for pos, char in enumerate(illegal) if char == '>']
            legal = illegal[location[len(location) - 1] + 2:]
            for cnt, stat in enumerate(all_stats):
                if stat == illegal:
                    all_stats[cnt] = legal

        # Go through each stat; entry cnt holds a label, cnt + 1 its value
        for cnt, stat in enumerate(all_stats):

            # Don't process last one, already handled
            if cnt + 1 < len(all_stats):

                # Split up the current stat
                full_effect = ''
                effect = stat.split(' ')

                # Loop through each word of the label
                for word in effect:

                    # Upper case is often used for a new type of effect
                    if word.isupper():
                        if full_effect == '':
                            full_effect += word.lower()
                        else:
                            full_effect += ''.join([' ', word.lower()])

                    # These words are not in uppercase
                    elif word == 'On-Target':
                        full_effect += word.lower()
                    elif word == 'Cooldown':
                        full_effect += ''.join([' ', word.lower()])

                # Get the next stat (the value belonging to this label)
                full_value = ''
                value = all_stats[cnt + 1].split(' ')

                # Loop through for each number in the next stat
                for number in value:
                    # Often written like 5/10/15 for per-rank values
                    if number.isdigit():
                        full_value += number
                    elif number == '/':

                        full_value += '/'

                    # Global is still a valid value
                    elif number == 'Global':
                        full_value += number

                    # Test if text has a decimal
                    else:
                        try:
                            # Will error out if word, but will take decimals (eg. 2.5)
                            float(number)
                            full_value += number
                        except ValueError:
                            pass

                # Put them together
                current_abilities[button][full_effect] = full_value.strip()

    # Add current ability to list (apostrophes replaced for the key)
    my_globals.ability_info[champ.replace('\'', '_')] = current_abilities
    tmp = my_globals.ability_info.copy()  # DEBUG ONLY: visible in debugger

    # Signal current thread is done processing
    with my_globals.counter_lock:
        my_globals.thread_count -= 1
コード例 #4
0
def get_abilities():
    """
    Collect ability information for every champion.

    Loads the champion roster from the main wiki page, then spawns one
    worker thread per champion (bounded by my_globals.thread_max) running
    get_ability_info against a shared headless Chrome instance.

    :return: None; results are stored in my_globals.ability_info
    """

    champ_url = []            # Wiki href for each champion
    all_ability_threads = []  # Every worker thread we spawn

    # Start headless chrome to get javascript from pages
    driver = webdriver.ChromeOptions()
    driver.add_argument('headless')

    # try:  #todo detect if selenium is installed
    # Current directory is Scraper\HTML Pages
    chrome = webdriver.Chrome(
        chrome_options=driver,
        executable_path='../Chrome Driver/chromedriver.exe')
    chrome.implicitly_wait(30)
    # except:
    #     pass

    # Fetch main wiki page
    http_pool = urllib3.PoolManager()
    main_url = my_tools.get_web_page(page_name='League_of_Legends_Wiki',
                                     http_pool=http_pool)

    # Parse the HTML page for champion names
    with my_globals.bs4_lock:
        champions_html = BeautifulSoup(markup=main_url, features='lxml')
    champ_roster_ol = champions_html.find(class_="champion_roster")
    champ_roster_li = champ_roster_ol.find_all('a')

    # Collect the wiki link for each champion
    for champ_roster_name in champ_roster_li:
        champ_url.append(champ_roster_name.get('href').strip())

    # General log message
    my_tools.log_status("Getting ability info for;")

    for champ in champ_url:
        # Strip the '/wiki/' prefix and decode for readability
        champ = champ[6:].replace('%27', '\'').replace('_', ' ')

        # Poll until a thread slot is free (thread_max caps concurrency)
        while my_globals.thread_count >= my_globals.thread_max:
            time.sleep(2)

        # Signal a new thread is being created
        with my_globals.counter_lock:
            my_globals.thread_count += 1

        # Create and start a worker thread for the current champion
        thread = my_globals.threading.Thread(target=get_ability_info,
                                             args=(champ, chrome),
                                             name=champ)
        all_ability_threads.append(thread)
        thread.start()

    # Wait for all threads to finish processing
    for thread in all_ability_threads:
        thread.join()
コード例 #5
0
def get_champ_info(champ, http_pool):
    """
    Scrape one champion's wiki page and record their base stats.

    Fills my_globals.champion_info with the champion's base stats,
    per-level growth, and resource type, then releases its thread slot.

    :param champ: Champion href from the roster (starts with '/wiki/')
    :param http_pool: Shared urllib3 pool used to fetch the page
    """

    # Stats tracked for every champion
    stat_type = [
        "Health",
        "HealthRegen",
        "ResourceBar",
        "ResourceRegen",
        "AttackDamage",
        "AttackSpeed",
        "Armor",
        "MagicResist",
        "MovementSpeed",
    ]
    champion_stats = {}  # Stats collected for this champion

    # Strip the '/wiki/' prefix; wiki element ids use '_' for apostrophes
    raw_name = champ[6:]
    id_name = raw_name.replace("%27", "_")

    # Fetch and parse the champion's page
    main_url = my_tools.get_web_page(
        page_name=raw_name.replace('%27', '\'').replace('_', ' '),
        path='/Champions/',
        http_pool=http_pool)
    with my_globals.bs4_lock:
        champions_html = BeautifulSoup(markup=main_url, features='lxml')

    # Base value for each stat
    for stat in stat_type:
        stat_html = champions_html.find(id=''.join([stat, "_", id_name]))

        # Champions missing a stat (eg. energy users) get '0'
        try:
            champion_stats[stat] = stat_html.text
        except AttributeError:
            champion_stats[stat] = '0'

    # Per-level growth for each stat
    for stat in stat_type:
        # Attack speed growth is named differently on the site
        if stat == "AttackSpeed":
            stat = "AttackSpeedBonus"

        stat_html = champions_html.find(
            id=''.join([stat, "_", id_name, "_lvl"]))

        # Champions that do not scale in a stat get '0'
        try:
            champion_stats[''.join([stat, '/lvl'])] = stat_html.text[2:]
        except AttributeError:
            champion_stats[''.join([stat, '/lvl'])] = '0'

    # Locate the "Secondary Bar:" label to determine the resource type
    resource_html = champions_html.find(
        style="font-size:10px; line-height:1em; display:block; "
        "color:rgb(147, 115, 65); margin-top:3px; margin-bottom:0;")

    # Walk to the sibling element holding the resource name
    try:
        champ_resource = resource_html.next_sibling.next_element.contents[
            2].text
    except IndexError:
        champ_resource = "Manaless"
    # Add stat to stat array
    champion_stats['ResourceType'] = champ_resource

    # Publish this champion's stats to the shared dictionary
    my_globals.champion_info[raw_name.replace("%27", "-")] = champion_stats

    my_tools.log_status(raw_name)

    # Signal thread is complete
    with my_globals.counter_lock:
        my_globals.thread_count -= 1
コード例 #6
0
def get_item(home_directory):
    """
    Scrape item information for every usable section of the wiki item grid.

    :param home_directory: Project root; working directory is switched to
        '<home_directory>/HTML Pages'
    :return: None; results are stored in my_globals.item_info
    """

    # Sections of the item grid the calculator does not use
    skipped_sections = {
        'Potions and Consumables',
        'Distributed',
        'Removed items',
        'Trinkets',
    }

    # Log current status of program
    my_tools.log_status('Getting Item Grid')

    # Change directory to HTML pages
    os.chdir(''.join([home_directory, '/HTML Pages']))

    # Create urllib3 pool to download each web page
    http_pool = urllib3.PoolManager()
    main_url = my_tools.get_web_page(page_name='Item',
                                     path='/Items',
                                     http_pool=http_pool)

    # For formatting
    my_tools.log_status('\n')

    # Use the item page and set up parsing
    with my_globals.bs4_lock:
        item_grid_html = BeautifulSoup(markup=main_url, features='lxml')

    # Find the item grid and start to parse
    finished_items_html = item_grid_html.find(id='item-grid')

    # The grid alternates: index 4k+1 holds a section header and index
    # 4k+3 holds that section's item list
    for cnt, _ in enumerate(finished_items_html.contents):

        # Section header: log it and create its entry
        if cnt % 4 == 1:
            category = finished_items_html.contents[cnt].text.strip()

            # Skip sections not used by calculator
            if category in skipped_sections:
                continue

            # Log status of program
            my_tools.log_status(''.join(['Starting Section: ', category]))

            # Create entry for current section in global dictionary
            my_globals.item_info[category] = {}

        # Item list: spawn one worker thread per item in the section
        if cnt % 4 == 3:
            # The section header sits two entries back
            category = finished_items_html.contents[cnt - 2].text.strip()

            # Skip sections not used by calculator
            if category in skipped_sections:
                continue

            # Array to hold threads
            all_item_threads = []

            # Get the page for each item in the category and start to parse
            for item in finished_items_html.contents[cnt]:
                # Save item path and readable names
                item_name = item.contents[0].contents[0].contents[0].get(
                    'href')
                current_item_name = item_name[6:].replace('%27', '\'').replace(
                    '_', ' ')

                # Poll until a thread slot is free (thread_max caps concurrency)
                while my_globals.thread_count >= my_globals.thread_max:
                    time.sleep(2)

                # Signal a new thread is being created
                with my_globals.counter_lock:
                    my_globals.thread_count += 1

                # Create thread and process each item
                thread = threading.Thread(target=get_item_page,
                                          args=(item, cnt,
                                                finished_items_html,
                                                category, http_pool),
                                          name=current_item_name)
                all_item_threads.append(thread)
                thread.start()

            # Wait for all threads to finish processing
            for thread in all_item_threads:
                thread.join()

                # NOTE(review): logs a blank line once per joined thread;
                # confirm this wasn't meant to run once after the loop
                my_tools.log_status('\n')
コード例 #7
0
def get_item_info(item_name, cnt, finished_items_html, item_html):
    """
    Process current item html page and add information to global dictionary
    :param item_name: Item path for current item (starts with '/wiki/')
    :param cnt: Index of the item list within the grid; the section header
        is at finished_items_html.contents[cnt - 2]
    :param finished_items_html: Parsed item-grid HTML
    :param item_html: Parsed HTML page of the item being processed
    :return: None; results are stored in my_globals.item_info
    """

    # Get readable item name and section
    name = item_name[6:].replace('%27', '\'').replace('_', ' ')
    item_section = finished_items_html.contents[cnt - 2].text.strip()

    # Get item info box
    item_list = item_html.find(
        class_='portable-infobox pi-background pi-theme-wikia pi-layout-stacked'
    )

    # Check if item, if not then skip (e.g. skip GP ult upgrades)
    try:
        # Check if item is sub-item (e.g. Doran's Lost Shield)
        if name != item_list.contents[1].contents[0]:
            return
    except AttributeError:
        return

    # Create local dict to not constantly call global, used to improve speed
    current_info = {}

    # Get all information about the current item
    try:
        # Search through each section in the item info box.
        # Plain iteration: the old enumerate index was never used and it
        # shadowed the 'cnt' parameter.
        for info_box_section in item_list:
            try:
                # Retrieve current section name in item box
                if len(info_box_section) > 1:
                    info_box_section_name = info_box_section.contents[
                        1].text.strip()
                else:
                    continue

                # Conduct appropriate parsing depending on current section name
                if info_box_section_name == 'Stats':  # todo add item actives
                    current_info = get_stats(
                        info_box_section_name=info_box_section,
                        current_info=current_info)
                elif info_box_section_name == 'Passive':
                    current_info = get_passive(
                        info_box_section_name=info_box_section,
                        current_info=current_info)
                elif info_box_section_name[:12] == 'Availability':
                    current_info = get_map(
                        info_box_section_name=info_box_section,
                        current_info=current_info)
                elif info_box_section_name[:4] == 'Cost':
                    cost = info_box_section.contents[1].contents[3].contents[
                        1].contents[1].text
                    current_info['cost'] = cost
            except (AttributeError, TypeError):
                # Non-section nodes (bare text, images) can't be parsed; skip
                pass
    except TypeError:
        # The info box itself was not iterable; nothing to record
        return

    # Log status of job complete and add local dictionary to global dictionary
    my_tools.log_status(''.join(['Item completed: ', name]))
    my_globals.item_info[item_section][name] = current_info
    return