def generate_sponsor_contract(self, slot, sponsor):
    """
    Fetch a sponsor's offer for the given slot and accept it if the
    payment, victory bonus and draw bonus all meet the minimum thresholds.

    Returns the accepted contract as {'payment', 'victory', 'draw'},
    else False if any threshold was not met.
    """
    # NOTE(review): the original signature was missing `self` even though
    # the body uses self.session / self.suburl_sponsor — added here.
    response = self.session.request(
        "GET",
        suburl=self.suburl_sponsor,
        params={'slot': slot, 'pag': 'oferta', 'sponsor': sponsor}
    )
    soup = make_soup(response)

    # Second row of the offer table: payment/match, victory bonus, draw bonus
    contract_row = [i.text for i in soup.find('table').find_all('tr')[1].find_all('td')[1:]]
    contract_row[0] = contract_row[0].split('/match')[0]
    assert len(contract_row) == 3

    payment, victory, draw = [self.get_value_from_string(i) for i in contract_row]

    # min_payment / min_victory / min_draw resolve from an enclosing scope —
    # presumably module-level settings; TODO confirm where they are defined.
    if payment < min_payment:
        return False
    if victory < min_victory:
        print(f"victory: {victory} too low")
        return False
    if draw < min_draw:
        print(f"draw: {draw} too low")
        return False

    # Confirm contract
    self.session.request(
        "GET",
        suburl=self.suburl_sponsor,
        params={'slot': slot, 'sponsor': sponsor, 'pag': 'confirmare'}
    )

    # Return contract so it can be printed to console when made
    contract = {'payment': payment, 'victory': victory, 'draw': draw}
    return contract
def trained_today(self):
    """
    Check whether this player has already been trained today.

    Opens the player's profile and starts (but never confirms) a
    training request on their first trainable skill; the response
    text reveals whether training already happened today.

    r-type: bool
    """
    trainable = self.is_trainable
    # Maxed-out players should have been caught earlier in the Training module
    if not any(trainable):
        raise Exception("Player is maxed out already")

    # 1-based index of the first trainable skill;
    # needed to build the subsequent request
    skill_index = trainable.index(True) + 1

    # Probe request — this does not actually train the player
    response = self.session.request(
        "GET",
        suburl='profil.php?',
        params={'id': self.player_id, 'antrenament': f'A{skill_index}'}
    )
    soup = make_soup(response)

    return bool(soup.find_all('font', text=re.compile("already been trained today")))
def comment_soup(self):
    """ Returns the soup containing information about the list's existing comments."""
    endpoint = f"csi/list/{self._id}/comments-section/?"
    response = SESSION.request("GET", endpoint, params={'esiAllowUser': True})
    return make_soup(response)
def transfers(self):
    """ Returns a list of transfers (bought and sold), sorted by date
    r-type: list of dicts """
    request = self.session.request("GET", "transferuri_club.php?", params=self.params)
    soup = make_soup(request)

    # For each row of data under transfers(in) and transfers(out),
    # grab information about the transfer via __get_individual_transfer_info()
    bought = [
        self.__get_individual_transfer_info(i)
        for i in soup.find_all('table')[2].find_all('tr')
    ]
    sold = [
        self.__get_individual_transfer_info(i)
        for i in soup.find_all('table')[4].find_all('tr')
    ]

    # Tag each record with its direction. Plain loops — comprehensions
    # should not be used purely for their side effects.
    for record in sold:
        record['type'] = 'sell'
    for record in bought:
        record['type'] = 'buy'

    # Combine and sort chronologically by the 'date' field
    # (bought + sold is already a list; wrapping in list() was redundant)
    return sorted(
        bought + sold,
        key=lambda k: pendulum.from_format(k['date'], 'YYYY-MM-DD'))
def youthcoach(self):
    """ Returns information about the current youth coach
    - Name
    - Salary
    - Ratings
    If no youth coach is employed, return False
    r-type: dict """
    response = self.session.request("GET", self.suburl_facilities)
    soup = make_soup(response)

    youth_coach_box = soup.find_all('table')[3].find_all('tr')[1]

    # Button for hiring a new coach. Hence there is no current coach. So return False
    hire_coach_button = youth_coach_box.find('input', attrs={'value': 'Hire coach'})
    if hire_coach_button:
        return False

    name = youth_coach_box.find('b').text
    link = youth_coach_box.find('a', attrs={'onmouseover': True})

    # BUG FIX: the pattern can capture a dotted thousands value such as
    # "1.500", and int("1.500") raises ValueError — strip the separator first.
    pattern = r"Salary: (\d{1}\.?\d{0,3}) C"
    salary = int(re.findall(pattern, link['onmouseover'])[0].replace('.', ''))

    # Star images appear to be in half-star units — hence the // 2.
    # TODO confirm against the live page.
    pattern = r"stele/(\d{1,2})"
    star_ratings = tuple(
        int(re.findall(pattern, i['src'])[0]) // 2
        for i in soup.find_all('table')[4].find_all('tr')[1].find_all(
            'img', attrs={'src': re.compile(r"imagini/stele/")})
    )

    return {'name': name, 'salary': salary, 'ratings': star_ratings}
def __get_tp_from_slot(self, slot_num):
    """
    Collects TP from a given slot. Returns the amount earned
    r-type: int
    """
    # Make the request
    response = self.session.request(
        "POST",
        'antrenor.php?',
        params={'pag': 'antrenament', 'slot': slot_num},
        data={'submit': 'Perform training'}
    )

    # -- Getting value of TP obtained --
    soup = make_soup(response)
    div_text = soup.find_all('div')[4].text
    # NOTE(review): the unescaped '.' matches any single character, which
    # tolerates either a thousands separator or a leading digit/space —
    # kept as-is; confirm against real page text before tightening.
    try:
        value = float(re.findall(r"(\d?.\d{3}) TP", div_text)[0])
    except (IndexError, ValueError) as err:
        # Narrowed from a bare `except:` — only a missing match (IndexError)
        # or an unparsable number (ValueError) are expected here.
        raise Exception(f"Failed to get value from {div_text}") from err

    # Fix values: "1.500" parses as 1.5, so small values are scaled back up
    if 10 < value < 100:
        raise ValueError("Unknown training point value")
    elif value <= 10:
        value *= 1000
    return int(value)
def __get_rating_soup(self):
    """ The film's rating info is loaded from a different page
    Hence we make the request to this separate page to get it
    r-type: BeautifulSoup
    """
    response = SESSION.request("GET", f"csi/film/{self.path}/rating-histogram/")
    return make_soup(response)
def __get_matchpage_soup(self):
    """ Gets the soup
    Called by __init__ to avoid repeat calling
    Divides soup up into commonly used sections
    r-type: dict """
    # -- Make request to the match's page --
    response = self.session.request("GET", suburl=self.suburl_match, params=self.params)
    soup = make_soup(response)

    # -- Check that match has been played --
    table = soup.find('table')
    if table.find('tr').find_all('td')[1].text != "Final Score":
        raise Exception("Match has not been played yet")

    # -- Create soup dict for commonly used soup --
    game_events_table = soup.find_all('table')[2]
    rows = table.find_all('tr')[1:4]

    return {
        'table': table,
        'game_events_table': game_events_table,
        'rows': rows,
        # Attendance/weather line is the second-to-last events row
        'attweath': game_events_table.find_all('tr')[-2].text,
        'sub_table': rows[1].find('table'),
        'home_stats': rows[1].find('td'),
        'away_stats': rows[1].find_all('td')[-1],
    }
def __get_info_soup(self):
    """ Go the main film_page and grab the soup.
    r-type: BeautifulSoup"""
    response = SESSION.request("GET", self.suburl)
    soup = make_soup(response)
    return soup.find('div', id='film-page-wrapper')
def request_listed_players(self, position):
    """ Search the transfer list for players of a given position and
    return their player ids. """
    # Widest possible filters so every listed player of this position matches
    search_payload = {
        'varsta': 0,         # Age
        'pozitie': position,
        'A1': 10,            # Skill 1
        'A2': 10,            # Skill 2
        'A3': 10,            # Skill 3
        'AMB': 1,            # Ambition
        'INT': 1,            # Intelligence
        'REZ': 1,            # Stamina
        'AGR': 5,            # Aggression (highest considered worst, hence redboy has default set to 5)
        'VUL': 5,            # Vulnerability (highest considered worst, hence redboy has default set to 5)
    }
    response = self.session.request("POST", suburl='lista_transferuri.php', data=search_payload)
    soup = make_soup(response)

    # Each result is a form whose action link contains the player's id
    return [util.get_id_from_href(form.get('action')) for form in soup.find_all('form')]
def get_page(self, page_num, reverse=False):
    """ Return the finance entries on a given page.

    page_num -- 1-based page index within the finance pages
    reverse  -- if True, return the entries in reversed order
    r-type: list
    """
    if page_num not in range(1, self.total_pages + 1):
        raise Exception(f"Page num {page_num} outside pages range")

    ## The last page may hold fewer entries than a full page
    entries_num = self.entries_per_page if page_num != self.total_pages else self.entries_on_last_page

    ## Get the starting entry id
    entry_id = self.__get_entry_start(page_num)
    # (debug print of the starting entry id removed)

    ## Make request to page
    request = self.session.request(
        "GET",
        suburl=self.finance_subpage,
        params={'nr_pag': str(page_num)}
    )
    soup = make_soup(request)

    # Rows of the finance table, excluding the trailing summary row
    finance_table = soup.find_all('table', attrs={'width': 300})[1].find_all('tr')[:-1]

    # Entries are popped from the end of the table so ids ascend with i
    page_entries = [
        self.__get_entry(entry=finance_table.pop(), entry_id=entry_id + i)
        for i in range(entries_num)
    ]

    if reverse:
        page_entries.reverse()
    return page_entries
def psych(self):
    """ Get the current hired psychologist if one exists
    Otherwise returns False. """
    response = self.session.request("GET", suburl=self.suburl_facilities)
    soup = make_soup(response)

    # Second row of the psychologist facilities table
    psych_box = soup.find_all('table')[9].find_all('tr')[1]

    # A "Hire psychologist" button means nobody is currently hired
    if psych_box.find_all('input', attrs={'value': 'Hire psychologist'}):
        return False

    name = psych_box.find('b').text
    link = psych_box.find('a', attrs={'onmouseover': True})

    # Level and consultation fee are embedded in the tooltip markup
    pattern = r"Level: (\d{1})/5 <br> Consultation: (\d{0,3}?\.?\d{1,3}) C"
    level, consultation = re.findall(pattern, link['onmouseover'])[0]

    return {
        'name': name,
        'level': int(level),
        'consultation': self.get_value_from_string(consultation),
    }
def get_daily_bonus(self, choice=1):
    """ Collects the Daily Bonus that you get for logging in
    1. Checks if bonus collected already -> return False
    2. Collects bonus
    3. TODO Print out the bonus received
    r-type: False if the bonus was already collected, else None
    """
    # `x not in y` is the idiomatic form of `not x in y`
    if choice not in range(1, 6):
        raise ValueError("Choice must be in inclusive range 1-5")

    # Check whether collected already
    response = self.session.request(
        "GET",
        suburl=self.suburl_dailybonus
    )
    soup = make_soup(response)
    red_font = soup.find('font', attrs={'color': 'red'})
    if red_font and red_font.text.startswith("You've already collected the daily bonus"):
        print("Daily bonus was already collected today")
        return False

    # Collect the chosen bonus ('cadou' = gift)
    self.session.request(
        "GET",
        suburl=self.suburl_dailybonus,
        params={'cadou': choice}
    )
def load(self, *args):
    """ Overload of load from parent class.
    Uses the edit view rather than standard list view. """
    formatted_name = self.get_formatted_name()
    edit_url = f"{SESSION.username}/list/{formatted_name}/edit"
    response = SESSION.request("GET", edit_url)
    self.soup = make_soup(response)
def get_film_names(self):
    """ Returns each id in the film list together with the corresponding film_name.

    NOTE(review): this block appears truncated — `last_page` is assigned but
    never used and the function returns nothing; the remainder of the
    pagination logic is presumably missing. TODO confirm against full source.
    """
    response = SESSION.request("GET", self.view_list)
    soup = make_soup(response)

    # No pagination bar means the list fits on a single page
    if not (page_navigator := soup.find('div', class_='pagination')):
        last_page = 1
def get_blocked():
    """ Returns a list of the users in your block list.
    NOTE: You can only see who you've blocked, hence there is no username
    argument for this function unlike following and followers. """
    blocked_url = f"{SESSION.username}/blocked/"
    response = SESSION.request("GET", blocked_url)
    return __get_people(make_soup(response))
def __users_search_query(session, search_club_id_payload):
    """ Conducts a search query on the users page,
    Returns the resulting soup. """
    response = req_post(
        session,
        'useri.php?',
        params={"pag": "cauta"},
        data=search_club_id_payload
    )
    return make_soup(response)
def __get_active_managers(self):
    """
    Returns a list of active managers' club ids.
    rtype: list
    """
    # Make post request via Community > Users > Search > Search by Manager
    request = req_post(
        self.sesh,
        "useri.php?",
        params={"pag": "cauta"},
        data={
            'cautare': 2,
            'manager': ''
        }  # Leave manager blank - shows all
    )
    # Get resulting soup
    soup = make_soup(request)

    # Grab the table containing table rows containing data for...
    # User, Action, Club, Last Login
    table = soup.find_all('table')[2]

    # Manager considered inactive if last_logged_in before target_datetime
    server_time = pendulum.now(tz=tz.server)
    target_datetime = server_time.subtract(days=7)

    def is_active(tr):
        """ Returns the club_id of the club if they are active, else False.
        rtype: int """
        # Get last logged in
        last_logged_in = tr.find_all('td')[3].text

        # Bypass weird Best11 error whereby the year is 0
        # NOTE These managers are no longer active regardless.
        year = int(last_logged_in.split('-')[0])
        if year == 0:
            return False

        # Convert to pendulum.datetime
        last_logged_in = pendulum.parse(last_logged_in, tz=tz.server)
        if last_logged_in < target_datetime:
            # Manager is inactive
            return False

        # Manager is active
        href = tr.find_all('td')[2].find('a').get('href')
        club_id = util.get_id_from_href(href)
        return club_id

    # Get all table rows in users search query soup
    table_rows = table.find_all('tr')[1:]

    # Run is_active() func for each manager (pop() walks rows last-to-first)
    active_managers = []
    while table_rows:
        # Add each manager to the list if they are active
        if n := (is_active(table_rows.pop())):
            active_managers.append(n)

    # BUG FIX: the original fell off the end and implicitly returned None —
    # the collected list was never returned to the caller.
    return active_managers
def origin(self):
    """ Returns the first season in a club's history
    - presumably the season a manager started playing. """
    # Make request to club history page
    response = req_get(self.sesh, 'istoric_club.php?', params=self.params)
    soup = make_soup(response)

    # The bottom row of the table holds the earliest season
    earliest_season = soup.find_all('tr')[-1].find('td').text
    return int(earliest_season)
def tactic_a_teamsheet(self):
    """ Test function for checking info can be scraped properly.
    Returns your teamsheet for tactic A. """
    response = req_get(self.sesh, "tactici.php")
    soup = make_soup(response)

    # The captain <select> lists every player in the teamsheet
    captain_select = soup.find('select', attrs={'class': 'normal', 'name': 'cpt'})
    for option in captain_select.find_all('option'):
        print(option.text)
def __init__(self, sesh, player_id):
    """ Build a player view: fetch the player's page tables and map out
    where each piece of information lives within them. """
    super().__init__(sesh)
    self.player_id = player_id

    # The same params are used so frequently across this class
    # that they are kept as an instance var
    self.params = {'id': player_id}

    # -- Generate table soup ---
    request = req_get(self.sesh, "vizualizare_jucator.php?", params=self.params)
    self.tables = make_soup(request).find_all('table')

    # -- If player does not exist, do not bother executing remaining code --
    if not self.__check_player_exists():
        raise Exception("Invalid player")

    # -- Set up Index Dict. ---
    # The dict contains all the indexes of player information.
    # It utilises the get_table_index decorator to distribute these
    # indexes across the class's methods.
    # NOTE: these names MUST correspond with the names of the methods
    index_dict = {
        'player_name': 2, 'club': 2, 'club_id': 2,
        'picture': 4, 'pos': 5, 'age': 6, 'salary': 7, 'value': 8,
        'exp': 9, 'nat': 11, 'boots': 12,
        'goals': 14, 'mom': 14, 'nat_stats': 14,
        'skill': 16, 'energy': 19, 'morale': 20, 'fixed': 23
    }

    # -- Get Transfer listed attribute --
    self.listed = self.__is_listed()

    if not self.listed:
        # If player is not listed, index dict as above is correct
        self.index_dict = index_dict
    else:
        # Shift most values down by three because tables are moved
        # down the page to make way for transfer info
        unshifted = ('player_name', 'club')
        self.index_dict = {
            key: idx if key in unshifted else idx + 3
            for key, idx in index_dict.items()
        }
def load(self, username):
    """ load an instance for an existing list, given its name. """
    formatted_name = self.get_formatted_name()
    list_url = f"{username}/list/{formatted_name}/"

    # Make request to list url on Letterboxd
    response = SESSION.request("GET", list_url)
    self.soup = make_soup(response)
def __get_active_managers(self):
    """ Returns a list of active managers, one dict per manager.
    r-type: list of dicts
    r-format: [{'club_id': 200, 'club': 'Solent City', 'manager': 'callumEvans'}, ...]
    """
    # Make post request via Community > Users > Search > Search by manager.
    # Leaving 'manager' blank shows all users.
    response = self.session.request(
        "POST",
        "useri.php?",
        params={'pag': 'cauta'},
        data={'cautare': 2, 'manager': ''}
    )
    soup = make_soup(response)

    # Grab the table containing table rows, which each contain data for...
    # User, Action, Club, Last Login
    table_rows = soup.find_all('table')[2].find_all('tr')[1:]

    # Manager considered inactive if they last logged in before this cutoff
    cutoff = pendulum.now(tz=tz.server).subtract(days=7)

    def is_active(tr):
        # Last-login timestamp is in the final <i> element of the row
        last_login_text = tr.find_all('i')[-1].text

        # Bypass weird Best11 error whereby the year is 0
        # NOTE These managers are no longer active regardless.
        if int(last_login_text.split('-')[0]) == 0:
            return False

        # Active iff they logged in on or after the cutoff
        return pendulum.parse(last_login_text, tz=tz.server) >= cutoff

    def get_manager(tr):
        """ Get the information about a single manager based on the row. """
        manager_name = tr.find('b').text
        club_link = tr.find('a', href=re.compile(r"vizualizare_club.php\?"))
        return {
            'club_id': util.get_id_from_href(club_link.get('href')),
            'club': club_link.text,
            'manager': manager_name,
        }

    return [get_manager(row) for row in table_rows if is_active(row)]
def transfers(self):
    """ Returns a club's transfers as two lists: (bought, sold).
    Each list holds one info dict per transfer. """
    response = req_get(self.sesh, "transferuri_club.php?", params=self.params)
    tables = make_soup(response).find_all('table')

    # Rows under transfers(in) and transfers(out) respectively;
    # each row is parsed via __get_individual_transfer_info()
    bought = [self.__get_individual_transfer_info(row) for row in tables[2].find_all('tr')]
    sold = [self.__get_individual_transfer_info(row) for row in tables[4].find_all('tr')]
    return bought, sold
def tables(self):
    """ Returns the soup tables from club.php, which contain extra info
    about the user's club.
    NOTE: kept as a property (rather than computed once in __init__) so
    the data remains constantly updated. """
    response = self.session.request("GET", "club.php?")
    soup = make_soup(response)
    return soup.find_all('table')
def medical_allowance(self):
    """ Get the medical allowance level (1-5 inclusive)
    r-type: int """
    response = self.session.request("GET", suburl=self.suburl_facilities)
    soup = make_soup(response)

    # The currently selected <option> in the medical table holds the level
    medical_box = soup.find_all('table')[8]
    selected_option = medical_box.find('option', attrs={'selected': True})
    return int(selected_option.get('value'))
def conduct_search(data):
    """ Conduct an individual search on Best11 for transfer listed players.
    Returns the resulting player ids.
    NOTE(review): relies on `self` and `subpage` from an enclosing scope —
    presumably a closure defined inside a method; confirm in full source. """
    response = req_post(self.sesh, subpage, data=data)
    soup = make_soup(response)

    # Each result form's action link contains a player id
    forms = soup.find_all('table')[1].find_all('form')
    return [int(util.get_id_from_href(form.get('action'))) for form in forms]
def get_training_points(self, only_slot=False, max_tp=False):
    """ Collects Training Points (TP)
    1. TODO check if TP collected already for one or both technical staff
    2. Collect TP
    3. Prints out the amount of TP earned. TODO make float?

    only_slot -- if set (1 or 2), only collect from that slot
    max_tp    -- if set, skip collection when balance exceeds this cap
    r-type: int (total TP earned), or False if nothing could be collected
    """
    # You have reached max TP balance set in UserSettings. Do not continue
    if max_tp and self.tp_balance > max_tp:
        print("Exceeded max set TP")
        return False

    # -- Ensure that techstaff hired in one or more of the selected slots --
    # e.g. if only_slot == False and only slot 2 hired -> this is okay
    # e.g. if only_slot == 1 and only slot 2 hired -> no point in continuing; return False
    current_techstaff = self.techstaff
    if not current_techstaff:
        print("No techstaff have been hired! Could not collect TP")
        return False
    elif only_slot and not current_techstaff.get(only_slot):
        print(f"No techstaff hired in position {only_slot}")
        return False

    # -- Determine if selected slots are requestable --
    # i.e TP has not yet been collected today
    response = self.session.request(
        "GET",
        suburl=self.suburl_facilities
    )
    soup = make_soup(response)
    table_data = soup.find_all('table')[2].find_all('tr')[1].find('td')

    slots_to_train = [only_slot] if only_slot else [1, 2]
    requestable_slots = []
    for i in slots_to_train:
        # NOTE(review): `[w&;]` matches a single 'w', '&' or ';' between the
        # query params — looks intended to tolerate '&'/'&amp;' separators;
        # kept as-is, but verify against the real page markup.
        pattern = fr"antrenor\.php\?pag=antrenament[w&;]slot={i}"
        if table_data.find('form', action=re.compile(pattern)):
            requestable_slots.append(i)

    if not requestable_slots:
        print("No slots were useable!")
        return False

    # -- Make the requests --
    # BUG FIX: previously iterated `slots_to_train`, requesting slots whose
    # TP had already been collected; only requestable slots are trained now.
    total_tp_earnt = 0
    for slot in requestable_slots:
        total_tp_earnt += self.__get_tp_from_slot(slot)

    # Print/Return the result
    print(f"TP earnt: {total_tp_earnt}")
    return total_tp_earnt
def get_bonus_from_partners(self, club_id=USER_CLUB.club_id):
    """ Collects the Bonus from Partners
    1. Checks if bonus collected already -> return False
    2. Collects Bonus from Partners

    NOTE(review): the default `club_id` is evaluated once at import time;
    fine while USER_CLUB is effectively constant — worth confirming.
    r-type: None, or False if already collected
    """
    ## Check bonus hasn't been collected already
    response = self.session.request(
        "GET",
        suburl=self.suburl_clubpage
    )
    credits_balance_table = make_soup(response).find_all('table')[20]
    # The bonus gif only appears while the bonus is still collectable
    if not credits_balance_table.find('img', attrs={'src': re.compile(r"\/bonus.gif")}):
        print("Bonus from Partners already collected today")
        return False

    ## Get valid partner_ids
    response = self.session.request(
        "GET",
        suburl='bonus_parteneri.php?'
    )
    soup = make_soup(response)

    # Pattern to find bonus link
    pattern = r"get_bonus.php\?partener=(\d+)&"
    bonus_hrefs = [i.get('href') for i in soup.find_all('a', attrs={'href': re.compile(pattern)})]

    # Extract partner_ids from bonus_links
    # NOTE: redboy frequently changes these as websites go down, hence it's good to get up to date partner_ids
    valid_partner_ids = [int(re.findall(pattern, i)[0]) for i in bonus_hrefs]

    # Collect bonus for each partner_id. Plain loop — a comprehension
    # should not be used purely for its side effects.
    for partner_id in valid_partner_ids:
        self.session.request(
            "GET",
            suburl='get_bonus.php?',
            params={'partener': partner_id, 'club': club_id}
        )
def avatar(self):
    """ Returns the link to a club's avatar, or False if the club
    uses the default image. """
    response = req_get(self.sesh, "vizualizare_club.php?", params=self.params)
    soup = make_soup(response)

    # Grab avatar link from soup; encode spaces so the link works
    img = soup.find_all('table')[1].find_all('tr')[2].find('img')
    avatar_src = img.get('src').replace(' ', '%20')

    # If avatar is the default img
    if '/standard.jpg' in avatar_src:
        return False

    return MAIN_URL + avatar_src