import json
import logging
import random

from bs4 import BeautifulSoup

from requester import Requester  # import path assumed; Requester is the project's HTTP helper


class GitHubCrawler:
    base_url = 'https://github.com/search?q={keywords}&type={search_type}'

    def __init__(self):
        self.requester = Requester()

    def search(self, keywords: list, search_type: str, proxies: list) -> list:
        """Run a GitHub search and return the URLs of the matched results."""
        url = self._build_endpoint(keywords, search_type)
        proxy = self._set_proxies(proxies)
        logging.info(f'Crawling URL: "{url}"')
        response = self.requester.get(url, proxy)
        return self._process_response(response)

    def _build_endpoint(self, keywords: list, search_type: str) -> str:
        # GitHub expects keywords joined with '+' and a lowercase search type.
        formatted_keywords = '+'.join(keywords)
        return self.base_url.format(
            keywords=formatted_keywords,
            search_type=search_type.lower(),
        )

    @staticmethod
    def _set_proxies(proxies: list) -> dict:
        # Pick one proxy at random for this request.
        return {'http': random.choice(proxies)}

    @staticmethod
    def _process_response(response: str) -> list:
        result = []
        parsed_response = BeautifulSoup(response, 'html.parser')
        # Each search hit is wrapped in a <div class="f4 text-normal">.
        entities = parsed_response.find_all('div', {'class': 'f4 text-normal'})
        for entity in entities:
            try:
                # The result URL is embedded as JSON in the link's
                # data-hydro-click attribute.
                entity_url = json.loads(
                    entity.find('a').attrs['data-hydro-click']
                )['payload']['result']['url']
            except Exception as e:
                logging.warning(f'Error processing entity: "{repr(e)}"')
                continue
            result.append(entity_url)
        return result
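# A minimal usage sketch, assuming Requester.get(url, proxies) returns the
# response body as a string. The proxy URL below is a placeholder, not a
# real endpoint.
if __name__ == '__main__':
    crawler = GitHubCrawler()
    repo_urls = crawler.search(
        keywords=['python', 'crawler'],
        search_type='Repositories',
        proxies=['http://proxy.example:8080'],  # placeholder proxy
    )
    print(repo_urls)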
# `con` is the open database connection and `cur` the cursor for the SELECT
# executed just before this fragment; `res` holds rows of
# (faceit_id, player_faceit_id).
res = cur.fetchall()
cur.close()

req = Requester()


def get_player_team(teams, player_id):
    # Return the team_id of the team whose roster contains player_id.
    for team in teams:
        for player in team['players']:
            if player['player_id'] == player_id:
                return team['team_id']


for i, r in enumerate(res):
    try:
        # The first round of the match carries the overall round stats.
        stats = req.get(f'matches/{r[0]}/stats')['rounds'][0]
        winner = stats['round_stats']['Winner']
        if winner == get_player_team(stats['teams'], r[1]):
            # Parameterized query instead of f-string interpolation to avoid
            # SQL injection (%s placeholders assume a psycopg2-style driver;
            # sqlite3 uses ? instead).
            cur = con.cursor()
            cur.execute(
                "UPDATE matches SET win = true "
                "WHERE faceit_id = %s AND player_faceit_id = %s",
                (r[0], r[1]),
            )
            con.commit()
            cur.close()
        if i % 50 == 0:
            print(i)  # progress indicator
    except Exception as ex:
        print(ex)
        continue

con.close()