def fetch_problem(p):
    """Enrich problem dict *p* with tags and writers/testers scraped from its page.

    Retries up to 3 times with a growing back-off; accumulated errors are
    logged only if every attempt failed.  Returns *p* (mutated in place).
    """
    errors = set()
    for attempt in range(3):
        try:
            page = REQ.get(p['url'], time_out=30)
            # Follow the link to the problem-detail module page.
            match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
            page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
            # Key/value cells of the statistics table, e.g. "Categories:" / "Writer:".
            matches = re.findall(r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>', page, re.DOTALL)  # noqa
            for key, value in matches:
                key = key.strip().rstrip(':').lower()
                if key == 'categories':
                    tags = [t.strip().lower() for t in value.split(',')]
                    tags = [t for t in tags if t]
                    if tags:
                        p['tags'] = tags
                elif key.startswith('writer') or key.startswith('tester'):
                    # Normalize singular/plural to 'writers' / 'testers'.
                    key = key.rstrip('s') + 's'
                    p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
            for w in p.get('writers', []):
                writers[w] += 1
        except Exception as e:
            errors.add(f'error parse problem info {p}: {e}')
            sleep(5 + attempt)
        else:
            errors = None
            # FIX: without this break a successful attempt still re-fetched the
            # problem pages on the remaining iterations.
            break
    if errors:
        LOG.error(errors)
    return p
def fetch_profle_page(account):
    """Fetch profile data for *account*.

    For Chinese accounts, queries both GraphQL endpoints (contest list from
    leetcode.com, public profile from leetcode-cn.com) and returns a dict;
    otherwise fetches the plain profile page (None on a 404).
    Returns ``(account, page)``.

    NOTE(review): the function name keeps the original 'profle' typo and the
    helper is spelled `is_chine` — presumably `is_china`; both are referenced
    elsewhere, so they are left untouched here.
    """
    if is_chine(account):
        ret = {}
        # Contest list comes from the global site.
        page = REQ.get(
            'https://leetcode.com/graphql',
            post=b'{"variables":{},"query":"{allContests{titleSlug}}"}',
            content_type='application/json',
        )
        ret['contests'] = json.loads(page)['data']
        # Public profile comes from the CN site; the user slug is spliced
        # directly into the raw bytes payload.
        page = REQ.get(
            'https://leetcode-cn.com/graphql',
            post=b'''
{"operationName":"userPublicProfile","variables":{"userSlug":"''' + account.key.encode() + b'''"},"query":"query userPublicProfile($userSlug: String!) { userProfilePublicProfile(userSlug: $userSlug) { username profile { userSlug realName contestCount ranking { currentLocalRanking currentGlobalRanking currentRating ratingProgress totalLocalUsers totalGlobalUsers } } }}"}''',  # noqa
            content_type='application/json',
        )
        ret['profile'] = json.loads(page)['data']
        page = ret
    else:
        url = resource.profile_url.format(**account.dict_with_info())
        try:
            page = REQ.get(url)
        except FailOnGetResponse as e:
            arg = e.args[0]
            # Missing profile (or response without a code attribute) -> None.
            if not hasattr(arg, 'code') or arg.code == 404:
                page = None
            else:
                raise e
    return account, page
def _get(url, lock=Lock()):
    """GET *url*, transparently logging in to HackerEarth when needed.

    Retries on HTTP 500 up to 15 attempts with a growing back-off; any other
    failure (or the 15th attempt) raises ExceptionParseStandings.

    The mutable default `lock=Lock()` is intentional: all calls share one
    lock so only a single thread performs the login.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            page = REQ.get(url)
            # A served login form means our session expired / is anonymous.
            if 'id="id_login"' in page and 'id="id_password"' in page:
                with lock:
                    # Double-check inside the lock so only one thread logs in.
                    if not Statistic.LOGGED_IN:
                        page = REQ.get(Statistic.LOGIN_URL_)
                        page = REQ.submit_form(
                            {
                                'login': conf.HACKEREARTH_USERNAME,
                                'password': conf.HACKEREARTH_PASSWORD,
                                'signin': 'Log In',
                            },
                            limit=0,
                        )
                        Statistic.LOGGED_IN = True
            if 'AJAX' in url:
                # AJAX endpoints require the XHR marker and a CSRF token.
                headers = {'x-requested-with': 'XMLHttpRequest'}
                csrftoken = REQ.get_cookie('csrftoken')
                if csrftoken:
                    headers['x-csrftoken'] = csrftoken
            else:
                headers = {}
            return REQ.get(url, headers=headers)
        except FailOnGetResponse as e:
            if attempt == 15 or getattr(e.args[0], 'code', None) != 500:
                raise ExceptionParseStandings(e.args[0])
            sleep(2 * attempt)
def _get(self, *args, **kwargs):
    """Fetch a page; if a login form is served, submit credentials and re-fetch."""
    response = REQ.get(*args, **kwargs)
    login_form = REQ.form(limit=2, selectors=['class="form-horizontal"'])
    if not login_form:
        return response
    credentials = {
        'username': self._username,
        'password': self._password,
    }
    login_form['post'].update(credentials)
    return REQ.get(login_form['url'], post=login_form['post'])
def __init__(self, *args, **kwargs):
    """Initialize the parent and log in to Facebook when a login form is served."""
    super().__init__(*args, **kwargs)
    REQ.get('https://facebook.com/')
    login_form = REQ.form(action='/login/')
    if not login_form:
        return
    credentials = {
        'email': conf.FACEBOOK_USERNAME,
        'pass': conf.FACEBOOK_PASSWORD,
    }
    REQ.submit_form(data=credentials, form=login_form)
    # Sometimes a password re-validation form follows; submit once more.
    revalidate_form = REQ.form(action='/login/')
    if revalidate_form and 'validate-password' in revalidate_form['url']:
        REQ.submit_form(data=credentials, form=revalidate_form)
def __init__(self, *args, **kwargs):
    """Initialize the parent and authenticate against geeksforgeeks if the login form appears."""
    super().__init__(*args, **kwargs)
    landing = REQ.get('https://auth.geeksforgeeks.org/')
    login_form = REQ.form(page=landing, action=None, fid='Login')
    if not login_form:
        return
    # Prime the login token cookie before posting credentials.
    REQ.get('https://auth.geeksforgeeks.org/setLoginToken.php')
    REQ.submit_form(
        url='https://auth.geeksforgeeks.org/auth.php',
        data={
            'user': conf.GEEKSFORGEEKS_USERNAME,
            'pass': conf.GEEKSFORGEEKS_PASSWORD,
        },
        form=login_form,
    )
def fetch_problem(p):
    """Enrich problem dict *p* with tags, writers/testers and per-division info.

    Retries up to 3 times with a growing back-off; accumulated errors are
    logged only if every attempt failed.  Returns *p* (mutated in place).
    """
    errors = set()
    for attempt in range(3):
        try:
            page = REQ.get(p['url'], time_out=30)
            # Follow the link to the problem-detail module page.
            match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
            page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
            matches = re.findall(r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>', page, re.DOTALL)  # noqa
            for key, value in matches:
                key = key.strip().rstrip(':').lower()
                if key == 'categories':
                    tags = [t.strip().lower() for t in value.split(',')]
                    tags = [t for t in tags if t]
                    if tags:
                        p['tags'] = tags
                elif key.startswith('writer') or key.startswith('tester'):
                    # Normalize singular/plural to 'writers' / 'testers'.
                    key = key.rstrip('s') + 's'
                    p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
            for w in p.get('writers', []):
                writers[w] += 1
            # Per-division statistics tables: first (unnamed) column holds the
            # key, the column whose header mentions the division holds the value.
            info = p.setdefault('info', {})
            matches = re.finditer('<table[^>]*paddingTable2[^>]*>.*?</table>', page, re.DOTALL)
            for match in matches:
                html_table = match.group(0)
                rows = parsed_table.ParsedTable(html_table)
                for row in rows:
                    key, value = None, None
                    for k, v in row.items():
                        if k == "":
                            key = v.value
                        elif k and division_str in k.split():
                            value = v.value
                    if key and value:
                        key = re.sub(' +', '_', key.lower())
                        info[key] = value
                        if key == 'point_value':
                            value = toint(value) or asfloat(value)
                            if value is not None:
                                p['full_score'] = value
        except Exception as e:
            errors.add(f'error parse problem info {p}: {e}')
            sleep(5 + attempt)
        else:
            errors = None
            # FIX: without this break a successful attempt still re-fetched the
            # problem pages on the remaining iterations.
            break
    if errors:
        LOG.error(errors)
    return p
def fetch_ratings(user, account):
    """Scrape per-contest ratings and summary stats for *user* from toph.co.

    Returns ``(user, info, ratings)``; ``(user, None, None)`` on a 404 and
    ``(user, False, None)`` for virtual accounts or other fetch failures.
    """
    if account.info.get('is_virtual'):
        return user, False, None
    try:
        page = REQ.get(f'https://toph.co/u/{user}/ratings')
    except FailOnGetResponse as exc:
        missing = exc.code == 404
        return (user, None, None) if missing else (user, False, None)

    # The last table on the page is the ratings history.
    all_tables = re.findall('<table[^>]*>.*?</table>', page, re.DOTALL)
    history = parsed_table.ParsedTable(html=all_tables[-1])

    ratings, info = {}, {}
    for entry in history:
        contest_href = entry['Contest'].column.node.xpath('.//a/@href')[0]
        contest_key = contest_href.rstrip('/').split('/')[-1]
        new_rating = int(entry['Rating'].value)
        ratings[contest_key] = {'new_rating': new_rating}
        # First (most recent) row wins as the current rating.
        info.setdefault('rating', new_rating)

    # Summary value/title widget pairs elsewhere on the page.
    widget_re = '''
        <div[^>]*class="?value"?[^>]*>(?P<value>[^<]*)</div>[^<]*
        <div[^>]*class="?title"?>(?P<key>[^<]*)</div>
        '''
    for widget in re.finditer(widget_re, page, re.DOTALL | re.VERBOSE):
        info[widget.group('key').lower()] = widget.group('value')
    return user, info, ratings
def get_region(team_name): nonlocal team_regions if not team_regions: page = REQ.get('https://icpc.kimden.online/') matches = re.finditer( '<label[^>]*for="(?P<selector>[^"]*)"[^"]*onclick="setRegion[^"]*"[^>]*>(?P<name>[^>]*)</', page, ) regions = {} for match in matches: selector = match.group('selector').replace( 'selector', '').replace('--', '-') regions[selector] = match.group('name') pprint(regions) matches = re.finditer( r''' <tr[^>]*class="(?P<class>[^"]*)"[^>]*>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*title="(?P<name>[^"]*)">[^<]*</td> ''', page, re.VERBOSE, ) for match in matches: classes = match.group('class').split() name = match.group('name') name = canonize_name(name) for c in classes: if c in regions: team_regions[name] = regions[c] break team_name = canonize_name(team_name) return team_regions[team_name]
def get_from_icpc(year):
    """Return an OrderedDict of gold/silver/bronze medal counts for *year*.

    Scrapes the medal table embedded in the ICPC community results page.
    Returns None when the payload is not JSON, has no medal table, or the
    table contains no medal rows.
    """
    medal_result_url = f'https://icpc.global/api/help/cms/virtpublic/community/results-{year}'
    page = REQ.get(medal_result_url)
    try:
        json_data = json.loads(page)
    except json.decoder.JSONDecodeError:
        return
    regex = '''<table[^>]*id=["']medalTable[^>]*>.*?</table>'''
    match = re.search(regex, json_data['content'], re.DOTALL)
    if not match:
        return
    html_table = match.group(0)
    table = parsed_table.ParsedTable(html_table)
    medals = OrderedDict()
    fields = ('gold', 'silver', 'bronze')
    for f in fields:
        medals[f] = 0
    for r in table:
        # Only the first cell of each row carries the medal class marker.
        _, v = next(iter(r.items()))
        for attr in v.attrs.get('class', '').split():
            if attr in fields:
                medals[attr] = medals.get(attr, 0) + 1
                break
    # FIX: `if not medals` was dead code — the dict is pre-seeded with three
    # keys and therefore always truthy.  Treat "no medal rows counted" as absent.
    if not any(medals.values()):
        return
    return medals
def process_data(data):
    """Merge one page of HackerRank leaderboard rows into the shared `result`.

    Mutates the enclosing-scope `result`, `schools` and `hidden_fields`
    (closure state); resolves unknown school ids via one extra API call.
    """
    # API variants: newer payloads use 'models', older use 'data'.
    rows = data['models'] if 'models' in data else data['data']
    school_ids = set()
    for r in rows:
        if isinstance(r.get('attributes'), dict):
            r = r['attributes']

        def get(*fields):
            # Pop the first present field from the current row `r`
            # (destructive: leftovers are copied into the result later).
            for f in fields:
                if f in r:
                    return r.pop(f)

        handle = get('hacker', 'name')
        if handle is None:
            continue
        row = result.setdefault(handle, collections.OrderedDict())
        row['member'] = handle
        score = get('score', 'solved_challenges')
        if score is None:
            # Fallback scoring variant reported as a fraction.
            score = get('percentage_score') * 100
        row['solving'] = score
        row['place'] = get('rank', 'leaderboard_rank')
        time = get('time_taken', 'time_taken_seconds')
        if time:
            row['time'] = self.to_time(time, 3)
        country = get('country')
        if country:
            row['country'] = country
        avatar_url = get('avatar')
        if avatar_url:
            row['info'] = {'avatar_url': avatar_url}
        # Any field the explicit handling above did not consume is kept,
        # but marked hidden for the UI.
        for k, v in r.items():
            if k not in row and v is not None:
                row[k] = v
                hidden_fields.add(k)
        if statistics and handle in statistics:
            stat = statistics[handle]
            for k in ('old_rating', 'rating_change', 'new_rating'):
                if k in stat:
                    row[k] = stat[k]
        if 'school_id' in row and row['school_id'] not in schools:
            school_ids.add(row['school_id'])
    if school_ids:
        # Resolve all unseen school ids in one batched request.
        # NOTE(review): assumes school ids are strings — ','.join would raise
        # on ints; confirm against the API payload.
        query = ','.join(school_ids)
        url = self.host + f'community/v1/schools?page[limit]={len(school_ids)}&filter[unique_id]={query}'
        page = REQ.get(url)
        data = json.loads(page)
        for s in data['data']:
            schools[s['id']] = s['attributes']['name']
    for row in result.values():
        if 'school_id' in row and 'school' not in row:
            row['school'] = schools[row['school_id']]
def update_problem_info(info):
    """Fill tags, writers, difficulty and hints for a LeetCode problem via GraphQL.

    The problem slug is derived from ``info['url']``; *info* is mutated in
    place and also returned.
    """
    slug = info['url'].strip('/').rsplit('/', 1)[-1]
    payload = {
        'operationName': 'questionData',
        'variables': {
            'titleSlug': slug
        },
        'query': 'query questionData($titleSlug: String!) { question(titleSlug: $titleSlug) { questionId difficulty contributors { profileUrl } topicTags { name } hints } }',  # noqa
    }
    response = REQ.get(
        'https://leetcode.com/graphql',
        content_type='application/json',
        post=json.dumps(payload).encode('utf-8'),
    )
    question = json.loads(response)['data']['question']

    info['tags'] = [tag['name'].lower() for tag in question['topicTags']]
    # Contributor usernames are the last path component of their profile URL;
    # default to 'leetcode' itself when nobody is listed.
    writers = []
    for contributor in question['contributors']:
        username = re.search('/(?P<username>[^/]*)/?$', contributor['profileUrl'])
        writers.append(username.group('username'))
    info['writers'] = writers or ['leetcode']
    info['difficulty'] = question['difficulty'].lower()
    info['hints'] = question['hints']
    return info
def get_response(url, message, types, force_str_paths=(), xmessage_type=None):
    """POST a grpc-web-text request and decode the protobuf response.

    *message*/*types* are blackboxprotobuf message + typedef for the request.
    *force_str_paths* lists key paths whose leaves must be decoded as bytes
    (otherwise blackboxprotobuf may guess a wrong wire type).
    Returns the decoded ``(message, types)`` pair.

    NOTE(review): *xmessage_type* is accepted but never used — confirm whether
    callers still pass it.
    """
    # grpc-web-text framing: 5-byte big-endian length prefix, then base64.
    query = blackboxprotobuf.encode_message(message, types)
    query = len(query).to_bytes(5, byteorder='big') + query
    query = base64.b64encode(query)
    page = REQ.get(url, post=query, content_type='application/grpc-web-text',
                   headers={'accept': 'application/grpc-web-text'})
    data = base64.b64decode(page)
    size = int.from_bytes(data[:5], 'big')
    data = data[5:5 + size]
    # Build a nested typedef forcing each listed path's leaf to 'bytes'.
    message_type = {}
    for path in force_str_paths:
        d = message_type
        # Rebind: all-but-last components walk the tree, last is the leaf key.
        *path, key = path
        for k in path:
            d = d.setdefault(k, {
                'type': 'message',
                'message_typedef': {}
            })
            d = d['message_typedef']
        d[key] = {'type': 'bytes'}
    message, types = blackboxprotobuf.decode_message(data, message_type)
    return message, types
def fetch_profile(user):
    """Fetch a TopCoder v2 profile, with one retry and rating fallback data.

    Returns a dict that always carries 'handle'; a 404 marks the account for
    removal via ``{'action': 'remove'}``, and a completely failed fetch sets a
    'delta' so the account is retried later.  Ratings are overlaid from the
    `dd_active_algorithm` feed when present.
    """
    url = f'http://api.topcoder.com/v2/users/{quote(user)}'
    ret = {}
    for _ in range(2):
        try:
            page = REQ.get(url)
            ret = json.loads(page)
            if 'error' in ret:
                if isinstance(ret['error'], dict) and ret['error'].get('value') == 404:
                    # Profile gone: signal removal instead of retrying.
                    ret = {'handle': user, 'action': 'remove'}
                else:
                    # Transient API error payload: retry (skips the sleep below
                    # only via `continue` falling through to the loop's end? no —
                    # `continue` skips sleep(1); see NOTE).
                    continue
            break
        except Exception:
            pass
        sleep(1)
    # NOTE(review): the error-payload `continue` bypasses sleep(1), while the
    # exception path sleeps — appears intentional (API answered, just retry).
    if 'handle' not in ret:
        if not ret:
            # Nothing fetched at all: postpone the next attempt by 30 days.
            ret['delta'] = timedelta(days=30)
        ret['handle'] = user
    if not ret.get('photoLink'):
        ret.pop('photoLink', None)
    if user in dd_active_algorithm:
        data = dd_active_algorithm[user]
        if 'alg_vol' in data:
            ret['volatility'] = toint(data['alg_vol'])
        if 'alg_rating' in data:
            ret['rating'] = toint(data['alg_rating'])
    return ret
def _get(self, *args, **kwargs):
    """Delegate to REQ.get, injecting configured LeetCode cookies exactly once."""
    already_authorized = getattr(self, '_authorized', None)
    configured_cookies = getattr(conf, 'LEETCODE_COOKIES', False)
    if configured_cookies and not already_authorized:
        for cookie_kwargs in configured_cookies:
            REQ.add_cookie(**cookie_kwargs)
        self._authorized = True
    return REQ.get(*args, **kwargs)
def fetch_results(page):
    """Fetch and JSON-decode one page of the ranking API."""
    ranking_url = self.API_RANKING_URL_FORMAT_.format(id=self.key, sid=session['id'], page=page)
    return json.loads(REQ.get(ranking_url))
def fetch_user(user, account):
    """Scrape name, country and avatar from a user's profile page.

    Returns ``(user, info)``; ``(user, None)`` on 404 and ``(user, False)``
    for any other fetch failure.
    """
    try:
        profile_url = resource.profile_url.format(account=user)
        page = html.unescape(REQ.get(profile_url))
    except FailOnGetResponse as exc:
        return (user, None) if exc.code == 404 else (user, False)

    info = {}
    # Plain header: just the display name.
    header = re.search(
        r'<div[^>]*class="page-header"[^>]*>\s*<h2[^>]*>(?P<name>[^<]*)<', page)
    if header:
        info['name'] = header.group('name')
    # Header variant with a country flag preceding the name.
    flagged = re.search(
        r'<h2[^>]*>\s*<img[^>]*alt="(?P<country>[^"]*)"[^>]*>\s*(?P<name>[^<]*)<', page)
    if flagged:
        info['country'] = flagged.group('country')
        info['name'] = flagged.group('name')
    avatar = re.search(
        r'<img[^>]*src="(?P<img>[^"]*)"(?:[^>]*(?:width|height)="[^"]*"){2}[^>]*>\s*<br[^>]*>', page)  # noqa
    if avatar:
        info['avatar_url'] = urljoin(profile_url, avatar.group('img'))
    return user, info
def fetch_and_process_page(page):
    """Fetch one leaderboard page and merge its rows into `result`; returns the raw payload."""
    leaderboard_url = f'https://practiceapi.geeksforgeeks.org/api/v1/contest/{self.key}/leaderboard/?page={page + 1}&type=current'  # noqa
    data = json.loads(REQ.get(leaderboard_url))
    for row in data['results']['ranks_list']:
        # The profile link's last path component is the canonical handle.
        handle = row.pop('profile_link').rstrip('/').rsplit('/', 1)[-1]
        entry = result.setdefault(handle, OrderedDict())
        display_name = row.pop('handle')
        if display_name != handle:
            entry['name'] = display_name
        entry['member'] = handle
        entry['place'] = row.pop('rank')
        entry['solving'] = row.pop('score')
        last_correct_submission = row.get('last_correct_submission')
        if last_correct_submission:
            # Timestamps are IST (UTC+05:30); store as time since contest start.
            submitted_at = dateutil.parser.parse(last_correct_submission + '+05:30')
            entry['time'] = self.to_time(submitted_at - self.start_time)
        for key, value in list(row.items()):
            if key.endswith('_score'):
                entry[key] = row.pop(key)
    return data
def get_all_users_infos():
    """Yield ``{'member', 'info': {'name'}}`` dicts for every Kattis ranklist user.

    Parses the main ranklist first, then fetches the extra ranklist URLs found
    in the page's scripts, concurrently across 10 workers.
    """
    base_url = 'https://open.kattis.com/ranklist'
    page = REQ.get(base_url)
    users = set()

    def parse_users(page):
        # `users` deduplicates members across all pages.
        # NOTE(review): the set is mutated from worker-produced generators; the
        # generators are consumed on the main thread (see loop below), so this
        # appears single-threaded in practice — confirm executor.map laziness.
        nonlocal users
        matches = re.finditer('<a[^>]*href="/users/(?P<member>[^"/]*)"[^>]*>(?P<name>[^<]*)</a>', page)
        for match in matches:
            member = match.group('member')
            if member in users:
                continue
            users.add(member)
            name = match.group('name').strip()
            yield {'member': member, 'info': {'name': name}}

    yield from parse_users(page)
    # Additional ranklist page URLs are embedded as JSON string literals.
    urls = re.findall(r'url\s*:\s*(?P<url>"[^"]+")', page)

    def fetch_url(url):
        url = json.loads(url)
        url = urljoin(base_url, url)
        page = REQ.get(url)
        yield from parse_users(page)

    with PoolExecutor(max_workers=10) as executor, tqdm(total=len(urls), desc='urls') as pbar:
        for gen in executor.map(fetch_url, urls):
            yield from gen
            pbar.update()
def fetch_team_results(d):
    """Fetch one team's per-problem scores and member list.

    Returns ``(d, info, users)`` where *info* holds problems/url/member.
    """
    team_id = str(d['id'])
    results_url = self.TEAM_RESULTS_URL_.format(cid=cid, uid=team_id, name=participaty_type)
    page = REQ.get(results_url)

    problems = {}
    score_matches = re.finditer(
        r'<a[^>]*href="[^"]*/Problem/(?P<code>[^"/]*)">[^<]*(?:\s*<[^>]*>)*(?P<score>[.0-9]+)',
        page)
    for score_match in score_matches:
        code = score_match['code']
        # Skip problems not declared for this contest.
        if code not in problems_info:
            continue
        problems.setdefault(code, {})['result'] = score_match['score']

    users = re.findall(
        '<a[^>]*href="[^"]*/CompetitorResults/[^"]*">([^<]*)</a>',
        page)
    return d, {'problems': problems, 'url': results_url, 'member': team_id}, users
def get_standings(self, users=None, statistics=None):
    """Parse the contest standings HTML table into the standard standings dict.

    Members are suffixed with the academic season (e.g. ' 2020-2021') to keep
    handles unique across seasons; duplicate members within one standings get
    a '-N' suffix.  Supports both ICPC-style (penalty) and IOI-style (Sum /
    'Сумма' total) tables.
    """
    # Academic season: September–August.
    year = self.start_time.year - (0 if self.start_time.month > 8 else 1)
    season = f'{year}-{year + 1}'
    result = {}
    page = REQ.get(self.standings_url)
    table = parsed_table.ParsedTable(
        html=page,
        xpath="//table[@class='ir-contest-standings']//tr")
    problems_info = collections.OrderedDict()
    for r in table:
        row = collections.OrderedDict()
        problems = row.setdefault('problems', {})
        # An IOI-style table is detected by a total-score column header.
        ioi_total_fields = ['Sum', 'Сумма']
        ioi_style = any((f in r for f in ioi_total_fields))
        for k, v in list(r.items()):
            classes = v.attrs['class'].split()
            if 'ir-column-contestant' in classes:
                row['member'] = v.value + ' ' + season
                row['name'] = v.value
            elif 'ir-column-place' in classes:
                row['place'] = v.value
            elif 'ir-column-penalty' in classes:
                row['penalty'] = int(v.value)
            elif 'ir-problem-count' in classes or k in ioi_total_fields:
                row['solving'] = int(v.value)
            elif len(k.split()[0]) == 1:
                # Single-letter header -> a problem column.
                letter = k.split()[0]
                problems_info[letter] = {'short': letter}
                if v.value == DOT:
                    continue
                p = problems.setdefault(letter, {})
                # Unicode minus is normalized; "result time" cells split apart.
                values = v.value.replace('−', '-').split(' ')
                p['result'] = values[0]
                if len(values) > 1:
                    p['time'] = values[1]
                if ioi_style and p['result'].isdigit():
                    val = int(p['result'])
                    if val:
                        # Below 100 points counts as a partial solution.
                        p['partial'] = val < 100
            else:
                row[k.lower()] = v.value
        if not problems or users and row['member'] not in users:
            continue
        member = row['member']
        # Disambiguate duplicate members with a '-N' suffix.
        if member in result:
            idx = 0
            while member + f'-{idx}' in result:
                idx += 1
            member += f'-{idx}'
            row['member'] = member
        result[member] = row
    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': list(problems_info.values()),
        'problems_time_format': '{H}:{m:02d}',
    }
    return standings
def fetch_profile(user): nonlocal stop if stop: return False url = Statistic.API_PROFILE_URL_FORMAT_.format(user=user) page = REQ.get(url) data = json.loads(page) return data
def fetch_attempts(handle):
    """Fetch and decode the attempts payload for *handle*; None when the request fails."""
    query = f'{{"nickname":{json.dumps(handle)},"include_non_final_results":true}}'
    attempts_url = api_attempts_url_format + encode(query)
    try:
        payload = decode(REQ.get(attempts_url))
    except FailOnGetResponse:
        payload = None
    return handle, payload
def _get(url, *args, **kwargs):
    """GET *url*, transparently solving the RCPC anti-bot cookie challenge.

    When the response is the JS challenge page, the AES parameters a/b/c are
    extracted, the RCPC cookie is computed locally and stored, and the
    redirect target is re-fetched.
    """
    page = REQ.get(url, *args, **kwargs)
    # Marker of the JS challenge page that computes the RCPC cookie client-side.
    if 'document.cookie="RCPC="+toHex(slowAES.decrypt(c,2,a,b))+";' in page:
        # Variables a, b, c are hex strings converted to byte-value lists.
        matches = re.findall(r'(?P<var>[a-z]+)=toNumbers\("(?P<value>[^"]*)"\)', page)
        variables = {}
        for variable, value in matches:
            variables[variable] = [int(value[i:i + 2], 16) for i in range(0, len(value), 2)]
        size = len(variables['a'])
        # Mode 2 mirrors the page's slowAES.decrypt(c, 2, a, b) call.
        ret = AESModeOfOperation().decrypt(variables['c'], None, 2, variables['a'], size, variables['b'])
        # Re-implement toHex: lowercase hex with zero-padded single digits.
        rcpc = ''.join(('0' if x < 16 else '') + hex(x)[2:] for x in map(ord, ret))
        REQ.add_cookie('RCPC', rcpc)
        # Follow the redirect the challenge page would perform.
        match = re.search('document.location.href="(?P<url>[^"]*)"', page)
        url = match.group('url')
        page = REQ.get(url, *args, **kwargs)
        REQ.save_cookie()
    return page
def get_source_code(contest, problem):
    """Fetch the submission page for *problem* and extract its source code.

    Raises ExceptionParseStandings when the URL or the code block is missing.
    """
    submission_url = problem.get('url')
    if submission_url is None:
        raise ExceptionParseStandings('Not found url')
    page = REQ.get(submission_url)
    code_match = re.search('<pre[^>]*id="submission-code"[^>]*>(?P<source>[^<]*)</pre>', page)
    if code_match is None:
        raise ExceptionParseStandings('Not found source code')
    return {'solution': html.unescape(code_match.group('source'))}
def fetch_user(user):
    """Scrape profile stats and rating history for *user*.

    Returns ``(user, info, ratings)`` where *ratings* maps contest id to
    old/new rating and change, computed from consecutive history entries.
    """
    profile_url = resource.profile_url.format(account=user)
    page = REQ.get(profile_url)

    info = {}
    # Uppercase label / numeric value span pairs.
    for label, number in re.findall(
            r'<span[^>]*>([A-Z]+)</span>\s*<span[^>]*>([0-9]+)</span>', page):
        info[label.lower()] = int(number)

    country_match = re.search(
        '<img[^>]*src="[^"]*country[^"]*([0-9]+)[^"]*"[^>]*alt="country"[^>]*>', page)
    if country_match:
        info['country'] = countries.get(country_match.group(1))

    avatar_match = re.search(
        '<img[^>]*class="img-circle"[^>]*src="([^"]*getAvatar.php[^"]*)"[^>]*>', page)
    if avatar_match:
        info['avatar_url'] = urljoin(profile_url, avatar_match.group(1))

    # Rating history: each entry's old rating is the previous entry's new one.
    history = json.loads(REQ.get(Statistic.USER_RATING_API_URL_.format(user)))
    ratings = {}
    old_rating = None
    for stat in history:
        contest_rating = ratings.setdefault(stat['contestid'], collections.OrderedDict())
        new_rating = int(stat['rating'])
        if old_rating is not None:
            contest_rating['old_rating'] = old_rating
            contest_rating['rating_change'] = new_rating - old_rating
        contest_rating['new_rating'] = new_rating
        old_rating = new_rating
        info['rating'] = new_rating
    if not ratings:
        info.pop('rating', None)
    return user, info, ratings
def fetch_leaderboard_page(page):
    """POST for one leaderboard page (1-based on the API side) and decode it."""
    endpoint = f'{api_server_url}/api/contest/getLeaderboardByPage'
    payload = {
        'contestId': self.key,
        'page': page + 1,
        'usersPerPage': per_page,
    }
    response = REQ.get(endpoint, post=payload)
    return json.loads(response)
def get_leaderboard(url, column="", value=""):
    """POST a leaderboard query; a non-empty *column* activates filtering on *value*.

    The request body shape differs between clash-hub contests and regular ones.
    """
    active = 'true' if column else 'false'
    filt = f'{{"active":{active},"column":"{column}","filter":"{value}"}}'
    if clash_hubs:
        body = f'[1,{filt},null,true,"global",{clash_hubs[0]["clashHubId"]}]'
    else:
        body = f'["{self.key}",null,"global",{filt}]'
    response = REQ.get(url, post=body, content_type='application/json')
    return json.loads(response)
def get(*args, **kwargs):
    """REQ.get with up to 5 attempts, retrying only on HTTP 502 with a growing delay."""
    max_attempts = 5
    for attempt in range(max_attempts):
        try:
            return REQ.get(*args, **kwargs)
        except FailOnGetResponse as exc:
            last_attempt = attempt + 1 >= max_attempts
            if exc.code != 502 or last_attempt:
                raise exc
            time.sleep(3 + attempt)
def fetch_profle_page(user):
    """Fetch the profile page for *user*; None when the profile does not exist (404).

    NOTE(review): keeps the original 'profle' name typo — callers reference it as-is.
    """
    profile_url = Statistic.PROFILE_URL_FORMAT_.format(user=user)
    try:
        return REQ.get(profile_url)
    except FailOnGetResponse as e:
        if e.args[0].code != 404:
            raise e
        return None