def get_source_code(contest, problem):
    if 'url' not in problem:
        raise ExceptionParseStandings('Not found url')
    page = REQ.get(problem['url'])
    match = re.search('<pre[^>]*id="submission-code"[^>]*>(?P<source>[^<]*)</pre>', page)
    if not match:
        raise ExceptionParseStandings('Not found source code')
    solution = html.unescape(match.group('source'))
    return {'solution': solution}
def get_source_code(contest, problem):
    if 'url' not in problem:
        raise ExceptionParseStandings('Not found url')
    page = _get(problem['url'])
    match = re.search(
        '<pre[^>]*id="program-source-text"[^>]*class="(?P<class>[^"]*)"[^>]*>(?P<source>[^<]*)</pre>',
        page,
    )
    if not match:
        raise ExceptionParseStandings('Not found source code')
    solution = html.unescape(match.group('source'))
    ret = {'solution': solution}
    for c in match.group('class').split():
        if c.startswith('lang-'):
            ret['lang_class'] = c
    return ret
def get_league_name(league):
    nonlocal leagues_names
    if leagues_names is None:
        raise ExceptionParseStandings('not found leagues_names')
    index = league['divisionCount'] - league['divisionIndex'] - 1 + league.get('divisionOffset', 0)
    number = index - len(leagues_names) + 2
    return f'{leagues_names[-1]} {number}' if number >= 1 else leagues_names[index]
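# Illustrative trace of the arithmetic above (all values hypothetical):
# with leagues_names = ['Legend', 'Gold', 'Silver', 'Bronze'] and a league
# {'divisionCount': 6, 'divisionIndex': 0} (no 'divisionOffset'):
#   index  = 6 - 0 - 1 + 0 = 5
#   number = 5 - 4 + 2 = 3    # number >= 1, so the last (lowest) name is numbered
#   -> 'Bronze 3'
# whereas {'divisionCount': 3, 'divisionIndex': 2} gives index = 0 and
# number = -2, so the name is taken directly: 'Legend'.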
def _get(url, lock=Lock()):
    attempt = 0
    while True:
        attempt += 1
        try:
            page = REQ.get(url)
            if 'id="id_login"' in page and 'id="id_password"' in page:
                with lock:
                    if not Statistic.LOGGED_IN:
                        page = REQ.get(Statistic.LOGIN_URL_)
                        page = REQ.submit_form(
                            {
                                'login': conf.HACKEREARTH_USERNAME,
                                'password': conf.HACKEREARTH_PASSWORD,
                                'signin': 'Log In',
                            },
                            limit=0,
                        )
                        Statistic.LOGGED_IN = True
            if 'AJAX' in url:
                headers = {'x-requested-with': 'XMLHttpRequest'}
                csrftoken = REQ.get_cookie('csrftoken')
                if csrftoken:
                    headers['x-csrftoken'] = csrftoken
            else:
                headers = {}
            return REQ.get(url, headers=headers)
        except FailOnGetResponse as e:
            if attempt == 15 or getattr(e.args[0], 'code', None) != 500:
                raise ExceptionParseStandings(e.args[0])
            sleep(2 * attempt)
def get_standings(self, users=None, statistics=None):
    try:
        page = REQ.get(self.url)
    except FailOnGetResponse as e:
        return {'action': 'delete'} if e.code == 404 else {}
    match = re.search('<table[^>]*past_event_rating[^>]*>.*?</table>', page, re.DOTALL)
    if not match:
        raise ExceptionParseStandings('not found table')
    header_mapping = {
        'Team': 'name',
        'Place': 'place',
        'CTF points': 'solving',
    }
    table = parsed_table.ParsedTable(html=match.group(0), header_mapping=header_mapping)
    results = {}
    max_score = 0
    for r in table:
        row = OrderedDict()
        for k, v in r.items():
            k = k.strip('*')
            k = k.strip(' ')
            value = ' '.join([c.value for c in v]).strip() if isinstance(v, list) else v.value
            if k == 'name':
                href = v.column.node.xpath('.//a/@href')[0]
                match = re.search('/([0-9]+)/?$', href)
                row['member'] = match.group(1)
                row['name'] = value
            else:
                value = as_number(value)
                row[k] = value
        max_score = max(max_score, row.get('solving', 0))
        results[row['member']] = row
    if max_score > 0:
        for row in results.values():
            if 'solving' in row:
                row['percent'] = f'{row["solving"] * 100 / max_score:.2f}'
    has_medals = (
        not re.search(r'\bqual', self.name, flags=re.I)
        and re.search(r'\bfinal', self.name, flags=re.I)
    )
    medals = [{'name': 'gold', 'count': 1}] if has_medals else []
    return dict(
        standings_url=self.url,
        result=results,
        options={'medals': medals},
    )
def get_standings(self, users=None, statistics=None):
    filepath = self.info.get('standings_csv_filepath_')
    if not filepath:
        raise ExceptionParseStandings('not found csv filepath')
    season = self.get_season()
    result = {}
    problems_info = collections.OrderedDict()
    with open(filepath, 'r') as fo:
        data = csv.DictReader(fo)
        last, place = None, None
        for idx, r in enumerate(data, start=1):
            row = collections.OrderedDict()
            problems = row.setdefault('problems', {})
            for k, v in r.items():
                if k == 'User':
                    row['member'] = v + ' ' + season
                    row['name'] = v
                elif k == 'Last valid submission':
                    delta = arrow.get(v, ['YYYY-MM-DD H:mm:ss']) - self.start_time
                    row['penalty'] = self.to_time(delta, 3)
                elif k in ['Global']:
                    row['solving'] = v
                else:
                    if k not in problems_info:
                        problems_info[k] = {'short': k, 'full_score': 100}
                    if float(v) > 1e-9:
                        p = problems.setdefault(k, {})
                        p['result'] = v
                        p['partial'] = float(v) + 1e-9 < problems_info[k]['full_score']
            score = (row['solving'], row['penalty'])
            if last != score:
                last = score
                place = idx
            row['place'] = place
            result[row['member']] = row
    standings = {
        'result': result,
        'problems': list(problems_info.values()),
        'hidden_fields': ['medal'],
    }
    return standings
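# The place computation above is standard "1224" competition ranking: rows are
# assumed to arrive pre-sorted best-first, tied (solving, penalty) keys share a
# place, and the next distinct key jumps to its 1-based row index. A minimal
# standalone sketch of the same idea:
def _example_competition_ranking(keys):
    places, last, place = [], None, None
    for idx, key in enumerate(keys, start=1):
        if key != last:
            last, place = key, idx
        places.append(place)
    return places  # e.g. [50, 50, 40, 30] -> [1, 1, 3, 4]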
def _get(url):
    attempt = 0
    while True:
        attempt += 1
        try:
            if 'AJAX' in url:
                headers = {'x-requested-with': 'XMLHttpRequest'}
                csrftoken = REQ.get_cookie('csrftoken')
                if csrftoken:
                    headers['x-csrftoken'] = csrftoken
            else:
                headers = {}
            return REQ.get(url, headers=headers)
        except FailOnGetResponse as e:
            if attempt == 7 or getattr(e.args[0], 'code', None) != 500:
                raise ExceptionParseStandings(e.args[0])
            sleep(2**attempt)
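# The retry policy above retries only HTTP 500 responses, up to 7 attempts,
# sleeping 2**attempt seconds between tries (2, 4, ..., 64 -- roughly two
# minutes of total backoff before giving up). A minimal standalone sketch of
# the same pattern with a generic fetch callable (names hypothetical):
def _example_retry_with_backoff(fetch, max_attempts=7):
    from time import sleep
    for attempt in range(1, max_attempts + 1):
        try:
            return fetch()
        except Exception:
            if attempt == max_attempts:
                raise
            sleep(2 ** attempt)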
def get_standings(self, users=None, statistics=None):
    result = {}

    @RateLimiter(max_calls=10, period=2)
    def fetch_and_process_page(page):
        url = f'https://practiceapi.geeksforgeeks.org/api/v1/contest/{self.key}/leaderboard/?page={page + 1}&type=current'  # noqa
        page = REQ.get(url)
        data = json.loads(page)
        for row in data['results']['ranks_list']:
            handle = row.pop('profile_link').rstrip('/').rsplit('/', 1)[-1]
            r = result.setdefault(handle, OrderedDict())
            name = row.pop('handle')
            if name != handle:
                r['name'] = name
            r['member'] = handle
            r['place'] = row.pop('rank')
            r['solving'] = row.pop('score')
            last_correct_submission = row.get('last_correct_submission')
            if last_correct_submission:
                time = dateutil.parser.parse(last_correct_submission + '+05:30')
                delta = time - self.start_time
                r['time'] = self.to_time(delta)
            for k, v in list(row.items()):
                if k.endswith('_score'):
                    r[k] = row.pop(k)
        return data

    data = fetch_and_process_page(0)
    total = data['results']['rows_count']
    per_page = len(data['results']['ranks_list'])
    if not total or not per_page:
        raise ExceptionParseStandings('empty standings')
    n_pages = (total + per_page - 1) // per_page
    with PoolExecutor(max_workers=8) as executor:
        executor.map(fetch_and_process_page, range(1, n_pages))
    ret = {
        'url': os.path.join(self.url, 'leaderboard'),
        'result': result,
    }
    return ret
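# Note on the paging math above: n_pages is integer ceiling division, e.g.
# total = 205 rows at per_page = 100 gives (205 + 100 - 1) // 100 = 3 pages,
# so pages 1..n_pages-1 are fetched concurrently after the initial page 0.
# An equivalent stdlib spelling would be math.ceil(total / per_page).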
def query(name, variables):
    params = {
        'fb_dtsg': tokens.get('dtsginitialdata', ''),
        'lsd': tokens['lsd'],
        'fb_api_caller_class': 'RelayModern',
        'fb_api_req_friendly_name': name,
        'variables': json.dumps(variables),
        'doc_id': self.info['_scoreboard_ids'][name],
    }
    ret = REQ.get(
        self.API_GRAPH_URL_,
        post=params,
        headers={'accept-language': 'en-US,en;q=1.0'},
    )
    try:
        return json.loads(ret)
    except Exception as e:
        raise ExceptionParseStandings(f'Error on query {name} = {e}')
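# Hedged usage sketch for query() above: `name` selects a persisted GraphQL
# document id via self.info['_scoreboard_ids'], so both the friendly name and
# the variables below are purely illustrative placeholders, not a documented
# call:
#   data = query('CodingCompetitionsContestScoreboardQuery',
#                {'id': scoreboard_id, 'start': 0, 'count': 25})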
def parse_problems_infos():
    problem_url = self.standings_url.replace('/ranking', '/p')
    page = REQ.get(problem_url)
    match = re.search(
        r'<h1[^>]*>[^<]*</h1>(\s*<[^/][^>]*>)*\s*(?P<table><table[^>]*>.*?</table>)',
        page,
        re.DOTALL,
    )
    if not match:
        raise ExceptionParseStandings('Not found problems table')
    table = parsed_table.ParsedTable(html=match.group('table'), ignore_wrong_header_number=False)
    skip = False
    problems_infos = collections.OrderedDict()
    for r in table:
        if isinstance(r, parsed_table.ParsedTableRow):
            runda = re.sub(r'\s*\(.*\)\s*$', '', r.columns[0].value).strip()
            skip = runda.lower() not in self.name.lower()
            continue
        if skip:
            continue
        problem_info = {}
        for k, vs in list(r.items()):
            if isinstance(vs, list):
                v = ' '.join([v.value for v in vs]).strip()
            else:
                v = vs.value
            if not k:
                problem_info['short'] = v
            elif k in ('Nazwa', 'Name'):
                match = re.search(r'\[(?P<letter>[^\]]+)\]$', v)
                if match:
                    problem_info['_letter'] = match.group('letter')
                problem_info['name'] = v
                href = vs.column.node.xpath('//a/@href')
                if href:
                    problem_info['url'] = urljoin(problem_url, href[0])
        if problem_info:
            problems_infos[problem_info['short']] = problem_info
    return problems_infos
def rec_fix_type(messages, types, path=[]):  # path is never mutated, so the mutable default is safe here
    if not types:
        return
    for message in to_list(messages):
        if all(v['type'].startswith('fixed') for v in types.values()):
            raise ExceptionParseStandings(f'Expected str value for path = {path}')
        for k, v in types.items():
            if k in message:
                rec_fix_type(message[k], v.get('message_typedef'), path + [k])
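# Hedged sketch of the input shape rec_fix_type walks (values hypothetical,
# modeled on blackboxprotobuf-style typedefs): `types` maps field keys to
# {'type': ..., 'message_typedef': {...}} and is traversed in parallel with
# the decoded message(s); a subtree whose field types are all 'fixed*'
# triggers the exception. For example:
#   types = {'user': {'type': 'message',
#                     'message_typedef': {'id': {'type': 'fixed64'}}}}
#   rec_fix_type({'user': {'id': 1}}, types)
#   # recurses into path ['user'], where every type starts with 'fixed',
#   # and raises ExceptionParseStandings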
def get_standings(self, users=None, statistics=None):
    season = self.get_season()

    def standings_page(req):
        return req.get(self.standings_url)

    with REQ(
        with_proxy=True,
        args_proxy=dict(
            time_limit=3,
            n_limit=30,
            connect=standings_page,
        ),
    ) as req:
        page = req.proxer.get_connect_ret()
    html_table = re.search('<table[^>]*>.*?</table>', page, re.MULTILINE | re.DOTALL)
    if not html_table:
        raise ExceptionParseStandings('Not found html table')
    mapping = {
        'Rank': 'place',
        'Name': 'name',
        'Language': 'language',
    }
    table = parsed_table.ParsedTable(html_table.group(0), header_mapping=mapping)
    result = {}
    for r in table:
        row = dict()
        for k, v in r.items():
            if v.value:
                row[k] = v.value
        if 'member' not in row:
            row['member'] = f'{row["name"]} {season}'
        result[row['member']] = row
    return {'result': result}
def get_standings(self, users=None, statistics=None):
    if not self.standings_url:
        year = self.start_time.year
        name = re.sub(r'(online|onsite)\s+', '', self.name, flags=re.I).strip()
        query = f'site:https://www.facebook.com/hackercup/round/* Facebook Hacker Cup {year} {name}'
        urls = list(googlesearch.search(query, stop=2))
        if len(urls) == 1:
            self.standings_url = urls[0].replace('/round/', '/scoreboard/')
    if not self.standings_url:
        raise ExceptionParseStandings('not found standing url')

    offset = 0
    limit = 100
    result = OrderedDict()
    pbar = None
    total = None
    title = None
    problems_info = None
    while limit:
        url = f'{self.standings_url}?offset={offset}&length={limit}'
        page = REQ.get(url)
        match = re.search(r'"problemData":(?P<data>\[[^\]]*\])', page, re.I)
        if not match:
            limit //= 2
            continue
        problem_data = json.loads(match.group('data'))
        if problems_info is None:
            matches = re.finditer(
                r'<div[^>]*class="linkWrap noCount"[^>]*>(?P<score>[0-9]+):\s*(?P<title>[^<]*)',
                page,
            )
            problems_scores = {}
            for match in matches:
                score = int(match.group('score'))
                name = html.unescape(match.group('title')).strip()
                problems_scores[name] = score
            problems_info = []
            for problem in problem_data:
                name = str(problem['name']).strip()
                problems_info.append({
                    'code': str(problem['id']),
                    'name': name,
                    'full_score': problems_scores[name],
                })
        if title is None:
            match = re.search('<h2[^>]*class="accessible_elem"[^>]*>(?P<title>[^<]*)</h2>', page)
            title = match.group('title')
        match = re.search(r'"scoreboardData":(?P<data>\[[^\]]*\])', page, re.I)
        data = json.loads(match.group('data'))
        if pbar is None:
            match = re.search(r'"pagerData":(?P<data>{[^}]*})', page, re.I)
            pager = json.loads(match.group('data'))
            total = pager['total']
            pbar = tqdm(total=total, desc='paging')
        for row in data:
            handle = str(row.pop('userID'))
            r = result.setdefault(handle, OrderedDict())
            r['member'] = handle
            r['solving'] = row.pop('score')
            r['place'] = row.pop('rank')
            r['name'] = row.pop('profile')['name']
            penalty = row.pop('penalty')
            if penalty:
                r['penalty'] = self.to_time(penalty)
            problems = r.setdefault('problems', {})
            solved = 0
            for k, v in row.pop('problemData').items():
                verdict = v.get('result')
                if not verdict or verdict == 'none':
                    continue
                p = problems.setdefault(k, {})
                if verdict == 'accepted':
                    p['result'] = '+'
                    p['binary'] = True
                    solved += 1
                else:
                    p['result'] = '0'
                    p['verdict'] = verdict
                    p['binary'] = False
                u = v.get('sourceURI')
                if u:
                    p['url'] = urljoin(url, u)
            r['solved'] = {'solving': solved}
            pbar.update()
            total -= 1
        if len(data) < limit:
            break
        offset += limit
    pbar.close()

    words = self.name.split()
    words.append(str(self.start_time.year))
    for w in words:
        if w.lower() not in title.lower():
            warnings.warn(f'"{w}" not in title "{title}"')
    if total:
        warnings.warn(f'{total} member(s) were not parsed')

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
    }
    if re.search(r'\bfinals?\b', self.name, re.I):
        standings['options'] = {
            'medals': [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')],
        }
    return standings
def get_standings(self, users=None, statistics=None):
    result = {}
    writers = defaultdict(int)

    start_time = self.start_time.replace(tzinfo=None)
    if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
        re_round_overview = re.compile(
            r'''
            (?:<td[^>]*>
                (?:
                    [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
                    (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
                )</td>[^<]*
            ){2}
            ''',
            re.VERBOSE,
        )
        for url in [
            'https://www.topcoder.com/tc?module=MatchList&nr=100500',
            'https://community.topcoder.com/longcontest/stats/?module=MatchList&nr=100500',
        ]:
            page = REQ.get(url)
            matches = re_round_overview.finditer(str(page))
            opt = 0.61803398875
            for match in matches:
                date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                if abs(date - start_time) < timedelta(days=2):
                    title = match.group('title')
                    intersection = len(set(title.split()) & set(self.name.split()))
                    union = len(set(title.split()) | set(self.name.split()))
                    iou = intersection / union
                    if iou > opt:
                        opt = iou
                        self.standings_url = urljoin(url, match.group('url'))
    if not self.standings_url:
        raise InitModuleException('Not set standings url for %s' % self.name)

    url = self.standings_url + '&nr=100000042'
    page = REQ.get(url)
    result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)

    if not result_urls:  # marathon match
        match = re.search('<[^>]*>Problem:[^<]*<a[^>]*href="(?P<href>[^"]*)"[^>]*>(?P<name>[^<]*)<', page)
        if not match:
            raise ExceptionParseStandings('not found problem')
        problem_name = match.group('name').strip()
        problems_info = [{
            'short': problem_name,
            'url': urljoin(url, match.group('href').replace('&amp;', '&')),
        }]
        rows = etree.HTML(page).xpath("//table[contains(@class, 'stat')]//tr")
        header = None
        for row in rows:
            r = parsed_table.ParsedTableRow(row)
            if len(r.columns) < 8:
                continue
            values = [c.value.strip().replace('\xa0', '') for c in r.columns]
            if header is None:
                header = values
                continue
            d = OrderedDict(list(zip(header, values)))
            handle = d.pop('Handle').strip()
            d = self._dict_as_number(d)
            if 'rank' not in d or users and handle not in users:
                continue
            row = result.setdefault(handle, OrderedDict())
            row.update(d)
            score = row.pop('final_score' if 'final_score' in row else 'provisional_score')
            row['member'] = handle
            row['place'] = row.pop('rank')
            row['solving'] = score
            row['solved'] = {'solving': 1 if score > 0 else 0}
            problems = row.setdefault('problems', {})
            problem = problems.setdefault(problem_name, {})
            problem['result'] = score
            history_index = values.index('submission history')
            if history_index:
                column = r.columns[history_index]
                href = column.node.xpath('a/@href')
                if href:
                    problem['url'] = urljoin(url, href[0])
    else:  # single round match
        matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
        problems_sets = []
        for match in matches:
            problems = re.findall(
                '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                match.group(),
                re.IGNORECASE,
            )
            if problems:
                problems_sets.append([
                    {'short': n, 'url': urljoin(url, u)}
                    for u, n in problems
                ])
        problems_info = dict() if len(problems_sets) > 1 else list()
        for problems_set, result_url in zip(problems_sets, result_urls):
            url = urljoin(self.standings_url, result_url + '&em=1000000042')
            url = url.replace('&amp;', '&')
            division = int(parse_qs(url)['dn'][0])

            with PoolExecutor(max_workers=3) as executor:

                def fetch_problem(p):
                    errors = set()
                    for attempt in range(3):
                        try:
                            page = REQ.get(p['url'], time_out=30)
                            match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
                            page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
                            matches = re.findall(
                                r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>',
                                page,
                                re.DOTALL,
                            )
                            for key, value in matches:
                                key = key.strip().rstrip(':').lower()
                                if key == 'categories':
                                    tags = [t.strip().lower() for t in value.split(',')]
                                    tags = [t for t in tags if t]
                                    if tags:
                                        p['tags'] = tags
                                elif key.startswith('writer') or key.startswith('tester'):
                                    key = key.rstrip('s') + 's'
                                    p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
                            for w in p.get('writers', []):
                                writers[w] += 1
                        except Exception as e:
                            errors.add(f'error parse problem info {p}: {e}')
                            sleep(5 ** attempt)
                        else:
                            errors = None
                            break
                    if errors:
                        LOG.error(errors)
                    return p

                for p in tqdm.tqdm(executor.map(fetch_problem, problems_set), total=len(problems_set)):
                    d = problems_info
                    if len(problems_sets) > 1:
                        d = d.setdefault('division', OrderedDict())
                        d = d.setdefault('I' * division, [])
                    d.append(p)

            if not users and users is not None:
                continue

            page = REQ.get(url)
            rows = etree.HTML(page).xpath("//tr[@valign='middle']")
            header = None
            url_infos = []
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 10:
                    continue
                values = [c.value for c in r.columns]
                if header is None:
                    header = values
                    continue
                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Coders').strip()
                d = self._dict_as_number(d)
                if users and handle not in users:
                    continue
                row = result.setdefault(handle, OrderedDict())
                row.update(d)
                if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                    row.pop('new_rating', None)
                    row.pop('old_rating', None)
                    row.pop('rating_change', None)
                row['member'] = handle
                row['place'] = row.pop('division_placed', None)
                row['solving'] = row['point_total']
                row['solved'] = {'solving': 0}
                row['division'] = 'I' * division
                if 'adv.' in row:
                    row['advanced'] = row.pop('adv.').lower().startswith('y')
                url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                url_infos.append(url_info)

            def fetch_solution(url):
                for i in range(2):
                    try:
                        page = REQ.get(url, time_out=60)
                        match = re.search(
                            '<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                            page,
                            re.DOTALL | re.IGNORECASE,
                        )
                        if not match:
                            break
                        ret = html.unescape(match.group('solution'))
                        ret = ret.strip()
                        ret = ret.replace('<BR>', '\n')
                        ret = ret.replace('\xa0', ' ')
                        return ret
                    except FailOnGetResponse:
                        sleep(i * 10 + 3)
                return None

            def fetch_info(url):
                delay = 3
                for _ in range(5):
                    try:
                        page = REQ.get(url)
                        break
                    except Exception:
                        sleep(delay)
                        delay *= 2
                else:
                    # keep the same arity as the success path so the
                    # six-way unpacking below does not fail
                    return None, None, None, {}, [], 0
                match = re.search('class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>', page, re.IGNORECASE)
                handle = html.unescape(match.group('handle').strip())
                match = re.search(r' Room\s*(?P<room>[0-9]+)', page)
                room = match.group('room') if match else None
                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                problems = {}
                n_fetch_solution = 0
                for match in matches:
                    d = match.groupdict()
                    short = d.pop('short')
                    solution_url = urljoin(url, d['url'])
                    d['url'] = solution_url
                    d = self._dict_as_number(d)
                    if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                        d['result'] = -d['result']
                    if abs(d['result']) < 1e-9:
                        d.pop('result')
                    if re.match('^[0.:]+$', d['time']):
                        d.pop('time')
                    solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                    if not solution:
                        n_fetch_solution += 1
                        solution = fetch_solution(solution_url)
                    d['solution'] = solution
                    problems[short] = d
                challenges = []
                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                for match in matches:
                    d = match.groupdict()
                    d = {k: v.strip() for k, v in d.items()}
                    d['result'] = float(d['result'].replace(',', '.'))
                    d['url'] = urljoin(url, d['url'])
                    p = problems.setdefault(d['problem'], {})
                    p.setdefault('extra_score', 0)
                    p['extra_score'] += d['result']
                    p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                    challenges.append(d)
                return url, handle, room, problems, challenges, n_fetch_solution

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                n_fetch_solution = 0
                for url, handle, room, problems, challenges, n_sol in executor.map(fetch_info, url_infos):
                    n_fetch_solution += n_sol
                    pbar.set_description(f'div{division} {url}')
                    pbar.set_postfix(n_solution=n_fetch_solution)
                    pbar.update()
                    if handle is not None:
                        if handle not in result:
                            LOG.error(f'{handle} not in result, url = {url}')
                        result[handle]['url'] = url
                        if room:
                            result[handle]['room'] = room
                        result[handle]['problems'] = problems
                        result[handle]['challenges'] = challenges
                        for p in problems.values():
                            if p.get('result', 0) > 1e-9:
                                result[handle]['solved']['solving'] += 1
                        if challenges:
                            h = result[handle].setdefault('hack', {
                                'title': 'challenges',
                                'successful': 0,
                                'unsuccessful': 0,
                            })
                            for c in challenges:
                                h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
        'options': {
            'fixed_fields': [('hack', 'Challenges')],
        },
    }
    if writers:
        writers = [w[0] for w in sorted(writers.items(), key=lambda w: w[1], reverse=True)]
        standings['writers'] = writers
    if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
        standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]
    return standings
def get_standings(self, users=None, statistics=None):
    if not self.standings_url:
        self.standings_url = f'https://projecteuler.net/fastest={self.key}'
    user_agent = ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
                  ' (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36')
    page = REQ.get(self.standings_url, headers={'User-Agent': user_agent})

    sign_out = re.search('<form[^>]*action="sign_out"[^>]*>', page)
    if not sign_out:
        for attempt in range(20):
            while True:
                value = f'{random.random():.16f}'
                image_bytes = REQ.get(f'https://projecteuler.net/captcha/show_captcha.php?{value}')
                image_stream = io.BytesIO(image_bytes)
                image_rgb = Image.open(image_stream)
                text = pytesseract.image_to_string(image_rgb, config='--oem 0 --psm 13 digits')
                text = text.strip()
                if re.match('^[0-9]{5}$', text):
                    break
            REQ.get('https://projecteuler.net/sign_in')
            page = REQ.submit_form(
                name='sign_in_form',
                action=None,
                data={
                    'username': conf.PROJECTEULER_USERNAME,
                    'password': conf.PROJECTEULER_PASSWORD,
                    'captcha': text,
                    'remember_me': '1',
                },
            )
            match = re.search('<p[^>]*class="warning"[^>]*>(?P<message>[^<]*)</p>', page)
            if match:
                REQ.print(match.group('message'))
            else:
                break
        else:
            raise ExceptionParseStandings('Did not recognize captcha for sign in')
        page = REQ.get(self.standings_url)

    result = {}
    problem_name = self.name.split('.', 1)[1].strip()
    problems_info = [{'name': problem_name, 'url': self.url}]
    regex = '<table[^>]*>.*?</table>'
    html_table = re.search(regex, page, re.DOTALL)
    if html_table:
        table = parsed_table.ParsedTable(html_table.group(0))
        for r in table:
            row = OrderedDict()
            row['solving'] = 1
            for k, v in r.items():
                if isinstance(v, list):
                    place, country = v
                    row['place'] = re.match('[0-9]+', place.value).group(0)
                    country = first(country.column.node.xpath('.//@title'))
                    if country:
                        row['country'] = country
                elif k == 'Time To Solve':
                    params = {}
                    for x in v.value.split(', '):
                        value, field = x.split()
                        if field[-1] != 's':
                            field += 's'
                        params[field] = int(value)
                    rel_delta = relativedelta(**params)
                    now = timezone.now()
                    delta = now - (now - rel_delta)
                    row['penalty'] = f'{delta.total_seconds() / 60:.2f}'
                elif k == 'User':
                    member = first(v.column.node.xpath('.//@title')) or v.value
                    row['member'] = member
                else:
                    row[k.lower()] = v.value
            problems = row.setdefault('problems', {})
            problem = problems.setdefault(problem_name, {})
            problem['result'] = '+'
            problem['binary'] = True
            row['_skip_for_problem_stat'] = True
            if 'member' not in row:
                continue
            result[row['member']] = row

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
    }
    if len(result) < 100:
        delta = timezone.now() - self.start_time
        if delta < timedelta(days=1):
            standings['timing_statistic_delta'] = timedelta(minutes=60)
        elif delta < timedelta(days=30):
            standings['timing_statistic_delta'] = timedelta(days=1)
    return standings
def get_standings(self, users=None, statistics=None):
    if not hasattr(self, 'season'):
        year = self.start_time.year - (0 if self.start_time.month > 8 else 1)
        season = f'{year}-{year + 1}'
    else:
        season = self.season

    result = {}
    problems_info = OrderedDict()

    if not re.search('/[0-9]+/', self.standings_url):
        return {}

    url = self.standings_url
    n_page = 1
    while True:
        page = REQ.get(url)
        match = re.search('<table[^>]*class="[^"]*standings[^>]*>.*?</table>', page, re.MULTILINE | re.DOTALL)
        if not match:
            raise ExceptionParseStandings('Not found table standings')
        html_table = match.group(0)
        table = parsed_table.ParsedTable(html_table)
        for r in table:
            row = {}
            problems = row.setdefault('problems', {})
            solved = 0
            has_solved = False
            for k, v in list(r.items()):
                if 'table__cell_role_result' in v.attrs['class']:
                    letter = k.split(' ', 1)[0]
                    if letter == 'X':
                        continue
                    p = problems_info.setdefault(letter, {'short': letter})
                    names = v.header.node.xpath('.//span/@title')
                    if len(names) == 1:
                        p['name'] = names[0]
                    p = problems.setdefault(letter, {})
                    n = v.column.node
                    if n.xpath('img[contains(@class,"image_type_success")]'):
                        res = '+'
                        p['binary'] = True
                    elif n.xpath('img[contains(@class,"image_type_fail")]'):
                        res = '-'
                        p['binary'] = False
                    else:
                        if ' ' not in v.value:
                            problems.pop(letter)
                            continue
                        res = v.value.split(' ', 1)[0]
                    p['result'] = res
                    p['time'] = v.value.split(' ', 1)[-1]
                    if 'table__cell_firstSolved_true' in v.attrs['class']:
                        p['first_ac'] = True
                    if '+' in res or res.startswith('100'):
                        solved += 1
                    try:
                        has_solved = has_solved or '+' not in res and float(res) > 0
                    except ValueError:
                        pass
                elif 'table__cell_role_participant' in v.attrs['class']:
                    title = v.column.node.xpath('.//@title')
                    if title:
                        name = title[0]
                    else:
                        name = v.value.replace(' ', '', 1)
                    row['name'] = name
                    row['member'] = name if ' ' not in name else f'{name} {season}'
                elif 'table__cell_role_place' in v.attrs['class']:
                    row['place'] = v.value
                elif 'table__header_type_penalty' in v.attrs['class']:
                    row['penalty'] = int(v.value) if v.value.isdigit() else v.value
                elif 'table__header_type_score' in v.attrs['class']:
                    row['solving'] = int(round(float(v.value)))
            if has_solved:
                row['solved'] = {'solving': solved}
            result[row['member']] = row
        n_page += 1
        match = re.search(f'<a[^>]*href="(?P<href>[^"]*standings[^"]*p[^"]*={n_page})"[^>]*>', page)
        if not match:
            break
        url = urljoin(url, match.group('href'))

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': list(problems_info.values()),
    }
    return standings
def get_standings(self, users=None, statistics=None):
    page = REQ.get(self.COMPETITION_INFO_API_URL_)
    data = json.loads(page)
    for round_data in data['rounds']:
        match = re.search(
            r'start\s*date\s*(?:<b[^>]*>)?(?P<start_time>[^<]*)(?:</b>)?.*end\s*date',
            round_data['description'],
            re.IGNORECASE,
        )
        start_time = parser.parse(match.group('start_time'), tzinfos={'CET': 'UTC+1'})
        title = re.sub(r'\s+', ' ', round_data['name'])
        if start_time == self.start_time and title == self.name:
            break
    else:
        raise ExceptionParseStandings('not found round')

    m = re.search('maxPointsForProblem=(?P<score>[0-9]+)', round_data['description'])
    max_points_challenge_problem = int(m.group('score')) if m else None

    page = REQ.get(self.ROUND_INFO_API_URL_)
    round_infos = json.loads(page)
    for round_info in round_infos['roundDisplayInfo']:
        title = re.sub(r'\s+', ' ', round_info['displayName'])
        if title == self.name:
            break
    else:
        raise ExceptionParseStandings('not found round')

    default_problems_info = OrderedDict([
        (p['code'], {
            'code': p['code'],
            'short': chr(i + ord('A')),
            'name': p['name'],
            'url': self.PROBLEM_URL_.format(**p),
        })
        for i, p in enumerate(round_data['problems'])
    ])
    if self.name.startswith('Round'):
        level = int(self.name.split()[-1])
        if level in [1, 2]:
            for p in default_problems_info.values():
                p['full_score'] = level

    d_problems_info = OrderedDict()
    result = dict()
    has_scoring = {}
    divisions_order = []
    for cid, ctype in (
        (round_infos['teamCompetitionPremierLeagueId'], 'Team'),
        (round_infos['teamCompetitionRisingStarsId'], 'Team'),
        (round_infos['teamCompetitionPremierLeagueId'], 'individual'),
    ):
        url = self.RESULTS_API_URL_.format(cid=cid, url=round_info['url'], ctype=ctype)
        page = REQ.get(url)
        data = json.loads(page)
        division = data['displayedName'].replace(self.name, '').strip().lower()
        if division not in divisions_order:
            divisions_order.append(division)
        problems_info = d_problems_info.setdefault(division, deepcopy(default_problems_info))
        participant_type = {
            'Team': 'Team',
            'individual': 'Competitor',
        }[ctype]
        sorted_data = sorted(data['standings'], key=lambda r: r['score'], reverse=True)
        division_result = dict()
        with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(sorted_data)) as pbar:

            def fetch_team_results(d):
                member = str(d['id'])
                url = self.TEAM_RESULTS_URL_.format(cid=cid, uid=member, name=participant_type)
                page = REQ.get(url)
                matches = re.finditer(
                    r'<a[^>]*href="[^"]*/Problem/(?P<code>[^"/]*)">[^<]*(?:\s*<[^>]*>)*(?P<score>[.0-9]+)',
                    page,
                )
                problems = {}
                for m in matches:
                    k = m['code']
                    if k not in problems_info:
                        continue
                    p = problems.setdefault(problems_info[k]['short'], {})
                    p['result'] = m['score']
                matches = re.finditer(
                    '<a[^>]*href="[^"]*/CompetitorResults/[^"]*/(?P<account>[0-9]+)/?">(?P<name>[^<]*)</a>',
                    page,
                )
                users = [m.groupdict() for m in matches]
                info = {
                    'problems': problems,
                    'url': url,
                    'member': member,
                }
                matches = re.finditer(
                    r'<tr[^>]*>\s*<td[^>]*><b>(?P<key>[^<]*)</b></td>\s*<td[^>]*>(?P<value>[^<]*)</td>\s*</tr>',
                    page,
                )
                more_info = {}
                for m in matches:
                    k = m.group('key').lower().replace(' ', '_')
                    v = m.group('value')
                    if not v:
                        continue
                    more_info[k] = v
                if more_info.get('name') and more_info.get('surname'):
                    info['full_name'] = '{name} {surname}'.format(**more_info)
                if more_info.get('birth_year') == '0':
                    more_info.pop('birth_year')
                for k in 'school', 'city', 'birth_year':
                    if more_info.get(k):
                        info[k] = more_info[k]
                return d, info, users

            place = None
            last = None
            for index, (r, row, users) in enumerate(executor.map(fetch_team_results, sorted_data), start=1):
                if last is None or abs(r['score'] - last) > 1e-7:
                    place = index
                    last = r['score']
                row['name'] = r['name']
                if users:
                    row['_members'] = users
                row['place'] = place
                row['solving'] = r['score']
                country = unquote(r['country'])
                country = re.sub(r'\s*\(.*$', '', country)
                row['country'] = country
                row['division'] = division
                if ctype == 'individual':
                    row['_skip_for_problem_stat'] = True
                division_result[row['member']] = row
                pbar.update()

        if max_points_challenge_problem is not None:
            for code, problem_info in problems_info.items():
                key = problem_info['short']
                target = self.info.get('parse', {}).get('problems', {}).get(key, {}).get('target')
                if target is None:
                    url = self.PROBLEM_API_URL_.format(**problem_info)
                    if url not in has_scoring:
                        page = REQ.get(url)
                        data = json.loads(page)
                        has_scoring[url] = bool(re.search(r'####\s*Scoring:\s+', data['statement']))
                    if has_scoring[url]:
                        for r in division_result.values():
                            p = r['problems'].get(key, {})
                            if 'result' not in p:
                                continue
                            p['status'] = p.pop('result')
                    continue
                problem_info['full_score'] = max_points_challenge_problem
                if target == 'minimize':
                    func = min
                elif target == 'maximize':
                    func = max
                else:
                    raise ExceptionParseStandings(f'unknown target = {target}')
                opt = None
                for r in division_result.values():
                    res = r['problems'].get(key, {}).get('result')
                    if res is None:
                        continue
                    res = float(res)
                    if opt is None:
                        opt = res
                    else:
                        opt = func(opt, res)
                for r in division_result.values():
                    p = r['problems'].get(key, {})
                    if 'result' not in p:
                        continue
                    p['status'] = p['result']
                    if opt is None or abs(opt) < 1e-9:
                        p.pop('result')
                        continue
                    if target == 'minimize':
                        coefficient = 1 - (1 - opt / float(p['result'])) ** .5
                    elif target == 'maximize':
                        coefficient = 1 - (1 - float(p['result']) / opt) ** .5
                    if coefficient < 1:
                        p['partial'] = True
                    p['result'] = round(max_points_challenge_problem * coefficient, 2)

        for r in division_result.values():
            solved = 0
            for p in r['problems'].values():
                if not p.get('partial') and 'result' in p and float(p['result']) > 0:
                    solved += 1
            r['solved'] = {'solving': solved}
        result.update(division_result)

    standings_url = self.STANDING_URL_.format(cid=round_infos['teamCompetitionPremierLeagueId'])
    problem_info = {
        'division': OrderedDict(
            (d, list(ps.values())) for d, ps in d_problems_info.items()
        ),
    }
    if len(problem_info['division']) == 1:
        # collapse the division mapping when there is only one division
        problem_info = next(iter(problem_info['division'].values()))
    standings = {
        'result': result,
        'url': standings_url,
        'problems': problem_info,
        'divisions_order': divisions_order,
        'hidden_fields': ['full_name', 'school', 'city', 'birth_year'],
    }
    return standings
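# Worked example of the partial-score curve above (values hypothetical): for a
# 'minimize' challenge problem with best result opt = 100 and a submission
# scoring 400, coefficient = 1 - (1 - 100/400)**0.5 ~= 0.134, so with
# max_points_challenge_problem = 1000 the row gets ~= 133.97 points; a
# submission matching opt gets coefficient 1 and the full 1000.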
def _api_get_standings(self, users=None, statistics=None):
    match = re.search('/([0-9a-f]{16})$', self.url)
    if not match:
        raise ExceptionParseStandings(f'Not found id in url = {self.url}')
    self.id = match.group(1)

    standings_url = self.url
    api_ranking_url_format = self.API_RANKING_URL_FORMAT_.format(**self.__dict__)
    api_attempts_url_format = self.API_ATTEMPTS_URL_FORMAT_.format(**self.__dict__)

    def encode(value):
        ret = base64.b64encode(value.encode()).decode()
        ret = ret.replace('+', '-')
        ret = ret.replace('/', '_')
        return ret

    def decode(code):
        code = code.replace('-', '+')
        code = code.replace('_', '/')
        code = re.sub(r'[^A-Za-z0-9\+\/]', '', code)
        code += '=' * ((4 - len(code) % 4) % 4)
        data = json.loads(base64.b64decode(code).decode())
        return data

    def get(offset, num):
        query = f'{{"min_rank":{offset},"num_consecutive_users":{num}}}'
        url = api_ranking_url_format + encode(query)
        content = REQ.get(url)
        return decode(content)

    data = get(1, 1)
    problems_info = [
        {
            'url': os.path.join(self.url, task['id']),
            'code': task['id'],
            'name': task['title'],
            'full_score': sum([test['value'] for test in task['tests']]),
        }
        for task in data['challenge']['tasks']
    ]
    problems_info.sort(key=lambda t: (t['full_score'], t['name']))
    problems_info = OrderedDict([(t['code'], t) for t in problems_info])
    are_results_final = data['challenge']['are_results_final']

    num_consecutive_users = 200
    n_page = (data['full_scoreboard_size'] - 1) // num_consecutive_users + 1

    def fetch_page(page):
        return get(page * num_consecutive_users + 1, num_consecutive_users)

    def fetch_attempts(handle):
        query = f'{{"nickname":{json.dumps(handle)},"include_non_final_results":true}}'
        url = api_attempts_url_format + encode(query)
        try:
            content = REQ.get(url)
            data = decode(content)
        except FailOnGetResponse:
            data = None
        return handle, data

    result = {}
    with PoolExecutor(max_workers=8) as executor:
        handles_for_getting_attempts = []
        for data in tqdm.tqdm(executor.map(fetch_page, range(n_page)), total=n_page, desc='paging'):
            for row in data['user_scores']:
                if not row['task_info']:
                    continue
                handle = row.pop('displayname')
                if users and handle not in users:
                    continue
                r = result.setdefault(handle, {})
                r['member'] = handle
                r['place'] = row.pop('rank')
                r['solving'] = row.pop('score_1')
                r['penalty'] = self.to_time(-row.pop('score_2') / 10**6)
                if '/round/' in self.url:
                    query = encode(handle)
                    url = self.url.replace('/round/', '/submissions/').rstrip('/') + f'/{query}'
                    r['url'] = url.rstrip('=')
                country = row.pop('country', None)
                if country:
                    r['country'] = country
                solved = 0
                problems = r.setdefault('problems', {})
                for task_info in row['task_info']:
                    tid = task_info['task_id']
                    p = problems.setdefault(tid, {})
                    if task_info['penalty_micros'] > 0:
                        p['time'] = self.to_time(task_info['penalty_micros'] / 10**6)
                    p['result'] = task_info['score']
                    if p['result'] and p['result'] != problems_info[tid]['full_score']:
                        p['partial'] = True
                    if task_info['penalty_attempts']:
                        p['penalty'] = task_info['penalty_attempts']
                    solved += task_info['tests_definitely_solved']
                r['solved'] = {'solving': solved}
                if statistics and handle in statistics and statistics[handle].get('_with_subscores'):
                    result[handle] = self.merge_dict(r, statistics.pop(handle))
                else:
                    handles_for_getting_attempts.append(handle)
        if are_results_final:
            for handle, data in tqdm.tqdm(
                executor.map(fetch_attempts, handles_for_getting_attempts),
                total=len(handles_for_getting_attempts),
                desc='attempting',
            ):
                if data is None:
                    continue
                challenge = data['challenge']
                if not challenge.get('are_results_final'):
                    break
                tasks = {t['id']: t for t in challenge['tasks']}
                row = result[handle]
                problems = row['problems']
                for attempt in sorted(data['attempts'], key=lambda a: a['timestamp_ms']):
                    task_id = attempt['task_id']
                    problem = problems.setdefault(task_id, {})
                    subscores = []
                    score = 0
                    for res, test in zip(attempt['judgement'].pop('results'), tasks[task_id]['tests']):
                        if not test.get('value'):
                            continue
                        subscore = {'status': test['value']}
                        if 'verdict' in res:
                            subscore['result'] = res['verdict'] == 1
                            subscore['verdict'] = res['verdict__str']
                        else:
                            subscore['verdict'] = res['status__str']
                        subscores.append(subscore)
                        if res.get('verdict') == 1:
                            score += test['value']
                    if score != problem.get('result'):
                        continue
                    problem['subscores'] = subscores
                    problem['solution'] = attempt.pop('src_content').replace('\u0000', '')
                    language = attempt.get('src_language__str')
                    if language:
                        problem['language'] = language
                    if 'time' not in problem:
                        delta_ms = attempt['timestamp_ms'] - challenge['start_ms']
                        problem['time'] = self.to_time(delta_ms / 10**3)
                row['_with_subscores'] = True

    standings = {
        'result': result,
        'url': standings_url,
        'problems': list(problems_info.values()),
    }
    return standings
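# Hedged sketch: encode()/decode() above implement the URL-safe base64
# ("base64url") round trip used to pack JSON queries into the ranking URL.
# A self-contained illustration with only the stdlib (query shape copied
# from get() above):
def _example_b64url_roundtrip():
    import base64
    import json
    query = '{"min_rank":1,"num_consecutive_users":200}'
    token = base64.b64encode(query.encode()).decode().replace('+', '-').replace('/', '_')
    # decode() reverses the substitutions, strips any stray characters and
    # re-pads to a multiple of four before base64-decoding:
    code = token.replace('-', '+').replace('_', '/')
    code += '=' * ((4 - len(code) % 4) % 4)
    assert json.loads(base64.b64decode(code).decode()) == json.loads(query)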
def _hashcode(self, users=None, statistics=None):
    standings_url = None
    is_final_round = self.name.endswith('Final Round')
    data = None
    try:
        page = REQ.get(self.ARCHIVE_DATA_URL_FORMAT_.format(year=self.start_time.year))
        data = json.loads(page)
        names = set()
        for data_round in data['rounds']:
            name = data_round['name']
            if name in names:
                name = 'Qualification Round'
            if self.name.endswith(name) or name in ['Full ranking', 'Main round'] and is_final_round:
                data = data_round['data']
                standings_url = self.ARCHIVE_URL_FORMAT_.format(year=self.start_time.year)
                break
            names.add(name)
        else:
            data = None
    except FailOnGetResponse as e:
        if e.code != 404:
            raise e

    if not data:
        if 'hashcode_scoreboard' in self.info:
            page = REQ.get(self.info['hashcode_scoreboard'])
            data = json.loads(page)
        else:
            raise ExceptionParseStandings('Not found data')

    if 'columns' in data:
        columns = data['columns']
        data = data['rows']
    else:
        columns = None

    result = {}
    season = self.get_season()
    for rank, row in enumerate(data, start=1):
        if columns is not None:
            row = dict(zip(columns, row))
        row = {k.lower().replace(' ', ''): v for k, v in row.items()}
        name = row.pop('teamname')
        name = unescape(name)
        member = f'{name}, {season}'
        if users is not None and name not in users:
            continue
        r = result.setdefault(member, {})
        r['name'] = name
        r['member'] = member
        score = row.pop('score', '0')
        score = re.sub(r'[\s,]', '', str(score))
        try:
            float(score)
        except Exception:
            score = '0'
        r['solving'] = score
        if 'rank' in row:
            r['place'] = row.pop('rank')
        else:
            r['place'] = rank
        if 'country' in row:
            r['_countries'] = re.sub(r',\s+', ',', row.pop('country')).split(',')
        elif 'countries' in row:
            r['_countries'] = row.pop('countries')
        if 'finalround' in row:
            r['advanced'] = row['finalround']
        stime = row.get('submissiontime', {}).get('iMillis')
        if stime:
            r['time'] = self.to_time(stime / 1000 - self.start_time.timestamp(), 3)
        if 'hubid' in row:
            r['hub_id'] = row.pop('hubid')

    standings = {
        'result': result,
        'hidden_fields': ['hub_id'],
        'problems': [],
    }
    if standings_url:
        standings['url'] = standings_url
    return standings
def get_standings(self, users=None, statistics=None):
    api_ranking_url_version = self.resource.info.get('statistics', {}).get('api_ranking_url_version', 'v2')
    resource = '{uri.scheme}://{uri.netloc}'.format(uri=urlparse(self.url))
    infos = self.__dict__
    infos['resource'] = resource
    url = self.API_RANKING_URL_FORMATS_[api_ranking_url_version].format(**infos)
    try:
        time.sleep(1)
        page = REQ.get(url)
    except FailOnGetResponse as e:
        if e.code == 404:
            return {'action': 'delete'}
        raise ExceptionParseStandings('not found api ranking url')

    data = json.loads(page)
    if 'data' in data and 'object' in data['data']:
        data = data['data']['object']

    problems_info = []
    for idx, p in enumerate(data.pop('problems'), start=1):
        info = {
            'short': p.get('label', str(idx)),
            'name': p['name'],
            'code': p['code'],
        }
        info['url'] = self.PROBLEM_URL_.format(resource=resource, **info)
        if p.get('points'):
            info['full_score'] = p['points']
        problems_info.append(info)

    result = {}
    prev = None
    skip = 0
    handles_to_get_new_rating = []
    has_rated = data.get('is_rated', True) and data.get('has_rating', True)
    has_rating = False

    rankings = data.pop('rankings')
    for r in rankings:
        for src, dst in (
            ('points', 'score'),
            ('cumtime', 'cumulative_time'),
        ):
            if src in r:
                r[dst] = r.pop(src)
    rankings = sorted(rankings, key=lambda x: (-x['score'], x['cumulative_time']))

    fields_types = {}
    hidden_fields = set()
    for index, r in enumerate(rankings, start=1):
        solutions = r.pop('solutions')
        if not any(solutions) and not r.get('new_rating'):
            skip += 1
            continue
        handle = r.pop('user')
        row = result.setdefault(handle, collections.OrderedDict())
        row['member'] = handle
        row['solving'] = r.pop('score')
        cumulative_time = r.pop('cumulative_time')
        if cumulative_time:
            row['penalty'] = self.to_time(cumulative_time)
        curr = (row['solving'], cumulative_time)
        if curr != prev:
            prev = curr
            rank = index - skip
        row['place'] = rank
        solved = 0
        problems = row.setdefault('problems', {})
        for prob, sol in zip(problems_info, solutions):
            if not sol:
                continue
            p = problems.setdefault(prob['short'], {})
            if sol['points'] > 0 and prob.get('full_score'):
                p['partial'] = prob['full_score'] > sol['points']
            p['result'] = sol.pop('points')
            t = sol.pop('time')
            if t:
                p['time'] = self.to_time(t)
            if p['result'] > 0 and not p.get('partial', False):
                solved += 1
        r.pop('is_disqualified', None)
        r.pop('tiebreaker', None)
        row['old_rating'] = r.pop('old_rating', None)
        new_rating = r.pop('new_rating', None)
        if has_rated:
            row['rating_change'] = None
            row['new_rating'] = new_rating
        for k, v in r.items():
            hidden_fields.add(k)
            if k.endswith('_time'):
                r[k] = arrow.get(v).timestamp
                fields_types.setdefault(k, ['time'])
        row.update({k: v for k, v in r.items() if k not in row})
        row['solved'] = {'solving': solved}
        if has_rated:
            if row.get('new_rating') is not None:
                has_rating = True
            elif statistics is None or 'new_rating' not in statistics.get(handle, {}):
                handles_to_get_new_rating.append(handle)
            else:
                row['old_rating'] = statistics[handle].get('old_rating')
                row['new_rating'] = statistics[handle]['new_rating']

    if has_rated and not has_rating and handles_to_get_new_rating:
        with ExitStack() as stack:
            executor = stack.enter_context(PoolExecutor(max_workers=8))
            pbar = stack.enter_context(
                tqdm.tqdm(total=len(handles_to_get_new_rating), desc='getting new rankings'))

            @RateLimiter(max_calls=1, period=2)
            def fetch_data(handle):
                url = self.FETCH_USER_INFO_URL_.format(resource=resource, user=quote_plus(handle))
                page = REQ.get(url)
                data = json.loads(page)
                return handle, data

            for handle, data in executor.map(fetch_data, handles_to_get_new_rating):
                rating = data.get('contests', {}).get('current_rating')
                if rating:
                    result[handle].setdefault('info', {})['rating'] = rating
                contest_addition_update = {}
                for key, contest in data['contests']['history'].items():
                    rating = contest.get('rating')
                    if not rating:
                        continue
                    if key == self.key:
                        result[handle]['new_rating'] = rating
                    else:
                        contest_addition_update[key] = collections.OrderedDict((('new_rating', rating),))
                result[handle]['contest_addition_update'] = contest_addition_update
                pbar.update()

    standings_url = self.url.rstrip('/') + '/ranking/' if result else self.standings_url
    standings = {
        'result': result,
        'url': standings_url,
        'problems': problems_info,
        'fields_types': fields_types,
        'hidden_fields': list(hidden_fields),
    }
    return standings
def get_standings(self, users=None, statistics=None):
    result = {}
    hidden_fields = []
    fields_types = {}
    order = None
    writers = defaultdict(int)

    start_time = self.start_time.replace(tzinfo=None)
    if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
        opt = 0.61803398875

        def canonize_title(value):
            value = value.lower()
            value = re.sub(r'\s+-[^-]+$', '', value)
            value = re.sub(r'\bsingle\s+round\s+match\b', 'srm', value)
            value = re.sub(r'\bmarathon\s+match\b', 'mm', value)
            value = re.sub(r'[0-9]*([0-9]{2})\s*tco(\s+)', r'tco\1\2', value)
            value = re.sub(r'tco\s*[0-9]*([0-9]{2})(\s+)', r'tco\1\2', value)
            value = re.sub(r'^[0-9]{2}([0-9]{2})(\s+)', r'tco\1\2', value)
            return set(re.split('[^A-Za-z0-9]+', value))

        def process_match(date, title, url):
            nonlocal opt
            if abs(date - start_time) > timedelta(days=2):
                return
            a1 = canonize_title(title)
            a2 = canonize_title(self.name)
            intersection = 0
            for w1 in a1:
                for w2 in a2:
                    if w1.isdigit() or w2.isdigit():
                        if w1 == w2:
                            intersection += 1
                            break
                    elif w1.startswith(w2) or w2.startswith(w1):
                        intersection += 1
                        break
            union = len(a1) + len(a2) - intersection
            iou = intersection / union
            if iou > opt:
                opt = iou
                self.standings_url = url

        url = 'https://www.topcoder.com/tc?module=MatchList&nr=100500'
        page = REQ.get(url)
        re_round_overview = re.compile(
            r'''
            (?:<td[^>]*>(?:
                [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
                (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
            )</td>[^<]*){2}
            ''',
            re.VERBOSE,
        )
        matches = re_round_overview.finditer(str(page))
        for match in matches:
            date = datetime.strptime(match.group('date'), '%m.%d.%Y')
            process_match(date, match.group('title'), urljoin(url, match.group('url')))

        url = 'https://www.topcoder.com/tc?module=BasicData&c=dd_round_list'
        page = REQ.get(url)
        root = ET.fromstring(page)
        for child in root:
            data = {}
            for field in child:
                data[field.tag] = field.text
            date = dateutil.parser.parse(data['date'])
            url = 'https://www.topcoder.com/stat?c=round_overview&er=5&rd=' + data['round_id']
            process_match(date, data['full_name'], url)

    for url in self.url, self.standings_url:
        if url:
            match = re.search('/challenges/(?P<cid>[0-9]+)', url)
            if match:
                challenge_id = match.group('cid')
                break
    else:
        challenge_id = None

    if challenge_id:  # marathon match
        url = conf.TOPCODER_API_MM_URL_FORMAT.format(challenge_id)
        page = REQ.get(url)
        data = json.loads(page)
        problems_info = []
        hidden_fields.extend(['time', 'submits', 'style'])
        fields_types = {'delta_rank': ['delta'], 'delta_score': ['delta']}
        order = ['place_as_int', '-solving', 'addition__provisional_rank', '-addition__provisional_score']
        for row in data:
            handle = row.pop('member')
            r = result.setdefault(handle, OrderedDict())
            r['member'] = handle
            r['place'] = row.pop('finalRank', None)
            r['provisional_rank'] = row.pop('provisionalRank', None)
            r['style'] = row.pop('style')
            if r['place'] and r['provisional_rank']:
                r['delta_rank'] = r['provisional_rank'] - r['place']
            submissions = row.pop('submissions')
            has_solution = False
            for s in submissions:
                score = s.get('finalScore')
                if not score or score == '-':
                    if 'provisional_score' not in r:
                        p_score = s.pop('provisionalScore', None)
                        if isinstance(p_score, str):
                            p_score = asfloat(p_score)
                        if p_score is not None:
                            r['provisional_score'] = round(p_score, 2) if p_score >= 0 else False
                            r['time'] = s['created']
                            has_solution = True
                    continue
                r['solving'] = score
                r['solved'] = {'solving': int(score > 0)}
                p_score = s.pop('provisionalScore')
                if isinstance(p_score, str):
                    p_score = asfloat(p_score)
                if p_score is not None and p_score > 0:
                    r['provisional_score'] = round(p_score, 2)
                    r['delta_score'] = round(score - p_score, 2)
                r['time'] = s['created']
                has_solution = True
                break
            if not has_solution:
                continue
            r['submits'] = len(submissions)
        if not result:
            raise ExceptionParseStandings('empty standings')
    else:  # single round match
        if not self.standings_url:
            raise InitModuleException('Not set standings url for %s' % self.name)
        url = self.standings_url + '&nr=100000042'
        page = REQ.get(url, time_out=100)
        result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)
        if not result_urls:
            raise ExceptionParseStandings('not found result urls')

        dd_round_results = {}
        match = re.search('rd=(?P<rd>[0-9]+)', url)
        if match:
            rd = match.group('rd')
            url = f'https://www.topcoder.com/tc?module=BasicData&c=dd_round_results&rd={rd}'
            try:
                dd_round_results_page = REQ.get(url)
                root = ET.fromstring(dd_round_results_page)
                for child in root:
                    data = {}
                    for field in child:
                        data[field.tag] = field.text
                    handle = data.pop('handle')
                    dd_round_results[handle] = self._dict_as_number(data)
            except FailOnGetResponse:
                pass

        hidden_fields.extend(['coding_phase', 'challenge_phase', 'system_test', 'point_total', 'room'])

        matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
        problems_sets = []
        for match in matches:
            problems = re.findall(
                '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                match.group(),
                re.IGNORECASE,
            )
            if problems:
                problems_sets.append([
                    {'short': n, 'url': urljoin(url, u)}
                    for u, n in problems
                ])

        problems_info = dict() if len(problems_sets) > 1 else list()
        for problems_set, result_url in zip(problems_sets, result_urls):
            url = urljoin(self.standings_url, result_url + '&em=1000000042')
            url = url.replace('&amp;', '&')
            division = int(parse_qs(url)['dn'][0])
            division_str = 'I' * division

            with PoolExecutor(max_workers=3) as executor:

                def fetch_problem(p):
                    errors = set()
                    for attempt in range(3):
                        try:
                            page = REQ.get(p['url'], time_out=30)
                            match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
                            page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
                            matches = re.findall(
                                r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>',
                                page,
                                re.DOTALL,
                            )
                            for key, value in matches:
                                key = key.strip().rstrip(':').lower()
                                if key == 'categories':
                                    tags = [t.strip().lower() for t in value.split(',')]
                                    tags = [t for t in tags if t]
                                    if tags:
                                        p['tags'] = tags
                                elif key.startswith('writer') or key.startswith('tester'):
                                    key = key.rstrip('s') + 's'
                                    p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
                            for w in p.get('writers', []):
                                writers[w] += 1
                            info = p.setdefault('info', {})
                            matches = re.finditer('<table[^>]*paddingTable2[^>]*>.*?</table>', page, re.DOTALL)
                            for match in matches:
                                html_table = match.group(0)
                                rows = parsed_table.ParsedTable(html_table)
                                for row in rows:
                                    key, value = None, None
                                    for k, v in row.items():
                                        if k == "":
                                            key = v.value
                                        elif k and division_str in k.split():
                                            value = v.value
                                    if key and value:
                                        key = re.sub(' +', '_', key.lower())
                                        info[key] = value
                                        if key == 'point_value':
                                            value = toint(value) or asfloat(value)
                                            if value is not None:
                                                p['full_score'] = value
                        except Exception as e:
                            errors.add(f'error parse problem info {p}: {e}')
                            sleep(5 + attempt)
                        else:
                            errors = None
                            break
                    if errors:
                        LOG.error(errors)
                    return p

                for p in tqdm.tqdm(executor.map(fetch_problem, problems_set), total=len(problems_set)):
                    d = problems_info
                    if len(problems_sets) > 1:
                        d = d.setdefault('division', OrderedDict())
                        d = d.setdefault(division_str, [])
                    d.append(p)

            if not users and users is not None:
                continue

            page = REQ.get(url)
            rows = etree.HTML(page).xpath("//tr[@valign='middle']")
            header = None
            url_infos = []
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 10:
                    continue
                values = [c.value for c in r.columns]
                if header is None:
                    header = values
                    continue
                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Coders').strip()
                d = self._dict_as_number(d)
                if users and handle not in users:
                    continue
                row = result.setdefault(handle, OrderedDict())
                row.update(d)
                if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                    row.pop('new_rating', None)
                    row.pop('old_rating', None)
                    row.pop('rating_change', None)
                row['member'] = handle
                row['place'] = row.pop('division_placed', None)
                row['solving'] = row['point_total']
                row['solved'] = {'solving': 0}
                row['division'] = 'I' * division
                if 'adv.' in row:
                    row['advanced'] = row.pop('adv.').lower().startswith('y')
                url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                url_infos.append(url_info)

            def fetch_solution(url):
                for i in range(2):
                    try:
                        page = REQ.get(url, time_out=60)
                        match = re.search(
                            '<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                            page,
                            re.DOTALL | re.IGNORECASE,
                        )
                        if not match:
                            break
                        ret = html.unescape(match.group('solution'))
                        ret = ret.strip()
                        ret = ret.replace('<BR>', '\n')
                        ret = ret.replace('\xa0', ' ')
                        return ret
                    except FailOnGetResponse:
                        sleep(i * 10 + 3)
                return None

            n_failed_fetch_info = 0

            def fetch_info(url):
                nonlocal n_failed_fetch_info
                if n_failed_fetch_info > 10:
                    return
                delay = 10
                for _ in range(5):
                    try:
                        page = REQ.get(url, time_out=delay)
                        match = re.search('class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                                          page, re.IGNORECASE)
                        if match:
                            break
                    except Exception:
                        sleep(delay + _)
                else:
                    n_failed_fetch_info += 1
                    return
                handle = html.unescape(match.group('handle').strip())
                match = re.search(r' Room\s*(?P<room>[0-9]+)', page)
                room = match.group('room') if match else None
                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>[^<]*</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                problems = {}
                n_fetch_solution = 0
                for match in matches:
                    d = match.groupdict()
                    short = d.pop('short')
                    solution_url = urljoin(url, d['url'])
                    d['url'] = solution_url
                    d = self._dict_as_number(d)
                    if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                        d['result'] = -d['result']
                    if abs(d['result']) < 1e-9:
                        d.pop('result')
                    if re.match('^[0.:]+$', d['time']):
                        d.pop('time')
                    else:
                        time_in_seconds = 0
                        for t in d['time'].split(':'):
                            time_in_seconds = time_in_seconds * 60 + asfloat(t)
                        d['time_in_seconds'] = time_in_seconds
                    solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                    if not solution:
                        n_fetch_solution += 1
                        solution = fetch_solution(solution_url)
                    d['solution'] = solution
                    problems[short] = d
                challenges = []
                matches = re.finditer(
                    r'''
                    <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                    <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                    <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''',
                    page,
                    re.VERBOSE | re.IGNORECASE,
                )
                for match in matches:
                    d = match.groupdict()
                    d = {k: v.strip() for k, v in d.items()}
                    d['result'] = float(d['result'].replace(',', '.'))
                    d['url'] = urljoin(url, d['url'])
                    p = problems.setdefault(d['problem'], {})
                    p.setdefault('extra_score', 0)
                    p['extra_score'] += d['result']
                    p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                    challenges.append(d)
                return url, handle, room, problems, challenges, n_fetch_solution

            with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                n_fetch_solution = 0
                for info in executor.map(fetch_info, url_infos):
                    if info is None:
                        continue
                    url, handle, room, problems, challenges, n_sol = info
                    n_fetch_solution += n_sol
                    pbar.set_description(f'div{division} {url}')
                    pbar.set_postfix(n_solution=n_fetch_solution, n_failed_fetch_info=n_failed_fetch_info)
                    pbar.update()
                    if handle is not None:
                        if handle not in result:
                            LOG.error(f'{handle} not in result, url = {url}')
                        row = result[handle]
                        row['url'] = url
                        if room:
                            row['room'] = room
                        row['problems'] = problems
                        row['challenges'] = challenges
                        for p in problems.values():
                            if p.get('result', 0) > 1e-9:
                                row['solved']['solving'] += 1
                        if challenges:
                            h = row.setdefault('hack', {
                                'title': 'challenges',
                                'successful': 0,
                                'unsuccessful': 0,
                            })
                            for c in challenges:
                                h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

        if dd_round_results:
            fields = set()
            hidden_fields_set = set(hidden_fields)
            for data in result.values():
                for field in data.keys():
                    fields.add(field)

            k_mapping = {'new_vol': 'new_volatility', 'advanced': None}
            for handle, data in dd_round_results.items():
                if handle not in result:
                    continue
                row = result[handle]
                for k, v in data.items():
                    k = k_mapping.get(k, k)
                    if k and k not in fields:
                        if k in {'new_rating', 'old_rating'} and not v:
                            continue
                        row[k] = v
                        if k not in hidden_fields_set:
                            hidden_fields_set.add(k)
                            hidden_fields.append(k)
                        ks = k.split('_')
                        if ks[0] == 'level' and ks[-1] == 'language' and v and v.lower() != 'unspecified':
                            idx = {'one': 0, 'two': 1, 'three': 2}.get(ks[1], None)
                            d = problems_info
                            if len(problems_sets) > 1:
                                d = d['division'][row['division']]
                            if idx is not None and 0 <= idx < len(d) and d[idx]['short'] in row['problems']:
                                row['problems'][d[idx]['short']]['language'] = v

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
        'hidden_fields': hidden_fields,
        'fields_types': fields_types,
        'options': {
            'fixed_fields': [('hack', 'Challenges')],
        },
    }
    if writers:
        writers = [w[0] for w in sorted(writers.items(), key=lambda w: w[1], reverse=True)]
        standings['writers'] = writers
    if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
        standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]
    if order:
        standings['options']['order'] = order
    return standings
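# Worked example of the title-matching score above (titles hypothetical):
# canonize_title('Single Round Match 800') and canonize_title('SRM 800') both
# reduce to the token set {'srm', '800'}: digit tokens must match exactly,
# other tokens match by prefix. Here intersection = 2 and
# union = len(a1) + len(a2) - intersection = 2, so IoU = 1, which clears the
# golden-ratio threshold 0.61803398875 and accepts the candidate
# standings_url.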
def get_standings(self, users=None, statistics=None):
    year = int(re.search(r'\b[0-9]{4}\b', self.key).group(0))
    season = '%d-%d' % (year - 1, year)

    icpc_standings_url = f'https://icpc.global/community/results-{year}'
    icpc_api_standings_url = f'https://icpc.global/api/help/cms/virtpublic/community/results-{year}'

    standings_urls = []
    if not self.standings_url:
        for url in (
            f'http://static.kattis.com/icpc/wf{year}/',
            f'https://zibada.guru/finals/{year}/',
            f'http://web.archive.org/web/{year}/https://icpc.baylor.edu/scoreboard/',
            f'http://web.archive.org/web/{year}/https://icpc.global/scoreboard/',
            f'https://cphof.org/standings/icpc/{year}',
            icpc_api_standings_url,
        ):
            try:
                page = REQ.get(url)
            except FailOnGetResponse:
                continue
            if 'web.archive.org' in REQ.last_url and f'/{year}' not in REQ.last_url:
                continue
            if not re.search(rf'\b(world\s*finals\s*{year}|{year}\s*world\s*finals)\b', page, re.IGNORECASE):
                continue
            standings_urls.append(url)
    else:
        if self.standings_url == icpc_standings_url:
            standings_urls.append(icpc_api_standings_url)
        else:
            standings_urls.append(self.standings_url)

    if not standings_urls:
        raise ExceptionParseStandings(f'Not found standings url year = {year}')

    for standings_url in standings_urls:
        is_icpc_api_standings_url = standings_url == icpc_api_standings_url
        page = REQ.get(standings_url)

        result = {}
        hidden_fields = set(self.info.get('hidden_fields', [])) | {'region'}
        problems_info = OrderedDict()

        if 'zibada' in standings_url:
            match = re.search(r' = (?P<data>[\{\[].*?);?\s*$', page, re.MULTILINE)
            if match:
                names = self._json_load(match.group('data'))
            else:
                names = None
            try:
                page = REQ.get('standings.js')
                match = re.search(r' = (?P<data>\{.*?);?\s*$', page, re.MULTILINE)
                data = self._json_load(match.group('data'))
            except Exception:
                assert names
                data = names

            for p_name in data['problems']:
                problems_info[p_name] = {'short': p_name}

            events = data.pop('events', None)
            if events:
                teams = {}
                time_divider = 60
                events.sort(key=lambda e: int(e.split()[-1]))
                for e in events:
                    tid, p_name, status, attempt, time = e.split()
                    time = int(time)
                    team = teams.setdefault(tid, {})
                    problems = team.setdefault('problems', {})
                    result = problems.get(p_name, {}).get('result', '')
                    if not result.startswith('?') and status.startswith('?'):
                        continue
                    if status == '+':
                        attempt = int(attempt) - 1
                    p_info = problems_info[p_name]
                    problems[p_name] = {
                        'time': time,
                        'result': '+' if status == '+' and attempt == 0 else f'{status}{attempt}',
                    }
                for tid, team in teams.items():
                    name = names[int(tid)][0]
                    name = html.unescape(name)
                    team['member'] = f'{name} {season}'
                    team['name'] = name
                    penalty = 0
                    solving = 0
                    for p_name, problem in team.get('problems', {}).items():
                        if problem['result'].startswith('+'):
                            solving += 1
                            attempt_penalty = (int(problem['result'].lstrip('+') or 0)) * 20 * time_divider
                            penalty += problem['time'] + attempt_penalty
                    team['penalty'] = int(round(penalty / time_divider))
                    team['solving'] = solving
            else:
                teams = {}
                time_divider = 1
                data_teams = data['teams']
                if isinstance(data_teams, dict):
                    data_teams = data_teams.values()
                for team in data_teams:
                    row = {}

                    def get(key, index):
                        return team[key] if isinstance(team, dict) else team[index]

                    name = get('name', 0)
                    name = html.unescape(name)
                    row['member'] = f'{name} {season}'
                    row['name'] = name
                    row['solving'] = int(get('score', 2))
                    row['penalty'] = int(get('time', 3))

                    if isinstance(team, dict):
                        team['problems'] = [team[str(index)] for index in range(len(data['problems']))]

                    problems = row.setdefault('problems', {})
                    for p_name, verdict in zip(data['problems'], get('problems', 4)):
                        if not verdict:
                            continue
                        if isinstance(verdict, dict):
                            verdict = {k[0]: v for k, v in verdict.items()}
                            verdict['a'] = int(verdict['a'])
                            if isinstance(verdict.get('p'), int):
                                verdict['a'] += verdict['p']
                            if isinstance(verdict['s'], str):
                                verdict['s'] = int(verdict['s'])
                            status = '+' if verdict['s'] else ('?' if verdict.get('p', False) else '-')
                            time = verdict['t']
                            result = verdict['a']
                            time_divider = 1000 * 60
                            if not result:
                                continue
                        else:
                            status, result = verdict.split(' ', 1)
                            if ' ' in result:
                                result, time = result.split()
                                time = int(time)
                            else:
                                time = None
                            result = int(result)
                        problem = problems.setdefault(p_name, {})
                        if status == '+':
                            problem['time'] = time
                            problem['result'] = '+' if result == 1 else f'+{result - 1}'
                        else:
                            problem['result'] = f'{status}{result}'
                    teams[row['member']] = row

                teams = list(teams.values())
                teams.sort(key=lambda t: (t['solving'], -t['penalty']), reverse=True)
                rank = 0
                prev = None
                for i, t in enumerate(teams):
                    curr = (t['solving'], t['penalty'])
                    if prev != curr:
                        rank = i + 1
                        prev = curr
                    t['place'] = rank
                result = {t['member']: t for t in teams}
                problems_info = OrderedDict(sorted(problems_info.items()))
        else:
            if is_icpc_api_standings_url:
                page = re.sub(r'</table>\s*<table>\s*(<tr[^>]*>\s*<t[^>]*>)', r'\1', page, flags=re.I)
            regex = '''(?:<table[^>]*(?:id=["']standings|class=["']scoreboard)[^>]*>|"content":"[^"]*<table[^>]*>|<table[^>]*class="[^"]*(?:table[^"]*){3}"[^>]*>).*?</table>'''  # noqa
            match = re.search(regex, page, re.DOTALL)
            if match:
                html_table = match.group(0)
                table = parsed_table.ParsedTable(html_table, with_not_full_row=is_icpc_api_standings_url)
            else:
                table = []
            time_divider = 1
            last_place = None
            honorables = []
            for r in table:
                row = {}
                problems = row.setdefault('problems', {})
                for k, vs in r.items():
                    if isinstance(vs, list):
                        v = ' '.join(i.value for i in vs if i.value)
                    else:
                        v = vs.value
                    k = k.lower().strip('.')
                    v = v.strip()
                    if honorables:
                        if v:
                            honorables.append(v)
                        continue
                    if k in ('rank', 'rk', 'place'):
                        if not isinstance(vs, list):
                            medal = vs.column.node.xpath('.//img/@alt')
                            if medal and medal[0].endswith('medal'):
                                row['medal'] = medal[0].split()[0]
                        if v and not v[0].isdigit():
                            honorables.append(v)
                        row['place'] = v
                    elif k in ('team', 'name', 'university'):
                        if isinstance(vs, list):
                            for el in vs:
                                logo = el.column.node.xpath('.//img/@src')
                                if logo:
                                    logo = urllib.parse.urljoin(standings_url, logo[0])
                                    row.setdefault('info', {})['logo'] = logo
                                    break
                            for el in vs:
                                region = el.column.node.xpath('.//*[@class="badge badge-warning"]')
                                if region:
                                    region = ''.join([s.strip() for s in region[0].xpath('text()')])
                                    if region:
                                        row['region'] = region
                        if 'cphof' in standings_url:
                            member = vs.column.node.xpath('.//a/text()')[0].strip()
                            row['member'] = f'{member} {season}'
                        else:
                            row['member'] = f'{v} {season}'
                        row['name'] = v
                    elif k in ('time', 'penalty', 'total time (min)', 'minutes'):
                        if v:
                            row['penalty'] = int(v)
                    elif k in ('slv', 'solved', '# solved'):
                        row['solving'] = int(v)
                    elif k == 'score':
                        if ' ' in v:
                            row['solving'], row['penalty'] = map(int, v.split())
                        else:
                            row['solving'] = int(v)
                    elif len(k) == 1:
                        k = k.title()
                        if k not in problems_info:
                            problems_info[k] = {'short': k}
                            if 'title' in vs.header.attrs:
                                problems_info[k]['name'] = vs.header.attrs['title']
                        v = re.sub(r'([0-9]+)\s+([0-9]+)\s+tr.*', r'\2 \1', v)
                        v = re.sub('tr[a-z]*', '', v)
                        v = re.sub('-*', '', v)
                        v = v.strip()
                        if not v:
                            continue
                        p = problems.setdefault(k, {})
                        if '+' in v:
                            v = v.replace(' ', '')
                            p['result'] = f'?{v}'
                        elif ' ' in v:
                            pnt, time = map(int, v.split())
                            p['result'] = '+' if pnt == 1 else f'+{pnt - 1}'
                            p['time'] = time
                            if ('solvedfirst' in vs.column.attrs.get('class', '')
                                    or vs.column.node.xpath('.//*[contains(@class, "score_first")]')):
                                p['first_ac'] = True
                        else:
                            p['result'] = f'-{v}'
                if row.get('place'):
                    last_place = row['place']
                elif last_place:
                    row['place'] = last_place
                if 'member' not in row or row['member'].startswith(' '):
                    continue
                result[row['member']] = row

            elements = etree.HTML(page).xpath('//div[@class="card-header"]/following-sibling::div[@class="card-body"]//li')  # noqa
            for el in elements:
                name = ''.join([s.strip() for s in el.xpath('text()')])
                member = f'{name} {season}'
                row = result.setdefault(member, {'member': member, 'name': name})
                logo = el.xpath('./img/@src')
                if logo:
                    row.setdefault('info', {})['logo'] = urllib.parse.urljoin(standings_url, logo[0])
                while el is not None:
                    prv = el.getprevious()
                    if prv is not None and prv.tag == 'div' and prv.get('class') == 'card-header':
                        break
                    el = el.getparent()
                if el is not None:
                    region = ''.join([s.strip() for s in prv.xpath('text()')])
                    row['region'] = region

            if result and honorables:
                for name in honorables:
                    if 'honorable' in name.lower():
                        continue
                    row = dict(name=name, member=f'{name} {season}')
                    result[row['member']] = row

        if not result:
            continue

        if statistics:
            for team, row in result.items():
                stat = statistics.get(team)
                if not stat:
                    continue
                for k, v in stat.items():
                    if k not in row:
                        hidden_fields.add(k)
                        row[k] = v

        if any(['region' not in r for r in result.values()]):
            try:
                url = f'https://icpc.global/api/team/wf/{year}/published'
                page = REQ.get(url, time_out=60)
                data = self._json_load(page)
            except Exception:
                traceback.print_exc()
                data = None
            if data:
                def canonize_name(name):
                    name = name.lower()
                    name = name.replace('&', ' and ')
                    name = re.sub(r'\s{2,}', ' ', name)
                    name = re.split(r'(?:\s-\s|\s-|-\s|,\s)', name)
                    name = tuple(sorted([n.strip() for n in name]))
                    return name

                matching = {}
                for key, row in result.items():
                    name = row['name']
                    matching.setdefault(name, key)
                    name = canonize_name(name)
                    matching.setdefault(name, key)

                for site in data:
                    region = site['siteName']
                    for team in site['teams']:
                        name = team['university']
                        if name not in matching:
                            name = canonize_name(name)
                            if name not in matching:
                                name = tuple(sorted(name + canonize_name(team['name'])))
                        if name not in matching:
                            logger.warning(f'Not found team = {name}')
                        else:
                            row = result[matching[name]]
                            row['region'] = region
                            for k, v in team.items():
                                k = k.lower()
                                if k not in row:
                                    hidden_fields.add(k)
                                    row[k] = v

        first_ac_of_all = None
        for team in result.values():
            for p_name, problem in team.get('problems', {}).items():
                p_info = problems_info[p_name]
                if not problem['result'].startswith('+'):
                    continue
                time = problem['time']
                if 'first_ac' not in p_info or time < p_info['first_ac']:
                    p_info['first_ac'] = time
                if first_ac_of_all is None or time < first_ac_of_all:
                    first_ac_of_all = time
                if problem.get('first_ac'):
                    p_info['has_first_ac'] = True

        for team in result.values():
            for p_name, problem in team.get('problems', {}).items():
                p_info = problems_info[p_name]
                if problem['result'].startswith('+'):
                    if p_info.get('has_first_ac') and not problem.get('first_ac'):
                        continue
                    if problem['time'] == p_info['first_ac']:
                        problem['first_ac'] = True
                    if problem['time'] == first_ac_of_all:
                        problem['first_ac_of_all'] = True
                if 'time' in problem:
                    problem['time'] = int(round(problem['time'] / time_divider))

        without_medals = any(
            p['result'].startswith('?')
            for row in result.values()
            for p in row.get('problems', {}).values()
        )

        options = {'per_page': None}
        if not without_medals:
            medals = self._get_medals(year)
            if medals:
                medals = [{'name': k, 'count': v} for k, v in medals.items()]
                options['medals'] = medals

        standings = {
            'result': result,
            'url': icpc_standings_url if is_icpc_api_standings_url else standings_url,
            'problems': list(problems_info.values()),
            'options': options,
            'hidden_fields': list(hidden_fields),
        }
        return standings

    raise ExceptionParseStandings(f'Not found standings url from {standings_urls}')
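
# Both scoreboard branches above assign places with the standard
# competitive-ranking idiom: rows sharing the same (solving, penalty) pair
# share a place, and the next distinct pair jumps to its 1-based index.
# A minimal standalone sketch of that idiom (function name is ours):
def assign_places(teams):
    # teams: list of dicts already sorted best-first by (solving, -penalty)
    rank, prev = 0, None
    for i, team in enumerate(teams):
        curr = (team['solving'], team['penalty'])
        if curr != prev:
            rank, prev = i + 1, curr
        team['place'] = rank

# e.g. scores [(3, 100), (3, 100), (2, 50)] receive places [1, 1, 3].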
def get_standings(self, users=None, statistics=None):
    slug = self.url.rstrip('/').rsplit('/', 1)[-1]
    config_url = self.CONFIG_URL_FORMAT_.format(slug=slug)
    page = REQ.get(config_url)
    config_data = json.loads(page)
    style = config_data['contest']['style'].upper()
    jid = config_data['contest']['jid']

    url = self.API_STANDINGS_URL_FORMAT_.format(jid=jid)
    page = REQ.get(url)
    data = json.loads(page)
    users_profiles_map = data['profilesMap']

    problems_url = self.API_PROBLEMS_URL_FORMAT_.format(jid=jid)
    problems_data = json.loads(REQ.get(problems_url))
    problems_info = []
    state = data['data']['scoreboard']['state']
    for idx, (code, short, problem_data) in enumerate(
        zip(state['problemJids'], state['problemAliases'], problems_data['data'])
    ):
        problem_data.update(problems_data['problemsMap'][problem_data['problemJid']])
        title = problem_data['titlesByLanguage'][problem_data['defaultLanguage']]
        info = {
            'name': title,
            'code': problem_data['slug'],
            'short': short,
        }
        if state['problemPoints']:
            info['full_score'] = state['problemPoints'][idx]
        elif problem_data['points']:
            info['full_score'] = problem_data['points']
        info['url'] = self.PROBLEM_URL_FORMAT_.format(url=self.url, short=info['short'])
        problems_info.append(info)

    result = {}
    if users is None or users:
        rows = data['data']['scoreboard']['content']['entries']
        handles_to_get_new_rating = []
        has_old_rating = False
        for row in rows:
            cjid = row['contestantJid']
            if cjid not in users_profiles_map:
                continue
            user = users_profiles_map[cjid]
            handle = user['username']
            r = result.setdefault(handle, collections.OrderedDict())
            r['member'] = handle
            r['place'] = row.pop('rank')
            if user.get('country'):
                r['country'] = user['country']

            if style == 'ICPC':
                r['penalty'] = row.pop('totalPenalties')
                r['solving'] = row.pop('totalAccepted')
            elif style == 'GCJ' or style == 'TROC':
                penalty = row.pop('totalPenalties')
                r['penalty'] = f'{penalty // 60:02d}:{penalty % 60:02d}'
                r['solving'] = row.pop('totalPoints')
            elif style == 'IOI':
                r['solving'] = row.pop('totalScores')
            else:
                raise ExceptionParseStandings(f'style = {style}')

            problems = r.setdefault('problems', {})
            solving = 0
            if style == 'IOI':
                for idx, score in enumerate(row['scores']):
                    if score is None:
                        continue
                    k = problems_info[idx]['short']
                    p = problems.setdefault(k, {})
                    p['result'] = score
                    p['partial'] = problems_info[idx].get('full_score', 100) > score
                    if not p['partial']:
                        solving += 1
            else:
                for idx, (attempt, penalty, pstate) in enumerate(
                    zip(row['attemptsList'], row['penaltyList'], row['problemStateList'])
                ):
                    if not attempt:
                        continue
                    k = problems_info[idx]['short']
                    p = problems.setdefault(k, {})
                    if pstate:
                        solving += 1
                        p['result'] = f"+{'' if attempt == 1 else attempt - 1}"
                        p['time'] = f'{penalty // 60:02d}:{penalty % 60:02d}'
                    else:
                        p['result'] = f"-{attempt}"
                    if pstate == 2:
                        p['first_ac'] = True

            if not problems:
                result.pop(handle)
                continue

            if state['problemPoints'] or style == 'IOI':
                r['solved'] = {'solving': solving}

            r['old_rating'] = (user.get('rating') or {}).get('publicRating')
            if r['old_rating'] is not None:
                has_old_rating = True

            if statistics is None or 'new_rating' not in statistics.get(handle, {}):
                handles_to_get_new_rating.append(handle)
            else:
                r['new_rating'] = statistics[handle]['new_rating']

        if not has_old_rating:
            for r in result.values():
                r.pop('old_rating')

        with ExitStack() as stack:
            executor = stack.enter_context(PoolExecutor(max_workers=8))
            pbar = stack.enter_context(tqdm.tqdm(total=len(handles_to_get_new_rating),
                                                 desc='getting new rankings'))

            def fetch_data(handle):
                url = self.API_HISTORY_URL_FORMAT_.format(handle=handle)
                data = json.loads(REQ.get(url))
                return handle, data

            for handle, data in executor.map(fetch_data, handles_to_get_new_rating):
                max_begin_time = -1
                for contest in data['data']:
                    if contest['rating']:
                        rating = contest['rating']['publicRating']
                        if contest['contestJid'] == jid:
                            result[handle]['new_rating'] = rating
                        info = data['contestsMap'][contest['contestJid']]
                        if info['beginTime'] > max_begin_time:
                            result[handle]['info'] = {'rating': rating}
                            max_begin_time = info['beginTime']
                pbar.update()

    standings = {
        'result': result,
        'url': self.STANDING_URL_FORMAT_.format(self),
        'problems': problems_info,
    }
    return standings
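
# The GCJ/TROC penalty and per-problem times above are rendered with the
# 'value // 60 : value % 60' pattern.  A standalone sketch of that formatting,
# assuming the API reports the value in minutes (the unit is our assumption,
# and the function name is ours):
def format_penalty(minutes):
    return f'{minutes // 60:02d}:{minutes % 60:02d}'

# format_penalty(125) == '02:05'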
def get_standings(self, users=None, statistics=None):
    if not self.standings_url:
        page = REQ.get(urljoin(self.url, '/'))

        # walk the menu links down to the training results page
        for name in (
            'Соревнования',
            'Тренировочные олимпиады',
        ):
            match = re.search('<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(name), page)
            url = match.group('url')
            page = REQ.get(url)

        match = re.search(
            '{}.*?<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(
                re.escape(self.name), 'Результаты прошедших тренировок'),
            page,
            re.DOTALL,
        )
        if not match:
            raise ExceptionParseStandings('Not found standings url')
        url = match.group('url')
        page = REQ.get(url)

        date = self.start_time.strftime('%Y-%m-%d')
        matches = re.findall(r'''
            <tr[^>]*>[^<]*<td[^>]*>{}</td>[^<]*
            <td[^>]*>(?P<title>[^<]*)</td>[^<]*
            <td[^>]*>[^<]*<a[^>]*href\s*=["\s]*(?P<url>[^">]*)["\s]*[^>]*>
        '''.format(date), page, re.MULTILINE | re.VERBOSE)

        urls = [(title, urljoin(url, u)) for title, u in matches]
        if len(urls) > 1:
            urls = [(title, urljoin(url, u)) for title, u in matches
                    if not re.search(r'[0-9]\s*-\s*[0-9].*(?:[0-9]\s*-\s*[0-9].*\bкл\b|школа)', title, re.I)]
        if not urls:
            raise ExceptionParseStandings('Not found standings url')

        if len(urls) > 1:
            ok = True
            urls_set = set()
            for _, u in urls:
                page = REQ.get(u)
                path = re.findall('<td[^>]*nowrap><a[^>]*href="(?P<href>[^"]*)"', page)
                if len(path) < 2:
                    ok = False
                parent = urljoin(u, path[-2])
                urls_set.add(parent)
            if len(urls_set) > 1:
                _, url = urls[0]
            elif not ok:
                raise ExceptionParseStandings('Too many standings urls')
            else:
                url = urls_set.pop()
        else:
            _, url = urls[0]

        page = REQ.get(url)
        self.standings_url = REQ.last_url
    else:
        page = REQ.get(self.standings_url)

    def get_table(page):
        html_table = re.search('<table[^>]*bgcolor="silver"[^>]*>.*?</table>',
                               page, re.MULTILINE | re.DOTALL).group(0)
        table = parsed_table.ParsedTable(html_table)
        return table

    table = get_table(page)

    problems_info = OrderedDict()
    max_score = defaultdict(float)
    scoring = False

    result = {}
    for r in table:
        row = OrderedDict()
        problems = row.setdefault('problems', {})
        for k, v in list(r.items()):
            if k == 'Имя':
                href = v.column.node.xpath('a/@href')
                if not href:
                    continue
                uid = re.search('[0-9]+$', href[0]).group(0)
                row['member'] = uid
                row['name'] = v.value
            elif k == 'Место':
                row['place'] = v.value
            elif k == 'Время':
                row['penalty'] = int(v.value)
            elif k in ['Сумма', 'Задачи']:
                row['solving'] = float(v.value)
            elif re.match('^[a-zA-Z0-9]+$', k):
                problems_info[k] = {'short': k}
                if v.value:
                    p = problems.setdefault(k, {})
                    p['result'] = v.value
                    if v.value and v.value[0] not in ['-', '+']:
                        scoring = True
                        try:
                            max_score[k] = max(max_score[k], float(v.value))
                        except ValueError:
                            pass
            elif k:
                row[k.strip()] = v.value.strip()
            elif v.value.strip().lower() == 'log':
                href = v.column.node.xpath('.//a/@href')
                if href:
                    row['url'] = urljoin(self.standings_url, href[0])
        result[row['member']] = row

    if scoring:
        match = re.search(r'<b[^>]*>\s*<a[^>]*href="(?P<url>[^"]*)"[^>]*>ACM</a>\s*</b>', page)
        if match:
            page = REQ.get(match.group('url'))
            table = get_table(page)
            for r in table:
                uid = None
                for k, v in list(r.items()):
                    if k == 'Имя':
                        href = v.column.node.xpath('a/@href')
                        if not href:
                            continue
                        uid = re.search('[0-9]+$', href[0]).group(0)
                    elif re.match('^[a-zA-Z0-9]+$', k) and uid and v.value:
                        if v.value[0] == '-':
                            result[uid]['problems'][k]['partial'] = True
                        elif v.value[0] == '+':
                            result[uid]['problems'][k]['partial'] = False
                            problems_info[k]['full_score'] = result[uid]['problems'][k]['result']

    for r in result.values():
        solved = 0
        for k, p in r['problems'].items():
            if p.get('partial'):
                continue
            score = p['result']
            if score.startswith('+') or 'partial' in p and not p['partial']:
                solved += 1
            else:
                try:
                    score = float(score)
                except ValueError:
                    continue
                if abs(max_score[k] - score) < 1e-9 and score > 0:
                    solved += 1
        r['solved'] = {'solving': solved}

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': list(problems_info.values()),
    }
    return standings
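
# In the scoring branch above, a numeric result counts as solved only when it
# equals the best score seen in that problem column (within 1e-9) and is
# positive.  A minimal sketch of that check (helper name is ours):
def is_full_solve(score, best_score):
    return abs(best_score - score) < 1e-9 and score > 0

# is_full_solve(100.0, 100.0) -> True; is_full_solve(85.0, 100.0) -> False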
def get_standings(self, users=None, statistics=None):
    standings_url = (self.url + '/standings').replace('contests', 'contest')

    is_gym = '/gym/' in self.url

    result = {}

    for unofficial in [False, True]:
        params = {
            'contestId': self.cid,
            'showUnofficial': str(unofficial).lower(),
        }
        if users:
            params['handles'] = ';'.join(users)

        try:
            data = _query(
                method='contest.standings',
                params=params,
                api_key=self.api_key,
            )
        except FailOnGetResponse as e:
            if getattr(e.args[0], 'code', None) == 400:
                return {'action': 'delete'}
            raise ExceptionParseStandings(e.args[0])

        if data['status'] != 'OK':
            raise ExceptionParseStandings(data['status'])

        phase = data['result']['contest'].get('phase', 'FINISHED').upper()
        contest_type = data['result']['contest']['type'].upper()
        duration_seconds = data['result']['contest'].get('durationSeconds')

        result_problems = data['result']['problems']
        problems_info = OrderedDict()
        for p in result_problems:
            d = {'short': p['index'], 'name': p['name']}
            if 'points' in p:
                d['full_score'] = p['points']
            elif contest_type == 'IOI':
                d['full_score'] = 100
            d['url'] = urljoin(standings_url.rstrip('/'), f"problem/{d['short']}")
            problems_info[d['short']] = d

        grouped = any('teamId' in row['party'] for row in data['result']['rows'])
        for row in data['result']['rows']:
            party = row['party']

            if is_gym and not party['members']:
                is_ghost_team = True
                name = party['teamName']
                party['members'] = [{
                    'handle': f'{name} {self.get_season()}',
                    'name': name,
                }]
            else:
                is_ghost_team = False

            for member in party['members']:
                if is_gym:
                    upsolve = False
                else:
                    upsolve = party['participantType'] != 'CONTESTANT'
                    if unofficial != upsolve:
                        continue

                handle = member['handle']
                r = result.setdefault(handle, OrderedDict())
                r['member'] = handle
                if 'room' in party:
                    r['room'] = str(party['room'])
                r.setdefault('participant_type', []).append(party['participantType'])

                if is_ghost_team:
                    r['name'] = member['name']
                    r['_no_update_name'] = True
                elif grouped and (not upsolve and not is_gym or 'name' not in r):
                    r['name'] = ', '.join(m['handle'] for m in party['members'])
                    if 'teamId' in party:
                        r['team_id'] = party['teamId']
                        r['name'] = f"{party['teamName']}: {r['name']}"
                    r['_no_update_name'] = True

                hack = row['successfulHackCount']
                unhack = row['unsuccessfulHackCount']

                problems = r.setdefault('problems', {})
                for i, s in enumerate(row['problemResults']):
                    k = result_problems[i]['index']
                    points = float(s['points'])

                    n = s.get('rejectedAttemptCount')
                    if n is not None and contest_type == 'ICPC' and points + n > 0:
                        points = f'+{"" if n == 0 else n}' if points > 0 else f'-{n}'

                    u = upsolve
                    if s['type'] == 'FINAL' and (points or n):
                        if not points:
                            points = f'-{n}'
                        p = {'result': points}
                        if contest_type == 'IOI':
                            full_score = problems_info[k].get('full_score')
                            if full_score:
                                p['partial'] = points < full_score
                        if 'bestSubmissionTimeSeconds' in s:
                            time = s['bestSubmissionTimeSeconds']
                            if time > duration_seconds:
                                u = True
                            else:
                                time /= 60
                                p['time'] = '%02d:%02d' % (time / 60, time % 60)
                        a = problems.setdefault(k, {})
                        if u:
                            a['upsolving'] = p
                        else:
                            a.update(p)

                if row['rank'] and not upsolve:
                    r['place'] = row['rank']
                    r['solving'] = row['points']
                    if contest_type == 'ICPC':
                        r['penalty'] = row['penalty']
                        r['solving'] = int(round(r['solving']))

                if hack or unhack:
                    r['hack'] = {
                        'title': 'hacks',
                        'successful': hack,
                        'unsuccessful': unhack,
                    }

    try:
        params.pop('showUnofficial')
        data = _query(
            method='contest.ratingChanges',
            params=params,
            api_key=self.api_key,
        )
        if data and data['status'] == 'OK':
            for row in data['result']:
                if str(row.pop('contestId')) != self.key:
                    continue
                handle = row.pop('handle')
                if handle not in result:
                    continue
                r = result[handle]
                old_rating = row.pop('oldRating')
                new_rating = row.pop('newRating')
                r['old_rating'] = old_rating
                r['new_rating'] = new_rating
    except FailOnGetResponse:
        pass

    def to_score(x):
        return (1 if x.startswith('+') or float(x) > 0 else 0) if isinstance(x, str) else x

    def to_solve(x):
        return not x.get('partial', False) and to_score(x.get('result', 0)) > 0

    for r in result.values():
        upsolving = 0
        solving = 0
        upsolving_score = 0
        for a in r['problems'].values():
            if 'upsolving' in a and to_solve(a['upsolving']) > to_solve(a):
                upsolving_score += to_score(a['upsolving']['result'])
                upsolving += to_solve(a['upsolving'])
            else:
                solving += to_solve(a)
        r.setdefault('solving', 0)
        r['upsolving'] = upsolving_score
        if abs(solving - r['solving']) > 1e-9 or abs(upsolving - r['upsolving']) > 1e-9:
            r['solved'] = {
                'solving': solving,
                'upsolving': upsolving,
            }

    standings = {
        'result': result,
        'url': standings_url,
        'problems': list(problems_info.values()),
        'options': {
            'fixed_fields': [('hack', 'Hacks')],
        },
    }

    if phase != 'FINISHED':
        standings['timing_statistic_delta'] = timedelta(minutes=10)

    return standings
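
# to_score/to_solve above accept both numeric scores and ICPC-style result
# strings ('+', '+2', '-3').  A standalone mirror with example expectations
# (the inputs are illustrative, not API data):
def _to_score(x):
    return (1 if x.startswith('+') or float(x) > 0 else 0) if isinstance(x, str) else x

assert _to_score('+2') == 1    # any '+'-prefixed result counts as accepted
assert _to_score('-3') == 0    # only rejected attempts
assert _to_score(75.0) == 75.0  # numeric scores pass through unchanged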
def get_standings(self, users=None, statistics=None):
    page = REQ.get(self.COMPETITION_INFO_API_URL_)
    data = json.loads(page)
    for round_data in data['rounds']:
        match = re.search(r'start\s*date\s*(?:<b[^>]*>)?(?P<start_time>[^<]*)(?:</b>)?.*end\s*date',
                          round_data['description'], re.IGNORECASE)
        start_time = parser.parse(match.group('start_time'), tzinfos={'CET': 'UTC+1'})
        if start_time == self.start_time and round_data['name'] == self.name:
            break
    else:
        raise ExceptionParseStandings('not found round')

    m = re.search('maxPointsForProblem=(?P<score>[0-9]+)', round_data['description'])
    max_points_challenge_problem = int(m.group('score')) if m else None

    page = REQ.get(self.ROUND_INFO_API_URL_)
    round_infos = json.loads(page)
    for round_info in round_infos['roundDisplayInfo']:
        if round_info['displayName'] == self.name:
            break
    else:
        raise ExceptionParseStandings('not found round')

    problems_info = collections.OrderedDict([
        (p['code'], {
            'code': p['code'],
            'name': p['name'],
            'url': self.PROBLEM_URL_.format(**p),
        })
        for p in round_data['problems']
    ])

    if self.name.startswith('Round'):
        level = int(self.name.split()[-1])
        if level in [1, 2]:
            for p in problems_info.values():
                p['full_score'] = level

    result = dict()
    divisions_order = []
    for cid, ctype in (
        (round_infos['teamCompetitionPremierLeagueId'], 'Team'),
        (round_infos['teamCompetitionRisingStarsId'], 'Team'),
        (round_infos['teamCompetitionPremierLeagueId'], 'individual'),
    ):
        url = self.RESULTS_API_URL_.format(cid=cid, url=round_info['url'], ctype=ctype)
        page = REQ.get(url)
        data = json.loads(page)

        division = data['displayedName'].replace(self.name, '').strip().lower()
        if division not in divisions_order:
            divisions_order.append(division)

        participant_type = {
            'Team': 'Team',
            'individual': 'Competitor',
        }[ctype]

        sorted_data = sorted(data['standings'], key=lambda r: r['score'], reverse=True)
        max_points = collections.defaultdict(int)
        division_result = dict()
        with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(sorted_data)) as pbar:

            def fetch_team_results(d):
                member = str(d['id'])
                url = self.TEAM_RESULTS_URL_.format(cid=cid, uid=member, name=participant_type)
                page = REQ.get(url)
                matches = re.finditer(
                    r'<a[^>]*href="[^"]*/Problem/(?P<code>[^"/]*)">[^<]*(?:\s*<[^>]*>)*(?P<score>[.0-9]+)',
                    page)
                problems = {}
                for m in matches:
                    k = m['code']
                    if k not in problems_info:
                        continue
                    p = problems.setdefault(k, {})
                    p['result'] = m['score']
                users = re.findall('<a[^>]*href="[^"]*/CompetitorResults/[^"]*">([^<]*)</a>', page)
                info = {
                    'problems': problems,
                    'url': url,
                    'member': member,
                }
                return d, info, users

            place = None
            last = None
            for index, (r, row, users) in enumerate(executor.map(fetch_team_results, sorted_data),
                                                    start=1):
                if last is None or abs(r['score'] - last) > 1e-7:
                    place = index
                    last = r['score']
                row['name'] = r['name']
                if users:
                    row['name'] += f': {", ".join(users)}'
                row['place'] = place
                row['solving'] = r['score']
                row['country'] = unquote(r['country']).split()[0]
                row['division'] = division
                if ctype == 'individual':
                    row['_skip_for_problem_stat'] = True
                division_result[row['member']] = row
                for k, p in row['problems'].items():
                    max_points[k] = max(max_points[k], float(p['result']))
                pbar.update()

        if max_points_challenge_problem is not None:
            for code, value in max_points.items():
                if code != round_data['problems'][-1]['code'] and value <= 2:
                    continue
                problems_info[code]['full_score'] = max_points_challenge_problem
                for r in division_result.values():
                    if code in r['problems']:
                        p = r['problems'][code]
                        p['status'] = p['result']
                        k = 1 - (1 - float(p['result']) / value) ** .5
                        if k < 1:
                            p['partial'] = True
                        p['result'] = round(max_points_challenge_problem * k, 2)

        for r in division_result.values():
            solved = 0
            for p in r['problems'].values():
                if not p.get('partial') and float(p['result']) > 0:
                    solved += 1
            r['solved'] = {'solving': solved}

        result.update(division_result)

    standings_url = self.STANDING_URL_.format(cid=round_infos['teamCompetitionPremierLeagueId'])
    standings = {
        'result': result,
        'url': standings_url,
        'problems': list(problems_info.values()),
        'divisions_order': divisions_order,
    }
    return standings
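
# The challenge problem above is rescaled concavely: a raw score equal to the
# round maximum maps to max_points_challenge_problem, and lower raw scores
# decay as 1 - sqrt(1 - raw/max).  A standalone sketch of the same formula
# (function and parameter names are ours):
def rescale_challenge_score(raw, max_raw, full_score):
    k = 1 - (1 - raw / max_raw) ** 0.5
    return round(full_score * k, 2)

# rescale_challenge_score(75.0, 100.0, 100) == 50.0  (sqrt makes scores concave)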
def get_standings(self, users=None, statistics=None):
    if not self.standings_url:
        raise ExceptionParseStandings('not standings url')

    def get_results(standings_url, division_data):
        page = REQ.get(standings_url)
        page_format = division_data.get('format')
        if page_format == 'json':
            data = json.loads(page)
            scores_field = None
            if 'problems' in data:
                scores_field = 'problem'
            elif 'tournaments' in data:
                scores_field = 'tournament'
            if scores_field:
                scores_fields_mapping = {'submission': 'T', 'request': 'R'}
                scores_mapping = OrderedDict()
                for score in data[f'{scores_field}s']:
                    name = str(score[f'{scores_field}Id'])
                    scores_mapping[name] = scores_fields_mapping.get(name, name.split(':')[-1])
            table = []
            for team in data['teams']:
                row = OrderedDict()
                row['name'] = team['team']['teamName']
                row['solving'] = team['score']
                row['country'] = team['team']['customData']['country']
                if scores_field:
                    problems = row.setdefault('_scores', OrderedDict())
                    scores = team[f'{scores_field}s']
                    for field, out in scores_mapping.items():
                        if field in scores:
                            problems[out] = as_number(scores.get(field, {}).get('score'))
                table.append(row)
        else:
            mapping = {
                'Rank': 'place',
                '': 'place',
                'Score': 'solving',
                'score': 'solving',
                'Total Score': 'solving',
                'Team': 'name',
                'name': 'name',
                'score + unspent LAM': 'unspent_lam',
            }
            xpath = division_data.get('xpath', '//table//tr')
            table = parsed_table.ParsedTable(html=page, header_mapping=mapping, xpath=xpath)

        season = self.get_season()
        ret = {}
        was_place = False
        for r in table:
            row = OrderedDict()
            for k, v in r.items():
                was_place = was_place or k == 'place'
                if isinstance(v, parsed_table.ParsedTableValue):
                    v = v.value
                if k == 'name':
                    row['name'] = v
                    row['member'] = f'{v} {season}'
                else:
                    row[k] = as_number(v) if k in {'place', 'solving'} else v
            ret[row['member']] = row

        if not was_place:
            place = None
            last = None
            for idx, row in enumerate(sorted(ret.values(), key=lambda r: r['solving'], reverse=True),
                                      start=1):
                if row['solving'] != last:
                    last = row['solving']
                    place = idx
                row['place'] = place
        return ret

    fields_types = {}
    results = {}
    divisions = self.info.get('standings', {}).get('divisions', [])
    divisions_order = []
    divisions_fields_types = defaultdict(OrderedDict)
    for division_data in divisions:
        division = division_data['name']
        division_results = get_results(division_data['standings_url'], division_data)

        medals = []
        for medal in division_data.get('medals', []):
            medals += [medal['name']] * medal['count']

        for handle, result in division_results.items():
            default = OrderedDict(member=result.pop('member'), name=result['name'])
            row = results.setdefault(handle, default)

            place_as_int = toint(result.get('place'))
            if place_as_int is not None and place_as_int <= len(medals):
                medal = medals[place_as_int - 1]
                result['medal'] = medal
                result['_medal_title_field'] = '_'
                result['_'] = f'{division.title()} {medal.title()}'

            scores = result.pop('_scores', {})

            if divisions_order:
                prev_division = divisions_order[-1]
                reverse_mapping = {'place': 'rank', 'solving': 'score'}
                for k, v in list(result.items()):
                    if k == 'medal' and k not in row:
                        for f in 'medal', '_medal_title_field', '_':
                            row[f] = result[f]
                    if k in {'name', 'medal'} or k.startswith('_'):
                        continue
                    if k in {'place', 'solving'}:
                        new_k = f'{division}_{reverse_mapping.get(k, k)}'
                        row[new_k] = v
                        try:
                            prev_val = row['_division_addition'][prev_division][k]
                            ref_k = f'{prev_division}_{reverse_mapping.get(k, k)}'
                            result[ref_k] = prev_val
                            divisions_fields_types[division].setdefault(ref_k, [])
                            val = float(prev_val) - float(v)
                            val = int(val) if int(val) == val else val
                            if k == 'place':
                                val = -val
                            field = f'{new_k}_delta'
                            row[field] = val
                            field_types = fields_types.setdefault(field, [])
                            if 'delta' not in field_types:
                                field_types.append('delta')
                            field = f'{ref_k}_delta'
                            result[field] = val
                            field_types = divisions_fields_types[division].setdefault(field, [])
                            if 'delta' not in field_types:
                                field_types.append('delta')
                        except Exception:
                            pass
            else:
                row.update(scores)
                row.update(result)

            division_addition = row.setdefault('_division_addition', {}).setdefault(division, OrderedDict())
            division_addition.update(scores)
            division_addition.update(result)
        divisions_order.append(division)

    for value in results.values():
        for division, row in value.get('_division_addition', {}).items():
            for k, v in row.items():
                field_types = divisions_fields_types[division].setdefault(k, [])
                field_type = type(v).__name__
                if field_type not in field_types:
                    field_types.append(field_type)

    for idx, division_data in enumerate(divisions):
        division = division_data['name']
        disable_fields = division_data.get('disable_fields', [])
        for field in disable_fields:
            divisions_fields_types[division].pop(field, None)
        if idx == 0:
            for row in results.values():
                for field in disable_fields:
                    row.pop(field, None)

    return dict(
        result=results,
        fields_types=fields_types,
        divisions_addition={
            k: dict(fields=list(fields_types.keys()), fields_types=fields_types)
            for k, fields_types in divisions_fields_types.items()
        },
        divisions_order=divisions_order,
    )
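
# The division-delta fields above subtract the current division's value from
# the previous division's, coerce integral floats to int, and negate place
# deltas so that place and score deltas share one sign convention.  A minimal
# sketch under those assumptions (the function name is ours):
def division_delta(prev_val, cur_val, is_place=False):
    val = float(prev_val) - float(cur_val)
    val = int(val) if int(val) == val else val
    return -val if is_place else val

# division_delta(1200, 1500) -> -300; division_delta(5, 2, is_place=True) -> -3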
def get_standings(self, users=None, statistics=None):
    if not self.standings_url:
        raise ExceptionParseStandings('Not set standings url')

    is_final = self.name.lower().startswith('final round')

    now = datetime.utcnow().replace(tzinfo=pytz.utc)
    if not is_final and self.end_time + timedelta(days=3) < now:
        raise ExceptionParseStandings('Too late')

    page = REQ.get(self.standings_url)

    html_table = re.search('<table[^>]*>.*?</table>', page, re.MULTILINE | re.DOTALL).group(0)
    table = parsed_table.ParsedTable(html_table, as_list=True,
                                     ignore_wrong_header_number=False,
                                     ignore_display_none=True)

    problems_info = OrderedDict()

    result = {}
    season = self.get_season()
    advanced = False
    for r in table:
        if isinstance(r, parsed_table.ParsedTableRow):
            if re.search(r'qualification\s*threshold', r.columns[0].value, re.I):
                advanced = True
                for row in result.values():
                    row['advanced'] = True
            continue
        row = OrderedDict()
        problems = row.setdefault('problems', {})
        if advanced:
            row['advanced'] = False
        pid = 0
        for k, v in r:
            if k == '#':
                row['place'] = v.value
            elif k == 'Name':
                row['name'] = v.value
            elif k.startswith('Total'):
                row['solving'] = v.value
            elif '_top_column' in v.header.attrs:
                problem_key = str(pid)
                if problem_key not in problems_info:
                    name = v.header.attrs['_top_column'].value
                    p_info = {'code': problem_key}
                    p_info_regex = r'^(?P<name>.*)\s+\(?(?P<score>[0-9]{2,})\)?$'
                    match = re.search(p_info_regex, name)
                    if match:
                        name = match.group('name').strip()
                    match = re.search(p_info_regex, k)
                    if match:
                        p_info['subname'] = match.group('name').strip()
                        p_info['full_score'] = int(match.group('score'))
                    p_info['name'] = name
                    href = v.header.node.xpath('a/@href')
                    if href:
                        p_info['suburl'] = href[0]
                        p_info['url'] = href[0]
                    problems_info[problem_key] = p_info
                if v.value:
                    try:
                        val = float(v.value)
                        if val:
                            p = problems.setdefault(problem_key, {})
                            p['result'] = v.value
                            full_score = problems_info[problem_key].get('full_score')
                            if full_score is not None:
                                p['partial'] = val < full_score
                            else:
                                style = v.attrs.get('style')
                                if style:
                                    if 'yellow' in style:
                                        p['partial'] = True
                                    elif 'lightgreen' in style:
                                        p['partial'] = False
                                        if full_score is None:
                                            problems_info[problem_key]['full_score'] = int(round(val, 0))
                    except ValueError:
                        pass
                pid += 1
            else:
                row.setdefault('_info', {})[k] = v.value
        if not problems:
            continue
        handle = row['name'] + ' ' + season
        row['member'] = handle
        if handle in result:
            continue
        result[handle] = row

    standings = {
        'result': result,
        'problems': list(problems_info.values()),
    }
    if is_final:
        standings['options'] = {
            'medals': [{'name': k, 'count': 1} for k in ('gold', 'silver', 'bronze')]
        }
    return standings
def get_standings(self, users=None, statistics=None):
    standings_url = self.standings_url
    standings_url = re.sub('.*/(http.*)', r'\1', standings_url)

    web_archive_url = self.info.get('parse', {}).get('web_archive_url')
    if web_archive_url:
        web_archive_url = re.sub('/http.*', '/', web_archive_url)
        standings_url = web_archive_url + standings_url

    passed = datetime.utcnow().replace(tzinfo=pytz.utc) - self.end_time > timedelta(days=30)
    if not web_archive_url and passed:
        raise ExceptionParseStandings('Long time passed')

    total_num_pages = None
    codename = self.name.split('.')[0]

    @RateLimiter(max_calls=10, period=1)
    def fetch_table(page):
        nonlocal web_archive_url
        nonlocal total_num_pages
        nonlocal standings_url
        url = standings_url
        if n_page > 1:
            url += f'/page/{page}'
        if not web_archive_url:
            url += '?locale=en'
        page = Statistic.get(url)

        match = re.search('<title>[^<]*-(?P<name>[^<]*)</title>', page)
        if codename not in match.group('name'):
            return

        if total_num_pages is None:
            matches = re.findall(
                '<span[^>]*class="[^"]*page-index[^"]*"[^>]*pageindex="([0-9]+)"[^>]*>',
                page,
                re.I,
            )
            if matches:
                total_num_pages = int(matches[-1])

        regex = '''<table[^>]*class="[^>]*table[^>]*"[^>]*>.*?</table>'''
        match = re.search(regex, page, re.DOTALL)
        table = parsed_table.ParsedTable(
            match.group(0),
            header_mapping={
                '№': '#',
                'Участник': 'Participant',
                'Бои': 'Games',
                'Игры': 'Games',
                'Побед': 'Won',
                'Рейтинг': 'Rating',
                'Язык': 'Language',
            },
        )
        return table

    result = {}
    n_page = 1
    ok = True
    last_rating = None
    while ok and (not users or len(users) != len(result)):
        ok = False
        table = fetch_table(n_page)
        if table is None:
            break
        for row in table:
            ok = True
            r = OrderedDict()
            participant = row.pop('Participant')
            member = participant.value
            if member in result or users and member not in users:
                continue
            r['member'] = member
            if not web_archive_url:
                r['info'] = {'avatar': participant.column.node.xpath('.//img/@src')[0]}
            url = participant.column.node.xpath('.//a/@href')[0]
            r['url'] = urllib.parse.urljoin(standings_url, url)
            r['place'] = int(row.pop('#').value)
            score = int(row.pop('Rating').value)
            r['solving'] = score
            r['delta'] = last_rating - score if last_rating is not None else ''
            last_rating = score
            if 'Language' in row:
                classes = row.pop('Language').column.node.xpath('.//*[contains(@class, "lc")]/@class')
                if classes:
                    prefix = 'LangIc-'
                    language = None
                    for cls in classes[0].split():
                        if cls.startswith(prefix):
                            language = cls[len(prefix):]
                    if language:
                        r['language'] = Statistic.LANGUAGES_MAPPING.get(language, language)
            if 'Games' in row:
                n_games = row.pop('Games').value.split()[-1]
                if n_games != '0':
                    r['games'] = n_games
            if 'Won' in row:
                p_won = row.pop('Won').value
                if p_won != '-':
                    r['won'] = p_won
            row.pop('Δ', None)
            for k, v in list(row.items()):
                r[k.strip().lower()] = v.value
            result[member] = r
        n_page += 1
        if total_num_pages is None or n_page > total_num_pages:
            break

    def fetch_rating(row):
        member = row['member']
        if not statistics or member not in statistics:
            return
        user_id = statistics[member].get('_user_id')
        if not user_id:
            page = Statistic.get(row['url'])
            match = re.search(r'userId\s*:\s*(?P<user_id>[0-9]+)', page)
            user_id = match.group('user_id')
        row['_user_id'] = user_id
        post = {
            'action': 'getRatingChanges',
            'userId': user_id,
            'mode': 'ALL',
            'csrf_token': csrf_token,
        }
        page = Statistic.get('/data/ratingChangeDataPage', post=post)
        rating_changes = json.loads(page)
        rating_data = {}
        ratings = rating_changes.get('ratingChanges')
        if ratings:
            ratings = json.loads(ratings)
            rating_data['ratings'] = ratings
            if ratings and len(ratings) > 1:
                # exponential moving average of successive rating deltas
                ema = 0
                prev = None
                alpha = 0.1
                for rating in ratings:
                    if prev is not None:
                        ema += ((rating['rating'] - prev) - ema) * alpha
                    prev = rating['rating']
                row[f'delta_ema={alpha}'] = f'{ema:.2f}'
            if not passed:
                row['new_rating'] = ratings[-1]['rating']
        submissions = rating_changes.get('submissions')
        if submissions:
            submissions = json.loads(submissions)
            rating_data['submissions'] = submissions
            row['created'] = Statistic.norm_timestamp(submissions[0]['time'])
            row['updated'] = Statistic.norm_timestamp(submissions[-1]['time'])
            row['version'] = len(submissions)
        rating_data_str = json.dumps(rating_data)
        rating_data_zip = zlib.compress(rating_data_str.encode('utf-8'))
        rating_data_b64 = b64encode(rating_data_zip).decode('ascii')
        row['_rating_data'] = rating_data_b64

    if not web_archive_url and '/1/' in self.standings_url:
        match = re.search('<meta[^>]*name="x-csrf-token"[^>]*content="(?P<token>[^"]*)"[^>]*>',
                          REQ.last_page, re.I)
        csrf_token = match.group('token')
        with PoolExecutor(max_workers=8) as executor:
            for _ in tqdm.tqdm(executor.map(fetch_rating, result.values()), desc='ratings'):
                pass

    ret = {
        'result': result,
        'url': standings_url,
        'fields_types': {'updated': ['timestamp'], 'created': ['timestamp']},
        'hidden_fields': ['new_rating', 'version', 'created'],
    }

    if self.name.endswith('Finals'):
        ret['options'] = {
            'medals': [
                {'name': 'gold', 'count': 1},
                {'name': 'silver', 'count': 1},
                {'name': 'bronze', 'count': 1},
                {'name': 'honorable', 'count': 3},
            ]
        }
    return ret
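
# The delta_ema field above is an exponential moving average of successive
# rating differences with smoothing factor alpha = 0.1.  An equivalent minimal
# sketch over a plain list of ratings (the function name is ours):
def rating_delta_ema(ratings, alpha=0.1):
    ema, prev = 0.0, None
    for r in ratings:
        if prev is not None:
            ema += ((r - prev) - ema) * alpha
        prev = r
    return ema

# rating_delta_ema([1500, 1540, 1520]) -> approximately 1.6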
def get_standings(self, users=None, statistics=None):
    # REQ.get('https://www.codechef.com/')
    # try:
    #     form = REQ.form()
    #     form['post'].update({
    #         'name': self._username,
    #         'pass': self._password,
    #     })
    #     page = REQ.get(form['url'], post=form['post'])
    #     form = REQ.form()
    #     if form['url'] == '/session/limit':
    #         for field in form['unchecked'][:-1]:
    #             form['post'][field['name']] = field['value'].encode('utf8')
    #         page = REQ.get(form['url'], post=form['post'])
    # except Exception:
    #     pass

    url = self.API_CONTEST_URL_FORMAT_.format(**self.__dict__)
    page = REQ.get(url)
    data = json.loads(page)
    if data['status'] != 'success':
        raise ExceptionParseStandings(json.dumps(data))
    if 'child_contests' in data:
        contest_infos = {
            d['contest_code']: {'division': k}
            for k, d in data['child_contests'].items()
        }
    else:
        contest_infos = {self.key: {}}

    result = {}

    problems_info = dict() if len(contest_infos) > 1 else list()

    for key, contest_info in contest_infos.items():
        url = self.STANDINGS_URL_FORMAT_.format(key=key)
        page = REQ.get(url)

        match = re.search('<input[^>]*name="csrfToken"[^>]*id="edit-csrfToken"[^>]*value="([^"]*)"', page)
        csrf_token = match.group(1)

        n_page = 0
        per_page = 150
        n_total_page = None
        pbar = None
        contest_type = None
        while n_total_page is None or n_page < n_total_page:
            n_page += 1
            time.sleep(2)
            url = self.API_RANKING_URL_FORMAT_.format(key=key, page=n_page, per_page=per_page)
            if users:
                urls = [f'{url}&search={user}' for user in users]
            else:
                urls = [url]
            for url in urls:
                delay = 10
                for _ in range(10):
                    try:
                        headers = {
                            'x-csrf-token': csrf_token,
                            'x-requested-with': 'XMLHttpRequest',
                        }
                        page = REQ.get(url, headers=headers)
                        data = json.loads(page)
                        assert data.get('status') != 'rate_limit_exceeded'
                        break
                    except Exception:
                        traceback.print_exc()
                        delay = min(300, delay * 2)
                        sys.stdout.write(f'url = {url}\n')
                        sys.stdout.write(f'Sleep {delay}... ')
                        sys.stdout.flush()
                        time.sleep(delay)
                        sys.stdout.write('Done\n')
                else:
                    raise ExceptionParseStandings(f'Failed getting {n_page} by url {url}')

                if 'status' in data and data['status'] != 'success':
                    raise ExceptionParseStandings(json.dumps(data))

                unscored_problems = data['contest_info']['unscored_problems']

                if n_total_page is None:
                    for p in data['problems']:
                        if p['code'] in unscored_problems:
                            continue
                        d = problems_info
                        if 'division' in contest_info:
                            d = d.setdefault('division', OrderedDict())
                            d = d.setdefault(contest_info['division'], [])
                        d.append({
                            'short': p['code'],
                            'name': p['name'],
                            'url': f"https://www.codechef.com/problems/{p['code']}",
                        })
                    n_total_page = data['availablePages']
                    pbar = tqdm.tqdm(total=n_total_page * len(urls))
                    contest_type = data['contest_info'].get('type')

                for d in data['list']:
                    handle = d.pop('user_handle')
                    d.pop('html_handle', None)
                    problems_status = d.pop('problems_status')
                    if d['score'] < 1e-9 and not problems_status:
                        LOG.warning(f'Skip handle = {handle}: {d}')
                        continue
                    row = result.setdefault(handle, {})
                    row['member'] = handle
                    row['place'] = d.pop('rank')
                    row['solving'] = d.pop('score')

                    problems = row.setdefault('problems', {})
                    solved, upsolved = 0, 0
                    if problems_status:
                        for k, v in problems_status.items():
                            t = 'upsolving' if k in unscored_problems else 'result'
                            v[t] = v.pop('score')
                            solved += 1 if v.get('result', 0) > 0 else 0
                            upsolved += 1 if v.get('upsolving', 0) > 0 else 0
                            if contest_type == '1' and 'penalty' in v:
                                penalty = v.pop('penalty')
                                if v[t] > 0:
                                    v[t] = f'+{"" if penalty == 0 else penalty}'
                                else:
                                    v[t] = f'-{penalty}'
                            problems[k] = v
                        row['solved'] = {'solving': solved, 'upsolving': upsolved}

                    country = d.pop('country_code')
                    if country:
                        d['country'] = country

                    row.update(d)
                    row.update(contest_info)

                pbar.set_description(f'key={key} url={url}')
                pbar.update()

        has_penalty = False
        for row in result.values():
            p = row.get('penalty')
            has_penalty = has_penalty or p and str(p) != "0"
        if not has_penalty:
            for row in result.values():
                row.pop('penalty', None)

        if pbar is not None:
            pbar.close()

    standings = {
        'result': result,
        'url': self.url,
        'problems': problems_info,
    }
    return standings
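
# The retry loop above backs off exponentially, doubling the delay before each
# sleep and capping it at 300 seconds.  The same schedule in isolation (the
# function name and defaults are ours):
def backoff_delays(initial=10, cap=300, attempts=10):
    delay, delays = initial, []
    for _ in range(attempts):
        delay = min(cap, delay * 2)
        delays.append(delay)
    return delays

# backoff_delays()[:5] == [20, 40, 80, 160, 300]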