def get_standings(self, users=None, statistics=None):
    """Parse the event rating table at ``self.url`` into a standings dict.

    Returns a dict with ``standings_url``, ``result`` (rows keyed by the
    numeric id taken from the team link) and ``options`` (medal settings).
    When the page request fails, returns ``{'action': 'delete'}`` for a 404
    and an empty dict for any other failure code.

    Raises ExceptionParseStandings when the rating table is not on the page.
    """
    try:
        html = REQ.get(self.url)
    except FailOnGetResponse as error:
        if error.code == 404:
            return {'action': 'delete'}
        return {}

    table_match = re.search('<table[^>]*past_event_rating[^>]*>.*?</table>', html, re.DOTALL)
    if table_match is None:
        raise ExceptionParseStandings('not found table')

    columns = {
        'Team': 'name',
        'Place': 'place',
        'CTF points': 'solving',
    }
    parsed_rows = parsed_table.ParsedTable(html=table_match.group(0), header_mapping=columns)

    results = {}
    max_score = 0
    for parsed in parsed_rows:
        entry = OrderedDict()
        for header, cell in parsed.items():
            field = header.strip('*').strip(' ')
            if isinstance(cell, list):
                text = ' '.join([part.value for part in cell]).strip()
            else:
                text = cell.value

            if field == 'name':
                # The member key is the trailing numeric id of the team link.
                link = cell.column.node.xpath('.//a/@href')[0]
                entry['member'] = re.search('/([0-9]+)/?$', link).group(1)
                entry['name'] = text
            else:
                entry[field] = as_number(text)
        max_score = max(max_score, entry.get('solving', 0))
        results[entry['member']] = entry

    if max_score > 0:
        # Express every score as a percentage of the best score.
        for row in results.values():
            if 'solving' in row:
                row['percent'] = f'{row["solving"] * 100 / max_score:.2f}'

    # Only events named like a final (and not like a qualifier) award a medal.
    named_qualifier = re.search(r'\bqual', self.name, flags=re.I)
    named_final = re.search(r'\bfinal', self.name, flags=re.I)
    if named_final and not named_qualifier:
        medals = [{'name': 'gold', 'count': 1}]
    else:
        medals = []

    return {
        'standings_url': self.url,
        'result': results,
        'options': {'medals': medals},
    }
def get_handle(row):
    """Resolve the member handle and profile info for one standings row.

    ``row`` must carry ``_profile_url``. The row is mutated in place and
    returned. Previously stored data (looked up in the outer-scope mapping
    ``statistics_profiles_urls``) is reused when it already holds ``_member``
    and ``_info``; otherwise the profile page is fetched and parsed.
    """
    profile_url = row['_profile_url']
    is_university = 'university' in profile_url
    if is_university:
        row['_skip'] = True

    if profile_url in statistics_profiles_urls:
        # Fill in only the fields the fresh row does not have yet.
        for key, value in statistics_profiles_urls[profile_url].items():
            row.setdefault(key, value)
        if '_member' in row and '_info' in row:
            row['member'] = row['_member']
            row['info'] = row['_info']
            return row

    html = REQ.get(profile_url)
    info = row.setdefault('info', {})

    if is_university:
        # University handle is the URL path with '/' turned into ':'.
        path = unquote(urlparse(profile_url).path)
        row['member'] = path.strip('/').replace('/', ':')
    else:
        canonical = re.search(
            '<link[^>]*rel="canonical"[^>]*href="[^"]*/profile/(?P<handle>[^"]*)"[^>]*>',
            html)
        row['member'] = canonical.group('handle')

        prize = re.search(
            r'>[^<]*prize[^<]*money[^<]*(?:<[^>]*>)*[^<]*\$(?P<val>[.0-9]+)',
            html, re.IGNORECASE)
        if prize:
            info['prize_money'] = as_number(prize.group('val'))

        country = re.search(
            r'>country:</[^>]*>(?:\s*<[^>]*>)*\s*<a[^>]*href="[^"]*/country/(?P<country>[^"]*)"',
            html, re.IGNORECASE)
        if country:
            info['country'] = country.group('country')

    heading = re.search('<h3[^>]*>(?P<name>[^>]*)<', html)
    info['name'] = heading.group('name').strip()

    # Keep private copies so a later run can skip the page fetch.
    row['_member'] = row['member']
    row['_info'] = dict(info)
    return row
def get_results(standings_url, division_data):
    """Fetch one division's standings page and return rows keyed by member.

    ``division_data['format'] == 'json'`` selects the JSON layout; anything
    else is parsed as an HTML table (rows selected by
    ``division_data['xpath']``, default ``'//table//tr'``). Places are
    computed from ``solving`` when the source provides no place column.

    NOTE(review): ``self`` is not a parameter; it is resolved from the
    enclosing scope, so this is presumably a nested helper of a method.
    """
    page = REQ.get(standings_url)

    if division_data.get('format') == 'json':
        data = json.loads(page)

        if 'problems' in data:
            scores_field = 'problem'
        elif 'tournaments' in data:
            scores_field = 'tournament'
        else:
            scores_field = None

        if scores_field:
            # Short column labels for per-problem / per-tournament scores.
            short_names = {'submission': 'T', 'request': 'R'}
            scores_mapping = OrderedDict()
            for score in data[f'{scores_field}s']:
                score_id = str(score[f'{scores_field}Id'])
                scores_mapping[score_id] = short_names.get(score_id, score_id.split(':')[-1])

        table = []
        for team in data['teams']:
            entry = OrderedDict()
            entry['name'] = team['team']['teamName']
            entry['solving'] = team['score']
            entry['country'] = team['team']['customData']['country']
            if scores_field:
                per_score = entry.setdefault('_scores', OrderedDict())
                team_scores = team[f'{scores_field}s']
                for source_key, label in scores_mapping.items():
                    if source_key in team_scores:
                        per_score[label] = as_number(team_scores[source_key].get('score'))
            table.append(entry)
    else:
        header_mapping = {
            'Rank': 'place',
            '': 'place',
            'Score': 'solving',
            'score': 'solving',
            'Total Score': 'solving',
            'Team': 'name',
            'name': 'name',
            'score + unspent LAM': 'unspent_lam',
        }
        row_xpath = division_data.get('xpath', '//table//tr')
        table = parsed_table.ParsedTable(html=page, header_mapping=header_mapping, xpath=row_xpath)

    season = self.get_season()
    ret = {}
    was_place = False
    for parsed in table:
        row = OrderedDict()
        for k, v in parsed.items():
            if k == 'place':
                was_place = True
            if isinstance(v, parsed_table.ParsedTableValue):
                v = v.value
            if k == 'name':
                row['name'] = v
                row['member'] = f'{v} {season}'
            elif k in {'place', 'solving'}:
                row[k] = as_number(v)
            else:
                row[k] = v
        ret[row['member']] = row

    if not was_place:
        # No place column: rank by score, equal scores share a place.
        ranked = sorted(ret.values(), key=lambda r: r['solving'], reverse=True)
        place = None
        last = None
        for idx, row in enumerate(ranked, start=1):
            if row['solving'] != last:
                last, place = row['solving'], idx
            row['place'] = place
    return ret
def get_standings(self, users=None, statistics=None):
    """Parse the standings page and resolve every linked profile to a member.

    The page is expected to contain an optional plain ``<table>`` of
    key/value contest details and a results table whose class contains
    ``table``. Each profile link found in the name/team column becomes one
    result row; profile pages are fetched concurrently by ``get_handle``.

    Args:
        users: unused here.
        statistics: previously stored rows (mapping); rows carrying
            ``_profile_url`` let ``get_handle`` skip re-fetching profiles.

    Returns:
        dict with ``url``, ``options`` (``parse`` holds the contest details
        and, when found, ``_related``), ``invisible`` and ``result``.
    """
    standings_url = self.standings_url or self.url
    page = REQ.get(standings_url)
    standings = {'url': standings_url}
    options = standings.setdefault('options', {'parse': {}})

    # Contest details: a header-less two-column table of "Key: value" rows.
    regex = '<table>.*?</table>'
    match = re.search(regex, page, re.DOTALL)
    if match:
        html_table = match.group(0)
        table = parsed_table.ParsedTable(html_table, without_header=True, ignore_wrong_header_number=False)
        infos = {}
        for r in table:
            k, v = [col.value for col in r.columns]
            k = k.strip(':').lower().replace(' ', '_')
            infos[k] = v
        options['parse'] = infos

    def find_related(statistics):
        """Look for the matching contest on the official host and record it.

        Sets ``options['parse']['_related']`` and ``standings['invisible']``.
        """
        infos = deepcopy(self.info.get('standings', {}).get('parse', {}))

        # QuerySet.get() raises when the row is missing, so test existence
        # explicitly; a stale id then falls through to a fresh search.
        if '_related' in infos and Contest.objects.filter(pk=infos['_related']).exists():
            options['parse']['_related'] = infos['_related']
            return

        related = None

        infos.update(options.get('parse', {}))
        host_mapping = self.resource.info['_host_mapping']
        host = infos.get('official_page')
        if host:
            match = re.search('.*https?://(?P<host>[^/]*)/', host)
            # An official page that is not a recognisable URL falls back to
            # the series name instead of failing on ``None.group``.
            host = match.group('host') if match else infos.get('series')
        else:
            host = infos.get('series')

        ignore_n_statistics = False
        ignore_title = None
        if host:
            # Only consult the mapping when there is something to match;
            # re.search() does not accept None as the subject string.
            for mapping in host_mapping:
                if re.search(mapping['regex'], host):
                    host = mapping['host']
                    ignore_title = mapping.get('ignore_title')
                    ignore_n_statistics = mapping.get('ignore_n_statistics', ignore_n_statistics)
                    break
        if host:
            # Candidates start or end within three days of this contest.
            delta_start = timedelta(days=3)
            qs = Contest.objects.filter(resource__host=host)
            qs = qs.filter(
                Q(start_time__gte=self.start_time - delta_start, start_time__lte=self.start_time + delta_start) |
                Q(end_time__gte=self.start_time - delta_start, end_time__lte=self.start_time + delta_start))

            if not ignore_n_statistics:
                # Participant count must agree within 15% (teams counted once).
                teams = set()
                for r in statistics.values():
                    if 'team_id' in r:
                        teams.add(r['team_id'])
                n_statistics = len(teams) if teams else len(statistics)
                delta_n = round(n_statistics * 0.15)
                qs = qs.filter(n_statistics__gte=n_statistics - delta_n,
                               n_statistics__lte=n_statistics + delta_n)

            if ignore_title:
                qs = qs.exclude(title__iregex=ignore_title)

            if len(qs) > 1:
                # Still ambiguous: require the same first-place account.
                first = None
                for stat in statistics.values():
                    if stat.get('place') == '1':
                        first = stat['member'].split(':', 1)[-1]
                qs = qs.filter(statistics__place_as_int=1, statistics__account__key=first)

            if len(qs) == 1:
                related = qs.first().pk

        if related is not None:
            options['parse']['_related'] = related
            standings['invisible'] = True
        else:
            standings['invisible'] = False

    regex = '<table[^>]*class="[^"]*table[^"]*"[^>]*>.*?</table>'
    match = re.search(regex, page, re.DOTALL)
    html_table = match.group(0)
    table = parsed_table.ParsedTable(html_table)

    # One entry per profile link; team rows are duplicated for each member.
    profile_urls = {}
    for r in table:
        row = OrderedDict()
        rank = r.pop('Rank')
        row['place'] = rank.value
        medal = rank.column.node.xpath('.//img[contains(@alt,"medal")]/@title')
        if medal:
            row['medal'] = medal[0].lower()
        name_key = 'Name' if 'Name' in r else 'Team'
        name = r.pop(name_key)
        members = name.column.node.xpath('.//a')
        val = name.value
        if name_key == 'Team':
            if ':' in val:
                val = val.rsplit(': ', 1)[0]
            row['team_id'] = val
        row['name'] = val
        val = r.pop('Score').value.strip()
        row['solving'] = as_number(val) if val and val != '?' else 0
        row['_no_update_name'] = True

        # Remaining columns are kept under lower-cased names when non-empty.
        for k, v in r.items():
            k = k.lower()
            if k in row:
                continue
            v = v.value.strip()
            if not v or v == '?':
                continue
            row[k.lower()] = as_number(v)

        for member in members:
            url = urljoin(standings_url, member.attrib['href'])
            row['_profile_url'] = url
            profile_urls[url] = deepcopy(row)

    statistics_profiles_urls = {}
    if statistics:
        for s in statistics.values():
            if '_profile_url' in s:
                statistics_profiles_urls[s['_profile_url']] = s

    def get_handle(row):
        """Fill ``member`` and ``info`` for a row, fetching the profile if needed."""
        url = row['_profile_url']
        if 'university' in url:
            row['_skip'] = True
        if url in statistics_profiles_urls:
            stat = statistics_profiles_urls[url]
            for k, v in stat.items():
                if k not in row:
                    row[k] = v
            if '_member' in row and '_info' in row:
                row['member'] = row['_member']
                row['info'] = row['_info']
                return row

        page = REQ.get(url)
        info = row.setdefault('info', {})
        if 'university' in url:
            handle = unquote(urlparse(url).path)
            handle = handle.strip('/')
            handle = handle.replace('/', ':')
            row['member'] = handle
        else:
            match = re.search(
                '<link[^>]*rel="canonical"[^>]*href="[^"]*/profile/(?P<handle>[^"]*)"[^>]*>',
                page)
            handle = match.group('handle')
            row['member'] = handle

            match = re.search(
                r'>[^<]*prize[^<]*money[^<]*(?:<[^>]*>)*[^<]*\$(?P<val>[.0-9]+)',
                page, re.IGNORECASE)
            if match:
                info['prize_money'] = as_number(match.group('val'))

            match = re.search(
                r'>country:</[^>]*>(?:\s*<[^>]*>)*\s*<a[^>]*href="[^"]*/country/(?P<country>[^"]*)"',
                page, re.IGNORECASE)
            if match:
                info['country'] = match.group('country')

        match = re.search('<h3[^>]*>(?P<name>[^>]*)<', page)
        info['name'] = match.group('name').strip()

        row['_member'] = row['member']
        row['_info'] = dict(info)

        return row

    result = {}
    members = defaultdict(list)
    # The bar counts processed profile URLs; ``result`` is still empty at
    # this point, so its length cannot be used as the total.
    with PoolExecutor(max_workers=4) as executor, tqdm(total=len(profile_urls), desc='urls') as pbar:
        for row in executor.map(get_handle, profile_urls.values()):
            pbar.update()
            result[row['member']] = row

            skip = row.pop('_skip', False)
            if not skip and 'team_id' in row:
                members[row['team_id']].append({
                    'account': row['member'],
                    'name': row['info']['name']
                })

    if members:
        for row in result.values():
            if 'team_id' in row:
                row['_members'] = members[row['team_id']]

    find_related(result)

    standings['result'] = result
    return standings
def get_standings(self, users=None, statistics=None):
    """Build standings from the ``contest.standings``, ``contest.ratingChanges``
    and ``contest.status`` API methods (all called through ``_query``).

    Args:
        users: optional list of handles; when given, only those handles are
            requested and their place is reported as ``'__unchanged__'``.
        statistics: not used in this method.

    Returns:
        The result of ``get_standings_from_html()`` when
        ``self.is_spectator_ranklist`` is set; ``{'action': 'delete'}`` when
        the standings call fails with code 400; otherwise a dict with
        ``result``, ``url``, ``problems``, ``options`` and, for an unfinished
        contest near its end time, ``timing_statistic_delta``.

    Raises:
        ExceptionParseStandings: on any other non-OK standings response, or
            when the rating/status calls return an unexpected status.
    """
    if self.is_spectator_ranklist:
        return self.get_standings_from_html()

    contest_url = self.url.replace('contests', 'contest')
    standings_url = contest_url.rstrip('/') + '/standings'

    is_gym = '/gym/' in self.url
    result = {}

    # Extra per-login fields, keyed by login, merged into '=login' handles below.
    domain_users = {}
    if '_domain_users' in self.info:
        for user in self.info['_domain_users']:
            user = deepcopy(user)
            domain_users[user.pop('login')] = user

    problems_info = OrderedDict()
    # Single-iteration loop: ``unofficial`` is always True here.
    for unofficial in [True]:
        params = {
            'contestId': self.cid,
            'showUnofficial': str(unofficial).lower(),
        }
        if users:
            params['handles'] = ';'.join(users)

        data = _query(method='contest.standings',
                      params=params,
                      api_key=self.api_key)

        if data['status'] != 'OK':
            if data['code'] == 400:
                return {'action': 'delete'}
            raise ExceptionParseStandings(data['status'])

        phase = data['result']['contest'].get('phase', 'FINISHED').upper()
        contest_type = data['result']['contest']['type'].upper()
        duration_seconds = data['result']['contest'].get('durationSeconds')

        result_problems = data['result']['problems']
        for p in result_problems:
            d = {'short': p['index'], 'name': p['name']}
            if 'points' in p:
                d['full_score'] = p['points']
            tags = p.get('tags')
            if tags:
                d['tags'] = tags
            d['url'] = urljoin(standings_url.rstrip('/'),
                               f"problem/{d['short']}")

            problems_info[d['short']] = d

        # An explicitly empty ``users`` list: problems are kept, rows skipped.
        if users is not None and not users:
            continue

        # ``grouped`` stays true only if every row is either a team or has a
        # participant type outside PARTICIPANT_TYPES.
        grouped = any('teamId' in row['party']
                      for row in data['result']['rows'])
        if grouped:
            grouped = all('teamId' in row['party'] or row['party']
                          ['participantType'] not in self.PARTICIPANT_TYPES
                          for row in data['result']['rows'])

        # Running state for recomputing places across rows.
        place = None
        last = None
        idx = 0
        teams_to_skip = set()
        for row in data['result']['rows']:
            party = row['party']

            if is_gym and not party['members']:
                # Gym row without members: synthesise one member from the
                # team name plus the season.
                is_ghost_team = True
                name = party['teamName']
                party['members'] = [{
                    'handle': f'{name} {self.get_season()}',
                    'name': name,
                }]
            else:
                is_ghost_team = False

            for member in party['members']:
                if is_gym:
                    upsolve = False
                else:
                    upsolve = party[
                        'participantType'] not in self.PARTICIPANT_TYPES

                handle = member['handle']

                r = result.setdefault(handle, OrderedDict())

                r['member'] = handle
                if 'room' in party:
                    r['room'] = as_number(party['room'])

                r.setdefault('participant_type',
                             []).append(party['participantType'])
                r['_no_update_n_contests'] = 'CONTESTANT' not in r[
                    'participant_type']

                if is_ghost_team and member['name']:
                    r['name'] = member['name']
                    r['_no_update_name'] = True
                elif grouped and (not upsolve and not is_gym
                                  or 'name' not in r):
                    r['name'] = ', '.join(m['handle']
                                          for m in party['members'])
                    if 'teamId' in party:
                        r['team_id'] = party['teamId']
                        r['name'] = f"{party['teamName']}"
                        r['_members'] = [{
                            'account': m['handle']
                        } for m in party['members']]
                        r['_account_url'] = urljoin(
                            self.url, '/team/' + str(r['team_id']))
                        r['_no_update_name'] = True
                if domain_users and '=' in handle:
                    _, login = handle.split('=', 1)
                    r.update(domain_users.get(login, {}))

                hack = row['successfulHackCount']
                unhack = row['unsuccessfulHackCount']

                problems = r.setdefault('problems', {})
                for i, s in enumerate(row['problemResults']):
                    k = result_problems[i]['index']
                    points = float(s['points'])
                    if contest_type == 'IOI' and 'pointsInfo' in s:
                        points = float(s['pointsInfo'] or '0')

                    n = s.get('rejectedAttemptCount')
                    # ICPC results become strings: '+', '+N' or '-N'.
                    if n is not None and contest_type == 'ICPC' and points + n > 0:
                        points = f'+{"" if n == 0 else n}' if points > 0 else f'-{n}'

                    u = upsolve
                    if s['type'] == 'PRELIMINARY':
                        p = {'result': f'?{n + 1}'}
                    elif points or n:
                        if not points:
                            points = f'-{n}'
                            n = None
                        p = {'result': points}
                        if contest_type == 'IOI':
                            full_score = problems_info[k].get('full_score')
                            if full_score:
                                p['partial'] = points < full_score
                        elif contest_type == 'CF' and n:
                            p['penalty_score'] = n
                    else:
                        # No points and no attempts: nothing to record.
                        continue

                    if 'bestSubmissionTimeSeconds' in s and duration_seconds:
                        time = s['bestSubmissionTimeSeconds']
                        if time > duration_seconds:
                            # Solved after the contest duration: upsolving.
                            u = True
                        else:
                            p['time_in_seconds'] = time
                            # Seconds -> minutes, rendered as HH:MM.
                            time /= 60
                            p['time'] = '%02d:%02d' % (time / 60, time % 60)
                    a = problems.setdefault(k, {})
                    if u:
                        a['upsolving'] = p
                    else:
                        a.update(p)

                if row['rank'] and not upsolve:
                    score = row['points']
                    if contest_type == 'IOI' and 'pointsInfo' in row:
                        score = float(row['pointsInfo'] or '0')

                    if is_gym:
                        r['place'] = row['rank']
                    elif unofficial:
                        if users:
                            r['place'] = '__unchanged__'
                        elif 'team_id' not in r and 'OUT_OF_COMPETITION' in r.get(
                                'participant_type', []):
                            r['place'] = None
                        else:
                            # Each team advances the position counter once.
                            if 'team_id' in r:
                                if r['team_id'] not in teams_to_skip:
                                    teams_to_skip.add(r['team_id'])
                                    idx += 1
                            else:
                                idx += 1
                            # Equal (score, penalty) pairs share a place.
                            value = (score, row.get('penalty'))
                            if last != value:
                                last = value
                                place = idx
                            r['place'] = place

                    r['solving'] = score
                    if contest_type == 'ICPC':
                        r['penalty'] = row['penalty']

                if hack or unhack:
                    r['hack'] = {
                        'title': 'hacks',
                        'successful': hack,
                        'unsuccessful': unhack,
                    }

    # Reuse the standings params (minus showUnofficial) for rating changes.
    params.pop('showUnofficial')

    data = _query(method='contest.ratingChanges',
                  params=params,
                  api_key=self.api_key)
    if data.get('status') not in ['OK', 'FAILED']:
        raise ExceptionParseStandings(data)
    if data and data['status'] == 'OK':
        for row in data['result']:
            if str(row.pop('contestId')) != self.key:
                continue
            handle = row.pop('handle')
            if handle not in result:
                continue
            r = result[handle]
            old_rating = row.pop('oldRating')
            new_rating = row.pop('newRating')
            r['old_rating'] = old_rating
            r['new_rating'] = new_rating

    # Submissions: one request per requested user, or one for the contest.
    params = {'contestId': self.cid}
    if users:
        array_params = []
        for user in users:
            params['handle'] = user
            array_params.append(deepcopy(params))
    else:
        array_params = [params]
    submissions = []
    for params in array_params:
        data = _query('contest.status',
                      params=params,
                      api_key=self.api_key)
        if data.get('status') not in ['OK', 'FAILED']:
            raise ExceptionParseStandings(data)
        if data['status'] == 'OK':
            submissions.extend(data['result'])

    for submission in submissions:
        party = submission['author']

        info = {
            'submission_id': submission['id'],
            'url': Statistic.SUBMISSION_URL_FORMAT_.format(url=contest_url,
                                                           sid=submission['id']),
            'external_solution': True,
        }

        if 'verdict' in submission:
            v = submission['verdict'].upper()
            if v == 'PARTIAL':
                info['partial'] = True
            # Verdicts longer than 3 chars are abbreviated to the initials
            # of their '_'-separated words.
            info['verdict'] = ''.join(
                s[0].upper()
                for s in v.split('_')) if len(v) > 3 else v.upper()

        if 'programmingLanguage' in submission:
            info['language'] = submission['programmingLanguage']

        is_accepted = info.get('verdict') == 'OK'
        if not is_accepted and 'passedTestCount' in submission:
            info['test'] = submission['passedTestCount'] + 1

        if is_gym:
            upsolve = False
        else:
            upsolve = party[
                'participantType'] not in self.PARTICIPANT_TYPES

        if ('relativeTimeSeconds' in submission and duration_seconds
                and duration_seconds < submission['relativeTimeSeconds']):
            upsolve = True

        for member in party['members']:
            handle = member['handle']
            if handle not in result:
                continue
            r = result[handle]
            problems = r.setdefault('problems', {})
            k = submission['problem']['index']
            p = problems.setdefault(k, {})
            if upsolve:
                p = p.setdefault('upsolving', {})
            if 'submission_id' not in p:
                # First submission seen for this slot supplies the details.
                p.update(info)
                if 'result' not in p:
                    p['result'] = '+' if is_accepted else '-1'
            elif upsolve:
                # Further upsolving submissions bump the signed attempt
                # counter stored in a '+N' / '-N' result.
                v = str(p.get('result'))
                if v and v[0] in ['-', '+']:
                    v = 0 if v == '+' else int(v)
                    v = v + 1 if v >= 0 else v - 1
                    p['result'] = f'{"+" if v > 0 else ""}{v}'

    # Drop rows with no hacks, no problems, no rating change and no
    # CONTESTANT participation.
    result = {
        k: v
        for k, v in result.items()
        if v.get('hack') or v.get('problems') or 'new_rating' in v
        or not v.get('_no_update_n_contests')
    }

    def to_score(x):
        # String results count as 1 when they start with '+' or are a
        # positive number not marked '?'; other types are returned as is.
        return ((1 if x.startswith('+')
                 or not x.startswith('?') and float(x) > 0 else 0)
                if isinstance(x, str) else x)

    def to_solve(x):
        # True when the entry is not partial and has a positive score.
        return not x.get('partial', False) and to_score(x.get('result', 0)) > 0

    for r in result.values():
        upsolving = 0
        solving = 0
        upsolving_score = 0

        for a in r['problems'].values():
            if 'upsolving' in a and to_solve(a['upsolving']) > to_solve(a):
                upsolving_score += to_score(a['upsolving']['result'])
                upsolving += to_solve(a['upsolving'])
            else:
                solving += to_solve(a)
        r.setdefault('solving', 0)
        r['upsolving'] = upsolving_score
        # Record solved counts only when they differ from the score fields.
        if abs(solving - r['solving']) > 1e-9 or abs(upsolving - r['upsolving']) > 1e-9:
            r['solved'] = {
                'solving': solving,
                'upsolving': upsolving,
            }

    standings = {
        'result': result,
        'url': standings_url,
        'problems': list(problems_info.values()),
        'options': {
            'fixed_fields': [('hack', 'Hacks')],
        },
    }

    if re.search('^educational codeforces round', self.name, re.IGNORECASE):
        standings['options'].setdefault('timeline', {}).update({
            'attempt_penalty': 10 * 60,
            'challenge_score': False
        })

    # Unfinished contest within 3 hours after end time: set a 3-minute delta.
    if phase != 'FINISHED' and self.end_time + timedelta(
            hours=3) > datetime.utcnow().replace(tzinfo=pytz.utc):
        standings['timing_statistic_delta'] = timedelta(minutes=3)
    return standings
def get_standings(self, users=None, statistics=None):
    """Parse the ``standings`` table at ``self.standings_url``.

    Sets ``self.season`` when missing (from ``start_time`` if present,
    otherwise from the first word of ``self.key``). Columns named with a
    single letter ``A``-``W`` or a number are treated as problems/stages;
    ``Total``, ``Time``, ``Place`` and team/name columns map to fixed fields;
    any other column is copied through and converted to a number when all of
    its values share one numeric type.

    Returns a dict with ``result``, ``url``, ``problems`` and
    ``problems_time_format``.
    """
    if not hasattr(self, 'season'):
        if hasattr(self, 'start_time'):
            start = self.start_time
            # Seasons roll over after August.
            year = start.year if start.month > 8 else start.year - 1
            self.season = f'{year}-{year + 1}'
        else:
            self.season = self.key.split()[0]

    result = {}
    problems_info = OrderedDict()

    page = REQ.get(self.standings_url, detect_charsets=True)
    table_html = re.search('<table[^>]*class="standings"[^>]*>.*?</table>', page, re.DOTALL).group(0)
    table = parsed_table.ParsedTable(table_html)

    # Map "Day N" to the URL of its results page.
    stages = {}
    day_links = re.finditer(
        r''' <a[^>]*>[^<]*Day\s*(?P<day>[0-9]+):[^<]*<[^#]*? <a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*(?:Div\s*[A1]\s*)?Results\s*</a> ''',
        page,
        re.IGNORECASE | re.VERBOSE,
    )
    for found in day_links:
        stages[found.group('day')] = urllib.parse.urljoin(self.standings_url, found.group('url'))

    fields_types = defaultdict(set)
    for parsed in table:
        row = OrderedDict()
        other = OrderedDict()
        problems = row.setdefault('problems', {})

        for key, cell in list(parsed.items()):
            k = key.split()[1] if re.match('^Stage [0-9]+$', key) else key.split()[0]

            if (len(k) == 1 and 'A' <= k <= 'Z') or k.isdigit():
                if k >= 'X':
                    continue
                meta = problems_info.setdefault(k, {})
                meta['short'] = k
                title = cell.attrs.get('title')
                if title:
                    meta['name'] = title
                if k.isdigit() and k in stages:
                    meta['url'] = stages[k]

                classes = cell.attrs.get('class', '').split()
                frozen = 'frost' in classes
                if ' ' in cell.value:
                    p = problems.setdefault(k, {})
                    point, time = cell.value.split()
                    if point == 'X':
                        p['binary'] = True
                        point = '+'
                    elif point == '0':
                        p['binary'] = False
                        point = '-1'
                    if 'opener' in classes:
                        p['first_ac'] = True
                    if frozen and point and point[0] == '-':
                        # Frozen rejected attempt: show as pending, no time.
                        point = '?' + point[1:]
                        time = None
                    if time:
                        p['time'] = time
                    p['result'] = point
                elif frozen:
                    problems.setdefault(k, {})['result'] = '?'
                else:
                    try:
                        score = float(cell.value)
                    except Exception:
                        pass
                    else:
                        problems.setdefault(k, {})['result'] = score
                continue

            if k == 'Total':
                row['solving'] = float(cell.value)
                continue

            if k == 'Time':
                if "'" in cell.value and '"' in cell.value:
                    minute, seconds = map(int, re.findall('-?[0-9]+', cell.value))
                    if minute < 0:
                        seconds = -seconds
                    row['penalty'] = f'{minute + seconds / 60:.2f}'
                else:
                    row['penalty'] = int(cell.value)
                continue

            if k == 'Place':
                row['place'] = cell.value.strip('.')
                continue

            lowered = k.lower()
            if 'team' in lowered or 'name' in lowered:
                if ' ' in cell.value:
                    row['member'] = cell.value + ' ' + self.season
                else:
                    row['member'] = cell.value
                row['name'] = cell.value
                continue

            # Unknown column: remember which numeric type it parses to.
            number = as_number(cell.value)
            if number:
                fields_types[key].add(type(number))
            other[key] = cell.value

        for extra_key, extra_value in other.items():
            if extra_key.lower() not in row:
                row[extra_key] = extra_value

        if 'solving' not in row:
            row['solving'] = row.pop('Rating', 0)

        result[row['member']] = row

    for field, types in fields_types.items():
        if len(types) != 1 or next(iter(types)) not in [int, float]:
            continue
        for row in result.values():
            if field in row:
                row[field] = as_number(row[field])

    return {
        'result': result,
        'url': self.standings_url,
        'problems': list(problems_info.values()),
        'problems_time_format': '{H}:{m:02d}',
    }
def get_standings(self, users=None, statistics=None):
    """Collect the paginated ranking and the problem list for this contest.

    The problem list is read from the ``/p`` page derived from
    ``self.standings_url``; ranking pages are followed through their
    ``?page=N`` links. Places are recomputed from the summed scores, with
    equal scores sharing a place.

    Returns:
        dict with ``result`` (rows keyed by the numeric id taken from the
        ranking row's ``id`` attribute) and ``problems``.

    Raises:
        ExceptionParseStandings: when the problems table cannot be found.
    """

    def parse_problems_infos():
        """Return an ordered mapping of problem short name -> problem info."""
        problem_url = self.standings_url.replace('/ranking', '/p')
        page = REQ.get(problem_url)

        match = re.search(
            r'<h1[^>]*>[^<]*</h1>(\s*<[^/][^>]*>)*\s*(?P<table><table[^>]*>.*?</table>)',
            page, re.DOTALL)
        if not match:
            raise ExceptionParseStandings('Not found problems table')
        table = parsed_table.ParsedTable(html=match.group('table'), ignore_wrong_header_number=False)

        skip = False
        problems_infos = collections.OrderedDict()
        for r in table:
            if isinstance(r, parsed_table.ParsedTableRow):
                # Section row: keep only sections whose title (without the
                # trailing parenthesised part) occurs in the contest name.
                runda = re.sub(r'\s*\(.*\)\s*$', '', r.columns[0].value).strip()
                skip = runda.lower() not in self.name.lower()
                continue

            if skip:
                continue

            problem_info = {}
            for k, vs in list(r.items()):
                # A header may map to one cell or to a list of cells.
                cells = vs if isinstance(vs, list) else [vs]
                if isinstance(vs, list):
                    v = ' '.join([cell.value for cell in cells]).strip()
                else:
                    v = vs.value

                if not k:
                    problem_info['short'] = v
                elif k in ('Nazwa', 'Name'):
                    match = re.search(r'\[(?P<letter>[^\]]+)\]$', v)
                    if match:
                        problem_info['_letter'] = match.group('letter')
                    problem_info['name'] = v
                    for cell in cells:
                        # Relative XPath: an absolute '//a' is evaluated from
                        # the tree root and would return the same first link
                        # for every problem.
                        href = cell.column.node.xpath('.//a/@href')
                        if href:
                            problem_info['url'] = urljoin(problem_url, href[0])
                            break
            if problem_info:
                problems_infos[problem_info['short']] = problem_info
        return problems_infos

    problems_infos = parse_problems_infos()

    result = {}

    page = 1
    while page is not None:
        content = REQ.get(self.standings_url + f'?page={page}')

        # Next page is the smallest linked page number above the current one.
        matches = re.finditer(r'<a[^>]*href="[^"]*\?page=(?P<page>[0-9]+)"[^>]*>', content)
        next_page = None
        for match in matches:
            p = int(match.group('page'))
            if p > page and (next_page is None or p < next_page):
                next_page = p
        page = next_page

        table = parsed_table.ParsedTable(
            html=content,
            xpath="//table[contains(@class,'table-ranking')]//tr")

        for r in table:
            row = collections.OrderedDict()
            problems = row.setdefault('problems', {})
            row['solving'] = 0
            for k, v in list(r.items()):
                if k == '#':
                    row['place'] = v.value
                elif k == 'User':
                    row['name'] = v.value
                    rid = v.row.node.xpath('@id')[0]
                    match = re.match('^ranking_row_(?P<id>[0-9]+)$', rid)
                    member = match.group('id')
                    row['member'] = member
                elif k in problems_infos and v.value:
                    problems[k] = {'result': v.value}
                    row['solving'] += as_number(v.value)
            # Rows without any result for this contest's problems are dropped.
            if not problems:
                continue
            result[row['member']] = row

    # Recompute places over the kept rows: equal totals share a place.
    last = None
    rank = None
    for idx, row in enumerate(sorted(result.values(), key=lambda r: -r['solving']), start=1):
        if last != row['solving']:
            last = row['solving']
            rank = idx
        row['place'] = rank

    problems_infos = list(problems_infos.values())
    # Order by the bracketed letter only when every problem has one.
    if all('_letter' in p for p in problems_infos):
        problems_infos.sort(key=lambda p: p['_letter'])
    for p in problems_infos:
        p.pop('_letter', None)

    ret = {
        'result': result,
        'problems': problems_infos,
    }
    return ret