예제 #1
0
파일: ctftime.py 프로젝트: aropan/clist
    def get_standings(self, users=None, statistics=None):
        """Parse the event rating table of ``self.url`` into standings.

        The page is searched for a ``<table>`` whose opening tag mentions
        ``past_event_rating``.  Each table row becomes one result entry keyed
        by the numeric id found at the end of the team link.

        Returns:
            ``{'action': 'delete'}`` when fetching the page fails with code
            404, ``{}`` on any other ``FailOnGetResponse``, otherwise a dict
            with the keys ``standings_url``, ``result`` and ``options``.

        Raises:
            ExceptionParseStandings: the rating table is not in the page.
        """
        try:
            page = REQ.get(self.url)
        except FailOnGetResponse as e:
            if e.code == 404:
                return {'action': 'delete'}
            return {}

        table_match = re.search('<table[^>]*past_event_rating[^>]*>.*?</table>',
                                page, re.DOTALL)
        if not table_match:
            raise ExceptionParseStandings('not found table')

        table = parsed_table.ParsedTable(
            html=table_match.group(0),
            header_mapping={
                'Team': 'name',
                'Place': 'place',
                'CTF points': 'solving',
            },
        )

        results = {}
        max_score = 0
        for parsed_row in table:
            row = OrderedDict()
            for header, cell in parsed_row.items():
                # Header names may be decorated with '*' and padded with spaces.
                field = header.strip('*').strip(' ')
                if isinstance(cell, list):
                    text = ' '.join(c.value for c in cell).strip()
                else:
                    text = cell.value
                if field == 'name':
                    # The member key is the trailing number of the team link.
                    href = cell.column.node.xpath('.//a/@href')[0]
                    row['member'] = re.search('/([0-9]+)/?$', href).group(1)
                    row['name'] = text
                else:
                    row[field] = as_number(text)
            max_score = max(max_score, row.get('solving', 0))
            results[row['member']] = row

        # Express every score as a percentage of the best score.
        if max_score > 0:
            for row in results.values():
                if 'solving' not in row:
                    continue
                row['percent'] = f'{row["solving"] * 100 / max_score:.2f}'

        # A single gold medal is awarded only when the event name mentions
        # "final" and does not mention "qual".
        is_qual = re.search(r'\bqual', self.name, flags=re.I)
        is_final = re.search(r'\bfinal', self.name, flags=re.I)
        if not is_qual and is_final:
            medals = [{'name': 'gold', 'count': 1}]
        else:
            medals = []

        return {
            'standings_url': self.url,
            'result': results,
            'options': {'medals': medals},
        }
예제 #2
0
        def get_handle(row):
            """Fill ``row`` with the member handle and profile info.

            ``row['_profile_url']`` is looked up in ``statistics_profiles_urls``
            first; keys missing from ``row`` are copied from the stored entry
            and, when both ``_member`` and ``_info`` are then present, they are
            reused without fetching the profile page.  Otherwise the profile
            page is downloaded and parsed.

            Rows whose url contains ``university`` are flagged with ``_skip``.
            The same ``row`` object is returned.
            """
            url = row['_profile_url']
            is_university = 'university' in url
            if is_university:
                row['_skip'] = True

            if url in statistics_profiles_urls:
                # Only fill the gaps: values already in the row take priority.
                for key, value in statistics_profiles_urls[url].items():
                    row.setdefault(key, value)
                if '_member' in row and '_info' in row:
                    row['member'] = row['_member']
                    row['info'] = row['_info']
                    return row

            page = REQ.get(url)
            info = row.setdefault('info', {})

            if is_university:
                # The handle is the url path with '/' separators turned into ':'.
                path = unquote(urlparse(url).path)
                row['member'] = path.strip('/').replace('/', ':')
            else:
                canonical = re.search(
                    '<link[^>]*rel="canonical"[^>]*href="[^"]*/profile/(?P<handle>[^"]*)"[^>]*>',
                    page)
                row['member'] = canonical.group('handle')

                prize = re.search(
                    r'>[^<]*prize[^<]*money[^<]*(?:<[^>]*>)*[^<]*\$(?P<val>[.0-9]+)',
                    page, re.IGNORECASE)
                if prize:
                    info['prize_money'] = as_number(prize.group('val'))

                country = re.search(
                    r'>country:</[^>]*>(?:\s*<[^>]*>)*\s*<a[^>]*href="[^"]*/country/(?P<country>[^"]*)"',
                    page, re.IGNORECASE)
                if country:
                    info['country'] = country.group('country')

            heading = re.search('<h3[^>]*>(?P<name>[^>]*)<', page)
            info['name'] = heading.group('name').strip()

            # Keep private copies so a later run can reuse them from statistics.
            row['_member'] = row['member']
            row['_info'] = dict(info)

            return row
예제 #3
0
        def get_results(standings_url, division_data):
            """Download one standings page and return its rows keyed by member.

            ``division_data['format'] == 'json'`` selects the JSON layout
            (``teams`` plus optional ``problems`` / ``tournaments`` scores);
            anything else is parsed as an HTML table selected by
            ``division_data['xpath']`` (default ``//table//tr``).

            The member key is ``"<name> <season>"``.  When no column maps to
            ``place``, places are assigned by descending ``solving`` with equal
            scores sharing a place.
            """
            page = REQ.get(standings_url)

            if division_data.get('format') == 'json':
                data = json.loads(page)
                if 'problems' in data:
                    scores_field = 'problem'
                elif 'tournaments' in data:
                    scores_field = 'tournament'
                else:
                    scores_field = None

                if scores_field:
                    # Score ids map to a short label: fixed letters for the
                    # known ids, otherwise the part after the last ':'.
                    special_labels = {'submission': 'T', 'request': 'R'}
                    scores_mapping = OrderedDict()
                    for score in data[f'{scores_field}s']:
                        score_id = str(score[f'{scores_field}Id'])
                        scores_mapping[score_id] = special_labels.get(
                            score_id, score_id.split(':')[-1])

                table = []
                for team in data['teams']:
                    team_info = team['team']
                    row = OrderedDict()
                    row['name'] = team_info['teamName']
                    row['solving'] = team['score']
                    row['country'] = team_info['customData']['country']
                    if scores_field:
                        problems = row.setdefault('_scores', OrderedDict())
                        scores = team[f'{scores_field}s']
                        for field, out in scores_mapping.items():
                            if field not in scores:
                                continue
                            problems[out] = as_number(scores.get(field, {}).get('score'))
                    table.append(row)
            else:
                table = parsed_table.ParsedTable(
                    html=page,
                    header_mapping={
                        'Rank': 'place',
                        '': 'place',
                        'Score': 'solving',
                        'score': 'solving',
                        'Total Score': 'solving',
                        'Team': 'name',
                        'name': 'name',
                        'score + unspent LAM': 'unspent_lam',
                    },
                    xpath=division_data.get('xpath', '//table//tr'),
                )

            season = self.get_season()
            ret = {}
            was_place = False
            for parsed_row in table:
                row = OrderedDict()
                for key, value in parsed_row.items():
                    if key == 'place':
                        was_place = True
                    if isinstance(value, parsed_table.ParsedTableValue):
                        value = value.value
                    if key == 'name':
                        row['name'] = value
                        row['member'] = f'{value} {season}'
                    elif key in {'place', 'solving'}:
                        row[key] = as_number(value)
                    else:
                        row[key] = value
                ret[row['member']] = row

            if not was_place:
                # No place column anywhere: rank by score, ties share a place.
                ordered = sorted(ret.values(), key=lambda r: r['solving'], reverse=True)
                rank = None
                previous = None
                for position, entry in enumerate(ordered, start=1):
                    if entry['solving'] != previous:
                        previous = entry['solving']
                        rank = position
                    entry['place'] = rank
            return ret
예제 #4
0
    def get_standings(self, users=None, statistics=None):
        """Parse the standings page and resolve the profile of every member.

        The page at ``self.standings_url`` (or ``self.url``) is expected to
        contain an optional bare ``<table>`` of key/value details and a results
        table whose ``class`` attribute contains ``table``.  Every profile link
        in the name column is fetched (4 workers) to obtain the member handle.

        Args:
            users: not used by this method.
            statistics: previously stored rows; entries that carry
                ``_profile_url``, ``_member`` and ``_info`` are reused instead
                of downloading the profile again.

        Returns:
            dict with ``url``, ``options`` (``parse`` holds the key/value
            details and possibly ``_related``), ``invisible`` and ``result``
            (rows keyed by member handle).
        """
        standings_url = self.standings_url or self.url
        page = REQ.get(standings_url)

        standings = {'url': standings_url}
        options = standings.setdefault('options', {'parse': {}})

        # The first attribute-less <table> is read as two-column key/value rows.
        regex = '<table>.*?</table>'
        match = re.search(regex, page, re.DOTALL)
        if match:
            html_table = match.group(0)
            table = parsed_table.ParsedTable(html_table,
                                             without_header=True,
                                             ignore_wrong_header_number=False)
            infos = {}
            for r in table:
                k, v = [col.value for col in r.columns]
                k = k.strip(':').lower().replace(' ', '_')
                infos[k] = v
            options['parse'] = infos

        def find_related(statistics):
            """Look for the same contest on its official resource.

            On success the contest pk is stored in
            ``options['parse']['_related']`` and the standings are marked
            invisible; otherwise ``standings['invisible']`` is set to False.
            """
            infos = deepcopy(self.info.get('standings', {}).get('parse', {}))

            # Reuse a previously found relation while that contest still
            # exists.  ``objects.get`` would raise DoesNotExist instead of
            # returning a falsy value, so existence is checked explicitly.
            if '_related' in infos and Contest.objects.filter(
                    pk=infos['_related']).exists():
                options['parse']['_related'] = infos['_related']
                return

            related = None

            infos.update(options.get('parse', {}))

            host_mapping = self.resource.info['_host_mapping']
            host = infos.get('official_page')
            if host:
                match = re.search('.*https?://(?P<host>[^/]*)/', host)
                # An official page that is not a recognisable url gives no host.
                host = match.group('host') if match else None
            else:
                host = infos.get('series')

            ignore_n_statistics = False
            ignore_title = None
            # ``host`` is None when neither "official_page" nor "series" is
            # known; re.search cannot be applied to None.
            if host:
                for mapping in host_mapping:
                    if re.search(mapping['regex'], host):
                        host = mapping['host']
                        ignore_title = mapping.get('ignore_title')
                        ignore_n_statistics = mapping.get('ignore_n_statistics',
                                                          ignore_n_statistics)
                        break
            if host:
                # Candidates start or end within 3 days of this contest's start.
                delta_start = timedelta(days=3)
                qs = Contest.objects.filter(resource__host=host)
                qs = qs.filter(
                    Q(start_time__gte=self.start_time - delta_start,
                      start_time__lte=self.start_time + delta_start)
                    | Q(end_time__gte=self.start_time - delta_start,
                        end_time__lte=self.start_time + delta_start))

                if not ignore_n_statistics:
                    # Number of participants (teams when rows carry team_id)
                    # must agree within 15%.
                    teams = set()
                    for r in statistics.values():
                        if 'team_id' in r:
                            teams.add(r['team_id'])
                    n_statistics = len(teams) if teams else len(statistics)
                    delta_n = round(n_statistics * 0.15)
                    qs = qs.filter(n_statistics__gte=n_statistics - delta_n,
                                   n_statistics__lte=n_statistics + delta_n)

                if ignore_title:
                    qs = qs.exclude(title__iregex=ignore_title)

                if len(qs) > 1:
                    # Still ambiguous: require the same account in first place.
                    first = None
                    for stat in statistics.values():
                        if stat.get('place') == '1':
                            first = stat['member'].split(':', 1)[-1]
                    qs = qs.filter(statistics__place_as_int=1,
                                   statistics__account__key=first)

                if len(qs) == 1:
                    related = qs.first().pk

            if related is not None:
                options['parse']['_related'] = related
                standings['invisible'] = True
            else:
                standings['invisible'] = False

        regex = '<table[^>]*class="[^"]*table[^"]*"[^>]*>.*?</table>'
        match = re.search(regex, page, re.DOTALL)
        html_table = match.group(0)
        table = parsed_table.ParsedTable(html_table)

        # One entry per profile link; team rows are copied for every member.
        profile_urls = {}
        for r in table:
            row = OrderedDict()
            rank = r.pop('Rank')
            row['place'] = rank.value
            medal = rank.column.node.xpath(
                './/img[contains(@alt,"medal")]/@title')
            if medal:
                row['medal'] = medal[0].lower()

            name_key = 'Name' if 'Name' in r else 'Team'
            name = r.pop(name_key)
            members = name.column.node.xpath('.//a')
            val = name.value
            if name_key == 'Team':
                # Drop the part after the last ": " of the team cell.
                if ':' in val:
                    val = val.rsplit(': ', 1)[0]
                row['team_id'] = val
            row['name'] = val

            val = r.pop('Score').value.strip()
            row['solving'] = as_number(val) if val and val != '?' else 0

            row['_no_update_name'] = True

            # Remaining columns are kept under their lower-cased header unless
            # the value is empty or unknown ('?').
            for k, v in r.items():
                k = k.lower()
                if k in row:
                    continue
                v = v.value.strip()
                if not v or v == '?':
                    continue
                row[k] = as_number(v)

            for member in members:
                url = urljoin(standings_url, member.attrib['href'])
                row['_profile_url'] = url
                profile_urls[url] = deepcopy(row)

        statistics_profiles_urls = {}
        if statistics:
            for s in statistics.values():
                if '_profile_url' in s:
                    statistics_profiles_urls[s['_profile_url']] = s

        def get_handle(row):
            """Fill ``row`` with ``member`` and ``info`` for its profile url.

            Stored statistics are reused when they already hold ``_member``
            and ``_info``; otherwise the profile page is downloaded and
            parsed.  Rows whose url contains ``university`` get ``_skip``.
            """
            url = row['_profile_url']
            if 'university' in url:
                row['_skip'] = True

            if url in statistics_profiles_urls:
                stat = statistics_profiles_urls[url]
                for k, v in stat.items():
                    if k not in row:
                        row[k] = v
                if '_member' in row and '_info' in row:
                    row['member'] = row['_member']
                    row['info'] = row['_info']
                    return row

            page = REQ.get(url)
            info = row.setdefault('info', {})

            if 'university' in url:
                # The handle is the url path with '/' replaced by ':'.
                handle = unquote(urlparse(url).path)
                handle = handle.strip('/')
                handle = handle.replace('/', ':')
                row['member'] = handle
            else:
                match = re.search(
                    '<link[^>]*rel="canonical"[^>]*href="[^"]*/profile/(?P<handle>[^"]*)"[^>]*>',
                    page)
                handle = match.group('handle')
                row['member'] = handle

                match = re.search(
                    r'>[^<]*prize[^<]*money[^<]*(?:<[^>]*>)*[^<]*\$(?P<val>[.0-9]+)',
                    page, re.IGNORECASE)
                if match:
                    info['prize_money'] = as_number(match.group('val'))

                match = re.search(
                    r'>country:</[^>]*>(?:\s*<[^>]*>)*\s*<a[^>]*href="[^"]*/country/(?P<country>[^"]*)"',
                    page, re.IGNORECASE)
                if match:
                    info['country'] = match.group('country')

            match = re.search('<h3[^>]*>(?P<name>[^>]*)<', page)
            info['name'] = match.group('name').strip()

            # Private copies that a later run can reuse from statistics.
            row['_member'] = row['member']
            row['_info'] = dict(info)

            return row

        result = {}
        members = defaultdict(list)
        # The progress total is the number of profiles to resolve; ``result``
        # is still empty at this point.
        with PoolExecutor(max_workers=4) as executor, tqdm(
                total=len(profile_urls), desc='urls') as pbar:
            for row in executor.map(get_handle, profile_urls.values()):
                pbar.update()
                result[row['member']] = row

                skip = row.pop('_skip', False)
                if not skip and 'team_id' in row:
                    members[row['team_id']].append({
                        'account': row['member'],
                        'name': row['info']['name']
                    })

        if members:
            for row in result.values():
                if 'team_id' in row:
                    row['_members'] = members[row['team_id']]

        find_related(result)

        standings['result'] = result
        return standings
예제 #5
0
    def get_standings(self, users=None, statistics=None):
        """Build the contest standings from the API queried through ``_query``.

        Three API methods are used: ``contest.standings`` for rows and
        problems, ``contest.ratingChanges`` for old/new ratings and
        ``contest.status`` for per-submission details.

        Args:
            users: optional list of handles.  When non-empty it is sent as the
                ``handles`` parameter, submissions are requested per handle
                and places are set to ``'__unchanged__'``.  An empty list
                skips row processing entirely.
            statistics: not used by this method.

        Returns:
            ``self.get_standings_from_html()`` for spectator ranklists,
            ``{'action': 'delete'}`` when the standings query fails with code
            400, otherwise a dict with ``result``, ``url``, ``problems``,
            ``options`` and possibly ``timing_statistic_delta``.

        Raises:
            ExceptionParseStandings: the standings query failed with another
                code, or a later query returned a status other than
                ``OK`` / ``FAILED``.
        """

        if self.is_spectator_ranklist:
            return self.get_standings_from_html()

        contest_url = self.url.replace('contests', 'contest')
        standings_url = contest_url.rstrip('/') + '/standings'

        is_gym = '/gym/' in self.url
        result = {}

        # Extra per-login data; the 'login' key is removed from each copy and
        # used as the lookup key.
        domain_users = {}
        if '_domain_users' in self.info:
            for user in self.info['_domain_users']:
                user = deepcopy(user)
                domain_users[user.pop('login')] = user

        problems_info = OrderedDict()
        # Single pass, with unofficial participants included.
        for unofficial in [True]:
            params = {
                'contestId': self.cid,
                'showUnofficial': str(unofficial).lower(),
            }
            if users:
                params['handles'] = ';'.join(users)

            data = _query(method='contest.standings',
                          params=params,
                          api_key=self.api_key)

            if data['status'] != 'OK':
                if data['code'] == 400:
                    return {'action': 'delete'}
                raise ExceptionParseStandings(data['status'])

            phase = data['result']['contest'].get('phase', 'FINISHED').upper()
            contest_type = data['result']['contest']['type'].upper()
            duration_seconds = data['result']['contest'].get('durationSeconds')

            result_problems = data['result']['problems']
            for p in result_problems:
                d = {'short': p['index'], 'name': p['name']}
                if 'points' in p:
                    d['full_score'] = p['points']
                tags = p.get('tags')
                if tags:
                    d['tags'] = tags
                d['url'] = urljoin(standings_url.rstrip('/'),
                                   f"problem/{d['short']}")

                problems_info[d['short']] = d

            # An explicitly empty users list: keep the problems, skip the rows.
            if users is not None and not users:
                continue

            # Rows are "grouped" when at least one has a teamId and every row
            # either has a teamId or a participant type outside
            # self.PARTICIPANT_TYPES.
            grouped = any('teamId' in row['party']
                          for row in data['result']['rows'])
            if grouped:
                grouped = all('teamId' in row['party'] or row['party']
                              ['participantType'] not in self.PARTICIPANT_TYPES
                              for row in data['result']['rows'])

            # State for computing places while iterating the rows in order.
            place = None
            last = None
            idx = 0
            teams_to_skip = set()
            for row in data['result']['rows']:
                party = row['party']

                # Gym rows without members: synthesize a single member named
                # after the team, keyed by "<team name> <season>".
                if is_gym and not party['members']:
                    is_ghost_team = True
                    name = party['teamName']
                    party['members'] = [{
                        'handle': f'{name} {self.get_season()}',
                        'name': name,
                    }]
                else:
                    is_ghost_team = False

                for member in party['members']:
                    # Outside gym, a participant type not listed in
                    # self.PARTICIPANT_TYPES is treated as upsolving.
                    if is_gym:
                        upsolve = False
                    else:
                        upsolve = party[
                            'participantType'] not in self.PARTICIPANT_TYPES

                    handle = member['handle']

                    # A handle may appear in several rows; they share one entry.
                    r = result.setdefault(handle, OrderedDict())

                    r['member'] = handle
                    if 'room' in party:
                        r['room'] = as_number(party['room'])

                    r.setdefault('participant_type',
                                 []).append(party['participantType'])
                    r['_no_update_n_contests'] = 'CONTESTANT' not in r[
                        'participant_type']

                    if is_ghost_team and member['name']:
                        r['name'] = member['name']
                        r['_no_update_name'] = True
                    elif grouped and (not upsolve and not is_gym
                                      or 'name' not in r):
                        # Default name is the list of member handles; a team
                        # row overrides it with the team name.
                        r['name'] = ', '.join(m['handle']
                                              for m in party['members'])
                        if 'teamId' in party:
                            r['team_id'] = party['teamId']
                            r['name'] = f"{party['teamName']}"
                            r['_members'] = [{
                                'account': m['handle']
                            } for m in party['members']]
                            r['_account_url'] = urljoin(
                                self.url, '/team/' + str(r['team_id']))
                        r['_no_update_name'] = True
                    # Handles of the form "<prefix>=<login>" are enriched with
                    # the matching domain user data.
                    if domain_users and '=' in handle:
                        _, login = handle.split('=', 1)
                        r.update(domain_users.get(login, {}))

                    hack = row['successfulHackCount']
                    unhack = row['unsuccessfulHackCount']

                    problems = r.setdefault('problems', {})
                    for i, s in enumerate(row['problemResults']):

                        # problemResults are positionally aligned with the
                        # problems list of the same response.
                        k = result_problems[i]['index']
                        points = float(s['points'])
                        if contest_type == 'IOI' and 'pointsInfo' in s:
                            points = float(s['pointsInfo'] or '0')

                        # ICPC results are encoded as '+', '+<n>' or '-<n>'
                        # where n is the number of rejected attempts.
                        n = s.get('rejectedAttemptCount')
                        if n is not None and contest_type == 'ICPC' and points + n > 0:
                            points = f'+{"" if n == 0 else n}' if points > 0 else f'-{n}'

                        u = upsolve
                        if s['type'] == 'PRELIMINARY':
                            p = {'result': f'?{n + 1}'}
                        elif points or n:
                            if not points:
                                points = f'-{n}'
                                n = None
                            p = {'result': points}
                            if contest_type == 'IOI':
                                full_score = problems_info[k].get('full_score')
                                if full_score:
                                    p['partial'] = points < full_score
                            elif contest_type == 'CF' and n:
                                p['penalty_score'] = n
                        else:
                            # No points and no attempts: nothing to record.
                            continue

                        if 'bestSubmissionTimeSeconds' in s and duration_seconds:
                            time = s['bestSubmissionTimeSeconds']
                            # A best submission after the contest duration is
                            # stored as upsolving.
                            if time > duration_seconds:
                                u = True
                            else:
                                p['time_in_seconds'] = time
                                time /= 60
                                p['time'] = '%02d:%02d' % (time / 60,
                                                           time % 60)
                        a = problems.setdefault(k, {})
                        if u:
                            a['upsolving'] = p
                        else:
                            a.update(p)

                    if row['rank'] and not upsolve:
                        score = row['points']
                        if contest_type == 'IOI' and 'pointsInfo' in row:
                            score = float(row['pointsInfo'] or '0')

                        if is_gym:
                            r['place'] = row['rank']
                        elif unofficial:
                            if users:
                                r['place'] = '__unchanged__'
                            elif 'team_id' not in r and 'OUT_OF_COMPETITION' in r.get(
                                    'participant_type', []):
                                r['place'] = None
                            else:
                                # Each team advances the position counter only
                                # once, however many members it has.
                                if 'team_id' in r:
                                    if r['team_id'] not in teams_to_skip:
                                        teams_to_skip.add(r['team_id'])
                                        idx += 1
                                else:
                                    idx += 1
                                # Equal (score, penalty) pairs share a place.
                                value = (score, row.get('penalty'))
                                if last != value:
                                    last = value
                                    place = idx
                                r['place'] = place

                        r['solving'] = score
                        if contest_type == 'ICPC':
                            r['penalty'] = row['penalty']

                    if hack or unhack:
                        r['hack'] = {
                            'title': 'hacks',
                            'successful': hack,
                            'unsuccessful': unhack,
                        }

        # The rating changes query reuses the standings parameters without
        # the showUnofficial flag.
        params.pop('showUnofficial')

        data = _query(method='contest.ratingChanges',
                      params=params,
                      api_key=self.api_key)
        if data.get('status') not in ['OK', 'FAILED']:
            raise ExceptionParseStandings(data)
        if data and data['status'] == 'OK':
            for row in data['result']:
                if str(row.pop('contestId')) != self.key:
                    continue
                handle = row.pop('handle')
                if handle not in result:
                    continue
                r = result[handle]
                old_rating = row.pop('oldRating')
                new_rating = row.pop('newRating')
                r['old_rating'] = old_rating
                r['new_rating'] = new_rating

        # Submissions are requested once per handle when users are given,
        # otherwise once for the whole contest.
        params = {'contestId': self.cid}
        if users:
            array_params = []
            for user in users:
                params['handle'] = user
                array_params.append(deepcopy(params))
        else:
            array_params = [params]

        submissions = []
        for params in array_params:
            data = _query('contest.status',
                          params=params,
                          api_key=self.api_key)
            if data.get('status') not in ['OK', 'FAILED']:
                raise ExceptionParseStandings(data)
            if data['status'] == 'OK':
                submissions.extend(data['result'])

        for submission in submissions:
            party = submission['author']

            info = {
                'submission_id':
                submission['id'],
                'url':
                Statistic.SUBMISSION_URL_FORMAT_.format(url=contest_url,
                                                        sid=submission['id']),
                'external_solution':
                True,
            }

            if 'verdict' in submission:
                v = submission['verdict'].upper()
                if v == 'PARTIAL':
                    info['partial'] = True
                # Verdicts longer than 3 characters are abbreviated to the
                # initials of their '_'-separated words.
                info['verdict'] = ''.join(
                    s[0].upper()
                    for s in v.split('_')) if len(v) > 3 else v.upper()

            if 'programmingLanguage' in submission:
                info['language'] = submission['programmingLanguage']

            is_accepted = info.get('verdict') == 'OK'
            if not is_accepted and 'passedTestCount' in submission:
                info['test'] = submission['passedTestCount'] + 1

            if is_gym:
                upsolve = False
            else:
                upsolve = party[
                    'participantType'] not in self.PARTICIPANT_TYPES

            # A submission made after the contest duration is upsolving.
            if ('relativeTimeSeconds' in submission and duration_seconds
                    and duration_seconds < submission['relativeTimeSeconds']):
                upsolve = True

            for member in party['members']:
                handle = member['handle']
                if handle not in result:
                    continue
                r = result[handle]
                problems = r.setdefault('problems', {})
                k = submission['problem']['index']
                p = problems.setdefault(k, {})
                if upsolve:
                    p = p.setdefault('upsolving', {})
                if 'submission_id' not in p:
                    # First submission seen for this slot supplies its details.
                    p.update(info)
                    if 'result' not in p:
                        p['result'] = '+' if is_accepted else '-1'
                elif upsolve:
                    # Further upsolving submissions increase the magnitude of
                    # a signed attempt counter ('+' is treated as 0).
                    v = str(p.get('result'))
                    if v and v[0] in ['-', '+']:
                        v = 0 if v == '+' else int(v)
                        v = v + 1 if v >= 0 else v - 1
                        p['result'] = f'{"+" if v > 0 else ""}{v}'

        # Keep only members that have hacks, problems, a rating change, or
        # whose '_no_update_n_contests' flag is not set.
        result = {
            k: v
            for k, v in result.items() if v.get('hack') or v.get('problems')
            or 'new_rating' in v or not v.get('_no_update_n_contests')
        }

        def to_score(x):
            # String results: 1 when it starts with '+', 0 when it starts with
            # '?', otherwise 1 only if it parses to a positive float.
            # Non-string results are returned unchanged.
            return ((1 if x.startswith('+') or not x.startswith('?')
                     and float(x) > 0 else 0) if isinstance(x, str) else x)

        def to_solve(x):
            # True when the entry is not partial and its result has a
            # positive score.
            return not x.get('partial',
                             False) and to_score(x.get('result', 0)) > 0

        for r in result.values():
            upsolving = 0
            solving = 0
            upsolving_score = 0

            for a in r['problems'].values():
                # Count the upsolving attempt only when it solved a problem
                # that the in-contest result did not.
                if 'upsolving' in a and to_solve(a['upsolving']) > to_solve(a):
                    upsolving_score += to_score(a['upsolving']['result'])
                    upsolving += to_solve(a['upsolving'])
                else:
                    solving += to_solve(a)
            r.setdefault('solving', 0)
            r['upsolving'] = upsolving_score
            # Store the solved counters only when they differ from the
            # corresponding score values.
            if abs(solving -
                   r['solving']) > 1e-9 or abs(upsolving -
                                               r['upsolving']) > 1e-9:
                r['solved'] = {
                    'solving': solving,
                    'upsolving': upsolving,
                }

        standings = {
            'result': result,
            'url': standings_url,
            'problems': list(problems_info.values()),
            'options': {
                'fixed_fields': [('hack', 'Hacks')],
            },
        }

        # Contests whose name starts with "educational codeforces round" get
        # dedicated timeline options.
        if re.search('^educational codeforces round', self.name,
                     re.IGNORECASE):
            standings['options'].setdefault('timeline', {}).update({
                'attempt_penalty':
                10 * 60,
                'challenge_score':
                False
            })

        # While the contest phase is not FINISHED and less than 3 hours have
        # passed since end_time, request a 3 minute timing delta.
        if phase != 'FINISHED' and self.end_time + timedelta(
                hours=3) > datetime.utcnow().replace(tzinfo=pytz.utc):
            standings['timing_statistic_delta'] = timedelta(minutes=3)
        return standings
예제 #6
0
    def get_standings(self, users=None, statistics=None):
        if not hasattr(self, 'season'):
            if not hasattr(self, 'start_time'):
                self.season = self.key.split()[0]
            else:
                year = self.start_time.year - (0 if self.start_time.month > 8
                                               else 1)
                self.season = f'{year}-{year + 1}'

        result = {}
        problems_info = OrderedDict()

        page = REQ.get(self.standings_url, detect_charsets=True)
        regex = '<table[^>]*class="standings"[^>]*>.*?</table>'
        html_table = re.search(regex, page, re.DOTALL).group(0)
        table = parsed_table.ParsedTable(html_table)

        matches = re.finditer(
            r'''
            <a[^>]*>[^<]*Day\s*(?P<day>[0-9]+):[^<]*<[^#]*?
            <a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*(?:Div\s*[A1]\s*)?Results\s*</a>
            ''',
            page,
            re.IGNORECASE | re.VERBOSE,
        )
        stages = {
            m.group('day'): urllib.parse.urljoin(self.standings_url,
                                                 m.group('url'))
            for m in matches
        }

        fields_types = defaultdict(set)
        for r in table:
            row = OrderedDict()
            other = OrderedDict()
            problems = row.setdefault('problems', {})
            for key, v in list(r.items()):
                if re.match('^Stage [0-9]+$', key):
                    k = key.split()[1]
                else:
                    k = key.split()[0]
                if len(k) == 1 and 'A' <= k <= 'Z' or k.isdigit():
                    if k >= 'X':
                        continue
                    d = problems_info.setdefault(k, {})
                    d['short'] = k
                    if v.attrs.get('title'):
                        d['name'] = v.attrs['title']
                    if k.isdigit() and k in stages:
                        d['url'] = stages[k]
                    classes = v.attrs.get('class', '').split()
                    if ' ' in v.value:
                        p = problems.setdefault(k, {})
                        point, time = v.value.split()
                        if point == 'X':
                            p['binary'] = True
                            point = '+'
                        elif point == '0':
                            p['binary'] = False
                            point = '-1'
                        if 'opener' in classes:
                            p['first_ac'] = True
                        if 'frost' in classes and point and point[0] == '-':
                            point = '?' + point[1:]
                            time = None
                        if time:
                            p['time'] = time
                        p['result'] = point
                    elif 'frost' in classes:
                        p = problems.setdefault(k, {})
                        p['result'] = '?'
                    else:
                        try:
                            point = float(v.value)
                            p = problems.setdefault(k, {})
                            p['result'] = point
                        except Exception:
                            pass
                elif k == 'Total':
                    row['solving'] = float(v.value)
                elif k == 'Time':
                    if "'" in v.value and '"' in v.value:
                        minute, seconds = map(int,
                                              re.findall('-?[0-9]+', v.value))
                        if minute < 0:
                            seconds = -seconds
                        row['penalty'] = f'{minute + seconds / 60:.2f}'
                    else:
                        row['penalty'] = int(v.value)
                elif k == 'Place':
                    row['place'] = v.value.strip('.')
                elif 'team' in k.lower() or 'name' in k.lower():
                    row['member'] = v.value if ' ' not in v.value else v.value + ' ' + self.season
                    row['name'] = v.value
                else:
                    t = as_number(v.value)
                    if t:
                        fields_types[key].add(type(t))
                    other[key] = v.value
            for k, v in other.items():
                if k.lower() not in row:
                    row[k] = v
            if 'solving' not in row:
                row['solving'] = row.pop('Rating', 0)
            result[row['member']] = row

        for field, types in fields_types.items():
            if len(types) != 1:
                continue
            field_type = next(iter(types))
            if field_type not in [int, float]:
                continue
            for row in result.values():
                if field in row:
                    row[field] = as_number(row[field])

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
            'problems_time_format': '{H}:{m:02d}',
        }
        return standings
예제 #7
0
    def get_standings(self, users=None, statistics=None):
        """Scrape the problem list and the paginated ranking of the contest.

        Reads ``self.standings_url`` (ranking pages, and the problems page
        derived from it) and ``self.name`` (used to select the matching round
        in the problems table). ``users`` and ``statistics`` are accepted but
        not used by this implementation.

        Returns:
            A dict with ``result`` (rows keyed by member id) and ``problems``
            (list of problem description dicts).

        Raises:
            ExceptionParseStandings: if the problems table cannot be located.
        """
        def parse_problems_infos():
            """Return an OrderedDict of problem infos keyed by short name."""
            problem_url = self.standings_url.replace('/ranking', '/p')
            page = REQ.get(problem_url)

            match = re.search(
                r'<h1[^>]*>[^<]*</h1>(\s*<[^/][^>]*>)*\s*(?P<table><table[^>]*>.*?</table>)',
                page, re.DOTALL)
            if not match:
                raise ExceptionParseStandings('Not found problems table')
            table = parsed_table.ParsedTable(html=match.group('table'),
                                             ignore_wrong_header_number=False)
            skip = False
            problems_infos = collections.OrderedDict()
            for r in table:
                # A ParsedTableRow here is treated as a round heading: problems
                # that follow are kept only if the heading (without a trailing
                # parenthesised suffix) occurs in the contest name.
                if isinstance(r, parsed_table.ParsedTableRow):
                    runda = re.sub(r'\s*\(.*\)\s*$', '',
                                   r.columns[0].value).strip()
                    skip = runda.lower() not in self.name.lower()
                    continue

                if skip:
                    continue

                problem_info = {}
                for k, vs in list(r.items()):
                    if isinstance(vs, list):
                        v = ' '.join([v.value for v in vs]).strip()
                    else:
                        v = vs.value
                    if not k:
                        # The column with an empty header holds the short name.
                        problem_info['short'] = v
                    elif k in ('Nazwa', 'Name'):
                        match = re.search(r'\[(?P<letter>[^\]]+)\]$', v)
                        if match:
                            problem_info['_letter'] = match.group('letter')
                        problem_info['name'] = v
                        cells = vs if isinstance(vs, list) else [vs]
                        for cell in cells:
                            # The XPath must be relative ('.//a'): an absolute
                            # '//a' is evaluated from the document root and
                            # would return the first link of the whole table
                            # for every problem.
                            href = cell.column.node.xpath('.//a/@href')
                            if href:
                                problem_info['url'] = urljoin(problem_url,
                                                              href[0])
                                break
                if problem_info:
                    problems_infos[problem_info['short']] = problem_info
            return problems_infos

        problems_infos = parse_problems_infos()

        result = {}

        page = 1
        while page is not None:
            content = REQ.get(self.standings_url + f'?page={page}')

            # Follow pagination: the next page is the smallest linked page
            # number that is greater than the current one (None when absent).
            matches = re.finditer(
                r'<a[^>]*href="[^"]*\?page=(?P<page>[0-9]+)"[^>]*>', content)
            next_page = None
            for match in matches:
                p = int(match.group('page'))
                if p > page and (next_page is None or p < next_page):
                    next_page = p
            page = next_page

            table = parsed_table.ParsedTable(
                html=content,
                xpath="//table[contains(@class,'table-ranking')]//tr")
            for r in table:
                row = collections.OrderedDict()
                problems = row.setdefault('problems', {})
                row['solving'] = 0
                for k, v in list(r.items()):
                    if k == '#':
                        row['place'] = v.value
                    elif k == 'User':
                        row['name'] = v.value
                        # The member id is taken from the <tr> id attribute,
                        # expected to look like "ranking_row_<digits>".
                        rid = v.row.node.xpath('@id')[0]
                        match = re.match('^ranking_row_(?P<id>[0-9]+)$', rid)
                        member = match.group('id')
                        row['member'] = member
                    elif k in problems_infos and v.value:
                        # Only columns of the selected round's problems count.
                        problems[k] = {'result': v.value}
                        row['solving'] += as_number(v.value)
                # Rows without any result for the selected problems are dropped.
                if not problems:
                    continue
                result[row['member']] = row

        # Recompute places from the accumulated score: equal scores share a
        # place, and the next distinct score takes its 1-based position.
        last = None
        for idx, row in enumerate(sorted(result.values(),
                                         key=lambda r: -r['solving']),
                                  start=1):
            if last != row['solving']:
                last = row['solving']
                rank = idx
            row['place'] = rank

        problems_infos = list(problems_infos.values())
        # Order by the bracketed letter only when every problem has one;
        # '_letter' is an internal helper key and is removed before returning.
        if all('_letter' in p for p in problems_infos):
            problems_infos.sort(key=lambda p: p['_letter'])
        for p in problems_infos:
            p.pop('_letter', None)

        ret = {
            'result': result,
            'problems': problems_infos,
        }
        return ret