예제 #1
0
    def get_standings(self, users=None, statistics=None):
        """Parse CodeChef contest standings.

        Queries the contest API, then pages through the ranking API for the
        contest (or each per-division child contest), retrying failed
        requests with exponential backoff.

        :param users: optional iterable of handles to restrict the search to
        :param statistics: optional mapping handle -> previously stored row,
            used to carry over rating fields
        :return: dict with ``result``, ``url``, ``problems``, ``hidden_fields``
        """
        url = self.API_CONTEST_URL_FORMAT_.format(**self.__dict__)
        page = REQ.get(url)
        data = json.loads(page)
        if data['status'] != 'success':
            raise ExceptionParseStandings(json.dumps(data))
        if 'child_contests' in data:
            # Parent contest: standings are split per division.
            contest_infos = {
                d['contest_code']: {
                    'division': k
                }
                for k, d in data['child_contests'].items()
            }
        else:
            contest_infos = {self.key: {}}

        result = {}

        # Problems are grouped by division when there are several contests,
        # otherwise kept as a flat list.
        problems_info = dict() if len(contest_infos) > 1 else list()
        hidden_fields = set()

        for key, contest_info in contest_infos.items():
            url = self.STANDINGS_URL_FORMAT_.format(key=key)
            page = REQ.get(url)
            # The ranking API requires the CSRF token embedded in the page.
            match = re.search(
                '<input[^>]*name="csrfToken"[^>]*id="edit-csrfToken"[^>]*value="([^"]*)"',
                page)
            csrf_token = match.group(1)

            n_page = 0
            per_page = 150
            n_total_page = None
            pbar = None
            contest_type = None
            while n_total_page is None or n_page < n_total_page:
                n_page += 1
                time.sleep(2)  # be gentle with the API between pages
                url = self.API_RANKING_URL_FORMAT_.format(key=key,
                                                          page=n_page,
                                                          per_page=per_page)

                if users:
                    urls = [f'{url}&search={user}' for user in users]
                else:
                    urls = [url]

                for url in urls:
                    # Retry with exponential backoff capped at 5 minutes;
                    # the for-else raises after all attempts fail.
                    delay = 5
                    for _ in range(10):
                        try:
                            headers = {
                                'x-csrf-token': csrf_token,
                                'x-requested-with': 'XMLHttpRequest',
                            }
                            page = REQ.get(url, headers=headers)
                            data = json.loads(page)
                            assert data.get('status') != 'rate_limit_exceeded'
                            break
                        except Exception:
                            traceback.print_exc()
                            delay = min(300, delay * 2)
                            sys.stdout.write(f'url = {url}\n')
                            sys.stdout.write(f'Sleep {delay}... ')
                            sys.stdout.flush()
                            time.sleep(delay)
                            sys.stdout.write('Done\n')
                    else:
                        raise ExceptionParseStandings(
                            f'Failed getting {n_page} by url {url}')

                    if 'status' in data and data['status'] != 'success':
                        raise ExceptionParseStandings(json.dumps(data))

                    unscored_problems = data['contest_info'][
                        'unscored_problems']

                    if n_total_page is None:
                        # First page: collect problem metadata and the total
                        # page count before processing rows.
                        for p in data['problems']:
                            if p['code'] in unscored_problems:
                                continue
                            d = problems_info
                            if 'division' in contest_info:
                                d = d.setdefault('division', OrderedDict())
                                d = d.setdefault(contest_info['division'], [])
                            d.append({
                                'short': p['code'],
                                'name': p['name'],
                                'url': f"https://www.codechef.com/problems/{p['code']}",
                            })
                        n_total_page = data['availablePages']
                        pbar = tqdm.tqdm(total=n_total_page * len(urls))
                        contest_type = data['contest_info'].get('type')

                    for d in data['list']:
                        handle = d.pop('user_handle')
                        d.pop('html_handle', None)
                        problems_status = d.pop('problems_status')
                        if d['score'] < 1e-9 and not problems_status:
                            LOG.warning(f'Skip handle = {handle}: {d}')
                            continue
                        row = result.setdefault(handle, OrderedDict())

                        row['member'] = handle
                        row['place'] = d.pop('rank')
                        row['solving'] = d.pop('score')
                        for k in 'time', 'total_time':
                            if k in d:
                                row['time'] = d.pop(k)
                                break

                        problems = row.setdefault('problems', {})
                        solved, upsolved = 0, 0
                        if problems_status:
                            for k, v in problems_status.items():
                                # Unscored problems count as upsolving.
                                t = 'upsolving' if k in unscored_problems else 'result'
                                v[t] = v.pop('score')
                                solved += 1 if v.get('result', 0) > 0 else 0
                                upsolved += 1 if v.get('upsolving', 0) > 0 else 0

                                # Contest type '1' shows ICPC-style +/- marks
                                # built from the attempt penalty.
                                if contest_type == '1' and 'penalty' in v:
                                    penalty = v.pop('penalty')
                                    if v[t] > 0:
                                        v[t] = f'+{"" if penalty == 0 else penalty}'
                                    else:
                                        v[t] = f'-{penalty}'

                                problems[k] = v
                            row['solved'] = {
                                'solving': solved,
                                'upsolving': upsolved
                            }
                        country = d.pop('country_code')
                        if country:
                            d['country'] = country

                        rating = d.pop('rating', None)
                        if rating and rating != '0':
                            hidden_fields.add('rating')
                            row['rating'] = rating

                        row.update(d)
                        row.update(contest_info)
                        if statistics and handle in statistics:
                            stat = statistics[handle]
                            for k in ('rating_change', 'new_rating'):
                                if k in stat:
                                    row[k] = stat[k]
                        # Remaining keys become hidden fields (was the
                        # redundant set(list(d.keys()))).
                        hidden_fields |= set(d.keys())
                    pbar.set_description(f'key={key} url={url}')
                    pbar.update()

            # Drop the penalty column when nobody has a non-zero penalty.
            has_penalty = False
            for row in result.values():
                p = row.get('penalty')
                has_penalty = has_penalty or p and str(p) != "0"
            if not has_penalty:
                for row in result.values():
                    row.pop('penalty', None)

            if pbar is not None:
                pbar.close()

        standings = {
            'result': result,
            'url': self.url,
            'problems': problems_info,
            'hidden_fields': list(hidden_fields),
        }
        return standings
예제 #2
0
    def get_standings(self, users=None, statistics=None):
        """Parse olympiad standings from the site's rating page.

        Supports two layouts: a packed JavaScript array ``M`` embedded in the
        page, or an HTML table with class ``olimp``.  Members are keyed as
        "<team name>, <season>" because the site exposes no stable handles.
        """
        # Academic season: contests before September belong to the previous
        # school year, so "2020-2021" covers Sep 2020 .. Aug 2021.
        year = self.start_time.year
        year = year if self.start_time.month >= 9 else year - 1
        season = '%d-%d' % (year, year + 1)

        page = REQ.get(self.url)
        # Find the link labeled "[Рейтинг]" ("[Rating]") to the standings.
        match = re.search(
            r'''<a[^>]*href=["']?(?P<href>[^"' ]*rating[^"' ]*)["']?[^>]*>\[Рейтинг\]''',
            page)

        # No rating link plus "olympiad does not exist" banner: the contest
        # was removed upstream, so signal deletion.
        if not match and re.search(
                r'''<b>Олимпиада №[0-9]+ не существует!</b>''', page):
            return {'action': 'delete'}

        page = REQ.get(match.group('href'))
        standings_url = REQ.last_url

        # Layout 1: JS snippet "var a=1,b=2,...; M=[...]" holding all rows.
        match = re.search(
            r'''var(?P<vars>(?:\s*[a-z]+=[0-9]+,)+)\s*M=(?:new Array)?[\[\(]?(?P<data>.*?)[\]\)]\s*(?:function|var)''',
            page)  # noqa

        result = {}
        problems_info = OrderedDict()

        def canonize_name(name):
            """Collapse whitespace/markup in a name into "a, b" form."""
            name = name.replace('\r', ' ')
            name = name.replace('\n', ' ')
            name = re.sub(r'\s+', ' ', name)
            name = re.sub(r'<br/?>', ',', name)
            name = re.sub(r'<[^>]*>', '', name)
            name = re.sub(r'\s*,\s*', ', ', name)
            name = name.strip()
            return name

        if match:
            # Turn the JS literal into valid JSON: escape backslashes and
            # double quotes, then swap single quotes for double quotes.
            data = match.group('data')
            data = data.replace('\\', '\\\\')
            data = data.replace('"', r'\"')
            data = data.replace("'", '"')
            data = re.sub(r'\s+', ' ', data)
            data = json.loads(f'[{data}]')

            variables = {}
            for var in re.split(r',\s*', match.group('vars').strip()):
                if not var:
                    continue
                k, v = var.split('=')
                variables[k] = v

            # Offset of the first per-problem field, taken from the page's
            # own "M[(<val>+..." indexing expression.
            match = re.search(r'''M\[\((?P<val>[0-9]+)\+''', page)
            offset = int(match.group('val'))

            n_problems = int(variables['tn'])  # 'tn': number of problems
            n_teams = int(variables['nk'])  # 'nk': number of teams
            n_fields = offset + 3 * n_problems
            place = 0
            last = None
            # Rows are packed back-to-back: n_fields values per team.
            for rank, st in enumerate(range(0, n_teams * n_fields, n_fields),
                                      start=1):
                row = data[st:st + n_fields]

                name = canonize_name(row[0])

                member = name + ', ' + season

                r = result.setdefault(member, {})

                r['name'] = name
                r['member'] = member
                r['solving'] = int(row[1])
                r['penalty'] = int(row[2])

                # Standard competition ranking: ties share the place.
                score = r['solving'], r['penalty']
                if score != last:
                    place = rank
                    last = score
                r['place'] = place

                n_problems_fields = 3
                problems = r.setdefault('problems', {})
                for idx in range(0, n_problems):
                    # Per-problem triple: (status, errors/score, seconds).
                    p_info = row[offset + idx * n_problems_fields:offset +
                                 (idx + 1) * n_problems_fields]
                    stat, errors, seconds = map(int, p_info)
                    key = chr(ord('A') +
                              idx) if n_problems < 27 else f'{idx + 1:02d}'

                    if key not in problems_info:
                        info = {'short': key}
                        # |errors| >= 1000 encodes a partial score
                        # (score = errors - 1000); assume 100-point tasks.
                        if abs(errors) >= 1000:
                            info['full_score'] = 100
                        problems_info[key] = info

                    if not stat:
                        continue
                    p = problems.setdefault(key, {})
                    p['time'] = self.to_time(seconds, num=2)
                    if abs(errors) < 1000:
                        # ICPC style: '+'/'+k' accepted, '-k' rejected.
                        p['result'] = f'+{errors if errors else ""}' if stat == 1 else f'-{errors}'
                    else:
                        solved = r.setdefault('solved', {'solving': 0})
                        score = errors - 1000
                        p['result'] = score
                        if score > 0:
                            p['partial'] = score < problems_info[key][
                                'full_score']
                            if not p['partial']:
                                solved['solving'] += 1

                if not problems:
                    result.pop(member)
        else:
            # Layout 2: plain HTML table with class "olimp".
            regex = '''<table[^>]*class=["']?olimp["']?[^>]*>.*?</table>'''
            match = re.search(regex, page, re.DOTALL)
            if not match and 'Рейтинг олимпиады' not in page:
                return {'action': 'delete'}
            table = parsed_table.ParsedTable(match.group(0))

            for row in table:
                r = OrderedDict()
                problems = r.setdefault('problems', {})
                for k, v in list(row.items()):
                    if k == '=':
                        r['solving'] = int(v.value)
                    elif k == 'Место':  # "Place"
                        r['place'] = int(v.value)
                    elif k == 'Время':  # "Time"
                        r['penalty'] = int(v.value)
                    elif k == 'Участник':  # "Participant"
                        name = canonize_name(v.value)
                        r['name'] = name
                        r['member'] = name + ', ' + season
                    elif len(k) == 1 and k not in ['№']:
                        # Single-character column header: a problem.
                        if k not in problems_info:
                            info = {'short': k}
                            problems_info[k] = info
                        # DOT (module constant) marks an unattempted cell --
                        # presumably '.'; verify against the module.
                        if v.value != DOT:
                            p = problems.setdefault(k, {})
                            p['result'], *values = v.value.split()
                            if values:
                                p['time'] = values[0]
                if not problems:
                    continue

                result[r['member']] = r

        standings = {
            'result': result,
            'url': standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
예제 #3
0
    def get_standings(self, users=None, statistics=None):
        """Parse LeetCode contest standings via the paged ranking API."""
        standings_url = self.standings_url or self.RANKING_URL_FORMAT_.format(
            **self.__dict__)

        # After formatting in the contest attributes, the template keeps one
        # positional slot for the page number.
        api_ranking_url_format = self.API_RANKING_URL_FORMAT_.format(
            **self.__dict__)
        url = api_ranking_url_format.format(1)
        content = REQ.get(url)
        data = json.loads(content)
        if not data:
            return {'result': {}, 'url': standings_url}
        # Page count derived from the total participant count and the size
        # of the first page.
        n_page = (data['user_num'] - 1) // len(data['total_rank']) + 1

        problems_info = [{
            'short': f'Q{i + 1}',
            'name': p['title']
        } for i, p in enumerate(data['questions'])]

        def fetch_page(page):
            # API page numbers are 1-based.
            url = api_ranking_url_format.format(page + 1)
            content = REQ.get(url)
            return json.loads(content)

        start_time = self.start_time.replace(tzinfo=None)
        result = {}
        # Fetch ranking pages concurrently.
        with PoolExecutor(max_workers=8) as executor:
            for data in executor.map(fetch_page, range(n_page)):
                for row, submissions in zip(data['total_rank'],
                                            data['submissions']):
                    if not submissions:
                        continue
                    handle = row.pop('username')
                    if users and handle not in users:
                        continue
                    row.pop('contest_id')
                    row.pop('user_slug')
                    row.pop('global_ranking')

                    r = result.setdefault(handle, {})
                    r['member'] = handle
                    r['place'] = row.pop('rank')
                    r['solving'] = row.pop('score')

                    # Non-US accounts live on regional domains; keep the
                    # suffix used to build the profile URL.
                    data_region = row.pop('data_region').lower()
                    r['info'] = {
                        'profile_url': {
                            '_data_region':
                            '' if data_region == 'us' else f'-{data_region}'
                        }
                    }

                    country = None
                    for field in 'country_code', 'country_name':
                        country = country or row.pop(field, None)
                    if country:
                        r['country'] = country

                    solved = 0
                    problems = r.setdefault('problems', {})
                    for i, (k, s) in enumerate(submissions.items()):
                        p = problems.setdefault(f'Q{i + 1}', {})
                        # Submission time relative to contest start (both
                        # sides naive datetimes).
                        p['time'] = self.to_time(
                            datetime.fromtimestamp(s['date']) - start_time)
                        # status == 10 is treated as accepted; fail_count
                        # becomes the '+k' / '-k' attempts suffix.
                        if s['status'] == 10:
                            solved += 1
                            p['result'] = '+' + str(s['fail_count'] or '')
                        else:
                            p['result'] = f'-{s["fail_count"]}'
                    r['solved'] = {'solving': solved}
                    finish_time = datetime.fromtimestamp(
                        row.pop('finish_time')) - start_time
                    r['penalty'] = self.to_time(finish_time)
                    r.update(row)

        standings = {
            'result': result,
            'url': standings_url,
            'problems': problems_info,
        }
        return standings
예제 #4
0
    def get_users_infos(users, resource, accounts, pbar=None):
        """Yield one info dict per user (profile stats + rating history).

        Logs in via the settings page when a login form is shown, builds a
        country-id -> country-name map from the page's <select>, then
        fetches profiles concurrently with rate limiting.
        """

        page = REQ.get(urljoin(resource.profile_url, Statistic.SETTINGS_URL_))
        form = REQ.form(action=r'login.php\?action=login')
        if form:
            data = {
                'username': conf.BESTCODER_AUTHORID,
                'password': conf.BESTCODER_PASSWORD,
                'remember': 'on',
            }
            page = REQ.submit_form(data=data, form=form)

        # Numeric country ids -> display names, parsed from the settings
        # page's country <select>.
        match = re.search('<select[^>]*id="country"[^>]*>.*?</select>', page,
                          re.DOTALL)
        countries = dict(
            re.findall('<option[^>]*value="([0-9]+)"[^>]*>([^<]*)</option>',
                       match.group(0)))

        # At most 5 profile fetches per second.
        @RateLimiter(max_calls=5, period=1)
        def fetch_user(user):
            url = resource.profile_url.format(account=user)
            page = REQ.get(url)

            info = {}

            # "LABEL 123" stat pairs rendered as adjacent <span>s.
            matches = re.findall(
                r'<span[^>]*>([A-Z]+)</span>\s*<span[^>]*>([0-9]+)</span>',
                page)
            for k, v in matches:
                info[k.lower()] = int(v)

            match = re.search(
                '<img[^>]*src="[^"]*country[^"]*([0-9]+)[^"]*"[^>]*alt="country"[^>]*>',
                page)
            if match:
                info['country'] = countries.get(match.group(1))

            match = re.search(
                '<img[^>]*class="img-circle"[^>]*src="([^"]*getAvatar.php[^"]*)"[^>]*>',
                page)
            if match:
                info['avatar_url'] = urljoin(url, match.group(1))

            # Per-contest rating history.  Assumes the API returns entries
            # in chronological order -- rating_change is vs the previous
            # entry; TODO confirm the ordering.
            page = REQ.get(Statistic.USER_RATING_API_URL_.format(user))
            data = json.loads(page)
            ratings = {}
            old_rating = None
            for stat in data:
                rating = ratings.setdefault(stat['contestid'],
                                            collections.OrderedDict())
                new_rating = int(stat['rating'])
                if old_rating is not None:
                    rating['old_rating'] = old_rating
                    rating['rating_change'] = new_rating - old_rating
                rating['new_rating'] = new_rating
                old_rating = new_rating
                info['rating'] = new_rating  # ends up as the latest rating

            if not ratings:
                info.pop('rating', None)

            return user, info, ratings

        with PoolExecutor(max_workers=8) as executor:
            for user, info, ratings in executor.map(fetch_user, users):
                if pbar:
                    pbar.update()
                if not info:
                    # None signals a missing account; {} signals "skip".
                    if info is None:
                        yield {'info': None}
                    else:
                        yield {'skip': True}
                    continue
                info = {
                    'info': info,
                    'contest_addition_update_params': {
                        'update': ratings,
                        'by': 'key',
                    },
                }
                yield info
예제 #5
0
    def get_standings(self, users=None, statistics=None):
        """Parse standings: the contest's problems table plus the paged
        ranklist table, pages fetched concurrently."""

        page = REQ.get(self.url)
        # Redirect to login.php means the contest is not publicly visible.
        if 'login.php' in REQ.last_url:
            raise ExceptionParseStandings('private contest')

        table = parsed_table.ParsedTable(
            html=page, xpath='.//table[@id="contest-problems"]//tr')

        problems_infos = collections.OrderedDict()
        for r in table:
            p_info = {
                'short': r['Pro.ID'].value,
                'name': r['Title'].value,
            }
            href = r['Title'].column.node.xpath('.//a/@href')
            if href:
                p_info['url'] = urljoin(self.url, href[0])
            problems_infos[p_info['short']] = p_info

        standings_url = urljoin(
            self.url, self.STANDINGS_URL_FORMAT_.format(key=self.key))
        page = REQ.get(standings_url)
        # Number of pages = highest page number linked from the first page.
        matches = re.findall('"[^"]*contest_ranklist[^"]*page=([0-9]+)', page)
        n_pages = max(map(int, matches)) if matches else 1

        def fetch_page(page):
            url = f'{standings_url}&page={page + 1}'
            return REQ.get(url)

        results = {}
        header_mapping = {
            'Rank': 'place',
            'User': '******',
            'Score': 'solving',
            'Hack': 'hack'
        }
        with PoolExecutor(max_workers=4) as executor, tqdm.tqdm(
                total=n_pages, desc='paging') as pbar:
            for page in executor.map(fetch_page, range(n_pages)):
                table = parsed_table.ParsedTable(
                    html=page,
                    xpath='.//table[@id="contest-ranklist"]//tr',
                    header_mapping=header_mapping)
                for r in table:
                    row = collections.OrderedDict()
                    problems = r.setdefault('problems', {})
                    for k, v in r.items():
                        # Problem headers look like "<short> <full_score>";
                        # anything else is a plain standings column.
                        p = k.split()
                        if p[0] not in problems_infos:
                            row[k] = v.value
                            continue
                        short, full_score = p
                        problems_infos[short].setdefault(
                            'full_score', full_score)
                        if not v.value:
                            continue

                        p = problems.setdefault(short, {})
                        score, *info = v.value.split()
                        p['result'] = score
                        # Negative score: rejected, no further details.
                        if score.startswith('-'):
                            continue

                        # ondblclick carries the two ids used to build a
                        # link to the accepted solution.
                        if 'ondblclick' in v.column.attrs:
                            ondblclick = v.column.attrs['ondblclick']
                            ids = re.findall('[0-9]+', ondblclick)
                            if len(ids) == 2:
                                url = urljoin(
                                    self.url,
                                    self.SOLUTION_URL_FORMAT_.format(*ids))
                                p['url'] = url
                                p['external_solution'] = True

                        # Last token is the time; an optional "( -k )"
                        # prefix holds the penalty score.
                        *info, p['time'] = info
                        if info and info[0] == '(':
                            m = re.search('-([0-9]+)', info[1])
                            if m:
                                p['penalty_score'] = m.group(1)
                            info = info[3:]
                    if not problems:
                        continue

                    # NOTE(review): raises KeyError if the 'Hack' column is
                    # absent from the table -- confirm it is always present.
                    hack = row.pop('hack')
                    if hack:
                        row['hack'] = {'title': 'hacks'}
                        m = re.search(r'\+[0-9]+', hack)
                        row['hack']['successful'] = int(m.group(0)) if m else 0
                        m = re.search(r'\-[0-9]+', hack)
                        row['hack']['unsuccessful'] = -int(
                            m.group(0)) if m else 0

                    handle = row['member']
                    # Carry over rating fields from previously stored rows.
                    if statistics and handle in statistics:
                        stat = statistics[handle]
                        for k in ('old_rating', 'rating_change', 'new_rating'):
                            if k in stat:
                                row[k] = stat[k]

                    results[handle] = row
                pbar.update()

        ret = {
            'url': standings_url,
            'problems': list(problems_infos.values()),
            'result': results,
            'options': {
                'fixed_fields': [('hack', 'Hack')],
            },
        }
        return ret
예제 #6
0
    def get_users_infos(users, resource=None, accounts=None, pbar=None):
        """Fetch Codeforces ``user.info`` for *users*.

        Splits over-long requests in two, and repairs the request when the
        API rejects a stale handle: profiles that redirect to another
        profile are renamed, profiles that redirect to the main page are
        reported as ``{'info': None}``.

        :return: list aligned with *users*; each item has ``info`` and,
            when the API returned a different handle, ``rename``.
        """
        handles = ';'.join(users)

        # The joined handle string must stay short enough for one request;
        # split the user list in two and recurse when it grows too long.
        len_limit = 1000
        if len(handles) > len_limit:
            s = 0
            for i in range(len(users)):
                s += len(users[i])
                if s > len_limit:
                    # BUGFIX: ``pbar`` used to be passed positionally and
                    # landed in the ``resource`` parameter; forward all
                    # keyword arguments explicitly instead.
                    head = Statistic.get_users_infos(
                        users[:i], resource=resource, accounts=accounts,
                        pbar=pbar)
                    tail = Statistic.get_users_infos(
                        users[i:], resource=resource, accounts=accounts,
                        pbar=pbar)
                    return head + tail

        removed = []
        last_index = 0
        orig_users = list(users)
        while True:
            handles = ';'.join(users)
            data = _query(method='user.info', params={'handles': handles})
            if data['status'] == 'OK':
                break
            # The API names the first unknown handle in its error comment;
            # resolve that handle through the profile redirect.
            if data['status'] == 'FAILED' and data['comment'].startswith(
                    'handles: User with handle'):
                handle = data['comment'].split()[-3]
                location = REQ.geturl(
                    f'https://codeforces.com/profile/{handle}')
                index = users.index(handle)
                if location.endswith('//codeforces.com/'):
                    # Redirected to the main page: the account is gone.
                    removed.append((index, users[index]))
                    users.pop(index)
                else:
                    # Redirected to another profile: the user was renamed.
                    target = location.rstrip('/').split('/')[-1]
                    users[index] = target
                if pbar is not None:
                    pbar.update(index - last_index)
                    last_index = index
            else:
                raise NameError(f'data = {data}')
        if pbar is not None:
            pbar.update(len(users) - last_index)

        infos = data['result']
        # Re-insert placeholders for removed accounts to keep the result
        # aligned with the original user list.
        for index, user in removed:
            infos.insert(index, None)
            users.insert(index, user)

        ret = []
        assert len(infos) == len(users)
        for data, user, orig in zip(infos, users, orig_users):
            if data:
                if data['handle'].lower() != user.lower():
                    raise ValueError(
                        f'Do not match handle name for user = {user} and data = {data}'
                    )
                # Drop placeholder images.
                if data.get('avatar', '').endswith('/no-avatar.jpg'):
                    data.pop('avatar')
                if data.get('titlePhoto', '').endswith('/no-title.jpg'):
                    data.pop('titlePhoto')
            ret.append({'info': data})
            if data and data['handle'] != orig:
                ret[-1]['rename'] = data['handle']
        return ret
예제 #7
0
    def _hashcode(self, users=None, statistics=None):
        """Parse Hash Code standings from the year's archive data.

        Falls back to the scoreboard URL stored in ``self.info`` when the
        archive has no matching round.  Members are keyed "<team>, <season>".
        """
        standings_url = None
        is_final_round = self.name.endswith('Final Round')
        page = REQ.get(
            self.ARCHIVE_DATA_URL_FORMAT_.format(year=self.start_time.year))
        data = json.loads(page)
        names = set()
        for data_round in data['rounds']:
            name = data_round['name']
            # A repeated round name is treated as the qualification round.
            if name in names:
                name = 'Qualification Round'
            # Note: ``and`` binds tighter than ``or`` -- the final-round
            # check applies only to the bracketed names.
            if self.name.endswith(name) or name in [
                    'Full ranking', 'Main round'
            ] and is_final_round:
                data = data_round['data']
                standings_url = self.ARCHIVE_URL_FORMAT_.format(
                    year=self.start_time.year)
                break
            names.add(name)
        else:
            data = None

        if not data:
            if 'hashcode_scoreboard' in self.info:
                page = REQ.get(self.info['hashcode_scoreboard'])
                data = json.loads(page)
            else:
                raise ExceptionParseStandings('Not found data')

        # Rows may come as lists alongside a separate 'columns' header.
        if 'columns' in data:
            columns = data['columns']
            data = data['rows']
        else:
            columns = None

        result = {}
        season = self.get_season()
        for rank, row in enumerate(data, start=1):
            if columns is not None:
                row = dict(zip(columns, row))
            # Normalize keys: lowercase, spaces removed.
            row = {k.lower().replace(' ', ''): v for k, v in row.items()}

            name = row.pop('teamname')
            name = unescape(name)
            member = f'{name}, {season}'

            if users is not None and name not in users:
                continue

            r = result.setdefault(member, {})
            r['name'] = name
            r['member'] = member

            # Sanitize the score into a float-parseable string; fall back
            # to '0' when it is not numeric.
            score = row.pop('score', '0')
            score = re.sub(r'[\s,]', '', str(score))
            try:
                float(score)
            except Exception:
                score = '0'
            r['solving'] = score

            if 'rank' in row:
                r['place'] = row.pop('rank')
            else:
                r['place'] = rank

            if 'country' in row:
                r['_countries'] = re.sub(r',\s+', ',',
                                         row.pop('country')).split(',')
            elif 'countries' in row:
                r['_countries'] = row.pop('countries')

            if 'finalround' in row:
                r['advanced'] = row['finalround']

        standings = {
            'result': result,
            'problems': [],
        }

        if standings_url:
            standings['url'] = standings_url

        return standings
예제 #8
0
 def fetch_data(handle):
     """Fetch the rating-history JSON for *handle*, returning (handle, data)."""
     history_url = self.HISTORY_URL_.format(urlparse(self.url), handle)
     response = REQ.get(f'{history_url}/json')
     return handle, json.loads(response)
예제 #9
0
    def get_standings(self, users=None, statistics=None):
        """Parse an Advent of Code daily leaderboard into a standings dict.

        Returns a dict with 'result' (handle -> row), 'url', 'problems', and
        optionally 'title' and 'timing_statistic_delta'.
        """
        # A season spans two calendar years; events from September onward
        # belong to the season starting that year.
        year = self.start_time.year
        year = year if self.start_time.month >= 9 else year - 1
        season = '%d-%d' % (year, year + 1)

        ret = {}

        # Fetch the day page to recover the puzzle title from the <h2> header.
        page = REQ.get(self.url)
        match = re.search(
            rf'<h2>[^<]*Day\s*[0-9]+:\s*(?P<problem_name>[^<]*)</h2>', page)
        problem_name = match.group('problem_name').strip('-').strip()

        if self.name.count('.') == 1 and problem_name:
            ret['title'] = f'{self.name}. {problem_name}'

        standings_url = self.standings_url or self.url.replace(
            '/day/', '/leaderboard/day/')
        page = REQ.get(standings_url)

        # Each leaderboard entry carries: rank, finish time, an optional
        # profile link, an optional avatar, and the displayed name (which may
        # be an anonymous placeholder).
        matches = re.finditer(
            r'''
            <div[^>]*class="leaderboard-entry"[^>]*>\s*
                <span[^>]*class="leaderboard-position"[^>]*>\s*(?P<rank>[0-9]+)[^<]*</span>\s*
                <span[^>]*class="leaderboard-time"[^>]*>(?P<time>[^<]*)</span>\s*
                (?:<a[^>]*href="(?P<href>[^"]*)"[^>]*>\s*)?
                <span[^>]*class="leaderboard-userphoto"[^>]*>(\s*<img[^>]*src="(?P<avatar>[^"]*)"[^>]*>)?[^<]*</span>\s*
                (?:<span[^>]*class="leaderboard-anon"[^>]*>)?(?P<name>[^<]*)
            ''', page, re.VERBOSE)

        problems_info = OrderedDict()

        result = {}
        last = None
        n_problems = 0
        n_results = 0
        for match in matches:
            n_results += 1
            href = match.group('href')
            name = html.unescape(match.group('name')).strip()
            # Prefer a stable handle from the profile link; otherwise use the
            # anonymous label as-is, or synthesize name+season.
            if href:
                handle = href.split('//')[-1].strip('/')
            elif re.match(r'^\(anonymous user #[0-9]+\)$', name):
                handle = name
            else:
                handle = f'{name}, {season}'
            handle = handle.replace('/', '-')

            # Ranks restart at 1 for each leaderboard section; a
            # non-increasing rank marks the start of the next section/problem.
            rank = int(match.group('rank'))
            if last is None or last >= rank:
                n_problems += 1
            last = rank

            row = result.setdefault(handle, {
                'solving': 0,
                '_skip_for_problem_stat': True
            })
            # Scoring: 100 points for rank 1 down to 1 point for rank 100.
            score = 100 - rank + 1
            row['solving'] += score
            row['name'] = name
            row['member'] = handle

            avatar = match.group('avatar')
            if avatar:
                row['info'] = {'avatar': avatar}

            k = str(n_problems)
            if k not in problems_info:
                problems_info[k] = {
                    'name': problem_name,
                    'code': k,
                    'url': self.url,
                    'group': 0,
                    'full_score': 100
                }

            problem = row.setdefault('problems', {}).setdefault(k, {})
            problem['result'] = score
            # Leaderboard times have no year and are fixed to UTC-05:00.
            time = f'''{self.start_time.year} {match.group('time')} -05:00'''
            problem['time'] = self.to_time(
                arrow.get(time, 'YYYY MMM D  HH:mm:ss ZZ') - self.start_time)
            if rank == 1:
                problem['first_ac'] = True

        # Sections were collected in page order; reversing puts the section
        # labeled 'first star' at index 0 (presumably the page lists the
        # both-stars board first — TODO confirm against a live page).
        problems = list(reversed(problems_info.values()))
        problems[0]['subname'] = 'first star'
        if len(problems) > 1:
            problems[1]['subname'] = 'both stars'

        # Dense ranking by total score; equal scores share a place.
        place = None
        last = None
        for rank, row in enumerate(sorted(result.values(),
                                          key=lambda r: -r['solving']),
                                   start=1):
            score = row['solving']
            if last != score:
                place = rank
                last = score
            row['place'] = place

        ret.update({
            'result': result,
            'url': standings_url,
            'problems': problems,
        })
        # Fewer than 200 entries: the board is still filling up, re-poll soon.
        if n_results < 200:
            ret['timing_statistic_delta'] = timedelta(minutes=5)
        return ret
예제 #10
0
 def fetch_page(page_index):
     # Index 0 maps to the bare standings URL; any other index appends a
     # ?page= query parameter.
     if page_index:
         target = self.standings_url + f'?page={page_index}'
     else:
         target = self.standings_url
     return REQ.get(target), target
예제 #11
0
File: neerc_ifmo.py — Project: aropan/clist
    def get_standings(self, users=None, statistics=None):
        """Parse NEERC/IFMO-style standings from an HTML page.

        If a machine-readable ``.xml`` twin of the standings page exists, its
        per-run data is merged into each team's problem cells.  Optionally
        enriches rows with ICPC regions scraped from icpc.kimden.online.

        Returns the standard standings dict ('result', 'url', 'problems',
        'problems_time_format', 'hidden_fields').
        """
        # A season spans two years; events from September onward belong to the
        # season starting that year.
        year = self.start_time.year
        year = year if self.start_time.month >= 9 else year - 1
        season = '%d-%d' % (year, year + 1)

        result = {}
        problems_info = OrderedDict()

        page = REQ.get(self.standings_url)

        # Best effort: some contests publish an .xml next to the .html.
        try:
            standings_xml = REQ.get(self.standings_url.replace(
                '.html', '.xml'),
                                    detect_charsets=False)
            xml_result = parse_xml(standings_xml)
        except FailOnGetResponse:
            xml_result = {}

        regex = '<table[^>]*class="standings"[^>]*>.*?</table>'
        match = re.search(regex, page, re.DOTALL)
        if not match:
            # Fallback: strip wrapper tables and take the first remaining one.
            page = re.sub('<table[^>]*wrapper[^>]*>', '', page)
            regex = '<table[^>]*>.*?</table>'
            match = re.search(regex, page, re.DOTALL)
        html_table = match.group(0)
        table = parsed_table.ParsedTable(html_table, as_list=True)

        university_regex = self.info.get('standings', {}).get('1st_u',
                                                              {}).get('regex')
        for r in table:
            row = {}
            problems = row.setdefault('problems', {})
            for k, v in r:
                k = k.split()[0]
                if k == 'Total' or k == '=':
                    row['solving'] = int(v.value)
                elif len(k) <= 3:
                    # Short header => problem column.
                    problems_info[k] = {'short': k}
                    if 'title' in v.attrs:
                        problems_info[k]['name'] = v.attrs['title']

                    if '-' in v.value or '+' in v.value or '?' in v.value:
                        p = problems.setdefault(k, {})
                        if ' ' in v.value:
                            point, time = v.value.split()
                        else:
                            point = v.value
                            time = None
                        # A result previously merged from the XML that
                        # disagrees with the HTML cell is discarded.
                        if 'result' in p and point != p.get('result'):
                            p.clear()
                        p['result'] = point
                        if time is not None:
                            p['time'] = time

                        first_ac = v.column.node.xpath(
                            './/*[@class="first-to-solve"]')
                        if len(first_ac):
                            p['first_ac'] = True
                elif k == 'Time':
                    row['penalty'] = int(v.value)
                elif k.lower() in ['place', 'rank']:
                    row['place'] = v.value.strip('.')
                elif 'team' in k.lower() or 'name' in k.lower():
                    if xml_result:
                        problems.update(xml_result[v.value])
                    row['member'] = v.value + ' ' + season
                    row['name'] = v.value
                else:
                    row[k] = v.value
            # Medal/diploma column may use Russian or English letters.
            for f in 'diploma', 'medal':
                medal = row.pop(f, None) or row.pop(f.title(), None)
                if medal:
                    if medal in ['З', 'G']:
                        row['medal'] = 'gold'
                    elif medal in ['С', 'S']:
                        row['medal'] = 'silver'
                    elif medal in ['Б', 'B']:
                        row['medal'] = 'bronze'
                    break
            if university_regex:
                match = re.search(university_regex, row['name'])
                if match:
                    u = match.group('key').strip()
                    row['university'] = u
            result[row['member']] = row

        if statistics and self.info.get('use_icpc.kimden.online'):
            team_regions = {}

            def canonize_name(name):
                # Normalize team names for matching against the external site.
                name = re.sub(':', '', name)
                name = re.sub(r'\s+', ' ', name)
                return name

            def get_region(team_name):
                # Lazily scrape the region selectors and the team table once,
                # then answer lookups from the cached mapping.
                nonlocal team_regions
                if not team_regions:
                    page = REQ.get('https://icpc.kimden.online/')
                    matches = re.finditer(
                        '<label[^>]*for="(?P<selector>[^"]*)"[^"]*onclick="setRegion[^"]*"[^>]*>(?P<name>[^>]*)</',
                        page,
                    )
                    regions = {}
                    for match in matches:
                        selector = match.group('selector').replace(
                            'selector', '').replace('--', '-')
                        regions[selector] = match.group('name')

                    matches = re.finditer(
                        r'''
                        <tr[^>]*class="(?P<class>[^"]*)"[^>]*>\s*<td[^>]*>[^<]*</td>\s*<td[^>]*title="(?P<name>[^"]*)">[^<]*</td>
                        ''',
                        page,
                        re.VERBOSE,
                    )

                    # Row classes encode the region via the selector ids
                    # collected above.
                    for match in matches:
                        classes = match.group('class').split()
                        name = match.group('name')
                        name = canonize_name(name)
                        for c in classes:
                            if c in regions:
                                team_regions[name] = regions[c]
                                break
                team_name = canonize_name(team_name)
                return team_regions[team_name]

            for row in result.values():
                stat = statistics.get(row['member'])
                if not stat:
                    continue
                if stat.get('region'):
                    row['region'] = stat['region']
                else:
                    row['region'] = get_region(row['name'])

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
            'problems_time_format': '{M}:{s:02d}',
            'hidden_fields': ['university', 'region', 'medal'],
        }
        return standings
예제 #12
0
    def get_standings(self, users=None, statistics=None):
        """Scrape a paginated HTML standings table into the standard dict.

        Returns {'result': member -> row, 'url': ..., 'problems': [...]}.
        """

        result = {}
        problems_info = OrderedDict()

        # The text of the last pagination link is the total page count.
        page = REQ.get(self.standings_url)
        match = re.findall('<a[^>]href="[^"]*page=[0-9]+"[^>]*>(?P<n_page>[0-9]+)</a>', page)
        n_page = 1 if not match else int(match[-1])

        def fetch_page(page_index):
            # Index 0 is the bare standings URL; others add ?page=<index>.
            url = self.standings_url
            if page_index:
                url += f'?page={page_index}'
            return REQ.get(url), url

        place = 0
        idx = 0
        prev = None
        with PoolExecutor(max_workers=8) as executor, tqdm.tqdm(total=n_page, desc='fetch pages') as pbar:
            for page, url in executor.map(fetch_page, range(n_page)):
                pbar.set_postfix(url=url)
                pbar.update(1)

                regex = '<table[^>]*>.*?</table>'
                match = re.search(regex, page, re.DOTALL)
                html_table = match.group(0)
                table = parsed_table.ParsedTable(html_table)
                for r in table:
                    idx += 1
                    row = {}
                    problems = row.setdefault('problems', {})
                    for k, v in list(r.items()):
                        k = k.split()[0]
                        if k.lower() == 'score':
                            # Cell looks like "<points>" or "<points> (<penalty>)".
                            solving, *a = v.value.split()
                            row['solving'] = int(solving)
                            if a:
                                row['penalty'] = int(re.sub(r'[\(\)]', '', a[0]))
                        elif len(k) == 1:
                            # Single-letter header => problem column; take the
                            # name/url from the header anchor the first time.
                            if k not in problems_info:
                                problems_info[k] = {'short': k}
                                title = first(v.header.node.xpath('a[@title]/@title'))
                                url = first(v.header.node.xpath('a[@href]/@href'))
                                if title:
                                    problems_info[k]['name'] = title
                                if url:
                                    problems_info[k]['url'] = urllib.parse.urljoin(self.standings_url, url)

                            if '-' in v.value or '+' in v.value:
                                # ICPC-style verdict, optionally with a time.
                                p = problems.setdefault(k, {})
                                if ' ' in v.value:
                                    point, time = v.value.split()
                                    p['time'] = time
                                else:
                                    point = v.value
                                # Normalize "+0" (accepted, no rejects) to "+".
                                if point == '+0':
                                    point = '+'
                                p['result'] = point
                            elif v.value.isdigit():
                                p = problems.setdefault(k, {})
                                p['result'] = v.value
                        elif k.lower() == 'user':
                            row['member'] = v.value
                        else:
                            row[k] = v.value

                    # No penalty column implies partial scoring: count fully
                    # solved (result == '100') problems separately.
                    if 'penalty' not in row:
                        solved = [p for p in list(problems.values()) if p['result'] == '100']
                        row['solved'] = {'solving': len(solved)}

                    # Dense ranking on (score, penalty); ties share a place.
                    curr = (row['solving'], row.get('penalty'))
                    if prev is None or prev != curr:
                        place = idx
                        prev = curr
                    row['place'] = place

                    result[row['member']] = row

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
예제 #13
0
    def get_standings(self, users=None, statistics=None):
        """Parse an olympiad results table (one row per contestant)."""
        result = {}
        problems_info = OrderedDict()
        year = self.start_time.year

        if not self.standings_url:
            self.standings_url = self.url.replace('/olympiads/', '/results/')

        page = REQ.get(self.standings_url)
        regex = '<table[^>]*>.*?</table>'
        html_table = re.search(regex, page, re.DOTALL).group(0)
        table = parsed_table.ParsedTable(html_table, as_list=True)

        idx = 0  # counter for synthesizing handles of unnamed contestants
        for r in table:
            row = OrderedDict()
            problems = row.setdefault('problems', {})
            problem_idx = 0
            for k, v in r:
                # Problem columns are marked with the "taskscore" css class.
                if 'taskscore' in v.header.attrs.get('class', '').split():
                    problem_idx += 1
                    d = problems_info.setdefault(problem_idx, {})
                    d['short'] = str(problem_idx)
                    d['full_score'] = 100
                    d['name'] = k
                    try:
                        score = float(v.value)
                        p = problems.setdefault(str(problem_idx), {})
                        p['result'] = v.value
                        p['partial'] = score < 100
                    except Exception:
                        # Non-numeric cell (no attempt): leave problem unset.
                        pass
                elif k == 'Abs.':
                    row['solving'] = float(v.value)
                elif k == 'Rank':
                    row['place'] = v.value.strip('*').strip('.')
                elif k == 'Contestant':
                    if not v.value:
                        # Anonymous row: synthesize a stable per-year handle.
                        idx += 1
                        member = f'{year}-{idx:06d}'
                        row['member'] = member
                    else:
                        # Handle is the last segment of the profile URL.
                        url = first(v.column.node.xpath('a[@href]/@href'))
                        member = url.strip('/').split('/')[-1]
                        row['member'] = member
                        row['name'] = v.value
                elif k == 'Country':
                    # Strip a trailing participant counter, e.g. "Japan 3".
                    country = re.sub(r'\s*[0-9]+$', '', v.value)
                    if country:
                        row['country'] = country
                else:
                    val = v.value.strip()
                    if val:
                        row[k] = val
            result[row['member']] = row

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
예제 #14
0
    def get_standings(self, users=None, statistics=None):
        """Collect HackerRank leaderboard standings via the REST API.

        Supports both /contests/ and /competitions/ URLs, pages through the
        leaderboard concurrently, resolves school ids to names, and carries
        over rating fields from previously stored ``statistics``.

        Returns the standard standings dict, or {'action': 'delete'} when the
        leaderboard endpoint responds 404.
        """

        standings_url = self.url.rstrip('/') + '/leaderboard'

        per_page = 100
        if '/contests/' in self.url:
            api_standings_url_format = standings_url.replace(
                '/contests/', '/rest/contests/')
            api_standings_url_format += '?offset={offset}&limit={limit}&include_practice=true'
        elif '/competitions/' in self.url:
            # Competitions expose the leaderboard via an hrw resource entry.
            url = self.host + f'api/hrw/resources/{self.key}?include=leaderboard'
            page = REQ.get(url)
            data = json.loads(page)
            entry_id = data['included'][0]['id']
            api_standings_url_format = self.host + f'api/hrw/resources/leaderboards/{entry_id}/leaderboard_entries'
            api_standings_url_format += '?page[limit]={limit}&page[offset]={offset}'
        else:
            raise ExceptionParseStandings(f'Unusual url = {self.url}')

        @RateLimiter(max_calls=1, period=2)
        def fetch_page(page):
            # Fetch one leaderboard page (1-based) and return parsed JSON.
            offset = (page - 1) * per_page
            url = api_standings_url_format.format(offset=offset,
                                                  limit=per_page)
            page = Statistic.get(url)
            data = json.loads(page)
            return data

        result = {}
        hidden_fields = set()
        schools = dict()

        def process_data(data):
            # Contests return rows under 'models'; hrw resources under 'data'.
            rows = data['models'] if 'models' in data else data['data']

            school_ids = set()
            for r in rows:
                if isinstance(r.get('attributes'), dict):
                    r = r['attributes']

                def get(*fields):
                    # Pop the first present field out of the raw row.
                    for f in fields:
                        if f in r:
                            return r.pop(f)

                handle = get('hacker', 'name')
                if handle is None:
                    continue
                row = result.setdefault(handle, collections.OrderedDict())
                row['member'] = handle
                score = get('score', 'solved_challenges')
                if score is None:
                    score = get('percentage_score') * 100
                row['solving'] = score
                row['place'] = get('rank', 'leaderboard_rank')
                time = get('time_taken', 'time_taken_seconds')
                if time:
                    row['time'] = self.to_time(time, 3)

                country = get('country')
                if country:
                    row['country'] = country

                avatar_url = get('avatar')
                if avatar_url:
                    row['info'] = {'avatar_url': avatar_url}

                # Keep any leftover fields, hidden by default.
                for k, v in r.items():
                    if k not in row and v is not None:
                        row[k] = v
                        hidden_fields.add(k)

                if statistics and handle in statistics:
                    stat = statistics[handle]
                    for k in ('old_rating', 'rating_change', 'new_rating'):
                        if k in stat:
                            row[k] = stat[k]

                if 'school_id' in row and row['school_id'] not in schools:
                    school_ids.add(row['school_id'])

            if school_ids:
                # Resolve all newly seen school ids in one bulk query.
                query = ','.join(school_ids)
                url = self.host + f'community/v1/schools?page[limit]={len(school_ids)}&filter[unique_id]={query}'
                page = REQ.get(url)
                data = json.loads(page)
                for s in data['data']:
                    schools[s['id']] = s['attributes']['name']

            for row in result.values():
                if 'school_id' in row and 'school' not in row:
                    row['school'] = schools[row['school_id']]

        try:
            data = fetch_page(1)
        except FailOnGetResponse as e:
            if e.code == 404:
                return {'action': 'delete'}
            raise e
        process_data(data)

        total = data['meta']['record_count'] if 'meta' in data else data[
            'total']
        n_pages = (total - 1) // (per_page) + 1

        with ExitStack() as stack:
            executor = stack.enter_context(
                PoolExecutor(max_workers=Statistic.MAX_WORKERS))
            pbar = stack.enter_context(
                tqdm(total=n_pages - 1, desc='getting pages'))

            # Page 1 was already fetched and processed above, so start from
            # page 2: avoids a duplicate request and keeps the progress bar
            # total (n_pages - 1) consistent with the number of updates.
            for data in executor.map(fetch_page, range(2, n_pages + 1)):
                process_data(data)
                pbar.set_postfix(delay=f'{Statistic.DELAY:.5f}', refresh=False)
                pbar.update()

        hidden_fields.discard('school')

        standings = {
            'result': result,
            'hidden_fields': list(hidden_fields),
            'url': standings_url,
        }
        return standings
예제 #15
0
    def get_standings_from_html(self):
        """Fallback scraper: parse the standings HTML table directly.

        Handles both individual and team rows; a team row is fanned out into
        one result entry per member.
        """
        url = urljoin(self.standings_url, '?lang=en')
        page = REQ.get(url)
        regex = '''<table[^>]*standings[^>]*>.*?</table>'''
        match = re.search(regex, page, re.DOTALL)
        html_table = match.group(0)
        mapping = {
            '#': 'place',
            'Who': 'name',
            '=': 'solving',
            'Penalty': 'penalty',
        }
        table = parsed_table.ParsedTable(html_table, header_mapping=mapping)

        season = self.get_season()

        problems_info = OrderedDict()
        result = {}
        for r in table:
            row = {}
            problems = row.setdefault('problems', {})
            for k, v in r.items():
                if len(k) == 1:
                    # Single-letter column => problem; cell is either
                    # "<verdict> <time>" or just a verdict.
                    problems_info.setdefault(k, {'short': k})
                    if v.value:
                        p = problems.setdefault(k, {})
                        v = v.value
                        if ' ' in v:
                            v, p['time'] = v.split()
                        p['result'] = v
                elif k == 'name':
                    # The country flag image, if present, rides on this cell.
                    f = v.column.node.xpath(
                        './/img[@class="standings-flag"]/@title')
                    if f:
                        row['country'] = f[0]
                    a = v.column.node.xpath('.//a')
                    if not a:
                        # No links: individual without a profile; key the row
                        # by name + season.
                        row[k] = v.value
                        row['member'] = row['name'] + ' ' + season
                    else:
                        # Links may point to a team page and/or member
                        # profiles.
                        for el in a:
                            href = el.attrib.get('href')
                            if not href:
                                continue
                            key, val = href.strip('/').split('/')
                            if key == 'team':
                                row['name'] = el.text
                                row['team_id'] = val
                                row['_account_url'] = urljoin(url, href)
                            elif key == 'profile':
                                row.setdefault('members', []).append(val)
                elif v.value:
                    if k == 'penalty':
                        row[k] = int(v.value)
                    elif v.value:
                        row[k] = v.value

            if 'solving' not in row:
                continue

            # Expand a team row into one result entry per member.
            if 'members' in row:
                if 'team_id' in row:
                    row['_members'] = [{'account': m} for m in row['members']]
                for member in row.pop('members'):
                    result[member] = deepcopy(row)
                    result[member]['member'] = member
            else:
                result[row['member']] = row

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
예제 #16
0
 def get(*args, **kwargs):
     # Fetch the page, then scrub whitespace from this domain's '__'-prefixed
     # raw cookies before returning.
     response = REQ.get(*args, **kwargs)
     for cookie in REQ.get_raw_cookies():
         if cookie.domain == domain and cookie.name.startswith('__'):
             cookie.value = re.sub(r'\s*', '', cookie.value)
     return response
예제 #17
0
def _query(method,
           params,
           api_key=DEFAULT_API_KEY,
           prev_time_queries={},
           api_url_format='https://codeforces.com/api/%s'):
    """Perform an authorized Codeforces API call and return the parsed JSON.

    NOTE: ``prev_time_queries`` is an intentional mutable default — it is a
    persistent per-(key, secret) list of recent request timestamps used for
    client-side rate limiting across calls.
    """
    url = api_url_format % method
    key, secret = api_key
    params = dict(params)  # copy: auth fields are added below

    params.update({
        'time': int(time()),
        'apiKey': key,
        'lang': 'en',
    })

    # apiSig = <rand6>/<method>?<sorted params>#<secret>, sha512-hexed and
    # prefixed with the same 6 random lowercase chars.
    url_encode = '&'.join(
        ('%s=%s' % (k, v) for k, v in sorted(params.items())))

    api_sig_prefix = ''.join(choice(ascii_lowercase) for x in range(6))
    api_sig = '%s/%s?%s#%s' % (
        api_sig_prefix,
        method,
        url_encode,
        secret,
    )
    params['apiSig'] = api_sig_prefix + sha512(
        api_sig.encode('utf8')).hexdigest()
    url += '?' + urlencode(params)

    # Rate limit: after 5 recorded calls with the same credentials, sleep
    # until roughly 4 seconds have passed since the oldest one.
    times = prev_time_queries.setdefault((key, secret), [])
    if len(times) == 5:
        delta = max(4 - (time() - times[0]), 0)
        sleep(delta)
        times.clear()

    # Cache key: the URL with the volatile auth params stripped, so repeated
    # identical requests hit the same cache entry.
    md5_file_cache = url
    for k in (
            'apiSig',
            'time',
    ):
        md5_file_cache = re.sub('%s=[0-9a-z]+' % k, '', md5_file_cache)
    times.append(time())

    # Up to 5 attempts, retrying only on HTTP 503; on other failures try to
    # parse the API's JSON error body, else report the exception text.
    for attempt in reversed(range(5)):
        try:
            page = REQ.get(url, md5_file_cache=md5_file_cache)
            times[-1] = time()
            ret = json.loads(page)
        except FailOnGetResponse as e:
            if e.code == 503 and attempt:
                sleep(1)
                continue
            err = e.args[0]
            if hasattr(err, 'fp'):
                try:
                    ret = json.load(err.fp)
                except json.decoder.JSONDecodeError:
                    ret = {'status': str(e)}
            else:
                ret = {'status': str(e)}
            ret['code'] = getattr(err, 'code', None)
        break

    return ret
예제 #18
0
    def get_standings(self, users=None, statistics=None):
        """Build CodinGame challenge standings from the leaderboard services.

        The global leaderboard response is capped, so when it comes back full
        (>= 1000 users) the method additionally sweeps per-language and
        per-country filtered leaderboards to recover more participants.
        """

        urlinfo = urllib.parse.urlparse(self.url)
        host = f'{urlinfo.scheme}://{urlinfo.netloc}/'

        page = REQ.get(
            host + 'services/Challenge/findWorldCupByPublicId',
            post=f'["{self.key}", null]',
            content_type='application/json',
        )
        data = json.loads(page)
        challenge = data.get('challenge', {})
        clash_hubs = challenge.get('clashHubs')

        def get_leaderboard(url, column="", value=""):
            # Clash hubs and regular challenges use different endpoints and
            # differently shaped POST payloads; column/value apply a filter.
            active = 'true' if column else 'false'
            filt = f'{{"active":{active},"column":"{column}","filter":"{value}"}}'
            if clash_hubs:
                post = f'[1,{filt},null,true,"global",{clash_hubs[0]["clashHubId"]}]'
            else:
                post = f'["{self.key}",null,"global",{filt}]'
            page = REQ.get(url, post=post, content_type='application/json')
            data = json.loads(page)
            return data

        if clash_hubs:
            url = host + 'services/Leaderboards/getClashLeaderboard'
        else:
            url = host + 'services/Leaderboards/getFilteredChallengeLeaderboard'

        data = get_leaderboard(url)

        standings_url = os.path.join(self.url, 'leaderboard')

        # The country list only lives inside the frontend JS bundle: locate
        # the bundle, fetch it, and cut the {id, name} array out of it.
        page = REQ.get(standings_url)
        match = re.search(
            r'<script[^>]*src="(?P<js>[^"]*static.codingame.com/app\.[^"]*\.js)"[^>]*>',
            page)
        page = REQ.get(match.group('js'), detect_charsets=None)
        match = re.search(
            r'const t={EN:(?P<countries>\[{id:"[^"]*",name:"[^"]*"},.*?}]),[A-Z]{2}:',
            page)
        countries = match.group('countries')
        # Quote the bare JS object keys so the array parses as JSON.
        countries = countries.replace('id:', '"id":')
        countries = countries.replace('name:', '"name":')
        countries = json.loads(countries)
        countries = [c['id'] for c in countries]

        languages = list(data.get('programmingLanguages', {}).keys())

        with PoolExecutor(max_workers=8) as executor:
            hidden_fields = set()
            result = {}

            def process_data(data):
                # Merge one leaderboard response into `result`, keyed by the
                # numeric CodinGame user id; first occurrence wins.
                nonlocal hidden_fields
                nonlocal result
                for row in data['users']:
                    if 'codingamer' not in row:
                        continue
                    info = row.pop('codingamer')
                    row.update(info)

                    info['profile_url'] = {
                        'public_handle': info.pop('publicHandle')
                    }
                    handle = str(info.pop('userId'))
                    if handle in result:
                        continue
                    r = result.setdefault(handle, OrderedDict())
                    r['member'] = handle
                    r['place'] = row.pop('rank')
                    r['info'] = info

                    if 'league' in row:
                        league = row.pop('league')
                        r['league'] = league['divisionIndex']
                        r['league_rank'] = row.pop('localRank')

                    # Map known leaderboard fields onto standard row keys.
                    for field, out in (
                        ('score', 'solving'),
                        ('programmingLanguage', 'language'),
                        ('clashes_count', 'clashes_count'),
                        ('pseudo', 'name'),
                        ('countryId', 'country'),
                        ('company', 'company'),
                        ('school', 'school'),
                    ):
                        if field in row:
                            r[out] = row.pop(field)

                    # Epoch milliseconds -> seconds for timestamp fields.
                    if 'updateTime' in row:
                        row['updated'] = row.pop('updateTime') / 1000
                    if 'creationTime' in row:
                        row['created'] = row.pop('creationTime') / 1000

                    # Remaining unmapped fields are kept but hidden.
                    row.pop('public_handle', None)
                    row.pop('test_session_handle', None)
                    row.pop('avatar', None)
                    for k, v in row.items():
                        if k not in r:
                            r[k] = v
                            hidden_fields.add(k)

            process_data(data)

            # A full page means the global board is truncated: sweep filtered
            # boards per language, then per country.
            if len(data['users']) >= 1000:
                fetch_data = partial(get_leaderboard, url, "LANGUAGE")
                for data in tqdm.tqdm(executor.map(fetch_data, languages),
                                      total=len(languages),
                                      desc='languages'):
                    process_data(data)

                fetch_data = partial(get_leaderboard, url, "COUNTRY")
                for data in tqdm.tqdm(executor.map(fetch_data, countries),
                                      total=len(countries),
                                      desc='countries'):
                    process_data(data)

        standings = {
            'url': standings_url,
            'result': result,
            'fields_types': {
                'updated': ['timestamp'],
                'created': ['timestamp']
            },
            'hidden_fields': hidden_fields,
            'options': {
                'fixed_fields': [
                    ('league', 'league'),
                    ('league_rank', 'league_rank'),
                    ('language', 'Language'),
                    ('clashes_count', 'clashes_count'),
                    ('created', 'Submit Time'),
                ],
                'medals': [
                    {
                        'name': 'gold',
                        'count': 1
                    },
                    {
                        'name': 'silver',
                        'count': 1
                    },
                    {
                        'name': 'bronze',
                        'count': 1
                    },
                ],
            },
        }

        return standings
예제 #19
0
    def _old_get_standings(self, users=None):
        """Legacy Google Code Jam scoreboard parser.

        Scrapes the ``GCJ.*`` javascript globals from the scoreboard page,
        then pages through the GetScoreboard JSON endpoint concurrently to
        build the result table.
        """
        if not self.standings_url:
            self.standings_url = self.url.replace('/dashboard', '/scoreboard')

        page = REQ.get(self.standings_url)

        # Collect GCJ.<key> = <value> javascript globals (contest id, paging,
        # csrf token).
        matches = re.finditer(r'GCJ.(?P<key>[^\s]*)\s*=\s*"?(?P<value>[^";]*)',
                              page)
        vs = {m.group('key'): m.group('value') for m in matches}
        vs['rowsPerPage'] = int(vs['rowsPerPage'])

        matches = re.finditer(r'GCJ.problems.push\((?P<problem>{[^}]*})', page)
        problems_info = OrderedDict([])
        problems = [json.loads(m.group('problem')) for m in matches]

        # Subtasks are pushed per problem; an '();' marker in the JS starts a
        # new problem's subtask list.
        matches = re.finditer(
            r'(?P<new>\(\);)?\s*io.push\((?P<subtask>{[^}]*})', page)
        tid = -1
        for idx, m in enumerate(matches):
            subtask = json.loads(m.group('subtask'))
            if m.group('new'):
                tid += 1
            idx = str(idx)
            task = problems[tid].copy()
            task.update(subtask)
            task['name'] = task.pop('title')
            task['code'] = idx
            task['full_score'] = task.pop('points')
            problems_info[idx] = task

        def fetch_page(page_idx):
            # One GetScoreboard JSON page: rowsPerPage rows, 1-based start.
            nonlocal vs
            params = {
                'cmd': 'GetScoreboard',
                'contest_id': vs['contestId'],
                'show_type': 'all',
                'start_pos': page_idx * vs['rowsPerPage'] + 1,
                'csrfmiddlewaretoken': vs['csrfMiddlewareToken'],
            }
            url = os.path.join(self.standings_url,
                               'do') + '?' + urllib.parse.urlencode(params)
            page = REQ.get(url)
            data = json.loads(page)
            return data

        data = fetch_page(0)
        n_page = (data['stat']['nrp'] - 1) // vs['rowsPerPage'] + 1

        def time2str(t):
            # Seconds -> "H:MM:SS" (or "M:SS" under an hour).
            h = t // 3600
            if h:
                return f'{h}:{t // 60 % 60:02d}:{t % 60:02d}'
            return f'{t // 60}:{t % 60:02d}'

        result = {}
        with PoolExecutor(max_workers=8) as executor:
            for data in tqdm.tqdm(executor.map(fetch_page, range(n_page)),
                                  total=n_page):
                for row in data['rows']:
                    handle = row.pop('n')
                    r = result.setdefault(handle, {})
                    r['member'] = handle
                    r['country'] = row.pop('c')
                    r['penalty'] = time2str(row.pop('pen'))
                    r['solving'] = row.pop('pts')
                    r['place'] = row.pop('r')

                    # 'att' holds attempt counts, 'ss' solve times; -1 means
                    # never accepted.
                    problems = r.setdefault('problems', {})
                    solved = 0
                    for idx, (attempt, time) in enumerate(
                            zip(row.pop('att'), row.pop('ss'))):
                        if attempt:
                            p = problems.setdefault(str(idx), {})
                            if time == -1:
                                p['result'] = -attempt
                            else:
                                solved += 1
                                p['result'] = '+' if attempt == 1 else f'+{attempt - 1}'
                                p['time'] = time2str(time)
                    r['solved'] = {'solving': solved}

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
예제 #20
0
    def get_standings(self, users=None, statistics=None):
        """Parse an HTML standings table with heuristic column mapping.

        Column headers (mostly Russian) are translated to canonical field
        names via ``c_mapping``; remaining single-column headers are treated
        as problem columns.  City/region strings are geocoded with Nominatim
        to derive countries; geocoding results are cached on disk in
        ``LOCATION_CACHE_FILE`` and persisted even when parsing fails.

        :param users: unused here; kept for interface compatibility.
        :param statistics: previously-parsed rows keyed by member, used to
            reuse already-known countries.
        :return: dict with ``result``, ``problems`` and ``hidden_fields``.
        """
        geolocator = Nominatim(user_agent="clist.by")
        geocode_func = partial(geolocator.geocode, timeout=10)
        # Respect the Nominatim usage policy: at most ~1 request per second.
        geocode = RateLimiter(geocode_func, min_delay_seconds=1, max_retries=3)

        season = self.key.split('.')[0]

        if not self.standings_url:
            return {}

        page = REQ.get(self.standings_url)
        # Normalize malformed <tl> tags into <tr> so the table parser works.
        page = re.sub('<(/?)tl([^>]*)>', r'<\1tr\2>', page)

        regex = '<table[^>]*class="standings"[^>]*>.*?</table>'
        match = re.search(regex, page, re.DOTALL)
        if not match:
            regex = r'<table\s*(?:align="center"\s*)?border="1"\s*(?:align="center"\s*)?>.*?</table>'
            matches = re.finditer(regex, page, re.DOTALL)
            # Exhaust the iterator so `match` ends up bound to the LAST
            # matching table on the page.
            for match in matches:
                pass
        if not match:
            raise ExceptionParseStandings('not found standings table')
        html_table = match.group(0)
        # Map raw (mostly Russian) column headers to canonical field names.
        c_mapping = {
            'place':
            'place',
            'место':
            'place',
            'user':
            '******',
            'team':
            'name',
            'участник':
            'name',
            'solved':
            'solved',
            'total':
            'solved',
            'имя':
            'first_name',
            'фамилия':
            'last_name',
            'отчество':
            'middle_name',
            'логин':
            'login',
            'login':
            '******',
            'класс':
            'class',
            'город':
            'city',
            'субъект российской федерации (для иностранных участников - государство)':
            'city',
            'балл':
            'solving',
            'сумма':
            'solving',
            'баллы':
            'solving',
            'score':
            'solving',
            'sum':
            'solving',
            'диплом':
            'diploma',
            'степень диплома':
            'diploma',
            'номер диплома':
            'diploma_number',
            'страна':
            'country',
            'школа (сокр.)':
            'school',
            'школа':
            'school',
            'учебное зачедение, класс':
            'school',
            'регион/статус':
            'region',
            'регион':
            'region',
            'имя в таблице':
            'handle',
            'uid':
            'uid',
        }

        table = parsed_table.ParsedTable(html_table)

        # Load the persistent geocoding cache (location string -> addresses).
        locations = None
        if os.path.exists(self.LOCATION_CACHE_FILE):
            with open(self.LOCATION_CACHE_FILE, 'r') as fo:
                locations = yaml.safe_load(fo)
        if locations is None:
            locations = {}

        def get_location(loc_info):
            # Return cached {ru, en} addresses for a location string,
            # geocoding (and caching) on a miss; None when unresolvable.
            loc_info = re.sub(r'[.,\s]+', ' ', loc_info).strip().lower()
            if loc_info not in locations:
                try:
                    ru = geocode(loc_info, language='ru')
                    en = geocode(loc_info, language='en')
                    if ru is None and en is None:
                        locations[loc_info] = None
                    else:
                        locations[loc_info] = {
                            'ru': ru.address,
                            'en': en.address
                        }
                except Exception:
                    pass

            return locations.get(loc_info)

        def get_country(address):
            # The country is the last comma-separated component of the
            # English address; a leading "The " is stripped.
            *_, country = map(str.strip, address['en'].split(','))
            if country.startswith('The '):
                country = country[4:]
            return country

        try:
            result = {}
            problems_info = OrderedDict()
            has_bold = False
            last, place, placing = None, None, {}
            for idx, r in enumerate(tqdm.tqdm(table, total=len(table)),
                                    start=1):
                row = OrderedDict()
                problems = row.setdefault('problems', {})
                # Problem columns get successive letters starting from 'A'.
                letter = chr(ord('A') - 1)
                solved = 0
                for k, v in list(r.items()):
                    is_russian = bool(re.search('[а-яА-Я]', k))
                    c = v.attrs.get('class')
                    c = c.split()[0] if c else k.lower()
                    if c and c.startswith('st_'):
                        c = c[3:].lower()
                    # Unmapped non-Russian headers (or explicit 'prob' class)
                    # are treated as problem score columns.
                    if c in ['prob'] or c not in c_mapping and not is_russian:
                        letter = chr(ord(letter) + 1)
                        problem_info = problems_info.setdefault(
                            letter, {
                                'short': letter,
                                'full_score': 100,
                            })
                        if letter.lower() != k.lower():
                            problem_info['name'] = k
                        if 'title' in v.attrs:
                            problem_info['name'] = v.attrs['title']

                        if v.value != DOT and v.value:
                            p = problems.setdefault(letter, {})

                            # Bold cells mark full (non-partial) solutions
                            # on pages that use bolding at all.
                            if v.column.node.xpath('b'):
                                p['partial'] = False
                                has_bold = True

                            v = v.value
                            if SPACE in v:
                                v, t = v.split(SPACE, 1)
                                p['time'] = t

                            try:
                                score = float(v)
                                p['result'] = v
                                p['partial'] = score < problem_info[
                                    'full_score']
                            except ValueError:
                                pass
                            if 'partial' in p and not p['partial']:
                                solved += 1
                    else:
                        v = v.value.strip()
                        if not v or v == '-':
                            continue
                        c = c_mapping.get(c, c).lower()
                        row[c] = v

                        # Diploma degree translates into a medal color;
                        # out-of-competition entries get no medal.
                        if c == 'diploma':
                            row['_medal_title_field'] = 'diploma'
                            v = v.lower().split()[0]
                            if re.search('(^в.к|^вне)', v):
                                continue
                            if v in ['gold', 'i', '1'] or v.startswith('перв'):
                                row['medal'] = 'gold'
                            elif v in ['silver', 'ii', '2'
                                       ] or v.startswith('втор'):
                                row['medal'] = 'silver'
                            elif v in ['bronze', 'iii', '3'
                                       ] or v.startswith('трет'):
                                row['medal'] = 'bronze'
                            else:
                                row['medal'] = 'honorable'

                if 'solving' not in row:
                    if 'solved' in row:
                        row['solving'] = row.pop('solved')
                    else:
                        continue
                row['solved'] = {'solving': solved}

                # Derive places from equal scores when the table has no
                # explicit place column; `placing` tracks shared places.
                if 'place' not in row:
                    if place is None and idx != 1:
                        continue
                    if row['solving'] != last:
                        place = idx
                        last = row['solving']
                    placing[place] = idx
                    row['place'] = place

                if 'name' not in row:
                    if 'first_name' in row and 'last_name' in row:
                        row['name'] = row['last_name'] + ' ' + row['first_name']
                    elif 'first_name' in row and 'last_name' not in row:
                        row['name'] = row.pop('first_name')

                # Member key preference: login, then "name season", then a
                # synthetic per-contest index.
                if 'login' in row:
                    row['member'] = row['login']
                    if 'name' in row:
                        row['_name_instead_key'] = True
                elif 'name' in row:
                    name = row['name']
                    if ' ' in name:
                        row['member'] = name + ' ' + season
                    else:
                        row.pop('name')
                        row['member'] = name
                else:
                    row['member'] = f'{self.pk}-{idx}'

                # Reuse a known country from prior statistics, otherwise try
                # to geocode the city/extra location hints.
                addition = (statistics or {}).get(row['member'], {})
                if addition:
                    country = addition.get('country')
                    if country:
                        row.setdefault('country', country)
                    if 'country' not in row:
                        locs = []
                        if 'city' in row:
                            locs.append(row['city'])
                        if 'extra' in row:
                            extra = row['extra']
                            extra = re.sub(r'\s*(Не РФ|Not RF):\s*', ' ',
                                           extra, re.IGNORECASE)
                            locs.extend(extra.split(','))
                        for loc in locs:
                            loc = re.sub(r'\s*[0-9]+\s*', ' ', loc)
                            loc = loc.strip()

                            address = get_location(loc)
                            if address:
                                country = get_country(address)
                                row['country'] = country
                                break

                result[row['member']] = row
            # Render shared places as ranges, e.g. "3-5".
            if placing:
                for row in result.values():
                    place = row['place']
                    last = placing[place]
                    row['place'] = str(
                        place) if place == last else f'{place}-{last}'

            # When bolding is used, any scored problem NOT bolded is partial.
            if has_bold:
                for row in result.values():
                    for p in row.get('problems').values():
                        if 'partial' not in p and 'result' in p:
                            p['partial'] = True
        finally:
            # Always persist the geocoding cache, even on parse failure.
            with open(self.LOCATION_CACHE_FILE, 'wb') as fo:
                yaml.dump(locations, fo, encoding='utf8', allow_unicode=True)

        standings = {
            'result':
            result,
            'problems':
            list(problems_info.values()),
            'hidden_fields': [
                'extra',
                'first_name',
                'last_name',
                'middle_name',
                'class',
                'city',
                'country',
                'diploma',
                'school',
                'login',
                'region',
                'uid',
                'handle',
                'diploma_number',
            ],
        }
        return standings
예제 #21
0
 def get(offset, num):
     """Fetch and decode one page of the ranking API.

     offset: minimum rank to start from.
     num: number of consecutive users to request.
     """
     payload = f'{{"min_rank":{offset},"num_consecutive_users":{num}}}'
     request_url = api_ranking_url_format + encode(payload)
     raw = REQ.get(request_url)
     return decode(raw)
예제 #22
0
    def get_standings(self, users=None, statistics=None):
        """Locate and parse a training-contest standings table.

        When ``standings_url`` is unknown, it is discovered by walking the
        site navigation ('Соревнования' -> 'Тренировочные олимпиады' ->
        past-results list) and choosing the standings entry matching the
        contest date/title.  The silver-bordered HTML table is then parsed;
        for score-based contests an auxiliary ACM table marks partial/full
        solutions so a solved count can be derived.

        :param users: unused here; kept for interface compatibility.
        :param statistics: unused here; kept for interface compatibility.
        :return: dict with ``result``, ``url``, ``problems`` and
            ``info_fields``; on success the chosen standings data is stored
            back on the resource to avoid re-parsing next time.
        """
        standings_data = None
        if not self.standings_url:
            page = REQ.get(urljoin(self.url, '/'))

            # Walk navigation links down to the trainings section.
            for name in (
                'Соревнования',
                'Тренировочные олимпиады',
            ):
                match = re.search('<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(name), page)
                url = match.group('url')
                page = REQ.get(url)

            regex = '''
            <a[^>]*href=["']?[^<"']*cid=(?P<cid>[0-9]+)[^>]*>[^>]*{}[^>]*</a>.*?
            <a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<
            '''.format(
                re.escape(self.name),
                re.escape('Результаты прошедших тренировок'),
            )
            match = re.search(regex, page, re.DOTALL | re.IGNORECASE | re.VERBOSE)

            if not match:
                raise ExceptionParseStandings('Not found standings urls list')

            url = match.group('url')
            cid = match.group('cid')
            last_standings_data = self.resource.info['parse']['last_standings_data'].get(cid, {})
            page = REQ.get(url)

            # Accept either the contest date or the previous day (results may
            # be posted under either).
            dates = [self.start_time, self.start_time - timedelta(days=1)]
            dates = [d.strftime('%Y-%m-%d') for d in dates]
            re_dates = '|'.join(dates)

            regex = r'''
            <tr[^>]*>[^<]*<td[^>]*>\s*(?P<date>{})\s*</td>[^<]*
            <td[^>]*>(?P<title>[^<]*)</td>[^<]*
            <td[^>]*>[^<]*<a[^>]*href\s*=["\s]*(?P<url>[^">]*)["\s]*[^>]*>
            '''.format(re_dates)
            matches = re.findall(regex, page, re.MULTILINE | re.VERBOSE)

            datas = [
                {'date': date.strip(), 'title': title.strip(), 'url': urljoin(url, u)}
                for date, title, u in matches
            ]
            # Drop school/grade-restricted variants when alternatives exist.
            if len(datas) > 1:
                regex = r'[0-9]\s*-\s*[0-9].*(?:[0-9]\s*-\s*[0-9].*\bкл\b|школа)'
                datas = [d for d in datas if not re.search(regex, d['title'], re.I)]

            if last_standings_data:
                datas = [d for d in datas if d['date'] > last_standings_data['date']]

            if not datas:
                raise ExceptionParseStandings('Not found standings url')

            if len(datas) > 1:
                _datas = [d for d in datas if d['date'] == dates[0]]
                if _datas:
                    datas = _datas

            if len(datas) > 1:
                # Disambiguate by the parent directory of each candidate: if
                # all candidates share one parent, any of them will do.
                ok = True
                urls_map = {}
                for d in datas:
                    url = d['url']
                    page = REQ.get(url)
                    path = re.findall('<td[^>]*nowrap><a[^>]*href="(?P<href>[^"]*)"', page)
                    if len(path) < 2:
                        # No parent link found: mark ambiguous and skip this
                        # candidate instead of crashing on path[-2] with an
                        # IndexError.
                        ok = False
                        continue
                    parent = urljoin(url, path[-2])
                    urls_map.setdefault(parent, d)
                if len(urls_map) > 1:
                    standings_data = datas[0]
                elif not ok:
                    raise ExceptionParseStandings('Too much standing url')
                else:
                    standings_data = list(urls_map.values())[0]
            else:
                standings_data = datas[0]

            page = REQ.get(standings_data['url'])
            self.standings_url = REQ.last_url

        try:
            page = REQ.get(self.standings_url)
        except FailOnGetResponse as e:
            if e.code == 404:
                raise ExceptionParseStandings('Not found response from standings url')
            raise e

        def get_table(page):
            # Extract the silver-bordered standings table from the page.
            html_table = re.search('<table[^>]*bgcolor="silver"[^>]*>.*?</table>',
                                   page,
                                   re.MULTILINE | re.DOTALL).group(0)
            table = parsed_table.ParsedTable(html_table)
            return table

        table = get_table(page)

        problems_info = OrderedDict()
        max_score = defaultdict(float)

        # True when any cell looks like a numeric score rather than +/-.
        scoring = False

        result = {}
        for r in table:
            row = OrderedDict()
            problems = row.setdefault('problems', {})
            for k, v in list(r.items()):
                if k == 'Имя':
                    # Member id is the numeric suffix of the profile link;
                    # assumes every row has one — TODO confirm.
                    href = v.column.node.xpath('a/@href')
                    if not href:
                        continue
                    uid = re.search('[0-9]+$', href[0]).group(0)
                    row['member'] = uid
                    row['name'] = v.value
                elif k == 'Место':
                    row['place'] = v.value
                elif k == 'Время':
                    row['penalty'] = int(v.value)
                elif k in ['Сумма', 'Задачи']:
                    row['solving'] = float(v.value)
                elif re.match('^[a-zA-Z0-9]+$', k):
                    # Alphanumeric headers are problem columns.
                    problems_info[k] = {'short': k}
                    if v.value:
                        p = problems.setdefault(k, {})
                        p['result'] = v.value

                        if v.value and v.value[0] not in ['-', '+']:
                            scoring = True

                        try:
                            max_score[k] = max(max_score[k], float(v.value))
                        except ValueError:
                            pass
                elif k:
                    row[k.strip()] = v.value.strip()
                elif v.value.strip().lower() == 'log':
                    href = v.column.node.xpath('.//a/@href')
                    if href:
                        row['url'] = urljoin(self.standings_url, href[0])
            result[row['member']] = row

        if scoring:
            # The companion ACM-style table shows +/- verdicts, letting us
            # mark partial vs full solutions for scored contests.
            match = re.search(r'<b[^>]*>\s*<a[^>]*href="(?P<url>[^"]*)"[^>]*>ACM</a>\s*</b>', page)
            if match:
                page = REQ.get(match.group('url'))
                table = get_table(page)
                for r in table:
                    uid = None
                    for k, v in list(r.items()):
                        if k == 'Имя':
                            href = v.column.node.xpath('a/@href')
                            if not href:
                                continue
                            uid = re.search('[0-9]+$', href[0]).group(0)
                        elif re.match('^[a-zA-Z0-9]+$', k) and uid and v.value:
                            if v.value[0] == '-':
                                result[uid]['problems'][k]['partial'] = True
                            elif v.value[0] == '+':
                                result[uid]['problems'][k]['partial'] = False
                                problems_info[k]['full_score'] = result[uid]['problems'][k]['result']

        # Count solved problems: '+' verdicts, explicit non-partial marks,
        # or a score equal to the best seen for that problem.
        for r in result.values():
            solved = 0
            for k, p in r['problems'].items():
                if p.get('partial'):
                    continue
                score = p['result']
                if score.startswith('+') or 'partial' in p and not p['partial']:
                    solved += 1
                else:
                    try:
                        score = float(score)
                    except ValueError:
                        continue
                    if abs(max_score[k] - score) < 1e-9 and score > 0:
                        solved += 1
            r['solved'] = {'solving': solved}

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
            'info_fields': ['_standings_data'],
        }

        if result and standings_data:
            standings['_standings_data'] = standings_data
            self.resource.info['parse']['last_standings_data'][cid] = standings_data
            self.resource.save()

        return standings
예제 #23
0
 def get_source_code(contest, problem):
     """Download the solution source for *problem* via its 'url' field.

     Raises ExceptionParseStandings when the problem has no URL.
     """
     if 'url' not in problem:
         raise ExceptionParseStandings('Not found url')
     return {'solution': REQ.get(problem['url'])}
예제 #24
0
    def get_standings(self, users=None, statistics=None):
        """Parse standings from static HTML result pages.

        Tries several candidate URLs derived from ``self.url``/``self.key``
        (regular plus "-upsolving"/"-practice" variants); rows from the
        upsolve variants are recorded under ``upsolving`` keys and diffed
        against contest-time results so they count only post-contest work.

        :param users: unused here; kept for interface compatibility.
        :param statistics: unused here; kept for interface compatibility.
        :return: dict with ``result``, ``problems`` and (when any page
            loaded) ``url`` keys.
        """
        result = {}
        standings_url = None
        problems_info = OrderedDict()
        for url, upsolve in (
            (self.url, False),
            (self.url.replace(".html", "-upsolving.html"), True),
            (self.url.replace("-training-", "-practice-"), True),
            (self.key, False),
            (self.key.replace(".html", "-upsolving.html"), True),
            (self.key.replace("-training-", "-practice-"), True),
        ):
            # A derived URL equal to the original means the replace was a
            # no-op; skip it to avoid parsing the same page twice.
            if upsolve and url in [self.url, self.key]:
                continue
            try:
                page = REQ.get(url)
            except Exception:
                continue
            if standings_url is None:
                standings_url = url

            header = None
            for match in re.findall(r'<tr[^>]*>.*?<\/tr>', page):
                match = match.replace('&nbsp;', ' ')
                fields = [
                    re.sub('<[^>]*>', ' ', m).strip()
                    for m in re.findall(r'<t[hd][^>]*>.*?\/t[hd]>', match)
                ]

                # Rows containing <th> define column names for later rows.
                if re.search(r'<\/th>', match):
                    header = fields
                    continue

                if not header:
                    continue

                fields = dict(list(zip(header, fields)))
                get_value = partial(self.get_value_by_keys_, fields)

                place = get_value('Место', 'Place')
                if not place:
                    continue
                member = get_value('Логин', 'Login', 'User', 'Участник')
                row = result.setdefault(member, {'member': member})

                # 'solving' for contest rows, 'upsolving' for upsolve rows.
                type_ = ('up' if upsolve else '') + 'solving'
                row[type_] = int(
                    get_value('Всего', 'Решённые задачи', 'Total', 'Score'))

                problems = row.setdefault('problems', {})
                # Single letters / up-to-two-digit numbers are problem columns.
                for k in sorted(fields.keys()):
                    if re.match('^(?:[A-Z]|[0-9]{,2})$', k):
                        problems_info[k] = {'short': k}
                        v = fields[k].split()
                        if len(v) > 0:
                            p = {'result': v[0]}
                            if len(v) > 1:
                                p['time'] = re.sub('[^0-9:]', '', v[1])
                            if upsolve:
                                # Record as upsolving only when the result
                                # differs from the contest-time one.
                                a = problems.setdefault(k, {})
                                if a.get('result', None) != p['result']:
                                    a['upsolving'] = p
                            else:
                                problems[k] = p

                try:
                    solved = int(
                        get_value('Решённые задачи', 'Solved problems'))
                    row.setdefault('solved', {})[type_] = solved
                except ExceptionParseStandings:
                    pass

                if upsolve:
                    # Upsolving totals include contest-time results; subtract
                    # them so only post-contest progress remains.
                    row['upsolving'] -= row.get('solving', 0)
                    if 'solved' in row:
                        row['solved']['upsolving'] -= row['solved'].get(
                            'solving', 0)
                else:
                    row['place'] = place

            if not header:
                raise ExceptionParseStandings('Not detect header')

        standings = {
            'result': result,
            'problems': list(problems_info.values()),
        }
        if standings_url is not None:
            standings['url'] = standings_url
        return standings
예제 #25
0
 def fetch_page(page):
     """Download a single standings page for the 0-based *page* index."""
     page_url = f'{standings_url}&page={page + 1}'
     return REQ.get(page_url)
예제 #26
0
    def get_standings(self, users=None, statistics=None):
        """Locate and parse a training-contest standings table (older flow).

        When ``standings_url`` is unknown, it is discovered by walking the
        site navigation and matching the contest name against the list of
        past training results for the exact contest date.  The
        silver-bordered HTML table is parsed; for score-based contests an
        auxiliary ACM table marks partial/full solutions.

        :param users: unused here; kept for interface compatibility.
        :param statistics: unused here; kept for interface compatibility.
        :return: dict with ``result``, ``url`` and ``problems`` keys.
        """
        if not self.standings_url:
            page = REQ.get(urljoin(self.url, '/'))

            # Walk navigation links down to the trainings section.
            for name in (
                    'Соревнования',
                    'Тренировочные олимпиады',
            ):
                match = re.search(
                    '<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(name), page)
                page = REQ.get(match.group('url'))

            match = re.search(
                '{}.*?<a[^>]*href="(?P<url>[^"]*)"[^>]*>{}<'.format(
                    re.escape(self.name), 'Результаты прошедших тренировок'),
                page,
                re.DOTALL,
            )
            if not match:
                raise ExceptionParseStandings('Not found standing url')

            url = match.group('url')
            page = REQ.get(url)

            date = self.start_time.strftime('%Y-%m-%d')
            matches = re.findall(
                r'''
                <tr[^>]*>[^<]*<td[^>]*>{}</td>[^<]*
                <td[^>]*>(?P<title>[^<]*)</td>[^<]*
                <td[^>]*>[^<]*<a[^>]*href\s*=["\s]*(?P<url>[^">]*)["\s]*[^>]*>
            '''.format(date), page, re.MULTILINE | re.VERBOSE)

            urls = [(title, urljoin(url, u)) for title, u in matches]
            # Drop school/grade-restricted variants when alternatives exist.
            if len(urls) > 1:
                urls = [(
                    title, urljoin(url, u)
                ) for title, u in matches if not re.search(
                    r'[0-9]\s*-\s*[0-9].*(?:[0-9]\s*-\s*[0-9].*\bкл\b|школа)',
                    title, re.I)]

            if not urls:
                raise ExceptionParseStandings('Not found standing url')

            if len(urls) > 1:
                # Disambiguate by the parent directory of each candidate: if
                # all candidates share one parent, any of them will do.
                ok = True
                urls_set = set()
                for _, u in urls:
                    page = REQ.get(u)
                    path = re.findall(
                        '<td[^>]*nowrap><a[^>]*href="(?P<href>[^"]*)"', page)
                    if len(path) < 2:
                        # No parent link found: mark ambiguous and skip this
                        # candidate instead of crashing on path[-2] with an
                        # IndexError.
                        ok = False
                        continue
                    parent = urljoin(u, path[-2])
                    urls_set.add(parent)
                if len(urls_set) > 1:
                    _, url = urls[0]
                elif not ok:
                    raise ExceptionParseStandings('Too much standing url')
                else:
                    url = urls_set.pop()
            else:
                _, url = urls[0]

            page = REQ.get(url)
            self.standings_url = REQ.last_url
        else:
            page = REQ.get(self.standings_url)

        def get_table(page):
            # Extract the silver-bordered standings table from the page.
            html_table = re.search(
                '<table[^>]*bgcolor="silver"[^>]*>.*?</table>', page,
                re.MULTILINE | re.DOTALL).group(0)
            table = parsed_table.ParsedTable(html_table)
            return table

        table = get_table(page)

        problems_info = OrderedDict()
        max_score = defaultdict(float)

        # True when any cell looks like a numeric score rather than +/-.
        scoring = False

        result = {}
        for r in table:
            row = OrderedDict()
            problems = row.setdefault('problems', {})
            for k, v in list(r.items()):
                if k == 'Имя':
                    # Member id is the numeric suffix of the profile link;
                    # assumes every row has one — TODO confirm.
                    href = v.column.node.xpath('a/@href')
                    if not href:
                        continue
                    uid = re.search('[0-9]+$', href[0]).group(0)
                    row['member'] = uid
                    row['name'] = v.value
                elif k == 'Место':
                    row['place'] = v.value
                elif k == 'Время':
                    row['penalty'] = int(v.value)
                elif k in ['Сумма', 'Задачи']:
                    row['solving'] = float(v.value)
                elif re.match('^[a-zA-Z0-9]+$', k):
                    # Alphanumeric headers are problem columns.
                    problems_info[k] = {'short': k}
                    if v.value:
                        p = problems.setdefault(k, {})
                        p['result'] = v.value

                        if v.value and v.value[0] not in ['-', '+']:
                            scoring = True

                        try:
                            max_score[k] = max(max_score[k], float(v.value))
                        except ValueError:
                            pass
                elif k:
                    row[k.strip()] = v.value.strip()
                elif v.value.strip().lower() == 'log':
                    href = v.column.node.xpath('.//a/@href')
                    if href:
                        row['url'] = urljoin(self.standings_url, href[0])
            result[row['member']] = row

        if scoring:
            # The companion ACM-style table shows +/- verdicts, letting us
            # mark partial vs full solutions for scored contests.
            match = re.search(
                r'<b[^>]*>\s*<a[^>]*href="(?P<url>[^"]*)"[^>]*>ACM</a>\s*</b>',
                page)
            if match:
                page = REQ.get(match.group('url'))
                table = get_table(page)
                for r in table:
                    uid = None
                    for k, v in list(r.items()):
                        if k == 'Имя':
                            href = v.column.node.xpath('a/@href')
                            if not href:
                                continue
                            uid = re.search('[0-9]+$', href[0]).group(0)
                        elif re.match('^[a-zA-Z0-9]+$', k) and uid and v.value:
                            if v.value[0] == '-':
                                result[uid]['problems'][k]['partial'] = True
                            elif v.value[0] == '+':
                                result[uid]['problems'][k]['partial'] = False
                                problems_info[k]['full_score'] = result[uid][
                                    'problems'][k]['result']

        # Count solved problems: '+' verdicts, explicit non-partial marks,
        # or a score equal to the best seen for that problem.
        for r in result.values():
            solved = 0
            for k, p in r['problems'].items():
                if p.get('partial'):
                    continue
                score = p['result']
                if score.startswith(
                        '+') or 'partial' in p and not p['partial']:
                    solved += 1
                else:
                    try:
                        score = float(score)
                    except ValueError:
                        continue
                    if abs(max_score[k] - score) < 1e-9 and score > 0:
                        solved += 1
            r['solved'] = {'solving': solved}

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }

        return standings
예제 #27
0
파일: usaco.py 프로젝트: aropan/clist
    def get_standings(self, users=None, statistics=None):
        """Build standings for a USACO contest.

        Scrapes the contest page for per-division result links, gathers each
        division's problem list (from a dedicated problems page when one is
        linked, otherwise from the text between division links), then parses
        every division's HTML result tables into rows keyed by
        ``"name, country"``.

        Args:
            users: unused here; kept for interface compatibility.
            statistics: unused here; kept for interface compatibility.

        Returns:
            dict with keys 'result', 'url', 'problems' and 'hidden_fields'.
        """
        def parse_problems(page, full=False):
            # Each problem renders as a "panel historypanel" div holding an
            # index header, the problem name, and a link whose ``cpid`` query
            # parameter identifies the problem.
            matches = re.finditer(
                r'''
                <div[^>]*class=['"]panel\s*historypanel['"][^>]*>\s*
                <div[^>]*>\s*<h[^>]*>(?P<index>[^<]*)</h[^>]*>\s*</div>\s*
                <div[^>]*>(\s*<[^>]*>)*(?P<name>[^<]+)
                (\s*<[^>]*>)*\s*<a[^>]*href=["'](?P<url>[^"']*)["'][^>]*>
            ''', page, re.VERBOSE)

            problems = []
            problemsets = []

            prev_index = None
            for match in matches:
                index = match.group('index')
                if prev_index and index <= prev_index:
                    # A non-increasing index marks the boundary between
                    # divisions: with full=True start a new problemset,
                    # otherwise only the first division's problems are wanted.
                    if full:
                        problemsets.append(problems)
                        problems = []
                    else:
                        break
                prev_index = index
                url = urllib.parse.urljoin(self.standings_url,
                                           match.group('url'))
                cpid = re.search('cpid=([0-9]+)', url).group(1)
                problems.append({
                    'short': str(len(problems) + 1),  # 1-based position
                    'code': cpid,
                    'name': match.group('name'),
                    'url': url,
                })

            if problems:
                problemsets.append(problems)

            return problemsets if full else problems

        page = REQ.get(self.standings_url)
        # One link per division, e.g. "..._gold_results.html" -> name "gold".
        divisions = list(
            re.finditer(
                '<a[^>]*href="(?P<url>[^"]*data[^"]*_(?P<name>[^_]*)_results.html)"[^>]*>',
                page))
        # The text between consecutive division links (and after the last
        # one) describes that division's problems.
        descriptions = []
        prev_span = None
        for division_match in divisions:
            curr_span = division_match.span()
            if prev_span is not None:
                descriptions.append(page[prev_span[1]:curr_span[0]])
            prev_span = curr_span
        if prev_span is not None:
            descriptions.append(page[prev_span[1]:])

        problems_info = OrderedDict()
        # Prefer the dedicated problems page when the contest links one.
        match = re.search(
            '''<a[^>]*href=["'](?P<href>[^"']*page=[a-z0-9]+problems)["'][^>]*>''',
            page)
        if match:
            url = urllib.parse.urljoin(self.standings_url, match.group('href'))
            page = REQ.get(url)
            problemsets = parse_problems(page, full=True)
            assert len(divisions) == len(problemsets)
        else:
            problemsets = None

        result = {}
        d0_set = set()
        for division_idx, (division_match, description) in enumerate(
                zip(divisions, descriptions)):
            division = division_match.group('name')

            d_problems = parse_problems(
                description
            ) if problemsets is None else problemsets[division_idx]
            division_info = problems_info.setdefault('division', OrderedDict())
            division_info[division] = d_problems

            # Prefix problem shorts with the division's first letter; the
            # letters must be unique across divisions (asserted below).
            d0 = division[0].upper()
            assert d0 not in d0_set
            d0_set.add(d0)
            for p in d_problems:
                p['short'] = d0 + p['short']

            url = urllib.parse.urljoin(self.standings_url,
                                       division_match.group('url'))
            page = REQ.get(url)

            # Each results table is preceded by a short title naming the
            # list it belongs to (stored in the 'list' field below).
            tables = re.finditer(
                r'>(?P<title>[^<]*)</[^>]*>\s*(?P<html><table[^>]*>.*?</table>)',
                page, re.DOTALL)
            for table_match in tables:
                title = table_match.group('title')
                table = parsed_table.ParsedTable(table_match.group('html'))

                for r in table:
                    row = OrderedDict()
                    problems = row.setdefault('problems', {})
                    solved = 0
                    idx = 0
                    for key, value in r.items():
                        key = key.replace('&nbsp', ' ').strip()
                        if not key:
                            continue
                        if isinstance(value, list):
                            # A list cell is a per-testcase status string made
                            # of '*' (passed) and other marks.
                            status = ''.join(v.value for v in value)
                            idx += 1
                            if not status:
                                continue
                            # All '*' means full score; anything else partial.
                            partial = not bool(re.match(r'^[\*]+$', status))
                            solved += not partial
                            problems[d0 + str(idx)] = {
                                'partial':
                                partial,
                                'result':
                                1000 / len(d_problems) * status.count('*') /
                                len(status),
                                'status':
                                status,
                            }
                        elif key == 'Score':
                            row['solving'] = int(value.value)
                        else:
                            row[key.lower()] = value.value.replace(
                                '&nbsp', ' ').strip()
                    # No stable handle is available here, so rows are keyed
                    # by name and country.
                    row['member'] = f'{row["name"]}, {row["country"]}'
                    row['division'] = division
                    row['list'] = title.strip().strip(':')
                    row['solved'] = {'solving': solved}
                    result[row['member']] = row

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
            'hidden_fields': ['list'],
        }
        return standings
Example #28
0
    def get_standings(self, users=None, statistics=None):
        """Parse paginated standings tables into a result mapping.

        Walks standings pages starting at ``self.standings_url``, reading the
        table with a ``standings`` class on each page and following "next
        page" links until none is advertised.

        Args:
            users: unused here; kept for interface compatibility.
            statistics: unused here; kept for interface compatibility.

        Returns:
            dict with keys 'result', 'url' and 'problems', or an empty dict
            when the standings url carries no numeric contest id.

        Raises:
            ExceptionParseStandings: when no standings table is found on a page.
        """
        if not hasattr(self, 'season'):
            # Derive season "YYYY-YYYY+1"; months 1..8 are attributed to the
            # previous year's season (season assumed to start in September).
            year = self.start_time.year - (0
                                           if self.start_time.month > 8 else 1)
            season = f'{year}-{year + 1}'
        else:
            season = self.season

        result = {}
        problems_info = OrderedDict()

        # Without a numeric contest id in the url there is nothing to parse.
        if not re.search('/[0-9]+/', self.standings_url):
            return {}

        url = self.standings_url
        n_page = 1
        while True:
            page = REQ.get(url)

            match = re.search(
                '<table[^>]*class="[^"]*standings[^>]*>.*?</table>', page,
                re.MULTILINE | re.DOTALL)
            if not match:
                raise ExceptionParseStandings('Not found table standings')

            html_table = match.group(0)
            unnamed_fields = self.info.get('standings',
                                           {}).get('unnamed_fields', [])
            table = parsed_table.ParsedTable(html_table,
                                             unnamed_fields=unnamed_fields)

            for r in table:
                row = {}
                problems = row.setdefault('problems', {})
                solved = 0
                has_solved = False
                # Cells are dispatched on their CSS role classes.
                for k, v in list(r.items()):
                    if 'table__cell_role_result' in v.attrs['class']:
                        letter = k.split(' ', 1)[0]
                        if letter == 'X':  # skipped: not treated as a problem
                            continue

                        p = problems_info.setdefault(letter, {'short': letter})
                        names = v.header.node.xpath('.//span/@title')
                        if len(names) == 1:
                            p['name'] = names[0]

                        p = problems.setdefault(letter, {})
                        n = v.column.node
                        # Success/fail icons mean binary '+'/'-' verdicts;
                        # otherwise the cell text is "<score> <time>".
                        if n.xpath(
                                'img[contains(@class,"image_type_success")]'):
                            res = '+'
                            p['binary'] = True
                        elif n.xpath(
                                'img[contains(@class,"image_type_fail")]'):
                            res = '-'
                            p['binary'] = False
                        else:
                            if ' ' not in v.value:
                                # No "score time" pair: drop the cell entirely.
                                problems.pop(letter)
                                continue
                            res = v.value.split(' ', 1)[0]
                            res = res.replace(',', '')
                        p['result'] = res
                        p['time'] = v.value.split(' ', 1)[-1]
                        if 'table__cell_firstSolved_true' in v.attrs['class']:
                            p['first_ac'] = True

                        # Accepted ('+') or full-score ("100...") counts as solved.
                        if '+' in res or res.startswith('100'):
                            solved += 1

                        # has_solved becomes True only when a non-'+' result
                        # parses as a positive score (i.e. score-based rows).
                        try:
                            has_solved = has_solved or '+' not in res and float(
                                res) > 0
                        except ValueError:
                            pass
                    elif 'table__cell_role_participant' in v.attrs['class']:
                        title = v.column.node.xpath('.//@title')
                        if title:
                            name = str(title[0])
                        else:
                            name = v.value.replace(' ', '', 1)
                        row['name'] = name
                        # Names containing a space get the season appended —
                        # presumably to disambiguate personal names across
                        # seasons; TODO(review): confirm against callers.
                        row['member'] = name if ' ' not in name else f'{name} {season}'

                        country = v.column.node.xpath(
                            ".//div[contains(@class,'country-flag')]/@title")
                        if country:
                            row['country'] = str(country[0])
                    elif 'table__cell_role_place' in v.attrs['class']:
                        row['place'] = v.value
                    elif 'table__header_type_penalty' in v.attrs['class']:
                        # Keep the raw string when the penalty is not an integer.
                        row['penalty'] = int(v.value) if re.match(
                            '^-?[0-9]+$', v.value) else v.value
                    elif 'table__header_type_score' in v.attrs['class']:
                        row['solving'] = float(v.value.replace(',', ''))
                if has_solved:
                    row['solved'] = {'solving': solved}
                if not problems:
                    continue
                result[row['member']] = row

            # Follow the link to the next page, if the page advertises one.
            n_page += 1
            match = re.search(
                f'<a[^>]*href="(?P<href>[^"]*standings[^"]*p[^"]*={n_page})"[^>]*>',
                page)
            if not match:
                break
            url = urljoin(url, match.group('href'))

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
        }
        return standings
Example #29
0
 def fetch_page(page):
     """Return the decoded JSON payload of ranking page *page*.

     The caller's index is zero-based; the remote API numbers pages from 1.
     """
     response = REQ.get(api_ranking_url_format.format(page + 1))
     return json.loads(response)
Example #30
0
    def get_standings(self, users=None, statistics=None):
        """Parse an ``ir-contest-standings`` table into a result mapping.

        Handles both binary ('+'/'-') and score-based result cells; when no
        '+' result occurs anywhere in the table, a per-row solved count is
        derived from positive, non-partial numeric scores instead.

        Args:
            users: optional collection of member keys; rows whose member is
                not in it are skipped.
            statistics: unused here; kept for interface compatibility.

        Returns:
            dict with 'result', 'url', 'problems' and 'problems_time_format'.
        """
        # Season "YYYY-YYYY+1"; months 1..8 are attributed to the previous
        # year's season (season assumed to start in September).
        year = self.start_time.year - (0 if self.start_time.month > 8 else 1)
        season = f'{year}-{year + 1}'

        result = {}

        page = REQ.get(self.standings_url)
        table = parsed_table.ParsedTable(
            html=page, xpath="//table[@class='ir-contest-standings']//tr")
        problems_info = collections.OrderedDict()
        has_plus = False
        for r in table:
            row = collections.OrderedDict()
            problems = row.setdefault('problems', {})
            ioi_total_fields = ['Sum', 'Сумма']
            # ioi_style = any((f in r for f in ioi_total_fields))
            for k, v in list(r.items()):
                classes = v.attrs['class'].split()
                if 'ir-column-contestant' in classes:
                    # Member key carries the season suffix — presumably to
                    # keep handles unique across years; verify against callers.
                    row['member'] = v.value + ' ' + season
                    row['name'] = v.value
                elif 'ir-column-place' in classes:
                    row['place'] = v.value
                elif 'ir-column-penalty' in classes:
                    row['penalty'] = int(v.value)
                elif 'ir-problem-count' in classes or k in ioi_total_fields:
                    row['solving'] = int(v.value)
                elif len(k.split()[0]) == 1:
                    # Single-letter header: a problem column.
                    letter = k.split()[0]
                    problems_info[letter] = {'short': letter}
                    if v.value == DOT:
                        continue
                    # Normalize unicode minus, then split "<result> <time>".
                    p = problems.setdefault(letter, {})
                    values = v.value.replace('−', '-').split(' ')
                    p['result'] = values[0]
                    if p['result'].startswith('+'):
                        has_plus = True
                    elif v.column.node.xpath('.//*[@class="ir-rejected"]'):
                        p['partial'] = True
                    if len(values) > 1:
                        p['time'] = values[1]
                else:
                    row[k.lower()] = v.value
            if not problems or users and row['member'] not in users:
                continue
            member = row['member']
            # Disambiguate duplicate member keys with a numeric suffix.
            if member in result:
                idx = 0
                while member + f'-{idx}' in result:
                    idx += 1
                member += f'-{idx}'
                row['member'] = member
            result[member] = row

        # No '+' anywhere means the contest is score-based: count positive,
        # non-partial scores as solved.
        if not has_plus:
            for row in result.values():
                solved = 0
                for p in row['problems'].values():
                    if p.get('partial'):
                        continue
                    try:
                        score = float(p['result'])
                        if score > 0:
                            solved += 1
                    except Exception:
                        pass
                row['solved'] = {'solving': solved}

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': list(problems_info.values()),
            'problems_time_format': '{H}:{m:02d}',
        }
        return standings