Example #1
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)

        if not self.name or not self.url:
            raise InitModuleException()

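        # The contest key is expected to contain a four-digit year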
        match = re.search(r'\b[0-9]{4}\b', self.key)
        if not match:
            raise InitModuleException('Not found year')
        self.year = int(match.group())
Example #2
File: codeforces.py Project: kmyk/clist
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        cid = self.key
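        # key is "<contest id>", optionally followed by ":<api key name>" or ":<key>:<secret>"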
        if ':' in cid:
            cid, api = cid.split(':', 1)
            self.api_key = api.split(':') if ':' in api else API_KEYS[api]
        else:
            self.api_key = DEFAULT_API_KEY
        if not re.match('^[0-9]+$', cid):
            raise InitModuleException(f'Contest id {cid} should be number')
        self.cid = cid
Example #3
File: withgoogle.py Project: aropan/clist
    def get_standings(self, users=None, statistics=None):
        if 'hashcode_scoreboard' in self.info or re.search(r'\bhash.*code\b.*\(round|final\)$', self.name, re.I):
            ret = self._hashcode(users, statistics)
        elif '/codingcompetitions.withgoogle.com/' in self.url:
            ret = self._api_get_standings(users, statistics)
        elif '/code.google.com/' in self.url or '/codejam.withgoogle.com/' in self.url:
            ret = self._old_get_standings(users)
        else:
            raise InitModuleException(f'url = {self.url}')
        if re.search(r'\bfinal\S*(?:\s+round)?$', self.name, re.I):
            ret['options'] = {'medals': [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]}
        return ret
Example #4
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

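        # Spectator ranklist contests are handled without a contest id or api key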
        self.is_spectator_ranklist = self.standings_url and 'spectator/ranklist' in self.standings_url
        if self.is_spectator_ranklist:
            return

        cid = self.key
        if ':' in cid:
            cid, api = cid.split(':', 1)
            self.api_key = api.split(':') if ':' in api else API_KEYS[api]
        else:
            self.api_key = DEFAULT_API_KEY
        if not re.match('^[0-9]+$', cid):
            raise InitModuleException(f'Contest id {cid} should be number')
        self.cid = cid
Example #5
File: usaco.py Project: aropan/clist
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)
        if not self.standings_url:
            url = 'http://usaco.org/index.php?page=contests'
            page = REQ.get(url)
            matches = re.finditer(
                '<a[^>]*href="(?P<url>[^"]*)"[^>]*>(?P<name>[^<]*[0-9]{4}[^<]*Results)</a>',
                page)
            month = self.start_time.strftime('%B').lower()
            prev_standings_url = None
            for match in matches:
                name = match.group('name').lower()
                if (month in name or self.name.lower() in name) and str(self.start_time.year) in name:
                    self.standings_url = urllib.parse.urljoin(url, match.group('url'))
                    break
                if (month in name or self.name.lower() in name) and str(self.start_time.year - 1) in name:
                    prev_standings_url = urllib.parse.urljoin(url, match.group('url'))
            else:
                if prev_standings_url is not None:
                    # Guess this year's url by incrementing every number in last year's url
                    pred_standings_url = re.sub(
                        '[0-9]+', lambda m: str(int(m.group(0)) + 1),
                        prev_standings_url)
                    url = 'http://usaco.org/'
                    page = REQ.get(url)
                    matches = re.finditer(
                        '<a[^>]*href="?(?P<url>[^"]*)"?[^>]*>here</a>', page)
                    for match in matches:
                        standings_url = urllib.parse.urljoin(url, match.group('url'))
                        if standings_url == pred_standings_url:
                            self.standings_url = standings_url
                            break
                if not self.standings_url:
                    raise InitModuleException(
                        f'Not found standings url with '
                        f'month = {month}, '
                        f'year = {self.start_time.year}, '
                        f'name = {self.name}')
Example #6
    def get_standings(self, users=None, statistics=None):
        result = {}

        start_time = self.start_time.replace(tzinfo=None)

        if not self.standings_url and datetime.now() - start_time < timedelta(
                days=30):
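            # Try to recover a missing standings url by fuzzy-matching recent match list entries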
            re_round_overview = re.compile(
                r'''
(?:<td[^>]*>
    (?:
        [^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
        (?P<date>[0-9]+\.[0-9]+\.[0-9]+)
    )</td>[^<]*
){2}
                ''',
                re.VERBOSE,
            )
            for url in [
                    'https://www.topcoder.com/tc?module=MatchList&nr=100500',
                    'https://community.topcoder.com/longcontest/stats/?module=MatchList&nr=100500',
            ]:
                page = REQ.get(url)
                matches = re_round_overview.finditer(str(page))
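                # Accept a title only if the word-set Jaccard similarity exceeds this threshold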
                opt = 0.61803398875
                for match in matches:
                    date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                    if abs(date - start_time) < timedelta(days=2):
                        title = match.group('title')
                        intersection = len(
                            set(title.split()) & set(self.name.split()))
                        union = len(
                            set(title.split()) | set(self.name.split()))
                        iou = intersection / union
                        if iou > opt:
                            opt = iou
                            self.standings_url = urljoin(
                                url, match.group('url'))

        if not self.standings_url:
            raise InitModuleException('Not set standings url for %s' %
                                      self.name)

        url = self.standings_url + '&nr=100000042'
        page = REQ.get(url)
        result_urls = re.findall(
            r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)

        if not result_urls:  # marathon match
            match = re.search(
                '<[^>]*>Problem:[^<]*<a[^>]*href="(?P<href>[^"]*)"[^>]*>(?P<name>[^<]*)<',
                page)
            problem_name = match.group('name').strip()
            problems_info = [{
                'short': problem_name,
                'url': urljoin(url, match.group('href').replace('&amp;', '&')),
            }]
            rows = etree.HTML(page).xpath(
                "//table[contains(@class, 'stat')]//tr")
            header = None
            for row in rows:
                r = parsed_table.ParsedTableRow(row)
                if len(r.columns) < 8:
                    continue
                values = [
                    c.value.strip().replace(u'\xa0', '') for c in r.columns
                ]
                if header is None:
                    header = values
                    continue

                d = OrderedDict(list(zip(header, values)))
                handle = d.pop('Handle').strip()
                d = self._dict_as_number(d)
                if 'rank' not in d or users and handle not in users:
                    continue
                row = result.setdefault(handle, OrderedDict())
                row.update(d)

                score = row.pop('final_score' if 'final_score' in row else 'provisional_score')
                row['member'] = handle
                row['place'] = row.pop('rank')
                row['solving'] = score
                row['solved'] = {'solving': 1 if score > 0 else 0}

                problems = row.setdefault('problems', {})
                problem = problems.setdefault(problem_name, {})
                problem['result'] = score

                history_index = values.index('submission history')
                if history_index:
                    column = r.columns[history_index]
                    href = column.node.xpath('a/@href')
                    if href:
                        problem['url'] = urljoin(url, href[0])
        else:  # single round match
            matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
            problems_sets = []
            for match in matches:
                problems = re.findall(
                    '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                    match.group(),
                    re.IGNORECASE,
                )
                if problems:
                    problems_sets.append([{
                        'short': n,
                        'url': urljoin(url, u)
                    } for u, n in problems])

            problems_info = dict() if len(problems_sets) > 1 else list()
            for problems_set, result_url in zip(problems_sets, result_urls):
                url = urljoin(self.standings_url,
                              result_url + '&em=1000000042')
                url = url.replace('&amp;', '&')
                division = int(parse_qs(url)['dn'][0])

                for p in problems_set:
                    d = problems_info
                    if len(problems_sets) > 1:
                        d = d.setdefault('division', OrderedDict())
                        d = d.setdefault('I' * division, [])
                    d.append(p)

                page = REQ.get(url)
                rows = etree.HTML(page).xpath("//tr[@valign='middle']")
                header = None
                url_infos = []
                for row in rows:
                    r = parsed_table.ParsedTableRow(row)
                    if len(r.columns) < 10:
                        continue
                    values = [c.value for c in r.columns]
                    if header is None:
                        header = values
                        continue

                    d = OrderedDict(list(zip(header, values)))
                    handle = d.pop('Coders').strip()
                    d = self._dict_as_number(d)
                    if 'division_placed' not in d or users and handle not in users:
                        continue

                    row = result.setdefault(handle, OrderedDict())
                    row.update(d)

                    if not row.get('new_rating') and not row.get(
                            'old_rating') and not row.get('rating_change'):
                        row.pop('new_rating', None)
                        row.pop('old_rating', None)
                        row.pop('rating_change', None)

                    row['member'] = handle
                    row['place'] = row.pop('division_placed')
                    row['solving'] = row['point_total']
                    row['solved'] = {'solving': 0}
                    row['division'] = 'I' * division

                    if 'adv.' in row:
                        row['advanced'] = row.pop('adv.').lower().startswith(
                            'y')

                    url_info = urljoin(url,
                                       r.columns[0].node.xpath('a/@href')[0])
                    url_infos.append(url_info)

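                # Fetch the source of a submitted solution, retrying once; returns None on failure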
                def fetch_solution(url):
                    for i in range(2):
                        try:
                            page = REQ.get(url, time_out=60)
                            match = re.search(
                                '<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                                page, re.DOTALL | re.IGNORECASE)
                            ret = html.unescape(match.group('solution'))
                            ret = ret.strip()
                            ret = ret.replace('<BR>', '\n')
                            ret = ret.replace('\xa0', ' ')
                            return ret
                        except FailOnGetResponse:
                            sleep(i * 10 + 3)
                    return None

                def fetch_info(url):
                    delay = 3
                    for _ in range(5):
                        try:
                            page = REQ.get(url)
                            break
                        except Exception:
                            sleep(delay)
                            delay *= 2
                    else:
                        return url, None, None, {}, [], 0  # pad to the 6-tuple shape the caller unpacks

                    match = re.search(
                        'class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                        page, re.IGNORECASE)
                    handle = html.unescape(match.group('handle').strip())

                    match = re.search(r'&nbsp;Room\s*(?P<room>[0-9]+)', page)
                    room = match.group('room') if match else None

                    matches = re.finditer(
                        r'''
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    problems = {}
                    n_fetch_solution = 0
                    for match in matches:
                        d = match.groupdict()
                        short = d.pop('short')
                        solution_url = urljoin(url, d['url'])
                        d['url'] = solution_url
                        d = self._dict_as_number(d)
                        if d['status'] in [
                                'Challenge Succeeded', 'Failed System Test'
                        ]:
                            d['result'] = -d['result']
                        if abs(d['result']) < 1e-9:
                            d.pop('result')
                        if re.match('^[0.:]+$', d['time']):
                            d.pop('time')

                        solution = (statistics or {}).get(handle, {}).get(
                            'problems', {}).get(short, {}).get('solution')
                        if not solution:
                            n_fetch_solution += 1
                            solution = fetch_solution(solution_url)
                        d['solution'] = solution

                        problems[short] = d

                    challenges = []
                    matches = re.finditer(
                        r'''
                        <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    for match in matches:
                        d = match.groupdict()
                        d = {k: v.strip() for k, v in d.items()}
                        d['result'] = float(d['result'].replace(',', '.'))
                        d['url'] = urljoin(url, d['url'])

                        p = problems.setdefault(d['problem'], {})
                        p.setdefault('extra_score', 0)
                        p['extra_score'] += d['result']
                        p.setdefault(
                            'extra_info',
                            []).append(f'{d["target"]}: {d["result"]}')
                        challenges.append(d)

                    return url, handle, room, problems, challenges, n_fetch_solution

                with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(
                        total=len(url_infos)) as pbar:
                    n_fetch_solution = 0
                    for url, handle, room, problems, challenges, n_sol in executor.map(
                            fetch_info, url_infos):
                        n_fetch_solution += n_sol
                        pbar.set_description(f'div{division} {url}')
                        pbar.set_postfix(n_solution=n_fetch_solution)
                        pbar.update()
                        if handle is not None:
                            if handle not in result:
                                LOG.error(
                                    f'{handle} not in result, url = {url}')
                            result[handle]['url'] = url
                            if room:
                                result[handle]['room'] = room
                            result[handle]['problems'] = problems
                            result[handle]['challenges'] = challenges
                            for p in problems.values():
                                if p.get('result', 0) > 1e-9:
                                    result[handle]['solved']['solving'] += 1
                            if challenges:
                                h = result[handle].setdefault('hack', {
                                    'title': 'challenges',
                                    'successful': 0,
                                    'unsuccessful': 0,
                                })
                                for c in challenges:
                                    h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
            'options': {
                'fixed_fields': [('hack', 'Challenges')],
            },
        }

        if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
            standings['options']['medals'] = [{
                'name': name,
                'count': 1
            } for name in ('gold', 'silver', 'bronze')]

        return standings
Example #7
File: dl_gsu.py Project: VadVergasov/clist
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)

        if not self.name or not self.start_time or not self.url:
            raise InitModuleException()
Example #8
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)
        if not self.url.startswith('http://stats.ioinformatics.org/olympiads/'):
            raise InitModuleException(f'Url = {self.url} should be from stats.ioinformatics.org')
Example #9
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)
        if not self.standings_url:
            raise InitModuleException('Not set standings url for %s' % self.name)
Example #10
    def get_standings(self, users=None, statistics=None):
        result = {}
        hidden_fields = []
        fields_types = {}
        order = None
        writers = defaultdict(int)

        start_time = self.start_time.replace(tzinfo=None)

        if not self.standings_url and datetime.now() - start_time < timedelta(days=30):
            opt = 0.61803398875

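            # Reduce a contest title to a normalized set of words (srm/mm abbreviations, tcoYY prefixes)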
            def canonize_title(value):
                value = value.lower()
                value = re.sub(r'\s+-[^-]+$', '', value)
                value = re.sub(r'\bsingle\s+round\s+match\b', 'srm', value)
                value = re.sub(r'\bmarathon\s+match\b', 'mm', value)
                value = re.sub(r'[0-9]*([0-9]{2})\s*tco(\s+)', r'tco\1\2', value)
                value = re.sub(r'tco\s*[0-9]*([0-9]{2})(\s+)', r'tco\1\2', value)
                value = re.sub(r'^[0-9]{2}([0-9]{2})(\s+)', r'tco\1\2', value)
                return set(re.split('[^A-Za-z0-9]+', value))

            def process_match(date, title, url):
                nonlocal opt

                if abs(date - start_time) > timedelta(days=2):
                    return

                a1 = canonize_title(title)
                a2 = canonize_title(self.name)
                intersection = 0
                for w1 in a1:
                    for w2 in a2:
                        if w1.isdigit() or w2.isdigit():
                            if w1 == w2:
                                intersection += 1
                                break
                        elif w1.startswith(w2) or w2.startswith(w1):
                            intersection += 1
                            break
                union = len(a1) + len(a2) - intersection
                iou = intersection / union
                if iou > opt:
                    opt = iou
                    self.standings_url = url

            url = 'https://www.topcoder.com/tc?module=MatchList&nr=100500'
            page = REQ.get(url)
            re_round_overview = re.compile(
                r'''
(?:<td[^>]*>(?:
[^<]*<a[^>]*href="(?P<url>[^"]*/stat[^"]*rd=(?P<rd>[0-9]+)[^"]*)"[^>]*>(?P<title>[^<]*)</a>[^<]*|
(?P<date>[0-9]+\.[0-9]+\.[0-9]+)
)</td>[^<]*){2}
                ''',
                re.VERBOSE,
            )
            matches = re_round_overview.finditer(str(page))
            for match in matches:
                date = datetime.strptime(match.group('date'), '%m.%d.%Y')
                process_match(date, match.group('title'), urljoin(url, match.group('url')))

            url = 'https://www.topcoder.com/tc?module=BasicData&c=dd_round_list'
            page = REQ.get(url)
            root = ET.fromstring(page)
            for child in root:
                data = {}
                for field in child:
                    data[field.tag] = field.text
                date = dateutil.parser.parse(data['date'])
                url = 'https://www.topcoder.com/stat?c=round_overview&er=5&rd=' + data['round_id']
                process_match(date, data['full_name'], url)

        for url in self.url, self.standings_url:
            if url:
                match = re.search('/challenges/(?P<cid>[0-9]+)', url)
                if match:
                    challenge_id = match.group('cid')
                    break
        else:
            challenge_id = None

        if challenge_id:  # marathon match
            url = conf.TOPCODER_API_MM_URL_FORMAT.format(challenge_id)
            page = REQ.get(url)
            data = json.loads(page)
            problems_info = []
            hidden_fields.extend(['time', 'submits', 'style'])
            fields_types = {'delta_rank': ['delta'], 'delta_score': ['delta']}
            order = ['place_as_int', '-solving', 'addition__provisional_rank', '-addition__provisional_score']
            for row in data:
                handle = row.pop('member')
                r = result.setdefault(handle, OrderedDict())
                r['member'] = handle
                r['place'] = row.pop('finalRank', None)
                r['provisional_rank'] = row.pop('provisionalRank', None)
                r['style'] = row.pop('style')
                if r['place'] and r['provisional_rank']:
                    r['delta_rank'] = r['provisional_rank'] - r['place']
                submissions = row.pop('submissions')
                has_solution = False
                for s in submissions:
                    score = s.get('finalScore')
                    if not score or score == '-':
                        if 'provisional_score' not in r:
                            p_score = s.pop('provisionalScore', None)
                            if isinstance(p_score, str):
                                p_score = asfloat(p_score)
                            if p_score is not None:
                                r['provisional_score'] = round(p_score, 2) if p_score >= 0 else False
                                r['time'] = s['created']
                                has_solution = True
                        continue
                    r['solving'] = score
                    r['solved'] = {'solving': int(score > 0)}
                    p_score = s.pop('provisionalScore')
                    if isinstance(p_score, str):
                        p_score = asfloat(p_score)
                    if p_score is not None and p_score > 0:
                        r['provisional_score'] = round(p_score, 2)
                        r['delta_score'] = round(score - p_score, 2)
                    r['time'] = s['created']
                    has_solution = True
                    break
                if not has_solution:
                    continue
                r['submits'] = len(submissions)
            if not result:
                raise ExceptionParseStandings('empty standings')
        else:  # single round match
            if not self.standings_url:
                raise InitModuleException('Not set standings url for %s' % self.name)
            url = self.standings_url + '&nr=100000042'
            page = REQ.get(url, time_out=100)
            result_urls = re.findall(r'<a[^>]*href="(?P<url>[^"]*)"[^>]*>Results</a>', str(page), re.I)
            if not result_urls:
                raise ExceptionParseStandings('not found result urls')

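            # Collect extra per-handle data (ratings, volatility, languages) from the dd_round_results feed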
            dd_round_results = {}
            match = re.search('rd=(?P<rd>[0-9]+)', url)
            if match:
                rd = match.group('rd')
                url = f'https://www.topcoder.com/tc?module=BasicData&c=dd_round_results&rd={rd}'
                try:
                    dd_round_results_page = REQ.get(url)
                    root = ET.fromstring(dd_round_results_page)
                    for child in root:
                        data = {}
                        for field in child:
                            data[field.tag] = field.text
                        handle = data.pop('handle')
                        dd_round_results[handle] = self._dict_as_number(data)
                except FailOnGetResponse:
                    pass

            hidden_fields.extend(['coding_phase', 'challenge_phase', 'system_test', 'point_total', 'room'])

            matches = re.finditer('<table[^>]*>.*?</table>', page, re.DOTALL)
            problems_sets = []
            for match in matches:
                problems = re.findall(
                    '<a[^>]*href="(?P<href>[^"]*c=problem_statement[^"]*)"[^>]*>(?P<name>[^/]*)</a>',
                    match.group(),
                    re.IGNORECASE,
                )
                if problems:
                    problems_sets.append([
                        {'short': n, 'url': urljoin(url, u)}
                        for u, n in problems
                    ])

            problems_info = dict() if len(problems_sets) > 1 else list()
            for problems_set, result_url in zip(problems_sets, result_urls):
                url = urljoin(self.standings_url, result_url + '&em=1000000042')
                url = url.replace('&amp;', '&')
                division = int(parse_qs(url)['dn'][0])
                division_str = 'I' * division

                with PoolExecutor(max_workers=3) as executor:
                    def fetch_problem(p):
                        errors = set()
                        for attempt in range(3):
                            try:
                                page = REQ.get(p['url'], time_out=30)
                                match = re.search('<a[^>]*href="(?P<href>[^"]*module=ProblemDetail[^"]*)"[^>]*>', page)
                                page = REQ.get(urljoin(p['url'], match.group('href')), time_out=30)
                                matches = re.findall(r'<td[^>]*class="statTextBig"[^>]*>(?P<key>[^<]*)</td>\s*<td[^>]*>(?P<value>.*?)</td>', page, re.DOTALL)  # noqa
                                for key, value in matches:
                                    key = key.strip().rstrip(':').lower()
                                    if key == 'categories':
                                        tags = [t.strip().lower() for t in value.split(',')]
                                        tags = [t for t in tags if t]
                                        if tags:
                                            p['tags'] = tags
                                    elif key.startswith('writer') or key.startswith('tester'):
                                        key = key.rstrip('s') + 's'
                                        p[key] = re.findall('(?<=>)[^<>,]+(?=<)', value)
                                for w in p.get('writers', []):
                                    writers[w] += 1

                                info = p.setdefault('info', {})
                                matches = re.finditer('<table[^>]*paddingTable2[^>]*>.*?</table>', page, re.DOTALL)
                                for match in matches:
                                    html_table = match.group(0)
                                    rows = parsed_table.ParsedTable(html_table)
                                    for row in rows:
                                        key, value = None, None
                                        for k, v in row.items():
                                            if k == "":
                                                key = v.value
                                            elif k and division_str in k.split():
                                                value = v.value
                                        if key and value:
                                            key = re.sub(' +', '_', key.lower())
                                            info[key] = value
                                            if key == 'point_value':
                                                value = toint(value) or asfloat(value)
                                                if value is not None:
                                                    p['full_score'] = value
                            except Exception as e:
                                errors.add(f'error parse problem info {p}: {e}')
                                sleep(5 + attempt)
                            else:
                                # Parsed successfully: clear any earlier errors and stop retrying
                                errors = None
                                break
                        if errors:
                            LOG.error(errors)

                        return p

                    for p in tqdm.tqdm(executor.map(fetch_problem, problems_set), total=len(problems_set)):
                        d = problems_info
                        if len(problems_sets) > 1:
                            d = d.setdefault('division', OrderedDict())
                            d = d.setdefault(division_str, [])
                        d.append(p)

                if not users and users is not None:
                    continue

                page = REQ.get(url)
                rows = etree.HTML(page).xpath("//tr[@valign='middle']")
                header = None
                url_infos = []
                for row in rows:
                    r = parsed_table.ParsedTableRow(row)
                    if len(r.columns) < 10:
                        continue
                    values = [c.value for c in r.columns]
                    if header is None:
                        header = values
                        continue

                    d = OrderedDict(list(zip(header, values)))
                    handle = d.pop('Coders').strip()
                    d = self._dict_as_number(d)
                    if users and handle not in users:
                        continue

                    row = result.setdefault(handle, OrderedDict())
                    row.update(d)

                    if not row.get('new_rating') and not row.get('old_rating') and not row.get('rating_change'):
                        row.pop('new_rating', None)
                        row.pop('old_rating', None)
                        row.pop('rating_change', None)

                    row['member'] = handle
                    row['place'] = row.pop('division_placed', None)
                    row['solving'] = row['point_total']
                    row['solved'] = {'solving': 0}
                    row['division'] = 'I' * division

                    if 'adv.' in row:
                        row['advanced'] = row.pop('adv.').lower().startswith('y')

                    url_info = urljoin(url, r.columns[0].node.xpath('a/@href')[0])
                    url_infos.append(url_info)

                def fetch_solution(url):
                    for i in range(2):
                        try:
                            page = REQ.get(url, time_out=60)
                            match = re.search('<td[^>]*class="problemText"[^>]*>(?P<solution>.*?)</td>',
                                              page,
                                              re.DOTALL | re.IGNORECASE)
                            if not match:
                                break
                            ret = html.unescape(match.group('solution'))
                            ret = ret.strip()
                            ret = ret.replace('<BR>', '\n')
                            ret = ret.replace('\xa0', ' ')
                            return ret
                        except FailOnGetResponse:
                            sleep(i * 10 + 3)
                    return None

                n_failed_fetch_info = 0

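                # Parse one coder's room summary page; give up after repeated fetch failures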
                def fetch_info(url):
                    nonlocal n_failed_fetch_info
                    if n_failed_fetch_info > 10:
                        return
                    delay = 10
                    for _ in range(5):
                        try:
                            page = REQ.get(url, time_out=delay)
                            match = re.search('class="coderBrackets">.*?<a[^>]*>(?P<handle>[^<]*)</a>',
                                              page,
                                              re.IGNORECASE)
                            if match:
                                break
                        except Exception:
                            sleep(delay + _)
                    else:
                        n_failed_fetch_info += 1
                        return

                    handle = html.unescape(match.group('handle').strip())

                    match = re.search(r'&nbsp;Room\s*(?P<room>[0-9]+)', page)
                    room = match.group('room') if match else None

                    matches = re.finditer(r'''
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*c=problem_solution[^"]*)"[^>]*>(?P<short>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>[^<]*</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    problems = {}
                    n_fetch_solution = 0
                    for match in matches:
                        d = match.groupdict()
                        short = d.pop('short')
                        solution_url = urljoin(url, d['url'])
                        d['url'] = solution_url
                        d = self._dict_as_number(d)
                        if d['status'] in ['Challenge Succeeded', 'Failed System Test']:
                            d['result'] = -d['result']
                        if abs(d['result']) < 1e-9:
                            d.pop('result')
                        if re.match('^[0.:]+$', d['time']):
                            d.pop('time')
                        else:
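                            # Convert "h:mm:ss" style time to seconds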
                            time_in_seconds = 0
                            for t in d['time'].split(':'):
                                time_in_seconds = time_in_seconds * 60 + asfloat(t)
                            d['time_in_seconds'] = time_in_seconds

                        solution = (statistics or {}).get(handle, {}).get('problems', {}).get(short, {}).get('solution')
                        if not solution:
                            n_fetch_solution += 1
                            solution = fetch_solution(solution_url)
                        d['solution'] = solution

                        problems[short] = d

                    challenges = []
                    matches = re.finditer(r'''
                        <td[^>]*>[^<]*<a[^>]*href="[^"]*module=MemberProfile[^"]*"[^>]*>(?P<target>[^<]*)</a>[^<]*</td>[^<]*
                        <td[^>]*>(?P<problem>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<status>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<time>[^<]*)</td>[^<]*
                        <td[^>]*>(?P<result>[^<]*)</td>[^<]*
                        <td[^>]*>[^<]*<a[^>]*href="(?P<url>[^"]*)"[^>]*>\s*details\s*</a>[^<]*</td>[^<]*
                    ''', page, re.VERBOSE | re.IGNORECASE)
                    for match in matches:
                        d = match.groupdict()
                        d = {k: v.strip() for k, v in d.items()}
                        d['result'] = float(d['result'].replace(',', '.'))
                        d['url'] = urljoin(url, d['url'])

                        p = problems.setdefault(d['problem'], {})
                        p.setdefault('extra_score', 0)
                        p['extra_score'] += d['result']
                        p.setdefault('extra_info', []).append(f'{d["target"]}: {d["result"]}')
                        challenges.append(d)

                    return url, handle, room, problems, challenges, n_fetch_solution

                with PoolExecutor(max_workers=20) as executor, tqdm.tqdm(total=len(url_infos)) as pbar:
                    n_fetch_solution = 0
                    for info in executor.map(fetch_info, url_infos):
                        if info is None:
                            continue
                        url, handle, room, problems, challenges, n_sol = info
                        n_fetch_solution += n_sol
                        pbar.set_description(f'div{division} {url}')
                        pbar.set_postfix(n_solution=n_fetch_solution, n_failed_fetch_info=n_failed_fetch_info)
                        pbar.update()
                        if handle is not None:
                            if handle not in result:
                                LOG.error(f'{handle} not in result, url = {url}')
                            row = result[handle]
                            row['url'] = url
                            if room:
                                row['room'] = room
                            row['problems'] = problems
                            row['challenges'] = challenges
                            for p in problems.values():
                                if p.get('result', 0) > 1e-9:
                                    row['solved']['solving'] += 1
                            if challenges:
                                h = row.setdefault('hack', {
                                    'title': 'challenges',
                                    'successful': 0,
                                    'unsuccessful': 0,
                                })
                                for c in challenges:
                                    h['successful' if c['status'].lower() == 'yes' else 'unsuccessful'] += 1

            if dd_round_results:
                fields = set()
                hidden_fields_set = set(hidden_fields)
                for data in result.values():
                    for field in data.keys():
                        fields.add(field)

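                # Merge dd_round_results fields that are not already present in the parsed rows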
                k_mapping = {'new_vol': 'new_volatility', 'advanced': None}
                for handle, data in dd_round_results.items():
                    if handle not in result:
                        continue
                    row = result[handle]

                    for k, v in data.items():
                        k = k_mapping.get(k, k)
                        if k and k not in fields:
                            if k in {'new_rating', 'old_rating'} and not v:
                                continue
                            row[k] = v
                            if k not in hidden_fields_set:
                                hidden_fields_set.add(k)
                                hidden_fields.append(k)
                            ks = k.split('_')
                            if ks[0] == 'level' and ks[-1] == 'language' and v and v.lower() != 'unspecified':
                                idx = {'one': 0, 'two': 1, 'three': 2}.get(ks[1], None)
                                d = problems_info
                                if len(problems_sets) > 1:
                                    d = d['division'][row['division']]
                                if idx is not None and 0 <= idx < len(d) and d[idx]['short'] in row['problems']:
                                    row['problems'][d[idx]['short']]['language'] = v
        standings = {
            'result': result,
            'url': self.standings_url,
            'problems': problems_info,
            'hidden_fields': hidden_fields,
            'fields_types': fields_types,
            'options': {
                'fixed_fields': [('hack', 'Challenges')],
            },
        }

        if writers:
            writers = [w[0] for w in sorted(writers.items(), key=lambda w: w[1], reverse=True)]
            standings['writers'] = writers

        if re.search(r'\bfinals?(?:\s+rounds?)?$', self.name, re.I):
            standings['options']['medals'] = [{'name': name, 'count': 1} for name in ('gold', 'silver', 'bronze')]

        if order:
            standings['options']['order'] = order

        return standings
Example #11
    def __init__(self, **kwargs):
        super(Statistic, self).__init__(**kwargs)
        if '//stats.ioinformatics.org/olympiads/' not in self.url:
            raise InitModuleException(f'Url {self.url} should contain stats.ioinformatics.org/olympiads')
Example #12
    def get_standings(self, users=None, statistics=None):
        if '/codingcompetitions.withgoogle.com/' in self.url:
            return self._api_get_standings(users, statistics)
        if '/code.google.com/' in self.url or '/codejam.withgoogle.com/' in self.url:
            return self._old_get_standings(users)
        raise InitModuleException(f'url = {self.url}')