def fetch_page(page_index):
    """Download one standings page, retrying when the request fails with code 503.

    ``self`` is not a parameter here; it is resolved from the enclosing
    scope, and its ``standings_url`` attribute is used as the base URL.

    Args:
        page_index: Page number to request. A falsy value (``0``/``None``)
            requests the base URL without a ``?page=`` query string.

    Returns:
        A ``(page, url)`` tuple: whatever ``REQ.get`` returned and the URL
        that was requested.

    Raises:
        FailOnGetResponse: If the failure's ``code`` is not 503, or if the
            third attempt also fails.
    """
    url = self.standings_url
    if page_index:
        url = url + f'?page={page_index}'

    max_attempts = 3
    for attempt_no in range(1, max_attempts + 1):
        try:
            page = REQ.get(url)
        except FailOnGetResponse as exc:
            # Only a 503 is retried, and only while attempts remain.
            can_retry = exc.code == 503 and attempt_no < max_attempts
            if not can_retry:
                raise exc
            REQ.print(str(exc))
            sleep(5)
        else:
            return page, url
def get_standings(self, users=None, statistics=None):
    """Scrape the "fastest" table for this problem and return standings.

    If ``self.standings_url`` is empty it is first set from ``self.key``.
    When the fetched page contains no ``sign_out`` form, the method signs in
    with the credentials from ``conf``, reading the captcha via
    ``pytesseract``, and then fetches the standings page again.

    Args:
        users: Not used by this implementation.
        statistics: Not used by this implementation.

    Returns:
        A dict with keys ``'result'`` (member -> row mapping), ``'url'`` and
        ``'problems'``; ``'timing_statistic_delta'`` is added when fewer than
        100 rows were parsed and the contest started less than 30 days ago.

    Raises:
        ExceptionParseStandings: If none of the 20 sign-in attempts is
            accepted.
    """
    if not self.standings_url:
        self.standings_url = f'https://projecteuler.net/fastest={self.key}'

    user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'  # noqa
    page = REQ.get(self.standings_url, headers={'User-Agent': user_agent})

    # A sign_out form on the page is taken as proof of an active session.
    already_signed_in = re.search('<form[^>]*action="sign_out"[^>]*>', page)
    if not already_signed_in:
        login_accepted = False
        for _ in range(20):
            # Request fresh captchas until the OCR output is exactly five digits.
            captcha_text = ''
            while not re.match('^[0-9]{5}$', captcha_text):
                nonce = f'{random.random():.16f}'
                captcha_bytes = REQ.get(f'https://projecteuler.net/captcha/show_captcha.php?{nonce}')
                captcha_image = Image.open(io.BytesIO(captcha_bytes))
                recognized = pytesseract.image_to_string(captcha_image, config='--oem 0 --psm 13 digits')
                captcha_text = recognized.strip()

            REQ.get('https://projecteuler.net/sign_in')
            response = REQ.submit_form(
                name='sign_in_form',
                action=None,
                data={
                    'username': conf.PROJECTEULER_USERNAME,
                    'password': conf.PROJECTEULER_PASSWORD,
                    'captcha': captcha_text,
                    'remember_me': '1',
                },
            )
            warning = re.search('<p[^>]*class="warning"[^>]*>(?P<message>[^<]*)</p>', response)
            if warning is None:
                login_accepted = True
                break
            # The site rejected the attempt; report why and try again.
            REQ.print(warning.group('message'))

        if not login_accepted:
            raise ExceptionParseStandings('Did not recognize captcha for sign in')
        page = REQ.get(self.standings_url)

    result = {}

    problem_name = self.name.split('.', 1)[1].strip()
    problems_info = [{'name': problem_name, 'url': self.url}]

    table_match = re.search('<table[^>]*>.*?</table>', page, re.DOTALL)
    table_rows = parsed_table.ParsedTable(table_match.group(0)) if table_match else ()

    for cells in table_rows:
        row = OrderedDict()
        row['solving'] = 1

        for header, cell in cells.items():
            if isinstance(cell, list):
                # A list-valued cell is unpacked as (place, country).
                place_cell, country_cell = cell
                row['place'] = re.match('[0-9]+', place_cell.value).group(0)
                country = first(country_cell.column.node.xpath('.//@title'))
                if country:
                    row['country'] = country
            elif header == 'Time To Solve':
                # Parts look like "<number> <unit>"; units are pluralised so
                # they can be passed as relativedelta keyword arguments.
                units = {}
                for part in cell.value.split(', '):
                    amount, unit = part.split()
                    if not unit.endswith('s'):
                        unit += 's'
                    units[unit] = int(amount)
                span = relativedelta(**units)
                # Anchor the relative span at "now" to obtain a timedelta.
                now = timezone.now()
                elapsed = now - (now - span)
                minutes = elapsed.total_seconds() / 60
                row['penalty'] = f'{minutes:.2f}'
            elif header == 'User':
                row['member'] = first(cell.column.node.xpath('.//@title')) or cell.value
            else:
                row[header.lower()] = cell.value

        # Every listed user is recorded as having solved the problem.
        problems = row.setdefault('problems', {})
        problem = problems.setdefault(problem_name, {})
        problem['result'] = '+'
        problem['binary'] = True
        row['_skip_for_problem_stat'] = True

        if 'member' in row:
            result[row['member']] = row

    standings = {
        'result': result,
        'url': self.standings_url,
        'problems': problems_info,
    }

    # NOTE(review): 'timing_statistic_delta' is presumably a re-parse interval
    # consumed by the caller; confirm against the code that reads it.
    if len(result) < 100:
        age = timezone.now() - self.start_time
        if age < timedelta(days=1):
            standings['timing_statistic_delta'] = timedelta(minutes=60)
        elif age < timedelta(days=30):
            standings['timing_statistic_delta'] = timedelta(days=1)

    return standings