def scrape_submissions(csrf_token, task_name_dict, task_name=None, username=None):
    """Scrape submissions from CS Academy, optionally filtered by task and user.

    :param csrf_token: CSRF token used for the authenticated API requests
    :param task_name_dict: mapping of task_id -> task_name
    :param task_name: optional task name to filter submissions by
    :param username: optional username to filter submissions by
    :return: the paginated submissions, or an empty list on lookup failure
    """
    # Start two days in the future so pagination begins at the newest submissions.
    from_date = datetime.datetime.now() + datetime.timedelta(days=2)

    task_id = None
    if task_name:
        if task_name not in task_name_dict.values():
            log.error(f"Task '{task_name}' not found.")
            # Fix: was a bare `return` (None); return [] for consistency with
            # the other error paths, so callers can always iterate the result.
            return []
        # Exactly one task id is expected to map to this name.
        [task_id] = [
            t_id for t_id, t_name in task_name_dict.items()
            if t_name == task_name
        ]

    user_id = None
    if username:
        response = requests.get(
            f'https://csacademy.com/user/{username}/',
            headers=__get_headers(csrf_token),
            cookies=__get_cookies(csrf_token))
        json_data = json.loads(response.text)
        if json_data.get('error'):
            log.error(f"API Error: {json_data['error']}")
            return []
        # Expect exactly one public user entry in the response payload.
        [user_data] = json_data['state']['publicuser']
        user_id = user_data['id']

    return scrape_paginated_submissions(
        csrf_token, task_name_dict, user_id, task_id, from_date)
def get_page(page_url, max_retries=10, **query_dict):
    """
    Sends a GET request, while also printing the page to console.
    :param max_retries: the maximum number of retries
    :param page_url: the url of the GET request
    :param query_dict: the GET query parameters
    :return: the page received, or None if no request was made
    """
    if query_dict:
        page_url += "?" + urllib.parse.urlencode(query_dict)

    page = None
    for tries in range(max_retries):
        log.debug(f"GET: {page_url}")
        page = requests.get(page_url)
        # 200/400/404 are definitive answers; anything else is retried.
        if page.status_code in (200, 400, 404):
            break
        log.warning(
            f'Request failed (status code: {page.status_code}). Sleeping for 2 seconds...'
        )
        time.sleep(2)
        log.info('Retrying...')

    # Fix: requests.Response is falsy for non-2xx codes (__bool__ is self.ok),
    # so `if not page` mis-fired on 400/404 responses; and when page was truly
    # None, the unconditional status-code check crashed with AttributeError.
    if page is None:
        log.error("Request failed. Page not found.")
    elif page.status_code != 200:
        log.error("Request failed. Status code: %d" % page.status_code)
    return page
def scrape_recent_submissions(*judge_ids, to_days=1):
    """Fetch and persist recent submissions for each given judge.

    Submissions older than ``to_days`` days are dropped.  A failure for one
    judge is logged and does not stop processing of the remaining judges.
    """
    cutoff = datetime.now() - timedelta(days=to_days)
    for judge_id in judge_ids:
        try:
            judge_scraper = scrapers.create_scraper(judge_id)
            # Lazily take submissions while they are newer than the cutoff.
            recent = itertools.takewhile(
                lambda sub: sub['submitted_on'] >= cutoff,
                judge_scraper.scrape_recent_submissions())
            queries.write_submissions(recent)
        except Exception as ex:
            log.error(
                f"Exception while fetching recent submissions for {judge_id}")
            log.exception(ex)
def __insert_many_silent(coll, iterable, unique_fields):
    """Upsert documents into `coll`, silently ignoring duplicate-key errors.

    :param coll: a pymongo collection
    :param iterable: the documents to upsert
    :param unique_fields: field names that uniquely identify a document
    :return: the number of documents inserted
    :raises BulkWriteError: for any write error other than duplicate keys
    """
    from pprint import pformat  # local import: only needed on the error path

    # Fix: was named `requests`, shadowing the `requests` HTTP module used
    # elsewhere in this file.
    operations = []
    for elem in iterable:
        find_dict = {field: elem[field] for field in unique_fields}
        operations.append(ReplaceOne(find_dict, elem, upsert=True))
    try:
        result = coll.bulk_write(operations)
        return result.inserted_count
    except BulkWriteError as bwe:
        for err in bwe.details['writeErrors']:
            # 11000 = duplicate key; anything else is a real failure.
            if err['code'] != 11000:
                log.error(bwe.details)
                # Fix: pprint() prints and returns None, so log.error(pprint(...))
                # logged `None`; pformat() returns the formatted string.
                log.error(pformat(iterable))
                raise
        return bwe.details['nInserted']
def _api_get(api_method: str, kwargs) -> Any:
    """Call a Codeforces API method and return its 'result' payload.

    Returns an empty list when the request raises or when the API reports
    a non-OK status.
    """
    url = f"https://codeforces.com/api/{api_method}"
    try:
        response = get_page(url, **kwargs)
    except Exception as ex:
        log.error(f"GET request got exception: {ex}")
        return []

    json_data = response.json()
    status = json_data['status']
    if status == 'OK':
        return json_data['result']

    log.error(f"Codeforces API error "
              f"(expected status: 'OK' got: '{status}', "
              f"message: '{json_data.get('comment')}')")
    return []
def write_handles(handles_info):
    """Persist updates for the given user-handle info dicts.

    Handles that do not already exist are logged and skipped.  `photo_url`
    is updated when present in the info dict and cleared otherwise.
    """
    handles_info = list(handles_info)
    for handle_info in handles_info:
        try:
            handle = UserHandle.objects.get(
                judge__judge_id=handle_info['judge_id'],
                handle=handle_info['handle'])
        except ObjectDoesNotExist:
            log.error(
                f"Can't update handle: '{handle_info['handle']}': does not exist."
            )
            continue
        # dict.get returns None when the key is absent, matching the original
        # explicit if/else that cleared the photo url.
        handle.photo_url = handle_info.get('photo_url')
        handle.save()
    log.success(f"Successfully updated {len(handles_info)} handles!")
def parse_submission(submission_data):
    """Parse a raw Codeforces submission dict into internal submission dicts.

    Yields one dict per author.  Submissions that are still testing or have
    no verdict are skipped; any parse failure is logged and yields nothing.
    """
    try:
        submission_id = submission_data['id']
        task_id = '/'.join([
            str(submission_data['problem']['contestId']),
            submission_data['problem']['index']
        ])
        # Fix: check for a missing verdict BEFORE reading it.  The original
        # read submission_data['verdict'] first, so the 'no verdict' branch
        # was unreachable and a missing key raised KeyError instead.
        if 'verdict' not in submission_data:
            log.warning(f'Skipped submission {submission_id}: no verdict?.')
            return []
        if submission_data['verdict'] == 'TESTING':
            log.info(f'Skipped submission {submission_id}: still testing.')
            return []
        for author in submission_data['author']['members']:
            author_id = author['handle']
            submission = dict(
                judge_id=CODEFORCES_JUDGE_ID,
                submission_id=str(submission_id),
                task_id=task_id.lower(),
                submitted_on=datetime.datetime.utcfromtimestamp(
                    submission_data['creationTimeSeconds']),
                language=submission_data['programmingLanguage'],
                verdict=parse_verdict(submission_data['verdict']),
                author_id=author_id.lower(),
                time_exec=submission_data['timeConsumedMillis'],
                memory_used=round(submission_data['memoryConsumedBytes'] / 1024),
            )
            yield submission
    except Exception as ex:
        log.error(
            f"Failed to parse submission.\nSubmission data:{submission_data}\nError: {ex}"
        )
def get_context_data(self, **kwargs):
    """Add the sheet and task ids from the URL kwargs to the template context."""
    context = super(SheetTaskEditView, self).get_context_data(**kwargs)
    context['sheet_id'] = self.kwargs['sheet_id']
    context['task_id'] = self.kwargs['task_id']
    # Fix: was log.error(context) -- dumping the normal context at error
    # severity is leftover debugging noise; demote to debug level.
    log.debug(context)
    return context