def _from_table_row(cls, tr: bs4.Tag, contest_id: str) -> 'AtCoderSubmission':
    """
    Build a submission object from one row of a submissions table.

    :param tr: a ``<tr>`` with 8 cells, or with 10 cells when the exec-time and memory cells are present
    :param contest_id: accepted but not read by this function
    :return: the object made by ``cls.from_url`` with the fields found in the row filled in
    """
    cells = tr.find_all('td')
    assert len(cells) in (8, 10)

    # the last cell links to the submission itself, the second cell links to the task
    submission_url = 'https://atcoder.jp' + cells[-1].find('a')['href']
    self = cls.from_url(submission_url)
    problem_url = 'https://atcoder.jp' + cells[1].find('a')['href']
    problem = AtCoderProblem.from_url(problem_url)
    assert self is not None
    assert problem is not None

    # the timestamp is printed with a fixed "+0900" offset, so attach JST explicitly
    naive_time = datetime.datetime.strptime(cells[0].text, '%Y-%m-%d %H:%M:%S+0900')
    self._submission_time = naive_time.replace(tzinfo=utils.tzinfo_jst)
    self._problem_id = problem.problem_id

    # the user id is the last path component of the first link in the user cell
    user_href = cells[2].find_all('a')[0]['href']
    self._user_id = user_href.split('/')[-1]

    self._language_name = cells[3].text
    self._score = float(cells[4].text)
    self._code_size = int(utils.remove_suffix(cells[5].text, ' Byte'))
    self._status = cells[6].text

    # exec time and memory exist only in the 10-cell layout
    if len(cells) == 10:
        self._exec_time_msec = int(utils.remove_suffix(cells[7].text, ' ms'))
        memory_kb = int(utils.remove_suffix(cells[8].text, ' KB'))
        self._memory_byte = memory_kb * 1000
    return self
def _from_table_row(cls, tr: bs4.Tag, *, session: requests.Session, response: requests.Response, timestamp: datetime.datetime) -> 'AtCoderProblemData':
    """
    Build problem data from one row of a task-list table.

    :param tr: a ``<tr>`` with 4 or 5 cells: alphabet, name (with a link), time limit, memory limit and an optional submit cell
    :param session: passed through to the result unchanged
    :param response: passed through to the result unchanged
    :param timestamp: passed through to the result unchanged
    """
    cells = tr.find_all('td')
    assert 4 <= len(cells) <= 5

    href = cells[1].find('a')['href']
    problem = AtCoderProblem.from_url('https://atcoder.jp' + href)
    assert problem is not None

    alphabet = cells[0].text
    name = cells[1].text

    # the time limit is written either in milliseconds or in (possibly fractional) seconds
    time_text = cells[2].text
    if time_text.endswith(' msec'):
        time_limit_msec = int(utils.remove_suffix(time_text, ' msec'))
    elif time_text.endswith(' sec'):
        seconds = float(utils.remove_suffix(time_text, ' sec'))
        time_limit_msec = int(seconds * 1000)
    else:
        assert False

    # the memory limit is written either in KB or in MB
    memory_text = cells[3].text
    if memory_text.endswith(' KB'):
        kilobytes = float(utils.remove_suffix(memory_text, ' KB'))
        memory_limit_byte = int(kilobytes * 1000)
    elif memory_text.endswith(' MB'):
        megabytes = float(utils.remove_suffix(memory_text, ' MB'))
        memory_limit_byte = int(megabytes * 1000 * 1000)  # TODO: confirm this is MB truly, not MiB
    else:
        assert False

    # the optional fifth cell holds nothing or a submit link
    if len(cells) == 5:
        assert cells[4].text.strip() in ('', 'Submit', '提出')

    return AtCoderProblemData(
        alphabet=alphabet,
        memory_limit_byte=memory_limit_byte,
        name=name,
        problem=problem,
        response=response,
        session=session,
        time_limit_msec=time_limit_msec,
        timestamp=timestamp,
    )
def _load_details(self, session: Optional[requests.Session] = None) -> None:
    """
    Fetch the Japanese problem page and store the parsed details on ``self``.

    Sets ``_alphabet``, ``_task_name``, ``_time_limit_msec`` and ``_memory_limit_byte``.
    Sets ``_score`` when the first paragraph of the statement holds an integer score,
    and always sets ``_score_checked`` at the end.

    :param session: the session used for the request; a new default session is made when omitted
    """
    session = session or utils.new_default_session()

    # get
    resp = _request('GET', self.get_url(type='beta', lang='ja'), session=session)
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)

    # parse
    h2 = soup.find('span', class_='h2')
    self._alphabet, _, self._task_name = h2.text.partition(' - ')
    time_limit, memory_limit = h2.find_next_sibling('p').text.split(' / ')

    # go through float() so that a fractional limit such as "2.5 sec" does not raise ValueError
    time_limit_sec = utils.remove_suffix(utils.remove_prefix(time_limit, '実行時間制限: '), ' sec')
    self._time_limit_msec = int(float(time_limit_sec) * 1000)
    memory_limit_mb = utils.remove_suffix(utils.remove_prefix(memory_limit, 'メモリ制限: '), ' MB')
    self._memory_limit_byte = int(float(memory_limit_mb) * 1000 * 1000)

    task_statement = soup.find('div', id='task-statement')
    p = task_statement.find('p')  # first
    if p is not None and p.text.startswith('配点 : '):
        score = utils.remove_suffix(utils.remove_prefix(p.text, '配点 : '), ' 点')
        try:
            self._score = int(score)
        except ValueError:
            # the score may be written with markup instead of a bare integer; leave _score untouched then
            pass
    self._score_checked = True
def _AtCoderProblemContentPartial_from_row(tr: bs4.Tag):
    """
    Build a partial problem content from one row of a task-list table.

    The result is also stored into ``problem._cached_content`` of the problem the row links to.

    :param tr: a ``<tr>`` with 4 or 5 cells: alphabet, name (with a link), time limit, memory limit and an optional submit cell
    """
    cells = tr.find_all('td')
    assert 4 <= len(cells) <= 5

    href = cells[1].find('a')['href']
    problem = AtCoderProblem.from_url('https://atcoder.jp' + href)
    assert problem is not None

    alphabet = cells[0].text
    name = cells[1].text

    # the time limit is written either in milliseconds or in (possibly fractional) seconds
    time_text = cells[2].text
    if time_text.endswith(' msec'):
        time_limit_msec = int(utils.remove_suffix(time_text, ' msec'))
    elif time_text.endswith(' sec'):
        seconds = float(utils.remove_suffix(time_text, ' sec'))
        time_limit_msec = int(seconds * 1000)
    else:
        assert False

    # the memory limit is written either in KB or in MB
    memory_text = cells[3].text
    if memory_text.endswith(' KB'):
        kilobytes = float(utils.remove_suffix(memory_text, ' KB'))
        memory_limit_byte = int(kilobytes * 1000)
    elif memory_text.endswith(' MB'):
        megabytes = float(utils.remove_suffix(memory_text, ' MB'))
        memory_limit_byte = int(megabytes * 1000 * 1000)  # TODO: confirm this is MB truly, not MiB
    else:
        assert False

    # the optional fifth cell holds nothing or a submit link
    if len(cells) == 5:
        assert cells[4].text.strip() in ('', 'Submit', '提出')

    self = AtCoderProblemContentPartial(alphabet, memory_limit_byte, name, problem, time_limit_msec)
    problem._cached_content = self
    return self
def _AtCoderProblemContent_parse_partial(soup: bs4.BeautifulSoup, problem: 'AtCoderProblem') -> AtCoderProblemContentPartial:
    """
    Parse the title and the limits from a problem page.

    :param soup: the parsed problem page, in either the Japanese or the English version
    :param problem: stored into the result as it is
    """
    title = soup.find('span', class_='h2')
    alphabet, _, name = title.text.partition(' - ')

    # the paragraph following the title looks like "<time limit> / <memory limit>"
    limits = title.find_next_sibling('p').text
    time_limit, memory_limit = limits.split(' / ')

    # the label is in Japanese or in English depending on the language of the page
    for time_limit_prefix in ('実行時間制限: ', 'Time Limit: '):
        if time_limit.startswith(time_limit_prefix):
            break
    else:
        assert False
    seconds = utils.remove_prefix(time_limit, time_limit_prefix)
    seconds = utils.remove_suffix(seconds, ' sec')
    time_limit_msec = int(float(seconds) * 1000)

    for memory_limit_prefix in ('メモリ制限: ', 'Memory Limit: '):
        if memory_limit.startswith(memory_limit_prefix):
            break
    else:
        assert False
    megabytes = utils.remove_prefix(memory_limit, memory_limit_prefix)
    megabytes = utils.remove_suffix(megabytes, ' MB')
    memory_limit_byte = int(float(megabytes) * 1000 * 1000)

    return AtCoderProblemContentPartial(alphabet, memory_limit_byte, name, problem, time_limit_msec)
def _from_table_row(cls, tr: bs4.Tag) -> 'AtCoderSubmissionTestCaseResult':
    """
    Build the result of one test case from a row of the test-case table.

    :param tr: a ``<tr>`` with 2 cells (case name, status) or 4 cells (plus exec time and memory)
    """
    cells = tr.find_all('td')
    case_name = cells[0].text
    status = cells[1].text

    if len(cells) == 4:
        exec_time_msec = int(utils.remove_suffix(cells[2].text, ' ms'))  # type: Optional[int]
        memory_kb = int(utils.remove_suffix(cells[3].text, ' KB'))
        memory_byte = memory_kb * 1000  # type: Optional[int]  # TODO: confirm this is KB truly, not KiB
    else:
        # the short layout has no measurements
        assert len(cells) == 2
        exec_time_msec = None
        memory_byte = None

    return AtCoderSubmissionTestCaseResult(
        case_name=case_name,
        status=status,
        exec_time_msec=exec_time_msec,
        memory_byte=memory_byte,
    )
def _from_table_row(cls, tr: bs4.Tag) -> 'AtCoderProblem':
    """
    Build a problem object from one row of a task-list table.

    :param tr: a ``<tr>`` with exactly 5 cells: alphabet, name (with a link), time limit in sec, memory limit in MB and a submit cell
    :return: the object made by ``cls.from_url`` with the fields of the row filled in
    """
    cells = tr.find_all('td')
    assert len(cells) == 5

    href = cells[1].find('a')['href']
    self = cls.from_url('https://atcoder.jp' + href)
    assert self is not None

    self._alphabet = cells[0].text
    self._task_name = cells[1].text

    seconds = float(utils.remove_suffix(cells[2].text, ' sec'))
    self._time_limit_msec = int(seconds * 1000)
    megabytes = int(utils.remove_suffix(cells[3].text, ' MB'))
    self._memory_limit_byte = megabytes * 1000 * 1000  # TODO: confirm this is MB truly, not MiB

    # the last cell holds nothing or a submit link
    assert cells[4].text.strip() in ('', 'Submit')
    return self
def from_url(cls, url: str) -> Optional['AtCoderContest']:
    """
    Make a contest object from a URL of a contest or of a page under a contest.

    :param url: example:

    -   https://kupc2014.contest.atcoder.jp/tasks/kupc2014_d
    -   https://atcoder.jp/contests/agc030

    :return: ``cls(contest_id)`` when the URL is recognized, otherwise ``None``
    """
    result = urllib.parse.urlparse(url)
    if result.scheme not in ('', 'http', 'https'):
        return None

    # urlparse() reports hostname as None when the URL has no network location
    # (e.g. "agc030" or "/contests/agc030"); such a URL cannot point to AtCoder
    hostname = result.hostname
    if hostname is None:
        return None

    # example: https://kupc2014.contest.atcoder.jp/tasks/kupc2014_d
    if hostname.endswith('.contest.atcoder.jp'):
        contest_id = utils.remove_suffix(hostname, '.contest.atcoder.jp')
        return cls(contest_id)

    # example: https://atcoder.jp/contests/agc030
    if hostname in ('atcoder.jp', 'beta.atcoder.jp'):
        m = re.match(r'/contests/([\w\-_]+)/?.*', utils.normpath(result.path))
        if m:
            contest_id = m.group(1)
            return cls(contest_id)

    return None
def _AtCoderProblemContent_parse_score(soup: bs4.BeautifulSoup) -> Optional[int]:
    """
    Read the score of a problem from its Japanese problem page.

    :param soup: the parsed problem page
    :return: the score when the first paragraph of the statement is of the form ``配点 : N 点`` with an integer ``N``, otherwise ``None``
    """
    task_statement = soup.find('div', id='task-statement')
    p = task_statement.find('p')  # first
    if p is not None and p.text.startswith('配点 : '):
        score = utils.remove_suffix(utils.remove_prefix(p.text, '配点 : '), ' 点')
        try:
            return int(score)
        except ValueError:
            # some problems have scores like "<p>配点 : \(100\) 点</p>", not "<p>配点 : 100 点</p>";
            # report the score as unknown instead of raising
            pass
    return None
def _from_html(cls, html: bytes, *, problem: 'AtCoderProblem', session: Optional[requests.Session] = None, response: Optional[requests.Response] = None, timestamp: Optional[datetime.datetime] = None) -> 'AtCoderProblemData':
    """
    Build problem data from the HTML of a problem page.

    :param html: the page, in either the Japanese or the English version
    :param problem: passed through to the result unchanged
    :param session: passed through to the result unchanged
    :param response: passed through to the result unchanged
    :param timestamp: passed through to the result unchanged
    """
    soup = bs4.BeautifulSoup(html, utils.html_parser)
    title = soup.find('span', class_='h2')
    alphabet, _, name = title.text.partition(' - ')

    # the paragraph following the title looks like "<time limit> / <memory limit>"
    limits = title.find_next_sibling('p').text
    time_limit, memory_limit = limits.split(' / ')

    # the label is in Japanese or in English depending on the language of the page
    for time_limit_prefix in ('実行時間制限: ', 'Time Limit: '):
        if time_limit.startswith(time_limit_prefix):
            break
    else:
        assert False
    # the unit is either msec or (possibly fractional) sec
    if time_limit.endswith(' msec'):
        milliseconds = utils.remove_prefix(time_limit, time_limit_prefix)
        time_limit_msec = int(utils.remove_suffix(milliseconds, ' msec'))
    elif time_limit.endswith(' sec'):
        seconds = utils.remove_prefix(time_limit, time_limit_prefix)
        seconds = utils.remove_suffix(seconds, ' sec')
        time_limit_msec = int(float(seconds) * 1000)
    else:
        assert False

    for memory_limit_prefix in ('メモリ制限: ', 'Memory Limit: '):
        if memory_limit.startswith(memory_limit_prefix):
            break
    else:
        assert False
    # the unit is either KB or MB
    if memory_limit.endswith(' KB'):
        kilobytes = utils.remove_prefix(memory_limit, memory_limit_prefix)
        kilobytes = utils.remove_suffix(kilobytes, ' KB')
        memory_limit_byte = int(float(kilobytes) * 1000)
    elif memory_limit.endswith(' MB'):
        megabytes = utils.remove_prefix(memory_limit, memory_limit_prefix)
        megabytes = utils.remove_suffix(megabytes, ' MB')
        memory_limit_byte = int(float(megabytes) * 1000 * 1000)
    else:
        assert False

    return AtCoderProblemData(
        alphabet=alphabet,
        html=html,
        memory_limit_byte=memory_limit_byte,
        name=name,
        problem=problem,
        response=response,
        session=session,
        time_limit_msec=time_limit_msec,
        timestamp=timestamp,
    )
def _parse_score(cls, soup: bs4.BeautifulSoup) -> Optional[int]:
    """
    Read the score of a problem from its Japanese problem page.

    :return: the score when the first paragraph of the statement is of the form ``配点 : N 点`` with an integer ``N``, otherwise ``None``
    """
    statement = soup.find('div', id='task-statement')
    first_paragraph = statement.find('p')
    if first_paragraph is None:
        return None
    if not first_paragraph.text.startswith('配点 : '):
        return None

    score_text = utils.remove_prefix(first_paragraph.text, '配点 : ')
    score_text = utils.remove_suffix(score_text, ' 点')
    try:
        return int(score_text)
    except ValueError:
        # some problems have scores like "<p>配点 : \(100\) 点</p>", not "<p>配点 : 100 点</p>"
        # example: https://atcoder.jp/contests/wupc2019/tasks/wupc2019_a
        return None
def _from_table_row(cls, tr: bs4.Tag, *, session: requests.Session, response: requests.Response, timestamp: datetime.datetime) -> 'AtCoderSubmissionData':
    """
    Build submission data from one row of a submissions table.

    :param tr: a ``<tr>`` with 8 cells, or with 10 cells when the exec-time and memory cells are present
    :param session: passed through to the result unchanged
    :param response: passed through to the result unchanged
    :param timestamp: passed through to the result unchanged
    """
    cells = tr.find_all('td')
    assert len(cells) in (8, 10)

    # the last cell links to the submission itself, the second cell links to the task
    submission_url = 'https://atcoder.jp' + cells[-1].find('a')['href']
    submission = AtCoderSubmission.from_url(submission_url)
    problem_url = 'https://atcoder.jp' + cells[1].find('a')['href']
    problem = AtCoderProblem.from_url(problem_url)
    assert submission is not None
    assert problem is not None

    # the timestamp is printed with a fixed "+0900" offset, so attach JST explicitly
    naive_time = datetime.datetime.strptime(cells[0].text, '%Y-%m-%d %H:%M:%S+0900')
    submission_time = naive_time.replace(tzinfo=utils.tzinfo_jst)
    problem_id = problem.problem_id

    # the user id is the last path component of the first link in the user cell
    user_href = cells[2].find_all('a')[0]['href']
    user_id = user_href.split('/')[-1]

    language_name = cells[3].text
    score = float(cells[4].text)
    code_size = int(utils.remove_suffix(cells[5].text, ' Byte'))
    status = cells[6].text

    # exec time and memory exist only in the 10-cell layout
    exec_time_msec = None  # type: Optional[int]
    memory_byte = None  # type: Optional[int]
    if len(cells) == 10:
        exec_time_msec = int(utils.remove_suffix(cells[7].text, ' ms'))
        memory_byte = int(utils.remove_suffix(cells[8].text, ' KB')) * 1000

    return AtCoderSubmissionData(
        code_size=code_size,
        exec_time_msec=exec_time_msec,
        language_name=language_name,
        memory_byte=memory_byte,
        problem_id=problem_id,
        problem=problem,
        response=response,
        score=score,
        session=session,
        status=status,
        submission=submission,
        submission_time=submission_time,
        timestamp=timestamp,
        user_id=user_id,
    )
def check_gnu_time(gnu_time: str) -> bool:
    """
    Check that the given command can be used to measure memory usage.

    It runs ``<gnu_time> -f '%M KB' -o <temporary file> -- true`` and checks that the command
    succeeds and that the last line written to the file is of the form ``<integer> KB``.

    :param gnu_time: the command to try
    :return: ``True`` when the check passes, ``False`` when anything other than a coding mistake goes wrong
    """
    try:
        with tempfile.NamedTemporaryFile(delete=True) as report:
            command = [gnu_time, '-f', '%M KB', '-o', report.name, '--', 'true']
            proc = subprocess.run(command)
            assert proc.returncode == 0
            # read the report through a second handle while the temporary file still exists
            with open(report.name) as reader:
                content = reader.read()
            last_line = content.rstrip().splitlines()[-1]
            int(utils.remove_suffix(last_line, ' KB'))
            return True
    except (NameError, AttributeError):
        # these are not runtime errors caused by the environment, but coding mistakes
        raise
    except Exception:
        log.debug(traceback.format_exc())
        return False
def download_data(self, *, session: Optional[requests.Session] = None) -> AtCoderSubmissionDetailedData:
    """
    Fetch the English page of this submission and parse it into a data object.

    :param session: the session used for the request; the default session is used when omitted

    :note: `Exec Time` is undefined when the status is `RE` or `TLE`
    :note: `Memory` is undefined when the status is `RE` or `TLE`
    """
    session = session or utils.get_default_session()
    resp = _request('GET', self.get_url(type='beta', lang='en'), session=session)
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)
    timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone()

    # Submission #N
    heading, = soup.find_all('span', class_='h2')
    assert heading.text == 'Submission #{}'.format(self.submission_id)

    # Source Code
    source_code = soup.find(id='submission-code').text.encode()

    # get tables: either the info table alone, or the info table followed by two test-case tables
    tables = soup.find_all('table')
    test_cases_summary = None
    test_cases_data = None
    if len(tables) == 3:
        submission_info, test_cases_summary, test_cases_data = tables
    elif len(tables) == 1:
        submission_info, = tables
    else:
        assert False

    # Submission Info: collect the header/value pairs of every row
    data = {}  # type: Dict[str, str]
    problem_id = None  # type: Optional[str]
    for row in submission_info.find_all('tr'):
        key = row.find('th').text.strip()
        data[key] = row.find('td').text.strip()
        if key == 'Task':
            # the problem id is taken from the link, not from the displayed text
            problem = AtCoderProblem.from_url('https://atcoder.jp' + row.find('a')['href'])
            assert problem is not None
            problem_id = problem.problem_id
    assert problem_id is not None

    naive_time = datetime.datetime.strptime(data['Submission Time'], '%Y-%m-%d %H:%M:%S+0900')
    submission_time = naive_time.replace(tzinfo=utils.tzinfo_jst)
    user_id = data['User']
    language_name = data['Language']
    score = float(data['Score'])
    code_size = int(utils.remove_suffix(data['Code Size'], ' Byte'))
    status = data['Status']

    exec_time_msec = None  # type: Optional[int]
    if 'Exec Time' in data:
        exec_time_msec = int(utils.remove_suffix(data['Exec Time'], ' ms'))
    memory_byte = None  # type: Optional[int]
    if 'Memory' in data:
        # TODO: confirm this is KB truly, not KiB
        memory_byte = int(utils.remove_suffix(data['Memory'], ' KB')) * 1000

    # Compile Error
    compile_error_tag = soup.find('h4', text='Compile Error')
    compile_error = None
    if compile_error_tag is not None:
        compile_error = compile_error_tag.find_next_sibling('pre').text

    # Test Cases
    test_sets = None  # type: Optional[List[AtCoderSubmissionTestSet]]
    if test_cases_summary is not None:
        summary_rows = test_cases_summary.find('tbody').find_all('tr')
        test_sets = [AtCoderSubmissionTestSet._from_table_row(row) for row in summary_rows]
    test_cases = None  # type: Optional[List[AtCoderSubmissionTestCaseResult]]
    if test_cases_data is not None:
        case_rows = test_cases_data.find('tbody').find_all('tr')
        test_cases = [AtCoderSubmissionTestCaseResult._from_table_row(row) for row in case_rows]

    return AtCoderSubmissionDetailedData(
        code_size=code_size,
        compile_error=compile_error,
        exec_time_msec=exec_time_msec,
        language_name=language_name,
        memory_byte=memory_byte,
        problem=AtCoderProblem(contest_id=self.contest_id, problem_id=problem_id),
        problem_id=problem_id,
        response=resp,
        score=score,
        session=session,
        source_code=source_code,
        status=status,
        submission=self,
        submission_time=submission_time,
        test_cases=test_cases,
        test_sets=test_sets,
        timestamp=timestamp,
        user_id=user_id,
    )
def _load_details(self, session: Optional[requests.Session] = None) -> None:
    """
    Fetch the English page of this submission and store the parsed details on ``self``.

    ``_exec_time_msec``, ``_memory_byte``, ``_test_sets`` and ``_test_cases`` are assigned
    only when the page has the corresponding parts.

    :param session: the session used for the request; the default session is used when omitted
    """
    session = session or utils.get_default_session()
    resp = _request('GET', self.get_url(type='beta', lang='en'), session=session)
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)

    # Submission #N
    heading, = soup.find_all('span', class_='h2')
    assert heading.text == 'Submission #{}'.format(self.submission_id)

    # Source Code
    self._source_code = soup.find(id='submission-code').text.encode()

    # get tables: either the info table alone, or the info table followed by two test-case tables
    tables = soup.find_all('table')
    test_cases_summary = None
    test_cases_data = None
    if len(tables) == 3:
        submission_info, test_cases_summary, test_cases_data = tables
    elif len(tables) == 1:
        submission_info, = tables
    else:
        assert False

    # Submission Info: collect the header/value pairs of every row
    data = {}  # type: Dict[str, str]
    for row in submission_info.find_all('tr'):
        key = row.find('th').text.strip()
        data[key] = row.find('td').text.strip()
        if key == 'Task':
            # the problem id is taken from the link, not from the displayed text
            problem = AtCoderProblem.from_url('https://atcoder.jp' + row.find('a')['href'])
            assert problem is not None
            self._problem_id = problem.problem_id

    naive_time = datetime.datetime.strptime(data['Submission Time'], '%Y-%m-%d %H:%M:%S+0900')
    self._submission_time = naive_time.replace(tzinfo=utils.tzinfo_jst)
    self._user_id = data['User']
    self._language_name = data['Language']
    self._score = float(data['Score'])
    self._code_size = int(utils.remove_suffix(data['Code Size'], ' Byte'))
    self._status = data['Status']
    if 'Exec Time' in data:
        self._exec_time_msec = int(utils.remove_suffix(data['Exec Time'], ' ms'))
    if 'Memory' in data:
        memory_kb = int(utils.remove_suffix(data['Memory'], ' KB'))
        self._memory_byte = memory_kb * 1000  # TODO: confirm this is KB truly, not KiB

    # Compile Error: an empty string when the page has no such section
    compile_error_heading = soup.find('h4', text='Compile Error')
    if compile_error_heading is not None:
        self.compile_error = compile_error_heading.find_next_sibling('pre').text
    else:
        self.compile_error = ''

    # Test Cases
    if test_cases_summary is not None:
        summary_rows = test_cases_summary.find('tbody').find_all('tr')
        self._test_sets = [AtCoderSubmissionTestSet._from_table_row(row) for row in summary_rows]
    if test_cases_data is not None:
        case_rows = test_cases_data.find('tbody').find_all('tr')
        self._test_cases = [AtCoderSubmissionTestCaseResult._from_table_row(row) for row in case_rows]