def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[TestCase]:
    """Scrape the sample cases of this Google Code Jam problem.

    :param session: an optional ``requests.Session``; a default session is
        used when omitted.
    :raises SampleParseError: when the problem statement cannot be located or
        the sample sections in it are malformed.
    """
    session = session or utils.get_default_session()

    # Obtain the raw HTML statement. How to get it depends on which of the
    # two Google platforms hosts the contest.
    if self.domain == 'codingcompetitions.withgoogle.com':
        url = 'https://codejam.googleapis.com/dashboard/{}/poll?p=e30'.format(self.contest_id)
        response = utils.request('GET', url, session=session)
        # The poll endpoint answers with unpadded URL-safe base64; restore the
        # padding before decoding.
        padding = b'=' * ((-len(response.content)) % 4)
        data = json.loads(base64.urlsafe_b64decode(response.content + padding).decode())
        logger.debug('%s', data)

        # parse JSON: pick the first task whose id matches
        matched = [task for task in data['challenge']['tasks'] if task['id'] == self.problem_id]
        if not matched:
            raise SampleParseError("the problem {} is not found in the challenge {}".format(repr(self.problem_id), repr(self.contest_id)))
        statement = matched[0]['statement']

    elif self.domain == 'code.google.com':
        try:
            url = 'https://{}/{}/contest/{}/dashboard/ContestInfo'.format(self.domain, self.kind, self.contest_id)
            response = utils.request('GET', url, session=session)
        except requests.HTTPError:
            logger.warning('hint: Google Code Jam moves old problems to the new platform')
            raise
        data = json.loads(response.content.decode())

        # parse JSON: problem ids look like "p0", "p1", ... and index the list
        assert self.problem_id.startswith('p')
        problem_index = int(self.problem_id[1:])
        statement = data['problems'][problem_index]['body']

    else:
        assert False

    # parse HTML: <pre class="io-content"> tags alternate input, output, ...
    soup = bs4.BeautifulSoup(statement, utils.HTML_PARSER)
    io_contents = soup.find_all('pre', class_='io-content')
    if len(io_contents) % 2 != 0:
        raise SampleParseError("""the number of <pre class="io-content"> is not multiple of two""")

    samples = []
    for index, (input_content, output_content) in enumerate(zip(io_contents[0::2], io_contents[1::2])):
        # Sanity checks: inputs should not carry "Case #" but outputs should.
        if input_content.text.startswith('Case #'):
            logger.warning('''the sample input starts with "Case #"''')
        if not output_content.text.startswith('Case #'):
            logger.warning('''the sample output doesn't start with "Case #"''')
        samples.append(TestCase(
            'sample-{}'.format(index + 1),
            'Input {}'.format(index + 1),
            utils.textfile(input_content.text.rstrip()).encode(),
            'Output {}'.format(index + 1),
            utils.textfile(output_content.text.rstrip()).encode(),
        ))
    return samples
def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[TestCase]:
    """Scrape the single sample case of this Google Code Jam problem.

    :param session: an optional ``requests.Session``; a default session is
        used when omitted.
    :raises SampleParseError: when the problem statement cannot be located or
        it does not contain exactly one input/output pair.
    """
    session = session or utils.get_default_session()

    # Obtain the raw HTML statement from whichever platform hosts the contest.
    if self.domain == 'codingcompetitions.withgoogle.com':
        url = 'https://codejam.googleapis.com/dashboard/{}/poll?p=e30'.format(self.contest_id)
        response = utils.request('GET', url, session=session)
        # The poll endpoint answers with unpadded URL-safe base64.
        padding = b'=' * ((-len(response.content)) % 4)
        data = json.loads(base64.urlsafe_b64decode(response.content + padding).decode())
        log.debug('%s', data)

        # parse JSON: pick the first task whose id matches
        matched = [task for task in data['challenge']['tasks'] if task['id'] == self.problem_id]
        if not matched:
            raise SampleParseError("the problem {} is not found in the challenge {}".format(repr(self.problem_id), repr(self.contest_id)))
        statement = matched[0]['statement']

    elif self.domain == 'code.google.com':
        url = 'https://{}/{}/contest/{}/dashboard/ContestInfo'.format(self.domain, self.kind, self.contest_id)
        response = utils.request('GET', url, session=session)
        data = json.loads(response.content.decode())

        # parse JSON: problem ids look like "p0", "p1", ... and index the list
        assert self.problem_id.startswith('p')
        problem_index = int(self.problem_id[1:])
        statement = data['problems'][problem_index]['body']

    else:
        assert False

    # parse HTML: exactly one input <pre> and one output <pre> are expected
    soup = bs4.BeautifulSoup(statement, utils.html_parser)
    io_contents = soup.find_all('pre', class_='io-content')
    if len(io_contents) != 2:
        raise SampleParseError("""the number of <pre class="io-content"> is not two""")
    input_content, output_content = io_contents

    # Sanity checks: the input should not carry "Case #" but the output should.
    if input_content.text.startswith('Case #'):
        log.warning('''the sample input starts with "Case #"''')
    if not output_content.text.startswith('Case #'):
        log.warning('''the sample output doesn't start with "Case #"''')

    return [TestCase(
        'sample',
        'Input',
        utils.textfile(input_content.text.rstrip()).encode(),
        'Output',
        utils.textfile(output_content.text.rstrip()).encode(),
    )]
def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[onlinejudge.type.TestCase]:
    """Unconditionally fail: sample download for this service was removed.

    :raises SampleParseError: always; see the linked issue for the rationale.
    """
    message = "removed. see https://github.com/online-judge-tools/api-client/issues/49"
    raise SampleParseError(message)
def download(args: argparse.Namespace) -> None:
    """Download the test cases of the problem at ``args.url`` and save them as files.

    Honors ``args.directory`` / ``args.format`` for the output layout,
    ``args.system`` to fetch system cases instead of samples, ``args.dry_run``
    to skip all writes, ``args.silent`` to suppress content previews, and
    ``args.log_file`` to dump the samples as JSON.

    :raises requests.exceptions.InvalidURL: when the URL is not a supported problem URL.
    :raises SampleParseError: when no samples are found.
    :raises FileExistsError: when a destination file already exists.
    """

    # prepare values
    problem = dispatch.problem_from_url(args.url)
    if problem is None:
        if dispatch.contest_from_url(args.url) is not None:
            logger.warning('You specified a URL for a contest instead of a problem. If you want to download for all problems of a contest at once, please try to use `oj-prepare` command of https://github.com/online-judge-tools/template-generator')
        raise requests.exceptions.InvalidURL('The URL "%s" is not supported' % args.url)
    is_default_format = args.format is None and args.directory is None  # must be here since args.directory and args.format are overwritten
    if args.directory is None:
        args.directory = pathlib.Path('test')
    if args.format is None:
        args.format = '%b.%e'

    # get samples from the server
    with utils.new_session_with_our_user_agent(path=args.cookie) as sess:
        if args.yukicoder_token and isinstance(problem, YukicoderProblem):
            sess.headers['Authorization'] = 'Bearer {}'.format(args.yukicoder_token)
        if args.system:
            samples = problem.download_system_cases(session=sess)
        else:
            samples = problem.download_sample_cases(session=sess)
    if not samples:
        raise SampleParseError("Sample not found")

    # append the history for submit subcommand
    if not args.dry_run and is_default_format:
        history = onlinejudge_command.download_history.DownloadHistory()
        if not list(args.directory.glob('*')):
            # reset the history to help users who use only one directory for many problems
            history.remove(directory=pathlib.Path.cwd())
        history.add(problem, directory=pathlib.Path.cwd())

    # prepare files to write
    def iterate_files_to_write(sample: TestCase, *, i: int) -> Iterator[Tuple[str, pathlib.Path, bytes]]:
        # Yield (extension, destination path, data) for the input and the
        # output of one sample; a missing side (data is None) is skipped.
        for ext in ['in', 'out']:
            data = getattr(sample, ext + 'put_data')
            if data is None:
                continue
            name = sample.name
            # substitution table for the %-style --format string
            table = {}
            table['i'] = str(i + 1)
            table['e'] = ext
            table['n'] = name
            table['b'] = os.path.basename(name)
            table['d'] = os.path.dirname(name)
            path: pathlib.Path = args.directory / format_utils.percentformat(args.format, table)
            yield ext, path, data

    # check all destinations before writing anything, so that a partial
    # download never clobbers existing files
    for i, sample in enumerate(samples):
        for _, path, _ in iterate_files_to_write(sample, i=i):
            if path.exists():
                raise FileExistsError('Failed to download since file already exists: ' + str(path))

    # write samples to files
    for i, sample in enumerate(samples):
        logger.info('')
        logger.info('sample %d', i)
        for ext, path, data in iterate_files_to_write(sample, i=i):
            content = ''
            if not args.silent:
                content = '\n' + pretty_printers.make_pretty_large_file_content(data, limit=40, head=20, tail=10, bold=True)
            logger.info('%sput: %s%s', ext, sample.name, content)
            if not args.dry_run:
                path.parent.mkdir(parents=True, exist_ok=True)
                with path.open('wb') as fh:
                    fh.write(data)
                logger.info(utils.SUCCESS + 'saved to: %s', path)

    # optionally dump all samples as JSON for machine consumption
    if args.log_file:
        with args.log_file.open(mode='w') as fhs:
            json.dump(list(map(convert_sample_to_dict, samples)), fhs)
def download_sample_cases( self, *, session: Optional[requests.Session] = None ) -> List[onlinejudge.type.TestCase]: session = session or utils.get_default_session() # get url = self.get_url() resp = utils.request('GET', url, session=session) # parse HTML soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser) for div in soup.find_all('div', class_='content'): # TODO: find a proper way to detect this tag # find a tag which contains something like Markdown if len( list( filter(lambda line: line.startswith('###'), div.decode_contents().splitlines()))) >= 3: log.debug('%s', str(div)) break else: raise SampleParseError('no markdown') # parse Markdown # TODO: Should we use a Markdown parser? But I want to avoid making a new dependency only for CodeChef # pattern 1: "### Example Input" and a code block https://www.codechef.com/COOK113A/problems/DAND # pattern 2: "###Sample Input:" and a indent https://www.codechef.com/PLIN2020/problems/CNTSET # not implemented: "<h3>Example</h3> <pre><b>Input:</b> 1 5 1 2 3 4 5 <b>Output:</b> 2 </pre>" https://www.codechef.com/CNES2017/problems/ACESQN def iterate(): header = None # type: Optional[str] fenced = None # type: Optional[str] indented = None # type: Optional[str] for line in div.decode_contents().splitlines(keepends=True): if indented and not (line.startswith(' ' * 4) or line.startswith('\t')): yield header, indented indented = None if line.startswith('###'): header = ' '.join(line.strip(' \r\n#:').split()) elif not fenced and (line.startswith(' ' * 4) or line.startswith('\t')): if indented is None: indented = '' indented += line.lstrip() elif not indented and line.rstrip() == '```': if fenced is None: fenced = '' else: yield header, fenced fenced = None else: if fenced is not None: fenced += line if indented: yield header, indented indented = None # make a testcase object name = None # type: Optional[str] input_name = None # type: Optional[str] input_data = None # type: Optional[bytes] output_name = 
None # type: Optional[str] output_data = None # type: Optional[bytes] for header, codeblock in iterate(): if header is None: pass elif header.lower() in ('sample input', 'example input'): if input_data is not None: raise SampleParseError('two inputs found') input_name = header input_data = codeblock.encode() elif header.lower() in ('sample output', 'example output'): if output_data is not None: raise SampleParseError('two outputs found') output_name = header output_data = codeblock.encode() elif header.lower() in ('sample', 'example'): name = header if input_data is None: input_data = codeblock.encode() elif output_data is None: output_data = codeblock.encode() else: raise SampleParseError('two samples found') if input_data is None: raise SampleParseError('no input found') if output_data is None: raise SampleParseError('no output found') testcase = onlinejudge.type.TestCase( name=name or 'sample', input_name=input_name or 'input', input_data=input_data, output_name=output_name or 'output', output_data=output_data, ) return [testcase]
def _download_data(self, *, session: Optional[requests.Session] = None) -> _TopcoderData:
    """Download and parse this Topcoder problem's statement page.

    Extracts the "Definition" section (class/method signature fields) and the
    "Examples" section (sample inputs and expected returns), and converts the
    examples to test cases in the Greed plugin's format.

    :param session: an optional ``requests.Session``; a default session is
        used when omitted.
    :raises SampleParseError: when the statement page does not match the
        expected HTML layout.
    """
    session = session or utils.get_default_session()

    # download HTML
    url = 'https://community.topcoder.com/stat?c=problem_statement&pm={}'.format(self.problem_id)
    resp = utils.request('GET', url, session=session)

    # parse HTML
    soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding), utils.html_parser)
    problem_texts = soup.find_all('td', class_='problemText')
    if len(problem_texts) != 1:
        raise SampleParseError("""<td class="problemText"> is not found or not unique""")
    problem_text = problem_texts[0]

    # parse Definition section
    # format:
    #     <tr>...<h3>Definition</h3>...<tr>
    #     <tr><td>...</td>
    #         <td><table>
    #             ...
    #             <tr><td>Class:</td><td>...</td></tr>
    #             <tr><td>Method:</td><td>...</td></tr>
    #             ...
    #         </table></td></tr>
    logger.debug('parse Definition section')
    h3 = problem_text.find('h3', text='Definition')
    if h3 is None:
        raise SampleParseError("""<h3>Definition</h3> is not found""")
    definition = {}
    for text, key in {
            'Class:': 'class',
            'Method:': 'method',
            'Parameters:': 'parameters',
            'Returns:': 'returns',
            'Method signature:': 'method_signature',
    }.items():
        # each label cell is followed by its value cell in the same row
        td = h3.parent.parent.next_sibling.find('td', class_='statText', text=text)
        logger.debug('%s', td.parent)
        definition[key] = td.next_sibling.string

    # parse Examples section
    # format:
    #     <tr>...<h3>Examples</h3>...<tr>
    #     <tr><td>0)</td><td></td></tr>
    #     <tr><td></td>
    #         <td><table>
    #             ...
    #             <pre>{5, 8}</pre>
    #             <pre>"foo"</pre>
    #             <pre>3.5</pre>
    #             <pre>Returns: 40.0</pre>
    #             ...
    #         </table></td></tr>
    #     <tr><td>1)</td><td></td></tr>
    #     ...
    logger.debug('parse Examples section')
    h3 = problem_text.find('h3', text='Examples')
    if h3 is None:
        raise SampleParseError("""<h3>Examples</h3> is not found""")
    raw_sample_cases = []  # type: List[Tuple[List[str], str]]
    cursor = h3.parent.parent
    while True:
        # read the header like "0)"
        cursor = cursor.next_sibling
        logger.debug('%s', cursor)
        if not cursor or cursor.name != 'tr':
            # a non-<tr> sibling (or the end) terminates the Examples section
            break
        if cursor.find('td').string != '{})'.format(len(raw_sample_cases)):
            raise SampleParseError("""<td ...>){})</td> is expected, but not found""".format(len(raw_sample_cases)))

        # collect <pre>s: inputs come first, then one "Returns: ..." line
        cursor = cursor.next_sibling
        logger.debug('%s', cursor)
        if not cursor or cursor.name != 'tr':
            raise SampleParseError("""<tr>...</tr> is expected, but not found""")
        input_items = []
        for pre in cursor.find_all('pre'):
            marker = 'Returns: '
            if pre.string.startswith(marker):
                output_item = pre.string[len(marker):]
                break
            else:
                input_items.append(pre.string)
        else:
            raise SampleParseError("""<pre>Returns: ...</pre> is expected, but not found""")
        raw_sample_cases.append((input_items, output_item))

    # convert samples cases to the Greed format
    sample_cases = []
    for i, (input_items, output_item) in enumerate(raw_sample_cases):
        sample_cases.append(TestCase(
            'example-{}'.format(i),
            'input',
            ('\n'.join(map(_convert_to_greed, input_items)) + '\n').encode(),
            'output',
            (_convert_to_greed(output_item) + '\n').encode(),
        ))

    return _TopcoderData(definition=definition, raw_sample_cases=raw_sample_cases, sample_cases=sample_cases)
def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[TestCase]:
    """Fetch the example tests of this CS Academy task via its JSON API.

    Returns an empty list (after logging an error) when the CSRF token, the
    task, or the task data cannot be obtained.

    :param session: an optional ``requests.Session``; a default session is
        used when omitted.
    :raises SampleParseError: when a server response is not valid JSON.
    """
    session = session or utils.get_default_session()
    base_url = self.get_url()

    # get csrftoken: visiting the page makes the server set it as a cookie
    utils.request('GET', base_url, session=session)
    csrftoken = None
    for cookie in session.cookies:
        if cookie.name == 'csrftoken' and cookie.domain == 'csacademy.com':  # type: ignore
            csrftoken = cookie.value  # type: ignore
    if csrftoken is None:
        logger.error('csrftoken is not found')
        return []

    # get config
    ajax_headers = {
        'x-csrftoken': csrftoken,
        'x-requested-with': 'XMLHttpRequest',
    }
    contest_url = 'https://csacademy.com/contest/{}/'.format(self.contest_name)
    response = utils.request('GET', contest_url, session=session, headers=ajax_headers)

    # parse config
    try:
        config = json.loads(response.content.decode())  # NOTE: Should I memoize this? Is the CSAcademyRound class required?
    except json.JSONDecodeError as e:
        raise SampleParseError('failed to parse the config JSON: {}'.format(e.msg)) from e
    task_config = None
    for candidate in config['state']['contesttask']:
        if candidate['name'] == self.task_name:
            # deliberately no break: the last matching entry wins
            task_config = candidate
    if task_config is None:
        logger.error('no such task: %s', self.task_name)
        return []

    # get the task data
    get_contest_task_url = 'https://csacademy.com/contest/get_contest_task/'
    payload = {'contestTaskId': (None, str(task_config['id']))}
    post_headers = {
        'x-csrftoken': csrftoken,
        'x-requested-with': 'XMLHttpRequest',
        'Referer': base_url,
    }
    response = utils.request('POST', get_contest_task_url, session=session, files=payload, headers=post_headers)

    # parse the task data
    try:
        contest_task = json.loads(response.content.decode())
    except json.JSONDecodeError as e:
        raise SampleParseError('failed to parse the task JSON: {}'.format(e.msg)) from e
    if contest_task.get('title') == 'Page not found':
        logger.error('something wrong')
        return []

    # build the test cases from the example tests
    samples = []
    for test_number, example_test in enumerate(contest_task['state']['EvalTask'][0]['exampleTests']):
        samples.append(TestCase(
            'sample-{}'.format(test_number + 1),
            'Input {}'.format(test_number),
            example_test['input'].encode(),
            'Output {}'.format(test_number),
            example_test['output'].encode(),
        ))
    return samples