Beispiel #1
0
    def download_sample_cases(self, *, session: Optional[requests.Session] = None) -> List[TestCase]:
        """Download the sample cases of this Google Code Jam problem.

        The HTML problem statement is taken from the JSON endpoint that matches
        ``self.domain``.  Every ``<pre class="io-content">`` element of the
        statement is read, and the elements are treated as alternating
        input, output, input, output, ...

        :param session: the HTTP session to use; ``utils.get_default_session()`` is used when omitted
        :return: one ``TestCase`` per input/output pair, named ``sample-1``, ``sample-2``, ...
        :raises SampleParseError: if the problem is not listed in the challenge, or the number of ``io-content`` elements is odd
        :raises requests.HTTPError: re-raised when the request to ``code.google.com`` fails
        :raises AssertionError: if ``self.domain`` is not supported or ``self.problem_id`` doesn't start with ``p``
        """
        session = session or utils.get_default_session()
        if self.domain == 'codingcompetitions.withgoogle.com':
            url = 'https://codejam.googleapis.com/dashboard/{}/poll?p=e30'.format(self.contest_id)
            resp = utils.request('GET', url, session=session)
            # pad the body with "=" up to a multiple of four bytes so that it can be base64-decoded
            padding = b'=' * ((-len(resp.content)) % 4)
            data = json.loads(base64.urlsafe_b64decode(resp.content + padding).decode())
            logger.debug('%s', data)

            # parse JSON
            for task in data['challenge']['tasks']:
                if task['id'] == self.problem_id:
                    statement = task['statement']
                    break
            else:
                raise SampleParseError("the problem {} is not found in the challenge {}".format(repr(self.problem_id), repr(self.contest_id)))

        elif self.domain == 'code.google.com':
            try:
                url = 'https://{}/{}/contest/{}/dashboard/ContestInfo'.format(self.domain, self.kind, self.contest_id)
                resp = utils.request('GET', url, session=session)
            except requests.HTTPError:
                logger.warning('hint: Google Code Jam moves old problems to the new platform')
                raise
            data = json.loads(resp.content.decode())

            # parse JSON
            # raise explicitly instead of using `assert`, which is stripped under `python -O`
            if not self.problem_id.startswith('p'):
                raise AssertionError('the problem id {} must start with "p"'.format(repr(self.problem_id)))
            i = int(self.problem_id[1:])
            statement = data['problems'][i]['body']

        else:
            # raise explicitly: with a bare `assert False` and `python -O`, the code below would
            # fail with an unrelated UnboundLocalError on `statement`
            raise AssertionError('unsupported domain: {}'.format(repr(self.domain)))

        # parse HTML
        soup = bs4.BeautifulSoup(statement, utils.HTML_PARSER)
        io_contents = soup.find_all('pre', class_='io-content')
        if len(io_contents) % 2 != 0:
            raise SampleParseError("""the number of <pre class="io-content"> is not multiple of two""")

        # elements at even positions are taken as inputs, those at odd positions as outputs
        samples = []
        for index, (input_content, output_content) in enumerate(zip(io_contents[0::2], io_contents[1::2]), start=1):
            if input_content.text.startswith('Case #'):
                logger.warning('''the sample input starts with "Case #"''')
            if not output_content.text.startswith('Case #'):
                logger.warning('''the sample output doesn't start with "Case #"''')
            samples.append(TestCase(
                'sample-{}'.format(index),
                'Input {}'.format(index),
                utils.textfile(input_content.text.rstrip()).encode(),
                'Output {}'.format(index),
                utils.textfile(output_content.text.rstrip()).encode(),
            ))

        return samples
Beispiel #2
0
    def download_sample_cases(
            self,
            *,
            session: Optional[requests.Session] = None) -> List[TestCase]:
        """Download the single sample case of this Google Code Jam problem.

        The HTML problem statement is taken from the JSON endpoint that matches
        ``self.domain``.  The statement must contain exactly two
        ``<pre class="io-content">`` elements: the first is used as the input
        and the second as the output.

        :param session: the HTTP session to use; ``utils.get_default_session()`` is used when omitted
        :return: a list which contains exactly one ``TestCase`` named ``sample``
        :raises SampleParseError: if the problem is not listed in the challenge, or the number of ``io-content`` elements is not two
        :raises AssertionError: if ``self.domain`` is not supported or ``self.problem_id`` doesn't start with ``p``
        """
        session = session or utils.get_default_session()
        if self.domain == 'codingcompetitions.withgoogle.com':
            url = 'https://codejam.googleapis.com/dashboard/{}/poll?p=e30'.format(
                self.contest_id)
            resp = utils.request('GET', url, session=session)
            # pad the body with "=" up to a multiple of four bytes so that it can be base64-decoded
            padding = b'=' * ((-len(resp.content)) % 4)
            data = json.loads(
                base64.urlsafe_b64decode(resp.content + padding).decode())
            log.debug('%s', data)

            # parse JSON
            for task in data['challenge']['tasks']:
                if task['id'] == self.problem_id:
                    statement = task['statement']
                    break
            else:
                raise SampleParseError(
                    "the problem {} is not found in the challenge {}".format(
                        repr(self.problem_id), repr(self.contest_id)))

        elif self.domain == 'code.google.com':
            url = 'https://{}/{}/contest/{}/dashboard/ContestInfo'.format(
                self.domain, self.kind, self.contest_id)
            resp = utils.request('GET', url, session=session)
            data = json.loads(resp.content.decode())

            # parse JSON
            # raise explicitly instead of using `assert`, which is stripped under `python -O`
            if not self.problem_id.startswith('p'):
                raise AssertionError(
                    'the problem id {} must start with "p"'.format(
                        repr(self.problem_id)))
            i = int(self.problem_id[1:])
            statement = data['problems'][i]['body']

        else:
            # raise explicitly: with a bare `assert False` and `python -O`, the code below would
            # fail with an unrelated UnboundLocalError on `statement`
            raise AssertionError('unsupported domain: {}'.format(
                repr(self.domain)))

        # parse HTML
        soup = bs4.BeautifulSoup(statement, utils.html_parser)
        io_contents = soup.find_all('pre', class_='io-content')
        if len(io_contents) != 2:
            raise SampleParseError(
                """the number of <pre class="io-content"> is not two""")
        input_content, output_content = io_contents
        if input_content.text.startswith('Case #'):
            log.warning('''the sample input starts with "Case #"''')
        if not output_content.text.startswith('Case #'):
            log.warning('''the sample output doesn't start with "Case #"''')
        sample = TestCase(
            'sample',
            'Input',
            utils.textfile(input_content.text.rstrip()).encode(),
            'Output',
            utils.textfile(output_content.text.rstrip()).encode(),
        )
        return [sample]
Beispiel #3
0
 def download_sample_cases(
     self,
     *,
     session: Optional[requests.Session] = None
 ) -> List[onlinejudge.type.TestCase]:
     """Always fail, because downloading sample cases is no longer provided here.

     :param session: accepted to keep the common signature; it is not used
     :raises SampleParseError: unconditionally, with a pointer to the related issue
     """
     message = "removed. see https://github.com/online-judge-tools/api-client/issues/49"
     raise SampleParseError(message)
Beispiel #4
0
def download(args: argparse.Namespace) -> None:
    """Download the test cases of the problem at ``args.url`` and write them to files.

    ``args.directory`` and ``args.format`` are overwritten with the defaults
    (``test`` and ``%b.%e``) when they are ``None``.  Every destination path is
    checked before the first sample file is written, and no sample file is
    written when ``args.dry_run`` is set.  If ``args.log_file`` is given, the
    samples are also dumped to it as JSON.

    :raises requests.exceptions.InvalidURL: if ``args.url`` is not recognized as a problem
    :raises SampleParseError: if the server returns no test case
    :raises FileExistsError: if one of the files to write already exists
    """
    # resolve the problem from the URL
    problem = dispatch.problem_from_url(args.url)
    if problem is None:
        if dispatch.contest_from_url(args.url) is not None:
            logger.warning('You specified a URL for a contest instead of a problem. If you want to download for all problems of a contest at once, please try to use `oj-prepare` command of https://github.com/online-judge-tools/template-generator')
        raise requests.exceptions.InvalidURL('The URL "%s" is not supported' % args.url)

    # this must be computed before the defaults are filled in below
    is_default_format = args.format is None and args.directory is None
    if args.directory is None:
        args.directory = pathlib.Path('test')
    if args.format is None:
        args.format = '%b.%e'

    # fetch the test cases from the server
    with utils.new_session_with_our_user_agent(path=args.cookie) as sess:
        if args.yukicoder_token and isinstance(problem, YukicoderProblem):
            sess.headers['Authorization'] = 'Bearer {}'.format(args.yukicoder_token)
        fetch = problem.download_system_cases if args.system else problem.download_sample_cases
        samples = fetch(session=sess)

    if not samples:
        raise SampleParseError("Sample not found")

    # record the download in the history, which the submit subcommand uses
    if not args.dry_run and is_default_format:
        history = onlinejudge_command.download_history.DownloadHistory()
        existing_entries = list(args.directory.glob('*'))
        if not existing_entries:
            # reset the history to help users who use only one directory for many problems
            history.remove(directory=pathlib.Path.cwd())
        history.add(problem, directory=pathlib.Path.cwd())

    def files_of(sample: TestCase, *, i: int) -> Iterator[Tuple[str, pathlib.Path, bytes]]:
        """Yield (extension, destination path, content) for the input and the output of a sample."""
        for ext in ('in', 'out'):
            data = getattr(sample, ext + 'put_data')
            if data is not None:
                name = sample.name
                table = {
                    'i': str(i + 1),
                    'e': ext,
                    'n': name,
                    'b': os.path.basename(name),
                    'd': os.path.dirname(name),
                }
                yield ext, args.directory / format_utils.percentformat(args.format, table), data

    # refuse to overwrite: check all destinations before writing anything
    for i, sample in enumerate(samples):
        for _, path, _ in files_of(sample, i=i):
            if path.exists():
                raise FileExistsError('Failed to download since file already exists: ' + str(path))

    # report the samples, and write them unless this is a dry run
    for i, sample in enumerate(samples):
        logger.info('')
        logger.info('sample %d', i)
        for ext, path, data in files_of(sample, i=i):
            if args.silent:
                content = ''
            else:
                content = '\n' + pretty_printers.make_pretty_large_file_content(data, limit=40, head=20, tail=10, bold=True)
            logger.info('%sput: %s%s', ext, sample.name, content)
            if args.dry_run:
                continue
            path.parent.mkdir(parents=True, exist_ok=True)
            with path.open('wb') as fh:
                fh.write(data)
            logger.info(utils.SUCCESS + 'saved to: %s', path)

    if args.log_file:
        with args.log_file.open(mode='w') as fhs:
            json.dump([convert_sample_to_dict(sample) for sample in samples], fhs)
Beispiel #5
0
    def download_sample_cases(
        self,
        *,
        session: Optional[requests.Session] = None
    ) -> List[onlinejudge.type.TestCase]:
        """Download the sample case from the Markdown-like text of the problem page.

        The page at ``self.get_url()`` is searched for the first
        ``<div class="content">`` which has three or more lines starting with
        ``###``.  The code blocks of that text are then assigned to the input
        and the output according to the ``###`` header preceding them.

        :param session: the HTTP session to use; ``utils.get_default_session()`` is used when omitted
        :return: a list which contains exactly one test case
        :raises SampleParseError: if no suitable ``<div>`` is found, if the input or the output is missing, or if more than one of them is found
        """
        session = session or utils.get_default_session()

        # get
        resp = utils.request('GET', self.get_url(), session=session)

        # parse HTML
        html = resp.content.decode(resp.encoding)
        soup = bs4.BeautifulSoup(html, utils.html_parser)
        for div in soup.find_all('div', class_='content'):
            # TODO: find a proper way to detect this tag
            # find a tag which contains something like Markdown
            heading_count = sum(
                1 for line in div.decode_contents().splitlines()
                if line.startswith('###'))
            if heading_count >= 3:
                log.debug('%s', str(div))
                break
        else:
            raise SampleParseError('no markdown')

        # parse Markdown
        # TODO: Should we use a Markdown parser? But I want to avoid making a new dependency only for CodeChef
        # pattern 1: "### Example Input" and a code block  https://www.codechef.com/COOK113A/problems/DAND
        # pattern 2: "###Sample Input:" and a indent https://www.codechef.com/PLIN2020/problems/CNTSET
        # not implemented: "<h3>Example</h3> <pre><b>Input:</b> 1 5 1 2 3 4 5 <b>Output:</b> 2 </pre>" https://www.codechef.com/CNES2017/problems/ACESQN
        def iter_codeblocks():
            """Yield (latest header, code block text) for each indented or fenced code block."""
            header = None  # type: Optional[str]
            fenced = None  # type: Optional[str]
            indented = None  # type: Optional[str]
            for line in div.decode_contents().splitlines(keepends=True):
                is_indented_line = line.startswith((' ' * 4, '\t'))

                # a non-indented line terminates the indented block collected so far
                if indented and not is_indented_line:
                    yield header, indented
                    indented = None

                if line.startswith('###'):
                    header = ' '.join(line.strip(' \r\n#:').split())
                elif is_indented_line and not fenced:
                    collected = '' if indented is None else indented
                    indented = collected + line.lstrip()
                elif not indented and line.rstrip() == '```':
                    if fenced is None:
                        # an opening fence
                        fenced = ''
                    else:
                        # a closing fence
                        yield header, fenced
                        fenced = None
                elif fenced is not None:
                    fenced += line

            # an indented block may continue to the end of the text
            if indented:
                yield header, indented
                indented = None

        # make a testcase object
        name = None  # type: Optional[str]
        input_name = None  # type: Optional[str]
        input_data = None  # type: Optional[bytes]
        output_name = None  # type: Optional[str]
        output_data = None  # type: Optional[bytes]
        for header, codeblock in iter_codeblocks():
            if header is None:
                continue
            lowered = header.lower()
            if lowered in ('sample input', 'example input'):
                if input_data is not None:
                    raise SampleParseError('two inputs found')
                input_name = header
                input_data = codeblock.encode()
            elif lowered in ('sample output', 'example output'):
                if output_data is not None:
                    raise SampleParseError('two outputs found')
                output_name = header
                output_data = codeblock.encode()
            elif lowered in ('sample', 'example'):
                # under a common header, the first block is the input and the second is the output
                name = header
                if input_data is None:
                    input_data = codeblock.encode()
                elif output_data is None:
                    output_data = codeblock.encode()
                else:
                    raise SampleParseError('two samples found')

        if input_data is None:
            raise SampleParseError('no input found')
        if output_data is None:
            raise SampleParseError('no output found')
        return [
            onlinejudge.type.TestCase(
                name=name or 'sample',
                input_name=input_name or 'input',
                input_data=input_data,
                output_name=output_name or 'output',
                output_data=output_data,
            )
        ]
Beispiel #6
0
    def _download_data(
            self,
            *,
            session: Optional[requests.Session] = None) -> _TopcoderData:
        """Download the problem statement of this Topcoder problem and parse it.

        The Definition section is read into a dict with the keys ``class``,
        ``method``, ``parameters``, ``returns`` and ``method_signature``.  The
        Examples section is read into pairs of (input items, output item), and
        these are also converted to test cases with ``_convert_to_greed``.

        :param session: the HTTP session to use; ``utils.get_default_session()`` is used when omitted
        :return: the definition, the raw sample cases, and the converted sample cases
        :raises SampleParseError: if the HTML doesn't have the expected structure
        """
        session = session or utils.get_default_session()

        # download HTML
        url = 'https://community.topcoder.com/stat?c=problem_statement&pm={}'.format(
            self.problem_id)
        resp = utils.request('GET', url, session=session)

        # parse HTML
        soup = bs4.BeautifulSoup(resp.content.decode(resp.encoding),
                                 utils.html_parser)

        problem_texts = soup.find_all('td', class_='problemText')
        if len(problem_texts) != 1:
            raise SampleParseError(
                """<td class="problemText"> is not found or not unique""")
        problem_text = problem_texts[0]

        # parse Definition section
        # format:
        #     <tr>...<h3>Definition</h3>...<tr>
        #     <tr><td>...</td>
        #         <td><table>
        #             ...
        #             <tr><td>Class:</td><td>...</td></tr>
        #             <tr><td>Method:</td><td>...</td></tr>
        #             ...
        #         </table></td></tr>
        logger.debug('parse Definition section')
        h3 = problem_text.find('h3', text='Definition')
        if h3 is None:
            raise SampleParseError("""<h3>Definition</h3> is not found""")
        # the row which follows the header row; it is the same for all labels, so look it up only once
        definition_row = h3.parent.parent.next_sibling
        if definition_row is None:
            raise SampleParseError(
                """<tr>...</tr> is expected after <h3>Definition</h3>, but not found"""
            )
        definition = {}
        for text, key in {
                'Class:': 'class',
                'Method:': 'method',
                'Parameters:': 'parameters',
                'Returns:': 'returns',
                'Method signature:': 'method_signature',
        }.items():
            td = definition_row.find('td', class_='statText', text=text)
            # report a missing label or value as a parse error instead of an AttributeError on None
            if td is None or td.next_sibling is None:
                raise SampleParseError(
                    """<td class="statText">{}</td> followed by its value is expected, but not found"""
                    .format(text))
            logger.debug('%s', td.parent)
            definition[key] = td.next_sibling.string

        # parse Examples section
        # format:
        #     <tr>...<h3>Examples</h3>...<tr>
        #     <tr><td>0)</td><td></td></tr>
        #     <tr><td></td>
        #         <td><table>
        #             ...
        #             <pre>{5, 8}</pre>
        #             <pre>"foo"</pre>
        #             <pre>3.5</pre>
        #             <pre>Returns: 40.0</pre>
        #             ...
        #         </table></td></tr>
        #     <tr><td>1)</td><td></td></tr>
        #     ...
        logger.debug('parse Examples section')
        h3 = problem_text.find('h3', text='Examples')
        if h3 is None:
            raise SampleParseError("""<h3>Examples</h3> is not found""")

        marker = 'Returns: '
        raw_sample_cases = []  # type: List[Tuple[List[str], str]]
        cursor = h3.parent.parent
        while True:
            # read the header like "0)"
            cursor = cursor.next_sibling
            logger.debug('%s', cursor)
            if not cursor or cursor.name != 'tr':
                break
            header_td = cursor.find('td')
            expected_header = '{})'.format(len(raw_sample_cases))
            if header_td is None or header_td.string != expected_header:
                raise SampleParseError(
                    """<td ...>{})</td> is expected, but not found""".format(
                        len(raw_sample_cases)))

            # collect <pre>s
            cursor = cursor.next_sibling
            logger.debug('%s', cursor)
            if not cursor or cursor.name != 'tr':
                raise SampleParseError(
                    """<tr>...</tr> is expected, but not found""")
            input_items = []
            for pre in cursor.find_all('pre'):
                item = pre.string
                # `.string` is None when the tag doesn't have exactly one text child
                if item is None:
                    raise SampleParseError(
                        """<pre> which contains only a text is expected, but not found"""
                    )
                if item.startswith(marker):
                    # the <pre> with the return value closes the example
                    output_item = item[len(marker):]
                    break
                input_items.append(item)
            else:
                raise SampleParseError(
                    """<pre>Returns: ...</pre> is expected, but not found""")
            raw_sample_cases.append((input_items, output_item))

        # convert samples cases to the Greed format
        sample_cases = []
        for i, (input_items, output_item) in enumerate(raw_sample_cases):
            sample_cases.append(
                TestCase(
                    'example-{}'.format(i),
                    'input',
                    ('\n'.join(map(_convert_to_greed, input_items)) +
                     '\n').encode(),
                    'output',
                    (_convert_to_greed(output_item) + '\n').encode(),
                ))

        return _TopcoderData(definition=definition,
                             raw_sample_cases=raw_sample_cases,
                             sample_cases=sample_cases)
    def download_sample_cases(
            self,
            *,
            session: Optional[requests.Session] = None) -> List[TestCase]:
        """Download the example tests of this CS Academy task.

        Three requests are made: the task page (after which the ``csrftoken``
        cookie is read from the session), the contest page (to find the id of
        the task), and ``get_contest_task`` (to get the example tests).

        :param session: the HTTP session to use; ``utils.get_default_session()`` is used when omitted
        :return: one ``TestCase`` per example test, named ``sample-1``, ``sample-2``, ...;
            an empty list, after logging an error, if the ``csrftoken`` cookie or the task
            is not found or the server answers with "Page not found"
        :raises SampleParseError: if one of the responses is not valid JSON
        """
        session = session or utils.get_default_session()
        base_url = self.get_url()

        # get csrftoken
        # the response itself is unused; the token is read from the session's cookies afterwards
        utils.request('GET', base_url, session=session)
        tokens = [
            cookie.value  # type: ignore
            for cookie in session.cookies
            if cookie.name == 'csrftoken' and cookie.domain == 'csacademy.com'  # type: ignore
        ]
        # the last matching cookie wins
        csrftoken = tokens[-1] if tokens else None
        if csrftoken is None:
            logger.error('csrftoken is not found')
            return []

        # get config
        ajax_headers = {
            'x-csrftoken': csrftoken,
            'x-requested-with': 'XMLHttpRequest',
        }
        contest_url = 'https://csacademy.com/contest/{}/'.format(
            self.contest_name)
        resp = utils.request('GET',
                             contest_url,
                             session=session,
                             headers=dict(ajax_headers))
        # parse config
        # NOTE: Should I memoize this? Is the CSAcademyRound class required?
        try:
            config = json.loads(resp.content.decode())
        except json.JSONDecodeError as e:
            raise SampleParseError(
                'failed to parse the config JSON: {}'.format(e.msg)) from e
        matched_tasks = [
            it for it in config['state']['contesttask']
            if it['name'] == self.task_name
        ]
        if not matched_tasks:
            logger.error('no such task: %s', self.task_name)
            return []
        # the last matching entry wins
        task_config = matched_tasks[-1]

        # get
        get_contest_task_url = 'https://csacademy.com/contest/get_contest_task/'
        payload = {'contestTaskId': (None, str(task_config['id']))}
        resp = utils.request('POST',
                             get_contest_task_url,
                             session=session,
                             files=payload,
                             headers={
                                 **ajax_headers, 'Referer': base_url
                             })
        # parse
        try:
            contest_task = json.loads(resp.content.decode())
        except json.JSONDecodeError as e:
            raise SampleParseError('failed to parse the task JSON: {}'.format(
                e.msg)) from e
        if contest_task.get('title') == 'Page not found':
            logger.error('something wrong')
            return []

        example_tests = contest_task['state']['EvalTask'][0]['exampleTests']
        return [
            TestCase(
                'sample-{}'.format(test_number + 1),
                'Input {}'.format(test_number),
                example_test['input'].encode(),
                'Output {}'.format(test_number),
                example_test['output'].encode(),
            ) for test_number, example_test in enumerate(example_tests)
        ]