def get_questions(self) -> List[Question]:
    """Fetch the contest's combined problems page and scrape each problem.

    The page at ``/contest/<problemset>/problems`` is downloaded through
    ``self.download_response`` and parsed with ``document_fromstring``.
    Every ``div.problem-statement`` element becomes one ``Question``
    (numbered from 1) carrying its title, time limit and sample tests.

    Returns:
        The scraped questions, in page order.

    Raises:
        IndexError: if the caption, a title, a time limit, a sample block
            or a ``<pre>`` text node is missing from the page.
    """
    contest = self.uri.problemset
    logger.info(
        f'Downloading page {self.base_url}/contest/{contest}/problems')
    page_html = self.download_response(f"/contest/{contest}/problems")
    tree = document_fromstring(page_html)

    caption = tree.xpath('//div[@class="caption"]/text()')[0]
    logger.info(f'Found: {caption} ✅')
    logger.info('Scraping problems:')

    # The time-limit element's text starts with this fixed label; the value
    # is the first space-delimited token that follows it.
    label_length = len('time limit per test')

    scraped: List[Question] = []
    statements = tree.xpath('//div[@class="problem-statement"]')
    for number, statement in enumerate(statements, start=1):
        title = statement.find_class("title")[0].text_content()
        limit_text = statement.find_class("time-limit")[0].text_content()
        time_limit = limit_text[label_length:].partition(' ')[0]
        question = Question(number, title, self.base_dir, time_limit)

        samples = statement.find_class("sample-test")[0]
        input_nodes = samples.find_class('input')
        output_nodes = samples.find_class('output')
        for input_node, output_node in zip(input_nodes, output_nodes):
            # Only the first text node of the <pre> is used for each side.
            question.add_test(
                input_node.xpath('descendant-or-self::pre/text()')[0],
                output_node.xpath('descendant-or-self::pre/text()')[0],
            )

        scraped.append(question)
        logger.info(question)

    return scraped
def get_questions(self) -> List[Question]:
    """Fetch the contest's printable task page and scrape each problem.

    The page at ``/contests/<problemset>/tasks_print`` is downloaded through
    ``self.download_response`` and parsed with ``document_fromstring``.
    Every ``div.col-sm-12`` element becomes one ``Question`` (numbered from
    1) carrying its title, time limit and sample tests.

    The time limit is the captured digits of ``Time Limit: <n> sec`` in the
    problem's first ``<p>`` (a string); when the pattern is absent the
    integer ``5`` is used instead.

    Returns:
        The scraped questions, in page order.
    """
    contest = self.uri.problemset
    logger.info(f'Downloading page {self.base_url}/contests/{contest}/tasks_print')
    page_html = self.download_response(f"/contests/{contest}/tasks_print")
    tree = document_fromstring(page_html)

    caption = tree.xpath('/html/head/title')[0].text_content()
    logger.info(f'Found: {caption} ✅')
    logger.info('Scraping problems:')

    scraped: List[Question] = []
    sections = tree.xpath('//div[@class="col-sm-12"]')
    for number, section in enumerate(sections, start=1):
        title = section.find_class("h2")[0].text_content()

        limits_text = section.xpath('descendant-or-self::p')[0].text_content()
        limit_matches = re.findall(r'Time Limit: (\d+) sec.*', limits_text)
        time_limit = limit_matches[0] if limit_matches else 5
        question = Question(number, title, self.base_dir, time_limit)

        # The first four parts are `Problem Statement`, `Constraints`,
        # `Input` and `Output` (format); the rest alternate between a
        # sample input and its sample output.
        sample_parts = section.find_class("lang-en")[0].find_class("part")[4:]
        input_parts = sample_parts[::2]
        output_parts = sample_parts[1::2]
        assert len(input_parts) == len(output_parts)

        for input_part, output_part in zip(input_parts, output_parts):
            question.add_test(
                input_part.xpath('descendant-or-self::pre/text()')[0].strip(),
                output_part.xpath('descendant-or-self::pre/text()')[0].strip(),
                custom_testcase=False,
            )

        scraped.append(question)
        logger.info(question)

    return scraped
def download_question(self, idx: int, problem: CSESProblem) -> Question:
    """Download one problem page and build a ``Question`` with its samples.

    Args:
        idx: Number passed through to the ``Question`` constructor.
        problem: Object providing ``url`` (fetched through
            ``self.download_response``) and ``name`` (used as the title).

    Returns:
        The question, with one test added per ``Input:`` / ``Output:`` pair
        found in the raw HTML.

    Raises:
        IndexError: if the page has no first ``task-constraints`` list item.
    """
    problem_html = self.download_response(problem.url)
    doc = document_fromstring(problem_html)

    time_limit = doc.xpath(
        '//ul[@class="task-constraints"]/li[1]/text()')[0]
    # time = ' 1.00 s' -> 1.00
    time_limit = time_limit.strip()[:-1].strip()

    question = Question(idx, problem.name, self.base_dir, time_limit)

    # Samples are located by scanning the raw HTML: each one is the span from
    # an 'Input:' / 'Output:' label up to and including the next '</code>'.
    # The first label is searched for *before* entering the loop so a page
    # without samples adds no test, and any missing marker ends the scan
    # instead of slicing with a -1 offset.
    code_close = '</code>'
    input_start = problem_html.find('Input:')
    while input_start != -1:
        input_end = problem_html.find(code_close, input_start)
        if input_end == -1:
            break
        input_end += len(code_close)

        output_start = problem_html.find('Output:', input_end + 1)
        if output_start == -1:
            break
        output_end = problem_html.find(code_close, output_start)
        if output_end == -1:
            break
        output_end += len(code_close)

        input_lines = document_fromstring(
            problem_html[input_start:input_end]).xpath('//code/text()')
        output_lines = document_fromstring(
            problem_html[output_start:output_end]).xpath('//code/text()')

        # The text() query yields a list of text nodes; they are joined with
        # newlines to rebuild the sample.
        question.add_test('\n'.join(input_lines), '\n'.join(output_lines))

        input_start = problem_html.find('Input:', output_end + 1)

    logger.info(question)
    return question
def load_questions(self, force_download=False) -> None:
    """Populate ``self.questions`` from the web or from the metadata file.

    When ``force_download`` is truthy or ``self.metadata_path`` does not
    exist, the questions are fetched with the scraper matching
    ``self.platform`` (``'cf'`` or ``'cc'``; any other value fetches
    nothing) and ``self.save_questions()`` is called. Otherwise the JSON
    file at ``self.metadata_path`` is read and each entry of its
    ``'questions'`` list is turned into a ``Question`` via
    ``Question.from_dict``.

    Args:
        force_download: Skip the metadata file and fetch again.
    """
    needs_download = force_download or not os.path.exists(self.metadata_path)

    if needs_download:
        if self.platform == 'cf':
            self.questions = self.get_questions_codeforces()
        elif self.platform == 'cc':
            self.questions = self.get_questions_codechef()
        self.save_questions()
        return

    # Start from an empty list and grow it entry by entry while the file
    # is open.
    self.questions = []
    with open(self.metadata_path, 'r') as metadata_file:
        stored = json.load(metadata_file)
        self.questions.extend(
            Question.from_dict(entry) for entry in stored['questions'])
def load_questions(self, force_download=False) -> None:
    """Populate ``self.questions`` from the platform or the metadata file.

    When ``force_download`` is truthy or ``self.platform.metadata_path``
    does not exist, the questions are fetched with
    ``self.platform.get_questions()`` and then persisted with
    ``self.save_questions()``. Otherwise the JSON file at
    ``self.platform.metadata_path`` is read and each entry of its
    ``'questions'`` list is turned into a ``Question`` via
    ``Question.from_dict``.

    An ``InvalidProblemSetURI`` raised while fetching is logged and not
    propagated. In that case nothing is saved, so a failed download does
    not persist whatever ``self.questions`` held before the call.

    Args:
        force_download: Skip the metadata file and fetch again.
    """
    metadata_path = self.platform.metadata_path

    if force_download or not os.path.exists(metadata_path):
        try:
            self.questions = self.platform.get_questions()
        except InvalidProblemSetURI as err:
            logger.error(err)
            # Nothing was downloaded; do not save.
            return
        self.save_questions()
        return

    self.questions = []
    with open(metadata_path, 'r') as file:
        metadata = json.load(file)
        for question in metadata['questions']:
            self.questions.append(Question.from_dict(question))
def get_questions_codeforces(self) -> List[Question]:
    """Download the contest's combined problems page and scrape each problem.

    Requests ``/contest/<self.contest>/problems`` from ``codeforces.com``
    over HTTPS and turns every ``div.problem-statement`` element into a
    ``Question`` (numbered from 0) carrying its title, time limit and
    sample tests. Progress is reported with ``print``.

    Returns:
        The scraped questions, in page order.

    Raises:
        Exception: if the server answers with a status other than 200.
    """
    print(
        f'Downloading page https://codeforces.com/contest/{self.contest}/problems'
    )

    url = 'codeforces.com'
    conn = HTTPSConnection(url)
    # Close the connection on every path, including a failing request or
    # read, so the socket is never leaked.
    try:
        conn.request("GET", f"/contest/{self.contest}/problems")
        response = conn.getresponse()
        if response.status != 200:
            raise Exception(
                f'No contest found for codeforces/{self.contest} ❌❌')
        html = response.read().decode()
    finally:
        conn.close()

    questions: List[Question] = []
    doc = document_fromstring(html)

    caption = doc.xpath('//div[@class="caption"]/text()')[0]
    print(f'Found: {caption} ✅')
    print('Scraping problems:')

    problems = doc.xpath('//div[@class="problem-statement"]')
    for idx, problem in enumerate(problems):
        title = problem.find_class("title")[0].text_content()

        # The element's text starts with the fixed label below; the value is
        # the first space-delimited token after it.
        time_limit = problem.find_class("time-limit")[0].text_content()
        time_limit = time_limit[len('time limit per test'):].split(' ')[0]
        # Only the numeric conversion is guarded, so an error raised while
        # constructing the Question is not mistaken for a bad time limit.
        try:
            limit_seconds = float(time_limit)
        except ValueError:
            limit_seconds = 5.0
        question = Question(idx, title, self.base_dir, limit_seconds)

        sample_tests = problem.find_class("sample-test")[0]
        inputs = sample_tests.find_class('input')
        outputs = sample_tests.find_class('output')
        for inp, out in zip(inputs, outputs):
            sample_input = inp.xpath('descendant-or-self::pre/text()')[0]
            sample_output = out.xpath('descendant-or-self::pre/text()')[0]
            question.add_test(sample_input, sample_output)

        questions.append(question)
        print(question)

    return questions
def parse_question(self, idx: int, problem: Dict) -> Question:
    """Build a ``Question`` from a problem dictionary.

    Args:
        idx: Number passed through to the ``Question`` constructor.
        problem: Mapping read for the keys ``'problem_code'``,
            ``'problem_name'``, ``'max_timelimit'`` and ``'body'``.

    Returns:
        A question titled ``"<problem_code> <problem_name>"`` with one test
        per (input, output) pair that ``self._scape_test_cases`` yields for
        the body, first for the ``example`` labels and then for the
        ``sample`` labels.
    """
    title = ' '.join((problem['problem_code'], problem['problem_name']))
    question = Question(idx, title, self.base_dir, problem['max_timelimit'])

    statement = problem['body']
    label_pairs = (
        ('example input', 'example output'),
        ('sample input', 'sample output'),
    )
    for input_label, output_label in label_pairs:
        found = self._scape_test_cases(input_label, output_label, statement)
        for case_input, case_output in found:
            question.add_test(case_input, case_output)

    return question