def get_problem(self, *args, **kwargs):
    # Fetch and parse a single problem page from WUST OJ.
    url = 'http://acm.wust.edu.cn/problem.php?id=' + str(kwargs['pid']) + '&soj=0'
    problem = Problem()
    try:
        website_data = Spider.get_data(url, self.code_type)
        problem.remote_id = kwargs['pid']
        problem.remote_url = url
        problem.remote_oj = 'WUST'
        problem.title = re.search(r': ([\s\S]*?)</h2>', website_data).group(1)
        problem.time_limit = re.search(r'(\d* Sec)', website_data).group(1)
        problem.memory_limit = re.search(r'(\d* MB)', website_data).group(1)
        problem.special_judge = re.search(
            r'class=red>Special Judge</span>', website_data) is not None
        soup = BeautifulSoup(website_data, 'lxml')
        # case: problem.picture = self.parse_html("img", soup, website_data)
        problem.description = self.parse_html("Description", soup, website_data)
        problem.input = self.parse_html("Input", soup, website_data)
        problem.output = self.parse_html("Output", soup, website_data)
        input_data = self.parse_html("Sample Input", soup, website_data)
        output_data = self.parse_html("Sample Output", soup, website_data)
        problem.hint = self.parse_html("HINT", soup, website_data)
        problem.author = self.parse_html("Author", soup, website_data)
        problem.source = self.parse_html("Source", soup, website_data)
        problem.sample = [{'input': input_data, 'output': output_data}]
    except Exception:
        return Problem.PROBLEM_NOT_FOUND
    return problem
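# NOTE: the WUST spider above calls a `parse_html` helper that is defined
# elsewhere on the spider class and is not shown in this section. The sketch
# below is only an assumption of what such a helper might do (return the HTML
# that follows a heading such as "Description"); the repo's real implementation
# may differ.
def parse_html(self, section_name, soup, website_data):
    # Locate the element whose text is exactly the requested section heading.
    heading = soup.find(lambda tag: tag.get_text(strip=True) == section_name)
    if heading is None:
        return ''
    # Collect the HTML of the following siblings until the next heading.
    parts = []
    for sibling in heading.next_siblings:
        if getattr(sibling, 'name', None) in ('h2', 'h3'):
            break
        parts.append(str(sibling))
    return ''.join(parts).strip()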
def get_problem(self, *args, **kwargs):
    # Fetch and parse a single problem page from HDU OJ.
    url = 'http://acm.hdu.edu.cn/showproblem.php?pid=' + str(kwargs['pid'])
    problem = Problem()
    try:
        website_data = Spider.get_data(url, self.code_type)
        problem.remote_id = kwargs['pid']
        problem.remote_url = url
        problem.remote_oj = 'HDU'
        problem.title = re.search(
            r'color:#1A5CC8\'>([\s\S]*?)</h1>', website_data).group(1)
        problem.time_limit = re.search(r'(\d* MS)', website_data).group(1)
        problem.memory_limit = re.search(r'/(\d* K)', website_data).group(1)
        problem.special_judge = re.search(
            r'color=red>Special Judge</font>', website_data) is not None
        problem.description = re.search(
            r'>Problem Description</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
            website_data).group(1)
        problem.input = re.search(
            r'>Input</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
            website_data).group(1)
        problem.output = re.search(
            r'>Output</div>[\s\S]*?panel_content>([\s\S]*?)</div>',
            website_data).group(1)
        # Sample blocks may be wrapped in an extra <pre><div ...> element.
        match_group = re.search(
            r'>Sample Input</div>[\s\S]*?panel_content>([\s\S]*?)</div',
            website_data)
        input_data = ''
        if match_group:
            input_data = re.search(
                r'(<pre><div[\s\S]*?>)?([\s\S]*)', match_group.group(1)).group(2)
        output_data = ''
        match_group = re.search(
            r'>Sample Output</div>[\s\S]*?panel_content>([\s\S]*?)</div',
            website_data)
        if match_group:
            output_data = re.search(
                r'(<pre><div[\s\S]*?>)?([\s\S]*)', match_group.group(1)).group(2)
            if re.search('<div', output_data):
                output_data = re.search(r'([\s\S]*?)<div', output_data).group(1)
        problem.sample = [{'input': input_data, 'output': output_data}]
        match_group = re.search(
            r'>Author</div>[\s\S]*?panel_content>([\s\S]*?)</div>', website_data)
        if match_group:
            problem.author = match_group.group(1)
        match_group = re.search(
            r'<i>Hint</i>[\s\S]*?/div>[\s]*([\s\S]+?)</div>', website_data)
        if match_group:
            problem.hint = match_group.group(1)
    except Exception:
        return Problem.PROBLEM_NOT_FOUND
    return problem
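# NOTE: `Spider.get_data(url, code_type)` is used by the WUST, HDU, and FZU
# spiders but its definition is not part of this section. A minimal sketch,
# assuming it simply fetches the page and decodes it with the OJ-specific
# charset; the repo's real helper may add headers, cookies, or retries.
import requests

def get_data(url, code_type):
    # Fetch the page and decode it with the given charset (e.g. 'gbk', 'utf-8').
    response = requests.get(url, timeout=10)
    response.encoding = code_type
    return response.text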
def get_problem(self, *args, **kwargs):
    # Fetch and parse a single problem page from Codeforces.
    pid = kwargs.get('pid')
    # The problem id is the contest number plus the index letter, e.g. '1000A'.
    url = ('http://codeforces.com/problemset/problem/' +
           pid[:-1] + '/' + pid[-1:])
    problem = Problem()
    try:
        website_data = self.req.get(url)
        problem.remote_id = kwargs.get('pid')
        problem.remote_url = url
        problem.remote_oj = 'CODEFORCES'
        problem.title = re.search(
            r'class="title">([\s\S]*?)</div>', website_data.text).group(1)
        temp_result = re.search(
            r'time-limit"><div class="property-title">([\s\S]*?)</div>([\s\S]*?)</div>',
            website_data.text)
        problem.time_limit = temp_result.group(1) + ': ' + temp_result.group(2)
        temp_result = re.search(
            r'class="memory-limit"><div class="property-title">([\s\S]*?)</div>([\s\S]*?)</div>',
            website_data.text)
        problem.memory_limit = temp_result.group(1) + ': ' + temp_result.group(2)
        problem.special_judge = None
        problem.description = re.search(
            r'class="property-title">'
            r'([\s\S]*?)</div>([\s\S]*?)</div>'
            r'([\s\S]*?)<div>([\s\S]*?)</div>', website_data.text).group(4)
        problem.input = re.search(
            r'class="section-title">Input</div>([\s\S]*?)</div>',
            website_data.text).group(1)
        problem.output = re.search(
            r'class="section-title">Output</div>([\s\S]*?)</div>',
            website_data.text).group(1)
        # Sample tests are easier to collect with BeautifulSoup than with regexes.
        input_data = ''
        soup = BeautifulSoup(website_data.text, 'lxml')
        lines = soup.find_all('div', attrs={'class': 'input'})
        if lines:
            for line in lines:
                input_data += str(line.find('pre')) + '\n'
        output_data = ''
        lines = soup.find_all('div', attrs={'class': 'output'})
        if lines:
            for line in lines:
                output_data += str(line.find('pre')) + '\n'
        problem.sample = [{'input': input_data, 'output': output_data}]
        temp_result = re.search(
            r'div class="note">([\s\S]*?)</div>([\s\S]*?)</div>',
            website_data.text).group(2)
        problem.hint = temp_result
    except Exception:
        return None
    return problem
def get_problem(self, *args, **kwargs):
    # Fetch and parse a single problem page from POJ.
    url = 'http://poj.org/problem?id=' + str(kwargs['pid'])
    problem = Problem()
    try:
        res = self.req.get(url=url)
        website_data = res.text
        problem.remote_id = kwargs['pid']
        problem.remote_url = url
        problem.remote_oj = 'POJ'
        problem.title = re.search(
            r'ptt" lang="en-US">([\s\S]*?)</div>', website_data).group(1)
        problem.time_limit = re.search(r'(\d*MS)', website_data).group(1)
        problem.memory_limit = re.search(
            r'Memory Limit:</b> ([\s\S]*?)</td>', website_data).group(1)
        problem.special_judge = re.search(
            r'red;">Special Judge</td>', website_data) is not None
        problem.description = re.search(
            r'>Description</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
            website_data).group(1)
        problem.input = re.search(
            r'>Input</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
            website_data).group(1)
        problem.output = re.search(
            r'>Output</p>[\s\S]*?lang="en-US">([\s\S]*?)</div>',
            website_data).group(1)
        match_group = re.search(
            r'>Sample Input</p>([\s\S]*?)<p class', website_data)
        input_data = ''
        if match_group:
            input_data = re.search(
                '"sio">([\s\S]*?)</pre>', match_group.group(1)).group(1)
        output_data = ''
        match_group = re.search(
            r'>Sample Output</p>([\s\S]*?)<p class', website_data)
        if match_group:
            output_data = re.search(
                '"sio">([\s\S]*?)</pre>', match_group.group(1)).group(1)
        problem.sample = [{'input': input_data, 'output': output_data}]
        # match_group = re.search(
        #     r'>Author</div>[\s\S]*?panel_content>([\s\S]*?)</div>', website_data)
        # if match_group:
        #     problem.author = match_group.group(1)
        match_group = re.search(
            r'>Hint</p>[\s\S]*?"en-US">([\s\S]*?)</div>', website_data)
        if match_group:
            problem.hint = match_group.group(1)
        match_group = re.search(
            r'>Source</p>[\s\S]*?"en-US">([\s\S]*?)</div>', website_data)
        if match_group:
            problem.source = match_group.group(1)
        return problem
    except Exception:
        pass
    return None
def get_problem(self, *args, **kwargs):
    url = 'http://acm.fzu.edu.cn/problem.php?pid=' + str(kwargs['pid'])
    problem = Problem()
    try:
        website_data = Spider.get_data(url, self.code_type)
        soup = BeautifulSoup(website_data, 'lxml')
        problem.remote_id = kwargs['pid']
        problem.remote_url = url
        problem.remote_oj = 'FZU'
        problem.title = re.search(
            r'<b> Problem [\d]* ([\s\S]*?)</b>', website_data).group(1)
        problem.time_limit = re.search(r'(\d* mSec)', website_data).group(1)
        problem.memory_limit = re.search(r'(\d* KB)', website_data).group(1)
        problem.special_judge = re.search(
            r'<font color="blue">Special Judge</font>', website_data) is not None
        pro_desc = soup.find_all(attrs={"class": 'pro_desc'})
        problem.description = pro_desc[0].get_text()
        if len(pro_desc) >= 2:
            problem.input = pro_desc[1].get_text()
        if len(pro_desc) >= 3:
            problem.output = pro_desc[2].get_text()
        data = soup.find_all(attrs={"class": 'data'})
        if len(data) > 1:
            input_data = data[0].get_text()
            output_data = data[1].get_text()
            problem.sample = [{'input': input_data, 'output': output_data}]
        h2s = soup.find_all('h2')
        for h2 in h2s[-2:]:
            if h2.get_text().strip() == 'Hint':
                problem.hint = h2.next_sibling
            if h2.get_text().strip() == 'Source':
                problem.source = h2.next_sibling
    except Exception as e:
        # print(e)
        return Problem.PROBLEM_NOT_FOUND
    return problem
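# A minimal usage sketch. It assumes the POJ method above lives on a spider
# class that provides `self.req` (e.g. a requests.Session); `POJSpider` is a
# hypothetical name, not necessarily the class name used in the repo.
if __name__ == '__main__':
    spider = POJSpider()
    problem = spider.get_problem(pid='1000')
    if problem is not None and problem != Problem.PROBLEM_NOT_FOUND:
        print(problem.title, problem.time_limit, problem.memory_limit)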