def parse(self, response):
    """Build a ``ProblemItem`` (``origin_oj='poj'``) from a problem page.

    Args:
        response: The fetched page. ``response.body`` is used as the HTML
            source and ``response.url`` is stored on the item.

    Returns:
        The populated ``ProblemItem``.

    Raises:
        IndexError: If the title, one of the three ``.ptx`` sections, one
            of the two ``.sio`` samples, or a limit pattern is not found.
    """
    # NOTE(review): as written, the ' < ' and ' > ' replacements map each
    # string to itself; if HTML entities were intended, restore them.
    # NOTE(review): passing str arguments to replace() assumes
    # response.body is a str; on Python 3 Scrapy exposes bytes here --
    # confirm the target runtime.
    html = (
        response.body
        .replace('<=', ' ≤ ')
        .replace(' < ', ' < ')
        .replace(' > ', ' > ')
        .replace('>=', ' ≥ ')
    )
    sel = Selector(text=html)

    # Run each query once; the fields below only index into the results.
    sections = sel.css('.ptx').extract()  # description, input, output
    limits = sel.css('.plm')
    samples = sel.css('.sio').extract()  # sample input, sample output

    item = ProblemItem()
    item['origin_oj'] = 'poj'
    item['problem_id'] = self.problem_id
    item['problem_url'] = response.url
    item['title'] = sel.css('.ptt').xpath('./text()').extract()[0]
    item['description'] = sections[0]
    item['input'] = sections[1]
    item['output'] = sections[2]

    # Prefer the "Case T...MS" match and fall back to the plain "T...MS"
    # match when it is absent. The slices drop the leading label that the
    # pattern matched (presumably 'Case Time Limit:</b> ' and
    # 'Time Limit:</b> ' -- confirm against the page markup).
    try:
        item['time_limit'] = limits.re(r'Case\sT[\S*\s]*MS')[0][21:]
    except IndexError:
        item['time_limit'] = limits.re(r'T[\S*\s]*MS')[0][16:]
    item['memory_limit'] = limits.re(r'Me[\S*\s]*K')[0][18:]

    item['sample_input'] = samples[0]
    item['sample_output'] = samples[1]
    item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return item
def parse(self, response):
    """Build a ``ProblemItem`` (``origin_oj='fzu'``) from a problem page.

    Args:
        response: The fetched page. ``response.body`` is used as the HTML
            source and ``response.url`` is stored on the item.

    Returns:
        The populated ``ProblemItem``. ``input`` and ``output`` are set to
        an empty list when the page has no second or third ``.pro_desc``
        section.

    Raises:
        IndexError: If the title, the first ``.pro_desc`` section, a
            limit/statistics pattern, or the two trailing ``.data`` blocks
            are not found.
    """

    def as_pre(fragment):
        # Swap the '<div class="data">' wrapper (and every closing
        # '</div>') for <pre> tags.
        return (
            fragment
            .replace('<div class="data">', '<pre>')
            .replace('</div>', '</pre>')
        )

    # NOTE(review): as written, the ' < ', ' > ', ' << ' and ' >> '
    # replacements map each string to itself; if HTML entities were
    # intended, restore them.
    # NOTE(review): passing str arguments to replace() assumes
    # response.body is a str; on Python 3 Scrapy exposes bytes here --
    # confirm the target runtime.
    html = (
        response.body
        .replace(' <= ', ' ≤ ')
        .replace(' < ', ' < ')
        .replace(' > ', ' > ')
        .replace(' >= ', ' ≥ ')
        .replace(' << ', ' << ')
        .replace(' >> ', ' >> ')
    )
    sel = Selector(text=html)

    # Run each query once; the fields below only index into the results.
    sections = sel.css('.pro_desc').extract()
    stats = sel.css('.problem_desc')
    data_blocks = sel.css('.data').extract()

    item = ProblemItem()
    item['origin_oj'] = 'fzu'
    item['problem_id'] = self.problem_id
    item['problem_url'] = response.url

    # [14:] drops a fixed-width prefix from the title text (presumably the
    # "Problem NNNN" label -- confirm against the page markup).
    title_text = sel.xpath(
        '//div[contains(@class, "problem_title")]/b/text()'
    ).extract()[0]
    item['title'] = title_text[14:].rstrip()

    # [22:-6] trims 22 leading and 6 trailing characters (presumably the
    # enclosing '<div class="pro_desc">' ... '</div>' pair).
    item['description'] = as_pre(sections[0][22:-6])

    # The input/output sections are optional; fall back to an empty list.
    try:
        item['input'] = sections[1]
    except IndexError:
        item['input'] = []
    try:
        item['output'] = sections[2]
    except IndexError:
        item['output'] = []

    # Each slice removes the label portion of the matched text.
    item['time_limit'] = stats.re(r'T[\S*\s]*c')[0][12:]
    item['memory_limit'] = stats.re(r'M[\S*\s]*B')[0][15:]
    item['accept'] = stats.re(r'Accept:*\s[0-9]+')[0][8:]
    item['submit'] = stats.re(r'Submit:*\s[0-9]+')[0][8:]

    # The last two '.data' blocks are the sample input and sample output.
    item['sample_input'] = as_pre(data_blocks[-2])
    item['sample_output'] = as_pre(data_blocks[-1])
    item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    return item
def parse(self, response):
    """Populate a ``ProblemItem`` (``origin_oj='sdut'``) from ``response``.

    The title is the first ``//center/h2`` text node. The first three
    ``.pro_desc`` elements supply description, input and output in that
    order. Both limits are matched in the ``//a/h5`` text, and the first two
    ``//div[@class="data"]/pre`` elements are the sample input and output.

    Returns the filled item. Indexing an element that is not on the page
    raises ``IndexError``.
    """
    page = Selector(response)
    desc_blocks = page.css('.pro_desc').extract()
    limit_text = page.xpath('//a/h5/text()')
    sample_blocks = page.xpath('//div[@class="data"]/pre').extract()

    item = ProblemItem()
    item['origin_oj'] = 'sdut'
    item['problem_id'] = self.problem_id
    item['problem_url'] = response.url
    item['title'] = page.xpath('//center/h2/text()').extract()[0]
    item['description'] = desc_blocks[0]
    item['input'] = desc_blocks[1]
    item['output'] = desc_blocks[2]

    # The slices cut the label off the front of each matched string.
    time_match = limit_text.re(r'T[\S*\s]*s')[0]
    item['time_limit'] = time_match[12:]
    memory_match = limit_text.re(r'M[\S*\s]*K')[0]
    item['memory_limit'] = memory_match[14:]

    item['sample_input'] = sample_blocks[0]
    item['sample_output'] = sample_blocks[1]

    stamp = datetime.now()
    item['update_time'] = stamp.strftime("%Y-%m-%d %H:%M:%S")
    return item
def parse(self, response):
    """Populate a ``ProblemItem`` (``origin_oj='hdu'``) from ``response``.

    The title is the first ``//h1`` text node. The first three
    ``.panel_content`` elements supply description, input and output in
    that order. Both limits are matched in the ``//b/span`` text, and the
    first two ``//pre`` elements are the sample input and output.

    Returns the filled item. Indexing an element that is not on the page
    raises ``IndexError``.
    """
    doc = Selector(response)
    panels = doc.css('.panel_content').extract()
    limit_nodes = doc.xpath('//b/span/text()')
    pre_blocks = doc.xpath('//pre').extract()

    problem = ProblemItem()
    problem['origin_oj'] = 'hdu'
    problem['problem_id'] = self.problem_id
    problem['problem_url'] = response.url
    problem['title'] = doc.xpath('//h1/text()').extract()[0]
    problem['description'] = panels[0]
    problem['input'] = panels[1]
    problem['output'] = panels[2]

    # Skip the label at the start of each matched limit string.
    time_text = limit_nodes.re(r'T[\S*\s]*S')[0]
    problem['time_limit'] = time_text[12:]
    memory_text = limit_nodes.re(r'Me[\S*\s]*K')[0]
    problem['memory_limit'] = memory_text[14:]

    problem['sample_input'] = pre_blocks[0]
    problem['sample_output'] = pre_blocks[1]

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    problem['update_time'] = timestamp
    return problem