Ejemplo n.º 1
0
    def parse(self, response):
        """Build a ``ProblemItem`` from a POJ problem page response.

        Reads ``self.problem_id``, ``response.url`` and ``response.body``
        and returns the populated item, stamped with the current local time.

        Raises:
            IndexError: if an expected element or limit pattern is not
                found in the page.
        """
        # Rewrite bare comparison operators as HTML entities before parsing,
        # presumably so the parser does not treat them as tag delimiters.
        # NOTE(review): passing str arguments to replace() assumes
        # response.body is a str (Python 2); on Python 3 it is bytes --
        # confirm the target runtime before porting.
        html = (
            response.body
            .replace('<=', ' &le; ')
            .replace(' < ', ' &lt; ')
            .replace(' > ', ' &gt; ')
            .replace('>=', ' &ge; ')
        )

        sel = Selector(text=html)

        # Evaluate each selector once instead of once per field.
        sections = sel.css('.ptx').extract()
        samples = sel.css('.sio').extract()
        limits = sel.css('.plm')

        item = ProblemItem()
        item['origin_oj'] = 'poj'
        item['problem_id'] = self.problem_id
        item['problem_url'] = response.url
        item['title'] = sel.css('.ptt').xpath('./text()').extract()[0]
        item['description'] = sections[0]
        item['input'] = sections[1]
        item['output'] = sections[2]

        # Prefer the "Case T...MS" form of the limit and fall back to the
        # plain "T...MS" form when it is absent. The fixed slice offsets
        # presumably skip the label/markup preceding the value -- TODO
        # confirm against a live page.
        case_limit = limits.re(r'Case\sT[\S*\s]*MS')
        if case_limit:
            item['time_limit'] = case_limit[0][21:]
        else:
            item['time_limit'] = limits.re(r'T[\S*\s]*MS')[0][16:]
        item['memory_limit'] = limits.re(r'Me[\S*\s]*K')[0][18:]

        item['sample_input'] = samples[0]
        item['sample_output'] = samples[1]
        item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return item
Ejemplo n.º 2
0
    def parse(self, response):
        """Build a ``ProblemItem`` from an FZU problem page response.

        Reads ``self.problem_id``, ``response.url`` and ``response.body``
        and returns the populated item, stamped with the current local time.
        The input and output sections are optional: when the page has fewer
        ``.pro_desc`` elements, the corresponding field is set to ``[]``.

        Raises:
            IndexError: if a required element or pattern (title,
                description, limits, counters, samples) is not found.
        """
        # Rewrite bare comparison/shift operators as HTML entities before
        # parsing, presumably so the parser does not treat them as tags.
        # NOTE(review): passing str arguments to replace() assumes
        # response.body is a str (Python 2); on Python 3 it is bytes --
        # confirm the target runtime before porting.
        html = (
            response.body
            .replace(' <= ', ' &le; ')
            .replace(' < ', ' &lt; ')
            .replace(' > ', ' &gt; ')
            .replace(' >= ', ' &ge; ')
            .replace(' << ', ' &lt;&lt; ')
            .replace(' >> ', ' &gt;&gt; ')
        )

        sel = Selector(text=html)

        def as_pre(fragment):
            # Turn the site's <div class="data"> wrapper into a <pre> block.
            return fragment.\
                replace('<div class="data">', '<pre>').\
                replace('</div>', '</pre>')

        # Evaluate each selector once instead of once per field.
        descs = sel.css('.pro_desc').extract()
        data_blocks = sel.css('.data').extract()
        stats = sel.css('.problem_desc')

        # The whitespace inside this XPath is kept exactly as the original
        # continuation-line literal produced it.
        title_xpath = ('//div[contains(@class,'
                       '            "problem_title")]/b/text()')

        item = ProblemItem()
        item['origin_oj'] = 'fzu'
        item['problem_id'] = self.problem_id
        item['problem_url'] = response.url
        # The fixed offsets below presumably strip a constant prefix/suffix
        # around the value -- TODO confirm against a live page.
        item['title'] = sel.xpath(title_xpath).extract()[0][14:].rstrip()
        item['description'] = as_pre(descs[0][22:-6])

        # Optional sections: keep the original [] fallback when missing.
        item['input'] = descs[1] if len(descs) > 1 else []
        item['output'] = descs[2] if len(descs) > 2 else []

        item['time_limit'] = stats.re(r'T[\S*\s]*c')[0][12:]
        item['memory_limit'] = stats.re(r'M[\S*\s]*B')[0][15:]
        item['accept'] = stats.re(r'Accept:*\s[0-9]+')[0][8:]
        item['submit'] = stats.re(r'Submit:*\s[0-9]+')[0][8:]

        # The last two .data elements hold the sample input and output.
        item['sample_input'] = as_pre(data_blocks[-2])
        item['sample_output'] = as_pre(data_blocks[-1])
        item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return item
    def parse(self, response):
        """Build a ``ProblemItem`` from an SDUT problem page response.

        Reads ``self.problem_id`` and ``response.url``, selects the fields
        from ``response`` and returns the populated item, stamped with the
        current local time.

        Raises:
            IndexError: if an expected element or limit pattern is not
                found in the page.
        """
        sel = Selector(response)

        # Evaluate each selector once instead of once per field.
        sections = sel.css('.pro_desc').extract()
        limits = sel.xpath('//a/h5/text()')
        samples = sel.xpath('//div[@class="data"]/pre').extract()

        item = ProblemItem()
        item['origin_oj'] = 'sdut'
        item['problem_id'] = self.problem_id
        item['problem_url'] = response.url
        item['title'] = sel.xpath('//center/h2/text()').extract()[0]
        item['description'] = sections[0]
        item['input'] = sections[1]
        item['output'] = sections[2]
        # The fixed slice offsets presumably skip the label preceding the
        # value -- TODO confirm against a live page.
        item['time_limit'] = limits.re(r'T[\S*\s]*s')[0][12:]
        item['memory_limit'] = limits.re(r'M[\S*\s]*K')[0][14:]
        item['sample_input'] = samples[0]
        item['sample_output'] = samples[1]
        item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return item
    def parse(self, response):
        """Build a ``ProblemItem`` from an HDU problem page response.

        Reads ``self.problem_id`` and ``response.url``, selects the fields
        from ``response`` and returns the populated item, stamped with the
        current local time.

        Raises:
            IndexError: if an expected element or limit pattern is not
                found in the page.
        """
        sel = Selector(response)

        # Evaluate each selector once instead of once per field.
        panels = sel.css('.panel_content').extract()
        limits = sel.xpath('//b/span/text()')
        pre_blocks = sel.xpath('//pre').extract()

        item = ProblemItem()
        item['origin_oj'] = 'hdu'
        item['problem_id'] = self.problem_id
        item['problem_url'] = response.url
        item['title'] = sel.xpath('//h1/text()').extract()[0]
        item['description'] = panels[0]
        item['input'] = panels[1]
        item['output'] = panels[2]
        # The fixed slice offsets presumably skip the label preceding the
        # value -- TODO confirm against a live page.
        item['time_limit'] = limits.re(r'T[\S*\s]*S')[0][12:]
        item['memory_limit'] = limits.re(r'Me[\S*\s]*K')[0][14:]
        # The first two <pre> elements are used as sample input and output.
        item['sample_input'] = pre_blocks[0]
        item['sample_output'] = pre_blocks[1]
        item['update_time'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        return item