def parse_search(self, response):
    """Parse one page of Kickstarter search-API results.

    Yields one request per project (detail page, handled by
    ``parse_project``, with the raw project dict forwarded via
    ``meta['json']``), then a request for the next results page.
    """
    common.dump_response(self.settings, response)
    # response.body_as_unicode() is deprecated (removed in Scrapy 2.6);
    # response.text is the drop-in replacement for decoding the body.
    data = json.loads(response.text)
    for project in data['projects']:
        yield scrapy.http.Request(project['urls']['web']['project'],
                                  callback=self.parse_project,
                                  meta={'json': project})
    # Paginate: ask for the next search-results page.
    yield self.generate_search_request(response.meta['index'] + 1)
 def parse_search(self, response):
     common.dump_response(self.settings, response)
     body = response.body_as_unicode()
     jsonresponse = json.loads(body)
     for project in jsonresponse['projects']:
         yield scrapy.http.Request(
             project['urls']['web']['project'],
             callback = self.parse_project,
             meta = {'json': project}
         )
     yield self.generate_search_request(response.meta['index'] + 1)           
    def parse(self, response):
        """Scrape game titles from a MobyGames listing page.

        Yields one ``MobyItem`` per game link in the object-list table,
        then follows every pagination link found in the first
        ``mobFooter`` div (re-entering this callback).
        """
        common.dump_response(self.settings, response)
        sel = Selector(response)

        for game_title in sel.xpath('//table[@id="mof_object_list"]//a[contains(@href, "/game/")]/text()'):
            item = MobyItem()
            item['value'] = game_title.extract()
            yield item

        # The break below means only the first mobFooter div is processed.
        for pagination_links in sel.xpath('//div[@class="mobFooter"]'):
            for link in pagination_links.xpath('.//a/@href'):
                # NOTE(review): urljoin_rfc is deprecated in newer Scrapy
                # (response.urljoin is the modern equivalent) — confirm the
                # project's Scrapy version before switching.
                yield scrapy.http.Request(urljoin_rfc(response.url, link.extract()),
                                          callback=self.parse)
            break
# Example #4 (scraped-listing artifact; the lines "Example #4" / "0" were
# not Python — kept here as a comment so the module stays parseable)
    def parse(self, response):
        """Yield a MobyItem per game title on this listing page, then
        follow pagination links from the mobFooter div, re-entering
        this callback for each one.
        """
        common.dump_response(self.settings, response)
        sel = Selector(response)

        # One item per game link in the object-list table.
        for game_title in sel.xpath(
                '//table[@id="mof_object_list"]//a[contains(@href, "/game/")]/text()'
        ):
            item = MobyItem()
            item['value'] = game_title.extract()
            yield item

        for pagination_links in sel.xpath('//div[@class="mobFooter"]'):
            for link in pagination_links.xpath('.//a/@href'):
                yield scrapy.http.Request(urljoin_rfc(response.url,
                                                      link.extract()),
                                          callback=self.parse)
                pass  # no-op: dead statement, has no effect
            break  # only the first mobFooter div is ever processed
    def parse_project(self, response):
        """Build a KickstarterItem from a project detail page plus the
        search-API metadata forwarded in ``response.meta['json']``.

        Returns a single-element list containing the populated item.
        """
        common.dump_response(self.settings, response)
        # Renamed from `json`: the original local shadowed the stdlib
        # json module that parse_search in this file relies on.
        project_json = response.meta['json']
        sel = Selector(response)

        item = KickstarterItem()
        item['title'] = project_json['name']
        item['currency'] = project_json['currency']
        item['goal'] = float(project_json['goal'])
        item['date'] = int(project_json['deadline'])

        # Remove html tags from description here since we're in the scrapy
        # context and have relevant utilities. SelectorList.extract()
        # already returns a list of strings, so map/lambda is unnecessary
        # (the lambda also shadowed the outer `sel`).
        full_text = ' '.join(sel.xpath('//div[@class="full-description"]//text()').extract())
        blurb_text = ' '.join(sel.xpath('//div[@class="short_blurb"]//text()').extract())
        item['rawtext'] = full_text + ' ' + blurb_text

        item['web'] = response.url

        return [item]
    def parse_project(self, response):
        """Assemble a KickstarterItem from the project page and the JSON
        metadata that parse_search attached to the request.
        """
        common.dump_response(self.settings, response)
        meta_json = response.meta['json']
        selector = Selector(response)

        item = KickstarterItem()
        item['title'] = meta_json['name']
        item['currency'] = meta_json['currency']
        item['goal'] = float(meta_json['goal'])
        item['date'] = int(meta_json['deadline'])

        # Remove html tags from description here since we're in the scrapy
        # context and have relevant utilities: join only the text nodes.
        description_parts = [node.extract() for node in
                             selector.xpath('//div[@class="full-description"]//text()')]
        blurb_parts = [node.extract() for node in
                       selector.xpath('//div[@class="short_blurb"]//text()')]
        item['rawtext'] = ' '.join(description_parts) + ' ' + ' '.join(blurb_parts)

        item['web'] = response.url

        return [item]