Ejemplo n.º 1
0
    def parse(self, response):
        """Build a ``TeamItem`` from the page's ``info_box`` and yield it.

        Raw strings are selected with XPath and handed to a ``TeamLoader``
        unchanged, except that the indexed fields keep only the text that
        precedes the first ``'('``.

        Args:
            response: Response object whose ``xpath`` method is queried.

        Yields:
            The item returned by ``TeamLoader.load_item()``.

        Raises:
            IndexError: If the ``p[2]`` or ``p[3]`` lookup returns fewer
                text nodes than the indexes used below.
        """
        loader = TeamLoader(item=TeamItem(), response=response)

        # Text nodes of the third <p>: node 0 and node 1 feed two fields.
        third_par_texts = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[3]/text()'
        ).extract()
        playoffs_raw = third_par_texts[0].split('(')[0]
        titles_raw = third_par_texts[1].split('(')[0]

        # Passed on as the full list of extracted strings (no indexing).
        name_texts = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[1]/text()[2]'
        ).extract()

        # NOTE(review): wins and losses both receive this same raw string
        # (same XPath, same index) -- presumably the loader's processors
        # separate the two numbers; confirm against TeamLoader.
        second_par_texts = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[2]/text()'
        ).extract()
        record_raw = second_par_texts[1].split('(')[0]

        raw_fields = (
            ('playoff_appearances', playoffs_raw),
            ('championships', titles_raw),
            ('full_name', name_texts),
            ('wins', record_raw),
            ('losses', record_raw),
        )
        for field_name, raw_value in raw_fields:
            loader.add_value(field_name, raw_value)

        yield loader.load_item()
Ejemplo n.º 2
0
    def parse_team(self, response):
        """Load the team summary and schedule a request for each season link.

        Summary fields are read from the ``info_box`` element and handed to
        a ``TeamLoader``; the indexed fields keep only the text before the
        first ``'('``. The third ``/``-separated segment of the first link
        under ``div[4]`` is then used as the ``id`` of the element whose
        ``tbody`` rows are scanned for season links.

        Args:
            response: Response object for the team page.

        Yields:
            One ``scrapy.Request`` per season link (callback
            ``self.parse_season``, with the shared ``seasons`` dict in
            ``meta``), followed by the item from ``TeamLoader.load_item()``.

        Raises:
            IndexError: If the ``p[2]`` or ``p[3]`` lookup returns fewer
                text nodes than the indexes used below.
        """
        tl = TeamLoader(item=TeamItem(), response=response)

        # Each paragraph is queried once; its text nodes feed two fields.
        third_par_texts = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[3]/text()'
        ).extract()
        playoff_appearances = third_par_texts[0].split('(')[0]
        championships = third_par_texts[1].split('(')[0]

        full_name = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[1]/text()[2]'
        ).extract()

        # NOTE(review): wins and losses both receive this same raw string
        # (same XPath, same index) -- presumably the loader's processors
        # separate the two numbers; confirm against TeamLoader.
        second_par_texts = response.xpath(
            '//*[@id="info_box"]/div[5]/div/p[2]/text()'
        ).extract()
        wins = second_par_texts[1].split('(')[0]
        losses = second_par_texts[1].split('(')[0]

        # One dict shared by every season request through ``meta``.
        seasons = {}

        tl.add_value('playoff_appearances', playoff_appearances)
        tl.add_value('championships', championships)
        tl.add_value('full_name', full_name)
        tl.add_value('wins', wins)
        tl.add_value('losses', losses)

        # extract_first() returns None when nothing matches, and the href
        # may have fewer than three segments; in either case skip the
        # season requests instead of losing the whole item to an exception.
        href = response.xpath(
            '//*[@id="info_box"]/div[4]/ul/li[1]/a/@href'
        ).extract_first()
        segments = href.split('/') if href else []
        if len(segments) > 2:
            abbr = segments[2]
            season_links = response.xpath(
                '//*[@id="' + abbr + '"]/tbody//tr/td[1]/a/@href'
            ).extract()
        else:
            self.logger.warning(
                'No team abbreviation link found on %s; '
                'skipping season requests', response.url)
            season_links = []

        for link in season_links:
            request = scrapy.Request(
                response.urljoin(link),
                callback=self.parse_season,
                meta={'seasons': seasons},
            )
            self.logger.debug('Queued season request: %s', request)
            # The request must be yielded for Scrapy to schedule it; merely
            # constructing it never invokes the callback.
            yield request

        # NOTE(review): the item is emitted before any parse_season callback
        # has run, so ``seasons`` is still empty here. Whether later updates
        # made by the callback reach the exported item depends on the loader
        # processors and pipelines -- confirm, or chain the item through the
        # season requests instead.
        tl.add_value('seasons', seasons)
        yield tl.load_item()