def parse(self, response):
    """Yield one loaded TeamItem built from the page's ``info_box`` element.

    Every field is taken from text nodes under ``info_box/div[5]/div``:

    * ``playoff_appearances`` -- first text node of ``p[3]``, up to the
      first ``'('``.
    * ``championships`` -- second text node of ``p[3]``, up to the first
      ``'('``.
    * ``full_name`` -- the list extracted for the second text node of
      ``p[1]`` (passed to the loader as a list).
    * ``wins`` / ``losses`` -- second text node of ``p[2]``, up to the
      first ``'('``.

    Values are not stripped of surrounding whitespace here.

    Raises:
        IndexError: if ``p[3]`` or ``p[2]`` yields fewer text nodes than
            the index being read.
    """
    p1_name_xpath = '//*[@id="info_box"]/div[5]/div/p[1]/text()[2]'
    p2_text_xpath = '//*[@id="info_box"]/div[5]/div/p[2]/text()'
    p3_text_xpath = '//*[@id="info_box"]/div[5]/div/p[3]/text()'

    loader = TeamLoader(item=TeamItem(), response=response)

    p3_first_text = response.xpath(p3_text_xpath).extract()[0]
    playoff_appearances = p3_first_text.split('(')[0]

    p3_second_text = response.xpath(p3_text_xpath).extract()[1]
    championships = p3_second_text.split('(')[0]

    full_name = response.xpath(p1_name_xpath).extract()

    # NOTE(review): wins and losses are read from the same text node with
    # the same expression, so both fields receive the same value. Confirm
    # against the page markup how losses should actually be parsed.
    wins_text = response.xpath(p2_text_xpath).extract()[1]
    wins = wins_text.split('(')[0]

    losses_text = response.xpath(p2_text_xpath).extract()[1]
    losses = losses_text.split('(')[0]

    field_values = (
        ('playoff_appearances', playoff_appearances),
        ('championships', championships),
        ('full_name', full_name),
        ('wins', wins),
        ('losses', losses),
    )
    for field_name, value in field_values:
        loader.add_value(field_name, value)

    yield loader.load_item()
def parse_team(self, response): tl = TeamLoader(item=TeamItem(), response=response) playoff_appearances = response.xpath( '//*[@id="info_box"]/div[5]/div/p[3]/text()' ).extract()[0].split('(')[0] # .strip() championships = response.xpath( '//*[@id="info_box"]/div[5]/div/p[3]/text()' ).extract()[1].split('(')[0] full_name = response.xpath( '//*[@id="info_box"]/div[5]/div/p[1]/text()[2]' ).extract() # _first().strip() wins = response.xpath( '//*[@id="info_box"]/div[5]/div/p[2]/text()' ).extract()[1].split('(')[0] losses = response.xpath( '//*[@id="info_box"]/div[5]/div/p[2]/text()' ).extract()[1].split('(')[0] seasons = {} tl.add_value('playoff_appearances', playoff_appearances) tl.add_value('championships', championships) tl.add_value('full_name', full_name) tl.add_value('wins', wins) tl.add_value('losses', losses) abbr = response.xpath( '//*[@id="info_box"]/div[4]/ul/li[1]/a/@href' ).extract_first().split('/')[2] for sel in response.xpath( '//*[@id="' + abbr + '"]/tbody//tr/td[1]/a/@href' ).extract(): url = response.urljoin(sel) # [:-5] + '/gamelog/' request = scrapy.Request(url, callback=self.parse_season) request.meta['seasons'] = seasons print '\n\n\n', request, '\n\n\n' tl.add_value('seasons', seasons) yield tl.load_item()