def parse_hda_scnd(self, response):
    """Store the parsed home/draw/away data on the pending item and emit it.

    The object found at ``response.meta['item']`` (presumably a Scrapy item
    loader -- it is only used through ``add_value`` and ``load_item``)
    receives the output of ``home_draw_away(response.body)`` under the
    ``'hda_scnd_results'`` field.  The object's ``__dict__`` is then logged
    through ``logger_absent`` and the loaded item is yielded.
    """
    # Kept as a function-scope import, exactly as the original does.
    from json_analysers import home_draw_away

    pending = response.meta['item']
    pending.add_value('hda_scnd_results', home_draw_away(response.body))

    saved_message = 'Saved: ' + str(pending.__dict__)
    logger_absent.info(saved_message)

    yield pending.load_item()
def parse_category(self, response):
    """Follow tournament links from a category page.

    Every ``href`` matched inside the ``tournamentTable`` table, except the
    first match, is resolved against the response's base URL.  A URL that
    already has ``models.Result`` rows with the same ``tournament_url`` is
    logged through ``logger_present``; any other URL is logged through
    ``logger_absent`` and requested with ``self.parse_item`` as callback.
    The request meta carries this page's URL plus the ``year``, ``group``
    and ``league`` values taken from ``response.meta``.
    """
    selector = HtmlXPathSelector(response)
    hrefs = selector.select(
        "//table[@id='tournamentTable']//a[not(@class)]/@href"
    ).extract()

    # The first extracted link is skipped, as in the original slice [1:].
    for href in hrefs[1:]:
        target = urljoin_rfc(get_base_url(response), href)
        stored_rows = (
            session.query(models.Result)
            .filter_by(tournament_url=target)
            .all()
        )

        if stored_rows:
            logger_present.info(target)
            continue

        # NOTE(review): the request is assumed to belong to the "absent"
        # branch only (no re-crawl of stored tournaments) -- confirm.
        logger_absent.info(target)
        request_meta = {
            'url': response.url,
            'year': response.meta['year'],
            'group': response.meta['group'],
            'league': response.meta['league'],
        }
        yield Request(target, meta=request_meta, callback=self.parse_item)