def parse_start_url(self, response):
    """Extract the embedded match-data JavaScript variable from the page.

    Looks for ``var matchCentreData`` first and falls back to
    ``var initialMatchDataForScrappers``. The fallback payload is passed
    through ``Utils.parse_json`` before being stored; the primary payload
    is stored as the raw captured text.

    Returns:
        A ``MatchData`` item whose ``id`` is ``self.match_id`` and whose
        ``match_data`` is the extracted payload.

    Raises:
        CloseSpider: if neither variable yields a non-empty capture.
    """
    # (JavaScript variable name, whether the capture goes through
    # Utils.parse_json), tried in order.
    candidates = (
        ("matchCentreData", False),
        ("initialMatchDataForScrappers", True),
    )
    for needle, is_array in candidates:
        data = response.xpath(
            '//script[contains(., "var ' + needle + '")]/text()'
        ).re_first(needle + r" = ([\w\W]*?);")
        # re_first() returns None when nothing matches, so test truthiness
        # instead of calling len() on the result.
        if not data:
            continue
        if is_array:
            data = Utils.parse_json(data)
        match_data = MatchData()
        match_data['id'] = self.match_id
        match_data['match_data'] = data
        return match_data
    raise CloseSpider("Match data not found")
def parse_start_url(self, response):
    """Extract the embedded match-data JavaScript variable from the page.

    Looks for ``var matchCentreData`` first and falls back to
    ``var initialMatchDataForScrappers``. The fallback payload is passed
    through ``Utils.parse_json`` before being stored; the primary payload
    is stored as the raw captured text.

    Returns:
        A ``MatchData`` item whose ``id`` is ``self.match_id`` and whose
        ``match_data`` is the extracted payload.

    Raises:
        CloseSpider: if neither variable yields a non-empty capture.
    """
    # (JavaScript variable name, whether the capture goes through
    # Utils.parse_json), tried in order.
    candidates = (
        ("matchCentreData", False),
        ("initialMatchDataForScrappers", True),
    )
    for needle, is_array in candidates:
        data = response.xpath(
            '//script[contains(., "var ' + needle + '")]/text()'
        ).re_first(needle + r" = ([\w\W]*?);")
        # re_first() returns None when nothing matches, so test truthiness
        # instead of calling len() on the result.
        if not data:
            continue
        if is_array:
            data = Utils.parse_json(data)
        match_data = MatchData()
        match_data["id"] = self.match_id
        match_data["match_data"] = data
        return match_data
    raise CloseSpider("Match data not found")
def get_fixture(self, raw):
    """Yield one ``Fixture`` item per record decoded from ``raw``.

    ``raw`` is passed through ``Utils.parse_json`` and the result is
    decoded with ``json.loads``; every decoded record is read by position.
    Each item also carries ``self.stage`` under the ``stage`` key.
    """
    # Item keys listed in the order they occupy positions 0..16 of a record.
    positional_keys = (
        'id',
        'status',
        'start_date',
        'start_time',
        'home_team_id',
        'home_team_name',
        'home_red_cards',
        'away_team_id',
        'away_team_name',
        'away_red_cards',
        'score',
        'ht_score',
        'has_incidents',
        'has_preview',
        'elapsed',
        'result',
        'is_international',
    )
    for row in json.loads(Utils.parse_json(raw)):
        fixture = Fixture()
        fixture['stage'] = self.stage
        for position, key in enumerate(positional_keys):
            fixture[key] = row[position]
        # Position 19 is used when truthy; otherwise position 17.
        fixture['is_opta'] = row[19] or row[17]
        yield fixture
def parse_teams(self, response):
    """Yield a ``Team`` item for each entry found in the response body.

    The body is passed through ``Utils.parse_json`` and decoded with
    ``json.loads``. Element 0 of each entry becomes ``id``; element 1,
    UTF-8 encoded, becomes ``name``.
    """
    decoded = json.loads(Utils.parse_json(response.body))
    for entry in decoded:
        team_item = Team()
        team_item['id'] = entry[0]
        team_item['name'] = entry[1].encode('utf8')
        yield team_item
def parse_match_stats(self, response):
    """Save the page's ``matchStats`` JavaScript variable to disk.

    The captured text is passed through ``Utils.parse_json`` and written,
    UTF-8 encoded, to ``data/<match_id>/matchStats.json``. When the
    variable is not present a warning is logged and nothing is written.
    Always returns ``None``.
    """
    script_text = response.xpath('//script[contains(., "var matchStats")]/text()')
    captured = script_text.re_first(r"matchStats = ([\w\W]*?);")
    if not captured:
        self.logger.warning("No matchStats found.")
        return

    payload = Utils.parse_json(captured)
    directory = "data/" + str(self.match_id) + "/"
    target = directory + "matchStats.json"
    try:
        os.makedirs(directory)
    except OSError:
        # An already-existing directory is fine; anything else is re-raised.
        if not os.path.isdir(directory):
            raise
    with open(target, "wb") as handle:
        handle.write(payload.encode("utf8"))
    return
def parse_match_stats(self, response):
    """Save the page's ``matchStats`` JavaScript variable to disk.

    The captured text is passed through ``Utils.parse_json`` and written,
    UTF-8 encoded, to ``data/<match_id>/matchStats.json``. When the
    variable is not present a warning is logged and nothing is written.
    Always returns ``None``.
    """
    script_text = response.xpath('//script[contains(., "var matchStats")]/text()')
    captured = script_text.re_first(r"matchStats = ([\w\W]*?);")
    if not captured:
        self.logger.warning("No matchStats found.")
        return

    payload = Utils.parse_json(captured)
    directory = "data/" + str(self.match_id) + "/"
    target = directory + "matchStats.json"
    try:
        os.makedirs(directory)
    except OSError:
        # An already-existing directory is fine; anything else is re-raised.
        if not os.path.isdir(directory):
            raise
    with open(target, 'wb') as handle:
        handle.write(payload.encode('utf8'))
    return