def parse(self, response):
    """Scrape one game's box-score page and append both teams' rows to
    '<year> Stats/boxscore-stats.csv'.

    The game code was cached earlier (by the schedule scraper) in
    tmpfiles/<alnum-filtered-url>.txt as a line 'Code: <16 digits>'.
    The 16-digit code packs: away-team id (leading 4 digits),
    home-team id (next block of digits), and the date (last 8 digits).

    Parameters
    ----------
    response : scrapy Response for the ESPN box-score page.
    """
    # Recover this game's code from the temp file keyed by URL.
    code_path = os.path.join(
        os.getcwd(), "tmpfiles",
        ''.join(e for e in response.url if e.isalnum()) + ".txt")
    with open(code_path) as f:
        data = f.read()
    m = re.search(r"Code: (?P<code>\d+)", data)
    code = str(m.group('code')).zfill(16)

    # Decode with integer arithmetic: the original float division
    # (long(code) / 1e12) loses precision on 16-digit values and can
    # misidentify a team near a rounding boundary.
    code_int = int(code)
    away = code_int // 10**12           # leading 4 digits: away team id
    home = (code_int // 10**8) % 10**3  # next 3 digits: home team id

    away_TGS = Team_Game_Statistics(code, away)
    home_TGS = Team_Game_Statistics(code, home)

    # MOVE SOME OF THESE TO MATCHUP SCRAPER
    # Each stat section is a div with an away-wrap and a home-wrap
    # sub-div; the extractor fills the matching TGS object. Order is
    # preserved from the original copy-pasted stanzas.
    sections = [
        ('gamepackage-passing', Extract_Passing),
        ('gamepackage-rushing', Extract_Rushing),
        ('gamepackage-kickReturns', Extract_KickReturns),
        ('gamepackage-puntReturns', Extract_PuntReturns),
        ('gamepackage-interceptions', Extract_Interceptions),
        ('gamepackage-kicking', Extract_Kicking),
        ('gamepackage-punting', Extract_Punting),
    ]
    for div_id, extract in sections:
        div = response.xpath('//div[@id="%s"]' % div_id)
        away_TGS = extract(
            away_TGS,
            div.xpath('.//div[contains(@class,"gamepackage-away-wrap")]'))
        home_TGS = extract(
            home_TGS,
            div.xpath('.//div[contains(@class,"gamepackage-home-wrap")]'))

    # Final score lives in the header "competitors" div, not a stat table.
    points_div = response.xpath('//div[@class="competitors"]')
    away_TGS = Extract_Points(
        away_TGS, points_div.xpath('.//div[contains(@class,"away")]'))
    home_TGS = Extract_Points(
        home_TGS, points_div.xpath('.//div[contains(@class,"home")]'))

    # Append to the season CSV; write the header only when creating it.
    out_path = str(year) + " Stats/boxscore-stats.csv"
    new_rows = [away_TGS.Compile(), home_TGS.Compile()]
    if os.path.isfile(out_path):
        # 'with' guarantees the file is closed even if writerows raises.
        with open(out_path, "a") as f:
            csv.writer(f, lineterminator='\n').writerows(new_rows)
    else:
        Write_CSV([away_TGS.Header()] + new_rows, out_path)
def parse(self, response):
    """Scrape one game's team-comparison (matchup) table and append both
    teams' rows to '<year> Stats/matchup-stats.csv'.

    Reads the cached 16-digit game code (see the box-score scraper),
    then pulls first downs, fumbles lost, 3rd/4th-down efficiency,
    time of possession, and penalties from the <tr data-stat-attr=...>
    rows; column 1 is the away team, column 2 the home team.

    Parameters
    ----------
    response : scrapy Response for the ESPN matchup page.
    """
    def cell(row, idx):
        # Text of the idx-th <td>, with the page's tab/pipe/newline/
        # backslash padding stripped. Raises IndexError if the cell
        # is missing (some pages omit rows).
        return re.sub(r'[\\\t|\\\n]',
                      '', row.xpath('.//td/text()').extract()[idx])

    # Recover this game's code from the temp file keyed by URL.
    code_path = os.path.join(
        os.getcwd(), "tmpfiles",
        ''.join(e for e in response.url if e.isalnum()) + ".txt")
    with open(code_path) as f:
        data = f.read()
    m = re.search(r"Code: (?P<code>\d+)", data)
    code = str(m.group('code')).zfill(16)

    # Integer decode — the original float division (long(code) / 1e12)
    # is imprecise for 16-digit values; long() is also Python-2-only.
    code_int = int(code)
    away = code_int // 10**12           # leading 4 digits: away team id
    home = (code_int // 10**8) % 10**3  # next 3 digits: home team id

    away_TGS = Team_Game_Statistics(code, away)
    home_TGS = Team_Game_Statistics(code, home)

    # Scrape first downs
    first_div = response.xpath('//tr[@data-stat-attr="firstDowns"]')
    away_TGS.First_Down_Total = cell(first_div, 1)
    home_TGS.First_Down_Total = cell(first_div, 2)

    # Scrape turnovers — each side's fumble recoveries are the
    # opponent's fumbles lost.
    fumble_div = response.xpath('//tr[@data-stat-attr="fumblesLost"]')
    away_TGS.Fum_Lost = cell(fumble_div, 1)
    home_TGS.Fum_Lost = cell(fumble_div, 2)
    away_TGS.Fum_Ret = home_TGS.Fum_Lost
    home_TGS.Fum_Ret = away_TGS.Fum_Lost

    # Scrape 3rd- and 4th-down efficiency; cells look like "conv-att".
    for stat_attr, att_field, conv_field in (
            ('thirdDownEff', 'Third_Down_Att', 'Third_Down_Conv'),
            ('fourthDownEff', 'Fourth_Down_Att', 'Fourth_Down_Conv')):
        eff_div = response.xpath('//tr[@data-stat-attr="%s"]' % stat_attr)
        for tgs, idx in ((away_TGS, 1), (home_TGS, 2)):
            m = re.search(r'(?P<f>\d+)\-(?P<a>\d+)', cell(eff_div, idx))
            setattr(tgs, att_field, m.group('a'))
            setattr(tgs, conv_field, m.group('f'))

    # Scrape time of possession; default to an even 30:00 split when the
    # row is missing, then convert "MM:SS" to total seconds.
    top_div = response.xpath('//tr[@data-stat-attr="possessionTime"]')
    try:
        top_away = cell(top_div, 1)
    except IndexError:  # cell absent on this page
        top_away = "30:00"
    try:
        top_home = cell(top_div, 2)
    except IndexError:
        top_home = "30:00"
    m_away = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_away)
    m_home = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', top_home)
    try:
        away_TGS.Time_Of_Possession = str(
            60 * int(m_away.group('h')) + int(m_away.group('m')))
        home_TGS.Time_Of_Possession = str(
            60 * int(m_home.group('h')) + int(m_home.group('m')))
    except AttributeError:  # a regex failed to match -> .group on None
        away_TGS.Time_Of_Possession = 1800
        home_TGS.Time_Of_Possession = 1800
    # If exactly one side fell back to the 1800 default, infer it from
    # the other so the two always sum to a full game (3600 seconds).
    if int(away_TGS.Time_Of_Possession) == 1800 and int(
            home_TGS.Time_Of_Possession) != 1800:
        away_TGS.Time_Of_Possession = str(
            3600 - int(home_TGS.Time_Of_Possession))
    elif int(home_TGS.Time_Of_Possession) == 1800 and int(
            away_TGS.Time_Of_Possession) != 1800:
        home_TGS.Time_Of_Possession = str(
            3600 - int(away_TGS.Time_Of_Possession))

    # Scrape penalties; cells look like "total-yards".
    pen_div = response.xpath('//tr[@data-stat-attr="totalPenaltiesYards"]')
    for tgs, idx in ((away_TGS, 1), (home_TGS, 2)):
        m = re.search(r'(?P<tot>\d+)\-(?P<yds>\d+)', cell(pen_div, idx))
        tgs.Penalty = m.group('tot')
        tgs.Penalty_Yard = m.group('yds')

    # Append to the season CSV; write the header only when creating it.
    out_path = str(year) + " Stats/matchup-stats.csv"
    new_rows = [away_TGS.Compile(), home_TGS.Compile()]
    if os.path.isfile(out_path):
        # 'with' guarantees the file is closed even if writerows raises.
        with open(out_path, "a") as f:
            csv.writer(f, lineterminator='\n').writerows(new_rows)
    else:
        Write_CSV([away_TGS.Header()] + new_rows, out_path)