Ejemplo n.º 1
0
    def parse(self, response):
        """Scrape the team matchup rows of one game page into matchup-stats.csv.

        The game code is read from the temp file whose name is built from the
        alphanumeric characters of ``response.url``. First downs, fumbles
        lost, third/fourth-down efficiency, time of possession and penalties
        are read from the ``<tr data-stat-attr=...>`` rows, and one row per
        team is appended to ``"<year> Stats/matchup-stats.csv"`` (the file is
        created with a header row when it does not exist yet).

        Args:
            response: Page response providing ``url`` and ``xpath()``.

        Raises:
            AttributeError: If the temp file holds no ``Code: <digits>`` entry,
                or an efficiency/penalty cell is not of the form ``N-M``.
            IndexError: If a required stat row has fewer than three text cells.
        """
        # NOTE(review): `year` is not defined in this method; it is expected
        # to be a module-level name -- confirm against the rest of the file.

        def stat_cells(stat_attr):
            # Text cells of one stat row; index 1 is used for the away team
            # and index 2 for the home team.
            row = response.xpath('//tr[@data-stat-attr="%s"]' % stat_attr)
            # The character class strips backslashes, tabs, pipes and newlines.
            return [re.sub(r'[\\\t|\\\n]', '', text)
                    for text in row.xpath('.//td/text()').extract()]

        def split_pair(text):
            # "N-M" -> ("N", "M"); both parts stay strings.
            pair = re.search(r'(?P<first>\d+)\-(?P<second>\d+)', text)
            return pair.group('first'), pair.group('second')

        def possession_seconds(cells, index):
            # "A:B" -> 60 * A + B, or None when the cell is missing/malformed.
            try:
                clock = re.search(r'(?P<h>\d+)\:(?P<m>\d+)', cells[index])
            except IndexError:
                return None
            if clock is None:
                return None
            return 60 * int(clock.group('h')) + int(clock.group('m'))

        # Get this game code from file
        tmp_path = (os.getcwd() + "/tmpfiles/" +
                    ''.join(e for e in response.url if e.isalnum()) + ".txt")
        with open(tmp_path) as f:
            data = f.read()
        m = re.search(r"Code: (?P<code>\d+)", data)
        code = str(m.group('code')).zfill(16)

        # The zero-padded code is split as <away: 4 digits><home: 4 digits>
        # <remaining 8 digits>. Integer arithmetic keeps every digit exact,
        # and the home team keeps its full four-digit field.
        game_number = int(code)
        away = int(game_number // 10 ** 12)
        home = int((game_number // 10 ** 8) % 10 ** 4)
        away_TGS = Team_Game_Statistics(code, away)
        home_TGS = Team_Game_Statistics(code, home)

        # Scrape first downs
        cells = stat_cells("firstDowns")
        away_TGS.First_Down_Total = cells[1]
        home_TGS.First_Down_Total = cells[2]

        # Scrape turnovers; a fumble lost by one team is credited as a
        # fumble return to the other
        cells = stat_cells("fumblesLost")
        away_TGS.Fum_Lost = cells[1]
        home_TGS.Fum_Lost = cells[2]
        away_TGS.Fum_Ret = home_TGS.Fum_Lost
        home_TGS.Fum_Ret = away_TGS.Fum_Lost

        # Scrape first down efficiency on 3rd down ("conversions-attempts")
        cells = stat_cells("thirdDownEff")
        (away_TGS.Third_Down_Conv,
         away_TGS.Third_Down_Att) = split_pair(cells[1])
        (home_TGS.Third_Down_Conv,
         home_TGS.Third_Down_Att) = split_pair(cells[2])

        # Scrape first down efficiency on 4th down ("conversions-attempts")
        cells = stat_cells("fourthDownEff")
        (away_TGS.Fourth_Down_Conv,
         away_TGS.Fourth_Down_Att) = split_pair(cells[1])
        (home_TGS.Fourth_Down_Conv,
         home_TGS.Fourth_Down_Att) = split_pair(cells[2])

        # Scrape time of possession. A side that could not be read is derived
        # from the other one (3600 total); when neither is readable both
        # default to 1800. Readable values are never overwritten, so a real
        # 30:00 is not mistaken for a missing value.
        cells = stat_cells("possessionTime")
        away_top = possession_seconds(cells, 1)
        home_top = possession_seconds(cells, 2)
        if away_top is None and home_top is None:
            away_top = home_top = 1800
        elif away_top is None:
            away_top = 3600 - home_top
        elif home_top is None:
            home_top = 3600 - away_top
        # Always stored as strings, matching the other scraped fields
        away_TGS.Time_Of_Possession = str(away_top)
        home_TGS.Time_Of_Possession = str(home_top)

        # Scrape penalties ("count-yards")
        cells = stat_cells("totalPenaltiesYards")
        away_TGS.Penalty, away_TGS.Penalty_Yard = split_pair(cells[1])
        home_TGS.Penalty, home_TGS.Penalty_Yard = split_pair(cells[2])

        # Write stats to file
        out_path = str(year) + " Stats/matchup-stats.csv"
        if os.path.isfile(out_path):
            new_rows = [away_TGS.Compile(), home_TGS.Compile()]
            # `with` closes the handle even if writing a row fails
            with open(out_path, "a") as out_file:
                csv.writer(out_file, lineterminator='\n').writerows(new_rows)
        else:
            new_rows = [away_TGS.Header(),
                        away_TGS.Compile(),
                        home_TGS.Compile()]
            Write_CSV(new_rows, out_path)
Ejemplo n.º 2
0
	def parse(self, response):
		"""Scrape the box-score sections of one game page into boxscore-stats.csv.

		The game code is read from the temp file whose name is built from the
		alphanumeric characters of ``response.url``. Each ``gamepackage-*``
		section is passed, away team first and then home team, to its
		``Extract_*`` helper, and one row per team is appended to
		``"<year> Stats/boxscore-stats.csv"`` (the file is created with a
		header row when it does not exist yet).

		Args:
			response: Page response providing ``url`` and ``xpath()``.

		Raises:
			AttributeError: If the temp file holds no ``Code: <digits>`` entry.
		"""
		# NOTE(review): `year` is not defined in this method; it is expected
		# to be a module-level name -- confirm against the rest of the file.

		# Get this game code from file
		tmp_path = os.getcwd() + "/tmpfiles/" + ''.join(e for e in response.url if e.isalnum()) + ".txt"
		with open(tmp_path) as f:
			data = f.read()
		m = re.search(r"Code: (?P<code>\d+)", data)
		code = str(m.group('code')).zfill(16)

		# The zero-padded code is split as <away: 4 digits><home: 4 digits>
		# <remaining 8 digits>. Integer arithmetic keeps every digit exact,
		# and the home team keeps its full four-digit field.
		game_number = int(code)
		away = int(game_number // 10 ** 12)
		home = int((game_number // 10 ** 8) % 10 ** 4)
		away_TGS = Team_Game_Statistics(code, away)
		home_TGS = Team_Game_Statistics(code, home)

		# TODO: move some of these to the matchup scraper

		# (section div id, extractor) pairs, processed in this order
		sections = (
			("gamepackage-passing", Extract_Passing),
			("gamepackage-rushing", Extract_Rushing),
			("gamepackage-kickReturns", Extract_KickReturns),
			("gamepackage-puntReturns", Extract_PuntReturns),
			("gamepackage-interceptions", Extract_Interceptions),
			("gamepackage-kicking", Extract_Kicking),
			("gamepackage-punting", Extract_Punting),
		)
		for section_id, extract in sections:
			section_div = response.xpath('//div[@id="%s"]' % section_id)
			# away
			away_div = section_div.xpath('.//div[contains(@class,"gamepackage-away-wrap")]')
			away_TGS = extract(away_TGS, away_div)
			# home
			home_div = section_div.xpath('.//div[contains(@class,"gamepackage-home-wrap")]')
			home_TGS = extract(home_TGS, home_div)

		# Get points
		points_div = response.xpath('//div[@class="competitors"]')
		away_points = points_div.xpath('.//div[contains(@class,"away")]')
		away_TGS = Extract_Points(away_TGS, away_points)
		home_points = points_div.xpath('.//div[contains(@class,"home")]')
		home_TGS = Extract_Points(home_TGS, home_points)

		# Write stats to file
		out_path = str(year) + " Stats/boxscore-stats.csv"
		if os.path.isfile(out_path):
			new_rows = [away_TGS.Compile(), home_TGS.Compile()]
			# `with` closes the handle even if writing a row fails
			with open(out_path, "a") as out_file:
				csv.writer(out_file, lineterminator='\n').writerows(new_rows)
		else:
			new_rows = [away_TGS.Header(), away_TGS.Compile(), home_TGS.Compile()]
			Write_CSV(new_rows, out_path)
Ejemplo n.º 3
0
    play_data.append(play)
Write_CSV(play_data, str(year) + " Stats temp/unparsed_plays.csv")

# Build team-game-statistics: one home and one visitor record per game,
# accumulated from consecutive plays that share a game code.
prev_game_code = 0
allTGS = []
for play in allPlays:
    # found a new game
    if float(play.Game_Code) != prev_game_code:
        # save old data
        if prev_game_code != 0:
            allTGS.append(home_tgs)
            allTGS.append(visitor_tgs)
        visitor_code = int(math.floor(float(play.Game_Code) / 1e12))
        # Integer modulus keeps home_code an int, like visitor_code
        home_code = int(math.floor(float(play.Game_Code) / 1e8)) % 10000
        home_tgs = Team_Game_Statistics(play.Game_Code, home_code)
        visitor_tgs = Team_Game_Statistics(play.Game_Code, visitor_code)
        prev_game_code = float(play.Game_Code)
    # increment data
    if play.Offense == home_tgs.Team_Code:
        home_tgs.Extract_Play_Offense(play)
    elif play.Offense == visitor_tgs.Team_Code:
        visitor_tgs.Extract_Play_Offense(play)

# The loop only saves a game when the next one starts, so the final game
# has to be saved here.
if prev_game_code != 0:
    allTGS.append(home_tgs)
    allTGS.append(visitor_tgs)

# Write team-game-statistics to file
tgs_data = []
if allTGS:
    # Header() is called on an instance, so it needs at least one record
    tgs_data.append(allTGS[0].Header())
for tgs in allTGS:
    tgs_data.append(tgs.Compile_Stats())
Write_CSV(tgs_data, str(year) + " Stats temp/play_TGS.csv")