# Create the file headings player_data_w = open(scrapersettings.player_data, "w") #player_data_w.writelines("player_id\tplayer_name\tteam_id\tteam_name\tgame\tpos\tminutes\tfgm\tfga\tthree_fgm\tthree_fga\tft\tfta\tpts\toffreb\tdefreb\ttotreb\tast\tto\tstl\tblk\tfouls\tgame_date\tneutral_site\n") if (scrapersettings.ind_team_stats == 1): # Create the file headings team_data_w = open(scrapersettings.team_data, "w") #team_data_w.writelines("game_id\tgame_date\tsite\tteam_id\tteam_name\tteam_minutes\tteam_fgm\tteam_fga\tteam_three_fgm\tteam_three_fga\tteam_ft\tteam_fta\tteam_pts\tteam_offreb\tteam_defreb\tteam_totreb\tteam_ast\tteam_to\tteam_stl\tteam_blk\tteam_fouls\n") if (scrapersettings.ind_game_stats == 1) or (scrapersettings.ind_player_stats == 1) or (scrapersettings.ind_team_stats == 1): print "Generating individual statistics for players and/or teams" # Grab data # Parse our mappings file to get our list of teams team_mapping = scraperfunctions.get_team_mappings() # Parse our schedule file to get a list of games game_mapping = scraperfunctions.get_game_mappings() # Parse the stats tables team_stats_total = [] # Create an empty list for storing the team stats alphanum = re.compile(r'[^\w\s:]+') for value, game in enumerate(game_mapping): # For each game in our dictionary if scrapersettings.debugmode == 1: print "Processing game " + str(game) + " (" + str(value+1) + " of " + str(len(game_mapping)) + ")" game_url = game_mapping[game][4] # from the game_url grab box_score try: result = requests.get(game_url) game_page_data = result.content except:
import scrapersettings
import csv
import re
import requests
import urllib2
import time
from bs4 import BeautifulSoup

# NOTE(review): scraperfunctions is used below, but no import for it appears
# in the visible lines -- confirm it is imported elsewhere in this file.

# Open the team-statistics output file and write its tab-separated header row.
tstats = open(scrapersettings.tstats_data, "w")
tstats.write("team\tstats\trankings\tvalue\n")

# Open the win/loss record output file and write its tab-separated header row.
record = open(scrapersettings.record_data, "w")
record.write("team\twin\tlosses\n")

# Mapping keyed by team id; element 0 of each entry is read as the team name.
team_mapping = scraperfunctions.get_team_mappings()

for value, team in enumerate(team_mapping):
    # Both URL variants share this prefix, so build it once per team.
    team_page = (
        str(scrapersettings.domain_base)
        + "/team/" + team
        + "/" + str(scrapersettings.year_index)
    )
    roster_url = team_page
    team_name = team_mapping[team][0]
    try:
        r = requests.get(roster_url)
        # Turn HTTP error statuses into exceptions so the fallback below runs.
        r.raise_for_status()
    except Exception:
        # The plain team page could not be fetched: switch roster_url to the
        # variant that carries the year index as a query parameter.
        roster_url = (
            team_page
            + "?game_sport_year_ctl_id="
            + str(scrapersettings.year_index)
        )