def get_spreads_for_months_and_write_to_csv():
    """Scrape historical game lines from oddsshark and persist them to csv.

    Runs the scrape -> parse -> finalize pipeline from the ``lines`` module
    and writes the result to game-line-data.csv under the local data path.
    """
    raw_lines = lines.scrapeLineData()
    parsed = lines.parseLineData(raw_lines)
    finalized = lines.finalizeLineData(parsed)
    output_path = jsonData.LOCAL_DATA_PATH + 'game-line-data.csv'
    csvOps.writeToCsv(finalized, output_path)
def getMissingData():
    """Reconcile missing player game dates against basketball-reference logs.

    Reads rows of (player_id, bref_id, comma-separated missing dates) from
    missing_player_data.csv, scrapes each player's 2016 game log, and writes
    every date that could be matched to a known season game out to
    matched-data.csv as [playerId, gameDate, teamId, game_id].

    Dates that still cannot be matched are accumulated in ``missingData`` but
    are NOT persisted anywhere -- NOTE(review): confirm whether that is
    intentional or whether a second writeToCsv call was meant to exist.
    """
    filename = "missing_player_data.csv"
    fileLoc = jsonData.LOCAL_DATA_PATH + filename
    rows = csvOps.openCsvFromFileToArrays(fileLoc)
    # Known games for the 2015-16 season window, used to resolve bref slugs.
    gamesForSeason = api.getGamesInRange('2015-11-01', '2016-04-13')
    teamDict = jsonData.TEAM_ABBREV_TO_ID
    session = requests.Session()
    matchedData = []
    missingData = []
    for player in rows:
        playerId = player[0]
        brefId = player[1]
        missingDates = player[2].split(",")
        # bref gamelog URLs are keyed by the first letter of the player slug.
        baseUrl = "http://www.basketball-reference.com/players/"
        firstLetter = brefId[0] + "/"
        endUrl = "/gamelog/2016/"
        fullUrl = baseUrl + firstLetter + brefId + endUrl
        rawHtml = session.get(fullUrl).content
        tree = html.fromstring(rawHtml)
        games = tree.cssselect('table#pgl_basic tbody tr:not(.thead)')
        # check for matching games, removing dates if they match:
        for game in games:
            gameDate = game[2].text_content().strip()
            teamId = teamDict[game[4].text_content().strip()]
            # check if matching game date found for missing date
            if gameDate in missingDates:
                gameBref = game[2].cssselect('a')[0].get('href').split("/")[-1].replace(".html", "")
                # FIX: use a distinct name -- the original rebound ``game``
                # here, clobbering the gamelog row element for the rest of
                # the loop body iteration.
                seasonGame = next(
                    (g for g in gamesForSeason if g["bref_slug"] == gameBref),
                    None)
                missingDates.remove(gameDate)
                if seasonGame is None:
                    print("NO GAME MATCH", gameDate, str(missingDates), brefId)
                    continue
                dataRow = [playerId, gameDate, teamId, seasonGame["game_id"]]
                matchedData.append(dataRow)
        # for any leftover missing dates:
        for gameDate in missingDates:
            missingData.append([playerId, brefId, gameDate])
    csvOps.writeToCsv(matchedData, jsonData.LOCAL_DATA_PATH + "matched-data.csv")
def get_usual_depth_positions(season):
    """Scrape realgm depth charts for *season* and write (player_id, depth) rows.

    For each player found on any depth chart, records the DEEPEST slot seen
    (max of depth values, where 1 = starter row). Player names that cannot be
    matched against the current player data fall back to an interactive
    ``input()`` prompt; entering 0 skips that player.
    """
    session = requests.Session()
    current_players = api.getCurrentPlayerData()
    depth_url = 'http://basketball.realgm.com/nba/depth-charts/' + str(season)
    page = session.get(depth_url)
    tree = html.fromstring(page.text)
    team_section = tree.cssselect('table.basketball tbody')
    all_depth_pos = {}
    for section in team_section:
        rows = section.cssselect('tr')
        # Row index within a team's table is the depth slot, so depth = idx + 1.
        for depth, row in enumerate(rows):
            players = row.cssselect("td.depth-chart-cell a")
            for player in players:
                # realgm hrefs look like /player/First-Last/...; rebuild the name.
                player_name = " ".join(
                    player.get("href").split("/")[2].split("-")).strip()
                # FIX: distinct generator variable -- original shadowed ``player``.
                player_obj = next(
                    (p for p in current_players
                     if p["player_name"] == player_name), None)
                player_depth = depth + 1
                if player_obj is None:
                    player_id = int(
                        input("What is the player_id for " + player_name + "? "))
                    if player_id == 0:
                        continue
                else:
                    player_id = player_obj["player_id"]
                # Keep the deepest slot observed for this player so far.
                if player_id in all_depth_pos:
                    if all_depth_pos[player_id] < player_depth:
                        all_depth_pos[player_id] = player_depth
                else:
                    all_depth_pos[player_id] = player_depth
    depth_rows = [[pid, d] for pid, d in all_depth_pos.items()]
    # FIX: filename was hard-coded to 2017 despite the ``season`` parameter;
    # unchanged for callers passing 2017, correct for every other season.
    filename = './../local-data/usual_depth_pos_' + str(season) + '.csv'
    csvOps.writeToCsv(depth_rows, filename)
def pullGoogleTrainingDataAndWriteToCsv(dateArr, statType):
    '''
    Pulls in data for multiple dates, in an arr, for a specific stat type,
    and writes it -- headerless, as the Google Cloud pipeline expects -- to
    the training-data csv configured for that stat type.

    Returns the file location on success, False if the csv write fails.
    '''
    folder = jsonData.LOCAL_DATA_PATH
    filename = config["GOOGLE_CLOUD"]["TRAINING_DATA_FILES"][statType]
    location = folder + filename
    print("Pulling factor data from API...")
    data = getDataForMultipleDates(dateArr, statType, True, 10)  # training = True
    print("Writing factor data to csv. File location: ", location)
    try:
        csv.writeToCsv(data, location)  # no header row!
        return location
    # FIX: was a bare ``except:`` -- narrow to Exception so SystemExit /
    # KeyboardInterrupt propagate, and report the failure instead of hiding it.
    except Exception as err:
        print("COULDNT WRITE CSV", err)
        return False
def pullAzureTrainingDataAndWriteToCsv(dateArr, statType):
    '''
    Pulls in data for multiple dates, in an arr, for a specific stat type,
    and writes it -- with a header row, as Azure expects -- to the local
    azure-initial-training-data csv for that stat type.

    Returns the file location on success, False if the csv write fails.
    '''
    folder = jsonData.LOCAL_DATA_PATH
    filename = 'nba-' + statType + '-azure-initial-training-data.csv'
    location = folder + filename
    print("Pulling factor data from API...")
    data = getDataForMultipleDates(dateArr, statType, True, 10)  # training = True
    print("Writing factor data to csv. File location: ", location)
    try:
        csv.writeToCsv(data, location, header=getColumns())  # header row!
        print("CSV WRITE SUCCESS", location)
        return location
    # FIX: was a bare ``except:`` -- narrow to Exception and surface the error.
    except Exception as err:
        print("COULDNT WRITE CSV", err)
        return False
def pullRetrainDataAndWriteToCsv(retrainDate, statType):
    '''
    Pulls in data for a specific date to retrain and writes it, with a
    header row, to a date-stamped csv under the retrain-data folder.

    Returns the file location on success, False if the csv write fails.
    '''
    folder = './../local-data/retrain-data/'
    filename = retrainDate + '-nba-' + statType + '-retrain-data.csv'
    location = folder + filename
    print("Pulling retraining data from API...")
    # training = True, recentGames = 10
    data = ml.getAndPrepFinalData(retrainDate, statType, True, 10)
    print("Data pulled, num rows ", len(data))
    print("Writing retraining data to csv w/ header. File location: ", location)
    try:
        csv.writeToCsv(data, location, header=ml.getColumns())
        return location
    # FIX: was a bare ``except:`` -- narrow to Exception and report the
    # failure for parity with the other pull-and-write helpers.
    except Exception as err:
        print("COULDNT WRITE CSV", err)
        return False