def _fetch_regular_season_games(year, week):
    """Download the regular-season games for one week of one season.

    :param year: season to request (converted to a string for the API)
    :param week: week number to request (converted to a string for the API)
    :return: the decoded JSON body of the API response
    :raises requests.HTTPError: if the API answers with an error status
    """
    response = requests.get(
        'https://api.collegefootballdata.com/games',
        headers={'accept': 'application/json'},
        params=(
            ('year', str(year)),
            ('week', str(week)),
            ('seasonType', 'regular'),
        ),
        # Without a timeout a stalled connection would block forever.
        timeout=30,
    )
    # Fail loudly on an error status; otherwise the error body would be
    # stored alongside real results and written into the cache files.
    response.raise_for_status()
    return response.json()


def query():
    """Return the training-season and test-season game results.

    If both GAME_RESULTS_PATH and TEST_GAMES_PATH are accessible, their JSON
    contents are returned as-is. Otherwise the games are downloaded week by
    week, written to those two files, and returned.

    :return: tuple ``(games, test_games)``; each is a list holding one API
        response body per requested (year, week). ``test_games`` is empty
        when ``test_year`` is 0.
    :raises requests.HTTPError: if any download returns an error status
    """
    if file_access(GAME_RESULTS_PATH) and file_access(TEST_GAMES_PATH):
        with open(GAME_RESULTS_PATH) as file:
            games = json.load(file)
        with open(TEST_GAMES_PATH) as file:
            test_games = json.load(file)
        return games, test_games

    games = [
        _fetch_regular_season_games(year, week)
        for year in years
        for week in weeks
    ]

    # A test_year of 0 means no test season was requested.
    test_games = []
    if test_year != 0:
        test_games = [
            _fetch_regular_season_games(test_year, week) for week in weeks
        ]

    # Both files are written even when test_games is empty, because the cache
    # check above requires both of them to exist.
    with open(TEST_GAMES_PATH, 'w') as f:
        json.dump(test_games, f)
    with open(GAME_RESULTS_PATH, 'w') as f:
        json.dump(games, f)
    return games, test_games
def stat_files_exist():
    """Tell whether every statistic/year combination has its JSON file.

    :return: False as soon as one expected file is not accessible, True when
        all of them are (including when there is nothing to check)
    """
    # Lazy generator: paths are built and probed one at a time, statistic by
    # statistic and year by year, stopping at the first missing file.
    expected_paths = (
        STAT_PATH + stat_name + str(season) + ".json"
        for stat_name in statistics
        for season in years
    )
    return all(file_access(path) for path in expected_paths)
def main():
    """Prepare the training and test game data, then train the network.

    When both processed data files are accessible they are loaded directly.
    Otherwise the raw games, polls and statistics are queried, combined,
    normalized and saved, first for the training seasons and then for the
    test seasons. The resulting data is converted to network inputs and
    outputs and passed to ``tf_net``.
    """
    have_saved_data = (
        file_access(GAME_DATA_PATH) and file_access(TEST_GAME_DATA_PATH)
    )

    if not have_saved_data:
        print("Running queries to download game data...")
        os.makedirs("data", exist_ok=True)

        print("Loading all games in the specified range...")
        games, test_games = games_query.query()

        # --- training seasons ---
        print("Finding dates of games...")
        train_dates = retreive_all_dates(games)
        print("Loading historical polls...")
        train_ap, train_coaches, train_pred = polls_query.query(train_dates)
        print("Loading historical statistics...")
        stats_query.query(train_dates)

        print("Combining and normalizing game data...")
        game_data = normalize(
            combine_game_data(train_dates, train_ap, train_coaches,
                              train_pred)
        )
        print("Saving data to", GAME_DATA_PATH)
        save(game_data, GAME_DATA_PATH)

        # --- test seasons: same pipeline, with append=True on the queries ---
        print("Doing that all again for test seasons...")
        test_dates = retreive_all_dates(test_games)
        test_ap, test_coaches, test_pred = polls_query.query(
            test_dates, append=True)
        stats_query.query(test_dates, append=True)
        test_game_data = normalize(
            combine_game_data(test_dates, test_ap, test_coaches, test_pred)
        )
        save(test_game_data, TEST_GAME_DATA_PATH)
    else:
        print("Loading existing game data")
        game_data = load(GAME_DATA_PATH)
        test_game_data = load(TEST_GAME_DATA_PATH)

    all_inputs, all_outputs = create_netdata_from_gamedata(game_data)
    test_inputs, test_outputs = create_netdata_from_gamedata(test_game_data)

    print("Doing NN stuff...")
    # Only the TensorFlow trainer is active here.
    tf_net(all_inputs, all_outputs, test_inputs, test_outputs)
def query(dates_to_games, append=False):
    """Build and save the per-year JSON file for every statistic.

    For each statistic and each year found in ``dates_to_games``, the file
    ``STAT_PATH + statistic + year + ".json"`` is (re)generated when it is
    not accessible or when ``append`` is truthy. An existing file is
    overwritten, not extended.

    :param dates_to_games: dictionary mapping a date to the games played on
        that date
    :param append: when truthy, regenerate the files even if they exist
    :return: None; the results are only written to disk
    """
    # The set of seasons does not depend on the statistic, so build it once
    # instead of once per statistic.
    game_years = {year_from_date(date) for date in dates_to_games}

    for statistic in statistics:
        for year in game_years:
            stat_file = STAT_PATH + statistic + str(year) + ".json"
            if not append and file_access(stat_file):
                continue

            games_in_year = [
                (date, games)
                for date, games in dates_to_games.items()
                if year_from_date(date) == str(year)
            ]
            stats = {}
            for date, games in games_in_year:
                teams = teams_in_games(games)
                stats = update_stats(stats, statistic, date, teams)

            # dirname is '' when STAT_PATH has no directory component, and
            # os.makedirs('') raises, so only create a real directory.
            stat_dir = os.path.dirname(stat_file)
            if stat_dir:
                os.makedirs(stat_dir, exist_ok=True)
            with open(stat_file, 'w') as f:
                json.dump(stats, f)
def train_net(inputs, outputs, test_inputs, test_outputs, load=False):
    """Train a Keras model and predict on the test inputs.

    :param inputs: training inputs; converted to an array whose second
        dimension is passed to ``create_uniform_model``
    :param outputs: training targets, converted to an array
    :param test_inputs: inputs to predict on after training
    :param test_outputs: accepted for interface compatibility; not used here
    :param load: when true and KERAS_MODEL_PATH is accessible, start from the
        saved model instead of creating a new one
    :return: tuple ``(model, predictions)`` where ``predictions`` is the
        model's output for ``test_inputs``
    """
    inputs = np.asarray(inputs)
    outputs = np.asarray(outputs)
    test_inputs = np.asarray(test_inputs)

    if load and file_access(KERAS_MODEL_PATH):
        print("Loading model from", KERAS_MODEL_PATH)
        model = load_model(KERAS_MODEL_PATH)
    else:
        model = create_uniform_model(inputs.shape[1])

    # summary() prints the table itself and returns None, so wrapping it in
    # print() would add a stray "None" line to the output.
    model.summary()

    # No validation data or early stopping is used: training always runs the
    # full 1000 epochs, for a loaded model as well as a new one.
    model.fit(x=inputs, y=outputs, epochs=1000, verbose=1)
    return model, model.predict(x=test_inputs)
def _merge_with_cached(path, fresh):
    """Return the JSON dictionary stored at ``path`` updated with ``fresh``.

    When ``path`` is not accessible, ``fresh`` itself is returned. On a key
    collision the freshly queried entry wins.
    """
    if not file_access(path):
        return fresh
    with open(path) as file:
        merged = json.load(file)
    merged.update(fresh)
    return merged


def query(dates_to_games, append=False):
    '''
    Query online historical poll data to build a database of team ranking
    history.

    :param dates_to_games: dictionary using a date as keys to all games
        played on that date
    :param append: Add to existing data? When true, the cache files are never
        used as a shortcut; the polls are queried again and merged into
        whatever the files already hold.
    :return: Three dictionaries: ap_polls, coaches_polls, pred_polls. When
        the data had to be queried, they cover only the years and dates in
        dates_to_games, even if the files on disk hold more.
    :raises requests.HTTPError: if the rankings API answers with an error
        status
    '''
    cache_paths = (PATH_AP_JSON, PATH_COACHES_JSON, PATH_ALL_JSON)

    # Check if we already have all the poll data
    if not append and all(file_access(path) for path in cache_paths):
        with open(PATH_AP_JSON) as file:
            ap_polls = json.load(file)
        with open(PATH_COACHES_JSON) as file:
            coaches_polls = json.load(file)
        with open(PATH_ALL_JSON) as file:
            pred_polls = json.load(file)
        return ap_polls, coaches_polls, pred_polls

    ap_polls = {}
    coaches_polls = {}
    pred_polls = {}

    game_years = {year_from_date(date) for date in dates_to_games}
    for year in game_years:
        for week in weeks:
            # Query for ap and coaches poll data from
            # api.collegefootballdata.com
            response = requests.get(
                'https://api.collegefootballdata.com/rankings',
                headers={'accept': 'application/json'},
                params=(('year', str(year)), ('week', str(week))),
                # Without a timeout a stalled connection would block forever.
                timeout=30,
            )
            response.raise_for_status()

            # Decode the body once rather than once per poll.
            polls = response.json()[0]['polls']
            # NOTE(review): this relies on the API listing the coaches poll
            # first and the AP poll second -- confirm against the API.
            ap_poll_query = polls[1]['ranks']
            coaches_poll_query = polls[0]['ranks']

            # Store each poll as {school: rank} under a "year,week" key
            key = str(year) + "," + str(week)
            ap_polls[key] = {
                pos['school']: pos['rank'] for pos in ap_poll_query
            }
            coaches_polls[key] = {
                pos['school']: pos['rank'] for pos in coaches_poll_query
            }

    for date, games in dates_to_games.items():
        teams = teams_in_games(games)
        pred_polls = update_pred_polls(pred_polls, date, teams)

    # Opening with "w" or "w+" truncates the file, so adding to existing data
    # has to be done by merging with the stored dictionary before rewriting.
    cache_contents = (
        (PATH_AP_JSON, ap_polls),
        (PATH_COACHES_JSON, coaches_polls),
        (PATH_ALL_JSON, pred_polls),
    )
    for path, fresh in cache_contents:
        to_store = _merge_with_cached(path, fresh) if append else fresh
        with open(path, 'w') as f:
            json.dump(to_store, f)

    return ap_polls, coaches_polls, pred_polls