예제 #1
0
def get_season_slots():
    """Return the tournament slot rows joined with their season rows.

    Loads ``stage1/MNCAATourneySlots.csv`` and ``stage1/MSeasons.csv``,
    joins them on the shared ``Season`` column (pandas' default inner join)
    and removes the ``DayZero`` column from the joined frame.
    """
    slots = batch.get_data("stage1/MNCAATourneySlots.csv")
    season_info = batch.get_data("stage1/MSeasons.csv")
    joined = pd.merge(slots, season_info, on="Season")
    return joined.drop(columns=["DayZero"])
예제 #2
0
def load_top_level_files():
    """Load the top-level player file and the 2015 event file, printing each.

    Each file is printed right after it is loaded. Nothing is returned.
    """
    # Only the 2015 events file is loaded; the loads for MEvents2016.csv
    # through MEvents2019.csv were disabled in the original code.
    for csv_name in ("MPlayers.csv", "MEvents2015.csv"):
        print(batch.get_data(csv_name))
예제 #3
0
def show_compact_results():
    """Print the regular-season and tournament compact result tables.

    For each table the file is loaded first, then a banner line and the
    table itself are printed.
    """
    sections = (
        ("<---------------- SEASON_COMPACT_RESULTS -------------->",
         "stage1/MRegularSeasonCompactResults.csv"),
        ("<---------------- TOURNEY_COMPACT_RESULTS -------------->",
         "stage1/MNCAATourneyCompactResults.csv"),
    )
    for banner, csv_path in sections:
        table = batch.get_data(csv_path)
        print(banner)
        print(table)
예제 #4
0
def get_all_seed():
    """Return the winning-seed and losing-seed frames stacked into one frame.

    The frame from ``get_winning_seed()`` is placed above the frame from
    ``get_losing_seed()``. ``sort=True`` makes ``pd.concat`` sort the columns
    when the two frames do not already share the same column order.

    Returns:
        pandas.DataFrame: the concatenated rows; the original row indexes of
        both inputs are kept as-is.
    """
    # The previous version also loaded the compact-results and seeds CSV
    # files here without ever using them; those reads have been removed.
    return pd.concat([get_winning_seed(), get_losing_seed()], sort=True)
예제 #5
0
def get_team_win_loss_data(write=False):
    """Return team/conference rows annotated with tournament win/loss totals.

    The totals come from ``get_player_wins_losses``: for every team the
    ``player_total_wins`` / ``player_total_losses`` values of the first player
    listed for that team are used as the team's ``tourney_game_wins`` /
    ``tourney_game_losses``.

    Args:
        write: When true, the result is written to
            ``./data/derived/MTeams.csv``. The flag is also forwarded to
            ``get_player_wins_losses``.

    Returns:
        pandas.DataFrame: rows of ``stage1/MTeamConferences.csv`` whose
        ``TeamID`` appears in the player data, with the two count columns
        added.
    """
    player_data = get_player_wins_losses(write)
    team_data = batch.get_data("stage1/MTeamConferences.csv")

    # One row per team, keeping the first player's totals for that team.
    team_win_loss_data = (
        player_data.drop_duplicates(subset="TeamID")[
            ["TeamID", "player_total_wins", "player_total_losses"]
        ]
        .rename(columns={
            "player_total_wins": "tourney_game_wins",
            "player_total_losses": "tourney_game_losses",
        })
    )

    # The per-team totals carry no Season column, so the join key is TeamID
    # only (listing Season as a right-hand key raised KeyError). The inner
    # join also drops teams that are absent from the player data.
    team_win_loss_data = pd.merge(team_data, team_win_loss_data, on="TeamID")

    if write:
        team_win_loss_data.to_csv("./data/derived/MTeams.csv", index=False)

    return team_win_loss_data
예제 #6
0
def _count_rows_per_player(games, count_column):
    """Return one row per PlayerID with its number of rows in *games*."""
    return games.groupby("PlayerID").size().reset_index(name=count_column)


def get_player_wins_losses(write=False):
    """Return the players table with each player's tournament win/loss counts.

    A player's win count is the number of rows carrying their ``PlayerID`` in
    ``get_player_detail_seed_season_win()``; the loss count is computed the
    same way from ``get_player_detail_seed_season_loss()``. Both joins are
    inner joins, so only players that have at least one win row and at least
    one loss row are returned.

    Args:
        write: When true, the result is written to
            ``./data/derived/MPlayers.csv``.

    Returns:
        pandas.DataFrame: rows of ``stage1/MPlayers.csv`` with the columns
        ``player_total_wins`` and ``player_total_losses`` added.
    """
    # Count once per frame instead of recomputing value_counts() for every
    # player inside a loop.
    win_counts = _count_rows_per_player(
        get_player_detail_seed_season_win(), "player_total_wins")
    loss_counts = _count_rows_per_player(
        get_player_detail_seed_season_loss(), "player_total_losses")
    count_df = pd.merge(win_counts, loss_counts, on="PlayerID")

    player_data = batch.get_data("stage1/MPlayers.csv")
    player_data = pd.merge(player_data, count_df, on="PlayerID")

    if write:
        player_data.to_csv("./data/derived/MPlayers.csv", index=False)
    return player_data
예제 #7
0
def get_player_detail_seed_season_loss():
    """Return player rows joined to the games in which their team is LTeamID.

    Every row of ``stage1/MPlayers.csv`` is inner-joined with every row of
    ``get_detail_seed_season()`` whose ``LTeamID`` equals the player's
    ``TeamID`` (``LTeamID`` is presumably the losing team's id).
    """
    players = batch.get_data("stage1/MPlayers.csv")
    game_details = get_detail_seed_season()
    return pd.merge(players, game_details,
                    left_on="TeamID", right_on="LTeamID")
예제 #8
0
def get_detail_seed_season():
    """Return the seeded detailed results joined with their season rows.

    ``stage1/MSeasons.csv`` is inner-joined with the frame returned by
    ``get_seed_detailed_results()`` on ``Season``; the ``DayZero`` column is
    removed from the result.
    """
    season_info = batch.get_data("stage1/MSeasons.csv")
    seeded_results = get_seed_detailed_results()
    joined = pd.merge(season_info, seeded_results, on="Season")
    return joined.drop(columns=["DayZero"])
예제 #9
0
def cut_preprocessed_data(predict_year=2019):
    """Split ``master.csv`` into a prediction set and a historical set.

    A ``result_id`` column of the form ``"<Season>_<WTeamID>_<LTeamID>"`` is
    added, then rows whose ``Season`` equals ``predict_year`` are written to
    ``data/test.csv`` and rows from earlier seasons to ``data/train.csv``.
    Rows from seasons after ``predict_year`` end up in neither file. The
    intermediate frames are printed for inspection.

    Args:
        predict_year: Season to hold out for prediction (defaults to 2019,
            the value that used to be hard-coded).
    """
    data = batch.get_data("master.csv")

    # Column-wise string concatenation replaces a row-by-row iterrows() loop;
    # it also avoids iterrows() upcasting integer ids to floats when every
    # column of the frame is numeric.
    data["result_id"] = (
        data["Season"].astype(str)
        + "_" + data["WTeamID"].astype(str)
        + "_" + data["LTeamID"].astype(str)
    )

    print(data)

    predict_season = data[data["Season"] == predict_year]
    historical_data = data[data["Season"] < predict_year]
    print(predict_season)
    print(historical_data)
    # info() prints its own report and returns None, so it is not wrapped in
    # print() (that used to emit a stray "None" line).
    historical_data.info()
    predict_season.to_csv("data/test.csv")
    historical_data.to_csv("data/train.csv")
예제 #10
0
def show_seasons():
    """Print the seasons table, then the season/seed detail table."""
    season_table = batch.get_data("stage1/MSeasons.csv")
    print("<--------------- SEASONS --------------->")
    print(season_table)
    print("<---------------- SEASON SEED DETAILS -------------->")
    seed_details = consolidation.get_detail_seed_season()
    print(seed_details)
예제 #11
0
def development():
    """Build ``./data/master.csv`` from team data and seeded detailed results.

    ``stage1/MTeams.csv`` is joined to the seeded detailed results twice:
    once on ``WTeamID`` (rows labelled ``result = 1``) and once on
    ``LTeamID`` (rows labelled ``result = 0``). Both labelled frames are
    stacked and written to ``./data/master.csv`` without the index column.
    Intermediate frames are printed for inspection.
    """
    team_data = batch.get_data("stage1/MTeams.csv")
    print(team_data)
    seed_data = consolidation.get_seed_detailed_results()
    print(seed_data)

    combo_win = pd.merge(team_data, seed_data,
                         left_on="TeamID", right_on="WTeamID")
    combo_loss = pd.merge(team_data, seed_data,
                          left_on="TeamID", right_on="LTeamID")
    print(combo_win)
    print(combo_loss)

    # A scalar assignment fills the whole column; no per-row loop is needed.
    combo_win["result"] = 1
    combo_loss["result"] = 0

    combo = pd.concat([combo_win, combo_loss])
    # info() prints its own report and returns None, so it is not wrapped in
    # print() (that used to emit a stray "None" line).
    combo.info()
    combo.to_csv("./data/master.csv", index=False)
예제 #12
0
def get_seed_compact_results():
    """Return the tournament compact results with both teams' seeds attached.

    The frame from ``get_all_seed()`` has ``TeamID1``/``TeamID2`` renamed to
    ``WTeamID``/``LTeamID`` and is inner-joined to
    ``stage1/MNCAATourneyCompactResults.csv`` on ``Season``, ``WTeamID`` and
    ``LTeamID``. In the joined frame ``Seed1``/``Seed2`` are renamed to
    ``WSeed``/``LSeed``.
    """
    seeds = get_all_seed()
    compact_results = batch.get_data("stage1/MNCAATourneyCompactResults.csv")

    seeds = seeds.rename(columns={"TeamID1": "WTeamID", "TeamID2": "LTeamID"})
    join_keys = ["Season", "WTeamID", "LTeamID"]
    joined = pd.merge(compact_results, seeds, on=join_keys)

    return joined.rename(columns={"Seed1": "WSeed", "Seed2": "LSeed"})
예제 #13
0
def derive_team_data(write=False):
    """Attach a seed to every row of ``derived/MTeams.csv`` and sort by season.

    A team's seed is the ``WSeed`` of the first row in
    ``get_seed_detailed_results()`` whose ``WTeamID`` is that team. Teams with
    no such row get a missing ``Seed`` (this used to raise ``IndexError``).
    The slot, seed and team frames are printed for inspection, followed by
    the final frame.

    Args:
        write: Accepted for interface compatibility; currently unused.

    Returns:
        pandas.DataFrame: the team rows with a ``Seed`` column added, sorted
        by ``Season`` ascending. (Earlier versions returned ``None``.)
    """
    team_data = batch.get_data("derived/MTeams.csv")
    slot_data = batch.get_data("stage1/MNCAATourneySlots.csv")
    seed_data = get_seed_detailed_results()

    print(slot_data)
    print(seed_data)
    print(team_data)

    # First WSeed seen for each winning team, as a TeamID -> Seed lookup.
    team_seeds = (
        seed_data.drop_duplicates(subset="WTeamID")[["WTeamID", "WSeed"]]
        .rename(columns={"WTeamID": "TeamID", "WSeed": "Seed"})
    )
    # Left join keeps every team row even when no seed was found for it.
    team_data = pd.merge(team_data, team_seeds, how="left", on="TeamID")

    # sort_values returns a new frame; the result has to be assigned or the
    # sort is silently lost.
    team_data = team_data.sort_values("Season", ascending=True)
    print(team_data)

    return team_data
예제 #14
0
def show_conference_tourney_games():
    """Load the conference tournament games table and print it under a banner."""
    games = batch.get_data("stage1/MConferenceTourneyGames.csv")
    for line in ("<-------------- CONFERENCE GAMES ---------------->", games):
        print(line)
예제 #15
0
def show_team_data():
    """Print a banner, then load and print the teams table."""
    print("<----------- TEAM DATA ---------->")
    # The banner is printed before the file is loaded, as in the original.
    print(batch.get_data("stage1/MTeams.csv"))
예제 #16
0
def load_stage1_files():
    """Load every stage-1 CSV file in turn and print each one after loading.

    Nothing is returned; the function only exercises the loads and shows the
    contents.
    """
    stage1_files = (
        "stage1/Cities.csv",
        "stage1/Conferences.csv",
        "stage1/MConferenceTourneyGames.csv",
        "stage1/MSeasons.csv",
        "stage1/MNCAATourneyCompactResults.csv",
        "stage1/MNCAATourneyDetailedResults.csv",
        "stage1/MRegularSeasonCompactResults.csv",
        "stage1/MRegularSeasonDetailedResults.csv",
        "stage1/MMasseyOrdinals.csv",
        "stage1/MNCAATourneySeeds.csv",
        "stage1/MNCAATourneySeedRoundSlots.csv",
    )
    for csv_path in stage1_files:
        table = batch.get_data(csv_path)
        print(table)
예제 #17
0
def show_slot_data():
    """Print the raw tournament slots, then the slots joined with seasons."""
    raw_slots = batch.get_data("stage1/MNCAATourneySlots.csv")
    print("<--------------- SLOTS --------------->")
    print(raw_slots)
    print("<--------------- SEASON SLOTS --------------->")
    season_slots = consolidation.get_season_slots()
    print(season_slots)
예제 #18
0
#!/usr/bin/env python3

import pandas as pd
import refine, display
import batch_handler as batch
import sklearn
import pandas as pd
import numpy as np

# Tournament compact results and tournament seeds, loaded once at module
# level; get_winning_seed() below reads both frames.
results_df = batch.get_data("stage1/MNCAATourneyCompactResults.csv")
seeds_df = batch.get_data("stage1/MNCAATourneySeeds.csv")




"""
losing seed is seed 2

"""


def get_winning_seed():

    winning_seed = pd.merge(results_df,seeds_df,left_on=["Season","WTeamID"],right_on=["Season","TeamID"])
    winning_seed = winning_seed.drop(["TeamID"],axis=1)
    winning_seed = winning_seed.rename(mapper={"Seed": "Seed1", "WTeamID": "TeamID1"},axis=1)
    winning_seed["Result"] = 1
    
    winning_seed = pd.merge(winning_seed,seeds_df,left_on=["Season","LTeamID"],right_on=["Season","TeamID"])
    winning_seed = winning_seed.drop(["TeamID"],axis=1)
    winning_seed = winning_seed.rename(mapper={"Seed": "Seed2", "LTeamID": "TeamID2"},axis=1)