Exemple #1
0
def team_array():
    '''Build one merged dataframe per team.

    Loads the dataset, asks get_teams for the per-location frames (keyed
    '<team> home' and '<team> away') and outer-merges each team's pair.
    Ex usage:

    df = team_array()
    print df['Dallas Cowboys']

    Returns a dictionary mapping every name from get_team_keys() to its
    merged pd dataframe.
    '''
    per_location = get_teams(load.getDataset())
    return {
        name: pd.merge(per_location[name + ' home'],
                       per_location[name + ' away'],
                       how='outer')
        for name in get_team_keys()
    }
Exemple #2
0
def write_teams():
    '''Write one CSV file per team with its home and away games combined.

    The dataset is split with separate_teams, whose keys are expected to
    end in ' home' or ' away'.  Each team's home frame goes through
    format_home and its away frame through format_away, both get a
    'home_field?' column (the strings 'True' / 'False'), and the
    outer-merged result is written to '<team>.csv' under
    '/data/teamdata/', relative to the current working directory with its
    last ten characters removed.
    '''
    table = load.getDataset()
    teams_table = separate_teams(table)
    # Dropping the last five characters (' home' / ' away') leaves the bare
    # team name; the set collapses the two keys of each team into one.
    names = list({key[:-5] for key in teams_table.keys()})
    row_count = len(table)
    at_home_flag = pd.Series(np.repeat(np.array(['True']), row_count))
    on_road_flag = pd.Series(np.repeat(np.array(['False']), row_count))
    for name in names:
        home_games = teams_table[name + ' home']
        away_games = teams_table[name + ' away']

        home_games = format_home(home_games)
        home_games['home_field?'] = at_home_flag

        away_games = format_away(away_games)
        away_games['home_field?'] = on_road_flag

        merged = pd.merge(home_games, away_games, how='outer')
        out_dir = os.getcwd()[:-10] + '/data/teamdata/'
        merged.to_csv(out_dir + name + '.csv')
Exemple #3
0
def _top_to_seconds(values):
    '''Convert '<a>:<b>' strings to the integer a*60 + b as a numpy array.'''
    totals = []
    for value in values:
        parts = value.split(':')
        totals.append(int(parts[0]) * 60 + int(parts[1]))
    return np.array(totals)


def validate():
    '''Basic sanity checks on the numerical parts of the data.

    Loads the dataset and, for both the 'ht_' and 'at_' column prefixes,
    checks on every row that:

    * each "attempt"-style column is >= its matching "success" column,
    * net_pass_yards + rush_yards == total_yards, and
      fumbles_lost + INT == turnovers,

    and finally that the two TOP columns (presumably minutes:seconds of
    possession) add up to at least 3595.

    Every check must hold for all rows.  An earlier version only tested
    that the per-row results were all the same value, which also passed
    when every row was wrong, and it skipped the total_yards equality
    altogether.  An empty dataset passes vacuously.

    Raises:
        AssertionError: if any check fails on at least one row.
    '''
    table = load.getDataset()
    # (larger, smaller): the first column can never be below the second.
    inequalities = [('rush_attempts', 'rush_TDs'),
                    ('pass_attempt', 'pass_comp'),
                    ('pass_comp', 'pass_TDs'),
                    ('fumbles', 'fumbles_lost'),
                    ('3rd_down_attempts', '3rd_down_converted'),
                    ('4th_down_attempts', '4th_down_converted')]
    # (first, second, total): first + second must equal total.
    equalities = [('net_pass_yards', 'rush_yards', 'total_yards'),
                  ('fumbles_lost', 'INT', 'turnovers')]
    for team in ['ht_', 'at_']:
        for larger, smaller in inequalities:
            holds = table[team + larger] >= table[team + smaller]
            assert holds.all(), \
                '%s%s is below %s%s on some rows' % (team, larger,
                                                     team, smaller)
        for first, second, total in equalities:
            difference = table[team + first] + table[team + second] - \
                table[team + total]
            # The assertion lives inside the loop so every equality is
            # tested, not just the last one.
            assert (difference == 0).all(), \
                '%s%s + %s%s != %s%s on some rows' % (team, first,
                                                      team, second,
                                                      team, total)
    home_TOP = _top_to_seconds(table['ht_TOP'])
    away_TOP = _top_to_seconds(table['at_TOP'])
    assert (home_TOP + away_TOP >= 3595).all(), \
        'ht_TOP + at_TOP is below 3595 on some rows'
Exemple #4
0
def home_away_differences():
    ''' Finds the differences between teams and their opponents based on
    location.  Returns a dictionary of numpy arrays.  Ex. use:

    a = home_away_differences()
    plt.hist(a['Dallas Cowboys home score'], 20)

    which would plot a hist of how many more points the Cowboys have vs their
    opponent when the Cowboys are playing at home.
    a['Dallas Cowboys away score'] would be their stats when they are away

    Keys have the form '<team> home <stat>' and '<team> away <stat>', where
    <stat> is every column that carries the 'at_' prefix (with the prefix
    removed) except 'TOP'.  Each stat must also exist with the 'ht_' prefix.

    Might delete.  Too specific of a function
    '''
    table = load.getDataset()
    all_teams = get_teams(table)
    team_keys = get_team_keys()
    # Select stats by the 'at_' prefix rather than by the substring 'at',
    # which also matched 'attendance' and every 'ht_*attempt*' column.
    # 'TOP' is left out because it is subtracted elementwise below and the
    # validate() checks in this file read it as a 'a:b' string.
    columns = [name[3:] for name in table.keys()
               if name.startswith('at_') and name[3:] != 'TOP']
    stat_diff = {}
    for team in team_keys:
        # (key suffix, the team's own prefix, the opponent's prefix)
        for suffix, own, opponent in [(' home', 'ht_', 'at_'),
                                      (' away', 'at_', 'ht_')]:
            games = all_teams[team + suffix]
            for stat in columns:
                stat_diff[team + suffix + ' ' + stat] = \
                    np.array(games[own + stat]) - \
                    np.array(games[opponent + stat])
    return stat_diff


    
    
    
    
    
    
Exemple #5
0
def cum_array(stat):
    '''Fetch one column of the full dataset.

    stat is the column name as a string; the value returned is whatever
    indexing the loaded dataset with that name yields (a pd series).
    '''
    table = load.getDataset()
    return table[stat]
Exemple #6
0
def cum_stat(stat):
    '''Summary statistics of one stat over all games, for a quick look at
    its distribution.

    stat is the column name as a string.  The whole dataset is summarised
    with describe() and the entry for that column is returned.
    '''
    summary = load.getDataset().describe()
    return summary[stat]
Exemple #7
0
def get_team_keys():
    '''Gets a list of all teams.

    Collects the values of the 'home_team' and 'away_team' columns of the
    dataset, strips surrounding whitespace and removes duplicates.

    Names are stripped before de-duplicating, so two spellings that differ
    only in surrounding whitespace yield a single entry.  The list is
    sorted so the order is the same on every call.
    '''
    table = load.getDataset()
    names = set()
    for column in ('home_team', 'away_team'):
        names.update(name.strip() for name in table[column].values)
    return sorted(names)