Ejemplo n.º 1
0
def proj_vs_actual(start_date, end_date):
    """Combine saved lineups with actual fantasy points over a date range.

    For each day from ``start_date`` through ``end_date`` (inclusive) the
    'stat' and 'line' CSV files located by ``format_fpath`` are read. Player
    names in both are normalized with ``format_name`` and the ``FP`` column
    of the stat file is joined onto the lineup rows as ``actual``.

    Each row additionally gets:

    * ``date``  -- the day the row belongs to (converted to datetime64),
    * ``PTeam`` -- the value found for the player in ``player_team_map()``,
      or ``'UNK'`` when the player is not listed there,
    * ``Loc``   -- ``'Home'`` when ``PTeam == Team``, otherwise ``'Away'``.

    For rows marked ``'Away'`` the ``Team`` and ``Opp`` values are swapped.

    Args:
        start_date: First day of the range (supports ``+ dt.timedelta``).
        end_date: Last day of the range, inclusive.

    Returns:
        A DataFrame with the rows of every processed day stacked (each day
        keeps its own 0..n-1 index), or ``None`` when no day had a readable
        stat file.
    """
    mapper = None  # loaded lazily, once, the first time a day has stats
    frames = []
    n_days = (end_date - start_date).days
    for offset in range(n_days + 1):
        cur_date = start_date + dt.timedelta(days=offset)
        try:
            stats = pd.read_csv(format_fpath('stat', cur_date))
        except (OSError, ValueError):
            # Missing/unreadable file (OSError) or empty/malformed CSV
            # (pandas raises ValueError subclasses): skip this day.
            continue

        stats['Starters'] = stats['Starters'].apply(format_name)
        lineups = pd.read_csv(format_fpath('line', cur_date))
        lineups['Name'] = lineups['Name'].apply(format_name)

        actual = stats.set_index('Starters')['FP'].rename('actual')
        combo = (lineups.join(actual, on='Name')
                        .sort_values('Name')
                        .set_index('Name'))
        combo['date'] = cur_date

        if mapper is None:
            mapper = player_team_map()
        # Positional boolean mask: safe even though names repeat in the index.
        listed = combo.index.isin(mapper.index)
        combo['PTeam'] = 'UNK'
        if listed.any():
            combo.loc[listed, 'PTeam'] = [
                mapper.loc[name] for name in combo.index[listed]
            ]

        is_home = (combo['PTeam'] == combo['Team']).to_numpy()
        combo['Loc'] = pd.Series(is_home).map(
            {True: 'Home', False: 'Away'}).to_numpy()

        # Move the name back into a trailing column and renumber the rows.
        combo['Name'] = combo.index.to_numpy()
        combo = combo.reset_index(drop=True)

        # Swap Team/Opp on away rows; .to_numpy() prevents pandas from
        # re-aligning the columns by label, which would undo the swap.
        away = combo['Loc'] == 'Away'
        if away.any():
            combo.loc[away, ['Team', 'Opp']] = (
                combo.loc[away, ['Opp', 'Team']].to_numpy())

        frames.append(combo)

    if not frames:
        return None

    compare = pd.concat(frames)
    compare['date'] = pd.to_datetime(compare['date']).to_numpy()
    return compare
Ejemplo n.º 2
0
def boxStats(date):
    """Scrape box scores for ``date`` and save player stats with fantasy points.

    For every ``(home, away)`` pair returned by ``get_games(date)`` the
    basketball-reference box score page is parsed with ``pd.read_html``.
    Two tables are used per game: the first one, tagged as the away team,
    and the one at the midpoint of the table list, tagged as the home team.
    The 'Reserves' and 'Team Totals' rows are removed, rows whose ``MP``
    text starts with 'Did' or 'Not' are discarded, an ``FP`` column is
    computed and the result is written to the 'stat' CSV for ``date``.

    Any failure is reported on stdout together with a traceback; nothing is
    raised to the caller and nothing is returned.

    Args:
        date: Day to scrape; must provide ``strftime``.
    """
    url_base = 'https://www.basketball-reference.com/boxscores/{}0{}.html'
    try:
        games = get_games(date)
        date_str = date.strftime('%Y%m%d')
        frames = []

        for home, away in games:
            tables = pd.read_html(url_base.format(date_str, home))
            for idx in (0, len(tables) // 2):
                box = tables[idx]
                # Drop the top level of the two-level column header.
                box.columns = box.columns.droplevel(0)
                box = (box.set_index('Starters')
                          .drop('Reserves')
                          .drop('Team Totals')
                          .fillna(0))
                if idx == 0:
                    box['Loc'] = 'Away'
                    box['Team'] = away
                    box['Opp'] = home
                else:
                    box['Loc'] = 'Home'
                    box['Team'] = home
                    box['Opp'] = away
                frames.append(box)

        if not frames:
            print('No games for {}'.format(date))
            return

        stats = pd.concat(frames)

        # Filter with a boolean mask rather than dropping by index label:
        # the index holds player names, which need not be unique, so a
        # label-based drop could remove unrelated rows sharing a name.
        inactive = stats.MP.str[:3].isin(['Did', 'Not'])
        stats = stats[~inactive].copy()

        stats['FP'] = (
            stats.PTS.astype('int')
            + stats.TRB.astype('int') * 1.2
            + stats.AST.astype('int') * 1.5
            + stats.BLK.astype('int') * 3
            + stats.STL.astype('int') * 3
            - stats.TOV.astype('int')
        )
        stats.to_csv(format_fpath('stat', date))

    except Exception:
        # Best-effort scraper: report and carry on, but let
        # KeyboardInterrupt / SystemExit propagate.
        print('No games for {}'.format(date))
        traceback.print_exc(file=sys.stdout)
Ejemplo n.º 3
0
def main():
    """Download the numberFire NBA projection table and save it as CSV.

    The fourth ``<table>`` on the page is read row by row, player details
    are expanded with ``parse_player`` into ``Name``, ``Pos``, ``Team``,
    ``Opp`` and ``Game`` columns, one 0/1 indicator column ``is<Pos>`` is
    added per distinct position, ``Cost`` is converted from text to ``int``
    and the frame is written to the path given by ``format_fpath('proj')``.

    Raises:
        requests.RequestException: If the page cannot be fetched, the
            request times out or the server answers with an error status.
    """
    url = "https://www.numberfire.com/nba/daily-fantasy/daily-basketball-projections"
    # A timeout keeps the script from hanging forever on a stalled server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, features="lxml")
    tables = soup.find_all("table")

    # Rows without <td> cells yield empty lists; pandas pads them with
    # missing values and dropna() below removes them.
    rows = [
        [cell.get_text().strip() for cell in row.find_all("td")]
        for row in tables[3].find_all("tr")
    ]

    cols = "Player,FP,Cost,Value,Min,Pts,Reb,Ast,Stl,Blk,TO".split(",")
    df = pd.DataFrame(rows, columns=cols).dropna()

    player_data = df.Player.apply(parse_player)
    df = df.join(
        pd.DataFrame(
            player_data.to_list(),
            index=player_data.index,
            columns=["Name", "Pos", "Team", "Opp", "Game"],
        )).drop("Player", axis=1)

    for pos in df.Pos.unique():
        df['is{}'.format(pos)] = (df.Pos == pos).astype('int').to_numpy()

    # Drop the leading character and the thousands separators, then
    # replace the column outright so it takes an integer dtype.
    df['Cost'] = df['Cost'].apply(lambda cost: int(cost[1:].replace(',', '')))

    df.to_csv(format_fpath('proj'), index=False)
Ejemplo n.º 4
0
def generate(date=None, lineups=25, to_file=True):
    """Build a series of salary-capped lineups by integer programming.

    Projections come from ``get_proj(date)``. Each round maximizes the sum
    of the current ``FP`` projections of the selected players subject to:

    * the per-position counts over the ``isPG, isSG, isSF, isPF, isC``
      indicator columns being exactly ``[2, 2, 2, 2, 1]``,
    * the summed ``Cost`` of the selection not exceeding 60000.

    After every round the working projection of each picked player is
    multiplied by 0.95, which makes already-used players less attractive in
    later rounds.

    Args:
        date: Day to generate lineups for. ``None`` (the default) means
            today, evaluated at call time.
        lineups: Number of rounds / lineups to produce.
        to_file: When true, write the result to the 'line' CSV for ``date``
            and return ``None``; otherwise return the DataFrame.

    Returns:
        ``None`` when ``to_file`` is true, else a DataFrame with the picked
        rows of every round and a ``round`` column (1-based).

    Raises:
        RuntimeError: If the solver does not produce a solution.
    """
    import pandas as pd  # used for pd.concat below

    if date is None:
        # Resolve here, not in the signature: a default of
        # dt.date.today() would be frozen at import time.
        date = dt.date.today()

    df = get_proj(date)
    pos_mat = df[['isPG', 'isSG', 'isSF', 'isPF', 'isC']].to_numpy().T
    # Float copy so the 0.95 discount never hits an integer column.
    cur_proj = df['FP'].astype(float)
    slots = np.array([2, 2, 2, 2, 1])
    sal_max = 60000
    sal = df['Cost'].to_numpy()

    x = cp.Variable(len(df), boolean=True)
    constraints = [pos_mat @ x == slots, x >= 0, x <= 1,
                   x.T @ sal <= sal_max]

    rounds = []
    for round_no in range(1, lineups + 1):
        objective = cp.Maximize(x.T @ cur_proj.to_numpy())
        prob = cp.Problem(objective, constraints)
        prob.solve(solver='GLPK_MI')
        if x.value is None:
            raise RuntimeError(
                'Lineup optimization failed for {} (status: {})'.format(
                    date, prob.status))

        # Threshold instead of "== 1": solver output is floating point.
        chosen = x.value > 0.5
        picks = df.iloc[chosen].copy()
        picks['round'] = round_no
        rounds.append(picks)
        cur_proj.loc[picks.index] = cur_proj.loc[picks.index].to_numpy() * .95

    selections = pd.concat(rounds) if rounds else None

    if to_file:
        selections.to_csv(format_fpath('line', date), index=False)
    else:
        return selections
Ejemplo n.º 5
0
def statRange(start_date, end_date):
    """Load and stack the saved 'stat' CSV files for a range of days.

    Every day produced by ``pd.date_range(start_date, end_date)`` is looked
    up through ``format_fpath('stat', day)``. Days whose file is missing,
    unreadable or not parseable are skipped. Each loaded frame gets a
    ``Date`` column holding that day's timestamp.

    Args:
        start_date: First day of the range.
        end_date: Last day of the range (inclusive, as in ``pd.date_range``).

    Returns:
        The concatenated DataFrame (original per-file indexes are kept), or
        ``None`` when no file could be read.
    """
    frames = []
    for date in pd.date_range(start_date, end_date):
        try:
            day_stats = pd.read_csv(format_fpath('stat', date.date()))
        except (OSError, ValueError):
            # OSError: file missing/unreadable; ValueError: pandas'
            # EmptyDataError / ParserError for empty or malformed files.
            continue
        day_stats['Date'] = date
        frames.append(day_stats)

    # One concat instead of repeated DataFrame.append (removed in pandas 2).
    return pd.concat(frames) if frames else None
Ejemplo n.º 6
0
def build_feature_set(date=None):
    """Attach rating-based feature columns to the saved lineups of ``date``.

    Reads the 'proj' and 'line' CSV files for ``date``, passes both through
    ``team_translation``, and derives offensive/defensive ratings with
    ``calc_ratings(game_data(previous day, 15))``. The lineup frame then
    receives these columns:

    * ``ortg`` / ``drtg`` -- mean offensive / defensive rating over the
      distinct teams in the projection file (same value on every row),
    * ``Games``  -- number of distinct projected teams divided by two,
    * ``l_drtg`` -- defensive rating selected by the row's ``Opp``,
    * ``l_ortg`` -- offensive rating selected by the row's ``Team``,
    * ``dscore`` -- value from the previous day's 'score' CSV selected by
      the row's ``Opp``.

    The per-row selections are done by multiplying the one-hot encoding of
    the team (from ``oneHotTeams(defense)``) with the rating vector.
    NOTE(review): this assumes the encoder's category order matches the
    row order of ``defense``, ``offense`` and the score file -- confirm.

    Args:
        date: Day to build features for. ``None`` (the default) means
            today, evaluated at call time.

    Returns:
        The lineup DataFrame with the feature columns added.
    """
    if date is None:
        # Resolve here, not in the signature: a default of
        # dt.date.today() would be frozen at import time.
        date = dt.date.today()
    prev_day = date - dt.timedelta(days=1)

    proj = pd.read_csv(format_fpath('proj', date))
    team_translation(proj)
    teams = proj.Team.drop_duplicates()

    hist = game_data(prev_day, 15)
    offense, defense = calc_ratings(hist)
    def_dict = defense.to_dict()
    off_dict = offense.to_dict()

    # Slate-wide averages over the teams that appear in the projections.
    ortg = teams.apply(lambda team: off_dict[team]).rename('ortg')
    drtg = teams.apply(lambda team: def_dict[team]).rename('drtg')
    off_def = ortg.to_frame().join(drtg).mean()

    lineups = pd.read_csv(format_fpath('line', date))
    team_translation(lineups)
    lineups['ortg'] = off_def.ortg
    lineups['drtg'] = off_def.drtg
    lineups['Games'] = len(teams) / 2

    enc = oneHotTeams(defense)
    defense_mat = enc.transform(np.reshape(lineups.Opp.to_numpy(), (-1, 1)))
    offense_mat = enc.transform(np.reshape(lineups.Team.to_numpy(), (-1, 1)))

    defense_col = np.reshape(defense.values, (-1, 1))
    offense_col = np.reshape(offense.values, (-1, 1))
    lineups['l_drtg'] = np.reshape(np.matmul(defense_mat, defense_col), (-1))
    lineups['l_ortg'] = np.reshape(np.matmul(offense_mat, offense_col), (-1))

    score = pd.read_csv(format_fpath('score', prev_day)).set_index('Defense')
    score_col = np.reshape(score.values, (-1, 1))
    lineups['dscore'] = np.reshape(np.matmul(defense_mat, score_col), (-1))
    return lineups
Ejemplo n.º 7
0
def fp_score(cur_date, lookback):
    """Estimate per-team fantasy-point ratings and save the defensive ones.

    Player stats from ``statRange`` for the ``lookback`` days up to
    ``cur_date`` are summed to team totals per ``(Date, Team)`` and joined
    onto the games from ``game_data(cur_date, lookback)``, matching the
    team to the game's ``Offense``. Starting from the plain per-team means
    of ``FP`` (``ortg`` by ``Offense``, ``drtg`` by ``Defense``), 20
    refinement passes are run. In each pass every game proposes

        new_ortg = 2 * FP - drtg
        new_drtg = 2 * FP - ortg

    The proposal is averaged with the current rating, and the result is
    averaged per team. The final defensive ratings are written to the
    'score' CSV for ``cur_date``. Nothing is returned.

    Args:
        cur_date: Last day of the window (supports ``- dt.timedelta``).
        lookback: Window length in days.
    """
    stats = statRange(cur_date - dt.timedelta(days=lookback), cur_date)
    games = game_data(cur_date, lookback)

    # Select the column before aggregating: aggregating every column and
    # then taking one is wasteful and fails on pandas >= 2.0 as soon as a
    # non-numeric column is present.
    team_fp = stats.groupby(['Date', 'Team'])['FP'].sum()
    df = (games.join(games.index.to_frame())
               .set_index(['Date', 'Offense'])
               .join(team_fp, on=['Date', 'Offense'])
               .dropna())

    offense = df.groupby('Offense')['FP'].mean().rename('ortg')
    defense = df.groupby('Defense')['FP'].mean().rename('drtg')

    for _ in range(20):
        mapper = df.join(offense, on='Offense').join(defense, on='Defense')
        # Both proposals are computed from the ratings of the previous
        # pass, before either column is overwritten.
        new_ortg = mapper['FP'] * 2 - mapper['drtg']
        new_drtg = mapper['FP'] * 2 - mapper['ortg']
        mapper['drtg'] = (new_drtg + mapper['drtg']) / 2
        mapper['ortg'] = (new_ortg + mapper['ortg']) / 2
        offense = mapper.groupby('Offense')['ortg'].mean()
        defense = mapper.groupby('Defense')['drtg'].mean()

    defense.to_csv(format_fpath('score', cur_date))
Ejemplo n.º 8
0
def get_proj(date):
    """Load the saved projections for ``date`` and tag home/away rows.

    The 'proj' CSV is read, player names are normalized with
    ``format_name`` and each row gets:

    * ``PTeam`` -- the value found for the player in ``player_team_map()``,
      or ``'UNK'`` when the player is not listed there,
    * ``Loc``   -- ``'Home'`` when ``PTeam == Team``, otherwise ``'Away'``.

    For rows marked ``'Away'`` the ``Team`` and ``Opp`` values are swapped.

    Args:
        date: Day whose projection file should be loaded.

    Returns:
        The projection DataFrame with a 0..n-1 index and the ``Name``
        column holding the normalized player names.
    """
    df = pd.read_csv(format_fpath('proj', date))
    df['Name'] = df['Name'].apply(format_name)

    mapper = player_team_map()
    listed = df['Name'].isin(mapper.index)
    df['PTeam'] = 'UNK'
    if listed.any():
        df.loc[listed, 'PTeam'] = [
            mapper.loc[name] for name in df.loc[listed, 'Name']
        ]

    df['Loc'] = (df['PTeam'] == df['Team']).map(
        {True: 'Home', False: 'Away'})

    # Name is already a regular column here, so it must NOT be rebuilt from
    # the index: doing so would replace every player name by a row number.
    df = df.reset_index(drop=True)

    # Swap Team/Opp on away rows; .to_numpy() prevents pandas from
    # re-aligning the columns by label, which would undo the swap.
    away = df['Loc'] == 'Away'
    if away.any():
        df.loc[away, ['Team', 'Opp']] = df.loc[away, ['Opp', 'Team']].to_numpy()
    return df