def retrieve_data(): y_val = 'pts_scored' y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client()) def_data = pull_data.pull_model_features(y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features(y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [i for i in list(off_data) if i not in list(def_data)] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) poss_data = pull_data.pull_model_features(y_val, 'possessions', update_dbs.mongodb_client) poss_data = hfa_patch(poss_data, update_dbs.mysql_client()) tar_data = pull_data.pull_model_features(y_val, 'target', update_dbs.mongodb_client) tar_data = hfa_patch(tar_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.join(poss_data, how='inner') x_data = x_data.join(tar_data, how='inner') train_index = pull_data.pull_train_index(update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] x_data = x_data.join(y_data, how='inner')[list(x_data)] def_data = None off_data = None poss_data = None tar_data = None data = x_data.join(y_data, how='inner') data = data.reset_index() Y = data['pts'] x_feats = [ 'expected_pts_pg_for', '75_g_HAspread_for_floor-percentage', 'pregame_pts_pg_for', 'expected_poss_pg_for', 'expected_ppp_for', '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio', '75_g_HAspread_allow_points-per-game', '100_g_HAspread_allow_block-pct', 'pregame_poss_pg_for', '10_game_avg_30_g_HAweight_allow_personal-foul-pct', 'expected_turnovers-per-possession_for', 'expected_offensive-rebounding-pct_for', '30_g_HAspread_for_floor-percentage', 'expected_ftm-per-100-possessions_for', 'expected_effective-field-goal-pct_for', 'pregame_effective-field-goal-pct_for', '100_g_HAspread_allow_assist--per--turnover-ratio', '30_g_HAspread_allow_floor-percentage', '10_game_avg_30_g_HAweight_allow_two-point-rate', '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game', '10_game_avg_50_g_Tweight_for_effective-field-goal-pct', '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game' ] X = data[x_feats] return X, Y
def raw_data(): def_data = pull_data.pull_model_features('pts_scored', 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features('pts_scored', 'offensive_stats', update_dbs.mongodb_client) off_feats = [i for i in list(off_data) if i not in list(def_data)] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) poss_data = pull_data.pull_model_features('pts_scored', 'possessions', update_dbs.mongodb_client) poss_data = hfa_patch(poss_data, update_dbs.mysql_client()) tar_data = pull_data.pull_model_features('pts_scored', 'target', update_dbs.mongodb_client) tar_data = hfa_patch(tar_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.join(poss_data, how='inner') x_data = x_data.join(tar_data, how='inner') x_data = x_data.loc[x_data.index.isin(validation_index)] y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client()) team_data = x_data.join(y_data, how='inner')[list(x_data)] def_data = None off_data = None poss_data = None tar_data = None def_data = pull_data.pull_model_features('pts_allowed', 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features('pts_allowed', 'offensive_stats', update_dbs.mongodb_client) off_feats = [i for i in list(off_data) if i not in list(def_data)] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) poss_data = pull_data.pull_model_features('pts_allowed', 'possessions', update_dbs.mongodb_client) poss_data = hfa_patch(poss_data, update_dbs.mysql_client()) tar_data = pull_data.pull_model_features('pts_allowed', 'target', update_dbs.mongodb_client) tar_data = hfa_patch(tar_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.join(poss_data, how='inner') opponent_data = x_data.join(tar_data, how='inner') def_data = None off_data = None poss_data = None tar_data = None cnx = update_dbs.mysql_client() cursor = cnx.cursor() query = 'SELECT * from gamedata;' cursor.execute(query) switch = pd.DataFrame(cursor.fetchall(), columns=['teamname', 'date', 'opponent', 'location']) idx_switch = {} for t, d, o, l in np.array(switch): idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_') idx = [] for idxx in opponent_data.index: idx.append(idx_switch[idxx]) opponent_data['idx'] = idx opponent_data = opponent_data.set_index('idx') opponent_data *= -1 opponent_data = opponent_data.rename( columns={i: '-' + i for i in list(opponent_data)}) data = opponent_data.join(team_data) data = data.join(y_data, how='inner') data = data.replace([np.inf, -np.inf], np.nan) data = data.replace('NULL', np.nan) data = data.dropna(how='any') return data
def update(): for y_val in ['pts_scored', 'pts_allowed']: if y_val == 'pts_scored': train_index = pull_data.update_idx(update_dbs.mysql_client(), 'offensive_preds') if y_val == 'pts_allowed': train_index = pull_data.update_idx(update_dbs.mysql_client(), 'defensive_preds') update_df = pd.DataFrame() if len(train_index) == 0: continue update_df['idx'] = train_index update_df = update_df.set_index('idx') for x_vals in [ 'defensive_stats', 'offensive_stats', 'full-team', 'all', 'possessions', 'target' ]: if x_vals in ['defensive_stats', 'offensive_stats' ] and y_val == 'pts_allowed': continue if x_vals in ['full-team', 'defensive_stats' ] and y_val == 'pts_scored': continue if x_vals == 'possessions': y_data = pull_data.pull_possessions(y_val, update_dbs.mysql_client()) x_data = pull_data.pull_model_features( y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['possessions'] x_data = x_data.join(y_data, how='inner')[list(x_data)] elif x_vals in [ 'target', 'defensive_stats', 'offensive_stats', 'full-team', 'all' ]: y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client()) if x_vals == 'full-team': def_data = pull_data.pull_model_features( y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features( y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [ i for i in list(off_data) if i not in list(def_data) ] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] off_data = None def_data = None elif x_vals == 'all': def_data = pull_data.pull_model_features( y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features( y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [ i for i in list(off_data) if i not in list(def_data) ] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) poss_data = pull_data.pull_model_features( y_val, 'possessions', update_dbs.mongodb_client) poss_data = hfa_patch(poss_data, update_dbs.mysql_client()) tar_data = pull_data.pull_model_features( y_val, 'target', update_dbs.mongodb_client) tar_data = hfa_patch(tar_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.join(poss_data, how='inner') x_data = x_data.join(tar_data, how='inner') x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] def_data = None off_data = None poss_data = None tar_data = None elif x_vals in ['target', 'defensive_stats', 'offensive_stats']: x_data = pull_data.pull_model_features( y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] for model_name, model_details in saved_models.stored_models[y_val][ x_vals].items(): if os.path.isfile( os.path.join( model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))): print('Loading %s Values' % (model_name)) model = joblib.load( os.path.join( model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))) scale = joblib.load( os.path.join( model_storage, '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name))) preds = model.predict( scale.fit_transform(x_data[model_details['features']])) indy_pred = pd.DataFrame() if x_vals == 'offensive_stats': indy_pred[model_name + '_team'] = preds elif y_val == 'pts_allowed' and x_vals == 'full-team': indy_pred[model_name + '_team'] = preds else: indy_pred[model_name + '_' + x_vals] = preds indy_pred['idx'] = list(x_data.index) indy_pred = indy_pred.set_index('idx') update_df = update_df.join(indy_pred, how='inner') print('Loaded %s' % (model_name)) if y_val == 'pts_scored': update_df = update_df[[ 'lightgbm_team', 'linsvm_team', 'linsvm_all', 'ridge_all', 'lasso_possessions', 'lightgbm_possessions', 'linsvm_possessions', 'lightgbm_target', 'linsvm_target', 'ridge_target', 'lasso_target' ]] add_derived.update('offensive_preds', update_df) elif y_val == 'pts_allowed': update_df = update_df[[ 'lightgbm_all', 'ridge_all', 'lasso_team', 'lightgbm_team', 'linsvm_team', 'ridge_team', 'lasso_possessions', 'lightgbm_possessions', 'ridge_possessions', 'lasso_target', 'lightgbm_target' ]] add_derived.update('defensive_preds', update_df)
away_data *= -1 home_data = patch_data[patch_data[1]==1] patch_data = home_data.append(away_data) del patch_data[1] x = patch_data.join(keep_data) print('Completed HFA Patch') return x for x_vals in ['defensive_stats', 'offensive_stats', 'full-team', 'all', 'possessions', 'target']: for y_val in ['pts_scored', 'pts_allowed']: #for each in [('all', 'pts_allowed'), ('full-team', 'pts_allowed'), ('offensive_stats', 'pts_scored'), ('all', 'pts_scored')]: #for each in [('target', 'pts_allowed'), ('target', 'pts_scored')]: # x_vals, y_val = each if x_vals == 'possessions': y_data = pull_data.pull_possessions(y_val, update_dbs.mysql_client()) x_data = pull_data.pull_model_features(y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) train_index = pull_data.pull_train_index(update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how = 'inner')['possessions'] x_data = x_data.join(y_data, how = 'inner')[list(x_data)] elif x_vals in ['target', 'defensive_stats', 'offensive_stats', 'full-team', 'all']: y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client()) if x_vals == 'full-team': def_data = pull_data.pull_model_features(y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features(y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [i for i in list(off_data) if i not in list(def_data)]
def save(): def hfa_patch(x, cnx): print('Running HFA Patch') keep_stats = [] patch_stats = [] for stat in list(x): try: stat.split('_HAspread_')[1] patch_stats.append(stat) except IndexError: keep_stats.append(stat) patch_data = x[patch_stats] keep_data = x[keep_stats] cursor = cnx.cursor() query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;' cursor.execute(query) patch = pd.DataFrame(cursor.fetchall(), columns=['date', 't1', 't2', 'location']) cursor.close() loc_adj = {} for d, t1, t2, l in np.array(patch): if l == 0: loc_adj[str(d) + t1.replace(' ', '_')] = 1 loc_adj[str(d) + t2.replace(' ', '_')] = -1 else: loc_adj[str(d) + t1.replace(' ', '_')] = -1 loc_adj[str(d) + t2.replace(' ', '_')] = 1 patch = None patch_data = patch_data.join(pd.DataFrame.from_dict( list(loc_adj.items())).set_index(0), how='left') away_data = patch_data[patch_data[1] == -1] away_data *= -1 home_data = patch_data[patch_data[1] == 1] patch_data = home_data.append(away_data) del patch_data[1] x = patch_data.join(keep_data) print('...Completed HFA Patch') return x for y_val in ['pts_scored', 'pts_allowed']: for x_vals in [ 'defensive_stats', 'offensive_stats', 'full-team', 'all', 'possessions', 'target' ]: if x_vals in ['defensive_stats', 'offensive_stats' ] and y_val == 'pts_allowed': continue if x_vals in ['full-team', 'defensive_stats' ] and y_val == 'pts_scored': continue if x_vals == 'possessions': y_data = pull_data.pull_possessions(y_val, update_dbs.mysql_client()) x_data = pull_data.pull_model_features( y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) train_index = pull_data.pull_train_index( update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['possessions'] x_data = x_data.join(y_data, how='inner')[list(x_data)] elif x_vals in [ 'target', 'defensive_stats', 'offensive_stats', 'full-team', 'all' ]: y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client()) if x_vals == 'full-team': def_data = pull_data.pull_model_features( y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features( y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [ i for i in list(off_data) if i not in list(def_data) ] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') train_index = pull_data.pull_train_index( update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] off_data = None def_data = None elif x_vals == 'all': def_data = pull_data.pull_model_features( y_val, 'defensive_stats', update_dbs.mongodb_client) def_data = hfa_patch(def_data, update_dbs.mysql_client()) off_data = pull_data.pull_model_features( y_val, 'offensive_stats', update_dbs.mongodb_client) off_feats = [ i for i in list(off_data) if i not in list(def_data) ] off_data = off_data[off_feats] off_data = hfa_patch(off_data, update_dbs.mysql_client()) poss_data = pull_data.pull_model_features( y_val, 'possessions', update_dbs.mongodb_client) poss_data = hfa_patch(poss_data, update_dbs.mysql_client()) tar_data = pull_data.pull_model_features( y_val, 'target', update_dbs.mongodb_client) tar_data = hfa_patch(tar_data, update_dbs.mysql_client()) x_data = def_data.join(off_data, how='inner') x_data = x_data.join(poss_data, how='inner') x_data = x_data.join(tar_data, how='inner') train_index = pull_data.pull_train_index( update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] def_data = None off_data = None poss_data = None tar_data = None elif x_vals in ['target', 'defensive_stats', 'offensive_stats']: x_data = pull_data.pull_model_features( y_val, x_vals, update_dbs.mongodb_client) x_data = hfa_patch(x_data, update_dbs.mysql_client()) train_index = pull_data.pull_train_index( update_dbs.mysql_client()) x_data = x_data.loc[x_data.index.isin(train_index)] y_data = x_data.join(y_data, how='inner')['ppp'] x_data = x_data.join(y_data, how='inner')[list(x_data)] for model_name, model_details in saved_models.stored_models[y_val][ x_vals].items(): if not os.path.isfile( os.path.join( model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))): print('Loading %s Values' % (model_name)) model = model_details['model'] scale = model_details['scale'] scale.fit(x_data[model_details['features']]) joblib.dump( scale, os.path.join( model_storage, '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name))) model.fit( scale.transform(x_data[model_details['features']]), np.ravel(y_data)) joblib.dump( model, os.path.join( model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))) print('Stored %s' % (model_name))