def _mirrored_odds(odds, value_col, out_col):
    """Expand per-game odds rows into one row per team.

    Every row of ``odds`` produces two output rows: the ``fav_idx`` entry
    keeps the value found in ``value_col`` and the ``dog_idx`` entry gets
    that value multiplied by -1.

    Args:
        odds: DataFrame holding ``fav_idx``, ``dog_idx`` and ``value_col``.
        value_col: name of the column in ``odds`` to mirror.
        out_col: name of the single value column in the result.

    Returns:
        DataFrame indexed by ``idx`` with one column named ``out_col``.
    """
    idx = []
    values = []
    for fav, dog, value in np.array(odds[['fav_idx', 'dog_idx', value_col]]):
        idx.extend((fav, dog))
        values.extend((value, value * -1))
    mirrored = pd.DataFrame()
    mirrored['idx'] = idx
    mirrored[out_col] = values
    return mirrored.set_index('idx')


def keras_preds():
    """Append stored keras model predictions to each ``<kind>_results.csv``.

    For each of 'winner', 'ou' and 'line' the function loads the stored
    keras model and scaler from ``model_storage``, predicts on the raw
    feature frame, converts the first output of each prediction to a 0/1
    pick plus a confidence, and joins the two columns
    (``raw_keras_prediction`` / ``raw_keras_confidence``) onto the existing
    results file in ``output_folder``, which is then rewritten in place.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')

    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    linedata = _mirrored_odds(line, 'line', 'vegas_line')
    # NOTE(review): the dog_idx row receives the negated over/under, exactly
    # as it does for the line -- confirm this matches how the stored models
    # were trained before changing it.
    oudata = _mirrored_odds(line, 'overunder', 'vegas_ou')
    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')

    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Each joined frame is built exactly once.
    all_x_data = {
        'winner': raw_x.join(y_wl, how='inner'),
        'line': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        'ou': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
    }
    # Drop the intermediates so only the joined frames stay alive.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line

    random.seed(86)
    sort = 'keras'
    print('... starting %s' % (sort))
    for kind in ['winner', 'ou', 'line']:
        results_path = os.path.join(output_folder, '%s_results.csv' % (kind))
        results = pd.read_csv(results_path)
        results = results.set_index('idx')
        print('... starting %s' % (kind))

        frame = all_x_data[kind]
        save_index = list(frame.index)
        features = saved_models.stored_models[kind]['raw'][sort]['features']
        X = frame.reset_index()[features]

        print('...loading %s' % (kind))
        model = load_model(
            os.path.join(model_storage,
                         '%s_%s_regression_model.h5' % (kind, sort)))
        scale = joblib.load(
            os.path.join(model_storage,
                         '%s_%s_regression_scaler.pkl' % (kind, sort)))
        preds = model.predict(scale.transform(X))

        # Only the first output of each prediction is used: below .5 is a
        # pick of 0, otherwise 1; confidence is the distance from the
        # opposite extreme.
        winner = []
        confidence = []
        for game in preds:
            if game[0] < .5:
                winner.append(0)
                confidence.append(1 - game[0])
            else:
                winner.append(1)
                confidence.append(game[0])

        model_outcome = pd.DataFrame()
        model_outcome['idx'] = save_index
        model_outcome['raw_keras_prediction'] = winner
        model_outcome['raw_keras_confidence'] = confidence
        model_outcome = model_outcome.set_index('idx')

        # A previous run leaves these columns in the CSV; joining on top of
        # them raises on overlapping column names, so replace them instead.
        results = results.drop(columns=list(model_outcome), errors='ignore')
        results = results.join(model_outcome, how='inner')
        results.to_csv(results_path)
        print('Finished %s' % (kind))
    print('Finished %s' % (sort))
opponent_data['idx'] = idx opponent_data = opponent_data.set_index('idx') opponent_data *= -1 opponent_data = opponent_data.rename( columns={i: '-' + i for i in list(opponent_data)}) data = opponent_data.join(team_data) data = data.join(y_data, how='inner') data = data.replace([np.inf, -np.inf], np.nan) data = data.replace('NULL', np.nan) data = data.dropna(how='any') return data raw_x = raw_data() x_score = pull_data.score(update_dbs.mysql_client()) y_wl = pull_data.pull_wl(update_dbs.mysql_client()) x_ou = pull_data.ou_preds(update_dbs.mysql_client()) y_ou = pull_data.ou_wl(update_dbs.mysql_client()) y_line = pull_data.line_wl(update_dbs.mysql_client()) x_line = pull_data.line_preds(update_dbs.mysql_client()) all_x_data = { 'winner': { '+pts': x_score.join(y_wl, how='inner'), 'raw': raw_x.join(y_wl, how='inner'), }, 'line': { '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'), 'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'), },
def hfa_patch(x, cnx):
    """Sign-adjust the ``_HAspread_`` columns of ``x`` by game location.

    Columns whose name contains ``_HAspread_`` are looked up against the
    ``oddsdata`` table: each game tags its favorite and underdog with +1/-1
    depending on ``homeaway``. Rows tagged -1 have their ``_HAspread_``
    values multiplied by -1; rows tagged +1 are kept as-is; rows of ``x``
    whose index has no tag are dropped. All other columns are joined back
    unchanged.

    Args:
        x: DataFrame indexed by ``str(date) + team_name`` (spaces in the
            team name replaced by underscores), as built from ``oddsdata``.
        cnx: DB-API connection exposing ``cursor()``; it is not closed here.

    Returns:
        DataFrame with the +1 rows first, then the sign-flipped -1 rows,
        holding the ``_HAspread_`` columns followed by the remaining ones.
    """
    print('Running HFA Patch')
    columns = list(x)
    patch_stats = [stat for stat in columns if '_HAspread_' in stat]
    keep_stats = [stat for stat in columns if '_HAspread_' not in stat]
    patch_data = x[patch_stats]
    keep_data = x[keep_stats]

    cursor = cnx.cursor()
    try:
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
    finally:
        # Release the cursor even when the query fails.
        cursor.close()

    # NOTE(review): presumably homeaway == 0 means the favorite is the home
    # team -- confirm against the oddsdata schema.
    loc_adj = {}
    for d, t1, t2, l in np.array(patch):
        fav_sign = 1 if l == 0 else -1
        loc_adj[str(d) + t1.replace(' ', '_')] = fav_sign
        loc_adj[str(d) + t2.replace(' ', '_')] = -fav_sign

    # Explicit column labels keep this working when oddsdata is empty.
    location = pd.DataFrame(list(loc_adj.items()),
                            columns=[0, 1]).set_index(0)
    patch_data = patch_data.join(location, how='left')
    home_data = patch_data[patch_data[1] == 1]
    # Build a new frame rather than mutating a filtered slice in place.
    away_data = patch_data[patch_data[1] == -1] * -1
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    patch_data = pd.concat([home_data, away_data])
    del patch_data[1]
    x = patch_data.join(keep_data)
    print('Completed HFA Patch')
    return x


def _patched_features(target):
    """Pull, HFA-patch and inner-join the four feature groups for ``target``.

    The groups are 'defensive_stats', 'offensive_stats', 'possessions' and
    'target'; offensive columns already present in the defensive group are
    dropped before joining.
    """
    def_data = pull_data.pull_model_features(target, 'defensive_stats',
                                             update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())

    off_data = pull_data.pull_model_features(target, 'offensive_stats',
                                             update_dbs.mongodb_client)
    known = set(def_data)
    off_data = off_data[[i for i in list(off_data) if i not in known]]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())

    poss_data = pull_data.pull_model_features(target, 'possessions',
                                              update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())

    tar_data = pull_data.pull_model_features(target, 'target',
                                             update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())

    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    return x_data.join(tar_data, how='inner')


def raw_data():
    """Build the raw feature frame pairing each team with its opponent.

    The 'pts_scored' features (restricted to the training index) form the
    team side. The 'pts_allowed' features are re-indexed from each team to
    its opponent via ``gamedata``, negated and prefixed with '-'. Both
    sides are joined with the offensive points pulled by
    ``pull_data.pull_pts``; infinities and 'NULL' strings become NaN and
    any row containing NaN is dropped.

    Returns:
        DataFrame of opponent ('-' prefixed) columns, team columns and the
        columns returned by ``pull_data.pull_pts('offensive', ...)``.

    Raises:
        KeyError: if an opponent-side index is missing from ``gamedata``.
    """
    x_data = _patched_features('pts_scored')
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    x_data = x_data.loc[x_data.index.isin(train_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    team_data = x_data.join(y_data, how='inner')[list(x_data)]

    opponent_data = _patched_features('pts_allowed')

    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    try:
        query = 'SELECT * from gamedata;'
        cursor.execute(query)
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
    finally:
        cursor.close()

    # Map "<date><team>" to "<date><opponent>" so that the opponent's
    # features land on the row of the team they are playing.
    idx_switch = {}
    for t, d, o, _ in np.array(switch):
        idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')

    opponent_data['idx'] = [idx_switch[key] for key in opponent_data.index]
    opponent_data = opponent_data.set_index('idx')
    opponent_data *= -1
    opponent_data = opponent_data.rename(
        columns={i: '-' + i for i in list(opponent_data)})

    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    data = data.dropna(how='any')
    return data


def save():
    """Fit and persist every stored sklearn model that is not on disk yet.

    For each target ('ou', 'winner', 'line') and feature set ('raw',
    '+pts'), every entry of ``saved_models.stored_models`` except 'keras'
    whose ``<sort>_<kind>_<name>_model.pkl`` file is missing from
    ``model_storage`` gets its scaler fitted and dumped, then its model
    fitted on the scaled features and dumped.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Each joined frame is built once; the training target is read from the
    # same frame so features and labels always share rows and order.
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }
    target_column = {'winner': 'outcome', 'line': 'line', 'ou': 'ou'}
    # Drop the intermediates so only the joined frames stay alive.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line

    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            frame = all_x_data[sort][kind]
            target = np.ravel(frame[target_column[sort]])
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue
                stem = '%s_%s_%s' % (sort, kind, model_name)
                model_path = os.path.join(model_storage,
                                          '%s_model.pkl' % (stem))
                if os.path.isfile(model_path):
                    continue
                print('...storing %s' % (model_name))
                model = model_details['model']
                scale = model_details['scale']
                features = frame[model_details['features']]
                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(model_storage, '%s_scaler.pkl' % (stem)))
                model.fit(scale.transform(features), target)
                joblib.dump(model, model_path)
                print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
def sklearn_preds():
    """Write the stored sklearn models' predictions to ``<sort>_results.csv``.

    For each target ('ou', 'winner', 'line') a frame indexed by the raw
    feature frame's index is built up. Every entry of
    ``saved_models.stored_models`` except 'keras' whose model file exists
    in ``model_storage`` is loaded with its scaler, and its
    ``predict_proba`` output is reduced to a 0/1 pick plus the probability
    of that pick. Those are stored as ``<kind>_<model>_prediction`` and
    ``<kind>_<model>_confidence`` columns and inner-joined onto the frame,
    which is written to ``output_folder`` once the target is finished.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Each joined frame is built exactly once.
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }
    # Drop the intermediates so only the joined frames stay alive.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line

    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        outcomes = pd.DataFrame()
        outcomes['idx'] = list(all_x_data[sort]['raw'].index)
        outcomes = outcomes.set_index('idx')
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue
                stem = '%s_%s_%s' % (sort, kind, model_name)
                model_path = os.path.join(model_storage,
                                          '%s_model.pkl' % (stem))
                if not os.path.isfile(model_path):
                    # Nothing stored for this model; leave it out of the CSV.
                    continue
                print('Evaluating %s ' % (model_name))
                model = joblib.load(model_path)
                scale = joblib.load(
                    os.path.join(model_storage, '%s_scaler.pkl' % (stem)))
                features = all_x_data[sort][kind][model_details['features']]
                preds = model.predict_proba(scale.transform(features))

                # Pick whichever of the first two probabilities is larger;
                # ties go to 1.
                winner = []
                confidence = []
                for game in preds:
                    if game[0] > game[1]:
                        winner.append(0)
                        confidence.append(game[0])
                    else:
                        winner.append(1)
                        confidence.append(game[1])

                model_outcome = pd.DataFrame()
                model_outcome['idx'] = list(features.index)
                model_outcome['%s_%s_prediction' % (kind, model_name)] = winner
                model_outcome['%s_%s_confidence' % (kind,
                                                    model_name)] = confidence
                model_outcome = model_outcome.set_index('idx')
                outcomes = outcomes.join(model_outcome, how='inner')
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
        outcomes.to_csv(os.path.join(output_folder, '%s_results.csv' % (sort)))
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random

# Tune the KNN classifier on the score-derived features against the
# win/loss outcome and report the best score found.

train_index = pull_data.pull_train_index(update_dbs.mysql_client())
# The seeded shuffle is kept: it advances the global `random` state that
# later code may rely on.
random.seed(86)
random.shuffle(train_index)
derived_data = {}

x_vals = 'points'
y_val = '+pts'

x_data_stable = pull_data.score(update_dbs.mysql_client())
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')

y_data_stable = pull_data.pull_wl(update_dbs.mysql_client())
alldata = y_data_stable.join(x_data_stable, how='inner')
y_data = alldata['outcome']
# Take the features from the same joined frame as the labels so both hold
# exactly the same rows in the same order.
x_data = alldata[x_cols]

result = knn_tuning.execute(y_val, x_vals, X_data=x_data, Y_data=y_data)
print("Best %s %s score: %s" % (x_vals, y_val, result))