Example #1
0
def update(name, data):
    """Bulk-insert the rows of ``data`` into MySQL table ``name``.

    Each index value of ``data`` is expected to look like
    ``YYYY-MM-DDTeam_Name``: the first 10 characters are the game date and
    the remainder is the team name with underscores in place of spaces.
    Rows are written as multi-row INSERT statements in batches of 500 to
    keep each statement a manageable size.

    NOTE(review): table name and values are interpolated directly into the
    SQL string (no parameter binding), so this must only ever be called
    with trusted, internally-generated input.
    """
    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()

    def _flush(batch):
        # Execute one multi-row INSERT for the accumulated value tuples.
        # Skip entirely when the batch is empty — an empty VALUES list
        # would be malformed SQL (the original code relied on a bare
        # except to hide that failure).
        if not batch:
            return
        statement = 'INSERT INTO %s VALUES %s;' % (name, ', '.join(batch))
        # FK checks are disabled around the insert, matching the original
        # load behaviour for partially-populated reference tables.
        cursor.execute('SET foreign_key_checks = 0;')
        try:
            cursor.execute(statement)
            cnx.commit()
        except Exception as err:
            # Best-effort load: report the failed batch but keep going so
            # one bad batch does not abort the whole update.
            print('Insert into %s failed: %s' % (name, err))
        cursor.execute('SET foreign_key_checks = 1;')

    batch = []
    for idx, entry in zip(list(data.index), np.array(data)):
        date = '"' + idx[:10] + '"'
        tname = '"' + idx[10:].replace('_', ' ') + '"'
        # Row layout: (teamname, date, value1, value2, ...).
        row = [tname, date] + [str(value) for value in entry]
        batch.append('(' + ', '.join(row) + ')')
        if len(batch) == 500:
            _flush(batch)
            batch = []
    # Flush any remaining rows (fewer than a full batch of 500).
    _flush(batch)
    cursor.close()
    cnx.close()
def retrieve_data():
    """Assemble the offensive points-scored training matrix.

    Pulls the defensive, offensive, possession and target feature sets,
    applies the home-field-advantage patch to each, inner-joins them,
    restricts the rows to the training index, and returns ``(X, Y)``
    where ``Y`` is the 'pts' column and ``X`` is the fixed feature list.
    """
    target = 'pts_scored'
    pts = pull_data.pull_pts('offensive', update_dbs.mysql_client())

    defense = pull_data.pull_model_features(target, 'defensive_stats',
                                            update_dbs.mongodb_client)
    defense = hfa_patch(defense, update_dbs.mysql_client())

    offense = pull_data.pull_model_features(target, 'offensive_stats',
                                            update_dbs.mongodb_client)
    # Drop offensive columns that duplicate defensive ones.
    offense = offense[[c for c in list(offense) if c not in list(defense)]]
    offense = hfa_patch(offense, update_dbs.mysql_client())

    possessions = pull_data.pull_model_features(target, 'possessions',
                                                update_dbs.mongodb_client)
    possessions = hfa_patch(possessions, update_dbs.mysql_client())

    targets = pull_data.pull_model_features(target, 'target',
                                            update_dbs.mongodb_client)
    targets = hfa_patch(targets, update_dbs.mysql_client())

    features = defense.join(offense, how='inner')
    features = features.join(possessions, how='inner')
    features = features.join(targets, how='inner')
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    features = features.loc[features.index.isin(train_index)]
    # Keep only games that have a recorded score, feature columns only.
    features = features.join(pts, how='inner')[list(features)]

    # Release the per-category frames before the final join.
    defense = None
    offense = None
    possessions = None
    targets = None

    combined = features.join(pts, how='inner')
    combined = combined.reset_index()
    Y = combined['pts']
    x_feats = [
        'expected_pts_pg_for', '75_g_HAspread_for_floor-percentage',
        'pregame_pts_pg_for', 'expected_poss_pg_for', 'expected_ppp_for',
        '50_game_avg_15_g_HAweight_allow_assist--per--turnover-ratio',
        '75_g_HAspread_allow_points-per-game',
        '100_g_HAspread_allow_block-pct', 'pregame_poss_pg_for',
        '10_game_avg_30_g_HAweight_allow_personal-foul-pct',
        'expected_turnovers-per-possession_for',
        'expected_offensive-rebounding-pct_for',
        '30_g_HAspread_for_floor-percentage',
        'expected_ftm-per-100-possessions_for',
        'expected_effective-field-goal-pct_for',
        'pregame_effective-field-goal-pct_for',
        '100_g_HAspread_allow_assist--per--turnover-ratio',
        '30_g_HAspread_allow_floor-percentage',
        '10_game_avg_30_g_HAweight_allow_two-point-rate',
        '5_game_avg_50_g_HAweight_for_points-per-game`/`possessions-per-game',
        '10_game_avg_50_g_Tweight_for_effective-field-goal-pct',
        '30_game_avg_5_g_Tweight_for_points-per-game`/`possessions-per-game'
    ]
    X = combined[x_feats]
    return X, Y
Example #3
0
def save():
    """Fit and persist the winner / line / over-under classifiers.

    Builds the raw feature matrix plus the score/odds-based feature sets,
    joins each against the matching outcome labels, and for every model in
    ``saved_models.stored_models`` not already saved under
    ``model_storage``, fits its scaler and model and dumps both with
    joblib.  Keras models are skipped here.
    """
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    # Fixed seed so the shuffled training order is reproducible.
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        """Flip home/away-spread features to a home-relative sign.

        Columns whose name contains '_HAspread_' are negated for rows
        where the team was the away side (per the ``oddsdata`` table);
        all other columns pass through unchanged.
        """
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        for stat in list(x):
            try:
                # IndexError when the name has no '_HAspread_' segment.
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()

        # Map index key (date + team-with-underscores) -> +1 home / -1 away.
        # NOTE(review): location == 0 appears to mean t1 (the favorite)
        # was home — confirm against the oddsdata schema.
        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None  # release the raw odds frame

        # Join the +/-1 location column (column label 1 after from_dict)
        # and negate the away rows.
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Build the joined team/opponent raw feature matrix with points.

        Team-side features are keyed by (date + team); opponent-side
        features are re-keyed onto the opposing team via ``gamedata``,
        negated, and renamed with a '-' prefix before joining.
        """
        def_data = pull_data.pull_model_features('pts_scored',
                                                 'defensive_stats',
                                                 update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_scored',
                                                 'offensive_stats',
                                                 update_dbs.mongodb_client)
        # Keep only offensive columns not already present defensively.
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_scored', 'possessions',
                                                  update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_scored', 'target',
                                                 update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        x_data = x_data.join(tar_data, how='inner')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        # Restrict to games with a recorded score; keep feature cols only.
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None

        # Same pipeline for the opponent ('pts_allowed') feature set.
        def_data = pull_data.pull_model_features('pts_allowed',
                                                 'defensive_stats',
                                                 update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_allowed',
                                                 'offensive_stats',
                                                 update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_allowed', 'possessions',
                                                  update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_allowed', 'target',
                                                 update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        opponent_data = x_data.join(tar_data, how='inner')
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None

        # Re-key each opponent row onto the team that faced them, using
        # the gamedata (team, date) -> opponent mapping.
        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        query = 'SELECT * from gamedata;'
        cursor.execute(query)
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
        idx_switch = {}
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) +
                       t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
        idx = []
        for idxx in opponent_data.index:
            idx.append(idx_switch[idxx])
        opponent_data['idx'] = idx
        opponent_data = opponent_data.set_index('idx')
        # Negate opponent features and mark them with a '-' name prefix.
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})
        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    raw_x = raw_data()
    # Prediction-based and odds-based feature/label sets.
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Feature matrices per bet type ('winner'/'line'/'ou') and feature kind.
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line,
                                                           how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }

    # Matching label series, aligned via the same joins as all_x_data.
    all_y_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner')['outcome'],
            'raw': raw_x.join(y_wl, how='inner')['outcome'],
        },
        'line': {
            '+pts':
            x_score.join(y_line, how='inner').join(x_line,
                                                   how='inner')['line'],
            'raw':
            raw_x.join(y_line, how='inner').join(x_line, how='inner')['line'],
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou,
                                                         how='inner')['ou'],
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
        },
    }

    # Drop the large intermediate frames before fitting to free memory.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)

    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                # Keras models are saved by a separate routine.
                if model_name == 'keras':
                    continue
                # Only fit models that have not been stored yet.
                if not os.path.isfile(
                        os.path.join(
                            model_storage, '%s_%s_%s_model.pkl' %
                            (sort, kind, model_name))):
                    print('...storing %s' % (model_name))

                    model = model_details['model']
                    scale = model_details['scale']

                    scale.fit(
                        all_x_data[sort][kind][model_details['features']])
                    joblib.dump(
                        scale,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))

                    model.fit(
                        scale.transform(
                            all_x_data[sort][kind][model_details['features']]),
                        np.ravel(all_y_data[sort][kind]))
                    joblib.dump(
                        model,
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name)))

                    print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
Example #4
0
def save():
    """Fit and persist the pace/ppp regressions and the share models.

    First fits one regression per (offense/defense, pace/ppp) combination
    that is not yet stored, then builds the raw + share feature matrix and
    fits the '+pts' and 'keras' share models.  Keras models are saved as
    .h5, everything else via joblib .pkl.
    """
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    for x_vals in ['offense', 'defense']:
        for y_val in ['pace', 'ppp']:
            if y_val == 'ppp':
                # Points-per-possession target: last column split out as y.
                data = pull_data.ppp(update_dbs.mysql_client(), x_vals)
                y_data = data[[y_val]]
                x_feats = list(data)
                x_feats.remove(y_val)
                x_data = data[x_feats]
                data = x_data.join(y_data, how='inner')
                data = data.loc[data.index.isin(train_index)]
                x_data = data[x_feats]
                y_data = data[[y_val]]
            elif y_val == 'pace':
                # Pace target is the 'possessions' column.
                data = pull_data.pace(update_dbs.mysql_client(), x_vals)
                y_data = data[['possessions']]
                x_feats = list(data)
                x_feats.remove('possessions')
                x_data = data[x_feats]
                data = x_data.join(y_data, how='inner')
                data = data.loc[data.index.isin(train_index)]
                x_data = data[x_feats]
                y_data = data[['possessions']]

            # Only fit and store models that do not exist on disk yet.
            if not os.path.isfile(
                    os.path.join(
                        model_storage, '%s_%s_regression_model.pkl' %
                        (y_val, x_vals))):
                print('Loading %s_%s' % (x_vals, y_val))
                model = saved_models.stored_models[x_vals][y_val]['model']
                scale = saved_models.stored_models[x_vals][y_val]['scale']
                scale.fit(x_data[saved_models.stored_models[x_vals][y_val]
                                 ['features']])
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
                model.fit(
                    scale.transform(x_data[saved_models.stored_models[x_vals]
                                           [y_val]['features']]),
                    np.ravel(y_data))
                joblib.dump(
                    model,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_model.pkl' % (y_val, x_vals)))
                print('Stored %s_%s' % (x_vals, y_val))

    def hfa_patch(x, cnx):
        """Flip home/away-spread features to a home-relative sign.

        Columns whose name contains '_HAspread_' are negated for rows
        where the team was the away side (per the ``oddsdata`` table);
        all other columns pass through unchanged.
        """
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        for stat in list(x):
            try:
                # IndexError when the name has no '_HAspread_' segment.
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()

        # Map index key (date + team-with-underscores) -> +1 home / -1 away.
        # NOTE(review): location == 0 appears to mean t1 (the favorite)
        # was home — confirm against the oddsdata schema.
        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None  # release the raw odds frame

        # Join the +/-1 location column (column label 1 after from_dict)
        # and negate the away rows.
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Build the joined team/opponent raw feature matrix with points.

        Team-side features are keyed by (date + team); opponent-side
        features are re-keyed onto the opposing team via ``gamedata``,
        negated, and renamed with a '-' prefix before joining.
        """
        def_data = pull_data.pull_model_features('pts_scored',
                                                 'defensive_stats',
                                                 update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_scored',
                                                 'offensive_stats',
                                                 update_dbs.mongodb_client)
        # Keep only offensive columns not already present defensively.
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_scored', 'possessions',
                                                  update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_scored', 'target',
                                                 update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        x_data = x_data.join(tar_data, how='inner')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        # Restrict to games with a recorded score; keep feature cols only.
        team_data = x_data.join(y_data, how='inner')[list(x_data)]
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None

        # Same pipeline for the opponent ('pts_allowed') feature set.
        def_data = pull_data.pull_model_features('pts_allowed',
                                                 'defensive_stats',
                                                 update_dbs.mongodb_client)
        def_data = hfa_patch(def_data, update_dbs.mysql_client())
        off_data = pull_data.pull_model_features('pts_allowed',
                                                 'offensive_stats',
                                                 update_dbs.mongodb_client)
        off_feats = [i for i in list(off_data) if i not in list(def_data)]
        off_data = off_data[off_feats]
        off_data = hfa_patch(off_data, update_dbs.mysql_client())
        poss_data = pull_data.pull_model_features('pts_allowed', 'possessions',
                                                  update_dbs.mongodb_client)
        poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
        tar_data = pull_data.pull_model_features('pts_allowed', 'target',
                                                 update_dbs.mongodb_client)
        tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
        x_data = def_data.join(off_data, how='inner')
        x_data = x_data.join(poss_data, how='inner')
        opponent_data = x_data.join(tar_data, how='inner')
        def_data = None
        off_data = None
        poss_data = None
        tar_data = None

        # Re-key each opponent row onto the team that faced them, using
        # the gamedata (team, date) -> opponent mapping.
        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        query = 'SELECT * from gamedata;'
        cursor.execute(query)
        switch = pd.DataFrame(
            cursor.fetchall(),
            columns=['teamname', 'date', 'opponent', 'location'])
        idx_switch = {}
        for t, d, o, l in np.array(switch):
            idx_switch[str(d) +
                       t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
        idx = []
        for idxx in opponent_data.index:
            idx.append(idx_switch[idxx])
        opponent_data['idx'] = idx
        opponent_data = opponent_data.set_index('idx')
        # Negate opponent features and mark them with a '-' name prefix.
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})
        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    # Share models: raw features joined with the stable share features.
    data = raw_data()
    x_data_stable = pull_data.share(update_dbs.mysql_client())
    data = data.join(x_data_stable, how='inner')
    data = data.reset_index()
    x_vals = 'share'
    for y_val in ['+pts', 'keras']:
        # Skip when either the .pkl or the keras .h5 artefact exists.
        if not os.path.isfile(
                os.path.join(model_storage, '%s_%s_regression_model.pkl' %
                             (x_vals, y_val))) and not os.path.isfile(
                                 os.path.join(
                                     model_storage,
                                     '%s_%s_regression_model.h5' %
                                     (x_vals, y_val))):
            print('Loading %s_%s' % (x_vals, y_val))

            model = saved_models.stored_models[x_vals][y_val]['model']
            scale = saved_models.stored_models[x_vals][y_val]['scale']

            scale.fit(
                data[saved_models.stored_models[x_vals][y_val]['features']])
            joblib.dump(
                scale,
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (y_val, x_vals)))
            model.fit(
                scale.transform(data[saved_models.stored_models[x_vals][y_val]
                                     ['features']]), np.ravel(data['share']))
            if y_val != 'keras':
                joblib.dump(
                    model,
                    os.path.join(
                        model_storage,
                        '%s_%s_regression_model.pkl' % (y_val, x_vals)))
            else:
                # Keras wrappers persist the underlying model as HDF5.
                model.model.save(
                    os.path.join(model_storage, '%s_%s_regression_model.h5' %
                                 (y_val, x_vals)))

            print('Stored %s_%s' % (x_vals, y_val))
Example #5
0
    away_data = patch_data[patch_data[1]==-1]
    away_data *= -1
    home_data = patch_data[patch_data[1]==1]
    patch_data = home_data.append(away_data)
    del patch_data[1]
    x = patch_data.join(keep_data)
    print('Completed HFA Patch')
    return x

for x_vals in ['defensive_stats', 'offensive_stats', 'full-team', 'all', 'possessions', 'target']:
    for y_val in ['pts_scored', 'pts_allowed']:    
#for each in [('all', 'pts_allowed'), ('full-team', 'pts_allowed'), ('offensive_stats', 'pts_scored'), ('all', 'pts_scored')]:
#for each in [('target', 'pts_allowed'), ('target', 'pts_scored')]:
#        x_vals, y_val = each
        if x_vals == 'possessions':
            y_data = pull_data.pull_possessions(y_val, update_dbs.mysql_client())
            x_data = pull_data.pull_model_features(y_val, x_vals, update_dbs.mongodb_client)
            x_data = hfa_patch(x_data, update_dbs.mysql_client())             
            train_index = pull_data.pull_train_index(update_dbs.mysql_client())
            x_data = x_data.loc[x_data.index.isin(train_index)]
            y_data = x_data.join(y_data, how = 'inner')['possessions']
            x_data = x_data.join(y_data, how = 'inner')[list(x_data)]
            
        elif x_vals in ['target', 'defensive_stats', 'offensive_stats', 'full-team', 'all']:
            y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())
            
            
        if x_vals == 'full-team':
            def_data = pull_data.pull_model_features(y_val, 'defensive_stats', update_dbs.mongodb_client)
            def_data = hfa_patch(def_data, update_dbs.mysql_client())            
            off_data = pull_data.pull_model_features(y_val, 'offensive_stats', update_dbs.mongodb_client)
Example #6
0
# Make the project-local helper packages importable.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

# Output locations for result files and persisted models.
output_folder = os.path.join(cur_path, 'model_results')
model_storage = os.path.join(cur_path, 'saved_models')

import numpy as np
import pull_data
import update_dbs
import random
import pandas as pd
import saved_models
from sklearn.externals import joblib

# Index of upcoming (future) games that need predictions.
future_index = pull_data.future_idx(update_dbs.mysql_client())
random.seed(86)  # fixed seed for reproducible randomness


def hfa_patch(x, cnx):
    print('Running HFA Patch')
    keep_stats = []
    patch_stats = []
    for stat in list(x):
        try:
            stat.split('_HAspread_')[1]
            patch_stats.append(stat)
        except IndexError:
            keep_stats.append(stat)

    patch_data = x[patch_stats]
Example #7
0
def raw_data():
    """Build the joined team/opponent raw feature matrix for validation.

    Same pipeline as the training raw_data builders, but restricted to
    ``validation_index`` (a module-level global — defined outside this
    view).  Opponent-side features are re-keyed onto the opposing team
    via ``gamedata``, negated, and renamed with a '-' prefix.
    """
    def_data = pull_data.pull_model_features('pts_scored', 'defensive_stats',
                                             update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())
    off_data = pull_data.pull_model_features('pts_scored', 'offensive_stats',
                                             update_dbs.mongodb_client)
    # Keep only offensive columns not already present defensively.
    off_feats = [i for i in list(off_data) if i not in list(def_data)]
    off_data = off_data[off_feats]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())
    poss_data = pull_data.pull_model_features('pts_scored', 'possessions',
                                              update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
    tar_data = pull_data.pull_model_features('pts_scored', 'target',
                                             update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    x_data = x_data.join(tar_data, how='inner')
    # Validation split rather than the training index.
    x_data = x_data.loc[x_data.index.isin(validation_index)]
    y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
    # Restrict to games with a recorded score; keep feature columns only.
    team_data = x_data.join(y_data, how='inner')[list(x_data)]
    def_data = None
    off_data = None
    poss_data = None
    tar_data = None

    # Same pipeline for the opponent ('pts_allowed') feature set.
    def_data = pull_data.pull_model_features('pts_allowed', 'defensive_stats',
                                             update_dbs.mongodb_client)
    def_data = hfa_patch(def_data, update_dbs.mysql_client())
    off_data = pull_data.pull_model_features('pts_allowed', 'offensive_stats',
                                             update_dbs.mongodb_client)
    off_feats = [i for i in list(off_data) if i not in list(def_data)]
    off_data = off_data[off_feats]
    off_data = hfa_patch(off_data, update_dbs.mysql_client())
    poss_data = pull_data.pull_model_features('pts_allowed', 'possessions',
                                              update_dbs.mongodb_client)
    poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
    tar_data = pull_data.pull_model_features('pts_allowed', 'target',
                                             update_dbs.mongodb_client)
    tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
    x_data = def_data.join(off_data, how='inner')
    x_data = x_data.join(poss_data, how='inner')
    opponent_data = x_data.join(tar_data, how='inner')
    def_data = None
    off_data = None
    poss_data = None
    tar_data = None

    # Re-key each opponent row onto the team that faced them, using the
    # gamedata (team, date) -> opponent mapping.
    cnx = update_dbs.mysql_client()
    cursor = cnx.cursor()
    query = 'SELECT * from gamedata;'
    cursor.execute(query)
    switch = pd.DataFrame(cursor.fetchall(),
                          columns=['teamname', 'date', 'opponent', 'location'])
    idx_switch = {}
    for t, d, o, l in np.array(switch):
        idx_switch[str(d) + t.replace(' ', '_')] = str(d) + o.replace(' ', '_')
    idx = []
    for idxx in opponent_data.index:
        idx.append(idx_switch[idxx])
    opponent_data['idx'] = idx
    opponent_data = opponent_data.set_index('idx')
    # Negate opponent features and mark them with a '-' name prefix.
    opponent_data *= -1
    opponent_data = opponent_data.rename(
        columns={i: '-' + i
                 for i in list(opponent_data)})
    data = opponent_data.join(team_data)
    data = data.join(y_data, how='inner')
    data = data.replace([np.inf, -np.inf], np.nan)
    data = data.replace('NULL', np.nan)
    data = data.dropna(how='any')
    return data
Example #8
0
# Make the project-local helper packages importable.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

# Output locations: results, exported feature lists, persisted models.
output_folder = os.path.join(cur_path, 'model_results')
features_folder = os.path.join(cur_path, 'feature_dumps')
model_storage = os.path.join(cur_path, 'saved_models')

import numpy as np
import pull_data
import update_dbs
import random
import saved_models
import pandas as pd
from sklearn.model_selection import cross_validate, StratifiedKFold
# Shuffle the training index with a fixed seed for reproducibility.
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
random.seed(86)
random.shuffle(train_index)
derived_data = {}  # accumulator for derived feature values


def hfa_patch(x, cnx):
    print('Running HFA Patch')
    keep_stats = []
    patch_stats = []
    for stat in list(x):
        try:
            stat.split('_HAspread_')[1]
            patch_stats.append(stat)
        except IndexError:
            keep_stats.append(stat)
Example #9
0
def update():
    """Compute and store derived line / over-under model predictions.

    For each bet type ('line', 'ou'): find games not yet present in the
    ``<type>_preds`` table, rebuild the rolling vegas feature set for
    them, run every stored model (plus the PCA/TSVD decompositions) to
    produce one prediction column each, then append the combined rows
    through ``add_derived.update``.
    """
    for x_vals in ['line', 'ou']:
        # Games that still need predictions for this bet type.
        train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                           '%s_preds' % (x_vals))
        if len(train_index) == 0:
            continue
        update_df = pd.DataFrame()
        update_df['idx'] = train_index
        update_df = update_df.set_index('idx')

        y_val = 'result'
        print('Loading rolling betting stats')
        x_data_stable = vegas_watson.rolling_vegas(x_vals)
        print('... Loaded rolling betting stats')
        x_data_stable = x_data_stable.loc[x_data_stable.index.isin(
            train_index)]
        # Drop the label column; keep feature columns only.
        x_cols = list(x_data_stable)
        x_cols.remove(y_val)
        x_data_stable = x_data_stable[x_cols]
        for model_name, model_details in saved_models.stored_models[y_val][
                x_vals].items():
            # Only run models whose artefacts exist on disk.
            if os.path.isfile(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                print('Loading %s Values' % (model_name))

                model = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                scale = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (y_val, x_vals, model_name)))

                # NOTE(review): fit_transform re-fits the saved scaler on
                # the new rows instead of reusing its stored parameters —
                # confirm this is intentional (transform alone may be meant).
                preds = model.predict(
                    scale.fit_transform(
                        x_data_stable[model_details['features']]))
                indy_pred = pd.DataFrame()
                indy_pred[model_name + '_' + x_vals] = preds
                indy_pred['idx'] = list(x_data_stable.index)
                indy_pred = indy_pred.set_index('idx')
                update_df = update_df.join(indy_pred, how='inner')
                print('Loaded %s' % (model_name))

        # Dimensionality-reduction features use a fixed per-type column set.
        for model_name in ['PCA', "TSVD"]:
            if os.path.isfile(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                print('Loading %s Values' % (model_name))
                model = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name)))
                if x_vals == 'ou':
                    feats = [
                        '10_game_avg', '15_game_avg', '50_game_avg',
                        '30_game_avg', 'streak', '5_game_avg', '3_game_avg'
                    ]
                elif x_vals == 'line':
                    feats = ['10_game_avg', 'ha', 'streak', '50_game_avg']
                preds = model.fit_transform(x_data_stable[feats])
                indy_pred = pd.DataFrame()
                indy_pred['idx'] = list(x_data_stable.index)
                indy_pred[model_name + '_' + x_vals] = preds
                indy_pred = indy_pred.set_index('idx')
                update_df = update_df.join(indy_pred, how='inner')
                print('Loaded %s' % (model_name))

        # Order columns to match the *_preds table schema before appending.
        if x_vals == 'line':
            update_df = update_df[[
                'PCA_line', 'TSVD_line', 'lasso_line', 'lightgbm_line',
                'ridge_line'
            ]]
            add_derived.update('%s_preds' % (x_vals), update_df)

        elif x_vals == 'ou':
            update_df = update_df[[
                'PCA_ou', 'TSVD_ou', 'lasso_ou', 'lightgbm_ou', 'ridge_ou'
            ]]
            add_derived.update('%s_preds' % (x_vals), update_df)
Example #10
0
def rolling_vegas(result):
    """Build rolling Vegas-performance features for every team-game.

    Walks the odds history in order, maintaining for each team a rolling
    record (last 50 games) of +1/-1 outcomes versus the over/under
    (aou_* state) and versus the spread (atl_* state), plus the current
    streak of each.  For every game a feature row keyed
    '<date><Team_Name>' is written from the PRE-game rolling state
    (rolling averages, streak, home/away flag) together with the game's
    +/-1 'result' label; only afterwards is the game's outcome folded into
    the rolling state, so the features never leak their own label.

    result -- 'line' returns the against-the-spread feature frame,
              'ou' returns the over/under frame (each team's first game
              has no history and is skipped; any other value returns None).
    """
    odds = pull_data.pull_odds_data(update_dbs.mysql_client())
    teams = bb_odds.teamnames
    # aou_* = against-over/under history, atl_* = against-the-line history.
    aou_data = {}
    atl_data = {}

    aou_streak = {}
    atl_streak = {}
    vegas_data_line = {}
    vegas_data_ou = {}
    for team in teams:
        aou_data[team] = []
        atl_data[team] = []
        aou_streak[team] = 0
        atl_streak[team] = 0
    # Column order here must match the tuple unpacking.
    for date, fav, dog, line, overunder, favscore, dogscore, homeaway in np.array(
            odds[[
                'date', 'fav', 'dog', 'line', 'overunder', 'fav-score',
                'dog-score', 'ha'
            ]]):
        # Emit features for the favorite only once it has prior history.
        if len(aou_data[fav]) > 0 and len(atl_data[fav]) > 0:
            vegas_data_line[str(date) + fav.replace(' ', '_')] = {}
            vegas_data_ou[str(date) + fav.replace(' ', '_')] = {}
            # Rolling means; a slice shorter than n_games just averages
            # whatever games exist.  The line model only uses 10/50-game
            # windows, the o/u model uses all six.
            for n_games in [3, 5, 10, 15, 30, 50]:
                vegas_data_ou[str(date) + fav.replace(' ', '_')][
                    '%s_game_avg' % (n_games)] = np.mean(
                        aou_data[fav][-n_games:])
                if n_games in [10, 50]:
                    vegas_data_line[str(date) + fav.replace(' ', '_')][
                        '%s_game_avg' % (n_games)] = np.mean(
                            atl_data[fav][-n_games:])
            # NOTE(review): homeaway == 0 appears to mean the favorite was
            # home ('ha' = 1) — confirm against the oddsdata schema.
            if homeaway == 0:
                vegas_data_line[str(date) + fav.replace(' ', '_')]['ha'] = 1
            elif homeaway == 1:
                vegas_data_line[str(date) + fav.replace(' ', '_')]['ha'] = 0
            vegas_data_ou[str(date) +
                          fav.replace(' ', '_')]['streak'] = aou_streak[fav]
            vegas_data_line[str(date) +
                            fav.replace(' ', '_')]['streak'] = atl_streak[fav]
            # Label: +1 if the total went over, else -1.
            # NOTE(review): a push (total == overunder) is labeled -1 here
            # but excluded from the rolling-state updates below — confirm
            # that asymmetry is intended.
            if favscore + dogscore > overunder:
                vegas_data_ou[str(date) + fav.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_ou[str(date) + fav.replace(' ', '_')]['result'] = -1
            # Label: +1 if the favorite covered (line presumably stored
            # negative for the favorite — TODO confirm).
            if favscore + line > dogscore:
                vegas_data_line[str(date) +
                                fav.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_line[str(date) +
                                fav.replace(' ', '_')]['result'] = -1

        # Same features from the underdog's perspective; the home/away flag
        # and the spread label are inverted relative to the favorite.
        if len(aou_data[dog]) > 0 and len(atl_data[dog]) > 0:
            vegas_data_line[str(date) + dog.replace(' ', '_')] = {}
            vegas_data_ou[str(date) + dog.replace(' ', '_')] = {}
            for n_games in [3, 5, 10, 15, 30, 50]:
                vegas_data_ou[str(date) + dog.replace(' ', '_')][
                    '%s_game_avg' % (n_games)] = np.mean(
                        aou_data[dog][-n_games:])
                if n_games in [10, 50]:
                    vegas_data_line[str(date) + dog.replace(' ', '_')][
                        '%s_game_avg' % (n_games)] = np.mean(
                            atl_data[dog][-n_games:])
            if homeaway == 0:
                vegas_data_line[str(date) + dog.replace(' ', '_')]['ha'] = 0
            elif homeaway == 1:
                vegas_data_line[str(date) + dog.replace(' ', '_')]['ha'] = 1
            vegas_data_ou[str(date) +
                          dog.replace(' ', '_')]['streak'] = aou_streak[dog]
            vegas_data_line[str(date) +
                            dog.replace(' ', '_')]['streak'] = atl_streak[dog]
            if favscore + dogscore > overunder:
                vegas_data_ou[str(date) + dog.replace(' ', '_')]['result'] = 1
            else:
                vegas_data_ou[str(date) + dog.replace(' ', '_')]['result'] = -1
            if favscore + line > dogscore:
                vegas_data_line[str(date) +
                                dog.replace(' ', '_')]['result'] = -1
            else:
                vegas_data_line[str(date) +
                                dog.replace(' ', '_')]['result'] = 1

        # ---- Fold this game's outcome into the rolling state (only after
        # the feature rows above were emitted). ----
        # Over/under: both teams log the same +/-1; a streak flips to +/-1
        # when its sign changes, otherwise extends.  A push (total equals
        # the over/under) changes neither record nor streak.
        if dogscore + favscore < overunder:
            aou_data[fav].append(-1)
            aou_data[dog].append(-1)
            if aou_streak[fav] > 0:
                aou_streak[fav] = -1
            else:
                aou_streak[fav] -= 1
            if aou_streak[dog] > 0:
                aou_streak[dog] = -1
            else:
                aou_streak[dog] -= 1
        elif dogscore + favscore > overunder:
            aou_data[fav].append(1)
            aou_data[dog].append(1)
            if aou_streak[fav] < 0:
                aou_streak[fav] = 1
            else:
                aou_streak[fav] += 1
            if aou_streak[dog] < 0:
                aou_streak[dog] = 1
            else:
                aou_streak[dog] += 1
        # Spread: favorite and underdog log opposite signs; pushes against
        # the line likewise leave the state untouched.
        if (favscore - dogscore) + line > 0:
            atl_data[fav].append(1)
            atl_data[dog].append(-1)
            if atl_streak[fav] < 0:
                atl_streak[fav] = 1
            else:
                atl_streak[fav] += 1
            if atl_streak[dog] > 0:
                atl_streak[dog] = -1
            else:
                atl_streak[dog] -= 1
        elif (favscore - dogscore) + line < 0:
            atl_data[fav].append(-1)
            atl_data[dog].append(1)
            if atl_streak[dog] < 0:
                atl_streak[dog] = 1
            else:
                atl_streak[dog] += 1
            if atl_streak[fav] > 0:
                atl_streak[fav] = -1
            else:
                atl_streak[fav] -= 1
        # Keep only the 50 most recent outcomes per team.
        for source in (atl_data, aou_data):
            for tm in (fav, dog):
                if len(source[tm]) > 50:
                    source[tm] = source[tm][-50:]

    # Rows are '<date><Team_Name>' keys, columns are the feature names.
    if result == 'line':
        vegas_data_line = pd.DataFrame.from_dict(vegas_data_line)
        vegas_data_line = vegas_data_line.T
        return vegas_data_line

    if result == 'ou':
        vegas_data_ou = pd.DataFrame.from_dict(vegas_data_ou)
        vegas_data_ou = vegas_data_ou.T
        return vegas_data_ou
Example #11
0
# --- Tuning setup #1: win/loss ('outcome') target on score features -----
derived_folder = os.path.join(cur_path, 'derived_data')

import pull_data
import update_dbs
import pandas as pd
import numpy as np
import log_tuning
import lgclass_tuning
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random

train_index = pull_data.pull_train_index(update_dbs.mysql_client())
#cnx = update_dbs.mysql_client()
# Fixed seed so the shuffled training index is reproducible across runs.
random.seed(86)
random.shuffle(train_index)
derived_data = {}

x_vals = 'points'
y_val = '+pts'
x_data_stable = pull_data.score(update_dbs.mysql_client())
# Drop the target and possession columns from the feature list.
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')
y_data_stable = pull_data.pull_wl(update_dbs.mysql_client())
# Inner join aligns labels with games that have feature rows.
alldata = y_data_stable.join(x_data_stable, how = 'inner')
y_data = alldata['outcome']
# --- Tuning setup #2: against-the-line target -----------------------------
# NOTE(review): this repeats the setup boilerplate above verbatim and
# overwrites its variables — likely a copy/paste of two separate scripts.
derived_folder = os.path.join(cur_path, 'derived_data')

import pull_data
import update_dbs
import pandas as pd
import numpy as np
import log_tuning
import lgclass_tuning
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random

train_index = pull_data.pull_train_index(update_dbs.mysql_client())
#cnx = update_dbs.mysql_client()
random.seed(86)
random.shuffle(train_index)
derived_data = {}

x_vals = 'points'
y_val = 'line'
x_data_stable = pull_data.score(update_dbs.mysql_client())
# Augment score features with the stored line-model predictions.
line_preds = pull_data.line_preds(update_dbs.mysql_client())
x_data_stable = x_data_stable.join(line_preds, how='inner')
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')
y_data = pull_data.line_wl(update_dbs.mysql_client())
Example #13
0
    cur_path = os.path.abspath(os.path.join(cur_path, os.pardir))
# Register the project's conf/db/tuning packages on sys.path relative to
# the repo root, and locate the saved-model directory.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))
model_storage = os.path.join(cur_path, 'saved_models')

import pull_data
import update_dbs
import saved_models
from sklearn.externals import joblib
import vegas_watson
import numpy as np
from sklearn.decomposition import PCA, TruncatedSVD
import pickle

# Index keys ('<date><Team>') of the games reserved for training.
train_index = pull_data.pull_train_index(update_dbs.mysql_client())

def save():
    for x_vals in ['line', 'ou']:
        y_val = 'result'
        print('Loading rolling betting stats')
        x_data_stable = vegas_watson.rolling_vegas(x_vals)
        print('... Loaded rolling betting stats')
        x_data_stable = x_data_stable.loc[x_data_stable.index.isin(train_index)]
        y_data = x_data_stable[[y_val]]
        x_cols = list(x_data_stable)
        x_cols.remove(y_val)
        x_data_stable = x_data_stable[x_cols]
        for model_name, model_details in saved_models.stored_models[y_val][x_vals].items():
            if not os.path.isfile(os.path.join(model_storage, '%s_%s_%s_model.pkl' % (y_val, x_vals, model_name))):
                print('Loading %s Values'%(model_name))        
Example #14
0
    '20_game_avg_30_g_HAweight_for_defensive-rebounds-per-game',
    '-20_game_avg_50_g_Tweight_for_floor-percentage',
    '20_game_avg_10_g_HAweight_for_possessions-per-game',
    '-20_game_avg_50_g_Tweight_allow_points-per-game',
    '-100_g_HAspread_allow_assist--per--turnover-ratio',
    '-10_game_avg_10_g_HAweight_allow_points-per-game',
    '75_g_HAspread_allow_percent-of-points-from-3-pointers',
    '-15_g_HAspread_allow_block-pct',
    '-20_game_avg_25_g_Tweight_allow_possessions-per-game',
    '-10_game_avg_15_g_HAweight_allow_defensive-rebounds-per-game',
    '-20_game_avg_50_g_HAweight_allow_defensive-efficiency',
    '50_game_avg_50_g_HAweight_for_assists-per-game',
    '-30_game_avg_25_g_Tweight_allow_points-per-game',
    '-25_g_HAspread_allow_possessions-per-game'
]
# Assemble over/under training data: raw features ('data'/'x_cols' are
# defined earlier in this script) joined with the o/u labels and the
# stored o/u model predictions.
y_data = pull_data.ou_wl(update_dbs.mysql_client())
ou_preds = pull_data.ou_preds(update_dbs.mysql_client())
all_data = data.join(y_data, how='inner')
all_data = all_data.join(ou_preds, how='inner')
# Flatten the label column to 1-D for sklearn.
y_data = np.ravel(all_data[['ou']])
# Treat the prediction columns as additional features.
for pred in list(ou_preds):
    x_cols.append(pred)
x_data_stable = all_data[x_cols]

#import linsvc_tuning
#import lgclass_tuning
#import log_tuning
#import knn_tuning
# Tags the tuning scripts use to label their output files.
x_vals = 'raw'
y_val = 'ou'
Example #15
0
def sklearn_preds():
    """Score every stored sklearn classifier and dump per-game predictions.

    For each betting target ('ou', 'winner', 'line') and feature set
    ('raw', '+pts'), loads the pickled model + scaler pair from
    model_storage, predicts class probabilities for every game, and records
    the winning class (0/1) plus its confidence.  All model outputs for a
    target are inner-joined on the game index and written to
    '<output_folder>/<target>_results.csv'.  Keras models are skipped here
    (handled elsewhere).
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Feature matrices per (target, feature-set); each is inner-joined with
    # its label frame so the row index lines up with all_y_data below.
    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line,
                                                           how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }

    # Matching label series, built with the same joins so they align
    # row-for-row with all_x_data.
    all_y_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner')['outcome'],
            'raw': raw_x.join(y_wl, how='inner')['outcome'],
        },
        'line': {
            '+pts':
            x_score.join(y_line, how='inner').join(x_line,
                                                   how='inner')['line'],
            'raw':
            raw_x.join(y_line, how='inner').join(x_line, how='inner')['line'],
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou,
                                                         how='inner')['ou'],
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou'],
        },
    }

    # Drop the large source frames now that the joined copies exist.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        # Seed the results frame with the game index so each model's
        # predictions can be inner-joined onto it.
        outcomes = pd.DataFrame()
        #        outcomes[sort] = np.ravel(all_y_data[sort]['raw'])
        outcomes['idx'] = list(all_y_data[sort]['raw'].index)
        outcomes = outcomes.set_index('idx')

        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue

                if os.path.isfile(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name))):
                    print('Evaluating %s ' % (model_name))

                    # Persisted model and its training-time scaler.
                    model = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_model.pkl' % (sort, kind, model_name)))
                    scale = joblib.load(
                        os.path.join(
                            model_storage,
                            '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))

                    preds = model.predict_proba(
                        scale.transform(
                            all_x_data[sort][kind][model_details['features']]))
                    model_outcome = pd.DataFrame()
                    winner = []
                    confidence = []
                    # Binary problem: pick the higher-probability class and
                    # record its probability as the confidence.
                    for game in preds:
                        if game[0] > game[1]:
                            winner.append(0)
                            confidence.append(game[0])
                        else:
                            winner.append(1)
                            confidence.append(game[1])

#                    print('Accuracy: %s' % (accuracy_score(np.ravel(all_y_data[sort][kind]), winner)))
#                    print('Log Loss: %s' % (log_loss(np.ravel(all_y_data[sort][kind]), preds)))

                    model_outcome['idx'] = list(all_x_data[sort][kind][
                        model_details['features']].index)
                    model_outcome['%s_%s_prediction' %
                                  (kind, model_name)] = winner
                    model_outcome['%s_%s_confidence' %
                                  (kind, model_name)] = confidence
                    model_outcome = model_outcome.set_index('idx')

                    outcomes = outcomes.join(model_outcome, how='inner')
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
        outcomes.to_csv(os.path.join(output_folder, '%s_results.csv' % (sort)))
Example #16
0
    cur_path = os.getcwd()
# Walk up from the current directory to the repo root ('bb_preds') and
# register the project's package directories on sys.path.
# NOTE(review): splitting on '/' assumes a POSIX path separator.
while cur_path.split('/')[-1] != 'bb_preds':
    cur_path = os.path.abspath(os.path.join(cur_path, os.pardir))
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

output_folder = os.path.join(cur_path, 'model_results')
model_storage = os.path.join(cur_path, 'saved_models')

import pandas as pd
import pull_data
import update_dbs
import numpy as np

# Historical odds (lines, over/unders, moneylines, scores) for analysis.
vegas_data = pull_data.pull_odds_data(update_dbs.mysql_client())
#stored_results = {}
#for sort in ['ou', 'winner', 'line']:


def moneyline_analysis():
    ml_data = vegas_data[[
        'fav-ml', 'dog-ml', 'fav_idx', 'dog_idx', 'fav-score', 'dog-score'
    ]]
    ml_data = ml_data.dropna(how='any')
    vegas_target_1 = 'fav-ml'
    vegas_target_2 = 'dog-ml'

    print('------ vegas: money-line')
    data = pd.read_csv(os.path.join(output_folder, 'winner_results.csv'))
    data = data.set_index('idx')
Example #17
0
def save():
    """Fit and persist a model + scaler pair for every configured estimator.

    For each target (pts_scored / pts_allowed) and each feature family,
    pulls the training data, applies the home/away sign patch, fits the
    scaler and model declared in saved_models.stored_models, and dumps both
    to model_storage as '<y_val>_<x_vals>_<model>_{scaler,model}.pkl'.
    Pairs already on disk are left untouched.
    """
    def hfa_patch(x, cnx):
        """Flip the sign of '_HAspread_' feature columns for away games.

        Looks up each game's home/away location in the oddsdata table and
        multiplies the spread-style columns by -1 for the away side; all
        other columns pass through unchanged.
        """
        print('Running HFA Patch')
        keep_stats = []
        patch_stats = []
        # Columns containing '_HAspread_' need the sign fix; split()'s
        # IndexError cheaply classifies every other column as keep.
        for stat in list(x):
            try:
                stat.split('_HAspread_')[1]
                patch_stats.append(stat)
            except IndexError:
                keep_stats.append(stat)

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        query = 'Select oddsdate, favorite, underdog, homeaway from oddsdata;'
        cursor.execute(query)
        patch = pd.DataFrame(cursor.fetchall(),
                             columns=['date', 't1', 't2', 'location'])
        cursor.close()

        # Map each '<date><Team>' row key to +1 (home) / -1 (away).
        # NOTE(review): location == 0 appears to mean the favorite (t1) was
        # home — confirm against the oddsdata schema.
        loc_adj = {}
        for d, t1, t2, l in np.array(patch):
            if l == 0:
                loc_adj[str(d) + t1.replace(' ', '_')] = 1
                loc_adj[str(d) + t2.replace(' ', '_')] = -1
            else:
                loc_adj[str(d) + t1.replace(' ', '_')] = -1
                loc_adj[str(d) + t2.replace(' ', '_')] = 1
        patch = None

        # After this join, integer column 1 holds the +/-1 location sign.
        patch_data = patch_data.join(pd.DataFrame.from_dict(
            list(loc_adj.items())).set_index(0),
                                     how='left')
        away_data = patch_data[patch_data[1] == -1]
        away_data *= -1
        home_data = patch_data[patch_data[1] == 1]
        patch_data = home_data.append(away_data)
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('...Completed HFA Patch')
        return x

    for y_val in ['pts_scored', 'pts_allowed']:
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            # Feature-set / target combinations not used by this pipeline.
            if x_vals in ['defensive_stats', 'offensive_stats'
                          ] and y_val == 'pts_allowed':
                continue
            if x_vals in ['full-team', 'defensive_stats'
                          ] and y_val == 'pts_scored':
                continue
            if x_vals == 'possessions':
                y_data = pull_data.pull_possessions(y_val,
                                                    update_dbs.mysql_client())
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                train_index = pull_data.pull_train_index(
                    update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                # Align rows: restrict labels to games present in x_data,
                # then restrict x_data to games that have a label.
                y_data = x_data.join(y_data, how='inner')['possessions']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]

            elif x_vals in [
                    'target', 'defensive_stats', 'offensive_stats',
                    'full-team', 'all'
            ]:
                # All remaining feature sets predict points-per-possession.
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())

            if x_vals == 'full-team':
                # Defensive + offensive features, de-duplicating any columns
                # the two sets share.
                def_data = pull_data.pull_model_features(
                    y_val, 'defensive_stats', update_dbs.mongodb_client)
                def_data = hfa_patch(def_data, update_dbs.mysql_client())
                off_data = pull_data.pull_model_features(
                    y_val, 'offensive_stats', update_dbs.mongodb_client)
                off_feats = [
                    i for i in list(off_data) if i not in list(def_data)
                ]
                off_data = off_data[off_feats]
                off_data = hfa_patch(off_data, update_dbs.mysql_client())
                x_data = def_data.join(off_data, how='inner')
                train_index = pull_data.pull_train_index(
                    update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
                off_data = None
                def_data = None

            elif x_vals == 'all':
                # Union of every feature family (defensive, offensive,
                # possessions, target), de-duplicated and HFA-patched.
                def_data = pull_data.pull_model_features(
                    y_val, 'defensive_stats', update_dbs.mongodb_client)
                def_data = hfa_patch(def_data, update_dbs.mysql_client())
                off_data = pull_data.pull_model_features(
                    y_val, 'offensive_stats', update_dbs.mongodb_client)
                off_feats = [
                    i for i in list(off_data) if i not in list(def_data)
                ]
                off_data = off_data[off_feats]
                off_data = hfa_patch(off_data, update_dbs.mysql_client())
                poss_data = pull_data.pull_model_features(
                    y_val, 'possessions', update_dbs.mongodb_client)
                poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
                tar_data = pull_data.pull_model_features(
                    y_val, 'target', update_dbs.mongodb_client)
                tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
                x_data = def_data.join(off_data, how='inner')
                x_data = x_data.join(poss_data, how='inner')
                x_data = x_data.join(tar_data, how='inner')
                train_index = pull_data.pull_train_index(
                    update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
                def_data = None
                off_data = None
                poss_data = None
                tar_data = None

            elif x_vals in ['target', 'defensive_stats', 'offensive_stats']:
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                train_index = pull_data.pull_train_index(
                    update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]

            for model_name, model_details in saved_models.stored_models[y_val][
                    x_vals].items():
                # Only fit models whose pickle is not already on disk.
                if not os.path.isfile(
                        os.path.join(
                            model_storage, '%s_%s_%s_model.pkl' %
                            (y_val, x_vals, model_name))):
                    print('Loading %s Values' % (model_name))

                    model = model_details['model']
                    scale = model_details['scale']

                    # Fit the scaler on training data and persist it; the
                    # model is then trained on the scaled features.
                    scale.fit(x_data[model_details['features']])
                    joblib.dump(
                        scale,
                        os.path.join(
                            model_storage, '%s_%s_%s_scaler.pkl' %
                            (y_val, x_vals, model_name)))

                    model.fit(
                        scale.transform(x_data[model_details['features']]),
                        np.ravel(y_data))
                    joblib.dump(
                        model,
                        os.path.join(
                            model_storage, '%s_%s_%s_model.pkl' %
                            (y_val, x_vals, model_name)))

                    print('Stored %s' % (model_name))
def update():
    """Score stored offensive/defensive models on newly-added games.

    For each target (pts_scored -> offensive_preds, pts_allowed ->
    defensive_preds), finds the game indices not yet present in the derived
    table, rebuilds the same feature sets used at training time (with the
    home/away sign patch applied), loads each persisted model + scaler pair
    from model_storage, predicts, and pushes the joined prediction columns
    to the database via add_derived.update.

    NOTE(review): hfa_patch is referenced here but, in this file, is
    defined inside save(); assumed to exist at module scope — confirm.
    """
    for y_val in ['pts_scored', 'pts_allowed']:
        # Only games missing from the corresponding derived table.
        if y_val == 'pts_scored':
            train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                               'offensive_preds')
        if y_val == 'pts_allowed':
            train_index = pull_data.update_idx(update_dbs.mysql_client(),
                                               'defensive_preds')
        update_df = pd.DataFrame()
        if len(train_index) == 0:
            continue
        update_df['idx'] = train_index
        update_df = update_df.set_index('idx')
        for x_vals in [
                'defensive_stats', 'offensive_stats', 'full-team', 'all',
                'possessions', 'target'
        ]:
            # Feature-set / target combinations not used by this pipeline.
            if x_vals in ['defensive_stats', 'offensive_stats'
                          ] and y_val == 'pts_allowed':
                continue
            if x_vals in ['full-team', 'defensive_stats'
                          ] and y_val == 'pts_scored':
                continue

            if x_vals == 'possessions':
                y_data = pull_data.pull_possessions(y_val,
                                                    update_dbs.mysql_client())
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                # Align rows: restrict labels to games present in x_data,
                # then restrict x_data to games that have a label.
                y_data = x_data.join(y_data, how='inner')['possessions']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]

            elif x_vals in [
                    'target', 'defensive_stats', 'offensive_stats',
                    'full-team', 'all'
            ]:
                # All remaining feature sets predict points-per-possession.
                y_data = pull_data.pull_ppp(y_val, update_dbs.mysql_client())

            if x_vals == 'full-team':
                # Defensive + offensive features, de-duplicating any columns
                # the two sets share.
                def_data = pull_data.pull_model_features(
                    y_val, 'defensive_stats', update_dbs.mongodb_client)
                def_data = hfa_patch(def_data, update_dbs.mysql_client())
                off_data = pull_data.pull_model_features(
                    y_val, 'offensive_stats', update_dbs.mongodb_client)
                off_feats = [
                    i for i in list(off_data) if i not in list(def_data)
                ]
                off_data = off_data[off_feats]
                off_data = hfa_patch(off_data, update_dbs.mysql_client())
                x_data = def_data.join(off_data, how='inner')
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
                off_data = None
                def_data = None

            elif x_vals == 'all':
                # Union of every feature family, de-duplicated and patched.
                def_data = pull_data.pull_model_features(
                    y_val, 'defensive_stats', update_dbs.mongodb_client)
                def_data = hfa_patch(def_data, update_dbs.mysql_client())
                off_data = pull_data.pull_model_features(
                    y_val, 'offensive_stats', update_dbs.mongodb_client)
                off_feats = [
                    i for i in list(off_data) if i not in list(def_data)
                ]
                off_data = off_data[off_feats]
                off_data = hfa_patch(off_data, update_dbs.mysql_client())
                poss_data = pull_data.pull_model_features(
                    y_val, 'possessions', update_dbs.mongodb_client)
                poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
                tar_data = pull_data.pull_model_features(
                    y_val, 'target', update_dbs.mongodb_client)
                tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
                x_data = def_data.join(off_data, how='inner')
                x_data = x_data.join(poss_data, how='inner')
                x_data = x_data.join(tar_data, how='inner')
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]
                def_data = None
                off_data = None
                poss_data = None
                tar_data = None

            elif x_vals in ['target', 'defensive_stats', 'offensive_stats']:
                x_data = pull_data.pull_model_features(
                    y_val, x_vals, update_dbs.mongodb_client)
                x_data = hfa_patch(x_data, update_dbs.mysql_client())
                x_data = x_data.loc[x_data.index.isin(train_index)]
                y_data = x_data.join(y_data, how='inner')['ppp']
                x_data = x_data.join(y_data, how='inner')[list(x_data)]

            for model_name, model_details in saved_models.stored_models[y_val][
                    x_vals].items():
                if os.path.isfile(
                        os.path.join(
                            model_storage, '%s_%s_%s_model.pkl' %
                            (y_val, x_vals, model_name))):
                    print('Loading %s Values' % (model_name))

                    model = joblib.load(
                        os.path.join(
                            model_storage, '%s_%s_%s_model.pkl' %
                            (y_val, x_vals, model_name)))
                    scale = joblib.load(
                        os.path.join(
                            model_storage, '%s_%s_%s_scaler.pkl' %
                            (y_val, x_vals, model_name)))

                    # BUGFIX: apply the persisted scaler's training-time
                    # parameters with transform().  The previous
                    # fit_transform() re-fit the scaler on this update
                    # batch, feeding the model differently-scaled features
                    # than it was trained on (save()/sklearn_preds() both
                    # fit only at training time).
                    preds = model.predict(
                        scale.transform(x_data[model_details['features']]))
                    indy_pred = pd.DataFrame()
                    # Column naming must match the selections below.
                    if x_vals == 'offensive_stats':
                        indy_pred[model_name + '_team'] = preds
                    elif y_val == 'pts_allowed' and x_vals == 'full-team':
                        indy_pred[model_name + '_team'] = preds
                    else:
                        indy_pred[model_name + '_' + x_vals] = preds
                    indy_pred['idx'] = list(x_data.index)
                    indy_pred = indy_pred.set_index('idx')
                    update_df = update_df.join(indy_pred, how='inner')
                    print('Loaded %s' % (model_name))

        # Push the expected column set to the derived-prediction tables.
        if y_val == 'pts_scored':
            update_df = update_df[[
                'lightgbm_team', 'linsvm_team', 'linsvm_all', 'ridge_all',
                'lasso_possessions', 'lightgbm_possessions',
                'linsvm_possessions', 'lightgbm_target', 'linsvm_target',
                'ridge_target', 'lasso_target'
            ]]
            add_derived.update('offensive_preds', update_df)

        elif y_val == 'pts_allowed':
            update_df = update_df[[
                'lightgbm_all', 'ridge_all', 'lasso_team', 'lightgbm_team',
                'linsvm_team', 'ridge_team', 'lasso_possessions',
                'lightgbm_possessions', 'ridge_possessions', 'lasso_target',
                'lightgbm_target'
            ]]
            add_derived.update('defensive_preds', update_df)
Example #19
0
'30_g_HAspread_allow_free-throw-rate',
'-100_g_HAspread_for_defensive-efficiency',
'pregame_turnovers-per-possession_for',
'-50_g_HAspread_for_personal-fouls-per-game',
'75_g_HAspread_for_defensive-efficiency',
'100_g_HAspread_for_defensive-efficiency',
'75_g_HAspread_allow_points-per-game',
'-75_g_HAspread_allow_floor-percentage',
'30_g_HAspread_for_floor-percentage',
'expected_ftm-per-100-possessions_for',
'-75_g_HAspread_allow_defensive-efficiency',
'-50_g_HAspread_allow_points-per-game`/`possessions-per-game',
'-50_game_avg_30_g_Tweight_allow_fta-per-fga',
'-50_g_HAspread_for_assist--per--turnover-ratio',
'-10_g_HAspread_allow_ftm-per-100-possessions']
# Assemble win/loss training data: raw features ('data'/'x_cols' are
# defined earlier in this script) joined with the outcome labels.
y_data = pull_data.pull_wl(update_dbs.mysql_client())
all_data = data.join(y_data, how = 'inner')
# Flatten the label column to 1-D for sklearn.
y_data = np.ravel(all_data[['outcome']])
x_data_stable = all_data[x_cols]

import linsvc_tuning
import lgclass_tuning
import log_tuning
import knn_tuning
# Tags the tuning scripts use to label their output files.
x_vals = 'raw'
y_val = 'winner'


#
x_data = x_data_stable   
# Run the lightgbm-classifier hyperparameter search on this dataset.
result = lgclass_tuning.execute(y_val, x_vals, X_data = x_data, Y_data = y_data)
Example #20
0
# Register the tuning package and resolve output/feature/model directories
# relative to the repo root.
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

output_folder = os.path.join(cur_path, 'model_results')
features_folder = os.path.join(cur_path, 'feature_dumps')
model_storage = os.path.join(cur_path, 'saved_models')

import numpy as np
import pull_data
import update_dbs
import random
import saved_models
import pandas as pd
from keras.models import load_model
from sklearn.externals import joblib

# Index keys of the games held out for validation; fixed seed for
# reproducible shuffling downstream.
validation_index = pull_data.pull_validation_index(update_dbs.mysql_client())
random.seed(86)


def hfa_patch(x, cnx):
    print('Running HFA Patch')
    keep_stats = []
    patch_stats = []
    for stat in list(x):
        try:
            stat.split('_HAspread_')[1]
            patch_stats.append(stat)
        except IndexError:
            keep_stats.append(stat)

    patch_data = x[patch_stats]
Example #21
0
import os, sys
try:  # if running in CLI
    cur_path = os.path.abspath(__file__)
except NameError:  # if running in IDE
    cur_path = os.getcwd()
# Walk up the directory tree until the project root ('bb_preds') is found.
# os.path.basename() replaces splitting on '/' so the check also works with
# Windows-style separators, and the loop aborts at the filesystem root
# instead of spinning forever when the project root is absent (the parent
# of the root is the root itself, so the original condition could never
# terminate).
while os.path.basename(cur_path) != 'bb_preds':
    parent = os.path.abspath(os.path.join(cur_path, os.pardir))
    if parent == cur_path:  # reached '/' (or a drive root) without a match
        raise RuntimeError("could not locate the 'bb_preds' project root")
    cur_path = parent
# Expose the project's helper packages to this script.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))
derived_folder = os.path.join(cur_path, 'derived_data')
import pandas as pd
import update_dbs
import numpy as np
cnx = update_dbs.mysql_client()

# Bulk-load the derived prediction CSVs into MySQL.  (This loop is cut off
# at the end of the excerpt: the per-row INSERT assembly continues below.)
for name in ['offensive_preds', 'defensive_preds']:
    data = pd.read_csv('%s.csv' % (name))
    cursor = cnx.cursor()
    insertlist = []   # accumulated "(team, date, v1, ...)" value tuples
    continuance = 0   # rows buffered so far (used to batch inserts)
    for entry in np.array(data):
        insert = list(entry)
        # First column is a combined key: a 10-char YYYY-MM-DD date followed
        # by the team name with underscores standing in for spaces.
        idx = insert[0]
        date = '"' + idx[:10] + '"'
        tname = '"' + idx[10:].replace('_', ' ') + '"'
        insert = insert[1:]
        sql_insert = []
        sql_insert.append(tname)
        sql_insert.append(date)
        for each in insert:
Example #22
0
def mirrored_odds_frame(pair_rows, value_col):
    """Expand (fav_idx, dog_idx, value) rows into a per-team-game frame.

    Each game yields two rows indexed by team-game id: the favorite keeps
    the raw value and the underdog gets its negation, so both sides of a
    game can be joined on the same column named `value_col`.
    (Module-level helper so sibling scripts and tests can reuse it.)
    """
    idx = []
    vals = []
    for fav_ix, dog_ix, val in np.array(pair_rows):
        idx.append(fav_ix)
        idx.append(dog_ix)
        vals.append(val)
        vals.append(val * -1)
    frame = pd.DataFrame()
    frame['idx'] = idx
    frame[value_col] = vals
    return frame.set_index('idx')


def keras_preds():
    """Score the saved keras model for each target ('winner', 'ou', 'line')
    on the raw feature set and append its predictions and confidences to the
    per-target results CSV in `output_folder`.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')

    # Vegas line and over/under, mirrored so the underdog side carries the
    # negated value (previously two copy-pasted loops building these frames).
    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    linedata = mirrored_odds_frame(line[['fav_idx', 'dog_idx', 'line']],
                                   'vegas_line')
    oudata = mirrored_odds_frame(line[['fav_idx', 'dog_idx', 'overunder']],
                                 'vegas_ou')

    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')

    # Labels and auxiliary first-stage features per target.
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    all_x_data = {
        'winner': {
            'raw': raw_x.join(y_wl, how='inner')
        },
        'line': {
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')
        },
        'ou': {
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')
        },
    }

    all_y_data = {
        'winner': {
            'raw': raw_x.join(y_wl, how='inner')['outcome']
        },
        'line': {
            'raw':
            raw_x.join(y_line, how='inner').join(x_line, how='inner')['line']
        },
        'ou': {
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')['ou']
        },
    }

    # Release the large intermediates before loading the keras models.
    raw_x = None
    x_score = None
    y_wl = None
    x_ou = None
    y_ou = None
    y_line = None
    x_line = None
    random.seed(86)

    for sort in ['keras']:
        print('... starting %s' % (sort))
        for kind in ['winner', 'ou', 'line']:
            results = pd.read_csv(
                os.path.join(output_folder, '%s_results.csv' % (kind)))
            results = results.set_index('idx')

            print('... starting %s' % (kind))
            X = all_x_data[kind]['raw']
            save_index = list(X.index)
            X = X.reset_index()
            # Restrict to the exact feature list the stored model was fit on.
            X = X[saved_models.stored_models[kind]['raw'][sort]['features']]
            Y = all_y_data[kind]['raw']
            Y = Y.reset_index()
            # NOTE(review): Y is assembled but never used below; kept so the
            # original behavior (including any KeyError on a missing column)
            # is preserved — confirm whether it can be dropped.
            if kind != 'winner':
                Y = Y[kind]
            else:
                Y = Y['outcome']

            print('...loading %s' % (kind))
            model = load_model(
                os.path.join(model_storage,
                             '%s_%s_regression_model.h5' % (kind, sort)))
            scale = joblib.load(
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (kind, sort)))

            preds = model.predict(scale.transform(X))

            # Threshold the model output at 0.5; the reported confidence is
            # the chosen side's score (or its complement for the 0 class).
            winner = []
            confidence = []
            for game in preds:
                if game[0] < .5:
                    winner.append(0)
                    confidence.append(1 - game[0])
                else:
                    winner.append(1)
                    confidence.append(game[0])

            model_outcome = pd.DataFrame()
            model_outcome['idx'] = save_index
            model_outcome['raw_keras_prediction'] = winner
            model_outcome['raw_keras_confidence'] = confidence
            model_outcome = model_outcome.set_index('idx')

            results = results.join(model_outcome, how='inner')

            results.to_csv(
                os.path.join(output_folder, '%s_results.csv' % (kind)))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
Example #23
0
'75_g_HAspread_for_defensive-efficiency',
'1_game_avg_10_g_HAweight_for_points-per-game',
'-50_game_avg_30_g_Tweight_allow_block-pct',
'25_g_HAspread_for_possessions-per-game',
'-5_game_avg_10_g_Tweight_allow_possessions-per-game',
'100_g_HAspread_for_defensive-efficiency',
'-10_game_avg_50_g_Tweight_for_assists-per-game',
'-20_game_avg_15_g_Tweight_allow_extra-chances-per-game',
'pregame_ppp_for',
'-expected_effective-field-goal-pct_allowed',
'-5_game_avg_50_g_HAweight_allow_possessions-per-game',
'-10_g_HAspread_allow_points-per-game`/`possessions-per-game',
'-50_game_avg_15_g_Tweight_allow_blocks-per-game',
'-50_game_avg_50_g_HAweight_for_offensive-rebounding-pct',
'-20_game_avg_50_g_Tweight_for_block-pct']
# Attach against-the-spread labels and first-stage line predictions, then
# assemble the stable feature matrix for line-model tuning.
# NOTE(review): `data` and the initial `x_cols` list are defined in a part
# of this example that is cut off above — confirm against the full script.
y_data = pull_data.line_wl(update_dbs.mysql_client())
all_data = data.join(y_data, how = 'inner')
line_preds = pull_data.line_preds(update_dbs.mysql_client())
all_data = all_data.join(line_preds, how = 'inner')
# Flatten the single-column label frame to a 1-D vector.
y_data = np.ravel(all_data[['line']])
# The first-stage prediction columns become additional model features.
for pred in list(line_preds):
    x_cols.append(pred)
x_data_stable = all_data[x_cols]


import linsvc_tuning
import lgclass_tuning
#import log_tuning
#import knn_tuning
x_vals = 'raw'
y_val = 'line'