Ejemplo n.º 1
0
def keras_preds():
    """Score the stored games with the saved Keras models and write the
    predictions into the per-target results CSV files.

    For each target kind (``winner``, ``ou``, ``line``) the persisted model
    and scaler are loaded from ``model_storage``, the joined feature frame is
    scaled and predicted, and the first model output is thresholded at 0.5.
    The columns ``raw_keras_prediction`` and ``raw_keras_confidence`` are
    inner-joined (on ``idx``) into ``<output_folder>/<kind>_results.csv``,
    which is then rewritten.

    Relies on ``raw_data`` and the project modules (``pull_data``,
    ``update_dbs``, ``saved_models``) being available in the module scope.

    Returns:
        None.
    """

    def mirrored_odds(odds, column, name):
        """Return ``column`` indexed by team idx: the favourite idx gets the
        value as-is, the underdog idx gets the value multiplied by -1."""
        idx = []
        values = []
        for fav_idx, dog_idx, value in np.array(
                odds[['fav_idx', 'dog_idx', column]]):
            idx.extend((fav_idx, dog_idx))
            values.extend((value, value * -1))
        frame = pd.DataFrame()
        frame['idx'] = idx
        frame[name] = values
        return frame.set_index('idx')

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')

    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    linedata = mirrored_odds(line, 'line', 'vegas_line')
    oudata = mirrored_odds(line, 'overunder', 'vegas_ou')

    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')

    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # Each joined frame is built exactly once; prediction only needs the
    # feature columns, so no separate target frames are materialised.
    all_x_data = {
        'winner': raw_x.join(y_wl, how='inner'),
        'line': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        'ou': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
    }

    # Drop the references to the intermediate frames before loading models.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)

    prediction_columns = ['raw_keras_prediction', 'raw_keras_confidence']

    for sort in ['keras']:
        print('... starting %s' % (sort))
        for kind in ['winner', 'ou', 'line']:
            results_path = os.path.join(output_folder,
                                        '%s_results.csv' % (kind))
            results = pd.read_csv(results_path)
            results = results.set_index('idx')
            # A previous run already wrote these columns into the CSV;
            # joining them again would raise on the overlapping names, so
            # the stale copies are discarded first.
            stale = [c for c in prediction_columns if c in results.columns]
            if stale:
                results = results.drop(columns=stale)

            print('... starting %s' % (kind))
            frame = all_x_data[kind]
            save_index = list(frame.index)
            features = saved_models.stored_models[kind]['raw'][sort][
                'features']
            X = frame.reset_index()[features]

            print('...loading %s' % (kind))
            model = load_model(
                os.path.join(model_storage,
                             '%s_%s_regression_model.h5' % (kind, sort)))
            scale = joblib.load(
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (kind, sort)))

            preds = model.predict(scale.transform(X))

            # Only the first output of each prediction row is used.
            scores = [game[0] for game in preds]
            winner = [0 if score < .5 else 1 for score in scores]
            # Confidence is the distance from the losing side: 1 - p when the
            # prediction is 0, p otherwise.
            confidence = [1 - score if score < .5 else score
                          for score in scores]

            model_outcome = pd.DataFrame()
            model_outcome['idx'] = save_index
            model_outcome['raw_keras_prediction'] = winner
            model_outcome['raw_keras_confidence'] = confidence
            model_outcome = model_outcome.set_index('idx')

            results = results.join(model_outcome, how='inner')

            results.to_csv(results_path)
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
Ejemplo n.º 2
0
def rolling_vegas(result):
    """Build per-team rolling over/under and against-the-line features.

    The odds rows returned by ``pull_data.pull_odds_data`` are walked in
    order. For each side of a game that already has history in both
    trackers, a row keyed ``str(date) + team_with_underscores`` is recorded
    with trailing averages, the streak before the game and the game's result
    (1 / -1). After that the trackers are updated with the game itself and
    capped at the 50 most recent entries.

    Args:
        result: ``'line'`` selects the against-the-line table, ``'ou'`` the
            over/under table.

    Returns:
        The selected table as a DataFrame with one row per key, or ``None``
        when ``result`` is neither ``'line'`` nor ``'ou'``.
    """

    def push_up(streaks, team):
        # A positive result restarts a negative streak at 1, else extends it.
        streaks[team] = 1 if streaks[team] < 0 else streaks[team] + 1

    def push_down(streaks, team):
        # A negative result restarts a positive streak at -1, else extends it.
        streaks[team] = -1 if streaks[team] > 0 else streaks[team] - 1

    odds = pull_data.pull_odds_data(update_dbs.mysql_client())

    # History (list of 1 / -1) and running streak for every known team.
    aou_data = {name: [] for name in bb_odds.teamnames}
    atl_data = {name: [] for name in aou_data}
    aou_streak = dict.fromkeys(aou_data, 0)
    atl_streak = dict.fromkeys(aou_data, 0)

    vegas_data_line = {}
    vegas_data_ou = {}

    games = np.array(odds[[
        'date', 'fav', 'dog', 'line', 'overunder', 'fav-score', 'dog-score',
        'ha'
    ]])
    for game in games:
        (date, fav, dog, spread, overunder, favscore, dogscore,
         homeaway) = game

        # Snapshot the pre-game state, favourite first and underdog second.
        for team, is_fav in ((fav, True), (dog, False)):
            if not aou_data[team] or not atl_data[team]:
                continue
            key = str(date) + team.replace(' ', '_')
            line_row = {}
            ou_row = {}
            vegas_data_line[key] = line_row
            vegas_data_ou[key] = ou_row

            for n_games in (3, 5, 10, 15, 30, 50):
                label = '%s_game_avg' % (n_games)
                ou_row[label] = np.mean(aou_data[team][-n_games:])
                if n_games in (10, 50):
                    line_row[label] = np.mean(atl_data[team][-n_games:])

            # The favourite row gets 1 when ``ha`` is 0 and 0 when it is 1;
            # the underdog row gets the opposite. Other values add no key.
            if homeaway == 0:
                line_row['ha'] = 1 if is_fav else 0
            elif homeaway == 1:
                line_row['ha'] = 0 if is_fav else 1

            ou_row['streak'] = aou_streak[team]
            line_row['streak'] = atl_streak[team]

            ou_row['result'] = 1 if favscore + dogscore > overunder else -1

            fav_side = favscore + spread > dogscore
            if is_fav:
                line_row['result'] = 1 if fav_side else -1
            else:
                line_row['result'] = -1 if fav_side else 1

        # Fold this game into the over/under trackers (an exact tie with the
        # total changes nothing).
        total = dogscore + favscore
        if total < overunder:
            for team in (fav, dog):
                aou_data[team].append(-1)
                push_down(aou_streak, team)
        elif total > overunder:
            for team in (fav, dog):
                aou_data[team].append(1)
                push_up(aou_streak, team)

        # Fold this game into the against-the-line trackers (an adjusted
        # margin of exactly zero changes nothing).
        adjusted_margin = (favscore - dogscore) + spread
        if adjusted_margin > 0:
            atl_data[fav].append(1)
            atl_data[dog].append(-1)
            push_up(atl_streak, fav)
            push_down(atl_streak, dog)
        elif adjusted_margin < 0:
            atl_data[fav].append(-1)
            atl_data[dog].append(1)
            push_up(atl_streak, dog)
            push_down(atl_streak, fav)

        # Keep at most the 50 latest entries per team in both histories.
        for history in (atl_data, aou_data):
            for team in (fav, dog):
                if len(history[team]) > 50:
                    history[team] = history[team][-50:]

    if result == 'line':
        return pd.DataFrame.from_dict(vegas_data_line).T

    if result == 'ou':
        return pd.DataFrame.from_dict(vegas_data_ou).T
Ejemplo n.º 3
0
    cur_path = os.getcwd()
# Climb from the starting directory until the project root ('bb_preds') is
# reached, so the helper packages below resolve from any sub-directory.
while os.path.basename(cur_path) != 'bb_preds':
    parent_path = os.path.abspath(os.path.join(cur_path, os.pardir))
    if parent_path == cur_path:
        # The filesystem root is its own parent; without this guard the loop
        # would never terminate when no ancestor is named 'bb_preds'.
        raise RuntimeError(
            "could not find a 'bb_preds' directory above the starting path")
    cur_path = parent_path
# NOTE(review): insert(-1, ...) places each entry just before the last
# element of sys.path rather than at the very end; kept as written.
sys.path.insert(-1, os.path.join(cur_path, 'model_conf'))
sys.path.insert(-1, os.path.join(cur_path, 'db_utils'))
sys.path.insert(-1, os.path.join(cur_path, 'model_tuning'))

# Project-relative folders used for result CSVs and persisted models.
output_folder = os.path.join(cur_path, 'model_results')
model_storage = os.path.join(cur_path, 'saved_models')

import pandas as pd
import pull_data
import update_dbs
import numpy as np

# Odds table loaded once when the module is executed; moneyline_analysis()
# reads it as a module-level global.
vegas_data = pull_data.pull_odds_data(update_dbs.mysql_client())
# NOTE(review): the two lines below are disabled leftover code.
#stored_results = {}
#for sort in ['ou', 'winner', 'line']:


def moneyline_analysis():
    """Begin the money-line analysis against the winner results file.

    The part of the routine visible here takes the money-line, team-index and
    score columns from the module-level ``vegas_data``, drops rows with any
    missing value, prints a section header and loads ``winner_results.csv``
    from ``output_folder`` indexed by ``idx``.

    NOTE(review): the locals prepared below are not consumed within the
    visible lines; the function presumably continues beyond this excerpt.
    """
    # Restrict the odds table to the columns relevant for money lines.
    ml_data = vegas_data[[
        'fav-ml', 'dog-ml', 'fav_idx', 'dog_idx', 'fav-score', 'dog-score'
    ]]
    # Discard games where any of those fields is missing.
    ml_data = ml_data.dropna(how='any')
    # Column names holding the favourite and underdog money lines.
    vegas_target_1 = 'fav-ml'
    vegas_target_2 = 'dog-ml'

    print('------ vegas: money-line')
    # Contents of winner_results.csv, keyed by the 'idx' column.
    data = pd.read_csv(os.path.join(output_folder, 'winner_results.csv'))
    data = data.set_index('idx')
Ejemplo n.º 4
0
def save():
    """Fit and persist the Keras model and scaler for every target.

    The training table is assembled from the feature store, the score table
    and the mirrored Vegas line / over-under values. For each target
    (``winner``, ``line``, ``ou``) the scaler configured in
    ``saved_models.stored_models`` is fitted and dumped to
    ``<sort>_keras_regression_scaler.pkl`` and the model is fitted and saved
    to ``<sort>_keras_regression_model.h5`` inside ``model_storage``.

    A target is skipped when both of its artifacts already exist. The
    previous check looked for ``<sort>_keras_regression.h5``, a name that is
    never written, so every run retrained every model.

    Returns:
        None.
    """
    # NOTE(review): this shuffled index is not read again in the visible
    # code (raw_data() pulls its own copy); it is kept so the database call
    # and the use of the random generator stay as they were.
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        """Flip the sign of the ``_HAspread_`` columns for rows flagged -1.

        Every row of ``oddsdata`` yields two keys of the form
        ``str(date) + team_with_underscores``: when ``homeaway`` is 0 the
        favourite is flagged 1 and the underdog -1, otherwise the reverse.
        Rows of ``x`` flagged -1 get their ``_HAspread_`` columns multiplied
        by -1; rows without a flag are dropped; all remaining columns are
        re-attached unchanged.

        Args:
            x: DataFrame indexed by the same ``<date><team>`` keys.
            cnx: database connection exposing ``cursor()``.

        Returns:
            The patched DataFrame (``_HAspread_`` columns first).
        """
        print('Running HFA Patch')
        columns = list(x)
        patch_stats = [stat for stat in columns if '_HAspread_' in stat]
        keep_stats = [stat for stat in columns if '_HAspread_' not in stat]

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]

        cursor = cnx.cursor()
        try:
            query = ('Select oddsdate, favorite, underdog, homeaway '
                     'from oddsdata;')
            cursor.execute(query)
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        loc_adj = {}
        for d, t1, t2, loc in np.array(patch):
            t1_flag = 1 if loc == 0 else -1
            loc_adj[str(d) + t1.replace(' ', '_')] = t1_flag
            loc_adj[str(d) + t2.replace(' ', '_')] = -t1_flag
        patch = None

        # Two-column frame (0 = key, 1 = flag); after set_index the flag
        # lives in the column labelled 1.
        flags = pd.DataFrame(list(loc_adj.items())).set_index(0)
        patch_data = patch_data.join(flags, how='left')
        home_data = patch_data[patch_data[1] == 1]
        away_data = patch_data[patch_data[1] == -1] * -1
        # DataFrame.append no longer exists in pandas 2.x; concat stacks the
        # two parts in the same order.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Assemble the raw model input table.

        Team-side features (``pts_scored``) are restricted to the training
        index; opponent-side features (``pts_allowed``) are re-keyed to the
        opponent's index, negated and prefixed with ``-``. Both are joined
        with the offensive points table, and rows containing ``inf``,
        ``'NULL'`` or missing values are dropped.
        """

        def feature_frame(target):
            """Pull, patch and inner-join the four feature groups of
            ``target``."""
            def_data = pull_data.pull_model_features(
                target, 'defensive_stats', update_dbs.mongodb_client)
            def_data = hfa_patch(def_data, update_dbs.mysql_client())

            off_data = pull_data.pull_model_features(
                target, 'offensive_stats', update_dbs.mongodb_client)
            # Columns already supplied by the defensive group are skipped.
            def_columns = set(def_data)
            off_feats = [i for i in list(off_data) if i not in def_columns]
            off_data = off_data[off_feats]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())

            poss_data = pull_data.pull_model_features(
                target, 'possessions', update_dbs.mongodb_client)
            poss_data = hfa_patch(poss_data, update_dbs.mysql_client())

            tar_data = pull_data.pull_model_features(
                target, 'target', update_dbs.mongodb_client)
            tar_data = hfa_patch(tar_data, update_dbs.mysql_client())

            frame = def_data.join(off_data, how='inner')
            frame = frame.join(poss_data, how='inner')
            return frame.join(tar_data, how='inner')

        x_data = feature_frame('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        # The join only filters rows; the feature columns are kept as-is.
        team_data = x_data.join(y_data, how='inner')[list(x_data)]

        opponent_data = feature_frame('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            query = 'SELECT * from gamedata;'
            cursor.execute(query)
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            # Closed like the cursor in hfa_patch.
            cursor.close()

        # Map every team-game key to the key of its opponent in that game.
        idx_switch = {}
        for t, d, o, _ in np.array(switch):
            idx_switch[str(d) +
                       t.replace(' ', '_')] = str(d) + o.replace(' ', '_')

        opponent_data['idx'] = [idx_switch[i] for i in opponent_data.index]
        opponent_data = opponent_data.set_index('idx')
        opponent_data *= -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})

        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    def mirrored_odds(odds, column, name):
        """Return ``column`` indexed by team idx: the favourite idx gets the
        value as-is, the underdog idx gets the value multiplied by -1."""
        idx = []
        values = []
        for fav_idx, dog_idx, value in np.array(
                odds[['fav_idx', 'dog_idx', column]]):
            idx.extend((fav_idx, dog_idx))
            values.extend((value, value * -1))
        frame = pd.DataFrame()
        frame['idx'] = idx
        frame[name] = values
        return frame.set_index('idx')

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')

    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    linedata = mirrored_odds(line, 'line', 'vegas_line')
    oudata = mirrored_odds(line, 'overunder', 'vegas_ou')

    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')

    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    # One joined frame per target; features and the target column are both
    # read from it, so each join is computed once.
    joined = {
        'winner': raw_x.join(y_wl, how='inner'),
        'line': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        'ou': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
    }

    # Drop the references to the intermediate frames before training.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)

    for kind in ['keras']:
        print('... starting %s' % (kind))
        for sort in ['winner', 'line', 'ou']:
            print('... starting %s' % (sort))
            model_path = os.path.join(
                model_storage, '%s_%s_regression_model.h5' % (sort, kind))
            scaler_path = os.path.join(
                model_storage, '%s_%s_regression_scaler.pkl' % (sort, kind))

            # Train unless both artifacts written below are already present.
            if not (os.path.isfile(model_path)
                    and os.path.isfile(scaler_path)):
                spec = saved_models.stored_models[sort]['raw'][kind]
                frame = joined[sort]

                X = frame.reset_index()[spec['features']]
                Y = frame['outcome' if sort == 'winner' else sort]

                print('...storing %s Keras' % (sort))

                model = spec['model']
                scale = spec['scale']

                scale.fit(X)
                joblib.dump(scale, scaler_path)
                model.fit(scale.transform(X), np.ravel(Y))
                model.model.save(model_path)

                print('Stored %s_%s' % (sort, kind))

            print('Finished %s' % (sort))
        print('Finished %s' % (kind))