Beispiel #1
0
def keras_preds():
    """Score all games with the stored Keras models and append the results.

    Builds the raw feature frame (``raw_data()`` inner-joined with the score
    features and with the mirrored ``vegas_ou`` / ``vegas_line`` columns),
    inner-joins it with the targets and prediction inputs of each kind
    ('winner', 'ou', 'line'), then for every kind loads
    ``<kind>_keras_regression_model.h5`` and ``<kind>_keras_regression_scaler.pkl``
    from ``model_storage``, predicts, and inner-joins the columns
    ``raw_keras_prediction`` / ``raw_keras_confidence`` onto
    ``<kind>_results.csv`` in ``output_folder``.

    Returns:
        None. The updated results are written back to the CSV files.
    """

    def _mirrored_odds(odds, value_col, out_col):
        """Return ``value_col`` per team index: as-is for ``fav_idx``, negated for ``dog_idx``."""
        team_idx = []
        values = []
        rows = np.array(odds[['fav_idx', 'dog_idx', value_col]])
        for fav, dog, value in rows:
            team_idx.extend((fav, dog))
            values.extend((value, value * -1))
        frame = pd.DataFrame()
        frame['idx'] = team_idx
        frame[out_col] = values
        return frame.set_index('idx')

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    raw_x = raw_x.join(x_score, how='inner')

    line = pull_data.pull_odds_data(update_dbs.mysql_client())
    linedata = _mirrored_odds(line, 'line', 'vegas_line')
    oudata = _mirrored_odds(line, 'overunder', 'vegas_ou')

    raw_x = raw_x.join(oudata, how='inner')
    raw_x = raw_x.join(linedata, how='inner')

    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    all_x_data = {
        'winner': {
            'raw': raw_x.join(y_wl, how='inner')
        },
        'line': {
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner')
        },
        'ou': {
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner')
        },
    }

    # Drop the references to the intermediate frames; only the joined frames
    # in all_x_data are needed from here on.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)

    for sort in ['keras']:
        print('... starting %s' % (sort))
        for kind in ['winner', 'ou', 'line']:
            results_path = os.path.join(output_folder,
                                        '%s_results.csv' % (kind))
            results = pd.read_csv(results_path).set_index('idx')

            print('... starting %s' % (kind))
            X = all_x_data[kind]['raw']
            # Remember the game index before it is turned into a column, so
            # the predictions can be keyed back onto the results file.
            save_index = list(X.index)
            X = X.reset_index()
            X = X[saved_models.stored_models[kind]['raw'][sort]['features']]

            print('...loading %s' % (kind))
            model = load_model(
                os.path.join(model_storage,
                             '%s_%s_regression_model.h5' % (kind, sort)))
            scale = joblib.load(
                os.path.join(model_storage,
                             '%s_%s_regression_scaler.pkl' % (kind, sort)))

            preds = model.predict(scale.transform(X))

            # The first output of each prediction is thresholded at 0.5; the
            # confidence is that output's distance from the opposite class.
            scores = [game[0] for game in preds]
            winner = [0 if score < .5 else 1 for score in scores]
            confidence = [
                1 - score if score < .5 else score for score in scores
            ]

            model_outcome = pd.DataFrame()
            model_outcome['idx'] = save_index
            model_outcome['raw_keras_prediction'] = winner
            model_outcome['raw_keras_confidence'] = confidence
            model_outcome = model_outcome.set_index('idx')

            results = results.join(model_outcome, how='inner')

            results.to_csv(results_path)
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))


raw_x = raw_data()
x_score = pull_data.score(update_dbs.mysql_client())
y_wl = pull_data.pull_wl(update_dbs.mysql_client())
x_ou = pull_data.ou_preds(update_dbs.mysql_client())
y_ou = pull_data.ou_wl(update_dbs.mysql_client())
y_line = pull_data.line_wl(update_dbs.mysql_client())
x_line = pull_data.line_preds(update_dbs.mysql_client())

all_x_data = {
    'winner': {
        '+pts': x_score.join(y_wl, how='inner'),
        'raw': raw_x.join(y_wl, how='inner'),
    },
    'line': {
        '+pts': x_score.join(y_line, how='inner').join(x_line, how='inner'),
        'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
    },
Beispiel #3
0
def save():
    """Fit and store every non-Keras model that has no model file yet.

    For each sort ('ou', 'winner', 'line') and each feature kind
    ('raw', '+pts'), every entry of ``saved_models.stored_models[sort][kind]``
    other than 'keras' whose ``<sort>_<kind>_<name>_model.pkl`` is missing from
    ``model_storage`` gets its scaler fitted on the entry's feature columns,
    its model fitted on the scaled features, and both dumped with ``joblib``.

    Returns:
        None. Models and scalers are written to ``model_storage``.
    """
    # NOTE(review): train_index is not read again in this function (raw_data
    # pulls its own copy below); kept to preserve the original call sequence.
    train_index = pull_data.pull_train_index(update_dbs.mysql_client())
    random.seed(86)
    random.shuffle(train_index)

    def hfa_patch(x, cnx):
        """Sign-flip the '_HAspread_' columns of ``x`` by game location.

        Args:
            x: feature frame indexed by ``str(date) + team`` (team names with
                spaces replaced by underscores).
            cnx: open MySQL connection used to read ``oddsdata``.

        Returns:
            The frame with '_HAspread_' columns first (negated for rows whose
            location flag is -1) followed by the untouched columns. Rows with
            no matching ``oddsdata`` entry are not part of the result.
        """
        print('Running HFA Patch')
        columns = list(x)
        patch_stats = [stat for stat in columns if '_HAspread_' in stat]
        keep_stats = [stat for stat in columns if '_HAspread_' not in stat]

        patch_data = x[patch_stats]
        keep_data = x[keep_stats]
        cursor = cnx.cursor()
        try:
            query = ('Select oddsdate, favorite, underdog, homeaway '
                     'from oddsdata;')
            cursor.execute(query)
            patch = pd.DataFrame(cursor.fetchall(),
                                 columns=['date', 't1', 't2', 'location'])
        finally:
            cursor.close()

        # homeaway == 0 marks t1 with +1 and t2 with -1, anything else the
        # reverse (presumably 0 means the favorite is at home -- TODO confirm).
        loc_adj = {}
        for game_date, t1, t2, location in np.array(patch):
            t1_flag = 1 if location == 0 else -1
            loc_adj[str(game_date) + t1.replace(' ', '_')] = t1_flag
            loc_adj[str(game_date) + t2.replace(' ', '_')] = -t1_flag
        patch = None

        # Column 1 of this frame carries the +1 / -1 location flag.
        flags = pd.DataFrame(list(loc_adj.items())).set_index(0)
        patch_data = patch_data.join(flags, how='left')
        away_data = patch_data[patch_data[1] == -1] * -1
        home_data = patch_data[patch_data[1] == 1]
        # DataFrame.append was removed in pandas 2.0; concat is equivalent.
        patch_data = pd.concat([home_data, away_data])
        del patch_data[1]
        x = patch_data.join(keep_data)
        print('Completed HFA Patch')
        return x

    def raw_data():
        """Build the raw training frame of team, opponent and target columns.

        The 'pts_scored' features (restricted to the training index) form the
        team side. The 'pts_allowed' features are re-keyed through the
        ``gamedata`` team -> opponent mapping, negated and prefixed with '-'.
        Both are joined with the offensive points target; rows containing
        inf, 'NULL' or NaN are dropped.
        """

        def _pull_features(target):
            """Pull, HFA-patch and inner-join the four feature groups of ``target``."""
            def_data = pull_data.pull_model_features(
                target, 'defensive_stats', update_dbs.mongodb_client)
            def_data = hfa_patch(def_data, update_dbs.mysql_client())
            off_data = pull_data.pull_model_features(
                target, 'offensive_stats', update_dbs.mongodb_client)
            # Keep only the offensive columns that the defensive frame does
            # not already provide.
            def_cols = set(def_data)
            off_data = off_data[[i for i in list(off_data)
                                 if i not in def_cols]]
            off_data = hfa_patch(off_data, update_dbs.mysql_client())
            poss_data = pull_data.pull_model_features(
                target, 'possessions', update_dbs.mongodb_client)
            poss_data = hfa_patch(poss_data, update_dbs.mysql_client())
            tar_data = pull_data.pull_model_features(
                target, 'target', update_dbs.mongodb_client)
            tar_data = hfa_patch(tar_data, update_dbs.mysql_client())
            joined = def_data.join(off_data, how='inner')
            joined = joined.join(poss_data, how='inner')
            return joined.join(tar_data, how='inner')

        x_data = _pull_features('pts_scored')
        train_index = pull_data.pull_train_index(update_dbs.mysql_client())
        x_data = x_data.loc[x_data.index.isin(train_index)]
        y_data = pull_data.pull_pts('offensive', update_dbs.mysql_client())
        team_data = x_data.join(y_data, how='inner')[list(x_data)]

        opponent_data = _pull_features('pts_allowed')

        cnx = update_dbs.mysql_client()
        cursor = cnx.cursor()
        try:
            query = 'SELECT * from gamedata;'
            cursor.execute(query)
            switch = pd.DataFrame(
                cursor.fetchall(),
                columns=['teamname', 'date', 'opponent', 'location'])
        finally:
            cursor.close()

        # Map each "date + team" key to the "date + opponent" key of the
        # same game.
        idx_switch = {
            str(game_date) + team.replace(' ', '_'):
            str(game_date) + opponent.replace(' ', '_')
            for team, game_date, opponent, _ in np.array(switch)
        }
        opponent_data['idx'] = [idx_switch[key] for key in opponent_data.index]
        opponent_data = opponent_data.set_index('idx') * -1
        opponent_data = opponent_data.rename(
            columns={i: '-' + i
                     for i in list(opponent_data)})
        data = opponent_data.join(team_data)
        data = data.join(y_data, how='inner')
        data = data.replace([np.inf, -np.inf], np.nan)
        data = data.replace('NULL', np.nan)
        data = data.dropna(how='any')
        return data

    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line,
                                                           how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }

    # The targets are columns of the frames joined above, so they are read
    # from those frames instead of repeating every join.
    target_column = {'winner': 'outcome', 'line': 'line', 'ou': 'ou'}
    all_y_data = {
        sort: {
            kind: frame[target_column[sort]]
            for kind, frame in frames.items()
        }
        for sort, frames in all_x_data.items()
    }

    # Drop the references to the intermediate frames before fitting.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)

    for sort in ['ou', 'winner', 'line']:
        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue
                model_path = os.path.join(
                    model_storage,
                    '%s_%s_%s_model.pkl' % (sort, kind, model_name))
                if os.path.isfile(model_path):
                    # Already stored by an earlier run.
                    continue
                print('...storing %s' % (model_name))

                model = model_details['model']
                scale = model_details['scale']
                features = all_x_data[sort][kind][model_details['features']]

                scale.fit(features)
                joblib.dump(
                    scale,
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))

                model.fit(scale.transform(features),
                          np.ravel(all_y_data[sort][kind]))
                joblib.dump(model, model_path)

                print('Stored %s' % (model_name))
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
Beispiel #4
0
def sklearn_preds():
    """Predict with every stored scikit-learn model and write results CSVs.

    For each sort ('ou', 'winner', 'line') an index-only frame is built from
    the raw feature frame of that sort. For each feature kind ('raw', '+pts')
    and each entry of ``saved_models.stored_models[sort][kind]`` other than
    'keras' whose ``<sort>_<kind>_<name>_model.pkl`` exists in
    ``model_storage``, the model and its scaler are loaded, class
    probabilities are predicted, and the columns ``<kind>_<name>_prediction``
    and ``<kind>_<name>_confidence`` are inner-joined onto that frame. The
    result is written to ``<sort>_results.csv`` in ``output_folder``.

    Returns:
        None. One CSV per sort is written.
    """
    raw_x = raw_data()
    x_score = pull_data.score(update_dbs.mysql_client())
    y_wl = pull_data.pull_wl(update_dbs.mysql_client())
    x_ou = pull_data.ou_preds(update_dbs.mysql_client())
    y_ou = pull_data.ou_wl(update_dbs.mysql_client())
    y_line = pull_data.line_wl(update_dbs.mysql_client())
    x_line = pull_data.line_preds(update_dbs.mysql_client())

    all_x_data = {
        'winner': {
            '+pts': x_score.join(y_wl, how='inner'),
            'raw': raw_x.join(y_wl, how='inner'),
        },
        'line': {
            '+pts': x_score.join(y_line, how='inner').join(x_line,
                                                           how='inner'),
            'raw': raw_x.join(y_line, how='inner').join(x_line, how='inner'),
        },
        'ou': {
            '+pts': x_score.join(y_ou, how='inner').join(x_ou, how='inner'),
            'raw': raw_x.join(y_ou, how='inner').join(x_ou, how='inner'),
        },
    }

    # Drop the references to the intermediate frames; only the joined frames
    # in all_x_data are needed from here on.
    del raw_x, x_score, y_wl, x_ou, y_ou, y_line, x_line
    random.seed(86)
    for sort in ['ou', 'winner', 'line']:
        # Start from an index-only frame keyed like the raw feature frame;
        # each model's columns are inner-joined onto it.
        outcomes = pd.DataFrame()
        outcomes['idx'] = list(all_x_data[sort]['raw'].index)
        outcomes = outcomes.set_index('idx')

        print('... starting %s' % (sort))
        for kind in ['raw', '+pts']:
            print('... starting %s' % (kind))
            for model_name, model_details in saved_models.stored_models[sort][
                    kind].items():
                if model_name == 'keras':
                    continue

                model_path = os.path.join(
                    model_storage,
                    '%s_%s_%s_model.pkl' % (sort, kind, model_name))
                if not os.path.isfile(model_path):
                    # Nothing stored for this model; skip it.
                    continue
                print('Evaluating %s ' % (model_name))

                model = joblib.load(model_path)
                scale = joblib.load(
                    os.path.join(
                        model_storage,
                        '%s_%s_%s_scaler.pkl' % (sort, kind, model_name)))

                features = all_x_data[sort][kind][model_details['features']]
                preds = model.predict_proba(scale.transform(features))

                # Pick the class with the larger probability (ties go to
                # class 1) and report that probability as the confidence.
                winner = [0 if game[0] > game[1] else 1 for game in preds]
                confidence = [
                    game[0] if game[0] > game[1] else game[1]
                    for game in preds
                ]

                model_outcome = pd.DataFrame()
                model_outcome['idx'] = list(features.index)
                model_outcome['%s_%s_prediction' %
                              (kind, model_name)] = winner
                model_outcome['%s_%s_confidence' %
                              (kind, model_name)] = confidence
                model_outcome = model_outcome.set_index('idx')

                outcomes = outcomes.join(model_outcome, how='inner')
            print('Finished %s' % (kind))
        print('Finished %s' % (sort))
        outcomes.to_csv(os.path.join(output_folder, '%s_results.csv' % (sort)))
import linsvc_tuning
import knn_tuning
import feature_lists
import rbfsvc_tuning
import polysvc_tuning
import random

# Run knn_tuning on the score features ('points') against the game outcome.
train_index = pull_data.pull_train_index(update_dbs.mysql_client())
random.seed(86)
random.shuffle(train_index)
derived_data = {}

x_vals = 'points'
y_val = '+pts'
x_data_stable = pull_data.score(update_dbs.mysql_client())
# Every score column except these three is used as a feature.
x_cols = list(x_data_stable)
x_cols.remove('+pts')
x_cols.remove('+possessions')
x_cols.remove('-possessions')
y_data_stable = pull_data.pull_wl(update_dbs.mysql_client())
alldata = y_data_stable.join(x_data_stable, how='inner')
y_data = alldata['outcome']

# Take the features from the joined frame so that X and Y contain exactly the
# same rows in the same order; slicing x_data_stable directly would keep rows
# that have no outcome.
x_data = alldata[x_cols]
result = knn_tuning.execute(y_val, x_vals, X_data=x_data, Y_data=y_data)
print("Best %s %s score: %s" % (x_vals, y_val, result))