import os
import sqlite3
import time

from sqlalchemy import create_engine
# NOTE(review): presumably a pause, in seconds, between API calls; it is not
# used anywhere in the visible code -- confirm before relying on it.
time_sleep = 2

# List of the competitions available on throne.ai (10/12/2017):
#competitions = ['English Championship','English Premier League','Italian Serie A','Spanish La Liga','NBA','NFL','NHL']
competitions = ['Italian Serie A']

# Credentials. The API token is read from the THRONE_TOKEN environment
# variable, like the other peyton clients in this file, so that no secret is
# committed to source control. To use another account, export that account's
# token instead of editing this file.
ThroneUser = "******"
ThroneToken = os.environ['THRONE_TOKEN']

# throne.ai API client shared by the code below.
throne = peyton.Throne(username=ThroneUser, token=ThroneToken)


def ThroneData(df):
    """Add ``year``, ``month`` and ``day`` string columns to *df*, in place.

    The parts are cut out of the ``date`` column by character position
    (0-4, 5-7 and 8-10), so every date must be a string laid out as
    ``YYYY-MM-DD``; anything after the tenth character is ignored.

    Args:
        df: DataFrame with a string ``date`` column. It is modified in place;
            its index is left untouched.

    Returns:
        None.
    """
    # No index reset here: ``reset_index()`` returns a new frame, so a bare
    # call only builds a copy that is thrown away.
    dates = df['date'].str
    df['year'] = dates.slice(0, 4)
    df['month'] = dates.slice(5, 7)
    df['day'] = dates.slice(8, 10)


# Get the data to predict for each competition. Is it possible to loop over them given the API throttling limits?
#columns = Throne_historical_data.columns.tolist()
#print(columns)
i = 0
for compet in competitions:
def _team_side_matches(df, team, side):
    """Return the matches where *team* plays as team ``side`` (1 or 2), seen from that team."""
    other = 2 if side == 1 else 1
    matches = df.loc[
        df['team_%d_name' % side] == team,
        [
            'id', 'date', 'team_%d_score' % side, 'team_%d_score' % other,
            'team_%d_name' % other
        ],
    ].copy()
    matches.columns = [
        'id', 'date', 'team_score', 'opponent_score', 'opponent_name'
    ]
    matches['team_type'] = 'team_name_%d' % side
    return matches


def _prefixed_side_features(team_frame, side, feature_columns):
    """Return the join keys plus *feature_columns*, the latter renamed to ``team_<side>_<name>``."""
    keys = ['id', 'team_name']
    features = team_frame[keys + feature_columns].copy()
    prefix = 'team_%d_' % side
    features.columns = keys + [prefix + name for name in feature_columns]
    return features


def GetFrameWithFeatures():
    """Build the English Premier League match frame with per-team features.

    The function:

    1. downloads the historical and the competition data through peyton,
       authenticating with the ``THRONE_TOKEN`` environment variable;
    2. stacks both into one frame tagged by ``data_type`` (``'hist'`` or
       ``'comp'``) and sorts it by ``date``;
    3. builds, for every team with more than 20 matches, a per-team match
       history and passes it to ``build_rolling_features``;
    4. joins the resulting features back onto each match twice, once for
       team 1 and once for team 2 (default inner merges, so a match is kept
       only when both of its teams have features);
    5. adds the year/week/day Fourier components of the match date;
    6. writes a fixed selection of columns to ``epl_data_w_features.csv`` in
       the current working directory.

    Returns:
        The merged DataFrame with all its columns, not only the exported ones.

    Raises:
        KeyError: if ``THRONE_TOKEN`` is not set in the environment.
        ValueError: if no team has more than 20 matches.
    """
    competition_name = 'English Premier League'

    # Columns selected from the output of build_rolling_features and joined
    # onto each match once per side. The order matters: it also drives the
    # column order of the exported CSV.
    feature_columns = [
        'match_count', 'opponent_last_score', 'opponent_score_ema_10',
        'opponent_score_ma_10', 'team_last_score', 'team_score_ema_10',
        'team_score_ma_10', 'team_win_index', 'team_loss_index', 'tie_index'
    ]

    # get competition data via peyton
    throne = peyton.Throne(username='******',
                           token=os.environ['THRONE_TOKEN'])

    # Get historical data for the competition
    throne.competition(competition_name).get_historical_data()
    df = throne.competition.historical_data

    # Get competition data for the competition
    throne.competition(competition_name).get_competition_data()
    df_comp = throne.competition.competition_data

    #df = pd.read_csv('english_premier_league_historical_data.csv')
    #df_comp = pd.read_csv('english_premier_league_competition_data.csv')

    # Give both frames the same columns before stacking them: historical
    # rows get empty probabilities, competition rows get empty scores.
    df['team_1_prob'] = np.nan
    df['team_2_prob'] = np.nan
    df['team_tie_prob'] = np.nan
    df['data_type'] = 'hist'

    df_comp['team_1_score'] = np.nan
    df_comp['team_2_score'] = np.nan
    df_comp['data_type'] = 'comp'

    # bring the frames together, sorted by time
    df = pd.concat([df, df_comp], axis=0)
    df = df.sort_values(['date'])

    # every team that appears on either side of a match
    teams = list(
        set(df['team_1_name'].unique().tolist())
        | set(df['team_2_name'].unique().tolist()))

    # break out the dataframe by team (manual group-by, to allow complex
    # feature construction)
    team_frames = []
    for team in tqdm(teams):
        team_matches = pd.concat([
            _team_side_matches(df, team, 1),
            _team_side_matches(df, team, 2),
        ])
        team_matches['team_name'] = team

        # only teams with more than 20 matches get rolling features
        if len(team_matches) > 20:
            team_frames.append(build_rolling_features(team_matches))

    df['result'] = df[['team_1_score', 'team_2_score']].apply(
        lambda x: win_tie_loss(x['team_1_score'], x['team_2_score']), axis=1)

    if not team_frames:
        # pd.concat would raise a ValueError here anyway; say why instead.
        raise ValueError(
            'no team has more than 20 matches; cannot build team features')
    team_frame = pd.concat(team_frames, axis=0)

    # Per-team outcome; it is not among the columns joined back below.
    team_frame['result'] = team_frame[['team_score', 'opponent_score']].apply(
        lambda x: win_tie_loss(x['team_score'], x['opponent_score']), axis=1)

    # bring all team features together: one copy per side, with prefixed
    # feature names so both can live on the same match row
    team_frame_1 = _prefixed_side_features(team_frame, 1, feature_columns)
    team_frame_2 = _prefixed_side_features(team_frame, 2, feature_columns)

    df = pd.merge(df,
                  team_frame_1,
                  left_on=['id', 'team_1_name'],
                  right_on=['id', 'team_name'])

    df = pd.merge(df,
                  team_frame_2,
                  left_on=['id', 'team_2_name'],
                  right_on=['id', 'team_name'])

    # Fourier components of the match date (element 0 -> x, element 1 -> y)
    df['x_year'] = df['date'].apply(lambda x: year_fourier_components(x)[0])
    df['y_year'] = df['date'].apply(lambda x: year_fourier_components(x)[1])
    df['x_week'] = df['date'].apply(lambda x: week_fourier_components(x)[0])
    df['y_week'] = df['date'].apply(lambda x: week_fourier_components(x)[1])
    df['x_day'] = df['date'].apply(lambda x: day_fourier_components(x)[0])
    df['y_day'] = df['date'].apply(lambda x: day_fourier_components(x)[1])

    export_columns = (
        [
            'id', 'data_type', 'result', 'team_1_score', 'team_2_score',
            'is_february', 'is_november', 'c_ability_3', 'd_ability_1',
            'd_ability_3', 'd_ability_4', 'd_form_4', 'd_h2h_2',
            'team_1_name', 'team_2_name'
        ]
        + ['team_1_' + name for name in feature_columns]
        + ['team_2_' + name for name in feature_columns]
        + ['x_year', 'y_year', 'x_week', 'y_week', 'x_day', 'y_day']
    )
    df[export_columns].to_csv('epl_data_w_features.csv', index=False)

    return df
# Example #3
0
        'team_1_score_pred_lin', 'team_2_score_pred_lin',
        'team_1_score_pred_tree', 'team_2_score_pred_tree'
    ], comp_data_ens)

import pickle

# Default confidence of 1.0 for every row; see the manual overrides below.
comp_data["confidence"] = 1.0

## Submit competition data
# Columns sent with the submission. The copy makes the frame safe to edit
# (e.g. with the confidence overrides below) without touching comp_data.
submit_frame = comp_data[[
    'id', 'team_1_name', 'team_2_name', 'team_1_prob', 'confidence',
    'team_tie_prob', 'team_2_prob'
]].copy()

# Manual confidence lever settings (row label -> confidence).
# DataFrame.set_value no longer exists in current pandas; use .loc instead:
#submit_frame.loc[5219, 'confidence'] = 0.5
#submit_frame.loc[5221, 'confidence'] = 0.25

# Submit
throne = peyton.Throne(username='******',
                       token=os.environ['THRONE_TOKEN'])
throne.competition('English Premier League').submit(submit_frame)

# Save the historical data and the feature list to tmp.pkl.
obj = {'hist_data': hist_data, 'features_list': feature_list}

with open('tmp.pkl', 'wb') as f:
    pickle.dump(obj, f)
import peyton

# throne.ai client. The token is read from the THRONE_TOKEN environment
# variable, like the other clients in this file, instead of being hard-coded.
throne = peyton.Throne(username='******',
                       token=os.environ['THRONE_TOKEN'])

# Get historical data for a competition
throne.competition('Italian Serie A').get_historical_data()
my_historical_data = throne.competition.historical_data

# Get competition data for a competition
throne.competition('Italian Serie A').get_competition_data()
my_competition_data = throne.competition.competition_data
print(my_competition_data)