import sqlite3
from sqlalchemy import create_engine
import time

# NOTE(review): presumably the pause (in seconds) between API calls; it is not
# used anywhere in the visible part of this script -- confirm against the rest
# of the file.
time_sleep = 2

# List of the competitions available on throne.ai (10/12/2017)
#competitions = ['English Championship','English Premier League','Italian Serie A','Spanish La Liga','NBA','NFL','NHL']
competitions = ['Italian Serie A']

# NOTE(review): the API token below is committed in clear text. Consider
# loading it from the environment and rotating the exposed value.
#ThroneUser = "******"
#ThroneToken = "bcebc6d7-f224-4940-a816-74e3e6d4c34a" # Julien
ThroneUser = "******"
ThroneToken = "802f4123-4696-4b35-9e31-e8c32d91f658" #Nico

# API throne.ai :
# NOTE(review): `peyton` is not imported in the visible lines; it must be
# imported elsewhere in the file for this statement to run.
throne = peyton.Throne(username=ThroneUser, token=ThroneToken)


def ThroneData(df):
    """Add 'year', 'month' and 'day' columns to ``df`` in place.

    The three columns are substrings of ``df.date``: characters 0-3, 5-6 and
    8-9 respectively. The values stay strings; nothing is converted to int.

    Args:
        df: frame exposing a string ``date`` column.
            NOTE(review): the slice offsets assume a 'YYYY-MM-DD...' layout --
            TODO confirm against the Throne data.

    No return statement is visible, so callers rely on the in-place mutation.
    """
    # NOTE(review): reset_index() returns a new frame by default and the
    # result is discarded here, so ``df`` keeps its original index.
    df.reset_index()
    #df.drop('Unnamed: 0', 1, inplace=True)
    df['year'] = df.date.str.slice(0, 4)
    df['month'] = df.date.str.slice(5, 7)
    df['day'] = df.date.str.slice(8, 10)


# Get data to predict for each competition. Is it possible to loop within the throttling limits?
#columns = Throne_historical_data.columns.tolist()
#print(columns)
i = 0
# NOTE(review): the body of this loop lies outside the visible part of the file.
for compet in competitions:
def GetFrameWithFeatures():
    """Download English Premier League data and build the model feature frame.

    Steps, in order:

    1. Fetch the historical and the competition (to-be-predicted) frames for
       'English Premier League' through ``peyton.Throne``, authenticating
       with the ``THRONE_TOKEN`` environment variable.
    2. Add NaN placeholder columns to each frame, tag rows with
       ``data_type`` ('hist' / 'comp'), concatenate and sort by ``date``.
    3. For every team, collect its matches from that team's point of view
       and run ``build_rolling_features`` on teams with more than 20 rows.
    4. Label every match with ``win_tie_loss`` and merge the per-team
       features back onto the match frame, once prefixed ``team_1_`` and
       once prefixed ``team_2_``.
    5. Add the x/y components returned by the year/week/day Fourier helpers.
    6. Write a fixed selection of columns to 'epl_data_w_features.csv'.

    Returns:
        The merged match-level DataFrame (all columns, not only the ones
        written to the CSV file).

    Raises:
        KeyError: if the ``THRONE_TOKEN`` environment variable is not set.
    """
    # get competition data via peyton
    throne = peyton.Throne(username='******', token=os.environ['THRONE_TOKEN'])

    # Get historical data for a competition
    throne.competition('English Premier League').get_historical_data()
    df = throne.competition.historical_data

    # Get competition data for a competition
    throne.competition('English Premier League').get_competition_data()
    df_comp = throne.competition.competition_data

    #df = pd.read_csv('english_premier_league_historical_data.csv')
    #df_comp = pd.read_csv('english_premier_league_competition_data.csv')

    # push additional columns: NaN placeholders plus a data_type tag so the
    # two sources can be told apart after concatenation
    df['team_1_prob'] = np.nan
    df['team_2_prob'] = np.nan
    df['team_tie_prob'] = np.nan
    df['data_type'] = 'hist'

    df_comp['team_1_score'] = np.nan
    df_comp['team_2_score'] = np.nan
    df_comp['data_type'] = 'comp'

    # bring together dfs
    df = pd.concat([df, df_comp], axis=0)

    # sort by time.
    df = df.sort_values(['date'])

    # get team list: every name seen in either the team_1 or the team_2 slot
    teams = list(
        set(df['team_1_name'].unique().tolist())
        | set(df['team_2_name'].unique().tolist()))

    #tmp_team_frame
    team_frames = []

    # break out dataframe by team (manual group by for ability to do complex feature construction)
    for team in tqdm(teams):

        # build tmp frame for all matches where the team is listed as team_1
        tmp_team_1 = df[df['team_1_name'] == team].copy()
        tmp_team_1 = tmp_team_1[[
            'id', 'date', 'team_1_score', 'team_2_score', 'team_2_name'
        ]]
        # rename to a team-centric view: own score first, opponent second
        tmp_team_1.columns = [
            'id', 'date', 'team_score', 'opponent_score', 'opponent_name'
        ]
        tmp_team_1['team_type'] = 'team_name_1'

        # build tmp frame for all matches where the team is listed as team_2
        # (score columns are selected in swapped order so 'team_score' is
        # still this team's own score)
        tmp_team_2 = df[df['team_2_name'] == team].copy()
        tmp_team_2 = tmp_team_2[[
            'id', 'date', 'team_2_score', 'team_1_score', 'team_1_name'
        ]]
        tmp_team_2.columns = [
            'id', 'date', 'team_score', 'opponent_score', 'opponent_name'
        ]
        tmp_team_2['team_type'] = 'team_name_2'

        # concatenate these frames and append
        tmp_team_frame = pd.concat([tmp_team_1, tmp_team_2])
        tmp_team_frame['team_name'] = team

        if len(tmp_team_frame) > 20:
            # build features
            tmp_team_frame = build_rolling_features(tmp_team_frame)
            # collect the per-team frame for the concat below
            # NOTE(review): the whitespace-flattened source does not show
            # whether this append sits inside or after the `if`; inside, teams
            # with 20 or fewer rows are left out and their matches drop out of
            # the merges below. Confirm against the original file.
            team_frames.append(tmp_team_frame)

    # label each match from team_1's perspective (row-wise apply, axis=1)
    df['result'] = df[['team_1_score', 'team_2_score']].apply(
        lambda x: win_tie_loss(x['team_1_score'], x['team_2_score']), 1)

    team_frame = pd.concat(team_frames, axis=0)

    team_frame['result'] = team_frame[['team_score', 'opponent_score']].apply(
        lambda x: win_tie_loss(x['team_score'], x['opponent_score']), 1)

    # bring all team features together
    #team_frame[['id','match_count','opponent_last_score','opponent_score_ema_10','opponent_score_ma_10','team_last_score','team_score_ema_10','team_score_ma_10','team_win_index','team_loss_index','tie_index']]
    # NOTE(review): the feature columns selected below are presumably created
    # by build_rolling_features -- verify against that helper.
    team_frame_1 = team_frame[[
        'id', 'team_name', 'match_count', 'opponent_last_score',
        'opponent_score_ema_10', 'opponent_score_ma_10', 'team_last_score',
        'team_score_ema_10', 'team_score_ma_10', 'team_win_index',
        'team_loss_index', 'tie_index'
    ]].copy()
    # keep the two join keys ('id', 'team_name') as they are and prefix every
    # other column with 'team_1_'
    team_frame_1.columns = [
        'team_1_' + x if index > 1 else x
        for index, x in enumerate(team_frame_1.columns.tolist())
    ]

    team_frame_2 = team_frame[[
        'id', 'team_name', 'match_count', 'opponent_last_score',
        'opponent_score_ema_10', 'opponent_score_ma_10', 'team_last_score',
        'team_score_ema_10', 'team_score_ma_10', 'team_win_index',
        'team_loss_index', 'tie_index'
    ]].copy()
    # same renaming with the 'team_2_' prefix
    team_frame_2.columns = [
        'team_2_' + x if index > 1 else x
        for index, x in enumerate(team_frame_2.columns.tolist())
    ]

    #team_frame
    #len(df)
    # attach the features of the side listed as team_1 (pandas' default
    # inner join: matches without a feature row for that team are dropped)
    df = pd.merge(df,
                  team_frame_1,
                  left_on=['id', 'team_1_name'],
                  right_on=['id', 'team_name'])
    # attach the features of the side listed as team_2
    # NOTE(review): df already carries a 'team_name' column from the first
    # merge, so pandas will presumably suffix the duplicates (_x / _y) --
    # confirm that nothing downstream relies on a plain 'team_name'.
    df = pd.merge(df,
                  team_frame_2,
                  left_on=['id', 'team_2_name'],
                  right_on=['id', 'team_name'])

    # first ([0]) and second ([1]) element returned by each Fourier helper
    df['x_year'] = df['date'].apply(lambda x: year_fourier_components(x)[0])
    df['y_year'] = df['date'].apply(lambda x: year_fourier_components(x)[1])

    df['x_week'] = df['date'].apply(lambda x: week_fourier_components(x)[0])
    df['y_week'] = df['date'].apply(lambda x: week_fourier_components(x)[1])

    df['x_day'] = df['date'].apply(lambda x: day_fourier_components(x)[0])
    df['y_day'] = df['date'].apply(lambda x: day_fourier_components(x)[1])

    # bare expression: evaluated and discarded, no effect when run as a script
    df

    # persist the selected columns
    # NOTE(review): 'is_february', 'is_november', 'c_ability_3', 'd_*' are not
    # created in this function, so they presumably come from the downloaded
    # Throne data -- verify.
    df[[
        'id', 'data_type', 'result', 'team_1_score', 'team_2_score',
        'is_february', 'is_november', 'c_ability_3', 'd_ability_1',
        'd_ability_3', 'd_ability_4', 'd_form_4', 'd_h2h_2', 'team_1_name',
        'team_2_name', 'team_1_match_count', 'team_1_opponent_last_score',
        'team_1_opponent_score_ema_10', 'team_1_opponent_score_ma_10',
        'team_1_team_last_score', 'team_1_team_score_ema_10',
        'team_1_team_score_ma_10', 'team_1_team_win_index',
        'team_1_team_loss_index', 'team_1_tie_index', 'team_2_match_count',
        'team_2_opponent_last_score', 'team_2_opponent_score_ema_10',
        'team_2_opponent_score_ma_10', 'team_2_team_last_score',
        'team_2_team_score_ema_10', 'team_2_team_score_ma_10',
        'team_2_team_win_index', 'team_2_team_loss_index', 'team_2_tie_index',
        'x_year', 'y_year', 'x_week', 'y_week', 'x_day', 'y_day'
    ]].to_csv('epl_data_w_features.csv', index=False)

    return (df)
'team_1_score_pred_lin', 'team_2_score_pred_lin', 'team_1_score_pred_tree', 'team_2_score_pred_tree' ], comp_data_ens)
# NOTE(review): the line above is the tail of a call whose beginning lies
# outside the visible part of the file; it is reproduced unchanged.

# every prediction is submitted with full confidence
comp_data["confidence"] = 1.0

## submit comp data

# check competition results
# keep only the columns that are passed to submit() below
submit_frame = comp_data[[
    'id', 'team_1_name', 'team_2_name', 'team_1_prob', 'confidence',
    'team_tie_prob', 'team_2_prob'
]]
# bare expression: evaluated and discarded, no effect when run as a script
submit_frame

# manual confidence lever settings.
#submit_frame.set_value(5219,'confidence', 0.5)
#submit_frame.set_value(5221,'confidence', 0.25)

#submit
# the API token is read from the THRONE_TOKEN environment variable
# (raises KeyError when it is not set)
throne = peyton.Throne(username='******', token=os.environ['THRONE_TOKEN'])
throne.competition('English Premier League').submit(submit_frame)

# snapshot the historical data and the feature list to tmp.pkl
# NOTE(review): the dict key is 'features_list' while the variable is
# `feature_list`; whoever loads the pickle must use the key spelling.
obj = {'hist_data': hist_data, 'features_list': feature_list}
import pickle
with open('tmp.pkl', 'wb') as f:
    pickle.dump(obj, f)
# Fetch the Throne.ai historical and competition data for one competition
# and print the competition (to-be-predicted) frame.
import os

import peyton

# Single place to change the competition instead of repeating the literal.
_COMPETITION_NAME = 'Italian Serie A'

# Prefer the THRONE_TOKEN environment variable, as the other Throne scripts
# in this code base do; fall back to the original literal so existing runs
# behave exactly as before when the variable is not set.
# NOTE(review): the fallback token is committed in clear text -- rotate it and
# remove the fallback once every caller exports THRONE_TOKEN.
_throne_token = os.environ.get('THRONE_TOKEN',
                               "bcebc6d7-f224-4940-a816-74e3e6d4c34a")

throne = peyton.Throne(username='******', token=_throne_token)

# Get historical data for a competition
throne.competition(_COMPETITION_NAME).get_historical_data()
my_historical_data = throne.competition.historical_data

# Get competition data for a competition
throne.competition(_COMPETITION_NAME).get_competition_data()
my_competition_data = throne.competition.competition_data

print(my_competition_data)