def update_league_data(league_df, n_prev_match):
    """Append to ``league_df`` the matches played after its last stored row.

    Every season CSV returned by ``K.get_league_csv_paths`` is extracted.
    When the last match of a season is more recent than the last match in
    ``league_df``, that season goes through ``feature_engineering_league``
    and only the rows dated after the last stored match are appended.

    Args:
        league_df: league data with at least one row and the ``league`` and
            ``Date`` columns.
        n_prev_match: forwarded to ``feature_engineering_league``.

    Returns:
        The league dataframe with its ``Date`` column parsed by
        ``pd.to_datetime``. The index is reset whenever rows were added.

    Raises:
        AssertionError: if the league name read from ``league_df`` is not in
            ``K.LEAGUE_NAMES``.
    """
    logger.info('> Updating league data')

    league_name = list(league_df['league'].unique())[0]
    assert league_name in K.LEAGUE_NAMES, f'Update League Data: Wrong League Name >> {league_name} provided'

    for season_i, path in enumerate(K.get_league_csv_paths(league_name)):
        season_df = extract_season_data(path, season_i, league_name)

        # ---------CHECK LAST DATE----------
        last_date = pd.to_datetime(league_df.iloc[-1]['Date'])
        date = season_df.iloc[-1]['Date']

        if date > last_date:
            # Work on a re-indexed copy of the season so the extracted frame
            # itself is not handed to the feature engineering step.
            update_df = season_df.reset_index(drop=True)
            update_df = feature_engineering_league(update_df, n_prev_match)
            update_df = update_df[update_df['Date'] > last_date]
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # supported equivalent.
            league_df = pd.concat([league_df, update_df],
                                  sort=False).reset_index(drop=True)
        # ----------------------------------

    # Parsing is idempotent, so it is done once on the final frame.
    league_df['Date'] = pd.to_datetime(league_df['Date'])

    return league_df
def save_all_params(save_dir,
                    league_params,
                    data_params,
                    model_params,
                    production_params=None):
    """Write every parameter set of a run as a JSON file under ``save_dir``.

    The files are named ``1.league_params``, ``2.data_params``,
    ``3.model_params`` and, only when ``production_params`` is given,
    ``4.production_params`` (``save_params`` appends the extension).

    Args:
        save_dir: destination folder, created if missing. File names are
            concatenated to it directly, so it is expected to end with a
            path separator.
        league_params: league parameters to save.
        data_params: data parameters to save.
        model_params: model parameters to save.
        production_params: optional production parameters to save.
    """
    ensure_folder(save_dir)

    named_params = [
        ('1.league_params', league_params),
        ('2.data_params', data_params),
        ('3.model_params', model_params),
    ]
    if production_params is not None:
        # The production file must contain the production parameters; it
        # used to be written from ``model_params`` by mistake.
        named_params.append(('4.production_params', production_params))

    for filename, params in named_params:
        save_params(params, f'{save_dir}{filename}', format='json')

    logger.info(f'> Saving PARAMS at {save_dir}')

    return
def save_params(params, filepath, format='json', verbose=True):
    """Persist ``params`` as a JSON or plain-text file.

    Args:
        params: parameters to save.
        filepath: destination path without extension; ``.json`` or ``.txt``
            is appended according to ``format``.
        format: ``'json'`` or ``'str'``.
        verbose: when true, log the final path.

    Raises:
        ValueError: if ``format`` is neither ``'json'`` nor ``'str'``.
    """
    if format not in ('json', 'str'):
        raise ValueError(f'Format is not recognized: provided --> {format}')

    if format == 'json':
        filepath = filepath + '.json'
        save_json(_json_item_to_str(params), filepath)
    else:
        # One parameter per line, preceded by a header with the file name.
        body = str(params).replace(',', ',\n')
        header = f"\n> {filepath.split('/')[-1]}\n\n"
        filepath = filepath + '.txt'
        save_str_file(header + body, filepath)

    if verbose:
        logger.info(f' > Saving params at {filepath}\n')

    return
def wrapper(*args, **kwargs):
    """Call ``func`` and log when it starts and how long it took."""
    started_at = time()
    logger.info(f'> {func.__name__} starts')

    outcome = func(*args, **kwargs)

    finished_at = time()
    elapsed = spent_time(started_at, finished_at)
    logger.info(f'> {func.__name__} executed in {elapsed}')

    return outcome
def save_soccer_model(model):
    """Save ``model`` as ``<model.name>.pth`` inside ``model.save_dir``.

    The folder is created when missing. When ``model.verbose`` is truthy the
    current epoch and the folder are logged.

    Returns:
        The path the model was saved to.
    """
    checkpoint_name = f'{model.name}.pth'
    target_dir = model.save_dir
    checkpoint_path = f'{target_dir}{checkpoint_name}'

    ensure_folder(target_dir)
    save_model(model, checkpoint_path)

    if model.verbose:
        logger.info(f'> Saving checkpoint epoch {model.epoch} at {target_dir}')

    return checkpoint_path
def load_configs(league_name):
    """Load the production model of a league together with its configuration.

    The paths are read from the production paths registry under
    ``league_name``.

    Returns:
        ``(model, config)`` where ``config`` has the keys ``data``,
        ``league``, ``feat_eng`` and ``model``. If anything fails while
        resolving or loading, the error is logged and ``(None, None)`` is
        returned.
    """
    production_paths = load_production_paths()

    try:
        # Resolve every path first so a missing entry fails before any load.
        model_path = production_paths[league_name]['model_path']
        data_path = production_paths[league_name]['data_params']
        league_path = production_paths[league_name]['league_params']
        model_params_path = production_paths[league_name]['model_params']
        feat_eng_path = production_paths[league_name]['feat_eng']

        model = load_model(model_path)
        logger.info(f'> Loading Model: {model_path}')

        model_config = load_json(model_params_path)
        data_config = load_json(data_path)
        league_config = load_json(league_path)
        logger.info('> Loading Params')

        feat_eng = load_object(feat_eng_path)
        logger.info('> Loading Feature Engineering object\n\n')

        config = dict(data=data_config,
                      league=league_config,
                      feat_eng=feat_eng,
                      model=model_config)

    except Exception as error:
        # Best effort: callers receive (None, None) instead of an exception.
        model, config = None, None
        logger.info(f'Loading Model: {league_name} not found: {error}')

    return model, config
def save_model_paths_production(league_name, model_dir, model_name):
    """Register the artifacts of a trained model as the production ones.

    The entry of ``league_name`` in the production paths registry is replaced
    with paths built from ``model_dir`` and ``model_name``. The registry is
    then written to ``production_paths.json`` under ``PRODUCTION_DIR``.

    Args:
        league_name: registry key to update.
        model_dir: folder holding the model artifacts; file names are
            concatenated to it directly.
        model_name: model file name without the ``.pth`` extension.
    """
    production_paths = load_production_paths()

    model_dir = f'{model_dir}'
    production_paths[league_name] = {
        'model_dir': model_dir,
        'model_params': f'{model_dir}3.model_params.json',
        'league_params': f'{model_dir}1.league_params.json',
        'data_params': f'{model_dir}2.data_params.json',
        'model_path': f'{model_dir}{model_name}.pth',
        'feat_eng': f'{model_dir}feat_eng',
    }

    save_json(production_paths, f'{PRODUCTION_DIR}production_paths.json')

    separator = '---------------------------------------------------------'
    logger.info(separator)
    logger.info(f'\n\nUpdating production paths: {league_name.upper()}\n')
    logger.info(f'New Path : {model_dir}')
    logger.info(separator)
def save_simulation_details(sim_result, params, folder_dir):
    """Save a simulation result as JSON inside ``folder_dir``.

    The file name embeds ``params['field']``, ``params['thr']`` and
    ``params['filter_bet']``. The path is logged when ``params['verbose']``
    is truthy.
    """
    field, thr, filter_bet = params['field'], params['thr'], params['filter_bet']

    filepath = (f'{folder_dir}'
                f'5.simulations_details_{field}_thr={thr}_filter={filter_bet}.json')

    if params['verbose']:
        logger.info(f' > Saving training details at {filepath}')

    save_json(sim_result, filepath)

    return
def feature_engineering_league(league_df, n_prev_match):
    """Normalise raw league data and derive the match features.

    Steps, in order: rename the bookmaker/result columns, recode the result
    from ``H``/``D``/``A`` to ``1``/``X``/``2``, add the points earned by each
    side, keep only the columns used downstream, drop incomplete rows, and
    finally run ``creating_features`` and ``bind_last_matches``.

    Args:
        league_df: raw league dataframe.
        n_prev_match: forwarded to ``bind_last_matches``.

    Returns:
        The processed dataframe.
    """
    logger.info('\t\t\t > Feature Engineering for the league')

    column_names = {
        'B365H': 'bet_1',
        'B365D': 'bet_X',
        'B365A': 'bet_2',
        'FTR': 'result_1X2',
        'FTHG': 'home_goals',
        'FTAG': 'away_goals'
    }
    league_df = league_df.rename(columns=column_names)

    # Recode the full-time result.
    for raw_result, code in (('H', '1'), ('D', 'X'), ('A', '2')):
        league_df.loc[league_df['result_1X2'] == raw_result,
                      ['result_1X2']] = code

    # Points earned by (home, away) for each outcome.
    for code, (home_pts, away_pts) in (('1', (3, 0)), ('X', (1, 1)),
                                       ('2', (0, 3))):
        outcome = league_df['result_1X2'] == code
        league_df.loc[outcome, 'home_points'] = home_pts
        league_df.loc[outcome, 'away_points'] = away_pts

    kept_columns = [
        'league', 'season', 'Date', 'HomeTeam', 'AwayTeam', 'home_goals',
        'away_goals', 'result_1X2', 'bet_1', 'bet_X', 'bet_2', 'home_points',
        'away_points'
    ]
    league_df = league_df[kept_columns].dropna().reset_index(drop=True)

    league_df = creating_features(league_df)
    league_df = bind_last_matches(league_df, n_prev_match)

    return league_df
def update_league(league_name):
    """Update the stored data of one league.

    Request Args:
        - npm: int, must be greater than 0

    Returns:
        - 400 when ``npm`` is not a positive integer
        - 404 when the league name or its data do not pass the checks
        - 200 when the update succeeds, 400 when it fails
    """
    npm = request.args['npm']

    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # a non-numeric value would raise an unhandled ValueError.
    try:
        valid_npm = int(npm) > 0
    except (TypeError, ValueError):
        valid_npm = False
    if not valid_npm:
        msg = 'NPM must be greater than 0'
        logger.error(msg)
        return make_response(msg, 400)

    response_league_name = check_league(league_name)
    if response_league_name['check']:
        response_league_data = check_data_league(league_name, npm)
    else:
        response_league_data = {'check': False, 'msg': ''}

    msg = f"League_name: {response_league_name['msg']} \nLeague_data: {response_league_data['msg']}"

    if not response_league_name['check'] or not response_league_data['check']:
        return make_response(msg, 404)

    league_params = {
        'league_name': league_name,
        'n_prev_match': npm,
        'league_dir': DATA_DIR
    }
    update_response = update_data_league(league_params)

    if update_response['check']:
        succ_msg = f'Successful Update: {league_name} - npm={npm}'
        logger.info(succ_msg)
        response = make_response(
            f'Successful Update: {league_name} -> npm = {npm}', 200)
    else:
        fail_msg = f'Failed Update: {league_name} - npm={npm} \n {update_response["msg"]}'
        logger.error(fail_msg)
        response = make_response(
            f'Failed Update: {league_name} -> npm = {npm} \n {update_response["msg"]}',
            400)

    # UPDATE MATCHES LEAGUE
    # NOTE(review): assumed to run for every validated request, whether or
    # not the update itself succeeded -- confirm.
    Data_Api().write_league_matches(league_name, DATA_DIR)

    return response
def update_input_data(league_df, input_data, n_prev_match):
    """Extend the preprocessed home/away inputs with the newest league rows.

    When the last ``date`` stored in ``input_data`` is older than the last
    ``Date`` of ``league_df``, the league rows after that date are split per
    team by ``_split_teams`` and appended to ``input_data['home']`` and
    ``input_data['away']``.

    Args:
        league_df: league dataframe with a ``Date`` column.
        input_data: mapping with ``'home'`` and ``'away'`` dataframes, each
            having a ``date`` column; updated in place.
        n_prev_match: forwarded to ``_split_teams``.

    Returns:
        ``input_data``.

    Raises:
        AssertionError: if the home and away inputs end on different dates.
    """
    logger.info(f'> Updating league data preprocessed: data ')

    last_date = pd.to_datetime(league_df.iloc[-1]['Date'])
    date_home = pd.to_datetime(input_data['home'].iloc[-1]['date'])
    date_away = pd.to_datetime(input_data['away'].iloc[-1]['date'])

    assert date_home == date_away
    date = date_home

    if date < last_date:
        data = league_df[league_df['Date'] > date]
        # presumably returns a mapping of 'home'/'away' dataframes, as the
        # original append calls required -- verify against _split_teams
        update_data = _split_teams(data, n_prev_match)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # supported equivalent.
        for side in ('home', 'away'):
            input_data[side] = pd.concat(
                [input_data[side], update_data[side]]).reset_index(drop=True)

    return input_data
def _load_model_config(paths):
    """Load a model and its configuration from a mapping of paths.

    Args:
        paths: mapping with the keys ``model``, ``model_params``,
            ``league_params``, ``data_params`` and ``feat_eng``.

    Returns:
        ``(model, config)`` where ``config`` has the keys ``data``,
        ``league``, ``feat_eng`` and ``model``.
    """
    # Resolve every path up front so a missing key fails before any load.
    model_path = paths['model']
    model_params_path = paths['model_params']
    league_params_path = paths['league_params']
    data_params_path = paths['data_params']
    feat_eng_path = paths['feat_eng']

    model = load_model(model_path)
    logger.info(f'\n> Loading Model: {model_path}')

    model_config = load_json(model_params_path)
    data_config = load_json(data_params_path)
    league_config = load_json(league_params_path)
    logger.info('\n> Loading Params')

    feat_eng = load_object(feat_eng_path)
    logger.info('\n> Loading Feature Engineering object')

    config = dict(data=data_config,
                  league=league_config,
                  feat_eng=feat_eng,
                  model=model_config)

    return model, config
def predict(self, input_data, field):
    """Average the predictions of every fold model for one side.

    Args:
        input_data: iterable of inputs; each one is converted to a tensor,
            moved to ``self.device`` and fed to the selected network.
        field: ``'home'`` or ``'away'`` (case-insensitive); selects
            ``home_network`` or ``away_network`` of each fold model.

    Returns:
        The mean over all collected outputs (every fold, every input).

    Raises:
        AssertionError: if ``field`` is neither ``'home'`` nor ``'away'``.
    """
    model_name = str(field).lower()
    assert model_name in ('home', 'away'), 'ERROR - model predict: WRONG model name. Give "home" or "away"'

    collected = []

    for fold in range(self.n_folds):
        if model_name == 'home':
            logger.info('> Calling Home Network')
            network = self.fold_model[fold].home_network
        elif model_name == 'away':
            logger.info('> Calling Away Network')
            network = self.fold_model[fold].away_network
        else:
            # Still reachable when assertions are disabled (python -O).
            raise ValueError('Model - predict: Wrong model name')

        logger.info('> Prediction')
        with torch.no_grad():
            for sample in input_data:
                tensor = torch.Tensor(sample).to(self.device)
                output = network(tensor).squeeze()
                collected.append(output.item())

    return np.mean(collected, axis=0)
def update_all_leagues():
    """Update the stored data of every league in ``LEAGUE.LEAGUE_NAMES``.

    Request Args:
        - npm: int, must be greater than 0

    Returns:
        A response whose body maps each league name to the message of its
        update (success, failure, or failed checks). A 400 response is
        returned when ``npm`` is not a positive integer.
    """
    npm = request.args['npm']

    # Validate explicitly: an ``assert`` is stripped under ``python -O`` and
    # a non-numeric value would raise an unhandled ValueError.
    try:
        valid_npm = int(npm) > 0
    except (TypeError, ValueError):
        valid_npm = False
    if not valid_npm:
        msg = 'NPM must be greater than 0'
        logger.error(msg)
        return make_response(msg, 400)

    response = {}

    for league_name in LEAGUE.LEAGUE_NAMES:
        response_league_name = check_league(league_name)
        if response_league_name['check']:
            response_league_data = check_data_league(league_name, npm)
        else:
            response_league_data = {'check': False, 'msg': ''}

        msg = f"League_name: {response_league_name['msg']} \nLeague_data: {response_league_data['msg']}"

        if not response_league_name['check'] or not response_league_data['check']:
            # Report the failed checks and move on to the next league.
            response[league_name] = msg
            continue

        logger.info('--------------------------------------')
        logger.info(f'\nUpdating {league_name.upper()}\n')

        league_params = {
            'league_name': league_name,
            'n_prev_match': npm,
            'league_dir': DATA_DIR
        }
        update_response = update_data_league(league_params)

        if update_response['check']:
            succ_msg = f'Successful Update: {league_name} - npm={npm}'
            logger.info(succ_msg)
            response[league_name] = f'Successful Update: {league_name} -> npm = {npm}'
        else:
            fail_msg = f'Failed Update: {league_name} - npm={npm} \n {update_response["msg"]}'
            logger.error(fail_msg)
            response[league_name] = f'Failed Update: {league_name} -> npm = {npm} : {fail_msg}'

    return make_response(response)
def extract_data_league(self):
    """Return the league dataframe described by ``self.params``.

    Training mode (``params['train']`` truthy): an existing league CSV is
    loaded (and refreshed through ``update_league_data`` when
    ``params['update']`` is truthy) and written back; otherwise the data is
    generated with ``extract_training_data`` and saved.

    Test mode: the existing league CSV is read and only its last
    ``params['test_size']`` rows are returned.

    Raises:
        AssertionError: in test mode when ``params['league_dir']`` is None.
    """
    league_name = self.params['league_name']
    n_prev_match = int(self.params['n_prev_match'])
    train = str2bool(self.params['train'])
    test_size = int(self.params['test_size'])
    league_dir = self.params['league_dir']
    update = self.params['update']

    logger.info(f'> Extracting {league_name} data: train={train}')

    # Same CSV location in both modes; None when no directory is configured.
    if league_dir is not None:
        league_path = f'{league_dir}{league_name}/{league_name}_npm={n_prev_match}.csv'
    else:
        league_path = None

    if not train:
        # LOADING JUST THE LAST SEASON
        assert league_path is not None
        return pd.read_csv(league_path, index_col=0).iloc[-test_size:]

    # LOADING TRAINING DATA --> ALL DATA SEASON
    if league_path is not None and exists(league_path):
        # LEAGUE CSV ALREADY EXISTING
        league_df = pd.read_csv(league_path, index_col=0)
        if update:
            league_df = update_league_data(league_df, n_prev_match)
        logger.info('> Updating league data')
        league_df.to_csv(league_path)
    else:
        # GENERATING LEAGUE CSV
        league_df = extract_training_data(league_name, n_prev_match)
        logger.info(f'Saving data at {league_path}')
        league_df.to_csv(league_path)

    return league_df
def production_training():
    """Train a model for production in two passes and register it.

    The first pass trains with ``test_size = 0`` and ``split_size = 0`` to
    obtain the train loss. The second pass trains with ``split_size = 1``
    and that loss as ``stop_loss``. The resulting model is then registered
    through ``save_model_paths_production``.

    Requested Args:
        - epochs
        - patience

    Requested Params:
        dict{'league': LEAGUE_PARAMS,
             'data': DATA_PARAMS,
             'model': MODEL_PARAMS,
             'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict{
            "league_name": "serie_a",
            "n_prev_match": int,
            "league_dir": DATA_DIR,
            "train": bool
        }

        DATA_PARAMS: dict{
            "normalize": bool,
            "window_size": int,
            "dataset": ["base" | "windowed"],
            "batch_size": int,
            "split_size": float,
            "test_size": int,
            "version": [1 | 2],
            "league_dir": DATA_DIR,
            "train": bool
        }

        MODEL_PARAMS: dict{
            "dataset": ["base" | "windowed"],
            "version": [1 | 2],
            "out_lstm": int,
            "n_lstm_layer": int,
            "bidirectional": bool,
            "kernel": int,
            "padding": int,
            "conv_layers": int,
            "optimizer": "adam",
            "lr": float,
            "loss": "bce",
            "seed": int,
            "device": "gpu",
            "verbose": bool,
            "plot": bool,
            "static_dir": STATIC_DIR
        }

    Returns:
        json_response: dict{'model_dir': str,
                            'model_name': str,
                            'losses': list,
                            'mean loss': float}
        with status 200, or a 400 response carrying the validation messages.
    """
    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass; the params check used to be skipped
    # because the args check was tested twice.
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    epochs, patience = args['epochs'], args['patience']
    league_name = params['league']['league_name']

    logger.info(f'> Training {league_name.upper()}\n')

    # TRAINING FOR CATCHING OPTIMAL TRAIN LOSS
    params['data']['test_size'] = 0
    params['data']['split_size'] = 0
    print(f'\n\n>>> Params: \n{params}\n\n')

    result = training_snippet(epochs, patience, params, production=True)
    # NOTE(review): training() in this file unpacks training_snippet's
    # result as (model_response, model_config); accept both shapes here.
    model_response = result[0] if isinstance(result, tuple) else result
    print(model_response)

    if model_response['mean_loss'] is None:
        optimal_train_loss = model_response['losses']['train']
    else:
        optimal_train_loss = model_response['mean_loss']['train']

    # TRAINING FOR PRODUCTION SETTINGS
    params['data']['split_size'] = 1
    params['model']['stop_loss'] = optimal_train_loss
    print(f'\n\n>>> Params: \n{params}\n\n')

    result = training_snippet(epochs, patience, params, production=True)
    model_response = result[0] if isinstance(result, tuple) else result

    model_dir = model_response['model_dir']
    model_name = model_response['model_name']
    save_model_paths_production(league_name, model_dir, model_name)

    return make_response(model_response, 200)
def ensure_folder(folder):
    """Create ``folder`` (and any missing parent) when it does not exist.

    The creation is logged; nothing happens when the folder already exists.
    """
    if not exists(folder):
        logger.info(f'\n> Creating folder at {folder}\n')
        # exist_ok avoids a FileExistsError when another process creates the
        # folder between the check above and this call.
        os.makedirs(folder, exist_ok=True)

    return
def multiple_training():
    """Train one model per value of a single data or model parameter.

    Requested Args:
        - epochs
        - patience

    Requested Params:
        dict{'league': LEAGUE_PARAMS,
             'data': DATA_PARAMS,
             'model': MODEL_PARAMS,
             'production': PRODUCTION_PARAMS,
             'multi_training': MULTI_TRAINING_PARAMS}

        LEAGUE_PARAMS: dict{}
        DATA_PARAMS: dict{}
        MODEL_PARAMS: dict{}
        PRODUCTION_PARAMS: dict{'production': bool,
                                'stop_loss': float}
        MULTI_TRAINING_PARAMS: dict{'param': str,
                                    'values': list}

    Returns:
        json_response: dict{param_name: {str(value): {'model_dir': str,
                                                      'model_name': str,
                                                      'losses': list,
                                                      'mean_loss': float}}}
        with status 200, or a 400 response when the validation or one of
        the trainings fails.

    Raises:
        ValueError: if the multi-training parameter is neither a data nor a
            model parameter.
    """
    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass; the params check used to be skipped
    # because the args check was tested twice.
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    league_params = params['league']
    data_params = params['data']
    model_params = params['model']
    production_params = params.get('production')

    if production_params is not None:
        model_params = {**model_params, **production_params}
        data_params = {**data_params, **production_params}

    epochs, patience = args['epochs'], args['patience']

    # MULTI TRAINING SETUP
    multi_training_params = params['multi_training']
    param_name = multi_training_params['param']
    param_values = multi_training_params['values']

    # Created once so that the result of every value is kept; it used to be
    # re-created on each iteration, dropping the previous results.
    response = {param_name: {}}

    for value in param_values:
        if param_name in data_params:
            data_params[param_name] = value
        elif param_name in model_params:
            model_params[param_name] = value
        else:
            raise ValueError(
                f'Multi-Training: Wrong param name >> {param_name} <<')

        logger.info(
            f'\n>>> Multi training on {param_name.upper()}: {value} \n')

        init_env_paths(model_params['version'])
        model_name, model_dir = model_directory(league_params, data_params,
                                                model_params)
        model_params['name'] = model_name
        model_params['save_dir'] = model_dir
        data_params['save_dir'] = model_dir

        # SAVING PARAMS
        save_all_params(model_dir, league_params, data_params, model_params,
                        production_params)

        try:
            # EXTRACTION DATA LEAGUE
            extraction_params = check_data_params({
                **league_params,
                **data_params
            })
            league_csv, input_data = extract_data_league(extraction_params)

            # DATALOADER GENERATION
            dataset_params = check_dataset_params(data_params)
            dataloader, feat_eng, in_features = generate_dataset(
                input_data, dataset_params)

            # NETWORK INITIALIZATION
            network = init_network(in_features, model_params)
            Model = init_model(data_params['dataset'])
            soccer_model = Model(network, model_params, dataloader)
            soccer_model.train(epochs, patience)

            losses, mean_loss = soccer_model.get_losses()
            model_response = {
                'model_dir': model_dir,
                'model_name': model_name,
                'losses': losses,
                'mean_loss': mean_loss
            }

            inference_params = {**league_params, **data_params}

            if inference_params['test_size'] > 0:
                for field in [HOME, AWAY]:
                    inference_params['field'] = field
                    testset, pred, true = real_case_inference(
                        soccer_model, inference_params, feat_eng)
                    # The analysis results are not part of the response;
                    # the calls are kept as in the original flow.
                    thr_analysis(true, pred, inference_params)
                    simulation_params = check_simulation_params(
                        inference_params)
                    strategy_stats(testset, pred, true, simulation_params)

        except Exception as error:
            logger.error(f'General Error: {error}')
            return make_response({'msg': f'General Error: {error}'}, 400)

        except KeyboardInterrupt:
            return make_response({'Interrupt': 'Keyboard Interrupt'})

        response[param_name][str(value)] = model_response

    return make_response(response, 200)
def training():
    """Train a model and optionally run statistics and a simulation on it.

    Requested Args:
        - epochs
        - patience
        - simulation
        - stats

    Requested Params:
        dict{'league': LEAGUE_PARAMS,
             'data': DATA_PARAMS,
             'model': MODEL_PARAMS,
             'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict{}
        DATA_PARAMS: dict{}
        MODEL_PARAMS: dict{}
        PRODUCTION_PARAMS: dict{'active': bool,
                                'phase': eval / final,
                                'stop_loss': float}
        NOTE(review): the code reads ``params['production']['production']``,
        which is not listed above -- confirm the expected schema.

    Returns:
        json_response: dict{'model_dir': str,
                            'model_name': str,
                            'losses': list,
                            'mean loss': float}
        with status 200, or a 400 response carrying the validation messages.
    """
    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass; the params check used to be skipped
    # because the args check was tested twice.
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    epochs, patience = args['epochs'], args['patience']
    league_name = params['league']['league_name']

    logger.info(f'> Training {league_name.upper()}\n')

    production = str2bool(params.get('production').get('production'))
    model_response, model_config = training_snippet(epochs, patience, params,
                                                    production)

    # SIMULATION AND STATISTICS
    stats_option = str2bool(args.get('stats'))
    simulation_option = str2bool(args.get('simulation'))

    sim_params = {**params['league'], **params['data']}
    feat_eng = model_config['feat_eng']
    model = model_config['model']
    test_size = sim_params['test_size']

    # Both steps need a test set; their results are not part of the response.
    if stats_option and test_size > 0:
        generate_strategy_stats(model, params, feat_eng)

    if simulation_option and test_size > 0:
        simulation_process(model,
                           sim_params,
                           feat_eng,
                           save_dir=model_response['model_dir'])

    if production:
        save_model_paths_production(league_name, model_response['model_dir'],
                                    model_response['model_name'])

    return make_response(model_response, 200)