def check_dataset_params(data_params):
    """Validate and type-cast the dataset params, filling defaults for optional keys."""
    params_list = list(data_params.keys())
    params = {}
    params[TRAIN_LABEL] = str2bool(
        data_params[TRAIN_LABEL]) if TRAIN_LABEL in params_list else -1
    params[NORMALIZE_LABEL] = str2bool(
        data_params[NORMALIZE_LABEL]
    ) if NORMALIZE_LABEL in params_list else DEFAULT_NORMALIZE
    params[SAVE_DIR_LABEL] = data_params[
        SAVE_DIR_LABEL] if SAVE_DIR_LABEL in params_list else DEFAULT_SAVE_DIR
    params[DATASET_LABEL] = data_params[
        DATASET_LABEL] if DATASET_LABEL in params_list else DEFAULT_DATASET
    params[BATCH_SIZE_LABEL] = int(
        data_params[BATCH_SIZE_LABEL]
    ) if BATCH_SIZE_LABEL in params_list else DEFAULT_BATCH_SIZE
    params[TEST_SIZE_LABEL] = int(
        data_params[TEST_SIZE_LABEL]
    ) if TEST_SIZE_LABEL in params_list else DEFAULT_TEST_SIZE
    params[WINDOW_LABEL] = int(
        data_params[WINDOW_LABEL]
    ) if WINDOW_LABEL in params_list else DEFAULT_WINDOW
    params[VERSION_LABEL] = int(
        data_params[VERSION_LABEL]
    ) if VERSION_LABEL in params_list else DEFAULT_VERSION

    # Dataset-specific params: the windowed dataset uses split settings
    # (eval size, max train size, n splits, gap); other datasets use a single split size.
    if params[DATASET_LABEL] == WINDOWED_DATASET:
        params[EVAL_SIZE_LABEL] = int(
            data_params[EVAL_SIZE_LABEL]
        ) if EVAL_SIZE_LABEL in params_list else DEFAULT_EVAL_SIZE
        params[MAX_TRAIN_SIZE_LABEL] = int(
            data_params[MAX_TRAIN_SIZE_LABEL]
        ) if MAX_TRAIN_SIZE_LABEL in params_list else DEFAULT_MAX_TRAIN_SIZE
        params[N_SPLITS_LABEL] = int(
            data_params[N_SPLITS_LABEL]
        ) if N_SPLITS_LABEL in params_list else DEFAULT_N_SPLITS
        params[GAP_LABEL] = int(
            data_params[GAP_LABEL]) if GAP_LABEL in params_list else DEFAULT_GAP
    else:
        params[SPLIT_SIZE_LABEL] = float(
            data_params[SPLIT_SIZE_LABEL]
        ) if SPLIT_SIZE_LABEL in params_list else DEFAULT_SPLIT_SIZE

    params[PRODUCTION_LABEL] = str2bool(
        data_params[PRODUCTION_LABEL]
    ) if PRODUCTION_LABEL in params_list else DEFAULT_PRODUCTION
    # params[PROD_PHASE_LABEL] = data_params[PROD_PHASE_LABEL] if PROD_PHASE_LABEL in params_list else DEFAULT_PROD_PHASE
    params[PLOT_LABEL] = str2bool(
        data_params[PLOT_LABEL]) if PLOT_LABEL in params_list else DEFAULT_PLOT

    # -1 marks a required key that was missing from data_params
    if -1 in params.values():
        raise ParameterError(
            f'Invalid or Missing Data Params:\n {data_params}')

    return params

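# The check functions in this module rely on two helpers defined elsewhere in the repo:
# str2bool and ParameterError. A minimal sketch of what they are assumed to look like is
# given below for reference only -- the actual implementations in the repo may differ.


def str2bool(value):
    """Interpret a string (or bool) flag as a boolean. Assumed behaviour, not the repo's code."""
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('true', '1', 'yes', 'y')


class ParameterError(Exception):
    """Raised when a required parameter is missing or invalid. Assumed definition."""
    pass
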
def generate_dataset(input_data, params):
    """Build the feature-engineering objects and the training dataloader for home/away data."""
    train = str2bool(params['train'])
    normalize = str2bool(params['normalize'])
    home_data = input_data['home']
    away_data = input_data['away']
    save_dir = params['save_dir']

    if int(params['version']) == 1:
        home_feat_eng = Feature_engineering_v1(home_data, normalize=normalize, field=HOME)
        away_feat_eng = Feature_engineering_v1(away_data, normalize=normalize, field=AWAY)
    elif int(params['version']) == 2:
        home_feat_eng = Feature_engineering_v2(home_data, normalize=normalize, field=HOME)
        away_feat_eng = Feature_engineering_v2(away_data, normalize=normalize, field=AWAY)
    elif int(params['version']) == 3:
        home_feat_eng = Feature_engineering_v3(home_data, normalize=normalize, field=HOME)
        away_feat_eng = Feature_engineering_v3(away_data, normalize=normalize, field=AWAY)
    else:
        raise ValueError(
            f'Unsupported feature engineering version: {params["version"]}')

    feat_eng = {'home': home_feat_eng, 'away': away_feat_eng}
    data = {
        'home': home_feat_eng.transforms(home_data, train),
        'away': away_feat_eng.transforms(away_data, train)
    }

    dataloader, in_features = create_training_dataloader(data, params)

    if save_dir is not None:
        filepath = f'{save_dir}feat_eng'
        # logger.info(f' > Saving Feat.Eng object at {filepath}')
        save_object(feat_eng, filepath)

    return dataloader, feat_eng, in_features

def __init__(self, network, params, dataloader):
    self.name = params['name']
    self.device = get_device_from_name(params['device'])
    self.model = network.to(self.device)

    self.trainloader = dataloader['train']
    self.evalloader = dataloader['eval']
    # self.testloader = dataloader['test'] if 'test' in list(dataloader.keys()) else None

    self.optimizer = get_optimizer_from_name(params['optimizer'])(
        self.model.parameters(), lr=params['lr'])
    self.loss_function = get_loss_from_name(params['loss'])

    self.epoch = 0
    self.losses = {'train': [], 'eval': []}

    # Visualization
    self.plot_type = params['plot_type'] if 'plot_type' in params else 'pyplot'
    self.plot_freq = params['plot_freq'] if 'plot_freq' in params else None

    self.seed = params['seed']
    self.save_dir = params['save_dir']
    self.static_dir = params['static_dir'] if 'static_dir' in params else None
    self.save = True
    self.verbose = str2bool(params['verbose']) if 'verbose' in params else True
    self.plot = str2bool(params['plot']) if 'plot' in params else True

    # REPRODUCIBILITY
    np.random.seed(self.seed)
    torch.manual_seed(self.seed)

    # EARLY STOPPING
    self.es_patience = params['es_patience'] if 'es_patience' in params else 0.005

    # PRODUCTION PARAMS
    self.production = False
    if (len(self.evalloader.dataset.x['home']) == 0
            and len(self.evalloader.dataset.x['away']) == 0):
        self.production = str2bool(params['production'])
    self.stop_loss = params.get('stop_loss')

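# Note on the optimizer construction above: get_optimizer_from_name is assumed to return an
# optimizer *class* (not an instance), which is then called with the model parameters and the
# learning rate. A minimal sketch of such a lookup, under that assumption (the mapping below
# is illustrative, not the repo's actual implementation):
#
# def get_optimizer_from_name(name):
#     return {'adam': torch.optim.Adam, 'sgd': torch.optim.SGD}[name.lower()]
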
def model_directory(league_params, data_params, model_params, production=False):
    league_name = league_params['league_name']
    dataset_type = data_params['dataset']
    feat_eng_v = data_params['version']
    network_v = model_params['version']
    # NOTE: the 'production' keyword argument is overridden by the value in model_params
    production = str2bool(model_params.get('production'))

    name = f'{dataset_type}_fe={feat_eng_v}_net={network_v}'
    timestamp = get_timestamp_string()

    ckp_model_path = PRODUCTION_DIR if production else os.environ['CKP_MODEL_PATH']

    if production:
        model_name = f'{league_name.upper()}_{timestamp}_{name}_PRODUCTION'
    else:
        model_name = f'{league_name.upper()}_{timestamp}_{name}'

    model_dir = f'{ckp_model_path}{league_name}/{model_name}/'

    return model_name, model_dir

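# For illustration only (hypothetical values): with league_name='serie_a', dataset='windowed',
# feature-engineering version 2, network version 1, production disabled and
# CKP_MODEL_PATH='./checkpoints/', the function returns something like
#   model_name = 'SERIE_A_<timestamp>_windowed_fe=2_net=1'
#   model_dir  = './checkpoints/serie_a/SERIE_A_<timestamp>_windowed_fe=2_net=1/'
# where <timestamp> comes from get_timestamp_string() (its exact format is not shown here).
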
def __init__(self, name, in_features, params):
    super(LSTM_Network, self).__init__()
    self.name = name
    self.dataset_type = params['dataset']
    self.bidirectional = str2bool(params['bidirectional'])
    # self.window = params['window']-1 if params['window'] is not None else None

    torch.manual_seed(int(params['seed']))

    out_features = params['out_lstm']
    self.lstm = nn.LSTM(input_size=in_features,
                        hidden_size=out_features,
                        num_layers=params['n_lstm_layer'],
                        bidirectional=self.bidirectional)

    # The LSTM outputs hidden_size features (doubled when bidirectional),
    # which is the input size of the following dense layer.
    in_features = out_features * 2 if self.bidirectional else out_features

    out_features = in_features // 2
    self.dense = nn.Linear(in_features, out_features)
    self.dense_act = nn.ReLU()

    in_features = out_features
    out_features = 1
    self.fc = nn.Linear(in_features, out_features)
    self.fc_act = nn.Sigmoid()

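# A minimal instantiation sketch for the network above. Every value below is hypothetical --
# the real configurations live in the repo's parameter files -- and only the constructor is
# exercised (no forward pass is shown here):
#
# example_params = {
#     'dataset': 'windowed',
#     'bidirectional': 'true',
#     'seed': 2212,
#     'out_lstm': 32,
#     'n_lstm_layer': 1,
# }
# net = LSTM_Network(name='lstm_example', in_features=16, params=example_params)
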
def check_data_params(data_params):
    params_list = list(data_params.keys())
    params = {}
    params[LEAGUE_NAME_LABEL] = data_params[
        LEAGUE_NAME_LABEL] if LEAGUE_NAME_LABEL in params_list else -1
    params[N_PREV_MATCH_LABEL] = int(
        data_params[N_PREV_MATCH_LABEL]
    ) if N_PREV_MATCH_LABEL in params_list else -1
    params[TRAIN_LABEL] = str2bool(
        data_params[TRAIN_LABEL]) if TRAIN_LABEL in params_list else -1
    params[TEST_SIZE_LABEL] = int(
        data_params[TEST_SIZE_LABEL]
    ) if TEST_SIZE_LABEL in params_list else DEFAULT_TEST_SIZE
    params[LEAGUE_DIR_LABEL] = data_params[
        LEAGUE_DIR_LABEL] if LEAGUE_DIR_LABEL in params_list else -1
    params[UPDATE_LABEL] = data_params[
        UPDATE_LABEL] if UPDATE_LABEL in params_list else DEFAULT_UPDATE
    params[PLOT_LABEL] = data_params[
        PLOT_LABEL] if PLOT_LABEL in params_list else DEFAULT_PLOT

    if -1 in params.values():
        raise ParameterError(
            f'Invalid or Missing Data Params:\n {data_params}')

    return params

def extract_data_league(self):
    """Load (or build) the league dataframe: all seasons when training, the last test_size rows otherwise."""
    league_name = self.params['league_name']
    n_prev_match = int(self.params['n_prev_match'])
    train = str2bool(self.params['train'])
    test_size = int(self.params['test_size'])
    league_dir = self.params['league_dir']
    update = self.params['update']

    logger.info(f'> Extracting {league_name} data: train={train}')

    if train:
        # LOADING TRAINING DATA --> ALL DATA SEASON
        league_path = f'{league_dir}{league_name}/{league_name}_npm={n_prev_match}.csv' \
            if league_dir is not None else None

        # LEAGUE CSV ALREADY EXISTING
        if league_path is not None and exists(league_path):
            league_df = pd.read_csv(league_path, index_col=0)
            if update:
                logger.info('> Updating league data')
                league_df = update_league_data(league_df, n_prev_match)
            league_df.to_csv(league_path)
        # GENERATING LEAGUE CSV
        else:
            league_df = extract_training_data(league_name, n_prev_match)
            logger.info(f'Saving data at {league_path}')
            league_df.to_csv(league_path)
    else:
        # LOADING JUST THE LAST SEASON
        league_path = f'{league_dir}{league_name}/{league_name}_npm={n_prev_match}.csv' \
            if league_dir is not None else None
        assert league_path is not None
        league_df = pd.read_csv(league_path, index_col=0).iloc[-test_size:]
        # league_df = extract_test_data(league_name, n_prev_match, test_size)

    return league_df

def data_preprocessing(league_df, params):
    """Split the league dataframe into home/away inputs, reusing preprocessed CSVs when available."""
    n_prev_match = int(params['n_prev_match'])
    train = str2bool(params['train'])
    test_size = int(params['test_size'])
    league_dir = params['league_dir']
    league_name = params['league_name']
    update = params['update']

    data = league_df.copy(deep=True)
    input_data = {}

    prep_league_path = {
        x: f'{league_dir}{league_name}/prep_{x}_{league_name}_npm={n_prev_match}.csv'
        for x in ['home', 'away']
    }

    if (league_dir is not None and exists(prep_league_path['home'])
            and exists(prep_league_path['away'])):
        for x in ['home', 'away']:
            input_data[x] = pd.read_csv(prep_league_path[x], index_col=0)
        input_data = update_input_data(data, input_data, n_prev_match) if update else input_data

    if len(input_data) == 0:
        input_data = _split_teams(data, n_prev_match)

    if train:
        input_data['home'].to_csv(prep_league_path['home'])
        input_data['away'].to_csv(prep_league_path['away'])
    else:
        input_data['home'] = input_data['home'].iloc[-test_size:]
        input_data['away'] = input_data['away'].iloc[-test_size:]

    return input_data

def check_simulation_params(sim_params):
    params_list = list(sim_params.keys())
    params = {}
    params[TEST_SIZE_LABEL] = int(
        sim_params[TEST_SIZE_LABEL]
    ) if TEST_SIZE_LABEL in params_list else DEFAULT_TEST_SIZE
    params[THR_LABEL] = float(
        sim_params[THR_LABEL]) if THR_LABEL in params_list else DEFAULT_THR
    params[N_MATCHES_LABEL] = sim_params[
        N_MATCHES_LABEL] if N_MATCHES_LABEL in params_list else -1
    params[COMBO_LABEL] = sim_params[
        COMBO_LABEL] if COMBO_LABEL in params_list else None
    params[COMBO_LIST_LABEL] = sim_params[
        COMBO_LIST_LABEL] if COMBO_LIST_LABEL in params_list else DEFAULT_COMBO_LIST
    params[FILTER_BET_LABEL] = sim_params[
        FILTER_BET_LABEL] if FILTER_BET_LABEL in params_list else DEFAULT_FILTER_BET
    params[MONEY_BET_LABEL] = sim_params[
        MONEY_BET_LABEL] if MONEY_BET_LABEL in params_list else DEFAULT_MONEY_BET
    params[THR_LIST_LABEL] = sim_params[
        THR_LIST_LABEL] if THR_LIST_LABEL in params_list else DEFAULT_THR_LIST
    params[FILTER_BET_LIST_LABEL] = sim_params[
        FILTER_BET_LIST_LABEL] if FILTER_BET_LIST_LABEL in params_list else DEFAULT_FILTER_BET_LIST
    params[FIELD_LABEL] = sim_params[
        FIELD_LABEL] if FIELD_LABEL in params_list else -1
    params[SAVE_DIR_LABEL] = sim_params[
        SAVE_DIR_LABEL] if SAVE_DIR_LABEL in params_list else DEFAULT_SAVE_DIR
    params[VERBOSE_LABEL] = str2bool(
        sim_params[VERBOSE_LABEL]
    ) if VERBOSE_LABEL in params_list else DEFAULT_VERBOSE

    if params[FIELD_LABEL] == -1:
        raise ParameterError(
            f'Invalid or Missing Field Params:\n {params[FIELD_LABEL]}')

    return params

def __init__(self, name, in_features, params):
    super(LSTM_FCN_Network, self).__init__()
    self.name = name
    self.dataset_type = params['dataset']
    # self.window = params['window'] - 1 if params['window'] is not None else None

    torch.manual_seed(int(params['seed']))

    bidirectional = str2bool(params['bidirectional'])
    out_features = int(params['out_lstm'])
    self.lstm = nn.LSTM(input_size=in_features,
                        hidden_size=out_features,
                        num_layers=int(params['n_lstm_layer']),
                        bidirectional=bidirectional)

    # The LSTM outputs hidden_size features (doubled when bidirectional),
    # which feed the convolutional stack below.
    in_features = out_features * 2 if bidirectional else out_features

    kernel = int(params['kernel'])
    padding = int(params['padding'])
    n_conv_layers = int(params['conv_layers'])

    self.conv_layers = nn.Sequential()
    for i_layer in range(n_conv_layers - 1):
        out_features = in_features // 2
        self.conv_layers.add_module(
            f'Conv-1d-{i_layer+1}',
            nn.Conv1d(in_features,
                      out_features,
                      kernel_size=kernel,
                      padding=padding))
        self.conv_layers.add_module(f'Relu-{i_layer}', nn.ReLU())
        in_features = out_features

    # Final 1x1 convolution acting as the classification head
    out_features = 1
    self.fc = nn.Conv1d(in_features, out_features, kernel_size=1)
    self.fc_act = nn.Sigmoid()

def training():
    """
    Requested Args:
        - epochs
        - patience
        - simulation
        - stats

    Requested Params:
        dict{'league': LEAGUE_PARAMS,
             'data': DATA_PARAMS,
             'model': MODEL_PARAMS,
             'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict{}
        DATA_PARAMS: dict{}
        MODEL_PARAMS: dict{}
        PRODUCTION_PARAMS: dict{'active': bool,
                                'phase': eval / final,
                                'stop_loss': float}

    Returns:
        json_response: dict{'model_dir': str,
                            'model_name': str,
                            'losses': list,
                            'mean loss': float}
    """
    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        response = make_response(msg, 400)
    else:
        epochs, patience = args['epochs'], args['patience']
        league_name = params['league']['league_name']
        logger.info(f'> Training {league_name.upper()}\n')

        production = str2bool(params.get('production').get('production'))
        model_response, model_config = training_snippet(
            epochs, patience, params, production)

        # SIMULATION AND STATISTICS
        stats_option = str2bool(args.get('stats'))
        simulation_option = str2bool(args.get('simulation'))
        sim_params = {**params['league'], **params['data']}
        feat_eng = model_config['feat_eng']
        model = model_config['model']
        test_size = sim_params['test_size']

        if stats_option and test_size > 0:
            stats_df = generate_strategy_stats(model, params, feat_eng)

        if simulation_option and test_size > 0:
            model_dir = model_response['model_dir']
            sim_df = simulation_process(model,
                                        sim_params,
                                        feat_eng,
                                        save_dir=model_dir)

        if production:
            model_dir = model_response['model_dir']
            model_name = model_response['model_name']
            save_model_paths_production(league_name, model_dir, model_name)

        response = make_response(model_response, 200)

    return response

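# A hedged client-side sketch of how this endpoint could be called. The host and route are
# hypothetical (the Flask route decorator is not shown here); only the query args (epochs,
# patience, stats, simulation) and the top-level JSON keys come from the docstring above.
# The contents of the 'league', 'data' and 'model' dicts are placeholders -- see
# check_data_params / check_dataset_params for the expected keys.
#
# import requests
#
# payload = {
#     'league': {'league_name': 'serie_a'},       # hypothetical league params
#     'data': {...},                              # dataset params
#     'model': {...},                             # model params
#     'production': {'production': 'false'},      # production params
# }
# response = requests.post(
#     'http://localhost:5000/training',           # hypothetical URL
#     params={'epochs': 100, 'patience': 10, 'stats': 'true', 'simulation': 'true'},
#     json=payload)
# print(response.json())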