Esempio n. 1
0
def check_dataset_params(data_params):
    """Build the resolved dataset-parameter dict from raw ``data_params``.

    Every known label is copied from ``data_params`` (cast with
    ``str2bool`` / ``int`` / ``float`` where applicable) or replaced by its
    module-level default when absent. ``TRAIN_LABEL`` has no default: when
    it is missing the sentinel ``-1`` is stored instead.

    Args:
        data_params: mapping of parameter labels to raw values.

    Returns:
        dict: resolved parameters keyed by label.

    Raises:
        ParameterError: if any resolved value equals ``-1``.
    """

    def _resolve(label, fallback, cast=None):
        # Absent labels take the fallback untouched; present ones are cast.
        if label not in data_params:
            return fallback
        raw = data_params[label]
        return raw if cast is None else cast(raw)

    # (label, fallback, cast) triples shared by every dataset type.
    shared_specs = (
        (TRAIN_LABEL, -1, str2bool),
        (NORMALIZE_LABEL, DEFAULT_NORMALIZE, str2bool),
        (SAVE_DIR_LABEL, DEFAULT_SAVE_DIR, None),
        (DATASET_LABEL, DEFAULT_DATASET, None),
        (BATCH_SIZE_LABEL, DEFAULT_BATCH_SIZE, int),
        (TEST_SIZE_LABEL, DEFAULT_TEST_SIZE, int),
        (WINDOW_LABEL, DEFAULT_WINDOW, int),
        (VERSION_LABEL, DEFAULT_VERSION, int),
    )

    params = {}
    for label, fallback, cast in shared_specs:
        params[label] = _resolve(label, fallback, cast)

    # The windowed dataset takes its own split settings; every other
    # dataset type only needs a split ratio.
    if params[DATASET_LABEL] == WINDOWED_DATASET:
        dataset_specs = (
            (EVAL_SIZE_LABEL, DEFAULT_EVAL_SIZE, int),
            (MAX_TRAIN_SIZE_LABEL, DEFAULT_MAX_TRAIN_SIZE, int),
            (N_SPLITS_LABEL, DEFAULT_N_SPLITS, int),
            (GAP_LABEL, DEFAULT_GAP, int),
        )
    else:
        dataset_specs = ((SPLIT_SIZE_LABEL, DEFAULT_SPLIT_SIZE, float), )

    trailing_specs = (
        (PRODUCTION_LABEL, DEFAULT_PRODUCTION, str2bool),
        (PLOT_LABEL, DEFAULT_PLOT, str2bool),
    )

    for label, fallback, cast in dataset_specs + trailing_specs:
        params[label] = _resolve(label, fallback, cast)

    # -1 is the "missing mandatory parameter" sentinel.
    has_sentinel = -1 in params.values()
    if has_sentinel:
        raise ParameterError(
            f'Invalid or Missing Data Params:\n {data_params}')

    return params
Esempio n. 2
0
def generate_dataset(input_data, params):
    """Run feature engineering on home/away data and build the dataloader.

    Args:
        input_data: mapping with ``'home'`` and ``'away'`` entries.
        params: mapping read for ``'train'``, ``'normalize'``,
            ``'save_dir'`` and ``'version'`` (also forwarded to
            ``create_training_dataloader``).

    Returns:
        tuple: ``(dataloader, feat_eng, in_features)`` where ``feat_eng`` is
        a dict with the ``'home'`` and ``'away'`` feature-engineering objects.

    Raises:
        ValueError: if ``params['version']`` is not 1, 2 or 3.
    """
    train = str2bool(params['train'])
    normalize = str2bool(params['normalize'])
    home_data = input_data['home']
    away_data = input_data['away']
    save_dir = params['save_dir']

    # Feature-engineering implementation selected by version number.
    version = int(params['version'])
    feat_eng_by_version = {
        1: Feature_engineering_v1,
        2: Feature_engineering_v2,
        3: Feature_engineering_v3,
    }
    if version not in feat_eng_by_version:
        raise ValueError('---- Error version number ----')
    feat_eng_cls = feat_eng_by_version[version]

    home_feat_eng = feat_eng_cls(home_data, normalize=normalize, field=HOME)
    away_feat_eng = feat_eng_cls(away_data, normalize=normalize, field=AWAY)
    feat_eng = {'home': home_feat_eng, 'away': away_feat_eng}

    home_transformed = home_feat_eng.transforms(home_data, train)
    away_transformed = away_feat_eng.transforms(away_data, train)
    data = {'home': home_transformed, 'away': away_transformed}

    dataloader, in_features = create_training_dataloader(data, params)

    # Persist the fitted feature-engineering objects next to the model.
    # save_dir is concatenated as-is, so it must already end with a separator.
    if save_dir is not None:
        save_object(feat_eng, f'{save_dir}feat_eng')

    return dataloader, feat_eng, in_features
Esempio n. 3
0
    def __init__(self, network, params, dataloader):
        """Configure the trainer from ``params``.

        Args:
            network: model exposing ``.to(device)`` and ``.parameters()``.
            params: mapping with required keys ``'name'``, ``'device'``,
                ``'optimizer'``, ``'lr'``, ``'loss'``, ``'seed'`` and
                ``'save_dir'``; optional keys ``'plot_type'``,
                ``'plot_freq'``, ``'static_dir'``, ``'verbose'``,
                ``'plot'``, ``'es_patience'`` and ``'stop_loss'``.
                ``'production'`` is read only when the eval set is empty.
            dataloader: mapping with ``'train'`` and ``'eval'`` loaders.
        """
        self.name = params['name']
        self.device = get_device_from_name(params['device'])
        self.model = network.to(self.device)
        self.trainloader = dataloader['train']
        self.evalloader = dataloader['eval']

        optimizer_factory = get_optimizer_from_name(params['optimizer'])
        self.optimizer = optimizer_factory(self.model.parameters(),
                                           lr=params['lr'])
        self.loss_function = get_loss_from_name(params['loss'])

        self.epoch = 0
        self.losses = {'train': [], 'eval': []}

        # Visualization
        self.plot_type = params.get('plot_type', 'pyplot')
        self.plot_freq = params.get('plot_freq')

        self.seed = params['seed']

        self.save_dir = params['save_dir']
        self.static_dir = params.get('static_dir')
        self.save = True

        if 'verbose' in params:
            self.verbose = str2bool(params['verbose'])
        else:
            self.verbose = True

        if 'plot' in params:
            self.plot = str2bool(params['plot'])
        else:
            self.plot = True

        # Reproducibility: seed both NumPy and torch RNGs.
        np.random.seed(self.seed)
        torch.manual_seed(self.seed)

        # Early stopping
        self.es_patience = params.get('es_patience', 0.005)

        # Production params: only considered when both evaluation inputs
        # are empty.
        # NOTE(review): stop_loss is not set on the instance otherwise.
        self.production = False
        eval_inputs = self.evalloader.dataset.x
        no_eval_samples = (len(eval_inputs['home']) == 0
                           and len(eval_inputs['away']) == 0)
        if no_eval_samples:
            self.production = str2bool(params['production'])
            self.stop_loss = params.get('stop_loss')
def model_directory(league_params,
                    data_params,
                    model_params,
                    production=False):
    """Compose the model name and its checkpoint directory.

    Args:
        league_params: mapping providing ``'league_name'``.
        data_params: mapping providing ``'dataset'`` and ``'version'``.
        model_params: mapping providing ``'version'`` and, optionally,
            ``'production'``.
        production: fallback production flag, used only when
            ``model_params`` carries no ``'production'`` entry. Previously
            this argument was always overwritten and therefore ignored.

    Returns:
        tuple: ``(model_name, model_dir)``; ``model_dir`` is
        ``<base><league_name>/<model_name>/`` where the base is
        ``PRODUCTION_DIR`` in production mode and the ``CKP_MODEL_PATH``
        environment variable otherwise.

    Raises:
        KeyError: if a required key is missing, or ``CKP_MODEL_PATH`` is
            unset when not in production mode.
    """
    league_name = league_params['league_name']
    dataset_type = data_params['dataset']
    feat_eng_v = data_params['version']
    network_v = model_params['version']

    # The value in model_params wins when present; otherwise honour the
    # explicit argument instead of silently discarding it.
    raw_production = model_params.get('production')
    if raw_production is not None:
        production = str2bool(raw_production)
    production = bool(production)

    name = f'{dataset_type}_fe={feat_eng_v}_net={network_v}'

    timestamp = get_timestamp_string()

    ckp_model_path = PRODUCTION_DIR if production else os.environ[
        'CKP_MODEL_PATH']

    model_name = f'{league_name.upper()}_{timestamp}_{name}'
    if production:
        model_name = f'{model_name}_PRODUCTION'

    # Base paths are concatenated as-is, so they must end with a separator.
    model_dir = f'{ckp_model_path}{league_name}/{model_name}/'

    return model_name, model_dir
Esempio n. 5
0
    def __init__(self, name, in_features, params):
        """Build the LSTM -> dense -> sigmoid network.

        Args:
            name: identifier stored on the instance.
            in_features: number of input features fed to the LSTM.
            params: mapping with ``'dataset'``, ``'bidirectional'``,
                ``'seed'``, ``'out_lstm'`` (LSTM hidden size) and
                ``'n_lstm_layer'`` (number of stacked LSTM layers).
        """
        super(LSTM_Network, self).__init__()
        self.name = name
        self.dataset_type = params['dataset']
        self.bidirectional = str2bool(params['bidirectional'])

        # Seed before creating layers so weight initialisation is repeatable.
        torch.manual_seed(int(params['seed']))

        # int() keeps string-valued params working, as in LSTM_FCN_Network.
        hidden_size = int(params['out_lstm'])
        self.lstm = nn.LSTM(input_size=in_features,
                            hidden_size=hidden_size,
                            num_layers=int(params['n_lstm_layer']),
                            bidirectional=self.bidirectional)

        # nn.LSTM emits hidden_size features per direction, so the dense
        # layer must be sized from the hidden size (not from the LSTM input
        # width) in the unidirectional case as well.
        in_features = hidden_size * 2 if self.bidirectional else hidden_size

        out_features = in_features // 2
        self.dense = nn.Linear(in_features, out_features)
        self.dense_act = nn.ReLU()
        in_features = out_features

        # Single sigmoid-activated output unit.
        out_features = 1
        self.fc = nn.Linear(in_features, out_features)
        self.fc_act = nn.Sigmoid()
Esempio n. 6
0
def check_data_params(data_params):
    """Build the resolved league-data parameter dict from ``data_params``.

    ``LEAGUE_NAME_LABEL``, ``N_PREV_MATCH_LABEL``, ``TRAIN_LABEL`` and
    ``LEAGUE_DIR_LABEL`` are mandatory; the remaining labels fall back to
    their module-level defaults. The boolean flags ``UPDATE_LABEL`` and
    ``PLOT_LABEL`` are parsed with ``str2bool`` (as ``PLOT_LABEL`` already is
    in ``check_dataset_params``) so that a string such as ``"false"`` is not
    treated as truthy by consumers that test the flag directly.

    Args:
        data_params: mapping of parameter labels to raw values.

    Returns:
        dict: resolved parameters keyed by label.

    Raises:
        ParameterError: if a mandatory label is missing, or any resolved
            value equals the ``-1`` sentinel.
    """

    def _resolve(label, fallback, cast=None):
        # Absent labels take the fallback untouched; present ones are cast.
        if label not in data_params:
            return fallback
        raw = data_params[label]
        return raw if cast is None else cast(raw)

    params = {}
    params[LEAGUE_NAME_LABEL] = _resolve(LEAGUE_NAME_LABEL, -1)
    params[N_PREV_MATCH_LABEL] = _resolve(N_PREV_MATCH_LABEL, -1, int)
    params[TRAIN_LABEL] = _resolve(TRAIN_LABEL, -1, str2bool)
    params[TEST_SIZE_LABEL] = _resolve(TEST_SIZE_LABEL, DEFAULT_TEST_SIZE,
                                       int)
    params[LEAGUE_DIR_LABEL] = _resolve(LEAGUE_DIR_LABEL, -1)
    # Normalise boolean flags at the validation boundary.
    params[UPDATE_LABEL] = _resolve(UPDATE_LABEL, DEFAULT_UPDATE, str2bool)
    params[PLOT_LABEL] = _resolve(PLOT_LABEL, DEFAULT_PLOT, str2bool)

    # -1 is the "missing mandatory parameter" sentinel.
    if -1 in params.values():
        raise ParameterError(
            f'Invalid or Missing Data Params:\n {data_params}')

    return params
Esempio n. 7
0
    def extract_data_league(self):
        """Load (or generate) the league DataFrame described by ``self.params``.

        Reads ``'league_name'``, ``'n_prev_match'``, ``'train'``,
        ``'test_size'``, ``'league_dir'`` and ``'update'`` from
        ``self.params``.

        * Training mode: reuse the cached CSV when it exists (refreshing and
          re-saving it only when ``update`` is truthy); otherwise extract the
          training data and save it when a cache path is available.
        * Inference mode: read the cached CSV and keep the last
          ``test_size`` rows.

        Returns:
            pandas.DataFrame: the league data.

        Raises:
            AssertionError: in inference mode when ``league_dir`` is None.
        """
        league_name = self.params['league_name']
        n_prev_match = int(self.params['n_prev_match'])
        train = str2bool(self.params['train'])
        test_size = int(self.params['test_size'])
        league_dir = self.params['league_dir']
        update = self.params['update']

        logger.info(f'> Extracting {league_name} data: train={train}')

        # Cache location is the same for both modes; None when no directory
        # has been configured.
        league_path = None
        if league_dir is not None:
            league_path = (f'{league_dir}{league_name}/'
                           f'{league_name}_npm={n_prev_match}.csv')

        if not train:
            # LOADING JUST THE LAST SEASON
            assert league_path is not None, \
                'league_dir is required when train is False'
            # NOTE(review): with test_size == 0 the slice [-0:] keeps every
            # row — confirm this is intended.
            return pd.read_csv(league_path, index_col=0).iloc[-test_size:]

        # LOADING TRAINING DATA --> ALL DATA SEASON
        if league_path is not None and exists(league_path):
            # LEAGUE CSV ALREADY EXISTING
            league_df = pd.read_csv(league_path, index_col=0)
            if update:
                # Only log and rewrite the cache when something was refreshed.
                logger.info('> Updating league data')
                league_df = update_league_data(league_df, n_prev_match)
                league_df.to_csv(league_path)
        else:
            # GENERATING LEAGUE CSV
            league_df = extract_training_data(league_name, n_prev_match)
            if league_path is not None:
                # to_csv(None) would only build a throw-away string.
                logger.info(f'Saving data at {league_path}')
                league_df.to_csv(league_path)

        return league_df
def data_preprocessing(league_df, params):
    """Produce the per-side (``'home'`` / ``'away'``) input frames.

    Cached ``prep_<side>_...csv`` files are reused when both exist
    (optionally refreshed via ``update_input_data``); otherwise the frames
    are produced by ``_split_teams``. In training mode the frames are
    written back to the cache paths; otherwise only the last ``test_size``
    rows of each frame are kept.

    Args:
        league_df: league DataFrame; a deep copy is used internally.
        params: mapping with ``'n_prev_match'``, ``'train'``,
            ``'test_size'``, ``'league_dir'``, ``'league_name'`` and
            ``'update'``.

    Returns:
        dict: ``{'home': DataFrame, 'away': DataFrame}``.
    """
    n_prev_match = int(params['n_prev_match'])
    train = str2bool(params['train'])
    test_size = int(params['test_size'])
    league_dir = params['league_dir']
    league_name = params['league_name']
    update = params['update']

    sides = ('home', 'away')
    data = league_df.copy(deep=True)

    # NOTE(review): when league_dir is None these paths start with the
    # literal text "None" and are still used for saving in training mode.
    prep_league_path = {}
    for side in sides:
        prep_league_path[side] = (
            f'{league_dir}{league_name}/prep_{side}_{league_name}_npm={n_prev_match}.csv'
        )

    input_data = {}
    cache_ready = league_dir is not None and all(
        exists(prep_league_path[side]) for side in sides)
    if cache_ready:
        input_data = {
            side: pd.read_csv(prep_league_path[side], index_col=0)
            for side in sides
        }
        if update:
            input_data = update_input_data(data, input_data, n_prev_match)

    # Nothing cached: derive both frames from the league data.
    if len(input_data) == 0:
        input_data = _split_teams(data, n_prev_match)

    if train:
        for side in sides:
            input_data[side].to_csv(prep_league_path[side])
    else:
        for side in sides:
            input_data[side] = input_data[side].iloc[-test_size:]

    return input_data
Esempio n. 9
0
def check_simulation_params(sim_params):
    """Build the resolved simulation-parameter dict from ``sim_params``.

    Each known label is copied from ``sim_params`` (cast where applicable)
    or replaced by its fallback. ``FIELD_LABEL`` is mandatory; the other
    labels are optional.

    Args:
        sim_params: mapping of parameter labels to raw values.

    Returns:
        dict: resolved parameters keyed by label.

    Raises:
        ParameterError: if ``FIELD_LABEL`` is missing or equals ``-1``.
    """

    def _resolve(label, fallback, cast=None):
        # Absent labels take the fallback untouched; present ones are cast.
        if label not in sim_params:
            return fallback
        raw = sim_params[label]
        return raw if cast is None else cast(raw)

    # (label, fallback, cast) triples, in output-key order.
    specs = (
        (TEST_SIZE_LABEL, DEFAULT_TEST_SIZE, int),
        (THR_LABEL, DEFAULT_THR, float),
        (N_MATCHES_LABEL, -1, None),
        (COMBO_LABEL, None, None),
        (COMBO_LIST_LABEL, DEFAULT_COMBO_LIST, None),
        (FILTER_BET_LABEL, DEFAULT_FILTER_BET, None),
        (MONEY_BET_LABEL, DEFAULT_MONEY_BET, None),
        (THR_LIST_LABEL, DEFAULT_THR_LIST, None),
        (FILTER_BET_LIST_LABEL, DEFAULT_FILTER_BET_LIST, None),
        (FIELD_LABEL, -1, None),
        (SAVE_DIR_LABEL, DEFAULT_SAVE_DIR, None),
        (VERBOSE_LABEL, DEFAULT_VERBOSE, str2bool),
    )

    params = {}
    for label, fallback, cast in specs:
        params[label] = _resolve(label, fallback, cast)

    # Only the field is validated here; N_MATCHES may legitimately stay -1.
    field_missing = params[FIELD_LABEL] == -1
    if field_missing:
        raise ParameterError(
            f'Invalid or Missing Field Params:\n {params[FIELD_LABEL]}')

    return params
Esempio n. 10
0
    def __init__(self, name, in_features, params):
        """Build the LSTM -> Conv1d stack -> 1x1 Conv1d -> sigmoid network.

        Args:
            name: identifier stored on the instance.
            in_features: number of input features fed to the LSTM.
            params: mapping with ``'dataset'``, ``'seed'``, ``'out_lstm'``
                (LSTM hidden size), ``'n_lstm_layer'``, ``'bidirectional'``,
                ``'kernel'``, ``'padding'`` and ``'conv_layers'``.
        """
        super(LSTM_FCN_Network, self).__init__()
        self.name = name
        self.dataset_type = params['dataset']

        # Seed before creating layers so weight initialisation is repeatable.
        torch.manual_seed(int(params['seed']))

        # Parse the flag once so the LSTM and the channel arithmetic below
        # agree: a raw string such as "False" is truthy on its own.
        bidirectional = str2bool(params['bidirectional'])

        hidden_size = int(params['out_lstm'])
        self.lstm = nn.LSTM(input_size=in_features,
                            hidden_size=hidden_size,
                            num_layers=int(params['n_lstm_layer']),
                            bidirectional=bidirectional)

        # nn.LSTM emits hidden_size features per direction, so the first
        # convolution must be sized from the hidden size (not from the LSTM
        # input width) in the unidirectional case as well.
        in_features = hidden_size * 2 if bidirectional else hidden_size
        kernel = int(params['kernel'])
        padding = int(params['padding'])
        n_conv_layers = int(params['conv_layers'])

        self.conv_layers = nn.Sequential()

        # conv_layers - 1 hidden convolutions, each halving the channel
        # count; the final 1x1 convolution below is the last layer.
        for i_layer in range(n_conv_layers - 1):
            out_features = in_features // 2
            self.conv_layers.add_module(
                f'Conv-1d-{i_layer+1}',
                nn.Conv1d(in_features,
                          out_features,
                          kernel_size=kernel,
                          padding=padding))
            self.conv_layers.add_module(f'Relu-{i_layer}', nn.ReLU())
            in_features = out_features

        # Single sigmoid-activated output channel.
        out_features = 1
        self.fc = nn.Conv1d(in_features, out_features, kernel_size=1)
        self.fc_act = nn.Sigmoid()
Esempio n. 11
0
def training():
    """Train a model from the current request and optionally evaluate it.

    Request args (``request.args``):
        - epochs
        - patience
        - simulation (optional flag)
        - stats (optional flag)

    Request JSON (``request.json``): dict{'league': LEAGUE_PARAMS,
                                          'data': DATA_PARAMS,
                                          'model': MODEL_PARAMS,
                                          'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict, must contain 'league_name'.
        DATA_PARAMS: dict, must contain 'test_size'.
        MODEL_PARAMS: dict, consumed by ``training_snippet``.
        PRODUCTION_PARAMS: dict; this function reads its 'production' flag.

    Both args and params are validated first; if either check fails a 400
    response carrying both validation messages is returned.

    Returns:
        A response built by ``make_response``: status 200 with the model
        response from ``training_snippet`` (which includes 'model_dir' and
        'model_name'), or status 400 with the validation messages.
    """

    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass. The original tested check_args twice, so
    # invalid params were never rejected here.
    if not (check_args['check'] and check_params['check']):
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    epochs, patience = args['epochs'], args['patience']
    league_name = params['league']['league_name']
    logger.info(f'> Training {league_name.upper()}\n')

    production = str2bool(params.get('production').get('production'))

    model_response, model_config = training_snippet(
        epochs, patience, params, production)

    # SIMULATION AND STATISTICS
    stats_option = str2bool(args.get('stats'))
    simulation_option = str2bool(args.get('simulation'))

    sim_params = {**params['league'], **params['data']}
    feat_eng = model_config['feat_eng']
    model = model_config['model']

    # Both extras need held-out matches, hence the test_size > 0 guards.
    test_size = sim_params['test_size']

    if stats_option and test_size > 0:
        generate_strategy_stats(model, params, feat_eng)

    if simulation_option and test_size > 0:
        simulation_process(model,
                           sim_params,
                           feat_eng,
                           save_dir=model_response['model_dir'])

    if production:
        save_model_paths_production(league_name,
                                    model_response['model_dir'],
                                    model_response['model_name'])

    return make_response(model_response, 200)