Ejemplo n.º 1
0
def update_league_data(league_df, n_prev_match):
    """Append the matches newer than the last stored one to ``league_df``.

    Every season CSV listed by ``K.get_league_csv_paths`` is re-extracted.
    When the last match of a season is more recent than the last row of
    ``league_df``, the season goes through ``feature_engineering_league`` and
    only the rows dated after the current last row are appended.

    Args:
        league_df: Stored league DataFrame holding at least the ``league`` and
            ``Date`` columns; its last row is taken as the most recent match.
        n_prev_match: Forwarded to ``feature_engineering_league``.

    Returns:
        The league DataFrame with the new rows appended and its ``Date``
        column converted through ``pd.to_datetime``.

    Raises:
        AssertionError: If the league name is not in ``K.LEAGUE_NAMES``.
    """
    logger.info('> Updating league data')
    league_name = league_df['league'].unique()[0]

    assert league_name in K.LEAGUE_NAMES, f'Update League Data: Wrong League Name >> {league_name} provided'

    for season_i, path in enumerate(K.get_league_csv_paths(league_name)):
        season_df = extract_season_data(path, season_i, league_name)

        # A season file without any match has nothing to contribute and
        # would make the ``iloc[-1]`` lookup below fail.
        if season_df.empty:
            continue

        # Re-read on every iteration: league_df grows as seasons are appended.
        last_date = pd.to_datetime(league_df.iloc[-1]['Date'])
        season_last_date = season_df.iloc[-1]['Date']

        if season_last_date > last_date:
            update_df = season_df.reset_index(drop=True)
            update_df = feature_engineering_league(update_df, n_prev_match)
            update_df = update_df[update_df['Date'] > last_date]
            # DataFrame.append was removed in pandas 2.0; pd.concat is the
            # documented replacement and keeps the same row order.
            league_df = pd.concat([league_df, update_df])\
                          .reset_index(drop=True)

    league_df['Date'] = pd.to_datetime(league_df['Date'])

    return league_df
Ejemplo n.º 2
0
def save_all_params(save_dir,
                    league_params,
                    data_params,
                    model_params,
                    production_params=None):
    """Save every parameter set of a run as a numbered JSON file.

    The files are written through ``save_params`` as
    ``{save_dir}1.league_params``, ``{save_dir}2.data_params``,
    ``{save_dir}3.model_params`` and, when given,
    ``{save_dir}4.production_params`` (``save_params`` adds the extension).

    Args:
        save_dir: Destination folder prefix; it is concatenated directly with
            the file names, and created through ``ensure_folder``.
        league_params: Content of ``1.league_params``.
        data_params: Content of ``2.data_params``.
        model_params: Content of ``3.model_params``.
        production_params: Optional content of ``4.production_params``;
            nothing is written for it when ``None``.
    """
    ensure_folder(save_dir)

    named_params = [
        ('1.league_params', league_params),
        ('2.data_params', data_params),
        ('3.model_params', model_params),
    ]

    if production_params is not None:
        # The production file must hold the production params themselves
        # (it used to be written from model_params by mistake).
        named_params.append(('4.production_params', production_params))

    for filename, params in named_params:
        save_params(params, f'{save_dir}{filename}', format='json')

    logger.info(f'> Saving PARAMS at {save_dir}')

    return
Ejemplo n.º 3
0
def save_params(params, filepath, format='json', verbose=True):
    """Write ``params`` to ``filepath`` in the requested format.

    Args:
        params: Parameters to persist.
        filepath: Destination path without extension.
        format: ``'json'`` converts ``params`` with ``_json_item_to_str`` and
            writes ``filepath + '.json'`` through ``save_json``; ``'str'``
            writes ``str(params)`` with a line break after every comma,
            below a header naming the file, to ``filepath + '.txt'``.
        verbose: When truthy, log the final path.

    Raises:
        ValueError: If ``format`` is neither ``'json'`` nor ``'str'``.
    """
    as_json = format == 'json'

    # Reject unknown formats before touching anything.
    if not as_json and format != 'str':
        raise ValueError(f'Format is not recognized: provided --> {format}')

    if as_json:
        filepath = filepath + '.json'
        save_json(_json_item_to_str(params), filepath)
    else:
        # The header uses the name without extension.
        header_name = filepath.split('/')[-1]
        body = str(params).replace(',', ',\n')
        text = f'\n> {header_name}\n\n' + body

        filepath = filepath + '.txt'
        save_str_file(text, filepath)

    if verbose:
        logger.info(f' > Saving params at {filepath}\n')

    return
 def wrapper(*args, **kwargs):
     """Call ``func`` with the given arguments, logging start and duration."""
     started = time()
     logger.info(f'> {func.__name__} starts')
     outcome = func(*args, **kwargs)
     finished = time()
     # spent_time turns the two timestamps into the value shown in the log.
     logger.info(
         f'> {func.__name__} executed in {spent_time(started, finished)}')
     return outcome
Ejemplo n.º 5
0
def save_soccer_model(model):
    """Save ``model`` as ``{model.save_dir}{model.name}.pth``.

    The destination folder is created through ``ensure_folder`` and the
    model is written with ``save_model``. When ``model.verbose`` is truthy
    the current ``model.epoch`` and the folder are logged.

    Returns:
        The path the model was saved to.
    """
    checkpoint_name = f'{model.name}.pth'
    target_dir = model.save_dir
    checkpoint_path = f'{target_dir}{checkpoint_name}'

    ensure_folder(target_dir)
    save_model(model, checkpoint_path)

    if model.verbose:
        logger.info(
            f'> Saving checkpoint epoch {model.epoch} at {target_dir}')

    return checkpoint_path
Ejemplo n.º 6
0
def load_configs(league_name):
    """Load the production model of ``league_name`` with its configuration.

    The paths come from the ``league_name`` entry of
    ``load_production_paths()``.

    Returns:
        ``(model, config)`` where ``config`` has the keys ``'data'``,
        ``'league'``, ``'feat_eng'`` and ``'model'``. Any exception raised
        while resolving the paths or loading the artefacts is logged and
        ``(None, None)`` is returned instead.
    """
    production_paths = load_production_paths()

    try:
        # Resolve every path first so a missing entry fails before any load.
        entry = production_paths[league_name]
        model_path = entry['model_path']
        data_config_path = entry['data_params']
        league_config_path = entry['league_params']
        model_config_path = entry['model_params']
        feat_eng_path = entry['feat_eng']

        model = load_model(model_path)
        logger.info(f'> Loading Model: {model_path}')

        model_config = load_json(model_config_path)
        data_config = load_json(data_config_path)
        league_config = load_json(league_config_path)
        logger.info(f'> Loading Params')

        feat_eng = load_object(feat_eng_path)
        logger.info(f'> Loading Feature Engineering object\n\n')

        config = dict(data=data_config,
                      league=league_config,
                      feat_eng=feat_eng,
                      model=model_config)

    except Exception as error:
        logger.info(f'Loading Model: {league_name} not found: {error}')
        return None, None

    return model, config
Ejemplo n.º 7
0
def save_model_paths_production(league_name, model_dir, model_name):
    """Register the artefact paths of a model as the production entry.

    The ``league_name`` entry of the mapping returned by
    ``load_production_paths()`` is replaced with paths built from
    ``model_dir`` and ``model_name``, and the whole mapping is written to
    ``{PRODUCTION_DIR}production_paths.json``.

    Args:
        league_name: Key of the entry to replace.
        model_dir: Folder prefix of the model artefacts; file names are
            concatenated directly to it.
        model_name: Model file name without the ``.pth`` extension.
    """
    production_paths = load_production_paths()

    model_dir = f'{model_dir}'

    # Key order is kept as-is: it is the order written to the JSON file.
    production_paths[league_name] = {
        'model_dir': model_dir,
        'model_params': f'{model_dir}3.model_params.json',
        'league_params': f'{model_dir}1.league_params.json',
        'data_params': f'{model_dir}2.data_params.json',
        'model_path': f'{model_dir}{model_name}.pth',
        'feat_eng': f'{model_dir}feat_eng',
    }

    save_json(production_paths, f'{PRODUCTION_DIR}production_paths.json')

    separator = '---------------------------------------------------------'
    logger.info(separator)
    logger.info(f'\n\nUpdating production paths: {league_name.upper()}\n')
    logger.info(f'New Path : {model_dir}')
    logger.info(separator)
Ejemplo n.º 8
0
def save_simulation_details(sim_result, params, folder_dir):
    """Write ``sim_result`` as JSON inside ``folder_dir``.

    The file name embeds ``params['field']``, ``params['thr']`` and
    ``params['filter_bet']``. The target path is logged when
    ``params['verbose']`` is truthy.
    """
    field, thr, filter_bet = (params['field'], params['thr'],
                              params['filter_bet'])

    target = (f'{folder_dir}'
              f'5.simulations_details_{field}_thr={thr}_filter={filter_bet}.json')

    if params['verbose']:
        logger.info(f' > Saving training details at {target}')

    save_json(sim_result, target)

    return
def feature_engineering_league(league_df, n_prev_match):
    """Prepare raw season data for the model.

    Steps, in order: rename the bookmaker/result columns, recode the result
    from H/D/A to 1/X/2, derive the points of both teams, keep only the
    columns used downstream, drop incomplete rows, then delegate to
    ``creating_features`` and ``bind_last_matches``.

    Args:
        league_df: Raw matches with the ``B365H``, ``B365D``, ``B365A``,
            ``FTR``, ``FTHG``, ``FTAG``, ``league``, ``season``, ``Date``,
            ``HomeTeam`` and ``AwayTeam`` columns.
        n_prev_match: Forwarded to ``bind_last_matches``.

    Returns:
        The DataFrame returned by ``bind_last_matches``.
    """
    logger.info('\t\t\t > Feature Engineering for the league')

    renamed_columns = {
        'B365H': 'bet_1',
        'B365D': 'bet_X',
        'B365A': 'bet_2',
        'FTR': 'result_1X2',
        'FTHG': 'home_goals',
        'FTAG': 'away_goals'
    }
    kept_columns = [
        'league', 'season', 'Date', 'HomeTeam', 'AwayTeam', 'home_goals',
        'away_goals', 'result_1X2', 'bet_1', 'bet_X', 'bet_2', 'home_points',
        'away_points'
    ]

    # rename returns a new frame, so the assignments below do not touch the
    # caller's DataFrame.
    league_df = league_df.rename(columns=renamed_columns)

    for raw_code, code in (('H', '1'), ('D', 'X'), ('A', '2')):
        league_df.loc[league_df['result_1X2'] == raw_code,
                      ['result_1X2']] = code

    for code, home_pts, away_pts in (('1', 3, 0), ('X', 1, 1), ('2', 0, 3)):
        league_df.loc[league_df['result_1X2'] == code,
                      'home_points'] = home_pts
        league_df.loc[league_df['result_1X2'] == code,
                      'away_points'] = away_pts

    league_df = league_df[kept_columns]
    league_df = league_df.dropna().reset_index(drop=True)

    league_df = creating_features(league_df)

    return bind_last_matches(league_df, n_prev_match)
Ejemplo n.º 10
0
def update_league(league_name):
    """Update the stored data of one league (request handler).

    Request Args:
        - npm: int, must be greater than 0

    Returns:
        A 404 response when the league name or its data check fails;
        otherwise a 200 response on a successful update or a 400 response
        on a failed one. In the latter two cases the league matches are
        rewritten through ``Data_Api().write_league_matches``.
    """
    npm = request.args['npm']

    assert int(npm) > 0, 'NPM must be greater than 0'

    name_check = check_league(league_name)
    if name_check['check']:
        data_check = check_data_league(league_name, npm)
    else:
        # The data check only makes sense for a known league.
        data_check = {'check': False, 'msg': ''}

    msg = f"League_name: {name_check['msg']} \nLeague_data: {data_check['msg']}"

    if not (name_check['check'] and data_check['check']):
        return make_response(msg, 404)

    league_params = {
        'league_name': league_name,
        'n_prev_match': npm,
        'league_dir': DATA_DIR,
    }

    update_result = update_data_league(league_params)

    if update_result['check']:
        logger.info(f'Successful Update: {league_name} - npm={npm}')
        response = make_response(
            f'Successful Update: {league_name} -> npm = {npm}', 200)
    else:
        logger.error(
            f'Failed Update: {league_name} - npm={npm} \n {update_result["msg"]}'
        )
        response = make_response(
            f'Failed Update: {league_name} -> npm = {npm} \n {update_result["msg"]}',
            400)

    # UPDATE MATCHES LEAGUE
    Data_Api().write_league_matches(league_name, DATA_DIR)

    return response
def update_input_data(league_df, input_data, n_prev_match):
    """Extend the preprocessed home/away data with newer league matches.

    When the last date stored in ``input_data`` is older than the last row
    of ``league_df``, the newer league rows are split through
    ``_split_teams`` and appended to both frames.

    Args:
        league_df: League DataFrame with a ``Date`` column; its last row is
            taken as the most recent match.
        input_data: Mapping with the ``'home'`` and ``'away'`` DataFrames,
            each with a ``date`` column. It is updated in place.
        n_prev_match: Forwarded to ``_split_teams``.

    Returns:
        The same ``input_data`` mapping.

    Raises:
        AssertionError: If the home and away frames end on different dates.
    """
    logger.info(f'> Updating league data preprocessed: data ')
    last_date = pd.to_datetime(league_df.iloc[-1]['Date'])

    date_home = pd.to_datetime(input_data['home'].iloc[-1]['date'])
    date_away = pd.to_datetime(input_data['away'].iloc[-1]['date'])

    assert date_home == date_away
    date = date_home

    if date < last_date:
        data = league_df[league_df['Date'] > date]
        update_data = _split_teams(data, n_prev_match)

        # DataFrame.append was removed in pandas 2.0; pd.concat is the
        # documented replacement and keeps the same row order.
        for side in ('home', 'away'):
            input_data[side] = pd.concat(
                [input_data[side],
                 update_data[side]]).reset_index(drop=True)

    return input_data
Ejemplo n.º 12
0
def _load_model_config(paths):
    """Load a model and its configuration from a mapping of paths.

    Args:
        paths: Mapping with the keys ``'model'``, ``'model_params'``,
            ``'league_params'``, ``'data_params'`` and ``'feat_eng'``.

    Returns:
        ``(model, config)`` where ``config`` has the keys ``'data'``,
        ``'league'``, ``'feat_eng'`` and ``'model'``.
    """
    # Resolve every path first so a missing key fails before any load.
    model_path, model_config_path, league_config_path = (
        paths['model'], paths['model_params'], paths['league_params'])
    data_config_path, feat_eng_path = (paths['data_params'],
                                       paths['feat_eng'])

    model = load_model(model_path)
    logger.info(f'\n> Loading Model: {model_path}')

    model_config = load_json(model_config_path)
    data_config = load_json(data_config_path)
    league_config = load_json(league_config_path)
    logger.info(f'\n> Loading Params')

    feat_eng = load_object(feat_eng_path)
    logger.info(f'\n> Loading Feature Engineering object')

    config = dict(data=data_config,
                  league=league_config,
                  feat_eng=feat_eng,
                  model=model_config)

    return model, config
    def predict(self, input_data, field):
        """Average the predictions of every fold model for one side.

        Args:
            input_data: Iterable of inputs; each one is converted with
                ``torch.Tensor`` and moved to ``self.device``.
            field: ``'home'`` or ``'away'`` (case-insensitive); selects the
                ``home_network`` or ``away_network`` of each fold model.

        Returns:
            The mean of all the scalar outputs, over every fold and every
            input.

        Raises:
            AssertionError: If ``field`` is neither ``'home'`` nor ``'away'``.
        """
        model_name = str(field).lower()

        assert model_name in ('home', 'away'), 'ERROR - model predict: WRONG model name. Give "home" or "away"'

        # side -> (log message, attribute holding the network on a fold model)
        networks = {
            'home': ('> Calling Home Network', 'home_network'),
            'away': ('> Calling Away Network', 'away_network'),
        }

        preds = []
        for i_fold in range(self.n_folds):
            # Still guards the lookup when assertions are disabled.
            if model_name not in networks:
                raise ValueError('Model - predict: Wrong model name')

            log_message, network_attr = networks[model_name]
            logger.info(log_message)
            field_model = getattr(self.fold_model[i_fold], network_attr)

            logger.info('> Prediction')

            with torch.no_grad():
                for sample in input_data:
                    tensor = torch.Tensor(sample).to(self.device)
                    preds.append(field_model(tensor).squeeze().item())

        return np.mean(preds, axis=0)
Ejemplo n.º 14
0
def update_all_leagues():
    """Update the stored data of every league in ``LEAGUE.LEAGUE_NAMES``.

    Request Args:
        - npm: int, must be greater than 0

    Returns:
        A response built from a dict mapping each league name to a message:
        the failed-check message, or the outcome of its update.
    """
    npm = request.args['npm']

    assert int(npm) > 0, 'NPM must be greater than 0'

    response = {}

    for league_name in LEAGUE.LEAGUE_NAMES:
        name_check = check_league(league_name)
        if name_check['check']:
            data_check = check_data_league(league_name, npm)
        else:
            # The data check only makes sense for a known league.
            data_check = {'check': False, 'msg': ''}

        msg = f"League_name: {name_check['msg']} \nLeague_data: {data_check['msg']}"

        if not (name_check['check'] and data_check['check']):
            response[league_name] = msg
            continue

        logger.info('--------------------------------------')
        logger.info(f'\nUpdating {league_name.upper()}\n')

        league_params = {
            'league_name': league_name,
            'n_prev_match': npm,
            'league_dir': DATA_DIR,
        }
        update_response = update_data_league(league_params)

        if update_response['check']:
            logger.info(f'Successful Update: {league_name} - npm={npm}')
            outcome = f'Successful Update: {league_name} -> npm = {npm}'
        else:
            fail_msg = f'Failed Update: {league_name} - npm={npm} \n {update_response["msg"]}'
            logger.error(fail_msg)
            outcome = f'Failed Update: {league_name} -> npm = {npm} : {fail_msg}'

        response[league_name] = outcome

    return make_response(response)
Ejemplo n.º 15
0
    def extract_data_league(self):
        """Return the league DataFrame described by ``self.params``.

        The CSV path is
        ``{league_dir}{league_name}/{league_name}_npm={n_prev_match}.csv``
        (``None`` when ``league_dir`` is ``None``).

        * Training: an existing CSV is read, optionally refreshed with
          ``update_league_data`` and written back; otherwise the data is
          generated with ``extract_training_data`` and written.
        * Otherwise: the last ``test_size`` rows of the CSV are returned.

        Raises:
            AssertionError: Outside training, when ``league_dir`` is ``None``.
        """
        params = self.params
        league_name = params['league_name']
        n_prev_match = int(params['n_prev_match'])
        train = str2bool(params['train'])
        test_size = int(params['test_size'])
        league_dir = params['league_dir']
        update = params['update']

        logger.info(f'> Extracting {league_name} data: train={train}')

        league_path = None
        if league_dir is not None:
            league_path = f'{league_dir}{league_name}/{league_name}_npm={n_prev_match}.csv'

        if not train:
            # LOADING JUST THE LAST SEASON
            assert league_path is not None
            return pd.read_csv(league_path, index_col=0).iloc[-test_size:]

        # LOADING TRAINING DATA --> ALL DATA SEASON
        if league_path is not None and exists(league_path):
            # LEAGUE CSV ALREADY EXISTING
            league_df = pd.read_csv(league_path, index_col=0)
            if update:
                league_df = update_league_data(league_df, n_prev_match)
            logger.info('> Updating league data')
        else:
            # GENERATING LEAGUE CSV
            league_df = extract_training_data(league_name, n_prev_match)
            logger.info(f'Saving data at {league_path}')

        league_df.to_csv(league_path)

        return league_df
Ejemplo n.º 16
0
def production_training():
    """Train a model for production in two passes (request handler).

    The first pass trains with ``test_size`` and ``split_size`` set to 0 to
    obtain the train loss; the second pass trains with ``split_size`` set to
    1 and that loss as ``stop_loss``. The resulting model paths are then
    registered through ``save_model_paths_production``.

    Requested Args:
        - epochs
        - patience

    Requested Params: dict{'league': LEAGUE_PARAMS,
                           'data': DATA_PARAMS,
                           'model': MODEL_PARAMS
                           'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict {
                                "league_name": "serie_a",
                                "n_prev_match": int,
                                "league_dir": DATA_DIR,
                                "train": bool
                            }

        DATA_PARAMS: dict{
                            "normalize": bool,
                            "window_size": int,
                            "dataset": ["base" | "windowed"],
                            "batch_size": int,
                            "split_size": float,
                            "test_size": int,
                            "version": [1 | 2],
                            "league_dir": DATA_DIR,
                            "train": bool
                        }

        MODEL_PARAMS: dict{
                            "dataset": ["base" | "windowed"],
                            "version": [1 | 2],
                            "out_lstm": int,
                            "n_lstm_layer": int,
                            "bidirectional": bool,
                            "kernel": int,
                            "padding": int,
                            "conv_layers": int,
                            "optimizer": "adam",
                            "lr": float,
                            "loss": "bce",
                            "seed": int,
                            "device": "gpu",
                            "verbose": bool,
                            "plot": bool,
                            "static_dir": STATIC_DIR
                        }

    Returns:
        A 400 response with the validation messages when the args or the
        params are invalid; otherwise a 200 response with
        json_response: dict{'model_dir': str,
                            'model_name': str,
                            'losses': list,
                            'mean loss': float
                             }
    """

    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass (the params check used to be skipped
    # because the args check was tested twice).
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    epochs, patience = args['epochs'], args['patience']
    league_name = params['league']['league_name']
    logger.info(f'> Training {league_name.upper()}\n')

    # TRAINING FOR CATCHING OPTIMAL TRAIN LOSS
    params['data']['test_size'] = 0
    params['data']['split_size'] = 0
    print(f'\n\n>>> Params: \n{params}\n\n')
    # NOTE(review): the result is used as a single mapping here; confirm
    # the return shape of training_snippet.
    model_response = training_snippet(epochs,
                                      patience,
                                      params,
                                      production=True)
    print(model_response)

    # Prefer the mean loss when it is available.
    if model_response['mean_loss'] is None:
        optimal_train_loss = model_response['losses']['train']
    else:
        optimal_train_loss = model_response['mean_loss']['train']

    # TRAINING FOR PRODUCTION SETTINGS
    params['data']['split_size'] = 1
    params['model']['stop_loss'] = optimal_train_loss
    print(f'\n\n>>> Params: \n{params}\n\n')
    model_response = training_snippet(epochs,
                                      patience,
                                      params,
                                      production=True)

    model_dir = model_response['model_dir']
    model_name = model_response['model_name']
    save_model_paths_production(league_name, model_dir, model_name)

    return make_response(model_response, 200)
Ejemplo n.º 17
0
def ensure_folder(folder):
    """Create ``folder`` (and missing parents) unless it already exists."""
    if exists(folder):
        return

    logger.info(f'\n> Creating folder at {folder}\n')
    os.makedirs(folder)

    return
Ejemplo n.º 18
0
def multiple_training():
    """Train one model per value of a single parameter (request handler).

    For every value in ``multi_training['values']`` the parameter named by
    ``multi_training['param']`` is set in the data params or in the model
    params, the params are saved, and a model is trained. When
    ``test_size`` is greater than 0 the inference, threshold analysis and
    strategy statistics are also run for the home and away fields.

    Requested Args:
        - epochs
        - patience

    Requested Params: dict{'league': LEAGUE_PARAMS,
                           'data': DATA_PARAMS,
                           'model': MODEL_PARAMS
                           'production': PRODUCTION_PARAMS,
                           'multi_training': MULTI_TRAINING_PARAMS}

        LEAGUE_PARAMS: dict{}
        DATA_PARAMS: dict{}
        MODEL_PARAMS: dict{}

        PRODUCTION_PARAMS: dict{'production': bool,
                                'stop_loss': float}

        MULTI_TRAINING_PARAMS: dict{'param': str,
                                    'values': list}

    Returns:
        A 400 response when the validation fails or a training run raises;
        otherwise a 200 response with
        dict{param_name: {str(value): dict{'model_dir': str,
                                           'model_name': str,
                                           'losses': list,
                                           'mean_loss': float}}}

    Raises:
        ValueError: If the parameter name belongs to neither the data params
            nor the model params.
    """

    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass (the params check used to be skipped
    # because the args check was tested twice).
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        # Return right away so the 400 status is not overwritten below.
        return make_response(msg, 400)

    league_params = params['league']
    data_params = params['data']
    model_params = params['model']
    production_params = params.get('production')

    if production_params is not None:
        model_params = {**model_params, **production_params}
        data_params = {**data_params, **production_params}

    epochs, patience = args['epochs'], args['patience']

    # MULTI TRAINING SETUP
    multi_training_params = params['multi_training']
    param_name = multi_training_params['param']
    param_values = multi_training_params['values']

    # Created once so the result of every value is kept, and so an empty
    # list of values still produces a well-formed answer.
    response = {param_name: {}}

    for value in param_values:
        if param_name in data_params:
            data_params[param_name] = value
        elif param_name in model_params:
            model_params[param_name] = value
        else:
            raise ValueError(
                f'Multi-Training: Wrong param name >> {param_name} <<')

        logger.info(
            f'\n>>> Multi training on {param_name.upper()}: {value} \n')
        init_env_paths(model_params['version'])

        model_name, model_dir = model_directory(league_params, data_params,
                                                model_params)
        model_params['name'] = model_name
        model_params['save_dir'] = model_dir
        data_params['save_dir'] = model_dir

        # SAVING PARAMS
        save_all_params(model_dir, league_params, data_params, model_params,
                        production_params)

        try:
            # EXTRACTION DATA LEAGUE
            run_params = check_data_params({**league_params, **data_params})
            _, input_data = extract_data_league(run_params)

            # DATALOADER GENERATION
            dataset_params = check_dataset_params(data_params)
            dataloader, feat_eng, in_features = generate_dataset(
                input_data, dataset_params)

            # NETWORK INITIALIZATION
            network = init_network(in_features, model_params)

            Model = init_model(data_params['dataset'])
            soccer_model = Model(network, model_params, dataloader)

            soccer_model.train(epochs, patience)

            losses, mean_loss = soccer_model.get_losses()

            model_response = {
                'model_dir': model_dir,
                'model_name': model_name,
                'losses': losses,
                'mean_loss': mean_loss
            }

            run_params = {**league_params, **data_params}

            if run_params['test_size'] > 0:

                for field in [HOME, AWAY]:
                    run_params['field'] = field

                    testset, pred, true = real_case_inference(
                        soccer_model, run_params, feat_eng)
                    # Results are not part of the response; the calls are
                    # kept for whatever they do on their own.
                    thr_analysis(true, pred, run_params)

                    simulation_params = check_simulation_params(run_params)
                    strategy_stats(testset, pred, true, simulation_params)

        except Exception as error:
            return make_response({'msg': f'General Error: {error}'}, 400)

        except KeyboardInterrupt:
            return make_response({'Interrupt': 'Keyboard Interrupt'})

        response[param_name][str(value)] = model_response

    return make_response(response, 200)
Ejemplo n.º 19
0
def training():
    """Train a model and optionally run statistics and simulation
    (request handler).

    Requested Args:
        - epochs
        - patience
        - simulation
        - stats

    Requested Params: dict{'league': LEAGUE_PARAMS,
                           'data': DATA_PARAMS,
                           'model': MODEL_PARAMS
                           'production': PRODUCTION_PARAMS}

        LEAGUE_PARAMS: dict{}
        DATA_PARAMS: dict{}
        MODEL_PARAMS: dict{}
        PRODUCTION_PARAMS: dict{'production': bool,
                                'stop_loss': float}
            (optional; production is off when the section is missing)

    Returns:
        A 400 response with the validation messages when the args or the
        params are invalid; otherwise a 200 response with
        json_response: dict{'model_dir': str,
                            'model_name': str,
                            'losses': list,
                            'mean loss': float
                             }
    """

    params = request.json
    args = request.args

    check_args = check_training_args(args)
    check_params = check_training_params(params)

    # Both validations must pass (the params check used to be skipped
    # because the args check was tested twice).
    if not check_args['check'] or not check_params['check']:
        msg = f'> Args: {check_args["msg"]} \n> Params: {check_params["msg"]}'
        logger.error(msg)
        return make_response(msg, 400)

    epochs, patience = args['epochs'], args['patience']
    league_name = params['league']['league_name']
    logger.info(f'> Training {league_name.upper()}\n')

    # A request without a 'production' section is a plain training run.
    production_params = params.get('production')
    if production_params is not None:
        production = str2bool(production_params.get('production'))
    else:
        production = False

    model_response, model_config = training_snippet(epochs, patience, params,
                                                    production)

    # SIMULATION AND STATISTICS
    stats_option = str2bool(args.get('stats'))
    simulation_option = str2bool(args.get('simulation'))

    sim_params = {**params['league'], **params['data']}
    feat_eng = model_config['feat_eng']
    model = model_config['model']

    test_size = sim_params['test_size']

    if stats_option and test_size > 0:
        # NOTE(review): receives the full request params while the
        # simulation below receives sim_params; confirm this is intended.
        generate_strategy_stats(model, params, feat_eng)

    if simulation_option and test_size > 0:
        simulation_process(model,
                           sim_params,
                           feat_eng,
                           save_dir=model_response['model_dir'])

    if production:
        save_model_paths_production(league_name,
                                    model_response['model_dir'],
                                    model_response['model_name'])

    return make_response(model_response, 200)