コード例 #1
0
def tune_gbdt_params(model, X, y, n_splits) -> dict:
    '''
    Tune parameters by training with time-series cross-validation.

    Trains ``model`` on each fold, logging per-fold train/validation AUC
    and collecting feature importances, then builds an optimal config
    from the last trained model: ``num_boost_round`` is pinned to the
    early-stopping best iteration and ``early_stopping_rounds`` is
    dropped (no longer needed once the round count is fixed).

    :param model: model wrapper exposing ``train_and_validate``,
        ``feature_importance``, ``validation_auc``, ``best_iteration``
        and ``config``
    :param X: feature DataFrame
    :param y: target Series aligned with ``X``
    :param n_splits: number of TimeSeriesSplit folds
    :return: tuned model config
        (NOTE(review): annotated ``dict`` but actually returns the
        config object ``model.config`` — confirm callers' expectation)
    '''

    # start tuning train log
    create_logger('train', **c.log)
    logger_train = getLogger('train')
    logger_train.debug('{}\t{}\t{}\t{}'.format('fold', 'iteration',
                                               'train_auc', 'val_auc'))

    aucs = []
    feature_importances = pd.DataFrame()
    feature_importances['feature'] = X.columns

    # split data into train, validation (chronological folds — no shuffling)
    folds = TimeSeriesSplit(n_splits=n_splits)
    for i, (idx_train, idx_val) in enumerate(folds.split(X, y)):
        fold = i + 1
        with blocktimer(f'Fold {fold}'):
            # prepare
            logger.info(f'Training on fold {fold}')
            X_train = X.iloc[idx_train]
            y_train = y.iloc[idx_train]
            X_val = X.iloc[idx_val]
            y_val = y.iloc[idx_val]

            # train
            model = model.train_and_validate(X_train, y_train, X_val, y_val,
                                             logger_train, fold)

            # record result
            feature_importances[f'fold_{fold}'] = model.feature_importance
            aucs.append(model.validation_auc)
            # TODO: save models at each steps
            logger.debug(f'Fold {fold} finished')

    logger.info('Training has finished.')
    logger.debug(f'Mean AUC: {np.mean(aucs)}')
    # TODO: save feature importance and other

    # make optimal config from result
    optimal_c_model = model.config
    if model.best_iteration is not None:
        # fix the round count to the early-stopping optimum
        optimal_c_model.params['num_boost_round'] = model.best_iteration
    else:
        # Logger.warn is deprecated; warning() is the supported spelling
        logger.warning(
            'Did not meet early stopping. Try larger num_boost_rounds.')
    # no need after optimized num_boost_round; pop() avoids KeyError
    # if the key was never set in the config
    optimal_c_model.params.pop('early_stopping_rounds', None)
    return optimal_c_model
コード例 #2
0
def tune_gbdt_params(model, X, y, n_splits) -> dict:
    '''
    Tune parameter num_boost_round via time-series cross-validation.

    Trains ``model`` on each chronological fold (saving a model pickle
    per fold), then builds an optimal config from the last trained
    model: ``num_boost_round`` is pinned to the early-stopping best
    iteration and ``early_stopping_rounds`` is dropped, since it is
    unnecessary once the round count is fixed.

    :param model: model wrapper exposing ``train_and_validate``,
        ``save``, ``best_iteration`` and ``config``
    :param X: feature DataFrame
    :param y: target Series aligned with ``X``
    :param n_splits: number of TimeSeriesSplit folds
    :return: tuned model config
        (NOTE(review): annotated ``dict`` but actually returns the
        config object ``model.config`` — confirm callers' expectation)
    '''
    # start tuning train log
    create_logger('train', **c.log)
    logger_train = getLogger('train')
    logger_train.debug('{}\t{}\t{}\t{}'.format('fold', 'iteration',
                                               'train_auc', 'val_auc'))

    # split data into train, validation (chronological folds — no shuffling)
    folds = TimeSeriesSplit(n_splits=n_splits)
    for i, (idx_train, idx_val) in enumerate(folds.split(X, y)):
        fold = i + 1
        with blocktimer(f'Training on Fold {fold}'):
            X_train = X.iloc[idx_train]
            y_train = y.iloc[idx_train]
            X_val = X.iloc[idx_val]
            y_val = y.iloc[idx_val]

            # train, then checkpoint this fold's model to disk
            model = model.train_and_validate(X_train, y_train, X_val, y_val,
                                             logger_train, fold)
            model.save(
                c.model.dir /
                f'model_{c.runtime.VERSION}_{c.model.TYPE}_fold{fold}.pkl')

    # make optimal config from result
    optimal_c_model = model.config
    if model.best_iteration is not None:
        # fix the round count to the early-stopping optimum
        optimal_c_model.params['num_boost_round'] = model.best_iteration
    else:
        # Logger.warn is deprecated; warning() is the supported spelling
        logger.warning(
            'Did not meet early stopping. Try larger num_boost_rounds.')
    # no need after optimized num_boost_round; pop() avoids KeyError
    # if the key was never set in the config
    optimal_c_model.params.pop('early_stopping_rounds', None)
    return optimal_c_model
コード例 #3
0
    return optimal_c_model


if __name__ == "__main__":
    gc.enable()
    warnings.filterwarnings('ignore')

    # read config & apply option
    c = EasyDict(config)
    opt = parse_option()
    c.transformer.USE_SMALL_DATA = opt.small
    c.log.slackauth.NO_SEND_MESSAGE = opt.nomsg

    seed_everything(c.runtime.RANDOM_SEED)

    create_logger('main', **c.log)
    logger = getLogger('main')
    logger.info(
        f':thinking_face: Starting experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}'
    )

    try:
        main(c)
        logger.info(
            f':sunglasses: Finished experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}'
        )
    except Exception:
        logger.critical(
            f':smiling_imp: Exception occured \n {traceback.format_exc()}')
        logger.critical(
            f':skull: Stopped experiment {c.runtime.VERSION}_{c.runtime.DESCRIPTION}'
コード例 #4
0
    seed_everything(c.runtime.random_seed)

    # check config.experiment_type
    if c.experiment_type != experiment_type:
        raise ValueError(
            f'experiment_type in config: {c.experiment_type} does not match this script'
        )

    # start logging
    r.paths.main_log_path = f'log/main_{c.runtime.version}{c.runtime.dsize}.log'
    r.paths.train_log_path = f'log/train_{c.runtime.version}{c.runtime.dsize}.tsv'
    slackauth = EasyDict(json.load(open('./slackauth.json', 'r')))
    slackauth.token_path = Path().home() / slackauth.token_file
    create_logger('main',
                  version=c.runtime.version,
                  log_path=r.paths.main_log_path,
                  slackauth=slackauth,
                  no_send_message=c.runtime.no_send_message)
    create_logger('train',
                  version=c.runtime.version,
                  log_path=r.paths.train_log_path,
                  slackauth=slackauth,
                  no_send_message=c.runtime.no_send_message)
    logger = getLogger('main')
    logger.info(
        f':thinking_face: Starting experiment {c.runtime.version}_{c.model.type}{c.runtime.dsize}'
    )
    logger.info(f'Options indicated: {opt}')
    '''
    logger_train = getLogger('train')
    logger_train.debug('{}\t{}\t{}\t{}'.format('fold', 'iteration', 'train_auc', 'val_auc'))