예제 #1
0
def main():
    """Run Optuna hyper-parameter searches for XGBoost and LightGBM.

    Loads and preprocesses the training data, runs one 10-trial minimisation
    study per model with a Neptune callback attached, and pickles each
    finished study to ``HPO/<model>_hpo_<date>.pkl``.

    The Neptune API token is taken from the ``NEPTUNE_API_TOKEN`` environment
    variable so that no credential is stored in the source file.

    Raises:
        RuntimeError: If ``NEPTUNE_API_TOKEN`` is unset or empty.
    """
    import os  # local import: only needed for the credential lookup

    # SECURITY: never embed the token in source. Any token that has been
    # committed to version control should be treated as leaked and revoked.
    api_token = os.environ.get('NEPTUNE_API_TOKEN')
    if not api_token:
        raise RuntimeError(
            'NEPTUNE_API_TOKEN environment variable is not set; '
            'export your Neptune API token before running the HPO script.')
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/Numerai')
    data = utils.load_data('data/', mode='train')
    data, target, features, era = utils.preprocess_data(data, nn=True)
    datasets = {
        'data': data,
        'target': target,
        'features': features,
        'era': era
    }
    print('creating XGBoost Trials')
    xgb_exp = neptune.create_experiment('XGBoost_HPO')
    xgb_neptune_callback = opt_utils.NeptuneCallback(experiment=xgb_exp)
    study = optuna.create_study(direction='minimize')
    study.optimize(lambda x: optimize(x, datasets),
                   n_trials=10,
                   callbacks=[xgb_neptune_callback])
    # The date is evaluated when the study finishes, not when it starts.
    joblib.dump(study,
                f'HPO/xgb_hpo_{datetime.datetime.now().date()}.pkl')
    print('Creating LightGBM Trials')
    lgb_exp = neptune.create_experiment('LGBM_HPO')
    lgbm_neptune_callback = opt_utils.NeptuneCallback(experiment=lgb_exp)
    study = optuna.create_study(direction='minimize')
    # NOTE(review): unlike `optimize` above, `loptimize` is handed only the
    # trial and never receives `datasets` -- confirm this is intentional.
    study.optimize(loptimize, n_trials=10, callbacks=[lgbm_neptune_callback])
    joblib.dump(study,
                f'HPO/lgb_hpo_{datetime.datetime.now().date()}.pkl')
예제 #2
0
def main():
    """Run the XGBoost and LightGBM Optuna sweeps for the JSMP project.

    Initialises Neptune, loads and preprocesses the filtered training data,
    then runs two 100-trial maximisation studies (``optimize`` followed by
    ``loptimize``), each reporting through its own Neptune experiment. Every
    finished study is pickled under ``HPO/`` with the current date in its
    file name.
    """
    neptune.init(api_token=read_api_token(),
                 project_qualified_name='jamesmccarthy65/JSMP')

    data, target, features, date = preprocess_data(
        load_data('data/', mode='train', overide='filtered_train.csv'))
    data_dict = dict(data=data, target=target, features=features, date=date)

    def xgb_objective(trial):
        # Bind the shared dataset bundle to the XGBoost objective.
        return optimize(trial, data_dict)

    def lgb_objective(trial):
        # Bind the shared dataset bundle to the LightGBM objective.
        return loptimize(trial, data_dict)

    # --- XGBoost sweep ---
    print('creating XGBoost Trials')
    xgb_experiment = neptune.create_experiment('XGBoost_HPO')
    xgb_callbacks = [opt_utils.NeptuneCallback(experiment=xgb_experiment)]
    xgb_study = optuna.create_study(direction='maximize')
    xgb_study.optimize(xgb_objective, n_trials=100, callbacks=xgb_callbacks)
    xgb_path = f'HPO/xgb_hpo_{str(datetime.datetime.now().date())}.pkl'
    joblib.dump(xgb_study, xgb_path)

    # --- LightGBM sweep ---
    print('Creating LightGBM Trials')
    lgb_experiment = neptune.create_experiment('LGBM_HPO')
    lgb_callbacks = [opt_utils.NeptuneCallback(experiment=lgb_experiment)]
    lgb_study = optuna.create_study(direction='maximize')
    lgb_study.optimize(lgb_objective, n_trials=100, callbacks=lgb_callbacks)
    lgb_path = f'HPO/lgb_hpo_{str(datetime.datetime.now().date())}.pkl'
    joblib.dump(lgb_study, lgb_path)
    def optimize(self, optimizer: TModelOptimizer):
        """Run the Optuna study and return the parameter importances.

        Enables Optuna's default log handler, optimises ``self.optuna_study``
        with ``optimizer.evaluate_trial`` for ``self.train_params['trials']``
        trials while a Neptune callback logs the study and its charts, then
        logs the study info.

        Returns:
            A ``dt.Frame`` with a ``variable`` column (parameter names) and a
            ``valor`` column (their importances).
        """
        optuna.logging.enable_default_handler()

        study = self.optuna_study
        objective = optimizer.evaluate_trial
        trial_budget = self.train_params['trials']
        neptune_hook = opt_utils.NeptuneCallback(log_study=True,
                                                 log_charts=True)
        study.optimize(objective, n_trials=trial_budget,
                       callbacks=[neptune_hook])
        opt_utils.log_study_info(study)

        importances = optuna.importance.get_param_importances(study)
        return dt.Frame(variable=list(importances),
                        valor=list(importances.values()))
예제 #4
0
    def find_minimizing_params(function, arguments, first_step_args=()):
        """Search with Optuna for the arguments that minimise detection error.

        For every trial, one value per entry of ``arguments`` is sampled and
        the detector is run on each sample of the module-level
        ``training_data``. The trial cost is the summed absolute difference
        between the detected value and the expected result, plus a fixed
        penalty for every sample that fails or yields an invalid detection.

        Args:
            function: The detector callable, or one of the strings
                ``"onset_two_step_first_step"`` (evaluated with
                ``onset_sign_changes``) or ``"onset_two_step_second_step"``
                (evaluated with ``onset_two_step_alg``).
            arguments: Iterable of objects exposing ``name``, ``min``, ``max``
                and ``is_int``; each one becomes an Optuna parameter.
            first_step_args: Positional arguments passed before the sampled
                ones when ``function`` is ``onset_two_step_alg``.

        Returns:
            ``study.best_params`` of the finished study.
        """
        # Cost added for each sample whose detection fails or is invalid.
        failure_penalty = 5000

        def objective_function(trial):
            mapped_arguments = [
                trial.suggest_int(argument.name, argument.min, argument.max)
                if argument.is_int else trial.suggest_uniform(
                    argument.name, argument.min, argument.max)
                for argument in arguments
            ]
            total_cost = 0
            for data in training_data:
                emg_single_data = data[0]
                try:
                    result = data[1]

                    if function == onset_sign_changes or function == "onset_two_step_first_step":
                        value, right_side = onset_sign_changes(
                            emg_single_data, *mapped_arguments)
                    elif function == onset_two_step_alg:
                        value = function(emg_single_data, *first_step_args,
                                         *mapped_arguments)
                    else:
                        value = function(emg_single_data, *mapped_arguments)

                    total_cost += abs(value - result)
                    # For the first step, the expected onset must lie inside
                    # the [value, right_side] window returned by the detector.
                    if function == "onset_two_step_first_step" and (
                            value is None or value > result
                            or right_side < result):
                        total_cost += failure_penalty
                    if value == -1:
                        total_cost += failure_penalty
                except Exception:
                    # Any failure on this sample is penalised instead of
                    # aborting the trial. ``Exception`` (not a bare except)
                    # lets KeyboardInterrupt/SystemExit stop the search.
                    total_cost += failure_penalty
            return total_cost

        # The objective reads ``function`` through its closure, so this
        # rebinding also changes what the objective evaluates.
        if function == "onset_two_step_second_step":
            function = onset_two_step_alg

        neptune.init(project_qualified_name=project_name,
                     api_token=personal_token)
        neptune.create_experiment(
            name=function if isinstance(function, str) else function.__name__)
        neptune_callback = opt_utils.NeptuneCallback()
        study = optuna.create_study(direction='minimize')
        study.optimize(objective_function,
                       n_trials=OPTIMIZATION_TRIALS,
                       callbacks=[neptune_callback],
                       n_jobs=OPTIMIZATION_CONCURRENT_JOBS)
        print(study.best_params)
        print(study.best_value)
        print(study.best_trial)
        return study.best_params
예제 #5
0
def tune(classifer,
         params: dict,
         alg: str,
         tags=None,
         preprocessors=None,
         test_size=0.2,
         random_state=42):
    """Tune ``classifer`` with a 50-trial Optuna study logged to Neptune.

    Creates a Neptune experiment named after the model, minimises an
    ``Objective`` built from the model, ``params``, ``alg`` and the output of
    ``get_data``, logs the study info, prints the best result and stops the
    Neptune experiment.

    :param classifer: sklearn regressor; its ``__name__`` is used as the
        experiment name and as a tag
    :param params: dict params for regressor for tuning
    :param alg: forwarded unchanged to ``Objective``
    :param tags: optional tags for neptune exps; the model name is always
        added. The caller's list is not modified.
    :param preprocessors: optional preprocessors (currently not used by this
        function; the preprocessing passed to ``get_data`` is fixed)
    :param test_size: size for test datamodules (currently not used here)
    :param random_state: random seed for split (currently not used here)
    """

    model_name = classifer.__name__
    # Copy before extending: appending to the caller's list would leak the
    # model name into every later call that reuses the same ``tags`` object.
    tags = list(tags) if tags is not None else []
    tags.append(model_name)

    neptune.init(project_qualified_name='jiashuxu/folklore',
                 api_token=NEPTUNE_API)
    neptune.create_experiment(name=model_name, tags=tags)

    neptune_callback = opt_utils.NeptuneCallback(log_study=True,
                                                 log_charts=True)

    study = optuna.create_study(direction="minimize")

    objective = Objective(
        classifer, params, alg,
        *get_data(filter_no=10, preprocess=["standard_scaler", "pca"]))
    study.optimize(objective, n_trials=50, callbacks=[neptune_callback])

    opt_utils.log_study_info(study)

    print(f"best merror score: {study.best_value} with {study.best_params}")

    neptune.stop()
예제 #6
0
def main():
    """Run the SupAE hyper-parameter search and pickle the finished study.

    Seeds the RNGs, loads and preprocesses the training data, then runs a
    100-trial minimisation study of ``optimize`` that reports to the
    ``SupAE_HPO`` Neptune experiment. The study is saved to
    ``hpo/params/SupAEnn_hpo_<date>.pkl``.

    The Neptune API token is taken from the ``NEPTUNE_API_TOKEN`` environment
    variable so that no credential is stored in the source file.

    Raises:
        RuntimeError: If ``NEPTUNE_API_TOKEN`` is unset or empty.
    """
    import os  # local import: only needed for the credential lookup

    # SECURITY: never embed the token in source. Any token that has been
    # committed to version control should be treated as leaked and revoked.
    # Checked first so a missing token fails before the data is loaded.
    api_token = os.environ.get('NEPTUNE_API_TOKEN')
    if not api_token:
        raise RuntimeError(
            'NEPTUNE_API_TOKEN environment variable is not set; '
            'export your Neptune API token before running the HPO script.')

    seed_everything(0)
    data = load_data(root_dir='./data/', mode='train')
    data, target, features, era = preprocess_data(data, ordinal=True)
    neptune.init(api_token=api_token,
                 project_qualified_name='jamesmccarthy65/Numerai')
    nn_exp = neptune.create_experiment('SupAE_HPO')
    nn_neptune_callback = opt_utils.NeptuneCallback(experiment=nn_exp)
    study = optuna.create_study(direction='minimize')
    data_dict = {
        'data': data,
        'target': target,
        'features': features,
        'era': era
    }
    study.optimize(lambda trial: optimize(trial, data_dict=data_dict),
                   n_trials=100,
                   callbacks=[nn_neptune_callback])
    # The date is evaluated when the study finishes, not when it starts.
    joblib.dump(
        study,
        f'hpo/params/SupAEnn_hpo_{datetime.datetime.now().date()}.pkl')
예제 #7
0
def main():
    """Run the multiclass ResNet hyper-parameter search for the JSMP project.

    Seeds the RNGs, loads and preprocesses the training data, then runs a
    100-trial minimisation study of ``optimize`` that reports to the
    ``Resnet_HPO_Multiclass`` Neptune experiment. The finished study is
    pickled under ``HPO/`` with the current date in its file name.
    """
    seed_everything(0)
    data, target, features, date = preprocess_data(
        load_data(root_dir='./data/', mode='train'),
        nn=True,
        action='multi')

    neptune.init(api_token=read_api_token(),
                 project_qualified_name='jamesmccarthy65/JSMP')
    experiment = neptune.create_experiment('Resnet_HPO_Multiclass')
    callbacks = [opt_utils.NeptuneCallback(experiment=experiment)]
    study = optuna.create_study(direction='minimize')

    data_dict = dict(data=data, target=target, features=features, date=date)

    def objective(trial):
        # Bind the dataset bundle so Optuna only has to supply the trial.
        return optimize(trial, data_dict=data_dict)

    study.optimize(objective, n_trials=100, callbacks=callbacks)

    output_path = f'HPO/nn_hpo_{str(datetime.datetime.now().date())}.pkl'
    joblib.dump(study, output_path)
예제 #8
0
    def optuna_callback(self):
        """Return the Neptune Optuna callback, creating it on first use.

        A callback is only created when ``self.log`` is truthy and none has
        been cached yet; otherwise the cached value (possibly ``None``) is
        returned unchanged.
        """
        # Nothing to build when logging is off or a callback already exists.
        if not self.log or self._optuna_callback is not None:
            return self._optuna_callback

        self._optuna_callback = nmo.NeptuneCallback(experiment=self.experiment)
        return self._optuna_callback
예제 #9
0
    def find_best_params(self):
        """Tune the DLRM architecture and learning rate with Optuna.

        Creates a Neptune experiment, runs a maximisation study of
        ``self.max_evals`` trials over the bottom/top MLP layouts, the sparse
        feature size and the learning rate, logs the study, and rebuilds the
        final parameter set from the best trial.

        Returns:
            The result of ``self.rebuild_mlps(params, study.best_params)``.
        """
        def objective(trial, params):
            """Train one sampled DLRM configuration and return its score.

            Note: ``params`` is updated in place with the sampled values, so
            the same dict is reused and overwritten on every trial.
            """

            # Suggest values of the hyperparameters using a trial object.
            n_bot_layers = trial.suggest_int('n_bot_layers', 2, 5)
            n_top_layers = trial.suggest_int('n_top_layers', 2, 4)
            bot_layers = []
            top_layers = []
            arch_sparse_feature_size = trial.suggest_int(
                'arch_sparse_feature_size', 16, 32)
            for i in range(n_bot_layers):
                if i == 0:
                    bot_layers.append(
                        params["den_fea"]
                    )  # This value is related to the number of numerical columns (fixed by input data)
                elif i == (n_bot_layers - 1):
                    bot_layers.append(
                        arch_sparse_feature_size
                    )  # This value is related to the arch_sparse_feature_size
                else:
                    bot_features = trial.suggest_int(
                        'n_bot_units_l{}'.format(i), 32, 512)
                    bot_layers.append(bot_features)
            for i in range(n_top_layers):
                if i == (n_top_layers - 1):
                    top_layers.append(
                        1
                    )  # This value should always be 1, as it is a binary classification
                else:
                    top_features = trial.suggest_int(
                        'n_top_units_l{}'.format(i), 32, 512)
                    top_layers.append(top_features)
            # Layer sizes are encoded as dash-separated strings, e.g. "13-64-16".
            arch_mlp_bot = '-'.join(str(x) for x in bot_layers)
            arch_mlp_top = '-'.join(str(x) for x in top_layers)
            learning_rate = trial.suggest_float('learning_rate', 0.001, 0.1)
            #loss_function = trial.suggest_categorical('loss_function', ['mse', 'bce'])

            # Assigning trial hyper-parameters to params
            params["arch_sparse_feature_size"] = arch_sparse_feature_size
            params["arch_mlp_bot"] = arch_mlp_bot
            params["arch_mlp_top"] = arch_mlp_top
            params["learning_rate"] = learning_rate

            # Run DLRM and get results
            dlrm_model = DLRM_Model(**params)
            validation_results = dlrm_model.run()
            # The report and confusion matrix are skipped; every other entry
            # is logged to Neptune as a metric.
            for key in validation_results:
                if key not in ('classification_report', 'confusion_matrix'):
                    neptune.log_metric(key, validation_results[key])

            # Print trial (if verbose)
            if self.verbose:
                print('Parameters: ', params, '\n Results: ',
                      validation_results)

            return validation_results[
                'best_pre_auc_test']  # ['best_auc_test'] Need to decide which metric is best

        # Assigning fixed parameters to params
        params = {
            "data_generation": 'dataset',
            "data_set": 'normal',  #'kaggle',
            "raw_data_file": './input/trainday0day0day0day0.txt',
            # "processed_data_file": './input/kaggleAdDisplayChallenge_processed.npz',
            "loss_function": 'bce',  # loss_function,
            #"round_targets": True,  We want to have a ranked list instead of yes/no
            "mini_batch_size": 32,  # 128,
            "print_freq": 32,  # 256,
            "test_freq": 32,  # 128,
            "mlperf_logging": True,
            "print_time": True,
            "test_mini_batch_size": 32,  # 256,
            "den_fea": 13,
            "spa_fea": 26
            # "test_num_workers": 16
            # "save_model ":  'dlrm_criteo_kaggle_.pytorch'
            # "use_gpu": True
            # "enable_profiling": True,
            # "plot_compute_graph": True,
        }
        # Alternative fixed parameters for the recsys_users input file,
        # kept for reference:
        # params = {
        #     "data_generation": 'dataset',
        #     "data_set": 'normal',
        #     "raw_data_file": './input/recsys_users.txt',
        #     # "processed_data_file": './input/kaggleAdDisplayChallenge_processed.npz',
        #     "loss_function": 'wbce',  # loss_function,
        #     #"round_targets": True,  We want to have a ranked list instead of yes/no
        #     "mini_batch_size": 128,
        #     "print_freq": 256,
        #     "test_freq": 128,
        #     "mlperf_logging": True,
        #     "print_time": True,
        #     "test_mini_batch_size": 256,
        #     "loss_weights": '0.0348-0.9652',
        #     "den_fea": 240,  # 90  # 13 dense  features (numerical)          # PBV Main change between datasets
        #     "spa_fea": 35    # 51  # 26 sparse features (categorical)      # PBV Main change between datasets
        #     # "test_num_workers": 16
        #     # "save_model ":  'dlrm_criteo_kaggle_.pytorch'
        #     # "use_gpu": True
        #     # "enable_profiling": True,
        #     # "plot_compute_graph": True,
        # }

        neptune.init('pedrobaiz/dlrm', api_token=self.API_KEY)
        neptune.create_experiment('recsys-' + self.model_name,
                                  tags=[str(self.neptune_tags)])
        neptune_callback = optuna_utils.NeptuneCallback()
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: objective(trial, params),
                       n_trials=self.max_evals,
                       callbacks=[neptune_callback])
        optuna_utils.log_study(study)
        best_params = self.rebuild_mlps(params, study.best_params)

        print('finished find_best_params... ', best_params)
        return best_params
예제 #10
0
import neptune

# Initialise Neptune against the 'shared/optuna-integration' project using
# the literal 'ANONYMOUS' API token.
neptune.init(api_token='ANONYMOUS',
             project_qualified_name='shared/optuna-integration')

# Quickstart

## Step 1: Create an Experiment

neptune.create_experiment('optuna-sweep')

## Step 2: Create the Neptune Callback

import neptunecontrib.monitoring.optuna as opt_utils

neptune_callback = opt_utils.NeptuneCallback()

## Step 3: Run Optuna with the Neptune Callback

# NOTE(review): `optuna` and `objective` are neither imported nor defined in
# this snippet; presumably they are provided elsewhere in the full script --
# confirm before running it stand-alone.
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100, callbacks=[neptune_callback])

## Step 4: Stop logging

# Test support: keep a handle to the current experiment before it is stopped,
# so its logs can still be queried below.
exp = neptune.get_experiment()

neptune.stop()

# Test support: fetch the experiment's logs after logging has stopped.
all_logs = exp.get_logs()
예제 #11
0
             'boosting_type': 'gbdt',
             'verbose': 1,
             'metric': 'auc'}
        x_tr, x_val = data[tr_idx], data[val_idx]
        y_tr, y_val = target[tr_idx], target[val_idx]
        train = lgb.Dataset(x_tr, label=y_tr)
        val = lgb.Dataset(x_val, label=y_val)
        clf = lgb.LGBMClassifier(n_estimators=1000, verbose_eval=True, **p)
        clf.fit(x_tr, y_tr, early_stopping_rounds=50)
        preds = clf.predict(x_val)
        score = roc_auc_score(y_val, preds)
        print(f'Fold {i} ROC AUC:\t', score)
"""
# Module-level script: runs the XGBoost and then the LightGBM Optuna sweep
# (500 trials each, maximising the objective) and pickles each study to HPO/.
api_token = read_api_token()
neptune.init(api_token=api_token,
             project_qualified_name='jamesmccarthy65/JSMP')
data = load_data('data/', mode='train', overide='filtered_train.csv')
# NOTE(review): `optimize` and `loptimize` are passed to Optuna directly and
# therefore receive only the trial; presumably they read `data`, `target`,
# `features` and `date` as module globals -- confirm before renaming these.
data, target, features, date = preprocess_data(data)
print('creating XGBoost Trials')
# Each sweep reports to its own Neptune experiment through a dedicated callback.
xgb_exp = neptune.create_experiment('XGBoost_HPO')
xgb_neptune_callback = opt_utils.NeptuneCallback(experiment=xgb_exp)
study = optuna.create_study(direction='maximize')
study.optimize(optimize, n_trials=500, callbacks=[xgb_neptune_callback])
# The file name carries the date at which the sweep finished.
joblib.dump(study, f'HPO/xgb_hpo_{str(datetime.datetime.now().date())}.pkl')
print('Creating LightGBM Trials')
lgb_exp = neptune.create_experiment('LGBM_HPO')
lgbm_neptune_callback = opt_utils.NeptuneCallback(experiment=lgb_exp)
# `study` is rebound here; the XGBoost study has already been saved above.
study = optuna.create_study(direction='maximize')
study.optimize(loptimize, n_trials=500, callbacks=[lgbm_neptune_callback])
joblib.dump(study, f'HPO/lgb_hpo_{str(datetime.datetime.now().date())}.pkl')