Code Example #1
File: trainer.py Project: dempseyzhang888/amptorch
    def load_skorch(self):
        skorch.net.to_tensor = to_tensor

        collate_fn = DataCollater(train=True, forcetraining=self.forcetraining)

        self.net = NeuralNetRegressor(
            module=self.model,
            criterion=self.criterion,
            criterion__force_coefficient=self.config["optim"].get(
                "force_coefficient", 0),
            criterion__loss=self.config["optim"].get("loss", "mse"),
            optimizer=self.optimizer,
            lr=self.config["optim"].get("lr", 1e-1),
            batch_size=self.config["optim"].get("batch_size", 32),
            max_epochs=self.config["optim"].get("epochs", 100),
            iterator_train__collate_fn=collate_fn,
            iterator_train__shuffle=True,
            iterator_valid__collate_fn=collate_fn,
            iterator_valid__shuffle=False,
            device=self.device,
            train_split=self.split,
            callbacks=self.callbacks,
            verbose=self.config["cmd"].get("verbose", True),
        )
        print("Loading skorch trainer")
Code Example #2
def get_skorch_regressor():
    X, y = make_regression(100, 5, n_informative=3, random_state=0)
    X = X.astype(np.float32)
    y = y / np.std(y)
    y = y.reshape(-1, 1).astype(np.float32)

    X_df = pd.DataFrame(X, columns=['col' + str(i) for i in range(X.shape[1])])

    class MyModule(nn.Module):
        def __init__(self, input_units=5, num_units=5, nonlin=nn.ReLU()):
            super(MyModule, self).__init__()

            self.dense0 = nn.Linear(input_units, num_units)
            self.nonlin = nonlin
            self.dense1 = nn.Linear(num_units, num_units)
            self.output = nn.Linear(num_units, 1)

        def forward(self, X, **kwargs):
            X = self.nonlin(self.dense0(X))
            X = self.nonlin(self.dense1(X))
            X = self.output(X)
            return X

    model = NeuralNetRegressor(
        MyModule,
        max_epochs=20,
        lr=0.2,
        iterator_train__shuffle=True,
    )

    model.fit(X_df.values, y)
    return model, X_df, y
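A note on the casts in this example: skorch hands data directly to PyTorch, whose default floating dtype is float32, and NeuralNetRegressor expects a 2-D target of shape (n_samples, 1). A minimal self-contained sketch of those requirements (the module and values here are illustrative only):

import numpy as np
import torch.nn as nn
from sklearn.datasets import make_regression
from skorch import NeuralNetRegressor

X, y = make_regression(200, 10, random_state=0)
X = X.astype(np.float32)                 # torch defaults to float32
y = y.reshape(-1, 1).astype(np.float32)  # regressor expects shape (n_samples, 1)

net = NeuralNetRegressor(
    nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 1)),
    max_epochs=5,
    lr=0.05,
    verbose=0,
)
net.fit(X, y)
print(net.predict(X[:3]).shape)  # (3, 1)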
Code Example #3
    def build_model(
        self,
        network=MVRegressor,
        device: str = "cpu",
        scale_data: bool = False,
        num_layers: int = 10,
        num_units: int = 50,
        dropout: float = 0.5,
        num_epochs: int = 10,
        batch_size: int = 128,
    ):

        self.scale_data = scale_data
        self.num_layers = num_layers
        self.num_units = num_units
        self.dropout = dropout
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        if not all([hasattr(self, "input_dim"), hasattr(self, "output_dim")]):

            raise ValueError(
                "Please load dataset first to obtain proper sizes")

        if device == "cpu":
            self.device = device
        else:
            use_cuda = torch.cuda.is_available()
            self.device = torch.device("cuda" if use_cuda else "cpu")

        self.model = NeuralNetRegressor(
            network,
            device=self.device,
            module__input_dim=self.input_dim,
            module__output_dim=self.output_dim,
            module__n_layers=self.num_layers,
            module__num_units=self.num_units,
            module__p_dropout=self.dropout,
            max_epochs=self.num_epochs,
            criterion=nn.MSELoss,
            batch_size=self.batch_size,
            # Shuffle training data on each epoch
            iterator_train__shuffle=True,
            callbacks=[
                (
                    "lr_scheduler",
                    LRScheduler(policy=CyclicLR,
                                base_lr=0.001,
                                max_lr=0.01,
                                step_every="batch"),
                ),
            ],
        )
Code Example #4
File: train_bandit.py Project: yijiezh/banditml
def fit_custom_pytorch_module_w_skorch(module, X, y, hyperparams):
    """Fit a custom PyTorch module using Skorch."""

    skorch_net = NeuralNetRegressor(
        module=module,
        optimizer=torch.optim.Adam,
        lr=hyperparams["learning_rate"],
        optimizer__weight_decay=hyperparams["l2_decay"],
        max_epochs=hyperparams["max_epochs"],
        batch_size=hyperparams["batch_size"],
        iterator_train__shuffle=True,
    )

    skorch_net.fit(X, y)
    return skorch_net
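The hyperparams dict comes from the caller; a hypothetical invocation could look like the following (the key names match the lookups inside the function, the values are illustrative only):

# `module` is any torch.nn.Module suited to X and y; values are examples only
hyperparams = {
    "learning_rate": 1e-3,
    "l2_decay": 1e-4,
    "max_epochs": 50,
    "batch_size": 64,
}
skorch_net = fit_custom_pytorch_module_w_skorch(module, X, y, hyperparams)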
Code Example #5
File: test_pytorch.py Project: mmccarty/dask-ml
async def test_pytorch(c, s, a, b):

    n_features = 10
    defaults = {
        "callbacks": False,
        "warm_start": False,
        "train_split": None,
        "max_epochs": 1,
    }
    model = NeuralNetRegressor(
        module=ShallowNet,
        module__n_features=n_features,
        criterion=nn.MSELoss,
        optimizer=optim.SGD,
        optimizer__lr=0.1,
        batch_size=64,
        **defaults,
    )

    model2 = clone(model)
    assert model.callbacks is False
    assert model.warm_start is False
    assert model.train_split is None
    assert model.max_epochs == 1

    params = {"optimizer__lr": loguniform(1e-3, 1e0)}
    X, y = make_regression(n_samples=100, n_features=n_features)
    X = X.astype("float32")
    y = y.astype("float32").reshape(-1, 1)
    search = IncrementalSearchCV(model2, params, max_iter=5, decay_rate=None)
    await search.fit(X, y)
    assert search.best_score_ >= 0
Code Example #6
def train_model(wavelet_scale, scattering_operators):

    print(f'Start training model {wavelet_scale} {scattering_operators}')

    model_save_path = osp.join(
        MODELS_SAVE_PATH, f'model_{wavelet_scale}_'
        f'{scop_to_str(scattering_operators)}.pkl')

    if not osp.exists(model_save_path):

        data_loader = DataLoader(
            DATASET, f'data_{wavelet_scale}_'
            f'{scop_to_str(scattering_operators)}')

        data = data_loader.load_data()

        x_train, y_train, _ = data[f'{DATASET}_training']

        x_train = x_train.astype(np.float32)
        y_train = y_train.astype(np.float32)

        # normalize data to 0 mean and unit std
        scaler = StandardScaler()
        scaler.fit(x_train)
        x_train = scaler.transform(x_train)

        n_in = x_train.shape[1]
        lr_policy = LRScheduler(StepLR, step_size=15, gamma=0.5)
        net = NeuralNetRegressor(
            GSGNN,
            module__n_in=n_in,
            criterion=torch.nn.MSELoss,
            max_epochs=400,
            optimizer=torch.optim.Adam,
            optimizer__lr=.005,
            callbacks=[lr_policy],
            device='cpu',
            batch_size=256,
            verbose=0,
        )

        params = {
            'module__n_h': [100, 200, 300, 400],
            'module__dropout': [0.0, 0.2, 0.4],
            'module__n_layers': [1, 2, 3, 4],
        }

        gs = GridSearchCV(net,
                          params,
                          refit=True,
                          cv=5,
                          scoring='r2',
                          n_jobs=-1)

        gs.fit(x_train, y_train)

        # save the trained model
        print(f"Save the model in {model_save_path}")
        torch.save(gs.best_estimator_, model_save_path)
        report(gs.cv_results_, 10)
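The report helper called on the last line is not part of the snippet; a sketch of a common implementation (adapted from the scikit-learn documentation's search examples) is:

import numpy as np

def report(results, n_top=3):
    # Print the n_top best parameter settings from a cv_results_ dict
    for i in range(1, n_top + 1):
        for candidate in np.flatnonzero(results['rank_test_score'] == i):
            print(f"Model with rank: {i}")
            print(f"Mean validation score: "
                  f"{results['mean_test_score'][candidate]:.3f} "
                  f"(std: {results['std_test_score'][candidate]:.3f})")
            print(f"Parameters: {results['params'][candidate]}\n")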
Code Example #7
def make_skorch_network(net, param):
    return NeuralNetRegressor(
        net,
        max_epochs=param['n_epochs'],
        lr=param['learning_rate'],
        batch_size=param['batch_size'],
        device='cuda' if torch.cuda.is_available() else 'cpu',
        optimizer=torch.optim.SGD,
        optimizer__momentum=param['momentum'],
        optimizer__weight_decay=param['l2_penalty'],
        iterator_train__shuffle=True,
        verbose=0)
Code Example #8
 def __init__(self, n_features=34, lr=0.02, me=10):
     self.lr = lr  # learning rate
     self.max_epochs = me
     self.model = NeuralNetRegressor(ActivityNN(input_dim=n_features),
                                     max_epochs=self.max_epochs,
                                     lr=self.lr,
                                     iterator_train__shuffle=True,
                                     criterion=torch.nn.BCELoss,
                                     optimizer=torch.optim.Adam,
                                     warm_start=False,
                                     verbose=0)
     self.trained = False
Code Example #9
def train_nn_model_validate1(nodes, X_train_scaled, Y_train, max_evals=10):

    # I think a 0.12 test split was a bit much and left a lot of data unused;
    # this setup should work a little better.
    #X_split_train, X_split_test, Y_split_train, Y_split_test = train_test_split(X_train_scaled, Y_train, test_size=0.12, stratify=Y_train)
    X_split_train, X_split_test, Y_split_train, Y_split_test = train_test_split(
        X_train_scaled, Y_train, test_size=0.14)
    # Model initialization, dropout, etc. make an individual network unstable;
    # the fix is to train several times and keep the most reliable model.
    best_rmse = 99999999999.9
    best_model = 0.0
    for j in range(0, max_evals):

        rsg = NeuralNetRegressor(
            lr=nodes["lr"],
            optimizer__weight_decay=nodes["optimizer__weight_decay"],
            criterion=nodes["criterion"],
            batch_size=nodes["batch_size"],
            optimizer__betas=nodes["optimizer__betas"],
            module=create_nn_module(nodes["input_nodes"],
                                    nodes["hidden_layers"],
                                    nodes["hidden_nodes"],
                                    nodes["output_nodes"],
                                    nodes["percentage"]),
            max_epochs=nodes["max_epochs"],
            callbacks=[
                skorch.callbacks.EarlyStopping(patience=nodes["patience"])
            ],
            device=nodes["device"],
            optimizer=nodes["optimizer"])
        init_module(rsg.module, nodes["weight_mode"], nodes["bias"])
        rsg.fit(X_split_train.astype(np.float32),
                Y_split_train.astype(np.float32))

        #Y_pred = rsg.predict(X_split_test.astype(np.float32))
        metric = cal_nnrsg_rmse(rsg, X_split_test, Y_split_test)

        best_model, best_rmse, flag = record_best_model_rmse(
            rsg, metric, best_model, best_rmse)

    return best_model, best_rmse
Code Example #10
    def build_estimator(hyperparams, train_data, test=False):
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ("r2_score_valid", EpochScoring("r2", lower_is_better=False)),
            (
                "early_stopping",
                EarlyStopping(monitor="valid_loss", patience=5, lower_is_better=True),
            ),
            (
                "learning_rate_scheduler",
                LRScheduler(
                    policy=lr_scheduler.ReduceLROnPlateau,
                    monitor="valid_loss",
                    # Following kargs are passed to the
                    # lr scheduler constructor
                    mode="min",
                    min_lr=1e-5,
                ),
            ),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True,  # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams["lr"],
            batch_size=hyperparams["batch_size"],
            module__in_features=in_features,
            module__n_layers=hyperparams["n_layers"],
            module__n_neuron_per_layer=hyperparams["n_neuron_per_layer"],
            module__activation=getattr(F, hyperparams["activation"]),
            module__p_dropout=hyperparams["p_dropout"],
            optimizer__momentum=hyperparams["momentum"],
            optimizer__weight_decay=hyperparams["weight_decay"],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4,
        )
Code Example #11
def create_pipeline(
        device='cpu',  # or 'cuda'
        max_epochs=50,
        lr=0.1,
        **kwargs):
    return Pipeline([('cast', Cast(np.float32)), ('scale', StandardScaler()),
                     ('net',
                      NeuralNetRegressor(
                          MyModule,
                          device=device,
                          max_epochs=max_epochs,
                          lr=lr,
                          train_split=None,
                          **kwargs,
                      ))])
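Cast is not a scikit-learn built-in; a minimal stateless transformer with the same role might look like this (a sketch, assuming the step only performs a dtype conversion):

from sklearn.base import BaseEstimator, TransformerMixin

class Cast(BaseEstimator, TransformerMixin):
    # Cast the input array to a fixed dtype (e.g. np.float32 for torch)
    def __init__(self, dtype):
        self.dtype = dtype

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        return X.astype(self.dtype)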
Code Example #12
def nn_f(params):

    print("mean", params["mean"])
    print("std", params["std"])
    print("lr", params["lr"])
    print("optimizer__weight_decay", params["optimizer__weight_decay"])
    print("criterion", params["criterion"])
    print("batch_size", params["batch_size"])
    print("optimizer__betas", params["optimizer__betas"])
    print("bias", params["bias"])
    print("weight_mode", params["weight_mode"])
    print("patience", params["patience"])
    print("input_nodes", params["input_nodes"])
    print("hidden_layers", params["hidden_layers"])
    print("hidden_nodes", params["hidden_nodes"])
    print("output_nodes", params["output_nodes"])
    print("percentage", params["percentage"])

    rsg = NeuralNetRegressor(
        lr=params["lr"],
        optimizer__weight_decay=params["optimizer__weight_decay"],
        criterion=params["criterion"],
        batch_size=params["batch_size"],
        optimizer__betas=params["optimizer__betas"],
        module=create_nn_module(params["input_nodes"], params["hidden_layers"],
                                params["hidden_nodes"], params["output_nodes"],
                                params["percentage"]),
        max_epochs=params["max_epochs"],
        callbacks=[
            skorch.callbacks.EarlyStopping(patience=params["patience"])
        ],
        device=params["device"],
        optimizer=params["optimizer"])
    init_module(rsg.module, params["weight_mode"], params["bias"])

    # StratifiedKFold doesn't seem usable here; actually the newer skf API just
    # needs extra settings and a reshaped Y_train.
    #skf = StratifiedKFold(Y_train, n_folds=5, shuffle=True, random_state=None)
    # Note that sklearn's (negated) mean squared error can be negative here; at
    # first I thought my own code was broken.
    metric = cross_val_score(rsg,
                             X_train_scaled.values.astype(np.float32),
                             Y_train.values.astype(np.float32),
                             cv=8,
                             scoring="neg_mean_squared_log_error").mean()
    #metric = cross_val_score(rsg, X_train_scaled.values.astype(np.float32), Y_train.values.astype(np.float32), cv=2, scoring="neg_mean_squared_error").mean()
    print(metric)
    return -metric
Code Example #13
File: model.py Project: Gert-JanD/COIN
def create_pipeline(
        device='cpu',  # or 'cuda'
        max_epochs=50,
        lr=2e-4,
        **kwargs):
    return Pipeline([('cast', Cast(np.float32)),
                     ('scale', MinMaxScaler((-1, 1))),
                     ('net',
                      NeuralNetRegressor(
                          module=MyModule,
                          device=device,
                          max_epochs=max_epochs,
                          lr=lr,
                          train_split=None,
                          optimizer=torch.optim.Adam,
                          **kwargs,
                      ))])
Code Example #14
    def build_estimator(hyperparams, train_data, test=False):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ('r2_score_valid', EpochScoring('r2',
                                            lower_is_better=False)),
            ('early_stopping', EarlyStopping(monitor='valid_loss',
                                             patience=5,
                                             lower_is_better=True)),
            ('learning_rate_scheduler', LRScheduler(policy=lr_scheduler.ReduceLROnPlateau,
                                                    monitor='valid_loss',
                                                    # Following kargs are passed to the
                                                    # lr scheduler constructor
                                                    mode='min',
                                                    min_lr=1e-5
                                                    )),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True, # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams['lr'],
            batch_size=hyperparams['batch_size'],
            module__in_features=in_features,
            module__n_layers=hyperparams['n_layers'],
            module__n_neuron_per_layer=hyperparams['n_neuron_per_layer'],
            module__activation=getattr(F, hyperparams['activation']),
            module__p_dropout=hyperparams['p_dropout'],
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4
        )
Code Example #15
def hyper_tunning(model, samples, target):
    """ Hyperparameters tuning using grid seacrh from sklearn."""
    params = {
        'lr': [0.001, 0.0005, 0.005, 0.05],
        'max_epochs': list(range(1, 12, 4)),
        'batch_size': [1]
    }
    binary_target = []
    for t in target:
        tem = np.zeros(7)
        index = int(t.item())
        tem[index] = 1
        binary_target.append(tem)
    binary_target = torch.FloatTensor(binary_target)
    net = NeuralNetRegressor(model, max_epochs=12, lr=0.001, verbose=1)
    gs = GridSearchCV(net, params, refit=False, scoring='r2', verbose=1, cv=7)
    target = target.reshape(-1, 1)
    gs.fit(samples, binary_target)
Code Example #16
def regressor_training(crispr_model_regressor, X, y, cv_splitter_reg):
    net = NeuralNetRegressor(crispr_model_regressor,
                             optimizer=torch.optim.Adam,
                             lr=config.start_lr,
                             optimizer__weight_decay=config.lr_decay,
                             optimizer__betas=(0.9, 0.98),
                             optimizer__eps=1e-9,
                             batch_size=config.batch_size,
                             max_epochs=config.n_epochs,
                             device=device2)

    # NOTE: the skorch regressor built above is discarded here in favor of a
    # random-forest baseline; drop this line to evaluate the neural net instead.
    net = RandomForestRegressor(n_estimators=30)
    cv_results_reg = cross_validate(net, X, y,
                                    scoring={'spearman': make_scorer(spearman),
                                             'pearson': make_scorer(pearson),
                                             'neg_mean_squared_error':
                                             'neg_mean_squared_error'},
                                    cv=cv_splitter_reg, return_estimator=True)
    logger.debug("{0!r}".format(cv_results_reg['test_spearman']))
    logger.debug("{0!r}".format(cv_results_reg['test_pearson']))
    logger.debug("{0!r}".format(cv_results_reg['test_neg_mean_squared_error']))
    logger.debug("{0!r}".format(cv_results_reg.keys()))
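The spearman and pearson functions wrapped by make_scorer are project helpers not shown here; they are commonly defined on top of scipy.stats along these lines (a sketch):

import numpy as np
from scipy.stats import pearsonr, spearmanr

def spearman(y_true, y_pred):
    # rank correlation between targets and predictions
    return spearmanr(np.ravel(y_true), np.ravel(y_pred)).correlation

def pearson(y_true, y_pred):
    # linear correlation between targets and predictions
    return pearsonr(np.ravel(y_true), np.ravel(y_pred))[0]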
Code Example #17
 def get_pipeline(self):
     regressor = None
     if self.learning_method == "linear":
         regressor = MultiOutputRegressor(LinearRegression(fit_intercept=self.fit_intercept),
                                          n_jobs=6)
     elif self.learning_method == "booster":
         regressor = MultiOutputRegressor(XGBRegressor(n_jobs=12,
                                                       n_estimators=self.no_estimators))
     elif self.learning_method == "deep":
         regressor = NeuralNetRegressor(
             module=TemporalConvNet,
             module__num_inputs=1,
             module__num_channels=[2] * self.no_channels,
             module__output_sz=self.horizon,
             module__kernel_size=5,
             module__dropout=0.0,
             max_epochs=60,
             batch_size=256,
             lr=2e-3,
             optimizer=torch.optim.Adam,
             device='cpu',
             iterator_train__shuffle=True,
             callbacks=[GradientNormClipping(gradient_clip_value=1,
                                             gradient_clip_norm_type=2)],
             train_split=None,
         )
     return ForecasterPipeline([
         # Convert the `y` target into a horizon
         ('pre_horizon', HorizonTransformer(horizon=self.horizon)),
         ('pre_reversible_imputer', ReversibleImputer(y_only=True)),
         ('features', FeatureUnion([
             # Generate a week's worth of autoregressive features
             ('ar_features', AutoregressiveTransformer(
                 num_lags=int(self.horizon * self.num_lags), pred_stride=self.pred_stride)),
         ])),
         ('post_feature_imputer', ReversibleImputer()),
         ('regressor', regressor)
     ])
Code Example #18
    def optimize(model):

        logger.info("Checkpoint2")
        X = model.predictor_src  #+ self.predictor_tgt
        y = model.predictor_tgt
        # y = model.config.sentence_level
        print(X)
        print(y)

        #Hyperparameter Tuning with Random Search
        net = NeuralNetRegressor(
            model,
            max_epochs=10,
            lr=0.1,
            # Shuffle training data on each epoch
            iterator_train__shuffle=True,
        )

        net.fit(X, y)
        y_proba = net.predict_proba(X)

        # deactivate skorch-internal train-valid split and verbose logging
        net.set_params(train_split=False, verbose=0)
        # Search-space keys must be valid skorch parameters ('module__*'
        # routes to the wrapped module); 'r2' replaces 'accuracy', which is
        # a classification metric.
        params = {
            'max_epochs': [7],
            'module__hidden_LSTM': [32, 64, 128],
            'lr': [1e-3, 2e-3],
            'batch_size': [32, 64],
            'module__dropout': [0.5],
        }
        gs = RandomizedSearchCV(net,
                                params,
                                refit=False,
                                cv=3,
                                scoring='r2',
                                verbose=2)

        gs.fit(X, y)
        print("best score: {:.3f}, best params: {}".format(
            gs.best_score_, gs.best_params_))
        return
Code Example #19
File: trainer.py Project: dempseyzhang888/amptorch
class AtomsTrainer:
    def __init__(self, config):
        self.config = config
        self.pretrained = False

    def load(self):
        self.load_config()
        self.load_rng_seed()
        self.load_dataset()
        self.load_model()
        self.load_criterion()
        self.load_optimizer()
        self.load_logger()
        self.load_extras()
        self.load_skorch()

    def load_config(self):
        self.timestamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
        self.identifier = self.config["cmd"].get("identifier", False)
        if self.identifier:
            self.identifier = self.timestamp + "-{}".format(self.identifier)
        else:
            self.identifier = self.timestamp

        self.device = torch.device(self.config["optim"].get("device", "cpu"))
        self.debug = self.config["cmd"].get("debug", False)
        run_dir = self.config["cmd"].get("run_dir", "./")
        os.chdir(run_dir)
        if not self.debug:
            self.cp_dir = os.path.join(run_dir, "checkpoints", self.identifier)
            print(f"Results saved to {self.cp_dir}")
            os.makedirs(self.cp_dir, exist_ok=True)

    def load_rng_seed(self):
        seed = self.config["cmd"].get("seed", 0)
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    def get_unique_elements(self, training_images):
        elements = np.array(
            [atom.symbol for atoms in training_images for atom in atoms])
        elements = np.unique(elements)
        return elements

    def load_dataset(self):
        training_images = self.config["dataset"]["raw_data"]
        # TODO: Scalability when dataset too large to fit into memory
        if isinstance(training_images, str):
            training_images = ase.io.read(training_images, ":")
        self.elements = self.config["dataset"].get(
            "elements", self.get_unique_elements(training_images))

        self.forcetraining = self.config["model"].get("get_forces", True)
        self.fp_scheme = self.config["dataset"].get("fp_scheme",
                                                    "gaussian").lower()
        self.fp_params = self.config["dataset"]["fp_params"]
        self.cutoff_params = self.config["dataset"].get(
            "cutoff_params", {"cutoff_func": "Cosine"})

        self.train_dataset = AtomsDataset(
            images=training_images,
            descriptor_setup=(
                self.fp_scheme,
                self.fp_params,
                self.cutoff_params,
                self.elements,
            ),
            forcetraining=self.forcetraining,
            save_fps=self.config["dataset"].get("save_fps", True),
        )

        self.target_scaler = self.train_dataset.target_scaler
        if not self.debug:
            normalizers = {"target": self.target_scaler}
            torch.save(normalizers, os.path.join(self.cp_dir,
                                                 "normalizers.pt"))
        self.input_dim = self.train_dataset.input_dim
        self.val_split = self.config["dataset"].get("val_split", 0)
        print("Loading dataset: {} images".format(len(self.train_dataset)))

    def load_model(self):
        elements = list_symbols_to_indices(self.elements)
        self.model = BPNN(elements=elements,
                          input_dim=self.input_dim,
                          **self.config["model"])
        print("Loading model: {} parameters".format(self.model.num_params))

    def load_extras(self):
        callbacks = []
        load_best_loss = train_end_load_best_loss(self.identifier)
        self.split = CVSplit(cv=self.val_split) if self.val_split != 0 else 0

        metrics = evaluator(
            self.val_split,
            self.config["optim"].get("metric", "mae"),
            self.identifier,
            self.forcetraining,
        )
        callbacks.extend(metrics)

        if not self.debug:
            callbacks.append(load_best_loss)
        scheduler = self.config["optim"].get("scheduler", None)
        if scheduler:
            scheduler = LRScheduler(scheduler,
                                    **self.config["optim"]["scheduler_params"])
            callbacks.append(scheduler)
        if self.config["cmd"].get("logger", False):
            from skorch.callbacks import WandbLogger

            callbacks.append(
                WandbLogger(
                    self.wandb_run,
                    save_model=False,
                    keys_ignored="dur",
                ))
        self.callbacks = callbacks

    def load_criterion(self):
        self.criterion = self.config["optim"].get("loss_fn", CustomLoss)

    def load_optimizer(self):
        self.optimizer = self.config["optim"].get("optimizer",
                                                  torch.optim.Adam)

    def load_logger(self):
        if self.config["cmd"].get("logger", False):
            import wandb

            self.wandb_run = wandb.init(
                name=self.identifier,
                config=self.config,
                id=self.timestamp,
            )

    def load_skorch(self):
        skorch.net.to_tensor = to_tensor

        collate_fn = DataCollater(train=True, forcetraining=self.forcetraining)

        self.net = NeuralNetRegressor(
            module=self.model,
            criterion=self.criterion,
            criterion__force_coefficient=self.config["optim"].get(
                "force_coefficient", 0),
            criterion__loss=self.config["optim"].get("loss", "mse"),
            optimizer=self.optimizer,
            lr=self.config["optim"].get("lr", 1e-1),
            batch_size=self.config["optim"].get("batch_size", 32),
            max_epochs=self.config["optim"].get("epochs", 100),
            iterator_train__collate_fn=collate_fn,
            iterator_train__shuffle=True,
            iterator_valid__collate_fn=collate_fn,
            iterator_valid__shuffle=False,
            device=self.device,
            train_split=self.split,
            callbacks=self.callbacks,
            verbose=self.config["cmd"].get("verbose", True),
        )
        print("Loading skorch trainer")

    def train(self, raw_data=None):
        if raw_data is not None:
            self.config["dataset"]["raw_data"] = raw_data
        if not self.pretrained:
            self.load()

        self.net.fit(self.train_dataset, None)

    def predict(self, images, batch_size=32):
        if len(images) < 1:
            warnings.warn("No images found!", stacklevel=2)
            return images

        a2d = AtomsToData(
            descriptor=self.train_dataset.descriptor,
            r_energy=False,
            r_forces=False,
            save_fps=True,
            fprimes=self.forcetraining,
            cores=1,
        )

        data_list = a2d.convert_all(images, disable_tqdm=True)

        self.net.module.eval()
        collate_fn = DataCollater(train=False,
                                  forcetraining=self.forcetraining)

        predictions = {"energy": [], "forces": []}
        for data in data_list:
            collated = collate_fn([data])
            energy, forces = self.net.module(collated)

            energy = self.target_scaler.denorm(
                energy, pred="energy").detach().tolist()
            forces = self.target_scaler.denorm(forces,
                                               pred="forces").detach().numpy()

            predictions["energy"].extend(energy)
            predictions["forces"].append(forces)

        return predictions

    def load_pretrained(self, checkpoint_path=None):
        print(f"Loading checkpoint from {checkpoint_path}")
        self.load()
        self.net.initialize()
        self.pretrained = True
        try:
            self.net.load_params(
                f_params=os.path.join(checkpoint_path, "params.pt"),
                f_optimizer=os.path.join(checkpoint_path, "optimizer.pt"),
                f_criterion=os.path.join(checkpoint_path, "criterion.pt"),
                f_history=os.path.join(checkpoint_path, "history.json"),
            )
            # TODO(mshuaibi): remove dataset load, use saved normalizers
        except NotImplementedError:
            print("Unable to load checkpoint!")
Code Example #20
    def forward(self, x):
        out = self.fc1(x)
        out = self.l3(out)
        out = self.ln(out)
        #out = self.dp(out)
        out = self.l1(out)
        out = self.fc2(out)
        return out


net_regr = NeuralNetRegressor(
    Net(hidden_size=500),
    max_epochs=5000,
    lr=0.01,
    device='cuda',
    optimizer=torch.optim.Adam,
    train_split=None,
    verbose=1,
)

res = net_regr.fit(t_d_inp, t_d_oup)
# save
net_regr.save_params(f_params='step1result')

pred = net_regr.predict(test_inp)
mse = ((test_oup - pred)**2).mean()
print('test error = ' + str(mse))
# plot 1 loss
loss = net_regr.history[:, 'train_loss']
plt.figure()
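A note on the save_params call above: it stores only the module weights, so restoring them later requires an initialized net first. A sketch of the reload (same assumed Net module as above):

# Hypothetical reload of the weights saved to 'step1result'
new_net = NeuralNetRegressor(Net(hidden_size=500), train_split=None, device='cuda')
new_net.initialize()
new_net.load_params(f_params='step1result')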
Code Example #21
class PyTorchModel(BaseModel):
    def build_model(
        self,
        network=MVRegressor,
        device: str = "cpu",
        scale_data: bool = False,
        num_layers: int = 10,
        num_units: int = 50,
        dropout: float = 0.5,
        num_epochs: int = 10,
        batch_size: int = 128,
    ):

        self.scale_data = scale_data
        self.num_layers = num_layers
        self.num_units = num_units
        self.dropout = dropout
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        if not all([hasattr(self, "input_dim"), hasattr(self, "output_dim")]):

            raise ValueError("Please load dataset first to obtain proper sizes")

        if device == "cpu":
            self.device = device
        else:
            use_cuda = torch.cuda.is_available()
            self.device = torch.device("cuda" if use_cuda else "cpu")

        self.model = NeuralNetRegressor(
            network,
            device=self.device,
            module__input_dim=self.input_dim,
            module__output_dim=self.output_dim,
            module__n_layers=self.num_layers,
            module__num_units=self.num_units,
            module__p_dropout=self.dropout,
            max_epochs=self.num_epochs,
            criterion=nn.MSELoss,
            batch_size=self.batch_size,
            # Shuffle training data on each epoch
            iterator_train__shuffle=True,
            callbacks=[
                (
                    "lr_scheduler",
                    LRScheduler(
                        policy=CyclicLR, base_lr=0.001, max_lr=0.01, step_every="batch"
                    ),
                ),
            ],
        )

    def fit(self, X, y, **fit_params):

        if self.scale_data:
            X, y = self.scalar(X, y)

        X, y = (
            torch.tensor(X).float().to(device=self.device),
            torch.tensor(y).float().to(device=self.device),
        )
        self.model.fit(X, y, **fit_params)

    def load_model(
        self, input_dim: int, output_dim: int, filename: str, scale_data: bool = False,
    ):

        self.scale_data = scale_data
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.build_model(scale_data=scale_data)
        self.model = pickle.load(open(filename, "rb"))

    def predict(self, X):

        if self.scale_data:
            X = self.xscalar.transform(X)
        X = torch.tensor(X).float().to(device=self.device)
        preds = self.model.predict(X)

        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)

        return preds

    def sweep(
        self,
        params: Dict,
        X,
        y,
        search_algorithm: str = "bayesian",
        num_trials: int = 3,
        scoring_func: str = "r2",
    ):

        from tune_sklearn import TuneGridSearchCV, TuneSearchCV

        X, y = (
            torch.tensor(X).float().to(device=self.device),
            torch.tensor(y).float().to(device=self.device),
        )
        tune_search = TuneSearchCV(
            self.model,
            params,
            search_optimization=search_algorithm,
            n_trials=num_trials,
            early_stopping=True,
            scoring=scoring_func,
        )
        tune_search.fit(X, y)

        return tune_search
Code Example #22
        )

    def forward(self, x):
        return self.encoder(x)


if __name__ == "__main__":
    cp = Checkpoint(dirname='segnet_mse_no_sigmoid_sgd_150ep_b8_lr_0.01_30enc/checkpoints')
    train_end_cp = TrainEndCheckpoint(dirname='segnet_mse_no_sigmoid_sgd_150ep_b8_lr_0.01_30enc/checkpoints')
    net = NeuralNetRegressor(
        SegNet,
        module__encoding_size=30,
        device=torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
        max_epochs=150,
        batch_size=8,
        criterion=MSELoss,
        lr=0.01,
        iterator_train__shuffle=True,
        optimizer=torch.optim.SGD,
        optimizer__momentum=.9,
        callbacks=[cp, train_end_cp]
    )
    net.initialize()
    net.load_params(checkpoint=cp)
    mean = np.array([0.5020, 0.4690, 0.4199])
    std = np.array([0.2052, 0.2005, 0.1966])
    inverse_transform = transforms.Normalize(
        mean=(-mean) / std,
        std=1 / std
    )
    transform = transforms.Compose([transforms.Resize((224, 224),
Code Example #23
                    fn_prefix='./histories/%i_valid_best_' % k)
    load_best_valid_loss = train_end_load_best_valid_loss()

    # Train this fold's network
    net = NeuralNetRegressor(CrystalGraphConvNet,
                             module__orig_atom_fea_len=orig_atom_fea_len,
                             module__nbr_fea_len=nbr_fea_len,
                             batch_size=214,
                             module__classification=False,
                             lr=0.0056,
                             max_epochs=100,
                             module__atom_fea_len=46,
                             module__h_fea_len=83,
                             module__n_conv=8,
                             module__n_h=4,
                             optimizer=Adam,
                             iterator_train__pin_memory=True,
                             iterator_train__num_workers=0,
                             iterator_train__collate_fn=collate_pool,
                             iterator_train__shuffle=True,
                             iterator_valid__pin_memory=True,
                             iterator_valid__num_workers=0,
                             iterator_valid__collate_fn=collate_pool,
                             iterator_valid__shuffle=False,
                             device=device,
                             criterion=torch.nn.L1Loss,
                             dataset=MergeDataset,
                             callbacks=[cp, load_best_valid_loss, LR_schedule])
    net.initialize()
    net.fit(stds_train_, targets_train_)
    nets.append(net)
Code Example #24
File: amptorch_test.py Project: ray38/SIMPLE-NN-MCSH
net = NeuralNetRegressor(
    module=FullNN(unique_atoms, [fp_length, 3, 10], device, forcetraining=forcetraining),
    criterion=CustomMSELoss,
    criterion__force_coefficient=0.1,
    optimizer=torch.optim.LBFGS,
    optimizer__line_search_fn="strong_wolfe",
    lr=1e-3,
    batch_size=len(images),
    max_epochs=20,
    iterator_train__collate_fn=collate_amp,
    iterator_train__shuffle=True,
    iterator_valid__collate_fn=collate_amp,
    device=device,
    # train_split=0,
    verbose=1,
    callbacks=[
        EpochScoring(
            forces_score,
            on_train=True,
            use_caching=True,
            target_extractor=target_extractor,
        ),
        EpochScoring(
            energy_score,
            on_train=True,
            use_caching=True,
            target_extractor=target_extractor,
        ),
    ],
)
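forces_score, energy_score, and target_extractor are amptorch-specific helpers; in general, EpochScoring accepts any callable with the sklearn scorer signature (net, X, y). A hypothetical mean-absolute-error metric, for illustration:

import numpy as np
from skorch.callbacks import EpochScoring

def mae_score(net, X, y):
    # scorer signature: (estimator, X, y) -> float
    pred = net.predict(X)
    return float(np.mean(np.abs(np.asarray(y) - pred)))

mae_callback = EpochScoring(mae_score, name='mae',
                            lower_is_better=True, on_train=True)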
Code Example #25
LR_schedule = callbacks.lr_scheduler.LRScheduler('MultiStepLR',
                                                 milestones=[100],
                                                 gamma=0.1)

net = NeuralNetRegressor(CrystalGraphConvNet,
                         module__orig_atom_fea_len=orig_atom_fea_len,
                         module__nbr_fea_len=nbr_fea_len,
                         batch_size=214,
                         module__classification=False,
                         lr=0.0056,
                         max_epochs=292,
                         module__atom_fea_len=46,
                         module__h_fea_len=83,
                         module__n_conv=8,
                         module__n_h=4,
                         optimizer=Adam,
                         iterator_train__pin_memory=True,
                         iterator_train__num_workers=0,
                         iterator_train__collate_fn=collate_pool,
                         iterator_train__shuffle=True,
                         iterator_valid__pin_memory=True,
                         iterator_valid__num_workers=0,
                         iterator_valid__collate_fn=collate_pool,
                         iterator_valid__shuffle=False,
                         device=device,
                         criterion=torch.nn.L1Loss,
                         dataset=MergeDataset,
                         callbacks=[cp, load_best_valid_loss, LR_schedule])

net.initialize()
net.load_params(f_history='../CGCNN/valid_best_history.json',
                f_optimizer='../CGCNN/valid_best_optimizer.pt',
Code Example #26
    #valid_target = np.load(os.path.join(args.data_dir, 'valid_data_target.npy'), allow_pickle=True)

    train_dataset = MRIDataset(train_img, train_target, args.resize)
    #train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=args.train_batch_size)
    #valid_dataset = MRIDataset(valid_img, valid_target, args.resize)
    #valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=args.valid_batch_size)

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam

    net = NeuralNetRegressor(model,
                             max_epochs=3,
                             lr=0.001,
                             optimizer=optimizer,
                             optimizer__weight_decay=0.001,
                             verbose=1,
                             batch_size=12)

    params = {
        'lr': [0.001, 0.01, 0.02, 0.04, 0.1],
        'optimizer__weight_decay': [0.001, 0.005, 0.01, 0.05, 0.1]
    }

    slice_dataset = SliceMRIDataset(train_dataset)
    gs = GridSearchCV(net,
                      params,
                      refit=False,
                      cv=3,
                      scoring='neg_mean_squared_error',
Code Example #27
def test_skorch():
    distances = np.linspace(2, 5, 100)
    label = "skorch_example"
    images = []
    energies = []
    forces = []
    for l in distances:
        image = Atoms(
            "CuCO",
            [
                (-l * np.sin(0.65), l * np.cos(0.65), 0),
                (0, 0, 0),
                (l * np.sin(0.65), l * np.cos(0.65), 0),
            ],
        )
        image.set_cell([10, 10, 10])
        image.wrap(pbc=True)
        image.set_calculator(EMT())
        images.append(image)
        energies.append(image.get_potential_energy())
        forces.append(image.get_forces())

    energies = np.array(energies)
    forces = np.concatenate(np.array(forces))
    Gs = {}
    Gs["G2_etas"] = np.logspace(np.log10(0.05), np.log10(5.0), num=2)
    Gs["G2_rs_s"] = [0] * 2
    Gs["G4_etas"] = [0.005]
    Gs["G4_zetas"] = [1.0]
    Gs["G4_gammas"] = [+1.0, -1]
    Gs["cutoff"] = 6.5

    forcetraining = True
    training_data = AtomsDataset(
        images,
        SNN_Gaussian,
        Gs,
        forcetraining=forcetraining,
        label=label,
        cores=1,
        delta_data=None,
    )
    unique_atoms = training_data.elements
    fp_length = training_data.fp_length
    device = "cpu"

    net = NeuralNetRegressor(
        module=FullNN(unique_atoms, [fp_length, 2, 2],
                      device,
                      forcetraining=forcetraining),
        criterion=CustomMSELoss,
        criterion__force_coefficient=0.3,
        optimizer=torch.optim.LBFGS,
        optimizer__line_search_fn="strong_wolfe",
        lr=1,
        batch_size=100,
        max_epochs=150,
        iterator_train__collate_fn=collate_amp,
        iterator_train__shuffle=True,
        iterator_valid__collate_fn=collate_amp,
        device=device,
        train_split=0,
        verbose=0,
        callbacks=[
            EpochScoring(
                forces_score,
                on_train=True,
                use_caching=True,
                target_extractor=target_extractor,
            ),
            EpochScoring(
                energy_score,
                on_train=True,
                use_caching=True,
                target_extractor=target_extractor,
            ),
        ],
    )
    calc = AMP(training_data, net, "test")
    calc.train(overwrite=True)
    num_of_atoms = 3
    calculated_energies = np.array(
        [calc.get_potential_energy(image) for image in images])
    energy_rmse = np.sqrt(
        (((calculated_energies - energies) / num_of_atoms)**2).sum() /
        len(images))

    calculated_forces = np.concatenate(
        np.array([calc.get_forces(image) for image in images]))
    force_rmse = np.sqrt((((calculated_forces - forces))**2).sum() /
                         (3 * num_of_atoms * len(images)))
    l1_force = np.sum(np.abs(calculated_forces - forces) / num_of_atoms, 1)
    idx = 0
    force_loss_image = np.zeros((len(calculated_energies), 1))
    for i in range(len(calculated_energies)):
        force_loss_image[i] = np.sum(l1_force[idx:idx + 3])
        idx += 3
    force_loss_image /= 3

    reported_energy_score = net.history[-1]["energy_score"]
    reported_forces_score = net.history[-1]["forces_score"]
    assert force_rmse <= 0.005, "Force training convergence not met!"
    assert energy_rmse <= 0.005, "Energy training convergence not met!"
    assert round(reported_energy_score,
                 4) == round(energy_rmse,
                             4), "Shuffled reported energy scores incorrect!"
    assert round(reported_forces_score,
                 4) == round(force_rmse,
                             4), "Shuffled reported forces score incorrect!"
Code Example #28
for idx in range(len(trial_idx)):
    if trial_idx[idx] == 700:
        break

train = TensorDataset(torch.tensor(data_features[:idx,:]).float(), torch.tensor(data_targets[:idx,:]).float())
train_loader = DataLoader(train, batch_size=128, shuffle=True, num_workers=4)

valid = TensorDataset(torch.tensor(data_features[idx:,:]).float(), torch.tensor(data_targets[idx:,:]).float())
valid_loader = DataLoader(valid, batch_size=128, shuffle=True, num_workers=4)

net = NeuralNetRegressor(
    Network,
    optimizer=torch.optim.Adam,
    # iterator_train = train_loader,
    # iterator_valid = valid_loader,
    module__size_layer=512,
    module__p=0.2,
    max_epochs=20,
    lr=0.0015,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)
net.initialize()
params = {
    'lr': [1e-3, 2e-3, 5e-3],
    'max_epochs': [15, 25],
    'module__size_layer': [256, 512, 684],
    'module__p': [0.1, 0.3, 0.5],
}

gs = GridSearchCV(net, params, refit=False, cv=3,
                  scoring='neg_mean_squared_error')
print(np.min(np.float32(data_features)))
Code Example #29
def train_calc(inputs):
    images, filename, file_dir, Gs, lj, forcesonly, scaling = inputs

    class train_end_load_best_valid_loss(skorch.callbacks.base.Callback):
        def on_train_end(self, net, X, y):
            net.load_params(
                "./results/checkpoints/{}_params.pt".format(filename))

    cp = Checkpoint(
        monitor="forces_score_best",
        fn_prefix="./results/checkpoints/{}_".format(filename),
    )

    if not os.path.exists(file_dir):
        os.makedirs(file_dir, exist_ok=True)

    forcetraining = True
    training_data = AtomsDataset(
        images,
        SNN_Gaussian,
        Gs,
        forcetraining=forcetraining,
        label=filename,
        cores=1,
        lj_data=None,
        scaling=scaling,
    )
    unique_atoms = training_data.elements
    fp_length = training_data.fp_length
    device = "cpu"

    torch.set_num_threads(1)

    net = NeuralNetRegressor(
        module=FullNN(unique_atoms, [fp_length, 3, 20],
                      device,
                      forcetraining=forcetraining),
        criterion=CustomMSELoss,
        criterion__force_coefficient=0.04,
        optimizer=torch.optim.LBFGS,
        lr=1e-1,
        batch_size=len(training_data),
        max_epochs=200,
        iterator_train__collate_fn=collate_amp,
        iterator_train__shuffle=False,
        iterator_valid__collate_fn=collate_amp,
        iterator_valid__shuffle=False,
        device=device,
        train_split=CVSplit(cv=5, random_state=1),
        callbacks=[
            EpochScoring(
                forces_score,
                on_train=False,
                use_caching=True,
                target_extractor=target_extractor,
            ),
            EpochScoring(
                energy_score,
                on_train=False,
                use_caching=True,
                target_extractor=target_extractor,
            ),
        ],
    )
    calc = AMP(training_data, net, label=filename)
    calc.train()
    return [training_data, net, filename]
Code Example #30
class PyTorchModel(BaseModel):
    def build_model(
        self,
        network=MVRegressor,
        device: str = "cpu",
        scale_data: bool = False,
        num_layers: int = 10,
        num_units: int = 50,
        dropout: float = 0.5,
        num_epochs: int = 10,
        batch_size: int = 128,
    ):

        self.scale_data = scale_data
        self.num_layers = num_layers
        self.num_units = num_units
        self.dropout = dropout
        self.num_epochs = num_epochs
        self.batch_size = batch_size

        if not all([hasattr(self, "input_dim"), hasattr(self, "output_dim")]):

            raise ValueError(
                "Please load dataset first to obtain proper sizes")

        if device == "cpu":
            self.device = device
        else:
            use_cuda = torch.cuda.is_available()
            self.device = torch.device("cuda" if use_cuda else "cpu")

        self.model = NeuralNetRegressor(
            network,
            device=self.device,
            module__input_dim=self.input_dim,
            module__output_dim=self.output_dim,
            module__n_layers=self.num_layers,
            module__num_units=self.num_units,
            module__p_dropout=self.dropout,
            max_epochs=self.num_epochs,
            criterion=nn.MSELoss,
            batch_size=self.batch_size,
            # Shuffle training data on each epoch
            iterator_train__shuffle=True,
            callbacks=[
                (
                    "lr_scheduler",
                    LRScheduler(policy=CyclicLR,
                                base_lr=0.001,
                                max_lr=0.01,
                                step_every="batch"),
                ),
            ],
        )

    def fit(self, X, y, **fit_params):

        if self.scale_data:
            X, y = self.scalar(X, y)

        X, y = (
            torch.tensor(X).float().to(device=self.device),
            torch.tensor(y).float().to(device=self.device),
        )
        self.model.fit(X, y, **fit_params)

    def load_model(
        self,
        input_dim: int,
        output_dim: int,
        filename: str,
        scale_data: bool = False,
    ):

        self.scale_data = scale_data
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.build_model(scale_data=scale_data)
        self.model = pickle.load(open(filename, "rb"))

    def predict(self, X):

        if self.scale_data:
            X = self.xscalar.transform(X)
        X = torch.tensor(X).float().to(device=self.device)
        preds = self.model.predict(X)

        if self.scale_data:
            preds = self.yscalar.inverse_transform(preds)

        return preds

    def sweep(
        self,
        params: Dict,
        X,
        y,
        search_algorithm: str = "bayesian",
        num_trials: int = 3,
        scoring_func: str = "r2",
        early_stopping: bool = False,
        results_csv_path: str = "outputs/results.csv",
        splitting_criteria: str = "timeseries",
        num_splits: int = 5,
        test_indices: list = None,
    ):

        start_dir = str(pathlib.Path(os.getcwd()).parent)
        module_dir = str(pathlib.Path(__file__).parent)
        # temporarily change directory to file directory and then reset
        os.chdir(module_dir)

        if self.scale_data:
            X, y = self.scalar(X, y)

        X, y = (
            torch.tensor(X).float().to(device=self.device),
            torch.tensor(y).float().to(device=self.device),
        )

        if splitting_criteria.lower() == "cv":
            cv = None
        elif splitting_criteria.lower() == "timeseries":
            cv = TimeSeriesSplit(n_splits=num_splits)
        elif splitting_criteria.lower() == "grouped":
            cv = GroupShuffleSplit(n_splits=num_splits)
        elif splitting_criteria.lower() == "fixed":
            if not isinstance(test_indices, list):
                raise ValueError(
                    "fixed split used but no test indices provided...")
            cv = PredefinedSplit(test_fold=test_indices)
        else:
            raise ValueError(
                f"Unknown splitting criteria provided: {splitting_criteria}, "
                "should be one of [cv, timeseries, grouped, fixed]"
            )

        if search_algorithm.lower() == "bohb":
            early_stopping = True

        if search_algorithm.lower() in ["bohb", "bayesian", "hyperopt", "optuna"]:
            search = TuneSearchCV(
                self.model,
                params,
                search_optimization=search_algorithm,
                n_trials=num_trials,
                early_stopping=early_stopping,
                scoring=scoring_func,
            )
        elif search_algorithm == "grid":
            search = GridSearchCV(
                self.model,
                param_grid=params,
                refit=True,
                cv=num_trials,
                scoring=scoring_func,
            )
        elif search_algorithm == "random":
            search = RandomizedSearchCV(
                self.model,
                param_distributions=params,
                refit=True,
                cv=num_trials,
                scoring=scoring_func,
            )
        else:
            raise NotImplementedError(
                "Search algorithm should be one of grid, hyperopt, bohb, optuna, bayesian, or random"
            )
        with mlflow.start_run() as run:
            search.fit(X, y)
        self.model = search.best_estimator_

        # set path back to initial
        os.chdir(start_dir)

        results_df = pd.DataFrame(search.cv_results_)
        logger.info(f"Best hyperparams: {search.best_params_}")

        if not pathlib.Path(results_csv_path).parent.exists():
            pathlib.Path(results_csv_path).parent.mkdir(exist_ok=True,
                                                        parents=True)
        logger.info(f"Saving sweeping results to {results_csv_path}")
        logger.info(f"Best score: {search.best_score_}")
        results_df.to_csv(results_csv_path)
        cols_keep = [col for col in results_df if "param_" in col]
        cols_keep += ["mean_test_score"]

        results_df = results_df[cols_keep]

        return results_df
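A hypothetical call to sweep, with a parameter space using skorch's double-underscore routing (names and values illustrative; load_dataset normally sets the dimensions):

model = PyTorchModel()
model.input_dim, model.output_dim = 10, 1  # normally set when loading data
model.build_model()
params = {
    "lr": [1e-3, 1e-2],
    "max_epochs": [10, 20],
    "module__num_units": [50, 100],
}
results = model.sweep(params, X, y, search_algorithm="random", num_trials=3)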