def _check_datasets(self, dataset=None, csv_dataset=None):
     if csv_dataset:
         if dataset:
             print(
                 "Dataset and csv.dataset are given, hence dataset will be overwritten by csv.data."
             )
         dataset = DataSet.read_csv(csv_dataset, index_col=None)
     return dataset
def test_train_experimental_emulator():
    """Train, evaluate, plot, save and reload an ExperimentalEmulator.

    The emulator is trained on the ``reizman_suzuki_case_1`` CSV with an
    ``ANNRegressor``, must reach a mean test R^2 above 0.8, and must give the
    same attributes and score after a save/load round trip.
    """
    model_name = "reizman_suzuki_case_1"
    domain = ReizmanSuzukiEmulator.setup_domain()
    ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv")
    exp = ExperimentalEmulator(model_name,
                               domain,
                               dataset=ds,
                               regressor=ANNRegressor)

    # Plain cross-validated training. To exercise grid search instead, pass
    # e.g. {"regressor__net__max_epochs": [1, 1000]} as search_params.
    exp.train(cv_folds=5,
              max_epochs=1000,
              random_state=100,
              search_params=None,
              verbose=0)

    # Testing
    res = exp.test()
    assert res["test_r2"].mean() > 0.8

    # Test plotting (only checks that the call succeeds)
    exp.parity_plot(output_variables="yld", include_test=True)

    # Test saving/loading
    save_dir = "test_ee"
    try:
        exp.save(save_dir)
        exp_2 = ExperimentalEmulator.load(model_name, save_dir)

        # Compare the sequences themselves; all(a) == all(b) would only
        # compare two booleans and pass for almost any pair of lists.
        assert list(exp.descriptors_features) == list(
            exp_2.descriptors_features)
        assert exp.n_examples == exp_2.n_examples
        assert list(exp.output_variable_names) == list(
            exp_2.output_variable_names)
        assert exp.clip == exp_2.clip

        # Give the reloaded emulator the same train/test split so that its
        # score is computed on the same data as the original.
        exp_2.X_train, exp_2.y_train, exp_2.X_test, exp_2.y_test = (
            exp.X_train,
            exp.y_train,
            exp.X_test,
            exp.y_test,
        )
        res = exp_2.test(X_test=exp.X_test, y_test=exp.y_test)
        # NOTE(review): this plots the original emulator again; exp_2 may
        # have been intended -- confirm.
        exp.parity_plot(output_variables="yld", include_test=True)
        assert res["test_r2"].mean() > 0.8
    finally:
        # Always remove the saved model, even when an assertion above fails.
        shutil.rmtree(save_dir, ignore_errors=True)
Beispiel #3
0
def _train_baumgartner(use_descriptors=False,
                       show_plots=False,
                       save_plots=True):
    """Train, test and save an emulator for the Baumgartner cross-coupling data.

    Args:
        use_descriptors: When True, ``["catalyst", "base"]`` is passed as
            ``descriptors_features`` and ``"_descriptors"`` is appended to
            the model name; otherwise no descriptor features are used.
        show_plots: When True, call ``plt.show()`` after building the parity
            plot.
        save_plots: When True, write the parity plot to
            ``results/<model_name>.png``.

    Returns:
        The results returned by ``exp.train`` updated in place with the
        results returned by ``exp.test``.
    """
    # Setup
    model_name = "baumgartner_aniline_cn_crosscoupling"
    domain = BaumgartnerCrossCouplingEmulator.setup_domain()
    # The data file is named after the base model name, so read it before
    # the optional "_descriptors" suffix is appended.
    ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv")

    # Create emulator and train
    if use_descriptors:
        model_name += "_descriptors"
    exp = ExperimentalEmulator(
        model_name,
        domain,
        dataset=ds,
        regressor=ANNRegressor,
        output_variable_names=["yield"],
        descriptors_features=["catalyst", "base"] if use_descriptors else [],
    )
    res = exp.train(max_epochs=MAX_EPOCHS,
                    cv_folds=CV_FOLDS,
                    random_state=100,
                    test_size=0.2)

    # Run test
    res_test = exp.test()
    res.update(res_test)

    # Save emulator. parents=True so that a missing MODELS_PATH does not
    # throw away a finished training run.
    model_path = MODELS_PATH / model_name
    model_path.mkdir(parents=True, exist_ok=True)
    exp.save(model_path)

    # Make plot for posterity's sake
    fig, _ = exp.parity_plot(include_test=True)
    if save_plots:
        # savefig does not create missing directories itself.
        pathlib.Path("results").mkdir(parents=True, exist_ok=True)
        fig.savefig(f"results/{model_name}.png", dpi=100)
    if show_plots:
        plt.show()

    return res
Beispiel #4
0
    results_average = [{
        f"avg_{score_name}": scores.mean()
        for score_name, scores in result.items()
    } for result in results]
    index = [f"case_{i}" for i in range(1, 5)]

    results_df = pd.DataFrame.from_records(results_average, index=index)
    results_df.index.rename("case", inplace=True)
    results_df.to_csv(f"results/reizman_suzuki_scores.csv")


def train_one_reizman(case, show_plots=False, save_plots=True):
    # Setup
    model_name = f"reizman_suzuki_case_{case}"
    domain = ReizmanSuzukiEmulator.setup_domain()
    ds = DataSet.read_csv(DATA_PATH / f"{model_name}.csv")

    # Create emulator and train
    exp = ExperimentalEmulator(
        model_name,
        domain,
        dataset=ds,
        regressor=ANNRegressor,
    )
    res = exp.train(max_epochs=MAX_EPOCHS,
                    cv_folds=CV_FOLDS,
                    random_state=100,
                    test_size=0.2)

    # Run test
    res_test = exp.test()