def test_model_persistence(mocker):
    """Check that a fitted SISSORegressor survives a joblib dump/load round trip.

    The SISSO execution is mocked: ``pysisso.sklearn.Custodian.run`` is patched
    so that it only copies the reference ``SISSO.out`` file of the
    ``perfect_linear_5pts`` run into the current directory.
    """

    # Mock the run of the custodian by just copying a reference SISSO.out file
    def copy_sisso_out():
        shutil.copy(
            os.path.join(TEST_FILES_DIR, "runs", "perfect_linear_5pts", "SISSO.out"),
            "SISSO.out",
        )

    mocker.patch.object(
        pysisso.sklearn.Custodian,
        "run",
        return_value=[],
        side_effect=copy_sisso_out,
    )

    with ScratchDir("."):
        # Simple single task SISSO run on the five points of y = x - 1
        sisso_reg = SISSORegressor(desc_dim=1, rung=0, subs_sis=1, method="L0")
        sisso_reg.fit(np.array([[1], [2], [3], [4], [5]]), np.array([0, 1, 2, 3, 4]))

        # Round trip through joblib
        joblib.dump(sisso_reg, filename="model.joblib")
        sisso_reg_loaded = joblib.load("model.joblib")

        # The reloaded estimator must still predict. Predictions are floats
        # computed from the parsed model, so compare them with a tolerance
        # rather than with exact equality.
        pred = sisso_reg_loaded.predict([[1.5], [4.5]])
        assert pred[0] == pytest.approx(0.5)
        assert pred[1] == pytest.approx(3.5)

        # Hyperparameters and fitted coefficients must be preserved
        assert sisso_reg.get_params() == sisso_reg_loaded.get_params()
        model = sisso_reg.sisso_out.model
        model_loaded = sisso_reg_loaded.sisso_out.model
        assert len(model.coefficients) == 1
        assert len(model_loaded.coefficients) == 1
        assert model.coefficients[0] == pytest.approx(model_loaded.coefficients[0])
def test_sisso_regressor_omp(mocker):
    """Check the ``SISSORegressor.OMP`` constructor and the SISSO.in it writes.

    The SISSO execution is mocked: ``pysisso.sklearn.Custodian.run`` is patched
    so that it only copies the reference ``SISSO.out`` file of the ``OMP`` run
    into the current directory.
    """

    # Mock the run of the custodian by just copying a reference SISSO.out file
    def copy_sisso_out():
        shutil.copy(
            os.path.join(TEST_FILES_DIR, "runs", "OMP", "SISSO.out"),
            "SISSO.out",
        )

    mocker.patch.object(
        pysisso.sklearn.Custodian,
        "run",
        return_value=[],
        side_effect=copy_sisso_out,
    )

    with ScratchDir("."):
        # Simple SISSO run with OMP
        sisso_reg = SISSORegressor.OMP(desc_dim=4)
        assert sisso_reg.rung == 0
        assert sisso_reg.subs_sis == 1
        assert sisso_reg.desc_dim == 4
        assert sisso_reg.method == "L0"
        assert sisso_reg.L1L0_size4L0 is None

        X = np.array(
            [
                [8, 1, 3.01, 4],
                [6, 2, 3.02, 3],
                [2, 3, 3.01, 0],
                [10, 4, 3.02, -8],
                [4, 5, 3.01, 10],
            ]
        )
        # The target only depends on the columns with index 1 and 3
        y = 0.9 * X[:, 1] + 0.1 * X[:, 3] - 1.0
        sisso_reg.fit(X, y)

        # The generated input file must match the reference one line by line.
        # Both files are opened in a context manager so the handles are closed
        # deterministically (no ResourceWarning from dangling file objects).
        actual_sin = "SISSO_dir/SISSO.in"
        ref_sin = os.path.join(TEST_FILES_DIR, "runs", "OMP", "SISSO.in")
        with open(actual_sin) as actual_file, open(ref_sin) as ref_file:
            assert actual_file.readlines() == ref_file.readlines()

        # The parsed output must agree with the regressor's parameters
        sisso_out = SISSOOut.from_file(filepath="SISSO_dir/SISSO.out")
        assert sisso_out.params.n_rungs == sisso_reg.rung
        assert sisso_out.params.SIS_subspaces_sizes == [sisso_reg.subs_sis]
        assert sisso_out.params.descriptor_dimension == sisso_reg.desc_dim
        assert sisso_out.params.sparsification_method == sisso_reg.method

        sisso_model = sisso_out.model
        assert str(sisso_model.descriptors[0]) == "(feature_1)"
        assert str(sisso_model.descriptors[1]) == "(feature_3)"
from pysisso.outputs import SISSOOut from pysisso.sklearn import SISSORegressor # Define the data set X = np.array([ [8, 1, 3.01, 4], [6, 2, 3.02, 3], [2, 3, 3.01, 0], [10, 4, 3.02, -8], [4, 5, 3.01, 10], ]) y = 0.9 * X[:, 1] + 0.1 * X[:, 3] - 1.0 # Define the regressor and fit the data sisso_reg = SISSORegressor.OMP(desc_dim=4) sisso_reg.fit(X, y, columns=["feature_0", "feature_1", "feature_2", "feature_3"]) # Get the final model obtained sisso_out = SISSOOut.from_file(filepath="SISSO_dir/SISSO.out") sisso_model = sisso_out.model # Get the descriptors descriptors = [str(d) for d in sisso_model.descriptors] # Print the order of the OMP features # Should start with feature_1, then feature_3. # feature_0 and feature_2 might be interchanged. for idesc, desc in enumerate(descriptors):
fig, subplot = plt.subplots() subplot.plot(xlin, ylin, "-", color="C0", label="True function") subplot.plot(X, y, "o", color="C1", label="Data") subplot.set_xlabel("x") subplot.set_ylabel("f(x)") subplot.set_title(TITLE) subplot.legend() if SAVE_FIGURES: fig.savefig("true_data.pdf") if PLOT_FIGURES: plt.show() # Define the regressor, fit the data and predict sisso_regressor = SISSORegressor( rung=1, opset="(+)(*)(^2)(^3)(^-1)(cos)(sin)", desc_dim=3, clean_run_dir=CLEAN_RUN_DIR, ) X = X.reshape( -1, 1) # only one feature, X is initially defined as 1D, sklearn needs 2D sisso_regressor.fit(X, y) ylin_pred = sisso_regressor.predict(xlin) # Plot the true and predicted functions, together with the data fig, subplot = plt.subplots() subplot.plot(xlin, ylin, "-", color="C0", label="True function") subplot.plot(X, y, "o", color="C1", label="Data") subplot.plot(xlin, ylin_pred, "-", color="C2", label="Predicted function") subplot.set_xlabel("x") subplot.set_ylabel("f(x)") subplot.set_title(TITLE)
def test_sisso_regressor(mocker):
    """Test SISSORegressor fit/predict, run-directory handling and input checks.

    No SISSO binary is executed: ``pysisso.sklearn.Custodian.run`` is patched so
    that it only copies a reference ``SISSO.out`` file into the current
    directory. ``pysisso.sklearn.makedirs_p`` is spied on to check which run
    directory gets created for each combination of ``run_dir`` and
    ``clean_run_dir``.
    """
    # Input rows are kept as plain lists so that every fit receives its own
    # freshly built array / DataFrame, exactly as if they were written inline.
    single_feature_rows = [[1], [2], [3], [4], [5]]
    two_feature_rows = [[1, 5], [2, 3], [3, 89], [4, 1], [5, 4]]
    multi_task_targets = [[0, -3], [1, -5], [2, -7], [3, -9], [4, -11]]

    def mock_custodian_run(reference_run):
        """Patch Custodian.run to just copy the SISSO.out of *reference_run*."""

        def copy_sisso_out():
            shutil.copy(
                os.path.join(TEST_FILES_DIR, "runs", reference_run, "SISSO.out"),
                "SISSO.out",
            )

        mocker.patch.object(
            pysisso.sklearn.Custodian,
            "run",
            return_value=[],
            side_effect=copy_sisso_out,
        )

    def new_regressor(**run_options):
        """Build the regressor used throughout this test, plus *run_options*."""
        return SISSORegressor(
            desc_dim=1, rung=0, subs_sis=1, method="L0", **run_options
        )

    def fit_and_check_single_task(**run_options):
        """Fit the five points of y = x - 1 and check two predictions."""
        sisso_reg = new_regressor(**run_options)
        sisso_reg.fit(np.array(single_feature_rows), np.array([0, 1, 2, 3, 4]))
        pred = sisso_reg.predict([[1.5], [4.5]])
        # Predictions are floats computed from the parsed model: compare them
        # with a tolerance, as done for the multi task predictions below.
        assert pred[0] == pytest.approx(0.5)
        assert pred[1] == pytest.approx(3.5)

    # Simple single task SISSO runs with various options for the run directory
    mock_custodian_run("perfect_linear_5pts")
    makedirs_spy = mocker.spy(pysisso.sklearn, "makedirs_p")

    # Default run directory, kept after the run
    with ScratchDir("."):
        fit_and_check_single_task()
        assert os.path.exists("SISSO_dir")
        makedirs_spy.assert_called_with("SISSO_dir")
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Custom run directory, kept after the run
    with ScratchDir("."):
        fit_and_check_single_task(run_dir="mySISSOdir")
        assert os.path.exists("mySISSOdir")
        assert not os.path.exists("SISSO_dir")
        makedirs_spy.assert_called_with("mySISSOdir")
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Custom run directory, removed after the run
    with ScratchDir("."):
        fit_and_check_single_task(run_dir="mySISSOdir", clean_run_dir=True)
        assert not os.path.exists("mySISSOdir")
        makedirs_spy.assert_called_with("mySISSOdir")
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Default run directory, removed after the run
    with ScratchDir("."):
        fit_and_check_single_task(clean_run_dir=True)
        assert not os.path.exists("SISSO_dir")
        makedirs_spy.assert_called_with("SISSO_dir")
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Run with a temporary directory (i.e. when run_dir is None, useful for CV)
    # TODO : mocking tempfile did not work here for some reason ...
    mocker.patch(
        "pysisso.sklearn.get_timestamp",
        return_value="2018_09_28_16_04_54_017895",
    )
    with ScratchDir("."):
        fit_and_check_single_task(run_dir=None, clean_run_dir=False)
        assert os.path.exists("SISSO_runs")
        dirs = os.listdir("SISSO_runs")
        assert len(dirs) == 1
        sisso_dir = dirs[0]
        assert sisso_dir.startswith("SISSO_dir_2018_09_28_16_04_54_017895_")
        makedirs_spy.assert_called_with("SISSO_runs")
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Run with a temporary directory (i.e. when run_dir is None, useful for CV),
    # this time cleaning it up after the run
    with ScratchDir("."):
        fit_and_check_single_task(run_dir=None, clean_run_dir=True)
        assert os.path.exists("SISSO_runs")
        dirs = os.listdir("SISSO_runs")
        assert len(dirs) == 0
        assert makedirs_spy.call_count == 1
        makedirs_spy.reset_mock()

    # Simple multi task SISSO run
    mock_custodian_run("perfect_linear_5pts_multi")
    with ScratchDir("."):
        sisso_reg = new_regressor()
        sisso_reg.fit(np.array(single_feature_rows), np.array(multi_task_targets))
        pred = sisso_reg.predict([[1.5], [4.5]])
        assert pred[0] == pytest.approx([0.5, -4])
        assert pred[1] == pytest.approx([3.5, -10])
        assert sisso_reg.columns == ["feat1"]

    # Test of initializations and errors
    # Run with a numpy array
    with ScratchDir("."):
        sisso_reg = new_regressor()
        sisso_reg.fit(np.array(two_feature_rows), np.array(multi_task_targets))
        assert sisso_reg.columns == ["feat1", "feat2"]

    # Run with a pandas Dataframe
    with ScratchDir("."):
        sisso_reg = new_regressor()
        X_df = pd.DataFrame(two_feature_rows, columns=["a", "b"])
        sisso_reg.fit(X_df, np.array(multi_task_targets))
        assert sisso_reg.columns == ["a", "b"]

    # Run raising errors about columns
    with ScratchDir("."):
        sisso_reg = new_regressor()
        X_df = pd.DataFrame(two_feature_rows, columns=["a", "b"])
        with pytest.raises(
            ValueError,
            match=r"Columns should be of the size of the " r"second axis of X.",
        ):
            sisso_reg.fit(
                X_df,
                np.array(multi_task_targets),
                columns=["a", "b", "c"],
            )

    # Run raising errors about index (y shorter than X)
    with ScratchDir("."):
        sisso_reg = new_regressor()
        X_df = pd.DataFrame(single_feature_rows)
        with pytest.raises(ValueError, match=r"Index, X and y should have same size."):
            sisso_reg.fit(X_df, np.array([[0], [1], [2], [3]]))

    # Run raising errors about index (explicit index longer than X and y)
    with ScratchDir("."):
        sisso_reg = new_regressor()
        X_df = pd.DataFrame(single_feature_rows)
        with pytest.raises(ValueError, match=r"Index, X and y should have same size."):
            sisso_reg.fit(
                X_df,
                np.array([[0], [1], [2], [3], [4]]),
                index=["a", "b", "c", "d", "e", "f"],
            )

    # Run with a wrong shape for y target
    with ScratchDir("."):
        sisso_reg = new_regressor()
        X_df = pd.DataFrame(single_feature_rows)
        with pytest.raises(ValueError, match=r"Wrong shapes."):
            sisso_reg.fit(X_df, np.array([[[0], [1], [2], [3], [4]]]))
subplot.set_ylabel("f(x)") subplot.set_title(TITLE) subplot.legend() if SAVE_FIGURES: fig.savefig("true_data.pdf") if PLOT_FIGURES: plt.show() # Define the regressor and the grid search, fit the data and predict # Note that run_dir HAS to be None here, so that concurrent SISSO runs from different # folds and/or hyperparameters sets do not interfere with one another sisso_regressor = SISSORegressor( rung=1, opset="(+)(-)(*)(^2)(^3)(^-1)(exp)(sin)(cos)", desc_dim=3, subs_sis=40, method="L1L0", L1L0_size4L0=15, run_dir=None, clean_run_dir=CLEAN_RUN_DIR, ) param_grid = {"rung": [0, 1, 2], "desc_dim": [2, 3, 4]} grid_search = GridSearchCV( estimator=sisso_regressor, param_grid=param_grid, cv=4, n_jobs=4 ) X = X.reshape(-1, 1) # only one feature, X is initially defined as 1D, sklearn needs 2D grid_search.fit(X, y) ylin_pred = grid_search.predict(xlin) # Plot the true and predicted functions, together with the data fig, subplot = plt.subplots() subplot.plot(xlin, ylin, "-", color="C0", label="True function")