Example #1
    def setUp(self):
        """

        :return:
        """
        try:
            two_class = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
            multiclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_multiclass.csv'))

        except OSError as exp:
            #os.system("python gen_synthetic_datasets.py")
            import tests.gen_synthetic_datasets

            two_class = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
            multiclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_multiclass.csv'))

        finally:
            # Load expected values for a PLS-DA model with 2 classes
            self.expected_cvParams = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/pls_da_cvoarams.csv'))

            # check this
            self.da_mat = multiclass['Class_Vector'].values
            self.da = two_class['Class'].values
            self.xmat_multi = multiclass.iloc[:, 5::].values
            self.xmat = two_class.iloc[:, 1::].values

        x_scaler = ChemometricsScaler(1)
        y_scaler = ChemometricsScaler(1, with_mean=True, with_std=False)
        self.plsda = ChemometricsPLSDA(ncomps=3,
                                       xscaler=x_scaler,
                                       yscaler=y_scaler)
        self.plsda_multiy = ChemometricsPLSDA(ncomps=3,
                                              xscaler=x_scaler,
                                              yscaler=y_scaler)
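
For orientation, a minimal sketch of a test method that could consume these fixtures. The fit call mirrors the PLS examples further down; the reference comparison is only indicated in a comment, since the exact model attribute to compare against self.expected_cvParams is not shown in this snippet.

    def test_fit_twoclass_sketch(self):
        # Fit the two-class PLS-DA model against the binary class vector
        # loaded in setUp.
        self.plsda.fit(self.xmat, self.da)
        # Hypothetical check against the reference values loaded in setUp,
        # e.g. comparing cross-validation output with self.expected_cvParams.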
Example #2
    def test_scalers(self):
        """

        :return:
        """
        x_scaler_par = ChemometricsScaler(1 / 2)
        x_scaler_mc = ChemometricsScaler(0)

        pareto_model = ChemometricsPCA(ncomps=3, scaler=x_scaler_par)
        mc_model = ChemometricsPCA(ncomps=3, scaler=x_scaler_mc)

        pareto_model.fit(self.xmat)
        mc_model.fit(self.xmat)

        assert_allclose(pareto_model.loadings, self.expected_loadings_par)
        assert_allclose(pareto_model.scores, self.expected_scores_par)

        assert_allclose(mc_model.loadings, self.expected_loadings_mc)
        assert_allclose(mc_model.scores, self.expected_scores_mc)
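
The single argument to ChemometricsScaler in these examples is a scaling power: judging by the variable names, 0 gives mean centring, 1/2 Pareto scaling and 1 unit-variance scaling. A small NumPy sketch of that (assumed) column-wise rule:

import numpy as np

def scale_columns(X, power):
    # Assumed scaling rule: centre each column, then divide by std**power
    # (power=0 -> mean centring only, 0.5 -> Pareto, 1 -> unit variance).
    X = np.asarray(X, dtype=float)
    centred = X - X.mean(axis=0)
    return centred if power == 0 else centred / X.std(axis=0) ** power

X = np.array([[1.0, 10.0], [2.0, 20.0], [3.0, 30.0]])
print(scale_columns(X, 1 / 2))  # Pareto-scaled toy matrix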
Example #3
    def test_scalers(self):
        """

        :return:
        """
        x_scaler_par = ChemometricsScaler(1 / 2)
        y_scaler_par = ChemometricsScaler(1 / 2)
        x_scaler_mc = ChemometricsScaler(0)
        y_scaler_mc = ChemometricsScaler(0)

        pareto_model = ChemometricsPLS(ncomps=3,
                                       xscaler=x_scaler_par,
                                       yscaler=y_scaler_par)
        pareto_model_multiy = ChemometricsPLS(ncomps=3,
                                              xscaler=x_scaler_par,
                                              yscaler=y_scaler_par)
        mc_model = ChemometricsPLS(ncomps=3,
                                   xscaler=x_scaler_mc,
                                   yscaler=y_scaler_mc)
        mc_model_multiy = ChemometricsPLS(ncomps=3,
                                          xscaler=x_scaler_mc,
                                          yscaler=y_scaler_mc)

        pareto_model.fit(self.xmat, self.y)
        pareto_model_multiy.fit(self.xmat_multiy, self.ymat)
        mc_model.fit(self.xmat, self.y)
        mc_model_multiy.fit(self.xmat_multiy, self.ymat)

        assert_allclose(pareto_model.scores_t, self.expected_scores_t_par)
        assert_allclose(pareto_model.beta_coeffs, self.expected_betas_par)
        assert_allclose(pareto_model.VIP(), self.expected_vip_par)

        #assert_allclose(pareto_model_multiy.scores_t, self.expected_scores_t_yblock_par)
        #assert_allclose(pareto_model_multiy.beta_coeffs, self.expected_betacoefs_yblock_par)

        assert_allclose(mc_model.scores_t, self.expected_scores_t_mc)
        assert_allclose(mc_model.beta_coeffs, self.expected_betas_mc)
        assert_allclose(mc_model.VIP(), self.expected_vip_mc)
Example #4
    def setUp(self):

        # Generate 2 fake classification datasets, one with 2 classes and another with 3
        self.twoclass_dataset = make_classification(40,
                                                    n_features=100,
                                                    n_informative=5,
                                                    n_redundant=5,
                                                    n_classes=2)
        self.three_classdataset = make_classification(40,
                                                      n_features=100,
                                                      n_informative=5,
                                                      n_redundant=5,
                                                      n_classes=3)
        y_scaler = ChemometricsScaler(with_mean=False, with_std=False)
        self.plsreg = ChemometricsPLS(ncomps=3, yscaler=y_scaler)
        self.plslog = ChemometricsPLS_Logistic(ncomps=3)
Example #5
    def setUp(self):
        """

        :return:
        """
        try:
            multiclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_multiclass.csv'))
            twoclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
        except OSError as exp:
            #os.system("python gen_synthetic_datasets.py")
            import tests.gen_synthetic_datasets

            multiclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_multiclass.csv'))
            twoclass = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
        finally:
            # check this
            self.da_mat = multiclass['Class_Vector'].values
            self.da = twoclass['Class'].values
            self.xmat_multi = multiclass.iloc[:, 5::].values
            self.xmat = twoclass.iloc[:, 1::].values

        # Set up the same scalers
        y_scaler = ChemometricsScaler(0, with_std=False, with_mean=True)
        self.plsreg = ChemometricsPLS(ncomps=3, yscaler=y_scaler)
        self.plsda = ChemometricsPLSDA(ncomps=3)

        # Generate the dummy matrix so the PLS regression objects can be run
        # under the same conditions as the discriminant ones
        self.dummy_y = pds.get_dummies(self.da_mat).values
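
The dummy matrix built in the last line is simply a one-hot encoding of the multiclass vector, so a plain PLS regression can be fed the same response as the discriminant model. A standalone illustration of pds.get_dummies (toy data, not from the test suite):

import pandas as pds

class_vector = pds.Series([0, 1, 2, 1, 0])
dummy_y = pds.get_dummies(class_vector).astype(int).values  # one column per class
print(dummy_y)
# [[1 0 0]
#  [0 1 0]
#  [0 0 1]
#  [0 1 0]
#  [1 0 0]]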
Example #6
    def setUp(self):
        try:
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))

        except (IOError, OSError) as ioerr:
            import tests.gen_synthetic_datasets
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))

        self.mc_scaler = ChemometricsScaler(0)
        self.uv_scaler = ChemometricsScaler(1)
        self.par_scaler = ChemometricsScaler(1 / 2)
        self.y = regression_problem.values[:, 0][np.newaxis].T
        self.xmat = regression_problem.values[:, 1:4]

        self.xmat_mc = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_mc.csv'),
                                  delimiter=',')
        self.xmat_uv = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_uv.csv'),
                                  delimiter=',')
        self.xmat_par = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_par.csv'),
                                   delimiter=',')

        self.y_mc = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                            './test_data/scaler_y_mc.csv'),
                               delimiter=',')
        self.y_uv = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                            './test_data/scaler_y_uv.csv'),
                               delimiter=',')
        self.y_par = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                             './test_data/scaler_y_par.csv'),
                                delimiter=',')
Example #7
from pyChemometrics import ChemometricsScaler, ChemometricsPCA
import numpy as np

import pandas as pds

t_dset = pds.read_csv('./tests/test_data/classification_twoclass.csv')
xmat = t_dset.iloc[:, 1::].values

x_scaler = ChemometricsScaler(1)
pcamodel = ChemometricsPCA(ncomps=3, scaler=x_scaler)

pcamodel.fit(xmat)

#pcamodel._screecv_optimize_ncomps(xmat, 10, stopping_condition=0.05)

np.random.seed(0)

pcamodel.cross_validation(xmat)

pcamodel._screecv_optimize_ncomps(xmat, 10, stopping_condition=0.05)

np.savetxt('./tests/test_data/pca_loadings.csv',
           pcamodel.loadings,
           fmt='%.18e',
           delimiter=',',
           newline='\n',
           header='',
           footer='',
           comments='#')

np.savetxt('./tests/test_data/pca_scores.csv',
           pcamodel.scores,
           fmt='%.18e',
           delimiter=',',
           newline='\n',
           header='',
           footer='',
           comments='#')
Example #8
class TestScalerObject(unittest.TestCase):
    """

    Use a made up dataset

    """
    def setUp(self):
        try:
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))

        except (IOError, OSError) as ioerr:
            import tests.gen_synthetic_datasets
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))

        self.mc_scaler = ChemometricsScaler(0)
        self.uv_scaler = ChemometricsScaler(1)
        self.par_scaler = ChemometricsScaler(1 / 2)
        self.y = regression_problem.values[:, 0][np.newaxis].T
        self.xmat = regression_problem.values[:, 1:4]

        self.xmat_mc = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_mc.csv'),
                                  delimiter=',')
        self.xmat_uv = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_uv.csv'),
                                  delimiter=',')
        self.xmat_par = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/scaler_xmat_par.csv'),
                                   delimiter=',')

        self.y_mc = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                            './test_data/scaler_y_mc.csv'),
                               delimiter=',')
        self.y_uv = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                            './test_data/scaler_y_uv.csv'),
                               delimiter=',')
        self.y_par = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                             './test_data/scaler_y_par.csv'),
                                delimiter=',')

    def test_scaleVector(self):
        """
        Check that scaling works with arbitrary value between 0 and 1 as expected on a single vector.
        """

        assert_allclose(
            self.mc_scaler.fit_transform(self.y).squeeze(), self.y_mc)
        assert_allclose(
            self.uv_scaler.fit_transform(self.y).squeeze(), self.y_uv)
        assert_allclose(
            self.par_scaler.fit_transform(self.y).squeeze(), self.y_par)

    def test_scaleMatrix(self):
        """
        Check that scaling works with arbitrary value between 0 and 1 as expected on a matrix of m samples by n features.
        """

        assert_allclose(self.mc_scaler.fit_transform(self.xmat), self.xmat_mc)
        assert_allclose(self.uv_scaler.fit_transform(self.xmat), self.xmat_uv)
        assert_allclose(self.par_scaler.fit_transform(self.xmat),
                        self.xmat_par)

    def test_inverseTransformVector(self):
        """
        Test inverse transform of a vector
        """

        self.mc_scaler.fit(self.y)
        self.uv_scaler.fit(self.y)
        self.par_scaler.fit(self.y)

        assert_allclose(self.mc_scaler.inverse_transform(self.y_mc),
                        self.y.squeeze())
        assert_allclose(self.uv_scaler.inverse_transform(self.y_uv),
                        self.y.squeeze())
        assert_allclose(self.par_scaler.inverse_transform(self.y_par),
                        self.y.squeeze())

    def test_inverseTransformMatrix(self):
        """
        Test inverse transform of a matrix
        """

        self.mc_scaler.fit(self.xmat)
        self.uv_scaler.fit(self.xmat)
        self.par_scaler.fit(self.xmat)

        assert_allclose(self.mc_scaler.inverse_transform(self.xmat_mc),
                        self.xmat)
        assert_allclose(self.uv_scaler.inverse_transform(self.xmat_uv),
                        self.xmat)
        assert_allclose(self.par_scaler.inverse_transform(self.xmat_par),
                        self.xmat)
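
The inverse-transform tests above boil down to a round-trip property: for any scaling power, inverse_transform(fit_transform(X)) recovers X. A condensed sketch of the same check, assuming only the fit_transform/inverse_transform API already used in this class:

import numpy as np
from numpy.testing import assert_allclose
from pyChemometrics import ChemometricsScaler

X = np.random.RandomState(0).normal(size=(20, 5))
for power in (0, 1 / 2, 1):
    scaler = ChemometricsScaler(power)
    # Scaling followed by the inverse transform should reproduce the data.
    assert_allclose(scaler.inverse_transform(scaler.fit_transform(X)), X)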
Example #9
    def setUp(self):

        try:
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))
            multiblock_regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression_multiblock.csv'))

        except (IOError, OSError) as ioerr:
            #os.system("python gen_synthetic_datasets.py")
            import tests.gen_synthetic_datasets
            regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression.csv'))
            multiblock_regression_problem = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/regression_multiblock.csv'))

        finally:
            # Load expected values for a PLS regression against a Y vector
            self.expected_loadings_p = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_loadings_p.csv'),
                                                  delimiter=',')
            self.expected_loadings_q = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_loadings_q.csv'),
                                                  delimiter=',')[np.newaxis, :]
            self.expected_weights_w = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_weights_w.csv'),
                                                 delimiter=',')
            self.expected_weights_c = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_weights_c.csv'),
                                                 delimiter=',')[np.newaxis, :]
            self.expected_scores_t = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_scores_t.csv'),
                                                delimiter=',')
            self.expected_scores_u = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_scores_u.csv'),
                                                delimiter=',')
            self.expected_betacoefs = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_betas.csv'),
                                                 delimiter=',')[:, np.newaxis]
            self.expected_vips = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_vip.csv'),
                                            delimiter=',')
            self.expected_dmodx = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_dmodx.csv'),
                                             delimiter=',')

            # Load expected values for a PLS regression model against a Y matrix
            #self.expected_loadings_p_yblock = np.loadtxt('./test_data/pls_reg_yblock_loadings_p.csv', delimiter=',')
            #self.expected_weights_w_yblock = np.loadtxt('./test_data/pls_reg_yblock_weights_w.csv', delimiter=',')
            #self.expected_scores_t_yblock = np.loadtxt('./test_data/pls_reg_yblock_scores_t.csv', delimiter=',')
            #self.expected_scores_u_yblock = np.loadtxt('./test_data/pls_reg_yblock_scores_u.csv', delimiter=',')
            #self.expected_weights_c_yblock = np.loadtxt('./test_data/pls_reg_yblock_weights_c.csv', delimiter=',')
            #self.expected_loadings_q_yblock = np.loadtxt('./test_data/pls_reg_yblock_loadings_q.csv', delimiter=',')
            #self.expected_betacoefs_yblock = np.loadtxt('./test_data/pls_reg_yblock_betacoefs.csv', delimiter=',')

            self.expected_modelParameters = {
                'R2Y': 0.99442967438303576,
                'R2X': 0.022903901163376705,
                'SSYcomp': np.array([5.42418672, 1.20742786, 0.27851628]),
                'SSXcomp':
                np.array([9750.59475071, 9779.57249348, 9770.96098837])
            }

            self.expected_cvParameters = {
                'Q2Y': 0.069284226071602006,
                'Q2X': -0.12391667143436425,
                'MeanR2X_Training': 0.025896665665079883,
                'MeanR2Y_Training': 0.99636477396947942,
                'StdevR2Y_Training': 0.00091660538957527582,
                'StdevR2X_Training': 0.0010098198504153058,
                'StdevR2X_Test': 0.02386260538832127,
                'StdevR2Y_Test': 0.25034195769401973,
                'MeanR2X_Test': -0.022542842216950101,
                'MeanR2Y_Test': 0.096991536519031446
            }

            self.expected_t2 = np.array([7.00212848, 6.63400492, 5.6325462])
            self.expected_outliers_t2 = np.array([5, 33])
            self.expected_outliers_dmodx = np.array([])

            self.expected_scores_t_par = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_scores_t_par.csv'),
                                                    delimiter=',')
            self.expected_betas_par = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_betas_par.csv'),
                                                 delimiter=',')[:, np.newaxis]
            self.expected_scores_t_mc = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_scores_t_mc.csv'),
                                                   delimiter=',')
            self.expected_betas_mc = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_betas_mc.csv'),
                                                delimiter=',')[:, np.newaxis]

            self.expected_vip_mc = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_vip_mc.csv'),
                                              delimiter=',')
            self.expected_vip_par = np.loadtxt(os.path.join(
                os.path.dirname(__file__), './test_data/pls_vip_par.csv'),
                                               delimiter=',')

            # check this
            self.y = regression_problem.iloc[:, 0].values
            self.ymat = multiblock_regression_problem.values
            self.xmat = regression_problem.iloc[:, 1::].values
            self.xmat_multiy = multiblock_regression_problem.values

            self.expected_permutation = {}

        x_scaler = ChemometricsScaler(1)
        y_scaler = ChemometricsScaler(1)
        self.plsreg = ChemometricsPLS(ncomps=3,
                                      xscaler=x_scaler,
                                      yscaler=y_scaler)
        self.plsreg_multiblock = ChemometricsPLS(ncomps=3,
                                                 xscaler=x_scaler,
                                                 yscaler=y_scaler)
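
A minimal sketch of a test that might consume this fixture: fit, scores_t, beta_coeffs and VIP() are taken from Example #3 above, and assert_allclose is assumed to be imported from numpy.testing at module level as in the other examples. Pairing the unit-variance-scaled model with these particular reference files is an assumption based on the matching names.

    def test_fit_sketch(self):
        # Fit the single-Y PLS regression and compare against stored references.
        self.plsreg.fit(self.xmat, self.y)
        assert_allclose(self.plsreg.scores_t, self.expected_scores_t)
        assert_allclose(self.plsreg.beta_coeffs, self.expected_betacoefs)
        assert_allclose(self.plsreg.VIP(), self.expected_vips)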
Example #10
    def setUp(self):
        """

        :return:
        """
        try:
            # Generate a fake classification dataset
            t_dset = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
            self.xmat = t_dset.iloc[:, 1::].values

        except (IOError, OSError, FileNotFoundError) as ioerr:
            import tests.gen_synthetic_datasets
            #os.system('python gen_synthetic_datasets.py')
            t_dset = pds.read_csv(
                os.path.join(os.path.dirname(__file__),
                             './test_data/classification_twoclass.csv'))
            self.xmat = t_dset.iloc[:, 1::].values

        self.expected_modelParameters = {
            'R2X': 0.12913056143673818,
            'S0': 0.9803124001345157,
            'VarExp': np.array([9.44045066, 8.79710591, 8.11561924]),
            'VarExpRatio': np.array([0.04625821, 0.04310582, 0.03976653])
        }
        self.expected_cvParameters = {
            'Q2X':
            -0.10571035538454221,
            'Mean_VarExp_Test':
            -0.0090083829247783621,
            'Stdev_VarExp_Test':
            0.0037778709253728452,
            'Mean_VarExpRatio_Training':
            np.array([0.05108043, 0.04669199, 0.04380617]),
            'Stdev_VarExpRatio_Training':
            np.array([0.00130025, 0.00094489, 0.00044059])
        }

        self.expected_scores = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_scores.csv'),
                                          delimiter=',')
        self.expected_loadings = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_loadings.csv'),
                                            delimiter=',')

        self.expected_scores_mc = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_scores_mc.csv'),
                                             delimiter=',')
        self.expected_loadings_mc = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_loadings_mc.csv'),
                                               delimiter=',')

        self.expected_scores_par = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_scores_par.csv'),
                                              delimiter=',')
        self.expected_loadings_par = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_loadings_par.csv'),
                                                delimiter=',')

        self.expected_dmodx = np.loadtxt(os.path.join(
            os.path.dirname(__file__), './test_data/pca_dmodx.csv'),
                                         delimiter=',')
        cvloadings = np.loadtxt(os.path.join(os.path.dirname(__file__),
                                             './test_data/pca_cvloads.csv'),
                                delimiter=',')
        self.expected_cv_meanloadings = cvloadings[0:3, :]
        self.expected_cv_stdevloadings = cvloadings[3::, :]

        self.expected_t2 = np.array([9.00313686, 8.69095296, 8.34753638])
        self.expected_outlier_dmodx = np.array([])
        self.expected_outlier_t2 = np.array([14])
        self.x_scaler = ChemometricsScaler(1)
        self.pcamodel = ChemometricsPCA(ncomps=3, scaler=self.x_scaler)
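
A matching sketch for this PCA fixture: fit, scores and loadings are used the same way in Examples #2 and #7; comparing the unit-variance-scaled model against expected_scores / expected_loadings is an assumption based on the file names loaded above.

    def test_fit_sketch(self):
        # Fit the UV-scaled PCA model and compare against the stored references.
        self.pcamodel.fit(self.xmat)
        assert_allclose(self.pcamodel.scores, self.expected_scores)
        assert_allclose(self.pcamodel.loadings, self.expected_loadings)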
Example #11
from pyChemometrics import ChemometricsScaler, ChemometricsPLS
import numpy as np

np.random.seed(0)

import pandas as pds
# Use the standard datasets
t_dset = pds.read_csv('./tests/test_data/regression.csv')
xmat = t_dset.iloc[:, 1:4].values
y = t_dset.iloc[:, 0].values

y = y[np.newaxis].T

mc_scaler = ChemometricsScaler(0)
uv_scaler = ChemometricsScaler(1)
par_scaler = ChemometricsScaler(1 / 2)

xmat_mc = mc_scaler.fit_transform(xmat)
y_mc = mc_scaler.fit_transform(y)

xmat_uv = uv_scaler.fit_transform(xmat)
y_uv = uv_scaler.fit_transform(y)

xmat_par = par_scaler.fit_transform(xmat)
y_par = par_scaler.fit_transform(y)

np.savetxt('./tests/test_data/scaler_xmat_mc.csv',
           xmat_mc,
           fmt='%.18e',
           delimiter=',',
           newline='\n',