def run(self):
    """Run the selected dimensionality-reduction method and keep its fit.

    The method name and data key are read from the two combo boxes, the
    method parameters come from ``getMethodParams`` and ``dim_red`` is
    called on the ``df`` of the selected data set using the ``'wvl'``
    column.  The returned fit object is stored in ``self.dimred`` under
    the key ``'<datakey>-<method>'`` and that key is appended to
    ``self.dimredkeys``.
    """
    method = self.chooseMethodComboBox.currentText()
    datakey = self.chooseDataComboBox.currentText()
    method_settings = self.getMethodParams(self.chooseMethodComboBox.currentIndex())

    # For LDA and LFDA, getMethodParams hands back a third value (ycol)
    # that is forwarded to dim_red as a keyword argument.
    extra_kwargs = {}
    if method in ('LDA', 'LFDA'):
        params, modelkey, ycol = method_settings
        extra_kwargs['ycol'] = ycol
    else:
        params, modelkey = method_settings

    # load_fit is always False here; the returned frame is not used below.
    df, dimred_obj = dim_red(self.data[datakey].df, 'wvl', method, [], params, False,
                             **extra_kwargs)

    dimredkey = '-'.join((datakey, method))
    self.dimredkeys.append(dimredkey)
    self.dimred[dimredkey] = dimred_obj
Ejemplo n.º 2
0
 def dim_red(self, col, method, params, kws, load_fit):
     """Run ``dim_red.dim_red`` on ``self.df`` and store both results.

     All arguments are forwarded unchanged as keyword arguments.  The first
     returned value replaces ``self.df``; the second is stored as
     ``self.dim_red``.
     """
     # NOTE(review): storing the second result as ``self.dim_red`` shadows
     # this method on the instance after the first call -- confirm that
     # callers rely on the attribute rather than on calling it again.
     result_df, fit = dim_red.dim_red(
         self.df,
         col=col,
         method=method,
         params=params,
         kws=kws,
         load_fit=load_fit,
     )
     self.df = result_df
     self.dim_red = fit
Ejemplo n.º 3
0
def test_dimred_LDA():
    """Check LDA output shape, first coefficient column and first-row scores.

    K-Means clustering is run first because the LDA call is given
    ``ycol='K-Means'``.
    """
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    kmeans_kws = dict(
        n_clusters=5,
        n_init=10,
        max_iter=100,
        tol=0.01,
        n_jobs=1,
        random_state=1,
    )
    # Return value is ignored; presumably this adds the 'K-Means' labels to
    # ``data`` in place -- TODO confirm against cluster.cluster.
    cluster.cluster(data, 'wvl', 'K-Means', [], kmeans_kws)

    lda_params = {'n_components': 3}
    reduced, lda = dim_red.dim_red(data, 'wvl', 'LDA', [], lda_params,
                                   ycol='K-Means')

    # Stored reference values for the bundled test data.
    expected_coefs = [
        -0.02209121, -0.0016516, -0.01139357, -0.06448139, 0.07085655
    ]
    expected_scores = [-11.89340048, 0.41598425, 0.22964169]

    assert reduced['LDA'].shape == (103, 3)

    first_coef_column = lda.coef_[:, 0]
    np.testing.assert_array_almost_equal(expected_coefs, first_coef_column)

    first_row_scores = np.array(reduced['LDA'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)
Ejemplo n.º 4
0
 def run(self):
     """Run the selected dimensionality-reduction method on the chosen data.

     The method name and data key are read from the combo boxes and the
     parameters from ``getMethodParams``; ``dim_red`` is then called on the
     ``'wvl'`` column with ``load_fit`` set to False.  Neither returned
     value is used afterwards in this method.
     """
     method = self.chooseMethodComboBox.currentText()
     datakey = self.chooseDataComboBox.currentText()
     method_index = self.chooseMethodComboBox.currentIndex()
     params, modelkey = self.getMethodParams(method_index)

     selected_df = self.data[datakey].df
     df, PCA_obj = dim_red(selected_df, 'wvl', method, [], params, False)
Ejemplo n.º 5
0
def test_dimred_PCA():
    """Check PCA output shape, explained-variance ratios and first-row scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stored reference values for the bundled test data.
    expected_expl_var = [0.96051211, 0.01683739, 0.01471955]
    expected_scores = [10092.96265442, -628.16699776, -359.06894452]

    reduced, pca = dim_red.dim_red(data, 'wvl', 'PCA', [], {'n_components': 3})

    assert reduced['PCA'].shape == (103, 3)

    np.testing.assert_array_almost_equal(expected_expl_var,
                                         pca.explained_variance_ratio_)

    first_row_scores = np.array(reduced['PCA'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)
Ejemplo n.º 6
0
def test_dimred_FastICA():
    """Check FastICA output shape, first component column and first-row scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stored reference values for the bundled test data.
    expected_comps = [-2.190278e-05, 1.498101e-06, 9.082887e-07]
    expected_scores = [0.03252833, -0.03749623, -0.11434307]

    ica_params = dict(n_components=3, random_state=1)
    reduced, ica = dim_red.dim_red(data, 'wvl', 'FastICA', [], ica_params)

    assert reduced['FastICA'].shape == (103, 3)

    first_comp_column = ica.components_[:, 0]
    np.testing.assert_array_almost_equal(expected_comps, first_comp_column)

    first_row_scores = np.array(reduced['FastICA'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)
Ejemplo n.º 7
0
def test_dimred_NMF():
    """Check NMF with ``add_constant`` on data shifted to contain negatives."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Shift the spectra down so some values are negative; this exercises the
    # 'add_constant' option passed below.
    shifted = data['wvl'] - 1000
    data['wvl'] = shifted
    dim_red.check_positive(data['wvl'])

    nmf_params = dict(n_components=3, random_state=0, add_constant=True)
    reduced, nmf = dim_red.dim_red(data, 'wvl', 'NMF', [], nmf_params)

    # Stored reference values for the bundled test data.
    expected_comps = [10.27191532, 34.62489686, 3.06822373]
    expected_scores = [49.42458628, 3.9910722, 27.03100371]

    assert reduced['NMF'].shape == (103, 3)

    first_comp_column = nmf.components_[:, 0]
    np.testing.assert_array_almost_equal(expected_comps, first_comp_column)

    first_row_scores = np.array(reduced['NMF'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)
Ejemplo n.º 8
0
def test_dimred_JADE():
    """Check JADE-ICA output shape, first loading column and first-row scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stored reference values for the bundled test data.
    expected_loadings = [0.56247385, 0.19292341, 3.42289881]
    expected_scores = [174708.34499912, 125682.55985134, 145155.40758151]

    reduced, jade = dim_red.dim_red(data, 'wvl', 'JADE-ICA', [],
                                    {'n_components': 3})

    assert reduced['JADE-ICA'].shape == (103, 3)

    # The first loading column is squeezed to drop length-1 axes before the
    # comparison.
    first_loading_column = np.squeeze(np.array(jade.ica_jade_loadings[:, 0]))
    np.testing.assert_almost_equal(expected_loadings, first_loading_column)

    first_row_scores = np.array(reduced['JADE-ICA'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)
Ejemplo n.º 9
0
def test_dimred_LLE():
    """Check LLE output shape, reconstruction error and first-row score magnitudes."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stored reference values for the bundled test data.
    expected_err = 2.0687806439705738e-05
    expected_scores = [0.11088153, 0.01215013, -0.03551393]

    lle_params = dict(n_components=3, n_neighbors=10, reg=1e-3)
    reduced, lle = dim_red.dim_red(data, 'wvl', 'LLE', [], lle_params)

    assert reduced['LLE'].shape == (103, 3)
    np.testing.assert_almost_equal(expected_err, lle.reconstruction_error_)

    # Only absolute values are compared (sign is ignored), to 4 decimals.
    first_row_scores = np.array(reduced['LLE'].iloc[0, :])
    np.testing.assert_array_almost_equal(np.abs(expected_scores),
                                         np.abs(first_row_scores),
                                         decimal=4)
Ejemplo n.º 10
0
def test_dimred_tSNE():
    """Check t-SNE output shape, KL divergence and first-row scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stored reference values for the bundled test data.
    expected_div = 0.38829776644706726
    expected_scores = [9938.469727, -802.161682]

    tsne_params = dict(
        n_components=2,
        learning_rate=200.0,
        n_iter=1000,
        n_iter_without_progress=300,
        perplexity=30,
        init='pca',
    )
    reduced, tsne = dim_red.dim_red(data, 'wvl', 't-SNE', [], tsne_params)

    assert reduced['t-SNE'].shape == (103, 2)
    np.testing.assert_almost_equal(expected_div, tsne.kl_divergence_)

    first_row_scores = np.array(reduced['t-SNE'].iloc[0, :])
    np.testing.assert_array_almost_equal(expected_scores, first_row_scores)