예제 #1
0
def test_mask():
    """Apply mask.csv to the test data and check the kept and masked columns."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    masked_df = mask.mask(spectra, get_path('mask.csv'))

    kept = masked_df['wvl']
    # First and last column labels remaining under 'wvl' after masking.
    assert kept.columns[0] == 586.049
    assert kept.columns[-1] == 589.869
    assert kept.shape == (103, 18)
    # Columns removed by the mask are reported under 'masked'.
    assert masked_df['masked'].shape == (103, 26)
예제 #2
0
def test_multiply_vector():
    """Run multiply_vector with vector.csv and with bad_vector.csv."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # vector.csv: compare the first five 'wvl' values of the first row.
    multiplied = multiply_vector.multiply_vector(data, get_path('vector.csv'))
    first_five = np.array(multiplied['wvl'].iloc[0, 0:5])
    np.testing.assert_array_almost_equal(
        [1646.12, 1548.12, 1656.12, 1656.12, 1732.12], first_five)

    # bad_vector.csv: the call is expected to return 0.
    failed = multiply_vector.multiply_vector(data, get_path('bad_vector.csv'))
    assert failed == 0
예제 #3
0
def test_peak_area_from_file():
    """Check peak_area when it is given peaks_mins.csv as peaks_mins_file."""
    full_df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # Keep only the rows whose LIBS ID is LIB00041.
    selected = full_df[full_df[('meta', 'LIBS ID')] == 'LIB00041']

    areas_df, found_peaks, found_mins = pa.peak_area(
        selected, peaks_mins_file=get_path('peaks_mins.csv'))

    np.testing.assert_array_almost_equal([588.072, 592.338], found_peaks)
    np.testing.assert_array_almost_equal([586.273, 589.195, 594.133],
                                         found_mins)

    # Areas are compared for the first row of the 'peak_area' columns.
    first_row_areas = np.array(areas_df['peak_area'])[0, :]
    np.testing.assert_array_almost_equal([47938.78, 39204.32],
                                         first_row_areas)
예제 #4
0
파일: test_utils.py 프로젝트: sumesh1/PyHAT
    def test_ratio(self):
        """Compare the first four 'ratio' values from utils.ratio with reference numbers."""
        data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
        ratio_df = utils.ratio(data, ['580', '590'], ['590', '600'])

        reference = np.array([3.93136608, 1.88664136, 1.47565463, 1.74094051])
        first_four = ratio_df['ratio'][0:4].values
        np.testing.assert_array_almost_equal(first_four, reference)
예제 #5
0
파일: test_cv.py 프로젝트: sumesh1/PyHAT
def test_cv():
    """Cross-validate PLS over three n_components values and check the results.

    Folds are assigned with stratified_folds (3 folds, sorted on SiO2) before
    do_cv runs; the predictions, RMSEC values, and returned keys are compared
    with stored reference values.
    """
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    data = stratified_folds(data, nfolds=3, sortby=('comp', 'SiO2'))

    grid = list(ParameterGrid({'n_components': [1, 2, 3], 'scale': [False]}))
    runner = cv.cv(grid)
    cv_results = runner.do_cv(data,
                              xcols='wvl',
                              ycol=[('comp', 'SiO2')],
                              method='PLS',
                              yrange=[0, 100],
                              calc_path=False,
                              alphas=None)
    df_out, output, models, modelkeys, predictkeys = cv_results

    # First row of the 'predict' columns.
    first_row_predictions = np.array(df_out['predict'].iloc[0, :])
    np.testing.assert_array_almost_equal([
        56.55707481, 57.93716105, 59.34785052, 60.59708391, 55.83934129,
        56.7456989
    ], first_row_predictions)

    rmsec = np.array(output[('cv', 'RMSEC')])
    np.testing.assert_array_almost_equal(
        [18.6509206, 14.64015186, 13.80182457], rmsec)

    assert output.shape == (3, 8)
    assert len(models) == 3
    assert len(modelkeys) == 3
    assert modelkeys[
        0] == 'PLS - SiO2 - (0, 100) {\'n_components\': 1, \'scale\': False}'
    assert len(predictkeys) == 6
    assert predictkeys[
        0] == '"PLS- CV -{\'n_components\': 1, \'scale\': False}"'
예제 #6
0
def test_meancenter():
    """Check meancenter with a computed mean, a supplied mean, and a mismatched mean."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Case 1: no previous mean is supplied.
    centered, mean_vect = meancenter.meancenter(data, 'wvl')
    ref_centered = [
        -168.05398058, 579.71601942, 309.16601942, 709.21601942, -341.00398058
    ]
    ref_mean = [
        991.11398058, 1160.24990291, 1287.87126214, 931.56058252, 838.89067961
    ]
    first_column = np.array(centered['wvl'].iloc[0:5, 0])
    np.testing.assert_array_almost_equal(ref_centered, first_column)
    np.testing.assert_array_almost_equal(ref_mean, np.array(mean_vect)[0:5])

    # Case 2: supply a mean vector of all ones via previous_mean.
    mean_vect.iloc[:] = 1
    centered_again, mean_vect_again = meancenter.meancenter(
        data, 'wvl', previous_mean=mean_vect)
    ref_centered_again = np.array(ref_centered) - 1.0
    np.testing.assert_array_almost_equal(
        ref_centered_again, np.array(centered_again['wvl'].iloc[0:5, 0]))
    np.testing.assert_array_almost_equal([1., 1., 1., 1., 1.],
                                         np.array(mean_vect_again)[0:5])

    # Case 3: shift the mean vector's index by 1.0 so the wavelengths no
    # longer match; the call is expected to return 0.
    mean_vect.index = np.array(mean_vect.index, dtype=float) + 1.0
    mismatch_result = meancenter.meancenter(data,
                                            'wvl',
                                            previous_mean=mean_vect)
    assert mismatch_result == 0
예제 #7
0
def test_run_analytics_band_minima(expected_wavelengths, expected_values):
    """Compare the averaged band_minima results with the supplied expectations."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    minima = analytics.run_analytics(spectra, analytics.band_minima)

    # Element 0 of each result is averaged per item; element 1 is used as is.
    per_item_wavelength = [np.mean(item[0]) for item in minima]
    per_item_value = [item[1] for item in minima]

    overall_wavelength = np.mean(per_item_wavelength)
    assert overall_wavelength == pytest.approx(expected_wavelengths)
    overall_value = np.mean(per_item_value)
    assert overall_value == pytest.approx(expected_values)
예제 #8
0
def test_interp():
    """Call interp with four x values and check the resulting 'wvl' shape and first row."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    target_wvls = [588, 590, 592, 594]
    interpolated = interp.interp(data, target_wvls)

    wvl_block = interpolated['wvl']
    assert wvl_block.shape == (103, 4)
    np.testing.assert_array_almost_equal(
        [1637.58, 1104.47964286, 830.53321429, 857.77875],
        np.array(wvl_block.iloc[0, :]))
예제 #9
0
def test_dimred_LDA():
    """Run K-Means clustering, then LDA with ycol='K-Means', and check coefficients and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    kmeans_kws = {
        'n_clusters': 5,
        'n_init': 10,
        'max_iter': 100,
        'tol': 0.01,
        'n_jobs': 1,
        'random_state': 1
    }
    # The return value is not used here; the LDA call below refers to the
    # clustering result through ycol='K-Means'.
    cluster.cluster(df, 'wvl', 'K-Means', [], kmeans_kws)

    lda_params = {'n_components': 3}
    df, dimred_obj = dim_red.dim_red(df,
                                     'wvl',
                                     'LDA', [],
                                     lda_params,
                                     ycol='K-Means')

    assert df['LDA'].shape == (103, 3)

    first_coef_column = dimred_obj.coef_[:, 0]
    np.testing.assert_array_almost_equal(
        [-0.02209121, -0.0016516, -0.01139357, -0.06448139, 0.07085655],
        first_coef_column)

    first_row_scores = np.array(df['LDA'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [-11.89340048, 0.41598425, 0.22964169], first_row_scores)
예제 #10
0
def test_run_analytics_band_minima(expected_wavelengths, expected_values):
    """Check the mean of the band_minima outputs against the expected values."""
    source_file = get_path('SP_2C_02_02358_S138_E3586.spc')
    loaded = phat.Spectra.from_file(source_file)
    band_minima = analytics.run_analytics(loaded, analytics.band_minima)

    # Two passes over the results: averaged first elements, then the raw
    # second elements.
    first_elements = [np.mean(entry[0]) for entry in band_minima]
    second_elements = [entry[1] for entry in band_minima]

    assert np.mean(first_elements) == pytest.approx(expected_wavelengths)
    assert np.mean(second_elements) == pytest.approx(expected_values)
예제 #11
0
def test_run_analytics_band_center_spectrum(expected_center, expected_wavelengths, expected_values):
    """Check band_center on a single spectrum against the supplied expectations.

    The spectrum is the second column of the loaded spectra, and band_center
    is called with the extra arguments 512.6 and 2587.9. All three results are
    floating-point means, so each one is compared with pytest.approx.
    """
    spectra = phat.Spectra.from_file(get_path('SP_2C_02_02358_S138_E3586.spc'))
    spectrum = spectra[spectra.columns[1]]
    center, center_fit = analytics.run_analytics(spectrum, analytics.band_center, 512.6, 2587.9)
    assert center_fit.mean() == pytest.approx(expected_center)
    assert np.mean(center[0]) == pytest.approx(expected_wavelengths)
    # Exact == on a computed float mean is brittle across platforms and
    # library versions; use the same tolerance as the assertions above.
    assert np.mean(center[1]) == pytest.approx(expected_values)
예제 #12
0
def test_peak_area():
    """Check peak_area when no peaks/minima file is supplied.

    Only the rows with LIBS ID LIB00041 are used. The returned peaks, minima
    and peak areas are compared with stored reference values.
    """
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df = df[df[('meta', 'LIBS ID')] == 'LIB00041']

    expected_peaks = np.array([
        586.723, 588.746, 589.42, 590.767, 591.216, 592.114, 592.787, 593.46,
        593.909, 594.357
    ])
    expected_mins = np.array([
        585.374, 587.173, 589.195, 590.543, 590.992, 591.89, 592.338, 593.236,
        593.685, 594.133, 594.582
    ])
    expected_areas = np.array([
        7248.48, 43986.54, 25421.36, 1843.12, 3593.24, 1661.12, 3316.24,
        1679.12, 1690.12, 1739.12
    ])

    # peaks_mins_file=None: no file of peak/minimum locations is passed in.
    df_result, peaks_result, mins_result = pa.peak_area(df,
                                                        peaks_mins_file=None)

    # The returned peaks and minima are cast to float before comparison.
    np.testing.assert_array_almost_equal(np.array(peaks_result, dtype=float),
                                         expected_peaks)
    np.testing.assert_array_almost_equal(np.array(mins_result, dtype=float),
                                         expected_mins)
    # squeeze drops length-1 axes from the 'peak_area' values so they can be
    # compared with the 1-D expected array.
    np.testing.assert_array_almost_equal(
        np.squeeze(np.array(df_result['peak_area'])), expected_areas)
예제 #13
0
def test_common():
    """Cover parts of the common baseline code: fit_transform, single-spectrum fit, and _segment."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    wvl_block = data['wvl']
    wvls = np.array(wvl_block.columns.values, dtype='float')
    spectra = np.array(wvl_block, dtype='float')

    br_obj = als.ALS()

    # fit_transform on all spectra: check five values of row 5.
    transformed = br_obj.fit_transform(wvls, spectra)
    np.testing.assert_array_almost_equal([
        -151.88026557, 200.84238645, 525.56518276, -166.71174241, -398.98828107
    ], transformed[5, 0:5])

    # fit on a single spectrum: check the start of the fitted baseline.
    fitted = br_obj.fit(wvls, spectra[0, :])
    np.testing.assert_array_almost_equal([
        1063.366517, 1059.53780945, 1055.70887361, 1051.87920998, 1048.0481028
    ], fitted.baseline[0:5])

    # Segmenting: add 10 to every wavelength from index 20 onward, then check
    # the first value of the first two results yielded by _segment.
    shifted_wvls = np.array(data['wvl'].columns.values, dtype=float)
    shifted_wvls[20:] = shifted_wvls[20:] + 10

    segments = list(common._segment(shifted_wvls, np.array(data['wvl'])))
    assert segments[0][0][0] == 585.149
    assert segments[1][0][0] == 599.644
예제 #14
0
def test_KK():
    """Exercise 'KK' baseline removal with several width settings and check param_ranges."""
    # Bottom width too small: the first corrected value is expected to be NaN.
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    too_small = {
        'top_width': 10,
        'bottom_width': 0,
        'exponent': 2,
        'tangent': False
    }
    corrected, corrected_baseline = remove_baseline(data,
                                                    'KK',
                                                    params=too_small)
    assert np.isnan(corrected['wvl'].iloc[0, 0])

    # Top and bottom widths together with tangent enabled.
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    with_tangent = {
        'top_width': 10,
        'bottom_width': 50,
        'exponent': 2,
        'tangent': True
    }
    br_caller(data, 'KK', with_tangent,
              [-0.119923, -0.117072, -0.114455, -0.120391, -0.122455],
              [0.130102, 0.130128, 0.130152, 0.130174, 0.130194])

    # Bottom width only (top_width is 0).
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    bottom_only = {
        'top_width': 0,
        'bottom_width': 50,
        'exponent': 2,
        'tangent': False
    }
    br_caller(data, 'KK', bottom_only,
              [0.002431, 0.005307, 0.007949, 0.002039, 0.],
              [0.007748, 0.007749, 0.007748, 0.007745, 0.00774])

    # Parameter ranges reported by the KajfoszKwiatek object.
    kk = kajfosz_kwiatek.KajfoszKwiatek()
    assert kk.param_ranges() == {
        'top_width': (0, 100, 'integer'),
        'bottom_width': (0, 100, 'integer')
    }
예제 #15
0
def test_shift_spect():
    """Call shift_spect with -1.0 and check the first-row values and the Shift column."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    shifted = shift_spect.shift_spect(data, -1.0)

    first_five = np.array(shifted['wvl'].iloc[0, 0:5])
    np.testing.assert_array_almost_equal(
        [898.64928571, 973.62444444, 1034.46444444, 1004.54, 939.16222222],
        first_five)

    # The result carries a ('meta', 'Shift') column with one entry per row.
    shift_column = shifted[('meta', 'Shift')]
    assert shift_column.shape == (103, )
예제 #16
0
def test_run_analytics_band_center(expected_center, expected_wavelengths, expected_values):
    """Compare averaged band_center outputs for all spectra with the supplied expectations."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    center, center_fit = analytics.run_analytics(spectra, analytics.band_center)

    # Per-item means of the first and second element of each center entry.
    mean_wavelengths = [np.mean(entry[0]) for entry in center]
    mean_values = [np.mean(entry[1]) for entry in center]

    # Iterate center_fit and look each yielded key up again, collecting the
    # looked-up items in a list.
    fit_items = [center_fit[key] for key in center_fit]

    assert np.mean(fit_items) == pytest.approx(expected_center)
    assert np.mean(mean_wavelengths) == pytest.approx(expected_wavelengths)
    assert np.mean(mean_values) == pytest.approx(expected_values)
예제 #17
0
def test_isolation_forest():
    """Run 'Isolation Forest' outlier removal and check the first five scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    forest_params = {
        'n_estimators': 10,
        'contamination': 'auto',
        'random_state': 1
    }
    flagged = libpyhat.utils.outlier_removal.outlier_removal(
        data, 'wvl', 'Isolation Forest', forest_params)

    # The scores are read from the second-to-last 'meta' column.
    score_column = flagged['meta'].columns[-2]
    first_five = np.array(flagged[('meta', score_column)])[0:5]
    np.testing.assert_array_almost_equal(
        [0.07998454, 0.01812089, 0.06773168, 0.01483949, -0.04311234],
        first_five)
예제 #18
0
def test_median():
    """Check 'Median' baseline removal with window_size 30 and the reported param_ranges."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(data, 'Median', {'window_size': 30},
              [0.00244, 0.00477, 0.007133, 0.001127, -0.001438],
              [0.00774, 0.008286, 0.008564, 0.008657, 0.009178])

    # Parameter ranges reported by the MedianFilter object.
    median_filter = median.MedianFilter()
    assert median_filter.param_ranges() == {
        'window_size': (201, 901, 'integer')
    }
예제 #19
0
def test_run_analytics_band_center_spectrum(expected_center,
                                            expected_wavelengths,
                                            expected_values):
    """Check band_center on a single spectrum against the supplied expectations.

    The spectrum is the second column of the loaded spectra, and band_center
    is called with the extra arguments 512.6 and 2587.9. All three results are
    floating-point means, so each one is compared with pytest.approx.
    """
    spectra = phat.Spectra.from_file(get_path('SP_2C_02_02358_S138_E3586.spc'))
    spectrum = spectra[spectra.columns[1]]
    center, center_fit = analytics.run_analytics(spectrum,
                                                 analytics.band_center, 512.6,
                                                 2587.9)
    assert center_fit.mean() == pytest.approx(expected_center)
    assert np.mean(center[0]) == pytest.approx(expected_wavelengths)
    # Exact == on a computed float mean is brittle across platforms and
    # library versions; use the same tolerance as the assertions above.
    assert np.mean(center[1]) == pytest.approx(expected_values)
예제 #20
0
def test_dimred_PCA():
    """Run PCA with three components and check explained variance and first-row scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    pca_params = {'n_components': 3}
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'PCA', [], pca_params)

    pca_block = df['PCA']
    assert pca_block.shape == (103, 3)

    np.testing.assert_array_almost_equal(
        [0.96051211, 0.01683739, 0.01471955],
        dimred_obj.explained_variance_ratio_)
    np.testing.assert_array_almost_equal(
        [10092.96265442, -628.16699776, -359.06894452],
        np.array(df['PCA'].iloc[0, :]))
예제 #21
0
def test_Rubberband():
    """Check 'Rubberband' baseline removal with and without iterations, plus param_ranges."""
    # Eight iterations over four ranges.
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(data, 'Rubberband', {'num_iters': 8, 'num_ranges': 4},
              [0., 0.002516, 0.005217, -0.000218, -0.001363],
              [0.010179, 0.01054, 0.010481, 0.010002, 0.009102])

    # No iterations.
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(data, 'Rubberband', {'num_iters': 0, 'num_ranges': 4},
              [0., 0.003487, 0.006738, 0.001434, 0.],
              [0.010179, 0.009569, 0.008959, 0.008349, 0.00774])

    # Parameter ranges reported by the Rubberband object.
    band = rubberband.Rubberband()
    assert band.param_ranges() == {
        'num_ranges': (1, 100, 'integer'),
        'num_iters': (0, 36, 'integer')
    }
예제 #22
0
def test_run_analytics_band_center(expected_center, expected_wavelengths,
                                   expected_values):
    """Check the averaged band_center outputs over all spectra."""
    source_file = get_path('SP_2C_02_02358_S138_E3586.spc')
    loaded = phat.Spectra.from_file(source_file)
    center, center_fit = analytics.run_analytics(loaded,
                                                 analytics.band_center)

    # Average element 0 and element 1 of every center entry separately.
    first_means = [np.mean(item[0]) for item in center]
    second_means = [np.mean(item[1]) for item in center]

    # Collect center_fit[key] for every key yielded by iterating center_fit.
    fits = [center_fit[name] for name in center_fit]

    assert np.mean(fits) == pytest.approx(expected_center)
    assert np.mean(first_means) == pytest.approx(expected_wavelengths)
    assert np.mean(second_means) == pytest.approx(expected_values)
예제 #23
0
파일: test_sm.py 프로젝트: sumesh1/PyHAT
def test_sm_blend():
    """Blend three PLS models with sm.do_blend, without and with optimization.

    Three PLS models (3, 5 and 4 components) are fit on the 'wvl' columns
    against SiO2, and their predictions are stored as 'predict' columns. The
    blended RMSE is checked both without truevals and with truevals; in the
    second case the blend ranges stored on the sm object are checked as well.
    """
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    x = df['wvl']
    y = df[('comp', 'SiO2')]

    # Fit the models one after another, in the same order as their labels.
    for label, n_components in (('model1', 3), ('model2', 5), ('model3', 4)):
        model = reg.regression(method=['PLS'],
                               params=[{
                                   'n_components': n_components,
                                   'scale': False
                               }])
        model.fit(x, y)
        df[('predict', label)] = model.predict(x)

    # Four prediction columns are passed to the blender; model1 appears twice.
    blend_inputs = np.array([
        df[('predict', 'model2')], df[('predict', 'model1')],
        df[('predict', 'model3')], df[('predict', 'model1')]
    ])

    sm_obj = sm.sm([[-9999, 30], [20, 60], [50, 9999]])

    # Without optimization (no truevals given).
    blended = sm_obj.do_blend(blend_inputs)
    residuals = blended - df[('comp', 'SiO2')]
    rmse_plain = np.sqrt(np.average(residuals**2))
    np.testing.assert_almost_equal(rmse_plain, 12.703434300128926, decimal=5)

    # With optimization (truevals given).
    blended = sm_obj.do_blend(blend_inputs,
                              truevals=np.array(df[('comp', 'SiO2')]))
    residuals = blended - df[('comp', 'SiO2')]
    rmse_optimized = np.sqrt(np.average(residuals**2))
    np.testing.assert_almost_equal(rmse_optimized,
                                   9.954065920454982,
                                   decimal=5)
    np.testing.assert_allclose(
        [-9999., 36.5198746, 47.98157746, 56.2537253, 118.94036468, 9999.],
        sm_obj.blendranges,
        rtol=1e-5)
예제 #24
0
def test_dimred_FastICA():
    """Run FastICA with three components and check components and first-row scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    ica_params = {'n_components': 3, 'random_state': 1}
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'FastICA', [], ica_params)

    assert df['FastICA'].shape == (103, 3)

    first_component_column = dimred_obj.components_[:, 0]
    np.testing.assert_array_almost_equal(
        [-2.190278e-05, 1.498101e-06, 9.082887e-07], first_component_column)

    first_row_scores = np.array(df['FastICA'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [0.03252833, -0.03749623, -0.11434307], first_row_scores)
예제 #25
0
def test_FABC():
    """Check 'FABC' baseline removal and the reported param_ranges."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    fabc_params = {'dilation_param': 50, 'smoothness_param': 1e3}
    br_caller(data, 'FABC', fabc_params,
              [-0.013034, -0.01017, -0.007552, -0.013497, -0.015585],
              [0.023213, 0.023226, 0.02325, 0.02328, 0.023325])

    # Parameter ranges reported by the FABC object.
    fabc_obj = fabc.FABC()
    assert fabc_obj.param_ranges() == {
        'dilation_param': (1, 100, 'integer'),
        'smoothness_param': (1, 1e6, 'log')
    }
예제 #26
0
def test_LOF():
    """Run 'Local Outlier Factor' outlier removal and check the first five scores."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    lof_params = {
        'n_neighbors': 10,
        'contamination': 'auto',
        'leaf_size': 10,
        'p': 2
    }
    flagged = libpyhat.utils.outlier_removal.outlier_removal(
        data, 'wvl', 'Local Outlier Factor', lof_params)

    # The scores are read from the last 'meta' column.
    score_column = flagged['meta'].columns[-1]
    first_five = np.array(flagged[('meta', score_column)])[0:5]
    np.testing.assert_array_almost_equal(
        [-1.010267, -1.35764, -1.383224, -1.620422, -1.036561], first_five)
예제 #27
0
def test_dimred_NMF():
    """Run NMF with add_constant on data shifted to contain negative values."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Make some values negative to test adding a constant.
    shifted = df['wvl'] - 1000
    df['wvl'] = shifted
    dim_red.check_positive(df['wvl'])

    nmf_params = {'n_components': 3, 'random_state': 0, 'add_constant': True}
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'NMF', [], nmf_params)

    assert df['NMF'].shape == (103, 3)

    first_component_column = dimred_obj.components_[:, 0]
    np.testing.assert_array_almost_equal(
        [10.27191532, 34.62489686, 3.06822373], first_component_column)

    first_row_scores = np.array(df['NMF'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [49.42458628, 3.9910722, 27.03100371], first_row_scores)
예제 #28
0
파일: test_cv.py 프로젝트: sumesh1/PyHAT
def test_cv_calc_path():
    """Cross-validate LASSO with calc_path=True over ten alphas and check the results."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    data = stratified_folds(data, nfolds=3, sortby=('comp', 'SiO2'))

    lasso_options = {
        'fit_intercept': [True, False],
        'max_iter': [1000],
        'tol': [1e-3],
        'precompute': [True],
        'copy_X': [True],
        'positive': [True, False],
        'selection': ['random'],
        'random_state': [1]
    }
    # Ten log-spaced alpha values between the two bounds.
    low_exp = np.log10(0.0000001)
    high_exp = np.log10(0.01)
    alphas = np.logspace(low_exp, high_exp, num=10)
    grid = list(ParameterGrid(lasso_options))

    runner = cv.cv(grid)
    cv_results = runner.do_cv(data,
                              xcols='wvl',
                              ycol=[('comp', 'SiO2')],
                              method='LASSO',
                              yrange=[0, 100],
                              calc_path=True,
                              alphas=alphas)
    df_out, output, models, modelkeys, predictkeys = cv_results

    # Columns 5..14 of the first 'predict' row.
    predicted_slice = np.array(df_out['predict'].iloc[0, 5:15])
    np.testing.assert_array_almost_equal([
        57.87064, 57.868983, 57.868983, 57.868983, 57.868983, 59.315111,
        59.315113, 59.315114, 59.315114, 59.315114
    ], predicted_slice)

    # Rows 5..14 of the RMSEC column.
    rmsec_slice = np.array(output[('cv', 'RMSEC')].iloc[5:15])
    np.testing.assert_array_almost_equal([
        18.490365, 18.490365, 18.490365, 18.490365, 18.490365, 7.042796,
        6.986007, 6.967643, 6.959045, 6.953588
    ], rmsec_slice)

    assert output.shape == (40, 15)
    assert len(models) == 40
    assert len(modelkeys) == 40
    assert modelkeys[
        0] == 'LASSO - SiO2 - (0, 100) Alpha: 0.01, {\'copy_X\': True, \'fit_intercept\': True, \'max_iter\': 1000, \'positive\': True, \'precompute\': True, \'random_state\': 1, \'selection\': \'random\', \'tol\': 0.001}'
    assert len(predictkeys) == 80
    assert predictkeys[
        0] == '"LASSO - SiO2 - CV - Alpha:0.01 - {\'copy_X\': True, \'fit_intercept\': True, \'max_iter\': 1000, \'positive\': True, \'precompute\': True, \'random_state\': 1, \'selection\': \'random\', \'tol\': 0.001}"'
예제 #29
0
파일: test_cv.py 프로젝트: sumesh1/PyHAT
def test_cv_nofolds():
    """Check that do_cv returns 0 when stratified_folds has not been applied.

    Unlike the other cross-validation tests, the data is passed to do_cv
    straight after loading, without a prior call to stratified_folds.
    """
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    params = {'n_components': [1, 2, 3], 'scale': [False]}
    paramgrid = list(ParameterGrid(params))

    cv_obj = cv.cv(paramgrid)
    results = cv_obj.do_cv(df,
                           xcols='wvl',
                           ycol=[('comp', 'SiO2')],
                           method='PLS',
                           yrange=[0, 100],
                           calc_path=False,
                           alphas=None)
    # No debug print here: pytest already reports the value of `results` if
    # the assertion fails.
    assert results == 0
예제 #30
0
def test_dimred_LLE():
    """Run LLE with three components and check reconstruction error and first-row scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    lle_params = {'n_components': 3, 'n_neighbors': 10, 'reg': 1e-3}
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'LLE', [], lle_params)

    assert df['LLE'].shape == (103, 3)
    np.testing.assert_almost_equal(2.0687806439705738e-05,
                                   dimred_obj.reconstruction_error_)

    # Scores are compared by absolute value, so the sign of each component is
    # not checked.
    reference_magnitudes = np.abs([0.11088153, 0.01215013, -0.03551393])
    score_magnitudes = np.abs(np.array(df['LLE'].iloc[0, :]))
    np.testing.assert_array_almost_equal(reference_magnitudes,
                                         score_magnitudes,
                                         decimal=4)
예제 #31
0
def test_dimred_JADE():
    """Run JADE-ICA with three components and check loadings and first-row scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    jade_params = {'n_components': 3}
    df, dimred_obj = dim_red.dim_red(df, 'wvl', 'JADE-ICA', [], jade_params)

    assert df['JADE-ICA'].shape == (103, 3)

    # First column of the loadings, squeezed to drop length-1 axes.
    first_loading_column = np.squeeze(
        np.array(dimred_obj.ica_jade_loadings[:, 0]))
    np.testing.assert_almost_equal([0.56247385, 0.19292341, 3.42289881],
                                   first_loading_column)

    first_row_scores = np.array(df['JADE-ICA'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [174708.34499912, 125682.55985134, 145155.40758151],
        first_row_scores)
예제 #32
0
파일: test_folds.py 프로젝트: sumesh1/PyHAT
def test_folds():
    """Check the first ten fold labels from stratified_folds and from folds.random."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    fold_key = ('meta', 'Folds')

    # Stratified folds, sorted on SiO2.
    stratified = libpyhat.utils.folds.stratified_folds(data,
                                                       nfolds=3,
                                                       sortby=('comp', 'SiO2'))
    stratified_labels = np.array(stratified[fold_key].iloc[0:10])
    np.testing.assert_array_almost_equal(
        [3., 1., 1., 1., 3., 2., 2., 1., 1., 1.], stratified_labels)

    # Random folds with a fixed seed.
    randomized = libpyhat.utils.folds.random(data, ('comp', 'SiO2'),
                                             nfolds=3,
                                             seed=10)
    random_labels = np.array(randomized[fold_key].iloc[0:10])
    np.testing.assert_array_almost_equal([1, 2, 2, 2, 1, 2, 1, 2, 2, 2],
                                         random_labels)
예제 #33
0
def test_wavelet_spline():
    """Check 'Wavelet a Trous + Spline' baseline removal for two levelmin settings."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    method = 'Wavelet a Trous + Spline'

    # levelmin too big: the expected baseline is all zeros.
    br_caller(data, method, {'level': 6, 'levelmin': 5},
              [0.010179, 0.013056, 0.015697, 0.009784, 0.00774],
              [0., 0., 0., 0., 0.])

    # Smaller levelmin, reusing the same dataframe.
    br_caller(data, method, {'level': 6, 'levelmin': 2},
              [0., 0.0039, 0.00726, 0.001804, 0.],
              [0.010179, 0.009156, 0.008438, 0.00798, 0.00774])
예제 #34
0
def spectral_profiler_2c():
    """Return get_path's result for the SP_2C_02_02358_S138_E3586.spc example file."""
    filename = 'SP_2C_02_02358_S138_E3586.spc'
    return get_path(filename)
예제 #35
0
def test_run_analytics_band_area(expected_val):
    """Compare the mean band_area over all spectra with the supplied expectation."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    band_areas = analytics.run_analytics(spectra, analytics.band_area)
    mean_area = band_areas.mean()
    assert mean_area == pytest.approx(expected_val)
예제 #36
0
def test_run_analytics_band_area_spectrum(expected_values):
    """Compare band_area for a single spectrum with the supplied expectation.

    The spectrum is the second column of the loaded spectra, and band_area is
    called with the extra arguments 512.6 and 2587.9.
    """
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    second_column = spectra.columns[1]
    spectrum = spectra[second_column]
    area = analytics.run_analytics(spectrum, analytics.band_area, 512.6, 2587.9)
    assert area == pytest.approx(expected_values)