def test_mask():
    """Mask the test spectra and check the 'wvl' and 'masked' column groups."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    masked_df = mask.mask(spectra, get_path('mask.csv'))

    # First/last remaining channel labels and the size of each group.
    assert masked_df['wvl'].columns[0] == 586.049
    assert masked_df['wvl'].columns[-1] == 589.869
    assert masked_df['wvl'].shape == (103, 18)
    assert masked_df['masked'].shape == (103, 26)
def test_multiply_vector():
    """Multiply the spectra by a vector file; a bad vector file returns 0."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Valid vector: compare the first five channels of the first row.
    scaled = multiply_vector.multiply_vector(spectra, get_path('vector.csv'))
    expected_head = [1646.12, 1548.12, 1656.12, 1656.12, 1732.12]
    actual_head = np.array(scaled['wvl'].iloc[0, 0:5])
    np.testing.assert_array_almost_equal(expected_head, actual_head)

    # Bad vector file: the call is expected to return 0.
    scaled = multiply_vector.multiply_vector(spectra,
                                             get_path('bad_vector.csv'))
    assert scaled == 0
def test_peak_area_from_file():
    """Compute peak areas for one LIBS ID using a peaks/mins file."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # Keep only the rows belonging to a single LIBS ID.
    is_target = spectra[('meta', 'LIBS ID')] == 'LIB00041'
    spectra = spectra[is_target]

    areas_df, peaks, mins = pa.peak_area(
        spectra, peaks_mins_file=get_path('peaks_mins.csv'))

    np.testing.assert_array_almost_equal([588.072, 592.338], peaks)
    np.testing.assert_array_almost_equal([586.273, 589.195, 594.133], mins)
    first_row_areas = np.array(areas_df['peak_area'])[0, :]
    np.testing.assert_array_almost_equal([47938.78, 39204.32],
                                         first_row_areas)
def test_ratio(self):
    """Check the first four values of the 'ratio' column from utils.ratio."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    ratio_df = utils.ratio(spectra, ['580', '590'], ['590', '600'])

    actual = ratio_df['ratio'][0:4].values
    np.testing.assert_array_almost_equal(
        actual,
        np.array([3.93136608, 1.88664136, 1.47565463, 1.74094051]))
def test_cv():
    """Cross-validate PLS over a three-entry parameter grid on 3 folds."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    data = stratified_folds(data, nfolds=3, sortby=('comp', 'SiO2'))

    grid = list(ParameterGrid({'n_components': [1, 2, 3], 'scale': [False]}))
    cv_runner = cv.cv(grid)
    cv_results = cv_runner.do_cv(data,
                                 xcols='wvl',
                                 ycol=[('comp', 'SiO2')],
                                 method='PLS',
                                 yrange=[0, 100],
                                 calc_path=False,
                                 alphas=None)
    df_out, output, models, modelkeys, predictkeys = cv_results

    # Numerical checks: first-row predictions and per-model RMSEC.
    first_row_predicts = np.array(df_out['predict'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [56.55707481, 57.93716105, 59.34785052,
         60.59708391, 55.83934129, 56.7456989],
        first_row_predicts)
    np.testing.assert_array_almost_equal(
        [18.6509206, 14.64015186, 13.80182457],
        np.array(output[('cv', 'RMSEC')]))

    # Structural checks on the returned containers and their keys.
    assert output.shape == (3, 8)
    assert len(models) == 3
    assert len(modelkeys) == 3
    assert modelkeys[
        0] == 'PLS - SiO2 - (0, 100) {\'n_components\': 1, \'scale\': False}'
    assert len(predictkeys) == 6
    assert predictkeys[
        0] == '"PLS- CV -{\'n_components\': 1, \'scale\': False}"'
def test_meancenter():
    """Mean-center 'wvl' with a computed, a supplied and a mismatched mean."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Case 1: mean vector computed by meancenter itself.
    centered, mean_vect = meancenter.meancenter(spectra, 'wvl')
    expected_centered = [
        -168.05398058, 579.71601942, 309.16601942, 709.21601942,
        -341.00398058
    ]
    np.testing.assert_array_almost_equal(
        expected_centered, np.array(centered['wvl'].iloc[0:5, 0]))
    np.testing.assert_array_almost_equal(
        [991.11398058, 1160.24990291, 1287.87126214, 931.56058252,
         838.89067961],
        np.array(mean_vect)[0:5])

    # Case 2: supply a previous mean vector (all ones).
    mean_vect.iloc[:] = 1
    centered_again, mean_again = meancenter.meancenter(
        spectra, 'wvl', previous_mean=mean_vect)
    np.testing.assert_array_almost_equal(
        np.array(expected_centered) - 1.0,
        np.array(centered_again['wvl'].iloc[0:5, 0]))
    np.testing.assert_array_almost_equal([1., 1., 1., 1., 1.],
                                         np.array(mean_again)[0:5])

    # Case 3: shift the mean vector's index so it no longer matches the
    # spectra; the call is expected to return 0.
    mean_vect.index = np.array(mean_vect.index, dtype=float) + 1.0
    mismatch_result = meancenter.meancenter(spectra,
                                            'wvl',
                                            previous_mean=mean_vect)
    assert mismatch_result == 0
def test_run_analytics_band_minima(expected_wavelengths, expected_values):
    """Run band_minima over every spectrum and compare the averaged results."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    minima = analytics.run_analytics(spectra, analytics.band_minima)

    # Each entry is indexed as (wavelength(s), value).
    mean_wavelengths = [np.mean(entry[0]) for entry in minima]
    min_values = [entry[1] for entry in minima]

    assert np.mean(mean_wavelengths) == pytest.approx(expected_wavelengths)
    assert np.mean(min_values) == pytest.approx(expected_values)
def test_interp():
    """Interpolate the spectra onto four wavelengths and check the first row."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    resampled = interp.interp(spectra, [588, 590, 592, 594])

    assert resampled['wvl'].shape == (103, 4)
    first_row = np.array(resampled['wvl'].iloc[0, :])
    np.testing.assert_array_almost_equal(
        [1637.58, 1104.47964286, 830.53321429, 857.77875], first_row)
def test_dimred_LDA():
    """Run LDA with 'K-Means' as ycol and check coefficients and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    kmeans_kws = {
        'n_clusters': 5,
        'n_init': 10,
        'max_iter': 100,
        'tol': 0.01,
        'n_jobs': 1,
        'random_state': 1
    }
    # Return value is discarded; the 'K-Means' column used as ycol below is
    # presumably written into df by this call -- confirm against cluster().
    cluster.cluster(df, 'wvl', 'K-Means', [], kmeans_kws)

    df, lda_obj = dim_red.dim_red(df,
                                  'wvl',
                                  'LDA', [], {'n_components': 3},
                                  ycol='K-Means')

    assert df['LDA'].shape == (103, 3)
    np.testing.assert_array_almost_equal(
        [-0.02209121, -0.0016516, -0.01139357, -0.06448139, 0.07085655],
        lda_obj.coef_[:, 0])
    np.testing.assert_array_almost_equal(
        [-11.89340048, 0.41598425, 0.22964169],
        np.array(df['LDA'].iloc[0, :]))
def test_run_analytics_band_center_spectrum(expected_center,
                                            expected_wavelengths,
                                            expected_values):
    """Run band_center on a single spectrum and compare averaged results.

    The spectrum is the second column of the loaded Spectra object, and
    512.6 / 2587.9 are passed through to ``analytics.band_center`` as
    extra positional arguments.

    Parameters
    ----------
    expected_center : float
        Expected mean of the returned center fit.
    expected_wavelengths : float
        Expected mean of ``center[0]``.
    expected_values : float
        Expected mean of ``center[1]``.
    """
    spectra = phat.Spectra.from_file(get_path('SP_2C_02_02358_S138_E3586.spc'))
    spectrum = spectra[spectra.columns[1]]

    center, center_fit = analytics.run_analytics(spectrum,
                                                 analytics.band_center, 512.6,
                                                 2587.9)

    assert center_fit.mean() == pytest.approx(expected_center)
    assert np.mean(center[0]) == pytest.approx(expected_wavelengths)
    # Compare with a tolerance like the assertions above: exact float
    # equality on a computed mean is brittle across platforms and versions.
    assert np.mean(center[1]) == pytest.approx(expected_values)
def test_peak_area():
    """Compute peak areas for one LIBS ID without a peaks/mins file.

    ``pa.peak_area`` is called with ``peaks_mins_file=None``; the returned
    peaks, mins and the 'peak_area' values are compared against reference
    values.
    """
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # Restrict to the rows of a single LIBS ID.
    df = df[df[('meta', 'LIBS ID')] == 'LIB00041']

    expected_peaks = np.array([
        586.723, 588.746, 589.42, 590.767, 591.216, 592.114, 592.787, 593.46,
        593.909, 594.357
    ])
    expected_mins = np.array([
        585.374, 587.173, 589.195, 590.543, 590.992, 591.89, 592.338, 593.236,
        593.685, 594.133, 594.582
    ])
    expected_areas = np.array([
        7248.48, 43986.54, 25421.36, 1843.12, 3593.24, 1661.12, 3316.24,
        1679.12, 1690.12, 1739.12
    ])

    df_result, peaks_result, mins_result = pa.peak_area(df,
                                                        peaks_mins_file=None)

    np.testing.assert_array_almost_equal(np.array(peaks_result, dtype=float),
                                         expected_peaks)
    np.testing.assert_array_almost_equal(np.array(mins_result, dtype=float),
                                         expected_mins)
    # squeeze() drops length-1 axes so the areas compare as a flat vector.
    np.testing.assert_array_almost_equal(
        np.squeeze(np.array(df_result['peak_area'])), expected_areas)
def test_common():
    """Hit parts of the common baseline code not covered by other tests."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    intensities = np.array(df['wvl'], dtype='float')
    channels = np.array(df['wvl'].columns.values, dtype='float')

    # fit_transform on the full spectra matrix
    als_obj = als.ALS()
    transformed = als_obj.fit_transform(channels, intensities)
    np.testing.assert_array_almost_equal(
        [-151.88026557, 200.84238645, 525.56518276, -166.71174241,
         -398.98828107],
        transformed[5, 0:5])

    # fit on a single spectrum
    fitted = als_obj.fit(channels, intensities[0, :])
    np.testing.assert_array_almost_equal(
        [1063.366517, 1059.53780945, 1055.70887361, 1051.87920998,
         1048.0481028],
        fitted.baseline[0:5])

    # segmenting: add 10 to every channel from index 20 onward, on a freshly
    # built channel array
    channels = np.array(df['wvl'].columns.values, dtype=float)
    channels[20:] = channels[20:] + 10
    segments = list(common._segment(channels, np.array(df['wvl'])))
    assert segments[0][0][0] == 585.149
    assert segments[1][0][0] == 599.644
def test_KK():
    """Exercise the 'KK' baseline method with several width settings."""
    # Bottom width too small: the first output value should be NaN.
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    kk_params = {
        'top_width': 10,
        'bottom_width': 0,
        'exponent': 2,
        'tangent': False
    }
    corrected, _ = remove_baseline(spectra, 'KK', params=kk_params)
    assert np.isnan(corrected['wvl'].iloc[0, 0])

    # Top and bottom widths together with the tangent option.
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    kk_params = {
        'top_width': 10,
        'bottom_width': 50,
        'exponent': 2,
        'tangent': True
    }
    br_caller(spectra, 'KK', kk_params,
              [-0.119923, -0.117072, -0.114455, -0.120391, -0.122455],
              [0.130102, 0.130128, 0.130152, 0.130174, 0.130194])

    # Bottom width only.
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    kk_params = {
        'top_width': 0,
        'bottom_width': 50,
        'exponent': 2,
        'tangent': False
    }
    br_caller(spectra, 'KK', kk_params,
              [0.002431, 0.005307, 0.007949, 0.002039, 0.],
              [0.007748, 0.007749, 0.007748, 0.007745, 0.00774])

    # Advertised parameter ranges.
    kk_obj = kajfosz_kwiatek.KajfoszKwiatek()
    assert kk_obj.param_ranges() == {
        'top_width': (0, 100, 'integer'),
        'bottom_width': (0, 100, 'integer')
    }
def test_shift_spect():
    """Shift the spectra by -1.0 and check values plus the 'Shift' column."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    shifted = shift_spect.shift_spect(spectra, -1.0)

    first_row_head = np.array(shifted['wvl'].iloc[0, 0:5])
    np.testing.assert_array_almost_equal(
        [898.64928571, 973.62444444, 1034.46444444, 1004.54, 939.16222222],
        first_row_head)
    assert shifted[('meta', 'Shift')].shape == (103, )
def test_run_analytics_band_center(expected_center, expected_wavelengths,
                                   expected_values):
    """Run band_center over every spectrum and compare averaged results."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    center, center_fit = analytics.run_analytics(spectra,
                                                 analytics.band_center)

    mean_wavelengths = [np.mean(entry[0]) for entry in center]
    mean_values = [np.mean(entry[1]) for entry in center]
    # Collect the fit stored under each key of center_fit into a list.
    fits = [center_fit[key] for key in center_fit]

    assert np.mean(fits) == pytest.approx(expected_center)
    assert np.mean(mean_wavelengths) == pytest.approx(expected_wavelengths)
    assert np.mean(mean_values) == pytest.approx(expected_values)
def test_isolation_forest():
    """Run 'Isolation Forest' outlier removal and check the first scores."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    forest_params = {
        'n_estimators': 10,
        'contamination': 'auto',
        'random_state': 1
    }
    flagged = libpyhat.utils.outlier_removal.outlier_removal(
        spectra, 'wvl', 'Isolation Forest', forest_params)

    # The values under test live in the second-to-last 'meta' column.
    score_col = flagged['meta'].columns[-2]
    scores = np.array(flagged[('meta', score_col)])[0:5]
    np.testing.assert_array_almost_equal(
        [0.07998454, 0.01812089, 0.06773168, 0.01483949, -0.04311234],
        scores)
def test_median():
    """Exercise the 'Median' baseline method and its parameter ranges."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(spectra, 'Median', {'window_size': 30},
              [0.00244, 0.00477, 0.007133, 0.001127, -0.001438],
              [0.00774, 0.008286, 0.008564, 0.008657, 0.009178])

    # Advertised parameter ranges.
    median_obj = median.MedianFilter()
    assert median_obj.param_ranges() == {'window_size': (201, 901, 'integer')}
def test_dimred_PCA():
    """Run 3-component PCA and check explained variance and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, pca_obj = dim_red.dim_red(df, 'wvl', 'PCA', [], {'n_components': 3})

    assert df['PCA'].shape == (103, 3)
    np.testing.assert_array_almost_equal(
        [0.96051211, 0.01683739, 0.01471955],
        pca_obj.explained_variance_ratio_)
    np.testing.assert_array_almost_equal(
        [10092.96265442, -628.16699776, -359.06894452],
        np.array(df['PCA'].iloc[0, :]))
def test_Rubberband():
    """Exercise the 'Rubberband' baseline method with and without iterations."""
    # Eight iterations.
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(spectra, 'Rubberband', {'num_iters': 8, 'num_ranges': 4},
              [0., 0.002516, 0.005217, -0.000218, -0.001363],
              [0.010179, 0.01054, 0.010481, 0.010002, 0.009102])

    # No iterations, on freshly loaded data.
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    br_caller(spectra, 'Rubberband', {'num_iters': 0, 'num_ranges': 4},
              [0., 0.003487, 0.006738, 0.001434, 0.],
              [0.010179, 0.009569, 0.008959, 0.008349, 0.00774])

    # Advertised parameter ranges.
    rb_obj = rubberband.Rubberband()
    assert rb_obj.param_ranges() == {
        'num_ranges': (1, 100, 'integer'),
        'num_iters': (0, 36, 'integer')
    }
def test_sm_blend():
    """Blend three PLS models with sm, without and with optimization."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    x = df['wvl']
    y = df[('comp', 'SiO2')]

    # Fit the three PLS models (same order as their column names) and store
    # each model's predictions under ('predict', <name>).
    for name, n_comp in (('model1', 3), ('model2', 5), ('model3', 4)):
        pls = reg.regression(method=['PLS'],
                             params=[{
                                 'n_components': n_comp,
                                 'scale': False
                             }])
        pls.fit(x, y)
        df[('predict', name)] = pls.predict(x)

    stacked = [
        df[('predict', key)]
        for key in ('model2', 'model1', 'model3', 'model1')
    ]
    sm_obj = sm.sm([[-9999, 30], [20, 60], [50, 9999]])

    # Without optimization.
    blended = sm_obj.do_blend(np.array(stacked))
    rmse = np.sqrt(np.average((blended - df[('comp', 'SiO2')])**2))
    np.testing.assert_almost_equal(rmse, 12.703434300128926, decimal=5)

    # With optimization (true values supplied).
    blended = sm_obj.do_blend(np.array(stacked),
                              truevals=np.array(df[('comp', 'SiO2')]))
    rmse = np.sqrt(np.average((blended - df[('comp', 'SiO2')])**2))
    np.testing.assert_almost_equal(rmse, 9.954065920454982, decimal=5)
    np.testing.assert_allclose(
        [-9999., 36.5198746, 47.98157746, 56.2537253, 118.94036468, 9999.],
        sm_obj.blendranges,
        rtol=1e-5)
def test_dimred_FastICA():
    """Run 3-component FastICA and check components and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    ica_params = {'n_components': 3, 'random_state': 1}
    df, ica_obj = dim_red.dim_red(df, 'wvl', 'FastICA', [], ica_params)

    assert df['FastICA'].shape == (103, 3)
    np.testing.assert_array_almost_equal(
        [-2.190278e-05, 1.498101e-06, 9.082887e-07],
        ica_obj.components_[:, 0])
    np.testing.assert_array_almost_equal(
        [0.03252833, -0.03749623, -0.11434307],
        np.array(df['FastICA'].iloc[0, :]))
def test_FABC():
    """Exercise the 'FABC' baseline method and its parameter ranges."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    fabc_params = {'dilation_param': 50, 'smoothness_param': 1e3}
    br_caller(spectra, 'FABC', fabc_params,
              [-0.013034, -0.01017, -0.007552, -0.013497, -0.015585],
              [0.023213, 0.023226, 0.02325, 0.02328, 0.023325])

    # Advertised parameter ranges.
    fabc_obj = fabc.FABC()
    assert fabc_obj.param_ranges() == {
        'dilation_param': (1, 100, 'integer'),
        'smoothness_param': (1, 1e6, 'log')
    }
def test_LOF():
    """Run 'Local Outlier Factor' outlier removal and check the first scores."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    lof_params = {
        'n_neighbors': 10,
        'contamination': 'auto',
        'leaf_size': 10,
        'p': 2
    }
    flagged = libpyhat.utils.outlier_removal.outlier_removal(
        spectra, 'wvl', 'Local Outlier Factor', lof_params)

    # The values under test live in the last 'meta' column.
    score_col = flagged['meta'].columns[-1]
    scores = np.array(flagged[('meta', score_col)])[0:5]
    np.testing.assert_array_almost_equal(
        [-1.010267, -1.35764, -1.383224, -1.620422, -1.036561], scores)
def test_dimred_NMF():
    """Run 3-component NMF on data shifted negative, with add_constant set."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    # Make some values negative to test adding a constant.
    df['wvl'] = df['wvl'] - 1000
    dim_red.check_positive(df['wvl'])

    nmf_params = {'n_components': 3, 'random_state': 0, 'add_constant': True}
    df, nmf_obj = dim_red.dim_red(df, 'wvl', 'NMF', [], nmf_params)

    assert df['NMF'].shape == (103, 3)
    np.testing.assert_array_almost_equal(
        [10.27191532, 34.62489686, 3.06822373], nmf_obj.components_[:, 0])
    np.testing.assert_array_almost_equal(
        [49.42458628, 3.9910722, 27.03100371],
        np.array(df['NMF'].iloc[0, :]))
def test_cv_calc_path():
    """Cross-validate LASSO with calc_path=True over ten alphas."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    data = stratified_folds(data, nfolds=3, sortby=('comp', 'SiO2'))

    lasso_params = {
        'fit_intercept': [True, False],
        'max_iter': [1000],
        'tol': [1e-3],
        'precompute': [True],
        'copy_X': [True],
        'positive': [True, False],
        'selection': ['random'],
        'random_state': [1]
    }
    alphas = np.logspace(np.log10(0.0000001), np.log10(0.01), num=10)
    cv_runner = cv.cv(list(ParameterGrid(lasso_params)))
    cv_results = cv_runner.do_cv(data,
                                 xcols='wvl',
                                 ycol=[('comp', 'SiO2')],
                                 method='LASSO',
                                 yrange=[0, 100],
                                 calc_path=True,
                                 alphas=alphas)
    df_out, output, models, modelkeys, predictkeys = cv_results

    # Numerical checks on entries 5..14 of the predictions and RMSEC.
    np.testing.assert_array_almost_equal(
        [57.87064, 57.868983, 57.868983, 57.868983, 57.868983,
         59.315111, 59.315113, 59.315114, 59.315114, 59.315114],
        np.array(df_out['predict'].iloc[0, 5:15]))
    np.testing.assert_array_almost_equal(
        [18.490365, 18.490365, 18.490365, 18.490365, 18.490365,
         7.042796, 6.986007, 6.967643, 6.959045, 6.953588],
        np.array(output[('cv', 'RMSEC')].iloc[5:15]))

    # Structural checks on the returned containers and their keys.
    assert output.shape == (40, 15)
    assert len(models) == 40
    assert len(modelkeys) == 40
    assert modelkeys[
        0] == 'LASSO - SiO2 - (0, 100) Alpha: 0.01, {\'copy_X\': True, \'fit_intercept\': True, \'max_iter\': 1000, \'positive\': True, \'precompute\': True, \'random_state\': 1, \'selection\': \'random\', \'tol\': 0.001}'
    assert len(predictkeys) == 80
    assert predictkeys[
        0] == '"LASSO - SiO2 - CV - Alpha:0.01 - {\'copy_X\': True, \'fit_intercept\': True, \'max_iter\': 1000, \'positive\': True, \'precompute\': True, \'random_state\': 1, \'selection\': \'random\', \'tol\': 0.001}"'
def test_cv_nofolds():
    """do_cv on data with no stratified_folds step is expected to return 0."""
    data = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    grid = list(ParameterGrid({'n_components': [1, 2, 3], 'scale': [False]}))
    cv_runner = cv.cv(grid)

    outcome = cv_runner.do_cv(data,
                              xcols='wvl',
                              ycol=[('comp', 'SiO2')],
                              method='PLS',
                              yrange=[0, 100],
                              calc_path=False,
                              alphas=None)
    print(outcome)
    assert outcome == 0
def test_dimred_LLE():
    """Run 3-component LLE and check reconstruction error and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    lle_params = {'n_components': 3, 'n_neighbors': 10, 'reg': 1e-3}
    df, lle_obj = dim_red.dim_red(df, 'wvl', 'LLE', [], lle_params)

    assert df['LLE'].shape == (103, 3)
    np.testing.assert_almost_equal(2.0687806439705738e-05,
                                   lle_obj.reconstruction_error_)

    # Scores are compared by magnitude only, to four decimals.
    reference = np.abs([0.11088153, 0.01215013, -0.03551393])
    observed = np.abs(np.array(df['LLE'].iloc[0, :]))
    np.testing.assert_array_almost_equal(reference, observed, decimal=4)
def test_dimred_JADE():
    """Run 3-component JADE-ICA and check loadings and scores."""
    df = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    df, jade_obj = dim_red.dim_red(df, 'wvl', 'JADE-ICA', [],
                                   {'n_components': 3})

    assert df['JADE-ICA'].shape == (103, 3)
    first_loadings = np.squeeze(np.array(jade_obj.ica_jade_loadings[:, 0]))
    np.testing.assert_almost_equal([0.56247385, 0.19292341, 3.42289881],
                                   first_loadings)
    np.testing.assert_array_almost_equal(
        [174708.34499912, 125682.55985134, 145155.40758151],
        np.array(df['JADE-ICA'].iloc[0, :]))
def test_folds():
    """Check the first ten fold labels from stratified and random folds."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])

    # Stratified folds sorted by SiO2.
    folded = libpyhat.utils.folds.stratified_folds(spectra,
                                                   nfolds=3,
                                                   sortby=('comp', 'SiO2'))
    head = np.array(folded[('meta', 'Folds')].iloc[0:10])
    np.testing.assert_array_almost_equal(
        [3., 1., 1., 1., 3., 2., 2., 1., 1., 1.], head)

    # Random folds with a fixed seed.
    folded = libpyhat.utils.folds.random(spectra, ('comp', 'SiO2'),
                                         nfolds=3,
                                         seed=10)
    head = np.array(folded[('meta', 'Folds')].iloc[0:10])
    np.testing.assert_array_almost_equal([1, 2, 2, 2, 1, 2, 1, 2, 2, 2], head)
def test_wavelet_spline():
    """Exercise 'Wavelet a Trous + Spline' with two levelmin settings."""
    spectra = pd.read_csv(get_path('test_data.csv'), header=[0, 1])
    method = 'Wavelet a Trous + Spline'

    # levelmin too big: the expected baseline is all zeros.
    br_caller(spectra, method, {'level': 6, 'levelmin': 5},
              [0.010179, 0.013056, 0.015697, 0.009784, 0.00774],
              [0., 0., 0., 0., 0.])

    # Smaller levelmin, same dataframe object as above.
    br_caller(spectra, method, {'level': 6, 'levelmin': 2},
              [0., 0.0039, 0.00726, 0.001804, 0.],
              [0.010179, 0.009156, 0.008438, 0.00798, 0.00774])
def spectral_profiler_2c():
    """Return get_path() for the SP_2C example .spc file."""
    spc_name = 'SP_2C_02_02358_S138_E3586.spc'
    return get_path(spc_name)
def test_run_analytics_band_area(expected_val):
    """Run band_area over every spectrum and compare the mean area."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    band_areas = analytics.run_analytics(spectra, analytics.band_area)
    assert band_areas.mean() == pytest.approx(expected_val)
def test_run_analytics_band_area_spectrum(expected_values):
    """Run band_area on the second spectrum column with two extra arguments."""
    spc_path = get_path('SP_2C_02_02358_S138_E3586.spc')
    spectra = phat.Spectra.from_file(spc_path)
    single_spectrum = spectra[spectra.columns[1]]

    # 512.6 and 2587.9 are forwarded positionally through run_analytics.
    area = analytics.run_analytics(single_spectrum, analytics.band_area,
                                   512.6, 2587.9)
    assert area == pytest.approx(expected_values)