def test_save_ds_meta_ds_doesnt_exist(spark_context, create_test_db, drop_test_db, sm_config, ds_config):
    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        m.return_value = spark_context.parallelize(['0,1,1\n', '1,100,200\n'])

        dataset = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config,
                          work_dir_man_mock, DB(sm_config['db']))
        dataset.save_ds_meta()

        db = DB(sm_config['db'])
        ds_row = db.select_one('SELECT name, file_path, img_bounds, config from dataset')
        assert ds_row == ('ds_name', 'input_path',
                          {u'x': {u'min': 1, u'max': 100}, u'y': {u'min': 1, u'max': 200}},
                          ds_config)

        coord_row = db.select_one('SELECT xs, ys from coordinates')
        assert coord_row == ([1, 100], [1, 200])

        db.close()

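# Reading of the expected rows above: the mocked coordinate file contains the points
# (1, 1) and (100, 200), so the stored img_bounds cover x in [1, 100] and y in [1, 200],
# while the coordinates table keeps the raw xs/ys lists.
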
def test_save_sf_iso_images_correct_db_call(spark_context, create_fill_sm_database, sm_config, ds_config):
    sf_iso_imgs = spark_context.parallelize([((1, '+H'),
                                              [csr_matrix([[100, 0, 0], [0, 0, 0]]),
                                               csr_matrix([[0, 0, 0], [0, 0, 10]])])])
    sf_adduct_peaksn = [(1, '+H', 2)]
    # db_mock is a module-level DB mock defined outside this extract
    res = SearchResults(0, 0, 0, 'ds_name', sf_adduct_peaksn, db_mock, sm_config, ds_config)
    res.sf_iso_images = sf_iso_imgs
    res.nrows, res.ncols = 2, 3
    res.store_sf_iso_images()

    correct_rows = [(0, 0, 1, '+H', 0, [0], [100], 0, 100),
                    (0, 0, 1, '+H', 1, [5], [10], 0, 10)]

    db = DB(sm_config['db'])
    try:
        rows = db.select(('SELECT job_id, db_id, sf_id, adduct, peak, pixel_inds, intensities, min_int, max_int '
                          'FROM iso_image '
                          'ORDER BY sf_id, adduct'))
        assert correct_rows == rows
    finally:
        db.close()

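# For reference: the pixel_inds stored in iso_image are flat indices into the 2x3 image,
# which is why the single non-zero entry of the second peak image lands at index 5
# (row 1, column 2). A quick illustration with numpy (not part of the test):
#
#   >>> np.flatnonzero(csr_matrix([[0, 0, 0], [0, 0, 10]]).toarray())
#   array([5])
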
def test_compute_sf_images_2by3(spark_context):
    sf_peak_imgs = spark_context.parallelize([(0, (0, np.array([[0, 100, 100], [0, 0, 0]]))),
                                              (0, (1, np.array([[0, 0, 0], [0, 100, 100]])))])

    sf_imgs = compute_sf_images(sf_peak_imgs).collect()

    assert sf_imgs[0][0] == 0
    assert_array_almost_equal(sf_imgs[0][1],
                              [[[0, 100, 100], [0, 0, 0]],
                               [[0, 0, 0], [0, 100, 100]]])

def test_compute_sf_peak_images_2by3(spark_context):
    ds = MagicMock(spec=Dataset)
    ds.get_dims.return_value = (2, 3)
    ds.get_norm_img_pixel_inds.return_value = np.array([1, 2, 3, 4, 5])

    sf_sp_intens = spark_context.parallelize([((0, 0), (0, 100.0)), ((0, 0), (1, 100.0)),
                                              ((0, 1), (3, 100.0)), ((0, 1), (4, 100.0))])

    sf_p_imgs = compute_sf_peak_images(ds, sf_sp_intens).collect()

    assert_array_almost_equal(sf_p_imgs[0][1][1].toarray(), np.array([[0, 100, 100], [0, 0, 0]]))
    assert_array_almost_equal(sf_p_imgs[1][1][1].toarray(), np.array([[0, 0, 0], [0, 100, 100]]))

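# Reading of the expected images above: ds.get_norm_img_pixel_inds() maps spectrum
# indices 0..4 to flat pixel indices 1..5 of the 2x3 image, so the intensities of
# spectra 0 and 1 end up in row 0, columns 1 and 2, and those of spectra 3 and 4
# in row 1, columns 1 and 2.
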
def test_sf_image_metrics(spark_context, ds_formulas_images_mock, ds_config):
    with patch('sm.engine.msm_basic.formula_img_validator.get_compute_img_metrics') as mock:
        mock.return_value = lambda *args: (0.9, 0.9, 0.9)
        ds_mock, formulas_mock, ref_images = ds_formulas_images_mock
        ref_images_rdd = spark_context.parallelize(ref_images)

        metrics_df = sf_image_metrics(ref_images_rdd, spark_context, formulas_mock, ds_mock, ds_config)

        exp_metrics_df = (pd.DataFrame([[0, '+H', 0.9, 0.9, 0.9, 0.9**3],
                                        [1, '+H', 0.9, 0.9, 0.9, 0.9**3]],
                                       columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm'])
                          .set_index(['sf_id', 'adduct']))
        assert_frame_equal(metrics_df, exp_metrics_df)

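# The expected `msm` value of 0.9**3 above is consistent with the MSM score being the
# product of the three component metrics (chaos * spatial * spectral), each of which the
# patched get_compute_img_metrics returns as 0.9.
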
def test_get_sample_area_mask_correctness(sm_config, ds_config, spark_context):
    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        m.return_value = spark_context.parallelize(['0,0,0\n', '2,1,1\n'])

        ds = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config, work_dir_man_mock, None)
        # ds.norm_img_pixel_inds = np.array([0, 3])

        assert tuple(ds.get_sample_area_mask()) == (True, False, False, True)

def test_sample_spectra_2by3(spark_context):
    ds = MagicMock(spec=Dataset)
    ds.get_spectra.return_value = spark_context.parallelize([
        (0, np.array([100.0]), np.array([0, 100.0])),
        (1, np.array([100.0]), np.array([0, 100.0])),
        (2, np.array([50.0]), np.array([0, 100.0])),
        (3, np.array([200.0]), np.array([0, 100.0])),
        (4, np.array([200.0]), np.array([0, 100.0]))
    ])
    formulas = MagicMock(spec=Formulas)
    formulas.get_sf_peak_bounds.return_value = (np.array([100 - 0.01, 200 - 0.01]),
                                                np.array([100 + 0.01, 200 + 0.01]))
    formulas.get_sf_peak_map.return_value = np.array([(0, j) for j in [0, 1]])

    ss = sample_spectra(spark_context, ds, formulas).collect()

    assert_array_almost_equal(ss, [((0, 0), (0, 100.0)), ((0, 0), (1, 100.0)),
                                   ((0, 1), (3, 100.0)), ((0, 1), (4, 100.0))])

def search_results(spark_context, sm_config, ds_config):
    sf_iso_imgs = spark_context.parallelize([((1, '+H'),
                                              [csr_matrix([[100, 0, 0], [0, 0, 0]]),
                                               csr_matrix([[0, 0, 0], [0, 0, 10]])])])
    sf_metrics_df = pd.DataFrame([(1, '+H', 0.9, 0.9, 0.9, 0.9**3, 0.5)],
                                 columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm', 'fdr'])
    sf_adduct_peaksn = [(1, '+H', 2)]

    res = SearchResults(0, 0, 0, 'ds_name', sf_adduct_peaksn, db_mock, sm_config, ds_config)
    res.sf_metrics_df = sf_metrics_df
    res.metrics = ['chaos', 'spatial', 'spectral']
    res.sf_iso_images = sf_iso_imgs
    return res

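# `search_results` reads like a pytest fixture shared by the SearchResults storage tests;
# a minimal sketch of how it would typically be declared and consumed is shown below.
# The @pytest.fixture decorator and the test name are assumptions added for illustration,
# not part of the original module.
#
#   @pytest.fixture
#   def search_results(spark_context, sm_config, ds_config):
#       ...
#
#   def test_metrics_df_columns_preserved(search_results):
#       assert list(search_results.sf_metrics_df.columns) == \
#           ['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm', 'fdr']
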
def test_gen_iso_sf_images(spark_context):
    iso_peak_images = spark_context.parallelize([((3079, '+H'), (0, coo_matrix([[1., 0., 0.]]))),
                                                 ((3079, '+H'), (3, coo_matrix([[2., 1., 0.]]))),
                                                 ((3079, '+H'), (3, coo_matrix([[0., 0., 10.]])))])
    exp_iso_sf_imgs = [((3079, '+H'), [coo_matrix([[1., 0., 0.]]),
                                       None,
                                       None,
                                       coo_matrix([[2., 1., 0.]])])]

    iso_sf_imgs = gen_iso_sf_images(iso_peak_images, shape=(1, 3)).collect()

    assert len(iso_sf_imgs) == len(exp_iso_sf_imgs)
    for (k, l), (ek, el) in zip(iso_sf_imgs, exp_iso_sf_imgs):
        assert k == ek
        assert len(l) == len(el)
        for m, em in zip(l, el):
            if em is None:
                assert m is None
            else:
                assert (m == em).toarray().all()

def test_filter_sf_images(spark_context):
    sf_iso_images = spark_context.parallelize([(0, [csr_matrix([[0, 100, 100], [10, 0, 3]]),
                                                    csr_matrix([[0, 50, 50], [0, 20, 0]])]),
                                               (1, [csr_matrix([[0, 0, 0], [0, 0, 0]]),
                                                    csr_matrix([[0, 0, 0], [0, 0, 0]])])])
    sf_metrics_df = (pd.DataFrame([[0, '+H', 0.9, 0.9, 0.9, 0.9**3]],
                                  columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm'])
                     .set_index(['sf_id', 'adduct']))

    search_alg = MSMBasicSearch(None, None, None, None, None)
    flt_iso_images = search_alg.filter_sf_images(sf_iso_images, sf_metrics_df)

    assert dict(flt_iso_images.take(1)).keys() == dict(sf_iso_images.take(1)).keys()