def test_sf_image_metrics(spark_context, ds_formulas_images_mock, ds_config):
    """Check the frame returned by ``sf_image_metrics`` with stubbed metrics.

    ``get_compute_img_metrics`` is patched so that the function it hands
    back always yields the same fixed values.  The result for ions 0 and 1
    is compared against a hand-built frame indexed by ``ion_i`` whose
    ``msm`` column is ``0.9**3``.

    ``ds_config`` is requested as a fixture but is not used in the body.
    """
    patch_target = 'sm.engine.msm_basic.formula_img_validator.get_compute_img_metrics'
    with patch(patch_target) as metrics_factory_mock:
        # The stub ignores its arguments and returns constant values.
        metrics_factory_mock.return_value = (
            lambda *args: (0.9, 0.9, 0.9, [100., 10.], [0, 0], [10., 1.]))

        ds_mock, ds_reader_mock, ref_images = ds_formulas_images_mock
        ref_images_rdd = spark_context.parallelize(ref_images)

        # Insertion order matters: scalar metrics first, then the list ones.
        metrics = OrderedDict()
        for scalar_name in ('chaos', 'spatial', 'spectral'):
            metrics[scalar_name] = 0
        for list_name in ('total_iso_ints', 'min_iso_ints', 'max_iso_ints'):
            metrics[list_name] = [0, 0, 0, 0]
        ion_centr_ints = {ion_i: [100, 10, 1] for ion_i in (0, 1)}

        metrics_df = sf_image_metrics(ref_images_rdd, metrics, ds_mock,
                                      ds_reader_mock, ion_centr_ints,
                                      spark_context)

        column_names = ['ion_i', 'chaos', 'spatial', 'spectral',
                        'total_iso_ints', 'min_iso_ints', 'max_iso_ints',
                        'msm']
        expected_rows = [
            [ion_i, 0.9, 0.9, 0.9, [100., 10.], [0, 0], [10., 1.], 0.9**3]
            for ion_i in (0, 1)
        ]
        exp_metrics_df = pd.DataFrame(expected_rows, columns=column_names)
        exp_metrics_df = exp_metrics_df.set_index(['ion_i'])
        assert_frame_equal(metrics_df, exp_metrics_df)
# Example #2
# 0
def test_filter_sf_images(spark_context):
    """Check the first key of the RDD returned by ``filter_sf_images``.

    Two ions are fed in: ion 0 with non-zero images and ion 1 with all-zero
    images.  The metrics frame has a row for ion 0 only.  The assertion
    compares just the first element taken from the filtered RDD with the
    first element taken from the input RDD.
    """
    ion0_images = [
        csr_matrix([[0, 100, 100], [10, 0, 3]]),
        csr_matrix([[0, 50, 50], [0, 20, 0]]),
    ]
    ion1_images = [csr_matrix([[0, 0, 0], [0, 0, 0]]) for _ in range(2)]
    sf_iso_images = spark_context.parallelize([(0, ion0_images),
                                               (1, ion1_images)])

    metric_columns = ['ion_i', 'chaos', 'spatial', 'spectral', 'msm']
    sf_metrics_df = pd.DataFrame([[0, 0.9, 0.9, 0.9, 0.9**3]],
                                 columns=metric_columns)
    sf_metrics_df = sf_metrics_df.set_index(['ion_i'])

    # Every collaborator is passed as None for this call.
    search_alg = MSMBasicSearch(sc=None, ds=None, ds_reader=None,
                                mol_db=None, centr_gen=None, fdr=None,
                                ds_config=None)
    flt_iso_images = search_alg.filter_sf_images(sf_iso_images, sf_metrics_df)

    first_filtered = dict(flt_iso_images.take(1))
    first_input = dict(sf_iso_images.take(1))
    assert first_filtered.keys() == first_input.keys()
def test_isotope_images_are_stored(search_results, spark_context):
    """Check that only non-``None`` isotope images are posted to the store.

    The image store is a mock whose ``post_image`` always returns the same
    id.  Of the eight slots across the two ions, three hold an image, so
    three posts are expected and the returned mapping carries the id in
    exactly those positions.
    """
    stored_id = "iso_image_id"
    img_store_mock = MagicMock(spec=ImageStoreServiceWrapper)
    img_store_mock.post_image.return_value = stored_id
    img_store_mock.reset_mock()

    mask = np.array([[1, 1], [1, 0]])
    images_by_ion = [
        (0, [coo([[0, 0], [0, 1]]), None, coo([[2, 3], [1, 0]]), None]),
        (1, [coo([[1, 1], [0, 1]]), None, None, None]),
    ]
    ion_iso_images = spark_context.parallelize(images_by_ion)

    ids = search_results.post_images_to_image_store(
        ion_iso_images, mask, img_store_mock, 'fs')

    expected_ids = {
        0: {'iso_image_ids': [stored_id, None, stored_id, None]},
        1: {'iso_image_ids': [stored_id, None, None, None]},
    }
    assert ids == expected_ids
    assert img_store_mock.post_image.call_count == 3
def test_dataset_reader_get_sample_area_mask_correctness(sm_config, spark_context):
    """Check the mask from ``DatasetReader.get_sample_area_mask``.

    ``SparkContext.textFile`` is patched to return two coordinate lines
    (``'0,0,0'`` and ``'2,1,1'``), and the resulting mask is expected to be
    ``(True, False, False, True)``.
    """
    # Note: sets the config on the SMConfig class itself and does not undo it.
    SMConfig._config_dict = sm_config

    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    with patch('sm.engine.tests.util.SparkContext.textFile') as text_file_mock:
        coord_lines = ['0,0,0\n', '2,1,1\n']
        text_file_mock.return_value = spark_context.parallelize(coord_lines)

        ds_reader = DatasetReader('input_path', spark_context, work_dir_man_mock)
        ds_reader._determine_pixel_order()

        sample_area_mask = tuple(ds_reader.get_sample_area_mask())
        assert sample_area_mask == (True, False, False, True)
def test_filter_sf_images(spark_context):
    """Compare the first key of the filtered RDD with the first input key.

    The input RDD holds ion 0 (non-zero images) and ion 1 (all-zero images),
    while the metrics frame only has an entry for ion 0.  Only one element
    is taken from each RDD for the comparison.
    """
    image_rows_by_ion = [
        (0, ([[0, 100, 100], [10, 0, 3]], [[0, 50, 50], [0, 20, 0]])),
        (1, ([[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 0]])),
    ]
    sf_iso_images = spark_context.parallelize([
        (ion_i, [csr_matrix(rows) for rows in image_rows])
        for ion_i, image_rows in image_rows_by_ion
    ])

    metrics_rows = [[0, 0.9, 0.9, 0.9, 0.9**3]]
    sf_metrics_df = pd.DataFrame(
        metrics_rows,
        columns=['ion_i', 'chaos', 'spatial', 'spectral', 'msm'],
    ).set_index(['ion_i'])

    # The search object is built with every dependency set to None.
    search_kwargs = dict(sc=None, ds=None, ds_reader=None, mol_db=None,
                         centr_gen=None, fdr=None, ds_config=None)
    search_alg = MSMBasicSearch(**search_kwargs)
    flt_iso_images = search_alg.filter_sf_images(sf_iso_images, sf_metrics_df)

    filtered_keys = dict(flt_iso_images.take(1)).keys()
    input_keys = dict(sf_iso_images.take(1)).keys()
    assert filtered_keys == input_keys
def test_dataset_reader_get_sample_area_mask_correctness(
        sm_config, spark_context):
    """Verify the sample-area mask built from two patched coordinate lines.

    With ``textFile`` returning ``'0,0,0'`` and ``'2,1,1'``, the mask is
    expected to be ``(True, False, False, True)``.
    """
    work_dir_man_mock = MagicMock(WorkDirManager)
    for attr_name, attr_value in (('ds_coord_path', '/ds_path'),
                                  ('txt_path', '/txt_path')):
        setattr(work_dir_man_mock, attr_name, attr_value)
    # Assigned on the SMConfig class itself; not restored after the test.
    SMConfig._config_dict = sm_config

    expected_mask = (True, False, False, True)
    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        m.return_value = spark_context.parallelize(['0,0,0\n', '2,1,1\n'])

        ds_reader = DatasetReader(
            'input_path', spark_context, work_dir_man_mock)
        ds_reader._determine_pixel_order()

        assert tuple(ds_reader.get_sample_area_mask()) == expected_mask
def test_dataset_reader_get_spectra_works(sm_config, spark_context):
    """Check the tuples collected from ``DatasetReader.get_spectra``.

    ``SparkContext.textFile`` is patched to serve two ``|``-separated byte
    lines.  Each collected item is expected to carry the leading integer in
    position 0 and the two number lists as arrays in positions 1 and 2.
    """
    # Note: sets the config on the SMConfig class itself and does not undo it.
    SMConfig._config_dict = sm_config

    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    with patch('sm.engine.tests.util.SparkContext.textFile') as text_file_mock:
        raw_lines = [b'0|100.0 200.0|1000.0 0\n', b'2|200.0 300.0|10.0 20.0\n']
        # A one-item side_effect list: only a single textFile call is served.
        text_file_mock.side_effect = [spark_context.parallelize(raw_lines)]

        ds_reader = DatasetReader('input_path', spark_context, work_dir_man_mock)
        spectra_list = ds_reader.get_spectra().collect()

        assert [t[0] for t in spectra_list] == [0, 2]

        expected_arrays = [
            (np.array([100.0, 200.0]), np.array([1000.0, 0])),
            (np.array([200.0, 300.0]), np.array([10.0, 20.0])),
        ]
        for spectrum, (exp_field1, exp_field2) in zip(spectra_list,
                                                      expected_arrays):
            assert_array_equal(spectrum[1], exp_field1)
            assert_array_equal(spectrum[2], exp_field2)
def test_gen_iso_sf_images(spark_context):
    """Check how ``gen_iso_sf_images`` groups peak images per key.

    Three ``(key, (peak_index, image))`` records share one key: one for peak
    0 and two for peak 3.  The expected result is a single entry whose list
    has four slots: the peak-0 image, two ``None`` values, and the first of
    the two peak-3 images.
    """
    ion_key = (3079, '+H')
    peak_records = [
        (0, coo_matrix([[1., 0., 0.]])),
        (3, coo_matrix([[2., 1., 0.]])),
        (3, coo_matrix([[0., 0., 10.]])),
    ]
    iso_peak_images = spark_context.parallelize(
        [(ion_key, record) for record in peak_records])
    exp_iso_sf_imgs = [(ion_key, [coo_matrix([[1., 0., 0.]]),
                                  None,
                                  None,
                                  coo_matrix([[2., 1., 0.]])])]

    iso_sf_imgs = gen_iso_sf_images(iso_peak_images, shape=(1, 3)).collect()

    assert len(iso_sf_imgs) == len(exp_iso_sf_imgs)
    for (key, images), (exp_key, exp_images) in zip(iso_sf_imgs,
                                                    exp_iso_sf_imgs):
        assert key == exp_key
        assert len(images) == len(exp_images)
        for image, exp_image in zip(images, exp_images):
            if exp_image is not None:
                # Sparse matrices are compared through their dense form.
                assert (image.toarray() == exp_image.toarray()).all()
            else:
                assert image is None
def test_dataset_reader_get_spectra_works(sm_config, spark_context):
    """Verify the items collected from ``DatasetReader.get_spectra``.

    Two byte lines are served through a patched ``SparkContext.textFile``;
    the collected items must start with 0 and 2 respectively and hold the
    matching numeric arrays in positions 1 and 2.
    """
    work_dir_man_mock = MagicMock(WorkDirManager)
    for attr_name, attr_value in (('ds_coord_path', '/ds_path'),
                                  ('txt_path', '/txt_path')):
        setattr(work_dir_man_mock, attr_name, attr_value)
    # Assigned on the SMConfig class itself; not restored after the test.
    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        spectra_rdd = spark_context.parallelize(
            [b'0|100.0 200.0|1000.0 0\n', b'2|200.0 300.0|10.0 20.0\n'])
        # The side_effect list has one item, so one textFile call is served.
        m.side_effect = [spectra_rdd]

        ds_reader = DatasetReader(
            'input_path', spark_context, work_dir_man_mock)
        spectra_list = ds_reader.get_spectra().collect()

        leading_ids = [item[0] for item in spectra_list]
        assert leading_ids == [0, 2]

        # Exactly two items are present once the assertion above has passed.
        first_spectra, second_spectra = spectra_list
        assert_array_equal(first_spectra[1], np.array([100.0, 200.0]))
        assert_array_equal(first_spectra[2], np.array([1000.0, 0]))
        assert_array_equal(second_spectra[1], np.array([200.0, 300.0]))
        assert_array_equal(second_spectra[2], np.array([10.0, 20.0]))
def test_gen_iso_sf_images(spark_context):
    """Verify the per-key image list produced by ``gen_iso_sf_images``.

    The input has one key with images for peak 0 and, twice, for peak 3.
    The expected output is one entry with a four-slot list: image, ``None``,
    ``None``, and the first of the two peak-3 images.
    """
    iso_peak_images = spark_context.parallelize([
        ((3079, '+H'), (peak_i, coo_matrix(rows)))
        for peak_i, rows in [(0, [[1., 0., 0.]]),
                             (3, [[2., 1., 0.]]),
                             (3, [[0., 0., 10.]])]
    ])
    expected_slots = [coo_matrix([[1., 0., 0.]]), None, None,
                      coo_matrix([[2., 1., 0.]])]
    exp_iso_sf_imgs = [((3079, '+H'), expected_slots)]

    iso_sf_imgs = gen_iso_sf_images(iso_peak_images, shape=(1, 3)).collect()

    assert len(iso_sf_imgs) == len(exp_iso_sf_imgs)
    for actual_entry, expected_entry in zip(iso_sf_imgs, exp_iso_sf_imgs):
        actual_key, actual_slots = actual_entry
        wanted_key, wanted_slots = expected_entry
        assert actual_key == wanted_key
        assert len(actual_slots) == len(wanted_slots)
        for actual_img, wanted_img in zip(actual_slots, wanted_slots):
            if wanted_img is None:
                assert actual_img is None
                continue
            # Element-wise comparison on the dense arrays.
            assert (actual_img.toarray() == wanted_img.toarray()).all()
def test_isotope_images_are_stored(search_results, spark_context):
    """Verify ids returned by ``post_images_to_image_store`` for two ions.

    ``post_image`` on the mocked store always returns one fixed id.  Slots
    holding an image must map to that id, ``None`` slots must stay ``None``,
    and the store must have been called once per image (three times).
    """
    image_id = "iso_image_id"
    img_store_mock = MagicMock(spec=ImageStoreServiceWrapper)
    img_store_mock.post_image.return_value = image_id
    img_store_mock.reset_mock()

    ion0_slots = [coo([[0, 0], [0, 1]]), None, coo([[2, 3], [1, 0]]), None]
    ion1_slots = [coo([[1, 1], [0, 1]]), None, None, None]
    ion_iso_images = spark_context.parallelize([(0, ion0_slots),
                                                (1, ion1_slots)])
    mask = np.array([[1, 1], [1, 0]])

    ids = search_results.post_images_to_image_store(ion_iso_images, mask,
                                                    img_store_mock, 'fs')

    assert ids == {
        0: {'iso_image_ids': [image_id, None, image_id, None]},
        1: {'iso_image_ids': [image_id, None, None, None]},
    }
    assert img_store_mock.post_image.call_count == 3