def test_save_ds_meta_ds_doesnt_exist(spark_context, create_test_db, drop_test_db, sm_config, ds_config):
    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        m.return_value = spark_context.parallelize(['0,1,1\n', '1,100,200\n'])

        dataset = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config,
                          work_dir_man_mock, DB(sm_config['db']))
        dataset.save_ds_meta()

        db = DB(sm_config['db'])
        ds_row = db.select_one('SELECT name, file_path, img_bounds, config from dataset')
        assert ds_row == ('ds_name', 'input_path',
                          {u'x': {u'min': 1, u'max': 100}, u'y': {u'min': 1, u'max': 200}},
                          ds_config)

        coord_row = db.select_one('SELECT xs, ys from coordinates')
        assert coord_row == ([1, 100], [1, 200])

        db.close()

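# Reading of the expected rows above: the mocked coordinate file contains the points
# (1, 1) and (100, 200), so the stored img_bounds cover x in [1, 100] and y in [1, 200],
# while the coordinates table keeps the raw xs/ys lists.
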
def test_save_sf_iso_images_correct_db_call(spark_context, create_fill_sm_database, sm_config, ds_config):
    sf_iso_imgs = spark_context.parallelize([((1, '+H'),
                                              [csr_matrix([[100, 0, 0], [0, 0, 0]]),
                                               csr_matrix([[0, 0, 0], [0, 0, 10]])])])
    sf_adduct_peaksn = [(1, '+H', 2)]
    # db_mock is a module-level DB mock defined outside this extract
    res = SearchResults(0, 0, 0, 'ds_name', sf_adduct_peaksn, db_mock, sm_config, ds_config)
    res.sf_iso_images = sf_iso_imgs
    res.nrows, res.ncols = 2, 3
    res.store_sf_iso_images()

    correct_rows = [(0, 0, 1, '+H', 0, [0], [100], 0, 100),
                    (0, 0, 1, '+H', 1, [5], [10], 0, 10)]

    db = DB(sm_config['db'])
    try:
        rows = db.select(('SELECT job_id, db_id, sf_id, adduct, peak, pixel_inds, intensities, min_int, max_int '
                          'FROM iso_image '
                          'ORDER BY sf_id, adduct'))
        assert correct_rows == rows
    finally:
        db.close()

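# For reference: the pixel_inds stored in iso_image are flat indices into the 2x3 image,
# which is why the single non-zero entry of the second peak image lands at index 5
# (row 1, column 2). A quick illustration with numpy (not part of the test):
#
#   >>> np.flatnonzero(csr_matrix([[0, 0, 0], [0, 0, 10]]).toarray())
#   array([5])
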
def test_compute_sf_images_2by3(spark_context):
    sf_peak_imgs = spark_context.parallelize([(0, (0, np.array([[0, 100, 100], [0, 0, 0]]))),
                                              (0, (1, np.array([[0, 0, 0], [0, 100, 100]])))])

    sf_imgs = compute_sf_images(sf_peak_imgs).collect()

    assert sf_imgs[0][0] == 0
    assert_array_almost_equal(sf_imgs[0][1],
                              [[[0, 100, 100], [0, 0, 0]],
                               [[0, 0, 0], [0, 100, 100]]])

def test_compute_sf_peak_images_2by3(spark_context):
    ds = MagicMock(spec=Dataset)
    ds.get_dims.return_value = (2, 3)
    ds.get_norm_img_pixel_inds.return_value = np.array([1, 2, 3, 4, 5])

    sf_sp_intens = spark_context.parallelize([((0, 0), (0, 100.0)), ((0, 0), (1, 100.0)),
                                              ((0, 1), (3, 100.0)), ((0, 1), (4, 100.0))])

    sf_p_imgs = compute_sf_peak_images(ds, sf_sp_intens).collect()

    assert_array_almost_equal(sf_p_imgs[0][1][1].toarray(), np.array([[0, 100, 100], [0, 0, 0]]))
    assert_array_almost_equal(sf_p_imgs[1][1][1].toarray(), np.array([[0, 0, 0], [0, 100, 100]]))

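# Reading of the expected images above: ds.get_norm_img_pixel_inds() maps spectrum
# indices 0..4 to flat pixel indices 1..5 of the 2x3 image, so the intensities of
# spectra 0 and 1 end up in row 0, columns 1 and 2, and those of spectra 3 and 4
# in row 1, columns 1 and 2.
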
def test_sf_image_metrics(spark_context, ds_formulas_images_mock, ds_config):
    with patch('sm.engine.msm_basic.formula_img_validator.get_compute_img_metrics') as mock:
        mock.return_value = lambda *args: (0.9, 0.9, 0.9)
        ds_mock, formulas_mock, ref_images = ds_formulas_images_mock
        ref_images_rdd = spark_context.parallelize(ref_images)

        metrics_df = sf_image_metrics(ref_images_rdd, spark_context, formulas_mock, ds_mock, ds_config)

        exp_metrics_df = (pd.DataFrame([[0, '+H', 0.9, 0.9, 0.9, 0.9**3],
                                        [1, '+H', 0.9, 0.9, 0.9, 0.9**3]],
                                       columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm'])
                          .set_index(['sf_id', 'adduct']))
        assert_frame_equal(metrics_df, exp_metrics_df)

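# The expected `msm` value of 0.9**3 above is consistent with the MSM score being the
# product of the three component metrics (chaos * spatial * spectral), each of which the
# patched get_compute_img_metrics returns as 0.9.
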
def test_get_sample_area_mask_correctness(sm_config, ds_config, spark_context):
    work_dir_man_mock = MagicMock(WorkDirManager)
    work_dir_man_mock.ds_coord_path = '/ds_path'
    work_dir_man_mock.txt_path = '/txt_path'

    SMConfig._config_dict = sm_config

    with patch('sm.engine.tests.util.SparkContext.textFile') as m:
        m.return_value = spark_context.parallelize(['0,0,0\n', '2,1,1\n'])

        ds = Dataset(spark_context, 'ds_name', '', 'input_path', ds_config, work_dir_man_mock, None)
        # ds.norm_img_pixel_inds = np.array([0, 3])

        assert tuple(ds.get_sample_area_mask()) == (True, False, False, True)

def test_sample_spectra_2by3(spark_context):
    ds = MagicMock(spec=Dataset)
    ds.get_spectra.return_value = spark_context.parallelize([
        (0, np.array([100.0]), np.array([0, 100.0])),
        (1, np.array([100.0]), np.array([0, 100.0])),
        (2, np.array([50.0]), np.array([0, 100.0])),
        (3, np.array([200.0]), np.array([0, 100.0])),
        (4, np.array([200.0]), np.array([0, 100.0]))
    ])
    formulas = MagicMock(spec=Formulas)
    formulas.get_sf_peak_bounds.return_value = (np.array([100 - 0.01, 200 - 0.01]),
                                                np.array([100 + 0.01, 200 + 0.01]))
    formulas.get_sf_peak_map.return_value = np.array([(0, j) for j in [0, 1]])

    ss = sample_spectra(spark_context, ds, formulas).collect()

    assert_array_almost_equal(ss, [((0, 0), (0, 100.0)), ((0, 0), (1, 100.0)),
                                   ((0, 1), (3, 100.0)), ((0, 1), (4, 100.0))])

def search_results(spark_context, sm_config, ds_config):
    sf_iso_imgs = spark_context.parallelize([((1, '+H'),
                                              [csr_matrix([[100, 0, 0], [0, 0, 0]]),
                                               csr_matrix([[0, 0, 0], [0, 0, 10]])])])
    sf_metrics_df = pd.DataFrame([(1, '+H', 0.9, 0.9, 0.9, 0.9**3, 0.5)],
                                 columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm', 'fdr'])
    sf_adduct_peaksn = [(1, '+H', 2)]

    res = SearchResults(0, 0, 0, 'ds_name', sf_adduct_peaksn, db_mock, sm_config, ds_config)
    res.sf_metrics_df = sf_metrics_df
    res.metrics = ['chaos', 'spatial', 'spectral']
    res.sf_iso_images = sf_iso_imgs
    return res

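# `search_results` reads like a pytest fixture shared by the SearchResults storage tests;
# a minimal sketch of how it would typically be declared and consumed is shown below.
# The @pytest.fixture decorator and the test name are assumptions added for illustration,
# not part of the original module.
#
#   @pytest.fixture
#   def search_results(spark_context, sm_config, ds_config):
#       ...
#
#   def test_metrics_df_columns_preserved(search_results):
#       assert list(search_results.sf_metrics_df.columns) == \
#           ['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm', 'fdr']
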
def test_gen_iso_sf_images(spark_context):
    iso_peak_images = spark_context.parallelize([((3079, '+H'), (0, coo_matrix([[1., 0., 0.]]))),
                                                 ((3079, '+H'), (3, coo_matrix([[2., 1., 0.]]))),
                                                 ((3079, '+H'), (3, coo_matrix([[0., 0., 10.]])))])
    exp_iso_sf_imgs = [((3079, '+H'), [coo_matrix([[1., 0., 0.]]),
                                       None,
                                       None,
                                       coo_matrix([[2., 1., 0.]])])]

    iso_sf_imgs = gen_iso_sf_images(iso_peak_images, shape=(1, 3)).collect()

    assert len(iso_sf_imgs) == len(exp_iso_sf_imgs)
    for (k, l), (ek, el) in zip(iso_sf_imgs, exp_iso_sf_imgs):
        assert k == ek
        assert len(l) == len(el)
        for m, em in zip(l, el):
            if em is None:
                assert m is None
            else:
                assert (m == em).toarray().all()

def test_filter_sf_images(spark_context):
    sf_iso_images = spark_context.parallelize([(0, [csr_matrix([[0, 100, 100], [10, 0, 3]]),
                                                    csr_matrix([[0, 50, 50], [0, 20, 0]])]),
                                               (1, [csr_matrix([[0, 0, 0], [0, 0, 0]]),
                                                    csr_matrix([[0, 0, 0], [0, 0, 0]])])])
    sf_metrics_df = (pd.DataFrame([[0, '+H', 0.9, 0.9, 0.9, 0.9**3]],
                                  columns=['sf_id', 'adduct', 'chaos', 'spatial', 'spectral', 'msm'])
                     .set_index(['sf_id', 'adduct']))

    search_alg = MSMBasicSearch(None, None, None, None, None)
    flt_iso_images = search_alg.filter_sf_images(sf_iso_images, sf_metrics_df)

    assert dict(flt_iso_images.take(1)).keys() == dict(sf_iso_images.take(1)).keys()