Exemple #1
0
def test_delete_ds__completely(es, sm_index, sm_config):
    """Check that ``ESExporter.delete_ds`` removes every document of one dataset.

    Three annotation documents (two for ``dataset1``, one for ``dataset2``)
    and one dataset document for ``dataset1`` are created; after deleting
    ``dataset1`` only the ``dataset2`` annotation must still be counted.
    """
    index = sm_config['elasticsearch']['index']

    # One row per seeded document: (doc_type, document id, ds_id, db_name).
    seed_docs = (
        ('annotation', 'id1', 'dataset1', 'HMDB'),
        ('annotation', 'id2', 'dataset1', 'ChEBI'),
        ('annotation', 'id3', 'dataset2', 'HMDB'),
        ('dataset', 'dataset1', 'dataset1', 'HMDB'),
    )
    for doc_type, doc_id, ds_id, db_name in seed_docs:
        es.create(index=index, doc_type=doc_type, id=doc_id,
                  body={'ds_id': ds_id, 'db_name': db_name, 'db_version': '2016'})

    wait_for_es(sec=1)

    exporter = ESExporter(MagicMock(spec=DB))
    exporter.delete_ds(ds_id='dataset1')

    wait_for_es(sec=1)

    def count_docs(doc_type, *term_pairs):
        """Count documents of ``doc_type`` matching every (field, value) term."""
        term_filters = [{'term': {field: value}} for field, value in term_pairs]
        query = {'query': {'bool': {'filter': term_filters}}}
        return es.count(index=index, doc_type=doc_type, body=query)['count']

    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_name', 'HMDB')) == 0
    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_name', 'ChEBI')) == 0
    # The other dataset's annotation must survive the deletion.
    assert count_docs('annotation', ('ds_id', 'dataset2'), ('db_name', 'HMDB')) == 1
    assert count_docs('dataset', ('ds_id', 'dataset1'), ('_type', 'dataset')) == 0
def test_delete_ds__completely(es, sm_index, sm_config):
    """Check that ``ESExporter.delete_ds`` removes every document of one dataset.

    Three annotation documents (two for ``dataset1``, one for ``dataset2``)
    and one dataset document for ``dataset1`` are created; after deleting
    ``dataset1`` only the ``dataset2`` annotation must still be counted.
    """
    index = sm_config['elasticsearch']['index']

    # One row per seeded document: (doc_type, document id, ds_id, db_name).
    seed_docs = (
        ('annotation', 'id1', 'dataset1', 'HMDB'),
        ('annotation', 'id2', 'dataset1', 'ChEBI'),
        ('annotation', 'id3', 'dataset2', 'HMDB'),
        ('dataset', 'dataset1', 'dataset1', 'HMDB'),
    )
    for doc_type, doc_id, ds_id, db_name in seed_docs:
        es.create(index=index, doc_type=doc_type, id=doc_id,
                  body={'ds_id': ds_id, 'db_name': db_name, 'db_version': '2016'})

    wait_for_es(sec=1)

    exporter = ESExporter(MagicMock(spec=DB))
    exporter.delete_ds(ds_id='dataset1')

    wait_for_es(sec=1)

    def count_docs(doc_type, *term_pairs):
        """Count documents of ``doc_type`` matching every (field, value) term."""
        term_filters = [{'term': {field: value}} for field, value in term_pairs]
        query = {'query': {'bool': {'filter': term_filters}}}
        return es.count(index=index, doc_type=doc_type, body=query)['count']

    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_name', 'HMDB')) == 0
    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_name', 'ChEBI')) == 0
    # The other dataset's annotation must survive the deletion.
    assert count_docs('annotation', ('ds_id', 'dataset2'), ('db_name', 'HMDB')) == 1
    assert count_docs('dataset', ('ds_id', 'dataset1'), ('_type', 'dataset')) == 0
Exemple #3
0
def del_jobs(ds: Dataset, moldb_ids: Optional[Iterable[int]] = None):
    """
    Delete a dataset's jobs for the specified moldbs, or all jobs if moldb_ids is None.
    Also cleans up the annotations from ElasticSearch and deletes their ion images.

    Args:
        ds: dataset whose jobs are removed; only ``ds.id`` and ``ds.name`` are read.
        moldb_ids: ids of the molecular databases whose jobs should be deleted.
            Any iterable is accepted, including one-shot iterators. When None,
            the ids are obtained from ``get_ds_moldb_ids(ds.id)``.
    """
    db = DB()
    es = ESExporter(db)

    if moldb_ids is None:
        moldb_ids = get_ds_moldb_ids(ds.id)
    # The ids are needed twice below (moldb lookup and the job query), so
    # materialise them once; a generator would otherwise be exhausted by the
    # first use and the job query would silently receive an empty list.
    moldb_ids = list(moldb_ids)
    moldbs = molecular_db.find_by_ids(moldb_ids)

    # Reuse the DB wrapper created above rather than instantiating another one.
    job_ids = db.select_onecol(
        'SELECT j.id FROM job j WHERE ds_id = %s AND moldb_id = ANY(%s)',
        (ds.id, moldb_ids))
    del_diagnostics(ds.id, job_ids)

    for moldb in moldbs:
        logger.info(
            f'Deleting isotopic images: ds_id={ds.id} ds_name={ds.name} moldb={moldb}'
        )
        # Collect the image ids before the job rows are deleted: this query
        # reaches the annotations through the job table.
        img_id_rows = db.select_onecol(
            'SELECT iso_image_ids '
            'FROM annotation m '
            'JOIN job j ON j.id = m.job_id '
            'JOIN dataset d ON d.id = j.ds_id '
            'WHERE ds_id = %s AND j.moldb_id = %s',
            (ds.id, moldb.id),
        )

        # Each row holds a list of image ids; flatten and skip missing (None) entries.
        image_ids = [
            img_id for img_ids in img_id_rows for img_id in img_ids
            if img_id is not None
        ]
        image_storage.delete_images(image_storage.ISO, ds.id, image_ids)

        logger.info(
            f"Deleting job results: ds_id={ds.id} ds_name={ds.name} moldb={moldb}"
        )
        db.alter('DELETE FROM job WHERE ds_id = %s and moldb_id = %s',
                 (ds.id, moldb.id))
        es.delete_ds(ds.id, moldb)
Exemple #4
0
def test_index_ds_works(es_dsl_search, sm_index, sm_config):
    """Index one dataset with two annotations and verify the stored ES documents.

    The DB is mocked: ``select_with_fields`` returns a canned dataset row for
    ``DATASET_SEL`` and two canned annotation rows for ``ANNOTATIONS_SEL``.
    After ``ESExporter.index_ds`` runs, the dataset document and both
    annotation documents are fetched from ES and compared field by field.
    """
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat(' ')
    mol_db_id = 0
    last_finished = '2017-01-01T00:00:00'

    def db_sel_side_effect(sql, params):
        """Return canned rows for the known queries; log any other query."""
        if sql == DATASET_SEL:
            return [{
                'ds_id': ds_id,
                'ds_name': 'ds_name',
                'ds_input_path': 'ds_input_path',
                'ds_config': 'ds_config',
                'ds_meta': {},
                'ds_upload_dt': upload_dt,
                'ds_status': 'ds_status',
                'ds_last_finished': datetime.strptime(last_finished, '%Y-%m-%dT%H:%M:%S'),
                'ds_is_public': True,
                'ds_ion_img_storage': 'fs',
                'ds_acq_geometry': {}
            }]
        elif sql == ANNOTATIONS_SEL:
            return [{
                'sf': 'H2O',
                'sf_adduct': 'H2O+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.1,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+'
            }, {
                'sf': 'Au',
                'sf_adduct': 'Au+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.05,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+'
            }]
        else:
            # Log the actual call arguments; the previous code referenced an
            # undefined name here and raised NameError instead of logging.
            logging.getLogger('engine').error(
                'Wrong db_sel_side_effect arguments: %s, %s', sql, params)

    db_mock = MagicMock(spec=DB)
    db_mock.select_with_fields.side_effect = db_sel_side_effect

    mol_db_mock = MagicMock(MolecularDB)
    mol_db_mock.id = mol_db_id
    mol_db_mock.name = 'db_name'
    mol_db_mock.version = '2017'
    mol_db_mock.get_molecules.return_value = pd.DataFrame([('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
                                                          columns=['sf', 'mol_id', 'mol_name'])

    # Fake centroid lookup keyed by (sum formula, adduct).
    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.ion_centroids = lambda sf, adduct: {
        ('H2O', '+H'): ([100., 200.], None),
        ('Au', '+H'): ([10., 20.], None)
    }[(sf, adduct)]

    es_exp = ESExporter(db_mock)
    # Delete first so documents from an earlier run cannot affect the checks.
    es_exp.delete_ds(ds_id)
    es_exp.index_ds(ds_id=ds_id, mol_db=mol_db_mock, isocalc=isocalc_mock)

    wait_for_es(sec=1)

    ds_d = es_dsl_search.filter('term', _type='dataset').execute().to_dict()['hits']['hits'][0]['_source']
    assert ds_d == {
        'ds_last_finished': last_finished, 'ds_config': 'ds_config', 'ds_meta': {},
        'ds_status': 'ds_status', 'ds_name': 'ds_name', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'annotation_counts': [{'db': {'name': 'db_name', 'version': '2017'},
                               'counts': [{'level': 5, 'n': 1}, {'level': 10, 'n': 2},
                                          {'level': 20, 'n': 2}, {'level': 50, 'n': 2}]}],
        'ds_is_public': True,
        'ds_acq_geometry': {},
        'ds_ion_img_storage': 'fs'
    }
    ann_1_d = es_dsl_search.filter('term', sf='H2O').execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_1_d == {
        'pattern_match': 1, 'image_corr': 1, 'fdr': 0.1, 'chaos': 1, 'sf': 'H2O', 'min_iso_ints': 0,
        'msm': 1, 'sf_adduct': 'H2O+H', 'total_iso_ints': 100, 'centroid_mzs': [100., 200.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'], 'polarity': '+', 'job_id': 1, 'max_iso_ints': 100,
        'adduct': '+H', 'ds_name': 'ds_name', 'annotation_counts': [], 'db_version': '2017', 'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+', 'comp_names': ['mol_name'], 'db_name': 'db_name', 'mz': 100., 'ds_meta': {},
        'comp_ids': ['mol_id'], 'ds_config': 'ds_config', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt, 'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs', 'ds_is_public': True
    }
    ann_2_d = es_dsl_search.filter('term', sf='Au').execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_2_d == {
        'pattern_match': 1, 'image_corr': 1, 'fdr': 0.05, 'chaos': 1, 'sf': 'Au', 'min_iso_ints': 0,
        'msm': 1, 'sf_adduct': 'Au+H', 'total_iso_ints': 100, 'centroid_mzs': [10., 20.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'], 'polarity': '+', 'job_id': 1, 'max_iso_ints': 100,
        'adduct': '+H',  'ds_name': 'ds_name', 'annotation_counts': [], 'db_version': '2017', 'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+', 'comp_names': ['mol_name'], 'db_name': 'db_name', 'mz': 10., 'ds_meta': {},
        'comp_ids': ['mol_id'], 'ds_config': 'ds_config', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt, 'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs', 'ds_is_public': True
    }
def test_index_ds_works(es_dsl_search, sm_index, sm_config):
    """Index one dataset with two annotations and verify the stored ES documents.

    The DB is mocked: ``select_with_fields`` returns a canned dataset row for
    ``DATASET_SEL`` and two canned annotation rows for ``ANNOTATIONS_SEL``.
    After ``ESExporter.index_ds`` runs, the dataset document and both
    annotation documents are fetched from ES and compared field by field.
    """
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat(' ')
    mol_db_id = 0
    last_finished = '2017-01-01T00:00:00'

    def db_sel_side_effect(sql, params):
        """Return canned rows for the known queries; log any other query."""
        if sql == DATASET_SEL:
            return [{
                'ds_id': ds_id,
                'ds_name': 'ds_name',
                'ds_input_path': 'ds_input_path',
                'ds_config': 'ds_config',
                'ds_meta': {},
                'ds_upload_dt': upload_dt,
                'ds_status': 'ds_status',
                'ds_last_finished': datetime.strptime(last_finished, '%Y-%m-%dT%H:%M:%S'),
                'ds_is_public': True,
                'ds_ion_img_storage': 'fs',
                'ds_acq_geometry': {}
            }]
        elif sql == ANNOTATIONS_SEL:
            return [{
                'sf': 'H2O',
                'sf_adduct': 'H2O+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.1,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+'
            }, {
                'sf': 'Au',
                'sf_adduct': 'Au+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.05,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+'
            }]
        else:
            # Log the actual call arguments; the previous code referenced an
            # undefined name here and raised NameError instead of logging.
            logging.getLogger('engine').error(
                'Wrong db_sel_side_effect arguments: %s, %s', sql, params)

    db_mock = MagicMock(spec=DB)
    db_mock.select_with_fields.side_effect = db_sel_side_effect

    mol_db_mock = MagicMock(MolecularDB)
    mol_db_mock.id = mol_db_id
    mol_db_mock.name = 'db_name'
    mol_db_mock.version = '2017'
    mol_db_mock.get_molecules.return_value = pd.DataFrame([('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
                                                          columns=['sf', 'mol_id', 'mol_name'])

    # Fake centroid lookup keyed by (sum formula, adduct).
    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.ion_centroids = lambda sf, adduct: {
        ('H2O', '+H'): ([100., 200.], None),
        ('Au', '+H'): ([10., 20.], None)
    }[(sf, adduct)]

    es_exp = ESExporter(db_mock)
    # Delete first so documents from an earlier run cannot affect the checks.
    es_exp.delete_ds(ds_id)
    es_exp.index_ds(ds_id=ds_id, mol_db=mol_db_mock, isocalc=isocalc_mock)

    wait_for_es(sec=1)

    ds_d = es_dsl_search.filter('term', _type='dataset').execute().to_dict()['hits']['hits'][0]['_source']
    assert ds_d == {
        'ds_last_finished': last_finished, 'ds_config': 'ds_config', 'ds_meta': {},
        'ds_status': 'ds_status', 'ds_name': 'ds_name', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'annotation_counts': [{'db': {'name': 'db_name', 'version': '2017'},
                               'counts': [{'level': 5, 'n': 1}, {'level': 10, 'n': 2},
                                          {'level': 20, 'n': 2}, {'level': 50, 'n': 2}]}],
        'ds_is_public': True,
        'ds_acq_geometry': {},
        'ds_ion_img_storage': 'fs'
    }
    ann_1_d = es_dsl_search.filter('term', sf='H2O').execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_1_d == {
        'pattern_match': 1, 'image_corr': 1, 'fdr': 0.1, 'chaos': 1, 'sf': 'H2O', 'min_iso_ints': 0,
        'msm': 1, 'sf_adduct': 'H2O+H', 'total_iso_ints': 100, 'centroid_mzs': [100., 200.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'], 'polarity': '+', 'job_id': 1, 'max_iso_ints': 100,
        'adduct': '+H', 'ds_name': 'ds_name', 'annotation_counts': [], 'db_version': '2017', 'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+', 'comp_names': ['mol_name'], 'db_name': 'db_name', 'mz': 100., 'ds_meta': {},
        'comp_ids': ['mol_id'], 'ds_config': 'ds_config', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt, 'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs', 'ds_is_public': True
    }
    ann_2_d = es_dsl_search.filter('term', sf='Au').execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_2_d == {
        'pattern_match': 1, 'image_corr': 1, 'fdr': 0.05, 'chaos': 1, 'sf': 'Au', 'min_iso_ints': 0,
        'msm': 1, 'sf_adduct': 'Au+H', 'total_iso_ints': 100, 'centroid_mzs': [10., 20.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'], 'polarity': '+', 'job_id': 1, 'max_iso_ints': 100,
        'adduct': '+H',  'ds_name': 'ds_name', 'annotation_counts': [], 'db_version': '2017', 'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+', 'comp_names': ['mol_name'], 'db_name': 'db_name', 'mz': 10., 'ds_meta': {},
        'comp_ids': ['mol_id'], 'ds_config': 'ds_config', 'ds_input_path': 'ds_input_path', 'ds_id': ds_id,
        'ds_upload_dt': upload_dt, 'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs', 'ds_is_public': True
    }
Exemple #6
0
def test_index_ds_works(sm_config, test_db, es, es_dsl_search, sm_index,
                        ds_config, metadata, annotation_stats):
    """Index a dataset stored in the test DB and verify the resulting ES documents.

    Inserts a dataset, a job, a user, a group, two ions and two annotations,
    runs ``ESExporter.index_ds`` with a mocked isotope calculator and a patched
    molecule lookup, then compares the dataset document and both annotation
    documents read back from ES against the expected field values.
    """
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat()
    last_finished = '2017-01-01 00:00:00'
    iso_image_ids = ['iso_img_id_1', 'iso_img_id_2']
    stats = json.dumps(annotation_stats)

    # --- Seed the database -------------------------------------------------
    db = DB()
    dataset_row = [
        ds_id, json.dumps(ds_config), json.dumps(metadata),
        upload_dt, upload_dt, 'thumb-id',
    ]
    db.insert(
        "INSERT INTO dataset(id, name, input_path, config, metadata, upload_dt, status, "
        "status_update_dt, is_public, acq_geometry, ion_thumbnail) "
        "VALUES (%s, 'ds_name', 'ds_input_path', %s, %s, %s, 'ds_status', %s, true, '{}', %s)",
        [dataset_row],
    )
    moldb = create_test_molecular_db()
    (job_id, ) = db.insert_return(
        "INSERT INTO job(ds_id, moldb_id, status, start, finish) "
        "VALUES (%s, %s, 'job_status', %s, %s) RETURNING id",
        rows=[(ds_id, moldb.id, last_finished, last_finished)],
    )
    (user_id, ) = db.insert_return(
        "INSERT INTO graphql.user (email, name, role) "
        "VALUES ('email', 'user_name', 'user') RETURNING id",
        [[]],
    )
    (group_id, ) = db.insert_return(
        "INSERT INTO graphql.group (name, short_name) VALUES ('group name', 'grp') RETURNING id",
        [[]],
    )
    db.insert(
        "INSERT INTO graphql.dataset(id, user_id, group_id) VALUES (%s, %s, %s)",
        [[ds_id, user_id, group_id]],
    )
    ion_rows = [
        ['H2O-H+O-H+H', 'H2O', '-H+O', '-H', '+H', 1, 'HO2'],
        ['Au+H', 'Au', '', '', '+H', 1, 'HAu'],
    ]
    ion_id1, ion_id2 = db.insert_return(
        "INSERT INTO graphql.ion(ion, formula, chem_mod, neutral_loss, adduct, charge, ion_formula) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id",
        ion_rows,
    )
    annotation_rows = [
        [job_id, 'H2O', '-H+O', '-H', '+H', 1, 0.1, stats, iso_image_ids, ion_id1],
        [job_id, 'Au', '', '', '+H', 1, 0.05, stats, iso_image_ids, ion_id2],
    ]
    db.insert(
        "INSERT INTO annotation(job_id, formula, chem_mod, neutral_loss, adduct, "
        "msm, fdr, stats, iso_image_ids, ion_id) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
        annotation_rows,
    )

    # --- Fakes for the isotope calculator ------------------------------------
    def fake_centroids(formula):
        """Return canned (mzs, None) centroids for the ions used in this test."""
        return {
            'H2O+H': ([100.0, 200.0], None),
            'H2O-H+O-H+H': ([100.0, 200.0, 300.0], None),
            'Au+H': ([10.0, 20.0], None),
        }[formula]

    def fake_mass_accuracy_bounds(mzs):
        """Return the given m/z values as both lower and upper bounds."""
        return (mzs, mzs)

    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.centroids = fake_centroids
    isocalc_mock.mass_accuracy_bounds = fake_mass_accuracy_bounds

    # --- Run the export --------------------------------------------------------
    molecules_df = pd.DataFrame(
        [('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
        columns=['formula', 'mol_id', 'mol_name'],
    )
    with patch('sm.engine.es_export.molecular_db.fetch_molecules',
               return_value=molecules_df):
        es_exp = ESExporter(db, sm_config)
        es_exp.delete_ds(ds_id)
        es_exp.index_ds(ds_id=ds_id, moldb=moldb, isocalc=isocalc_mock)

    wait_for_es(es, sm_config['elasticsearch']['index'])

    def first_source(**term):
        """Return the ``_source`` of the first hit of a single-term filter search."""
        hits = es_dsl_search.filter('term', **term).execute().to_dict()['hits']['hits']
        return hits[0]['_source']

    # --- Dataset document --------------------------------------------------------
    ds_d = first_source(_type='dataset')
    expected_ds_fields = {
        'ds_last_finished': last_finished,
        'ds_config': ds_config,
        'ds_adducts': ds_config['isotope_generation']['adducts'],
        'ds_moldb_ids': ds_config['database_ids'],
        'ds_chem_mods': [],
        'ds_neutral_losses': [],
        'ds_project_ids': [],
        'ds_project_names': [],
        'ds_meta': metadata,
        'ds_status': 'ds_status',
        'ds_status_update_dt': upload_dt,
        'ds_name': 'ds_name',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'ds_is_public': True,
        'ds_submitter_email': 'email',
        'ds_submitter_id': user_id,
        'ds_submitter_name': 'user_name',
        'ds_group_approved': False,
        'ds_group_id': group_id,
        'ds_group_name': 'group name',
        'ds_group_short_name': 'grp',
    }
    expected_counts = [
        {'level': level, 'n': n}
        for level, n in ((5, 1), (10, 2), (20, 2), (50, 2))
    ]
    assert ds_d == {
        **expected_ds_fields,
        'ds_acq_geometry': {},
        'annotation_counts': [{
            'db': {'id': moldb.id, 'name': moldb.name},
            'counts': expected_counts,
        }],
    }

    # --- Annotation documents ------------------------------------------------------
    ann_1_d = first_source(formula='H2O')
    top_level_stats = {
        'pattern_match': annotation_stats['spectral'],
        'image_corr': annotation_stats['spatial'],
        'chaos': annotation_stats['chaos'],
    }
    top_level_stats.update(
        (name, val) for name, val in annotation_stats.items()
        if name in NON_METRIC_STATS
    )
    metrics = {
        name: val
        for name, val in annotation_stats.items()
        if name not in NON_METRIC_STATS
    }
    # Fields expected to be identical in both annotation documents.
    shared_ann_fields = {
        **expected_ds_fields,
        **top_level_stats,
        'metrics': metrics,
        'msm': 1.0,
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'iso_image_urls': [
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_1',
            f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_2',
        ],
        'isobars': [],
        'isomer_ions': [],
        'polarity': '+',
        'job_id': 1,
        'adduct': '+H',
        'annotation_counts': [],
        'comp_names': ['mol_name'],
        'comps_count_with_isomers': 1,
        'db_id': moldb.id,
        'db_name': moldb.name,
        'db_version': moldb.version,
        'comp_ids': ['mol_id'],
        'off_sample_label': None,
        'off_sample_prob': None,
    }
    assert ann_1_d == {
        **shared_ann_fields,
        'fdr': 0.1,
        'formula': 'H2O',
        'ion': 'H2O-H+O-H+H+',
        'ion_formula': 'HO2',
        'centroid_mzs': [100.0, 200.0, 300.0],
        'neutral_loss': '-H',
        'chem_mod': '-H+O',
        'mz': 100.0,
        'annotation_id': 1,
    }

    ann_2_d = first_source(formula='Au')
    assert ann_2_d == {
        **shared_ann_fields,
        'fdr': 0.05,
        'formula': 'Au',
        'ion': 'Au+H+',
        'ion_formula': 'HAu',
        'centroid_mzs': [10.0, 20.0],
        'neutral_loss': '',
        'chem_mod': '',
        'mz': 10.0,
        'annotation_id': 2,
    }
Exemple #7
0
def test_delete_ds__completely(sm_config, test_db, es, sm_index):
    """Check that ``ESExporter.delete_ds`` removes every document of one dataset.

    Seeds annotation documents for ``dataset1`` (two molecular DBs) and
    ``dataset2`` (one molecular DB) plus a dataset document for ``dataset1``.
    After deleting ``dataset1``, only the ``dataset2`` annotation must remain.
    """
    moldb = MolecularDB(0, 'HMDB', '2016')
    moldb2 = MolecularDB(1, 'ChEBI', '2016')

    index = sm_config['elasticsearch']['index']

    # One row per seeded document: (doc_type, document id, ds_id, molecular DB).
    seed_docs = (
        ('annotation', 'id1', 'dataset1', moldb),
        ('annotation', 'id2', 'dataset1', moldb2),
        ('annotation', 'id3', 'dataset2', moldb),
        ('dataset', 'dataset1', 'dataset1', moldb),
    )
    for doc_type, doc_id, ds_id, doc_moldb in seed_docs:
        es.create(
            index=index,
            doc_type=doc_type,
            id=doc_id,
            body={
                'ds_id': ds_id,
                'db_id': doc_moldb.id,
                'db_name': doc_moldb.name,
                'db_version': doc_moldb.version,
            },
        )

    wait_for_es(es, index)

    exporter = ESExporter(MagicMock(spec=DB), sm_config)
    exporter.delete_ds(ds_id='dataset1')

    wait_for_es(es, index)

    def count_docs(doc_type, *term_pairs):
        """Count documents of ``doc_type`` matching every (field, value) term."""
        term_filters = [{'term': {field: value}} for field, value in term_pairs]
        query = {'query': {'bool': {'filter': term_filters}}}
        return es.count(index=index, doc_type=doc_type, body=query)['count']

    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_id', moldb.id)) == 0
    assert count_docs('annotation', ('ds_id', 'dataset1'), ('db_id', moldb2.id)) == 0
    # The other dataset's annotation must survive the deletion.
    assert count_docs('annotation', ('ds_id', 'dataset2'), ('db_id', moldb.id)) == 1
    assert count_docs('dataset', ('ds_id', 'dataset1'), ('_type', 'dataset')) == 0