def test_delete_ds__completely(es, sm_index, sm_config):
    """Check that ``ESExporter.delete_ds`` wipes every document of one dataset.

    Three annotation documents (two for ``dataset1``, one for ``dataset2``)
    and one dataset document for ``dataset1`` are created, ``dataset1`` is
    deleted, and the remaining document counts are asserted: nothing is left
    for ``dataset1`` while the ``dataset2`` annotation is still there.
    """
    index = sm_config['elasticsearch']['index']

    # (doc_type, document id, ds_id, db_name) of every seeded document
    seed_docs = (
        ('annotation', 'id1', 'dataset1', 'HMDB'),
        ('annotation', 'id2', 'dataset1', 'ChEBI'),
        ('annotation', 'id3', 'dataset2', 'HMDB'),
        ('dataset', 'dataset1', 'dataset1', 'HMDB'),
    )
    for doc_type, doc_id, ds_id, db_name in seed_docs:
        es.create(
            index=index,
            doc_type=doc_type,
            id=doc_id,
            body={'ds_id': ds_id, 'db_name': db_name, 'db_version': '2016'},
        )
    wait_for_es(sec=1)

    exporter = ESExporter(MagicMock(spec=DB))
    exporter.delete_ds(ds_id='dataset1')
    wait_for_es(sec=1)

    def count_docs(doc_type, *terms):
        """Count documents of ``doc_type`` matching all of the term filters."""
        query = {'query': {'bool': {'filter': [{'term': term} for term in terms]}}}
        return es.count(index=index, doc_type=doc_type, body=query)['count']

    # Both annotations of the deleted dataset are gone ...
    assert count_docs('annotation', {'ds_id': 'dataset1'}, {'db_name': 'HMDB'}) == 0
    assert count_docs('annotation', {'ds_id': 'dataset1'}, {'db_name': 'ChEBI'}) == 0
    # ... the annotation of the other dataset is untouched ...
    assert count_docs('annotation', {'ds_id': 'dataset2'}, {'db_name': 'HMDB'}) == 1
    # ... and the dataset document itself is removed as well.
    assert count_docs('dataset', {'ds_id': 'dataset1'}, {'_type': 'dataset'}) == 0
def del_jobs(ds: Dataset, moldb_ids: Optional[Iterable[int]] = None):
    """Delete a dataset's jobs for the specified moldbs, or all jobs if moldb_ids is None.

    Also cleans up the annotations from ElasticSearch and deletes their ion images.

    Args:
        ds: dataset whose jobs are deleted; its ``id`` and ``name`` are used.
        moldb_ids: ids of the molecular databases whose jobs should be removed.
            Any iterable is accepted, including one-shot iterators. When None,
            the ids are looked up with ``get_ds_moldb_ids(ds.id)``.
    """
    db = DB()
    es = ESExporter(db)

    if moldb_ids is None:
        moldb_ids = get_ds_moldb_ids(ds.id)
    # Materialise once: the ids are needed both for the moldb lookup and for
    # the job query below, and a generator would be exhausted after the first use.
    moldb_ids = list(moldb_ids)
    moldbs = molecular_db.find_by_ids(moldb_ids)

    job_ids = db.select_onecol(
        'SELECT j.id FROM job j WHERE ds_id = %s AND moldb_id = ANY(%s)',
        (ds.id, moldb_ids),
    )
    # Diagnostics are removed up front, while the job ids are still known.
    del_diagnostics(ds.id, job_ids)

    for moldb in moldbs:
        logger.info(
            f'Deleting isotopic images: ds_id={ds.id} ds_name={ds.name} moldb={moldb}'
        )
        img_id_rows = db.select_onecol(
            'SELECT iso_image_ids '
            'FROM annotation m '
            'JOIN job j ON j.id = m.job_id '
            'JOIN dataset d ON d.id = j.ds_id '
            'WHERE ds_id = %s AND j.moldb_id = %s',
            (ds.id, moldb.id),
        )
        # Each row holds a list of image ids; flatten them and skip empty slots.
        image_ids = [
            img_id for img_ids in img_id_rows for img_id in img_ids if img_id is not None
        ]
        image_storage.delete_images(image_storage.ISO, ds.id, image_ids)

        logger.info(
            f"Deleting job results: ds_id={ds.id} ds_name={ds.name} moldb={moldb}"
        )
        db.alter('DELETE FROM job WHERE ds_id = %s and moldb_id = %s', (ds.id, moldb.id))
        es.delete_ds(ds.id, moldb)
def test_index_ds_works(es_dsl_search, sm_index, sm_config):
    """Check that ``ESExporter.index_ds`` writes the expected documents.

    The database is a ``MagicMock`` whose ``select_with_fields`` answers the
    ``DATASET_SEL`` and ``ANNOTATIONS_SEL`` queries with canned rows. After
    indexing, the dataset document and both annotation documents are read back
    through ``es_dsl_search`` and compared field by field.
    """
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat(' ')
    mol_db_id = 0
    last_finished = '2017-01-01T00:00:00'

    def db_sel_side_effect(sql, params):
        """Return canned rows for the two queries the exporter is expected to run."""
        if sql == DATASET_SEL:
            return [{
                'ds_id': ds_id,
                'ds_name': 'ds_name',
                'ds_input_path': 'ds_input_path',
                'ds_config': 'ds_config',
                'ds_meta': {},
                'ds_upload_dt': upload_dt,
                'ds_status': 'ds_status',
                'ds_last_finished': datetime.strptime(last_finished, '%Y-%m-%dT%H:%M:%S'),
                'ds_is_public': True,
                'ds_ion_img_storage': 'fs',
                'ds_acq_geometry': {},
            }]
        elif sql == ANNOTATIONS_SEL:
            return [{
                'sf': 'H2O',
                'sf_adduct': 'H2O+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.1,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+',
            }, {
                'sf': 'Au',
                'sf_adduct': 'Au+H',
                'chaos': 1,
                'image_corr': 1,
                'pattern_match': 1,
                'total_iso_ints': 100,
                'min_iso_ints': 0,
                'max_iso_ints': 100,
                'msm': 1,
                'adduct': '+H',
                'job_id': 1,
                'fdr': 0.05,
                'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
                'polarity': '+',
            }]
        else:
            # Report the unexpected query itself; the arguments are passed
            # separately so that logging does the formatting.
            logging.getLogger('engine').error(
                'Wrong db_sel_side_effect arguments: %s, %s', sql, params
            )

    db_mock = MagicMock(spec=DB)
    db_mock.select_with_fields.side_effect = db_sel_side_effect

    mol_db_mock = MagicMock(MolecularDB)
    mol_db_mock.id = mol_db_id
    mol_db_mock.name = 'db_name'
    mol_db_mock.version = '2017'
    mol_db_mock.get_molecules.return_value = pd.DataFrame(
        [('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
        columns=['sf', 'mol_id', 'mol_name'],
    )

    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.ion_centroids = lambda sf, adduct: {
        ('H2O', '+H'): ([100., 200.], None),
        ('Au', '+H'): ([10., 20.], None),
    }[(sf, adduct)]

    es_exp = ESExporter(db_mock)
    # Start from a clean slate in case documents for this dataset already exist.
    es_exp.delete_ds(ds_id)
    es_exp.index_ds(ds_id=ds_id, mol_db=mol_db_mock, isocalc=isocalc_mock)

    wait_for_es(sec=1)

    ds_d = es_dsl_search.filter(
        'term', _type='dataset'
    ).execute().to_dict()['hits']['hits'][0]['_source']
    assert ds_d == {
        'ds_last_finished': last_finished,
        'ds_config': 'ds_config',
        'ds_meta': {},
        'ds_status': 'ds_status',
        'ds_name': 'ds_name',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'annotation_counts': [{
            'db': {'name': 'db_name', 'version': '2017'},
            'counts': [
                {'level': 5, 'n': 1},
                {'level': 10, 'n': 2},
                {'level': 20, 'n': 2},
                {'level': 50, 'n': 2},
            ],
        }],
        'ds_is_public': True,
        'ds_acq_geometry': {},
        'ds_ion_img_storage': 'fs',
    }

    ann_1_d = es_dsl_search.filter(
        'term', sf='H2O'
    ).execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_1_d == {
        'pattern_match': 1,
        'image_corr': 1,
        'fdr': 0.1,
        'chaos': 1,
        'sf': 'H2O',
        'min_iso_ints': 0,
        'msm': 1,
        'sf_adduct': 'H2O+H',
        'total_iso_ints': 100,
        'centroid_mzs': [100., 200.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'polarity': '+',
        'job_id': 1,
        'max_iso_ints': 100,
        'adduct': '+H',
        'ds_name': 'ds_name',
        'annotation_counts': [],
        'db_version': '2017',
        'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+',
        'comp_names': ['mol_name'],
        'db_name': 'db_name',
        'mz': 100.,
        'ds_meta': {},
        'comp_ids': ['mol_id'],
        'ds_config': 'ds_config',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs',
        'ds_is_public': True,
    }

    ann_2_d = es_dsl_search.filter(
        'term', sf='Au'
    ).execute().to_dict()['hits']['hits'][0]['_source']
    assert ann_2_d == {
        'pattern_match': 1,
        'image_corr': 1,
        'fdr': 0.05,
        'chaos': 1,
        'sf': 'Au',
        'min_iso_ints': 0,
        'msm': 1,
        'sf_adduct': 'Au+H',
        'total_iso_ints': 100,
        'centroid_mzs': [10., 20.],
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'polarity': '+',
        'job_id': 1,
        'max_iso_ints': 100,
        'adduct': '+H',
        'ds_name': 'ds_name',
        'annotation_counts': [],
        'db_version': '2017',
        'ds_status': 'ds_status',
        'ion_add_pol': '[M+H]+',
        'comp_names': ['mol_name'],
        'db_name': 'db_name',
        'mz': 10.,
        'ds_meta': {},
        'comp_ids': ['mol_id'],
        'ds_config': 'ds_config',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'ds_last_finished': last_finished,
        'ds_ion_img_storage': 'fs',
        'ds_is_public': True,
    }
def test_index_ds_works(sm_config, test_db, es, es_dsl_search, sm_index, ds_config, metadata, annotation_stats):
    """Index one dataset with two annotations and verify the stored documents.

    Rows for the dataset, its job, a user, a group, two ions and two
    annotations are inserted through ``DB``; ``fetch_molecules`` is patched to
    return a fixed molecule table. After ``ESExporter.index_ds`` has run, the
    dataset document and both annotation documents are fetched with
    ``es_dsl_search`` and compared against the full expected payload.
    """
    ds_id = '2000-01-01_00h00m'
    upload_dt = datetime.now().isoformat()
    last_finished = '2017-01-01 00:00:00'
    iso_image_ids = ['iso_img_id_1', 'iso_img_id_2']
    stats = json.dumps(annotation_stats)

    db = DB()

    # --- database rows -----------------------------------------------------
    dataset_sql = (
        "INSERT INTO dataset(id, name, input_path, config, metadata, upload_dt, status, "
        "status_update_dt, is_public, acq_geometry, ion_thumbnail) "
        "VALUES (%s, 'ds_name', 'ds_input_path', %s, %s, %s, 'ds_status', %s, true, '{}', %s)"
    )
    dataset_row = [
        ds_id,
        json.dumps(ds_config),
        json.dumps(metadata),
        upload_dt,
        upload_dt,
        'thumb-id',
    ]
    db.insert(dataset_sql, [dataset_row])

    moldb = create_test_molecular_db()

    job_sql = (
        "INSERT INTO job(ds_id, moldb_id, status, start, finish) "
        "VALUES (%s, %s, 'job_status', %s, %s) RETURNING id"
    )
    (job_id,) = db.insert_return(
        job_sql, rows=[(ds_id, moldb.id, last_finished, last_finished)]
    )

    user_sql = (
        "INSERT INTO graphql.user (email, name, role) "
        "VALUES ('email', 'user_name', 'user') RETURNING id"
    )
    (user_id,) = db.insert_return(user_sql, [[]])

    group_sql = (
        "INSERT INTO graphql.group (name, short_name) VALUES ('group name', 'grp') RETURNING id"
    )
    (group_id,) = db.insert_return(group_sql, [[]])

    db.insert(
        "INSERT INTO graphql.dataset(id, user_id, group_id) VALUES (%s, %s, %s)",
        [[ds_id, user_id, group_id]],
    )

    ion_sql = (
        "INSERT INTO graphql.ion(ion, formula, chem_mod, neutral_loss, adduct, charge, ion_formula) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s) RETURNING id"
    )
    ion_rows = [
        ['H2O-H+O-H+H', 'H2O', '-H+O', '-H', '+H', 1, 'HO2'],
        ['Au+H', 'Au', '', '', '+H', 1, 'HAu'],
    ]
    ion_id1, ion_id2 = db.insert_return(ion_sql, ion_rows)

    annotation_sql = (
        "INSERT INTO annotation(job_id, formula, chem_mod, neutral_loss, adduct, "
        "msm, fdr, stats, iso_image_ids, ion_id) "
        "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"
    )
    annotation_rows = [
        [job_id, 'H2O', '-H+O', '-H', '+H', 1, 0.1, stats, iso_image_ids, ion_id1],
        [job_id, 'Au', '', '', '+H', 1, 0.05, stats, iso_image_ids, ion_id2],
    ]
    db.insert(annotation_sql, annotation_rows)

    # --- collaborators -----------------------------------------------------
    centroids_by_ion = {
        'H2O+H': ([100.0, 200.0], None),
        'H2O-H+O-H+H': ([100.0, 200.0, 300.0], None),
        'Au+H': ([10.0, 20.0], None),
    }
    isocalc_mock = MagicMock(IsocalcWrapper)
    isocalc_mock.centroids = lambda formula: centroids_by_ion[formula]
    isocalc_mock.mass_accuracy_bounds = lambda mzs: (mzs, mzs)

    molecules_df = pd.DataFrame(
        [('H2O', 'mol_id', 'mol_name'), ('Au', 'mol_id', 'mol_name')],
        columns=['formula', 'mol_id', 'mol_name'],
    )

    # --- exercise ----------------------------------------------------------
    with patch('sm.engine.es_export.molecular_db.fetch_molecules', return_value=molecules_df):
        es_exp = ESExporter(db, sm_config)
        es_exp.delete_ds(ds_id)
        es_exp.index_ds(ds_id=ds_id, moldb=moldb, isocalc=isocalc_mock)

    wait_for_es(es, sm_config['elasticsearch']['index'])

    def first_source(**term):
        """Return ``_source`` of the first hit matching a single term filter."""
        response = es_dsl_search.filter('term', **term).execute().to_dict()
        return response['hits']['hits'][0]['_source']

    # --- dataset document --------------------------------------------------
    expected_ds_fields = {
        'ds_last_finished': last_finished,
        'ds_config': ds_config,
        'ds_adducts': ds_config['isotope_generation']['adducts'],
        'ds_moldb_ids': ds_config['database_ids'],
        'ds_chem_mods': [],
        'ds_neutral_losses': [],
        'ds_project_ids': [],
        'ds_project_names': [],
        'ds_meta': metadata,
        'ds_status': 'ds_status',
        'ds_status_update_dt': upload_dt,
        'ds_name': 'ds_name',
        'ds_input_path': 'ds_input_path',
        'ds_id': ds_id,
        'ds_upload_dt': upload_dt,
        'ds_is_public': True,
        'ds_submitter_email': 'email',
        'ds_submitter_id': user_id,
        'ds_submitter_name': 'user_name',
        'ds_group_approved': False,
        'ds_group_id': group_id,
        'ds_group_name': 'group name',
        'ds_group_short_name': 'grp',
    }
    expected_counts = [
        {'level': 5, 'n': 1},
        {'level': 10, 'n': 2},
        {'level': 20, 'n': 2},
        {'level': 50, 'n': 2},
    ]
    ds_d = first_source(_type='dataset')
    assert ds_d == {
        **expected_ds_fields,
        'ds_acq_geometry': {},
        'annotation_counts': [
            {'db': {'id': moldb.id, 'name': moldb.name}, 'counts': expected_counts},
        ],
    }

    # --- annotation documents ----------------------------------------------
    # Stats listed in NON_METRIC_STATS are expected at the top level of the
    # document; every other stat is expected under 'metrics'.
    non_metric_stats = {}
    metrics = {}
    for key, value in annotation_stats.items():
        if key in NON_METRIC_STATS:
            non_metric_stats[key] = value
        else:
            metrics[key] = value
    top_level_stats = {
        'pattern_match': annotation_stats['spectral'],
        'image_corr': annotation_stats['spatial'],
        'chaos': annotation_stats['chaos'],
        **non_metric_stats,
    }

    iso_image_urls = [
        f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_1',
        f'http://localhost:9000/{sm_config["image_storage"]["bucket"]}/iso/{ds_id}/iso_img_id_2',
    ]
    # Everything the two annotation documents are expected to have in common.
    shared_ann_fields = {
        **expected_ds_fields,
        **top_level_stats,
        'metrics': metrics,
        'msm': 1.0,
        'iso_image_ids': ['iso_img_id_1', 'iso_img_id_2'],
        'iso_image_urls': iso_image_urls,
        'isobars': [],
        'isomer_ions': [],
        'polarity': '+',
        'job_id': 1,
        'adduct': '+H',
        'annotation_counts': [],
        'comp_names': ['mol_name'],
        'comps_count_with_isomers': 1,
        'db_id': moldb.id,
        'db_name': moldb.name,
        'db_version': moldb.version,
        'comp_ids': ['mol_id'],
        'off_sample_label': None,
        'off_sample_prob': None,
    }

    ann_1_d = first_source(formula='H2O')
    assert ann_1_d == {
        **shared_ann_fields,
        'fdr': 0.1,
        'formula': 'H2O',
        'ion': 'H2O-H+O-H+H+',
        'ion_formula': 'HO2',
        'centroid_mzs': [100.0, 200.0, 300.0],
        'neutral_loss': '-H',
        'chem_mod': '-H+O',
        'mz': 100.0,
        'annotation_id': 1,
    }

    ann_2_d = first_source(formula='Au')
    assert ann_2_d == {
        **shared_ann_fields,
        'fdr': 0.05,
        'formula': 'Au',
        'ion': 'Au+H+',
        'ion_formula': 'HAu',
        'centroid_mzs': [10.0, 20.0],
        'neutral_loss': '',
        'chem_mod': '',
        'mz': 10.0,
        'annotation_id': 2,
    }
def test_delete_ds__completely(sm_config, test_db, es, sm_index):
    """Check that ``delete_ds`` without a moldb removes a dataset entirely.

    Annotation documents for two molecular databases under ``dataset1``, one
    annotation under ``dataset2`` and a dataset document for ``dataset1`` are
    created. After deleting ``dataset1`` only the ``dataset2`` annotation is
    expected to remain.
    """
    index = sm_config['elasticsearch']['index']
    moldb = MolecularDB(0, 'HMDB', '2016')
    moldb2 = MolecularDB(1, 'ChEBI', '2016')

    def seed_doc(doc_type, doc_id, ds_id, db):
        """Create one document tagged with a dataset id and a molecular database."""
        es.create(
            index=index,
            doc_type=doc_type,
            id=doc_id,
            body={
                'ds_id': ds_id,
                'db_id': db.id,
                'db_name': db.name,
                'db_version': db.version,
            },
        )

    def count_docs(doc_type, *terms):
        """Count documents of ``doc_type`` matching all of the term filters."""
        query = {'query': {'bool': {'filter': [{'term': term} for term in terms]}}}
        return es.count(index=index, doc_type=doc_type, body=query)['count']

    seed_doc('annotation', 'id1', 'dataset1', moldb)
    seed_doc('annotation', 'id2', 'dataset1', moldb2)
    seed_doc('annotation', 'id3', 'dataset2', moldb)
    seed_doc('dataset', 'dataset1', 'dataset1', moldb)
    wait_for_es(es, index)

    exporter = ESExporter(MagicMock(spec=DB), sm_config)
    exporter.delete_ds(ds_id='dataset1')
    wait_for_es(es, index)

    # Annotations of the deleted dataset are gone for both databases ...
    assert count_docs('annotation', {'ds_id': 'dataset1'}, {'db_id': moldb.id}) == 0
    assert count_docs('annotation', {'ds_id': 'dataset1'}, {'db_id': moldb2.id}) == 0
    # ... the other dataset keeps its annotation ...
    assert count_docs('annotation', {'ds_id': 'dataset2'}, {'db_id': moldb.id}) == 1
    # ... and the dataset document itself no longer exists.
    assert count_docs('dataset', {'ds_id': 'dataset1'}, {'_type': 'dataset'}) == 0