def test_search_job_imzml_example_es_export_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                                  post_images_to_annot_service_mock,
                                                  MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                                  sm_config, create_fill_sm_database, es_dsl_search,
                                                  clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    def throw_exception_function(*args):
        raise Exception('Test')

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        with patch('sm.engine.search_job.ESExporter.index_ds') as index_ds_mock:
            index_ds_mock.side_effect = throw_exception_function

            img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
            job = SearchJob(img_store=img_store)
            ds = Dataset.load(db, ds_id)
            job.run(ds)
    except ESExportFailedError as e:
        assert e
        # dataset table asserts
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
    else:
        raise AssertionError('ESExportFailedError should be raised')
    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))

def test_classify_ion_images_preds_saved(call_api_mock, image_storage_mock, fill_db):
    call_api_mock.return_value = {
        'predictions': [{'prob': 0.1, 'label': 'on'}, {'prob': 0.9, 'label': 'off'}]
    }

    fp = io.BytesIO()
    Image.new('RGBA', (10, 10)).save(fp, format='PNG')
    fp.seek(0)
    img_bytes = fp.read()
    image_storage_mock.get_image.return_value = img_bytes

    db = DB()
    ds_id = '2000-01-01'
    ds = Dataset.load(db, ds_id)
    services_config = defaultdict(str)

    classify_dataset_ion_images(db, ds, services_config)

    annotations = db.select_with_fields(
        (
            'select off_sample '
            'from dataset d '
            'join job j on j.ds_id = d.id '
            'join annotation m on m.job_id = j.id '
            'where d.id = %s '
            'order by m.id '
        ),
        params=(ds_id,),
    )
    exp_annotations = [
        {'off_sample': {'prob': 0.1, 'label': 'on'}},
        {'off_sample': {'prob': 0.9, 'label': 'off'}},
    ]
    assert annotations == exp_annotations

def run_off_sample(sm_config, ds_ids_str, sql_where, fix_missing, overwrite_existing):
    db = DB()

    ds_ids = None
    if ds_ids_str:
        ds_ids = ds_ids_str.split(',')
    elif sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    elif fix_missing:
        logger.info('Checking for missing off-sample jobs...')
        results = db.select(MISSING_OFF_SAMPLE_SEL)
        ds_ids = [ds_id for ds_id, in results]
        logger.info(f'Found {len(ds_ids)} missing off-sample sets')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    es_exp = ESExporter(db, sm_config)
    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'Running off-sample on {i+1} out of {len(ds_ids)}')
            ds = Dataset.load(db, ds_id)
            classify_dataset_ion_images(db, ds, sm_config['services'], overwrite_existing)
            es_exp.reindex_ds(ds_id)
        except Exception:
            logger.error(f'Failed to run off-sample on {ds_id}', exc_info=True)

def run(sm_config, ds_id_str, sql_where, algorithm, use_lithops):
    db = DB()

    if sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        ds_ids = ds_id_str.split(',')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    if use_lithops:
        executor = Executor(sm_config['lithops'])

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'[{i+1} / {len(ds_ids)}] Generating ion thumbnail for {ds_id}')
            ds = Dataset.load(db, ds_id)
            if use_lithops:
                # noinspection PyUnboundLocalVariable
                generate_ion_thumbnail_lithops(executor, db, ds, algorithm=algorithm)
            else:
                generate_ion_thumbnail(db, ds, algorithm=algorithm)
        except Exception:
            logger.error(f'Failed on {ds_id}', exc_info=True)

def _on_success(self, msg):
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FINISHED)

    self.logger.info(' SM update daemon: success')
    self._post_to_slack(msg)

def _on_failure(self, msg):
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FAILED)

    self.logger.error(' SM update daemon: failure', exc_info=True)
    self._post_to_slack(msg)

def test_dataset_update_status_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)

    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config,
                 DatasetStatus.INDEXING, mol_dbs=['HMDB'], adducts=['+H'])

    ds.set_status(db, es_mock, status_queue_mock, DatasetStatus.FINISHED)

    assert DatasetStatus.FINISHED == Dataset.load(db, ds_id).status
    status_queue_mock.publish.assert_called_once_with({'ds_id': ds_id, 'status': DatasetStatus.FINISHED})

def delete(self, ds_id, **kwargs):
    """Send delete message to the queue."""
    ds = Dataset.load(self._db, ds_id)
    self._set_ds_busy(ds, kwargs.get('force', False))
    self._post_sm_msg(ds=ds, queue=self._update_queue, action=DaemonAction.DELETE, **kwargs)

def test_sm_daemons_annot_fails(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                post_images_to_annot_service_mock,
                                MolDBServiceWrapperMock,
                                sm_config, test_db, es_dsl_search, clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)

    def throw_exception_function(*args):
        raise Exception('Test')

    get_compute_img_metrics_mock.return_value = throw_exception_function
    filter_sf_metrics_mock.side_effect = lambda x: x

    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])
    es = ESExporter(db)
    annotate_daemon = None

    try:
        ds_id = '2000-01-01_00h00m'
        upload_dt = datetime.now()
        ds_config_str = open(ds_config_path).read()
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        ds = Dataset.load(db, ds_id)
        queue_pub.publish({'ds_id': ds.id, 'ds_name': ds.name, 'action': 'annotate'})

        run_daemons(db, es)

        # dataset and job tables asserts
        row = db.select_one('SELECT status from dataset')
        assert row[0] == 'FAILED'
        row = db.select_one('SELECT status from job')
        assert row[0] == 'FAILED'
    finally:
        db.close()
        if annotate_daemon:
            annotate_daemon.stop()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))

def test_dataset_save_overwrite_ds_works(fill_db, metadata, ds_config):
    db = DB()
    es_mock = MagicMock(spec=ESExporter)
    ds = create_test_ds()

    ds.save(db, es_mock)

    assert ds == Dataset.load(db, ds.id)
    es_mock.sync_dataset.assert_called_once_with(ds.id)

def test_dataset_update_status_works(fill_db, metadata, ds_config):
    db = DB()
    es_mock = MagicMock(spec=ESExporter)
    ds = create_test_ds(status=DatasetStatus.ANNOTATING)

    ds.set_status(db, es_mock, DatasetStatus.FINISHED)

    assert DatasetStatus.FINISHED == Dataset.load(db, ds.id).status

def _callback(self, msg):
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.INDEXING)

    self.logger.info(f' SM update daemon received a message: {msg}')
    self._manager.post_to_slack('new', f" [v] New {msg['action']} message: {json.dumps(msg)}")

    if msg['action'] == 'update':
        self._manager.index(ds=ds)
    elif msg['action'] == 'delete':
        self._manager.delete(ds=ds)
    else:
        raise Exception(f"Wrong action: {msg['action']}")

def add_optical_image(db, ds_id, url, transform, zoom_levels=(1, 2, 4, 8)):
    """Add optical image to dataset.

    Generates scaled and transformed versions of the provided optical image + creates the thumbnail.
    """
    ds = Dataset.load(db, ds_id)
    logger.info(f'Adding optical image {url} to "{ds.id}" dataset')

    dims = _annotation_image_shape(db, ds)
    resp = requests.get(url)
    optical_img = Image.open(io.BytesIO(resp.content))

    raw_optical_img_id = url.split('/')[-1]
    _add_raw_optical_image(db, ds, raw_optical_img_id, transform)
    _add_zoom_optical_images(db, ds, dims, optical_img, transform, zoom_levels)
    _add_thumbnail_optical_image(db, ds, dims, optical_img, transform)

def update(self, ds_id, doc, async_es_update, **kwargs):
    """Save dataset and send update message to the queue."""
    ds = Dataset.load(self._db, ds_id)
    ds.name = doc.get('name', ds.name)
    ds.input_path = doc.get('input_path', ds.input_path)
    if 'metadata' in doc:
        ds.metadata = doc['metadata']
    ds.upload_dt = doc.get('upload_dt', ds.upload_dt)
    ds.is_public = doc.get('is_public', ds.is_public)
    ds.save(self._db, None if async_es_update else self._es)

    self._post_sm_msg(
        ds=ds,
        queue=self._update_queue,
        action=DaemonAction.UPDATE,
        fields=list(doc.keys()),
        **kwargs,
    )

def test_dataset_load_existing_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    upload_dt = datetime.strptime('2000-01-01 00:00:00', "%Y-%m-%d %H:%M:%S")
    ds_id = '2000-01-01'
    metadata = {"meta": "data"}

    ds = Dataset.load(db, ds_id)

    assert ds.__dict__ == dict(id=ds_id, name='ds_name', input_path='input_path',
                               upload_dt=upload_dt, metadata=metadata, config=ds_config,
                               status=DatasetStatus.FINISHED, is_public=True,
                               mol_dbs=['HMDB-v4'], adducts=['+H', '+Na', '+K'],
                               ion_img_storage_type='fs')

def _callback(self, msg):
    ds = Dataset.load(self._db, msg['ds_id'])
    ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.ANNOTATING)

    self.logger.info(f" SM annotate daemon received a message: {msg}")
    self._manager.post_to_slack('new', " [v] New annotation message: {}".format(json.dumps(msg)))

    self._manager.annotate(ds=ds,
                           search_job_factory=SearchJob,
                           del_first=msg.get('del_first', False))

    upd_msg = {
        'ds_id': msg['ds_id'],
        'ds_name': msg['ds_name'],
        'action': 'update'
    }
    self._upd_queue_pub.publish(msg=upd_msg, priority=2)

def del_optical_image(db, ds_id):
    """Delete raw and zoomed optical images from DB and FS."""
    ds = Dataset.load(db, ds_id)
    logger.info(f'Deleting optical image of "{ds.id}" dataset')

    (raw_img_id,) = db.select_one(SEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id,))
    if raw_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id, raw_img_id)

    for img_id in db.select_onecol(SEL_OPTICAL_IMAGE, params=(ds.id,)):
        image_storage.delete_image(image_storage.OPTICAL, ds_id, img_id)

    (thumbnail_img_id,) = db.select_one(SEL_OPTICAL_IMAGE_THUMBNAIL, params=(ds.id,))
    if thumbnail_img_id:
        image_storage.delete_image(image_storage.OPTICAL, ds_id, thumbnail_img_id)

    db.alter(DEL_DATASET_RAW_OPTICAL_IMAGE, params=(ds.id,))
    db.alter(DEL_OPTICAL_IMAGE, params=(ds.id,))
    db.alter(UPD_DATASET_THUMB_OPTICAL_IMAGE, params=(None, None, ds.id))

def test_dataset_load_existing_ds_works(fill_db, metadata, ds_config):
    db = DB()
    upload_dt = datetime.strptime('2000-01-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    ds_id = '2000-01-01'

    ds = Dataset.load(db, ds_id)

    assert ds.metadata == metadata
    ds_fields = {k: v for k, v in ds.__dict__.items() if not k.startswith('_')}
    assert ds_fields == dict(
        id=ds_id,
        name='ds_name',
        input_path='input_path',
        upload_dt=upload_dt,
        metadata=metadata,
        config=ds_config,
        status=DatasetStatus.FINISHED,
        status_update_dt=upload_dt,
        is_public=True,
    )

def run(ds_id, sql_where):
    conf = SMConfig.get_conf()

    db = DB(conf['db'])
    img_store = ImageStoreServiceWrapper(conf['services']['img_service_url'])

    if sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        ds_ids = ds_id.split(',')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'[{i+1} / {len(ds_ids)}] Updating acq geometry for {ds_id}')
            ds = Dataset.load(db, ds_id)
            (sample_img_id,) = db.select_one(
                "SELECT iim.iso_image_ids[1] from job j "
                "JOIN iso_image_metrics iim on j.id = iim.job_id "
                "WHERE j.ds_id = %s LIMIT 1",
                [ds_id],
            )
            print(sample_img_id)
            if sample_img_id:
                # PIL's .size is (width, height), so dims ends up as (n_rows, n_cols)
                w, h = img_store.get_image_by_id('fs', 'iso_image', sample_img_id).size
                dims = (h, w)
            else:
                dims = (None, None)
            acq_geometry = make_acq_geometry('ims', None, ds.metadata, dims)
            ds.save_acq_geometry(db, acq_geometry)
        except Exception:
            logger.error(f'Failed on {ds_id}', exc_info=True)

def add(self, doc, use_lithops, **kwargs):
    """Save dataset and send ANNOTATE message to the queue."""
    now = datetime.now()
    if 'id' not in doc:
        doc['id'] = now.strftime('%Y-%m-%d_%Hh%Mm%Ss')

    ds_config_kwargs = dict((k, v) for k, v in doc.items() if k in FLAT_DS_CONFIG_KEYS)

    try:
        ds = Dataset.load(self._db, doc['id'])
        self._set_ds_busy(ds, kwargs.get('force', False))
        config = update_ds_config(ds.config, doc['metadata'], **ds_config_kwargs)
    except UnknownDSID:
        config = generate_ds_config(doc.get('metadata'), **ds_config_kwargs)

    ds = Dataset(
        id=doc['id'],
        name=doc.get('name'),
        input_path=doc.get('input_path'),
        upload_dt=doc.get('upload_dt', now.isoformat()),
        metadata=doc.get('metadata'),
        config=config,
        is_public=doc.get('is_public'),
        status=DatasetStatus.QUEUED,
    )
    ds.save(self._db, self._es, allow_insert=True)
    self._status_queue.publish(
        {'ds_id': ds.id, 'action': DaemonAction.ANNOTATE, 'stage': DaemonActionStage.QUEUED}
    )

    queue = self._lit_queue if use_lithops else self._annot_queue
    self._post_sm_msg(ds=ds, queue=queue, action=DaemonAction.ANNOTATE, **kwargs)
    return doc['id']

def _on_success(self, msg): ds = Dataset.load(self._db, msg['ds_id']) ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FINISHED) self.logger.info(f" SM annotate daemon: success") ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id']) msg['web_app_link'] = self._manager.create_web_app_link(msg) self._manager.post_to_slack( 'dart', ' [v] Annotation succeeded: {}'.format(json.dumps(msg))) if msg.get('email'): email_body = ( 'Dear METASPACE user,\n\n' 'Thank you for uploading the "{}" dataset to the METASPACE annotation service. ' 'We are pleased to inform you that the dataset has been processed and is available at {}.\n\n' 'Best regards,\n' 'METASPACE Team').format(ds_name, msg['web_app_link']) self._send_email(msg['email'], 'METASPACE service notification (SUCCESS)', email_body)
def _on_failure(self, msg): ds = Dataset.load(self._db, msg['ds_id']) ds.set_status(self._db, self._manager.es, self._manager.status_queue, DatasetStatus.FAILED) self.logger.error(f" SM annotate daemon: failure", exc_info=True) ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id']) msg['web_app_link'] = self._manager.create_web_app_link(msg) self._manager.post_to_slack( 'hankey', ' [x] Annotation failed: {}'.format(json.dumps(msg))) if msg.get('email'): email_body = ( 'Dear METASPACE user,\n\n' 'We are sorry to inform you that there was a problem during processing of the "{}" dataset ' 'and it could not be annotated. ' 'If this is unexpected, please do not hesitate to contact us for support at [email protected]\n\n' 'Best regards,\n' 'METASPACE Team').format(ds_name) self._send_email(msg['email'], 'METASPACE service notification (FAILED)', email_body)
def test_dataset_save_overwrite_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)

    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config,
                 mol_dbs=['HMDB'], adducts=['+H'])

    ds.save(db, es_mock, status_queue_mock)

    assert ds == Dataset.load(db, ds_id)
    es_mock.sync_dataset.assert_called_once_with(ds_id)
    status_queue_mock.publish.assert_called_with({'ds_id': ds_id, 'status': DatasetStatus.NEW})

def _func(ds_id):
    try:
        params = _json_params(req)
        logger.info('Received %s request: %s', request_name, params)
        db = _create_db_conn()
        ds = Dataset.load(db=db, ds_id=ds_id)
        ds_man = _create_dataset_manager(db)
        handler(ds_man, ds, params)
        db.close()
        return {'status': OK['status'], 'ds_id': ds_id}
    except UnknownDSID as e:
        logger.warning(e.message)
        resp.status = ERR_DS_NOT_EXIST['status_code']
        return {'status': ERR_DS_NOT_EXIST['status'], 'ds_id': e.ds_id}
    except DSIsBusy as e:
        logger.warning(e.message)
        resp.status = ERR_DS_BUSY['status_code']
        return {'status': ERR_DS_BUSY['status'], 'ds_id': e.ds_id}
    except Exception as e:
        logger.error(e, exc_info=True)
        resp.status = ERROR['status_code']
        return {'status': ERROR['status'], 'ds_id': ds_id}

def test_search_job_imzml_example(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                                  post_images_to_annot_service_mock,
                                  MolDBServiceWrapperMock, MolDBServiceWrapperMock2,
                                  sm_config, create_fill_sm_database, es_dsl_search,
                                  clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock2)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])

    try:
        ds_config_str = open(ds_config_path).read()
        upload_dt = datetime.now()
        ds_id = '2000-01-01_00h00m'
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
        job = SearchJob(img_store=img_store)
        job._sm_config['rabbitmq'] = {}  # avoid talking to RabbitMQ during the test
        ds = Dataset.load(db, ds_id)
        job.run(ds)

        # dataset table asserts
        rows = db.select('SELECT id, name, input_path, upload_dt, status from dataset')
        input_path = join(dirname(__file__), 'data', test_ds_name)
        assert len(rows) == 1
        assert rows[0] == (ds_id, test_ds_name, input_path, upload_dt, DatasetStatus.FINISHED)

        # ms acquisition geometry asserts
        rows = db.select('SELECT acq_geometry from dataset')
        assert len(rows) == 1
        assert rows[0][0] == ds.get_acq_geometry(db)
        assert rows[0][0] == {
            ACQ_GEOMETRY_KEYS.LENGTH_UNIT: 'nm',
            ACQ_GEOMETRY_KEYS.AcqGridSection.section_name: {
                ACQ_GEOMETRY_KEYS.AcqGridSection.REGULAR_GRID: True,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_X: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_Y: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_X: 100,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_Y: 100
            },
            ACQ_GEOMETRY_KEYS.PixelSizeSection.section_name: {
                ACQ_GEOMETRY_KEYS.PixelSizeSection.REGULAR_SIZE: True,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_X: 100,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_Y: 100
            }
        }

        # job table asserts
        rows = db.select('SELECT db_id, ds_id, status, start, finish from job')
        assert len(rows) == 1
        db_id, ds_id, status, start, finish = rows[0]
        assert (db_id, ds_id, status) == (0, '2000-01-01_00h00m', 'FINISHED')
        assert start < finish

        # image metrics asserts
        rows = db.select(('SELECT db_id, sf, adduct, stats, iso_image_ids '
                          'FROM iso_image_metrics '
                          'ORDER BY sf, adduct'))
        assert rows[0] == (0, 'C12H24O', '+K',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])
        assert rows[1] == (0, 'C12H24O', '+Na',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])

        time.sleep(1)  # Waiting for ES

        # ES asserts
        ds_docs = es_dsl_search.query('term', _type='dataset').execute().to_dict()['hits']['hits']
        assert 1 == len(ds_docs)
        ann_docs = es_dsl_search.query('term', _type='annotation').execute().to_dict()['hits']['hits']
        assert len(ann_docs) == len(rows)
        for doc in ann_docs:
            assert doc['_id'].startswith(ds_id)
    finally:
        db.close()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))

def run_coloc_jobs(
    sm_config, ds_id_str, sql_where, fix_missing, fix_corrupt, skip_existing, use_lithops
):
    assert (
        len(
            [
                data_source
                for data_source in [ds_id_str, sql_where, fix_missing, fix_corrupt]
                if data_source
            ]
        )
        == 1
    ), "Exactly one data source (ds_id, sql_where, fix_missing, fix_corrupt) must be specified"
    assert not (ds_id_str and sql_where)

    db = DB()

    if ds_id_str:
        ds_ids = ds_id_str.split(',')
    elif sql_where:
        ds_ids = [
            id for (id,) in db.select(f'SELECT DISTINCT dataset.id FROM dataset WHERE {sql_where}')
        ]
    else:
        mol_dbs = [
            (doc['id'], doc['name'])
            for doc in db.select_with_fields('SELECT id, name FROM molecular_db m')
        ]
        mol_db_ids, mol_db_names = map(list, zip(*mol_dbs))
        fdrs = [0.05, 0.1, 0.2, 0.5]
        algorithms = ['median_thresholded_cosine', 'cosine']

        if fix_missing:
            logger.info('Checking for missing colocalization jobs...')
            results = db.select(
                MISSING_COLOC_JOBS_SEL, [list(mol_db_ids), list(mol_db_names), fdrs, algorithms]
            )
            ds_ids = [ds_id for ds_id, in results]
            logger.info(f'Found {len(ds_ids)} missing colocalization sets')
        else:
            logger.info(
                'Checking all colocalization jobs. '
                'This is super slow: ~5 minutes per 1000 datasets...'
            )
            results = db.select(
                CORRUPT_COLOC_JOBS_SEL, [list(mol_db_ids), list(mol_db_names), fdrs, algorithms]
            )
            ds_ids = [ds_id for ds_id, in results]
            logger.info(f'Found {len(ds_ids)} corrupt colocalization sets')

    if not ds_ids:
        logger.warning('No datasets match filter')
        return

    if use_lithops:
        executor = Executor(sm_config['lithops'])

    for i, ds_id in enumerate(ds_ids):
        try:
            logger.info(f'Running colocalization on {i+1} out of {len(ds_ids)}')
            ds = Dataset.load(db, ds_id)
            coloc = Colocalization(db)
            if use_lithops:
                # noinspection PyUnboundLocalVariable
                coloc.run_coloc_job_lithops(executor, ds, reprocess=not skip_existing)
            else:
                coloc.run_coloc_job(ds, reprocess=not skip_existing)
        except Exception:
            logger.error(f'Failed to run colocalization on {ds_id}', exc_info=True)

def load_ds(self, ds_id):
    return Dataset.load(self._db, ds_id)

def test_sm_daemons(get_compute_img_metrics_mock, filter_sf_metrics_mock,
                    post_images_to_annot_service_mock,
                    MolDBServiceWrapperMock,
                    sm_config, test_db, es_dsl_search, clean_isotope_storage):
    init_mol_db_service_wrapper_mock(MolDBServiceWrapperMock)

    get_compute_img_metrics_mock.return_value = lambda *args: (0.9, 0.9, 0.9, [100.], [0], [10.])
    filter_sf_metrics_mock.side_effect = lambda x: x
    url_dict = {
        'iso_image_ids': ['iso_image_1', None, None, None]
    }
    post_images_to_annot_service_mock.return_value = {
        35: url_dict,
        44: url_dict
    }

    db = DB(sm_config['db'])
    es = ESExporter(db)
    annotate_daemon = None
    update_daemon = None

    try:
        ds_config_str = open(ds_config_path).read()
        upload_dt = datetime.now()
        ds_id = '2000-01-01_00h00m'
        db.insert(Dataset.DS_INSERT, [{
            'id': ds_id,
            'name': test_ds_name,
            'input_path': input_dir_path,
            'upload_dt': upload_dt,
            'metadata': '{"Data_Type": "Imaging MS"}',
            'config': ds_config_str,
            'status': DatasetStatus.QUEUED,
            'is_public': True,
            'mol_dbs': ['HMDB-v4'],
            'adducts': ['+H', '+Na', '+K'],
            'ion_img_storage': 'fs'
        }])

        ds = Dataset.load(db, ds_id)
        queue_pub.publish({'ds_id': ds.id, 'ds_name': ds.name, 'action': 'annotate'})

        run_daemons(db, es)

        # dataset table asserts
        rows = db.select('SELECT id, name, input_path, upload_dt, status from dataset')
        input_path = join(dirname(__file__), 'data', test_ds_name)
        assert len(rows) == 1
        assert rows[0] == (ds_id, test_ds_name, input_path, upload_dt, DatasetStatus.FINISHED)

        # ms acquisition geometry asserts
        rows = db.select('SELECT acq_geometry from dataset')
        assert len(rows) == 1
        assert rows[0][0] == ds.get_acq_geometry(db)
        assert rows[0][0] == {
            ACQ_GEOMETRY_KEYS.LENGTH_UNIT: 'nm',
            ACQ_GEOMETRY_KEYS.AcqGridSection.section_name: {
                ACQ_GEOMETRY_KEYS.AcqGridSection.REGULAR_GRID: True,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_X: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_COUNT_Y: 3,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_X: 100,
                ACQ_GEOMETRY_KEYS.AcqGridSection.PIXEL_SPACING_Y: 100
            },
            ACQ_GEOMETRY_KEYS.PixelSizeSection.section_name: {
                ACQ_GEOMETRY_KEYS.PixelSizeSection.REGULAR_SIZE: True,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_X: 100,
                ACQ_GEOMETRY_KEYS.PixelSizeSection.PIXEL_SIZE_Y: 100
            }
        }

        # job table asserts
        rows = db.select('SELECT db_id, ds_id, status, start, finish from job')
        assert len(rows) == 1
        db_id, ds_id, status, start, finish = rows[0]
        assert (db_id, ds_id, status) == (0, '2000-01-01_00h00m', JobStatus.FINISHED)
        assert start < finish

        # image metrics asserts
        rows = db.select(('SELECT db_id, sf, adduct, stats, iso_image_ids '
                          'FROM iso_image_metrics '
                          'ORDER BY sf, adduct'))
        assert rows[0] == (0, 'C12H24O', '+K',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])
        assert rows[1] == (0, 'C12H24O', '+Na',
                           {'chaos': 0.9, 'spatial': 0.9, 'spectral': 0.9,
                            'total_iso_ints': [100.], 'min_iso_ints': [0], 'max_iso_ints': [10.]},
                           ['iso_image_1', None, None, None])

        time.sleep(1)  # Waiting for ES

        # ES asserts
        ds_docs = es_dsl_search.query('term', _type='dataset').execute().to_dict()['hits']['hits']
        assert 1 == len(ds_docs)
        ann_docs = es_dsl_search.query('term', _type='annotation').execute().to_dict()['hits']['hits']
        assert len(ann_docs) == len(rows)
        for doc in ann_docs:
            assert doc['_id'].startswith(ds_id)
    finally:
        db.close()
        if annotate_daemon:
            annotate_daemon.stop()
        if update_daemon:
            update_daemon.stop()
        with warn_only():
            local('rm -rf {}'.format(data_dir_path))