def create_ds_man(sm_config, db=None, es=None, img_store=None,
                  action_queue=None, status_queue=None, sm_api=False):
    db = db or DB(sm_config['db'])
    es_mock = es or MagicMock(spec=ESExporter)
    action_queue_mock = action_queue or MagicMock(spec=QueuePublisher)
    status_queue_mock = status_queue or MagicMock(spec=QueuePublisher)
    img_store_mock = img_store or MagicMock(spec=ImageStoreServiceWrapper)

    if sm_api:
        return SMapiDatasetManager(db=db, es=es_mock, mode='queue',
                                   image_store=img_store_mock,
                                   action_queue=action_queue_mock,
                                   status_queue=status_queue_mock)
    else:
        return SMDaemonDatasetManager(db=db, es=es_mock, img_store=img_store_mock,
                                      mode=None, status_queue=status_queue_mock)

def test_add_ds(self, test_db, sm_config, ds_config):
    action_queue_mock = MagicMock(spec=QueuePublisher)
    es_mock = MagicMock(spec=ESExporter)
    db = DB(sm_config['db'])
    try:
        ds_man = create_ds_man(sm_config, db=db, es=es_mock,
                               action_queue=action_queue_mock, sm_api=False)
        ds_id = '2000-01-01'
        ds_name = 'ds_name'
        input_path = 'input_path'
        upload_dt = datetime.now()
        metadata = {}
        ds = create_ds(ds_id=ds_id, ds_name=ds_name, input_path=input_path,
                       upload_dt=upload_dt, metadata=metadata, ds_config=ds_config)

        ds_man.add(ds, search_job_factory=self.SearchJob)

        DS_SEL = ('SELECT name, input_path, upload_dt, metadata, config '
                  'FROM dataset WHERE id = %s')
        assert db.select_one(DS_SEL, params=(ds_id,)) == \
            (ds_name, input_path, upload_dt, metadata, ds_config)
    finally:
        db.close()

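# create_ds is a shared test helper defined elsewhere in the suite. A minimal
# sketch of what it is assumed to look like, inferred from its call sites in
# these tests (the default values are hypothetical):
def create_ds(ds_id='2000-01-01', ds_name='ds_name', input_path='input_path',
              upload_dt=None, metadata=None, ds_config=None):
    upload_dt = upload_dt or datetime.now()
    return Dataset(ds_id, ds_name, input_path, upload_dt, metadata or {}, ds_config)
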
def test_add_optical_image(self, fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    action_queue_mock = MagicMock(spec=QueuePublisher)
    es_mock = MagicMock(spec=ESExporter)
    img_store_mock = MagicMock(spec=ImageStoreServiceWrapper)
    img_store_mock.post_image.side_effect = [
        'opt_img_id1', 'opt_img_id2', 'opt_img_id3', 'thumbnail_id'
    ]
    img_store_mock.get_image_by_id.return_value = Image.new('RGB', (100, 100))

    ds_man = create_ds_man(sm_config=sm_config, db=db, es=es_mock,
                           img_store=img_store_mock,
                           action_queue=action_queue_mock, sm_api=True)
    ds_man._annotation_image_shape = MagicMock(return_value=(100, 100))

    ds_id = '2000-01-01'
    ds = create_ds(ds_id=ds_id, ds_config=ds_config)
    zoom_levels = [1, 2, 3]
    raw_img_id = 'raw_opt_img_id'

    ds_man.add_optical_image(ds, raw_img_id,
                             [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                             zoom_levels=zoom_levels)

    assert db.select('SELECT * FROM optical_image') == [
        ('opt_img_id{}'.format(i + 1), ds.id, zoom)
        for i, zoom in enumerate(zoom_levels)
    ]
    assert db.select('SELECT optical_image FROM dataset WHERE id = %s',
                     params=(ds_id,)) == [(raw_img_id,)]
    assert db.select('SELECT thumbnail FROM dataset WHERE id = %s',
                     params=(ds_id,)) == [('thumbnail_id',)]

@pytest.fixture()
def fill_db(test_db, sm_config, ds_config):
    upload_dt = '2000-01-01 00:00:00'
    ds_id = '2000-01-01'
    meta = {'Data_Type': 'Imaging MS'}
    db = DB(sm_config['db'])
    db.insert('INSERT INTO dataset VALUES (%s, %s, %s, %s, %s, %s, %s)',
              rows=[(ds_id, 'ds_name', 'input_path', upload_dt,
                     json.dumps(meta), json.dumps(ds_config),
                     DatasetStatus.FINISHED)])

def test_dataset_load_existing_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    upload_dt = datetime.strptime('2000-01-01 00:00:00', '%Y-%m-%d %H:%M:%S')
    ds_id = '2000-01-01'
    meta = {'meta': 'data'}

    ds = Dataset.load(db, ds_id)

    assert (ds.id == ds_id and ds.name == 'ds_name'
            and ds.input_path == 'input_path' and ds.upload_dt == upload_dt
            and ds.meta == meta and ds.config == ds_config
            and ds.status == DatasetStatus.FINISHED)

def create_api_ds_man(db=None, es=None, img_store=None,
                      action_queue=None, sm_config=None):
    db = db or DB(sm_config['db'])
    es = es or ESExporter(db)
    img_store = img_store or MagicMock(spec=ImageStoreServiceWrapper)
    action_queue = action_queue or QueuePublisher(sm_config['rabbitmq'], ACTION_QDESC)
    action_queue.queue_args = {'x-max-priority': 3}
    return SMapiDatasetManager(db=db, es=es, image_store=img_store,
                               mode='queue', action_queue=action_queue)

def test_dataset_save_overwrite_ds_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)
    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config)

    ds.save(db, es_mock, status_queue_mock)

    assert ds == Dataset.load(db, ds_id)
    es_mock.sync_dataset.assert_called_once_with(ds_id)
    status_queue_mock.publish.assert_called_with({'ds_id': ds_id,
                                                  'status': DatasetStatus.NEW})

@pytest.fixture()
def fill_db(test_db, sm_config, ds_config):
    upload_dt = '2000-01-01 00:00:00'
    ds_id = '2000-01-01'
    meta = {'meta': 'data'}
    db = DB(sm_config['db'])
    db.insert('INSERT INTO dataset VALUES (%s, %s, %s, %s, %s, %s, %s)',
              rows=[(ds_id, 'ds_name', 'input_path', upload_dt,
                     json.dumps(meta), json.dumps(ds_config),
                     DatasetStatus.FINISHED)])
    db.insert('INSERT INTO job (id, db_id, ds_id) VALUES (%s, %s, %s)',
              rows=[(0, 0, ds_id)])
    db.insert('INSERT INTO sum_formula (id, db_id, sf) VALUES (%s, %s, %s)',
              rows=[(1, 0, 'H2O')])
    db.insert('INSERT INTO iso_image_metrics (job_id, db_id, sf, adduct, iso_image_ids) '
              'VALUES (%s, %s, %s, %s, %s)',
              rows=[(0, 0, 'H2O', '+H', ['iso_image_1_id', 'iso_image_2_id'])])
    db.close()

def test_dataset_update_status_works(fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    es_mock = MagicMock(spec=ESExporter)
    status_queue_mock = MagicMock(spec=QueuePublisher)
    upload_dt = datetime.now()
    ds_id = '2000-01-01'
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config,
                 DatasetStatus.INDEXING)

    ds.set_status(db, es_mock, status_queue_mock, DatasetStatus.FINISHED)

    assert DatasetStatus.FINISHED == Dataset.load(db, ds_id).status
    status_queue_mock.publish.assert_called_once_with({'ds_id': ds_id,
                                                       'status': DatasetStatus.FINISHED})

def _reindex_all(conf):
    es_config = conf['elasticsearch']
    alias = es_config['index']
    es_man = ESIndexManager(es_config)
    new_index = es_man.another_index_name(es_man.internal_index_name(alias))
    es_man.create_index(new_index)

    try:
        tmp_es_config = deepcopy(es_config)
        tmp_es_config['index'] = new_index

        db = DB(conf['db'])
        es_exp = ESExporter(db, tmp_es_config)
        rows = db.select('SELECT id, name, config FROM dataset')
        _reindex_datasets(rows, es_exp)

        es_man.remap_alias(tmp_es_config['index'], alias=alias)
    except Exception:
        # Clean up the partially built index, then re-raise with the
        # original traceback
        es_man.delete_index(new_index)
        raise

def reindex_results(ds_id, ds_mask):
    assert ds_id or ds_mask

    conf = SMConfig.get_conf()
    if ds_mask == '_all_':
        _reindex_all(conf)
    else:
        db = DB(conf['db'])
        es_exp = ESExporter(db)

        # Use parameterized queries rather than string interpolation to
        # avoid SQL injection
        if ds_id:
            rows = db.select('SELECT id, name, config FROM dataset WHERE id = %s',
                             params=(ds_id,))
        elif ds_mask:
            rows = db.select('SELECT id, name, config FROM dataset WHERE name LIKE %s',
                             params=(ds_mask + '%',))
        else:
            rows = []

        _reindex_datasets(rows, es_exp)

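# _reindex_datasets is called by both functions above but not shown in this
# section. A minimal sketch of the expected shape, assuming a per-dataset
# ESExporter indexing method (the method name is hypothetical):
def _reindex_datasets(rows, es_exp):
    for ds_id, ds_name, ds_config in rows:
        try:
            logger.info('Reindexing dataset %s (%s)', ds_id, ds_name)
            es_exp.index_ds(ds_id)  # hypothetical indexing call
        except Exception:
            logger.exception('Failed to reindex dataset %s', ds_id)
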
def _callback(self, msg):
    log_msg = ' SM daemon received a message: {}'.format(msg)
    logger.info(log_msg)
    self._post_to_slack('new', ' [v] Received: {}'.format(json.dumps(msg)))

    db = DB(self._sm_config['db'])
    try:
        ds_man = self._dataset_manager_factory(
            db=db,
            es=ESExporter(db),
            img_store=ImageStoreServiceWrapper(
                self._sm_config['services']['img_service_url']),
            mode='queue',
            status_queue=QueuePublisher(config=self._sm_config['rabbitmq'],
                                        qdesc=SM_DS_STATUS, logger=logger))
        ds_man.process(ds=Dataset.load(db, msg['ds_id']),
                       action=msg['action'],
                       search_job_factory=SearchJob,
                       del_first=msg.get('del_first', False))
    finally:
        if db:
            db.close()

def test_delete_ds(self, fill_db, sm_config, ds_config):
    db = DB(sm_config['db'])
    action_queue_mock = MagicMock(spec=QueuePublisher)
    es_mock = MagicMock(spec=ESExporter)
    img_store_service_mock = MagicMock(spec=ImageStoreServiceWrapper)
    ds_man = create_ds_man(sm_config, db=db, es=es_mock,
                           img_store=img_store_service_mock,
                           action_queue=action_queue_mock, sm_api=False)
    ds_id = '2000-01-01'
    ds = create_ds(ds_id=ds_id, ds_config=ds_config)

    ds_man.delete(ds)

    ids = ['iso_image_{}_id'.format(i) for i in range(1, 3)]
    img_store_service_mock.delete_image_by_id.assert_has_calls(
        [call('fs', 'iso_image', ids[0]), call('fs', 'iso_image', ids[1])])
    es_mock.delete_ds.assert_called_with(ds_id)
    assert db.select_one('SELECT * FROM dataset WHERE id = %s',
                         params=(ds_id,)) == []

def _fetch_ds_metadata(self, ds_id):
    db = DB(SMConfig.get_conf()['db'])
    res = db.select_one('SELECT name, metadata FROM dataset WHERE id = %s',
                        params=(ds_id,))
    return res or ('', {})

def _create_db_conn():
    config = SMConfig.get_conf()
    return DB(config['db'])

import sys
import logging
import argparse

# DB, SMConfig, init_loggers, ESExporter, ImageStoreServiceWrapper,
# SMDaemonDatasetManager and create_ds_from_files are imported from the
# sm.engine package (exact module paths omitted here)
from sm.engine.search_job import SearchJob

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='SM process dataset at a remote spark location.')
    parser.add_argument('--ds-id', dest='ds_id', type=str, help='Dataset id')
    parser.add_argument('--ds-name', dest='ds_name', type=str, help='Dataset name')
    parser.add_argument('--input-path', type=str, help='Path to a dataset location')
    parser.add_argument('--no-clean', dest='no_clean', action='store_true',
                        help="Don't clean dataset txt files after job is finished")
    parser.add_argument('--config', dest='sm_config_path', default='conf/config.json',
                        type=str, help='SM config path')
    args = parser.parse_args()

    SMConfig.set_path(args.sm_config_path)
    sm_config = SMConfig.get_conf()
    init_loggers(sm_config['logs'])

    db = DB(sm_config['db'])
    img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
    ds_man = SMDaemonDatasetManager(db, ESExporter(db), img_store, mode='local')
    try:
        ds = create_ds_from_files(args.ds_id, args.ds_name, args.input_path)
        ds_man.add(ds, SearchJob, del_first=True)
    except Exception as e:
        logging.getLogger('engine').error(e)
        sys.exit(1)
    sys.exit()

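# Example invocation of the script above (the file name and paths are
# illustrative, not taken from the repo):
#   python process_dataset.py --ds-id 2000-01-01 --ds-name test_ds \
#       --input-path /path/to/dataset --config conf/config.json
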
def set_metadata_thumbnail(db, config, ds_name):
    ds_thumb_query = 'SELECT id, transform, thumbnail FROM dataset {}'.format(
        'WHERE name = %s' if ds_name != ALL_DS_MASK else '')
    # Pass params only when the query actually contains a placeholder
    for ds_id, transform, _thumbnail in db.select(
            ds_thumb_query,
            params=(ds_name,) if ds_name != ALL_DS_MASK else None):
        if transform is not None:
            ds = api.Dataset.load(db=db, ds_id=ds_id)
            img_store = ImageStoreServiceWrapper(config['services']['img_service_url'])
            img_store.storage_type = 'fs'
            sm = SMapiDatasetManager(db=db, es=ESExporter(db),
                                     image_store=img_store, mode='queue')
            img_id = db.select('SELECT optical_image FROM dataset WHERE id = %s',
                               params=(ds.id,))
            sm._add_thumbnail_optical_image(ds, f'{img_id[0][0]}', transform)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='Updates thumbnail for a provided dataset')
    parser.add_argument('--ds-name', dest='ds_name', type=str,
                        help='Process specific dataset given by its name')
    parser.add_argument('--config', dest='sm_config_path', default='conf/config.json',
                        type=str, help='SM config path')
    args = parser.parse_args()

    SMConfig.set_path(args.sm_config_path)
    sm_config = SMConfig.get_conf()
    db = DB(sm_config['db'])

    if args.ds_name:
        set_metadata_thumbnail(db, sm_config, args.ds_name)
    else:
        # No name given: process all datasets via the ALL_DS_MASK sentinel
        # that the query above checks for
        set_metadata_thumbnail(db, sm_config, ALL_DS_MASK)

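# Example invocations of the thumbnail script above (the file name is
# illustrative, not taken from the repo):
#   python update_thumbnails.py --ds-name my_ds --config conf/config.json
#   python update_thumbnails.py   # no --ds-name: processes all datasets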