Example #1
0
    def test_add_ds(self, test_db, sm_config, ds_config):
        """Adding a dataset via the daemon manager persists its fields to the DB."""
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        db = DB(sm_config['db'])
        try:
            manager = create_ds_man(sm_config, db=db, es=es_exporter_mock,
                                    action_queue=queue_mock, sm_api=False)

            ds_id, ds_name = '2000-01-01', 'ds_name'
            input_path = 'input_path'
            upload_dt = datetime.now()
            metadata = {}
            dataset = create_ds(ds_id=ds_id, ds_name=ds_name, input_path=input_path,
                                upload_dt=upload_dt, metadata=metadata,
                                ds_config=ds_config)

            manager.add(dataset, search_job_factory=self.SearchJob)

            # The stored row must reflect exactly what was added.
            sel = 'select name, input_path, upload_dt, metadata, config from dataset where id=%s'
            row = db.select_one(sel, params=(ds_id,))
            assert row == (ds_name, input_path, upload_dt, metadata, ds_config)
        finally:
            db.close()
Example #2
0
    def test_add_optical_image(self, fill_db, sm_config, ds_config):
        """add_optical_image stores zoomed copies and records their ids in the DB."""
        db = DB(sm_config['db'])
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        img_store_mock = MagicMock(ImageStoreServiceWrapper)
        # One posted image per zoom level, plus the thumbnail.
        img_store_mock.post_image.side_effect = [
            'opt_img_id1', 'opt_img_id2', 'opt_img_id3', 'thumbnail_id']
        img_store_mock.get_image_by_id.return_value = Image.new('RGB', (100, 100))

        manager = create_ds_man(sm_config=sm_config, db=db, es=es_exporter_mock,
                                img_store=img_store_mock,
                                action_queue=queue_mock, sm_api=True)
        manager._annotation_image_shape = MagicMock(return_value=(100, 100))

        ds_id = '2000-01-01'
        dataset = create_ds(ds_id=ds_id, ds_config=ds_config)

        zoom_levels = [1, 2, 3]
        raw_img_id = 'raw_opt_img_id'
        identity_transform = [[1, 0, 0], [0, 1, 0], [0, 0, 1]]
        manager.add_optical_image(dataset, raw_img_id, identity_transform,
                                  zoom_levels=zoom_levels)

        expected_rows = [('opt_img_id{}'.format(idx + 1), dataset.id, zoom)
                         for idx, zoom in enumerate(zoom_levels)]
        assert db.select('SELECT * FROM optical_image') == expected_rows
        assert db.select('SELECT optical_image FROM dataset where id = %s',
                         params=(ds_id,)) == [(raw_img_id,)]
        assert db.select('SELECT thumbnail FROM dataset where id = %s',
                         params=(ds_id,)) == [('thumbnail_id',)]
Example #3
0
def fill_db(test_db, sm_config, ds_config):
    """Insert a single FINISHED 'Imaging MS' dataset row used as a test fixture."""
    upload_dt = '2000-01-01 00:00:00'
    ds_id = '2000-01-01'
    meta = {'Data_Type': 'Imaging MS'}
    db = DB(sm_config['db'])
    row = (ds_id, 'ds_name', 'input_path', upload_dt,
           json.dumps(meta), json.dumps(ds_config), DatasetStatus.FINISHED)
    db.insert('INSERT INTO dataset values(%s, %s, %s, %s, %s, %s, %s)',
              rows=[row])
def fill_db(test_db, sm_config, ds_config):
    """Seed the dataset table with one FINISHED, public 'Imaging MS' dataset."""
    ds_id = '2000-01-01'
    upload_dt = '2000-01-01 00:00:00'
    meta = {'Data_Type': 'Imaging MS'}
    db = DB(sm_config['db'])
    row = (ds_id, 'ds_name', 'input_path', upload_dt,
           json.dumps(meta), json.dumps(ds_config), DatasetStatus.FINISHED,
           True, ['HMDB-v4'], ['+H'])
    db.insert('INSERT INTO dataset (id, name, input_path, upload_dt, metadata, config, '
              'status, is_public, mol_dbs, adducts) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
              rows=[row])
def fill_db(test_db, sm_config, ds_config):
    """Seed the dataset table with one FINISHED dataset configured for 3 adducts."""
    ds_id = '2000-01-01'
    upload_dt = '2000-01-01 00:00:00'
    metadata = {"meta": "data"}
    db = DB(sm_config['db'])
    row = (ds_id, 'ds_name', 'input_path', upload_dt,
           json.dumps(metadata), json.dumps(ds_config), DatasetStatus.FINISHED,
           True, ['HMDB-v4'], ['+H', '+Na', '+K'])
    db.insert(('INSERT INTO dataset (id, name, input_path, upload_dt, metadata, config, status, '
               'is_public, mol_dbs, adducts) '
               'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'),
              rows=[row])
Example #6
0
def create_ds_man(sm_config,
                  db=None,
                  es=None,
                  img_store=None,
                  action_queue=None,
                  status_queue=None,
                  sm_api=False):
    """Build a dataset manager, substituting a spec'ed mock for every
    collaborator that is not supplied.

    Args:
        sm_config: SM engine configuration dict (provides DB settings).
        db, es, img_store, action_queue, status_queue: optional real
            collaborators; each None one is replaced by a MagicMock.
        sm_api: return an SMapiDatasetManager when True, otherwise an
            SMDaemonDatasetManager.
    """
    db = db or DB(sm_config['db'])
    es_mock = es or MagicMock(spec=ESExporter)
    # Consistency fix: pass spec= explicitly everywhere (the original passed
    # QueuePublisher positionally, which is the same spec argument but
    # inconsistent with the other mocks in this helper).
    action_queue_mock = action_queue or MagicMock(spec=QueuePublisher)
    status_queue_mock = status_queue or MagicMock(spec=QueuePublisher)
    img_store_mock = img_store or MagicMock(spec=ImageStoreServiceWrapper)
    if sm_api:
        return SMapiDatasetManager(db=db,
                                   es=es_mock,
                                   mode='queue',
                                   image_store=img_store_mock,
                                   action_queue=action_queue_mock,
                                   status_queue=status_queue_mock)
    else:
        return SMDaemonDatasetManager(db=db,
                                      es=es_mock,
                                      img_store=img_store_mock,
                                      mode=None,
                                      status_queue=status_queue_mock)
def reindex_results(ds_id, ds_mask):
    """Reindex annotation results in Elasticsearch.

    Args:
        ds_id: exact dataset id to reindex; takes precedence over ds_mask.
        ds_mask: dataset name prefix, or '_all_' to rebuild the whole index.
    """
    assert ds_id or ds_mask

    conf = SMConfig.get_conf()
    if ds_mask == '_all_':
        _reindex_all(conf)
    else:
        db = DB(conf['db'])
        es_exp = ESExporter(db)

        # Bug fix: use parameterized queries instead of str.format — the
        # original interpolated ds_id/ds_mask directly into SQL (injection
        # and quoting hazard).
        if ds_id:
            rows = db.select('select id, name, config from dataset where id = %s',
                             params=(ds_id,))
        else:
            # The assert above guarantees ds_mask is truthy here, so the
            # original's dead `rows = []` branch is unnecessary.
            rows = db.select('select id, name, config from dataset where name like %s',
                             params=(ds_mask + '%',))

        _reindex_datasets(rows, es_exp)
    def test_delete_ds(self, fill_db, sm_config, ds_config):
        """delete() must remove iso images, the ES document and the DB row."""
        db = DB(sm_config['db'])
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        img_store_mock = MagicMock(spec=ImageStoreServiceWrapper)
        manager = create_ds_man(sm_config, db=db, es=es_exporter_mock,
                                img_store=img_store_mock,
                                action_queue=queue_mock, sm_api=False)

        ds_id = '2000-01-01'
        dataset = create_ds(ds_id=ds_id, ds_config=ds_config)

        manager.delete(dataset)

        img_ids = ['iso_image_1_id', 'iso_image_2_id']
        img_store_mock.delete_image_by_id.assert_has_calls(
            [call('fs', 'iso_image', img_id) for img_id in img_ids])
        es_exporter_mock.delete_ds.assert_called_with(ds_id)
        assert db.select_one('SELECT * FROM dataset WHERE id = %s',
                             params=(ds_id,)) == []
Example #9
0
def _reindex_all(conf):
    """Rebuild the ES index from scratch and atomically remap the alias.

    A fresh index is filled from all datasets in the DB; the alias is only
    remapped after a successful fill.  On failure the new index is deleted
    and the exception propagates, leaving the old index in place.
    """
    es_config = conf['elasticsearch']
    alias = es_config['index']
    es_man = ESIndexManager(es_config)
    new_index = es_man.another_index_name(es_man.internal_index_name(alias))
    es_man.create_index(new_index)

    try:
        tmp_es_config = deepcopy(es_config)
        tmp_es_config['index'] = new_index

        db = DB(conf['db'])
        es_exp = ESExporter(db, tmp_es_config)
        rows = db.select('select id, name, config from dataset')
        _reindex_datasets(rows, es_exp)

        es_man.remap_alias(tmp_es_config['index'], alias=alias)
    except Exception:
        es_man.delete_index(new_index)
        # Bug fix: bare `raise` preserves the original traceback; the
        # original `raise e` re-raised from this frame instead.
        raise
def _reindex_all(conf):
    """Rebuild the ES index into a fresh index, then swap the alias over.

    On any error during the fill, the half-built index is removed and the
    exception propagates so the previous index stays live.
    """
    es_config = conf['elasticsearch']
    alias = es_config['index']
    es_man = ESIndexManager(es_config)
    new_index = es_man.another_index_name(es_man.internal_index_name(alias))
    es_man.create_index(new_index)

    try:
        tmp_es_config = deepcopy(es_config)
        tmp_es_config['index'] = new_index

        db = DB(conf['db'])
        es_exp = ESExporter(db, tmp_es_config)
        rows = db.select('select id, name, config from dataset')
        _reindex_datasets(rows, es_exp)

        es_man.remap_alias(tmp_es_config['index'], alias=alias)
    except Exception:
        es_man.delete_index(new_index)
        # Bug fix: re-raise with a bare `raise` so the traceback of the
        # original failure is preserved (`raise e` resets it to this frame).
        raise
Example #11
0
def test_dataset_load_existing_ds_works(fill_db, sm_config, ds_config):
    """Dataset.load restores every persisted field of an existing dataset."""
    db = DB(sm_config['db'])
    upload_dt = datetime.strptime('2000-01-01 00:00:00', "%Y-%m-%d %H:%M:%S")
    ds_id = '2000-01-01'
    meta = {"meta": "data"}

    ds = Dataset.load(db, ds_id)

    # Compare all fields at once as tuples for a single readable assertion.
    actual = (ds.id, ds.name, ds.input_path, ds.upload_dt,
              ds.meta, ds.config, ds.status)
    expected = (ds_id, 'ds_name', 'input_path', upload_dt,
                meta, ds_config, DatasetStatus.FINISHED)
    assert actual == expected
    def test_add_ds(self, test_db, sm_config, ds_config):
        """Adding a dataset stores name, path, upload time, metadata and config."""
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        db = DB(sm_config['db'])
        try:
            manager = create_ds_man(sm_config,
                                    db=db,
                                    es=es_exporter_mock,
                                    action_queue=queue_mock,
                                    sm_api=False)

            ds_id = '2000-01-01'
            ds_name = 'ds_name'
            input_path = 'input_path'
            upload_dt = datetime.now()
            metadata = {}
            dataset = create_ds(ds_id=ds_id,
                                ds_name=ds_name,
                                input_path=input_path,
                                upload_dt=upload_dt,
                                metadata=metadata,
                                ds_config=ds_config)

            manager.add(dataset, search_job_factory=self.SearchJob)

            query = 'select name, input_path, upload_dt, metadata, config from dataset where id=%s'
            expected = (ds_name, input_path, upload_dt, metadata, ds_config)
            assert db.select_one(query, params=(ds_id,)) == expected
        finally:
            db.close()
Example #13
0
def reindex_results(ds_id, ds_mask):
    """Reindex annotation results for one dataset id or a dataset-name mask.

    Args:
        ds_id: exact dataset id; takes precedence over ds_mask.
        ds_mask: dataset name prefix, or '_all_' to rebuild everything.
    """
    assert ds_id or ds_mask

    conf = SMConfig.get_conf()
    if ds_mask == '_all_':
        _reindex_all(conf)
    else:
        db = DB(conf['db'])
        es_exp = ESExporter(db)

        # Bug fix: parameterized SQL replaces the original str.format
        # interpolation of ds_id/ds_mask (SQL injection / quoting hazard).
        if ds_id:
            rows = db.select(
                'select id, name, config from dataset where id = %s',
                params=(ds_id,))
        else:
            # The assert guarantees ds_mask is truthy on this branch, so the
            # original's unreachable `rows = []` fallback is dropped.
            rows = db.select(
                'select id, name, config from dataset where name like %s',
                params=(ds_mask + '%',))

        _reindex_datasets(rows, es_exp)
    def _callback(self, msg):
        """Handle one queued dataset message: build a manager and run the action."""
        log_msg = " SM daemon received a message: {}".format(msg)
        logger.info(log_msg)
        self._post_to_slack('new', " [v] Received: {}".format(json.dumps(msg)))

        db = DB(self._sm_config['db'])
        try:
            img_store = ImageStoreServiceWrapper(
                self._sm_config['services']['img_service_url'])
            status_queue = QueuePublisher(config=self._sm_config['rabbitmq'],
                                          qdesc=SM_DS_STATUS,
                                          logger=logger)
            ds_man = self._dataset_manager_factory(db=db,
                                                   es=ESExporter(db),
                                                   img_store=img_store,
                                                   mode='queue',
                                                   status_queue=status_queue)
            ds = Dataset.load(db, msg['ds_id'])
            ds_man.process(ds=ds,
                           action=msg['action'],
                           search_job_factory=SearchJob,
                           del_first=msg.get('del_first', False))
        finally:
            if db:
                db.close()
Example #15
0
    def _callback(self, msg):
        """Process a single daemon queue message end to end, then close the DB."""
        log_msg = " SM daemon received a message: {}".format(msg)
        logger.info(log_msg)
        self._post_to_slack('new', " [v] Received: {}".format(json.dumps(msg)))

        db = DB(self._sm_config['db'])
        try:
            factory_kwargs = dict(
                db=db,
                es=ESExporter(db),
                img_store=ImageStoreServiceWrapper(
                    self._sm_config['services']['img_service_url']),
                mode='queue',
                status_queue=QueuePublisher(config=self._sm_config['rabbitmq'],
                                            qdesc=SM_DS_STATUS,
                                            logger=logger))
            ds_man = self._dataset_manager_factory(**factory_kwargs)
            ds_man.process(ds=Dataset.load(db, msg['ds_id']),
                           action=msg['action'],
                           search_job_factory=SearchJob,
                           del_first=msg.get('del_first', False))
        finally:
            if db:
                db.close()
    def test_add_optical_image(self, fill_db, sm_config, ds_config):
        """add_optical_image records per-zoom image ids plus the thumbnail id."""
        db = DB(sm_config['db'])
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        img_store_mock = MagicMock(ImageStoreServiceWrapper)
        # Three zoom-level uploads followed by the thumbnail upload.
        img_store_mock.post_image.side_effect = [
            'opt_img_id1', 'opt_img_id2', 'opt_img_id3', 'thumbnail_id']
        img_store_mock.get_image_by_id.return_value = Image.new('RGB', (100, 100))

        manager = create_ds_man(sm_config=sm_config, db=db,
                                es=es_exporter_mock, img_store=img_store_mock,
                                action_queue=queue_mock, sm_api=True)
        manager._annotation_image_shape = MagicMock(return_value=(100, 100))

        ds_id = '2000-01-01'
        dataset = create_ds(ds_id=ds_id, ds_config=ds_config)

        zoom_levels = [1, 2, 3]
        raw_img_id = 'raw_opt_img_id'
        manager.add_optical_image(dataset, raw_img_id,
                                  [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                                  zoom_levels=zoom_levels)

        expected = [('opt_img_id{}'.format(idx + 1), dataset.id, zoom)
                    for idx, zoom in enumerate(zoom_levels)]
        assert db.select('SELECT * FROM optical_image') == expected
        assert db.select('SELECT optical_image FROM dataset where id = %s',
                         params=(ds_id,)) == [(raw_img_id,)]
        assert db.select('SELECT thumbnail FROM dataset where id = %s',
                         params=(ds_id,)) == [('thumbnail_id',)]
Example #17
0
    def test_delete_ds(self, fill_db, sm_config, ds_config):
        """Deleting a dataset removes its iso images, ES docs and DB row."""
        db = DB(sm_config['db'])
        queue_mock = MagicMock(spec=QueuePublisher)
        es_exporter_mock = MagicMock(spec=ESExporter)
        img_store_mock = MagicMock(spec=ImageStoreServiceWrapper)
        manager = create_ds_man(sm_config, db=db, es=es_exporter_mock,
                                img_store=img_store_mock,
                                action_queue=queue_mock, sm_api=False)

        ds_id = '2000-01-01'
        dataset = create_ds(ds_id=ds_id, ds_config=ds_config)

        manager.delete(dataset)

        expected_calls = [call('fs', 'iso_image', 'iso_image_{}_id'.format(n))
                          for n in (1, 2)]
        img_store_mock.delete_image_by_id.assert_has_calls(expected_calls)
        es_exporter_mock.delete_ds.assert_called_with(ds_id)
        assert db.select_one('SELECT * FROM dataset WHERE id = %s',
                             params=(ds_id,)) == []
Example #18
0
def create_api_ds_man(db=None, es=None, img_store=None, action_queue=None,
                      sm_config=None):
    """Build an SMapiDatasetManager in 'queue' mode.

    Each collaborator left as None is replaced with a default built from
    sm_config (or a spec'ed mock for the image store).
    """
    db = db or DB(sm_config['db'])
    es = es or ESExporter(db)
    img_store = img_store or MagicMock(spec=ImageStoreServiceWrapper)
    action_queue = action_queue or QueuePublisher(sm_config['rabbitmq'],
                                                  ACTION_QDESC)
    # Messages are published with priorities up to 3.
    action_queue.queue_args = {'x-max-priority': 3}
    return SMapiDatasetManager(db=db, es=es, image_store=img_store,
                               mode='queue', action_queue=action_queue)
Example #19
0
def test_dataset_save_overwrite_ds_works(fill_db, sm_config, ds_config):
    """Saving over an existing dataset updates the DB, syncs ES and publishes status."""
    db = DB(sm_config['db'])
    es_exporter_mock = MagicMock(spec=ESExporter)
    queue_mock = MagicMock(spec=QueuePublisher)

    ds_id = '2000-01-01'
    upload_dt = datetime.now()
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config)

    ds.save(db, es_exporter_mock, queue_mock)

    assert ds == Dataset.load(db, ds_id)
    es_exporter_mock.sync_dataset.assert_called_once_with(ds_id)
    queue_mock.publish.assert_called_with({'ds_id': ds_id,
                                           'status': DatasetStatus.NEW})
Example #20
0
def test_dataset_update_status_works(fill_db, sm_config, ds_config):
    """set_status persists the new status and publishes exactly one message."""
    db = DB(sm_config['db'])
    es_exporter_mock = MagicMock(spec=ESExporter)
    queue_mock = MagicMock(spec=QueuePublisher)

    ds_id = '2000-01-01'
    upload_dt = datetime.now()
    ds = Dataset(ds_id, 'ds_name', 'input_path', upload_dt, {}, ds_config,
                 DatasetStatus.INDEXING)

    ds.set_status(db, es_exporter_mock, queue_mock, DatasetStatus.FINISHED)

    assert Dataset.load(db, ds_id).status == DatasetStatus.FINISHED
    queue_mock.publish.assert_called_once_with(
        {'ds_id': ds_id, 'status': DatasetStatus.FINISHED})
 def _fetch_ds_metadata(self, ds_id):
     """Return (name, metadata) for ds_id, or ('', {}) when not found."""
     conn = DB(SMConfig.get_conf()['db'])
     row = conn.select_one('SELECT name, metadata FROM dataset WHERE id = %s',
                           params=(ds_id,))
     return row if row else ('', {})
def fill_db(test_db, sm_config, ds_config):
    """Populate dataset/job/sum_formula/iso_image_metrics with one annotation fixture."""
    upload_dt = '2000-01-01 00:00:00'
    ds_id = '2000-01-01'
    meta = {"meta": "data"}
    db = DB(sm_config['db'])
    dataset_row = (ds_id, 'ds_name', 'input_path', upload_dt,
                   json.dumps(meta), json.dumps(ds_config),
                   DatasetStatus.FINISHED, True, ['HMDB-v4'], ['+H'])
    db.insert('INSERT INTO dataset (id, name, input_path, upload_dt, metadata, config, '
              'status, is_public, mol_dbs, adducts) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)',
              rows=[dataset_row])
    db.insert("INSERT INTO job (id, db_id, ds_id) VALUES (%s, %s, %s)",
              rows=[(0, 0, ds_id)])
    db.insert("INSERT INTO sum_formula (id, db_id, sf) VALUES (%s, %s, %s)",
              rows=[(1, 0, 'H2O')])
    metrics_row = (0, 0, 'H2O', '+H', ['iso_image_1_id', 'iso_image_2_id'])
    db.insert(("INSERT INTO iso_image_metrics (job_id, db_id, sf, adduct, iso_image_ids) "
               "VALUES (%s, %s, %s, %s, %s)"),
              rows=[metrics_row])
    db.close()
from sm.engine.search_job import SearchJob


if __name__ == "__main__":
    # Command-line entry point: run a search job for one dataset locally.
    parser = argparse.ArgumentParser(
        description='SM process dataset at a remote spark location.')
    parser.add_argument('--ds-id', dest='ds_id', type=str, help='Dataset id')
    parser.add_argument('--ds-name', dest='ds_name', type=str,
                        help='Dataset name')
    parser.add_argument('--input-path', type=str,
                        help='Path to a dataset location')
    parser.add_argument('--no-clean', dest='no_clean', action='store_true',
                        help="Don't clean dataset txt files after job is finished")
    parser.add_argument('--config', dest='sm_config_path',
                        default='conf/config.json', type=str,
                        help='SM config path')
    args = parser.parse_args()

    # Configuration and logging must be initialized before touching the DB.
    SMConfig.set_path(args.sm_config_path)
    sm_config = SMConfig.get_conf()
    init_loggers(sm_config['logs'])

    db = DB(sm_config['db'])
    img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
    ds_man = SMDaemonDatasetManager(db, ESExporter(db), img_store, mode='local')

    try:
        ds = create_ds_from_files(args.ds_id, args.ds_name, args.input_path)
        ds_man.add(ds, SearchJob, del_first=True)
    except Exception as e:
        logging.getLogger('engine').error(e)
        sys.exit(1)

    sys.exit()
Example #24
0
def fill_db(test_db, sm_config, ds_config):
    """Insert one dataset plus a job, a sum formula and its iso-image metrics."""
    upload_dt = '2000-01-01 00:00:00'
    ds_id = '2000-01-01'
    meta = {"meta": "data"}
    db = DB(sm_config['db'])
    dataset_row = (ds_id, 'ds_name', 'input_path', upload_dt,
                   json.dumps(meta), json.dumps(ds_config),
                   DatasetStatus.FINISHED)
    db.insert('INSERT INTO dataset values (%s, %s, %s, %s, %s, %s, %s)',
              rows=[dataset_row])
    db.insert("INSERT INTO job (id, db_id, ds_id) VALUES (%s, %s, %s)",
              rows=[(0, 0, ds_id)])
    db.insert("INSERT INTO sum_formula (id, db_id, sf) VALUES (%s, %s, %s)",
              rows=[(1, 0, 'H2O')])
    metrics_row = (0, 0, 'H2O', '+H', ['iso_image_1_id', 'iso_image_2_id'])
    db.insert((
        "INSERT INTO iso_image_metrics (job_id, db_id, sf, adduct, iso_image_ids) "
        "VALUES (%s, %s, %s, %s, %s)"),
              rows=[metrics_row])
    db.close()
def set_metadata_thumbnail(db, config, ds_name):
    """Generate and store the optical-image thumbnail for matching datasets.

    Args:
        db: open DB connection.
        config: SM config dict (provides the image service URL).
        ds_name: dataset name to process, or ALL_DS_MASK for all datasets.
    """
    # Bug fix: the original passed params=(ds_name,) whenever ds_name was
    # truthy, including ds_name == ALL_DS_MASK where the query contains no
    # placeholder — which breaks the all-datasets path.  Build query and
    # params together so they always agree.
    if ds_name != ALL_DS_MASK:
        ds_thumb_query = 'SELECT id, transform, thumbnail from dataset WHERE name = %s'
        params = (ds_name,)
    else:
        ds_thumb_query = 'SELECT id, transform, thumbnail from dataset '
        params = None
    for ds_id, transform, thumbnail in db.select(ds_thumb_query, params=params):
        # Only datasets with a registered optical-image transform get a thumbnail.
        if transform is not None:  # `is not None` instead of `!= None`
            ds = api.Dataset.load(db=db, ds_id=ds_id)
            img_store = ImageStoreServiceWrapper(config['services']['img_service_url'])
            img_store.storage_type = 'fs'
            sm = SMapiDatasetManager(db=db, es=ESExporter(db), image_store=img_store, mode='queue')
            img_id = db.select('SELECT optical_image from dataset WHERE id = %s',
                               params=(ds.id,))
            sm._add_thumbnail_optical_image(ds, f"{img_id[0][0]}", transform)

# NOTE(review): these three statements execute at import time, BEFORE the
# __main__ guard below, and call set_metadata_thumbnail with a hard-coded
# dataset name — this looks like leftover debug code; confirm whether it
# should be removed or moved under the __main__ guard.
SMConfig.set_path('conf/config.json')
sm_config = SMConfig.get_conf()
set_metadata_thumbnail(DB(sm_config['db']), sm_config, 'Untreated_3_434')

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Updates thumbnail for a provided dataset")
    parser.add_argument('--ds-name', dest='ds_name', type=str, help="Process specific dataset given by its name")
    parser.add_argument('--config', dest='sm_config_path', default='conf/config.json', type=str, help='SM config path')
    args = parser.parse_args()

    SMConfig.set_path(args.sm_config_path)
    sm_config = SMConfig.get_conf()

    db = DB(sm_config['db'])

    if args.ds_name:
        set_metadata_thumbnail(db, sm_config, args.ds_name)
    else:
Example #26
0
 def _fetch_ds_metadata(self, ds_id):
     """Look up (name, metadata) for ds_id; fall back to ('', {}) if absent."""
     db = DB(SMConfig.get_conf()['db'])
     result = db.select_one('SELECT name, metadata FROM dataset WHERE id = %s',
                            params=(ds_id,))
     if result:
         return result
     return ('', {})
Example #27
0
def _create_db_conn():
    """Open a DB connection using the globally configured DB settings."""
    return DB(SMConfig.get_conf()['db'])