def _create_dataset_manager(db):
    config = SMConfig.get_conf()
    img_store = ImageStoreServiceWrapper(config['services']['img_service_url'])
    img_store.storage_type = 'fs'
    return SMapiDatasetManager(db=db, es=ESExporter(db), image_store=img_store,
                               mode='queue',
                               action_queue=_create_queue_publisher(SM_ANNOTATE),
                               status_queue=_create_queue_publisher(SM_DS_STATUS))
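For orientation, a hypothetical minimal config in the shape these snippets index into (only the keys are taken from the surrounding code; every value is a placeholder):

config = {
    'services': {'img_service_url': 'http://localhost:4201'},  # read wherever ImageStoreServiceWrapper is constructed
    'db': {...},        # passed to DB()
    'rabbitmq': {...},  # passed to QueuePublisher()
    'logs': {...},      # passed to init_loggers()
}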
def get_coloc_matrix(anns):
    for retry in range(3):
        try:
            import numpy as np
            import pandas as pd
            from scipy.ndimage import median_filter
            from sklearn.metrics.pairwise import pairwise_kernels
            from sm.engine.png_generator import ImageStoreServiceWrapper

            # Keep only the 2000 highest-scoring annotations
            anns = sorted(anns, key=lambda ann: -ann['msmScore'])[:2000]
            cnt = len(anns)
            img_ids = [ann['isotopeImages'][0]['url'][-32:] for ann in anns]
            img_names = [ann['sumFormula'] + ann['adduct'] for ann in anns]

            img_svc = ImageStoreServiceWrapper('https://metaspace2020.eu/')
            images, mask, (h, w) = img_svc.get_ion_images_for_analysis(
                'fs', img_ids, max_mem_mb=2048, hotspot_percentile=100)

            # Discard everything below each image's median intensity
            images[images < np.quantile(images, 0.5, axis=1, keepdims=True)] = 0
            # Suppress single-pixel noise with a 3x3 median filter per image
            filtered_images = median_filter(
                images.reshape((cnt, h, w)), (1, 3, 3)).reshape((cnt, h * w))

            # pairwise_kernels with metric='cosine' returns cosine *similarity*
            similarity_matrix = pairwise_kernels(filtered_images, metric='cosine')
            df = pd.DataFrame(similarity_matrix, index=img_names,
                              columns=img_names, dtype=np.float32)
            df.rename_axis(index='source', columns='target', inplace=True)
            df.reset_index(inplace=True)
            unpivoted = df.melt(id_vars='source', value_name='cosine')
            # Reduce to uni-directional links & apply an initial threshold
            return unpivoted[lambda df: (df.source < df.target) & (df.cosine > 0.01)]
        except Exception as ex:
            print(ex)
    return None
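For context, a minimal sketch (pandas only; the formulas and scores below are made-up placeholders) of how the edge list returned by get_coloc_matrix might be pivoted back into a symmetric colocalization matrix:

import pandas as pd

# Hypothetical edge list in the shape get_coloc_matrix() returns:
# one row per unordered pair with cosine > 0.01
edges = pd.DataFrame({
    'source': ['C42H82NO8P+H', 'C42H82NO8P+H'],
    'target': ['C44H86NO8P+H', 'C46H84NO8P+Na'],
    'cosine': [0.87, 0.34],
})

# Mirror the uni-directional links, then pivot back to a square matrix
sym = pd.concat([edges, edges.rename(columns={'source': 'target', 'target': 'source'})])
coloc = sym.pivot(index='source', columns='target', values='cosine').fillna(0.0)
print(coloc)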
def set_metadata_thumbnail(db, config, ds_name):
    # Select every dataset unless a specific name (ALL_DS_MASK means "all") is given;
    # the params must match the query, so both use the same condition
    ds_thumb_query = 'SELECT id, transform, thumbnail FROM dataset {}'.format(
        'WHERE name = %s' if ds_name != ALL_DS_MASK else '')
    params = (ds_name,) if ds_name != ALL_DS_MASK else None
    for id, transform, thumbnail in db.select(ds_thumb_query, params=params):
        if transform is not None:
            ds = api.Dataset.load(db=db, ds_id=id)
            img_store = ImageStoreServiceWrapper(config['services']['img_service_url'])
            img_store.storage_type = 'fs'
            sm = SMapiDatasetManager(db=db, es=ESExporter(db),
                                     image_store=img_store, mode='queue')
            img_id = db.select('SELECT optical_image FROM dataset WHERE id = %s',
                               params=(ds.id,))
            sm._add_thumbnail_optical_image(ds, f"{img_id[0][0]}", transform)
def run_search(self, mock_img_store=False):
    if mock_img_store:
        img_store = self._create_img_store_mock()
    else:
        img_store = ImageStoreServiceWrapper(
            self.sm_config['services']['img_service_url'])
    manager = SMDaemonManager(db=self.db, es=ESExporter(self.db),
                              img_store=img_store)
    ds = create_ds_from_files(self.ds_id, self.ds_name, self.input_path)

    from sm.engine.search_job import SearchJob
    manager.annotate(ds, search_job_factory=SearchJob, del_first=True)
def _callback(self, msg):
    log_msg = " SM daemon received a message: {}".format(msg)
    logger.info(log_msg)
    self._post_to_slack('new', " [v] Received: {}".format(json.dumps(msg)))
    db = DB(self._sm_config['db'])
    try:
        ds_man = self._dataset_manager_factory(
            db=db, es=ESExporter(db),
            img_store=ImageStoreServiceWrapper(
                self._sm_config['services']['img_service_url']),
            mode='queue',
            status_queue=QueuePublisher(config=self._sm_config['rabbitmq'],
                                        qdesc=SM_DS_STATUS, logger=logger))
        ds_man.process(ds=Dataset.load(db, msg['ds_id']),
                       action=msg['action'],
                       search_job_factory=SearchJob,
                       del_first=msg.get('del_first', False))
    finally:
        if db:
            db.close()
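For reference, a hypothetical queue message in the shape _callback reads (the keys come straight from the code above; the values are illustrative assumptions):

msg = {
    'ds_id': '2018-01-01_12h00m00s',  # passed to Dataset.load (value assumed)
    'action': 'annotate',             # forwarded as the `action` argument (value assumed)
    'del_first': True,                # optional; _callback defaults it to False
}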
def run_daemons(db, es):
    from sm.engine.queue import QueuePublisher, SM_DS_STATUS, SM_ANNOTATE, SM_UPDATE
    from sm.engine.png_generator import ImageStoreServiceWrapper
    from sm.engine.sm_daemons import SMDaemonManager, SMAnnotateDaemon, SMUpdateDaemon

    status_queue_pub = QueuePublisher(config=sm_config()['rabbitmq'],
                                      qdesc=SM_DS_STATUS, logger=logger)
    manager = SMDaemonManager(
        db=db, es=es,
        img_store=ImageStoreServiceWrapper(sm_config()['services']['img_service_url']),
        status_queue=status_queue_pub,
        logger=logger,
        sm_config=sm_config())

    annotate_daemon = SMAnnotateDaemon(manager=manager,
                                       annot_qdesc=SM_ANNOTATE,
                                       upd_qdesc=SM_UPDATE)
    annotate_daemon.start()
    annotate_daemon.stop()

    update_daemon = SMUpdateDaemon(manager=manager, update_qdesc=SM_UPDATE)
    update_daemon.start()
    update_daemon.stop()
def _img_store(self):
    return ImageStoreServiceWrapper(
        self._sm_config['services']['img_service_url'])
# Stdlib imports added for completeness; the sm.engine helpers used below
# (DB, SMConfig, ESExporter, init_loggers, create_ds_from_files,
# SMDaemonDatasetManager, ImageStoreServiceWrapper) are assumed to be
# imported at module level in the original file.
import argparse
import logging
import sys

from sm.engine.search_job import SearchJob

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='SM process dataset at a remote spark location.')
    parser.add_argument('--ds-id', dest='ds_id', type=str, help='Dataset id')
    parser.add_argument('--ds-name', dest='ds_name', type=str, help='Dataset name')
    parser.add_argument('--input-path', type=str, help='Path to a dataset location')
    parser.add_argument('--no-clean', dest='no_clean', action='store_true',
                        help="Don't clean dataset txt files after job is finished")
    parser.add_argument('--config', dest='sm_config_path', default='conf/config.json',
                        type=str, help='SM config path')
    args = parser.parse_args()

    SMConfig.set_path(args.sm_config_path)
    sm_config = SMConfig.get_conf()
    init_loggers(sm_config['logs'])

    db = DB(sm_config['db'])
    img_store = ImageStoreServiceWrapper(sm_config['services']['img_service_url'])
    ds_man = SMDaemonDatasetManager(db, ESExporter(db), img_store, mode='local')
    try:
        ds = create_ds_from_files(args.ds_id, args.ds_name, args.input_path)
        ds_man.add(ds, SearchJob, del_first=True)
    except Exception as e:
        logging.getLogger('engine').error(e)
        sys.exit(1)
    sys.exit()
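For reference, a hypothetical invocation of the script above (the script filename and argument values are assumed; the flags come straight from the argparse definitions):

python process_ds.py --ds-id 2018-01-01_12h00m00s --ds-name test_ds \
    --input-path /path/to/dataset --config conf/config.json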