Ejemplo n.º 1
0
def reset_queues(local_sm_config):
    """Delete the annotate and update queues.

    Leftover messages are removed this way so that they cannot interfere
    with other tests.

    Args:
        local_sm_config: mapping whose ``'rabbitmq'`` entry is passed to
            ``QueuePublisher`` as ``config``.
    """
    from sm.engine.queue import QueuePublisher, SM_ANNOTATE, SM_UPDATE

    for queue_desc in (SM_ANNOTATE, SM_UPDATE):
        QueuePublisher(
            config=local_sm_config['rabbitmq'],
            qdesc=queue_desc,
            logger=logger,
        ).delete_queue()
Ejemplo n.º 2
0
def run_daemons(db, es, sm_config):
    """Run the annotate daemon briefly, then the update daemon briefly.

    Each daemon is started, left alone for 0.1 s and stopped before the next
    step begins.

    Args:
        db: forwarded to ``DatasetManager`` as ``db``.
        es: forwarded to ``DatasetManager`` as ``es``.
        sm_config: mapping; its ``'rabbitmq'`` entry configures the queues and
            the whole mapping is handed to ``DatasetManager``.
    """
    from sm.engine.queue import QueuePublisher, SM_DS_STATUS, SM_ANNOTATE, SM_UPDATE
    from sm.engine.daemons.dataset_manager import DatasetManager
    from sm.engine.daemons.annotate import SMAnnotateDaemon
    from sm.engine.daemons.update import SMUpdateDaemon

    def _start_then_stop(daemon):
        # Same start / short sleep / stop cycle for both daemons.
        daemon.start()
        time.sleep(0.1)
        daemon.stop()

    status_pub = QueuePublisher(
        config=sm_config['rabbitmq'],
        qdesc=SM_DS_STATUS,
        logger=logger,
    )
    ds_manager = DatasetManager(
        db=db,
        es=es,
        status_queue=status_pub,
        logger=logger,
        sm_config=sm_config,
    )

    _start_then_stop(
        SMAnnotateDaemon(
            manager=ds_manager,
            annot_qdesc=SM_ANNOTATE,
            upd_qdesc=SM_UPDATE,
        )
    )

    # The update daemon receives a factory, not a ready-made consumer.
    update_consumer_factory = partial(
        QueueConsumer,
        config=sm_config['rabbitmq'],
        qdesc=SM_UPDATE,
        logger=logger,
        poll_interval=1,
    )
    _start_then_stop(SMUpdateDaemon(ds_manager, update_consumer_factory))
Ejemplo n.º 3
0
def delete_queue(sm_config):
    """Delete the ``QDESC`` queue, yield once, then delete it again.

    NOTE(review): presumably registered as a pytest fixture; the decorator is
    not visible here.

    Args:
        sm_config: mapping whose ``'rabbitmq'`` entry configures the publisher.
    """

    def _drop_queue():
        # A fresh publisher is created for every deletion.
        QueuePublisher(sm_config['rabbitmq'], QDESC).delete_queue()

    _drop_queue()  # before tests
    yield
    _drop_queue()  # after tests
Ejemplo n.º 4
0
def test_queue_msg_published_consumed_on_success_called(
        sm_config, delete_queue):
    """After publishing one message the output queue holds exactly
    ``'callback'`` followed by ``'on_success'``."""
    rabbitmq_conf = sm_config['rabbitmq']
    QueuePublisher(rabbitmq_conf, QDESC).publish({'test': 'message'})

    events = Queue()

    def record_callback(*args):
        events.put('callback')

    run_queue_consumer_thread(
        rabbitmq_conf,
        callback=record_callback,
        output_q=events,
    )

    assert not events.empty()
    first_event = events.get(block=False)
    assert first_event == 'callback'
    second_event = events.get(block=False)
    assert second_event == 'on_success'

    # Nothing else may have been recorded.
    assert events.empty()
Ejemplo n.º 5
0
    def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
        """Create the annotation queue consumer, the update queue publisher
        and a DB handle.

        Args:
            manager: stored as ``self._manager``.
            annot_qdesc: passed to ``QueueConsumer`` as ``qdesc``.
            upd_qdesc: passed to ``QueuePublisher`` as ``qdesc``.
            poll_interval: forwarded to ``QueueConsumer``.
        """
        conf = SMConfig.get_conf()
        self._sm_config = conf
        self._stopped = False

        rabbitmq_conf = conf['rabbitmq']
        # The consumer dispatches to this object's own handlers.
        self._annot_queue_consumer = QueueConsumer(
            config=rabbitmq_conf,
            qdesc=annot_qdesc,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
            logger=self.logger,
            poll_interval=poll_interval,
        )
        self._upd_queue_pub = QueuePublisher(
            config=rabbitmq_conf,
            qdesc=upd_qdesc,
            logger=self.logger,
        )

        self._db = DB(conf['db'])
        self._manager = manager
Ejemplo n.º 6
0
 def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
     """Create the annotation queue consumer, the update queue publisher and
     a Redis client, and make sure the Spark data directory exists.

     Args:
         manager: stored as ``self._manager``.
         annot_qdesc: passed to ``QueueConsumer`` as ``qdesc``.
         upd_qdesc: passed to ``QueuePublisher`` as ``qdesc``.
         poll_interval: forwarded to ``QueueConsumer``.
     """
     conf = SMConfig.get_conf()
     self._sm_config = conf
     self._stopped = False
     self._manager = manager

     rabbitmq_conf = conf['rabbitmq']
     self._annot_queue_consumer = QueueConsumer(
         config=rabbitmq_conf,
         qdesc=annot_qdesc,
         callback=self._callback,
         on_success=self._on_success,
         on_failure=self._on_failure,
         logger=self.logger,
         poll_interval=poll_interval,
     )
     self._update_queue_pub = QueuePublisher(
         config=rabbitmq_conf,
         qdesc=upd_qdesc,
         logger=self.logger,
     )

     # A missing 'redis' section means Redis() is called without arguments.
     redis_kwargs = conf.get('redis', {})
     self._redis_client = redis.Redis(**redis_kwargs)

     spark_data_dir = Path(conf['fs']['spark_data_path'])
     spark_data_dir.mkdir(parents=True, exist_ok=True)
Ejemplo n.º 7
0
def get_manager():
    """Build a ``DatasetManager`` from a new ``DB``, an ``ESExporter`` and a
    publisher for the ``SM_DS_STATUS`` queue."""
    database = DB()
    status_pub = QueuePublisher(
        config=sm_config['rabbitmq'],
        qdesc=SM_DS_STATUS,
        logger=logger,
    )
    manager = DatasetManager(
        db=database,
        es=ESExporter(database, sm_config),
        status_queue=status_pub,
        logger=logger,
    )
    return manager
Ejemplo n.º 8
0
def test_queue_msg_published_consumed_on_failure_called(
        sm_config, delete_queue):
    """When the callback raises, the output queue holds exactly
    ``'callback'`` followed by ``'on_failure'``."""
    rabbitmq_conf = sm_config['rabbitmq']
    QueuePublisher(rabbitmq_conf, QDESC).publish({'test': 'message'})

    events = Queue()

    def failing_callback(*args):
        # Record the invocation first, then fail.
        events.put('callback')
        raise Exception('Callback exception')

    run_queue_consumer_thread(
        rabbitmq_conf,
        callback=failing_callback,
        output_q=events,
    )

    assert not events.empty()
    first_event = events.get(block=False)
    assert first_event == 'callback'
    second_event = events.get(block=False)
    assert second_event == 'on_failure'
    assert events.empty()
Ejemplo n.º 9
0
 def __init__(self, manager, lit_qdesc, annot_qdesc, upd_qdesc):
     """Create one consumer for the lithops queue and publishers for the
     lithops, annotate and update queues.

     Args:
         manager: stored as ``self._manager``.
         lit_qdesc: descriptor used by both the consumer and a publisher.
         annot_qdesc: descriptor for the annotate queue publisher.
         upd_qdesc: descriptor for the update queue publisher.
     """
     conf = SMConfig.get_conf()
     self._sm_config = conf
     self._stopped = False
     self._manager = manager

     rabbitmq_conf = conf['rabbitmq']

     def _publisher(qdesc):
         # All three publishers differ only in their queue descriptor.
         return QueuePublisher(config=rabbitmq_conf,
                               qdesc=qdesc,
                               logger=self.logger)

     self._lithops_queue_cons = QueueConsumer(
         config=rabbitmq_conf,
         qdesc=lit_qdesc,
         logger=self.logger,
         poll_interval=1,
         callback=self._callback,
         on_success=self._on_success,
         on_failure=self._on_failure,
     )
     self._lithops_queue_pub = _publisher(lit_qdesc)
     self._annot_queue_pub = _publisher(annot_qdesc)
     self._update_queue_pub = _publisher(upd_qdesc)
Ejemplo n.º 10
0
def test_queue_msg_published_consumed_on_failure_called(sm_config):
    """When the callback raises, ``'callback'`` then ``'on_failure'`` are
    reported and, five seconds later, the queue is empty."""
    rabbitmq_conf = sm_config['rabbitmq']
    QueuePublisher(rabbitmq_conf, QDESC).publish({'test': 'message'})

    events = Queue()

    def failing_callback(*args):
        # Record the invocation first, then fail.
        events.put('callback')
        raise Exception('Callback exception')

    run_queue_consumer_thread(
        rabbitmq_conf,
        callback=failing_callback,
        output_q=events,
        wait=1,
    )

    # Blocking gets: each waits until the corresponding event arrives.
    first_event = events.get()
    assert first_event == 'callback'
    second_event = events.get()
    assert second_event == 'on_failure'

    time.sleep(5)
    assert queue_is_empty(rabbitmq_conf)
Ejemplo n.º 11
0
def run_daemons(db, es):
    """Start and immediately stop the annotate daemon, then do the same for
    the update daemon.

    Args:
        db: forwarded to ``SMDaemonManager`` as ``db``.
        es: forwarded to ``SMDaemonManager`` as ``es``.
    """
    from sm.engine.queue import QueuePublisher, SM_DS_STATUS, SM_ANNOTATE, SM_UPDATE
    from sm.engine.png_generator import ImageStoreServiceWrapper
    from sm.engine.sm_daemons import SMDaemonManager, SMAnnotateDaemon, SMUpdateDaemon

    status_pub = QueuePublisher(
        config=sm_config()['rabbitmq'],
        qdesc=SM_DS_STATUS,
        logger=logger,
    )
    daemon_manager = SMDaemonManager(
        db=db,
        es=es,
        img_store=ImageStoreServiceWrapper(
            sm_config()['services']['img_service_url']),
        status_queue=status_pub,
        logger=logger,
        sm_config=sm_config(),
    )

    annotator = SMAnnotateDaemon(
        manager=daemon_manager,
        annot_qdesc=SM_ANNOTATE,
        upd_qdesc=SM_UPDATE,
    )
    annotator.start()
    annotator.stop()

    # The update daemon is only created once the annotate daemon has stopped.
    updater = SMUpdateDaemon(manager=daemon_manager, update_qdesc=SM_UPDATE)
    updater.start()
    updater.stop()
Ejemplo n.º 12
0
class SMAnnotateDaemon(object):
    """Consume messages from the annotation queue and run an annotation job
    for each of them.
    """
    logger = logging.getLogger('annotate-daemon')

    def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
        """Create the annotation queue consumer, the update queue publisher
        and a DB handle.

        Args:
            manager: stored as ``self._manager``.
            annot_qdesc: passed to ``QueueConsumer`` as ``qdesc``.
            upd_qdesc: passed to ``QueuePublisher`` as ``qdesc``.
            poll_interval: forwarded to ``QueueConsumer``.
        """
        conf = SMConfig.get_conf()
        self._sm_config = conf
        self._stopped = False

        rabbitmq_conf = conf['rabbitmq']
        self._annot_queue_consumer = QueueConsumer(
            config=rabbitmq_conf,
            qdesc=annot_qdesc,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
            logger=self.logger,
            poll_interval=poll_interval,
        )
        self._upd_queue_pub = QueuePublisher(
            config=rabbitmq_conf,
            qdesc=upd_qdesc,
            logger=self.logger,
        )

        self._db = DB(conf['db'])
        self._manager = manager

    def _send_email(self, email, subj, body):
        """Send a plain-text email through a boto3 SES client.

        Any exception raised while preparing or sending is logged as a
        warning and swallowed. A response whose HTTP status is not 200 is
        logged as a warning as well.

        Args:
            email: the single recipient address.
            subj: subject text.
            body: message text.
        """
        try:
            aws_conf = self._sm_config['aws']
            ses = boto3.client(
                'ses',
                'eu-west-1',
                aws_access_key_id=aws_conf['aws_access_key_id'],
                aws_secret_access_key=aws_conf['aws_secret_access_key'],
            )
            message = {
                'Subject': {'Data': subj},
                'Body': {'Text': {'Data': body}},
            }
            resp = ses.send_email(
                Source='*****@*****.**',
                Destination={'ToAddresses': [email]},
                Message=message,
            )
        except Exception as e:
            self.logger.warning(f'Send email exception {e} for {email}')
            return

        if resp['ResponseMetadata']['HTTPStatusCode'] != 200:
            self.logger.warning(f'SEM failed to send email to {email}')
        else:
            self.logger.info(
                f'Email with "{subj}" subject was sent to {email}')

    def _on_success(self, msg):
        """Mark the dataset FINISHED, post to Slack and, if the message has a
        truthy ``'email'``, send a success email.

        ``msg`` is modified in place: ``'web_app_link'`` is added.
        """
        manager = self._manager
        dataset = Dataset.load(self._db, msg['ds_id'])
        dataset.set_status(self._db, manager.es, manager.status_queue,
                           DatasetStatus.FINISHED)

        self.logger.info(" SM annotate daemon: success")

        ds_name, _ = manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = manager.create_web_app_link(msg)
        slack_text = ' [v] Annotation succeeded: {}'.format(json.dumps(msg))
        manager.post_to_slack('dart', slack_text)

        recipient = msg.get('email')
        if not recipient:
            return

        body_template = (
            'Dear METASPACE user,\n\n'
            'Thank you for uploading the "{}" dataset to the METASPACE annotation service. '
            'We are pleased to inform you that the dataset has been processed and is available at {}.\n\n'
            'Best regards,\n'
            'METASPACE Team')
        self._send_email(
            recipient,
            'METASPACE service notification (SUCCESS)',
            body_template.format(ds_name, msg['web_app_link']),
        )

    def _on_failure(self, msg):
        """Mark the dataset FAILED, post to Slack and, if the message has a
        truthy ``'email'``, send a failure email.

        ``msg`` is modified in place: ``'web_app_link'`` is added.
        """
        manager = self._manager
        dataset = Dataset.load(self._db, msg['ds_id'])
        dataset.set_status(self._db, manager.es, manager.status_queue,
                           DatasetStatus.FAILED)

        self.logger.error(" SM annotate daemon: failure", exc_info=True)

        ds_name, _ = manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = manager.create_web_app_link(msg)
        slack_text = ' [x] Annotation failed: {}'.format(json.dumps(msg))
        manager.post_to_slack('hankey', slack_text)

        recipient = msg.get('email')
        if not recipient:
            return

        body_template = (
            'Dear METASPACE user,\n\n'
            'We are sorry to inform you that there was a problem during processing of the "{}" dataset '
            'and it could not be annotated. '
            'If this is unexpected, please do not hesitate to contact us for support at [email protected]\n\n'
            'Best regards,\n'
            'METASPACE Team')
        self._send_email(
            recipient,
            'METASPACE service notification (FAILED)',
            body_template.format(ds_name),
        )

    def _callback(self, msg):
        """Handle one annotation message.

        Marks the dataset ANNOTATING, runs the annotation through the manager
        and then publishes an ``'update'`` message for the same dataset.
        """
        manager = self._manager
        dataset = Dataset.load(self._db, msg['ds_id'])
        dataset.set_status(self._db, manager.es, manager.status_queue,
                           DatasetStatus.ANNOTATING)

        self.logger.info(f" SM annotate daemon received a message: {msg}")
        slack_text = " [v] New annotation message: {}".format(json.dumps(msg))
        manager.post_to_slack('new', slack_text)

        manager.annotate(
            ds=dataset,
            search_job_factory=SearchJob,
            del_first=msg.get('del_first', False),
        )

        followup_msg = {
            'ds_id': msg['ds_id'],
            'ds_name': msg['ds_name'],
            'action': 'update',
        }
        self._upd_queue_pub.publish(msg=followup_msg, priority=2)

    def start(self):
        """Clear the stopped flag and start the queue consumer."""
        self._stopped = False
        self._annot_queue_consumer.start()

    def stop(self):
        """Stop and join the consumer unless already stopped, then close the
        DB handle if there is one.

        The DB close is attempted on every call, not only on the first one.
        """
        if not self._stopped:
            consumer = self._annot_queue_consumer
            consumer.stop()
            consumer.join()
            self._stopped = True
        if self._db:
            self._db.close()
Ejemplo n.º 13
0
    # Option for the SM config file location; defaults to 'conf/config.json'.
    parser.add_argument('--config',
                        dest='config_path',
                        default='conf/config.json',
                        type=str,
                        help='SM config path')
    args = parser.parse_args()

    # Load the configuration from the chosen path, then set up logging from
    # its 'logs' section.
    SMConfig.set_path(args.config_path)
    sm_config = SMConfig.get_conf()
    init_loggers(sm_config['logs'])
    # The logger is named after the requested daemon (args.name is defined by
    # an option outside this fragment).
    logger = logging.getLogger(f'{args.name}-daemon')
    logger.info(f'Starting {args.name}-daemon')

    # One DB handle is shared by the ES exporter and the manager.
    db = DB(sm_config['db'])
    status_queue_pub = QueuePublisher(config=sm_config['rabbitmq'],
                                      qdesc=SM_DS_STATUS,
                                      logger=logger)
    manager = SMDaemonManager(db=db,
                              es=ESExporter(db),
                              img_store=ImageStoreServiceWrapper(
                                  sm_config['services']['img_service_url']),
                              status_queue=status_queue_pub,
                              logger=logger)
    # Pick the daemon by name; any other name is rejected with an exception.
    if args.name == 'annotate':
        daemon = SMAnnotateDaemon(manager=manager,
                                  annot_qdesc=SM_ANNOTATE,
                                  upd_qdesc=SM_UPDATE)
    elif args.name == 'update':
        daemon = SMUpdateDaemon(manager=manager, update_qdesc=SM_UPDATE)
    else:
        raise Exception(f'Wrong SM daemon name: {args.name}')
Ejemplo n.º 14
0
class LithopsDaemon:
    """Consume messages from the lithops queue and annotate the referenced
    datasets through ``manager.annotate_lithops``."""

    logger = logging.getLogger('lithops-daemon')

    def __init__(self, manager, lit_qdesc, annot_qdesc, upd_qdesc):
        """Create one consumer for the lithops queue and publishers for the
        lithops, annotate and update queues.

        Args:
            manager: stored as ``self._manager``.
            lit_qdesc: descriptor used by both the consumer and a publisher.
            annot_qdesc: descriptor for the annotate queue publisher.
            upd_qdesc: descriptor for the update queue publisher.
        """
        conf = SMConfig.get_conf()
        self._sm_config = conf
        self._stopped = False
        self._manager = manager

        rabbitmq_conf = conf['rabbitmq']

        def _publisher(qdesc):
            # All three publishers differ only in their queue descriptor.
            return QueuePublisher(config=rabbitmq_conf,
                                  qdesc=qdesc,
                                  logger=self.logger)

        self._lithops_queue_cons = QueueConsumer(
            config=rabbitmq_conf,
            qdesc=lit_qdesc,
            logger=self.logger,
            poll_interval=1,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
        )
        self._lithops_queue_pub = _publisher(lit_qdesc)
        self._annot_queue_pub = _publisher(annot_qdesc)
        self._update_queue_pub = _publisher(upd_qdesc)

    def _on_success(self, msg):
        """Log the success and post it to Slack."""
        self.logger.info(' SM lithops daemon: success')
        slack_text = f' [v] Annotation succeeded: {json.dumps(msg)}'
        self._manager.post_to_slack('dart', slack_text)

    # pylint: disable=unused-argument
    def _on_failure(self, msg, e):
        """Requeue a failed message, report to Slack and interrupt the process.

        The first failure republishes the message to the lithops queue with
        ``'retry_attempt'`` incremented; a later failure publishes it to the
        annotate queue instead. In both cases SIGINT is then sent to the
        current process.
        """
        exc = format_exc(limit=10)
        attempts_so_far = msg.get('retry_attempt', 0)

        if attempts_so_far >= 1:
            self.logger.critical(
                f'Lithops annotation failed. Falling back to Spark\n{exc}')
            self._annot_queue_pub.publish(msg)

            self._manager.post_to_slack(
                'bomb',
                f" [x] Annotation failed, retrying on Spark: {json.dumps(msg)}\n```{exc}```",
            )
        else:
            # Requeue the message so it retries
            self.logger.warning(f'Lithops annotation failed, retrying.\n{exc}')
            retry_msg = {**msg, 'retry_attempt': attempts_so_far + 1}
            self._lithops_queue_pub.publish(retry_msg)
            self._manager.post_to_slack(
                'bomb',
                f" [x] Annotation failed, retrying: {json.dumps(msg)}\n```{exc}```",
            )

        # Exit the process and let supervisor restart it, in case Lithops was left in
        # an unrecoverable state
        os.kill(os.getpid(), signal.SIGINT)

    def _callback(self, msg):
        """Annotate the dataset named in ``msg`` and queue the follow-up work.

        ``LithopsStalledException`` propagates unchanged; any other exception
        is re-raised as ``AnnotationError`` chained to the original.
        """
        try:
            self.logger.info(f' SM lithops daemon received a message: {msg}')
            self._manager.post_to_slack(
                'new', f' [v] New annotation message: {json.dumps(msg)}')

            ds = self._manager.load_ds(msg['ds_id'])
            self._manager.set_ds_status(ds, DatasetStatus.ANNOTATING)
            self._manager.notify_update(
                ds.id, msg['action'], DaemonActionStage.STARTED)

            self._manager.annotate_lithops(
                ds=ds, del_first=msg.get('del_first', False))

            index_msg = {
                'ds_id': msg['ds_id'],
                'ds_name': msg['ds_name'],
                'email': msg.get('email', None),
                'action': DaemonAction.INDEX,
            }
            self._update_queue_pub.publish(
                msg=index_msg, priority=DatasetActionPriority.HIGH)

            # Off-sample classification is queued only when the service is
            # enabled in the config.
            if self._sm_config['services'].get('off_sample', False):
                off_sample_msg = {
                    'ds_id': msg['ds_id'],
                    'ds_name': msg['ds_name'],
                    'action': DaemonAction.CLASSIFY_OFF_SAMPLE,
                }
                self._update_queue_pub.publish(
                    msg=off_sample_msg, priority=DatasetActionPriority.LOW)

            self._manager.set_ds_status(ds, DatasetStatus.FINISHED)
            self._manager.notify_update(
                ds.id, msg['action'], DaemonActionStage.FINISHED)
        except LithopsStalledException:
            raise
        except Exception as e:
            raise AnnotationError(ds_id=msg['ds_id'],
                                  traceback=format_exc(chain=False)) from e

    def start(self):
        """Clear the stopped flag and start the queue consumer."""
        self._stopped = False
        self._lithops_queue_cons.start()

    def stop(self):
        """Stop and join the consumer; does nothing if already stopped."""
        if self._stopped:
            return
        consumer = self._lithops_queue_cons
        consumer.stop()
        consumer.join()
        self._stopped = True

    def join(self):
        """Join the consumer; does nothing if already stopped."""
        if self._stopped:
            return
        self._lithops_queue_cons.join()
Ejemplo n.º 15
0
def _create_queue_publisher(qdesc):
    """Return a ``QueuePublisher`` for ``qdesc`` configured from the
    ``'rabbitmq'`` section of the SM config."""
    rabbitmq_conf = SMConfig.get_conf()['rabbitmq']
    return QueuePublisher(rabbitmq_conf, qdesc, logger)
Ejemplo n.º 16
0
    def run(self, ds):
        """ Entry point of the engine. Molecule search is completed in several steps:
            * Copying input data to the engine work dir
            * Conversion of input mass spec files to plain text format. One line - one spectrum data
            * Generation and saving to the database of theoretical peaks for all formulas from the molecule database
            * Molecules search. The most compute intensive part. Spark is used to run it in a distributed manner.
            * Saving results (isotope images and their metrics of quality for each putative molecule) to the database

        Whether the steps succeed or fail, the ``finally`` clause stops the
        Spark context, closes the DB connection and (unless ``self.no_clean``
        is set) cleans the work dir.

        Args
        ----
            ds : sm.engine.dataset_manager.Dataset
        """
        try:
            logger.info('*' * 150)
            start = time.time()

            self._init_db()
            self._es = ESExporter(self._db)
            self._ds = ds

            # The status queue is optional: a falsy 'rabbitmq' config disables it.
            if self._sm_config['rabbitmq']:
                self._status_queue = QueuePublisher(
                    config=self._sm_config['rabbitmq'],
                    qdesc=SM_DS_STATUS,
                    logger=logger)
            else:
                self._status_queue = None

            self._wd_manager = WorkDirManager(ds.id)
            self._configure_spark()

            # Clean the work dir up front unless cleaning is disabled.
            if not self.no_clean:
                self._wd_manager.clean()

            self._ds_reader = DatasetReader(self._ds.input_path, self._sc,
                                            self._wd_manager)
            self._ds_reader.copy_convert_input_data()

            self._save_data_from_raw_ms_file()
            self._img_store.storage_type = self._ds.get_ion_img_storage_type(
                self._db)

            logger.info('Dataset config:\n%s', pformat(self._ds.config))

            # Only ids found in exactly one of the two sets need work:
            # completed-only ids have their job removed, new-only ids get a job run.
            completed_moldb_ids, new_moldb_ids = self._moldb_ids()
            for moldb_id in completed_moldb_ids.symmetric_difference(
                    new_moldb_ids):  # ignore ids present in both sets
                mol_db = MolecularDB(
                    id=moldb_id,
                    db=self._db,
                    iso_gen_config=self._ds.config['isotope_generation'])
                if moldb_id not in new_moldb_ids:
                    self._remove_annotation_job(mol_db)
                elif moldb_id not in completed_moldb_ids:
                    self._run_annotation_job(mol_db)

            logger.info("All done!")
            time_spent = time.time() - start
            # divmod turns the rounded number of seconds into (minutes, seconds).
            logger.info('Time spent: %d mins %d secs',
                        *divmod(int(round(time_spent)), 60))
        finally:
            # NOTE(review): assumes _sc, _db and _wd_manager already exist as
            # attributes (e.g. set in __init__, not visible here) in case the
            # try block fails before assigning them - confirm.
            if self._sc:
                self._sc.stop()
            if self._db:
                self._db.close()
            if self._wd_manager and not self.no_clean:
                self._wd_manager.clean()
            logger.info('*' * 150)
Ejemplo n.º 17
0
class SMAnnotateDaemon:
    """Consume messages from the annotation queue and run an annotation job
    for each of them."""

    logger = logging.getLogger('annotate-daemon')

    def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
        """Create the annotation queue consumer, the update queue publisher
        and a Redis client, and make sure the Spark data directory exists.

        Args:
            manager: stored as ``self._manager``.
            annot_qdesc: passed to ``QueueConsumer`` as ``qdesc``.
            upd_qdesc: passed to ``QueuePublisher`` as ``qdesc``.
            poll_interval: forwarded to ``QueueConsumer``.
        """
        conf = SMConfig.get_conf()
        self._sm_config = conf
        self._stopped = False
        self._manager = manager

        rabbitmq_conf = conf['rabbitmq']
        self._annot_queue_consumer = QueueConsumer(
            config=rabbitmq_conf,
            qdesc=annot_qdesc,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
            logger=self.logger,
            poll_interval=poll_interval,
        )
        self._update_queue_pub = QueuePublisher(
            config=rabbitmq_conf,
            qdesc=upd_qdesc,
            logger=self.logger,
        )

        # A missing 'redis' section means Redis() is called without arguments.
        redis_kwargs = conf.get('redis', {})
        self._redis_client = redis.Redis(**redis_kwargs)

        spark_data_dir = Path(conf['fs']['spark_data_path'])
        spark_data_dir.mkdir(parents=True, exist_ok=True)

    def _on_success(self, msg):
        """Mark the dataset FINISHED, notify, post to Slack and set the
        ``'cluster-busy'`` Redis key to ``'no'``."""
        self.logger.info(' SM annotate daemon: success')

        manager = self._manager
        ds = manager.load_ds(msg['ds_id'])
        manager.set_ds_status(ds, DatasetStatus.FINISHED)
        manager.notify_update(ds.id, msg['action'], DaemonActionStage.FINISHED)

        slack_text = ' [v] Annotation succeeded: {}'.format(json.dumps(msg))
        manager.post_to_slack('dart', slack_text)
        self._redis_client.set('cluster-busy', 'no')

    def _on_failure(self, msg, e):
        """Delegate to the manager's failure handler, send a failure email if
        ``msg`` has an ``'email'`` key and set ``'cluster-busy'`` to ``'no'``.

        The email gets a traceback only when the cause of ``e`` is an
        ``ImzMLError``; otherwise ``None`` is passed.
        """
        self._manager.ds_failure_handler(msg, e)

        if 'email' in msg:
            if isinstance(e.__cause__, ImzMLError):
                imzml_traceback = e.__cause__.traceback
            else:
                imzml_traceback = None
            self._manager.send_failed_email(msg, imzml_traceback)
        self._redis_client.set('cluster-busy', 'no')

    def _callback(self, msg):
        """Annotate the dataset named in ``msg`` and queue the follow-up work.

        Any exception is re-raised as ``AnnotationError`` chained to the
        original.
        """
        try:
            self.logger.info(f' SM annotate daemon received a message: {msg}')
            busy_ttl_seconds = 3600 * 13  # key expires in 13h
            self._redis_client.set('cluster-busy', 'yes', ex=busy_ttl_seconds)

            ds = self._manager.load_ds(msg['ds_id'])
            self._manager.set_ds_status(ds, DatasetStatus.ANNOTATING)
            self._manager.notify_update(
                ds.id, msg['action'], DaemonActionStage.STARTED)

            self._manager.post_to_slack(
                'new', f' [v] New annotation message: {json.dumps(msg)}')

            self._manager.annotate(
                ds=ds, del_first=msg.get('del_first', False))

            index_msg = {
                'ds_id': msg['ds_id'],
                'ds_name': msg['ds_name'],
                'email': msg.get('email', None),
                'action': DaemonAction.INDEX,
            }
            self._update_queue_pub.publish(
                msg=index_msg, priority=DatasetActionPriority.HIGH)

            # Off-sample classification is queued only when the service is
            # enabled in the config.
            if self._sm_config['services'].get('off_sample', False):
                off_sample_msg = {
                    'ds_id': msg['ds_id'],
                    'ds_name': msg['ds_name'],
                    'action': DaemonAction.CLASSIFY_OFF_SAMPLE,
                }
                self._update_queue_pub.publish(
                    msg=off_sample_msg, priority=DatasetActionPriority.LOW)
        except Exception as e:
            raise AnnotationError(ds_id=msg['ds_id'],
                                  traceback=format_exc(chain=False)) from e

    def start(self):
        """Clear the stopped flag and start the queue consumer."""
        self._stopped = False
        self._annot_queue_consumer.start()

    def stop(self):
        """Must be called from main thread"""
        if self._stopped:
            return
        consumer = self._annot_queue_consumer
        consumer.stop()
        consumer.join()
        self._stopped = True

    def join(self):
        """Join the consumer; does nothing if already stopped."""
        if self._stopped:
            return
        self._annot_queue_consumer.join()