Example #1
def test_queue_msg_published_consumed_on_success_called(
        sm_config, delete_queue):
    config = sm_config['rabbitmq']
    queue_pub = QueuePublisher(config, QDESC)
    msg = {'test': 'message'}
    queue_pub.publish(msg)

    output_q = Queue()
    run_queue_consumer_thread(config,
                              callback=lambda *args: output_q.put('callback'),
                              output_q=output_q)

    assert not output_q.empty()
    assert output_q.get(block=False) == 'callback'
    assert output_q.get(block=False) == 'on_success'

    assert output_q.empty()
Example #2
def test_queue_msg_published_consumed_on_failure_called(
        sm_config, delete_queue):
    config = sm_config['rabbitmq']
    queue_pub = QueuePublisher(config, QDESC)
    msg = {'test': 'message'}
    queue_pub.publish(msg)

    output_q = Queue()

    def raise_exception(*args):
        output_q.put('callback')
        raise Exception('Callback exception')

    run_queue_consumer_thread(config,
                              callback=raise_exception,
                              output_q=output_q)

    assert not output_q.empty()
    assert output_q.get(block=False) == 'callback'
    assert output_q.get(block=False) == 'on_failure'
    assert output_q.empty()
Example #3
def test_queue_msg_published_consumed_on_failure_called(sm_config):
    config = sm_config['rabbitmq']
    queue_pub = QueuePublisher(config, QDESC)
    msg = {'test': 'message'}
    queue_pub.publish(msg)

    output_q = Queue()

    def raise_exception(*args):
        output_q.put('callback')
        raise Exception('Callback exception')

    run_queue_consumer_thread(config,
                              callback=raise_exception,
                              output_q=output_q,
                              wait=1)

    assert output_q.get() == 'callback'
    assert output_q.get() == 'on_failure'

    time.sleep(5)
    assert queue_is_empty(config)
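
The three tests above rely on a run_queue_consumer_thread helper, a QDESC queue descriptor and a queue_is_empty check defined elsewhere in the test suite. A minimal sketch of what such a helper could look like, assuming the QueueConsumer constructor and start()/stop()/join() methods shown in the daemon examples below; the hook wiring and the wait default are illustrative, not the project's actual fixture code:

import time

def run_queue_consumer_thread(config, callback, output_q, wait=1):
    # Illustrative sketch: report on_success/on_failure into output_q so the
    # tests can assert on the order of events after the callback has run.
    consumer = QueueConsumer(
        config=config,
        qdesc=QDESC,   # assumed to be the same descriptor the tests publish to
        callback=callback,
        on_success=lambda *args: output_q.put('on_success'),
        on_failure=lambda *args: output_q.put('on_failure'))
    consumer.start()     # the consumer runs in its own thread
    time.sleep(wait)     # give it time to pick up the published message
    consumer.stop()
    consumer.join()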
Example #4
class SMAnnotateDaemon(object):
    """ Reads messages from annotation queue and starts annotation jobs
    """
    logger = logging.getLogger('annotate-daemon')

    def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
        self._sm_config = SMConfig.get_conf()
        self._stopped = False
        self._annot_queue_consumer = QueueConsumer(
            config=self._sm_config['rabbitmq'],
            qdesc=annot_qdesc,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
            logger=self.logger,
            poll_interval=poll_interval)
        self._upd_queue_pub = QueuePublisher(
            config=self._sm_config['rabbitmq'],
            qdesc=upd_qdesc,
            logger=self.logger)

        self._db = DB(self._sm_config['db'])
        self._manager = manager

    def _send_email(self, email, subj, body):
        try:
            cred_dict = dict(
                aws_access_key_id=self._sm_config['aws']['aws_access_key_id'],
                aws_secret_access_key=self._sm_config['aws']['aws_secret_access_key'])
            ses = boto3.client('ses', 'eu-west-1', **cred_dict)
            resp = ses.send_email(Source='*****@*****.**',
                                  Destination={'ToAddresses': [email]},
                                  Message={
                                      'Subject': {
                                          'Data': subj
                                      },
                                      'Body': {
                                          'Text': {
                                              'Data': body
                                          }
                                      }
                                  })
        except Exception as e:
            self.logger.warning(f'Send email exception {e} for {email}')
        else:
            if resp['ResponseMetadata']['HTTPStatusCode'] == 200:
                self.logger.info(
                    f'Email with "{subj}" subject was sent to {email}')
            else:
                self.logger.warning(f'SES failed to send email to {email}')

    def _on_success(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FINISHED)

        self.logger.info(f" SM annotate daemon: success")

        ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = self._manager.create_web_app_link(msg)
        self._manager.post_to_slack(
            'dart', ' [v] Annotation succeeded: {}'.format(json.dumps(msg)))

        if msg.get('email'):
            email_body = (
                'Dear METASPACE user,\n\n'
                'Thank you for uploading the "{}" dataset to the METASPACE annotation service. '
                'We are pleased to inform you that the dataset has been processed and is available at {}.\n\n'
                'Best regards,\n'
                'METASPACE Team').format(ds_name, msg['web_app_link'])
            self._send_email(msg['email'],
                             'METASPACE service notification (SUCCESS)',
                             email_body)

    def _on_failure(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.FAILED)

        self.logger.error(f" SM annotate daemon: failure", exc_info=True)

        ds_name, _ = self._manager.fetch_ds_metadata(msg['ds_id'])
        msg['web_app_link'] = self._manager.create_web_app_link(msg)
        self._manager.post_to_slack(
            'hankey', ' [x] Annotation failed: {}'.format(json.dumps(msg)))

        if msg.get('email'):
            email_body = (
                'Dear METASPACE user,\n\n'
                'We are sorry to inform you that there was a problem during processing of the "{}" dataset '
                'and it could not be annotated. '
                'If this is unexpected, please do not hesitate to contact us for support at [email protected]\n\n'
                'Best regards,\n'
                'METASPACE Team').format(ds_name)
            self._send_email(msg['email'],
                             'METASPACE service notification (FAILED)',
                             email_body)

    def _callback(self, msg):
        ds = Dataset.load(self._db, msg['ds_id'])
        ds.set_status(self._db, self._manager.es, self._manager.status_queue,
                      DatasetStatus.ANNOTATING)

        self.logger.info(f" SM annotate daemon received a message: {msg}")
        self._manager.post_to_slack(
            'new', " [v] New annotation message: {}".format(json.dumps(msg)))

        self._manager.annotate(ds=ds,
                               search_job_factory=SearchJob,
                               del_first=msg.get('del_first', False))

        upd_msg = {
            'ds_id': msg['ds_id'],
            'ds_name': msg['ds_name'],
            'action': 'update'
        }
        self._upd_queue_pub.publish(msg=upd_msg, priority=2)

    def start(self):
        self._stopped = False
        self._annot_queue_consumer.start()

    def stop(self):
        if not self._stopped:
            self._annot_queue_consumer.stop()
            self._annot_queue_consumer.join()
            self._stopped = True
        if self._db:
            self._db.close()
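
A hedged usage sketch for this daemon; the manager object and the SM_ANNOTATE/SM_UPDATE queue descriptors are placeholders, not names taken from the code above:

import signal

# Hypothetical wiring only: manager and the queue descriptors must come from
# the surrounding application.
daemon = SMAnnotateDaemon(manager=manager,
                          annot_qdesc=SM_ANNOTATE,
                          upd_qdesc=SM_UPDATE,
                          poll_interval=1)
try:
    daemon.start()
    signal.pause()   # block the main thread until a shutdown signal arrives (Unix only)
finally:
    daemon.stop()    # stops/joins the consumer thread and closes the DB connection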
Example #5
class SMAnnotateDaemon:
    """Reads messages from annotation queue and starts annotation jobs"""

    logger = logging.getLogger('annotate-daemon')

    def __init__(self, manager, annot_qdesc, upd_qdesc, poll_interval=1):
        self._sm_config = SMConfig.get_conf()
        self._stopped = False
        self._manager = manager
        self._annot_queue_consumer = QueueConsumer(
            config=self._sm_config['rabbitmq'],
            qdesc=annot_qdesc,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
            logger=self.logger,
            poll_interval=poll_interval,
        )
        self._update_queue_pub = QueuePublisher(
            config=self._sm_config['rabbitmq'],
            qdesc=upd_qdesc,
            logger=self.logger)
        self._redis_client = redis.Redis(**self._sm_config.get('redis', {}))
        Path(self._sm_config['fs']['spark_data_path']).mkdir(parents=True,
                                                             exist_ok=True)

    def _on_success(self, msg):
        self.logger.info(' SM annotate daemon: success')

        ds = self._manager.load_ds(msg['ds_id'])
        self._manager.set_ds_status(ds, DatasetStatus.FINISHED)
        self._manager.notify_update(ds.id, msg['action'],
                                    DaemonActionStage.FINISHED)

        self._manager.post_to_slack(
            'dart', ' [v] Annotation succeeded: {}'.format(json.dumps(msg)))
        self._redis_client.set('cluster-busy', 'no')

    def _on_failure(self, msg, e):
        self._manager.ds_failure_handler(msg, e)

        if 'email' in msg:
            traceback = e.__cause__.traceback if isinstance(
                e.__cause__, ImzMLError) else None
            self._manager.send_failed_email(msg, traceback)
        self._redis_client.set('cluster-busy', 'no')

    def _callback(self, msg):
        try:
            self.logger.info(f' SM annotate daemon received a message: {msg}')
            self._redis_client.set('cluster-busy', 'yes',
                                   ex=3600 * 13)  # key expires in 13h

            ds = self._manager.load_ds(msg['ds_id'])
            self._manager.set_ds_status(ds, DatasetStatus.ANNOTATING)
            self._manager.notify_update(ds.id, msg['action'],
                                        DaemonActionStage.STARTED)

            self._manager.post_to_slack(
                'new', f' [v] New annotation message: {json.dumps(msg)}')

            self._manager.annotate(ds=ds,
                                   del_first=msg.get('del_first', False))

            update_msg = {
                'ds_id': msg['ds_id'],
                'ds_name': msg['ds_name'],
                'email': msg.get('email', None),
                'action': DaemonAction.INDEX,
            }
            self._update_queue_pub.publish(msg=update_msg,
                                           priority=DatasetActionPriority.HIGH)

            if self._sm_config['services'].get('off_sample', False):
                analyze_msg = {
                    'ds_id': msg['ds_id'],
                    'ds_name': msg['ds_name'],
                    'action': DaemonAction.CLASSIFY_OFF_SAMPLE,
                }
                self._update_queue_pub.publish(
                    msg=analyze_msg, priority=DatasetActionPriority.LOW)
        except Exception as e:
            raise AnnotationError(ds_id=msg['ds_id'],
                                  traceback=format_exc(chain=False)) from e

    def start(self):
        self._stopped = False
        self._annot_queue_consumer.start()

    def stop(self):
        """Must be called from main thread"""
        if not self._stopped:
            self._annot_queue_consumer.stop()
            self._annot_queue_consumer.join()
            self._stopped = True

    def join(self):
        if not self._stopped:
            self._annot_queue_consumer.join()
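
Compared with the previous version, this daemon also marks the cluster as busy in Redis while a job is running (the cluster-busy key, set with a 13-hour expiry in _callback and cleared in _on_success/_on_failure). A small sketch of how another process could read that flag; sm_config is assumed to be the same configuration dictionary the daemon uses:

import redis

# Assumption: sm_config holds the same 'redis' connection settings as above.
client = redis.Redis(**sm_config.get('redis', {}))
cluster_busy = client.get('cluster-busy') == b'yes'   # redis-py returns bytes (or None)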
Example #6
class LithopsDaemon:
    logger = logging.getLogger('lithops-daemon')

    def __init__(self, manager, lit_qdesc, annot_qdesc, upd_qdesc):
        self._sm_config = SMConfig.get_conf()
        self._stopped = False
        self._manager = manager
        self._lithops_queue_cons = QueueConsumer(
            config=self._sm_config['rabbitmq'],
            qdesc=lit_qdesc,
            logger=self.logger,
            poll_interval=1,
            callback=self._callback,
            on_success=self._on_success,
            on_failure=self._on_failure,
        )
        self._lithops_queue_pub = QueuePublisher(
            config=self._sm_config['rabbitmq'],
            qdesc=lit_qdesc,
            logger=self.logger)
        self._annot_queue_pub = QueuePublisher(
            config=self._sm_config['rabbitmq'],
            qdesc=annot_qdesc,
            logger=self.logger)
        self._update_queue_pub = QueuePublisher(
            config=self._sm_config['rabbitmq'],
            qdesc=upd_qdesc,
            logger=self.logger)

    def _on_success(self, msg):
        self.logger.info(' SM lithops daemon: success')
        self._manager.post_to_slack(
            'dart', f' [v] Annotation succeeded: {json.dumps(msg)}')

    # pylint: disable=unused-argument
    def _on_failure(self, msg, e):
        exc = format_exc(limit=10)
        # Requeue the message so it retries
        if msg.get('retry_attempt', 0) < 1:
            self.logger.warning(f'Lithops annotation failed, retrying.\n{exc}')
            self._lithops_queue_pub.publish({
                **msg,
                'retry_attempt': msg.get('retry_attempt', 0) + 1,
            })
            self._manager.post_to_slack(
                'bomb',
                f" [x] Annotation failed, retrying: {json.dumps(msg)}\n```{exc}```",
            )
        else:
            self.logger.critical(
                f'Lithops annotation failed. Falling back to Spark\n{exc}')
            self._annot_queue_pub.publish(msg)

            self._manager.post_to_slack(
                'bomb',
                f" [x] Annotation failed, retrying on Spark: {json.dumps(msg)}\n```{exc}```",
            )

        # Exit the process and let supervisor restart it, in case Lithops was left in
        # an unrecoverable state
        os.kill(os.getpid(), signal.SIGINT)

    def _callback(self, msg):
        try:
            self.logger.info(f' SM lithops daemon received a message: {msg}')
            self._manager.post_to_slack(
                'new', f' [v] New annotation message: {json.dumps(msg)}')

            ds = self._manager.load_ds(msg['ds_id'])
            self._manager.set_ds_status(ds, DatasetStatus.ANNOTATING)
            self._manager.notify_update(ds.id, msg['action'],
                                        DaemonActionStage.STARTED)

            self._manager.annotate_lithops(ds=ds,
                                           del_first=msg.get('del_first', False))

            update_msg = {
                'ds_id': msg['ds_id'],
                'ds_name': msg['ds_name'],
                'email': msg.get('email', None),
                'action': DaemonAction.INDEX,
            }
            self._update_queue_pub.publish(msg=update_msg,
                                           priority=DatasetActionPriority.HIGH)

            if self._sm_config['services'].get('off_sample', False):
                analyze_msg = {
                    'ds_id': msg['ds_id'],
                    'ds_name': msg['ds_name'],
                    'action': DaemonAction.CLASSIFY_OFF_SAMPLE,
                }
                self._update_queue_pub.publish(
                    msg=analyze_msg, priority=DatasetActionPriority.LOW)

            self._manager.set_ds_status(ds, DatasetStatus.FINISHED)
            self._manager.notify_update(ds.id, msg['action'],
                                        DaemonActionStage.FINISHED)
        except LithopsStalledException:
            raise
        except Exception as e:
            raise AnnotationError(ds_id=msg['ds_id'],
                                  traceback=format_exc(chain=False)) from e

    def start(self):
        self._stopped = False
        self._lithops_queue_cons.start()

    def stop(self):
        if not self._stopped:
            self._lithops_queue_cons.stop()
            self._lithops_queue_cons.join()
            self._stopped = True

    def join(self):
        if not self._stopped:
            self._lithops_queue_cons.join()
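
The shape of the messages these daemons consume can be read off the _callback and _on_failure handlers: ds_id, ds_name, action, an optional email, an optional del_first flag, and a retry_attempt counter added on requeue. A hedged sketch of publishing one such message; the dataset values are made up and DaemonAction.ANNOTATE is an assumed enum member, so check the project's DaemonAction definition before relying on it:

# Illustrative message only; field values are placeholders.
annotate_msg = {
    'ds_id': '2020-01-01_00h00m00s',
    'ds_name': 'example_dataset',
    'action': DaemonAction.ANNOTATE,   # assumption: not shown in the examples above
    'email': 'user@example.com',
    'del_first': False,
    # 'retry_attempt' is added by LithopsDaemon._on_failure when it requeues a job
}
queue_pub = QueuePublisher(config=sm_config['rabbitmq'],
                           qdesc=lit_qdesc,
                           logger=logging.getLogger('lithops-daemon'))
queue_pub.publish(annotate_msg, priority=DatasetActionPriority.HIGH)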