Ejemplo n.º 1
0
 def _execute_job(self):
     """Execute the owned job and move its token according to the outcome.

     The job is unpickled from the owned token and run through a
     ``JobExecutor``.  If it succeeds, the token is moved to waiting as
     succeeded.  If it fails and ``job.retry()`` is truthy, the token is
     kept runnable.  Otherwise the token is moved to waiting as failed and
     job failure emails are sent.  Afterwards the executor and the owned
     token are cleared and the signals of the workflow instance are
     processed.
     """
     assert self._owned_job_token
     # NOTE(review): the token payload is unpickled without validation --
     # presumably tokens only ever come from a trusted source; confirm.
     job = pickle.loads(self._owned_job_token.data)
     name = Name.from_job_token_name(self._owned_job_token.name)
     self._executor = JobExecutor.from_job(name.workflow, name.instance,
                                           name.job, job,
                                           self._data_builder,
                                           self._emailer)
     success = self._executor.prepare()
     if success:
         # Write the executor's job back into the owned token before
         # running it.
         self._owned_job_token.data = pickle.dumps(self._executor.job)
         success = self._update_owned_job_token()
         if success:
             self._start_renew_ownership()
             try:
                 success = self._executor.execute()
             finally:
                 # Stop the renewal started above even when execute()
                 # raises, so it never outlives the execution attempt.
                 self._stop_renew_ownership()
     if success:
         self._move_job_token_to_waiting(self._executor.job, True)
     elif self._executor.job.retry():
         self._keep_job_token_in_runnable(self._executor.job)
     else:
         signaller = Signaller(self._client, name.workflow, name.instance)
         # If ARCHIVE is not set, this is the first failed job in the
         # workflow.
         first_failure = not signaller.is_action_set(Signal.ARCHIVE)
         self._move_job_token_to_waiting(self._executor.job, False)
         self._send_job_failure_emails(first_failure)
     self._executor = None
     self._owned_job_token = None
     # If needed, archive the workflow.
     self._process_signals(name.workflow, name.instance)
Ejemplo n.º 2
0
 def _execute_job(self):
     """Execute the owned job and move its token according to the outcome.

     The job is unpickled from the owned token and run through a
     ``JobExecutor``.  If it succeeds, the token is moved to waiting as
     succeeded.  If it fails and ``job.retry()`` is truthy, the token is
     kept runnable.  Otherwise the token is moved to waiting as failed and
     job failure emails are sent.  Afterwards the executor and the owned
     token are cleared and the signals of the workflow instance are
     processed.
     """
     assert self._owned_job_token
     # NOTE(review): the token payload is unpickled without validation --
     # presumably tokens only ever come from a trusted source; confirm.
     job = pickle.loads(self._owned_job_token.data)
     name = Name.from_job_token_name(self._owned_job_token.name)
     self._executor = JobExecutor.from_job(name.workflow,
                                           name.instance,
                                           name.job,
                                           job,
                                           self._data_builder,
                                           self._emailer)
     success = self._executor.prepare()
     if success:
         # Write the executor's job back into the owned token before
         # running it.
         self._owned_job_token.data = pickle.dumps(self._executor.job)
         success = self._update_owned_job_token()
         if success:
             self._start_renew_ownership()
             try:
                 success = self._executor.execute()
             finally:
                 # Stop the renewal started above even when execute()
                 # raises, so it never outlives the execution attempt.
                 self._stop_renew_ownership()
     if success:
         self._move_job_token_to_waiting(self._executor.job, True)
     elif self._executor.job.retry():
         self._keep_job_token_in_runnable(self._executor.job)
     else:
         signaller = Signaller(self._client, name.workflow, name.instance)
         # If ARCHIVE is not set, this is the first failed job in the
         # workflow.
         first_failure = not signaller.is_action_set(Signal.ARCHIVE)
         self._move_job_token_to_waiting(self._executor.job, False)
         self._send_job_failure_emails(first_failure)
     self._executor = None
     self._owned_job_token = None
     # If needed, archive the workflow.
     self._process_signals(name.workflow, name.instance)
Ejemplo n.º 3
0
 def abort_running(self, client, store):
     """Set the ABORT action on every running instance of this workflow.

     Instances come from ``self._get_running_instances(store)`` and are
     handled in order; processing stops at the first instance whose ABORT
     action is not reported as set right after setting it.

     Args:
         client: The client passed to each Signaller.
         store: The store passed to ``_get_running_instances``.
     Returns:
         True if ABORT was confirmed for every running instance (also when
         there are none), otherwise False.
     """
     def _abort_instance(instance):
         # Set ABORT for one instance and report whether it is now set.
         instance_signaller = Signaller(client,
                                        workflow=self.workflow,
                                        instance=instance)
         instance_signaller.set_action(Signal.ABORT)
         return instance_signaller.is_action_set(Signal.ABORT)

     # all() stops at the first unconfirmed abort, like an early return.
     return all(_abort_instance(instance)
                for instance in self._get_running_instances(store))
Ejemplo n.º 4
0
 def abort_running(self, client, store):
     """Set the ABORT action on every running instance of this workflow.

     Instances come from ``self._get_running_instances(store)`` and are
     handled in order; processing stops at the first instance whose ABORT
     action is not reported as set right after setting it.

     Args:
         client: The client passed to each Signaller.
         store: The store passed to ``_get_running_instances``.
     Returns:
         True if ABORT was confirmed for every running instance (also when
         there are none), otherwise False.
     """
     def _abort_instance(instance):
         # Set ABORT for one instance and report whether it is now set.
         instance_signaller = Signaller(client,
                                        workflow=self.workflow,
                                        instance=instance)
         instance_signaller.set_action(Signal.ABORT)
         return instance_signaller.is_action_set(Signal.ABORT)

     # all() stops at the first unconfirmed abort, like an early return.
     return all(_abort_instance(instance)
                for instance in self._get_running_instances(store))
Ejemplo n.º 5
0
    def test_set_attribute_if_missing(self):
        """Exercise set_attribute_if_missing on the ARCHIVE signal.

        The call is expected to return False while ARCHIVE is not set, True
        the first time after set_action, and False on a second attempt.  The
        first value is expected to be readable through a fresh signaller.
        """
        client = self._factory.get_client()
        writer = Signaller(client, workflow='some_workflow', instance='123')

        # ARCHIVE has not been set yet.
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
        self.assertFalse(stored)

        writer.set_action(Signal.ARCHIVE)
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
        self.assertTrue(stored)
        timestamp = writer.get_attribute(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
        self.assertEqual(12345, timestamp)

        # A second attempt with a different value.
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 123456)
        self.assertFalse(stored)

        reader = Signaller(client, workflow='some_workflow', instance='123')
        timestamp = reader.get_attribute(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
        self.assertEqual(12345, timestamp)
Ejemplo n.º 6
0
 def run(self):
     """Run the worker loop until it is told to stop.

     Each iteration builds a fresh ``Signaller`` and stops on EXIT.  Unless
     DRAIN is set, it calls ``_own_runnable_job_token``.  An owned job
     token is executed; without one the loop either stops (when
     ``_test_only_end_if_no_runnable`` is truthy) or sleeps for a
     randomized polling interval.
     """
     LOG.info('Running worker %s', self._name)
     while True:
         signaller = Signaller(self._client)
         if signaller.is_action_set(Signal.EXIT):
             # Leave via break (not return) so the exit message below is
             # actually logged.
             break
         if not signaller.is_action_set(Signal.DRAIN):
             self._own_runnable_job_token()
         if self._owned_job_token:
             self._execute_job()
         elif self._test_only_end_if_no_runnable:
             break
         else:
             time.sleep(Worker._randomized_worker_polling_time())
     LOG.info('Exiting worker %s', self._name)
Ejemplo n.º 7
0
 def run(self):
     """Run the worker loop until it is told to stop.

     Each iteration builds a fresh ``Signaller`` and stops on EXIT.  Unless
     DRAIN is set, it calls ``_own_runnable_job_token``.  An owned job
     token is executed; without one the loop either stops (when
     ``_test_only_end_if_no_runnable`` is truthy) or sleeps for a
     randomized polling interval.
     """
     LOG.info('Running worker %s', self._name)
     while True:
         signaller = Signaller(self._client)
         if signaller.is_action_set(Signal.EXIT):
             # Leave via break (not return) so the exit message below is
             # actually logged.
             break
         if not signaller.is_action_set(Signal.DRAIN):
             self._own_runnable_job_token()
         if self._owned_job_token:
             self._execute_job()
         elif self._test_only_end_if_no_runnable:
             break
         else:
             time.sleep(Worker._randomized_worker_polling_time())
     LOG.info('Exiting worker %s', self._name)
Ejemplo n.º 8
0
    def _process_abort_signals(self):
        """Check if the running job should be aborted.

        The job is aborted when the ABORT action is set for its workflow
        instance, and also when the signal lookup itself fails with a
        transport or socket error.

        Returns:
            False iff the job has been aborted.
        """
        name = Name.from_job_token_name(self._owned_job_token.name)
        abort = False
        try:
            signaller = Signaller(self._client, name.workflow, name.instance)
            abort = signaller.is_action_set(Signal.ABORT)
        except (TTransport.TTransportException, socket.timeout, socket.error):
            # We need this exception handler only in logic located in the
            # Timer thread.  If that thread fails, we should abort the process
            # and let the main thread decide what to do.
            LOG.exception('Failed to check the ABORT signal for workflow %s '
                          'instance %s; aborting the job',
                          name.workflow, name.instance)
            abort = True
        if abort:
            self._abort()
        return not abort
Ejemplo n.º 9
0
    def _process_abort_signals(self):
        """Check if the running job should be aborted.

        The job is aborted when the ABORT action is set for its workflow
        instance, and also when the signal lookup itself fails with a
        transport or socket error.

        Returns:
            False iff the job has been aborted.
        """
        name = Name.from_job_token_name(self._owned_job_token.name)
        abort = False
        try:
            signaller = Signaller(self._client, name.workflow, name.instance)
            abort = signaller.is_action_set(Signal.ABORT)
        except (TTransport.TTransportException, socket.timeout, socket.error):
            # We need this exception handler only in logic located in the
            # Timer thread.  If that thread fails, we should abort the process
            # and let the main thread decide what to do.
            LOG.exception('Failed to check the ABORT signal for workflow %s '
                          'instance %s; aborting the job',
                          name.workflow, name.instance)
            abort = True
        if abort:
            self._abort()
        return not abort
Ejemplo n.º 10
0
    def test_get_attribute(self):
        """Check the generation attribute of a freshly set EXIT signal.

        Both the signaller that set EXIT and a new signaller created
        afterwards are expected to report PinballConfig.GENERATION.
        """
        client = self._factory.get_client()

        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        written_generation = writer.get_attribute(Signal.EXIT,
                                                  Signal.GENERATION_ATTR)
        self.assertEqual(PinballConfig.GENERATION, written_generation)

        reader = Signaller(client)
        read_generation = reader.get_attribute(Signal.EXIT,
                                               Signal.GENERATION_ATTR)
        self.assertEqual(PinballConfig.GENERATION, read_generation)
Ejemplo n.º 11
0
    def _move_job_token_to_waiting(self, job, succeeded):
        """Replace the owned job token with a token in the waiting state.

        A single request deletes the owned token and posts the waiting one.
        For a successful job the output event tokens are posted with it.  If
        the job failed or has no outputs, an ARCHIVE signal token is added
        as well, unless ARCHIVE is already set for the workflow instance.

        Args:
            job: The job that should be stored in the data field of the
                waiting job token.
            succeeded: True if the job succeeded, otherwise False.
        """
        assert self._owned_job_token
        owned_token = self._owned_job_token
        token_name = Name.from_job_token_name(owned_token.name)
        token_name.job_state = Name.WAITING_STATE
        waiting_token = Token(name=token_name.get_job_token_name(),
                              priority=owned_token.priority,
                              data=pickle.dumps(job))
        request = ModifyRequest(deletes=[owned_token],
                                updates=[waiting_token])
        if succeeded:
            request.updates.extend(self._get_output_event_tokens(job))
        # A failed job, or a job with no outputs, means the workflow is
        # done.
        workflow_done = not (job.outputs and succeeded)
        if workflow_done and not Signaller(
                self._client,
                workflow=token_name.workflow,
                instance=token_name.instance).is_action_set(Signal.ARCHIVE):
            archive_name = Name(
                workflow=token_name.workflow,
                instance=token_name.instance,
                signal=Signal.action_to_string(Signal.ARCHIVE))
            archive_token = Token(name=archive_name.get_signal_token_name())
            archive_token.data = pickle.dumps(Signal(Signal.ARCHIVE))
            request.updates.append(archive_token)
        self._send_request(request)
Ejemplo n.º 12
0
    def _move_job_token_to_waiting(self, job, succeeded):
        """Replace the owned job token with a token in the waiting state.

        A single request deletes the owned token and posts the waiting one.
        For a successful job the output event tokens are posted with it.  If
        the job failed or has no outputs, an ARCHIVE signal token is added
        as well, unless ARCHIVE is already set for the workflow instance.

        Args:
            job: The job that should be stored in the data field of the
                waiting job token.
            succeeded: True if the job succeeded, otherwise False.
        """
        assert self._owned_job_token
        owned_token = self._owned_job_token
        token_name = Name.from_job_token_name(owned_token.name)
        token_name.job_state = Name.WAITING_STATE
        waiting_token = Token(name=token_name.get_job_token_name(),
                              priority=owned_token.priority,
                              data=pickle.dumps(job))
        request = ModifyRequest(deletes=[owned_token],
                                updates=[waiting_token])
        if succeeded:
            request.updates.extend(self._get_output_event_tokens(job))
        # A failed job, or a job with no outputs, means the workflow is
        # done.
        workflow_done = not (job.outputs and succeeded)
        if workflow_done and not Signaller(
                self._client,
                workflow=token_name.workflow,
                instance=token_name.instance).is_action_set(Signal.ARCHIVE):
            archive_name = Name(
                workflow=token_name.workflow,
                instance=token_name.instance,
                signal=Signal.action_to_string(Signal.ARCHIVE))
            archive_token = Token(name=archive_name.get_signal_token_name())
            archive_token.data = pickle.dumps(Signal(Signal.ARCHIVE))
            request.updates.append(archive_token)
        self._send_request(request)
Ejemplo n.º 13
0
    def _process_signals(self, workflow, instance):
        """Act on the signals set for a workflow instance.

        EXIT and DRAIN only block execution.  ARCHIVE on a finished
        instance either stamps an expiration timestamp (and sends the
        instance end email) or, if one is already stored, archives the
        instance once it has expired.  ABORT archives the aborted instance
        and sends the end email if archiving happened.

        Args:
            workflow: The workflow whose signals should be processed.
            instance: The instance whose signals should be processed.
        Returns:
            True if the worker should execute jobs in this instance.
            Otherwise False.
        """
        signaller = Signaller(self._client, workflow, instance)
        archiver = Archiver(self._client, workflow, instance)
        if signaller.is_action_set(Signal.EXIT):
            return False
        archive_requested = signaller.is_action_set(Signal.ARCHIVE)
        if archive_requested and self._is_done(workflow, instance):
            # TODO(pawel): enable this for all workflows after we gain
            # confidence that the master has enough memory to delay workflow
            # archiving.
            # 7 days for 'indexing', 12 hours for everything else.
            delay_sec = (7 * 24 * 60 * 60 if workflow == 'indexing'
                         else 12 * 60 * 60)
            expires_at = int(time.time()) + delay_sec
            newly_stamped = signaller.set_attribute_if_missing(
                Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, expires_at)
            if newly_stamped:
                self._send_instance_end_email(workflow, instance)
            else:
                # A timestamp was stored earlier; archive against that one.
                expires_at = signaller.get_attribute(Signal.ARCHIVE,
                                                     Signal.TIMESTAMP_ATTR)
                archiver.archive_if_expired(expires_at)
            return False
        if signaller.is_action_set(Signal.ABORT):
            if archiver.archive_if_aborted():
                self._send_instance_end_email(workflow, instance)
            return False
        return not signaller.is_action_set(Signal.DRAIN)
Ejemplo n.º 14
0
    def test_get_attribute(self):
        """Check the generation attribute of a freshly set EXIT signal.

        Both the signaller that set EXIT and a new signaller created
        afterwards are expected to report PinballConfig.GENERATION.
        """
        client = self._factory.get_client()

        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        written_generation = writer.get_attribute(Signal.EXIT,
                                                  Signal.GENERATION_ATTR)
        self.assertEqual(PinballConfig.GENERATION, written_generation)

        reader = Signaller(client)
        read_generation = reader.get_attribute(Signal.EXIT,
                                               Signal.GENERATION_ATTR)
        self.assertEqual(PinballConfig.GENERATION, read_generation)
Ejemplo n.º 15
0
    def _process_signals(self, workflow, instance):
        """Act on the signals set for a workflow instance.

        EXIT and DRAIN only block execution.  ARCHIVE on a finished
        instance either stamps an expiration timestamp (and sends the
        instance end email) or, if one is already stored, archives the
        instance once it has expired.  ABORT archives the aborted instance
        and sends the end email if archiving happened.

        Args:
            workflow: The workflow whose signals should be processed.
            instance: The instance whose signals should be processed.
        Returns:
            True if the worker should execute jobs in this instance.
            Otherwise False.
        """
        signaller = Signaller(self._client, workflow, instance)
        archiver = Archiver(self._client, workflow, instance)
        if signaller.is_action_set(Signal.EXIT):
            return False
        archive_requested = signaller.is_action_set(Signal.ARCHIVE)
        if archive_requested and self._is_done(workflow, instance):
            # TODO(pawel): enable this for all workflows after we gain
            # confidence that the master has enough memory to delay workflow
            # archiving.
            # 7 days for 'indexing', 12 hours for everything else.
            delay_sec = (7 * 24 * 60 * 60 if workflow == 'indexing'
                         else 12 * 60 * 60)
            expires_at = int(time.time()) + delay_sec
            newly_stamped = signaller.set_attribute_if_missing(
                Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, expires_at)
            if newly_stamped:
                self._send_instance_end_email(workflow, instance)
            else:
                # A timestamp was stored earlier; archive against that one.
                expires_at = signaller.get_attribute(Signal.ARCHIVE,
                                                     Signal.TIMESTAMP_ATTR)
                archiver.archive_if_expired(expires_at)
            return False
        if signaller.is_action_set(Signal.ABORT):
            if archiver.archive_if_aborted():
                self._send_instance_end_email(workflow, instance)
            return False
        return not signaller.is_action_set(Signal.DRAIN)
Ejemplo n.º 16
0
    def test_remove_sction(self):
        """Check that removed actions are no longer reported as set.

        Covers EXIT on a top-level signaller, DRAIN on a workflow signaller
        and ABORT on a workflow instance signaller.
        """
        client = self._factory.get_client()

        # Top-level EXIT: set and remove right away.
        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        writer.remove_action(Signal.EXIT)
        self.assertFalse(writer.is_action_set(Signal.EXIT))
        reader = Signaller(client)
        self.assertFalse(reader.is_action_set(Signal.EXIT))

        # The scoped cases share one shape: set, observe through a second
        # signaller with the same scope, remove, then re-check.
        scoped_cases = (
            ({'workflow': 'some_workflow'}, Signal.DRAIN),
            ({'workflow': 'some_workflow', 'instance': '123'}, Signal.ABORT),
        )
        for scope, action in scoped_cases:
            writer = Signaller(client, **scope)
            writer.set_action(action)
            reader = Signaller(client, **scope)
            self.assertTrue(reader.is_action_set(action))
            writer.remove_action(action)
            self.assertFalse(writer.is_action_set(action))
            # NOTE(review): this final reader is top-level rather than
            # scoped -- presumably a scoped reader was intended; confirm.
            reader = Signaller(client)
            self.assertFalse(reader.is_action_set(action))
Ejemplo n.º 17
0
    def test_set_attribute_if_missing(self):
        """Exercise set_attribute_if_missing on the ARCHIVE signal.

        The call is expected to return False while ARCHIVE is not set, True
        the first time after set_action, and False on a second attempt.  The
        first value is expected to be readable through a fresh signaller.
        """
        client = self._factory.get_client()
        writer = Signaller(client, workflow='some_workflow', instance='123')

        # ARCHIVE has not been set yet.
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
        self.assertFalse(stored)

        writer.set_action(Signal.ARCHIVE)
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
        self.assertTrue(stored)
        timestamp = writer.get_attribute(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
        self.assertEqual(12345, timestamp)

        # A second attempt with a different value.
        stored = writer.set_attribute_if_missing(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 123456)
        self.assertFalse(stored)

        reader = Signaller(client, workflow='some_workflow', instance='123')
        timestamp = reader.get_attribute(
            Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
        self.assertEqual(12345, timestamp)
Ejemplo n.º 18
0
    def test_is_action_set(self):
        """Check is_action_set before and after posting signal tokens.

        Before ``_post_signal_tokens`` no action is expected to be set.
        Afterwards the top-level signaller is expected to see only EXIT,
        the workflow signaller EXIT and DRAIN, and the workflow instance
        signaller EXIT, DRAIN and ABORT.
        """
        client = self._factory.get_client()
        actions = (Signal.EXIT, Signal.DRAIN, Signal.ABORT)

        signaller = Signaller(client)
        for action in actions:
            self.assertFalse(signaller.is_action_set(action))

        self._post_signal_tokens()

        # Signaller scope -> expected (EXIT, DRAIN, ABORT) results.
        expectations = (
            ({}, (True, False, False)),
            ({'workflow': 'some_workflow'}, (True, True, False)),
            ({'workflow': 'some_workflow', 'instance': '123'},
             (True, True, True)),
        )
        for scope, expected_flags in expectations:
            signaller = Signaller(client, **scope)
            for action, expected in zip(actions, expected_flags):
                check = self.assertTrue if expected else self.assertFalse
                check(signaller.is_action_set(action))
Ejemplo n.º 19
0
    def test_set_action(self):
        """Check that set actions are visible through new signallers.

        EXIT is expected to be reported only while
        PinballConfig.GENERATION is patched to 0.  DRAIN and ABORT are
        expected to be reported by signallers with the scope they were set
        in.
        """
        client = self._factory.get_client()
        target = 'pinball.workflow.signaller.PinballConfig.GENERATION'

        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        reader = Signaller(client)
        # New generation.
        self.assertFalse(reader.is_action_set(Signal.EXIT))
        # Old generation.
        with mock.patch(target, 0):
            self.assertTrue(reader.is_action_set(Signal.EXIT))
        self.assertFalse(reader.is_action_set(Signal.DRAIN))
        self.assertFalse(reader.is_action_set(Signal.ABORT))

        # Scope, action set in that scope, and whether ABORT is expected
        # to be visible afterwards.
        scoped_cases = (
            ({'workflow': 'some_workflow'}, Signal.DRAIN, False),
            ({'workflow': 'some_workflow', 'instance': '123'},
             Signal.ABORT, True),
        )
        for scope, action, abort_expected in scoped_cases:
            writer = Signaller(client, **scope)
            writer.set_action(action)
            reader = Signaller(client, **scope)
            # Old generation.
            with mock.patch(target, 0):
                self.assertTrue(reader.is_action_set(Signal.EXIT))
            self.assertTrue(reader.is_action_set(Signal.DRAIN))
            if abort_expected:
                self.assertTrue(reader.is_action_set(Signal.ABORT))
            else:
                self.assertFalse(reader.is_action_set(Signal.ABORT))
Ejemplo n.º 20
0
    def test_remove_sction(self):
        """Check that removed actions are no longer reported as set.

        Covers EXIT on a top-level signaller, DRAIN on a workflow signaller
        and ABORT on a workflow instance signaller.
        """
        client = self._factory.get_client()

        # Top-level EXIT: set and remove right away.
        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        writer.remove_action(Signal.EXIT)
        self.assertFalse(writer.is_action_set(Signal.EXIT))
        reader = Signaller(client)
        self.assertFalse(reader.is_action_set(Signal.EXIT))

        # The scoped cases share one shape: set, observe through a second
        # signaller with the same scope, remove, then re-check.
        scoped_cases = (
            ({'workflow': 'some_workflow'}, Signal.DRAIN),
            ({'workflow': 'some_workflow', 'instance': '123'}, Signal.ABORT),
        )
        for scope, action in scoped_cases:
            writer = Signaller(client, **scope)
            writer.set_action(action)
            reader = Signaller(client, **scope)
            self.assertTrue(reader.is_action_set(action))
            writer.remove_action(action)
            self.assertFalse(writer.is_action_set(action))
            # NOTE(review): this final reader is top-level rather than
            # scoped -- presumably a scoped reader was intended; confirm.
            reader = Signaller(client)
            self.assertFalse(reader.is_action_set(action))
Ejemplo n.º 21
0
    def test_set_action(self):
        """Check that set actions are visible through new signallers.

        EXIT is expected to be reported only while
        PinballConfig.GENERATION is patched to 0.  DRAIN and ABORT are
        expected to be reported by signallers with the scope they were set
        in.
        """
        client = self._factory.get_client()
        target = 'pinball.workflow.signaller.PinballConfig.GENERATION'

        writer = Signaller(client)
        writer.set_action(Signal.EXIT)
        reader = Signaller(client)
        # New generation.
        self.assertFalse(reader.is_action_set(Signal.EXIT))
        # Old generation.
        with mock.patch(target, 0):
            self.assertTrue(reader.is_action_set(Signal.EXIT))
        self.assertFalse(reader.is_action_set(Signal.DRAIN))
        self.assertFalse(reader.is_action_set(Signal.ABORT))

        # Scope, action set in that scope, and whether ABORT is expected
        # to be visible afterwards.
        scoped_cases = (
            ({'workflow': 'some_workflow'}, Signal.DRAIN, False),
            ({'workflow': 'some_workflow', 'instance': '123'},
             Signal.ABORT, True),
        )
        for scope, action, abort_expected in scoped_cases:
            writer = Signaller(client, **scope)
            writer.set_action(action)
            reader = Signaller(client, **scope)
            # Old generation.
            with mock.patch(target, 0):
                self.assertTrue(reader.is_action_set(Signal.EXIT))
            self.assertTrue(reader.is_action_set(Signal.DRAIN))
            if abort_expected:
                self.assertTrue(reader.is_action_set(Signal.ABORT))
            else:
                self.assertFalse(reader.is_action_set(Signal.ABORT))
Ejemplo n.º 22
0
    def test_is_action_set(self):
        """Check is_action_set before and after posting signal tokens.

        Before ``_post_signal_tokens`` no action is expected to be set.
        Afterwards the top-level signaller is expected to see only EXIT,
        the workflow signaller EXIT and DRAIN, and the workflow instance
        signaller EXIT, DRAIN and ABORT.
        """
        client = self._factory.get_client()
        actions = (Signal.EXIT, Signal.DRAIN, Signal.ABORT)

        signaller = Signaller(client)
        for action in actions:
            self.assertFalse(signaller.is_action_set(action))

        self._post_signal_tokens()

        # Signaller scope -> expected (EXIT, DRAIN, ABORT) results.
        expectations = (
            ({}, (True, False, False)),
            ({'workflow': 'some_workflow'}, (True, True, False)),
            ({'workflow': 'some_workflow', 'instance': '123'},
             (True, True, True)),
        )
        for scope, expected_flags in expectations:
            signaller = Signaller(client, **scope)
            for action, expected in zip(actions, expected_flags):
                check = self.assertTrue if expected else self.assertFalse
                check(signaller.is_action_set(action))