def _execute_job(self):
    """Execute the owned job.

    The job is unpickled from the owned job token, wrapped in a
    JobExecutor, prepared and executed.  Depending on the outcome the token
    is moved to waiting (success, or failure without retry) or kept in
    runnable (failure with retry).  Afterwards the worker drops its
    references to the executor and the token and processes the signals of
    the workflow instance.
    """
    assert self._owned_job_token
    # NOTE(review): the token data is unpickled as-is; presumably tokens
    # only come from a trusted master - confirm.
    job = pickle.loads(self._owned_job_token.data)
    name = Name.from_job_token_name(self._owned_job_token.name)
    self._executor = JobExecutor.from_job(name.workflow,
                                          name.instance,
                                          name.job,
                                          job,
                                          self._data_builder,
                                          self._emailer)
    success = self._executor.prepare()
    if success:
        # Store the job as it looks after prepare() before executing it.
        self._owned_job_token.data = pickle.dumps(self._executor.job)
        success = self._update_owned_job_token()
        if success:
            self._start_renew_ownership()
            try:
                success = self._executor.execute()
            finally:
                # Stop the renewal even when execute() raises; otherwise
                # ownership would keep being renewed for a job that is no
                # longer running.
                self._stop_renew_ownership()
    if success:
        self._move_job_token_to_waiting(self._executor.job, True)
    elif self._executor.job.retry():
        self._keep_job_token_in_runnable(self._executor.job)
    else:
        signaller = Signaller(self._client, name.workflow, name.instance)
        # If ARCHIVE is not set, this is the first failed job in the
        # workflow.  This must be checked before the token is moved to
        # waiting because that call may post the ARCHIVE signal.
        first_failure = not signaller.is_action_set(Signal.ARCHIVE)
        self._move_job_token_to_waiting(self._executor.job, False)
        self._send_job_failure_emails(first_failure)
    self._executor = None
    self._owned_job_token = None
    # If needed, archive the workflow.
    self._process_signals(name.workflow, name.instance)
def abort_running(self, client, store):
    """Set the ABORT signal on the running instances of this workflow.

    Args:
        client: The client handed to each Signaller.
        store: The store handed to _get_running_instances().

    Returns:
        False as soon as ABORT does not read back as set for some instance
        (later instances are not signalled).  True otherwise, including
        when there are no running instances.
    """
    for running_instance in self._get_running_instances(store):
        abort_signaller = Signaller(client,
                                    workflow=self.workflow,
                                    instance=running_instance)
        abort_signaller.set_action(Signal.ABORT)
        if abort_signaller.is_action_set(Signal.ABORT):
            continue
        return False
    return True
def test_set_attribute_if_missing(self):
    # Checks set_attribute_if_missing() on the ARCHIVE signal of a single
    # workflow instance: it is expected to fail without the signal, succeed
    # once, and never overwrite a stored value.
    client = self._factory.get_client()
    scope = {'workflow': 'some_workflow', 'instance': '123'}
    writer = Signaller(client, **scope)

    # No ARCHIVE signal yet.
    attribute_set = writer.set_attribute_if_missing(
        Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
    self.assertFalse(attribute_set)

    # With the signal in place the first write goes through.
    writer.set_action(Signal.ARCHIVE)
    attribute_set = writer.set_attribute_if_missing(
        Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 12345)
    self.assertTrue(attribute_set)
    stored = writer.get_attribute(Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
    self.assertEqual(12345, stored)

    # A second write with another value is rejected.
    attribute_set = writer.set_attribute_if_missing(
        Signal.ARCHIVE, Signal.TIMESTAMP_ATTR, 123456)
    self.assertFalse(attribute_set)

    # A fresh signaller sees the value of the first write.
    reader = Signaller(client, **scope)
    stored = reader.get_attribute(Signal.ARCHIVE, Signal.TIMESTAMP_ATTR)
    self.assertEqual(12345, stored)
def run(self):
    """Run the worker.

    Each iteration builds a fresh Signaller and then:
      - stops if the EXIT action is set,
      - tries to own a runnable job token unless DRAIN is set,
      - executes the owned job if there is one; otherwise either stops
        (when _test_only_end_if_no_runnable is set) or sleeps for a
        randomized polling interval.
    """
    LOG.info('Running worker ' + self._name)
    while True:
        signaller = Signaller(self._client)
        if signaller.is_action_set(Signal.EXIT):
            # Leave through the loop exit (not return) so that the exit
            # message below is logged.
            break
        if not signaller.is_action_set(Signal.DRAIN):
            self._own_runnable_job_token()
        if self._owned_job_token:
            self._execute_job()
        elif self._test_only_end_if_no_runnable:
            break
        else:
            time.sleep(Worker._randomized_worker_polling_time())
    LOG.info('Exiting worker ' + self._name)
def _process_abort_signals(self):
    """Abort the running job if ABORT is set for its workflow instance.

    A transport or socket error raised while checking the signal is logged
    and handled as if ABORT were set.

    Returns:
        False iff the job has been aborted.
    """
    token_name = Name.from_job_token_name(self._owned_job_token.name)
    try:
        should_abort = Signaller(
            self._client,
            token_name.workflow,
            token_name.instance).is_action_set(Signal.ABORT)
    except (TTransport.TTransportException, socket.timeout, socket.error):
        # We need this exception handler only in logic located in the
        # Timer thread.  If that thread fails, we should abort the process
        # and let the main thread decide what to do.
        LOG.exception('')
        should_abort = True
    if not should_abort:
        return True
    self._abort()
    return False
def test_get_attribute(self):
    # After set_action(EXIT) the generation attribute of the signal is
    # expected to equal PinballConfig.GENERATION, both for the signaller
    # that set it and for a newly created one.
    client = self._factory.get_client()

    writer = Signaller(client)
    writer.set_action(Signal.EXIT)
    seen_by_writer = writer.get_attribute(Signal.EXIT,
                                          Signal.GENERATION_ATTR)
    self.assertEqual(PinballConfig.GENERATION, seen_by_writer)

    reader = Signaller(client)
    seen_by_reader = reader.get_attribute(Signal.EXIT,
                                          Signal.GENERATION_ATTR)
    self.assertEqual(PinballConfig.GENERATION, seen_by_reader)
def _move_job_token_to_waiting(self, job, succeeded):
    """Replace the owned job token with a waiting-state job token.

    A single modify request deletes the owned token and writes the waiting
    token.  For a successful job the request also carries the job's output
    event tokens.  For a failed job, or a job without outputs, an ARCHIVE
    signal token is added unless ARCHIVE is already set.

    Args:
        job: The job that should be stored in the data field of the
            waiting job token.
        succeeded: True if the job succeeded, otherwise False.
    """
    owned_token = self._owned_job_token
    assert owned_token
    token_name = Name.from_job_token_name(owned_token.name)
    token_name.job_state = Name.WAITING_STATE
    request = ModifyRequest(
        deletes=[owned_token],
        updates=[Token(name=token_name.get_job_token_name(),
                       priority=owned_token.priority,
                       data=pickle.dumps(job))])
    if succeeded:
        request.updates.extend(self._get_output_event_tokens(job))
    if not (job.outputs and succeeded):
        # This is either the only job in the workflow with no outputs or a
        # failed job.  In either case, the workflow is done.
        archive_already_set = Signaller(
            self._client,
            workflow=token_name.workflow,
            instance=token_name.instance).is_action_set(Signal.ARCHIVE)
        if not archive_already_set:
            archive_name = Name(
                workflow=token_name.workflow,
                instance=token_name.instance,
                signal=Signal.action_to_string(Signal.ARCHIVE))
            archive_signal = Signal(Signal.ARCHIVE)
            request.updates.append(
                Token(name=archive_name.get_signal_token_name(),
                      data=pickle.dumps(archive_signal)))
    self._send_request(request)
def _move_job_token_to_waiting(self, job, succeeded):
    """Move the owned job token to the waiting group.

    The move is one modify request: the owned token is deleted and a token
    with the waiting job state is written.  Output event tokens are added
    when the job succeeded; an ARCHIVE signal token is added when the job
    failed or has no outputs and ARCHIVE is not set yet.

    Args:
        job: The job that should be stored in the data field of the
            waiting job token.
        succeeded: True if the job succeeded, otherwise False.
    """
    assert self._owned_job_token
    current = self._owned_job_token

    waiting_name = Name.from_job_token_name(current.name)
    waiting_name.job_state = Name.WAITING_STATE
    waiting_token = Token(name=waiting_name.get_job_token_name(),
                          priority=current.priority,
                          data=pickle.dumps(job))

    request = ModifyRequest(deletes=[current], updates=[waiting_token])
    if succeeded:
        request.updates.extend(self._get_output_event_tokens(job))

    workflow_done = not job.outputs or not succeeded
    if workflow_done:
        # This is either the only job in the workflow with no outputs or a
        # failed job.  In either case, the workflow is done.
        workflow, instance = waiting_name.workflow, waiting_name.instance
        signaller = Signaller(self._client,
                              workflow=workflow,
                              instance=instance)
        if not signaller.is_action_set(Signal.ARCHIVE):
            archive_token_name = Name(
                workflow=workflow,
                instance=instance,
                signal=Signal.action_to_string(Signal.ARCHIVE))
            archive_signal = Signal(Signal.ARCHIVE)
            archive_token = Token(
                name=archive_token_name.get_signal_token_name())
            archive_token.data = pickle.dumps(archive_signal)
            request.updates.append(archive_token)

    self._send_request(request)
def _process_signals(self, workflow, instance):
    """Process signals for a given workflow instance.

    Signals are checked in the order EXIT, ARCHIVE (only when the instance
    is done), ABORT, DRAIN; the first one that applies decides the result.

    Args:
        workflow: The workflow whose signals should be processed.
        instance: The instance whose signals should be processed.

    Returns:
        True if the worker should execute jobs in this instance.
        Otherwise False.
    """
    signaller = Signaller(self._client, workflow, instance)
    archiver = Archiver(self._client, workflow, instance)

    if signaller.is_action_set(Signal.EXIT):
        return False

    archive_requested = signaller.is_action_set(Signal.ARCHIVE)
    if archive_requested and self._is_done(workflow, instance):
        # TODO(pawel): enable this for all workflows after we gain
        # confidence that the master has enough memory to delay workflow
        # archiving.
        # 7 days for 'indexing', 12 hours for everything else.
        delay_hours = 7 * 24 if workflow == 'indexing' else 12
        archive_delay_sec = delay_hours * 60 * 60
        new_expiration = int(time.time()) + archive_delay_sec
        if signaller.set_attribute_if_missing(Signal.ARCHIVE,
                                              Signal.TIMESTAMP_ATTR,
                                              new_expiration):
            # The timestamp was missing until now, so the end of the
            # instance is reported at this point.
            self._send_instance_end_email(workflow, instance)
            return False
        # A timestamp was already stored; archive based on that value.
        stored_expiration = signaller.get_attribute(Signal.ARCHIVE,
                                                    Signal.TIMESTAMP_ATTR)
        archiver.archive_if_expired(stored_expiration)
        return False

    if signaller.is_action_set(Signal.ABORT):
        if archiver.archive_if_aborted():
            self._send_instance_end_email(workflow, instance)
        return False

    return not signaller.is_action_set(Signal.DRAIN)
def test_get_attribute(self):
    # The generation attribute of an EXIT signal set through set_action()
    # is expected to match PinballConfig.GENERATION for the writer as well
    # as for a signaller created afterwards.
    client = self._factory.get_client()

    def generation_of(signaller):
        return signaller.get_attribute(Signal.EXIT, Signal.GENERATION_ATTR)

    writing_side = Signaller(client)
    writing_side.set_action(Signal.EXIT)
    self.assertEqual(PinballConfig.GENERATION, generation_of(writing_side))

    reading_side = Signaller(client)
    self.assertEqual(PinballConfig.GENERATION, generation_of(reading_side))
def test_remove_sction(self):
    # remove_action() is exercised at the global, workflow and instance
    # level; after removal neither the writer nor a fresh signaller may
    # report the action as set.
    client = self._factory.get_client()

    # Global level: set and remove EXIT.
    writer = Signaller(client)
    writer.set_action(Signal.EXIT)
    writer.remove_action(Signal.EXIT)
    self.assertFalse(writer.is_action_set(Signal.EXIT))
    reader = Signaller(client)
    self.assertFalse(reader.is_action_set(Signal.EXIT))

    # Workflow level (DRAIN) and instance level (ABORT) follow the same
    # script: set, confirm through a scoped reader, remove, confirm gone.
    scoped_cases = (
        ({'workflow': 'some_workflow'}, Signal.DRAIN),
        ({'workflow': 'some_workflow', 'instance': '123'}, Signal.ABORT),
    )
    for scope, action in scoped_cases:
        writer = Signaller(client, **scope)
        writer.set_action(action)
        reader = Signaller(client, **scope)
        self.assertTrue(reader.is_action_set(action))
        writer.remove_action(action)
        self.assertFalse(writer.is_action_set(action))
        reader = Signaller(client)
        self.assertFalse(reader.is_action_set(action))
def test_set_attribute_if_missing(self):
    # The ARCHIVE timestamp can be set only when the ARCHIVE signal exists
    # and only while no timestamp is stored; the stored value must be
    # visible to a signaller created later.
    client = self._factory.get_client()
    writer = Signaller(client, workflow='some_workflow', instance='123')

    def try_set_timestamp(value):
        return writer.set_attribute_if_missing(Signal.ARCHIVE,
                                               Signal.TIMESTAMP_ATTR,
                                               value)

    def timestamp_seen_by(signaller):
        return signaller.get_attribute(Signal.ARCHIVE,
                                       Signal.TIMESTAMP_ATTR)

    # Signal not set yet.
    self.assertFalse(try_set_timestamp(12345))

    writer.set_action(Signal.ARCHIVE)
    self.assertTrue(try_set_timestamp(12345))
    self.assertEqual(12345, timestamp_seen_by(writer))

    # The attribute is no longer missing, so the new value is ignored.
    self.assertFalse(try_set_timestamp(123456))

    reader = Signaller(client, workflow='some_workflow', instance='123')
    self.assertEqual(12345, timestamp_seen_by(reader))
def test_is_action_set(self):
    # Before any signal token is posted nothing is set.  After posting, a
    # signaller is expected to see EXIT at every level, DRAIN from the
    # workflow level down, and ABORT only at the instance level.
    client = self._factory.get_client()

    def expect(signaller, exit_set, drain_set, abort_set):
        expectations = ((Signal.EXIT, exit_set),
                        (Signal.DRAIN, drain_set),
                        (Signal.ABORT, abort_set))
        for action, should_be_set in expectations:
            verify = self.assertTrue if should_be_set else self.assertFalse
            verify(signaller.is_action_set(action))

    expect(Signaller(client), False, False, False)

    self._post_signal_tokens()

    expect(Signaller(client), True, False, False)
    expect(Signaller(client, workflow='some_workflow'), True, True, False)
    expect(Signaller(client, workflow='some_workflow', instance='123'),
           True, True, True)
def test_set_action(self):
    # Sets EXIT globally, DRAIN on a workflow and ABORT on an instance, and
    # checks what readers at the matching level see.  EXIT is checked with
    # PinballConfig.GENERATION patched to 0 ("old generation"); with the
    # unpatched generation the freshly set EXIT is expected to be ignored.
    client = self._factory.get_client()
    generation_path = 'pinball.workflow.signaller.PinballConfig.GENERATION'

    # Global level.
    writer = Signaller(client)
    writer.set_action(Signal.EXIT)
    reader = Signaller(client)
    # New generation.
    self.assertFalse(reader.is_action_set(Signal.EXIT))
    # Old generation.
    with mock.patch(generation_path, 0):
        self.assertTrue(reader.is_action_set(Signal.EXIT))
    self.assertFalse(reader.is_action_set(Signal.DRAIN))
    self.assertFalse(reader.is_action_set(Signal.ABORT))

    # Workflow level.
    writer = Signaller(client, workflow='some_workflow')
    writer.set_action(Signal.DRAIN)
    reader = Signaller(client, workflow='some_workflow')
    # Old generation.
    with mock.patch(generation_path, 0):
        self.assertTrue(reader.is_action_set(Signal.EXIT))
    self.assertTrue(reader.is_action_set(Signal.DRAIN))
    self.assertFalse(reader.is_action_set(Signal.ABORT))

    # Instance level.
    writer = Signaller(client, workflow='some_workflow', instance='123')
    writer.set_action(Signal.ABORT)
    reader = Signaller(client, workflow='some_workflow', instance='123')
    # Old generation.
    with mock.patch(generation_path, 0):
        self.assertTrue(reader.is_action_set(Signal.EXIT))
    self.assertTrue(reader.is_action_set(Signal.DRAIN))
    self.assertTrue(reader.is_action_set(Signal.ABORT))