def test_set_when_already_defined(self): self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) with logger.PerThreadLoggingContext(xyz='value'): self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') with logger.PerThreadLoggingContext(xyz='value2'): self.assertEqual( logger.per_thread_worker_data.get_data()['xyz'], 'value2') self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') self.assertFalse('xyz' in logger.per_thread_worker_data.get_data())
def test_per_thread_attribute(self): self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) with logger.PerThreadLoggingContext(xyz='value'): self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') thread = threading.Thread(target=self.thread_check_attribute, args=('xyz', )) thread.start() thread.join() self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') self.assertFalse('xyz' in logger.per_thread_worker_data.get_data())
def test_nested_with_per_thread_info(self): formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') with logger.PerThreadLoggingContext(work_item_id='workitem', stage_name='stage', step_name='step1'): record = self.create_log_record(**self.SAMPLE_RECORD) log_output1 = json.loads(formatter.format(record)) with logger.PerThreadLoggingContext(step_name='step2'): record = self.create_log_record(**self.SAMPLE_RECORD) log_output2 = json.loads(formatter.format(record)) record = self.create_log_record(**self.SAMPLE_RECORD) log_output3 = json.loads(formatter.format(record)) record = self.create_log_record(**self.SAMPLE_RECORD) log_output4 = json.loads(formatter.format(record)) self.assertEqual( log_output1, dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1')) self.assertEqual( log_output2, dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step2')) self.assertEqual( log_output3, dict(self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1')) self.assertEqual(log_output4, self.SAMPLE_OUTPUT)
def test_record_with_per_thread_info(self): with logger.PerThreadLoggingContext(work_item_id='workitem', stage_name='stage', step_name='step'): formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') record = self.create_log_record(**self.SAMPLE_RECORD) log_output = json.loads(formatter.format(record)) expected_output = dict(self.SAMPLE_OUTPUT) expected_output.update({ 'work': 'workitem', 'stage': 'stage', 'step': 'step' }) self.assertEqual(log_output, expected_output)
def test_no_positional_args(self): with self.assertRaises(TypeError): with logger.PerThreadLoggingContext('something'): pass
def thread_check_attribute(self, name): self.assertFalse(name in logger.per_thread_worker_data.get_data()) with logger.PerThreadLoggingContext(**{name: 'thread-value'}): self.assertEqual(logger.per_thread_worker_data.get_data()[name], 'thread-value') self.assertFalse(name in logger.per_thread_worker_data.get_data())
def run(self): """Runs the worker loop for leasing and executing work items.""" if self.running_in_gce: auth.set_running_in_gce(worker_executing_project=self.project_id) # Deferred exceptions are used as a way to report unrecoverable errors that # happen before they could be reported to the service. If it is not None, # worker will use the first work item to report deferred exceptions and # fail eventually. # TODO(silviuc): Add the deferred exception mechanism to streaming worker deferred_exception_details = None if self.environment_info_path is not None: try: environment.check_sdk_compatibility(self.environment_info_path) except Exception: # pylint: disable=broad-except deferred_exception_details = traceback.format_exc() logging.error('SDK compatibility check failed: %s', deferred_exception_details, exc_info=True) if deferred_exception_details is None: logging.info('Loading main session from the staging area...') try: self._load_main_session(self.local_staging_directory) except Exception: # pylint: disable=broad-except deferred_exception_details = traceback.format_exc() logging.error('Could not load main session: %s', deferred_exception_details, exc_info=True) # Start status HTTP server thread. thread = threading.Thread(target=self.status_server) thread.daemon = True thread.start() # The batch execution context is currently a placeholder, so we don't yet # need to have it change between work items. execution_context = maptask.BatchExecutionContext() work_item = None # Loop forever leasing work items, executing them, and reporting status. while not self._shutdown: try: # Lease a work item. The lease_work call will retry for server errors # (e.g., 500s) however it will not retry for a 404 (no item to lease). # In such cases we introduce random sleep delays with the code below. should_sleep = False try: work = self.client.lease_work( self.worker_info_for_client(), self.default_desired_lease_duration()) work_item = workitem.get_work_items( work, self.environment, execution_context) if work_item is None: should_sleep = True except HttpError as exn: # Not found errors (404) are benign. The rest are not and must be # re-raised. if exn.status_code != 404: raise should_sleep = True if should_sleep: logging.debug('No work items. Sleeping a bit ...') # The sleeping is done with a bit of jitter to avoid having workers # requesting leases in lock step. time.sleep(1.0 * (1 - 0.5 * random.random())) continue stage_name = None if work_item.map_task: stage_name = work_item.map_task.stage_name with logger.PerThreadLoggingContext( work_item_id=work_item.proto.id, stage_name=stage_name): # TODO(silviuc): Add more detailed timing and profiling support. start_time = time.time() # Do the work. The do_work() call will mark the work completed or # failed. The progress reporting_thread will take care of sending # updates and updating in the workitem object the reporting indexes # and duration for the lease. if self.work_item_profiling: with profiler.Profile( profile_id=work_item.proto.id, profile_location=self.profile_location, log_results=True): self.do_work(work_item, deferred_exception_details= deferred_exception_details) else: self.do_work(work_item, deferred_exception_details= deferred_exception_details) logging.info('Completed work item: %s in %.9f seconds', work_item.proto.id, time.time() - start_time) except Exception: # pylint: disable=broad-except # This is an exception raised outside of executing a work item most # likely while leasing a work item. We log an error and march on. logging.error('Exception in worker loop: %s', traceback.format_exc(), exc_info=True) # sleeping a bit after Exception to prevent a busy loop. time.sleep(1)