def test_version_no_match(self):
  config_path = os.path.join(tempfile.mkdtemp(), 'config')
  self.create_temp_file(
      config_path, '{"language":"python", "version": "0.0.0"}')
  with self.assertRaises(RuntimeError) as exn:
    environment.check_sdk_compatibility(config_path)
  self.assertEqual(
      'SDK version %s does not match container version 0.0.0. '
      'Please rebuild the container or use a matching version '
      'of the SDK.' % version.__version__,
      exn.exception.message)
def test_language_no_match(self):
  config_path = os.path.join(tempfile.mkdtemp(), 'config')
  self.create_temp_file(
      config_path,
      '{"language":"java", "version": "%s"}' % version.__version__)
  with self.assertRaises(RuntimeError) as exn:
    environment.check_sdk_compatibility(config_path)
  self.assertEqual(
      'SDK language \'python\' does not match container language \'java\'. '
      'Please rebuild the container using a matching language container.',
      exn.exception.message)
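# The tests above pin down the behavior expected from
# environment.check_sdk_compatibility: read a JSON config containing
# "language" and "version" keys and raise RuntimeError on a mismatch.
# A minimal sketch consistent with those expectations (illustrative only;
# the function name and body here are not the actual SDK implementation):
import json

def check_sdk_compatibility_sketch(config_path):
  """Raises RuntimeError if the container config does not match this SDK."""
  with open(config_path) as f:
    config = json.load(f)
  if config['language'] != 'python':
    raise RuntimeError(
        'SDK language \'python\' does not match container language '
        '\'%s\'. Please rebuild the container using a matching language '
        'container.' % config['language'])
  if config['version'] != version.__version__:
    raise RuntimeError(
        'SDK version %s does not match container version %s. '
        'Please rebuild the container or use a matching version '
        'of the SDK.' % (version.__version__, config['version']))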
def run(self):
  """Runs the worker loop for leasing and executing work items."""
  if self.running_in_gce:
    auth.set_running_in_gce(worker_executing_project=self.project_id)

  # Deferred exceptions are used as a way to report unrecoverable errors that
  # happen before they could be reported to the service. If this is not None,
  # the worker will use the first work item to report the deferred exceptions
  # and fail eventually.
  # TODO(silviuc): Add the deferred exception mechanism to the streaming
  # worker.
  deferred_exception_details = None

  if self.environment_info_path is not None:
    try:
      environment.check_sdk_compatibility(self.environment_info_path)
    except Exception:  # pylint: disable=broad-except
      deferred_exception_details = traceback.format_exc()
      logging.error('SDK compatibility check failed: %s',
                    deferred_exception_details, exc_info=True)

  if deferred_exception_details is None:
    logging.info('Loading main session from the staging area...')
    try:
      self._load_main_session(self.local_staging_directory)
    except Exception:  # pylint: disable=broad-except
      deferred_exception_details = traceback.format_exc()
      logging.error('Could not load main session: %s',
                    deferred_exception_details, exc_info=True)

  # Start the status HTTP server thread.
  thread = threading.Thread(target=self.status_server)
  thread.daemon = True
  thread.start()

  # The batch execution context is currently a placeholder, so we don't yet
  # need to have it change between work items.
  execution_context = maptask.BatchExecutionContext()
  work_item = None
  # Loop forever leasing work items, executing them, and reporting status.
  while not self._shutdown:
    try:
      # Lease a work item. The lease_work call will retry for server errors
      # (e.g., 500s); however, it will not retry for a 404 (no item to
      # lease). In such cases we introduce random sleep delays with the code
      # below.
      should_sleep = False
      try:
        work = self.client.lease_work(
            self.worker_info_for_client(),
            self.default_desired_lease_duration())
        work_item = workitem.get_work_items(
            work, self.environment, execution_context)
        if work_item is None:
          should_sleep = True
      except HttpError as exn:
        # Not found errors (404) are benign. The rest are not and must be
        # re-raised.
        if exn.status_code != 404:
          raise
        should_sleep = True
      if should_sleep:
        logging.debug('No work items. Sleeping a bit ...')
        # The sleeping is done with a bit of jitter to avoid having workers
        # request leases in lock step.
        time.sleep(1.0 * (1 - 0.5 * random.random()))
        continue

      stage_name = None
      if work_item.map_task:
        stage_name = work_item.map_task.stage_name
      with logger.PerThreadLoggingContext(
          work_item_id=work_item.proto.id, stage_name=stage_name):
        # TODO(silviuc): Add more detailed timing and profiling support.
        start_time = time.time()
        # Do the work. The do_work() call will mark the work completed or
        # failed. The progress reporting thread will take care of sending
        # updates and of updating, in the workitem object, the reporting
        # indexes and duration for the lease.
        if self.work_item_profiling:
          with profiler.Profile(
              profile_id=work_item.proto.id,
              profile_location=self.profile_location,
              log_results=True):
            self.do_work(
                work_item,
                deferred_exception_details=deferred_exception_details)
        else:
          self.do_work(
              work_item,
              deferred_exception_details=deferred_exception_details)
        logging.info('Completed work item: %s in %.9f seconds',
                     work_item.proto.id, time.time() - start_time)
    except Exception:  # pylint: disable=broad-except
      # This is an exception raised outside of executing a work item, most
      # likely while leasing a work item. We log an error and march on.
      logging.error('Exception in worker loop: %s', traceback.format_exc(),
                    exc_info=True)
      # Sleep a bit after an exception to prevent a busy loop.
      time.sleep(1)
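# The no-work path above sleeps for 1.0 * (1 - 0.5 * random.random()) seconds,
# i.e. a uniformly jittered interval in (0.5s, 1.0s], so idle workers do not
# request leases in lock step. A standalone sketch of that backoff (the
# helper name is illustrative and not part of the worker API):
import random
import time

def _sleep_with_jitter(base_seconds=1.0):
  # Sleep between 0.5 * base_seconds and base_seconds.
  time.sleep(base_seconds * (1 - 0.5 * random.random()))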
def test_basics(self):
  config_path = os.path.join(tempfile.mkdtemp(), 'config')
  self.create_temp_file(
      config_path,
      '{"language":"python", "version": "%s"}' % version.__version__)
  environment.check_sdk_compatibility(config_path)
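# The tests rely on a create_temp_file helper that is not shown in this
# excerpt. A plausible implementation, assuming it simply writes the given
# contents to the given path, would be:
def create_temp_file(self, path, contents):
  with open(path, 'w') as f:
    f.write(contents)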