def test_version_no_match(self):
     """Checks the error raised when the container version differs.

     Hands a config declaring version "0.0.0" to
     environment.check_sdk_compatibility and expects a RuntimeError whose text
     names both the SDK version and the container version.
     """
     config_path = os.path.join(tempfile.mkdtemp(), 'config')
     self.create_temp_file(config_path,
                           '{"language":"python", "version": "0.0.0"}')
     with self.assertRaises(RuntimeError) as exn:
         environment.check_sdk_compatibility(config_path)
     # Compare against str(exception) instead of the `.message` attribute:
     # `.message` was deprecated in Python 2.6 and is absent in Python 3,
     # whereas str() works on both.
     self.assertEqual(
         'SDK version %s does not match container version 0.0.0. '
         'Please rebuild the container or use a matching version '
         'of the SDK.' % (version.__version__), str(exn.exception))
 def test_language_no_match(self):
     """Checks the error raised when the container language differs.

     Hands a config declaring language "java" (with the current SDK version)
     to environment.check_sdk_compatibility and expects a RuntimeError whose
     text names both languages.
     """
     config_path = os.path.join(tempfile.mkdtemp(), 'config')
     self.create_temp_file(
         config_path,
         '{"language":"java", "version": "%s"}' % version.__version__)
     with self.assertRaises(RuntimeError) as exn:
         environment.check_sdk_compatibility(config_path)
     # Compare against str(exception) instead of the `.message` attribute:
     # `.message` was deprecated in Python 2.6 and is absent in Python 3,
     # whereas str() works on both.
     self.assertEqual(
         'SDK language \'python\' does not match container language \'java\'. '
         'Please rebuild the container using a matching language container.',
         str(exn.exception))
 def test_language_no_match(self):
   """Checks the error raised when the container language differs.

   Hands a config declaring language "java" (with the current SDK version)
   to environment.check_sdk_compatibility and expects a RuntimeError whose
   text names both languages.
   """
   config_path = os.path.join(tempfile.mkdtemp(), 'config')
   self.create_temp_file(
       config_path,
       '{"language":"java", "version": "%s"}' % version.__version__)
   with self.assertRaises(RuntimeError) as exn:
     environment.check_sdk_compatibility(config_path)
   # Compare against str(exception) instead of the `.message` attribute:
   # `.message` was deprecated in Python 2.6 and is absent in Python 3,
   # whereas str() works on both.
   self.assertEqual(
       'SDK language \'python\' does not match container language \'java\'. '
       'Please rebuild the container using a matching language container.',
       str(exn.exception))
 def test_version_no_match(self):
   """Checks the error raised when the container version differs.

   Hands a config declaring version "0.0.0" to
   environment.check_sdk_compatibility and expects a RuntimeError whose text
   names both the SDK version and the container version.
   """
   config_path = os.path.join(tempfile.mkdtemp(), 'config')
   self.create_temp_file(
       config_path, '{"language":"python", "version": "0.0.0"}')
   with self.assertRaises(RuntimeError) as exn:
     environment.check_sdk_compatibility(config_path)
   # Compare against str(exception) instead of the `.message` attribute:
   # `.message` was deprecated in Python 2.6 and is absent in Python 3,
   # whereas str() works on both.
   self.assertEqual(
       'SDK version %s does not match container version 0.0.0. '
       'Please rebuild the container or use a matching version '
       'of the SDK.' % (
           version.__version__),
       str(exn.exception))
  def run(self):
    """Leases work items in a loop and executes them until shutdown.

    Before the loop starts this optionally configures GCE auth, runs the SDK
    compatibility check, loads the main session and starts the status server
    thread. A failure in the compatibility check or in loading the main
    session does not abort the worker: its traceback is kept and handed to
    do_work() together with each leased work item.
    """
    if self.running_in_gce:
      auth.set_running_in_gce(worker_executing_project=self.project_id)

    # Unrecoverable startup errors happen before anything can be reported to
    # the service, so their traceback is parked here and passed to do_work().
    # The intent is that the worker reports it through a work item and
    # eventually fails. None means startup went fine.
    # TODO(silviuc): Add the deferred exception mechanism to streaming worker
    startup_failure = None

    if self.environment_info_path is not None:
      try:
        environment.check_sdk_compatibility(self.environment_info_path)
      except Exception:  # pylint: disable=broad-except
        startup_failure = traceback.format_exc()
        logging.error(
            'SDK compatibility check failed: %s', startup_failure,
            exc_info=True)

    # The main session is only loaded if the compatibility check did not fail.
    if startup_failure is None:
      logging.info('Loading main session from the staging area...')
      try:
        self._load_main_session(self.local_staging_directory)
      except Exception:  # pylint: disable=broad-except
        startup_failure = traceback.format_exc()
        logging.error(
            'Could not load main session: %s', startup_failure, exc_info=True)

    # Serve status over HTTP from a daemon thread.
    status_thread = threading.Thread(target=self.status_server)
    status_thread.daemon = True
    status_thread.start()

    # The batch execution context is currently a placeholder, so a single
    # instance is shared by all work items.
    execution_context = maptask.BatchExecutionContext()
    work_item = None
    # Keep leasing, executing and reporting until shutdown is requested.
    while not self._shutdown:
      try:
        # lease_work retries server errors (e.g., 500s) on its own but does
        # not retry a 404 (no item to lease); in that case we back off with a
        # randomized sleep below.
        try:
          work = self.client.lease_work(
              self.worker_info_for_client(),
              self.default_desired_lease_duration())
          work_item = workitem.get_work_items(
              work, self.environment, execution_context)
          nothing_to_do = work_item is None
        except HttpError as exn:
          # Only not-found (404) is benign; anything else propagates to the
          # outer handler.
          if exn.status_code != 404:
            raise
          nothing_to_do = True
        if nothing_to_do:
          logging.debug('No work items. Sleeping a bit ...')
          # Jitter keeps workers from requesting leases in lock step.
          time.sleep(1.0 * (1 - 0.5 * random.random()))
          continue

        stage_name = (
            work_item.map_task.stage_name if work_item.map_task else None)

        with logger.PerThreadLoggingContext(
            work_item_id=work_item.proto.id, stage_name=stage_name):
          # TODO(silviuc): Add more detailed timing and profiling support.
          start_time = time.time()

          # do_work() marks the work item completed or failed. The progress
          # reporting thread sends updates and maintains the reporting
          # indexes and lease duration on the work item object.
          if self.work_item_profiling:
            with profiler.Profile(
                profile_id=work_item.proto.id,
                profile_location=self.profile_location,
                log_results=True):
              self.do_work(
                  work_item, deferred_exception_details=startup_failure)
          else:
            self.do_work(
                work_item, deferred_exception_details=startup_failure)

          logging.info(
              'Completed work item: %s in %.9f seconds',
              work_item.proto.id, time.time() - start_time)

      except Exception:  # pylint: disable=broad-except
        # Raised outside of executing a work item, most likely while leasing
        # one. Log it and carry on with the next iteration.
        logging.error(
            'Exception in worker loop: %s', traceback.format_exc(),
            exc_info=True)
        # Pause briefly so repeated failures do not become a busy loop.
        time.sleep(1)
 def test_basics(self):
     """Runs the compatibility check on a config matching this SDK.

     The config carries language "python" and the current
     version.__version__; the test passes as long as
     environment.check_sdk_compatibility raises nothing.
     """
     scratch_dir = tempfile.mkdtemp()
     config_path = os.path.join(scratch_dir, 'config')
     matching_config = (
         '{"language":"python", "version": "%s"}' % version.__version__)
     self.create_temp_file(config_path, matching_config)
     environment.check_sdk_compatibility(config_path)
 def test_basics(self):
   """Runs the compatibility check on a config matching this SDK.

   The config carries language "python" and the current
   version.__version__; the test passes as long as
   environment.check_sdk_compatibility raises nothing.
   """
   scratch_dir = tempfile.mkdtemp()
   config_path = os.path.join(scratch_dir, 'config')
   matching_config = (
       '{"language":"python", "version": "%s"}' % version.__version__)
   self.create_temp_file(config_path, matching_config)
   environment.check_sdk_compatibility(config_path)
Exemple #8
0
    def run(self):
        """Runs the lease/execute loop of the worker until shutdown.

        Startup steps performed first: optional GCE auth configuration, the
        SDK compatibility check, loading of the main session, and launching
        the status server thread. If the compatibility check or the main
        session load raises, the worker keeps going; the formatted traceback
        is stored and supplied to do_work() with each leased work item.
        """
        if self.running_in_gce:
            auth.set_running_in_gce(worker_executing_project=self.project_id)

        # Holds the traceback of an unrecoverable error that occurred before
        # it could be reported to the service. When set, it is passed to
        # do_work() so the worker can report it via a work item and
        # eventually fail. None means no such error occurred.
        # TODO(silviuc): Add the deferred exception mechanism to streaming worker
        pending_error = None

        if self.environment_info_path is not None:
            try:
                environment.check_sdk_compatibility(self.environment_info_path)
            except Exception:  # pylint: disable=broad-except
                pending_error = traceback.format_exc()
                logging.error(
                    'SDK compatibility check failed: %s',
                    pending_error,
                    exc_info=True)

        # Skip loading the main session when the check above already failed.
        if pending_error is None:
            logging.info('Loading main session from the staging area...')
            try:
                self._load_main_session(self.local_staging_directory)
            except Exception:  # pylint: disable=broad-except
                pending_error = traceback.format_exc()
                logging.error(
                    'Could not load main session: %s',
                    pending_error,
                    exc_info=True)

        # Launch the status HTTP server on a daemon thread.
        server_thread = threading.Thread(target=self.status_server)
        server_thread.daemon = True
        server_thread.start()

        # The batch execution context is a placeholder for now, so the same
        # object is reused for every work item.
        execution_context = maptask.BatchExecutionContext()
        work_item = None
        # Lease, execute and report in a loop until shutdown is flagged.
        while not self._shutdown:
            try:
                # lease_work retries on server errors (e.g., 500s) but not on
                # a 404 (no item to lease); the idle branch below adds a
                # randomized delay for that case.
                idle = False
                try:
                    lease = self.client.lease_work(
                        self.worker_info_for_client(),
                        self.default_desired_lease_duration())
                    work_item = workitem.get_work_items(
                        lease, self.environment, execution_context)
                    if work_item is None:
                        idle = True
                except HttpError as exn:
                    # A 404 is benign; every other status is re-raised.
                    if exn.status_code != 404:
                        raise
                    idle = True
                if idle:
                    logging.debug('No work items. Sleeping a bit ...')
                    # Randomize the delay so workers do not request leases
                    # in lock step.
                    time.sleep(1.0 * (1 - 0.5 * random.random()))
                    continue

                if not work_item.map_task:
                    stage_name = None
                else:
                    stage_name = work_item.map_task.stage_name

                with logger.PerThreadLoggingContext(
                        work_item_id=work_item.proto.id,
                        stage_name=stage_name):
                    # TODO(silviuc): Add more detailed timing and profiling support.
                    start_time = time.time()

                    # do_work() marks the item completed or failed. The
                    # progress reporting thread sends updates and keeps the
                    # reporting indexes and lease duration current on the
                    # work item object.
                    if not self.work_item_profiling:
                        self.do_work(
                            work_item,
                            deferred_exception_details=pending_error)
                    else:
                        with profiler.Profile(
                                profile_id=work_item.proto.id,
                                profile_location=self.profile_location,
                                log_results=True):
                            self.do_work(
                                work_item,
                                deferred_exception_details=pending_error)

                    logging.info(
                        'Completed work item: %s in %.9f seconds',
                        work_item.proto.id,
                        time.time() - start_time)

            except Exception:  # pylint: disable=broad-except
                # An exception from outside work item execution, most likely
                # from leasing. Log it and move on to the next iteration.
                logging.error(
                    'Exception in worker loop: %s',
                    traceback.format_exc(),
                    exc_info=True)
                # Short pause so repeated failures do not spin the loop.
                time.sleep(1)