def main(unused_argv):
  """Run the SDK Fn Harness until it exits or fails.

  The logging and control endpoints are read from the
  ``LOGGING_API_SERVICE_DESCRIPTOR`` and ``CONTROL_API_SERVICE_DESCRIPTOR``
  environment variables, each holding a text-format ``ApiServiceDescriptor``.
  A ``FnApiLogRecordHandler`` built from the logging descriptor is attached to
  the root logger, then an ``SdkHarness`` is run over an insecure gRPC channel
  to the control URL. The log handler is always closed on the way out.

  Args:
    unused_argv: Ignored.

  Raises:
    KeyError: If either environment variable is not set.
    AssertionError: If the control descriptor carries an OAuth2 client
      credentials URL, which is not supported yet.
  """
  log_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
  text_format.Merge(
      os.environ['LOGGING_API_SERVICE_DESCRIPTOR'], log_descriptor)

  # Send all logs to the runner.
  runner_log_handler = FnApiLogRecordHandler(log_descriptor)
  root_logger = logging.getLogger()
  # TODO(vikasrk): This should be picked up from pipeline options.
  root_logger.setLevel(logging.INFO)
  root_logger.addHandler(runner_log_handler)

  try:
    logging.info('Python sdk harness started.')
    control_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
    text_format.Merge(
        os.environ['CONTROL_API_SERVICE_DESCRIPTOR'], control_descriptor)
    # TODO(robertwb): Support credentials.
    assert not control_descriptor.oauth2_client_credentials_grant.url
    harness = SdkHarness(grpc.insecure_channel(control_descriptor.url))
    harness.run()
    logging.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    # Log whatever went wrong through the runner handler, then propagate.
    logging.exception('Python sdk harness failed: ')
    raise
  finally:
    runner_log_handler.close()
Esempio n. 2
0
def main(unused_argv):
  """Main entry point for SDK Fn Harness.

  Configuration is read from environment variables:

  * ``LOGGING_API_SERVICE_DESCRIPTOR`` (optional): text-format
    ``ApiServiceDescriptor``; when present, a ``FnApiLogRecordHandler`` is
    attached to the root logger.
  * ``PIPELINE_OPTIONS`` (optional): parsed by ``_parse_pipeline_options``;
    empty pipeline options are used when absent.
  * ``SEMI_PERSISTENT_DIRECTORY`` (optional): passed to
    ``_load_main_session``.
  * ``CONTROL_API_SERVICE_DESCRIPTOR`` (required): text-format
    ``ApiServiceDescriptor`` whose ``url`` is the harness control address.

  A failure to load the main session is logged and otherwise ignored. Any
  failure while running the harness is logged and re-raised.

  Args:
    unused_argv: Ignored.

  Raises:
    KeyError: If ``CONTROL_API_SERVICE_DESCRIPTOR`` is not set.
    AssertionError: If the control descriptor carries an OAuth2 client
      credentials URL, which is not supported yet.
  """
  fn_log_handler = None
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
    logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
                      logging_service_descriptor)

    # Send all logs to the runner.
    fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
    root_logger = logging.getLogger()
    # TODO(BEAM-5468): This should be picked up from pipeline options.
    root_logger.setLevel(logging.INFO)
    root_logger.addHandler(fn_log_handler)
    logging.info('Logging handler created.')

  # Start status HTTP server thread. The name is passed to the constructor
  # because Thread.setName() is deprecated.
  status_thread = threading.Thread(
      target=StatusServer().start, name='status-server-demon')
  # Daemon thread: it must not keep the process alive once main() returns.
  status_thread.daemon = True
  status_thread.start()

  if 'PIPELINE_OPTIONS' in os.environ:
    sdk_pipeline_options = _parse_pipeline_options(
        os.environ['PIPELINE_OPTIONS'])
  else:
    sdk_pipeline_options = PipelineOptions.from_dictionary({})

  # None when the variable is not set.
  semi_persistent_directory = os.environ.get('SEMI_PERSISTENT_DIRECTORY')

  logging.info('semi_persistent_directory: %s', semi_persistent_directory)

  try:
    _load_main_session(semi_persistent_directory)
  except Exception:  # pylint: disable=broad-except
    # Best effort: the harness is still started without the main session.
    exception_details = traceback.format_exc()
    logging.error(
        'Could not load main session: %s', exception_details, exc_info=True)

  try:
    logging.info('Python sdk harness started with pipeline_options: %s',
                 sdk_pipeline_options.get_all_options(drop_default=True))
    service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
                      service_descriptor)
    # TODO(robertwb): Support credentials.
    assert not service_descriptor.oauth2_client_credentials_grant.url
    SdkHarness(
        control_address=service_descriptor.url,
        worker_count=_get_worker_count(sdk_pipeline_options)).run()
    logging.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    logging.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()
Esempio n. 3
0
def main(unused_argv):
    """Start the SDK Fn Harness and block until it finishes.

    Both endpoints come from environment variables holding text-format
    ``ApiServiceDescriptor`` messages: ``LOGGING_API_SERVICE_DESCRIPTOR`` for
    the ``FnApiLogRecordHandler`` attached to the root logger, and
    ``CONTROL_API_SERVICE_DESCRIPTOR`` for the insecure gRPC channel the
    ``SdkHarness`` runs on. The log handler is closed whether the harness
    succeeds or fails.

    Args:
        unused_argv: Ignored.

    Raises:
        KeyError: If either environment variable is not set.
        AssertionError: If the control descriptor carries an OAuth2 client
            credentials URL, which is not supported yet.
    """
    env = os.environ

    logging_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
    text_format.Merge(env['LOGGING_API_SERVICE_DESCRIPTOR'],
                      logging_descriptor)

    # Send all logs to the runner.
    log_handler = FnApiLogRecordHandler(logging_descriptor)
    root = logging.getLogger()
    # TODO(vikasrk): This should be picked up from pipeline options.
    root.setLevel(logging.INFO)
    root.addHandler(log_handler)

    try:
        logging.info('Python sdk harness started.')
        control_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
        text_format.Merge(env['CONTROL_API_SERVICE_DESCRIPTOR'],
                          control_descriptor)
        # TODO(robertwb): Support credentials.
        assert not control_descriptor.oauth2_client_credentials_grant.url
        control_channel = grpc.insecure_channel(control_descriptor.url)
        SdkHarness(control_channel).run()
        logging.info('Python sdk harness exiting.')
    except:  # pylint: disable=broad-except
        # Report the failure through the runner handler before propagating.
        logging.exception('Python sdk harness failed: ')
        raise
    finally:
        log_handler.close()
Esempio n. 4
0
def main(unused_argv):
    """Main entry point for SDK Fn Harness.

    Configuration is read from environment variables:

    * ``LOGGING_API_SERVICE_DESCRIPTOR`` (optional): text-format
      ``ApiServiceDescriptor``; when present, a ``FnApiLogRecordHandler`` is
      attached to the root logger.
    * ``PIPELINE_OPTIONS`` (optional): parsed by ``_parse_pipeline_options``;
      empty pipeline options are used when absent.
    * ``SEMI_PERSISTENT_DIRECTORY`` (optional): passed to
      ``_load_main_session``.
    * ``CONTROL_API_SERVICE_DESCRIPTOR`` (required): text-format
      ``ApiServiceDescriptor`` whose ``url`` is the harness control address.

    A failure to load the main session is logged and otherwise ignored. Any
    failure while running the harness is logged and re-raised.

    Args:
        unused_argv: Ignored.

    Raises:
        KeyError: If ``CONTROL_API_SERVICE_DESCRIPTOR`` is not set.
        AssertionError: If the control descriptor carries an OAuth2 client
            credentials URL, which is not supported yet.
    """
    fn_log_handler = None
    if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
        logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
        text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
                          logging_service_descriptor)

        # Send all logs to the runner.
        fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
        root_logger = logging.getLogger()
        # TODO(vikasrk): This should be picked up from pipeline options.
        root_logger.setLevel(logging.INFO)
        root_logger.addHandler(fn_log_handler)
        logging.info('Logging handler created.')

    # Start status HTTP server thread. The name is passed to the constructor
    # because Thread.setName() is deprecated.
    status_thread = threading.Thread(target=StatusServer().start,
                                     name='status-server-demon')
    # Daemon thread: it must not keep the process alive once main() returns.
    status_thread.daemon = True
    status_thread.start()

    if 'PIPELINE_OPTIONS' in os.environ:
        sdk_pipeline_options = _parse_pipeline_options(
            os.environ['PIPELINE_OPTIONS'])
    else:
        sdk_pipeline_options = PipelineOptions.from_dictionary({})

    # None when the variable is not set.
    semi_persistent_directory = os.environ.get('SEMI_PERSISTENT_DIRECTORY')

    logging.info('semi_persistent_directory: %s', semi_persistent_directory)

    try:
        _load_main_session(semi_persistent_directory)
    except Exception:  # pylint: disable=broad-except
        # Best effort: the harness is still started without the main session.
        exception_details = traceback.format_exc()
        logging.error('Could not load main session: %s',
                      exception_details,
                      exc_info=True)

    try:
        logging.info('Python sdk harness started with pipeline_options: %s',
                     sdk_pipeline_options.get_all_options(drop_default=True))
        service_descriptor = endpoints_pb2.ApiServiceDescriptor()
        text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
                          service_descriptor)
        # TODO(robertwb): Support credentials.
        assert not service_descriptor.oauth2_client_credentials_grant.url
        SdkHarness(control_address=service_descriptor.url,
                   worker_count=_get_worker_count(sdk_pipeline_options)).run()
        logging.info('Python sdk harness exiting.')
    except:  # pylint: disable=broad-except
        logging.exception('Python sdk harness failed: ')
        raise
    finally:
        if fn_log_handler:
            fn_log_handler.close()
Esempio n. 5
0
def create_harness(environment, dry_run=False):
    """Build the SDK Fn Harness from an environment mapping without running it.

    Args:
        environment: Mapping of configuration names to string values. It is
            read for ``LOGGING_API_SERVICE_DESCRIPTOR``, ``PIPELINE_OPTIONS``,
            ``SEMI_PERSISTENT_DIRECTORY``, ``WORKER_ID`` and
            ``STATUS_API_SERVICE_DESCRIPTOR`` (all optional) and
            ``CONTROL_API_SERVICE_DESCRIPTOR`` (required).
        dry_run: When true, perform all setup and validation but stop before
            constructing the ``SdkHarness``.

    Returns:
        ``None`` when ``dry_run`` is true; otherwise a
        ``(fn_log_handler, sdk_harness)`` tuple, where ``fn_log_handler`` is
        ``None`` if no logging descriptor was given or its setup failed.

    Raises:
        CorruptMainSessionException: Re-raised (after logging) when loading
            the main session fails with it; other main-session failures are
            only logged.
        AssertionError: If the control descriptor has its ``authentication``
            field set, which is not supported yet.
    """
    fn_log_handler = None
    if 'LOGGING_API_SERVICE_DESCRIPTOR' in environment:
        try:
            log_descriptor = endpoints_pb2.ApiServiceDescriptor()
            text_format.Merge(environment['LOGGING_API_SERVICE_DESCRIPTOR'],
                              log_descriptor)

            # Send all logs to the runner.
            fn_log_handler = FnApiLogRecordHandler(log_descriptor)
            root_logger = logging.getLogger()
            # TODO(BEAM-5468): This should be picked up from pipeline options.
            root_logger.setLevel(logging.INFO)
            root_logger.addHandler(fn_log_handler)
            _LOGGER.info('Logging handler created.')
        except Exception:
            _LOGGER.error(
                "Failed to set up logging handler, continuing without.",
                exc_info=True)
            # Discard a handler that was created but not fully installed.
            fn_log_handler = None

    options_dict = _load_pipeline_options(environment.get('PIPELINE_OPTIONS'))
    # These are used for dataflow templates.
    RuntimeValueProvider.set_runtime_options(options_dict)
    sdk_pipeline_options = PipelineOptions.from_dictionary(options_dict)
    filesystems.FileSystems.set_options(sdk_pipeline_options)

    semi_persistent_directory = (
        environment['SEMI_PERSISTENT_DIRECTORY']
        if 'SEMI_PERSISTENT_DIRECTORY' in environment else None)

    _LOGGER.info('semi_persistent_directory: %s', semi_persistent_directory)
    worker_id = environment.get('WORKER_ID', None)

    try:
        _load_main_session(semi_persistent_directory)
    except CorruptMainSessionException:
        # A corrupt main session is fatal: log it and propagate.
        _LOGGER.error('Could not load main session: %s',
                      traceback.format_exc(),
                      exc_info=True)
        raise
    except Exception:  # pylint: disable=broad-except
        # Any other failure is logged and harness creation continues.
        _LOGGER.error('Could not load main session: %s',
                      traceback.format_exc(),
                      exc_info=True)

    _LOGGER.info('Pipeline_options: %s',
                 sdk_pipeline_options.get_all_options(drop_default=True))
    control_descriptor = endpoints_pb2.ApiServiceDescriptor()
    status_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(environment['CONTROL_API_SERVICE_DESCRIPTOR'],
                      control_descriptor)
    if 'STATUS_API_SERVICE_DESCRIPTOR' in environment:
        text_format.Merge(environment['STATUS_API_SERVICE_DESCRIPTOR'],
                          status_descriptor)
    # TODO(robertwb): Support authentication.
    assert not control_descriptor.HasField('authentication')

    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
    heap_dump_enabled = 'enable_heap_dump' in experiments
    if dry_run:
        return

    harness = SdkHarness(
        control_address=control_descriptor.url,
        status_address=status_descriptor.url,
        worker_id=worker_id,
        state_cache_size=_get_state_cache_size(experiments),
        data_buffer_time_limit_ms=_get_data_buffer_time_limit_ms(experiments),
        profiler_factory=profiler.Profile.factory_from_options(
            sdk_pipeline_options.view_as(ProfilingOptions)),
        enable_heap_dump=heap_dump_enabled)
    return fn_log_handler, harness
Esempio n. 6
0
def main(unused_argv):
  """Main entry point for SDK Fn Harness.

  Configuration is read from environment variables:

  * ``LOGGING_API_SERVICE_DESCRIPTOR`` (optional): text-format
    ``ApiServiceDescriptor``; when present, a ``FnApiLogRecordHandler`` is
    attached to the root logger. A failure here is logged and the harness
    continues without the handler.
  * ``PIPELINE_OPTIONS`` (optional): parsed by ``_parse_pipeline_options``;
    empty pipeline options are used when absent.
  * ``SEMI_PERSISTENT_DIRECTORY`` (optional): passed to
    ``_load_main_session``.
  * ``WORKER_ID`` (optional): forwarded to the harness.
  * ``CONTROL_API_SERVICE_DESCRIPTOR`` (required) and
    ``STATUS_API_SERVICE_DESCRIPTOR`` (optional): text-format
    ``ApiServiceDescriptor`` messages whose ``url`` fields are the harness
    control and status addresses.

  A failure to load the main session is logged and otherwise ignored. Any
  failure while running the harness is logged and re-raised.

  Args:
    unused_argv: Ignored.

  Raises:
    KeyError: If ``CONTROL_API_SERVICE_DESCRIPTOR`` is not set.
    AssertionError: If the control descriptor has its ``authentication``
      field set, which is not supported yet.
  """
  fn_log_handler = None
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
    try:
      logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
      text_format.Merge(
          os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
          logging_service_descriptor)

      # Send all logs to the runner.
      fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
      root_logger = logging.getLogger()
      # TODO(BEAM-5468): This should be picked up from pipeline options.
      root_logger.setLevel(logging.INFO)
      root_logger.addHandler(fn_log_handler)
      _LOGGER.info('Logging handler created.')
    except Exception:
      _LOGGER.error(
          "Failed to set up logging handler, continuing without.",
          exc_info=True)
      # Discard a handler that was created but not fully installed.
      fn_log_handler = None

  # Start status HTTP server thread. The thread used to be created as
  # 'status_http_server' and immediately renamed through the deprecated
  # Thread.setName(); only the effective name is kept, set once here.
  status_thread = threading.Thread(
      name='status-server-demon', target=StatusServer().start)
  # Daemon thread: it must not keep the process alive once main() returns.
  status_thread.daemon = True
  status_thread.start()

  if 'PIPELINE_OPTIONS' in os.environ:
    sdk_pipeline_options = _parse_pipeline_options(
        os.environ['PIPELINE_OPTIONS'])
  else:
    sdk_pipeline_options = PipelineOptions.from_dictionary({})

  # None when the variable is not set.
  semi_persistent_directory = os.environ.get('SEMI_PERSISTENT_DIRECTORY')

  _LOGGER.info('semi_persistent_directory: %s', semi_persistent_directory)
  _worker_id = os.environ.get('WORKER_ID', None)

  try:
    _load_main_session(semi_persistent_directory)
  except Exception:  # pylint: disable=broad-except
    # Best effort: the harness is still started without the main session.
    exception_details = traceback.format_exc()
    _LOGGER.error(
        'Could not load main session: %s', exception_details, exc_info=True)

  try:
    _LOGGER.info(
        'Python sdk harness started with pipeline_options: %s',
        sdk_pipeline_options.get_all_options(drop_default=True))
    control_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(
        os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
        control_service_descriptor)
    if 'STATUS_API_SERVICE_DESCRIPTOR' in os.environ:
      text_format.Merge(
          os.environ['STATUS_API_SERVICE_DESCRIPTOR'],
          status_service_descriptor)
    # TODO(robertwb): Support authentication.
    assert not control_service_descriptor.HasField('authentication')

    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
    enable_heap_dump = 'enable_heap_dump' in experiments
    SdkHarness(
        control_address=control_service_descriptor.url,
        status_address=status_service_descriptor.url,
        worker_id=_worker_id,
        state_cache_size=_get_state_cache_size(experiments),
        data_buffer_time_limit_ms=_get_data_buffer_time_limit_ms(experiments),
        profiler_factory=profiler.Profile.factory_from_options(
            sdk_pipeline_options.view_as(ProfilingOptions)),
        enable_heap_dump=enable_heap_dump).run()
    _LOGGER.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    _LOGGER.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()
    def _start_sdk_worker_main(
            self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
        """Run one SDK harness worker to completion for ``start_worker_request``.

        Workers share process-wide state, tracked with ``self._ref_cnt`` under
        ``self._parse_param_lock`` (assumed to support the context-manager
        protocol, like ``threading.Lock``):

        * When the count is zero on entry, the request's ``PYTHONPATH`` entries
          are prepended to ``sys.path``, the process changes to
          ``_PYTHON_WORKING_DIR`` (each only if present in the params), and
          all params are merged into ``os.environ``.
        * When the count drops back to zero on exit, the saved ``sys.path``
          and working directory are restored.

        Pipeline options and the logging/control endpoints are fetched from
        the request's provision endpoint, root logging is redirected to a
        ``FnApiLogRecordHandler`` (best effort), and an ``SdkHarness`` is run.

        The reference count is released and the log handler closed on every
        exit path after the count was taken, including failures while
        contacting the provision service or parsing the pipeline options.

        Args:
            start_worker_request: Request carrying ``params``, ``worker_id``
                and ``provision_endpoint``.

        Raises:
            Any exception raised while provisioning, parsing options or
            running the harness; harness failures are logged first.
        """
        params = start_worker_request.params
        # Use the lock as a context manager so it is released even if the
        # environment preparation below raises (e.g. os.chdir on a missing
        # directory); a leaked lock would block every later worker.
        with self._parse_param_lock:
            # The first thread to start is responsible for preparing all
            # execution environment.
            if not self._ref_cnt:
                if 'PYTHONPATH' in params:
                    self._old_python_path = sys.path[:]
                    # Prepend the entries in place, keeping their order.
                    sys.path[:0] = params['PYTHONPATH'].split(':')
                if '_PYTHON_WORKING_DIR' in params:
                    self._old_working_dir = os.getcwd()
                    os.chdir(params['_PYTHON_WORKING_DIR'])
                os.environ.update(params)
            self._ref_cnt += 1

        fn_log_handler = None
        # Everything after the reference count is taken runs under this
        # try/finally so the count is always given back.
        try:
            # read job information from provision stub
            metadata = [("worker_id", start_worker_request.worker_id)]
            provision_endpoint = start_worker_request.provision_endpoint.url
            with grpc.insecure_channel(provision_endpoint) as channel:
                client = ProvisionServiceStub(channel=channel)
                info = client.GetProvisionInfo(GetProvisionInfoRequest(),
                                               metadata=metadata).info
                options = json_format.MessageToJson(info.pipeline_options)
                logging_endpoint = info.logging_endpoint.url
                control_endpoint = info.control_endpoint.url

            try:
                logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                    url=logging_endpoint)

                # Send all logs to the runner.
                fn_log_handler = FnApiLogRecordHandler(
                    logging_service_descriptor)
                root_logger = logging.getLogger()
                root_logger.setLevel(logging.INFO)
                # Remove all the built-in log handles
                root_logger.handlers = []
                root_logger.addHandler(fn_log_handler)
                logging.info("Starting up Python worker in loopback mode.")
            except Exception:
                _LOGGER.error(
                    "Failed to set up logging handler, continuing without.",
                    exc_info=True)
                fn_log_handler = None

            sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(
                options)

            _worker_id = start_worker_request.worker_id

            try:
                control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                    url=control_endpoint)
                status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

                experiments = sdk_pipeline_options.view_as(
                    DebugOptions).experiments or []
                enable_heap_dump = 'enable_heap_dump' in experiments
                SdkHarness(
                    control_address=control_service_descriptor.url,
                    status_address=status_service_descriptor.url,
                    worker_id=_worker_id,
                    state_cache_size=sdk_worker_main._get_state_cache_size(
                        experiments),
                    data_buffer_time_limit_ms=sdk_worker_main.
                    _get_data_buffer_time_limit_ms(experiments),
                    profiler_factory=profiler.Profile.factory_from_options(
                        sdk_pipeline_options.view_as(ProfilingOptions)),
                    enable_heap_dump=enable_heap_dump).run()
            except:  # pylint: disable=broad-except
                _LOGGER.exception('Python sdk harness failed: ')
                raise
        finally:
            try:
                with self._parse_param_lock:
                    self._ref_cnt -= 1
                    # The last thread to exit is responsible for reverting
                    # working directory and sys.path.
                    if self._ref_cnt == 0:
                        if self._old_python_path is not None:
                            # Restore in place so existing references to
                            # sys.path see the original entries.
                            sys.path[:] = self._old_python_path
                            self._old_python_path = None
                        if self._old_working_dir is not None:
                            os.chdir(self._old_working_dir)
                            self._old_working_dir = None
            finally:
                # Close the handler even if restoring the environment failed.
                if fn_log_handler:
                    fn_log_handler.close()
Esempio n. 8
0
    def _start_sdk_worker_main(
            self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
        """Run one SDK harness worker to completion for ``start_worker_request``.

        Under ``self._parse_param_lock`` (assumed to support the
        context-manager protocol, like ``threading.Lock``) the request's
        ``PYTHONPATH`` entries are prepended to ``sys.path``, the process
        changes to ``_PYTHON_WORKING_DIR`` (each only if present in the
        params), and all params are merged into ``os.environ``. These
        process-wide changes are not reverted afterwards.

        Pipeline options and the logging/control endpoints are then fetched
        from the request's provision endpoint, a ``FnApiLogRecordHandler`` is
        added to the root logger (best effort), and an ``SdkHarness`` is run.
        The log handler is closed on every exit path once it exists.

        Args:
            start_worker_request: Request carrying ``params``, ``worker_id``
                and ``provision_endpoint``.

        Raises:
            Any exception raised while provisioning, parsing options or
            running the harness; harness failures are logged first.
        """
        params = start_worker_request.params
        # Use the lock as a context manager so it is released even if the
        # environment preparation raises (e.g. os.chdir on a missing
        # directory); a leaked lock would block every later worker.
        with self._parse_param_lock:
            if 'PYTHONPATH' in params:
                # Prepend the entries in place, keeping their order.
                sys.path[:0] = params['PYTHONPATH'].split(':')
            if '_PYTHON_WORKING_DIR' in params:
                os.chdir(params['_PYTHON_WORKING_DIR'])
            os.environ.update(params)

        # read job information from provision stub
        metadata = [("worker_id", start_worker_request.worker_id)]
        provision_endpoint = start_worker_request.provision_endpoint.url
        with grpc.insecure_channel(provision_endpoint) as channel:
            client = ProvisionServiceStub(channel=channel)
            info = client.GetProvisionInfo(GetProvisionInfoRequest(),
                                           metadata=metadata).info
            options = json_format.MessageToJson(info.pipeline_options)
            logging_endpoint = info.logging_endpoint.url
            control_endpoint = info.control_endpoint.url

        fn_log_handler = None
        try:
            logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                url=logging_endpoint)

            # Send all logs to the runner.
            fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
            root_logger = logging.getLogger()
            root_logger.setLevel(logging.ERROR)
            root_logger.addHandler(fn_log_handler)
        except Exception:
            _LOGGER.error(
                "Failed to set up logging handler, continuing without.",
                exc_info=True)
            fn_log_handler = None

        # From here on the handler (if any) must be closed on every exit
        # path, including a failure to parse the pipeline options.
        try:
            sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(
                options)

            _worker_id = start_worker_request.worker_id

            try:
                control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
                    url=control_endpoint)
                status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

                experiments = sdk_pipeline_options.view_as(
                    DebugOptions).experiments or []
                enable_heap_dump = 'enable_heap_dump' in experiments
                SdkHarness(
                    control_address=control_service_descriptor.url,
                    status_address=status_service_descriptor.url,
                    worker_id=_worker_id,
                    state_cache_size=sdk_worker_main._get_state_cache_size(
                        experiments),
                    data_buffer_time_limit_ms=sdk_worker_main.
                    _get_data_buffer_time_limit_ms(experiments),
                    profiler_factory=profiler.Profile.factory_from_options(
                        sdk_pipeline_options.view_as(ProfilingOptions)),
                    enable_heap_dump=enable_heap_dump).run()
            except:  # pylint: disable=broad-except
                _LOGGER.exception('Python sdk harness failed: ')
                raise
        finally:
            if fn_log_handler:
                fn_log_handler.close()