def main(unused_argv):
  """Main entry point for SDK Fn Harness."""
  logging_service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
  text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
                    logging_service_descriptor)

  # Send all logs to the runner.
  fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
  # TODO(vikasrk): This should be picked up from pipeline options.
  logging.getLogger().setLevel(logging.INFO)
  logging.getLogger().addHandler(fn_log_handler)

  try:
    logging.info('Python sdk harness started.')
    service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
                      service_descriptor)
    # TODO(robertwb): Support credentials.
    assert not service_descriptor.oauth2_client_credentials_grant.url
    channel = grpc.insecure_channel(service_descriptor.url)
    SdkHarness(channel).run()
    logging.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    logging.exception('Python sdk harness failed: ')
    raise
  finally:
    fn_log_handler.close()
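# For context: the runner hands the harness its endpoints as protobuf
# text-format ApiServiceDescriptor messages in environment variables. Below is
# a minimal, standalone sketch of that parsing step. The endpoint value is a
# placeholder; later revisions below import the message from endpoints_pb2,
# while the snippet above still uses beam_fn_api_pb2.
from apache_beam.portability.api import endpoints_pb2
from google.protobuf import text_format

descriptor_text = 'url: "localhost:50051"'  # hypothetical runner-provided value
descriptor = endpoints_pb2.ApiServiceDescriptor()
text_format.Merge(descriptor_text, descriptor)
assert descriptor.url == 'localhost:50051'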
def main(unused_argv):
  """Main entry point for SDK Fn Harness."""
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
    logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
                      logging_service_descriptor)

    # Send all logs to the runner.
    fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
    # TODO(BEAM-5468): This should be picked up from pipeline options.
    logging.getLogger().setLevel(logging.INFO)
    logging.getLogger().addHandler(fn_log_handler)
    logging.info('Logging handler created.')
  else:
    fn_log_handler = None

  # Start status HTTP server thread.
  thread = threading.Thread(target=StatusServer().start)
  thread.daemon = True
  thread.setName('status-server-demon')
  thread.start()

  if 'PIPELINE_OPTIONS' in os.environ:
    sdk_pipeline_options = _parse_pipeline_options(
        os.environ['PIPELINE_OPTIONS'])
  else:
    sdk_pipeline_options = PipelineOptions.from_dictionary({})

  if 'SEMI_PERSISTENT_DIRECTORY' in os.environ:
    semi_persistent_directory = os.environ['SEMI_PERSISTENT_DIRECTORY']
  else:
    semi_persistent_directory = None

  logging.info('semi_persistent_directory: %s', semi_persistent_directory)

  try:
    _load_main_session(semi_persistent_directory)
  except Exception:  # pylint: disable=broad-except
    exception_details = traceback.format_exc()
    logging.error(
        'Could not load main session: %s', exception_details, exc_info=True)

  try:
    logging.info('Python sdk harness started with pipeline_options: %s',
                 sdk_pipeline_options.get_all_options(drop_default=True))
    service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
                      service_descriptor)
    # TODO(robertwb): Support credentials.
    assert not service_descriptor.oauth2_client_credentials_grant.url
    SdkHarness(
        control_address=service_descriptor.url,
        worker_count=_get_worker_count(sdk_pipeline_options)).run()
    logging.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    logging.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()
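# The worker_count passed to SdkHarness above comes from a helper that
# inspects the pipeline options. A plausible sketch, assuming the count is
# carried as a 'worker_threads=<n>' experiment; the experiment name and the
# fallback of 12 are assumptions, not necessarily the shipped defaults.
import re
from apache_beam.options.pipeline_options import DebugOptions

def _get_worker_count(pipeline_options):
  experiments = pipeline_options.view_as(DebugOptions).experiments or []
  for experiment in experiments:
    match = re.match(r'worker_threads=(?P<threads>\d+)', experiment)
    if match:
      return int(match.group('threads'))
  return 12  # assumed default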
def main(unused_argv):
  """Main entry point for SDK Fn Harness."""
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
    logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
                      logging_service_descriptor)

    # Send all logs to the runner.
    fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
    # TODO(vikasrk): This should be picked up from pipeline options.
    logging.getLogger().setLevel(logging.INFO)
    logging.getLogger().addHandler(fn_log_handler)
    logging.info('Logging handler created.')
  else:
    fn_log_handler = None

  # Start status HTTP server thread.
  thread = threading.Thread(target=StatusServer().start)
  thread.daemon = True
  thread.setName('status-server-demon')
  thread.start()

  if 'PIPELINE_OPTIONS' in os.environ:
    sdk_pipeline_options = _parse_pipeline_options(
        os.environ['PIPELINE_OPTIONS'])
  else:
    sdk_pipeline_options = PipelineOptions.from_dictionary({})

  if 'SEMI_PERSISTENT_DIRECTORY' in os.environ:
    semi_persistent_directory = os.environ['SEMI_PERSISTENT_DIRECTORY']
  else:
    semi_persistent_directory = None

  logging.info('semi_persistent_directory: %s', semi_persistent_directory)

  try:
    _load_main_session(semi_persistent_directory)
  except Exception:  # pylint: disable=broad-except
    exception_details = traceback.format_exc()
    logging.error('Could not load main session: %s', exception_details,
                  exc_info=True)

  try:
    logging.info('Python sdk harness started with pipeline_options: %s',
                 sdk_pipeline_options.get_all_options(drop_default=True))
    service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
                      service_descriptor)
    # TODO(robertwb): Support credentials.
    assert not service_descriptor.oauth2_client_credentials_grant.url
    SdkHarness(control_address=service_descriptor.url,
               worker_count=_get_worker_count(sdk_pipeline_options)).run()
    logging.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    logging.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()
def create_harness(environment, dry_run=False):
  """Creates SDK Fn Harness."""
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in environment:
    try:
      logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
      text_format.Merge(environment['LOGGING_API_SERVICE_DESCRIPTOR'],
                        logging_service_descriptor)

      # Send all logs to the runner.
      fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
      # TODO(BEAM-5468): This should be picked up from pipeline options.
      logging.getLogger().setLevel(logging.INFO)
      logging.getLogger().addHandler(fn_log_handler)
      _LOGGER.info('Logging handler created.')
    except Exception:
      _LOGGER.error(
          "Failed to set up logging handler, continuing without.",
          exc_info=True)
      fn_log_handler = None
  else:
    fn_log_handler = None

  pipeline_options_dict = _load_pipeline_options(
      environment.get('PIPELINE_OPTIONS'))
  # These are used for dataflow templates.
  RuntimeValueProvider.set_runtime_options(pipeline_options_dict)
  sdk_pipeline_options = PipelineOptions.from_dictionary(pipeline_options_dict)
  filesystems.FileSystems.set_options(sdk_pipeline_options)

  if 'SEMI_PERSISTENT_DIRECTORY' in environment:
    semi_persistent_directory = environment['SEMI_PERSISTENT_DIRECTORY']
  else:
    semi_persistent_directory = None
  _LOGGER.info('semi_persistent_directory: %s', semi_persistent_directory)

  _worker_id = environment.get('WORKER_ID', None)

  try:
    _load_main_session(semi_persistent_directory)
  except CorruptMainSessionException:
    exception_details = traceback.format_exc()
    _LOGGER.error(
        'Could not load main session: %s', exception_details, exc_info=True)
    raise
  except Exception:  # pylint: disable=broad-except
    exception_details = traceback.format_exc()
    _LOGGER.error(
        'Could not load main session: %s', exception_details, exc_info=True)

  _LOGGER.info(
      'Pipeline_options: %s',
      sdk_pipeline_options.get_all_options(drop_default=True))
  control_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
  status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
  text_format.Merge(environment['CONTROL_API_SERVICE_DESCRIPTOR'],
                    control_service_descriptor)
  if 'STATUS_API_SERVICE_DESCRIPTOR' in environment:
    text_format.Merge(environment['STATUS_API_SERVICE_DESCRIPTOR'],
                      status_service_descriptor)
  # TODO(robertwb): Support authentication.
  assert not control_service_descriptor.HasField('authentication')

  experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
  enable_heap_dump = 'enable_heap_dump' in experiments
  if dry_run:
    return

  sdk_harness = SdkHarness(
      control_address=control_service_descriptor.url,
      status_address=status_service_descriptor.url,
      worker_id=_worker_id,
      state_cache_size=_get_state_cache_size(experiments),
      data_buffer_time_limit_ms=_get_data_buffer_time_limit_ms(experiments),
      profiler_factory=profiler.Profile.factory_from_options(
          sdk_pipeline_options.view_as(ProfilingOptions)),
      enable_heap_dump=enable_heap_dump)
  return fn_log_handler, sdk_harness
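# _load_pipeline_options above turns the JSON blob from PIPELINE_OPTIONS into
# a plain dict. A sketch, assuming the two formats seen across revisions: a
# legacy {'options': {...}} wrapper and flat keys using the portable
# 'beam:option:<name>:v1' URN convention (the exact regex is an assumption).
import json
import re

def _load_pipeline_options(options_json):
  if options_json is None:
    return {}
  options = json.loads(options_json)
  # Check the 'options' field first for backward compatibility.
  if 'options' in options:
    return options.get('options')
  # Otherwise strip the URN wrapper from each key.
  portable_option_regex = r'^beam:option:(?P<key>.*):v1$'
  return {
      re.match(portable_option_regex, k).group('key')
      if re.match(portable_option_regex, k) else k: v
      for k, v in options.items()
  }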
def main(unused_argv):
  """Main entry point for SDK Fn Harness."""
  if 'LOGGING_API_SERVICE_DESCRIPTOR' in os.environ:
    try:
      logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
      text_format.Merge(
          os.environ['LOGGING_API_SERVICE_DESCRIPTOR'],
          logging_service_descriptor)

      # Send all logs to the runner.
      fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
      # TODO(BEAM-5468): This should be picked up from pipeline options.
      logging.getLogger().setLevel(logging.INFO)
      logging.getLogger().addHandler(fn_log_handler)
      _LOGGER.info('Logging handler created.')
    except Exception:
      _LOGGER.error(
          "Failed to set up logging handler, continuing without.",
          exc_info=True)
      fn_log_handler = None
  else:
    fn_log_handler = None

  # Start status HTTP server thread (named via the constructor).
  thread = threading.Thread(
      name='status_http_server', target=StatusServer().start)
  thread.daemon = True
  thread.start()

  if 'PIPELINE_OPTIONS' in os.environ:
    sdk_pipeline_options = _parse_pipeline_options(
        os.environ['PIPELINE_OPTIONS'])
  else:
    sdk_pipeline_options = PipelineOptions.from_dictionary({})

  if 'SEMI_PERSISTENT_DIRECTORY' in os.environ:
    semi_persistent_directory = os.environ['SEMI_PERSISTENT_DIRECTORY']
  else:
    semi_persistent_directory = None

  _LOGGER.info('semi_persistent_directory: %s', semi_persistent_directory)

  _worker_id = os.environ.get('WORKER_ID', None)

  try:
    _load_main_session(semi_persistent_directory)
  except Exception:  # pylint: disable=broad-except
    exception_details = traceback.format_exc()
    _LOGGER.error(
        'Could not load main session: %s', exception_details, exc_info=True)

  try:
    _LOGGER.info(
        'Python sdk harness started with pipeline_options: %s',
        sdk_pipeline_options.get_all_options(drop_default=True))
    control_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    text_format.Merge(
        os.environ['CONTROL_API_SERVICE_DESCRIPTOR'],
        control_service_descriptor)
    if 'STATUS_API_SERVICE_DESCRIPTOR' in os.environ:
      text_format.Merge(
          os.environ['STATUS_API_SERVICE_DESCRIPTOR'],
          status_service_descriptor)
    # TODO(robertwb): Support authentication.
    assert not control_service_descriptor.HasField('authentication')

    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
    enable_heap_dump = 'enable_heap_dump' in experiments

    SdkHarness(
        control_address=control_service_descriptor.url,
        status_address=status_service_descriptor.url,
        worker_id=_worker_id,
        state_cache_size=_get_state_cache_size(experiments),
        data_buffer_time_limit_ms=_get_data_buffer_time_limit_ms(experiments),
        profiler_factory=profiler.Profile.factory_from_options(
            sdk_pipeline_options.view_as(ProfilingOptions)),
        enable_heap_dump=enable_heap_dump).run()
    _LOGGER.info('Python sdk harness exiting.')
  except:  # pylint: disable=broad-except
    _LOGGER.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()
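# How a runner (or a local test) would boot this entry point: populate the
# environment with text-format descriptors and call main(). All endpoint and
# option values below are placeholders for illustration only.
import os

os.environ['CONTROL_API_SERVICE_DESCRIPTOR'] = 'url: "localhost:8098"'
os.environ['LOGGING_API_SERVICE_DESCRIPTOR'] = 'url: "localhost:8099"'
os.environ['PIPELINE_OPTIONS'] = '{"options": {"job_name": "demo"}}'
os.environ['WORKER_ID'] = 'worker-0'

main([])  # blocks roughly until the runner closes the control connection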
def _start_sdk_worker_main(
    self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
  params = start_worker_request.params
  self._parse_param_lock.acquire()
  # The first thread to start is responsible for preparing the whole
  # execution environment.
  if not self._ref_cnt:
    if 'PYTHONPATH' in params:
      self._old_python_path = sys.path[:]
      python_path_list = params['PYTHONPATH'].split(':')
      python_path_list.reverse()
      for path in python_path_list:
        sys.path.insert(0, path)
    if '_PYTHON_WORKING_DIR' in params:
      self._old_working_dir = os.getcwd()
      os.chdir(params['_PYTHON_WORKING_DIR'])
    os.environ.update(params)
  self._ref_cnt += 1
  self._parse_param_lock.release()

  # Read job information from the provision stub.
  metadata = [("worker_id", start_worker_request.worker_id)]
  provision_endpoint = start_worker_request.provision_endpoint.url
  with grpc.insecure_channel(provision_endpoint) as channel:
    client = ProvisionServiceStub(channel=channel)
    info = client.GetProvisionInfo(
        GetProvisionInfoRequest(), metadata=metadata).info
    options = json_format.MessageToJson(info.pipeline_options)
    logging_endpoint = info.logging_endpoint.url
    control_endpoint = info.control_endpoint.url

  try:
    logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
        url=logging_endpoint)

    # Send all logs to the runner.
    fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
    logging.getLogger().setLevel(logging.INFO)
    # Remove all the built-in log handlers.
    logging.getLogger().handlers = []
    logging.getLogger().addHandler(fn_log_handler)
    logging.info("Starting up Python worker in loopback mode.")
  except Exception:
    _LOGGER.error(
        "Failed to set up logging handler, continuing without.",
        exc_info=True)
    fn_log_handler = None

  sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)
  _worker_id = start_worker_request.worker_id

  try:
    control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
        url=control_endpoint)
    status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
    enable_heap_dump = 'enable_heap_dump' in experiments
    SdkHarness(
        control_address=control_service_descriptor.url,
        status_address=status_service_descriptor.url,
        worker_id=_worker_id,
        state_cache_size=sdk_worker_main._get_state_cache_size(experiments),
        data_buffer_time_limit_ms=(
            sdk_worker_main._get_data_buffer_time_limit_ms(experiments)),
        profiler_factory=profiler.Profile.factory_from_options(
            sdk_pipeline_options.view_as(ProfilingOptions)),
        enable_heap_dump=enable_heap_dump).run()
  except:  # pylint: disable=broad-except
    _LOGGER.exception('Python sdk harness failed: ')
    raise
  finally:
    self._parse_param_lock.acquire()
    self._ref_cnt -= 1
    # The last thread to exit is responsible for reverting the working
    # directory and sys.path.
    if self._ref_cnt == 0:
      if self._old_python_path is not None:
        sys.path.clear()
        for item in self._old_python_path:
          sys.path.append(item)
        self._old_python_path = None
      if self._old_working_dir is not None:
        os.chdir(self._old_working_dir)
        self._old_working_dir = None
    self._parse_param_lock.release()
    if fn_log_handler:
      fn_log_handler.close()
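# Why the PYTHONPATH handling above reverses the list before inserting at
# index 0: inserting a reversed sequence one element at a time preserves the
# original precedence order at the front of sys.path. A self-contained
# illustration of the same pattern:
base = ['existing']
additions = ['a', 'b', 'c']
for path in reversed(additions):
  base.insert(0, path)
assert base == ['a', 'b', 'c', 'existing']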
def _start_sdk_worker_main(
    self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
  params = start_worker_request.params
  self._parse_param_lock.acquire()
  if 'PYTHONPATH' in params:
    python_path_list = params['PYTHONPATH'].split(':')
    python_path_list.reverse()
    for path in python_path_list:
      sys.path.insert(0, path)
  if '_PYTHON_WORKING_DIR' in params:
    os.chdir(params['_PYTHON_WORKING_DIR'])
  os.environ.update(params)
  self._parse_param_lock.release()

  # Read job information from the provision stub.
  metadata = [("worker_id", start_worker_request.worker_id)]
  provision_endpoint = start_worker_request.provision_endpoint.url
  with grpc.insecure_channel(provision_endpoint) as channel:
    client = ProvisionServiceStub(channel=channel)
    info = client.GetProvisionInfo(
        GetProvisionInfoRequest(), metadata=metadata).info
    options = json_format.MessageToJson(info.pipeline_options)
    logging_endpoint = info.logging_endpoint.url
    control_endpoint = info.control_endpoint.url

  try:
    logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
        url=logging_endpoint)

    # Send all logs to the runner.
    fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
    logging.getLogger().setLevel(logging.ERROR)
    logging.getLogger().addHandler(fn_log_handler)
  except Exception:
    _LOGGER.error(
        "Failed to set up logging handler, continuing without.",
        exc_info=True)
    fn_log_handler = None

  sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)
  _worker_id = start_worker_request.worker_id

  try:
    control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
        url=control_endpoint)
    status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()
    experiments = sdk_pipeline_options.view_as(DebugOptions).experiments or []
    enable_heap_dump = 'enable_heap_dump' in experiments
    SdkHarness(
        control_address=control_service_descriptor.url,
        status_address=status_service_descriptor.url,
        worker_id=_worker_id,
        state_cache_size=sdk_worker_main._get_state_cache_size(experiments),
        data_buffer_time_limit_ms=(
            sdk_worker_main._get_data_buffer_time_limit_ms(experiments)),
        profiler_factory=profiler.Profile.factory_from_options(
            sdk_pipeline_options.view_as(ProfilingOptions)),
        enable_heap_dump=enable_heap_dump).run()
  except:  # pylint: disable=broad-except
    _LOGGER.exception('Python sdk harness failed: ')
    raise
  finally:
    if fn_log_handler:
      fn_log_handler.close()