def test_parse_pipeline_options(self):
    """JSON pipeline options decode from both plain and URN-keyed spellings.

    Keys that only resemble option URNs (wrong version suffix or a
    truncated prefix) must be kept verbatim rather than stripped.
    """
    parse = sdk_worker_main._parse_pipeline_options

    def matches_both():
        # Fresh matcher per assertion: both spellings must yield the
        # same two parsed options.
        return all_of(
            has_entry('m_m_option', ['beam_fn_api']),
            has_entry('m_option', '/tmp/requirements.txt'))

    # Plain option names nested under "options".
    assert_that(
        parse(
            '{"options": {'
            '"m_option": "/tmp/requirements.txt", '
            '"m_m_option":["beam_fn_api"]'
            '}}').get_all_options(),
        matches_both())

    # Fully-qualified URN keys at the top level.
    assert_that(
        parse(
            '{"beam:option:m_option:v1": "/tmp/requirements.txt", '
            '"beam:option:m_m_option:v1":["beam_fn_api"]}').get_all_options(),
        matches_both())

    # Near-miss URN keys are passed through unchanged.
    for verbatim_key in ('beam:option:m_option:v',
                         'eam:option:m_option:v1',
                         'eam:option:m_option:v'):
        assert_that(
            parse(
                '{"options": {"%s":"mock_val"}}' % verbatim_key)
            .get_all_options(),
            has_entry(verbatim_key, 'mock_val'))
def test_parse_pipeline_options(self):
    """JSON pipeline options parse identically in plain and URN-keyed form.

    Also checks that keys which only resemble option URNs (wrong
    version suffix or truncated prefix) are kept verbatim.
    """
    parse = sdk_worker_main._parse_pipeline_options

    # Build the options object that both JSON spellings should decode to.
    expected_options = PipelineOptions([])
    mock_view = expected_options.view_as(SdkWorkerMainTest.MockOptions)
    mock_view.m_m_option = ['beam_fn_api']
    mock_view.m_option = '/tmp/requirements.txt'

    # Plain option names nested under "options".
    self.assertEqual(
        expected_options.get_all_options(),
        parse(
            '{"options": {'
            '"m_option": "/tmp/requirements.txt", '
            '"m_m_option":["beam_fn_api"]'
            '}}').get_all_options())

    # Fully-qualified URN keys at the top level.
    self.assertEqual(
        expected_options.get_all_options(),
        parse(
            '{"beam:option:m_option:v1": "/tmp/requirements.txt", '
            '"beam:option:m_m_option:v1":["beam_fn_api"]}').get_all_options())

    # Near-miss URN keys survive parsing unchanged.
    for verbatim_key in ('beam:option:m_option:v',
                         'eam:option:m_option:v1',
                         'eam:option:m_option:v'):
        self.assertEqual(
            {verbatim_key: 'mock_val'},
            parse(
                '{"options": {"%s":"mock_val"}}' % verbatim_key)
            .get_all_options(drop_default=True))
def test_parse_pipeline_options(self):
    """JSON pipeline options parse identically in plain and URN-keyed form.

    Covers multi-valued options (``m_m_option``), options carrying an
    embedded ``=`` (``worker_threads=1``), and keys that only resemble
    option URNs, which must be kept verbatim.
    """
    parse = sdk_worker_main._parse_pipeline_options

    # The options object both full JSON spellings should decode to.
    expected_options = PipelineOptions([])
    mock_view = expected_options.view_as(SdkWorkerMainTest.MockOptions)
    mock_view.m_m_option = ['worker_threads=1', 'beam_fn_api']
    mock_view.m_option = '/tmp/requirements.txt'

    # A single multi-valued entry, plain spelling.
    self.assertEqual(
        {'m_m_option': ['worker_threads=1']},
        parse(
            '{"options": {"m_m_option":["worker_threads=1"]}}')
        .get_all_options(drop_default=True))

    # All options, plain spelling nested under "options".
    self.assertEqual(
        expected_options.get_all_options(),
        parse(
            '{"options": {'
            '"m_option": "/tmp/requirements.txt", '
            '"m_m_option":["worker_threads=1", "beam_fn_api"]'
            '}}').get_all_options())

    # A single multi-valued entry, URN spelling.
    self.assertEqual(
        {'m_m_option': ['worker_threads=1']},
        parse(
            '{"beam:option:m_m_option:v1":["worker_threads=1"]}')
        .get_all_options(drop_default=True))

    # All options, URN spelling at the top level.
    self.assertEqual(
        expected_options.get_all_options(),
        parse(
            '{"beam:option:m_option:v1": "/tmp/requirements.txt", '
            '"beam:option:m_m_option:v1":["worker_threads=1", '
            '"beam_fn_api"]}').get_all_options())

    # Near-miss URN keys survive parsing unchanged.
    for verbatim_key in ('beam:option:m_option:v',
                         'eam:option:m_option:v1',
                         'eam:option:m_option:v'):
        self.assertEqual(
            {verbatim_key: 'mock_val'},
            parse(
                '{"options": {"%s":"mock_val"}}' % verbatim_key)
            .get_all_options(drop_default=True))
def _start_sdk_worker_main(
        self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
    """Runs one SDK harness for the given start-worker request.

    The first concurrent worker to start prepares the shared process
    environment (sys.path, working directory, environment variables)
    from ``start_worker_request.params``; the last one to exit restores
    it. Job configuration is fetched from the provision service, logs
    are forwarded to the runner, and the SdkHarness is run to
    completion.

    Args:
      start_worker_request: request carrying the worker id, environment
        params, and the provision service endpoint.

    Raises:
      Whatever the SdkHarness run raises; failures are logged first.
    """
    params = start_worker_request.params
    # Fix: use `with` so the lock is released even if environment setup
    # raises (e.g. os.chdir on a bad path); the previous explicit
    # acquire()/release() pair would deadlock all subsequent workers.
    with self._parse_param_lock:
        # The first thread to start is responsible for preparing the
        # whole execution environment.
        if not self._ref_cnt:
            if 'PYTHONPATH' in params:
                self._old_python_path = sys.path[:]
                python_path_list = params['PYTHONPATH'].split(':')
                # Reverse so that insert(0, ...) preserves the
                # original PYTHONPATH ordering.
                python_path_list.reverse()
                for path in python_path_list:
                    sys.path.insert(0, path)
            if '_PYTHON_WORKING_DIR' in params:
                self._old_working_dir = os.getcwd()
                os.chdir(params['_PYTHON_WORKING_DIR'])
            os.environ.update(params)
        self._ref_cnt += 1

    # Read job information from the provision stub.
    # NOTE(review): if this section raises, the ref count incremented
    # above is never decremented (the finally below only covers the
    # harness run) -- pre-existing behavior, left unchanged.
    metadata = [("worker_id", start_worker_request.worker_id)]
    provision_endpoint = start_worker_request.provision_endpoint.url
    with grpc.insecure_channel(provision_endpoint) as channel:
        client = ProvisionServiceStub(channel=channel)
        info = client.GetProvisionInfo(
            GetProvisionInfoRequest(), metadata=metadata).info
        options = json_format.MessageToJson(info.pipeline_options)
        logging_endpoint = info.logging_endpoint.url
        control_endpoint = info.control_endpoint.url

    try:
        logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
            url=logging_endpoint)

        # Send all logs to the runner.
        fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
        logging.getLogger().setLevel(logging.INFO)
        # Remove all built-in log handlers so records are not
        # duplicated locally.
        logging.getLogger().handlers = []
        logging.getLogger().addHandler(fn_log_handler)
        logging.info("Starting up Python worker in loopback mode.")
    except Exception:
        # Best-effort: the harness can still run without forwarding logs.
        _LOGGER.error(
            "Failed to set up logging handler, continuing without.",
            exc_info=True)
        fn_log_handler = None

    sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)
    _worker_id = start_worker_request.worker_id

    try:
        control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
            url=control_endpoint)
        status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

        experiments = sdk_pipeline_options.view_as(
            DebugOptions).experiments or []
        enable_heap_dump = 'enable_heap_dump' in experiments
        SdkHarness(
            control_address=control_service_descriptor.url,
            status_address=status_service_descriptor.url,
            worker_id=_worker_id,
            state_cache_size=sdk_worker_main._get_state_cache_size(
                experiments),
            data_buffer_time_limit_ms=sdk_worker_main.
            _get_data_buffer_time_limit_ms(experiments),
            profiler_factory=profiler.Profile.factory_from_options(
                sdk_pipeline_options.view_as(ProfilingOptions)),
            enable_heap_dump=enable_heap_dump).run()
    except:  # pylint: disable=broad-except
        _LOGGER.exception('Python sdk harness failed: ')
        raise
    finally:
        with self._parse_param_lock:
            self._ref_cnt -= 1
            # The last thread to exit is responsible for reverting the
            # working directory and sys.path.
            if self._ref_cnt == 0:
                if self._old_python_path is not None:
                    # In-place restore keeps any aliases of sys.path valid.
                    sys.path[:] = self._old_python_path
                    self._old_python_path = None
                if self._old_working_dir is not None:
                    os.chdir(self._old_working_dir)
                    self._old_working_dir = None
        if fn_log_handler:
            fn_log_handler.close()
def _start_sdk_worker_main(
        self, start_worker_request: beam_fn_api_pb2.StartWorkerRequest):
    """Runs one SDK harness for the given start-worker request.

    Applies the environment params from the request (sys.path, working
    directory, environment variables), fetches job configuration from
    the provision service, forwards logs to the runner, and runs the
    SdkHarness to completion.

    Args:
      start_worker_request: request carrying the worker id, environment
        params, and the provision service endpoint.

    Raises:
      Whatever the SdkHarness run raises; failures are logged first.
    """
    params = start_worker_request.params
    # Fix: use `with` so the lock is released even if environment setup
    # raises (e.g. os.chdir on a bad path); the previous explicit
    # acquire()/release() pair would deadlock all subsequent workers.
    with self._parse_param_lock:
        if 'PYTHONPATH' in params:
            python_path_list = params['PYTHONPATH'].split(':')
            # Reverse so that insert(0, ...) preserves the original
            # PYTHONPATH ordering.
            python_path_list.reverse()
            for path in python_path_list:
                sys.path.insert(0, path)
        if '_PYTHON_WORKING_DIR' in params:
            os.chdir(params['_PYTHON_WORKING_DIR'])
        os.environ.update(params)

    # Read job information from the provision stub.
    metadata = [("worker_id", start_worker_request.worker_id)]
    provision_endpoint = start_worker_request.provision_endpoint.url
    with grpc.insecure_channel(provision_endpoint) as channel:
        client = ProvisionServiceStub(channel=channel)
        info = client.GetProvisionInfo(
            GetProvisionInfoRequest(), metadata=metadata).info
        options = json_format.MessageToJson(info.pipeline_options)
        logging_endpoint = info.logging_endpoint.url
        control_endpoint = info.control_endpoint.url

    try:
        logging_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
            url=logging_endpoint)

        # Send all logs to the runner.
        fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor)
        logging.getLogger().setLevel(logging.ERROR)
        logging.getLogger().addHandler(fn_log_handler)
    except Exception:
        # Best-effort: the harness can still run without forwarding logs.
        _LOGGER.error(
            "Failed to set up logging handler, continuing without.",
            exc_info=True)
        fn_log_handler = None

    sdk_pipeline_options = sdk_worker_main._parse_pipeline_options(options)
    _worker_id = start_worker_request.worker_id

    try:
        control_service_descriptor = endpoints_pb2.ApiServiceDescriptor(
            url=control_endpoint)
        status_service_descriptor = endpoints_pb2.ApiServiceDescriptor()

        experiments = sdk_pipeline_options.view_as(
            DebugOptions).experiments or []
        enable_heap_dump = 'enable_heap_dump' in experiments
        SdkHarness(
            control_address=control_service_descriptor.url,
            status_address=status_service_descriptor.url,
            worker_id=_worker_id,
            state_cache_size=sdk_worker_main._get_state_cache_size(
                experiments),
            data_buffer_time_limit_ms=sdk_worker_main.
            _get_data_buffer_time_limit_ms(experiments),
            profiler_factory=profiler.Profile.factory_from_options(
                sdk_pipeline_options.view_as(ProfilingOptions)),
            enable_heap_dump=enable_heap_dump).run()
    except:  # pylint: disable=broad-except
        _LOGGER.exception('Python sdk harness failed: ')
        raise
    finally:
        # Flush and detach the runner log handler even on failure.
        if fn_log_handler:
            fn_log_handler.close()