def wait_for_upstream_status(self):
    # Collect upstream operators that have not finished; fail this operator if a
    # dependency failed and failure is not allowed by the config.
    unfinished_op = []
    if self.DATA_MODEL != DataModelType.DEFAULT:
        for parent in self.get_parents_nodes():
            if parent.get_status() in [Status.IDLE, Status.WAITING, Status.RUNNING]:
                self._SYS_LOGGER.info(
                    "Upstream operator [" + parent.get_node_name() + "] is still in status [" +
                    ProtoUtil.get_name_by_value(enum_type=Status, value=parent.get_status()) + '].')
                self._logger.info(
                    "Upstream operator [" + parent.get_node_name() + "] is still in status [" +
                    ProtoUtil.get_name_by_value(enum_type=Status, value=parent.get_status()) + '].')
                unfinished_op.append(parent.get_node_name())
            elif parent.get_status() == Status.FAILED:
                self._SYS_LOGGER.info("Upstream operator [" + parent.get_node_name() + "] failed.")
                # Streaming mode allows failure from its dependencies.
                if not self._config['allow_failure']:
                    self._SYS_LOGGER.error('This results in failure of all the following descendant operators.')
                    self._logger.error('This results in failure of all the following descendant operators.')
                    self.set_status(status=Status.FAILED)
                    unfinished_op = []
                    break
                else:
                    self._SYS_LOGGER.warning(
                        "Failure is allowed in Streaming mode. The rest of operators will continue.")
                    self._logger.warning(
                        "Failure is allowed in Streaming mode. The rest of operators will continue.")
    return unfinished_op
def set_status(self, status):
    self._SYS_LOGGER.info(
        'Container [' + self.get_container_name() + "] switching to [" +
        ProtoUtil.get_name_by_value(enum_type=Status, value=status) + "] status from [" +
        ProtoUtil.get_name_by_value(enum_type=Status, value=self._status) + '].')
    self._status = status
def initialize_from_dir(self, dir_name):
    self._SYS_LOGGER.fatal(
        "Initialize_from_dir function is not implemented for storage type " +
        ProtoUtil.get_name_by_value(enum_type=StorageType, value=self.STORAGE_TYPE) + '.')
    self._logger.fatal(
        "Initialize_from_dir function is not implemented for storage type " +
        ProtoUtil.get_name_by_value(enum_type=StorageType, value=self.STORAGE_TYPE) + '.')
    return
def get_containers_info():
    containers_info = []
    existing_containers = {}
    all_proto_files = set()
    if not FileUtil.is_local_path(backend_folder):
        all_cells = ['']
    else:
        all_cells = gclient.list_cells()
    for cell_name in all_cells:
        folder = FileUtil.convert_local_to_cell_path(
            path=backend_folder, cell=cell_name)
        proto_files = FileUtil.list_files_in_dir(folder)
        all_proto_files = all_proto_files.union(set(proto_files))

    for proto_file in all_proto_files:
        storage = ProtoTableStorage()
        storage.initialize_from_file(
            file_name=proto_file
        )
        raw_data = storage.read_all()
        if not raw_data:
            continue
        # Only the latest entry per file is inspected.
        key = sorted(raw_data.keys())[-1]
        val = raw_data[key]
        result_proto = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=val
        )
        ttl = result_proto.ttl
        if ttl > 0 and result_proto.updated_time and \
                TimezoneUtil.cur_time_in_pst() - TimezoneUtil.cur_time_from_str(
                    result_proto.updated_time) >= datetime.timedelta(days=ttl):
            # The record outlived its TTL, so remove the underlying file.
            FileUtil.remove_file(storage.get_file_name())
        else:
            container_info = {
                'container_name': result_proto.container_name,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=result_proto.container_status),
                'updated_time': result_proto.updated_time,
                'mode': ProtoUtil.get_name_by_value(enum_type=ModeType, value=result_proto.mode),
                'data_model': ProtoUtil.get_name_by_value(
                    enum_type=DataModelType, value=result_proto.data_model),
                'run_cell': result_proto.run_cell,
                'snapshot_cell': result_proto.snapshot_cell,
            }
            if container_info['container_name'] not in existing_containers:
                existing_containers[container_info['container_name']] = container_info['updated_time']
                containers_info.append(container_info)
            else:
                if container_info['updated_time'] >= existing_containers[container_info['container_name']]:
                    containers_info.append(container_info)
    return containers_info
def set_config(self, config):
    super().set_config(config=config)
    if 'override_to_prod' in self._config and self._file_name:
        self._file_name = self._file_name.replace(
            ProtoUtil.get_name_by_value(enum_type=ModeType, value=ModeType.TEST),
            ProtoUtil.get_name_by_value(enum_type=ModeType, value=ModeType.PROD))
    if 'override_to_test' in self._config and self._file_name:
        self._file_name = self._file_name.replace(
            ProtoUtil.get_name_by_value(enum_type=ModeType, value=ModeType.PROD),
            ProtoUtil.get_name_by_value(enum_type=ModeType, value=ModeType.TEST))
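# Illustrative sketch only (the path below is a hypothetical example, not from the
# source): with 'override_to_prod' in the config, a file name that embeds the TEST
# mode string is rewritten to its PROD counterpart; 'override_to_test' performs the
# reverse substitution.
#
#   storage.set_config(config={'override_to_prod': True})
#   # 'database/TEST/table.pb'  ->  'database/PROD/table.pb'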
def __init__(self, container_name, ttl=-1):
    super().__init__(container_name, ttl=ttl)
    self._logger = LoggingTool(
        name=(ProtoUtil.get_name_by_value(enum_type=DataModelType, value=self.DATA_MODEL) +
              '__' + self.get_class_name() + '__' + container_name),
        ttl=ttl)
def __init__(self, container_name):
    super().__init__(container_name)
    self._logger = glogging.get_logger(
        log_name=(ProtoUtil.get_name_by_value(enum_type=DataModelType, value=self.DATA_MODEL) +
                  '__' + self.get_class_name() + '__' + container_name),
        log_dir=EnvUtil.get_pslx_env_variable('PSLX_DEFAULT_LOG_DIR'))
def get_response_and_status_impl(self, request):
    # Build the backend value from the incoming container snapshot request.
    storage_value = ContainerBackendValue()
    storage_value.container_name = request.container_name
    storage_value.container_status = request.status
    for operator_name, operator_snapshot in dict(request.operator_snapshot_map).items():
        operator_info = ContainerBackendValue.OperatorInfo()
        operator_info.status = operator_snapshot.status
        for parent in operator_snapshot.node_snapshot.parents_names:
            operator_info.parents.append(parent)
        operator_info.start_time = operator_snapshot.start_time
        operator_info.end_time = operator_snapshot.end_time
        storage_value.operator_info_map[operator_name].CopyFrom(operator_info)

    storage_value.mode = request.mode
    storage_value.data_model = request.data_model
    storage_value.updated_time = str(TimezoneUtil.cur_time_in_pst())
    storage_value.start_time = request.start_time
    storage_value.end_time = request.end_time
    storage_value.log_dir = request.log_dir
    for key in request.counters:
        storage_value.counters[key] = request.counters[key]

    partitioner_dir = FileUtil.join_paths_to_dir_with_mode(
        root_dir=FileUtil.join_paths_to_dir(
            root_dir=self._backend_folder,
            base_name=ProtoUtil.get_name_by_value(
                enum_type=DataModelType, value=storage_value.data_model)),
        base_name=storage_value.container_name,
        ttl=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_TTL'))
    if storage_value.mode == ModeType.TEST:
        partitioner_dir = partitioner_dir.replace('PROD', 'TEST')

    # Partitioner storages are cached per directory to avoid re-initializing them
    # on every request.
    storage = self._lru_cache_tool.get(key=partitioner_dir)
    if not storage:
        self.sys_log("Did not find the storage in cache. Making a new one...")
        storage = DailyPartitionerStorage()
        proto_table = ProtoTableStorage()
        storage.set_underlying_storage(storage=proto_table)
        storage.initialize_from_dir(dir_name=partitioner_dir)
        self._lru_cache_tool.set(key=partitioner_dir, value=storage)
    else:
        self.sys_log("Found key in LRU cache.")

    storage.write(
        data={storage_value.container_name: storage_value},
        params={
            'overwrite': True,
            'make_partition': True,
        })
    return None, Status.SUCCEEDED
def read(self, file_or_dir_path, params=None, is_test=False, root_certificate=None):
    assert 'PartitionerStorageType' in params and 'start_time' not in params and 'end_time' not in params
    request = RPCIORequest()
    request.is_test = is_test
    request.type = self.STORAGE_TYPE
    request.dir_name = file_or_dir_path
    if 'is_proto_table' in params and params['is_proto_table']:
        params['is_proto_table'] = '1'
    else:
        params['is_proto_table'] = '0'
    if 'message_type' in params:
        request.params['message_type'] = ProtoUtil.infer_str_from_message_type(
            message_type=params['message_type'])
    if 'read_oldest' in params and params['read_oldest']:
        request.params['read_oldest'] = '1'
    # Everything forwarded to the RPC request is serialized as a string; non-string
    # params are only forwarded when whitelisted.
    for key, val in params.items():
        if isinstance(val, str) or key in self.WHITELISTED_KEY:
            request.params[key] = str(val)
    request.params['PartitionerStorageType'] = ProtoUtil.get_name_by_value(
        enum_type=PartitionerStorageType, value=params['PartitionerStorageType'])

    response = self.send_request(request=request, root_certificate=root_certificate)
    if response:
        if params['is_proto_table'] == '1':
            # Proto-table reads return a key -> proto mapping.
            result = {}
            for key, val in dict(response.dict_data).items():
                result[key] = val.data[0].proto_data
            return result
        else:
            return [rpc_data.string_data for rpc_data in response.list_data.data]
    else:
        return None if params['is_proto_table'] == '1' else []
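# Minimal sketch of the params shape read() expects, inferred from the assert and
# key lookups above; the enum value and message type below are illustrative
# assumptions. 'start_time'/'end_time' must be absent here (use read_range for
# ranged reads).
#
#   example_read_params = {
#       'PartitionerStorageType': PartitionerStorageType.DAILY,  # assumed value
#       'is_proto_table': True,                  # return {key: proto_data} instead of strings
#       'message_type': ContainerBackendValue,   # assumed proto message type
#       'read_oldest': False,
#   }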
def _get_latest_status_of_operators(self):
    operator_status = {}
    snapshot_files = FileUtil.get_file_names_in_dir(
        dir_name=FileUtil.join_paths_to_dir(FileUtil.dir_name(self._snapshot_file_folder), 'operators'))
    for snapshot_file in snapshot_files[::-1]:
        operator_name = snapshot_file.split('_')[1]
        if operator_name not in operator_status:
            self._logger.info("Getting status for operator [" + operator_name + '].')
            self.sys_log("Getting status for operator [" + operator_name + '].')
            operator_status[operator_name] = self._node_name_to_node_dict[operator_name].get_status_from_snapshot(
                snapshot_file=snapshot_file
            )
            self.sys_log("Status for operator [" + operator_name + '] is [' +
                         ProtoUtil.get_name_by_value(
                             enum_type=Status, value=operator_status[operator_name]) + '].')
        if len(operator_status) == len(self._node_name_to_node_dict):
            break
    return operator_status
def read_range(self, file_or_dir_path, params=None, is_test=False, root_certificate=None):
    assert 'PartitionerStorageType' in params and 'start_time' in params and 'end_time' in params
    if 'is_proto_table' in params and params['is_proto_table']:
        params['is_proto_table'] = '1'
    else:
        params['is_proto_table'] = '0'
    request = RPCIORequest()
    request.is_test = is_test
    request.type = StorageType.PARTITIONER_STORAGE
    request.dir_name = file_or_dir_path
    for key, val in params.items():
        if isinstance(val, str) or key in self.WHITELISTED_KEY:
            request.params[key] = str(val)
    request.params['PartitionerStorageType'] = ProtoUtil.get_name_by_value(
        enum_type=PartitionerStorageType, value=params['PartitionerStorageType'])
    request.params['is_proto_table'] = params['is_proto_table']

    response = self.send_request(request=request, root_certificate=root_certificate)
    result = {}
    if response:
        for key, val in response.dict_data.items():
            if params['is_proto_table'] == '1':
                result[key] = {}
                # Proto-table payloads alternate between key (string) and value (proto) entries.
                for index in range(0, len(val.data) - 1, 2):
                    result[key][val.data[index].string_data] = val.data[index + 1].proto_data
            else:
                result[key] = [rpc_data.string_data for rpc_data in val.data]
    return result
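# Minimal sketch of a read_range() call, based only on the assert and response
# handling above; the client variable, path, time values, and enum value are
# assumptions for illustration. With 'is_proto_table' truthy the result maps each
# partition key to a {string_key: proto_data} dict, otherwise to a list of strings.
#
#   result = rpc_io_client.read_range(
#       file_or_dir_path='PROD/example_dir',                          # hypothetical path
#       params={
#           'PartitionerStorageType': PartitionerStorageType.DAILY,   # assumed value
#           'start_time': datetime.datetime(2020, 1, 1),
#           'end_time': datetime.datetime(2020, 1, 2),
#           'is_proto_table': True,
#       },
#   )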
def streaming_data_generator():
    last_checked_key = None
    while True:
        pslx_dedicated_logging_storage = ProtoTableStorage(logger=pslx_frontend_logger)
        pslx_dedicated_logging_storage.initialize_from_file(
            file_name=pslx_dedicated_logging_storage_path
        )
        if pslx_dedicated_logging_storage.get_num_entries() == 0:
            time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
            continue
        all_data = pslx_dedicated_logging_storage.read_all()
        all_sorted_keys = sorted(list(all_data.keys()))
        if all_sorted_keys[-1] == last_checked_key:
            time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
            continue
        last_checked_key = all_sorted_keys[-1]
        pslx_dedicated_logging_list = []
        for key in all_sorted_keys:
            val = ProtoUtil.any_to_message(
                message_type=LoggingMessageRequest,
                any_message=all_data[key]
            )
            if ProtoUtil.get_name_by_value(enum_type=DiskLoggerLevel, value=val.level) in log_levels:
                message = val.message
                for string_to_replace, string_after_replacing in strings_to_replace.items():
                    message = message.replace(string_to_replace, string_after_replacing)
                contain_key_word = False if key_words else True
                for key_word in key_words:
                    if key_word in message:
                        contain_key_word = True
                        break
                if contain_key_word:
                    pslx_dedicated_logging_list.append(message)
        yield '\\n'.join(pslx_dedicated_logging_list)
        time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
def index():
    config = pslx_frontend_ui_app.config['frontend_config']
    service_info = []
    if config.container_backend_config.server_url:
        server, port = config.container_backend_config.server_url.split(':')
        pslx_frontend_logger.info(
            "Index checking health for url [" + config.container_backend_config.server_url + '].')
        status, qps = RPCUtil.check_health_and_qps(
            server_url=config.container_backend_config.server_url,
            root_certificate_path=config.container_backend_config.root_certificate_path)
        service_info.append({
            'name': 'container_backend',
            'server': server,
            'port': port,
            'status': ProtoUtil.get_name_by_value(enum_type=Status, value=status),
            'qps': round(qps, 3),
        })

    # The remaining services share the same health-check logic, so iterate over them.
    named_server_configs = [
        ('proto_viewer', config.proto_viewer_config),
        ('file_viewer', config.file_viewer_config),
        ('instant_messaging', config.instant_messaging_config),
        ('rpc_io', config.rpc_io_config),
        ('email', config.email_config),
    ]
    for name, server_configs in named_server_configs:
        for server_config in server_configs:
            server, port = server_config.server_url.split(':')
            status, qps = RPCUtil.check_health_and_qps(
                server_url=server_config.server_url,
                root_certificate_path=server_config.root_certificate_path)
            pslx_frontend_logger.info(
                "Index checking health for url [" + server_config.server_url + '].')
            service_info.append({
                'name': name,
                'server': server,
                'port': port,
                'status': ProtoUtil.get_name_by_value(enum_type=Status, value=status),
                'qps': round(qps, 3),
            })

    return render_template(
        "index.html",
        service_info=sorted(service_info, key=lambda x: x['name']))
def set_status(self, status):
    self.sys_log(
        'Node [' + self._node_name + "] switching to [" +
        ProtoUtil.get_name_by_value(enum_type=Status, value=status) + "] status from [" +
        ProtoUtil.get_name_by_value(enum_type=Status, value=self._status) + '].')
    self._status = status
def set_data_model(self, model):
    self.sys_log(
        "Switching to [" + ProtoUtil.get_name_by_value(enum_type=DataModelType, value=model) +
        "] model from [" +
        ProtoUtil.get_name_by_value(enum_type=DataModelType, value=self.DATA_MODEL) + '].')
    self.DATA_MODEL = model
def initialize_from_file(self, file_name):
    self.sys_log(
        "Initialize_from_file function is not implemented for storage type [" +
        ProtoUtil.get_name_by_value(enum_type=StorageType, value=self.STORAGE_TYPE) + '].')
def get_container_info(container_name, cell_name, start_time):
    container_info = {
        'log_file': '',
        'start_time': '',
        'end_time': '',
        'counter_info': [],
    }
    operators_info = []
    folder = FileUtil.convert_local_to_cell_path(
        path=backend_folder, cell=cell_name)
    pslx_frontend_logger.info(
        "Container backend checking folder [" + folder + '].')
    storage = ProtoTableStorage()
    storage.initialize_from_file(
        FileUtil.join_paths_to_file(
            root_dir=folder,
            base_name=container_name + '.pb'
        )
    )
    raw_data = storage.read_all()
    all_past_run = []
    for key in sorted(list(raw_data.keys()), reverse=True):
        val = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=raw_data[key]
        )
        all_past_run.append(
            {
                'start_time': val.start_time,
                'updated_time': val.updated_time,
                'end_time': val.end_time,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=val.container_status),
                'run_cell': val.run_cell,
                'snapshot_cell': val.snapshot_cell,
            }
        )
        if len(all_past_run) > 10:
            break

    key = start_time if start_time else sorted(raw_data.keys())[-1]
    val = raw_data[key]
    result_proto = ProtoUtil.any_to_message(
        message_type=ContainerBackendValue,
        any_message=val
    )
    container_info['log_file'] = galaxy_viewer_url + result_proto.log_file
    container_info['start_time'] = result_proto.start_time
    container_info['end_time'] = result_proto.end_time
    for key in sorted(dict(result_proto.counters).keys()):
        container_info['counter_info'].append(
            {
                'name': key,
                'count': result_proto.counters[key],
            }
        )
    for key, val in dict(result_proto.operator_info_map).items():
        operators_info.append({
            'operator_name': key,
            'status': ProtoUtil.get_name_by_value(
                enum_type=Status, value=val.status),
            'start_time': val.start_time,
            'end_time': val.end_time,
            'dependencies': ', '.join(val.parents),
            'log_file': galaxy_viewer_url + val.log_file,
        })
    return (container_info,
            sorted(operators_info, key=lambda x: (x['dependencies'], x['operator_name'])),
            all_past_run)
def test_get_name_by_value(self):
    test_enum_type = ModeType
    test_value = ModeType.TEST
    self.assertEqual(
        ProtoUtil.get_name_by_value(enum_type=test_enum_type, value=test_value),
        'TEST')
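# A similar hedged example (added for illustration, not part of the original test
# suite): Status and Status.SUCCEEDED already appear in the code above, so the same
# helper should map that enum value back to its name.
def test_get_name_by_value_for_status(self):
    self.assertEqual(
        ProtoUtil.get_name_by_value(enum_type=Status, value=Status.SUCCEEDED),
        'SUCCEEDED')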