def get_container_snapshot(self, send_backend=True):
    """Serialize the container's current state into a ContainerSnapshot proto.

    Collects container metadata, counters, start/end times and one snapshot
    per (non-dummy) operator, writes the result to a timestamped .pb file
    next to the snapshot folder, and optionally pushes it to the backend.

    Args:
        send_backend: when True and a backend is configured, also send the
            snapshot to the backend (best-effort; failures are logged).

    Returns:
        The populated ContainerSnapshot proto.
    """
    if not self._is_initialized:
        # Snapshotting an uninitialized container is permitted but logged loudly.
        self._logger.error("Warning: taking snapshot when the container [" + self.get_container_name() + "] is not initialized.")
        self.sys_log("Warning: taking snapshot when the container [" + self.get_container_name() + "] is not initialized.")
    snapshot = ContainerSnapshot()
    snapshot.container_name = self._container_name
    snapshot.is_initialized = self._is_initialized
    snapshot.status = self._status
    snapshot.class_name = self.get_full_class_name()
    snapshot.mode = self._mode
    snapshot.data_model = self.DATA_MODEL
    snapshot.log_dir = self._logger.get_log_dir()
    for key, val in self._counter.items():
        snapshot.counters[key] = val
    if self._start_time:
        snapshot.start_time = str(self._start_time)
    if self._end_time:
        snapshot.end_time = str(self._end_time)
    for op_name, op in self._node_name_to_node_dict.items():
        # Dummy operators are placeholders and carry no state worth saving.
        if 'Dummy' in op.get_class_name():
            continue
        op_output_file = FileUtil.join_paths_to_file(
            root_dir=FileUtil.join_paths_to_dir(FileUtil.dir_name(self._snapshot_file_folder), 'operators'),
            base_name='SNAPSHOT_' + str(TimezoneUtil.cur_time_in_pst()) + '_' + op_name + '.pb'
        )
        snapshot.operator_snapshot_map[op_name].CopyFrom(op.get_operator_snapshot(output_file=op_output_file))
    self.sys_log("Snapshot saved to folder [" + self._snapshot_file_folder + '].')
    self._logger.info("Snapshot saved to folder [" + self._snapshot_file_folder + '].')
    output_file_name = FileUtil.join_paths_to_file(
        root_dir=FileUtil.dir_name(self._snapshot_file_folder),
        base_name='SNAPSHOT_' + str(TimezoneUtil.cur_time_in_pst()) + '_' + self._container_name + '.pb'
    )
    # Hold the file lock so concurrent readers never observe a partial write.
    with FileLockTool(output_file_name, read_mode=False):
        FileUtil.write_proto_to_file(
            proto=snapshot,
            file_name=output_file_name
        )
    if self._backend and send_backend:
        try:
            self._backend.send_to_backend(snapshot=snapshot)
        except Exception as err:
            # Backend delivery is best-effort; the local snapshot is already saved.
            self._logger.error("Sending backend failed with error " + str(err) + '.')
    return snapshot
def _reinitialize_underlying_storage(self, file_base_name):
    """Re-point the underlying storage at file_base_name in the newest partition.

    Silently logs and does nothing if that file does not exist yet.
    """
    target_file = FileUtil.join_paths_to_file(
        root_dir=self.get_latest_dir(), base_name=file_base_name)
    if FileUtil.does_file_exist(target_file):
        self._underlying_storage.initialize_from_file(file_name=target_file)
    else:
        self.sys_log("The file to read does not exist.")
def fetch_oldest(self):
    """Return the message with the smallest key in the oldest partition.

    Returns None when the partitioner is empty, the oldest partition's
    table holds no data, or any error occurs (errors are logged).
    """
    try:
        oldest_dir = self._partitioner.get_oldest_dir_in_root_directory()
        if not oldest_dir:
            self._logger.warning('[' + self._partitioner.get_dir_name() + '] is empty.')
            return None
        table = ProtoTableStorage()
        table.initialize_from_file(
            file_name=FileUtil.join_paths_to_file(root_dir=oldest_dir, base_name='data.pb'))
        entries = table.read_all()
        if not entries:
            return None
        self._logger.info(
            "Successfully get the oldest data in partition dir [" +
            self._partitioner.get_dir_name() + '].')
        # Smallest key is the oldest entry within the partition's table.
        oldest_key = min(entries.keys())
        return ProtoUtil.any_to_message(
            message_type=self.MESSAGE_TYPE, any_message=entries[oldest_key])
    except Exception as err:
        self._logger.error("Fetch oldest partition [" + self._partitioner.get_dir_name() +
                           "] with error " + str(err) + '.')
    return None
def _reinitialize_underlying_storage(self, file_base_name):
    """Re-point the underlying storage at file_base_name in the newest partition.

    Counts one RPC for the existence check; logs and bails out when the
    file is not there yet.
    """
    candidate = FileUtil.join_paths_to_file(
        root_dir=self._get_latest_dir_internal(), base_name=file_base_name)
    self.increment_rpc_count_by(n=1)
    if FileUtil.does_file_exist(candidate):
        self._underlying_storage.initialize_from_file(file_name=candidate)
    else:
        self._SYS_LOGGER.info("The file to read does not exist.")
def write(self, data, params=None):
    """Write data into the newest partition via the underlying storage.

    Params consumed here (not forwarded to the underlying storage):
      - 'make_partition' (default True): create a new time-keyed partition first.
      - 'base_name': file name inside the partition; defaults to 'data.pb' for
        a proto-table underlying storage, otherwise 'data'.
      - 'timezone': 'PST' (default) / 'UTC' / 'EST' for the new partition's timestamp.

    Blocks until no reader is active, then marks the writer RUNNING for the
    duration of the write. Raises StorageWriteException on failure.
    """
    to_make_partition = True
    if params and 'make_partition' in params:
        to_make_partition = params['make_partition']
        params.pop('make_partition', None)
    if self._underlying_storage.get_storage_type() == StorageType.PROTO_TABLE_STORAGE:
        file_base_name = 'data.pb'
    else:
        file_base_name = 'data'
    if params and 'base_name' in params:
        file_base_name = params['base_name']
        params.pop('base_name', None)
    # Busy-wait reader/writer exclusion: only one side may run at a time.
    while self._reader_status != Status.IDLE:
        self.sys_log("Waiting for reader to finish.")
        time.sleep(TimeSleepObj.ONE_SECOND)
    self._writer_status = Status.RUNNING
    if to_make_partition:
        if not params or 'timezone' not in params or params['timezone'] == 'PST':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_pst())
        elif params['timezone'] == 'UTC':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_utc())
        elif params['timezone'] == 'EST':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_est())
        # Refresh the file tree so the new partition is visible below.
        self.initialize_from_dir(dir_name=self._file_tree.get_root_name())
    file_name = FileUtil.join_paths_to_file(
        root_dir=self._file_tree.get_leftmost_leaf(),
        base_name=file_base_name)
    if file_name != self._underlying_storage.get_file_name():
        # Sync the underlying storage to the newest partition's file.
        self.sys_log("Sync to the latest file to " + file_name)
        self._underlying_storage.initialize_from_file(file_name=file_name)
    try:
        self._underlying_storage.write(data=data, params=params)
        self._writer_status = Status.IDLE
    except Exception as err:
        # NOTE(review): on failure the writer status is left RUNNING, which
        # would block subsequent readers — confirm whether this is intended.
        self.sys_log("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        self._logger.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        raise StorageWriteException("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
def get_response_impl(backend_folder, request, lru_cache=None):
    """Persist a container snapshot request into the backend's proto table.

    Converts the incoming snapshot request into a ContainerBackendValue,
    then writes it (keyed by its start_time) into the per-container proto
    table under backend_folder, evicting the oldest row once the table
    reaches the PSLX_INTERNAL_CACHE capacity.

    Args:
        backend_folder: directory holding one '<container>.pb' table per container.
        request: snapshot request proto with container/operator state.
        lru_cache: optional cache memoizing ProtoTableStorage handles
            keyed by container name.
    """
    storage_value = ContainerBackendValue()
    storage_value.container_name = request.container_name
    storage_value.container_status = request.status
    for operator_name, operator_snapshot in dict(
            request.operator_snapshot_map).items():
        operator_info = ContainerBackendValue.OperatorInfo()
        operator_info.status = operator_snapshot.status
        for parent in operator_snapshot.node_snapshot.parents_names:
            operator_info.parents.append(parent)
        operator_info.start_time = operator_snapshot.start_time
        operator_info.end_time = operator_snapshot.end_time
        operator_info.log_file = operator_snapshot.log_file
        storage_value.operator_info_map[operator_name].CopyFrom(operator_info)
    storage_value.mode = request.mode
    storage_value.data_model = request.data_model
    storage_value.updated_time = str(TimezoneUtil.cur_time_in_pst())
    storage_value.start_time = request.start_time
    storage_value.end_time = request.end_time
    storage_value.log_file = request.log_file
    storage_value.run_cell = request.run_cell
    storage_value.snapshot_cell = request.snapshot_cell
    for key in request.counters:
        storage_value.counters[key] = request.counters[key]
    storage_value.ttl = int(
        EnvUtil.get_pslx_env_variable('PSLX_BACKEND_CONTAINER_TTL'))
    storage = lru_cache.get(
        key=storage_value.container_name) if lru_cache else None
    if not storage:
        storage = ProtoTableStorage()
        storage.initialize_from_file(file_name=FileUtil.join_paths_to_file(
            root_dir=backend_folder,
            base_name=storage_value.container_name + '.pb'))
        if lru_cache:
            # BUG FIX: cache under the container name — the same key used for
            # the lookup above. The old code keyed on backend_folder, so every
            # container missed the cache and clobbered one shared slot.
            lru_cache.set(key=storage_value.container_name, value=storage)
    all_data = storage.read_all()
    # Evict the oldest row once at capacity (capacity <= 0 disables eviction).
    if len(all_data) >= int(
            EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_CACHE')) > 0:
        key_to_delete = sorted(all_data.keys())[0]
        storage.delete(key=key_to_delete)
    storage.write(data={storage_value.start_time: storage_value})
def read(self, params=None):
    """Read from the newest partition via the underlying storage.

    Params consumed here (not forwarded):
      - 'base_name': file inside the partition; defaults to 'data.pb' for a
        proto-table underlying storage, otherwise 'data'.
      - 'reinitialize_underlying_storage': force a re-sync to the latest file.
    Remaining params are forwarded to the underlying storage's read().

    Returns:
        The underlying storage's read result, or [] when the partitioner
        is empty. Raises StorageReadException on failure.
    """
    if self._underlying_storage.get_storage_type() == StorageType.PROTO_TABLE_STORAGE:
        file_base_name = 'data.pb'
    else:
        file_base_name = 'data'
    if params and 'base_name' in params:
        file_base_name = params['base_name']
        params.pop('base_name', None)
    if params and 'reinitialize_underlying_storage' in params:
        self._reinitialize_underlying_storage(file_base_name=file_base_name)
    # Busy-wait reader/writer exclusion: only one side may run at a time.
    while self._writer_status != Status.IDLE:
        self.sys_log("Waiting for writer to finish.")
        time.sleep(TimeSleepObj.ONE_SECOND)
    self._reader_status = Status.RUNNING
    self.sys_log("Read from the latest partition.")
    latest_dir = self.get_latest_dir()
    if not latest_dir:
        self.sys_log("Current partitioner is empty, cannot read anything.")
        return []
    file_name = FileUtil.join_paths_to_file(root_dir=latest_dir, base_name=file_base_name)
    if not FileUtil.does_file_exist(file_name):
        self.sys_log("The file [" + file_name + "] to read does not exist.")
        raise StorageReadException("The file [" + file_name + "] to read does not exist.")
    if file_name != self._underlying_storage.get_file_name():
        # Sync the underlying storage to the newest partition's file.
        self.sys_log("Sync to the latest file to " + file_name)
        self._underlying_storage.initialize_from_file(file_name=file_name)
    try:
        result = self._underlying_storage.read(params=params)
        self._reader_status = Status.IDLE
        return result
    except Exception as err:
        # NOTE(review): on failure the reader status is left RUNNING, which
        # would block subsequent writers — confirm whether this is intended.
        self.sys_log("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        self._logger.error("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        raise StorageReadException("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
def write(self, data, params=None):
    """Write data into the newest partition via the underlying storage.

    Params consumed here (not forwarded to the underlying storage):
      - 'make_partition' (default True): create a new time-keyed partition first.
      - 'base_name': file name inside the partition; defaults to 'data.pb' for
        a proto-table underlying storage, otherwise 'data'.
      - 'timezone': 'PST' (default) / 'UTC' / 'EST' for the new partition's timestamp.

    Accumulates the underlying storage's RPC count on success.
    Raises StorageWriteException on failure.
    """
    to_make_partition = True
    if params and 'make_partition' in params:
        to_make_partition = params['make_partition']
        params.pop('make_partition', None)
    if self._underlying_storage.get_storage_type() == StorageType.PROTO_TABLE_STORAGE:
        file_base_name = 'data.pb'
    else:
        file_base_name = 'data'
    if params and 'base_name' in params:
        file_base_name = params['base_name']
        # BUG FIX: consume 'base_name' so it is not leaked into the params
        # forwarded to the underlying storage write — matches the sibling
        # read()/write() implementations in this file.
        params.pop('base_name', None)
    if to_make_partition:
        if not params or 'timezone' not in params or params['timezone'] == 'PST':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_pst())
        elif params['timezone'] == 'UTC':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_utc())
        elif params['timezone'] == 'EST':
            self.make_new_partition(timestamp=TimezoneUtil.cur_time_in_est())
    file_name = FileUtil.join_paths_to_file(
        root_dir=self._file_tree.get_leftmost_leaf(),
        base_name=file_base_name)
    if file_name != self._underlying_storage.get_file_name():
        # Sync the underlying storage to the newest partition's file.
        self._SYS_LOGGER.info("Sync to the latest file to " + file_name)
        self._underlying_storage.initialize_from_file(file_name=file_name)
    try:
        self._underlying_storage.write(data=data, params=params)
        self.increment_rpc_count_by(
            n=self._underlying_storage.get_rpc_call_count_and_reset())
    except Exception as err:
        self._SYS_LOGGER.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        self._logger.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        raise StorageWriteException("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
def read(self, params=None):
    """Read from the newest partition via the underlying storage.

    Params consumed here (not forwarded):
      - 'base_name': file inside the partition; defaults to 'data.pb' for a
        proto-table underlying storage, otherwise 'data'.
      - 'reinitialize_underlying_storage': force a re-sync to the latest file.
    Remaining params are forwarded to the underlying storage's read().

    Returns:
        The underlying storage's read result, or [] when the partitioner
        is empty. Raises StorageReadException on failure.
    """
    # Refresh the directory view so the newest partition is visible.
    self.initialize_from_dir(dir_name=self.get_dir_name())
    if self._underlying_storage.get_storage_type() == StorageType.PROTO_TABLE_STORAGE:
        file_base_name = 'data.pb'
    else:
        file_base_name = 'data'
    if params and 'base_name' in params:
        file_base_name = params['base_name']
        params.pop('base_name', None)
    if params and 'reinitialize_underlying_storage' in params:
        self._reinitialize_underlying_storage(file_base_name=file_base_name)
    self._SYS_LOGGER.info("Read from the latest partition.")
    latest_dir = self._get_latest_dir_internal()
    if not latest_dir:
        self._SYS_LOGGER.info("Current partitioner is empty, cannot read anything.")
        return []
    file_name = FileUtil.join_paths_to_file(root_dir=latest_dir, base_name=file_base_name)
    try:
        if file_name != self._underlying_storage.get_file_name():
            # Sync the underlying storage to the newest partition's file.
            self._SYS_LOGGER.info("Sync to the latest file to " + file_name)
            self._underlying_storage.initialize_from_file(file_name=file_name)
        result = self._underlying_storage.read(params=params)
        self.increment_rpc_count_by(
            n=self._underlying_storage.get_rpc_call_count_and_reset())
        return result
    except Exception as err:
        self._SYS_LOGGER.error("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        self._logger.error("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
        raise StorageReadException("Read dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
def initialize_from_dir(self, dir_name):
    """Bind this storage to dir_name and load (or create) its shard index map.

    When an index map already exists on disk, its size_per_shard wins over
    the instance's configured value (a mismatch is logged as an error).
    """
    self._dir_name = dir_name
    self._index_map_file = FileUtil.normalize_file_name(
        file_name=FileUtil.join_paths_to_file(
            root_dir=dir_name, base_name='index_map.pb'))
    self.increment_rpc_count_by(n=1)
    self._index_map = FileUtil.read_proto_from_file(
        proto_type=ProtoTableIndexMap,
        file_name=self._index_map_file)
    if self._index_map is not None:
        # Persisted shard size is authoritative; warn about a conflicting config.
        if self._size_per_shard and self._index_map.size_per_shard != self._size_per_shard:
            self._logger.error("Please use the correct size per shard of [" + str(self._size_per_shard) + '].')
        self._size_per_shard = self._index_map.size_per_shard
    else:
        # Fresh table: build an index map starting at shard 0.
        self._index_map = ProtoTableIndexMap()
        self._index_map.cur_shard = 0
        assert self._size_per_shard > 0
        self._index_map.size_per_shard = self._size_per_shard
    self._logger.info("Using size per shard of [" + str(self._size_per_shard) + '].')
def _shard_to_file(self, shard):
    """Map a shard index to its data file path inside the table directory."""
    shard_base_name = 'data@' + str(shard) + '.pb'
    return FileUtil.join_paths_to_file(
        root_dir=self._dir_name,
        base_name=shard_base_name,
    )
def test_join_paths_to_file(self):
    """join_paths_to_file should concatenate dir and base name with '/'."""
    expected = 'database/foo'
    actual = FileUtil.join_paths_to_file('database', 'foo')
    self.assertEqual(actual, expected)
def get_container_info(container_name, cell_name, start_time):
    """Load a container's backend record for display in the frontend.

    Reads the container's proto table from the backend folder (resolved for
    cell_name), collects up to 11 most recent runs, and expands the run
    selected by start_time (or the newest run when start_time is falsy).

    Relies on module-level globals: backend_folder, pslx_frontend_logger,
    galaxy_viewer_url.

    Returns:
        (container_info dict, operators sorted by (dependencies, name),
         list of past-run summary dicts)
    """
    container_info = {
        'log_file': '',
        'start_time': '',
        'end_time': '',
        'counter_info': [],
    }
    operators_info = []
    folder = FileUtil.convert_local_to_cell_path(
        path=backend_folder, cell=cell_name)
    pslx_frontend_logger.info(
        "Container backend checking folder [" + folder + '].')
    storage = ProtoTableStorage()
    storage.initialize_from_file(
        FileUtil.join_paths_to_file(
            root_dir=folder,
            base_name=container_name + '.pb'
        )
    )
    raw_data = storage.read_all()
    all_past_run = []
    # Newest first; stop after collecting a small window of recent runs.
    for key in sorted(list(raw_data.keys()), reverse=True):
        val = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=raw_data[key]
        )
        all_past_run.append(
            {
                'start_time': val.start_time,
                'updated_time': val.updated_time,
                'end_time': val.end_time,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=val.container_status),
                'run_cell': val.run_cell,
                'snapshot_cell': val.snapshot_cell,
            }
        )
        if len(all_past_run) > 10:
            break
    # NOTE(review): a start_time not present in raw_data would raise KeyError
    # below — confirm callers only pass keys taken from all_past_run.
    key = start_time if start_time else sorted(raw_data.keys())[-1]
    val = raw_data[key]
    result_proto = ProtoUtil.any_to_message(
        message_type=ContainerBackendValue,
        any_message=val
    )
    container_info['log_file'] = galaxy_viewer_url + result_proto.log_file
    container_info['start_time'] = result_proto.start_time
    container_info['end_time'] = result_proto.end_time
    for key in sorted(dict(result_proto.counters).keys()):
        container_info['counter_info'].append(
            {
                'name': key,
                'count': result_proto.counters[key],
            }
        )
    for key, val in dict(result_proto.operator_info_map).items():
        operators_info.append({
            'operator_name': key,
            'status': ProtoUtil.get_name_by_value(
                enum_type=Status, value=val.status),
            'start_time': val.start_time,
            'end_time': val.end_time,
            'dependencies': ', '.join(val.parents),
            'log_file': galaxy_viewer_url + val.log_file,
        })
    return (container_info,
            sorted(operators_info, key=lambda x: (x['dependencies'], x['operator_name'])),
            all_past_run)
def _partitioner_storage_impl(self, request):
    """Serve an RPC read against a partitioner storage directory.

    request.params drives the behavior:
      - 'PartitionerStorageType': which partitioner implementation to use
        (handle cached per (type, dir) in self._lru_cache_tool).
      - 'is_proto_table' ('1'/other): whether partitions hold proto tables.
      - 'base_name': file inside the partition (defaults to 'data.pb' /
        'data' depending on is_proto_table).
      - 'read_oldest': read the oldest partition instead of the newest.
      - 'start_time' / 'end_time': when start_time is present, a ranged
        read_range() is performed instead of a single-partition read.

    Returns an RPCIOResponse with dict_data (proto tables / ranged reads)
    or list_data (plain line-based reads).
    """
    self._logger.info("Getting request of partitioner storage read.")
    read_params = dict(request.params)
    is_proto_table = True if read_params['is_proto_table'] == '1' else False
    if 'base_name' in read_params:
        base_name = read_params['base_name']
    else:
        base_name = 'data.pb' if is_proto_table else 'data'
    lru_key = (read_params['PartitionerStorageType'], request.dir_name)
    self._logger.info("Partitioner type is " + read_params['PartitionerStorageType'])
    storage = self._lru_cache_tool.get(key=lru_key)
    if not storage:
        self.sys_log("Did not find the storage in cache. Making a new one...")
        partitioner_type = ProtoUtil.get_value_by_name(
            enum_type=PartitionerStorageType,
            name=read_params['PartitionerStorageType']
        )
        storage = self.PARTITIONER_TYPE_TO_IMPL[partitioner_type]()
        storage.initialize_from_dir(dir_name=request.dir_name)
        self._lru_cache_tool.set(
            key=lru_key,
            value=storage
        )
    else:
        self.sys_log("Found key in LRU cache.")
    self._logger.info('Current cache size ' + str(self._lru_cache_tool.get_cur_capacity()))
    # These two keys are control parameters, not storage read params.
    read_params.pop('PartitionerStorageType', None)
    read_params.pop('is_proto_table', None)
    if is_proto_table:
        proto_table_storage = ProtoTableStorage()
        storage.set_underlying_storage(storage=proto_table_storage)
    else:
        # Plain storage: read every line of the partition file.
        read_params['num_line'] = -1
    response = RPCIOResponse()
    if 'start_time' not in read_params:
        # calling read function
        if is_proto_table:
            # if underlying storage is proto table.
            if 'message_type' in read_params:
                assert 'proto_module' in read_params
                read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                    message_type_str=read_params['message_type'],
                    modules=read_params['proto_module']
                )
            proto_storage = ProtoTableStorage()
            if 'read_oldest' in read_params:
                proto_storage.initialize_from_file(
                    file_name=FileUtil.join_paths_to_file(
                        root_dir=storage.get_oldest_dir_in_root_directory(),
                        base_name=base_name
                    )
                )
            else:
                proto_storage.initialize_from_file(
                    file_name=FileUtil.join_paths_to_file(
                        root_dir=storage.get_latest_dir(),
                        base_name=base_name
                    )
                )
            data = proto_storage.read_all()
            # One single-element RPCListData per table key.
            for key, val in data.items():
                rpc_list_data = RPCIOResponse.RPCListData()
                rpc_data = rpc_list_data.data.add()
                rpc_data.proto_data.CopyFrom(val)
                response.dict_data[key].CopyFrom(rpc_list_data)
        else:
            # if underlying storage is not proto table.
            default_storage = DefaultStorage()
            if 'read_oldest' in read_params:
                default_storage.initialize_from_file(
                    file_name=FileUtil.join_paths_to_file(
                        root_dir=storage.get_oldest_dir_in_root_directory(),
                        base_name=base_name
                    )
                )
            else:
                default_storage.initialize_from_file(
                    file_name=FileUtil.join_paths_to_file(
                        root_dir=storage.get_latest_dir(),
                        base_name=base_name
                    )
                )
            data = default_storage.read(params={
                'num_line': -1,
            })
            rpc_list_data = RPCIOResponse.RPCListData()
            for item in data:
                rpc_data = rpc_list_data.data.add()
                rpc_data.string_data = item
            response.list_data.CopyFrom(rpc_list_data)
    else:
        # calling read_range function
        if 'start_time' in read_params:
            read_params['start_time'] = TimezoneUtil.cur_time_from_str(
                time_str=read_params['start_time']
            )
        if 'end_time' in read_params:
            read_params['end_time'] = TimezoneUtil.cur_time_from_str(
                time_str=read_params['end_time']
            )
        data = storage.read_range(params=read_params)
        if data:
            for key, val in data.items():
                rpc_list_data = RPCIOResponse.RPCListData()
                if is_proto_table:
                    # Proto tables are flattened as alternating
                    # [key string, proto payload] entries.
                    for proto_key, any_message in val.items():
                        rpc_data = rpc_list_data.data.add()
                        rpc_data.string_data = proto_key
                        rpc_data = rpc_list_data.data.add()
                        rpc_data.proto_data.CopyFrom(any_message)
                else:
                    for entry in val:
                        rpc_data = rpc_list_data.data.add()
                        rpc_data.string_data = entry
                response.dict_data[key].CopyFrom(rpc_list_data)
    return response
def get_container_snapshot(self, send_backend=True):
    """Serialize the container's current state into a ContainerSnapshot proto.

    Cell-aware variant: records the log file, run cell and snapshot cell,
    collects counters, timing and per-operator snapshots, writes the result
    to a timestamped .pb under the 'containers' snapshot subfolder, and
    optionally pushes it to the backend.

    Args:
        send_backend: when True and a backend is configured, also send the
            snapshot to the backend (best-effort; failures are logged).

    Returns:
        The populated ContainerSnapshot proto.
    """
    if not self._is_initialized:
        # Snapshotting an uninitialized container is permitted but logged loudly.
        self._logger.error(
            "Warning: taking snapshot when the container [" +
            self.get_container_name() + "] is not initialized.")
        self._SYS_LOGGER.error(
            "Warning: taking snapshot when the container [" +
            self.get_container_name() + "] is not initialized.")
    snapshot = ContainerSnapshot()
    snapshot.container_name = self._container_name
    snapshot.is_initialized = self._is_initialized
    snapshot.status = self._status
    snapshot.class_name = self.get_full_class_name()
    snapshot.mode = self._mode
    snapshot.data_model = self.DATA_MODEL
    snapshot.log_file = FileUtil.convert_local_to_cell_path(
        glogging.get_logger_file(self._logger))
    snapshot.run_cell = EnvUtil.get_other_env_variable(
        var='GALAXY_fs_cell', fallback_value='')
    snapshot.snapshot_cell = FileUtil.get_cell_from_path(
        FileUtil.convert_local_to_cell_path(self._snapshot_file_folder))
    for key, val in self._counter.items():
        snapshot.counters[key] = val
    if self._start_time:
        snapshot.start_time = str(self._start_time)
    if self._end_time:
        snapshot.end_time = str(self._end_time)
    for op_name, op in self._node_name_to_node_dict.items():
        # Dummy operators are placeholders and carry no state worth saving.
        if 'Dummy' in op.get_class_name():
            continue
        op_output_file = FileUtil.join_paths_to_file(
            root_dir=FileUtil.join_paths_to_dir(
                FileUtil.dir_name(self._snapshot_file_folder), 'operators'),
            base_name=op_name + '_SNAPSHOT_' + str(TimezoneUtil.cur_time_in_pst()) + '.pb')
        snapshot.operator_snapshot_map[op_name].CopyFrom(
            op.get_operator_snapshot(output_file=op_output_file))
    self._SYS_LOGGER.info(
        "Snapshot saved to folder [" +
        FileUtil.convert_local_to_cell_path(self._snapshot_file_folder) + '].')
    self._logger.info(
        "Snapshot saved to folder [" +
        FileUtil.convert_local_to_cell_path(self._snapshot_file_folder) + '].')
    output_file_name = FileUtil.join_paths_to_file(
        root_dir=FileUtil.join_paths_to_dir(
            FileUtil.dir_name(self._snapshot_file_folder), 'containers'),
        base_name=self._container_name + '_SNAPSHOT_' +
        str(TimezoneUtil.cur_time_in_pst()) + '.pb')
    FileUtil.write_proto_to_file(proto=snapshot, file_name=output_file_name)
    if self._backend and send_backend:
        try:
            self._backend.send_to_backend(snapshot=snapshot)
        except Exception as err:
            # Backend delivery is best-effort; the local snapshot is already saved.
            self._logger.error("Sending backend failed with error " + str(err) + '.')
    return snapshot