def get_response_and_status_impl(self, request):
    """Describe the path named by ``request.file_path`` in an RPC response.

    When ``FileUtil.is_file`` reports a file, one ``files_info`` entry is
    added for it. Otherwise the path is handled as a directory: one
    ``files_info`` entry is added per name returned by
    ``FileUtil.list_files_in_dir`` and one ``directories_info`` entry
    (path only) per name returned by ``FileUtil.list_dirs_in_dir``.

    Args:
        request: object exposing a ``file_path`` attribute.

    Returns:
        A ``(FileViewerRPCResponse, Status.SUCCEEDED)`` tuple.

    NOTE(review): the ``FileUtil.die_if_*`` helpers presumably abort or
    raise when the path does not exist -- confirm against FileUtil.
    """
    response = FileViewerRPCResponse()

    def _append_file_entry(path):
        # Each file entry carries path, size and stringified modified time.
        entry = response.files_info.add()
        entry.file_path = path
        entry.file_size = FileUtil.get_file_size(file_name=path)
        entry.modified_time = str(
            FileUtil.get_file_modified_time(file_name=path))

    target = request.file_path
    if not FileUtil.is_file(path_name=target):
        directory = FileUtil.die_if_dir_not_exist(dir_name=target)
        for child_file in FileUtil.list_files_in_dir(dir_name=directory):
            _append_file_entry(child_file)
        for child_dir in FileUtil.list_dirs_in_dir(dir_name=directory):
            response.directories_info.add().file_path = child_dir
    else:
        _append_file_entry(FileUtil.die_if_file_not_exist(file_name=target))

    return response, Status.SUCCEEDED
def _delete_file(self, cur_time, path_name):
    """Remove expired files at or below ``path_name`` and count the outcome.

    A file is removed when ``FileUtil.get_ttl_from_path`` yields a truthy
    TTL and ``cur_time`` minus the file's modified time exceeds it. Anything
    that ``FileUtil.is_file`` does not report as a file is walked as a
    directory: its files first, then its sub-directories, recursively.

    Args:
        cur_time: reference time; must support subtraction with the value
            returned by ``FileUtil.get_file_modified_time``.
        path_name: file or directory path to process.

    Returns:
        ``(num_file_removed, num_file_failed)`` accumulated over the subtree.
    """
    removed, failed = 0, 0

    if not FileUtil.is_file(path_name=path_name):
        # Files are listed and processed before sub-directories are listed.
        for list_children in (FileUtil.list_files_in_dir,
                              FileUtil.list_dirs_in_dir):
            for child in list_children(dir_name=path_name):
                child_removed, child_failed = self._delete_file(
                    cur_time=cur_time, path_name=child)
                removed += child_removed
                failed += child_failed
        return removed, failed

    ttl = FileUtil.get_ttl_from_path(path_name=path_name)
    expired = ttl and cur_time - FileUtil.get_file_modified_time(
        file_name=path_name) > ttl
    if not expired:
        return removed, failed

    self._logger.info("Removing file " + path_name + '...')
    try:
        # Removal happens while the file lock is held.
        with FileLockTool(protected_file_path=path_name, read_mode=True,
                          timeout=TimeSleepObj.ONE_TENTH_SECOND):
            FileUtil.remove_file(file_name=path_name)
            removed += 1
            self.counter_increment("num_file_removed")
    except Exception as err:
        # Failures are counted and logged instead of aborting the walk.
        failed += 1
        self.counter_increment("num_file_failed_to_be_removed")
        self._logger.error("Removing file " + path_name +
                           ' failed with err ' + str(err) + '.')

    return removed, failed
def _recursively_check_dir_deletable(self, dir_name):
    """Return True when ``dir_name`` holds no files anywhere in its subtree.

    The directory fails the check as soon as ``FileUtil.list_files_in_dir``
    returns something truthy for it or for any nested sub-directory.

    Args:
        dir_name: directory path to inspect.

    Returns:
        ``False`` if a file is found in the subtree, ``True`` otherwise.
    """
    if FileUtil.list_files_in_dir(dir_name=dir_name):
        return False

    children = FileUtil.list_dirs_in_dir(dir_name=dir_name)
    # all() stops at the first sub-directory that is not deletable.
    return all(
        self._recursively_check_dir_deletable(dir_name=child)
        for child in (children or ())
    )
def get_containers_info():
    """Collect the latest backend record of every known container.

    Record files are gathered from ``backend_folder`` (one listing per cell
    when the folder is a local path, a single listing otherwise). For each
    non-empty file the value stored under the greatest key is decoded as a
    ``ContainerBackendValue``:

    * if the record has a positive ``ttl`` (in days), a non-empty
      ``updated_time`` and is at least that old, its file is removed and
      the record is skipped;
    * otherwise it is turned into a display dict.

    Exactly one dict is kept per ``container_name``: the one with the
    greatest ``updated_time`` (a later-visited record wins ties). Files are
    visited in sorted order so the result does not depend on set iteration
    order. The previous implementation could return several entries for
    the same container, depending on the order files happened to be
    visited in.

    Returns:
        list of dicts with keys ``container_name``, ``status``,
        ``updated_time``, ``mode``, ``data_model``, ``run_cell`` and
        ``snapshot_cell``.
    """
    if FileUtil.is_local_path(backend_folder):
        all_cells = gclient.list_cells()
    else:
        all_cells = ['']

    all_proto_files = set()
    for cell_name in all_cells:
        folder = FileUtil.convert_local_to_cell_path(
            path=backend_folder, cell=cell_name)
        all_proto_files.update(FileUtil.list_files_in_dir(folder))

    latest_by_name = {}
    for proto_file in sorted(all_proto_files):
        storage = ProtoTableStorage()
        storage.initialize_from_file(file_name=proto_file)
        raw_data = storage.read_all()
        if not raw_data:
            continue

        # Only the entry stored under the greatest key is considered.
        result_proto = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=raw_data[max(raw_data)],
        )

        ttl = result_proto.ttl
        if (ttl > 0 and result_proto.updated_time
                and TimezoneUtil.cur_time_in_pst()
                - TimezoneUtil.cur_time_from_str(result_proto.updated_time)
                >= datetime.timedelta(days=ttl)):
            # Record outlived its TTL: drop the backing file.
            FileUtil.remove_file(storage.get_file_name())
            continue

        container_info = {
            'container_name': result_proto.container_name,
            'status': ProtoUtil.get_name_by_value(
                enum_type=Status, value=result_proto.container_status),
            'updated_time': result_proto.updated_time,
            'mode': ProtoUtil.get_name_by_value(
                enum_type=ModeType, value=result_proto.mode),
            'data_model': ProtoUtil.get_name_by_value(
                enum_type=DataModelType, value=result_proto.data_model),
            'run_cell': result_proto.run_cell,
            'snapshot_cell': result_proto.snapshot_cell,
        }

        name = container_info['container_name']
        previous = latest_by_name.get(name)
        if (previous is None
                or container_info['updated_time'] >= previous['updated_time']):
            # Replace, rather than append next to, an older record.
            latest_by_name[name] = container_info

    return list(latest_by_name.values())
def _get_latest_status_of_operators(self):
    """Map each operator name to the status read from a snapshot file.

    Snapshot files are listed from the ``operators`` directory next to
    ``self._snapshot_file_folder`` and walked in reverse listing order. The
    operator name is the second ``_``-separated token of the file's base
    name. Only the first file met for each operator is used, and the walk
    stops once every node in ``self._node_name_to_node_dict`` has a status.

    NOTE(review): reverse order presumably visits newest snapshots first;
    that assumes ``FileUtil.list_files_in_dir`` lists oldest first --
    confirm.

    Returns:
        dict mapping operator name to the value returned by that node's
        ``get_status_from_snapshot``.
    """
    operators_dir = FileUtil.join_paths_to_dir(
        FileUtil.dir_name(self._snapshot_file_folder), 'operators')
    latest_status = {}

    for snapshot_path in reversed(
            FileUtil.list_files_in_dir(dir_name=operators_dir)):
        base_name = snapshot_path.rsplit('/', 1)[-1]
        name = base_name.split('_')[1]

        if name not in latest_status:
            announcement = "Getting status for operator [" + name + '].'
            self._logger.info(announcement)
            self._SYS_LOGGER.info(announcement)

            node = self._node_name_to_node_dict[name]
            latest_status[name] = node.get_status_from_snapshot(
                snapshot_file=snapshot_path)

            status_name = ProtoUtil.get_name_by_value(
                enum_type=Status, value=latest_status[name])
            self._SYS_LOGGER.info(
                "Status for operator [" + name + '] is [' + status_name + '].')

        # Every known node has a status: no need to look at older files.
        if len(latest_status) == len(self._node_name_to_node_dict):
            break

    return latest_status
def read_range(self, params):
    """Read every file in the partitions between two timestamps.

    The method waits until ``self._writer_status`` is ``Status.IDLE``,
    marks the reader as ``Status.RUNNING`` and walks the partition
    directories from ``params['start_time']`` to ``params['end_time']``
    (both truncated to the partition granularity and clamped to the oldest
    and latest existing partitions). Every file of every existing partition
    directory is read through a ``ProtoTableStorage`` or ``DefaultStorage``,
    depending on the underlying storage type.

    The reader status is always reset to ``Status.IDLE`` before the method
    exits. Previously it stayed ``RUNNING`` after the empty-partitioner
    early return and after any exception.

    Args:
        params: mapping with ``'start_time'`` and ``'end_time'``; both must
            support ``.replace(...)`` with datetime fields, and start must
            not be later than end.

    Returns:
        dict mapping file name to what the storage returned for it
        (``read_all()`` for proto tables, ``read({'num_line': -1})``
        otherwise); ``{}`` when the oldest or latest directory is missing.

    Raises:
        AssertionError: if the params are missing or out of order.
        StorageReadException: if walking or reading the partitions fails;
            the original exception is attached as ``__cause__``.
    """

    def _reformat_time(timestamp):
        # Truncate to the start of the enclosing partition; tzinfo is
        # dropped (presumably because timestamps parsed from directory
        # names are naive -- confirm).
        kind = self.PARTITIONER_TYPE
        fields = {'second': 0, 'microsecond': 0, 'tzinfo': None}
        if kind == PartitionerStorageType.YEARLY:
            fields.update(month=1, day=1, hour=0, minute=0)
        elif kind == PartitionerStorageType.MONTHLY:
            fields.update(day=1, hour=0, minute=0)
        elif kind == PartitionerStorageType.DAILY:
            fields.update(hour=0, minute=0)
        elif kind == PartitionerStorageType.HOURLY:
            fields.update(minute=0)
        return timestamp.replace(**fields)

    def _next_partition(timestamp):
        # Step to the first instant of the following partition.
        kind = self.PARTITIONER_TYPE
        if kind == PartitionerStorageType.YEARLY:
            return timestamp.replace(year=timestamp.year + 1, month=1, day=1)
        if kind == PartitionerStorageType.MONTHLY:
            if timestamp.month == 12:
                return timestamp.replace(
                    year=timestamp.year + 1, month=1, day=1)
            return timestamp.replace(month=timestamp.month + 1)
        if kind == PartitionerStorageType.DAILY:
            return timestamp + datetime.timedelta(days=1)
        if kind == PartitionerStorageType.HOURLY:
            return timestamp + datetime.timedelta(hours=1)
        return timestamp + datetime.timedelta(minutes=1)

    assert 'start_time' in params and 'end_time' in params and params[
        'start_time'] <= params['end_time']

    while self._writer_status != Status.IDLE:
        self.sys_log("Waiting for writer to finish.")
        time.sleep(TimeSleepObj.ONE_SECOND)

    self._reader_status = Status.RUNNING
    try:
        oldest_dir, latest_dir = self.get_oldest_dir(), self.get_latest_dir()
        if not latest_dir or not oldest_dir:
            if self.is_empty():
                empty_message = ("Current partitioner [" +
                                 self.get_dir_name() +
                                 "] is empty, cannot read anything.")
                self._logger.warning(empty_message)
                self.sys_log(empty_message)
            return {}

        # Strip the root so only the time-encoding part of the path is parsed.
        oldest_dir = oldest_dir.replace(self._file_tree.get_root_name(), '')
        latest_dir = latest_dir.replace(self._file_tree.get_root_name(), '')
        oldest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=oldest_dir)
        latest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=latest_dir)

        start_time = max(_reformat_time(params['start_time']),
                         oldest_timestamp)
        end_time = min(_reformat_time(params['end_time']), latest_timestamp)

        result = {}
        try:
            while start_time <= end_time:
                dir_list = FileUtil.parse_timestamp_to_dir(
                    timestamp=start_time).split('/')
                # Keep only as many path components as this partitioner uses.
                dir_name = '/'.join(
                    dir_list[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                        self.PARTITIONER_TYPE]])
                dir_name = FileUtil.join_paths_to_dir(
                    root_dir=self._file_tree.get_root_name(),
                    base_name=dir_name)

                if FileUtil.does_dir_exist(dir_name=dir_name):
                    if (self._underlying_storage.get_storage_type()
                            == StorageType.PROTO_TABLE_STORAGE):
                        storage = ProtoTableStorage()
                    else:
                        storage = DefaultStorage()

                    for file_name in FileUtil.list_files_in_dir(
                            dir_name=dir_name):
                        storage.initialize_from_file(file_name=file_name)
                        if (storage.get_storage_type()
                                == StorageType.PROTO_TABLE_STORAGE):
                            result[file_name] = storage.read_all()
                        else:
                            result[file_name] = storage.read(
                                params={'num_line': -1})

                start_time = _next_partition(start_time)
            return result
        except Exception as err:
            error_message = ("Read range in dir [" + self.get_dir_name() +
                             "] got exception " + str(err) + '.')
            self.sys_log(error_message)
            self._logger.error(error_message)
            raise StorageReadException(error_message) from err
    finally:
        # Release the reader flag on every exit path, not only on success.
        self._reader_status = Status.IDLE
def read_range(self, params):
    """Batch-read every file in the partitions between two timestamps.

    After re-initializing from the partitioner directory, the method walks
    the partition directories from ``params['start_time']`` to
    ``params['end_time']`` (both truncated to the partition granularity and
    clamped to the oldest and latest existing partitions), gathers all file
    names, and reads them in one ``gclient_ext`` call.

    Args:
        params: mapping with ``'start_time'`` and ``'end_time'``; both must
            support ``.replace(...)`` with datetime fields, and start must
            not be later than end.

    Returns:
        dict keyed by file name. For proto-table storage each value is
        ``dict(message.data)``; otherwise it is the file text with trailing
        whitespace stripped and split on newlines. ``{}`` when the oldest
        or latest directory is missing.

    Raises:
        AssertionError: if the params are missing or out of order.
        StorageReadException: if walking or reading the partitions fails.
    """
    self.initialize_from_dir(dir_name=self.get_dir_name())

    def _reformat_time(timestamp):
        # Truncate to the start of the enclosing partition and drop tzinfo.
        kind = self.PARTITIONER_TYPE
        fields = {'second': 0, 'microsecond': 0, 'tzinfo': None}
        if kind == PartitionerStorageType.YEARLY:
            fields.update(month=1, day=1, hour=0, minute=0)
        elif kind == PartitionerStorageType.MONTHLY:
            fields.update(day=1, hour=0, minute=0)
        elif kind == PartitionerStorageType.DAILY:
            fields.update(hour=0, minute=0)
        elif kind == PartitionerStorageType.HOURLY:
            fields.update(minute=0)
        return timestamp.replace(**fields)

    def _step_forward(timestamp):
        # Step to the first instant of the following partition.
        kind = self.PARTITIONER_TYPE
        if kind == PartitionerStorageType.YEARLY:
            return timestamp.replace(year=timestamp.year + 1, month=1, day=1)
        if kind == PartitionerStorageType.MONTHLY:
            if timestamp.month == 12:
                return timestamp.replace(
                    year=timestamp.year + 1, month=1, day=1)
            return timestamp.replace(month=timestamp.month + 1)
        if kind == PartitionerStorageType.DAILY:
            return timestamp + datetime.timedelta(days=1)
        if kind == PartitionerStorageType.HOURLY:
            return timestamp + datetime.timedelta(hours=1)
        return timestamp + datetime.timedelta(minutes=1)

    assert 'start_time' in params and 'end_time' in params and params[
        'start_time'] <= params['end_time']

    oldest_dir = self._get_oldest_dir_in_root_directory_interal()
    latest_dir = self._get_latest_dir_internal()
    if not latest_dir or not oldest_dir:
        empty_message = ("Current partitioner [" + self.get_dir_name() +
                         "] is empty, cannot read anything.")
        self._logger.warning(empty_message)
        self._SYS_LOGGER.warning(empty_message)
        return {}

    # Strip the root so only the time-encoding part of the path is parsed.
    oldest_dir = oldest_dir.replace(self._file_tree.get_root_name(), '')
    latest_dir = latest_dir.replace(self._file_tree.get_root_name(), '')
    oldest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=oldest_dir)
    latest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=latest_dir)

    cursor = max(_reformat_time(params['start_time']), oldest_timestamp)
    end_time = min(_reformat_time(params['end_time']), latest_timestamp)

    try:
        all_file_names = []
        while cursor <= end_time:
            components = FileUtil.parse_timestamp_to_dir(
                timestamp=cursor).split('/')
            # Keep only as many path components as this partitioner uses.
            depth = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
            partition_dir = FileUtil.join_paths_to_dir(
                root_dir=self._file_tree.get_root_name(),
                base_name='/'.join(components[:depth]))

            try:
                self.increment_rpc_count_by(n=1)
                all_file_names.extend(
                    FileUtil.list_files_in_dir(dir_name=partition_dir))
            except Exception:
                # Best effort: a partition that cannot be listed is skipped.
                pass

            cursor = _step_forward(cursor)

        self.increment_rpc_count_by(n=1)
        if (self._underlying_storage.get_storage_type()
                == StorageType.PROTO_TABLE_STORAGE):
            tables = gclient_ext.read_proto_messages(
                paths=all_file_names, message_type=ProtoTable)
            return {
                file_name: dict(table.data)
                for file_name, table in tables.items()
            }

        texts = gclient_ext.read_txts(all_file_names)
        return {
            file_name: text.rstrip().split('\n')
            for file_name, text in texts.items()
        }
    except Exception as err:
        error_message = ("Read range in dir [" + self.get_dir_name() +
                         "] got exception " + str(err) + '.')
        self._SYS_LOGGER.error(error_message)
        self._logger.error(error_message)
        raise StorageReadException(error_message)