Esempio n. 1
0
    def get_response_and_status_impl(self, request):
        """Build a file-viewer response describing ``request.file_path``.

        When the path is reported as a file by ``FileUtil.is_file``, the
        response carries a single ``files_info`` entry for it. Otherwise the
        path is handled as a directory: one ``files_info`` entry is added per
        file listed in it, and one ``directories_info`` entry per listed
        sub-directory.

        Args:
            request: Request object exposing a ``file_path`` attribute.

        Returns:
            A ``(response, Status.SUCCEEDED)`` tuple.
        """
        target_path = request.file_path
        response = FileViewerRPCResponse()

        def _append_file_entry(path):
            # One entry holds the path, its size and the stringified
            # modification time.
            entry = response.files_info.add()
            entry.file_path = path
            entry.file_size = FileUtil.get_file_size(file_name=path)
            entry.modified_time = str(
                FileUtil.get_file_modified_time(file_name=path))

        if FileUtil.is_file(path_name=target_path):
            _append_file_entry(
                FileUtil.die_if_file_not_exist(file_name=target_path))
            return response, Status.SUCCEEDED

        directory = FileUtil.die_if_dir_not_exist(dir_name=target_path)
        for child_file in FileUtil.list_files_in_dir(dir_name=directory):
            _append_file_entry(child_file)
        # Sub-directories are listed only after all files were recorded.
        for child_dir in FileUtil.list_dirs_in_dir(dir_name=directory):
            response.directories_info.add().file_path = child_dir

        return response, Status.SUCCEEDED
Esempio n. 2
0
    def _delete_file(self, cur_time, path_name):
        """Recursively remove files under ``path_name`` whose TTL has elapsed.

        A file is removed when ``FileUtil.get_ttl_from_path`` yields a truthy
        TTL and ``cur_time`` minus the file's modified time is greater than
        that TTL. Any path that is not a file is treated as a directory: its
        listed files are processed first, then its listed sub-directories.

        Args:
            cur_time: Reference time compared against file modified times.
            path_name: File or directory path to process.

        Returns:
            A ``(num_file_removed, num_file_failed)`` tuple for this subtree.
        """
        removed_count, failed_count = 0, 0

        if not FileUtil.is_file(path_name=path_name):
            # Files are handled before sub-directories; each listing is only
            # requested when its turn comes.
            for list_children in (FileUtil.list_files_in_dir,
                                  FileUtil.list_dirs_in_dir):
                for child_path in list_children(dir_name=path_name):
                    child_removed, child_failed = self._delete_file(
                        cur_time=cur_time, path_name=child_path)
                    removed_count += child_removed
                    failed_count += child_failed
            return removed_count, failed_count

        ttl = FileUtil.get_ttl_from_path(path_name=path_name)
        if not ttl:
            # No TTL for this path: nothing to expire.
            return removed_count, failed_count

        file_age = cur_time - FileUtil.get_file_modified_time(
            file_name=path_name)
        if not file_age > ttl:
            return removed_count, failed_count

        self._logger.info("Removing file " + path_name + '...')
        try:
            with FileLockTool(protected_file_path=path_name,
                              read_mode=True,
                              timeout=TimeSleepObj.ONE_TENTH_SECOND):
                FileUtil.remove_file(file_name=path_name)
            removed_count += 1
            self.counter_increment("num_file_removed")
        except Exception as err:
            # Best effort: a failed removal is counted and logged, not raised.
            failed_count += 1
            self.counter_increment("num_file_failed_to_be_removed")
            self._logger.error("Removing file " + path_name +
                               ' failed with err ' + str(err) + '.')

        return removed_count, failed_count
Esempio n. 3
0
    def _recursively_check_dir_deletable(self, dir_name):
        """Tell whether ``dir_name`` contains no files at any depth.

        Args:
            dir_name: Directory to inspect.

        Returns:
            False as soon as a directory in the subtree lists at least one
            file; True when neither ``dir_name`` nor any nested sub-directory
            lists a file.
        """
        if FileUtil.list_files_in_dir(dir_name=dir_name):
            return False

        # A falsy listing means there is nothing further to descend into.
        child_dirs = FileUtil.list_dirs_in_dir(dir_name=dir_name)
        return all(
            self._recursively_check_dir_deletable(dir_name=child)
            for child in (child_dirs or ()))
def get_containers_info():
    """Collect the most recent status record of every known container.

    Every proto file found under ``backend_folder`` (resolved per cell when
    the folder is a local path) is opened as a ``ProtoTableStorage``; the
    value stored under its greatest key is decoded as a
    ``ContainerBackendValue``. Records whose ``ttl`` (in days) has elapsed
    since ``updated_time`` have their backing file removed and are skipped.

    Exactly one entry is returned per ``container_name``: the one with the
    greatest ``updated_time``. The files are iterated from a set, so the
    previous "append if newer" logic could return both a stale and a fresh
    record for the same container depending on iteration order.

    Returns:
        A list of dicts with the keys ``container_name``, ``status``,
        ``updated_time``, ``mode``, ``data_model``, ``run_cell`` and
        ``snapshot_cell``.
    """
    if not FileUtil.is_local_path(backend_folder):
        all_cells = ['']
    else:
        all_cells = gclient.list_cells()

    all_proto_files = set()
    for cell_name in all_cells:
        folder = FileUtil.convert_local_to_cell_path(
            path=backend_folder, cell=cell_name)
        all_proto_files.update(FileUtil.list_files_in_dir(folder))

    # container_name -> newest container_info seen so far.
    latest_by_name = {}
    for proto_file in all_proto_files:
        storage = ProtoTableStorage()
        storage.initialize_from_file(file_name=proto_file)
        raw_data = storage.read_all()
        if not raw_data:
            continue

        # Only the entry under the greatest key is considered.
        result_proto = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=raw_data[max(raw_data.keys())]
        )
        ttl = result_proto.ttl

        if ttl > 0 and result_proto.updated_time:
            age = (TimezoneUtil.cur_time_in_pst() -
                   TimezoneUtil.cur_time_from_str(result_proto.updated_time))
            if age >= datetime.timedelta(days=ttl):
                # Expired record: drop its backing file and do not report it.
                FileUtil.remove_file(storage.get_file_name())
                continue

        container_info = {
            'container_name': result_proto.container_name,
            'status': ProtoUtil.get_name_by_value(
                enum_type=Status, value=result_proto.container_status),
            'updated_time': result_proto.updated_time,
            'mode': ProtoUtil.get_name_by_value(
                enum_type=ModeType, value=result_proto.mode),
            'data_model': ProtoUtil.get_name_by_value(
                enum_type=DataModelType, value=result_proto.data_model),
            'run_cell': result_proto.run_cell,
            'snapshot_cell': result_proto.snapshot_cell,
        }
        name = container_info['container_name']
        previous = latest_by_name.get(name)
        # Replace (rather than append next to) an older record of the same
        # container, so the result never holds duplicates.
        if (previous is None or
                container_info['updated_time'] >= previous['updated_time']):
            latest_by_name[name] = container_info

    return list(latest_by_name.values())
Esempio n. 5
0
 def _get_latest_status_of_operators(self):
     """Read one status per operator from the operator snapshot files.

     Snapshot files are taken from the ``operators`` directory next to
     ``self._snapshot_file_folder`` and visited in reverse listing order; the
     first file met for an operator supplies its status (presumably the most
     recent one, if the listing is chronologically sorted; verify against
     ``FileUtil.list_files_in_dir``). The operator name is the second
     ``_``-separated token of the file's base name. Iteration stops once as
     many operators were resolved as ``self._node_name_to_node_dict`` holds.

     Returns:
         A dict mapping operator name to the status read from its snapshot.
     """
     operators_dir = FileUtil.join_paths_to_dir(
         FileUtil.dir_name(self._snapshot_file_folder), 'operators')
     latest_status = {}
     for snapshot_path in reversed(
             FileUtil.list_files_in_dir(dir_name=operators_dir)):
         base_name = snapshot_path.split('/')[-1]
         operator_name = base_name.split('_')[1]
         if operator_name not in latest_status:
             fetch_msg = ("Getting status for operator [" + operator_name +
                          '].')
             self._logger.info(fetch_msg)
             self._SYS_LOGGER.info(fetch_msg)
             node = self._node_name_to_node_dict[operator_name]
             latest_status[operator_name] = node.get_status_from_snapshot(
                 snapshot_file=snapshot_path)
             status_name = ProtoUtil.get_name_by_value(
                 enum_type=Status, value=latest_status[operator_name])
             self._SYS_LOGGER.info("Status for operator [" + operator_name +
                                   '] is [' + status_name + '].')
         if len(latest_status) == len(self._node_name_to_node_dict):
             break
     return latest_status
Esempio n. 6
0
    def read_range(self, params):
        """Read all files of the partitions between two timestamps.

        Both bounds are truncated to the partition granularity given by
        ``self.PARTITIONER_TYPE`` (with tzinfo dropped) and clamped to the
        oldest and latest partition directories. Every existing partition
        directory in that range is then read file by file.

        The call waits until ``self._writer_status`` is ``Status.IDLE``, marks
        the reader as ``Status.RUNNING`` while working, and always resets it
        to ``Status.IDLE`` on the way out, including the empty-partitioner
        early return and error paths, which previously left the reader
        flagged as running.

        Args:
            params: Dict with ``'start_time'`` and ``'end_time'`` datetimes,
                where ``start_time <= end_time``.

        Returns:
            A dict mapping file name to its content: ``read_all()`` for proto
            table storage, otherwise ``read(params={'num_line': -1})``. An
            empty dict when the partitioner is empty.

        Raises:
            AssertionError: If the bounds are missing or out of order.
            StorageReadException: If reading the partitions fails.
        """
        def _reformat_time(timestamp):
            # Truncate to the start of the enclosing partition and make the
            # value naive; unknown partitioner types fall back to minutes.
            fields = {'second': 0, 'microsecond': 0, 'tzinfo': None}
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                fields.update(month=1, day=1, hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                fields.update(day=1, hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                fields.update(hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                fields.update(minute=0)
            return timestamp.replace(**fields)

        def _next_partition(timestamp):
            # Advance by exactly one partition of the configured granularity.
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                return timestamp.replace(year=timestamp.year + 1,
                                         month=1,
                                         day=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                if timestamp.month == 12:
                    return timestamp.replace(year=timestamp.year + 1,
                                             month=1,
                                             day=1)
                return timestamp.replace(month=timestamp.month + 1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                return timestamp + datetime.timedelta(days=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                return timestamp + datetime.timedelta(hours=1)
            return timestamp + datetime.timedelta(minutes=1)

        assert 'start_time' in params and 'end_time' in params and params[
            'start_time'] <= params['end_time']
        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._reader_status = Status.RUNNING
        try:
            oldest_dir, latest_dir = (self.get_oldest_dir(),
                                      self.get_latest_dir())
            if not latest_dir or not oldest_dir:
                # NOTE(review): when a bound is missing but is_empty() is
                # False, execution continues and fails on the missing
                # directory below; confirm whether that state can occur.
                if self.is_empty():
                    empty_msg = ("Current partitioner [" +
                                 self.get_dir_name() +
                                 "] is empty, cannot read anything.")
                    self._logger.warning(empty_msg)
                    self.sys_log(empty_msg)
                    return {}

            root_name = self._file_tree.get_root_name()
            oldest_timestamp = FileUtil.parse_dir_to_timestamp(
                dir_name=oldest_dir.replace(root_name, ''))
            latest_timestamp = FileUtil.parse_dir_to_timestamp(
                dir_name=latest_dir.replace(root_name, ''))
            start_time = max(_reformat_time(params['start_time']),
                             oldest_timestamp)
            end_time = min(_reformat_time(params['end_time']),
                           latest_timestamp)
            result = {}
            try:
                while start_time <= end_time:
                    dir_list = FileUtil.parse_timestamp_to_dir(
                        timestamp=start_time).split('/')
                    # Keep only as many path levels as the partition depth.
                    dir_name = '/'.join(
                        dir_list[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                            self.PARTITIONER_TYPE]])
                    dir_name = FileUtil.join_paths_to_dir(
                        root_dir=self._file_tree.get_root_name(),
                        base_name=dir_name)
                    if FileUtil.does_dir_exist(dir_name=dir_name):
                        if self._underlying_storage.get_storage_type(
                        ) == StorageType.PROTO_TABLE_STORAGE:
                            storage = ProtoTableStorage()
                        else:
                            storage = DefaultStorage()
                        file_names = FileUtil.list_files_in_dir(
                            dir_name=dir_name)
                        for file_name in file_names:
                            storage.initialize_from_file(file_name=file_name)
                            if storage.get_storage_type(
                            ) == StorageType.PROTO_TABLE_STORAGE:
                                result[file_name] = storage.read_all()
                            else:
                                result[file_name] = storage.read(
                                    params={'num_line': -1})

                    start_time = _next_partition(start_time)

                return result
            except Exception as err:
                error_msg = ("Read range in dir [" + self.get_dir_name() +
                             "] got exception " + str(err) + '.')
                self.sys_log(error_msg)
                self._logger.error(error_msg)
                raise StorageReadException(error_msg)
        finally:
            # Release the reader flag on every exit path so a waiting writer
            # is never blocked by a failed or short-circuited read.
            self._reader_status = Status.IDLE
Esempio n. 7
0
    def read_range(self, params):
        """Read all files of the partitions between two timestamps.

        The partitioner is first re-initialized from its directory. Both
        bounds are truncated to the granularity given by
        ``self.PARTITIONER_TYPE`` (with tzinfo dropped) and clamped to the
        oldest and latest partition directories. File names are gathered
        partition by partition, then fetched in one batched read.

        Args:
            params: Dict with ``'start_time'`` and ``'end_time'`` datetimes,
                where ``start_time <= end_time``.

        Returns:
            A dict mapping file name to its content: a dict copy of the
            table's ``data`` for proto table storage, otherwise the list of
            lines of the right-stripped text. An empty dict when no oldest or
            latest partition directory is found.

        Raises:
            AssertionError: If the bounds are missing or out of order.
            StorageReadException: If collecting or reading the files fails.
        """
        self.initialize_from_dir(dir_name=self.get_dir_name())

        def _truncate_to_partition(timestamp):
            # Zero every field finer than the partition granularity; unknown
            # partitioner types fall back to minute granularity.
            fields = {'second': 0, 'microsecond': 0, 'tzinfo': None}
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                fields.update(month=1, day=1, hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                fields.update(day=1, hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                fields.update(hour=0, minute=0)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                fields.update(minute=0)
            return timestamp.replace(**fields)

        def _next_partition(timestamp):
            # Step forward by exactly one partition.
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                return timestamp.replace(year=timestamp.year + 1,
                                         month=1,
                                         day=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                if timestamp.month == 12:
                    return timestamp.replace(year=timestamp.year + 1,
                                             month=1,
                                             day=1)
                return timestamp.replace(month=timestamp.month + 1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                return timestamp + datetime.timedelta(days=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                return timestamp + datetime.timedelta(hours=1)
            return timestamp + datetime.timedelta(minutes=1)

        assert 'start_time' in params and 'end_time' in params and params[
            'start_time'] <= params['end_time']

        oldest_dir = self._get_oldest_dir_in_root_directory_interal()
        latest_dir = self._get_latest_dir_internal()
        if not latest_dir or not oldest_dir:
            empty_msg = ("Current partitioner [" + self.get_dir_name() +
                         "] is empty, cannot read anything.")
            self._logger.warning(empty_msg)
            self._SYS_LOGGER.warning(empty_msg)
            return {}

        root_name = self._file_tree.get_root_name()
        oldest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=oldest_dir.replace(root_name, ''))
        latest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=latest_dir.replace(root_name, ''))
        cursor = max(_truncate_to_partition(params['start_time']),
                     oldest_timestamp)
        last_partition = min(_truncate_to_partition(params['end_time']),
                             latest_timestamp)
        try:
            collected_files = []
            while cursor <= last_partition:
                path_parts = FileUtil.parse_timestamp_to_dir(
                    timestamp=cursor).split('/')
                depth = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                    self.PARTITIONER_TYPE]
                partition_dir = FileUtil.join_paths_to_dir(
                    root_dir=self._file_tree.get_root_name(),
                    base_name='/'.join(path_parts[:depth]))
                try:
                    self.increment_rpc_count_by(n=1)
                    collected_files.extend(
                        FileUtil.list_files_in_dir(dir_name=partition_dir))
                except Exception as _:
                    # A partition that cannot be listed is skipped.
                    pass

                cursor = _next_partition(cursor)

            # One more RPC is counted for the batched read below.
            self.increment_rpc_count_by(n=1)
            if self._underlying_storage.get_storage_type(
            ) == StorageType.PROTO_TABLE_STORAGE:
                tables = gclient_ext.read_proto_messages(
                    paths=collected_files, message_type=ProtoTable)
                return {
                    file_name: dict(table.data)
                    for file_name, table in tables.items()
                }

            texts = gclient_ext.read_txts(collected_files)
            return {
                file_name: text.rstrip().split('\n')
                for file_name, text in texts.items()
            }
        except Exception as err:
            error_msg = ("Read range in dir [" + self.get_dir_name() +
                         "] got exception " + str(err) + '.')
            self._SYS_LOGGER.error(error_msg)
            self._logger.error(error_msg)
            raise StorageReadException(error_msg)