コード例 #1
0
    def get_next_dir(self, cur_dir):
        """Return the partition directory that follows ``cur_dir``.

        The file tree's root name is removed from ``cur_dir``, the remainder
        is parsed into a timestamp, and that timestamp is moved forward by one
        partition unit: a year, a month, a day, an hour, or (for any other
        partitioner type) a minute.

        Args:
            cur_dir: Directory name of the current partition.

        Returns:
            The directory name of the next partition, or None when that
            directory does not exist.
        """
        relative_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
        current = FileUtil.parse_dir_to_timestamp(dir_name=relative_dir)

        kind = self.PARTITIONER_TYPE
        if kind == PartitionerStorageType.YEARLY:
            following = datetime.datetime(current.year + 1, 1, 1)
        elif kind == PartitionerStorageType.MONTHLY:
            # December rolls over into January of the following year.
            rolls_over = current.month == 12
            following = datetime.datetime(
                current.year + 1 if rolls_over else current.year,
                1 if rolls_over else current.month + 1,
                1)
        else:
            if kind == PartitionerStorageType.DAILY:
                step = datetime.timedelta(days=1)
            elif kind == PartitionerStorageType.HOURLY:
                step = datetime.timedelta(hours=1)
            else:
                step = datetime.timedelta(minutes=1)
            following = current + step

        # Keep only as many path components as this partitioner type uses.
        components = FileUtil.parse_timestamp_to_dir(
            timestamp=following).split('/')
        height = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
        candidate = FileUtil.join_paths_to_dir(
            root_dir=self._file_tree.get_root_name(),
            base_name='/'.join(components[:height]))

        if not FileUtil.does_dir_exist(dir_name=candidate):
            return None
        return candidate
コード例 #2
0
ファイル: test_file_util.py プロジェクト: kfrancischen/pslx
 def test_parse_dir_to_timestamp(self):
     """Check that directory names are parsed into the expected datetimes."""
     # (directory name, expected timestamp) pairs: a full minute-level path,
     # a midnight path, and a month-level path.
     expectations = (
         ('2020/01/01/12/30', datetime.datetime(2020, 1, 1, 12, 30)),
         ('2020/01/01/00/00', datetime.datetime(2020, 1, 1)),
         ('2020/01', datetime.datetime(2020, 1, 1)),
     )
     for dir_name, expected in expectations:
         self.assertEqual(
             FileUtil.parse_dir_to_timestamp(dir_name=dir_name),
             expected
         )
コード例 #3
0
ファイル: partitioner_base.py プロジェクト: kfrancischen/pslx
 def get_previous_dir(self, cur_dir):
     """Return the partition directory that precedes ``cur_dir``.

     The partitioner is first re-initialized from its own directory. The
     file tree's root name is then removed from ``cur_dir``, the remainder
     is parsed into a timestamp, and that timestamp is moved back by one
     partition unit: a year, a month, a day, an hour, or (for any other
     partitioner type) a minute. The RPC counter is incremented by one
     before the existence check.

     Args:
         cur_dir: Directory name of the current partition.

     Returns:
         The directory name of the previous partition, or None when that
         directory does not exist.
     """
     self.initialize_from_dir(dir_name=self.get_dir_name())
     relative_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
     current = FileUtil.parse_dir_to_timestamp(dir_name=relative_dir)

     kind = self.PARTITIONER_TYPE
     if kind == PartitionerStorageType.YEARLY:
         earlier = datetime.datetime(current.year - 1, 1, 1)
     elif kind == PartitionerStorageType.MONTHLY:
         # January wraps back to December of the preceding year.
         wraps = current.month == 1
         earlier = datetime.datetime(
             current.year - 1 if wraps else current.year,
             12 if wraps else current.month - 1,
             1)
     else:
         if kind == PartitionerStorageType.DAILY:
             step = datetime.timedelta(days=1)
         elif kind == PartitionerStorageType.HOURLY:
             step = datetime.timedelta(hours=1)
         else:
             step = datetime.timedelta(minutes=1)
         earlier = current - step

     # Keep only as many path components as this partitioner type uses.
     components = FileUtil.parse_timestamp_to_dir(
         timestamp=earlier).split('/')
     height = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
     candidate = FileUtil.join_paths_to_dir(
         root_dir=self._file_tree.get_root_name(),
         base_name='/'.join(components[:height]))

     self.increment_rpc_count_by(n=1)
     if not FileUtil.does_dir_exist(dir_name=candidate):
         return None
     return candidate
コード例 #4
0
    def _cmp_dir_by_timestamp(self, dir_name_1, dir_name_2):
        """Tell whether ``dir_name_1`` is strictly earlier than ``dir_name_2``.

        Both names have the file tree's root name removed and are normalized.
        ``dir_name_2`` is then cut down to the same number of path components
        as ``dir_name_1`` before the two are parsed into timestamps.

        Args:
            dir_name_1: First directory name.
            dir_name_2: Second directory name.

        Returns:
            False when ``dir_name_2`` is empty after removing the root name,
            or when ``dir_name_1`` has more path components than
            ``dir_name_2``; otherwise whether the timestamp of ``dir_name_1``
            is less than that of the truncated ``dir_name_2``.
        """
        first = dir_name_1.replace(self._file_tree.get_root_name(), '')
        second = dir_name_2.replace(self._file_tree.get_root_name(), '')
        if not second:
            return False

        first = FileUtil.normalize_dir_name(dir_name=first)
        second = FileUtil.normalize_dir_name(dir_name=second)
        # The last element of each split is dropped (presumably the empty
        # string after a trailing '/' -- confirm against normalize_dir_name).
        first_parts = first.split('/')[:-1]
        second_parts = second.split('/')[:-1]
        if len(first_parts) > len(second_parts):
            return False

        # Compare at the depth of the first directory.
        second = FileUtil.normalize_dir_name('/'.join(
            second_parts[:len(first_parts)]))
        first_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=first)
        second_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=second)
        return first_timestamp < second_timestamp
コード例 #5
0
    def read_range(self, params):
        """Read every file of the partitions lying between two timestamps.

        The call blocks (polling once per second) until the writer status is
        idle, marks the reader as running, and then walks the partition
        directories from ``params['start_time']`` to ``params['end_time']``,
        both clamped to the oldest and latest existing partitions. The reader
        status is always reset to idle when the method exits, whether it
        returns normally, returns early, or raises.

        Args:
            params: Mapping holding ``'start_time'`` and ``'end_time'``
                (datetime objects, with ``start_time <= end_time``).

        Returns:
            A dict mapping each file name found to its content; an empty dict
            when the partitioner is empty.

        Raises:
            AssertionError: If the time range is missing or inverted.
            StorageReadException: If walking or reading the partitions fails.
        """
        def _reformat_time(timestamp):
            # Truncate the timestamp to the start of its partition and make it
            # naive so it can be compared with directory timestamps.
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                timestamp = timestamp.replace(month=1,
                                              day=1,
                                              hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                timestamp = timestamp.replace(day=1,
                                              hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                timestamp = timestamp.replace(hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                timestamp = timestamp.replace(minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            else:
                timestamp = timestamp.replace(second=0,
                                              microsecond=0,
                                              tzinfo=None)
            return timestamp

        def _advance(timestamp):
            # Move the timestamp forward by exactly one partition.
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                return timestamp.replace(year=timestamp.year + 1,
                                         month=1,
                                         day=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                if timestamp.month == 12:
                    return timestamp.replace(year=timestamp.year + 1,
                                             month=1,
                                             day=1)
                return timestamp.replace(month=timestamp.month + 1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                return timestamp + datetime.timedelta(days=1)
            if self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                return timestamp + datetime.timedelta(hours=1)
            return timestamp + datetime.timedelta(minutes=1)

        assert 'start_time' in params and 'end_time' in params and params[
            'start_time'] <= params['end_time']
        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._reader_status = Status.RUNNING
        # The finally clause guarantees the reader status goes back to idle
        # on every exit path; previously the early return and the exception
        # paths left it stuck at RUNNING.
        try:
            oldest_dir = self.get_oldest_dir()
            latest_dir = self.get_latest_dir()
            if not latest_dir or not oldest_dir:
                if self.is_empty():
                    empty_message = ("Current partitioner [" +
                                     self.get_dir_name() +
                                     "] is empty, cannot read anything.")
                    self._logger.warning(empty_message)
                    self.sys_log(empty_message)
                    return {}
                # NOTE(review): a missing oldest/latest dir on a non-empty
                # partitioner falls through unchanged from the original
                # behaviour -- confirm whether this state can occur.

            oldest_dir = oldest_dir.replace(self._file_tree.get_root_name(),
                                            '')
            latest_dir = latest_dir.replace(self._file_tree.get_root_name(),
                                            '')

            oldest_timestamp = FileUtil.parse_dir_to_timestamp(
                dir_name=oldest_dir)
            latest_timestamp = FileUtil.parse_dir_to_timestamp(
                dir_name=latest_dir)
            # Clamp the requested range to the partitions that exist.
            start_time = max(_reformat_time(params['start_time']),
                             oldest_timestamp)
            end_time = min(_reformat_time(params['end_time']),
                           latest_timestamp)
            result = {}
            try:
                while start_time <= end_time:
                    dir_list = FileUtil.parse_timestamp_to_dir(
                        timestamp=start_time).split('/')
                    dir_name = '/'.join(
                        dir_list[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                            self.PARTITIONER_TYPE]])
                    dir_name = FileUtil.join_paths_to_dir(
                        root_dir=self._file_tree.get_root_name(),
                        base_name=dir_name)
                    if FileUtil.does_dir_exist(dir_name=dir_name):
                        if self._underlying_storage.get_storage_type(
                        ) == StorageType.PROTO_TABLE_STORAGE:
                            storage = ProtoTableStorage()
                        else:
                            storage = DefaultStorage()
                        file_names = FileUtil.list_files_in_dir(
                            dir_name=dir_name)
                        for file_name in file_names:
                            storage.initialize_from_file(file_name=file_name)
                            if storage.get_storage_type(
                            ) == StorageType.PROTO_TABLE_STORAGE:
                                result[file_name] = storage.read_all()
                            else:
                                result[file_name] = storage.read(
                                    params={'num_line': -1})

                    start_time = _advance(start_time)

                return result
            except Exception as err:
                error_message = ("Read range in dir [" + self.get_dir_name() +
                                 "] got exception " + str(err) + '.')
                self.sys_log(error_message)
                self._logger.error(error_message)
                # Chain the cause so the original traceback is preserved.
                raise StorageReadException(error_message) from err
        finally:
            self._reader_status = Status.IDLE
コード例 #6
0
 def get_dir_in_timestamp(self, dir_name):
     """Parse a directory name into a timestamp.

     Args:
         dir_name: Directory name; the file tree's root name is removed
             from it before parsing.

     Returns:
         The parsed timestamp, or None when nothing is left of the name
         after removing the root name.
     """
     relative_dir = dir_name.replace(self._file_tree.get_root_name(), '')
     if not relative_dir:
         return None
     return FileUtil.parse_dir_to_timestamp(dir_name=relative_dir)
コード例 #7
0
ファイル: partitioner_base.py プロジェクト: kfrancischen/pslx
    def read_range(self, params):
        """Read every file of the partitions lying between two timestamps.

        The partitioner is re-initialized from its directory, the requested
        range is clamped to the oldest and latest existing partitions, the
        file names of every partition in the range are collected (listing
        failures are ignored), and all files are then read in one batch
        through ``gclient_ext``.

        Args:
            params: Mapping holding ``'start_time'`` and ``'end_time'``
                (datetime objects, with ``start_time <= end_time``).

        Returns:
            A dict keyed by file name. For proto table storage each value is
            a dict built from the message's ``data``; otherwise it is the
            list of lines of the text file. An empty dict is returned when no
            oldest or latest directory is found.

        Raises:
            AssertionError: If the time range is missing or inverted.
            StorageReadException: If collecting or reading the files fails.
        """
        self.initialize_from_dir(dir_name=self.get_dir_name())

        def _reformat_time(timestamp):
            # Truncate to the start of the partition and drop the timezone.
            kind = self.PARTITIONER_TYPE
            if kind == PartitionerStorageType.YEARLY:
                return timestamp.replace(month=1, day=1, hour=0, minute=0,
                                         second=0, microsecond=0, tzinfo=None)
            if kind == PartitionerStorageType.MONTHLY:
                return timestamp.replace(day=1, hour=0, minute=0, second=0,
                                         microsecond=0, tzinfo=None)
            if kind == PartitionerStorageType.DAILY:
                return timestamp.replace(hour=0, minute=0, second=0,
                                         microsecond=0, tzinfo=None)
            if kind == PartitionerStorageType.HOURLY:
                return timestamp.replace(minute=0, second=0, microsecond=0,
                                         tzinfo=None)
            return timestamp.replace(second=0, microsecond=0, tzinfo=None)

        def _next_partition_time(timestamp):
            # Step forward by exactly one partition.
            kind = self.PARTITIONER_TYPE
            if kind == PartitionerStorageType.YEARLY:
                return timestamp.replace(year=timestamp.year + 1,
                                         month=1,
                                         day=1)
            if kind == PartitionerStorageType.MONTHLY:
                if timestamp.month == 12:
                    return timestamp.replace(year=timestamp.year + 1,
                                             month=1,
                                             day=1)
                return timestamp.replace(month=timestamp.month + 1)
            if kind == PartitionerStorageType.DAILY:
                return timestamp + datetime.timedelta(days=1)
            if kind == PartitionerStorageType.HOURLY:
                return timestamp + datetime.timedelta(hours=1)
            return timestamp + datetime.timedelta(minutes=1)

        assert 'start_time' in params and 'end_time' in params and params[
            'start_time'] <= params['end_time']

        oldest_dir = self._get_oldest_dir_in_root_directory_interal()
        latest_dir = self._get_latest_dir_internal()
        if not (latest_dir and oldest_dir):
            empty_message = ("Current partitioner [" + self.get_dir_name() +
                             "] is empty, cannot read anything.")
            self._logger.warning(empty_message)
            self._SYS_LOGGER.warning(empty_message)
            return {}

        oldest_dir = oldest_dir.replace(self._file_tree.get_root_name(), '')
        latest_dir = latest_dir.replace(self._file_tree.get_root_name(), '')

        oldest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=oldest_dir)
        latest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=latest_dir)
        # Clamp the requested range to the partitions that exist.
        cursor = max(_reformat_time(params['start_time']), oldest_timestamp)
        end_time = min(_reformat_time(params['end_time']), latest_timestamp)
        try:
            all_file_names = []
            while cursor <= end_time:
                components = FileUtil.parse_timestamp_to_dir(
                    timestamp=cursor).split('/')
                height = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                    self.PARTITIONER_TYPE]
                partition_dir = FileUtil.join_paths_to_dir(
                    root_dir=self._file_tree.get_root_name(),
                    base_name='/'.join(components[:height]))
                try:
                    self.increment_rpc_count_by(n=1)
                    all_file_names.extend(
                        FileUtil.list_files_in_dir(dir_name=partition_dir))
                except Exception:
                    # Best effort: a partition that cannot be listed is
                    # skipped rather than failing the whole read.
                    pass

                cursor = _next_partition_time(cursor)

            self.increment_rpc_count_by(n=1)
            if self._underlying_storage.get_storage_type(
            ) == StorageType.PROTO_TABLE_STORAGE:
                messages = gclient_ext.read_proto_messages(
                    paths=all_file_names, message_type=ProtoTable)
                return {
                    file_name: dict(message.data)
                    for file_name, message in messages.items()
                }

            texts = gclient_ext.read_txts(all_file_names)
            return {
                file_name: text.rstrip().split('\n')
                for file_name, text in texts.items()
            }
        except Exception as err:
            error_message = ("Read range in dir [" + self.get_dir_name() +
                             "] got exception " + str(err) + '.')
            self._SYS_LOGGER.error(error_message)
            self._logger.error(error_message)
            raise StorageReadException(error_message)