Code example #1
File: test_file_util.py Project: kfrancischen/pslx
 def test_json_to_file(self):
     data = {
         'name': "123",
         'property': 123,
     }
     FileUtil.write_json_to_file(json_obj=data, file_name=self.TEST_DATA_PATH)
     self.assertDictEqual(data, FileUtil.read_json_from_file(file_name=self.TEST_DATA_PATH))
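The round trip this test relies on can be reproduced with only the standard json module. The helpers below are a minimal, hedged sketch with hypothetical names; they are not the FileUtil implementation.

import json

def write_json_to_file(json_obj, file_name):
    # Serialize the dict to a JSON text file.
    with open(file_name, 'w') as f:
        json.dump(json_obj, f)

def read_json_from_file(file_name):
    # Load the JSON text file back into a dict.
    with open(file_name) as f:
        return json.load(f)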
Code example #2
    def get_next_dir(self, cur_dir):
        cur_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
        cur_time = FileUtil.parse_dir_to_timestamp(dir_name=cur_dir)
        if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
            next_time = datetime.datetime(cur_time.year + 1, 1, 1)
        elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
            if cur_time.month == 12:
                next_time = datetime.datetime(cur_time.year + 1, 1, 1)
            else:
                next_time = datetime.datetime(cur_time.year,
                                              cur_time.month + 1, 1)
        elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
            next_time = cur_time + datetime.timedelta(days=1)
        elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
            next_time = cur_time + datetime.timedelta(hours=1)
        else:
            next_time = cur_time + datetime.timedelta(minutes=1)

        next_dir_name = FileUtil.parse_timestamp_to_dir(
            timestamp=next_time).split('/')
        next_dir_name = '/'.join(
            next_dir_name[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                self.PARTITIONER_TYPE]])

        next_dir_name = FileUtil.join_paths_to_dir(
            root_dir=self._file_tree.get_root_name(), base_name=next_dir_name)
        if FileUtil.does_dir_exist(dir_name=next_dir_name):
            return next_dir_name
        else:
            return None
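The only non-trivial branch above is the month rollover, since timedelta has no month unit. Below is a standalone sketch of the same partition-advance arithmetic, using plain string granularities instead of PartitionerStorageType; names are illustrative only.

import datetime

def next_partition_time(cur_time, granularity):
    # Advance a partition timestamp by one period.
    if granularity == 'yearly':
        return datetime.datetime(cur_time.year + 1, 1, 1)
    if granularity == 'monthly':
        # Month rollover needs explicit handling.
        if cur_time.month == 12:
            return datetime.datetime(cur_time.year + 1, 1, 1)
        return datetime.datetime(cur_time.year, cur_time.month + 1, 1)
    deltas = {'daily': datetime.timedelta(days=1),
              'hourly': datetime.timedelta(hours=1),
              'minutely': datetime.timedelta(minutes=1)}
    return cur_time + deltas[granularity]

assert next_partition_time(datetime.datetime(2020, 12, 15), 'monthly') == datetime.datetime(2021, 1, 1)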
Code example #3
    def write(self, data, params=None):
        if not params:
            params = {}
        if 'overwrite' not in params:
            params['overwrite'] = True

        assert isinstance(data, dict)
        try:
            for key, val in data.items():
                if not params['overwrite'] and key in self._table_message.data:
                    continue
                any_message = ProtoUtil.message_to_any(message=val)
                self._table_message.data[key].CopyFrom(any_message)
            if len(self._table_message.data) > 1000:
                self._SYS_LOGGER.warning(
                    "Warning: the table content is too large, considering using Partitioner "
                    "combined with proto table.")
            self._table_message.updated_time = str(
                TimezoneUtil.cur_time_in_pst())

            self.increment_rpc_count_by(n=1)
            FileUtil.write_proto_to_file(proto=self._table_message,
                                         file_name=self._file_name)

        except Exception as err:
            self._SYS_LOGGER.error("Write to file [" + self.get_file_name() +
                                   "] got exception: " + str(err) + '.')
            self._logger.error("Write to file [" + self.get_file_name() +
                               "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to file [" +
                                        self.get_file_name() +
                                        "] got exception: " + str(err) + '.')
Code example #4
 def _reinitialize_underlying_storage(self, file_base_name):
     file_name = FileUtil.join_paths_to_file(root_dir=self.get_latest_dir(),
                                             base_name=file_base_name)
     if not FileUtil.does_file_exist(file_name):
         self.sys_log("The file to read does not exist.")
         return
     self._underlying_storage.initialize_from_file(file_name=file_name)
Code example #5
    def delete_all(self):
        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)
        self._deleter_status = Status.RUNNING
        all_keys = list(dict(self._table_message.data).keys())
        for key in all_keys:
            del self._table_message.data[key]
        try:
            self._table_message.updated_time = str(TimezoneUtil.cur_time_in_pst())
            with FileLockTool(self._file_name, read_mode=False):
                FileUtil.write_proto_to_file(
                    proto=self._table_message,
                    file_name=self._file_name
                )
                self._deleter_status = Status.IDLE
        except Exception as err:
            self.sys_log("Delete all of file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Delete all of file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            raise StorageDeleteException("Delete all of file [" + self.get_file_name() +
                                         "] got exception: " + str(err) + '.')
Code example #6
File: partitioner_base.py Project: kfrancischen/pslx
 def get_previous_dir(self, cur_dir):
     self.initialize_from_dir(dir_name=self.get_dir_name())
     cur_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
     cur_time = FileUtil.parse_dir_to_timestamp(dir_name=cur_dir)
     if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
         pre_time = datetime.datetime(cur_time.year - 1, 1, 1)
     elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
         if cur_time.month == 1:
             pre_time = datetime.datetime(cur_time.year - 1, 12, 1)
         else:
             pre_time = datetime.datetime(cur_time.year, cur_time.month - 1,
                                          1)
     elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
         pre_time = cur_time - datetime.timedelta(days=1)
     elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
         pre_time = cur_time - datetime.timedelta(hours=1)
     else:
         pre_time = cur_time - datetime.timedelta(minutes=1)
     last_dir_name = FileUtil.parse_timestamp_to_dir(
         timestamp=pre_time).split('/')
     last_dir_name = '/'.join(
         last_dir_name[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
             self.PARTITIONER_TYPE]])
     last_dir_name = FileUtil.join_paths_to_dir(
         root_dir=self._file_tree.get_root_name(), base_name=last_dir_name)
     self.increment_rpc_count_by(n=1)
     if FileUtil.does_dir_exist(dir_name=last_dir_name):
         return last_dir_name
     else:
         return None
Code example #7
File: partitioner_base.py Project: kfrancischen/pslx
 def _reinitialize_underlying_storage(self, file_base_name):
     file_name = FileUtil.join_paths_to_file(
         root_dir=self._get_latest_dir_internal(), base_name=file_base_name)
     self.increment_rpc_count_by(n=1)
     if not FileUtil.does_file_exist(file_name):
         self._SYS_LOGGER.info("The file to read does not exist.")
         return
     self._underlying_storage.initialize_from_file(file_name=file_name)
Code example #8
 def get_content_from_snapshot(cls, snapshot_file, message_type=OperatorContentPlain):
     try:
         snapshot = FileUtil.read_proto_from_file(
             proto_type=OperatorSnapshot,
             file_name=FileUtil.die_if_file_not_exist(file_name=snapshot_file)
         )
         return cls.content_deserializer(content=snapshot.content)
     except FileNotExistException as _:
         return message_type()
Code example #9
File: default_storage.py Project: alpha-hunter/pslx
    def write(self, data, params=None):
        if not isinstance(data, str):
            if not params:
                params = {'delimiter': ','}
            else:
                assert isinstance(params, dict) and 'delimiter' in params

        if params:
            for param in params:
                if not isinstance(data, str) and param == 'delimiter':
                    continue
                self._logger.warning(
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )
                self.sys_log(
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )

        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._writer_status = Status.RUNNING
        if not isinstance(data, str):
            self.sys_log(
                "Data is not str instance, joining them with preset delimiter."
            )
            data_to_write = params['delimiter'].join(
                [str(val) for val in data])
        else:
            data_to_write = data
        try:
            if self._config['write_rule_type'] == WriteRuleType.WRITE_FROM_END:
                with FileLockTool(self._file_name, read_mode=False):
                    with open(
                            FileUtil.create_file_if_not_exist(
                                file_name=self._file_name), 'a') as outfile:
                        outfile.write(data_to_write + '\n')
            else:
                with FileLockTool(self._file_name, read_mode=False):
                    with open(
                            FileUtil.create_file_if_not_exist(
                                file_name=self._file_name), 'r+') as outfile:
                        file_data = outfile.read()
                        outfile.seek(0, 0)
                        outfile.write(data_to_write + '\n' + file_data)

            self._writer_status = Status.IDLE
        except Exception as err:
            self.sys_log("Write to file [" + self._file_name +
                         "] got exception: " + str(err) + '.')
            self._logger.error("Write to file [" + self._file_name +
                               "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to file [" + self._file_name +
                                        "] got exception: " + str(err) + '.')
Code example #10
 def get_status_from_snapshot(cls, snapshot_file):
     try:
         snapshot = FileUtil.read_proto_from_file(
             proto_type=OperatorSnapshot,
             file_name=FileUtil.die_if_file_not_exist(file_name=snapshot_file)
         )
         return snapshot.status
     except FileNotExistException as _:
         return Status.IDLE
Code example #11
    def _recursively_check_dir_deletable(self, dir_name):
        if FileUtil.list_files_in_dir(dir_name=dir_name):
            return False
        sub_dirs = FileUtil.list_dirs_in_dir(dir_name=dir_name)
        if sub_dirs:
            for sub_dir in sub_dirs:
                if not self._recursively_check_dir_deletable(dir_name=sub_dir):
                    return False

        return True
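The same "no files anywhere below this directory" check can be expressed with os.walk from the standard library. This is an equivalent standalone illustration, not the pslx helper.

import os

def dir_tree_has_no_files(dir_name):
    # A directory tree is considered deletable when no regular file
    # exists in it or in any of its subdirectories.
    for _root, _dirs, files in os.walk(dir_name):
        if files:
            return False
    return True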
Code example #12
 def get_content_from_snapshot(cls, snapshot_file, message_type):
     try:
         with FileLockTool(snapshot_file, read_mode=True):
             snapshot = FileUtil.read_proto_from_file(
                 proto_type=OperatorSnapshot,
                 file_name=FileUtil.die_if_file_not_exist(file_name=snapshot_file)
             )
         return ProtoUtil.any_to_message(message_type=message_type, any_message=snapshot.content)
     except FileNotExistException as _:
         return message_type()
Code example #13
    def get_response_and_status_impl(self, request):
        file_path = request.file_path
        response = FileViewerRPCResponse()
        if FileUtil.is_file(path_name=file_path):
            file_name = FileUtil.die_if_file_not_exist(file_name=file_path)
            file_info = response.files_info.add()
            file_info.file_path = file_name
            file_info.file_size = FileUtil.get_file_size(file_name=file_name)
            file_info.modified_time = str(
                FileUtil.get_file_modified_time(file_name=file_name))
        else:
            dir_name = FileUtil.die_if_dir_not_exist(dir_name=file_path)
            sub_files = FileUtil.list_files_in_dir(dir_name=dir_name)
            for sub_file in sub_files:
                file_info = response.files_info.add()
                file_info.file_path = sub_file
                file_info.file_size = FileUtil.get_file_size(
                    file_name=sub_file)
                file_info.modified_time = str(
                    FileUtil.get_file_modified_time(file_name=sub_file))
            sub_dirs = FileUtil.list_dirs_in_dir(dir_name=dir_name)
            for sub_dir in sub_dirs:
                dirs_info = response.directories_info.add()
                dirs_info.file_path = sub_dir

        return response, Status.SUCCEEDED
Code example #14
File: test_file_util.py Project: kfrancischen/pslx
 def test_parse_timestamp_to_dir(self):
     timestamp = datetime.datetime(2020, 1, 1, 12, 30)
     self.assertEqual(
         FileUtil.parse_timestamp_to_dir(timestamp=timestamp),
         '2020/01/01/12/30'
     )
     timestamp = datetime.datetime(2020, 1, 1)
     self.assertEqual(
         FileUtil.parse_timestamp_to_dir(timestamp=timestamp),
         '2020/01/01/00/00'
     )
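The test pins the directory format to zero-padded 'YYYY/MM/DD/HH/MM'. A minimal compatible mapping can be sketched with strftime; the helper name is hypothetical and not the pslx implementation.

import datetime

def timestamp_to_dir(timestamp):
    # Zero-padded year/month/day/hour/minute path segments.
    return timestamp.strftime('%Y/%m/%d/%H/%M')

assert timestamp_to_dir(datetime.datetime(2020, 1, 1, 12, 30)) == '2020/01/01/12/30'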
Code example #15
File: partitioner_base.py Project: kfrancischen/pslx
 def is_empty(self):
     leftmost_leaf_name, rightmost_leaf_name = (
         self._file_tree.get_leftmost_leaf(),
         self._file_tree.get_rightmost_leaf())
     self.increment_rpc_count_by(n=2)
     if FileUtil.is_dir_empty(
             dir_name=leftmost_leaf_name) and FileUtil.is_dir_empty(
                 dir_name=rightmost_leaf_name):
         return True
     else:
         return False
Code example #16
def get_containers_info():
    containers_info = []
    existing_containers = {}
    all_proto_files = set()
    if not FileUtil.is_local_path(backend_folder):
        all_cells = ['']
    else:
        all_cells = gclient.list_cells()

    for cell_name in all_cells:
        folder = FileUtil.convert_local_to_cell_path(
            path=backend_folder, cell=cell_name)
        proto_files = FileUtil.list_files_in_dir(folder)
        all_proto_files = all_proto_files.union(set(proto_files))
    for proto_file in all_proto_files:
        storage = ProtoTableStorage()
        storage.initialize_from_file(
            file_name=proto_file
        )
        raw_data = storage.read_all()
        if not raw_data:
            continue
        key = sorted(raw_data.keys())[-1]
        val = raw_data[key]
        result_proto = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=val
        )
        ttl = result_proto.ttl

        if ttl > 0 and result_proto.updated_time and TimezoneUtil.cur_time_in_pst() - TimezoneUtil.cur_time_from_str(
                result_proto.updated_time) >= datetime.timedelta(days=ttl):
            FileUtil.remove_file(storage.get_file_name())
        else:
            container_info = {
                'container_name': result_proto.container_name,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=result_proto.container_status),
                'updated_time': result_proto.updated_time,
                'mode': ProtoUtil.get_name_by_value(enum_type=ModeType, value=result_proto.mode),
                'data_model': ProtoUtil.get_name_by_value(
                    enum_type=DataModelType, value=result_proto.data_model),
                'run_cell': result_proto.run_cell,
                'snapshot_cell': result_proto.snapshot_cell,
            }
            if container_info['container_name'] not in existing_containers:
                existing_containers[container_info['container_name']] = container_info['updated_time']
                containers_info.append(container_info)
            else:
                if container_info['updated_time'] >= existing_containers[container_info['container_name']]:
                    containers_info.append(container_info)

    return containers_info
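The TTL branch above expires an entry once its age, measured from updated_time, reaches ttl days. The comparison in isolation, with illustrative names only (a sketch, not pslx code):

import datetime

def is_expired(updated_time, ttl_days, now):
    # A positive TTL and an age of ttl_days or more means the entry is stale.
    return ttl_days > 0 and now - updated_time >= datetime.timedelta(days=ttl_days)

assert is_expired(datetime.datetime(2020, 1, 1), 7, datetime.datetime(2020, 1, 9))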
Code example #17
 def resize_to_new_table(self, new_size_per_shard, new_dir_name):
     self.increment_rpc_count_by(n=2)
     assert not FileUtil.does_dir_exist(dir_name=new_dir_name) or FileUtil.is_dir_empty(dir_name=new_dir_name)
     new_sptable_storage = ShardedProtoTableStorage(size_per_shard=new_size_per_shard)
     new_sptable_storage.initialize_from_dir(dir_name=new_dir_name)
     for shard in range(self.get_num_shards()):
         related_proto_file = self._shard_to_file(shard=shard)
         proto_table = ProtoTableStorage()
         proto_table.initialize_from_file(file_name=related_proto_file)
         new_sptable_storage.write(data=proto_table.read_all())
         self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
     return new_sptable_storage
Code example #18
    def get_response_and_status_impl(self, request):
        storage_value = ContainerBackendValue()
        storage_value.container_name = request.container_name
        storage_value.container_status = request.status
        for operator_name, operator_snapshot in dict(
                request.operator_snapshot_map).items():
            operator_info = ContainerBackendValue.OperatorInfo()
            operator_info.status = operator_snapshot.status
            for parent in operator_snapshot.node_snapshot.parents_names:
                operator_info.parents.append(parent)

            operator_info.start_time = operator_snapshot.start_time
            operator_info.end_time = operator_snapshot.end_time
            storage_value.operator_info_map[operator_name].CopyFrom(
                operator_info)

        storage_value.mode = request.mode
        storage_value.data_model = request.data_model
        storage_value.updated_time = str(TimezoneUtil.cur_time_in_pst())
        storage_value.start_time = request.start_time
        storage_value.end_time = request.end_time
        storage_value.log_dir = request.log_dir
        for key in request.counters:
            storage_value.counters[key] = request.counters[key]
        partitioner_dir = FileUtil.join_paths_to_dir_with_mode(
            root_dir=FileUtil.join_paths_to_dir(
                root_dir=self._backend_folder,
                base_name=ProtoUtil.get_name_by_value(
                    enum_type=DataModelType, value=storage_value.data_model)),
            base_name=storage_value.container_name,
            ttl=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_TTL'))
        if storage_value.mode == ModeType.TEST:
            partitioner_dir = partitioner_dir.replace('PROD', 'TEST')
        storage = self._lru_cache_tool.get(key=partitioner_dir)
        if not storage:
            self.sys_log(
                "Did not find the storage in cache. Making a new one...")
            storage = DailyPartitionerStorage()
            proto_table = ProtoTableStorage()
            storage.set_underlying_storage(storage=proto_table)
            storage.initialize_from_dir(dir_name=partitioner_dir)
            self._lru_cache_tool.set(key=partitioner_dir, value=storage)
        else:
            self.sys_log("Found key in LRU cache.")

        storage.write(data={storage_value.container_name: storage_value},
                      params={
                          'overwrite': True,
                          'make_partition': True,
                      })
        return None, Status.SUCCEEDED
Code example #19
    def is_empty(self):
        if self.is_updated():
            self.sys_log("Tree updated, need force rebuilding the tree.")
            self._logger.info("Tree updated, need force rebuilding the tree.")
            self.initialize_from_dir(dir_name=self.get_dir_name(), force=True)

        leftmost_leaf_name, rightmost_leaf_name = (
            self._file_tree.get_leftmost_leaf(),
            self._file_tree.get_rightmost_leaf())
        if FileUtil.is_dir_empty(
                dir_name=leftmost_leaf_name) and FileUtil.is_dir_empty(
                    dir_name=rightmost_leaf_name):
            return True
        else:
            return False
Code example #20
File: test_file_util.py Project: kfrancischen/pslx
 def test_parse_dir_to_timestamp(self):
     dir_name = '2020/01/01/12/30'
     self.assertEqual(
         FileUtil.parse_dir_to_timestamp(dir_name=dir_name),
         datetime.datetime(2020, 1, 1, 12, 30)
     )
     dir_name = '2020/01/01/00/00'
     self.assertEqual(
         FileUtil.parse_dir_to_timestamp(dir_name=dir_name),
         datetime.datetime(2020, 1, 1)
     )
     dir_name = '2020/01'
     self.assertEqual(
         FileUtil.parse_dir_to_timestamp(dir_name=dir_name),
         datetime.datetime(2020, 1, 1)
     )
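The inverse mapping exercised here tolerates partial paths such as '2020/01' by defaulting the missing fields. A hedged standalone sketch, assuming month/day default to 1 and hour/minute to 0:

import datetime

def dir_to_timestamp(dir_name):
    # Split the path into integer fields and pad the missing ones.
    parts = [int(p) for p in dir_name.strip('/').split('/')]
    parts = parts + [1, 1, 0, 0][len(parts) - 1:]
    return datetime.datetime(*parts)

assert dir_to_timestamp('2020/01') == datetime.datetime(2020, 1, 1)
assert dir_to_timestamp('2020/01/01/12/30') == datetime.datetime(2020, 1, 1, 12, 30)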
Code example #21
File: fetcher_tool.py Project: kfrancischen/pslx
 def fetch_oldest(self):
     try:
         oldest_dir = self._partitioner.get_oldest_dir_in_root_directory()
         if not oldest_dir:
             self._logger.warning('[' + self._partitioner.get_dir_name() +
                                  '] is empty.')
             return None
         proto_table = ProtoTableStorage()
         proto_table.initialize_from_file(
             file_name=FileUtil.join_paths_to_file(root_dir=oldest_dir,
                                                   base_name='data.pb'))
         all_data = proto_table.read_all()
         if all_data:
             self._logger.info(
                 "Successfully get the oldest data in partition dir [" +
                 self._partitioner.get_dir_name() + '].')
             min_key = min(all_data.keys())
             return ProtoUtil.any_to_message(message_type=self.MESSAGE_TYPE,
                                             any_message=all_data[min_key])
         else:
             return None
     except Exception as err:
         self._logger.error("Fetch oldest partition [" +
                            self._partitioner.get_dir_name() +
                            "] with error " + str(err) + '.')
     return None
Code example #22
File: partitioner_base.py Project: kfrancischen/pslx
 def is_gabage_collected(self):
     rightmost_leaf_name = self._file_tree.get_rightmost_leaf()
     self.increment_rpc_count_by(n=1)
     if not FileUtil.does_dir_exist(dir_name=rightmost_leaf_name):
         return True
     else:
         return False
Code example #23
    def read(self, params=None):
        if not params:
            params = {
                'num_line': 1,
            }
        else:
            assert isinstance(params, dict) and 'num_line' in params

        for param in params:
            if param != 'num_line':
                self._logger.warning(
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )
                self._SYS_LOGGER.warning(
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )
        try:
            self.increment_rpc_count_by(n=1)
            lines = FileUtil.read_lined_txt_from_file(self._file_name)
        except Exception as _:
            lines = []

        if 'read_rule_type' in self._config and self._config[
                'read_rule_type'] == ReadRuleType.READ_FROM_END:
            lines = lines[::-1]
        if params['num_line'] < 0:
            return [line.strip() for line in lines]

        new_line_number = self._last_read_line + params['num_line']
        if new_line_number > len(lines):
            self._SYS_LOGGER.error(
                str(new_line_number) + " exceeds the file limit. [" +
                self.get_file_name() + "] only has " + str(len(lines)) +
                " lines.")
            self._logger.error(
                str(new_line_number) + " exceeds the file limit. [" +
                self.get_file_name() + "] only has " + str(len(lines)) +
                " lines.")
            raise StoragePastLineException(
                str(new_line_number) + " exceeds the file limit. [" +
                self.get_file_name() + "] only has " + str(len(lines)) +
                " lines.")
        else:
            try:
                result = lines[self._last_read_line:new_line_number]
                assert len(result) == params['num_line']
                result = [line.strip() for line in result]
                self._last_read_line = new_line_number
                return result
            except Exception as err:
                self._SYS_LOGGER.error("Read file [" + self._file_name +
                                       "] got exception: " + str(err) + '.')
                self._logger.error("Read file [" + self._file_name +
                                   "] got exception: " + str(err) + '.')
                raise StorageReadException("Read file [" + self._file_name +
                                           "] got exception: " + str(err) +
                                           '.')
Code example #24
 def _get_latest_status_of_operators(self):
     operator_status = {}
     snapshot_files = FileUtil.get_file_names_in_dir(
         dir_name=FileUtil.join_paths_to_dir(FileUtil.dir_name(self._snapshot_file_folder), 'operators'))
     for snapshot_file in snapshot_files[::-1]:
         operator_name = snapshot_file.split('_')[1]
         if operator_name not in operator_status:
             self._logger.info("Getting status for operator [" + operator_name + '].')
             self.sys_log("Getting status for operator [" + operator_name + '].')
             operator_status[operator_name] = self._node_name_to_node_dict[operator_name].get_status_from_snapshot(
                 snapshot_file=snapshot_file
             )
             self.sys_log("Status for operator [" + operator_name + '] is [' + ProtoUtil.get_name_by_value(
                 enum_type=Status, value=operator_status[operator_name]) + '].')
         if len(operator_status) == len(self._node_name_to_node_dict):
             break
     return operator_status
Code example #25
    def read(self, params=None):
        if self._underlying_storage.get_storage_type(
        ) == StorageType.PROTO_TABLE_STORAGE:
            file_base_name = 'data.pb'
        else:
            file_base_name = 'data'
        if params and 'base_name' in params:
            file_base_name = params['base_name']
            params.pop('base_name', None)
        if params and 'reinitialize_underlying_storage' in params:
            self._reinitialize_underlying_storage(
                file_base_name=file_base_name)

        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._reader_status = Status.RUNNING
        self.sys_log("Read from the latest partition.")
        latest_dir = self.get_latest_dir()
        if not latest_dir:
            self.sys_log("Current partitioner is empty, cannot read anything.")
            return []

        file_name = FileUtil.join_paths_to_file(root_dir=latest_dir,
                                                base_name=file_base_name)
        if not FileUtil.does_file_exist(file_name):
            self.sys_log("The file [" + file_name +
                         "] to read does not exist.")
            raise StorageReadException("The file [" + file_name +
                                       "] to read does not exist.")

        if file_name != self._underlying_storage.get_file_name():
            self.sys_log("Sync to the latest file to " + file_name)
            self._underlying_storage.initialize_from_file(file_name=file_name)
        try:
            result = self._underlying_storage.read(params=params)
            self._reader_status = Status.IDLE
            return result
        except Exception as err:
            self.sys_log("Read dir [" + self.get_dir_name() +
                         "] got exception: " + str(err) + '.')
            self._logger.error("Read dir [" + self.get_dir_name() +
                               "] got exception: " + str(err) + '.')
            raise StorageReadException("Read dir [" + self.get_dir_name() +
                                       "] got exception: " + str(err) + '.')
Code example #26
    def _delete_file(self, cur_time, path_name):
        num_file_removed, num_file_failed = 0, 0
        if FileUtil.is_file(path_name=path_name):
            ttl = FileUtil.get_ttl_from_path(path_name=path_name)
            if ttl and cur_time - FileUtil.get_file_modified_time(
                    file_name=path_name) > ttl:
                self._logger.info("Removing file " + path_name + '...')
                try:
                    with FileLockTool(protected_file_path=path_name,
                                      read_mode=True,
                                      timeout=TimeSleepObj.ONE_TENTH_SECOND):
                        FileUtil.remove_file(file_name=path_name)
                    num_file_removed += 1
                    self.counter_increment("num_file_removed")
                except Exception as err:
                    num_file_failed += 1
                    self.counter_increment("num_file_failed_to_be_removed")
                    self._logger.error("Removing file " + path_name +
                                       ' failed with err ' + str(err) + '.')
        else:
            for file_name in FileUtil.list_files_in_dir(dir_name=path_name):
                stats = self._delete_file(cur_time=cur_time,
                                          path_name=file_name)
                num_file_removed += stats[0]
                num_file_failed += stats[1]
            for dir_name in FileUtil.list_dirs_in_dir(dir_name=path_name):
                stats = self._delete_file(cur_time=cur_time,
                                          path_name=dir_name)
                num_file_removed += stats[0]
                num_file_failed += stats[1]

        return num_file_removed, num_file_failed
Code example #27
    def execute_query_file(self, query_file, modification):
        if not FileUtil.does_file_exist(file_name=query_file):
            self._SYS_LOGGER.info("Query file " + query_file +
                                  " does not exist")

        query_str = self._query_file_to_str(query_file=query_file)

        return self.execute_query_str(query_str=query_str,
                                      modification=modification)
Code example #28
File: partitioner_base.py Project: kfrancischen/pslx
        def _recursive_initialize_from_dir(node, max_recursion):
            self._SYS_LOGGER.info("Starting recursion of " +
                                  str(max_recursion) + '.')
            if max_recursion == 0:
                self._SYS_LOGGER.info("Exhausted all recursions for dir [" +
                                      dir_name + '].')
                self._logger.info("Exhausted all recursions for dir [" +
                                  dir_name + '].')
                return

            node_name = node.get_node_name()
            self.increment_rpc_count_by(n=1)
            child_node_names = sorted(
                FileUtil.list_dirs_in_dir(dir_name=node_name),
                reverse=from_scratch)
            for child_node_name in child_node_names:
                if from_scratch and self._file_tree.get_num_nodes(
                ) >= self._max_capacity > 0:
                    self._SYS_LOGGER.info("Reach the max number of node: " +
                                          str(self._max_capacity) + '.')
                    return

                newly_added_string = child_node_name.replace(node_name,
                                                             '').replace(
                                                                 '/', '')
                if not newly_added_string.isdigit():
                    continue

                if not from_scratch and self._cmp_dir_by_timestamp(
                        dir_name_1=child_node_name,
                        dir_name_2=self._get_latest_dir_internal()):
                    continue

                child_node = self._file_tree.find_node(
                    node_name=child_node_name)
                if not child_node:
                    child_node = OrderedNodeBase(node_name=child_node_name)
                    # The nodes are ordered from large to small. So if the tree is built scratch, since the directory
                    # is listed from large to small, SortOrder.ORDER is used. If it is incremental build, since the
                    # directory is listed from small to large, SortOrder.REVERSE is used.
                    order = SortOrder.ORDER if from_scratch else SortOrder.REVERSE
                    self._file_tree.add_node(parent_node=node,
                                             child_node=child_node,
                                             order=order)
                    self._SYS_LOGGER.info("Adding new node [" +
                                          child_node_name +
                                          node.get_node_name() + '].')
                    self._logger.info("Adding new node [" + child_node_name +
                                      "] to parent node [" +
                                      node.get_node_name() + '].')

                    if not from_scratch:
                        self._file_tree.trim_tree(
                            max_capacity=self._max_capacity)

                _recursive_initialize_from_dir(node=child_node,
                                               max_recursion=max_recursion - 1)
Code example #29
 def get_response_and_status_impl(self, request):
     proto_file = FileUtil.die_if_file_not_exist(
         file_name=request.proto_file_path)
     message_type = ProtoUtil.infer_message_type_from_str(
         message_type_str=request.message_type,
         modules=request.proto_module if request.proto_module else None)
     response = ProtoViewerRPCResponse()
     proto_message = FileUtil.read_proto_from_file(proto_type=message_type,
                                                   file_name=proto_file)
     response.proto_content = ProtoUtil.message_to_text(
         proto_message=proto_message)
     file_info = FileInfo()
     file_info.file_path = request.proto_file_path
     file_info.file_size = FileUtil.get_file_size(file_name=proto_file)
     file_info.modified_time = str(
         FileUtil.get_file_modified_time(file_name=proto_file))
     response.file_info.CopyFrom(file_info)
     return response, Status.SUCCEEDED
Code example #30
    def initialize_from_file(self, file_name):
        if '.pb' not in file_name:
            self.sys_log("Please use .pb extension for proto files.")

        self._file_name = FileUtil.create_file_if_not_exist(file_name=file_name)
        if FileUtil.is_file_empty(file_name=self._file_name):
            self._table_message = ProtoTable()
        else:
            with FileLockTool(self._file_name, read_mode=True):
                self._table_message = FileUtil.read_proto_from_file(
                    proto_type=ProtoTable,
                    file_name=self._file_name
                )
        if not self._table_message.table_path:
            self._table_message.table_path = self._file_name
        if not self._table_message.table_name:
            self._table_message.table_name = FileUtil.base_name(file_name=file_name)
        if not self._table_message.created_time:
            self._table_message.created_time = str(TimezoneUtil.cur_time_in_pst())