Code example #1
    def _proto_table_storage_impl(self, request):
        self._logger.info("Getting request of proto table storage read.")
        read_params = dict(request.params)
        if 'proto_module' in read_params:
            read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                message_type_str=read_params['message_type'],
                modules=read_params['proto_module']
            )
        else:
            read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                message_type_str=read_params['message_type']
            )

        lru_key = (request.type, request.file_name)
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            storage = ProtoTableStorage()
            storage.initialize_from_file(file_name=request.file_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")
        self._logger.info('Current cache size ' + str(self._lru_cache_tool.get_cur_capacity()))
        read_params.pop('proto_module', None)
        return storage.read(params=read_params)
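
For reference, the shape of read_params this handler expects can be sketched as a plain dict; the values below are illustrative, and the proto_module path is hypothetical:

# Illustrative only: the params this handler is assumed to receive.
read_params = {
    'key': 'test',                                # row key to look up
    'message_type': 'NodeSnapshot',               # proto message name as a string
    'proto_module': 'pslx.schema.snapshots_pb2',  # hypothetical module path
}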
Code example #2
File: fetcher_tool.py Project: kfrancischen/pslx
 def fetch_oldest(self):
     try:
         oldest_dir = self._partitioner.get_oldest_dir_in_root_directory()
         if not oldest_dir:
             self._logger.warning('[' + self._partitioner.get_dir_name() +
                                  '] is empty.')
             return None
         proto_table = ProtoTableStorage()
         proto_table.initialize_from_file(
             file_name=FileUtil.join_paths_to_file(root_dir=oldest_dir,
                                                   base_name='data.pb'))
         all_data = proto_table.read_all()
         if all_data:
             self._logger.info(
                 "Successfully get the oldest data in partition dir [" +
                 self._partitioner.get_dir_name() + '].')
             min_key = min(all_data.keys())
             return ProtoUtil.any_to_message(message_type=self.MESSAGE_TYPE,
                                             any_message=all_data[min_key])
         else:
             return None
     except Exception as err:
         self._logger.error("Fetch oldest partition [" +
                            self._partitioner.get_dir_name() +
                            "] with error " + str(err) + '.')
     return None
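
The heart of fetch_oldest is the min-key lookup over read_all(); a standalone sketch with a plain dict standing in for the proto table:

# Standalone sketch: pick the entry with the smallest key, as above.
all_data = {'2020-01-02': 'b', '2020-01-01': 'a'}
oldest = all_data[min(all_data.keys())] if all_data else None
assert oldest == 'a'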
Code example #3
File: logging_renderer.py Project: alpha-hunter/pslx
 def __init__(self):
     super().__init__(operator_name='FRONTEND_DEDICATED_LOGGING_OP')
     self._pslx_dedicated_logging_storage = ProtoTableStorage(logger=pslx_frontend_logger)
     self._pslx_dedicated_logging_storage.initialize_from_file(
         file_name=pslx_dedicated_logging_storage_path
     )
     self._cached_logging = {}
     self._logging_storage_capacity = max(int(EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_CACHE')), 700)
Code example #4
 def test_read_1(self):
     proto_table_storage = ProtoTableStorage()
     proto_table_storage.initialize_from_file(file_name=self.TEST_DATA_3)
     result_proto = proto_table_storage.read(params={
         'key': 'test',
         'message_type': NodeSnapshot
     })
     self.assertEqual(result_proto, self.EXAMPLE_PROTO_1)
Code example #5
File: logging_renderer.py Project: alpha-hunter/pslx
class FrontendDedicatedLoggingOp(StreamingOperator):

    def __init__(self):
        super().__init__(operator_name='FRONTEND_DEDICATED_LOGGING_OP')
        self._pslx_dedicated_logging_storage = ProtoTableStorage(logger=pslx_frontend_logger)
        self._pslx_dedicated_logging_storage.initialize_from_file(
            file_name=pslx_dedicated_logging_storage_path
        )
        self._cached_logging = {}
        self._logging_storage_capacity = max(int(EnvUtil.get_pslx_env_variable(var='PSLX_INTERNAL_CACHE')), 700)

    def pubsub_msg_parser(self, exchange_name, topic_name, message):
        self._cached_logging[str(TimezoneUtil.cur_time_in_pst())] = message
        if len(self._cached_logging) >= int(EnvUtil.get_pslx_env_variable('PSLX_RPC_FLUSH_RATE')):
            self._pslx_dedicated_logging_storage.write(
                data=self._cached_logging
            )
            self._cached_logging.clear()

            total_entries = self._pslx_dedicated_logging_storage.get_num_entries()
            if total_entries > self._logging_storage_capacity:
                try:
                    all_data = self._pslx_dedicated_logging_storage.read_all()
                    all_sorted_keys = sorted(list(all_data.keys()))
                    for i in range(total_entries - self._logging_storage_capacity):
                        key_to_delete = all_sorted_keys[i]
                        self._pslx_dedicated_logging_storage.delete(key=key_to_delete)
                except Exception as _:
                    pass

    def execute_impl(self):
        pslx_dedicated_subscriber.bind_to_op(self)
        pslx_dedicated_subscriber.start()
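
The flush-then-trim logic in pubsub_msg_parser can be shown in isolation: once the table exceeds its capacity, the smallest (earliest-timestamp) keys are deleted first. A minimal sketch with a plain dict standing in for ProtoTableStorage:

# Minimal sketch of the trim loop above (plain dict instead of storage).
capacity = 3
table = {'t1': 'a', 't2': 'b', 't3': 'c', 't4': 'd', 't5': 'e'}
for key in sorted(table.keys())[:len(table) - capacity]:
    del table[key]
assert sorted(table) == ['t3', 't4', 't5']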
Code example #6
File: util.py Project: kfrancischen/pslx
    def get_response_impl(backend_folder, request, lru_cache=None):
        storage_value = ContainerBackendValue()
        storage_value.container_name = request.container_name
        storage_value.container_status = request.status
        for operator_name, operator_snapshot in dict(
                request.operator_snapshot_map).items():
            operator_info = ContainerBackendValue.OperatorInfo()
            operator_info.status = operator_snapshot.status
            for parent in operator_snapshot.node_snapshot.parents_names:
                operator_info.parents.append(parent)

            operator_info.start_time = operator_snapshot.start_time
            operator_info.end_time = operator_snapshot.end_time
            operator_info.log_file = operator_snapshot.log_file
            storage_value.operator_info_map[operator_name].CopyFrom(
                operator_info)

        storage_value.mode = request.mode
        storage_value.data_model = request.data_model
        storage_value.updated_time = str(TimezoneUtil.cur_time_in_pst())
        storage_value.start_time = request.start_time
        storage_value.end_time = request.end_time
        storage_value.log_file = request.log_file
        storage_value.run_cell = request.run_cell
        storage_value.snapshot_cell = request.snapshot_cell
        for key in request.counters:
            storage_value.counters[key] = request.counters[key]
        storage_value.ttl = int(
            EnvUtil.get_pslx_env_variable('PSLX_BACKEND_CONTAINER_TTL'))

        storage = lru_cache.get(
            key=storage_value.container_name) if lru_cache else None

        if not storage:
            storage = ProtoTableStorage()
            storage.initialize_from_file(file_name=FileUtil.join_paths_to_file(
                root_dir=backend_folder,
                base_name=storage_value.container_name + '.pb'))
            if lru_cache:
                # Key must match the lookup key above (container_name),
                # otherwise a cached storage is never found again.
                lru_cache.set(key=storage_value.container_name, value=storage)
        all_data = storage.read_all()
        if len(all_data) >= int(
                EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_CACHE')) > 0:
            key_to_delete = sorted(all_data.keys())[0]
            storage.delete(key=key_to_delete)

        storage.write(data={storage_value.start_time: storage_value})
Code example #7
 def test_write_1(self):
     proto_table_storage = ProtoTableStorage()
     proto_table_storage.initialize_from_file(file_name=self.TEST_DATA_3)
     proto_table_storage.write(data={'test': self.EXAMPLE_PROTO_1})
     result_proto = proto_table_storage.read(params={
         'key': 'test',
         'message_type': NodeSnapshot
     })
     self.assertEqual(result_proto, self.EXAMPLE_PROTO_1)
     gclient_ext.cp_file(self.TEST_DATA_1, self.TEST_DATA_3)
Code example #8
    def get_response_and_status_impl(self, request):
        storage_value = ContainerBackendValue()
        storage_value.container_name = request.container_name
        storage_value.container_status = request.status
        for operator_name, operator_snapshot in dict(
                request.operator_snapshot_map).items():
            operator_info = ContainerBackendValue.OperatorInfo()
            operator_info.status = operator_snapshot.status
            for parent in operator_snapshot.node_snapshot.parents_names:
                operator_info.parents.append(parent)

            operator_info.start_time = operator_snapshot.start_time
            operator_info.end_time = operator_snapshot.end_time
            storage_value.operator_info_map[operator_name].CopyFrom(
                operator_info)

        storage_value.mode = request.mode
        storage_value.data_model = request.data_model
        storage_value.updated_time = str(TimezoneUtil.cur_time_in_pst())
        storage_value.start_time = request.start_time
        storage_value.end_time = request.end_time
        storage_value.log_dir = request.log_dir
        for key in request.counters:
            storage_value.counters[key] = request.counters[key]
        partitioner_dir = FileUtil.join_paths_to_dir_with_mode(
            root_dir=FileUtil.join_paths_to_dir(
                root_dir=self._backend_folder,
                base_name=ProtoUtil.get_name_by_value(
                    enum_type=DataModelType, value=storage_value.data_model)),
            base_name=storage_value.container_name,
            ttl=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_TTL'))
        if storage_value.mode == ModeType.TEST:
            partitioner_dir = partitioner_dir.replace('PROD', 'TEST')
        storage = self._lru_cache_tool.get(key=partitioner_dir)
        if not storage:
            self.sys_log(
                "Did not find the storage in cache. Making a new one...")
            storage = DailyPartitionerStorage()
            proto_table = ProtoTableStorage()
            storage.set_underlying_storage(storage=proto_table)
            storage.initialize_from_dir(dir_name=partitioner_dir)
            self._lru_cache_tool.set(key=partitioner_dir, value=storage)
        else:
            self.sys_log("Found key in LRU cache.")

        storage.write(data={storage_value.container_name: storage_value},
                      params={
                          'overwrite': True,
                          'make_partition': True,
                      })
        return None, Status.SUCCEEDED
Code example #9
File: rpc_base.py Project: kfrancischen/pslx
 def __init__(self, service_name, rpc_storage=None):
     Base.__init__(self)
     self._logger = DummyUtil.dummy_logger()
     self._service_name = service_name
     if rpc_storage:
         assert rpc_storage.get_storage_type() == StorageType.PARTITIONER_STORAGE
         if 'ttl' not in rpc_storage.get_dir_name():
             self._SYS_LOGGER.warning("Warning. Please ttl the request log table.")
         underlying_storage = ProtoTableStorage()
         rpc_storage.set_underlying_storage(storage=underlying_storage)
         rpc_storage.set_max_capacity(max_capacity=EnvUtil.get_pslx_env_variable('PSLX_INTERNAL_CACHE'))
     self._rpc_storage = rpc_storage
     self._request_timestamp = collections.deque()
     self._request_response_pair = {}
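
A hypothetical setup for the rpc_storage argument, inferred from the assertions above; the class name RpcBase, the import path, and the directory name are all assumptions, not confirmed pslx API:

# Hypothetical wiring; names and import paths are assumptions.
from pslx.storage.partitioner_storage import DailyPartitionerStorage

rpc_storage = DailyPartitionerStorage()
# A dir name containing 'ttl' avoids the warning in __init__ above.
rpc_storage.initialize_from_dir(dir_name='/data/rpc_log/ttl=7/')
server = RpcBase(service_name='my_service', rpc_storage=rpc_storage)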
Code example #10
def get_containers_info():
    containers_info = []
    existing_containers = {}
    all_proto_files = set()
    if not FileUtil.is_local_path(backend_folder):
        all_cells = ['']
    else:
        all_cells = gclient.list_cells()

    for cell_name in all_cells:
        folder = FileUtil.convert_local_to_cell_path(
            path=backend_folder, cell=cell_name)
        proto_files = FileUtil.list_files_in_dir(folder)
        all_proto_files = all_proto_files.union(set(proto_files))
    for proto_file in all_proto_files:
        storage = ProtoTableStorage()
        storage.initialize_from_file(
            file_name=proto_file
        )
        raw_data = storage.read_all()
        if not raw_data:
            continue
        key = sorted(raw_data.keys())[-1]
        val = raw_data[key]
        result_proto = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=val
        )
        ttl = result_proto.ttl

        if ttl > 0 and result_proto.updated_time and TimezoneUtil.cur_time_in_pst() - TimezoneUtil.cur_time_from_str(
                result_proto.updated_time) >= datetime.timedelta(days=ttl):
            FileUtil.remove_file(storage.get_file_name())
        else:
            container_info = {
                'container_name': result_proto.container_name,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=result_proto.container_status),
                'updated_time': result_proto.updated_time,
                'mode': ProtoUtil.get_name_by_value(enum_type=ModeType, value=result_proto.mode),
                'data_model': ProtoUtil.get_name_by_value(
                    enum_type=DataModelType, value=result_proto.data_model),
                'run_cell': result_proto.run_cell,
                'snapshot_cell': result_proto.snapshot_cell,
            }
            if container_info['container_name'] not in existing_containers:
                existing_containers[container_info['container_name']] = container_info['updated_time']
                containers_info.append(container_info)
            else:
                if container_info['updated_time'] >= existing_containers[container_info['container_name']]:
                    containers_info.append(container_info)

    return containers_info
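
The TTL test buried in the long condition above, in isolation (pure datetime; timezone handling omitted):

# Standalone sketch of the TTL expiry check above.
import datetime

ttl = 7  # days
updated = datetime.datetime(2020, 1, 1)
now = datetime.datetime(2020, 1, 9)
expired = ttl > 0 and now - updated >= datetime.timedelta(days=ttl)
assert expired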
Code example #11
 def resize_to_new_table(self, new_size_per_shard, new_dir_name):
     self.increment_rpc_count_by(n=2)
     assert not FileUtil.does_dir_exist(dir_name=new_dir_name) or FileUtil.is_dir_empty(dir_name=new_dir_name)
     new_sptable_storage = ShardedProtoTableStorage(size_per_shard=new_size_per_shard)
     new_sptable_storage.initialize_from_dir(dir_name=new_dir_name)
     for shard in range(self.get_num_shards()):
         related_proto_file = self._shard_to_file(shard=shard)
         proto_table = ProtoTableStorage()
         proto_table.initialize_from_file(file_name=related_proto_file)
         new_sptable_storage.write(data=proto_table.read_all())
         self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
     return new_sptable_storage
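
A hypothetical call, assuming the target directory is empty or absent as the assert requires; the import path is an assumption:

# Hypothetical usage; the import path is an assumption.
from pslx.storage.sharded_proto_table_storage import ShardedProtoTableStorage

old_table = ShardedProtoTableStorage(size_per_shard=100)
old_table.initialize_from_dir(dir_name='/data/old_table/')
new_table = old_table.resize_to_new_table(new_size_per_shard=500,
                                          new_dir_name='/data/new_table/')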
Code example #12
File: logging_renderer.py Project: alpha-hunter/pslx
    def streaming_data_generator():
        last_checked_key = None
        while True:
            pslx_dedicated_logging_storage = ProtoTableStorage(logger=pslx_frontend_logger)
            pslx_dedicated_logging_storage.initialize_from_file(
                file_name=pslx_dedicated_logging_storage_path
            )
            if pslx_dedicated_logging_storage.get_num_entries() == 0:
                time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
                continue

            all_data = pslx_dedicated_logging_storage.read_all()
            all_sorted_keys = sorted(list(all_data.keys()))
            if all_sorted_keys[-1] == last_checked_key:
                time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
                continue
            last_checked_key = all_sorted_keys[-1]

            pslx_dedicated_logging_list = []
            for key in all_sorted_keys:
                val = ProtoUtil.any_to_message(
                    message_type=LoggingMessageRequest,
                    any_message=all_data[key]
                )
                if ProtoUtil.get_name_by_value(enum_type=DiskLoggerLevel, value=val.level) in log_levels:
                    message = val.message
                    for string_to_replace, string_after_replacing in strings_to_replace.items():
                        message = message.replace(string_to_replace, string_after_replacing)
                    contain_key_word = not key_words
                    for key_word in key_words:
                        if key_word in message:
                            contain_key_word = True
                            break

                    if contain_key_word:
                        pslx_dedicated_logging_list.append(message)
            yield '\\n'.join(pslx_dedicated_logging_list)

            time.sleep(TimeSleepObj.ONE_TENTH_SECOND)
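
One plausible consumer of such a generator is a Flask streaming response; this wiring is an assumption, not shown in the source:

# Assumed consumer: stream the generator out as an HTTP response.
from flask import Response

def stream_pslx_logs():
    return Response(streaming_data_generator(), mimetype='text/plain')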
Code example #13
    def test_write_2(self):
        proto_table_storage = ProtoTableStorage()
        proto_table_storage.initialize_from_file(file_name=self.TEST_DATA_4)
        proto_table_storage.write(data={'test': self.EXAMPLE_PROTO_1})
        proto_table_storage.write(data={'test_1': self.EXAMPLE_PROTO_2})
        result_proto = proto_table_storage.read(
            params={
                'key': 'test_1',
                'message_type': OperatorSnapshot
            })
        self.assertEqual(result_proto, self.EXAMPLE_PROTO_2)

        proto_table_storage.write(data={'test_1': self.EXAMPLE_PROTO_3})
        result_proto = proto_table_storage.read(
            params={
                'key': 'test_1',
                'message_type': OperatorSnapshot
            })
        self.assertEqual(result_proto, self.EXAMPLE_PROTO_3)

        result = proto_table_storage.read_all()
        self.assertDictEqual(
            result, {
                'test': ProtoUtil.message_to_any(self.EXAMPLE_PROTO_1),
                'test_1': ProtoUtil.message_to_any(self.EXAMPLE_PROTO_3),
            })

        self.assertEqual(proto_table_storage.get_num_entries(), 2)
        gclient_ext.cp_file(self.TEST_DATA_2, self.TEST_DATA_4)
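
read_all() returns Any-packed messages, which is why the test compares against ProtoUtil.message_to_any(...). The round trip rests on the standard protobuf Any API; a self-contained sketch (pslx's helpers are assumed to be thin wrappers over this):

# Round-trip sketch using the real google.protobuf Any API.
from google.protobuf import any_pb2
from google.protobuf.timestamp_pb2 import Timestamp

msg = Timestamp(seconds=1)
packed = any_pb2.Any()
packed.Pack(msg)
unpacked = Timestamp()
assert packed.Unpack(unpacked)
assert unpacked == msg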
Code example #14
 def test_read_3(self):
     proto_table_storage = ProtoTableStorage()
     proto_table_storage.initialize_from_file(file_name=self.TEST_DATA_3)
     result_proto = proto_table_storage.read(params={'key': 'test1'})
     self.assertIsNone(result_proto)
Code example #15
 def test_read_2(self):
     proto_table_storage = ProtoTableStorage()
     proto_table_storage.initialize_from_file(file_name=self.TEST_DATA_3)
     result_proto = proto_table_storage.read(params={'key': 'test'})
     self.assertEqual(result_proto,
                      ProtoUtil.message_to_any(self.EXAMPLE_PROTO_1))
Code example #16
    def _partitioner_storage_impl(self, request):
        self._logger.info("Getting request of partitioner storage read.")
        read_params = dict(request.params)
        is_proto_table = read_params['is_proto_table'] == '1'
        if 'base_name' in read_params:
            base_name = read_params['base_name']
        else:
            base_name = 'data.pb' if is_proto_table else 'data'

        lru_key = (read_params['PartitionerStorageType'], request.dir_name)
        self._logger.info("Partitioner type is " + read_params['PartitionerStorageType'])
        storage = self._lru_cache_tool.get(key=lru_key)
        if not storage:
            self.sys_log("Did not find the storage in cache. Making a new one...")
            partitioner_type = ProtoUtil.get_value_by_name(
                enum_type=PartitionerStorageType,
                name=read_params['PartitionerStorageType']
            )
            storage = self.PARTITIONER_TYPE_TO_IMPL[partitioner_type]()
            storage.initialize_from_dir(dir_name=request.dir_name)
            self._lru_cache_tool.set(
                key=lru_key,
                value=storage
            )
        else:
            self.sys_log("Found key in LRU cache.")

        self._logger.info('Current cache size ' + str(self._lru_cache_tool.get_cur_capacity()))
        read_params.pop('PartitionerStorageType', None)
        read_params.pop('is_proto_table', None)

        if is_proto_table:
            proto_table_storage = ProtoTableStorage()
            storage.set_underlying_storage(storage=proto_table_storage)
        else:
            read_params['num_line'] = -1

        response = RPCIOResponse()
        if 'start_time' not in read_params:
            # calling read function
            if is_proto_table:
                # if underlying storage is proto table.
                if 'message_type' in read_params:
                    assert 'proto_module' in read_params
                    read_params['message_type'] = ProtoUtil.infer_message_type_from_str(
                        message_type_str=read_params['message_type'],
                        modules=read_params['proto_module']
                    )
                proto_storage = ProtoTableStorage()
                if 'read_oldest' in read_params:
                    proto_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_oldest_dir_in_root_directory(),
                            base_name=base_name
                        )
                    )
                else:
                    proto_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_latest_dir(),
                            base_name=base_name
                        )
                    )
                data = proto_storage.read_all()
                for key, val in data.items():
                    rpc_list_data = RPCIOResponse.RPCListData()
                    rpc_data = rpc_list_data.data.add()
                    rpc_data.proto_data.CopyFrom(val)
                    response.dict_data[key].CopyFrom(rpc_list_data)
            else:
                # if underlying storage is not proto table.
                default_storage = DefaultStorage()
                if 'read_oldest' in read_params:
                    default_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_oldest_dir_in_root_directory(),
                            base_name=base_name
                        )
                    )
                else:
                    default_storage.initialize_from_file(
                        file_name=FileUtil.join_paths_to_file(
                            root_dir=storage.get_latest_dir(),
                            base_name=base_name
                        )
                    )
                data = default_storage.read(params={
                    'num_line': -1,
                })
                rpc_list_data = RPCIOResponse.RPCListData()
                for item in data:
                    rpc_data = rpc_list_data.data.add()
                    rpc_data.string_data = item

                response.list_data.CopyFrom(rpc_list_data)
        else:
            # calling read_range function; 'start_time' is guaranteed present
            # in this branch by the check above.
            read_params['start_time'] = TimezoneUtil.cur_time_from_str(
                time_str=read_params['start_time']
            )
            if 'end_time' in read_params:
                read_params['end_time'] = TimezoneUtil.cur_time_from_str(
                    time_str=read_params['end_time']
                )

            data = storage.read_range(params=read_params)
            if data:
                for key, val in data.items():
                    rpc_list_data = RPCIOResponse.RPCListData()
                    if is_proto_table:
                        for proto_key, any_message in val.items():
                            rpc_data = rpc_list_data.data.add()
                            rpc_data.string_data = proto_key

                            rpc_data = rpc_list_data.data.add()
                            rpc_data.proto_data.CopyFrom(any_message)
                    else:
                        for entry in val:
                            rpc_data = rpc_list_data.data.add()
                            rpc_data.string_data = entry

                    response.dict_data[key].CopyFrom(rpc_list_data)

        return response
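
The two request shapes this handler distinguishes, sketched as plain dicts; the keys come from the handler above, while the enum name and module path are hypothetical:

# Illustrative params for the two read paths above.
latest_read_params = {
    'is_proto_table': '1',
    'PartitionerStorageType': 'DAILY',            # hypothetical enum name
    'message_type': 'NodeSnapshot',
    'proto_module': 'pslx.schema.snapshots_pb2',  # hypothetical module path
}
range_read_params = {
    'is_proto_table': '0',
    'PartitionerStorageType': 'DAILY',
    'start_time': '2020-01-01 00:00:00',
    'end_time': '2020-01-02 00:00:00',
}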
Code example #17
    def read_range(self, params):
        def _reformat_time(timestamp):
            if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                timestamp = timestamp.replace(month=1,
                                              day=1,
                                              hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                timestamp = timestamp.replace(day=1,
                                              hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                timestamp = timestamp.replace(hour=0,
                                              minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                timestamp = timestamp.replace(minute=0,
                                              second=0,
                                              microsecond=0,
                                              tzinfo=None)
            else:
                timestamp = timestamp.replace(second=0,
                                              microsecond=0,
                                              tzinfo=None)
            return timestamp

        assert 'start_time' in params and 'end_time' in params and params[
            'start_time'] <= params['end_time']
        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._reader_status = Status.RUNNING

        oldest_dir, latest_dir = self.get_oldest_dir(), self.get_latest_dir()
        if not latest_dir or not oldest_dir:
            if self.is_empty():
                self._logger.warning("Current partitioner [" +
                                     self.get_dir_name() +
                                     "] is empty, cannot read anything.")
                self.sys_log("Current partitioner [" + self.get_dir_name() +
                             "] is empty, cannot read anything.")
                return {}

        oldest_dir = oldest_dir.replace(self._file_tree.get_root_name(), '')
        latest_dir = latest_dir.replace(self._file_tree.get_root_name(), '')

        oldest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=oldest_dir)
        latest_timestamp = FileUtil.parse_dir_to_timestamp(dir_name=latest_dir)
        start_time = max(_reformat_time(params['start_time']),
                         oldest_timestamp)
        end_time = min(_reformat_time(params['end_time']), latest_timestamp)
        result = {}
        try:
            while start_time <= end_time:
                dir_list = FileUtil.parse_timestamp_to_dir(
                    timestamp=start_time).split('/')
                dir_name = '/'.join(
                    dir_list[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                        self.PARTITIONER_TYPE]])
                dir_name = FileUtil.join_paths_to_dir(
                    root_dir=self._file_tree.get_root_name(),
                    base_name=dir_name)
                if FileUtil.does_dir_exist(dir_name=dir_name):
                    if self._underlying_storage.get_storage_type(
                    ) == StorageType.PROTO_TABLE_STORAGE:
                        storage = ProtoTableStorage()
                    else:
                        storage = DefaultStorage()
                    file_names = FileUtil.list_files_in_dir(dir_name=dir_name)
                    for file_name in file_names:
                        storage.initialize_from_file(file_name=file_name)
                        if storage.get_storage_type(
                        ) == StorageType.PROTO_TABLE_STORAGE:
                            result[file_name] = storage.read_all()
                        else:
                            result[file_name] = storage.read(
                                params={'num_line': -1})

                if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                    start_time = start_time.replace(year=start_time.year + 1,
                                                    month=1,
                                                    day=1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                    if start_time.month == 12:
                        start_time = start_time.replace(year=start_time.year +
                                                        1,
                                                        month=1,
                                                        day=1)
                    else:
                        start_time = start_time.replace(
                            month=start_time.month + 1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                    start_time += datetime.timedelta(days=1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                    start_time += datetime.timedelta(hours=1)
                else:
                    start_time += datetime.timedelta(minutes=1)

            self._reader_status = Status.IDLE
            return result
        except Exception as err:
            self.sys_log("Read range in dir [" + self.get_dir_name() +
                         "] got exception " + str(err) + '.')
            self._logger.error("Read range in dir [" + self.get_dir_name() +
                               "] got exception " + str(err) + '.')
            raise StorageReadException("Read range in dir [" +
                                       self.get_dir_name() +
                                       "] got exception " + str(err) + '.')
Code example #18
def get_container_info(container_name, cell_name, start_time):
    container_info = {
        'log_file': '',
        'start_time': '',
        'end_time': '',
        'counter_info': [],
    }
    operators_info = []
    folder = FileUtil.convert_local_to_cell_path(
        path=backend_folder, cell=cell_name)
    pslx_frontend_logger.info(
        "Container backend checking folder [" + folder + '].')
    storage = ProtoTableStorage()
    storage.initialize_from_file(
        FileUtil.join_paths_to_file(
            root_dir=folder,
            base_name=container_name + '.pb'
        )
    )
    raw_data = storage.read_all()
    all_past_run = []
    for key in sorted(list(raw_data.keys()), reverse=True):
        val = ProtoUtil.any_to_message(
            message_type=ContainerBackendValue,
            any_message=raw_data[key]
        )
        all_past_run.append(
            {
                'start_time': val.start_time,
                'updated_time': val.updated_time,
                'end_time': val.end_time,
                'status': ProtoUtil.get_name_by_value(
                    enum_type=Status, value=val.container_status),
                'run_cell': val.run_cell,
                'snapshot_cell': val.snapshot_cell,
            }
        )
        if len(all_past_run) > 10:
            break

    key = start_time if start_time else sorted(raw_data.keys())[-1]
    val = raw_data[key]
    result_proto = ProtoUtil.any_to_message(
        message_type=ContainerBackendValue,
        any_message=val
    )
    container_info['log_file'] = galaxy_viewer_url + result_proto.log_file
    container_info['start_time'] = result_proto.start_time
    container_info['end_time'] = result_proto.end_time
    for key in sorted(dict(result_proto.counters).keys()):
        container_info['counter_info'].append(
            {
                'name': key,
                'count': result_proto.counters[key],
            }
        )
    for key, val in dict(result_proto.operator_info_map).items():
        operators_info.append({
            'operator_name': key,
            'status': ProtoUtil.get_name_by_value(
                enum_type=Status, value=val.status),
            'start_time': val.start_time,
            'end_time': val.end_time,
            'dependencies': ', '.join(val.parents),
            'log_file': galaxy_viewer_url + val.log_file,
        })
    return (container_info, sorted(operators_info, key=lambda x: (x['dependencies'], x['operator_name'])),
            all_past_run)
Code example #19
    def write(self, data, params=None):
        if not params:
            params = {}
        if 'overwrite' not in params:
            params['overwrite'] = True
        assert isinstance(data, dict)

        existing_shard_to_data_map = defaultdict(dict)
        new_data = {}
        for key, val in data.items():
            if key in self._index_map.index_map:
                existing_shard_to_data_map[self._index_map.index_map[key]][key] = val
            else:
                new_data[key] = val

        try:
            for shard, existing_data in existing_shard_to_data_map.items():
                related_proto_file = self._shard_to_file(shard=shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=related_proto_file)
                proto_table.write(data=existing_data, params=params)
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
            if new_data:
                all_new_keys = list(new_data.keys())
                latest_shard = self.get_latest_shard()
                latest_proto_file = self._shard_to_file(shard=latest_shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=latest_proto_file)
                latest_proto_table_size = proto_table.get_num_entries()
                proto_table.write(
                    data={key: new_data[key] for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]},
                    params=params
                )
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]:
                    self._index_map.index_map[key] = latest_shard

                if len(all_new_keys) > self._size_per_shard - latest_proto_table_size:
                    remaining_new_keys = all_new_keys[self._size_per_shard - latest_proto_table_size:]
                    start_index = 0
                    while start_index < len(remaining_new_keys):
                        data_for_new_shard = remaining_new_keys[start_index:start_index + self._size_per_shard]
                        latest_shard += 1
                        self._index_map.cur_shard += 1
                        proto_file = self._shard_to_file(shard=latest_shard)
                        self._logger.info("Write to new file with name [" + proto_file + '] and shard [' +
                                          str(latest_shard) + '].')
                        proto_table = ProtoTableStorage()
                        proto_table.initialize_from_file(file_name=proto_file)
                        proto_table.write(
                            data={key: new_data[key] for key in data_for_new_shard},
                            params=params
                        )
                        self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                        for key in data_for_new_shard:
                            self._index_map.index_map[key] = latest_shard

                        start_index += self._size_per_shard
                self._logger.info("Writing the index map to [" + self._index_map_file + '].')
                self.increment_rpc_count_by(n=1)
                FileUtil.write_proto_to_file(
                    proto=self._index_map,
                    file_name=self._index_map_file
                )

        except Exception as err:
            self._SYS_LOGGER.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
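
The shard-splitting arithmetic above, in isolation: new keys first fill the remaining room in the latest shard, then spill into fresh shards of size_per_shard each.

# Standalone sketch of the spill logic in write() above.
size_per_shard = 3
latest_shard_size = 2
new_keys = ['k1', 'k2', 'k3', 'k4', 'k5']

room = size_per_shard - latest_shard_size
fills_latest, remaining = new_keys[:room], new_keys[room:]
new_shards = [remaining[i:i + size_per_shard]
              for i in range(0, len(remaining), size_per_shard)]

assert fills_latest == ['k1']
assert new_shards == [['k2', 'k3', 'k4'], ['k5']]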