Example #1
0
    def delete_all(self):
        """Delete every entry in the table and persist the emptied table.

        Blocks until any in-flight reader or writer goes idle, then marks the
        deleter RUNNING while the table is cleared and flushed to disk under a
        file lock.

        Raises:
            StorageDeleteException: if persisting the emptied table fails.
        """
        # Wait for concurrent reader/writer to go idle before mutating state.
        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        while self._writer_status != Status.IDLE:
            self.sys_log("Waiting for writer to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)
        self._deleter_status = Status.RUNNING
        # Snapshot the keys first; deleting while iterating the proto map
        # directly would invalidate the iteration.
        all_keys = list(dict(self._table_message.data).keys())
        for key in all_keys:
            del self._table_message.data[key]
        try:
            self._table_message.updated_time = str(TimezoneUtil.cur_time_in_pst())
            with FileLockTool(self._file_name, read_mode=False):
                FileUtil.write_proto_to_file(
                    proto=self._table_message,
                    file_name=self._file_name
                )
        except Exception as err:
            self.sys_log("Delete all of file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Delete all of file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            raise StorageDeleteException("Delete all of file [" + self.get_file_name() +
                                         "] got exception: " + str(err) + '.')
        finally:
            # BUG FIX: previously the deleter status was reset to IDLE only on
            # success, so a failed write left it stuck at RUNNING and
            # permanently deadlocked any writer waiting for the deleter.
            self._deleter_status = Status.IDLE
Example #2
0
    def write(self, data, params=None):
        """Write ``data`` (a dict of key -> proto message) into the table file.

        Honors ``params['overwrite']`` (default True): when False, keys that
        already exist in the table are left untouched.

        Raises:
            StorageWriteException: if serialization or the file write fails.
        """
        if not params:
            params = {}
        params.setdefault('overwrite', True)

        assert isinstance(data, dict)
        try:
            overwrite = params['overwrite']
            for key, val in data.items():
                if not overwrite and key in self._table_message.data:
                    continue
                self._table_message.data[key].CopyFrom(
                    ProtoUtil.message_to_any(message=val))
            # Nudge users toward sharding once the single table grows large.
            if len(self._table_message.data) > 1000:
                self._SYS_LOGGER.warning(
                    "Warning: the table content is too large, considering using Partitioner "
                    "combined with proto table.")
            self._table_message.updated_time = str(
                TimezoneUtil.cur_time_in_pst())

            self.increment_rpc_count_by(n=1)
            FileUtil.write_proto_to_file(proto=self._table_message,
                                         file_name=self._file_name)

        except Exception as err:
            message = ("Write to file [" + self.get_file_name() +
                       "] got exception: " + str(err) + '.')
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageWriteException(message)
Example #3
0
    def get_container_snapshot(self, send_backend=True):
        """Build, persist, and optionally forward a snapshot of this container.

        Collects container metadata plus a snapshot from every non-dummy
        operator, writes the result under the snapshot folder (file-locked),
        and forwards it to the backend when configured.

        Args:
            send_backend: when True and a backend is configured, the snapshot
                is also sent to it (failures are logged, not raised).

        Returns:
            The assembled ContainerSnapshot proto.
        """
        if not self._is_initialized:
            warning = ("Warning: taking snapshot when the container [" +
                       self.get_container_name() + "] is not initialized.")
            self._logger.error(warning)
            self.sys_log(warning)

        snapshot = ContainerSnapshot()
        snapshot.container_name = self._container_name
        snapshot.is_initialized = self._is_initialized
        snapshot.status = self._status
        snapshot.class_name = self.get_full_class_name()
        snapshot.mode = self._mode
        snapshot.data_model = self.DATA_MODEL
        snapshot.log_dir = self._logger.get_log_dir()
        for counter_name, count in self._counter.items():
            snapshot.counters[counter_name] = count
        if self._start_time:
            snapshot.start_time = str(self._start_time)
        if self._end_time:
            snapshot.end_time = str(self._end_time)

        # Each non-dummy operator writes its own snapshot under 'operators'.
        operators_dir = FileUtil.join_paths_to_dir(
            FileUtil.dir_name(self._snapshot_file_folder), 'operators')
        for op_name, op in self._node_name_to_node_dict.items():
            if 'Dummy' in op.get_class_name():
                continue
            op_output_file = FileUtil.join_paths_to_file(
                root_dir=operators_dir,
                base_name=('SNAPSHOT_' + str(TimezoneUtil.cur_time_in_pst()) +
                           '_' + op_name + '.pb')
            )
            op_snapshot = op.get_operator_snapshot(output_file=op_output_file)
            snapshot.operator_snapshot_map[op_name].CopyFrom(op_snapshot)

        saved_msg = ("Snapshot saved to folder [" +
                     self._snapshot_file_folder + '].')
        self.sys_log(saved_msg)
        self._logger.info(saved_msg)
        output_file_name = FileUtil.join_paths_to_file(
            root_dir=FileUtil.dir_name(self._snapshot_file_folder),
            base_name=('SNAPSHOT_' + str(TimezoneUtil.cur_time_in_pst()) +
                       '_' + self._container_name + '.pb')
        )
        with FileLockTool(output_file_name, read_mode=False):
            FileUtil.write_proto_to_file(proto=snapshot,
                                         file_name=output_file_name)
        # Backend delivery is best-effort: failures are logged, never raised.
        if self._backend and send_backend:
            try:
                self._backend.send_to_backend(snapshot=snapshot)
            except Exception as err:
                self._logger.error("Sending backend failed with error " +
                                   str(err) + '.')

        return snapshot
Example #4
0
    def delete_multiple(self, keys):
        """Delete the given keys from the table and persist the change.

        Keys not present in the table are silently skipped.

        Args:
            keys: iterable of keys to remove.

        Raises:
            StorageDeleteException: if persisting the updated table fails.
        """
        for key in keys:
            if key in self._table_message.data:
                del self._table_message.data[key]

        try:
            self._table_message.updated_time = str(
                TimezoneUtil.cur_time_in_pst())
            self.increment_rpc_count_by(n=1)
            FileUtil.write_proto_to_file(proto=self._table_message,
                                         file_name=self._file_name)
        except Exception as err:
            # CONSISTENCY FIX: message now ends with '.' to match the error
            # format used by the sibling write()/delete_all() methods.
            message = ("Delete file [" + self.get_file_name() +
                       "] got exception: " + str(err) + '.')
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageDeleteException(message)
Example #5
0
    def delete_all(self):
        """Remove every entry from the table and persist the emptied table.

        Raises:
            StorageDeleteException: if persisting the emptied table fails.
        """
        # Materialize the key list before deleting; removing entries while
        # iterating the proto map directly is unsafe. (Fixed: the previous
        # `list(dict(...).keys())` built a redundant intermediate dict copy.)
        all_keys = list(self._table_message.data.keys())
        for key in all_keys:
            del self._table_message.data[key]
        try:
            self._table_message.updated_time = str(
                TimezoneUtil.cur_time_in_pst())
            self.increment_rpc_count_by(n=1)
            FileUtil.write_proto_to_file(proto=self._table_message,
                                         file_name=self._file_name)

        except Exception as err:
            message = ("Delete all of file [" + self.get_file_name() +
                       "] got exception: " + str(err) + '.')
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageDeleteException(message)
Example #6
0
 def get_operator_snapshot(self, output_file=None):
     """Assemble an OperatorSnapshot describing this operator's state.

     When ``output_file`` is given, snapshot saving is enabled in the config,
     and this is not a Dummy operator, the snapshot is also written to that
     file under a file lock.

     Args:
         output_file: optional path to persist the snapshot proto to.

     Returns:
         The populated OperatorSnapshot proto.
     """
     snapshot = OperatorSnapshot()
     snapshot.operator_name = self.get_node_name()
     snapshot.data_model = self.get_data_model()
     status = self.get_status()
     snapshot.status = status
     snapshot.node_snapshot.CopyFrom(self.get_node_snapshot())
     snapshot.class_name = self.get_full_class_name()
     if self._start_time:
         snapshot.start_time = str(self._start_time)
     # End time is only meaningful once the operator has finished successfully.
     if status == Status.SUCCEEDED and self._end_time:
         snapshot.end_time = str(self._end_time)
     if self._persistent:
         # Persistent operators must declare their content message type.
         assert self.CONTENT_MESSAGE_TYPE is not None
         snapshot.content.CopyFrom(ProtoUtil.message_to_any(message=self._content))
     should_save = (output_file and self._config['save_snapshot'] and
                    'Dummy' not in self.get_class_name())
     if should_save:
         self.sys_log("Saved to file " + output_file + '.')
         with FileLockTool(output_file, read_mode=False):
             FileUtil.write_proto_to_file(proto=snapshot, file_name=output_file)
     return snapshot
Example #7
0
    def get_operator_snapshot(self, output_file=None):
        """Create an OperatorSnapshot capturing this operator's current state.

        Args:
            output_file: optional path; when provided (and snapshot saving is
                enabled and this is not a Dummy operator) the proto is also
                written to disk.

        Returns:
            The populated OperatorSnapshot proto.
        """
        op_snapshot = OperatorSnapshot()
        op_snapshot.operator_name = self.get_node_name()
        op_snapshot.data_model = self.get_data_model()
        op_snapshot.status = self.get_status()
        op_snapshot.node_snapshot.CopyFrom(self.get_node_snapshot())
        op_snapshot.class_name = self.get_full_class_name()
        op_snapshot.log_file = FileUtil.convert_local_to_cell_path(
            glogging.get_logger_file(self._logger))
        if self._start_time:
            op_snapshot.start_time = str(self._start_time)
        # End time is only recorded for successfully finished operators.
        if self.get_status() == Status.SUCCEEDED and self._end_time:
            op_snapshot.end_time = str(self._end_time)
        if self._persistent:
            op_snapshot.content.CopyFrom(self.content_serializer(self._content))
        should_save = (output_file and self._config['save_snapshot'] and
                       'Dummy' not in self.get_class_name())
        if should_save:
            self._SYS_LOGGER.info("Saved to file " + output_file + '.')

            FileUtil.write_proto_to_file(proto=op_snapshot,
                                         file_name=output_file)
        return op_snapshot
Example #8
0
    def write(self, data, params=None):
        """Write ``data`` (dict of key -> proto message) into the table file.

        Waits for any in-flight reader/deleter to finish, marks the writer
        RUNNING, merges the entries into the table (honoring
        ``params['overwrite']``, default True), and flushes the table to disk
        under a file lock.

        Raises:
            StorageWriteException: if serialization or the file write fails.
        """
        if not params:
            params = {}
        if 'overwrite' not in params:
            params['overwrite'] = True

        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        while self._deleter_status != Status.IDLE:
            self.sys_log("Waiting for deleter to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._writer_status = Status.RUNNING
        assert isinstance(data, dict)
        try:
            for key, val in data.items():
                if not params['overwrite'] and key in self._table_message.data:
                    continue
                any_message = ProtoUtil.message_to_any(message=val)
                self._table_message.data[key].CopyFrom(any_message)
            # Nudge users toward sharding once the single table grows large.
            if len(self._table_message.data) > 1000:
                self.sys_log("Warning: the table content is too large, considering using Partitioner "
                             "combined with proto table.")
            self._table_message.updated_time = str(TimezoneUtil.cur_time_in_pst())
            with FileLockTool(self._file_name, read_mode=False):
                FileUtil.write_proto_to_file(
                    proto=self._table_message,
                    file_name=self._file_name
                )
        except Exception as err:
            self.sys_log("Write to file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Write to file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to file [" + self.get_file_name() + "] got exception: " + str(err) + '.')
        finally:
            # BUG FIX: previously the writer status was reset to IDLE only on
            # success, so a failed write left it stuck at RUNNING and
            # permanently deadlocked readers/deleters waiting on the writer.
            self._writer_status = Status.IDLE
Example #9
0
    def write(self, data, params=None):
        """Write ``data`` into the sharded proto-table directory.

        Keys that already exist are routed to their recorded shard; new keys
        fill the remaining capacity of the latest shard (up to
        ``self._size_per_shard`` entries per shard) and then spill into newly
        created shards. The key -> shard index map is persisted whenever new
        keys were added.

        Args:
            data: dict mapping keys to proto messages.
            params: optional dict; ``params['overwrite']`` (default True) is
                forwarded to each underlying ProtoTableStorage.write().

        Raises:
            StorageWriteException: if any shard or index-map write fails.
        """
        if not params:
            params = {}
        if 'overwrite' not in params:
            params['overwrite'] = True
        assert isinstance(data, dict)

        # Partition the incoming entries: keys already in the index map go to
        # their existing shard; unseen keys are collected as new data.
        # NOTE(review): 'exising' is a typo for 'existing', kept as-is here.
        exising_shard_to_data_map = defaultdict(dict)
        new_data = {}
        for key, val in data.items():
            if key in self._index_map.index_map:
                exising_shard_to_data_map[self._index_map.index_map[key]][key] = val
            else:
                new_data[key] = val

        try:
            # Update entries in the shards that already contain their keys.
            for shard, existing_data in exising_shard_to_data_map.items():
                related_proto_file = self._shard_to_file(shard=shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=related_proto_file)
                proto_table.write(data=existing_data, params=params)
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
            if new_data:
                all_new_keys = list(new_data.keys())
                # Fill the spare capacity of the latest shard first.
                latest_shard = self.get_latest_shard()
                latest_proto_file = self._shard_to_file(shard=latest_shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=latest_proto_file)
                latest_proto_table_size = proto_table.get_num_entries()
                proto_table.write(
                    data={key: new_data[key] for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]},
                    params=params
                )
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                for key in all_new_keys[:self._size_per_shard - latest_proto_table_size]:
                    self._index_map.index_map[key] = latest_shard

                # Spill the overflow into as many brand-new shards as needed,
                # self._size_per_shard keys per shard.
                if len(all_new_keys) > self._size_per_shard - latest_proto_table_size:
                    remaining_new_keys = all_new_keys[self._size_per_shard - latest_proto_table_size:]
                    start_index = 0
                    while start_index < len(remaining_new_keys):
                        data_for_new_shard = remaining_new_keys[start_index:start_index + self._size_per_shard]
                        latest_shard += 1
                        self._index_map.cur_shard += 1
                        proto_file = self._shard_to_file(shard=latest_shard)
                        self._logger.info("Write to new file with name [" + proto_file + '] and shard [' +
                                          str(latest_shard) + '].')
                        proto_table = ProtoTableStorage()
                        proto_table.initialize_from_file(file_name=proto_file)
                        proto_table.write(
                            data={key: new_data[key] for key in data_for_new_shard},
                            params=params
                        )
                        self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                        for key in data_for_new_shard:
                            self._index_map.index_map[key] = latest_shard

                        start_index += self._size_per_shard
                # Persist the updated key -> shard mapping (only when new keys
                # were added; pure overwrites leave the index map unchanged).
                self._logger.info("Writing the index map to [" + self._index_map_file + '].')
                self.increment_rpc_count_by(n=1)
                FileUtil.write_proto_to_file(
                    proto=self._index_map,
                    file_name=self._index_map_file
                )

        except Exception as err:
            self._SYS_LOGGER.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            self._logger.error("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
            raise StorageWriteException("Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.')
Example #10
0
def main():
    """Build the frontend config proto from its YAML source and save it."""
    yaml_source = "example/frontend_example/frontend_config.yaml"
    proto_target = "example/frontend_example/frontend_config.pb"
    frontend_config = CommonUtil.make_frontend_config(yaml_path=yaml_source)
    print(frontend_config)
    FileUtil.write_proto_to_file(proto=frontend_config, file_name=proto_target)
Example #11
0
    def get_container_snapshot(self, send_backend=True):
        """Build and persist a ContainerSnapshot for this container.

        Collects container metadata, counters, timing, and a snapshot of every
        non-dummy operator; writes the result into the 'containers' snapshot
        folder and optionally forwards it to the backend.

        Args:
            send_backend: when True and a backend is configured, also send the
                snapshot to the backend (failures are logged, not raised).

        Returns:
            The populated ContainerSnapshot proto.
        """
        if not self._is_initialized:
            self._logger.error(
                "Warning: taking snapshot when the container [" +
                self.get_container_name() + "] is not initialized.")
            self._SYS_LOGGER.error(
                "Warning: taking snapshot when the container [" +
                self.get_container_name() + "] is not initialized.")

        snapshot = ContainerSnapshot()
        snapshot.container_name = self._container_name
        snapshot.is_initialized = self._is_initialized
        snapshot.status = self._status
        snapshot.class_name = self.get_full_class_name()
        snapshot.mode = self._mode
        snapshot.data_model = self.DATA_MODEL
        snapshot.log_file = FileUtil.convert_local_to_cell_path(
            glogging.get_logger_file(self._logger))
        # Cell metadata: where this container runs and where the snapshot
        # itself lands. Falls back to '' if GALAXY_fs_cell is unset.
        snapshot.run_cell = EnvUtil.get_other_env_variable(
            var='GALAXY_fs_cell', fallback_value='')
        snapshot.snapshot_cell = FileUtil.get_cell_from_path(
            FileUtil.convert_local_to_cell_path(self._snapshot_file_folder))
        for key, val in self._counter.items():
            snapshot.counters[key] = val
        if self._start_time:
            snapshot.start_time = str(self._start_time)
        if self._end_time:
            snapshot.end_time = str(self._end_time)

        # Each non-dummy operator writes its own snapshot under 'operators'.
        for op_name, op in self._node_name_to_node_dict.items():
            if 'Dummy' in op.get_class_name():
                continue
            op_output_file = FileUtil.join_paths_to_file(
                root_dir=FileUtil.join_paths_to_dir(
                    FileUtil.dir_name(self._snapshot_file_folder),
                    'operators'),
                base_name=op_name + '_SNAPSHOT_' +
                str(TimezoneUtil.cur_time_in_pst()) + '.pb')
            snapshot.operator_snapshot_map[op_name].CopyFrom(
                op.get_operator_snapshot(output_file=op_output_file))

        self._SYS_LOGGER.info(
            "Snapshot saved to folder [" +
            FileUtil.convert_local_to_cell_path(self._snapshot_file_folder) +
            '].')
        self._logger.info(
            "Snapshot saved to folder [" +
            FileUtil.convert_local_to_cell_path(self._snapshot_file_folder) +
            '].')
        output_file_name = FileUtil.join_paths_to_file(
            root_dir=FileUtil.join_paths_to_dir(
                FileUtil.dir_name(self._snapshot_file_folder), 'containers'),
            base_name=self._container_name + '_SNAPSHOT_' +
            str(TimezoneUtil.cur_time_in_pst()) + '.pb')

        FileUtil.write_proto_to_file(proto=snapshot,
                                     file_name=output_file_name)
        # Backend delivery is best-effort: failures are logged, never raised.
        if self._backend and send_backend:
            try:
                self._backend.send_to_backend(snapshot=snapshot)
            except Exception as err:
                self._logger.error("Sending backend failed with error " +
                                   str(err) + '.')

        return snapshot