Ejemplo n.º 1
0
    def write(self, data, params=None):
        """Merge ``data`` into the in-memory table message and persist it to file.

        Args:
            data: dict mapping table keys to messages. Each value is converted
                with ``ProtoUtil.message_to_any`` before being copied into the
                table.
            params: optional dict. ``params['overwrite']`` (default ``True``)
                controls whether keys already present in the table are
                replaced; when it is falsy, existing keys are skipped. The
                dict is only read and is never modified.

        Raises:
            AssertionError: if ``data`` is not a dict.
            StorageWriteException: if updating or writing the table fails; the
                original exception is attached as ``__cause__``.
        """
        # Read the flag without inserting a default into the caller's dict.
        overwrite = params.get('overwrite', True) if params else True

        assert isinstance(data, dict)
        try:
            for key, val in data.items():
                if not overwrite and key in self._table_message.data:
                    continue
                any_message = ProtoUtil.message_to_any(message=val)
                self._table_message.data[key].CopyFrom(any_message)
            if len(self._table_message.data) > 1000:
                self._SYS_LOGGER.warning(
                    "Warning: the table content is too large, considering using Partitioner "
                    "combined with proto table.")
            self._table_message.updated_time = str(
                TimezoneUtil.cur_time_in_pst())

            # The counter is bumped before the write, so a failed write is
            # still counted as one call.
            self.increment_rpc_count_by(n=1)
            FileUtil.write_proto_to_file(proto=self._table_message,
                                         file_name=self._file_name)

        except Exception as err:
            message = ("Write to file [" + self.get_file_name() +
                       "] got exception: " + str(err) + '.')
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageWriteException(message) from err
Ejemplo n.º 2
0
    def write(self, data, params=None):
        """Write one line of text to the backing file.

        Args:
            data: a string written as-is, or an iterable whose items are
                converted with ``str`` and joined with ``params['delimiter']``.
            params: optional dict. For non-string ``data`` it must contain
                ``'delimiter'`` (``','`` is used when ``params`` is empty or
                ``None``). Every other key only triggers a warning and is
                otherwise ignored.

        Raises:
            AssertionError: if ``data`` is not a string and a non-empty
                ``params`` is not a dict containing ``'delimiter'``.
            StorageWriteException: if writing to the file fails; the original
                exception is attached as ``__cause__``.
        """
        data_is_str = isinstance(data, str)
        if not data_is_str:
            if not params:
                params = {'delimiter': ','}
            else:
                assert isinstance(params, dict) and 'delimiter' in params

        if params:
            for param in params:
                if not data_is_str and param == 'delimiter':
                    continue
                omitted_message = (
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )
                self._logger.warning(omitted_message)
                self.sys_log(omitted_message)

        # Poll until the reader status reports IDLE before starting to write.
        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._writer_status = Status.RUNNING
        try:
            if not data_is_str:
                self.sys_log(
                    "Data is not str instance, joining them with preset delimiter."
                )
                data_to_write = params['delimiter'].join(
                    [str(val) for val in data])
            else:
                data_to_write = data

            try:
                append_to_end = (self._config['write_rule_type'] ==
                                 WriteRuleType.WRITE_FROM_END)
                with FileLockTool(self._file_name, read_mode=False):
                    target_file = FileUtil.create_file_if_not_exist(
                        file_name=self._file_name)
                    if append_to_end:
                        with open(target_file, 'a') as outfile:
                            outfile.write(data_to_write + '\n')
                    else:
                        # Prepend: read the current content, rewind, then
                        # write the new line followed by the old content.
                        with open(target_file, 'r+') as outfile:
                            file_data = outfile.read()
                            outfile.seek(0, 0)
                            outfile.write(data_to_write + '\n' + file_data)
            except Exception as err:
                message = ("Write to file [" + self._file_name +
                           "] got exception: " + str(err) + '.')
                self.sys_log(message)
                self._logger.error(message)
                raise StorageWriteException(message) from err
        finally:
            # Always release the writer flag; previously any failure left it
            # stuck at RUNNING.
            self._writer_status = Status.IDLE
Ejemplo n.º 3
0
    def write(self, data, params=None):
        """Write ``data`` through the underlying storage into the leftmost leaf partition.

        Args:
            data: payload forwarded unchanged to the underlying storage's
                ``write``.
            params: optional dict. Keys consumed here and not forwarded:
                ``'make_partition'`` (default ``True``) - whether to create a
                new partition before writing; ``'base_name'`` - file base name,
                overriding the default ``'data.pb'`` (proto table storage) or
                ``'data'``. ``'timezone'`` (``'PST'`` default, ``'UTC'`` or
                ``'EST'``) picks the timestamp for the new partition and is
                still forwarded. The caller's dict is not modified.

        Raises:
            StorageWriteException: if the underlying storage fails to write;
                the original exception is attached as ``__cause__``.
        """
        # Work on a shallow copy so that popping the partitioner-only options
        # below does not alter the dict owned by the caller.
        if params:
            params = dict(params)

        to_make_partition = True
        if params and 'make_partition' in params:
            to_make_partition = params.pop('make_partition')

        if self._underlying_storage.get_storage_type(
        ) == StorageType.PROTO_TABLE_STORAGE:
            file_base_name = 'data.pb'
        else:
            file_base_name = 'data'
        if params and 'base_name' in params:
            file_base_name = params.pop('base_name')

        # Poll until the reader status reports IDLE before starting to write.
        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._writer_status = Status.RUNNING
        try:
            if to_make_partition:
                timezone = params.get('timezone', 'PST') if params else 'PST'
                # NOTE(review): any other timezone value creates no partition
                # and the write goes to the existing leftmost leaf.
                if timezone == 'PST':
                    self.make_new_partition(
                        timestamp=TimezoneUtil.cur_time_in_pst())
                elif timezone == 'UTC':
                    self.make_new_partition(
                        timestamp=TimezoneUtil.cur_time_in_utc())
                elif timezone == 'EST':
                    self.make_new_partition(
                        timestamp=TimezoneUtil.cur_time_in_est())

            self.initialize_from_dir(dir_name=self._file_tree.get_root_name())

            file_name = FileUtil.join_paths_to_file(
                root_dir=self._file_tree.get_leftmost_leaf(),
                base_name=file_base_name)

            if file_name != self._underlying_storage.get_file_name():
                self.sys_log("Sync to the latest file to " + file_name)
                self._underlying_storage.initialize_from_file(
                    file_name=file_name)

            try:
                self._underlying_storage.write(data=data, params=params)
            except Exception as err:
                message = ("Write to dir [" + self.get_dir_name() +
                           "] got exception: " + str(err) + '.')
                self.sys_log(message)
                self._logger.error(message)
                raise StorageWriteException(message) from err
        finally:
            # Always release the writer flag; previously any failure left it
            # stuck at RUNNING.
            self._writer_status = Status.IDLE
Ejemplo n.º 4
0
    def write(self, data, params=None):
        """Write one line of text to the backing file through ``gclient``.

        Args:
            data: a string written as-is, or an iterable whose items are
                converted with ``str`` and joined with ``params['delimiter']``.
            params: optional dict. For non-string ``data`` it must contain
                ``'delimiter'`` (``','`` is used when ``params`` is empty or
                ``None``). Every other key only triggers a warning and is
                otherwise ignored.

        Raises:
            AssertionError: if ``data`` is not a string and a non-empty
                ``params`` is not a dict containing ``'delimiter'``.
            StorageWriteException: if the write fails; the original exception
                is attached as ``__cause__``.
        """
        data_is_str = isinstance(data, str)
        if not data_is_str:
            if not params:
                params = {'delimiter': ','}
            else:
                assert isinstance(params, dict) and 'delimiter' in params

        if params:
            for param in params:
                if not data_is_str and param == 'delimiter':
                    continue
                omitted_message = (
                    param +
                    " will be omitted since it is not useful as an input argument in this function."
                )
                self._logger.warning(omitted_message)
                self._SYS_LOGGER.warning(omitted_message)

        if not data_is_str:
            self._SYS_LOGGER.info(
                "Data is not str instance, joining them with preset delimiter."
            )
            data_to_write = params['delimiter'].join(
                [str(val) for val in data])
        else:
            data_to_write = data
        try:
            # NOTE(review): the prepend branch below issues a read and a write
            # but the counter is bumped once - confirm this is intended.
            self.increment_rpc_count_by(n=1)
            if self._config['write_rule_type'] == WriteRuleType.WRITE_FROM_END:
                gclient.write(path=self._file_name,
                              data=data_to_write + '\n',
                              mode='a')
            else:
                # Prepend: fetch the current content and rewrite the file with
                # the new line placed in front of it.
                existing_data = gclient_ext.read_txt(path=self._file_name)
                if existing_data is None:
                    existing_data = ''

                gclient.write(path=self._file_name,
                              data=data_to_write + '\n' + existing_data,
                              mode='w')

        except Exception as err:
            message = ("Write to file [" + self._file_name +
                       "] got exception: " + str(err) + '.')
            # A failed write is an error, not an informational event.
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageWriteException(message) from err
Ejemplo n.º 5
0
    def write(self, data, params=None):
        """Write ``data`` through the underlying storage into the leftmost leaf partition.

        Args:
            data: payload forwarded unchanged to the underlying storage's
                ``write``.
            params: optional dict. ``'make_partition'`` (default ``True``)
                decides whether a new partition is created before writing and
                is not forwarded. ``'base_name'`` overrides the default file
                base name (``'data.pb'`` for proto table storage, ``'data'``
                otherwise) and, like ``'timezone'`` (``'PST'`` default,
                ``'UTC'`` or ``'EST'``), is still forwarded to the underlying
                storage. The caller's dict is not modified.

        Raises:
            StorageWriteException: if the underlying storage fails to write;
                the original exception is attached as ``__cause__``.
        """
        # Work on a shallow copy so that popping 'make_partition' below does
        # not alter the dict owned by the caller.
        if params:
            params = dict(params)

        to_make_partition = True
        if params and 'make_partition' in params:
            to_make_partition = params.pop('make_partition')

        if self._underlying_storage.get_storage_type(
        ) == StorageType.PROTO_TABLE_STORAGE:
            file_base_name = 'data.pb'
        else:
            file_base_name = 'data'
        if params and 'base_name' in params:
            file_base_name = params['base_name']

        if to_make_partition:
            timezone = params.get('timezone', 'PST') if params else 'PST'
            # NOTE(review): any other timezone value creates no partition and
            # the write goes to the existing leftmost leaf.
            if timezone == 'PST':
                self.make_new_partition(
                    timestamp=TimezoneUtil.cur_time_in_pst())
            elif timezone == 'UTC':
                self.make_new_partition(
                    timestamp=TimezoneUtil.cur_time_in_utc())
            elif timezone == 'EST':
                self.make_new_partition(
                    timestamp=TimezoneUtil.cur_time_in_est())

        file_name = FileUtil.join_paths_to_file(
            root_dir=self._file_tree.get_leftmost_leaf(),
            base_name=file_base_name)

        if file_name != self._underlying_storage.get_file_name():
            self._SYS_LOGGER.info("Sync to the latest file to " + file_name)
            self._underlying_storage.initialize_from_file(file_name=file_name)

        try:
            self._underlying_storage.write(data=data, params=params)
            # Fold the calls made by the underlying storage into this
            # object's own counter.
            self.increment_rpc_count_by(
                n=self._underlying_storage.get_rpc_call_count_and_reset())
        except Exception as err:
            message = ("Write to dir [" + self.get_dir_name() +
                       "] got exception: " + str(err) + '.')
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageWriteException(message) from err
Ejemplo n.º 6
0
    def write(self, data, params=None):
        """Merge ``data`` into the in-memory table message and persist it under a file lock.

        Args:
            data: dict mapping table keys to messages. Each value is converted
                with ``ProtoUtil.message_to_any`` before being copied into the
                table.
            params: optional dict. ``params['overwrite']`` (default ``True``)
                controls whether keys already present in the table are
                replaced; when it is falsy, existing keys are skipped. The
                dict is only read and is never modified.

        Raises:
            AssertionError: if ``data`` is not a dict.
            StorageWriteException: if updating or writing the table fails; the
                original exception is attached as ``__cause__``.
        """
        # Validate before blocking on the reader/deleter and before touching
        # the writer flag, so bad input cannot leave the flag at RUNNING.
        assert isinstance(data, dict)

        # Read the flag without inserting a default into the caller's dict.
        overwrite = params.get('overwrite', True) if params else True

        while self._reader_status != Status.IDLE:
            self.sys_log("Waiting for reader to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        while self._deleter_status != Status.IDLE:
            self.sys_log("Waiting for deleter to finish.")
            time.sleep(TimeSleepObj.ONE_SECOND)

        self._writer_status = Status.RUNNING
        try:
            for key, val in data.items():
                if not overwrite and key in self._table_message.data:
                    continue
                any_message = ProtoUtil.message_to_any(message=val)
                self._table_message.data[key].CopyFrom(any_message)
            if len(self._table_message.data) > 1000:
                self.sys_log("Warning: the table content is too large, considering using Partitioner "
                             "combined with proto table.")
            self._table_message.updated_time = str(TimezoneUtil.cur_time_in_pst())
            with FileLockTool(self._file_name, read_mode=False):
                FileUtil.write_proto_to_file(
                    proto=self._table_message,
                    file_name=self._file_name
                )
        except Exception as err:
            message = "Write to file [" + self.get_file_name() + "] got exception: " + str(err) + '.'
            self.sys_log(message)
            self._logger.error(message)
            raise StorageWriteException(message) from err
        finally:
            # Always release the writer flag; previously any failure left it
            # stuck at RUNNING.
            self._writer_status = Status.IDLE
Ejemplo n.º 7
0
    def write(self, data, params=None):
        """Write ``data`` across the sharded proto tables and update the index map.

        Keys already present in the index map are written to the shard they
        are mapped to. New keys first go to the latest shard up to its
        remaining capacity (``_size_per_shard`` minus its current entry
        count); the rest are written to freshly created shards in chunks of
        ``_size_per_shard``. The index map is written to file only when at
        least one new key was supplied.

        Args:
            data: dict mapping keys to the values handed to
                ``ProtoTableStorage.write``.
            params: optional dict forwarded to every ``ProtoTableStorage.write``
                call, with ``'overwrite'`` defaulting to ``True``. The
                caller's dict is not modified.

        Raises:
            AssertionError: if ``data`` is not a dict.
            StorageWriteException: if any shard write or the index map write
                fails; the original exception is attached as ``__cause__``.
        """
        # Copy before adding the default so the caller's dict stays untouched.
        params = dict(params) if params else {}
        params.setdefault('overwrite', True)
        assert isinstance(data, dict)

        # Split the input into keys that already live in a shard and new keys.
        existing_shard_to_data_map = defaultdict(dict)
        new_data = {}
        for key, val in data.items():
            if key in self._index_map.index_map:
                existing_shard_to_data_map[self._index_map.index_map[key]][key] = val
            else:
                new_data[key] = val

        try:
            for shard, existing_data in existing_shard_to_data_map.items():
                related_proto_file = self._shard_to_file(shard=shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=related_proto_file)
                proto_table.write(data=existing_data, params=params)
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
            if new_data:
                all_new_keys = list(new_data.keys())
                latest_shard = self.get_latest_shard()
                latest_proto_file = self._shard_to_file(shard=latest_shard)
                proto_table = ProtoTableStorage()
                proto_table.initialize_from_file(file_name=latest_proto_file)
                latest_proto_table_size = proto_table.get_num_entries()
                # Clamp at zero: a shard that already holds more than
                # _size_per_shard entries would otherwise produce a negative
                # slice bound and receive even more keys.
                remaining_capacity = max(self._size_per_shard - latest_proto_table_size, 0)
                keys_for_latest_shard = all_new_keys[:remaining_capacity]
                remaining_new_keys = all_new_keys[remaining_capacity:]

                proto_table.write(
                    data={key: new_data[key] for key in keys_for_latest_shard},
                    params=params
                )
                self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                for key in keys_for_latest_shard:
                    self._index_map.index_map[key] = latest_shard

                # Spill whatever did not fit into new shards, one full chunk
                # at a time.
                start_index = 0
                while start_index < len(remaining_new_keys):
                    data_for_new_shard = remaining_new_keys[start_index:start_index + self._size_per_shard]
                    latest_shard += 1
                    self._index_map.cur_shard += 1
                    proto_file = self._shard_to_file(shard=latest_shard)
                    self._logger.info("Write to new file with name [" + proto_file + '] and shard [' +
                                      str(latest_shard) + '].')
                    proto_table = ProtoTableStorage()
                    proto_table.initialize_from_file(file_name=proto_file)
                    proto_table.write(
                        data={key: new_data[key] for key in data_for_new_shard},
                        params=params
                    )
                    self.increment_rpc_count_by(n=proto_table.get_rpc_call_count_and_reset())
                    for key in data_for_new_shard:
                        self._index_map.index_map[key] = latest_shard

                    start_index += self._size_per_shard
                self._logger.info("Writing the index map to [" + self._index_map_file + '].')
                self.increment_rpc_count_by(n=1)
                FileUtil.write_proto_to_file(
                    proto=self._index_map,
                    file_name=self._index_map_file
                )

        except Exception as err:
            message = "Write to dir [" + self.get_dir_name() + "] got exception: " + str(err) + '.'
            self._SYS_LOGGER.error(message)
            self._logger.error(message)
            raise StorageWriteException(message) from err