def get_next_dir(self, cur_dir):
    """Return the partition directory that follows ``cur_dir``, if present.

    The file tree's root name is stripped from ``cur_dir`` and the remainder
    is parsed into a timestamp. That timestamp is advanced by one partition
    unit (year, month, day, hour, or minute for any other partitioner type)
    and turned back into a directory path under the root, truncated to the
    height configured for this partitioner type.

    Args:
        cur_dir: Directory name of the current partition.

    Returns:
        The next partition's directory name if ``FileUtil`` reports that it
        exists, otherwise ``None``.
    """
    relative_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
    current = FileUtil.parse_dir_to_timestamp(dir_name=relative_dir)
    granularity = self.PARTITIONER_TYPE

    if granularity == PartitionerStorageType.YEARLY:
        following = datetime.datetime(current.year + 1, 1, 1)
    elif granularity == PartitionerStorageType.MONTHLY:
        # December rolls over into January of the following year.
        rolls_over = current.month == 12
        following = datetime.datetime(
            current.year + 1 if rolls_over else current.year,
            1 if rolls_over else current.month + 1,
            1,
        )
    elif granularity == PartitionerStorageType.DAILY:
        following = current + datetime.timedelta(days=1)
    elif granularity == PartitionerStorageType.HOURLY:
        following = current + datetime.timedelta(hours=1)
    else:
        # Every remaining partitioner type advances by one minute.
        following = current + datetime.timedelta(minutes=1)

    # Keep only as many path components as this partitioner type is deep.
    components = FileUtil.parse_timestamp_to_dir(
        timestamp=following).split('/')
    depth = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
    candidate = FileUtil.join_paths_to_dir(
        root_dir=self._file_tree.get_root_name(),
        base_name='/'.join(components[:depth]),
    )
    return candidate if FileUtil.does_dir_exist(dir_name=candidate) else None
def get_previous_dir(self, cur_dir):
    """Return the partition directory that precedes ``cur_dir``, if present.

    The partitioner is first re-initialised from its own directory. The file
    tree's root name is then stripped from ``cur_dir``, the remainder is
    parsed into a timestamp, and that timestamp is moved back by one
    partition unit (year, month, day, hour, or minute for any other
    partitioner type). The result is converted into a directory path under
    the root, truncated to the height configured for this partitioner type.
    One RPC is counted before the existence check.

    Args:
        cur_dir: Directory name of the current partition.

    Returns:
        The previous partition's directory name if ``FileUtil`` reports that
        it exists, otherwise ``None``.
    """
    self.initialize_from_dir(dir_name=self.get_dir_name())

    relative_dir = cur_dir.replace(self._file_tree.get_root_name(), '')
    current = FileUtil.parse_dir_to_timestamp(dir_name=relative_dir)
    granularity = self.PARTITIONER_TYPE

    if granularity == PartitionerStorageType.YEARLY:
        earlier = datetime.datetime(current.year - 1, 1, 1)
    elif granularity == PartitionerStorageType.MONTHLY:
        # January steps back into December of the preceding year.
        if current.month == 1:
            year, month = current.year - 1, 12
        else:
            year, month = current.year, current.month - 1
        earlier = datetime.datetime(year, month, 1)
    elif granularity == PartitionerStorageType.DAILY:
        earlier = current - datetime.timedelta(days=1)
    elif granularity == PartitionerStorageType.HOURLY:
        earlier = current - datetime.timedelta(hours=1)
    else:
        # Every remaining partitioner type steps back by one minute.
        earlier = current - datetime.timedelta(minutes=1)

    # Keep only as many path components as this partitioner type is deep.
    components = FileUtil.parse_timestamp_to_dir(
        timestamp=earlier).split('/')
    depth = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
    candidate = FileUtil.join_paths_to_dir(
        root_dir=self._file_tree.get_root_name(),
        base_name='/'.join(components[:depth]),
    )

    self.increment_rpc_count_by(n=1)
    return candidate if FileUtil.does_dir_exist(dir_name=candidate) else None
def is_gabage_collected(self):
    """Report whether the rightmost leaf of the file tree is gone.

    One RPC is counted for the directory existence check.

    Returns:
        ``True`` when the rightmost leaf is not an existing directory,
        ``False`` otherwise.
    """
    leaf_dir = self._file_tree.get_rightmost_leaf()
    self.increment_rpc_count_by(n=1)
    return not FileUtil.does_dir_exist(dir_name=leaf_dir)
def resize_to_new_table(self, new_size_per_shard, new_dir_name):
    """Copy every shard of this table into a new sharded table.

    Two RPCs are counted up front for the checks on ``new_dir_name``. Each
    existing shard file is then loaded as a ``ProtoTableStorage``, its full
    contents are written into the new table, and the RPC count accumulated
    by that per-shard table is added to this object's count.

    Args:
        new_size_per_shard: ``size_per_shard`` passed to the new
            ``ShardedProtoTableStorage``.
        new_dir_name: Directory the new table is initialised from. It must
            either not exist or be empty.

    Returns:
        The newly populated ``ShardedProtoTableStorage``.

    Raises:
        AssertionError: If ``new_dir_name`` exists and is not empty.
    """
    self.increment_rpc_count_by(n=2)
    # The target must be a fresh location: missing, or present but empty.
    assert not (
        FileUtil.does_dir_exist(dir_name=new_dir_name)
        and not FileUtil.is_dir_empty(dir_name=new_dir_name)
    )

    resized_table = ShardedProtoTableStorage(
        size_per_shard=new_size_per_shard)
    resized_table.initialize_from_dir(dir_name=new_dir_name)

    for shard_index in range(self.get_num_shards()):
        shard_file = self._shard_to_file(shard=shard_index)
        shard_table = ProtoTableStorage()
        shard_table.initialize_from_file(file_name=shard_file)
        resized_table.write(data=shard_table.read_all())
        # Fold the RPCs spent reading this shard into our own counter.
        self.increment_rpc_count_by(
            n=shard_table.get_rpc_call_count_and_reset())

    return resized_table
def make_new_partition(self, timestamp):
    """Create the partition directory for ``timestamp`` unless it exists.

    The timestamp is converted into a directory path, truncated to the
    height configured for this partitioner type, and placed under the file
    tree's root. If that directory is missing it is created and the
    partitioner is re-initialised from the root directory.

    Args:
        timestamp: Timestamp whose partition should be created.

    Returns:
        The name of the newly created partition directory, or ``None`` when
        the directory already existed and nothing was created.
    """
    components = FileUtil.parse_timestamp_to_dir(
        timestamp=timestamp).split('/')
    depth = self.PARTITIONER_TYPE_TO_HEIGHT_MAP[self.PARTITIONER_TYPE]
    partition_path = FileUtil.join_paths_to_dir(
        root_dir=self._file_tree.get_root_name(),
        base_name='/'.join(components[:depth]),
    )
    child_node = OrderedNodeBase(node_name=partition_path)
    node_name = child_node.get_node_name()

    if FileUtil.does_dir_exist(dir_name=node_name):
        self.sys_log(
            'Node [' + node_name + "] exist. Don't make new partition.")
        return None

    message = 'Node [' + node_name + "] doesn't exist. Make new partition."
    self.sys_log(message)
    self._logger.info(message)
    FileUtil.create_dir_if_not_exist(dir_name=node_name)
    # Reload from the root after the new directory has been created.
    self.initialize_from_dir(dir_name=self._file_tree.get_root_name())
    return node_name
def _delete_dir(self, dir_name):
    """Remove deletable sub-directories of ``dir_name``, recursing otherwise.

    Each sub-directory that exists and passes
    ``_recursively_check_dir_deletable`` is removed recursively; any other
    sub-directory is descended into with the same procedure. Removal
    failures are logged and counted rather than propagated.

    Args:
        dir_name: Directory whose sub-directories are examined.

    Returns:
        A ``(num_dir_removed, num_dir_failed)`` tuple that includes the
        counts from all recursive calls.
    """
    removed = failed = 0

    for child in FileUtil.list_dirs_in_dir(dir_name=dir_name):
        deletable = (
            FileUtil.does_dir_exist(dir_name=child)
            and self._recursively_check_dir_deletable(dir_name=child)
        )
        if not deletable:
            # Not removable as a whole: look for removable descendants.
            child_removed, child_failed = self._delete_dir(dir_name=child)
            removed += child_removed
            failed += child_failed
            continue

        self._logger.info("Removing directory " + child + '...')
        try:
            FileUtil.remove_dir_recursively(dir_name=child)
            self.counter_increment("num_directory_removed")
            removed += 1
        except Exception as err:
            failed += 1
            self.counter_increment("num_directory_failed_to_be_removed")
            self._logger.error("Removing directory " + child +
                               ' failed with err ' + str(err) + '.')

    return removed, failed
def read_range(self, params):
    """Read every file in the partitions between two timestamps.

    The call blocks (sleeping one ``TimeSleepObj.ONE_SECOND`` per attempt)
    until the writer status is ``Status.IDLE``. The requested range is
    truncated to the partition granularity, clamped to the oldest and
    latest partition directories, and then walked one partition at a time.

    The reader status is set to ``Status.RUNNING`` for the duration of the
    read and is always restored to ``Status.IDLE`` on exit, including on
    the early empty return and on failure.

    Args:
        params: Dict containing ``'start_time'`` and ``'end_time'``
            datetimes, with ``start_time <= end_time``.

    Returns:
        A dict mapping each file name found in the visited partitions to
        the data read from that file. An empty dict is returned when the
        partitioner has no oldest or latest partition directory.

    Raises:
        AssertionError: If ``params`` lacks either key or the range is
            inverted.
        StorageReadException: If walking or reading the partitions fails.
    """

    def _reformat_time(timestamp):
        # Truncate to the start of the enclosing partition and drop tzinfo
        # so the value is comparable with timestamps parsed from dir names.
        if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
            return timestamp.replace(month=1, day=1, hour=0, minute=0,
                                     second=0, microsecond=0, tzinfo=None)
        if self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
            return timestamp.replace(day=1, hour=0, minute=0, second=0,
                                     microsecond=0, tzinfo=None)
        if self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
            return timestamp.replace(hour=0, minute=0, second=0,
                                     microsecond=0, tzinfo=None)
        if self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
            return timestamp.replace(minute=0, second=0, microsecond=0,
                                     tzinfo=None)
        return timestamp.replace(second=0, microsecond=0, tzinfo=None)

    assert 'start_time' in params and 'end_time' in params and params[
        'start_time'] <= params['end_time']

    while self._writer_status != Status.IDLE:
        self.sys_log("Waiting for writer to finish.")
        time.sleep(TimeSleepObj.ONE_SECOND)

    self._reader_status = Status.RUNNING
    try:
        oldest_dir, latest_dir = self.get_oldest_dir(), self.get_latest_dir()
        if not latest_dir or not oldest_dir:
            if self.is_empty():
                self._logger.warning("Current partitioner [" +
                                     self.get_dir_name() +
                                     "] is empty, cannot read anything.")
                self.sys_log("Current partitioner [" + self.get_dir_name() +
                             "] is empty, cannot read anything.")
            # Without both boundary directories there is no range to walk;
            # returning here also avoids calling .replace() on None below.
            return {}

        root_name = self._file_tree.get_root_name()
        oldest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=oldest_dir.replace(root_name, ''))
        latest_timestamp = FileUtil.parse_dir_to_timestamp(
            dir_name=latest_dir.replace(root_name, ''))

        # Clamp the requested range to the partitions that actually exist.
        start_time = max(_reformat_time(params['start_time']),
                         oldest_timestamp)
        end_time = min(_reformat_time(params['end_time']), latest_timestamp)

        result = {}
        try:
            while start_time <= end_time:
                dir_list = FileUtil.parse_timestamp_to_dir(
                    timestamp=start_time).split('/')
                dir_name = '/'.join(
                    dir_list[:self.PARTITIONER_TYPE_TO_HEIGHT_MAP[
                        self.PARTITIONER_TYPE]])
                dir_name = FileUtil.join_paths_to_dir(
                    root_dir=self._file_tree.get_root_name(),
                    base_name=dir_name)

                if FileUtil.does_dir_exist(dir_name=dir_name):
                    if (self._underlying_storage.get_storage_type()
                            == StorageType.PROTO_TABLE_STORAGE):
                        storage = ProtoTableStorage()
                    else:
                        storage = DefaultStorage()

                    file_names = FileUtil.list_files_in_dir(
                        dir_name=dir_name)
                    for file_name in file_names:
                        storage.initialize_from_file(file_name=file_name)
                        if (storage.get_storage_type()
                                == StorageType.PROTO_TABLE_STORAGE):
                            result[file_name] = storage.read_all()
                        else:
                            result[file_name] = storage.read(
                                params={'num_line': -1})

                # Advance to the start of the next partition.
                if self.PARTITIONER_TYPE == PartitionerStorageType.YEARLY:
                    start_time = start_time.replace(
                        year=start_time.year + 1, month=1, day=1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.MONTHLY:
                    if start_time.month == 12:
                        start_time = start_time.replace(
                            year=start_time.year + 1, month=1, day=1)
                    else:
                        start_time = start_time.replace(
                            month=start_time.month + 1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.DAILY:
                    start_time += datetime.timedelta(days=1)
                elif self.PARTITIONER_TYPE == PartitionerStorageType.HOURLY:
                    start_time += datetime.timedelta(hours=1)
                else:
                    start_time += datetime.timedelta(minutes=1)

            return result
        except Exception as err:
            message = ("Read range in dir [" + self.get_dir_name() +
                       "] got exception " + str(err) + '.')
            self.sys_log(message)
            self._logger.error(message)
            # Chain the cause so the original traceback is not lost.
            raise StorageReadException(message) from err
    finally:
        # Always release the reader flag, whichever way the read ended.
        self._reader_status = Status.IDLE
def is_updated(self):
    """Report whether the rightmost leaf of the file tree no longer exists.

    Returns:
        ``True`` when the rightmost leaf is not an existing directory,
        ``False`` otherwise.
    """
    leaf_dir = self._file_tree.get_rightmost_leaf()
    return not FileUtil.does_dir_exist(dir_name=leaf_dir)