def load_sstable(pattern, value_type, has_known_shard_key=False):
    """Opens the SSTable(s) matching `pattern` with `value_type` values.

    Args:
      pattern: File pattern of the input. Treated as a sharded file spec when
        `shards.IsShardedFileSpec` accepts it, otherwise expanded as a glob.
      value_type: The protobuf class of the values; handed to
        `sstable.TableWrapper`.
      has_known_shard_key: Only consulted when more than one file matches.
        If True the files are opened with `sstable.ShardedSSTable`; if False
        they are opened with `sstable.MergedSSTable`.

    Returns:
      An `sstable.SSTable` when exactly one file matches, otherwise a
      `ShardedSSTable` or `MergedSSTable` over all matching files.

    Raises:
      ValueError: No files matched `pattern`.
    """
    # Resolve the pattern to concrete file paths.
    matched = (
        shards.GenerateShardedFilenames(pattern)
        if shards.IsShardedFileSpec(pattern)
        else gfile.Glob(pattern)
    )

    # Guard: nothing to open.
    if not matched:
        raise ValueError('No files found for SSTable %s' % pattern)

    value_wrapper = sstable.TableWrapper(value_type)

    # A single file needs no multi-shard reader.
    if len(matched) == 1:
        return sstable.SSTable(matched[0], wrapper=value_wrapper)

    # Several files: pick the multi-file reader based on the caller's hint.
    if has_known_shard_key:
        multi_file_reader = sstable.ShardedSSTable
    else:
        multi_file_reader = sstable.MergedSSTable
    return multi_file_reader(matched, wrapper=value_wrapper)
def from_path(cls, pattern):
    """Builds a ScaMMatcher from SSTables of ScaM NearestNeighbors results.

    Args:
      pattern: String glob pattern for the paths of the sstables holding
        output from the ScaM map-reduce.

    Returns:
      An instance of `cls` wrapping a `ShardedSSTable` over the matched
      files, for doing lookups with these pre-computed neighbors.
    """
    # Expand the glob and put the matches in sorted order before handing
    # them to the sharded reader.
    shard_paths = list(gfile.Glob(pattern))
    shard_paths.sort()

    # Wrapper constructed from the NearestNeighbors.FromString parser.
    neighbors_wrapper = sstable.TableWrapper(
        results_pb2.NearestNeighbors.FromString)

    neighbors_table = sstable.ShardedSSTable(
        shard_paths, wrapper=neighbors_wrapper)
    return cls(neighbors_table)