def load_sstable(pattern, value_type, has_known_shard_key=False):
  """Opens the SSTable(s) matching `pattern`, wrapping values with `value_type`.

  The pattern is first resolved to a list of file paths; the number of paths
  and `has_known_shard_key` then decide which SSTable flavour is built.

  Args:
    pattern: File pattern of the input. Expanded with
        shards.GenerateShardedFilenames when shards.IsShardedFileSpec
        recognises it, otherwise treated as a glob for gfile.Glob.
    value_type: The protobuf class of the values; handed to
        sstable.TableWrapper.
    has_known_shard_key: Only consulted when more than one file is found.
        If True the files are opened with ShardedSSTable, otherwise with
        MergedSSTable.

  Returns:
    An sstable.SSTable when exactly one file is found, otherwise an
    sstable.ShardedSSTable or sstable.MergedSSTable over all of the files.

  Raises:
    ValueError: The pattern resolved to no files.
  """
  filenames = (
      shards.GenerateShardedFilenames(pattern)
      if shards.IsShardedFileSpec(pattern) else gfile.Glob(pattern))

  # Guard clause: nothing to open.
  if not filenames:
    raise ValueError('No files found for SSTable %s' % pattern)

  # Built only after the empty check so the error path never touches it.
  value_wrapper = sstable.TableWrapper(value_type)

  if len(filenames) == 1:
    return sstable.SSTable(filenames[0], wrapper=value_wrapper)

  # Several files: the caller decides whether the shard key can be relied on.
  table_cls = (
      sstable.ShardedSSTable if has_known_shard_key else sstable.MergedSSTable)
  return table_cls(filenames, wrapper=value_wrapper)
Beispiel #2
0
  def from_path(cls, pattern):
    """Builds an instance of `cls` backed by SSTables of ScaM neighbor results.

    Args:
      pattern: string glob pattern for the paths of the sstables holding the
        output of the ScaM map-reduce.

    Returns:
      An instance of `cls` constructed from a ShardedSSTable over the matched
      files, whose values are wrapped with NearestNeighbors.FromString.
    """
    # Materialise the glob result and order it in place, so the shards are
    # always handed over in sorted path order.
    shard_paths = list(gfile.Glob(pattern))
    shard_paths.sort()

    neighbor_table = sstable.ShardedSSTable(
        shard_paths,
        wrapper=sstable.TableWrapper(results_pb2.NearestNeighbors.FromString))
    return cls(neighbor_table)