Example #1
    def _next_file(self):
        """Finds the next filename.

        Entries in self._filenames that end with the delimiter are treated
        as prefixes and expanded via listbucket.

        Returns:
          None if no files are left. The next filename otherwise.
        """
        while True:
            # Drain the current listbucket iterator, if any.
            if self._bucket_iter:
                try:
                    return next(self._bucket_iter).filename
                except StopIteration:
                    self._bucket_iter = None
                    self._bucket = None
            if self._index >= len(self._filenames):
                return
            filename = self._filenames[self._index]
            self._index += 1
            # A plain filename is returned as-is; a name ending with the
            # delimiter is a prefix that still needs expansion.
            if self._delimiter is None or not filename.endswith(
                    self._delimiter):
                return filename
            self._bucket = cloudstorage.listbucket(filename,
                                                   delimiter=self._delimiter)
            self._bucket_iter = iter(self._bucket)
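
The lazy expansion above is easy to demonstrate without the cloudstorage
dependency. Below is a minimal sketch of the same pattern; iter_files and
list_prefix are hypothetical names (list_prefix plays the role of
cloudstorage.listbucket), not part of the original reader.

def iter_files(filenames, delimiter, list_prefix):
    # Yield concrete filenames; any entry ending with the delimiter is a
    # prefix that gets expanded through the listing callback.
    for name in filenames:
        if delimiter is None or not name.endswith(delimiter):
            yield name  # already a concrete file
        else:
            for expanded in list_prefix(name):
                yield expanded

# Usage with an in-memory stand-in for a bucket listing:
fake_bucket = {"/b/dir/": ["/b/dir/a.txt", "/b/dir/b.txt"]}
files = iter_files(["/b/x.txt", "/b/dir/"], "/", fake_bucket.get)
assert list(files) == ["/b/x.txt", "/b/dir/a.txt", "/b/dir/b.txt"]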
Example #2
  def _try_to_clean_garbage(self, writer_spec):
    """Tries to remove temporary files created by this shard.

    Builds the shard's temporary-file prefix from _TMPFILE_PREFIX and
    removes every matching file found in the bucket.

    Args:
      writer_spec: writer parameters; used to look up the account id.
    """
    tmpl = string.Template(self._TMPFILE_PREFIX)
    prefix = tmpl.substitute(
        id=self.status.mapreduce_id, shard=self.status.shard)
    bucket = self.status.writer_spec[self.BUCKET_NAME_PARAM]
    account_id = writer_spec.get(self._ACCOUNT_ID_PARAM, None)
    # Remove every file in the bucket that carries this shard's temporary
    # prefix.
    for f in cloudstorage.listbucket("/%s/%s" % (bucket, prefix),
                                     _account_id=account_id):
      self._remove_file(f.filename, self.status.writer_spec)
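
The prefix that scopes the cleanup is built with plain string.Template
substitution. A minimal sketch, assuming a hypothetical template value
(the real _TMPFILE_PREFIX is a constant defined elsewhere in the writer
and may differ):

import string

# Hypothetical template; placeholders match the substitute() call above.
TMPFILE_PREFIX = "$id-tmp-$shard-"

tmpl = string.Template(TMPFILE_PREFIX)
prefix = tmpl.substitute(id="job123", shard=7)
print(prefix)  # job123-tmp-7-
# Listing "/<bucket>/" + prefix then yields only the temporary files this
# shard created, each of which can be removed.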
Example #3
    @classmethod
    def split_input(cls, job_config):
        """Returns a list of input readers.

        An equal number of input files is assigned to each shard (+/- 1).
        If there are fewer files than shards, fewer than the requested
        number of shards will be used. Input files are currently never
        split (although for some formats they could be, and they may be
        split in a future implementation).

        Args:
          job_config: a map_job.JobConfig.

        Returns:
          A list of InputReaders; empty when no input data can be found.
        """
        reader_params = job_config.input_reader_params
        bucket = reader_params[cls.BUCKET_NAME_PARAM]
        filenames = reader_params[cls.OBJECT_NAMES_PARAM]
        delimiter = reader_params.get(cls.DELIMITER_PARAM)
        account_id = reader_params.get(cls._ACCOUNT_ID_PARAM)
        buffer_size = reader_params.get(cls.BUFFER_SIZE_PARAM)
        path_filter = reader_params.get(cls.PATH_FILTER_PARAM)

        all_filenames = []
        for filename in filenames:
            # A trailing "*" marks a prefix; strip it and expand it via
            # listbucket.
            if filename.endswith("*"):
                all_filenames.extend([
                    file_stat.filename for file_stat in
                    cloudstorage.listbucket("/" + bucket + "/" + filename[:-1],
                                            delimiter=delimiter,
                                            _account_id=account_id)
                ])
            else:
                all_filenames.append("/%s/%s" % (bucket, filename))

        readers = []
        for shard in range(job_config.shard_count):
            # Extended slicing stripes filenames round-robin across shards.
            shard_filenames = all_filenames[shard::job_config.shard_count]
            if shard_filenames:
                readers.append(
                    cls(shard_filenames,
                        buffer_size=buffer_size,
                        _account_id=account_id,
                        delimiter=delimiter,
                        path_filter=path_filter))
        return readers
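
The sharding step relies on extended slicing: all_filenames[shard::shard_count]
takes every shard_count-th file starting at offset shard, which stripes the
files round-robin and gives each shard an equal count (+/- 1). A
self-contained illustration; assign_files is a hypothetical helper, not part
of the reader:

def assign_files(all_filenames, shard_count):
    # Shards whose stripe would be empty are simply not created, which is
    # why fewer than the requested number of shards may be used.
    return [all_filenames[shard::shard_count]
            for shard in range(shard_count)
            if all_filenames[shard::shard_count]]

files = ["f0", "f1", "f2", "f3", "f4"]
print(assign_files(files, 3))  # [['f0', 'f3'], ['f1', 'f4'], ['f2']]
print(assign_files(files, 8))  # five single-file shards, not eight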