Esempio n. 1
0
File: s3.py Progetto: suzil/mrjob
    def mkdir(self, dest):
        """Ensure the bucket named in the S3 URI *dest* exists.

        S3 has no real directories, so nothing is created for the key
        portion of *dest*. The bucket is only created when checking for
        it fails with a 404; any other client error is re-raised.
        """
        # only the bucket matters here; the key part of the URI is ignored
        bucket_name, _ = parse_s3_uri(dest)

        s3_client = self.make_s3_client()

        try:
            s3_client.head_bucket(Bucket=bucket_name)
        except botocore.exceptions.ClientError as ex:
            if _client_error_status(ex) == 404:
                # bucket is missing, so make it
                self.create_bucket(bucket_name)
            else:
                raise
Esempio n. 2
0
File: s3.py Progetto: suzil/mrjob
    def touchz(self, dest):
        """Make an empty file in the given location.

        :param dest: S3 URI of the file to create.

        :raises OSError: if a non-empty file already exists at *dest*.

        Client errors other than a 404 (key doesn't exist) raised while
        checking for an existing file are re-raised.
        """
        key = self._get_s3_key(dest)

        data = None
        try:
            data = key.get()
        except botocore.exceptions.ClientError as ex:
            # okay if key doesn't exist
            if _client_error_status(ex) != 404:
                raise

        if data:
            # we only need the metadata, never the contents. Close the
            # unread response body so its underlying HTTP connection is
            # released right away rather than whenever it's garbage-collected
            body = data.get('Body')
            if body is not None:
                body.close()

            if data['ContentLength'] != 0:
                raise OSError('Non-empty file %r already exists!' % (dest, ))

        key.put(Body=b'')
Esempio n. 3
0
File: s3.py Progetto: suzil/mrjob
    def _ls(self, path_glob):
        """Yield ``(uri, key)`` for each S3 key matching *path_glob*.

        Helper method for :py:meth:`ls`. *key* is the corresponding boto3
        s3.ObjectSummary, and *uri* is built with the same scheme as
        *path_glob*. Keys "inside" *path_glob* (treating it as a
        directory) match as well. If looking up the bucket fails with a
        404, nothing is yielded.
        """
        # remember the caller's scheme (e.g. s3n://) so that the URIs we
        # yield use it too
        scheme = urlparse(path_glob).scheme

        # when there's a wildcard, only the part of the glob before it can
        # be used to narrow down which keys we list
        wildcard = GLOB_RE.match(path_glob)
        prefix_uri = wildcard.group(1) if wildcard else path_glob

        bucket_name, key_prefix = parse_s3_uri(prefix_uri)

        # second pattern, so that subdirectories of the path/glob match too
        if not path_glob or path_glob.endswith('/'):
            subdir_glob = path_glob + '*'
        else:
            subdir_glob = path_glob + '/*'

        try:
            bucket = self.get_bucket(bucket_name)
        except botocore.exceptions.ClientError as ex:
            if _client_error_status(ex) != 404:
                raise
            return  # treat nonexistent as empty

        patterns = (path_glob, subdir_glob)

        for summary in bucket.objects.filter(Prefix=key_prefix):
            uri = "%s://%s/%s" % (scheme, bucket_name, summary.key)

            # listing by prefix is only a coarse filter; enforce the glob
            if any(fnmatch.fnmatchcase(uri, pattern) for pattern in patterns):
                yield uri, summary
Esempio n. 4
0
File: s3.py Progetto: suzil/mrjob
    def get_bucket(self, bucket_name):
        """Return the :py:mod:`boto3` Bucket for *bucket_name*, using an
        S3 resource made for the bucket's region.

        If looking up the region fails with a 403, a warning is logged and
        the resource is made with a region of ``None``. Any other client
        error from the lookup is re-raised.
        """
        s3_client = self.make_s3_client()

        try:
            region_name = _get_bucket_region(s3_client, bucket_name)
        except botocore.exceptions.ClientError as ex:
            if _client_error_status(ex) == 403:
                # we can have access to a bucket without having access to
                # its location metadata; the 'elasticmapreduce' bucket is
                # one example (see #1170)
                region_name = None
                log.warning(
                    'Could not infer endpoint for bucket %s; '
                    'assuming defaults', bucket_name)
            else:
                raise

        return self.make_s3_resource(region_name).Bucket(bucket_name)