Example #1
    def _initiate_upload(self):
        if not self.autocommit and not self.append_block and self.tell(
        ) < self.blocksize:
            # only happens when closing a small file; use one-shot PUT
            return
        logger.debug("Initiate upload for %s" % self)
        self.parts = []
        try:
            self.mpu = self._call_s3(self.fs.s3.create_multipart_upload,
                                     Bucket=self.bucket,
                                     Key=self.key,
                                     ACL=self.acl)
        except ClientError as e:
            raise translate_boto_error(e)
        except ParamValidationError as e:
            raise ValueError('Initiating write to %r failed: %s' %
                             (self.path, e))

        if self.append_block:
            # when appending and the existing block is big enough,
            # reuse the data already in the key
            out = self.fs._call_s3(self.fs.s3.upload_part_copy,
                                   self.s3_additional_kwargs,
                                   Bucket=self.bucket,
                                   Key=self.key,
                                   PartNumber=1,
                                   UploadId=self.mpu['UploadId'],
                                   CopySource=self.path)
            self.parts.append({
                'PartNumber': 1,
                'ETag': out['CopyPartResult']['ETag']
            })
Example #2
    def wrapper(*args, **kwargs):
        try:
            func(*args, **kwargs)
        except Exception as exc:
            from s3fs.errors import translate_boto_error

            raise translate_boto_error(exc)
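
The snippet above is only the inner `wrapper` of a decorator. A minimal sketch of the enclosing decorator it implies might look like the following; the `wrap_boto_errors` name and the `return` of the wrapped call are assumptions, not taken from s3fs:

    import functools

    def wrap_boto_errors(func):  # hypothetical name; illustrative only
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)  # also propagate the return value
            except Exception as exc:
                from s3fs.errors import translate_boto_error
                raise translate_boto_error(exc)
        return wrapper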
Example #3
    def bulk_delete(self, pathlist, **kwargs):
        """
        Remove multiple keys with one call

        Parameters
        ----------
        pathlist : list of strings
            The keys to remove; they must all be in the same bucket.
        """
        if not pathlist:
            return
        buckets = {split_path(path)[0] for path in pathlist}
        if len(buckets) > 1:
            raise ValueError("Bulk delete files should refer to only one "
                             "bucket")
        bucket = buckets.pop()
        if len(pathlist) > 1000:
            for i in range((len(pathlist) // 1000) + 1):
                self.bulk_delete(pathlist[i * 1000:(i + 1) * 1000])
            return
        delete_keys = {
            'Objects': [{
                'Key': split_path(path)[1]
            } for path in pathlist]
        }
        for path in pathlist:
            self.invalidate_cache(self._parent(path))
        try:
            self._call_s3(self.s3.delete_objects,
                          kwargs,
                          Bucket=bucket,
                          Delete=delete_keys)
        except ClientError as e:
            raise translate_boto_error(e)
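
A minimal usage sketch for the method above, assuming the s3fs version these examples come from and placeholder key names; all keys must share one bucket, and lists longer than 1000 keys are split into 1000-key chunks automatically:

    import s3fs

    fs = s3fs.S3FileSystem()  # credentials resolved by botocore as usual
    fs.bulk_delete([
        'my-bucket/tmp/part-0000.csv',  # placeholder keys, all in the same bucket
        'my-bucket/tmp/part-0001.csv',
    ])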
Example #4
 def info(self, path, version_id=None):
     if path in ['/', '']:
         return {'name': path, 'size': 0, 'type': 'directory'}
     kwargs = self.kwargs.copy()
     if version_id is not None:
         if not self.version_aware:
             raise ValueError("version_id cannot be specified if the "
                              "filesystem is not version aware")
         kwargs['VersionId'] = version_id
     if self.version_aware:
         try:
             bucket, key = split_path(path)
             out = self._call_s3(self.s3.head_object,
                                 kwargs,
                                 Bucket=bucket,
                                 Key=key,
                                 **self.req_kw)
             return {
                 'ETag': out['ETag'],
                 'Key': '/'.join([bucket, key]),
                 'LastModified': out['LastModified'],
                 'Size': out['ContentLength'],
                 'size': out['ContentLength'],
                 'path': '/'.join([bucket, key]),
                 'StorageClass': "STANDARD",
                 'VersionId': out.get('VersionId')
             }
         except ClientError as e:
             raise translate_boto_error(e)
         except ParamValidationError as e:
             raise ValueError('Failed to head path %r: %s' % (path, e))
     return super().info(path)
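
A short usage sketch for version-aware lookups, with a placeholder bucket and version id; as the code above shows, passing version_id on a filesystem that was not created with version_aware=True raises ValueError:

    import s3fs

    fs = s3fs.S3FileSystem(version_aware=True)
    meta = fs.info('my-bucket/data.parquet', version_id='some-version-id')  # placeholders
    print(meta['size'], meta.get('VersionId'))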
Example #5
    def _initiate_upload(self):
        if self.acl and self.acl not in key_acls:
            raise ValueError('ACL not in %s' % key_acls)
        self.parts = []
        self.size = 0
        if self.blocksize < 5 * 2**20:
            raise ValueError('Block size must be >=5MB')
        try:
            self.mpu = self._call_s3(self.fs.s3.create_multipart_upload,
                                     Bucket=self.bucket,
                                     Key=self.key,
                                     ACL=self.acl)
        except ClientError as e:
            raise translate_boto_error(e)
        except ParamValidationError as e:
            raise ValueError('Initiating write to %r failed: %s' %
                             (self.path, e))

        if 'a' in self.mode and self.fs.exists(self.path):
            if self.append_block:
                # when appending and the existing block is big enough,
                # reuse the data already in the key
                out = self.fs._call_s3(self.fs.s3.upload_part_copy,
                                       self.s3_additional_kwargs,
                                       Bucket=self.bucket,
                                       Key=self.key,
                                       PartNumber=1,
                                       UploadId=self.mpu['UploadId'],
                                       CopySource=self.path)
                self.parts.append({
                    'PartNumber': 1,
                    'ETag': out['CopyPartResult']['ETag']
                })
Example #6
File: core.py Project: mtrbean/s3fs
    def _lsdir(self, path, refresh=False, max_items=None):
        if path.startswith('s3://'):
            path = path[len('s3://'):]
        path = path.rstrip('/')
        bucket, prefix = split_path(path)
        prefix = prefix + '/' if prefix else ""
        if path not in self.dircache or refresh:
            try:
                pag = self.s3.get_paginator('list_objects_v2')
                config = {}
                if max_items is not None:
                    config.update(MaxItems=max_items, PageSize=2 * max_items)
                it = pag.paginate(Bucket=bucket, Prefix=prefix, Delimiter='/',
                                  PaginationConfig=config, **self.req_kw)
                files = []
                dircache = []
                for i in it:
                    dircache.extend(i.get('CommonPrefixes', []))
                    for c in i.get('Contents', []):
                        c['type'] = 'file'
                        c['size'] = c['Size']
                        files.append(c)
                if dircache:
                    files.extend([{'Key': l['Prefix'][:-1], 'Size': 0,
                                  'StorageClass': "DIRECTORY",
                                   'type': 'directory', 'size': 0}
                                  for l in dircache])
                for f in files:
                    f['Key'] = '/'.join([bucket, f['Key']])
                    f['name'] = f['Key']
            except ClientError as e:
                raise translate_boto_error(e)

            self.dircache[path] = files
        return self.dircache[path]
Example #7
File: core.py Project: mtrbean/s3fs
def _fetch_range(client, bucket, key, version_id, start, end, max_attempts=10,
                 req_kw=None):
    if req_kw is None:
        req_kw = {}
    logger.debug("Fetch: %s/%s, %s-%s", bucket, key, start, end)
    for i in range(max_attempts):
        try:
            if version_id is not None:
                kwargs = dict({'VersionId': version_id}, **req_kw)
            else:
                kwargs = req_kw
            resp = client.get_object(Bucket=bucket, Key=key,
                                     Range='bytes=%i-%i' % (start, end - 1),
                                     **kwargs)
            return resp['Body'].read()
        except S3_RETRYABLE_ERRORS as e:
            logger.debug('Exception %r on S3 download, retrying', e,
                         exc_info=True)
            continue
        except ConnectionError as e:
            logger.debug('ConnectionError %r on S3 download, retrying', e,
                         exc_info=True)
            continue
        except ClientError as e:
            if e.response['Error'].get('Code', 'Unknown') in ['416',
                                                              'InvalidRange']:
                return b''
            raise translate_boto_error(e)
        except Exception as e:
            if 'time' in str(e).lower():  # Actual exception type changes often
                continue
            else:
                raise
    raise RuntimeError("Max number of S3 retries exceeded")
Example #8
 def rmdir(self, path):
     path = self._strip_protocol(path).rstrip('/')
     if not self._parent(path):
         try:
             self.s3.delete_bucket(Bucket=path)
         except ClientError as e:
             raise translate_boto_error(e)
         self.invalidate_cache(path)
         self.invalidate_cache('')
Example #9
def _fetch_range(client,
                 bucket,
                 key,
                 version_id,
                 start,
                 end,
                 max_attempts=10,
                 req_kw=None):
    if req_kw is None:
        req_kw = {}
    if start == end:
        # When these match, we would make a request with `range=start-end - 1`
        # According to RFC2616, servers are supposed to ignore the Range
        # field when it's invalid like this. S3 does ignore it, moto doesn't.
        # To avoid differences in behavior under mocking, we just avoid
        # making these requests. It's hoped that since we're being called
        # from a caching object, this won't end up mattering.
        logger.debug(
            'skip fetch for negative range - bucket=%s,key=%s,start=%d,end=%d',
            bucket, key, start, end)
        return b''
    logger.debug("Fetch: %s/%s, %s-%s", bucket, key, start, end)
    for i in range(max_attempts):
        try:
            if version_id is not None:
                kwargs = dict({'VersionId': version_id}, **req_kw)
            else:
                kwargs = req_kw
            resp = client.get_object(Bucket=bucket,
                                     Key=key,
                                     Range='bytes=%i-%i' % (start, end - 1),
                                     **kwargs)
            return resp['Body'].read()
        except S3_RETRYABLE_ERRORS as e:
            logger.debug('Exception %r on S3 download, retrying',
                         e,
                         exc_info=True)
            time.sleep(1.7**i * 0.1)
            continue
        except ConnectionError as e:
            logger.debug('ConnectionError %r on S3 download, retrying',
                         e,
                         exc_info=True)
            time.sleep(1.7**i * 0.1)
            continue
        except ClientError as e:
            if e.response['Error'].get('Code',
                                       'Unknown') in ['416', 'InvalidRange']:
                return b''
            raise translate_boto_error(e)
        except Exception as e:
            if 'time' in str(e).lower():  # Actual exception type changes often
                continue
            else:
                raise
    raise RuntimeError("Max number of S3 retries exceeded")
Example #10
 def touch(self, path, truncate=True, data=None, **kwargs):
     """Create empty file or truncate"""
     bucket, key = split_path(path)
     if not truncate and self.exists(path):
         raise ValueError("S3 does not support touching existent files")
     try:
         self._call_s3(self.s3.put_object, kwargs, Bucket=bucket, Key=key)
     except ClientError as ex:
         raise translate_boto_error(ex)
     self.invalidate_cache(self._parent(path))
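
A short usage sketch with a placeholder bucket; with the default truncate=True an existing key is overwritten by an empty object, while truncate=False raises if the key already exists:

    import s3fs

    fs = s3fs.S3FileSystem()
    fs.touch('my-bucket/placeholder.txt')  # create (or truncate to) a zero-byte object
    # fs.touch('my-bucket/placeholder.txt', truncate=False)  # would raise: key already exists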
Example #11
def export_artifacts(experiment: Dict[str, str], report_path: str,
                     experiment_output_directory: str,
                     export_base_path: str) -> None:
    """Save the experiment artifacts to the `bench_export_directory`.

    experiment: experiment dict that contains "dataset_name" (e.g. ames_housing),
        "experiment_name" (specified by user), and "config_path" (path to experiment config.
        Relative to ludwig/benchmarks/configs).
    report_path: path where the experiment metrics report is
        saved.
    experiment_output_directory: path where the model, data,
        and logs of the experiment are saved.
    export_base_path: remote or local path (directory) where artifacts are
        exported. (e.g. s3://benchmarking.us-west-2.ludwig.com/bench/ or your/local/bench/)
    """
    protocol, _ = fsspec.core.split_protocol(export_base_path)
    fs, _ = get_fs_and_path(export_base_path)
    try:
        export_full_path = os.path.join(export_base_path,
                                        experiment["dataset_name"],
                                        experiment["experiment_name"])
        fs.put(report_path,
               os.path.join(export_full_path, REPORT_JSON),
               recursive=True)
        fs.put(
            os.path.join("configs", experiment["config_path"]),
            os.path.join(export_full_path, CONFIG_YAML),
            recursive=True,
        )
        fs.put(
            os.path.join(experiment["dataset_name"], EXPERIMENT_RUN, "model",
                         MODEL_HYPERPARAMETERS_FILE_NAME),
            os.path.join(export_full_path, MODEL_HYPERPARAMETERS_FILE_NAME),
            recursive=True,
        )

        # zip experiment directory to export
        try:
            shutil.make_archive("artifacts", "zip",
                                experiment_output_directory)
            fs.put("artifacts.zip",
                   os.path.join(export_full_path, "artifacts.zip"),
                   recursive=True)
            os.remove("artifacts.zip")
        except Exception as e:
            logging.error(
                f"Couldn't export '{experiment_output_directory}' to bucket")
            logging.error(e)

        print("Uploaded metrics report and experiment config to\n\t",
              export_full_path)
    except ClientError as e:
        logging.error(translate_boto_error(e))
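
A hedged usage sketch for the function above; the dataset, experiment, config, and bucket names below are placeholders that only mirror the shapes described in the docstring:

    experiment = {
        'dataset_name': 'ames_housing',
        'experiment_name': 'baseline_run',   # chosen by the user
        'config_path': 'ames_housing.yaml',  # hypothetical file under ludwig/benchmarks/configs
    }
    export_artifacts(
        experiment,
        report_path='report.json',
        experiment_output_directory='ames_housing/experiment_run',
        export_base_path='s3://my-bench-bucket/bench/',  # placeholder bucket
    )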
Example #12
 def copy_managed(self, path1, path2, **kwargs):
     buc1, key1 = split_path(path1)
     buc2, key2 = split_path(path2)
     copy_source = {'Bucket': buc1, 'Key': key1}
     try:
         self.s3.copy(CopySource=copy_source,
                      Bucket=buc2,
                      Key=key2,
                      ExtraArgs=self._get_s3_method_kwargs(
                          self.s3.copy_object, kwargs))
     except ClientError as e:
         raise translate_boto_error(e)
     except ParamValidationError as e:
         raise ValueError('Copy failed (%r -> %r): %s' % (path1, path2, e))
Example #13
 def copy_basic(self, path1, path2, **kwargs):
     """ Copy file between locations on S3 """
     buc1, key1 = split_path(path1)
     buc2, key2 = split_path(path2)
     try:
         self._call_s3(self.s3.copy_object,
                       kwargs,
                       Bucket=buc2,
                       Key=key2,
                       CopySource='/'.join([buc1, key1]))
     except ClientError as e:
         raise translate_boto_error(e)
     except ParamValidationError as e:
         raise ValueError('Copy failed (%r -> %r): %s' % (path1, path2, e))
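
A usage sketch contrasting the two copy paths, with placeholder paths and assuming the s3fs version these examples come from: copy_basic issues a single copy_object call, which S3 limits to objects of at most 5 GB, while copy_managed (Example #12) delegates to boto3's managed copy, which can fall back to multipart copying for larger objects:

    import s3fs

    fs = s3fs.S3FileSystem()
    fs.copy_basic('my-bucket/raw/data.csv', 'my-bucket/backup/data.csv')    # single CopyObject call
    fs.copy_managed('my-bucket/raw/huge.bin', 'my-bucket/backup/huge.bin')  # managed (multipart-capable) copy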
Example #14
    def rm(self, path, recursive=False, **kwargs):
        """
        Remove keys and/or bucket.

        Parameters
        ----------
        path : string
            The location to remove.
        recursive : bool (False)
            Whether to also remove all entries below the path, i.e., those
            returned by `walk()`.
        """
        bucket, key = split_path(path)
        if recursive:
            files = self.find(path, maxdepth=None)
            if key and not files:
                raise FileNotFoundError(path)
            self.bulk_delete(files, **kwargs)
            if not key:
                self.rmdir(bucket)
            return
        if key:
            if not self.exists(path):
                raise FileNotFoundError(path)
            try:
                self._call_s3(self.s3.delete_object,
                              kwargs,
                              Bucket=bucket,
                              Key=key)
            except ClientError as e:
                raise translate_boto_error(e)
            self.invalidate_cache(self._parent(path))
        else:
            if self.exists(bucket):
                try:
                    self.s3.delete_bucket(Bucket=bucket)
                except BotoCoreError as e:
                    raise IOError('Delete bucket %r failed: %s' % (bucket, e))
                self.invalidate_cache(bucket)
                self.invalidate_cache('')
            else:
                raise FileNotFoundError(path)
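
A usage sketch for rm with placeholder paths; recursive=True removes every key returned by find() under the prefix, and passing a bare bucket name deletes the bucket itself:

    import s3fs

    fs = s3fs.S3FileSystem()
    fs.rm('my-bucket/staging/run-42', recursive=True)  # delete the prefix and everything below it
    fs.rm('my-bucket/staging/one-file.csv')            # delete a single key
    # fs.rm('my-bucket', recursive=True) would also remove the (then empty) bucket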
Example #15
 def mkdir(self, path, acl="", **kwargs):
     path = self._strip_protocol(path).rstrip('/')
     if not self._parent(path):
         if acl and acl not in buck_acls:
             raise ValueError('ACL not in %s' % buck_acls)
         try:
             params = {"Bucket": path, 'ACL': acl}
             region_name = (kwargs.get("region_name", None)
                            or self.client_kwargs.get("region_name", None))
             if region_name:
                 params['CreateBucketConfiguration'] = {
                     'LocationConstraint': region_name
                 }
             self.s3.create_bucket(**params)
             self.invalidate_cache('')
             self.invalidate_cache(path)
         except ClientError as e:
             raise translate_boto_error(e)
         except ParamValidationError as e:
             raise ValueError('Bucket create failed %r: %s' % (path, e))