Example #1
 def _configure_backing_store(self):
     try:
         backing_stores = []
         for bs in self.config['Backing Store']:
             if 'Type' in bs:
                 for key, item in bs.items():
                     bs[key] = _get_from_env(item)
                 if bs['Type'].lower() == 's3':
                     backing_stores.append(S3FS(
                         bs['Bucket'],
                         strict=False,
                         aws_access_key_id=bs.get('Key ID', None),
                         aws_secret_access_key=bs.get('Secret Key', None),
                         endpoint_url=bs.get('Endpoint URL', None)
                     ))
                 elif 'dav' in bs['Type'].lower():
                     if not webdav_available:
                         raise exceptions.NoWebdav("no webdavfs module was found")
                     if bs['Root'][0] != '/':
                         bs['Root'] = '/' + bs['Root']
                     backing_stores.append(WebDAVFS(
                         url=bs['Base URL'],
                         login=bs['Username'],
                         password=bs['Password'],
                         root=bs['Root']
                     ))
                 else:
                     _config_error("Unknown filesystem type.")
             else:
                 backing_stores.append(fs.open_fs(bs['URI'], create=True))
     except (KeyError, OSError, CreateFailed) as err:
         _config_error(err)
     return backing_stores
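For orientation, the kind of configuration the method above expects might look like the following sketch; the section name and keys mirror the ones read in the code, while every value is a made-up placeholder.

# Hypothetical 'Backing Store' configuration (placeholder values only).
config = {
    'Backing Store': [
        {   # An S3 or S3-compatible store.
            'Type': 's3',
            'Bucket': 'my-archive-bucket',
            'Key ID': 'AKIA...',
            'Secret Key': '...',
            'Endpoint URL': 'https://s3.example.org'
        },
        {   # A WebDAV store; 'Root' gets a leading '/' added if it is missing.
            'Type': 'webdav',
            'Base URL': 'https://dav.example.org',
            'Username': 'backup',
            'Password': '...',
            'Root': 'archive'
        },
        {   # No 'Type' key: opened as a generic PyFilesystem URL.
            'URI': 'osfs:///var/backups'
        }
    ]
}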
Example #2
def load_s3_filesystem(path, strict=False, config=None):
    """ Loads AWS s3 filesystem from a path

    :param path: A path to a folder on s3 bucket that will be the base folder in this filesystem
    :type path: str
    :param strict: If `True` the filesystem will perform additional checks against S3. Default is `False`.
    :type strict: bool
    :param config: A configuration object with AWS credentials. By default this is set to `None`, in which case the
        default configuration will be used.
    :type config: SHConfig or None
    :return: A S3 filesystem object
    :rtype: fs_s3fs.S3FS
    """
    if not path.startswith('s3://'):
        raise ValueError(
            "AWS path has to start with s3:// but found '{}'".format(path))

    if config is None:
        config = SHConfig()

    path_chunks = path.split('/', 3)[2:]
    bucket_name = path_chunks[0]
    dir_path = path_chunks[1] if len(path_chunks) > 1 else '/'

    return S3FS(bucket_name=bucket_name,
                dir_path=dir_path,
                aws_access_key_id=config.aws_access_key_id
                if config.aws_access_key_id else None,
                aws_secret_access_key=config.aws_secret_access_key
                if config.aws_secret_access_key else None,
                strict=strict)
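A minimal usage sketch, assuming the function above is importable and that SHConfig already holds valid credentials; bucket and folder names are placeholders.

# 'my-bucket' becomes bucket_name, 'eopatches/area-1' becomes dir_path.
filesystem = load_s3_filesystem("s3://my-bucket/eopatches/area-1", strict=False)
print(filesystem.listdir("/"))  # entries under eopatches/area-1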
Example #3
 def test_upload_args(self):
     s3 = S3FS("foo", acl="acl", cache_control="cc")
     self.assertDictEqual(
         s3._get_upload_args("test.jpg"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "image/jpeg"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("test.mp3"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "audio/mpeg"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("test.json"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "application/json"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("unknown.unknown"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "binary/octet-stream"
         },
     )
Example #4
def get_services(**options):
    """Instantiate an S3 filesystem service for loading and saving files from the ETL."""
    return {
        'fs': S3FS(options["bucket"],
                   aws_access_key_id=options["key"],
                   aws_secret_access_key=options["secret_key"],
                   endpoint_url=options["endpoint_url"],)
    }
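A hedged usage sketch with made-up option values; endpoint_url only matters for S3-compatible services such as MinIO.

# Hypothetical options for illustration only.
services = get_services(bucket="etl-artifacts",
                        key="AKIA...",
                        secret_key="...",
                        endpoint_url="https://s3.example.org")
services['fs'].writetext("/status.txt", "ok")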
Example #5
def get_s3fs(namespace):
    """
    Helper method to get_filesystem for a file system on S3
    """
    key_id = DJFS_SETTINGS.get('aws_access_key_id', None)
    key_secret = DJFS_SETTINGS.get('aws_secret_access_key', None)

    fullpath = namespace

    if 'prefix' in DJFS_SETTINGS:
        fullpath = os.path.join(DJFS_SETTINGS['prefix'], fullpath)
    s3fs = S3FS(DJFS_SETTINGS['bucket'],
                fullpath,
                aws_secret_access_key=key_secret,
                aws_access_key_id=key_id,
                acl=DJFS_SETTINGS.get('acl', None))

    def get_s3_url(self, filename, timeout=60):
        """
        Patch method that returns a signed S3 URL for the given filename.

        Note that this will return a URL whether or not the requested file
        exists.

        Arguments:
            self (obj): S3FS instance that this function has been patched onto
            filename (str): The name of the file we are retrieving a url for
            timeout (int): How long the url should be valid for; S3 enforces
                this limit

        Returns:
            str: A signed url to the requested file in S3
        """
        global S3CONN

        try:
            if not S3CONN:
                S3CONN = S3Connection(aws_access_key_id=key_id,
                                      aws_secret_access_key=key_secret)
            return S3CONN.generate_url(timeout,
                                       'GET',
                                       bucket=DJFS_SETTINGS['bucket'],
                                       key=os.path.join(fullpath, filename))
        except Exception:  # pylint: disable=broad-except
            # Retry on error; typically, if the connection has timed out, but
            # the broad except covers all errors.
            S3CONN = S3Connection(aws_access_key_id=key_id,
                                  aws_secret_access_key=key_secret)

            return S3CONN.generate_url(timeout,
                                       'GET',
                                       bucket=DJFS_SETTINGS['bucket'],
                                       key=os.path.join(fullpath, filename))

    s3fs = patch_fs(s3fs, namespace, get_s3_url)
    return s3fs
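A hedged usage sketch, assuming DJFS_SETTINGS points at a real bucket and that patch_fs exposes the helper as a bound method under the same name; namespace and filename are placeholders.

# Hypothetical usage of the patched filesystem.
uploads = get_s3fs("user-uploads")
uploads.writebytes("/avatar.png", b"...")             # regular pyfilesystem call
url = uploads.get_s3_url("avatar.png", timeout=300)   # signed GET URL, ~5 minutes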
Example #6
def _resolve_neural_files_bom(neural_files_or_bom: list = None):
    """
    This function is typically used internally by map_video_to_neural_and_sleep_state(...).

    Use save_neural_files_bom to create the CSV once.

    This function resolves either a list of neural filenames, or a CSV bill of materials listing the neural
    files with their sizes and ecube timestamps, into a list of (ecube_time, file_size, neural_filename) tuples.

    :param neural_files_or_bom: a list of neural files (non-globs), or a list of a single CSV file which is the
                                bill of materials (BOM) CSV file containing a list of all neural data files in format:
                                ecube_time, file_size, neural_filename
    :return: list in the form [(ecube_time, file_size, neural_filename), (...), ...]
    """
    assert neural_files_or_bom is not None and len(
        neural_files_or_bom) > 0, 'No neural files found.'
    uses_s3 = any([f.startswith('s3://') for f in neural_files_or_bom])
    if uses_s3:
        _verify_s3_support()

    if len(neural_files_or_bom) == 1 and neural_files_or_bom[0].endswith(
            '.csv'):
        with open(neural_files_or_bom[0], 'r') as csv_file:
            csv_reader = csv.reader(csv_file)
            result = [tuple(row) for row in csv_reader]
    else:
        result = []
        for nfile in neural_files_or_bom:
            if nfile.startswith('s3://'):
                o = urllib.parse.urlparse(nfile)
                bucket = o.netloc
                key = o.path
                with S3FS(bucket,
                          endpoint_url=os.environ.get('ENDPOINT_URL', None),
                          strict=False) as s3fs:
                    s3f = s3fs.openbin(key)
                    ecube_time = np.frombuffer(s3f.read(8), dtype=np.uint64)[0]
                    file_size = s3f.size
            else:
                with open(nfile, 'rb') as f:
                    ecube_time = np.fromfile(f, dtype=np.uint64, count=1)[0]
                    file_size = os.fstat(f.fileno()).st_size

            filename = os.path.split(nfile)[-1]
            result.append((ecube_time, file_size, filename))

    return result
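For reference, a minimal sketch of the CSV bill of materials described in the docstring, with placeholder values; note that csv.reader hands the fields back as strings.

import csv

# Hypothetical BOM rows: ecube_time, file_size, neural_filename (placeholder values).
bom_rows = [
    (152973046173659648, 3276800000, "neural_chunk_0001.bin"),
    (152973346173659648, 3276800000, "neural_chunk_0002.bin"),
]
with open("neural_files_bom.csv", "w", newline="") as f:
    csv.writer(f).writerows(bom_rows)

# Resolving the single CSV yields the same tuples, but every field is a string.
resolved = _resolve_neural_files_bom(["neural_files_bom.csv"])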
Example #7
def _create_new_s3_fs():
    """Creates a new empty mocked s3 bucket. If one such bucket already exists it deletes it first."""
    bucket_name = "mocked-test-bucket"
    s3resource = boto3.resource("s3", region_name="eu-central-1")

    bucket = s3resource.Bucket(bucket_name)

    if bucket.creation_date:  # If bucket already exists
        for key in bucket.objects.all():
            key.delete()
        bucket.delete()

    s3resource.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={"LocationConstraint": "eu-central-1"})

    return S3FS(bucket_name=bucket_name)
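A minimal sketch of how such a helper is usually exercised, assuming an older moto release that still exposes mock_s3 (moto 5 renamed the decorator to mock_aws).

from moto import mock_s3  # assumption: moto < 5

@mock_s3
def test_mocked_bucket_roundtrip():
    filesystem = _create_new_s3_fs()
    filesystem.writetext("/hello.txt", "hi")
    assert filesystem.readtext("/hello.txt") == "hi"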
Example #8
def _resolve_glob(file_glob):
    if file_glob.startswith('s3://'):
        _verify_s3_support()
        o = urllib.parse.urlparse(file_glob)
        bucket = o.netloc
        key = o.path

        s3fs = S3FS(bucket,
                    endpoint_url=os.environ.get('ENDPOINT_URL', None),
                    strict=False)
        result = [
            's3://{}{}'.format(bucket, match.path) for match in s3fs.glob(key)
        ]
    else:
        result = glob.glob(file_glob)

    return result
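Hypothetical calls; ENDPOINT_URL only needs to be set when the data lives on an S3-compatible store rather than AWS itself.

local_files = _resolve_glob("/data/recordings/*.bin")             # uses glob.glob
remote_files = _resolve_glob("s3://my-bucket/recordings/*.bin")   # uses S3FS.glob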
Example #9
 def __enter__(self) -> S3FS:
     AwsSessionHook.__enter__(self)
     self.bucket = self.conn_params.extra['bucket']
     self.base_path = self.conn_params.extra.get('base_path')
     if self.conn_params.login and self.conn_params.password:
         kwargs = {
             'aws_access_key_id': self.conn_params.login,
             'aws_secret_access_key': self.conn_params.password
         }
     elif self.session:
         # Get temporary credentials from STS; S3FS expects the token string,
         # not the full get_session_token() response.
         credentials = self.session.client('sts').get_session_token()['Credentials']
         kwargs = {
             'aws_access_key_id': credentials['AccessKeyId'],
             'aws_secret_access_key': credentials['SecretAccessKey'],
             'aws_session_token': credentials['SessionToken']
         }
     else:
         kwargs = {}
     self.conn = S3FS(self.bucket, dir_path=self.base_path, **kwargs)
     return self.conn
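A hedged usage sketch, assuming the method above belongs to a hook class (called S3FSHook here, with a hypothetical constructor argument) whose connection parameters carry the bucket name in extra.

# Hypothetical hook class name and constructor argument.
with S3FSHook(conn_id="my_s3_connection") as remote_fs:
    remote_fs.writetext("/reports/latest.txt", "ok")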
Example #10
 def _configure_backing_store(self):
     try:
         bs = self.config['Backing Store']
         if 'Type' in bs:
             for key, item in bs.items():
                 bs[key] = _get_from_env(item)
             if bs['Type'].lower() == 's3':
                 return S3FS(
                     bs['Bucket'],
                     strict=False,
                     aws_access_key_id=bs.get('Key ID', None),
                     aws_secret_access_key=bs.get('Secret Key', None),
                     endpoint_url=bs.get('Endpoint URL', None)
                 )
         else:
             return fs.open_fs(bs['URI'], create=True)
     except (KeyError, OSError, CreateFailed) as err:
         _config_error(err)
Example #11
def open_fs(fs_url, **kwargs):
    """Open a pyfs filesystem.

    Like fs.open_fs, this will simply return the FS object unchanged if an
    instance is given as the fs_url parameter.
    """
    if isinstance(fs_url, fs.base.FS):
        return fs_url

    # Now assume a string that may be a path (no ://) or else a filesystem URL
    if "://" not in fs_url:
        # A path, assume this is not URI escaped which is what the OSFS(..)
        # creator assumes (as opposed to open_fs(..))
        return OSFS(fs_url, **kwargs)

    # We have a URL, parse it
    parse_result = fs.opener.parse(fs_url)
    if parse_result.protocol == 's3':
        # An S3 URL: mostly repeats
        # https://github.com/PyFilesystem/s3fs/blob/master/fs_s3fs/opener.py
        # but adjusts the handling of strict to default to strict=False
        bucket_name, _, dir_path = parse_result.resource.partition("/")
        if not bucket_name:
            raise fs.opener.errors.OpenerError(
                "invalid bucket name in '{}'".format(fs_url))
        # Instead of allowing this to be turned on by a strict=1 in the
        # URL query params, allow it to be turned off by strict!=1
        strict = (parse_result.params["strict"] == "1"
                  if "strict" in parse_result.params else False)
        s3fs = S3FS(bucket_name,
                    dir_path=dir_path or "/",
                    aws_access_key_id=parse_result.username or None,
                    aws_secret_access_key=parse_result.password or None,
                    endpoint_url=parse_result.params.get("endpoint_url", None),
                    acl=parse_result.params.get("acl", None),
                    cache_control=parse_result.params.get(
                        "cache_control", None),
                    strict=strict)
        # Patch in version of getinfo method that doesn't check parent directory
        s3fs.getinfo = s3fs._getinfo  # pylint: disable=protected-access
        return s3fs
    # Non-S3 URL
    return fs.open_fs(fs_url, **kwargs)
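Hypothetical calls against the wrapper above; unlike the stock s3fs opener, strict defaults to False here and has to be switched back on with strict=1 in the query string.

local_root = open_fs("/tmp/ocfl-root")                                              # plain path -> OSFS
s3_root = open_fs("s3://my-bucket/ocfl-root?endpoint_url=https://s3.example.org")   # strict=False
s3_strict = open_fs("s3://my-bucket/ocfl-root?strict=1")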
Example #12
def ocfl_opendir(pyfs, dir, **kwargs):
    """Open directory while handling the case of S3 without directory objects.

    FIXME - DIRTY HACK
    """
    if isinstance(pyfs, S3FS):
        # Hack for S3 because the standard opendir(..) fails when there
        # isn't a directory object (even with strict=False)
        new_dir_path = fs.path.join(pyfs.dir_path, dir)
        s3fs = S3FS(
            pyfs._bucket_name,  # pylint: disable=protected-access
            dir_path=new_dir_path,
            aws_access_key_id=pyfs.aws_access_key_id,
            aws_secret_access_key=pyfs.aws_secret_access_key,
            endpoint_url=pyfs.endpoint_url,
            # acl=pyfs.acl,
            # cache_control=pyfs.cache_control),
            strict=pyfs.strict)
        # Patch in version of getinfo method that doesn't check parent directory
        s3fs.getinfo = s3fs._getinfo  # pylint: disable=protected-access
        return s3fs
    # Not S3, just use regular opendir(..)
    return pyfs.opendir(dir, **kwargs)
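A hedged usage sketch, assuming the open_fs wrapper from the earlier example and a made-up bucket layout.

# Hypothetical S3-backed OCFL storage root.
root_fs = open_fs("s3://my-ocfl-bucket/storage-root")
version_fs = ocfl_opendir(root_fs, "object-01/v1")  # works even without a directory object
print(version_fs.listdir("/"))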
Example #13
 def test_upload_args(self):
     s3 = S3FS('foo', acl='acl', cache_control='cc')
     self.assertDictEqual(s3._get_upload_args('test.jpg'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'image/jpeg'
     })
     self.assertDictEqual(s3._get_upload_args('test.mp3'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'audio/mpeg'
     })
     self.assertDictEqual(s3._get_upload_args('test.json'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'application/json'
     })
     self.assertDictEqual(
         s3._get_upload_args('unknown.unknown'), {
             'ACL': 'acl',
             'CacheControl': 'cc',
             'ContentType': 'binary/octet-stream'
         })
Example #14
def _open_fs(directory):
    if directory.startswith("s3://"):
        """Manually fetch the permissions from the environment

        Requires the following env variables:
        - S3_ACCESS_KEY
        - S3_SECRET_KEY
        - S3_URL
        """
        from fs_s3fs import S3FS
        if not directory.endswith("/"):
            directory += "/"

        bucket, fpath = directory[len("s3://"):].split("/", 1)
        return S3FS(bucket,
                    dir_path=fpath,
                    aws_access_key_id=os.environ.get("S3_ACCESS_KEY", None),
                    aws_secret_access_key=os.environ.get(
                        'S3_SECRET_KEY', None),
                    strict=False,
                    endpoint_url=os.environ.get('S3_URL', None))
    else:
        return open_fs(directory)
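A hedged usage sketch with placeholder credentials; the three environment variables are exactly the ones the helper reads.

import os

# Hypothetical values for illustration only.
os.environ["S3_ACCESS_KEY"] = "AKIA..."
os.environ["S3_SECRET_KEY"] = "..."
os.environ["S3_URL"] = "https://s3.example.org"

remote = _open_fs("s3://my-bucket/experiments/run-01")
local = _open_fs("/tmp/experiments/run-01")  # falls back to open_fs; directory must exist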
Example #15
def load_s3_filesystem(path: str,
                       strict: bool = False,
                       config: Optional[SHConfig] = None,
                       aws_profile: Optional[str] = None) -> S3FS:
    """Loads AWS s3 filesystem from a path.

    :param path: A path to a folder on s3 bucket that will be the base folder in this filesystem
    :type path: str
    :param strict: If `True` the filesystem will perform additional checks against S3. Default is `False`.
    :type strict: bool
    :param config: A configuration object with AWS credentials. By default this is set to `None`, in which case the
        default configuration will be used.
    :type config: SHConfig or None
    :param aws_profile: The name of an AWS profile. If given, AWS credentials will be taken from it.
    :return: A S3 filesystem object
    :rtype: fs_s3fs.S3FS
    """
    if not is_s3_path(path):
        raise ValueError(
            f"AWS path has to start with s3:// but found '{path}'.")

    config = config or SHConfig()
    if aws_profile:
        config = get_aws_credentials(aws_profile, config=config)

    path_chunks = path.split("/", 3)[2:]
    bucket_name = path_chunks[0]
    dir_path = path_chunks[1] if len(path_chunks) > 1 else "/"

    return S3FS(
        bucket_name=bucket_name,
        dir_path=dir_path,
        aws_access_key_id=config.aws_access_key_id or None,
        aws_secret_access_key=config.aws_secret_access_key or None,
        aws_session_token=config.aws_session_token or None,
        strict=strict,
    )
Example #16
 def test_path_to_key_subdir(self):
     s3 = S3FS("foo", "/dir")
     self.assertEqual(s3._path_to_key("foo.bar"), "dir/foo.bar")
     self.assertEqual(s3._path_to_key("foo/bar"), "dir/foo/bar")
Example #17
 def test_path_to_key_subdir(self):
     s3 = S3FS('foo', '/dir')
     self.assertEqual(s3._path_to_key('foo.bar'), 'dir/foo.bar')
     self.assertEqual(s3._path_to_key('foo/bar'), 'dir/foo/bar')
Example #18
 def test_path_to_key(self):
     s3 = S3FS('foo')
     self.assertEqual(s3._path_to_key('foo.bar'), 'foo.bar')
     self.assertEqual(s3._path_to_key('foo/bar'), 'foo/bar')
Example #19
 def make_fs(self):
     self._delete_bucket_contents()
     self.s3.Object(self.bucket_name, 'subdirectory').put()
     return S3FS(self.bucket_name, dir_path='subdirectory')
Example #20
 def make_fs(self):
     self._delete_bucket_contents()
     return S3FS(self.bucket_name)
Example #21
    config = get_aws_credentials("default", config=default_config)
    assert config.aws_access_key_id != default_config.aws_access_key_id
    assert config.aws_secret_access_key != default_config.aws_secret_access_key


@pytest.mark.parametrize(
    argnames="path_parts, expected_path",
    ids=["local", "s3"],
    argvalues=[
        (["/tmp", "folder", "xyz", "..",
          "file.json"], os.path.join("/tmp", "folder", "file.json")),
        (["s3://xx/", "/y/z", "a", "..", "b.json"], "s3://xx/y/z/b.json"),
    ],
)
def test_join_path(path_parts, expected_path):
    assert join_path(*path_parts) == expected_path


@pytest.mark.parametrize(
    "filesystem, path, expected_full_path",
    [
        (OSFS("/tmp"), "my/folder", "/tmp/my/folder"),
        (S3FS(bucket_name="data", dir_path="/folder"), "/sub/folder",
         "s3://data/folder/sub/folder"),
        (S3FS(bucket_name="data"), "/sub/folder", "s3://data/sub/folder"),
    ],
)
def test_get_full_path(filesystem, path, expected_full_path):
    full_path = get_full_path(filesystem, path)
    assert full_path == expected_full_path
Example #22
def prepare_filesystem(config: BaseConfig) -> S3FS:
    return S3FS(bucket_name=config.bucket_name,
                aws_access_key_id=config.aws_access_key_id,
                aws_secret_access_key=config.aws_secret_access_key,
                region=config.aws_region)
Example #23
File: s3.py Project: msauria/galaxy
 def _open_fs(self, user_context):
     props = self._serialization_props(user_context)
     handle = S3FS(**props)
     return handle
Example #24
File: t.py Project: ptzagk/s3fs
from fs_s3fs import S3FS
s3fs = S3FS(u'fsexample')
print(s3fs)

with s3fs.openbin(u'test.bin', u'w') as f:
    f.write(b'a')
    f.write(b'b')
    f.write(b'c')

print(s3fs.getinfo(u'test.bin', namespaces=['s3']).raw)

import io
f = io.BytesIO(b'Hello, World')
s3fs.setbinfile(u'b', f)

print(s3fs.geturl(u'b'))
s3fs.makedir(u'foo', recreate=True)
print(s3fs.geturl(u'/foo'))

s3fs.settext(u'/foo/bar', u'Hello')

s3fs = S3FS(u'fsexample', dir_path='foo')
print(s3fs)
print(s3fs._prefix)
print(s3fs.listdir(u'/'))
print(s3fs._path_to_dir_key(u'/'))
print(s3fs._path_to_dir_key(u''))
print(s3fs._path_to_dir_key(u'bar'))
print(s3fs._path_to_dir_key(u'/bar'))

# f = s3fs.openbin(u'newfile', 'ab')
Example #25
 def test_path_to_key(self):
     s3 = S3FS("foo")
     self.assertEqual(s3._path_to_key("foo.bar"), "foo.bar")
     self.assertEqual(s3._path_to_key("foo/bar"), "foo/bar")
Example #26
def _prepare_filesystem(sampling_config: SamplingConfig) -> S3FS:
    return S3FS(bucket_name=sampling_config.bucket_name,
                aws_access_key_id=sampling_config.aws_access_key_id,
                aws_secret_access_key=sampling_config.aws_secret_access_key,
                region=sampling_config.aws_region)
Example #27
import os
import json
import datetime
from fs_s3fs import S3FS
from airtable import Airtable
from airtable_local_backup import restore


base = os.environ['ATDB']
table_name = os.environ['TABLE']
dokey = os.environ['DOKEY']
dosecret = os.environ['DOSECRET']
endpoint_url = os.environ['URL']
bucket = os.environ['BUCKET']
prefix = 'testrestore-{}/'.format(datetime.datetime.now())

table = Airtable(base_key=base, table_name=table_name)
space = S3FS(bucket, endpoint_url=endpoint_url, aws_access_key_id=dokey,
             aws_secret_access_key=dosecret)

with open('tests/lots_of_fields.json', 'r') as jsonfile:
    tabledata = json.load(jsonfile)

records = restore.prepare_records(tabledata, s3fs=space, check_integrity=True,
                                  prefix=prefix)

for rec in records:
    table.insert(rec)