Example 1
def load_model(filesystem: S3FS, config: PredictionConfig) -> ResUnetA:
    """ Copy the model locally if not existing and load it """
    if not os.path.exists(f'{config.temp_model_path}/{config.model_name}'):
        if not filesystem.exists(
                f'{config.model_path}/{config.model_name}/checkpoints/'):
            filesystem.makedirs(
                f'{config.model_path}/{config.model_name}/checkpoints/')
        copy_dir(filesystem,
                 f'{config.model_path}/{config.model_name}/checkpoints/',
                 f'{config.temp_model_path}/{config.model_name}',
                 'checkpoints')
        copy_file(filesystem,
                  f'{config.model_path}/{config.model_name}/model_cfg.json',
                  f'{config.temp_model_path}/{config.model_name}',
                  'model_cfg.json')

    input_shape = dict(
        features=[None, config.height, config.width, config.n_channels])

    with open(f'{config.temp_model_path}/{config.model_name}/model_cfg.json',
              'r') as jfile:
        model_cfg = json.load(jfile)

    # initialise model from config, build, compile and load trained weights
    model = ResUnetA(model_cfg)
    model.build(input_shape)
    model.net.compile()
    model.net.load_weights(
        f'{config.temp_model_path}/{config.model_name}/checkpoints/model.ckpt')

    return model
Example 2
def load_metadata(filesystem: S3FS, config: PredictionConfig) -> pd.DataFrame:
    """ Load DataFrame with info about normalisation factors """
    metadata_dir = os.path.dirname(config.metadata_path)
    if not filesystem.exists(metadata_dir):
        filesystem.makedirs(metadata_dir)

    df = pd.read_csv(filesystem.open(f'{config.metadata_path}'))

    normalisation_factors = df.groupby(
        pd.to_datetime(df.timestamp).dt.to_period("M")).max()

    normalisation_factors['month'] = pd.to_datetime(
        normalisation_factors.timestamp).dt.month

    return normalisation_factors
Example 3
 def _configure_backing_store(self):
     try:
         backing_stores = []
         for bs in self.config['Backing Store']:
             if 'Type' in bs:
                 for key, item in bs.items():
                     bs[key] = _get_from_env(item)
                 if bs['Type'].lower() == 's3':
                     backing_stores.append(S3FS(
                         bs['Bucket'],
                         strict=False,
                         aws_access_key_id=bs.get('Key ID', None),
                         aws_secret_access_key=bs.get('Secret Key', None),
                         endpoint_url=bs.get('Endpoint URL', None)
                     ))
                 elif 'dav' in bs['Type'].lower():
                     if not webdav_available:
                         raise exceptions.NoWebdav("no webdavfs module was found")
                     if bs['Root'][0] != '/':
                         bs['Root'] = '/' + bs['Root']
                     backing_stores.append(WebDAVFS(
                         url=bs['Base URL'],
                         login=bs['Username'],
                         password=bs['Password'],
                         root=bs['Root']
                     ))
                 else:
                     _config_error("Unknown filesystem type.")
             else:
                 backing_stores.append(fs.open_fs(bs['URI'], create=True))
     except (KeyError, OSError, CreateFailed) as err:
         _config_error(err)
     return backing_stores
Example 4
def _construct_norm_arrays(file_path: str, metadata_path: str, fold: int = None, filesystem: S3FS = None) -> \
        Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """ Return arrays with normalisation factors to be used """
    chunk_name = os.path.basename(file_path)

    df = pd.read_csv(
        filesystem.open(metadata_path)
    ) if filesystem is not None else pd.read_csv(metadata_path)

    df = df[df.chunk == chunk_name]

    if fold is not None:
        df = df[df.fold == fold]

    perc99 = df[[
        'norm_perc99_b0', 'norm_perc99_b1', 'norm_perc99_b2', 'norm_perc99_b3'
    ]].values
    meanstd_mean = df[[
        'norm_meanstd_mean_b0', 'norm_meanstd_mean_b1', 'norm_meanstd_mean_b2',
        'norm_meanstd_mean_b3'
    ]].values
    meanstd_median = df[[
        'norm_meanstd_median_b0', 'norm_meanstd_median_b1',
        'norm_meanstd_median_b2', 'norm_meanstd_median_b3'
    ]].values
    meanstd_std = df[[
        'norm_meanstd_std_b0', 'norm_meanstd_std_b1', 'norm_meanstd_std_b2',
        'norm_meanstd_std_b3'
    ]].values

    return perc99, meanstd_mean, meanstd_median, meanstd_std
Example 5
def load_s3_filesystem(path, strict=False, config=None):
    """ Loads AWS s3 filesystem from a path

    :param path: A path to a folder on s3 bucket that will be the base folder in this filesystem
    :type path: str
    :param strict: If `True` the filesystem will be making additional checks to the s3. Default is `False`.
    :type strict: bool
    :param config: A configuration object with AWS credentials. Defaults to `None`, in which case the default
        configuration is used.
    :type config: SHConfig or None
    :return: A S3 filesystem object
    :rtype: fs_s3fs.S3FS
    """
    if not path.startswith('s3://'):
        raise ValueError(
            "AWS path has to start with s3:// but found '{}'".format(path))

    if config is None:
        config = SHConfig()

    path_chunks = path.split('/', 3)[2:]
    bucket_name = path_chunks[0]
    dir_path = path_chunks[1] if len(path_chunks) > 1 else '/'

    return S3FS(bucket_name=bucket_name,
                dir_path=dir_path,
                aws_access_key_id=config.aws_access_key_id
                if config.aws_access_key_id else None,
                aws_secret_access_key=config.aws_secret_access_key
                if config.aws_secret_access_key else None,
                strict=strict)
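A minimal usage sketch for the loader above, with a hypothetical bucket and folder; credentials fall back to the default SHConfig when none are set explicitly.

# Hypothetical names: the path below is split into bucket_name='my-bucket'
# and dir_path='eo-data/tiles'; strict=False skips extra existence checks.
filesystem = load_s3_filesystem('s3://my-bucket/eo-data/tiles')
print(filesystem.listdir('/'))  # entries under s3://my-bucket/eo-data/tiles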
Example 6
 def test_upload_args(self):
     s3 = S3FS("foo", acl="acl", cache_control="cc")
     self.assertDictEqual(
         s3._get_upload_args("test.jpg"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "image/jpeg"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("test.mp3"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "audio/mpeg"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("test.json"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "application/json"
         },
     )
     self.assertDictEqual(
         s3._get_upload_args("unknown.unknown"),
         {
             "ACL": "acl",
             "CacheControl": "cc",
             "ContentType": "binary/octet-stream"
         },
     )
Example 7
def get_services(**options):
    """Instantiate an S3 filesystem service for loading and saving files from the ETL."""
    return {
        'fs': S3FS(options["bucket"],
                   aws_access_key_id=options["key"],
                   aws_secret_access_key=options["secret_key"],
                   endpoint_url=options["endpoint_url"],)
    }
Example 8
def get_s3fs(namespace):
    """
    Helper method to get_filesystem for a file system on S3
    """
    key_id = DJFS_SETTINGS.get('aws_access_key_id', None)
    key_secret = DJFS_SETTINGS.get('aws_secret_access_key', None)

    fullpath = namespace

    if 'prefix' in DJFS_SETTINGS:
        fullpath = os.path.join(DJFS_SETTINGS['prefix'], fullpath)
    s3fs = S3FS(DJFS_SETTINGS['bucket'],
                fullpath,
                aws_secret_access_key=key_secret,
                aws_access_key_id=key_id,
                acl=DJFS_SETTINGS.get('acl', None))

    def get_s3_url(self, filename, timeout=60):
        """
        Patch method to return a signed S3 url for the given filename

        Note that this will return a url whether or not the requested file
        exists.

        Arguments:
            self (obj): S3FS instance that this function has been patched onto
            filename (str): The name of the file we are retrieving a url for
            timeout (int): How long the url should be valid for; S3 enforces
                this limit

        Returns:
            str: A signed url to the requested file in S3
        """
        global S3CONN

        try:
            if not S3CONN:
                S3CONN = S3Connection(aws_access_key_id=key_id,
                                      aws_secret_access_key=key_secret)
            return S3CONN.generate_url(timeout,
                                       'GET',
                                       bucket=DJFS_SETTINGS['bucket'],
                                       key=os.path.join(fullpath, filename))
        except Exception:  # pylint: disable=broad-except
            # Retry on error; typically, if the connection has timed out, but
            # the broad except covers all errors.
            S3CONN = S3Connection(aws_access_key_id=key_id,
                                  aws_secret_access_key=key_secret)

            return S3CONN.generate_url(timeout,
                                       'GET',
                                       bucket=DJFS_SETTINGS['bucket'],
                                       key=os.path.join(fullpath, filename))

    s3fs = patch_fs(s3fs, namespace, get_s3_url)
    return s3fs
Example 9
def load_dates(filesystem: S3FS, tile_name: str) -> List[datetime]:
    """ Load a json file with dates from the bucket and parse out dates
    """
    path = f'/{tile_name}/userdata.json'

    with filesystem.open(path, 'r') as fp:
        userdata = json.load(fp)

    dates_list = json.loads(userdata['dates'])

    return [parse(date) for date in dates_list]
Example 10
def _resolve_neural_files_bom(neural_files_or_bom: list = None):
    """
    This function is typically used internally by map_video_to_neural_and_sleep_state(...).

    Use save_neural_files_bom to create the CSV once.

    This function resolves a list of neural filenames, or a CSV bill of materials listing the neural files with
    their sizes and ecube timestamps, into a list of (ecube_time, file_size, neural_filename) tuples.

    :param neural_files_or_bom: a list of neural files (non-globs), or a list of a single CSV file which is the
                                bill of materials (BOM) CSV file containing a list of all neural data files in format:
                                ecube_time, file_size, neural_filename
    :return: list in the form [(ecube_time, file_size, neural_filename), (...), ...]
    """
    assert neural_files_or_bom is not None and len(
        neural_files_or_bom) > 0, 'No neural files found.'
    uses_s3 = any([f.startswith('s3://') for f in neural_files_or_bom])
    if uses_s3:
        _verify_s3_support()

    if len(neural_files_or_bom) == 1 and neural_files_or_bom[0].endswith(
            '.csv'):
        with open(neural_files_or_bom[0], 'r') as csv_file:
            csv_reader = csv.reader(csv_file)
            result = [tuple(row) for row in csv_reader]
    else:
        result = []
        for nfile in neural_files_or_bom:
            if nfile.startswith('s3://'):
                o = urllib.parse.urlparse(nfile)
                bucket = o.netloc
                key = o.path
                with S3FS(bucket,
                          endpoint_url=os.environ.get('ENDPOINT_URL', None),
                          strict=False) as s3fs:
                    s3f = s3fs.openbin(key)
                    ecube_time = np.frombuffer(s3f.read(8), dtype=np.uint64)[0]
                    file_size = s3f.size
            else:
                with open(nfile, 'rb') as f:
                    ecube_time = np.fromfile(f, dtype=np.uint64, count=1)[0]
                    file_size = os.fstat(f.fileno()).st_size

            filename = os.path.split(nfile)[-1]
            result.append((ecube_time, file_size, filename))

    return result
Example 11
def _create_new_s3_fs():
    """Creates a new empty mocked s3 bucket. If one such bucket already exists it deletes it first."""
    bucket_name = "mocked-test-bucket"
    s3resource = boto3.resource("s3", region_name="eu-central-1")

    bucket = s3resource.Bucket(bucket_name)

    if bucket.creation_date:  # If bucket already exists
        for key in bucket.objects.all():
            key.delete()
        bucket.delete()

    s3resource.create_bucket(
        Bucket=bucket_name,
        CreateBucketConfiguration={"LocationConstraint": "eu-central-1"})

    return S3FS(bucket_name=bucket_name)
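A sketch of how such a fixture is typically exercised under moto's S3 mock; the decorator is mock_s3 in moto < 5 and mock_aws in newer releases, so treat the import and test name as assumptions.

# Assumed test scaffolding; requires the `moto` package.
from moto import mock_s3  # `mock_aws` in moto >= 5

@mock_s3
def test_mocked_bucket_roundtrip():
    filesystem = _create_new_s3_fs()           # fresh, empty mocked bucket
    filesystem.writetext('/hello.txt', 'hi')   # write through the S3FS layer
    assert filesystem.readtext('/hello.txt') == 'hi'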
Example 12
def _resolve_glob(file_glob):
    if file_glob.startswith('s3://'):
        _verify_s3_support()
        o = urllib.parse.urlparse(file_glob)
        bucket = o.netloc
        key = o.path

        s3fs = S3FS(bucket,
                    endpoint_url=os.environ.get('ENDPOINT_URL', None),
                    strict=False)
        result = [
            's3://{}{}'.format(bucket, match.path) for match in s3fs.glob(key)
        ]
    else:
        result = glob.glob(file_glob)

    return result
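A brief usage sketch with hypothetical globs; for S3 paths the bucket comes from the URL, and ENDPOINT_URL only needs to be set for non-AWS, S3-compatible endpoints.

# Hypothetical paths; both calls return a flat list of matching file paths.
local_files = _resolve_glob('/data/neural/*.bin')
s3_files = _resolve_glob('s3://my-bucket/neural/*.bin')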
Example 13
 def __enter__(self) -> S3FS:
     AwsSessionHook.__enter__(self)
     self.bucket = self.conn_params.extra['bucket']
     self.base_path = self.conn_params.extra.get('base_path')
     if self.conn_params.login and self.conn_params.password:
         kwargs = {
             'aws_access_key_id': self.conn_params.login,
             'aws_secret_access_key': self.conn_params.password
         }
     elif self.session:
         # STS returns a response dict; S3FS needs the key id, secret and
         # session token taken from its 'Credentials' entry
         client = self.session.client('sts')
         creds = client.get_session_token()['Credentials']
         kwargs = {'aws_access_key_id': creds['AccessKeyId'],
                   'aws_secret_access_key': creds['SecretAccessKey'],
                   'aws_session_token': creds['SessionToken']}
     else:
         kwargs = {}
     self.conn = S3FS(self.bucket, dir_path=self.base_path, **kwargs)
     return self.conn
Example 14
 def _configure_backing_store(self):
     try:
         bs = self.config['Backing Store']
         if 'Type' in bs:
             for key, item in bs.items():
                 bs[key] = _get_from_env(item)
             if bs['Type'].lower() == 's3':
                 return S3FS(
                     bs['Bucket'],
                     strict=False,
                     aws_access_key_id=bs.get('Key ID', None),
                     aws_secret_access_key=bs.get('Secret Key', None),
                     endpoint_url=bs.get('Endpoint URL', None)
                 )
         else:
             return fs.open_fs(bs['URI'], create=True)
     except (KeyError, OSError, CreateFailed) as err:
         _config_error(err)
Example 15
def open_fs(fs_url, **kwargs):
    """Open a pyfs filesystem.

    Like fs.open_fs, this will simply return the FS object if an instance is
    given as the fs_url parameter.
    """
    if isinstance(fs_url, fs.base.FS):
        return fs_url

    # Now assume a string that may be a path (no ://) or else a filesystem URL
    if "://" not in fs_url:
        # A path, assume this is not URI escaped which is what the OSFS(..)
        # creator assumes (as opposed to open_fs(..))
        return OSFS(fs_url, **kwargs)

    # We have a URL, parse it
    parse_result = fs.opener.parse(fs_url)
    if parse_result.protocol == 's3':
        # An S3 URL, mostly repeat
        # https://github.com/PyFilesystem/s3fs/blob/master/fs_s3fs/opener.py
        # but adjust the handling of strict to default to strict=False
        bucket_name, _, dir_path = parse_result.resource.partition("/")
        if not bucket_name:
            raise fs.opener.errors.OpenerError(
                "invalid bucket name in '{}'".format(fs_url))
        # Instead of allowing this to be turned on by a strict=1 in the
        # URL query params, allow it to be turned off by strict!=1
        strict = (parse_result.params["strict"] == "1"
                  if "strict" in parse_result.params else False)
        s3fs = S3FS(bucket_name,
                    dir_path=dir_path or "/",
                    aws_access_key_id=parse_result.username or None,
                    aws_secret_access_key=parse_result.password or None,
                    endpoint_url=parse_result.params.get("endpoint_url", None),
                    acl=parse_result.params.get("acl", None),
                    cache_control=parse_result.params.get(
                        "cache_control", None),
                    strict=strict)
        # Patch in version of getinfo method that doesn't check parent directory
        s3fs.getinfo = s3fs._getinfo  # pylint: disable=protected-access
        return s3fs
    # Non-S3 URL
    return fs.open_fs(fs_url, **kwargs)
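A short illustration of the strict handling described in the comments above, using placeholder paths and bucket names.

# Placeholder paths/URLs to show the default strict=False behaviour.
fs_local = open_fs('/tmp/data')                           # no '://' -> OSFS
fs_s3 = open_fs('s3://my-bucket/prefix')                  # S3 URL, strict=False by default
fs_s3_strict = open_fs('s3://my-bucket/prefix?strict=1')  # opt back in to strict checks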
Example 16
def ocfl_opendir(pyfs, dir, **kwargs):
    """Open directory while handling the case of S3 without directory objects.

    FIXME - DIRTY HACK
    """
    if isinstance(pyfs, S3FS):
        # Hack for S3 because the standard opendir(..) fails when there
        # isn't a directory object (even with strict=False)
        new_dir_path = fs.path.join(pyfs.dir_path, dir)
        s3fs = S3FS(
            pyfs._bucket_name,  # pylint: disable=protected-access
            dir_path=new_dir_path,
            aws_access_key_id=pyfs.aws_access_key_id,
            aws_secret_access_key=pyfs.aws_secret_access_key,
            endpoint_url=pyfs.endpoint_url,
            # acl=pyfs.acl,
            # cache_control=pyfs.cache_control),
            strict=pyfs.strict)
        # Patch in version of getinfo method that doesn't check parent directory
        s3fs.getinfo = s3fs._getinfo  # pylint: disable=protected-access
        return s3fs
    # Not S3, just use regular opendir(..)
    return pyfs.opendir(dir, **kwargs)
Example 17
 def test_upload_args(self):
     s3 = S3FS('foo', acl='acl', cache_control='cc')
     self.assertDictEqual(s3._get_upload_args('test.jpg'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'image/jpeg'
     })
     self.assertDictEqual(s3._get_upload_args('test.mp3'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'audio/mpeg'
     })
     self.assertDictEqual(s3._get_upload_args('test.json'), {
         'ACL': 'acl',
         'CacheControl': 'cc',
         'ContentType': 'application/json'
     })
     self.assertDictEqual(
         s3._get_upload_args('unknown.unknown'), {
             'ACL': 'acl',
             'CacheControl': 'cc',
             'ContentType': 'binary/octet-stream'
         })
Example 18
def _open_fs(directory):
    if directory.startswith("s3://"):
        """Manually fetch the permissions from the environment

        Requires the following env variables:
        - S3_ACCESS_KEY
        - S3_SECRET_KEY
        - S3_URL
        """
        from fs_s3fs import S3FS
        if not directory.endswith("/"):
            directory += "/"

        bucket, fpath = directory[len("s3://"):].split("/", 1)
        return S3FS(bucket,
                    dir_path=fpath,
                    aws_access_key_id=os.environ.get("S3_ACCESS_KEY", None),
                    aws_secret_access_key=os.environ.get(
                        'S3_SECRET_KEY', None),
                    strict=False,
                    endpoint_url=os.environ.get('S3_URL', None))
    else:
        return open_fs(directory)
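A hedged usage sketch; the environment values below are placeholders for an S3-compatible endpoint such as MinIO, and `os` is assumed to be imported.

# Placeholder credentials and endpoint for an S3-compatible store.
os.environ['S3_ACCESS_KEY'] = 'my-access-key'
os.environ['S3_SECRET_KEY'] = 'my-secret-key'
os.environ['S3_URL'] = 'http://localhost:9000'

remote = _open_fs('s3://my-bucket/experiments')  # S3FS rooted at experiments/
local = _open_fs('./experiments')                # any other path goes through open_fs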
Example 19
def load_s3_filesystem(path: str,
                       strict: bool = False,
                       config: Optional[SHConfig] = None,
                       aws_profile: Optional[str] = None) -> S3FS:
    """Loads AWS s3 filesystem from a path.

    :param path: A path to a folder on s3 bucket that will be the base folder in this filesystem
    :type path: str
    :param strict: If `True` the filesystem will be making additional checks to the s3. Default is `False`.
    :type strict: bool
    :param config: A configuration object with AWS credentials. Defaults to `None`, in which case the default
        configuration is used.
    :type config: SHConfig or None
    :param aws_profile: A name of AWS profile. If given, AWS credentials will be taken from there.
    :return: A S3 filesystem object
    :rtype: fs_s3fs.S3FS
    """
    if not is_s3_path(path):
        raise ValueError(
            f"AWS path has to start with s3:// but found '{path}'.")

    config = config or SHConfig()
    if aws_profile:
        config = get_aws_credentials(aws_profile, config=config)

    path_chunks = path.split("/", 3)[2:]
    bucket_name = path_chunks[0]
    dir_path = path_chunks[1] if len(path_chunks) > 1 else "/"

    return S3FS(
        bucket_name=bucket_name,
        dir_path=dir_path,
        aws_access_key_id=config.aws_access_key_id or None,
        aws_secret_access_key=config.aws_secret_access_key or None,
        aws_session_token=config.aws_session_token or None,
        strict=strict,
    )
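Compared with Example 5, this variant can also pull credentials from a named AWS profile; a minimal sketch with a hypothetical bucket and profile name:

# Hypothetical profile; credentials are resolved via get_aws_credentials
# and passed to S3FS together with any session token.
filesystem = load_s3_filesystem('s3://my-bucket/eopatches', aws_profile='research')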
Example 20
 def test_path_to_key_subdir(self):
     s3 = S3FS("foo", "/dir")
     self.assertEqual(s3._path_to_key("foo.bar"), "dir/foo.bar")
     self.assertEqual(s3._path_to_key("foo/bar"), "dir/foo/bar")
Example 21
 def test_path_to_key_subdir(self):
     s3 = S3FS('foo', '/dir')
     self.assertEqual(s3._path_to_key('foo.bar'), 'dir/foo.bar')
     self.assertEqual(s3._path_to_key('foo/bar'), 'dir/foo/bar')
Example 22
 def test_path_to_key(self):
     s3 = S3FS('foo')
     self.assertEqual(s3._path_to_key('foo.bar'), 'foo.bar')
     self.assertEqual(s3._path_to_key('foo/bar'), 'foo/bar')
Example 23
 def make_fs(self):
     self._delete_bucket_contents()
     self.s3.Object(self.bucket_name, 'subdirectory').put()
     return S3FS(self.bucket_name, dir_path='subdirectory')
Example 24
 def make_fs(self):
     self._delete_bucket_contents()
     return S3FS(self.bucket_name)
Example 25
def prepare_filesystem(config: BaseConfig) -> S3FS:
    return S3FS(bucket_name=config.bucket_name,
                aws_access_key_id=config.aws_access_key_id,
                aws_secret_access_key=config.aws_secret_access_key,
                region=config.aws_region)
Example 26
def npz_dir_dataset(file_dir_or_list: Union[str, List[str]],
                    features: dict,
                    metadata_path: str,
                    fold: int = None,
                    randomize: bool = True,
                    num_parallel: int = 5,
                    shuffle_size: int = 500,
                    filesystem: S3FS = None,
                    npz_from_s3: bool = False) -> tf.data.Dataset:
    """ Creates a tf.data.Dataset from a directory containing numpy .npz files.

    Files are loaded lazily when needed. `num_parallel` files are read in parallel and interleaved together.

    :param file_dir_or_list: directory containing .npz files or a list of paths to .npz files
    :param features: dict of (`field` -> `feature_name`) mappings, where `field` is the field in the .npz array
                   and `feature_name` is the name of the feature it is saved to.
    :param metadata_path: path to the input csv file with patchlet information
    :param fold: in k-fold validation, fold to consider when querying the patchlet info dataframe
    :param randomize: whether to shuffle the samples of the dataset or not, defaults to `True`
    :param num_parallel: number of files to read in parallel and interleave, defaults to 5
    :param shuffle_size: buffer size for shuffling file order, defaults to 500
    :param filesystem: filesystem to access bucket, defaults to None
    :param npz_from_s3: if True, npz files are loaded from S3 bucket, otherwise from local disk
    :return: dataset containing examples merged from files
    """

    files = file_dir_or_list

    if npz_from_s3:
        assert filesystem is not None

    # If dir, then list files
    if isinstance(file_dir_or_list, str):
        if filesystem and not filesystem.isdir(file_dir_or_list):
            filesystem.makedirs(file_dir_or_list)
        dir_list = os.listdir(
            file_dir_or_list) if not npz_from_s3 else filesystem.listdir(
                file_dir_or_list)
        files = [os.path.join(file_dir_or_list, f) for f in dir_list]

    fields = list(features.keys())

    # Read one file for shape info
    file = next(iter(files))
    data = np.load(file) if not npz_from_s3 else np.load(
        filesystem.openbin(file))
    np_arrays = [data[f] for f in fields]

    # Append norm arrays
    perc99, meanstd_mean, meanstd_median, meanstd_std = _construct_norm_arrays(
        file, metadata_path, fold, filesystem)

    np_arrays.append(perc99)
    np_arrays.append(meanstd_mean)
    np_arrays.append(meanstd_median)
    np_arrays.append(meanstd_std)

    # Read shape and type info
    #     types = tuple(arr.dtype for arr in np_arrays)
    types = (tf.uint16, tf.float32, tf.float32, tf.float32, tf.float64,
             tf.float64, tf.float64, tf.float64)
    shapes = tuple(arr.shape[1:] for arr in np_arrays)

    # Create datasets
    datasets = [
        _npz_file_lazy_dataset(file,
                               fields,
                               types,
                               shapes,
                               metadata_path,
                               fold=fold,
                               filesystem=filesystem,
                               npz_from_s3=npz_from_s3) for file in files
    ]
    ds = tf.data.Dataset.from_tensor_slices(datasets)

    # Shuffle files and interleave multiple files in parallel
    if randomize:
        ds = ds.shuffle(shuffle_size)

    ds = ds.interleave(lambda x: x, cycle_length=num_parallel)

    return ds
Example 27
File: s3.py Project: msauria/galaxy
 def _open_fs(self, user_context):
     props = self._serialization_props(user_context)
     handle = S3FS(**props)
     return handle
Example 28
File: t.py Project: ptzagk/s3fs
from fs_s3fs import S3FS
s3fs = S3FS(u'fsexample')
print(s3fs)

with s3fs.openbin(u'test.bin', u'w') as f:
    f.write(b'a')
    f.write(b'b')
    f.write(b'c')

print(s3fs.getinfo(u'test.bin', namespaces=['s3']).raw)

import io
f = io.BytesIO(b'Hello, World')
s3fs.setbinfile(u'b', f)

print(s3fs.geturl(u'b'))
s3fs.makedir(u'foo', recreate=True)
print(s3fs.geturl(u'/foo'))

s3fs.settext(u'/foo/bar', u'Hello')

s3fs = S3FS(u'fsexample', dir_path='foo')
print(s3fs)
print(s3fs._prefix)
print(s3fs.listdir(u'/'))
print(s3fs._path_to_dir_key(u'/'))
print(s3fs._path_to_dir_key(u''))
print(s3fs._path_to_dir_key(u'bar'))
print(s3fs._path_to_dir_key(u'/bar'))

# f = s3fs.openbin(u'newfile', 'ab')
Example 29
 def test_path_to_key(self):
     s3 = S3FS("foo")
     self.assertEqual(s3._path_to_key("foo.bar"), "foo.bar")
     self.assertEqual(s3._path_to_key("foo/bar"), "foo/bar")
Example 30
def _prepare_filesystem(sampling_config: SamplingConfig) -> S3FS:
    return S3FS(bucket_name=sampling_config.bucket_name,
                aws_access_key_id=sampling_config.aws_access_key_id,
                aws_secret_access_key=sampling_config.aws_secret_access_key,
                region=sampling_config.aws_region)