Esempio n. 1
0
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

        Args:
            path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
            profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
                Set to False to use an anonymous profile.
            kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.
    '''
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = None

            if _is_s3(path):
                transport_params = get_transport_params(profile_name)

            use_smartopen_es(file_path, path, transport_params)
            with tarfile.open(str(file_path)) as tar:
                tar.extractall(path=tmpdir)

            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)
Esempio n. 2
0
def write_data_description(entityset, path, profile_name=None, **kwargs):
    """Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json` and dataframe data.
        profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords arguments to the underlying serialization method or to specify AWS profile.
    """
    if _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            os.makedirs(os.path.join(tmpdir, "data"))
            dump_data_description(entityset, tmpdir, **kwargs)
            file_path = create_archive(tmpdir)

            transport_params = get_transport_params(profile_name)
            use_smartopen_es(file_path,
                             path,
                             read=False,
                             transport_params=transport_params)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        path = os.path.abspath(path)
        os.makedirs(os.path.join(path, "data"), exist_ok=True)
        dump_data_description(entityset, path, **kwargs)
Esempio n. 3
0
def write_data_description(entityset, path, profile_name=None, **kwargs):
    '''Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json` and entity data.
        profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords arguments to the underlying serialization method or to specify AWS profile.
    '''
    if _is_s3(path):
        boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)

        with tempfile.TemporaryDirectory() as tmpdir:
            os.makedirs(os.path.join(tmpdir, 'data'))
            dump_data_description(entityset, tmpdir, **kwargs)
            file_path = create_archive(tmpdir)

            transport_params = {}
            session = boto3.Session()
            if isinstance(profile_name, str):
                transport_params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(file_path, path, transport_params, read=False)
            elif profile_name is False:
                use_s3fs_es(file_path, path, read=False)
            elif session.get_credentials() is not None:
                use_smartopen_es(file_path, path, read=False)
            else:
                use_s3fs_es(file_path, path, read=False)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        path = os.path.abspath(path)
        os.makedirs(os.path.join(path, 'data'), exist_ok=True)
        dump_data_description(entityset, path, **kwargs)
Esempio n. 4
0
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

        Args:
            path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
            profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
                Set to False to use an anonymous profile.
            kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.
    '''
    if _is_url(path) or _is_s3(path):
        boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)

        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = {}
            session = boto3.Session()

            if _is_url(path):
                use_smartopen_es(file_path, path)
            elif isinstance(profile_name, str):
                transport_params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(file_path, path, transport_params)
            elif profile_name is False:
                use_s3fs_es(file_path, path)
            elif session.get_credentials() is not None:
                use_smartopen_es(file_path, path)
            else:
                use_s3fs_es(file_path, path)

            with tarfile.open(str(file_path)) as tar:
                tar.extractall(path=tmpdir)

            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)