Example #1
0
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

        Args:
            path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
            profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
                Set to False to use an anonymous profile.
            kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.

        Raises:
            ValueError: If the archive contains a member whose path would
                escape the extraction directory (path traversal).
    '''
    if _is_url(path) or _is_s3(path) or _is_local_tar(str(path)):
        with tempfile.TemporaryDirectory() as tmpdir:
            local_path = path
            transport_params = None

            if _is_s3(path):
                transport_params = get_transport_params(profile_name)

            if _is_s3(path) or _is_url(path):
                # Remote archive: download it into the temp dir first.
                local_path = os.path.join(tmpdir, "temporary_es")
                use_smartopen_es(local_path, path, transport_params)

            with tarfile.open(str(local_path)) as tar:
                # Guard against tar path traversal (CVE-2007-4559): refuse
                # any member that would extract outside the temp directory.
                base = os.path.realpath(tmpdir)
                for member in tar.getmembers():
                    target = os.path.realpath(os.path.join(tmpdir, member.name))
                    if target != base and not target.startswith(base + os.sep):
                        raise ValueError(
                            "Archive member %r would be extracted outside of the destination directory" % member.name)
                tar.extractall(path=tmpdir)

            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)
 def save(self, location, profile_name):
     """Serialize the features and write them to *location*.

     When ``location`` is None, the JSON string is returned instead of
     written. A string location may be a local path or an S3 URI (URLs
     are rejected); any other object is treated as a writable file.
     """
     serialized = self.to_dict()
     if location is None:
         return json.dumps(serialized)
     if not isinstance(location, str):
         # Anything that is not a path string is assumed file-like.
         json.dump(serialized, location)
         return
     transport_params = {}
     if _is_url(location):
         raise ValueError("Writing to URLs is not supported")
     if not _is_s3(location):
         # Plain local path: write the JSON directly.
         with open(location, "w") as fp:
             json.dump(serialized, fp)
         return
     boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)
     session = boto3.Session()
     if isinstance(profile_name, str):
         # Explicit AWS profile requested by name.
         transport_params = {'session': boto3.Session(profile_name=profile_name)}
         use_smartopen_features(location, serialized, transport_params, read=False)
     elif profile_name is False:
         # Anonymous upload via s3fs.
         use_s3fs_features(location, serialized, read=False)
     elif session.get_credentials() is not None:
         # Default credential chain is available.
         use_smartopen_features(location, serialized, read=False)
     else:
         use_s3fs_features(location, serialized, read=False)
Example #3
0
def write_data_description(entityset, path, profile_name=None, **kwargs):
    """Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json` and dataframe data.
        profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords arguments to the underlying serialization method or to specify AWS profile.
    """
    if _is_s3(path):
        # Stage the description in a scratch directory, archive it, and
        # upload the archive to S3.
        with tempfile.TemporaryDirectory() as tmpdir:
            os.makedirs(os.path.join(tmpdir, "data"))
            dump_data_description(entityset, tmpdir, **kwargs)
            archive_path = create_archive(tmpdir)
            params = get_transport_params(profile_name)
            use_smartopen_es(archive_path, path, read=False, transport_params=params)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        # Local destination: write the description tree in place.
        local_path = os.path.abspath(path)
        os.makedirs(os.path.join(local_path, "data"), exist_ok=True)
        dump_data_description(entityset, local_path, **kwargs)
Example #4
0
def write_data_description(entityset, path, profile_name=None, **kwargs):
    '''Serialize entityset to data description and write to disk or S3 path.

    Args:
        entityset (EntitySet) : Instance of :class:`.EntitySet`.
        path (str) : Location on disk or S3 path to write `data_description.json` and entity data.
        profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords) : Additional keyword arguments to pass as keywords arguments to the underlying serialization method or to specify AWS profile.
    '''
    if _is_s3(path):
        boto3 = import_or_raise("boto3", BOTO3_ERR_MSG)

        # Stage locally, archive, then upload via the branch that matches
        # the requested credential handling.
        with tempfile.TemporaryDirectory() as tmpdir:
            os.makedirs(os.path.join(tmpdir, 'data'))
            dump_data_description(entityset, tmpdir, **kwargs)
            archive_path = create_archive(tmpdir)

            transport_params = {}
            session = boto3.Session()
            if isinstance(profile_name, str):
                # Named AWS profile.
                transport_params = {'session': boto3.Session(profile_name=profile_name)}
                use_smartopen_es(archive_path, path, transport_params, read=False)
            elif profile_name is False:
                # Anonymous access via s3fs.
                use_s3fs_es(archive_path, path, read=False)
            elif session.get_credentials() is not None:
                # Default credential chain.
                use_smartopen_es(archive_path, path, read=False)
            else:
                use_s3fs_es(archive_path, path, read=False)
    elif _is_url(path):
        raise ValueError("Writing to URLs is not supported")
    else:
        # Local destination: write the description tree in place.
        local_path = os.path.abspath(path)
        os.makedirs(os.path.join(local_path, 'data'), exist_ok=True)
        dump_data_description(entityset, local_path, **kwargs)
Example #5
0
 def load(cls, features, profile_name):
     """Deserialize features from a JSON string, a location, or a file object.

     A string argument is first parsed as inline JSON; if that fails it
     is treated as a URL, S3 URI, or local file path. Non-string
     arguments are read as open file-like objects.
     """
     if not isinstance(features, str):
         # File-like object: read the JSON payload directly.
         return cls(json.load(features))
     try:
         # Assume the string is the JSON payload itself.
         payload = json.loads(features)
     except ValueError:
         # Not inline JSON -- interpret the string as a location.
         if _is_url(features):
             payload = use_smartopen_features(features)
         elif _is_s3(features):
             session = boto3.Session()
             if isinstance(profile_name, str):
                 # Named AWS profile.
                 transport_params = {
                     'session': boto3.Session(profile_name=profile_name)
                 }
                 payload = use_smartopen_features(
                     features, transport_params)
             elif profile_name is False:
                 # Anonymous access via s3fs.
                 payload = use_s3fs_features(features)
             elif session.get_credentials() is not None:
                 # Default credential chain.
                 payload = use_smartopen_features(features)
             else:
                 payload = use_s3fs_features(features)
         else:
             with open(features, 'r') as fp:
                 payload = json.load(fp)
     return cls(payload)
Example #6
0
def read_entityset(path, profile_name=None, **kwargs):
    '''Read entityset from disk, S3 path, or URL.

        Args:
            path (str): Directory on disk, S3 path, or URL to read `data_description.json`.
            profile_name (str, bool): The AWS profile specified to write to S3. Will default to None and search for AWS credentials.
                Set to False to use an anonymous profile.
            kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.

        Raises:
            ValueError: If the archive contains a member whose path would
                escape the extraction directory (path traversal).
    '''
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = {}
            session = boto3.Session()

            if _is_url(path):
                use_smartopen_es(file_path, path)
            elif isinstance(profile_name, str):
                # Named AWS profile for the S3 download.
                transport_params = {
                    'session': boto3.Session(profile_name=profile_name)
                }
                use_smartopen_es(file_path, path, transport_params)
            elif profile_name is False:
                # Anonymous access via s3fs.
                use_s3fs_es(file_path, path)
            elif session.get_credentials() is not None:
                # Default credential chain.
                use_smartopen_es(file_path, path)
            else:
                use_s3fs_es(file_path, path)

            with tarfile.open(str(file_path)) as tar:
                # Guard against tar path traversal (CVE-2007-4559): refuse
                # any member that would extract outside the temp directory.
                base = os.path.realpath(tmpdir)
                for member in tar.getmembers():
                    target = os.path.realpath(os.path.join(tmpdir, member.name))
                    if target != base and not target.startswith(base + os.sep):
                        raise ValueError(
                            "Archive member %r would be extracted outside of the destination directory" % member.name)
                tar.extractall(path=tmpdir)

            data_description = read_data_description(tmpdir)
            return description_to_entityset(data_description, **kwargs)
    else:
        data_description = read_data_description(path)
        return description_to_entityset(data_description, **kwargs)
Example #7
0
 def load(cls, features, profile_name):
     """Deserialize features from a JSON string, a location, or a file object.

     A string argument is first parsed as inline JSON; if that fails it
     is treated as a URL, S3 URI, or local file path. Non-string
     arguments are read as open file-like objects.
     """
     if not isinstance(features, str):
         # File-like object: read the JSON payload directly.
         return cls(json.load(features))
     try:
         # Assume the string is the JSON payload itself.
         payload = json.loads(features)
     except ValueError:
         # Not inline JSON -- interpret the string as a location.
         if _is_url(features) or _is_s3(features):
             # Only S3 reads need AWS transport configuration.
             params = get_transport_params(profile_name) if _is_s3(features) else None
             payload = use_smartopen_features(features, transport_params=params)
         else:
             with open(features, "r") as fp:
                 payload = json.load(fp)
     return cls(payload)
 def save(self, location, profile_name):
     """Serialize the features and write them to *location*.

     When ``location`` is None, the JSON string is returned instead of
     written. A string location may be a local path or an S3 URI (URLs
     are rejected); any other object is treated as a writable file.
     """
     serialized = self.to_dict()
     if location is None:
         return json.dumps(serialized)
     if not isinstance(location, str):
         # Anything that is not a path string is assumed file-like.
         json.dump(serialized, location)
         return
     if _is_url(location):
         raise ValueError("Writing to URLs is not supported")
     if _is_s3(location):
         params = get_transport_params(profile_name)
         use_smartopen_features(
             location, serialized, params, read=False
         )
     else:
         with open(location, "w") as fp:
             json.dump(serialized, fp)