Ejemplo n.º 1
0
def write_woodwork_table(dataframe, path, profile_name=None, **kwargs):
    """Serialize a Woodwork table to a local directory or to an S3 path.

    Args:
        dataframe (pd.DataFrame, dd.DataFrame, ks.DataFrame): DataFrame with Woodwork typing information initialized.
        path (str): Local directory or S3 path that receives the serialized table.
        profile_name (str, bool): The AWS profile used when writing to S3. Defaults to None, which searches for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords): Additional keyword arguments forwarded to the underlying serialization method.

    Raises:
        ValueError: If ``path`` is a URL rather than an S3 path or a local path.
    """
    if _is_s3(path):
        # Stage the table in a scratch directory, archive it, then hand the
        # archive to use_smartopen in write mode (read=False).
        with tempfile.TemporaryDirectory() as staging_dir:
            data_dir = os.path.join(staging_dir, 'data')
            os.makedirs(data_dir)
            _dump_table(dataframe, staging_dir, **kwargs)
            archive_path = _create_archive(staging_dir)

            s3_params = get_transport_params(profile_name)
            use_smartopen(archive_path, path, read=False, transport_params=s3_params)
        return

    if _is_url(path):
        raise ValueError("Writing to URLs is not supported")

    # Plain local write: make sure <path>/data exists before dumping.
    target_dir = os.path.abspath(path)
    os.makedirs(os.path.join(target_dir, 'data'), exist_ok=True)
    _dump_table(dataframe, target_dir, **kwargs)
Ejemplo n.º 2
0
def read_woodwork_table(path, profile_name=None, validate=False, **kwargs):
    """Read Woodwork table from disk, S3 path, or URL.

    For a URL or S3 path the archive is downloaded into a temporary directory,
    extracted there, and the table is built before that directory is removed.

    Args:
        path (str): Directory on disk, S3 path, or URL to read `woodwork_typing_info.json`.
        profile_name (str, bool): The AWS profile used to read from S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        validate (bool, optional): Whether parameter and data validation should occur when initializing Woodwork dataframe
            during deserialization. Defaults to False. Note: If serialized data was modified outside of Woodwork and you
            are unsure of the validity of the data or typing information, `validate` should be set to True.
        kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.

    Returns:
        DataFrame: DataFrame with Woodwork typing information initialized.
    """
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = None

            # Only S3 paths need AWS transport parameters; plain URLs use None.
            if _is_s3(path):
                transport_params = get_transport_params(profile_name)

            use_smartopen(file_path, path, transport_params)
            with tarfile.open(str(file_path)) as tar:
                # The archive was fetched from a remote location, so treat it
                # as untrusted: where the running Python offers extraction
                # filters, use the "data" filter so members cannot be written
                # outside tmpdir (CVE-2007-4559).
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(path=tmpdir, filter="data")
                else:
                    # Older interpreters have no filter support; keep the
                    # legacy behavior rather than failing outright.
                    tar.extractall(path=tmpdir)

            # Build the table while tmpdir still exists.
            table_typing_info = read_table_typing_information(tmpdir)
            return _typing_information_to_woodwork_table(
                table_typing_info, validate, **kwargs)
    else:
        table_typing_info = read_table_typing_information(path)
        return _typing_information_to_woodwork_table(table_typing_info,
                                                     validate, **kwargs)
Ejemplo n.º 3
0
def read_datatable(path, profile_name=None, **kwargs):
    '''Read DataTable from disk, S3 path, or URL.

    For a URL or S3 path the archive is downloaded into a temporary directory,
    extracted there, and the DataTable is built before that directory is removed.

    Args:
        path (str): Directory on disk, S3 path, or URL to read `table_description.json`.
        profile_name (str, bool): The AWS profile used to read from S3. Will default to None and search for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords): Additional keyword arguments to pass as keyword arguments to the underlying deserialization method.

    Returns:
        The result of ``description_to_datatable`` for the table description found at ``path``.
    '''
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = None

            # Only S3 paths need AWS transport parameters; plain URLs use None.
            if _is_s3(path):
                transport_params = get_transport_params(profile_name)

            use_smartopen(file_path, path, transport_params)
            with tarfile.open(str(file_path)) as tar:
                # The archive was fetched from a remote location, so treat it
                # as untrusted: where the running Python offers extraction
                # filters, use the "data" filter so members cannot be written
                # outside tmpdir (CVE-2007-4559).
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(path=tmpdir, filter="data")
                else:
                    # Older interpreters have no filter support; keep the
                    # legacy behavior rather than failing outright.
                    tar.extractall(path=tmpdir)

            # Build the DataTable while tmpdir still exists.
            table_description = read_table_description(tmpdir)
            return description_to_datatable(table_description, **kwargs)
    else:
        table_description = read_table_description(path)
        return description_to_datatable(table_description, **kwargs)
Ejemplo n.º 4
0
def write_datatable(datatable, path, profile_name=None, **kwargs):
    '''Serialize a datatable to a local directory or to an S3 path.

    Args:
        datatable (DataTable): Instance of :class:`.DataTable`.
        path (str): Local directory or S3 path that receives the datatable data and description.
        profile_name (str, bool): The AWS profile used when writing to S3. Defaults to None, which searches for AWS credentials.
            Set to False to use an anonymous profile.
        kwargs (keywords): Additional keyword arguments forwarded to the underlying serialization method.

    Raises:
        ValueError: If ``path`` is a URL rather than an S3 path or a local path.
    '''
    if _is_s3(path):
        # Stage the datatable in a scratch directory, archive it, then hand
        # the archive to use_smartopen in write mode (read=False).
        with tempfile.TemporaryDirectory() as staging_dir:
            data_dir = os.path.join(staging_dir, 'data')
            os.makedirs(data_dir)
            dump_table(datatable, staging_dir, **kwargs)
            archive_path = create_archive(staging_dir)

            s3_params = get_transport_params(profile_name)
            use_smartopen(archive_path, path, read=False, transport_params=s3_params)
        return

    if _is_url(path):
        raise ValueError("Writing to URLs is not supported")

    # Plain local write: make sure <path>/data exists before dumping.
    target_dir = os.path.abspath(path)
    os.makedirs(os.path.join(target_dir, 'data'), exist_ok=True)
    dump_table(datatable, target_dir, **kwargs)
Ejemplo n.º 5
0
 def save_to_s3(self, profile_name):
     """Write the data and typing information locally, archive it, and send it to ``self.path``.

     Args:
         profile_name (str, bool): AWS profile passed to ``get_transport_params``.
     """
     with tempfile.TemporaryDirectory() as staging_dir:
         # Point the local writer at the scratch directory before saving.
         self.write_path = staging_dir
         self.save_to_local_path()
         tarball = self._create_archive()
         s3_params = get_transport_params(profile_name)
         # read=False puts use_smartopen in write mode (local archive -> self.path).
         use_smartopen(tarball, self.path, read=False, transport_params=s3_params)
Ejemplo n.º 6
0
    def read_from_s3(self, profile_name):
        """Download the archive at ``self.path``, extract it, and read the data from the extracted copy.

        Args:
            profile_name (str, bool): AWS profile passed to ``get_transport_params``
                when ``self.path`` is an S3 path.

        Returns:
            Whatever ``self.read_from_local_path()`` returns for the extracted data.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            tar_filename = Path(self.path).name
            tar_filepath = os.path.join(tmpdir, tar_filename)
            transport_params = None

            # Only S3 paths need AWS transport parameters.
            if _is_s3(self.path):
                transport_params = get_transport_params(profile_name)

            use_smartopen(tar_filepath, self.path, transport_params)
            with tarfile.open(str(tar_filepath)) as tar:
                # The archive was downloaded, so treat it as untrusted: where
                # the running Python offers extraction filters, use the "data"
                # filter so members cannot be written outside tmpdir
                # (CVE-2007-4559).
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(path=tmpdir, filter="data")
                else:
                    # Older interpreters have no filter support; keep the
                    # legacy behavior rather than failing outright.
                    tar.extractall(path=tmpdir)

            self.read_path = os.path.join(tmpdir, self.data_subdirectory, self.filename)

            # Read inside the with-block: tmpdir is removed when it exits.
            return self.read_from_local_path()
Ejemplo n.º 7
0
    def read_from_s3(self, profile_name):
        """Read data from S3 into a dataframe.

        The archive at ``self.path`` is downloaded into a temporary directory,
        extracted there, and read via ``self.read_from_local_path()``.

        Args:
            profile_name (str, bool): AWS profile passed to ``get_transport_params``
                when ``self.path`` is an S3 path.

        Returns:
            Whatever ``self.read_from_local_path()`` returns for the extracted data.
        """
        with tempfile.TemporaryDirectory() as tmpdir:
            tar_filename = Path(self.path).name
            tar_filepath = os.path.join(tmpdir, tar_filename)
            transport_params = None

            # Only S3 paths need AWS transport parameters.
            if _is_s3(self.path):
                transport_params = get_transport_params(profile_name)

            use_smartopen(tar_filepath, self.path, transport_params)
            with tarfile.open(str(tar_filepath)) as tar:
                # The archive was downloaded, so treat it as untrusted: where
                # the running Python offers extraction filters, use the "data"
                # filter so members cannot be written outside tmpdir
                # (CVE-2007-4559).
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(path=tmpdir, filter="data")
                else:
                    # Older interpreters have no filter support; keep the
                    # legacy behavior rather than failing outright.
                    tar.extractall(path=tmpdir)
            # The data location is recorded relative to the archive root.
            self.read_path = os.path.join(
                tmpdir, self.typing_info["loading_info"]["location"])
            # Read inside the with-block: tmpdir is removed when it exits.
            return self.read_from_local_path()
Ejemplo n.º 8
0
def read_table_typing_information(path, typing_info_filename, profile_name):
    """Read Woodwork typing information from disk, S3 path, or URL.

    Args:
        path (str): Location on disk, S3 path, or URL to read typing info file.
        typing_info_filename (str): Name of JSON file in which typing info is stored.
        profile_name (str, bool): The AWS profile specified to access to S3.

    Returns:
        dict: Woodwork typing information dictionary. When ``path`` is a local
        directory, the absolute path is also stored under the ``"path"`` key.

    Raises:
        AssertionError: If ``path`` is a local path that does not exist.
    """
    if _is_url(path) or _is_s3(path):
        with tempfile.TemporaryDirectory() as tmpdir:
            file_name = Path(path).name
            file_path = os.path.join(tmpdir, file_name)
            transport_params = None

            # Only S3 paths need AWS transport parameters; plain URLs use None.
            if _is_s3(path):
                transport_params = get_transport_params(profile_name)

            use_smartopen(file_path, path, transport_params)
            with tarfile.open(str(file_path)) as tar:
                # The archive was fetched from a remote location, so treat it
                # as untrusted: where the running Python offers extraction
                # filters, use the "data" filter so members cannot be written
                # outside tmpdir (CVE-2007-4559).
                if hasattr(tarfile, "data_filter"):
                    tar.extractall(path=tmpdir, filter="data")
                else:
                    # Older interpreters have no filter support; keep the
                    # legacy behavior rather than failing outright.
                    tar.extractall(path=tmpdir)

            typing_info_path = os.path.join(tmpdir, typing_info_filename)
            with open(typing_info_path, "r") as typing_info_file:
                typing_info = json.load(typing_info_file)
    else:
        path = os.path.abspath(path)
        # Raised explicitly (rather than via `assert`) so the check survives
        # `python -O`; AssertionError is kept so existing callers that catch
        # it keep working.
        if not os.path.exists(path):
            raise AssertionError('"{}" does not exist'.format(path))
        typing_info_path = os.path.join(path, typing_info_filename)
        with open(typing_info_path, "r") as typing_info_file:
            typing_info = json.load(typing_info_file)
        # Only the local branch records where the data lives; the temporary
        # directory used for remote reads is gone by the time we return.
        typing_info["path"] = path

    return typing_info