Exemple #1
0
    def to_csv(self, path, sep=',', encoding='utf-8', engine='python', compression=None, profile_name=None):
        '''Serialize this entityset in the csv format at the location given by `path`.

            `path` may be a local path or a S3 path. When the target is S3,
            a tar archive of the files is written.

            Args:
                path (str) : Destination to write to (will be created as a directory).
                sep (str) : String of length 1 used as the field delimiter in the output file.
                encoding (str) : Encoding to use in the output file. Defaults to 'utf-8'.
                engine (str) : Name of the engine to use, one of {'c', 'python'}.
                compression (str) : Name of the compression to use, one of {'gzip', 'bz2', 'zip', 'xz', None}.
                profile_name (str) : Name of the AWS profile to use, False for an anonymous profile, or None.

            Returns:
                The entityset itself (`self`).
        '''
        # Collect the csv options up front; index=False is always forwarded
        # alongside the caller-supplied settings.
        csv_options = {
            'format': 'csv',
            'index': False,
            'sep': sep,
            'encoding': encoding,
            'engine': engine,
            'compression': compression,
            'profile_name': profile_name,
        }
        serialize.write_data_description(self, path, **csv_options)
        return self
    def to_pickle(self, path, compression=None):
        '''Serialize this entityset in the pickle format at the location given by `path`.

            Args:
                path (str): Destination on disk to write to (will be created as a directory).
                compression (str) : Name of the compression to use, one of {'gzip', 'bz2', 'zip', 'xz', None}.

            Returns:
                The entityset itself (`self`).
        '''
        pickle_options = {'format': 'pickle', 'compression': compression}
        serialize.write_data_description(self, path, **pickle_options)
        return self
def test_serialize_subdirs_not_removed(es, tmpdir):
    # Seed the target directory with a subdirectory and a placeholder
    # description file before serializing into it.
    write_path = tmpdir.mkdir("test")
    test_dir = write_path.mkdir("test_dir")
    description_file = str(write_path.join('data_description.json'))
    with open(description_file, 'w') as handle:
        json.dump('__SAMPLE_TEXT__', handle)

    serialize.write_data_description(
        es,
        path=str(write_path),
        index='1',
        sep='\t',
        encoding='utf-8',
        compression=None,
    )

    # The pre-existing subdirectory must still be there after the write ...
    assert os.path.exists(str(test_dir))
    # ... and the placeholder text must no longer appear in the description file.
    with open(description_file, 'r') as handle:
        description = json.load(handle)
    assert '__SAMPLE_TEXT__' not in description
    def to_parquet(self, path, engine='auto', compression=None):
        '''Serialize this entityset in the parquet format at the location given by `path`.

            Args:
                path (str): Destination on disk to write to (will be created as a directory).
                engine (str) : Name of the engine to use, one of {'auto', 'pyarrow', 'fastparquet'}.
                compression (str) : Name of the compression to use, one of {'snappy', 'gzip', 'brotli', None}.

            Returns:
                The entityset itself (`self`).
        '''
        parquet_options = {
            'format': 'parquet',
            'engine': engine,
            'compression': compression,
        }
        serialize.write_data_description(self, path, **parquet_options)
        return self
Exemple #5
0
    def to_pickle(self, path, compression=None, profile_name=None):
        '''Serialize this entityset in the pickle format at the location given by `path`.

            `path` may be a local path or a S3 path. When the target is S3,
            a tar archive of the files is written.

            Args:
                path (str): Destination to write to (will be created as a directory).
                compression (str) : Name of the compression to use, one of {'gzip', 'bz2', 'zip', 'xz', None}.
                profile_name (str) : Name of the AWS profile to use, False for an anonymous profile, or None.

            Returns:
                The entityset itself (`self`).
        '''
        pickle_options = {
            'format': 'pickle',
            'compression': compression,
            'profile_name': profile_name,
        }
        serialize.write_data_description(self, path, **pickle_options)
        return self
    def to_csv(self, path, sep=',', encoding='utf-8', engine='python', compression=None):
        '''Serialize this entityset in the csv format at the location given by `path`.

            Args:
                path (str) : Destination on disk to write to (will be created as a directory).
                sep (str) : String of length 1 used as the field delimiter in the output file.
                encoding (str) : Encoding to use in the output file. Defaults to 'utf-8'.
                engine (str) : Name of the engine to use, one of {'c', 'python'}.
                compression (str) : Name of the compression to use, one of {'gzip', 'bz2', 'zip', 'xz', None}.

            Returns:
                The entityset itself (`self`).
        '''
        # Collect the csv options up front; index=False is always forwarded
        # alongside the caller-supplied settings.
        csv_options = {
            'format': 'csv',
            'index': False,
            'sep': sep,
            'encoding': encoding,
            'engine': engine,
            'compression': compression,
        }
        serialize.write_data_description(self, path, **csv_options)
        return self
Exemple #7
0
    def to_parquet(self, path, engine='auto', compression=None, profile_name=None):
        '''Serialize this entityset in the parquet format at the location given by `path`.

            `path` may be a local path or a S3 path. When the target is S3,
            a tar archive of the files is written.

            Args:
                path (str): Destination to write to (will be created as a directory).
                engine (str) : Name of the engine to use, one of {'auto', 'pyarrow', 'fastparquet'}.
                compression (str) : Name of the compression to use, one of {'snappy', 'gzip', 'brotli', None}.
                profile_name (str) : Name of the AWS profile to use, False for an anonymous profile, or None.

            Returns:
                The entityset itself (`self`).
        '''
        parquet_options = {
            'format': 'parquet',
            'engine': engine,
            'compression': compression,
            'profile_name': profile_name,
        }
        serialize.write_data_description(self, path, **parquet_options)
        return self
Exemple #8
0
def test_serialize_subdirs_not_removed(es, tmpdir):
    # Seed the target directory with a subdirectory and a placeholder
    # description file before serializing into it.
    write_path = tmpdir.mkdir("test")
    test_dir = write_path.mkdir("test_dir")
    description_file = str(write_path.join('data_description.json'))
    with open(description_file, 'w') as handle:
        json.dump('__SAMPLE_TEXT__', handle)

    # Entitysets holding `ks.DataFrame` entities get the string 'none' rather
    # than None as the compression value.
    # NOTE(review): `ks` is presumably an optional koalas import that is falsy
    # when unavailable -- confirm against the module's imports.
    has_ks_entity = ks and any(isinstance(entity.df, ks.DataFrame) for entity in es.entities)
    compression = 'none' if has_ks_entity else None

    serialize.write_data_description(
        es,
        path=str(write_path),
        index='1',
        sep='\t',
        encoding='utf-8',
        compression=compression,
    )

    # The pre-existing subdirectory must still be there after the write ...
    assert os.path.exists(str(test_dir))
    # ... and the placeholder text must no longer appear in the description file.
    with open(description_file, 'r') as handle:
        description = json.load(handle)
    assert '__SAMPLE_TEXT__' not in description
Exemple #9
0
def test_serialize_invalid_formats(es, tmpdir):
    # An empty format string must be rejected with a ValueError whose message
    # lists every entry of serialize.FORMATS.
    supported_formats = ', '.join(serialize.FORMATS)
    error_text = 'must be one of the following formats: {}'.format(supported_formats)
    with pytest.raises(ValueError, match=error_text):
        serialize.write_data_description(es, path=str(tmpdir), format='')