import os
import shutil

import dask.dataframe as dd
import pandas as pd
import pytest

# NOTE: the featuretools-internal import paths below are best guesses from the
# names used in these tests; the module constants (CACHE, URL, S3_URL,
# TEST_S3_URL) and the make_public/to_pandas helpers are defined elsewhere in
# the original test package.
from featuretools.demo import load_mock_customer
from featuretools.entityset import EntitySet, deserialize
from featuretools.utils.gen_utils import Library


def test_to_pickle(es):
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_pickle(path)
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)

def test_serialize_s3_anon_parquet(es, s3_client, s3_bucket):
    if any(isinstance(entity.df, dd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)

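# make_public() is provided by the original test package. A minimal sketch of
# what such a helper might look like, assuming s3_client is a boto3 S3 service
# resource and s3_bucket is a Bucket resource; the name and the per-object ACL
# strategy here are assumptions, not the project's confirmed implementation.
def make_public_sketch(s3_client, s3_bucket):
    # Mark every object the serializer wrote as world-readable so the
    # anonymous (profile_name=False) reads in these tests can fetch them.
    for obj_summary in s3_bucket.objects.all():
        s3_client.ObjectAcl(s3_bucket.name, obj_summary.key).put(ACL='public-read')
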
def test_to_csv(es):
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_csv(path, encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)

def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile):
    if any(isinstance(entity.df, dd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name='test')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name='test')
    assert es.__eq__(new_es, deep=True)

def test_to_parquet(es, tmpdir):
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    # latlong values should survive the round trip as tuples/lists, not strings
    df = to_pandas(es['log'].df)
    new_df = to_pandas(new_es['log'].df)
    assert type(df['latlong'][0]) in (tuple, list)
    assert type(new_df['latlong'][0]) in (tuple, list)

def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = to_pandas(es['log'].df, index='id')
    new_df = to_pandas(new_es['log'].df, index='id')
    assert type(df['latlong'][0]) in (tuple, list)
    assert type(new_df['latlong'][0]) in (tuple, list)

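# to_pandas() is a helper from the original test suite. A rough sketch of its
# contract, assuming it only needs to collapse Dask frames to pandas and
# optionally restore an index (hypothetical reimplementation, not the
# project's code):
def to_pandas_sketch(df, index=None):
    if isinstance(df, dd.DataFrame):
        df = df.compute()
    if index is not None:
        # keep the index column as a regular column too, so lookups like
        # df['latlong'][0] behave the same across backends
        df = df.set_index(index, drop=False).sort_index()
    return df
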
def test_dask_to_parquet(dask_es, tmpdir):
    dask_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert dask_es.__eq__(new_es, deep=True)
    assert type(dask_es['log'].df.set_index('id')['latlong'].compute()[0]) == tuple
    assert type(new_es['log'].df.set_index('id')['latlong'].compute()[0]) == tuple

def test_to_parquet_manual_interesting_values(es, tmpdir):
    es.add_interesting_values(dataframe_name='log', values={'product_id': ['coke_zero']})
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    assert new_es['log'].ww['product_id'].ww.metadata['interesting_values'] == ['coke_zero']

def test_to_parquet_with_lti():
    es = load_mock_customer(return_entityset=True, random_seed=0)
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_parquet(path)
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)

def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile):
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name='test')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name='test')
    assert es.__eq__(new_es, deep=True)

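# setup_test_profile comes from the original conftest. A hedged sketch of the
# idea: point the AWS SDK at a throwaway credentials file defining a profile
# named 'test', so profile_name='test' resolves during the test. The file
# layout and env-var handling here are assumptions.
@pytest.fixture
def setup_test_profile_sketch(monkeypatch, tmp_path):
    creds = tmp_path / "credentials"
    creds.write_text(
        "[test]\n"
        "aws_access_key_id = fake_access_key\n"
        "aws_secret_access_key = fake_secret_key\n"
    )
    # boto3 reads this env var to locate the shared credentials file
    monkeypatch.setenv("AWS_SHARED_CREDENTIALS_FILE", str(creds))
    yield
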
def test_serialize_s3_anon_parquet(es, s3_client, s3_bucket):
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)

def test_serialize_s3_csv(es, s3_client, s3_bucket):
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)

def test_serialize_s3_parquet(es, s3_client, s3_bucket):
    if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask or Koalas')
    es.to_parquet(TEST_S3_URL)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)

def test_to_parquet(es, tmpdir):
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = es['log'].df
    new_df = new_es['log'].df
    if isinstance(df, dd.DataFrame):
        df = df.compute()
    if isinstance(new_df, dd.DataFrame):
        new_df = new_df.compute()
    assert type(df['latlong'][0]) == tuple
    assert type(new_df['latlong'][0]) == tuple

def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = es['log'].df
    if isinstance(df, dd.DataFrame):
        df = df.compute().set_index('id')
    new_df = new_es['log'].df
    if isinstance(new_df, dd.DataFrame):
        new_df = new_df.compute().set_index('id')
    assert type(df['latlong'][0]) == tuple
    assert type(new_df['latlong'][0]) == tuple

def test_serialize_s3_anon_csv(es, s3_client, s3_bucket):
    if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask or Koalas')
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)

def test_to_parquet_with_lti(tmpdir):
    es = load_mock_customer(return_entityset=True, random_seed=0)
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)

def test_to_pickle_manual_interesting_values(pd_es, tmpdir):
    pd_es['log']['product_id'].interesting_values = ["coke_zero"]
    pd_es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)

def test_to_pickle(pd_es, tmpdir):
    pd_es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)
    assert type(pd_es['log'].df['latlong'][0]) == tuple
    assert type(new_es['log'].df['latlong'][0]) == tuple

def test_default_s3_csv(es):
    new_es = deserialize.read_entityset(S3_URL)
    assert es.__eq__(new_es, deep=True)

def test_serialize_s3_anon_pickle(pd_es, s3_client, s3_bucket):
    pd_es.to_pickle(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert pd_es.__eq__(new_es, deep=True)

def test_dask_to_parquet_manual_interesting_values(dask_es, tmpdir):
    dask_es['log']['product_id'].interesting_values = ["coke_zero"]
    dask_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert dask_es.__eq__(new_es, deep=True)

def test_to_parquet_interesting_values(pd_es, tmpdir):
    pd_es.add_interesting_values()
    pd_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)

def test_to_pickle_id_none(path_management):
    es = EntitySet()
    es.to_pickle(path_management)
    new_es = deserialize.read_entityset(path_management)
    assert es.__eq__(new_es, deep=True)

def test_to_pickle_id_none(tmpdir):
    es = EntitySet()
    es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)

def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    assert type(es['log'].df['latlong'][0]) == tuple
    assert type(new_es['log'].df['latlong'][0]) == tuple

def test_deserialize_url_csv(es):
    new_es = deserialize.read_entityset(URL)
    assert es.__eq__(new_es, deep=True)

def test_serialize_s3_csv(es, s3_client, s3_bucket):
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)

def test_anon_s3_csv(es):
    new_es = deserialize.read_entityset(S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)

def test_serialize_s3_parquet(es, s3_client, s3_bucket):
    es.to_parquet(TEST_S3_URL)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)

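# The s3_client / s3_bucket fixtures come from the original conftest. One
# plausible shape, assuming moto (pre-5.x, which exposes mock_s3) fakes S3
# locally; the bucket name and region are placeholders, and the real fixtures
# may differ:
@pytest.fixture
def s3_client_sketch():
    import boto3
    from moto import mock_s3
    with mock_s3():
        # a boto3 service resource, backed entirely by moto's in-memory S3
        yield boto3.resource('s3', region_name='us-east-1')


@pytest.fixture
def s3_bucket_sketch(s3_client_sketch):
    # create the bucket the serialization tests write into
    return s3_client_sketch.create_bucket(Bucket='featuretools-static')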