Example #1
def test_to_pickle(es):
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_pickle(path)
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)
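These snippets are excerpts from Featuretools' serialization tests and omit their module-level setup. A plausible sketch of the imports and constants they lean on follows; the fixture definitions (`es`, `pd_es`, `dask_es`, `s3_client`, `s3_bucket`, `setup_test_profile`) and the exact URL values are assumptions reconstructed from usage, not the verbatim source:

```python
# Hypothetical module header, reconstructed from how the excerpts use
# these names; the URL values and CACHE path are placeholders, not the
# verbatim Featuretools test source.
import os
import shutil

import dask.dataframe as dd
import pandas as pd
import pytest

from featuretools import EntitySet
from featuretools.demo import load_mock_customer
from featuretools.entityset import deserialize
from featuretools.utils.gen_utils import Library

CACHE = os.path.join(os.path.dirname(__file__), '.cache')  # scratch dir for the path-based tests
TEST_S3_URL = 's3://test-bucket/test-entityset'            # placeholder: bucket written by the S3 tests
S3_URL = 's3://some-public-bucket/fixture-entityset'       # placeholder: pre-existing public fixture
URL = 'https://example.com/fixture-entityset.tar'          # placeholder: HTTP-hosted fixture
```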
Example #2
def test_serialize_s3_anon_parquet(es, s3_client, s3_bucket):
    if any(isinstance(entity.df, dd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)
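Each S3 round trip calls a `make_public` helper between the write and the read so that the anonymous (`profile_name=False`) or test-profile reader can fetch the uploaded object. The helper itself is not shown in these excerpts; a plausible sketch using boto3's resource API (the fixture types and the ACL value are assumptions):

```python
# Hypothetical reconstruction of the make_public helper used above;
# assumes s3_client is a boto3 S3 resource and s3_bucket a Bucket.
def make_public(s3_client, s3_bucket):
    # Grab the single object the test just uploaded and open its ACL
    # to anonymous reads so the deserialize step can fetch it.
    key = list(s3_bucket.objects.all())[0].key
    s3_client.ObjectAcl(s3_bucket.name, key).put(ACL='public-read')
```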
Example #3
def test_to_csv(es):
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_csv(path, encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)
Example #4
def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile):
    if any(isinstance(entity.df, dd.DataFrame) for entity in es.entities):
        pytest.xfail('tmp file disappears after deserialize step, cannot check equality with Dask')
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name='test')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name='test')
    assert es.__eq__(new_es, deep=True)
Example #5
def test_to_parquet(es, tmpdir):
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = to_pandas(es['log'].df)
    new_df = to_pandas(new_es['log'].df)
    assert type(df['latlong'][0]) in (tuple, list)
    assert type(new_df['latlong'][0]) in (tuple, list)
Example #6
def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = to_pandas(es['log'].df, index='id')
    new_df = to_pandas(new_es['log'].df, index='id')
    assert type(df['latlong'][0]) in (tuple, list)
    assert type(new_df['latlong'][0]) in (tuple, list)
Example #7
def test_dask_to_parquet(dask_es, tmpdir):
    dask_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert dask_es.__eq__(new_es, deep=True)
    assert type(
        dask_es['log'].df.set_index('id')['latlong'].compute()[0]) == tuple
    assert type(
        new_es['log'].df.set_index('id')['latlong'].compute()[0]) == tuple
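Note that the Dask variant above cannot index into a lazy dataframe directly; it has to `.compute()` the column before type-checking a single `latlong` value. A standalone illustration of that pattern (the tiny frame here is made up to mirror the `log` table's shape):

```python
import dask.dataframe as dd
import pandas as pd

# A small stand-in for the 'log' table with a tuple-valued latlong column.
pdf = pd.DataFrame({'id': [0, 1], 'latlong': [(1.0, 2.0), (3.0, 4.0)]})
ddf = dd.from_pandas(pdf, npartitions=1)

# Element access needs materialized data, hence the .compute() call.
first = ddf.set_index('id')['latlong'].compute()[0]
assert type(first) == tuple
```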
Example #8
def test_to_parquet_manual_interesting_values(es, tmpdir):
    es.add_interesting_values(dataframe_name='log',
                              values={'product_id': ['coke_zero']})
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    assert new_es['log'].ww['product_id'].ww.metadata[
        'interesting_values'] == ['coke_zero']
Example #9
def test_to_parquet_with_lti():
    es = load_mock_customer(return_entityset=True, random_seed=0)
    path = os.path.join(CACHE, 'es')
    os.makedirs(path)
    es.to_parquet(path)
    new_es = deserialize.read_entityset(path)
    assert es.__eq__(new_es, deep=True)
    shutil.rmtree(path)
Example #10
def test_s3_test_profile(es, s3_client, s3_bucket, setup_test_profile):
    es.to_csv(TEST_S3_URL,
              encoding='utf-8',
              engine='python',
              profile_name='test')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name='test')
    assert es.__eq__(new_es, deep=True)
Example #11
def test_serialize_s3_anon_parquet(es, s3_client, s3_bucket):
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail(
            'tmp file disappears after deserialize step, cannot check equality with Dask'
        )
    es.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)
Example #12
def test_serialize_s3_csv(es, s3_client, s3_bucket):
    if es.dataframe_type != Library.PANDAS.value:
        pytest.xfail(
            'tmp file disappears after deserialize step, cannot check equality with Dask'
        )
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)
Example #13
def test_serialize_s3_parquet(es, s3_client, s3_bucket):
    if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
        pytest.xfail(
            'tmp file disappears after deserialize step, cannot check equality with Dask or Koalas'
        )
    es.to_parquet(TEST_S3_URL)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)
Example #14
def test_to_parquet(es, tmpdir):
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = es['log'].df
    new_df = new_es['log'].df
    if isinstance(df, dd.DataFrame):
        df = df.compute()
    if isinstance(new_df, dd.DataFrame):
        new_df = new_df.compute()
    assert type(df['latlong'][0]) == tuple
    assert type(new_df['latlong'][0]) == tuple
Example #15
def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    df = es['log'].df
    if isinstance(df, dd.DataFrame):
        df = df.compute().set_index('id')
    new_df = new_es['log'].df
    if isinstance(new_df, dd.DataFrame):
        new_df = new_df.compute().set_index('id')
    assert type(df['latlong'][0]) == tuple
    assert type(new_df['latlong'][0]) == tuple
Example #16
def test_serialize_s3_anon_csv(es, s3_client, s3_bucket):
    if not all(isinstance(entity.df, pd.DataFrame) for entity in es.entities):
        pytest.xfail(
            'tmp file disappears after deserialize step, cannot check equality with Dask or Koalas'
        )
    es.to_csv(TEST_S3_URL,
              encoding='utf-8',
              engine='python',
              profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)
Example #17
def test_to_parquet_with_lti(tmpdir):
    es = load_mock_customer(return_entityset=True, random_seed=0)
    es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
Example #18
def test_to_pickle_manual_interesting_values(pd_es, tmpdir):
    pd_es['log']['product_id'].interesting_values = ["coke_zero"]
    pd_es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)
Example #19
def test_to_pickle(pd_es, tmpdir):
    pd_es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)
    assert type(pd_es['log'].df['latlong'][0]) == tuple
    assert type(new_es['log'].df['latlong'][0]) == tuple
Example #20
def test_default_s3_csv(es):
    new_es = deserialize.read_entityset(S3_URL)
    assert es.__eq__(new_es, deep=True)
Example #21
def test_serialize_s3_anon_pickle(pd_es, s3_client, s3_bucket):
    pd_es.to_pickle(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL, profile_name=False)
    assert pd_es.__eq__(new_es, deep=True)
Example #22
def test_dask_to_parquet_manual_interesting_values(dask_es, tmpdir):
    dask_es['log']['product_id'].interesting_values = ["coke_zero"]
    dask_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert dask_es.__eq__(new_es, deep=True)
Example #23
def test_to_parquet_interesting_values(pd_es, tmpdir):
    pd_es.add_interesting_values()
    pd_es.to_parquet(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert pd_es.__eq__(new_es, deep=True)
Example #24
def test_to_pickle_id_none(path_management):
    es = EntitySet()
    es.to_pickle(path_management)
    new_es = deserialize.read_entityset(path_management)
    assert es.__eq__(new_es, deep=True)
Example #25
def test_to_pickle_id_none(tmpdir):
    es = EntitySet()
    es.to_pickle(str(tmpdir))
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
Example #26
def test_to_csv(es, tmpdir):
    es.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    new_es = deserialize.read_entityset(str(tmpdir))
    assert es.__eq__(new_es, deep=True)
    assert type(es['log'].df['latlong'][0]) == tuple
    assert type(new_es['log'].df['latlong'][0]) == tuple
Example #27
def test_deserialize_url_csv(es):
    new_es = deserialize.read_entityset(URL)
    assert es.__eq__(new_es, deep=True)
Example #28
def test_serialize_s3_csv(es, s3_client, s3_bucket):
    es.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)
Example #29
def test_anon_s3_csv(es):
    new_es = deserialize.read_entityset(S3_URL, profile_name=False)
    assert es.__eq__(new_es, deep=True)
Example #30
def test_serialize_s3_parquet(es, s3_client, s3_bucket):
    es.to_parquet(TEST_S3_URL)
    make_public(s3_client, s3_bucket)
    new_es = deserialize.read_entityset(TEST_S3_URL)
    assert es.__eq__(new_es, deep=True)
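Stripped of fixtures, the round trip every example above exercises is just a serialize call paired with `deserialize.read_entityset`. A minimal sketch, assuming the 0.x-era layout in which `read_entityset` lives under `featuretools.entityset.deserialize`:

```python
import tempfile

from featuretools.demo import load_mock_customer
from featuretools.entityset import deserialize

# Build the demo EntitySet, write it out, and read it back.
es = load_mock_customer(return_entityset=True, random_seed=0)

with tempfile.TemporaryDirectory() as path:
    es.to_parquet(path)  # to_csv(...) and to_pickle(...) follow the same shape
    new_es = deserialize.read_entityset(path)

# deep=True compares the underlying dataframes as well as the metadata.
assert es.__eq__(new_es, deep=True)
```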