Esempio n. 1
0
def test_to_csv(sample_df, tmpdir):
    """Round-trip an annotated DataTable through CSV and compare the result.

    The table is built with a name, an index, a semantic tag, an ``Ordinal``
    logical type, column descriptions and column metadata, then written to
    ``tmpdir`` with ``to_csv`` and read back with
    ``deserialize.read_datatable``. Both the underlying data and the
    DataTable objects themselves must compare equal.
    """
    dt = DataTable(sample_df,
                   name='test_data',
                   index='id',
                   semantic_tags={'id': 'tag1'},
                   logical_types={'age': Ordinal(order=[25, 33, 57])},
                   column_descriptions={
                       'signup_date': 'original signup date',
                       'age': 'age of the user'
                   },
                   column_metadata={
                       'id': {
                           'is_sorted': True
                       },
                       'age': {
                           'interesting_values': [33, 57]
                       }
                   })

    dt.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    _dt = deserialize.read_datatable(str(tmpdir))

    # Convert each frame with its OWN table's index, as the other round-trip
    # tests in this file do. Using the deserialized table's index for the
    # original frame would make the expected value depend on the object
    # under test.
    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt
Esempio n. 2
0
def test_deserialize_s3_csv(sample_df_pandas):
    """Read a DataTable from ``S3_URL`` and compare it to a local one.

    The reference table is built from ``sample_df_pandas`` with ``'id'`` as
    its index; the table read from ``S3_URL`` must match it both in data and
    as a DataTable object.
    """
    expected_table = DataTable(sample_df_pandas, index='id')
    loaded_table = deserialize.read_datatable(S3_URL)

    expected_frame = to_pandas(expected_table.to_dataframe(),
                               index=expected_table.index)
    loaded_frame = to_pandas(loaded_table.to_dataframe(),
                             index=loaded_table.index)

    pd.testing.assert_frame_equal(expected_frame, loaded_frame)
    assert expected_table == loaded_table
Esempio n. 3
0
def test_deserialize_url_csv_anon(sample_df_pandas):
    """Read a DataTable from ``URL`` with ``profile_name=False`` and compare.

    The reference table is built from ``sample_df_pandas`` with ``'id'`` as
    its index; the table read from ``URL`` must match it both in data and as
    a DataTable object.
    """
    expected_table = DataTable(sample_df_pandas, index='id')
    loaded_table = deserialize.read_datatable(URL, profile_name=False)

    expected_frame = to_pandas(expected_table.to_dataframe(),
                               index=expected_table.index)
    loaded_frame = to_pandas(loaded_table.to_dataframe(),
                             index=loaded_table.index)

    pd.testing.assert_frame_equal(expected_frame, loaded_frame)
    assert expected_table == loaded_table
Esempio n. 4
0
def test_to_parquet(sample_df, tmpdir):
    """Round-trip a DataTable through parquet files in ``tmpdir``.

    The table (indexed on ``'id'``) is written with ``to_parquet`` and read
    back with ``deserialize.read_datatable``; data and DataTable objects must
    both compare equal.
    """
    target_dir = str(tmpdir)

    original = DataTable(sample_df, index='id')
    original.to_parquet(target_dir)
    restored = deserialize.read_datatable(target_dir)

    original_frame = to_pandas(original.to_dataframe(),
                               index=original.index,
                               sort_index=True)
    restored_frame = to_pandas(restored.to_dataframe(),
                               index=restored.index,
                               sort_index=True)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 5
0
def test_serialize_s3_pickle_anon(sample_df_pandas, s3_client, s3_bucket):
    """Pickle a pandas-backed DataTable to ``TEST_S3_URL`` and read it back.

    Both the write and the read pass ``profile_name=False``;
    ``make_public(s3_client, s3_bucket)`` is called between them. The table
    read back must equal the original in data and as a DataTable object.
    """
    original = DataTable(sample_df_pandas)
    original.to_pickle(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name=False)

    original_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 6
0
def test_serialize_s3_parquet_anon(sample_df, s3_client, s3_bucket):
    """Write a DataTable as parquet to ``TEST_S3_URL`` and read it back.

    ``xfail_tmp_disappears(sample_df)`` runs first (helper defined outside
    this block). Both the write and the read pass ``profile_name=False``;
    ``make_public(s3_client, s3_bucket)`` is called between them.
    """
    xfail_tmp_disappears(sample_df)

    original = DataTable(sample_df)
    original.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name=False)

    original_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 7
0
def test_to_parquet_with_latlong(latlong_df, tmpdir):
    """Round-trip a DataTable whose columns are all ``'LatLong'`` via parquet.

    Every column of ``latlong_df`` is assigned the ``'LatLong'`` logical
    type. The table is written to ``tmpdir`` with ``to_parquet`` and read
    back; data and DataTable objects must both compare equal.
    """
    target_dir = str(tmpdir)
    latlong_types = {column: 'LatLong' for column in latlong_df.columns}

    original = DataTable(latlong_df, logical_types=latlong_types)
    original.to_parquet(target_dir)
    restored = deserialize.read_datatable(target_dir)

    original_frame = to_pandas(original.to_dataframe(),
                               index=original.index,
                               sort_index=True)
    restored_frame = to_pandas(restored.to_dataframe(),
                               index=restored.index,
                               sort_index=True)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 8
0
def test_s3_test_profile(sample_df, s3_client, s3_bucket, setup_test_profile):
    """Round-trip a DataTable as CSV via ``TEST_S3_URL`` with profile 'test'.

    ``xfail_tmp_disappears(sample_df)`` runs first (helper defined outside
    this block). Both the write and the read pass ``profile_name='test'``;
    ``make_public(s3_client, s3_bucket)`` is called between them.
    """
    xfail_tmp_disappears(sample_df)

    original = DataTable(sample_df)
    original.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name='test')
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name='test')

    original_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 9
0
def test_to_pickle(sample_df, tmpdir):
    """Check pickle serialization for pandas and non-pandas inputs.

    When ``sample_df`` is not a ``pd.DataFrame``, ``to_pickle`` must raise a
    ``ValueError`` with the incompatibility message. Otherwise the table is
    pickled to ``tmpdir``, read back, and compared to the original.
    """
    target_dir = str(tmpdir)
    original = DataTable(sample_df)

    if not isinstance(sample_df, pd.DataFrame):
        # Non-pandas inputs are expected to be rejected.
        expected_error = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=expected_error):
            original.to_pickle(target_dir)
        return

    original.to_pickle(target_dir)
    restored = deserialize.read_datatable(target_dir)

    original_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 10
0
def test_to_csv_S3(sample_df, s3_client, s3_bucket):
    """Round-trip an annotated DataTable as CSV through ``TEST_S3_URL``.

    ``xfail_tmp_disappears(sample_df)`` runs first (helper defined outside
    this block). The table carries a name, an index, a semantic tag and an
    ``Ordinal`` logical type; ``make_public(s3_client, s3_bucket)`` is called
    between the write and the read.
    """
    xfail_tmp_disappears(sample_df)

    table_options = dict(
        name='test_data',
        index='id',
        semantic_tags={'id': 'tag1'},
        logical_types={'age': Ordinal(order=[25, 33, 57])},
    )
    original = DataTable(sample_df, **table_options)
    original.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)

    restored = deserialize.read_datatable(TEST_S3_URL)

    original_frame = to_pandas(original.to_dataframe(), index=original.index)
    restored_frame = to_pandas(restored.to_dataframe(), index=restored.index)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored
Esempio n. 11
0
def test_to_pickle_with_latlong(latlong_df, tmpdir):
    """Check pickle serialization of a DataTable with ``'LatLong'`` columns.

    Every column of ``latlong_df`` is assigned the ``'LatLong'`` logical
    type. When ``latlong_df`` is not a ``pd.DataFrame``, ``to_pickle`` must
    raise a ``ValueError`` with the incompatibility message. Otherwise the
    table is pickled to ``tmpdir``, read back, and compared to the original.
    """
    target_dir = str(tmpdir)
    latlong_types = {column: 'LatLong' for column in latlong_df.columns}
    original = DataTable(latlong_df, logical_types=latlong_types)

    if not isinstance(latlong_df, pd.DataFrame):
        # Non-pandas inputs are expected to be rejected.
        expected_error = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=expected_error):
            original.to_pickle(target_dir)
        return

    original.to_pickle(target_dir)
    restored = deserialize.read_datatable(target_dir)

    original_frame = to_pandas(original.to_dataframe(),
                               index=original.index,
                               sort_index=True)
    restored_frame = to_pandas(restored.to_dataframe(),
                               index=restored.index,
                               sort_index=True)

    pd.testing.assert_frame_equal(original_frame, restored_frame)
    assert original == restored