def test_to_csv(sample_df, tmpdir):
    """Round-trip a fully-annotated DataTable through CSV serialization.

    Covers name, index, semantic tags, an Ordinal logical type, column
    descriptions, and column metadata, then verifies both the underlying
    dataframe and the DataTable equality after deserialization.
    """
    dt = DataTable(sample_df,
                   name='test_data',
                   index='id',
                   semantic_tags={'id': 'tag1'},
                   logical_types={'age': Ordinal(order=[25, 33, 57])},
                   column_descriptions={
                       'signup_date': 'original signup date',
                       'age': 'age of the user'
                   },
                   column_metadata={
                       'id': {'is_sorted': True},
                       'age': {'interesting_values': [33, 57]}
                   })
    dt.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    _dt = deserialize.read_datatable(str(tmpdir))

    # Use each table's own index when converting to pandas. The original
    # passed _dt.index for both frames; the values are identical (both 'id'),
    # but this matches the convention of the other round-trip tests here.
    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt
def test_deserialize_s3_csv(sample_df_pandas):
    """A DataTable read from the fixed S3 CSV URL matches one built locally."""
    expected = DataTable(sample_df_pandas, index='id')
    restored = deserialize.read_datatable(S3_URL)
    pd.testing.assert_frame_equal(
        to_pandas(expected.to_dataframe(), index=expected.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert expected == restored
def test_deserialize_url_csv_anon(sample_df_pandas):
    """Anonymous (no-profile) deserialization from a URL matches a local build."""
    expected = DataTable(sample_df_pandas, index='id')
    restored = deserialize.read_datatable(URL, profile_name=False)
    pd.testing.assert_frame_equal(
        to_pandas(expected.to_dataframe(), index=expected.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert expected == restored
def test_to_parquet(sample_df, tmpdir):
    """Parquet serialization to a local directory round-trips a DataTable."""
    original = DataTable(sample_df, index='id')
    original.to_parquet(str(tmpdir))
    restored = deserialize.read_datatable(str(tmpdir))
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index, sort_index=True),
        to_pandas(restored.to_dataframe(), index=restored.index, sort_index=True))
    assert original == restored
def test_serialize_s3_pickle_anon(sample_df_pandas, s3_client, s3_bucket):
    """Pickle round-trip through S3 with anonymous access (profile_name=False)."""
    original = DataTable(sample_df_pandas)
    original.to_pickle(TEST_S3_URL, profile_name=False)
    # Object must be publicly readable for the anonymous read below.
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name=False)
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert original == restored
def test_serialize_s3_parquet_anon(sample_df, s3_client, s3_bucket):
    """Parquet round-trip through S3 with anonymous access (profile_name=False)."""
    xfail_tmp_disappears(sample_df)
    original = DataTable(sample_df)
    original.to_parquet(TEST_S3_URL, profile_name=False)
    # Object must be publicly readable for the anonymous read below.
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name=False)
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert original == restored
def test_to_parquet_with_latlong(latlong_df, tmpdir):
    """LatLong-typed columns survive a parquet serialization round trip."""
    latlong_types = {col: 'LatLong' for col in latlong_df.columns}
    original = DataTable(latlong_df, logical_types=latlong_types)
    original.to_parquet(str(tmpdir))
    restored = deserialize.read_datatable(str(tmpdir))
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index, sort_index=True),
        to_pandas(restored.to_dataframe(), index=restored.index, sort_index=True))
    assert original == restored
def test_s3_test_profile(sample_df, s3_client, s3_bucket, setup_test_profile):
    """CSV round-trip through S3 using a named AWS profile ('test')."""
    xfail_tmp_disappears(sample_df)
    original = DataTable(sample_df)
    original.to_csv(TEST_S3_URL, encoding='utf-8', engine='python',
                    profile_name='test')
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL, profile_name='test')
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert original == restored
def test_to_pickle(sample_df, tmpdir):
    """Pickle round-trips pandas-backed tables; other backends raise ValueError."""
    dt = DataTable(sample_df)
    if isinstance(sample_df, pd.DataFrame):
        dt.to_pickle(str(tmpdir))
        restored = deserialize.read_datatable(str(tmpdir))
        pd.testing.assert_frame_equal(
            to_pandas(dt.to_dataframe(), index=dt.index),
            to_pandas(restored.to_dataframe(), index=restored.index))
        assert dt == restored
    else:
        # Non-pandas (e.g. dask/koalas) frames reject pickle serialization.
        msg = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=msg):
            dt.to_pickle(str(tmpdir))
def test_to_csv_S3(sample_df, s3_client, s3_bucket):
    """CSV round-trip through S3 preserves typing info, tags, and data."""
    xfail_tmp_disappears(sample_df)
    original = DataTable(sample_df,
                         name='test_data',
                         index='id',
                         semantic_tags={'id': 'tag1'},
                         logical_types={'age': Ordinal(order=[25, 33, 57])})
    original.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    restored = deserialize.read_datatable(TEST_S3_URL)
    pd.testing.assert_frame_equal(
        to_pandas(original.to_dataframe(), index=original.index),
        to_pandas(restored.to_dataframe(), index=restored.index))
    assert original == restored
def test_to_pickle_with_latlong(latlong_df, tmpdir):
    """LatLong columns pickle-round-trip for pandas; other backends raise."""
    latlong_types = {col: 'LatLong' for col in latlong_df.columns}
    dt = DataTable(latlong_df, logical_types=latlong_types)
    if isinstance(latlong_df, pd.DataFrame):
        dt.to_pickle(str(tmpdir))
        restored = deserialize.read_datatable(str(tmpdir))
        pd.testing.assert_frame_equal(
            to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
            to_pandas(restored.to_dataframe(), index=restored.index, sort_index=True))
        assert dt == restored
    else:
        # Non-pandas (e.g. dask/koalas) frames reject pickle serialization.
        msg = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=msg):
            dt.to_pickle(str(tmpdir))