def test_setitem_new_column(sample_df):
    dt = DataTable(sample_df)

    new_series = pd.Series([1, 2, 3])
    if ks and isinstance(sample_df, ks.DataFrame):
        dtype = 'int64'
        new_series = ks.Series(new_series)
    else:
        dtype = 'Int64'
    new_col = DataColumn(new_series, use_standard_tags=False)
    assert new_col.name is None

    dt['test_col2'] = new_col
    updated_df = dt.to_dataframe()
    assert 'test_col2' in dt.columns
    assert dt['test_col2'].logical_type == Integer
    assert dt['test_col2'].semantic_tags == set()
    assert dt['test_col2'].name == 'test_col2'
    assert dt['test_col2']._series.name == 'test_col2'
    assert 'test_col2' in updated_df.columns
    assert updated_df['test_col2'].dtype == dtype

    # Standard tags and no logical type
    new_series = pd.Series(['new', 'column', 'inserted'], name='test_col')
    if ks and isinstance(sample_df, ks.DataFrame):
        dtype = 'object'
        new_series = ks.Series(new_series)
    else:
        dtype = 'category'
    new_col = DataColumn(new_series, use_standard_tags=True)
    dt['test_col'] = new_col
    updated_df = dt.to_dataframe()
    assert 'test_col' in dt.columns
    assert dt['test_col'].logical_type == Categorical
    assert dt['test_col'].semantic_tags == {'category'}
    assert dt['test_col'].name == 'test_col'
    assert dt['test_col']._series.name == 'test_col'
    assert 'test_col' in updated_df.columns
    assert updated_df['test_col'].dtype == dtype

    # Add with logical type and semantic tag
    new_series = pd.Series([1, 2, 3])
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series = ks.Series(new_series)
    new_col = DataColumn(new_series,
                         logical_type=Double,
                         use_standard_tags=False,
                         semantic_tags={'test_tag'})
    dt['test_col3'] = new_col
    updated_df = dt.to_dataframe()
    assert 'test_col3' in dt.columns
    assert dt['test_col3'].logical_type == Double
    assert dt['test_col3'].semantic_tags == {'test_tag'}
    assert dt['test_col3'].name == 'test_col3'
    assert dt['test_col3']._series.name == 'test_col3'
    assert 'test_col3' in updated_df.columns
    assert updated_df['test_col3'].dtype == 'float'

def test_sets_category_dtype_on_init():
    column_name = 'test_series'
    series_list = [
        pd.Series(['a', 'b', 'c'], name=column_name),
        pd.Series(['a', None, 'c'], name=column_name),
        pd.Series(['a', np.nan, 'c'], name=column_name),
        pd.Series(['a', pd.NA, 'c'], name=column_name),
        pd.Series(['a', pd.NaT, 'c'], name=column_name),
    ]

    logical_types = [
        Categorical,
        CountryCode,
        Ordinal(order=['a', 'b', 'c']),
        SubRegionCode,
        ZIPCode,
    ]

    for series in series_list:
        series = series.astype('object')
        for logical_type in logical_types:
            ltypes = {
                column_name: logical_type,
            }
            dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
            assert dt.columns[column_name].logical_type == logical_type
            assert dt.columns[column_name].dtype == logical_type.pandas_dtype
            assert dt.to_dataframe()[column_name].dtype == logical_type.pandas_dtype

def test_sets_string_dtype_on_init():
    column_name = 'test_series'
    series_list = [
        pd.Series(['a', 'b', 'c'], name=column_name),
        pd.Series(['a', None, 'c'], name=column_name),
        pd.Series(['a', np.nan, 'c'], name=column_name),
        pd.Series(['a', pd.NA, 'c'], name=column_name),
    ]

    logical_types = [
        Filepath,
        FullName,
        IPAddress,
        NaturalLanguage,
        PhoneNumber,
        URL,
    ]

    for series in series_list:
        series = series.astype('object')
        for logical_type in logical_types:
            ltypes = {
                column_name: logical_type,
            }
            dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
            assert dt.columns[column_name].logical_type == logical_type
            assert dt.columns[column_name].dtype == logical_type.pandas_dtype
            assert dt.to_dataframe()[column_name].dtype == logical_type.pandas_dtype

def test_to_csv(sample_df, tmpdir):
    dt = DataTable(sample_df,
                   name='test_data',
                   index='id',
                   semantic_tags={'id': 'tag1'},
                   logical_types={'age': Ordinal(order=[25, 33, 57])},
                   column_descriptions={
                       'signup_date': 'original signup date',
                       'age': 'age of the user'
                   },
                   column_metadata={
                       'id': {'is_sorted': True},
                       'age': {'interesting_values': [33, 57]}
                   })
    dt.to_csv(str(tmpdir), encoding='utf-8', engine='python')
    _dt = deserialize.read_datatable(str(tmpdir))

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=_dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt

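# NOTE: the serialization tests in this module compare round-tripped frames
# through a `to_pandas` helper imported from the shared test utilities, which
# is not defined in this section. A minimal sketch of such a helper follows,
# assuming the optional Dask and Koalas imports are bound to `dd` and `ks`;
# the name `_to_pandas_sketch` is hypothetical, chosen so it does not shadow
# the real helper.
def _to_pandas_sketch(df, index=None, sort_index=False):
    """Coerce a Dask or Koalas DataFrame/Series to pandas for comparison."""
    if dd and isinstance(df, (dd.DataFrame, dd.Series)):
        df = df.compute()  # materialize the lazy Dask collection
    elif ks and isinstance(df, (ks.DataFrame, ks.Series)):
        df = df.to_pandas()  # Koalas provides a direct pandas conversion
    if index is not None:
        df = df.set_index(index, drop=False)  # realign on the DataTable index
        df.index.name = None
    if sort_index:
        df = df.sort_index()  # row order is not guaranteed after a round trip
    return df
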
def test_deserialize_s3_csv(sample_df_pandas):
    dt = DataTable(sample_df_pandas, index='id')
    _dt = deserialize.read_datatable(S3_URL)

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert dt == _dt

def test_sets_object_dtype_on_update(latlong_df):
    for column_name in latlong_df.columns:
        ltypes = {column_name: NaturalLanguage}
        dt = DataTable(latlong_df.loc[:, [column_name]], logical_types=ltypes)
        dt = dt.set_types(logical_types={column_name: LatLong})
        assert dt.columns[column_name].logical_type == LatLong
        assert dt.columns[column_name].dtype == LatLong.pandas_dtype
        assert dt.to_dataframe()[column_name].dtype == LatLong.pandas_dtype

def test_set_index(sample_df):
    # Test setting index with set_index()
    dt = DataTable(sample_df)
    new_dt = dt.set_index('id')
    assert new_dt is not dt
    assert new_dt.index == 'id'
    assert dt.index is None
    assert new_dt.columns['id'].semantic_tags == {'index'}
    non_index_cols = [col for col in new_dt.columns.values() if col.name != 'id']
    assert all(['index' not in col.semantic_tags for col in non_index_cols])

    # Test changing index with set_index()
    new_dt2 = new_dt.set_index('full_name')
    assert new_dt.index == 'id'
    assert new_dt2.columns['full_name'].semantic_tags == {'index'}
    non_index_cols = [col for col in new_dt2.columns.values() if col.name != 'full_name']
    assert all(['index' not in col.semantic_tags for col in non_index_cols])

    # Test setting index using setter
    dt = DataTable(sample_df)
    dt.index = 'id'
    assert dt.index == 'id'
    assert 'index' in dt.columns['id'].semantic_tags
    non_index_cols = [col for col in dt.columns.values() if col.name != 'id']
    assert all(['index' not in col.semantic_tags for col in non_index_cols])

    # Test changing index with setter
    dt.index = 'full_name'
    assert 'index' in dt.columns['full_name'].semantic_tags
    non_index_cols = [col for col in dt.columns.values() if col.name != 'full_name']
    assert all(['index' not in col.semantic_tags for col in non_index_cols])

    # Test that changing the index also changes the underlying DataFrame - pandas only
    if isinstance(sample_df, pd.DataFrame):
        dt = DataTable(sample_df)
        dt.index = 'id'
        assert (dt.to_dataframe().index == [0, 1, 2, 3]).all()
        assert (dt._dataframe.index == [0, 1, 2, 3]).all()
        dt.index = 'full_name'
        assert (dt.to_dataframe().index == dt.to_dataframe()['full_name']).all()
        assert (dt._dataframe.index == dt.to_dataframe()['full_name']).all()

def test_to_parquet(sample_df, tmpdir):
    dt = DataTable(sample_df, index='id')
    dt.to_parquet(str(tmpdir))
    _dt = deserialize.read_datatable(str(tmpdir))

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt

def test_deserialize_url_csv_anon(sample_df_pandas):
    dt = DataTable(sample_df_pandas, index='id')
    _dt = deserialize.read_datatable(URL, profile_name=False)

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert dt == _dt

def test_setitem_overwrite_column(sample_df):
    dt = DataTable(sample_df,
                   index='id',
                   time_index='signup_date',
                   use_standard_tags=True)

    # Overwrite column with no change in types
    original_col = dt['age']
    new_series = pd.Series([1, 2, 3])
    if ks and isinstance(sample_df, ks.DataFrame):
        dtype = 'int64'
        new_series = ks.Series(new_series)
    else:
        dtype = 'Int64'
    overwrite_col = DataColumn(new_series, use_standard_tags=True)
    dt['age'] = overwrite_col
    updated_df = dt.to_dataframe()

    assert 'age' in dt.columns
    assert dt['age'].logical_type == original_col.logical_type
    assert dt['age'].semantic_tags == original_col.semantic_tags
    assert 'age' in updated_df.columns
    assert updated_df['age'].dtype == dtype
    assert original_col.to_series() is not dt['age'].to_series()

    # Change dtype, logical types, and tags with conflicting use_standard_tags
    original_col = dt['full_name']
    new_series = pd.Series([True, False, False])
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series = ks.Series(new_series)
        dtype = 'bool'
    else:
        dtype = 'boolean'
    overwrite_col = DataColumn(new_series.astype(dtype),
                               use_standard_tags=False,
                               semantic_tags='test_tag')
    dt['full_name'] = overwrite_col
    updated_df = dt.to_dataframe()

    assert 'full_name' in dt.columns
    assert dt['full_name'].logical_type == Boolean
    assert dt['full_name'].semantic_tags == {'test_tag'}
    assert 'full_name' in updated_df.columns
    assert updated_df['full_name'].dtype == dtype
    assert original_col.to_series() is not dt['full_name'].to_series()

def test_datatable_getitem_list_input(sample_df):
    # Test regular columns
    dt = DataTable(sample_df, time_index='signup_date', index='id', name='dt_name')
    df = dt.to_dataframe()
    columns = ['age', 'full_name']
    new_dt = dt[columns]
    assert new_dt is not dt
    assert new_dt.to_dataframe() is not df
    pd.testing.assert_frame_equal(to_pandas(df[columns]).reset_index(drop=True),
                                  to_pandas(new_dt.to_dataframe()))
    assert all(new_dt.to_dataframe().columns == ['age', 'full_name'])
    assert set(new_dt.columns.keys()) == {'age', 'full_name'}
    assert new_dt.index is None
    assert new_dt.time_index is None

    # Test with index
    columns = ['id', 'full_name']
    new_dt = dt[columns]
    assert new_dt is not dt
    assert new_dt.to_dataframe() is not df
    pd.testing.assert_frame_equal(to_pandas(df[columns]),
                                  to_pandas(new_dt.to_dataframe()))
    assert all(new_dt.to_dataframe().columns == ['id', 'full_name'])
    assert set(new_dt.columns.keys()) == {'id', 'full_name'}
    assert new_dt.index == 'id'
    assert new_dt.time_index is None

    # Test with time_index
    columns = ['id', 'signup_date', 'full_name']
    new_dt = dt[columns]
    assert new_dt is not dt
    assert new_dt.to_dataframe() is not df
    pd.testing.assert_frame_equal(to_pandas(df[columns]),
                                  to_pandas(new_dt.to_dataframe()),
                                  check_index_type=False)
    assert all(new_dt.to_dataframe().columns == ['id', 'signup_date', 'full_name'])
    assert set(new_dt.columns.keys()) == {'id', 'signup_date', 'full_name'}
    assert new_dt.index == 'id'

    # Test with empty list selector
    columns = []
    new_dt = dt[columns]
    assert new_dt is not dt
    assert new_dt.to_dataframe() is not df
    assert to_pandas(new_dt.to_dataframe()).empty
    assert set(new_dt.columns.keys()) == set()
    assert new_dt.index is None
    assert new_dt.time_index is None

    # Test that reversed column order reverses resulting column order
    columns = list(reversed(list(dt.columns.keys())))
    new_dt = dt[columns]
    assert new_dt is not dt
    assert new_dt.to_dataframe() is not df
    assert all(df.columns[::-1] == new_dt.to_dataframe().columns)
    assert all(dt.types.index[::-1] == new_dt.types.index)
    assert all(new_dt.to_dataframe().columns == new_dt.types.index)
    assert set(new_dt.columns.keys()) == set(dt.columns.keys())
    assert new_dt.index == 'id'
    assert new_dt.time_index == 'signup_date'

def test_sets_object_dtype_on_init(latlong_df):
    for column_name in latlong_df.columns:
        ltypes = {
            column_name: LatLong,
        }
        dt = DataTable(latlong_df.loc[:, [column_name]], logical_types=ltypes)
        assert dt.columns[column_name].logical_type == LatLong
        assert dt.columns[column_name].dtype == LatLong.pandas_dtype
        assert dt.to_dataframe()[column_name].dtype == LatLong.pandas_dtype

def test_underlying_index_on_update(sample_df):
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Koalas input')

    dt = DataTable(sample_df.copy(), index='id')

    dt.update_dataframe(sample_df.tail(2))
    assert (dt._dataframe.index == [2, 3]).all()
    assert dt._dataframe.index.name is None
    assert type(dt._dataframe.index) == pd.Int64Index
    assert type(dt.to_dataframe().index) == pd.Int64Index

    actual = dt.iloc[[0, 1]]
    assert type(actual._dataframe.index) == pd.Index
    assert type(actual.to_dataframe().index) == pd.Index

    actual = dt.select(dt.index)
    assert type(actual._dataframe.index) == pd.Int64Index
    assert type(actual.to_dataframe().index) == pd.Int64Index

    actual = dt[['age']]
    assert type(actual._dataframe.index) == pd.Int64Index
    assert type(actual.to_dataframe().index) == pd.Int64Index

    actual = dt.drop(dt.index)
    assert type(actual._dataframe.index) == pd.RangeIndex
    assert type(actual.to_dataframe().index) == pd.RangeIndex

    actual = dt.reset_semantic_tags(retain_index_tags=False)
    assert type(actual._dataframe.index) == pd.RangeIndex
    assert type(actual.to_dataframe().index) == pd.RangeIndex

    actual = dt.set_types(retain_index_tags=False, semantic_tags={'id': 'numeric'})
    assert type(actual._dataframe.index) == pd.RangeIndex
    assert type(actual.to_dataframe().index) == pd.RangeIndex

    dt.pop(dt.index)
    assert type(dt._dataframe.index) == pd.RangeIndex
    assert type(dt.to_dataframe().index) == pd.RangeIndex

def test_serialize_s3_pickle_anon(sample_df_pandas, s3_client, s3_bucket):
    pandas_dt = DataTable(sample_df_pandas)
    pandas_dt.to_pickle(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    _dt = deserialize.read_datatable(TEST_S3_URL, profile_name=False)

    pd.testing.assert_frame_equal(
        to_pandas(pandas_dt.to_dataframe(), index=pandas_dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert pandas_dt == _dt

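# NOTE: `make_public` is another shared test utility not defined in this
# section. Assuming `s3_client` is a boto3 S3 client fixture and `s3_bucket`
# is the bucket name, a plausible sketch (hypothetical name, not the actual
# implementation) is:
def _make_public_sketch(s3_client, s3_bucket):
    """Mark every object in the test bucket world-readable so the anonymous
    (profile_name=False) reads in these tests can succeed."""
    for obj in s3_client.list_objects_v2(Bucket=s3_bucket).get('Contents', []):
        s3_client.put_object_acl(Bucket=s3_bucket, Key=obj['Key'],
                                 ACL='public-read')
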
def test_underlying_index_no_index(sample_df):
    if dd and isinstance(sample_df, dd.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Dask input')
    if ks and isinstance(sample_df, ks.DataFrame):
        pytest.xfail('Setting underlying index is not supported with Koalas input')

    assert type(sample_df.index) == pd.RangeIndex

    dt = DataTable(sample_df.copy())
    assert type(dt._dataframe.index) == pd.RangeIndex
    assert type(dt.to_dataframe().index) == pd.RangeIndex

    sample_df = sample_df.sort_values('full_name')
    assert type(sample_df.index) == pd.Int64Index
    dt = DataTable(sample_df)
    assert type(dt._dataframe.index) == pd.RangeIndex
    assert type(dt.to_dataframe().index) == pd.RangeIndex

def test_datatable_init(sample_df):
    dt = DataTable(sample_df)
    df = dt.to_dataframe()

    assert dt.name is None
    assert dt.index is None
    assert dt.time_index is None

    assert set(dt.columns.keys()) == set(sample_df.columns)
    assert df is sample_df
    pd.testing.assert_frame_equal(to_pandas(df), to_pandas(sample_df))

def test_to_parquet_with_latlong(latlong_df, tmpdir):
    dt = DataTable(latlong_df,
                   logical_types={col: 'LatLong' for col in latlong_df.columns})
    dt.to_parquet(str(tmpdir))
    _dt = deserialize.read_datatable(str(tmpdir))

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
        to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
    assert dt == _dt

def test_serialize_s3_parquet_anon(sample_df, s3_client, s3_bucket):
    xfail_tmp_disappears(sample_df)

    dt = DataTable(sample_df)
    dt.to_parquet(TEST_S3_URL, profile_name=False)
    make_public(s3_client, s3_bucket)
    _dt = deserialize.read_datatable(TEST_S3_URL, profile_name=False)

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert dt == _dt

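# NOTE: `xfail_tmp_disappears` is a shared helper whose definition is not
# shown here. Based on how it is used, it appears to xfail non-pandas inputs
# because Dask and Koalas read the serialized data lazily, by which point the
# temporary location may already be gone. A sketch under that assumption
# (hypothetical name):
def _xfail_tmp_disappears_sketch(dataframe):
    if not isinstance(dataframe, pd.DataFrame):
        pytest.xfail('tmp file disappears before the lazy deserialize step, '
                     'cannot check equality with Dask/Koalas input')
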
def test_sets_float64_dtype_on_update():
    column_name = 'test_series'
    series = pd.Series([0, 1, 0], name=column_name)
    series = series.astype('object')
    ltypes = {
        column_name: Integer,
    }
    dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
    dt = dt.set_types(logical_types={column_name: Double})
    assert dt.columns[column_name].logical_type == Double
    assert dt.columns[column_name].dtype == Double.pandas_dtype
    assert dt.to_dataframe()[column_name].dtype == Double.pandas_dtype

def test_setitem_different_name(sample_df):
    dt = DataTable(sample_df)

    new_series = pd.Series([1, 2, 3, 4], name='wrong')
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series = ks.Series(new_series)

    warning = 'Name mismatch between wrong and id. DataColumn and underlying series name are now id'
    with pytest.warns(ColumnNameMismatchWarning, match=warning):
        dt['id'] = DataColumn(new_series, use_standard_tags=False)

    assert dt['id'].name == 'id'
    assert dt['id'].to_series().name == 'id'
    assert dt.to_dataframe()['id'].name == 'id'
    assert 'wrong' not in dt.columns

    new_series2 = pd.Series([1, 2, 3, 4], name='wrong2')
    if ks and isinstance(sample_df, ks.DataFrame):
        new_series2 = ks.Series(new_series2)

    warning = 'Name mismatch between wrong2 and new_col. DataColumn and underlying series name are now new_col'
    with pytest.warns(ColumnNameMismatchWarning, match=warning):
        dt['new_col'] = DataColumn(new_series2, use_standard_tags=False)

    assert dt['new_col'].name == 'new_col'
    assert dt['new_col'].to_series().name == 'new_col'
    assert dt.to_dataframe()['new_col'].name == 'new_col'
    assert 'wrong2' not in dt.columns

    warning = 'Name mismatch between wrong and col_with_name. DataColumn and underlying series name are now col_with_name'
    with pytest.warns(ColumnNameMismatchWarning, match=warning):
        dt['col_with_name'] = DataColumn(new_series, use_standard_tags=False, name='wrong')

    assert dt['col_with_name'].name == 'col_with_name'
    assert dt['col_with_name'].to_series().name == 'col_with_name'
    assert dt.to_dataframe()['col_with_name'].name == 'col_with_name'
    assert 'wrong' not in dt.columns

def test_sets_datetime_dtype_on_update():
    column_name = 'test_series'
    series = pd.Series(['2020-01-01', '2020-01-02', '2020-01-03'], name=column_name)
    series = series.astype('object')
    ltypes = {
        column_name: NaturalLanguage,
    }
    dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
    dt = dt.set_types(logical_types={column_name: Datetime})
    assert dt.columns[column_name].logical_type == Datetime
    assert dt.columns[column_name].dtype == Datetime.pandas_dtype
    assert dt.to_dataframe()[column_name].dtype == Datetime.pandas_dtype

def test_to_pickle(sample_df, tmpdir):
    dt = DataTable(sample_df)
    if not isinstance(sample_df, pd.DataFrame):
        msg = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=msg):
            dt.to_pickle(str(tmpdir))
    else:
        dt.to_pickle(str(tmpdir))
        _dt = deserialize.read_datatable(str(tmpdir))

        pd.testing.assert_frame_equal(
            to_pandas(dt.to_dataframe(), index=dt.index),
            to_pandas(_dt.to_dataframe(), index=_dt.index))
        assert dt == _dt

def test_s3_test_profile(sample_df, s3_client, s3_bucket, setup_test_profile):
    xfail_tmp_disappears(sample_df)

    dt = DataTable(sample_df)
    dt.to_csv(TEST_S3_URL, encoding='utf-8', engine='python', profile_name='test')
    make_public(s3_client, s3_bucket)
    _dt = deserialize.read_datatable(TEST_S3_URL, profile_name='test')

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert dt == _dt

def test_select_ltypes_no_match_and_all(sample_df):
    dt = DataTable(sample_df)
    dt = dt.set_types(logical_types={
        'full_name': FullName,
        'email': EmailAddress,
        'phone_number': PhoneNumber,
        'age': Double,
        'signup_date': Datetime,
    })
    assert len(dt.select(ZIPCode).columns) == 0
    assert len(dt.select(['ZIPCode', PhoneNumber]).columns) == 1

    all_types = ww.type_system.registered_types
    dt_all_types = dt.select(all_types)
    assert len(dt_all_types.columns) == len(dt.columns)
    assert len(dt_all_types.to_dataframe().columns) == len(dt.to_dataframe().columns)

def test_sets_int64_dtype_on_update():
    column_name = 'test_series'
    series = pd.Series([1.0, 2.0, 1.0], name=column_name)
    series = series.astype('object')

    logical_types = [Integer]
    for logical_type in logical_types:
        ltypes = {
            column_name: Double,
        }
        dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
        dt = dt.set_types(logical_types={column_name: logical_type})
        assert dt.columns[column_name].logical_type == logical_type
        assert dt.columns[column_name].dtype == logical_type.pandas_dtype
        assert dt.to_dataframe()[column_name].dtype == logical_type.pandas_dtype

def test_to_pickle_with_latlong(latlong_df, tmpdir):
    dt = DataTable(latlong_df,
                   logical_types={col: 'LatLong' for col in latlong_df.columns})
    if not isinstance(latlong_df, pd.DataFrame):
        msg = 'DataFrame type not compatible with pickle serialization. Please serialize to another format.'
        with pytest.raises(ValueError, match=msg):
            dt.to_pickle(str(tmpdir))
    else:
        dt.to_pickle(str(tmpdir))
        _dt = deserialize.read_datatable(str(tmpdir))

        pd.testing.assert_frame_equal(
            to_pandas(dt.to_dataframe(), index=dt.index, sort_index=True),
            to_pandas(_dt.to_dataframe(), index=_dt.index, sort_index=True))
        assert dt == _dt

def test_to_csv_S3(sample_df, s3_client, s3_bucket):
    xfail_tmp_disappears(sample_df)

    dt = DataTable(sample_df,
                   name='test_data',
                   index='id',
                   semantic_tags={'id': 'tag1'},
                   logical_types={'age': Ordinal(order=[25, 33, 57])})
    dt.to_csv(TEST_S3_URL, encoding='utf-8', engine='python')
    make_public(s3_client, s3_bucket)
    _dt = deserialize.read_datatable(TEST_S3_URL)

    pd.testing.assert_frame_equal(
        to_pandas(dt.to_dataframe(), index=dt.index),
        to_pandas(_dt.to_dataframe(), index=_dt.index))
    assert dt == _dt

def test_sets_float64_dtype_on_init():
    column_name = 'test_series'
    series_list = [
        pd.Series([1.1, 2, 3], name=column_name),
        pd.Series([1.1, None, 3], name=column_name),
        pd.Series([1.1, np.nan, 3], name=column_name),
    ]

    logical_type = Double
    for series in series_list:
        series = series.astype('object')
        ltypes = {
            column_name: logical_type,
        }
        dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
        assert dt.columns[column_name].logical_type == logical_type
        assert dt.columns[column_name].dtype == logical_type.pandas_dtype
        assert dt.to_dataframe()[column_name].dtype == logical_type.pandas_dtype

def test_sets_boolean_dtype_on_init():
    column_name = 'test_series'
    series_list = [
        pd.Series([True, False, True], name=column_name),
        pd.Series([True, None, True], name=column_name),
        pd.Series([True, np.nan, True], name=column_name),
        pd.Series([True, pd.NA, True], name=column_name),
    ]

    logical_type = Boolean
    for series in series_list:
        series = series.astype('object')
        ltypes = {
            column_name: logical_type,
        }
        dt = DataTable(pd.DataFrame(series), logical_types=ltypes)
        assert dt.columns[column_name].logical_type == logical_type
        assert dt.columns[column_name].dtype == logical_type.pandas_dtype
        assert dt.to_dataframe()[column_name].dtype == logical_type.pandas_dtype