def dshape_from_pandas_helper(col):
    """Map a pandas dataframe column to an object from datashape.coretypes.

    The column's dtype decides the result:

    * categorical dtypes (pandas, or cudf when the module-level ``cudf``
      name is truthy) yield a ``datashape.Categorical`` built from the
      column's categories and ordered flag;
    * dtypes whose ``kind`` is ``'M'`` yield an optional
      ``datashape.DateTime`` carrying the timezone (as a string) if any;
    * ``RaggedDtype`` columns yield the dtype object itself;
    * anything else is converted with ``datashape.CType.from_numpy_dtype``,
      with object mapped to string, and string/datetime wrapped in
      ``datashape.Option``.
    """
    dtype = col.dtype

    is_categorical = (
        isinstance(dtype, type(pd.Categorical.dtype))
        or isinstance(dtype, pd.api.types.CategoricalDtype)
        or (cudf and isinstance(dtype, cudf.core.dtypes.CategoricalDtype))
    )
    if is_categorical:
        accessor = col.cat
        # Work out the dtype of the category values; fixed-width unicode
        # arrays are re-expressed as object arrays before building the dshape.
        category_values = np.array(accessor.categories)
        if category_values.dtype.kind == 'U':
            category_values = category_values.astype('object')
        template = '{} * {}'
        category_dshape = datashape.dshape(
            template.format(len(accessor.categories), category_values.dtype)
        )
        return datashape.Categorical(
            category_values,
            type=category_dshape,
            ordered=accessor.ordered,
        )

    if dtype.kind == 'M':
        timezone = getattr(dtype, 'tz', None)
        if timezone is not None:
            # pandas keeps a tzinfo object here, whereas DataShape expects
            # the timezone as a string.
            timezone = str(timezone)
        return datashape.Option(datashape.DateTime(tz=timezone))

    if isinstance(dtype, RaggedDtype):
        return dtype

    measure = datashape.CType.from_numpy_dtype(dtype)
    if measure == datashape.object_:
        measure = datashape.string
    if measure in (datashape.string, datashape.datetime_):
        return datashape.Option(measure)
    return measure
def dshape_from_pandas_helper(col):
    """Return an object from datashape.coretypes for a pandas dataframe column.

    * Categorical columns yield a ``datashape.Categorical`` built from the
      column's category values, their dtype, and the ordered flag.
    * Columns whose dtype ``kind`` is ``'M'`` yield an optional
      ``datashape.DateTime`` carrying the timezone (as a string) if any.
    * Any other dtype is converted with ``datashape.CType.from_numpy_dtype``;
      object is mapped to string, and string/datetime results are wrapped
      in ``datashape.Option``.
    """
    dtype = col.dtype

    # ``type(pd.Categorical.dtype)`` only names the categorical dtype class
    # when ``Categorical.dtype`` is a class-level instance; when it is a
    # property the check compares against ``property`` and never matches,
    # so the public ``CategoricalDtype`` class is checked as well.
    if (isinstance(dtype, type(pd.Categorical.dtype))
            or isinstance(dtype, pd.api.types.CategoricalDtype)):
        categories = col.cat.categories
        cat_dshape = datashape.dshape('{} * {}'.format(
            len(categories),
            categories.dtype,
        ))
        # Read the flag from ``.cat.ordered``; the ``.cat.categorical``
        # attribute is not available on current pandas accessors.
        return datashape.Categorical(categories.values,
                                     type=cat_dshape,
                                     ordered=col.cat.ordered)

    if dtype.kind == 'M':
        tz = getattr(dtype, 'tz', None)
        if tz is not None:
            # Pandas stores this as a pytz.tzinfo, but DataShape wants a string
            tz = str(tz)
        return datashape.Option(datashape.DateTime(tz=tz))

    dshape = datashape.CType.from_numpy_dtype(dtype)
    if dshape == datashape.object_:
        dshape = datashape.string
    if dshape in (datashape.string, datashape.datetime_):
        return datashape.Option(dshape)
    return dshape