コード例 #1
0
ファイル: test_utils_dataframe.py プロジェクト: rlugojr/dask
def test_meta_nonempty_scalar():
    """Scalars given to ``meta_nonempty`` keep their type; a Timestamp comes back as-is."""
    result = meta_nonempty(np.float64(1.0))
    assert isinstance(result, np.float64)

    stamp = pd.Timestamp(2000, 1, 1)
    # The very same object must be returned, not merely an equal one.
    assert meta_nonempty(stamp) is stamp
コード例 #2
0
ファイル: test_utils_dataframe.py プロジェクト: rlugojr/dask
def test_meta_nonempty():
    """Fill a zero-row frame via ``meta_nonempty`` and check dtypes and first-row values."""
    source = pd.DataFrame({'A': pd.Categorical(['Alice', 'Bob', 'Carol']),
                           'B': list('abc'),
                           'C': 'bar',
                           'D': np.float32(1),
                           'E': np.int32(1),
                           'F': pd.Timestamp('2016-01-01'),
                           'G': pd.date_range('2016-01-01', periods=3,
                                              tz='America/New_York'),
                           'H': pd.Timedelta('1 hours', 'ms'),
                           'I': np.void(b' '),
                           'J': pd.Categorical([UNKNOWN_CATEGORIES] * 3)},
                          columns=list('DCBAHGFEIJ'))
    empty = source.iloc[0:0]
    filled = meta_nonempty(empty)

    # Every column keeps the dtype it had in the empty frame.
    assert (filled.dtypes == empty.dtypes).all()

    # Expected value in the first row of each column.
    first_row = {
        'A': 'Alice',
        'B': 'foo',
        'C': 'foo',
        'D': np.float32(1),
        'E': np.int32(1),
        'F': pd.Timestamp('1970-01-01 00:00:00'),
        'G': pd.Timestamp('1970-01-01 00:00:00',
                          tz='America/New_York'),
        'H': pd.Timedelta('1', 'ms'),
        'I': 'foo',
        'J': UNKNOWN_CATEGORIES,
    }
    for column, value in first_row.items():
        assert filled[column][0] == value

    # Narrow numeric dtypes must survive on the scalar values themselves.
    assert filled['D'][0].dtype == 'f4'
    assert filled['E'][0].dtype == 'i4'

    # A single column yields the same result as the matching frame column.
    series = meta_nonempty(empty['A'])
    assert series.dtype == empty['A'].dtype
    assert (filled['A'] == series).all()
コード例 #3
0
ファイル: test_utils_dataframe.py プロジェクト: ankravch/dask
def test_meta_nonempty():
    """Fill a zero-row frame via ``meta_nonempty`` and check dtypes and first-row values."""
    source = pd.DataFrame({'A': pd.Categorical(['Alice', 'Bob', 'Carol']),
                           'B': list('abc'),
                           'C': 'bar',
                           'D': 3.0,
                           'E': pd.Timestamp('2016-01-01'),
                           'F': pd.date_range('2016-01-01', periods=3,
                                              tz='America/New_York'),
                           'G': pd.Timedelta('1 hours'),
                           'H': np.void(b' ')},
                          columns=list('DCBAHGFE'))
    empty = source.iloc[0:0]
    filled = meta_nonempty(empty)

    # Every column keeps the dtype it had in the empty frame.
    assert (filled.dtypes == empty.dtypes).all()

    # Expected value in the first row of each column.
    first_row = {
        'A': 'Alice',
        'B': 'foo',
        'C': 'foo',
        'D': 1.0,
        'E': pd.Timestamp('1970-01-01 00:00:00'),
        'F': pd.Timestamp('1970-01-01 00:00:00',
                          tz='America/New_York'),
        'G': pd.Timedelta('1 days'),
        'H': 'foo',
    }
    for column, value in first_row.items():
        assert filled[column][0] == value

    # A single column yields the same result as the matching frame column.
    series = meta_nonempty(empty['A'])
    assert series.dtype == empty['A'].dtype
    assert (filled['A'] == series).all()
コード例 #4
0
def test_meta_nonempty():
    """Fill a zero-row frame via ``meta_nonempty`` and check dtypes and first-row values."""
    column_data = {
        'A': pd.Categorical(['Alice', 'Bob', 'Carol']),
        'B': list('abc'),
        'C': 'bar',
        'D': np.float32(1),
        'E': np.int32(1),
        'F': pd.Timestamp('2016-01-01'),
        'G': pd.date_range('2016-01-01', periods=3, tz='America/New_York'),
        'H': pd.Timedelta('1 hours', 'ms'),
        'I': np.void(b' '),
        'J': pd.Categorical([UNKNOWN_CATEGORIES] * 3)
    }
    source = pd.DataFrame(column_data, columns=list('DCBAHGFEIJ'))
    empty = source.iloc[0:0]
    filled = meta_nonempty(empty)

    # Every column keeps the dtype it had in the empty frame.
    assert (filled.dtypes == empty.dtypes).all()

    # Expected value in the first row of each column.
    first_row = {
        'A': 'Alice',
        'B': 'foo',
        'C': 'foo',
        'D': np.float32(1),
        'E': np.int32(1),
        'F': pd.Timestamp('1970-01-01 00:00:00'),
        'G': pd.Timestamp('1970-01-01 00:00:00',
                          tz='America/New_York'),
        'H': pd.Timedelta('1', 'ms'),
        'I': 'foo',
        'J': UNKNOWN_CATEGORIES,
    }
    for column, value in first_row.items():
        assert filled[column][0] == value

    # Narrow numeric dtypes must survive on the scalar values themselves.
    assert filled['D'][0].dtype == 'f4'
    assert filled['E'][0].dtype == 'i4'

    # A single column yields the same result as the matching frame column.
    series = meta_nonempty(empty['A'])
    assert series.dtype == empty['A'].dtype
    assert (filled['A'] == series).all()
コード例 #5
0
def test_meta_nonempty():
    """Fill a zero-row frame via ``meta_nonempty`` and check dtypes and first-row values."""
    column_data = {
        "A": pd.Categorical(["Alice", "Bob", "Carol"]),
        "B": list("abc"),
        "C": "bar",
        "D": np.float32(1),
        "E": np.int32(1),
        "F": pd.Timestamp("2016-01-01"),
        "G": pd.date_range("2016-01-01", periods=3, tz="America/New_York"),
        "H": pd.Timedelta("1 hours"),
        "I": np.void(b" "),
        "J": pd.Categorical([UNKNOWN_CATEGORIES] * 3),
    }
    source = pd.DataFrame(column_data, columns=list("DCBAHGFEIJ"))
    empty = source.iloc[0:0]
    filled = meta_nonempty(empty)

    # Every column keeps the dtype it had in the empty frame.
    assert (filled.dtypes == empty.dtypes).all()

    # Expected value in the first row of each column.
    first_row = {
        "A": "Alice",
        "B": "foo",
        "C": "foo",
        "D": np.float32(1),
        "E": np.int32(1),
        "F": pd.Timestamp("1970-01-01 00:00:00"),
        "G": pd.Timestamp("1970-01-01 00:00:00", tz="America/New_York"),
        "H": pd.Timedelta("1"),
        "I": "foo",
        "J": UNKNOWN_CATEGORIES,
    }
    for column, value in first_row.items():
        assert filled[column][0] == value

    # Narrow numeric dtypes must survive on the scalar values themselves.
    assert filled["D"][0].dtype == "f4"
    assert filled["E"][0].dtype == "i4"

    # A single column yields the same result as the matching frame column.
    series = meta_nonempty(empty["A"])
    assert series.dtype == empty["A"].dtype
    assert (filled["A"] == series).all()
コード例 #6
0
def test_meta_nonempty_scalar():
    """Scalars given to ``meta_nonempty`` keep their type; a Timestamp comes back as-is."""
    result = meta_nonempty(np.float64(1.0))
    assert isinstance(result, np.float64)

    stamp = pd.Timestamp(2000, 1, 1)
    # The very same object must be returned, not merely an equal one.
    assert meta_nonempty(stamp) is stamp
コード例 #7
0
ファイル: test_utils_dataframe.py プロジェクト: rlugojr/dask
def test_meta_nonempty_index():
    """Check that ``meta_nonempty`` preserves type and metadata for each index kind.

    Numeric indexes are built with ``pd.Index(..., dtype=...)`` instead of
    ``pd.Int64Index`` / ``pd.Float64Index``, and the MultiIndex is built with
    ``codes=`` instead of ``labels=``, because those older spellings were
    removed from pandas.
    """
    idx = pd.RangeIndex(1, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.RangeIndex
    assert res.name == idx.name

    # Integer index: compare against the input's own type and pin the dtype.
    idx = pd.Index([1], name='foo', dtype='int64')
    res = meta_nonempty(idx)
    assert type(res) is type(idx)
    assert res.dtype == 'int64'
    assert res.name == idx.name

    idx = pd.Index(['a'], name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.Index
    assert res.name == idx.name

    idx = pd.DatetimeIndex(['1970-01-01'], freq='d',
                           tz='America/New_York', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.DatetimeIndex
    assert res.tz == idx.tz
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.PeriodIndex(['1970-01-01'], freq='d', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.PeriodIndex
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.TimedeltaIndex([np.timedelta64(1, 'D')], freq='d', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.TimedeltaIndex
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.CategoricalIndex(['a'], ['a', 'b'], ordered=True, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.CategoricalIndex
    assert (res.categories == idx.categories).all()
    assert res.ordered == idx.ordered
    assert res.name == idx.name

    idx = pd.CategoricalIndex([], [UNKNOWN_CATEGORIES], ordered=True, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.CategoricalIndex
    assert res.ordered == idx.ordered
    assert res.name == idx.name

    # MultiIndex with one integer and one float level.
    levels = [pd.Index([1], name='a', dtype='int64'),
              pd.Index([1.0], name='b', dtype='float64')]
    idx = pd.MultiIndex(levels=levels, codes=[[0], [0]], names=['a', 'b'])
    res = meta_nonempty(idx)
    assert type(res) is pd.MultiIndex
    for idx1, idx2 in zip(idx.levels, res.levels):
        assert type(idx1) is type(idx2)
        assert idx1.name == idx2.name
    assert res.names == idx.names
コード例 #8
0
def test_meta_nonempty_scalar():
    """Check ``meta_nonempty`` on a numpy scalar, a Timestamp, and a tz-aware dtype."""
    result = meta_nonempty(np.float64(1.0))
    assert isinstance(result, np.float64)

    stamp = pd.Timestamp(2000, 1, 1)
    # The very same object must be returned, not merely an equal one.
    assert meta_nonempty(stamp) is stamp

    # DatetimeTZDtype: the result equals a Timestamp built from the dtype's tz and unit.
    tz_dtype = pd.DatetimeTZDtype(tz="UTC")
    result = meta_nonempty(tz_dtype)
    expected = pd.Timestamp(1, tz=tz_dtype.tz, unit=tz_dtype.unit)
    assert result == expected
コード例 #9
0
def test_meta_nonempty_index():
    """Check that ``meta_nonempty`` preserves type and metadata for each index kind.

    Numeric indexes are built with ``pd.Index(..., dtype=...)`` instead of
    ``pd.Int64Index`` / ``pd.Float64Index``, and the MultiIndex is built with
    ``codes=`` instead of ``labels=``, because those older spellings were
    removed from pandas.
    """
    idx = pd.RangeIndex(1, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.RangeIndex
    assert res.name == idx.name

    # Integer index: compare against the input's own type and pin the dtype.
    idx = pd.Index([1], name='foo', dtype='int64')
    res = meta_nonempty(idx)
    assert type(res) is type(idx)
    assert res.dtype == 'int64'
    assert res.name == idx.name

    idx = pd.Index(['a'], name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.Index
    assert res.name == idx.name

    idx = pd.DatetimeIndex(['1970-01-01'], freq='d',
                           tz='America/New_York', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.DatetimeIndex
    assert res.tz == idx.tz
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.PeriodIndex(['1970-01-01'], freq='d', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.PeriodIndex
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.TimedeltaIndex([np.timedelta64(1, 'D')], freq='d', name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.TimedeltaIndex
    assert res.freq == idx.freq
    assert res.name == idx.name

    idx = pd.CategoricalIndex(['a'], ['a', 'b'], ordered=True, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.CategoricalIndex
    assert (res.categories == idx.categories).all()
    assert res.ordered == idx.ordered
    assert res.name == idx.name

    idx = pd.CategoricalIndex([], [UNKNOWN_CATEGORIES], ordered=True, name='foo')
    res = meta_nonempty(idx)
    assert type(res) is pd.CategoricalIndex
    assert res.ordered == idx.ordered
    assert res.name == idx.name

    # MultiIndex with one integer and one float level.
    levels = [pd.Index([1], name='a', dtype='int64'),
              pd.Index([1.0], name='b', dtype='float64')]
    idx = pd.MultiIndex(levels=levels, codes=[[0], [0]], names=['a', 'b'])
    res = meta_nonempty(idx)
    assert type(res) is pd.MultiIndex
    for idx1, idx2 in zip(idx.levels, res.levels):
        assert type(idx1) is type(idx2)
        assert idx1.name == idx2.name
    assert res.names == idx.names
コード例 #10
0
ファイル: groupby.py プロジェクト: bschifferer/NVTabular
    def transform(self, columns: ColumnNames, df: DataFrameType) -> DataFrameType:
        """Apply the configured groupby aggregations to ``df``.

        When ``self.sort_cols`` is set, the frame is sorted by those columns
        first. A zero-length frame is replaced by ``meta_nonempty(df)`` for
        the aggregation step and the result is then cut back to zero rows.
        """
        if self.sort_cols:
            df = df.sort_values(self.sort_cols, ignore_index=True)

        # List aggregations do not work with empty data, so aggregate over
        # synthetic metadata instead to predict the output columns.
        is_empty = len(df) == 0
        if is_empty:
            source = meta_nonempty(df)
        else:
            source = df

        # Build the "complete" aggregation dicts.
        list_aggs, conv_aggs = _get_agg_dicts(
            self.groupby_cols, self.list_aggs, self.conv_aggs, columns
        )

        aggregated = _apply_aggs(
            source,
            self.groupby_cols,
            list_aggs,
            conv_aggs,
            name_sep=self.name_sep,
        )

        # Empty input must yield empty output with the predicted columns.
        return aggregated.iloc[:0] if is_empty else aggregated
コード例 #11
0
ファイル: test_utils_dataframe.py プロジェクト: rlugojr/dask
def test_meta_nonempty_empty_categories():
    """Categoricals with zero categories keep category type, order flag and name."""
    for cat_dtype in ('O', 'f8', 'M8'):
        # Index
        empty_categories = pd.Index([], dtype=cat_dtype)
        cat_idx = pd.CategoricalIndex([], empty_categories,
                                      ordered=True, name='foo')
        filled_idx = meta_nonempty(cat_idx)
        assert type(filled_idx) is pd.CategoricalIndex
        assert type(filled_idx.categories) is type(cat_idx.categories)
        assert filled_idx.ordered == cat_idx.ordered
        assert filled_idx.name == cat_idx.name
        # Series
        cat_ser = cat_idx.to_series()
        filled_ser = meta_nonempty(cat_ser)
        assert filled_ser.dtype == cat_ser.dtype
        assert type(filled_ser.cat.categories) is type(cat_ser.cat.categories)
        assert filled_ser.cat.ordered == cat_ser.cat.ordered
        assert filled_ser.name == cat_ser.name
コード例 #12
0
 def _maybe_partial_time_string(self, iindexer):
     """
     Convert the index indexer for partial time string slicing
     when obj.index is a DatetimeIndex / PeriodIndex.

     The conversion is delegated to the module-level function of the
     same name, using a non-empty version of the meta index.
     """
     nonempty_index = meta_nonempty(self.obj._meta.index)
     return _maybe_partial_time_string(nonempty_index, iindexer)
コード例 #13
0
def test_meta_nonempty_empty_categories():
    """Categoricals with zero categories keep category type, order flag and name."""
    for cat_dtype in ('O', 'f8', 'M8'):
        # Index
        empty_categories = pd.Index([], dtype=cat_dtype)
        cat_idx = pd.CategoricalIndex([], empty_categories,
                                      ordered=True, name='foo')
        filled_idx = meta_nonempty(cat_idx)
        assert type(filled_idx) is pd.CategoricalIndex
        assert type(filled_idx.categories) is type(cat_idx.categories)
        assert filled_idx.ordered == cat_idx.ordered
        assert filled_idx.name == cat_idx.name
        # Series
        cat_ser = cat_idx.to_series()
        filled_ser = meta_nonempty(cat_ser)
        assert filled_ser.dtype == cat_ser.dtype
        assert type(filled_ser.cat.categories) is type(cat_ser.cat.categories)
        assert filled_ser.cat.ordered == cat_ser.cat.ordered
        assert filled_ser.name == cat_ser.name
コード例 #14
0
def test_meta_duplicated():
    """A frame with duplicated column names is filled with two rows of 'foo'."""
    empty = pd.DataFrame(columns=['A', 'A', 'B'])
    actual = meta_nonempty(empty)

    rows = [['foo'] * 3 for _ in range(2)]
    expected = pd.DataFrame(rows,
                            index=['a', 'b'],
                            columns=['A', 'A', 'B'])
    tm.assert_frame_equal(actual, expected)
コード例 #15
0
ファイル: test_utils_dataframe.py プロジェクト: rlugojr/dask
def test_meta_duplicated():
    """A frame with duplicated column names is filled with two rows of 'foo'."""
    empty = pd.DataFrame(columns=['A', 'A', 'B'])
    actual = meta_nonempty(empty)

    rows = [['foo'] * 3 for _ in range(2)]
    expected = pd.DataFrame(rows,
                            index=['a', 'b'],
                            columns=['A', 'A', 'B'])
    tm.assert_frame_equal(actual, expected)
コード例 #16
0
def test_meta_nonempty_empty_categories():
    """Categoricals with zero categories keep category type, order flag and name."""
    for cat_dtype in ("O", "f8", "M8[ns]"):
        # Index
        empty_categories = pd.Index([], dtype=cat_dtype)
        cat_idx = pd.CategoricalIndex(
            [], empty_categories, ordered=True, name="foo"
        )
        filled_idx = meta_nonempty(cat_idx)
        assert type(filled_idx) is pd.CategoricalIndex
        assert type(filled_idx.categories) is type(cat_idx.categories)
        assert filled_idx.ordered == cat_idx.ordered
        assert filled_idx.name == cat_idx.name
        # Series
        cat_ser = cat_idx.to_series()
        filled_ser = meta_nonempty(cat_ser)
        assert filled_ser.dtype == "category"
        assert cat_ser.dtype == "category"
        assert type(filled_ser.cat.categories) is type(cat_ser.cat.categories)
        assert filled_ser.cat.ordered == cat_ser.cat.ordered
        assert filled_ser.name == cat_ser.name
コード例 #17
0
def test_meta_duplicated():
    """A frame with duplicated column names is filled with two rows of "foo"."""
    empty = pd.DataFrame(columns=["A", "A", "B"])
    actual = meta_nonempty(empty)

    rows = [["foo"] * 3 for _ in range(2)]
    expected = pd.DataFrame(
        rows,
        index=["a", "b"],
        columns=["A", "A", "B"],
    )
    tm.assert_frame_equal(actual, expected)
コード例 #18
0
def test_meta_nonempty_uint64index():
    """An unsigned 64-bit index keeps its type, dtype and name through ``meta_nonempty``.

    The index is built with ``pd.Index(..., dtype='uint64')`` because the
    dedicated ``pd.UInt64Index`` class was removed from pandas.
    """
    idx = pd.Index([1], name='foo', dtype='uint64')
    res = meta_nonempty(idx)
    # Compare against the input's own type so the check does not depend on
    # which concrete class pandas uses for unsigned integer indexes.
    assert type(res) is type(idx)
    assert res.dtype == 'uint64'
    assert res.name == idx.name
コード例 #19
0
def test_meta_nonempty_uint64index():
    """An unsigned 64-bit index keeps its type, dtype and name through ``meta_nonempty``.

    The index is built with ``pd.Index(..., dtype='uint64')`` because the
    dedicated ``pd.UInt64Index`` class was removed from pandas.
    """
    idx = pd.Index([1], name='foo', dtype='uint64')
    res = meta_nonempty(idx)
    # Compare against the input's own type so the check does not depend on
    # which concrete class pandas uses for unsigned integer indexes.
    assert type(res) is type(idx)
    assert res.dtype == 'uint64'
    assert res.name == idx.name
コード例 #20
0
def test_nonempty_series_nullable_float():
    """An empty nullable-float Series stays "Float64" after ``meta_nonempty``."""
    empty = pd.Series([], dtype="Float64")
    filled = meta_nonempty(empty)
    assert filled.dtype == "Float64"
コード例 #21
0
def test_nonempty_series_sparse():
    """``meta_nonempty`` on a sparse Series must not record any warning."""
    sparse_values = pd.array([0, 1], dtype="Sparse")
    ser = pd.Series(sparse_values)
    with warnings.catch_warnings(record=True) as caught:
        meta_nonempty(ser)
    assert not caught
コード例 #22
0
def test_meta_nonempty_uint64index():
    """A uint64 index keeps its type, dtype and name through ``meta_nonempty``."""
    original = pd.Index([1], name="foo", dtype="uint64")
    filled = meta_nonempty(original)
    assert type(filled) is type(original)
    assert filled.dtype == "uint64"
    assert filled.name == original.name
コード例 #23
0
def test_meta_nonempty_index():
    """Check that ``meta_nonempty`` preserves type and metadata for each index kind."""
    # RangeIndex
    src = pd.RangeIndex(1, name="foo")
    out = meta_nonempty(src)
    assert type(out) is pd.RangeIndex
    assert out.name == src.name

    # Integer index
    src = pd.Index([1], name="foo", dtype="int")
    out = meta_nonempty(src)
    assert type(out) is type(src)
    assert out.dtype == "int64"
    assert out.name == src.name

    # Object (string) index
    src = pd.Index(["a"], name="foo")
    out = meta_nonempty(src)
    assert type(out) is pd.Index
    assert out.name == src.name

    # Timezone-aware DatetimeIndex
    src = pd.DatetimeIndex(
        ["1970-01-01"], freq="d", tz="America/New_York", name="foo"
    )
    out = meta_nonempty(src)
    assert type(out) is pd.DatetimeIndex
    assert out.tz == src.tz
    assert out.freq == src.freq
    assert out.name == src.name

    # PeriodIndex
    src = pd.PeriodIndex(["1970-01-01"], freq="d", name="foo")
    out = meta_nonempty(src)
    assert type(out) is pd.PeriodIndex
    assert out.freq == src.freq
    assert out.name == src.name

    # TimedeltaIndex
    src = pd.TimedeltaIndex([np.timedelta64(1, "D")], freq="d", name="foo")
    out = meta_nonempty(src)
    assert type(out) is pd.TimedeltaIndex
    assert out.freq == src.freq
    assert out.name == src.name

    # CategoricalIndex with known categories
    src = pd.CategoricalIndex(
        ["xyx"], ["xyx", "zzz"], ordered=True, name="foo"
    )
    out = meta_nonempty(src)
    assert type(out) is pd.CategoricalIndex
    assert (out.categories == src.categories).all()
    assert out.ordered == src.ordered
    assert out.name == src.name

    # CategoricalIndex whose categories are the unknown-categories marker
    src = pd.CategoricalIndex(
        [], [UNKNOWN_CATEGORIES], ordered=True, name="foo"
    )
    out = meta_nonempty(src)
    assert type(out) is pd.CategoricalIndex
    assert out.ordered == src.ordered
    assert out.name == src.name

    # Two-level MultiIndex: integer and float levels
    two_levels = [pd.Index([1], name="a"), pd.Index([1.0], name="b")]
    src = pd.MultiIndex(levels=two_levels, names=["a", "b"], codes=[[0], [0]])
    out = meta_nonempty(src)
    assert type(out) is pd.MultiIndex
    for src_level, out_level in zip(src.levels, out.levels):
        assert type(src_level) is type(out_level)
        assert src_level.name == out_level.name
    assert out.names == src.names

    # Three-level MultiIndex: integer, categorical and timedelta levels
    three_levels = [
        pd.Index([1], name="a"),
        pd.CategoricalIndex(data=["xyx"], categories=["xyx"], name="b"),
        pd.TimedeltaIndex([np.timedelta64(1, "D")], name="timedelta"),
    ]
    src = pd.MultiIndex(
        levels=three_levels,
        names=["a", "b", "timedelta"],
        codes=[[0], [0], [0]],
    )
    out = meta_nonempty(src)
    assert type(out) is pd.MultiIndex
    for src_level, out_level in zip(src.levels, out.levels):
        assert type(src_level) is type(out_level)
        assert src_level.name == out_level.name
    assert out.names == src.names
コード例 #24
0
def meta_nonempty_dataframe(df, index=None):
    """Return a ``GeoDataFrame`` built from the non-empty meta of ``df``.

    The zero-row head of ``df`` is first converted to a plain pandas
    DataFrame, passed through ``meta_nonempty``, and the result is wrapped
    in a ``GeoDataFrame``. The ``index`` argument is accepted but not used.
    """
    zero_rows = df.head(0)
    plain_frame = pd.DataFrame(zero_rows)
    return GeoDataFrame(meta_nonempty(plain_frame))
コード例 #25
0
def test_nonempty_series_sparse():
    """``meta_nonempty`` on a sparse Series must not emit any warning.

    Warnings are captured with ``warnings.catch_warnings(record=True)``
    rather than ``pytest.warns(None)``, which newer pytest releases reject.
    """
    # Imported locally so the block does not depend on a module-level import.
    import warnings

    ser = pd.Series(pd.array([0, 1], dtype="Sparse"))
    with warnings.catch_warnings(record=True) as w:
        # Record every warning, regardless of the filters active outside.
        warnings.simplefilter("always")
        meta_nonempty(ser)

    assert len(w) == 0