Esempio n. 1
0
def test_rangeindex_get_slice_bound(bounds, indices, side, kind):
    """Compare ``get_slice_bound`` between pandas and cudf range indexes.

    ``bounds`` is a ``(start, stop)`` pair used to build both indexes; every
    label in ``indices`` is looked up with the given ``side`` and ``kind`` and
    the two results must be equal.
    """
    lower, upper = bounds
    reference_index = pd.RangeIndex(lower, upper)
    tested_index = RangeIndex(lower, upper)
    for label in indices:
        # pandas is queried first, then cudf, for every label.
        assert reference_index.get_slice_bound(
            label, side, kind
        ) == tested_index.get_slice_bound(label, side, kind)
Esempio n. 2
0
def test_index_comparision():
    """Check ``equals`` between a RangeIndex and a GenericIndex.

    Both indexes are built over the same integers 10..33; they must compare
    equal in both directions, and dropping the last element from only one of
    them must break the equality.
    """
    first, past_last = 10, 34
    range_idx = RangeIndex(first, past_last)
    generic_idx = GenericIndex(np.arange(first, past_last))
    without_last = slice(None, -1)

    # Equality holds regardless of which side performs the comparison.
    assert range_idx.equals(generic_idx)
    assert generic_idx.equals(range_idx)

    # Trimming one side only makes them differ; trimming both restores it.
    assert not range_idx[without_last].equals(generic_idx)
    assert range_idx[without_last].equals(generic_idx[without_last])
Esempio n. 3
0
def test_rangeindex_get_slice_bound_step(bounds, label, side, kind):
    """Compare ``get_slice_bound`` for stepped pandas and cudf range indexes.

    ``bounds`` is a ``(start, stop, step)`` triple used to build both indexes;
    the bound for ``label`` must be identical in the two libraries.
    """
    lower, upper, stride = bounds
    reference_index = pd.RangeIndex(lower, upper, stride)
    tested_index = RangeIndex(lower, upper, stride)

    # pandas is evaluated first, then cudf.
    assert reference_index.get_slice_bound(
        label, side, kind
    ) == tested_index.get_slice_bound(label, side, kind)
Esempio n. 4
0
def test_index_immutable():
    """Item assignment on a RangeIndex or GenericIndex must raise TypeError."""
    lower, upper = 10, 34

    range_idx = RangeIndex(lower, upper)
    with pytest.raises(TypeError):
        range_idx[1] = 5

    generic_idx = GenericIndex(np.arange(lower, upper))
    with pytest.raises(TypeError):
        generic_idx[1] = 5
Esempio n. 5
0
 def create_df(f, m, n):
     """Build a cudf DataFrame of random float32 columns.

     An ``(m, n)`` array is drawn with ``np.random.uniform(-1, 1)``; each of
     its ``n`` columns is cast to float32 and passed to ``cudf.DataFrame`` as a
     ``(column_number, values)`` pair. The row index is
     ``RangeIndex(f * m, f * m + m, 1)``.
     """
     samples = np.random.uniform(-1, 1, (m, n))
     named_columns = [
         (col_no, samples[:, col_no].astype(np.float32))
         for col_no in range(n)
     ]
     row_index = RangeIndex(f * m, f * m + m, 1)
     return cudf.DataFrame(named_columns, index=row_index)
Esempio n. 6
0
def test_index_find_label_range_rangeindex():
    """Cudf specific: check ``RangeIndex.find_label_range`` for both step signs.

    Each case is ``((first, last), expected)`` where ``expected`` is the pair
    returned by ``find_label_range(first, last)``.
    """
    # step > 0, labels are 3, 8, 13, 18
    ascending = RangeIndex(3, 20, 5)
    ascending_cases = [
        ((3, 8), (0, 2)),
        ((0, 7), (0, 1)),
        ((3, 19), (0, 4)),
        ((2, 21), (0, 4)),
    ]
    for (first, last), expected in ascending_cases:
        assert ascending.find_label_range(first, last) == expected

    # step < 0, labels are 20, 15, 10, 5
    descending = RangeIndex(20, 3, -5)
    descending_cases = [
        ((15, 10), (1, 3)),
        ((10, 0), (2, 4)),
        ((30, 13), (0, 2)),
        ((30, 0), (0, 4)),
    ]
    for (first, last), expected in descending_cases:
        assert descending.find_label_range(first, last) == expected
Esempio n. 7
0
    def _getitem_tuple_arg(self, arg):
        """Resolve a ``(rows, columns)`` tuple key against ``self._df``.

        ``arg[1]`` selects the columns and ``arg[0]`` selects the rows. The
        row selector is handled as a slice, a scalar, or anything else that
        ``column.as_column`` accepts (treated as a boolean mask when its dtype
        is boolean, otherwise as gather positions).

        Returns the selected frame, or whatever ``_downcast_to_series``
        produces when ``_can_downcast_to_series`` approves. For a MultiIndex
        row index with a single matching row and no slice in either position,
        ``df.iloc[0]`` is returned instead.
        """
        from cudf import MultiIndex
        from cudf.core.column import column
        from cudf.core.index import as_index

        # Iloc Step 1:
        # Gather the columns specified by the second tuple arg
        columns_df = self._get_column_selection(arg[1])
        # The column selection is given the parent frame's row index.
        columns_df._index = self._df._index

        # Iloc Step 2:
        # Gather the rows specified by the first tuple arg
        if isinstance(columns_df.index, MultiIndex):
            # MultiIndex rows: this branch always returns on its own and never
            # reaches steps 3 and 4 below.
            if isinstance(arg[0], slice):
                df = columns_df[arg[0]]
            else:
                df = columns_df.index._get_row_major(columns_df, arg[0])
            if (len(df) == 1 and len(columns_df) >= 1) and not (
                isinstance(arg[0], slice) or isinstance(arg[1], slice)
            ):
                # Pandas returns a numpy scalar in this case
                return df.iloc[0]
            if self._can_downcast_to_series(df, arg):
                return self._downcast_to_series(df, arg)
            return df
        else:
            if isinstance(arg[0], slice):
                df = columns_df._slice(arg[0])
            elif is_scalar(arg[0]):
                index = arg[0]
                # Translate a negative position into its non-negative
                # equivalent before building the one-row slice.
                if index < 0:
                    index += len(columns_df)
                df = columns_df._slice(slice(index, index + 1, 1))
            else:
                # Rebind ``arg`` so that arg[0] is a column from here on;
                # steps 3 and 4 below see this converted selector.
                arg = (column.as_column(arg[0]), arg[1])
                if pd.api.types.is_bool_dtype(arg[0]):
                    df = columns_df._apply_boolean_mask(arg[0])
                else:
                    df = columns_df._gather(arg[0])

        # Iloc Step 3:
        # Reindex
        if df.shape[0] == 1:  # we have a single row without an index
            df.index = as_index(self._df.index[arg[0]])

        # Iloc Step 4:
        # Downcast
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)

        # A completely empty result from a slice gets a RangeIndex derived
        # from the slice bounds normalised against the parent frame's length.
        if df.shape[0] == 0 and df.shape[1] == 0 and isinstance(arg[0], slice):
            from cudf.core.index import RangeIndex

            slice_len = len(self._df)
            # NOTE(review): ``step`` is unpacked but not passed to RangeIndex;
            # confirm that ignoring the slice step here is intended.
            start, stop, step = arg[0].indices(slice_len)
            df._index = RangeIndex(start, stop)
        return df
Esempio n. 8
0
def _linear_interpolation(column, index=None):
    """
    Linearly interpolate a float column as if its values were equally
    spaced along the x-axis. For example, [1.0, NaN, 3.0] is filled on the
    assumption that the NaN sits half way between its two valid neighbours,
    giving [1.0, 2.0, 3.0].

    The ``index`` argument is accepted but not used: positions
    ``0 .. len(column) - 1`` are always taken as the x-axis.
    """
    positional_index = RangeIndex(start=0, stop=len(column), step=1)
    return _index_or_values_interpolation(column, index=positional_index)
Esempio n. 9
0
def test_range_index(testrange):
    """Compare uniqueness/monotonicity flags of cudf and pandas RangeIndex.

    ``testrange`` is indexed as ``(start, stop, step)``.
    """
    # NOTE(review): the cudf index is built without ``step`` while the pandas
    # index receives testrange[2]; confirm this asymmetry is intended.
    gpu_index = RangeIndex(start=testrange[0], stop=testrange[1])
    cpu_index = pd.RangeIndex(
        start=testrange[0], stop=testrange[1], step=testrange[2]
    )

    flag_names = (
        "is_unique",
        "is_monotonic",
        "is_monotonic_increasing",
        "is_monotonic_decreasing",
    )
    for flag in flag_names:
        assert getattr(gpu_index, flag) == getattr(cpu_index, flag)
Esempio n. 10
0
def test_index_rangeindex_search_range():
    """Check ``search_range`` against every label of a positive-step index.

    For the label at position ``pos``, a "left" search must return ``pos``
    and a "right" search must return ``pos + 1``.
    """
    # step > 0
    ridx = RangeIndex(-13, 17, 4)
    # End value computed from start, step and length rather than the
    # constructor's ``stop`` argument.
    end = ridx._start + ridx._step * len(ridx)
    for pos in range(len(ridx)):
        left_hit = search_range(
            ridx._start, end, ridx[pos], ridx._step, side="left"
        )
        assert pos == left_hit
        right_hit = search_range(
            ridx._start, end, ridx[pos], ridx._step, side="right"
        )
        assert pos + 1 == right_hit
Esempio n. 11
0
    def _getitem_tuple_arg(self, arg):
        """Resolve a ``(rows, columns)`` tuple key against ``self._df``.

        ``arg[1]`` selects the columns and ``arg[0]`` selects the rows. For a
        non-MultiIndex row index the result is rebuilt column by column by
        indexing each selected column with ``arg[0]``.

        Returns the selected frame, or whatever ``_downcast_to_series``
        produces when ``_can_downcast_to_series`` approves. For a MultiIndex
        row index with a single matching row and no slice in either position,
        ``df[0]`` is returned instead.
        """
        from cudf import MultiIndex
        from cudf.core.dataframe import DataFrame, Series
        from cudf.core.index import as_index

        # Iloc Step 1:
        # Gather the columns specified by the second tuple arg
        columns_df = self._get_column_selection(arg[1])
        # The column selection is given the parent frame's row index.
        columns_df._index = self._df._index

        # Iloc Step 2:
        # Gather the rows specified by the first tuple arg
        if isinstance(columns_df.index, MultiIndex):
            # MultiIndex rows: this branch always returns on its own and never
            # reaches steps 3 and 4 below.
            if isinstance(arg[0], slice):
                df = columns_df[arg[0]]
            else:
                df = columns_df.index._get_row_major(columns_df, arg[0])
            if (len(df) == 1 and len(columns_df) >= 1) and not (isinstance(
                    arg[0], slice) or isinstance(arg[1], slice)):
                # Pandas returns a numpy scalar in this case
                return df[0]
            if self._can_downcast_to_series(df, arg):
                return self._downcast_to_series(df, arg)
            return df
        else:
            # Build the result one column at a time, keyed by position; the
            # real column labels are restored after the loop.
            df = DataFrame()
            for i, col in enumerate(columns_df._columns):
                # need Series() in case a scalar is returned
                df[i] = Series(col[arg[0]])

            df.index = as_index(columns_df.index[arg[0]])
            df.columns = columns_df.columns

        # Iloc Step 3:
        # Reindex
        if df.shape[0] == 1:  # we have a single row without an index
            df.index = as_index(self._df.index[arg[0]])

        # Iloc Step 4:
        # Downcast
        if self._can_downcast_to_series(df, arg):
            return self._downcast_to_series(df, arg)

        # A completely empty result from a slice gets a RangeIndex derived
        # from the slice bounds normalised against the parent frame's length.
        if df.shape[0] == 0 and df.shape[1] == 0 and isinstance(arg[0], slice):
            from cudf.core.index import RangeIndex

            slice_len = len(self._df)
            # NOTE(review): ``step`` is unpacked but not passed to RangeIndex;
            # confirm that ignoring the slice step here is intended.
            start, stop, step = arg[0].indices(slice_len)
            df._index = RangeIndex(start, stop)
        return df
Esempio n. 12
0
def test_rangeindex_slice_attr_name():
    """Slicing a named RangeIndex must keep the index name."""
    start, stop = 0, 10
    # Pass the name by keyword: the third positional parameter of a
    # pandas-compatible RangeIndex is ``step``, so a positional "myindex"
    # would not be interpreted as the name there.
    rg = RangeIndex(start, stop, name="myindex")
    # Guard against a vacuous pass where both names are simply unset.
    assert rg.name == "myindex"
    sliced_rg = rg[0:9]
    assert_eq(rg.name, sliced_rg.name)
Esempio n. 13
0
    def _getitem_tuple_arg(self, arg):
        """Resolve a ``(rows, columns)`` tuple key against ``self._df``.

        ``arg[1]`` selects the columns and ``arg[0]`` selects the rows.
        MultiIndex columns and MultiIndex rows are delegated to the
        MultiIndex helpers ``_get_column_major`` / ``_get_row_major``; in the
        plain case the result is rebuilt column by column by indexing each
        selected column with ``arg[0]``.

        Returns the selected frame, a Series when the result can be downcast,
        or a single element in the MultiIndex scalar-lookup cases below.
        """
        from cudf import MultiIndex
        from cudf.core.dataframe import DataFrame, Series
        from cudf.core.column import column_empty
        from cudf.core.index import as_index

        # Iloc Step 1:
        # Gather the columns specified by the second tuple arg
        columns = self._get_column_selection(arg[1])
        if isinstance(self._df.columns, MultiIndex):
            columns_df = self._df.columns._get_column_major(self._df, arg[1])
            if (len(columns_df) == 0 and len(columns_df.columns) == 0
                    and not isinstance(arg[0], slice)):
                # Nothing was selected and the row key is not a slice:
                # answer with an empty float64 Series named after the row key.
                result = Series(column_empty(0, dtype="float64"), name=arg[0])
                result._index = columns_df.columns.copy(deep=False)
                return result
        else:
            if isinstance(arg[0], slice):
                # Row slice: assemble a new frame from the selected columns
                # and give it the parent frame's row index.
                columns_df = DataFrame()
                for i, col in enumerate(columns):
                    columns_df.insert(i, col, self._df[col])
                columns_df._index = self._df._index
            else:
                columns_df = self._df._columns_view(columns)

        # Iloc Step 2:
        # Gather the rows specified by the first tuple arg
        if isinstance(columns_df.index, MultiIndex):
            # MultiIndex rows: this branch always returns on its own and never
            # reaches steps 3 and 4 below.
            df = columns_df.index._get_row_major(columns_df, arg[0])
            if (len(df) == 1 and len(columns_df) >= 1) and not (isinstance(
                    arg[0], slice) or isinstance(arg[1], slice)):
                # Pandas returns a numpy scalar in this case
                return df[0]
            if self._can_downcast_to_series(df, arg):
                return self._downcast_to_series(df, arg)
            return df
        else:
            # Build the result one column at a time, keyed by position; the
            # real column labels are restored after the loop.
            df = DataFrame()
            for i, col in enumerate(columns_df._columns):
                # need Series() in case a scalar is returned
                df[i] = Series(col[arg[0]])

            df.index = as_index(columns_df.index[arg[0]])
            df.columns = columns_df.columns

        # Iloc Step 3:
        # Reindex
        if df.shape[0] == 1:  # we have a single row without an index
            if isinstance(arg[0], slice):
                # A one-row slice is re-labelled from its start position
                # (0 when the slice has no explicit start).
                start = arg[0].start
                if start is None:
                    start = 0
                df.index = as_index(self._df.index[start])
            else:
                df.index = as_index(self._df.index[arg[0]])

        # Iloc Step 4:
        # Downcast
        if self._can_downcast_to_series(df, arg):
            if isinstance(df.columns, MultiIndex):
                if len(df) > 0 and not (isinstance(arg[0], slice)
                                        or isinstance(arg[1], slice)):
                    # Neither key is a slice: return the first element of the
                    # first stored column.
                    return list(df._data.values())[0][0]
                elif df.shape[1] > 1:
                    result = self._downcast_to_series(df, arg)
                    result.index = df.columns
                    return result
                elif not isinstance(arg[0], slice):
                    if len(df._data) == 0:
                        return Series(
                            column_empty(0, dtype="float64"),
                            index=df.columns,
                            name=arg[0],
                        )
                    else:
                        result_series = df[df.columns[0]]
                        result_series.index = df.columns
                        result_series.name = arg[0]
                        return result_series
                else:
                    return df[df.columns[0]]
            return self._downcast_to_series(df, arg)

        # A completely empty result from a slice gets a RangeIndex derived
        # from the slice bounds. Only a slice has ``.stop`` / ``.indices``, so
        # any other row selector leaves the frame's index untouched instead of
        # raising AttributeError.
        if (df.shape[0] == 0 and df.shape[1] == 0
                and isinstance(arg[0], slice)):
            from cudf.core.index import RangeIndex

            # NOTE(review): the slice's own ``stop`` is used as the length
            # when it is truthy; a negative stop would make ``indices`` raise
            # ValueError. Confirm whether len(self._df) should always be used.
            slice_len = arg[0].stop or len(self._df)
            start, stop, _ = arg[0].indices(slice_len)
            df._index = RangeIndex(start, stop)
        return df
Esempio n. 14
0
def test_rangeindex_contains():
    """Membership on RangeIndex(0, 10): 9 is contained, 10 (the stop) is not."""
    last_label_found = 9 in RangeIndex(start=0, stop=10, name="Index")
    assert_eq(True, last_label_found)

    stop_value_found = 10 in RangeIndex(start=0, stop=10, name="Index")
    assert_eq(False, stop_value_found)