Beispiel #1
0
def test_get_slice_bound_missing(label, side, kind):
    """``GenericIndex.get_slice_bound`` must agree with pandas.

    Both indices are built from the same five even integers; ``label``,
    ``side`` and ``kind`` are forwarded unchanged to each implementation.
    (They are presumably supplied by pytest parametrization, which is not
    visible in this snippet.)
    """
    values = [2, 4, 6, 8, 10]
    index = GenericIndex(values)
    index_pd = pd.Index(values)
    actual = index.get_slice_bound(label, side, kind)
    expected = index_pd.get_slice_bound(label, side, kind)
    assert actual == expected
Beispiel #2
0
def test_get_slice_bound(testlist, side, kind):
    """Every label present in ``testlist`` yields the same bound as pandas.

    An index is built from ``testlist`` with both libraries, then each
    element of ``testlist`` is used in turn as the label to look up.
    """
    index = GenericIndex(testlist)
    index_pd = pd.Index(testlist)
    for label in testlist:
        actual = index.get_slice_bound(label, side, kind)
        expected = index_pd.get_slice_bound(label, side, kind)
        assert actual == expected
Beispiel #3
0
def test_index_comparision():
    """``equals`` holds between a RangeIndex and a GenericIndex of same values."""
    lo, hi = 10, 34
    range_idx = RangeIndex(lo, hi)
    generic_idx = GenericIndex(np.arange(lo, hi))
    # Same values on both sides: equal regardless of which side is asked.
    assert range_idx.equals(generic_idx)
    assert generic_idx.equals(range_idx)
    # Dropping the last element from one side only breaks equality ...
    assert not range_idx[:-1].equals(generic_idx)
    # ... and dropping it from both sides restores it.
    assert range_idx[:-1].equals(generic_idx[:-1])
Beispiel #4
0
def test_get_slice_bound_missing_str(label, side):
    """Compare ``get_slice_bound`` with pandas on a small string index.

    NOTE: slicing for monotonic string indices is not yet supported when
    missing values are specified, although pandas allows it.
    """
    letters = ["b", "d", "f"]
    index = GenericIndex(letters)
    index_pd = pd.Index(letters)
    kind = "getitem"
    actual = index.get_slice_bound(label, side, kind)
    expected = index_pd.get_slice_bound(label, side, kind)
    assert actual == expected
Beispiel #5
0
def test_index_immutable():
    """Item assignment on either index type must raise ``TypeError``."""
    lo, hi = 10, 34

    range_idx = RangeIndex(lo, hi)
    with pytest.raises(TypeError):
        range_idx[1] = 5

    generic_idx = GenericIndex(np.arange(lo, hi))
    with pytest.raises(TypeError):
        generic_idx[1] = 5
Beispiel #6
0
def test_index_comparision():
    """``==`` / ``!=`` between a RangeIndex and a GenericIndex of same values."""
    lo, hi = 10, 34
    range_idx = RangeIndex(lo, hi)
    generic_idx = GenericIndex(np.arange(lo, hi))
    # Same values on both sides: equal with either operand on the left.
    assert range_idx == generic_idx
    assert generic_idx == range_idx
    # Dropping the last element from one side only makes them differ ...
    assert range_idx[:-1] != generic_idx
    # ... and dropping it from both sides makes them equal again.
    assert range_idx[:-1] == generic_idx[:-1]
Beispiel #7
0
def test_generic_index(testlist):
    """Uniqueness and monotonicity flags of GenericIndex must match pandas."""
    index = GenericIndex(testlist)
    index_pd = pd.Index(testlist)

    # Checked in this order; the first mismatch fails the test.
    flags = (
        "is_unique",
        "is_monotonic",
        "is_monotonic_increasing",
        "is_monotonic_decreasing",
    )
    for flag in flags:
        assert getattr(index, flag) == getattr(index_pd, flag)
Beispiel #8
0
def test_index_find_label_range():
    """Exercise ``find_label_range`` on the index ``[4, 5, 6, 10]``.

    When both labels are present the result is a ``(start, stop)`` pair of
    positions; as the asserted values show, ``stop`` is one past the
    position of the last label.  When either label is absent a
    ``ValueError`` whose message matches "value not found" is expected.
    """
    idx = GenericIndex(np.asarray([4, 5, 6, 10]))
    assert idx.find_label_range(4, 6) == (0, 3)
    assert idx.find_label_range(5, 10) == (1, 4)
    # First value not found
    with pytest.raises(ValueError, match="value not found"):
        idx.find_label_range(0, 6)
    # Last value not found
    with pytest.raises(ValueError, match="value not found"):
        idx.find_label_range(4, 11)
Beispiel #9
0
 def apply_multiindex_or_single_index(self, result):
     """Move the group-by key column(s) of ``result`` into its index.

     ``self._by`` holds the key column names.  With one key the index is
     built from that column; with several a ``MultiIndex`` is built from
     them.  Non-key columns are kept as data.  If exactly one data column
     remains and ``self`` has a ``_gotattr`` attribute, a ``Series`` is
     returned instead of a ``DataFrame`` (this applies to the empty and the
     multi-key paths only).

     NOTE(review): ``_gotattr`` is presumably set when a single column was
     selected via attribute access on the groupby object -- confirm.
     """
     keys = self._by
     single_key = len(keys) == 1

     # ---- empty result: build an empty frame with a matching index ----
     if len(result) == 0:
         empty = DataFrame()
         for col in result.columns:
             if col in keys:
                 continue
             empty[col] = result[col]
         if single_key or len(empty.columns) == 0:
             if single_key:
                 dtype, name = 'float64', keys[0]
             else:
                 dtype, name = 'object', None
             from cudf.dataframe.index import GenericIndex
             index = GenericIndex(Series([], dtype=dtype))
             index.name = name
             empty.index = index
         else:
             mi = MultiIndex(source_data=result[keys])
             mi.names = keys
             empty.index = mi
         if len(empty.columns) == 1 and hasattr(self, "_gotattr"):
             empty_series = Series([], name=empty.columns[0])
             empty_series.index = empty.index
             return empty_series
         return empty

     # ---- single key: that column becomes the index ----
     if single_key:
         from cudf.dataframe import index
         idx = index.as_index(result[keys[0]])
         idx.name = keys[0]
         result = result.drop(idx.name)
         # Swap the placeholder level-0 name back to the original name.
         if idx.name == self._LEVEL_0_INDEX_NAME:
             idx.name = self._original_index_name
         return result.set_index(idx)

     # ---- several keys: they become a MultiIndex ----
     multi_index = MultiIndex(source_data=result[keys])
     values = DataFrame()
     for col in result.columns:
         if col not in keys:
             values[col] = result[col]
     if len(values.columns) == 1 and hasattr(self, "_gotattr"):
         only_col = values.columns[0]
         as_series = Series(values[only_col])
         as_series.name = only_col
         as_series.index = multi_index
         return as_series
     return values.set_index(multi_index)
Beispiel #10
0
def test_name():
    """The ``name`` given at construction is exposed as ``.name``."""
    values = np.asarray([4, 5, 6, 10])
    named = GenericIndex(values, name='foo')
    assert named.name == 'foo'
Beispiel #11
0
def test_reductions(func):
    """``func`` gives the same result on a NumPy array and a GenericIndex.

    Both inputs are built from the same four integers; ``func`` is supplied
    by the caller (presumably a parametrized reduction -- not visible here).
    """
    values = [4, 5, 6, 10]
    host = np.asarray(values)
    idx = GenericIndex(np.asarray(values))

    assert func(host) == func(idx)
Beispiel #12
0
 def apply_multiindex_or_single_index(self, result):
     """Move the group-by key column(s) of ``result`` into its index.

     ``self._by`` holds the key column names.  String column names of the
     form ``'cudfvalcol+<name>'`` are renamed to ``<name>``; for string key
     names, only the part before the first ``'+'`` is kept when there is no
     ``'cudfvalcol'`` prefix.

     * Empty ``result``: returns an empty ``DataFrame`` of the non-key
       columns, indexed by an empty ``GenericIndex`` (one key, or no data
       columns) or by a ``MultiIndex`` built from the key columns.
     * One key: that column becomes the index of ``result``.
     * Several keys: they become a ``MultiIndex``.  In this branch
       ``self._by`` is overwritten with the cleaned key names.
     """
     if len(result) == 0:
         final_result = DataFrame()
         for col in result.columns:
             if col not in self._by:
                 final_result[col] = result[col]
         if len(self._by) == 1 or len(final_result.columns) == 0:
             if len(self._by) == 1:
                 # The empty index takes the dtype of the key column; the
                 # column itself is not a valid ``dtype`` argument.
                 dtype = self._df[self._by[0]].dtype
             else:
                 dtype = 'object'
             name = self._by[0] if len(self._by) == 1 else None
             from cudf.dataframe.index import GenericIndex
             index = GenericIndex(Series([], dtype=dtype))
             index.name = name
             final_result.index = index
         else:
             mi = MultiIndex(source_data=result[self._by])
             mi.names = self._by
             final_result.index = mi
         return final_result
     if len(self._by) == 1:
         from cudf.dataframe import index
         idx = index.as_index(result[self._by[0]])
         name = self._by[0]
         if isinstance(name, str):
             # Strip the 'cudfvalcol+' prefix (or anything after '+').
             name = self._by[0].split('+')
             if name[0] == 'cudfvalcol':
                 idx.name = name[1]
             else:
                 idx.name = name[0]
             result = result.drop(self._by[0])
         # Rename prefixed value columns back to their plain names.
         for col in result.columns:
             if isinstance(col, str):
                 colnames = col.split('+')
                 if colnames[0] == 'cudfvalcol':
                     result[colnames[1]] = result[col]
                     result = result.drop(col)
         # Swap the placeholder level-0 name back to the original name.
         if idx.name == _LEVEL_0_INDEX_NAME:
             idx.name = self._original_index_name
         result = result.set_index(idx)
         return result
     else:
         # Rename prefixed value columns back to their plain names.
         for col in result.columns:
             if isinstance(col, str):
                 colnames = col.split('+')
                 if colnames[0] == 'cudfvalcol':
                     result[colnames[1]] = result[col]
                     result = result.drop(col)
         new_by = []
         for by in self._by:
             # Test the key itself, not the leftover ``col`` from the loop
             # above: only string keys can be split.
             if isinstance(by, str):
                 splitby = by.split('+')
                 if splitby[0] == 'cudfvalcol':
                     new_by.append(splitby[1])
                 else:
                     new_by.append(splitby[0])
             else:
                 new_by.append(by)
         self._by = new_by
         multi_index = MultiIndex(source_data=result[self._by])
         final_result = DataFrame()
         for col in result.columns:
             if col not in self._by:
                 final_result[col] = result[col]
         # With no data columns left, keep the key columns as data.
         if len(final_result.columns) > 0:
             return final_result.set_index(multi_index)
         else:
             return result.set_index(multi_index)
Beispiel #13
0
 def apply_multiindex_or_single_index(self, result):
     """Move the group-by key column(s) of ``result`` into its index.

     ``self._by`` holds the key column names.  With one key the index is
     built from that column; with several a ``MultiIndex`` is assembled
     from per-key levels and codes.  Non-key columns are kept as data.  If
     exactly one data column remains and ``self`` has a ``_gotattr``
     attribute, a ``Series`` is returned instead of a ``DataFrame`` (this
     applies to the empty and the multi-key paths only).

     NOTE(review): ``_gotattr`` is presumably set when a single column was
     selected via attribute access on the groupby object -- confirm.
     """
     keys = self._by
     single_key = len(keys) == 1

     # ---- empty result: build an empty frame with a matching index ----
     if len(result) == 0:
         empty = DataFrame()
         for col in result.columns:
             if col in keys:
                 continue
             empty[col] = result[col]
         if single_key or len(empty.columns) == 0:
             if single_key:
                 dtype, name = 'float64', keys[0]
             else:
                 dtype, name = 'object', None
             from cudf.dataframe.index import GenericIndex
             index = GenericIndex(Series([], dtype=dtype))
             index.name = name
             empty.index = index
         else:
             # One empty level and one empty code list per key.
             empty_levels = [[] for _ in keys]
             empty_codes = [[] for _ in keys]
             key_names = list(keys)
             mi = MultiIndex(empty_levels, empty_codes)
             mi.names = key_names
             empty.index = mi
         if len(empty.columns) == 1 and hasattr(self, "_gotattr"):
             empty_series = Series([], name=empty.columns[0])
             empty_series.index = empty.index
             return empty_series
         return empty

     # ---- single key: that column becomes the index ----
     if single_key:
         from cudf.dataframe import index
         idx = index.as_index(result[keys[0]])
         idx.name = keys[0]
         result = result.drop(idx.name)
         # Swap the placeholder level-0 name back to the original name.
         if idx.name == self._LEVEL_0_INDEX_NAME:
             idx.name = self._original_index_name
         return result.set_index(idx)

     # ---- several keys: assemble levels and codes for a MultiIndex ----
     # Note: This is an O(N^2) solution using gpu masking
     # to compute new codes for the MultiIndex. There may be
     # a faster solution that could be executed on gpu at the same
     # time the groupby is calculated.
     levels = []
     names = []
     codes = DataFrame()
     for key in keys:
         uniques = result[key].unique()
         # Replace each value with its position among the unique values.
         positions = result[key].replace(uniques, range(len(uniques)))
         levels.append(uniques)
         codes[key] = Series(positions, dtype="int32")
         names.append(key)
     multi_index = MultiIndex(levels=levels, codes=codes, names=names)

     values = DataFrame()
     for col in result.columns:
         if col not in keys:
             values[col] = result[col]
     if len(values.columns) == 1 and hasattr(self, "_gotattr"):
         only_col = values.columns[0]
         as_series = Series(values[only_col])
         as_series.name = only_col
         as_series.index = multi_index
         return as_series
     return values.set_index(multi_index)