def _get_row_major(self, df, row_tuple):
    """Take the rows of ``df`` selected by ``row_tuple`` and rebuild the index.

    The row positions come from ``self._compute_validity_mask(df, row_tuple)``
    and are gathered with ``df.take``. The index of the result is then reduced
    to the levels that were not consumed by ``row_tuple``.

    Parameters
    ----------
    df : DataFrame
        Frame whose ``index`` exposes ``levels``, ``codes`` and ``names``.
    row_tuple : tuple
        Lookup key; its length is the number of leading levels consumed.

    Returns
    -------
    DataFrame
        The gathered rows. Its index is a ``StringIndex`` when exactly one
        level remains, the popped index when several remain, and is left as
        produced by ``take`` when none remain.
    """
    matching_rows = self._compute_validity_mask(df, row_tuple)
    from cudf import DataFrame, Series

    result = df.take(Series(matching_rows))

    # Build new index - INDEX based MultiIndex
    # ---------------
    # Collect the code columns of the last n-k levels, where n is the number
    # of source levels and k is the length of the indexing tuple.
    source_index = df.index
    remaining_levels = DataFrame()
    for level_pos in range(len(row_tuple), len(source_index.levels)):
        level_name = source_index.names[level_pos]
        code_label = source_index.codes.columns[level_pos]
        remaining_levels.add_column(level_name, source_index.codes[code_label])

    remaining_count = len(remaining_levels.columns)
    if remaining_count == 1:
        # Only one level is left: translate the codes of the last level into
        # their level values and use them as a flat StringIndex.
        last = len(result.index.codes.columns) - 1
        last_code_label = result.index.codes.columns[last]
        labels = [
            result.index.levels[last][code]
            for code in result.index.codes[last_code_label]
        ]
        # TODO: Warning! The final index column could be arbitrarily
        # ordered integers, not Strings, so we need to check for that
        # dtype and produce a GenericIndex instead of a StringIndex
        flat_index = StringIndex(labels)
        flat_index.name = result.index.names[len(result.index.names) - 1]
        result.index = flat_index
    elif remaining_count > 0:
        # Several levels are left: pop the leftmost levels, names, and codes
        # from the index until it has the correct number of columns (n-k).
        # NOTE(review): the return value of reset_index is discarded here,
        # exactly as in the original code - confirm whether it is needed.
        result.reset_index(drop=True)
        result.index = result.index._popn(len(row_tuple))
    return result
def test_string_index(testlist):
    """Check that ``StringIndex`` and ``pd.Index`` agree on order/uniqueness flags.

    Both indexes are built from ``testlist`` and compared on ``is_unique``
    and the three monotonicity properties, in that order.
    """
    got = StringIndex(testlist)
    expected = pd.Index(testlist)
    compared_properties = (
        "is_unique",
        "is_monotonic",
        "is_monotonic_increasing",
        "is_monotonic_decreasing",
    )
    for prop in compared_properties:
        assert getattr(got, prop) == getattr(expected, prop)
def test_string_index():
    """Assign several string-labelled indexes to matching frames and compare.

    The same index object (a list, a NumPy array, a ``StringIndex`` and a
    ``StringColumn``) is set on both the pandas frame and the frame built
    from it via ``DataFrame.from_pandas``; the frames must compare equal
    after every assignment.
    """
    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)

    def assign_and_compare(new_index):
        # Install the very same index object on both frames, then compare.
        pdf.index = new_index
        gdf.index = new_index
        assert_eq(pdf, gdf)

    assign_and_compare(["a", "b", "c", "d", "e"])
    assign_and_compare(np.array(["a", "b", "c", "d", "e"]))
    assign_and_compare(StringIndex(["a", "b", "c", "d", "e"], name="name"))
    assign_and_compare(StringColumn(["a", "b", "c", "d", "e"], name="name"))
def test_string_index():
    """Verify that string indexes of several container types round-trip.

    A plain list, a NumPy array, a ``StringIndex`` and a ``StringColumn``
    are each assigned as the index of a pandas frame and of the frame built
    from it with ``DataFrame.from_pandas``; ``assert_eq`` must hold each time.
    """
    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)

    letters = ('a', 'b', 'c', 'd', 'e')
    # Factories are used so that each index is only constructed right before
    # it is assigned, one after the other.
    index_factories = (
        lambda: list(letters),
        lambda: np.array(list(letters)),
        lambda: StringIndex(list(letters), name='name'),
        lambda: StringColumn(list(letters), name='name'),
    )
    for make_index in index_factories:
        string_index = make_index()
        pdf.index = string_index
        gdf.index = string_index
        assert_eq(pdf, gdf)
def _get_column_major(self, df, row_tuple):
    """Select the columns of ``df`` matching ``row_tuple`` and rebuild labels.

    Column positions come from ``self._compute_validity_mask(df, row_tuple)``.
    Every column of ``df`` whose position is contained in that mask is copied
    into a new frame, keyed by its position. The column labels of the result
    are then taken from this index (with the leading ``len(row_tuple)``
    levels popped when the tuple is shorter than the number of levels).

    Parameters
    ----------
    df : DataFrame
        Source frame; its columns are read through ``df._cols``.
    row_tuple : tuple
        Lookup key; its length is the number of leading levels consumed.

    Returns
    -------
    DataFrame
        Frame holding the selected columns. Its ``columns`` is a
        ``StringIndex`` when a single level remains, otherwise the result
        of ``take`` on the (possibly popped) index.
    """
    valid_indices = self._compute_validity_mask(df, row_tuple)
    from cudf import DataFrame

    # Materialise the column list once; rebuilding it for every selected
    # column made the selection loop quadratic in the number of columns.
    source_columns = list(df._cols.values())
    result = DataFrame()
    for ix, _ in enumerate(df.columns):
        if ix in valid_indices:
            result[ix] = source_columns[ix]

    # Build new index - COLUMN based MultiIndex
    # ---------------
    if len(row_tuple) < len(self.levels):
        # Drop the levels already consumed by the lookup tuple.
        result.columns = self._popn(len(row_tuple)).take(valid_indices)
    else:
        result.columns = self.take(valid_indices)

    if len(result.columns.levels) == 1:
        # A single remaining level is flattened: map each code to its level
        # value and expose the labels as a named StringIndex.
        code_label = result.columns.codes.columns[0]
        labels = [
            result.columns.levels[0][code]
            for code in result.columns.codes[code_label]
        ]
        result.columns = StringIndex(labels, name=result.columns.names[0])
    return result