예제 #1
0
파일: multiindex.py 프로젝트: jimmytuc/cudf
 def _get_row_major(self, df, row_tuple):
     """Gather the rows of ``df`` selected by ``row_tuple`` and trim the index.

     The rows to keep come from ``self._compute_validity_mask`` and are
     gathered with ``df.take``. The index of the gathered frame is then
     adjusted according to how many index levels ``row_tuple`` did not
     address:

     * exactly one level left: the index is replaced by a ``StringIndex``
       built from the values of the last level, named after the last
       index name;
     * more than one level left: the index is replaced by
       ``result.index._popn(len(row_tuple))``;
     * no level left: the index of the gathered frame is kept unchanged.

     Returns the gathered frame.
     """
     from cudf import DataFrame, Series

     row_mask = self._compute_validity_mask(df, row_tuple)
     result = df.take(Series(row_mask))

     # Build new index - INDEX based MultiIndex
     # ---------------
     # Collect the code columns of the last n-k levels, where n is the
     # number of source levels and k is the length of the indexing tuple.
     # Only the number of collected columns is consulted afterwards.
     leftover = DataFrame()
     for level in range(len(row_tuple), len(df.index.levels)):
         level_name = df.index.names[level]
         level_codes = df.index.codes[df.index.codes.columns[level]]
         leftover.add_column(level_name, level_codes)
     n_leftover = len(leftover.columns)

     if n_leftover == 1:
         # A single remaining level is flattened: translate each code of
         # the last level into the value it stands for.
         last_level = len(result.index.codes.columns) - 1
         last_codes = result.index.codes[
             result.index.codes.columns[last_level]]
         last_values = result.index.levels[last_level]
         # TODO: Warning! The final index column could be arbitrarily
         # ordered integers, not Strings, so we need to check for that
         # dtype and produce a GenericIndex instead of a StringIndex
         flat_index = StringIndex([last_values[code] for code in last_codes])
         flat_index.name = result.index.names[len(result.index.names) - 1]
         result.index = flat_index
     elif n_leftover > 0:
         # Several levels remain: replace the index with the one returned
         # by _popn(k), which (per its name) presumably drops the leftmost
         # k levels, names and codes.
         # NOTE(review): the value returned by reset_index is discarded
         # here; whether the call has any in-place effect depends on its
         # implementation -- confirm before removing or rebinding it.
         result.reset_index(drop=True)
         result.index = result.index._popn(len(row_tuple))
     return result
예제 #2
0
def test_string_index(testlist):
    """Check StringIndex against pandas.Index on uniqueness and monotonicity.

    Both indexes are built from ``testlist`` and each compared property
    must have the same value on the two objects.
    """
    actual = StringIndex(testlist)
    expected = pd.Index(testlist)

    compared_properties = (
        "is_unique",
        "is_monotonic",
        "is_monotonic_increasing",
        "is_monotonic_decreasing",
    )
    for prop in compared_properties:
        # The property name is the assertion message so a failure names
        # the attribute that diverged.
        assert getattr(actual, prop) == getattr(expected, prop), prop
예제 #3
0
def test_string_index():
    """Assign several kinds of string index to a pandas and a cudf frame.

    The same index object is set on both frames and the frames are then
    compared with ``assert_eq``. Four index inputs are exercised in turn:
    a Python list, a NumPy array, a named ``StringIndex`` and a named
    ``StringColumn``.
    """
    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)

    def check(string_index):
        # Apply the identical index object to both frames, then compare.
        pdf.index = string_index
        gdf.index = string_index
        assert_eq(pdf, gdf)

    # Each argument is constructed right before its check runs, so a
    # failing constructor cannot mask the earlier comparisons.
    check(["a", "b", "c", "d", "e"])
    check(np.array(["a", "b", "c", "d", "e"]))
    check(StringIndex(["a", "b", "c", "d", "e"], name="name"))
    check(StringColumn(["a", "b", "c", "d", "e"], name="name"))
예제 #4
0
def test_string_index():
    """Assign several kinds of string index to a pandas and a cudf frame.

    For each index input (list, NumPy array, named ``StringIndex``, named
    ``StringColumn``) the same object is set as the index of both frames,
    which are then compared with ``assert_eq``.
    """
    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)

    # Factories rather than ready-made objects: every index is created
    # immediately before it is assigned, in the order listed.
    index_factories = (
        lambda: ['a', 'b', 'c', 'd', 'e'],
        lambda: np.array(['a', 'b', 'c', 'd', 'e']),
        lambda: StringIndex(['a', 'b', 'c', 'd', 'e'], name='name'),
        lambda: StringColumn(['a', 'b', 'c', 'd', 'e'], name='name'),
    )
    for make_index in index_factories:
        string_index = make_index()
        pdf.index = string_index
        gdf.index = string_index
        assert_eq(pdf, gdf)
예제 #5
0
파일: multiindex.py 프로젝트: jimmytuc/cudf
 def _get_column_major(self, df, row_tuple):
     """Select the columns of ``df`` matched by ``row_tuple``.

     The positions to keep come from ``self._compute_validity_mask``. A
     new ``DataFrame`` is filled with the columns of ``df`` at those
     positions, keyed by position. Its column index is then rebuilt:

     * if ``row_tuple`` is shorter than ``self.levels``, from
       ``self._popn(len(row_tuple))``; otherwise from ``self`` itself;
       in both cases restricted with ``take(valid_indices)``;
     * if the resulting column index has exactly one level, it is
       replaced by a ``StringIndex`` holding the level values for each
       code, named after the first column-index name.

     Returns the new frame.
     """
     from cudf import DataFrame

     valid_indices = self._compute_validity_mask(df, row_tuple)

     # Materialise the column objects once instead of on every loop
     # iteration, so the selection stays linear in the number of columns.
     source_columns = list(df._cols.values())
     result = DataFrame()
     for ix, _ in enumerate(df.columns):
         if ix in valid_indices:
             result[ix] = source_columns[ix]

     # Build new index - COLUMN based MultiIndex
     # ---------------
     if len(row_tuple) < len(self.levels):
         column_source = self._popn(len(row_tuple))
     else:
         column_source = self
     result.columns = column_source.take(valid_indices)

     if len(result.columns.levels) == 1:
         # Only one level is left: map every code of that level to the
         # value it refers to and use the flat labels as column index.
         only_codes = result.columns.codes[result.columns.codes.columns[0]]
         only_values = result.columns.levels[0]
         labels = [only_values[code] for code in only_codes]
         name = result.columns.names[0]
         result.columns = StringIndex(labels, name=name)
     return result