Example #1
def test_multiindex_sort_values(pmidx, ascending, return_indexer):
    midx = cudf.from_pandas(pmidx)

    expected = pmidx.sort_values(
        ascending=ascending, return_indexer=return_indexer
    )
    actual = midx.sort_values(
        ascending=ascending, return_indexer=return_indexer
    )

    if return_indexer:
        expected_indexer = expected[1]
        actual_indexer = actual[1]

        assert_eq(expected_indexer, actual_indexer)

        expected = expected[0]
        actual = actual[0]

    assert_eq(expected, actual)
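
The unpacking above relies on the pandas return_indexer contract: with return_indexer=True, sort_values returns a (sorted_index, indexer) pair instead of just the sorted index. A minimal standalone sketch of that contract (plain pandas, hypothetical values):

import pandas as pd

idx = pd.Index([30, 10, 20])
sorted_idx, indexer = idx.sort_values(return_indexer=True)
assert list(sorted_idx) == [10, 20, 30]
assert list(indexer) == [1, 2, 0]  # positions of the sorted values in idx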
Example #2
def test_series_setitem_categorical():
    psr = pd.Series(["a", "b", "a", "c", "d"], dtype="category")
    gsr = cudf.from_pandas(psr)

    psr[0] = "d"
    gsr[0] = "d"
    assert_eq(psr, gsr)

    psr = psr.cat.add_categories(["e"])
    gsr = gsr.cat.add_categories(["e"])
    psr[0] = "e"
    gsr[0] = "e"
    assert_eq(psr, gsr)

    psr[[0, 1]] = "b"
    gsr[[0, 1]] = "b"
    assert_eq(psr, gsr)

    psr[0:3] = "e"
    gsr[0:3] = "e"
    assert_eq(psr, gsr)
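
The add_categories call above is what makes the "e" assignments legal: assigning a value that is not already a registered category raises. A minimal pandas-only sketch of that failure mode (hypothetical values; older pandas raises ValueError rather than TypeError):

import pandas as pd

s = pd.Series(["a", "b"], dtype="category")
try:
    s[0] = "e"                       # fails: "e" is not a category yet
except (TypeError, ValueError):
    s = s.cat.add_categories(["e"])  # register the category first
    s[0] = "e"                       # now the assignment succeeds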
Example #3
def test_concat_join_one_df(ignore_index, sort, join, axis):
    pdf1 = pd.DataFrame({
        "x": range(10),
        "y": list(map(float, range(10))),
        "z": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    })

    gdf1 = gd.from_pandas(pdf1)

    assert_eq(
        pd.concat([pdf1],
                  sort=sort,
                  join=join,
                  ignore_index=ignore_index,
                  axis=axis),
        gd.concat([gdf1],
                  sort=sort,
                  join=join,
                  ignore_index=ignore_index,
                  axis=axis),
    )
Example #4
def test_serialize(df):
    """ This should hopefully replace all functions below """
    a = df()
    if "cudf" not in type(a).__module__:
        a = cudf.from_pandas(a)
    header, frames = a.serialize()
    msgpack.dumps(header)  # ensure that header is msgpack serializable
    ndevice = 0
    for frame in frames:
        if not isinstance(frame, (bytes, memoryview)):
            ndevice += 1
    # Indices etc. will not be DeviceNDArray
    # but data should be...
    if hasattr(df, "_cols"):
        assert ndevice >= len(df._data)
    else:
        assert ndevice > 0

    typ = type(a)
    b = typ.deserialize(header, frames)
    assert_eq(a, b)
Example #5
def get_mean_reciprocal_rank(sub):
    # sub is a pandas dataframe
    # sub should have the following columns:
    # 'row_id', 'prob', 'reference', 'item_id'
    # sorted by prob in descending order for each group
    sub = gd.from_pandas(sub)

    def get_order_in_group(prob, row_id, order):
        for i in range(cuda.threadIdx.x, len(prob), cuda.blockDim.x):
            order[i] = i

    dg = sub.groupby('row_id',
                     method="cudf").apply_grouped(get_order_in_group,
                                                  incols=['prob', 'row_id'],
                                                  outcols={'order': np.int32},
                                                  tpb=32)

    dg = dg.to_pandas()
    dg['order'] = 1.0 / (1 + dg['order'])
    dg = dg[dg['reference'] == dg['item_id']]
    return dg['order'].mean()
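
A minimal usage sketch with hypothetical data (assumes a GPU environment with cudf and numba available); note that each row_id group must already be sorted by prob in descending order, as the comments require:

import pandas as pd

sub = pd.DataFrame({
    'row_id':    [0, 0, 0, 1, 1],
    'prob':      [0.9, 0.5, 0.1, 0.8, 0.2],
    'reference': ['a', 'a', 'a', 'b', 'b'],
    'item_id':   ['x', 'a', 'y', 'b', 'z'],
})
# Group 0 hits its reference at order 1 (1 / 2 = 0.5) and group 1 at
# order 0 (1 / 1 = 1.0), so the expected MRR is (0.5 + 1.0) / 2 = 0.75.
print(get_mean_reciprocal_rank(sub))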
Example #6
def overlap_coefficient(G, ebunch=None):
    """
    NetworkX similar API.  See 'jaccard' for a description

    """
    vertex_pair = None

    G, isNx = check_nx_graph(G)

    if isNx is True and ebunch is not None:
        vertex_pair = cudf.from_pandas(pd.DataFrame(ebunch))

    df = overlap(G, vertex_pair)

    if isNx is True:
        df = df_edge_score_to_dictionary(df,
                                         k="overlap_coeff",
                                         src="source",
                                         dst="destination")

    return df
Example #7
def test_to_numeric_downcast_string_large_float(data, downcast):
    ps = pd.Series(data)
    gs = cudf.from_pandas(ps)

    if downcast == "float":
        expected = pd.to_numeric(ps, downcast=downcast)
        got = cudf.to_numeric(gs, downcast=downcast)

        # Pandas bug: https://github.com/pandas-dev/pandas/issues/19729
        with pytest.raises(AssertionError, match="Series are different"):
            assert_eq(expected, got)
    else:
        expected = pd.Series([np.inf, -np.inf])
        with pytest.warns(
                UserWarning,
                match="Downcasting from float to int "
                "will be limited by float32 precision.",
        ):
            got = cudf.to_numeric(gs, downcast=downcast)

        assert_eq(expected, got)
Example #8
def test_string_index():
    from cudf.dataframe.index import StringIndex, StringColumn

    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = cudf.from_pandas(pdf)
    stringIndex = ["a", "b", "c", "d", "e"]
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    stringIndex = np.array(["a", "b", "c", "d", "e"])
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    stringIndex = StringColumn(["a", "b", "c", "d", "e"], name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
Example #9
def test_parquet_write_partitioned(tmpdir_factory, cols):
    # Checks that write_to_dataset is wrapping to_parquet
    # as expected
    gdf_dir = str(tmpdir_factory.mktemp("gdf_dir"))
    pdf_dir = str(tmpdir_factory.mktemp("pdf_dir"))
    size = 100
    pdf = pd.DataFrame(
        {
            "a": np.arange(0, stop=size, dtype="int64"),
            "b": np.random.choice(list("abcd"), size=size),
            "c": np.random.choice(np.arange(4), size=size),
        }
    )
    pdf.to_parquet(pdf_dir, index=False, partition_cols=cols)
    gdf = cudf.from_pandas(pdf)
    gdf.to_parquet(gdf_dir, index=False, partition_cols=cols)

    # Use pandas since dataset may be partitioned
    expect = pd.read_parquet(pdf_dir)
    got = pd.read_parquet(gdf_dir)
    assert_eq(expect, got)
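
partition_cols writes a hive-style directory tree rather than a single file, which is why the test reads the result back with pandas. A minimal pandas-only sketch of that layout (hypothetical frame, assumes the pyarrow engine is installed):

import tempfile
import pandas as pd

pdf = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "x", "y"]})
out = tempfile.mkdtemp()
pdf.to_parquet(out, index=False, partition_cols=["b"])
# The directory now holds b=x/ and b=y/ subdirectories; read_parquet
# reassembles them, restoring "b" from the paths (as a categorical).
print(pd.read_parquet(out).sort_values("a"))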
Example #10
def test_csv_writer_chunksize(chunksize, tmpdir):
    pdf_df_fname = tmpdir.join("pdf_df_4.csv")
    gdf_df_fname = tmpdir.join("gdf_df_4.csv")

    pdf = make_numpy_mixed_dataframe()
    pdf["Date"] = pdf["Date"].astype("datetime64")
    # Increase the df len as chunked logic only gets applied from chunksize >=8
    pdf = pd.concat([pdf] * 5)
    gdf = cudf.from_pandas(pdf)

    pdf.to_csv(
        pdf_df_fname, date_format="%Y-%m-%dT%H:%M:%SZ", chunksize=chunksize,
    )
    gdf.to_csv(gdf_df_fname, chunksize=chunksize)

    assert os.path.exists(pdf_df_fname)
    assert os.path.exists(gdf_df_fname)

    expect = pd.read_csv(pdf_df_fname)
    got = pd.read_csv(gdf_df_fname)
    assert_eq(expect, got)
Example #11
def test_series_datetime_value_counts(data, nulls, normalize, dropna):
    psr = data.copy()

    if len(data) > 0:
        if nulls == "one":
            p = np.random.randint(0, len(data))
            psr[p] = None
        elif nulls == "some":
            p = np.random.randint(0, len(data), 2)
            psr[p] = None

    gsr = cudf.from_pandas(psr)
    expected = psr.value_counts(dropna=dropna, normalize=normalize)
    got = gsr.value_counts(dropna=dropna, normalize=normalize)

    assert_eq(expected.sort_index(), got.sort_index(), check_dtype=False)
    assert_eq(
        expected.reset_index(drop=True),
        got.reset_index(drop=True),
        check_dtype=False,
    )
Example #12
def test_groupby_split_out(split_out, column):
    df = pd.DataFrame({
        "a": np.arange(8),
        "b": [1, 0, 0, 2, 1, 1, 2, 0],
        "c": [0, 1] * 4,
        "d": ["dog", "cat", "cat", "dog", "dog", "dog", "cat", "bird"],
    }).fillna(0)
    df["e"] = df["d"].astype("category")

    gdf = cudf.from_pandas(df)

    ddf = dd.from_pandas(df, npartitions=3)
    gddf = dask_cudf.from_cudf(gdf, npartitions=3)

    ddf_result = (
        ddf.groupby(column).a.mean(split_out=split_out)
        .compute()
        .sort_values()
        .dropna()
    )
    gddf_result = (
        gddf.groupby(column).a.mean(split_out=split_out)
        .compute()
        .sort_values()
    )

    dd.assert_eq(gddf_result, ddf_result, check_index=False)
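
split_out controls how many partitions the aggregated result is spread across (by default the result collapses to a single partition). A minimal dask-only sketch (hypothetical frame):

import pandas as pd
import dask.dataframe as dd

ddf = dd.from_pandas(
    pd.DataFrame({"k": [1, 2, 1, 2], "v": [1.0, 2.0, 3.0, 4.0]}),
    npartitions=2,
)
out = ddf.groupby("k").v.mean(split_out=2)
print(out.npartitions)  # 2 -- the result itself spans two partitions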
Example #13
def test_index_sort_values(data, ascending, return_indexer):
    pdi = data
    gdi = cudf.from_pandas(pdi)

    expected = pdi.sort_values(
        ascending=ascending, return_indexer=return_indexer
    )
    actual = gdi.sort_values(
        ascending=ascending, return_indexer=return_indexer
    )

    if return_indexer:
        expected_indexer = expected[1]
        actual_indexer = actual[1]

        assert_eq(expected_indexer, actual_indexer)

        expected = expected[0]
        actual = actual[0]

    assert_eq(expected, actual)
Example #14
    def read_partition(cls, fs, piece, columns):

        path = piece["path"]
        if "rows" in piece:

            # See: (https://github.com/rapidsai/cudf/issues/6529)
            # Using `uavro` library for now. This means we must convert
            # data to pandas, and then to cudf (which is much slower
            # than `cudf.read_avro`). TODO: Once `num_rows` is fixed,
            # this can be changed to:
            #
            #   skiprows, num_rows = piece["rows"]
            #   df = cudf.io.read_avro(
            #       path, skiprows=skiprows, num_rows=num_rows
            #   )

            block_offset, part_blocks = piece["blocks"]
            file_size = fs.du(path)
            with fs.open(path, "rb") as fo:
                header = ua.core.read_header(fo)
                ua.core.scan_blocks(fo, header, file_size)
                header["blocks"] = header["blocks"][
                    block_offset : block_offset + part_blocks
                ]

                # Adjust the total row count
                nrows = 0
                for block in header["blocks"]:
                    nrows += block["nrows"]
                header["nrows"] = nrows

                # Read in as pandas and convert to cudf (avoid block scan)
                df = cudf.from_pandas(
                    ua.core.filelike_to_dataframe(fo, file_size, header, scan=False)
                )
        else:
            df = cudf.io.read_avro(path)

        # Deal with column selection
        if columns is None:
            columns = list(df.columns)
        return df[columns]
Example #15
def test_networkx_compatibility(graph_file):
    gc.collect()

    # test from_cudf_edgelist()

    M = utils.read_csv_for_nx(graph_file)

    df = pd.DataFrame()
    df['source'] = pd.Series(M['0'])
    df['target'] = pd.Series(M['1'])
    df['weight'] = pd.Series(M.weight)
    gdf = cudf.from_pandas(df)

    Gnx = nx.from_pandas_edgelist(df,
                                  source='source',
                                  target='target',
                                  edge_attr='weight',
                                  create_using=nx.DiGraph)
    G = cugraph.from_cudf_edgelist(gdf,
                                   source='source',
                                   destination='target',
                                   edge_attr='weight',
                                   create_using=cugraph.DiGraph)
    assert compare_graphs(Gnx, G)

    Gnx.clear()
    G.clear()
    Gnx = nx.from_pandas_edgelist(df,
                                  source='source',
                                  target='target',
                                  create_using=nx.DiGraph)
    G = cugraph.from_cudf_edgelist(gdf,
                                   source='source',
                                   destination='target',
                                   create_using=cugraph.DiGraph)

    assert compare_graphs(Gnx, G)

    Gnx.clear()
    G.clear()
Example #16
    def transform(self, input_df: XDataFrame) -> XDataFrame:
        """Transform data frame.

        Args:
            input_df (XDataFrame): Input data frame.
        Returns:
            XDataFrame : Output data frame.
        """
        if isinstance(input_df, pd.DataFrame):
            new_df = input_df.copy()
        elif cudf_is_available() and isinstance(input_df, cudf.DataFrame):
            new_df = input_df.to_pandas()
        else:
            raise RuntimeError("Unexpected data type: {}".format(type(input_df)))
        generated_cols = []

        input_cols = self._input_cols
        if not input_cols:
            input_cols = new_df.columns.tolist()
        if len(self._exclude_cols) > 0:
            input_cols = [col for col in input_cols if col not in self._exclude_cols]

        for col in input_cols:
            new_col = self._output_prefix + col + self._output_suffix
            if self._fillna is not None:
                new_df[new_col] = (
                    new_df[col].fillna(self._fillna).apply(self._lambda_func)
                )
            else:
                new_df[new_col] = new_df[col].apply(self._lambda_func)

            generated_cols.append(new_col)

        if cudf_is_available() and isinstance(input_df, cudf.DataFrame):
            new_df = cudf.from_pandas(new_df)

        if self._drop_origin:
            return new_df[generated_cols]

        return new_df
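
The cudf -> pandas -> apply -> cudf round trip above is the usual workaround when a transform needs arbitrary Python lambdas, which cudf's GPU-compiled apply generally cannot handle. A minimal sketch of the pattern (hypothetical column and function, assumes cudf is available):

import pandas as pd
import cudf

gdf = cudf.DataFrame({"x": [1, 2, 3]})
pdf = gdf.to_pandas()                          # run the Python lambda on the CPU
pdf["x_sq"] = pdf["x"].apply(lambda v: v * v)
gdf = cudf.from_pandas(pdf)                    # move the result back to the GPU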
Example #17
def test_groupby_size():
    pdf = pd.DataFrame({
        "a": [1, 1, 3, 4],
        "b": ["bob", "bob", "alice", "cooper"],
        "c": [1, 2, 3, 4],
    })
    gdf = cudf.from_pandas(pdf)

    assert_eq(pdf.groupby("a").size(),
              gdf.groupby("a").size(),
              check_dtype=False)

    assert_eq(
        pdf.groupby(["a", "b", "c"]).size(),
        gdf.groupby(["a", "b", "c"]).size(),
        check_dtype=False,
    )

    sr = pd.Series(range(len(pdf)))
    assert_eq(pdf.groupby(sr).size(),
              gdf.groupby(sr).size(),
              check_dtype=False)
Example #18
def test_rolling_series_basic(data, index, agg, nulls, center):
    if len(data) > 0:
        if nulls == "one":
            p = np.random.randint(0, len(data))
            data[p] = None
        elif nulls == "some":
            p1, p2 = np.random.randint(0, len(data), (2, ))
            data[p1] = None
            data[p2] = None
        elif nulls == "all":
            data = [None] * len(data)

    psr = pd.Series(data, index=index)
    gsr = cudf.from_pandas(psr)

    for window_size in range(1, len(data) + 1):
        for min_periods in range(1, window_size + 1):
            assert_eq(getattr(psr.rolling(window_size, min_periods, center),
                              agg)().fillna(-1),
                      getattr(gsr.rolling(window_size, min_periods, center),
                              agg)().fillna(-1),
                      check_dtype=False)
Example #19
    def test_inplace_predict_cudf(self):
        import cupy as cp
        import cudf
        import pandas as pd
        rows = 1000
        cols = 10
        rng = np.random.RandomState(1994)
        cp.cuda.runtime.setDevice(0)
        X = rng.randn(rows, cols)
        X = pd.DataFrame(X)
        y = rng.randn(rows)
        X = cudf.from_pandas(X)

        dtrain = xgb.DMatrix(X, y)

        booster = xgb.train({'tree_method': 'gpu_hist'},
                            dtrain,
                            num_boost_round=10)
        test = xgb.DMatrix(X)
        predt_from_array = booster.inplace_predict(X)
        predt_from_dmatrix = booster.predict(test)

        cp.testing.assert_allclose(predt_from_array, predt_from_dmatrix)

        def predict_df(x):
            # column major array
            inplace_predt = booster.inplace_predict(x.values)
            d = xgb.DMatrix(x)
            copied_predt = cp.array(booster.predict(d))
            assert cp.all(copied_predt == inplace_predt)

            inplace_predt = booster.inplace_predict(x)
            return cp.all(copied_predt == inplace_predt)

        for i in range(10):
            run_threaded_predict(X, rows, predict_df)

        base_margin = cudf.Series(rng.randn(rows))
        self.run_inplace_base_margin(booster, dtrain, X, base_margin)
Example #20
    def put(self, pandas_df):
        """
        Given a pandas.DataFrame,
        convert it to a cudf.DataFrame, and add it to the cudf_dataframe_dict.
        Return the new key added to the dictionary (as an OID).

        Parameters
        ----------
            pandas_df : pandas.dataFrame/pandas.Series
                the pandas dataFrame object.
                If it is a pandas.Series object,
                convert it to a pandas.dataFrame.
                No matter what, the pandas.dataFrame
                will be converted to a cudf.dataFrame.
        Returns
        -------
            an oid corresponding to the key generated
            when you added the new cudf.DataFrame object to the cudf_dataframe_dict.
        """
        if isinstance(pandas_df, pandas.Series):
            pandas_df = pandas_df.to_frame()
        return self.store_new_df(cudf.from_pandas(pandas_df))
Example #21
    def transform(self, y: cudf.Series) -> cudf.Series:
        """
        Transform an input into its categorical keys.

        This is intended for use with small inputs relative to the size of the
        dataset. For fitting and transforming an entire dataset, prefer
        `fit_transform`.

        Parameters
        ----------
        y : cudf.Series
            Input keys to be transformed. Its values should match the
            categories given to `fit`

        Returns
        -------
        encoded : cudf.Series
            The ordinally encoded input series

        Raises
        ------
        KeyError
            if a category appears that was not seen in `fit`
        """
        if isinstance(y, pdSeries):
            y = cudf.from_pandas(y)

        self._check_is_fitted()

        y = y.astype('category')

        encoded = y.cat.set_categories(self.classes_)._column.codes

        encoded = cudf.Series(encoded, index=y.index)

        if encoded.has_nulls and self.handle_unknown == 'error':
            raise KeyError("Attempted to encode unseen key")

        return encoded
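
The heart of the method is the category-codes trick. A minimal pandas-only sketch of the same idea (hypothetical categories); note that where the cudf path above surfaces unseen keys as nulls checked via has_nulls, pandas marks them with the sentinel code -1:

import pandas as pd

classes = pd.Index(["a", "b", "c"])   # stand-in for self.classes_
y = pd.Series(["b", "a", "z"])        # "z" was never seen during fit

codes = y.astype("category").cat.set_categories(classes).cat.codes
print(codes.tolist())                 # [1, 0, -1]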
Example #22
def test_parquet_writer_rowgroup_index_compression(
    pdf, compression, row_group_size
):
    pd_file_name = "cpu_pdf.parquet"
    gd_file_name = "gpu_pdf.parquet"

    gdf = cudf.from_pandas(pdf)

    pdf.to_parquet(
        pd_file_name, compression=compression, row_group_size=row_group_size,
    )
    gdf.to_parquet(
        gd_file_name, compression=compression, row_group_size=row_group_size,
    )

    actual = cudf.read_parquet(gd_file_name)
    expected = pd.read_parquet(pd_file_name)
    assert_eq(actual, expected)

    actual = cudf.read_parquet(pd_file_name)
    expected = pd.read_parquet(gd_file_name)
    assert_eq(actual, expected)
Example #23
def test_dropna_series(data, nulls):

    psr = pd.Series(data)

    if len(data) > 0:
        if nulls == "one":
            p = np.random.randint(0, 4)
            psr[p] = None
        elif nulls == "some":
            p1, p2 = np.random.randint(0, 4, (2, ))
            psr[p1] = None
            psr[p2] = None
        elif nulls == "all":
            psr[:] = None

    gsr = cudf.from_pandas(psr)

    check_dtype = True
    if gsr.null_count == len(gsr):
        check_dtype = False

    assert_eq(psr.dropna(), gsr.dropna(), check_dtype=check_dtype)
Example #24
def test_dropna_thresh_cols(thresh, subset, inplace):
    pdf = pd.DataFrame(
        {"a": [1, 2], "b": [3, 4], "c": [5, None], "d": [np.nan, np.nan]}
    )
    gdf = cudf.from_pandas(pdf)

    if inplace:
        pdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace)
        gdf.dropna(axis=1, thresh=thresh, subset=subset, inplace=inplace)
        expected = pdf
        actual = gdf
    else:
        expected = pdf.dropna(
            axis=1, thresh=thresh, subset=subset, inplace=inplace
        )
        actual = gdf.dropna(
            axis=1, thresh=thresh, subset=subset, inplace=inplace
        )

    assert_eq(expected, actual)
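
The thresh argument sets the minimum number of non-null values a column needs to survive axis=1 dropping. A minimal pandas-only sketch (hypothetical frame):

import numpy as np
import pandas as pd

pdf = pd.DataFrame({"a": [1, 2], "c": [5, None], "d": [np.nan, np.nan]})
# thresh=2 keeps only columns with at least two non-null values.
print(pdf.dropna(axis=1, thresh=2).columns.tolist())  # ['a']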
Example #25
def test_fillna_method_numerical(data, container, data_dtype, method, inplace):
    if container == pd.DataFrame:
        data = {"a": data, "b": data, "c": data}

    pdata = container(data)

    if np.dtype(data_dtype).kind != "f":
        data_dtype = cudf.utils.dtypes.cudf_dtypes_to_pandas_dtypes[
            np.dtype(data_dtype)
        ]
    pdata = pdata.astype(data_dtype)

    # Explicitly using nan_as_null=True
    gdata = cudf.from_pandas(pdata, nan_as_null=True)

    expected = pdata.fillna(method=method, inplace=inplace)
    actual = gdata.fillna(method=method, inplace=inplace)

    if inplace:
        expected = pdata
        actual = gdata

    assert_eq(expected, actual, check_dtype=False)
Example #26
def test_rolling_dataframe_numba_udf_basic(data, center):

    pdf = pd.DataFrame(data)
    gdf = cudf.from_pandas(pdf)

    def some_func(A):
        b = 0
        for a in A:
            b = b + a ** 2
        return b / len(A)

    for window_size in range(1, len(data) + 1):
        for min_periods in range(1, window_size + 1):
            assert_eq(
                pdf.rolling(window_size, min_periods, center)
                .apply(some_func)
                .fillna(-1),
                gdf.rolling(window_size, min_periods, center)
                .apply(some_func)
                .fillna(-1),
                check_dtype=False,
            )
Example #27
def test_rolling_series_numba_udf_basic(data, index, center):

    psr = pd.Series(data, index=index)
    gsr = cudf.from_pandas(psr)

    def some_func(A):
        b = 0
        for a in A:
            b = max(b, math.sqrt(a))
        return b

    for window_size in range(1, len(data) + 1):
        for min_periods in range(1, window_size + 1):
            assert_eq(
                psr.rolling(window_size, min_periods, center)
                .apply(some_func)
                .fillna(-1),
                gsr.rolling(window_size, min_periods, center)
                .apply(some_func)
                .fillna(-1),
                check_dtype=False,
            )
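
In the two rolling-UDF examples above the contract is the same: apply hands the function one window at a time and expects a single scalar back. A minimal pandas-only sketch (hypothetical series):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])
# Each call receives one window and must return a scalar.
print(s.rolling(window=2, min_periods=1).apply(lambda w: w.sum()).tolist())
# [1.0, 3.0, 5.0, 7.0]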
Example #28
def test_as_array():
    pd_df = dsutils.load_bank()
    pd_series = pd_df['id']

    assert isinstance(df_utils.as_array(pd_series), np.ndarray)
    assert isinstance(df_utils.as_array(pd_series.values), np.ndarray)
    assert isinstance(df_utils.as_array(pd_series.values.tolist()), np.ndarray)

    installed_cudf = False
    try:
        import cudf
        import cupy
        installed_cudf = True
    except ImportError:
        pass

    if installed_cudf:
        cudf_series = cudf.from_pandas(pd_df)['id']
        assert isinstance(df_utils.as_array(cudf_series), np.ndarray)
        assert isinstance(df_utils.as_array(cudf_series.values), np.ndarray)
Example #29
def test_renumber_ips_cols():

    source_list = [
        '192.168.1.1', '172.217.5.238', '216.228.121.209', '192.16.31.23'
    ]
    dest_list = [
        '172.217.5.238', '216.228.121.209', '192.16.31.23', '192.168.1.1'
    ]

    pdf = pd.DataFrame({'source_list': source_list, 'dest_list': dest_list})

    gdf = cudf.from_pandas(pdf)

    gdf['source_as_int'] = gdf['source_list'].str.ip2int()
    gdf['dest_as_int'] = gdf['dest_list'].str.ip2int()

    src, dst, number_df = cugraph.renumber_from_cudf(gdf, ['source_as_int'],
                                                     ['dest_as_int'])

    for i in range(len(gdf)):
        assert number_df['0'][src[i]] == gdf['source_as_int'][i]
        assert number_df['0'][dst[i]] == gdf['dest_as_int'][i]
Example #30
def test_multiindex_loc(pdf, gdf, pdfIndex):
    gdfIndex = cudf.from_pandas(pdfIndex)
    assert_eq(pdfIndex, gdfIndex)
    pdf.index = pdfIndex
    gdf.index = gdfIndex
    # return 2 rows, 0 remaining keys = dataframe with entire index
    assert_eq(pdf.loc[('a', 'store', 'clouds', 'fire')],
              gdf.loc[('a', 'store', 'clouds', 'fire')])
    # return 2 rows, 1 remaining key = dataframe with n-k index columns
    assert_eq(pdf.loc[('a', 'store', 'storm')],
              gdf.loc[('a', 'store', 'storm')])
    # return 2 rows, 2 remaining keys = dataframe with n-k index columns
    assert_eq(pdf.loc[('a', 'store')], gdf.loc[('a', 'store')])
    assert_eq(pdf.loc[('b', 'house')], gdf.loc[('b', 'house')])
    # return 2 rows, n-1 remaining keys = dataframe with n-k index columns
    assert_eq(pdf.loc[('a', )], gdf.loc[('a', )])
    # return 1 row, 0 remaining keys = dataframe with entire index
    assert_eq(pdf.loc[('a', 'store', 'storm', 'smoke')],
              gdf.loc[('a', 'store', 'storm', 'smoke')])
    # return 1 row and 1 remaining key = series
    assert_eq(pdf.loc[('c', 'forest', 'clear')],
              gdf.loc[('c', 'forest', 'clear')])
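
The pattern the assertions exercise is that a partial key drops the matched levels from the result. A minimal pandas-only sketch (hypothetical two-level index):

import pandas as pd

midx = pd.MultiIndex.from_tuples([("a", 1), ("a", 2), ("b", 1)])
df = pd.DataFrame({"v": [10, 20, 30]}, index=midx)
# Selecting with a partial key returns the matching rows, indexed by
# the remaining level only.
print(df.loc[("a",)])  # rows ("a", 1) and ("a", 2); index becomes [1, 2]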