Esempio n. 1
0
def query_execute(df, expr, callenv):
    """Evaluate a boolean query expression against the columns of ``df``.

    Compilation goes through ``query_compile``; compiled expressions are
    cached for future reuse.

    Parameters
    ----------
    df : DataFrame
        Frame whose columns are referenced by the expression.
    expr : str
        Boolean expression to evaluate.
    callenv : dict
        Holds the keys 'local_dict', 'locals' and 'globals', each a dict,
        describing the argument, local and global namespaces of the caller.
        On a name clash 'local_dict' takes precedence over 'locals', which
        takes precedence over 'globals'.

    Returns
    -------
    The boolean output column, with the aggregate null mask of the
    referenced columns applied and nulls then filled with ``False``.

    Raises
    ------
    TypeError
        If any column used by the expression has a dtype that is not in
        ``SUPPORTED_QUERY_TYPES``.
    NameError
        If the expression references an environment name that is missing
        from the caller's namespaces.
    """
    plan = query_compile(expr)
    columns = plan["colnames"]

    # Only the columns the expression actually uses are pulled out, so the
    # dtype validation below is limited to those columns.
    source_cols = []
    for col_name in columns:
        source_cols.append(cudf.core.dataframe.extract_col(df, col_name))

    for source_col in source_cols:
        if source_col.dtype not in SUPPORTED_QUERY_TYPES:
            raise TypeError(
                "query only supports numeric, datetime, timedelta, "
                "or bool dtypes."
            )

    device_views = [source_col.data_array_view for source_col in source_cols]

    kernel = plan["kernel"]

    # Layer the caller's namespaces; later layers override earlier ones.
    scope = callenv["globals"].copy()
    for layer in ("locals", "local_dict"):
        scope.update(callenv[layer])

    envargs = []
    for ref in plan["refnames"]:
        # Compiled reference names carry ENVREF_PREFIX; strip it to get the
        # name as it appears in the caller's environment.
        key = ref[len(ENVREF_PREFIX) :]
        try:
            value = scope[key]
            if isinstance(value, dt.datetime):
                value = np.datetime64(value)
        except KeyError:
            template = "{!r} not defined in the calling environment"
            raise NameError(template.format(key))
        envargs.append(value)

    # The output buffer goes first, followed by column views and env values.
    nrows = len(df)
    out = column_empty(nrows, dtype=np.bool_)
    kernel.forall(nrows)(out, *device_views, *envargs)

    out_mask = applyutils.make_aggregate_nullmask(df, columns=columns)
    masked = out.set_mask(out_mask)
    return masked.fillna(False)
Esempio n. 2
0
def query_execute(df, expr, callenv):
    """Evaluate a boolean query expression against the columns of ``df``.

    Compilation goes through ``query_compile``; compiled expressions are
    cached for future reuse.

    Parameters
    ----------
    df : DataFrame
        Frame whose columns are referenced by the expression.
    expr : str
        Boolean expression to evaluate.
    callenv : dict
        Holds the keys 'local_dict', 'locals' and 'globals', each a dict,
        describing the argument, local and global namespaces of the caller.
        On a name clash 'local_dict' takes precedence over 'locals', which
        takes precedence over 'globals'.

    Returns
    -------
    The boolean device array written by the kernel. When an aggregate null
    mask exists for the referenced columns, the array is first passed
    through ``cudautils.fill_mask`` with a fill value of ``False``.

    Raises
    ------
    NameError
        If the expression references an environment name that is missing
        from the caller's namespaces.
    """
    plan = query_compile(expr)
    kernel = plan["kernel"]

    # Layer the caller's namespaces; later layers override earlier ones.
    scope = callenv["globals"].copy()
    for layer in ("locals", "local_dict"):
        scope.update(callenv[layer])

    envargs = []
    for ref in plan["refnames"]:
        # Compiled reference names carry ENVREF_PREFIX; strip it to get the
        # name as it appears in the caller's environment.
        key = ref[len(ENVREF_PREFIX) :]
        try:
            value = scope[key]
            if isinstance(value, dt.datetime):
                value = np.datetime64(value)
        except KeyError:
            template = "{!r} not defined in the calling environment"
            raise NameError(template.format(key))
        envargs.append(value)

    columns = plan["colnames"]
    device_views = [df[col_name]._column.data_array_view for col_name in columns]

    # The output buffer goes first, followed by column views and env values.
    nrows = len(df)
    out = rmm.device_array(nrows, dtype=np.bool_)
    kernel.forall(nrows)(out, *device_views, *envargs)

    out_mask = applyutils.make_aggregate_nullmask(df, columns=columns)
    if out_mask is None:
        # No mask to apply: the raw kernel output is the result.
        return out
    return cudautils.fill_mask(out, out_mask.data_array_view, False)