コード例 #1
0
 def gen_table_expr(table, expr):
     resolver = {
         name: FakeSeries(dtype.to_pandas_dtype())
         for name, dtype in zip(table.schema.names, table.schema.types)
     }
     scope = Scope(level=0, resolvers=(resolver, ))
     return Expr(expr=expr, env=scope)
コード例 #2
0
 def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
              truediv=True, level=0):
     self.expr = expr
     self.env = env or Scope(level=level + 1)
     self.engine = engine
     self.parser = parser
     self.env.scope['truediv'] = truediv
     self._visitor = _parsers[parser](self.env, self.engine, self.parser)
     self.terms = self.parse()
コード例 #3
0
 def __init__(
     self,
     expr,
     engine: str = "numexpr",
     parser: str = "pandas",
     env: Optional[Scope] = None,
     level: int = 0,
 ):
     self.expr = expr
     self.env = env or Scope(level=level + 1)
     self.engine = engine
     self.parser = parser
     self._visitor = _parsers[parser](self.env, self.engine, self.parser)
     self.terms = self.parse()
コード例 #4
0
 def __init__(self,
              expr,
              engine="numexpr",
              parser="qq_pandas",
              env=None,
              truediv=True,
              level=0):
     self.expr = expr
     self.env = env or Scope(level=level + 1)
     self.engine = engine
     self.parser = parser
     self.env.scope["truediv"] = truediv
     self._visitor = _parsers[parser](self.env, self.engine, self.parser)
     self.terms = self.parse()
コード例 #5
0
ファイル: expr.py プロジェクト: YarShev/pandas
 def __init__(
     self,
     expr,
     engine: str = "numexpr",
     parser: str = "pandas",
     env: Scope | None = None,
     level: int = 0,
 ) -> None:
     self.expr = expr
     self.env = env or Scope(level=level + 1)
     self.engine = engine
     self.parser = parser
     self._visitor = PARSERS[parser](self.env, self.engine, self.parser)
     self.terms = self.parse()
コード例 #6
0
ファイル: query_compiler.py プロジェクト: wroldwiedbwe/modin
        def gen_table_expr(table, expr):
            """
            Build pandas expression for the specified query.

            Parameters
            ----------
            table : pyarrow.Table
                Table to evaluate expression on.
            expr : str
                Query string to evaluate on the `table` columns.

            Returns
            -------
            pandas.core.computation.expr.Expr
            """
            resolver = {
                name: FakeSeries(dtype.to_pandas_dtype())
                for name, dtype in zip(table.schema.names, table.schema.types)
            }
            scope = Scope(level=0, resolvers=(resolver, ))
            return Expr(expr=expr, env=scope)
コード例 #7
0
def hacky_query_eval(df, varstr, selstr="", verbose=False):
    """
    Please don't read/use. This is dangerous and stupid, kind of like 
    integrating a function by printing out a plot, coloring the area under it in red,
    faxing it to yourself, then counting red pixels to get the area.

    Basically I wanted some way to convert

        df.query("dimuon_mass > 5 and pass_baseline_iso").eval("dimuon_mass").mean()

    into

        df["dimuon_mass"][ (df["dimuon_mass"] > 5) & (df["pass_baseline_iso"]) ].mean()

    because the latter doesn't make an intermediate copy of all the columns with query(),
    and it also doesn't do jitting with numexpr. In principle, this is much faster to execute.

    Usage:

        arr = hacky_query_eval(
            df_data,
            varstr = "dimuon_mass",
            selstr = "pass_baseline_iso and 0<logabsetaphi<1.25",
        )
        print(arr.mean())
    """
    from pandas.core.computation.expr import Expr
    from pandas.core.computation.scope import Scope
    env = Scope(
        1,
        global_dict=globals(),
        local_dict=locals(),
        resolvers=[df],
        target=None,
    )

    def inject_df(s):
        """
        convert expression string like (a > 1) to (df["a"] > 1)
        so that it can be eval'd later
        """
        expr = Expr(s, env=env, parser="pandas")
        self = expr._visitor

        def visit_Name_hack(node, **kwargs):
            result = self.term_type(node.id, self.env, **kwargs)
            result._name = f'df["{result._name}"]'
            return result

        def _maybe_downcast_constants_hack(left, right):
            return left, right

        expr._visitor.visit_Name = visit_Name_hack
        expr._visitor._maybe_downcast_constants = _maybe_downcast_constants_hack
        expr.terms = expr.parse()
        return str(expr)

    varexpr = inject_df(varstr)
    toeval = f"({varexpr})"
    if selstr:
        selexpr = str(inject_df(selstr))
        toeval += f"[{selexpr}]"
    if verbose: print(f"Evaluating string: {toeval}")
    result = eval(toeval)
    return result