def gen_table_expr(table, expr):
    """
    Wrap ``expr`` in an ``Expr`` whose names resolve against the table's columns.

    Each column listed in ``table.schema`` is represented by a ``FakeSeries``
    built from the column type's ``to_pandas_dtype()`` result, so no column
    data is read here.

    Parameters
    ----------
    table
        Object exposing ``schema.names`` and ``schema.types``
        (presumably a ``pyarrow.Table`` -- confirm against callers).
    expr
        Expression passed through unchanged to ``Expr``.

    Returns
    -------
    Expr
        Built with a level-0 ``Scope`` that has the column stand-ins as its
        only resolver.
    """
    schema = table.schema
    column_stubs = {}
    for column_name, arrow_type in zip(schema.names, schema.types):
        column_stubs[column_name] = FakeSeries(arrow_type.to_pandas_dtype())
    return Expr(expr=expr, env=Scope(level=0, resolvers=(column_stubs,)))
def __init__(self, expr, engine='numexpr', parser='pandas', env=None,
             truediv=True, level=0):
    """
    Store the expression settings, build the parser visitor and parse.

    Parameters
    ----------
    expr
        Expression to keep on the instance as ``self.expr``.
    engine : default 'numexpr'
        Engine name; stored and handed to the visitor.
    parser : default 'pandas'
        Key into ``_parsers`` selecting the visitor class.
    env : optional
        Scope to use. A falsy value is replaced by ``Scope(level=level + 1)``.
    truediv : default True
        Written into ``self.env.scope`` under the ``'truediv'`` key.
    level : default 0
        Only used, as ``level + 1``, when a new ``Scope`` has to be created.
    """
    self.expr = expr
    if not env:
        # NOTE(review): Scope presumably interprets `level` relative to the
        # calling frame, so it is created right here and not in a helper.
        env = Scope(level=level + 1)
    self.env = env
    self.engine = engine
    self.parser = parser
    self.env.scope['truediv'] = truediv

    visitor_cls = _parsers[parser]
    self._visitor = visitor_cls(self.env, self.engine, self.parser)
    # Parse eagerly; the result is kept as `terms`.
    self.terms = self.parse()
def __init__(
    self,
    expr,
    engine: str = "numexpr",
    parser: str = "pandas",
    env: Optional[Scope] = None,
    level: int = 0,
):
    """
    Record the expression configuration and parse the expression at once.

    Parameters
    ----------
    expr
        Expression stored as ``self.expr``.
    engine : str, default "numexpr"
        Engine name; stored and forwarded to the visitor.
    parser : str, default "pandas"
        Key used to look up the visitor class in ``_parsers``.
    env : Scope, optional
        Scope to use; when falsy, ``Scope(level=level + 1)`` is created.
    level : int, default 0
        Only consulted when a new ``Scope`` is created.
    """
    self.expr = expr
    # Conditional expression keeps the Scope construction in this frame.
    self.env = env if env else Scope(level=level + 1)
    self.engine, self.parser = engine, parser

    make_visitor = _parsers[parser]
    self._visitor = make_visitor(self.env, self.engine, self.parser)
    self.terms = self.parse()
def __init__(self, expr, engine="numexpr", parser="qq_pandas", env=None,
             truediv=True, level=0):
    """
    Initialise the expression, its scope and its parser visitor, then parse.

    Parameters
    ----------
    expr
        Expression kept as ``self.expr``.
    engine : default "numexpr"
        Engine name; stored and passed on to the visitor.
    parser : default "qq_pandas"
        Key into ``_parsers`` that selects the visitor class.
    env : optional
        Scope to use. If falsy, a ``Scope(level=level + 1)`` is created.
    truediv : default True
        Stored in ``self.env.scope`` under the ``"truediv"`` key.
    level : default 0
        Used only for the fallback ``Scope``.
    """
    self.expr = expr
    if env:
        self.env = env
    else:
        # Fallback scope is created directly in this method body.
        self.env = Scope(level=level + 1)
    self.engine = engine
    self.parser = parser

    scope_vars = self.env.scope
    scope_vars["truediv"] = truediv

    self._visitor = _parsers[parser](
        self.env,
        self.engine,
        self.parser,
    )
    # The parse result is stored right away as `terms`.
    self.terms = self.parse()
def __init__(
    self,
    expr,
    engine: str = "numexpr",
    parser: str = "pandas",
    env: Scope | None = None,
    level: int = 0,
) -> None:
    """
    Keep the expression settings, create the visitor and parse immediately.

    Parameters
    ----------
    expr
        Expression stored as ``self.expr``.
    engine : str, default "numexpr"
        Engine name; stored and forwarded to the visitor.
    parser : str, default "pandas"
        Key selecting the visitor class from ``PARSERS``.
    env : Scope or None, default None
        Scope to use; a falsy value is replaced by ``Scope(level=level + 1)``.
    level : int, default 0
        Only used when the fallback ``Scope`` is created.
    """
    self.expr = expr

    scope = env
    if not scope:
        # Created in this frame on purpose; do not move into a helper.
        scope = Scope(level=level + 1)
    self.env = scope

    self.engine = engine
    self.parser = parser

    visitor_factory = PARSERS[parser]
    self._visitor = visitor_factory(self.env, self.engine, self.parser)
    self.terms = self.parse()
def gen_table_expr(table, expr):
    """
    Create a pandas expression object for a query over a table's columns.

    Column data is not touched: every column named in the table schema is
    represented by a ``FakeSeries`` carrying the pandas dtype obtained from
    the column type's ``to_pandas_dtype()``.

    Parameters
    ----------
    table : pyarrow.Table
        Table whose columns the query refers to.
    expr : str
        Query string to evaluate on the `table` columns.

    Returns
    -------
    pandas.core.computation.expr.Expr
    """
    column_names = table.schema.names
    # Lazy pipeline: one dtype conversion and one FakeSeries per zipped column.
    pandas_dtypes = (
        arrow_type.to_pandas_dtype() for arrow_type in table.schema.types
    )
    resolver = dict(zip(column_names, map(FakeSeries, pandas_dtypes)))

    scope = Scope(level=0, resolvers=(resolver,))
    return Expr(expr=expr, env=scope)
def hacky_query_eval(df, varstr, selstr="", verbose=False):
    """
    Evaluate a pandas-style expression by rewriting it into plain indexing code.

    WARNING: this is a fragile hack. It patches private attributes of pandas'
    expression visitor and runs the generated source through ``eval``.
    Never pass it strings you do not fully trust.

    The idea is to turn

        df.query("dimuon_mass > 5 and pass_baseline_iso").eval("dimuon_mass").mean()

    into

        df["dimuon_mass"][ (df["dimuon_mass"] > 5) & (df["pass_baseline_iso"]) ].mean()

    The original author's rationale: the second form does not make the
    intermediate copy of all columns that ``query()`` does and skips the
    numexpr jitting, so in principle it executes much faster.

    Parameters
    ----------
    df
        Object used both as the name resolver while parsing and as ``df`` in
        the generated code (a DataFrame in the intended usage).
    varstr : str
        Expression producing the values of interest.
    selstr : str, default ""
        Optional selection expression; when non-empty its rewritten form is
        used to index the result of ``varstr``.
    verbose : bool, default False
        Print the generated code before evaluating it.

    Returns
    -------
    Whatever evaluating the generated code yields.

    Usage:

        arr = hacky_query_eval(
            df_data,
            varstr = "dimuon_mass",
            selstr = "pass_baseline_iso and 0<logabsetaphi<1.25",
        )
        print(arr.mean())
    """
    from pandas.core.computation.expr import Expr
    from pandas.core.computation.scope import Scope

    env = Scope(
        1,
        global_dict=globals(),
        local_dict=locals(),
        resolvers=[df],
        target=None,
    )

    def inject_df(s):
        """
        Render expression ``s`` as source text in which a name such as ``a``
        appears as ``df["a"]``, e.g. (a > 1) becomes (df["a"] > 1).
        """
        parsed = Expr(s, env=env, parser="pandas")
        visitor = parsed._visitor

        def name_as_df_lookup(node, **kwargs):
            # Build the term as usual, then change only its printed name.
            term = visitor.term_type(node.id, visitor.env, **kwargs)
            term._name = f'df["{term._name}"]'
            return term

        def keep_constants(left, right):
            # Replacement for the visitor's constant downcasting: pass through.
            return left, right

        visitor.visit_Name = name_as_df_lookup
        visitor._maybe_downcast_constants = keep_constants
        # Re-parse so the patched hooks take effect, then render to text.
        parsed.terms = parsed.parse()
        return str(parsed)

    code = f"({inject_df(varstr)})"
    if selstr:
        code = f"{code}[{inject_df(selstr)}]"
    if verbose:
        print(f"Evaluating string: {code}")
    # SECURITY: eval of generated source; it runs in this function's scope so
    # that ``df`` in the generated code refers to the argument.
    return eval(code)