def _build_update_filter(df, update_q):
    """Build a row selector from the filter expression of an update query.

    ``update_q`` must be a non-empty list whose first element names the
    operator:

    * ``["isnull", arg]``
    * ``["in", column, [value, ...]]``
    * ``[<comparison operator>, arg1, arg2]`` for any key of
      ``COMPARISON_OPERATORS``

    Arguments are resolved through ``_prepare_arg`` before being used.

    :param df: frame the filter is evaluated against (presumably a pandas
        DataFrame -- confirm against callers).
    :param update_q: the filter expression as a list.
    :return: whatever the selected operation yields (``isnull()``,
        ``isin()`` or the comparison callable's result).
    :raises: through ``raise_malformed`` when the expression is not a list,
        is empty, has the wrong length or uses an unknown operator.
    """
    if type(update_q) is not list:
        raise_malformed("Expressions must be lists", update_q)

    if not update_q:
        raise_malformed("Empty expression not allowed", update_q)

    operator = update_q[0]
    if operator == "isnull":
        assert_len(update_q, 2, 'Invalid length of isnull query')
        try:
            return _prepare_arg(df, update_q[1]).isnull()
        except AttributeError:
            # The prepared argument has no isnull(), so it did not resolve
            # to a column.
            raise_malformed("Unknown column for 'isnull'", update_q)

    if operator == "in":
        if len(update_q) != 3:
            raise_malformed("Invalid length of 'in' query", update_q)

        _, column, values = update_q
        if column not in df:
            raise_malformed("First argument to 'in' must be a column present in frame", update_q)

        if not isinstance(values, (list, tuple)):
            raise_malformed("Second argument to 'in' must be a list", update_q)

        # Item access rather than attribute access: a column whose name
        # collides with a DataFrame attribute or method would otherwise
        # resolve to that attribute instead of the column.
        return df[column].isin([_prepare_arg(df, val) for val in values])

    if operator in COMPARISON_OPERATORS:
        # Validate the arity up front so that a short expression is reported
        # as a malformed query instead of leaking an IndexError.
        if len(update_q) != 3:
            raise_malformed("Invalid length of comparison query", update_q)

        arg1 = _prepare_arg(df, update_q[1])
        arg2 = _prepare_arg(df, update_q[2])
        return COMPARISON_OPERATORS[operator](arg1, arg2)

    raise_malformed("Unknown operator '{operator}'".format(operator=operator), update_q)
def _like_filter(df, q):
    """Evaluate a ``like``/``ilike`` expression against a string column.

    ``q`` has the form ``[op, column, quoted_pattern]``. A leading or
    trailing ``%`` in the pattern acts as a wildcard; where it is absent the
    pattern is anchored at that end. The remaining text is handed to
    ``Series.str.contains`` unchanged.

    :param df: frame holding the column to match against.
    :param q: the three element expression.
    :return: the result of ``df[column].str.contains(...)``.
    :raises: through ``raise_malformed`` when the pattern is not quoted or
        the column does not offer string operations.
    """
    assert_len(q, 3)
    op, column, raw_expr = q

    if not is_quoted(raw_expr):
        raise_malformed("like expects a quoted string as second argument", q)

    pattern = unquote(raw_expr)

    # Leading wildcard: drop it, otherwise anchor at the start.
    pattern = pattern[1:] if pattern.startswith('%') else '^' + pattern

    # Trailing wildcard: drop it, otherwise anchor at the end.
    pattern = pattern[:-1] if pattern.endswith('%') else pattern + '$'

    # Only plain 'like' matches case sensitively; 'ilike' ignores case.
    case_sensitive = (op == 'like')

    try:
        return df[column].str.contains(pattern, case=case_sensitive)
    except AttributeError:
        raise_malformed("Invalid column type for (i)like", q)
def _bitwise_filter(df, q):
    """Evaluate a bit mask expression against an integer column.

    ``q`` has the form ``[op, column, mask]``. The column is AND:ed with the
    mask; for ``"any_bits"`` rows with a result greater than zero are
    selected, for every other operator rows where the result equals the
    mask (i.e. all bits of the mask are set) are selected.

    :param df: frame holding the column to test.
    :param q: the three element expression.
    :return: the comparison result for the masked column.
    :raises: through ``raise_malformed`` when the mask is not an integer or
        the column does not support bitwise AND with an integer.
    """
    assert_len(q, 3)
    op, column, arg = q

    if not isinstance(arg, (int, long)):
        # Include the offending type in the message; the placeholder was
        # previously missing so the type was silently dropped.
        raise_malformed('Invalid argument type, must be an integer: {t}'.format(t=type(arg)), q)

    try:
        series = df[column] & arg
        if op == "any_bits":
            return series > 0
        return series == arg
    except TypeError:
        raise_malformed("Invalid column type, must be an integer", q)
def _build_filter(self, q):
    """Recursively render a nested list expression as a filter string.

    Anything that is not a list is treated as a leaf and returned as its
    unicode representation. A list is interpreted as ``[operator, args...]``
    and rendered as a parenthesised expression.

    :param q: the expression (list) or leaf value to render.
    :return: the rendered expression string.
    :raises: through ``raise_malformed``/``assert_len`` on empty
        expressions, wrong arity or unknown operators.
    """
    if type(q) is not list:
        # Leaf value, emitted as is.
        return unicode(q)

    if not q:
        raise_malformed("Empty expression not allowed", q)

    expression = None
    head = q[0]

    if head == "!":
        assert_len(q, 2, "! is a single arity operator, invalid number of arguments")
        negated = self._build_filter(q[1])
        expression = "not " + negated
    elif head == "isnull":
        assert_len(q, 2, "isnull is a single arity operator, invalid number of arguments")
        # Workaround: expressed as the argument compared for inequality
        # with itself since no direct null test is used here.
        expression = "({arg} != {arg})".format(arg=q[1])
    elif head in COMPARISON_OPERATORS:
        assert_len(q, 3)
        _, left, right = q
        left_expr = self._build_filter(left)
        right_expr = self._build_filter(right)
        expression = " ".join((left_expr, head, right_expr))
    elif head in JOINING_OPERATORS:
        clause_count = len(q) - 1
        if clause_count < 1:
            raise_malformed("Invalid number of arguments", q)
        elif clause_count == 1:
            # A conjunction/disjunction with a single clause is accepted
            # and collapses to that clause.
            expression = self._build_filter(q[1])
        else:
            glue = ' {op} '.format(op=head)
            expression = glue.join([self._build_filter(clause) for clause in q[1:]])
    elif head == 'in':
        col_name, args = prepare_in_clause(q, FILTER_ENGINE_NUMEXPR)
        # The candidate values are stored in the environment and referenced
        # by name from the expression.
        var_name = self._insert_in_env(args)
        expression = '{col_name} in @env.{var_name}'.format(col_name=col_name, var_name=var_name)
    else:
        raise_malformed("Unknown operator", q)

    return "({result})".format(result=expression)
def _comparison_filter(df, q):
    """Apply a comparison operator to a column and an evaluated argument.

    ``q`` has the form ``[op, column, arg]`` where ``op`` is a key of
    ``COMPARISON_OPERATORS`` and ``arg`` is evaluated through
    ``_do_pandas_filter`` before the comparison.

    :param df: frame holding the column to compare.
    :param q: the three element expression.
    :return: the result of the comparison callable.
    """
    assert_len(q, 3)
    op, col_name, arg = q

    compare = COMPARISON_OPERATORS[op]
    left_operand = df[col_name]
    right_operand = _do_pandas_filter(df, arg)
    return compare(left_operand, right_operand)
def _isnull_filter(df, q):
    """Select the rows where the given column holds a null value.

    ``q`` has the form ``["isnull", column]``.

    :param df: frame holding the column to test.
    :param q: the two element expression.
    :return: the result of ``isnull()`` on the column.
    """
    assert_len(q, 2, "isnull is a single arity operator, invalid number of arguments")

    # Use the dedicated null test instead of comparing the column with
    # itself: the self comparison only flags values that are unequal to
    # themselves and therefore misses None in object columns.
    return df[q[1]].isnull()
def _not_filter(df, q):
    """Invert the filter described by the single operand of ``!``.

    ``q`` has the form ``["!", sub_expression]``; the sub expression is
    evaluated through ``_do_pandas_filter`` and the result is inverted with
    ``~``.

    :param df: frame the sub expression is evaluated against.
    :param q: the two element expression.
    :return: the inverted result of the sub expression.
    """
    assert_len(q, 2, "! is a single arity operator, invalid number of arguments")

    operand_result = _do_pandas_filter(df, q[1])
    return ~operand_result