Ejemplo n.º 1
0
def _build_update_filter(df, update_q):
    """Translate an update-query expression into a row mask for ``df``.

    ``update_q`` must be a non-empty list whose first element is the operator:

    * ``["isnull", arg]`` -- calls ``isnull()`` on the prepared argument.
    * ``["in", column, values]`` -- membership test of ``df[column]`` against
      the prepared ``values`` (a list or tuple).
    * ``[<comparison operator>, arg1, arg2]`` -- applies the matching entry of
      ``COMPARISON_OPERATORS`` to the two prepared arguments.

    Arguments are resolved through ``_prepare_arg`` (defined elsewhere).
    Invalid input is reported through ``raise_malformed`` / ``assert_len``,
    which are assumed to raise a malformed-query error -- confirm against
    their definitions.
    """
    if type(update_q) is not list:
        raise_malformed("Expressions must be lists", update_q)

    if not update_q:
        raise_malformed("Empty expression not allowed", update_q)

    operator = update_q[0]
    if operator == "isnull":
        assert_len(update_q, 2, 'Invalid length of isnull query')
        try:
            return _prepare_arg(df, update_q[1]).isnull()
        except AttributeError:
            # The prepared argument has no isnull(), i.e. it did not resolve
            # to a column-like object.
            raise_malformed("Unknown column for 'isnull'", update_q)

    if operator == "in":
        if len(update_q) != 3:
            raise_malformed("Invalid length of 'in' query", update_q)

        _, column, values = update_q
        if column not in df:
            raise_malformed("First argument to 'in' must be a column present in frame", update_q)

        if not isinstance(values, (list, tuple)):
            raise_malformed("Second argument to 'in' must be a list", update_q)

        # Item access rather than attribute access: getattr(df, column) would
        # return a frame attribute/method when the column name collides with
        # one (e.g. "count", "index") instead of the column itself.
        return df[column].isin([_prepare_arg(df, val) for val in values])

    if operator in COMPARISON_OPERATORS:
        # Validate the arity up front so that a short query is reported as
        # malformed instead of surfacing as a bare IndexError.
        assert_len(update_q, 3, "Invalid length of comparison query")
        arg1 = _prepare_arg(df, update_q[1])
        arg2 = _prepare_arg(df, update_q[2])
        return COMPARISON_OPERATORS[operator](arg1, arg2)

    raise_malformed("Unknown operator '{operator}'".format(operator=operator), update_q)
Ejemplo n.º 2
0
def _like_filter(df, q):
    """Build a row mask for a ``[op, column, pattern]`` like/ilike expression.

    The pattern must be a quoted string. A leading or trailing ``%`` is
    stripped and leaves that end of the pattern open; where it is absent the
    pattern is anchored with ``^`` / ``$``. The remainder is handed unchanged
    to ``.str.contains`` on the column. ``raise_malformed`` is called when the
    pattern is not quoted or the column has no ``.str`` accessor.
    """
    assert_len(q, 3)
    op, column, raw_expr = q

    if not is_quoted(raw_expr):
        raise_malformed("like expects a quoted string as second argument", q)

    pattern = unquote(raw_expr)
    # Leading wildcard: drop it, otherwise anchor at the start.
    pattern = pattern[1:] if pattern.startswith('%') else '^' + pattern
    # Trailing wildcard: drop it, otherwise anchor at the end.
    pattern = pattern[:-1] if pattern.endswith('%') else pattern + '$'

    # Only the exact operator 'like' is case sensitive; 'ilike' is not.
    case_sensitive = (op == 'like')

    try:
        return df[column].str.contains(pattern, case=case_sensitive)
    except AttributeError:
        raise_malformed("Invalid column type for (i)like", q)
Ejemplo n.º 3
0
def _bitwise_filter(df, q):
    """Build a row mask for a ``[op, column, arg]`` bit-test expression.

    ``arg`` must be an integer bit mask. For ``op == "any_bits"`` a row
    matches when at least one bit of ``arg`` is set in the column value; for
    any other operator a row matches only when all bits of ``arg`` are set.
    ``raise_malformed`` is called when ``arg`` is not an integer or when the
    column does not support ``&`` with an integer (``TypeError``).
    """
    assert_len(q, 3)
    op, column, arg = q
    if not isinstance(arg, (int, long)):
        # Include the offending type in the message; the placeholder was
        # previously missing so the format() argument was silently dropped.
        raise_malformed('Invalid argument type, must be an integer: {t}'.format(t=type(arg)), q)

    try:
        masked = df[column] & arg
        if op == "any_bits":
            # Compare against zero with != rather than >: the masked value is
            # negative when the sign bit is part of the mask, yet bits are set.
            return masked != 0
        return masked == arg
    except TypeError:
        raise_malformed("Invalid column type, must be an integer", q)
Ejemplo n.º 4
0
    def _build_filter(self, q):
        """Recursively render the query expression ``q`` as a filter string.

        Anything that is not a list is treated as a leaf and returned as its
        ``unicode`` representation without parentheses. A list is interpreted
        as ``[operator, arg, ...]`` and rendered inside parentheses:

        * ``!`` -- ``not <arg>``
        * ``isnull`` -- ``(<arg> != <arg>)``
        * comparison operators -- ``<arg1> <op> <arg2>``
        * joining operators -- the clauses joined by `` <op> ``
        * ``in`` -- ``<column> in @env.<name>``, where the value list is
          registered through ``self._insert_in_env``

        Empty lists, wrong argument counts and unknown operators are reported
        through ``raise_malformed`` / ``assert_len``.
        """
        if type(q) is not list:
            return unicode(q)

        if not q:
            raise_malformed("Empty expression not allowed", q)

        expression = None
        op = q[0]
        if op == "!":
            assert_len(q, 2, "! is a single arity operator, invalid number of arguments")
            negated = self._build_filter(q[1])
            expression = "not " + negated
        elif op == "isnull":
            assert_len(q, 2, "isnull is a single arity operator, invalid number of arguments")

            # A value that differs from itself is null; the operand is
            # inserted verbatim rather than rendered recursively.
            expression = "({arg} != {arg})".format(arg=q[1])
        elif op in COMPARISON_OPERATORS:
            assert_len(q, 3)
            left = self._build_filter(q[1])
            right = self._build_filter(q[2])
            expression = " ".join([left, op, right])
        elif op in JOINING_OPERATORS:
            clause_count = len(q) - 1
            if clause_count < 1:
                raise_malformed("Invalid number of arguments", q)
            elif clause_count == 1:
                # A conjunction/disjunction with a single clause is just that clause.
                expression = self._build_filter(q[1])
            else:
                separator = ' {op} '.format(op=op)
                expression = separator.join([self._build_filter(clause) for clause in q[1:]])
        elif op == 'in':
            col_name, args = prepare_in_clause(q, FILTER_ENGINE_NUMEXPR)
            var_name = self._insert_in_env(args)
            expression = '{col_name} in @env.{var_name}'.format(col_name=col_name, var_name=var_name)
        else:
            raise_malformed("Unknown operator", q)

        return "({result})".format(result=expression)
Ejemplo n.º 5
0
def _comparison_filter(df, q):
    """Build a row mask for a ``[op, column, arg]`` comparison expression.

    Looks up the comparison callable for ``op`` in ``COMPARISON_OPERATORS``
    and applies it to ``df[column]`` and the evaluated right-hand argument.
    """
    assert_len(q, 3)
    op, col_name, arg = q
    compare = COMPARISON_OPERATORS[op]
    left = df[col_name]
    # The right-hand side may itself be an expression, so evaluate it first.
    right = _do_pandas_filter(df, arg)
    return compare(left, right)
Ejemplo n.º 6
0
def _isnull_filter(df, q):
    """Build a row mask for an ``["isnull", column]`` expression.

    The mask is the column compared for inequality with itself.
    """
    assert_len(q, 2, "isnull is a single arity operator, invalid number of arguments")

    column = q[1]
    # Self-inequality trick: null values are the ones that do not equal themselves.
    return df[column] != df[column]
Ejemplo n.º 7
0
def _not_filter(df, q):
    """Build a row mask for a ``["!", expr]`` expression by inverting ``expr``'s mask."""
    assert_len(q, 2, "! is a single arity operator, invalid number of arguments")
    operand = _do_pandas_filter(df, q[1])
    return ~operand