Example #1
0
    def test_nested_scope(self):
        """Exercise name resolution for a parser that rejects ``@`` syntax.

        ``pd.eval`` is expected to see the enclosing locals, whereas
        ``DataFrame.query`` is expected to raise ``SyntaxError`` for
        ``@``-prefixed names and ``UndefinedVariableError`` for bare names
        that only exist as local frames.
        """
        from pandas.computation.ops import UndefinedVariableError

        opts = dict(engine=self.engine, parser=self.parser)

        # smoke test: a plain local is visible to pd.eval
        x = 1  # noqa
        self.assertEqual(pd.eval("x + 1", **opts), 2)

        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))

        # don't have the pandas parser, so "@" is a syntax error here
        with tm.assertRaises(SyntaxError):
            df.query("(@df>0) & (@df2>0)", **opts)

        # without "@", query does not find the local frames
        with tm.assertRaises(UndefinedVariableError):
            df.query("(df>0) & (df2>0)", **opts)

        # pd.eval resolves df / df2 from this frame, including nested indexing
        cases = (
            ("df[(df > 0) & (df2 > 0)]", df[(df > 0) & (df2 > 0)]),
            (
                "df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]",
                df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)],
            ),
        )
        for expression, expected in cases:
            assert_frame_equal(expected, pd.eval(expression, **opts))
Example #2
0
    def check_query_with_partially_named_multiindex(self, parser, engine):
        """Query a two-level index where only the second level is named.

        The named level is addressed as ``rating``; the unnamed first level
        is addressed through the ``ilevel_0`` alias.
        """
        tm.skip_if_no_ne(engine)
        colors = np.random.choice(["red", "green"], size=10)
        ratings = np.arange(10)
        index = MultiIndex.from_arrays([colors, ratings])
        index.names = [None, "rating"]
        df = DataFrame(randn(10, 2), index=index)
        opts = dict(parser=parser, engine=engine)

        # named level: compare against a scalar with == and !=
        res = df.query("rating == 1", **opts)
        by_rating = Series(df.index.get_level_values("rating").values, index=index, name="rating")
        assert_frame_equal(res, df[by_rating == 1])

        res = df.query("rating != 1", **opts)
        by_rating = Series(df.index.get_level_values("rating").values, index=index, name="rating")
        assert_frame_equal(res, df[by_rating != 1])

        # unnamed level: reachable as ilevel_0
        res = df.query('ilevel_0 == "red"', **opts)
        by_level0 = Series(df.index.get_level_values(0).values, index=index)
        assert_frame_equal(res, df[by_level0 == "red"])

        res = df.query('ilevel_0 != "red"', **opts)
        by_level0 = Series(df.index.get_level_values(0).values, index=index)
        assert_frame_equal(res, df[by_level0 != "red"])
Example #3
0
    def test_nested_raises_on_local_self_reference(self):
        """A frame cannot refer to itself by its local name inside ``query``."""
        from pandas.computation.ops import UndefinedVariableError

        engine, parser = self.engine, self.parser
        df = DataFrame(np.random.randn(5, 3))

        # "df" is only a local of this test, so without "@" the name is
        # expected to be undefined from the query's point of view
        with tm.assertRaises(UndefinedVariableError):
            df.query("df > 0", engine=engine, parser=parser)
Example #4
0
    def test_query_undefined_local(self):
        """Referencing a missing local through ``@`` reports it by name."""
        from pandas.computation.ops import UndefinedVariableError

        opts = dict(engine=self.engine, parser=self.parser)
        skip_if_no_pandas_parser(opts["parser"])

        df = DataFrame(np.random.rand(10, 2), columns=list("ab"))

        # no local called "c" exists in this frame
        with tm.assertRaisesRegexp(UndefinedVariableError, "local variable 'c' is not defined"):
            df.query("a == @c", **opts)
Example #5
0
    def test_query_doesnt_pickup_local(self):
        """``sin`` is neither a column nor an index name, so lookup must fail."""
        from pandas.computation.ops import UndefinedVariableError

        size = bound = 10
        values = np.random.randint(bound, size=(size, 3))
        df = DataFrame(values, columns=list("abc"))

        # we don't pick up the local 'sin'
        with tm.assertRaises(UndefinedVariableError):
            df.query("sin > 5", engine=self.engine, parser=self.parser)
Example #6
0
 def test_date_index_query_with_NaT_duplicates(self):
     """Expect ``NotImplementedError`` for a chained comparison on the index.

     The frame is indexed by a date column in which a random subset of
     entries has been replaced with ``NaT``.
     """
     n = 10
     df = DataFrame(randn(n, 3))
     df["dates1"] = date_range("1/1/2012", periods=n)
     df["dates3"] = date_range("1/1/2014", periods=n)

     # blank out a random subset of dates1 before it becomes the index
     blanked = np.random.rand(n) > 0.5
     df.loc[blanked, "dates1"] = pd.NaT
     df.set_index("dates1", inplace=True, drop=True)

     with tm.assertRaises(NotImplementedError):
         df.query("index < 20130101 < dates3", engine=self.engine, parser=self.parser)
Example #7
0
    def test_query_builtin(self):
        """An index named ``sin`` is expected to raise a clobbering error."""
        from pandas.computation.engines import NumExprClobberingError

        size = bound = 10
        df = DataFrame(np.random.randint(bound, size=(size, 3)), columns=list("abc"))
        df.index.name = "sin"

        opts = dict(engine=self.engine, parser=self.parser)
        with tm.assertRaisesRegexp(NumExprClobberingError, "Variables in expression.+"):
            df.query("sin > 5", **opts)
Example #8
0
    def test_date_query_with_non_date(self):
        """Comparing a date column with an integer column raises ``TypeError``."""
        opts = dict(parser=self.parser, engine=self.engine)

        n = 10
        df = DataFrame({"dates": date_range("1/1/2012", periods=n), "nondate": np.arange(n)})

        # every comparison operator is expected to be rejected
        for op in ("==", "!=", "<", ">", "<=", ">="):
            expression = "dates %s nondate" % op
            with tm.assertRaises(TypeError):
                df.query(expression, **opts)
Example #9
0
    def test_query_index_with_name(self):
        """A named index can be referenced in a query through its name."""
        opts = dict(engine=self.engine, parser=self.parser)
        values = np.random.randint(10, size=(10, 3))
        df = DataFrame(values, index=Index(range(10), name="blob"), columns=["a", "b", "c"])

        # index name combined with a column-to-column comparison
        res = df.query("(blob < 5) & (a < b)", **opts)
        assert_frame_equal(res, df[(df.index < 5) & (df.a < df.b)])

        # index name compared directly against a column
        res = df.query("blob < b", **opts)
        assert_frame_equal(res, df[df.index < df.b])
Example #10
0
    def test_query_index_without_name(self):
        """An unnamed index is reachable in a query under the name ``index``."""
        opts = dict(engine=self.engine, parser=self.parser)
        values = np.random.randint(10, size=(10, 3))
        df = DataFrame(values, index=range(10), columns=["a", "b", "c"])

        # "index" should refer to the index
        against_column = df.query("index < b", **opts)
        assert_frame_equal(against_column, df[df.index < df.b])

        # test against a scalar
        against_scalar = df.query("index < 5", **opts)
        assert_frame_equal(against_scalar, df[df.index < 5])
Example #11
0
    def test_local_syntax(self):
        """``@b`` reads the local ``b``; a bare ``b`` reads column ``b``."""
        skip_if_no_pandas_parser(self.parser)

        opts = dict(engine=self.engine, parser=self.parser)
        df = DataFrame(randn(100, 10), columns=list("abcdefghij"))

        # the local must keep the name "b": the query refers to it as "@b"
        b = 1
        against_local = df[df.a < b]
        assert_frame_equal(df.query("a < @b", **opts), against_local)

        against_column = df[df.a < df.b]
        assert_frame_equal(df.query("a < b", **opts), against_column)
Example #12
0
    def test_local_variable_with_in(self):
        """``in`` works with a column operand and with an ``@`` local operand."""
        opts = dict(engine=self.engine, parser=self.parser)
        skip_if_no_pandas_parser(opts["parser"])

        a = Series(np.random.randint(3, size=15), name="a")
        b = Series(np.random.randint(10, size=15), name="b")
        df = DataFrame({"a": a, "b": b})

        # both operands are columns of the frame
        column_expected = df.loc[(df.b - 1).isin(a)]
        assert_frame_equal(column_expected, df.query("b - 1 in a", **opts))

        # rebinding the local "b" makes "@b" differ from column "b"
        b = Series(np.random.randint(10, size=15), name="b")
        local_expected = df.loc[(b - 1).isin(a)]
        assert_frame_equal(local_expected, df.query("@b - 1 in a", **opts))
Example #13
0
 def check_query_with_nested_special_character(self, parser, engine):
     """An ``&`` inside a quoted string literal is matched as plain text."""
     skip_if_no_pandas_parser(parser)
     tm.skip_if_no_ne(engine)

     needle = "test & test"
     df = DataFrame({"a": ["a", "b", needle], "b": [1, 2, 3]})

     found = df.query('a == "test & test"', parser=parser, engine=engine)
     assert_frame_equal(found, df[df.a == needle])
Example #14
0
    def post(self, slug):
        """Respond with the data stored under ``slug`` as a JSON document.

        The column list is loaded from the ``"<slug>-columns"`` bucket entry
        and may be overridden by a comma-separated ``fields`` request
        argument.  Every request argument whose name contains ``filter__`` is
        turned into a query expression by ``df_generate`` and applied to the
        frame in turn.  The rendered payload, and the fields/filters that
        produced it, are stored in memcache; a request with identical fields
        and filters is answered from that cache.

        slug
            Key of the data set in ``MyBucket``; also used as the memcache
            key (and as the prefix of the ``-columns`` / ``-filters`` keys).
        """
        mc = memcache.Client(["127.0.0.1:11211"], debug=0)
        columns_key = "{}-columns".format(slug)
        filters_key = "{}-filters".format(slug)

        fields = json.loads(MyBucket.get(columns_key).data)
        if self.get_argument("fields", None):
            fields = self.get_argument("fields").split(",")

        # Iterating the mapping yields its keys on both Python 2 and 3
        # (the previous ``iteritems()`` call exists on Python 2 only).
        filters = [name for name in self.request.arguments if "filter__" in name]

        fields_json = json.dumps(fields)
        # sort_keys keeps the serialized form independent of dict ordering,
        # so equal filter sets always compare equal to the cached string.
        filters_json = json.dumps({f: self.get_argument(f) for f in filters}, sort_keys=True)

        cached = mc.get(str(slug))
        if (
            cached
            and mc.get(columns_key) == fields_json
            # was "{}-fulters": that key is never written, so the cache
            # could never be hit
            and mc.get(filters_key) == filters_json
        ):
            self.write(cached)
            self.finish()
            # Stop here: falling through would rebuild the payload and call
            # write()/finish() a second time on an already finished request.
            return

        mc.set(columns_key, fields_json)
        mc.set(filters_key, filters_json)

        df = DataFrame(MyBucket.get(slug).data, columns=fields)
        for f in filters:
            df = df.query(df_generate(df, self.get_argument, f))
        # NOTE(review): newer pandas releases spell this keyword ``orient`` --
        # confirm against the pinned pandas version before upgrading.
        convert = df.to_dict(outtype="records")

        write = json.dumps({"columns": fields, "json": convert})
        mc.set(str(slug), write)
        self.write(write)
        self.finish()
Example #15
0
 def check_query_single_element_booleans(self, parser, engine):
     """``&`` between two boolean columns works on a single-row frame."""
     tm.skip_if_no_ne(engine)

     names = ("bid", "bidsize", "ask", "asksize")
     flags = np.random.randint(2, size=(1, len(names))).astype(bool)
     df = DataFrame(flags, columns=names)

     res = df.query("bid & ask", engine=engine, parser=parser)
     assert_frame_equal(res, df[df.bid & df.ask])
Example #16
0
 def test_at_inside_string(self):
     """An ``@`` inside a quoted string is literal text, not a local lookup."""
     skip_if_no_pandas_parser(self.parser)
     opts = dict(engine=self.engine, parser=self.parser)

     # a local named "c" exists on purpose: '"@c"' must still be the string
     c = 1  # noqa
     df = DataFrame({"a": ["a", "a", "b", "b", "@c", "@c"]})

     matched = df.query('a == "@c"', **opts)
     assert_frame_equal(matched, df[df.a == "@c"])
Example #17
0
    def check_object_array_eq_ne(self, parser, engine):
        """``==`` and ``!=`` between two string columns match plain indexing."""
        tm.skip_if_no_ne(engine)
        opts = dict(parser=parser, engine=engine)

        # "a" and "b" hold strings; "c" and "d" are numeric filler columns
        letters_a = list("aaaabbbbcccc")
        letters_b = list("aabbccddeeff")
        ints_c = np.random.randint(5, size=12)
        ints_d = np.random.randint(9, size=12)
        df = DataFrame({"a": letters_a, "b": letters_b, "c": ints_c, "d": ints_d})

        equal_rows = df.query("a == b", **opts)
        assert_frame_equal(equal_rows, df[df.a == df.b])

        unequal_rows = df.query("a != b", **opts)
        assert_frame_equal(unequal_rows, df[df.a != df.b])
Example #18
0
 def f(self, query):
     """
     Return the rows of the cash data that satisfy a query
     query
         Expression passed unchanged to DataFrame.query; the frame is
         rebuilt from self._cash on every call
     """
     return DataFrame(self._cash).query(query)
Example #19
0
 def f(self, f):
     """
     Return the trades that satisfy the given conditions
     f
         Expression passed unchanged to DataFrame.query; the frame is
         rebuilt from self._trades on every call
     """
     trades = DataFrame(self._trades)
     selected = trades.query(f)
     return selected
Example #20
0
 def test_chained_cmp_and_in(self):
     """Chained ``<`` and chained ``not in`` combine like pairwise tests."""
     skip_if_no_pandas_parser(self.parser)
     opts = dict(engine=self.engine, parser=self.parser)

     names = list("abc")
     df = DataFrame(randn(100, len(names)), columns=names)

     res = df.query("a < b < c and a not in b not in c", **opts)

     # each chain is spelled out as the conjunction of its adjacent pairs
     ascending = (df.a < df.b) & (df.b < df.c)
     not_contained = ~df.b.isin(df.a) & ~df.c.isin(df.b)  # noqa
     assert_frame_equal(res, df[ascending & not_contained])
Example #21
0
 def test_date_query_no_attribute_access(self):
     """Integer date literals in a query behave like date strings."""
     opts = dict(engine=self.engine, parser=self.parser)

     df = DataFrame(randn(5, 3))
     starts = (("dates1", "1/1/2012"), ("dates2", "1/1/2013"), ("dates3", "1/1/2014"))
     for column, start in starts:
         df[column] = date_range(start, periods=5)

     res = df.query("(dates1 < 20130101) & (20130101 < dates3)", **opts)

     before = df.dates1 < "20130101"
     after = "20130101" < df.dates3
     assert_frame_equal(res, df[before & after])
Example #22
0
    def test_query_builtin(self):
        """An index named ``sin`` can be queried by that name."""
        opts = dict(engine=self.engine, parser=self.parser)

        size = bound = 10
        values = np.random.randint(bound, size=(size, 3))
        df = DataFrame(values, columns=list("abc"))
        df.index.name = "sin"

        # "sin" is expected to resolve to the index here
        wanted = df[df.index > 5]
        got = df.query("sin > 5", **opts)
        assert_frame_equal(wanted, got)
Example #23
0
 def test_date_index_query(self):
     """A date index can be compared against integer date literals."""
     opts = dict(engine=self.engine, parser=self.parser)

     n = 10
     df = DataFrame(randn(n, 3))
     for column, start in (("dates1", "1/1/2012"), ("dates3", "1/1/2014")):
         df[column] = date_range(start, periods=n)
     # dates1 becomes the index and is dropped from the columns
     df.set_index("dates1", inplace=True, drop=True)

     res = df.query("(index < 20130101) & (20130101 < dates3)", **opts)

     before = df.index < "20130101"
     after = "20130101" < df.dates3
     assert_frame_equal(res, df[before & after])
Example #24
0
 def test_inf(self):
     """Compare a column that really contains ``inf`` against ``inf``.

     Both ``==`` and ``!=`` are checked against the result of applying the
     matching ``operator`` function to the column directly.
     """
     n = 10
     df = DataFrame({"a": np.random.rand(n), "b": np.random.rand(n)})
     # Put inf into every other row of column "a".  The earlier
     # ``df.loc[::2, 0]`` addressed the *label* 0, which is not a column of
     # this frame, so it added a new column and left "a" free of inf.
     df.loc[::2, "a"] = np.inf
     comparisons = (("==", operator.eq), ("!=", operator.ne))
     for op, f in comparisons:
         q = "a %s inf" % op
         expected = df[f(df.a, np.inf)]
         result = df.query(q, engine=self.engine, parser=self.parser)
         assert_frame_equal(result, expected)
Example #25
0
 def test_date_query_with_NaT(self):
     """Date comparisons in a query agree with indexing when NaT is present."""
     opts = dict(engine=self.engine, parser=self.parser)

     n = 10
     df = DataFrame(randn(n, 3))
     starts = (("dates1", "1/1/2012"), ("dates2", "1/1/2013"), ("dates3", "1/1/2014"))
     for column, start in starts:
         df[column] = date_range(start, periods=n)

     # blank out a random subset of each compared column
     for column in ("dates1", "dates3"):
         df.loc[np.random.rand(n) > 0.5, column] = pd.NaT

     res = df.query("(dates1 < 20130101) & (20130101 < dates3)", **opts)

     before = df.dates1 < "20130101"
     after = "20130101" < df.dates3
     assert_frame_equal(res, df[before & after])
Example #26
0
 def test_date_index_query_with_NaT_duplicates(self):
     """Chained date comparison on an index that may hold repeated NaT."""
     opts = dict(engine=self.engine, parser=self.parser)

     n = 10
     df = DataFrame(
         {
             "dates1": date_range("1/1/2012", periods=n),
             "dates3": date_range("1/1/2014", periods=n),
         }
     )
     # blank out a random subset of dates1, then use it as the index
     blanked = np.random.rand(n) > 0.5
     df.loc[blanked, "dates1"] = pd.NaT
     df.set_index("dates1", inplace=True, drop=True)

     res = df.query("index < 20130101 < dates3", **opts)

     # the chain is spelled out as two pairwise comparisons
     before = df.index.to_series() < "20130101"
     after = "20130101" < df.dates3
     assert_frame_equal(res, df[before & after])
Example #27
0
    def check_str_list_query_method(self, parser, engine):
        """Compare a string column against a list literal with ``==``/``!=``.

        With the ``"pandas"`` parser the comparison acts as a membership
        test; with any other parser every variant is expected to raise
        ``NotImplementedError``.
        """
        tm.skip_if_no_ne(engine)
        df = DataFrame(randn(10, 1), columns=["b"])
        df["strings"] = Series(list("aabbccddee"))
        expect = df[df.strings.isin(["a", "b"])]
        opts = dict(engine=engine, parser=parser)

        if parser != "pandas":
            column, literal = "strings", '["a", "b"]'

            # column on the left twice, then the list on the left twice
            left_operands = [column, column, literal, literal]
            right_operands = left_operands[::-1]
            operators = ["==", "!=", "==", "!="]

            for left, op, right in zip(left_operands, operators, right_operands):
                ex = "{lhs} {op} {rhs}".format(lhs=left, op=op, rhs=right)
                with tm.assertRaises(NotImplementedError):
                    df.query(ex, **opts)
            return

        # equality against a list, in both operand orders
        res = df.query('strings == ["a", "b"]', **opts)
        assert_frame_equal(res, expect)

        res = df.query('["a", "b"] == strings', **opts)
        assert_frame_equal(res, expect)

        # inequality selects the complement
        expect = df[~df.strings.isin(["a", "b"])]

        res = df.query('strings != ["a", "b"]', **opts)
        assert_frame_equal(res, expect)

        res = df.query('["a", "b"] != strings', **opts)
        assert_frame_equal(res, expect)
Example #28
0
    def test_nested_scope(self):
        """Local frames are reachable via ``@`` in query and by name in eval."""
        opts = dict(engine=self.engine, parser=self.parser)
        skip_if_no_pandas_parser(opts["parser"])

        # the names df / df2 matter: the expressions below refer to them
        df = DataFrame(np.random.randn(5, 3))
        df2 = DataFrame(np.random.randn(5, 3))
        both_positive = df[(df > 0) & (df2 > 0)]

        result = df.query("(@df > 0) & (@df2 > 0)", **opts)
        assert_frame_equal(result, both_positive)

        # "and" between frames is compared against the "&" result
        result = pd.eval("df[df > 0 and df2 > 0]", **opts)
        assert_frame_equal(result, both_positive)

        # indexing nested inside the expression
        result = pd.eval("df[df > 0 and df2 > 0 and df[df > 0] > 0]", **opts)
        nested_expected = df[(df > 0) & (df2 > 0) & (df[df > 0] > 0)]
        assert_frame_equal(result, nested_expected)

        # eval and query must agree with each other
        via_eval = pd.eval("df[(df>0) & (df2>0)]", **opts)
        via_query = df.query("(@df>0) & (@df2>0)", **opts)
        assert_frame_equal(via_eval, via_query)
Example #29
0
    def check_query_lex_compare_strings(self, parser, engine):
        """Ordering comparisons of a string column against a string literal."""
        tm.skip_if_no_ne(engine=engine)
        import operator as opr

        letters = Series(np.random.choice(list("abcde"), 20))
        positions = Series(np.arange(letters.size))
        df = DataFrame({"X": letters, "Y": positions})

        # operator symbol paired with the function that computes the expectation
        comparisons = (("<", opr.lt), (">", opr.gt), ("<=", opr.le), (">=", opr.ge))

        for symbol, compare in comparisons:
            res = df.query('X %s "d"' % symbol, engine=engine, parser=parser)
            assert_frame_equal(res, df[compare(df.X, "d")])
Example #30
0
    def check_query_with_named_multiindex(self, parser, engine):
        """Query a named index level with scalar, list and ``in`` operators.

        Each case runs two spellings of the same condition against the
        ``color`` level and compares both with the same boolean mask.
        """
        tm.skip_if_no_ne(engine)
        colors = np.random.choice(["red", "green"], size=10)
        foods = np.random.choice(["eggs", "ham"], size=10)
        index = MultiIndex.from_arrays([colors, foods], names=["color", "food"])
        df = DataFrame(randn(10, 2), index=index)
        ind = Series(df.index.get_level_values("color").values, index=index, name="color")
        opts = dict(parser=parser, engine=engine)

        cases = (
            # equality
            ('color == "red"', '"red" == color', lambda: ind == "red"),
            # inequality
            ('color != "red"', '"red" != color', lambda: ind != "red"),
            # list equality (really just set membership)
            ('color == ["red"]', '["red"] == color', lambda: ind.isin(["red"])),
            ('color != ["red"]', '["red"] != color', lambda: ~ind.isin(["red"])),
            # in/not in ops
            ('["red"] in color', '"red" in color', lambda: ind.isin(["red"])),
            ('["red"] not in color', '"red" not in color', lambda: ~ind.isin(["red"])),
        )

        for first, second, make_mask in cases:
            # run both queries before building the expectation
            res1 = df.query(first, **opts)
            res2 = df.query(second, **opts)
            exp = df[make_mask()]
            assert_frame_equal(res1, exp)
            assert_frame_equal(res2, exp)