Esempio n. 1
0
 def test_columns_info(self):
     """Check the columns_info command on a plain and on an HXL-tagged CSV.

     Finally verifies that the info dictionary survives an
     encode_state_data / decode_state_data round trip.
     """
     import liquer.ext.lq_pandas  # register pandas commands and state type
     from liquer.state_types import encode_state_data, decode_state_data

     fixtures = os.path.dirname(inspect.getfile(self.__class__))

     # Fixture for which columns_info reports no tags.
     filename = encode_token(fixtures + "/test.csv")
     columns_query = f"df_from-{filename}/df_columns"
     info_query = f"df_from-{filename}/columns_info"
     assert evaluate(columns_query).get() == ["a", "b"]
     assert evaluate(info_query).get()["columns"] == ["a", "b"]
     assert evaluate(info_query).get()["has_tags"] == False
     assert evaluate(info_query).get()["types"]["a"].startswith("int")

     # Fixture for which columns_info reports a tag per column.
     filename = encode_token(fixtures + "/test_hxl.csv")
     columns_query = f"df_from-{filename}/df_columns"
     info_query = f"df_from-{filename}/columns_info"
     assert evaluate(columns_query).get() == ["a", "b"]
     assert evaluate(info_query).get()["columns"] == ["a", "b"]
     assert evaluate(info_query).get()["has_tags"] == True
     assert evaluate(info_query).get()["tags"]["a"] == "#indicator +num +aaa"
     assert evaluate(info_query).get()["tags"]["b"] == "#indicator +num +bbb"

     # Round trip through the state data encoding.
     info = evaluate(info_query).get()
     payload, _mime, type_id = encode_state_data(info)
     assert info == decode_state_data(payload, type_id)
Esempio n. 2
0
    def test_from_with_cache(self, httpserver):
        """Check hxl_from against a local HTTP server with a file cache set.

        The same query is evaluated twice while a FileCache in a temporary
        directory is active (the second pass is presumably answered from the
        cache); both passes must yield the same headers and display tags.
        The global cache is always reset, even when an assertion fails.
        """
        import liquer.ext.lq_hxl  # register HXL commands and state type

        csv_path = (os.path.dirname(inspect.getfile(self.__class__)) +
                    "/test_hxl.csv")
        # Context manager so the fixture file handle is closed deterministically.
        with open(csv_path) as f:
            test_hxl = f.read()

        httpserver.expect_request("/test_hxl.csv").respond_with_data(test_hxl)
        url = encode_token(httpserver.url_for("/test_hxl.csv"))
        query = f"hxl_from-{url}"
        try:
            with tempfile.TemporaryDirectory() as cachepath:
                set_cache(FileCache(cachepath))
                for _ in range(2):
                    state = evaluate(query)
                    data = state.get()
                    assert data.columns[0].header == "a"
                    assert data.columns[0].display_tag == "#indicator+num+aaa"
                    assert data.columns[1].header == "b"
                    assert data.columns[1].display_tag == "#indicator+num+bbb"
        finally:
            # Without this a failing assertion would leave the global cache
            # pointing at the already removed temporary directory.
            set_cache(None)
Esempio n. 3
0
    def test_ridge(self):
        """Run the ridge command on three small dataframes and check its output."""
        import importlib
        import liquer.ext.basic
        import liquer.ext.lq_pandas
        import liquer.ext.lq_sklearn_regression
        from liquer.commands import reset_command_registry

        reset_command_registry()  # prevent double-registration
        # Hack to enforce registering of the commands
        for module in (
            liquer.ext.basic,
            liquer.ext.lq_pandas,
            liquer.ext.lq_sklearn_regression,
        ):
            importlib.reload(module)

        # One feature, Y = 10 * x.
        @first_command
        def test1():
            return pd.DataFrame({"x": [1, 2, 3], "Y": [10, 20, 30]})

        # One feature, Y = 10 * x + 20.
        @first_command
        def test2():
            return pd.DataFrame({"x": [1, 2, 3], "Y": [30, 40, 50]})

        # Two features.
        @first_command
        def test3():
            return pd.DataFrame(
                {"x1": [1, 2, 3], "x2": [0, 0, 1], "Y": [30, 40, 55]}
            )

        def first(values):
            return list(values)[0]

        result = evaluate("ns-sklearn/test1/ridge").get()
        assert first(result.x) == pytest.approx(9.52381)

        result = evaluate("ns-sklearn/test2/ridge").get()
        assert first(result.x) == pytest.approx(9.52381)
        assert first(result.intercept) == pytest.approx(20.952381)

        result = evaluate("ns-sklearn/test3/ridge").get()
        assert first(result.x1) == pytest.approx(9.562842)
        assert first(result.x2) == pytest.approx(4.918033)
        assert first(result.intercept) == pytest.approx(20.901639)
Esempio n. 4
0
 def test_vars(self):
     """Exercise state variables set per query (let) and globally (set_var)."""
     import liquer.ext.basic

     read_abc = "state_variable-abc"
     let_then_read = "let-abc-1/state_variable-abc"

     # Unset variable reads as None; a let in one query is not seen by the next.
     assert evaluate(read_abc).get() is None
     assert evaluate(let_then_read).get() == "1"
     assert evaluate(read_abc).get() is None

     # A value from set_var is seen by later queries; let still wins over it.
     set_var("abc", "123")
     assert evaluate(read_abc).get() == "123"
     assert evaluate(let_then_read).get() == "1"
Esempio n. 5
0
    def test_image(self):
        """Create an image, save a resized PNG to the store and read it back."""
        memory_store = MemoryStore()
        set_store(memory_store)

        # Source command: a new 200x300 RGB image.
        @first_command
        def image():
            return Image.new(mode="RGB", size=(200, 300))

        original = evaluate("image").get()
        assert original.size == (200, 300)

        evaluate_and_save(
            "image/ns-pil/resize-400-600-bilinear/test.png",
            target_resource_directory="x",
        )
        resized = evaluate("x/test.png/-/dr").get()
        assert resized.size == (400, 600)
Esempio n. 6
0
 def test_set_tags(self):
     """Check set_tags both through df2hxl and on the resulting dataframe."""
     import liquer.ext.lq_hxl     # register HXL commands and state type
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     source = f"df_from-{filename}"

     # Tags are passed in b, a order but must land on the matching columns.
     data = evaluate(f"{source}/set_tags-b-indicator+b-a-indicator+a/df2hxl").get()
     expected = [("a", "#indicator+a"), ("b", "#indicator+b")]
     for index, (header, tag) in enumerate(expected):
         column = data.columns[index]
         assert column.header == header
         assert column.display_tag == tag

     # Only b is tagged: the first row holds the tag for b and "" for a.
     df = evaluate(f"{source}/set_tags-b-indicator+b").get()
     assert list(df.a) == ["", 1, 3]
     assert list(df.b) == ["#indicator+b", 2, 4]
Esempio n. 7
0
 def test_teq(self):
     """Check teq filtering on the HXL-tagged CSV; the tag row is always kept."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test_hxl.csv")
     source = f"df_from-{filename}"
     tag_a = "#indicator +num +aaa"
     tag_b = "#indicator +num +bbb"

     # Single condition.
     df = evaluate(f"{source}/teq-a-1").get()
     for name in ("a", "b"):
         assert name in df.columns
     assert list(df.a) == [tag_a, "1"]
     assert list(df.b) == [tag_b, "2"]

     # Two conditions matching the same row.
     df = evaluate(f"{source}/teq-a-3-b-4").get()
     assert list(df.a) == [tag_a, "3"]
     assert list(df.b) == [tag_b, "4"]

     # Two conditions that no row satisfies: only the tag row is left.
     df = evaluate(f"{source}/teq-a-1-b-4").get()
     assert list(df.a) == [tag_a]
     assert list(df.b) == [tag_b]
Esempio n. 8
0
 def test_head(self):
     """Check that head_df-1 keeps only the first row of the CSV fixture."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     first_row = evaluate(f"df_from-{filename}/head_df-1").get()
     assert list(first_row.a) == [1]
     assert list(first_row.b) == [2]
Esempio n. 9
0
def split_df(state, *columns):
    """Split a dataframe by columns and attach a link to every part.

    Delegates to qsplit_df, which yields a dataframe with a query per row,
    and then differs from it in two ways:
    - each generated query, extended by a ``link`` action, is evaluated
    - the evaluation result is stored in the link column

    State variables used (the default applies when the variable is None):
    - query_column: column the queries are read from, "query" by default
    - link_column: column the links are written to, "link" by default
    - split_link_type: argument of the link action, "url" by default
    """
    from liquer.parser import parse

    state = qsplit_df(state, *columns)
    df = state.get().copy()

    def variable(name, default):
        value = state.vars.get(name)
        return default if value is None else value

    query_column = variable("query_column", "query")
    link_column = variable("link_column", "link")
    split_link_type = variable("split_link_type", "url")

    links = []
    for q in df[query_column]:
        link_query = parse(q).with_action("link", split_link_type).encode()
        links.append(evaluate(link_query).get())
    df.loc[:, link_column] = links
    return state.with_data(df)
Esempio n. 10
0
def tsplit_df(state, *columns):
    """Split a dataframe by columns (split_df variant for a first row with tags).

    Delegates to qtsplit_df and then fills the link column: the first row
    gets an empty string, every further row gets the result of evaluating
    its query extended by a ``link`` action.

    State variables used (the default applies when the variable is None):
    query_column ("query"), link_column ("link"), split_link_type ("url").
    """
    from liquer.parser import parse

    state = qtsplit_df(state, *columns)
    df = state.get().copy()

    def variable(name, default):
        value = state.vars.get(name)
        return default if value is None else value

    query_column = variable("query_column", "query")
    link_column = variable("link_column", "link")
    split_link_type = variable("split_link_type", "url")

    # Skip the first row: it gets no link, only the "" placeholder.
    links = [""]
    for q in list(df[query_column])[1:]:
        link_query = parse(q).with_action("link", split_link_type).encode()
        links.append(evaluate(link_query).get())
    df.loc[:, link_column] = links
    return state.with_data(df)
Esempio n. 11
0
def serve(query):
    """Main service for evaluating queries.

    Returns the response built from the evaluated query. If evaluation or
    response creation raises, the traceback is printed and ``abort(500)``
    is called.
    """
    try:
        return response(evaluate(query))
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit propagate instead of being turned into a 500 reply.
        traceback.print_exc()
        abort(500)
Esempio n. 12
0
    def test_tsplit(self):
        """Check tsplit_df on the HXL fixture: values, queries and url links."""
        import importlib
        import liquer.ext.lq_pandas  # register pandas commands and state type
        import liquer.ext.basic
        from liquer.commands import reset_command_registry

        reset_command_registry()  # prevent double-registration
        # Hack to enforce registering of the commands
        for module in (liquer.ext.lq_pandas, liquer.ext.basic):
            importlib.reload(module)
        set_var("server", "http://localhost")
        set_var("api_path", "/q/")

        fixtures = os.path.dirname(inspect.getfile(self.__class__))
        filename = encode_token(fixtures + "/test_hxl.csv")
        df = evaluate(f"df_from-{filename}/tsplit_df-a").get()
        assert "a" in df.columns
        assert "b" not in df.columns
        assert list(df.a) == ["#indicator +num +aaa", "1", "3"]

        # Row 0 carries the tag, so queries and links are compared from row 1 on.
        expected_queries = [
            f"df_from-{filename}/teq-a-{value}" for value in ("1", "3")
        ]
        assert list(df["query"])[1:] == expected_queries
        assert list(df["link"])[1:] == [
            "http://localhost/q/" + query for query in expected_queries
        ]
Esempio n. 13
0
    def test_work_with_parquet(self):
        """Save the CSV fixture as parquet, query it with SQL and check the result."""
        fixtures = os.path.dirname(inspect.getfile(self.__class__))
        filename = encode_token(fixtures + "/test.csv")
        with TemporaryDirectory() as tmpdir:
            parquet_path = Path(tmpdir) / "test.parquet"
            evaluate_and_save(
                f"df_from-{filename}/test.parquet", target_directory=tmpdir
            )

            # Creates a context in which the saved parquet file is table "a".
            @first_command(volatile=True, cache=False)
            def execution_context():
                context = daf.ExecutionContext()
                context.register_parquet("a", str(parquet_path))
                return context

            # Selects a and b and adds c = a + b.
            @command
            def process(ctx):
                return ctx.sql("SELECT a, b, a+b AS c FROM a")

            evaluate_and_save(
                "execution_context/process/result.parquet",
                target_directory=tmpdir,
            )

            df = evaluate("execution_context/process/datafusion_to_pandas").get()
            for name in ("a", "b", "c"):
                assert name in df.columns
            assert list(df.a) == [1, 3]
            assert list(df.b) == [2, 4]
            assert list(df.c) == [3, 7]
Esempio n. 14
0
 def test_eq(self):
     """Check eq filtering with one condition, two conditions and no match."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     source = f"df_from-{filename}"

     # Single condition.
     df = evaluate(f"{source}/eq-a-1").get()
     for name in ("a", "b"):
         assert name in df.columns
     assert list(df.a) == [1]
     assert list(df.b) == [2]

     # Two conditions matching the same row.
     df = evaluate(f"{source}/eq-a-3-b-4").get()
     assert list(df.a) == [3]
     assert list(df.b) == [4]

     # Two conditions that no row satisfies.
     df = evaluate(f"{source}/eq-a-1-b-4").get()
     assert list(df.a) == []
     assert list(df.b) == []
Esempio n. 15
0
 def test_append_with_cache(self):
     """Check append_df while a file cache is set.

     The same query is evaluated twice with a FileCache in a temporary
     directory (the second pass is presumably answered from the cache);
     both passes must return the CSV rows twice. The global cache is
     always reset, even when an assertion fails.
     """
     import liquer.ext.lq_pandas  # register pandas commands and state type
     try:
         with tempfile.TemporaryDirectory() as cachepath:
             set_cache(FileCache(cachepath))
             filename = encode_token(os.path.dirname(
                 inspect.getfile(self.__class__))+"/test.csv")
             query = f"df_from-{filename}/append_df-{filename}"
             for _ in range(2):
                 df = evaluate(query).get()
                 assert "a" in df.columns
                 assert "b" in df.columns
                 assert list(df.a) == [1, 3, 1, 3]
                 assert list(df.b) == [2, 4, 2, 4]
     finally:
         # Without this a failing assertion would leave the global cache
         # pointing at the already removed temporary directory.
         set_cache(None)
Esempio n. 16
0
 def test_qsplit2(self):
     """Check qsplit_df over two columns: values and the generated eq queries."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     source = f"df_from-{filename}"

     df = evaluate(f"{source}/qsplit_df-a-b").get()
     assert list(df.a) == [1, 3]
     assert list(df.b) == [2, 4]

     expected_queries = [f"{source}/eq-a-1-b-2", f"{source}/eq-a-3-b-4"]
     assert list(df["query"]) == expected_queries
Esempio n. 17
0
 def test_df2hxl(self):
     """Check headers and display tags after df2hxl on the HXL-tagged CSV."""
     import liquer.ext.lq_hxl     # register HXL commands and state type
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     path = encode_token(fixtures + "/test_hxl.csv")
     data = evaluate(f"df_from-{path}/df2hxl").get()

     expected = [("a", "#indicator+num+aaa"), ("b", "#indicator+num+bbb")]
     for index, (header, tag) in enumerate(expected):
         column = data.columns[index]
         assert column.header == header
         assert column.display_tag == tag
Esempio n. 18
0
 def test_qsplit1(self):
     """Check qsplit_df over one column: kept columns, values and eq queries."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     source = f"df_from-{filename}"

     df = evaluate(f"{source}/qsplit_df-a").get()
     # Only the split column is kept; b is gone.
     assert "a" in df.columns
     assert "b" not in df.columns
     assert list(df.a) == [1, 3]

     expected_queries = [f"{source}/eq-a-1", f"{source}/eq-a-3"]
     assert list(df["query"]) == expected_queries
Esempio n. 19
0
 def test_append(self):
     """Check that appending the CSV fixture to itself repeats its rows."""
     import liquer.ext.lq_pandas  # register pandas commands and state type

     fixtures = os.path.dirname(inspect.getfile(self.__class__))
     filename = encode_token(fixtures + "/test.csv")
     df = evaluate(f"df_from-{filename}/append_df-{filename}").get()
     for name in ("a", "b"):
         assert name in df.columns
     assert list(df.a) == [1, 3, 1, 3]
     assert list(df.b) == [2, 4, 2, 4]
Esempio n. 20
0
    def test_hxl2df(self, httpserver):
        """Check hxl2df on HXL data fetched from a local HTTP server.

        The CSV fixture is served by the httpserver fixture, loaded with
        hxl_from and converted with hxl2df; data rows are compared from the
        second row on.
        """
        import liquer.ext.lq_hxl  # register HXL commands and state type

        csv_path = (os.path.dirname(inspect.getfile(self.__class__)) +
                    "/test_hxl.csv")
        # Context manager so the fixture file handle is closed deterministically.
        with open(csv_path) as f:
            test_hxl = f.read()

        httpserver.expect_request("/test_hxl.csv").respond_with_data(test_hxl)
        url = encode_token(httpserver.url_for("/test_hxl.csv"))
        df = evaluate(f"hxl_from-{url}/hxl2df").get()
        assert list(df.columns) == ["a", "b"]
        assert list(df.a[1:]) == ["1", "3"]
        assert list(df.b[1:]) == ["2", "4"]
Esempio n. 21
0
    def test_concat_recipe(self):
        import pandas as pd
        import liquer.ext.basic
        from liquer.commands import reset_command_registry

        reset_command_registry()  # prevent double-registration
        importlib.reload(liquer.ext.basic)
        importlib.reload(liquer.ext.lq_pandas)
        import liquer.store as st

        @first_command
        def hello(offset=0):
            return pd.DataFrame(
                dict(a=[1 + offset, 2 + offset], b=[3 + offset, 4 + offset]))

        substore = st.MemoryStore()
        substore.store(
            "recipes.yaml",
            """
RECIPES:
  - filename: hello.parquet
    type: pandas_concat
    concat:
    - hello
    - query: hello-10
      column: test
      value: extra
""",
            {},
        )
        store = RecipeSpecStore(substore)
        set_store(store)
        assert "hello.parquet" in store.keys()
        df = evaluate("hello.parquet/-/dr").get()
        assert sorted(df.columns) == ["a", "b", "test"]
        assert list(df.a) == [1, 2, 11, 12]
        assert list(df.b) == [3, 4, 13, 14]
        assert list(df.test) == [None, None, "extra", "extra"]

        assert store.get_metadata(
            "hello.parquet")["status"] == Status.READY.value
        assert store.get_metadata(
            "hello.parquet")["recipes_key"] == "recipes.yaml"
        assert store.get_metadata("hello.parquet")["has_recipe"] == True
        assert store.get_metadata("hello.parquet")["recipes_directory"] == ""
        assert store.get_metadata("hello.parquet")[
            "recipe_name"] == "recipes.yaml/-Ryaml/RECIPES/0#hello.parquet"
        assert store.get_metadata("hello.parquet")["data_characteristics"][
            "description"] == "Dataframe with 3 columns and 4 rows."

        set_store(None)
        reset_command_registry()
Esempio n. 22
0
    def test_workbook(self):
        """Save the CSV fixture as xlsx in the store and read the sheet back."""
        memory_store = MemoryStore()
        set_store(memory_store)

        fixtures = os.path.dirname(inspect.getfile(self.__class__))
        filename = encode_token(fixtures + "/test.csv")
        evaluate_and_save(
            f"df_from-{filename}/test.xlsx",
            target_resource_directory="testdir",
        )

        df = evaluate("testdir/test.xlsx/-/workbook/workbook_sheet_df").get()
        for name in ("a", "b"):
            assert name in df.columns
        assert list(df.a) == [1, 3]
        assert list(df.b) == [2, 4]
Esempio n. 23
0
    def get(self, query):
        """Main service for evaluating queries.

        Evaluates *query*, turns the state into a (data, mimetype, filename)
        triple with ``response`` and writes the data with a matching
        Content-Type header. If evaluation or response creation raises, the
        traceback is printed, a 500 reply is sent and nothing else is written.
        """
        try:
            b, mimetype, _filename = response(evaluate(query))
        except Exception:
            traceback.print_exc()
            self.set_status(500)
            self.finish(f"500 - Failed to create a respone to {query}")
            # Stop here: the reply is already finished and b/mimetype were
            # never assigned, so falling through would only raise again.
            return

        self.set_header("Content-Type", mimetype)
        self.write(b)
Esempio n. 24
0
    def test_pptx(self):
        """Build a presentation with commands, save it and extend the reloaded copy."""
        store = MemoryStore()
        set_store(store)

        # Creates a presentation holding a single title slide.
        @first_command
        def make_presentation():
            presentation = Presentation()
            slide = presentation.slides.add_slide(presentation.slide_layouts[0])
            heading = slide.shapes.title
            subheading = slide.placeholders[1]

            heading.text = "Hello, World!"
            subheading.text = "python-pptx was here!"
            return presentation

        # Appends a slide using layout 1 with the given title and two text lines.
        @command
        def add_slide(prs, title="Title"):
            slide = prs.slides.add_slide(prs.slide_layouts[1])
            shapes = slide.shapes

            heading = shapes.title
            body = shapes.placeholders[1]

            heading.text = title

            frame = body.text_frame
            frame.text = 'Find the bullet slide layout'

            paragraph = frame.add_paragraph()
            paragraph.text = 'Use _TextFrame.text for first bullet'
            paragraph.level = 1
            return prs

        evaluate_and_save(
            "make_presentation/add_slide/test.pptx",
            target_resource_directory="testdir",
        )
        metadata = store.get_metadata("testdir/test.pptx")
        assert metadata["type_identifier"] == "pptx_presentation"

        # Two slides were saved; add_slide-Slide2 adds the third one.
        extended = evaluate("testdir/test.pptx/-/dr/add_slide-Slide2").get()
        assert len(extended.slides) == 3
Esempio n. 25
0
    def test_dr(self):
        """Check that dr turns CSV bytes kept in the store into a dataframe."""
        import pandas as pd
        import liquer.ext.basic
        from liquer.commands import reset_command_registry

        reset_command_registry()  # prevent double-registration
        importlib.reload(liquer.ext.basic)
        importlib.reload(liquer.ext.lq_pandas)

        memory_store = MemoryStore()
        set_store(memory_store)
        memory_store.store("data.csv", b"a,b\n1,2\n3,4", {})

        df = evaluate("data.csv/-/dr").get()
        assert isinstance(df, pd.DataFrame)
        # Two data rows and two columns.
        assert len(df) == 2
        assert len(df.columns) == 2
        for name in ("a", "b"):
            assert name in df.columns
        assert list(df.a) == [1, 3]
        assert list(df.b) == [2, 4]
Esempio n. 26
0
    def test_link(self):
        """Check the link command: default, dataurl, path and url variants.

        The path and url variants are checked first with the values the test
        expects initially ("http://localhost", "/q/") and then after changing
        the ``server`` and ``api_path`` variables. Both variables are put
        back in a ``finally`` clause, so a failing assertion does not leave
        the changed values behind.
        """
        import liquer.ext.basic

        query = "let-hello-world/state_variable-hello"
        assert evaluate(f"{query}/link").get() == query
        assert evaluate(f"{query}/link-dataurl"
                        ).get() == "data:text/plain;base64,d29ybGQ="

        assert evaluate(f"{query}/link-path").get() == f"/q/{query}"
        assert evaluate(f"{query}/link-url").get(
        ) == f"http://localhost/q/{query}"
        try:
            set_var("server", "http://localhost:1234")
            set_var("api_path", "/liquer/q/")
            assert evaluate(f"{query}/link-path").get(
            ) == f"/liquer/q/{query}"
            assert evaluate(f"{query}/link-url").get(
            ) == f"http://localhost:1234/liquer/q/{query}"
        finally:
            # Put back the values the first half of this test relies on.
            set_var("server", "http://localhost")
            set_var("api_path", "/q/")
Esempio n. 27
0
 def get(self, query):
     """Evaluate *query* and write the state's dictionary form as JSON."""
     payload = evaluate(query).as_dict()
     self.write(json.dumps(payload))
Esempio n. 28
0
def serve(query):
    """Main service for evaluating queries: evaluate *query* and return its response."""
    state = evaluate(query)
    return response(state)
Esempio n. 29
0
def debug_json(query):
    """Debug query - evaluate *query* and return the state's dictionary form as JSON."""
    return jsonify(evaluate(query).as_dict())
Esempio n. 30
0
    def test_state(self):
        """Check that the state command reports the query it was applied to."""
        from liquer.commands import command_registry

        state_info = evaluate("ns-meta/commands/state").get()
        assert state_info["query"] == "ns-meta/commands"