예제 #1
0
def test_graph_creation(reader, retriever_with_docs, document_store_with_docs):
    """Check that ``Pipeline.add_node`` refuses inputs that cannot be wired up.

    ``reader`` and ``document_store_with_docs`` are not used in the body; they
    are kept in the signature unchanged (presumably they are pytest fixtures
    requested for their setup side effects -- confirm before removing).
    """
    pipeline = Pipeline()
    pipeline.add_node(name="ES",
                      component=retriever_with_docs,
                      inputs=["Query"])

    # Edge labels on the existing "ES" node that must be rejected with an
    # AssertionError.
    for bad_edge in ("ES.output_2", "ES.wrong_edge_label"):
        with pytest.raises(AssertionError):
            pipeline.add_node(name="Reader",
                              component=retriever_with_docs,
                              inputs=[bad_edge])

    # Referring to a node name that was never added must fail as well.
    with pytest.raises(Exception):
        pipeline.add_node(name="Reader",
                          component=retriever_with_docs,
                          inputs=["InvalidNode"])

    # The fresh pipeline is created outside the ``raises`` context on purpose:
    # only ``add_node`` is expected to raise, and an unexpected failure while
    # constructing ``Pipeline`` must not be able to satisfy the expectation.
    empty_pipeline = Pipeline()
    with pytest.raises(Exception):
        empty_pipeline.add_node(name="ES",
                                component=retriever_with_docs,
                                inputs=["InvalidNode"])
예제 #2
0
def test_pipeline(document_store, retriever):
    """Write four embedded documents, then query a one-node pipeline for three."""

    def random_embedding(dtype):
        # A 768-element random vector cast to the requested dtype.
        return np.random.rand(768).astype(dtype)

    # The third document carries a float64 embedding while the others use
    # float32; this mix is part of the test data and is kept as-is.
    documents = [
        {"name": "name_1", "text": "text_1", "embedding": random_embedding(np.float32)},
        {"name": "name_2", "text": "text_2", "embedding": random_embedding(np.float32)},
        {"name": "name_3", "text": "text_3", "embedding": random_embedding(np.float64)},
        {"name": "name_4", "text": "text_4", "embedding": random_embedding(np.float32)},
    ]
    document_store.write_documents(documents)

    pipeline = Pipeline()
    pipeline.add_node(component=retriever, name="FAISS", inputs=["Query"])

    output = pipeline.run(query="How to test this?", top_k_retriever=3)
    retrieved = output["documents"]
    assert len(retrieved) == 3
예제 #3
0
def test_join_document_pipeline(document_store_with_docs, reader):
    """Run ``JoinDocuments`` behind two retrievers in several configurations.

    Covers merge without weights, merge with weights and ``top_k_join``,
    concatenate, and a default join node feeding a reader.
    """
    es = ElasticsearchRetriever(document_store=document_store_with_docs)
    dpr = DensePassageRetriever(
        document_store=document_store_with_docs,
        query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
        passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
        use_gpu=False,
    )
    document_store_with_docs.update_embeddings(dpr)

    query = "Where does Carla lives?"

    def joined_pipeline(join_node):
        # Both retrievers hang off the query and feed the given join node.
        pipe = Pipeline()
        pipe.add_node(component=es, name="R1", inputs=["Query"])
        pipe.add_node(component=dpr, name="R2", inputs=["Query"])
        pipe.add_node(component=join_node, name="Join", inputs=["R1", "R2"])
        return pipe

    # merge without weights
    merged = joined_pipeline(JoinDocuments(join_mode="merge")).run(query=query)
    assert len(merged["documents"]) == 3

    # merge with weights
    weighted_join = JoinDocuments(join_mode="merge",
                                  weights=[1000, 1],
                                  top_k_join=2)
    weighted = joined_pipeline(weighted_join).run(query=query)
    assert weighted["documents"][0].score > 1000
    assert len(weighted["documents"]) == 2

    # concatenate
    concatenated = joined_pipeline(
        JoinDocuments(join_mode="concatenate")).run(query=query)
    assert len(concatenated["documents"]) == 3

    # default join node followed by a reader
    qa_pipeline = joined_pipeline(JoinDocuments())
    qa_pipeline.add_node(component=reader, name="Reader", inputs=["Join"])
    answers = qa_pipeline.run(query=query)["answers"]
    assert answers[0]["answer"] == "Berlin"
예제 #4
0
def test_query_keyword_statement_classifier():
    """Check which branch each query classifier selects for two sample queries.

    "morse code" must end up in the keyword node and "How old is John?" in
    the question node, for both the sklearn and the transformers classifier.
    """

    # Stub node wired to the classifier's second edge; marks the payload.
    class KeywordOutput(RootNode):
        outgoing_edges = 2

        def run(self, **kwargs):
            kwargs.update(output="keyword")
            return kwargs, "output_1"

    # Stub node wired to the classifier's first edge; marks the payload.
    class QuestionOutput(RootNode):
        outgoing_edges = 2

        def run(self, **kwargs):
            kwargs.update(output="question")
            return kwargs, "output_2"

    def check_routing(classifier_name, classifier_cls):
        # Build classifier -> {keyword, question} and run both sample queries.
        pipeline = Pipeline()
        pipeline.add_node(
            name=classifier_name,
            component=classifier_cls(),
            inputs=["Query"],
        )
        pipeline.add_node(
            name="KeywordNode",
            component=KeywordOutput(),
            inputs=["{}.output_2".format(classifier_name)],
        )
        pipeline.add_node(
            name="QuestionNode",
            component=QuestionOutput(),
            inputs=["{}.output_1".format(classifier_name)],
        )

        keyword_result = pipeline.run(query="morse code")
        assert keyword_result["output"] == "keyword"

        question_result = pipeline.run(query="How old is John?")
        assert question_result["output"] == "question"

    check_routing("SkQueryKeywordQuestionClassifier", SklearnQueryClassifier)
    check_routing("TfQueryKeywordQuestionClassifier", TransformersQueryClassifier)
예제 #5
0
def test_parallel_paths_in_pipeline_graph_with_branching():
    """Run one branching graph with three different root nodes.

    The root node "A" returns "output_1", "output_2" or "output_all"; the
    joined output string is compared for each case.
    """

    # --- root variants: same payload, different returned edge label ---
    class AWithOutput1(RootNode):
        outgoing_edges = 2

        def run(self, **kwargs):
            kwargs.update(output="A")
            return kwargs, "output_1"

    class AWithOutput2(RootNode):
        outgoing_edges = 2

        def run(self, **kwargs):
            kwargs.update(output="A")
            return kwargs, "output_2"

    class AWithOutputAll(RootNode):
        outgoing_edges = 2

        def run(self, **kwargs):
            kwargs.update(output="A")
            return kwargs, "output_all"

    # --- intermediate nodes: each appends its own letter to the output ---
    class B(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "B"
            return kwargs, "output_1"

    class C(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "C"
            return kwargs, "output_1"

    class D(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "D"
            return kwargs, "output_1"

    class E(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "E"
            return kwargs, "output_1"

    class JoinNode(RootNode):
        def run(self, **kwargs):
            # Only rewrite the output when a non-empty "inputs" entry is present.
            incoming = kwargs.get("inputs")
            if incoming:
                kwargs["output"] = "".join(item["output"] for item in incoming)
            return kwargs, "output_1"

    def branching_output(root_cls):
        # Assemble the graph around the given root class and run it once.
        pipeline = Pipeline()
        pipeline.add_node(name="A", component=root_cls(), inputs=["Query"])
        pipeline.add_node(name="B", component=B(), inputs=["A.output_1"])
        pipeline.add_node(name="C", component=C(), inputs=["A.output_2"])
        # NOTE: node "D" holds component E and node "E" holds component D;
        # the expected strings below depend on exactly this wiring.
        pipeline.add_node(name="D", component=E(), inputs=["B"])
        pipeline.add_node(name="E", component=D(), inputs=["B"])
        pipeline.add_node(name="F", component=JoinNode(), inputs=["D", "E", "C"])
        return pipeline.run(query="test")["output"]

    assert branching_output(AWithOutput1) == "ABEABD"
    assert branching_output(AWithOutput2) == "AC"
    assert branching_output(AWithOutputAll) == "ACABEABD"
예제 #6
0
def test_parallel_paths_in_pipeline_graph():
    """Run two graphs whose parallel paths meet in a two-input join node."""

    class A(RootNode):
        def run(self, **kwargs):
            kwargs.update(output="A")
            return kwargs, "output_1"

    # B..E each append their own letter to the running output string.
    class B(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "B"
            return kwargs, "output_1"

    class C(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "C"
            return kwargs, "output_1"

    class D(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "D"
            return kwargs, "output_1"

    class E(RootNode):
        def run(self, **kwargs):
            kwargs["output"] = kwargs["output"] + "E"
            return kwargs, "output_1"

    class JoinNode(RootNode):
        def run(self, **kwargs):
            # Concatenate the outputs of the first two incoming payloads.
            first = kwargs["inputs"][0]
            second = kwargs["inputs"][1]
            kwargs["output"] = first["output"] + second["output"]
            return kwargs, "output_1"

    def run_graph(wiring):
        # Add the nodes in the listed order, then run the pipeline once.
        pipeline = Pipeline()
        for node_name, node_cls, upstream in wiring:
            pipeline.add_node(name=node_name, component=node_cls(), inputs=upstream)
        return pipeline.run(query="test")["output"]

    # Paths of different length: B -> D and B -> C -> E, joined in F.
    uneven_paths = [
        ("A", A, ["Query"]),
        ("B", B, ["A"]),
        ("C", C, ["B"]),
        ("E", E, ["C"]),
        ("D", D, ["B"]),
        ("F", JoinNode, ["D", "E"]),
    ]
    assert run_graph(uneven_paths) == "ABDABCE"

    # Paths of equal length: B -> C and B -> D, joined in E.
    even_paths = [
        ("A", A, ["Query"]),
        ("B", B, ["A"]),
        ("C", C, ["B"]),
        ("D", D, ["B"]),
        ("E", JoinNode, ["C", "D"]),
    ]
    assert run_graph(even_paths) == "ABCABD"