async def test_descendant_reverse_category(caplog):
    """
    Test that when we are given related_to, descendants
    will be filled in either the forward or backward direction.

    Two query graphs over the same Disease -> ChemicalSubstance nodes
    are planned:

    * with ``biolink:related_to`` planning must succeed, after which
      ``assert_no_level`` inspects the captured log records at
      WARNING level;
    * with ``biolink:treats`` planning must raise ``NoAnswersError``
      with a message matching "No KPs".
    """
    valid_qg = query_graph_from_string("""
        n0(( categories[] biolink:Disease ))
        n0(( ids[] MONDO:0005737 ))
        n0-- biolink:related_to -->n1
        n1(( categories[] biolink:ChemicalSubstance ))
        """)
    await prepare_query_graph(valid_qg)
    # Only successful completion and the log output matter here, so the
    # returned plan is intentionally not bound to a name.
    await generate_plan(valid_qg)
    # NOTE(review): the trailing 1 is forwarded to assert_no_level as-is;
    # its exact meaning is defined by that helper - confirm there.
    assert_no_level(caplog, logging.WARNING, 1)

    invalid_qg = query_graph_from_string("""
        n0(( categories[] biolink:Disease ))
        n0(( ids[] MONDO:0005737 ))
        n0-- biolink:treats -->n1
        n1(( categories[] biolink:ChemicalSubstance ))
        """)
    await prepare_query_graph(invalid_qg)

    # The call is expected to raise, so nothing is bound from its result.
    with pytest.raises(NoAnswersError, match=r"No KPs"):
        await generate_plan(invalid_qg)
# Example #2
# 0
async def test_node_set(redis):
    """Check that a query node flagged with is_set is handled correctly.

    The n1 node is marked ``is_set`` before the lookup. The test expects
    exactly two results, whose n1 node bindings have lengths 1 and 2.
    """
    graph_text = """
        n0(( ids[] CHEBI:6801 ))
        n0(( categories[] biolink:ChemicalSubstance ))
        n1(( categories[] biolink:Disease ))
        n2(( categories[] biolink:PhenotypicFeature ))
        n0-- biolink:treats -->n1
        n1-- biolink:has_phenotype -->n2
        """
    query_graph = query_graph_from_string(graph_text)
    query_graph["nodes"]["n1"]["is_set"] = True

    # Assemble the request payload
    request = dict(
        message={"query_graph": query_graph},
        log_level="WARNING",
    )

    # Execute the lookup
    output = await lookup(request, redis)

    results = output["message"]["results"]
    assert len(results) == 2

    # Collect the distinct sizes of the n1 bindings across all results
    n1_binding_sizes = set()
    for result in results:
        n1_binding_sizes.add(len(result["node_bindings"]["n1"]))
    assert n1_binding_sizes == {1, 2}
async def test_planning_performance_typical_example():
    """
    Time plan generation for a more typical query graph
    (modeled after a COP) with a lot of KPs available.

    The graph is a five-edge chain of ``biolink:related_to`` hops between
    two nodes with fixed ids. Filtering should let us do better on
    performance for this shape of query.
    """
    graph_text = """
        n0(( ids[] MONDO:0005737 ))
        n1(( categories[] biolink:BiologicalProcessOrActivity ))
        n2(( categories[] biolink:AnatomicalEntity ))
        n3(( categories[] biolink:PhenotypicFeature ))
        n4(( categories[] biolink:PhenotypicFeature ))
        n5(( ids[] MONDO:6801 ))
        n0-- biolink:related_to -->n1
        n1-- biolink:related_to -->n2
        n2-- biolink:related_to -->n3
        n3-- biolink:related_to -->n4
        n4-- biolink:related_to -->n5
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Zero-argument coroutine function handed to the timing helper;
    # the planner's result is discarded.
    async def testable_generate_plans():
        root_logger = logging.getLogger()
        await generate_plan(query_graph, logger=root_logger)

    description = "generate plan for a typical query graph (50k KPs)"
    await time_and_display(testable_generate_plans, description)
async def test_plan_ex1(caplog):
    """Check that the first example query graph yields one plan step per edge."""
    graph_text = """
        n0(( categories[] biolink:MolecularEntity ))
        n1(( ids[] MONDO:0005148 ))
        n2(( categories[] biolink:GeneOrGeneProduct ))
        n1-- biolink:treated_by -->n0
        n0-- biolink:affects_abundance_of -->n2
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    plan, _kps = await generate_plan(query_graph)

    # The plan should contain exactly as many entries as the graph has edges
    step_count = len(plan)
    edge_count = len(query_graph["edges"])
    assert step_count == edge_count
async def test_double_sided(caplog):
    """
    Test planning when a KP provides edges in both directions.

    The single ``n0n1`` edge is expected to be planned with ``kp0`` only,
    and ``kp0`` must be present in the returned KPs.
    """
    graph_text = """
        n0(( ids[] MONDO:0005737 ))
        n0(( categories[] biolink:Disease ))
        n1(( categories[] biolink:Drug ))
        n0-- biolink:treated_by -->n1
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    root_logger = logging.getLogger()
    plan, kps = await generate_plan(query_graph, logger=root_logger)

    expected_plan = {"n0n1": ["kp0"]}
    assert plan == expected_plan
    assert "kp0" in kps
async def test_not_enough_kps(caplog):
    """
    Check we get no plans when we submit a query graph
    that has edges we can't solve.

    Planning is expected to raise ``NoAnswersError`` with a message
    matching "cannot reach".
    """

    qg = query_graph_from_string("""
        n0(( categories[] biolink:ExposureEvent ))
        n1(( categories[] biolink:Drug ))
        n0-- biolink:related_to -->n1
        """)

    await prepare_query_graph(qg)

    # The call is expected to raise, so nothing is bound from its result.
    with pytest.raises(NoAnswersError, match=r"cannot reach"):
        await generate_plan(qg, logger=logging.getLogger())
async def test_valid_two_pinned_nodes(caplog):
    """
    Test Pinned -> Unbound + Pinned

    This should be valid because we only care about
    a path from a pinned node to all unbound nodes.
    The test makes no assertions on the plan itself; it passes
    as long as planning completes without raising.
    """
    graph_text = """
        n0(( ids[] MONDO:0005148 ))
        n1(( categories[] biolink:Drug ))
        n0-- biolink:treated_by -->n1
        n2(( ids[] MONDO:0011122 ))
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Result is unpacked but deliberately not inspected
    _plan, _kps = await generate_plan(query_graph)
# Example #8
# 0
async def test_gene_protein_conflation(redis):
    """Test conflation of biolink:Gene and biolink:Protein categories.

    The query chains Disease -> Protein -> Gene -> Disease, so it contains
    a Protein-typed node, a Gene-typed node, and nodes that are neither.
    The lookup output is then checked against a single expected result.

    NOTE(review): earlier notes on this test described the checks in terms
    of edges e0/e1/e2, but the edges here are n0n1/n1n2/n2n3 - confirm
    which hop covers which conflation direction.
    """
    graph_text = """
        n0(( ids[] MONDO:0008114 ))
        n0(( categories[] biolink:Disease ))
        n1(( categories[] biolink:Protein ))
        n2(( categories[] biolink:Gene ))
        n3(( categories[] biolink:Disease ))
        n0-- biolink:related_to -->n1
        n1-- biolink:related_to -->n2
        n2-- biolink:related_to -->n3
        """
    query_graph = query_graph_from_string(graph_text)

    # Assemble the request payload
    request = dict(message={"query_graph": query_graph}, log_level="ERROR")

    # Execute the lookup
    output = await lookup(request, redis)

    # Expected knowledge graph and the single expected result
    expected_kg = """
                MONDO:0008114 biolink:related_to MESH:C035133
                MESH:C035133 biolink:related_to HP:0007430
                HP:0007430 biolink:related_to CHEBI:6801
                """
    expected_result = """
                node_bindings:
                    n0 MONDO:0008114
                    n1 MESH:C035133
                    n2 HP:0007430
                    n3 CHEBI:6801
                edge_bindings:
                    n0n1 MONDO:0008114-MESH:C035133
                    n1n2 MESH:C035133-HP:0007430
                    n2n3 HP:0007430-CHEBI:6801
                """
    template = {
        "knowledge_graph": expected_kg,
        "results": [expected_result],
    }

    # Verify that the appropriate nodes are in the results
    validate_message(template, output["message"])
async def test_unbound_unconnected_node(caplog):
    """
    Test Pinned -> Unbound + Unbound

    This should be invalid because there is no path
    to the unbound node. Planning is expected to raise
    ``NoAnswersError`` with a message matching "cannot reach".
    """

    qg = query_graph_from_string("""
        n0(( ids[] MONDO:0005148 ))
        n1(( categories[] biolink:Drug ))
        n0-- biolink:treated_by -->n1
        n2(( categories[] biolink:PhenotypicFeature ))
        """)
    await prepare_query_graph(qg)

    # The call is expected to raise, so nothing is bound from its result.
    with pytest.raises(NoAnswersError, match=r"cannot reach"):
        await generate_plan(qg)
async def test_plan_reverse_edge(caplog):
    """
    Test that we can plan a simple query graph
    where the edge has to be traversed in the opposite
    direction of the one that was given.

    The only edge points from the unbound n1 to the pinned n0, and is
    expected to be planned as ``n1n0`` using ``kp0``.
    """
    graph_text = """
        n0(( ids[] MONDO:0005148 ))
        n1(( categories[] biolink:Drug ))
        n1-- biolink:treats -->n0
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    plan, kps = await generate_plan(query_graph)

    expected_plan = {"n1n0": ["kp0"]}
    assert plan == expected_plan
    assert "kp0" in kps
async def test_fork(caplog):
    """
    Test Unbound <- Pinned -> Unbound

    This should be valid because we allow
    a fork to multiple paths. The test makes no assertions on the
    plan itself; it passes as long as planning completes without raising.
    """
    graph_text = """
        n0(( ids[] MONDO:0005148 ))
        n1(( categories[] biolink:Drug ))
        n2(( categories[] biolink:PhenotypicFeature ))
        n0-- biolink:treated_by -->n1
        n0-- biolink:has_phenotype -->n2
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Result is unpacked but deliberately not inspected
    _plan, _kps = await generate_plan(query_graph)
# Example #12
# 0
async def test_solve_ex1(redis):
    """Test solving the ex1 query graph.

    Runs a two-hop lookup (ChemicalSubstance -treats-> Disease
    -has_phenotype-> PhenotypicFeature) and validates the output message
    against one expected result.
    """
    graph_text = """
        n0(( ids[] CHEBI:6801 ))
        n0(( categories[] biolink:ChemicalSubstance ))
        n1(( categories[] biolink:Disease ))
        n2(( categories[] biolink:PhenotypicFeature ))
        n0-- biolink:treats -->n1
        n1-- biolink:has_phenotype -->n2
        """
    query_graph = query_graph_from_string(graph_text)

    # Assemble the request payload
    request = dict(
        message={"query_graph": query_graph},
        log_level="ERROR",
    )

    # Execute the lookup
    output = await lookup(request, redis)

    # Expected knowledge graph and the single expected result
    expected_kg = """
                CHEBI:6801 biolink:treats MONDO:0005148
                MONDO:0005148 biolink:has_phenotype HP:0004324
                """
    expected_result = """
                node_bindings:
                    n0 CHEBI:6801
                    n1 MONDO:0005148
                    n2 HP:0004324
                edge_bindings:
                    n0n1 CHEBI:6801-MONDO:0005148
                    n1n2 MONDO:0005148-HP:0004324
                """
    template = {
        "knowledge_graph": expected_kg,
        "results": [expected_result],
    }

    validate_message(template, output["message"])
async def test_planning_performance_generic_qg():
    """
    Time plan generation for a very generic query graph.

    Both unbound nodes are typed as ``biolink:NamedThing`` and both edges
    use ``biolink:related_to``. This is a use case we hopefully
    don't encounter very much.
    """
    graph_text = """
        n0(( ids[] MONDO:0005737 ))
        n1(( categories[] biolink:NamedThing ))
        n2(( categories[] biolink:NamedThing ))
        n0-- biolink:related_to -->n1
        n1-- biolink:related_to -->n2
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Bind the planner's arguments up front so the timing helper
    # receives a zero-argument callable.
    root_logger = logging.getLogger()
    plan_call = partial(generate_plan, query_graph, logger=root_logger)
    description = "generate plan for a generic query graph (1000 kps)"
    await time_and_display(plan_call, description)
async def test_plan_loop():
    """
    Test that we create a plan for a query with a loop.

    The three edges form a cycle over n0, n1 and n2; the resulting
    plan is expected to have three entries.
    """
    graph_text = """
        n0(( ids[] MONDO:0008114 ))
        n0(( categories[] biolink:Disease ))
        n1(( categories[] biolink:PhenotypicFeature ))
        n2(( categories[] biolink:ChemicalSubstance ))
        n0-- biolink:has_phenotype -->n1
        n2-- biolink:treats -->n0
        n2-- biolink:treats -->n1
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    plan, _kps = await generate_plan(query_graph)

    step_count = len(plan)
    assert step_count == 3
async def test_plan_reuse_pinned():
    """
    Test that we create a plan that uses a pinned node twice.

    The pinned n0 sits on a loop (n0 -> n1 -> n2 -> n0) and also has a
    separate edge to n3. The test makes no assertions on the plan itself;
    it passes as long as planning completes without raising.
    """
    graph_text = """
        n0(( ids[] MONDO:0005148 ))
        n0(( categories[] biolink:Disease ))
        n1(( categories[] biolink:Disease ))
        n2(( categories[] biolink:Disease ))
        n3(( categories[] biolink:Disease ))
        n0-- biolink:related_to -->n1
        n1-- biolink:related_to -->n2
        n2-- biolink:related_to -->n0
        n0-- biolink:related_to -->n3
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Result is unpacked but deliberately not inspected
    _plan, _kps = await generate_plan(query_graph)
# Example #16
# 0
async def test_mixed_canonical(redis):
    """Test a qedge carrying both a canonical and a non-canonical predicate.

    The single edge lists ``biolink:treats`` and ``biolink:phenotype_of``;
    the lookup is expected to return two results.
    """
    graph_text = """
        n0(( ids[] CHEBI:6801 ))
        n0(( categories[] biolink:ChemicalSubstance ))
        n1(( categories[] biolink:Disease ))
        n0-- biolink:treats biolink:phenotype_of -->n1
        """
    query_graph = query_graph_from_string(graph_text)

    # Assemble the request payload
    request = dict(
        message={"query_graph": query_graph},
        log_level="ERROR",
    )

    # Execute the lookup
    output = await lookup(request, redis)

    results = output["message"]["results"]
    assert len(results) == 2
# Example #17
# 0
async def test_symmetric_noncanonical(redis):
    """Test a qedge with the symmetric, non-canonical predicate genetically_interacts_with.

    The lookup is expected to return exactly one result.
    """
    graph_text = """
        n0(( ids[] CHEBI:6801 ))
        n0(( categories[] biolink:ChemicalSubstance ))
        n1(( categories[] biolink:Disease ))
        n0-- biolink:genetically_interacts_with -->n1
        """
    query_graph = query_graph_from_string(graph_text)

    # Assemble the request payload
    request = dict(
        message={"query_graph": query_graph},
        log_level="ERROR",
    )

    # Execute the lookup
    output = await lookup(request, redis)

    results = output["message"]["results"]
    assert len(results) == 1
# Example #18
# 0
async def test_disambiguation(redis):
    """
    Test disambiguating batch results with qnode_id.

    A single result is expected, and the output message is validated
    against a template in which n0 is bound to ``CHEBI:XXX``.
    """
    graph_text = """
        n0(( ids[] CHEBI:6801 ))
        n0-- biolink:treats -->n1
        n1(( categories[] biolink:Disease ))
        """
    query_graph = query_graph_from_string(graph_text)

    # Assemble the request payload
    request = dict(
        message={"query_graph": query_graph},
        log_level="ERROR",
    )

    # Execute the lookup
    output = await lookup(request, redis)
    results = output["message"]["results"]
    assert len(results) == 1

    # Expected knowledge graph and the single expected result
    expected_kg = """
                CHEBI:XXX biolink:treats MONDO:0005148
                """
    expected_result = """
                node_bindings:
                    n0 CHEBI:XXX
                    n1 MONDO:0005148
                edge_bindings:
                    n0n1 CHEBI:XXX-MONDO:0005148
                """
    template = {
        "knowledge_graph": expected_kg,
        "results": [expected_result],
    }

    validate_message(template, output["message"])
async def test_plan_double_loop(caplog):
    """
    Test valid plan for a more complex query with two loops.

    The loops (n0 -> n1 -> n2 -> n0 and n2 -> n3 -> n4 -> n2) share
    node n2. The test makes no assertions on the plan itself; it passes
    as long as planning completes without raising.
    """
    graph_text = """
        n0(( ids[] MONDO:0005148 ))
        n0(( categories[] biolink:Disease ))
        n1(( categories[] biolink:Disease ))
        n2(( categories[] biolink:Disease ))
        n3(( categories[] biolink:Disease ))
        n4(( categories[] biolink:Disease ))
        n0-- biolink:related_to -->n1
        n1-- biolink:related_to -->n2
        n2-- biolink:related_to -->n0
        n2-- biolink:related_to -->n3
        n3-- biolink:related_to -->n4
        n4-- biolink:related_to -->n2
        """
    query_graph = query_graph_from_string(graph_text)
    await prepare_query_graph(query_graph)

    # Result is unpacked but deliberately not inspected
    _plan, _kps = await generate_plan(query_graph)