Beispiel #1
0
def test_3node_bubble_func_doesnt_fail_on_cyclic():
    """Checks 3-node bubble detection after adding a 2 -> 0 back-edge.

    Whether a cyclic structure like this should still count as a bubble is up
    for debate; this test pins down the current behavior. Nothing is found
    when starting at 1 or 2, but starting at 0 still gives a valid 3-node
    bubble.
    """
    graph = get_3_node_bubble_graph()
    graph.add_edge(2, 0)
    # Nodes 1 and 2 are not accepted as starting points...
    for start in (1, 2):
        outcome = AssemblyGraph.is_valid_3node_bubble(graph, start)
        assert not outcome[0]
    # ... while 0 is still accepted, in spite of the cycle.
    outcome = AssemblyGraph.is_valid_3node_bubble(graph, 0)
    assert outcome[0]
def test_funky_selfloop():
    """Checks the simple cyclic chain graph after adding a 0 -> 0 self-loop.

    At the top level, we expect 0 -> 0 alone to be classified as a cyclic
    chain, with 1, 2, and 3 left out of it.

    Why? The self-loop gives 0 an extra incoming (and, technically, outgoing)
    edge, so 0 no longer fits into a cyclic chain that contains other nodes.
    This is a consequence of the simplistic rules we use to define cyclic
    chains, rather than being the one "correct" answer.

    +--+-------------+
    |  ^             |
    V  |             |
    0 -+-> 1 -> 2 -> 3
    """
    graph = get_simple_cyclic_chain_graph()
    graph.add_edge(0, 0)

    # Starting at 0, the self-loop on its own is a cyclic chain
    outcome = AssemblyGraph.is_valid_cyclic_chain(graph, 0)
    assert outcome[0]
    assert outcome[1] == [0]

    # Every other starting node comes up empty, because 0 gets in the way of
    # the detection
    for start in (1, 2, 3):
        outcome = AssemblyGraph.is_valid_cyclic_chain(graph, start)
        assert not outcome[0]
Beispiel #3
0
def test_component_sorting_simple():
    """Checks the components reported by get_connected_components() for
    sample1.gfa after pattern identification.
    """
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    ag.hierarchically_identify_patterns()

    components = ag.get_connected_components()
    # Four ccs:
    # 1. 1+, 2+, 3+, Chain of [4-, 5+] (the chain is counted as a single node
    #                                   in the decomposed digraph)
    # 2. 1-, 2-, 3-, Chain of [5-, 4+]
    # 3. 6+
    # 4. 6-
    # Components 1/2 and 3/4 tie on the number of nodes/edges/patterns, so
    # their relative order is arbitrary. For each component (a 3-tuple), we
    # therefore only check:
    #   - position 0: the top-level nodes ("real" nodes or collapsed
    #     patterns), of which we check the count
    #   - position 1: the total number of "real" nodes
    #   - position 2: the total number of edges
    expected_counts = [
        (4, 5, 4),
        (4, 5, 4),
        (1, 1, 0),
        (1, 1, 0),
    ]
    assert len(components) == 4

    for idx, (num_top_level, num_nodes, num_edges) in enumerate(
        expected_counts
    ):
        component = components[idx]
        assert len(component[0]) == num_top_level
        assert component[1] == num_nodes
        assert component[2] == num_edges
def test_scale_edges_all_edge_weights_equal():
    """Checks scale_edges() on marygold_fig2a.gml.

    Going by this test's name, all edge weights in that file are equal; every
    edge should come out as a non-outlier with a relative weight of 0.5.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/marygold_fig2a.gml")
    graph.scale_edges()
    all_edges = graph.digraph.edges
    for edge_key in all_edges:
        attrs = all_edges[edge_key]
        assert attrs["is_outlier"] == 0
        assert attrs["relative_weight"] == 0.5
Beispiel #5
0
def test_extra_nodes_on_ending():
    r"""Checks that the 4 -> 3 edge stops the graph below from being
    identified as a bubble (starting at 0).

            4
      /-1-\ |
     /     \V
    0       3
     \     /
      \-2-/

    Reversing that edge, so that it points 3 -> 4, should give us a valid
    bubble again (ending nodes may have arbitrary outgoing edges, as long as
    those edges aren't cyclic).
    """
    graph = get_easy_bubble_graph()
    graph.add_edge(4, 3)
    blocked = AssemblyGraph.is_valid_bubble(graph, 0)
    assert not blocked[0]

    # Flip 4 -> 3 around into 3 -> 4; the bubble should be valid once more
    graph.remove_edge(4, 3)
    graph.add_edge(3, 4)
    restored = AssemblyGraph.is_valid_bubble(graph, 0)
    assert restored[0]
    assert set(restored[1]) == {0, 1, 2, 3}
Beispiel #6
0
def test_intervening_paths_easier():
    """Drops one edge from the "intervening graph" and verifies that chain
    detection adapts.

    Once the edge is removed, the graph should look like:

       4
      ^
     /
    0 -> 1 -> 2 -> 3

    ... which means exactly one chain, 1 -> 2 -> 3, should be found.
    """
    graph = get_intervening_graph()
    # Cut the 5 -> 2 edge
    graph.remove_edge(5, 2)

    # 1 -> 2 -> 3 is now a valid chain. Thanks to backwards extension, both 1
    # and 2 work as starting nodes and give back the same chain. (3, the
    # chain's endpoint, is deliberately not tried here.)
    for start in (1, 2):
        outcome = AssemblyGraph.is_valid_chain(graph, start)
        assert outcome[0]
        assert outcome[1] == [1, 2, 3]

    # No other starting node yields a chain: 3 and 4 have no outgoing edges,
    # 5 is an "island", and 0 has an intervening outgoing edge to 4.
    for start in (0, 3, 4, 5):
        outcome = AssemblyGraph.is_valid_chain(graph, start)
        assert not outcome[0]
def test_scale_edges_no_edge_weights():
    """Checks scale_edges() on loop.gfa.

    Going by this test's name, that graph has no edge weights; every edge
    should come out as a non-outlier with a relative weight of 0.5.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    graph.scale_edges()
    all_edges = graph.digraph.edges
    for edge_key in all_edges:
        attrs = all_edges[edge_key]
        assert attrs["is_outlier"] == 0
        assert attrs["relative_weight"] == 0.5
def test_selfloop():
    r"""Checks that a lone self-loop is detected as a cyclic chain.

    The initial graph looks like:

       +---+
       |   |
       V   |
       0 --+

       ... and is a valid cyclic chain!

       We then add on incoming and outgoing nodes and check that those don't
       interfere with the detection of the 0 -> 0 cyclic chain:

            +---+
            |   |
            V   |
       1 -> 0 --+
             \
              2
    """
    # NOTE: the docstring above is a raw string on purpose. The diagram
    # contains a line ending in a backslash, which a non-raw string would
    # treat as a line continuation (swallowing the newline).
    g = nx.DiGraph()
    g.add_edge(0, 0)

    # Just the self-loop, with nothing else in the graph
    results = AssemblyGraph.is_valid_cyclic_chain(g, 0)
    assert results[0]
    assert results[1] == [0]

    # Add an incoming node (1) and an outgoing node (2) around 0
    g.add_edge(1, 0)
    g.add_edge(0, 2)

    # The detected cyclic chain should still be just [0]
    results = AssemblyGraph.is_valid_cyclic_chain(g, 0)
    assert results[0]
    assert results[1] == [0]
Beispiel #9
0
def test_scale_nodes():
    """Checks the relative lengths and longside proportions that
    scale_nodes() assigns to the nodes of sample1.gfa.
    """
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    # This graph has six nodes, with lengths 8, 10, 21, 7, 8, 4.
    #                          (for node IDs 1,  2,  3, 4, 5, 6.)
    ag.scale_nodes()
    # Expected (relative length, longside proportion), keyed by node name
    expected = {
        "1": (approx(0.4180047), config.MID_LONGSIDE_PROPORTION),
        "2": (approx(0.5525722), config.HIGH_LONGSIDE_PROPORTION),
        "3": (1, config.HIGH_LONGSIDE_PROPORTION),
        "4": (approx(0.3374782), config.MID_LONGSIDE_PROPORTION),
        "5": (approx(0.4180047), config.MID_LONGSIDE_PROPORTION),
        "6": (0, config.LOW_LONGSIDE_PROPORTION),
    }
    num_seen = 0
    for node_id in ag.digraph.nodes:
        attrs = ag.digraph.nodes[node_id]
        name = attrs["name"]
        rel_length = attrs["relative_length"]
        longside = attrs["longside_proportion"]
        # Names that aren't in the table are looked up by their negated
        # name instead
        key = name if name in expected else negate_node_id(name)
        expected_rel_length, expected_longside = expected[key]
        assert rel_length == expected_rel_length
        assert longside == expected_longside
        num_seen += 1
    # Twelve nodes should have been visited in total
    assert num_seen == 12
def test_isolated_start():
    """Checks that neither an isolated node nor an isolated chain is detected
    as a cyclic chain.
    """
    graph = nx.DiGraph()
    # A single node with no edges at all
    graph.add_node(0)
    lone_node = AssemblyGraph.is_valid_cyclic_chain(graph, 0)
    assert not lone_node[0]
    # An isolated two-node chain, 0 -> 1: no starting node should work
    graph.add_edge(0, 1)
    for start in (0, 1):
        outcome = AssemblyGraph.is_valid_cyclic_chain(graph, start)
        assert not outcome[0]
Beispiel #11
0
def test_scale_nodes_all_lengths_equal():
    """Checks scale_nodes() when all nodes have the same length: each node
    should get a relative length of 0.5 and the "mid" longside proportion.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    # all of the nodes in this graph have length 3
    graph.scale_nodes()
    for node_id in graph.digraph.nodes:
        attrs = graph.digraph.nodes[node_id]
        assert attrs["relative_length"] == 0.5
        assert attrs["longside_proportion"] == config.MID_LONGSIDE_PROPORTION
def test_simple_pattern_layout():
    """Smoke test: lays out the single bubble found in bubble_test.gml.

    Only the number of bubbles is asserted; the layout() call itself just has
    to complete without raising.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/bubble_test.gml")
    # Scaling / dimension computation is done before identifying patterns
    graph.scale_nodes()
    graph.compute_node_dimensions()
    graph.scale_edges()
    graph.hierarchically_identify_patterns()

    # There should be exactly one bubble in this graph; lay it out.
    assert len(graph.bubbles) == 1
    bubble = graph.bubbles[0]
    bubble.layout(graph)
def test_has_edge_weights():
    """Checks has_edge_weights() on three input files: it should be falsy
    for loop.gfa, and truthy for the LastGraph and GML inputs.
    """
    cases = (
        ("metagenomescope/tests/input/loop.gfa", False),
        ("metagenomescope/tests/input/cycletest_LastGraph", True),
        ("metagenomescope/tests/input/marygold_fig2a.gml", True),
    )
    for path, should_have_weights in cases:
        graph = AssemblyGraph(path)
        assert bool(graph.has_edge_weights()) is should_have_weights
def test_get_edge_weight_field():
    """Checks the edge weight field reported for three input files: None for
    loop.gfa, "multiplicity" for the LastGraph input, and "bsize" for the GML
    input.
    """
    unweighted = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    assert unweighted.get_edge_weight_field() is None

    weighted_cases = (
        ("metagenomescope/tests/input/cycletest_LastGraph", "multiplicity"),
        ("metagenomescope/tests/input/marygold_fig2a.gml", "bsize"),
    )
    for path, expected_field in weighted_cases:
        graph = AssemblyGraph(path)
        assert graph.get_edge_weight_field() == expected_field
def test_scale_edges_less_than_4_edges():
    """Checks scale_edges() on a graph with fewer than four edges: no edge
    should be flagged as an outlier, and each should get a relative weight of
    0.5.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/1_node_1_edge.LastGraph"
    )
    graph.scale_edges()
    # Strictly speaking this graph has 2 edges if it's treated as unoriented
    # (as of writing, that's the default for LastGraph / GFA files, although
    # it isn't required)
    all_edges = graph.digraph.edges
    for edge_key in all_edges:
        attrs = all_edges[edge_key]
        # Outlier detection should have been skipped
        assert attrs["is_outlier"] == 0
        # Ordinary relative scaling should still have happened. Both edges
        # here share the same weight, so each gets a relative weight of 0.5
        assert attrs["relative_weight"] == 0.5
Beispiel #16
0
def test_extraneous_outgoing_node_from_start_nodes():
    r"""Checks that the extra 0 -> 5 edge in this graph:

      5
     /
    0 -\ /-> 3
        2
    1 -/ \-> 4

     ... prevents a frayed rope from being detected, whether we "start" at 0
     or at 1.
    """
    graph = get_simple_fr_graph()
    graph.add_edge(0, 5)
    for start in (0, 1):
        outcome = AssemblyGraph.is_valid_frayed_rope(graph, start)
        assert not outcome[0]
Beispiel #17
0
def test_cyclic_chain_found_to_be_cyclic_during_backwards_extension():
    r"""Function name is probably self-explanatory. This looks at this graph:

         ________
        /        \
       V          \
       1 --> 2 --> 3 --> 4

       If we look for a chain starting at "2", then we'll see that 2 -> 3 seems
       to be a valid chain (none of 3's outgoing edges hit 2 or 3). However,
       when we try to do backwards extension (finding a more "optimal" starting
       node for the chain than 2), we should hit 1 and then 3, and this should
       cause the code to realize that the pattern here is really a cyclic
       chain.

       Fun fact: I'm pretty sure this sort of case was treated as a "chain" by
       MetagenomeScope's pattern detection code before. Adding this test
       actually made me realize this bug existed!
    """
    # NOTE: the docstring above is a raw string on purpose. Two lines of the
    # diagram end in a backslash, which a non-raw string would treat as line
    # continuations (gluing the diagram's rows together).
    g = nx.DiGraph()
    g.add_edge(1, 2)
    g.add_edge(2, 3)
    g.add_edge(3, 1)  # the back-edge that makes 1 -> 2 -> 3 cyclic
    g.add_edge(3, 4)
    # Regardless of picked starting node, this shouldn't work
    for s in [1, 2, 3, 4]:
        assert not AssemblyGraph.is_valid_chain(g, s)[0]
def test_scale_edges_dup_edges():
    """Checks that scale_edges() raises an error when the graph contains an
    edge marked with is_dup = True.

    As of writing, at least, such edges only serve to link a node with its
    duplicate. They aren't "real" edges and carry no weight, so scaling them
    is meaningless -- and they shouldn't be present in the graph at this
    point anyway. If one is present, we expect a ValueError.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/edge_scaling_test_both_outliers.LastGraph"
    )
    # Sneak a duplicate edge into the graph by hand
    graph.digraph.add_edge(0, 0, is_dup=True)
    with raises(ValueError) as exc_info:
        graph.scale_edges()
    message = str(exc_info.value)
    assert message == "Duplicate edges shouldn't exist in the graph yet."
Beispiel #19
0
def test_cyclic_frayed_rope():
    r"""Checks that the 3 -> 0 back-edge in this graph:

    +--------+
    |        |
    V        |
    0 -\ /-> 3
        2
    1 -/ \-> 4

     ... prevents a frayed rope from being detected, starting at 0 or at 1.
    """
    graph = get_simple_fr_graph()
    graph.add_edge(3, 0)
    for start in (0, 1):
        outcome = AssemblyGraph.is_valid_frayed_rope(graph, start)
        assert not outcome[0]
Beispiel #20
0
def test_easy_3_node_bubble_fails_with_normal_simple_bubble_detection():
    """Tests that is_valid_bubble() doesn't detect 3-node bubbles. Don't
    worry, though, because is_valid_3node_bubble() does!

    Every node in the graph is tried as the starting node. Including 0
    matters: presumably it is the only start from which a 3-node bubble
    could be found at all, so leaving it out would make this test pass
    without checking the claim above.
    """
    g = get_3_node_bubble_graph()
    for s in [0, 1, 2]:
        assert not AssemblyGraph.is_valid_bubble(g, s)[0]
def test_simple_cyclic_chain_detection():
    """Checks that the simple cyclic chain graph is detected as a cyclic
    chain made up of nodes 0-3, whichever of those nodes we start from.
    """
    graph = get_simple_cyclic_chain_graph()
    expected_nodes = {0, 1, 2, 3}
    for start in (0, 1, 2, 3):
        outcome = AssemblyGraph.is_valid_cyclic_chain(graph, start)
        assert outcome[0]
        # Node order may vary, so compare as sets
        assert set(outcome[1]) == expected_nodes
def test_extraneous_outgoing_edge_from_start():
    """Tests that the 0 -> 4 edge here prevents a cyclic chain from being
    detected "starting" at anywhere except for 1.

    I'll be honest, this is kind of weird behavior. It isn't a bug per se --
    we'd still detect this cyclic chain structure assuming the other nodes
    aren't collapsed into something else in the meantime, since we check all
    nodes at the same level as being the start of each type of structure --
    but it is weird nonetheless.

    In the future, I may revise the cyclic chain detection algorithm... or
    remove it entirely. For now, at least, this test should document the
    sometimes-funky behavior of this code.

    +--------------+
    |  4           |
    V /            |
    0 -> 1 -> 2 -> 3
    """
    g = get_simple_cyclic_chain_graph()
    g.add_edge(0, 4)
    for s in [0, 1, 2, 3, 4]:
        results = AssemblyGraph.is_valid_cyclic_chain(g, s)
        if s == 1:
            # 1 is the only starting node from which detection succeeds
            assert results[0]
        else:
            assert not results[0]
Beispiel #23
0
def test_bubble_cyclic_chain_identification():
    r"""Checks pattern identification on a cycle made up of bubbles.

    The input graph looks like

    +-----------------+
    |                 |
    |   2   5   8    11
     \ / \ / \ / \  /
      1   4   7   10
     / \ / \ / \ /  \
    |   3   6   9    12
    |                 |
    +-----------------+

    ... in other words, a bunch of cyclic bubbles. The "last" bubble in this
    drawing (10 -> [11|12] -> 1) is the one whose back-edges are shown.

    The upshot is that we should end up with something like

    +=======================================+
    |                                       |
    | +-------+-------+--------+---------+  |
    | |   2   |   5   |   8    |    11   |  |
    | |  / \  |  / \  |  / \   |   /  \  |  |
    +== 1   4 = 4   7 = 7   10 = 10    1 ===+
      |  \ /  |  \ /  |  \ /   |   \  /  |
      |   3   |   6   |   9    |    12   |
      +-------+-------+--------+---------+

    ... with each node that adjacent bubbles "share" (4, 7, 10, 1) being
    duplicated.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/bubble_cyclic_chain_test.gml"
    )
    graph.hierarchically_identify_patterns()

    # Everything should have collapsed into one top-level node with no edges
    decomposed = graph.decomposed_digraph
    assert len(decomposed.nodes) == 1
    assert len(decomposed.edges) == 0

    # Four bubbles inside one cyclic chain, and no other patterns
    assert len(graph.chains) == 0
    assert len(graph.cyclic_chains) == 1
    assert len(graph.frayed_ropes) == 0
    assert len(graph.bubbles) == 4
Beispiel #24
0
def test_diverges_to_start():
    r"""Checks that a graph in which 2 diverges back to the start node 0:

    +--------+
    |        |
    V     /--+
    0 -\ /
        2
    1 -/ \-> 4

     ... is not detected as a frayed rope, starting at 0 or at 1.
    """
    graph = get_simple_fr_graph()
    # Reroute the 2 -> 3 edge so that it points back at 0 instead
    graph.remove_edge(2, 3)
    graph.add_edge(2, 0)
    for start in (0, 1):
        outcome = AssemblyGraph.is_valid_frayed_rope(graph, start)
        assert not outcome[0]
def test_scale_edges_four_edges():
    """Checks scale_edges() on a graph with four edges: none should be
    flagged as outliers, and the relative weights should span 0 to 1.
    """
    # This graph really only has two edges, but reverse complementing means
    # that we treat it as having four.
    graph = AssemblyGraph("metagenomescope/tests/input/cycletest_LastGraph")
    graph.scale_edges()
    # Expectations:
    # - No edge is flagged as an outlier.
    # - The two edges with weight 5 (the dataset's minimum weight) get a
    #   relative weight of 0.
    # - The two edges with weight 9 (the maximum weight) get a relative
    #   weight of 1.
    all_edges = graph.digraph.edges
    for edge_key in all_edges:
        attrs = all_edges[edge_key]
        assert attrs["is_outlier"] == 0
        expected_weight = 0 if attrs["multiplicity"] == 5 else 1
        assert attrs["relative_weight"] == expected_weight
Beispiel #26
0
def test_intervening_paths_harder():
    """Checks which chains are found in the (unmodified) "intervening graph."

    2 -> 3 is the only chain that should be detected; the "intervening"
    nodes/edges rule out everything else.
    """
    graph = get_intervening_graph()
    # 2 -> 3 is the only detectable chain here, so every starting node other
    # than 2 should give us (falsy, None)
    for start in (0, 1, 3, 4, 5):
        outcome = AssemblyGraph.is_valid_chain(graph, start)
        assert not outcome[0]
        assert outcome[1] is None

    # Starting at 2, on the other hand, should find the 2 -> 3 chain
    outcome = AssemblyGraph.is_valid_chain(graph, 2)
    assert outcome[0]
    assert outcome[1] == [2, 3]
Beispiel #27
0
def test_easy_bubble_fails_when_starting_point_bad():
    """Checks that the easy bubble graph's bubble isn't identified when the
    starting node is anything other than "0".
    """
    graph = get_easy_bubble_graph()
    for start in (1, 2, 3):
        outcome = AssemblyGraph.is_valid_bubble(graph, start)
        # A failed detection should come back as (falsy, None)
        assert not outcome[0]
        assert outcome[1] is None
Beispiel #28
0
def test_easiest_possible_case():
    """Checks the most basic scenario: the chain is 0 -> 1 -> 2, and we use 0
    as the "starting node".
    """
    path_graph = get_test_path_graph(3)
    outcome = AssemblyGraph.is_valid_chain(path_graph, 0)

    # We expect a 2-element result: a truthy flag, plus the chain's nodes
    assert len(outcome) == 2
    assert outcome[0]
    assert outcome[1] == [0, 1, 2]
def test_scale_edges_low_outlier():
    """Checks scale_edges() on a graph with a low outlier edge weight: the
    outlier should be flagged with -1, and the remaining weights should be
    scaled relative to each other.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/edge_scaling_test_low.LastGraph"
    )
    graph.scale_edges()
    # Low outlier weights: 1
    # Non-outlier weights: 1000, 1001, 1005
    #
    # Expected (is_outlier, relative_weight), keyed by multiplicity. Any
    # multiplicity not listed here is expected to be a non-outlier with a
    # relative weight of 1.
    expected_by_weight = {
        1: (-1, 0),
        1000: (0, 0),
        1001: (0, 1 / 5),
    }
    fallback = (0, 1)
    all_edges = graph.digraph.edges
    for edge_key in all_edges:
        attrs = all_edges[edge_key]
        expected_outlier, expected_weight = expected_by_weight.get(
            attrs["multiplicity"], fallback
        )
        assert attrs["is_outlier"] == expected_outlier
        assert attrs["relative_weight"] == expected_weight
Beispiel #30
0
def test_easy_no_chain():
    """Checks the 0 -> 1 -> 2 graph when we start at 2. No chain is found
    from there in the first place, so the code won't even get to the
    backwards extension step.
    """
    path_graph = get_test_path_graph(3)
    outcome = AssemblyGraph.is_valid_chain(path_graph, 2)

    # We expect a 2-element result of (falsy, None)
    assert len(outcome) == 2
    assert not outcome[0]
    assert outcome[1] is None