def test_get_edge_weight_field():
    """Check which edge weight field each sample input file reports."""
    # The GFA sample is expected to report no edge weight field at all, so
    # it is checked with an identity comparison against None.
    unweighted = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    assert unweighted.get_edge_weight_field() is None

    # (input path, name of the edge weight field we expect back)
    weighted_inputs = (
        ("metagenomescope/tests/input/cycletest_LastGraph", "multiplicity"),
        ("metagenomescope/tests/input/marygold_fig2a.gml", "bsize"),
    )
    for graph_path, expected_field in weighted_inputs:
        graph = AssemblyGraph(graph_path)
        assert graph.get_edge_weight_field() == expected_field
def test_has_edge_weights():
    """Check has_edge_weights() on one unweighted and two weighted inputs."""
    unweighted = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    assert not unweighted.has_edge_weights()

    # Both of these inputs are expected to report edge weights.
    weighted_paths = (
        "metagenomescope/tests/input/cycletest_LastGraph",
        "metagenomescope/tests/input/marygold_fig2a.gml",
    )
    for graph_path in weighted_paths:
        graph = AssemblyGraph(graph_path)
        assert graph.has_edge_weights()
Beispiel #3
0
def test_scale_nodes():
    """Check relative lengths and longside proportions set by scale_nodes().

    Nodes whose names are not listed in the expectation tables are looked up
    under the name returned by negate_node_id() instead.
    """
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    # This graph has six nodes, with lengths 8, 10, 21, 7, 8, 4.
    #                          (for node IDs 1,  2,  3, 4, 5, 6.)
    ag.scale_nodes()
    expected_rel_len = {
        "1": approx(0.4180047),
        "2": approx(0.5525722),
        "3": 1,
        "4": approx(0.3374782),
        "5": approx(0.4180047),
        "6": 0,
    }
    expected_longside = {
        "1": config.MID_LONGSIDE_PROPORTION,
        "2": config.HIGH_LONGSIDE_PROPORTION,
        "3": config.HIGH_LONGSIDE_PROPORTION,
        "4": config.MID_LONGSIDE_PROPORTION,
        "5": config.MID_LONGSIDE_PROPORTION,
        "6": config.LOW_LONGSIDE_PROPORTION,
    }
    num_checked = 0
    for node_id in ag.digraph.nodes:
        attrs = ag.digraph.nodes[node_id]
        node_name = attrs["name"]
        rel_len = attrs["relative_length"]
        longside = attrs["longside_proportion"]
        # Fall back to the negated name when this name isn't listed directly.
        if node_name in expected_rel_len:
            key = node_name
        else:
            key = negate_node_id(node_name)
        assert rel_len == expected_rel_len[key]
        assert longside == expected_longside[key]
        num_checked += 1
    # That is, twice the six names listed in the tables above.
    assert num_checked == 12
def test_scale_edges_no_edge_weights():
    """Check the values scale_edges() assigns for the loop.gfa input.

    Every edge should be a non-outlier with a relative weight of 0.5.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    graph.scale_edges()
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        assert edge_attrs["is_outlier"] == 0
        assert edge_attrs["relative_weight"] == 0.5
def test_scale_edges_all_edge_weights_equal():
    """Check the values scale_edges() assigns for marygold_fig2a.gml.

    Every edge should be a non-outlier with a relative weight of 0.5.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/marygold_fig2a.gml")
    graph.scale_edges()
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        assert edge_attrs["is_outlier"] == 0
        assert edge_attrs["relative_weight"] == 0.5
Beispiel #6
0
def test_component_sorting_simple():
    """Check the sizes of the sorted connected components of sample1.gfa."""
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    ag.hierarchically_identify_patterns()

    wccs = ag.get_connected_components()
    # Four ccs:
    # 1. 1+, 2+, 3+, Chain of [4-, 5+] (the chain is counted as a single node
    #                                   in the decomposed digraph)
    # 2. 1-, 2-, 3-, Chain of [5-, 4+]
    # 3. 6+
    # 4. 6-
    # Components 1/2 and 3/4 have the same number of nodes/edges/patterns, so
    # the precise ordering is arbitrary. We just check, for each 3-tuple in
    # wccs, the number of top-level nodes ("real" nodes or collapsed
    # patterns, position 0) and the total numbers of "real" nodes and edges
    # in the component (positions 1 and 2).
    expected_sizes = (
        (4, 5, 4),
        (4, 5, 4),
        (1, 1, 0),
        (1, 1, 0),
    )
    assert len(wccs) == 4

    for i, (n_top, n_nodes, n_edges) in enumerate(expected_sizes):
        component = wccs[i]
        assert len(component[0]) == n_top
        assert component[1] == n_nodes
        assert component[2] == n_edges
Beispiel #7
0
def test_scale_nodes_all_lengths_equal():
    """Check scale_nodes() when every node has the same length.

    Each node should get a relative length of 0.5 and the "mid" longside
    proportion.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    # all of the nodes in this graph have length 3
    graph.scale_nodes()
    for node_id in graph.digraph.nodes:
        attrs = graph.digraph.nodes[node_id]
        assert attrs["relative_length"] == 0.5
        longside = attrs["longside_proportion"]
        assert longside == config.MID_LONGSIDE_PROPORTION
def test_simple_pattern_layout():
    """Lay out the single bubble found in bubble_test.gml.

    Nothing is asserted about the layout itself; the test only checks that
    exactly one bubble is identified and that laying it out does not raise.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/bubble_test.gml")
    # Scaling and pattern identification are run before the layout call.
    graph.scale_nodes()
    graph.compute_node_dimensions()
    graph.scale_edges()
    graph.hierarchically_identify_patterns()

    # This graph should contain just a single bubble.
    bubbles = graph.bubbles
    assert len(bubbles) == 1
    only_bubble = bubbles[0]
    only_bubble.layout(graph)
def test_scale_edges_less_than_4_edges():
    """Check scale_edges() on a graph with fewer than four edges."""
    graph = AssemblyGraph(
        "metagenomescope/tests/input/1_node_1_edge.LastGraph"
    )
    graph.scale_edges()
    # The graph arguably has 2 edges if we assume it's unoriented (which, as
    # of writing, is the default for LastGraph / GFAs but is not required).
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        # No outlier detection should have been done.
        assert edge_attrs["is_outlier"] == 0
        # Normal, relative scaling should have been done. In this case both
        # edges have the same weight, so both get 0.5 as relative weight.
        assert edge_attrs["relative_weight"] == 0.5
Beispiel #10
0
def test_component_sorting_ecoli_graph():
    """Check how the connected components of the E. coli graph are sorted.

    Two properties are verified: the first component is the one containing
    the node named "89" (possibly nested inside collapsed patterns), and the
    components in the 1-indexed range [6, 11] are each a cyclic chain made of
    a single node and a single edge.
    """
    ag = AssemblyGraph("metagenomescope/tests/input/E_coli_LastGraph")
    ag.hierarchically_identify_patterns()
    wccs = ag.get_connected_components()

    def pattern_contains_89(root_pattern_id):
        """Return True if node "89" is nested anywhere inside this pattern."""
        # Iterative depth-first walk, since patterns can contain patterns.
        pending = [root_pattern_id]
        while pending:
            patt_obj = ag.id2pattern[pending.pop()]
            for child_node_id in patt_obj.node_ids:
                if ag.is_pattern(child_node_id):
                    pending.append(child_node_id)
                elif ag.digraph.nodes[child_node_id]["name"] == "89":
                    return True
        return False

    # Assert that the first component is the big ugly one that contains node 89
    seen_89_in_largest_cc = False
    for node_id in wccs[0][0]:
        if ag.is_pattern(node_id):
            seen_89_in_largest_cc = pattern_contains_89(node_id)
        else:
            name = ag.decomposed_digraph.nodes[node_id]["name"]
            seen_89_in_largest_cc = name == "89"
        # Stop scanning the component as soon as the node has been found.
        if seen_89_in_largest_cc:
            break

    assert seen_89_in_largest_cc

    # Slicing never raises, so without this guard the loop below would
    # silently check fewer than six components if the graph had too few.
    assert len(wccs) >= 11

    # Assert that components in the 1-indexed range [6, 11] are all cyclic
    # chains with a single node and edge (i.e. they are sorted before the
    # other components with just a single node each). This is because these are
    # more "important" than these other components, at least to us.
    for wcc_tuple in wccs[5:11]:
        wcc = wcc_tuple[0]
        # Check that the component contains just 1 node at the top level (a
        # cyclic chain pattern)...
        assert len(wcc) == 1
        # ... and just 1 "real" node in full (the one node within the cyclic
        # chain)
        assert wcc_tuple[1] == 1
        # ... and just 1 edge
        assert wcc_tuple[2] == 1
        # The length check above guarantees there is exactly one element.
        cyc_id = next(iter(wcc))
        assert ag.is_pattern(cyc_id)
        # This cyclic chain should contain one node and one edge (and no other
        # patterns within itself).
        assert ag.id2pattern[cyc_id].get_counts(ag) == [1, 1, 0]
def test_scale_edges_dup_edges():
    """Check that scale_edges() rejects edges flagged with is_dup = True.

    As of writing, such edges are only used to connect a node with its
    duplicate. They are not "real" edges and carry no weight, so scaling
    them makes no sense; they should not even exist in the graph at this
    point. If one is present, a ValueError is expected.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/edge_scaling_test_both_outliers.LastGraph"
    )
    # Inject a fake duplicate edge (from node 0 to itself) before scaling.
    graph.digraph.add_edge(0, 0, is_dup=True)
    with raises(ValueError) as exc_info:
        graph.scale_edges()
    expected_msg = "Duplicate edges shouldn't exist in the graph yet."
    assert str(exc_info.value) == expected_msg
def test_scale_edges_four_edges():
    """Check scale_edges() on a graph treated as having four edges.

    No edge should be flagged as an outlier. Edges with weight 5 (the
    minimum in this dataset) should get relative weight 0, and the other
    edges (weight 9, the maximum) should get relative weight 1.
    """
    # Really, there are two edges in this particular graph, but due to
    # reverse complementing we consider there to be four edges.
    graph = AssemblyGraph("metagenomescope/tests/input/cycletest_LastGraph")
    graph.scale_edges()
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        assert edge_attrs["is_outlier"] == 0
        expected_rel_weight = 0 if edge_attrs["multiplicity"] == 5 else 1
        assert edge_attrs["relative_weight"] == expected_rel_weight
Beispiel #13
0
def test_bubble_cyclic_chain_identification():
    r"""Check that a ring of four bubbles becomes a single cyclic chain.

    The input graph looks like

    +-----------------+
    |                 |
    |   2   5   8    11
     \ / \ / \ / \  /
      1   4   7   10
     / \ / \ / \ /  \
    |   3   6   9    12
    |                 |
    +-----------------+

    ... that is, it's just a bunch of cyclic bubbles, with the "last" bubble
    in this visual representation (10 -> [11|12] -> 1) being the one that
    has the back-edges drawn.

    We should end up with something like

    +=======================================+
    |                                       |
    | +-------+-------+--------+---------+  |
    | |   2   |   5   |   8    |    11   |  |
    | |  / \  |  / \  |  / \   |   /  \  |  |
    +== 1   4 = 4   7 = 7   10 = 10    1 ===+
      |  \ /  |  \ /  |  \ /   |   \  /  |
      |   3   |   6   |   9    |    12   |
      +-------+-------+--------+---------+

    ... where the nodes "shared" by adjacent bubbles (4, 7, 10, 1) are all
    duplicated.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/bubble_cyclic_chain_test.gml"
    )
    graph.hierarchically_identify_patterns()

    # Everything should collapse into one top-level node with no edges...
    decomposed = graph.decomposed_digraph
    assert len(decomposed.nodes) == 1
    assert len(decomposed.edges) == 0
    # ... which is a single cyclic chain made up of the four bubbles.
    assert len(graph.chains) == 0
    assert len(graph.cyclic_chains) == 1
    assert len(graph.frayed_ropes) == 0
    assert len(graph.bubbles) == 4
Beispiel #14
0
def test_compute_node_dimensions_all_lengths_equal():
    """Check node dimensions when every node has the same length.

    Every node should get the dimensions derived from a relative length of
    0.5 and the "mid" longside proportion.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    graph.scale_nodes()
    graph.compute_node_dimensions()

    # Expected area is halfway between the configured min and max areas.
    area_range = config.MAX_NODE_AREA - config.MIN_NODE_AREA
    default_area = config.MIN_NODE_AREA + (0.5 * area_range)
    default_height = default_area ** config.MID_LONGSIDE_PROPORTION
    default_width = default_area / default_height

    # This double-checks that the defaults we expect here are computed
    # properly. If the config values are updated that may break this, so feel
    # free to comment this out if that happens to you.
    assert default_area == 5.5
    assert default_height == approx(3.115839)
    assert default_width == approx(1.765174)

    for node_id in graph.digraph.nodes:
        attrs = graph.digraph.nodes[node_id]
        assert attrs["height"] == default_height
        assert attrs["width"] == default_width
def test_scale_edges_low_outlier():
    """Check edge scaling on an input that has a low outlier edge weight."""
    graph = AssemblyGraph(
        "metagenomescope/tests/input/edge_scaling_test_low.LastGraph"
    )
    graph.scale_edges()
    # Low outlier weights: 1
    # Non-outlier weights: 1000, 1001, 1005
    # Maps an edge weight to the expected (is_outlier, relative_weight).
    expected_by_weight = {
        1: (-1, 0),
        1000: (0, 0),
        1001: (0, 1 / 5),
    }
    # Any other weight is expected to be a non-outlier scaled to 1.
    fallback = (0, 1)
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        weight = edge_attrs["multiplicity"]
        exp_outlier, exp_rel_weight = expected_by_weight.get(weight, fallback)
        assert edge_attrs["is_outlier"] == exp_outlier
        assert edge_attrs["relative_weight"] == exp_rel_weight
def test_scale_edges_high_outlier():
    """Check edge scaling on an input that has a high outlier edge weight."""
    graph = AssemblyGraph(
        "metagenomescope/tests/input/edge_scaling_test_high.LastGraph"
    )
    graph.scale_edges()
    # We omit the outlier edge weight (1000) from the non-outlier-edge
    # relative scaling. So the "effective" min and max edge weights are 5
    # and 99, ignoring the 1000s.
    # Maps an edge weight to the expected (is_outlier, relative_weight).
    expected_by_weight = {
        5: (0, 0),
        # (9 - 5) / (99 - 5) = 4 / 94 = 0.04255319...
        9: (0, approx(0.0425532)),
        99: (0, 1),
    }
    # The remaining edges (weight 1000) are high outliers!
    high_outlier = (1, 1)
    for edge_id in graph.digraph.edges:
        edge_attrs = graph.digraph.edges[edge_id]
        weight = edge_attrs["multiplicity"]
        exp_outlier, exp_rel_weight = expected_by_weight.get(
            weight, high_outlier
        )
        assert edge_attrs["is_outlier"] == exp_outlier
        assert edge_attrs["relative_weight"] == exp_rel_weight
Beispiel #17
0
def test_simple_hierarch_decomp():
    r"""Check the pattern counts found in the hierarchical test graph.

    (The test is called "simple" for historical reasons.) The input graph
    looks like

        3
       / \
      1   4 -> 5 ---> 6 -------> 7 -> 14 -> 15 -> 16
     / \ /           /          /
    0   2          11          /
     \            /           /
      8 -> 9 -> 10 -> 12 -> 13

    ...and the following simplifications should happen:

        3
       / \
      1   [Chain] --> 6 ------> [Chain]
     / \ /           /         /
    0   2          11         /
     \            /          /
      \--> [Chain] -> [Chain]

    Eventually, the 1-2-3-chain thing should turn into a bubble.
    But for now this isn't happening.
    """
    graph = AssemblyGraph(
        "metagenomescope/tests/input/hierarchical_test_graph.gml"
    )
    graph.hierarchically_identify_patterns()

    # This is with the "maximum" decomposition settings for this test graph.
    decomposed = graph.decomposed_digraph
    assert len(decomposed.nodes) == 10
    assert len(decomposed.edges) == 12
    # Only chains are expected; no other pattern type should be found.
    assert len(graph.chains) == 4
    assert len(graph.cyclic_chains) == 0
    assert len(graph.frayed_ropes) == 0
    assert len(graph.bubbles) == 0
Beispiel #18
0
def test_compute_node_dimensions():
    """Check the widths and heights computed for the nodes of sample1.gfa.

    Nodes whose names are not listed in the expectation table are looked up
    under the name returned by negate_node_id() instead.
    """
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    ag.scale_nodes()
    ag.compute_node_dimensions()

    def expected_dims(rel_len, longside):
        """Return the expected (width, height) pair for a node."""
        area_range = config.MAX_NODE_AREA - config.MIN_NODE_AREA
        area = config.MIN_NODE_AREA + (rel_len * area_range)
        height = area ** longside
        return (area / height, height)

    # Relative length and longside proportions reused from test_scale_nodes()
    name_to_dims = {
        "1": expected_dims(0.4180047, config.MID_LONGSIDE_PROPORTION),
        "2": expected_dims(0.5525722, config.HIGH_LONGSIDE_PROPORTION),
        "3": expected_dims(1, config.HIGH_LONGSIDE_PROPORTION),
        "4": expected_dims(0.3374782, config.MID_LONGSIDE_PROPORTION),
        "5": expected_dims(0.4180047, config.MID_LONGSIDE_PROPORTION),
        "6": expected_dims(0, config.LOW_LONGSIDE_PROPORTION),
    }

    num_checked = 0
    for node_id in ag.digraph.nodes:
        attrs = ag.digraph.nodes[node_id]
        node_name = attrs["name"]
        width = attrs["width"]
        height = attrs["height"]
        # Fall back to the negated name when this name isn't listed directly.
        if node_name in name_to_dims:
            exp_width, exp_height = name_to_dims[node_name]
        else:
            exp_width, exp_height = name_to_dims[negate_node_id(node_name)]
        assert width == approx(exp_width)
        assert height == approx(exp_height)
        num_checked += 1
    # That is, twice the six names listed in the table above.
    assert num_checked == 12
Beispiel #19
0
def test_bubble_chain_identification():
    r"""Check the (currently suboptimal) decomposition of two adjacent bubbles.

    The input graph looks like

           2   5
          / \ / \
    0 -> 1   4   7 -> 8
          \ / \ /
           3   6

    NOTE: currently a suboptimal decomposition is done here, since bubbles
    can't work with chains as start/end nodes. This should be fixed so the
    result looks as described below, but for now it's a weird
    chain -> frayed rope -> chain chain, which is what this test asserts.

    The intended behavior is as follows. First, we should collapse one of the
    bubbles (order shouldn't impact the result). Let's say the leftmost
    bubble is collapsed first.

        +-------+
        |   2   | 5
        |  / \  |/ \
    0 ->| 1   4 |   7 -> 8
        |  \ /  |\ /
        |   3   | 6
        +-------+

    Then a valid bubble would exist where the first bubble is the start node:

        +------+
        |   5  |
        |  / \ |
    0 ->|B1   7|-> 8
        |  \ / |
        |   6  |
        +------+

    However, this is silly, because nothing about the input graph implied any
    sort of hierarchy between these two bubbles. The code should detect that a
    bubble is being used as the start node of this other bubble (or as the end
    node, if the rightmost bubble was detected first), and then handle this
    by duplicating just the shared node between the bubbles:

        +-------+-------+
        |   2   |   5   |
        |  / \  |  / \  |
    0 ->| 1   4 = 4   7 | -> 8
        |  \ /  |  \ /  |
        |   3   |   6   |
        +-------+-------+

    ... that is, schematically,

    0 -> (Bubble 1) = (Bubble 2) -> 8

    ... which makes sense.

    Also, at some point we should merge everything into a chain going from 0
    to 8. Ideally, chains will support _merging_: so when a chain is created
    that includes another chain at the top level, that chain will be subsumed
    into the overall chain. So even if 0 -> 1 or 7 -> 8 is created first, the
    resulting graph should be the same.
    """
    graph = AssemblyGraph("metagenomescope/tests/input/bubble_chain_test.gml")
    graph.hierarchically_identify_patterns()

    # Everything should collapse into one top-level node with no edges.
    decomposed = graph.decomposed_digraph
    assert len(decomposed.nodes) == 1
    assert len(decomposed.edges) == 0
    # These counts reflect the current (suboptimal) decomposition.
    assert len(graph.chains) == 3
    assert len(graph.cyclic_chains) == 0
    assert len(graph.frayed_ropes) == 1
    assert len(graph.bubbles) == 0
def test_scale_edges_low_and_high_outliers():
    """Check edge scaling on the "both outliers" test input.

    The actual checks are delegated to _verify_both_graph().
    """
    graph_path = (
        "metagenomescope/tests/input/edge_scaling_test_both_outliers.LastGraph"
    )
    graph = AssemblyGraph(graph_path)
    graph.scale_edges()
    # NOTE(review): the meaning of the second argument (0) is defined by
    # _verify_both_graph(), which is not visible here -- confirm there.
    _verify_both_graph(graph.digraph, 0)
Beispiel #21
0
def test_to_dict_simple():
    """Check that to_dict() returns a dict after processing sample1.gfa."""
    ag = AssemblyGraph("metagenomescope/tests/input/sample1.gfa")
    ag.process()

    data = ag.to_dict()
    # isinstance() is the idiomatic type check; unlike an exact type()
    # comparison, it also accepts dict subclasses.
    assert isinstance(data, dict)
def test_check_attrs_node():
    """Check that loading a node with a reserved attribute raises ValueError."""
    graph_path = "metagenomescope/tests/input/check_attrs_test_node.gml"
    with pytest.raises(ValueError) as exc_info:
        AssemblyGraph(graph_path)
    message = str(exc_info.value)
    assert "has reserved attribute(s) {'height'}." in message
def test_check_attrs_edge():
    """Check that loading an edge with a reserved attribute raises ValueError."""
    graph_path = "metagenomescope/tests/input/check_attrs_test_edge.gml"
    with pytest.raises(ValueError) as exc_info:
        AssemblyGraph(graph_path)
    message = str(exc_info.value)
    assert "has reserved attribute(s) {'ctrl_pt_coords'}." in message
Beispiel #24
0
def test_compute_node_dimensions_fails_if_scale_nodes_not_called_first():
    """compute_node_dimensions() should fail unless scale_nodes() ran first."""
    unscaled = AssemblyGraph("metagenomescope/tests/input/loop.gfa")
    # (Since relative_length and longside_proportion data won't be available.)
    with raises(KeyError):
        unscaled.compute_node_dimensions()