Beispiel #1
0
    def test_circular(self, circular_linear_structure):
        """Assemble rightwards from the first K bases of the fixture contig.

        The assembled path is compared, via ``utils._equals_rc``, against the
        prefix of the contig that has the same length as the path.
        """
        graph, contig = circular_linear_structure
        assembler = khmer.LinearAssembler(graph)
        seed = contig[:K]

        assembled = assembler.assemble(seed, direction='R')
        print(assembled, ',', contig)
        same_length_prefix = contig[:len(assembled)]
        assert utils._equals_rc(assembled, same_length_prefix)
Beispiel #2
0
    def test_branch_to_end(self, left_tip_structure):
        """Seed the assembler with HDN and expect the contig from HDN.pos on."""
        # Only graph, contig and HDN are needed from the fixture tuple.
        graph, contig, _, HDN, _, _ = left_tip_structure
        assembled = khmer.LinearAssembler(graph).assemble(HDN)

        # The path must cover everything from the branch point to the end.
        assert len(assembled) == len(contig) - HDN.pos
        expected = contig[HDN.pos:]
        assert utils._equals_rc(assembled, expected)
Beispiel #3
0
    def test_end_to_beginning(self, right_tip_structure):
        """Seed with the last K bases and expect the full contig back.

        Expected to behave exactly like the right_of_branch_outwards case.
        """
        # Only graph and contig are needed from the fixture tuple.
        graph, contig, _, _, _, _ = right_tip_structure
        seed = contig[-K:]
        assembled = khmer.LinearAssembler(graph).assemble(seed)

        assert len(assembled) == len(contig)
        assert utils._equals_rc(assembled, contig)
Beispiel #4
0
    def test_left_of_branch_to_beginning_revcomp(self, right_tip_structure):
        """Seed with ``revcomp(L)`` and expect the contig up to the branch.

        Note that the seed is the reverse complement of L (the fixture's
        k-mer left of the branch), not of HDN itself.
        """
        graph, contig, L, HDN, _, _ = right_tip_structure
        seed = revcomp(L)
        assembled = khmer.LinearAssembler(graph).assemble(seed)

        # Path should run from the contig start through the HDN k-mer.
        assert len(assembled) == HDN.pos + K
        expected = contig[:len(assembled)]
        assert utils._equals_rc(assembled, expected)
Beispiel #5
0
    def test_beginning_to_branch(self, right_tip_structure):
        """Seed with the first K bases and expect assembly up to the branch."""
        graph, contig, _, HDN, _, _ = right_tip_structure
        first_kmer = contig[:K]
        assembled = khmer.LinearAssembler(graph).assemble(first_kmer)

        # Path should run from the contig start through the HDN k-mer.
        assert len(assembled) == HDN.pos + K
        expected = contig[:len(assembled)]
        assert utils._equals_rc(assembled, expected)
Beispiel #6
0
    def test_all_start_positions(self, linear_structure):
        """Expect the entire contig regardless of which k-mer seeds assembly.

        Seeds are taken every 150 bases along the contig; the failing start
        offset is reported as the assertion message.
        """
        graph, contig = linear_structure
        assembler = khmer.LinearAssembler(graph)

        for start in range(0, len(contig), 150):
            seed = contig[start:start + K]
            assembled = assembler.assemble(seed)
            assert utils._equals_rc(assembled, contig), start
Beispiel #7
0
    def test_end_to_branch(self, left_tip_structure):
        """Seed with the last K bases and expect assembly back to the branch.

        The resulting path should include the HDN.
        """
        graph, contig, _, HDN, _, _ = left_tip_structure
        last_kmer = contig[-K:]
        assembled = khmer.LinearAssembler(graph).assemble(last_kmer)

        assert len(assembled) == len(contig) - HDN.pos
        expected = contig[HDN.pos:]
        assert utils._equals_rc(assembled, expected)
Beispiel #8
0
    def test_all_right_to_end(self, linear_structure):
        """Assemble with ``direction='R'`` from seeds every 150 bases.

        Each path is expected to match the contig from the seed's start offset
        to the end; the failing offset is reported as the assertion message.
        """
        graph, contig = linear_structure
        assembler = khmer.LinearAssembler(graph)

        for start in range(0, len(contig), 150):
            seed = contig[start:start + K]
            assembled = assembler.assemble(seed, direction='R')
            print(assembled, ', ', contig[:start])
            remainder = contig[start:]
            assert utils._equals_rc(assembled, remainder), start
Beispiel #9
0
    def test_from_branch_to_ends_with_stopbf_revcomp(self, left_tip_structure):
        """Block the tip via a stop filter and expect a full-length contig.

        The seed is ``revcomp(HDN)``; the tip k-mer is counted into a separate
        Nodegraph that is handed to ``assemble`` as the stop filter.
        """
        graph, contig, _, HDN, _, tip = left_tip_structure
        assembler = khmer.LinearAssembler(graph)

        # Stop filter containing only the tip k-mer.
        tip_blocker = khmer.Nodegraph(K, 1e5, 4)
        tip_blocker.count(tip)

        seed = revcomp(HDN)
        assembled = assembler.assemble(seed, tip_blocker)

        assert len(assembled) == len(contig)
        assert utils._equals_rc(assembled, contig)
Beispiel #10
0
    def test_right_of_branch_outwards_to_ends(self, right_tip_structure):
        """Seed at R (right of the branch point) and expect the whole contig.

        The *entire* original contig should come back: moving left relative
        to the branch, the assembler is not expected to treat it as a
        high-degree node.
        """
        graph, contig, _, _, R, _ = right_tip_structure
        assembled = khmer.LinearAssembler(graph).assemble(R)

        assert len(assembled) == len(contig)
        assert utils._equals_rc(assembled, contig)
Beispiel #11
0
    def test_single_node_flanked_by_hdns(self, left_tip_structure):
        """Assemble from HDN after adding a mutated copy of the contig.

        The fixture graph is mutated in place (no copy is made here): a
        version of the contig altered at position ``HDN.pos + K`` is consumed
        into it. The assembled path is then expected to be just the HDN
        k-mer, i.e. a single node flanked by high-degree nodes.
        """
        graph, contig, _, HDN, _, _ = left_tip_structure
        assembler = khmer.LinearAssembler(graph)

        mutated_contig = mutate_position(contig, HDN.pos + K)
        graph.consume(mutated_contig)

        assembled = assembler.assemble(HDN)

        assert len(assembled) == K
        assert utils._equals_rc(assembled, HDN)
Beispiel #12
0
    def test_end_thru_tip_with_stopbf(self, left_tip_structure):
        """Block the original path at L and expect assembly through the tip.

        Seeding with the last K bases, the path should reach the branch point
        and continue into the introduced branch because L is in the stop
        filter.
        """
        graph, contig, L, HDN, _, tip = left_tip_structure
        assembler = khmer.LinearAssembler(graph)

        # Stop filter containing only L, which blocks the original path.
        path_blocker = khmer.Nodegraph(K, 1e5, 4)
        path_blocker.count(L)

        last_kmer = contig[-K:]
        assembled = assembler.assemble(last_kmer, path_blocker)
        assert len(assembled) == len(contig) - HDN.pos + 1

        # Expected: the tip k-mer, followed by the contig from the last base
        # of the HDN through to the end.
        expected = tip + contig[HDN.pos + K - 1:]
        assert utils._equals_rc(assembled, expected)
Beispiel #13
0
def traverse_and_mark_linear_paths(graph, nk, stop_bf, pathy, degree_nodes):
    """Traverse the linear path through ``nk`` and record it in ``pathy``.

    The path is traversed with ``graph.traverse_linear_path``; zero-length
    paths are ignored. Otherwise a new linear node is registered on
    ``pathy``, a contig is assembled from ``nk`` and attached to that node,
    and either the adjacencies to neighbouring nodes are recorded or, for a
    path with no adjacencies, every visited k-mer is added to ``stop_bf``.

    Args:
        graph: object providing ``traverse_linear_path``, ``ksize`` and
            ``get_min_count``; also handed to ``khmer.LinearAssembler``.
        nk: starting k-mer for both the traversal and the assembly.
        stop_bf: stop filter passed to the traversal and the assembler;
            updated in place via ``add``.
        pathy: bookkeeping object providing ``new_linear_node``,
            ``assemblyfp``, ``add_assembly``, ``kmers_to_nodes`` and
            ``add_adjacency``.
        degree_nodes: passed through to ``graph.traverse_linear_path``.

    Returns:
        None.

    Raises:
        AssertionError: if the path is non-empty but ``pathy.assemblyfp`` is
            falsy; only the contig-producing mode is implemented.
    """
    size, adj_kmers, visited = graph.traverse_linear_path(
        nk, degree_nodes, stop_bf)
    if not size:  # 0 length paths
        return

    # get an ID for the new path
    path_id = pathy.new_linear_node()

    # Only the contig-producing mode is supported. Raise explicitly instead
    # of using ``assert 0`` so the guard still fires under ``python -O``.
    if not pathy.assemblyfp:
        raise AssertionError(
            'traverse_and_mark_linear_paths requires pathy.assemblyfp')

    asm = khmer.LinearAssembler(graph, stop_bf)
    contig = asm.assemble(nk)
    pathy.add_assembly(path_id, contig)

    if not contig:
        # size is known to be non-zero at this point
        print('nonzero size, but contig is not produced. WTF.')
    else:
        ksize = graph.ksize()
        if graph.get_min_count(contig) == 0:
            print('generated k-mers not in BF. sigh.')

        # mark both terminal k-mers of the contig in the stop filter
        stop_bf.add(contig[:ksize])
        stop_bf.add(contig[-ksize:])

        # a path of `size` k-mers should span size + ksize - 1 bases
        if size + ksize - 1 != len(contig):
            print('visited k-mers != contig size. WTF?')

    # add all adjacencies, if any
    if adj_kmers:
        for kmer in adj_kmers:
            adj_node_id = pathy.kmers_to_nodes[kmer]
            pathy.add_adjacency(path_id, adj_node_id)
    # a purely linear path; add all the k-mers to stop bf to prevent
    # traversing in both directions.
    else:
        for k in visited:
            stop_bf.add(k)