Beispiel #1
0
def test_deserialize():
    """Check that a graph survives a serialize_gfa -> deserialize_gfa round trip.

    A two-node graph with one edge and one labelled path is serialized,
    read back from an in-memory stream, and its nodes, edges and paths are
    compared against the original graph's.
    """
    original = mod.GFAGraph()

    # (name, length, sequence) for every node in the fixture.
    node_specs = (
        ('node1', 7, 'ACTGAAA'),
        ('node2', 10, 'AAACCCGGGT'),
    )
    for node_name, node_len, node_seq in node_specs:
        original.add_node(node_name, node_len, node_seq, tags={}, labels={})

    # Positional edge arguments, in the order add_edge receives them.
    edge_args = ('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*')
    original.add_edge(*edge_args, tags={}, labels={})

    original.add_path(
        '000000F',
        ['node1', 'node2'],
        ['4M', '7M'],
        tags={},
        labels={'label1': 'test'})

    # Feed the serialized text back in through a file-like object.
    stream = StringIO(mod.serialize_gfa(original))
    restored = mod.deserialize_gfa(stream)

    assert restored.nodes == original.nodes
    assert restored.edges == original.edges
    assert restored.paths == original.paths
def test_serialize():
    """Check that serialize_gfa emits the expected JSON document.

    The result is compared as decoded JSON rather than as raw text: the
    reference string below records one particular dict key order, and a
    byte-for-byte comparison would fail whenever the serializer emits the
    same content with keys in a different order.
    """
    import json

    gfa_graph = mod.GFAGraph()
    gfa_graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    gfa_graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    gfa_graph.add_edge('edge1', 'node1', '+', 'node2', '+', 4, 7, 0, 3, '*', tags={}, labels={})
    gfa_graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'], tags={}, labels={'label1': 'test'})

    result = mod.serialize_gfa(gfa_graph)
    expected = '{"paths": {"000000F": {"labels": {"label1": "test"}, "nodes": ["node1", "node2"], "tags": {}, "name": "000000F", "cigars": ["4M", "7M"]}}, "nodes": {"node1": {"labels": {}, "seq": "ACTGAAA", "name": "node1", "len": 7, "tags": {}}, "node2": {"labels": {}, "seq": "AAACCCGGGT", "name": "node2", "len": 10, "tags": {}}}, "edges": {"(\'node1\', \'node2\')": {"labels": {}, "v_orient": "+", "tags": {}, "v_start": 4, "cigar": "*", "w_end": 3, "w_start": 0, "w_orient": "+", "name": "edge1", "v_end": 7, "w": "node2", "v": "node1"}}}'

    # Decode both sides so the comparison is independent of key ordering
    # and whitespace in the serialized text.
    assert json.loads(result) == json.loads(expected)
Beispiel #3
0
def run(fp_out, p_ctg_tiling_path, a_ctg_tiling_path,
        p_ctg_fasta, a_ctg_fasta,
        write_contigs,
        min_p_len, min_a_len, only_these_contigs):
    """Build a contig-level GFA graph and write its serialized form to fp_out.

    Every primary and associate contig that has a tiling path becomes a
    node. For every associate contig placement, two edges are added: one
    from the primary contig into the start of the associate contig, and
    one from the end of the associate contig back into the primary contig.

    Args:
        fp_out: Writable file-like object. Receives the output of
            serialize_gfa followed by a newline.
        p_ctg_tiling_path: Primary contig tiling path input, handed to
            falcon_kit.tiling_path.load_tiling_paths.
        a_ctg_tiling_path: Associate contig tiling path input, handed to
            falcon_kit.tiling_path.load_tiling_paths.
        p_ctg_fasta: Primary contig sequence input, handed to load_seqs.
        a_ctg_fasta: Associate contig sequence input, handed to load_seqs.
        write_contigs: Its negation is passed as the second argument of
            load_seqs (presumably a "skip sequences" switch -- confirm
            against load_seqs).
        min_p_len: Accepted for interface compatibility; not used here.
        min_a_len: Accepted for interface compatibility; not used here.
        only_these_contigs: Optional path to a text file listing one
            primary contig ID per line. When truthy, only the listed
            primary contigs, and the associate contigs whose name prefix
            matches one of them, are kept.
    """
    gfa_graph = GFAGraph()

    # Load the primary and associate contig files. Each value returned by
    # load_seqs is indexed as (length, sequence) below.
    p_ctg_dict = falcon_kit.mains.collect_pread_gfa.load_seqs(p_ctg_fasta, (not write_contigs))
    p_ctg_lens = {key: val[0] for key, val in p_ctg_dict.items()}
    p_ctg_seqs = {key: val[1] for key, val in p_ctg_dict.items()}

    a_ctg_dict = falcon_kit.mains.collect_pread_gfa.load_seqs(a_ctg_fasta, (not write_contigs))
    a_ctg_lens = {key: val[0] for key, val in a_ctg_dict.items()}
    a_ctg_seqs = {key: val[1] for key, val in a_ctg_dict.items()}

    # Create whitelists for filtering contigs.
    p_ctg_whitelist = set(p_ctg_seqs)
    a_ctg_whitelist = set(a_ctg_seqs)
    if only_these_contigs:
        # Read the requested IDs with a context manager so the file handle
        # is closed deterministically.
        with open(only_these_contigs) as fp_in:
            requested_ctgs = set(fp_in.read().splitlines())
        p_ctg_whitelist &= requested_ctgs
        # An associate contig is kept when the part of its name before the
        # first '-' (and, within that, before the first '_') is a
        # whitelisted primary contig.
        a_ctg_whitelist = set(
            key for key in a_ctg_seqs
            if key.split('-')[0].split('_')[0] in p_ctg_whitelist)

    # Load the tiling paths and assign coordinates.
    p_paths = falcon_kit.tiling_path.load_tiling_paths(p_ctg_tiling_path, whitelist_seqs=p_ctg_whitelist, contig_lens=p_ctg_lens)
    a_paths = falcon_kit.tiling_path.load_tiling_paths(a_ctg_tiling_path, whitelist_seqs=a_ctg_whitelist, contig_lens=a_ctg_lens)

    # Find the associate contig placement. `a_placement` is a dict:
    #   placement[p_ctg_id][a_ctg_id] = (start, end, p_ctg_id, a_ctg_id, first_node, last_node)
    a_placement = falcon_kit.tiling_path.find_a_ctg_placement(p_paths, a_paths)

    # Add the nodes. Only the contig IDs are needed; the tiling paths
    # themselves are not used for node creation.
    for ctg_id in p_paths:
        gfa_graph.add_node(ctg_id, p_ctg_lens[ctg_id], p_ctg_seqs[ctg_id])
    for ctg_id in a_paths:
        gfa_graph.add_node(ctg_id, a_ctg_lens[ctg_id], a_ctg_seqs[ctg_id])

    # Add the edges. The contig IDs are taken from the placement tuple
    # itself, so only the values of the nested dicts are iterated.
    for a_dict in a_placement.values():
        for placement in a_dict.values():
            start, end, p_ctg_id, a_ctg_id, _first_node, _last_node = placement

            a_ctg_len = a_ctg_lens[a_ctg_id]

            # Edge leaving the primary contig at `start` and entering the
            # beginning of the associate contig. Edge names are numbered
            # by the current edge count.
            edge_name = 'edge-%d' % (len(gfa_graph.edges))
            gfa_graph.add_edge(edge_name, p_ctg_id, '+', a_ctg_id, '+', start, start, 0, 0, '*', tags={}, labels={})

            # Edge leaving the end of the associate contig and re-entering
            # the primary contig at `end`.
            edge_name = 'edge-%d' % (len(gfa_graph.edges))
            gfa_graph.add_edge(edge_name, a_ctg_id, '+', p_ctg_id, '+', a_ctg_len, a_ctg_len, end, end, '*', tags={}, labels={})

    fp_out.write(serialize_gfa(gfa_graph))
    fp_out.write('\n')
def test_deserialize():
    """Verify that deserialize_gfa reconstructs what serialize_gfa wrote.

    Builds a small graph (two nodes, one edge, one labelled path), dumps it,
    parses the dump from an in-memory stream, and compares the node, edge
    and path collections of both graphs.
    """
    source_graph = mod.GFAGraph()
    source_graph.add_node(
        'node1', 7, 'ACTGAAA',
        tags={}, labels={})
    source_graph.add_node(
        'node2', 10, 'AAACCCGGGT',
        tags={}, labels={})
    source_graph.add_edge(
        'edge1',
        'node1', '+',
        'node2', '+',
        4, 7,
        0, 3,
        '*',
        tags={}, labels={})
    source_graph.add_path(
        '000000F',
        ['node1', 'node2'],
        ['4M', '7M'],
        tags={}, labels={'label1': 'test'})

    serialized = mod.serialize_gfa(source_graph)
    parsed_graph = mod.deserialize_gfa(StringIO(serialized))

    assert parsed_graph.nodes == source_graph.nodes
    assert parsed_graph.edges == source_graph.edges
    assert parsed_graph.paths == source_graph.paths
Beispiel #5
0
def test_serialize():
    """Check that serialize_gfa emits the expected JSON document.

    The comparison is made on the decoded JSON instead of the raw text.
    The reference string below captures a single dict key order, so an
    exact string match would fail whenever the serializer produces the
    same content with keys in a different order.
    """
    import json

    gfa_graph = mod.GFAGraph()
    gfa_graph.add_node('node1', 7, 'ACTGAAA', tags={}, labels={})
    gfa_graph.add_node('node2', 10, 'AAACCCGGGT', tags={}, labels={})
    gfa_graph.add_edge('edge1',
                       'node1',
                       '+',
                       'node2',
                       '+',
                       4,
                       7,
                       0,
                       3,
                       '*',
                       tags={},
                       labels={})
    gfa_graph.add_path('000000F', ['node1', 'node2'], ['4M', '7M'],
                       tags={},
                       labels={'label1': 'test'})

    result = mod.serialize_gfa(gfa_graph)
    expected = '{"paths": {"000000F": {"labels": {"label1": "test"}, "nodes": ["node1", "node2"], "tags": {}, "name": "000000F", "cigars": ["4M", "7M"]}}, "nodes": {"node1": {"labels": {}, "seq": "ACTGAAA", "name": "node1", "len": 7, "tags": {}}, "node2": {"labels": {}, "seq": "AAACCCGGGT", "name": "node2", "len": 10, "tags": {}}}, "edges": {"(\'node1\', \'node2\')": {"labels": {}, "v_orient": "+", "tags": {}, "v_start": 4, "cigar": "*", "w_end": 3, "w_start": 0, "w_orient": "+", "name": "edge1", "v_end": 7, "w": "node2", "v": "node1"}}}'

    # Decode both sides so the check does not depend on key ordering or
    # whitespace in the serialized text.
    assert json.loads(result) == json.loads(expected)