def test_add_asm_graph():
    """Check that GFAGraph.add_asm_graph() imports exactly the expected edges.

    The assembly graph is loaded from the 'gfa-1' test data set. After adding
    it, the GFA graph must contain no paths and its edge dict must match
    `expected` key for key.
    """
    # Load the assembly graph.
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')
    sg_edges_list = os.path.join(test_dir, 'sg_edges_list')
    utg_data = os.path.join(test_dir, 'utg_data')
    ctg_paths = os.path.join(test_dir, 'ctg_paths')
    asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)

    # Add the graph to GFA.
    gfa_graph = mod.GFAGraph()
    gfa_graph.add_asm_graph(asm_graph)

    # Adding an assembly graph must not create any paths.
    assert len(gfa_graph.paths) == 0

    expected = {
        ('000000016:B', '000000027:B'): ['000000016:B', '000000027:B', '*', 1540, 99.94, 449, 0, None, None, None, None],
        ('000000005:B', '000000016:B'): ['000000005:B', '000000016:B', '*', 1487, 99.93, 502, 0, None, None, None, None],
        ('000000016:B', '000000025:B'): ['000000016:B', '000000025:B', '*', 1540, 99.94, 449, 0, None, None, None, None],
        ('000000007:B', '000000005:B'): ['000000007:B', '000000005:B', '*', 1980, 99.95, 9, 0, None, None, None, None],
        ('000000018:B', '000000004:B'): ['000000018:B', '000000004:B', '*', 1963, 99.95, 26, 0, None, None, None, None],
        ('000000025:B', '000000018:B'): ['000000025:B', '000000018:B', '*', 1978, 99.95, 11, 0, None, None, None, None]
    }

    assert len(gfa_graph.edges) == len(expected)

    # items() exists on both Python 2 and Python 3 dicts; iteritems() does not.
    for key, edge in gfa_graph.edges.items():
        assert key in expected
        assert expected[key] == edge
Exemple #2
0
def test_add_nx_string_graph():
    """Smoke-test GFAGraph.add_nx_string_graph() with the graph in sg.gexf.

    The first part rebuilds a string graph from the assembly graph (code taken
    from Unzip's graphs_to_h_tigs.py). That exercises AsmGraph and
    reverse_end(), but the graph built there is NOT the one handed to the GFA:
    `nx_sg` is rebound to the graph read from sg.gexf before
    add_nx_string_graph() is called. The test makes no assertions about the
    result; it passes as long as nothing raises.
    """
    # Load the assembly graph.
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')
    sg_edges_list = os.path.join(test_dir, 'sg_edges_list')
    utg_data = os.path.join(test_dir, 'utg_data')
    ctg_paths = os.path.join(test_dir, 'ctg_paths')
    asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)

    # The following block is taken from Unzip, graphs_to_h_tigs.py.
    nx_sg = nx.DiGraph()
    # Never populated in this test, so every lookup below yields (-1, 0).
    arid_to_phase = {}
    for ctg_id in asm_graph.ctg_data.keys():
        ctg_G = asm_graph.get_sg_for_ctg(ctg_id)
        for v, w in ctg_G.edges():
            vrid = v[:9]
            wrid = w[:9]
            edge_data = asm_graph.sg_edges[(v, w)]
            # Only edges whose last field is "G" are kept.
            if edge_data[-1] != "G":
                continue

            vphase = arid_to_phase.get(vrid, (-1, 0))
            wphase = arid_to_phase.get(wrid, (-1, 0))
            if vphase[0] == wphase[0] and vphase[1] != wphase[1]:
                cross_phase = "Y"
            else:
                cross_phase = "N"

            # The same formatted phase string is used as both label and phase.
            vlabel = "%d_%d" % vphase
            wlabel = "%d_%d" % wphase

            nx_sg.add_node(v, label=vlabel, phase=vlabel, src="P")
            nx_sg.add_node(w, label=wlabel, phase=wlabel, src="P")
            nx_sg.add_edge(v, w, src="OP", cross_phase=cross_phase)

            # We need to add the complementary edges as the ctg_graph does not
            # contain the dual edges.
            rv = reverse_end(v)
            rw = reverse_end(w)
            nx_sg.add_node(rv, label=vlabel, phase=vlabel, src="P")
            nx_sg.add_node(rw, label=wlabel, phase=wlabel, src="P")
            nx_sg.add_edge(rw, rv, src="OP", cross_phase=cross_phase)

    # Add the string graph to the GFA.
    # NOTE(review): this replaces the graph built above with the one stored in
    # sg.gexf; confirm that discarding the hand-built graph is intended.
    gfa_graph = mod.GFAGraph()
    gexf_file = os.path.join(test_dir, 'sg.gexf')
    nx_sg = nx.read_gexf(gexf_file)
    gfa_graph.add_nx_string_graph(nx_sg)
def test_calc_cutoff_err():
    """mod.main() must raise, mentioning `expected_err`, for '--coverage 23 1'."""
    data_dir = helpers.get_test_data_dir()
    partial_capture_fn = os.path.join(
        data_dir, 'calc_cutoff/partial_capture.txt')
    # Fail early with a clear message if the fixture is missing.
    assert os.path.exists(partial_capture_fn)

    cmdline = 'prog --coverage 23 1 {}'.format(partial_capture_fn)
    argv = cmdline.split()
    with pytest.raises(Exception) as excinfo:
        mod.main(argv)

    message = str(excinfo.value)
    assert expected_err in message
Exemple #4
0
def test_calc_cutoff(capsys):
    """mod.main() with '--coverage 14 1' prints exactly '2' and nothing on stderr."""
    data_dir = helpers.get_test_data_dir()
    partial_capture_fn = os.path.join(
        data_dir, 'calc_cutoff/partial_capture.txt')
    assert os.path.exists(partial_capture_fn)

    cmdline = 'prog --coverage 14 1 {}'.format(partial_capture_fn)
    mod.main(cmdline.split())

    stdout_text, stderr_text = capsys.readouterr()
    assert stdout_text == '2'
    assert not stderr_text
def test_calc_cutoff_err():
    """Run mod.main() with '--coverage 23 1' and expect a failure.

    The raised exception's text must contain `expected_err`.
    """
    capture_rel_path = 'calc_cutoff/partial_capture.txt'
    partial_capture_fn = os.path.join(
        helpers.get_test_data_dir(), capture_rel_path)
    assert os.path.exists(partial_capture_fn)

    argv = 'prog --coverage 23 1 {}'.format(partial_capture_fn).split()
    with pytest.raises(Exception) as excinfo:
        mod.main(argv)
    raised_text = str(excinfo.value)
    assert expected_err in raised_text
Exemple #6
0
def test_filter_tiling_paths_by_len():
    """Check filter_tiling_paths_by_len() for several minimum-length thresholds.

    Also checks the degenerate case where the length map lacks an entry for
    one of the contigs, which must raise.
    """
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    p_path, _ = mod.load_tiling_paths(p_ctg_tiling_path_file, 'P')
    _, p_ctg_len = mod.calc_tiling_paths_len(p_path)

    # (minimum length, contig ids expected to remain after filtering)
    cases = [
        (0, ['000000F', '000001F', '000002F', '000003F', '000004F']),
        (10000, ['000000F', '000001F', '000002F', '000003F']),
        (35000, ['000000F', '000001F']),
        (100000, []),
    ]
    for min_len, expected_ids in cases:
        p_path_filtered = mod.filter_tiling_paths_by_len(
            p_path, p_ctg_len, min_len)
        assert sorted(p_path_filtered.keys()) == sorted(expected_ids)

    # Test a degenerate case where there is no length for a particular contig.
    # list() makes the keys indexable on Python 3 as well as Python 2; the
    # first key is dropped from the copy.
    keys = list(p_ctg_len.keys())
    p_ctg_len_degenerate = {key: p_ctg_len[key] for key in keys[1:]}
    with pytest.raises(Exception):
        mod.filter_tiling_paths_by_len(p_path, p_ctg_len_degenerate, 0)
def test_calc_cutoff(capsys):
    """Run mod.main() with '--coverage 14 1' and check the captured output.

    Stdout must be exactly '2' and stderr must be empty.
    """
    capture_rel_path = 'calc_cutoff/partial_capture.txt'
    partial_capture_fn = os.path.join(
        helpers.get_test_data_dir(), capture_rel_path)
    assert os.path.exists(partial_capture_fn)

    argv = 'prog --coverage 14 1 {}'.format(partial_capture_fn).split()
    mod.main(argv)

    captured_out, captured_err = capsys.readouterr()
    assert not captured_err
    assert captured_out == '2'
Exemple #8
0
def test_calc_cutoff_errfile(monkeypatch, tmpdir):
    """The failure message must appear in the exception and in the error file.

    PBFALCON_ERRFILE is pointed at a file in a fresh temporary directory
    before mod.main() is run with '--coverage 23 1'.
    """
    fn = str(tmpdir.mkdir('tmp').join('errfile'))
    monkeypatch.setenv('PBFALCON_ERRFILE', fn)
    partial_capture_fn = os.path.join(
        helpers.get_test_data_dir(), 'calc_cutoff/partial_capture.txt')
    assert os.path.exists(partial_capture_fn)
    with pytest.raises(Exception) as excinfo:
        mod.main('prog --coverage 23 1 {}'.format(partial_capture_fn).split())
    assert expected_err in str(excinfo.value)

    # Read through a context manager so the handle is closed deterministically.
    with open(fn) as errfile:
        errfile_content = errfile.read()
    assert expected_err in errfile_content
Exemple #9
0
def wrap_write_gfa_v1_test(use_sg, use_nx, use_tp, write_reads, write_contigs,
                           min_p_len, min_a_len, expected_path):
    """Build a GFAGraph from selected 'gfa-1' inputs and compare its GFA-1 output.

    Args:
        use_sg: if true, load the assembly graph and add it with
            add_asm_graph().
        use_nx: if true, read sg.gexf and add it with add_nx_string_graph().
        use_tp: if true, add the p_ctg/a_ctg tiling paths through
            gen_gfa_v1.add_tiling_paths_to_gfa().
        write_reads: forwarded unchanged to write_gfa_v1().
        write_contigs: forwarded unchanged to write_gfa_v1().
        min_p_len: forwarded to add_tiling_paths_to_gfa() (only when use_tp).
        min_a_len: forwarded to add_tiling_paths_to_gfa() (only when use_tp).
        expected_path: passed to helpers.assert_filecmp() together with the
            text that was written.
    """
    def data_file(name):
        # Path of one input file inside the 'gfa-1' test data directory.
        return os.path.join(helpers.get_test_data_dir(), 'gfa-1', name)

    # Paths shared by the tiling-path loader and the writer.
    preads_file = data_file('preads4falcon.fasta')
    p_ctg_fasta = data_file('p_ctg.fa')
    a_ctg_fasta = data_file('a_ctg.fa')

    # Create a GFA graph.
    gfa_graph = mod.GFAGraph()

    if use_sg:
        # Load the assembly graph and add it to the GFA.
        asm_graph = AsmGraph(data_file('sg_edges_list'),
                             data_file('utg_data'),
                             data_file('ctg_paths'))
        gfa_graph.add_asm_graph(asm_graph)

    if use_tp:
        gen_gfa_v1.add_tiling_paths_to_gfa(
            p_ctg_fasta, a_ctg_fasta,
            data_file('p_ctg_tiling_path'),
            data_file('a_ctg_tiling_path'),
            min_p_len, min_a_len, gfa_graph)

    if use_nx:
        gfa_graph.add_nx_string_graph(nx.read_gexf(data_file('sg.gexf')))

    # Run the unit under test, capturing the output in memory.
    fp_out = StringIO()
    contig_files = [p_ctg_fasta, a_ctg_fasta]
    gfa_graph.write_gfa_v1(fp_out, preads_file, contig_files,
                           write_reads, write_contigs)

    # Compare results.
    helpers.assert_filecmp(fp_out.getvalue(), expected_path)
def test_calc_cutoff_errfile(monkeypatch, tmpdir):
    """Check that a failing run reports `expected_err` in two places.

    With PBFALCON_ERRFILE set to a temporary file, mod.main() is run with
    '--coverage 23 1'. The message must be part of the raised exception and
    must also be present in the error file.
    """
    fn = str(tmpdir.mkdir('tmp').join('errfile'))
    monkeypatch.setenv('PBFALCON_ERRFILE', fn)
    partial_capture_fn = os.path.join(
        helpers.get_test_data_dir(), 'calc_cutoff/partial_capture.txt')
    assert os.path.exists(partial_capture_fn)
    with pytest.raises(Exception) as excinfo:
        mod.main('prog --coverage 23 1 {}'.format(partial_capture_fn).split())
    assert expected_err in str(excinfo.value)

    # Use a context manager instead of a bare open().read() so the file is
    # always closed.
    with open(fn) as sin:
        assert expected_err in sin.read()
Exemple #11
0
def test_format_gfa_v1_path_line():
    """Check GFAGraph.format_gfa_v1_path_line() with and without a length map.

    Three situations are covered: seq_len_map is None, seq_len_map holds a
    fixed length for every read of the path, and the path itself is None.
    """
    gfa_graph = mod.GFAGraph()

    # Load tiling paths from file; the edge-to-contig map is not needed here.
    p_ctg_tiling_path_file = os.path.join(helpers.get_test_data_dir(),
                                          'p_ctg_tiling_path_1')
    p_paths, _ = gen_gfa_v1.load_tiling_paths(p_ctg_tiling_path_file, 'P')

    # If seq_len_map is None, all CIGAR operations should be '*'.
    expected = {
        '000000F':
        'P\t000000F\t000092122-,000081654-,000034462-,000061403-,000021348-,000062240-,000083779-,000019819+,000063672+,000026565+,000050047-\t*,*,*,*,*,*,*,*,*,*,*',
        '000001F':
        'P\t000001F\t000070651+,000018109+,000068978+,000100559+,000010548-,000006846-,000065052-,000071922+,000076878+,000000861+,000001755-\t*,*,*,*,*,*,*,*,*,*,*',
        '000002F':
        'P\t000002F\t000088930+,000008918+,000100248-,000085315-,000071965+,000082497+\t*,*,*,*,*,*',
        '000003F': 'P\t000003F\t000084518+,000011674+,000057445-\t*,*,*',
        '000004F': 'P\t000004F\t000014727+,000024020+,000060868+\t*,*,*',
    }
    seq_len_map = None
    # items() works on Python 2 and 3 alike; iteritems() is Python 2-only.
    for ctg_id, path in p_paths.items():
        path_line = gfa_graph.format_gfa_v1_path_line(ctg_id, path,
                                                      seq_len_map)
        assert path_line == expected[ctg_id]

    # The seq_len_map dict is only used for the first read in the path,
    # because it needs to be included completely. The other CIGAR operations
    # are determined directly from the edges.
    expected = {
        '000000F':
        'P\t000000F\t000092122-,000081654-,000034462-,000061403-,000021348-,000062240-,000083779-,000019819+,000063672+,000026565+,000050047-\t10000M,33726M,10123M,1352M,9924M,5834M,862M,5562M,1384M,473M,2171M',
        '000001F':
        'P\t000001F\t000070651+,000018109+,000068978+,000100559+,000010548-,000006846-,000065052-,000071922+,000076878+,000000861+,000001755-\t10000M,10077M,3766M,2648M,2421M,2089M,18168M,2723M,2451M,666M,15088M',
        '000002F':
        'P\t000002F\t000088930+,000008918+,000100248-,000085315-,000071965+,000082497+\t10000M,15215M,3113M,4851M,1857M,6035M',
        '000003F':
        'P\t000003F\t000084518+,000011674+,000057445-\t10000M,9432M,23096M',
        '000004F':
        'P\t000004F\t000014727+,000024020+,000060868+\t10000M,5238M,3235M',
    }
    for ctg_id, path in p_paths.items():
        # Initialize all reads to a fixed value, just to be safe.
        seq_len_map = {}
        for edge in path:
            v, w = edge[0], edge[1]
            # Keys are the node names with everything from ':' on stripped.
            seq_len_map[v.split(':')[0]] = 10000
            seq_len_map[w.split(':')[0]] = 10000
        path_line = gfa_graph.format_gfa_v1_path_line(ctg_id, path,
                                                      seq_len_map)
        assert path_line == expected[ctg_id]

    # Test a degenerate case where path is None.
    path_line = gfa_graph.format_gfa_v1_path_line('', None, None)
    assert path_line == ''
Exemple #12
0
def test_calc_tiling_paths_len():
    """Compare calc_tiling_paths_len() output against the expected module-level maps.

    For every contig, each computed (node, coordinate) pair must also be in
    `expected_coord_map`, and the contig length must equal
    `expected_contig_len`.
    """
    tiling_path_fn = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    p_path, p_edge_to_ctg = mod.load_tiling_paths(tiling_path_fn, 'P')
    p_coords, p_ctg_len = mod.calc_tiling_paths_len(p_path)

    for ctg_id in p_coords:
        computed = p_coords[ctg_id]
        expected_pairs = set(expected_coord_map[ctg_id].items())
        # Every computed pair has to be among the expected ones.
        common_pairs = expected_pairs & set(computed.items())
        assert len(common_pairs) == len(computed)
        assert expected_contig_len[ctg_id] == p_ctg_len[ctg_id]
Exemple #13
0
def test_calc_node_coords():
    """Exercise calc_node_coords() on normal, cyclic, unsorted, circular and empty paths."""
    data_dir = helpers.get_test_data_dir()

    # The p_ctg_tiling_path_1 is a normal tiling path file.
    normal_paths, _ = mod.load_tiling_paths(
        os.path.join(data_dir, 'p_ctg_tiling_path_1'), 'P')

    ctg_id = '000000F'
    coord_map, contig_len = mod.calc_node_coords(normal_paths[ctg_id])
    # Every computed (node, coord) pair must be among the expected ones.
    common_pairs = set(expected_coord_map[ctg_id].items()) & set(
        coord_map.items())
    assert len(common_pairs) == len(coord_map)
    assert expected_contig_len[ctg_id] == contig_len

    # The p_ctg_tiling_path_2 file holds three special cases:
    # - 000000F which has an inner cycle
    # - 000001F which has an out-of-order edge
    # - 000002F which is circular (this is a valid case)
    special_paths, _ = mod.load_tiling_paths(
        os.path.join(data_dir, 'p_ctg_tiling_path_2'), 'P')

    # Allow cycles, but the node's coord gets overwritten.
    coord_map, contig_len = mod.calc_node_coords(special_paths['000000F'])
    assert coord_map['000081654:B'] == 55125

    # Do not allow unsorted graphs.
    with pytest.raises(Exception):
        mod.calc_node_coords(special_paths['000001F'])

    # Allow circular graphs.
    coord_map, contig_len = mod.calc_node_coords(special_paths['000002F'])
    assert contig_len == 18473

    # Test for an empty tiling path.
    coord_map, contig_len = mod.calc_node_coords([])
    assert not coord_map
    assert contig_len == 0
Exemple #14
0
def test_load_tiling_paths():
    """Check the contig ids and the edge-to-contig map from load_tiling_paths().

    Every edge of every loaded path must be registered in the edge-to-contig
    map under its (v, w) key, with the value (contig id, edge type).
    """
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    p_path, p_edge_to_ctg = mod.load_tiling_paths(p_ctg_tiling_path_file, 'P')

    assert sorted(p_path.keys()) == sorted(
        ['000000F', '000001F', '000002F', '000003F', '000004F'])

    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    for ctg_id, path in p_path.items():
        for edge in path:
            # The 7-way unpack doubles as a check that each edge has exactly
            # seven fields; only v, w and etype are used.
            v, w, _b, _e, _length, _idt, etype = edge
            assert (v, w) in p_edge_to_ctg
            assert p_edge_to_ctg[(v, w)] == (ctg_id, etype)
Exemple #15
0
def test_load_tiling_paths_from_stream():
    """Check load_tiling_paths_from_stream() on an open tiling-path file.

    The loaded contig ids must match the expected five, and every edge must be
    present in the edge-to-contig map with the value (contig id, edge type).
    """
    # This tests a normal case.
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    with open(p_ctg_tiling_path_file) as f:
        p_paths, edge_to_ctg = mod.load_tiling_paths_from_stream(f, 'P')

    assert sorted(p_paths.keys()) == sorted(
        ['000000F', '000001F', '000002F', '000003F', '000004F'])

    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    for ctg_id, path in p_paths.items():
        for edge in path:
            # The 7-way unpack also verifies that each edge has seven fields.
            v, w, _b, _e, _length, _idt, etype = edge
            assert (v, w) in edge_to_ctg
            assert edge_to_ctg[(v, w)] == (ctg_id, etype)
def test_write_gfa_v1_2():
    """write_gfa_v1() must raise when a node has no pread in the FASTA file.

    Tests a case where a node is added to the graph, but there is no
    corresponding pread in the preads4falcon.fasta file.
    """
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    # Create a GFA graph.
    gfa_graph = mod.GFAGraph()

    # Load the p_ctg tiling paths; the edge-to-contig map is not needed.
    p_ctg_tiling_path_file = os.path.join(test_dir, 'p_ctg_tiling_path')
    p_paths, _ = gen_gfa_v1.load_tiling_paths(p_ctg_tiling_path_file, 'P')
    # Add the tiling paths to the GFA.
    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    for ctg_id, path in p_paths.items():
        gfa_graph.add_tiling_path(path, ctg_id)

    # Init paths to other input files.
    preads_file = os.path.join(test_dir, 'preads4falcon.fasta')
    p_ctg_fasta = os.path.join(test_dir, 'p_ctg.fa')
    a_ctg_fasta = os.path.join(test_dir, 'a_ctg.fa')

    write_reads = False
    write_contigs = False

    fp_out = StringIO()

    # Add a node which does not exist in the preads4falcon.fasta file.
    gfa_graph.add_read_from_node('12345:B')

    # Run the unit under test.
    with pytest.raises(Exception):
        gfa_graph.write_gfa_v1(fp_out, preads_file,
                               [p_ctg_fasta, a_ctg_fasta],
                               write_reads, write_contigs)
def test_add_tiling_path():
    """Paths added with add_tiling_path() must be stored unchanged, keyed by contig id."""
    # Load the tiling path. These methods are tested in test_gen_gfa_v1.py.
    p_ctg_tiling_path_file = os.path.join(
        helpers.get_test_data_dir(), 'p_ctg_tiling_path_1')
    p_paths, _ = gen_gfa_v1.load_tiling_paths(p_ctg_tiling_path_file, 'P')

    # Create a new GFA graph.
    gfa_graph = mod.GFAGraph()

    # Add the tiling paths.
    # items() works on both Python 2 and 3; iteritems() is Python 2-only.
    for ctg_id, path in p_paths.items():
        gfa_graph.add_tiling_path(path, ctg_id)

    # Check if we have the correct number of tiling paths.
    assert len(gfa_graph.paths) == len(p_paths)

    # They should be same as loaded.
    for ctg_id, path in p_paths.items():
        assert ctg_id in gfa_graph.paths
        assert gfa_graph.paths[ctg_id] == path
def test_write_gfa_v1_1():
    """Run wrap_write_gfa_v1_test() for nine option combinations, in a fixed order."""
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    # Test various combinations of options. Columns:
    # use_sg, use_nx, use_tp, write_reads, write_contigs,
    # min_p_len, min_a_len, expected file name (inside test_dir).
    scenarios = [
        (True, False, True, True, True, 0, 0,
         'expected-1-sg-r-c.gfa'),
        (False, False, True, True, True, 0, 0,
         'expected-2-tiling-r-c.gfa'),
        (False, False, True, False, True, 0, 0,
         'expected-3-tiling-no_r-c.gfa'),
        (False, False, True, False, False, 0, 0,
         'expected-4-tiling-no_r-no_c.gfa'),
        (True, False, True, False, False, 0, 0,
         'expected-5-sg-no_r-no_c.gfa'),
        (False, False, True, False, False, 10000, 10000,
         'expected-6-tiling-no_r-no_c-minlen.gfa'),
        (False, True, False, False, False, 0, 0,
         'expected-7-nx-no_r-no_c.gfa'),
        (False, True, True, False, False, 0, 0,
         'expected-8-nx-tiling-no_r-no_c.gfa'),
        (False, True, True, True, True, 0, 0,
         'expected-9-nx-tiling-r-c.gfa'),
    ]
    for (use_sg, use_nx, use_tp, write_reads, write_contigs,
         min_p_len, min_a_len, expected_name) in scenarios:
        wrap_write_gfa_v1_test(use_sg, use_nx, use_tp, write_reads,
                               write_contigs, min_p_len, min_a_len,
                               os.path.join(test_dir, expected_name))
def test_main_6(capsys):
    """Run mod.main() with '--tiling' and minimum lengths of 10000.

    The captured stdout, stripped and split into lines, must equal the
    stripped lines of expected-6-tiling-no_r-no_c-minlen.gfa.
    """
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    argv = [
        'prog',
        '--p-ctg-tiling-path',
        os.path.join(test_dir, 'p_ctg_tiling_path'),
        '--a-ctg-tiling-path',
        os.path.join(test_dir, 'a_ctg_tiling_path'),
        '--preads-fasta',
        os.path.join(test_dir, 'preads4falcon.fasta'),
        '--p-ctg-fasta',
        os.path.join(test_dir, 'p_ctg.fa'),
        '--a-ctg-fasta',
        os.path.join(test_dir, 'a_ctg.fa'),
        '--sg-edges-list',
        os.path.join(test_dir, 'sg_edges_list'),
        '--utg-data',
        os.path.join(test_dir, 'utg_data'),
        '--ctg-paths',
        os.path.join(test_dir, 'ctg_paths'),
        '--tiling',
        # Not passed in this scenario:
        # '--write-reads',
        # '--write-contigs',
        '--min-p-len',
        '10000',
        '--min-a-len',
        '10000',
    ]
    mod.main(argv)
    out, _ = capsys.readouterr()
    result = out.strip().splitlines()

    # Read the expected file through a context manager so it gets closed.
    expected_fn = os.path.join(
        test_dir, 'expected-6-tiling-no_r-no_c-minlen.gfa')
    with open(expected_fn) as fp_expected:
        expected = [line.strip() for line in fp_expected]
    assert result == expected
Exemple #20
0
def test_main_2(capsys):
    """Run mod.main() with reads and contigs enabled and compare stdout.

    The captured stdout is handed to helpers.assert_filecmp() together with
    the path of expected-2-tiling-r-c.gfa.
    """
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    # (option, file name inside test_dir), in the order they are passed.
    input_options = [
        ('--p-ctg-tiling-path', 'p_ctg_tiling_path'),
        ('--a-ctg-tiling-path', 'a_ctg_tiling_path'),
        ('--preads-fasta', 'preads4falcon.fasta'),
        ('--p-ctg-fasta', 'p_ctg.fa'),
        ('--a-ctg-fasta', 'a_ctg.fa'),
        ('--sg-edges-list', 'sg_edges_list'),
        ('--utg-data', 'utg_data'),
        ('--ctg-paths', 'ctg_paths'),
    ]
    argv = ['prog']
    for option, file_name in input_options:
        argv.append(option)
        argv.append(os.path.join(test_dir, file_name))
    # '--add-string-graph' is not passed in this scenario.
    argv.extend([
        '--write-reads',
        '--write-contigs',
        '--min-p-len', '0',
        '--min-a-len', '0',
    ])

    mod.main(argv)
    captured_out, _ = capsys.readouterr()
    helpers.assert_filecmp(
        captured_out, os.path.join(test_dir, 'expected-2-tiling-r-c.gfa'))
Exemple #21
0
def test_calc_cutoff_errfile(monkeypatch, tmpdir):
    """A failing run must report its messages in three places.

    With PBFALCON_ERRFILE pointing at a temporary file, mod.main() is run with
    '--coverage 23 1'. The test then checks the raised exception, the error
    file, and 'alarms.json' in the current working directory.
    """
    errfile_path = tmpdir.mkdir('tmp').join('errfile')
    fn = str(errfile_path)
    monkeypatch.setenv('PBFALCON_ERRFILE', fn)

    partial_capture_fn = os.path.join(helpers.get_test_data_dir(),
                                      'calc_cutoff/partial_capture.txt')
    assert os.path.exists(partial_capture_fn)

    argv = 'prog --coverage 23 1 {}'.format(partial_capture_fn).split()
    with pytest.raises(Exception) as excinfo:
        mod.main(argv)

    # NOTE: only expected_err1 is checked against the exception text; the
    # corresponding check for expected_err0 is disabled:
    #assert expected_err0 in str(excinfo.value)
    raised_text = str(excinfo.value)
    assert expected_err1 in raised_text

    # Both messages must have been written to the error file.
    with open(fn) as sin:
        errfile_content = sin.read()
    for message in (expected_err0, expected_err1):
        assert message in errfile_content

    # Also check new 'alarms.json'. Dropping the surrounding quotes of the
    # JSON string leaves the escaped message text (e.g. escaped newlines).
    encoded_messages = [json.dumps(message)[1:-1]
                        for message in (expected_err0, expected_err1)]
    with open('alarms.json') as sin:
        alarm_content = sin.read()
    for encoded in encoded_messages:
        assert encoded in alarm_content
Exemple #22
0
def test_write_gfa_v1_1(args, expected_path):
    """Run wrap_write_gfa_v1_test() from inside the 'gfa-1' test data directory.

    Args:
        args: positional arguments unpacked into wrap_write_gfa_v1_test().
        expected_path: passed on as the `expected_path` keyword argument.
    """
    gfa_data_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    # system.cd() is used as a context manager around the call.
    with system.cd(gfa_data_dir):
        wrap_write_gfa_v1_test(*args, expected_path=expected_path)
def wrap_write_gfa_v1_test(use_sg, use_nx, use_tp, write_reads, write_contigs, min_p_len, min_a_len, expected_path):
    """Build a GFAGraph from selected 'gfa-1' inputs and check its GFA-1 output.

    Args:
        use_sg: if true, load the assembly graph and add it with
            add_asm_graph().
        use_nx: if true, read sg.gexf and add it with add_nx_string_graph().
        use_tp: if true, add the p_ctg and a_ctg tiling paths whose computed
            contig length reaches the respective minimum.
        write_reads: forwarded unchanged to write_gfa_v1().
        write_contigs: forwarded unchanged to write_gfa_v1().
        min_p_len: minimum contig length for a p_ctg tiling path to be added.
        min_a_len: minimum contig length for an a_ctg tiling path to be added.
        expected_path: file whose stripped lines must equal the lines written
            by write_gfa_v1().
    """
    test_dir = os.path.join(helpers.get_test_data_dir(), 'gfa-1')

    # Create a GFA graph.
    gfa_graph = mod.GFAGraph()

    if use_sg:
        # Load the assembly graph.
        sg_edges_list = os.path.join(test_dir, 'sg_edges_list')
        utg_data = os.path.join(test_dir, 'utg_data')
        ctg_paths = os.path.join(test_dir, 'ctg_paths')
        asm_graph = AsmGraph(sg_edges_list, utg_data, ctg_paths)
        # Add the string graph to the GFA.
        gfa_graph.add_asm_graph(asm_graph)

    if use_tp:
        # Load the p_ctg tiling paths.
        p_ctg_tiling_path_file = os.path.join(test_dir, 'p_ctg_tiling_path')
        p_paths, _ = gen_gfa_v1.load_tiling_paths(p_ctg_tiling_path_file, 'P')
        # Add the tiling paths to the GFA, skipping contigs that are too short.
        # items() works on both Python 2 and 3; iteritems() is Python 2-only.
        for ctg_id, path in p_paths.items():
            _, contig_len = gen_gfa_v1.calc_node_coords(path)
            if contig_len >= min_p_len:
                gfa_graph.add_tiling_path(path, ctg_id)

        # Load the a_ctg tiling paths.
        # NOTE(review): these are loaded with type 'P' as well; confirm that
        # this is intended for the a_ctg file.
        a_ctg_tiling_path_file = os.path.join(test_dir, 'a_ctg_tiling_path')
        a_paths, _ = gen_gfa_v1.load_tiling_paths(a_ctg_tiling_path_file, 'P')
        # Add the tiling paths to the GFA, skipping contigs that are too short.
        for ctg_id, path in a_paths.items():
            _, contig_len = gen_gfa_v1.calc_node_coords(path)
            if contig_len >= min_a_len:
                gfa_graph.add_tiling_path(path, ctg_id)

    if use_nx:
        gexf_file = os.path.join(test_dir, 'sg.gexf')
        nx_sg = nx.read_gexf(gexf_file)
        gfa_graph.add_nx_string_graph(nx_sg)

    # Init paths to other input files.
    preads_file = os.path.join(test_dir, 'preads4falcon.fasta')
    p_ctg_fasta = os.path.join(test_dir, 'p_ctg.fa')
    a_ctg_fasta = os.path.join(test_dir, 'a_ctg.fa')

    fp_out = StringIO()
    # Run the unit under test.
    gfa_graph.write_gfa_v1(fp_out, preads_file,
                           [p_ctg_fasta, a_ctg_fasta],
                           write_reads, write_contigs)

    # Compare results line by line. The expected file is read through a
    # context manager so its handle is closed.
    result = fp_out.getvalue().splitlines()
    with open(expected_path) as fp_expected:
        expected = [line.strip() for line in fp_expected]
    assert result == expected