def test_path_conversion(self):
    """Check P (GFA1) <-> O (GFA2) path conversion in both directions.

    For each direction, a line that was never added to a Gfa must raise
    gfapy.RuntimeError on conversion, while the line added to a Gfa
    converts to both textual forms.
    """
    gfa1_text = "P\t1\ta+,b-\t100M"
    gfa2_text = "O\t1\ta+ a_to_b+ b-"

    # ---- GFA1 path -> GFA2 ----
    link_text = "L\ta\t+\tb\t-\t100M\tID:Z:a_to_b"
    graph_v1 = gfapy.Gfa()
    connected_p = gfapy.Line(gfa1_text)
    graph_v1.add_line(connected_p)
    graph_v1.add_line(link_text)
    graph_v1.process_line_queue()

    # standalone (not connected) line: conversion is refused
    standalone_convert = gfapy.Line(gfa1_text).to_gfa2
    with self.assertRaises(gfapy.RuntimeError):
        standalone_convert()

    # connected line: both representations are available
    self.assertEqual(gfa1_text, str(connected_p.to_gfa1()))
    self.assertEqual(gfa2_text, str(connected_p.to_gfa2()))

    # ---- GFA2 path -> GFA1 ----
    edge_text = "E\ta_to_b\ta+\tb-\t100\t200$\t100\t200$\t100M"
    segment_a_text = "S\ta\t200\t*"
    segment_b_text = "S\tb\t200\t*"
    graph_v2 = gfapy.Gfa()
    connected_o = gfapy.Line(gfa2_text)
    graph_v2.add_line(connected_o)
    for record in (edge_text, segment_a_text, segment_b_text):
        graph_v2.add_line(record)

    # standalone (not connected) line: conversion is refused
    standalone_convert = gfapy.Line(gfa2_text).to_gfa1
    with self.assertRaises(gfapy.RuntimeError):
        standalone_convert()

    # connected line: both representations are available
    self.assertEqual(gfa1_text, str(connected_o.to_gfa1()))
    self.assertEqual(gfa2_text, str(connected_o.to_gfa2()))
def test_adding_containment_to_rgfa(self):
    """A C line validates in a default Gfa but fails validation in rGFA."""
    containment = "C\t1\t+\t2\t+\t12\t*"

    default_gfa = gfapy.Gfa()
    default_gfa.append(containment)
    default_gfa.validate()  # must not raise

    rgfa = gfapy.Gfa(version="gfa1", dialect="rgfa")
    rgfa.append(containment)
    with self.assertRaises(gfapy.NotFoundError):
        rgfa.validate()
def test_version_empty(self):
    """Check Gfa.version for no, valid and invalid ``version`` arguments."""
    # No version requested: nothing is reported.
    self.assertIsNone(gfapy.Gfa().version)

    # An explicitly requested version is reported back unchanged.
    for requested in ("gfa1", "gfa2"):
        gfa = gfapy.Gfa(version=requested)
        self.assertEqual(requested, gfa.version)

    # An unsupported version string is rejected at construction time.
    self.assertRaises(gfapy.VersionError, gfapy.Gfa, version="0.0")
def test_adding_invalid_segment_to_rgfa(self):
    """A bare S line validates in a default Gfa but fails validation in rGFA."""
    segment = "S\t1\t*"

    default_gfa = gfapy.Gfa()
    default_gfa.append(segment)
    default_gfa.validate()  # must not raise

    rgfa = gfapy.Gfa(dialect="rgfa")
    rgfa.append(segment)
    with self.assertRaises(gfapy.NotFoundError):
        rgfa.validate()
def test_from_list(self):
    """A Gfa built from a list of lines equals one built line by line."""
    records = [
        "H\tVN:Z:1.0",
        "S\t1\t*",
        "S\t2\t*",
        "S\t3\t*",
        "L\t1\t+\t2\t-\t*",
        "C\t1\t+\t3\t-\t12\t*",
        "P\t4\t1+,2-\t*",
    ]

    # Reference: append every record individually.
    incremental = gfapy.Gfa()
    for record in records:
        incremental.append(record)

    # Under test: hand the whole list to the constructor.
    from_list = gfapy.Gfa(records)
    assert from_list
    self.assertEqual(gfapy.Gfa, from_list.__class__)
    self.assertEqual(str(incremental), str(from_list))
def test_paths_backreferences(self):
    """Check the ``paths`` back-references of segments and links of a path.

    Covers the state after connecting the path, after disconnecting it and
    after reconnecting it.
    """
    all_segments = ["a", "b", "c", "d", "e", "f"]
    path_segments = ["a", "b", "c", "e", "f"]
    all_links = ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]
    path_links = ["a+b+", "b+c-", "e-c+", "a-f-"]

    graph = gfapy.Gfa()
    segments = {}
    links = {}

    def check_segment_backrefs():
        # Segments on the path refer back to it; "d" is not on the path.
        for seg_name in path_segments:
            self.assertEqual([path], segments[seg_name].paths)
        self.assertEqual([], segments["d"].paths)

    def check_link_backrefs():
        # Links used by the path refer back to it; "c-d+" is not used.
        for link_name in path_links:
            self.assertEqual([path], links[link_name].paths)
        self.assertEqual([], links["c-d+"].paths)

    for seg_name in all_segments:
        segments[seg_name] = gfapy.Line("S\t{}\t*".format(seg_name))
        graph.append(segments[seg_name])
    path = gfapy.Line("P\tp1\tf+,a+,b+,c-,e+\t*")
    graph.append(path)
    check_segment_backrefs()

    for link_name in all_links:
        # "a+b+" -> "L\ta\t+\tb\t+\t*": one field per character.
        links[link_name] = gfapy.Line("\t".join("L{}*".format(link_name)))
        graph.append(links[link_name])
    check_link_backrefs()

    # disconnection effects
    path.disconnect()
    for link_name in all_links:
        self.assertEqual([], links[link_name].paths)
    for seg_name in all_segments:
        self.assertEqual([], segments[seg_name].paths)

    # reconnection
    path.connect(graph)
    check_segment_backrefs()
    check_link_backrefs()
def test_disconnect_unregisters_line(self):
    """Disconnecting a segment removes it from the Gfa's segment list."""
    segment = gfapy.Line("S\t1\tACCAT")
    graph = gfapy.Gfa()
    graph.append(segment)
    self.assertEqual([segment], graph.segments)

    segment.disconnect()
    self.assertEqual([], graph.segments)
def test_L_to_E(self):
    """Convert four GFA1 links to GFA2 edge strings and check the result.

    Also checks that a dovetail converts to a ``Link`` via ``to_gfa1()`` and
    to a ``GFA2`` edge via ``to_gfa2()``.
    """
    g = gfapy.Gfa(version="gfa1")
    for record in (
        "S\t1\t*\tLN:i:100",
        "S\t2\t*\tLN:i:100",
        "S\t3\t*\tLN:i:100",
        "S\t4\t*\tLN:i:100",
        "L\t1\t+\t2\t+\t10M",
        "L\t1\t-\t2\t-\t20M",
        "L\t3\t-\t4\t+\t30M",
        "L\t3\t+\t4\t-\t40M",
    ):
        g.add_line(record)

    dovetails_gfa1 = g.dovetails
    # Explicit indices 0..3: fewer than four dovetails must fail loudly.
    dovetails_gfa2 = {dovetails_gfa1[i].to_gfa2_s() for i in range(4)}

    # Fields are written with explicit \t escapes, like every other GFA
    # record literal in these tests.
    # NOTE(review): assumes to_gfa2_s() joins fields with tabs - confirm.
    expected_dovetails_gfa2 = {
        "E\t5\t1+\t2+\t90\t100$\t0\t10\t10M",
        "E\t6\t1-\t2-\t0\t20\t80\t100$\t20M",
        "E\t7\t3-\t4+\t0\t30\t0\t30\t30M",
        "E\t8\t3+\t4-\t60\t100$\t60\t100$\t40M",
    }
    try:
        self.assertEqual(expected_dovetails_gfa2, dovetails_gfa2)
    except AssertionError:
        # sometimes 7 and 8 are assigned with a different order
        # despite using a fixed hash seed in the tests
        expected_dovetails_gfa2 = {
            "E\t5\t1+\t2+\t90\t100$\t0\t10\t10M",
            "E\t6\t1-\t2-\t0\t20\t80\t100$\t20M",
            "E\t8\t3-\t4+\t0\t30\t0\t30\t30M",
            "E\t7\t3+\t4-\t60\t100$\t60\t100$\t40M",
        }
        self.assertEqual(expected_dovetails_gfa2, dovetails_gfa2)

    # assertIsInstance (unlike a bare assert) is not stripped under -O.
    self.assertIsInstance(g.dovetails[0].to_gfa1(), gfapy.line.edge.Link)
    self.assertIsInstance(g.dovetails[0].to_gfa2(), gfapy.line.edge.GFA2)
def test_gaps_backreferences(self):
    """Check the gap back-references of a segment.

    Covers ``gaps_L``, ``gaps_R``, ``gaps_of_end`` and ``gaps``, before and
    after disconnecting a gap and then the segment itself.
    """
    graph = gfapy.Gfa()
    seg_a = gfapy.Line("S\ta\t100\t*")
    graph.append(seg_a)

    # gaps
    other_segments = {}
    gaps = {}
    for seg_name in "bcdefghi":
        other_segments[seg_name] = gfapy.Line(
            "S\t{}\t100\t*".format(seg_name))
        graph.append(other_segments[seg_name])
    gap_names = ["a+b+", "a+c-", "a-d+", "a-e-",
                 "f+a+", "g+a-", "h-a+", "i-a-"]
    for gap_name in gap_names:
        # The key is two oriented segment references, e.g. "a+" and "b+".
        first_ref, second_ref = gap_name[:2], gap_name[2:4]
        fields = ["G", "*", first_ref, second_ref, "200", "*"]
        gaps[gap_name] = gfapy.Line("\t".join(fields))
        graph.append(gaps[gap_name])

    # gaps_[LR]()
    expected_left = [gaps[k] for k in ("a-d+", "a-e-", "f+a+", "h-a+")]
    self.assertEqual(expected_left, seg_a.gaps_L)
    expected_right = [gaps[k] for k in ("a+b+", "a+c-", "g+a-", "i-a-")]
    self.assertEqual(expected_right, seg_a.gaps_R)

    # gaps()
    self.assertEqual(seg_a.gaps_L, seg_a.gaps_of_end("L"))
    self.assertEqual(seg_a.gaps_R, seg_a.gaps_of_end("R"))
    self.assertEqual(seg_a.gaps_L + seg_a.gaps_R, seg_a.gaps)

    # disconnection effects
    gaps["a-d+"].disconnect()
    remaining_left = [gaps[k] for k in ("a-e-", "f+a+", "h-a+")]
    self.assertEqual(remaining_left, seg_a.gaps_L)

    seg_a.disconnect()
    self.assertEqual([], seg_a.gaps_L)
    self.assertEqual([], seg_a.gaps_R)
    for end in ("L", "R"):
        self.assertEqual([], seg_a.gaps_of_end(end))
    self.assertEqual([], seg_a.gaps)
def test_header_add(self):
    """``header.add`` accumulates values for a tag; delete + set replaces them."""
    gfa = gfapy.Gfa()
    for record in ("H\tVN:Z:1.0", "H\taa:i:12\tab:Z:test1", "H\tac:Z:test2"):
        gfa.append(record)

    def header_strings():
        # Current header lines, as an order-independent set of strings.
        return {str(line) for line in gfa.headers}

    gfa.header.add("aa", 15)
    self.assertSetEqual(
        {
            "H\tVN:Z:1.0",
            "H\taa:i:12",
            "H\taa:i:15",
            "H\tab:Z:test1",
            "H\tac:Z:test2",
        },
        header_strings())

    gfa.header.add("aa", 16)
    self.assertSetEqual(
        {
            "H\tVN:Z:1.0",
            "H\taa:i:12",
            "H\taa:i:15",
            "H\taa:i:16",
            "H\tab:Z:test1",
            "H\tac:Z:test2",
        },
        header_strings())

    # Dropping the tag and assigning it again leaves a single value.
    gfa.header.delete("aa")
    gfa.header.aa = 26
    self.assertEqual(
        {
            "H\tVN:Z:1.0",
            "H\taa:i:26",
            "H\tab:Z:test1",
            "H\tac:Z:test2",
        },
        header_strings())
def test_unordered_groups_create_virtual_unknown_records(self):
    """Items of a U group start as virtual lines and become real when added.

    After the group is appended, every item is virtual with record type
    "\\n". As each referenced line (path, segment, nested set, edge) is
    appended, the item at that position becomes that line and the line's
    ``sets`` back-reference lists the group.
    """
    g = gfapy.Gfa(version="gfa2")
    # Named ``group`` rather than ``set`` so the builtin is not shadowed.
    group = gfapy.Line("U\tset\tchildpath b childset edge")
    g.append(group)
    for item in group.items:
        self.assertTrue(item.virtual)
        self.assertEqual("\n", item.record_type)

    # Appended in the same order as the group's item list, so the
    # position of each line in ``group.items`` is its index here.
    members = [
        "O\tchildpath\tf+ a+",
        "S\tb\t1000\t*",
        "U\tchildset\tg edge2",
        "E\tedge\te-\tc+\t0\t100\t900\t1000$\t*",
    ]
    for position, text in enumerate(members):
        member = gfapy.Line(text)
        g.append(member)
        self.assertFalse(group.items[position].virtual)
        self.assertEqual(member, group.items[position])
        self.assertEqual([group], member.sets)
def test_paths_create_virtual_links(self):
    """A path's segments and links are virtual until the real lines are added."""
    graph = gfapy.Gfa(version="gfa1")
    path = gfapy.Line("P\tp1\tb+,ccc-,e+\t10M1I2M,15M")
    graph.append(path)

    # Every segment the path mentions exists, but only as a virtual line.
    for oriented_segment in path.segment_names:
        assert oriented_segment.line.virtual
    self.assertEqual({"b", "ccc", "e"},
                     {segment.name for segment in graph.segments})

    # Adding the real segment replaces the virtual one.
    segment_b = gfapy.Line("S\tb\t*")
    graph.append(segment_b)
    assert not path.segment_names[0].line.virtual
    self.assertEqual(segment_b, path.segment_names[0].line)
    self.assertEqual([path], segment_b.paths)

    # Same for the links, which start out virtual.
    for oriented_link in path.links:
        assert oriented_link.line.virtual
    link_texts = ("L\tccc\t+\tb\t-\t2M1D10M", "L\tccc\t-\te\t+\t15M")
    for position, link_text in enumerate(link_texts):
        link = gfapy.Line(link_text)
        graph.append(link)
        assert not path.links[position].line.virtual
        self.assertEqual(link, path.links[position].line)
        self.assertEqual([path], link.paths)
def test_extensions(self):
    """Check the custom M (MetagenomicAssignment) and T (Taxon) record types."""
    graph = gfapy.Gfa(version="gfa2", vlevel=0)
    # Direct construction from a field list must work as well.
    MetagenomicAssignment(["M", "*", "N12", "C", "SC:i:20"])

    seg_a = gfapy.Line("S\tA\t1000\t*")
    graph.append(seg_a)
    taxon_b12 = gfapy.Line("T\tB12_c")
    graph.append(taxon_b12)
    assign_1 = gfapy.Line("M\t1\ttaxon:123\tA\tSC:i:40\txx:Z:cjaks536")
    graph.append(assign_1)
    assign_2 = gfapy.Line("M\t2\ttaxon:123\tB\txx:Z:cga5r5cs")
    graph.append(assign_2)
    seg_b = gfapy.Line("S\tB\t1000\t*")
    graph.append(seg_b)
    assign_unnamed = gfapy.Line("M\t*\tB12_c\tB\tSC:i:20")
    graph.append(assign_unnamed)
    taxon_123 = gfapy.Line("T\ttaxon:123\tUL:Z:http://www.taxon123.com")
    graph.append(taxon_123)

    # Record classes
    self.assertEqual(MetagenomicAssignment, assign_1.__class__)
    self.assertEqual(Taxon, taxon_b12.__class__)

    # Fields and references of the assignments
    self.assertEqual("1", assign_1.mid)
    assert gfapy.is_placeholder(assign_unnamed.mid)
    self.assertEqual(taxon_123, assign_1.tid)
    self.assertEqual(seg_a, assign_1.sid)
    self.assertEqual("cjaks536", assign_1.xx)

    # Back-references from segment and taxon to their assignments
    self.assertEqual([assign_2, assign_unnamed],
                     seg_b.metagenomic_assignments)
    self.assertEqual([assign_1, assign_2],
                     taxon_123.metagenomic_assignments)

    # Fields of the taxon
    self.assertEqual("taxon:123", taxon_123.tid)
    self.assertEqual("http://www.taxon123.com", taxon_123.UL)
def test_delete_links(self):
    """Disconnecting a link removes it from dovetails, leaving containments."""
    segment_texts = ["S\t0\t*", "S\t1\t*", "S\t2\t*"]
    link_text = "L\t1\t+\t2\t+\t12M"
    containment_text = "C\t1\t+\t0\t+\t12\t12M"

    gfa = gfapy.Gfa()
    for record in segment_texts + [link_text, containment_text]:
        gfa.append(record)

    def disconnect_link_1_to_2():
        # Disconnect every relation from 1+ to 2+.
        target = gfapy.OrientedLine("2", "+")
        for relation in gfa.segment("1").oriented_relations("+", target):
            relation.disconnect()

    self.assertEqual([link_text], [str(d) for d in gfa.dovetails])
    right_end_relations = gfa.segment("1").end_relations("R", ["2", "L"])
    self.assertEqual([link_text], [str(r) for r in right_end_relations])

    disconnect_link_1_to_2()
    self.assertEqual([], gfa.dovetails)
    self.assertEqual([], gfa.segment("1").end_relations("R", ["2", "L"]))

    # The containment is untouched by the link removal.
    self.assertEqual([containment_text],
                     [str(c) for c in gfa.containments])
    to_contained = gfa.segment("1").relations_to(
        gfa.segment("0"), "edges_to_contained")
    self.assertEqual(containment_text, str(to_contained[0]))

    # The link can be added again and removed again.
    gfa.append(link_text)
    self.assertNotEqual([], gfa.dovetails)
    disconnect_link_1_to_2()
    self.assertEqual([], gfa.dovetails)
def test_error_inconsistent_definitions(self):
    """A second, different TS header value raises; other repeats do not."""
    graph = gfapy.Gfa()

    # Different values for xx, and a repeated identical TS: nothing raised.
    for header in ("H\txx:i:1", "H\txx:i:2", "H\tTS:i:120", "H\tTS:i:120"):
        graph.add_line(header)

    # A different TS value is rejected.
    with self.assertRaises(gfapy.InconsistencyError):
        graph.add_line("H\tTS:i:122")
def _coverage_gap(segment_name):
    """Return ``abs(A - B)`` for a name embedding ``A:<int>,B:<int>)``.

    Raises IndexError if the ``A:`` or ``,B:`` marker is missing and
    ValueError if either count is not an integer.
    """
    counts = segment_name.split("A:")[1].split(",B:")
    return abs(int(counts[0]) - int(counts[1].split(")")[0]))


def main():
    """Write a filtered copy of the module-level graph ``g`` to a file.

    The header and all segments of ``g`` are copied. For each edge, the
    coverage gaps of its two segment names are averaged; the edge is kept,
    tagged with ``KC:i:<average>``, only when that average is not below the
    module-level threshold ``C``. The result is written to
    ``Parser_Output/<args.output>``.

    Reads the module-level names ``g``, ``C`` and ``args``; none of them is
    rebound here, so no ``global`` declaration is needed.
    """
    # TODO: maybe clean all the unattached segments
    output = gfapy.Gfa()
    output.add_line(g.header)
    for segment in g.segments:
        output.add_line(str(segment))

    # Every edge is visited; an earlier ``range(len(links) - 1)`` bound
    # silently skipped the last one.
    links = [str(edge) for edge in g.edges]
    for link in links:
        fields = link.split("\t")
        kmer_a, kmer_b = fields[1], fields[3]
        orient_b, overlap = fields[4], fields[5]

        dif_ex = (_coverage_gap(kmer_a) + _coverage_gap(kmer_b)) / 2
        if dif_ex < C:
            continue
        # NOTE(review): the first orientation is always written as "+";
        # fields[2] of the source edge is not propagated - confirm intended.
        output.add_line("L\t%s\t+\t%s\t%s\t%s\tKC:i:%d"
                        % (kmer_a, kmer_b, orient_b, overlap, int(dif_ex)))

    filename = os.path.join("Parser_Output", args.output)
    output.to_file(filename)
def test_wrong_version_in_header(self):
    """A VN:Z:2.0 header is rejected by a Gfa created with version gfa1."""
    header_without_version = "H\taa:A:a\tff:f:1.1"
    header_v2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"

    gfa = gfapy.Gfa(version="gfa1")
    gfa.add_line(header_without_version)
    self.assertEqual("gfa1", gfa.version)

    with self.assertRaises(gfapy.VersionError):
        gfa.add_line(header_v2)
def test_delete_segment(self):
    """Disconnecting segments removes them and the lines that depend on them."""
    segment_texts = ["S\t0\t*", "S\t1\t*", "S\t2\t*"]
    link_text = "L\t1\t+\t2\t+\t12M"
    containment_text = "C\t1\t+\t0\t+\t12\t12M"
    path_text = "P\t4\t2+,0-\t12M"

    gfa = gfapy.Gfa()
    gfa.append("H\tVN:Z:1.0")
    for record in segment_texts + [link_text, containment_text, path_text]:
        gfa.append(record)

    def as_strings(lines):
        return [str(line) for line in lines]

    def real_dovetails():
        # String form of the dovetails that are not virtual.
        return [str(d) for d in gfa.dovetails if not d.virtual]

    # Initial state: everything that was appended is present.
    self.assertEqual(set(segment_texts), set(as_strings(gfa.segments)))
    self.assertEqual({"0", "1", "2"}, set(gfa.segment_names))
    self.assertEqual([link_text], real_dovetails())
    self.assertEqual([containment_text], as_strings(gfa.containments))
    self.assertEqual([path_text], as_strings(gfa.paths))
    self.assertEqual(["4"], gfa.path_names)

    # Removing segment 0 also removes the containment and the path.
    gfa.segment("0").disconnect()
    self.assertEqual({segment_texts[1], segment_texts[2]},
                     set(as_strings(gfa.segments)))
    self.assertEqual({"1", "2"}, set(gfa.segment_names))
    self.assertEqual([link_text], real_dovetails())
    self.assertEqual([], as_strings(gfa.containments))
    self.assertEqual([], as_strings(gfa.paths))
    self.assertEqual([], gfa.path_names)

    # Removing segment 1 also removes the link.
    gfa.segment("1").disconnect()
    self.assertEqual([segment_texts[2]], as_strings(gfa.segments))
    self.assertEqual([], gfa.dovetails)

    # rm() by name removes the last segment.
    gfa.rm("2")
    self.assertEqual([], gfa.segments)
def test_unknown_version_in_header(self):
    """A header with VN:Z:x.x raises VersionError when added."""
    header_without_version = "H\taa:A:a\tff:f:1.1"
    header_bad_version = "H\tzz:Z:test\tVN:Z:x.x\tii:i:11"

    gfa = gfapy.Gfa()
    gfa.add_line(header_without_version)
    # A header without a VN tag leaves the version unset.
    self.assertEqual(None, gfa.version)

    with self.assertRaises(gfapy.VersionError):
        gfa.add_line(header_bad_version)
def read_gfa_from_handler(handler):
    """Return a new ``gfapy.Gfa`` holding every item yielded by *handler*.

    *handler* is iterated exactly once and each item is passed to
    ``Gfa.append`` unchanged, in iteration order.
    """
    graph = gfapy.Gfa()
    for record in handler:
        graph.append(record)
    return graph
def test_register_line_merge(self):
    """Registering a header line sets its tag on the Gfa header; it cannot be unregistered."""
    graph = gfapy.Gfa(version="gfa1")
    header_line = gfapy.line.Header({"xx": 1}, version="gfa1")
    header_line._gfa = graph  # done by hand here, before registering
    graph._register_line(header_line)
    self.assertEqual(1, graph.header.xx)

    with self.assertRaises(gfapy.AssertionError):
        graph._unregister_line(header_line)
def test_paths_references(self):
    """Check ``segment_names`` and ``links`` of a path across (dis)connections.

    A path that is not connected refers to segments by name and has no
    links. Once connected it refers to the segment and link lines.
    Disconnecting the path, one of its links or one of its segments leaves
    it referring to names again.
    """
    path_steps = [("f", "+"), ("a", "+"), ("b", "+"), ("c", "-"), ("e", "+")]

    def steps_by_name():
        # What a path that is not connected is expected to hold.
        return [gfapy.OrientedLine(name, orient)
                for name, orient in path_steps]

    graph = gfapy.Gfa()
    segments = {}
    links = {}
    for seg_name in ["a", "b", "c", "d", "e", "f"]:
        segments[seg_name] = gfapy.Line("S\t{}\t*".format(seg_name))
        graph.append(segments[seg_name])

    path = gfapy.Line("P\tp1\tf+,a+,b+,c-,e+\t*")
    self.assertEqual(steps_by_name(), path.segment_names)
    self.assertEqual([], path.links)

    # connection
    graph.append(path)

    # add links
    for link_name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-"]:
        # "a+b+" -> "L\ta\t+\tb\t+\t*": one field per character.
        links[link_name] = gfapy.Line("\t".join("L{}*".format(link_name)))
        graph.append(links[link_name])

    # segment_names now refer to the segment lines
    expected_segments = [gfapy.OrientedLine(segments[name], orient)
                         for name, orient in path_steps]
    self.assertEqual(expected_segments, path.segment_names)

    # links
    expected_links = [
        gfapy.OrientedLine(links["a-f-"], "-"),
        gfapy.OrientedLine(links["a+b+"], "+"),
        gfapy.OrientedLine(links["b+c-"], "+"),
        gfapy.OrientedLine(links["e-c+"], "-"),
    ]
    self.assertEqual(expected_links, path.links)

    # path disconnection
    path.disconnect()
    self.assertEqual(steps_by_name(), path.segment_names)
    self.assertEqual([], path.links)
    graph.append(path)

    # links disconnection cascades on paths:
    assert path.is_connected()
    links["a-f-"].disconnect()
    assert not path.is_connected()
    self.assertEqual(steps_by_name(), path.segment_names)
    graph.append(path)
    graph.append(links["a-f-"])

    # segment disconnection cascades on links and then paths:
    assert path.is_connected()
    segments["a"].disconnect()
    assert not path.is_connected()
    self.assertEqual(steps_by_name(), path.segment_names)
    self.assertEqual([], path.links)
def test_header_version_editing(self):
    """VN can be edited on a standalone header, but not on a Gfa's header."""
    header_text = "H\txx:i:1\tVN:Z:1.0"

    # A header that belongs to no Gfa accepts a new VN value.
    free_header = gfapy.Line(header_text)
    free_header.VN = "2.0"  # nothing raised

    graph = gfapy.Gfa()
    graph.add_line(header_text)
    # Other tags of the Gfa header remain editable.
    graph.header.xx = 2  # nothing raised
    with self.assertRaises(gfapy.RuntimeError):
        graph.header.VN = "2.0"
def test_conflicting_versions_in_header(self):
    """Adding a VN:Z:2.0 header after a VN:Z:1.0 header raises VersionError."""
    header_without_version = "H\taa:A:a\tff:f:1.1"
    header_v1 = "H\tzz:Z:test\tVN:Z:1.0\tii:i:11"
    header_v2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"

    gfa = gfapy.Gfa()
    for accepted in (header_without_version, header_v1):
        gfa.add_line(accepted)

    with self.assertRaises(gfapy.VersionError):
        gfa.add_line(header_v2)
def test_connected_and_gfa(self):
    """``is_connected()`` and ``gfa`` reflect whether a line was appended."""
    segment = gfapy.Line("S\t1\tACCAT")

    # Fresh line: not connected, no owning Gfa.
    assert not segment.is_connected()
    self.assertEqual(None, segment.gfa)

    graph = gfapy.Gfa()
    graph.append(segment)

    # After append: connected, and owned by exactly that Gfa object.
    assert segment.is_connected()
    assert graph is segment.gfa
def test_register_line_unnamed(self):
    """A link built from an empty dict can be registered and unregistered."""
    graph = gfapy.Gfa(version="gfa1")
    link = gfapy.line.edge.Link({}, version="gfa1")
    link._gfa = graph  # done by hand here, before registering

    graph._register_line(link)
    self.assertEqual([link], graph.dovetails)

    graph._unregister_line(link)
    self.assertEqual([], graph.dovetails)
def test_connect(self):
    """``Line.connect(gfa)`` marks the line connected and sets its ``gfa``."""
    segment = gfapy.Line("S\t2\tACCAT")

    # Fresh line: not connected, no owning Gfa.
    assert not segment.is_connected()
    self.assertEqual(None, segment.gfa)

    graph = gfapy.Gfa()
    segment.connect(graph)

    # After connect: connected, and owned by exactly that Gfa object.
    assert segment.is_connected()
    assert graph is segment.gfa
def test_auto_select_distribute_end_eq_factor(self):
    """Check ``_auto_select_distribute_end`` for four fixed argument tuples."""
    graph = gfapy.Gfa()
    cases = [
        # one =, one > factor
        ("L", (4, 4, 5, False)),
        ("R", (4, 5, 4, False)),
        # one =, one < factor
        ("L", (4, 4, 3, False)),
        ("R", (4, 3, 4, False)),
    ]
    for expected_end, call_args in cases:
        selected = graph._auto_select_distribute_end(*call_args)
        self.assertEqual(expected_end, selected)
def test_GFA2_header(self):
    """Adding a header with VN:Z:2.0 sets the Gfa version to "gfa2"."""
    header_without_version = "H\taa:A:a\tff:f:1.1"
    header_v2 = "H\tzz:Z:test\tVN:Z:2.0\tii:i:11"

    gfa = gfapy.Gfa()
    gfa.add_line(header_without_version)
    # A header without a VN tag leaves the version unset.
    self.assertEqual(None, gfa.version)

    gfa.add_line(header_v2)
    self.assertEqual("gfa2", gfa.version)
def test_gfa_single_def_tags(self):
    """A header tag can be read, overwritten with ``set`` and deleted."""
    graph = gfapy.Gfa()
    graph.add_line("H\txx:i:1")
    header = graph.header

    # Initial value as parsed from the line.
    self.assertEqual(["xx"], header.tagnames)
    self.assertEqual(1, header.xx)

    # set() overwrites the value.
    header.set("xx", 12)
    self.assertEqual(12, header.xx)

    # After delete() the tag reads as None.
    header.delete("xx")
    self.assertEqual(None, header.xx)