def _find_edge_from_path_to_segment(self, path, oriented_segment):
    """Return the unique oriented edge joining the end of ``path`` to a segment.

    Every edge of ``oriented_segment.line`` is inspected. An edge whose
    ``sid1``/``sid2`` pair matches the last path element and
    ``oriented_segment`` (in either order) is collected with orientation
    ``"+"``; an edge matching the inverted pair is collected with ``"-"``.

    Raises
    ------
    gfapy.NotFoundError
        If no edge connects the two elements.
    gfapy.NotUniqueError
        If more than one edge connects the two elements.
    """
    candidates = []
    for edge in oriented_segment.line.edges:
        previous = path[-1]
        same_strand = (
            (edge.sid1 == oriented_segment and edge.sid2 == previous)
            or (edge.sid1 == previous and edge.sid2 == oriented_segment))
        if same_strand:
            candidates.append(gfapy.OrientedLine(edge, "+"))
            continue
        opposite_strand = (
            (edge.sid1 == oriented_segment.inverted()
             and edge.sid2 == previous.inverted())
            or (edge.sid1 == previous.inverted()
                and edge.sid2 == oriented_segment.inverted()))
        if opposite_strand:
            candidates.append(gfapy.OrientedLine(edge, "-"))

    if len(candidates) == 1:
        return candidates[0]

    # Both error messages share the same description of the path context.
    context = (
        "Line: {}\n".format(self) +
        "Previous elements:\n" +
        "".join([" {} ({})\n".format(e, e.line) for e in path]) +
        "Current element:\n" +
        " {} ({})\n".format(oriented_segment, oriented_segment.line))
    if not candidates:
        raise gfapy.NotFoundError(
            "Path is not valid, segments are not contiguous\n" + context)
    raise gfapy.NotUniqueError(
        "Path is not unique\n" + context +
        "Possible edges\n" +
        "".join([" {} ({})\n".format(e, e.line) for e in candidates]))
def test_delete_links(self):
    """Disconnecting a link removes it from the dovetails but keeps containments."""
    link = "L\t1\t+\t2\t+\t12M"
    containment = "C\t1\t+\t0\t+\t12\t12M"
    gfa = gfapy.Gfa()
    for record in ["S\t0\t*", "S\t1\t*", "S\t2\t*", link, containment]:
        gfa.append(record)

    self.assertEqual([link], list(map(str, gfa.dovetails)))
    at_right_end = gfa.segment("1").end_relations("R", ["2", "L"])
    self.assertEqual([link], list(map(str, at_right_end)))

    # Remove the link between 1+ and 2+.
    target = gfapy.OrientedLine("2", "+")
    for relation in gfa.segment("1").oriented_relations("+", target):
        relation.disconnect()
    self.assertEqual([], gfa.dovetails)
    self.assertEqual([], gfa.segment("1").end_relations("R", ["2", "L"]))

    # The containment is unaffected.
    self.assertEqual([containment], list(map(str, gfa.containments)))
    contained = gfa.segment("1").relations_to(gfa.segment("0"),
                                              "edges_to_contained")
    self.assertEqual(containment, str(contained[0]))

    # Add the link again, then remove it a second time.
    gfa.append(link)
    self.assertNotEqual([], gfa.dovetails)
    target = gfapy.OrientedLine("2", "+")
    for relation in gfa.segment("1").oriented_relations("+", target):
        relation.disconnect()
    self.assertEqual([], gfa.dovetails)
def test_validate(self):
    """validate() accepts well-formed oriented lines and rejects malformed ones."""
    # Valid: string name and Line instance.
    gfapy.OrientedLine("a", "+").validate()
    gfapy.OrientedLine(gfapy.Line("S\tb\t*\txx:Z:1.0"), "-").validate()

    # Invalid orientation symbol.
    bad_orientation = gfapy.OrientedLine("a", "*")
    with self.assertRaises(gfapy.ValueError):
        bad_orientation.validate()

    # Line is neither a string nor a Line.
    bad_line_type = gfapy.OrientedLine([], "+")
    with self.assertRaises(gfapy.TypeError):
        bad_line_type.validate()

    # Name containing a tab.
    bad_name = gfapy.OrientedLine("a\ta", "+")
    with self.assertRaises(gfapy.FormatError):
        bad_name.validate()
def test_P(self):
    """Check parsing and validation of a GFA1 path (P) line.

    Covers the parsed class and fields, rejection of malformed tags and
    missing fields, and validation of the segment-names and overlaps
    fields at the given validation levels.
    """
    fields = ["P", "4", "1+,2-,3+", "9M2I3D1M,12M", "ab:Z:abcd"]
    s = "\t".join(fields)
    gfapy.Line(s)  # nothing raised
    self.assertEqual(gfapy.line.group.Path, gfapy.Line(s).__class__)
    self.assertEqual(fields[0], gfapy.Line(s).record_type)
    self.assertEqual(fields[1], gfapy.Line(s).path_name)
    self.assertEqual([
        gfapy.OrientedLine("1", "+"),
        gfapy.OrientedLine("2", "-"),
        gfapy.OrientedLine("3", "+"),
    ], gfapy.Line(s).segment_names)
    self.assertEqual([
        [
            gfapy.CIGAR.Operation(9, "M"),
            gfapy.CIGAR.Operation(2, "I"),
            gfapy.CIGAR.Operation(3, "D"),
            gfapy.CIGAR.Operation(1, "M"),
        ],
        [gfapy.CIGAR.Operation(12, "M")],
    ], gfapy.Line(s).overlaps)
    self.assertEqual("abcd", gfapy.Line(s).ab)

    # A malformed tag appended to an otherwise valid line must be rejected.
    # (The previous code added the builtin ``str`` type to a Line built
    # from the bare tag, which never exercised this case.)
    with self.assertRaises(gfapy.FormatError):
        gfapy.Line(s + "\tH1")
    # Missing positional fields.
    with self.assertRaises(gfapy.FormatError):
        gfapy.Line("P\tH")

    # Segment names without orientation.
    f = fields.copy()
    f[2] = "1,2,3"
    with self.assertRaises(gfapy.FormatError):
        gfapy.Line("\t".join(f), vlevel=1)

    # Number of overlaps inconsistent with the number of segments.
    f = fields.copy()
    f[2] = "1+,2+"
    f[3] = "9M,12M,3M"
    with self.assertRaises(gfapy.InconsistencyError):
        gfapy.Line("\t".join(f), vlevel=1)

    # Accepted overlaps variants: nothing raised.
    f = fields.copy()
    f[3] = "*,*"
    gfapy.Line("\t".join(f), vlevel=1)
    f = fields.copy()
    f[3] = "9M2I3D1M,12M,12M"
    gfapy.Line("\t".join(f), vlevel=3)
    f = fields.copy()
    f[3] = "*"
    gfapy.Line("\t".join(f), vlevel=1)

    # Malformed overlaps.
    f = fields.copy()
    f[3] = "12,12"
    with self.assertRaises(gfapy.FormatError):
        gfapy.Line("\t".join(f), vlevel=1)
    f = fields.copy()
    f[3] = "12M|12M"
    with self.assertRaises(gfapy.FormatError):
        gfapy.Line("\t".join(f), vlevel=1)
def test_inverted(self):
    """inverted() returns a flipped copy and leaves the original untouched."""
    # String-named line.
    oriented = gfapy.OrientedLine("a", "+")
    flipped = oriented.inverted()
    self.assertEqual("a", flipped.line)
    self.assertEqual("+", oriented.orient)
    self.assertEqual("-", flipped.orient)

    # Line instance.
    segment = gfapy.Line("S\tb\t*\txx:Z:1.0")
    oriented = gfapy.OrientedLine(segment, "-")
    flipped = oriented.inverted()
    self.assertEqual(segment, flipped.line)
    self.assertEqual("-", oriented.orient)
    self.assertEqual("+", flipped.orient)

    # An invalid orientation cannot be inverted.
    oriented = gfapy.OrientedLine("a", "*")
    with self.assertRaises(gfapy.ValueError):
        oriented.invert()
def test_disconnect_removes_nonfield_references(self):
    """Disconnecting a path empties its ``links`` reference list."""
    segments = [
        gfapy.Line("S\t1\tACCAT"),
        gfapy.Line("S\t2\tCATGG"),
        gfapy.Line("S\t3\tTGGAA"),
    ]
    link_1_2 = gfapy.Line("L\t1\t+\t2\t+\t*")
    link_2_3 = gfapy.Line("L\t2\t+\t3\t+\t*")
    path = gfapy.Line("P\t4\t1+,2+,3+\t*")

    graph = gfapy.Gfa()
    for record in segments + [link_1_2, link_2_3, path]:
        graph.append(record)

    expected_links = [
        gfapy.OrientedLine(link_1_2, "+"),
        gfapy.OrientedLine(link_2_3, "+"),
    ]
    self.assertEqual(expected_links, path.links)

    path.disconnect()
    self.assertEqual([], path.links)
def oriented_relations(self, orientation, oriented_segment, collection="edges"):
    """Select the relations that join this line, as oriented, to a segment.

    Parameters
    ----------
    orientation
        Orientation paired with ``self`` to build the oriented line passed
        to ``other_oriented_segment``.
    oriented_segment
        Value each relation's other oriented segment is compared against.
    collection : str
        Name of the attribute of ``self`` holding the relations to scan
        (default ``"edges"``).

    Returns
    -------
    list
        Relations whose ``other_oriented_segment(..., tolerant=True)``
        equals ``oriented_segment``.
    """
    selected = []
    for relation in getattr(self, collection):
        own_end = gfapy.OrientedLine(self, orientation)
        other_end = relation.other_oriented_segment(own_end, tolerant=True)
        if other_end == oriented_segment:
            selected.append(relation)
    return selected
def _initialize_links(self):
    """Fill ``self._refs["links"]`` with the oriented links the path needs.

    For each ``(from_segment, to_segment, cigar)`` triple returned by
    ``_compute_required_links``, the link is searched in the Gfa when both
    segments are found there. If no link is available, a virtual GFA1
    link is created and connected. The link is stored with orientation
    ``"-"`` when ``is_compatible_complement`` reports a match and ``"+"``
    otherwise, and this path is registered on it under ``"paths"``.

    Raises
    ------
    gfapy.NotFoundError
        If a link is missing and the Gfa has ``_segments_first_order`` set.
    """
    self._refs["links"] = []
    for from_segment, to_segment, cigar in self._compute_required_links():
        link = None
        orient = "+"
        both_segments_found = (
            self._gfa.segment(from_segment.line)
            and self._gfa.segment(to_segment.line))
        if both_segments_found:
            link = self._gfa._search_link(from_segment, to_segment, cigar)

        if link is None:
            if self._gfa._segments_first_order:
                raise gfapy.NotFoundError(
                    "Path: {}\n".format(self) +
                    "requires a non-existing link:\n" +
                    "from={} to={} cigar={}".format(
                        from_segment, to_segment, cigar))
            # No existing link: create a virtual placeholder.
            link_data = {
                "from_segment": from_segment.line,
                "from_orient": from_segment.orient,
                "to_segment": to_segment.line,
                "to_orient": to_segment.orient,
                "overlap": cigar,
            }
            link = gfapy.line.edge.Link(link_data, virtual=True,
                                        version="gfa1")
            link.connect(self._gfa)
        elif link.is_compatible_complement(from_segment, to_segment, cigar):
            orient = "-"

        self._refs["links"].append(gfapy.OrientedLine(link, orient))
        link._add_reference(self, "paths")
def test_block(self):
    """A blocked OrientedLine rejects changes to ``line`` until unblocked."""
    oriented = gfapy.OrientedLine("a", "+")

    oriented._block()
    with self.assertRaises(gfapy.RuntimeError):
        oriented.line = "b"

    oriented._unblock()
    oriented.line = "b"  # nothing raised
def _import_field_references(self, previous):
    """Point ``sid1`` and ``sid2`` at the segments of the current Gfa.

    Each field is replaced by an OrientedLine built from the segment that
    ``self._gfa.segment`` returns for the field's current line, keeping the
    field's orientation. ``previous`` is not used by this implementation.
    """
    for fieldname in ("sid1", "sid2"):
        segment = self._gfa.segment(self.get(fieldname).line)
        orient = self.get(fieldname).orient
        replacement = gfapy.OrientedLine(segment, orient)
        self._set_existing_field(fieldname, replacement, set_reference=True)
def oriented_to(self):
    """
    Build the oriented segment for the "to" end of the line.

    Returns
    -------
    gfapy.OrientedLine
      Combination of the to_segment and to_orient fields.
    """
    segment, orient = self.to_segment, self.to_orient
    return gfapy.OrientedLine(segment, orient)
def oriented_from(self):
    """
    Build the oriented segment for the "from" end of the line.

    Returns
    -------
    gfapy.OrientedLine
      Combination of the from_segment and from_orient fields.
    """
    segment, orient = self.from_segment, self.from_orient
    return gfapy.OrientedLine(segment, orient)
def validate_decoded(iterable):
    """Validate every element of ``iterable`` as a GFA1 oriented segment.

    Each element is wrapped in a ``gfapy.OrientedLine``, validated, and its
    name is checked against the GFA1 segment-name pattern.

    Raises
    ------
    gfapy.FormatError
        If a name does not match the segment-name pattern.
    """
    for item in iterable:
        oriented = gfapy.OrientedLine(item)
        oriented.validate()
        if re.match(r"^[!-)+-<>-~][!-~]*$", oriented.name) is None:
            raise gfapy.FormatError(
                "{} is not a valid GFA1 segment name\n".format(oriented.name)+
                "(it does not match [!-)+-<>-~][!-~]*)")
def unsafe_encode(obj):
    """Encode an oriented-line list as a comma-separated string.

    A ``str`` is returned unchanged. A ``list`` is encoded by converting
    each element to a ``gfapy.OrientedLine``, taking its string form and
    joining the results with commas.

    Raises
    ------
    gfapy.TypeError
        If ``obj`` is neither a ``str`` nor a ``list``.
    """
    if isinstance(obj, str):
        return obj
    if isinstance(obj, list):
        encoded_items = [str(gfapy.OrientedLine(item)) for item in obj]
        return ",".join(encoded_items)
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: str, list)")
def test_equal(self):
    """Equality of OrientedLine with other OrientedLines, strings and lists."""
    by_name = gfapy.OrientedLine("a", "+")
    by_line = gfapy.OrientedLine(gfapy.Line("S\ta\t*"), "+")
    opposite = gfapy.OrientedLine("a", "-")
    self.assertEqual(by_name, by_line)
    self.assertNotEqual(by_name, opposite)

    # Only the name is compared, not the content of the line.
    other_content = gfapy.OrientedLine(gfapy.Line("S\ta\tCACAC"), "+")
    self.assertEqual(by_line, other_content)

    # Comparison with a string, in both operand orders.
    self.assertEqual("a+", by_name)
    self.assertEqual("a+", by_line)
    self.assertEqual(by_name, "a+")
    self.assertEqual(by_line, "a+")

    # Comparison with a list, in both operand orders.
    self.assertEqual(by_name, ["a", "+"])
    self.assertEqual(by_line, ["a", "+"])
    self.assertEqual(["a", "+"], by_name)
    self.assertEqual(["a", "+"], by_line)
def test_edges_references(self):
    """Edge sid1/sid2 switch between names and segment lines on (dis)connect.

    Before connection the fields hold oriented names. After the segments
    and the edge are appended to the Gfa, they hold the segment lines.
    Disconnecting the edge, or one of its segments, restores the names.
    """
    g = gfapy.Gfa()
    lab = gfapy.Line("E\t*\ta+\tb+\t0\t10\t90\t100$\t*")
    self.assertEqual(gfapy.OrientedLine("a", "+"), lab.sid1)
    self.assertEqual(gfapy.OrientedLine("b", "+"), lab.sid2)
    sa = gfapy.Line("S\ta\t100\t*")
    g.append(sa)
    sb = gfapy.Line("S\tb\t100\t*")
    g.append(sb)
    g.append(lab)
    self.assertEqual(sa, lab.sid1.line)
    self.assertEqual(sb, lab.sid2.line)
    lab.disconnect()
    self.assertEqual("a", lab.sid1.line)
    self.assertEqual("b", lab.sid2.line)
    # disconnection of segment cascades on edges
    g.append(lab)
    # assertTrue/assertFalse instead of bare assert: the checks survive
    # ``python -O`` and report through unittest.
    self.assertTrue(lab.is_connected())
    self.assertEqual(sa, lab.sid1.line)
    sa.disconnect()
    self.assertFalse(lab.is_connected())
    self.assertEqual("a", lab.sid1.line)
def test_gap_references(self):
    """Gap sid1/sid2 switch between names and segment lines on (dis)connect.

    Before connection the fields hold oriented names. After the segments
    and the gap are appended to the Gfa, they hold the segment lines.
    Disconnecting the gap, or one of its segments, restores the names.
    """
    g = gfapy.Gfa()
    gap = gfapy.Line("G\t*\ta+\tb+\t90\t*")
    self.assertEqual(gfapy.OrientedLine("a", "+"), gap.sid1)
    self.assertEqual(gfapy.OrientedLine("b", "+"), gap.sid2)
    sa = gfapy.Line("S\ta\t100\t*")
    g.append(sa)
    sb = gfapy.Line("S\tb\t100\t*")
    g.append(sb)
    g.append(gap)
    self.assertEqual(sa, gap.sid1.line)
    self.assertEqual(sb, gap.sid2.line)
    gap.disconnect()
    self.assertEqual("a", gap.sid1.line)
    self.assertEqual("b", gap.sid2.line)
    # disconnection of segment cascades on gaps
    g.append(gap)
    # assertTrue/assertFalse instead of bare assert: the checks survive
    # ``python -O`` and report through unittest.
    self.assertTrue(gap.is_connected())
    self.assertEqual(sa, gap.sid1.line)
    sa.disconnect()
    self.assertFalse(gap.is_connected())
    self.assertEqual("a", gap.sid1.line)
def test_register_line_external(self):
    """Registering/unregistering a fragment updates the external lookups."""
    graph = gfapy.Gfa(version="gfa2")
    external = gfapy.OrientedLine("x","+")
    fragment = gfapy.line.Fragment({"external": external}, version="gfa2")
    fragment._gfa = graph

    graph._register_line(fragment)
    self.assertEqual([fragment], graph.fragments)
    self.assertEqual([fragment], graph.fragments_for_external("x"))
    self.assertEqual(["x"], graph.external_names)

    graph._unregister_line(fragment)
    self.assertEqual([], graph.fragments)
    self.assertEqual([], graph.fragments_for_external("x"))
    self.assertEqual([], graph.external_names)
def test_properties(self):
    """``line``, ``orient`` and ``name`` can be read; line and orient can be set."""
    oriented = gfapy.OrientedLine("a", "+")
    self.assertEqual("a", oriented.line)
    self.assertEqual("+", oriented.orient)
    self.assertEqual("a", oriented.name)

    # Replace the name by a Line instance: name follows the new line.
    segment = gfapy.Line("S\tb\t*\txx:Z:1.0")
    oriented.line = segment
    self.assertEqual(segment, oriented.line)
    self.assertEqual("b", oriented.name)
    self.assertEqual("+", oriented.orient)

    # Changing the orientation leaves the line untouched.
    oriented.orient = "-"
    self.assertEqual(segment, oriented.line)
    self.assertEqual("-", oriented.orient)
def test_fragments_references(self):
    """Fragment ``sid`` switches between name and segment line on (dis)connect.

    Before connection ``sid`` holds the segment name. After the segment and
    the fragment are appended to the Gfa, it holds the segment line.
    Disconnecting the fragment, or its segment, restores the name.
    """
    g = gfapy.Gfa()
    f = gfapy.Line("F\ta\tf+\t0\t200\t281\t502$\t*")
    self.assertEqual("a", f.sid)
    self.assertEqual(gfapy.OrientedLine("f", "+"), f.external)
    sa = gfapy.Line("S\ta\t100\t*")
    g.append(sa)
    g.append(f)
    self.assertEqual(sa, f.sid)
    f.disconnect()
    self.assertEqual("a", f.sid)
    # disconnection of segment cascades on fragments
    g.append(f)
    # assertTrue/assertFalse instead of bare assert: the checks survive
    # ``python -O`` and report through unittest.
    self.assertTrue(f.is_connected())
    self.assertEqual(sa, f.sid)
    sa.disconnect()
    self.assertFalse(f.is_connected())
    self.assertEqual("a", f.sid)
def test_init(self):
    """OrientedLine can be built from two args, a string or a list."""
    reference = gfapy.OrientedLine("a", "+")

    # Construction does not validate: an invalid orientation and an
    # invalid line name are both accepted here.
    gfapy.OrientedLine("a", "*")
    gfapy.OrientedLine("a\ta", "+")

    from_string = gfapy.OrientedLine("a+")
    self.assertEqual(reference, from_string)
    from_list = gfapy.OrientedLine(["a", "+"])
    self.assertEqual(reference, from_list)

    # Lists that are too short cannot be unpacked.
    with self.assertRaises(IndexError):
        gfapy.OrientedLine([])
    with self.assertRaises(IndexError):
        gfapy.OrientedLine(["a"])

    # Extra list elements are ignored: nothing raised.
    gfapy.OrientedLine(["a", "+", 1])
def _initialize_references(self):
    """Replace the ``sid1``/``sid2`` names by references to segment lines.

    For each of the two fields the segment is looked up in the Gfa by the
    name stored in the field. If it is not found, a virtual GFA2 segment
    (``slen`` 1, sequence ``*``) is created and connected in its place,
    unless the Gfa has ``_segments_first_order`` set. The field is then
    set to an OrientedLine of the segment with the original orientation,
    and this line is registered on the segment under the key given by
    ``_refkey_for_s``.

    Raises
    ------
    gfapy.NotFoundError
        If a segment is missing and the Gfa has ``_segments_first_order``
        set.
    """
    for snum in [1, 2]:
        sid = "sid{}".format(snum)
        orient = self.get(sid).orient
        linesymbol = self.get(sid).line
        s = self._gfa.segment(linesymbol)
        if s is None:
            if self._gfa._segments_first_order:
                # Name the offending line and segment instead of raising
                # an exception without a message.
                raise gfapy.NotFoundError(
                    "Line: {}\n".format(self) +
                    "requires a non-existing segment:\n" +
                    "{}={}".format(sid, linesymbol))
            s = gfapy.line.segment.GFA2(
                {
                    "sid": linesymbol,
                    "slen": 1,
                    "sequence": "*",
                },
                version="gfa2",
                virtual=True)
            s.connect(self._gfa)
        self._set_existing_field(sid, gfapy.OrientedLine(s, orient),
                                 set_reference=True)
        s._add_reference(self, self._refkey_for_s(snum))
def _initialize_references(self):
    """Replace the ``sid1``/``sid2`` names by references to segment lines.

    The substring types of both ends are computed first from
    ``beg1``/``end1`` and ``beg2``/``end2``. Then, for each of the two
    fields, the segment is looked up in the Gfa by the name stored in the
    field. If it is not found, a virtual GFA2 segment (``slen`` 1,
    sequence ``*``) is created and connected in its place, unless the Gfa
    has ``_segments_first_order`` set. The field is then set to an
    OrientedLine of the segment with the original orientation, and this
    line is registered on the segment under the key given by
    ``_refkey_for_s`` for the segment number and the two substring types.

    Raises
    ------
    gfapy.NotFoundError
        If a segment is missing and the Gfa has ``_segments_first_order``
        set.
    """
    st1 = self._substring_type(self.beg1, self.end1)[0]
    st2 = self._substring_type(self.beg2, self.end2)[0]
    for snum in [1, 2]:
        sid = "sid{}".format(snum)
        orient = self.get(sid).orient
        s = self._gfa.segment(self.get(sid).line)
        if s is None:
            if self._gfa._segments_first_order:
                # Name the offending line and segment instead of raising
                # an exception without a message.
                raise gfapy.NotFoundError(
                    "Line: {}\n".format(self) +
                    "requires a non-existing segment:\n" +
                    "{}={}".format(sid, self.get(sid).line))
            s = gfapy.line.segment.GFA2(
                {
                    "sid": self.get(sid).line,
                    "slen": 1,
                    "sequence": "*",
                },
                version="gfa2",
                virtual=True)
            s.connect(self._gfa)
        self._set_existing_field(sid, gfapy.OrientedLine(s, orient),
                                 set_reference=True)
        s._add_reference(self, self._refkey_for_s(snum, st1, st2))
def test_str(self):
    """str() of an OrientedLine is the line name followed by the orientation."""
    named = gfapy.OrientedLine("a", "-")
    self.assertEqual("a-", str(named))

    segment = gfapy.Line("S\tb\t*\txx:Z:1.0")
    wrapped = gfapy.OrientedLine(segment, "+")
    self.assertEqual("b+", str(wrapped))
def test_select_by_hash_gfa2(self):
    """select() with a dict query returns the matching GFA2 lines."""
    gfa = TestAPILinesFinders.gfa2

    # search segments (compared as sets of strings)
    by_sequence = gfa.select({
        "record_type": "S",
        "sequence": "CGAT"
    })
    self.assertEqual(
        set(TestAPILinesFinders.l_gfa2_a[0:2]),
        {str(x) for x in by_sequence})

    lines = TestAPILinesFinders.l_gfa2
    # (expected lines, query) pairs, checked in the listed order
    cases = [
        # search segments
        (lines[1:2], {"record_type": "S", "slen": 110}),
        # search edges
        (lines[2:3], {"record_type": "E",
                      "sid1": gfapy.OrientedLine("1", "+")}),
        # search gaps
        (lines[3:4], {"record_type": "G",
                      "sid1": gfapy.OrientedLine("1", "-")}),
        (lines[11:12], {"record_type": "G", "disp": 2000}),
        # search paths
        (lines[4:5], {"record_type": "O", "items": "1+ 2-"}),
        # search sets
        (lines[5:6], {"record_type": "U", "name": "u1"}),
        # search fragments
        (lines[6:9], {"record_type": "F", "external": "read1-"}),
        # search custom records
        (lines[9:10], {"record_type": "X", "xx": "A"}),
    ]
    for expected, query in cases:
        self.assertEqual(expected, gfa.select(query))
def unsafe_decode(string):
    """Decode a string into an OrientedLine without validating it.

    The last character is taken as the orientation and everything before
    it as the line name.
    """
    name, orient = string[:-1], string[-1]
    return gfapy.OrientedLine(name, orient)
def unsafe_decode(string):
    """Decode a comma-separated string into a list of OrientedLine.

    No validation is done. In each comma-separated item the last character
    is taken as the orientation and everything before it as the line name.
    """
    decoded = []
    for item in string.split(","):
        name = str(item[0:-1])
        orient = str(item[-1])
        decoded.append(gfapy.OrientedLine(name, orient))
    return decoded
def test_gfa2_paths_references(self):
    """Check items, captured paths and backreferences of GFA2 ordered groups.

    Covers the items before and after connection, the captured path
    induced by the edges, the ``paths`` backreferences on items, and the
    effect of disconnecting and reconnecting a group, an item, and an item
    of a nested group.
    """
    g = gfapy.Gfa()
    s = {}
    for name in ["a", "b", "c", "d", "e", "f"]:
        s[name] = gfapy.Line("S\t{}\t1000\t*".format(name))
        g.append(s[name])
    # p1 is given as two O lines with the same name.
    path1_part1 = gfapy.Line("O\tp1\tp2- b+")
    path1_part2 = gfapy.Line("O\tp1\tc- e-c+-")
    path1 = path1_part2
    path2 = gfapy.Line("O\tp2\tf+ a+")
    self.assertEqual([gfapy.OrientedLine("p2", "-"),
                      gfapy.OrientedLine("b", "+")],
                     path1_part1.items)
    self.assertEqual([gfapy.OrientedLine("c", "-"),
                      gfapy.OrientedLine("e-c+", "-")],
                     path1_part2.items)
    self.assertEqual([gfapy.OrientedLine("f", "+"),
                      gfapy.OrientedLine("a", "+")],
                     path2.items)
    with self.assertRaises(gfapy.RuntimeError):
        path1.captured_path
    with self.assertRaises(gfapy.RuntimeError):
        path2.captured_path
    # connection
    g.append(path1_part1)
    g.append(path1_part2)
    g.append(path2)
    # edges
    e = {}
    for name in ["a+b+", "b+c-", "c-d+", "e-c+", "a-f-", "f-b+"]:
        coord1 = "900\t1000$" if (name[1] == "+") else "0\t100"
        coord2 = "0\t100" if (name[3] == "+") else "900\t1000$"
        e[name] = gfapy.Line("E\t{}\t{}\t{}\t{}\t{}\t100M".format(
            name, name[0:2], name[2:4], coord1, coord2))
        g.append(e[name])
    # items
    self.assertEqual([gfapy.OrientedLine(path2, "-"),
                      gfapy.OrientedLine(s["b"], "+"),
                      gfapy.OrientedLine(s["c"], "-"),
                      gfapy.OrientedLine(e["e-c+"], "-")],
                     path1.items)
    self.assertEqual([gfapy.OrientedLine(s["f"], "+"),
                      gfapy.OrientedLine(s["a"], "+")],
                     path2.items)
    # induced set
    self.assertEqual([gfapy.OrientedLine(s["f"], "+"),
                      gfapy.OrientedLine(e["a-f-"], "-"),
                      gfapy.OrientedLine(s["a"], "+")],
                     path2.captured_path)
    self.assertEqual([gfapy.OrientedLine(s["a"], "-"),
                      gfapy.OrientedLine(e["a-f-"], "+"),
                      gfapy.OrientedLine(s["f"], "-"),
                      gfapy.OrientedLine(e["f-b+"], "+"),
                      gfapy.OrientedLine(s["b"], "+"),
                      gfapy.OrientedLine(e["b+c-"], "+"),
                      gfapy.OrientedLine(s["c"], "-"),
                      gfapy.OrientedLine(e["e-c+"], "-"),
                      gfapy.OrientedLine(s["e"], "+")],
                     path1.captured_path)
    # backreferences
    for line in [path2, s["b"], s["c"], e["e-c+"]]:
        self.assertEqual([path1], line.paths)
    for line in [s["f"], s["a"]]:
        self.assertEqual([path2], line.paths)
    # group disconnection
    path1.disconnect()
    self.assertEqual([gfapy.OrientedLine("p2", "-"),
                      gfapy.OrientedLine("b", "+"),
                      gfapy.OrientedLine("c", "-"),
                      gfapy.OrientedLine("e-c+", "-")],
                     path1.items)
    with self.assertRaises(gfapy.RuntimeError):
        path1.captured_path
    self.assertEqual([gfapy.OrientedLine(s["f"], "+"),
                      gfapy.OrientedLine(s["a"], "+")],
                     path2.items)
    for line in [path2, s["b"], s["c"], e["e-c+"]]:
        self.assertEqual([], line.paths)
    # group reconnection
    g.append(path1)
    self.assertEqual([gfapy.OrientedLine(path2, "-"),
                      gfapy.OrientedLine(s["b"], "+"),
                      gfapy.OrientedLine(s["c"], "-"),
                      gfapy.OrientedLine(e["e-c+"], "-")],
                     path1.items)
    self.assertEqual([gfapy.OrientedLine(s["f"], "+"),
                      gfapy.OrientedLine(s["a"], "+")],
                     path2.items)
    for line in [path2, s["b"], s["c"], e["e-c+"]]:
        self.assertEqual([path1], line.paths)
    # item disconnection cascades on group
    # (assertTrue/assertFalse instead of bare assert: the checks survive
    # ``python -O`` and report through unittest)
    self.assertTrue(path1.is_connected())
    self.assertTrue(path2.is_connected())
    e["e-c+"].disconnect()
    self.assertFalse(path1.is_connected())
    self.assertTrue(path2.is_connected())
    g.append(e["e-c+"])
    g.append(path1)
    # two-level disconnection cascade
    self.assertTrue(path1.is_connected())
    self.assertTrue(path2.is_connected())
    s["f"].disconnect()
    self.assertFalse(path2.is_connected())
    self.assertFalse(path1.is_connected())
def test_search_link(self):
    """_search_link finds direct and complement links, honouring the cigar."""
    gfa = TestUnitLineFinders.gfa1
    lines = TestUnitLineFinders.l_gfa1
    oriented = gfapy.OrientedLine

    # search using the direct link
    found = gfa._search_link(oriented("1", "+"), oriented("2", "+"), "*")
    self.assertEqual(lines[4], found)

    # search using the complement link
    found = gfa._search_link(oriented("2", "-"), oriented("1", "-"), "*")
    self.assertEqual(lines[4], found)

    # with cigar parameter, but placeholder in line
    found = gfa._search_link(oriented("1", "+"), oriented("2", "+"), "10M")
    self.assertEqual(lines[4], found)

    # with cigar parameter, and cigar in line
    found = gfa._search_link(oriented("1", "-"), oriented("3", "+"), "10M")
    self.assertEqual(lines[5], found)
    found = gfa._search_link(oriented("1", "-"), oriented("3", "+"), "12M")
    self.assertEqual(None, found)

    # with placeholder parameter, and cigar in line
    found = gfa._search_link(oriented("1", "-"), oriented("3", "+"), "*")
    self.assertEqual(lines[5], found)
def test_delegate_methods(self):
    """Field access and methods of the wrapped line are reachable via OrientedLine."""
    segment = gfapy.Line("S\ta\tCACAC")
    oriented = gfapy.OrientedLine(segment, "+")

    # Reading a field, directly and through a method.
    self.assertEqual("CACAC", oriented.sequence)
    self.assertEqual("CACAC", oriented.field_to_s("sequence"))

    # Setting a tag and reading back its string form.
    oriented.set("xx", 1)
    self.assertEqual("xx:i:1", oriented.field_to_s("xx", True))