def decode(string):
    """Decode the string representation of an optional integer field.

    Parameters:
      string (str) : content of the field

    Returns:
      int, gfapy.Placeholder : a placeholder if the string is the
        placeholder symbol ``*``, otherwise the integer parsed from the string

    Raises:
      gfapy.FormatError : if the content cannot be converted by ``int()``
    """
    if string == "*":
        return gfapy.Placeholder()
    try:
        return int(string)
    # Only conversion failures are reported as format errors; a bare
    # ``except`` would also trap KeyboardInterrupt and SystemExit.
    except (ValueError, TypeError) as err:
        raise gfapy.FormatError(
            "the string does not represent a valid integer") from err
def test_register_line_name_absent(self):
    """Register and unregister a GFA2 edge whose id is a placeholder."""
    graph = gfapy.Gfa(version="gfa2")
    edge = gfapy.line.edge.GFA2({"eid": gfapy.Placeholder()},
                                version="gfa2")
    edge._gfa = graph

    # After registration the edge is listed, but contributes no edge name.
    graph._register_line(edge)
    self.assertEqual([edge], graph.edges)
    self.assertEqual([], graph.edge_names)

    # Unregistering removes the edge from the list again.
    graph._unregister_line(edge)
    self.assertEqual([], graph.edges)
def Sequence(string):
    """Interpret the content of a sequence field.

    Parameters:
      string (str) : content of a sequence field

    Returns:
      str, gfapy.Placeholder : a placeholder when the content is the
        placeholder symbol ``*``; in all other cases the string is returned
        unchanged
    """
    if string == "*":
        return gfapy.Placeholder()
    return string
def unsafe_decode(string):
    """Decode a field without validating its content.

    Parameters:
      string (str) : content of the field

    Returns:
      str, gfapy.Placeholder : a placeholder for the placeholder symbol
        ``*``; any other string is returned as it is
    """
    return gfapy.Placeholder() if string == "*" else string
def decode(string):
    """Decode a field, validating any content that is not a placeholder.

    Parameters:
      string (str) : content of the field

    Returns:
      str, gfapy.Placeholder : a placeholder for the placeholder symbol
        ``*``; otherwise the string itself, once ``validate_encoded`` has
        accepted it
    """
    if string == "*":
        return gfapy.Placeholder()
    # Validation is called only for its effect; its return value is unused.
    validate_encoded(string)
    return string
def _add_segment_to_merged(self, merged, segment, is_reversed, cut, init,
                           enable_tracking=False, merged_name=None):
    """Add the content of one segment to a merged segment under construction.

    Parameters:
      merged : the merged segment being built; its ``sequence``, ``name``
        and ``LN`` are written and, when tracking is enabled, also its
        ``rn``, ``mp`` and ``or`` fields
      segment : the segment to add; its ``name``, ``sequence`` and ``LN`` are
        read and, when tracking is enabled, also ``rn``, ``mp`` and ``or``
      is_reversed (bool) : if true, the reverse complement of the segment
        sequence is used and, when tracking is enabled, the segment name,
        ``or`` value and position arrays are reversed with the helper methods
      cut (int) : number of leading elements removed from the (possibly
        reverse complemented) sequence; it is also subtracted from the
        segment length and from the shifted positions when extending
        ``merged`` (presumably the overlap with the previous segment;
        TODO confirm against the caller)
      init (bool) : if true, the fields of ``merged`` are initialized from
        this segment; otherwise they are extended
      enable_tracking (bool) : if true, the ``rn``, ``mp`` and ``or`` fields
        are computed and stored in ``merged``
      merged_name : if truthy, it is used as the name of ``merged`` and
        the names of the single segments are not collected
    """
    n = segment.name
    if is_reversed:
        s = gfapy.sequence.rc(segment.sequence)[cut:]
        if enable_tracking:
            n = self._reverse_segment_name(segment.name, "_")
            rn = self._reverse_pos_array(segment.rn, segment.LN)
            mp = self._reverse_pos_array(segment.mp, segment.LN)
    else:
        s = segment.sequence[cut:]
        if enable_tracking:
            rn = segment.rn
            mp = segment.mp
    if enable_tracking:
        # Default positions spanning the whole segment, if none are given
        # and the segment length is known.
        if not mp and segment.LN:
            mp = [1, segment.LN]
        # Without an "or" value, the (possibly reversed) segment name is
        # used in its place.
        if segment.get("or") is None:
            o = n
        elif is_reversed:
            o = self._reverse_segment_name(segment.get("or"), ",")
        else:
            o = segment.get("or")
    if init:
        # First segment: sequence, name and "or" are started as
        # one-element lists.
        merged.sequence = [s]
        if merged_name:
            merged.name = [merged_name]
        else:
            merged.name = [n]
        merged.LN = segment.LN
        if enable_tracking:
            merged.rn = rn
            merged.set("or", [o])
            merged.mp = mp
    else:
        # A placeholder sequence in the segment replaces the whole merged
        # sequence with a placeholder.
        if gfapy.is_placeholder(segment.sequence):
            merged.sequence = gfapy.Placeholder()
        else:
            merged.sequence.append(s)
        if not merged_name:
            merged.name.append(n)
        if merged.LN:
            if enable_tracking:
                # Shift the segment positions by the length merged so far,
                # less the cut.
                if rn:
                    rn = [pos - cut + merged.LN for pos in rn]
                    if not merged.rn:
                        merged.rn = rn
                    else:
                        merged.rn += rn
                if mp and merged.mp:
                    merged.mp += [pos - cut + merged.LN for pos in mp]
            if segment.LN:
                merged.LN += (segment.LN - cut)
            else:
                # Segment length not available: the merged length is unset.
                merged.LN = None
        elif enable_tracking:
            # No merged length available: the mp positions are discarded.
            merged.mp = None
        if enable_tracking:
            if not merged.get("or"):
                merged.set("or", [o])
            else:
                merged.get("or").append(o)
class TestUnitLineEquivalence(unittest.TestCase):
    """Tests for equality, diffing and field matching of ``gfapy.Line``.

    The fixtures are class attributes: a set of lines (segments, a
    containment, a link and edges) and a set of dictionaries derived from
    ``h_a``, each differing from it in a single entry.

    The checks use the ``unittest`` assertion methods instead of bare
    ``assert`` statements, so that they are still executed when Python
    runs with ``-O``.
    """

    # Line fixtures
    a = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a")
    b = gfapy.Line("S\tB\t*\tLN:i:10")
    c = gfapy.Line("C\tA\t+\tB\t+\t10\t*")
    l = gfapy.Line("L\tA\t+\tB\t+\t*")
    e = gfapy.Line("E\t1\tA+\tB-\t0\t100$\t20\t121\t*")

    # Variants of the lines above, each with a single difference
    a_ln = gfapy.Line("S\tA\t*\tLN:i:10\txx:Z:a")
    a_seq = gfapy.Line("S\tA\tACCTTCGT\tLN:i:8\txx:Z:a")
    a_gfa2 = gfapy.Line("S\tA\t8\tACCTTCGT\txx:Z:a")
    a_noxx = gfapy.Line("S\tA\t*\tLN:i:8")
    a_yy = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a\tyy:Z:b")
    l_from = gfapy.Line("L\tC\t+\tB\t+\t*")
    e_name = gfapy.Line("E\t2\tA+\tB-\t0\t100$\t20\t121\t*")

    # Dictionary fixtures: h_a and variants with a single difference
    h_a = {"record_type": "S", "name": "A", "LN": 8, "xx": "a"}
    h_a_rt = dict(h_a, record_type="X")
    h_a_pl = dict(h_a, name=gfapy.Placeholder())
    h_a_name = dict(h_a, name="B")
    h_a_seq = dict(h_a, sequence="ACCTTCGT")
    h_a_ln = dict(h_a, LN=10)
    h_a_LNstr = dict(h_a, LN="8")
    h_a_noxx = h_a.copy()
    h_a_noxx.pop("xx")
    h_a_yy = dict(h_a, yy="b")
    h_a_gfa2 = {"record_type": "S", "sid": "A", "slen": 8, "xx": "a"}

    def test_line_placeholder(self):
        """Lines are not placeholders."""
        self.assertFalse(gfapy.is_placeholder(self.a))
        self.assertFalse(gfapy.is_placeholder(self.b))

    def test_line_diff_two_segments(self):
        """``diff`` lists the differences in both directions."""
        adiffb = [("different", "positional_field", "name", "A", "B"),
                  ("exclusive", "<", "tag", "xx", "Z", "a"),
                  ("different", "tag", "LN", "i", "8", "i", "10")]
        self.assertEqual(sorted(adiffb), sorted(self.a.diff(self.b)))
        bdiffa = [("different", "positional_field", "name", "B", "A"),
                  ("exclusive", ">", "tag", "xx", "Z", "a"),
                  ("different", "tag", "LN", "i", "10", "i", "8")]
        self.assertEqual(sorted(bdiffa), sorted(self.b.diff(self.a)))
        # a line has no differences to itself
        self.assertEqual([], self.a.diff(self.a))
        self.assertEqual([], self.b.diff(self.b))

    def test_line_diffscript_two_segments(self):
        """Executing a diffscript on a clone makes it equal to the target."""
        # The script refers to the clone by the variable name passed as the
        # second argument, so the local names must match those strings.
        acpy = self.a.clone()
        exec(acpy.diffscript(self.b, "acpy"))
        self.assertNotEqual(str(self.b), str(self.a))
        self.assertEqual(str(self.b), str(acpy))
        bcpy = self.b.clone()
        exec(bcpy.diffscript(self.a, "bcpy"))
        self.assertNotEqual(str(self.a), str(self.b))
        self.assertEqual(str(self.a), str(bcpy))

    def test_equal(self):
        """``==`` holds for the same line and clones, not for variants."""
        # ``==`` is written out, so that the equality operator of the line
        # is the operation which is tested.
        self.assertTrue(self.a == self.a)
        self.assertTrue(self.b == self.b)
        self.assertTrue(self.c == self.c)
        self.assertTrue(self.l == self.l)
        self.assertTrue(self.e == self.e)
        self.assertFalse(self.a == self.b)
        self.assertFalse(self.a == self.a_ln)
        self.assertFalse(self.a == self.a_seq)
        self.assertFalse(self.a == self.a_gfa2)
        self.assertFalse(self.a == self.a_noxx)
        self.assertTrue(self.b == self.b.clone())
        self.assertTrue(self.a == self.a.clone())

    def test_pointer_equality(self):
        """A clone is a distinct object."""
        self.assertIs(self.a, self.a)
        self.assertIsNot(self.a, self.a.clone())

    def test_has_eql_fields(self):
        """``_has_eql_fields`` compares lines, with optional ignored fields."""
        # same object
        self.assertTrue(self.a._has_eql_fields(self.a))
        # clone
        self.assertTrue(self.a._has_eql_fields(self.a.clone()))
        # positional field difference
        self.assertFalse(self.l._has_eql_fields(self.l_from))
        self.assertTrue(self.l._has_eql_fields(self.l_from, ["from"]))
        # positional field difference: name alias
        self.assertFalse(self.e._has_eql_fields(self.e_name))
        self.assertTrue(self.e._has_eql_fields(self.e_name, ["eid"]))
        self.assertTrue(self.e._has_eql_fields(self.e_name, ["name"]))
        # positional field difference: placeholder in line
        self.assertTrue(self.a._has_eql_fields(self.a_seq))
        # positional field difference: placeholder in reference
        self.assertTrue(self.a_seq._has_eql_fields(self.a))
        # tag difference
        self.assertFalse(self.a._has_eql_fields(self.a_ln))
        self.assertTrue(self.a._has_eql_fields(self.a_ln, ["LN"]))
        # additional tag in line
        self.assertTrue(self.a._has_eql_fields(self.a_noxx))
        self.assertFalse(self.a_noxx._has_eql_fields(self.a))
        # missing tag in line
        self.assertFalse(self.a._has_eql_fields(self.a_yy))
        self.assertTrue(self.a_yy._has_eql_fields(self.a))
        self.assertTrue(self.a._has_eql_fields(self.a_yy, ["yy"]))
        # gfa1 vs gfa2
        self.assertTrue(self.a._has_eql_fields(self.a_gfa2, ["slen"]))
        self.assertTrue(self.a_gfa2._has_eql_fields(self.a, ["LN"]))
        # record_type
        self.assertFalse(self.c._has_eql_fields(self.l))
        self.assertFalse(self.l._has_eql_fields(self.c))
        self.assertTrue(self.c._has_eql_fields(self.l, ["record_type"]))
        self.assertTrue(
            self.l._has_eql_fields(self.c, ["record_type", "pos"]))

    def test_has_field_values(self):
        """``_has_field_values`` compares a line with a dictionary."""
        self.assertTrue(self.a._has_field_values(self.h_a))
        # record_type difference
        self.assertFalse(self.a._has_field_values(self.h_a_rt))
        self.assertTrue(
            self.a._has_field_values(self.h_a_rt, ["record_type"]))
        # positional field difference
        self.assertFalse(self.a._has_field_values(self.h_a_name))
        self.assertTrue(self.a._has_field_values(self.h_a_name, ["name"]))
        # positional field difference: placeholder in line
        self.assertTrue(self.a._has_field_values(self.h_a_seq))
        # positional field difference: placeholder in hash is compared
        self.assertFalse(self.a._has_field_values(self.h_a_pl))
        self.assertTrue(self.a._has_field_values(self.h_a_pl, ["name"]))
        # tag difference
        self.assertFalse(self.a._has_field_values(self.h_a_ln))
        self.assertTrue(self.a._has_field_values(self.h_a_ln, ["LN"]))
        # encoded value
        self.assertTrue(self.a._has_field_values(self.h_a_LNstr))
        # additional tag in line
        self.assertTrue(self.a._has_field_values(self.h_a_noxx))
        # missing tag in line
        self.assertFalse(self.a._has_field_values(self.h_a_yy))
        self.assertTrue(self.a._has_field_values(self.h_a_yy, ["yy"]))
        # gfa1 vs gfa2
        self.assertTrue(self.a._has_field_values(self.h_a_gfa2, ["slen"]))