Beispiel #1
0
def decode(string):
    """Decode an integer field that may hold a placeholder.

    Parameters:
      string (str) : content of the field

    Returns:
      int, gfapy.Placeholder : a placeholder if the string is ``*``,
        otherwise the integer value parsed from the string

    Raises:
      gfapy.FormatError : if the string cannot be converted by ``int()``
    """
    if string == "*":
        return gfapy.Placeholder()
    try:
        return int(string)
    except (ValueError, TypeError) as err:
        # Only conversion failures are reported as format errors; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        raise gfapy.FormatError(
            "the string does not represent a valid integer") from err
Beispiel #2
0
 def test_register_line_name_absent(self):
   # An edge whose eid is a placeholder is listed among the edges but
   # contributes no entry to edge_names; unregistering removes it again.
   graph = gfapy.Gfa(version="gfa2")
   unnamed_edge = gfapy.line.edge.GFA2(
       {"eid": gfapy.Placeholder()}, version="gfa2")
   unnamed_edge._gfa = graph
   graph._register_line(unnamed_edge)
   self.assertEqual([unnamed_edge], graph.edges)
   self.assertEqual([], graph.edge_names)
   graph._unregister_line(unnamed_edge)
   self.assertEqual([], graph.edges)
Beispiel #3
0
def Sequence(string):
    """Decode the textual content of a sequence field.

    Parameters:
      string (str) : raw text of the sequence field

    Returns:
      str, gfapy.Placeholder : a placeholder when the text is the
        placeholder symbol ``*``; in every other case the text is
        returned unchanged
    """
    if string == "*":
        return gfapy.Placeholder()
    return string
Beispiel #4
0
def unsafe_decode(string):
    """Decode a field without validating its content.

    Parameters:
      string (str) : content of the field

    Returns:
      str, gfapy.Placeholder : a placeholder if the string is ``*``,
        otherwise the string itself, with no checks applied
    """
    return gfapy.Placeholder() if string == "*" else string
Beispiel #5
0
def decode(string):
    """Decode a field, validating any non-placeholder content.

    Parameters:
      string (str) : content of the field

    Returns:
      str, gfapy.Placeholder : a placeholder if the string is ``*``;
        otherwise the string itself, after it has been passed to
        ``validate_encoded`` (whose return value is not used)
    """
    if string == "*":
        return gfapy.Placeholder()
    validate_encoded(string)
    return string
Beispiel #6
0
 def _add_segment_to_merged(self,
                            merged,
                            segment,
                            is_reversed,
                            cut,
                            init,
                            enable_tracking=False,
                            merged_name=None):
     """Adds the data of one segment to a merged segment being built.

     Parameters:
       merged : line under construction; its sequence, name and LN are
         written here, plus rn, mp and the "or" tag when tracking is on
       segment : line whose name, sequence, LN and (with tracking) rn, mp
         and "or" tag are read
       is_reversed (bool) : if true, the sequence is passed through
         gfapy.sequence.rc (presumably reverse complement -- confirm) and,
         with tracking, name and position arrays are reversed as well
       cut (int) : number of leading characters dropped from the oriented
         sequence; also subtracted from tracked positions and from the
         length added to merged.LN
       init (bool) : if true, the fields of merged are assigned from
         scratch; otherwise they are extended (presumably true only for
         the first segment of a merge -- confirm against the caller)
       enable_tracking (bool) : if true, the rn, mp and "or" fields of
         merged are maintained too
       merged_name : if set, used as the only name of merged and the names
         of the single segments are not collected

     Returns:
       None : merged is modified in place
     """
     n = segment.name
     if is_reversed:
         # Orient the sequence first, then drop the first `cut` characters.
         s = gfapy.sequence.rc(segment.sequence)[cut:]
         if enable_tracking:
             n = self._reverse_segment_name(segment.name, "_")
             rn = self._reverse_pos_array(segment.rn, segment.LN)
             mp = self._reverse_pos_array(segment.mp, segment.LN)
     else:
         s = segment.sequence[cut:]
         if enable_tracking:
             rn = segment.rn
             mp = segment.mp
     # NOTE: rn, mp and o are bound only when enable_tracking is true; every
     # later use of them is guarded by the same flag.
     if enable_tracking:
         # No mp positions available: default to the span 1..LN of the segment.
         if not mp and segment.LN:
             mp = [1, segment.LN]
         # Value collected in the "or" tag: the (possibly reversed) name when
         # the segment has no "or" tag, else its own "or" value, reversed if
         # the segment is reversed.
         if segment.get("or") is None:
             o = n
         elif is_reversed:
             o = self._reverse_segment_name(segment.get("or"), ",")
         else:
             o = segment.get("or")
     if init:
         # Start the merged line: list-valued fields get a single element.
         merged.sequence = [s]
         if merged_name:
             merged.name = [merged_name]
         else:
             merged.name = [n]
         merged.LN = segment.LN
         if enable_tracking:
             merged.rn = rn
             merged.set("or", [o])
             merged.mp = mp
     else:
         # A placeholder sequence in the added segment replaces the whole
         # merged sequence with a placeholder.
         if gfapy.is_placeholder(segment.sequence):
             merged.sequence = gfapy.Placeholder()
         else:
             merged.sequence.append(s)
         if not merged_name:
             merged.name.append(n)
         if merged.LN:
             if enable_tracking:
                 # Positions are shifted by the length accumulated so far
                 # minus the cut; merged.LN must therefore be read here,
                 # before it is updated below.
                 if rn:
                     rn = [pos - cut + merged.LN for pos in rn]
                     if not merged.rn:
                         merged.rn = rn
                     else:
                         merged.rn += rn
                 if mp and merged.mp:
                     merged.mp += [pos - cut + merged.LN for pos in mp]
             if segment.LN:
                 merged.LN += (segment.LN - cut)
             else:
                 # The added segment has no length: the total is reset to None.
                 merged.LN = None
         elif enable_tracking:
             # merged has no length, so mp is cleared.
             merged.mp = None
         if enable_tracking:
             if not merged.get("or"):
                 merged.set("or", [o])
             else:
                 merged.get("or").append(o)
Beispiel #7
0
class TestUnitLineEquivalence(unittest.TestCase):
    """Tests for comparing, diffing and field-matching gfapy lines."""

    # Reference lines (S, S, C, L and E records).
    a = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a")
    b = gfapy.Line("S\tB\t*\tLN:i:10")
    c = gfapy.Line("C\tA\t+\tB\t+\t10\t*")
    l = gfapy.Line("L\tA\t+\tB\t+\t*")
    e = gfapy.Line("E\t1\tA+\tB-\t0\t100$\t20\t121\t*")

    # Variants of the reference lines, each changed in one respect.
    a_ln = gfapy.Line("S\tA\t*\tLN:i:10\txx:Z:a")
    a_seq = gfapy.Line("S\tA\tACCTTCGT\tLN:i:8\txx:Z:a")
    a_gfa2 = gfapy.Line("S\tA\t8\tACCTTCGT\txx:Z:a")
    a_noxx = gfapy.Line("S\tA\t*\tLN:i:8")
    a_yy = gfapy.Line("S\tA\t*\tLN:i:8\txx:Z:a\tyy:Z:b")
    l_from = gfapy.Line("L\tC\t+\tB\t+\t*")
    e_name = gfapy.Line("E\t2\tA+\tB-\t0\t100$\t20\t121\t*")

    # Field/value dictionaries: h_a holds the fields of ``a``; every other
    # dictionary is a copy of h_a with one entry changed, added or removed.
    h_a = {"record_type": "S", "name": "A", "LN": 8, "xx": "a"}
    h_a_rt = dict(h_a, record_type="X")
    h_a_pl = dict(h_a, name=gfapy.Placeholder())
    h_a_name = dict(h_a, name="B")
    h_a_seq = dict(h_a, sequence="ACCTTCGT")
    h_a_ln = dict(h_a, LN=10)
    h_a_LNstr = dict(h_a, LN="8")
    h_a_noxx = dict(h_a)
    del h_a_noxx["xx"]
    h_a_yy = dict(h_a, yy="b")
    h_a_gfa2 = {"record_type": "S", "sid": "A", "slen": 8, "xx": "a"}

    def test_line_placeholder(self):
        # Whole lines are not reported as placeholders.
        cls = TestUnitLineEquivalence
        assert not gfapy.is_placeholder(cls.a)
        assert not gfapy.is_placeholder(cls.b)

    def test_line_diff_two_segments(self):
        cls = TestUnitLineEquivalence
        expected_a_vs_b = [
            ("different", "positional_field", "name", "A", "B"),
            ("exclusive", "<", "tag", "xx", "Z", "a"),
            ("different", "tag", "LN", "i", "8", "i", "10"),
        ]
        self.assertEqual(sorted(expected_a_vs_b), sorted(cls.a.diff(cls.b)))
        expected_b_vs_a = [
            ("different", "positional_field", "name", "B", "A"),
            ("exclusive", ">", "tag", "xx", "Z", "a"),
            ("different", "tag", "LN", "i", "10", "i", "8"),
        ]
        self.assertEqual(sorted(expected_b_vs_a), sorted(cls.b.diff(cls.a)))
        # Diffing a line against itself yields nothing.
        self.assertEqual([], cls.a.diff(cls.a))
        self.assertEqual([], cls.b.diff(cls.b))

    def test_line_diffscript_two_segments(self):
        cls = TestUnitLineEquivalence
        # The local names ``acpy``/``bcpy`` are deliberately kept equal to
        # the strings handed to diffscript(): the generated code is run by
        # exec() in this scope (presumably it addresses the clone by that
        # name -- confirm against diffscript).
        acpy = cls.a.clone()
        exec(acpy.diffscript(cls.b, "acpy"))
        self.assertNotEqual(str(cls.b), str(cls.a))
        self.assertEqual(str(cls.b), str(acpy))
        bcpy = cls.b.clone()
        exec(bcpy.diffscript(cls.a, "bcpy"))
        self.assertNotEqual(str(cls.a), str(cls.b))
        self.assertEqual(str(cls.a), str(bcpy))

    def test_equal(self):
        cls = TestUnitLineEquivalence
        # every line equals itself
        assert cls.a == cls.a
        assert cls.b == cls.b
        assert cls.c == cls.c
        assert cls.l == cls.l
        assert cls.e == cls.e
        # lines that differ in some field are not equal
        assert not (cls.a == cls.b)
        assert not (cls.a == cls.a_ln)
        assert not (cls.a == cls.a_seq)
        assert not (cls.a == cls.a_gfa2)
        assert not (cls.a == cls.a_noxx)
        # a clone equals its source
        assert cls.b == cls.b.clone()
        assert cls.a == cls.a.clone()

    def test_pointer_equality(self):
        cls = TestUnitLineEquivalence
        assert cls.a is cls.a
        assert cls.a.clone() is not cls.a

    def test_has_eql_fields(self):
        cls = TestUnitLineEquivalence
        # same object
        assert cls.a._has_eql_fields(cls.a)
        # clone
        assert cls.a._has_eql_fields(cls.a.clone())
        # positional field difference
        assert not cls.l._has_eql_fields(cls.l_from)
        assert cls.l._has_eql_fields(cls.l_from, ["from"])
        # positional field difference: name alias
        assert not cls.e._has_eql_fields(cls.e_name)
        assert cls.e._has_eql_fields(cls.e_name, ["eid"])
        assert cls.e._has_eql_fields(cls.e_name, ["name"])
        # positional field difference: placeholder in line
        assert cls.a._has_eql_fields(cls.a_seq)
        # positional field difference: placeholder in reference
        assert cls.a_seq._has_eql_fields(cls.a)
        # tag difference
        assert not cls.a._has_eql_fields(cls.a_ln)
        assert cls.a._has_eql_fields(cls.a_ln, ["LN"])
        # additional tag in line
        assert cls.a._has_eql_fields(cls.a_noxx)
        assert not cls.a_noxx._has_eql_fields(cls.a)
        # missing tag in line
        assert not cls.a._has_eql_fields(cls.a_yy)
        assert cls.a_yy._has_eql_fields(cls.a)
        assert cls.a._has_eql_fields(cls.a_yy, ["yy"])
        # gfa1 vs gfa2
        assert cls.a._has_eql_fields(cls.a_gfa2, ["slen"])
        assert cls.a_gfa2._has_eql_fields(cls.a, ["LN"])
        # record_type
        assert not cls.c._has_eql_fields(cls.l)
        assert not cls.l._has_eql_fields(cls.c)
        assert cls.c._has_eql_fields(cls.l, ["record_type"])
        assert cls.l._has_eql_fields(cls.c, ["record_type", "pos"])

    def test_has_field_values(self):
        cls = TestUnitLineEquivalence
        assert cls.a._has_field_values(cls.h_a)
        # record_type difference
        assert not cls.a._has_field_values(cls.h_a_rt)
        assert cls.a._has_field_values(cls.h_a_rt, ["record_type"])
        # positional field difference
        assert not cls.a._has_field_values(cls.h_a_name)
        assert cls.a._has_field_values(cls.h_a_name, ["name"])
        # positional field difference: placeholder in line
        assert cls.a._has_field_values(cls.h_a_seq)
        # positional field difference: placeholder in hash is compared
        assert not cls.a._has_field_values(cls.h_a_pl)
        assert cls.a._has_field_values(cls.h_a_pl, ["name"])
        # tag difference
        assert not cls.a._has_field_values(cls.h_a_ln)
        assert cls.a._has_field_values(cls.h_a_ln, ["LN"])
        # encoded value
        assert cls.a._has_field_values(cls.h_a_LNstr)
        # additional tag in line
        assert cls.a._has_field_values(cls.h_a_noxx)
        # missing tag in line
        assert not cls.a._has_field_values(cls.h_a_yy)
        assert cls.a._has_field_values(cls.h_a_yy, ["yy"])
        # gfa1 vs gfa2
        assert cls.a._has_field_values(cls.h_a_gfa2, ["slen"])