Esempio n. 1
0
 def test_from_list(self):
     """Build a SegmentEnd from a list and check the outcome.

     Covers equality with the class-level fixture, the type of the created
     object, the error raised for a three-element list, and the absence of
     validation at construction time.
     """
     pair = ["a", "L"]
     self.assertEqual(TestUnitSegmentEnd.se_s, gfapy.SegmentEnd(pair))
     self.assertEqual(gfapy.SegmentEnd, gfapy.SegmentEnd(pair).__class__)
     # A list with three elements is rejected.
     with self.assertRaises(gfapy.ArgumentError):
         gfapy.SegmentEnd(["a", "L", "L"])
     # no validation: an unknown end type must not raise here
     gfapy.SegmentEnd(["a", "X"])
Esempio n. 2
0
 def _delete_other_links(self,
                         segment_end,
                         other_end,
                         conserve_components=False):
     """Disconnect the dovetails of a segment end, except the one to other_end.

     Parameters:
       segment_end : the segment end whose dovetails are pruned; anything
         accepted by gfapy.SegmentEnd()
       other_end : the segment end on the opposite side of the dovetail
         which shall be kept; anything accepted by gfapy.SegmentEnd()
       conserve_components (bool) : if True, dovetails for which
         is_cut_link() returns True are kept as well
     """
     segment_end = gfapy.SegmentEnd(segment_end)
     other_end = gfapy.SegmentEnd(other_end)
     s = self.try_get_segment(segment_end.segment)
     # Iterate over a snapshot, so that disconnecting a link cannot disturb
     # the iteration in case the returned collection is updated in place.
     for d in list(s.dovetails_of_end(segment_end.end_type)):
         # The dovetail leading to other_end is the one to preserve.
         if d.other_end(segment_end) == other_end:
             continue
         if not conserve_components or not self.is_cut_link(d):
             d.disconnect()
Esempio n. 3
0
 def test_equal(self):
     """Check SegmentEnd equality against other instances and plain lists."""
     fixtures = TestUnitSegmentEnd
     by_name = gfapy.SegmentEnd(fixtures.sym, "L")
     by_line = gfapy.SegmentEnd(fixtures.ref, "R")
     self.assertEqual(fixtures.se_s, by_name)
     self.assertEqual(fixtures.se_r, by_line)
     # only name and end_type equivalence is checked, not segment
     self.assertTrue(fixtures.se_r != fixtures.se_s)
     self.assertTrue(fixtures.se_r.inverted() == fixtures.se_s)
     # equivalence to array
     self.assertTrue(fixtures.se_s == ["a", "L"])
     self.assertTrue(fixtures.se_r == ["a", "R"])
Esempio n. 4
0
 def _junction_junction_paths(self, sn, exclude):
   """Collect two-element paths between a segment and its dovetail neighbours.

   A neighbour is selected if its name is in exclude, or if the end of the
   neighbour involved in the dovetail has exactly one dovetail.

   Parameters:
     sn : name of the segment; it is appended to exclude
     exclude (list) : segment names, see above

   Returns:
     list : one entry [True, segment_end, segment_end, True] for each
       selected neighbour
   """
   exclude.append(sn)
   junction = self.segment(sn)
   paths = []
   for link in junction.dovetails_L:
     neighbour = link.other_end(gfapy.SegmentEnd(junction, "L"))
     if neighbour.name not in exclude:
       if len(neighbour.segment.dovetails_of_end(neighbour.end_type)) != 1:
         continue
     paths.append([True, neighbour, gfapy.SegmentEnd(junction, "R"), True])
   for link in junction.dovetails_R:
     neighbour = link.other_end(gfapy.SegmentEnd(junction, "R"))
     if neighbour.name not in exclude:
       if len(neighbour.segment.dovetails_of_end(neighbour.end_type)) != 1:
         continue
     paths.append(
         [True, gfapy.SegmentEnd(junction, "R"), neighbour.inverted(), True])
   return paths
Esempio n. 5
0
    def segment_connected_component(self, segment, visited=None):
        """Compute the connected component to which a segment belongs.

        Note:
          only dovetail overlaps are considered as connections

        Parameters:
          segment (str, Line) : a segment name or instance
          visited (set) : names of the segments which shall not be entered;
            the name of the segment is added to it. A new empty set is used
            if None.

        Returns:
           list : a list of segment instances
        """
        if visited is None:
            visited = set()
        given_as_line = isinstance(segment, gfapy.Line)
        segment_name = segment.name if given_as_line else segment
        if not given_as_line:
            segment = self.segment(segment_name)
        visited.add(segment_name)
        component = {segment}
        # Explore the graph starting from the left end, then from the right.
        self.__traverse_component(
            gfapy.SegmentEnd(segment, "L"), component, visited)
        self.__traverse_component(
            gfapy.SegmentEnd(segment, "R"), component, visited)
        return list(component)
Esempio n. 6
0
    def is_cut_segment(self, segment):
        """Tell whether removing a segment splits a connected component.

        Note:
          only dovetail overlaps are considered as connections

        Parameters:
          segment (str, Line) : a segment name or instance

        Returns:
           bool
        """
        if isinstance(segment, str):
            segment = self.try_get_segment(segment)
        # Connectivity values for which the answer is immediately negative.
        trivial = [(0, 0), (0, 1), (1, 0)]
        if segment._connectivity() in trivial:
            return False
        # The far side of each segment end connected to the segment.
        neighbours = {
            link.other_end(gfapy.SegmentEnd(segment.name, et)).inverted()
            for et in ("L", "R")
            for link in segment.dovetails_of_end(et)
        }
        # One traversal for each neighbour, never entering the segment itself.
        components = []
        for start in neighbours:
            reached = set()
            components.append(reached)
            self.__traverse_component(start, reached, {segment.name})
        # The segment is a cut segment if any traversal gives a different set.
        return any(comp != components[0] for comp in components)
Esempio n. 7
0
def merge_linear_path(gfa,
                      segpath,
                      enable_tracking=False,
                      merged_name=None,
                      cut_counts=False):
    """Merge a specified linear path of dovetail overlaps connecting segments.

    Note:
      for the parameter usage, see merge_linear_paths();
      the only difference is that merged_name can be set to a string (different
      from 'short'), which will be used as a name for the merged segment.

    Parameters:
      gfa : the graph containing the segments of the path
      segpath (list) : the segment ends of the path; each element is
        converted using gfapy.SegmentEnd(). Nothing is done if the path has
        fewer than two elements.
      enable_tracking, merged_name, cut_counts : passed on to
        create_merged_segment()

    Returns:
      the gfa argument
    """
    if len(segpath) < 2:
        return gfa
    segpath = [gfapy.SegmentEnd(s) for s in segpath]
    merged, first_reversed, last_reversed = create_merged_segment(
        gfa,
        segpath,
        merged_name=merged_name,
        cut_counts=cut_counts,
        enable_tracking=enable_tracking)
    gfa.append(merged)
    # Connect the merged segment at both extremities of the path.
    link_merged(gfa, merged.name, segpath[0].inverted(), first_reversed)
    link_merged(gfa, merged.name, segpath[-1], last_reversed)
    # All the segments of the path are replaced by the merged segment.
    for sn_et in segpath:
        gfa.segment(sn_et.segment).disconnect()
    return gfa
Esempio n. 8
0
 def test_segment(self):
     """Check reading and reassigning the segment attribute of a SegmentEnd."""
     fixtures = TestUnitSegmentEnd
     self.assertEqual(fixtures.sym, fixtures.se_s.segment)
     self.assertEqual(fixtures.ref, fixtures.se_r.segment)
     # The attribute is writable: replace the name by the line instance.
     end = gfapy.SegmentEnd(fixtures.sym, "R")
     end.segment = fixtures.ref
     self.assertEqual(fixtures.ref, end.segment)
Esempio n. 9
0
 def _distribute_links(self, links_distribution_policy, segment_name,
                       copy_names, factor):
     if factor < 2:
         return
     end_type = self._select_distribute_end(links_distribution_policy,
                                            segment_name, factor)
     if end_type is None:
         return
     et_links = self.segment(segment_name).dovetails_of_end(end_type)
     diff = max([len(et_links) - factor, 0])
     links_signatures = list([repr(l.other_end(gfapy.SegmentEnd(segment_name, \
                           end_type))) for l in et_links])
     for i, sn in enumerate([segment_name] + copy_names):
         to_keep = links_signatures[i:i + diff + 1]
         links = self.segment(sn).dovetails_of_end(end_type).copy()
         for l in links:
             l_sig = repr(l.other_end(gfapy.SegmentEnd(sn, end_type)))
             if l_sig not in to_keep:
                 l.disconnect()
Esempio n. 10
0
 def test_segment_ends_path(self):
     """Check reversed() and reverse() on a SegmentEndsPath."""

     def expected():
         # The path aL,bR reversed: opposite order and inverted end types.
         return [gfapy.SegmentEnd("b", "L"), gfapy.SegmentEnd("a", "R")]

     path = gfapy.SegmentEndsPath(
         [gfapy.SegmentEnd("a", "L"),
          gfapy.SegmentEnd("b", "R")])
     self.assertEqual(expected(), list(reversed(path)))
     # reversed() did not change the path itself
     self.assertNotEqual(expected(), path)
     # reverse() works in place
     path.reverse()
     self.assertEqual(expected(), path)
Esempio n. 11
0
 def __traverse_component(self, segment_end, c, visited):
     """Collect the segments reachable over dovetails from a segment end.

     Parameters:
       segment_end (gfapy.SegmentEnd) : the starting point; its segment
         attribute must be a gfapy.Line instance
       c (set) : the segments reached are added to this set
       visited (set) : names of the segments which shall not be entered;
         the names of the segments reached are added to this set
     """
     # An explicit stack is used instead of recursion, so that the size of
     # the component is not limited by the interpreter recursion limit.
     pending = [segment_end]
     while pending:
         current = pending.pop()
         s = current.segment
         assert (isinstance(s, gfapy.Line))
         for l in s.dovetails_of_end(current.end_type):
             oe = l.other_end(current)
             sn = oe.name
             if sn in visited:
                 continue
             visited.add(sn)
             c.add(oe.segment)
             # Continue the exploration from both ends of the new segment.
             for e in ["L", "R"]:
                 pending.append(gfapy.SegmentEnd(oe.segment, e))
Esempio n. 12
0
    def merge_linear_path(self,
                          segpath,
                          redundant_junctions=False,
                          jntag="jn",
                          enable_tracking=False,
                          merged_name=None,
                          cut_counts=False):
        """Merge the segments of a linear path of dovetail overlaps into one.

        Note:
          for the parameter usage, see merge_linear_paths();
          the only difference is that merged_name can be set to a string
          (different from 'short'), which will be used as a name for the
          merged segment.

        Parameters:
          segpath (list) : the segment ends of the path; if the first element
            is a boolean, the first and the last element are taken as flags
            and removed from the list in place. Nothing is done if the list
            has fewer than two elements.

        Returns:
          self
        """
        if len(segpath) < 2:
            return self
        first_redundant = last_redundant = False
        if segpath[0] in [True, False]:
            first_redundant = segpath.pop(0)
            last_redundant = segpath.pop()
        ends = [gfapy.SegmentEnd(s) for s in segpath]
        merged, first_reversed, last_reversed = self.__create_merged_segment(
            ends,
            redundant_junctions=redundant_junctions,
            jntag=jntag,
            merged_name=merged_name,
            cut_counts=cut_counts,
            enable_tracking=enable_tracking)
        self.append(merged)
        head, tail = ends[0], ends[-1]
        # Connect the merged segment at the beginning of the path.
        if first_redundant:
            self._link_duplicated_first(merged, self.segment(head.segment),
                                        first_reversed, jntag)
        else:
            self.__link_merged(merged.name, head.inverted(), first_reversed)
        # Connect the merged segment at the end of the path.
        if last_redundant:
            self._link_duplicated_last(merged, self.segment(tail.segment),
                                       last_reversed, jntag)
        else:
            self.__link_merged(merged.name, tail, last_reversed)
        # Segments flagged as redundant are not disconnected.
        start = 1 if first_redundant else 0
        stop = -1 if last_redundant else None
        for end in ends[start:stop]:
            self.segment(end.segment).disconnect()
            if self._progress:
                self._progress_log("merge_linear_paths", 0.05)
        return self
Esempio n. 13
0
 def __traverse_linear_path(self, segment_end, exclude):
     """Walk along dovetails from a segment end while the path is linear.

     Parameters:
       segment_end (gfapy.SegmentEnd) : where the walk starts
       exclude (set) : the names of the segments put on the path are added
         to this set; the walk stops on reaching a segment whose name is
         already in it

     Returns:
       the path as gfapy.SegmentEndsPath; if the end type of segment_end
       is "L", list(reversed(path)) is returned instead
     """
     path = gfapy.SegmentEndsPath()
     current = gfapy.SegmentEnd(segment_end)
     current.segment = self.segment(current.segment)
     while True:
         seg = current.segment
         after = seg.dovetails_of_end(current.end_type)
         before = seg.dovetails_of_end(gfapy.invert(current.end_type))
         single_before = len(before) == 1
         # The starting segment is always accepted and walked through.
         advance = (single_before and len(after) == 1) or not path
         if not (advance or single_before):
             break
         path.append(gfapy.SegmentEnd(current.name, current.end_type))
         exclude.add(current.name)
         if not advance:
             # Last segment of the path: entered by a single dovetail, but
             # not left by a single one.
             break
         current = after[0].other_end(current).inverted()
         if current.name in exclude:
             break
     if segment_end.end_type == "L":
         return list(reversed(path))
     return path
Esempio n. 14
0
 def test_add_links(self):
     """Append links to a graph and query them through end_relations()."""
     link = gfapy.Line("L\t1\t+\t2\t+\t12M")
     gfa = gfapy.Gfa()
     for segment_line in ("S\t1\t*", "S\t2\t*"):
         gfa.append(segment_line)
     gfa.append(link)  # nothing raised
     self.assertEqual([link], gfa.dovetails)
     # The link is found from both of the segment ends it connects.
     self.assertEqual([link],
                      gfa.segment("1").end_relations("R", ["2", "L"]))
     self.assertEqual([link],
                      gfa.segment("2").end_relations("L", ["1", "R"]))
     # Nothing connects the R end of segment 2 to the L end of segment 1.
     unrelated = gfapy.SegmentEnd("1", "L")
     self.assertEqual([], gfa.segment("2").end_relations("R", unrelated))
     # No segment 3 was appended: a link to it must not raise either.
     gfa.append("L\t1\t+\t3\t+\t12M")
Esempio n. 15
0
    def to_end(self):
        """The segment end corresponding to the to_segment field of L lines.

        Note:
          The result is meaningful only for dovetails overlaps (GFA1 L lines
          or GFA2 E lines representing dovetail overlaps).

        For a L line, the to_orient field tells whether the overlap involves
        the left (5') or the right (3') end of the to_segment, and the
        end_type of the returned SegmentEnd is set to 'L' or 'R' accordingly.
        For a E line, it is first computed which of the sid1/sid2 corresponds
        to the to_segment field of a L line, then the same computation is
        done, as for L lines.

        Returns:
          gfapy.segment_end.SegmentEnd
        """
        segment = self.to_segment
        # A "+" orientation selects the left end, anything else the right one.
        if self.to_orient == "+":
            end_type = "L"
        else:
            end_type = "R"
        return gfapy.SegmentEnd(segment, end_type)
Esempio n. 16
0
 def _randomly_orient_proven_invertible_segment(self, segment_name):
     """Keep two selected links of a segment and delete the others.

     The two links are taken from the result of _partitioned_links_of() for
     the R end of the segment: the first link of each partition, if there
     are two partitions; both links, if there is a single partition of two
     links. Nothing is done in any other case, or if the other end of a
     selected link has fewer than two dovetails.

     Parameters:
       segment_name : name of the segment
     """
     se = gfapy.SegmentEnd([segment_name, "R"])
     parts = self._partitioned_links_of(se)
     if len(parts) == 2:
         tokeep1_other_end = parts[0][0].other_end(se)
         tokeep2_other_end = parts[1][0].other_end(se)
     elif len(parts) == 1 and len(parts[0]) == 2:
         tokeep1_other_end = parts[0][0].other_end(se)
         tokeep2_other_end = parts[0][1].other_end(se)
     else:
         return
     # dovetails_of_end() is the accessor taking an end type; "dovetails"
     # is read as a plain attribute elsewhere and shall not be called.
     for kept in (tokeep1_other_end, tokeep2_other_end):
         if len(kept.segment.dovetails_of_end(kept.end_type)) < 2:
             return
     self._delete_other_links(se, tokeep1_other_end)
     self._delete_other_links(se.inverted(), tokeep2_other_end)
     self._annotate_random_orientation(segment_name)
Esempio n. 17
0
    def linear_path(self, segment, exclude=None):
        """Find a linear path which contains the specified segment.

        Parameters:
          segment (str, Line): the segment to analyse
          exclude : (API private)

        Returns:
          gfapy.SegmentEndsPath : the path; it is empty if none of the two
            connectivity values of the segment is 1
        """
        if isinstance(segment, gfapy.Line):
            segment_name = segment.name
        else:
            segment_name, segment = segment, self.segment(segment)
        connectivity = segment._connectivity()
        if exclude is None:
            exclude = set()
        path = gfapy.SegmentEndsPath()
        for idx, end_type in ((0, "L"), (1, "R")):
            if connectivity[idx] != 1:
                continue
            exclude.add(segment_name)
            # Drop the last element collected so far before extending the
            # path with the result of the traversal from this end.
            if len(path) > 0:
                path.pop()
            path += self.__traverse_linear_path(
                gfapy.SegmentEnd(segment, end_type), exclude)
        return path
Esempio n. 18
0
 def test_end_type(self):
     """Check reading and reassigning the end_type attribute of a SegmentEnd."""
     fixtures = TestUnitSegmentEnd
     self.assertEqual("L", fixtures.se_s.end_type)
     self.assertEqual("R", fixtures.se_r.end_type)
     # The attribute is writable.
     end = gfapy.SegmentEnd(fixtures.sym, "L")
     end.end_type = "R"
     self.assertEqual("R", end.end_type)
Esempio n. 19
0
 def _segment_same_links_both_ends(self, segment_name):
     """Tell whether both ends of a segment have equal link targets.

     The values returned by _link_targets_for_cmp() for the R end and for
     the L end of the segment are compared.

     Returns:
       the result of the equality comparison
     """
     targets = [
         self._link_targets_for_cmp(gfapy.SegmentEnd(segment_name, end_type))
         for end_type in ("R", "L")
     ]
     return targets[0] == targets[1]
Esempio n. 20
0
def create_merged_segment(gfa,
                          segpath,
                          merged_name=None,
                          enable_tracking=False,
                          cut_counts=False):
    """Create the segment resulting from merging the segments of a path.

    Parameters:
      gfa : the graph containing the segments of the path
      segpath (list) : the segment ends (gfapy.SegmentEnd) of the path
      merged_name (str) : passed on to gfa._add_segment_to_merged(); the
        value "short" is replaced by the result of gfa.unused_name()
      enable_tracking (bool) : passed on to gfa._add_segment_to_merged()
      cut_counts (bool) : if True, the factor applied to the sum of the
        counts is length / (total cut + length) instead of 1

    Returns:
      tuple : the merged segment, first_reversed (True if the end type of
        the first element of the path is "L") and last_reversed (None if
        the path has a single element)

    Raises:
      gfapy.ValueError : if two consecutive elements of the path are not
        connected by exactly one dovetail
      gfapy.InconsistencyError : if gfa._vlevel > 0 and the LN of the merged
        segment differs from the length of its sequence
    """
    merged = gfa.try_get_segment(segpath[0].segment).clone()
    # The validation level is set to 0 while the segment is assembled and
    # restored afterwards.
    merged_vlevel = merged.vlevel
    merged.vlevel = 0
    total_cut = 0
    a = segpath[0]
    first_reversed = (a.end_type == "L")
    last_reversed = None
    if merged_name == "short":
        merged_name = gfa.unused_name()
    gfa._add_segment_to_merged(merged,
                               gfa.segment(a.segment),
                               first_reversed,
                               0,
                               True,
                               enable_tracking=enable_tracking,
                               merged_name=merged_name)
    for s in segpath[1:]:
        b = gfapy.SegmentEnd(s).inverted()
        ls = gfa.segment(a.segment).end_relations(a.end_type, b, "dovetails")
        if len(ls) != 1:
            msg = "A single link was expected between {} and {}, " \
                  "{} were found".format(a, b, len(ls))
            raise gfapy.ValueError(msg)
        l = ls[0]
        if not l.overlap:
            cut = 0
        else:
            # Never cut more than the length of the segment to be added.
            cut = min(l.overlap.length_on_query(), gfa.segment(b.segment).LN)
        total_cut += cut
        last_reversed = (b.end_type == "R")
        gfa._add_segment_to_merged(merged,
                                   gfa.segment(b.segment),
                                   last_reversed,
                                   cut,
                                   False,
                                   enable_tracking=enable_tracking,
                                   merged_name=merged_name)
        a = gfapy.SegmentEnd(b).inverted()
    merged.vlevel = merged_vlevel
    # Name, "or" tag and sequence may have been collected as lists of
    # pieces; they are joined here.
    if isinstance(merged.name, list):
        merged.name = "_".join(merged.name)
    ortag = merged.get("or")
    if isinstance(ortag, list):
        merged.set("or", ",".join(ortag))
    if not gfapy.is_placeholder(merged.sequence):
        merged.sequence = "".join(merged.sequence)
        if not merged.LN:
            merged.LN = len(merged.sequence)
        elif gfa._vlevel > 0 and merged.LN != len(merged.sequence):
            # The sequence is a str at this point: its length is obtained
            # by len(), it has no "length" attribute.
            raise gfapy.InconsistencyError(
                "Computed sequence length {} ".format(len(merged.sequence)) +
                "and computed LN {} differ".format(merged.LN))
    # NOTE(review): this condition looks inverted -- the branch taken when
    # the length is unknown is the one which divides by merged.length, while
    # the count tags are cleared whenever a length is available. It is left
    # unchanged here; confirm the intended behaviour before altering it.
    if merged.length is not None:
        for count_tag in ["KC", "RC", "FC"]:
            merged.set(count_tag, None)
    else:
        factor = 1
        if cut_counts:
            factor = merged.length / (total_cut + merged.length)
        for count_tag, count in gfa.__sum_of_counts(segpath, factor).items():
            merged.set(count_tag, count)
    return merged, first_reversed, last_reversed
Esempio n. 21
0
 def test_new(self):
     """Construct segment ends from a valid and from an invalid pair."""
     fixtures = TestUnitSegmentEnd
     valid_pair = (fixtures.sym, "L")
     # no validation on creation: this pair must be accepted as well
     invalid_pair = (fixtures.invalid_sym, "X")
     for segment, end_type in (valid_pair, invalid_pair):
         gfapy.SegmentEnd(segment, end_type)
Esempio n. 22
0
 def end_relations(self, extremity, segment_end, collection="edges"):
     """Select the lines connecting an end of this segment to a segment end.

     Parameters:
       extremity : the end type of this segment to consider
       segment_end : the segment end on the other side of the lines; it is
         compared by equality to the result of other_end()
       collection (str) : name of the attribute of self from which the
         lines are taken

     Returns:
       list : the lines of the collection fulfilling the condition
     """
     matching = []
     for e in getattr(self, collection):
         own_end = gfapy.SegmentEnd(self, extremity)
         if e.other_end(own_end, tolerant=True) == segment_end:
             matching.append(e)
     return matching
Esempio n. 23
0
 def test_validate(self):
     """validate() accepts the fixtures and rejects an unknown end type."""
     for valid in (TestUnitSegmentEnd.se_s, TestUnitSegmentEnd.se_r):
         valid.validate()
     broken = gfapy.SegmentEnd("a", "X")
     with self.assertRaises(gfapy.ValueError):
         broken.validate()
Esempio n. 24
0
class TestUnitSegmentEnd(unittest.TestCase):
    """Unit tests for gfapy.SegmentEnd and gfapy.SegmentEndsPath."""

    # Fixtures shared by the tests: a segment given by name (sym) or as a
    # line instance (ref), and segment ends built from each of them.
    sym = "a"
    ref = gfapy.Line("S\ta\t*\txx:Z:1.0")
    invalid_sym = "a\ta"
    invalid_ref = []
    se_s = gfapy.SegmentEnd(sym, "L")
    se_r = gfapy.SegmentEnd(ref, "R")
    # Expected string representations of se_s and se_r.
    se_s_str = "aL"
    se_r_str = "aR"
    se_s_sym = "aL"
    se_r_sym = "aR"

    def test_new(self):
        """Construct segment ends from a valid and from an invalid pair."""
        valid_pair = (self.sym, "L")
        # no validation on creation: this pair must be accepted as well
        invalid_pair = (self.invalid_sym, "X")
        for segment, end_type in (valid_pair, invalid_pair):
            gfapy.SegmentEnd(segment, end_type)

    def test_from_list(self):
        """Build a SegmentEnd from a list and check the outcome."""
        pair = ["a", "L"]
        self.assertEqual(self.se_s, gfapy.SegmentEnd(pair))
        self.assertEqual(gfapy.SegmentEnd, gfapy.SegmentEnd(pair).__class__)
        # A list with three elements is rejected.
        with self.assertRaises(gfapy.ArgumentError):
            gfapy.SegmentEnd(["a", "L", "L"])
        # no validation: an unknown end type must not raise here
        gfapy.SegmentEnd(["a", "X"])

    def test_segment(self):
        """Check reading and reassigning the segment attribute."""
        self.assertEqual(self.sym, self.se_s.segment)
        self.assertEqual(self.ref, self.se_r.segment)
        # The attribute is writable: replace the name by the line instance.
        end = gfapy.SegmentEnd(self.sym, "R")
        end.segment = self.ref
        self.assertEqual(self.ref, end.segment)

    def test_end_type(self):
        """Check reading and reassigning the end_type attribute."""
        self.assertEqual("L", self.se_s.end_type)
        self.assertEqual("R", self.se_r.end_type)
        # The attribute is writable.
        end = gfapy.SegmentEnd(self.sym, "L")
        end.end_type = "R"
        self.assertEqual("R", end.end_type)

    def test_name(self):
        """The name is the same for a segment given by name or as a line."""
        for end in (self.se_s, self.se_r):
            self.assertEqual(self.sym, end.name)

    def test_validate(self):
        """validate() accepts the fixtures and rejects an unknown end type."""
        for valid in (self.se_s, self.se_r):
            valid.validate()
        broken = gfapy.SegmentEnd("a", "X")
        with self.assertRaises(gfapy.ValueError):
            broken.validate()

    def test_inverted(self):
        """inverted() keeps the segment and switches the end type."""
        flipped_s = self.se_s.inverted()
        self.assertEqual(self.se_s.segment, flipped_s.segment)
        self.assertEqual("R", flipped_s.end_type)
        flipped_r = self.se_r.inverted()
        self.assertEqual(self.se_r.segment, flipped_r.segment)
        self.assertEqual("L", flipped_r.end_type)

    def test_to_s(self):
        """str() gives the segment name followed by the end type."""
        self.assertEqual(self.se_s_str, str(self.se_s))
        self.assertEqual(self.se_r_str, str(self.se_r))

    def test_equal(self):
        """Check equality against other instances and plain lists."""
        by_name = gfapy.SegmentEnd(self.sym, "L")
        by_line = gfapy.SegmentEnd(self.ref, "R")
        self.assertEqual(self.se_s, by_name)
        self.assertEqual(self.se_r, by_line)
        # only name and end_type equivalence is checked, not segment
        self.assertTrue(self.se_r != self.se_s)
        self.assertTrue(self.se_r.inverted() == self.se_s)
        # equivalence to array
        self.assertTrue(self.se_s == ["a", "L"])
        self.assertTrue(self.se_r == ["a", "R"])

    # NOTE: no test of the ordering comparison of segment ends is
    # implemented in this class.

    def test_segment_ends_path(self):
        """Check reversed() and reverse() on a SegmentEndsPath."""

        def expected():
            # The path aL,bR reversed: opposite order, inverted end types.
            return [gfapy.SegmentEnd("b", "L"), gfapy.SegmentEnd("a", "R")]

        path = gfapy.SegmentEndsPath(
            [gfapy.SegmentEnd("a", "L"),
             gfapy.SegmentEnd("b", "R")])
        self.assertEqual(expected(), list(reversed(path)))
        # reversed() did not change the path itself
        self.assertNotEqual(expected(), path)
        # reverse() works in place
        path.reverse()
        self.assertEqual(expected(), path)