Beispiel #1
0
 def __init__(self, *args, vlevel=1, version=None):
     """Set up an empty Gfa and optionally fill it with initial content.

     Parameters:
       *args: at most one positional argument, either a string (which is
         split on newlines) or a list; each element is passed to
         ``add_line``
       vlevel (int): validation level, an integer >= 0; when it is >= 1
         ``validate`` is called after the initial content has been added
       version (str, None): ``'gfa1'``, ``'gfa2'`` or None

     Raises:
       gfapy.ArgumentError: if vlevel is not an integer or is negative,
         if the positional argument is neither a string nor a list, or
         if more than one positional argument is given
       gfapy.VersionError: if version is not one of the accepted values
     """
     if not isinstance(vlevel, int):
         raise gfapy.ArgumentError(
             "vlevel is not an integer ({})".format(vlevel))
     if vlevel < 0:
         raise gfapy.ArgumentError(
             "vlevel is not a positive integer ({})".format(vlevel))
     if version not in ('gfa1', 'gfa2', None):
         raise gfapy.VersionError(
             "GFA version unknown ({})".format(version))

     self._vlevel = vlevel
     self._max_int_name = 0

     # The header is a single line object; every other record type starts
     # with an empty table.
     self._records = defaultdict(dict)
     header = gfapy.line.Header(["H"], vlevel=vlevel)
     self._records["H"] = header
     header.connect(self)
     for record_type in ("S", "P", "F", "\n", "E", "U", "G", "O", "C", "L",
                         "#"):
         self._records[record_type] = {}

     self._segments_first_order = False
     self._progress = None
     self._default = {"count_tag": "RC", "unit_length": 1}
     self._line_queue = []

     self._version = version
     self._version_guess = "gfa2" if version is None else version
     if version is None:
         self._version_explanation = None
     else:
         self._version_explanation = "set during initialization"
         self._validate_version()

     if len(args) > 1:
         raise gfapy.ArgumentError("Wrong number of arguments for Gfa()" +
                                   "({})".format(len(args)))
     if not args:
         return

     source = args[0]
     if isinstance(source, str):
         initial_lines = source.split("\n")
     elif isinstance(source, list):
         initial_lines = source
     else:
         raise gfapy.ArgumentError("Cannot create a Gfa" +
                                   " instance from an object of type {}"
                                   .format(type(source)))
     for initial_line in initial_lines:
         self.add_line(initial_line)
     self.process_line_queue()
     if vlevel >= 1:
         self.validate()
Beispiel #2
0
    def set_datatype(self, fieldname, datatype):
        """
        Set the datatype of a tag.

        If an existing tag datatype is changed, its content may become
        invalid (call **validate_field** if necessary).

        Parameters
        ----------
        fieldname : str
          The field name (it is not required that the field exists already)
        datatype
          The datatype; it must be contained in gfapy.Field.TAG_DATATYPE.

        Raises
        ------
        gfapy.RuntimeError
          If **fieldname** is a predefined tag and **datatype** differs from
          its current datatype.
        gfapy.FormatError
          If **fieldname** is not a valid custom tag name and the validation
          level is greater than 0.
        gfapy.ArgumentError
          If **datatype** is not a valid datatype for tags.
        """
        predefined = self._is_predefined_tag(fieldname)
        if predefined and self.get_datatype(fieldname) != datatype:
            requested = "Cannot set the datatype of {} to {}\n".format(
                fieldname, datatype)
            raise gfapy.RuntimeError(
                requested +
                "The datatype of a predefined tag cannot be changed")
        if (not predefined
                and not self._is_valid_custom_tagname(fieldname)
                and self.vlevel > 0):
            raise gfapy.FormatError(
                "{} is not a valid custom tag name".format(fieldname))
        if datatype not in gfapy.Field.TAG_DATATYPE:
            raise gfapy.ArgumentError("Unknown datatype: {}".format(datatype))
        self._datatype[fieldname] = datatype
Beispiel #3
0
 def _check_ref_connection(self, item):
   """Ensure that an item belongs to the same GFA object as this line.

   Parameters:
     item: object with a ``line`` attribute; the ``gfa`` attribute of that
       line is compared with ``self._gfa``

   Raises:
     gfapy.ArgumentError: if the two GFA references differ
   """
   if item.line.gfa != self._gfa:
     raise gfapy.ArgumentError(
       "Line: {}\n".format(self)+
       # newline added so the item and the explanation do not run together
       "Item: {}\n".format(repr(item))+
       "The item added to the group must be connected\n"+
       "to the same GFA object as the group")
Beispiel #4
0
    def other_end(self, segment_end, tolerant=False):
        """The other segment end involved in the alignment represented by the edge.

        Note:
          The result is meaningful only for dovetails overlaps (GFA1 L lines
          or GFA2 E lines representing dovetail overlaps).

        Parameters:
          segment_end (`gfapy.segment_end.SegmentEnd`) : one of the two segment
            ends involved in the alignment represented by the edge
          tolerant (bool) : if True, None is returned instead of raising an
            exception when segment_end is neither end of the edge
            (default: False)

        Returns:
          gfapy.segment_end.SegmentEnd: ``to_end`` if segment_end equals
          ``from_end``, ``from_end`` if it equals ``to_end``; None if it
          equals neither and tolerant is set

        Raises:
          gfapy.error.ArgumentError: if the segment_end is not involved in the
            alignment represented by the line and tolerant is not set
        """
        # from_end is checked first, so it wins if both ends are equal
        if self.from_end == segment_end:
            return self.to_end
        if self.to_end == segment_end:
            return self.from_end
        if tolerant:
            return None
        raise gfapy.ArgumentError(
            "Segment end '{}' not found\n".format(repr(segment_end)) +
            "(from={};to={})".format(repr(self.from_end), repr(
                self.to_end)))
Beispiel #5
0
 def __new__(cls, *args, **kargs):
   """Create an instance of an alignment field class.

   Parameters:
     *args: exactly one positional argument, the source of the alignment
     **kargs: forwarded unchanged to ``Alignment._from_string`` or
       ``Alignment._from_list``

   Returns:
     a new gfapy.AlignmentPlaceholder if the argument is None or a
     placeholder; the argument itself if it is already a gfapy.CIGAR or
     gfapy.Trace; otherwise the result of ``Alignment._from_string`` (str
     argument) or ``Alignment._from_list`` (list argument)

   Raises:
     gfapy.ArgumentError: if no positional argument is given, if more than
       one is given for a non-placeholder value, or if the argument is of
       an unsupported type
   """
   # Guard the args[0] accesses below: without a positional argument they
   # would fail with a bare IndexError.
   if not args:
     raise gfapy.ArgumentError("The Alignment() constructor requires "+
         "a single positional argument, {} found".format(len(args)))
   if args[0] is None or \
       gfapy.is_placeholder(args[0]):
     return gfapy.AlignmentPlaceholder()
   if len(args) > 1:
     raise gfapy.ArgumentError("The Alignment() constructor requires "+
         "a single positional argument, {} found".format(len(args)))
   if isinstance(args[0], (gfapy.CIGAR, gfapy.Trace)):
     # already an alignment object: hand it back unchanged
     return args[0]
   if isinstance(args[0], str):
     return Alignment._from_string(*args, **kargs)
   elif isinstance(args[0], list):
     return Alignment._from_list(*args, **kargs)
   else:
     raise gfapy.ArgumentError("Cannot create an alignment "+
         "from an instance of the class {}".format(type(args[0])))
Beispiel #6
0
 def __init__(self, verbose_level = 1, channel = sys.stderr, prefix = "#"):
   """Set up the logger state.

   Parameters:
     verbose_level (int): stored as ``_verbose_level``
     channel: object with a callable ``write`` attribute, stored as
       ``_channel`` (default: sys.stderr)
     prefix (str): stored as ``_pfx`` (default: ``#``)

   Raises:
     gfapy.ArgumentError: if verbose_level is not an integer
     gfapy.TypeError: if channel has no callable ``write`` attribute
   """
   self._progress = False
   if not isinstance(verbose_level, int):
     raise gfapy.ArgumentError("verbose_level must be an Integer")
   can_write = getattr(channel, "write", None) and callable(channel.write)
   if not can_write:
     raise gfapy.TypeError("channel must provide a 'write' method")
   self._data = {}
   self._verbose_level = verbose_level
   self._pfx = prefix
   self._channel = channel
Beispiel #7
0
 def __add_line_unknown_version(self, gfa_line):
     """Add a line; the record type decides how the line is handled.

     Depending on the record type the line is connected immediately,
     merged into the header, or appended to ``self._line_queue``.
     Several branches also set ``self._version`` and
     ``self._version_explanation``.

     Parameters:
       gfa_line (str, gfapy.Line): the line to add

     Raises:
       gfapy.ArgumentError: if gfa_line is neither a string nor a gfapy.Line
     """
     # Record type: first character of a string, record_type of a Line.
     if isinstance(gfa_line, str):
         rt = gfa_line[0]
     elif isinstance(gfa_line, gfapy.Line):
         rt = gfa_line.record_type
     else:
         raise gfapy.ArgumentError(\
             "Only strings and gfapy.Line instances can be added")
     if rt == "#":
         # "#" lines are connected directly; no version is derived here.
         if isinstance(gfa_line, str):
             gfa_line = gfapy.Line(gfa_line, dialect=self._dialect)
         gfa_line.connect(self)
     elif rt == "H":
         if isinstance(gfa_line, str):
             gfa_line = gfapy.Line(gfa_line,
                                   vlevel=self._vlevel,
                                   dialect=self._dialect)
         self.header._merge(gfa_line)
         # A truthy VN tag sets the version: "1.0" -> gfa1, "2.0" -> gfa2,
         # any other value is stored as it is.
         if gfa_line.VN:
             if gfa_line.VN == "1.0":
                 self._version = "gfa1"
             elif gfa_line.VN == "2.0":
                 self._version = "gfa2"
             else:
                 self._version = gfa_line.VN
             self._version_explanation = "specified in header VN tag"
             if self._vlevel > 0:
                 self._validate_version()
             # NOTE(review): presumably handles the lines queued while the
             # version was unknown; process_line_queue is defined elsewhere.
             self.process_line_queue()
     elif rt == "S":
         if isinstance(gfa_line, str):
             gfa_line = gfapy.Line(gfa_line,
                                   vlevel=self._vlevel,
                                   dialect=self._dialect)
         # The version is taken from the S line object itself.
         self._version = gfa_line.version
         self._version_explanation = \
             "implied by: syntax of S {} line".format(gfa_line.name)
         self.process_line_queue()
         gfa_line.connect(self)
     elif rt in ["E", "F", "G", "U", "O"]:
         # These record types set the version to gfa2; it is set before
         # the string is parsed because it is passed to gfapy.Line below.
         self._version = "gfa2"
         self._version_explanation = "implied by: presence of a {} line".format(
             rt)
         if isinstance(gfa_line, str):
             gfa_line = gfapy.Line(gfa_line,
                                   vlevel=self._vlevel,
                                   version=self._version,
                                   dialect=self._dialect)
         self.process_line_queue()
         gfa_line.connect(self)
     elif rt in ["L", "C", "P"]:
         # Only the version guess is updated; the line itself is queued.
         self._version_guess = "gfa1"
         self._line_queue.append(gfa_line)
     else:
         # Any other record type: the line is queued unchanged.
         self._line_queue.append(gfa_line)
Beispiel #8
0
 def _check_ref_class(self, item):
   """Ensure that the class of an item is one of the accepted classes.

   Accepted are GFA2 edges, GFA2 segments, gaps, ordered groups and the
   class of this line itself; the comparison is on the exact class.

   Parameters:
     item: the object whose class is checked

   Raises:
     gfapy.ArgumentError: if the class of item is not accepted
   """
   item_class = item.__class__
   accepted_classes = [
        gfapy.line.edge.GFA2,
        gfapy.line.segment.GFA2,
        gfapy.line.gap.Gap,
        gfapy.line.group.Ordered,
        self.__class__]
   if item_class in accepted_classes:
     return
   raise gfapy.ArgumentError(
     "Line: {}\n".format(self)+
     "Cannot add items of class {}\n".format(item_class.__name__)+
     "Only GFA2 edges, segments, gaps, groups[*] "+
     "can be added\n(* = unordered groups to unordered groups only).")
Beispiel #9
0
 def __init__(self, *args):
     """Initialize an oriented line from one or two positional arguments.

     Parameters:
       *args: either two arguments (line, orientation), or a single
         argument which is a string (its last character is the
         orientation, the rest is the line), a list (element 0 is the
         line, element 1 the orientation) or an OrientedLine (in which
         case nothing is set here)

     Raises:
       gfapy.ArgumentError: if the single argument is of an unsupported
         type, or if the number of arguments is not 1 or 2
     """
     nargs = len(args)
     if nargs == 2:
         self.__line, self.__orient = args
     elif nargs == 1:
         source = args[0]
         if isinstance(source, OrientedLine):
             # nothing is initialized here for an OrientedLine argument
             return
         if isinstance(source, str):
             self.__line = source[:-1]
             self.__orient = source[-1]
         elif isinstance(source, list):
             self.__line = source[0]
             self.__orient = source[1]
         else:
             raise gfapy.ArgumentError("Cannot create an OrientedLine" +
                                       " instance from an object of type {}"
                                       .format(type(source)))
     else:
         raise gfapy.ArgumentError(
             "Wrong number of arguments for OrientedLine()")
     self.__editable = True
Beispiel #10
0
 def __init__(self, *args):
     """Initialize a segment end from one or two positional arguments.

     Parameters:
       *args: either two arguments (segment, end type), or a single
         argument which is a string (its last character is the end type,
         the rest is the segment), a list of exactly two elements
         (segment, end type) or a SegmentEnd (in which case nothing is
         set here)

     Raises:
       gfapy.ArgumentError: if a list argument does not have two
         elements, if the single argument is of an unsupported type, or
         if the number of arguments is not 1 or 2
     """
     nargs = len(args)
     if nargs == 2:
         self.__segment, self.__end_type = args
         return
     if nargs != 1:
         raise gfapy.ArgumentError(
             "Wrong number of arguments for SegmentEnd()")

     source = args[0]
     if isinstance(source, SegmentEnd):
         # nothing is initialized here for a SegmentEnd argument
         return
     if isinstance(source, str):
         self.__segment = source[:-1]
         self.__end_type = source[-1]
     elif isinstance(source, list):
         if len(source) != 2:
             raise gfapy.ArgumentError(
                 "Cannot create a SegmentEnd " +
                 " from a list of size {}".format(len(source)))
         self.__segment = source[0]
         self.__end_type = source[1]
     else:
         raise gfapy.ArgumentError(
             "Cannot create an SegmentEnd " +
             " from an object of type {}".format(type(source)))
Beispiel #11
0
  def enable_progress(self, part = 0.1):
    """Switch on the output of progress information.

    Parameters
      part (float between 0 and 1) : if part = 0, output at every call of
         progress_log(); if 0 < part < 1, output once per part of the total
         progress (e.g. 0.001 = log every 0.1% progress); if part = 1, output
         only total elapsed time at the end of the computation.

    Raises
      gfapy.ArgumentError : if part is smaller than 0 or larger than 1
    """
    if part < 0 or part > 1:
      raise gfapy.ArgumentError("part must be in range [0..1]")
    self._part = part
    self._progress = True
    if self._verbose_level <= 0:
      return
    # the confirmation message is written only at verbose levels above 0
    notice = "{} Progress logging enabled\n".format(self._pfx)
    self._channel.write(notice)
Beispiel #12
0
 def _select_distribute_end(self, links_distribution_policy, segment_name,
                            factor):
     """Select the segment end whose dovetails are to be distributed.

     Parameters:
       links_distribution_policy (str): one of the values in
         ``self.LINKS_DISTRIBUTION_POLICY``
       segment_name: passed to ``self.segment`` to obtain the segment
       factor: passed on to ``_auto_select_distribute_end``

     Returns:
       None if the policy is ``off``; the policy itself if it is ``L`` or
       ``R``; otherwise the result of ``_auto_select_distribute_end``,
       called with the factor, the number of dovetails of the L and of the
       R end, and a flag telling whether the policy is ``equal``

     Raises:
       gfapy.ArgumentError: if the policy is not an accepted value
     """
     policy = links_distribution_policy
     if policy not in self.LINKS_DISTRIBUTION_POLICY:
         accepted = ", ".join(self.LINKS_DISTRIBUTION_POLICY)
         raise gfapy.ArgumentError(
             "Unknown links distribution policy {}\n".format(policy) +
             "accepted values are: {}".format(accepted))
     if policy == "off":
         return None
     if policy in ["L", "R"]:
         return policy
     seg = self.segment(segment_name)
     n_right = len(seg.dovetails_of_end("R"))
     n_left = len(seg.dovetails_of_end("L"))
     require_equal = policy == "equal"
     return self._auto_select_distribute_end(
         factor, n_left, n_right, require_equal)
Beispiel #13
0
    def _connectivity(self):
        """
        Compute the connectivity of the segment from its dovetail overlaps.

        The numbers of dovetail overlaps of the L and of the R end are
        passed to ``_connectivity_symbols``, whose result is returned.

        Returns
        -------
        (conn_symbol, conn_symbol) list
          Connectivity symbols of the L and of the R end of the segment,
          respectively. With n the number of links to an end, the symbol is
          ``M`` if n > 1, otherwise n.

        Raises
        ------
        gfapy.ArgumentError
          If the segment is not connected to a GFA instance.
        """
        if self.is_connected():
            n_left = len(self.dovetails_L)
            n_right = len(self.dovetails_R)
            return self._connectivity_symbols(n_left, n_right)
        raise gfapy.ArgumentError(
            "Cannot compute the connectivity of {}\n".format(self) +
            "Segment is not connected to a GFA instance")
Beispiel #14
0
    def multiply(self,
                 segment,
                 factor,
                 copy_names=None,
                 conserve_components=True,
                 distribute=None,
                 track_origin=False,
                 origin_tag="or",
                 extended=False):
        """Multiply a segment by a given factor.

        The multiplication operation is implemented as described in
        Gonnella and Kurtz (2016).

        Parameters:
          segment (Line, str): the segment to multiply
          factor (int): the multiplication factor; if 0, the segment is
            deleted; if 1, nothing is done; if > 1, the multiplication
            is performed
          copy_names (list, None): an optional list of strings, the names
            of the copies which will result from the multiplication;
            the length of this list must be equal to factor - 1; if no
            list is specified, the names are computed automatically, adding
            (or incrementing) an integer as suffix to the segment name,
            until enough non-previously used names are found
          conserve_components (bool): if True, the removal of segments
            in the case where factor == 0 is only done if it does not
            split an existing connected component (thereby only dovetail
            overlaps are considered)
          extended : if True, then dovetail distribution and track origin
            are turned on by default
          distribute (str, None) : select an end for which the dovetail
            overlaps are distributed (see Gonnella and Kurtz, 2016); if ``auto``
            (the default if extended is set), an end is selected automatically,
            trying to maximize the number of links which can be deleted; if ``off``
            (the default if extended is not set), no distribution is performed; if
            ``L`` or ``R``, links of the specified end are distributed; if
            ``equal``, an end is selected (if any), for which the number of links
            is equal to the factor (if none, links are not distributed; if both,
            then ``R`` is used)
          track_origin (bool): if True, the name of the original segment (or
            the content of its own origin tag, if any) is stored
            in a tag in the copies (default: False)
          origin_tag (str): the tag where to store the origin information,
            if track_origin is set (default: ``or``)

        Returns:
          the object itself (``self``), in all non-failing cases

        Raises:
          gfapy.ArgumentError: if factor is negative
        """
        if extended:
            if distribute is None:
                distribute = "auto"
            track_origin = True
        if factor < 0:
            raise gfapy.ArgumentError("Mulitiplication factor must be >= 0" +
                                      " ({} found)".format(factor))
        if factor == 0:
            # With conserve_components a cut segment is kept, since removing
            # it would split a connected component. The check used to be
            # combined with "factor == 1", which is never true in this
            # branch, so the option was silently ignored.
            if not (conserve_components and self.is_cut_segment(segment)):
                self.rm(segment)
            return self
        if factor == 1:
            return self
        s, sn = self._segment_and_segment_name(segment)
        # Record the origin on the original segment first (unless it already
        # has one), before the copies are created from it.
        if track_origin and not s.get(origin_tag):
            s.set(origin_tag, sn)
        self.__divide_segment_and_connection_counts(s, factor)
        if copy_names is None:
            copy_names = self._compute_copy_names(sn, factor)
        for cn in copy_names:
            self.__clone_segment_and_connections(s, cn)
        if distribute:
            self._distribute_links(distribute, sn, copy_names, factor)
        return self