Ejemplo n.º 1
0
 def _to_gfa1_a(self):
     """Return the content of this edge line as a list of GFA1 fields.

     The list starts with the alignment type, followed by name and
     orientation of the two segment references (sid1 first when
     ``_is_sid1_from()`` is true, sid2 first otherwise), the position
     (only when the alignment type is "C"), the overlap, an ``ID`` tag
     built from the edge ID (unless it is a placeholder) and all tags.

     Returns
     -------
     list of str
         The GFA1 field strings.

     Raises
     ------
     gfapy.RuntimeError
         If the alignment type is "I" (internal overlap), or if the
         overlap does not validate with ``version="gfa1"``.
     """
     at = self._alignment_type
     if at == "I":
         raise gfapy.RuntimeError(
             "Conversion of edge line from GFA2 to GFA1 failed\n" +
             "Edge represents an internal overlap:\n" +
             "Edge line: {}\n".format(str(self)))
     a = [at]
     if self._is_sid1_from():
         ol1 = self.get("sid1")
         ol2 = self.get("sid2")
     else:
         ol1 = self.get("sid2")
         ol2 = self.get("sid1")
     a.extend([ol1.name, ol1.orient, ol2.name, ol2.orient])
     if at == "C":
         a.append(str(self.pos))
     try:
         self.overlap.validate(version="gfa1")
     except Exception as err:
         # Only ordinary errors are translated; KeyboardInterrupt and
         # SystemExit propagate untouched. The cause is kept for debugging.
         raise gfapy.RuntimeError(
             "Conversion of edge line from GFA2 to GFA1 failed\n" +
             "Overlap is invalid or not compatible with GFA1\n" +
             "Edge line: {}\n".format(str(self))) from err
     a.append(str(self.overlap))
     if not gfapy.is_placeholder(self.eid):
         a.append(gfapy.Field._to_gfa_tag(self.eid, "ID", datatype="Z"))
     for fn in self.tagnames:
         a.append(self.field_to_s(fn, tag=True))
     return a
Ejemplo n.º 2
0
 def _push_item_on_se_path(self, path, prev_edge, item):
     """Append one group item to the captured path being built.

     The handling depends on the class of ``item.line``: segments and
     edges are pushed through the dedicated helpers, ordered groups are
     expanded recursively (in reverse, with inverted items, when the
     item orientation is not "+").

     Returns
     -------
     tuple
         ``(path, prev_edge)`` after processing the item.

     Raises
     ------
     gfapy.RuntimeError
         If the reference is unresolved (string or unknown line) or the
         referenced line is not connected.
     gfapy.AssertionError
         If the captured path of a nested ordered group is empty.
     gfapy.TypeError
         If the referenced line has an unsupported class.
     """

     def require_connected(line):
         # Shared guard for segment, edge and ordered-group items.
         if not line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(line))

     target = item.line
     if isinstance(target, str):
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (String found)".format(target))
     if isinstance(target, gfapy.line.segment.GFA2):
         require_connected(target)
         self._push_segment_on_se_path(path, prev_edge, item)
         return path, False
     if isinstance(target, gfapy.line.edge.GFA2):
         require_connected(target)
         if path:
             self._push_nonfirst_edge_on_se_path(path, item)
         else:
             self._push_first_edge_on_se_path(path, self.items)
         return path, True
     if isinstance(target, gfapy.line.group.Ordered):
         require_connected(target)
         subpath, prev_edge_subpath = target._compute_captured_path()
         if not subpath:
             raise gfapy.AssertionError()
         if item.orient == "+":
             elements = subpath
         else:
             # Lazily invert each element right before it is pushed.
             elements = (elem.inverted() for elem in reversed(subpath))
         for elem in elements:
             path, prev_edge = self._push_item_on_se_path(
                 path, prev_edge, elem)
         return path, prev_edge_subpath
     if isinstance(target, gfapy.line.unknown.Unknown):
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (Virtual unknown line)".format(
                 item.name))
     raise gfapy.TypeError("Line: {}\t".format(self) +
                           "Cannot compute captured path:\t" +
                           "Error: items of type {} are not supported\t"
                           .format(target.__class__.__name__) +
                           "Unsupported item: {}".format(item))
Ejemplo n.º 3
0
 def induced_segments_set(self):
     """Collect the segments induced by the items of this group.

     Segments contribute themselves, edges contribute the lines of their
     ``sid1`` and ``sid2`` references, ordered groups contribute the lines
     of their captured segments and unordered groups contribute their own
     induced segments set. Duplicates are removed by object identity,
     keeping the order of first occurrence.

     Returns
     -------
     list
         The induced segment lines, without duplicates.

     Raises
     ------
     gfapy.RuntimeError
         If the line is not connected, or an item is an unresolved
         reference (a string or an unknown line).
     gfapy.TypeError
         If an item has an unsupported class.
     """
     if not self.is_connected():
         raise gfapy.RuntimeError(
             "Induced set cannot be computed\n" +
             "Line is not connected to a GFA instance\n" +
             "Line: {}".format(self))
     segments_set = list()
     for item in self.items:
         if isinstance(item, str):
             # The item itself is the unresolved name; a str has no
             # ``line`` attribute, so format the string directly.
             raise gfapy.RuntimeError(
                 "Induced set cannot be computed; a reference has not been resolved\n"
                 + "Line: {}\n".format(self) +
                 "Unresolved reference: {} (String found)".format(item)
             )
         elif isinstance(item, gfapy.line.segment.GFA2):
             self._check_induced_set_elem_connected(item)
             segments_set.append(item)
         elif isinstance(item, gfapy.line.edge.GFA2):
             self._check_induced_set_elem_connected(item)
             for sl in [item.sid1.line, item.sid2.line]:
                 self._check_induced_set_elem_connected(sl)
                 segments_set.append(sl)
         elif isinstance(item, gfapy.line.group.Ordered):
             self._check_induced_set_elem_connected(item)
             subset = item.captured_segments
             assert (subset)
             for elem in subset:
                 segments_set.append(elem.line)
         elif isinstance(item, gfapy.line.group.Unordered):
             self._check_induced_set_elem_connected(item)
             subset = item.induced_segments_set
             assert (subset)
             for elem in subset:
                 segments_set.append(elem)
         elif isinstance(item, gfapy.line.Unknown):
             raise gfapy.RuntimeError(
                 "Induced set cannot be computed; a reference has not been resolved\n"
                 + "Line: {}\n".format(self) +
                 "Unresolved reference: {} (Virtual unknown line)".format(
                     item.name))
         else:
             raise gfapy.TypeError(
                 "Line: {}\t".format(self) +
                 "Cannot compute induced set:\t" +
                 "Error: items of type {} are not supported\t".format(
                     item.__class__.__name__) +
                 "Unsupported item: {}".format(item))
     # De-duplicate by identity (not equality), preserving order.
     seen_ids = set()
     unique_segments = []
     for segment in segments_set:
         if id(segment) not in seen_ids:
             seen_ids.add(id(segment))
             unique_segments.append(segment)
     return unique_segments
0
 def induced_edges_set(self):
     """Return the edges induced by the induced segments set.

     The result is whatever ``_compute_induced_edges_set`` returns for
     ``self.induced_segments_set``.

     Raises
     ------
     gfapy.RuntimeError
         If the line is not connected to a GFA instance.
     """
     if self.is_connected():
         segments = self.induced_segments_set
         return self._compute_induced_edges_set(segments)
     raise gfapy.RuntimeError(
         "Induced set cannot be computed\n" +
         "Line is not connected to a GFA instance\n" +
         "Line: {}".format(self))
Ejemplo n.º 5
0
  def connect(self, gfa):
    """
    Attach the line to a GFA instance.

    If the GFA instance already holds a duplicate of this line, the
    handling is delegated: a virtual duplicate is substituted, any other
    duplicate goes through the not-unique handler. Otherwise the line
    stores the GFA instance, initializes its references and is registered.

    Parameters
    ----------
    gfa : GFA
      the GFA instance

    Returns
    -------
    None when the line is registered directly; otherwise the return
    value of the delegated duplicate handler.

    Raises
    ------
    gfapy.RuntimeError
      If the line is already connected.
    """
    if self.is_connected():
      raise gfapy.RuntimeError(
        "Line {} is already connected to a GFA instance".format(self))
    duplicate = gfa._search_duplicate(self)
    if not duplicate:
      self._gfa = gfa
      self._initialize_references()
      self._gfa._register_line(self)
      return None
    if duplicate.virtual:
      return self._substitute_virtual_line(duplicate)
    return self._process_not_unique(duplicate)
Ejemplo n.º 6
0
 def line(self, line):
     """Set the line, provided the instance is editable.

     Raises
     ------
     gfapy.RuntimeError
         If the instance is not editable.
     """
     if not self.__editable:
         raise gfapy.RuntimeError(
             "gfapy.OrientedLine instance cannot be edited ({})".format(
                 self))
     self.__line = line
Ejemplo n.º 7
0
 def to_version(self, version, raise_on_failure=True):
     """Convert the line to the given GFA version.

     The conversion calls the ``_to_<version>_a`` method of the line and
     builds a new ``gfapy.Line`` from the returned field list.

     Parameters
     ----------
     version : str
         Target version; must be one of ``gfapy.VERSIONS``.
     raise_on_failure : bool
         What to do when the conversion method returns an empty result:
         raise (default) or return None.

     Returns
     -------
     gfapy.Line
         The line itself if it already has the requested version,
         otherwise the converted line; None if the conversion produced
         nothing and ``raise_on_failure`` is false.

     Raises
     ------
     gfapy.VersionError
         If the version is unknown, or the conversion produced nothing
         and ``raise_on_failure`` is true.
     gfapy.RuntimeError
         If the converted line could not be constructed.
     """
     if version == self._version:
         return self
     if version not in gfapy.VERSIONS:
         raise gfapy.VersionError("Version unknown ({})".format(version))
     fields = getattr(self, "_to_" + version + "_a")()
     if fields:
         try:
             return gfapy.Line(fields,
                               version=version,
                               vlevel=self.vlevel)
         except Exception as err:
             # Translate ordinary construction errors only, keeping the
             # original cause attached; interpreter-exit signals pass.
             raise gfapy.RuntimeError(
                 "Conversion to {} failed\n".format(version) +
                 "Line: {}".format(str(self))) from err
     if raise_on_failure:
         raise gfapy.VersionError(
             "Records of type {} ".format(self.record_type) +
             "cannot be converted from version {} ".format(
                 self._version) + "to version {}".format(version))
     return None
Ejemplo n.º 8
0
 def _to_gfa2_a(self):
     """
 Returns
 -------
 list of str
 	A list of GFA2 field strings.
 """
     try:
         length = self.try_get_length()
     except gfapy.NotFoundError:
         raise gfapy.RuntimeError(
             "Conversion of GFA1 segment line to GFA2 failed\n" +
             "GFA2 requires to specify a length\n" +
             "No length information available in the GFA1 segment:\n" +
             "Segment line: {}".format(str(self)))
     a = [
         "S",
         self.field_to_s("name", tag=False),
         str(self.try_get_length()),
         self.field_to_s("sequence", tag=False)
     ]
     for fn in self.tagnames:
         if fn != "LN":
             a.append(self.field_to_s(fn, tag=True))
     return a
Ejemplo n.º 9
0
 def captured_path(self):
     """Return the captured path of this line.

     The value is the first element of the result of
     ``_compute_captured_path()``.

     Raises
     ------
     gfapy.RuntimeError
         If the line is not connected to a GFA instance.
     """
     if self.is_connected():
         computed = self._compute_captured_path()
         return computed[0]
     raise gfapy.RuntimeError(
         "Captured path cannot be computed\n" +
         "Line is not connected to a GFA instance\n" +
         "Line: {}".format(self))
Ejemplo n.º 10
0
 def orient(self, orient):
     """Set the orientation, provided the instance is editable.

     Raises
     ------
     gfapy.RuntimeError
         If the instance is not editable.
     """
     if not self.__editable:
         raise gfapy.RuntimeError(
             "gfapy.OrientedLine instance cannot be edited ({})".format(
                 self))
     self.__orient = orient
Ejemplo n.º 11
0
    def set_datatype(self, fieldname, datatype):
        """
        Assign a datatype to a tag.

        Changing the datatype of an existing tag may leave its content
        invalid (call **validate_field** if necessary).

        Parameters
        ----------
        fieldname : str
          The field name (the field does not need to exist already)
        datatype : gfapy.Field.FIELD_DATATYPE
          The datatype.

        Raises
        ------
        gfapy.RuntimeError
          If **fieldname** is a predefined tag and **datatype** differs
          from its datatype.
        gfapy.FormatError
          If **fieldname** is not a valid custom tag name and the
          validation level is greater than 0.
        gfapy.ArgumentError
          If **datatype** is not a valid datatype for tags.
        """
        if self._is_predefined_tag(fieldname):
            current = self.get_datatype(fieldname)
            if current != datatype:
                raise gfapy.RuntimeError(
                    "Cannot set the datatype of {} to {}\n".format(
                        fieldname, datatype) +
                    "The datatype of a predefined tag cannot be changed")
        else:
            name_rejected = not self._is_valid_custom_tagname(fieldname)
            if name_rejected and self.vlevel > 0:
                raise gfapy.FormatError(
                    "{} is not a valid custom tag name".format(fieldname))
        if datatype in gfapy.Field.TAG_DATATYPE:
            self._datatype[fieldname] = datatype
        else:
            raise gfapy.ArgumentError("Unknown datatype: {}".format(datatype))
Ejemplo n.º 12
0
 def _set_existing_field(self, fieldname, value, set_reference=False):
     renaming_connected = False
     if self._gfa:
         if not set_reference and \
           (fieldname in self.__class__.REFERENCE_FIELDS or \
            fieldname in self.__class__.BACKREFERENCE_RELATED_FIELDS):
             raise gfapy.RuntimeError(
                 "The value of field '{}' cannot be changed, ".format(
                     fieldname) + "as the line belongs to a GFA instance")
         if (fieldname == self.__class__.STORAGE_KEY) or \
           (self.__class__.STORAGE_KEY == "name" and \
           fieldname == self.__class__.NAME_FIELD):
             renaming_connected = True
             self._gfa._unregister_line(self)
     if value is None:
         if fieldname in self._data:
             self._data.pop(fieldname)
     else:
         if self.vlevel >= 3:
             self._field_or_default_datatype(fieldname, value)
             gfapy.Field._validate_gfa_field(
                 value, self._field_datatype(fieldname), fieldname)
         self._data[fieldname] = value
     if renaming_connected:
         self._gfa._register_line(self)
Ejemplo n.º 13
0
 def __init__(self, data, vlevel=1, virtual=False, version=None):
     """Initialize a line from a dict or from a list/string of fields.

     Parameters
     ----------
     data : dict, list or str
         A dict is copied into the internal data as is (API private).
         A string is split on ``gfapy.Line.SEPARATOR`` (comment lines are
         pre-processed by ``gfapy.Line._init_comment_data`` instead).
     vlevel : int
         Validation level; record-type-specific validation is run when
         it is at least 1.
     virtual : bool
         Stored as the virtual flag of the line.
     version : str or None
         The GFA version; when None it is computed from the first field.

     Raises
     ------
     gfapy.AssertionError
         If the class is ``gfapy.Line`` itself rather than a subclass.
     gfapy.RuntimeError
         If the version is still None after the initialization.
     """
     # Plain state first: the hooks called below read these attributes.
     self.vlevel = vlevel
     self._virtual = virtual
     self._datatype = {}
     self._data = {}
     self._gfa = None
     self._version = version
     self._refs = {}
     if self.__class__ == gfapy.Line:
         raise gfapy.AssertionError("Line subclass unknown")
     if isinstance(data, dict):
         # API private initialization using dict
         self._data.update(data)
     else:
         # public initialization using list (or tab-separated string)
         if self.__class__ == gfapy.line.Comment:
             data = gfapy.Line._init_comment_data(data)
         elif isinstance(data, str):
             data = data.split(gfapy.Line.SEPARATOR)
         # data[0] is passed as the record type to the version computation
         if self.version is None:
             self._compute_version(data[0])
         else:
             self._validate_version()
         self._initialize_positional_fields(data)
         self._initialize_tags(data)
         if self.vlevel >= 1:
             self._validate_record_type_specific_info()
         if self.version is None:
             raise gfapy.RuntimeError(
                 "version could not be determined, " +
                 "record_type={}".format(self.record_type))
Ejemplo n.º 14
0
 def connect(self, gfa):
     """Attach the header to ``gfa``, only if it is that GFA's header.

     Raises
     ------
     gfapy.RuntimeError
         If ``gfa.header`` is not this very instance.
     """
     if gfa.header is self:
         self._gfa = gfa
         return
     raise gfapy.RuntimeError(
         "gfapy.line.Header instances cannot be connected\n" +
         "Use gfa.add_line(this_line) to add the information\n" +
         "contained in this header line to the header of a GFA instance."
     )
Ejemplo n.º 15
0
 def diffscript(self, other, selfvar):
   """Build a script which turns this line into ``other``.

   Each entry of ``self.diff(other)`` is translated into a statement
   calling ``set``, ``set_datatype`` or ``delete`` on the variable named
   ``selfvar``. Single quotes in names and values are backslash-escaped.

   Parameters
   ----------
   other : line to compare with (passed to ``self.diff``)
   selfvar : str
     Name of the variable to use in the generated statements.

   Returns
   -------
   str
     The generated statements, joined by newlines.

   Raises
   ------
   gfapy.RuntimeError
     If the two lines have a different record type or GFA version.
   """
   def quoted(text):
     # Escape single quotes, as values are emitted inside '...' literals.
     return text.replace("'", "\\'")

   outscript = []
   for diffitem in self.diff(other):
     category = diffitem[0]
     if category == "incompatible":
       if diffitem[1] == "record_type":
         raise gfapy.RuntimeError(
           "Cannot compute conversion script: different record type\n"+
           "Line: {}\n".format(self)+
           "Other: {}\n".format(other)+
           "{0} != {1}".format(diffitem[2], diffitem[3]))
       elif diffitem[1] == "version":
         raise gfapy.RuntimeError(
           "Cannot compute conversion script: different GFA version\n"+
           "Line: {}\n".format(self)+
           "Other: {}\n".format(other)+
           "{0} != {1}".format(diffitem[2], diffitem[3]))
     elif category == "different":
       if diffitem[1] == "positional_field":
         outscript.append("{0}.set('{1}', '{2}')".format(
           selfvar, quoted(diffitem[2]), quoted(diffitem[4])))
       elif diffitem[1] == "tag":
         if diffitem[3] != diffitem[5]:
           outscript.append("{0}.set_datatype('{1}', '{2}')".format(
             selfvar, quoted(diffitem[2]), quoted(diffitem[5])))
         if diffitem[4] != diffitem[6]:
           outscript.append("{0}.set('{1}', '{2}')".format(
             selfvar, quoted(diffitem[2]), quoted(diffitem[6])))
     elif category == "exclusive":
       if diffitem[1] == ">":
         if diffitem[2] == "tag":
           outscript.append("{0}.set_datatype('{1}', '{2}')".format(
             selfvar, quoted(diffitem[3]), quoted(diffitem[4])))
           outscript.append("{0}.set('{1}', '{2}')".format(
             selfvar, quoted(diffitem[3]), quoted(diffitem[5])))
       elif diffitem[1] == "<":
         if diffitem[2] == "tag":
           outscript.append("{0}.delete('{1}')".format(
             selfvar, quoted(diffitem[3])))
   return "\n".join(outscript)
Ejemplo n.º 16
0
 def _api_private_check_gfa_line(self, gfa_line, callermeth):
   """Check that ``gfa_line`` is a gfapy.Line attached to this instance.

   Parameters
   ----------
   gfa_line : the object to check
   callermeth : str
     Name of the API private method, used in the error messages.

   Raises
   ------
   gfapy.TypeError
     If ``gfa_line`` is not an instance of gfapy.Line.
   gfapy.RuntimeError
     If ``gfa_line._gfa`` is not this instance.
   """
   if not isinstance(gfa_line, gfapy.Line):
     # Fill in the class name; the placeholder was previously left as-is.
     raise gfapy.TypeError("Note: {} is API private, ".format(callermeth)+
         "do not call it directly\n"+
         "Error: line class is {} and not gfapy.Line".format(
           gfa_line.__class__.__name__))
   elif gfa_line._gfa is not self:
     raise gfapy.RuntimeError("Note: {} is API private, ".format(callermeth)+
         "do not call it directly\n"+
         "Error: line.gfa is not the expected instance of gfapy.Gfa\n"+
         repr(gfa_line.gfa)+" != "+repr(self))
Ejemplo n.º 17
0
    def set(self, fieldname, value):
        """Set the value of a field of a comment line.

        Overrides the generic ``set()`` so that only the ``content`` and
        ``spacer`` fields can be set; anything else is treated as a tag
        and rejected.

        Raises
        ------
        gfapy.RuntimeError
          If **fieldname** is neither "content" nor "spacer".
        """
        if fieldname not in ("content", "spacer"):
            raise gfapy.RuntimeError("Tags of comment lines cannot be set")
        return super().set(fieldname, value)
Ejemplo n.º 18
0
 def _set_existing_field(self, fieldname, value, set_reference=False):
     """Set a header field, refusing to edit an existing VN when connected.

     Any other case is forwarded to the parent implementation.

     Raises
     ------
     gfapy.RuntimeError
         If the field is "VN", it already has a value and the line is
         connected.
     """
     vn_locked = (fieldname == "VN" and self.get("VN") is not None
                  and self.is_connected())
     if not vn_locked:
         super()._set_existing_field(fieldname,
                                     value,
                                     set_reference=set_reference)
         return
     raise gfapy.RuntimeError(
         "The value of the header tag VN cannot be edited\n" +
         "For version conversion use to_gfa1 or to_gfa2")
Ejemplo n.º 19
0
    def randomly_orient_invertible(self, segment):
        """Select a random orientation for an invertible segment.

        For the definition of invertible segment, see Gonnella and Kurtz
        (2016).

        Parameters
        ----------
        segment : gfapy.Line or segment name
          A gfapy.Line is replaced by its ``name``; any other value is
          used as the segment name directly.

        Raises
        ------
        gfapy.RuntimeError
          If ``_segment_same_links_both_ends`` is false for the segment.
        """
        segment_name = (segment.name
                        if isinstance(segment, gfapy.Line) else segment)
        if self._segment_same_links_both_ends(segment_name):
            self._randomly_orient_proven_invertible_segment(segment_name)
            return
        raise gfapy.RuntimeError(
            "Only segments with links to the same or " +
            "equivalent segments at both ends can be randomly oriented")
Ejemplo n.º 20
0
    def set(self, fieldname, value):
        """Set the value of a field.

        If no datatype is set for a new custom tag, the default datatype
        for the assigned value is used (e.g. J for Hashes, i for Integer).

        Parameters
        ----------
        fieldname : str
          The name of the field to set.
          (positional field, predefined tag (uppercase) or custom tag
          (lowercase))
        value : object
          The value to assign. A None value for a new custom tag without
          datatype stores nothing.

        Raises
        ------
        gfapy.RuntimeError
          If a new tag is set on a virtual line.
        gfapy.FormatError
          If **fieldname** is not a valid predefined or custom tag name
          (and the validation level is not 0).

        Returns
        -------
        object
          The result of the underlying setter, the stored value for a
          new custom tag, or None if nothing was stored.
        """
        if fieldname in self._data or self._is_predefined_tag(fieldname):
            return self._set_existing_field(fieldname, value)
        aliases = self.__class__.FIELD_ALIAS
        if fieldname in aliases:
            return self.set(aliases[fieldname], value)
        if self.virtual:
            raise gfapy.RuntimeError("Virtual lines do not have tags")
        if self.vlevel != 0 and not self._is_valid_custom_tagname(fieldname):
            raise gfapy.FormatError(
                "{} is not a positional field,".format(fieldname) +
                "an existing tag, an alias, a predefined tag or a valid custom tag\n"
                + "positional fields: {}\n".format(", ".join(
                    self.positional_fieldnames)) +
                "existing tags: {}\n".format(", ".join(self.tagnames)) +
                "aliases: {}\n".format(", ".join(aliases.keys())) +
                "predefined tags: {}\n".format(", ".join(
                    self.__class__.PREDEFINED_TAGS)))
        # New custom tag from here on.
        self._define_field_methods(fieldname)
        if self._datatype.get(fieldname) is not None:
            return self._set_existing_field(fieldname, value)
        if value is None:
            return None
        self._datatype[fieldname] = \
            gfapy.Field._get_default_gfa_tag_datatype(value)
        self._data[fieldname] = value
        return self._data[fieldname]
Ejemplo n.º 21
0
 def register_extension(cls, references=[]):
     """Register ``cls`` as an extension record type.

     The positional field definitions of the class are checked and
     normalized, datatypes of tags are merged into ``cls.DATATYPE``, the
     given references are recorded, and finally the class is added to
     ``gfapy.Line.EXTENSIONS`` and to the GFA2-specific record types.

     Parameters
     ----------
     references : iterable of (field, klass, refkey)
         For each entry, ``field`` is added to ``cls.REFERENCE_FIELDS``,
         ``refkey`` to ``klass.DEPENDENT_LINES`` and the whole triple to
         ``cls.REFERENCE_INITIALIZERS``.

     Raises
     ------
     gfapy.RuntimeError
         If a positional field has no datatype, or ``RECORD_TYPE`` is
         not defined.
     """
     # NOTE(review): the mutable default is only iterated here, never
     # mutated, so it is not shared state between calls.
     # check the definitions
     if isinstance(cls.POSFIELDS, OrderedDict):
         # Mapping form: fieldname -> datatype; keep only the names after
         # copying the datatypes.
         for fieldname, datatype in cls.POSFIELDS.items():
             cls.DATATYPE[fieldname] = datatype
         cls.POSFIELDS = list(cls.POSFIELDS.keys())
     else:
         # Otherwise every positional field needs an entry in DATATYPE.
         for posfield in cls.POSFIELDS:
             if posfield not in cls.DATATYPE:
                 raise gfapy.RuntimeError(
                     "Extension {} ".format(str(cls)) +
                     "defines no datatype for the positional field {}".
                     format(posfield))
     if hasattr(cls, "TAGS_DATATYPE"):
         for fieldname, datatype in cls.TAGS_DATATYPE.items():
             cls.DATATYPE[fieldname] = datatype
     if not cls.RECORD_TYPE:
         raise gfapy.RuntimeError(
             "Extension {} ".format(str(cls)) +
             "does not define the RECORD_TYPE constant")
     if cls.NAME_FIELD is not None:
         gfapy.lines.finders.Finders.RECORDS_WITH_NAME.append(
             cls.RECORD_TYPE)
     for field, klass, refkey in references:
         if field not in cls.REFERENCE_FIELDS:
             # Replace an empty/falsy value by a fresh list before
             # appending to it.
             if not cls.REFERENCE_FIELDS:
                 cls.REFERENCE_FIELDS = []
             cls.REFERENCE_FIELDS.append(field)
         if refkey not in klass.DEPENDENT_LINES:
             klass.DEPENDENT_LINES.append(refkey)
             klass._define_reference_getters()
         if cls.REFERENCE_INITIALIZERS is None:
             cls.REFERENCE_INITIALIZERS = []
         cls.REFERENCE_INITIALIZERS.append((field, klass, refkey))
     cls._apply_definitions()
     gfapy.Line.EXTENSIONS[cls.RECORD_TYPE] = cls
     gfapy.Line.RECORD_TYPE_VERSIONS["specific"]["gfa2"].append(
         cls.RECORD_TYPE)
Ejemplo n.º 22
0
 def captured_path(self):
     """Return segment names and links of the path, interleaved.

     The result alternates segment names and links, ending with the last
     segment name. If there are as many links as segment names, the last
     link and the first segment name are appended as well.

     Raises
     ------
     gfapy.RuntimeError
         If the line is not connected to a GFA instance.
     """
     if not self.is_connected():
         raise gfapy.RuntimeError(
             "Captured path cannot be computed\n" +
             "Line is not connected to a GFA instance\n" +
             "Line: {}".format(self))
     names = self.segment_names
     links = self.links
     path = []
     for idx in range(len(names) - 1):
         path.extend((names[idx], links[idx]))
     path.append(names[-1])
     if len(names) == len(links):
         path.extend((links[-1], names[0]))
     return path
Ejemplo n.º 23
0
 def _compute_version(self, rt):
     """Set ``self._version`` according to the record type ``rt``.

     Generic record types get "generic"; record types whose syntax
     differs between versions take the ``VERSION`` of the class;
     otherwise the first version listing ``rt`` among its specific
     record types is used. If the version is still unset, it becomes
     "gfa2".

     Raises
     ------
     gfapy.RuntimeError
         If the syntax of the record type depends on the version and the
         class defines no ``VERSION``.
     """
     known = Construction.RECORD_TYPE_VERSIONS
     if rt in known["generic"]:
         self._version = "generic"
     elif rt in known["different"]:
         if not hasattr(self.__class__, "VERSION"):
             raise gfapy.RuntimeError(
                 "GFA version not specified\n" +
                 "Records of type {} ".format(rt) +
                 "have different syntax according to the version")
         self._version = self.__class__.VERSION
     else:
         for candidate, record_types in known["specific"].items():
             if rt in record_types:
                 self._version = candidate
                 break
     if not self._version:
         self._version = "gfa2"
Ejemplo n.º 24
0
    def disconnect(self):
        """
        Remove the line from the GFA instance it belongs to.

        The Line instance itself will still exist, but all references
        from it to other lines are deleted, as well as references to it
        from other lines. Mandatory references are turned into their
        non-reference representations (e.g. segments references in the
        sid fields of E lines or in the from/to lines of L/C lines are
        changed into symbols).

        Raises
        ------
        gfapy.RuntimeError
          If the line is not connected to a GFA instance.
        """
        if not self.is_connected():
            raise gfapy.RuntimeError(
                "Line {} is not connected to a GFA instance".format(self))
        # Cleanup steps, run strictly in this order.
        teardown_steps = (
            "_remove_field_backreferences",
            "_remove_field_references",
            "_disconnect_dependent_lines",
            "_remove_nonfield_backreferences",
            "_remove_nonfield_references",
        )
        for step_name in teardown_steps:
            getattr(self, step_name)()
        self._gfa._unregister_line(self)
        self._gfa = None
Ejemplo n.º 25
0
 def _check_captured_path_elem_connected(self, item):
     if not item.is_connected():
         raise gfapy.RuntimeError("Cannot compute induced set\n" +
                                  "Non-connected element found\n" +
                                  "Item: {}\nLine: {}".format(item, self))
Ejemplo n.º 26
0
 def _check_ref_not_self(self, item):
   if (item.line == self):
     raise gfapy.RuntimeError(
       "Line: {}\n".format(self)+
       "Item is the line itself\n"+
       "A group is not allowed to refer to itself")