def other_oriented_segment(self, oriented_segment, tolerant=False):
    """
    Return the oriented segment on the opposite side of the line.

    Parameters
    ----------
    oriented_segment : gfapy.OrientedLine
      One of the two oriented segments of the line.
    tolerant : bool
      *(defaults to: ***False***)*
      If set, None is returned instead of raising an exception when
      **oriented_segment** matches neither side of the line.

    Returns
    -------
    gfapy.OrientedLine or None
      The other oriented segment. None is returned only if **tolerant**
      is set and **oriented_segment** was not found.

    Raises
    ------
    gfapy.NotFoundError
      If **oriented_segment** is not an oriented segment of the line
      and **tolerant** is not set.
    """
    # The "from" side is tested first: if both sides compare equal to the
    # argument, the "to" side is the one returned.
    if self.oriented_from == oriented_segment:
        return self.oriented_to
    if self.oriented_to == oriented_segment:
        return self.oriented_from
    if tolerant:
        return None
    not_found = "Oriented segment '{}' not found\n".format(
        repr(oriented_segment))
    raise gfapy.NotFoundError(not_found + "Line: {}".format(self))
def try_get_segment(self, s):
    """Call segment() and raise an exception if the segment is not found.

    Parameters
    ----------
    s
      Argument passed unchanged to ``self.segment()``; the error message
      reports it as the segment name.

    Returns
    -------
    object
      Whatever ``self.segment(s)`` returned, provided it is not None.

    Raises
    ------
    gfapy.NotFoundError
      If ``self.segment(s)`` returns None.
    """
    found = self.segment(s)
    if found is not None:
        return found
    raise gfapy.NotFoundError("No segment has name {}".format(s))
def __validate_segment_references(self):
    """
    Check that no segment in ``self.segments`` is flagged as virtual.

    Raises
    ------
    gfapy.NotFoundError
      At the first segment whose ``virtual`` attribute is true. The
      message names the segment and appends its ``refstr()`` output as
      the list of lines referring to it.
    """
    for segment in self.segments:
        if not segment.virtual:
            continue
        message_parts = [
            "Segment {} ".format(segment.name),
            "does not exist\nReferences to {} ".format(segment.name),
            "were found in the following lines:\n",
        ]
        raise gfapy.NotFoundError("".join(message_parts) + segment.refstr())
def _find_edge_from_path_to_segment(self, path, oriented_segment):
    """
    Find the single edge joining the last path element to a segment.

    The edges of ``oriented_segment.line`` are scanned. An edge whose
    ``sid1``/``sid2`` equal the pair (``oriented_segment``, ``path[-1]``)
    in either order is collected with orientation "+". An edge whose
    ``sid1``/``sid2`` equal the pair of the inverted elements, in either
    order, is collected with orientation "-".

    Parameters
    ----------
    path : list
      Elements collected so far; only the last one is compared, the
      whole list is reported in error messages.
    oriented_segment : gfapy.OrientedLine
      The oriented segment which shall be reached.

    Returns
    -------
    gfapy.OrientedLine
      The only matching edge, with the orientation described above.

    Raises
    ------
    gfapy.NotFoundError
      If no edge matches.
    gfapy.NotUniqueError
      If more than one edge matches.
    """

    def listing(elements):
        # One "<element> (<line>)" row per element, as used in both errors.
        return "".join([" {} ({})\n".format(e, e.line) for e in elements])

    candidates = []
    for edge in oriented_segment.line.edges:
        same_strand = (
            (edge.sid1 == oriented_segment and edge.sid2 == path[-1])
            or (edge.sid1 == path[-1] and edge.sid2 == oriented_segment)
        )
        if same_strand:
            candidates.append(gfapy.OrientedLine(edge, "+"))
            continue
        opposite_strand = (
            (edge.sid1 == oriented_segment.inverted()
             and edge.sid2 == path[-1].inverted())
            or (edge.sid1 == path[-1].inverted()
                and edge.sid2 == oriented_segment.inverted())
        )
        if opposite_strand:
            candidates.append(gfapy.OrientedLine(edge, "-"))

    if len(candidates) == 1:
        return candidates[0]

    # Both failure modes share the same description of the search context.
    context = (
        "Line: {}\n".format(self)
        + "Previous elements:\n"
        + listing(path)
        + "Current element:\n"
        + " {} ({})\n".format(oriented_segment, oriented_segment.line)
    )
    if not candidates:
        raise gfapy.NotFoundError(
            "Path is not valid, segments are not contiguous\n" + context)
    raise gfapy.NotUniqueError(
        "Path is not unique\n" + context
        + "Possible edges\n" + listing(candidates))
def other(self, segment, tolerant=False):
    """
    The other segment of a connection line.

    The comparison is done on the string representations of **segment**
    and of the ``from_segment`` / ``to_segment`` fields of the line.

    Parameters
    ----------
    segment : gfapy.line.segment.GFA1 or str
      Segment name or instance.
    tolerant : bool
      *(defaults to: ***False***)*
      If set, None is returned instead of raising an exception when
      **segment** is not involved in the connection.

    Raises
    ------
    gfapy.NotFoundError
      If segment is not involved in the connection and **tolerant** is
      not set.

    Returns
    -------
    gfapy.line.segment.GFA1 or str or None
      The content of the field (``to_segment`` or ``from_segment``) on
      the other side of the connection, i.e. the name or instance of the
      other segment. If circular, then **segment**. None only in
      tolerant mode, when **segment** is not found.
    """
    segment_name = str(segment)
    if segment_name == str(self.from_segment):
        # Use the same field that the comparison below relies on, instead
        # of depending on a "to" alias being available on the line.
        return self.to_segment
    if segment_name == str(self.to_segment):
        return self.from_segment
    if tolerant:
        return None
    raise gfapy.NotFoundError(
        "Line {} does not involve segment {}".format(self, segment_name))
def _initialize_links(self):
    """
    Rebuild the list of link references stored in ``self._refs["links"]``.

    For every ``(from_segment, to_segment, cigar)`` triple returned by
    ``_compute_required_links()``:

    - if the Gfa knows both segments, a link is searched with
      ``_search_link()``; when the link found satisfies
      ``is_compatible_complement()`` its orientation is "-", otherwise "+";
    - if no link is available, a virtual GFA1 link is created and
      connected to the Gfa, unless ``_segments_first_order`` is set;
    - the link is appended to ``self._refs["links"]`` wrapped in an
      OrientedLine and receives a reference to this line under "paths".

    Raises
    ------
    gfapy.NotFoundError
      If a required link is missing and the Gfa has
      ``_segments_first_order`` set.
    """

    def new_virtual_link(src, dst, overlap):
        # Placeholder link standing in for one not (yet) found in the Gfa.
        fields = {
            "from_segment": src.line,
            "from_orient": src.orient,
            "to_segment": dst.line,
            "to_orient": dst.orient,
            "overlap": overlap,
        }
        return gfapy.line.edge.Link(fields, virtual=True, version="gfa1")

    self._refs["links"] = []
    for from_segment, to_segment, cigar in self._compute_required_links():
        link = None
        orient = "+"
        both_known = (self._gfa.segment(from_segment.line)
                      and self._gfa.segment(to_segment.line))
        if both_known:
            link = self._gfa._search_link(from_segment, to_segment, cigar)
            if link is not None and link.is_compatible_complement(
                    from_segment, to_segment, cigar):
                orient = "-"
        if link is None:
            if self._gfa._segments_first_order:
                raise gfapy.NotFoundError(
                    "Path: {}\n".format(self)
                    + "requires a non-existing link:\n"
                    + "from={} to={} cigar={}".format(
                        from_segment, to_segment, cigar))
            link = new_virtual_link(from_segment, to_segment, cigar)
            link.connect(self._gfa)
        self._refs["links"].append(gfapy.OrientedLine(link, orient))
        link._add_reference(self, "paths")
def __validate_path_links(self):
    """
    Check that no link referenced by the GFA1 paths is flagged as virtual.

    Every oriented link in the ``links`` of each element of
    ``self._gfa1_paths`` is inspected, in order.

    Raises
    ------
    gfapy.NotFoundError
      At the first link whose ``virtual`` attribute is true. The message
      shows the link (without virtual commentary) followed by its
      ``refstr()`` output.
    """
    # Lazily flatten paths -> oriented links -> links, keeping the order.
    required_links = (oriented.line
                      for gfa1_path in self._gfa1_paths
                      for oriented in gfa1_path.links)
    for link in required_links:
        if not link.virtual:
            continue
        shown = link.to_str(add_virtual_commentary=False)
        raise gfapy.NotFoundError(
            "A link equivalent to:\n{}\n".format(shown)
            + "does not exist, but is required by the following paths:\n"
            + link.refstr())
def try_get_line(self, l):
    """Call line() and raise an exception if the line is not found.

    Parameters
    ----------
    l
      Argument passed unchanged to ``self.line()``; the error message
      reports it as the line ID.

    Returns
    -------
    object
      Whatever ``self.line(l)`` returned, provided it is not None.

    Raises
    ------
    gfapy.ValueError
      If no line is found and **l** is a placeholder.
    gfapy.NotFoundError
      If no line is found and **l** is not a placeholder.
    """
    gfa_line = self.line(l)
    if gfa_line is not None:
        return gfa_line
    # A placeholder can never identify a line: report it as a value error
    # rather than as a missing line.
    if gfapy.is_placeholder(l):
        raise gfapy.ValueError(
            "'*' is a placeholder and not a valid name for a line")
    raise gfapy.NotFoundError("No line found with ID {}".format(l))
def _line_for_ref_symbol(self, ref):
    """
    Return the line identified by **ref**, creating a virtual one if needed.

    If the Gfa has no line for **ref**, a virtual GFA2 ``Unknown`` line
    named **ref** is created and added to the Gfa, unless
    ``_segments_first_order`` is set. In both cases the returned line
    receives a reference to this group, under "paths" if the record type
    of the group is "O", under "sets" otherwise.

    Parameters
    ----------
    ref
      Identifier looked up with ``self._gfa.line()``.

    Returns
    -------
    object
      The line found in the Gfa, or the newly created virtual line.

    Raises
    ------
    gfapy.NotFoundError
      If no line is found and the Gfa has ``_segments_first_order`` set.
    """
    target = self._gfa.line(ref)
    if target is None:
        if self._gfa._segments_first_order:
            raise gfapy.NotFoundError(
                "Group: {}\n".format(self)
                + "requires a non-existing ref with ID {}".format(ref))
        target = gfapy.line.unknown.Unknown(
            {"name": ref}, virtual=True, version="gfa2")
        self._gfa.add_line(target)
    if self.record_type == "O":
        collection = "paths"
    else:
        collection = "sets"
    target._add_reference(self, collection)
    return target
def try_get_coverage(self, count_tag="RC", unit_length=1):
    """
    As coverage, but raises an exception if the coverage cannot be computed.

    Parameters
    ----------
    count_tag : str
      *(defaults to: ***"RC"***)*
      Passed to ``coverage()``; named in the error message as the
      undefined tag.
    unit_length : int
      *(defaults to: ***1***)*
      Passed to ``coverage()``.

    Returns
    -------
    object
      The value returned by ``coverage()``, provided it is not None.

    Raises
    ------
    gfapy.NotFoundError
      If ``coverage()`` returns None. ``try_get_length()`` is called
      before raising, so any exception it raises takes precedence.
    """
    value = self.coverage(count_tag=count_tag, unit_length=unit_length)
    if value is not None:
        return value
    # Give try_get_length() the chance to report its own error first.
    self.try_get_length()
    raise gfapy.NotFoundError(
        "Tag {} undefined for segment {}".format(count_tag, self.name))
def __validate_group_items(self):
    """
    Check that no item of a set or path is flagged as virtual.

    Nothing is checked if the version is "gfa1". Otherwise the items of
    all ``self.sets`` followed by all ``self.paths`` are inspected; items
    which are OrientedLine instances are unwrapped to their line first.

    Raises
    ------
    gfapy.NotFoundError
      At the first item whose ``virtual`` attribute is true. The message
      names the item and appends its ``refstr()`` output.
    """
    if self.version == "gfa1":
        return
    for group in self.sets + self.paths:
        for member in group.items:
            if isinstance(member, gfapy.OrientedLine):
                target = member.line
            else:
                target = member
            if not target.virtual:
                continue
            raise gfapy.NotFoundError(
                "A line with identifier {}\n".format(target.name)
                + "does not exist, but is required by the following groups:\n"
                + target.refstr())
def try_get_length(self):
    """
    Value of ``self.length``, raising an exception if it is None.

    Returns
    -------
    object
      The value of ``self.length``.

    Raises
    ------
    gfapy.NotFoundError
      If not an LN tag and the sequence is "*".

    See Also
    --------
    __len__
    """
    length = self.length
    if length is not None:
        return length
    raise gfapy.NotFoundError("No length information available")
def _push_nonfirst_edge_on_se_path(self, path, oriented_edge):
    """
    Append an oriented edge, and the segment it leads to, to a path.

    The edge is appended to **path** first. Its two oriented segments
    (``sid1`` and ``sid2``, both inverted if the edge orientation is "-")
    are then compared with the element which was last in **path**: the
    one which does not match it is appended after the edge.

    Parameters
    ----------
    path : list
      Non-empty list of the elements collected so far; modified in place.
    oriented_edge : gfapy.OrientedLine
      The edge to traverse, with the orientation of the traversal.

    Raises
    ------
    gfapy.NotFoundError
      If neither side of the edge equals the previous last element of
      **path**. The edge has already been appended to **path** at that
      point, so it is also listed among the previous elements.
    """
    prev_os = path[-1]
    path.append(oriented_edge)
    edge = oriented_edge.line
    possible_prev = [edge.sid1, edge.sid2]
    if oriented_edge.orient == "-":
        # Work on inverted copies: calling invert() on edge.sid1/edge.sid2
        # would flip the objects stored in the edge itself, altering the
        # edge every time it is traversed in reverse direction.
        possible_prev = [end.inverted() for end in possible_prev]
    if prev_os == possible_prev[0]:
        path.append(possible_prev[1])
    elif prev_os == possible_prev[1]:
        path.append(possible_prev[0])
    else:
        raise gfapy.NotFoundError(
            "Path is not valid, elements are not contiguous\n"
            + "Line: {}\n".format(self)
            + "Previous elements:\n"
            + "".join([" {} ({})\n".format(e, e.line) for e in path])
            + "Current element:\n"
            + " {} ({})".format(oriented_edge, oriented_edge.line))
def _initialize_references(self):
    """
    Replace the "from" and "to" segment names with segment references.

    For each direction ("from", then "to"):

    - the segment named by the field is looked up in the Gfa; if it is
      missing, a virtual GFA1 segment with sequence "*" is created and
      connected to the Gfa, unless ``_segments_first_order`` is set;
    - the ``<direction>_segment`` field is set to the segment;
    - the segment receives a reference to this line, under
      ``dovetails_<end type>`` if the record type is "L", otherwise under
      "edges_to_contained" (from) or "edges_to_containers" (to).

    Raises
    ------
    gfapy.NotFoundError
      If a segment is missing and the Gfa has ``_segments_first_order``
      set.
    """
    for direction in ("from", "to"):
        segment = self._gfa.segment(self.get(direction))
        if segment is None:
            if self._gfa._segments_first_order:
                raise gfapy.NotFoundError()
            segment = gfapy.line.segment.GFA1(
                {"name": self.get(direction), "sequence": "*"},
                version="gfa1", virtual=True)
            segment.connect(self._gfa)
        self._set_existing_field(direction + "_segment", segment,
                                 set_reference=True)
        # The segment end is read only after the field has been updated.
        if self.record_type == "L":
            if direction == "from":
                end_type = self.from_end.end_type
            else:
                end_type = self.to_end.end_type
            refkey = "dovetails_{}".format(end_type)
        elif direction == "from":
            refkey = "edges_to_contained"
        else:
            refkey = "edges_to_containers"
        segment._add_reference(self, refkey)
def _get_dynamic_field(self, name, err):
    """
    Resolve an attribute name to the value of a field of the line.

    A ``try_get_`` prefix in **name** is stripped and selects the strict
    mode. The outcome depends on the remaining name:

    - name of a field stored in ``self._data``: the value from ``get()``,
      or, in strict mode, a function without arguments which calls
      ``try_get()`` for the field;
    - predefined tag or valid custom tag name, without a stored value:
      None, or, in strict mode, an exception is raised;
    - anything else: **err** is raised.

    Parameters
    ----------
    name : str
      The requested attribute name.
    err : Exception
      Exception to raise if the name cannot be resolved.

    Raises
    ------
    gfapy.NotFoundError
      In strict mode, if the name is a valid tag name without a value.
    Exception
      **err**, if the line is virtual or the name is neither a stored
      field nor a valid tag name.
    """
    if self.virtual:
        raise err
    prefix = "try_get_"
    try_get = name.startswith(prefix)
    if try_get:
        name = name[len(prefix):]
    if name in self._data:
        if try_get:
            return lambda: self.try_get(name)
        return self.get(name)
    known_tagname = (name in self.__class__.PREDEFINED_TAGS
                     or self._is_valid_custom_tagname(name))
    if not known_tagname:
        raise err
    if try_get:
        raise gfapy.NotFoundError(
            "No value defined for tag {}".format(name))
    return None
def other_oriented_segment(self, oriented_segment):
    """The other oriented segment.

    Parameters:
      oriented_segment (gfapy.OrientedLine) : One of the two oriented
        segments of the line.

    Returns:
      gfapy.OrientedLine : ``sid2`` if **oriented_segment** equals
        ``sid1``, otherwise ``sid1`` if it equals ``sid2``.

    Raises:
      gfapy.error.NotFoundError: If **oriented_segment** equals neither
        ``sid1`` nor ``sid2``.
    """
    # sid1 is tested first: if both sides compare equal to the argument,
    # sid2 is the one returned.
    if self.sid1 == oriented_segment:
        return self.sid2
    if self.sid2 == oriented_segment:
        return self.sid1
    not_found = "Oriented segment '{}' not found\n".format(oriented_segment)
    raise gfapy.NotFoundError(not_found + "Line: {}".format(self))
def other(self, segment):
    """The other segment of an edge line.

    A gfapy.Line instance passed as **segment** is replaced by its name;
    the name is then compared with the names of ``sid1`` and ``sid2``.

    Parameters:
      segment (gfapy.line.segment.GFA2) : Segment name or instance.

    Raises:
      gfapy.error.NotFoundError: If segment is not a segment of the line.

    Returns:
      gfapy.line.segment.GFA2 or str : the ``line`` attribute of the
        oriented segment on the other side of the connection, i.e. the
        instance or name of the other segment (the segment itself, if the
        connection is circular)
    """
    if isinstance(segment, gfapy.Line):
        wanted = segment.name
    else:
        wanted = segment
    if wanted == self.sid1.name:
        return self.sid2.line
    if wanted == self.sid2.name:
        return self.sid1.line
    raise gfapy.NotFoundError(
        "Line {} does not involve segment {}".format(self, wanted))
def _initialize_references(self):
    """
    Replace the segment names in ``sid1`` and ``sid2`` with references.

    For each of the two fields:

    - the segment named by the field is looked up in the Gfa; if it is
      missing, a virtual GFA2 segment (``slen`` 1, sequence "*") is
      created and connected to the Gfa, unless ``_segments_first_order``
      is set;
    - the field is set to an OrientedLine combining the segment with the
      orientation previously stored in the field;
    - the segment receives a reference to this line, under the key
      computed by ``_refkey_for_s()``.

    Raises
    ------
    gfapy.NotFoundError
      If a segment is missing and the Gfa has ``_segments_first_order``
      set.
    """

    def new_virtual_segment(name):
        # Placeholder for a segment not (yet) defined in the Gfa.
        fields = {"sid": name, "slen": 1, "sequence": "*"}
        return gfapy.line.segment.GFA2(fields, version="gfa2", virtual=True)

    for snum in (1, 2):
        sid = "sid{}".format(snum)
        orient = self.get(sid).orient
        segment_name = self.get(sid).line
        segment = self._gfa.segment(segment_name)
        if segment is None:
            if self._gfa._segments_first_order:
                raise gfapy.NotFoundError()
            segment = new_virtual_segment(segment_name)
            segment.connect(self._gfa)
        self._set_existing_field(sid, gfapy.OrientedLine(segment, orient),
                                 set_reference=True)
        segment._add_reference(self, self._refkey_for_s(snum))
def _validate_rgfa_tags_in_lines(self, lines):
    """
    Validate rGFA tags for a group of lines

    For each line, the tags listed for its record type in the
    "mandatory" section of ``gfapy.Gfa.RGFA_TAGS`` must be present. The
    tags listed in the "mandatory" or "optional" section must have the
    listed datatype, if they are present.

    Parameters
    ----------
    lines : iterable
      The lines to validate.

    Raises
    ------
    gfapy.NotFoundError
      If a mandatory tag is missing in a line.
    gfapy.ValueError
      If a tag of a line has a datatype different from the required one.
    """
    for line in lines:
        rt = line.record_type
        mandatory = gfapy.Gfa.RGFA_TAGS["mandatory"].get(rt, {})
        # An "optional" entry overrides a "mandatory" one with the same tag.
        expected_datatypes = mandatory.copy()
        expected_datatypes.update(
            gfapy.Gfa.RGFA_TAGS["optional"].get(rt, {}))
        for tag in mandatory:
            if tag in line.tagnames:
                continue
            raise gfapy.NotFoundError(
                "rGFA {} lines must have a {} tag\n".format(rt, tag)
                + "offending line:\n{}".format(str(line)))
        for tag, datatype in expected_datatypes.items():
            if tag in line.tagnames and line.get_datatype(tag) != datatype:
                raise gfapy.ValueError(
                    "rGFA {} tags in {} lines must have datatype {}\n".format(
                        tag, rt, datatype)
                    + "offending line:\n{}".format(str(line)))
def field_to_s(self, fieldname, tag=False):
    """
    Compute the string representation of a field.

    The field name is resolved through the ``FIELD_ALIAS`` table of the
    class. A value which is not already a string is converted using the
    datatype of the field. The string is validated if the validation
    level of the line is 2 or higher.

    Parameters
    ----------
    fieldname : str
      The tag name of the field.
    tag : bool
      *(defaults to: ***False***)*
      Return the tagname:datatype:value representation.

    Raises
    ------
    gfapy.NotFoundError
      If field is not defined.

    Returns
    -------
    str
      The string representation
    """
    fieldname = self.__class__.FIELD_ALIAS.get(fieldname, fieldname)
    value = self._data.get(fieldname, None)
    if value is None:
        raise gfapy.NotFoundError("Field {} not found".format(fieldname))
    datatype = self._field_or_default_datatype(fieldname, value)
    if not isinstance(value, str):
        value = gfapy.Field._to_gfa_field(value, datatype=datatype,
                                          fieldname=fieldname, line=self)
    if self.vlevel >= 2:
        gfapy.Field._validate_gfa_field(value, datatype, fieldname)
    if not tag:
        return value
    return gfapy.Field._to_gfa_tag(value, fieldname, datatype=datatype,
                                   line=self)
def _initialize_references(self):
    """
    Replace the segment names in ``sid1`` and ``sid2`` with references.

    The first element of the result of ``_substring_type()`` is computed
    for both pairs of positions (``beg1``/``end1`` and ``beg2``/``end2``)
    before anything is modified. Then, for each of the two fields:

    - the segment named by the field is looked up in the Gfa; if it is
      missing, a virtual GFA2 segment (``slen`` 1, sequence "*") is
      created and connected to the Gfa, unless ``_segments_first_order``
      is set;
    - the field is set to an OrientedLine combining the segment with the
      orientation previously stored in the field;
    - the segment receives a reference to this line, under the key
      computed by ``_refkey_for_s()`` from the segment number and the
      two substring types.

    Raises
    ------
    gfapy.NotFoundError
      If a segment is missing and the Gfa has ``_segments_first_order``
      set.
    """

    def new_virtual_segment(name):
        # Placeholder for a segment not (yet) defined in the Gfa.
        fields = {"sid": name, "slen": 1, "sequence": "*"}
        return gfapy.line.segment.GFA2(fields, version="gfa2", virtual=True)

    st1 = self._substring_type(self.beg1, self.end1)[0]
    st2 = self._substring_type(self.beg2, self.end2)[0]
    for snum in (1, 2):
        sid = "sid{}".format(snum)
        orient = self.get(sid).orient
        segment = self._gfa.segment(self.get(sid).line)
        if segment is None:
            if self._gfa._segments_first_order:
                raise gfapy.NotFoundError()
            segment = new_virtual_segment(self.get(sid).line)
            segment.connect(self._gfa)
        self._set_existing_field(sid, gfapy.OrientedLine(segment, orient),
                                 set_reference=True)
        segment._add_reference(self, self._refkey_for_s(snum, st1, st2))
def try_get(self, fieldname):
    """
    Value of a field, raising an exception if it is not defined.

    Parameters
    ----------
    fieldname : str
      Name of the field.

    Raises
    ------
    gfapy.NotFoundError
      If field is not defined, i.e. ``get()`` returns None for it.

    Returns
    -------
    object
      Value of the field; never None.
    """
    value = self.get(fieldname)
    if value is not None:
        return value
    raise gfapy.NotFoundError(
        "No value defined for tag {}".format(fieldname))
def _check_item_included(self, item):
    """
    Check that **item** is one of the items of the line.

    Parameters
    ----------
    item
      The element to look for in ``self.items``.

    Raises
    ------
    gfapy.NotFoundError
      If **item** is not contained in ``self.items``.
    """
    if item in self.items:
        return
    # Each piece of information goes on its own row of the message; the
    # item representation used to run straight into the explanation.
    raise gfapy.NotFoundError(
        "Line: {}\n".format(self)
        + "Item: {}\n".format(repr(item))
        + "Items of the line do not include the item")