def validate(self, version="gfa1"):
    """Validates the CIGAR operation.

    Parameters:
      version (str): 'gfa1' or 'gfa2'

    Raises:
      ~gfapy.error.VersionError: If a wrong **version** is specified.
      ~gfapy.error.TypeError: If the CIGAR operation length is not an integer
        or a string representing an integer.
      ~gfapy.error.ValueError: If the length of an operation is < 0;
        If an operation code is invalid in general or for the specified
        GFA version.
    """
    if version not in ("gfa1", "gfa2"):
        raise gfapy.VersionError("Version error: {}".format(repr(version)))
    if not isinstance(self.length, (int, str)):
        raise gfapy.TypeError(
            "Type error: length of CIGAR is {}".format(self.length))
    try:
        length = int(self.length)
    except ValueError:
        # A string that does not represent an integer is reported with the
        # documented library error instead of leaking the built-in ValueError.
        raise gfapy.TypeError(
            "Type error: length of CIGAR is {}".format(self.length))
    if length < 0:
        raise gfapy.ValueError("Length of CIGAR is {}".format(self.length))
    # The set of accepted operation codes depends on the GFA version
    if version == "gfa2":
        accepted_codes = Operation.CODE_GFA1_GFA2
    else:
        accepted_codes = Operation.CODE
    if self.code not in accepted_codes:
        raise gfapy.ValueError(
            "Invalid CIGAR operation code {} for version {}".format(
                repr(self.code), repr(version)))
def validate(self, ts = None, version = "gfa2"):
    """Validates the trace alignment

    Parameters:
      ts (int): Trace Spacing. If specified (and non-zero), it will be checked
        that no value is larger than **ts** (default: **None**, no check).
      version (str) : GFA version (must be 'gfa2'; traces do not exist in
        'gfa1')

    Raises:
      ~gfapy.error.TypeError: If the list contains non-integer values
      ~gfapy.error.ValueError: If the list contains values < 0 or > **ts**
      ~gfapy.error.VersionError: If the version is 'gfa1' or an invalid version
        string is provided
    """
    if version != "gfa2":
        if version == "gfa1":
            reason = "Traces are not compatible with GFA1"
        else:
            reason = "Version unknown: {}".format(repr(version))
        raise gfapy.VersionError(reason)
    for value in self:
        if not isinstance(value, int):
            template = ("Trace contains non-integer values ({0} found)\n" +
                        "Content: {1}")
            raise gfapy.TypeError(template.format(value, repr(self)))
        if value < 0:
            template = ("Trace contains value < 0 ({0} found)\n" +
                        "Content: {1}")
            raise gfapy.ValueError(template.format(value, repr(self)))
        # A falsy ts (None or 0) disables the upper-bound check
        if ts and value > ts:
            template = ("Trace contains value > TS ({0} found, TS = {2})\n" +
                        "Content: {1}")
            raise gfapy.ValueError(template.format(value, repr(self), ts))
def integer_type(range):
    """
    Selects the integer subtype able to hold all values of a range.

    A signed subtype is chosen when the range minimum is negative, an
    unsigned one otherwise. Subtypes are tried in the order in which they are
    listed in SIGNED_INT_SUBTYPE / UNSIGNED_INT_SUBTYPE and the first fitting
    one is returned.

    Parameters
    ----------
    range : (int, int)
      The integer range (min, max)

    Raises
    ------
    gfapy.ValueError
      If the integer range is outside all subtype ranges

    Returns
    -------
    one of gfapy.NumericArray.INT_SUBTYPE
      subtype code
    """
    lowest = range[0]
    highest = range[1]
    needs_sign = lowest < 0
    if needs_sign:
        candidates = NumericArray.SIGNED_INT_SUBTYPE
    else:
        candidates = NumericArray.UNSIGNED_INT_SUBTYPE
    for code in candidates:
        bounds = NumericArray.SUBTYPE_RANGE[code]
        # The lower bound only matters for signed subtypes
        if needs_sign and not (bounds[0] <= lowest):
            continue
        # The upper bound of a subtype range is exclusive
        if bounds[1] > highest:
            return code
    raise gfapy.ValueError(
        "NumericArray: values are outside of all integer subtype ranges\n" +
        "Range: {}".format(repr(range)))
def _substring_type(self, begpos, endpos):
    """Classify a substring from its begin and end positions.

    Returns a tuple ``(kind, flag)``. ``kind`` is one of "pfx", "sfx",
    "whole" or "internal". ``flag`` is True when both positions are the
    first position, when both are the last position, or when an internal
    substring has equal begin and end values (i.e. the substring is empty).

    Raises:
      gfapy.error.ValueError: if the begin value is larger than the end value
      gfapy.error.FormatError: if begin is a last position but end is not
    """
    beg_value = gfapy.posvalue(begpos)
    end_value = gfapy.posvalue(endpos)
    if beg_value > end_value:
        raise gfapy.ValueError(
            "Line: {}\n".format(str(self)) +
            "begin > end: {}$ > {}".format(beg_value, end_value))

    # Substring starting at the first position: prefix or whole string
    if gfapy.isfirstpos(begpos):
        if gfapy.isfirstpos(endpos):
            return ("pfx", True)
        if gfapy.islastpos(endpos):
            return ("whole", False)
        return ("pfx", False)

    # Substring starting at the last position: it must also end there
    if gfapy.islastpos(begpos):
        if gfapy.islastpos(endpos):
            return ("sfx", True)
        raise gfapy.FormatError(
            "Line: {}\n".format(str(self)) +
            "Wrong use of $ marker\n" +
            "{} >= {}$".format(end_value, beg_value))

    # Substring starting somewhere in the middle
    if gfapy.islastpos(endpos):
        return ("sfx", False)
    return ("internal", beg_value == end_value)
def compute_subtype(self):
    """
    Determines the array subtype from the elements of the array.

    An array made only of floats (including an empty array) has subtype "f".
    Otherwise all elements must be integers, and the subtype is the one
    selected by integer_type() for the (min, max) range of the elements.

    Raises
    ------
    gfapy.ValueError
      If the array is not a valid numeric array

    Returns
    -------
    one of gfapy.NumericArray.SUBTYPE
    """
    if all(isinstance(item, float) for item in self):
        return "f"
    for item in self:
        if not isinstance(item, int):
            raise gfapy.ValueError(
                "NumericArray does not contain homogenous numeric values\n" +
                "Content: {}".format(repr(self)))
    # The array cannot be empty here: an empty array is reported as "f" above
    return gfapy.NumericArray.integer_type((min(self), max(self)))
def gen():
    """Yield the decoded elements of the numeric array string.

    The first entry of ``elems`` is skipped. For the "f" subtype each
    remaining element is converted with float(); for every other subtype it
    is converted with int() and, unless ``valid`` is set, checked against the
    half-open interval ``[range[0], range[1])``.

    Raises:
      gfapy.error.ValueError: if an element cannot be converted to an
        integer, or if it lies outside of the range of the subtype
    """
    for e in elems[1:]:
        if subtype != "f":
            try:
                e = int(e)
            except (TypeError, ValueError):
                # Only conversion failures are translated; anything else
                # (e.g. KeyboardInterrupt) must propagate untouched.
                raise gfapy.ValueError(
                    "Value is not valid: {}\n".format(e) +
                    "Numeric array string: {}".format(string))
            if not valid and not (e >= range[0] and e < range[1]):
                raise gfapy.ValueError((
                    "NumericArray: " +
                    "value is outside of subtype {0} range\n" +
                    "Value: {1}\n" +
                    "Range: {2}\n" +
                    "Content: {3}").format(subtype, e, repr(range),
                                           repr(elems)))
            yield e
        else:
            yield float(e)
def try_get_line(self, l):
    """Return the result of line(), raising an exception if it is None.

    Raises:
      gfapy.error.ValueError: if nothing is found and **l** is a placeholder
      gfapy.error.NotFoundError: if nothing is found for any other **l**
    """
    found = self.line(l)
    if found is not None:
        return found
    if gfapy.is_placeholder(l):
        raise gfapy.ValueError(
            "'*' is a placeholder and not a valid name for a line")
    raise gfapy.NotFoundError("No line found with ID {}".format(l))
def validate_decoded(obj):
    """Validate a decoded position value.

    Integers must not be negative; gfapy.LastPos instances are checked with
    their own validate() method. Any other class is rejected.

    Raises:
      gfapy.error.ValueError: if **obj** is a negative integer
      gfapy.error.TypeError: if **obj** is neither an int nor a gfapy.LastPos
    """
    if isinstance(obj, int):
        if obj < 0:
            raise gfapy.ValueError(
                "{} is not a positive integer".format(obj))
        return
    if isinstance(obj, gfapy.LastPos):
        obj.validate()
        return
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: int, gfapy.LastPos)")
def pos(self):
    """Value of the GFA1 **pos** field, if the edge is a containment.

    For a containment, **beg1** is returned unless it is a first position
    while segment 2 is not covered from its first to its last position; in
    that case **beg2** is returned.

    Returns:
      int or gfapy.Lastpos

    Raises:
      gfapy.error.ValueError: If the edge is not a containment.
    """
    alignment_type = self._alignment_type
    if alignment_type == "I":
        raise gfapy.ValueError(
            "Line: {}\n".format(str(self)) +
            "Internal alignment, pos is not defined")
    if alignment_type == "L":
        raise gfapy.ValueError(
            "Line: {}\n".format(str(self)) +
            "Dovetail alignment, pos is not defined")
    if alignment_type == "C":
        if not gfapy.isfirstpos(self.beg1):
            return self.beg1
        seg2_is_whole = (gfapy.isfirstpos(self.beg2) and
                         gfapy.islastpos(self.end2))
        if seg2_is_whole:
            return self.beg1
        return self.beg2
    # Any other alignment type yields no value
    return None
def _from_string(cls, string, valid=False):
    """Parse the string representation of a position.

    A string ending with "$" yields an instance of **cls** built from the
    integer preceding the marker; any other string yields a plain integer.

    Parameters:
      string (str): the string to parse
      valid (bool): if True, the check that a plain integer is >= 0 is
        skipped; the flag is also passed on to the **cls** constructor

    Returns:
      int or an instance of **cls**

    Raises:
      gfapy.error.FormatError: if the string (without the optional trailing
        "$") does not represent an integer; this includes the empty string
      gfapy.error.ValueError: if a plain integer is negative and **valid**
        is not set
    """
    # Slicing (instead of indexing) keeps the empty string from raising an
    # IndexError, so that it is reported as a format error below.
    has_marker = (string[-1:] == "$")
    digits = string[:-1] if has_marker else string
    try:
        v = int(digits)
    except (TypeError, ValueError):
        raise gfapy.FormatError(
            "LastPos value has a wrong format: {}".format(string))
    if has_marker:
        return cls(v, valid=valid)
    if not valid:
        if v < 0:
            raise gfapy.ValueError("LastPos value shall be >= 0," +
                                   " {} found".format(v))
    return v
def _is_sid1_from(self):
    """Tell whether segment 1 is the 'from' segment of the edge.

    The decision is based on the roles computed by _segment_role() for the
    two segments: a contained segment 2, or a suffix of segment 1 paired
    with a prefix of segment 2, make segment 1 the 'from' segment; the
    mirrored situations make it the 'to' segment.

    Raises:
      gfapy.error.ValueError: for any other combination of roles
    """
    role1 = self._segment_role(self.beg1, self.end1, self.sid1.orient)
    role2 = self._segment_role(self.beg2, self.end2, self.sid2.orient)
    if role2 == "contained":
        return True
    if role1 == "contained":
        return False
    if role1 == "sfx" and role2 == "pfx":
        return True
    if role2 == "sfx" and role1 == "pfx":
        return False
    raise gfapy.ValueError(
        "Line: {}\n".format(str(self)) +
        "Internal overlap, 'from' is undefined\n" +
        "Roles: segment1 is {} ({},{}), segment2 is {} ({},{})".format(
            role1, self.beg1, self.end1, role2, self.beg2, self.end2))
def validate(self):
    """Checks that the value is a non-negative integer.

    Validation is performed by default on construction, unless the valid
    parameter of the constructor is set to True.

    Raises:
      gfapy.error.TypeError : if the value is not an integer
      gfapy.error.ValueError : if the value is not >= 0
    """
    current = self.value
    if not isinstance(current, int):
        raise gfapy.TypeError("LastPos value shall be an integer," +
                              " {} found".format(type(current)))
    if current < 0:
        raise gfapy.ValueError("LastPos value shall be >= 0," +
                               " {} found".format(current))
def _to_gfa1_a(self):
    """Build the list of fields of the GFA1 path (P) line.

    The list contains the record type "P", the path name, the
    comma-separated captured segments and the comma-separated overlaps of
    the captured edges. Segment names and overlaps are validated against the
    GFA1 datatypes before being added.

    Raises:
      gfapy.error.ValueError: if the path name is a placeholder
    """
    if gfapy.is_placeholder(self.name):
        raise gfapy.ValueError("Conversion to GFA1 failed\n" +
                               "The path name is a placeholder\t" +
                               "Line: {}".format(self))
    fields = ["P", self.name]

    oriented_segments = []
    for oriented in self.captured_segments:
        gfapy.Field._validate_gfa_field(oriented.name, "segment_name_gfa1")
        oriented_segments.append(str(oriented))
    fields.append(",".join(oriented_segments))

    cigars = []
    for oriented in self.captured_edges:
        overlap = oriented.line.overlap
        gfapy.Field._validate_gfa_field(overlap, "alignment_gfa1")
        cigars.append(str(overlap))
    fields.append(",".join(cigars))
    return fields
def _validate_rgfa_tags_in_lines(self, lines):
    """
    Validate rGFA tags for a group of lines

    For each line, the tags listed as mandatory for its record type in
    gfapy.Gfa.RGFA_TAGS must be present; mandatory and optional tags which
    are present must have the datatype given in that table.

    Raises:
      gfapy.error.NotFoundError: if a mandatory tag is missing
      gfapy.error.ValueError: if a tag has an unexpected datatype
    """
    for line in lines:
        rt = line.record_type
        required = gfapy.Gfa.RGFA_TAGS["mandatory"].get(rt, {})
        # Datatypes are checked for mandatory and optional tags alike
        expected_types = dict(required)
        expected_types.update(gfapy.Gfa.RGFA_TAGS["optional"].get(rt, {}))
        for tag in required:
            if tag not in line.tagnames:
                raise gfapy.NotFoundError(
                    "rGFA {} lines must have a {} tag\n".format(rt, tag) +
                    "offending line:\n{}".format(str(line)))
        for tag, datatype in expected_types.items():
            if tag not in line.tagnames:
                continue
            if line.get_datatype(tag) != datatype:
                raise gfapy.ValueError(
                    "rGFA {} tags in {} lines must have datatype {}\n".
                    format(tag, rt, datatype) +
                    "offending line:\n{}".format(str(line)))
def invert(symbol):
    """Computes the inverted orientation or end_type symbol.

    Parameters:
      symbol (str) : a one-character string, symbolizing an orientation
        (+ or -) or an end-type (L or R)

    Returns:
      str : the other one character string of the same category
        (e.g. - for +)

    Raises:
      gfapy.error.ValueError : if a string other than the mentioned ones
        is used
    """
    # Pairs are compared with ==, so that any kind of object can be passed
    inverse_pairs = (("+", "-"), ("-", "+"), ("L", "R"), ("R", "L"))
    for original, inverse in inverse_pairs:
        if symbol == original:
            return inverse
    raise gfapy.ValueError("No inverse defined for {}".format(symbol))
def fun(c):
    """Return the Watson-Crick complement of the character **c**.

    Characters missing from the WCC table are returned unchanged if
    ``valid`` is set; otherwise (or if the result is empty) a
    gfapy.error.ValueError is raised.
    """
    fallback = c if valid else None
    complement = WCC.get(c, fallback)
    if complement:
        return complement
    raise gfapy.ValueError(
        "{}: no Watson-Crick complement for {}".format(sequence, c))
def create_merged_segment(gfa, segpath, merged_name=None,
                          enable_tracking=False, cut_counts=False):
    """Create the segment resulting from merging the segments of a path.

    The first segment of the path is cloned, then each following segment is
    added to the clone, after cutting the part covered by the overlap of the
    link which connects it to the previous segment.

    Parameters:
      gfa : the graph which contains the segments
      segpath (list) : the segment ends of the path to merge
      merged_name (str) : passed on to _add_segment_to_merged(); if "short",
        gfa.unused_name() is used instead
      enable_tracking (bool) : passed on to _add_segment_to_merged()
      cut_counts (bool) : if set, the summed count tags are scaled by
        length / (total cut + length) of the merged segment

    Returns:
      tuple : (merged segment, first_reversed, last_reversed), where
        first_reversed tells if the path starts with an L end, and
        last_reversed is None if the path consists of a single segment end

    Raises:
      gfapy.error.ValueError : if two consecutive segment ends of the path
        are not connected by exactly one dovetail
      gfapy.error.InconsistencyError : if the validation level of the graph
        is > 0 and the LN tag and the sequence length of the merged segment
        differ
    """
    merged = gfa.try_get_segment(segpath[0].segment).clone()
    # vlevel is set to 0 while the clone is being assembled and restored
    # afterwards (presumably to suspend validation of intermediate states)
    merged_vlevel = merged.vlevel
    merged.vlevel = 0
    total_cut = 0
    a = segpath[0]
    first_reversed = (a.end_type == "L")
    last_reversed = None
    if merged_name == "short":
        merged_name = gfa.unused_name()
    gfa._add_segment_to_merged(merged, gfa.segment(a.segment),
                               first_reversed, 0, True,
                               enable_tracking=enable_tracking,
                               merged_name=merged_name)
    for s in segpath[1:]:
        b = gfapy.SegmentEnd(s).inverted()
        ls = gfa.segment(a.segment).end_relations(a.end_type, b, "dovetails")
        if len(ls) != 1:
            raise gfapy.ValueError(
                "A single link was expected between {} and {}, ".format(a, b) +
                "{} were found".format(len(ls)))
        l = ls[0]
        if not l.overlap:
            cut = 0
        else:
            # The cut can never be longer than the segment being appended
            cut = min(l.overlap.length_on_query(),
                      gfa.segment(b.segment).LN)
        total_cut += cut
        last_reversed = (b.end_type == "R")
        gfa._add_segment_to_merged(merged, gfa.segment(b.segment),
                                   last_reversed, cut, False,
                                   enable_tracking=enable_tracking,
                                   merged_name=merged_name)
        a = gfapy.SegmentEnd(b).inverted()
    merged.vlevel = merged_vlevel
    # Name, "or" tag and sequence may have been collected as lists of parts
    if isinstance(merged.name, list):
        merged.name = "_".join(merged.name)
    ortag = merged.get("or")
    if isinstance(ortag, list):
        merged.set("or", ",".join(ortag))
    if not gfapy.is_placeholder(merged.sequence):
        merged.sequence = "".join(merged.sequence)
        if not merged.LN:
            merged.LN = len(merged.sequence)
        elif gfa._vlevel > 0 and merged.LN != len(merged.sequence):
            # The sequence is a str at this point: its length is len()
            raise gfapy.InconsistencyError(
                "Computed sequence length {} ".format(len(merged.sequence)) +
                "and computed LN {} differ".format(merged.LN))
    if merged.length is None:
        # Without a length the counts cannot be computed: remove them
        for count_tag in ["KC", "RC", "FC"]:
            merged.set(count_tag, None)
    else:
        factor = 1
        if cut_counts:
            factor = merged.length / (total_cut + merged.length)
        # NOTE(review): the double-underscore attribute is looked up as
        # written here; confirm how name mangling applies in this context
        for count_tag, count in gfa.__sum_of_counts(segpath, factor).items():
            merged.set(count_tag, count)
    return merged, first_reversed, last_reversed
def __validate_end_type(self):
    """Check that the end type is either "L" or "R".

    Raises:
      gfapy.error.ValueError : for any other end type
    """
    if self.__end_type in ["L", "R"]:
        return
    raise gfapy.ValueError(
        "Invalid end type ({})".format(repr(self.__end_type)))
def __validate_orient(self):
    """Check that the orientation is either "+" or "-".

    Raises:
      gfapy.error.ValueError : for any other orientation
    """
    if self.orient in ["+", "-"]:
        return
    raise gfapy.ValueError(
        "Invalid orientation ({})".format(self.orient))
def _validate_rgfa_no_containments(self):
    """Validate the absence of C lines in rGFA

    Raises:
      gfapy.error.ValueError : if the graph has any containment
    """
    if not self.containments:
        return
    raise gfapy.ValueError("rGFA does not support containment lines")
def _validate_rgfa_no_paths(self):
    """Validate the absence of P lines in rGFA

    Raises:
      gfapy.error.ValueError : if the graph has any path
    """
    if not self.paths:
        return
    raise gfapy.ValueError("rGFA does not support path lines")
def _validate_rgfa_link_overlaps(self):
    """Validate that the overlap of all dovetails is 0M in rGFA

    Raises:
      gfapy.error.ValueError : if the string representation of the overlap
        field of a dovetail is not "0M"
    """
    for link in self.dovetails:
        if link.field_to_s("overlap") != "0M":
            # The two parts are concatenated into a single message; passing
            # them as separate arguments would render the error as a tuple
            raise gfapy.ValueError(
                "rGFA CIGARs must be 0M\n" +
                "offending line:\n{}".format(str(link)))
def _validate_rgfa_no_headers(self):
    """Validate the absence of H lines in rGFA

    Raises:
      gfapy.error.ValueError : if the graph has any header
    """
    if not self.headers:
        return
    raise gfapy.ValueError("rGFA does not support header lines")
def _initialize_tags(self, strings):
    """Reject tags, as they are not supported by comment lines.

    Raises:
      gfapy.error.ValueError : if **strings** has more than three elements
    """
    n_strings = len(strings)
    if n_strings > 3:
        raise gfapy.ValueError("Comment lines do not support tags")
def decode(string):
    """Decode a position string and check that its value is not negative.

    Returns:
      the position computed by unsafe_decode()

    Raises:
      gfapy.error.ValueError : if the value of the position is < 0
    """
    decoded = unsafe_decode(string)
    numeric = gfapy.posvalue(decoded)
    if numeric < 0:
        raise gfapy.ValueError(
            "{} is not a positive integer".format(numeric))
    return decoded
def validate_decoded(integer):
    """Check that a decoded integer is not negative.

    Raises:
      gfapy.error.ValueError : if **integer** is < 0
    """
    if not integer < 0:
        return
    raise gfapy.ValueError("{} is not a positive integer".format(integer))
def validate_encoded(string):
    """Check the string representation of a taxon ID.

    Accepted are strings of the form ``taxon:<digits>`` and strings
    consisting only of letters, digits and underscores.

    Raises:
      gfapy.error.ValueError : if the string has neither of the two forms
    """
    accepted_forms = (r"^taxon:(\d+)$", r"^[a-zA-Z0-9_]+$")
    for pattern in accepted_forms:
        if re.match(pattern, string):
            return
    raise gfapy.ValueError("Invalid taxon ID: {}".format(string))
def _check_not_internal(self, fn):
    """Ensure that the alignment is not internal.

    Parameters:
      fn (str) : name of what is being computed, used in the error message

    Raises:
      gfapy.error.ValueError : if is_internal() is true
    """
    if not self.is_internal():
        return
    raise gfapy.ValueError(
        "Line: {}\n".format(str(self)) +
        "Internal alignment, {} is not defined".format(fn))