def validate_decoded(obj):
    """Validate a decoded value which must be an integer.

    Parameters:
      obj : the decoded field content to validate

    Raises:
      gfapy.error.TypeError: if **obj** is not an instance of int
    """
    if not isinstance(obj, int):
        # Only int passes the check above, so the hint must not list str
        # among the accepted classes.
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: int)")
def from_string(cls, string, valid=False):
    """
    Create a numeric array from a string

    Parameters
    ----------
    string : str
    valid : optional bool *(default:* **False** *)*
      If **False**, validate the format of the string and the range of the
      numeric values, according to the array subtype.
      If **True** the string is guaranteed to be valid.

    Raises
    ------
    gfapy.FormatError
      If the string is empty or ends with a comma (checked only if
      **valid** is **False**).
    gfapy.ValueError
      If an integer value cannot be parsed, or (if **valid** is **False**)
      it is outside of the range of the subtype.
    gfapy.TypeError
      If the subtype code is invalid.

    Returns
    -------
    gfapy.NumericArray
      The numeric array
    """
    if not valid:
        if len(string) == 0:
            raise gfapy.FormatError(
                "Numeric array string shall not be empty")
        if string[-1] == ",":
            raise gfapy.FormatError(
                "Numeric array string ends with comma\n" +
                "String: {}".format(string))
    elems = string.split(",")
    # the first comma-separated element is the subtype code
    subtype = elems[0]
    if subtype not in NumericArray.SUBTYPE:
        raise gfapy.TypeError("Subtype {} unknown".format(subtype))
    if subtype == "f":
        # Floats are converted as they are; no range check applies.
        # A malformed value propagates the builtin ValueError of float().
        return cls([float(elem) for elem in elems[1:]])
    # named "bounds" in order not to shadow the builtin range()
    bounds = NumericArray.SUBTYPE_RANGE[subtype]
    values = []
    for elem in elems[1:]:
        try:
            value = int(elem)
        except ValueError as err:
            # only a failed conversion is translated; unrelated exceptions
            # (e.g. KeyboardInterrupt) are not swallowed anymore
            raise gfapy.ValueError(
                "Value is not valid: {}\n".format(elem) +
                "Numeric array string: {}".format(string)) from err
        # lower bound is inclusive, upper bound is exclusive
        if not valid and not (bounds[0] <= value < bounds[1]):
            raise gfapy.ValueError(
                ("NumericArray: " +
                 "value is outside of subtype {0} range\n" +
                 "Value: {1}\n" +
                 "Range: {2}\n" +
                 "Content: {3}").format(subtype, value, repr(bounds),
                                        repr(elems)))
        values.append(value)
    return cls(values)
def encode(obj):
    """Return **obj** unchanged after validating it as an encoded string.

    Parameters:
      obj : the field content; only str is accepted

    Returns:
      str : **obj** itself, once validate_encoded() accepted it

    Raises:
      gfapy.error.TypeError: if **obj** is not a str
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: str)")
def validate(self, ts=None, version="gfa2"):
    """Validates the trace alignment

    Parameters:
      ts (int): Trace Spacing. If specified (and not zero), it will be
        checked that no value is > **ts** (default: **None**, no check).
      version (str) : GFA version (must be 'gfa1' or 'gfa2')

    Raises:
      ~gfapy.error.TypeError: If the list contains non-integer values
      ~gfapy.error.ValueError: If the list contains values < 0 or > **ts**
      ~gfapy.error.VersionError: If the version is 'gfa1' or an invalid
        version string is provided
    """
    if version == "gfa1":
        raise gfapy.VersionError("Traces are not compatible with GFA1")
    if version != "gfa2":
        raise gfapy.VersionError(
            "Version unknown: {}".format(repr(version)))
    for value in self:
        if not isinstance(value, int):
            raise gfapy.TypeError(
                ("Trace contains non-integer values ({0} found)\n" +
                 "Content: {1}").format(value, repr(self)))
        if value < 0:
            raise gfapy.ValueError(
                ("Trace contains value < 0 ({0} found)\n" +
                 "Content: {1}").format(value, repr(self)))
        # a falsy ts (None or 0) disables the upper bound check
        if ts and value > ts:
            raise gfapy.ValueError(
                ("Trace contains value > TS ({0} found, TS = {2})\n" +
                 "Content: {1}").format(value, repr(self), ts))
def validate(self, version="gfa1"):
    """Validates the CIGAR operation.

    Parameters:
      version (str): 'gfa1' or 'gfa2'

    Raises:
      ~gfapy.error.VersionError: If a wrong **version** is specified.
      ~gfapy.error.TypeError: If the CIGAR operation length is neither
        an integer nor a string.
      ~gfapy.error.ValueError: If the length of an operation is < 0;
        If an operation code is invalid in general or for the
        specified GFA version.

    Note:
      A string length is converted using int(); if the string does not
      represent an integer, the builtin ValueError of int() is propagated.
    """
    if version != "gfa1" and version != "gfa2":
        raise gfapy.VersionError("Version error: {}".format(
            repr(version)))
    if not isinstance(self.length, int) and not isinstance(
            self.length, str):
        raise gfapy.TypeError(
            "Type error: length of CIGAR is {}".format(self.length))
    if int(self.length) < 0:
        raise gfapy.ValueError("Length of CIGAR is {}".format(
            self.length))
    # The set of accepted codes depends on the version; the error now
    # reports the offending code instead of being raised without message.
    if version == "gfa2":
        if self.code not in Operation.CODE_GFA1_GFA2:
            raise gfapy.ValueError(
                "CIGAR operation code {} is not valid for version {}".format(
                    repr(self.code), repr(version)))
    else:
        if self.code not in Operation.CODE:
            raise gfapy.ValueError(
                "CIGAR operation code {} is not valid".format(
                    repr(self.code)))
def __validate_decoded_gfa_field(obj, datatype, fieldname=None):
    """Validate a non-string field content.

    A gfapy.FieldArray validates itself; any other object is handed to the
    validate_decoded() function of the module registered for the datatype
    in gfapy.Field.FIELD_MODULE.

    Parameters:
      obj : the field content to validate
      datatype (str) : the datatype identifier
      fieldname (str) : for error messages

    Raises:
      gfapy.error.TypeError: if the specified datatype is invalid or the
        object is of a class which is not compatible with the datatype
      gfapy.error.FormatError: if the format of a string in the object
        is not compatible with the datatype; or if the object encoded
        into a GFA string is incompatible with the specification
      gfapy.error.VersionError: if the object value is invalid for the
        specific GFA version for which this datatype is used
      gfapy.error.ValueError: if the value of the object is invalid
    """
    if isinstance(obj, gfapy.FieldArray):
        return obj._validate_gfa_field(datatype, fieldname=fieldname)
    field_module = gfapy.Field.FIELD_MODULE.get(datatype)
    if field_module:
        return field_module.validate_decoded(obj)
    raise gfapy.TypeError("Datatype unknown: {}".format(repr(datatype)))
def unsafe_encode(obj):
    """Encode a list of oriented lines as a space-separated string.

    Parameters:
      obj : a str (returned as it is) or a list of gfapy.OrientedLine

    Returns:
      str : the string representations of the elements joined by spaces

    Raises:
      gfapy.error.TypeError: if **obj** is neither a str nor a list, or
        if the list contains an element which is not a gfapy.OrientedLine
    """
    if isinstance(obj, str):
        return obj
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n"
            .format(obj.__class__.__name__) +
            "(accepted classes: str, list)")
    encoded = []
    for item in obj:
        if not isinstance(item, gfapy.OrientedLine):
            raise gfapy.TypeError(
                "the list contains an object of class {}\n".format(
                    type(item)) +
                "(accepted classes: gfapy.OrientedLine)")
        encoded.append(str(item))
    return " ".join(encoded)
def validate_decoded(obj):
    """Validate a decoded list of GFA2 identifiers.

    Each element must be a str or a gfapy.Line (whose name is used), and
    must consist only of printable non-space characters.

    Parameters:
      obj : the list to validate

    Raises:
      gfapy.error.TypeError: if **obj** is not a list, or an element is
        neither a str nor a gfapy.Line
      gfapy.error.FormatError: if an identifier does not match the
        pattern of printable non-space characters
    """
    if isinstance(obj, list):
        for elem in obj:
            if isinstance(elem, gfapy.Line):
                elem = str(elem.name)
            elif not isinstance(elem, str):
                raise gfapy.TypeError(
                    "the list contains an obj of class {}\n".format(
                        elem.__class__.__name__) +
                    "(accepted classes: str, gfapy.Line)")
            if not re.match(r"^[!-~]+$", elem):
                # report the offending identifier; the original formatted
                # an undefined name here, which raised a NameError
                raise gfapy.FormatError(
                    "the list contains an invalid GFA2 identifier ({})\n".
                    format(repr(elem)) +
                    "(it contains spaces and/or non-printable characters)")
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list)")
def unsafe_encode(obj):
    """Encode a list of oriented lines as a comma-separated string.

    Parameters:
      obj : a str (returned as it is) or a list, each element of which
        is passed to the gfapy.OrientedLine constructor

    Returns:
      str : the string representations of the oriented lines joined
        by commas

    Raises:
      gfapy.error.TypeError: if **obj** is neither a str nor a list
    """
    if isinstance(obj, str):
        return obj
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, list)")
    oriented = [str(gfapy.OrientedLine(item)) for item in obj]
    return ",".join(oriented)
def func(elem):
    """Return the string form of a list element.

    Parameters:
      elem : a str (returned as it is) or a gfapy.Line (its name,
        converted to str, is returned)

    Raises:
      gfapy.error.TypeError: if **elem** is neither a str nor a gfapy.Line
    """
    if isinstance(elem, str):
        return elem
    if isinstance(elem, gfapy.Line):
        return str(elem.name)
    raise gfapy.TypeError(
        "the list contains an obj of class {}\n".format(
            elem.__class__.__name__) +
        "(accepted classes: str, gfapy.Line)")
def __init__(self, verbose_level=1, channel=sys.stderr, prefix="#"):
    """Initialize the instance state.

    Parameters:
      verbose_level (int) : stored as ``_verbose_level``
      channel : object providing a callable ``write`` attribute
        (default: sys.stderr)
      prefix (str) : stored as ``_pfx``

    Raises:
      gfapy.error.ArgumentError: if **verbose_level** is not an int
      gfapy.error.TypeError: if **channel** has no callable ``write``
    """
    # set before the argument checks, as in the original statement order
    self._progress = False
    if not isinstance(verbose_level, int):
        raise gfapy.ArgumentError("verbose_level must be an Integer")
    has_write = getattr(channel, "write", None)
    if not has_write or not callable(channel.write):
        raise gfapy.TypeError("channel must provide a 'write' method")
    self._data = {}
    self._verbose_level = verbose_level
    self._pfx = prefix
    self._channel = channel
def unsafe_encode(obj):
    """Encode a list of alignments as a comma-separated string.

    Parameters:
      obj : a gfapy.Placeholder (its string form is returned) or a list,
        each element of which is passed to gfapy.Alignment()

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is neither a placeholder nor a list
    """
    if isinstance(obj, gfapy.Placeholder):
        return str(obj)
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, AlignmentPlaceholder)")
    alignments = [str(gfapy.Alignment(item)) for item in obj]
    return ",".join(alignments)
def validate_decoded(obj):
    """Validate a decoded CIGAR or placeholder.

    Parameters:
      obj : a gfapy.CIGAR (its validate() method is called) or a
        gfapy.Placeholder (accepted without further checks)

    Raises:
      gfapy.error.TypeError: if **obj** is neither a gfapy.CIGAR nor a
        gfapy.Placeholder
    """
    if isinstance(obj, gfapy.CIGAR):
        obj.validate()
    elif isinstance(obj, gfapy.Placeholder):
        pass
    else:
        # __name__ (not __name): the misspelled attribute raised an
        # AttributeError instead of the intended gfapy.TypeError
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: gfapy.CIGAR, gfapy.Placeholder)")
def unsafe_encode(obj):
    """Encode a line reference as a string, without validation.

    Parameters:
      obj : a str (returned as it is) or a gfapy.Line (its name,
        converted to str, is returned)

    Raises:
      gfapy.error.TypeError: if **obj** is neither a str nor a gfapy.Line
    """
    if isinstance(obj, str):
        return obj
    if not isinstance(obj, gfapy.Line):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, gfapy.Line)")
    return str(obj.name)
def validate_decoded(obj):
    """Validate a decoded line reference.

    The name of a gfapy.Line, or the str itself, is passed to
    validate_encoded().

    Parameters:
      obj : a gfapy.Line or a str

    Raises:
      gfapy.error.TypeError: if **obj** is neither a gfapy.Line nor a str
    """
    if isinstance(obj, gfapy.Line):
        name = obj.name
    elif isinstance(obj, str):
        name = obj
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, gfapy.Line)")
    validate_encoded(name)
def _api_private_check_gfa_line(self, gfa_line, callermeth):
    """Check the line argument passed to an API private method.

    Parameters:
      gfa_line : the line to check
      callermeth (str) : name of the calling method, for error messages

    Raises:
      gfapy.error.TypeError: if **gfa_line** is not a gfapy.Line
      gfapy.error.RuntimeError: if the ``_gfa`` attribute of the line is
        not this instance
    """
    if not isinstance(gfa_line, gfapy.Line):
        # the class name placeholder was previously left unformatted,
        # so the message contained a literal "{}"
        raise gfapy.TypeError(
            "Note: {} is API private, ".format(callermeth) +
            "do not call it directly\n" +
            "Error: line class is {} and not gfapy.Line".format(
                gfa_line.__class__.__name__))
    elif gfa_line._gfa is not self:
        raise gfapy.RuntimeError(
            "Note: {} is API private, ".format(callermeth) +
            "do not call it directly\n" +
            "Error: line.gfa is not the expected instance of gfapy.Gfa\n" +
            repr(gfa_line.gfa) + " != " + repr(self))
def validate_decoded(obj):
    """Validate a decoded position.

    Parameters:
      obj : an int (must not be negative) or a gfapy.LastPos (its
        validate() method is called)

    Raises:
      gfapy.error.ValueError: if **obj** is a negative int
      gfapy.error.TypeError: if **obj** is neither an int nor a
        gfapy.LastPos
    """
    if isinstance(obj, int):
        if obj < 0:
            raise gfapy.ValueError(
                "{} is not a positive integer".format(obj))
        return
    if isinstance(obj, gfapy.LastPos):
        obj.validate()
        return
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: int, gfapy.LastPos)")
def validate_decoded(obj):
    """Validate a decoded list, dict or field array.

    A gfapy.FieldArray validates itself; a list or dict is encoded using
    encode() and the resulting string is passed to
    validate_all_printable().

    Parameters:
      obj : the content to validate

    Raises:
      gfapy.error.TypeError: if **obj** is not a list, a dict or a
        gfapy.FieldArray
    """
    if isinstance(obj, gfapy.FieldArray):
        obj.validate()
        return
    if isinstance(obj, (list, dict)):
        encoded = encode(obj)
        validate_all_printable(encoded)
        return
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: list, dict, gfapy.FieldArray)")
def encode(obj):
    """Encode a str, int or placeholder as a string.

    Parameters:
      obj : a str (validated with validate_encoded() and returned as it
        is), an int or a gfapy.Placeholder (converted using str())

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    convertible = isinstance(obj, int) or isinstance(obj, gfapy.Placeholder)
    if not convertible:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: int, str, gfapy.Placeholder)")
    return str(obj)
def validate_decoded(obj):
    """Validate a decoded list of alignments.

    A gfapy.Placeholder is accepted as it is. Each element of a list is
    passed to gfapy.Alignment() with version "gfa1" and the result is
    validated.

    Parameters:
      obj : the content to validate

    Raises:
      gfapy.error.TypeError: if **obj** is neither a placeholder nor a list
    """
    if isinstance(obj, gfapy.Placeholder):
        return
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, AlignmentPlaceholder)")
    for item in obj:
        alignment = gfapy.Alignment(item, version="gfa1")
        alignment.validate()
def _push_item_on_se_path(self, path, prev_edge, item):
    """Process one item while computing a captured path.

    The action depends on the class of ``item.line``: segments and edges
    are handed to the ``_push_*_on_se_path`` helpers; for an ordered group
    the captured path of the group is computed and each of its items is
    processed recursively.

    Parameters:
      path : the path computed so far (passed to the helper methods)
      prev_edge : flag passed to the segment helper; it is set to False
        after a segment and to True after an edge is processed
      item : the item to process; its ``line`` and ``orient`` attributes
        are read

    Returns:
      tuple : ``(path, prev_edge)`` after processing the item

    Raises:
      gfapy.error.RuntimeError: if the reference is unresolved (a str or
        a virtual unknown line) or the referenced line is not connected
      gfapy.error.AssertionError: if the captured path of an ordered
        group is empty
      gfapy.error.TypeError: if the class of ``item.line`` is not supported
    """
    if isinstance(item.line, str):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (String found)".format(item.line))
    elif isinstance(item.line, gfapy.line.segment.GFA2):
        if not item.line.is_connected():
            raise gfapy.RuntimeError(
                "Captured path cannot be computed; item is not connected\n" +
                "Line: {}\n".format(self) +
                "Item: {}".format(item.line))
        self._push_segment_on_se_path(path, prev_edge, item)
        prev_edge = False
    elif isinstance(item.line, gfapy.line.edge.GFA2):
        if not item.line.is_connected():
            raise gfapy.RuntimeError(
                "Captured path cannot be computed; item is not connected\n" +
                "Line: {}\n".format(self) +
                "Item: {}".format(item.line))
        # the first edge of a path is handled by a dedicated helper,
        # which receives all the items of this line
        if not path:
            self._push_first_edge_on_se_path(path, self.items)
        else:
            self._push_nonfirst_edge_on_se_path(path, item)
        prev_edge = True
    elif isinstance(item.line, gfapy.line.group.Ordered):
        if not item.line.is_connected():
            raise gfapy.RuntimeError(
                "Captured path cannot be computed; item is not connected\n" +
                "Line: {}\n".format(self) +
                "Item: {}".format(item.line))
        subpath, prev_edge_subpath = item.line._compute_captured_path()
        if not subpath:
            raise gfapy.AssertionError()
        # a group in "+" orientation is traversed as it is; otherwise
        # its items are traversed in reverse order and inverted
        if item.orient == "+":
            for subpath_item in subpath:
                path, prev_edge = self._push_item_on_se_path(
                    path, prev_edge, subpath_item)
        else:
            for subpath_item in reversed(subpath):
                path, prev_edge = self._push_item_on_se_path(
                    path, prev_edge, subpath_item.inverted())
        # NOTE(review): nesting reconstructed from a flattened source;
        # assumed to run after both orientation branches - confirm
        prev_edge = prev_edge_subpath
    elif isinstance(item.line, gfapy.line.unknown.Unknown):
        raise gfapy.RuntimeError(
            "Captured path cannot be computed; a reference has not been resolved\n" +
            "Line: {}\n".format(self) +
            "Unresolved reference: {} (Virtual unknown line)".format(
                item.name))
    else:
        raise gfapy.TypeError("Line: {}\t".format(self) +
            "Cannot compute captured path:\t" +
            "Error: items of type {} are not supported\t"
            .format(item.line.__class__.__name__) +
            "Unsupported item: {}".format(item))
    return path, prev_edge
def unsafe_encode(obj):
    """Encode a numeric array as a string, without validation.

    Parameters:
      obj : a gfapy.NumericArray (converted using str()), a list (first
        converted to a gfapy.NumericArray) or a str (returned as it is)

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, gfapy.NumericArray):
        return str(obj)
    elif isinstance(obj, list):
        return str(gfapy.NumericArray(obj))
    elif isinstance(obj, str):
        return obj
    else:
        # the hint lists "list" (it previously read "line", although a
        # list is what the code accepts)
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, list, gfapy.NumericArray)")
def encode(obj):
    """Encode a byte array as a string.

    Parameters:
      obj : a gfapy.ByteArray (converted using str()), a list (first
        converted to a gfapy.ByteArray) or a str (validated with
        validate_encoded() and returned as it is)

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, gfapy.ByteArray):
        return str(obj)
    elif isinstance(obj, list):
        # use the same qualified name as the isinstance check above; the
        # bare name ByteArray is not otherwise referenced in this function
        return str(gfapy.ByteArray(obj))
    elif isinstance(obj, str):
        validate_encoded(obj)
        return obj
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, list, gfapy.ByteArray)")
def __validate_line(self):
    """Validate the line reference stored in ``self.line``.

    The name of a gfapy.Line, or the str itself, must consist only of
    printable non-space characters.

    Raises:
      gfapy.error.TypeError: if ``self.line`` is neither a gfapy.Line
        nor a str
      gfapy.error.FormatError: if the identifier does not match the
        pattern
    """
    target = self.line
    if isinstance(target, gfapy.Line):
        identifier = target.name
    elif isinstance(target, str):
        identifier = target
    else:
        raise gfapy.TypeError(
            "Invalid class ({}) for line reference ({})".format(
                target.__class__, target))
    if re.match(r"^[!-~]+$", identifier):
        return
    raise gfapy.FormatError(
        "{} is not a valid GFA identifier\n".format(repr(identifier)) +
        "(it contains spaces or non-printable characters)")
def encode(obj):
    """Encode a list or dict as a JSON string.

    Parameters:
      obj : a str (validated with validate_encoded() and returned as it
        is) or a list or dict (serialized using json.dumps(); the result
        is passed to validate_all_printable())

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    if isinstance(obj, (list, dict)):
        serialized = json.dumps(obj)
        validate_all_printable(serialized)
        return serialized
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: list, dict, gfapy.FieldArray)")
def validate_decoded(obj):
    """Validate a decoded oriented line.

    Parameters:
      obj : a gfapy.OrientedLine; its ``name`` and ``orient`` attributes
        are checked

    Raises:
      gfapy.error.TypeError: if **obj** is not a gfapy.OrientedLine
      gfapy.error.FormatError: if the name does not consist only of
        printable non-space characters, or if the orientation is neither
        "+" nor "-"
    """
    if not isinstance(obj, gfapy.OrientedLine):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n"
            .format(obj.__class__.__name__) +
            "(accepted classes: gfapy.OrientedLine)")
    name_is_valid = re.match("^[!-~]+$", obj.name)
    if not name_is_valid:
        raise gfapy.FormatError(
            "{} is not a valid oriented GFA2 identifier\n".format(
                repr(obj.name)))
    if obj.orient not in ("+", "-"):
        raise gfapy.FormatError(
            "{} is not a valid orientation\n".format(repr(obj.orient)))
def encode(obj):
    """Encode an identifier as a string.

    Parameters:
      obj : a gfapy.Placeholder (its string form is returned without
        validation), a str, or a gfapy.Line (its name is used); in the
        last two cases the string is validated with validate_encoded()

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, gfapy.Placeholder):
        return str(obj)
    elif isinstance(obj, str):
        # "str" replaces the name "String", which is not a Python
        # builtin; the error hint below lists str as accepted class
        obj = str(obj)
    elif isinstance(obj, gfapy.Line):
        obj = str(obj.name)
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, gfapy.Line, gfapy.Placeholder)")
    validate_encoded(obj)
    return obj
def induced_segments_set(self):
    """Collect the segments induced by the items of this line.

    Depending on the class of each item, the following is collected:
    the item itself (segment), the lines of ``sid1`` and ``sid2`` (edge),
    the lines of the elements of ``captured_segments`` (ordered group) or
    the elements of ``induced_segments_set`` (unordered group).

    Returns:
      list : the collected segments, in order of first occurrence; an
        object collected multiple times (same identity) is included once

    Raises:
      gfapy.error.RuntimeError: if the line is not connected, or an item
        is an unresolved reference (a str or a virtual unknown line)
      gfapy.error.TypeError: if the class of an item is not supported
    """
    if not self.is_connected():
        raise gfapy.RuntimeError(
            "Induced set cannot be computed\n" +
            "Line is not connected to a GFA instance\n" +
            "Line: {}".format(self))
    segments_set = list()
    for item in self.items:
        if isinstance(item, str):
            # here the item itself is the unresolved reference; a str
            # has no "line" attribute, so the item is formatted directly
            raise gfapy.RuntimeError(
                "Induced set cannot be computed; a reference has not been resolved\n" +
                "Line: {}\n".format(self) +
                "Unresolved reference: {} (String found)".format(item))
        elif isinstance(item, gfapy.line.segment.GFA2):
            self._check_induced_set_elem_connected(item)
            segments_set.append(item)
        elif isinstance(item, gfapy.line.edge.GFA2):
            self._check_induced_set_elem_connected(item)
            for sl in [item.sid1.line, item.sid2.line]:
                self._check_induced_set_elem_connected(sl)
                segments_set.append(sl)
        elif isinstance(item, gfapy.line.group.Ordered):
            self._check_induced_set_elem_connected(item)
            subset = item.captured_segments
            assert subset
            for elem in subset:
                segments_set.append(elem.line)
        elif isinstance(item, gfapy.line.group.Unordered):
            self._check_induced_set_elem_connected(item)
            subset = item.induced_segments_set
            assert subset
            for elem in subset:
                segments_set.append(elem)
        elif isinstance(item, gfapy.line.Unknown):
            raise gfapy.RuntimeError(
                "Induced set cannot be computed; a reference has not been resolved\n" +
                "Line: {}\n".format(self) +
                "Unresolved reference: {} (Virtual unknown line)".format(
                    item.name))
        else:
            raise gfapy.TypeError(
                "Line: {}\t".format(self) +
                "Cannot compute induced set:\t" +
                "Error: items of type {} are not supported\t".format(
                    item.__class__.__name__) +
                "Unsupported item: {}".format(item))
    # remove duplicates by object identity, keeping the first occurrence
    seen_ids = set()
    unique_segments = []
    for segment in segments_set:
        if id(segment) not in seen_ids:
            seen_ids.add(id(segment))
            unique_segments.append(segment)
    return unique_segments
def encode(obj):
    """Encode an alignment as a string.

    Parameters:
      obj : a str (validated with validate_encoded() and returned as it
        is), a gfapy.CIGAR or gfapy.Trace (validated, then converted
        using str()) or a gfapy.Placeholder (encoded as "*")

    Returns:
      str : the encoded content

    Raises:
      gfapy.error.TypeError: if **obj** is of any other class
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    if isinstance(obj, gfapy.CIGAR) or isinstance(obj, gfapy.Trace):
        obj.validate()
        return str(obj)
    if isinstance(obj, gfapy.Placeholder):
        return "*"
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: " +
        "str, CIGAR, Trace, AlignmentPlaceholder)")
def validate_decoded(iterable):
    """Validate the elements of an iterable of oriented lines.

    Each element must be a gfapy.OrientedLine; its validate() method is
    called, then its name and orientation are checked.

    Parameters:
      iterable : the elements to validate

    Raises:
      gfapy.error.TypeError: if an element is not a gfapy.OrientedLine
      gfapy.error.FormatError: if a name does not consist only of
        printable non-space characters, or if an orientation is neither
        "+" nor "-"
    """
    for oline in iterable:
        if not isinstance(oline, gfapy.OrientedLine):
            raise gfapy.TypeError(
                "the list contains an object of class {}\n".format(
                    type(oline)) +
                "(accepted classes: gfapy.OrientedLine)")
        oline.validate()
        name_is_valid = re.match(r"^[!-~]+$", oline.name)
        if not name_is_valid:
            raise gfapy.FormatError(
                "the list contains an invalid GFA2 identifier {}\n".format(
                    oline.name) +
                "(it contains spaces and/or non-printable characters)")
        if oline.orient not in ["+", "-"]:
            raise gfapy.FormatError(
                "{} is not a valid orientation".format(oline.orient))