def __add_line_GFA2(self, gfa_line):
    """Add a line to this object, enforcing GFA version 2.0 compatibility.

    Parameters:
      gfa_line (str or line instance): the line to add. A string is first
        parsed with ``gfapy.Line`` using the validation level of this
        object.

    Raises:
      ~gfapy.error.VersionError: If the line is an instance of a
        GFA1-specific line class, if it is a header declaring a version
        other than 2.0 (only checked when the validation level is > 0),
        or if it is a GFA1 segment.
    """
    if isinstance(gfa_line, str):
        # Segment strings are parsed without forcing a version; the version
        # of the resulting segment is checked explicitly further down.
        forced_version = {} if gfa_line[0] == "S" else {"version": "gfa2"}
        gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                              **forced_version)
    elif gfa_line.__class__ in gfapy.Lines.GFA1Specific:
        raise gfapy.VersionError(
            "Version: 2.0 ({})\n".format(self._version_explanation) +
            "Cannot add instance of incompatible line type " +
            str(type(gfa_line)))

    record_type = gfa_line.record_type
    if record_type == "H":
        # Headers are merged into the existing header, not connected.
        if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "2.0":
            raise gfapy.VersionError(
                "Header line specified wrong version ({})\n".format(
                    gfa_line.VN) +
                "Line: {}\n".format(gfa_line) +
                "File version: 2.0 ({})".format(self._version_explanation))
        self.header._merge(gfa_line)
        return

    if record_type == "S" and gfa_line.version == "gfa1":
        raise gfapy.VersionError(
            "Version: 2.0 ({})\n".format(self._version_explanation) +
            "GFA1 segment found: {}".format(gfa_line))
    # Every non-header line (segments included) is connected the same way.
    gfa_line.connect(self)
def to_version(self, version, raise_on_failure=True):
    """
    Converts the line to the selected GFA version.

    Parameters
    ----------
    version : str
      The target version; must be one of ``gfapy.VERSIONS``.
    raise_on_failure : bool
      If *True* (default), an exception is raised when the version-specific
      conversion helper returns nothing; if *False*, *None* is returned
      in that case.

    Returns
    -------
    gfapy.Line
      Conversion to the selected version. The line itself is returned
      if it already has the requested version.

    Raises
    ------
    gfapy.VersionError
      If the version is unknown, or if the record cannot be converted
      and **raise_on_failure** is set.
    gfapy.RuntimeError
      If constructing the converted line fails.
    """
    if version == self._version:
        return self
    if version not in gfapy.VERSIONS:
        raise gfapy.VersionError("Version unknown ({})".format(version))

    # The conversion helper is looked up by name (_to_<version>_a); a falsy
    # result means that no conversion is available for this record.
    converted_data = getattr(self, "_to_" + version + "_a")()
    if converted_data:
        try:
            return gfapy.Line(converted_data, version=version,
                              vlevel=self.vlevel)
        except Exception as err:
            # Only genuine errors are wrapped; KeyboardInterrupt and
            # SystemExit propagate unchanged.
            raise gfapy.RuntimeError(
                "Conversion to {} failed\n".format(version) +
                "Line: {}".format(str(self))) from err
    if raise_on_failure:
        raise gfapy.VersionError(
            "Records of type {} ".format(self.record_type) +
            "cannot be converted from version {} ".format(self._version) +
            "to version {}".format(version))
    return None
def validate(self, ts=None, version="gfa2"):
    """Validates the trace alignment

    Parameters:
      ts (int): Trace Spacing. If a truthy value is given, it is checked
        that no element is greater than **ts** (default: **None**,
        no check).
      version (str) : GFA version (must be 'gfa2')

    Raises:
      ~gfapy.error.TypeError: If the list contains non-integer values
      ~gfapy.error.ValueError: If the list contains values < 0 or > **ts**
      ~gfapy.error.VersionError: If the version is 'gfa1' or an invalid
        version string is provided
    """
    if version == "gfa1":
        raise gfapy.VersionError("Traces are not compatible with GFA1")
    if version != "gfa2":
        raise gfapy.VersionError(
            "Version unknown: {}".format(repr(version)))

    for value in self:
        if not isinstance(value, int):
            raise gfapy.TypeError(
                "Trace contains non-integer values ({0} found)\n"
                "Content: {1}".format(value, repr(self)))
        if value < 0:
            raise gfapy.ValueError(
                "Trace contains value < 0 ({0} found)\n"
                "Content: {1}".format(value, repr(self)))
        # A falsy ts (None or 0) disables the upper-bound check.
        if ts and value > ts:
            raise gfapy.ValueError(
                "Trace contains value > TS ({0} found, TS = {2})\n"
                "Content: {1}".format(value, repr(self), ts))
def __add_line_GFA1(self, gfa_line):
    """Add a line to this object, enforcing GFA version 1.0 compatibility.

    Parameters:
      gfa_line (str or line instance): the line to add. A string is first
        parsed with ``gfapy.Line`` using the validation level of this
        object.

    Raises:
      ~gfapy.error.VersionError: If the line is an instance of a
        GFA2-specific line class, if it is a header declaring a version
        other than 1.0 (only checked when the validation level is > 0),
        or if it is a GFA2 segment.
      ~gfapy.error.AssertionError: If the record type is none of
        H, S, L, P, C or #.
    """
    if isinstance(gfa_line, str):
        if gfa_line[0] == "S":
            # Segment strings are parsed without forcing a version; the
            # version of the resulting segment is checked further down.
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel)
        else:
            gfa_line = gfapy.Line(gfa_line, vlevel=self._vlevel,
                                  version="gfa1")
    elif gfa_line.__class__ in gfapy.Lines.GFA2Specific:
        raise gfapy.VersionError(
            "Version: 1.0 ({})\n".format(self._version_explanation) +
            "Cannot add instance of incompatible line type " +
            str(type(gfa_line)))

    record_type = gfa_line.record_type
    if record_type == "H":
        if self._vlevel > 0 and gfa_line.VN and gfa_line.VN != "1.0":
            raise gfapy.VersionError(
                "Header line specified wrong version ({})\n".format(
                    gfa_line.VN) +
                "Line: {}\n".format(gfa_line) +
                "File version: 1.0 ({})".format(self._version_explanation))
        # Headers are merged into the existing header, not connected.
        self.header._merge(gfa_line)
    elif record_type == "S":
        if gfa_line.version == "gfa2":
            raise gfapy.VersionError(
                "Version: 1.0 ({})\n".format(self._version_explanation) +
                "GFA2 segment found: {}".format(gfa_line))
        gfa_line.connect(self)
    elif record_type in ["L", "P", "C", "#"]:
        gfa_line.connect(self)
    else:
        # The original formatted the undefined name ``rt`` here, which
        # raised NameError instead of the intended AssertionError.
        raise gfapy.AssertionError(
            "Invalid record type {}. This should never happen".format(
                record_type))
def _subclass_GFA1(record_type):
    """Return the line class which handles a GFA1 record type.

    Parameters:
      record_type (str): the record type symbol (H, S, #, L, C or P).

    Returns:
      The class under ``gfapy.line`` associated with the record type.

    Raises:
      ~gfapy.error.VersionError: If **record_type** is None or is not one
        of the record types listed above.
    """
    if record_type is None:
        raise gfapy.VersionError(
            "gfapy uses virtual records of unknown type for GFA2 only")
    if record_type == "H":
        return gfapy.line.Header
    if record_type == "S":
        return gfapy.line.segment.GFA1
    if record_type == "#":
        return gfapy.line.Comment
    if record_type == "L":
        return gfapy.line.edge.Link
    if record_type == "C":
        return gfapy.line.edge.Containment
    if record_type == "P":
        return gfapy.line.group.Path
    # Anything else would be a custom record, which is rejected here.
    raise gfapy.VersionError(
        "Custom record types are not supported in GFA1: '{}'".format(
            record_type))
def _validate_version(self):
    """Check that the version of the line is known and compatible with
    the record type of its class.

    Raises:
      ~gfapy.error.VersionError: If the version is not in
        ``gfapy.VERSIONS``, or if the record type of the class is listed
        as specific to a different version.
    """
    class_record_type = self.__class__.RECORD_TYPE
    if self._version not in gfapy.VERSIONS:
        raise gfapy.VersionError(
            "GFA specification version unknown ({})".format(self._version))

    version_specific = Construction.RECORD_TYPE_VERSIONS["specific"]
    for required_version, record_types in version_specific.items():
        if class_record_type not in record_types:
            continue
        if self._version != required_version:
            raise gfapy.VersionError(
                "Records of type {} are incompatible ".format(
                    self.record_type) +
                "with version {}".format(self._version))
        # The first entry listing the record type settles the check.
        return
def validate(self, version="gfa1"):
    """Validates the CIGAR operation.

    Parameters:
      version (str): 'gfa1' or 'gfa2'

    Raises:
      ~gfapy.error.VersionError: If a wrong **version** is specified.
      ~gfapy.error.TypeError: If the CIGAR operation length is neither
        an integer nor a string.
      ~gfapy.error.ValueError: If the length of an operation is < 0;
        If an operation code is invalid in general or for the
        specified GFA version.
    """
    if version not in ("gfa1", "gfa2"):
        raise gfapy.VersionError("Version error: {}".format(
            repr(version)))
    if not isinstance(self.length, (int, str)):
        raise gfapy.TypeError(
            "Type error: length of CIGAR is {}".format(self.length))
    if int(self.length) < 0:
        raise gfapy.ValueError("Length of CIGAR is {}".format(
            self.length))
    # GFA2 accepts only the codes shared by both versions.
    if version == "gfa2":
        allowed_codes = Operation.CODE_GFA1_GFA2
    else:
        allowed_codes = Operation.CODE
    if self.code not in allowed_codes:
        # The original raised ValueError with an empty message.
        raise gfapy.ValueError(
            "Invalid CIGAR operation code {} for version {}".format(
                repr(self.code), version))
def _from_string(cls, string, version="gfa2", valid=False):
    """
    Parses an alignment field

    Parameters
    ----------
    string : str
      The string to parse.
    version : str
      GFA version (gfa1 or gfa2)
      If *gfa1*, then CIGARs and Placeholders are supported.
      If *gfa2*, also Traces are supported.
      Defaults to *gfa2*.
    valid : bool
      If *True*, the string is guaranteed to be valid, and
      further checks are skipped.
      Defaults to *False*.

    Returns
    -------
    gfapy.CIGAR or gfapy.Trace or gfapy.AlignmentPlaceholder

    Raises
    ------
    gfapy.FormatError
      If the content of the field cannot be parsed.
    gfapy.VersionError
      If a wrong value is provided for the version parameter.
    """
    if version not in ("gfa1", "gfa2"):
        raise gfapy.VersionError(
            "Version error: {}".format(repr(version)))

    # Locate the first character which is not a digit; the kind of
    # alignment is decided by that character alone.
    position, marker = next(
        ((i, c) for i, c in enumerate(string) if not c.isdigit()),
        (None, None))

    if position == 0:
        # No leading digits: only a lone "*" is acceptable.
        if marker == "*" and len(string) == 1:
            return gfapy.AlignmentPlaceholder()
    elif position is not None:
        if marker == ",":
            if version != "gfa2":
                raise gfapy.FormatError(
                    "Trace alignments are not allowed in GFA1: {}"
                    .format(repr(string)))
            trace = gfapy.Trace._from_string(string)
            if not valid:
                trace.validate()
            return trace
        shared_codes = ("M", "I", "D", "P")
        gfa1_only_codes = ("=", "X", "S", "H", "N")
        if marker in shared_codes or \
                (marker in gfa1_only_codes and version == "gfa1"):
            return gfapy.CIGAR._from_string(string, valid=valid,
                                            version=version)

    # Empty strings, digit-only strings and unexpected characters end here.
    raise gfapy.FormatError("Alignment field contains invalid data {}"
                            .format(repr(string)))
def __init__(self, *args, vlevel=1, version=None):
    """Initialize the instance, optionally from existing content.

    Parameters:
      *args: at most one positional argument; either a string (which is
        split into lines at newline characters) or a list of lines.
        Each line is passed to ``add_line``.
      vlevel (int): validation level, a non-negative integer (default: 1).
        If content is given and the level is >= 1, ``validate`` is called
        after the content has been added.
      version (str): 'gfa1', 'gfa2' or None (default: None).

    Raises:
      ~gfapy.error.ArgumentError: If **vlevel** is not an integer or is
        negative; if more than one positional argument is given; if the
        positional argument is neither a string nor a list.
      ~gfapy.error.VersionError: If **version** is not one of the
        accepted values.
    """
    if not isinstance(vlevel, int):
        raise gfapy.ArgumentError(
            "vlevel is not an integer ({})".format(vlevel))
    if vlevel < 0:
        raise gfapy.ArgumentError(
            "vlevel is not a positive integer ({})".format(vlevel))
    if version not in ("gfa1", "gfa2", None):
        raise gfapy.VersionError(
            "GFA version unknown ({})".format(version))

    self._vlevel = vlevel
    self._max_int_name = 0
    self._records = defaultdict(dict)
    # The header is created and connected before the other record
    # containers are set up, as in the original statement order.
    header = gfapy.line.Header(["H"], vlevel=vlevel)
    self._records["H"] = header
    header.connect(self)
    for record_type in ("S", "P", "F", "\n", "E", "U", "G", "O",
                        "C", "L", "#"):
        self._records[record_type] = {}
    self._segments_first_order = False
    self._progress = None
    self._default = {"count_tag": "RC", "unit_length": 1}
    self._line_queue = []

    self._version = version
    if version is None:
        self._version_explanation = None
        self._version_guess = "gfa2"
    else:
        self._version_explanation = "set during initialization"
        self._version_guess = version
        self._validate_version()

    if len(args) > 1:
        raise gfapy.ArgumentError(
            "Wrong number of arguments for Gfa()({})".format(len(args)))
    if not args:
        return

    content = args[0]
    if isinstance(content, str):
        lines = content.split("\n")
    elif isinstance(content, list):
        lines = content
    else:
        raise gfapy.ArgumentError(
            "Cannot create a Gfa instance from an object of type {}"
            .format(type(content)))
    for line in lines:
        self.add_line(line)
    self.process_line_queue()
    if vlevel >= 1:
        self.validate()
def _subclass(data, version=None):
    """Select the line class to use for the given line data.

    Parameters:
      data (list): the line data; the first element is the record type.
      version (str): 'gfa1', 'gfa2' or None (default: None).

    Returns:
      ``gfapy.line.Comment`` if the record type starts with "#";
      otherwise the result of the selection helper for the given version.

    Raises:
      ~gfapy.error.VersionError: If **version** is not one of the
        accepted values.
    """
    record_type = data[0]
    # Comments are recognized before any version is considered.
    if record_type and record_type[0] == "#":
        return gfapy.line.Comment
    if version == "gfa1":
        return gfapy.Line._subclass_GFA1(record_type)
    if version == "gfa2":
        return gfapy.Line._subclass_GFA2(record_type)
    if version is None:
        return gfapy.Line._subclass_unknown_version(data)
    raise gfapy.VersionError(
        "GFA specification version unknown ({})".format(version))
def _from_list(cls, array, version="gfa2", valid=True):
    """
    Converts an alignment array into a specific list type

    Parameters
    ----------
    array : list
      The alignment array.
    version : str
      GFA version (gfa1 or gfa2)
      If *gfa1*, then CIGARs and Placeholders are supported.
      If *gfa2*, also Traces are supported.
      Defaults to *gfa2*.
    valid : bool
      Not used by this method.
      Defaults to *True*.

    Returns
    -------
    gfapy.CIGAR or gfapy.Trace or gfapy.AlignmentPlaceholder
      A placeholder is returned if the array is empty.

    Raises
    ------
    gfapy.VersionError
      If a wrong value is provided for the version parameter,
      or if the array starts with an integer and the version is gfa1.
    gfapy.FormatError
      If the first element is neither an integer nor a CIGAR operation.
    """
    if version not in ("gfa1", "gfa2"):
        raise gfapy.VersionError(
            "Version error: {}".format(repr(version)))
    if not array:
        return gfapy.AlignmentPlaceholder()

    # The kind of alignment is decided by the first element only.
    head = array[0]
    if isinstance(head, int):
        if version != "gfa2":
            raise gfapy.VersionError(
                "Trace alignments are not allowed in GFA1: {}".format(
                    repr(array)))
        return gfapy.Trace(array)
    if isinstance(head, gfapy.CIGAR.Operation):
        return gfapy.CIGAR(array)
    raise gfapy.FormatError(
        "Array does not represent a valid alignment field: {}"
        .format(repr(array)))
def _delayed_initialize_positional_fields(self, strings, n_positional_fields):
    """Initialize the record type and the generic positional fields.

    The positional fields after the record type are named ``field1``,
    ``field2``, ... and are all given the ``generic`` datatype.

    Parameters:
      strings (list of str): the elements of the line; the first one is
        the record type.
      n_positional_fields (int): number of leading elements of
        **strings** (record type included) handled as positional fields.

    Raises:
      ~gfapy.error.VersionError: If the record type is P, C or L.
    """
    self._positional_fieldnames = []
    record_type = strings[0]
    if record_type in ("P", "C", "L"):
        raise gfapy.VersionError(
            "GFA-like line (P,C,L) found in GFA2\n" +
            "Line: {}\n".format(" ".join(strings)) +
            "Custom lines with record_type P, C and L are not supported by gfapy."
        )
    self._init_field_value("record_type", "custom_record_type",
                           record_type, errmsginfo=strings)
    for position in range(1, n_positional_fields):
        fieldname = "field{}".format(position)
        self._init_field_value(fieldname, "generic", strings[position],
                               errmsginfo=strings)
        self.positional_fieldnames.append(fieldname)
        self._datatype[fieldname] = "generic"
def validate(self, version="gfa1"):
    """Validates the instance.

    Each element is checked to be a CIGAR operation and is then
    validated itself for the given version.

    Parameters:
      version (str): 'gfa1' or 'gfa2'

    Raises:
      ~gfapy.error.VersionError: If a wrong **version** is specified.
      ~gfapy.error.TypeError: If a component of the list is not a
        CIGAR Operation; plus whatever the validation of the single
        operations raises.
      ~gfapy.error.ValueError: Raised by the validation of the single
        operations.
    """
    if version not in ("gfa1", "gfa2"):
        raise gfapy.VersionError(
            "Version error: {}".format(repr(version)))
    for operation in self:
        if isinstance(operation, gfapy.CIGAR.Operation):
            operation.validate(version=version)
            continue
        raise gfapy.TypeError(
            "Element is not a CIGAR operation: {}\n".format(operation) +
            "CIGAR instance is invalid: {}".format(self))
def _validate_version(self): if (self._version != None) and (self._version not in gfapy.VERSIONS): raise gfapy.VersionError( "GFA specification version {} not supported".format( self._version))
def _validate_rgfa_version(self): """Validate version of rGFA (it must be gfa1)""" if self.version != "gfa1": raise gfapy.VersionError("rGFA format only supports GFA version 1")