Ejemplo n.º 1
0
        def validate(self, version="gfa1"):
            """Validate the CIGAR operation against a GFA version.

            Parameters:
              version (str): 'gfa1' or 'gfa2'

            Raises:
              ~gfapy.error.VersionError: If **version** is neither 'gfa1' nor
                  'gfa2'.
              ~gfapy.error.TypeError: If the operation length is neither an
                  int nor a str.
              ~gfapy.error.ValueError: If the length is < 0, or if the
                  operation code is not in the set accepted for **version**.

            Note:
              A str length that int() cannot parse propagates the error
              raised by int() itself.
            """
            if version not in ("gfa1", "gfa2"):
                raise gfapy.VersionError("Version error: {}".format(
                    repr(version)))
            if not isinstance(self.length, (int, str)):
                raise gfapy.TypeError(
                    "Type error: length of CIGAR is {}".format(self.length))
            if int(self.length) < 0:
                raise gfapy.ValueError("Length of CIGAR is {}".format(
                    self.length))
            # The set of accepted operation codes depends on the version.
            if version == "gfa2":
                accepted_codes = Operation.CODE_GFA1_GFA2
            else:
                accepted_codes = Operation.CODE
            if self.code not in accepted_codes:
                # Name the offending code and version so the failure can be
                # diagnosed from the message alone.
                raise gfapy.ValueError(
                    "Invalid CIGAR operation code for {}: {}".format(
                        version, repr(self.code)))
Ejemplo n.º 2
0
  def validate(self, ts=None, version="gfa2"):
    """Check that the trace is a valid GFA2 trace alignment.

    Parameters:
      ts (int): Trace spacing. When truthy, no element may be greater than
        **ts** (default: **None**, no upper-bound check).
      version (str): GFA version; only 'gfa2' is accepted.

    Raises:
      ~gfapy.error.VersionError: If **version** is 'gfa1' or any other value
        different from 'gfa2'.
      ~gfapy.error.TypeError: If an element is not an int.
      ~gfapy.error.ValueError: If an element is < 0 or > **ts**.
    """
    if version == "gfa1":
      raise gfapy.VersionError("Traces are not compatible with GFA1")
    if version != "gfa2":
      raise gfapy.VersionError("Version unknown: {}".format(repr(version)))
    for value in self:
      if not isinstance(value, int):
        template = "Trace contains non-integer values ({0} found)\nContent: {1}"
        raise gfapy.TypeError(template.format(value, repr(self)))
      if value < 0:
        template = "Trace contains value < 0 ({0} found)\nContent: {1}"
        raise gfapy.ValueError(template.format(value, repr(self)))
      # A falsy ts (None or 0) disables the upper-bound check.
      if ts and value > ts:
        template = ("Trace contains value > TS ({0} found, TS = {2})\n"
                    "Content: {1}")
        raise gfapy.ValueError(template.format(value, repr(self), ts))
Ejemplo n.º 3
0
    def integer_type(range):
        """
        Select the integer subtype able to hold a range of values.

        A negative lower bound selects among the signed subtypes, otherwise
        among the unsigned ones. Candidates are tried in the order in which
        they are listed and the first fitting one is returned.

        Parameters
        ----------
        range : (int, int)
          The integer range (min, max)

        Raises
        ------
        gfapy.ValueError
          If no candidate subtype range contains the given range

        Returns
        -------
        one of gfapy.NumericArray.INT_SUBTYPE
          subtype code
        """
        lowest = range[0]
        is_signed = lowest < 0
        if is_signed:
            candidates = NumericArray.SIGNED_INT_SUBTYPE
        else:
            candidates = NumericArray.UNSIGNED_INT_SUBTYPE
        for subtype in candidates:
            bounds = NumericArray.SUBTYPE_RANGE[subtype]
            # The lower bound only matters for signed subtypes.
            if is_signed and not (bounds[0] <= lowest):
                continue
            # The upper bound of a subtype range is exclusive.
            if bounds[1] > range[1]:
                return subtype
        raise gfapy.ValueError(
            "NumericArray: values are outside of all integer subtype ranges\n"
            + "Range: {}".format(repr(range)))
Ejemplo n.º 4
0
  def _substring_type(self, begpos, endpos):
    """Classify the substring delimited by a begin and an end position.

    Returns a 2-tuple (kind, flag). The kind is "pfx", "sfx", "whole" or
    "internal". The flag is True when begin and end coincide: both are a
    first position, the begin is a last position, or two internal positions
    have equal values.

    Raises:
      gfapy.ValueError: if the value of begpos is greater than that of endpos
      gfapy.FormatError: if begpos is a last position but endpos is not
    """
    if gfapy.posvalue(begpos) > gfapy.posvalue(endpos):
      raise gfapy.ValueError(
        "Line: {}\nbegin > end: {}$ > {}".format(
          str(self), gfapy.posvalue(begpos), gfapy.posvalue(endpos)))

    # Substring starting at the first position: prefix or whole string.
    if gfapy.isfirstpos(begpos):
      if gfapy.isfirstpos(endpos):
        return ("pfx", True)
      kind = "whole" if gfapy.islastpos(endpos) else "pfx"
      return (kind, False)

    # Substring starting at the last position: must also end there.
    if gfapy.islastpos(begpos):
      if gfapy.islastpos(endpos):
        return ("sfx", True)
      raise gfapy.FormatError(
        "Line: {}\nWrong use of $ marker\n{} >= {}$".format(
          str(self), gfapy.posvalue(endpos), gfapy.posvalue(begpos)))

    # Substring starting somewhere in between.
    if gfapy.islastpos(endpos):
      return ("sfx", False)
    is_empty = gfapy.posvalue(begpos) == gfapy.posvalue(endpos)
    return ("internal", is_empty)
Ejemplo n.º 5
0
    def compute_subtype(self):
        """
        Derive the subtype of the array from its elements.

        If every element is a float (which includes the empty array), the
        subtype is "f". If every element is an int, the subtype is chosen by
        gfapy.NumericArray.integer_type from the (min, max) of the elements.
        Anything else is an error.

        Raises
        ------
        gfapy.ValueError
          If the array is not a valid numeric array

        Returns
        -------
        one of gfapy.NumericArray.SUBTYPE
        """
        if all(isinstance(item, float) for item in self):
            return "f"
        for item in self:
            if not isinstance(item, int):
                raise gfapy.ValueError(
                    "NumericArray does not contain homogenous numeric values\n"
                    + "Content: {}".format(repr(self)))
        # The array cannot be empty here: an empty array returned "f" above.
        return gfapy.NumericArray.integer_type((min(self), max(self)))
Ejemplo n.º 6
0
 def gen():
     """Yield the parsed numeric values of elems[1:].

     Uses the enclosing scope's elems, subtype, valid, range and string.
     For subtype "f" every element is converted with float(); otherwise it
     is converted with int() and, unless valid is true, checked against the
     half-open interval [range[0], range[1]).

     Raises:
       gfapy.ValueError: if an element cannot be converted to int, or if it
         lies outside the subtype range.
     """
     for e in elems[1:]:
         if subtype == "f":
             yield float(e)
             continue
         try:
             value = int(e)
         except Exception:
             # Not a bare except: KeyboardInterrupt and SystemExit must not
             # be turned into a value error.
             raise gfapy.ValueError(
                 "Value is not valid: {}\n".format(e) +
                 "Numeric array string: {}".format(string))
         if not valid and not (range[0] <= value < range[1]):
             raise gfapy.ValueError(
                 ("NumericArray: " +
                  "value is outside of subtype {0} range\n" +
                  "Value: {1}\n" + "Range: {2}\n" +
                  "Content: {3}").format(subtype, value, repr(range),
                                         repr(elems)))
         yield value
Ejemplo n.º 7
0
 def try_get_line(self, l):
     """Call line() and raise an exception if the line is not found.

     Raises:
       gfapy.ValueError: if nothing is found and the name is a placeholder
       gfapy.NotFoundError: if nothing is found for any other name
     """
     found = self.line(l)
     if found is not None:
         return found
     if gfapy.is_placeholder(l):
         raise gfapy.ValueError(
             "'*' is a placeholder and not a valid name for a line")
     raise gfapy.NotFoundError("No line found with ID {}".format(l))
Ejemplo n.º 8
0
def validate_decoded(obj):
    """Validate a decoded position value.

    An int must not be negative; a gfapy.LastPos is checked through its own
    validate() method; any other class is rejected.

    Raises:
      gfapy.ValueError: if obj is a negative int
      gfapy.TypeError: if obj is neither an int nor a gfapy.LastPos
    """
    if isinstance(obj, int):
        if obj < 0:
            raise gfapy.ValueError("{} is not a positive integer".format(obj))
        return
    if not isinstance(obj, gfapy.LastPos):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: int, gfapy.LastPos)")
    obj.validate()
Ejemplo n.º 9
0
    def pos(self):
        """Value of the GFA1 **pos** field, if the edge is a containment.

        For alignment type "C" the result is one of beg1 or beg2; for an
        alignment type other than "I", "L" or "C" nothing is returned.

        Returns:
          int or gfapy.LastPos

        Raises:
          gfapy.error.ValueError: If the alignment type is "I" (internal) or
            "L" (dovetail).
        """
        kind = self._alignment_type
        if kind == "I":
            reason = "Internal alignment, pos is not defined"
        elif kind == "L":
            reason = "Dovetail alignment, pos is not defined"
        else:
            reason = None
        if reason is not None:
            raise gfapy.ValueError("Line: {}\n".format(str(self)) + reason)
        if kind == "C":
            if not gfapy.isfirstpos(self.beg1):
                return self.beg1
            # beg1 is a first position: use it only when segment 2 is
            # covered from its first to its last position.
            if gfapy.isfirstpos(self.beg2) and gfapy.islastpos(self.end2):
                return self.beg1
            return self.beg2
        return None
Ejemplo n.º 10
0
 def _from_string(cls, string, valid=False):
     """Parse a position string into a plain int or an instance of cls.

     A string ending with "$" is parsed as cls(<int>, valid=valid); any
     other string is parsed as a plain int.

     Parameters:
       string (str): the position string
       valid (bool): if True, the >= 0 check on plain ints is skipped; the
         flag is also forwarded to cls

     Returns:
       int, or the result of calling cls for "$"-terminated strings

     Raises:
       gfapy.FormatError: if the numeric part is not an integer (this
         includes the empty string and malformed "$"-terminated strings)
       gfapy.ValueError: if a plain integer is negative and valid is False
     """
     # Slicing instead of indexing keeps the empty string from raising
     # IndexError; it then fails the int() conversion below.
     has_marker = string[-1:] == "$"
     digits = string[:-1] if has_marker else string
     try:
         v = int(digits)
     except Exception:
         raise gfapy.FormatError(
             "LastPos value has a wrong format: {}".format(string))
     if has_marker:
         return cls(v, valid=valid)
     if not valid and v < 0:
         raise gfapy.ValueError("LastPos value shall be >= 0," +
                                " {} found".format(v))
     return v
Ejemplo n.º 11
0
 def _is_sid1_from(self):
     """Tell whether segment 1 takes the 'from' role of the edge.

     The roles of both segments are computed with _segment_role. A
     "contained" role decides first (segment 2 is checked before segment 1);
     otherwise an sfx/pfx pair decides.

     Returns:
       bool: True if segment 2 is contained or the roles are (sfx, pfx);
         False if segment 1 is contained or the roles are (pfx, sfx)

     Raises:
       gfapy.ValueError: for any other combination of roles
     """
     role1 = self._segment_role(self.beg1, self.end1, self.sid1.orient)
     role2 = self._segment_role(self.beg2, self.end2, self.sid2.orient)
     if role2 == "contained":
         return True
     if role1 == "contained":
         return False
     roles = (role1, role2)
     if roles == ("sfx", "pfx"):
         return True
     if roles == ("pfx", "sfx"):
         return False
     raise gfapy.ValueError(
         "Line: {}\n".format(str(self)) +
         "Internal overlap, 'from' is undefined\n" +
         "Roles: segment1 is {} ({},{}), segment2 is {} ({},{})".format(
             role1, self.beg1, self.end1, role2, self.beg2, self.end2))
Ejemplo n.º 12
0
    def validate(self):
        """Check that the value is a non-negative integer.

        Raises:
          gfapy.error.TypeError : if the value is not an integer
          gfapy.error.ValueError : if the value is not >= 0
        """
        current = self.value
        if not isinstance(current, int):
            raise gfapy.TypeError("LastPos value shall be an integer," +
                                  " {} found".format(type(current)))
        if current < 0:
            raise gfapy.ValueError("LastPos value shall be >= 0," +
                                   " {} found".format(current))
Ejemplo n.º 13
0
 def _to_gfa1_a(self):
     """Build the list of field strings of the equivalent GFA1 P line.

     The result is ["P", name, segment names, overlaps], where the last two
     are comma-joined strings. Each captured segment name and each captured
     edge overlap is validated as a GFA1 field before being used.

     Raises:
       gfapy.ValueError: if the path name is a placeholder
     """
     if gfapy.is_placeholder(self.name):
         raise gfapy.ValueError("Conversion to GFA1 failed\n" +
                                "The path name is a placeholder\t" +
                                "Line: {}".format(self))
     fields = ["P", self.name]

     segment_fields = []
     for segment in self.captured_segments:
         gfapy.Field._validate_gfa_field(segment.name, "segment_name_gfa1")
         segment_fields.append(str(segment))
     fields.append(",".join(segment_fields))

     overlap_fields = []
     for edge in self.captured_edges:
         overlap = edge.line.overlap
         gfapy.Field._validate_gfa_field(overlap, "alignment_gfa1")
         overlap_fields.append(str(overlap))
     fields.append(",".join(overlap_fields))
     return fields
Ejemplo n.º 14
0
 def _validate_rgfa_tags_in_lines(self, lines):
     """Validate the rGFA tags of a group of lines.

     For the record type of each line, the mandatory tags listed in
     gfapy.Gfa.RGFA_TAGS must be present, and every mandatory or optional
     tag that is present must have the listed datatype.

     Raises:
       gfapy.NotFoundError: if a mandatory tag is missing
       gfapy.ValueError: if a tag has a datatype other than the listed one
     """
     for line in lines:
         record_type = line.record_type
         required = gfapy.Gfa.RGFA_TAGS["mandatory"].get(record_type, {})
         # Datatypes are checked for mandatory and optional tags alike.
         typed = required.copy()
         typed.update(gfapy.Gfa.RGFA_TAGS["optional"].get(record_type, {}))
         for tag in required:
             if tag not in line.tagnames:
                 raise gfapy.NotFoundError(
                     "rGFA {} lines must have a {} tag\n".format(
                         record_type, tag) +
                     "offending line:\n{}".format(str(line)))
         for tag, expected in typed.items():
             if tag in line.tagnames and line.get_datatype(tag) != expected:
                 raise gfapy.ValueError(
                     "rGFA {} tags in {} lines must have datatype {}\n".
                     format(tag, record_type, expected) +
                     "offending line:\n{}".format(str(line)))
Ejemplo n.º 15
0
def invert(symbol):
    """Return the opposite orientation or end-type symbol.

    Parameters:
      symbol (str) : a one-character string, symbolizing an orientation
        (+ or -) or an end-type (L or R)

    Returns:
      str : the other one-character string of the same category
        (e.g. - for +)

    Raises:
      gfapy.error.ValueError : if any other value is passed
    """
    opposites = (("+", "-"), ("-", "+"), ("L", "R"), ("R", "L"))
    for candidate, inverse in opposites:
        if symbol == candidate:
            return inverse
    raise gfapy.ValueError("No inverse defined for {}".format(symbol))
Ejemplo n.º 16
0
 def fun(c):
     """Return the complement of character c as listed in WCC.

     Uses the enclosing scope's WCC, valid and sequence. When c has no WCC
     entry, c itself is used if valid is true; a falsy result is an error.

     Raises:
       gfapy.ValueError: if no complement is available for c
     """
     fallback = c if valid else None
     complement = WCC.get(c, fallback)
     if complement:
         return complement
     raise gfapy.ValueError(
         "{}: no Watson-Crick complement for {}".format(sequence, c))
Ejemplo n.º 17
0
def create_merged_segment(gfa,
                          segpath,
                          merged_name=None,
                          enable_tracking=False,
                          cut_counts=False):
    """Build one segment by merging the segments along a path.

    The first segment of the path is cloned and every following segment is
    appended to the clone through gfa._add_segment_to_merged. Consecutive
    segment ends must be joined by exactly one dovetail relation.

    Parameters:
      gfa: the object holding the segments; it must provide try_get_segment,
        segment, unused_name and _add_segment_to_merged
      segpath: sequence of segment ends (each with segment and end_type)
      merged_name: forwarded to _add_segment_to_merged; the special value
        "short" is replaced by gfa.unused_name()
      enable_tracking (bool): forwarded to _add_segment_to_merged
      cut_counts (bool): if True, the summed counts are scaled by
        length / (total_cut + length) of the merged segment

    Returns:
      tuple: (merged segment, first_reversed, last_reversed);
        last_reversed is None for a path of a single segment end

    Raises:
      gfapy.ValueError: if two consecutive ends are not joined by exactly
        one dovetail relation
      gfapy.InconsistencyError: if gfa._vlevel > 0 and the LN of the merged
        segment differs from the length of its sequence
    """
    merged = gfa.try_get_segment(segpath[0].segment).clone()
    # vlevel is forced to 0 while the clone is edited and restored
    # afterwards (presumably to skip validation of intermediate states).
    merged_vlevel = merged.vlevel
    merged.vlevel = 0
    total_cut = 0
    a = segpath[0]
    first_reversed = (a.end_type == "L")
    last_reversed = None
    if merged_name == "short":
        merged_name = gfa.unused_name()
    gfa._add_segment_to_merged(merged,
                               gfa.segment(a.segment),
                               first_reversed,
                               0,
                               True,
                               enable_tracking=enable_tracking,
                               merged_name=merged_name)
    for s in segpath[1:]:
        b = gfapy.SegmentEnd(s).inverted()
        ls = gfa.segment(a.segment).end_relations(a.end_type, b, "dovetails")
        if len(ls) != 1:
            raise gfapy.ValueError(
                "A single link was expected between {} and {}, "
                "{} were found".format(a, b, len(ls)))
        l = ls[0]
        # The cut is the part of the appended segment covered by the
        # overlap, capped at the length of that segment.
        if not l.overlap:
            cut = 0
        else:
            cut = min(l.overlap.length_on_query(), gfa.segment(b.segment).LN)
        total_cut += cut
        last_reversed = (b.end_type == "R")
        gfa._add_segment_to_merged(merged,
                                   gfa.segment(b.segment),
                                   last_reversed,
                                   cut,
                                   False,
                                   enable_tracking=enable_tracking,
                                   merged_name=merged_name)
        a = gfapy.SegmentEnd(b).inverted()
    merged.vlevel = merged_vlevel
    # Name, "or" tag and sequence may have been accumulated as lists.
    if isinstance(merged.name, list):
        merged.name = "_".join(merged.name)
    ortag = merged.get("or")
    if isinstance(ortag, list):
        merged.set("or", ",".join(ortag))
    if not gfapy.is_placeholder(merged.sequence):
        merged.sequence = "".join(merged.sequence)
        if not merged.LN:
            merged.LN = len(merged.sequence)
        elif gfa._vlevel > 0 and merged.LN != len(merged.sequence):
            # The sequence is a plain str here, so its length comes from
            # len(); a str has no length attribute.
            raise gfapy.InconsistencyError(
                "Computed sequence length {} ".format(len(merged.sequence)) +
                "and computed LN {} differ".format(merged.LN))
    if merged.length is None:
        # Without a known length the counts cannot be rescaled: drop them.
        for count_tag in ["KC", "RC", "FC"]:
            merged.set(count_tag, None)
    else:
        factor = 1
        if cut_counts:
            factor = merged.length / (total_cut + merged.length)
        # NOTE(review): outside a class body no name mangling applies, so
        # this looks up an attribute literally named __sum_of_counts;
        # confirm that gfa exposes it under that name.
        for count_tag, count in gfa.__sum_of_counts(segpath, factor).items():
            merged.set(count_tag, count)
    return merged, first_reversed, last_reversed
Ejemplo n.º 18
0
 def __validate_end_type(self):
     """Check that the end type is "L" or "R".

     Raises:
       gfapy.ValueError: for any other end type
     """
     end_type = self.__end_type
     if end_type not in ["L", "R"]:
         raise gfapy.ValueError("Invalid end type ({})".format(
             repr(end_type)))
Ejemplo n.º 19
0
 def __validate_orient(self):
     """Check that the orientation is "+" or "-".

     Raises:
       gfapy.ValueError: for any other orientation
     """
     orientation = self.orient
     if orientation not in ["+", "-"]:
         raise gfapy.ValueError("Invalid orientation ({})".format(
             orientation))
Ejemplo n.º 20
0
 def _validate_rgfa_no_containments(self):
     """Check that there are no C (containment) lines, as rGFA requires.

     Raises:
       gfapy.ValueError: if self.containments is not empty
     """
     if not self.containments:
         return
     raise gfapy.ValueError("rGFA does not support containment lines")
Ejemplo n.º 21
0
 def _validate_rgfa_no_paths(self):
     """Check that there are no P (path) lines, as rGFA requires.

     Raises:
       gfapy.ValueError: if self.paths is not empty
     """
     if not self.paths:
         return
     raise gfapy.ValueError("rGFA does not support path lines")
Ejemplo n.º 22
0
 def _validate_rgfa_link_overlaps(self):
     """Check that every dovetail link has the overlap "0M", as rGFA requires.

     Raises:
       gfapy.ValueError: if the overlap field of a link is not "0M"
     """
     for link in self.dovetails:
         if link.field_to_s("overlap") != "0M":
             # The two parts are concatenated into one message; passing
             # them as separate arguments would turn the message into a
             # tuple.
             raise gfapy.ValueError("rGFA CIGARs must be 0M\n" +
                                    "offending line:\n{}".format(str(link)))
Ejemplo n.º 23
0
 def _validate_rgfa_no_headers(self):
     """Check that there are no H (header) lines, as rGFA requires.

     Raises:
       gfapy.ValueError: if self.headers is not empty
     """
     if not self.headers:
         return
     raise gfapy.ValueError("rGFA does not support header lines")
Ejemplo n.º 24
0
 def _initialize_tags(self, strings):
   """Reject tags on comment lines.

   Raises:
     gfapy.ValueError: if more than three strings are given
   """
   if len(strings) <= 3:
     return
   raise gfapy.ValueError("Comment lines do not support tags")
Ejemplo n.º 25
0
def decode(string):
    """Decode a position string and reject negative values.

    The string is decoded with unsafe_decode; the decoded position is
    returned unchanged once its value has been checked.

    Raises:
      gfapy.ValueError: if the value of the decoded position is < 0
    """
    decoded = unsafe_decode(string)
    numeric = gfapy.posvalue(decoded)
    if not (numeric < 0):
        return decoded
    raise gfapy.ValueError("{} is not a positive integer".format(numeric))
Ejemplo n.º 26
0
def validate_decoded(integer):
  """Check that a decoded integer is not negative.

  Raises:
    gfapy.ValueError: if integer is < 0
  """
  if not (integer < 0):
    return
  raise gfapy.ValueError(
    "{} is not a positive integer".format(integer))
Ejemplo n.º 27
0
 def validate_encoded(string):
     """Check that a string is an acceptable taxon ID.

     Accepted forms are "taxon:<digits>" or a non-empty run of letters,
     digits and underscores.

     Raises:
       gfapy.ValueError: if the string matches neither form
     """
     accepted_patterns = (r"^taxon:(\d+)$", r"^[a-zA-Z0-9_]+$")
     if not any(re.match(pattern, string) for pattern in accepted_patterns):
         raise gfapy.ValueError("Invalid taxon ID: {}".format(string))
Ejemplo n.º 28
0
 def _check_not_internal(self, fn):
     """Refuse to evaluate fn for an internal alignment.

     Parameters:
       fn: name used in the error message for what is being computed

     Raises:
       gfapy.ValueError: if self.is_internal() is true
     """
     if not self.is_internal():
         return
     raise gfapy.ValueError(
         "Line: {}\n".format(str(self)) +
         "Internal alignment, {} is not defined".format(fn))