Exemple #1
0
def validate_decoded(obj):
    """Check that a decoded field value is an integer.

    Parameters:
      obj : the decoded value to check

    Raises:
      gfapy.TypeError: if obj is not an instance of int
    """
    if not isinstance(obj, int):
        class_name = obj.__class__.__name__
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(class_name)
            + "(accepted classes: str, int)")
Exemple #2
0
    def from_string(cls, string, valid=False):
        """
        Create a numeric array from a string

        The string is split on commas: the first element is the subtype code,
        the remaining elements are the values.

        Parameters
        ----------
        string : str
        valid : optional bool
          *(default:* **False** *)*
          If **False**, validate the format of the string and the range of
          the numeric values, according to the array subtype.
          If **True** the string is guaranteed to be valid.

        Raises
        ------
        gfapy.FormatError
          If **valid** is False and the string is empty or ends with a comma.
        gfapy.ValueError
          If any value of an integer subtype cannot be parsed as an integer,
          or (when **valid** is False) is outside the range of the subtype.
        gfapy.TypeError
          If the subtype code is invalid.

        Returns
        -------
        gfapy.NumericArray
          The numeric array

        Notes
        -----
        Values of the float subtype ("f") are converted with ``float()``
        directly, so a malformed float raises the built-in ``ValueError``.
        """
        if not valid:
            if len(string) == 0:
                raise gfapy.FormatError(
                    "Numeric array string shall not be empty")
            if string[-1] == ",":
                raise gfapy.FormatError(
                    "Numeric array string ends with comma\n" +
                    "String: {}".format(string))
        elems = string.split(",")
        subtype = elems[0]
        if subtype not in NumericArray.SUBTYPE:
            raise gfapy.TypeError("Subtype {} unknown".format(subtype))
        is_float = subtype == "f"
        # Only integer subtypes have a range; it is treated as half-open
        # (lower bound included, upper bound excluded) by the check below.
        bounds = None if is_float else NumericArray.SUBTYPE_RANGE[subtype]

        def gen():
            for e in elems[1:]:
                if is_float:
                    yield float(e)
                    continue
                try:
                    e = int(e)
                except ValueError:
                    # Only a failed integer conversion is translated; any
                    # other exception is allowed to propagate unchanged.
                    raise gfapy.ValueError(
                        "Value is not valid: {}\n".format(e) +
                        "Numeric array string: {}".format(string))
                if not valid and not (e >= bounds[0] and e < bounds[1]):
                    raise gfapy.ValueError(
                        ("NumericArray: " +
                         "value is outside of subtype {0} range\n" +
                         "Value: {1}\n" + "Range: {2}\n" +
                         "Content: {3}").format(subtype, e, repr(bounds),
                                                repr(elems)))
                yield e

        return cls(list(gen()))
Exemple #3
0
def encode(obj):
    """Return the string unchanged after validating its content.

    Parameters:
      obj : the value to encode

    Raises:
      gfapy.TypeError: if obj is not a str
      Whatever validate_encoded raises for an invalid string.
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    class_name = obj.__class__.__name__
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(class_name)
        + "(accepted classes: str)")
Exemple #4
0
  def validate(self, ts = None, version = "gfa2"):
    """Validates the trace alignment

    Parameters:
      ts (int): Trace Spacing. If specified (and non-zero), it is checked
        that no value is greater than **ts** (default: **None**, no check).
      version (str) : GFA version (must be 'gfa2')

    Raises:
      ~gfapy.error.TypeError: If the trace contains non-integer values
      ~gfapy.error.ValueError: If the trace contains values < 0 or > **ts**
      ~gfapy.error.VersionError: If the version is 'gfa1' or an invalid version
        string is provided
    """
    if version == "gfa1":
      raise gfapy.VersionError("Traces are not compatible with GFA1")
    if version != "gfa2":
      raise gfapy.VersionError("Version unknown: {}".format(repr(version)))
    for value in self:
      if not isinstance(value, int):
        message = "Trace contains non-integer values ({0} found)\nContent: {1}"
        raise gfapy.TypeError(message.format(value, repr(self)))
      elif value < 0:
        message = "Trace contains value < 0 ({0} found)\nContent: {1}"
        raise gfapy.ValueError(message.format(value, repr(self)))
      elif ts and value > ts:
        message = "Trace contains value > TS ({0} found, TS = {2})\nContent: {1}"
        raise gfapy.ValueError(message.format(value, repr(self), ts))
Exemple #5
0
        def validate(self, version="gfa1"):
            """Validates the CIGAR operation.

            Parameters:
              version (str): 'gfa1' or 'gfa2'

            Raises:
              ~gfapy.error.VersionError: If a wrong **version** is specified.
              ~gfapy.error.TypeError: If the CIGAR operation length is neither
                  an integer nor a string.
              ~gfapy.error.ValueError: If the length of an operation is < 0;
                  If an operation code is invalid in general or for the
                  specified GFA version.

            Note:
              A string length is converted with ``int()``; a string which does
              not represent an integer therefore raises the built-in
              ``ValueError`` raised by ``int()``.
            """
            if version != "gfa1" and version != "gfa2":
                raise gfapy.VersionError("Version error: {}".format(
                    repr(version)))
            if not isinstance(self.length, int) and not isinstance(
                    self.length, str):
                raise gfapy.TypeError(
                    "Type error: length of CIGAR is {}".format(self.length))
            if int(self.length) < 0:
                raise gfapy.ValueError("Length of CIGAR is {}".format(
                    self.length))
            # GFA2 only accepts the codes listed in CODE_GFA1_GFA2; any other
            # version accepted above is checked against the full CODE list.
            if version == "gfa2":
                if self.code not in Operation.CODE_GFA1_GFA2:
                    raise gfapy.ValueError(
                        "CIGAR operation code {} is not valid for GFA2".format(
                            repr(self.code)))
            else:
                if self.code not in Operation.CODE:
                    raise gfapy.ValueError(
                        "CIGAR operation code {} is not valid".format(
                            repr(self.code)))
Exemple #6
0
    def __validate_decoded_gfa_field(obj, datatype, fieldname=None):
        """Validate the content of a field which is not a string.

        A gfapy.FieldArray validates itself; any other object is handed to the
        ``validate_decoded`` function of the module registered for the
        datatype in ``gfapy.Field.FIELD_MODULE``.

        Parameters:
          obj : the field content to validate
          datatype (str) : the datatype identifier
          fieldname (str) : for error messages

        Raises:
          gfapy.error.TypeError: if the specified datatype is invalid or the
            object is of a class which is not compatible with the datatype
          gfapy.error.FormatError: if the format of a string in the object
            is not compatible with the datatype; or if the object encoded into
            a GFA string is incompatible with the specification
          gfapy.error.VersionError: if the object value is invalid
            for the specific GFA version for which this datatype is used
          gfapy.error.ValueError: if the value of the object is invalid
        """
        if not isinstance(obj, gfapy.FieldArray):
            field_module = gfapy.Field.FIELD_MODULE.get(datatype)
            if field_module:
                return field_module.validate_decoded(obj)
            raise gfapy.TypeError("Datatype unknown: {}".format(
                repr(datatype)))
        return obj._validate_gfa_field(datatype, fieldname=fieldname)
def unsafe_encode(obj):
  """Encode a list of oriented lines as a space-separated string.

  A string is returned unchanged. Every element of a list must be a
  gfapy.OrientedLine; the string forms of the elements are joined by spaces.

  Raises:
    gfapy.TypeError: if obj is neither a str nor a list, or if the list
      contains an element which is not a gfapy.OrientedLine
  """
  if isinstance(obj, str):
    return obj
  if not isinstance(obj, list):
    raise gfapy.TypeError(
      "the class {} is incompatible with the datatype\n"
      .format(obj.__class__.__name__)+
      "(accepted classes: str, list)")
  encoded = []
  for item in obj:
    if isinstance(item, gfapy.OrientedLine):
      encoded.append(str(item))
    else:
      raise gfapy.TypeError(
            "the list contains an object of class {}\n".format(type(item))+
            "(accepted classes: gfapy.OrientedLine)")
  return " ".join(encoded)
def validate_decoded(obj):
    """Validate a decoded list of GFA2 identifiers.

    Each element of the list must be a str or a gfapy.Line (in which case its
    name is checked), and must consist only of characters in the range
    ``!`` to ``~``.

    Parameters:
      obj (list) : the list of identifiers or lines

    Raises:
      gfapy.TypeError: if obj is not a list, or an element is neither a str
        nor a gfapy.Line
      gfapy.FormatError: if an identifier does not match the accepted pattern
    """
    if isinstance(obj, list):
        for elem in obj:
            if isinstance(elem, gfapy.Line):
                elem = str(elem.name)
            elif not isinstance(elem, str):
                raise gfapy.TypeError("the list contains an obj of class {}\n".
                                      format(elem.__class__.__name__) +
                                      "(accepted classes: str, gfapy.Line)")
            if not re.match(r"^[!-~]+$", elem):
                # Report the offending identifier itself; the name `string`
                # used here previously was never defined in this function.
                raise gfapy.FormatError(
                    "the list contains an invalid GFA2 identifier ({})\n".
                    format(repr(elem)) +
                    "(it contains spaces and/or non-printable characters)")
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) + "(accepted classes: list)")
def unsafe_encode(obj):
    """Encode a list of oriented lines as a comma-separated string.

    A string is returned unchanged. Each element of a list is wrapped in a
    gfapy.OrientedLine and converted to its string form.

    Raises:
      gfapy.TypeError: if obj is neither a str nor a list
    """
    if not isinstance(obj, (str, list)):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) + "(accepted classes: str, list)")
    if isinstance(obj, str):
        return obj
    return ",".join(str(gfapy.OrientedLine(item)) for item in obj)
 def func(elem):
     """Return the string form of an element.

     A str is returned as it is; for a gfapy.Line the string form of its
     name is returned.

     Raises:
       gfapy.TypeError: if elem is neither a str nor a gfapy.Line
     """
     if isinstance(elem, str):
         return elem
     if not isinstance(elem, gfapy.Line):
         raise gfapy.TypeError("the list contains an obj of class {}\n".
                               format(elem.__class__.__name__) +
                               "(accepted classes: str, gfapy.Line)")
     return str(elem.name)
Exemple #11
0
 def __init__(self, verbose_level = 1, channel = sys.stderr, prefix = "#"):
   """Store the logger settings after checking the arguments.

   Parameters:
     verbose_level (int) : stored as ``_verbose_level``
     channel : object providing a callable ``write`` attribute
       (default: sys.stderr)
     prefix (str) : stored as ``_pfx``

   Raises:
     gfapy.ArgumentError: if verbose_level is not an int
     gfapy.TypeError: if channel has no callable ``write`` attribute
   """
   self._progress = False
   if not isinstance(verbose_level, int):
     raise gfapy.ArgumentError("verbose_level must be an Integer")
   writer = getattr(channel, "write", None)
   if not writer or not callable(channel.write):
     raise gfapy.TypeError("channel must provide a 'write' method")
   self._data = {}
   self._verbose_level = verbose_level
   self._pfx = prefix
   self._channel = channel
Exemple #12
0
def unsafe_encode(obj):
    """Encode a list of alignments as a comma-separated string.

    A gfapy.Placeholder is converted to its string form. Each element of a
    list is wrapped in a gfapy.Alignment and converted to its string form.

    Raises:
      gfapy.TypeError: if obj is neither a gfapy.Placeholder nor a list
    """
    if isinstance(obj, gfapy.Placeholder):
        return str(obj)
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, AlignmentPlaceholder)")
    encoded = (str(gfapy.Alignment(item)) for item in obj)
    return ",".join(encoded)
Exemple #13
0
def validate_decoded(obj):
    """Validate a decoded CIGAR field value.

    A gfapy.CIGAR is validated through its own ``validate`` method; a
    gfapy.Placeholder is accepted without further checks.

    Parameters:
      obj : the decoded value to check

    Raises:
      gfapy.TypeError: if obj is neither a gfapy.CIGAR nor a
        gfapy.Placeholder
      Whatever ``obj.validate()`` raises for an invalid CIGAR.
    """
    if isinstance(obj, gfapy.CIGAR):
        obj.validate()
    elif isinstance(obj, gfapy.Placeholder):
        pass
    else:
        # `__name__` (with trailing underscores) is the class name attribute;
        # `__name` does not exist and made this branch fail with
        # AttributeError instead of the intended gfapy.TypeError.
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: gfapy.CIGAR, gfapy.Placeholder)")
Exemple #14
0
def unsafe_encode(obj):
    """Encode a line reference as a string.

    A str is returned unchanged; for a gfapy.Line the string form of its
    name is returned.

    Raises:
      gfapy.TypeError: if obj is neither a str nor a gfapy.Line
    """
    if isinstance(obj, str):
        return obj
    if not isinstance(obj, gfapy.Line):
        class_name = obj.__class__.__name__
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                class_name) +
            "(accepted classes: str, gfapy.Line)")
    return str(obj.name)
Exemple #15
0
def validate_decoded(obj):
    """Validate a decoded line reference.

    The name of a gfapy.Line, or the str itself, is passed to
    validate_encoded.

    Raises:
      gfapy.TypeError: if obj is neither a gfapy.Line nor a str
      Whatever validate_encoded raises for an invalid identifier.
    """
    if isinstance(obj, gfapy.Line):
        identifier = obj.name
    elif isinstance(obj, str):
        identifier = obj
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, gfapy.Line)")
    validate_encoded(identifier)
Exemple #16
0
 def _api_private_check_gfa_line(self, gfa_line, callermeth):
   """Check the line argument passed to an API-private method.

   Parameters:
     gfa_line : the object received by the API-private method
     callermeth (str) : name of the API-private method, for error messages

   Raises:
     gfapy.TypeError: if gfa_line is not a gfapy.Line
     gfapy.RuntimeError: if the ``_gfa`` attribute of the line is not this
       instance
   """
   if not isinstance(gfa_line, gfapy.Line):
     # The class name placeholder was previously left unformatted, so the
     # message showed a literal "{}".
     raise gfapy.TypeError("Note: {} is API private, ".format(callermeth)+
         "do not call it directly\n"+
         "Error: line class is {} and not gfapy.Line".format(
           gfa_line.__class__.__name__))
   elif gfa_line._gfa is not self:
     # Report the same attribute which is compared above.
     raise gfapy.RuntimeError("Note: {} is API private, ".format(callermeth)+
         "do not call it directly\n"+
         "Error: line.gfa is not the expected instance of gfapy.Gfa\n"+
         repr(gfa_line._gfa)+" != "+repr(self))
Exemple #17
0
def validate_decoded(obj):
    """Validate a decoded position value.

    An int must not be negative; a gfapy.LastPos is validated through its
    own ``validate`` method.

    Raises:
      gfapy.ValueError: if obj is an int smaller than 0
      gfapy.TypeError: if obj is neither an int nor a gfapy.LastPos
    """
    if isinstance(obj, int):
        if obj >= 0:
            return
        raise gfapy.ValueError("{} is not a positive integer".format(obj))
    if isinstance(obj, gfapy.LastPos):
        obj.validate()
        return
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            obj.__class__.__name__) +
        "(accepted classes: int, gfapy.LastPos)")
Exemple #18
0
def validate_decoded(obj):
    """Validate a decoded JSON field value.

    A gfapy.FieldArray is validated through its own ``validate`` method.
    A list or dict is encoded with ``encode`` and the resulting string is
    passed to validate_all_printable.

    Raises:
      gfapy.TypeError: if obj is not a gfapy.FieldArray, list or dict
    """
    if isinstance(obj, gfapy.FieldArray):
        obj.validate()
        return
    if not (isinstance(obj, list) or isinstance(obj, dict)):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, dict, gfapy.FieldArray)")
    validate_all_printable(encode(obj))
Exemple #19
0
def encode(obj):
    """Encode an integer, string or placeholder field value.

    A str is validated with validate_encoded and returned unchanged; an int
    or a gfapy.Placeholder is converted to its string form.

    Raises:
      gfapy.TypeError: if obj is not a str, int or gfapy.Placeholder
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    # The int test comes first so that the placeholder class is only looked
    # up for non-integers, as in the short-circuiting `or`.
    stringifiable = isinstance(obj, int) or isinstance(obj, gfapy.Placeholder)
    if not stringifiable:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: int, str, gfapy.Placeholder)")
    return str(obj)
Exemple #20
0
def validate_decoded(obj):
    """Validate a decoded list of GFA1 alignments.

    A gfapy.Placeholder is accepted without checks. Each element of a list
    is wrapped in a gfapy.Alignment (version "gfa1") and validated.

    Raises:
      gfapy.TypeError: if obj is neither a gfapy.Placeholder nor a list
    """
    if isinstance(obj, gfapy.Placeholder):
        return
    if not isinstance(obj, list):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, AlignmentPlaceholder)")
    for item in obj:
        alignment = gfapy.Alignment(item, version="gfa1")
        alignment.validate()
Exemple #21
0
 def _push_item_on_se_path(self, path, prev_edge, item):
     """Add one item to the captured path under construction.

     The handling depends on the class of ``item.line``: segments and edges
     are pushed through the dedicated helper methods, ordered groups are
     expanded recursively using their own captured path, and strings or
     unknown lines (unresolved references) are rejected.

     Parameters:
       path : the path built so far; passed to the push helpers
       prev_edge : flag returned to the caller; set to False after a
         segment, True after an edge, and to the flag of the subpath after
         an ordered group
       item : object with a ``line`` attribute (and, for ordered groups, an
         ``orient`` attribute)

     Returns:
       tuple: (path, prev_edge)

     Raises:
       gfapy.RuntimeError: if the item is an unresolved reference or is not
         connected
       gfapy.AssertionError: if the captured path of an ordered group is
         empty
       gfapy.TypeError: if the class of ``item.line`` is not supported
     """
     # A string here means the reference was never replaced by a line object.
     if isinstance(item.line, str):
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (String found)".format(item.line))
     elif isinstance(item.line, gfapy.line.segment.GFA2):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         self._push_segment_on_se_path(path, prev_edge, item)
         prev_edge = False
     elif isinstance(item.line, gfapy.line.edge.GFA2):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         # An edge opening the path is handled differently from later edges.
         if not path:
             self._push_first_edge_on_se_path(path, self.items)
         else:
             self._push_nonfirst_edge_on_se_path(path, item)
         prev_edge = True
     elif isinstance(item.line, gfapy.line.group.Ordered):
         if not item.line.is_connected():
             raise gfapy.RuntimeError(
                 "Captured path cannot be computed; item is not connected\n"
                 + "Line: {}\n".format(self) + "Item: {}".format(item.line))
         subpath, prev_edge_subpath = item.line._compute_captured_path()
         if not subpath:
             raise gfapy.AssertionError()
         # Push the elements of the nested path one by one; a group in
         # reverse orientation is traversed backwards with inverted elements.
         if item.orient == "+":
             for subpath_item in subpath:
                 path, prev_edge = self._push_item_on_se_path(
                     path, prev_edge, subpath_item)
         else:
             for subpath_item in reversed(subpath):
                 path, prev_edge = self._push_item_on_se_path(
                     path, prev_edge, subpath_item.inverted())
         # The flag computed for the subpath replaces the one accumulated by
         # the recursive calls above.
         prev_edge = prev_edge_subpath
     elif isinstance(item.line, gfapy.line.unknown.Unknown):
         # NOTE(review): this message reads item.name while the other
         # branches read item.line -- confirm that item has a name attribute.
         raise gfapy.RuntimeError(
             "Captured path cannot be computed; a reference has not been resolved\n"
             + "Line: {}\n".format(self) +
             "Unresolved reference: {} (Virtual unknown line)".format(
                 item.name))
     else:
         raise gfapy.TypeError("Line: {}\t".format(self) +
                               "Cannot compute captured path:\t" +
                               "Error: items of type {} are not supported\t"
                               .format(item.line.__class__.__name__) +
                               "Unsupported item: {}".format(item))
     return path, prev_edge
Exemple #22
0
def unsafe_encode(obj):
    """Encode a numeric array field value as a string.

    A gfapy.NumericArray is converted to its string form, a list is first
    wrapped in a gfapy.NumericArray, and a str is returned unchanged.

    Raises:
      gfapy.TypeError: if obj is not a gfapy.NumericArray, list or str
    """
    if isinstance(obj, gfapy.NumericArray):
        return str(obj)
    elif isinstance(obj, list):
        return str(gfapy.NumericArray(obj))
    elif isinstance(obj, str):
        return obj
    else:
        # The accepted classes are those tested above; the message
        # previously said "line" where "list" was meant.
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, list, gfapy.NumericArray)")
Exemple #23
0
def encode(obj):
    """Encode a byte array field value as a string.

    A gfapy.ByteArray is converted to its string form, a list is first
    wrapped in a gfapy.ByteArray, and a str is validated with
    validate_encoded and returned unchanged.

    Raises:
      gfapy.TypeError: if obj is not a gfapy.ByteArray, list or str
    """
    if isinstance(obj, gfapy.ByteArray):
        return str(obj)
    elif isinstance(obj, list):
        # Use the same qualified class as the isinstance test above; the
        # bare name `ByteArray` is not defined in this function.
        return str(gfapy.ByteArray(obj))
    elif isinstance(obj, str):
        validate_encoded(obj)
        return obj
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, list, gfapy.ByteArray)")
Exemple #24
0
 def __validate_line(self):
     """Check that the line reference is a valid GFA identifier.

     The identifier is the name of ``self.line`` when it is a gfapy.Line, or
     ``self.line`` itself when it is a str.

     Raises:
       gfapy.TypeError: if ``self.line`` is neither a gfapy.Line nor a str
       gfapy.FormatError: if the identifier does not consist only of
         characters in the range ``!`` to ``~``
     """
     reference = self.line
     if isinstance(reference, gfapy.Line):
         string = reference.name
     elif isinstance(reference, str):
         string = reference
     else:
         raise gfapy.TypeError(
             "Invalid class ({}) for line reference ({})".format(
                 reference.__class__, reference))
     if re.match(r"^[!-~]+$", string) is None:
         raise gfapy.FormatError(
             "{} is not a valid GFA identifier\n".format(repr(string)) +
             "(it contains spaces or non-printable characters)")
Exemple #25
0
def encode(obj):
    """Encode a JSON field value as a string.

    A str is validated with validate_encoded and returned unchanged. A list
    or dict is serialized with json.dumps and the resulting string is passed
    to validate_all_printable before being returned.

    Raises:
      gfapy.TypeError: if obj is not a str, list or dict
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    if not (isinstance(obj, list) or isinstance(obj, dict)):
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: list, dict, gfapy.FieldArray)")
    serialized = json.dumps(obj)
    validate_all_printable(serialized)
    return serialized
Exemple #26
0
def validate_decoded(obj):
  """Validate a decoded oriented line.

  The name must consist only of characters in the range ``!`` to ``~`` and
  the orientation must be "+" or "-".

  Raises:
    gfapy.TypeError: if obj is not a gfapy.OrientedLine
    gfapy.FormatError: if the name or the orientation is invalid
  """
  if not isinstance(obj, gfapy.OrientedLine):
    raise gfapy.TypeError(
      "the class {} is incompatible with the datatype\n"
      .format(obj.__class__.__name__)+
      "(accepted classes: gfapy.OrientedLine)")
  if re.match("^[!-~]+$", obj.name) is None:
    raise gfapy.FormatError(
        "{} is not a valid oriented GFA2 identifier\n".format(repr(obj.name)))
  if obj.orient not in ("+", "-"):
    raise gfapy.FormatError(
        "{} is not a valid orientation\n".format(repr(obj.orient)))
Exemple #27
0
def encode(obj):
    """Encode an identifier field value, which may be a placeholder.

    A gfapy.Placeholder is converted to its string form and returned
    without validation. A str, or the name of a gfapy.Line, is validated
    with validate_encoded and returned.

    Raises:
      gfapy.TypeError: if obj is not a gfapy.Placeholder, str or gfapy.Line
    """
    if isinstance(obj, gfapy.Placeholder):
        return str(obj)
    elif isinstance(obj, str):
        # `str` is the Python string class; the name `String` used here
        # previously is not defined and raised NameError.
        obj = str(obj)
    elif isinstance(obj, gfapy.Line):
        obj = str(obj.name)
    else:
        raise gfapy.TypeError(
            "the class {} is incompatible with the datatype\n".format(
                obj.__class__.__name__) +
            "(accepted classes: str, gfapy.Line, gfapy.Placeholder)")
    validate_encoded(obj)
    return obj
Exemple #28
0
 def induced_segments_set(self):
     """Compute the list of segments induced by the items of this line.

     Segments contribute themselves, edges contribute the lines of their
     ``sid1`` and ``sid2``, ordered groups contribute the lines of their
     captured segments and unordered groups contribute their own induced
     segments set. Duplicates (by object identity) are removed, keeping the
     first occurrence.

     Returns:
       list: the induced segments, without duplicates

     Raises:
       gfapy.RuntimeError: if this line is not connected, or an item is an
         unresolved reference (a string or an unknown line)
       gfapy.TypeError: if an item is of an unsupported class
     """
     if not self.is_connected():
         raise gfapy.RuntimeError(
             "Induced set cannot be computed\n" +
             "Line is not connected to a GFA instance\n" +
             "Line: {}".format(self))
     segments_set = list()
     for item in self.items:
         if isinstance(item, str):
             # The item itself is the unresolved reference; a str has no
             # `line` attribute, so it is reported directly.
             raise gfapy.RuntimeError(
                 "Induced set cannot be computed; a reference has not been resolved\n"
                 + "Line: {}\n".format(self) +
                 "Unresolved reference: {} (String found)".format(item)
             )
         elif isinstance(item, gfapy.line.segment.GFA2):
             self._check_induced_set_elem_connected(item)
             segments_set.append(item)
         elif isinstance(item, gfapy.line.edge.GFA2):
             self._check_induced_set_elem_connected(item)
             for sl in [item.sid1.line, item.sid2.line]:
                 self._check_induced_set_elem_connected(sl)
                 segments_set.append(sl)
         elif isinstance(item, gfapy.line.group.Ordered):
             self._check_induced_set_elem_connected(item)
             subset = item.captured_segments
             assert (subset)
             for elem in subset:
                 segments_set.append(elem.line)
         elif isinstance(item, gfapy.line.group.Unordered):
             self._check_induced_set_elem_connected(item)
             # NOTE(review): accessed without a call, as in the original;
             # presumably this method is exposed as a property -- confirm.
             subset = item.induced_segments_set
             assert (subset)
             for elem in subset:
                 segments_set.append(elem)
         elif isinstance(item, gfapy.line.Unknown):
             raise gfapy.RuntimeError(
                 "Induced set cannot be computed; a reference has not been resolved\n"
                 + "Line: {}\n".format(self) +
                 "Unresolved reference: {} (Virtual unknown line)".format(
                     item.name))
         else:
             raise gfapy.TypeError(
                 "Line: {}\t".format(self) +
                 "Cannot compute induced set:\t" +
                 "Error: items of type {} are not supported\t".format(
                     item.__class__.__name__) +
                 "Unsupported item: {}".format(item))
     # Order-preserving de-duplication by identity: set.add returns None, so
     # `not unique_ids.add(...)` is always true and only records the id.
     unique_ids = set()
     return [e for e in segments_set \
         if id(e) not in unique_ids and not unique_ids.add(id(e))]
Exemple #29
0
def encode(obj):
    """Encode an alignment field value as a string.

    A str is validated with validate_encoded and returned unchanged. A
    gfapy.CIGAR or gfapy.Trace is validated through its own ``validate``
    method and converted to its string form. A gfapy.Placeholder is encoded
    as "*".

    Raises:
      gfapy.TypeError: if obj is of any other class
    """
    if isinstance(obj, str):
        validate_encoded(obj)
        return obj
    if isinstance(obj, (gfapy.CIGAR, gfapy.Trace)):
        obj.validate()
        return str(obj)
    if isinstance(obj, gfapy.Placeholder):
        return "*"
    class_name = obj.__class__.__name__
    raise gfapy.TypeError(
        "the class {} is incompatible with the datatype\n".format(
            class_name) + "(accepted classes: " +
        "str, CIGAR, Trace, AlignmentPlaceholder)")
def validate_decoded(iterable):
  """Validate every oriented line of an iterable.

  Each element must be a gfapy.OrientedLine; its ``validate`` method is
  called, then its name and orientation are checked.

  Raises:
    gfapy.TypeError: if an element is not a gfapy.OrientedLine
    gfapy.FormatError: if a name contains characters outside the range
      ``!`` to ``~``, or an orientation is neither "+" nor "-"
  """
  for oriented in iterable:
    if not isinstance(oriented, gfapy.OrientedLine):
      raise gfapy.TypeError(
            "the list contains an object of class {}\n".format(type(oriented))+
            "(accepted classes: gfapy.OrientedLine)")
    oriented.validate()
    if re.match(r"^[!-~]+$", oriented.name) is None:
      raise gfapy.FormatError(
        "the list contains an invalid GFA2 identifier {}\n".format(oriented.name)+
        "(it contains spaces and/or non-printable characters)")
    if oriented.orient not in ["+", "-"]:
      raise gfapy.FormatError(
        "{} is not a valid orientation".format(oriented.orient))