Ejemplo n.º 1
0
    def _vpush(self, value, datatype=None, fieldname=None):
        """Add a value to the array and validate.

        Parameters
        ----------
        value : object
            The value to add.
        datatype : gfapy.Field.TAG_DATATYPE or None
            The datatype to use.
            If not **None**, it is checked that the specified datatype is
            the same as the datatype of the field array.
            If **None**, the value is validated according to the datatype
            of the field array.
        fieldname : str
            The field name to use for error messages.

        Raises
        ------
        gfapy.InconsistencyError
            If ``datatype`` is given and differs from the datatype of the
            field array.
        """
        if datatype is None:
            # No explicit datatype: validate the value itself against the
            # datatype of the array.
            gfapy.Field._validate_gfa_field(value, self.datatype, fieldname)
        elif datatype != self.datatype:
            raise gfapy.InconsistencyError(
                "Datatype mismatch error for field {}:\n"
                "value: {}\n"
                "existing datatype: {};\n"
                "new datatype: {}".format(
                    fieldname, value, self.datatype, datatype))
        self._data.append(value)
Ejemplo n.º 2
0
 def _check_s_is_as_expected(self, path, oriented_segment):
     """Check that the last element of the path is the given oriented segment.

     Parameters
     ----------
     path : list
         Elements collected so far; the last one is the expected element.
         The ``line`` attribute of the elements is read only when the error
         message is built.
     oriented_segment : object
         The element actually found; it is compared to ``path[-1]``.

     Raises
     ------
     gfapy.InconsistencyError
         If ``path[-1]`` differs from ``oriented_segment``.
     """
     if path[-1] != oriented_segment:
         # Report every element preceding the expected one. The slice used
         # to be ``path[0:-2]``, which also dropped the element located
         # right before the expected element.
         previous = "".join(
             "  {} ({})\n".format(e, e.line) for e in path[:-1])
         raise gfapy.InconsistencyError(
             "Path is not valid\n" + "Line: {}\n".format(self) +
             "Previous elements:\n" + previous +
             "Expected element:\n" +
             "  {} ({})\n".format(path[-1], path[-1].line) +
             "Current element:\n" +
             "  {} ({})\n".format(oriented_segment, oriented_segment.line))
Ejemplo n.º 3
0
 def _check_s_to_e_contiguity(self, path, oriented_segment):
     """Check that the last path element joins ``path[-2]`` and the segment.

     The last element of ``path`` must have ``self.segment`` as one of its
     ``sid1``/``sid2`` values and ``path[-2]`` as the other one, in either
     order.

     Parameters
     ----------
     path : list
         Elements collected so far; at least two elements are accessed.
     oriented_segment : object
         The current element; it is used only in the error message.

     Raises
     ------
     gfapy.InconsistencyError
         If neither of the two arrangements holds.
     """
     last = path[-1]
     # Arrangement 1: sid1 is this line's segment, sid2 the previous element.
     if last.sid1 == self.segment and last.sid2 == path[-2]:
         return
     # Arrangement 2: the same two extremities, swapped.
     if last.sid1 == path[-2] and last.sid2 == self.segment:
         return
     header = "Path is not valid\n" + "Line: {}\n".format(self)
     listing = "".join(["  {} ({})\n".format(elem, elem.line)
                        for elem in path])
     current = "  {} ({})\n".format(oriented_segment, oriented_segment.line)
     raise gfapy.InconsistencyError(
         header + "Previous elements:\n" + listing +
         "Current element:\n" + current)
Ejemplo n.º 4
0
 def validate_length(self):
     """Check that the sequence length agrees with the LN tag.

     Nothing is checked if the sequence is a placeholder or if the line has
     no LN tag.

     Raises
     ------
     gfapy.InconsistencyError
         If sequence length and LN tag are not consistent.
     """
     if gfapy.is_placeholder(self.sequence) or "LN" not in self.tagnames:
         return
     if self.LN != len(self.sequence):
         segment_part = "Segment: {}\n".format(str(self))
         tag_part = "Length in LN tag ({}) ".format(self.LN)
         sequence_part = (
             "is different from length of sequence field ({})".format(
                 len(self.sequence)))
         raise gfapy.InconsistencyError(
             segment_part + tag_part + sequence_part)
Ejemplo n.º 5
0
 def _validate_lists_size(self):
     """Check that the number of overlaps fits the number of segments.

     Three combinations are accepted:

     1. linear path: one overlap less than oriented segments;
     2. linear path where a single empty overlap stands for overlaps which
        are all "*";
     3. circular path: as many overlaps as oriented segments.

     Returns
     -------
     bool
         **True** for every accepted combination (the circular case
         previously fell through and returned **None**).

     Raises
     ------
     gfapy.InconsistencyError
         If the sizes of the two lists match none of the cases above.
     """
     n_overlaps = len(self.overlaps)
     n_segments = len(self.segment_names)
     if n_overlaps == n_segments - 1:
         # case 1: linear path
         return True
     if n_overlaps == 1 and not self.overlaps[0]:
         # case 2: linear path, single "*" to represent overlaps which are
         # all "*"
         return True
     if n_overlaps == n_segments:
         # case 3: circular path
         return True
     raise gfapy.InconsistencyError(
         "Path has {} oriented segments, ".format(n_segments) +
         "but {} overlaps".format(n_overlaps))
Ejemplo n.º 6
0
 def validate_positions(self):
     """Check that positions suffixed by $ are the last position of segments.

     Nothing is checked unless the line is connected and the sequence of the
     segment stored in the ``sid`` field is not a placeholder.

     Raises
     ------
     gfapy.InconsistencyError
         If ``s_beg`` or ``s_end`` is marked as a last position but differs
         from the length of the segment sequence.
     """
     if not self.is_connected():
         return
     segment = self.get("sid")
     sequence = segment.sequence
     if gfapy.is_placeholder(sequence):
         return
     sequence_length = len(sequence)
     for fieldname in ("s_" + suffix for suffix in ["beg", "end"]):
         position = self.get(fieldname)
         if gfapy.islastpos(position) and position != sequence_length:
             raise gfapy.InconsistencyError(
                 "Fragment: {}\n".format(str(self)) +
                 "Field {}: $ after ".format(str(fieldname)) +
                 "non-last position ({})\n".format(str(position)) +
                 "Segment: {}".format(str(segment)))
Ejemplo n.º 7
0
    def add(self, tagname, value, datatype=None):
        """
        Set a header value (multi-value compatible).

        Three situations are handled:

        - the field does not exist yet: it is set to ``value``;
        - the field holds a *gfapy.FieldArray*: ``value`` is added to it;
        - the field holds a single value: a field array containing the
          previous value is created and stored, then ``value`` is added to
          it. For tags listed in ``SINGLE_DEFINITION_TAGS`` no array is
          created: a repeated identical definition is ignored and a
          different one is an error.

        Parameters
        ----------
        tagname : str
        value : object
        datatype : gfapy.Field.TAG_DATATYPE, optional
            The datatype to use.
            The default is to determine the datatype according to the value
            or the previous values present in the field.

        Raises
        ------
        gfapy.InconsistencyError
            If a single-definition tag is defined again with a different
            value.
        """
        existing = self.get(tagname)

        # First definition of the tag: store it as a plain value.
        if existing is None:
            if datatype is not None:
                self.set_datatype(tagname, datatype)
            self.set(tagname, value)
            return

        if not isinstance(existing, gfapy.FieldArray):
            if tagname in self.SINGLE_DEFINITION_TAGS:
                # Compare the GFA string representations of the two values.
                unchanged = (
                    self.field_to_s(tagname) ==
                    gfapy.Field._to_gfa_field(value, fieldname=tagname))
                if unchanged:
                    return
                raise gfapy.InconsistencyError(
                    "Inconsistent values for header tag {} found\n".format(
                        tagname) +
                    "Previous definition: {}\n".format(existing) +
                    "Current definition: {}".format(value))
            # Promote the single value to a field array.
            existing = gfapy.FieldArray(
                self.get_datatype(tagname), [existing])
            self._set_existing_field(tagname, existing)

        if self.vlevel > 1:
            # NOTE(review): confirm that the field array exposes ``vpush``
            # under exactly this name (with or without leading underscore).
            existing.vpush(value, datatype, tagname)
        else:
            existing.append(value)
Ejemplo n.º 8
0
def create_merged_segment(gfa,
                          segpath,
                          merged_name=None,
                          enable_tracking=False,
                          cut_counts=False):
    """Build the segment resulting from merging the segments of a path.

    The merged segment starts as a clone of the first segment of the path;
    the other segments are then added to it one by one, cutting the overlap
    found on the single dovetail link joining consecutive segment ends.

    Parameters
    ----------
    gfa : object
        The graph providing the segments, their links and the merging
        helpers used here.
    segpath : list
        Segment ends to merge; each must expose ``segment`` and
        ``end_type``. At least one element is required.
    merged_name : str, optional
        Passed on to ``gfa._add_segment_to_merged``. The special value
        ``"short"`` is replaced by ``gfa.unused_name()``.
    enable_tracking : bool, optional
        Passed on to ``gfa._add_segment_to_merged``.
    cut_counts : bool, optional
        If set, the summed counts are multiplied by
        ``merged.length / (total_cut + merged.length)``.

    Returns
    -------
    tuple
        ``(merged, first_reversed, last_reversed)``; ``last_reversed`` is
        **None** if the path consists of a single segment end.

    Raises
    ------
    gfapy.ValueError
        If two consecutive segment ends are not joined by exactly one
        dovetail link.
    gfapy.InconsistencyError
        If ``gfa._vlevel > 0`` and the LN tag of the merged segment differs
        from the length of its sequence.
    """
    merged = gfa.try_get_segment(segpath[0].segment).clone()
    # The validation level of the merged segment is set to 0 while it is
    # assembled; the saved level is restored after the loop.
    merged_vlevel = merged.vlevel
    merged.vlevel = 0
    total_cut = 0
    a = segpath[0]
    first_reversed = (a.end_type == "L")
    last_reversed = None
    if merged_name == "short":
        merged_name = gfa.unused_name()
    gfa._add_segment_to_merged(merged,
                               gfa.segment(a.segment),
                               first_reversed,
                               0,
                               True,
                               enable_tracking=enable_tracking,
                               merged_name=merged_name)
    for s in segpath[1:]:
        b = gfapy.SegmentEnd(s).inverted()
        links = gfa.segment(a.segment).end_relations(a.end_type, b,
                                                     "dovetails")
        if len(links) != 1:
            # The parts of this message used to be concatenated without
            # any separator.
            raise gfapy.ValueError(
                "A single link was expected between {} and {}, "
                "{} were found".format(a, b, len(links)))
        link = links[0]
        if not link.overlap:
            cut = 0
        else:
            # Never cut more than the LN value of the segment being added.
            cut = min(link.overlap.length_on_query(),
                      gfa.segment(b.segment).LN)
        total_cut += cut
        last_reversed = (b.end_type == "R")
        gfa._add_segment_to_merged(merged,
                                   gfa.segment(b.segment),
                                   last_reversed,
                                   cut,
                                   False,
                                   enable_tracking=enable_tracking,
                                   merged_name=merged_name)
        a = gfapy.SegmentEnd(b).inverted()
    merged.vlevel = merged_vlevel
    # Name, "or" tag and sequence may still be lists of parts at this point.
    if isinstance(merged.name, list):
        merged.name = "_".join(merged.name)
    ortag = merged.get("or")
    if isinstance(ortag, list):
        merged.set("or", ",".join(ortag))
    if not gfapy.is_placeholder(merged.sequence):
        merged.sequence = "".join(merged.sequence)
        if not merged.LN:
            merged.LN = len(merged.sequence)
        elif gfa._vlevel > 0 and merged.LN != len(merged.sequence):
            # ``len()`` is required here: the joined sequence is a ``str``,
            # which has no ``length`` attribute.
            raise gfapy.InconsistencyError(
                "Computed sequence length {} ".format(len(merged.sequence)) +
                "and computed LN {} differ".format(merged.LN))
    if merged.length is None:
        # Without a known length the counts cannot be scaled: clear them.
        # (This test used to be inverted, so that the branch below divided
        # by a length of None.)
        for count_tag in ["KC", "RC", "FC"]:
            merged.set(count_tag, None)
    else:
        factor = 1
        if cut_counts:
            factor = merged.length / (total_cut + merged.length)
        # NOTE(review): a double-underscore attribute is name-mangled when
        # this code is compiled inside a class body; confirm that the
        # lookup resolves as intended.
        for count_tag, count in gfa.__sum_of_counts(segpath, factor).items():
            merged.set(count_tag, count)
    return merged, first_reversed, last_reversed