Exemple #1
0
    def _map(self, from_pos, to_pos, pos, end, strict_bounds):
        """Translate ``pos`` from the ``from_pos`` coordinates to ``to_pos``.

        Positions in this function are 0-based, base-counting.

        ``from_pos`` and ``to_pos`` are indexed in parallel with
        ``self.cigar_op``: entries ``i`` and ``i + 1`` of ``from_pos``
        bracket segment ``i``.

        Returns a ``(mapped_pos, mapped_pos_offset, cigar_op)`` tuple, where
        ``cigar_op`` is the operation of the segment that was selected.

        Raises HGVSInvalidIntervalError when ``strict_bounds`` is truthy and
        ``pos`` is negative or greater than ``from_pos[-1]``.
        """
        if strict_bounds:
            if pos < 0 or pos > from_pos[-1]:
                raise HGVSInvalidIntervalError("Position is beyond the bounds of transcript record")

        # Select the first segment whose upper boundary lies beyond pos.  If
        # no segment qualifies, the loop runs out and the last segment is
        # kept; a pos below the first boundary selects segment 0.
        for seg in range(len(self.cigar_op)):
            if from_pos[seg + 1] > pos:
                break

        op = self.cigar_op[seg]

        if op in "=MX":
            # keep the same distance from the start of the segment
            target = to_pos[seg] + (pos - from_pos[seg])
            shift = 0
        elif op in "DI":
            # use the segment's to_pos boundary; a "start" coordinate is
            # placed one base earlier
            target = to_pos[seg] - 1 if end == "start" else to_pos[seg]
            shift = 0
        elif op == "N":
            # report the position as an offset from whichever segment edge
            # is closer (ties go to the lower edge)
            from_lower = pos - from_pos[seg] + 1
            to_upper = from_pos[seg + 1] - pos
            if from_lower <= to_upper:
                target = to_pos[seg] - 1
                shift = from_lower
            else:
                target = to_pos[seg]
                shift = -to_upper

        return target, shift, op
Exemple #2
0
    def _map(self, from_pos, to_pos, pos, base):
        """Translate ``pos`` from the ``from_pos`` coordinates to ``to_pos``.

        Positions in this function are 0-based.

        ``from_pos`` and ``to_pos`` are indexed in parallel with
        ``self.cigar_op``.  ``base`` is compared against ``"start"`` and
        ``"end"`` and only matters for segments whose operation is D or I.

        Returns a ``(mapped_pos, mapped_pos_offset, cigar_op)`` tuple.

        Raises HGVSInvalidIntervalError when ``pos`` is below ``from_pos[0]``
        or at/after the last boundary in ``from_pos``.
        """
        ops = self.cigar_op
        n_ops = len(ops)

        # advance to the last segment whose lower boundary is <= pos
        seg = -1
        while seg < n_ops and pos >= from_pos[seg + 1]:
            seg += 1

        # -1: pos precedes every boundary; n_ops: pos is past the final one
        if seg in (-1, n_ops):
            raise HGVSInvalidIntervalError(
                "Position is beyond the bounds of transcript record")

        op = ops[seg]

        if op in "=MX":
            # keep the same distance from the start of the segment
            target = to_pos[seg] + (pos - from_pos[seg])
            shift = 0
        elif op in "DI":
            # NOTE: any other value of base leaves the result unassigned
            if base == "start":
                target = to_pos[seg] - 1
            elif base == "end":
                target = to_pos[seg]
            shift = 0
        elif op == "N":
            # report the position as an offset from whichever segment edge
            # is closer (ties go to the lower edge)
            from_lower = pos - from_pos[seg] + 1
            to_upper = from_pos[seg + 1] - pos
            if from_lower <= to_upper:
                target = to_pos[seg] - 1
                shift = from_lower
            else:
                target = to_pos[seg]
                shift = -to_upper

        return target, shift, op
Exemple #3
0
    def _map(from_ivs, to_ivs, from_start_i, from_end_i, max_extent):
        """Map the span ``from_start_i``..``from_end_i`` onto ``to_ivs``.

        ``from_ivs`` and ``to_ivs`` are parallel sequences of objects with
        ``start_i`` and ``end_i`` attributes.  The start is carried over at
        the same distance from its interval's start, the end at the same
        distance from its interval's end; each result is clamped to the
        matching ``to_ivs`` interval.

        Returns a ``(to_start_i, to_end_i)`` tuple.

        Raises HGVSInvalidIntervalError when either coordinate falls in no
        interval of ``from_ivs``.
        """
        def iv_map(from_ivs, to_ivs, from_start_i, from_end_i, max_extent):
            """returns the <start,end> intervals indexes in which from_start_i and from_end_i occur"""
            # an interval whose bounds equal the query exactly takes precedence
            exact = [
                idx for idx, iv in enumerate(from_ivs)
                if iv.start_i == from_start_i and iv.end_i == from_end_i
            ]
            if exact:
                return exact[0], exact[0]

            def containing(coord):
                return [idx for idx, iv in enumerate(from_ivs) if iv.start_i <= coord <= iv.end_i]

            start_hits = containing(from_start_i)
            end_hits = containing(from_end_i)
            if not start_hits or not end_hits:
                raise HGVSInvalidIntervalError("start or end or both are beyond the bounds of transcript record")
            # a coordinate on a shared boundary matches two intervals;
            # max_extent picks the outermost pair, otherwise the innermost
            if max_extent:
                return start_hits[0], end_hits[-1]
            return start_hits[-1], end_hits[0]

        def clip_to_iv(iv, pos):
            return max(iv.start_i, min(iv.end_i, pos))

        assert from_start_i <= from_end_i, "expected from_start_i <= from_end_i"
        try:
            si, ei = iv_map(from_ivs, to_ivs, from_start_i, from_end_i, max_extent)
        except ValueError:
            raise HGVSInvalidIntervalError("start_i,end_i interval out of bounds")

        start_iv = to_ivs[si]
        end_iv = to_ivs[ei]
        dist_from_start = from_start_i - from_ivs[si].start_i
        dist_to_end = from_ivs[ei].end_i - from_end_i
        return (
            clip_to_iv(start_iv, start_iv.start_i + dist_from_start),
            clip_to_iv(end_iv, end_iv.end_i - dist_to_end),
        )
Exemple #4
0
    def n_to_c(self, n_interval, strict_bounds=None):
        """convert a transcript cDNA (n.) interval to a transcript CDS (c.) interval

        ``strict_bounds`` falls back to ``global_config.mapping.strict_bounds``
        when it is None.

        Returns a ``hgvs.location.BaseOffsetInterval`` whose positions keep
        the offsets of ``n_interval`` and whose ``uncertain`` flag is copied
        from it.

        Raises HGVSUsageError when ``self.cds_start_i`` is None, and
        HGVSInvalidIntervalError when bounds are enforced and the start base
        is <= 0 or the end base exceeds ``self.tgt_len``.
        """
        if strict_bounds is None:
            strict_bounds = global_config.mapping.strict_bounds

        # only cds_start_i is checked; cds_end_i is presumably set together
        # with it (not verifiable from this method alone)
        if self.cds_start_i is None:
            raise HGVSUsageError(
                "CDS is undefined for {self.tx_ac}; cannot map to c. coordinate (non-coding transcript?)"
                .format(self=self))

        if strict_bounds and (n_interval.start.base <= 0 or n_interval.end.base > self.tgt_len):
            raise HGVSInvalidIntervalError(
                "The given coordinate is outside the bounds of the reference sequence.")

        def pos_n_to_c(pos):
            n_base = pos.base
            if n_base <= self.cds_start_i:
                # at or before cds_start_i: measured from CDS start; positive
                # bases lose one extra (presumably to skip a zero position)
                c_base = n_base - self.cds_start_i
                if n_base > 0:
                    c_base -= 1
                datum = Datum.CDS_START
            elif self.cds_start_i < n_base <= self.cds_end_i:
                # between the two CDS markers: counted from CDS start
                c_base = n_base - self.cds_start_i
                datum = Datum.CDS_START
            else:
                # beyond cds_end_i: counted from CDS end
                c_base = n_base - self.cds_end_i
                datum = Datum.CDS_END
            return hgvs.location.BaseOffsetPosition(base=c_base, offset=pos.offset, datum=datum)

        c_start = pos_n_to_c(n_interval.start)
        c_end = pos_n_to_c(n_interval.end)
        return hgvs.location.BaseOffsetInterval(start=c_start,
                                                end=c_end,
                                                uncertain=n_interval.uncertain)
Exemple #5
0
 def __init__(self, ref, tgt):
     """Store ``ref`` and ``tgt`` after checking their lengths are compatible.

     The pair is accepted when both ``len`` attributes are equal, or when
     exactly one of them is zero (the error message calls these cases a
     match, an insertion, or a deletion).

     Raises HGVSInvalidIntervalError for any other combination.
     """
     same_length = ref.len == tgt.len
     exactly_one_empty = (ref.len == 0) != (tgt.len == 0)
     if not same_length and not exactly_one_empty:
         raise HGVSInvalidIntervalError(
             "IntervalPair doesn't represent a match, insertion, or deletion"
         )
     self.ref = ref
     self.tgt = tgt
Exemple #6
0
 def check_datum(self):
     """Verify that the start and end positions use a compatible datum pair.

     Returns None when ``(self.start.datum, self.end.datum)`` is one of the
     accepted combinations below.

     Raises HGVSInvalidIntervalError for every other combination.
     """
     # accepted (start datum, end datum) combinations
     compatible_pairs = (
         (Datum.SEQ_START, Datum.SEQ_START),
         (Datum.CDS_START, Datum.CDS_START),
         (Datum.CDS_START, Datum.CDS_END),
         (Datum.CDS_END, Datum.CDS_END),
     )
     datum_pair = (self.start.datum, self.end.datum)
     if datum_pair in compatible_pairs:
         return
     raise HGVSInvalidIntervalError("BaseOffsetInterval start datum and end datum are incompatible")
Exemple #7
0
 def iv_map(from_ivs, to_ivs, from_start_i, from_end_i, max_extent):
     """Return the indexes of the intervals holding from_start_i and from_end_i.

     ``from_ivs`` is a sequence of objects with ``start_i`` and ``end_i``
     attributes; ``to_ivs`` is accepted but not used here.

     Returns a ``(start_index, end_index)`` tuple.

     Raises HGVSInvalidIntervalError when either coordinate lies in no
     interval of ``from_ivs``.
     """
     # an interval whose bounds equal the query exactly takes precedence
     exact = [
         idx for idx, iv in enumerate(from_ivs)
         if iv.start_i == from_start_i and iv.end_i == from_end_i
     ]
     if exact:
         return exact[0], exact[0]

     def containing(coord):
         return [idx for idx, iv in enumerate(from_ivs) if iv.start_i <= coord <= iv.end_i]

     start_hits = containing(from_start_i)
     end_hits = containing(from_end_i)
     if not start_hits or not end_hits:
         raise HGVSInvalidIntervalError("start or end or both are beyond the bounds of transcript record")

     # a coordinate on a shared boundary matches two intervals; max_extent
     # picks the outermost pair, otherwise the innermost
     if max_extent:
         return start_hits[0], end_hits[-1]
     return start_hits[-1], end_hits[0]
Exemple #8
0
 def pos_c_to_n(pos):
     """Convert one c. position into an n. position (datum SEQ_START).

     Reads ``self`` and ``strict_bounds`` from the enclosing scope.  The
     offset of ``pos`` is carried over unchanged.

     Raises HGVSInvalidIntervalError when ``strict_bounds`` is truthy and
     the computed base is <= 0 or greater than ``self.tgt_len``.
     """
     # NOTE: a datum other than CDS_START / CDS_END leaves n unassigned
     if pos.datum == Datum.CDS_START:
         # negative bases get one extra: correct for lack of c.0 coordinate
         n = pos.base + self.cds_start_i + (1 if pos.base < 0 else 0)
     elif pos.datum == Datum.CDS_END:
         n = pos.base + self.cds_end_i

     # correct for lack of n.0 coordinate
     if n <= 0:
         n -= 1

     outside = n <= 0 or n > self.tgt_len
     if outside and strict_bounds:
         raise HGVSInvalidIntervalError(f"c.{pos} coordinate is out of bounds")

     return hgvs.location.BaseOffsetPosition(
         base=n,
         offset=pos.offset,
         datum=Datum.SEQ_START,
     )
Exemple #9
0
    def c_to_n(self, c_interval, strict_bounds=None):
        """convert a transcript CDS (c.) interval to a transcript cDNA (n.) interval

        ``strict_bounds`` falls back to ``global_config.mapping.strict_bounds``
        when it is None.

        Returns a ``hgvs.location.BaseOffsetInterval`` whose positions use
        the SEQ_START datum and keep the offsets and ``uncertain`` flag of
        ``c_interval``.

        Raises HGVSUsageError when ``self.cds_start_i`` is None, and
        HGVSInvalidIntervalError when bounds are enforced and the mapped
        start is <= 0 or the mapped end exceeds ``self.tgt_len``.
        """
        if strict_bounds is None:
            strict_bounds = global_config.mapping.strict_bounds

        # only cds_start_i is checked; cds_end_i is presumably set together
        # with it (not verifiable from this method alone)
        if self.cds_start_i is None:
            raise HGVSUsageError(
                "CDS is undefined for {self.tx_ac}; cannot map from c. coordinate (non-coding transcript?)"
                .format(self=self))

        c_start = c_interval.start
        c_end = c_interval.end

        # NOTE: for either end, a CDS_START position with base 0 or a datum
        # other than CDS_START / CDS_END leaves the n_* value unassigned.

        # start: negative CDS_START bases get one extra
        if c_start.datum == Datum.CDS_START:
            if c_start.base < 0:
                n_start = c_start.base + self.cds_start_i + 1
            elif c_start.base > 0:
                n_start = c_start.base + self.cds_start_i
        elif c_start.datum == Datum.CDS_END:
            n_start = c_start.base + self.cds_end_i

        # end: same rules as for the start
        if c_end.datum == Datum.CDS_START:
            if c_end.base < 0:
                n_end = c_end.base + self.cds_start_i + 1
            elif c_end.base > 0:
                n_end = c_end.base + self.cds_start_i
        elif c_end.datum == Datum.CDS_END:
            n_end = c_end.base + self.cds_end_i

        if strict_bounds and (n_start <= 0 or n_end > self.tgt_len):
            raise HGVSInvalidIntervalError(
                "The given coordinate is outside the bounds of the reference sequence."
            )

        start_pos = hgvs.location.BaseOffsetPosition(
            base=n_start, offset=c_start.offset, datum=Datum.SEQ_START)
        end_pos = hgvs.location.BaseOffsetPosition(
            base=n_end, offset=c_end.offset, datum=Datum.SEQ_START)
        return hgvs.location.BaseOffsetInterval(
            start=start_pos, end=end_pos, uncertain=c_interval.uncertain)
Exemple #10
0
    def n_to_c(self, n_interval):
        """convert a transcript cDNA (n.) interval to a transcript CDS (c.) interval

        Returns a ``hgvs.location.BaseOffsetInterval`` whose positions keep
        the offsets of ``n_interval`` and whose ``uncertain`` flag is copied
        from it.

        Raises HGVSUsageError when ``self.cds_start_i`` is None, and
        HGVSInvalidIntervalError when the start base is <= 0 or the end base
        exceeds ``self.tgt_len``.
        """
        # only cds_start_i is checked; cds_end_i is presumably set together
        # with it (not verifiable from this method alone)
        if self.cds_start_i is None:
            raise HGVSUsageError(
                "CDS is undefined for {self.tx_ac}; cannot map to c. coordinate (non-coding transcript?)"
                .format(self=self))

        n_start = n_interval.start
        n_end = n_interval.end
        if n_start.base <= 0 or n_end.base > self.tgt_len:
            raise HGVSInvalidIntervalError(
                "The given coordinate is outside the bounds of the reference sequence."
            )

        def to_cds(n_base):
            """Return the (c. base, datum) pair for one n. base."""
            if n_base <= self.cds_start_i:
                # at or before cds_start_i: negative count from CDS start
                return n_base - (self.cds_start_i + 1), Datum.CDS_START
            if self.cds_start_i < n_base <= self.cds_end_i:
                # between the two CDS markers: counted from CDS start
                return n_base - self.cds_start_i, Datum.CDS_START
            # beyond cds_end_i: counted from CDS end
            return n_base - self.cds_end_i, Datum.CDS_END

        cs, cs_datum = to_cds(n_start.base)
        ce, ce_datum = to_cds(n_end.base)

        start_pos = hgvs.location.BaseOffsetPosition(
            base=cs, offset=n_start.offset, datum=cs_datum)
        end_pos = hgvs.location.BaseOffsetPosition(
            base=ce, offset=n_end.offset, datum=ce_datum)
        return hgvs.location.BaseOffsetInterval(
            start=start_pos, end=end_pos, uncertain=n_interval.uncertain)
Exemple #11
0
 def __init__(self, start_i, end_i):
     """Store the interval bounds ``start_i`` and ``end_i``.

     Raises HGVSInvalidIntervalError unless ``start_i <= end_i`` holds; in
     that case neither attribute is set.
     """
     correctly_ordered = start_i <= end_i
     if correctly_ordered:
         self.start_i, self.end_i = start_i, end_i
         return
     raise HGVSInvalidIntervalError(
         "start_i must be less than or equal to end_i")