Example #1
0
    def _alt_ac_for_tx_ac(self, tx_ac):
        """Return the chromosomal accession aligned to ``tx_ac``.

        Candidates are the ``alt_ac`` values from
        ``self.hdp.get_tx_mapping_options(tx_ac)`` whose ``alt_aln_method``
        equals ``self.alt_aln_method`` and whose ``alt_ac`` is listed in
        ``self._assembly_accessions``.

        A single candidate is returned as is.  Several candidates are only
        tolerated when their names in ``self._assembly_map`` are exactly X
        and Y (assumed to be a pseudoautosomal region); in that case
        ``self.in_par_assume`` names the chromosome to keep.

        Raises:
            HGVSDataNotAvailableError: no candidate alignment was found.
            HGVSError: several candidates were found and they are not
                exactly X and Y, or ``self.in_par_assume`` is None, or it
                does not select exactly one candidate.
        """
        candidates = []
        for option in self.hdp.get_tx_mapping_options(tx_ac):
            if option["alt_aln_method"] != self.alt_aln_method:
                continue
            if option["alt_ac"] in self._assembly_accessions:
                candidates.append(option["alt_ac"])

        if not candidates:
            raise HGVSDataNotAvailableError(
                "No alignments for {tx_ac} in {an} using {am}".format(
                    tx_ac=tx_ac, an=self.assembly_name,
                    am=self.alt_aln_method))

        # Unambiguous alignment: nothing to resolve.
        if len(candidates) == 1:
            return candidates[0]

        # More than one alignment is only acceptable for the X/Y pair.
        chrom_names = {self._assembly_map[ac] for ac in candidates}
        if chrom_names != {"X", "Y"}:
            described = ", ".join(
                "{ac} ({n})".format(ac=ac, n=self._assembly_map[ac])
                for ac in candidates)
            raise HGVSError(
                "Multiple chromosomal alignments for {tx_ac} in {an}"
                " using {am} (non-pseudoautosomal region) [{alts}]".format(
                    tx_ac=tx_ac,
                    an=self.assembly_name,
                    am=self.alt_aln_method,
                    alts=described))

        # assume PAR; the caller must have said which chromosome to prefer
        if self.in_par_assume is None:
            raise HGVSError(
                "Multiple chromosomal alignments for {tx_ac} in {an}"
                " using {am} (likely pseudoautosomal region)".format(
                    tx_ac=tx_ac,
                    an=self.assembly_name,
                    am=self.alt_aln_method))

        selected = [
            ac for ac in candidates
            if self._assembly_map[ac] == self.in_par_assume
        ]
        if len(selected) != 1:
            raise HGVSError(
                "Multiple chromosomal alignments for {tx_ac} in {an}"
                " using {am}; in_par_assume={ipa} selected {n} of them".
                format(tx_ac=tx_ac,
                       an=self.assembly_name,
                       am=self.alt_aln_method,
                       ipa=self.in_par_assume,
                       n=len(selected)))

        return selected[0]
Example #2
0
 def __str__(self):
     """Render the repeat as ``ref[n]`` or, for a range, ``ref(min_max)``.

     Raises:
         HGVSError: ``self.min`` is greater than ``self.max``.
     """
     if self.min > self.max:
         raise HGVSError(
             "Repeat min count must be less than or equal to max count")
     # An exact count uses square brackets; a range uses parentheses.
     template = ("{self.ref}[{self.min}]" if self.min == self.max else
                 "{self.ref}({self.min}_{self.max})")
     return template.format(self=self)
Example #3
0
    def c_to_n(self, c_interval):
        """Convert a transcript CDS (c.) interval to a transcript cDNA (n.) interval.

        Each endpoint base is shifted by ``self.cds_start_i`` (CDS_START
        datum) or ``self.cds_end_i`` (CDS_END datum); offsets and the
        ``uncertain`` flag are copied through unchanged.

        Raises:
            HGVSUsageError: ``self.cds_start_i`` is None (no CDS defined).
            HGVSError: an endpoint is c.0 or uses an unrecognised datum,
                or the mapped start is <= 0 or the mapped end exceeds
                ``self.tgt_len``.
        """

        # the original note states cds_start_i is defined iff cds_end_i is
        if self.cds_start_i is None:
            raise HGVSUsageError(
                "CDS is undefined for {self.tx_ac}; cannot map from c. coordinate (non-coding transcript?)".format(
                    self=self))

        def base_c_to_n(pos):
            """Map one c. position's base to its n. base."""
            if pos.datum == hgvs.location.CDS_START:
                if pos.base < 0:
                    # there is no c.0, so upstream positions shift by one
                    return pos.base + self.cds_start_i + 1
                if pos.base > 0:
                    return pos.base + self.cds_start_i
                # base == 0 previously fell through every branch and
                # surfaced as an UnboundLocalError
                raise HGVSError(
                    "Invalid c. coordinate: position 0 does not exist")
            if pos.datum == hgvs.location.CDS_END:
                return pos.base + self.cds_end_i
            raise HGVSError(
                "Unsupported datum {datum} for a c. coordinate".format(
                    datum=pos.datum))

        rs = base_c_to_n(c_interval.start)
        re = base_c_to_n(c_interval.end)

        if rs <= 0 or re > self.tgt_len:
            raise HGVSError("The given coordinate is outside the bounds of the reference sequence.")

        n_interval = hgvs.location.BaseOffsetInterval(
            start=hgvs.location.BaseOffsetPosition(
                base=rs, offset=c_interval.start.offset, datum=hgvs.location.SEQ_START),
            end=hgvs.location.BaseOffsetPosition(base=re, offset=c_interval.end.offset, datum=hgvs.location.SEQ_START),
            uncertain=c_interval.uncertain)
        return n_interval
Example #4
0
    def format(self, conf=None):
        """Render this ref/alt edit as a string, optionally hiding long refs.

        ``self._format_config_na(conf)`` supplies a maximum reference
        length.  When it is not None, the displayed reference is
        ``self.ref_s``, blanked if it is None or longer than that maximum;
        otherwise ``self.ref`` is displayed.  Substitutions (``X>Y``)
        always show ``self.ref`` itself.

        Raises:
            HGVSError: both ``self.ref`` and ``self.alt`` are None.
        """
        if self.ref is None and self.alt is None:
            raise HGVSError("RefAlt: ref and alt sequences are both undefined")

        max_ref_length = self._format_config_na(conf)
        if max_ref_length is None:
            shown_ref = self.ref
        else:
            shown_ref = self.ref_s
            if shown_ref is None or len(shown_ref) > max_ref_length:
                shown_ref = ''

        has_ref = self.ref is not None
        has_alt = self.alt is not None

        if has_ref and has_alt:
            # subst and delins
            if self.ref == self.alt:
                text = "{ref}=".format(ref=shown_ref)
            elif (len(self.alt) == 1 and len(self.ref) == 1
                  and not self.ref.isdigit()):
                # the isdigit guard keeps del5insT from becoming 5>T
                text = "{self.ref}>{self.alt}".format(self=self)
            else:
                text = "del{ref}ins{alt}".format(ref=shown_ref, alt=self.alt)
        elif has_ref:
            # del case
            text = "del{ref}".format(ref=shown_ref)
        else:
            # ins case: only alt is defined
            text = "ins{self.alt}".format(self=self)

        if self.uncertain:
            return "(" + text + ")"
        return text
Example #5
0
 def is_coding_transcript(self):
     """Return True when ``self.tx_info`` defines a CDS start.

     Raises:
         HGVSError: exactly one of ``cds_start_i`` / ``cds_end_i`` in
             ``self.tx_info`` is None.
     """
     has_start = self.tx_info["cds_start_i"] is not None
     has_end = self.tx_info["cds_end_i"] is not None
     # The two bounds must be defined together or not at all.
     if has_start != has_end:
         raise HGVSError(
             "{self.tx_ac}: CDS start_i and end_i"
             " must be both defined or both undefined".format(self=self))
     return has_start
Example #6
0
    def _get_cursor(self, n_retries=1):
        """Yield a cursor from the single or pooled connection, with the
        PostgreSQL search_path set to the schema in the connection URL.

        Although *connections* are threadsafe, *cursors* are bound to
        connections and are *not* threadsafe. Do not share cursors
        across threads.

        Use this function like this::

            with hdp._get_cursor() as cur:
                # your code

        Do not call this function outside a contextmanager.

        Obtaining the cursor is attempted up to ``n_retries + 1`` times;
        each ``psycopg2.OperationalError`` triggers a reconnect before the
        next attempt.  The cursor is yielded exactly once.  If the caller's
        block raises ``psycopg2.OperationalError``, the connection is
        re-established and the error is re-raised (a generator-based
        context manager cannot re-run the caller's block).  On any other
        exit the cursor is closed and a pooled connection is returned to
        the pool.

        Raises:
            HGVSError: no cursor could be obtained within the allowed
                number of attempts.
        """

        def reconnect():
            """Drop the current connection(s) and connect again."""
            _logger.warning(
                "Lost connection to {url}; attempting reconnect".format(
                    url=self.url))
            if self.pooling:
                self._pool.closeall()
            self._connect()
            _logger.warning("Reconnected to {url}".format(url=self.url))

        # Phase 1: obtain a usable cursor, reconnecting between attempts.
        n_tries_rem = n_retries + 1
        while n_tries_rem > 0:
            try:
                conn = self._pool.getconn() if self.pooling else self._conn

                # autocommit=True obviates closing explicitly
                conn.autocommit = True

                cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
                cur.execute(
                    "set search_path = {self.url.schema};".format(self=self))
                break
            except psycopg2.OperationalError:
                reconnect()
            n_tries_rem -= 1
        else:
            raise HGVSError(
                "Permanently lost connection to {url} ({n} retries)".format(
                    url=self.url, n=n_retries))

        # Phase 2: hand the cursor to the caller exactly once.  Yielding
        # outside the retry loop keeps the generator from yielding a second
        # time after an error is thrown in, and the finally clause releases
        # resources however the caller's block exits.
        connection_lost = False
        try:
            yield cur
        except psycopg2.OperationalError:
            connection_lost = True
            raise
        finally:
            if connection_lost:
                # the old connection is unusable; replace it so that the
                # next call starts from a fresh one
                reconnect()
            else:
                try:
                    cur.close()
                finally:
                    if self.pooling:
                        self._pool.putconn(conn)
Example #7
0
    def __init__(self, hdp, tx_ac, alt_ac, alt_aln_method):
        """Load transcript data from ``hdp`` and derive mapping attributes.

        For any ``alt_aln_method`` other than ``"transcript"``, transcript
        info and exons are fetched from ``hdp``, exon adjacency in
        transcript coordinates is verified, and the strand, CDS bounds,
        ``gc_offset``, cigar, interval mapper and ``tgt_len`` are set.
        For ``"transcript"`` (the identity n <-> c case) only the CDS
        bounds and ``tgt_len`` are set, from ``hdp.get_tx_identity_info``.

        Raises:
            HGVSDataNotAvailableError: transcript info is None, the exon
                list is None or empty, or two consecutive exons (ordered
                by ``ord``) are not adjacent.
            HGVSError: identity info is None for the ``"transcript"`` case.
            AssertionError: exactly one of the CDS bounds is None.
        """
        self.hdp = hdp
        self.tx_ac = tx_ac
        self.alt_ac = alt_ac
        self.alt_aln_method = alt_aln_method
        if self.alt_aln_method != "transcript":
            self.tx_info = hdp.get_tx_info(self.tx_ac, self.alt_ac,
                                           self.alt_aln_method)
            if self.tx_info is None:
                raise HGVSDataNotAvailableError(
                    "TranscriptMapper(tx_ac={self.tx_ac}, "
                    "alt_ac={self.alt_ac}, alt_aln_method={self.alt_aln_method}): "
                    "No transcript info".format(self=self))

            self.tx_exons = hdp.get_tx_exons(self.tx_ac, self.alt_ac,
                                             self.alt_aln_method)
            # An empty exon list is as unusable as None: without this check
            # the tx_exons[0] lookups below fail with a bare IndexError.
            if not self.tx_exons:
                raise HGVSDataNotAvailableError(
                    "TranscriptMapper(tx_ac={self.tx_ac}, "
                    "alt_ac={self.alt_ac}, alt_aln_method={self.alt_aln_method}): "
                    "No transcript exons".format(self=self))

            # hgvs-386: An assumption when building the cigar string
            # is that exons are adjacent. Verify that here.
            ordered_exons = sorted(self.tx_exons, key=lambda e: e["ord"])
            exon_pairs = zip(ordered_exons, ordered_exons[1:])
            for i, (prev_exon, next_exon) in enumerate(exon_pairs, start=1):
                if prev_exon["tx_end_i"] != next_exon["tx_start_i"]:
                    raise HGVSDataNotAvailableError(
                        "TranscriptMapper(tx_ac={self.tx_ac}, "
                        "alt_ac={self.alt_ac}, alt_aln_method={self.alt_aln_method}): "
                        "Exons {a} and {b} are not adjacent".format(self=self,
                                                                    a=i,
                                                                    b=i + 1))

            self.strand = self.tx_exons[0]["alt_strand"]
            self.cds_start_i = self.tx_info["cds_start_i"]
            self.cds_end_i = self.tx_info["cds_end_i"]
            self.gc_offset = self.tx_exons[0]["alt_start_i"]
            self.cigar = build_tx_cigar(self.tx_exons, self.strand)
            self.im = hgvs.intervalmapper.IntervalMapper.from_cigar(self.cigar)
            self.tgt_len = self.im.tgt_len
        else:
            # this covers the identity cases n <-> c
            self.tx_identity_info = hdp.get_tx_identity_info(self.tx_ac)
            if self.tx_identity_info is None:
                raise HGVSError(
                    "TranscriptMapper(tx_ac={self.tx_ac}, "
                    "alt_ac={self.alt_ac}, alt_aln_method={self.alt_aln_method}): "
                    "No transcript identity info".format(self=self))
            self.cds_start_i = self.tx_identity_info["cds_start_i"]
            self.cds_end_i = self.tx_identity_info["cds_end_i"]
            self.tgt_len = sum(self.tx_identity_info["lengths"])

        # Internal invariant: the CDS bounds come as a pair.
        assert not (
            (self.cds_start_i is None) ^ (self.cds_end_i is None)
        ), "CDS start and end must both be defined or neither defined"
Example #8
0
 def format(self, conf=None):
     """Render the repeat, blanking the reference when it is too long.

     ``self._format_config_na(conf)`` supplies a maximum reference length;
     when it is not None and ``self.ref`` is None or longer than it, the
     reference is rendered as an empty string.

     Raises:
         HGVSError: ``self.min`` is greater than ``self.max``.
     """
     if self.min > self.max:
         raise HGVSError("Repeat min count must be less than or equal to max count")

     limit = self._format_config_na(conf)
     shown_ref = self.ref
     if limit is not None:
         if shown_ref is None or len(shown_ref) > limit:
             shown_ref = ''

     # An exact count uses square brackets; a range uses parentheses.
     if self.min == self.max:
         rendered = "{ref}[{min}]".format(ref=shown_ref, min=self.min)
     else:
         rendered = "{ref}({min}_{max})".format(
             ref=shown_ref, min=self.min, max=self.max)
     return rendered
Example #9
0
    def n_to_c(self, n_interval):
        """Convert a transcript cDNA (n.) interval to a transcript CDS (c.) interval.

        Each endpoint base is expressed relative to the CDS start when it
        lies at or before ``self.cds_end_i``, and relative to the CDS end
        otherwise; offsets and the ``uncertain`` flag are copied through.

        Raises:
            HGVSUsageError: ``self.cds_start_i`` is None (no CDS defined).
            HGVSError: the start base is <= 0 or the end base exceeds
                ``self.tgt_len``.
        """

        # the original note states cds_start_i is defined iff cds_end_i is
        if self.cds_start_i is None:
            raise HGVSUsageError(
                "CDS is undefined for {self.tx_ac}; cannot map to c. coordinate (non-coding transcript?)"
                .format(self=self))

        start_pos = n_interval.start
        end_pos = n_interval.end
        if start_pos.base <= 0 or end_pos.base > self.tgt_len:
            raise HGVSError(
                "The given coordinate is outside the bounds of the reference sequence."
            )

        def base_n_to_c(n_base):
            """Return the (c. base, datum) pair for one n. base."""
            if n_base <= self.cds_start_i:
                # upstream of the CDS: shift one further (no c.0)
                return n_base - (self.cds_start_i + 1), Datum.CDS_START
            if n_base <= self.cds_end_i:
                return n_base - self.cds_start_i, Datum.CDS_START
            return n_base - self.cds_end_i, Datum.CDS_END

        cs, cs_datum = base_n_to_c(start_pos.base)
        ce, ce_datum = base_n_to_c(end_pos.base)

        c_start = hgvs.location.BaseOffsetPosition(
            base=cs, offset=n_interval.start.offset, datum=cs_datum)
        c_end = hgvs.location.BaseOffsetPosition(
            base=ce, offset=n_interval.end.offset, datum=ce_datum)
        return hgvs.location.BaseOffsetInterval(
            start=c_start, end=c_end, uncertain=n_interval.uncertain)
Example #10
0
    def __str__(self):
        """Render this ref/alt edit as a string.

        Produces ``X=`` for identity, ``X>Y`` for a single-base
        substitution, ``delXinsY`` for other replacements, ``delX`` when
        only ref is defined and ``insY`` when only alt is defined; the
        result is wrapped in parentheses when ``self.uncertain`` is truthy.

        Raises:
            HGVSError: both ``self.ref`` and ``self.alt`` are None.
        """
        if self.ref is None and self.alt is None:
            raise HGVSError("RefAlt: ref and alt sequences are both undefined")

        has_ref = self.ref is not None
        has_alt = self.alt is not None

        if has_ref and has_alt:
            # subst and delins
            if self.ref == self.alt:
                template = "{self.ref}="
            elif (len(self.alt) == 1 and len(self.ref) == 1
                  and not self.ref.isdigit()):
                # the isdigit guard keeps del5insT from becoming 5>T
                template = "{self.ref}>{self.alt}"
            else:
                template = "del{self.ref}ins{self.alt}"
        elif has_ref:
            # del case
            template = "del{self.ref}"
        else:
            # ins case: only alt is defined
            template = "ins{self.alt}"

        text = template.format(self=self)
        if self.uncertain:
            return "(" + text + ")"
        return text