def _alt_ac_for_tx_ac(self, tx_ac):
    """Return the chromosomal accession aligned to transcript ``tx_ac``.

    Candidates come from ``self.hdp.get_tx_mapping_options(tx_ac)`` and are
    kept only when their ``alt_aln_method`` equals ``self.alt_aln_method``
    and their ``alt_ac`` is a member of ``self._assembly_accessions``.

    If several candidates survive, the only accepted situation is that their
    names in ``self._assembly_map`` are exactly ``X`` and ``Y`` (treated as a
    pseudoautosomal region); ``self.in_par_assume`` then selects which of
    them is returned.

    Raises:
        HGVSDataNotAvailableError: no candidate survives the filtering.
        HGVSError: several candidates remain and either they are not an X/Y
            pair, ``in_par_assume`` is None, or ``in_par_assume`` does not
            single out exactly one of them.
    """
    candidates = []
    for option in self.hdp.get_tx_mapping_options(tx_ac):
        if (option["alt_aln_method"] == self.alt_aln_method
                and option["alt_ac"] in self._assembly_accessions):
            candidates.append(option["alt_ac"])

    if not candidates:
        raise HGVSDataNotAvailableError(
            "No alignments for {tx_ac} in {an} using {am}".format(
                tx_ac=tx_ac, an=self.assembly_name, am=self.alt_aln_method))

    # The common case: a single alignment, nothing to disambiguate.
    if len(candidates) == 1:
        return candidates[0]

    # Every error below starts with the same sentence; format it once.
    complaint = ("Multiple chromosomal alignments for {tx_ac} in {an}"
                 " using {am}").format(
                     tx_ac=tx_ac, an=self.assembly_name, am=self.alt_aln_method)

    chrom_names = {self._assembly_map[ac] for ac in candidates}
    if chrom_names != {"X", "Y"}:
        listing = ", ".join(
            "{ac} ({n})".format(ac=ac, n=self._assembly_map[ac])
            for ac in candidates)
        raise HGVSError(
            complaint
            + " (non-pseudoautosomal region) [{alts}]".format(alts=listing))

    # From here on the alignments are assumed to be a PAR X/Y pair.
    if self.in_par_assume is None:
        raise HGVSError(complaint + " (likely pseudoautosomal region)")

    chosen = [
        ac for ac in candidates
        if self._assembly_map[ac] == self.in_par_assume
    ]
    if len(chosen) != 1:
        raise HGVSError(
            complaint
            + "; in_par_assume={ipa} selected {n} of them".format(
                ipa=self.in_par_assume, n=len(chosen)))

    return chosen[0]
def __str__(self):
    """Render the repeat as ``ref[n]`` or, for a range, ``ref(min_max)``.

    The bracketed form is used when ``self.min`` equals ``self.max``.

    Raises:
        HGVSError: ``self.min`` is greater than ``self.max``.
    """
    lo, hi = self.min, self.max
    if lo > hi:
        raise HGVSError(
            "Repeat min count must be less than or equal to max count")

    if lo == hi:
        template = "{ref}[{lo}]"
    else:
        template = "{ref}({lo}_{hi})"
    return template.format(ref=self.ref, lo=lo, hi=hi)
def c_to_n(self, c_interval):
    """Convert a transcript CDS (c.) interval to a transcript cDNA (n.) interval.

    Each endpoint is shifted according to its datum:

    * ``CDS_START`` with a negative base: ``base + cds_start_i + 1``
      (the extra 1 accounts for c. numbering having no position 0);
    * ``CDS_START`` with a positive base: ``base + cds_start_i``;
    * ``CDS_END``: ``base + cds_end_i``.

    Offsets and the ``uncertain`` flag are copied through unchanged and both
    resulting positions use the ``SEQ_START`` datum.

    Raises:
        HGVSUsageError: ``self.cds_start_i`` is None (no CDS defined).
        HGVSError: an endpoint has base 0 relative to ``CDS_START``, an
            endpoint uses a datum other than ``CDS_START``/``CDS_END``, or
            the mapped interval starts at or before 0 or ends beyond
            ``self.tgt_len``.
    """
    if self.cds_start_i is None:
        # cds_start_i is expected to be defined iff cds_end_i is defined
        raise HGVSUsageError(
            "CDS is undefined for {self.tx_ac}; cannot map from c. coordinate (non-coding transcript?)".format(
                self=self))

    def _n_base(pos):
        """Map one c. position's base to n. numbering."""
        if pos.datum == hgvs.location.CDS_START:
            if pos.base < 0:
                return pos.base + self.cds_start_i + 1
            if pos.base > 0:
                return pos.base + self.cds_start_i
            # Previously this case left the result unbound and surfaced as
            # an UnboundLocalError; report it as a domain error instead.
            raise HGVSError(
                "Invalid c. position: base 0 does not exist relative to the CDS start")
        if pos.datum == hgvs.location.CDS_END:
            return pos.base + self.cds_end_i
        raise HGVSError(
            "Unsupported datum {datum!r} for a c. position".format(datum=pos.datum))

    n_start = _n_base(c_interval.start)
    n_end = _n_base(c_interval.end)

    if n_start <= 0 or n_end > self.tgt_len:
        raise HGVSError("The given coordinate is outside the bounds of the reference sequence.")

    return hgvs.location.BaseOffsetInterval(
        start=hgvs.location.BaseOffsetPosition(
            base=n_start, offset=c_interval.start.offset, datum=hgvs.location.SEQ_START),
        end=hgvs.location.BaseOffsetPosition(
            base=n_end, offset=c_interval.end.offset, datum=hgvs.location.SEQ_START),
        uncertain=c_interval.uncertain)
def format(self, conf=None):
    """Render this ref/alt edit as text, honouring a maximum reference length.

    ``self._format_config_na(conf)`` supplies the limit. When it is None the
    raw ``self.ref`` is displayed. Otherwise ``self.ref_s`` is displayed,
    replaced by an empty string when it is None or longer than the limit.

    Output forms:
        * ref equals alt: ``<ref>=``
        * single-character, non-digit ref and single-character alt:
          ``<ref>><alt>`` (always with the full ref)
        * both present otherwise: ``del<ref>ins<alt>``
        * only ref: ``del<ref>``
        * only alt: ``ins<alt>``

    The result is wrapped in parentheses when ``self.uncertain`` is true.

    Raises:
        HGVSError: both ``self.ref`` and ``self.alt`` are None.
    """
    ref_seq, alt_seq = self.ref, self.alt
    if ref_seq is None and alt_seq is None:
        raise HGVSError("RefAlt: ref and alt sequences are both undefined")

    limit = self._format_config_na(conf)
    if limit is None:
        shown_ref = ref_seq
    else:
        shown_ref = self.ref_s
        if shown_ref is None or len(shown_ref) > limit:
            shown_ref = ''

    if alt_seq is None:
        # deletion only
        text = "del{ref}".format(ref=shown_ref)
    elif ref_seq is None:
        # insertion only
        text = "ins{alt}".format(alt=alt_seq)
    elif ref_seq == alt_seq:
        text = "{ref}=".format(ref=shown_ref)
    elif len(alt_seq) == 1 and len(ref_seq) == 1 and not ref_seq.isdigit():
        # a digit ref is a length, so del5insT must not become 5>T
        text = "{ref}>{alt}".format(ref=ref_seq, alt=alt_seq)
    else:
        text = "del{ref}ins{alt}".format(ref=shown_ref, alt=alt_seq)

    if self.uncertain:
        return "(" + text + ")"
    return text
def is_coding_transcript(self):
    """Report whether ``self.tx_info`` defines a CDS.

    Returns:
        True when ``tx_info["cds_start_i"]`` is not None, False otherwise.

    Raises:
        HGVSError: exactly one of ``cds_start_i`` / ``cds_end_i`` is None.
    """
    has_start = self.tx_info["cds_start_i"] is not None
    has_end = self.tx_info["cds_end_i"] is not None

    # Both flags are plain booleans, so inequality means exactly one is set.
    if has_start != has_end:
        message = ("{self.tx_ac}: CDS start_i and end_i"
                   " must be both defined or both undefined")
        raise HGVSError(message.format(self=self))

    return has_start
def _get_cursor(self, n_retries=1):
    """Yield a cursor from the single or pooled connection.

    Before the cursor is handed out, the PostgreSQL search_path is set to
    the schema specified in the connection URL.

    Although *connections* are threadsafe, *cursors* are bound to
    connections and are *not* threadsafe. Do not share cursors across
    threads.

    This generator is meant to be driven as a context manager. Use it like
    this::

        with hdp._get_cursor() as cur:
            # your code

    Do not call this function outside a contextmanager.

    Args:
        n_retries: how many additional attempts to make at obtaining a
            working cursor after ``psycopg2.OperationalError``.

    Raises:
        HGVSError: no cursor could be obtained within ``n_retries + 1``
            attempts.
        psycopg2.OperationalError: raised by the caller's ``with`` body; the
            connection is re-established first and the error is then
            re-raised unchanged.
    """

    def _reconnect():
        """Drop pooled connections (if pooling) and connect again."""
        _logger.warning(
            "Lost connection to {url}; attempting reconnect".format(
                url=self.url))
        if self.pooling:
            self._pool.closeall()
        self._connect()
        _logger.warning("Reconnected to {url}".format(url=self.url))

    # Phase 1: obtain a usable cursor, reconnecting on lost connections.
    # The yield is deliberately kept out of this loop: a generator-based
    # context manager may yield only once, so an error raised by the
    # caller's block must never send control back around the loop.
    for _attempt in range(n_retries + 1):
        try:
            conn = self._pool.getconn() if self.pooling else self._conn
            # autocommit=True obviates closing explicitly
            conn.autocommit = True
            cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
            cur.execute(
                "set search_path = {self.url.schema};".format(self=self))
            break
        except psycopg2.OperationalError:
            _reconnect()
    else:
        raise HGVSError(
            "Permanently lost connection to {url} ({n} retries)".format(
                url=self.url, n=n_retries))

    # Phase 2: lend the cursor out exactly once and always clean up.
    connection_lost = False
    try:
        yield cur
    except psycopg2.OperationalError:
        # The connection died while the caller was using it. Reconnect so
        # that later calls work, then let the caller see the failure.
        connection_lost = True
        _reconnect()
        raise
    finally:
        # After a reconnect the old connection no longer belongs to the
        # current pool, so it must not be handed back.
        if not connection_lost:
            cur.close()
            if self.pooling:
                self._pool.putconn(conn)
def __init__(self, hdp, tx_ac, alt_ac, alt_aln_method):
    """Load transcript data from ``hdp`` and prepare coordinate mapping.

    When ``alt_aln_method`` is ``"transcript"`` only the identity
    information is fetched (``get_tx_identity_info``) and ``tgt_len`` is the
    sum of its ``lengths``. For any other method the transcript info and
    exons are fetched, exon adjacency is verified, and a CIGAR-based
    interval mapper is built from the exons.

    Raises:
        HGVSDataNotAvailableError: transcript info or exons are None, or two
            consecutive exons (ordered by ``ord``) are not adjacent in
            transcript coordinates.
        HGVSError: identity info is None for the ``"transcript"`` method.
        AssertionError: exactly one of ``cds_start_i`` / ``cds_end_i`` is
            None.
    """
    self.hdp = hdp
    self.tx_ac = tx_ac
    self.alt_ac = alt_ac
    self.alt_aln_method = alt_aln_method

    def _whoami():
        """Shared prefix for the error messages raised below."""
        return ("TranscriptMapper(tx_ac={self.tx_ac}, "
                "alt_ac={self.alt_ac}, alt_aln_method={self.alt_aln_method}): "
                ).format(self=self)

    if self.alt_aln_method == "transcript":
        # this covers the identity cases n <-> c
        self.tx_identity_info = hdp.get_tx_identity_info(self.tx_ac)
        if self.tx_identity_info is None:
            raise HGVSError(_whoami() + "No transcript identity info")
        self.cds_start_i = self.tx_identity_info["cds_start_i"]
        self.cds_end_i = self.tx_identity_info["cds_end_i"]
        self.tgt_len = sum(self.tx_identity_info["lengths"])
    else:
        self.tx_info = hdp.get_tx_info(
            self.tx_ac, self.alt_ac, self.alt_aln_method)
        if self.tx_info is None:
            raise HGVSDataNotAvailableError(
                _whoami() + "No transcript info")

        self.tx_exons = hdp.get_tx_exons(
            self.tx_ac, self.alt_ac, self.alt_aln_method)
        if self.tx_exons is None:
            raise HGVSDataNotAvailableError(
                _whoami() + "No transcript exons")

        # hgvs-386: An assumption when building the cigar string
        # is that exons are adjacent. Assert that here.
        ordered = sorted(self.tx_exons, key=lambda exon: exon["ord"])
        for number, (left, right) in enumerate(zip(ordered, ordered[1:]), 1):
            if left["tx_end_i"] != right["tx_start_i"]:
                raise HGVSDataNotAvailableError(
                    _whoami()
                    + "Exons {a} and {b} are not adjacent".format(
                        a=number, b=number + 1))

        first_exon = self.tx_exons[0]
        self.strand = first_exon["alt_strand"]
        self.cds_start_i = self.tx_info["cds_start_i"]
        self.cds_end_i = self.tx_info["cds_end_i"]
        self.gc_offset = first_exon["alt_start_i"]
        self.cigar = build_tx_cigar(self.tx_exons, self.strand)
        self.im = hgvs.intervalmapper.IntervalMapper.from_cigar(self.cigar)
        self.tgt_len = self.im.tgt_len

    assert (self.cds_start_i is None) == (self.cds_end_i is None), \
        "CDS start and end must both be defined or neither defined"
def format(self, conf=None):
    """Render the repeat, optionally suppressing a long reference sequence.

    ``self._format_config_na(conf)`` supplies a maximum reference length.
    When that limit is not None and ``self.ref`` is None or longer than the
    limit, the reference is rendered as an empty string.

    Returns:
        ``<ref>[<min>]`` when min equals max, else ``<ref>(<min>_<max>)``.

    Raises:
        HGVSError: ``self.min`` is greater than ``self.max``.
    """
    lo, hi = self.min, self.max
    if lo > hi:
        raise HGVSError("Repeat min count must be less than or equal to max count")

    limit = self._format_config_na(conf)
    shown_ref = self.ref
    if limit is not None:
        if shown_ref is None or len(shown_ref) > limit:
            shown_ref = ''

    if lo == hi:
        return "{ref}[{n}]".format(ref=shown_ref, n=lo)
    return "{ref}({lo}_{hi})".format(ref=shown_ref, lo=lo, hi=hi)
def n_to_c(self, n_interval):
    """Convert a transcript cDNA (n.) interval to a transcript CDS (c.) interval.

    Each endpoint base is re-expressed relative to the CDS:

    * at or before ``cds_start_i``: ``base - (cds_start_i + 1)``, datum
      ``CDS_START`` (yields negative positions);
    * after ``cds_start_i`` up to and including ``cds_end_i``:
      ``base - cds_start_i``, datum ``CDS_START``;
    * beyond ``cds_end_i``: ``base - cds_end_i``, datum ``CDS_END``.

    Offsets and the ``uncertain`` flag are carried over unchanged.

    Raises:
        HGVSUsageError: ``self.cds_start_i`` is None (no CDS defined).
        HGVSError: the start base is <= 0 or the end base exceeds
            ``self.tgt_len``.
    """
    if self.cds_start_i is None:
        # cds_start_i is expected to be defined iff cds_end_i is defined
        raise HGVSUsageError(
            "CDS is undefined for {self.tx_ac}; cannot map to c. coordinate (non-coding transcript?)"
            .format(self=self))

    first, last = n_interval.start, n_interval.end
    if first.base <= 0 or last.base > self.tgt_len:
        raise HGVSError(
            "The given coordinate is outside the bounds of the reference sequence."
        )

    cds_lo, cds_hi = self.cds_start_i, self.cds_end_i

    def _relocate(n_base):
        """Return ``(c_base, datum)`` for a single n. base."""
        if n_base <= cds_lo:
            return n_base - (cds_lo + 1), Datum.CDS_START
        if n_base <= cds_hi:
            return n_base - cds_lo, Datum.CDS_START
        return n_base - cds_hi, Datum.CDS_END

    start_base, start_datum = _relocate(first.base)
    end_base, end_datum = _relocate(last.base)

    return hgvs.location.BaseOffsetInterval(
        start=hgvs.location.BaseOffsetPosition(
            base=start_base, offset=first.offset, datum=start_datum),
        end=hgvs.location.BaseOffsetPosition(
            base=end_base, offset=last.offset, datum=end_datum),
        uncertain=n_interval.uncertain)
def __str__(self):
    """Render this ref/alt edit as text.

    Output forms:
        * ref equals alt: ``<ref>=``
        * single-character, non-digit ref and single-character alt:
          ``<ref>><alt>``
        * both present otherwise: ``del<ref>ins<alt>``
        * only ref: ``del<ref>``
        * only alt: ``ins<alt>``

    The result is wrapped in parentheses when ``self.uncertain`` is true.

    Raises:
        HGVSError: both ``self.ref`` and ``self.alt`` are None.
    """
    ref_seq, alt_seq = self.ref, self.alt
    if ref_seq is None and alt_seq is None:
        raise HGVSError("RefAlt: ref and alt sequences are both undefined")

    if alt_seq is None:
        # deletion only
        text = "del{ref}".format(ref=ref_seq)
    elif ref_seq is None:
        # insertion only
        text = "ins{alt}".format(alt=alt_seq)
    elif ref_seq == alt_seq:
        text = "{ref}=".format(ref=ref_seq)
    elif len(alt_seq) == 1 and len(ref_seq) == 1 and not ref_seq.isdigit():
        # a digit ref is a length, so del5insT must not become 5>T
        text = "{ref}>{alt}".format(ref=ref_seq, alt=alt_seq)
    else:
        text = "del{ref}ins{alt}".format(ref=ref_seq, alt=alt_seq)

    if self.uncertain:
        return "(" + text + ")"
    return text