def _construct_change(var, reverse=False):
    """
    Build the change part of a variant description from a parsed raw
    variant.

    @arg var: RawVar object.
    @type var: pyparsing.ParseResults
    @arg reverse: Variant is on the reverse strand.
    @type reverse: bool

    @return: Description of mutation (without reference and positions).
    @rtype: unicode

    @raise NotImplementedError: An inserted sequence is not given
        explicitly.
    """
    def on_strand(sequence):
        # Literal sequences are reverse complemented for reverse strand
        # variants and passed through untouched otherwise.
        if reverse:
            return util.reverse_complement(sequence)
        return sequence

    def read_argument(value):
        # Note that the pyparsing parse tree yields `str('')` for
        # nonexisting attributes, so optional attributes are wrapped in
        # `unicode()`.
        if not reverse:
            return unicode(value)
        try:
            # Arguments that parse as an integer are not sequences and
            # must not be reverse complemented.
            return unicode(int(value))
        except ValueError:
            return util.reverse_complement(unicode(value))

    def parse_sequence(seq):
        if not seq.Sequence:
            raise NotImplementedError('Only explicit sequences are supported '
                                      'for insertions.')
        return on_strand(seq.Sequence)

    arg1 = read_argument(var.Arg1)
    arg2 = read_argument(var.Arg2)
    mutation_type = var.MutationType

    if mutation_type == 'subst':
        return '%s>%s' % (arg1, arg2)

    if mutation_type not in ('ins', 'delins'):
        # All other types carry at most one (optional) argument.
        return '%s%s' % (mutation_type, arg1 or arg2 or '')

    if not var.SeqList:
        return '%s%s' % (mutation_type, parse_sequence(var.Seq))

    # A compound insertion: on the reverse strand the order of the parts
    # is reversed in addition to each part being reverse complemented.
    parts = reversed(var.SeqList) if reverse else var.SeqList
    joined = ';'.join(parse_sequence(part) for part in parts)
    return '%s%s' % (mutation_type, '[' + joined + ']')
def parse_sequence(seq):
    """
    Return the explicit sequence held by a parsed sequence node.

    The names `reverse` and `util` are not parameters of this function;
    they are resolved from the surrounding scope. If `reverse` is true,
    the reverse complement of the sequence is returned.

    @arg seq: Parsed sequence node with a `Sequence` attribute.

    @return: The sequence, reverse complemented if `reverse` is true.

    @raise NotImplementedError: The node has no explicit sequence.
    """
    sequence = seq.Sequence
    if sequence:
        return util.reverse_complement(sequence) if reverse else sequence
    raise NotImplementedError('Only explicit sequences are supported '
                              'for insertions.')
def __init__(self, s1, s2, lcp, s1_end, s2_end, DNA=False):
    """
    Initialise the class.

    Both strings are cut down to the part between the common prefix and
    their respective end position, and the LCS matrix of these two
    substrings is stored.

    @arg s1: A string.
    @type s1: unicode
    @arg s2: A string.
    @type s2: unicode
    @arg lcp: The length of the longest common prefix of {s1} and {s2}.
    @type lcp: int
    @arg s1_end: End of the substring in {s1}.
    @type s1_end: int
    @arg s2_end: End of the substring in {s2}.
    @type s2_end: int
    @arg DNA: If true, an LCS matrix is also built against the reverse
        complement of the {s2} substring.
    @type DNA: bool
    """
    self.__lcp = lcp
    self.__s1 = s1[lcp:s1_end]
    self.__s2 = s2[lcp:s2_end]
    self.__s2_len = s2_end - lcp
    self.__matrix = self.LCSMatrix(self.__s1, self.__s2)

    # The reverse complement data stays None unless DNA is requested.
    self.__s2_rc = None
    self.__matrix_rc = None
    if not DNA:
        return

    self.__s2_rc = reverse_complement(self.__s2)
    self.__matrix_rc = self.LCSMatrix(self.__s1, self.__s2_rc)
def inversion(self, pos1, pos2):
    """
    Replace the range from non-interbase position pos1 to pos2 by its
    reverse complement.

    The reverse complement is taken from the original sequence
    (`self.orig`). A visualisation of the change is added to the output
    object before the mutation itself is applied.

    @arg pos1: First nucleotide of the inverted sequence.
    @type pos1: int
    @arg pos2: Last nucleotide of the inverted sequence.
    @type pos2: int
    """
    # Positions are one-based and inclusive; slicing needs a zero-based
    # start.
    begin = pos1 - 1

    original = unicode(self.orig[begin:pos2])
    inverted = util.reverse_complement(original)

    header = ['inversion between %i and %i' % (pos1, pos2)]
    report = header + list(self._visualise(begin, pos2, inverted))
    self._output.addOutput('visualisation', report)

    self._mutate(begin, pos2, inverted)
def __maybeInvert(self, gene, string, string_reverse=None):
    """
    Give a DNA sequence as seen from the orientation of a gene.

    @arg gene: Gene
    @type gene: object
    @arg string: DNA sequence
    @type string: unicode
    @kwarg string_reverse: DNA sequence to reverse complement instead of
        {string} for a gene in reverse orientation. It is only used if it
        is true, so both None and the empty string fall back to {string}.

    @return: {string} unchanged, unless the gene orientation is -1, in
        which case the reverse complement is returned.
    @rtype: unicode
    """
    if gene.orientation != -1:
        return string
    return util.reverse_complement(string_reverse or string)
def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
         start_fuzzy=False, stop_fuzzy=False):
    """
    Generate variant descriptions for the record, the chromosome and all
    transcripts that have a crossmapper.

    @arg start_g: start position
    @type start_g: integer
    @arg stop_g: stop position
    @type stop_g: integer
    @arg varType: variant type
    @type varType: unicode
    @arg arg1: argument 1 of a raw variant
    @type arg1: unicode
    @arg arg2: argument 2 of a raw variant
    @type arg2: unicode
    @arg roll: pair of shifts; roll[1] is added to the positions used in
        forward orientation and roll[0] is subtracted from the positions
        used for genes in reverse orientation. Nothing is shifted if this
        value is false.
    @type roll: tuple (integer, integer)
    @kwarg arg1_reverse: argument 1 to be used on reverse strand
    @type arg1_reverse: unicode
    @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
    @type start_fuzzy: bool
    @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
    @type stop_fuzzy: bool
    """
    forwardStart, forwardStop = start_g, stop_g
    # Genes in reverse orientation see the range from its other end.
    reverseStart, reverseStop = stop_g, start_g

    if self.record.orientation == 1:
        chromStart = self.record.toChromPos(start_g)
        chromStop = self.record.toChromPos(stop_g)
        chromArg1, chromArg2 = arg1, arg2
    else:
        chromStart = self.record.toChromPos(stop_g)
        chromStop = self.record.toChromPos(start_g)
        # Todo: Should we use arg1_reverse here?
        chromArg1 = util.reverse_complement(arg1)
        chromArg2 = util.reverse_complement(arg2)

    if roll:
        forwardStart += roll[1]
        forwardStop += roll[1]
        reverseStart -= roll[0]
        reverseStop -= roll[0]
        if chromStart is not None:
            if self.record.orientation == 1:
                chromShift = roll[1]
            else:
                chromShift = roll[0]
            chromStart += chromShift
            chromStop += chromShift

    # Todo: Fuzzy offsets to genomic positions (see bug #38).
    #
    # The genomic positioning is problematic. We would like to have it in
    # brackets (as fuzzy positions), like in g.(34299_23232)del.
    #
    # Now consider a variant c.a-?_b+18del where only the offset before
    # the exon is unknown but the offset after the exon is exact. Now a
    # genomic description like g.(34299)_23232del comes to mind, however,
    # this notation is not allowed by the HGVS grammar.
    #
    # I think all we can do is to treat both positions as fuzzy in the
    # genomic description, even if only one of them really is.
    #
    # Peter thinks the HGVS grammar should at some point be updated to
    # allow the brackets around individual locations. Current HGVS also
    # does not allow for () around single positions, only around ranges.
    fuzzy = start_fuzzy or stop_fuzzy
    if varType == "subst":
        template = "(%s)%c>%c" if fuzzy else "%s%c>%c"
        self.record.addToDescription(
            template % (forwardStart, arg1, arg2))
        self.record.addToChromDescription(
            template % (chromStart, chromArg1, chromArg2))
    else:
        if forwardStart != forwardStop:
            template = "(%s_%s)%s%s" if fuzzy else "%s_%s%s%s"
            recordPosition = (forwardStart, forwardStop)
            chromPosition = (chromStart, chromStop)
        else:
            template = "(%s)%s%s" if fuzzy else "%s%s%s"
            recordPosition = (forwardStart,)
            chromPosition = (chromStart,)
        self.record.addToDescription(
            template % (recordPosition + (varType, arg1)))
        self.record.addToChromDescription(
            template % (chromPosition + (varType, chromArg1)))

    for gene in self.record.geneList:
        for transcript in gene.transcriptList:
            crossmap = transcript.CM
            if not crossmap:
                continue

            if gene.orientation == -1:
                orientedStart, orientedStop = reverseStart, reverseStop
            else:
                orientedStart, orientedStop = forwardStart, forwardStop

            # Turn off translation to protein if we hit splice sites.
            # For the current transcript, this is handled with more
            # care in variantchecker.py.
            if not transcript.current and util.over_splice_site(
                    orientedStart, orientedStop, crossmap.RNA):
                transcript.translate = False

            # And check whether the variant hits CDS start.
            if (transcript.molType == "c"
                    and forwardStop >= crossmap.x2g(1, 0)
                    and forwardStart <= crossmap.x2g(3, 0)):
                self.__output.addMessage(
                    __file__, 2, "WSTART",
                    "Mutation in start codon of gene %s transcript "
                    "%s." % (gene.name, transcript.name))
                if not transcript.current:
                    transcript.translate = False

            # FIXME Check whether the variant hits a splice site.

            isRange = varType != "subst" and orientedStart != orientedStop
            if isRange:
                fuzzyHere = start_fuzzy or stop_fuzzy
            else:
                # Only the start position is used in the description.
                fuzzyHere = start_fuzzy

            if fuzzyHere and not transcript.current:
                # Don't generate descriptions on transcripts other than
                # the current in the case of fuzzy positions.
                transcript.cancelDescription()
                continue

            startPosition = crossmap.g2c(orientedStart, start_fuzzy)
            if isRange:
                description = "%s_%s%s%s" % (
                    startPosition,
                    crossmap.g2c(orientedStop, stop_fuzzy),
                    varType,
                    self.__maybeInvert(gene, arg1, arg1_reverse))
                touched = (orientedStart, orientedStop)
            elif varType != "subst":
                description = "%s%s%s" % (
                    startPosition,
                    varType,
                    self.__maybeInvert(gene, arg1, arg1_reverse))
                touched = (orientedStart,)
            else:
                description = "%s%c>%c" % (
                    startPosition,
                    self.__maybeInvert(gene, arg1, arg1_reverse),
                    self.__maybeInvert(gene, arg2))
                touched = (orientedStart,)

            transcript.addToDescription(description)
            for position in touched:
                self.checkIntron(gene, transcript, position)
def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None,
         start_fuzzy=False, stop_fuzzy=False):
    """
    Generate variant descriptions for the record, the chromosome and all
    transcripts that have a crossmapper.

    @arg start_g: start position
    @type start_g: integer
    @arg stop_g: stop position
    @type stop_g: integer
    @arg varType: variant type
    @type varType: unicode
    @arg arg1: argument 1 of a raw variant
    @type arg1: unicode
    @arg arg2: argument 2 of a raw variant
    @type arg2: unicode
    @arg roll: pair of shifts; roll[1] is added to the positions used in
        forward orientation and roll[0] is subtracted from the positions
        used for genes in reverse orientation. Nothing is shifted if this
        value is false.
    @type roll: tuple (integer, integer)
    @kwarg arg1_reverse: argument 1 to be used on reverse strand
    @type arg1_reverse: unicode
    @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
    @type start_fuzzy: bool
    @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
    @type stop_fuzzy: bool
    """
    forwardStart, forwardStop = start_g, stop_g
    # Genes in reverse orientation see the range from its other end.
    reverseStart, reverseStop = stop_g, start_g

    if self.record.orientation == 1:
        chromStart = self.record.toChromPos(start_g)
        chromStop = self.record.toChromPos(stop_g)
        chromArg1, chromArg2 = arg1, arg2
    else:
        chromStart = self.record.toChromPos(stop_g)
        chromStop = self.record.toChromPos(start_g)
        # Todo: Should we use arg1_reverse here?
        chromArg1 = util.reverse_complement(arg1)
        chromArg2 = util.reverse_complement(arg2)

    if roll:
        forwardStart += roll[1]
        forwardStop += roll[1]
        reverseStart -= roll[0]
        reverseStop -= roll[0]
        if chromStart is not None:
            if self.record.orientation == 1:
                chromShift = roll[1]
            else:
                chromShift = roll[0]
            chromStart += chromShift
            chromStop += chromShift

    # Todo: Fuzzy offsets to genomic positions (see bug #38).
    #
    # The genomic positioning is problematic. We would like to have it in
    # brackets (as fuzzy positions), like in g.(34299_23232)del.
    #
    # Now consider a variant c.a-?_b+18del where only the offset before
    # the exon is unknown but the offset after the exon is exact. Now a
    # genomic description like g.(34299)_23232del comes to mind, however,
    # this notation is not allowed by the HGVS grammar.
    #
    # I think all we can do is to treat both positions as fuzzy in the
    # genomic description, even if only one of them really is.
    #
    # Peter thinks the HGVS grammar should at some point be updated to
    # allow the brackets around individual locations. Current HGVS also
    # does not allow for () around single positions, only around ranges.
    fuzzy = start_fuzzy or stop_fuzzy
    if varType == "subst":
        template = "(%s)%c>%c" if fuzzy else "%s%c>%c"
        self.record.addToDescription(
            template % (forwardStart, arg1, arg2))
        self.record.addToChromDescription(
            template % (chromStart, chromArg1, chromArg2))
    else:
        if forwardStart != forwardStop:
            template = "(%s_%s)%s%s" if fuzzy else "%s_%s%s%s"
            recordPosition = (forwardStart, forwardStop)
            chromPosition = (chromStart, chromStop)
        else:
            template = "(%s)%s%s" if fuzzy else "%s%s%s"
            recordPosition = (forwardStart,)
            chromPosition = (chromStart,)
        self.record.addToDescription(
            template % (recordPosition + (varType, arg1)))
        self.record.addToChromDescription(
            template % (chromPosition + (varType, chromArg1)))

    for gene in self.record.geneList:
        for transcript in gene.transcriptList:
            crossmap = transcript.CM
            if not crossmap:
                continue

            if gene.orientation == -1:
                orientedStart, orientedStop = reverseStart, reverseStop
            else:
                orientedStart, orientedStop = forwardStart, forwardStop

            # Turn off translation to protein if we hit splice sites.
            # For the current transcript, this is handled with more
            # care in variantchecker.py.
            if not transcript.current and util.over_splice_site(
                    orientedStart, orientedStop, crossmap.RNA):
                transcript.translate = False

            # And check whether the variant hits CDS start.
            if (transcript.molType == 'c'
                    and forwardStop >= crossmap.x2g(1, 0)
                    and forwardStart <= crossmap.x2g(3, 0)):
                self.__output.addMessage(
                    __file__, 2, "WSTART",
                    "Mutation in start codon of gene %s transcript "
                    "%s." % (gene.name, transcript.name))
                if not transcript.current:
                    transcript.translate = False

            # FIXME Check whether the variant hits a splice site.

            isRange = varType != "subst" and orientedStart != orientedStop
            if isRange:
                fuzzyHere = start_fuzzy or stop_fuzzy
            else:
                # Only the start position is used in the description.
                fuzzyHere = start_fuzzy

            if fuzzyHere and not transcript.current:
                # Don't generate descriptions on transcripts other than
                # the current in the case of fuzzy positions.
                transcript.cancelDescription()
                continue

            startPosition = crossmap.g2c(orientedStart, start_fuzzy)
            if isRange:
                description = "%s_%s%s%s" % (
                    startPosition,
                    crossmap.g2c(orientedStop, stop_fuzzy),
                    varType,
                    self.__maybeInvert(gene, arg1, arg1_reverse))
                touched = (orientedStart, orientedStop)
            elif varType != "subst":
                description = "%s%s%s" % (
                    startPosition,
                    varType,
                    self.__maybeInvert(gene, arg1, arg1_reverse))
                touched = (orientedStart,)
            else:
                description = "%s%c>%c" % (
                    startPosition,
                    self.__maybeInvert(gene, arg1, arg1_reverse),
                    self.__maybeInvert(gene, arg2))
                touched = (orientedStart,)

            transcript.addToDescription(description)
            for position in touched:
                self.checkIntron(gene, transcript, position)