Esempio n. 1
0
def _construct_change(var, reverse=False):
    """
    Describe the change made by a single raw variant.

    @arg var: RawVar object.
    @type var: pyparsing.ParseResults
    @kwarg reverse: Variant is on the reverse strand.
    @type reverse: bool

    @return: Description of mutation (without reference and positions).
    @rtype: unicode

    @raise NotImplementedError: A part of an ins/delins has no explicit
        sequence.
    """
    def oriented(argument):
        # The pyparsing parse tree yields `str('')` for nonexisting
        # attributes, so optional attributes are wrapped in `unicode()`.
        if not reverse:
            return unicode(argument)
        try:
            # Arguments that parse as an integer are kept as they are.
            return unicode(int(argument))
        except ValueError:
            return util.reverse_complement(unicode(argument))

    def explicit_sequence(part):
        # Only parts carrying a literal sequence can be described.
        if not part.Sequence:
            raise NotImplementedError('Only explicit sequences are supported '
                                      'for insertions.')
        if reverse:
            return util.reverse_complement(part.Sequence)
        return part.Sequence

    arg1 = oriented(var.Arg1)
    arg2 = oriented(var.Arg2)
    kind = var.MutationType

    if kind == 'subst':
        return '%s>%s' % (arg1, arg2)

    if kind not in ('ins', 'delins'):
        # Everything else is the type followed by whichever argument is set.
        return '%s%s' % (kind, arg1 or arg2 or '')

    if var.SeqList:
        # On the reverse strand the parts are listed in opposite order.
        parts = reversed(var.SeqList) if reverse else var.SeqList
        inserted = '[%s]' % ';'.join(explicit_sequence(part)
                                     for part in parts)
    else:
        inserted = explicit_sequence(var.Seq)
    return '%s%s' % (kind, inserted)
Esempio n. 2
0
def _construct_change(var, reverse=False):
    """
    Describe the change made by a single raw variant.

    @arg var: RawVar object.
    @type var: pyparsing.ParseResults
    @kwarg reverse: Variant is on the reverse strand.
    @type reverse: bool

    @return: Description of mutation (without reference and positions).
    @rtype: unicode

    @raise NotImplementedError: A part of an ins/delins has no explicit
        sequence.
    """
    def oriented(argument):
        # The pyparsing parse tree yields `str('')` for nonexisting
        # attributes, so optional attributes are wrapped in `unicode()`.
        if not reverse:
            return unicode(argument)
        try:
            # Arguments that parse as an integer are kept as they are.
            return unicode(int(argument))
        except ValueError:
            return util.reverse_complement(unicode(argument))

    def explicit_sequence(part):
        # Only parts carrying a literal sequence can be described.
        if not part.Sequence:
            raise NotImplementedError('Only explicit sequences are supported '
                                      'for insertions.')
        if reverse:
            return util.reverse_complement(part.Sequence)
        return part.Sequence

    arg1 = oriented(var.Arg1)
    arg2 = oriented(var.Arg2)
    kind = var.MutationType

    if kind == 'subst':
        return '%s>%s' % (arg1, arg2)

    if kind not in ('ins', 'delins'):
        # Everything else is the type followed by whichever argument is set.
        return '%s%s' % (kind, arg1 or arg2 or '')

    if var.SeqList:
        # On the reverse strand the parts are listed in opposite order.
        parts = reversed(var.SeqList) if reverse else var.SeqList
        inserted = '[%s]' % ';'.join(explicit_sequence(part)
                                     for part in parts)
    else:
        inserted = explicit_sequence(var.Seq)
    return '%s%s' % (kind, inserted)
Esempio n. 3
0
 def parse_sequence(seq):
     # Return the explicit sequence of `seq`, reverse-complemented when the
     # enclosing scope's `reverse` flag is set. Parts without an explicit
     # sequence are rejected.
     explicit = seq.Sequence
     if not explicit:
         raise NotImplementedError('Only explicit sequences are supported '
                                   'for insertions.')
     return util.reverse_complement(explicit) if reverse else explicit
Esempio n. 4
0
 def parse_sequence(seq):
     # Return the explicit sequence of `seq`, reverse-complemented when the
     # enclosing scope's `reverse` flag is set. Parts without an explicit
     # sequence are rejected.
     explicit = seq.Sequence
     if not explicit:
         raise NotImplementedError('Only explicit sequences are supported '
                                   'for insertions.')
     return util.reverse_complement(explicit) if reverse else explicit
Esempio n. 5
0
    def __init__(self, s1, s2, lcp, s1_end, s2_end, DNA=False):
        """
        Initialise the class.

        Both strings are trimmed to the range from {lcp} up to their
        respective end before the LCS matrix is built.

        @arg s1: A string.
        @type s1: unicode
        @arg s2: A string.
        @type s2: unicode
        @arg lcp: The length of the longest common prefix of {s1} and {s2}.
        @type lcp: int
        @arg s1_end: End of the substring in {s1}.
        @type s1_end: int
        @arg s2_end: End of the substring in {s2}.
        @type s2_end: int
        @kwarg DNA: If true, also store the reverse complement of the
            trimmed {s2} and build an LCS matrix against it.
        @type DNA: bool
        """
        trimmed_s1 = s1[lcp:s1_end]
        trimmed_s2 = s2[lcp:s2_end]

        self.__lcp = lcp
        self.__s1 = trimmed_s1
        self.__s2 = trimmed_s2
        self.__s2_len = s2_end - lcp
        self.__matrix = self.LCSMatrix(trimmed_s1, trimmed_s2)

        # The reverse complement data stays None unless DNA is requested.
        self.__s2_rc = None
        self.__matrix_rc = None
        if not DNA:
            return

        self.__s2_rc = reverse_complement(trimmed_s2)
        self.__matrix_rc = self.LCSMatrix(trimmed_s1, self.__s2_rc)
Esempio n. 6
0
    def inversion(self, pos1, pos2):
        """
        Invert a range from non-interbase position pos1 to pos2.

        The range is replaced by its reverse complement; a visualisation
        of the change is added to the output before the mutation is
        applied.

        @arg pos1: First nucleotide of the inverted sequence.
        @type pos1: int
        @arg pos2: Last nucleotide of the inverted sequence.
        @type pos2: int
        """
        # Slice start for the one-based, inclusive position pos1.
        begin = pos1 - 1
        inverted = util.reverse_complement(unicode(self.orig[begin:pos2]))

        report = ['inversion between %i and %i' % (pos1, pos2)]
        report += self._visualise(begin, pos2, inverted)
        self._output.addOutput('visualisation', report)

        self._mutate(begin, pos2, inverted)
Esempio n. 7
0
    def inversion(self, pos1, pos2):
        """
        Invert a range from non-interbase position pos1 to pos2.

        The range is replaced by its reverse complement; a visualisation
        of the change is added to the output before the mutation is
        applied.

        @arg pos1: First nucleotide of the inverted sequence.
        @type pos1: int
        @arg pos2: Last nucleotide of the inverted sequence.
        @type pos2: int
        """
        # Slice start for the one-based, inclusive position pos1.
        begin = pos1 - 1
        inverted = util.reverse_complement(unicode(self.orig[begin:pos2]))

        report = ['inversion between %i and %i' % (pos1, pos2)]
        report += self._visualise(begin, pos2, inverted)
        self._output.addOutput('visualisation', report)

        self._mutate(begin, pos2, inverted)
Esempio n. 8
0
    def __maybeInvert(self, gene, string, string_reverse=None):
        """
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.

        @arg gene: Gene
        @type gene: object
        @arg string: DNA sequence
        @type string: unicode
        @kwarg string_reverse: DNA sequence to reverse-complement instead
            of {string}; only used when it is non-empty.
        @type string_reverse: unicode

        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: unicode
        """
        # Anything but orientation -1 leaves the sequence untouched.
        if gene.orientation != -1:
            return string
        return util.reverse_complement(string_reverse or string)
Esempio n. 9
0
    def __maybeInvert(self, gene, string, string_reverse=None):
        """
        Return the reverse-complement of a DNA sequence if the gene is in
        the reverse orientation.

        @arg gene: Gene
        @type gene: object
        @arg string: DNA sequence
        @type string: unicode
        @kwarg string_reverse: DNA sequence to reverse-complement instead
            of {string}; only used when it is non-empty.
        @type string_reverse: unicode

        @return: reverse-complement (if applicable), otherwise return the
            original.
        @rtype: unicode
        """
        # Anything but orientation -1 leaves the sequence untouched.
        if gene.orientation != -1:
            return string
        return util.reverse_complement(string_reverse or string)
Esempio n. 10
0
    def name(self, start_g, stop_g, varType, arg1, arg2, roll, arg1_reverse=None, start_fuzzy=False, stop_fuzzy=False):
        """
        Generate variant descriptions for all genes, transcripts, etc.

        A description of the variant is added to the record (both the
        regular and the chromosomal description) and to every transcript
        of every gene in the record that has a crossmapper (CM). Along the
        way, translation is switched off for non-current transcripts that
        are hit on a splice site or in the start codon, and a WSTART
        message is emitted for variants in a start codon.

        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: unicode
        @arg arg1: argument 1 of a raw variant
        @type arg1: unicode
        @arg arg2: argument 2 of a raw variant
        @type arg2: unicode
        @arg roll: Shift offsets, or a false value to apply none. roll[1]
            is added to the forward positions and roll[0] is subtracted
            from the reverse positions (presumably the room the variant
            has to shift in either direction -- TODO confirm with caller).
        @type roll: tuple (integer, integer)
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: unicode
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
        """
        # Positions as used for forward and reverse oriented genes; for the
        # reverse orientation start and stop swap roles.
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g

        # Chromosomal positions and arguments. If the record orientation is
        # not 1, the positions are swapped and the arguments are reverse
        # complemented.
        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = util.reverse_complement(arg1)
            chromArg2 = util.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

        # Apply the shift offsets. Chromosomal positions are only shifted
        # when a chromosomal start position is available (not None).
        if roll:
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
        # if

        # Record level descriptions: ranges first, then single positions,
        # then substitutions.
        if varType != "subst":
            if forwardStart != forwardStop:
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription("(%s_%s)%s%s" % (forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("(%s_%s)%s%s" % (chromStart, chromStop, varType, chromArg1))
                else:
                    self.record.addToDescription("%s_%s%s%s" % (forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription("%s_%s%s%s" % (chromStart, chromStop, varType, chromArg1))
            # if
            else:
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" % (forwardStart, varType, arg1))
                    self.record.addToChromDescription("(%s)%s%s" % (chromStart, varType, chromArg1))
                else:
                    self.record.addToDescription("%s%s%s" % (forwardStart, varType, arg1))
                    self.record.addToChromDescription("%s%s%s" % (chromStart, varType, chromArg1))
            # else
        # if
        else:
            # Substitutions are formatted with %c, so both arguments must be
            # single characters here.
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" % (forwardStart, arg1, arg2))
                self.record.addToChromDescription("(%s)%c>%c" % (chromStart, chromArg1, chromArg2))
            else:
                self.record.addToDescription("%s%c>%c" % (forwardStart, arg1, arg2))
                self.record.addToChromDescription("%s%c>%c" % (chromStart, chromArg1, chromArg2))

        # Transcript level descriptions, for every transcript that has a
        # crossmapper.
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.CM:
                    # Pick the positions matching the orientation of the gene.
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1:
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    # if

                    # Turn off translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and util.over_splice_site(orientedStart, orientedStop, j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
                    # NOTE(review): this comparison uses the forward positions
                    # for genes in either orientation -- confirm that this is
                    # intended for genes with orientation -1.
                    if j.molType == "c" and forwardStop >= j.CM.x2g(1, 0) and forwardStart <= j.CM.x2g(3, 0):
                        self.__output.addMessage(
                            __file__,
                            2,
                            "WSTART",
                            "Mutation in start codon of gene %s transcript " "%s." % (i.name, j.name),
                        )
                        if not j.current:
                            j.translate = False

                    # FIXME Check whether the variant hits a splice site.

                    if varType != "subst":
                        if orientedStart != orientedStop:
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription(
                                    "%s_%s%s%s"
                                    % (
                                        j.CM.g2c(orientedStart, start_fuzzy),
                                        j.CM.g2c(orientedStop, stop_fuzzy),
                                        varType,
                                        self.__maybeInvert(i, arg1, arg1_reverse),
                                    )
                                )
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
                        # if
                        else:
                            # Single position: only start_fuzzy is consulted.
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription(
                                    "%s%s%s"
                                    % (
                                        j.CM.g2c(orientedStart, start_fuzzy),
                                        varType,
                                        self.__maybeInvert(i, arg1, arg1_reverse),
                                    )
                                )
                                self.checkIntron(i, j, orientedStart)
                        # else
                    # if
                    else:
                        # Substitution: only start_fuzzy is consulted.
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription(
                                "%s%c>%c"
                                % (
                                    j.CM.g2c(orientedStart, start_fuzzy),
                                    self.__maybeInvert(i, arg1, arg1_reverse),
                                    self.__maybeInvert(i, arg2),
                                )
                            )
                            self.checkIntron(i, j, orientedStart)
Esempio n. 11
0
    def name(self,
             start_g,
             stop_g,
             varType,
             arg1,
             arg2,
             roll,
             arg1_reverse=None,
             start_fuzzy=False,
             stop_fuzzy=False):
        """
        Generate variant descriptions for all genes, transcripts, etc.

        A description of the variant is added to the record (both the
        regular and the chromosomal description) and to every transcript
        of every gene in the record that has a crossmapper (CM). Along the
        way, translation is switched off for non-current transcripts that
        are hit on a splice site or in the start codon, and a WSTART
        message is emitted for variants in a start codon.

        @arg start_g: start position
        @type start_g: integer
        @arg stop_g: stop position
        @type stop_g: integer
        @arg varType: variant type
        @type varType: unicode
        @arg arg1: argument 1 of a raw variant
        @type arg1: unicode
        @arg arg2: argument 2 of a raw variant
        @type arg2: unicode
        @arg roll: Shift offsets, or a false value to apply none. roll[1]
            is added to the forward positions and roll[0] is subtracted
            from the reverse positions (presumably the room the variant
            has to shift in either direction -- TODO confirm with caller).
        @type roll: tuple (integer, integer)
        @kwarg arg1_reverse: argument 1 to be used on reverse strand
        @type arg1_reverse: unicode
        @kwarg start_fuzzy: Indicates if start position of variant is fuzzy.
        @type start_fuzzy: bool
        @kwarg stop_fuzzy: Indicates if stop position of variant is fuzzy.
        @type stop_fuzzy: bool
        """
        # Positions as used for forward and reverse oriented genes; for the
        # reverse orientation start and stop swap roles.
        forwardStart = start_g
        forwardStop = stop_g
        reverseStart = stop_g
        reverseStop = start_g

        # Chromosomal positions and arguments. If the record orientation is
        # not 1, the positions are swapped and the arguments are reverse
        # complemented.
        if self.record.orientation == 1:
            chromStart = self.record.toChromPos(start_g)
            chromStop = self.record.toChromPos(stop_g)
            chromArg1 = arg1
            chromArg2 = arg2
        else:
            chromStart = self.record.toChromPos(stop_g)
            chromStop = self.record.toChromPos(start_g)
            chromArg1 = util.reverse_complement(arg1)
            chromArg2 = util.reverse_complement(arg2)
            # Todo: Should we use arg1_reverse here?

        # Apply the shift offsets. Chromosomal positions are only shifted
        # when a chromosomal start position is available (not None).
        if roll:
            forwardStart += roll[1]
            forwardStop += roll[1]
            reverseStart -= roll[0]
            reverseStop -= roll[0]
            if chromStart is not None:
                if self.record.orientation == 1:
                    chromStart += roll[1]
                    chromStop += roll[1]
                else:
                    chromStart += roll[0]
                    chromStop += roll[0]
        #if

        # Record level descriptions: ranges first, then single positions,
        # then substitutions.
        if varType != "subst":
            if forwardStart != forwardStop:
                # Todo: Fuzzy offsets to genomic positions (see bug #38).
                #
                # The genomic positioning is problematic. We would like to
                # have it in brackets (as fuzzy positions), like the above
                # g.(34299_23232)del example.
                #
                # Now consider a variant c.a-?_b+18del where only the offset
                # before the exon is unknown but the offset after the exon is
                # exact. Now a genomic description like g.(34299)_23232del
                # comes to mind, however, this notation is not allowed by the
                # HGVS grammar.
                #
                # I think all we can do is to treat both positions as fuzzy in
                # the genomic description, even if only one of them really is.
                #
                # Peter thinks the HGVS grammar should at some point be
                # updated to allow the brackets around individual locations.
                if start_fuzzy or stop_fuzzy:
                    self.record.addToDescription(
                        "(%s_%s)%s%s" %
                        (forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription(
                        "(%s_%s)%s%s" %
                        (chromStart, chromStop, varType, chromArg1))
                else:
                    self.record.addToDescription(
                        "%s_%s%s%s" %
                        (forwardStart, forwardStop, varType, arg1))
                    self.record.addToChromDescription(
                        "%s_%s%s%s" %
                        (chromStart, chromStop, varType, chromArg1))
            #if
            else:
                if start_fuzzy or stop_fuzzy:
                    # Todo: Current HGVS does not allow for () around single
                    # positions, only around ranges (see above and #38).
                    self.record.addToDescription("(%s)%s%s" %
                                                 (forwardStart, varType, arg1))
                    self.record.addToChromDescription(
                        "(%s)%s%s" % (chromStart, varType, chromArg1))
                else:
                    self.record.addToDescription("%s%s%s" %
                                                 (forwardStart, varType, arg1))
                    self.record.addToChromDescription(
                        "%s%s%s" % (chromStart, varType, chromArg1))
            #else
        #if
        else:
            # Substitutions are formatted with %c, so both arguments must be
            # single characters here.
            if start_fuzzy or stop_fuzzy:
                # Todo: Current HGVS does not allow for () around single
                # positions, only around ranges (see above and #38).
                self.record.addToDescription("(%s)%c>%c" %
                                             (forwardStart, arg1, arg2))
                self.record.addToChromDescription(
                    "(%s)%c>%c" % (chromStart, chromArg1, chromArg2))
            else:
                self.record.addToDescription("%s%c>%c" %
                                             (forwardStart, arg1, arg2))
                self.record.addToChromDescription(
                    "%s%c>%c" % (chromStart, chromArg1, chromArg2))

        # Transcript level descriptions, for every transcript that has a
        # crossmapper.
        for i in self.record.geneList:
            for j in i.transcriptList:
                if j.CM:
                    # Pick the positions matching the orientation of the gene.
                    orientedStart = forwardStart
                    orientedStop = forwardStop
                    if i.orientation == -1:
                        orientedStart = reverseStart
                        orientedStop = reverseStop
                    #if

                    # Turn off translation to protein if we hit splice sites.
                    # For the current transcript, this is handled with more
                    # care in variantchecker.py.
                    if not j.current and \
                           util.over_splice_site(orientedStart, orientedStop,
                                                 j.CM.RNA):
                        j.translate = False

                    # And check whether the variant hits CDS start.
                    # NOTE(review): this comparison uses the forward positions
                    # for genes in either orientation -- confirm that this is
                    # intended for genes with orientation -1.
                    if j.molType == 'c' and forwardStop >= j.CM.x2g(1, 0) \
                       and forwardStart <= j.CM.x2g(3, 0) :
                        self.__output.addMessage(__file__, 2, "WSTART",
                            "Mutation in start codon of gene %s transcript " \
                            "%s." % (i.name, j.name))
                        if not j.current:
                            j.translate = False

                    # FIXME Check whether the variant hits a splice site.

                    if varType != "subst":
                        if orientedStart != orientedStop:
                            if (start_fuzzy or stop_fuzzy) and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription(
                                    "%s_%s%s%s" %
                                    (j.CM.g2c(orientedStart, start_fuzzy),
                                     j.CM.g2c(orientedStop,
                                              stop_fuzzy), varType,
                                     self.__maybeInvert(i, arg1,
                                                        arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                                self.checkIntron(i, j, orientedStop)
                        #if
                        else:
                            # Single position: only start_fuzzy is consulted.
                            if start_fuzzy and not j.current:
                                # Don't generate descriptions on transcripts
                                # other than the current in the case of fuzzy
                                # positions.
                                j.cancelDescription()
                            else:
                                j.addToDescription(
                                    "%s%s%s" % (j.CM.g2c(
                                        orientedStart, start_fuzzy), varType,
                                                self.__maybeInvert(
                                                    i, arg1, arg1_reverse)))
                                self.checkIntron(i, j, orientedStart)
                        #else
                    #if
                    else:
                        # Substitution: only start_fuzzy is consulted.
                        if start_fuzzy and not j.current:
                            # Don't generate descriptions on transcripts
                            # other than the current in the case of fuzzy
                            # positions.
                            j.cancelDescription()
                        else:
                            j.addToDescription(
                                "%s%c>%c" %
                                (j.CM.g2c(orientedStart, start_fuzzy),
                                 self.__maybeInvert(i, arg1, arg1_reverse),
                                 self.__maybeInvert(i, arg2)))
                            self.checkIntron(i, j, orientedStart)