Example #1
0
    def scanPWM(self, seq):
        """Matrix of log-odds scores for a nucleotide sequence.

        Scans a nucleotide sequence and returns the matrix of log-odds
        scores for all positions.

        - the result is a one-dimensional list or numpy array
        - the sequence can only be a DNA sequence
        - the search is performed only on one strand

        Arguments:
         - seq - sequence object exposing an ``alphabet`` attribute; it is
           converted to a plain string with ``str()`` before scanning.

        Raises ValueError if the motif's alphabet or the sequence's alphabet
        is not an instance of IUPAC.IUPACUnambiguousDNA.
        """
        # TODO - Code itself tolerates ambiguous bases (as NaN).
        if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs"
                             % self.alphabet)
        if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA):
            # The argument is named ``seq``; formatting any other name here
            # would turn this ValueError into a NameError.
            raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences"
                             % seq.alphabet)

        seq = str(seq)

        # check if the fast C code can be used
        try:
            import _pwm
        except ImportError:
            # use the slower Python code otherwise
            return self._pwm_calculate(seq)

        # get the log-odds matrix into a proper shape
        # (each row contains sorted (ACGT) log-odds values)
        logodds = [[y[1] for y in sorted(x.items())] for x in self.log_odds()]
        return _pwm.calculate(seq, logodds)
Example #2
0
    def scanPWM(self, seq):
        """Matrix of log-odds scores for a nucleotide sequence.

        Scans a nucleotide sequence and returns the matrix of log-odds
        scores for all positions.

        - the result is a one-dimensional list or numpy array
        - the sequence can only be a DNA sequence
        - the search is performed only on one strand

        Arguments:
         - seq - sequence object with an ``alphabet`` attribute; ``str(seq)``
           is what actually gets scanned.

        Raises ValueError when either the motif or the sequence does not
        carry an IUPAC.IUPACUnambiguousDNA alphabet.
        """
        # TODO - Code itself tolerates ambiguous bases (as NaN).
        if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs"
                             % self.alphabet)
        if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA):
            # Must reference the ``seq`` parameter: there is no ``sequence``
            # name in this scope, so using it would raise NameError instead.
            raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences"
                             % seq.alphabet)

        seq = str(seq)

        # check if the fast C code can be used
        try:
            import _pwm
        except ImportError:
            # use the slower Python code otherwise
            return self._pwm_calculate(seq)

        # get the log-odds matrix into a proper shape
        # (each row contains sorted (ACGT) log-odds values)
        logodds = [[value for _letter, value in sorted(column.items())]
                   for column in self.log_odds()]
        return _pwm.calculate(seq, logodds)
Example #3
0
    def scanPWM(self, seq):
        """Matrix of log-odds scores for a nucleotide sequence.

        Scans a nucleotide sequence and returns the matrix of log-odds
        scores for all positions.

        - the result is a one-dimensional list or numpy array
        - the sequence can only be a DNA sequence
        - the search is performed only on one strand

        Arguments:
         - seq - sequence object with an ``alphabet`` attribute; it is turned
           into a plain string with ``str()`` before scanning.

        Raises ValueError if the motif's or the sequence's alphabet is not
        IUPAC.unambiguous_dna.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if seq.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        # str() replaces the deprecated Seq.tostring(), which newer Biopython
        # releases no longer provide; both yield the sequence as a string.
        seq = str(seq)

        # check if the fast C code can be used
        try:
            import _pwm
        except ImportError:
            # use the slower Python code otherwise
            return self._pwm_calculate(seq)

        # get the log-odds matrix into a proper shape
        # (each row contains sorted (ACGT) log-odds values)
        logodds = [[y[1] for y in sorted(x.items())] for x in self.log_odds()]
        return _pwm.calculate(seq, logodds)
Example #4
0
    def scanPWM(self, seq):
        """Score every position of a DNA sequence against this motif.

        Returns the log-odds scores for all positions as a one-dimensional
        list or numpy array.  Only DNA sequences are accepted and only the
        given strand is searched.

        Raises ValueError when the motif's or the sequence's alphabet differs
        from IUPAC.unambiguous_dna.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if seq.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        text = str(seq)

        try:
            import _pwm
        except ImportError:
            # No compiled extension importable: use the pure Python scorer.
            return self._pwm_calculate(text)

        # One row per entry of log_odds(); within a row the values follow the
        # sorted order of the letters (ACGT for a DNA motif).
        rows = []
        for column in self.log_odds():
            rows.append([value for _letter, value in sorted(column.items())])
        return _pwm.calculate(text, rows)
Example #5
0
    def calculate(self, sequence):
        """Return the PWM score of a sequence at every position.

        - the sequence can only be a DNA sequence
        - the search is performed only on one strand
        - when exactly one score is produced (sequence and motif have the
          same length), that single number is returned
        - otherwise, the result is a one-dimensional list or numpy array

        Raises ValueError if the motif's or the sequence's alphabet is not an
        instance of IUPAC.IUPACUnambiguousDNA.
        """
        # TODO - Code itself tolerates ambiguous bases (as NaN).
        if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs"
                             % self.alphabet)
        if not isinstance(sequence.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences"
                             % sequence.alphabet)

        # TODO - Force uppercase here and optimise switch statement in C
        # by assuming upper case?
        text = str(sequence)
        width = self.length
        total = len(text)

        try:
            # Prefer the compiled scorer when it can be imported.
            import _pwm
        except ImportError:
            # Pure Python fallback.  The C code handles mixed case, so the
            # text is upper-cased here to behave the same way.
            text = text.upper()
            scores = []
            for start in xrange(total - width + 1):
                running = 0.0
                for offset in xrange(width):
                    try:
                        running += self[text[start + offset]][offset]
                    except KeyError:
                        # Letter missing from the matrix: the whole window
                        # scores as NaN.
                        running = _nan
                        break
                scores.append(running)
        else:
            # Reshape the matrix: one row per motif position, with the values
            # in A, C, G, T order.
            matrix = [[self[base][column] for base in "ACGT"]
                      for column in range(width)]
            scores = _pwm.calculate(text, matrix)
        return scores[0] if len(scores) == 1 else scores
Example #6
0
    def calculate(self, sequence):
        """Compute PWM scores for all positions of the given sequence.

        - the sequence can only be a DNA sequence
        - the search is performed only on one strand
        - a single number is returned when there is exactly one score
          (sequence and motif of equal length)
        - otherwise, the result is a one-dimensional list or numpy array

        Raises ValueError unless both the motif and the sequence carry an
        IUPAC.IUPACUnambiguousDNA alphabet.
        """
        # TODO - Code itself tolerates ambiguous bases (as NaN).
        if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs"
                             % self.alphabet)
        if not isinstance(sequence.alphabet, IUPAC.IUPACUnambiguousDNA):
            raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences"
                             % sequence.alphabet)

        # TODO - Force uppercase here and optimise switch statement in C
        # by assuming upper case?
        residues = str(sequence)
        motif_len = self.length
        seq_len = len(residues)

        results = []
        try:
            # The fast C implementation is used whenever it is importable.
            import _pwm
        except ImportError:
            # Slower Python implementation.  Upper-case first, because the C
            # code handles mixed case and this path must match it.
            residues = residues.upper()
            for begin in xrange(seq_len - motif_len + 1):
                window = residues[begin:begin + motif_len]
                subtotal = 0.0
                for column in xrange(motif_len):
                    try:
                        subtotal += self[window[column]][column]
                    except KeyError:
                        # Unknown letter: record NaN for this window.
                        results.append(_nan)
                        break
                else:
                    # Every letter was found in the matrix.
                    results.append(subtotal)
        else:
            # Rows are motif positions; each row lists the A, C, G, T values.
            table = []
            for column in range(motif_len):
                table.append([self[nucleotide][column] for nucleotide in "ACGT"])
            results = _pwm.calculate(residues, table)

        if len(results) == 1:
            return results[0]
        return results
Example #7
0
    def calculate(self, sequence):
        """Return the PWM score of a sequence at every position.

        - the sequence can only be a DNA sequence
        - the search is performed only on one strand
        - when exactly one score is produced (sequence and motif have the
          same length), that single number is returned
        - otherwise, the result is a one-dimensional list or numpy array

        Raises ValueError if the motif's or the sequence's alphabet is not
        IUPAC.unambiguous_dna.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if sequence.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        text = str(sequence)
        width = self.length
        total = len(text)

        try:
            # Prefer the compiled scorer when it can be imported.
            import _pwm
        except ImportError:
            # Pure Python fallback: slide a window of motif length along the
            # text and add up the per-position matrix entries.
            scores = []
            for start in xrange(total - width + 1):
                running = 0.0
                for offset, base in enumerate(text[start:start + width]):
                    running += self[base][offset]
                scores.append(running)
        else:
            # Reshape the matrix: one row per motif position, with the values
            # in A, C, G, T order.
            matrix = [[self[base][column] for base in "ACGT"]
                      for column in range(width)]
            scores = _pwm.calculate(text, matrix)
        return scores[0] if len(scores) == 1 else scores
Example #8
0
    def scanPWM(self, seq):
        """Scan a nucleotide sequence using the fast C extension.

        Returns the matrix of log-odds scores for all positions.

        - the result is a one-dimensional numpy array
        - the sequence can only be a DNA sequence
        - the search is performed only on one strand

        Raises ValueError if the motif's or the sequence's alphabet is not
        IUPAC.unambiguous_dna.  numpy and the compiled _pwm module are
        imported inside this method, so an ImportError propagates when either
        one is unavailable.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if seq.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        import numpy
        # Build one row per motif position holding the log-odds values in
        # sorted-letter (ACGT) order.  A list comprehension is used instead of
        # map() so real lists are produced even where map() is lazy.
        rows = [[score for _letter, score in sorted(column.items())]
                for column in self.log_odds()]
        # Transpose so that each column contains the sorted (ACGT) values.
        logodds = numpy.array(rows).transpose()

        import _pwm

        # str() replaces the deprecated Seq.tostring(); both give the
        # sequence as a plain string.
        return _pwm.calculate(str(seq), logodds)
Example #9
0
    def calculate(self, sequence):
        """Compute PWM scores for all positions of the given sequence.

        - the sequence can only be a DNA sequence
        - the search is performed only on one strand
        - a single number is returned when there is exactly one score
          (sequence and motif of equal length)
        - otherwise, the result is a one-dimensional list or numpy array

        Raises ValueError unless both the motif and the sequence use
        IUPAC.unambiguous_dna as their alphabet.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if sequence.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        residues = str(sequence)
        motif_len = self.length
        seq_len = len(residues)

        results = []
        try:
            # The fast C implementation is used whenever it is importable.
            import _pwm
        except ImportError:
            # Slower Python implementation: one score per window start.
            for begin in xrange(seq_len - motif_len + 1):
                subtotal = 0.0
                for column in xrange(motif_len):
                    subtotal += self[residues[begin + column]][column]
                results.append(subtotal)
        else:
            # Rows are motif positions; each row lists the A, C, G, T values.
            table = []
            for column in range(motif_len):
                table.append([self[nucleotide][column] for nucleotide in "ACGT"])
            results = _pwm.calculate(residues, table)

        if len(results) == 1:
            return results[0]
        return results
Example #10
0
    def scanPWM(self, seq):
        """Scan a nucleotide sequence using the fast C extension.

        Returns the matrix of log-odds scores for all positions.

        - the result is a one-dimensional numpy array
        - the sequence can only be a DNA sequence
        - the search is performed only on one strand

        Raises ValueError when the motif's or the sequence's alphabet differs
        from IUPAC.unambiguous_dna.  An ImportError propagates if numpy or
        the compiled _pwm module cannot be imported.
        """
        if self.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA motifs")
        if seq.alphabet != IUPAC.unambiguous_dna:
            raise ValueError("Wrong alphabet! Use only with DNA sequences")

        import numpy
        # get the log-odds matrix into a proper shape (each column contains
        # sorted (ACGT) log-odds values).  The rows are built with a list
        # comprehension rather than map(), which is lazy on Python 3 and
        # would leave numpy with iterator objects instead of numbers.
        logodds = numpy.array([
            [value for _letter, value in sorted(column.items())]
            for column in self.log_odds()
        ]).transpose()

        import _pwm

        # str(seq) is the supported replacement for the deprecated
        # seq.tostring() and returns the same plain string.
        return _pwm.calculate(str(seq), logodds)