def scanPWM(self, seq):
    """Matrix of log-odds scores for a nucleotide sequence.

    Scans a nucleotide sequence and returns the matrix of log-odds
    scores for all positions.

    - the result is a one-dimensional list or numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError if the alphabet of the motif or of the sequence is
    not an instance of IUPAC.IUPACUnambiguousDNA.
    """
    # TODO - Code itself tolerates ambiguous bases (as NaN).
    if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
        raise ValueError("PSSM has wrong alphabet: %s - Use only with DNA motifs"
                         % self.alphabet)
    if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA):
        # The parameter is called ``seq``; formatting the message with an
        # undefined ``sequence`` name would raise NameError instead of the
        # intended ValueError.
        raise ValueError("Sequence has wrong alphabet: %r - Use only with DNA sequences"
                         % seq.alphabet)
    seq = str(seq)
    # check if the fast C code can be used
    try:
        import _pwm
    except ImportError:
        # use the slower Python code otherwise
        return self._pwm_calculate(seq)
    # get the log-odds matrix into a proper shape
    # (each row contains sorted (ACGT) log-odds values)
    logodds = [[y[1] for y in sorted(x.items())] for x in self.log_odds()]
    return _pwm.calculate(seq, logodds)
def scanPWM(self, seq):
    """Matrix of log-odds scores for a nucleotide sequence.

    Scans a nucleotide sequence and returns the matrix of log-odds
    scores for all positions.

    - the result is a one-dimensional list or numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    A ValueError is raised when either ``self.alphabet`` or
    ``seq.alphabet`` is not an IUPAC.IUPACUnambiguousDNA instance.
    """
    # TODO - Code itself tolerates ambiguous bases (as NaN).
    if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
        raise ValueError(
            "PSSM has wrong alphabet: %s - Use only with DNA motifs"
            % self.alphabet)
    if not isinstance(seq.alphabet, IUPAC.IUPACUnambiguousDNA):
        # Use the real argument name here: referring to ``sequence`` (which
        # does not exist in this scope) turned the ValueError into a
        # NameError.
        raise ValueError(
            "Sequence has wrong alphabet: %r - Use only with DNA sequences"
            % seq.alphabet)
    seq = str(seq)

    # check if the fast C code can be used
    try:
        import _pwm
    except ImportError:
        # use the slower Python code otherwise
        return self._pwm_calculate(seq)

    # get the log-odds matrix into a proper shape
    # (each row contains sorted (ACGT) log-odds values)
    logodds = [[y[1] for y in sorted(x.items())] for x in self.log_odds()]
    return _pwm.calculate(seq, logodds)
def scanPWM(self, seq):
    """Log-odds scores of the motif along a nucleotide sequence.

    Slides over the given sequence and returns the log-odds scores for
    all positions.

    - the result is a one-dimensional list or numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if seq.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    text = seq.tostring()

    # Prefer the C extension; without it, defer to the Python scorer.
    try:
        import _pwm
    except ImportError:
        return self._pwm_calculate(text)

    # One row per entry of log_odds(); within a row the values are
    # ordered by their sorted keys (A, C, G, T).
    rows = []
    for column in self.log_odds():
        rows.append([value for _letter, value in sorted(column.items())])
    return _pwm.calculate(text, rows)
def scanPWM(self, seq):
    """Log-odds scores of the motif along a nucleotide sequence.

    Scans the given sequence and returns the log-odds scores for all
    positions.

    - the result is a one-dimensional list or numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if seq.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    dna_text = str(seq)

    # Try the fast C implementation first.
    try:
        import _pwm
    except ImportError:
        # No C extension available: use the slower Python code.
        return self._pwm_calculate(dna_text)

    # Build the matrix expected by the C code: one row per entry of
    # log_odds(), values ordered by their sorted keys (A, C, G, T).
    matrix = []
    for position_scores in self.log_odds():
        ordered = sorted(position_scores.items())
        matrix.append([pair[1] for pair in ordered])
    return _pwm.calculate(dna_text, matrix)
def calculate(self, sequence):
    """Return the PWM score of a sequence at every position.

    - the sequence can only be a DNA sequence
    - the search is performed only on one strand
    - if the sequence and the motif have the same length, a single
      number is returned
    - otherwise, the result is a one-dimensional list or numpy array

    In the pure-Python fallback a window containing a letter for which
    ``self[letter]`` raises KeyError is scored as ``_nan``.

    Raises ValueError when the alphabet of the motif or of the sequence
    is not an IUPAC.IUPACUnambiguousDNA instance.
    """
    # TODO - Code itself tolerates ambiguous bases (as NaN).
    if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
        raise ValueError(
            "PSSM has wrong alphabet: %s - Use only with DNA motifs"
            % self.alphabet)
    if not isinstance(sequence.alphabet, IUPAC.IUPACUnambiguousDNA):
        raise ValueError(
            "Sequence has wrong alphabet: %r - Use only with DNA sequences"
            % sequence.alphabet)

    # TODO - Force uppercase here and optimise switch statement in C
    # by assuming upper case?
    text = str(sequence)
    width = self.length
    total = len(text)

    # Use the C extension when it can be imported.
    try:
        import _pwm
    except ImportError:
        # Slower Python code.  The C code handles mixed case so the
        # Python version must too.
        text = text.upper()
        scores = []
        for start in xrange(total - width + 1):
            score = 0.0
            window = text[start:start + width]
            for offset, letter in enumerate(window):
                try:
                    score += self[letter][offset]
                except KeyError:
                    # Unknown letter: the whole window is undefined.
                    score = _nan
                    break
            scores.append(score)
    else:
        # get the log-odds matrix into a proper shape
        # (each row contains the A, C, G, T values of one position)
        logodds = []
        for column in range(width):
            logodds.append([self[base][column] for base in "ACGT"])
        scores = _pwm.calculate(text, logodds)

    return scores[0] if len(scores) == 1 else scores
def calculate(self, sequence):
    """Score a DNA sequence against the matrix at all positions.

    - the sequence can only be a DNA sequence
    - the search is performed only on one strand
    - if the sequence and the motif have the same length, a single
      number is returned
    - otherwise, the result is a one-dimensional list or numpy array

    When the C extension cannot be imported, positions whose window
    holds a letter unknown to the matrix (KeyError) are reported as
    ``_nan``.

    Raises ValueError when the alphabet of the motif or of the sequence
    is not an IUPAC.IUPACUnambiguousDNA instance.
    """
    # TODO - Code itself tolerates ambiguous bases (as NaN).
    if not isinstance(self.alphabet, IUPAC.IUPACUnambiguousDNA):
        message = "PSSM has wrong alphabet: %s - Use only with DNA motifs" \
            % self.alphabet
        raise ValueError(message)
    if not isinstance(sequence.alphabet, IUPAC.IUPACUnambiguousDNA):
        message = "Sequence has wrong alphabet: %r - Use only with DNA sequences" \
            % sequence.alphabet
        raise ValueError(message)

    # TODO - Force uppercase here and optimise switch statement in C
    # by assuming upper case?
    sequence = str(sequence)
    m = self.length
    n = len(sequence)
    scores = []

    # check if the fast C code can be used
    try:
        import _pwm
    except ImportError:
        # Pure-Python path.  The C code handles mixed case so the Python
        # version must too.
        sequence = sequence.upper()
        for start in xrange(n - m + 1):
            total = 0.0
            for column in xrange(m):
                base = sequence[start + column]
                try:
                    total += self[base][column]
                except KeyError:
                    # Unknown letter: this position has no valid score.
                    scores.append(_nan)
                    break
            else:
                # Every letter of the window was scored.
                scores.append(total)
    else:
        # One row per motif position, holding the A, C, G, T values.
        logodds = [[self[base][column] for base in "ACGT"]
                   for column in range(m)]
        scores = _pwm.calculate(sequence, logodds)

    if len(scores) != 1:
        return scores
    return scores[0]
def calculate(self, sequence):
    """Return the PWM score of a sequence at every position.

    - the sequence can only be a DNA sequence
    - the search is performed only on one strand
    - if the sequence and the motif have the same length, a single
      number is returned
    - otherwise, the result is a one-dimensional list or numpy array

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if sequence.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    text = str(sequence)
    width = self.length
    total = len(text)

    # Use the C extension when it can be imported.
    try:
        import _pwm
    except ImportError:
        # Slower Python code: add up the matrix entries of each window.
        scores = []
        for start in xrange(total - width + 1):
            score = 0.0
            for offset, letter in enumerate(text[start:start + width]):
                score += self[letter][offset]
            scores.append(score)
    else:
        # get the log-odds matrix into a proper shape
        # (each row contains the A, C, G, T values of one position)
        logodds = []
        for column in range(width):
            logodds.append([self[base][column] for base in "ACGT"])
        scores = _pwm.calculate(text, logodds)

    return scores[0] if len(scores) == 1 else scores
def scanPWM(self, seq):
    """Scan a nucleotide sequence using the fast C extension.

    Returns the matrix of log-odds scores for all positions.

    - the result is a one-dimensional numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.  There is no pure-Python
    fallback here: a missing ``numpy`` or ``_pwm`` module surfaces as
    ImportError.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if seq.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")
    import numpy
    # get the log-odds matrix into a proper shape
    # (each column contains sorted (ACGT) log-odds values)
    # A list comprehension is used rather than map(lambda x: ...): it does
    # not shadow the loop variable and always yields real lists, whereas a
    # lazy map object would make numpy.array build an object array.
    logodds = numpy.array(
        [[value for _letter, value in sorted(column.items())]
         for column in self.log_odds()]
    ).transpose()
    import _pwm
    return _pwm.calculate(seq.tostring(), logodds)
def calculate(self, sequence):
    """Score a DNA sequence against the matrix at all positions.

    - the sequence can only be a DNA sequence
    - the search is performed only on one strand
    - if the sequence and the motif have the same length, a single
      number is returned
    - otherwise, the result is a one-dimensional list or numpy array

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if sequence.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    sequence = str(sequence)
    m = self.length
    n = len(sequence)
    scores = []

    # check if the fast C code can be used
    try:
        import _pwm
    except ImportError:
        # Pure-Python path: one running total per window start.
        for start in xrange(n - m + 1):
            total = 0.0
            column = 0
            while column < m:
                base = sequence[start + column]
                total += self[base][column]
                column += 1
            scores.append(total)
    else:
        # One row per motif position, holding the A, C, G, T values.
        logodds = [[self[base][column] for base in "ACGT"]
                   for column in range(m)]
        scores = _pwm.calculate(sequence, logodds)

    if len(scores) != 1:
        return scores
    return scores[0]
def scanPWM(self, seq):
    """Scan a nucleotide sequence using the fast C extension.

    Returns the matrix of log-odds scores for all positions.

    - the result is a one-dimensional numpy array
    - the sequence can only be a DNA sequence
    - the search is performed only on one strand

    Raises ValueError when the motif or the sequence does not use
    IUPAC.unambiguous_dna as its alphabet.  Both ``numpy`` and ``_pwm``
    are imported unconditionally, so their absence raises ImportError.
    """
    if self.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA motifs")
    if seq.alphabet != IUPAC.unambiguous_dna:
        raise ValueError("Wrong alphabet! Use only with DNA sequences")

    import numpy

    # get the log-odds matrix into a proper shape
    # (each column contains sorted (ACGT) log-odds values)
    # Rows are built eagerly with a comprehension; the former
    # map(lambda x: ...) reused the name of the loop variable and, being
    # lazy on Python 3, would hand numpy.array iterators instead of numbers.
    rows = [
        [value for _letter, value in sorted(column.items())]
        for column in self.log_odds()
    ]
    logodds = numpy.array(rows).transpose()

    import _pwm
    return _pwm.calculate(seq.tostring(), logodds)