def _score(self, alignment):
    """Score every column of *alignment*.

    Returns a list with one conservation score per alignment column.
    Columns whose gap fraction exceeds ``self.gap_cutoff`` are given the
    sentinel ``self.SCORE_OVER_GAP_CUTOFF`` instead of being scored.
    """
    if self.use_seq_weights:
        seq_weights = alignment.get_seq_weights()
    else:
        seq_weights = [1.0] * len(alignment.msa)

    if self.bg_distribution is None:
        # Estimate bg distribution from this alignment
        q = weighted_freq_count_pseudocount(
            (aa for seq in alignment.msa for aa in seq),
            seq_weights, PSEUDOCOUNT)
    else:
        q = self.bg_distribution

    scores = []
    for i in xrange(len(alignment.msa[0])):
        col = get_column(i, alignment.msa)
        n_gaps = col.count("-")
        # Fully-gapped columns are not expected here.
        assert n_gaps < len(col)
        # BUG FIX: `n_gaps / len(col)` is integer division under Python 2
        # (this file uses xrange), which — given the assert above — always
        # truncates to 0, so the gap cutoff could never fire.  float()
        # forces true division and is a no-op if `from __future__ import
        # division` is in effect at the top of the file.
        if self.gap_cutoff != 1 and float(n_gaps) / len(col) > self.gap_cutoff:
            score = self.SCORE_OVER_GAP_CUTOFF
        else:
            score = self._score_col(col, seq_weights, q)
            if self.use_gap_penalty:
                # vn_entropy has this commented out for some reason
                score *= weighted_gap_penalty(col, seq_weights)
        scores.append(score)
    return scores
def _score(self, alignment):
    """Score every column of *alignment*.

    Returns a list with one conservation score per alignment column.
    Columns whose gap fraction exceeds ``self.gap_cutoff`` are given the
    sentinel ``self.SCORE_OVER_GAP_CUTOFF`` instead of being scored.
    Lazily populates ``self.bg_distribution`` from this alignment on the
    first call if the attribute exists but is empty/None.
    """
    if self.use_seq_weights:
        seq_weights = alignment.get_seq_weights()
    else:
        seq_weights = [1.] * len(alignment.msa)

    # Estimate bg distribution from this alignment
    if hasattr(self, 'bg_distribution'):
        if not self.bg_distribution:
            q = dict((aa, 0) for aa in amino_acids)
            # BUG FIX: was `for seq in self.msa` — the MSA comes in via
            # the `alignment` parameter (every other reference in this
            # method is alignment.msa); `self.msa` is a typo.
            # NOTE(review): q holds raw residue counts, not normalized
            # frequencies (unlike the pseudocounted distribution used by
            # the sibling _score variant) — confirm _score_col expects
            # counts.  Also assumes `amino_acids` covers every symbol in
            # the sequences (incl. the gap char), else q[aa] raises
            # KeyError — verify.
            for seq in alignment.msa:
                for aa in seq:
                    q[aa] += 1
            self.bg_distribution = q

    scores = []
    for i in xrange(len(alignment.msa[0])):
        col = get_column(i, alignment.msa)
        n_gaps = col.count('-')
        # Fully-gapped columns are not expected here.
        assert n_gaps < len(col)
        # BUG FIX: `n_gaps/len(col)` is integer division under Python 2
        # (this file uses xrange), which — given the assert above — always
        # truncates to 0, so the gap cutoff could never fire.  float()
        # forces true division and is a no-op under future-division.
        if self.gap_cutoff != 1 and float(n_gaps) / len(col) > self.gap_cutoff:
            score = self.SCORE_OVER_GAP_CUTOFF
        else:
            score = self._score_col(col, seq_weights)
            if self.use_gap_penalty:
                # vn_entropy has this commented out for some reason
                score *= weighted_gap_penalty(col, seq_weights)
        scores.append(score)
    return scores