def add_scores(self, df):
    """Attach length, missing-count and LC score columns to ``df``.

    Reads the 'Sequence' and 'Missing' columns of ``df``. All derived
    values are computed before any column is written, so a failure in
    one of the helpers leaves ``df`` without partial additions.

    The columns 'Length', 'Miss Count', 'LC Norm' and 'LC Raw' are then
    assigned in that order. ``df`` is modified in place and returned.
    """
    sequences = list(df['Sequence'])
    missing = list(df['Missing'])

    normalizer = NormScore()
    raw_scores = tools_lc.calc_lc_motifs(
        sequences, self.k, self.alph_lca, self.thresh_lce
    )
    norm_scores = normalizer.lc_norm_score(sequences)
    seq_lengths = tools_fasta.get_lengths(sequences)
    missing_counts = self.get_missing(missing)

    new_columns = (
        ('Length', seq_lengths),
        ('Miss Count', missing_counts),
        ('LC Norm', norm_scores),
        ('LC Raw', raw_scores),
    )
    for column_name, values in new_columns:
        df[column_name] = values
    return df
def write_tsv(self):
    """Write a tsv file with the columns score, nomiss_score and Length.

    The input table is read from ``self.norm_fpi`` (tab separated, first
    column used as the index). Its 'Sequence' and 'Missing' columns are
    scored with ``tools_lc.calc_lc_motifs`` and
    ``tools_lc.calc_lc_motifs_nomiss`` using ``self.k``, ``self.lca`` and
    ``self.lce``. The result is written tab separated to ``self.fpo``.
    """
    table = pd.read_csv(self.norm_fpi, sep='\t', index_col=0)
    sequences = table['Sequence']
    missing = table['Missing']

    lengths = list(map(len, sequences))
    scores = tools_lc.calc_lc_motifs(sequences, self.k, self.lca, self.lce)
    scores_nomiss = tools_lc.calc_lc_motifs_nomiss(
        sequences, missing, self.k, self.lca, self.lce
    )

    column_order = ['score', 'nomiss_score', 'Length']
    column_data = dict(zip(column_order, (scores, scores_nomiss, lengths)))
    out_table = pd.DataFrame(column_data, columns=column_order)
    out_table.to_csv(self.fpo, sep='\t')
def get_raw_scores(self, lc_label, k, lca, lce):
    """Return one raw score per sequence in ``self.seqs`` for ``lc_label``.

    ``lc_label`` selects which ``tools_lc`` routine is applied:

    * ``'LCA || LCE'``  -> ``calc_lc_motifs`` on the whole sequence list
    * ``'LCA & LCE'``   -> ``count_lca_and_lce`` per sequence
    * ``'LCA & ~LCE'``  -> ``count_lca_not_lce`` per sequence
    * ``'~LCA & LCE'``  -> ``count_not_lca_lce`` per sequence

    ``k``, ``lca`` and ``lce`` are forwarded unchanged to the selected
    routine.

    Raises:
        ValueError: if ``lc_label`` is not one of the four expressions
            above. ``ValueError`` derives from ``Exception``, so callers
            that catch ``Exception`` keep working.
    """
    if lc_label == 'LCA || LCE':
        # This routine takes the whole collection rather than one sequence.
        return tools_lc.calc_lc_motifs(self.seqs, k, lca, lce)
    if lc_label == 'LCA & LCE':
        return [tools_lc.count_lca_and_lce(seq, k, lca, lce)
                for seq in self.seqs]
    if lc_label == 'LCA & ~LCE':
        return [tools_lc.count_lca_not_lce(seq, k, lca, lce)
                for seq in self.seqs]
    if lc_label == '~LCA & LCE':
        return [tools_lc.count_not_lca_lce(seq, k, lca, lce)
                for seq in self.seqs]
    # Name the offending label so a typo in the caller is easy to spot.
    raise ValueError(
        'Unexpected logical expression: {!r}'.format(lc_label)
    )
def lc_norm_score(self, seqs):
    """Return the normalised LC score for ``seqs``.

    Sequence lengths and the raw ``tools_lc.calc_lc_motifs`` scores
    (computed with ``self.k``, ``self.lca`` and ``self.lce``) are handed
    to ``self.norm_function`` together with ``self.lc_m`` and
    ``self.lc_b``. Its result is returned unchanged.
    """
    # Lengths are taken before scoring, matching the established order.
    seq_lengths = list(map(len, seqs))
    raw_scores = tools_lc.calc_lc_motifs(seqs, self.k, self.lca, self.lce)
    return self.norm_function(self.lc_m, self.lc_b, raw_scores, seq_lengths)
def count_lca_or_lce(self, seqs):
    """Return ``tools_lc.calc_lc_motifs`` for ``seqs``.

    Uses the instance settings ``self.k``, ``self.lca`` and ``self.lce``.
    """
    return tools_lc.calc_lc_motifs(seqs, self.k, self.lca, self.lce)
def mb_lc(self, k, lca, lce):
    """Linear regression of 'LCA || LCE' scores against ``self.lens``.

    Scores for ``self.seqs`` come from ``tools_lc.calc_lc_motifs`` with
    the given ``k``, ``lca`` and ``lce``. The return value is whatever
    ``linregress(self.lens, scores)`` produces.
    """
    motif_scores = tools_lc.calc_lc_motifs(self.seqs, k, lca, lce)
    return linregress(self.lens, motif_scores)