Exemple #1
0
    def comp(self):
        """Tally qualifying sequence k-mers and print the frequent ones.

        Reads the tab-separated table at ``self.an_fpi`` (first column used
        as the index).  For each row, the k-mers of the 'Sequence' column are
        paired position-by-position with the k-mers of the string returned by
        ``self.add_x`` for the 'Secondary Structure' and 'Missing' columns.

        A sequence k-mer is tallied when it passes ``tools_lc.lca_motif`` or
        ``tools_lc.lce_motif`` and its paired structure k-mer consists only of
        the characters 'S', 'T', 'P' and 'X'.

        Prints each row index as it is processed, then every k-mer tallied
        more than 200 times followed by its tally.  Returns nothing.
        """
        allowed_ss_chars = {'S', 'T', 'P', 'X'}
        tally = {}

        table = pd.read_csv(self.an_fpi, sep='\t', index_col=0)
        for idx, record in table.iterrows():
            print(idx)
            sequence = record['Sequence']
            structure = record['Secondary Structure']
            missing = record['Missing']
            masked_structure = self.add_x(structure, missing)

            paired_kmers = zip(
                tools_lc.seq_to_kmers(sequence, self.k),
                tools_lc.seq_to_kmers(masked_structure, self.k),
            )
            for seq_kmer, ss_kmer in paired_kmers:
                # Same short-circuit order as before: lca first, then lce.
                is_motif = (tools_lc.lca_motif(seq_kmer, self.lca)
                            or tools_lc.lce_motif(seq_kmer, self.lce))
                if not is_motif:
                    continue
                if not (set(ss_kmer) <= allowed_ss_chars):
                    continue
                tally[seq_kmer] = tally.get(seq_kmer, 0) + 1

        for kmer, occurrences in tally.items():
            if occurrences > 200:
                print(kmer)
                print(occurrences)
 def seq_lca2(self, seqs):
     """Concatenate all k-mers from ``seqs`` that pass ``tools_lc.lca_motif``.

     Each sequence in ``seqs`` is split with
     ``tools_lc.seq_to_kmers(seq, self.k)``; every k-mer for which
     ``tools_lc.lca_motif(kmer, self.lca)`` is truthy is kept, in encounter
     order across all sequences.

     Returns:
         A single string made of the kept k-mers joined without a
         separator ('' when nothing matches or ``seqs`` is empty).
     """
     # Collect the pieces and join once; repeated ``str +=`` in a loop can
     # degrade to quadratic time on long inputs.
     matched = []
     for seq in seqs:
         matched.extend(
             kmer
             for kmer in tools_lc.seq_to_kmers(seq, self.k)
             if tools_lc.lca_motif(kmer, self.lca)
         )
     return ''.join(matched)
 def count_lca_and_lce(self, seqs):
     """Count, per sequence, the k-mers passing both motif checks.

     A k-mer from ``tools_lc.seq_to_kmers(seq, self.k)`` is counted when
     ``tools_lc.lce_motif(kmer, self.lce)`` and
     ``tools_lc.lca_motif(kmer, self.lca)`` are both truthy.

     Returns:
         A list with one integer count per sequence in ``seqs``, in order.
     """
     def passes_both(kmer):
         # lce is evaluated first; lca only runs when lce succeeded.
         return (tools_lc.lce_motif(kmer, self.lce)
                 and tools_lc.lca_motif(kmer, self.lca))

     per_seq_totals = []
     for seq in seqs:
         candidates = tools_lc.seq_to_kmers(seq, self.k)
         per_seq_totals.append(sum(1 for kmer in candidates if passes_both(kmer)))
     return per_seq_totals
Exemple #4
0
 def count_lca_charge(self, seqs):
     """Count, per sequence, LCA-only k-mers containing 'K', 'R' and 'E'.

     A k-mer from ``tools_lc.seq_to_kmers(seq, self.k)`` is counted when
     ``tools_lc.lca_motif(kmer, self.lca)`` is truthy,
     ``tools_lc.lce_motif(kmer, self.lce)`` is falsy, and the k-mer
     contains each of the characters 'K', 'R' and 'E'.

     Returns:
         A list with one integer count per sequence in ``seqs``, in order.
     """
     required_chars = ('K', 'R', 'E')

     def qualifies(kmer):
         # Evaluation order matches the nested checks: lca, then lce,
         # then the character tests in K, R, E order.
         return (tools_lc.lca_motif(kmer, self.lca)
                 and not tools_lc.lce_motif(kmer, self.lce)
                 and all(ch in kmer for ch in required_chars))

     totals = []
     for seq in seqs:
         candidates = tools_lc.seq_to_kmers(seq, self.k)
         totals.append(sum(1 for kmer in candidates if qualifies(kmer)))
     return totals
 def seq_lca(self, seqs):
     """Collect, per sequence, the k-mers passing ``tools_lc.lca_motif``.

     Each sequence in ``seqs`` is split with
     ``tools_lc.seq_to_kmers(seq, self.k)`` and every k-mer for which
     ``tools_lc.lca_motif(kmer, self.lca)`` is truthy is kept in order.

     Returns:
         A tuple ``(lca_counts, seq_kmers)`` of two lists aligned with
         ``seqs``: ``lca_counts[i]`` is the number of kept k-mers for
         sequence ``i`` and ``seq_kmers[i]`` is those k-mers joined into one
         string without a separator.
     """
     seq_kmers = []
     lca_counts = []
     for seq in seqs:
         # Gather matches once, then derive both outputs from the list;
         # joining once avoids repeated ``str +=`` in the inner loop.
         matched = [
             kmer
             for kmer in tools_lc.seq_to_kmers(seq, self.k)
             if tools_lc.lca_motif(kmer, self.lca)
         ]
         lca_counts.append(len(matched))
         seq_kmers.append(''.join(matched))
     return lca_counts, seq_kmers