def make_kmer_vec(self, data):
    """Make a kmer vector using the instance options k, upto and normalize.

    Reverse-complement handling is switched off for this variant.

    :param data: file object or sequence list.
    :return: kmer vector.
    """
    sequences = get_data(data)
    kmers = get_kmer_list(self.k, self.upto, self.alphabet)
    # No reverse complement: hand over an empty revcomp kmer list and a
    # False revcomp flag.
    return make_kmer_vector(
        sequences, kmers, [], self.k, self.upto, False, self.normalize
    )
def make_revckmer_vec(self, data):
    """Make a reverse complement kmer vector using options k, upto, normalize.

    :param data: file object or sequence list.
    :return: reverse complement kmer vector.
    """
    sequences = get_data(data)
    kmers = get_kmer_list(self.k, self.upto, self.alphabet)
    # Use lexicographically first version of {kmer, revcomp(kmer)}.
    revcomp_kmers = make_revcomp_kmer_list(kmers)
    return make_kmer_vector(
        sequences,
        kmers,
        revcomp_kmers,
        self.k,
        self.upto,
        True,  # revcomp flag enabled for this variant
        self.normalize,
    )
def make_idkmer_vec(self, data, hs, non_hs):
    """Make IDKmer vector.

    For every k in use, the sequences of ``hs`` and of ``non_hs`` are each
    converted to kmer vectors and summed column-wise into one reference
    vector per set, together with its ``diversity`` value. Every sequence
    of ``data`` then contributes, for each k, two values: ``id_x_s`` against
    the positive reference and ``id_x_s`` against the negative reference,
    each rounded to 3 decimals.

    The values of k are ``[self.k]`` when ``self.upto`` is False, otherwise
    ``1 .. self.k``.

    :param data: FASTA input to process (passed to ``get_data``).
    :param hs: Positive FASTA file.
    :param non_hs: Negative FASTA file.
    :return: list with one row per sequence of ``data``; each row holds a
        (positive, negative) pair of values for every k, in order of k.
    """
    from nacutil import make_kmer_list
    from nacutil import diversity
    from nacutil import id_x_s

    # Every make_kmer_vector call below handles exactly one k, without
    # reverse complement and without normalization.
    rev_kmer_list, upto, revcomp, normalize = [], False, False, False

    pos_s_list = get_data(hs)
    neg_s_list = get_data(non_hs)

    if self.upto is False:
        k_list = [self.k]
    else:
        k_list = list(range(1, self.k + 1))

    # Calculate the standard source S vectors. Everything that belongs to
    # one k is kept together in one record, so the scoring loop below can
    # never pair a k-mer vector with the reference data of a different k.
    references = []
    for k in k_list:
        kmer_list = make_kmer_list(k, self.alphabet)
        pos_vectors = make_kmer_vector(pos_s_list, kmer_list, rev_kmer_list,
                                       k, upto, revcomp, normalize)
        neg_vectors = make_kmer_vector(neg_s_list, kmer_list, rev_kmer_list,
                                       k, upto, revcomp, normalize)
        # Column-wise sum over all sequences of the set.
        pos_s_vec = [sum(column) for column in zip(*pos_vectors)]
        neg_s_vec = [sum(column) for column in zip(*neg_vectors)]
        references.append((k, kmer_list,
                           pos_s_vec, diversity(pos_s_vec),
                           neg_s_vec, diversity(neg_s_vec)))

    # Calculate Diversity(X) and ID(X, S).
    sequence_list = get_data(data)
    vec = []
    for seq in sequence_list:
        temp_vec = []
        for k, kmer_list, pos_s_vec, div_pos, neg_s_vec, div_neg in references:
            kmer_vec = make_kmer_vector([seq], kmer_list, rev_kmer_list,
                                        k, upto, revcomp, normalize)
            temp_vec.append(round(id_x_s(kmer_vec[0], pos_s_vec, div_pos), 3))
            temp_vec.append(round(id_x_s(kmer_vec[0], neg_s_vec, div_neg), 3))
        vec.append(temp_vec)

    return vec