Ejemplo n.º 1
0
    def make_kmer_vec(self, data):
        """Build the kmer vector for the given input.

        The instance options ``k``, ``upto``, ``alphabet`` and ``normalize``
        are forwarded to the helpers; the reverse-complement mode is off.

        :param data: file object or sequence list.
        :return: kmer vector.
        """
        seqs = get_data(data)
        kmers = get_kmer_list(self.k, self.upto, self.alphabet)

        # Plain kmer mode: empty reverse-complement list, revcomp flag off.
        return make_kmer_vector(seqs, kmers, [], self.k, self.upto, False,
                                self.normalize)
Ejemplo n.º 2
0
    def make_revckmer_vec(self, data):
        """Build the reverse complement kmer vector for the given input.

        The instance options ``k``, ``upto``, ``alphabet`` and ``normalize``
        are forwarded to the helpers; the reverse-complement mode is on.

        :param data: file object or sequence list.
        :return: reverse complement kmer vector.
        """
        seqs = get_data(data)
        kmers = get_kmer_list(self.k, self.upto, self.alphabet)

        # Use lexicographically first version of {kmer, revcomp(kmer)}.
        revcomp_kmers = make_revcomp_kmer_list(kmers)

        return make_kmer_vector(seqs, kmers, revcomp_kmers, self.k, self.upto,
                                True, self.normalize)
Ejemplo n.º 3
0
    def make_idkmer_vec(self, data, hs, non_hs):
        """Make IDKmer vector.

        For every k in use, the kmer vectors of the positive (``hs``) and
        negative (``non_hs``) sequences are summed column-wise into one
        reference vector per set, and ``diversity`` is evaluated on each
        reference.  Every sequence read from ``data`` is then scored against
        both references with ``id_x_s``.

        The k values in use are ``[self.k]`` when ``self.upto`` is ``False``
        and ``1..self.k`` otherwise.

        :param data: FASTA input to be processed (read with ``get_data``).
        :param hs: Positive FASTA file.
        :param non_hs: Negative FASTA file.
        :return: one list per sequence in ``data``; for each k it holds the
            score against the positive reference followed by the score
            against the negative reference, each rounded to 3 decimals.
        """
        from nacutil import make_kmer_list
        from nacutil import diversity
        from nacutil import id_x_s

        # make_kmer_vector is always called for one k at a time, with the
        # reverse-complement and normalize flags off; ``self.upto`` is
        # handled here by looping over k_list instead.
        rev_kmer_list, upto, revcomp, normalize = [], False, False, False

        pos_s_list = get_data(hs)
        neg_s_list = get_data(non_hs)

        if self.upto is False:
            k_list = [self.k]
        else:
            k_list = list(range(1, self.k + 1))

        # Calculate the standard source S vectors.  All per-k lists below are
        # parallel to k_list (entry i belongs to k_list[i]).
        kmer_lists = []
        pos_s_vec, neg_s_vec = [], []
        diversity_pos_s, diversity_neg_s = [], []
        for k in k_list:
            kmer_list = make_kmer_list(k, self.alphabet)
            # Kept so the per-sequence loop does not rebuild it every time.
            kmer_lists.append(kmer_list)

            temp_pos_s_vec = make_kmer_vector(pos_s_list, kmer_list,
                                              rev_kmer_list, k, upto, revcomp,
                                              normalize)
            temp_neg_s_vec = make_kmer_vector(neg_s_list, kmer_list,
                                              rev_kmer_list, k, upto, revcomp,
                                              normalize)

            # Column-wise sum over all sequences of the set.
            temp_pos_s_vec = [sum(col) for col in zip(*temp_pos_s_vec)]
            temp_neg_s_vec = [sum(col) for col in zip(*temp_neg_s_vec)]

            pos_s_vec.append(temp_pos_s_vec)
            neg_s_vec.append(temp_neg_s_vec)

            diversity_pos_s.append(diversity(temp_pos_s_vec))
            diversity_neg_s.append(diversity(temp_neg_s_vec))

        # Calculate Diversity(X) and ID(X, S).
        sequence_list = get_data(data)
        vec = []

        for seq in sequence_list:
            temp_vec = []
            # Index the references by position in k_list.  The previous code
            # tested the local ``upto`` (always False) and therefore always
            # used entry 0, pairing k > 1 vectors with the k = 1 reference
            # whenever ``self.upto`` was set.
            for idx, k in enumerate(k_list):
                kmer_vec = make_kmer_vector([seq], kmer_lists[idx],
                                            rev_kmer_list, k, upto, revcomp,
                                            normalize)
                x_vec = kmer_vec[0]

                temp_vec.append(
                    round(id_x_s(x_vec, pos_s_vec[idx],
                                 diversity_pos_s[idx]), 3))
                temp_vec.append(
                    round(id_x_s(x_vec, neg_s_vec[idx],
                                 diversity_neg_s[idx]), 3))

            vec.append(temp_vec)

        return vec