Beispiel #1
0
def pseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete PseKNC process and return the resulting vectors.

    The property names are resolved with ``get_phyche_list``, their values are
    loaded according to ``alphabet``, the sequences are read with ``get_data``
    and everything is handed to ``make_pseknc_vector``.

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param w: forwarded to ``make_pseknc_vector`` (presumably the weight factor -- confirm there).
    :param lamada: forwarded to ``make_pseknc_vector`` (presumably the lambda parameter -- confirm there).
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of ``index_list.DNA``, ``index_list.RNA`` or ``index_list.PROTEIN``.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: forwarded to ``make_pseknc_vector``; defaults to 1.
    :return: whatever ``make_pseknc_vector`` returns.
    :raises ValueError: if ``alphabet`` is none of the supported alphabets.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            # Function-scope import: util is only needed when a user index file is supplied.
            from util import normalize_index

            phyche_vals = get_phyche_value(k, phyche_list, alphabet,
                                           normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals stays unbound and the final call
        # fails with an UnboundLocalError that hides the real cause.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type)
Beispiel #2
0
def ipseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False):
    """Run the complete iPseKNC process.

    ``k`` is the k-mer size handed to ``make_pseknc_vector``; the
    physicochemical index is always looked up for dinucleotides (k=2).

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param w: forwarded to ``make_pseknc_vector``.
    :param lamada: forwarded to ``make_pseknc_vector``.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: forwarded to every helper called here.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :return: the result of ``make_pseknc_vector`` called with ``theta_type=3``.
    """
    resolved_props = get_phyche_list(k=2, phyche_list=phyche_list,
                                     extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Collect the lookup arguments once; the user-defined index is only
    # added when a file was supplied.
    lookup_kwargs = dict(k=2, phyche_list=resolved_props, alphabet=alphabet)
    if extra_index_file is not None:
        raw_extra_index = get_extra_index(extra_index_file)
        from util import normalize_index

        lookup_kwargs['extra_phyche_index'] = normalize_index(raw_extra_index, alphabet,
                                                              is_convert_dict=True)
    dinucleotide_vals = get_phyche_value(**lookup_kwargs)

    sequences = get_data(input_data, alphabet)
    return make_pseknc_vector(sequences, dinucleotide_vals, k, w, lamada, alphabet, theta_type=3)
Beispiel #3
0
def ipseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False):
    """Run the complete iPseKNC process.

    ``k`` is the k-mer size handed to ``make_pseknc_vector``; the
    physicochemical index is always looked up for dinucleotides (k=2).

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param w: forwarded to ``make_pseknc_vector``.
    :param lamada: forwarded to ``make_pseknc_vector``.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: forwarded to every helper called here.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :return: the result of ``make_pseknc_vector`` called with ``theta_type=3``.
    """
    resolved_props = get_phyche_list(k=2, phyche_list=phyche_list,
                                     extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Collect the lookup arguments once; the user-defined index is only
    # added when a file was supplied.
    lookup_kwargs = dict(k=2, phyche_list=resolved_props, alphabet=alphabet)
    if extra_index_file is not None:
        raw_extra_index = get_extra_index(extra_index_file)
        from util import normalize_index

        lookup_kwargs['extra_phyche_index'] = normalize_index(raw_extra_index, alphabet,
                                                              is_convert_dict=True)
    dinucleotide_vals = get_phyche_value(**lookup_kwargs)

    sequences = get_data(input_data, alphabet)
    return make_pseknc_vector(sequences, dinucleotide_vals, k, w, lamada, alphabet, theta_type=3)
Beispiel #4
0
def pseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete PseKNC process and return the resulting vectors.

    The property names are resolved with ``get_phyche_list``, their values are
    loaded according to ``alphabet``, the sequences are read with ``get_data``
    and everything is handed to ``make_pseknc_vector``.

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param w: forwarded to ``make_pseknc_vector`` (presumably the weight factor -- confirm there).
    :param lamada: forwarded to ``make_pseknc_vector`` (presumably the lambda parameter -- confirm there).
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of ``index_list.DNA``, ``index_list.RNA`` or ``index_list.PROTEIN``.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: forwarded to ``make_pseknc_vector``; defaults to 1.
    :return: whatever ``make_pseknc_vector`` returns.
    :raises ValueError: if ``alphabet`` is none of the supported alphabets.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            # Function-scope import: util is only needed when a user index file is supplied.
            from util import normalize_index

            phyche_vals = get_phyche_value(k, phyche_list, alphabet,
                                           normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals stays unbound and the final call
        # fails with an UnboundLocalError that hides the real cause.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type)
Beispiel #5
0
def acc(input_data,
        k,
        lag,
        phyche_list,
        alphabet,
        extra_index_file=None,
        all_prop=False,
        theta_type=1):
    """Run the complete auto/cross covariance process.

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param lag: forwarded to the selected ``make_*_vec`` helper.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of ``index_list.DNA``, ``index_list.RNA`` or ``index_list.PROTEIN``.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: the value 1, 2 and 3 for ac, cc or acc.
    :return: the result of ``make_ac_vec``, ``make_cc_vec`` or ``make_acc_vec``;
        ``None`` when ``theta_type`` is not 1, 2 or 3.
    :raises ValueError: if ``alphabet`` is none of the supported alphabets.
    """
    phyche_list = get_phyche_list(k,
                                  phyche_list,
                                  extra_index_file=extra_index_file,
                                  alphabet=alphabet,
                                  all_prop=all_prop)

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            # Function-scope import: util is only needed when a user index file is supplied.
            from util import normalize_index
            phyche_vals = get_phyche_value(
                k, phyche_list, alphabet,
                normalize_index(extra_phyche_index,
                                alphabet,
                                is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals stays unbound and the vector
        # builders below fail with an UnboundLocalError.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seqs = get_data(input_data, alphabet)
    if alphabet == index_list.PROTEIN:
        # Transform the data format to dict {acid: [phyche_vals]}: the keys
        # come from the first index entry, the values are transposed so each
        # key maps to one value per index entry.
        phyche_keys = phyche_vals[0].index_dict.keys()
        per_index_values = [entry.index_dict.values() for entry in phyche_vals]
        phyche_vals = {
            key: list(vals)
            for key, vals in zip(phyche_keys, zip(*per_index_values))
        }

    if theta_type == 1:
        return make_ac_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 2:
        return make_cc_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 3:
        return make_acc_vec(seqs, lag, phyche_vals, k)
    # Any other theta_type yields None, kept for backward compatibility.
    return None
Beispiel #6
0
def acc(input_data, k, lag, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete auto/cross covariance process.

    :param input_data: the sequence input, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param lag: forwarded to the selected ``make_*_vec`` helper.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of ``index_list.DNA``, ``index_list.RNA`` or ``index_list.PROTEIN``.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: the value 1, 2 and 3 for ac, cc or acc.
    :return: the result of ``make_ac_vec``, ``make_cc_vec`` or ``make_acc_vec``;
        ``None`` when ``theta_type`` is not 1, 2 or 3.
    :raises ValueError: if ``alphabet`` is none of the supported alphabets.
    """
    phyche_list = get_phyche_list(
        k, phyche_list, extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop
    )

    # Get phyche_vals.
    if alphabet in (index_list.DNA, index_list.RNA):
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            # Function-scope import: util is only needed when a user index file is supplied.
            from util import normalize_index

            phyche_vals = get_phyche_value(
                k, phyche_list, alphabet, normalize_index(extra_phyche_index, alphabet, is_convert_dict=True)
            )
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals stays unbound and the vector
        # builders below fail with an UnboundLocalError.
        raise ValueError("Unsupported alphabet: %r" % (alphabet,))

    seqs = get_data(input_data, alphabet)
    if alphabet == index_list.PROTEIN:
        # Transform the data format to dict {acid: [phyche_vals]}: the keys
        # come from the first index entry, the values are transposed so each
        # key maps to one value per index entry.
        phyche_keys = phyche_vals[0].index_dict.keys()
        per_index_values = [entry.index_dict.values() for entry in phyche_vals]
        phyche_vals = {key: list(vals) for key, vals in zip(phyche_keys, zip(*per_index_values))}

    if theta_type == 1:
        return make_ac_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 2:
        return make_cc_vec(seqs, lag, phyche_vals, k)
    elif theta_type == 3:
        return make_acc_vec(seqs, lag, phyche_vals, k)
    # Any other theta_type yields None, kept for backward compatibility.
    return None
Beispiel #7
0
    ]]

    print('Begin PseDNC')

    psednc = PseDNC()
    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    psednc = PseDNC(lamada=2, w=0.1)
    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'],
                                 extra_phyche_index=normalize_index(
                                     phyche_index, is_convert_dict=True))
    print(vec)
    print(len(vec[0]))

    print('Begin PseKNC')

    pseknc = PseKNC()
    vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    pseknc = PseKNC(k=2, lamada=1, w=0.05)
    vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))
Beispiel #8
0
    if seq.no != 1:
        print("Error, the basic function get_data5")
        error = True

    # Normalization of physicochemical index.
    from util import normalize_index

    phyche_index = [[
        0.026, 0.036, 0.031, 0.033, 0.016, 0.026, 0.014, 0.031, 0.025, 0.025,
        0.026, 0.036, 0.017, 0.025, 0.016, 0.026
    ],
                    [
                        0.038, 0.038, 0.037, 0.036, 0.025, 0.042, 0.026, 0.037,
                        0.038, 0.036, 0.042, 0.038, 0.018, 0.038, 0.025, 0.038
                    ]]
    if normalize_index(phyche_index) \
            != [[0.06, 1.5, 0.78, 1.07, -1.38, 0.06, -1.66, 0.78, -0.08, -0.08, 0.06, 1.5, -1.23, -0.08, -1.38, 0.06],
                [0.5, 0.5, 0.36, 0.22, -1.36, 1.08, -1.22, 0.36, 0.5, 0.22, 1.08, 0.5, -2.37, 0.5, -1.36, 0.5]]:
        print("Error, the basic function normalize_index")
        error = True

    print("Basic function test end!")

    # ######################################################################################
    # Nucleic acid Composition

    # Basic kmer
    from nac import Kmer

    kmer = Kmer(k=2)
    if kmer.make_kmer_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) \
Beispiel #9
0
    if seq.seq != 'GACTGAACTGCACTTTGGTTTCATATTATTTGCTC':
        print("Error, the basic function get_data4")
        error = True
    if seq.no != 1:
        print("Error, the basic function get_data5")
        error = True

    # Normalization of physicochemical index.
    from util import normalize_index

    phyche_index = [
        [0.026, 0.036, 0.031, 0.033, 0.016, 0.026, 0.014, 0.031, 0.025, 0.025, 0.026, 0.036, 0.017, 0.025, 0.016,
         0.026],
        [0.038, 0.038, 0.037, 0.036, 0.025, 0.042, 0.026, 0.037, 0.038, 0.036, 0.042, 0.038, 0.018, 0.038, 0.025,
         0.038]]
    if normalize_index(phyche_index) \
            != [[0.06, 1.5, 0.78, 1.07, -1.38, 0.06, -1.66, 0.78, -0.08, -0.08, 0.06, 1.5, -1.23, -0.08, -1.38, 0.06],
                [0.5, 0.5, 0.36, 0.22, -1.36, 1.08, -1.22, 0.36, 0.5, 0.22, 1.08, 0.5, -2.37, 0.5, -1.36, 0.5]]:
        print("Error, the basic function normalize_index")
        error = True

    print("Basic function test end!")

    # ######################################################################################
    # Nucleic acid Composition

    # Basic kmer
    from nac import Kmer

    kmer = Kmer(k=2)
    if kmer.make_kmer_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC']) \
Beispiel #10
0
        self.k = 2
        check_psenac(self.lamada, self.w, self.k)

    def make_psednc_vec(self, input_data, phyche):
        """Build the PseDNC vectors for ``input_data``.

        :param input_data: the sequence input, forwarded unchanged to ``get_data``.
        :param phyche: the physicochemical values, passed on as-is.
        :return: the result of ``make_pseknc_vector`` called with this
            instance's ``lamada``, ``w`` and ``k`` and ``theta_type=1``.
        """
        sequences = get_data(input_data)
        from psenacutil import make_pseknc_vector

        return make_pseknc_vector(sequences, self.lamada, self.w, self.k, phyche, theta_type=1)


# Script: load the physicochemical index table, build the PseDNC vectors for
# the sequences in S_data.txt and write them to PseDNC_S.csv.

# header=None tells pandas the CSV has no header row; negative header values
# are rejected by current pandas releases.
phy = pd.read_csv('phy.csv', header=None, index_col=None)
phyche_index = np.array(phy)
phyche_index_dict = normalize_index(phyche_index, is_convert_dict=True)
psednc = PseDNC(lamada=23, w=0.05)
# Context manager so the sequence file handle is closed once it has been read.
with open('S_data.txt') as sequence_file:
    vec = psednc.make_psednc_vec(sequence_file, phyche=phyche_index_dict)
feature = np.array(vec)
data_new = np.matrix(feature)
data_PseDNC = pd.DataFrame(data=data_new)
data_PseDNC.to_csv('PseDNC_S.csv')
Beispiel #11
0
                     -0.654, 0.567, 1.019]]

    print('Begin PseDNC')

    psednc = PseDNC()
    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    psednc = PseDNC(lamada=2, w=0.1)
    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    vec = psednc.make_psednc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'],
                                 extra_phyche_index=normalize_index(phyche_index, is_convert_dict=True))
    print(vec)
    print(len(vec[0]))

    print('Begin PseKNC')

    pseknc = PseKNC()
    vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))

    pseknc = PseKNC(k=2, lamada=1, w=0.05)
    vec = pseknc.make_pseknc_vec(['GACTGAACTGCACTTTGGTTTCATATTATTTGCTC'])
    print(vec)
    print(len(vec[0]))