def CTD_feature_extractor(self, protein_sequence):
    """Return the CTD descriptors of a protein sequence as a float32 array.

    The descriptors are obtained from ``CTD.CalculateCTD`` and emitted in
    the iteration order of the mapping it returns.

    Args:
        protein_sequence: Sequence passed unchanged to ``CTD.CalculateCTD``.

    Returns:
        numpy.ndarray: One ``float32`` entry per descriptor. Any descriptor
        greater than 1 is divided by 100 and rounded to 3 decimals; values
        less than or equal to 1 are kept as they are.
    """
    ctd_features = CTD.CalculateCTD(protein_sequence)
    value_list = []
    for value in ctd_features.values():
        # NOTE(review): presumably rescales percentage-style descriptors
        # into the 0..1 range of the fractional ones. A percentage that is
        # itself <= 1 would be left unscaled -- confirm this is intended.
        if value > 1:
            value = round((value / 100), 3)
        value_list.append(value)
    return np.array(value_list, dtype=np.float32)
Esempio n. 2
0
def generate_features(sequence):
    """Return the CTD descriptors of *sequence* as a NumPy array.

    The descriptor values are ordered by their sorted descriptor names, and
    at most the first 147 of them are returned.

    Args:
        sequence: Protein sequence; converted with ``str()`` before being
            handed to ``CTD.CalculateCTD``.

    Returns:
        numpy.ndarray: The descriptor values in sorted-key order. Always an
        array: when fewer than 147 descriptors are available, all of them
        are returned (the previous implementation fell off the end of the
        loop and returned ``None`` in that case).
    """
    # NOTE(review): 147 is presumably the number of descriptors CalculateCTD
    # produces for one sequence -- confirm against the PyBioMed version used.
    max_features = 147
    feature = CTD.CalculateCTD(str(sequence))
    ordered_values = [feature[key] for key in sorted(feature)]
    return np.array(ordered_values[:max_features])
Esempio n. 3
0
def feature_generation(input_file):
    """Write the CTD descriptors of every sequence to the global CSV file.

    Relies on module-level state: ``feature_file`` must already be an open,
    writable file object, and ``write_header()`` must be defined in this
    module. The global ``feature`` is rebound to the descriptor mapping of
    the most recently processed sequence.

    Args:
        input_file: Anything ``pandas.DataFrame`` accepts that yields a
            ``'seq'`` column; each entry is converted with ``str()``.

    Side effects:
        Calls ``write_header()`` once, after the first sequence has been
        processed, writes the descriptor values (sorted by descriptor name)
        to ``feature_file`` and finally closes ``feature_file``.
    """
    global feature_file, feature
    sequences = pd.DataFrame(input_file)['seq']
    # Running count of values written since the last line break. It is NOT
    # reset per sequence: a newline is emitted after every 147th value.
    # NOTE(review): 147 presumably equals the descriptor count per sequence,
    # which makes this one CSV row per sequence -- confirm.
    count = 0
    # Iterate positionally; a label lookup with ``.loc[row]`` breaks when the
    # incoming frame does not carry a default 0..n-1 index.
    for position, raw_sequence in enumerate(sequences):
        feature = CTD.CalculateCTD(str(raw_sequence))
        if position == 0:
            write_header()
        for key in sorted(feature):
            count += 1
            if count == 147:
                count = 0
                terminator = '\n'
            else:
                terminator = ','
            feature_file.write(str(feature[key]) + terminator)

    feature_file.close()
Esempio n. 4
0
        res[i + '+' + i] = round(dict1[i] + dict2[i], 3)
        res[i + '*' + i] = round(dict1[i] * dict2[i], 3)
    return res


# Demo run: compute DNA, protein and molecule descriptors with PyBioMed and
# print the three interaction variants for the molecule paired with itself.
# ``print(x)`` with a single argument behaves the same on Python 2 and 3,
# whereas the former ``print x`` statements are a syntax error on Python 3.
if __name__ == '__main__':
    from PyBioMed.PyDNA import PyDNAac

    DNA_des = PyDNAac.GetTCC('GACTGAACTGCACTTTGGTTTCATATTATTTGCTC',
                             phyche_index=['Dnase I', 'Nucleosome', 'MW-kg'])

    print(DNA_des)

    from PyBioMed.PyProtein import CTD
    protein = "ADGCGVGEGTGQGPMCNCMCMKWVYADEDAADLESDSFADEDASLESDSFPWSNQRVFCSFADEDAS"
    # Computed but not used further in this demo.
    protein_des = CTD.CalculateCTD(protein)

    from PyBioMed.PyMolecule import moe
    from rdkit import Chem
    smis = ['CCCC', 'CCCCC', 'CCCCCC', 'CC(N)C(=O)O', 'CC(N)C(=O)[O-].[Na+]']
    # Only the fourth SMILES string is used.
    m = Chem.MolFromSmiles(smis[3])
    mol_des = moe.GetMOE(m)

    mol_mol_interaction1 = CalculateInteraction1(mol_des, mol_des)
    print(mol_mol_interaction1)

    mol_mol_interaction2 = CalculateInteraction2(mol_des, mol_des)
    print(mol_mol_interaction2)

    mol_mol_interaction3 = CalculateInteraction3(mol_des, mol_des)
    print(mol_mol_interaction3)
Esempio n. 5
0
def test_pyinteration():
    """Exercise the PyBioMed interaction functions and print their results.

    Computes one DNA, one protein and one molecule descriptor set, then
    feeds pairs of them to ``CalculateInteraction1``, ``CalculateInteraction2``
    and ``CalculateInteraction3``. Nothing is asserted; the function only
    fails if one of the calls raises.

    Every ``print`` uses the single-argument call form, which produces the
    same output on Python 2 and Python 3.
    """
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction1
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction2
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction3

    from PyBioMed.PyDNA import PyDNAac

    print('...............................................................')
    print('testing the DNA descriptors')

    DNA_des = PyDNAac.GetTCC('GACTGAACTGCACTTTGGTTTCATATTATTTGCTC',
                             phyche_index=['Dnase I', 'Nucleosome', 'MW-kg'])

    print(DNA_des)

    print('...............................................................')
    print('testing the protein descriptors')

    from PyBioMed.PyProtein import CTD
    protein = "ADGCGVGEGTGQGPMCNCMCMKWVYADEDAADLESDSFADEDASLESDSFPWSNQRVFCSFADEDAS"
    protein_des = CTD.CalculateCTD(protein)

    print('...............................................................')
    print('testing the molecular descriptors')

    from PyBioMed.PyMolecule import moe
    from rdkit import Chem
    smis = ['CCCC', 'CCCCC', 'CCCCCC', 'CC(N)C(=O)O', 'CC(N)C(=O)[O-].[Na+]']
    # Only the fourth SMILES string is used.
    m = Chem.MolFromSmiles(smis[3])
    mol_des = moe.GetMOE(m)

    print('...............................................................')
    print('testing the Interaction type 1 module')

    mol_mol_interaction1 = CalculateInteraction1(mol_des, mol_des)
    print(mol_mol_interaction1)

    pro_mol_interaction1 = CalculateInteraction1(mol_des, protein_des)
    print(pro_mol_interaction1)

    DNA_mol_interaction1 = CalculateInteraction1(DNA_des, mol_des)
    print(DNA_mol_interaction1)

    print('...............................................................')
    print('testing the Interaction type 2 module')

    mol_mol_interaction2 = CalculateInteraction2(mol_des, mol_des)
    print(mol_mol_interaction2)

    pro_mol_interaction2 = CalculateInteraction2(mol_des, protein_des)
    print(pro_mol_interaction2)

    DNA_mol_interaction2 = CalculateInteraction2(DNA_des, mol_des)
    print(DNA_mol_interaction2)

    print('...............................................................')
    print('testing the Interaction type 3 module')

    # Type 3 is only run on the molecule paired with itself.
    mol_mol_interaction3 = CalculateInteraction3(mol_des, mol_des)
    print(mol_mol_interaction3)
Esempio n. 6
0
def test_pyprotein():
    """Exercise the PyBioMed protein descriptor modules and print results.

    The seven modules are resolved from the module-level ``modulelists``
    sequence and called on one fixed protein sequence. Nothing is asserted;
    the function only fails if one of the calls raises.

    Every ``print`` uses the single-argument call form, which produces the
    same output on Python 2 and Python 3.
    """
    # NOTE(review): ``eval`` resolves each entry of ``modulelists`` (defined
    # elsewhere in this module) to an object. That is only acceptable while
    # the list is a hard-coded constant -- never feed it external input.
    AAC = eval(modulelists[0])
    AC = eval(modulelists[1])
    CTD = eval(modulelists[2])
    QSO = eval(modulelists[3])
    PAAC = eval(modulelists[4])
    GPFU = eval(modulelists[5])  # resolved but not exercised below
    GSS = eval(modulelists[6])

    print('...............................................................')

    print("testing the GetSubSeq module")

    ProteinSequence = 'ADGCGVGEGTGQGPMCNCMCMKWVYADEDAADLESDSFADEDASLESDSFPWSNQRVFCSFADEDAS'

    temp = GSS.GetSubSequence(ProteinSequence, ToAA='D', window=5)

    print(temp)

    print('...............................................................')

    print("testing the AAComposition module")

    temp = AAC.CalculateAAComposition(ProteinSequence)

    print(temp)

    # The next three calls are run only to check that they do not raise;
    # their results are not printed.
    AAC.CalculateDipeptideComposition(ProteinSequence)

    AAC.GetSpectrumDict(ProteinSequence)

    AAC.CalculateAADipeptideComposition(ProteinSequence)

    print('...............................................................')

    print("testing the Autocorrelation module")

    temp = AC.CalculateNormalizedMoreauBrotoAuto(ProteinSequence,
                                                 [AC._ResidueASA],
                                                 ['ResidueASA'])

    print(temp)

    temp = AC.CalculateMoranAuto(ProteinSequence, [AC._ResidueASA],
                                 ['ResidueASA'])

    print(temp)

    temp = AC.CalculateGearyAuto(ProteinSequence, [AC._ResidueASA],
                                 ['ResidueASA'])

    print(temp)

    # Run only to check that it does not raise; the result is not printed.
    AC.CalculateAutoTotal(ProteinSequence)

    print('...............................................................')

    print("testing the CTD module")

    temp = CTD.CalculateC(ProteinSequence)

    print(temp)

    temp = CTD.CalculateT(ProteinSequence)

    print(temp)

    temp = CTD.CalculateD(ProteinSequence)

    print(temp)

    temp = CTD.CalculateCTD(ProteinSequence)

    print(temp)

    print('...............................................................')

    print("testing the QuasiSequenceOrder module")

    temp = QSO.GetSequenceOrderCouplingNumberTotal(ProteinSequence, maxlag=30)

    print(temp)

    temp = QSO.GetQuasiSequenceOrder(ProteinSequence, maxlag=30, weight=0.1)

    print(temp)

    print('...............................................................')

    print("testing the PseudoAAC module")

    temp = PAAC.GetAPseudoAAC(ProteinSequence, lamda=10, weight=0.5)

    print(temp)

    temp = PAAC._GetPseudoAAC(ProteinSequence, lamda=10, weight=0.05)

    print(temp)

    print('...............................................................')

    print("Tested successfully!")
Esempio n. 7
0
def test_pyinteration():
    """Run the PyBioMed interaction functions on sample descriptors.

    One DNA, one protein and one molecule descriptor set are computed and
    combined pairwise through ``CalculateInteraction1`` and
    ``CalculateInteraction2``; ``CalculateInteraction3`` is run on the
    molecule paired with itself. Each result is printed and nothing is
    asserted, so the function only fails if a call raises.
    """
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction1
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction2
    from PyBioMed.PyInteraction.PyInteraction import CalculateInteraction3

    from PyBioMed.PyDNA import PyDNAac

    rule = "..............................................................."

    def announce(title):
        # Section banner: separator line followed by the section title.
        print(rule)
        print(title)

    announce("testing the DNA descriptors")
    dna_sequence = "GACTGAACTGCACTTTGGTTTCATATTATTTGCTC"
    dna_descriptors = PyDNAac.GetTCC(
        dna_sequence, phyche_index=["Dnase I", "Nucleosome", "MW-kg"]
    )
    print(dna_descriptors)

    announce("testing the protein descriptors")
    # Imported only after the banner, so the banner is printed even if the
    # import fails.
    from PyBioMed.PyProtein import CTD

    protein_descriptors = CTD.CalculateCTD(
        "ADGCGVGEGTGQGPMCNCMCMKWVYADEDAADLESDSFADEDASLESDSFPWSNQRVFCSFADEDAS"
    )

    announce("testing the molecular descriptors")
    from PyBioMed.PyMolecule import moe
    from rdkit import Chem

    smiles_pool = ["CCCC", "CCCCC", "CCCCCC", "CC(N)C(=O)O", "CC(N)C(=O)[O-].[Na+]"]
    # Only the fourth SMILES string is used.
    molecule_descriptors = moe.GetMOE(Chem.MolFromSmiles(smiles_pool[3]))

    # Types 1 and 2 are run on the same three descriptor pairs, in order.
    descriptor_pairs = (
        (molecule_descriptors, molecule_descriptors),
        (molecule_descriptors, protein_descriptors),
        (dna_descriptors, molecule_descriptors),
    )
    paired_sections = (
        ("testing the Interaction type 1 module", CalculateInteraction1),
        ("testing the Interaction type 2 module", CalculateInteraction2),
    )
    for title, interact in paired_sections:
        announce(title)
        for left, right in descriptor_pairs:
            print(interact(left, right))

    announce("testing the Interaction type 3 module")
    print(CalculateInteraction3(molecule_descriptors, molecule_descriptors))