import prop import fold import fitRF fOut = open("CM-RF.txt", "w") # 1. CM vector parsing CM_Path = "/Volumes/Macintosh HD 2/FSU/2018Fall/SeminarJose/Seminar2/FinalProject/Supplementary_Materials/CM" time0 = time.time() CM_vec = feature.parse_feature_CM(CM_Path) # print(CM_vec[1]) time1 = time.time() fOut.write("Execuation time 01 - CM vector parsing: %s sec.\n" % (time1 - time0)) # 2. Property parsing QM9_Prop, QM9_Index = prop.parse_prop("/Volumes/Macintosh HD 2/FSU/2018Fall/SeminarJose/Seminar2/FinalProject/Supplementary_Materials/qm9-mol-info-standardized-v1") time2 = time.time() fOut.write("Execuation time 02 - Property parsing: %s sec.\n" % (time2 - time1)) # 3. Fold parsing Predefined_Split, Train_Index, Test_Index = fold.predefined_fold() time3 = time.time() fOut.write("Execuation time 03 - Fold parsing: %s sec. \n" % (time3 - time2)) # 4. Reindex to training and testing set Train_X = np.zeros([len(Train_Index[0]), CM_vec.shape[1]]) Train_Y = np.zeros([13, len(Train_Index[0])]) Test_X = np.zeros([len(Test_Index[0]), CM_vec.shape[1]]) Test_Y = np.zeros([13, len(Test_Index[0])]) Predict_Y = np.zeros([13, len(Test_Index[0])]) for Indi, i in enumerate(Train_Index[0]):
def mainprog():
    """Run the BR, EN and RF fits on CM features for predefined fold ``[0]``.

    Steps (each one logs its wall-clock duration to the report file):

    1. parse the CM feature vectors with ``feature.parse_feature_CM``;
    2. parse the QM9 property table with ``prop.parse_prop``;
    3. load the predefined folds with ``fold.predefined_fold``;
    4. gather dense training/testing arrays for fold ``[0]``;
    5. for each of the 13 property rows call ``fit.fit_BR``, ``fit.fit_EN``
       and ``fit.fit_RF`` and write one result row per property.

    The report is written to ``CM-BR-EN-RF.txt`` in the current working
    directory.  The file is held open by a single ``with`` block for the
    whole run: previously it was closed after the EN section, so every
    write of the RF section failed with
    ``ValueError: I/O operation on closed file``; it was also leaked when
    any stage raised.

    Relies on the names ``feature``, ``prop``, ``fold`` and ``fit`` being
    available at module level.  Takes no arguments and returns ``None``.
    """
    import numpy as np
    import time

    # Number of property rows held in the *_Y arrays, and of fits per model.
    n_props = 13

    CM_Path = "/share/home/zyzhu/Documents-Graduate/Faber-Lilienfeld.JCTC.2017.ASAP/SI_Source/Feature_Files/CM"
    Prop_Path = "/share/home/zyzhu/Documents-Graduate/Faber-Lilienfeld.JCTC.2017.ASAP/SI_Source/Prop_Files/qm9-mol-info-standardized-v1"

    # NOTE(review): the header labels below are narrower than the numeric
    # column widths in row_format (8/14/14/11/11) - confirm whether padding
    # was lost from these literals.
    table_header = (" Prop_Id" + " Err_MAD" + " Err_RMSD" + " Time_Train" +
                    " Time_Test" + "\n")
    row_format = "{:8}{:14.8f}{:14.8f}{:11.2f}{:11.2f}\n"

    def gather(ids, cm_vec, qm9_prop):
        """Build dense (X, Y) arrays for the given fold ids.

        Each id is mapped to row ``id - 1`` of ``cm_vec`` / ``qm9_prop``.
        ``X`` is indexed ``[position, feature]`` and ``Y`` is indexed
        ``[property, position]``.
        """
        x = np.zeros([len(ids), cm_vec.shape[1]])
        y = np.zeros([n_props, len(ids)])
        for pos, mol_id in enumerate(ids):
            x[pos] = cm_vec[mol_id - 1]
            y[:, pos] = qm9_prop[mol_id - 1, :]
        return x, y

    def write_section(out, label, fit_func, train_x, train_y, test_x, test_y,
                      split):
        """Fit every property with ``fit_func`` and append one table to ``out``."""
        out.write("\n")
        out.write("--- Training and Testing %s ---\n" % label)
        out.write("\n")
        out.write(table_header)
        for prop_id in range(n_props):
            err_mad, err_rmsd, time_train, time_test = fit_func(
                train_x, train_y[prop_id], test_x, test_y[prop_id], split)
            out.write(row_format.format(
                prop_id, err_mad, err_rmsd, time_train, time_test))

    # 0. Output file - kept open until every section has been written.
    with open("CM-BR-EN-RF.txt", "w") as FOut:
        time0 = time.time()

        # 1. CM vector parsing
        CM_vec = feature.parse_feature_CM(CM_Path)
        time1 = time.time()
        FOut.write("Execuation time 01 - CM vector parsing: %s \n" % (time1 - time0))

        # 2. Property parsing
        QM9_Prop, QM9_Index = prop.parse_prop(Prop_Path)
        time2 = time.time()
        FOut.write("Execuation time 02 - Property parsing: %s \n" % (time2 - time1))

        # 3. Fold parsing
        Predefined_Split, Train_Index, Test_Index = fold.predefined_fold()
        time3 = time.time()
        FOut.write("Execuation time 03 - Fold parsing: %s \n" % (time3 - time2))

        # 4. Reindex to training and testing set
        #    Only the 10% testing set (fold [0]) is used, so the results
        #    should be comparable to Table 3 in Faber et al. (JCTC 2017).
        #    All properties are tested.
        #    ! CM_vec has length 133885, so the vector itself needs no
        #      reindexing; however, some rows of CM_vec are empty, and the
        #      same holds for QM9_Prop.
        #    ! The training and testing sets of fold [0] are built by the
        #      reindex step below.
        #    ! The *_Y property arrays are indexed as [Prop, reindexed-Id].
        Train_X, Train_Y = gather(Train_Index[0], CM_vec, QM9_Prop)
        Test_X, Test_Y = gather(Test_Index[0], CM_vec, QM9_Prop)
        time4 = time.time()
        FOut.write("Execuation time 04 - Reindex: %s \n" % (time4 - time3))

        # 5. Training and Testing - BR
        write_section(FOut, "BR", fit.fit_BR, Train_X, Train_Y, Test_X, Test_Y,
                      Predefined_Split[0])

        # 6. Training and Testing - EN
        write_section(FOut, "EN", fit.fit_EN, Train_X, Train_Y, Test_X, Test_Y,
                      Predefined_Split[0])

        # 7. Training and Testing - RF
        write_section(FOut, "RF", fit.fit_RF, Train_X, Train_Y, Test_X, Test_Y,
                      Predefined_Split[0])
import folding
import fitElasticNet

# Report file of the CM / EN run; each stage below appends its wall-clock time.
fOut = open("CM-EN.txt", "w")

# 1. CM vector parsing
CM_Path = "SupplementaryMaterials/CM"
time0 = time.time()
CM_vec = featureCM.parse_feature_CM(CM_Path)
# print(CM_vec[1])
time1 = time.time()
fOut.write("Execuation time 01 - CM vector parsing: %s sec.\n" % (time1 - time0))

# 2. Property parsing
QM9_Prop, QM9_Index = prop.parse_prop(
    "SupplementaryMaterials/qm9-mol-info-standardized-v1")
time2 = time.time()
fOut.write("Execuation time 02 - Property parsing: %s sec.\n" % (time2 - time1))

# 3. Fold parsing
Predefined_Split, Train_Index, Test_Index = folding.predef_Fold()
time3 = time.time()
fOut.write("Execuation time 03 - Fold parsing: %s sec. \n" % (time3 - time2))

# 4. Reindex to training and testing set
#    Zero-filled containers for fold [0]: the *_X arrays have one row per
#    fold entry and CM_vec.shape[1] columns; the *_Y arrays have 13 rows and
#    one column per fold entry.
Train_X = np.zeros((len(Train_Index[0]), CM_vec.shape[1]))
Train_Y = np.zeros((13, len(Train_Index[0])))
Test_X = np.zeros((len(Test_Index[0]), CM_vec.shape[1]))
Test_Y = np.zeros((13, len(Test_Index[0])))
Predict_Y = np.zeros((13, len(Test_Index[0])))