def smi2cm(m):
    """Build the Coulomb matrix of a molecule given as a SMILES string.

    The SMILES is parsed with RDKit, explicit hydrogens are added, a 3D
    conformer is embedded with ETKDG, and the resulting element symbols and
    coordinates are handed to ``CoulombMatrix``.

    Args:
        m: SMILES string describing the molecule.

    Returns:
        The Coulomb matrix as a nested list with ``n_atoms`` rows and
        ``n_atoms`` columns, where ``n_atoms`` is the atom count after the
        hydrogens have been added.

    Raises:
        ValueError: If the SMILES cannot be parsed, or if no 3D conformer
            could be embedded for the molecule.
    """
    parsed = Chem.MolFromSmiles(m)
    if parsed is None:
        # MolFromSmiles signals a parse failure by returning None.
        raise ValueError("could not parse SMILES: %r" % (m,))

    mol = Chem.AddHs(parsed)

    # EmbedMolecule returns the id of the new conformer, or -1 on failure.
    conf_id = AllChem.EmbedMolecule(mol, AllChem.ETKDG())
    if conf_id < 0:
        raise ValueError("could not embed a 3D conformer for: %r" % (m,))

    n_atoms = mol.GetNumAtoms()
    conformer = mol.GetConformer(conf_id)

    # Read symbols and positions straight from the molecule instead of
    # splitting the MolBlock text on whitespace: the MolBlock uses fixed-width
    # columns, so neighbouring fields can fuse (e.g. >=100 atoms and bonds)
    # and shift every token offset.
    atom_list = [atom.GetSymbol() for atom in mol.GetAtoms()]
    axis = []
    for idx in range(n_atoms):
        pos = conformer.GetAtomPosition(idx)
        # Keep the 4-decimal precision that a V2000 MolBlock carries, so the
        # values match coordinates read back from such a block.
        axis.append([round(pos.x, 4), round(pos.y, 4), round(pos.z, 4)])

    feat = CoulombMatrix()
    mole = (atom_list, axis)
    feat.fit([mole])
    flat = feat.transform([mole])[0]
    return flat.reshape((n_atoms, n_atoms)).tolist()
]) H2 = (H2_ELES, H2_COORDS) H2_FULL = (H2_ELES, H2_COORDS, H2_UNIT) HCN_ELES = ['H', 'C', 'N'] HCN_COORDS = [ [-1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0], ] HCN = (HCN_ELES, HCN_COORDS) if __name__ == "__main__": # Example of fitting the Coulomb matrix and then saving it feat = CoulombMatrix() feat.fit([H2, HCN]) print("Saving Model") feat.save_json("coulomb_model.json") print("Loading Model") feat2 = load_json("coulomb_model.json") print(feat2.transform([H2, HCN])) # Example of fitting a generallized crystal with the Coulomb matrix and # then saving it input_type = ("elements", "coords", "unit_cell") radius = 4.1 feat = CoulombMatrix(input_type=input_type) crystal = GenerallizedCrystal(transformer=feat, radius=radius) feat.fit([H2_FULL])
for i in range(0,molsum): atom_nums=int(text[index]) a=text[index+2:index+atom_nums+2] name='mol'+ str(mol_no) xyz_dict[name]=a index=index+atom_nums+2 mol_no=mol_no-1 atom_list=[] axis=[] for j in range(0,len(a)): atom=a[j].split() atom_list.append(atom[0]) axis.append([float(atom[1]),float(atom[2]),float(atom[3])]) feat=CoulombMatrix() mole=(atom_list,axis) feat.fit([mole]) CM=feat.transform([mole])[0] t=CM.reshape((atom_nums,atom_nums)).tolist() CM_dict.append(t) fd.close() sio.savemat('CM.mat',{'CM':CM_dict}) else: #smi to coordination and coulomb matrix filepath=input("what's sdf filename?") #get CM def smi2cm(m): m1 = Chem.MolFromSmiles(m) m = Chem.AddHs(m1) AllChem.EmbedMolecule(m,AllChem.ETKDG())
0: {1: '1'}, 1: {0: '1'}, } H2_UNIT = numpy.array([ [2., .5, 0.], [.25, 1., 0.], [0., .3, 1.], ]) radius = 4.1 input_type = ("elements", "coords", "unit_cell") X = (H2_ELES, H2_COORDS, H2_UNIT) if __name__ == "__main__": trans = EwaldSumMatrix(input_type=input_type, G_max=3.2, L_max=2.1) res = trans.fit_transform([X]) print(res) trans = SineMatrix(input_type=input_type) res = trans.fit_transform([X]) print(res) # Example of generallized crystal # Any transformer can be used as it just expands the molecule using the # unit cell and coordinates. cm = CoulombMatrix(input_type=input_type) trans = GenerallizedCrystal(transformer=cm, radius=radius) res = trans.fit_transform([X]) print(res)
from utils import load_qm7


# Fits a Ridge model on each of several molecular feature transformers and
# computes the train/test MAE for each one.
if __name__ == "__main__":
    # This is just boilerplate code to load the data
    # (presumably the QM7 dataset, already split into train and test --
    # TODO confirm against utils.load_qm7)
    Xin_train, Xin_test, y_train, y_test = load_qm7()

    # Change this to make the transformations parallel
    # Values less than 1 will set to the number of cores the CPU has
    N_JOBS = 1

    # Just a few examples of different features
    tfs = [
        EncodedBond(n_jobs=N_JOBS),
        EncodedBond(spacing="inverse", n_jobs=N_JOBS),
        BagOfBonds(n_jobs=N_JOBS),
        CoulombMatrix(n_jobs=N_JOBS),
        Connectivity(depth=1, n_jobs=N_JOBS),
        Connectivity(depth=2, use_bond_order=True, n_jobs=N_JOBS),
        Connectivity(depth=3, use_coordination=True, n_jobs=N_JOBS),
    ]

    for tf in tfs:
        print(tf)
        # Fit the transformer on the training inputs only, then reuse the
        # fitted transformer for the test inputs.
        X_train = tf.fit_transform(Xin_train)
        X_test = tf.transform(Xin_test)

        # We will not do a hyperparameter search for simplicity
        clf = Ridge()
        clf.fit(X_train, y_train)
        # Errors are computed for both splits; nothing in the visible part
        # of the script reads them afterwards.
        train_error = MAE(clf.predict(X_train), y_train)
        test_error = MAE(clf.predict(X_test), y_test)
# Coordinates of the three HCN atoms, all placed on the x axis.
HCN_COORDS = [
    [-1.0, 0.0, 0.0],
    [0.0, 0.0, 0.0],
    [1.0, 0.0, 0.0],
]
# Presumably atom index -> {bonded atom index: bond order}; not used in the
# visible part of this script -- TODO confirm against its consumer.
HCN_CONNS = {
    0: {1: '1'},
    1: {0: '1', 2: '3'},
    2: {1: '3'},
}

if __name__ == "__main__":
    # Example of generating the Coulomb matrix with just elements and coords
    # for a single example molecule.
    feat = CoulombMatrix()
    H2 = (H2_ELES, H2_COORDS)
    feat.fit([H2])
    print("Transformed H2")
    print(feat.transform([H2]))
    print()

    # Example of generating the Coulomb matrix with just elements and coords
    # for multiple molecules.
    # A fresh transformer is fitted on both molecules before transforming.
    feat = CoulombMatrix()
    HCN = (HCN_ELES, HCN_COORDS)
    feat.fit([H2, HCN])
    print("Transformed H2")
    print(feat.transform([H2]))
    print("H2 and HCN transformed")
    print(feat.transform([H2, HCN]))
[0., .3, 1.], ]) H2 = (H2_ELES, H2_COORDS) H2_FULL = (H2_ELES, H2_COORDS, H2_UNIT) HCN_ELES = ['H', 'C', 'N'] HCN_COORDS = [ [-1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0], ] HCN = (HCN_ELES, HCN_COORDS) if __name__ == "__main__": # Example of fitting the Coulomb matrix and then saving it feat = CoulombMatrix() feat.fit([H2, HCN]) print("Saving Model") feat.save_json("coulomb_model.json") print("Loading Model") feat2 = load_json("coulomb_model.json") print(feat2.transform([H2, HCN])) # Example of fitting a generallized crystal with the Coulomb matrix and # then saving it input_type = ("elements", "coords", "unit_cell") radius = 4.1 feat = CoulombMatrix(input_type=input_type) crystal = GenerallizedCrystal(transformer=feat, radius=radius) feat.fit([H2_FULL])
from molml.features import CoulombMatrix

# Each molecule is an (element symbols, xyz coordinates) pair.
H2 = (
    ['H', 'H'],
    [[0.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
)
HCN = (
    ['H', 'C', 'N'],
    [[-1.0, 0.0, 0.0], [0.0, 0.0, 0.0], [1.0, 0.0, 0.0]],
)

# Featurizer that is handed the in-memory molecules above; every option is
# spelled out explicitly.
feat = CoulombMatrix(
    input_type='list',
    n_jobs=1,
    sort=False,
    eigen=False,
    drop_values=False,
    only_lower_triangle=False,
)
feat.fit([H2, HCN])

# Transform H2 on its own first, then both molecules together.
for batch in ([H2], [H2, HCN]):
    print(feat.transform(batch))

# Second featurizer that is handed file paths instead of in-memory molecules.
feat2 = CoulombMatrix(input_type='filename')
paths = ['data/qm7/qm-%04d.out' % idx for idx in range(2)]
print(feat2.fit_transform(paths))