def readfromtxt(mol, txt):
    """Populate ``mol`` with atoms parsed from xyz-style text lines.

    Every line that splits into exactly four whitespace-separated fields is
    read as ``label x y z``; lines with any other field count are skipped.
    A label ending in digits (e.g. ``Fe1``) is treated as a unique atom ID:
    all digit runs are stripped to obtain the element symbol and the full
    label is passed to ``atom3D`` as ``name``.  Otherwise the label must be
    a key of ``globalvars().endict()``.

    Parameters
    ----------
    mol : object
        Molecule to fill.  Its ``graph`` attribute is reset to ``[]`` and
        ``addAtom`` is called once per parsed atom.
    txt : iterable of str
        Lines to parse.

    Returns
    -------
    mol
        The same object that was passed in.

    Raises
    ------
    SystemExit
        If a four-field line carries a label that neither ends in digits
        nor is a key of the element dictionary.
    ValueError
        If a coordinate field cannot be converted to ``float``.
    """
    en_dict = globalvars().endict()
    mol.graph = []
    for line in txt:
        fields = line.split()
        # str.split() never yields empty fields, so a length check is enough.
        if len(fields) != 4:
            continue
        label = fields[0]
        if re.search(r'\d+$', label) is not None:
            # Unique atom ID such as "C12": keep the ID as the atom name and
            # use the digit-free remainder as the element symbol.
            symb = re.sub(r'\d+', '', label)
            atom = atom3D(symb, [float(v) for v in fields[1:]], name=label)
        elif label in en_dict:
            atom = atom3D(label, [float(v) for v in fields[1:]])
        else:
            print('cannot find atom type')
            sys.exit()
        mol.addAtom(atom)
    return mol
def substplaceff_mode3(core, substr, substreact, compreact, cpoint, args, connected, frozenats):
    """Place a substrate on a core and optionally force-field relax it.

    The substrate is aligned onto ``cpoint``, rotated relative to the core
    atom ``compreact``, its bond between ``substreact`` and the first atom
    bonded to it is set to ``XYcoeff`` times the sum of the two atoms'
    ``rad`` values, and the result is merged with a copy of ``core``.

    Parameters
    ----------
    core, substr : mol3D-like
        Core complex and substrate.  ``substr`` is modified in place by the
        alignment and bond-length steps.
    substreact, compreact : int
        Indices of the reacting atom in ``substr`` and in ``core``.
    cpoint : sequence
        Coordinates the reacting substrate atom is aligned to.
    args : namespace
        Reads ``ffoption``, ``substplaceff`` and ``ff``.
    connected, frozenats : list
        Passed through unchanged to ``ffopt``.

    Returns
    -------
    tuple
        ``(ts3D, enc)``: the combined structure and the value returned by
        ``ffopt`` as its second element, or ``0`` when no force-field
        optimisation was requested.
    """
    # NOTE(review): the original indentation of this function was lost.  The
    # two natoms checks are written here as siblings; nesting the second one
    # inside the first gives the same result for substrates with at least
    # two atoms -- confirm against the upstream file.
    energy = 0
    # A dummy hydrogen at cpoint serves as the alignment target.
    substr.alignmol(substr.getAtom(substreact), atom3D('H', cpoint))

    core_xyz = core.getAtomCoords(compreact)
    neighbour = substr.getBondedAtoms(substreact)[0]
    if substr.natoms > 1:
        # centre-of-symmetry alignment
        substr = align_lig_centersym(core_xyz, substr, substreact, core, False)
    if substr.natoms > 2:
        # linear-molecule check, then symmetric-molecule check
        substr = check_rotate_linear_lig(core_xyz, substr, substreact)
        substr = check_rotate_symm_lig(core_xyz, substr, substreact, core)
    # spin about the M-L axis to reduce steric clashes
    substr = rotate_MLaxis_minimize_steric(core_xyz, substr, substreact, core)

    # Re-query the neighbour because the helpers above return (possibly new)
    # substrate objects, then set the X-Y bond length.
    neighbour = substr.getBondedAtoms(substreact)[0]
    radius_sum = substr.getAtom(substreact).rad + substr.getAtom(neighbour).rad
    target_bl = XYcoeff*(radius_sum)
    substr.BCM(neighbour, substreact, target_bl)

    # Merge a copy of the core with the placed substrate.
    merged = mol3D()
    merged.copymol3D(core)
    merged = merged.combine(substr)
    merged.charge += substr.charge

    wants_ff = 'a' in args.ffoption or args.substplaceff
    if wants_ff:
        merged, energy = ffopt(args.ff, merged, connected, 1,
                               frozenats, False, [], 'Adaptive')
    return merged, energy
def GetConf(mol, args, catoms=None):
    """Generate a conformer of ``mol`` by distance geometry.

    A copy of ``mol`` is extended with a dummy Fe atom bonded (in the
    OpenBabel molecule) to every atom listed in ``catoms``.  Distance bounds
    are built for that copy, trial distance matrices are drawn until
    ``GetCMDists`` reports success, and the embedded coordinates are refined
    with a conjugate-gradient minimisation of ``DistErr``.

    Parameters
    ----------
    mol : mol3D
        Molecule of interest.
    args : Namespace
        Forwarded to ``findshape``.
    catoms : list, optional
        0-based indices of connection atoms.  Defaults to an empty list.

    Returns
    -------
    mol3D
        The object returned by ``SaveConf`` for the refined coordinates.
    """
    # A fresh list per call: the previous mutable default ([]) was shared
    # between calls and is handed to helpers that are free to modify it.
    catoms = [] if catoms is None else catoms

    # NOTE(review): if this file is a single module, a later definition of
    # GetConf with the same signature replaces this one at import time.

    # Create a mol3D copy with a dummy metal.
    Conf3D = mol3D()
    Conf3D.copymol3D(mol)
    Conf3D.addAtom(atom3D('Fe', [0, 0, 0]))  # dummy metal in the mol3D
    # NOTE(review): the copymol3D visible in this file assigns OBMol by
    # reference, in which case the dummy atom and bonds added below also end
    # up in mol.OBMol -- confirm whether that is intended.
    dummy_metal = openbabel.OBAtom()  # ...and the same dummy in the OBMol
    dummy_metal.SetAtomicNum(26)
    Conf3D.OBMol.AddAtom(dummy_metal)
    for i in catoms:
        # OpenBabel atom indices are 1-based; the dummy metal is the last atom.
        Conf3D.OBMol.AddBond(i + 1, Conf3D.OBMol.NumAtoms(), 1)
    natoms = Conf3D.natoms
    Conf3D.createMolecularGraph()

    shape = findshape(args, mol)
    LB, UB = GetBoundsMatrices(Conf3D, natoms, catoms, shape)

    # Draw trial distance matrices until GetCMDists reports success.
    status = False
    while not status:
        D = Metrize(LB, UB, natoms)
        D0, status = GetCMDists(D, natoms)

    G = GetMetricMatrix(D, D0, natoms)
    L, V = Get3Eigs(G, natoms)
    X = np.dot(V, L)  # get projection
    x = np.reshape(X, 3 * natoms)
    res1 = optimize.fmin_cg(DistErr, x, fprime=DistErrGrad, gtol=0.1,
                            args=(LB, UB, natoms), disp=0)
    X = np.reshape(res1, (natoms, 3))
    Conf3D = SaveConf(X, Conf3D, True, catoms)
    return Conf3D
def copymol3D(self, mol0):
    """Copy the atoms and selected attributes of ``mol0`` into this object.

    For each atom of ``mol0`` a new ``atom3D`` is built from its symbol and
    coordinates and added via ``addAtom``; the ``frozen`` flag is carried
    over.  The attributes ``cat``, ``charge``, ``denticity``, ``ident``,
    ``ffopt`` and ``OBMol`` are then assigned directly, so mutable values
    (including ``OBMol``) are shared with ``mol0`` rather than duplicated.

    Parameters
    ----------
    mol0 : mol3D
        Molecule to copy from.
    """
    # NOTE(review): the frozen flag is set on self.getAtom(position), which
    # matches the atom just added only if this molecule held no atoms before
    # the call -- confirm callers always start from an empty molecule.
    for position, source_atom in enumerate(mol0.atoms):
        self.addAtom(atom3D(source_atom.sym, source_atom.coords()))
        if source_atom.frozen:
            self.getAtom(position).frozen = True

    # Plain reference copies, performed in the same order as before.
    for attribute in ('cat', 'charge', 'denticity', 'ident', 'ffopt', 'OBMol'):
        setattr(self, attribute, getattr(mol0, attribute))
def convert2mol3D(self):
    """Rebuild this molecule's atoms from its OpenBabel molecule.

    Calls ``self.initialize()`` and then adds one ``atom3D`` per atom of
    ``self.OBMol``, using the OpenBabel coordinates and the symbol looked up
    in ``globalvars().elementsbynum()``.
    """
    self.initialize()
    symbol_table = globalvars().elementsbynum()
    for ob_atom in openbabel.OBMolAtomIter(self.OBMol):
        xyz = [ob_atom.GetX(), ob_atom.GetY(), ob_atom.GetZ()]
        # The table is indexed with (atomic number - 1).
        symbol = symbol_table[ob_atom.GetAtomicNum() - 1]
        self.addAtom(atom3D(symbol, xyz))
def GetConf(mol, args, catoms=None):
    """Uses distance geometry to get a random conformer.

    A copy of ``mol`` is extended with a dummy Fe atom bonded (in the
    OpenBabel molecule) to every atom in ``catoms``.  Distance bounds are
    built for that copy, trial distance matrices are drawn until
    ``GetCMDists`` reports success, and the embedded coordinates are refined
    with a conjugate-gradient minimisation of ``DistErr``.

    Parameters
    ----------
        mol : mol3D
            mol3D class instance for molecule of interest.
        args : Namespace
            Namespace argument from inparse.
        catoms : list, optional
            List of connection atoms used to generate additional constraints
            if specified (see GetBoundsMatrices()). Default is empty.

    Returns
    -------
        Conf3D : mol3D
            mol3D class instance of new conformer.

    """
    # A fresh list per call: the previous mutable default ([]) was shared
    # between calls and is handed to helpers that are free to modify it.
    if catoms is None:
        catoms = []

    # Create a mol3D copy with a dummy metal.
    Conf3D = mol3D()
    Conf3D.copymol3D(mol)
    Conf3D.addAtom(atom3D('Fe', [0, 0, 0]))  # Add dummy metal to the mol3D
    # NOTE(review): the copymol3D visible in this file assigns OBMol by
    # reference, in which case the dummy atom and bonds added below also end
    # up in mol.OBMol -- confirm whether that is intended.
    dummy_metal = openbabel.OBAtom()  # And add the dummy metal to the OBmol
    dummy_metal.SetAtomicNum(26)
    Conf3D.OBMol.AddAtom(dummy_metal)
    for i in catoms:
        # OpenBabel atom indices are 1-based; the dummy metal is the last atom.
        Conf3D.OBMol.AddBond(i + 1, Conf3D.OBMol.NumAtoms(), 1)
    natoms = Conf3D.natoms
    Conf3D.createMolecularGraph()

    shape = findshape(args, mol)
    LB, UB = GetBoundsMatrices(Conf3D, natoms, catoms, shape)

    # Draw trial distance matrices until GetCMDists reports success.
    status = False
    while not status:
        D = Metrize(LB, UB, natoms)
        D0, status = GetCMDists(D, natoms)

    G = GetMetricMatrix(D, D0, natoms)
    L, V = Get3Eigs(G, natoms)
    X = np.dot(V, L)  # get projection
    x = np.reshape(X, 3 * natoms)
    res1 = optimize.fmin_cg(DistErr, x, fprime=DistErrGrad, gtol=0.1,
                            args=(LB, UB, natoms), disp=0)
    X = np.reshape(res1, (natoms, 3))
    Conf3D = SaveConf(X, Conf3D, True, catoms)
    return Conf3D
def getconnections(core, catom, Midx, BL, ABXang):
    """Collect candidate points around atom ``catom`` at a target angle.

    Points are generated with ``PointTranslateSph`` on a grid of
    ``phi = 1, 11, ..., 351`` and ``theta = 1, 2, ..., 178``.  A point ``P``
    is kept when ``180 - vecangle(catom - Midx, P - catom)`` lies within 1
    of ``ABXang``.

    Parameters
    ----------
    core : mol3D-like
        Molecule providing the two reference atoms.
    catom, Midx : int
        Indices of the atom the points are generated around and of the
        second reference atom.
    BL : float
        Distance passed to ``alignPtoaxis`` and ``PointTranslateSph``.
    ABXang : float
        Target angle (presumably in degrees -- TODO confirm ``vecangle``
        units).

    Returns
    -------
    list
        The accepted points, in generation order.
    """
    centre = core.getAtom(catom).coords()
    reference = core.getAtom(Midx).coords()
    seed_point = alignPtoaxis(centre, centre, vecdiff(centre, reference), BL)
    probe = atom3D('C', seed_point)

    accepted = []
    for phi in range(1, 359, 10):
        for theta in range(1, 179):
            candidate = PointTranslateSph(centre, seed_point, [BL, phi, theta])
            probe.setcoords(candidate)
            angle = 180-vecangle(vecdiff(centre, reference),
                                 vecdiff(candidate, centre))
            if not abs(angle - ABXang) < 1:
                continue
            accepted.append(candidate)
    return accepted
def pad_mol(mol, target_atoms):
    """Pad ``mol`` with placeholder atoms up to ``target_atoms`` atoms.

    An ``atom3D(Sym='X')`` placeholder is added until ``mol.natoms`` reaches
    ``target_atoms``, so that downstream representations (per the original
    comment, the Coulomb matrix) get a consistent size.  If more than
    ``target_atoms`` additions are made without reaching the target, the
    loop stops and ``'error padding mol'`` is printed.

    Parameters
    ----------
    mol : mol3D-like
        Molecule to pad; modified in place.
    target_atoms : int
        Desired atom count.

    Returns
    -------
    mol
        The same object that was passed in.
    """
    filler = atom3D(Sym='X')  # placeholder type
    filler.frozen = False
    # NOTE(review): the very same atom3D instance is handed to addAtom on
    # every iteration; if addAtom stores it by reference, all padding
    # entries alias one object -- confirm that is acceptable.
    additions = 0
    while mol.natoms < target_atoms:
        mol.addAtom(filler)
        additions += 1
        if additions > target_atoms:
            # Safety stop in case natoms does not grow as expected.
            print('error padding mol')
            break
    return mol
def readfromxyz(self, filename):
    """Load atoms from an xyz file into this molecule.

    ``self.graph`` is reset to ``[]``.  Every line of the file that splits
    into exactly four whitespace-separated fields and whose first field is
    a key of ``globalvars().endict()`` is added as an atom; all other lines
    (atom count, comment line, blanks) are ignored.

    Parameters
    ----------
    filename : str
        Path to the file.  Everything from the first ``'.xyz'`` onward is
        dropped and ``'.xyz'`` is appended, so the extension is optional.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If a coordinate field cannot be converted to ``float``.
    """
    en_dict = globalvars().endict()
    self.graph = []
    path = filename.split('.xyz')[0] + '.xyz'
    # The context manager closes the file even if reading fails.
    with open(path, 'r') as handle:
        lines = handle.read().splitlines()
    for line in lines:
        fields = line.split()
        # The former ``len(filter(None, ...))`` re-check raised TypeError on
        # Python 3 (filter returns an iterator) and was redundant anyway:
        # str.split() never yields empty fields.
        if len(fields) == 4 and fields[0] in en_dict:
            coords = [float(fields[1]), float(fields[2]), float(fields[3])]
            self.addAtom(atom3D(fields[0], coords))
def tf_ANN_preproc(args, ligs, occs, dents, batslist, tcats, licores):
    """Prepare the inputs for, and run, the ANN property predictions.

    The ligand lists are expanded so that every occurrence has its own
    entry, the metal / oxidation state and the ligand arrangement are
    checked, the ligands are loaded, and then either the regular models
    (spin splitting, bond lengths, HOMO, gap, geometry / spin-contamination
    classifiers) or, for catalytic structures, the oxo / HAT / oxo20 /
    homo_empty models are evaluated.  Results are printed and collected in
    a dictionary.

    Parameters
    ----------
    args : Namespace
        Reads ``core``, ``debug``, ``decoration``, ``decoration_index``,
        ``geometry``, ``oxstate``, ``spin`` and ``exchange``.
    ligs, occs, dents, tcats : list
        Parallel per-ligand lists (indexed together): ligand identifiers,
        occurrence counts, and the ``dents`` / ``tcats`` entries that are
        replicated per occurrence.
    batslist : list
        Passed through to ``tf_check_ligands``.
    licores : object
        Passed through to ``lig_load``.

    Returns
    -------
    tuple
        ``(valid, ANN_reason, ANN_attributes, catalysis)``.  ``ANN_reason``
        is an empty dict when no rejection reason was recorded and a string
        otherwise; ``ANN_attributes`` maps result names to values.

    Notes
    -----
    NOTE(review): the indentation of this function was lost in SOURCE and
    has been reconstructed; places where the nesting changes behaviour are
    marked below and should be checked against the upstream file.

    NOTE(review): the identifier ``homo`` was mangled to ``h**o`` in SOURCE
    (an invalid assignment target) and has been restored.  The ``'h**o'`` /
    ``'H**O'`` string literals look like victims of the same text filter,
    but are kept byte-for-byte because the model and key names expected
    downstream are not visible here -- confirm and restore if so.
    """
    current_time = time.time()
    start_time = current_time
    last_time = current_time

    ######################
    ANN_reason = {}
    ANN_attributes = {}
    ######################

    r = 0
    emsg = list()
    valid = True
    catalysis = False
    metal = args.core
    this_metal = metal.lower()
    if len(this_metal) > 2:
        this_metal = this_metal[0:2]
    newligs = []
    newcats = []
    newdents = []
    newoccs = []
    # Fixed six slots: one per coordination site of an octahedral complex
    # (presumably -- more than six decorated occurrences would overflow).
    newdecs = [False] * 6
    newdec_inds = [[]] * 6
    ANN_trust = False
    count = -1
    # Expand every ligand into one entry per occurrence.
    for i, lig in enumerate(ligs):
        this_occ = occs[i]
        if args.debug:
            print(('working on lig: ' + str(lig)))
            print(('occ is ' + str(this_occ)))
        for j in range(0, int(this_occ)):
            count += 1
            newligs.append(lig)
            newdents.append(dents[i])
            newcats.append(tcats[i])
            newoccs.append(1)
            if args.decoration:
                newdecs[count] = (args.decoration[i])
                newdec_inds[count] = (args.decoration_index[i])

    ligs = newligs
    dents = newdents
    tcats = newcats
    occs = newoccs
    if args.debug:
        print('tf_nn has finisihed prepping ligands')

    if not args.geometry == "oct":
        emsg.append(
            "[ANN] Geometry is not supported at this time, MUST give -geometry = oct"
        )
        valid = False
        ANN_reason = 'geometry not oct'
    if not args.oxstate:
        emsg.append("\n oxidation state must be given")
        valid = False
        ANN_reason = 'oxstate not given'
    if valid:
        oxidation_state = args.oxstate
        valid, oxidation_state = check_metal(this_metal, oxidation_state)
        if int(oxidation_state) in [3, 4, 5]:
            catalytic_moieties = ['oxo', 'x', 'hydroxyl', '[O--]', '[OH-]']
            if args.debug:
                print(('the ligands are', ligs))
                print((set(ligs).intersection(set(catalytic_moieties))))
            if len(set(ligs).intersection(set(catalytic_moieties))) > 0:
                catalysis = True
        # generate key in descriptor space
        ox = int(oxidation_state)
        spin = args.spin
        if args.debug:
            print(('metal is ' + str(this_metal)))
            print(('metal validity', valid))
    # NOTE(review): the following checks are reconstructed at function
    # level, so a later reason overwrites an earlier one -- confirm.
    if not valid and not catalysis:
        emsg.append("\n Oxidation state not available for this metal")
        ANN_reason = 'ox state not available for metal'
    if valid:
        high_spin, spin_ops = spin_classify(this_metal, spin, ox)
    if not valid and not catalysis:
        emsg.append("\n this spin state not available for this metal")
        ANN_reason = 'spin state not available for metal'
    if emsg:
        print((str(" ".join(["ANN messages:"] + [str(i) for i in emsg]))))

    current_time = time.time()
    metal_check_time = current_time - last_time
    last_time = current_time
    if args.debug:
        print(('checking metal/ox took ' +
               "{0:.2f}".format(metal_check_time) + ' seconds'))

    if valid or catalysis:
        (valid, axial_ligs, equitorial_ligs, ax_dent, eq_dent, ax_tcat,
         eq_tcat, axial_ind_list, equitorial_ind_list, ax_occs, eq_occs,
         pentadentate) = tf_check_ligands(ligs, batslist, dents, tcats, occs,
                                          args.debug)

        if args.debug:
            print(("ligand validity is " + str(valid)))
            print(('Occs', occs))
            print(('Ligands', ligs))
            print(('Dents', dents))
            print(('Bats (backbone atoms)', batslist))
            print(('lig validity', valid))
            print(('ax ligs', axial_ligs))
            print(('eq ligs', equitorial_ligs))
            print(('spin is', spin))

        # Catalytic structures never take the regular ("valid") model path.
        if catalysis:
            valid = False
    if (not valid) and (not catalysis):
        ANN_reason = 'found incorrect ligand symmetry'
    elif not valid and catalysis:
        if args.debug:
            print('tf_nn detects catalytic')
        ANN_reason = 'catalytic structure presented'

    # placeholder for metal
    metal_mol = mol3D()
    metal_mol.addAtom(atom3D(metal))

    net_lig_charge = 0
    if valid or catalysis:
        if args.debug:
            print('loading axial ligands')
        ax_ligands_list = list()
        eq_ligands_list = list()
        for ii, axl in enumerate(axial_ligs):
            ax_lig3D, r_emsg = lig_load(axl, licores)  # load ligand
            net_lig_charge += ax_lig3D.charge
            if r_emsg:
                emsg += r_emsg
            if ax_tcat:
                ax_lig3D.cat = ax_tcat
                if args.debug:
                    print(('custom ax connect atom given (0-ind) ' +
                           str(ax_tcat)))
            if pentadentate and len(ax_lig3D.cat) > 1:
                ax_lig3D.cat = [ax_lig3D.cat[-1]]
            this_lig = ligand(mol3D(), [], ax_dent)
            this_lig.mol = ax_lig3D

            # check decoration index
            # NOTE(review): unlike the equatorial loop, this_lig.mol is
            # bound before decoration, so a decorated axial ligand returned
            # by decorate_ligand is not the object stored in this_lig.
            if newdecs:
                if newdecs[axial_ind_list[ii]]:
                    print(('decorating ' + str(axl) + ' with ' +
                           str(newdecs[axial_ind_list[ii]]) + ' at sites ' +
                           str(newdec_inds[axial_ind_list[ii]])))
                    ax_lig3D = decorate_ligand(args, axl,
                                               newdecs[axial_ind_list[ii]],
                                               newdec_inds[axial_ind_list[ii]])
            ax_lig3D.convert2mol3D()  # mol3D representation of ligand
            for jj in range(0, ax_occs[ii]):
                ax_ligands_list.append(this_lig)
        print(('Obtained the net ligand charge, which is... ', net_lig_charge))
        if args.debug:
            print('ax_ligands_list:')
            print(ax_ligands_list)
            print([h.mol.cat for h in ax_ligands_list])

        if args.debug:
            print(('loading equitorial ligands ' + str(equitorial_ligs)))
        for ii, eql in enumerate(equitorial_ligs):
            eq_lig3D, r_emsg = lig_load(eql, licores)  # load ligand
            net_lig_charge += eq_lig3D.charge
            if r_emsg:
                emsg += r_emsg
            if eq_tcat:
                eq_lig3D.cat = eq_tcat
                if args.debug:
                    print(('custom eq connect atom given (0-ind) ' +
                           str(eq_tcat)))
            if pentadentate and len(eq_lig3D.cat) > 1:
                eq_lig3D.cat = eq_lig3D.cat[0:4]

            if newdecs:
                if args.debug:
                    print(('newdecs' + str(newdecs)))
                    print(
                        ('equitorial_ind_list is ' + str(equitorial_ind_list)))
                c = 0
                if newdecs[equitorial_ind_list[ii]]:
                    if args.debug:
                        print(('decorating ' + str(eql) + ' with ' +
                               str(newdecs[equitorial_ind_list[ii]]) +
                               ' at sites ' +
                               str(newdec_inds[equitorial_ind_list[ii]])))
                    eq_lig3D = decorate_ligand(
                        args, eql, newdecs[equitorial_ind_list[ii]],
                        newdec_inds[equitorial_ind_list[ii]])
                    c += 1

            eq_lig3D.convert2mol3D()  # mol3D representation of ligand
            this_lig = ligand(mol3D(), [], eq_dent)
            this_lig.mol = eq_lig3D

            for jj in range(0, eq_occs[ii]):
                eq_ligands_list.append(this_lig)
        # NOTE(review): timing output and the xyz dumps are reconstructed as
        # debug-only; confirm they are not meant to run unconditionally.
        if args.debug:
            print('eq_ligands_list:')
            print(eq_ligands_list)

            current_time = time.time()
            ligand_check_time = current_time - last_time
            last_time = current_time
            print(('checking ligs took ' +
                   "{0:.2f}".format(ligand_check_time) + ' seconds'))
            print(
                ('writing copies of ligands as used in ANN to currrent dir : ' +
                 os.getcwd()))
            for kk, l in enumerate(ax_ligands_list):
                l.mol.writexyz('axlig-' + str(kk) + '.xyz')
            for kk, l in enumerate(eq_ligands_list):
                l.mol.writexyz('eqlig-' + str(kk) + '.xyz')
        # make description of complex
        custom_ligand_dict = {
            "eq_ligand_list": eq_ligands_list,
            "ax_ligand_list": ax_ligands_list,
            "eq_con_int_list": [h.mol.cat for h in eq_ligands_list],
            "ax_con_int_list": [h.mol.cat for h in ax_ligands_list]
        }

        ox_modifier = {metal: ox}

        this_complex = assemble_connectivity_from_parts(
            metal_mol, custom_ligand_dict)

        if args.debug:
            print('custom_ligand_dict is : ')
            print(custom_ligand_dict)

    if args.debug:
        print(('finished checking ligands, valid is ' + str(valid)))
        print('assembling RAC custom ligand configuration dictionary')

    if valid:
        # =====Classifiers:=====
        _descriptor_names = ["oxstate", "spinmult", "charge_lig"]
        _descriptors = [ox, spin, net_lig_charge]
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)
        descriptor_names = _descriptor_names + descriptor_names
        descriptors = _descriptors + descriptors
        flag_oct, geo_lse = ANN_supervisor("geo_static_clf", descriptors,
                                           descriptor_names, debug=args.debug)
        # Test for scikit-learn models
        # flag_oct, geo_lse = sklearn_supervisor("geo_static_clf", descriptors, descriptor_names, debug=False)
        sc_pred, sc_lse = ANN_supervisor("sc_static_clf", descriptors,
                                         descriptor_names, debug=args.debug)
        ANN_attributes.update({
            "geo_label": 0 if flag_oct[0, 0] <= 0.5 else 1,
            "geo_prob": flag_oct[0, 0],
            "geo_LSE": geo_lse[0],
            "geo_label_trust": lse_trust(geo_lse),
            "sc_label": 0 if sc_pred[0, 0] <= 0.5 else 1,
            "sc_prob": sc_pred[0, 0],
            "sc_LSE": sc_lse[0],
            "sc_label_trust": lse_trust(sc_lse)
        })

        # build RACs without geo
        con_mat = this_complex.graph
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)

        # get one-hot-encoding (OHE)
        descriptor_names, descriptors = create_OHE(descriptor_names,
                                                   descriptors, metal,
                                                   oxidation_state)

        # get alpha (stored as a fraction in this branch)
        alpha = 0.2  # default for B3LYP
        if args.exchange:
            try:
                if float(args.exchange) > 1:
                    alpha = float(args.exchange) / 100  # if given as %
                elif float(args.exchange) <= 1:
                    alpha = float(args.exchange)
            except (TypeError, ValueError):
                # float() could not parse the value: keep the default.
                print('cannot cast exchange argument as a float, using 20%')
        descriptor_names += ['alpha']
        descriptors += [alpha]
        descriptor_names += ['ox']
        descriptors += [ox]
        descriptor_names += ['spin']
        descriptors += [spin]
        if args.debug:
            current_time = time.time()
            rac_check_time = current_time - last_time
            last_time = current_time
            print(('getting RACs took ' + "{0:.2f}".format(rac_check_time) +
                   ' seconds'))

        # get spin splitting:
        split, latent_split = ANN_supervisor('split', descriptors,
                                             descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
            print(('split ANN took ' + "{0:.2f}".format(split_ANN_time) +
                   ' seconds'))

        # get bond lengths:
        # NOTE(review): for an oxidation state other than '2' or '3' neither
        # branch runs and r_ls / r_hs stay unbound below.
        if oxidation_state == '2':
            r_ls, latent_r_ls = ANN_supervisor('ls_ii', descriptors,
                                               descriptor_names, args.debug)
            r_hs, latent_r_hs = ANN_supervisor('hs_ii', descriptors,
                                               descriptor_names, args.debug)
        elif oxidation_state == '3':
            r_ls, latent_r_ls = ANN_supervisor('ls_iii', descriptors,
                                               descriptor_names, args.debug)
            r_hs, latent_r_hs = ANN_supervisor('hs_iii', descriptors,
                                               descriptor_names, args.debug)
        if not high_spin:
            r = r_ls[0]
        else:
            r = r_hs[0]

        if args.debug:
            current_time = time.time()
            GEO_ANN_time = current_time - last_time
            last_time = current_time
            print(('GEO ANN took ' + "{0:.2f}".format(GEO_ANN_time) +
                   ' seconds'))

        homo, latent_homo = ANN_supervisor('h**o', descriptors,
                                           descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            homo_ANN_time = current_time - last_time
            last_time = current_time
            print(('h**o ANN took ' + "{0:.2f}".format(homo_ANN_time) +
                   ' seconds'))

        gap, latent_gap = ANN_supervisor('gap', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            gap_ANN_time = current_time - last_time
            last_time = current_time
            print(('gap ANN took ' + "{0:.2f}".format(gap_ANN_time) +
                   ' seconds'))

        # get minimum distance to train (for splitting)
        split_dist = find_true_min_eu_dist("split", descriptors,
                                           descriptor_names)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        # The Euclidean distance is immediately superseded by the latent one.
        homo_dist = find_true_min_eu_dist("h**o", descriptors,
                                          descriptor_names)
        homo_dist = find_ANN_latent_dist("h**o", latent_homo, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min H**O dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        gap_dist = find_true_min_eu_dist("gap", descriptors, descriptor_names)
        gap_dist = find_ANN_latent_dist("gap", latent_gap, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min GAP dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))

        # save attributes for return
        ANN_attributes.update({'split': split[0][0]})
        ANN_attributes.update({'split_dist': split_dist})
        ANN_attributes.update({'This spin': spin})
        if split[0][0] < 0 and (abs(split[0]) > 5):
            ANN_attributes.update({'ANN_ground_state': spin_ops[1]})
        elif split[0][0] > 0 and (abs(split[0]) > 5):
            ANN_attributes.update({'ANN_ground_state': spin_ops[0]})
        else:
            ANN_attributes.update(
                {'ANN_ground_state': 'dgen ' + str(spin_ops)})

        ANN_attributes.update({'h**o': homo[0][0]})
        ANN_attributes.update({'gap': gap[0][0]})
        ANN_attributes.update({'homo_dist': homo_dist})
        ANN_attributes.update({'gap_dist': gap_dist})

        # now that we have bond predictions, we need to map these
        # back to a length of equal size as the original ligand request
        # in order for molSimplify to understand if
        ANN_bondl = len(ligs) * [False]
        added = 0
        for ii, eql in enumerate(equitorial_ind_list):
            for jj in range(0, eq_occs[ii]):
                ANN_bondl[added] = r[2]
                added += 1

        for ii, axl in enumerate(axial_ind_list):
            if args.debug:
                print((ii, axl, added, ax_occs))
            for jj in range(0, ax_occs[ii]):
                if args.debug:
                    print((jj, axl, added, r[ii]))
                ANN_bondl[added] = r[ii]
                added += 1

        ANN_attributes.update({'ANN_bondl': 4 * [r[2]] + [r[0], r[1]]})

        HOMO_ANN_trust = 'not set'
        HOMO_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(homo_dist) < 3:
            HOMO_ANN_trust_message = 'ANN results should be trustworthy for this complex '
            HOMO_ANN_trust = 'high'
        elif float(homo_dist) < 5:
            HOMO_ANN_trust_message = 'ANN results are probably useful for this complex '
            HOMO_ANN_trust = 'medium'
        elif float(homo_dist) <= 10:
            HOMO_ANN_trust_message = 'ANN results are fairly far from training data, be cautious '
            HOMO_ANN_trust = 'low'
        elif float(homo_dist) > 10:
            HOMO_ANN_trust_message = 'ANN results are too far from training data, be cautious '
            HOMO_ANN_trust = 'very low'
        ANN_attributes.update({'homo_trust': HOMO_ANN_trust})
        ANN_attributes.update({'gap_trust': HOMO_ANN_trust})

        ANN_trust = 'not set'
        ANN_trust_message = ""
        if float(split_dist / 3) < 0.25:
            ANN_trust_message = 'ANN results should be trustworthy for this complex '
            ANN_trust = 'high'
        elif float(split_dist / 3) < 0.75:
            ANN_trust_message = 'ANN results are probably useful for this complex '
            ANN_trust = 'medium'
        elif float(split_dist / 3) < 1.0:
            ANN_trust_message = 'ANN results are fairly far from training data, be cautious '
            ANN_trust = 'low'
        elif float(split_dist / 3) > 1.0:
            ANN_trust_message = 'ANN results are too far from training data, be cautious '
            ANN_trust = 'very low'
        ANN_attributes.update({'split_trust': ANN_trust})

        # print text to std out
        print(
            "******************************************************************"
        )
        print(
            "************** ANN is engaged and advising on spin ***************"
        )
        print(
            "************** and metal-ligand bond distances ****************"
        )
        print(
            "******************************************************************"
        )
        if high_spin:
            print(('You have selected a high-spin state, s = ' + str(spin)))
        else:
            print(('You have selected a low-spin state, s = ' + str(spin)))
        # report to stdout
        if split[0] < 0 and not high_spin:
            if abs(split[0]) > 5:
                print(
                    'warning, ANN predicts a high spin ground state for this complex'
                )
            else:
                print(
                    'warning, ANN predicts a near degenerate ground state for this complex'
                )
        elif split[0] >= 0 and high_spin:
            if abs(split[0]) > 5:
                print(
                    'warning, ANN predicts a low spin ground state for this complex'
                )
            else:
                print(
                    'warning, ANN predicts a near degenerate ground state for this complex'
                )
        print(('delta is', split[0], ' spin is ', high_spin))
        print(("ANN predicts a spin splitting (HS - LS) of " +
               "{0:.2f}".format(float(split[0])) + ' kcal/mol at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(('ANN low spin bond length (ax1/ax2/eq) is predicted to be: ' +
               " /".join(["{0:.2f}".format(float(i)) for i in r_ls[0]]) +
               ' angstrom'))
        print(('ANN high spin bond length (ax1/ax2/eq) is predicted to be: ' +
               " /".join(["{0:.2f}".format(float(i)) for i in r_hs[0]]) +
               ' angstrom'))
        print(('distance to splitting energy training data is ' +
               "{0:.2f}".format(split_dist)))
        print(ANN_trust_message)
        print(("ANN predicts a H**O value of " +
               "{0:.2f}".format(float(homo[0])) + ' eV at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(("ANN predicts a LUMO-H**O energetic gap value of " +
               "{0:.2f}".format(float(gap[0])) + ' eV at ' +
               "{0:.0f}".format(100 * alpha) + '% HFX'))
        print(HOMO_ANN_trust_message)
        print(('distance to H**O training data is ' +
               "{0:.2f}".format(homo_dist)))
        print(
            ('distance to GAP training data is ' + "{0:.2f}".format(gap_dist)))
        print(
            "*******************************************************************"
        )
        print(
            "************** ANN complete, saved in record file *****************"
        )
        print(
            "*******************************************************************"
        )
        from keras import backend as K
        # This is done to get rid of the attribute error that is a bug in tensorflow.
        K.clear_session()
        # NOTE(review): reconstructed as part of the ``if valid`` branch.
        current_time = time.time()
        total_ANN_time = current_time - start_time
        last_time = current_time
        print(('Total ML functions took ' + "{0:.2f}".format(total_ANN_time) +
               ' seconds'))

    if catalysis:
        print('-----In Catalysis Mode-----')
        # build RACs without geo
        con_mat = this_complex.graph
        descriptor_names, descriptors = get_descriptor_vector(
            this_complex, custom_ligand_dict, ox_modifier)
        # get alpha (stored as a percentage in this branch)
        alpha = 20  # default for B3LYP
        if args.exchange:
            try:
                if float(args.exchange) < 1:
                    alpha = float(args.exchange) * 100  # if given as %
                elif float(args.exchange) >= 1:
                    alpha = float(args.exchange)
            except (TypeError, ValueError):
                # float() could not parse the value: keep the default.
                print('cannot case exchange argument as a float, using 20%')
        descriptor_names += ['alpha', 'ox', 'spin', 'charge_lig']
        descriptors += [alpha, ox, spin, net_lig_charge]
        if args.debug:
            current_time = time.time()
            rac_check_time = current_time - last_time
            last_time = current_time
            print(('getting RACs took ' + "{0:.2f}".format(rac_check_time) +
                   ' seconds'))
        oxo, latent_oxo = ANN_supervisor('oxo', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
        oxo_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "oxo", latent_oxo, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min oxo dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'oxo': oxo[0][0]})
        ANN_attributes.update({'oxo_dist': oxo_dist})

        hat, latent_hat = ANN_supervisor('hat', descriptors, descriptor_names,
                                         args.debug)
        if args.debug:
            current_time = time.time()
            split_ANN_time = current_time - last_time
            last_time = current_time
            print(('HAT ANN took ' + "{0:.2f}".format(split_ANN_time) +
                   ' seconds'))
        hat_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "hat", latent_hat, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min hat dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'hat': hat[0][0]})
        ANN_attributes.update({'hat_dist': hat_dist})

        ########## for Oxo and HOMO optimization ##########
        oxo20, latent_oxo20 = ANN_supervisor('oxo20', descriptors,
                                             descriptor_names, args.debug)
        if args.debug:
            current_time = time.time()
            oxo20_ANN_time = current_time - last_time
            last_time = current_time
            print(('oxo20 ANN took ' + "{0:.2f}".format(oxo20_ANN_time) +
                   ' seconds'))
        # oxo20_dist = find_ANN_latent_dist("oxo20", latent_oxo20, args.debug)
        oxo20_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "oxo20", latent_oxo20, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min oxo20 dist took ' + "{0:.2f}".format(min_dist_time) +
                   ' seconds'))
        ANN_attributes.update({'oxo20': oxo20[0][0]})
        ANN_attributes.update({'oxo20_dist': oxo20_dist})
        # _ = find_ANN_latent_dist("oxo20", latent_oxo20, args.debug)
        # _ = find_true_min_eu_dist("oxo20", descriptors, descriptor_names, latent_space_vector=latent_oxo20)

        homo_empty, latent_homo_empty = ANN_supervisor('homo_empty',
                                                       descriptors,
                                                       descriptor_names,
                                                       args.debug)
        if args.debug:
            current_time = time.time()
            homo_empty_ANN_time = current_time - last_time
            last_time = current_time
            print(('homo_empty ANN took ' +
                   "{0:.2f}".format(homo_empty_ANN_time) + ' seconds'))
        # homo_empty_dist = find_ANN_latent_dist("homo_empty", latent_homo_empty, args.debug)
        homo_empty_dist, avg_10_NN_dist, avg_traintrain = find_ANN_10_NN_normalized_latent_dist(
            "homo_empty", latent_homo_empty, args.debug)
        if args.debug:
            current_time = time.time()
            min_dist_time = current_time - last_time
            last_time = current_time
            print(('min homo_empty dist took ' +
                   "{0:.2f}".format(min_dist_time) + ' seconds'))
        ANN_attributes.update({'homo_empty': homo_empty[0][0]})
        ANN_attributes.update({'homo_empty_dist': homo_empty_dist})
        # _ = find_ANN_latent_dist("homo_empty", latent_homo_empty, args.debug)
        # _ = find_true_min_eu_dist("homo_empty", descriptors, descriptor_names, latent_space_vector=latent_homo_empty)

        Oxo20_ANN_trust = 'not set'
        Oxo20_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(oxo20_dist) < 0.75:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results should be trustworthy for this complex '
            Oxo20_ANN_trust = 'high'
        elif float(oxo20_dist) < 1:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are probably useful for this complex '
            Oxo20_ANN_trust = 'medium'
        elif float(oxo20_dist) <= 1.25:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are fairly far from training data, be cautious '
            Oxo20_ANN_trust = 'low'
        elif float(oxo20_dist) > 1.25:
            Oxo20_ANN_trust_message = 'Oxo20 ANN results are too far from training data, be cautious '
            Oxo20_ANN_trust = 'very low'
        ANN_attributes.update({'oxo20_trust': Oxo20_ANN_trust})

        homo_empty_ANN_trust = 'not set'
        homo_empty_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(homo_empty_dist) < 0.75:
            homo_empty_ANN_trust_message = 'homo_empty ANN results should be trustworthy for this complex '
            homo_empty_ANN_trust = 'high'
        elif float(homo_empty_dist) < 1:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are probably useful for this complex '
            homo_empty_ANN_trust = 'medium'
        elif float(homo_empty_dist) <= 1.25:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are fairly far from training data, be cautious '
            homo_empty_ANN_trust = 'low'
        elif float(homo_empty_dist) > 1.25:
            homo_empty_ANN_trust_message = 'homo_empty ANN results are too far from training data, be cautious '
            homo_empty_ANN_trust = 'very low'
        ANN_attributes.update({'homo_empty_trust': homo_empty_ANN_trust})

        ####################################################

        Oxo_ANN_trust = 'not set'
        Oxo_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(oxo_dist) < 3:
            Oxo_ANN_trust_message = 'Oxo ANN results should be trustworthy for this complex '
            Oxo_ANN_trust = 'high'
        elif float(oxo_dist) < 5:
            Oxo_ANN_trust_message = 'Oxo ANN results are probably useful for this complex '
            Oxo_ANN_trust = 'medium'
        elif float(oxo_dist) <= 10:
            Oxo_ANN_trust_message = 'Oxo ANN results are fairly far from training data, be cautious '
            Oxo_ANN_trust = 'low'
        elif float(oxo_dist) > 10:
            Oxo_ANN_trust_message = 'Oxo ANN results are too far from training data, be cautious '
            Oxo_ANN_trust = 'very low'
        ANN_attributes.update({'oxo_trust': Oxo_ANN_trust})

        HAT_ANN_trust = 'not set'
        HAT_ANN_trust_message = ""
        # Not quite sure if this should be divided by 3 or not, since RAC-155 descriptors
        if float(hat_dist) < 3:
            HAT_ANN_trust_message = 'HAT ANN results should be trustworthy for this complex '
            HAT_ANN_trust = 'high'
        elif float(hat_dist) < 5:
            HAT_ANN_trust_message = 'HAT ANN results are probably useful for this complex '
            HAT_ANN_trust = 'medium'
        elif float(hat_dist) <= 10:
            HAT_ANN_trust_message = 'HAT ANN results are fairly far from training data, be cautious '
            HAT_ANN_trust = 'low'
        elif float(hat_dist) > 10:
            HAT_ANN_trust_message = 'HAT ANN results are too far from training data, be cautious '
            HAT_ANN_trust = 'very low'
        ANN_attributes.update({'hat_trust': HAT_ANN_trust})
        print(
            "*******************************************************************"
        )
        print(
            "************** CATALYTIC ANN ACTIVATED! ****************"
        )
        print(
            "*********** Currently advising on Oxo and HAT energies ************"
        )
        print(
            "*******************************************************************"
        )
        print(("ANN predicts a Oxo20 energy of " +
               "{0:.2f}".format(float(oxo20[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(Oxo20_ANN_trust_message)
        print(('Distance to Oxo20 training data in the latent space is ' +
               "{0:.2f}".format(oxo20_dist)))
        print(("ANN predicts a empty site beta H**O level of " +
               "{0:.2f}".format(float(homo_empty[0])) + ' eV at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(homo_empty_ANN_trust_message)
        print((
            'Distance to empty site beta H**O level training data in the latent space is '
            + "{0:.2f}".format(homo_empty_dist)))
        print(
            '-------------------------------------------------------------------'
        )
        print(("ANN predicts a oxo formation energy of " +
               "{0:.2f}".format(float(oxo[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(Oxo_ANN_trust_message)
        print(('Distance to oxo training data in the latent space is ' +
               "{0:.2f}".format(oxo_dist)))
        print(("ANN predicts a HAT energy of " +
               "{0:.2f}".format(float(hat[0])) + ' kcal/mol at ' +
               "{0:.2f}".format(alpha) + '% HFX'))
        print(HAT_ANN_trust_message)
        print(('Distance to HAT training data in the latent space is ' +
               "{0:.2f}".format(hat_dist)))
        print(
            "*******************************************************************"
        )
        print(
            "************** ANN complete, saved in record file *****************"
        )
        print(
            "*******************************************************************"
        )
        from keras import backend as K
        # This is done to get rid of the attribute error that is a bug in tensorflow.
        K.clear_session()

    if catalysis:
        current_time = time.time()
        total_ANN_time = current_time - start_time
        last_time = current_time
        print(('Total Catalysis ML functions took ' +
               "{0:.2f}".format(total_ANN_time) + ' seconds'))

    if not valid and not ANN_reason and not catalysis:
        ANN_reason = ' uncaught rejection (see sdout/stderr)'

    return valid, ANN_reason, ANN_attributes, catalysis
def _lowest_energy_placement(core, substr, points, place):
    """Try every candidate point and keep the lowest-energy structure.

    Fresh copies of ``core`` and ``substr`` are handed to ``place`` for each
    candidate, so the caller's molecules are not modified by the trials.

    Parameters
    ----------
    core, substr : mol3D
        Molecules to copy for every trial.
    points : sequence
        Candidate points, passed to ``place`` one at a time.
    place : callable
        Called as ``place(core_copy, substr_copy, point)``; must return a
        ``(ts3D, energy)`` pair.

    Returns
    -------
    mol3D or None
        A copy of the structure with the lowest energy, or ``None`` when
        ``points`` is empty or no candidate has an energy below 1e6.
    """
    en_min = 1e6  # candidates at or above this energy are never accepted
    best = None
    for n, point in enumerate(points):
        print(('Evaluating FF energy of point ' +
               str(n+1)+' of '+str(len(points))))
        coretmp = mol3D()
        coretmp.copymol3D(core)
        substrtmp = mol3D()
        substrtmp.copymol3D(substr)
        ts3Dtmp, enc = place(coretmp, substrtmp, point)
        if enc < en_min:
            en_min = enc
            best = mol3D()
            best.copymol3D(ts3Dtmp)
    return best


def tsgen(mode, args, rootdir, core, substr, compreact, substreact, globs):
    """Build a transition-state guess from a core and a substrate.

    The substrate is placed next to the core, the combined structure is
    written to an xyz file, input arguments are recorded via
    ``getinputargs`` and a diagnostic report is written next to the xyz file.

    Parameters
    ----------
    mode : int
        1 = oxidative addition of a single group, 3 = abstraction.
        Mode 2 is not supported; any other value is rejected.
    args : namespace
        Run options; this function reads ``substplaceff``, ``ffoption``,
        ``core``, ``bind``, ``nambsmi`` and ``debug``.
    rootdir : str
        Output directory passed to ``name_TS``.
    core, substr : mol3D
        Core complex and substrate. Both may be modified in place
        (bond lengths are adjusted and the substrate is aligned).
    compreact, substreact : int
        Index of the reacting atom in ``core`` and ``substr`` respectively.
    globs
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    strfiles : list
        Names of the structure files written (empty on failure).
    emsg : bool or str
        ``False`` on success, otherwise an error message.
    this_diag : run_diag
        Diagnostic object; populated only on success.
    """
    emsg = False
    this_diag = run_diag()
    strfiles = []
    adjsidx = substr.getBondedAtoms(substreact)[0]
    adjcidx = core.getBondedAtoms(compreact)[0]
    # initialize connecting and frozen atoms for FF opt: start with every
    # core atom
    frozenats = list(range(core.natoms))
    # also freeze the abstracted atom and the heavy atom bonded to it
    # (substrate indices are shifted by core.natoms, presumably because the
    # substrate atoms follow the core atoms in the combined molecule)
    frozenats.append(core.natoms+substreact)
    frozenats.append(core.natoms+adjsidx)
    connected = [core.natoms+substreact]
    # START FUNCTIONALIZING
    ts3D = None
    if mode == 2:
        emsg = 'Sorry, this mode is not supported yet. Exiting...'
        return strfiles, emsg, this_diag
    elif mode == 1:  # oxidative addition of a single group
        # get first connecting point
        MXBL = MXdistcoeff*(core.getAtom(compreact).rad +
                            substr.getAtom(substreact).rad)
        cpoint = getconnection(core, compreact, MXBL)
        # distort substrate molecule
        XYBL = XYcoeff*(substr.getAtom(substreact).rad +
                        substr.getAtom(adjsidx).rad)
        substr.BCM(adjsidx, substreact, XYBL)
        # align substrate molecule
        substr.alignmol(substr.getAtom(substreact), atom3D('H', cpoint))
        # temporary copy of the core with a placeholder atom at cpoint, used
        # only to generate the candidate alignment points
        tmp3D = mol3D()
        tmp3D.copymol3D(core)
        tmp3D.addAtom(atom3D('Cl', cpoint))
        ligalignpts = getconnections(
            tmp3D, tmp3D.natoms-1, compreact, XYBL, MXYang)
        if args.substplaceff:
            # full FF substrate placement
            print('Full FF-based substrate placement specified.')
            ts3D = _lowest_energy_placement(
                core, substr, ligalignpts,
                lambda c, s, P: substplaceff_mode1(
                    c, s, substreact, compreact, cpoint, P, args,
                    connected, frozenats))
        else:
            # cheap substrate placement
            print('Cheap substrate placement')
            ligalignpt = substplacecheap(core, ligalignpts, compreact)
            ts3D, enc = substplaceff_mode1(
                core, substr, substreact, compreact, cpoint, ligalignpt,
                args, connected, frozenats)
    elif mode == 3:  # abstraction
        # distort A-B bond
        ABBL = distance(core.getAtomCoords(compreact), core.getAtomCoords(
            adjcidx)) + 0.05*(core.getAtom(compreact).rad +
                              core.getAtom(adjcidx).rad)
        core.BCM(compreact, adjcidx, ABBL)
        # set B-X distance
        BXBL = 1.1*(substr.getAtom(substreact).rad +
                    core.getAtom(compreact).rad)
        # get possible connecting points
        connPts = getconnections(core, compreact, adjcidx, BXBL, ABXang)
        if args.substplaceff:
            # full FF substrate placement
            print('Full FF-based substrate placement specified.')
            ts3D = _lowest_energy_placement(
                core, substr, connPts,
                lambda c, s, P: substplaceff_mode3(
                    c, s, substreact, compreact, P, args,
                    connected, frozenats))
        else:
            # cheap substrate placement
            print('Cheap substrate placement')
            cpoint = substplacecheap(core, connPts, compreact)
            ts3D, enc = substplaceff_mode3(
                core, substr, substreact, compreact, cpoint, args,
                connected, frozenats)
            if 'a' in args.ffoption:
                print('FF optimized remainder of substrate')
    else:
        # an unknown mode used to fall through and crash on the unbound ts3D
        emsg = ('Sorry, mode ' + str(mode) +
                ' is not a recognized TS generation mode. Exiting...')
        return strfiles, emsg, this_diag
    if ts3D is None:
        # the FF scan had no candidate points or rejected all of them
        emsg = 'No acceptable substrate placement was found. Exiting...'
        return strfiles, emsg, this_diag
    # NOTE(review): substplaceff_mode3 also adds substr.charge to the combined
    # molecule; confirm the charge is not being counted twice here.
    ts3D.charge += substr.charge
    # END FUNCTIONALIZING
    fname = name_TS(rootdir, args.core, substr, args,
                    bind=args.bind, bsmi=args.nambsmi)
    ts3D.writexyz(fname)
    strfiles.append(fname)
    getinputargs(args, fname)
    pfold = rootdir.split('/', 1)[-1]
    # check for molecule sanity
    sanity, d0 = ts3D.sanitycheck(True)
    if args.debug:
        print(('setting sanity diag, min dist at ' +
               str(d0) + ' (higher is better)'))
    this_diag.set_sanity(sanity, d0)
    this_diag.set_mol(ts3D)
    this_diag.write_report(fname+'.report')
    # a truthy flag from sanitycheck triggers the warning (presumably it
    # signals atoms that are too close; confirm against mol3D.sanitycheck)
    if sanity:
        print(('WARNING: Generated complex is not good! Minimum distance between atoms:' +
               "{0:.2f}".format(d0)+'A\n'))
    print(('\nIn folder '+pfold+' generated 1 structure(s)!'))
    return strfiles, emsg, this_diag