def main(**kwargs):
    """
    Write the conformer(s) of one titled molecule from a molecule file to a
    mol2 file.

    Parameters
    ----------
    **kwargs
        Options for this run. The keys read are:
        'infn'   : name of the input molecule file
        'suffix' : text appended to the input basename for the output name
        'title'  : title of the molecule to extract
        'sdtag'  : SD tag to match on; "" writes the whole matched molecule
        'value'  : value that 'sdtag' must equal for a conformer to be written

    Notes
    -----
    The output is named "<input basename>_<suffix>.mol2".
    """
    # Use the options that were actually passed in. Fall back to a
    # module-level `opt` only when called with no keyword arguments, which
    # keeps older call sites that relied on the global working.
    opt = kwargs if kwargs else globals().get('opt', {})

    outfn = os.path.splitext(opt['infn'])[0] + '_' + opt['suffix'] + '.mol2'
    success = False

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(opt['infn']):
        oechem.OEThrow.Warning("Unable to open %s for reading" % opt['infn'])
        return

    try:
        for mol in ifs.GetOEMols():
            if mol.GetTitle() != opt['title']:
                continue

            # write out all confs in this mol if no SD tag is specified
            if opt['sdtag'] == "":
                write_conf_mol(outfn, mol)
                return

            # look for the conformer in this mol with specific SD tag value
            for conf in mol.GetConfs():
                if oechem.OEGetSDData(conf, opt['sdtag']) == opt['value']:
                    success = True
                    write_conf_mol(outfn, conf)

        if not success:
            print("\n** Found no confs matching your criteria. **")
    finally:
        # close the stream on every exit path, including the early return
        ifs.close()
def extractXY(fname, tag):
    """
    Collect (molecule title, relative energy) pairs for every conformer in a
    molecule file.

    Parameters
    ----------
    fname : str
        Name of the molecule file to read.
    tag : str
        SD tag whose value is parsed as a float for each conformer.

    Returns
    -------
    xlist : list of int
        Molecule title converted to int, one entry per conformer kept.
    ylist : numpy.ndarray
        Tag values relative to the first kept value, multiplied by 627.5095
        (the original comment describes this as Hartree to kcal/mol).
        Empty if the file could not be opened or no conformer had a
        parseable value.

    Raises
    ------
    ValueError
        If a molecule title cannot be converted to int.
    """
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(fname):
        oechem.OEThrow.Warning("Unable to open %s for reading" % fname)
        # nothing to read: return empty results instead of failing below
        return [], np.array([])

    xlist = []
    ylist = []
    try:
        for mol in ifs.GetOEMols():
            for j, conf in enumerate(mol.GetConfs()):
                title_num = int(mol.GetTitle())
                try:
                    energy = float(oechem.OEGetSDData(conf, tag))
                except ValueError as e:
                    print("Missing tag data for mol {}, conf {}! Skipping.".format(
                        mol.GetTitle(), j))
                    print(e)
                    continue
                xlist.append(title_num)
                ylist.append(energy)
    finally:
        ifs.close()

    ### convert to numpy array, take relative e, convert to kcal/mol
    ylist = np.array(ylist)
    if ylist.size == 0:
        # no reference value to subtract
        return xlist, ylist
    ylist = ylist - ylist[0]
    ylist = 627.5095 * ylist
    return xlist, ylist
def convertSDFfile(reffile, filtfile, writeout):
    """
    Write the reference molecule once per conformer of the filtered file,
    each time carrying that conformer's coordinates.

    Parameters
    ----------
    reffile : str
        Name of the reference molecule file (expected to hold ONE molecule;
        the filtered stream is consumed while processing the first one).
    filtfile : str
        Name of the file whose conformer coordinates are transferred.
    writeout : str
        Name of the output file. If it already exists nothing is written.

    Returns
    -------
    None
    """
    refifs = oechem.oemolistream()
    filtifs = oechem.oemolistream()
    ofs = oechem.oemolostream()

    # streams that were successfully opened and so must be closed on exit
    opened = []
    try:
        ### Read in reference file, but don't need its old conformers
        if not refifs.open(reffile):
            oechem.OEThrow.Warning("Unable to open %s for reading" % reffile)
            return
        opened.append(refifs)

        ### Read in filtered file and distinguish each molecule's conformers
        filtifs.SetConfTest(oechem.OEAbsoluteConfTest())
        if not filtifs.open(filtfile):
            oechem.OEThrow.Warning("Unable to open %s for reading" % filtfile)
            return
        opened.append(filtifs)

        ### Open outstream file.
        if os.path.exists(writeout):
            print("File already exists: %s. Skip getting results.\n" % (writeout))
            return
        if not ofs.open(writeout):
            oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)
        opened.append(ofs)

        ### Loop and write molecules. (though refifs should only have ONE mol)
        for rmol in refifs.GetOEMols():
            for fmol in filtifs.GetOEMols():
                for conf in fmol.GetConfs():
                    rmol.SetCoords(conf.GetCoords())
                    oechem.OEWriteConstMolecule(ofs, rmol)
    finally:
        for stream in opened:
            stream.close()
def load_file(fname):
    """
    Open a molecule file for multi-conformer reading.

    Parameters
    ----------
    fname : str
        Name of the molecule file to open.

    Returns
    -------
    The object returned by the input stream's GetOEMols(), used elsewhere in
    this file as an iterable of molecules. The stream is left open.
    """
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsoluteConfTest())
    opened_ok = stream.open(fname)
    if not opened_ok:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % fname)
    return stream.GetOEMols()
def main(argv=[__name__]):
    """
    Depict every molecule of an input file into a multi-page PDF report.

    Command-line options are parsed through an OEInterface built from
    InterfaceData; "-in", "-out" and the optional "-ringdict" are read here.

    Parameters
    ----------
    argv : list of str
        Command-line arguments handed to OEParseCommandLine.

    Returns
    -------
    int
        1 if the command line could not be parsed, otherwise 0.
    """
    interface = oechem.OEInterface(InterfaceData)
    oedepict.OEConfigureReportOptions(interface)
    oedepict.OEConfigurePrepareDepictionOptions(interface)
    oedepict.OEConfigure2DMolDisplayOptions(interface)

    if not oechem.OEParseCommandLine(interface, argv):
        return 1

    # input: read conformers as part of their parent molecule
    in_name = interface.GetString("-in")
    in_stream = oechem.oemolistream()
    in_stream.SetConfTest(oechem.OEAbsoluteConfTest())  # VTL
    if not in_stream.open(in_name):
        oechem.OEThrow.Fatal("Cannot open input file!")

    # output: only PDF is accepted
    out_name = interface.GetString("-out")
    extension = oechem.OEGetFileExtension(out_name)
    if extension != "pdf":
        oechem.OEThrow.Fatal("Output must be PDF format.")

    out_stream = oechem.oeofstream()
    if not out_stream.open(out_name):
        oechem.OEThrow.Fatal("Cannot open output file!")

    # optional user-supplied ring dictionary
    if interface.HasString("-ringdict"):
        ring_dict_name = interface.GetString("-ringdict")
        if not oechem.OEInit2DRingDictionary(ring_dict_name):
            oechem.OEThrow.Warning("Cannot use user-defined ring dictionary!")

    report_opts = oedepict.OEReportOptions()
    oedepict.OESetupReportOptions(report_opts, interface)
    report_opts.SetFooterHeight(25.0)
    report = oedepict.OEReport(report_opts)

    prep_opts = oedepict.OEPrepareDepictionOptions()
    oedepict.OESetupPrepareDepictionOptions(prep_opts, interface)

    display_opts = oedepict.OE2DMolDisplayOptions()
    oedepict.OESetup2DMolDisplayOptions(display_opts, interface)
    display_opts.SetDimensions(report.GetCellWidth(), report.GetCellHeight(),
                               oedepict.OEScale_AutoScale)

    # one report cell per molecule
    for molecule in in_stream.GetOEMols():  # VTL ignore confs; dont use GetOEGraphMols
        print(molecule.GetTitle())  # VTL
        report_cell = report.NewCell()
        oedepict.OEPrepareDepiction(molecule, prep_opts)
        display = oedepict.OE2DMolDisplay(molecule, display_opts)
        oedepict.OERenderMolecule(report_cell, display)

    # page numbering in each footer
    footer_font = oedepict.OEFont(oedepict.OEFontFamily_Default,
                                  oedepict.OEFontStyle_Bold, 12,
                                  oedepict.OEAlignment_Center, oechem.OEBlack)
    for page_index, footer in enumerate(report.GetFooters()):
        page_text = "Page %d of %d" % (page_index + 1, report.NumPages())
        oedepict.OEDrawTextToCenter(footer, page_text, footer_font)

    oedepict.OEWriteReport(out_stream, extension, report)

    return 0
def filter_confs(rmsdfile, tag, rmsdout):
    """
    Read in OEMols (and each of their conformers) in 'rmsdfile'.
    For each molecule:
        rough filter conformers based on energy differences specified by 'tag',
        fine filter conformers based on RMSD values.

    The filtering itself is delegated to identify_minima. A per-molecule
    conformer count is appended to "numConfs.txt" in the current working
    directory.

    Parameters
    ----------
    rmsdfile : str
        Name of SDF file with conformers to be filtered
    tag : str
        SD tag name with the energy value to roughly screen conformers before RMSD
        Screening works by removing conformers of very similar energies, where
        "similar" is defined by thresE parameter.
        Examples:
        - "QM Psi4 Final Opt. Energy (Har) mp2/def-sv(p)"
        - "QM Psi4 Final Single Pt. Energy (Har) mp2/def-sv(p)"
    rmsdout : str
        Name of the output file with filtered conformers. If it already
        exists, filtering is skipped.

    Returns
    -------
    None
    """
    # Parameters for distinguishing cutoff of conformer similarity
    thresE = 5.E-4  # declare confs diff & skip RMSD comparison above this threshold
    thresRMSD = 0.2  # above this threshold (Angstrom), confs are "diff" minima

    fname = os.path.basename(rmsdfile)

    # The tag header is written before the skip check, as before, so the log
    # records the request even when filtering is skipped.
    with open(os.path.join(os.getcwd(), "numConfs.txt"), 'a') as numConfsF:
        numConfsF.write("\n{}\n".format(tag))

        # Open file to be processed.
        rmsd_ifs = oechem.oemolistream()
        if not rmsd_ifs.open(rmsdfile):
            oechem.OEThrow.Fatal("Unable to open %s for reading" % rmsdfile)
        rmsd_ifs.SetConfTest(oechem.OEAbsoluteConfTest())

        try:
            # Open outstream file.
            rmsd_ofs = oechem.oemolostream()
            if os.path.exists(rmsdout):
                print("%s output file already exists in %s. Skip filtering.\n" %
                      (rmsdout, os.getcwd()))
                return
            if not rmsd_ofs.open(rmsdout):
                oechem.OEThrow.Fatal("Unable to open %s for writing" % rmsdout)

            try:
                # Identify minima and write output file.
                for mol in rmsd_ifs.GetOEMols():
                    if identify_minima(mol, tag, thresE, thresRMSD):
                        numConfsF.write("%s\t%s\n" % (mol.GetTitle(),
                                                      mol.NumConfs()))
                        oechem.OEWriteConstMolecule(rmsd_ofs, mol)
                    else:
                        numConfsF.write("%s\t0\n" % (mol.GetTitle()))
            finally:
                rmsd_ofs.close()
        finally:
            rmsd_ifs.close()

    print("Done filtering %s to %s.\n" % (fname, rmsdout))
def timeAvg(titles, sdfRef, method, basis, tag):
    """
    For an SDF file with all confs of all mols, get the average runtime
    of all conformers for each molecule.

    Results are also appended to "timeAvgs.txt" in the current working
    directory.

    Parameters
    ----------
    titles : dict
        Dictionary (empty or not), updated in place.
        keys = molTitles. values = [[qm1_avg, qm1_std], [qm2_avg, qm2_std] ... ]
    sdfRef : str
        path+name of SDF file with times for all confs of all mols
    method : str
        QM method, passed to pt.GetSDList to select the SD tag
    basis : str
        QM basis set, passed to pt.GetSDList to select the SD tag
    tag : str
        Tag keyword passed to pt.GetSDList to select the quantity to average

    Returns
    -------
    titles : dict
        The same dictionary with one [mean, std] pair appended per molecule.
    """
    # Open reference file.
    print("Opening SDF file %s" % sdfRef)
    ifsRef = oechem.oemolistream()
    ifsRef.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifsRef.open(sdfRef):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)

    try:
        with open("timeAvgs.txt", 'a') as timeF:
            timeF.write("\nFile: {}\n".format(sdfRef))
            timeF.write(
                "Average [{}/{}] [{}s] over all confs for each molecule\n".format(
                    method, basis, tag))

            # Grab all the times.
            for rmol in ifsRef.GetOEMols():
                tmol = np.fromiter(pt.GetSDList(rmol, tag, 'Psi4', method, basis),
                                   dtype=np.float64)

                # exclude conformers for which job did not finish (nan)
                tmol = tmol[~np.isnan(tmol)]

                meantime = np.mean(tmol)
                stdtime = np.std(tmol)
                timeF.write("%s\t%d confs\t\t%.3f +- %.3f\n" %
                            (rmol.GetTitle(), tmol.size, meantime, stdtime))

                titles.setdefault(rmol.GetTitle(), []).append(
                    [meantime, stdtime])
    finally:
        ifsRef.close()

    return titles
def read_mol(infile, many=False):
    """
    Read one molecule, or hand back an iterable over all molecules, from a file.

    Parameters
    ----------
    infile : str
        Name of the molecule file to open.
    many : bool
        True for multiple conformers/molecules: the result of the stream's
        GetOEMols() is returned and the stream is left open.
        False for a single conf/mol: one OEGraphMol is read and the stream
        is closed.

    Returns
    -------
    Either the GetOEMols() result (many=True) or a single OEGraphMol.
    """
    stream = oechem.oemolistream()
    if not stream.open(infile):
        oechem.OEThrow.Fatal("Unable to open {} for reading".format(infile))
    stream.SetConfTest(oechem.OEAbsoluteConfTest())

    if not many:
        single = oechem.OEGraphMol()
        oechem.OEReadMolecule(stream, single)
        stream.close()
        return single

    return stream.GetOEMols()
def prelim(sdfRef, spe):
    """
    Open a molecule file and pick the SD tag keyword for the energy type.

    Parameters
    ----------
    sdfRef : str
        Name of the molecule file to open.
    spe : str
        Text flag; 'true' (any letter case) selects the single point energy
        tag keyword, anything else selects the optimization energy one.

    Returns
    -------
    mols
        The result of the input stream's GetOEMols(); the stream stays open.
    tagword : str
        "QM spe" or "QM opt energy".
    """
    # Open file.
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsoluteConfTest())
    opened_ok = stream.open(sdfRef)
    if not opened_ok:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)
    mols = stream.GetOEMols()

    # Determine SD tag from which to obtain energy.
    tagword = "QM spe" if spe.lower() == 'true' else "QM opt energy"
    return mols, tagword
def plotSDF(infile, tag, figname='lineplot.png'):
    """
    Scatter-plot the relative value of an SD tag for every conformer in a
    molecule file, save the figure, and show it.

    Parameters
    ----------
    infile : str
        Name of the molecule file to read.
    tag : str
        SD tag whose value is parsed as a float for each conformer.
    figname : str
        Name of the image file to save. Default is 'lineplot.png'.

    Notes
    -----
    The x value of each point is the molecule title converted to int.
    Conformers whose tag value or title cannot be parsed are skipped as a
    pair, so x and y always stay the same length. Values are taken relative
    to the first kept one and multiplied by 627.5095 (axis label: kcal/mol).
    """
    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        oechem.OEThrow.Warning("Unable to open %s for reading" % infile)
        return

    xlist = []
    ylist = []
    try:
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            for conf in mol.GetConfs():
                # Parse both values before storing either one, so a failure
                # on the title cannot leave an unmatched y value behind.
                try:
                    energy = float(oechem.OEGetSDData(conf, tag))
                    conf_num = int(mol.GetTitle())
                except ValueError:
                    continue  # mols not converged may not have tag
                ylist.append(energy)
                xlist.append(conf_num)
    finally:
        ifs.close()

    if not ylist:
        print("No conformers with usable '{}' data found in {}; "
              "nothing to plot.".format(tag, infile))
        return

    ### convert to numpy array, take relative e, convert to kcal/mol
    ylist = np.array(ylist)
    ylist = ylist - ylist[0]
    ylist = 627.5095 * ylist

    ### Plot.
    xlabel = 'conformation number'
    ylabel = "Relative energy (kcal/mol)"

    plt.figure()
    plt.ylabel(ylabel, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    plt.scatter(xlist, ylist)
    plt.ylim(-1, 16)
    plt.grid()

    plt.savefig(figname, bbox_inches='tight')
    plt.show()
def read_mols_tag(insdf, calctype):
    """
    Open a molecule file and pick the SD tag keyword for a calculation type.

    Parameters
    ----------
    insdf : str
        Name of the molecule file to open.
    calctype : str
        Either 'opt' or 'spe'; anything else exits the program.

    Returns
    -------
    mols
        The result of the input stream's GetOEMols(); the stream stays open.
    tagword : str
        "QM spe" for 'spe', "QM opt energy" otherwise.
    """
    # reject unsupported calculation types before touching the file
    if calctype not in {'spe', 'opt'}:
        sys.exit("Specify a valid calculation type for {}.".format(insdf))

    # Open file.
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsoluteConfTest())
    opened_ok = stream.open(insdf)
    if not opened_ok:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % insdf)
    mols = stream.GetOEMols()

    # Determine SD tag from which to obtain energy.
    tag_by_type = {'spe': "QM spe"}
    tagword = tag_by_type.get(calctype.lower(), "QM opt energy")
    return mols, tagword
def confs2turb(insdf):
    """
    Generate Turbomole input for every conformer of every molecule in an
    SDF file.

    For each conformer a subdirectory <cwd>/<molTitle>/<confNumber> is
    created (conformer numbers start at 1) and receives the files 'options'
    and 'input.xyz' produced by GetMolDetails, plus 'coord' produced by
    running "x2t input.xyz > coord" there.

    Parameters
    ----------
    insdf: string - PATH+name of SDF file

    Notes
    -----
    The working directory is changed while each conformer is processed and
    is restored to the starting directory before returning.
    """
    homedir = os.getcwd()
    # NOTE(review): this runs in its own shell process; whether the loaded
    # module is visible to the later x2t call should be confirmed.
    sp.call('module load turbomole/7.1/intel', shell=True)

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    try:
        ### For each molecule: for each conf, generate input
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            for i, conf in enumerate(mol.GetConfs()):
                # change into subdirectory to use x2t
                subdir = os.path.join(homedir,
                                      "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    os.makedirs(subdir)
                os.chdir(subdir)

                # Build the contents first so a failure here does not leave
                # empty files behind.
                label = mol.GetTitle() + '_' + str(i + 1)
                optinfo, xinfo = GetMolDetails(conf, label)

                # write out relevant files
                with open('options', 'w') as ofile:
                    ofile.write(optinfo)
                with open('input.xyz', 'w') as xfile:
                    xfile.write(xinfo)

                # run x2t
                sp.call('x2t input.xyz > coord', shell=True)
    finally:
        ifs.close()
        # do not leave the caller stranded in the last conformer directory
        os.chdir(homedir)
def quan2modsem(infile, pfile):
    """
    Run the modified Seminario method for every conformer of every molecule
    in an SDF file, using Hessians stored in a pickle file.

    Parameters
    ----------
    infile : str
        Name of the SDF file. Its directory is used as the root of the
        <molTitle>/<confNumber>/ data directories passed to the method.
    pfile : str
        Name of the pickle file holding a dictionary indexed as
        hdict[molTitle][confNumber], with conformer numbers starting at 1.

    Notes
    -----
    The pickle file is loaded with pickle.load, which can execute arbitrary
    code; only use files from a trusted source.
    """
    hdir = os.path.dirname(infile)

    # read in sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        sys.exit("Unable to open %s for reading" % infile)

    try:
        # open quanformer-generated pickle file with dictionary of hessians
        with open(pfile, 'rb') as pickle_f:
            hdict = pickle.load(pickle_f)

        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for j, conf in enumerate(mol.GetConfs()):

                # set file locations; dir for modsem needs / at end of string
                datadir = os.path.join(hdir, "%s/%s/" % (mol.GetTitle(), j + 1))

                # extract hessian from the quanformer-generated dictionary (get_psi_results)
                hessian = hdict[mol.GetTitle()][j + 1]

                # run modsem
                # NOTE(review): the parent molecule, not `conf`, is passed
                # to prep_hess -- confirm this is intended.
                bond_list, angle_list, coords, N, hessian, atom_names = prep_hess(
                    mol, hessian)
                modified_Seminario_method_vtl2.modified_Seminario_method(
                    bond_list,
                    angle_list,
                    coords,
                    N,
                    hessian,
                    atom_names,
                    datadir,
                    datadir,
                    vibrational_scaling=1)

                # check to make sure files were generated, and note the ones that didn't work
                # TODO
    finally:
        ifs.close()
def confs2psi(insdf, method, basis, spe=False, memory=None):
    """
    Write a Psi4 input file for every conformer of every molecule in an
    SDF file.

    Each input is written to <cwd>/<molTitle>/<confNumber>/input.dat, with
    conformer numbers starting at 1. Existing input files are left
    untouched.

    Parameters
    ----------
    insdf:  string - PATH+name of SDF file
    method: string - method. E.g. "mp2"
    basis:  string - basis set. E.g. "def2-sv(p)"
    spe:    boolean. True for single point energy calcns, False for geom opt.
            default option is False.
    memory: string - memory specification. Psi4 default is 256 Mb. E.g. "1.5 Gb"

    """
    wdir = os.getcwd()

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    try:
        ### For each molecule: for each conf, generate input
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            if not mol.GetTitle():
                sys.exit("ERROR: OEMol must have title assigned! Exiting.")
            for i, conf in enumerate(mol.GetConfs()):
                # subdirectory ./mol/conf/
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    os.makedirs(subdir)

                input_path = os.path.join(subdir, 'input.dat')
                if os.path.exists(input_path):
                    print("Input file (\"input.dat\") already exists. Skipping.\n")
                    continue

                # Generate the text before creating the file: a failure
                # must not leave an empty input.dat that later runs would
                # skip as "already exists".
                label = mol.GetTitle() + '_' + str(i + 1)
                contents = make_psi_input(conf, label, method, basis, spe,
                                          memory)
                with open(input_path, 'w') as ofile:
                    ofile.write(contents)
    finally:
        ifs.close()
def filterConfs(rmsdfile, tag, suffix):
    """
    Read in OEMols (and each of their conformers) in 'rmsdfile'.
    For each molecule:
        rough filter conformers based on energy differences specified by 'tag',
        fine filter conformers based on RMSD values.

    The filtering itself is delegated to IdentifyMinima. A per-molecule
    conformer count is appended to "numConfs.txt" in the current working
    directory.

    Parameters
    ----------
    rmsdfile: string - PATH+full name of to-be-filtered SDF file.
    tag:      string - describing the SD tag with the energy value to rough
        filter conformers. A very small energy difference is considered to
        be the same conformer (see thresE). Above this energy difference,
        RMSD comparison is evaluated to distinguish if two confs are diff.
        Ex. QM Psi4 Final Opt. Energy (Har) mp2/def-sv(p)
            QM Psi4 Single Pt. Energy (Har) mp2/def-sv(p)
    suffix:   string - string appended to the basename of rmsdfile to
        distinguish that this file has been filtered. The basename is the
        file name up to its first '-' or '.'.
        Ex. if rmsdfile=/some/dir/basename-210.sdf and suffix=220 then the
        output name becomes basename-220.sdf. The name carries no directory
        part, so it is resolved against the current working directory.

    Returns
    -------
    None
    """
    # Parameters for distinguishing cutoff of conformer similarity
    thresE = 5.E-4  # declare confs diff & skip RMSD comparison above this threshold
    thresRMSD = 0.2  # above this threshold (Angstrom), confs are "diff" minima

    fname = os.path.basename(rmsdfile)

    # The tag header is written before the skip check, as before, so the log
    # records the request even when filtering is skipped.
    with open(os.path.join(os.getcwd(), "numConfs.txt"), 'a') as numConfsF:
        numConfsF.write(tag + "\n")

        # Open file to be processed.
        rmsd_ifs = oechem.oemolistream()
        if not rmsd_ifs.open(rmsdfile):
            oechem.OEThrow.Fatal("Unable to open %s for reading" % rmsdfile)
        rmsd_ifs.SetConfTest(oechem.OEAbsoluteConfTest())

        try:
            # Open outstream file.
            rmsdout = ("%s-%s.sdf" % (fname.replace('-', '.').split('.')[0],
                                      str(suffix)))
            rmsd_ofs = oechem.oemolostream()
            if os.path.exists(rmsdout):
                print("%s output file already exists in %s. Skip filtering.\n" %
                      (rmsdout, os.getcwd()))
                return
            if not rmsd_ofs.open(rmsdout):
                oechem.OEThrow.Fatal("Unable to open %s for writing" % rmsdout)

            try:
                # Identify minima and write output file.
                for mol in rmsd_ifs.GetOEMols():
                    if IdentifyMinima(mol, tag, thresE, thresRMSD):
                        numConfsF.write("%s\t%s\n" % (mol.GetTitle(),
                                                      mol.NumConfs()))
                        oechem.OEWriteConstMolecule(rmsd_ofs, mol)
                    else:
                        numConfsF.write("%s\t0\n" % (mol.GetTitle()))
            finally:
                rmsd_ofs.close()
        finally:
            rmsd_ifs.close()

    print("Done filtering %s to %s.\n" % (fname, rmsdout))
def confs_to_psi(insdf, method, basis, calctype='opt', memory=None,
                 via_json=False):
    """
    Read in molecule(s) (and conformers, if present) in insdf file. Create
    Psi4 input calculations for each structure.

    Each input is written to <cwd>/<molTitle>/<confNumber>/, with conformer
    numbers starting at 1. An input file that already exists is left
    untouched.

    Parameters
    ----------
    insdf: string
        Name of the molecule file for which to create Psi4 input file.
        SDF format can contain multiple molecules and multiple conformers per
        molecule in a single file.
    method: string
        Name of the method as understood by Psi4. Example: "mp2"
    basis : string
        Name of the basis set as understood by Psi4. Example: "def2-sv(p)"
    calctype : string
        What kind of Psi4 calculation to run. Supported inputs are:
        'opt' for geometry optimization,
        'spe' for single point energy calculation, and
        'hess' for Hessian calculation.
    memory : string
        How much memory each Psi4 job should take. If not specified, the
        default in Psi4 is 500 Mb. Examples: "2000 MB" "1.5 GB"
        http://www.psicode.org/psi4manual/master/psithoninput.html
    via_json : Boolean
        If True, use JSON wrapper for Psi4 input and output.
        - Psi4 input would be in "input.py", called with python
        - Psi4 output would be in "output.json"
        If False, use normal text files for Psi4 input and output.
        - Psi4 input would be in "input.dat"
        - Psi4 output would be in "output.dat"

    """
    wdir = os.getcwd()

    # the file this call produces; also the one checked for prior existence
    input_name = 'input.py' if via_json else 'input.dat'

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    try:
        ### For each molecule: for each conf, generate input
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            if not mol.GetTitle():
                sys.exit("ERROR: OEMol must have title assigned! Exiting.")
            for i, conf in enumerate(mol.GetConfs()):
                # subdirectory ./mol/conf/
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    os.makedirs(subdir)

                input_path = os.path.join(subdir, input_name)
                if os.path.exists(input_path):
                    print("Input file already exists. Skipping.\n{}\n".format(
                        input_path))
                    continue

                label = mol.GetTitle() + '_' + str(i + 1)

                # Generate the content before creating the file: a failure
                # must not leave an empty input that later runs would skip.
                if via_json:
                    json_data = make_psi_json(conf, label, method, basis,
                                              calctype, memory)
                    with open(input_path, 'w') as ofile:
                        ofile.write("# molecule {}\n\nimport numpy as np\nimport psi4"
                                    "\nimport json\n\njson_data = ".format(label))
                        json.dump(json_data, ofile, indent=4,
                                  separators=(',', ': '))
                        ofile.write(
                            "\njson_ret = psi4.json_wrapper.run_json(json_data)\n\n")
                        ofile.write("with open(\"output.json\", \"w\") as ofile:\n\t"
                                    "json.dump(json_ret, ofile, indent=2)\n\n")
                else:
                    contents = make_psi_input(conf, label, method, basis,
                                              calctype, memory)
                    with open(input_path, 'w') as ofile:
                        ofile.write(contents)
    finally:
        ifs.close()
def getPsiResults(origsdf, finsdf, spe=False, psiout="output.dat",
                  timeout="timer.dat"):
    """
    Read in OEMols (and each of their conformers) in origsdf file,
        get results from Psi4 calculations, and write out results into
        finsdf file.
    Directory layout is <cwd>/molName/confNumber/outputfiles, resolved
        against the current working directory at the time of the call.

    Parameters
    ----------
    origsdf:  string - PATH+full name of orig pre-opt SDF file.
    finsdf:   string - name of final SDF file with optimized results,
        written in the current working directory.
    spe:     Boolean - are the Psi4 results of a single point energy calcn?
    psiout:   string - name of the Psi4 output files. Default is "output.dat"
    timeout: string - name of the Psi4 timer files. Default is "timer.dat"

    Returns
    -------
    method: string - QM method from Psi4 calculations
    basisset: string - QM basis set from Psi4 calculations

    (None, None) is returned if the function returns early (e.g., if output
       file already exists), if no conformer was processed, or if the last
       conformer processed has no method/basis information.

    Notes
    -----
    The working directory is changed while each conformer is processed and
    is restored before returning, including when an error is raised.
    """
    wdir = os.getcwd()

    # Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % origsdf)
        sys.exit(1)

    # Open outstream file.
    writeout = os.path.join(wdir, finsdf)
    write_ofs = oechem.oemolostream()
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # defined up front so the final lookup works even with no conformers
    props = {}
    try:
        # For each conformer, process output file and write new data to SDF file
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for j, conf in enumerate(mol.GetConfs()):

                # GET DETAILS FOR SD TAGS
                props = {}  # dictionary of data for this conformer
                props['package'] = "Psi4"
                props['missing'] = False

                # change into subdirectory ./mol/conf/
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), j + 1))
                if not os.path.isdir(subdir):
                    print("*** No directory found for %s ***" % (subdir))
                    continue
                os.chdir(subdir)

                # Get wall clock time of the job
                try:
                    props['time'] = get_psi_time(timeout)
                except IOError:
                    props['time'] = "Timer output file not found"

                # process output and get dictionary results
                props = process_psi_out(psiout, props, spe)

                # if output was missing, move on
                if props['missing']:
                    continue

                # a finished job must provide all three of these
                required = ('numSteps', 'finalEnergy', 'coords')
                if not all(key in props for key in required):
                    sys.exit("ERROR: Psi4 job was incomplete in {}".format(subdir))

                # BRIEF ANALYSIS OF STRUCTURE, INTRA HBONDS
                # Set last coordinates from optimization. skip if missing.
                if len(props['coords']) != 0:
                    conf.SetCoords(oechem.OEFloatArray(props['coords']))

                # SET DETAILS TO WRITE MOLECULE
                # Set SD tags for this molecule
                pt.SetOptSDTags(conf, props, spe)

                # Write output file
                oechem.OEWriteConstMolecule(write_ofs, conf)
    finally:
        ifs.close()
        write_ofs.close()
        os.chdir(wdir)

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
def get_psi_results(origsdf, finsdf, calctype='opt', psiout="output.dat",
                    timeout="timer.dat"):
    """
    Read in OEMols (and each of their conformers) in origsdf file,
        get results from Psi4 calculations in the same directory as origsdf,
        and write out results into finsdf file.
    Directory layout is .../maindir/molName/confNumber/outputfiles, where
        maindir is the directory part of origsdf. finsdf is written in the
        current working directory.

    Parameters
    ----------
    origsdf:  string - original SDF file of input structures of QM calculation
    finsdf:   string - full name of final SDF file with optimized results.
    calctype: string; one of 'opt','spe','hess' for geometry optimization,
        single point energy calculation, or Hessian calculation
    psiout:   string - name of the Psi4 output files. Default is "output.dat"
    timeout: string - name of the Psi4 timer files. Default is "timer.dat"

    Returns
    -------
    method: string - value of props['method'] for the last conformer visited
    basis: string - value of props['basis'] for the last conformer visited

    (None, None) is returned if the function returns early (e.g., if output
       file already exists), if no conformer was visited, or if the last
       conformer visited has no method/basis information.

    Notes
    -----
    For calctype 'hess' the Hessians are not stored in SD tags; they are
    pickled as hdict[molTitle][confNumber] to "<finsdf basename>.hess.pickle"
    in the current working directory.
    """
    hdir = os.path.dirname(origsdf)
    wdir = os.getcwd()

    # check that specified calctype is valid
    if calctype not in {'opt', 'spe', 'hess'}:
        sys.exit("Specify a valid calculation type.")

    # read in sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        sys.exit("Unable to open %s for reading" % origsdf)

    # open outstream file
    writeout = os.path.join(wdir, finsdf)
    write_ofs = oechem.oemolostream()
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # Hessian dictionary, where hdict['molTitle']['confIndex'] has np array
    hdict = {}

    # defined up front so the final lookup works even with no conformers
    props = {}
    try:
        # for each conformer, process output file and write new data to SDF file
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            if calctype == 'hess':
                hdict[mol.GetTitle()] = {}

            for j, conf in enumerate(mol.GetConfs()):

                props = initiate_dict()

                # set file locations
                timef = os.path.join(hdir, "%s/%s/%s" % (mol.GetTitle(), j + 1,
                                                         timeout))
                outf = os.path.join(hdir, "%s/%s/%s" % (mol.GetTitle(), j + 1,
                                                        psiout))

                # process output and get dictionary results
                props = get_conf_data(props, calctype, timef, outf)

                # if output was missing or are missing calculation details
                # move on to next conformer
                if props['missing'] or (calctype == 'opt' and not all(
                        key in props
                        for key in ['numSteps', 'finalEnergy', 'coords'])):
                    print(
                        "ERROR reading {}\nEither Psi4 job was incomplete OR wrong calctype specified\n"
                        .format(outf))
                    continue

                # add data to oemol
                conf = set_conf_data(conf, props, calctype)

                # if hessian, append to dict bc does not go to SD tag
                if calctype == 'hess':
                    hdict[mol.GetTitle()][j + 1] = props['hessian']

                # check mol title
                conf = check_title(conf, origsdf)

                # write output file
                oechem.OEWriteConstMolecule(write_ofs, conf)

        # if hessian, write hdict out to separate file
        if calctype == 'hess':
            hfile = os.path.join(wdir,
                                 os.path.splitext(finsdf)[0] + '.hess.pickle')
            with open(hfile, 'wb') as pickle_f:
                pickle.dump(hdict, pickle_f)
    finally:
        # close file streams
        ifs.close()
        write_ofs.close()

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
def getTurbResults(origsdf, theory, finsdf, spe=False):
    """
    Read in OEMols (and each of their conformers) in origsdf file, collect
    Turbomole results from <cwd>/molName/confNumber/, and write the updated
    conformers into finsdf in the current working directory.

    Parameters
    ----------
    origsdf : string - name of the SDF file with the input structures
    theory : string - level of theory in the form "method/basis"
    finsdf : string - name of the output SDF file. If it already exists,
        nothing is done.
    spe : Boolean - passed on to process_turb_out and pt.SetOptSDTags

    Returns
    -------
    method : string
    basisset : string
        (None, None) is returned if the input cannot be opened, if the
        output file already exists, or if no conformer was visited.

    Notes
    -----
    The working directory is changed while each conformer is processed and
    is restored before returning, including when an error is raised.
    """
    wdir = os.getcwd()
    # NOTE(review): this runs in its own shell process; whether the loaded
    # module is visible to the later t2x call should be confirmed.
    sp.call('module load turbomole/7.1/intel', shell=True)
    method = theory.split('/')[0]
    basisset = theory.split('/')[1]

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % origsdf)
        # same shape as every other exit so callers can always unpack
        return None, None

    ### Open outstream file.
    writeout = os.path.join(wdir, finsdf)
    write_ofs = oechem.oemolostream()
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # defined up front so the final lookup works even with no conformers
    props = {}
    try:
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for i, conf in enumerate(mol.GetConfs()):
                props = {}  # dictionary of data for this conformer
                props['package'] = "Turbomole"
                props['method'] = method
                props['basis'] = basisset

                # change into subdirectory
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    sys.exit("No subdirectories found, are you in the right dir?")
                os.chdir(subdir)

                # get time and final coordinates
                props['time'] = GetTime()
                if not os.path.exists('coord'):
                    print("Error: the 'coord' file does not exist!")
                    continue
                sp.call('t2x -c > final.xyz', shell=True)

                # read in xyz file into another mol and transfer coords
                xfs = oechem.oemolistream()
                xfs.SetConfTest(oechem.OEAbsoluteConfTest())
                if xfs.open('final.xyz'):
                    xmol = next(xfs.GetOEMols())
                    conf.SetCoords(xmol.GetCoords())
                else:
                    oechem.OEThrow.Warning(
                        "Unable to open 'final.xyz' for reading")
                xfs.close()

                # process output and get dictionary results
                # NOTE(review): `cosmo` is not defined in this function; it
                # must come from module scope -- confirm it exists there.
                props = process_turb_out(props, spe, cosmo)
                pt.SetOptSDTags(conf, props, spe)
                oechem.OEWriteConstMolecule(write_ofs, conf)
    finally:
        ifs.close()
        write_ofs.close()
        # do not leave the caller stranded in the last conformer directory
        os.chdir(wdir)

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
def am1wib(insdf, outdat, plotout=None):
    """
    For every conformer in an SDF file, assign AM1-BCC partial charges and
    report, for each invertible nitrogen, the sum of the three angles around
    it and the Wiberg bond order of each of its bonds.

    Parameters
    ----------
    insdf: string, name of SDF file
    outdat: string, name of the text file the report is written to. It is
        created (or truncated) even if insdf cannot be opened.
    plotout: string or None, optional name of a tab-separated file that
        receives one "index, angle sum, label" row per nitrogen, where the
        label is "<molName>_<confIndex>_<atomIndex>".

    Raises
    ------
    RuntimeError
        If OEAssignPartialCharges reports failure.
    """
    with open(outdat, 'w') as outf:

        ### Read in .sdf file and distinguish each molecule's conformers
        ifs = oechem.oemolistream()
        ifs.SetConfTest(oechem.OEAbsoluteConfTest())
        if not ifs.open(insdf):
            oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
            return

        angList = []  # for plotting
        labelList = []  # for plotting
        try:
            for mol in ifs.GetOEMols():
                molName = mol.GetTitle()
                outf.write('\n\n>>> Molecule: %s\tNumConfs: %d' %
                           (molName, mol.NumConfs()))

                for i, conf in enumerate(mol.GetConfs()):

                    ### AM1-BCC charge calculation
                    charged_copy = oechem.OEMol(mol)
                    status = oequacpac.OEAssignPartialCharges(
                        charged_copy, oequacpac.OECharges_AM1BCCSym, False,
                        False)
                    if not status:
                        raise RuntimeError(
                            "OEAssignPartialCharges returned error code %s" %
                            status)

                    ### Our copy has the charges we want but not the right conformation.
                    ### Copy charges over. Also copy over Wiberg bond orders.
                    for src_atom, dst_atom in zip(charged_copy.GetAtoms(),
                                                  mol.GetAtoms()):
                        dst_atom.SetPartialCharge(src_atom.GetPartialCharge())
                    for src_bond, dst_bond in zip(charged_copy.GetBonds(),
                                                  mol.GetBonds()):
                        dst_bond.SetData("WibergBondOrder",
                                         src_bond.GetData("WibergBondOrder"))

                    ### Sum angles around each invertible N, and get Wiberg bond order.
                    for atom in conf.GetAtoms(oechem.OEIsInvertibleNitrogen()):
                        aidx = atom.GetIdx()
                        # assumes each matched atom has three neighbors
                        nbors = list(atom.GetAtoms())
                        ang1 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[0], atom, nbors[1]))
                        ang2 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[1], atom, nbors[2]))
                        ang3 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[2], atom, nbors[0]))
                        ang_sum = math.fsum([ang1, ang2, ang3])
                        outf.write("\n\n%s: sum of angles for N, index %d: %f" %
                                   (molName, aidx, ang_sum))
                        angList.append(ang_sum)
                        labelList.append("{}_{}_{}".format(molName, i, aidx))

                        for bond in atom.GetBonds():
                            nbor = bond.GetNbr(atom)
                            nidx = nbor.GetIdx()
                            nbor_wib = bond.GetData('WibergBondOrder')
                            outf.write(
                                "\n{}: wiberg bond order for indices {} {}: {}".format(
                                    molName, aidx, nidx, nbor_wib))
        finally:
            ifs.close()

        if plotout is not None:
            with open(plotout, 'w') as f:
                for idx, (ang_sum, label) in enumerate(zip(angList, labelList)):
                    f.write("{0}\t{1}\t{2}\n".format(idx, ang_sum, label))
def getRMSD(sdfRef, theory, rmsdict, package='Psi4'):
    """
    Perform RMSD calculation from an SDF file for molecule and its conformers.

    For each molecule the relative (to the first conformer) initial and
    final energies are compared, and the root-mean-square of the differences
    is taken with respect to n-1 conformers and converted from Hartree to
    kcal/mol. Results are appended to "RMSD.txt", "energies_breakdown.txt"
    and "maxenergies.txt" in the current working directory.

    Parameters
    ----------
    sdfRef: string, pathname of the SDF file with energies of opt 1 and opt 2
    theory: string, level of theory in format of mp2/6-31G*
    rmsdict: dictionary (can be empty) which will be populated in form of
        rmsdict[theory][molName] = 0.000 if the RMSD of before/after
        energies are 0.000. The entry for `theory` is created if missing.
    package: string, name of software package used for QM calculation.
        only Psi4 currently supported

    Returns
    -------
    rmsdict: the same dictionary, updated in place.

    Notes
    -----
    When fewer than two non-NaN energy differences are available, the RMSD
    is undefined and is stored as NaN.
    """
    method, basis = theory.split('/')[0].strip(), theory.split('/')[1].strip()

    # create a molecule read in stream
    print("Opening SDF file %s" % sdfRef)
    ifsRef = oechem.oemolistream()
    ifsRef.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifsRef.open(sdfRef):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)

    try:
        # output files: RMSD values, initial/final energies, and the
        # conformers with the largest energy differences
        with open("RMSD.txt", 'a') as RMSD, \
                open("energies_breakdown.txt", 'a') as energies, \
                open("maxenergies.txt", "a") as maximum:

            RMSD.write("\nAnalyzing file: %s\n# Level of theory: %s\n" %
                       (sdfRef, theory))

            # Grab energies, perform RMSD calculation, write data to txt files.
            for rmol in ifsRef.GetOEMols():
                molName = rmol.GetTitle()
                tmol = np.asarray(pt.get_sd_list(rmol, 'QM opt energy', package,
                                                 method, basis),
                                  dtype=float)
                imol = np.asarray(pt.get_sd_list(rmol, 'QM opt energy initial',
                                                 package, method, basis),
                                  dtype=float)
                final = tmol.copy()
                initial = imol.copy()

                # subtract conformer[0] energies from all conformers
                try:
                    tmol -= tmol[0]
                except IndexError:
                    sys.exit("No energies found for {} {}/{}! Check that data "
                             "is stored in tags. Exiting.".format(
                                 rmol.GetTitle(), method, basis))
                imol -= imol[0]

                # subtracts initial minus final and squares all values
                fmol = np.subtract(tmol, imol)
                fmol = fmol[~np.isnan(fmol)]
                fmol = np.square(fmol)

                # average with respect to n-1 number of conformers, then
                # convert from Hartree to Kcal/mol
                dof = fmol.size - 1
                if dof > 0:
                    average = math.sqrt(np.sum(fmol) / dof) * 627.5095
                else:
                    average = float('nan')

                # puts RMSD values into .txt file, and store in dict for plotting.
                RMSD.write("#%s\t%.5f RMSD(Kcal/mol)\n" % (molName, average))
                rmsdict.setdefault(theory, {})[molName] = average

                # store energies of initial and final for molecules conformers
                energies.write(
                    "\n#%s\n#%s\n#RMSD = %.5f(y)\t\t(x=Hartree, y=kcal/mol)\n#conf. init. Energy(x) \t final Energy(x) \t diff.(x)\tdiff. (y) \n"
                    % (theory, molName, average))

                # get list of conformer indices to identify high RMSD ones
                conflist = pt.get_sd_list(rmol, "original index", package,
                                          method, basis)
                # keep the first comma-separated field: the orig conf index
                conformer = np.asarray(
                    [item.split(',')[0] for item in conflist], dtype=int)

                diffs = []
                for i, (e_init, e_final) in enumerate(zip(initial, final)):
                    delta = e_final - e_init
                    energies.write("%r \t %5.9f \t %5.9f \t %5.9f\t%5.9f \n" %
                                   (conformer[i], e_init, e_final, delta,
                                    delta * 627.5095))
                    diffs.append(delta * 627.5095)
                difference = np.absolute(np.array(diffs, dtype=float))

                # find max 3 confs with highest RMSDs
                try:
                    top_positions = []
                    for _ in range(3):
                        position = np.nanargmax(difference)
                        # set max conf to zero to find next highest
                        difference[position] = 0
                        top_positions.append(position)
                    max1, max2, max3 = (conformer[p] for p in top_positions)
                except ValueError:
                    # TODO don't plot this mol for all nan's
                    print("All RMSDs in list for file {} mol {} are nan!!!".format(
                        sdfRef, molName))
                    max1 = max2 = max3 = -1

                energies.write(
                    "#*** Max energy differences are conformers (hi-->low): %r, %r, %r ***\n\n"
                    % (max1, max2, max3))
                maximum.write("%s, %s : %r, %r, %r\n" %
                              (theory, molName, max1, max2, max3))
    finally:
        ifsRef.close()

    return rmsdict