def _collect_mc_values(db, modelchem, failoninc):
    """Return the value of every reaction yielded by *db* for *modelchem*.

    Iterates ``db.get_reactions(modelchem=modelchem, failoninc=failoninc)``
    and reads ``orxn.data[modelchem].value`` from each reaction object.

    :param db: database object providing ``get_reactions``
    :param modelchem: model chemistry label used as key into ``orxn.data``
    :param failoninc: when true, a reaction lacking the datum re-raises the
        ``KeyError``; when false, ``np.nan`` is recorded in its place
    :returns: list of values, one per reaction yielded
    :raises KeyError: if a datum is missing and *failoninc* is true
    """
    values = []
    for _lmc, _lbm, orxn in db.get_reactions(modelchem=modelchem,
                                             failoninc=failoninc):
        try:
            values.append(orxn.data[modelchem].value)
        except KeyError:
            if failoninc:
                raise  # bare raise keeps the traceback of the offending lookup
            values.append(np.nan)  # placeholder so nothing gets plotted
    return values


def plot_convergence(rxn,
                     dbse,
                     proj,
                     loadfrompickle,
                     std_mtd,
                     std_bsse,
                     std_bas_list,
                     std_curvestyle,
                     mtd_list,
                     bsse,
                     bas_list,
                     curvestyle,
                     failoninc=True,
                     assigned_title='default',
                     xlabel='default',
                     ylabel='default',
                     legend=True,
                     markersize='default',
                     linewidth='default',
                     standalone=True,
                     plotpath='default',
                     filename='default',
                     filetype='default'):
    """Gather the data for convergence plots of reactions *rxn* of *dbse*.

    For every member of *rxn*, collects the benchmark value, the values of a
    "standard" curve (*std_mtd*-*std_bsse* over *std_bas_list*) and one row
    of values per (*mtd*, *bsse*) combination over *bas_list*.

    NOTE(review): only the data-gathering stage of this function is visible
    in this section; no plotting call appears here. The plotting-related
    descriptions below are carried over from the original docstring and
    should be confirmed against the remainder of the function.

    :param rxn: iterable of integer reaction indices; each ``r`` is used as
        ``r - 1`` to index the per-reaction value lists (i.e. 1-based)
    :param dbse: database name; passed to ``qcdb.Database`` and used in the
        default title
    :param proj: iterable of project names, each passed to
        ``load_qcdata_byproject``
    :param loadfrompickle: forwarded to ``qcdb.Database``
    :param std_mtd: method of the standard curve
    :param std_bsse: counterpoise scheme of the standard curve
    :param std_bas_list: basis sets of the standard curve
    :param std_curvestyle: not read in the visible code; presumably the
        style string of the standard curve -- confirm
    :param mtd_list: methods of the curves to be created
    :param bsse: counterpoise schemes of the curves to be created
    :param bas_list: basis sets of the curves to be created; per the original
        docstring these become the x-axis ticks
    :param curvestyle: per the original docstring, three-character strings
        giving marker style, curve color, and line style, one per curve
    :param failoninc: when true, missing reaction data raises ``KeyError``;
        when false, ``np.nan`` is recorded instead
    :param assigned_title: plot title; ``'default'`` builds
        ``'<dbse>-<r> ConvPlot'``
    :param xlabel: per the original docstring, defaults to "Basis Set"
    :param ylabel: per the original docstring, defaults to
        "Interaction Energy"
    :param legend: per the original docstring, toggles the legend
    :param markersize: per the original docstring, float; default 5.5
    :param linewidth: per the original docstring, float; default 1
    :param standalone: per the original docstring, plots are opened if true
    :param plotpath: per the original docstring, where the plot is saved
    :param filename: per the original docstring, defaults to
        ``<dbse>_<rxn>_convplot.<filetype>``
    :param filetype: per the original docstring, ``.pdf`` by default
    :raises KeyError: if reaction data is missing and *failoninc* is true
    """
    # Load data from corresponding function arguments
    asdf = qcdb.Database([dbse], loadfrompickle=loadfrompickle)
    for pr in proj:
        asdf.load_qcdata_byproject(pr)

    # Build 2D modelchems array for novel methods: one row per (mtd, bsse)
    # combination, one column per basis set
    mcs = [[mtd + '-' + b + '-' + bas for bas in bas_list]
           for mtd in mtd_list for b in bsse]

    # Build 1D standard modelchem array
    std_mcs = [std_mtd + '-' + std_bsse + '-' + std_bas
               for std_bas in std_bas_list]

    # Get benchmark values for all rxns in database
    bench = asdf.benchmark  # Where to look for the benchmark values
    temp_ref_array = [
        orxn.data[bench].value
        for _lmc, _lbm, orxn in asdf.get_reactions(modelchem=bench)
    ]

    # TODO: Form rxn array based on more intuitive string argument?
    # Iterate over array *rxn* and gather the data for each one
    for r in rxn:
        # Assign plot title
        if assigned_title == 'default':
            working_title = dbse + '-' + str(r) + ' ConvPlot'
        else:
            working_title = assigned_title

        # Get benchmark value for r
        ref = temp_ref_array[r - 1]

        # Build rank 1 *std_dbdata* array containing the data for the
        # standard convergence curve
        std_dbdata = []
        for std_mc in std_mcs:
            if std_mc in asdf.mcs.keys():
                std_dbdata.append(
                    _collect_mc_values(asdf, std_mc, failoninc)[r - 1])
            else:
                print("Standard model chemistry not in mcs.keys(). Try again.")
                # Record a gap instead of indexing an empty list (which
                # raised IndexError); mirrors the handling of absent novel
                # model chemistries below
                std_dbdata.append(np.nan)
        std_curve_label = std_mtd + '-' + std_bsse
        std_x_vals = np.arange(1, len(std_dbdata) + 1)

        # Build rank 2 *dbdata* array containing all data for rxn *r*
        # Build rank 1 *curve_labels* array for use in legend
        # Build rank 1 *x_vals* array against which to plot rows of *dbdata*
        dbdata = []
        x_vals = []
        curve_labels = []
        for row in mcs:  # Each row is a different combo of *mtd* and *bsse*
            temp_dbdata_row = []
            for mc in row:  # Different values of *bas* for same *mtd* & *bsse*
                if mc in asdf.mcs.keys():
                    # The modelchem exists at all, regardless of some
                    # missing data
                    temp_dbdata_row.append(
                        _collect_mc_values(asdf, mc, failoninc)[r - 1])
                else:
                    # Whole modelchem absent: record NaN directly so any
                    # reaction index works (previously a fixed pad of 24)
                    temp_dbdata_row.append(np.nan)

            # Split modelchem name *mtd*-*bsse*-*bas* by '-' delimiter and
            # join *mtd* + '-' + *bsse* to form the curve label.
            # NOTE(review): original indentation was lost; one label per row
            # (per curve) is assumed here -- confirm
            temp_curve_label = mc.split("-")
            curve_labels.append(temp_curve_label[0] + '-' +
                                temp_curve_label[1])
            dbdata.append(temp_dbdata_row)  # Add row to *dbdata* array
        # NOTE(review): working_title, ref, std_curve_label, std_x_vals,
        # x_vals and curve_labels are not consumed in the visible section;
        # they are kept for the plotting stage that is presumably outside
        # this view.
# NOTE(review): this section opens with the tail of a call whose beginning
# lies before the visible text; it is reproduced untouched. The original
# indentation was lost, so this line and the ``print('')`` after it may
# belong to an enclosing function body -- confirm against the full file.
dbse + ' @ ' + mtd, exitonfail=False)
print('')

# Settings for the checks below.
# NOTE(review): none of these four names is read in the visible section;
# presumably they are consumed as globals by ``testem`` -- confirm.
err = 'rmse'
digits = 0.02
mode = '-unCP-atz'
active_methods = [
    'PBE', 'B3LYP', 'VV10', 'LCVV10', 'WB97XD', 'WB97XV', 'M062X', 'M11L',
    'M11'
]

# Banner naming the literature source of the values being checked
print("""\n<<< Mardirossian:2014:9904 PCCP 2014, 16, 9904 >>>\n""")

# S22: load the 'dhdft' project data and check the default subset against
# benchmark 'S22B'
asdf = qcdb.Database('s22')
asdf.load_qcdata_byproject('dhdft')
testem(odb=asdf, ss='default', bm='S22B', ans=answers['s22'])

# HBC6: same procedure against benchmark 'HBC6A'
asdf = qcdb.Database('hbc6')
asdf.load_qcdata_byproject('dhdft')
testem(odb=asdf, ss='default', bm='HBC6A', ans=answers['hbc'])

# NBC10: define three subsets ('a1', 'a2', 'a3') as unions of existing
# subsets, then check each against benchmark 'NBC10A'
asdf = qcdb.Database('nbc10')
asdf.load_qcdata_byproject('dhdft')
asdf.add_Subset_union('a1', ['bzbz_pd34', 'bzbz_t', 'bzbz_s'])
asdf.add_Subset_union('a2', ['meme', 'bzme'])
asdf.add_Subset_union('a3', ['pypy_s2', 'pypy_t3'])
testem(odb=asdf, ss='a1', bm='NBC10A', ans=answers['nbca1'])
testem(odb=asdf, ss='a2', bm='NBC10A', ans=answers['nbca2'])
testem(odb=asdf, ss='a3', bm='NBC10A', ans=answers['nbca3'])
# Script preamble: load the A24 database with two data sets and define the
# lists of model chemistry labels used later in the script.
from __future__ import print_function
import sys
# Make the qcdb package and its database definitions importable.
# NOTE(review): hard-coded, machine-specific paths.
sys.path.append('/Users/loriab/linux/qcdb')
sys.path.append('/Users/loriab/linux/qcdb/databases')
import qcdb

a24 = qcdb.Database('A24')
a24.load_dilabio()
a24.load_f12dilabio()

import matplotlib.pyplot as plt
import numpy as np

# Model chemistry labels; each reads <method>-CP-<basis>.
# CCSDT with the adz ... a6z bases
mcs1 = [
    'CCSDT-CP-adz', 'CCSDT-CP-atz', 'CCSDT-CP-aqz', 'CCSDT-CP-a5z',
    'CCSDT-CP-a6z'
]
# CCSDTAF12 / CCSDTBF12 / CCSDTCF12 with the a?z bases
mcs2 = [
    'CCSDTAF12-CP-adz', 'CCSDTAF12-CP-atz', 'CCSDTAF12-CP-aqz',
    'CCSDTAF12-CP-a5z'
]
mcs3 = [
    'CCSDTBF12-CP-adz', 'CCSDTBF12-CP-atz', 'CCSDTBF12-CP-aqz',
    'CCSDTBF12-CP-a5z'
]
mcs4 = ['CCSDTCF12-CP-adz', 'CCSDTCF12-CP-atz', 'CCSDTCF12-CP-aqz']
# The same three F12 methods with the ?zf12 bases
mcs2f = ['CCSDTAF12-CP-dzf12', 'CCSDTAF12-CP-tzf12', 'CCSDTAF12-CP-qzf12']
mcs3f = ['CCSDTBF12-CP-dzf12', 'CCSDTBF12-CP-tzf12', 'CCSDTBF12-CP-qzf12']
mcs4f = ['CCSDTCF12-CP-dzf12', 'CCSDTCF12-CP-tzf12', 'CCSDTCF12-CP-qzf12']
# Labels with paired-basis names (adtz, atqz, ...).
# NOTE(review): presumably two-point basis-set extrapolations -- confirm.
mcs5 = ['CCSDT-CP-adtz', 'CCSDT-CP-atqz', 'CCSDT-CP-aq5z', 'CCSDT-CP-a56z']
mcs6 = ['CCSDTAF12-CP-adtz', 'CCSDTAF12-CP-atqz', 'CCSDTAF12-CP-aq5z']
import collections

# NOTE(review): ``sys``, ``glob`` and ``os`` are used below but are not
# imported in the visible section; presumably imported earlier in the file.

# Make the qcdb package and its database definitions importable.
# Raw strings keep the exact same text on Python 2 while avoiding the
# invalid ``\U`` escape that makes these literals a SyntaxError on Python 3.
# NOTE(review): hard-coded, machine-specific paths.
sys.path.append(r'C:\Users\Owner\Documents\GitHub\qcdb')
sys.path.append(r'C:\Users\Owner\Documents\GitHub\qcdb\databases')
import qcdb
from qcdb.psivarrosetta import useme2psivar
from qcdb.modelchems import Method, BasisSet, methods, bases
import pandas as pd
import numpy as np

pd.set_option('display.max_columns', 10)
pd.set_option('display.width', 200)

# <<< read usemefiles and convert to giant DataFrame >>>

dbse = 'A24'
dbobj = qcdb.Database(dbse)
path = r"""C:\Users\Owner\Documents\f12dilabiousemefiles\usemefiles"""
h2kc = qcdb.psi_hartree2kcalmol
rawdata = collections.defaultdict(dict)

# Visit every file in *path* whose name starts with the database name and
# contains 'useme'
for useme in glob.glob('%s/%s*useme*' % (path, dbse)):
    # File name is split on '.'; the first field is further split on '-'
    spl = os.path.basename(useme).split('.')
    #dbse = spl[0].split('-')[0]
    #ocalc = spl[0].split('-')[1]
    #optns = '_'.join(spl[0].split('-')[2:-1])
    basis = spl[0].split('-')[-1]  # last '-' field of the first '.' field
    piece = '.'.join(spl[1:])  # everything after the first '.'
    print(useme, basis, piece)

    # Whitespace-separated table; '#' starts a comment, the text 'None'
    # marks a missing value, and the first column ('rxn') is the index
    tmp = pd.read_csv(useme,
                      index_col=0,
                      sep=r'\s+',
                      comment='#',
                      na_values='None',
                      names=['rxn', 'dimer', 'monoA-CP', 'monoB-CP'])
    print(useme, basis, piece)
    print(tmp.head(10))
# Disabled database -> project-list entries, kept for reference
#dbnet['NBC10ext'] = ['saptmisc', 'dfit']
#dbnet['ACHC'] = ['saptmisc', 'dfit']
#dbnet['UBQ'] = ['saptmisc', 'bfdbmm']
#dbnet['S22by7'] = ['saptmisc']
#dbnet['S66'] = ['saptmisc']
#dbnet['A24'] = ['saptmisc', 'dilabio']
#dbnet['JSCH'] = ['saptmisc']
#dbnet[''] = []

# For every database in *dbnet*: build the database object, pickle it under
# *homewrite*, then load the QC data of each listed project, timing each
# step. ``items()`` and parenthesised single-argument ``print`` behave the
# same on Python 2 and Python 3 (the former ``iteritems()`` and ``print``
# statements were Python-2-only).
for db, lproj in dbnet.items():
    print('\n<<< %s >>>' % (db))

    t0 = time.time()
    if db == 'DB4':
        asdf = qcdb.DB4(loadfrompickle=True, path=homewrite)
    else:
        asdf = qcdb.Database(db, loadfrompickle=True, path=homewrite)
    dbse = asdf.dbse
    t1 = time.time()
    print('%-70s %8.1f' % ('database.py --> Database', t1 - t0))
    print('Benchmark %s' % (asdf.benchmark,))

    # Serialise the freshly built database object
    Dbfilename = homewrite + '/' + db + '_Db.pickle'
    with open(Dbfilename, 'wb') as handle:
        pickle.dump(asdf, handle, pickle.HIGHEST_PROTOCOL)
    t2 = time.time()
    print('%-70s %8.1f' % ('* Database --> database_Db.pickle', t2 - t1))

    # NOTE(review): the body of this loop appears to continue past the end
    # of the visible section; only the visible statements are reproduced.
    for pj in lproj:
        t3 = time.time()
        asdf.load_qcdata_byproject(pj)
        t4 = time.time()