def readDB(db, calcId, cols): with sqlConnect(db) as con: cur = con.cursor() cur.execute("SELECT geomid,results from Calc where CalcId=?", (calcId, )) CalcRow = [[i] + j.split() for i, j in cur] try: CalcRow = np.array(CalcRow, dtype=np.float64) except: # somthing wrong with the res files ind = len(CalcRow[0]) for i in CalcRow: if ind != len(i): raise Exception( "Some thing wrong with the result of GeomID %s" % i[0]) cur.execute("SELECT id,%s from Geometry" % cols) GeomRow = np.array(cur.fetchall()) # This is the list of indexes in geomrow corresponding to the id in calcrow sortedIndex = np.searchsorted(GeomRow[:, 0], CalcRow[:, 0], sorter=GeomRow[:, 0].argsort()) # each row of resArr contains [rho, phi, results...] resArr = np.column_stack([GeomRow[sortedIndex][:, 1:], CalcRow[:, 1:]]) # sort out jumbling of rho, phi values, also remove any duplicates in process, be careful !!! resArr = np.unique(resArr, axis=0) return resArr
def deleteCalcs(dB, pesDir, calcId, geomIdList): geomStr = ' where calcid = %s and geomid in ('%calcId + ','.join(map(str, geomIdList)) + ')' with sqlConnect(dB) as con: cur = con.cursor() cur.execute('select geomid from expcalc' + geomStr) geomList = cur.fetchall() for (geomId,) in geomList: cur.execute('delete from expcalc where geomid = ? and calcid = ?',(geomId,calcId)) print("CalcId = {}, GeomId = {} deleted from ExpCalc Table".format(calcId,geomId)) cur.execute('select type from CalcInfo where id=?',(calcId,)) calcName = cur.fetchone()[0] cur.execute('select geomid from calc' + geomStr) geomList = cur.fetchall() for (geomId,) in geomList: dirToRemove = '{}/geom{}/{}{}'.format(pesDir, geomId,calcName, calcId) if os.path.isdir(dirToRemove): # geomdata is in folder format shutil.rmtree(dirToRemove) elif os.path.isfile(dirToRemove+'.tar.bz2'): # if is it in archived os.remove(dirToRemove+'.tar.bz2') else: print("No GeomData found for CalcId = {}, GeomId = {}".format(calcId,geomId) ) cur.execute('delete from calc where geomid = ? and calcid = ?',(geomId,calcId)) print("CalcId = {}, GeomId = {} deleted from CalcTable and GeomData".format(calcId,geomId))
def resetCalcs(db): # deleteCalcs for all calculations # clear geomdata # clear expdir and rundir with sqlConnect(dB) as con: cur = con.cursor() print("Clearing up tables Calc, ExpCalc, Exports") cur.execute("delete from Calc") cur.execute("delete from ExpCalc") cur.execute("delete from Exports") print("Deleting directories GeomData, ExpDir, RunDir") shutil.rmtree('GeomData') shutil.rmtree('ExpDir') shutil.rmtree('RunDir')
def GetExpMrciNactJobs(dB, calcId, jobs, gidList, constDb): with sqlConnect(dB) as con: cur = con.cursor() # #---WARNING:::: turning off constraint, its not that used anyway sqlQuery = '''SELECT GeomId,Id FROM Calc WHERE CalcId = 1 and GeomId not in (SELECT GeomId FROM Calc WHERE CalcId={0} UNION SELECT GeomId FROM ExpCalc WHERE CalcId={0}) {2} LIMIT {1}'''.format( calcId, jobs, ' and GeomId in ({})'.format(','.join( map(str, gidList))) if gidList else '') cur.execute(sqlQuery) expGClist = cur.fetchall() assert expGClist, "No Exportable geometries found" # preventing null exports return expGClist
def successiveExport(dB, calcId, jobs, gidList, constDb): with sqlConnect(dB) as con: cur = con.cursor() # get all geoms that is left in the expcalc table for all previous calc ids (greater than 1) # if its left in the expcalc (for calcid >1) then it's calcid=1/mcscf was obviously successful. sqlQuery = '''SELECT GeomId,Id FROM Calc WHERE CalcId = 1 and GeomId in (SELECT GeomId from ExpCalc where CalcId={0}) and GeomId not in (SELECT GeomId FROM Calc WHERE CalcId={1} UNION SELECT GeomId FROM ExpCalc WHERE CalcId={1}) {3} LIMIT {2}'''.format( calcId - 1, calcId, jobs, ' and GeomId in ({})'.format(','.join( map(str, gidList))) if gidList else '') cur.execute(sqlQuery) expGClist = cur.fetchall() assert expGClist, "No Exportable geometries found" # preventing null exports return expGClist
def status(dB): with sqlConnect(dB) as con: cur = con.cursor() status = '-'*90+'\033[31m\n\033[5m\033[4mPESMan Status:\033[0m\t\t' cur.execute('select count(id) from Geometry') status+= '\033[35m\033[4mTotal number of geometries: {}\033[0m\n'.format(cur.fetchone()[0]) cur.execute('select type from CalcInfo') names = [i[0] for i in cur] if len(names)==0: status+='{0}{1}{0}'.format('='*90,'\n\t\tNo Calcs are avialable\n') print(status) return status += "{0}\n{1:^10}|{2:^13}|{3:^20}|{4:^20}|{5:^20}\n{0}".format( '='*90,'CalcId','CalcName','Exported Jobs No.','Imported Jobs No.', 'Jobs in ExpCalc') for i,name in enumerate(names, start=1): cur.execute('select sum(NumCalc) from Exports where calcid=?',(i,)) tE = cur.fetchone()[0] tE = tE if tE else 0 cur.execute('select count(*) from calc where calcid=?',(i,)) tD = cur.fetchone()[0] cur.execute('select count(*) from ExpCalc where calcid=?',(i,)) tEx = cur.fetchone()[0] status +="\n{:^10}|{:^13}|{:^20}|{:^20}|{:^20}\n{}".format(i,name,tE,tD,tEx,'-'*90) print(status) # Check for SemiCalc information cur.execute("SELECT name FROM sqlite_master WHERE type='table';") tables = [i for i, in cur.fetchall()] if "SemiCalc" in tables: cur.execute("SELECT count(*) from SemiCalc;") print("No. of results in SemiCalc table {}".format(cur.fetchone()[0])) status = "\033[31m\n\033[4mDirectory Stats:\033[0m\t\t\n" + "="*90 + "\n" subDirs = [x for x in os.listdir('.') if os.path.isdir(x)] for s in subDirs: status += " {:<13} --- {}\n".format(s,size(s)) status += " {:<13} --- {}\n".format('Total',size('.')) + "-"*90 # for s in subDirs: # also prints number of files and folders # status += " {:<13} --- {} ({:>3} folders & {:>3} files)\n".format(s,size(s),folders(s),files(x)) # status += " {:<13} --- {} ({:>3} folders & {:>3} files)\n".format('Total',size('.'),folders('.'),files('.')) + "-"*90 print(status) lastM = time.ctime(os.path.getmtime(os.path.expanduser(dB))) print("Database last modified on : {} \n".format(lastM))
def ExportJobs(dB, calcId, jobs, np, exportDir, pesDir, templ, gidList, sidList, depth, constDb, includePath, molInfo, par, logger): # Main export function that exports a given number of jobs for a specified calcid type # following collects the geomid that are exportable and the calc table id which will be used as their start info if calcId == 1: # Mrci or nact export ExpGeomList = GetExpGeomNearNbr(dB, calcId, gidList, sidList, jobs, depth, constDb, includePath) elif calcId == 2: ExpGeomList = GetExpMrciNactJobs(dB, calcId, jobs, gidList, constDb) else: raise Exception('No calculation is defined for CalId=', calcId) ExpGeomList = successiveExport(dB, calcId, jobs, gidList, constDb) with sqlConnect(dB) as con: con.row_factory = sqlRow cur = con.cursor() cur.execute('SELECT * from CalcInfo WHERE Id=?', (calcId, )) InfoRow = cur.fetchone() assert InfoRow, "No Info for CalcId={} found in data base".format( calcId) calcName, template = InfoRow['Type'], InfoRow[ 'InpTempl'] # unpack calcname and the template from the info table # insert into database one row and use the id as export id cur.execute("INSERT INTO Exports (CalcId) VALUES (?)", (calcId, )) exportId = cur.lastrowid expDir = "{}/Export{}-{}{}".format(exportDir, exportId, calcName, calcId) # remove the export directory if already exists, may have created from some failed export. if os.path.exists(expDir): shutil.rmtree(expDir) #<<<--- shouldn't have happened os.makedirs(expDir) if templ: # if template given use that, o/w use from calcinfo table with open(templ, 'r') as f: template = f.read() jobs = [] for ind, (geomId, startId) in enumerate(ExpGeomList, start=1): cur.execute('SELECT * from Geometry WHERE Id=?', (geomId, )) geom = dict( cur.fetchone()) # to be parsed in geometry, saving unnecessary if startId: cur.execute('SELECT GeomId,Dir from Calc WHERE Id=?', (startId, )) startGId, startDir = cur.fetchone() else: startGId, startDir = 0, None jobs.append([ geomId, geom, calcId, expDir, template, calcName, startGId, startDir, ind, logger.info if np == 1 else print ]) # the logging module though thread safe, can't write from multiple processes, so with multiprocessing simple print is used # This situation can be handled properly with an explicit threading queue, but I didn't want to make it all that complicated # That means in parallel case the statements inside ExportCalc won't be logged to PESMan.log, only be print to console if np == 1: expDirs = [ExportCalc(i) for i in jobs] else: # if parallel export is requested pool = Pool(processes=np) expDirs = pool.map(ExportCalc, jobs) pool.close() # update the export table and expcalc tables with the exported jobs expGeomIds = [i for i, _ in ExpGeomList] cur.execute( "UPDATE Exports SET NumCalc=?, ExpDT=strftime('%H:%M:%S %d-%m-%Y', datetime('now', 'localtime')), \ ExpGeomIds=? WHERE Id=?", (len(expDirs), ' '.join(map(str, expGeomIds)), exportId)) lExpCalc = [(exportId, calcId, i, j) for i, j in zip(expGeomIds, expDirs)] cur.executemany( "INSERT INTO ExpCalc (ExpId,CalcId,GeomId,CalcDir) VALUES (?,?,?,?)", lExpCalc) fExportDat = expDir + "/export.dat" # save the export id and exported directories in export.dat file with open(fExportDat, 'w') as f: f.write("# Auto generated file. Please do not modify\n" + ' '.join(map(str, [exportId] + expDirs))) os.chmod( fExportDat, 0o444 ) # change mode of this file to read-only to prevent accidental writes fPythonFile = "{}/RunJob{}.py".format( expDir, exportId) # save the python file that will run the jobs if par: # export job to run parallel geometry createRunJobParallel(molInfo, fPythonFile) else: # run a single geometry, mandatory for job like primary mcscf createRunJob( molInfo, fPythonFile) # use only this if you want no parallel geometry logger.info( "PESMan export successful: Id {} with {} job(s) exported\n".format( exportId, len(ExpGeomList))) return expDir, exportId, expDirs
def ImportJobs(dB, np, expFile, pesDir, iGl, isDel, isZipped, logger): # imports jobs from a given export.dat file exportDir = os.path.abspath( os.path.dirname(expFile)) # get the main export directory exportId = re.findall( r'Export(\d+)-', exportDir)[0] # get the export id, from the directroy name # so it seems the export.dat is really not needed to import a job, just the path directory is required with sqlConnect(dB) as con: cur = con.cursor() cur.execute('SELECT Status FROM Exports WHERE Id=?', (exportId, )) # check if the export id is open for import exp_row = cur.fetchone() assert exp_row, "Export Id = {} not found in data base".format( exportId) assert exp_row[0] == 0, "Export Id = {} is already closed.".format( exportId) jobs, geomIds = [], [] # now obtain list of jobs which can be imported. cur.execute("SELECT GeomId,CalcDir FROM ExpCalc where ExpId=?", (exportId, )) for geomId, calcDir in cur.fetchall(): dirFull = "{}/{}".format(exportDir, calcDir) # ab initio directory calcFile = "{0}/{1}/{1}.calc".format(exportDir, calcDir) # calcfile name if os.path.isfile(calcFile): # is this job successful? jobs.append([ dirFull, calcFile, pesDir, iGl, isZipped, isDel, logger.info if np == 1 else print ]) geomIds.append(geomId) if np == 1: res = [ImportCalc(i) for i in jobs] else: # if parallel import is requested pool = Pool(processes=np) res = pool.map(ImportCalc, jobs) pool.close() cur.executemany( "INSERT INTO Calc (GeomId,CalcId,Dir,StartGId,Results) VALUES (?, ?, ?, ?, ?)", res) cur.executemany('DELETE FROM ExpCalc WHERE ExpId=? AND GeomId=? ', [(exportId, geomId) for geomId in geomIds]) cur.execute( "UPDATE Exports SET ImpDT=strftime('%H:%M:%S %d-%m-%Y', datetime('now', 'localtime')) WHERE Id=?", (exportId, )) cur.execute("SELECT count(*) FROM ExpCalc WHERE ExpId=?", (exportId, )) if cur.fetchone()[0] == 0: cur.execute("UPDATE Exports SET Status=1 WHERE Id=?", (exportId, )) logger.info('Export Id={} is now closed.'.format(exportId)) if isDel: logger.info("Deleting export directory {}".format(exportDir)) shutil.rmtree(exportDir) else: logger.info('Export Id={} is not closed.'.format(exportId)) logger.info("{} Job(s) have been successfully imported.\n".format( len(geomIds)))
def GetExpGeomNearNbr(dB, calcId, gidList, sidList, jobs, maxDepth, constDb, inclPath): # Get the exportable geometries and their start id for mulit jobs, returns with sqlConnect(dB) as con: cur = con.cursor() # get jobs that is already done and add to excludegeomlist cur.execute("SELECT GeomId,Id FROM Calc WHERE CalcId=?", (calcId, )) DictCalcId = dict(cur.fetchall()) CalcGeomIds = set(DictCalcId.keys()) ExcludeGeomIds = CalcGeomIds.copy() cur.execute("SELECT GeomId FROM ExpCalc WHERE CalcId=?", (calcId, )) ExcludeGeomIds.update((i for (i, ) in cur)) if gidList: sidList += [-1] * (len(gidList) - len(sidList) ) # fill sidList if some missing DictStartId = dict(zip(gidList, sidList)) # create a dict of start ids cond = [] if gidList: cond.append('id in (' + ",".join(map(str, gidList)) + ')') #include gidlist if constDb: cond.append('(' + constDb + ')') #include constraint if not inclPath: cond.append("( tags NOT LIKE '%path%' or tags is null)" ) #exclude pathological # `tags is null` is added as `not like '%path%'` doesn't match `null`!!! Is this the correct way? cond = ' where ' + ' and '.join(cond) if cond else '' cur.execute('SELECT Id,Nbr FROM Geometry' + cond) expGClist = [] # list that collects the tuple exportable jobs info fullGeomList = [] # a naive approach: store all the missed geometries for geomId, nbrList in cur: if geomId in ExcludeGeomIds: continue # geometry already exist, skip if gidList and DictStartId[ geomId] >= 0: # negetive start id will go to main neighbour searching nbrId = DictStartId[geomId] # i.e. 0 or positive startid given if not nbrId: # 0 startid nothing to do here expGClist.append((geomId, 0)) elif nbrId in CalcGeomIds: # positive start id, include if calculation is already done expGClist.append((geomId, DictCalcId[nbrId])) if len(expGClist) == jobs: return expGClist # got all the geometries needed continue nbrList = list(map( int, nbrList.split())) # Care ful about integer mapping nbrId = nbrList[ 0] # for this initial loop only consider first neighbour if nbrId in CalcGeomIds: expGClist.append((geomId, DictCalcId[nbrId])) # got one match if len(expGClist) == jobs: return expGClist continue fullGeomList.append((geomId, nbrList)) #Get allowed depth. Provided every nbr list has same number of elements. This is be quite bad, do something better depth = maxDepth if maxDepth else len( fullGeomList[0][1]) if fullGeomList else 0 exportedGeom = set([]) for d in range(1, depth): # depth loop starting from 1 to end, for geomId, nbrList in fullGeomList: if geomId in exportedGeom: continue nbrId = nbrList[d] # get d-th neighbour if nbrId in CalcGeomIds: expGClist.append( (geomId, DictCalcId[nbrId]) ) # got one match now don't search for any other neighbours if len(expGClist) == jobs: return expGClist exportedGeom.add(geomId) assert expGClist, "No Exportable geometries found" # preventing null exports return expGClist
import numpy as np from sqlite3 import connect as sqlConnect from PESMan import parseConfig from ImpExp import parseResult # list of export.dat files are provided through the default command line arguments # if no argument is provided then only result is saved from the data base # Run this as `python <this_filename> <list of export.dat files> config = parseConfig() dB = config['DataBase']['db'] with sqlConnect(dB) as con: cur = con.cursor() # create a new table that will hold the semi successful jobs, no other information is required at this moment cur.executescript(''' CREATE TABLE IF NOT EXISTS SemiCalc( GeomId INTEGER NOT NULL, CalcId INTEGER NOT NULL, Results TEXT); ''') result = [] for expFile in sys.argv[1:]: # multiple expdirs is allowed through list input exportDir = os.path.abspath(os.path.dirname(expFile)) # get the main export directory exportId = re.findall(r'Export(\d+)-', exportDir)[0] # get the export id, from the directroy name calcId = re.findall(r'Export\d+-\w+(\d+)',exportDir)[0]
config = parseConfig() dbFile = config['DataBase']['db'] nbrDbFile = config['DataBase']['nbr'] dbExist = os.path.exists(dbFile) # neighbour limit to search for lim = 30 # #%%%%%%%%%%%%%%%%%%%%%%%%%% for scattering hyperspherical %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% sql_script = sql_script.replace('$$', 'rho REAL,\ntheta REAL,\nphi REAL,') rho = float(sys.argv[1]) ranges = [4.5, np.deg2rad(30), np.deg2rad(30)] if os.path.exists(nbrDbFile): os.remove(nbrDbFile) with sqlConnect(dbFile) as con, sqlConnect(nbrDbFile) as conNbr: cur = con.cursor() if not dbExist: cur.executescript(sql_script) curNbr = conNbr.cursor() curNbr.executescript(sql_nbrtable_commands) # create the geometry list here newGeomList = np.vstack([[rho, 0, 0], np.stack(np.mgrid[rho:rho:1j, 2:50:25j, 0:180:181j], axis=3).reshape(-1, 3), np.stack(np.mgrid[rho:rho:1j, 51:90:40j, 0:180:181j], axis=3).reshape(-1, 3)])