def main():
    """Pull PIMC output files from the cluster and optionally reduce them.

    Options come from ``cT.parseCMD()``.  Unless ``args.pulled`` is set, an
    SSH/SFTP session is opened and, for every entry of ``args.targetDir``
    whose name contains 'seed', each file in its ``OUTPUT`` directory whose
    name contains one of ``fileTypes`` is downloaded into a newly created
    local directory named by ``cT.returnSeedDirName``; afterwards
    ``cT.renameFilesInDirecs(args.delDir)`` is called.  ``args.Crunch`` and
    ``args.trimData`` then optionally trigger ``cT.crunchData`` and
    ``cT.trimData``.

    Exits through ``sys.exit`` if a local seed directory already exists.
    The SFTP and SSH sessions are closed even when the pull aborts.
    """
    args = cT.parseCMD()
    passwd = cT.Credentials(args.UserName)

    # define all data you want to pull from pimc data files IN ORDER.
    # colNums / observables carry one entry per non-log file type.
    fileTypes = ['log', 'estimator', 'super', 'bipart_dens', 'ntWind']
    colNums = [[4, 11, 12, 13, -4], [0, 1, 2, 3], [0, 1], [0, 1, 2]]
    observables = [['E', 'Cv1', 'Cv2', 'Cv3', 'N'],
                   ['rho_s/rho', 'Wx^2', 'Wy^2', 'Wz^2'],
                   ['filmDens', 'bulkDens'],
                   ['W^2', 'W_{2d}^2', 'W_{3d}^2']]

    # grab types of estimators from above.  Remove log file.
    estimTypes = [t for t in fileTypes if t != 'log']

    # create list of reduced file names for combined data.
    fileNames = ['Reduced' + t.capitalize() + 'Data.dat' for t in estimTypes]
    # Single-argument print(): same text under Python 2 and Python 3.
    print('\nCreating:  ' + str(fileNames) + ' \n')

    # if data doesn't exist, pull it from cluster
    if not args.pulled:
        print('Pulling data from cluster.')

        # create ssh and sftp instances
        ssh = paramiko.SSHClient()
        sftp = None
        try:
            ssh.load_host_keys(os.path.expanduser(
                os.path.join("~", ".ssh", "known_hosts")))
            ssh.connect('', username=args.UserName, password=passwd)
            sftp = ssh.open_sftp()

            # move to desired directory on cluster
            sftp.chdir(args.targetDir)

            # Keep only the seed directories.  A new list is built because
            # popping from a list while iterating over it skips entries.
            seedDirs = [name for name in sftp.listdir() if 'seed' in name]

            # pull all requested file types into organized directories
            for seedDir in seedDirs:
                newName = cT.returnSeedDirName(seedDir)
                if os.path.exists("./" + newName):
                    sys.exit(newName + ' already exists.')
                os.makedirs("./" + newName)
                os.chdir("./" + newName)
                sftp.chdir('./' + seedDir + '/OUTPUT/')

                remoteFiles = sftp.listdir()
                for thing in fileTypes:
                    for remoteFile in remoteFiles:
                        if thing in remoteFile:
                            sftp.get('./' + remoteFile, './' + remoteFile)
                    print('pulled  ' + str(thing) + '  files for  '
                          + str(newName))

                sftp.chdir('../..')
                os.chdir('..')

            # Rename files to have seed number replace first three numbers
            # of pimcID.  Optionally delete all seed directories that were
            # pulled from cluster.
            cT.renameFilesInDirecs(args.delDir)
        finally:
            # close instances of sftp and ssh, also on sys.exit / errors
            if sftp is not None:
                sftp.close()
            ssh.close()

    # optionally combine all data of the same temperature into one
    # much larger array.
    if args.Crunch:
        print('Crunching Data')
        cT.crunchData(estimTypes, colNums, observables)

    # optionally make a trimmed version of the data files that
    # makes all arrays the length of the shortest array.
    # NOT NECESSARY BUT SOMETIMES USEFUL.
    if args.trimData:
        print('Decided to make trimmed data files')
        cT.trimData(fileNames)
def main():
    """Pull PIMC output files from the cluster and optionally reduce them.

    Options come from ``cT.parseCMD()``.  Unless ``args.pulled`` is set, an
    SSH/SFTP session is opened and, for every entry of ``args.targetDir``
    whose name contains 'seed', each file in its ``OUTPUT`` directory whose
    name contains one of ``fileTypes`` is downloaded into a newly created
    local directory named by ``cT.returnSeedDirName``; afterwards
    ``cT.renameFilesInDirecs(args.delDir)`` is called.  ``args.Crunch`` and
    ``args.trimData`` then optionally trigger ``cT.crunchData`` and
    ``cT.trimData``.

    Exits through ``sys.exit`` if a local seed directory already exists.
    The SFTP and SSH sessions are closed even when the pull aborts.
    """
    args = cT.parseCMD()
    passwd = cT.Credentials(args.UserName)

    # define all data you want to pull from pimc data files IN ORDER.
    # colNums / observables carry one entry per non-log file type.
    fileTypes = ['log', 'estimator', 'super', 'bipart_dens', 'ntWind']
    colNums = [[4, 11, 12, 13, -4], [0, 1, 2, 3], [0, 1], [0, 1, 2]]
    observables = [['E', 'Cv1', 'Cv2', 'Cv3', 'N'],
                   ['rho_s/rho', 'Wx^2', 'Wy^2', 'Wz^2'],
                   ['filmDens', 'bulkDens'],
                   ['W^2', 'W_{2d}^2', 'W_{3d}^2']]

    # grab types of estimators from above.  Remove log file.
    estimTypes = [t for t in fileTypes if t != 'log']

    # create list of reduced file names for combined data.
    fileNames = ['Reduced' + t.capitalize() + 'Data.dat' for t in estimTypes]
    # Single-argument print(): same text under Python 2 and Python 3.
    print('\nCreating:  ' + str(fileNames) + ' \n')

    # if data doesn't exist, pull it from cluster
    if not args.pulled:
        print('Pulling data from cluster.')

        # create ssh and sftp instances
        ssh = paramiko.SSHClient()
        sftp = None
        try:
            ssh.load_host_keys(os.path.expanduser(
                os.path.join("~", ".ssh", "known_hosts")))
            ssh.connect('', username=args.UserName, password=passwd)
            sftp = ssh.open_sftp()

            # move to desired directory on cluster
            sftp.chdir(args.targetDir)

            # Keep only the seed directories.  A new list is built because
            # popping from a list while iterating over it skips entries.
            seedDirs = [name for name in sftp.listdir() if 'seed' in name]

            # pull all requested file types into organized directories
            for seedDir in seedDirs:
                newName = cT.returnSeedDirName(seedDir)
                if os.path.exists("./" + newName):
                    sys.exit(newName + ' already exists.')
                os.makedirs("./" + newName)
                os.chdir("./" + newName)
                sftp.chdir('./' + seedDir + '/OUTPUT/')

                remoteFiles = sftp.listdir()
                for thing in fileTypes:
                    for remoteFile in remoteFiles:
                        if thing in remoteFile:
                            sftp.get('./' + remoteFile, './' + remoteFile)
                    print('pulled  ' + str(thing) + '  files for  '
                          + str(newName))

                sftp.chdir('../..')
                os.chdir('..')

            # Rename files to have seed number replace first three numbers
            # of pimcID.  Optionally delete all seed directories that were
            # pulled from cluster.
            cT.renameFilesInDirecs(args.delDir)
        finally:
            # close instances of sftp and ssh, also on sys.exit / errors
            if sftp is not None:
                sftp.close()
            ssh.close()

    # optionally combine all data of the same temperature into one
    # much larger array.
    if args.Crunch:
        print('Crunching Data')
        cT.crunchData(estimTypes, colNums, observables)

    # optionally make a trimmed version of the data files that
    # makes all arrays the length of the shortest array.
    # NOT NECESSARY BUT SOMETIMES USEFUL.
    if args.trimData:
        print('Decided to make trimmed data files')
        cT.trimData(fileNames)
def _editSubmitLine(line, noSwaps, wormConst, comUpdate, restartArgs):
    """Return one submit-script line with all per-seed substitutions applied."""
    # optionally call an executable named pimcNoSwaps instead of pimc
    if noSwaps:
        line = re.sub(r'pimc', r'pimcNoSwaps', line)

    # replace the worm constant
    if '-C ' in line:
        line = re.sub(r'-C\s\d*\.?\d*(?:e[-+]\d+)?', wormConst, line)

    # replace (or add a new) COM update radius; echo lines are left alone
    if 'echo "' not in line:
        for exe in ('pimc', 'pimcNoSwaps'):
            if exe + ' -T ' in line:
                if '-D ' in line:
                    line = re.sub(r'-D\s\d*\.?\d*(?:e[-+]\d+)?',
                                  comUpdate, line)
                else:
                    line = re.sub(exe + ' ', exe + ' ' + comUpdate + ' ',
                                  line)
                break

    # replace "-E ####" with the new equilibration count and state file
    return re.sub(r'-E\s\d+', restartArgs, line)


def main():
    """Create per-seed job directories on the cluster from equilibrated states.

    For every seed between ``args.lowSeed`` and ``args.highSeed`` a seed
    directory (with ``out`` and ``OUTPUT`` inside) is created over SFTP, a
    submit script is generated locally by running gensubmit on a copy of the
    ``submit`` file whose ``-p 000`` was replaced by the seed, the generated
    script is edited (executable name, ``-C``, ``-D``, ``-E``/``-s``) and
    uploaded, and, when ``args.submitJobs`` is set, queued with ``qsub``.

    The worm constants and COM update radii are read from the third line of
    each file matching ``*log*`` in ``./logFiles``; state files are the
    ``*state*`` files in ``./stateFiles``.

    Exits through ``sys.exit`` when the submit file lacks ``-p 000``, when
    gensubmit did not leave exactly one submit script, or when the remote
    ``stateFiles`` directory is missing or differs from the local one.
    """
    # set number of equilibration steps
    equilNum = 1000

    # -------------------------------------------------------------------------
    # NOTE: NEW USERS WILL NEED TO CHANGE THESE STRINGS!!
    # full path to gensubmit must be supplied as below.
    #
    # NOTE: the submit file itself is kept next to stateFiles on the local
    # machine (see subFilePath below).
    genSubPath = '/home/max/Documents/Code/PIMC/SCRIPTS/MTG/'

    # commands to add directories to path where blitz, boost, pimc sit.
    # not sure why, but this must be done every time for paramiko.
    expLibs = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/users/m/t/mtgraves/local/lib ; '
    expPBSstuff = 'export PATH=/opt/pbs/bin/:$PATH ; '
    # -------------------------------------------------------------------------

    clusterCWDpre = '${PBS_O_WORKDIR}/../stateFiles/'

    # parse cmd line
    args = cT.parseCMD()

    # get username and password for VACC
    passwd = cT.Credentials(args.UserName)

    # create ssh and sftp instances
    ssh = paramiko.SSHClient()
    sftp = None
    try:
        ssh.load_host_keys(os.path.expanduser(
            os.path.join("~", ".ssh", "known_hosts")))
        ssh.connect('', username=args.UserName, password=passwd)
        sftp = ssh.open_sftp()

        # Move to desired directory on cluster from home and if it doesn't
        # exist, then create it.  This is given by the -t cmd line flag.
        try:
            sftp.chdir(args.targetDir)
        except IOError:
            sftp.mkdir(args.targetDir)
            sftp.chdir(args.targetDir)

        # array of seed numbers, between -L and -H flag from cmd line.
        seedNums = np.arange(args.lowSeed, args.highSeed + 1)

        # keep track of directory you are in in the terminal
        workingDir = os.getcwd()

        # get list of (g)ce-state files in ./stateFiles
        os.chdir('./stateFiles')
        stateFileList = sorted(glob.glob('*state*'))
        print('we have  ' + str(len(stateFileList))
              + '  different stateFiles')

        # get list of (g)ce-log files also.
        os.chdir('../logFiles')
        logFileList = sorted(glob.glob('*log*'))

        # build list of restart strings from the third line of each log file
        restartStrList = []
        for logFile in logFileList:
            with open(logFile) as inFile:
                for n, line in enumerate(inFile):
                    if n == 2:
                        restartStrList.append(line[2:])
                        break

        # build list of worm constants and COM update radii
        wormConstList = []
        comUpdateList = []
        for restartStr in restartStrList:
            wormConstList += re.findall(
                r'-C\s\d*\.?\d*(?:e[-+]\d+)?', restartStr)
            comUpdateList += re.findall(
                r'-D\s\d*\.?\d*(?:e[-+]\d+)?', restartStr)

        subNum = 0
        numToRepeat = 99999  # not currently set to start from multiple states
        if numToRepeat * len(stateFileList) < len(seedNums):
            sys.exit('Need to up numToRepeat')

        os.chdir('..')

        for nummm, seedNum in enumerate(seedNums):

            # we want to start a few seeds per state.
            if nummm != 0 and nummm % numToRepeat == 0:
                subNum += 1
            # NOTE(review): the original indentation was lost; these prints
            # are assumed to run once per seed.
            print(wormConstList[subNum])
            print(comUpdateList[subNum])
            print(stateFileList[subNum])

            # create seedXXX directory and its internal structure
            seedDirName = cT.returnSeedDirName(int(seedNum))
            sftp.mkdir(seedDirName)
            sftp.chdir('./' + seedDirName)
            sftp.mkdir('out')
            sftp.mkdir('OUTPUT')

            # change random number seed in submit file.
            # NOTE: Must have submit script next to the stateFiles directory.
            subFilePath = os.getcwd()
            if subFilePath[-1] != '/':
                subFilePath += '/'
            subFilePath += args.stateFilesDir + '/../submit'

            with open(subFilePath) as inFile, \
                    open(subFilePath + '_temp', 'w') as outFile:
                for n, line in enumerate(inFile):
                    if n == 0 and '-p 000' not in line:
                        sys.exit('Include -p 000 in submit script!')
                    outFile.write(
                        re.sub(r'-p 000', r'-p ' + str(seedNum), line))

            # run gensubmit to create initial submit script.
            # The command line is built from the user's own arguments.
            command = ('python ' + genSubPath + ' ' + subFilePath +
                       '_temp --cluster=bluemoon ' + '-m ' + args.memRequest)
            subprocess.check_call(command, shell=True)

            # store submit file name generated from gensubmit
            subFile = glob.glob('*submit-pimc.*')
            if len(subFile) != 1:
                sys.exit('you have more than one submit file here')
            subFile = subFile[0]

            # we check that there is a directory on the cluster called
            # stateFiles that contains exactly the (g)ce-state files
            # that are in the local stateFiles directory.
            # This double-storage is set up to make the user think about
            # what they are doing.
            try:
                sftp.chdir('../stateFiles')
                stFs = sorted(sftp.listdir())
                sftp.chdir('../' + seedDirName)
            except IOError:
                sys.exit('ERROR: Must have state files in directory called '+
                         'stateFiles in the parent directory of your jobs.')
            # Compared outside the try block: a bare except used to swallow
            # this SystemExit and report the wrong error.
            if stateFileList != stFs:
                sys.exit('ERROR: State files in targeted directory on '+
                         'cluster are different than on your machine.')

            # rewrite the generated submit script in a single pass
            os.chdir(workingDir)
            restartArgs = ('-E ' + str(equilNum) + ' -s ' + clusterCWDpre +
                           stateFileList[subNum])
            sF2 = subFile[:-4] + '_temp.pbs'
            with open(subFile) as inFile, open(sF2, 'w') as outFile:
                for line in inFile:
                    outFile.write(_editSubmitLine(
                        line, args.noSwaps, wormConstList[subNum],
                        comUpdateList[subNum], restartArgs))
            os.rename(sF2, subFile)

            # copy submit file over to bluemoon
            sftp.put('./' + subFile, './' + subFile)

            # optionally submit jobs
            if args.submitJobs:

                # build submit command
                submitStuff = 'qsub ' + subFile
                changeDir = 'cd ' + args.targetDir + '/' + seedDirName + ' ; '
                subComm = changeDir + expLibs + expPBSstuff + submitStuff

                # submit the command
                stdin, stdout, stderr = ssh.exec_command(subComm)
                print('Output:  ' + str(stdout.readlines()))
                # read stderr once; a second readlines() returns nothing
                errLines = stderr.readlines()
                if errLines:
                    print('Error:  ' + str(errLines))
                # NOTE(review): assumed to pace submissions only; the
                # original nesting of this sleep could not be recovered.
                time.sleep(5)

            sftp.chdir('..')
    finally:
        # close instances of sftp and ssh, also on sys.exit / errors
        if sftp is not None:
            sftp.close()
        ssh.close()
def main():
    """Write (and optionally submit) a resubmit script for each seed directory.

    For every seed between ``args.lowSeed`` and ``args.highSeed`` the
    restart command is taken from the ``-log-`` files in the seed's remote
    ``OUTPUT`` directory, its ``-E``, ``-S`` and ``-W`` values are replaced,
    and a ``resubmit-pimc.pbs`` script is written next to the original
    submit script, replacing each line that starts with ``pimc``.  Seeds
    with no restart string are reported and skipped.  When
    ``args.submitJobs`` is set the script is queued with ``qsub``.

    NOTE(review): several file-opening calls were garbled in the source this
    was restored from (``with as inFile:``); they are reconstructed here as
    ``sftp.open(...)`` on the remote files -- confirm against the original.

    Raises:
        IOError: if a seed directory holds no file containing 'submit-pimc'.
    """
    # unique tag for the name of the job to be presented in the scheduler.
    uTag = 'S05-T0.50'

    # set number of equilibration steps
    equilNum = 0

    # set number of bins to try
    binNum = 50000

    # -------------------------------------------------------------------------
    # commands to add directories to path where blitz, boost, pimc sit.
    # not sure why, but this must be done every time for paramiko.
    expLibs = 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/users/m/t/mtgraves/local/lib ; '
    expPBSstuff = 'export PATH=/opt/pbs/bin/:$PATH ; '
    # -------------------------------------------------------------------------

    reSubName = 'resubmit-pimc.pbs'

    # Opening statements for scripts converted from trestles: torque
    # parameters, then run from the node's /tmp directory for faster i/o.
    header = '\n'.join([
        '#!/bin/bash',
        '',
        '#PBS -S /bin/bash',
        '#PBS -l pmem=1gb,pvmem=1gb',
        '#PBS -l nodes=1:ppn=1',
        '#PBS -l walltime=30:00:00',
        '#PBS -N pimc-%s' % uTag,
        '#PBS -V',
        '#PBS -j oe',
        '#PBS -o out/pimc-${PBS_JOBID}',
        '#PBS -m n',
        '',
        'mkdir /tmp/${PBS_JOBID}',
        'cd /tmp/${PBS_JOBID}',
        'mkdir OUTPUT',
        'gzip ${PBS_O_WORKDIR}/OUTPUT/*',
        'cp -r ${PBS_O_WORKDIR}/OUTPUT/* ./OUTPUT/',
        'gunzip OUTPUT/*',
        'echo "Starting PBS script submit-pimc.pbs at:`date`" ',
        'echo " host: ${PBS_O_HOST}"',
        'echo " node: `cat ${PBS_NODEFILE}`"',
        'echo " jobid: ${PBS_JOBID}"',
        '',
        '',
    ])

    # Closing statements that bring the data back from the node.
    # NOTE(review): the gunzip line targets the OUTPUT directory itself,
    # not OUTPUT/*; left as found.
    closing = ('\n\ngzip OUTPUT/*'
               '\ncp OUTPUT/* ${PBS_O_WORKDIR}/OUTPUT'
               '\ncd ${PBS_O_WORKDIR}'
               '\ngunzip ${PBS_O_WORKDIR}/OUTPUT'
               '\nrm -r /tmp/${PBS_JOBID}')

    # parse cmd line
    args = cT.parseCMD()

    # get username and password for VACC
    passwd = cT.Credentials(args.UserName)

    # create ssh and sftp instances
    ssh = paramiko.SSHClient()
    sftp = None
    try:
        ssh.load_host_keys(os.path.expanduser(
            os.path.join("~", ".ssh", "known_hosts")))
        ssh.connect('', username=args.UserName, password=passwd)
        sftp = ssh.open_sftp()

        # Move to desired directory on cluster.
        sftp.chdir(args.targetDir)

        # array of seed numbers, between -L and -H flag from cmd line.
        seedNums = np.arange(args.lowSeed, args.highSeed + 1)

        # run through all seeds.
        for seedNum in seedNums:

            # move to seedXXX direc.
            seedDirName = cT.returnSeedDirName(int(seedNum))
            sftp.chdir('./' + seedDirName)

            # if a resubmit file already exists, delete it.
            if reSubName in sftp.listdir():
                sftp.remove(reSubName)

            # grab log file string from OUTPUT
            sftp.chdir('./OUTPUT')

            # Naturally sort the log files (account for +/- floats in name).
            # NOTE: This works for gce- files with all the same string
            # but varying chemical potential.  Any other use must be
            # thought about first as this hasn't been tested for other uses!
            logFileNames = natsort.natsorted(
                [f for f in sftp.listdir() if '-log-' in f])

            # Build one restart string per log file.  The command sits on
            # the second line of some logs and on the third line of others
            # (where the second line is short/blank), so take whichever
            # comes first.  The old code appended several copies per file,
            # which put restartStrings out of step with the pimc lines.
            restartStrings = []
            for logFileName in logFileNames:
                restartLine = None
                with sftp.open(logFileName) as inFile:
                    for n, line in enumerate(inFile):
                        if n == 1 and len(line) >= 10:
                            restartLine = line
                            break
                        if n == 2:
                            restartLine = line
                            break
                if restartLine is None:
                    continue
                # drop the leading two characters and the trailing newline
                command = (restartLine[2:-1] +
                           ' >> ${PBS_O_WORKDIR}/out/pimc-0.out 2>&1')
                command = re.sub(r'-E\s\d+', r'-E ' + str(equilNum), command)
                command = re.sub(r'-S\s\d+', r'-S ' + str(binNum), command)
                command = re.sub(r'-W\s\d+', r'-W ' + str(29), command)
                restartStrings.append(command)

            sftp.chdir('..')

            # check if no restart string exists, in this case just skip the
            # seed (hackyyyy -- needs to start it over in an intelligent
            # manner.)
            if not restartStrings:
                print('seed number  ' + str(seedNum) + '  is empty')
                sftp.chdir('..')
                continue

            # determine name of the original submit script (last match wins,
            # as before); reset per seed so a stale name is never reused.
            subFileName = None
            for f in sftp.listdir():
                if 'submit-pimc' in f:
                    subFileName = f
            if subFileName is None:
                raise IOError('no submit-pimc script found in ' + seedDirName)

            # read the original submit script once
            with sftp.open(subFileName) as inFile:
                subLines = list(inFile)

            # determine if original submit script was from trestles or
            # bluemoon: trestles scripts mention 'uvm104' on their 7th line.
            tresTOblue = len(subLines) > 6 and 'uvm104' in subLines[6]

            numStr = 0
            with sftp.open(reSubName, 'w') as outFile:
                if tresTOblue:
                    # original submit script came from trestles: write a
                    # fresh script around the restart commands.
                    outFile.write(header)
                    for line in subLines:
                        if line[:4] == 'pimc':
                            # newline added so commands do not fuse together
                            outFile.write(restartStrings[numStr] + '\n')
                            numStr += 1
                    outFile.write(closing)
                else:
                    # original submit script came from bluemoon: copy it,
                    # swapping in the restart commands and the job tag.
                    for line in subLines:
                        if line[:4] == 'pimc':
                            # newline added so the next line stays separate
                            outFile.write(restartStrings[numStr] + '\n')
                            numStr += 1
                        elif r'#PBS -N' in line:
                            outFile.write(line[:-2] + uTag + '\n')
                        elif r'mkdir OUTPUT' in line:
                            outFile.write(line)
                            outFile.write('gzip ${PBS_O_WORKDIR}/OUTPUT/*\n')
                            outFile.write('cp -r ${PBS_O_WORKDIR}/OUTPUT/* ./OUTPUT/\n')
                            outFile.write('gunzip OUTPUT/*\n')
                        else:
                            outFile.write(line)

            # Single-argument print(): same text under Python 2 and 3.
            print(restartStrings[0] + ' \n')

            # my dumb work-around for allowing writing on cluster to finish.
            time.sleep(5)

            # optionally submit jobs
            if args.submitJobs:

                # build submit command
                submitStuff = 'qsub ' + reSubName
                changeDir = 'cd ' + args.targetDir + '/' + seedDirName + ' ; '
                subComm = changeDir + expLibs + expPBSstuff + submitStuff

                # submit the command
                stdin, stdout, stderr = ssh.exec_command(subComm)
                print('Output:  ' + str(stdout.readlines()))
                # read stderr once; a second readlines() returns nothing
                errLines = stderr.readlines()
                if errLines:
                    print('Error:  ' + str(errLines))

            sftp.chdir('..')
    finally:
        # close instances of sftp and ssh, also on errors
        if sftp is not None:
            sftp.close()
        ssh.close()