def checkErr(stde, rsrc, tpr, persDir):
    """Check whether an error condition is recoverable.

    Scans the stderr file line by line. A line containing 'Fatal error' or
    'PLUMED ERROR' marks the run as failed. If, on that line or any later
    one, 'domain decomposition' is found, the error is treated as
    recoverable: a configuration is extracted from the tpr into
    persDir/conf.gro and tune.tune() is called again with one core fewer
    than rsrc.max['cores'].

    Arguments:
        stde:    path of the stderr file of the failed command.
        rsrc:    resources object; passed to tune.tune() and read through
                 rsrc.max.get('cores').
        tpr:     tpr file name, passed to extractConf() and tune.tune().
        persDir: directory in which conf.gro is written.

    Returns True if there is an issue (a fatal error that is not followed
    by a domain decomposition message). Returns False if the error is
    recoverable, if no fatal error was found, or if the stderr file does
    not exist.
    """
    if not os.path.exists(stde):
        # we assume it's a worker error
        return False
    fatalErr = False
    OK = True
    # 'with' guarantees the file is closed, also when extractConf() or
    # tune.tune() raise while we are still scanning.
    with open(stde, 'r') as inf:
        for line in inf:
            if re.match(r'.*Fatal error.*', line):
                fatalErr = True
                log.debug("Found fatal error")
                OK = False
            if re.match(r'.*PLUMED ERROR.*', line):
                fatalErr = True
                log.debug("Found a PLUMED error.")
                OK = False
            if fatalErr and re.match(r'.*domain decomposition.*', line):
                # the number of cores is wrong
                log.debug("Found domain decomp error")
                confFile = os.path.join(persDir, 'conf.gro')
                extractConf(tpr, confFile)
                # NOTE(review): assumes rsrc.max has 'cores' set at this
                # point; a None value would raise a TypeError here.
                tune.tune(rsrc, confFile, tpr, persDir,
                          rsrc.max.get('cores') - 1)
                OK = True
                break
    return not OK
def replica(inp):
    """Schedule the next "replica/mdrun" command of a run, or collect results.

    On each invocation this function:

    1. Compares the 'tpr' input with the 'lasttpr' value stored in
       persistent.dat. When they differ, the existing run_??? directories
       in the persistent directory are moved into a 'backup' subdirectory
       and, if no maximum core count is set, the resources are tuned.
    2. If any run_???/confout.part*.gro file exists, calls extractData()
       and returns.
    3. Otherwise checks the outcome of the previous command, creates a new
       run directory, copies topol.tpr, the last checkpoint and the PLUMED
       input (plus the previous HILLS file, if there is one) into it, and
       adds a new command to the function output.

    Arguments:
        inp: the function input object; it supplies the inputs ('tpr',
             'resources', 'cmdline_options', 'priority', 'plumed'), the
             persistent/output/base directories, the function output and
             the finished command (inp.cmd), if any.

    Returns the function output object, or None when inp.testing() is true.

    Raises MdrunError when the previous command returned a non-zero code
    and checkErr() reports an issue, or when the trajectory list has more
    than four entries and the last four are all empty.
    """
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run.
        # No external executables are probed for this function.
        return

    def readStderr(path):
        # Return the contents of a stderr file, or "" if it doesn't exist.
        if not os.path.exists(path):
            return ""
        with open(path, 'r') as stdef:
            return unicode(stdef.read(), errors='ignore')

    persDir = inp.getPersistentDir()
    outDir = inp.getOutputDir()
    fo = inp.getFunctionOutput()
    rsrc = Resources(inp.getInputValue("resources"))
    rsrcFilename = os.path.join(persDir, 'rsrc.dat')
    # check whether we need to reinit
    pers = cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                                 "persistent.dat"))
    init = False
    lasttpr = pers.get('lasttpr')
    newtpr = inp.getInput('tpr')
    if newtpr != lasttpr:
        lasttpr = newtpr
        # there was no previous command (for this tpr).
        # purge the persistent directory, by moving the old run
        # directories to a backup directory
        log.debug("(Re)initializing mdrun")
        oldRunDirs = glob.glob(os.path.join(persDir, "run_???"))
        if oldRunDirs:
            backupDir = os.path.join(persDir, "backup")
            try:
                os.mkdir(backupDir)
            except OSError:
                # best effort: the backup directory may already exist
                pass
            for runDir in oldRunDirs:
                try:
                    os.rename(runDir,
                              os.path.join(backupDir,
                                           os.path.split(runDir)[-1]))
                except OSError as e:
                    # best effort, but leave a trace: a directory that
                    # stays behind is seen again by the globs below.
                    log.debug("Could not move %s to backup: %s" %
                              (runDir, e))
        init = True
        pers.set('lasttpr', lasttpr)
    elif inp.cmd is None:
        # same tpr and no finished command: nothing to do
        return fo

    if init:
        if rsrc.max.get('cores') is None:
            confFile = os.path.join(persDir, 'conf.gro')
            extractConf(newtpr, confFile)
            tune.tune(rsrc, confFile, newtpr, persDir)
        if inp.cmd is not None:
            log.debug("Canceling commands")
            fo.cancelPrevCommands()
        pers.set('initialized', True)
    elif rsrc.max.get('cores') is None:
        rsrc.load(rsrcFilename)
    if inp.cmd is not None:
        log.debug("Return code was %s" % str(inp.cmd.getReturncode()))

    # try to find out whether the run has already finished
    confout = glob.glob(os.path.join(persDir, "run_???", "confout.part*.gro"))
    if confout:
        log.debug("Extracting data. ")
        # confout exists. we're finished. Concatenate all the runs if
        # we need to, but first create the output dict
        extractData(confout, outDir, persDir, fo)
        return fo

    tfc = TrajFileCollection(persDir)
    # first check whether we got an error code back
    if (inp.cmd is not None) and inp.cmd.getReturncode() != 0:
        # there was a problem. Check the log
        stde = os.path.join(tfc.getLastDir(), "stderr")
        if checkErr(stde, rsrc, newtpr, persDir):
            raise MdrunError("Error running mdrun: %s" % readStderr(stde))
    else:
        # now check whether any of the last 4 iterations produced
        # trajectories
        trajlist = tfc.getTrajList()
        if len(trajlist) > 4:
            if not any(len(trajs) > 0 for trajs in trajlist[-4:]):
                stde = os.path.join(tfc.getLastDir(), "stderr")
                raise MdrunError("Error running mdrun. No trajectories: %s" %
                                 readStderr(stde))

    # Make a new directory with the continuation of this run
    newdirname = tfc.getNewRunDir()
    try:
        os.mkdir(newdirname)
    except OSError:
        # best effort: the directory may already exist
        pass
    tpr = newtpr
    shutil.copy(os.path.join(inp.getBaseDir(), tpr),
                os.path.join(newdirname, "topol.tpr"))

    # handle command line inputs
    cmdlineInput = inp.getInput('cmdline_options')
    if cmdlineInput is not None:
        cmdlineOpts = shlex.split(cmdlineInput)
    else:
        cmdlineOpts = []
    prio = inp.getInput('priority')
    if prio is None:
        prio = 0

    lastcpt = tfc.getLastCpt()
    # copy the checkpoint to the new cmd dir
    if lastcpt is not None:
        shutil.copy(lastcpt, os.path.join(newdirname, "state.cpt"))
    # now add to the priority if this run has already been started
    completed = tfc.getFractionCompleted(tpr)
    if completed > 0:
        # now the priority ranges from 1 to 4, depending on how
        # far along the simulation is.
        prio += 1 + int(3 * completed)
        log.debug("Setting new priority to %d because it's in progress" %
                  prio)

    # we can always add state.cpt, even if it doesn't exist.
    # include the plumed file here
    args = ["-quiet", "-s", "topol.tpr", "-noappend", "-cpi", "state.cpt",
            "-rcon", "0.7", "-plumed", "plumed.dat"]
    args.extend(cmdlineOpts)

    # any expected output files.
    newFileNr = tfc.getLastTrajNr() + 1
    outputFiles = ["traj.part%04d.xtc" % newFileNr,
                   "traj.part%04d.trr" % newFileNr,
                   "confout.part%04d.gro" % newFileNr,
                   "ener.part%04d.edr" % newFileNr,
                   "dhdl.part%04d.xvg" % newFileNr,
                   "pullx.part%04d.xvg" % newFileNr,
                   "pullf.part%04d.xvg" % newFileNr,
                   "COLVAR",
                   "HILLS",
                   "bias.dat",
                   "state.cpt",
                   "state_prev.cpt"]
    log.debug("Expected output files: %s" % outputFiles)
    cmd = cpc.command.Command(newdirname, "replica/mdrun", args,
                              minVersion=cpc.command.Version("4.5"),
                              addPriority=prio,
                              outputFiles=outputFiles)
    if inp.hasInput("resources") and inp.getInput("resources") is not None:
        rsrc.updateCmd(cmd)
    log.debug("Adding command")

    # copy the plumed file to the run dir
    plumed_inp = inp.getInput("plumed")
    log.debug("Adding the PLUMED file: %s" % plumed_inp)
    src = os.path.join(inp.getBaseDir(), plumed_inp)
    dst = os.path.join(newdirname, "plumed.dat")
    # check if we need to restart metadynamics: that is the case when the
    # previous run directory left a HILLS file behind
    lasthills = None
    if tfc.lastDir is not None:
        hillsCandidate = os.path.join(tfc.lastDir, "HILLS")
        if os.path.isfile(hillsCandidate):
            lasthills = hillsCandidate
    if lasthills is not None:
        # Read the same base-dir-resolved path that the plain copy below
        # uses, rather than the bare input value.
        with open(src, 'r') as plumedFile:
            plumed_dat = plumedFile.read()
        log.debug("Adding a RESTART statement to the PLUMED file.")
        newplumed = re.sub(r"HILLS", "HILLS RESTART", plumed_dat)
        with open(dst, "w") as plumedOut:
            plumedOut.write(newplumed)
        shutil.copy(lasthills, os.path.join(newdirname, "HILLS"))
    else:
        shutil.copy(src, dst)

    fo.addCommand(cmd)
    if inp.getInputValue('tpr').isUpdated() and inp.cmd is not None:
        log.debug("Canceling commands")
        fo.cancelPrevCommands()

    # and save for further invocations
    rsrc.save(rsrcFilename)
    pers.write()
    return fo
def mdrun(inp):
    """Schedule the next "plumed/mdrun" command of a run, or collect results.

    On each invocation this function:

    1. Compares the 'tpr' input with the 'lasttpr' value stored in
       persistent.dat. When they differ, the existing run_??? directories
       in the persistent directory are moved into a 'backup' subdirectory
       and, if no maximum core count is set, the resources are tuned.
    2. If any run_???/confout.part*.gro file exists, calls extractData()
       and returns.
    3. Otherwise checks the outcome of the previous command, creates a new
       run directory, copies topol.tpr, the last checkpoint and the PLUMED
       input (plus the previous HILLS file, if there is one) into it, and
       adds a new command to the function output.

    Arguments:
        inp: the function input object; it supplies the inputs ('tpr',
             'resources', 'cmdline_options', 'priority', 'plumed'), the
             persistent/output/base directories, the function output and
             the finished command (inp.cmd), if any.

    Returns the function output object, or None when inp.testing() is true.

    Raises MdrunError when the previous command returned a non-zero code
    and checkErr() reports an issue, or when the trajectory list has more
    than four entries and the last four are all empty.
    """
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("trjcat -version")
        cpc.util.plugin.testCommand("eneconv -version")
        cpc.util.plugin.testCommand("gmxdump -version")
        return

    def readStderr(path):
        # Return the contents of a stderr file, or "" if it doesn't exist.
        if not os.path.exists(path):
            return ""
        with open(path, 'r') as stdef:
            return unicode(stdef.read(), errors='ignore')

    persDir = inp.getPersistentDir()
    outDir = inp.getOutputDir()
    fo = inp.getFunctionOutput()
    rsrc = Resources(inp.getInputValue("resources"))
    rsrcFilename = os.path.join(persDir, 'rsrc.dat')
    # check whether we need to reinit
    pers = cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                                 "persistent.dat"))
    init = False
    lasttpr = pers.get('lasttpr')
    newtpr = inp.getInput('tpr')
    if newtpr != lasttpr:
        lasttpr = newtpr
        # there was no previous command (for this tpr).
        # purge the persistent directory, by moving the old run
        # directories to a backup directory
        log.debug("(Re)initializing mdrun")
        oldRunDirs = glob.glob(os.path.join(persDir, "run_???"))
        if oldRunDirs:
            backupDir = os.path.join(persDir, "backup")
            try:
                os.mkdir(backupDir)
            except OSError:
                # best effort: the backup directory may already exist
                pass
            for runDir in oldRunDirs:
                try:
                    os.rename(runDir,
                              os.path.join(backupDir,
                                           os.path.split(runDir)[-1]))
                except OSError as e:
                    # best effort, but leave a trace: a directory that
                    # stays behind is seen again by the globs below.
                    log.debug("Could not move %s to backup: %s" %
                              (runDir, e))
        init = True
        pers.set('lasttpr', lasttpr)
    elif inp.cmd is None:
        # same tpr and no finished command: nothing to do
        return fo

    if init:
        if rsrc.max.get('cores') is None:
            confFile = os.path.join(persDir, 'conf.gro')
            extractConf(newtpr, confFile)
            tune.tune(rsrc, confFile, newtpr, persDir)
        if inp.cmd is not None:
            log.debug("Canceling commands")
            fo.cancelPrevCommands()
        pers.set('initialized', True)
    elif rsrc.max.get('cores') is None:
        rsrc.load(rsrcFilename)
    if inp.cmd is not None:
        log.debug("Return code was %s" % str(inp.cmd.getReturncode()))

    # try to find out whether the run has already finished
    confout = glob.glob(os.path.join(persDir, "run_???", "confout.part*.gro"))
    if confout:
        log.debug("Extracting data. ")
        # confout exists. we're finished. Concatenate all the runs if
        # we need to, but first create the output dict
        extractData(confout, outDir, persDir, fo)
        return fo

    tfc = TrajFileCollection(persDir)
    # first check whether we got an error code back
    if (inp.cmd is not None) and inp.cmd.getReturncode() != 0:
        # there was a problem. Check the log
        stde = os.path.join(tfc.getLastDir(), "stderr")
        if checkErr(stde, rsrc, newtpr, persDir):
            raise MdrunError("Error running mdrun: %s" % readStderr(stde))
    else:
        # now check whether any of the last 4 iterations produced
        # trajectories
        trajlist = tfc.getTrajList()
        if len(trajlist) > 4:
            if not any(len(trajs) > 0 for trajs in trajlist[-4:]):
                stde = os.path.join(tfc.getLastDir(), "stderr")
                raise MdrunError("Error running mdrun. No trajectories: %s" %
                                 readStderr(stde))

    # Make a new directory with the continuation of this run
    newdirname = tfc.getNewRunDir()
    try:
        os.mkdir(newdirname)
    except OSError:
        # best effort: the directory may already exist
        pass
    tpr = newtpr
    shutil.copy(os.path.join(inp.getBaseDir(), tpr),
                os.path.join(newdirname, "topol.tpr"))

    # handle command line inputs
    cmdlineInput = inp.getInput('cmdline_options')
    if cmdlineInput is not None:
        cmdlineOpts = shlex.split(cmdlineInput)
    else:
        cmdlineOpts = []
    prio = inp.getInput('priority')
    if prio is None:
        prio = 0

    lastcpt = tfc.getLastCpt()
    # copy the checkpoint to the new cmd dir
    if lastcpt is not None:
        shutil.copy(lastcpt, os.path.join(newdirname, "state.cpt"))
    # now add to the priority if this run has already been started
    completed = tfc.getFractionCompleted(tpr)
    if completed > 0:
        # now the priority ranges from 1 to 4, depending on how
        # far along the simulation is.
        prio += 1 + int(3 * completed)
        log.debug("Setting new priority to %d because it's in progress" %
                  prio)

    # we can always add state.cpt, even if it doesn't exist.
    # include the plumed file here
    args = ["-quiet", "-s", "topol.tpr", "-noappend", "-cpi", "state.cpt",
            "-rcon", "0.7", "-plumed", "plumed.dat"]
    args.extend(cmdlineOpts)

    # any expected output files.
    newFileNr = tfc.getLastTrajNr() + 1
    outputFiles = ["traj.part%04d.xtc" % newFileNr,
                   "traj.part%04d.trr" % newFileNr,
                   "confout.part%04d.gro" % newFileNr,
                   "ener.part%04d.edr" % newFileNr,
                   "dhdl.part%04d.xvg" % newFileNr,
                   "pullx.part%04d.xvg" % newFileNr,
                   "pullf.part%04d.xvg" % newFileNr,
                   "COLVAR",
                   "HILLS",
                   "bias.dat",
                   "state.cpt",
                   "state_prev.cpt"]
    log.debug("Expected output files: %s" % outputFiles)
    cmd = cpc.command.Command(newdirname, "plumed/mdrun", args,
                              minVersion=cpc.command.Version("4.5"),
                              addPriority=prio,
                              outputFiles=outputFiles)
    if inp.hasInput("resources") and inp.getInput("resources") is not None:
        rsrc.updateCmd(cmd)
    log.debug("Adding command")

    # copy the plumed file to the run dir
    plumed_inp = inp.getInput("plumed")
    log.debug("Adding the PLUMED file: %s" % plumed_inp)
    src = os.path.join(inp.getBaseDir(), plumed_inp)
    dst = os.path.join(newdirname, "plumed.dat")
    # check if we need to restart metadynamics: that is the case when the
    # previous run directory left a HILLS file behind
    lasthills = None
    if tfc.lastDir is not None:
        hillsCandidate = os.path.join(tfc.lastDir, "HILLS")
        if os.path.isfile(hillsCandidate):
            lasthills = hillsCandidate
    if lasthills is not None:
        # Read the same base-dir-resolved path that the plain copy below
        # uses, rather than the bare input value.
        with open(src, 'r') as plumedFile:
            plumed_dat = plumedFile.read()
        log.debug("Adding a RESTART statement to the PLUMED file.")
        newplumed = re.sub(r"HILLS", "HILLS RESTART", plumed_dat)
        with open(dst, "w") as plumedOut:
            plumedOut.write(newplumed)
        shutil.copy(lasthills, os.path.join(newdirname, "HILLS"))
    else:
        shutil.copy(src, dst)

    fo.addCommand(cmd)
    if inp.getInputValue('tpr').isUpdated() and inp.cmd is not None:
        log.debug("Canceling commands")
        fo.cancelPrevCommands()

    # and save for further invocations
    rsrc.save(rsrcFilename)
    pers.write()
    return fo