Example #1
0
def checkErr(stde, rsrc, tpr, persDir):
    """Check whether the error reported in a stderr file is fatal.

    The file is scanned line by line for 'Fatal error' or 'PLUMED ERROR'.
    Once one of these markers has been seen, any line mentioning
    'domain decomposition' (the marker line itself included) is treated as
    a recoverable wrong-number-of-cores problem: the configuration is
    extracted from the tpr file and tune.tune() is called again with one
    core less than rsrc.max.get('cores').

    stde    -- path of the stderr file to inspect
    rsrc    -- resources object; rsrc.max.get('cores') is read and the
               object is handed to tune.tune()
    tpr     -- run input file, passed to extractConf() and tune.tune()
    persDir -- persistent directory; 'conf.gro' is written into it

    Returns True if a fatal error was found that could not be recovered
    from. Returns False if the file does not exist, if it contains no
    fatal error, or if the error was a domain decomposition error that
    led to a re-tune.
    """
    if not os.path.exists(stde):
        # we assume it's a worker error
        return False
    fatalErr = False
    unrecoverable = False
    # 'with' guarantees the file is closed even when extractConf() or
    # tune.tune() raises halfway through the scan.
    with open(stde, 'r') as inf:
        for line in inf:
            if re.match(r'.*Fatal error.*', line):
                fatalErr = True
                log.debug("Found fatal error")
                unrecoverable = True
            if re.match(r'.*PLUMED ERROR.*', line):
                fatalErr = True
                log.debug("Found a PLUMED error.")
                unrecoverable = True
            if fatalErr and re.match(r'.*domain decomposition.*', line):
                # the number of cores is wrong: re-tune with one core less
                log.debug("Found domain decomp error")
                confFile = os.path.join(persDir, 'conf.gro')
                extractConf(tpr, confFile)
                tune.tune(rsrc, confFile, tpr, persDir,
                          rsrc.max.get('cores')-1)
                unrecoverable = False
                break
    return unrecoverable
Example #2
0
def checkErr(stde, rsrc, tpr, persDir):
    """Check whether the error reported in a stderr file is fatal.

    The file is scanned line by line for 'Fatal error' or 'PLUMED ERROR'.
    Once one of these markers has been seen, any line mentioning
    'domain decomposition' (the marker line itself included) is treated as
    a recoverable wrong-number-of-cores problem: the configuration is
    extracted from the tpr file and tune.tune() is called again with one
    core less than rsrc.max.get('cores').

    stde    -- path of the stderr file to inspect
    rsrc    -- resources object; rsrc.max.get('cores') is read and the
               object is handed to tune.tune()
    tpr     -- run input file, passed to extractConf() and tune.tune()
    persDir -- persistent directory; 'conf.gro' is written into it

    Returns True if a fatal error was found that could not be recovered
    from. Returns False if the file does not exist, if it contains no
    fatal error, or if the error was a domain decomposition error that
    led to a re-tune.
    """
    if not os.path.exists(stde):
        # we assume it's a worker error
        return False
    fatalErr = False
    unrecoverable = False
    # 'with' guarantees the file is closed even when extractConf() or
    # tune.tune() raises halfway through the scan.
    with open(stde, 'r') as inf:
        for line in inf:
            if re.match(r'.*Fatal error.*', line):
                fatalErr = True
                log.debug("Found fatal error")
                unrecoverable = True
            if re.match(r'.*PLUMED ERROR.*', line):
                fatalErr = True
                log.debug("Found a PLUMED error.")
                unrecoverable = True
            if fatalErr and re.match(r'.*domain decomposition.*', line):
                # the number of cores is wrong: re-tune with one core less
                log.debug("Found domain decomp error")
                confFile = os.path.join(persDir, 'conf.gro')
                extractConf(tpr, confFile)
                tune.tune(rsrc, confFile, tpr, persDir,
                          rsrc.max.get('cores')-1)
                unrecoverable = False
                break
    return unrecoverable
Example #3
0
def replica(inp):
    """Schedule the next part of a restartable mdrun with a PLUMED input.

    On each invocation the function
      * (re)initializes when the 'tpr' input differs from the one stored
        in the persistence file: existing run_??? directories are moved
        to a 'backup' directory and previous commands are cancelled;
      * calls extractData() and returns if any run_???/confout.part*.gro
        file exists (the run has finished);
      * otherwise checks the last command for errors, creates a new run
        directory with the tpr, the last checkpoint and the PLUMED file,
        and adds a new "replica/mdrun" command to the function output.

    inp -- the function run input object (provides testing(), getInput(),
           getInputValue(), the persistent/output/base directories, the
           function output and the finished command `cmd`, if any)

    Returns None when inp.testing() is true, otherwise the function
    output object.

    Raises MdrunError if the last command failed with an error that
    checkErr() reports as fatal, or if none of the last four entries of
    the trajectory list contains a trajectory.
    """
    if inp.testing():
        # if there are no inputs, we're testing whether the command can
        # run. The tool checks (trjcat, eneconv, gmxdump) are disabled
        # for this function.
        return
    persDir = inp.getPersistentDir()
    outDir = inp.getOutputDir()
    fo = inp.getFunctionOutput()
    rsrc = Resources(inp.getInputValue("resources"))
    rsrcFilename = os.path.join(persDir, 'rsrc.dat')
    # check whether we need to reinit
    pers = cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                                 "persistent.dat"))
    init = False
    lasttpr = pers.get('lasttpr')
    newtpr = inp.getInput('tpr')

    if newtpr != lasttpr:
        lasttpr = newtpr
        # The tpr changed or there was no previous command: purge the
        # persistent directory by moving the old run directories to a
        # backup directory.
        log.debug("(Re)initializing mdrun")
        oldRunDirs = glob.glob(os.path.join(persDir, "run_???"))
        if oldRunDirs:
            backupDir = os.path.join(persDir, "backup")
            try:
                os.mkdir(backupDir)
            except OSError:
                # best effort: the backup directory may already exist
                pass
            for runDir in oldRunDirs:
                try:
                    os.rename(runDir,
                              os.path.join(backupDir,
                                           os.path.split(runDir)[-1]))
                except OSError:
                    # best effort: a run dir that cannot be moved is left
                    # in place
                    pass
        init = True
        pers.set('lasttpr', lasttpr)
    elif inp.cmd is None:
        # same tpr and no finished command: nothing to do
        return fo

    if init:
        if rsrc.max.get('cores') is None:
            confFile = os.path.join(persDir, 'conf.gro')
            extractConf(newtpr, confFile)
            tune.tune(rsrc, confFile, newtpr, persDir)
        if inp.cmd is not None:
            log.debug("Canceling commands")
            fo.cancelPrevCommands()
        pers.set('initialized', True)
    elif rsrc.max.get('cores') is None:
        rsrc.load(rsrcFilename)
    if inp.cmd is not None:
        log.debug("Return code was %s"%str(inp.cmd.getReturncode()))

    # try to find out whether the run has already finished
    confout = glob.glob(os.path.join(persDir, "run_???", "confout.part*.gro"))
    if len(confout) > 0:
        log.debug("Extracting data. ")
        # confout exists. we're finished. Concatenate all the runs if
        # we need to, but first create the output dict
        extractData(confout, outDir, persDir, fo)
        return fo

    tfc = TrajFileCollection(persDir)
    # first check whether we got an error code back
    if (inp.cmd is not None) and inp.cmd.getReturncode() != 0:
        # there was a problem. Check the log
        stde = os.path.join(tfc.getLastDir(), "stderr")
        if checkErr(stde, rsrc, newtpr, persDir):
            if os.path.exists(stde):
                with open(stde, 'r') as stdef:
                    errmsg = unicode(stdef.read(), errors='ignore')
                raise MdrunError("Error running mdrun: %s"%errmsg)
    else:
        # now check whether any of the last 4 iterations produced
        # trajectories
        trajlist = tfc.getTrajList()
        if len(trajlist) > 4:
            haveRecentTraj = any(len(trajlist[-j-1]) > 0 for j in range(4))
            if not haveRecentTraj:
                stde = os.path.join(tfc.getLastDir(), "stderr")
                if os.path.exists(stde):
                    with open(stde, 'r') as stdef:
                        errmsg = unicode(stdef.read(), errors='ignore')
                else:
                    errmsg = ""
                raise MdrunError("Error running mdrun. No trajectories: %s"%
                                 errmsg)

    # Make a new directory with the continuation of this run
    newdirname = tfc.getNewRunDir()
    try:
        os.mkdir(newdirname)
    except OSError:
        pass
    tpr = newtpr
    src = os.path.join(inp.getBaseDir(), tpr)
    dst = os.path.join(newdirname, "topol.tpr")
    shutil.copy(src, dst)

    # handle command line inputs
    cmdlineInput = inp.getInput('cmdline_options')
    if cmdlineInput is not None:
        cmdlineOpts = shlex.split(cmdlineInput)
    else:
        cmdlineOpts = []
    prio = inp.getInput('priority')
    if prio is None:
        prio = 0

    # copy the checkpoint to the new cmd dir
    lastcpt = tfc.getLastCpt()
    if lastcpt is not None:
        shutil.copy(lastcpt, os.path.join(newdirname, "state.cpt"))

    # now add to the priority if this run has already been started
    completed = tfc.getFractionCompleted(tpr)
    if completed > 0:
        # now the priority ranges from 1 to 4, depending on how
        # far along the simulation is.
        prio += 1+int(3*(completed))
        log.debug("Setting new priority to %d because it's in progress"%
                  prio)

    # we can always add state.cpt, even if it doesn't exist.
    # include the plumed file here
    args = ["-quiet", "-s", "topol.tpr", "-noappend", "-cpi", "state.cpt",
            "-rcon", "0.7", "-plumed", "plumed.dat"]
    args.extend(cmdlineOpts)

    # any expected output files.
    newFileNr = tfc.getLastTrajNr()+1
    outputFiles = ["traj.part%04d.xtc"%newFileNr,
                   "traj.part%04d.trr"%newFileNr,
                   "confout.part%04d.gro"%newFileNr,
                   "ener.part%04d.edr"%newFileNr,
                   "dhdl.part%04d.xvg"%newFileNr,
                   "pullx.part%04d.xvg"%newFileNr,
                   "pullf.part%04d.xvg"%newFileNr,
                   "COLVAR",
                   "HILLS",
                   "bias.dat",
                   "state.cpt", "state_prev.cpt"]
    log.debug("Expected output files: %s"%outputFiles)
    cmd = cpc.command.Command(newdirname, "replica/mdrun", args,
                              minVersion=cpc.command.Version("4.5"),
                              addPriority=prio,
                              outputFiles=outputFiles)
    if inp.hasInput("resources") and inp.getInput("resources") is not None:
        rsrc.updateCmd(cmd)
    log.debug("Adding command")

    # copy the plumed file to the run dir
    plumed_inp = inp.getInput("plumed")
    log.debug("Adding the PLUMED file: %s"%plumed_inp)
    src = os.path.join(inp.getBaseDir(), plumed_inp)
    dst = os.path.join(newdirname, "plumed.dat")
    # check if we need to restart metadynamics: that is the case when the
    # previous run directory contains a HILLS file
    lasthills = None
    if tfc.lastDir is not None:
        lasthills = os.path.join(tfc.lastDir, "HILLS")
    if lasthills is not None and os.path.isfile(lasthills):
        # Read from the base-dir-resolved path, the same file the plain
        # copy below uses, instead of the raw input value.
        with open(src, 'r') as plumedFile:
            plumed_dat = plumedFile.read()
        log.debug("Adding a RESTART statement to the PLUMED file.")
        newplumed = re.sub(r"HILLS", "HILLS RESTART", plumed_dat)
        with open(dst, "w") as plumedOut:
            plumedOut.write(newplumed)
        shutil.copy(lasthills, os.path.join(newdirname, "HILLS"))
    else:
        shutil.copy(src, dst)

    fo.addCommand(cmd)
    if inp.getInputValue('tpr').isUpdated() and inp.cmd is not None:
        log.debug("Canceling commands")
        fo.cancelPrevCommands()
    # and save for further invocations
    rsrc.save(rsrcFilename)
    pers.write()
    return fo
Example #4
0
def mdrun(inp):
    """Schedule the next part of a restartable mdrun with a PLUMED input.

    On each invocation the function
      * (re)initializes when the 'tpr' input differs from the one stored
        in the persistence file: existing run_??? directories are moved
        to a 'backup' directory and previous commands are cancelled;
      * calls extractData() and returns if any run_???/confout.part*.gro
        file exists (the run has finished);
      * otherwise checks the last command for errors, creates a new run
        directory with the tpr, the last checkpoint and the PLUMED file,
        and adds a new "plumed/mdrun" command to the function output.

    inp -- the function run input object (provides testing(), getInput(),
           getInputValue(), the persistent/output/base directories, the
           function output and the finished command `cmd`, if any)

    Returns None when inp.testing() is true, otherwise the function
    output object.

    Raises MdrunError if the last command failed with an error that
    checkErr() reports as fatal, or if none of the last four entries of
    the trajectory list contains a trajectory.
    """
    if inp.testing():
        # if there are no inputs, we're testing whether the command can run
        cpc.util.plugin.testCommand("trjcat -version")
        cpc.util.plugin.testCommand("eneconv -version")
        cpc.util.plugin.testCommand("gmxdump -version")
        return
    persDir = inp.getPersistentDir()
    outDir = inp.getOutputDir()
    fo = inp.getFunctionOutput()
    rsrc = Resources(inp.getInputValue("resources"))
    rsrcFilename = os.path.join(persDir, 'rsrc.dat')
    # check whether we need to reinit
    pers = cpc.dataflow.Persistence(os.path.join(inp.getPersistentDir(),
                                                 "persistent.dat"))
    init = False
    lasttpr = pers.get('lasttpr')
    newtpr = inp.getInput('tpr')

    if newtpr != lasttpr:
        lasttpr = newtpr
        # The tpr changed or there was no previous command: purge the
        # persistent directory by moving the old run directories to a
        # backup directory.
        log.debug("(Re)initializing mdrun")
        oldRunDirs = glob.glob(os.path.join(persDir, "run_???"))
        if oldRunDirs:
            backupDir = os.path.join(persDir, "backup")
            try:
                os.mkdir(backupDir)
            except OSError:
                # best effort: the backup directory may already exist
                pass
            for runDir in oldRunDirs:
                try:
                    os.rename(runDir,
                              os.path.join(backupDir,
                                           os.path.split(runDir)[-1]))
                except OSError:
                    # best effort: a run dir that cannot be moved is left
                    # in place
                    pass
        init = True
        pers.set('lasttpr', lasttpr)
    elif inp.cmd is None:
        # same tpr and no finished command: nothing to do
        return fo

    if init:
        if rsrc.max.get('cores') is None:
            confFile = os.path.join(persDir, 'conf.gro')
            extractConf(newtpr, confFile)
            tune.tune(rsrc, confFile, newtpr, persDir)
        if inp.cmd is not None:
            log.debug("Canceling commands")
            fo.cancelPrevCommands()
        pers.set('initialized', True)
    elif rsrc.max.get('cores') is None:
        rsrc.load(rsrcFilename)
    if inp.cmd is not None:
        log.debug("Return code was %s"%str(inp.cmd.getReturncode()))

    # try to find out whether the run has already finished
    confout = glob.glob(os.path.join(persDir, "run_???", "confout.part*.gro"))
    if len(confout) > 0:
        log.debug("Extracting data. ")
        # confout exists. we're finished. Concatenate all the runs if
        # we need to, but first create the output dict
        extractData(confout, outDir, persDir, fo)
        return fo

    tfc = TrajFileCollection(persDir)
    # first check whether we got an error code back
    if (inp.cmd is not None) and inp.cmd.getReturncode() != 0:
        # there was a problem. Check the log
        stde = os.path.join(tfc.getLastDir(), "stderr")
        if checkErr(stde, rsrc, newtpr, persDir):
            if os.path.exists(stde):
                with open(stde, 'r') as stdef:
                    errmsg = unicode(stdef.read(), errors='ignore')
                raise MdrunError("Error running mdrun: %s"%errmsg)
    else:
        # now check whether any of the last 4 iterations produced
        # trajectories
        trajlist = tfc.getTrajList()
        if len(trajlist) > 4:
            haveRecentTraj = any(len(trajlist[-j-1]) > 0 for j in range(4))
            if not haveRecentTraj:
                stde = os.path.join(tfc.getLastDir(), "stderr")
                if os.path.exists(stde):
                    with open(stde, 'r') as stdef:
                        errmsg = unicode(stdef.read(), errors='ignore')
                else:
                    errmsg = ""
                raise MdrunError("Error running mdrun. No trajectories: %s"%
                                 errmsg)

    # Make a new directory with the continuation of this run
    newdirname = tfc.getNewRunDir()
    try:
        os.mkdir(newdirname)
    except OSError:
        pass
    tpr = newtpr
    src = os.path.join(inp.getBaseDir(), tpr)
    dst = os.path.join(newdirname, "topol.tpr")
    shutil.copy(src, dst)

    # handle command line inputs
    cmdlineInput = inp.getInput('cmdline_options')
    if cmdlineInput is not None:
        cmdlineOpts = shlex.split(cmdlineInput)
    else:
        cmdlineOpts = []
    prio = inp.getInput('priority')
    if prio is None:
        prio = 0

    # copy the checkpoint to the new cmd dir
    lastcpt = tfc.getLastCpt()
    if lastcpt is not None:
        shutil.copy(lastcpt, os.path.join(newdirname, "state.cpt"))

    # now add to the priority if this run has already been started
    completed = tfc.getFractionCompleted(tpr)
    if completed > 0:
        # now the priority ranges from 1 to 4, depending on how
        # far along the simulation is.
        prio += 1+int(3*(completed))
        log.debug("Setting new priority to %d because it's in progress"%
                  prio)

    # we can always add state.cpt, even if it doesn't exist.
    # include the plumed file here
    args = ["-quiet", "-s", "topol.tpr", "-noappend", "-cpi", "state.cpt",
            "-rcon", "0.7", "-plumed", "plumed.dat"]
    args.extend(cmdlineOpts)

    # any expected output files.
    newFileNr = tfc.getLastTrajNr()+1
    outputFiles = ["traj.part%04d.xtc"%newFileNr,
                   "traj.part%04d.trr"%newFileNr,
                   "confout.part%04d.gro"%newFileNr,
                   "ener.part%04d.edr"%newFileNr,
                   "dhdl.part%04d.xvg"%newFileNr,
                   "pullx.part%04d.xvg"%newFileNr,
                   "pullf.part%04d.xvg"%newFileNr,
                   "COLVAR",
                   "HILLS",
                   "bias.dat",
                   "state.cpt", "state_prev.cpt"]
    log.debug("Expected output files: %s"%outputFiles)
    cmd = cpc.command.Command(newdirname, "plumed/mdrun", args,
                              minVersion=cpc.command.Version("4.5"),
                              addPriority=prio,
                              outputFiles=outputFiles)
    if inp.hasInput("resources") and inp.getInput("resources") is not None:
        rsrc.updateCmd(cmd)
    log.debug("Adding command")

    # copy the plumed file to the run dir
    plumed_inp = inp.getInput("plumed")
    log.debug("Adding the PLUMED file: %s"%plumed_inp)
    src = os.path.join(inp.getBaseDir(), plumed_inp)
    dst = os.path.join(newdirname, "plumed.dat")
    # check if we need to restart metadynamics: that is the case when the
    # previous run directory contains a HILLS file
    lasthills = None
    if tfc.lastDir is not None:
        lasthills = os.path.join(tfc.lastDir, "HILLS")
    if lasthills is not None and os.path.isfile(lasthills):
        # Read from the base-dir-resolved path, the same file the plain
        # copy below uses, instead of the raw input value.
        with open(src, 'r') as plumedFile:
            plumed_dat = plumedFile.read()
        log.debug("Adding a RESTART statement to the PLUMED file.")
        newplumed = re.sub(r"HILLS", "HILLS RESTART", plumed_dat)
        with open(dst, "w") as plumedOut:
            plumedOut.write(newplumed)
        shutil.copy(lasthills, os.path.join(newdirname, "HILLS"))
    else:
        shutil.copy(src, dst)

    fo.addCommand(cmd)
    if inp.getInputValue('tpr').isUpdated() and inp.cmd is not None:
        log.debug("Canceling commands")
        fo.cancelPrevCommands()
    # and save for further invocations
    rsrc.save(rsrcFilename)
    pers.write()
    return fo