def test_exitcode_error(self):
    """Exercise ``Custodian.run`` with ``ExitCodeJob(0)`` and ``ExitCodeJob(1)``.

    The ``ExitCodeJob(0)`` run and the ``ExitCodeJob(1)`` run with
    ``terminate_on_nonzero_returncode=False`` must complete without raising;
    the default ``ExitCodeJob(1)`` run must raise ``RuntimeError``.
    """
    # No handlers are registered in any of the three scenarios.
    passing = Custodian([], [ExitCodeJob(0)])
    passing.run()

    failing = Custodian([], [ExitCodeJob(1)])
    self.assertRaises(RuntimeError, failing.run)

    tolerant = Custodian(
        [], [ExitCodeJob(1)], terminate_on_nonzero_returncode=False
    )
    tolerant.run()
def test_exitcode_error(self):
    """Exercise ``Custodian.run`` with ``ExitCodeJob(0)`` and ``ExitCodeJob(1)``.

    The default ``ExitCodeJob(1)`` run must raise ``ReturnCodeError`` and
    flag ``"nonzero_return_code"`` on the last ``run_log`` entry; the other
    two runs must complete without raising.
    """
    # No handlers are registered in any of the three scenarios.
    passing = Custodian([], [ExitCodeJob(0)])
    passing.run()

    failing = Custodian([], [ExitCodeJob(1)])
    self.assertRaises(ReturnCodeError, failing.run)
    last_entry = failing.run_log[-1]
    self.assertTrue(last_entry["nonzero_return_code"])

    tolerant = Custodian(
        [], [ExitCodeJob(1)], terminate_on_nonzero_returncode=False
    )
    tolerant.run()
def test_unrecoverable(self):
    """Compare ``ExampleHandler2`` and ``ExampleHandler2b`` runs.

    With ``ExampleHandler2`` the run must raise ``RuntimeError``; with
    ``ExampleHandler2b`` the run must complete. In both cases the handler
    must end up with ``has_error`` set.
    """
    njobs = 100
    # The same params dict is shared by both handlers and all jobs.
    params = {"initial": 0, "total": 0}

    raising_handler = ExampleHandler2(params)
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian([raising_handler], jobs, max_errors=njobs)
    self.assertRaises(RuntimeError, custodian.run)
    self.assertTrue(raising_handler.has_error)

    passing_handler = ExampleHandler2b(params)
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian([passing_handler], jobs, max_errors=njobs)
    custodian.run()
    self.assertTrue(passing_handler.has_error)
def run(self, job_cmd=None):
    """
    Run ``self.jobs`` through Custodian and write the calculation log.

    Args:
        job_cmd: command assigned to every job's ``job_cmd`` attribute;
            when None, ``self.job_cmd`` is used instead.

    Side effects:
        Appends one entry per job to ``self.cal_log`` and its id to
        ``self.job_ids``, then dumps ``self.cal_log`` to
        ``self.checkpoint_file`` if it is set, otherwise to
        ``Calibrate.LOG_FILE``.
    """
    # The command does not depend on the job, so choose it once.
    cmd = job_cmd if job_cmd is not None else self.job_cmd
    for j in self.jobs:
        j.job_cmd = cmd

    c = Custodian(self.handlers, self.jobs, max_errors=5)
    c.run()

    for j in self.jobs:
        self.cal_log.append({"job": j.as_dict(),
                             'job_id': j.job_id,
                             "corrections": [],
                             'final_energy': None})
        self.job_ids.append(j.job_id)

    log_file = self.checkpoint_file or Calibrate.LOG_FILE
    dumpfn(self.cal_log, log_file, cls=MontyEncoder, indent=4)
def run(self, fw_spec):
    """Run VASP under Custodian using ``self.custodian_handlers``.

    The VASP command is read from the ``VASP_CMD`` environment variable and
    split on whitespace. Jobs come from ``fw_spec['custodian_jobs']`` when
    that key is present; otherwise a single ``VaspJob`` is created whose
    output and stderr files are placed in ``self.run_dir``.

    Args:
        fw_spec: mapping that may contain a ``'custodian_jobs'`` entry.

    Raises:
        ValueError: if ``VASP_CMD`` is not set. This is checked even when
            ``fw_spec`` supplies its own jobs.
    """
    try:
        vasp_cmd = os.environ['VASP_CMD'].split()
    except KeyError:
        # Only a missing variable is expected here; anything else should
        # surface unchanged instead of being masked.
        raise ValueError('Unable to find vasp command')

    if 'custodian_jobs' in fw_spec:
        jobs = fw_spec['custodian_jobs']
    else:
        jobs = [
            VaspJob(vasp_cmd=vasp_cmd,
                    auto_npar=False,
                    output_file=os.path.join(self.run_dir, 'vasp.out'),
                    stderr_file=os.path.join(self.run_dir, 'std_err.txt'),
                    backup=False,
                    auto_gamma=False)
        ]

    custodian = Custodian(handlers=self.custodian_handlers,
                          jobs=jobs,
                          validators=None,
                          max_errors=10,
                          polling_time_step=10,
                          monitor_freq=30)
    custodian.run()
def run_task(self, fw_spec):
    """Run VASP under Custodian unless the existing OUTCAR looks complete.

    All files are read from the current working directory. The run is
    skipped only when ``OUTCAR`` can be parsed and its ``run_stats`` has
    exactly 7 entries; otherwise ``CONTCAR`` (if it parses as a structure)
    is promoted to ``POSCAR`` and VASP is run with error handlers.

    Args:
        fw_spec: mapping providing ``'vasp_cmd'``.
    """
    vasp_cmd = fw_spec['vasp_cmd']

    # Parsing the four input files raises early if any is missing or
    # malformed; the parsed objects themselves are not used further.
    Incar.from_file('INCAR')
    Kpoints.from_file('KPOINTS')
    Poscar.from_file('POSCAR')
    Potcar.from_file('POTCAR')

    # The original referenced an undefined ``work_dir`` here, so the check
    # always failed and VASP was always rerun. The inputs above are read
    # from the current directory, so OUTCAR is read from there as well.
    try:
        finished = len(Outcar('OUTCAR').run_stats) == 7
    except Exception:
        # Missing or unparsable OUTCAR: treat the calculation as not done.
        finished = False

    if finished:
        print('Vasp job was already done well. No need to rerun!')
        return

    try:
        # Only promote CONTCAR when it parses as a structure.
        Structure.from_file('CONTCAR')
        os.rename('CONTCAR', 'POSCAR')
    except Exception:
        # Best effort: keep the existing POSCAR when CONTCAR is unusable.
        pass

    job = VaspJob(vasp_cmd)
    handlers = [
        VaspErrorHandler(),
        UnconvergedErrorHandler(),
        FrozenJobErrorHandler(),
        NonConvergingErrorHandler(nionic_steps=2, change_algo=True),
        MeshSymmetryErrorHandler(),
    ]
    c = Custodian(handlers, [job], max_errors=10)
    c.run()
def run_task(self, fw_spec):
    """Run ``ATATInfDetJob`` under Custodian; detour if a correction was made.

    When the first entry of the Custodian result contains at least one
    correction, the ``stop`` file is removed and an ``FWAction`` detouring
    to a continuation ``InflectionDetectionFW`` workflow is returned.
    Otherwise nothing is returned.
    """
    continuation = self.get('continuation', False)
    # TODO: detour the firework pending the result
    custodian = Custodian(
        [ATATWalltimeHandler()],
        [ATATInfDetJob(continuation=continuation)],
        monitor_freq=1,
        polling_time_step=300,
    )
    cust_result = custodian.run()

    corrections = cust_result[0]['corrections']
    if len(corrections) == 0:
        return

    # we hit the walltime handler, detour another ID Firework
    os.remove('stop')
    from dfttk.fworks import InflectionDetectionFW
    from fireworks import Workflow

    # we have to add the calc locs for this calculation by hand
    # because the detour action seems to disable spec mods
    calc_locs = extend_calc_locs(self.get('name', 'InfDet'), fw_spec)
    detour_fw = InflectionDetectionFW(
        Structure.from_file('POSCAR'),
        continuation=True,
        spec={'calc_locs': calc_locs},
    )
    infdet_wf = Workflow([detour_fw])
    return FWAction(detours=[infdet_wf])
def do_run(args):
    """Run the jobs returned by ``get_runs`` under Custodian.

    Reads ``command`` (split on whitespace), ``target``, ``mode`` and
    ``max_steps`` from ``args``.
    """
    error_handlers = [VaspErrorHandler(), UnconvergedErrorHandler()]
    runs = get_runs(
        vasp_command=args.command.split(),
        target=args.target,
        mode=args.mode,
        max_steps=args.max_steps,
    )
    custodian = Custodian(error_handlers, runs, max_errors=10)
    custodian.run()
def run_qchem(cls, qcinp, implicit_solvent, mixed_aux_basis, mixed_basis,
              input_file="mol.qcinp", output_file="mol.qcout", gzipped=True,
              run_name=None):
    """Write the Q-Chem input and run it under Custodian.

    Steps, in order:
      * apply ``mixed_basis`` / ``mixed_aux_basis`` (when not None) to every
        job whose ``rem`` jobtype is not ``"sp"``;
      * pick the executable and the alternative command via
        ``cls.get_qchem_cmd`` and the two ``_is_openmp_*`` checks;
      * write ``input_file`` and, when solvent data is supplied, a
        ``solvent_data`` file in the current directory;
      * run a ``QchemJob`` with a ``QChemErrorHandler`` and call
        ``cls.clean_up``.

    Returns:
        Tuple ``(custodian_out, prev_qchem_dir)`` where ``prev_qchem_dir``
        is the working directory captured before the command lookup.
    """
    mol = qcinp.jobs[0].mol
    num_atoms = len(mol)

    for qc_job in qcinp.jobs:
        if qc_job.params["rem"]["jobtype"] == "sp":
            continue
        if mixed_basis is not None:
            qc_job.set_basis_set(mixed_basis)
        if mixed_aux_basis is not None:
            qc_job.set_aux_basis_set(mixed_aux_basis)

    prev_qchem_dir = os.getcwd()
    qc_exe, half_cpus_cmd, openmp_cmd = cls.get_qchem_cmd(qcinp, mol)

    # Echo QChemDrone log records at INFO level to stdout.
    logging.basicConfig(level=logging.INFO)
    qchem_logger = logging.getLogger('QChemDrone')
    qchem_logger.setLevel(logging.INFO)
    stdout_handler = logging.StreamHandler(stream=sys.stdout)
    stdout_handler.setLevel(logging.INFO)
    qchem_logger.addHandler(stdout_handler)

    # Only the "half_cpus" alternative survives in every case: the
    # "openmp" alternative is never kept.
    alt_cmd = {"half_cpus": half_cpus_cmd}
    if cls._is_openmp_only_job(qcinp):
        qc_exe = openmp_cmd
        joined = " ".join(half_cpus_cmd)
        alt_cmd["half_cpus"] = shlex.split(joined.replace("-np", "-nt"))
    elif cls._is_openmp_compatible_job(qcinp):
        qc_exe = openmp_cmd

    # Molecules with more than 50 atoms get larger cycle limits.
    if num_atoms > 50:
        scf_max_cycles, geom_max_cycles = 300, 500
    else:
        scf_max_cycles, geom_max_cycles = 200, 200

    qcinp.write_file(input_file)

    if implicit_solvent is not None:
        solvent_data = implicit_solvent.get('solvent_data', None)
        if solvent_data is not None:
            keys = ['Dielec', 'SolN', 'SolA', 'SolB', 'SolG', 'SolC', 'SolH']
            solvent_text = ' '.join(
                '{:.4f}'.format(solvent_data[key]) for key in keys)
            with open('solvent_data', 'w') as f:
                f.write(solvent_text)

    qclog_file = os.path.splitext(output_file)[0] + ".qclog"
    total_physical_memory = cls.get_physical_memory()
    job = QchemJob(qc_exe,
                   input_file=input_file,
                   output_file=output_file,
                   qclog_file=qclog_file,
                   alt_cmd=alt_cmd,
                   gzipped=gzipped,
                   total_physical_memory=total_physical_memory,
                   run_name=run_name)
    handler = QChemErrorHandler(qchem_job=job,
                                input_file=input_file,
                                output_file=output_file,
                                scf_max_cycles=scf_max_cycles,
                                geom_max_cycles=geom_max_cycles)
    custodian = Custodian(handlers=[handler], jobs=[job], max_errors=50)
    custodian_out = custodian.run()
    cls.clean_up(qcinp)
    return custodian_out, prev_qchem_dir
def test_max_errors_per_job(self):
    """With ``max_errors_per_job=1`` the run must raise ``RuntimeError``."""
    njobs = 100
    params = {"initial": 0, "total": 0}
    handler = ExampleHandler(params)
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [handler], jobs, max_errors=njobs, max_errors_per_job=1
    )
    self.assertRaises(RuntimeError, custodian.run)
def test_validators(self):
    """Run with ``ExampleValidator1`` (passes) and ``ExampleValidator2`` (raises).

    The first run must return one output entry per job; the second must
    raise ``RuntimeError``.
    """
    njobs = 100

    # Scenario 1: ExampleValidator1.
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler(params)], jobs, [ExampleValidator1()],
        max_errors=njobs,
    )
    output = custodian.run()
    self.assertEqual(len(output), njobs)

    # Scenario 2: ExampleValidator2, with a fresh params dict.
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler(params)], jobs, [ExampleValidator2()],
        max_errors=njobs,
    )
    self.assertRaises(RuntimeError, custodian.run)
def test_run(self):
    """A full run must return one output entry per job.

    Also calls ``as_dict`` on a fresh ``ExampleHandler``; the result is not
    inspected.
    """
    njobs = 100
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian([ExampleHandler(params)], jobs, max_errors=njobs)
    output = custodian.run()
    self.assertEqual(len(output), njobs)
    ExampleHandler(params).as_dict()
def test_max_errors_per_handler_warning(self):
    """With ``ExampleHandler1c`` the run completes with few corrections.

    Every ``run_log`` entry must hold at most two corrections.
    """
    njobs = 100
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler1c(params)],
        jobs,
        max_errors=njobs * 10,
        max_errors_per_job=1000,
    )
    custodian.run()
    within_limit = all(
        len(entry["corrections"]) <= 2 for entry in custodian.run_log
    )
    self.assertTrue(within_limit)
def run_task(self, fw_spec):
    """Change into ``fw_spec['workdir']`` and run a double relaxation.

    The jobs come from ``VaspJob.double_relaxation_run`` using
    ``fw_spec['vasp_cmd']``. The process working directory is changed and
    not restored.
    """
    os.chdir(fw_spec['workdir'])
    relaxation_jobs = VaspJob.double_relaxation_run(fw_spec['vasp_cmd'])
    error_handlers = [
        VaspErrorHandler(),
        UnconvergedErrorHandler(),
        FrozenJobErrorHandler(),
        NonConvergingErrorHandler(nionic_steps=5, change_algo=True),
        MeshSymmetryErrorHandler(),
    ]
    custodian = Custodian(error_handlers, relaxation_jobs, max_errors=10)
    custodian.run()
def test_checkpoint_loading(self):
    """A ``checkpoint=True`` Custodian starts with 3 log entries, ends with 5.

    NOTE(review): the initial ``run_log`` length of 3 presumably comes from
    a checkpoint prepared outside this method -- confirm against the
    fixture setup.
    """
    njobs = 5
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler(params)],
        jobs,
        [ExampleValidator1()],
        max_errors=100,
        checkpoint=True,
    )
    self.assertEqual(len(custodian.run_log), 3)
    output = custodian.run()
    self.assertEqual(len(output), 5)
def test_max_errors_per_job(self):
    """With ``max_errors_per_job=1`` the run raises ``MaxCorrectionsPerJobError``.

    The last ``run_log`` entry must have ``"max_errors_per_job"`` set.
    """
    njobs = 100
    params = {"initial": 0, "total": 0}
    handler = ExampleHandler(params)
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [handler], jobs, max_errors=njobs, max_errors_per_job=1
    )
    self.assertRaises(MaxCorrectionsPerJobError, custodian.run)
    last_entry = custodian.run_log[-1]
    self.assertTrue(last_entry["max_errors_per_job"])
def run_QChem(label, encode=None, rem=None, pcm=None, solvent=None,
              more_info=None, self_correct=True):
    """Run Q-Chem on ``<label>.inp`` and return the encoded output.

    If no encoding is provided, assume this is the first Firework in the
    workflow and that the input file is already written. Otherwise build a
    new QCInput from the encoding, write the input file, then run.

    Args:
        label: file name without extension; ``.inp``, ``.out`` and ``.log``
            are appended for the input, output and log files.
        encode: encoding passed to ``encode_to_QCInput``; None skips
            writing the input file.
        rem, pcm, solvent: forwarded to ``encode_to_QCInput``.
        more_info: forwarded to ``QCOutput_to_encode``.
        self_correct: when true, run the job under Custodian with a
            ``QChemErrorHandler``; otherwise set the job up and run it
            directly, waiting for the returned process.

    Returns:
        The result of ``QCOutput_to_encode`` on the parsed output(s).
    """
    inname = label + '.inp'
    outname = label + '.out'
    logname = label + '.log'
    command = 'qchem'
    handlers = [QChemErrorHandler(input_file=inname, output_file=outname)]

    if encode is not None:
        qcin = encode_to_QCInput(encode=encode, rem=rem, pcm=pcm,
                                 solvent=solvent)
        qcin.write_file(inname)

    # Both execution paths use the same job definition.
    job = QCJob(input_file=inname,
                output_file=outname,
                qchem_command=command,
                max_cores=multiprocessing.cpu_count(),
                qclog_file=logname)

    if self_correct:
        c = Custodian(handlers, [job], max_errors=10)
        c.run()
    else:
        job.setup()
        p = job.run()
        p.wait()

    try:
        output = [QCOutput(filename=outname)]
    except Exception:
        # Fall back to multi-output parsing of the same file. The original
        # passed an undefined name ``filename`` here, which raised
        # NameError instead of performing the fallback.
        output = QCOutput.multiple_outputs_from_file(QCOutput, outname)
    return QCOutput_to_encode(output, more_info=more_info)
def test_max_errors_per_handler_raise(self):
    """With ``ExampleHandler1b`` the run raises ``MaxCorrectionsPerHandlerError``.

    Afterwards the handler must report two applied corrections, and the
    last ``run_log`` entry must hold two corrections, have
    ``"max_errors_per_handler"`` set and reference the handler.
    """
    njobs = 100
    params = {"initial": 0, "total": 0}
    handler = ExampleHandler1b(params)
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [handler],
        jobs,
        max_errors=njobs * 10,
        max_errors_per_job=1000,
    )
    self.assertRaises(MaxCorrectionsPerHandlerError, custodian.run)
    self.assertEqual(handler.n_applied_corrections, 2)
    self.assertEqual(len(custodian.run_log[-1]["corrections"]), 2)
    self.assertTrue(custodian.run_log[-1]["max_errors_per_handler"])
    self.assertEqual(custodian.run_log[-1]["handler"], handler)
def do_run(args):
    """Run a single ``NwchemJob`` under Custodian, logging to ``run.log``.

    Reads ``command`` (split on whitespace), ``infile``, ``outfile``,
    ``scratch`` and ``gzipped`` from ``args``.
    """
    logging.basicConfig(
        format='%(asctime)s %(message)s',
        level=logging.INFO,
        filename="run.log",
    )
    nwchem_job = NwchemJob(
        nwchem_cmd=args.command.split(),
        input_file=args.infile,
        output_file=args.outfile,
    )
    error_handlers = [NwchemErrorHandler(output_filename=args.outfile)]
    custodian = Custodian(
        error_handlers,
        [nwchem_job],
        max_errors=5,
        scratch_dir=args.scratch,
        gzipped_output=args.gzipped,
        checkpoint=True,
    )
    custodian.run()
def run_vasp(override=None, suffix='', walltime=None, buffer_time=None):
    """
    Execute VASP under Custodian with the given override and suffix.

    The executable is taken from ``$VASP_GAMMA`` when the INCAR in the
    current directory has a truthy ``AUTO_GAMMA`` entry, otherwise from
    ``$VASP_KPTS``. The launcher is ``srun`` when ``$VASP_MPI`` equals
    ``'srun'``, otherwise ``mpirun -np $VASP_PROCS``.

    :param override: settings override passed to ``StandardJob``; defaults
        to an empty list
    :param suffix: suffix passed to ``StandardJob``
    :param walltime: when truthy, a ``WalltimeHandler`` with this wall time
        is registered
    :param buffer_time: buffer time passed to the ``WalltimeHandler``
    :return: None
    """
    from Classes_Pymatgen import Incar
    from Classes_Custodian import StandardJob
    from custodian.custodian import Custodian
    import os

    # A fresh list per call avoids the shared mutable default argument.
    if override is None:
        override = []

    # Determine whether to use Gamma optimized vasp
    incar = Incar.from_file('INCAR')
    if 'AUTO_GAMMA' in incar and incar['AUTO_GAMMA']:
        vasp = os.environ['VASP_GAMMA']
    else:
        vasp = os.environ['VASP_KPTS']

    handlers = []
    if walltime:
        handlers += [
            WalltimeHandler(
                wall_time=walltime,
                buffer_time=buffer_time,
                electronic_step_stop=True,
            )
        ]

    # Only the launcher prefix differs between the two MPI flavours.
    if os.environ['VASP_MPI'] == 'srun':
        vasp_cmd = ['srun', vasp]
    else:
        vasp_cmd = ['mpirun', '-np', os.environ['VASP_PROCS'], vasp]

    vaspjob = [
        StandardJob(vasp_cmd,
                    'vasp.log',
                    auto_npar=False,
                    backup=False,
                    settings_override=override,
                    suffix=suffix,
                    final=False)
    ]
    c = Custodian(handlers, vaspjob, max_errors=10)
    c.run()
def do_run(args):
    """Run ``get_jobs(args)`` under Custodian with handlers loaded by name.

    Handler names in ``args.handlers`` are resolved in
    ``custodian.vasp.handlers`` and validator names in ``args.validators``
    in ``custodian.vasp.validators``. Log records go to ``run.log``.
    """
    logging.basicConfig(
        format='%(asctime)s %(message)s',
        level=logging.INFO,
        filename="run.log",
    )
    logging.info("Handlers used are %s" % args.handlers)

    handlers = []
    for handler_name in args.handlers:
        handlers.append(load_class("custodian.vasp.handlers", handler_name))

    validators = []
    for validator_name in args.validators:
        validators.append(
            load_class("custodian.vasp.validators", validator_name))

    custodian = Custodian(
        handlers,
        get_jobs(args),
        validators,
        max_errors=args.max_errors,
        scratch_dir=args.scratch,
        gzipped_output=args.gzip,
        checkpoint=True,
    )
    custodian.run()
def do_run(args):
    """Run ``get_runs(args)`` under Custodian and log completion.

    ``args.gzip`` is passed through as ``gzipped_output``.
    """
    error_handlers = [
        VaspErrorHandler(),
        MeshSymmetryErrorHandler(),
        UnconvergedErrorHandler(),
        NonConvergingErrorHandler(),
        PotimErrorHandler(),
    ]
    runs = get_runs(args)
    custodian = Custodian(
        error_handlers, runs, max_errors=10, gzipped_output=args.gzip
    )
    custodian.run()
    logging.info("Geometry optimization complete")
def test_validators(self):
    """Run with ``ExampleValidator1`` (passes) and ``ExampleValidator2`` (raises).

    The first run must return one output entry per job. The second must
    raise ``ValidationError`` and record the validator on the last
    ``run_log`` entry.
    """
    njobs = 100

    # Scenario 1: ExampleValidator1.
    params = {"initial": 0, "total": 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler(params)], jobs, [ExampleValidator1()],
        max_errors=njobs,
    )
    output = custodian.run()
    self.assertEqual(len(output), njobs)

    # Scenario 2: ExampleValidator2, with a fresh params dict.
    params = {"initial": 0, "total": 0}
    validator = ExampleValidator2()
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian(
        [ExampleHandler(params)], jobs, [validator], max_errors=njobs,
    )
    self.assertRaises(ValidationError, custodian.run)
    self.assertEqual(custodian.run_log[-1]["validator"], validator)
def test_run_interrupted(self):
    """Check the value returned by ``Custodian.run_interrupted``.

    Two initial calls must both return ``njobs``. Afterwards the last job
    is run directly in a loop; once ``params['total']`` exceeds 50, each
    iteration checks that ``run_interrupted`` returns
    ``njobs - total_done`` and then increments ``total_done``.
    """
    njobs = 100
    params = {'initial': 0, 'total': 0}
    jobs = [ExampleJob(i, params) for i in range(njobs)]
    custodian = Custodian([ExampleHandler(params)], jobs, max_errors=njobs)

    self.assertEqual(custodian.run_interrupted(), njobs)
    self.assertEqual(custodian.run_interrupted(), njobs)

    total_done = 1
    while total_done < njobs:
        last_job = custodian.jobs[njobs - 1]
        last_job.run()
        if params['total'] > 50:
            remaining = custodian.run_interrupted()
            self.assertEqual(remaining, njobs - total_done)
            total_done += 1
def run_task(self, fw_spec):
    """Run ``self.jobs`` under Custodian with host-specific VASP commands.

    The launch commands are chosen from the host name: names containing
    ``'nid'`` are treated as HOPPER compute nodes, names containing
    ``'c'`` as MENDEL compute nodes.

    Args:
        fw_spec: mapping providing ``'task_type'``, ``'mpsnl'``,
            ``'snlgroup_id'`` and ``'run_tags'``.

    Returns:
        FWAction whose ``stored_data`` lists every distinct error found in
        the Custodian corrections and whose ``update_spec`` forwards the
        current directory, task type and SNL information.

    Raises:
        ValueError: if the host name matches neither pattern.
    """
    # write a file containing the formula and task_type for somewhat
    # easier file system browsing
    self._write_formula_file(fw_spec)

    # TODO: make this better - is there a way to load an environment
    # variable as the VASP_EXE?
    hostname = socket.gethostname()
    if 'nid' in hostname:  # hopper compute nodes
        # TODO: can base ncores on FW_submit.script
        v_exe = shlex.split('aprun -n 48 vasp')
        gv_exe = shlex.split('aprun -n 48 gvasp')
        print('running on HOPPER')
    elif 'c' in hostname:  # mendel compute nodes
        # TODO: can base ncores on FW_submit.script
        v_exe = shlex.split('mpirun -n 32 vasp')
        # The gamma build is launched with the same launcher as the
        # standard build; the original mixed 'aprun' into this branch.
        gv_exe = shlex.split('mpirun -n 32 gvasp')
        print('running on MENDEL')
    else:
        raise ValueError('Unrecognized host!')

    for job in self.jobs:
        job.vasp_cmd = v_exe
        job.gamma_vasp_cmd = gv_exe

    logging.basicConfig(level=logging.DEBUG)
    c = Custodian(self.handlers, self.jobs, self.max_errors)
    custodian_out = c.run()

    # Collect the distinct errors over all corrections of all runs.
    all_errors = set()
    for run in custodian_out:
        for correction in run['corrections']:
            all_errors.update(correction['errors'])

    stored_data = {'error_list': list(all_errors)}
    update_spec = {
        'prev_vasp_dir': get_block_part(os.getcwd()),
        'prev_task_type': fw_spec['task_type'],
        'mpsnl': fw_spec['mpsnl'],
        'snlgroup_id': fw_spec['snlgroup_id'],
        'run_tags': fw_spec['run_tags']
    }
    return FWAction(stored_data=stored_data, update_spec=update_spec)
def vasp_run(args):
    """Run a structure optimization or k-point convergence under Custodian.

    Args:
        args: parsed command-line namespace. Reads ``vasp_cmd``,
            ``user_incar_setting``, ``timeout``, ``rm_wavecar``,
            ``max_relax_num``, ``left_files``, ``kpoint_conv`` and, for
            k-point convergence, ``xc`` and ``kpoints_criteria``.

    Raises:
        NoVaspCommandError: if ``args.vasp_cmd`` is empty or None.
    """
    if not args.vasp_cmd:
        raise NoVaspCommandError("Vasp command must be specified.")
    if len(args.vasp_cmd) == 1:
        # A single argument may hold the whole command line.
        vasp_cmd = args.vasp_cmd[0].split()
    else:
        vasp_cmd = args.vasp_cmd

    flags = list(chain.from_iterable(incar_flags.values()))
    user_incar_settings = list2dict(args.user_incar_setting, flags)

    # Work on a copy: the pop/append below must not alter the shared
    # module-level HANDLER_GROUP["default"] list.
    handlers = list(HANDLER_GROUP["default"])
    if args.timeout:
        # Replace the last default handler with one using the requested
        # timeout.
        handlers.pop(-1)
        handlers.append(TooLongTimeCalcErrorHandler(args.timeout))

    optimization_args = {"vasp_cmd": vasp_cmd,
                         "removes_wavecar": args.rm_wavecar,
                         "max_relax_num": args.max_relax_num,
                         "left_files": args.left_files,
                         "removed_files": ["PCDAT", "vasprun.xml"]}

    custodian_args = {"handlers": handlers,
                      "polling_time_step": 5,
                      "monitor_freq": 1,
                      "max_errors": 10,
                      "gzipped_output": False}

    if args.kpoint_conv:
        xc = args.xc or Xc.pbesol
        custodian_args["jobs"] = ViseVaspJob.kpt_converge(
            xc=xc,
            convergence_criterion=args.kpoints_criteria,
            # initial_kpt_density=args.kpoint_density,
            user_incar_settings=user_incar_settings,
            **optimization_args)
    else:
        custodian_args["jobs"] = ViseVaspJob.structure_optimization_run(
            **optimization_args)

    c = Custodian(**custodian_args)
    c.run()
def run_task(self, fw_spec):
    """Run a single ``VaspJob`` under Custodian with a named handler group.

    The group is selected by the task's ``"handler_group"`` entry
    (``"default"`` when absent) from ``default``, ``strict``, ``md`` and
    ``no_handler``. The VASP command comes from ``env_chk`` on
    ``self["vasp_cmd"]``; a string command has environment variables
    expanded and is split with ``shlex``.
    """
    handler_groups = {
        "default": [
            VaspErrorHandler(), MeshSymmetryErrorHandler(),
            UnconvergedErrorHandler(), NonConvergingErrorHandler(),
            PotimErrorHandler(), PositiveEnergyErrorHandler(),
            FrozenJobErrorHandler(), StdErrHandler(), DriftErrorHandler(),
        ],
        "strict": [
            VaspErrorHandler(), MeshSymmetryErrorHandler(),
            UnconvergedErrorHandler(), NonConvergingErrorHandler(),
            PotimErrorHandler(), PositiveEnergyErrorHandler(),
            FrozenJobErrorHandler(), StdErrHandler(),
            AliasingErrorHandler(), DriftErrorHandler(),
        ],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": [],
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, six.string_types):
        vasp_cmd = shlex.split(os.path.expandvars(vasp_cmd))

    # Optional settings, resolved from the task and the firework spec.
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", 5)
    auto_npar = env_chk(
        self.get("auto_npar"), fw_spec, strict=False, default=False)
    gamma_vasp_cmd = env_chk(
        self.get("gamma_vasp_cmd"), fw_spec, strict=False, default=None)

    vasp_job = VaspJob(
        vasp_cmd, auto_npar=auto_npar, gamma_vasp_cmd=gamma_vasp_cmd)

    group_name = self.get("handler_group", "default")
    custodian = Custodian(
        handler_groups[group_name],
        [vasp_job],
        validators=[],
        max_errors=max_errors,
        scratch_dir=scratch_dir,
        gzipped_output=gzip_output,
    )
    custodian.run()
def run_task(self, fw_spec):
    """Run the task's decoded jobs under Custodian.

    Entries in ``fw_spec["_fw_env"]`` override per-job settings:
    ``vasp_cmd`` and ``gamma_vasp_cmd`` replace the commands on every job
    and ``scratch_root`` sets Custodian's ``scratch_dir``. Environment
    variables are expanded in all three values.

    Args:
        fw_spec: firework spec; only its ``"_fw_env"`` entry is read.

    Returns:
        FWAction whose ``stored_data`` is the Custodian run output.
    """
    dec = MontyDecoder()
    jobs = dec.process_decoded(self["jobs"])
    fw_env = fw_spec.get("_fw_env", {})

    # Override VASP and gamma VASP commands using fw_env
    if fw_env.get("vasp_cmd"):
        vasp_cmd = os.path.expandvars(fw_env["vasp_cmd"])
        for j in jobs:
            j.vasp_cmd = vasp_cmd
            logging.info("Vasp command is {}".format(j.vasp_cmd))

    if fw_env.get("gamma_vasp_cmd"):
        gamma_vasp_cmd = os.path.expandvars(fw_env["gamma_vasp_cmd"])
        for j in jobs:
            j.gamma_vasp_cmd = gamma_vasp_cmd
            logging.info("Vasp gamma command is {}".format(
                j.gamma_vasp_cmd))

    # Override custodian scratch dir. Copy first so the override does not
    # leak into the parameters stored on the task.
    cust_params = dict(self.get("custodian_params", {}))
    if fw_env.get("scratch_root"):
        cust_params["scratch_dir"] = os.path.expandvars(
            fw_env["scratch_root"])

    logging.info("Running with custodian params %s" % cust_params)
    handlers = [
        VaspErrorHandler(),
        MeshSymmetryErrorHandler(),
        UnconvergedErrorHandler(),
        NonConvergingErrorHandler(),
        PotimErrorHandler()
    ]
    validators = [VasprunXMLValidator()]
    # Handlers and validators are handed over in their as_dict() form.
    c = Custodian(handlers=[h.as_dict() for h in handlers],
                  jobs=jobs,
                  validators=[v.as_dict() for v in validators],
                  **cust_params)
    output = c.run()
    return FWAction(stored_data=output)
def run_task(self, fw_spec):
    """
    Change into the run directory and run the decoded jobs under Custodian.

    Required Parameters:
        dir (str path): directory containing the vasp inputs; it is
            appended to ``cwd`` by plain string concatenation
        cwd (str path): prefix for ``dir``
        jobs (VaspJob): Contains the cmd needed to run vasp
        max_errors (int): passed to Custodian as ``max_errors``
    Optional Parameters:
        custodian_params (dict **kwargs): extra keyword arguments for
            the Custodian constructor
        handlers (list of custodian handlers): Defaults to empty list

    ``fw_spec["_fw_env"]["scratch_root"]``, when set, is expanded and
    passed to Custodian as ``scratch_dir``.

    Returns:
        FWAction whose ``stored_data`` is the Custodian run output.
    """
    dec = MontyDecoder()
    run_dir = dec.process_decoded(self['dir'])
    cwd = dec.process_decoded(self['cwd'])

    # Change to the directory with the vasp inputs to run custodian
    os.chdir(cwd + run_dir)

    handlers = dec.process_decoded(self.get('handlers', []))
    jobs = dec.process_decoded(self['jobs'])
    max_errors = dec.process_decoded(self['max_errors'])

    fw_env = fw_spec.get("_fw_env", {})
    # Copy so the scratch override does not leak into the parameters
    # stored on the task.
    cust_params = dict(self.get("custodian_params", {}))

    # Get the scratch directory
    if fw_env.get('scratch_root'):
        cust_params['scratch_dir'] = os.path.expandvars(
            fw_env['scratch_root'])

    c = Custodian(handlers=handlers, jobs=jobs, max_errors=max_errors,
                  gzipped_output=True, **cust_params)
    output = c.run()
    return FWAction(stored_data=output)
def update_checkpoint(job_ids=None, jfile=None, **kwargs):
    """
    Rerun selected jobs from a checkpoint file and rewrite the file.

    The jobs are read from the json checkpoint file, jfile. Jobs whose id
    or job directory is listed in job_ids are rerun through Custodian. The
    checkpoint file is then rewritten with the final energy of every job,
    so calling this without job_ids only refreshes the energies.

    Args:
        job_ids: list of job ids (or job directories) to rerun
        jfile: check point file
        **kwargs: optional overrides applied to the rerun jobs:
            'incar', 'kpoints' and 'que' (queue adapter)
    """
    cal_log = loadfn(jfile, cls=MontyDecoder)
    cal_log_new = []
    all_jobs = []
    run_jobs = []
    handlers = []

    # if updating the specs of the job
    incar = kwargs.get('incar')
    kpoints = kwargs.get('kpoints')
    qadapter = kwargs.get('que')

    for j in cal_log:
        job = j["job"]
        job.job_id = j['job_id']
        all_jobs.append(job)
        if job_ids and (j['job_id'] in job_ids or job.job_dir in job_ids):
            logger.info('setting job {0} in {1} to rerun'.format(
                j['job_id'], job.job_dir))
            contcar_file = job.job_dir + os.sep + 'CONTCAR'
            poscar_file = job.job_dir + os.sep + 'POSCAR'
            # Use CONTCAR only when it exists and is non-empty. A size
            # check avoids opening the file without closing it.
            if os.path.isfile(contcar_file) and \
                    os.path.getsize(contcar_file) > 0:
                logger.info('setting poscar file from {}'.format(contcar_file))
                job.vis.poscar = Poscar.from_file(contcar_file)
            else:
                logger.info('setting poscar file from {}'.format(poscar_file))
                job.vis.poscar = Poscar.from_file(poscar_file)
            if incar:
                logger.info('incar overridden')
                job.vis.incar = incar
            if kpoints:
                logger.info('kpoints overridden')
                job.vis.kpoints = kpoints
            if qadapter:
                logger.info('qadapter overridden')
                job.vis.qadapter = qadapter
            run_jobs.append(job)

    if run_jobs:
        c = Custodian(handlers, run_jobs, max_errors=5)
        c.run()

    for j in all_jobs:
        final_energy = j.get_final_energy()
        cal_log_new.append({
            "job": j.as_dict(),
            'job_id': j.job_id,
            "corrections": [],
            'final_energy': final_energy
        })
    dumpfn(cal_log_new, jfile, cls=MontyEncoder, indent=4)