def run_task(self, fw_spec):
    fw_env = fw_spec.get("_fw_env", {})

    # pick the MPI launcher: an explicit _fw_env entry wins, otherwise fall
    # back to whichever launcher is found on the PATH
    if "mpi_cmd" in fw_env:
        mpi_cmd = fw_env["mpi_cmd"]
    elif which("mpirun"):
        mpi_cmd = "mpirun"
    elif which("aprun"):
        mpi_cmd = "aprun"
    else:
        raise ValueError("No MPI command found!")

    nproc = os.environ['PBS_NP']

    v_exe = shlex.split('{} -n {} {}'.format(mpi_cmd, nproc, fw_env.get("vasp_cmd", "vasp")))
    gv_exe = shlex.split('{} -n {} {}'.format(mpi_cmd, nproc, fw_env.get("gvasp_cmd", "gvasp")))

    # override vasp executable in custodian jobs
    for job in self.jobs:
        job.vasp_cmd = v_exe
        job.gamma_vasp_cmd = gv_exe

    # run the custodian
    c = Custodian(self.handlers, self.jobs, self.max_errors)
    c.run()

    update_spec = {'prev_vasp_dir': os.getcwd(),
                   'prev_task_type': fw_spec['task_type']}
    return FWAction(update_spec=update_spec)
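
# A minimal sketch (hypothetical values) of the worker-level "_fw_env" entry the
# task above consumes: "mpi_cmd" short-circuits the mpirun/aprun autodetection,
# and "vasp_cmd"/"gvasp_cmd" override the default executable names.
example_fw_spec = {
    "_fw_env": {
        "mpi_cmd": "srun",        # used verbatim instead of mpirun/aprun
        "vasp_cmd": "vasp_std",   # falls back to "vasp" if absent
        "gvasp_cmd": "vasp_gam",  # falls back to "gvasp" if absent
    },
    "task_type": "GGA optimize structure",
}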
def run_task(self, fw_spec):
    FORMAT = '%(asctime)s %(levelname)s: %(pathname)s\n\t%(module)s.%(funcName)s: %(message)s'
    logging.basicConfig(format=FORMAT, level=logging.INFO, filename="run.log")
    job = VaspJob(["vasp"], default_vasp_input_set=MinimalVaspInputSet(), auto_npar=False)

    # Set up handlers to be used; each handler name maps to the constructor
    # parameters it accepts
    handler_param_dict = {'VaspErrorHandler': ['output_filename'],
                          'AliasingErrorHandler': ['output_filename'],
                          'MeshSymmetryErrorHandler': ['output_filename', 'output_vasprun'],
                          'UnconvergedErrorHandler': ['output_filename'],
                          'MaxForceErrorHandler': ['output_filename', 'max_force_threshold'],
                          'PotimErrorHandler': ['input_filename', 'output_filename', 'dE_threshold'],
                          'FrozenJobErrorHandler': ['output_filename', 'timeout'],
                          'NonConvergingErrorHandler': ['output_filename', 'nionic_steps', 'change_algo'],
                          'WalltimeHandler': ['wall_time', 'buffer_time', 'electronic_step_stop'],
                          'CheckpointHandler': ['interval'],
                          'PositiveEnergyErrorHandler': ['output_filename']}

    hnames = self.get('handlers')
    handler_params = self.get("handler_params", {})
    logging.info("handler names: {}".format(hnames))

    handlers = []
    for n in hnames:
        np = {}
        for m in handler_params:
            if m in handler_param_dict[n]:
                np[m] = handler_params[m]
        # pass the filtered parameters as keyword arguments
        handlers.append(load_class("custodian.vasp.handlers", n)(**np))

    c = Custodian(handlers=handlers, validators=[VasprunXMLValidator()], jobs=[job])
    output = c.run()
    return FWAction(stored_data=output, mod_spec=[{'_set': {'PREV_DIR': os.getcwd()}}])
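
# Illustration (hypothetical task parameters): the filtering loop above forwards
# only the parameters whitelisted for each handler name in handler_param_dict.
#   handlers       = ["VaspErrorHandler", "FrozenJobErrorHandler"]
#   handler_params = {"output_filename": "vasp.out", "timeout": 3600}
# yields VaspErrorHandler(output_filename="vasp.out") and
# FrozenJobErrorHandler(output_filename="vasp.out", timeout=3600); "timeout" is
# dropped for VaspErrorHandler because it is not in that handler's whitelist.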
def run_task(self, fw_spec):
    nproc = os.environ['PBS_NP']

    # Figure out the appropriate VASP executable based on the run machine
    if 'nid' in socket.gethostname():  # hopper compute nodes
        v_exe = shlex.split('aprun -n ' + str(nproc) + ' vasp')
        gv_exe = shlex.split('aprun -n ' + str(nproc) + ' gvasp')
        print('running on HOPPER')
    elif 'c' in socket.gethostname():  # mendel compute nodes
        v_exe = shlex.split('mpirun -n ' + str(nproc) + ' vasp')
        gv_exe = shlex.split('mpirun -n ' + str(nproc) + ' gvasp')
        print('running on MENDEL')
    else:
        raise ValueError('Unrecognized host!')

    # override vasp executable in custodian jobs
    for job in self.jobs:
        job.vasp_cmd = v_exe
        job.gamma_vasp_cmd = gv_exe

    # run the custodian
    c = Custodian(self.handlers, self.jobs, self.max_errors)
    c.run()

    update_spec = {'prev_vasp_dir': os.getcwd(),
                   'prev_task_type': fw_spec['task_type']}
    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    handler_groups = {
        "default": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                    NonConvergingErrorHandler(), PotimErrorHandler(),
                    PositiveEnergyErrorHandler(), FrozenJobErrorHandler()],
        "strict": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                   NonConvergingErrorHandler(), PotimErrorHandler(),
                   PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                   AliasingErrorHandler()],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": []
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, six.string_types):
        vasp_cmd = os.path.expandvars(vasp_cmd)
        vasp_cmd = shlex.split(vasp_cmd)

    # initialize variables
    job_type = self.get("job_type", "normal")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", 5)
    auto_npar = env_chk(self.get("auto_npar"), fw_spec, strict=False, default=False)
    gamma_vasp_cmd = env_chk(self.get("gamma_vasp_cmd"), fw_spec, strict=False, default=None)
    if gamma_vasp_cmd:
        gamma_vasp_cmd = shlex.split(gamma_vasp_cmd)

    # construct jobs
    if job_type == "normal":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar, gamma_vasp_cmd=gamma_vasp_cmd)]
    elif job_type == "double_relaxation_run":
        jobs = VaspJob.double_relaxation_run(vasp_cmd, auto_npar=auto_npar,
                                             ediffg=self.get("ediffg"),
                                             half_kpts_first_relax=False)
    elif job_type == "full_opt_run":
        jobs = VaspJob.full_opt_run(vasp_cmd, auto_npar=auto_npar,
                                    ediffg=self.get("ediffg"), max_steps=5,
                                    half_kpts_first_relax=False)
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handlers = handler_groups[self.get("handler_group", "default")]
    if self.get("max_force_threshold"):
        handlers.append(MaxForceErrorHandler(max_force_threshold=self["max_force_threshold"]))
    if self.get("wall_time"):
        handlers.append(WalltimeHandler(wall_time=self["wall_time"]))

    validators = [VasprunXMLValidator()]

    c = Custodian(handlers, jobs, validators=validators, max_errors=max_errors,
                  scratch_dir=scratch_dir, gzipped_output=gzip_output)
    c.run()
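
# Usage sketch (names hypothetical: this run_task belongs to some FireTaskBase
# subclass, called RunVaspCustodianTask here for illustration). The dict-style
# parameters map directly to the self[...] / self.get(...) lookups above, and
# ">>vasp_cmd<<" is the env_chk convention for a lookup in fw_spec["_fw_env"].
from fireworks import Firework

fw = Firework(
    [RunVaspCustodianTask(                  # hypothetical class name
        vasp_cmd=">>vasp_cmd<<",            # resolved per-worker by env_chk
        job_type="double_relaxation_run",
        handler_group="strict",
        max_errors=5,
        wall_time=86400,                    # appends a WalltimeHandler
    )],
    name="structure optimization",
)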
def run_task(self, fw_spec):
    FORMAT = '%(asctime)s %(message)s'
    logging.basicConfig(format=FORMAT, level=logging.INFO, filename="run.log")
    job = VaspJob(vasp_cmd=self["vasp_cmd"], **self.get("vasp_job_params", {}))

    if self["handlers"] == "all":
        hnames = ["VaspErrorHandler", "MeshSymmetryErrorHandler",
                  "UnconvergedErrorHandler", "NonConvergingErrorHandler",
                  "PotimErrorHandler"]
    else:
        hnames = self["handlers"]
    handlers = [load_class("custodian.vasp.handlers", n)() for n in hnames]

    c = Custodian(handlers, [job], **self.get("custodian_params", {}))
    output = c.run()
    return FWAction(stored_data=output)
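
# Hypothetical parameter set for the simple task above: "handlers" may be the
# string "all" or an explicit list of class names from custodian.vasp.handlers,
# and "custodian_params" is forwarded verbatim to the Custodian constructor.
task_params = {
    "vasp_cmd": ["mpirun", "-n", "16", "vasp_std"],
    "handlers": "all",
    "custodian_params": {"max_errors": 3, "gzipped_output": True},
}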
def run_task(self, fw_spec):
    c = Custodian(self["handlers"], self["jobs"], self.get("validators"),
                  **self.get("custodian_params", {}))
    c.run()
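
# Minimal sketch (task class name hypothetical): this pass-through variant
# forwards pre-built custodian objects, so the caller supplies concrete
# Job/Handler/Validator instances rather than string names.
from custodian.vasp.jobs import VaspJob
from custodian.vasp.handlers import VaspErrorHandler
from custodian.vasp.validators import VasprunXMLValidator

task = RunCustodianFromObjects(              # hypothetical class name
    jobs=[VaspJob(["mpirun", "-n", "16", "vasp_std"])],
    handlers=[VaspErrorHandler()],
    validators=[VasprunXMLValidator()],
    custodian_params={"max_errors": 3},
)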
from custodian import Custodian
from custodian.qchem.jobs import QCJob
from custodian.qchem.handlers import QChemErrorHandler

my_input = "mol.qin"
my_output = "mol.qout"

myjob = QCJob(qchem_command="qchem -slurm", multimode="openmp", input_file=my_input,
              output_file=my_output, max_cores=12)
myhandler = QChemErrorHandler(input_file=my_input, output_file=my_output)

c = Custodian([myhandler], [myjob], max_errors_per_job=10, max_errors=10)
c.run()
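
# Note: custodian runs in the current working directory, so the script above
# should be launched from the folder containing mol.qin; custodian writes a
# custodian.json log of errors and corrections there when the run finishes.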
def run_task(self, fw_spec):
    # initialize variables
    qchem_cmd = env_chk(self["qchem_cmd"], fw_spec)
    multimode = env_chk(self.get("multimode"), fw_spec)
    if multimode is None:
        multimode = "openmp"
    # Note that I'm considering hardcoding openmp in the future because there is
    # basically no reason anyone should ever run QChem on multiple nodes,
    # aka with multimode = mpi.
    input_file = self.get("input_file", "mol.qin")
    output_file = self.get("output_file", "mol.qout")
    max_cores = env_chk(self["max_cores"], fw_spec)
    qclog_file = self.get("qclog_file", "mol.qclog")
    suffix = self.get("suffix", "")
    calc_loc = env_chk(self.get("calc_loc"), fw_spec)
    save_scratch = self.get("save_scratch", False)
    max_errors = self.get("max_errors", 5)
    max_iterations = self.get("max_iterations", 10)
    linked = self.get("linked", True)
    backup = self.get("backup", True)
    max_molecule_perturb_scale = self.get("max_molecule_perturb_scale", 0.3)
    job_type = self.get("job_type", "normal")
    gzipped_output = self.get("gzipped_output", True)

    handler_groups = {
        "default": [QChemErrorHandler(input_file=input_file, output_file=output_file)],
        "no_handler": []
    }

    # construct jobs
    if job_type == "normal":
        jobs = [QCJob(qchem_command=qchem_cmd, max_cores=max_cores, multimode=multimode,
                      input_file=input_file, output_file=output_file,
                      qclog_file=qclog_file, suffix=suffix, calc_loc=calc_loc,
                      save_scratch=save_scratch, backup=backup)]
    elif job_type == "opt_with_frequency_flattener":
        if linked:
            jobs = QCJob.opt_with_frequency_flattener(
                qchem_command=qchem_cmd,
                multimode=multimode,
                input_file=input_file,
                output_file=output_file,
                qclog_file=qclog_file,
                max_iterations=max_iterations,
                linked=linked,
                save_final_scratch=save_scratch,
                max_cores=max_cores,
                calc_loc=calc_loc)
        else:
            jobs = QCJob.opt_with_frequency_flattener(
                qchem_command=qchem_cmd,
                multimode=multimode,
                input_file=input_file,
                output_file=output_file,
                qclog_file=qclog_file,
                max_iterations=max_iterations,
                max_molecule_perturb_scale=max_molecule_perturb_scale,
                linked=linked,
                save_final_scratch=save_scratch,
                max_cores=max_cores,
                calc_loc=calc_loc)
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handlers = handler_groups[self.get("handler_group", "default")]

    c = Custodian(handlers, jobs, max_errors=max_errors, gzipped_output=gzipped_output)
    c.run()
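
# Hypothetical fw_spec illustrating how env_chk resolves the ">>...<<"
# indirection used above: when self["qchem_cmd"] == ">>qchem_cmd<<", the value
# is looked up in fw_spec["_fw_env"], so per-machine commands stay out of the
# workflow definition itself.
example_fw_spec = {
    "_fw_env": {
        "qchem_cmd": "qchem -slurm",
        "max_cores": 24,
    }
}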
def run_task(self, fw_spec):
    handler_groups = {
        "default": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                    NonConvergingErrorHandler(), PotimErrorHandler(),
                    PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                    StdErrHandler(), LargeSigmaHandler(), IncorrectSmearingHandler()],
        "strict": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                   NonConvergingErrorHandler(), PotimErrorHandler(),
                   PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                   StdErrHandler(), AliasingErrorHandler(), DriftErrorHandler(),
                   LargeSigmaHandler(), IncorrectSmearingHandler()],
        "scan": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                 NonConvergingErrorHandler(), PotimErrorHandler(),
                 PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                 StdErrHandler(), LargeSigmaHandler(), IncorrectSmearingHandler(),
                 ScanMetalHandler()],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": []
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, str):
        vasp_cmd = os.path.expandvars(vasp_cmd)
        vasp_cmd = shlex.split(vasp_cmd)

    # initialize variables
    job_type = self.get("job_type", "normal")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", CUSTODIAN_MAX_ERRORS)
    auto_npar = env_chk(self.get("auto_npar"), fw_spec, strict=False, default=False)
    gamma_vasp_cmd = env_chk(self.get("gamma_vasp_cmd"), fw_spec, strict=False, default=None)
    if gamma_vasp_cmd:
        gamma_vasp_cmd = shlex.split(gamma_vasp_cmd)

    # construct jobs
    if job_type == "normal":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar, gamma_vasp_cmd=gamma_vasp_cmd)]
    elif job_type == "normal_no_backup":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar, gamma_vasp_cmd=gamma_vasp_cmd,
                        backup=False)]
    elif job_type == "double_relaxation_run":
        jobs = VaspJob.double_relaxation_run(vasp_cmd, auto_npar=auto_npar,
                                             ediffg=self.get("ediffg"),
                                             half_kpts_first_relax=self.get(
                                                 "half_kpts_first_relax",
                                                 HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "metagga_opt_run":
        jobs = VaspJob.metagga_opt_run(vasp_cmd, auto_npar=auto_npar,
                                       ediffg=self.get("ediffg"),
                                       half_kpts_first_relax=self.get(
                                           "half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "full_opt_run":
        jobs = VaspJob.full_opt_run(vasp_cmd, auto_npar=auto_npar,
                                    ediffg=self.get("ediffg"), max_steps=9,
                                    half_kpts_first_relax=self.get(
                                        "half_kpts_first_relax",
                                        HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "neb":
        # TODO: @shyuep @HanmeiTang This means that NEB can only be run (i) in
        # reservation mode and (ii) when the queueadapter parameter is overridden
        # and (iii) the queue adapter has a convention for nnodes (with that name).
        # Can't the number of nodes be made a parameter that the user sets
        # differently? e.g., fw_spec["neb_nnodes"] must be set when setting
        # job_type=NEB? Then someone can use this feature in non-reservation mode
        # and without this complication. -computron
        nnodes = int(fw_spec["_queueadapter"]["nnodes"])

        # TODO: @shyuep @HanmeiTang - I am not sure what the code below is doing.
        # It looks like it is trying to override the number of processors. But I
        # tried running the code below after setting "vasp_cmd = 'mpirun -n 16
        # vasp'" and the code fails.
        # (i) Is this expecting an array vasp_cmd rather than String? If so,
        # that's opposite to the rest of this task's convention and documentation
        # (ii) can we get rid of this hacking in the first place? e.g., allowing
        # the user to separately set the NEB_VASP_CMD as an env_variable and not
        # rewriting the command inside this.
        # -computron

        # Index the tag "-n" or "-np"
        index = [i for i, s in enumerate(vasp_cmd) if '-n' in s]
        ppn = int(vasp_cmd[index[0] + 1])
        vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        # Do the same for gamma_vasp_cmd
        if gamma_vasp_cmd:
            index = [i for i, s in enumerate(gamma_vasp_cmd) if '-n' in s]
            ppn = int(gamma_vasp_cmd[index[0] + 1])
            gamma_vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        jobs = [VaspNEBJob(vasp_cmd, final=False, auto_npar=auto_npar,
                           gamma_vasp_cmd=gamma_vasp_cmd)]
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, str):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    if self.get("max_force_threshold"):
        handlers.append(MaxForceErrorHandler(max_force_threshold=self["max_force_threshold"]))
    if self.get("wall_time"):
        handlers.append(WalltimeHandler(wall_time=self["wall_time"]))

    if job_type == "neb":
        validators = []  # CINEB vasprun.xml sometimes incomplete, file structure different
    else:
        validators = [VasprunXMLValidator(), VaspFilesValidator()]

    c = Custodian(handlers, jobs, validators=validators, max_errors=max_errors,
                  scratch_dir=scratch_dir, gzipped_output=gzip_output)
    c.run()

    if os.path.exists(zpath("custodian.json")):
        stored_custodian_data = {"custodian": loadfn(zpath("custodian.json"))}
        return FWAction(stored_data=stored_custodian_data)
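
# Sketch of the reservation-mode spec the "neb" branch above expects (values
# hypothetical): nnodes comes from the queue adapter, and the token after
# "-n"/"-np" in vasp_cmd is rescaled to nnodes * ppn.
neb_fw_spec = {
    "_queueadapter": {"nnodes": 4},   # required by the neb branch; see TODOs above
}
# e.g. vasp_cmd == ["mpirun", "-n", "16", "vasp"] becomes
#      ["mpirun", "-n", "64", "vasp"] on 4 nodes.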
def run_task(self, fw_spec):
    # initialize variables
    qchem_cmd = env_chk(self["qchem_cmd"], fw_spec)
    multimode = self.get("multimode", "openmp")
    input_file = self.get("input_file", "mol.qin")
    output_file = self.get("output_file", "mol.qout")
    max_cores = self.get("max_cores", 32)
    qclog_file = self.get("qclog_file", "mol.qclog")
    suffix = self.get("suffix", "")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    if scratch_dir is None:
        scratch_dir = "/dev/shm/qcscratch/"
    save_scratch = self.get("save_scratch", False)
    save_name = self.get("save_name", "default_save_name")
    max_errors = self.get("max_errors", 5)
    max_iterations = self.get("max_iterations", 10)
    max_molecule_perturb_scale = self.get("max_molecule_perturb_scale", 0.3)
    job_type = self.get("job_type", "normal")
    gzipped_output = self.get("gzipped_output", True)
    sp_params = self.get("sp_params", None)

    handler_groups = {
        "default": [QChemErrorHandler(input_file=input_file, output_file=output_file)],
        "no_handler": []
    }

    # construct jobs
    if job_type == "normal":
        jobs = [QCJob(qchem_command=qchem_cmd, multimode=multimode,
                      input_file=input_file, output_file=output_file,
                      max_cores=max_cores, qclog_file=qclog_file, suffix=suffix,
                      scratch_dir=scratch_dir, save_scratch=save_scratch,
                      save_name=save_name)]
    elif job_type == "opt_with_frequency_flattener":
        jobs = QCJob.opt_with_frequency_flattener(
            qchem_command=qchem_cmd,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            sp_params=sp_params,
            max_iterations=max_iterations,
            max_molecule_perturb_scale=max_molecule_perturb_scale,
            scratch_dir=scratch_dir,
            save_scratch=save_scratch,
            save_name=save_name,
            max_cores=max_cores)
    elif job_type == "ts_with_frequency_flattener":
        jobs = QCJob.ts_with_frequency_flattener(
            qchem_command=qchem_cmd,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            ts_guess_method="fsm",
            max_iterations=max_iterations,
            max_molecule_perturb_scale=max_molecule_perturb_scale,
            scratch_dir=scratch_dir,
            save_scratch=save_scratch,
            save_name=save_name,
            max_cores=max_cores)
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handlers = handler_groups[self.get("handler_group", "default")]

    c = Custodian(handlers, jobs, max_errors=max_errors, gzipped_output=gzipped_output)
    c.run()
def run_task(self, fw_spec):
    lobster_cmd = env_chk(self.get("lobster_cmd"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    gzip_WAVECAR = self.get("gzip_WAVECAR", False)
    if gzip_WAVECAR:
        add_files_to_gzip = VASP_OUTPUT_FILES
    else:
        add_files_to_gzip = [f for f in VASP_OUTPUT_FILES if f not in ["WAVECAR"]]

    handler_groups = {"default": [], "no_handler": []}

    validator_groups = {
        "default": [
            LobsterFilesValidator(),
            EnoughBandsValidator(output_filename="lobsterout"),
        ],
        "strict": [
            ChargeSpillingValidator(output_filename="lobsterout"),
            LobsterFilesValidator(),
            EnoughBandsValidator(output_filename="lobsterout"),
        ],
        "no_validator": [],
    }

    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, str):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    validator_group = self.get("validator_group", "default")
    if isinstance(validator_group, str):
        validators = validator_groups[validator_group]
    else:
        # a list of validator objects was passed in directly
        validators = validator_group

    # LobsterJob gzips output files; Custodian would gzip all output files (even slurm)
    jobs = [
        LobsterJob(
            lobster_cmd=lobster_cmd,
            output_file="lobster.out",
            stderr_file="std_err_lobster.txt",
            gzipped=gzip_output,
            add_files_to_gzip=add_files_to_gzip,
        )
    ]

    c = Custodian(
        handlers=handlers,
        jobs=jobs,
        validators=validators,
        gzipped_output=False,
        max_errors=5,
    )
    c.run()

    if os.path.exists(zpath("custodian.json")):
        stored_custodian_data = {"custodian": loadfn(zpath("custodian.json"))}
        return FWAction(stored_data=stored_custodian_data)
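
# Hypothetical parameter set for the Lobster task above: gzip_WAVECAR=False
# (the default) excludes WAVECAR from the gzip list, and the string group names
# select from handler_groups / validator_groups defined in the task.
lobster_task_params = {
    "lobster_cmd": ">>lobster_cmd<<",   # resolved per-worker via env_chk
    "gzip_output": True,
    "gzip_WAVECAR": False,
    "handler_group": "default",
    "validator_group": "strict",
}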
def run_task(self, fw_spec):
    # initialize variables
    qchem_cmd = env_chk(self["qchem_cmd"], fw_spec)
    multimode = env_chk(self.get("multimode"), fw_spec)
    if multimode is None:
        multimode = "openmp"
    input_file = self.get("input_file", "mol.qin")
    output_file = self.get("output_file", "mol.qout")
    max_cores = env_chk(self["max_cores"], fw_spec)
    qclog_file = self.get("qclog_file", "mol.qclog")
    suffix = self.get("suffix", "")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    if scratch_dir is None:
        scratch_dir = "/dev/shm/qcscratch/"
    save_scratch = self.get("save_scratch", False)
    save_name = self.get("save_name", "default_save_name")
    max_errors = self.get("max_errors", 5)
    max_iterations = self.get("max_iterations", 10)
    max_molecule_perturb_scale = self.get("max_molecule_perturb_scale", 0.3)
    job_type = self.get("job_type", "normal")
    gzipped_output = self.get("gzipped_output", True)

    handler_groups = {
        "default": [QChemErrorHandler(input_file=input_file, output_file=output_file)],
        "no_handler": []
    }

    # construct jobs
    if job_type == "normal":
        jobs = [QCJob(qchem_command=qchem_cmd, max_cores=max_cores, multimode=multimode,
                      input_file=input_file, output_file=output_file,
                      qclog_file=qclog_file, suffix=suffix, scratch_dir=scratch_dir,
                      save_scratch=save_scratch, save_name=save_name)]
    elif job_type == "opt_with_frequency_flattener":
        jobs = QCJob.opt_with_frequency_flattener(
            qchem_command=qchem_cmd,
            multimode=multimode,
            input_file=input_file,
            output_file=output_file,
            qclog_file=qclog_file,
            max_iterations=max_iterations,
            max_molecule_perturb_scale=max_molecule_perturb_scale,
            scratch_dir=scratch_dir,
            save_scratch=save_scratch,
            save_name=save_name,
            max_cores=max_cores)
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handlers = handler_groups[self.get("handler_group", "default")]

    c = Custodian(handlers, jobs, max_errors=max_errors, gzipped_output=gzipped_output)
    c.run()
def run_task(self, fw_spec):
    handler_groups = {
        "default": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                    NonConvergingErrorHandler(), PotimErrorHandler(),
                    PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                    StdErrHandler(), DriftErrorHandler()],
        "strict": [VaspErrorHandler(), MeshSymmetryErrorHandler(), UnconvergedErrorHandler(),
                   NonConvergingErrorHandler(), PotimErrorHandler(),
                   PositiveEnergyErrorHandler(), FrozenJobErrorHandler(),
                   StdErrHandler(), AliasingErrorHandler(), DriftErrorHandler()],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": []
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, six.string_types):
        vasp_cmd = os.path.expandvars(vasp_cmd)
        vasp_cmd = shlex.split(vasp_cmd)

    # initialize variables
    job_type = self.get("job_type", "normal")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", 5)
    auto_npar = env_chk(self.get("auto_npar"), fw_spec, strict=False, default=False)
    gamma_vasp_cmd = env_chk(self.get("gamma_vasp_cmd"), fw_spec, strict=False, default=None)
    if gamma_vasp_cmd:
        gamma_vasp_cmd = shlex.split(gamma_vasp_cmd)

    # construct jobs
    if job_type == "normal":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar, gamma_vasp_cmd=gamma_vasp_cmd)]
    elif job_type == "double_relaxation_run":
        jobs = VaspJob.double_relaxation_run(vasp_cmd, auto_npar=auto_npar,
                                             ediffg=self.get("ediffg"),
                                             half_kpts_first_relax=self.get(
                                                 "half_kpts_first_relax",
                                                 HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "metagga_opt_run":
        jobs = VaspJob.metagga_opt_run(vasp_cmd, auto_npar=auto_npar,
                                       ediffg=self.get("ediffg"),
                                       half_kpts_first_relax=self.get(
                                           "half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "full_opt_run":
        jobs = VaspJob.full_opt_run(vasp_cmd, auto_npar=auto_npar,
                                    ediffg=self.get("ediffg"), max_steps=9,
                                    half_kpts_first_relax=self.get(
                                        "half_kpts_first_relax",
                                        HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "neb":
        # TODO: @shyuep @HanmeiTang This means that NEB can only be run (i) in
        # reservation mode and (ii) when the queueadapter parameter is overridden
        # and (iii) the queue adapter has a convention for nnodes (with that name).
        # Can't the number of nodes be made a parameter that the user sets
        # differently? e.g., fw_spec["neb_nnodes"] must be set when setting
        # job_type=NEB? Then someone can use this feature in non-reservation mode
        # and without this complication. -computron
        nnodes = int(fw_spec["_queueadapter"]["nnodes"])

        # TODO: @shyuep @HanmeiTang - I am not sure what the code below is doing.
        # It looks like it is trying to override the number of processors. But I
        # tried running the code below after setting "vasp_cmd = 'mpirun -n 16
        # vasp'" and the code fails.
        # (i) Is this expecting an array vasp_cmd rather than String? If so,
        # that's opposite to the rest of this task's convention and documentation
        # (ii) can we get rid of this hacking in the first place? e.g., allowing
        # the user to separately set the NEB_VASP_CMD as an env_variable and not
        # rewriting the command inside this.
        # -computron

        # Index the tag "-n" or "-np"
        index = [i for i, s in enumerate(vasp_cmd) if '-n' in s]
        ppn = int(vasp_cmd[index[0] + 1])
        vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        # Do the same for gamma_vasp_cmd
        if gamma_vasp_cmd:
            index = [i for i, s in enumerate(gamma_vasp_cmd) if '-n' in s]
            ppn = int(gamma_vasp_cmd[index[0] + 1])
            gamma_vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        jobs = [VaspNEBJob(vasp_cmd, final=False, auto_npar=auto_npar,
                           gamma_vasp_cmd=gamma_vasp_cmd)]
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, six.string_types):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    if self.get("max_force_threshold"):
        handlers.append(MaxForceErrorHandler(max_force_threshold=self["max_force_threshold"]))
    if self.get("wall_time"):
        handlers.append(WalltimeHandler(wall_time=self["wall_time"]))

    if job_type == "neb":
        validators = []  # CINEB vasprun.xml sometimes incomplete, file structure different
    else:
        validators = [VasprunXMLValidator(), VaspFilesValidator()]

    c = Custodian(handlers, jobs, validators=validators, max_errors=max_errors,
                  scratch_dir=scratch_dir, gzipped_output=gzip_output)
    c.run()

    if os.path.exists(zpath("custodian.json")):
        return FWAction(stored_data=loadfn(zpath("custodian.json")))
def fit(self, X, y, index=None, columns=None, tasks=None):
    """Fit a SISSO regression based on inputs X and output y.

    This method supports Multi-Task SISSO. For Single-Task SISSO, y must have a
    shape (n_samples) or (n_samples, 1). For Multi-Task SISSO, y must have a
    shape (n_samples, n_tasks). The arrays will be reshaped to fit SISSO's
    input files. For example, with 10 samples and 3 properties, the output
    array (y) will be reshaped to (30, 1). The input array (X) is left
    unchanged.

    It is also possible to provide samples without an output for some
    properties by setting that property to NaN. In that case, the corresponding
    values in the input (X) and output (y) arrays will be removed from the
    SISSO inputs. In the previous example, if 2 of the samples have NaN for the
    first property, 1 sample has NaN for the second property and 4 samples have
    NaN for the third property, the final output array (y) will have a shape
    (30-2-1-4, 1), i.e. (23, 1), while the final input array (X) will have a
    shape (23, n_features).

    Args:
        X: Feature vectors as an array-like of shape (n_samples, n_features).
        y: Target values as an array-like of shape (n_samples,) or
            (n_samples, n_tasks).
        index: List of string identifiers for each sample. If None, "sampleN"
            with N=[1, ..., n_samples] will be used.
        columns: List of string names of the features. If None, "featN" with
            N=[1, ..., n_features] will be used.
        tasks: When Multi-Task SISSO is used, this is the list of string names
            that will be used for each task/property. If None, "taskN" with
            N=[1, ..., n_tasks] will be used.
    """
    if not self.use_custodian:
        raise NotImplementedError

    self.sisso_in = SISSOIn.from_sisso_keywords(  # pylint: disable=W0201
        ptype=1,
        ntask=self.ntask,
        task_weighting=self.task_weighting,
        desc_dim=self.desc_dim,
        restart=self.restart,
        rung=self.rung,
        opset=self.opset,
        maxcomplexity=self.maxcomplexity,
        dimclass=self.dimclass,
        maxfval_lb=self.maxfval_lb,
        maxfval_ub=self.maxfval_ub,
        subs_sis=self.subs_sis,
        method=self.method,
        L1L0_size4L0=self.L1L0_size4L0,
        fit_intercept=self.fit_intercept,
        metric=self.metric,
        nm_output=self.nm_output,
        isconvex=self.isconvex,
        width=self.width,
        nvf=self.nvf,
        vfsize=self.vfsize,
        vf2sf=self.vf2sf,
        npf_must=self.npf_must,
        L1_max_iter=self.L1_max_iter,
        L1_tole=self.L1_tole,
        L1_dens=self.L1_dens,
        L1_nlambda=self.L1_nlambda,
        L1_minrmse=self.L1_minrmse,
        L1_warm_start=self.L1_warm_start,
        L1_weighted=self.L1_weighted,
    )

    # Set up columns. These columns are used by the SISSO model wrapper
    # afterwards for the prediction.
    if columns is None and isinstance(X, pd.DataFrame):
        columns = list(X.columns)
    self.columns = columns or [  # pylint: disable=W0201
        "feat{:d}".format(ifeat) for ifeat in range(1, X.shape[1] + 1)
    ]
    if len(self.columns) != X.shape[1]:
        raise ValueError("Columns should be of the size of the second axis of X.")

    # Set up data
    X = np.array(X)
    y = np.array(y)
    if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1):  # Single-Task SISSO
        self.ntasks = 1  # pylint: disable=W0201
        index = index or [
            "sample{:d}".format(ii) for ii in range(1, X.shape[0] + 1)
        ]
        if len(index) != len(y) or len(index) != len(X):
            raise ValueError("Index, X and y should have same size.")
        nsample = None
    elif y.ndim == 2 and y.shape[1] > 1:  # Multi-Task SISSO
        self.ntasks = y.shape[1]  # pylint: disable=W0201
        samples_index = index or [
            "sample{:d}".format(ii) for ii in range(1, X.shape[0] + 1)
        ]
        tasks = tasks or [
            "task{:d}".format(ii) for ii in range(1, self.ntasks + 1)
        ]
        newX = np.zeros((0, X.shape[1]))
        newy = np.array([])
        index = []
        nsample = []
        for itask in range(self.ntasks):
            yadd = y[:, itask]
            nanindices = np.argwhere(np.isnan(yadd)).flatten()
            totake = [ii for ii in range(len(yadd)) if ii not in nanindices]
            newy = np.concatenate([newy, np.take(yadd, indices=totake)])
            newX = np.row_stack([newX, np.take(X, indices=totake, axis=0)])
            nsample.append(len(totake))
            index.extend([
                "{}_{}".format(sample_index, tasks[itask])
                for i_sample, sample_index in enumerate(samples_index)
                if i_sample in totake
            ])
        X = newX
        y = newy
    else:
        raise ValueError("Wrong shapes.")

    data = pd.DataFrame(X, index=index, columns=self.columns)
    data.insert(0, "target", y)
    data.insert(0, "identifier", index)

    # Set up SISSODat and SISSOIn
    sisso_dat = SISSODat(data=data, features_dimensions=self.features_dimensions,
                         nsample=nsample)
    self.sisso_in.set_keywords_for_SISSO_dat(sisso_dat=sisso_dat)

    # Run SISSO
    if self.run_dir is None:
        makedirs_p("SISSO_runs")
        timestamp = get_timestamp()
        self.run_dir = tempfile.mkdtemp(suffix=None,
                                        prefix=f"SISSO_dir_{timestamp}_",
                                        dir="SISSO_runs")
    else:
        makedirs_p(self.run_dir)
    with cd(self.run_dir):
        self.sisso_in.to_file(filename="SISSO.in")
        sisso_dat.to_file(filename="train.dat")
        job = SISSOJob()
        c = Custodian(jobs=[job], handlers=[], validators=[])
        c.run()
        self.sisso_out = SISSOOut.from_file(  # pylint: disable=W0201
            filepath="SISSO.out")

    # Clean run directory
    if self.clean_run_dir:
        # TODO: add check here to not remove "." if the user passes "." ?
        shutil.rmtree(self.run_dir)
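
# Usage sketch for fit() above (data made up; "reg" is assumed to be an
# instance of the SISSO regressor class this method belongs to). In the
# multi-task case, y has shape (n_samples, n_tasks) and NaNs mark samples with
# a missing value for that task, which fit() drops from the SISSO inputs.
import numpy as np

X = np.random.rand(10, 3)           # 10 samples, 3 features
y = np.random.rand(10, 2)           # 2 tasks/properties
y[3, 0] = np.nan                    # sample 3 has no value for the first task
reg.fit(X, y, index=None,
        columns=["feat_a", "feat_b", "feat_c"],
        tasks=["bandgap", "bulk_modulus"])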