def from_directory(input_dir, optional_files=None):
    """
    Read in a set of VASP inputs from a directory. Note that only the
    standard INCAR, POSCAR, POTCAR and KPOINTS files are read unless
    optional_files is specified.

    Args:
        input_dir (str): Directory to read VASP input from.
        optional_files (dict): Optional files to read in as well, as a
            dict of {filename: Object type}. Object type must have a
            static method from_file.
    """
    sub_d = {}
    try:
        for fname, ftype in [("INCAR", Incar), ("KPOINTS", Kpoints),
                             ("POSCAR", Poscar), ("POTCAR", DPPotcar)]:
            fullzpath = zpath(os.path.join(input_dir, fname))
            sub_d[fname.lower()] = ftype.from_file(fullzpath)
    except Exception:
        # fall back to reading without KPOINTS
        for fname, ftype in [("INCAR", Incar), ("POSCAR", Poscar),
                             ("POTCAR", DPPotcar)]:
            fullzpath = zpath(os.path.join(input_dir, fname))
            sub_d[fname.lower()] = ftype.from_file(fullzpath)

    sub_d["optional_files"] = {}
    if optional_files is not None:
        for fname, ftype in optional_files.items():
            sub_d["optional_files"][fname] = \
                ftype.from_file(os.path.join(input_dir, fname))
    return VaspInput(**sub_d)
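# Usage sketch (directory path is illustrative): read a possibly gzipped
# VASP input set, additionally parsing CHGCAR via pymatgen's Chgcar, whose
# static from_file satisfies the optional_files contract above.
from pymatgen.io.vasp import Chgcar

vasp_input = from_directory("./relax", optional_files={"CHGCAR": Chgcar})
print(vasp_input["INCAR"])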
def run_task(self, fw_spec):
    chgcar_start = False
    # read the VaspInput from the previous run
    poscar = Poscar.from_file(zpath('POSCAR'))
    incar = Incar.from_file(zpath('INCAR'))

    # figure out what GGA+U (LDAU) values to use and override them
    mpvis = MPVaspInputSet()
    ggau_incar = mpvis.get_incar(poscar.structure).as_dict()
    incar_updates = {k: v for k, v in ggau_incar.items() if 'LDAU' in k}
    for k in ggau_incar:
        # update any parameters not set explicitly in previous INCAR
        if k not in incar:
            incar_updates[k] = ggau_incar[k]
    incar.update(incar_updates)  # override the +U keys

    # start from the CHGCAR of previous run
    if os.path.exists('CHGCAR'):
        incar['ICHARG'] = 1
        chgcar_start = True

    # write back the new INCAR to the current directory
    incar.write_file('INCAR')
    return FWAction(stored_data={'chgcar_start': chgcar_start})
def run_task(self, fw_spec):
    lobster_cmd = env_chk(self.get("lobster_cmd"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    gzip_WAVECAR = self.get("gzip_WAVECAR", False)
    if gzip_WAVECAR:
        add_files_to_gzip = VASP_OUTPUT_FILES
    else:
        add_files_to_gzip = [f for f in VASP_OUTPUT_FILES
                             if f not in ["WAVECAR"]]

    handler_groups = {"default": [], "no_handler": []}
    validator_groups = {
        "default": [
            LobsterFilesValidator(),
            EnoughBandsValidator(output_filename="lobsterout"),
        ],
        "strict": [
            ChargeSpillingValidator(output_filename="lobsterout"),
            LobsterFilesValidator(),
            EnoughBandsValidator(output_filename="lobsterout"),
        ],
        "no_validator": [],
    }

    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, str):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    validator_group = self.get("validator_group", "default")
    if isinstance(validator_group, str):
        validators = validator_groups[validator_group]
    else:
        # use the user-supplied list of validators directly
        validators = validator_group

    # LobsterJob gzips output files; Custodian would gzip all output files (even slurm)
    jobs = [
        LobsterJob(
            lobster_cmd=lobster_cmd,
            output_file="lobster.out",
            stderr_file="std_err_lobster.txt",
            gzipped=gzip_output,
            add_files_to_gzip=add_files_to_gzip,
        )
    ]
    c = Custodian(
        handlers=handlers,
        jobs=jobs,
        validators=validators,
        gzipped_output=False,
        max_errors=5,
    )
    c.run()

    if os.path.exists(zpath("custodian.json")):
        stored_custodian_data = {"custodian": loadfn(zpath("custodian.json"))}
        return FWAction(stored_data=stored_custodian_data)
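# Configuration sketch: the keyword arguments mirror the self.get(...)
# lookups above. RunLobster is assumed to be the Firetask class this
# run_task belongs to, and ">>lobster_cmd<<" the usual env_chk indirection.
task = RunLobster(lobster_cmd=">>lobster_cmd<<",
                  validator_group="strict",
                  gzip_WAVECAR=True)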
def from_directory(input_dir): """ Read in a set of FEFF input files from a directory, which is useful when existing FEFF input needs some adjustment. """ sub_d = {} for fname, ftype in [("HEADER", Header), ("PARAMETERS", Tags)]: fullzpath = zpath(os.path.join(input_dir, fname)) sub_d[fname.lower()] = ftype.from_file(fullzpath) # Generation of FEFFDict set requires absorbing atom, need to search # the index of absorption atom in the structure according to the # distance matrix and shell species information contained in feff.inp absorber_index = [] radius = None feffinp = zpath(os.path.join(input_dir, 'feff.inp')) if "RECIPROCAL" not in sub_d["parameters"]: input_atoms = Atoms.cluster_from_file(feffinp) shell_species = np.array([x.species_string for x in input_atoms]) # First row of distance matrix represents the distance from the absorber to # the rest atoms distance_matrix = input_atoms.distance_matrix[0, :] # Get radius value from math import ceil radius = int(ceil(input_atoms.get_distance(input_atoms.index(input_atoms[0]), input_atoms.index(input_atoms[-1])))) for site_index, site in enumerate(sub_d['header'].struct): if site.specie == input_atoms[0].specie: site_atoms = Atoms(sub_d['header'].struct, absorbing_atom=site_index, radius=radius) site_distance = np.array(site_atoms.get_lines())[:, 5].astype(np.float64) site_shell_species = np.array(site_atoms.get_lines())[:, 4] shell_overlap = min(shell_species.shape[0], site_shell_species.shape[0]) if np.allclose(distance_matrix[:shell_overlap], site_distance[:shell_overlap]) and \ np.all(site_shell_species[:shell_overlap] == shell_species[:shell_overlap]): absorber_index.append(site_index) if "RECIPROCAL" in sub_d["parameters"]: absorber_index = sub_d["parameters"]["TARGET"] absorber_index[0] = int(absorber_index[0]) - 1 # Generate the input set if 'XANES' in sub_d["parameters"]: CONFIG = loadfn(os.path.join(MODULE_DIR, "MPXANESSet.yaml")) if radius is None: radius = 10 return FEFFDictSet(absorber_index[0], sub_d['header'].struct, radius=radius, config_dict=CONFIG, edge=sub_d["parameters"]["EDGE"], nkpts=1000, user_tag_settings=sub_d["parameters"])
def run_task(self, fw_spec):
    vr_path = zpath(self.get("vasprun_path", "vasprun.xml"))
    min_gap = self.get("min_gap", None)
    max_gap = self.get("max_gap", None)

    if not os.path.exists(vr_path):
        relax_paths = sorted(glob.glob(vr_path + ".relax*"))
        if relax_paths:
            if len(relax_paths) > 9:
                raise ValueError(
                    "CheckBandgap doesn't properly handle >9 relaxations!")
            vr_path = relax_paths[-1]

    logger.info("Checking the gap of file: {}".format(vr_path))
    vr = Vasprun(vr_path, parse_potcar_file=False)
    gap = vr.get_band_structure().get_band_gap()["energy"]
    stored_data = {"band_gap": gap}
    logger.info("The gap is: {}. Min gap: {}. Max gap: {}".format(
        gap, min_gap, max_gap))

    if (min_gap and gap < min_gap) or (max_gap and gap > max_gap):
        logger.info("CheckBandgap: failed test!")
        return FWAction(stored_data=stored_data, exit=True,
                        defuse_workflow=True)

    return FWAction(stored_data=stored_data)
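# Usage sketch: run in a directory containing vasprun.xml (or a gzipped /
# .relax-suffixed variant). CheckBandgap is assumed to be the Firetask
# class this run_task belongs to; the 0.5 eV threshold is illustrative.
check = CheckBandgap(min_gap=0.5)
action = check.run_task(fw_spec={})  # defuses the workflow if gap < 0.5 eV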
def process_task(self, path):
    try:
        # Override incorrect outcar subdocs for two-step relaxations
        if os.path.exists(os.path.join(path, "relax2")):
            try:
                run_stats = {}
                for i in [1, 2]:
                    outcar = Outcar(
                        zpath(os.path.join(path, "relax" + str(i), "OUTCAR")))
                    m_key = "calculations." + str(i - 1) + ".output.outcar"
                    self.tasks.update({'dir_name_full': path},
                                      {'$set': {m_key: outcar.to_dict}})
                    run_stats["relax" + str(i)] = outcar.run_stats
            except Exception:
                logger.error("Bad OUTCAR for {}.".format(path))

            try:
                overall_run_stats = {}
                for key in ["Total CPU time used (sec)", "User time (sec)",
                            "System time (sec)", "Elapsed time (sec)"]:
                    overall_run_stats[key] = sum(
                        [v[key] for v in run_stats.values()])
                run_stats["overall"] = overall_run_stats
            except Exception:
                logger.error("Bad run stats for {}.".format(path))

            self.tasks.update({'dir_name_full': path},
                              {'$set': {"run_stats": run_stats}})
            print('FINISHED', path)
        else:
            print('SKIPPING', path)
    except Exception:
        print('-----')
        print('ENCOUNTERED AN EXCEPTION!!!', path)
        traceback.print_exc()
        print('-----')
def from_symbol_and_functional(symbol, functional=None):
    if functional is None:
        functional = SETTINGS.get("PMG_DEFAULT_FUNCTIONAL", "PBE")

    d = SETTINGS.get("PMG_VASP_PSP_DIR")
    if d is None:
        raise ValueError("No POTCAR directory found. Please set "
                         "the PMG_VASP_PSP_DIR environment variable")

    funcdir = PotcarSingle.functional_dir[functional]
    if not os.path.isdir(os.path.join(d, funcdir)):
        # fall back to the older directory naming convention
        functional_dir = {"LDA_US": "pot",
                          "PW91_US": "pot_GGA",
                          "LDA": "potpaw",
                          "PW91": "potpaw_GGA",
                          "LDA_52": "potpaw_LDA.52",
                          "LDA_54": "potpaw_LDA.54",
                          "PBE": "potpaw_PBE",
                          "PBE_52": "potpaw_PBE.52",
                          "PBE_54": "potpaw_PBE.54"}
        funcdir = functional_dir[functional]

    paths_to_try = [os.path.join(d, funcdir, "POTCAR.{}".format(symbol)),
                    os.path.join(d, funcdir, symbol, "POTCAR.Z"),
                    os.path.join(d, funcdir, symbol, "POTCAR")]
    for p in paths_to_try:
        p = os.path.expanduser(p)
        p = zpath(p)
        if os.path.exists(p):
            return PotcarSingleMod.from_file(p)
    raise IOError("You do not have the right POTCAR with functional "
                  "{} and label {} in your PMG_VASP_PSP_DIR".format(
                      functional, symbol))
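# Usage sketch (symbol/functional illustrative): resolve Fe_pv under the
# PBE functional directory; zpath transparently picks up POTCAR.gz or
# POTCAR.Z variants along the candidate paths above.
potcar = from_symbol_and_functional("Fe_pv", functional="PBE")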
def get_defuse_causing_qchem_fwid(qcout_path):
    dirname = os.path.dirname(qcout_path)
    fw_spec_path = os.path.join(dirname, "FW.json")
    with zopen(zpath(fw_spec_path), 'rt') as f:
        fw_dict = json.load(f)
    fw_id = fw_dict["fw_id"]
    return fw_id
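# Call sketch (path illustrative): the QChem output is assumed to sit next
# to the FW.json (possibly FW.json.gz) of the launch that produced it.
fw_id = get_defuse_causing_qchem_fwid("/scratch/run_42/mol.qcout")
print("defuse-causing firework:", fw_id)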
def run_task(self, fw_spec):
    prev_dir = get_loc(fw_spec['prev_vasp_dir'])

    if '$ALL' in self.files:
        self.files = os.listdir(prev_dir)

    for file in self.files:
        prev_filename = last_relax(os.path.join(prev_dir, file))
        dest_file = 'POSCAR' if file == 'CONTCAR' and self.use_contcar else file
        if prev_filename.endswith('.gz'):
            dest_file += '.gz'

        print('COPYING', prev_filename, dest_file)
        if self.missing_CHGCAR_OK and 'CHGCAR' in dest_file \
                and not os.path.exists(zpath(prev_filename)):
            print('Skipping missing CHGCAR')
        else:
            shutil.copy2(prev_filename, dest_file)
            if '.gz' in dest_file:
                # unzip dest file
                with gzip.open(dest_file, 'rb') as f:
                    file_content = f.read()
                with open(dest_file[0:-3], 'wb') as f_out:
                    f_out.write(file_content)
                os.remove(dest_file)

    return FWAction(stored_data={'copied_files': self.files})
def is_valid_vasp_dir(mydir):
    # note that the OUTCAR and POSCAR are known to be empty in some
    # situations
    files = ["OUTCAR", "POSCAR", "INCAR", "KPOINTS"]
    for f in files:
        # zpath resolves the plain or gzipped variant; require it to
        # exist and be non-empty
        m_file = zpath(os.path.join(mydir, f))
        if not (os.path.exists(m_file) and os.stat(m_file).st_size > 0):
            return False
    return True
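# Check sketch (directory name illustrative): gzipped OUTCAR.gz etc. are
# accepted because zpath resolves the compressed variants.
if not is_valid_vasp_dir("block_2019/launcher_0001"):
    raise RuntimeError("incomplete or empty VASP directory")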
def run_task(self, fw_spec):
    user_incar_settings = {"NPAR": 2}
    MPStaticVaspInputSet.from_previous_vasp_run(
        os.getcwd(), user_incar_settings=user_incar_settings)
    structure = MPStaticVaspInputSet.get_structure(
        Vasprun(zpath("vasprun.xml")), Outcar(zpath("OUTCAR")),
        initial_structure=False, additional_info=True)
    return FWAction(stored_data={
        'refined_structure': structure[1][0].to_dict,
        'conventional_standard_structure': structure[1][1].to_dict,
        'symmetry_dataset': structure[1][2],
        'symmetry_operations': [x.to_dict for x in structure[1][3]]})
def run_task(self, fw_spec):
    self.user_incar_settings.update({"NPAR": 2})
    MPStaticVaspInputSet.from_previous_vasp_run(
        os.getcwd(), user_incar_settings=self.user_incar_settings,
        kpoints_density=self.kpoints_density)
    structure = MPStaticVaspInputSet.get_structure(
        Vasprun(zpath("vasprun.xml")), Outcar(zpath("OUTCAR")),
        initial_structure=False, additional_info=True)
    return FWAction(stored_data={
        'refined_structure': structure[1][0].as_dict(),
        'conventional_standard_structure': structure[1][1].as_dict(),
        'symmetry_dataset': structure[1][2],
        'symmetry_operations': [x.as_dict() for x in structure[1][3]]})
def correct(self):
    backup(VASP_BACKUP_FILES | {self.output_filename})
    actions = []
    vi = VaspInput.from_directory(".")

    if "lrf_comm" in self.errors:
        if Outcar(zpath(os.path.join(
                os.getcwd(), "OUTCAR"))).is_stopped is False:
            if not vi["INCAR"].get("LPEAD"):
                actions.append({"dict": "INCAR",
                                "action": {"_set": {"LPEAD": True}}})

    VaspModder(vi=vi).apply_actions(actions)
    return {"errors": list(self.errors), "actions": actions}
def from_directory(input_dir, optional_files=None): """ Read in a LAMMPS input file from a directory. Note that only the standard input file is read unless optional_filenames is specified. Args: input_dir (str): Directory to read LAMMPS input from. optional_files (dict): Optional files to read in as well as a dict of {filename: Object type}. Object type must have a static method from_file. """ sub_d = {} fullzpath = zpath(os.path.join(input_dir, "lammps_input")) return LammpsInput.from_file(fullzpath)
def detect(self, dir_name):
    signals = set()
    for filename in self.filename_list:
        # find the strings that match in the file
        if not self.ignore_nonexistent_file or \
                os.path.exists(zpath(os.path.join(dir_name, filename))):
            f = last_relax(os.path.join(dir_name, filename))
            errors = string_list_in_file(self.signames_targetstrings.values(),
                                         f, ignore_case=self.ignore_case)
            if self.invert_search:
                errors_inverted = [item for item in
                                   self.targetstrings_signames.keys()
                                   if item not in errors]
                errors = errors_inverted

            # add the signal names for those strings
            for e in errors:
                signals.add(self.targetstrings_signames[e])
    return signals
def _check_structure_change(mol1, mol2, qcout_path):
    """
    Check whether the structure has changed.

    Returns:
        True if the structure changed, False if unchanged.
    """
    dirname = os.path.dirname(qcout_path)
    fw_spec_path = os.path.join(dirname, "FW.json")
    with zopen(zpath(fw_spec_path), 'rt') as f:
        fw = json.load(f)
    if 'egsnl' not in fw['spec']:
        raise ValueError("Can't find initial SNL")
    if 'known_bonds' not in fw['spec']['egsnl']:
        raise ValueError("Can't find known bonds information")
    bonds = fw['spec']['egsnl']['known_bonds']
    msc = MoleculeStructureComparator(priority_bonds=bonds)
    return not msc.are_equal(mol1, mol2)
def update_checkpoint(launchpad, launch_dir, launch_id, checkpoint):
    """
    Helper function to update checkpoint

    Args:
        launchpad (LaunchPad): LaunchPad to ping with checkpoint data
        launch_dir (str): directory in which FW_offline.json was created
        launch_id (int): launch id to update
        checkpoint (dict): checkpoint data
    """
    if launchpad:
        launchpad.ping_launch(launch_id, checkpoint=checkpoint)
    else:
        fpath = zpath(os.path.join(launch_dir, "FW_offline.json"))
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d['checkpoint'] = checkpoint
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))
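# Offline-mode sketch (path and payload illustrative): with no LaunchPad,
# the checkpoint is folded back into the launch directory's
# FW_offline.json, which may itself be gzipped.
update_checkpoint(None, "/scratch/launcher_0007", None,
                  {'_task_n': 2, '_all_stored_data': {}})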
def update_checkpoint(launchpad, launch_dir, launch_id, checkpoint):
    """
    Helper function to update checkpoint

    Args:
        launchpad (LaunchPad): LaunchPad to ping with checkpoint data
        launch_dir (str): directory in which FW_offline.json was created
        launch_id (int): launch id to update
        checkpoint (dict): checkpoint data
    """
    if launchpad:
        launchpad.ping_launch(launch_id, checkpoint=checkpoint)
    else:
        fpath = zpath("FW_offline.json")
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d['checkpoint'] = checkpoint
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))
def track_file(self, launch_dir=None): """ Reads the monitored file and returns back the last N lines :param launch_dir: directory where job was launched in case of relative filename :return: """ m_file = self.filename if launch_dir and not os.path.isabs(self.filename): m_file = os.path.join(launch_dir, m_file) lines = [] if os.path.exists(m_file): with zopen(zpath(m_file)) as f: for l in reverse_readline(f): lines.append(l) if len(lines) == self.nlines: break self.content = '\n'.join(reversed(lines)) return self.content
def run_task(self, fw_spec):
    logging.basicConfig(level=logging.INFO)
    qchem_logger = logging.getLogger('QChemDrone')
    qchem_logger.setLevel(logging.INFO)
    sh = logging.StreamHandler(stream=sys.stdout)
    qchem_logger.addHandler(sh)

    cur_dir = os.getcwd()
    src_qchem_dir = fw_spec['src_qchem_dir']
    for filename in glob.glob(os.path.join(src_qchem_dir, '*')):
        if os.path.isfile(filename):
            shutil.copy(filename, cur_dir)

    if os.path.exists("custodian.json") or \
            os.path.exists("custodian.json" + ".gz"):
        with zopen(zpath("custodian.json"), 'rt') as f:
            custodian_out = json.load(f)
    else:
        custodian_out = []

    all_errors = set()
    for run in custodian_out:
        for correction in run['corrections']:
            all_errors.update(correction['errors'])

    prev_qchem_dir = os.getcwd()
    if MOVE_TO_EG_GARDEN:
        prev_qchem_dir = move_to_eg_garden(prev_qchem_dir)

    stored_data = {'error_list': list(all_errors)}
    update_spec = {'prev_qchem_dir': prev_qchem_dir,
                   'prev_task_type': fw_spec['task_type']}
    propagate_keys = ['egsnl', 'snlgroup_id', 'inchi_root',
                      'mixed_basis', 'mixed_aux_basis', 'mol']
    for k in propagate_keys:
        if k in fw_spec:
            update_spec[k] = fw_spec[k]

    return FWAction(stored_data=stored_data, update_spec=update_spec)
def correct(self):
    backup(VASP_BACKUP_FILES | {self.output_filename})
    actions = []
    vi = VaspInput.from_directory(".")

    if "lrf_comm" in self.errors:
        if self.error_count['lrf_comm'] == 0:
            if Outcar(zpath(os.path.join(
                    os.getcwd(), "OUTCAR"))).is_stopped is False:
                # simply rerun the job and increment
                # error count for next time
                actions.append({"dict": "INCAR",
                                "action": {"_set": {"ISTART": 1}}})
                self.error_count['lrf_comm'] += 1

    if "kpoints_trans" in self.errors:
        if self.error_count["kpoints_trans"] == 0:
            m = reduce(operator.mul, vi["KPOINTS"].kpts[0])
            m = max(int(round(m ** (1 / 3))), 1)
            if vi["KPOINTS"].style.name.lower().startswith("m"):
                m += m % 2
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"kpoints": [[m] * 3]}}})
            self.error_count['kpoints_trans'] += 1

    VaspModder(vi=vi).apply_actions(actions)
    return {"errors": list(self.errors), "actions": actions}
def track_file(self, launch_dir=None): """ Reads the monitored file and returns back the last N lines Args: launch_dir (str): directory where job was launched in case of relative filename Returns: str: the content(last N lines) """ m_file = self.filename if launch_dir and not os.path.isabs(self.filename): m_file = os.path.join(launch_dir, m_file) lines = [] if self.allow_zipped: m_file = zpath(m_file) if os.path.exists(m_file): with zopen(m_file, "rt") as f: for l in reverse_readline(f): lines.append(l) if len(lines) == self.nlines: break self.content = '\n'.join(reversed(lines)) return self.content
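# Usage sketch: Tracker is assumed to be the FireWorks class this method
# belongs to; the log filename, nlines, and launch directory are
# illustrative.
tracker = Tracker('std_err.txt', nlines=25, allow_zipped=True)
print(tracker.track_file('/scratch/launcher_0007'))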
def detect(self, dir_name):
    signals = set()
    for filename in self.filename_list:
        # find the strings that match in the file
        if not self.ignore_nonexistent_file or \
                os.path.exists(zpath(os.path.join(dir_name, filename))):
            f = last_relax(os.path.join(dir_name, filename))
            errors = string_list_in_file(self.signames_targetstrings.values(),
                                         f, ignore_case=self.ignore_case)
            if self.invert_search:
                errors_inverted = [item for item in
                                   self.targetstrings_signames.keys()
                                   if item not in errors]
                errors = errors_inverted

            # add the signal names for those strings
            for e in errors:
                signals.add(self.targetstrings_signames[e])
    return signals
def last_relax(filename):
    # for old runs
    m_dir = os.path.dirname(filename)
    m_file = os.path.basename(filename)

    if os.path.exists(zpath(os.path.join(m_dir, 'relax2', m_file))):
        return zpath(os.path.join(m_dir, 'relax2', m_file))
    elif os.path.exists(zpath(filename)):
        return zpath(filename)

    relaxations = glob.glob('%s.relax*' % filename)
    if relaxations:
        return sorted(relaxations)[-1]
    # backup for old runs
    elif os.path.exists(zpath(os.path.join(m_dir, 'relax1', m_file))):
        return zpath(os.path.join(m_dir, 'relax1', m_file))

    return filename
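# Resolution-order sketch (layout illustrative): given both
# block/launcher/relax2/OUTCAR.gz and block/launcher/OUTCAR, the relax2
# copy wins, and the gzipped path is returned via zpath.
path = last_relax("block/launcher/OUTCAR")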
def correct(self):
    backup(VASP_BACKUP_FILES | {self.output_filename})
    actions = []
    vi = VaspInput.from_directory(".")

    if self.errors.intersection(["tet", "dentet"]):
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ISMEAR": 0}}})

    if "inv_rot_mat" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-8}}})

    if "brmix" in self.errors:
        # If there is not a valid OUTCAR already, increment
        # error count to 1 to skip first fix
        if self.error_count['brmix'] == 0:
            try:
                assert (Outcar(zpath(os.path.join(
                    os.getcwd(), "OUTCAR"))).is_stopped is False)
            except Exception:
                self.error_count['brmix'] += 1

        if self.error_count['brmix'] == 0:
            # Valid OUTCAR - simply rerun the job and increment
            # error count for next time
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISTART": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] == 1:
            # Use Kerker mixing w/default values for other parameters
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] == 2 and vi["KPOINTS"].style \
                == Kpoints.supported_modes.Gamma:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Monkhorst"}}})
            actions.append({"dict": "INCAR",
                            "action": {"_unset": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] in [2, 3] and vi["KPOINTS"].style \
                == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})
            actions.append({"dict": "INCAR",
                            "action": {"_unset": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

            if vi["KPOINTS"].num_kpts < 1:
                all_kpts_even = all([bool(n % 2 == 0)
                                     for n in vi["KPOINTS"].kpts[0]])
                print("all_kpts_even = {}".format(all_kpts_even))
                if all_kpts_even:
                    new_kpts = (tuple(n + 1 for n in vi["KPOINTS"].kpts[0]),)
                    print("new_kpts = {}".format(new_kpts))
                    actions.append({"dict": "KPOINTS",
                                    "action": {"_set": {"kpoints": new_kpts}}})

        else:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})
            if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
                actions.append({"dict": "KPOINTS",
                                "action": {"_set": {"generation_style": "Gamma"}}})

            # Based on VASP forum's recommendation, you should delete the
            # CHGCAR and WAVECAR when dealing with this error.
            if vi["INCAR"].get("ICHARG", 0) < 10:
                actions.append({"file": "CHGCAR",
                                "action": {"_file_delete": {'mode': "actual"}}})
                actions.append({"file": "WAVECAR",
                                "action": {"_file_delete": {'mode': "actual"}}})

    if "zpotrf" in self.errors:
        # Usually caused by short bond distances. If on the first step,
        # volume needs to be increased. Otherwise, it was due to a step
        # being too big and POTIM should be decreased. If a static run,
        # try turning off symmetry.
        try:
            oszicar = Oszicar("OSZICAR")
            nsteps = len(oszicar.ionic_steps)
        except Exception:
            nsteps = 0

        if nsteps >= 1:
            potim = float(vi["INCAR"].get("POTIM", 0.5)) / 2.0
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0, "POTIM": potim}}})
        elif vi["INCAR"].get("NSW", 0) == 0 \
                or vi["INCAR"].get("ISIF", 0) in range(3):
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})
        else:
            s = vi["POSCAR"].structure
            s.apply_strain(0.2)
            actions.append({"dict": "POSCAR",
                            "action": {"_set": {"structure": s.as_dict()}}})

        # Based on VASP forum's recommendation, you should delete the
        # CHGCAR and WAVECAR when dealing with this error.
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
            actions.append({"file": "WAVECAR",
                            "action": {"_file_delete": {'mode': "actual"}}})

    if self.errors.intersection(["subspacematrix", "rspher",
                                 "real_optlay", "nicht_konv"]):
        s = vi["POSCAR"].structure
        if len(s) < self.natoms_large_cell:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"LREAL": False}}})
        else:
            # for large supercell, try an in-between option LREAL = True
            # prior to LREAL = False
            if self.error_count['real_optlay'] == 0:
                # use real space projectors generated by pot
                actions.append({"dict": "INCAR",
                                "action": {"_set": {"LREAL": True}}})
                self.error_count['real_optlay'] += 1
            elif self.error_count['real_optlay'] == 1:
                actions.append({"dict": "INCAR",
                                "action": {"_set": {"LREAL": False}}})
                self.error_count['real_optlay'] += 1

    if self.errors.intersection(["tetirr", "incorrect_shift"]):
        if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})

    if "rot_matrix" in self.errors:
        if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})
        else:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})

    if "amin" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"AMIN": "0.01"}}})

    if "triple_product" in self.errors:
        s = vi["POSCAR"].structure
        trans = SupercellTransformation(((1, 0, 0), (0, 0, 1), (0, 1, 0)))
        new_s = trans.apply_transformation(s)
        actions.append({"dict": "POSCAR",
                        "action": {"_set": {"structure": new_s.as_dict()}},
                        "transformation": trans.as_dict()})

    if "pricel" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-8, "ISYM": 0}}})

    if "brions" in self.errors:
        potim = float(vi["INCAR"].get("POTIM", 0.5)) + 0.1
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"POTIM": potim}}})

    if "zbrent" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"IBRION": 1}}})
        actions.append({"file": "CONTCAR",
                        "action": {"_file_copy": {"dest": "POSCAR"}}})

    if "too_few_bands" in self.errors:
        if "NBANDS" in vi["INCAR"]:
            nbands = int(vi["INCAR"]["NBANDS"])
        else:
            with open("OUTCAR") as f:
                for line in f:
                    if "NBANDS" in line:
                        try:
                            d = line.split("=")
                            nbands = int(d[-1].strip())
                            break
                        except (IndexError, ValueError):
                            pass
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"NBANDS": int(1.1 * nbands)}}})

    if "pssyevx" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ALGO": "Normal"}}})

    if "eddrmm" in self.errors:
        # RMM algorithm is not stable for this calculation
        if vi["INCAR"].get("ALGO", "Normal") in ["Fast", "VeryFast"]:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ALGO": "Normal"}}})
        else:
            potim = float(vi["INCAR"].get("POTIM", 0.5)) / 2.0
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"POTIM": potim}}})
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
            actions.append({"file": "WAVECAR",
                            "action": {"_file_delete": {'mode': "actual"}}})

    if "edddav" in self.errors:
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ALGO": "All"}}})

    if "grad_not_orth" in self.errors:
        if vi["INCAR"].get("ISMEAR", 1) < 0:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISMEAR": "0"}}})

    if "zheev" in self.errors:
        if vi["INCAR"].get("ALGO", "Fast").lower() != "exact":
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ALGO": "Exact"}}})

    if "elf_kpar" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"KPAR": 1}}})

    if "rhosyg" in self.errors:
        if vi["INCAR"].get("SYMPREC", 1e-4) == 1e-4:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-4}}})

    if "posmap" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-6}}})

    VaspModder(vi=vi).apply_actions(actions)
    return {"errors": list(self.errors), "actions": actions}
def run_task(self, fw_spec):
    handler_groups = {
        "default": [VaspErrorHandler(), MeshSymmetryErrorHandler(),
                    UnconvergedErrorHandler(), NonConvergingErrorHandler(),
                    PotimErrorHandler(), PositiveEnergyErrorHandler(),
                    FrozenJobErrorHandler(), StdErrHandler(),
                    DriftErrorHandler()],
        "strict": [VaspErrorHandler(), MeshSymmetryErrorHandler(),
                   UnconvergedErrorHandler(), NonConvergingErrorHandler(),
                   PotimErrorHandler(), PositiveEnergyErrorHandler(),
                   FrozenJobErrorHandler(), StdErrHandler(),
                   AliasingErrorHandler(), DriftErrorHandler()],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": []
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, six.string_types):
        vasp_cmd = os.path.expandvars(vasp_cmd)
        vasp_cmd = shlex.split(vasp_cmd)

    # initialize variables
    job_type = self.get("job_type", "normal")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", 5)
    auto_npar = env_chk(self.get("auto_npar"), fw_spec,
                        strict=False, default=False)
    gamma_vasp_cmd = env_chk(self.get("gamma_vasp_cmd"), fw_spec,
                             strict=False, default=None)
    if gamma_vasp_cmd:
        gamma_vasp_cmd = shlex.split(gamma_vasp_cmd)

    # construct jobs
    if job_type == "normal":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar,
                        gamma_vasp_cmd=gamma_vasp_cmd)]
    elif job_type == "double_relaxation_run":
        jobs = VaspJob.double_relaxation_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "metagga_opt_run":
        jobs = VaspJob.metagga_opt_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "full_opt_run":
        jobs = VaspJob.full_opt_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            max_steps=9,
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "neb":
        # TODO: @shyuep @HanmeiTang This means that NEB can only be run (i) in reservation mode
        # and (ii) when the queueadapter parameter is overridden and (iii) the queue adapter
        # has a convention for nnodes (with that name). Can't the number of nodes be made a
        # parameter that the user sets differently? e.g., fw_spec["neb_nnodes"] must be set
        # when setting job_type=NEB? Then someone can use this feature in non-reservation
        # mode and without this complication. -computron
        nnodes = int(fw_spec["_queueadapter"]["nnodes"])

        # TODO: @shyuep @HanmeiTang - I am not sure what the code below is doing. It looks like
        # it is trying to override the number of processors. But I tried running the code
        # below after setting "vasp_cmd = 'mpirun -n 16 vasp'" and the code fails.
        # (i) Is this expecting an array vasp_cmd rather than String? If so, that's opposite to
        # the rest of this task's convention and documentation
        # (ii) can we get rid of this hacking in the first place? e.g., allowing the user to
        # separately set the NEB_VASP_CMD as an env_variable and not rewriting the command
        # inside this. -computron

        # Index the tag "-n" or "-np"
        index = [i for i, s in enumerate(vasp_cmd) if '-n' in s]
        ppn = int(vasp_cmd[index[0] + 1])
        vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        # Do the same for gamma_vasp_cmd
        if gamma_vasp_cmd:
            index = [i for i, s in enumerate(gamma_vasp_cmd) if '-n' in s]
            ppn = int(gamma_vasp_cmd[index[0] + 1])
            gamma_vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        jobs = [VaspNEBJob(vasp_cmd, final=False, auto_npar=auto_npar,
                           gamma_vasp_cmd=gamma_vasp_cmd)]
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, six.string_types):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    if self.get("max_force_threshold"):
        handlers.append(MaxForceErrorHandler(
            max_force_threshold=self["max_force_threshold"]))

    if self.get("wall_time"):
        handlers.append(WalltimeHandler(wall_time=self["wall_time"]))

    if job_type == "neb":
        # CINEB vasprun.xml sometimes incomplete, file structure different
        validators = []
    else:
        validators = [VasprunXMLValidator(), VaspFilesValidator()]

    c = Custodian(handlers, jobs, validators=validators,
                  max_errors=max_errors, scratch_dir=scratch_dir,
                  gzipped_output=gzip_output)
    c.run()

    if os.path.exists(zpath("custodian.json")):
        return FWAction(stored_data=loadfn(zpath("custodian.json")))
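# Usage sketch: RunVaspCustodian is assumed to be the Firetask class this
# run_task belongs to; ">>vasp_cmd<<" uses the usual env_chk indirection,
# and the job/handler choices are illustrative.
task = RunVaspCustodian(vasp_cmd=">>vasp_cmd<<",
                        job_type="double_relaxation_run",
                        handler_group="md",
                        max_errors=3)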
def correct(self):
    backup(VASP_BACKUP_FILES | {self.output_filename})
    actions = []
    vi = VaspInput.from_directory(".")

    if self.errors.intersection(["tet", "dentet"]):
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ISMEAR": 0}}})

    if "inv_rot_mat" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-8}}})

    if "brmix" in self.errors:
        # If there is not a valid OUTCAR already, increment
        # error count to 1 to skip first fix
        if self.error_count['brmix'] == 0:
            try:
                assert (Outcar(zpath(os.path.join(
                    os.getcwd(), "OUTCAR"))).is_stopped is False)
            except Exception:
                self.error_count['brmix'] += 1

        if self.error_count['brmix'] == 0:
            # Valid OUTCAR - simply rerun the job and increment
            # error count for next time
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISTART": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] == 1:
            # Use Kerker mixing w/default values for other parameters
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] == 2 and vi["KPOINTS"].style \
                == Kpoints.supported_modes.Gamma:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Monkhorst"}}})
            actions.append({"dict": "INCAR",
                            "action": {"_unset": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

        elif self.error_count['brmix'] in [2, 3] and vi["KPOINTS"].style \
                == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})
            actions.append({"dict": "INCAR",
                            "action": {"_unset": {"IMIX": 1}}})
            self.error_count['brmix'] += 1

            if vi["KPOINTS"].num_kpts < 1:
                all_kpts_even = all([bool(n % 2 == 0)
                                     for n in vi["KPOINTS"].kpts[0]])
                print("all_kpts_even = {}".format(all_kpts_even))
                if all_kpts_even:
                    new_kpts = (tuple(n + 1 for n in vi["KPOINTS"].kpts[0]),)
                    print("new_kpts = {}".format(new_kpts))
                    actions.append({"dict": "KPOINTS",
                                    "action": {"_set": {"kpoints": new_kpts}}})

        else:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})
            if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
                actions.append({"dict": "KPOINTS",
                                "action": {"_set": {"generation_style": "Gamma"}}})

            # Based on VASP forum's recommendation, you should delete the
            # CHGCAR and WAVECAR when dealing with this error.
            if vi["INCAR"].get("ICHARG", 0) < 10:
                actions.append({"file": "CHGCAR",
                                "action": {"_file_delete": {'mode': "actual"}}})
                actions.append({"file": "WAVECAR",
                                "action": {"_file_delete": {'mode': "actual"}}})

    if "zpotrf" in self.errors:
        # Usually caused by short bond distances. If on the first step,
        # volume needs to be increased. Otherwise, it was due to a step
        # being too big and POTIM should be decreased.
        try:
            oszicar = Oszicar("OSZICAR")
            nsteps = len(oszicar.ionic_steps)
        except Exception:
            nsteps = 0

        if nsteps >= 1:
            potim = float(vi["INCAR"].get("POTIM", 0.5)) / 2.0
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0, "POTIM": potim}}})
        else:
            s = vi["POSCAR"].structure
            s.apply_strain(0.2)
            actions.append({"dict": "POSCAR",
                            "action": {"_set": {"structure": s.as_dict()}}})

        # Based on VASP forum's recommendation, you should delete the
        # CHGCAR and WAVECAR when dealing with this error.
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
            actions.append({"file": "WAVECAR",
                            "action": {"_file_delete": {'mode': "actual"}}})

    if self.errors.intersection(["subspacematrix", "rspher", "real_optlay"]):
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"LREAL": False}}})

    if self.errors.intersection(["tetirr", "incorrect_shift"]):
        if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})

    if "rot_matrix" in self.errors:
        if vi["KPOINTS"].style == Kpoints.supported_modes.Monkhorst:
            actions.append({"dict": "KPOINTS",
                            "action": {"_set": {"generation_style": "Gamma"}}})
        else:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISYM": 0}}})

    if "amin" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"AMIN": "0.01"}}})

    if "triple_product" in self.errors:
        s = vi["POSCAR"].structure
        trans = SupercellTransformation(((1, 0, 0), (0, 0, 1), (0, 1, 0)))
        new_s = trans.apply_transformation(s)
        actions.append({"dict": "POSCAR",
                        "action": {"_set": {"structure": new_s.as_dict()}},
                        "transformation": trans.as_dict()})

    if "pricel" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"SYMPREC": 1e-8, "ISYM": 0}}})

    if "brions" in self.errors:
        potim = float(vi["INCAR"].get("POTIM", 0.5)) + 0.1
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"POTIM": potim}}})

    if "zbrent" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"IBRION": 1}}})
        actions.append({"file": "CONTCAR",
                        "action": {"_file_copy": {"dest": "POSCAR"}}})

    if "too_few_bands" in self.errors:
        if "NBANDS" in vi["INCAR"]:
            nbands = int(vi["INCAR"]["NBANDS"])
        else:
            with open("OUTCAR") as f:
                for line in f:
                    if "NBANDS" in line:
                        try:
                            d = line.split("=")
                            nbands = int(d[-1].strip())
                            break
                        except (IndexError, ValueError):
                            pass
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"NBANDS": int(1.1 * nbands)}}})

    if "pssyevx" in self.errors:
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ALGO": "Normal"}}})

    if "eddrmm" in self.errors:
        # RMM algorithm is not stable for this calculation
        if vi["INCAR"].get("ALGO", "Normal") in ["Fast", "VeryFast"]:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ALGO": "Normal"}}})
        else:
            potim = float(vi["INCAR"].get("POTIM", 0.5)) / 2.0
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"POTIM": potim}}})
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
            actions.append({"file": "WAVECAR",
                            "action": {"_file_delete": {'mode': "actual"}}})

    if "edddav" in self.errors:
        if vi["INCAR"].get("ICHARG", 0) < 10:
            actions.append({"file": "CHGCAR",
                            "action": {"_file_delete": {'mode': "actual"}}})
        actions.append({"dict": "INCAR",
                        "action": {"_set": {"ALGO": "All"}}})

    if "grad_not_orth" in self.errors:
        if vi["INCAR"].get("ISMEAR", 1) < 0:
            actions.append({"dict": "INCAR",
                            "action": {"_set": {"ISMEAR": "0"}}})

    VaspModder(vi=vi).apply_actions(actions)
    return {"errors": list(self.errors), "actions": actions}
def get_task_doc(cls, path, fw_spec=None):
    """
    Get the entire task doc for a path, including any post-processing.
    """
    logger.info("Getting task doc for file: {}".format(path))
    qcout = QcOutput(zpath(path))
    data = qcout.data
    initial_mol = data[0]["molecules"][0]
    mol = data[0]["molecules"][-1]
    if data[0]["jobtype"] == "freq":
        mol = Molecule.from_dict(initial_mol.as_dict())
    bb = BabelMolAdaptor(mol)
    pbmol = bb.pybel_mol
    xyz = XYZ(mol)
    smiles = pbmol.write(str("smi")).split()[0]
    can = pbmol.write(str("can")).split()[0]
    inchi_final = pbmol.write(str("inchi")).strip()
    svg = cls.modify_svg(cls.xyz2svg(xyz))
    comp = mol.composition
    charge = mol.charge
    spin_mult = mol.spin_multiplicity
    data_dict = {}

    pga = PointGroupAnalyzer(mol)
    sch_symbol = pga.sch_symbol
    stationary_type = None
    has_structure_changing_job = False
    for d in data:
        if d["jobtype"] == "opt":
            data_dict["geom_opt"] = d
            has_structure_changing_job = True
        elif d["jobtype"] == "freq":
            data_dict["freq"] = d
            has_structure_changing_job = True
            if not d["has_error"]:
                if d['frequencies'][0]["frequency"] < -0.00:
                    # note that -0.00 compares as less than 0.00 here
                    stationary_type = "non-minimum"
                else:
                    stationary_type = "minimum"
            else:
                stationary_type = "unknown"
        elif d["jobtype"] == "sp":
            suffix = "" if d["solvent_method"] == "NA" \
                else "_" + d["solvent_method"]
            data_dict["scf" + suffix] = d
        elif d["jobtype"] == "aimd":
            data_dict["aimd"] = d
            has_structure_changing_job = True

    data = data_dict

    d = {
        "path": os.path.abspath(path),
        "folder": os.path.basename(os.path.dirname(os.path.abspath(path))),
        "calculations": data,
        "molecule_initial": initial_mol.as_dict(),
        "molecule_final": mol.as_dict(),
        "pointgroup": sch_symbol,
        "pretty_formula": comp.reduced_formula,
        "reduced_cell_formula_abc": comp.alphabetical_formula,
        "formula": comp.formula,
        "charge": charge,
        "spin_multiplicity": spin_mult,
        "composition": comp.as_dict(),
        "elements": list(comp.as_dict().keys()),
        "nelements": len(comp),
        "smiles": smiles,
        "can": can,
        "inchi_final": inchi_final,
        "svg": svg,
        "xyz": str(xyz),
        "names": get_nih_names(smiles)
    }

    if stationary_type:
        d['stationary_type'] = stationary_type
    if fw_spec:
        inchi_initial = fw_spec['inchi']
        if inchi_initial != d['inchi_final']:
            d['inchi_changed'] = True
        else:
            d['inchi_changed'] = False
    if has_structure_changing_job:
        d['structure_changed'] = cls._check_structure_change(
            initial_mol, mol, path)
    else:
        d['structure_changed'] = False
    if d['structure_changed']:
        d['state'] = 'rejected'
        d['reject_reason'] = 'structural change'
    if "state" not in d:
        for v in data_dict.values():
            if v['has_error']:
                d['state'] = "error"
                errors = d.get("errors", [])
                errors += v["errors"]
                d["errors"] = errors
    if "state" not in d:
        d["state"] = "successful"

    return jsanitize(d)
def process_fw(self, dir_name, d):
    d["task_id_deprecated"] = int(
        d["task_id"].split('-')[-1])  # useful for WC and AJ

    # update the run fields to give species group in root, if it exists
    for r in d['run_tags']:
        if "species_group=" in r:
            d["species_group"] = int(r.split("=")[-1])
            break

    # custom Materials Project post-processing for FireWorks
    with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f:
        fw_dict = json.load(f)
        d['fw_id'] = fw_dict['fw_id']
        d['snl'] = fw_dict['spec']['mpsnl']
        d['snlgroup_id'] = fw_dict['spec']['snlgroup_id']
        d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name')
        d['task_type'] = fw_dict['spec']['task_type']

        # Process data for deformed structures
        if 'deformed' in d['task_type']:
            d['deformation_matrix'] = fw_dict['spec']['deformation_matrix']
            d['original_task_id'] = fw_dict['spec']['original_task_id']

    if not self.update_duplicates:
        if 'optimize structure' in d['task_type'] and 'output' in d:
            # create a new SNL based on optimized structure
            new_s = Structure.from_dict(d['output']['crystal'])
            old_snl = StructureNL.from_dict(d['snl'])
            history = old_snl.history
            history.append({
                'name': 'Materials Project structure optimization',
                'url': 'http://www.materialsproject.org',
                'description': {
                    'task_type': d['task_type'],
                    'fw_id': d['fw_id'],
                    'task_id': d['task_id']
                }
            })
            new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects,
                                  old_snl.references, old_snl.remarks,
                                  old_snl.data, history)

            # enter new SNL into SNL db
            # get the SNL mongo adapter
            sma = SNLMongoAdapter.auto_load()

            # add snl
            mpsnl, snlgroup_id, spec_group = sma.add_snl(
                new_snl, snlgroup_guess=d['snlgroup_id'])
            d['snl_final'] = mpsnl.as_dict()
            d['snlgroup_id_final'] = snlgroup_id
            d['snlgroup_changed'] = (d['snlgroup_id'] !=
                                     d['snlgroup_id_final'])
        else:
            d['snl_final'] = d['snl']
            d['snlgroup_id_final'] = d['snlgroup_id']
            d['snlgroup_changed'] = False

    # custom processing for detecting errors
    new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json')))
    vasp_signals = {}
    critical_errors = ["INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST",
                       "INCOHERENT_POTCARS", "VASP_HASNT_STARTED",
                       "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED",
                       "NETWORK_QUIESCED", "HARD_KILLED",
                       "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE",
                       "DISK_SPACE_EXCEEDED", "NO_RELAX2",
                       "POSITIVE_ENERGY"]

    last_relax_dir = dir_name

    if not new_style:
        # get the last relaxation dir
        # the order is relax2, current dir, then relax1. This is because
        # after completing relax1, the job happens in the current dir.
        # Finally, it gets moved to relax2.
        # There are some weird cases where both the current dir and relax2
        # contain data. The relax2 is good, but the current dir is bad.
        if is_valid_vasp_dir(os.path.join(dir_name, "relax2")):
            last_relax_dir = os.path.join(dir_name, "relax2")
        elif is_valid_vasp_dir(dir_name):
            pass
        elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")):
            last_relax_dir = os.path.join(dir_name, "relax1")

    vasp_signals['last_relax_dir'] = last_relax_dir

    # see what error signals are present
    print("getting signals for dir: {}".format(last_relax_dir))
    sl = SignalDetectorList()
    sl.append(VASPInputsExistSignal())
    sl.append(VASPOutputsExistSignal())
    sl.append(VASPOutSignal())
    sl.append(HitAMemberSignal())
    sl.append(SegFaultSignal())
    sl.append(VASPStartedCompletedSignal())

    if d['state'] == 'successful' and 'optimize structure' in d['task_type']:
        sl.append(Relax2ExistsSignal())

    signals = sl.detect_all(last_relax_dir)

    signals = signals.union(WallTimeSignal().detect(dir_name))
    if not new_style:
        root_dir = os.path.dirname(dir_name)  # one level above dir_name
        signals = signals.union(WallTimeSignal().detect(root_dir))

    signals = signals.union(DiskSpaceExceededSignal().detect(dir_name))
    if not new_style:
        root_dir = os.path.dirname(dir_name)  # one level above dir_name
        signals = signals.union(DiskSpaceExceededSignal().detect(root_dir))

    final_energy = d.get('output', {}).get('final_energy', None)
    if final_energy is not None and final_energy > 0:
        signals.add('POSITIVE_ENERGY')

    signals = list(signals)
    critical_signals = [val for val in signals if val in critical_errors]

    vasp_signals['signals'] = signals
    vasp_signals['critical_signals'] = critical_signals
    vasp_signals['num_signals'] = len(signals)
    vasp_signals['num_critical'] = len(critical_signals)

    if len(critical_signals) > 0 and d['state'] == "successful":
        d["state"] = "error"

    d['analysis'] = d.get('analysis', {})
    d['analysis']['errors_MP'] = vasp_signals
def run(self, pdb_on_exception=False):
    """
    Run the rocket (check out a job from the database and execute it)

    Args:
        pdb_on_exception (bool): whether to invoke the debugger on
            a caught exception. Default False.
    """
    all_stored_data = {}  # combined stored data for *all* the Tasks
    all_update_spec = {}  # combined update_spec for *all* the Tasks
    all_mod_spec = []  # combined mod_spec for *all* the Tasks

    lp = self.launchpad
    launch_dir = os.path.abspath(os.getcwd())
    logdir = lp.get_logdir() if lp else None
    l_logger = get_fw_logger('rocket.launcher', l_dir=logdir,
                             stream_level=ROCKET_STREAM_LOGLEVEL)

    # check a FW job out of the launchpad
    if lp:
        m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id)
    else:  # offline mode
        m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json"))

        # set the run start time
        fpath = zpath("FW_offline.json")
        with zopen(fpath) as f_in:
            d = json.loads(f_in.read())
            d['started_on'] = datetime.utcnow().isoformat()
            with zopen(fpath, "wt") as f_out:
                f_out.write(json.dumps(d, ensure_ascii=False))

        launch_id = None  # we don't need this in offline mode...

    if not m_fw:
        print("No FireWorks are ready to run and match query! {}".format(
            self.fworker.query))
        return False

    final_state = None
    ping_stop = None
    btask_stops = []

    try:
        if '_launch_dir' in m_fw.spec and lp:
            prev_dir = launch_dir
            launch_dir = os.path.expandvars(m_fw.spec['_launch_dir'])
            if not os.path.isabs(launch_dir):
                launch_dir = os.path.normpath(
                    os.path.join(os.getcwd(), launch_dir))

            # thread-safe "mkdir -p"
            try:
                os.makedirs(launch_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    raise

            os.chdir(launch_dir)

            if not os.path.samefile(launch_dir, prev_dir):
                lp.change_launch_dir(launch_id, launch_dir)

            if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS:
                try:
                    os.rmdir(prev_dir)
                except Exception:
                    pass

        recovery = m_fw.spec.get('_recovery', None)
        if recovery:
            recovery_dir = recovery.get('_prev_dir')
            recovery_mode = recovery.get('_mode')
            starting_task = recovery.get('_task_n')
            all_stored_data.update(recovery.get('_all_stored_data'))
            all_update_spec.update(recovery.get('_all_update_spec'))
            all_mod_spec.extend(recovery.get('_all_mod_spec'))
            if lp:
                l_logger.log(
                    logging.INFO,
                    'Recovering from task number {} in folder {}.'.format(
                        starting_task, recovery_dir))
            if recovery_mode == 'cp' and launch_dir != recovery_dir:
                if lp:
                    l_logger.log(
                        logging.INFO,
                        'Copying data from recovery folder {} to folder {}.'.format(
                            recovery_dir, launch_dir))
                distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1)
        else:
            starting_task = 0
            files_in = m_fw.spec.get("_files_in", {})
            prev_files = m_fw.spec.get("_files_prev", {})
            for f in set(files_in.keys()).intersection(prev_files.keys()):
                # We use zopen for the file objects for transparent handling
                # of zipped files. shutil.copyfileobj does the actual copy
                # in chunks that avoid memory issues.
                with zopen(prev_files[f], "rb") as fin, \
                        zopen(files_in[f], "wb") as fout:
                    shutil.copyfileobj(fin, fout)

        if lp:
            message = 'RUNNING fw_id: {} in directory: {}'.format(
                m_fw.fw_id, os.getcwd())
            l_logger.log(logging.INFO, message)

        # write FW.json and/or FW.yaml to the directory
        if PRINT_FW_JSON:
            m_fw.to_file('FW.json', indent=4)
        if PRINT_FW_YAML:
            m_fw.to_file('FW.yaml')

        my_spec = dict(m_fw.spec)  # make a copy of spec, don't override original
        my_spec["_fw_env"] = self.fworker.env

        # set up heartbeat (pinging the server that we're still alive)
        ping_stop = start_ping_launch(lp, launch_id)

        # start background tasks
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                btask_stops.append(start_background_task(bt, m_fw.spec))

        # execute the Firetasks!
        for t_counter, t in enumerate(m_fw.tasks[starting_task:],
                                      start=starting_task):
            checkpoint = {'_task_n': t_counter,
                          '_all_stored_data': all_stored_data,
                          '_all_update_spec': all_update_spec,
                          '_all_mod_spec': all_mod_spec}
            Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint)

            if lp:
                l_logger.log(logging.INFO, "Task started: %s." % t.fw_name)

            if my_spec.get("_add_launchpad_and_fw_id"):
                t.fw_id = m_fw.fw_id
                if FWData().MULTIPROCESSING:
                    # hack because AutoProxy manager can't access attributes
                    t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict())
                else:
                    t.launchpad = self.launchpad

            if my_spec.get("_add_fworker"):
                t.fworker = self.fworker

            try:
                m_action = t.run_task(my_spec)
            except BaseException as e:
                traceback.print_exc()
                tb = traceback.format_exc()
                stop_backgrounds(ping_stop, btask_stops)
                do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
                # If the exception is serializable, save its details
                if pdb_on_exception:
                    pdb.post_mortem()
                try:
                    exception_details = e.to_dict()
                except AttributeError:
                    exception_details = None
                except BaseException as e:
                    if lp:
                        l_logger.log(
                            logging.WARNING,
                            "Exception couldn't be serialized: %s " % e)
                    exception_details = None

                try:
                    m_task = t.to_dict()
                except Exception:
                    m_task = None

                m_action = FWAction(stored_data={
                    '_message': 'runtime error during task',
                    '_task': m_task,
                    '_exception': {'_stacktrace': tb,
                                   '_details': exception_details}},
                    exit=True)
                m_action = self.decorate_fwaction(m_action, my_spec,
                                                  m_fw, launch_dir)

                if lp:
                    final_state = 'FIZZLED'
                    lp.complete_launch(launch_id, m_action, final_state)
                else:
                    fpath = zpath("FW_offline.json")
                    with zopen(fpath) as f_in:
                        d = json.loads(f_in.read())
                        d['fwaction'] = m_action.to_dict()
                        d['state'] = 'FIZZLED'
                        d['completed_on'] = datetime.utcnow().isoformat()
                        with zopen(fpath, "wt") as f_out:
                            f_out.write(json.dumps(d, ensure_ascii=False))

                return True

            # read in a FWAction from a file, in case the task is not Python
            # and cannot return it explicitly
            if os.path.exists('FWAction.json'):
                m_action = FWAction.from_file('FWAction.json')
            elif os.path.exists('FWAction.yaml'):
                m_action = FWAction.from_file('FWAction.yaml')

            if not m_action:
                m_action = FWAction()

            # update the global stored data with the data to store and
            # update from this particular Task
            all_stored_data.update(m_action.stored_data)
            all_update_spec.update(m_action.update_spec)
            all_mod_spec.extend(m_action.mod_spec)

            # update spec for next task as well
            my_spec.update(m_action.update_spec)
            for mod in m_action.mod_spec:
                apply_mod(mod, my_spec)
            if lp:
                l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name)
            if m_action.skip_remaining_tasks:
                break

        # add job packing info if this is needed
        if FWData().MULTIPROCESSING and STORE_PACKING_INFO:
            all_stored_data['multiprocess_name'] = \
                multiprocessing.current_process().name

        # perform finishing operation
        stop_backgrounds(ping_stop, btask_stops)
        for b in btask_stops:
            b.set()
        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor

        # last background monitors
        if '_background_tasks' in my_spec:
            for bt in my_spec['_background_tasks']:
                if bt.run_on_finish:
                    for task in bt.tasks:
                        task.run_task(m_fw.spec)

        m_action.stored_data = all_stored_data
        m_action.mod_spec = all_mod_spec
        m_action.update_spec = all_update_spec

        m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)

        if lp:
            final_state = 'COMPLETED'
            lp.complete_launch(launch_id, m_action, final_state)
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'COMPLETED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))

        return True

    except LockedWorkflowError as e:
        l_logger.log(logging.DEBUG, traceback.format_exc())
        l_logger.log(logging.WARNING,
                     "Firework {} reached final state {} but couldn't complete the update of "
                     "the database. Reason: {}\nRefresh the WF to recover the result "
                     "(lpad admin refresh -i {}).".format(
                         self.fw_id, final_state, e, self.fw_id))
        return True

    except Exception:
        # problems while processing the results. high probability of malformed data.
        traceback.print_exc()
        stop_backgrounds(ping_stop, btask_stops)
        # restore initial state to prevent the raise of further exceptions
        if lp:
            lp.restore_backup_data(launch_id, m_fw.fw_id)

        do_ping(lp, launch_id)  # one last ping, esp if there is a monitor
        # the action produced by the task is discarded
        m_action = FWAction(stored_data={
            '_message': 'runtime error during task',
            '_task': None,
            '_exception': {'_stacktrace': traceback.format_exc(),
                           '_details': None}},
            exit=True)

        try:
            m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir)
        except Exception:
            traceback.print_exc()

        if lp:
            try:
                lp.complete_launch(launch_id, m_action, 'FIZZLED')
            except LockedWorkflowError as e:
                l_logger.log(logging.DEBUG, traceback.format_exc())
                l_logger.log(logging.WARNING,
                             "Firework {} fizzled but couldn't complete the update of the database."
                             " Reason: {}\nRefresh the WF to recover the result "
                             "(lpad admin refresh -i {}).".format(
                                 self.fw_id, final_state, e, self.fw_id))
            return True
        else:
            fpath = zpath("FW_offline.json")
            with zopen(fpath) as f_in:
                d = json.loads(f_in.read())
                d['fwaction'] = m_action.to_dict()
                d['state'] = 'FIZZLED'
                d['completed_on'] = datetime.utcnow().isoformat()
                with zopen(fpath, "wt") as f_out:
                    f_out.write(json.dumps(d, ensure_ascii=False))
            return True
def run_task(self, fw_spec):
    handler_groups = {
        "default": [VaspErrorHandler(), MeshSymmetryErrorHandler(),
                    UnconvergedErrorHandler(), NonConvergingErrorHandler(),
                    PotimErrorHandler(), PositiveEnergyErrorHandler(),
                    FrozenJobErrorHandler(), StdErrHandler(),
                    DriftErrorHandler()],
        "strict": [VaspErrorHandler(), MeshSymmetryErrorHandler(),
                   UnconvergedErrorHandler(), NonConvergingErrorHandler(),
                   PotimErrorHandler(), PositiveEnergyErrorHandler(),
                   FrozenJobErrorHandler(), StdErrHandler(),
                   AliasingErrorHandler(), DriftErrorHandler()],
        "md": [VaspErrorHandler(), NonConvergingErrorHandler()],
        "no_handler": []
    }

    vasp_cmd = env_chk(self["vasp_cmd"], fw_spec)
    if isinstance(vasp_cmd, str):
        vasp_cmd = os.path.expandvars(vasp_cmd)
        vasp_cmd = shlex.split(vasp_cmd)

    # initialize variables
    job_type = self.get("job_type", "normal")
    scratch_dir = env_chk(self.get("scratch_dir"), fw_spec)
    gzip_output = self.get("gzip_output", True)
    max_errors = self.get("max_errors", CUSTODIAN_MAX_ERRORS)
    auto_npar = env_chk(self.get("auto_npar"), fw_spec,
                        strict=False, default=False)
    gamma_vasp_cmd = env_chk(self.get("gamma_vasp_cmd"), fw_spec,
                             strict=False, default=None)
    if gamma_vasp_cmd:
        gamma_vasp_cmd = shlex.split(gamma_vasp_cmd)

    # construct jobs
    if job_type == "normal":
        jobs = [VaspJob(vasp_cmd, auto_npar=auto_npar,
                        gamma_vasp_cmd=gamma_vasp_cmd)]
    elif job_type == "double_relaxation_run":
        jobs = VaspJob.double_relaxation_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "metagga_opt_run":
        jobs = VaspJob.metagga_opt_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "full_opt_run":
        jobs = VaspJob.full_opt_run(
            vasp_cmd, auto_npar=auto_npar, ediffg=self.get("ediffg"),
            max_steps=9,
            half_kpts_first_relax=self.get("half_kpts_first_relax",
                                           HALF_KPOINTS_FIRST_RELAX))
    elif job_type == "neb":
        # TODO: @shyuep @HanmeiTang This means that NEB can only be run (i) in reservation mode
        # and (ii) when the queueadapter parameter is overridden and (iii) the queue adapter
        # has a convention for nnodes (with that name). Can't the number of nodes be made a
        # parameter that the user sets differently? e.g., fw_spec["neb_nnodes"] must be set
        # when setting job_type=NEB? Then someone can use this feature in non-reservation
        # mode and without this complication. -computron
        nnodes = int(fw_spec["_queueadapter"]["nnodes"])

        # TODO: @shyuep @HanmeiTang - I am not sure what the code below is doing. It looks like
        # it is trying to override the number of processors. But I tried running the code
        # below after setting "vasp_cmd = 'mpirun -n 16 vasp'" and the code fails.
        # (i) Is this expecting an array vasp_cmd rather than String? If so, that's opposite to
        # the rest of this task's convention and documentation
        # (ii) can we get rid of this hacking in the first place? e.g., allowing the user to
        # separately set the NEB_VASP_CMD as an env_variable and not rewriting the command
        # inside this. -computron

        # Index the tag "-n" or "-np"
        index = [i for i, s in enumerate(vasp_cmd) if '-n' in s]
        ppn = int(vasp_cmd[index[0] + 1])
        vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        # Do the same for gamma_vasp_cmd
        if gamma_vasp_cmd:
            index = [i for i, s in enumerate(gamma_vasp_cmd) if '-n' in s]
            ppn = int(gamma_vasp_cmd[index[0] + 1])
            gamma_vasp_cmd[index[0] + 1] = str(nnodes * ppn)

        jobs = [VaspNEBJob(vasp_cmd, final=False, auto_npar=auto_npar,
                           gamma_vasp_cmd=gamma_vasp_cmd)]
    else:
        raise ValueError("Unsupported job type: {}".format(job_type))

    # construct handlers
    handler_group = self.get("handler_group", "default")
    if isinstance(handler_group, str):
        handlers = handler_groups[handler_group]
    else:
        handlers = handler_group

    if self.get("max_force_threshold"):
        handlers.append(MaxForceErrorHandler(
            max_force_threshold=self["max_force_threshold"]))

    if self.get("wall_time"):
        handlers.append(WalltimeHandler(wall_time=self["wall_time"]))

    if job_type == "neb":
        # CINEB vasprun.xml sometimes incomplete, file structure different
        validators = []
    else:
        validators = [VasprunXMLValidator(), VaspFilesValidator()]

    c = Custodian(handlers, jobs, validators=validators,
                  max_errors=max_errors, scratch_dir=scratch_dir,
                  gzipped_output=gzip_output)
    c.run()

    if os.path.exists(zpath("custodian.json")):
        stored_custodian_data = {"custodian": loadfn(zpath("custodian.json"))}
        return FWAction(stored_data=stored_custodian_data)
def assimilate(self, path, launches_coll=None): """ Parse a vasp run, insert the result into the db, and return the task_id or doc of the insertion. Returns: If in simulate_mode, the entire doc is returned for debugging purposes. Else, only the task_id of the inserted doc is returned. """ d = self.get_task_doc(path) if self.additional_fields: d.update(self.additional_fields) # always add additional fields, even for failed jobs try: d["dir_name_full"] = d["dir_name"].split(":")[1] d["dir_name"] = get_block_part(d["dir_name_full"]) d["stored_data"] = {} except: print('COULD NOT GET DIR NAME') pprint.pprint(d) print(traceback.format_exc()) raise ValueError('IMPROPER PARSING OF {}'.format(path)) if not self.simulate: # Perform actual insertion into db. Because db connections cannot # be pickled, every insertion needs to create a new connection # to the db. conn = MongoClient(self.host, self.port) db = conn[self.database] if self.user: db.authenticate(self.user, self.password) coll = db[self.collection] # Insert dos data into gridfs and then remove it from the dict. # DOS data tends to be above the 4Mb limit for mongo docs. A ref # to the dos file is in the dos_fs_id. result = coll.find_one({"dir_name": d["dir_name"]}) if result is None or self.update_duplicates: if self.parse_dos and "calculations" in d: for calc in d["calculations"]: if "dos" in calc: dos = json.dumps(calc["dos"], cls=MontyEncoder) fs = gridfs.GridFS(db, "dos_fs") dosid = fs.put(dos) calc["dos_fs_id"] = dosid del calc["dos"] d["last_updated"] = datetime.datetime.today() if result is None: if ("task_id" not in d) or (not d["task_id"]): d["task_id"] = "mp-{}".format( db.counter.find_one_and_update( {"_id": "taskid"}, {"$inc": {"c": 1}} )["c"]) logger.info("Inserting {} with taskid = {}" .format(d["dir_name"], d["task_id"])) elif self.update_duplicates: d["task_id"] = result["task_id"] logger.info("Updating {} with taskid = {}" .format(d["dir_name"], d["task_id"])) # Fireworks processing self.process_fw(path, d) try: # Add oxide_type struct = Structure.from_dict(d["output"]["crystal"]) d["oxide_type"] = oxide_type(struct) except: logger.error("can't get oxide_type for {}".format(d["task_id"])) d["oxide_type"] = None # Override incorrect outcar subdocs for two step relaxations if "optimize structure" in d['task_type'] and \ os.path.exists(os.path.join(path, "relax2")): try: run_stats = {} for i in [1, 2]: o_path = os.path.join(path, "relax" + str(i), "OUTCAR") o_path = o_path if os.path.exists(o_path) else o_path + ".gz" outcar = Outcar(o_path) d["calculations"][i - 1]["output"]["outcar"] = outcar.as_dict() run_stats["relax" + str(i)] = outcar.run_stats except: logger.error("Bad OUTCAR for {}.".format(path)) try: overall_run_stats = {} for key in ["Total CPU time used (sec)", "User time (sec)", "System time (sec)", "Elapsed time (sec)"]: overall_run_stats[key] = sum([v[key] for v in run_stats.values()]) run_stats["overall"] = overall_run_stats except: logger.error("Bad run stats for {}.".format(path)) d["run_stats"] = run_stats # add is_compatible mpc = MaterialsProjectCompatibility("Advanced") try: func = d["pseudo_potential"]["functional"] labels = d["pseudo_potential"]["labels"] symbols = ["{} {}".format(func, label) for label in labels] parameters = {"run_type": d["run_type"], "is_hubbard": d["is_hubbard"], "hubbards": d["hubbards"], "potcar_symbols": symbols} entry = ComputedEntry(Composition(d["unit_cell_formula"]), 0.0, 0.0, parameters=parameters, entry_id=d["task_id"]) d['is_compatible'] = bool(mpc.process_entry(entry)) except:
traceback.print_exc() print('ERROR in getting compatibility') d['is_compatible'] = None # task_type dependent processing if 'static' in d['task_type']: launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}}, {"action.stored_data": 1}) for i in ["conventional_standard_structure", "symmetry_operations", "symmetry_dataset", "refined_structure"]: try: d['stored_data'][i] = launch_doc['action']['stored_data'][i] except: pass # parse band structure if necessary if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\ and d['state'] == 'successful': launch_doc = launches_coll.find_one({"fw_id": d['fw_id'], "launch_dir": {"$regex": d["dir_name"]}}, {"action.stored_data": 1}) vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")), parse_projected_eigen=False) if 'band structure' in d['task_type']: def string_to_numlist(stringlist): g = re.search(r'([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)', stringlist) return [float(g.group(i)) for i in range(1, 4)] for i in ["kpath_name", "kpath"]: d['stored_data'][i] = launch_doc['action']['stored_data'][i] kpoints_doc = d['stored_data']['kpath']['kpoints'] for i in kpoints_doc: if isinstance(kpoints_doc[i], six.string_types): kpoints_doc[i] = string_to_numlist(kpoints_doc[i]) bs = vasp_run.get_band_structure(efermi=d['calculations'][0]['output']['outcar']['efermi'], line_mode=True) else: bs = vasp_run.get_band_structure(efermi=d['calculations'][0]['output']['outcar']['efermi'], line_mode=False) bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder) fs = gridfs.GridFS(db, "band_structure_fs") bs_id = fs.put(bs_json) d['calculations'][0]["band_structure_fs_id"] = bs_id # also override band gap in task doc gap = bs.get_band_gap() vbm = bs.get_vbm() cbm = bs.get_cbm() update_doc = {'bandgap': gap['energy'], 'vbm': vbm['energy'], 'cbm': cbm['energy'], 'is_gap_direct': gap['direct']} d['analysis'].update(update_doc) d['calculations'][0]['output'].update(update_doc) coll.update_one({"dir_name": d["dir_name"]}, {'$set': d}, upsert=True) return d["task_id"], d else: logger.info("Skipping duplicate {}".format(d["dir_name"])) return result["task_id"], result else: d["task_id"] = 0 logger.info("Simulated insert into database for {} with task_id {}" .format(d["dir_name"], d["task_id"])) return 0, d
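A minimal readback sketch for the GridFS pattern above: assimilate() stores DOS JSON in the "dos_fs" bucket and keeps only dos_fs_id in the task doc. The host and database name below are hypothetical placeholders; only the bucket name comes from the source.

import json

import gridfs
from pymongo import MongoClient

def load_dos(db, dos_fs_id):
    """Fetch a DOS dict previously stored in the "dos_fs" bucket by assimilate()."""
    fs = gridfs.GridFS(db, "dos_fs")
    return json.loads(fs.get(dos_fs_id).read())

# e.g., with a task doc in hand:
# db = MongoClient("localhost", 27017)["vasp"]  # hypothetical host/db
# dos = load_dos(db, task_doc["calculations"][0]["dos_fs_id"])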
def process_fw(self, dir_name, d): d["task_id_deprecated"] = int(d["task_id"].split('-')[-1]) # useful for WC and AJ # update the run fields to give species group in root, if exists for r in d['run_tags']: if "species_group=" in r: d["species_group"] = int(r.split("=")[-1]) break # custom Materials Project post-processing for FireWorks with zopen(zpath(os.path.join(dir_name, 'FW.json'))) as f: fw_dict = json.load(f) d['fw_id'] = fw_dict['fw_id'] d['snl'] = fw_dict['spec']['mpsnl'] d['snlgroup_id'] = fw_dict['spec']['snlgroup_id'] d['vaspinputset_name'] = fw_dict['spec'].get('vaspinputset_name') d['task_type'] = fw_dict['spec']['task_type'] # Process data for deformed structures if 'deformed' in d['task_type']: d['deformation_matrix'] = fw_dict['spec']['deformation_matrix'] d['original_task_id'] = fw_dict['spec']['original_task_id'] if not self.update_duplicates: if 'optimize structure' in d['task_type'] and 'output' in d: # create a new SNL based on optimized structure new_s = Structure.from_dict(d['output']['crystal']) old_snl = StructureNL.from_dict(d['snl']) history = old_snl.history history.append( {'name': 'Materials Project structure optimization', 'url': 'http://www.materialsproject.org', 'description': {'task_type': d['task_type'], 'fw_id': d['fw_id'], 'task_id': d['task_id']}}) new_snl = StructureNL(new_s, old_snl.authors, old_snl.projects, old_snl.references, old_snl.remarks, old_snl.data, history) # enter new SNL into SNL db # get the SNL mongo adapter sma = SNLMongoAdapter.auto_load() # add snl mpsnl, snlgroup_id, spec_group = sma.add_snl(new_snl, snlgroup_guess=d['snlgroup_id']) d['snl_final'] = mpsnl.as_dict() d['snlgroup_id_final'] = snlgroup_id d['snlgroup_changed'] = (d['snlgroup_id'] != d['snlgroup_id_final']) else: d['snl_final'] = d['snl'] d['snlgroup_id_final'] = d['snlgroup_id'] d['snlgroup_changed'] = False # custom processing for detecting errors new_style = os.path.exists(zpath(os.path.join(dir_name, 'FW.json'))) vasp_signals = {} critical_errors = ["INPUTS_DONT_EXIST", "OUTPUTS_DONT_EXIST", "INCOHERENT_POTCARS", "VASP_HASNT_STARTED", "VASP_HASNT_COMPLETED", "CHARGE_UNCONVERGED", "NETWORK_QUIESCED", "HARD_KILLED", "WALLTIME_EXCEEDED", "ATOMS_TOO_CLOSE", "DISK_SPACE_EXCEEDED", "NO_RELAX2", "POSITIVE_ENERGY"] last_relax_dir = dir_name if not new_style: # get the last relaxation dir # the order is relax2, current dir, then relax1. This is because # after completing relax1, the job happens in the current dir. # Finally, it gets moved to relax2. # There are some weird cases where both the current dir and relax2 # contain data. The relax2 is good, but the current dir is bad. 
if is_valid_vasp_dir(os.path.join(dir_name, "relax2")): last_relax_dir = os.path.join(dir_name, "relax2") elif is_valid_vasp_dir(dir_name): pass elif is_valid_vasp_dir(os.path.join(dir_name, "relax1")): last_relax_dir = os.path.join(dir_name, "relax1") vasp_signals['last_relax_dir'] = last_relax_dir # see what error signals are present print("getting signals for dir: {}".format(last_relax_dir)) sl = SignalDetectorList() sl.append(VASPInputsExistSignal()) sl.append(VASPOutputsExistSignal()) sl.append(VASPOutSignal()) sl.append(HitAMemberSignal()) sl.append(SegFaultSignal()) sl.append(VASPStartedCompletedSignal()) if d['state'] == 'successful' and 'optimize structure' in d['task_type']: sl.append(Relax2ExistsSignal()) signals = sl.detect_all(last_relax_dir) signals = signals.union(WallTimeSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(WallTimeSignal().detect(root_dir)) signals = signals.union(DiskSpaceExceededSignal().detect(dir_name)) if not new_style: root_dir = os.path.dirname(dir_name) # one level above dir_name signals = signals.union(DiskSpaceExceededSignal().detect(root_dir)) final_energy = d.get('output', {}).get('final_energy', None) if final_energy is not None and final_energy > 0: signals.add('POSITIVE_ENERGY') signals = list(signals) critical_signals = [val for val in signals if val in critical_errors] vasp_signals['signals'] = signals vasp_signals['critical_signals'] = critical_signals vasp_signals['num_signals'] = len(signals) vasp_signals['num_critical'] = len(critical_signals) if len(critical_signals) > 0 and d['state'] == "successful": d["state"] = "error" d['analysis'] = d.get('analysis', {}) d['analysis']['errors_MP'] = vasp_signals
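The relax2 / current-dir / relax1 precedence above reads more easily as a standalone helper. This is a sketch only; is_valid_vasp_dir is the project's own validity check and is assumed importable here.

import os

def resolve_last_relax_dir(dir_name):
    """Return the directory holding the final relaxation data."""
    relax2 = os.path.join(dir_name, "relax2")
    relax1 = os.path.join(dir_name, "relax1")
    if is_valid_vasp_dir(relax2):    # relax2 wins even if dir_name also has data
        return relax2
    if is_valid_vasp_dir(dir_name):  # job still sitting in the current dir
        return dir_name
    if is_valid_vasp_dir(relax1):    # only relax1 completed
        return relax1
    return dir_name                  # nothing valid; fall back unchanged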
def run_task(self, fw_spec): prev_dir = get_loc(fw_spec['prev_vasp_dir']) if '$ALL' in self.files: self.files = os.listdir(prev_dir) for file in self.files: prev_filename = last_relax(os.path.join(prev_dir, file)) dest_file = 'POSCAR' if file == 'CONTCAR' and self.use_contcar else file if prev_filename.endswith('.gz'): dest_file += '.gz' print('COPYING', prev_filename, dest_file) if self.missing_CHGCAR_OK and 'CHGCAR' in dest_file and not os.path.exists(zpath(prev_filename)): print('Skipping missing CHGCAR') else: shutil.copy2(prev_filename, dest_file) if '.gz' in dest_file: # unzip dest file f = gzip.open(dest_file, 'rb') file_content = f.read() with open(dest_file[0:-3], 'wb') as f_out: f_out.write(file_content) f.close() os.remove(dest_file) return FWAction(stored_data={'copied_files': self.files})
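The inline gzip unpacking above can also be done in constant memory with shutil.copyfileobj; a stdlib-only sketch of the same step, not part of the original task:

import gzip
import os
import shutil

def gunzip_in_place(gz_path):
    """Decompress gz_path next to itself and delete the .gz original."""
    dest = gz_path[:-3]  # strip the ".gz" suffix
    with gzip.open(gz_path, "rb") as f_in, open(dest, "wb") as f_out:
        shutil.copyfileobj(f_in, f_out)  # chunked copy; whole file never held in memory
    os.remove(gz_path)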
def test_zpath(self): fullzpath = zpath(os.path.join(test_dir, "myfile_gz")) self.assertEqual(os.path.join(test_dir, "myfile_gz.gz"), fullzpath)
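What the test exercises: zpath (from monty.os.path) resolves a bare filename to whichever variant actually exists on disk, trying compressed suffixes such as .gz and .bz2, and returns the name unchanged if nothing exists. A quick illustration, assuming only "INCAR.gz" is present in the current directory:

from monty.os.path import zpath

print(zpath("INCAR"))  # -> "INCAR.gz" when only the gzipped file exists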
def main(): import argparse parser = argparse.ArgumentParser( description="Create fake-run FireWorks from previous QChem job directories") parser.add_argument( "-d", "--directory", dest="directory", type=str, required=True, help= "the directory containing all the QChem jobs to be pretended to run again" ) parser.add_argument("-p", "--priority", dest="priority", type=int, default=100, help="the FireWorks priority") parser.add_argument("-b", "--batch_size", dest="batch_size", type=int, default=100, help="the number of FireWorks in a Workflow") options = parser.parse_args() fw_priority = options.priority batch_size = options.batch_size lp = LaunchPad.auto_load() src_dir = os.path.abspath(options.directory) src_dir_sub_dirs = glob.glob(os.path.join(src_dir, "*")) num_dirs = len(src_dir_sub_dirs) current_fwid = 1 links_dict = dict() fws_all = [] num_fw_in_current_batch = 0 batch_num = 1 for i, sd in enumerate(src_dir_sub_dirs): if not os.path.isdir(sd): continue fw_json_filename = os.path.join(sd, "FW.json") if not (os.path.exists(fw_json_filename) or os.path.exists(fw_json_filename + ".gz")): continue with zopen(zpath(fw_json_filename), 'rt') as f: fw_dict = json.load(f) print("{percent:4.2%} completed, processing directory {d:s}, molecule name {molname:s}, mission {mission:s}".format(percent=i / float(num_dirs), d=sd, molname=fw_dict['spec']['user_tags']['molname'], mission=fw_dict['spec']['user_tags']['mission'])) molname = fw_dict['spec']['user_tags']['molname'] egsnl_tasks = [AddEGSNLTask()] if 'mol' in fw_dict: mol = Molecule.from_dict(fw_dict['spec']['mol']) else: mol = Molecule.from_dict( fw_dict['spec']['qcinp']['jobs'][0]['molecule']) snl = StructureNL(mol, "Xiaohui Qu <*****@*****.**>", "Electrolyte Genome") egsnl_task_spec = { 'task_type': 'Add to SNL database', 'snl': snl.as_dict(), '_category': 'Parse Previous QChem Job', '_priority': fw_priority } snl_fw_id = current_fwid current_fwid += 1 fws_all.append( Firework( egsnl_tasks, egsnl_task_spec, name=get_slug(molname + ' -- Add to SNL database For fake QChem Task'), fw_id=snl_fw_id)) fake_qchem_tasks = [FakeRunQChemTask()] src_qchem_dir = sd fake_qchem_spec = { '_priority': fw_priority * 2, 'src_qchem_dir': src_qchem_dir, '_category': 'Parse Previous QChem Job', 'run_tags': fw_dict['spec']['run_tags'], 'implicit_solvent': fw_dict['spec']['implicit_solvent'], 'task_type': fw_dict['spec']['task_type'], 'charge': fw_dict['spec']['charge'], 'spin_multiplicity': fw_dict['spec']['spin_multiplicity'], 'num_atoms': fw_dict['spec']['num_atoms'], 'user_tags': fw_dict['spec']['user_tags'], 'mol': mol.as_dict(), 'inchi': fw_dict['spec']['inchi'], '_dupefinder': fw_dict['spec']['_dupefinder'], 'qcinp': fw_dict['spec']['qcinp'], 'qm_method': fw_dict['spec']['qm_method'], 'inchi_root': fw_dict['spec']['inchi_root'] } for k in ['mixed_basis', 'mixed_aux_basis']: if k in fw_dict['spec']: fake_qchem_spec[k] = fw_dict['spec'][k] fake_qchem_fw_id = current_fwid current_fwid += 1 fws_all.append( Firework(fake_qchem_tasks, fake_qchem_spec, name='Fake' + fw_dict['name'], fw_id=fake_qchem_fw_id)) links_dict[snl_fw_id] = fake_qchem_fw_id num_fw_in_current_batch += 1 if num_fw_in_current_batch >= batch_size: # honor the -b/--batch_size option wf = Workflow(fws_all, links_dict, "Read Previous QChem Jobs Id-{}".format(batch_num)) lp.add_wf(wf) batch_num += 1 links_dict = dict() fws_all = [] num_fw_in_current_batch = 0 if num_fw_in_current_batch > 0: wf = Workflow(fws_all, links_dict, "Read Previous QChem Jobs Id-{}".format(batch_num)) lp.add_wf(wf)
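The SNL-then-fake-QChem pairing above is the standard FireWorks parent/child link pattern. A self-contained sketch with ScriptTask standing in for the real tasks; the fw_ids and echo commands are illustrative only.

from fireworks import Firework, ScriptTask, Workflow

parent = Firework([ScriptTask.from_str('echo "add SNL"')], fw_id=1)
child = Firework([ScriptTask.from_str('echo "fake qchem run"')], fw_id=2)

# links_dict maps parent fw_id -> child fw_id, exactly as
# links_dict[snl_fw_id] = fake_qchem_fw_id does in main() above.
wf = Workflow([parent, child], links_dict={1: 2}, name="two-step batch")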
def run(self, pdb_on_exception=False): """ Run the rocket (check out a job from the database and execute it) Args: pdb_on_exception (bool): whether to invoke the debugger on a caught exception. Default False. """ all_stored_data = {} # combined stored data for *all* the Tasks all_update_spec = {} # combined update_spec for *all* the Tasks all_mod_spec = [] # combined mod_spec for *all* the Tasks lp = self.launchpad launch_dir = os.path.abspath(os.getcwd()) logdir = lp.get_logdir() if lp else None l_logger = get_fw_logger('rocket.launcher', l_dir=logdir, stream_level=ROCKET_STREAM_LOGLEVEL) # check a FW job out of the launchpad if lp: m_fw, launch_id = lp.checkout_fw(self.fworker, launch_dir, self.fw_id) else: # offline mode m_fw = Firework.from_file(os.path.join(os.getcwd(), "FW.json")) # set the run start time fpath = zpath("FW_offline.json") with zopen(fpath) as f_in: d = json.loads(f_in.read()) d['started_on'] = datetime.utcnow().isoformat() with zopen(fpath, "wt") as f_out: f_out.write(json.dumps(d, ensure_ascii=False)) launch_id = None # we don't need this in offline mode... if not m_fw: print("No FireWorks are ready to run and match query! {}".format(self.fworker.query)) return False final_state = None ping_stop = None btask_stops = [] try: if '_launch_dir' in m_fw.spec and lp: prev_dir = launch_dir launch_dir = os.path.expandvars(m_fw.spec['_launch_dir']) if not os.path.isabs(launch_dir): launch_dir = os.path.normpath(os.path.join(os.getcwd(), launch_dir)) # thread-safe "mkdir -p" try: os.makedirs(launch_dir) except OSError as exception: if exception.errno != errno.EEXIST: raise os.chdir(launch_dir) if not os.path.samefile(launch_dir, prev_dir): lp.change_launch_dir(launch_id, launch_dir) if not os.listdir(prev_dir) and REMOVE_USELESS_DIRS: try: os.rmdir(prev_dir) except: pass recovery = m_fw.spec.get('_recovery', None) if recovery: recovery_dir = recovery.get('_prev_dir') recovery_mode = recovery.get('_mode') starting_task = recovery.get('_task_n') all_stored_data.update(recovery.get('_all_stored_data')) all_update_spec.update(recovery.get('_all_update_spec')) all_mod_spec.extend(recovery.get('_all_mod_spec')) if lp: l_logger.log( logging.INFO, 'Recovering from task number {} in folder {}.'.format(starting_task, recovery_dir)) if recovery_mode == 'cp' and launch_dir != recovery_dir: if lp: l_logger.log( logging.INFO, 'Copying data from recovery folder {} to folder {}.'.format(recovery_dir, launch_dir)) distutils.dir_util.copy_tree(recovery_dir, launch_dir, update=1) else: starting_task = 0 files_in = m_fw.spec.get("_files_in", {}) prev_files = m_fw.spec.get("_files_prev", {}) for f in set(files_in.keys()).intersection(prev_files.keys()): # We use zopen for the file objects for transparent handling # of zipped files. shutil.copyfileobj does the actual copy # in chunks that avoid memory issues.
with zopen(prev_files[f], "rb") as fin, zopen(files_in[f], "wb") as fout: shutil.copyfileobj(fin, fout) if lp: message = 'RUNNING fw_id: {} in directory: {}'.\ format(m_fw.fw_id, os.getcwd()) l_logger.log(logging.INFO, message) # write FW.json and/or FW.yaml to the directory if PRINT_FW_JSON: m_fw.to_file('FW.json', indent=4) if PRINT_FW_YAML: m_fw.to_file('FW.yaml') my_spec = dict(m_fw.spec) # make a copy of spec, don't override original my_spec["_fw_env"] = self.fworker.env # set up heartbeat (pinging the server that we're still alive) ping_stop = start_ping_launch(lp, launch_id) # start background tasks if '_background_tasks' in my_spec: for bt in my_spec['_background_tasks']: btask_stops.append(start_background_task(bt, m_fw.spec)) # execute the Firetasks! for t_counter, t in enumerate(m_fw.tasks[starting_task:], start=starting_task): checkpoint = {'_task_n': t_counter, '_all_stored_data': all_stored_data, '_all_update_spec': all_update_spec, '_all_mod_spec': all_mod_spec} Rocket.update_checkpoint(lp, launch_dir, launch_id, checkpoint) if lp: l_logger.log(logging.INFO, "Task started: %s." % t.fw_name) if my_spec.get("_add_launchpad_and_fw_id"): t.fw_id = m_fw.fw_id if FWData().MULTIPROCESSING: # hack because AutoProxy manager can't access attributes t.launchpad = LaunchPad.from_dict(self.launchpad.to_dict()) else: t.launchpad = self.launchpad if my_spec.get("_add_fworker"): t.fworker = self.fworker try: m_action = t.run_task(my_spec) except BaseException as e: traceback.print_exc() tb = traceback.format_exc() stop_backgrounds(ping_stop, btask_stops) do_ping(lp, launch_id) # one last ping, esp if there is a monitor # If the exception is serializable, save its details if pdb_on_exception: pdb.post_mortem() try: exception_details = e.to_dict() except AttributeError: exception_details = None except BaseException as e: if lp: l_logger.log(logging.WARNING, "Exception couldn't be serialized: %s " % e) exception_details = None try: m_task = t.to_dict() except: m_task = None m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': m_task, '_exception': {'_stacktrace': tb, '_details': exception_details}}, exit=True) m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir) if lp: final_state = 'FIZZLED' lp.complete_launch(launch_id, m_action, final_state) else: fpath = zpath("FW_offline.json") with zopen(fpath) as f_in: d = json.loads(f_in.read()) d['fwaction'] = m_action.to_dict() d['state'] = 'FIZZLED' d['completed_on'] = datetime.utcnow().isoformat() with zopen(fpath, "wt") as f_out: f_out.write(json.dumps(d, ensure_ascii=False)) return True # read in a FWAction from a file, in case the task is not Python and cannot return # it explicitly if os.path.exists('FWAction.json'): m_action = FWAction.from_file('FWAction.json') elif os.path.exists('FWAction.yaml'): m_action = FWAction.from_file('FWAction.yaml') if not m_action: m_action = FWAction() # update the global stored data with the data to store and update from this # particular Task all_stored_data.update(m_action.stored_data) all_update_spec.update(m_action.update_spec) all_mod_spec.extend(m_action.mod_spec) # update spec for next task as well my_spec.update(m_action.update_spec) for mod in m_action.mod_spec: apply_mod(mod, my_spec) if lp: l_logger.log(logging.INFO, "Task completed: %s " % t.fw_name) if m_action.skip_remaining_tasks: break # add job packing info if this is needed if FWData().MULTIPROCESSING and STORE_PACKING_INFO: all_stored_data['multiprocess_name'] = 
multiprocessing.current_process().name # perform finishing operation stop_backgrounds(ping_stop, btask_stops) for b in btask_stops: b.set() do_ping(lp, launch_id) # one last ping, esp if there is a monitor # last background monitors if '_background_tasks' in my_spec: for bt in my_spec['_background_tasks']: if bt.run_on_finish: for task in bt.tasks: task.run_task(m_fw.spec) m_action.stored_data = all_stored_data m_action.mod_spec = all_mod_spec m_action.update_spec = all_update_spec m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir) if lp: final_state = 'COMPLETED' lp.complete_launch(launch_id, m_action, final_state) else: fpath = zpath("FW_offline.json") with zopen(fpath) as f_in: d = json.loads(f_in.read()) d['fwaction'] = m_action.to_dict() d['state'] = 'COMPLETED' d['completed_on'] = datetime.utcnow().isoformat() with zopen(fpath, "wt") as f_out: f_out.write(json.dumps(d, ensure_ascii=False)) return True except LockedWorkflowError as e: l_logger.log(logging.DEBUG, traceback.format_exc()) l_logger.log(logging.WARNING, "Firework {} reached final state {} but couldn't complete the update of " "the database. Reason: {}\nRefresh the WF to recover the result " "(lpad admin refresh -i {}).".format( self.fw_id, final_state, e, self.fw_id)) return True except: # problems while processing the results. high probability of malformed data. traceback.print_exc() stop_backgrounds(ping_stop, btask_stops) # restore initial state to prevent the raise of further exceptions if lp: lp.restore_backup_data(launch_id, m_fw.fw_id) do_ping(lp, launch_id) # one last ping, esp if there is a monitor # the action produced by the task is discarded m_action = FWAction(stored_data={'_message': 'runtime error during task', '_task': None, '_exception': {'_stacktrace': traceback.format_exc(), '_details': None}}, exit=True) try: m_action = self.decorate_fwaction(m_action, my_spec, m_fw, launch_dir) except: traceback.print_exc() if lp: try: lp.complete_launch(launch_id, m_action, 'FIZZLED') except LockedWorkflowError as e: l_logger.log(logging.DEBUG, traceback.format_exc()) l_logger.log(logging.WARNING, "Firework {} fizzled but couldn't complete the update of the database." " Reason: {}\nRefresh the WF to recover the result " "(lpad admin refresh -i {}).".format( self.fw_id, final_state, e, self.fw_id)) return True else: fpath = zpath("FW_offline.json") with zopen(fpath) as f_in: d = json.loads(f_in.read()) d['fwaction'] = m_action.to_dict() d['state'] = 'FIZZLED' d['completed_on'] = datetime.utcnow().isoformat() with zopen(fpath, "wt") as f_out: f_out.write(json.dumps(d, ensure_ascii=False)) return True
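run() performs the same read-modify-write of FW_offline.json in three places (started_on, COMPLETED, FIZZLED); a monty-based sketch of the shared helper it implies. The function name is hypothetical, not part of FireWorks.

import json
from datetime import datetime

from monty.io import zopen
from monty.os.path import zpath

def update_fw_offline(**updates):
    """Merge updates into FW_offline.json (plain or gzipped) in place."""
    fpath = zpath("FW_offline.json")
    with zopen(fpath) as f_in:
        d = json.loads(f_in.read())
    d.update(updates)
    with zopen(fpath, "wt") as f_out:
        f_out.write(json.dumps(d, ensure_ascii=False))

# e.g. update_fw_offline(state='COMPLETED', completed_on=datetime.utcnow().isoformat())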
def run_task(self, fw_spec): incar = Incar.from_file(zpath("INCAR")) incar.update({"ISIF": 2}) incar.write_file("INCAR") return FWAction()
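Usage sketch: a task like the one above (here given the hypothetical name IonRelaxIncarTask) would typically be chained ahead of the VASP runner in a single Firework, so ISIF=2 is written before the run starts. RunVaspCustodian refers to the runner sketched earlier; both names are assumptions for illustration.

from fireworks import Firework

fw = Firework([IonRelaxIncarTask(), RunVaspCustodian(vasp_cmd=">>vasp_cmd<<")],
              name="ISIF=2 ionic relaxation")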