def im_sol_sub_def_energy_parse():
    m_description = 'Command to parse solute substitution defect ' \
                    'energies for intermetallics from the VASP DFT ' \
                    'calculations.'

    parser = ArgumentParser(description=m_description)

    parser.add_argument("--mpid",
                        type=str.lower,
                        help="Materials Project id of the intermetallic structure.\n"
                             "For more info on Materials Project, please refer to "
                             "www.materialsproject.org")

    parser.add_argument("--solute", help="Solute Element")

    parser.add_argument("--mapi_key",
                        default=None,
                        help="Your Materials Project REST API key.\n"
                             "For more info, please refer to "
                             "www.materialsproject.org/open")

    args = parser.parse_args()

    energy_dict = solute_def_parse_energy(args.mpid, args.solute,
                                          args.mapi_key)
    if energy_dict:
        fl_nm = args.mpid + '_solute-' + args.solute + '_raw_defect_energy.json'
        dumpfn(energy_dict, fl_nm, indent=2, cls=MontyEncoder)
def im_vac_antisite_def_energy_parse():
    m_description = 'Command to parse vacancy and antisite defect ' \
                    'energies for intermetallics from the VASP DFT ' \
                    'calculations.'

    parser = ArgumentParser(description=m_description)

    parser.add_argument("--mpid",
                        type=str.lower,
                        help="Materials Project id of the intermetallic structure.\n"
                             "For more info on Materials Project, please refer to "
                             "www.materialsproject.org")

    parser.add_argument("--mapi_key",
                        default=None,
                        help="Your Materials Project REST API key.\n"
                             "For more info, please refer to "
                             "www.materialsproject.org/open")

    args = parser.parse_args()
    print(args)

    energy_dict = vac_antisite_def_parse_energy(args.mpid, args.mapi_key)

    print(type(energy_dict))
    for key, value in energy_dict.items():
        print(key)
        print(type(key), type(value))
        for key2, val2 in value.items():
            print(type(key2), type(val2))

    if energy_dict:
        fl_nm = args.mpid + '_raw_defect_energy.json'
        dumpfn(energy_dict, fl_nm, cls=MontyEncoder, indent=2)
def run_task(self, fw_spec): transformations = [] transformation_params = self.get("transformation_params", [{} for i in range(len(self["transformations"]))]) for t in self["transformations"]: found = False for m in ["advanced_transformations", "defect_transformations", "site_transformations", "standard_transformations"]: mod = import_module("pymatgen.transformations.{}".format(m)) try: t_cls = getattr(mod, t) except AttributeError: continue t_obj = t_cls(**transformation_params.pop(0)) transformations.append(t_obj) found = True if not found: raise ValueError("Could not find transformation: {}".format(t)) # TODO: @matk86 - should prev_calc_dir use CONTCAR instead of POSCAR? Note that if # current dir, maybe it is POSCAR indeed best ... -computron structure = self['structure'] if not self.get('prev_calc_dir', None) else \ Poscar.from_file(os.path.join(self['prev_calc_dir'], 'POSCAR')).structure ts = TransformedStructure(structure) transmuter = StandardTransmuter([ts], transformations) final_structure = transmuter.transformed_structures[-1].final_structure.copy() vis_orig = self["vasp_input_set"] vis_dict = vis_orig.as_dict() vis_dict["structure"] = final_structure.as_dict() vis_dict.update(self.get("override_default_vasp_params", {}) or {}) vis = vis_orig.__class__.from_dict(vis_dict) vis.write_input(".") dumpfn(transmuter.transformed_structures[-1], "transformations.json")
def run(self, job_cmd=None):
    """
    Run the VASP jobs through custodian. If the job list is empty,
    run a single job with the initial input set.
    """
    for j in self.jobs:
        if job_cmd is not None:
            j.job_cmd = job_cmd
        else:
            j.job_cmd = self.job_cmd
    c_params = {'jobs': [j.as_dict() for j in self.jobs],
                'handlers': [h.as_dict() for h in self.handlers],
                'max_errors': 5}
    c = Custodian(self.handlers, self.jobs, max_errors=5)
    c.run()
    for j in self.jobs:
        self.cal_log.append({"job": j.as_dict(),
                             'job_id': j.job_id,
                             "corrections": [],
                             'final_energy': None})
        self.job_ids.append(j.job_id)
    if self.checkpoint_file:
        dumpfn(self.cal_log, self.checkpoint_file,
               cls=MontyEncoder, indent=4)
    else:
        dumpfn(self.cal_log, Calibrate.LOG_FILE,
               cls=MontyEncoder, indent=4)
def setup(self):
    """
    Performs initial setup for VaspJob, including overriding any settings
    and backing up.
    """
    decompress_dir('.')

    if self.backup:
        for f in VASP_INPUT_FILES:
            shutil.copy(f, "{}.orig".format(f))

    if self.auto_npar:
        try:
            incar = Incar.from_file("INCAR")
            # Only optimized NPAR for non-HF and non-RPA calculations.
            if not (incar.get("LHFCALC") or incar.get("LRPA") or
                    incar.get("LEPSILON")):
                if incar.get("IBRION") in [5, 6, 7, 8]:
                    # NPAR should not be set for Hessian matrix
                    # calculations, whether in DFPT or otherwise.
                    del incar["NPAR"]
                else:
                    import multiprocessing
                    # try sge environment variable first
                    # (since multiprocessing counts cores on the current
                    # machine only)
                    ncores = os.environ.get('NSLOTS') or \
                        multiprocessing.cpu_count()
                    ncores = int(ncores)
                    for npar in range(int(math.sqrt(ncores)), ncores):
                        if ncores % npar == 0:
                            incar["NPAR"] = npar
                            break
                incar.write_file("INCAR")
        except:
            pass

    if self.auto_continue:
        if os.path.exists("continue.json"):
            actions = loadfn("continue.json").get("actions")
            logger.info("Continuing previous VaspJob. Actions: {}".format(actions))
            backup(VASP_BACKUP_FILES, prefix="prev_run")
            VaspModder().apply_actions(actions)
        else:
            # Default functionality is to copy CONTCAR to POSCAR and set
            # ISTART to 1 in the INCAR, but other actions can be specified
            if self.auto_continue is True:
                actions = [{"file": "CONTCAR",
                            "action": {"_file_copy": {"dest": "POSCAR"}}},
                           {"dict": "INCAR",
                            "action": {"_set": {"ISTART": 1}}}]
            else:
                actions = self.auto_continue
            dumpfn({"actions": actions}, "continue.json")

    if self.settings_override is not None:
        VaspModder().apply_actions(self.settings_override)
def generate_single_job_dict():
    """
    Used to generate test dictionary for single jobs.
    """
    single_job_dict = {}
    for file in single_job_out_names:
        single_job_dict[file] = QCOutput(os.path.join(test_dir, file)).data
    dumpfn(single_job_dict, "single_job.json")
def run(self):
    """
    Override of Custodian.run() to include instructions to copy the
    temp_dir to the scratch partition on slave compute nodes if requested.
    """
    cwd = os.getcwd()

    with ScratchDir(self.scratch_dir, create_symbolic_link=True,
                    copy_to_current_on_exit=True,
                    copy_from_current_on_enter=True) as temp_dir:
        self._manage_node_scratch(temp_dir_path=temp_dir,
                                  job_start=True)
        self.total_errors = 0
        start = datetime.datetime.now()
        logger.info("Run started at {} in {}.".format(start, temp_dir))
        v = sys.version.replace("\n", " ")
        logger.info("Custodian running on Python version {}".format(v))

        try:
            # skip jobs until the restart
            for job_n, job in islice(enumerate(self.jobs, 1),
                                     self.restart, None):
                self._run_job(job_n, job, temp_dir)
                # Checkpoint after each job so that we can recover from
                # last point and remove old checkpoints
                if self.checkpoint:
                    super(SSHCustodian, self)._save_checkpoint(cwd, job_n)
        except CustodianError as ex:
            logger.error(ex.message)
            if ex.raises:
                raise RuntimeError("{} errors reached: {}. Exited..."
                                   .format(self.total_errors, ex))
        finally:
            # Log the corrections to a json file.
            logger.info("Logging to {}...".format(
                super(SSHCustodian, self).LOG_FILE))
            dumpfn(self.run_log, super(SSHCustodian, self).LOG_FILE,
                   cls=MontyEncoder, indent=4)
            end = datetime.datetime.now()
            logger.info("Run ended at {}.".format(end))
            run_time = end - start
            logger.info("Run completed. Total time taken = {}."
                        .format(run_time))
            # Remove duplicate copy of log file, provided it ends with
            # ".log"
            for x in ([x for x in os.listdir(temp_dir)
                       if re.match(r'\w*\.log', x)]):
                os.remove(os.path.join(temp_dir, x))
            self._manage_node_scratch(temp_dir_path=temp_dir,
                                      job_start=False)
            if self.gzipped_output:
                gzip_dir(".")

    # Cleanup checkpoint files (if any) if run is successful.
    super(SSHCustodian, self)._delete_checkpoints(cwd)

    return self.run_log
def run(self):
    """
    Runs all the jobs.

    Returns:
        All errors encountered as a list of list.
        [[error_dicts for job 1], [error_dicts for job 2], ....]
    """
    cwd = os.getcwd()

    with ScratchDir(self.scratch_dir, create_symbolic_link=True,
                    copy_to_current_on_exit=True,
                    copy_from_current_on_enter=True) as temp_dir:
        self.total_errors = 0
        start = datetime.datetime.now()
        logger.info("Run started at {} in {}.".format(start, temp_dir))
        v = sys.version.replace("\n", " ")
        logger.info("Custodian running on Python version {}".format(v))
        logger.info("Hostname: {}, Cluster: {}".format(
            *get_execution_host_info()))

        try:
            # skip jobs until the restart
            for job_n, job in islice(enumerate(self.jobs, 1),
                                     self.restart, None):
                self._run_job(job_n, job)
                # Checkpoint after each job so that we can recover from last
                # point and remove old checkpoints
                if self.checkpoint:
                    self.restart = job_n
                    Custodian._save_checkpoint(cwd, job_n)
        except CustodianError as ex:
            logger.error(ex.message)
            if ex.raises:
                raise RuntimeError("{} errors reached: {}. Exited..."
                                   .format(self.total_errors, ex))
        finally:
            # Log the corrections to a json file.
            logger.info("Logging to {}...".format(Custodian.LOG_FILE))
            dumpfn(self.run_log, Custodian.LOG_FILE, cls=MontyEncoder,
                   indent=4)
            end = datetime.datetime.now()
            logger.info("Run ended at {}.".format(end))
            run_time = end - start
            logger.info("Run completed. Total time taken = {}."
                        .format(run_time))
            if self.gzipped_output:
                gzip_dir(".")

    # Cleanup checkpoint files (if any) if run is successful.
    Custodian._delete_checkpoints(cwd)

    return self.run_log
def generate_multi_job_dict():
    """
    Used to generate test dictionary for multiple jobs.
    """
    multi_job_dict = {}
    for file in multi_job_out_names:
        outputs = QCOutput.multiple_outputs_from_file(
            QCOutput, os.path.join(test_dir, file), keep_sub_files=False)
        data = []
        for sub_output in outputs:
            data.append(sub_output.data)
        multi_job_dict[file] = data
    dumpfn(multi_job_dict, "multi_job.json")
def update_checkpoint(launchpad, launch_id, checkpoint):
    """
    Helper function to update checkpoint

    Args:
        launchpad (LaunchPad): LaunchPad to ping with checkpoint data
        launch_id (int): launch id to update
        checkpoint (dict): checkpoint data
    """
    if launchpad:
        launchpad.ping_launch(launch_id, checkpoint=checkpoint)
    else:
        offline_info = loadfn("FW_offline.json")
        offline_info.update({"checkpoint": checkpoint})
        dumpfn(offline_info, "FW_offline.json")
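# Illustrative usage sketch (not part of the original source). `lp` and
# `launch_id` are placeholder names for a FireWorks LaunchPad and the id of
# the running launch; with launchpad=None the helper above falls back to
# rewriting FW_offline.json in the current directory.
#
#   checkpoint = {"last_completed_task": 3}
#   update_checkpoint(lp, launch_id, checkpoint)   # online: pings the LaunchPad
#   update_checkpoint(None, None, checkpoint)      # offline: updates FW_offline.json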
def add_config_var(args):
    d = {}
    if os.path.exists(SETTINGS_FILE):
        shutil.copy(SETTINGS_FILE, SETTINGS_FILE + ".bak")
        print("Existing %s backed up to %s"
              % (SETTINGS_FILE, SETTINGS_FILE + ".bak"))
        d = loadfn(SETTINGS_FILE)
    toks = args.var_spec
    if len(toks) % 2 != 0:
        print("Bad variable specification!")
        sys.exit(-1)
    for i in range(int(len(toks) / 2)):
        d[toks[2 * i]] = toks[2 * i + 1]
    dumpfn(d, SETTINGS_FILE, default_flow_style=False)
    print("New %s written!" % (SETTINGS_FILE))
def _do_check(self, handlers, terminate_func=None):
    """
    Checks the specified handlers. Returns True iff errors caught.
    """
    corrections = []
    for h in handlers:
        try:
            if h.check():
                if h.max_num_corrections is not None \
                        and h.n_applied_corrections >= h.max_num_corrections:
                    msg = "Maximum number of corrections {} reached " \
                          "for handler {}".format(h.max_num_corrections, h)
                    if h.raise_on_max:
                        self.run_log[-1]["handler"] = h
                        self.run_log[-1]["max_errors_per_handler"] = True
                        raise MaxCorrectionsPerHandlerError(
                            msg, True, h.max_num_corrections, h)
                    else:
                        logger.warning(msg + " Correction not applied.")
                        continue
                if terminate_func is not None and h.is_terminating:
                    logger.info("Terminating job")
                    terminate_func()
                    # make sure we don't terminate twice
                    terminate_func = None
                d = h.correct()
                d["handler"] = h
                logger.error("\n" + pformat(d, indent=2, width=-1))
                corrections.append(d)
                h.n_applied_corrections += 1
        except Exception:
            if not self.skip_over_errors:
                raise
            else:
                import traceback
                logger.error("Bad handler %s " % h)
                logger.error(traceback.format_exc())
                corrections.append(
                    {"errors": ["Bad handler %s " % h],
                     "actions": []})
    self.total_errors += len(corrections)
    self.errors_current_job += len(corrections)
    self.run_log[-1]["corrections"].extend(corrections)
    # We do a dump of the run log after each check.
    dumpfn(self.run_log, Custodian.LOG_FILE, cls=MontyEncoder, indent=4)
    return len(corrections) > 0
def test_DiagnosticProperties_class(self):
    with ScratchDir("."):
        os.environ["BEEP_PROCESSING_DIR"] = TEST_FILE_DIR
        pcycler_run_loc = os.path.join(
            TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json")
        pcycler_run = auto_load_processed(pcycler_run_loc)
        featurizer = DiagnosticProperties.from_run(
            pcycler_run_loc, os.getcwd(), pcycler_run)
        path, local_filename = os.path.split(featurizer.name)
        folder = os.path.split(path)[-1]
        dumpfn(featurizer, featurizer.name)
        self.assertEqual(folder, "DiagnosticProperties")
        self.assertEqual(featurizer.X.shape, (30, 9))

        print(list(featurizer.X.iloc[2, :]))
        self.assertListEqual(list(featurizer.X.iloc[2, :]), [
            141, 0.9859837086597274, 7.885284043, 4.323121513988055,
            21.12108276469096, 30, 100, 'reset', 'discharge_energy'
        ])
def test_mpk(self):
    d = {"hello": "world"}

    # Test automatic format detection
    dumpfn(d, "monte_test.mpk")
    d2 = loadfn("monte_test.mpk")
    self.assertEqual(d, {k.decode('utf-8'): v.decode('utf-8')
                         for k, v in d2.items()})
    os.remove("monte_test.mpk")

    # Test to ensure basename is respected, and not directory
    with ScratchDir('.'):
        os.mkdir("mpk_test")
        os.chdir("mpk_test")
        fname = os.path.abspath("test_file.json")
        dumpfn({"test": 1}, fname)
        with open("test_file.json", "r") as f:
            reloaded = json.loads(f.read())
        self.assertEqual(reloaded['test'], 1)
def save_model(self, filename: str) -> None:
    """
    Save the model to a keras model hdf5 and a json config for additional
    converters

    Args:
        filename: (str) output file name

    Returns:
        None
    """
    self.model.save(filename)
    dumpfn(
        {
            "graph_converter": self.graph_converter,
            "target_scaler": self.target_scaler,
            "metadata": self.metadata
        },
        filename + ".json",
    )
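# Usage sketch (illustrative only; `model` is assumed to be a trained model
# instance exposing the save_model method above). The call writes the keras
# model to the given HDF5 file and, via dumpfn, a companion JSON file holding
# the graph converter, target scaler and metadata:
#
#   model.save_model("my_model.hdf5")
#   # -> produces my_model.hdf5 and my_model.hdf5.json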
def save_model(self, filename):
    """
    Save the model to a keras model hdf5 and a json config for additional
    converters

    Args:
        filename: (str) output file name

    Returns:
        None
    """
    self.model.save(filename)
    dumpfn(
        {
            'graph_converter': self.graph_converter,
            'target_scaler': self.target_scaler,
            'metadata': self.metadata
        },
        filename + '.json'
    )
def add_config_var(args):
    """
    Add configuration args.

    :param args: Parsed command-line args; expects ``args.var_spec`` as an
        alternating list of keys and values.
    """
    d = {}
    if os.path.exists(SETTINGS_FILE):
        shutil.copy(SETTINGS_FILE, SETTINGS_FILE + ".bak")
        print("Existing {} backed up to {}".format(SETTINGS_FILE,
                                                   SETTINGS_FILE + ".bak"))
        d = loadfn(SETTINGS_FILE)
    toks = args.var_spec
    if len(toks) % 2 != 0:
        print("Bad variable specification!")
        sys.exit(-1)
    for i in range(int(len(toks) / 2)):
        d[toks[2 * i]] = toks[2 * i + 1]
    dumpfn(d, SETTINGS_FILE)
    print("New %s written!" % (SETTINGS_FILE))
def test_formula_query():
    op = FormulaQuery()

    assert op.query("Si2O4") == {
        "criteria": {
            "composition_reduced.O": 2.0,
            "composition_reduced.Si": 1.0,
            "nelements": 2,
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query("Si2O4") == {
            "criteria": {
                "composition_reduced.O": 2.0,
                "composition_reduced.Si": 1.0,
                "nelements": 2,
            }
        }
def test_multi_material_id_query():
    op = MultiMaterialIDQuery()

    assert op.query(material_ids="mp-149, mp-13") == {
        "criteria": {
            "material_id": {"$in": ["mp-149", "mp-13"]}
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(material_ids="mp-149, mp-13") == {
            "criteria": {
                "material_id": {"$in": ["mp-149", "mp-13"]}
            }
        }
def create_aflow_test_docs():
    auids = [
        'aflow:0132ab6b9cddd429',  # Has an elastic tensor file
        'aflow:0136cbe39e59c471',  # An average joe material
        'aflow:d0c93a9396dc599e'
    ]
    query = AflowAPIQuery.from_pymongo({'auid': {'$in': auids}},
                                       AflowIngester._available_kws, 50,
                                       property_reduction=True)

    if query.N != len(auids):
        auids_retrieved = [material['auid']
                           for page in query.responses.values()
                           for material in page.values()]
        auids_not_retrieved = set(auids) - set(auids_retrieved)
        raise ValueError(
            "Not all materials retrieved. Perhaps they have been deprecated? "
            "Unavailable auids:\n{}".format(auids_not_retrieved))

    data = []
    for item in query:
        raw_data = item.raw
        try:
            contcar_data = item.files['CONTCAR.relax.vasp']()
        except Exception:
            contcar_data = None
        try:
            elastic_tensor_data = item.files['AEL_elastic_tensor.json']()
            elastic_tensor_data = json.loads(elastic_tensor_data)
        except Exception:
            elastic_tensor_data = None
        raw_data['CONTCAR_relax_vasp'] = contcar_data
        raw_data['AEL_elastic_tensor_json'] = elastic_tensor_data
        data.append(raw_data)

    dumpfn(data, os.path.join(TEST_DATA_DIR, 'aflow_store.json'))
def references_to_bib(refs):
    """
    Takes a list of reference strings and converts them to bibtex entries

    Args:
        refs ([str]): list of string references, which can be bibtex
            entries, digital object identifiers ("doi:DOI_GOES_HERE")
            or urls ("url:URL_GOES_HERE")

    Returns:
        (list): list of bibtex formatted strings
    """
    parsed_refs = []
    for ref in refs:
        if ref in _REFERENCE_CACHE:
            parsed_ref = _REFERENCE_CACHE[ref]
        elif ref.startswith('@'):
            parsed_ref = ref
        elif ref.startswith('url:'):
            # uses arbitrary key
            url = ref.split('url:')[1]
            parsed_ref = """@misc{{url:{0}, url = {{{1}}} }}""".format(
                str(abs(url.__hash__()))[0:6], url)
        elif ref.startswith('doi:'):
            doi = ref.split('doi:')[1]
            parsed_ref = content_negotiation(doi, format='bibentry')
        else:
            raise ValueError(
                'Unknown reference style for '
                'reference: {} (please either '
                'supply a BibTeX string, or a string '
                'starting with url: followed by a URL or '
                'starting with doi: followed by a DOI)'.format(ref))
        if ref not in _REFERENCE_CACHE:
            _REFERENCE_CACHE[ref] = parsed_ref
            dumpfn(_REFERENCE_CACHE, _REFERENCE_CACHE_PATH)
        parsed_refs.append(parsed_ref)
    return parsed_refs
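# Usage sketch (illustrative, not from the original source). Literal BibTeX
# entries and "url:" references are handled locally and cached via dumpfn;
# "doi:" references additionally require network access through
# content_negotiation, so they are omitted here.
#
#   refs = ["@misc{example2020, title={Example}, year={2020}}",
#           "url:https://materialsproject.org"]
#   bib_entries = references_to_bib(refs)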
def test_grain_boundary_structure_query():
    op = GBStructureQuery()

    assert op.query(
        sigma=5,
        type=GBTypeEnum.twist,
        chemsys="Si-Fe",
        pretty_formula="Fe2Si4",
        gb_plane="1,1,1",
        rotation_axis="1,0,1",
    ) == {
        "criteria": {
            "sigma": 5,
            "type": "twist",
            "chemsys": "Fe-Si",
            "pretty_formula": "FeSi2",
            "gb_plane": [1, 1, 1],
            "rotation_axis": [1, 0, 1],
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(
            sigma=5,
            type=GBTypeEnum.twist,
            chemsys="Si-Fe",
            pretty_formula="Fe2Si4",
            gb_plane="1,1,1",
            rotation_axis="1,0,1",
        ) == {
            "criteria": {
                "sigma": 5,
                "type": "twist",
                "chemsys": "Fe-Si",
                "pretty_formula": "FeSi2",
                "gb_plane": [1, 1, 1],
                "rotation_axis": [1, 0, 1],
            }
        }
def post_equi(confs, inter_param):
    # find all POSCARs and their name like mp-xxx
    # ...
    conf_dirs = []
    for conf in confs:
        conf_dirs.extend(glob.glob(conf))
    conf_dirs.sort()

    task_dirs = []
    for ii in conf_dirs:
        task_dirs.append(os.path.abspath(
            os.path.join(ii, 'relaxation', 'relax_task')))
    task_dirs.sort()

    # generate a list of task names like mp-xxx/relaxation
    # ...

    # dump the relaxation result.
    for ii in task_dirs:
        poscar = os.path.join(ii, 'POSCAR')
        inter = make_calculator(inter_param, poscar)
        res = inter.compute(ii)
        dumpfn(res, os.path.join(ii, 'result.json'), indent=4)
def test_molecule_elements_query():
    op = MoleculeElementsQuery()

    assert op.query(elements="Si, O, P") == {
        "criteria": {
            "elements": {"$all": ["Si", "O", "P"]}
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(elements="Si, O, P") == {
            "criteria": {
                "elements": {"$all": ["Si", "O", "P"]}
            }
        }
def generate_characterize_path_files(rn, old_solved_PRs, dist_and_path):
    # Use context managers so the pickled test fixtures are flushed and closed.
    with open(
        os.path.join(test_dir, "unittest_RN_before_characterize_path.pkl"),
        "wb",
    ) as pickle_out:
        pickle.dump(rn, pickle_out)

    with open(
        os.path.join(test_dir, "unittest_characterize_path_PRs_IN.pkl"),
        "wb",
    ) as pickle_out:
        pickle.dump(old_solved_PRs, pickle_out)

    dumpfn(
        dist_and_path,
        os.path.join(test_dir, "unittest_characterize_path_path_IN.json"),
    )
def test_from_csv(self):
    csv_file = os.path.join(TEST_FILE_DIR, "parameter_test.csv")

    # Test basic functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        generate_protocol_files_from_csv(csv_file, scratch_dir)
        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)

    # Test avoid overwriting file functionality
    with ScratchDir('.') as scratch_dir:
        makedirs_p(os.path.join(scratch_dir, "procedures"))
        makedirs_p(os.path.join(scratch_dir, "names"))
        dumpfn({"hello": "world"}, "procedures/name_000007.000")
        generate_protocol_files_from_csv(csv_file, scratch_dir)
        post_file = loadfn("procedures/name_000007.000")
        self.assertEqual(post_file, {"hello": "world"})
        self.assertEqual(
            len(os.listdir(os.path.join(scratch_dir, "procedures"))), 3)
def test_has_props_query():
    op = HasPropsQuery()

    assert op.query(has_props="electronic_structure, thermo") == {
        "criteria": {
            "has_props": {"$all": ["electronic_structure", "thermo"]}
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(has_props="electronic_structure, thermo") == {
            "criteria": {
                "has_props": {"$all": ["electronic_structure", "thermo"]}
            }
        }
def test_shear_modulus_query():
    op = ShearModulusQuery()

    q = op.query(
        g_voigt_min=0,
        g_voigt_max=5,
        g_reuss_min=0,
        g_reuss_max=5,
        g_vrh_min=0,
        g_vrh_max=5,
    )

    fields = ["elasticity.g_voigt", "elasticity.g_reuss", "elasticity.g_vrh"]

    assert q == {
        "criteria": {field: {"$gte": 0, "$lte": 5} for field in fields}
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        q = new_op.query(
            g_voigt_min=0,
            g_voigt_max=5,
            g_reuss_min=0,
            g_reuss_max=5,
            g_vrh_min=0,
            g_vrh_max=5,
        )

        assert q == {
            "criteria": {field: {"$gte": 0, "$lte": 5} for field in fields}
        }
def test_insertion_voltage_step_query():
    op = InsertionVoltageStepQuery()

    q = op.query(
        stability_charge_min=0,
        stability_charge_max=5,
        stability_discharge_min=0,
        stability_discharge_max=5,
    )

    fields = ["stability_charge", "stability_discharge"]

    assert q == {
        "criteria": {field: {"$gte": 0, "$lte": 5} for field in fields}
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        q = new_op.query(
            stability_charge_min=0,
            stability_charge_max=5,
            stability_discharge_min=0,
            stability_discharge_max=5,
        )

        assert q == {
            "criteria": {field: {"$gte": 0, "$lte": 5} for field in fields}
        }
def test_serialization(self):
    with ScratchDir("."):
        os.environ["BEEP_PROCESSING_DIR"] = os.getcwd()
        dataset = BeepDataset.from_features(
            'test_dataset', ['PreDiag'], FEATURIZER_CLASSES,
            feature_dir=os.path.join(TEST_FILE_DIR, 'data-share/features'))
        dumpfn(dataset, 'temp_dataset.json')
        dataset = loadfn('temp_dataset.json')
        self.assertEqual(dataset.name, 'test_dataset')
        self.assertEqual(dataset.data.shape, (2, 56))
        # from pdb import set_trace; set_trace()
        self.assertListEqual(list(dataset.data.seq_num), [196, 197])
        self.assertIsNone(dataset.X_test)
        self.assertSetEqual(set(dataset.feature_sets.keys()),
                            {'RPTdQdVFeatures', 'DiagnosticSummaryStats'})
        self.assertEqual(dataset.missing.feature_class.iloc[0],
                         'HPPCResistanceVoltageFeatures')
        self.assertIsInstance(dataset.filenames, list)

        os.environ["BEEP_PROCESSING_DIR"] = os.getcwd()
        dataset2 = BeepDataset.from_features(
            'test_dataset', ['PreDiag'], [RPTdQdVFeatures],
            feature_dir=os.path.join(TEST_FILE_DIR, 'data-share/features'))
        dumpfn(dataset2, "temp_dataset_2.json")
        dataset2 = loadfn('temp_dataset_2.json')
        self.assertEqual(dataset2.missing.columns.to_list(),
                         ["filename", "feature_class"])
def run_task(self, fw_spec):
    db_file = env_chk(self["db_file"], fw_spec)
    wf_uuid = self["wf_uuid"]
    mc_settings = self.get("mc_settings", {})

    # Get Heisenberg models from db
    mmdb = VaspCalcDb.from_db_file(db_file, admin=True)
    mmdb.collection = mmdb.db["exchange"]

    # Get documents
    docs = list(
        mmdb.collection.find({"wf_meta.wf_uuid": wf_uuid},
                             ["heisenberg_model", "nn_cutoff"]))

    hmodels = [HeisenbergModel.from_dict(d["heisenberg_model"]) for d in docs]
    cutoffs = [hmodel.cutoff for hmodel in hmodels]
    ordered_hmodels = [
        h for _, h in sorted(zip(cutoffs, hmodels), reverse=False)
    ]
    # Take the model with smallest NN cutoff
    hmodel = ordered_hmodels[0]

    # Get a converged Heisenberg model if one was found
    # if fw_spec["converged_heisenberg_model"]:
    #     hmodel = HeisenbergModel.from_dict(fw_spec["converged_heisenberg_model"])

    vc = VampireCaller(hm=hmodel, **mc_settings)
    vampire_output = vc.output

    # Update FW spec
    update_spec = {"vampire_output": vampire_output}

    # Write to file
    dumpfn(vampire_output.as_dict(), "vampire_output.json")

    return FWAction(update_spec=update_spec)
def test_RPTdQdVFeatures_class(self):
    with ScratchDir("."):
        os.environ["BEEP_PROCESSING_DIR"] = TEST_FILE_DIR
        pcycler_run_loc = os.path.join(
            TEST_FILE_DIR, "PreDiag_000240_000227_truncated_structure.json")
        pcycler_run = loadfn(pcycler_run_loc)
        params_dict = {
            "diag_ref": 0,
            "diag_nr": 2,
            "charge_y_n": 1,
            "rpt_type": "rpt_2C",
            "plotting_y_n": 0,
        }
        featurizer = RPTdQdVFeatures.from_run(
            pcycler_run_loc, os.getcwd(), pcycler_run, params_dict)
        path, local_filename = os.path.split(featurizer.name)
        folder = os.path.split(path)[-1]
        dumpfn(featurizer, featurizer.name)
        self.assertEqual(folder, "RPTdQdVFeatures")
        self.assertEqual(featurizer.X.shape[1], 11)
        self.assertEqual(featurizer.metadata["parameters"], params_dict)
def do_query(args):
    m = MPRester()
    try:
        criteria = json.loads(args.criteria)
    except json.decoder.JSONDecodeError:
        criteria = args.criteria

    if args.structure:
        count = 0
        for d in m.query(criteria, properties=["structure", "task_id"]):
            s = d["structure"]
            formula = re.sub(r"\s+", "", s.formula)
            if args.structure == "poscar":
                fname = "POSCAR.%s_%s" % (d["task_id"], formula)
            else:
                fname = "%s-%s.%s" % (d["task_id"], formula, args.structure)
            s.to(filename=fname)
            count += 1
        print("%d structures written!" % count)
    elif args.entries:
        entries = m.get_entries(criteria)
        dumpfn(entries, args.entries)
        print("%d entries written to %s!" % (len(entries), args.entries))
    else:
        props = ["e_above_hull", "spacegroup"]
        props += args.data
        entries = m.get_entries(criteria, property_data=props)
        t = []
        headers = ["mp-id", "Formula", "Spacegroup", "E/atom (eV)",
                   "E above hull (eV)"] + args.data
        for e in entries:
            row = [e.entry_id, e.composition.reduced_formula,
                   e.data["spacegroup"]["symbol"],
                   e.energy_per_atom, e.data["e_above_hull"]]
            row += [e.data[s] for s in args.data]
            t.append(row)
        t = sorted(t, key=lambda x: x[headers.index("E above hull (eV)")])
        print(tabulate(t, headers=headers, tablefmt="pipe", floatfmt=".3f"))
def test_substrate_structure_operator():
    op = SubstrateStructureQuery()

    assert op.query(film_orientation="0,1, 1",
                    substrate_orientation="1, 0,1") == {
        "criteria": {
            "film_orient": "0 1 1",
            "orient": "1 0 1"
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(film_orientation="0,1, 1",
                            substrate_orientation="1, 0,1") == {
            "criteria": {
                "film_orient": "0 1 1",
                "orient": "1 0 1"
            }
        }
def test_to_from_dict(self):
    d = self.PxIon.as_dict()
    ion_entry = self.PxIon.from_dict(d)
    self.assertEqual(ion_entry.entry.name, "MnO4[-1]", "Wrong Entry!")

    d = self.PxSol.as_dict()
    sol_entry = self.PxSol.from_dict(d)
    self.assertEqual(sol_entry.name, "Mn2O3(s)", "Wrong Entry!")
    self.assertEqual(
        sol_entry.energy,
        self.PxSol.energy,
        "as_dict and from_dict energies unequal",
    )

    # Ensure computed entry data persists
    entry = ComputedEntry("TiO2", energy=-20, data={"test": "test"})
    pbx_entry = PourbaixEntry(entry=entry)
    with ScratchDir("."):
        dumpfn(pbx_entry, "pbx_entry.json")
        reloaded = loadfn("pbx_entry.json")
    self.assertIsInstance(reloaded.entry, ComputedEntry)
    self.assertIsNotNone(reloaded.entry.data)
def pmg_dump(obj, filename, **kwargs):
    """
    Dump an object to a json file using MontyEncoder. Note that these
    objects can be lists, dicts or otherwise nested pymatgen objects that
    support the as_dict() and from_dict MSONable protocol.

    Args:
        obj (object): Object to dump.
        filename (str): Filename of file to open. Can be gzipped or bzipped.
        \*\*kwargs: Any of the keyword arguments supported by the json.dump
            method.
    """
    return dumpfn(obj, filename, **kwargs)
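# Minimal usage sketch (added for illustration; `structure` is a placeholder
# for any MSONable pymatgen object). Keyword arguments such as indent or cls
# are passed straight through to json.dump by dumpfn:
#
#   pmg_dump(structure.as_dict(), "structure.json", indent=2)
#   pmg_dump(structure, "structure.json", cls=MontyEncoder)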
def run_task(self, fw_spec):
    transformations = []
    transformation_params = self.get(
        "transformation_params",
        [{} for i in range(len(self["transformations"]))])
    for t in self["transformations"]:
        found = False
        for m in [
                "advanced_transformations", "defect_transformations",
                "site_transformations", "standard_transformations"
        ]:
            mod = import_module("pymatgen.transformations.{}".format(m))
            try:
                t_cls = getattr(mod, t)
            except AttributeError:
                continue
            t_obj = t_cls(**transformation_params.pop(0))
            transformations.append(t_obj)
            found = True
        if not found:
            raise ValueError("Could not find transformation: {}".format(t))

    # TODO: @matk86 - should prev_calc_dir use CONTCAR instead of POSCAR? Note that if
    # current dir, maybe it is POSCAR indeed best ... -computron
    structure = self['structure'] if not self.get('prev_calc_dir', None) else \
        Poscar.from_file(os.path.join(self['prev_calc_dir'], 'POSCAR')).structure

    ts = TransformedStructure(structure)
    transmuter = StandardTransmuter([ts], transformations)
    final_structure = transmuter.transformed_structures[
        -1].final_structure.copy()

    vis_orig = self["vasp_input_set"]
    vis_dict = vis_orig.as_dict()
    vis_dict["structure"] = final_structure.as_dict()
    vis_dict.update(self.get("override_default_vasp_params", {}) or {})
    vis = vis_orig.__class__.from_dict(vis_dict)
    vis.write_input(".")

    dumpfn(transmuter.transformed_structures[-1], "transformations.json")
def test_xas_task_id_operator():
    op = XASTaskIDQuery()

    assert op.query(task_ids="mp-149, mp-13") == {
        "criteria": {
            "task_id": {"$in": ["mp-149", "mp-13"]}
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(task_ids="mp-149, mp-13") == {
            "criteria": {
                "task_id": {"$in": ["mp-149", "mp-13"]}
            }
        }
def test_path_finding(self):
    molecule_entries = loadfn(
        os.path.join(test_dir, "ronalds_MoleculeEntry.json"))

    li_plus_mol_entry = find_mol_entry_from_xyz_and_charge(
        molecule_entries, os.path.join(test_dir, "Li.xyz"), 1)
    ec_mol_entry = find_mol_entry_from_xyz_and_charge(
        molecule_entries, os.path.join(test_dir, "EC.xyz"), 0)
    ledc_mol_entry = find_mol_entry_from_xyz_and_charge(
        molecule_entries, os.path.join(test_dir, "LEDC.xyz"), 0)

    result = path_finding_wrapper(
        molecule_entries, [li_plus_mol_entry, ec_mol_entry], ledc_mol_entry)

    dumpfn(result, "/tmp/lol")
    result_canonicalized = loadfn("/tmp/lol")

    expected = loadfn(os.path.join(test_dir, "ronalds_PRs.json"))

    assert result_canonicalized == expected
def _get_mpid_cache(self):
    path = os.path.join(os.path.dirname(module_path), "mpid_cache.json")

    if os.path.isfile(path):
        mpid_cache = loadfn(path)
    else:
        with MPRester() as mpr:
            # restrict random mpids to those likely experimentally known
            # and not too large
            entries = mpr.query(
                {"nsites": {"$lte": 16}},
                ["task_id", "icsd_ids"],
                chunk_size=0,
                mp_decode=False,
            )
        mpid_cache = [
            entry["task_id"] for entry in entries if len(entry["icsd_ids"]) > 2
        ]
        dumpfn(mpid_cache, path)

    self.mpid_cache = mpid_cache
def test_run_builder(mongostore):
    memorystore = MemoryStore("temp")
    builder = CopyBuilder(mongostore, memorystore)

    mongostore.update([{
        mongostore.key: i,
        mongostore.last_updated_field: datetime.utcnow()
    } for i in range(10)])

    runner = CliRunner()
    with runner.isolated_filesystem():
        dumpfn(builder, "test_builder.json")
        result = runner.invoke(run, ["-v", "test_builder.json"])
        assert result.exit_code == 0
        assert "CopyBuilder" in result.output
        assert "SerialProcessor" in result.output

        result = runner.invoke(run, ["-v", "-n", "2", "test_builder.json"])
        assert result.exit_code == 0
        assert "CopyBuilder" in result.output
        assert "MultiProcessor" in result.output
def test_possible_oxi_state_query():
    op = PossibleOxiStateQuery()

    assert op.query(possible_species="Cr2+, O2-") == {
        "criteria": {
            "possible_species": {"$all": ["Cr2+", "O2-"]}
        }
    }

    with ScratchDir("."):
        dumpfn(op, "temp.json")
        new_op = loadfn("temp.json")

        assert new_op.query(possible_species="Cr2+, O2-") == {
            "criteria": {
                "possible_species": {"$all": ["Cr2+", "O2-"]}
            }
        }
def test_dumpfn_loadfn(self):
    d = {"hello": "world"}

    dumpfn(d, "monte_test.json", indent=4)
    d2 = loadfn("monte_test.json")
    self.assertEqual(d, d2)
    os.remove("monte_test.json")

    dumpfn(d, "monte_test.yaml", default_flow_style=False)
    d2 = loadfn("monte_test.yaml")
    self.assertEqual(d, d2)
    dumpfn(d, "monte_test.yaml", Dumper=Dumper)
    d2 = loadfn("monte_test.yaml")
    os.remove("monte_test.yaml")

    dumpfn(d, "monte_test.mpk")
    d2 = loadfn("monte_test.mpk")
    self.assertEqual(d, {k.decode('utf-8'): v.decode('utf-8')
                         for k, v in d2.items()})
    os.remove("monte_test.mpk")
def test_dumpf_loadf(self):
    d = {"hello": "world"}

    dumpfn(d, "monte_test.json", indent=4)
    d2 = loadfn("monte_test.json")
    self.assertEqual(d, d2)
    os.remove("monte_test.json")

    dumpfn(d, "monte_test.yaml", default_flow_style=False)
    d2 = loadfn("monte_test.yaml")
    self.assertEqual(d, d2)
    dumpfn(d, "monte_test.yaml", Dumper=Dumper)
    d2 = loadfn("monte_test.yaml")
    os.remove("monte_test.yaml")
def write_config(path=None):
    path = os.path.join(os.path.expanduser('~'), ".fireworks",
                        'FW_config.yaml') if path is None else path
    dumpfn(config_to_dict(), path)
def solute_def_parse_energy(args):
    mpid = args.mpid
    solute = args.solute
    mapi_key = args.mapi_key

    if not mpid:
        print("============\nERROR: Provide an mpid\n============")
        return
    if not solute:
        print("============\nERROR: Provide solute element\n============")
        return

    if not mapi_key:
        with MPRester() as mp:
            structure = mp.get_structure_by_material_id(mpid)
    else:
        with MPRester(mapi_key) as mp:
            structure = mp.get_structure_by_material_id(mpid)

    energy_dict = {}

    solutes = []
    def_folders = glob.glob(os.path.join(
        mpid, "solute*subspecie-{}".format(solute)))
    def_folders += glob.glob(os.path.join(mpid, "bulk"))
    for defdir in def_folders:
        fldr_name = os.path.split(defdir)[1]
        vr_file = os.path.join(defdir, 'vasprun.xml')
        if not os.path.exists(vr_file):
            print(fldr_name, ": vasprun.xml doesn't exist in the folder. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break  # Further processing for the mpid is not useful

        try:
            vr = Vasprun(vr_file)
        except Exception:
            print(fldr_name, ": Failure, couldn't parse vasprun.xml file. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break

        if not vr.converged:
            print(fldr_name, ": Vasp calculation not converged. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break  # Further processing for the mpid is not useful

        fldr_fields = fldr_name.split("_")
        if 'bulk' in fldr_fields:
            bulk_energy = vr.final_energy
            bulk_sites = vr.structures[-1].num_sites
        elif 'solute' in fldr_fields:
            site_index = int(fldr_fields[1])
            site_multiplicity = int(fldr_fields[2].split("-")[1])
            site_specie = fldr_fields[3].split("-")[1]
            substitution_specie = fldr_fields[4].split("-")[1]
            energy = vr.final_energy
            solutes.append({'site_index': site_index,
                            'site_specie': site_specie,
                            'energy': energy,
                            'substitution_specie': substitution_specie,
                            'site_multiplicity': site_multiplicity})
    else:
        # The for-else branch runs only if no folder triggered a break.
        if not solutes:
            print("Solute folders do not exist")
            return {}

        print("Solute {} calculations successful for {}".format(solute, mpid))

        for solute in solutes:
            solute_flip_energy = solute['energy'] - bulk_energy
            solute['energy'] = solute_flip_energy
        solutes.sort(key=lambda entry: entry['site_index'])
        energy_dict[mpid] = {'solutes': solutes}
        fl_nm = mpid + '_solute-' + args.solute + '_raw_defect_energy.json'
        dumpfn(energy_dict, fl_nm, indent=2, cls=MontyEncoder)
def vac_antisite_def_parse_energy(args):
    mpid = args.mpid
    mapi_key = args.mapi_key

    if not mpid:
        print("============\nERROR: Provide an mpid\n============")
        return

    if not mapi_key:
        with MPRester() as mp:
            structure = mp.get_structure_by_material_id(mpid)
    else:
        with MPRester(mapi_key) as mp:
            structure = mp.get_structure_by_material_id(mpid)

    energy_dict = {}

    antisites = []
    vacancies = []
    def_folders = glob.glob(os.path.join(mpid, "vacancy*"))
    def_folders += glob.glob(os.path.join(mpid, "antisite*"))
    def_folders += glob.glob(os.path.join(mpid, "bulk"))
    for defdir in def_folders:
        fldr_name = os.path.split(defdir)[1]
        vr_file = os.path.join(defdir, 'vasprun.xml')
        if not os.path.exists(vr_file):
            print(fldr_name, ": vasprun.xml doesn't exist in the folder. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break  # Further processing for the mpid is not useful

        try:
            vr = Vasprun(vr_file)
        except Exception:
            print(fldr_name, ": Failure, couldn't parse vasprun.xml file. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break

        if not vr.converged:
            print(fldr_name, ": Vasp calculation not converged. "
                  "Abandoning parsing of energies for {}".format(mpid))
            break  # Further processing for the mpid is not useful

        fldr_fields = fldr_name.split("_")
        if 'bulk' in fldr_fields:
            bulk_energy = vr.final_energy
            bulk_sites = vr.structures[-1].num_sites
        elif 'vacancy' in fldr_fields:
            site_index = int(fldr_fields[1])
            site_multiplicity = int(fldr_fields[2].split("-")[1])
            site_specie = fldr_fields[3].split("-")[1]
            energy = vr.final_energy
            vacancies.append({'site_index': site_index,
                              'site_specie': site_specie,
                              'energy': energy,
                              'site_multiplicity': site_multiplicity})
        elif 'antisite' in fldr_fields:
            site_index = int(fldr_fields[1])
            site_multiplicity = int(fldr_fields[2].split("-")[1])
            site_specie = fldr_fields[3].split("-")[1]
            substitution_specie = fldr_fields[4].split("-")[1]
            energy = vr.final_energy
            antisites.append({'site_index': site_index,
                              'site_specie': site_specie,
                              'energy': energy,
                              'substitution_specie': substitution_specie,
                              'site_multiplicity': site_multiplicity})
    else:
        # The for-else branch runs only if no folder triggered a break.
        print("All calculations successful for ", mpid)
        e0 = bulk_energy / bulk_sites * structure.num_sites
        for vac in vacancies:
            vac_flip_energy = vac['energy'] - bulk_energy
            vac['energy'] = vac_flip_energy
        vacancies.sort(key=lambda entry: entry['site_index'])
        for antisite in antisites:
            as_flip_energy = antisite['energy'] - bulk_energy
            antisite['energy'] = as_flip_energy
        antisites.sort(key=lambda entry: entry['site_index'])
        energy_dict[str(mpid)] = {u"structure": structure,
                                  'e0': e0,
                                  'vacancies': vacancies,
                                  'antisites': antisites}
        fl_nm = args.mpid + '_raw_defect_energy.json'
        dumpfn(energy_dict, fl_nm, cls=MontyEncoder, indent=2)
def run_interrupted(self):
    """
    Runs custodian in an interrupted mode, which sets up and validates jobs
    but doesn't run the executable.

    Returns:
        number of remaining jobs

    Raises:
        CustodianError on unrecoverable errors, and jobs that fail
        validation
    """
    try:
        cwd = os.getcwd()
        start = datetime.datetime.now()
        v = sys.version.replace("\n", " ")
        logger.info("Custodian started in singleshot mode at {} in {}."
                    .format(start, cwd))
        logger.info("Custodian running on Python version {}".format(v))

        # load run log
        if os.path.exists(Custodian.LOG_FILE):
            self.run_log = loadfn(Custodian.LOG_FILE, cls=MontyDecoder)

        if len(self.run_log) == 0:
            # starting up an initial job - setup input and quit
            job_n = 0
            job = self.jobs[job_n]
            logger.info("Setting up job no. 1 ({}) ".format(job.name))
            job.setup()
            self.run_log.append({"job": job.as_dict(), "corrections": [],
                                 'job_n': job_n})
            return len(self.jobs)
        else:
            # Continuing after running calculation
            job_n = self.run_log[-1]['job_n']
            job = self.jobs[job_n]

            # If we had to fix errors from a previous run, insert clean log
            # dict
            if len(self.run_log[-1]['corrections']) > 0:
                logger.info("Reran {}.run due to fixable errors".format(job.name))

            # check error handlers
            logger.info("Checking error handlers for {}.run".format(job.name))
            if self._do_check(self.handlers):
                logger.info("Failed validation based on error handlers")
                # raise an error for an unrecoverable error
                for x in self.run_log[-1]["corrections"]:
                    if not x["actions"] and x["handler"].raises_runtime_error:
                        s = "Unrecoverable error for handler: {}. " \
                            "Raising RuntimeError".format(x["handler"])
                        raise CustodianError(s, True, x["handler"])
                logger.info("Corrected input based on error handlers")
                # Return with more jobs to run if recoverable error caught
                # and corrected for
                return len(self.jobs) - job_n

            # check validators
            logger.info("Checking validator for {}.run".format(job.name))
            for v in self.validators:
                if v.check():
                    logger.info("Failed validation based on validator")
                    s = "Validation failed: {}".format(v)
                    raise CustodianError(s, True, v)

            logger.info("Postprocessing for {}.run".format(job.name))
            job.postprocess()

            # IF DONE WITH ALL JOBS - DELETE ALL CHECKPOINTS AND RETURN
            # VALIDATED
            if len(self.jobs) == (job_n + 1):
                self.finished = True
                return 0

            # Setup next job_n
            job_n += 1
            job = self.jobs[job_n]
            self.run_log.append({"job": job.as_dict(), "corrections": [],
                                 'job_n': job_n})
            job.setup()
            return len(self.jobs) - job_n

    except CustodianError as ex:
        logger.error(ex.message)
        if ex.raises:
            raise RuntimeError("{} errors reached: {}. Exited..."
                               .format(self.total_errors, ex))

    finally:
        # Log the corrections to a json file.
        logger.info("Logging to {}...".format(Custodian.LOG_FILE))
        dumpfn(self.run_log, Custodian.LOG_FILE, cls=MontyEncoder,
               indent=4)
        end = datetime.datetime.now()
        logger.info("Run ended at {}.".format(end))
        run_time = end - start
        logger.info("Run completed. Total time taken = {}."
                    .format(run_time))
        if self.finished and self.gzipped_output:
            gzip_dir(".")
def test_as_dict(self):
    dumpfn(self.entry_set, "temp_entry_set.json")
    entry_set = loadfn("temp_entry_set.json")
    self.assertEqual(len(entry_set), len(self.entry_set))
    os.remove("temp_entry_set.json")
def save(self, fname="Transport_Properties.json"): dumpfn(self.props_dict, fname)
def update_checkpoint(job_ids=None, jfile=None, **kwargs):
    """
    Rerun the jobs with job ids in the job_ids list. The jobs are read from
    the json checkpoint file, jfile. If no job_ids are given, the checkpoint
    file is updated with the corresponding final energies.

    Args:
        job_ids: list of job ids to update or q resolve
        jfile: check point file
    """
    cal_log = loadfn(jfile, cls=MontyDecoder)
    cal_log_new = []
    all_jobs = []
    run_jobs = []
    handlers = []
    final_energy = None
    incar = None
    kpoints = None
    qadapter = None
    # if updating the specs of the job
    for k, v in kwargs.items():
        if k == 'incar':
            incar = v
        if k == 'kpoints':
            kpoints = v
        if k == 'que':
            qadapter = v
    for j in cal_log:
        job = j["job"]
        job.job_id = j['job_id']
        all_jobs.append(job)
        if job_ids and (j['job_id'] in job_ids or job.job_dir in job_ids):
            logger.info('setting job {0} in {1} to rerun'
                        .format(j['job_id'], job.job_dir))
            contcar_file = job.job_dir + os.sep + 'CONTCAR'
            poscar_file = job.job_dir + os.sep + 'POSCAR'
            if os.path.isfile(contcar_file) and \
                    len(open(contcar_file).readlines()) != 0:
                logger.info('setting poscar file from {}'
                            .format(contcar_file))
                job.vis.poscar = Poscar.from_file(contcar_file)
            else:
                logger.info('setting poscar file from {}'
                            .format(poscar_file))
                job.vis.poscar = Poscar.from_file(poscar_file)
            if incar:
                logger.info('incar overridden')
                job.vis.incar = incar
            if kpoints:
                logger.info('kpoints overridden')
                job.vis.kpoints = kpoints
            if qadapter:
                logger.info('qadapter overridden')
                job.vis.qadapter = qadapter
            run_jobs.append(job)
    if run_jobs:
        c = Custodian(handlers, run_jobs, max_errors=5)
        c.run()
    for j in all_jobs:
        final_energy = j.get_final_energy()
        cal_log_new.append({"job": j.as_dict(),
                            'job_id': j.job_id,
                            "corrections": [],
                            'final_energy': final_energy})
    dumpfn(cal_log_new, jfile, cls=MontyEncoder, indent=4)