def process_vasprun(self, dir_name, taskname, filename):
    """Parse a single vasprun.xml into a task sub-document.

    Returns Vasprun.as_dict() augmented with the absolute dir name,
    completion timestamp, CIF string, density, (optionally) the DOS,
    task metadata and the oxide type of the final structure.
    """
    xml_path = os.path.join(dir_name, filename)
    is_final_run = taskname == self.runs[-1]
    # Projections are parsed always, or only for the last run when the
    # setting is the string 'final'.
    want_projections = bool(
        self.parse_projected_eigen
        and (self.parse_projected_eigen != 'final' or is_final_run))
    run = Vasprun(xml_path, parse_projected_eigen=want_projections)
    doc = run.as_dict()
    doc["dir_name"] = os.path.abspath(dir_name)
    doc["completed_at"] = str(
        datetime.datetime.fromtimestamp(os.path.getmtime(xml_path)))
    doc["cif"] = str(CifWriter(run.final_structure))
    doc["density"] = run.final_structure.density
    # Same always/'final' convention for the DOS as for projections.
    if self.parse_dos and (self.parse_dos != 'final' or is_final_run):
        try:
            doc["dos"] = run.complete_dos.as_dict()
        except Exception:
            logger.warning(
                "No valid dos data exist in {}.\n Skipping dos".format(
                    dir_name))
    if taskname in ("relax1", "relax2"):
        doc["task"] = {"type": "aflow", "name": taskname}
    else:
        doc["task"] = {"type": taskname, "name": taskname}
    doc["oxide_type"] = oxide_type(run.final_structure)
    return doc
def old_style_mat(new_style_mat):
    """Build the legacy MP mapidoc-style document from a new-style
    material document.
    """
    mat = {}
    # Bulk-copy simple fields through the old->new key mapping.
    for old_path, new_path in mp_conversion_dict.items():
        if has(new_style_mat, new_path):
            set_(mat, old_path, get(new_style_mat, new_path))
    mat["is_ordered"] = True
    mat["is_compatible"] = True
    structure = Structure.from_dict(mat["structure"])
    composition = structure.composition
    mat["oxide_type"] = oxide_type(structure)
    mat["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = composition.as_dict()
    mat["full_formula"] = "".join(structure.formula.split())
    amounts = sorted(mat["reduced_cell_formula"].values())
    # Anonymous formula maps sorted amounts onto A, B, C, ...
    mat["anonymous_formula"] = {
        string.ascii_uppercase[idx]: float(amt)
        for idx, amt in enumerate(amounts)
    }
    mat["initial_structure"] = new_style_mat.get("initial_structure", None)
    mat["nsites"] = structure.get_primitive_structure().num_sites
    set_(mat, "pseudo_potential.functional", "PBE")
    potcar_spec = get(new_style_mat, "calc_settings.potcar_spec")
    # POTCAR "titel" looks like "PAW_PBE Fe 06Sep2000"; the label is token 1.
    set_(mat, "pseudo_potential.labels",
         [p["titel"].split()[1] for p in potcar_spec])
    mat["ntask_ids"] = len(get(new_style_mat, "task_ids"))
    set_(mat, "pseudo_potential.pot_type", "paw")
    add_blessed_tasks(mat, new_style_mat)
    add_cifs(mat)
    check_relaxation(mat, new_style_mat)
    return mat
def process_vasprun(self, dir_name, taskname, filename):
    """Parse dir_name/filename (a vasprun.xml) into a task dictionary."""
    vasprun_file = os.path.join(dir_name, filename)
    # 'final' restricts projected-eigenvalue parsing to the last run.
    if self.parse_projected_eigen:
        parse_projected = (self.parse_projected_eigen != 'final'
                           or taskname == self.runs[-1])
    else:
        parse_projected = False
    vrun = Vasprun(vasprun_file, parse_projected_eigen=parse_projected)
    d = vrun.as_dict()
    final_struct = vrun.final_structure
    d["dir_name"] = os.path.abspath(dir_name)
    mtime = os.path.getmtime(vasprun_file)
    d["completed_at"] = str(datetime.datetime.fromtimestamp(mtime))
    d["cif"] = str(CifWriter(final_struct))
    d["density"] = final_struct.density
    want_dos = self.parse_dos and (self.parse_dos != 'final'
                                   or taskname == self.runs[-1])
    if want_dos:
        try:
            d["dos"] = vrun.complete_dos.as_dict()
        except Exception:
            logger.warning("No valid dos data exist in {}.\n Skipping dos"
                           .format(dir_name))
    # Two-step relaxations are recorded under the "aflow" task type.
    task_type = "aflow" if taskname in ("relax1", "relax2") else taskname
    d["task"] = {"type": task_type, "name": taskname}
    d["oxide_type"] = oxide_type(final_struct)
    return d
def get_correction(self, entry):
    """Return the total energy correction (eV) for *entry*.

    Elemental entries get no correction.  Sulfides are corrected per S
    atom; oxides are corrected per O atom (or per O-O bond when the
    structure is available), with the oxide type taken from entry.data,
    the structure, or a known-formula fallback, in that order.
    """
    comp = entry.composition
    # Pure elements never receive a correction.
    if len(comp) == 1:
        return 0
    total = 0
    # --- sulfide corrections ---
    if Element("S") in comp:
        sulf = "sulfide"
        if entry.data.get("sulfide_type"):
            sulf = entry.data["sulfide_type"]
        elif hasattr(entry, "structure"):
            sulf = sulfide_type(entry.structure)
        if sulf in self.sulfide_correction:
            total += self.sulfide_correction[sulf] * comp["S"]
    # --- oxide / peroxide / superoxide / ozonide corrections ---
    if Element("O") in comp:
        if not self.correct_peroxide:
            # Peroxide handling disabled: flat oxide correction.
            total += self.oxide_correction['oxide'] * comp["O"]
        elif entry.data.get("oxide_type"):
            declared = entry.data["oxide_type"]
            if declared in self.oxide_correction:
                total += self.oxide_correction[declared] * comp["O"]
            if declared == "hydroxide":
                # Hydroxides take the plain oxide correction.
                total += self.oxide_correction["oxide"] * comp["O"]
        elif hasattr(entry, "structure"):
            ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                         return_nbonds=True)
            if ox_type in self.oxide_correction:
                # With a structure, the correction counts O-O bonds.
                total += self.oxide_correction[ox_type] * nbonds
            elif ox_type == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
        else:
            # No oxide_type or structure: fall back to known formulas.
            rform = entry.composition.reduced_formula
            if rform in UCorrection.common_peroxides:
                total += self.oxide_correction["peroxide"] * comp["O"]
            elif rform in UCorrection.common_superoxides:
                total += self.oxide_correction["superoxide"] * comp["O"]
            elif rform in UCorrection.ozonides:
                total += self.oxide_correction["ozonide"] * comp["O"]
            elif Element("O") in comp.elements and len(comp.elements) > 1:
                total += self.oxide_correction['oxide'] * comp["O"]
    return total
def old_style_mat(new_style_mat):
    """Build the legacy MP mapidoc-style document from a new-style
    material document, using the key mapping from module settings.
    """
    mat = {}
    mp_conversion_dict = _settings["conversion_dict"]
    # Looked up alongside the conversion dict; not used in this function.
    mag_types = _settings["mag_types"]
    # The conversion mapping handles the bulk of the field copying.
    for old_path, new_path in mp_conversion_dict.items():
        if has(new_style_mat, new_path):
            set_(mat, old_path, get(new_style_mat, new_path))
    # Anything coming through DFT is always ordered.
    mat["is_ordered"] = True
    mat["is_compatible"] = True
    structure = Structure.from_dict(mat["structure"])
    composition = structure.composition
    mat["oxide_type"] = oxide_type(structure)
    mat["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = composition.as_dict()
    mat["full_formula"] = "".join(structure.formula.split())
    amounts = sorted(mat["reduced_cell_formula"].values())
    mat["anonymous_formula"] = {
        string.ascii_uppercase[idx]: float(amt)
        for idx, amt in enumerate(amounts)
    }
    mat["initial_structure"] = new_style_mat.get("initial_structure", None)
    mat["nsites"] = structure.get_primitive_structure().num_sites
    set_(mat, "pseudo_potential.functional", "PBE")
    potcar_spec = get(new_style_mat, "calc_settings.potcar_spec")
    # POTCAR "titel" looks like "PAW_PBE Fe 06Sep2000"; the label is token 1.
    set_(mat, "pseudo_potential.labels",
         [p["titel"].split()[1] for p in potcar_spec])
    set_(mat, "pseudo_potential.pot_type", "paw")
    mat["blessed_tasks"] = {
        origin["task_type"]: origin["task_id"]
        for origin in new_style_mat["origins"]
    }
    mat["deprecated_tasks"] = new_style_mat.get("deprecated_tasks", [])
    mat["ntask_ids"] = len(mat["task_ids"])
    return mat
def get_entries(self, chemsys):
    """Get all entries in a chemical system from the materials store.

    Args:
        chemsys (str): chemical system as dash-separated element
            symbols, e.g. "Li-Fe-O".

    Returns:
        list of ComputedEntry for all permutations/subsystems of chemsys.
    """
    self.logger.info("Getting entries for: {}".format(chemsys))
    criteria = dict(self.query)
    criteria["chemsys"] = {"$in": list(chemsys_permutations(chemsys))}
    fields = [
        "structure", self.materials.key, "thermo.energy",
        "unit_cell_formula", "calc_settings.is_hubbard",
        "calc_settings.hubbards", "calc_settings.potcar_spec",
        "calc_settings.run_type"
    ]
    all_entries = []
    for doc in list(self.materials.query(fields, criteria)):
        calc = doc["calc_settings"]
        parameters = {
            key: calc[key]
            for key in ("is_hubbard", "hubbards", "potcar_spec", "run_type")
        }
        structure = Structure.from_dict(doc["structure"])
        all_entries.append(
            ComputedEntry(Composition(doc["unit_cell_formula"]),
                          doc["thermo"]["energy"],
                          0.0,
                          parameters=parameters,
                          entry_id=doc[self.materials.key],
                          data={"oxide_type": oxide_type(structure)}))
    self.logger.info("Total entries in {} : {}".format(
        chemsys, len(all_entries)))
    return all_entries
def get_correction(self, entry):
    """Return the energy correction (eV) for *entry*.

    Compounds with tabulated energies are pinned to that energy; other
    oxygen-bearing compounds receive oxide/peroxide/superoxide/ozonide
    corrections per O atom (or per O-O bond when a structure is given).
    """
    comp = entry.composition
    rform = entry.composition.reduced_formula
    # Compounds with tabulated energies: return the energy delta directly.
    if rform in self.cpd_energies:
        return (self.cpd_energies[rform] * comp.num_atoms
                - entry.uncorrected_energy)
    total = 0
    # Oxide / peroxide / superoxide / ozonide corrections.
    if not self.correct_peroxide:
        total += self.oxide_correction['oxide'] * comp["O"]
    elif len(comp) >= 2 and Element("O") in comp:
        if "oxide_type" in entry.data:
            declared = entry.data["oxide_type"]
            if declared in self.oxide_correction:
                total += self.oxide_correction[declared] * comp["O"]
            if declared == "hydroxide":
                # Hydroxides take the plain oxide correction.
                total += self.oxide_correction["oxide"] * comp["O"]
        elif hasattr(entry, "structure"):
            ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                         return_nbonds=True)
            if ox_type in self.oxide_correction:
                # With a structure, the correction counts O-O bonds.
                total += self.oxide_correction[ox_type] * nbonds
            elif ox_type == "hydroxide":
                total += self.oxide_correction["oxide"] * comp["O"]
        else:
            # No structure: fall back to known special formulas.
            if rform in UCorrection.common_peroxides:
                total += self.oxide_correction["peroxide"] * comp["O"]
            elif rform in UCorrection.common_superoxides:
                total += self.oxide_correction["superoxide"] * comp["O"]
            elif rform in UCorrection.ozonides:
                total += self.oxide_correction["ozonide"] * comp["O"]
            elif Element("O") in comp.elements and len(comp.elements) > 1:
                total += self.oxide_correction['oxide'] * comp["O"]
    return total
def entry(self):
    """Turn this task document into a ComputedEntry."""
    parameters = {
        "potcar_spec": self.input.potcar_spec,
        # str() keeps the value compatible with MontyEncoder for the
        # ComputedEntry round-trip.
        "run_type": str(self.run_type),
    }
    data = {
        "oxide_type": oxide_type(self.output.structure),
        "last_updated": self.last_updated,
    }
    return ComputedEntry.from_dict({
        "correction": 0.0,
        "entry_id": self.task_id,
        "composition": self.output.structure.composition,
        "energy": self.output.energy,
        "parameters": parameters,
        "data": data,
    })
def get_entries(self, chemsys):
    """Get all entries in a chemical system from the materials store.

    Args:
        chemsys (str): chemical system as dash-separated element
            symbols, e.g. "Li-Fe-O".

    Returns:
        list of ComputedEntry for all permutations/subsystems of chemsys.
    """
    self.logger.info("Getting entries for: {}".format(chemsys))
    criteria = dict(self.query)
    criteria["chemsys"] = {"$in": list(chemsys_permutations(chemsys))}
    fields = [
        "structure", self.materials.key, "thermo.energy_per_atom",
        "composition", "calc_settings"
    ]
    all_entries = []
    for doc in list(self.materials.query(fields, criteria)):
        comp = Composition(doc["composition"])
        # Stored energy is per atom; ComputedEntry wants the total energy.
        total_energy = doc["thermo"]["energy_per_atom"] * comp.num_atoms
        oxide = oxide_type(Structure.from_dict(doc["structure"]))
        all_entries.append(
            ComputedEntry(comp,
                          total_energy,
                          0.0,
                          parameters=doc["calc_settings"],
                          entry_id=doc[self.materials.key],
                          data={"oxide_type": oxide}))
    self.logger.info("Total entries in {} : {}".format(
        chemsys, len(all_entries)))
    return all_entries
def old_style_mat(new_mat):
    """Convert a new-style material document into the legacy mapidoc
    layout.

    Args:
        new_mat (dict): new-style material document.

    Returns:
        dict: legacy-style material document.
    """
    mat = {}
    for mp, new_key in mp_conversion_dict.items():
        if has(new_mat, new_key):
            set_(mat, mp, get(new_mat, new_key))
    # Fixed: key was misspelled "is_orderd"; the sibling converters in
    # this file write "is_ordered".
    mat["is_ordered"] = True
    mat["is_compatible"] = True
    struc = Structure.from_dict(mat["structure"])
    mat["oxide_type"] = oxide_type(struc)
    # Fixed: reduced_cell_formula previously used the full composition;
    # the sibling converters use the reduced composition here.
    mat["reduced_cell_formula"] = \
        struc.composition.reduced_composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())
    vals = sorted(mat["reduced_cell_formula"].values())
    # Anonymous formula maps sorted amounts onto A, B, C, ...
    mat["anonymous_formula"] = {
        string.ascii_uppercase[i]: float(vals[i])
        for i in range(len(vals))
    }
    set_(mat, "original_task_id", get(new_mat, "material_id"))
    set_(mat, "ntask_ids", len(get(new_mat, "task_ids")))
    set_(mat, "input.incar",
         get(new_mat, "inputs.structure_optimization.incar"))
    set_(mat, "input.kpoints",
         get(new_mat, "inputs.structure_optimization.kpoints"))
    set_(mat, "encut",
         get(new_mat, "inputs.structure_optimization.incar.ENCUT"))
    # Fixed: these two lookups used "input.structure_optimization..."
    # while every other lookup in this function reads from "inputs.".
    mat["pseudo_potential"] = {
        "pot_type": "paw",
        "labels": get(new_mat,
                      "inputs.structure_optimization.potcar.symbols"),
        "functional": get(new_mat,
                          "inputs.structure_optimization.potcar.functional")
    }
    return mat
def test_oxide_type(self):
    """Exercise oxide_type() classification on a set of hand-built cells:
    Li-O superoxide, ozonide, peroxide; Li-O-H hydroxide; an isostructural
    Li-N-H cell and elemental oxygen (both expected to yield "None").
    """
    # --- 2 Li + 4 O cell: classified as superoxide ---
    el_li = Element("Li")
    el_o = Element("O")
    latt = Lattice([[3.985034, 0.0, 0.0],
                    [0.0, 4.881506, 0.0],
                    [0.0, 0.0, 2.959824]])
    elts = [el_li, el_li, el_o, el_o, el_o, el_o]
    coords = list()
    coords.append([0.500000, 0.500000, 0.500000])
    coords.append([0.0, 0.0, 0.0])
    coords.append([0.632568, 0.085090, 0.500000])
    coords.append([0.367432, 0.914910, 0.500000])
    coords.append([0.132568, 0.414910, 0.000000])
    coords.append([0.867432, 0.585090, 0.000000])
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "superoxide")

    # --- 1 Li + 3 O triclinic cell: classified as ozonide ---
    el_li = Element("Li")
    el_o = Element("O")
    elts = [el_li, el_o, el_o, el_o]
    latt = Lattice.from_parameters(3.999911, 3.999911, 3.999911,
                                   133.847504, 102.228244, 95.477342)
    coords = [[0.513004, 0.513004, 1.000000],
              [0.017616, 0.017616, 0.000000],
              [0.649993, 0.874790, 0.775203],
              [0.099587, 0.874790, 0.224797]]
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "ozonide")

    # --- 4 Li + 4 O cell: classified as peroxide ---
    latt = Lattice.from_parameters(3.159597, 3.159572, 7.685205,
                                   89.999884, 89.999674, 60.000510)
    el_li = Element("Li")
    el_o = Element("O")
    elts = [el_li, el_li, el_li, el_li, el_o, el_o, el_o, el_o]
    coords = [[0.666656, 0.666705, 0.750001],
              [0.333342, 0.333378, 0.250001],
              [0.000001, 0.000041, 0.500001],
              [0.000001, 0.000021, 0.000001],
              [0.333347, 0.333332, 0.649191],
              [0.333322, 0.333353, 0.850803],
              [0.666666, 0.666686, 0.350813],
              [0.666665, 0.666684, 0.149189]]
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "peroxide")

    # --- Li-O-H cell: classified as hydroxide ---
    el_li = Element("Li")
    el_o = Element("O")
    el_h = Element("H")
    latt = Lattice.from_parameters(3.565276, 3.565276, 4.384277,
                                   90.000000, 90.000000, 90.000000)
    elts = [el_h, el_h, el_li, el_li, el_o, el_o]
    coords = [[0.000000, 0.500000, 0.413969],
              [0.500000, 0.000000, 0.586031],
              [0.000000, 0.000000, 0.000000],
              [0.500000, 0.500000, 0.000000],
              [0.000000, 0.500000, 0.192672],
              [0.500000, 0.000000, 0.807328]]
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "hydroxide")

    # --- Same geometry with N in place of O: no oxygen, so "None" ---
    el_li = Element("Li")
    el_n = Element("N")
    el_h = Element("H")
    latt = Lattice.from_parameters(3.565276, 3.565276, 4.384277,
                                   90.000000, 90.000000, 90.000000)
    elts = [el_h, el_h, el_li, el_li, el_n, el_n]
    coords = [[0.000000, 0.500000, 0.413969],
              [0.500000, 0.000000, 0.586031],
              [0.000000, 0.000000, 0.000000],
              [0.500000, 0.500000, 0.000000],
              [0.000000, 0.500000, 0.192672],
              [0.500000, 0.000000, 0.807328]]
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "None")

    # --- Elemental oxygen cell (8 O): classified as "None" ---
    el_o = Element("O")
    latt = Lattice.from_parameters(4.389828, 5.369789, 5.369789,
                                   70.786622, 69.244828, 69.244828)
    elts = [el_o, el_o, el_o, el_o, el_o, el_o, el_o, el_o]
    coords = [[0.844609, 0.273459, 0.786089],
              [0.155391, 0.213911, 0.726541],
              [0.155391, 0.726541, 0.213911],
              [0.844609, 0.786089, 0.273459],
              [0.821680, 0.207748, 0.207748],
              [0.178320, 0.792252, 0.792252],
              [0.132641, 0.148222, 0.148222],
              [0.867359, 0.851778, 0.851778]]
    struct = Structure(latt, elts, coords)
    self.assertEqual(oxide_type(struct, 1.1), "None")
def get_correction(self, entry):
    """Return the total energy correction (eV) for *entry*.

    Elemental entries get no correction.  Sulfides are corrected per S
    atom; oxides per O atom (or per O-O bond when a structure is
    available).  The oxide type is taken from entry.data, the structure,
    or a known-formula fallback (with a warning), in that order.
    """
    comp = entry.composition
    if len(comp) == 1:  # Skip element entry
        return 0

    correction = 0
    # Check for sulfide corrections
    if Element("S") in comp:
        sf_type = "sulfide"
        if entry.data.get("sulfide_type"):
            sf_type = entry.data["sulfide_type"]
        elif hasattr(entry, "structure"):
            sf_type = sulfide_type(entry.structure)
        if sf_type in self.sulfide_correction:
            correction += self.sulfide_correction[sf_type] * comp["S"]

    # Check for oxide, peroxide, superoxide, and ozonide corrections.
    if Element("O") in comp:
        if self.correct_peroxide:
            if entry.data.get("oxide_type"):
                if entry.data["oxide_type"] in self.oxide_correction:
                    ox_corr = self.oxide_correction[
                        entry.data["oxide_type"]]
                    correction += ox_corr * comp["O"]
                if entry.data["oxide_type"] == "hydroxide":
                    # Hydroxides additionally take the oxide correction.
                    ox_corr = self.oxide_correction["oxide"]
                    correction += ox_corr * comp["O"]
            elif hasattr(entry, "structure"):
                ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                             return_nbonds=True)
                if ox_type in self.oxide_correction:
                    # With a structure, the correction counts O-O bonds.
                    correction += self.oxide_correction[ox_type] * \
                        nbonds
                elif ox_type == "hydroxide":
                    correction += self.oxide_correction["oxide"] * \
                        comp["O"]
            else:
                # Fixed: missing space between the adjacent string
                # literals produced "specialformulas" in the warning.
                warnings.warn(
                    "No structure or oxide_type parameter present. Note "
                    "that peroxide/superoxide corrections are not as "
                    "reliable and relies only on detection of special "
                    "formulas, e.g., Li2O2.")
                rform = entry.composition.reduced_formula
                if rform in UCorrection.common_peroxides:
                    correction += self.oxide_correction["peroxide"] * \
                        comp["O"]
                elif rform in UCorrection.common_superoxides:
                    correction += self.oxide_correction["superoxide"] * \
                        comp["O"]
                elif rform in UCorrection.ozonides:
                    correction += self.oxide_correction["ozonide"] * \
                        comp["O"]
                elif Element("O") in comp.elements and len(comp.elements) \
                        > 1:
                    correction += self.oxide_correction['oxide'] * \
                        comp["O"]
        else:
            correction += self.oxide_correction['oxide'] * comp["O"]
    return correction
def assimilate(self, path, launches_coll=None):
    """
    Parses vasp runs. Then insert the result into the db. and return the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    # NOTE(review): Python 2 code (print statements); keep on a py2
    # interpreter or port before running under Python 3.
    d = self.get_task_doc(path)
    if self.additional_fields:
        # always add additional fields, even for failed jobs
        d.update(self.additional_fields)
    try:
        # dir_name is stored as "host:/abs/path"; keep the path part and
        # derive the block-level dir name used as the dedup key below.
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        d["stored_data"] = {}
    except:
        print 'COULD NOT GET DIR NAME'
        pprint.pprint(d)
        print traceback.format_exc()
        raise ValueError('IMPROPER PARSING OF {}'.format(path))
    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A
            # ref to the dos file is in the dos_fs_id.
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                # New doc: draw a fresh task id from the atomic counter.
                if ("task_id" not in d) or (not d["task_id"]):
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_one_and_update(
                            {"_id": "taskid"}, {"$inc": {"c": 1}}
                        )["c"])
                logger.info("Inserting {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                # Re-use the existing task id when overwriting.
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}"
                            .format(d["dir_name"], d["task_id"]))
            # Fireworks processing
            self.process_fw(path, d)
            try:
                # Add oxide_type derived from the final output structure.
                struct = Structure.from_dict(d["output"]["crystal"])
                d["oxide_type"] = oxide_type(struct)
            except:
                logger.error("can't get oxide_type for {}".format(d["task_id"]))
                d["oxide_type"] = None
            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                try:
                    run_stats = {}
                    for i in [1, 2]:
                        o_path = os.path.join(path, "relax" + str(i), "OUTCAR")
                        # Fall back to the gzipped OUTCAR if present.
                        o_path = o_path if os.path.exists(o_path) else o_path + ".gz"
                        outcar = Outcar(o_path)
                        d["calculations"][i - 1]["output"]["outcar"] = outcar.as_dict()
                        run_stats["relax" + str(i)] = outcar.run_stats
                except:
                    logger.error("Bad OUTCAR for {}.".format(path))
                try:
                    # Aggregate timing stats over both relaxation steps.
                    overall_run_stats = {}
                    for key in ["Total CPU time used (sec)", "User time (sec)",
                                "System time (sec)", "Elapsed time (sec)"]:
                        overall_run_stats[key] = sum([v[key] for v in run_stats.values()])
                    run_stats["overall"] = overall_run_stats
                except:
                    logger.error("Bad run stats for {}.".format(path))
                d["run_stats"] = run_stats
            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = d["pseudo_potential"]["functional"]
                labels = d["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                # Dummy zero-energy entry: only parameters matter for the
                # compatibility check.
                parameters = {"run_type": d["run_type"],
                              "is_hubbard": d["is_hubbard"],
                              "hubbards": d["hubbards"],
                              "potcar_symbols": symbols}
                entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                      0.0, 0.0, parameters=parameters,
                                      entry_id=d["task_id"])
                d['is_compatible'] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility'
                d['is_compatible'] = None
            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'],
                     "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                for i in ["conventional_standard_structure",
                          "symmetry_operations", "symmetry_dataset",
                          "refined_structure"]:
                    try:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    except:
                        pass
            # parse band structure if necessary
            if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                launch_doc = launches_coll.find_one(
                    {"fw_id": d['fw_id'],
                     "launch_dir": {"$regex": d["dir_name"]}},
                    {"action.stored_data": 1})
                vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")),
                                   parse_projected_eigen=False)
                if 'band structure' in d['task_type']:
                    def string_to_numlist(stringlist):
                        # Parse "x y z" whitespace-separated floats.
                        g = re.search('([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                                      stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]
                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action']['stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        kpoints_doc[i] = string_to_numlist(kpoints_doc[i])
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=True)
                else:
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']['efermi'],
                        line_mode=False)
                # Band structures are large: store in GridFS, keep the id.
                bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id
                # also override band gap in task doc
                gap = bs.get_band_gap()
                vbm = bs.get_vbm()
                cbm = bs.get_cbm()
                update_doc = {'bandgap': gap['energy'],
                              'vbm': vbm['energy'],
                              'cbm': cbm['energy'],
                              'is_gap_direct': gap['direct']}
                d['analysis'].update(update_doc)
                d['calculations'][0]['output'].update(update_doc)
            coll.update_one({"dir_name": d["dir_name"]},
                            {'$set': d}, upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info("Simulated insert into database for {} with task_id {}"
                    .format(d["dir_name"], d["task_id"]))
        return 0, d
def get_entries(self, chemsys):
    """Get all entries in a chemical system from materials, serving
    previously-built entries from the in-memory cache when possible.

    Args:
        chemsys (str): chemical system as dash-separated element
            symbols, e.g. "Li-Fe-O".

    Returns:
        list of ComputedEntry for all permutations/subsystems of chemsys.
    """
    self.logger.info("Getting entries for: {}".format(chemsys))

    # Split the permutation set into cached vs. to-be-queried systems.
    all_chemsys = chemsys_permutations(chemsys)
    cached_chemsys = all_chemsys & set(self.entries_cache.keys())
    query_chemsys = all_chemsys - cached_chemsys
    self.logger.debug("Getting {} entries from cache for {}".format(
        len(cached_chemsys), chemsys))

    # Query the store only for systems missing from the cache.
    criteria = dict(self.query)
    criteria["chemsys"] = {"$in": list(query_chemsys)}
    criteria["deprecated"] = False
    fields = [
        "structure",
        self.materials.key,
        "thermo.energy_per_atom",
        "composition",
        "calc_settings",
        "_sbxn",
    ]
    docs = list(self.materials.query(properties=fields, criteria=criteria))

    # Start with the cached entries.
    all_entries = []
    for system in cached_chemsys:
        all_entries.extend(self.entries_cache[system])

    for doc in docs:
        comp = Composition(doc["composition"])
        # Stored energy is per atom; ComputedEntry wants the total energy.
        total_energy = doc["thermo"]["energy_per_atom"] * comp.num_atoms
        new_entry = ComputedEntry(
            comp,
            total_energy,
            0.0,
            parameters=doc["calc_settings"],
            entry_id=doc[self.materials.key],
            data={
                "oxide_type":
                oxide_type(Structure.from_dict(doc["structure"])),
                "_sbxn": doc.get("_sbxn", []),
            },
        )
        # Cache the entry under its own (sorted) chemical system key.
        elsyms = sorted({el.symbol for el in comp.elements})
        self.entries_cache["-".join(elsyms)].append(new_entry)
        all_entries.append(new_entry)

    self.logger.info("Total entries in {} : {}".format(
        chemsys, len(all_entries)))
    return all_entries
def get_correction(self, entry):
    """Return the total energy correction (eV) for *entry*.

    Elemental entries get no correction.  Sulfides are corrected per S
    atom; oxides per O atom (or per O-O bond when a structure is
    available).  The oxide type is taken from entry.data, the structure,
    or a known-formula fallback (with a warning), in that order.
    """
    comp = entry.composition
    if len(comp) == 1:  # Skip element entry
        return 0

    correction = 0
    # Check for sulfide corrections
    if Element("S") in comp:
        sf_type = "sulfide"
        if entry.data.get("sulfide_type"):
            sf_type = entry.data["sulfide_type"]
        elif hasattr(entry, "structure"):
            sf_type = sulfide_type(entry.structure)
        if sf_type in self.sulfide_correction:
            correction += self.sulfide_correction[sf_type] * comp["S"]

    # Check for oxide, peroxide, superoxide, and ozonide corrections.
    if Element("O") in comp:
        if self.correct_peroxide:
            if entry.data.get("oxide_type"):
                if entry.data["oxide_type"] in self.oxide_correction:
                    ox_corr = self.oxide_correction[
                        entry.data["oxide_type"]]
                    correction += ox_corr * comp["O"]
                if entry.data["oxide_type"] == "hydroxide":
                    # Hydroxides additionally take the oxide correction.
                    ox_corr = self.oxide_correction["oxide"]
                    correction += ox_corr * comp["O"]
            elif hasattr(entry, "structure"):
                ox_type, nbonds = oxide_type(entry.structure, 1.05,
                                             return_nbonds=True)
                if ox_type in self.oxide_correction:
                    # With a structure, the correction counts O-O bonds.
                    correction += self.oxide_correction[ox_type] * \
                        nbonds
                elif ox_type == "hydroxide":
                    correction += self.oxide_correction["oxide"] * \
                        comp["O"]
            else:
                # Fixed: missing spaces between adjacent string literals
                # produced "Notethat" and "specialformulas" in the warning.
                warnings.warn(
                    "No structure or oxide_type parameter present. Note "
                    "that peroxide/superoxide corrections are not as "
                    "reliable and relies only on detection of special "
                    "formulas, e.g., Li2O2.")
                rform = entry.composition.reduced_formula
                if rform in UCorrection.common_peroxides:
                    correction += self.oxide_correction["peroxide"] * \
                        comp["O"]
                elif rform in UCorrection.common_superoxides:
                    correction += self.oxide_correction["superoxide"] * \
                        comp["O"]
                elif rform in UCorrection.ozonides:
                    correction += self.oxide_correction["ozonide"] * \
                        comp["O"]
                elif Element("O") in comp.elements and len(comp.elements)\
                        > 1:
                    correction += self.oxide_correction['oxide'] * \
                        comp["O"]
        else:
            correction += self.oxide_correction['oxide'] * comp["O"]
    return correction
def list_oxide_type(list_struc):
    """Classify the oxide type of every structure in *list_struc*.

    Each result is produced by oxide_type(..., relative_cutoff=1.2,
    return_nbonds=True); one result per input structure, in order.
    """
    results = []
    for struc in list_struc:
        results.append(
            oxide_type(struc, relative_cutoff=1.2, return_nbonds=True))
    return results
def assimilate(self, path, launches_coll=None):
    """
    Parses vasp runs. Then insert the result into the db. and return the
    task_id or doc of the insertion.

    Returns:
        If in simulate_mode, the entire doc is returned for debugging
        purposes. Else, only the task_id of the inserted doc is returned.
    """
    # NOTE(review): Python 2 code (print statements); keep on a py2
    # interpreter or port before running under Python 3.
    d = self.get_task_doc(path)
    if self.additional_fields:
        # always add additional fields, even for failed jobs
        d.update(self.additional_fields)
    try:
        # dir_name is stored as "host:/abs/path"; keep the path part and
        # derive the block-level dir name used as the dedup key below.
        d["dir_name_full"] = d["dir_name"].split(":")[1]
        d["dir_name"] = get_block_part(d["dir_name_full"])
        d["stored_data"] = {}
    except:
        print 'COULD NOT GET DIR NAME'
        pprint.pprint(d)
        print traceback.format_exc()
        raise ValueError('IMPROPER PARSING OF {}'.format(path))
    if not self.simulate:
        # Perform actual insertion into db. Because db connections cannot
        # be pickled, every insertion needs to create a new connection
        # to the db.
        conn = MongoClient(self.host, self.port)
        db = conn[self.database]
        if self.user:
            db.authenticate(self.user, self.password)
        coll = db[self.collection]
        result = coll.find_one({"dir_name": d["dir_name"]})
        if result is None or self.update_duplicates:
            # Insert dos data into gridfs and then remove it from the dict.
            # DOS data tends to be above the 4Mb limit for mongo docs. A
            # ref to the dos file is in the dos_fs_id.
            if self.parse_dos and "calculations" in d:
                for calc in d["calculations"]:
                    if "dos" in calc:
                        dos = json.dumps(calc["dos"], cls=MontyEncoder)
                        fs = gridfs.GridFS(db, "dos_fs")
                        dosid = fs.put(dos)
                        calc["dos_fs_id"] = dosid
                        del calc["dos"]
            d["last_updated"] = datetime.datetime.today()
            if result is None:
                # New doc: draw a fresh task id from the atomic counter.
                if ("task_id" not in d) or (not d["task_id"]):
                    d["task_id"] = "mp-{}".format(
                        db.counter.find_one_and_update(
                            {"_id": "taskid"}, {"$inc": {"c": 1}})["c"])
                logger.info("Inserting {} with taskid = {}".format(
                    d["dir_name"], d["task_id"]))
            elif self.update_duplicates:
                # Re-use the existing task id when overwriting.
                d["task_id"] = result["task_id"]
                logger.info("Updating {} with taskid = {}".format(
                    d["dir_name"], d["task_id"]))
            # Fireworks processing
            self.process_fw(path, d)
            try:
                # Add oxide_type derived from the final output structure.
                struct = Structure.from_dict(d["output"]["crystal"])
                d["oxide_type"] = oxide_type(struct)
            except:
                logger.error("can't get oxide_type for {}".format(
                    d["task_id"]))
                d["oxide_type"] = None
            # Override incorrect outcar subdocs for two step relaxations
            if "optimize structure" in d['task_type'] and \
                    os.path.exists(os.path.join(path, "relax2")):
                try:
                    run_stats = {}
                    for i in [1, 2]:
                        o_path = os.path.join(path, "relax" + str(i),
                                              "OUTCAR")
                        # Fall back to the gzipped OUTCAR if present.
                        o_path = o_path if os.path.exists(
                            o_path) else o_path + ".gz"
                        outcar = Outcar(o_path)
                        d["calculations"][
                            i - 1]["output"]["outcar"] = outcar.as_dict()
                        run_stats["relax" + str(i)] = outcar.run_stats
                except:
                    logger.error("Bad OUTCAR for {}.".format(path))
                try:
                    # Aggregate timing stats over both relaxation steps.
                    overall_run_stats = {}
                    for key in [
                            "Total CPU time used (sec)", "User time (sec)",
                            "System time (sec)", "Elapsed time (sec)"
                    ]:
                        overall_run_stats[key] = sum(
                            [v[key] for v in run_stats.values()])
                    run_stats["overall"] = overall_run_stats
                except:
                    logger.error("Bad run stats for {}.".format(path))
                d["run_stats"] = run_stats
            # add is_compatible
            mpc = MaterialsProjectCompatibility("Advanced")
            try:
                func = d["pseudo_potential"]["functional"]
                labels = d["pseudo_potential"]["labels"]
                symbols = ["{} {}".format(func, label) for label in labels]
                # Dummy zero-energy entry: only parameters matter for the
                # compatibility check.
                parameters = {
                    "run_type": d["run_type"],
                    "is_hubbard": d["is_hubbard"],
                    "hubbards": d["hubbards"],
                    "potcar_symbols": symbols
                }
                entry = ComputedEntry(Composition(d["unit_cell_formula"]),
                                      0.0,
                                      0.0,
                                      parameters=parameters,
                                      entry_id=d["task_id"])
                d['is_compatible'] = bool(mpc.process_entry(entry))
            except:
                traceback.print_exc()
                print 'ERROR in getting compatibility'
                d['is_compatible'] = None
            # task_type dependent processing
            if 'static' in d['task_type']:
                launch_doc = launches_coll.find_one(
                    {
                        "fw_id": d['fw_id'],
                        "launch_dir": {
                            "$regex": d["dir_name"]
                        }
                    }, {"action.stored_data": 1})
                for i in [
                        "conventional_standard_structure",
                        "symmetry_operations", "symmetry_dataset",
                        "refined_structure"
                ]:
                    try:
                        d['stored_data'][i] = launch_doc['action'][
                            'stored_data'][i]
                    except:
                        pass
            # parse band structure if necessary
            if ('band structure' in d['task_type'] or "Uniform" in d['task_type'])\
                    and d['state'] == 'successful':
                launch_doc = launches_coll.find_one(
                    {
                        "fw_id": d['fw_id'],
                        "launch_dir": {
                            "$regex": d["dir_name"]
                        }
                    }, {"action.stored_data": 1})
                vasp_run = Vasprun(zpath(os.path.join(path, "vasprun.xml")),
                                   parse_projected_eigen=True)
                if 'band structure' in d['task_type']:
                    def string_to_numlist(stringlist):
                        # Parse "x y z" whitespace-separated floats.
                        g = re.search(
                            '([0-9\-\.eE]+)\s+([0-9\-\.eE]+)\s+([0-9\-\.eE]+)',
                            stringlist)
                        return [float(g.group(i)) for i in range(1, 4)]

                    for i in ["kpath_name", "kpath"]:
                        d['stored_data'][i] = launch_doc['action'][
                            'stored_data'][i]
                    kpoints_doc = d['stored_data']['kpath']['kpoints']
                    for i in kpoints_doc:
                        # Only convert values still stored as strings.
                        if isinstance(kpoints_doc[i], six.string_types):
                            kpoints_doc[i] = string_to_numlist(
                                kpoints_doc[i])
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']
                        ['efermi'],
                        line_mode=True)
                else:
                    bs = vasp_run.get_band_structure(
                        efermi=d['calculations'][0]['output']['outcar']
                        ['efermi'],
                        line_mode=False)
                # Band structures are large: store in GridFS, keep the id.
                bs_json = json.dumps(bs.as_dict(), cls=MontyEncoder)
                fs = gridfs.GridFS(db, "band_structure_fs")
                bs_id = fs.put(bs_json)
                d['calculations'][0]["band_structure_fs_id"] = bs_id
                # also override band gap in task doc
                gap = bs.get_band_gap()
                vbm = bs.get_vbm()
                cbm = bs.get_cbm()
                update_doc = {
                    'bandgap': gap['energy'],
                    'vbm': vbm['energy'],
                    'cbm': cbm['energy'],
                    'is_gap_direct': gap['direct']
                }
                d['analysis'].update(update_doc)
                d['calculations'][0]['output'].update(update_doc)
            coll.update_one({"dir_name": d["dir_name"]}, {'$set': d},
                            upsert=True)
            return d["task_id"], d
        else:
            logger.info("Skipping duplicate {}".format(d["dir_name"]))
            return result["task_id"], result
    else:
        d["task_id"] = 0
        logger.info(
            "Simulated insert into database for {} with task_id {}".format(
                d["dir_name"], d["task_id"]))
        return 0, d