def post_process(self, mat):
    """
    Any extra post-processing on a material doc.

    Mutates ``mat`` in place:

    * if a ``structure`` is present, it is round-tripped through
      ``Structure`` and the result of ``structure_metadata`` is merged
      into the document (no cell conversion is performed here);
    * ``deprecated`` is set to True when either ``structure`` or
      ``thermo.energy_per_atom`` is listed in ``mat["invalid_props"]``,
      otherwise to False;
    * ``piezo.ionic`` / ``piezo.static``, when present, are reordered from
      the VASP voigt ordering to standard voigt notation.

    Args:
        mat (dict): material document; must contain ``invalid_props``

    Raises:
        KeyError: if ``invalid_props`` is missing from ``mat``
    """
    # Add structure metadata back into document
    if "structure" in mat:
        structure = Structure.from_dict(mat["structure"])
        mat["structure"] = structure.as_dict()
        mat.update(structure_metadata(structure))

    # Deprecate materials with bad structures or energies
    invalid_props = mat["invalid_props"]
    mat["deprecated"] = bool(
        "structure" in invalid_props
        or "thermo.energy_per_atom" in invalid_props
    )

    # Reorder voigt output from VASP to standard voigt notation
    for component, path in (("ionic", "piezo.ionic"),
                            ("static", "piezo.static")):
        if has(mat, path):
            mat["piezo"][component] = PiezoTensor.from_vasp_voigt(
                mat["piezo"][component]).voigt.tolist()
def add_thermo(mat, thermo):
    """
    Copies the hull-related thermo values onto the material document.

    Each value is copied only when it is present in ``thermo``; missing
    values leave ``mat`` untouched.

    Args:
        mat (dict): document to update in place
        thermo (dict): thermo document holding values under ``thermo.*``
    """
    copied_fields = (
        ("e_above_hull", "thermo.e_above_hull"),
        ("formation_energy_per_atom", "thermo.formation_energy_per_atom"),
        ("decomposes_to", "thermo.decomposes_to"),
    )
    for target_key, source_path in copied_fields:
        if not has(thermo, source_path):
            continue
        set_(mat, target_key, get(thermo, source_path))
def old_style_mat(new_style_mat):
    """
    Builds the base old-style (MP mapidoc) document out of a new-style
    materials document.

    Args:
        new_style_mat (dict): new-style materials document

    Returns:
        dict: the old-style document
    """
    mat = {}
    # The conversion dict maps old-style keys to new-style paths
    for old_key, new_path in mp_conversion_dict.items():
        if not has(new_style_mat, new_path):
            continue
        set_(mat, old_key, get(new_style_mat, new_path))

    mat["is_ordered"] = True
    mat["is_compatible"] = True

    struc = Structure.from_dict(mat["structure"])
    composition = struc.composition
    mat["oxide_type"] = oxide_type(struc)
    mat["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())

    # Label the sorted reduced amounts A, B, C, ...
    amounts = sorted(mat["reduced_cell_formula"].values())
    anonymous = {}
    for position, amount in enumerate(amounts):
        anonymous[string.ascii_uppercase[position]] = float(amount)
    mat["anonymous_formula"] = anonymous

    mat["initial_structure"] = new_style_mat.get("initial_structure", None)
    mat["nsites"] = struc.get_primitive_structure().num_sites

    set_(mat, "pseudo_potential.functional", "PBE")
    potcar_spec = get(new_style_mat, "calc_settings.potcar_spec")
    labels = [spec["titel"].split()[1] for spec in potcar_spec]
    set_(mat, "pseudo_potential.labels", labels)

    mat["ntask_ids"] = len(get(new_style_mat, "task_ids"))
    set_(mat, "pseudo_potential.pot_type", "paw")

    add_blessed_tasks(mat, new_style_mat)
    add_cifs(mat)
    check_relaxation(mat, new_style_mat)
    return mat
def add_es(mat, new_mat, es):
    """
    Adds electronic-structure bookkeeping to the material document.

    The first origin whose task type contains "Line" is taken as the band
    structure source and the first containing "Uniform" as the DOS source.
    Any exception raised while filling in the values is printed and
    swallowed; ``has_bandstructure`` is always set afterwards.

    Args:
        mat (dict): document to update in place
        new_mat (dict): new-style material document (read for ``origins``)
        es (dict): electronic structure document (read for ``band_gap``)
    """
    bs_origin = None
    dos_origin = None

    def first_origin(marker):
        # First origin whose task type mentions ``marker``, else None
        for origin in new_mat.get("origins", []):
            if marker in origin["task_type"]:
                return origin
        return None

    def functional_label(origin):
        if "+U" in origin["task_type"]:
            return "GGA+U"
        return "GGA"

    try:
        bs_origin = first_origin("Line")
        dos_origin = first_origin("Uniform")

        if bs_origin:
            bs_path = "band_structure.{}.task_id".format(
                functional_label(bs_origin))
            set_(mat, bs_path, bs_origin["task_id"])

        if has(es, "band_gap"):
            set_(mat, "band_gap.search_gap", get(es, "band_gap"))

        if dos_origin:
            dos_path = "dos.{}.task_id".format(functional_label(dos_origin))
            set_(mat, dos_path, dos_origin["task_id"])
    except Exception as e:
        print("Error in adding electronic structure: {}".format(e))

    # Both a line-mode and a uniform calculation are required
    mat["has_bandstructure"] = bool(bs_origin and dos_origin)
def filter_and_group_tasks(self, tasks):
    """
    Groups tasks by structure matching.

    Only tasks whose INCAR-derived task type is in ``self.allowed_tasks``
    are considered.

    Args:
        tasks ([dict]): task documents

    Returns:
        [[dict]]: task documents grouped by matching output structure
    """
    filtered_tasks = []
    for task in tasks:
        if task_type(task['input']['incar']) in self.allowed_tasks:
            filtered_tasks.append(task)

    structures = [
        Structure.from_dict(task["output"]['structure'])
        for task in filtered_tasks
    ]

    split_by_magmom = self.separate_mag_orderings
    for position, structure in enumerate(structures):
        if split_by_magmom and has(structure.site_properties, "magmom"):
            # Move magmoms onto the species so the matcher sees them
            structure.add_spin_by_site(structure.site_properties['magmom'])
            structure.remove_site_property('magmom')
        # Remember which task each structure came from
        structure.index = position

    matcher = StructureMatcher(
        ltol=self.ltol,
        stol=self.stol,
        angle_tol=self.angle_tol,
        primitive_cell=True,
        scale=True,
        attempt_supercell=False,
        allow_subset=False,
        comparator=ElementComparator(),
    )

    grouped_tasks = []
    for group in matcher.group_structures(structures):
        grouped_tasks.append([filtered_tasks[member.index] for member in group])
    return grouped_tasks
def add_snl(mat, snl=None):
    """
    Copies SNL information onto the material document.

    Args:
        mat (dict): document to update in place
        snl (dict): SNL document; a falsy value is treated as empty

    ``created_at`` falls back to the current UTC time when the SNL does not
    carry ``snl.about.created_at``.
    """
    snl = snl or {}

    snl_doc = snl.get("snl", None)
    mat["snl"] = snl_doc
    mat["snl_final"] = snl_doc

    if has(snl, "snl.about.created_at"):
        mat["created_at"] = get(snl, "snl.about.created_at")
    else:
        mat["created_at"] = datetime.utcnow()

    mat["icsd_ids"] = snl.get("icsd_ids", [])
def task_to_prop_list(self, task):
    """
    Converts a task into a list of properties with associated metadata.

    Args:
        task (dict): task document

    Returns:
        [dict]: one entry per configured property that has a quality score
            for this task's type and is present in the task
    """
    t_type = task_type(task['orig_inputs'])
    t_id = task["task_id"]

    # Convert the task doc into a series of properties in the materials
    # doc with the right document structure
    props = []
    for prop in self.__settings:
        scores = prop["quality_score"]
        if t_type not in scores.keys():
            continue

        source_key = prop["tasks_key"]
        if not has(task, source_key):
            # Only required properties are reported as failures
            if not prop.get("optional", False):
                self.logger.error("Failed getting {} for task: {}".format(source_key, t_id))
            continue

        props.append({
            "value": get(task, source_key),
            "task_type": t_type,
            "task_id": t_id,
            "quality_score": scores[t_type],
            "track": prop.get("track", False),
            "aggregate": prop.get("aggregate", False),
            "last_updated": task[self.tasks.lu_field],
            "energy": get(task, "output.energy", 0.0),
            "materials_key": prop["materials_key"],
        })
    return props
def add_elastic(mat, elastic):
    """
    Builds ``mat["elasticity"]`` from an elasticity document.

    Every old-style key is filled from its new-style counterpart (None when
    absent). ``nsites`` comes from the elasticity structure when available
    and from the material structure otherwise.

    Args:
        mat (dict): document to update in place
        elastic (dict): document holding the values under ``elasticity``
    """
    # old-style key -> key inside elastic["elasticity"]
    es_aliases = {
        "G_Reuss": "g_reuss",
        "G_VRH": "g_vrh",
        "G_Voigt": "g_voigt",
        "G_Voigt_Reuss_Hill": "g_vrh",
        "K_Reuss": "k_reuss",
        "K_VRH": "k_vrh",
        "K_Voigt": "k_voigt",
        "K_Voigt_Reuss_Hill": "k_vrh",
        # "calculations": "calculations", <--- TODO: Add to elastic builder?
        "elastic_anisotropy": "universal_anisotropy",
        "elastic_tensor": "elastic_tensor",
        "homogeneous_poisson": "homogeneous_poisson",
        "poisson_ratio": "homogeneous_poisson",
        "universal_anisotropy": "universal_anisotropy",
        "elastic_tensor_original": "elastic_tensor_original",
        "compliance_tensor": "compliance_tensor",
        "third_order": "third_order",
    }

    source = elastic["elasticity"]
    converted = {}
    for old_key, new_key in es_aliases.items():
        converted[old_key] = source.get(new_key, None)
    mat["elasticity"] = converted

    if has(elastic, "elasticity.structure.sites"):
        sites = get(elastic, "elasticity.structure.sites")
    else:
        sites = get(mat, "structure.sites")
    mat["elasticity"]["nsites"] = len(sites)
def sandbox_props(mat):
    """
    Fills in the sandbox fields ``sbxn`` and ``sbxd`` of a material doc.

    ``sbxn`` keeps its existing value, or gets the default sandbox list when
    the key is absent. ``sbxd`` gets one entry per sandbox containing the
    sandboxed properties found in ``mat`` plus the sandbox ``id``.

    Args:
        mat (dict): document to update in place
    """
    mat["sbxn"] = mat.get("sbxn", ["core", "jcesr", "vw", "shyamd", "kitchaev"])
    mat["sbxd"] = []

    # The values do not depend on the sandbox, so resolve them once. The
    # presence check is made on the path that is actually read (v).
    shared_props = {
        k: get(mat, v) for k, v in SANDBOXED_PROPERTIES.items() if has(mat, v)
    }
    for sbx in mat["sbxn"]:
        sbx_d = dict(shared_props)
        sbx_d["id"] = sbx
        mat["sbxd"].append(sbx_d)
def substitute(d: Dict, aliases: Dict):
    """
    Renames keys of a dictionary in place.

    Keys may be multilevel, mongo-like dotted paths. For every
    ``alias -> key`` pair whose ``key`` exists in ``d``, the value is
    stored under ``alias`` and ``key`` is removed.

    Args:
        d: dictionary to modify
        aliases: mapping of new path to existing path
    """
    for new_path, old_path in aliases.items():
        if not has(d, old_path):
            continue
        value = get(d, old_path)
        set_(d, new_path, value)
        unset(d, old_path)
def add_elastic(mat, new_style_mat):
    """
    Completes ``mat["elasticity"]`` with ``nsites`` and ``warnings``.

    Does nothing when ``new_style_mat`` has no ``elasticity`` entry.
    ``nsites`` comes from the elasticity structure when available and from
    the material structure otherwise. ``warnings`` defaults to an empty
    list when it is missing or None.

    Args:
        mat (dict): old-style document to update in place; expected to
            already contain an ``elasticity`` dict
        new_style_mat (dict): new-style material document
    """
    if "elasticity" not in new_style_mat:
        return

    if has(new_style_mat, "elasticity.structure.sites"):
        sites = get(new_style_mat, "elasticity.structure.sites")
    else:
        sites = get(mat, "structure.sites")
    mat["elasticity"]["nsites"] = len(sites)

    # The lookup has to be made on the document; the original passed the
    # path string as the object, so this default was never applied.
    if get(mat, "elasticity.warnings", None) is None:
        mat["elasticity"]["warnings"] = []
def sandbox_props(mat):
    """
    Fills in the sandbox fields ``sbxn`` and ``sbxd`` of a material doc.

    ``sbxn`` falls back to ``["core"]`` when it is missing or empty.
    ``sbxd`` gets one entry per sandbox containing the sandboxed properties
    found in ``mat`` plus the sandbox ``id``.

    Args:
        mat (dict): document to update in place
    """
    # .get() so that a document without "sbxn" gets the default instead of
    # raising KeyError
    mat["sbxn"] = mat.get("sbxn") or ["core"]
    mat["sbxd"] = []

    # The values do not depend on the sandbox, so resolve them once. The
    # presence check is made on the path that is actually read (v).
    shared_props = {
        k: get(mat, v) for k, v in SANDBOXED_PROPERTIES.items() if has(mat, v)
    }
    for sbx in mat["sbxn"]:
        sbx_d = dict(shared_props)
        sbx_d["id"] = sbx
        mat["sbxd"].append(sbx_d)
def old_style_mat(new_style_mat):
    """
    Builds the base old-style (MP mapidoc) document out of a new-style
    materials document.

    Args:
        new_style_mat (dict): new-style materials document

    Returns:
        dict: the old-style document
    """
    mp_conversion_dict = _settings["conversion_dict"]
    mag_types = _settings["mag_types"]  # read from settings; not used below

    # The conversion dict copies most values straight across
    mat = {}
    for old_key, new_path in mp_conversion_dict.items():
        if not has(new_style_mat, new_path):
            continue
        set_(mat, old_key, get(new_style_mat, new_path))

    # Anything coming through DFT is always ordered
    mat["is_ordered"] = True
    mat["is_compatible"] = True

    struc = Structure.from_dict(mat["structure"])
    composition = struc.composition
    mat["oxide_type"] = oxide_type(struc)
    mat["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())

    # Label the sorted reduced amounts A, B, C, ...
    amounts = sorted(mat["reduced_cell_formula"].values())
    anonymous = {}
    for position, amount in enumerate(amounts):
        anonymous[string.ascii_uppercase[position]] = float(amount)
    mat["anonymous_formula"] = anonymous

    mat["initial_structure"] = new_style_mat.get("initial_structure", None)
    mat["nsites"] = struc.get_primitive_structure().num_sites

    set_(mat, "pseudo_potential.functional", "PBE")
    potcar_spec = get(new_style_mat, "calc_settings.potcar_spec")
    labels = [spec["titel"].split()[1] for spec in potcar_spec]
    set_(mat, "pseudo_potential.labels", labels)
    set_(mat, "pseudo_potential.pot_type", "paw")

    blessed = {}
    for origin in new_style_mat["origins"]:
        blessed[origin["task_type"]] = origin["task_id"]
    mat["blessed_tasks"] = blessed

    mat["deprecated_tasks"] = new_style_mat.get("deprecated_tasks", [])
    mat["ntask_ids"] = len(mat["task_ids"])

    return mat
def update_targets(self, items):
    """
    Inserts the processed SNL documents into the snls collection.

    For every material the SNL with the earliest ``created_at`` is kept,
    together with the ICSD ids gathered from all of its SNLs.

    Args:
        items ([dict]): dicts mapping a material id to its list of SNL
            dicts; falsy entries are ignored
    """
    snls = []
    for snl_dict in filter(None, items):
        for mat_id, snl_list in snl_dict.items():
            if not snl_list:
                # Nothing to pick from; previously this raised IndexError
                self.logger.debug("No SNLs for {}".format(mat_id))
                continue

            # min() returns the first minimal element, exactly like
            # sorted(...)[0], without sorting the whole list
            oldest_snl = min(
                snl_list, key=lambda x: StructureNL.from_dict(x).created_at)
            icsd_ids = [
                get(entry, "about._icsd.icsd_id")
                for entry in snl_list
                if has(entry, "about._icsd")
            ]
            snls.append(
                {self.snls.key: mat_id, "snl": oldest_snl, "icsd_ids": icsd_ids})

    if snls:
        self.snls.update(snls)
    else:
        self.logger.info("No items to update")
def add_thermo(mat, new_style_mat):
    """
    Adds the thermo values to the material document, with sandboxing.

    A material without ``thermo_docs`` is marked deprecated. Deprecated
    materials are left without thermo values. Otherwise the values of the
    primary sandbox ("core" when available, else the first one) are copied
    onto ``mat`` and ``sbxd`` is rebuilt from all thermo documents.

    Args:
        mat (dict): old-style document to update in place
        new_style_mat (dict): new-style material document
    """
    if "thermo_docs" not in new_style_mat:
        mat["deprecated"] = True

    if mat["deprecated"]:
        return

    thermo = new_style_mat["thermo_docs"]
    sandboxes = mat["sbxn"]
    main_sbx = "core" if "core" in sandboxes else sandboxes[0]

    # Get the primary document and set in mat document
    core_thermo = next(filter(lambda d: main_sbx in d["_sbxn"], thermo))
    core_values = core_thermo["thermo"]
    mat["e_above_hull"] = core_values["e_above_hull"]
    mat["formation_energy_per_atom"] = core_values["formation_energy_per_atom"]
    if "decomposes_to" in core_values:
        mat["decomposes_to"] = core_values["decomposes_to"]

    sandbox_props = {
        "e_above_hull": "thermo.e_above_hull",
        "decomposes_to": "thermo.decomposes_to",
    }

    # One entry per sandbox; later documents overwrite earlier ones
    sbxd = {}
    for doc in thermo:
        for sbx in doc["_sbxn"]:
            entry = {}
            for out_key, doc_path in sandbox_props.items():
                if has(doc, doc_path):
                    entry[out_key] = get(doc, doc_path)
            entry["id"] = sbx
            sbxd[sbx] = entry

    mat["sbxd"] = list(sbxd.values())
def old_style_mat(new_mat):
    """
    Creates the base document for the old MP mapidoc style from the new
    document structure.

    Args:
        new_mat (dict): new-style materials document

    Returns:
        dict: the old-style document
    """
    mat = {}
    for mp, new_key in mp_conversion_dict.items():
        if has(new_mat, new_key):
            set_(mat, mp, get(new_mat, new_key))

    # Key was previously misspelled "is_orderd"
    mat["is_ordered"] = True
    mat["is_compatible"] = True

    struc = Structure.from_dict(mat["structure"])
    composition = struc.composition
    mat["oxide_type"] = oxide_type(struc)
    # The reduced formula must come from the reduced composition; the full
    # cell contents are kept under unit_cell_formula
    mat["reduced_cell_formula"] = composition.reduced_composition.as_dict()
    mat["unit_cell_formula"] = composition.as_dict()
    mat["full_formula"] = "".join(struc.formula.split())

    # Label the sorted reduced amounts A, B, C, ...
    vals = sorted(mat["reduced_cell_formula"].values())
    mat["anonymous_formula"] = {
        string.ascii_uppercase[i]: float(val) for i, val in enumerate(vals)
    }

    set_(mat, "original_task_id", get(new_mat, "material_id"))
    set_(mat, "ntask_ids", len(get(new_mat, "task_ids")))
    set_(mat, "input.incar",
         get(new_mat, "inputs.structure_optimization.incar"))
    set_(mat, "input.kpoints",
         get(new_mat, "inputs.structure_optimization.kpoints"))
    set_(mat, "encut",
         get(new_mat, "inputs.structure_optimization.incar.ENCUT"))

    # NOTE(review): these two paths start with "input." while the lookups
    # above use "inputs." - confirm which one the new documents carry
    mat["pseudo_potential"] = {
        "pot_type": "paw",
        "labels": get(new_mat, "input.structure_optimization.potcar.symbols"),
        "functional": get(
            new_mat, "input.structure_optimization.potcar.functional"),
    }
    return mat
def filter_and_group_tasks(self, tasks):
    """
    Groups tasks by structure matching.

    Only tasks whose ``orig_inputs``-derived task type is in
    ``self.allowed_tasks`` are considered.

    Args:
        tasks ([dict]): task documents

    Yields:
        [dict]: the task documents of one group of matching structures
    """
    filtered_tasks = []
    for task in tasks:
        if task_type(task["orig_inputs"]) in self.allowed_tasks:
            filtered_tasks.append(task)

    def annotated_structure(position, task):
        # Output structure tagged with its task position and magnetization
        struct = Structure.from_dict(task["output"]["structure"])
        struct.index = position
        total_mag = get(
            task, "calcs_reversed.0.output.outcar.total_magnetization", 0)
        struct.total_magnetization = total_mag or 0

        # a fix for very old tasks that did not report site-projected
        # magnetic moments so that we can group them appropriately
        if "magmom" not in struct.site_properties:
            if get(task, "input.parameters.ISPIN", 1) == 2:
                if has(task, "input.parameters.MAGMOM"):
                    # TODO: map input structure sites to output structure sites
                    struct.add_site_property(
                        "magmom", task["input"]["parameters"]["MAGMOM"])
        return struct

    structures = [
        annotated_structure(position, task)
        for position, task in enumerate(filtered_tasks)
    ]

    grouped_structures = group_structures(
        structures,
        ltol=self.ltol,
        stol=self.stol,
        angle_tol=self.angle_tol,
        separate_mag_orderings=self.separate_mag_orderings,
    )

    for group in grouped_structures:
        members = []
        for member in group:
            members.append(filtered_tasks[member.index])
        yield members
def group_structures(structures, ltol=0.2, stol=0.3, angle_tol=5, separate_mag_orderings=False):
    """
    Groups structures according to space group and structure matching

    Args:
        structures ([Structure]): list of structures to group
        ltol (float): StructureMatcher tuning parameter for matching tasks to materials
        stol (float): StructureMatcher tuning parameter for matching tasks to materials
        angle_tol (float): StructureMatcher tuning parameter for matching tasks to materials
        separate_mag_orderings (bool): Separate magnetic orderings into different materials

    Yields:
        [Structure]: one group of matching structures at a time

    Note:
        With ``separate_mag_orderings`` the input structures are modified
        in place: the ``magmom`` site property is converted into spins.
    """
    if separate_mag_orderings:
        for structure in structures:
            if has(structure.site_properties, "magmom"):
                structure.add_spin_by_site(structure.site_properties['magmom'])
                structure.remove_site_property('magmom')

    sm = StructureMatcher(ltol=ltol, stol=stol, angle_tol=angle_tol,
                          primitive_cell=True, scale=True,
                          attempt_supercell=False, allow_subset=False,
                          comparator=ElementComparator())

    def get_sg(struc):
        return struc.get_space_group_info(symprec=0.1)[1]

    def sg_of(pair):
        return pair[0]

    # Symmetry detection is expensive, so compute each structure's space
    # group number exactly once and carry it along with the structure. The
    # sort is stable, so structures keep their input order within a group.
    tagged = sorted(((get_sg(struc), struc) for struc in structures), key=sg_of)

    # First group by spacegroup number then by structure matching
    for _, pregroup in groupby(tagged, key=sg_of):
        yield from sm.group_structures([struc for _, struc in pregroup])
def add_snl(mat, snl=None):
    """
    Copies SNL information onto the material document.

    Args:
        mat (dict): document to update in place
        snl (dict): SNL document; None (the default) or any other falsy
            value is treated as an empty document

    ``created_at`` is None when the SNL does not carry
    ``snl.about.created_at``.
    """
    # The default is None, so guard before calling .get() on it
    snl = snl or {}

    snl_doc = snl.get("snl", None)
    mat["snl"] = snl_doc
    mat["snl_final"] = snl_doc

    if has(snl, "snl.about.created_at"):
        mat["created_at"] = get(snl, "snl.about.created_at")
    else:
        mat["created_at"] = None

    mat["icsd_ids"] = snl.get("icsd_ids", [])
def substitute(d, aliases):
    """
    Renames keys of a dictionary in place.

    For every ``alias -> key`` pair whose ``key`` exists in ``d``, the value
    is stored under ``alias`` and ``key`` is removed.

    Args:
        d (dict): dictionary to modify
        aliases (dict): mapping of new path to existing path
    """
    for new_path, old_path in aliases.items():
        if not has(d, old_path):
            continue
        value = get(d, old_path)
        set_(d, new_path, value)
        unset(d, old_path)
def run_task(self, fw_spec):
    """
    Parses a VASP calculation directory and stores the resulting task doc.

    The directory is ``calc_dir`` if given, else the location named by
    ``calc_loc``, else the current working directory. The task document is
    inserted into the database named by ``db_file``, or dumped to
    ``task.json`` when no database file is configured.

    Args:
        fw_spec (dict): the firework spec

    Returns:
        FWAction: stores the task id, optionally defuses children for an
            unsuccessful calculation and pushes ``task_fields_to_push``
            into the spec of the next firetask/firework

    Raises:
        RuntimeError: if the calculation was unsuccessful and
            ``defuse_unsuccessful`` is "fizzle" or an unknown option, or if
            ``task_fields_to_push`` is not a dictionary
    """
    # get the directory that contains the VASP dir to parse
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    # parse the VASP directory
    logger.info("PARSING DIRECTORY: {}".format(calc_dir))

    drone = VaspDrone(additional_fields=self.get("additional_fields"),
                      parse_dos=self.get("parse_dos", False),
                      bandstructure_mode=self.get("bandstructure_mode", False),
                      parse_chgcar=self.get("parse_chgcar", False),
                      parse_aeccar=self.get("parse_aeccar", False))

    # assimilate (i.e., parse)
    task_doc = drone.assimilate(calc_dir)

    # Check for additional keys to set based on the fw_spec
    if self.get("fw_spec_field"):
        task_doc.update(fw_spec[self.get("fw_spec_field")])

    # get the database connection
    db_file = env_chk(self.get('db_file'), fw_spec)

    # db insertion or taskdoc dump
    if not db_file:
        with open("task.json", "w") as f:
            f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
    else:
        mmdb = VaspCalcDb.from_db_file(db_file, admin=True)
        t_id = mmdb.insert_task(
            task_doc,
            use_gridfs=self.get("parse_dos", False)
            or bool(self.get("bandstructure_mode", False))
            or self.get("parse_chgcar", False)
            or self.get("parse_aeccar", False))
        logger.info("Finished parsing with task_id: {}".format(t_id))

    defuse_children = False
    if task_doc["state"] != "successful":
        defuse_unsuccessful = self.get("defuse_unsuccessful",
                                       DEFUSE_UNSUCCESSFUL)
        if defuse_unsuccessful is True:
            defuse_children = True
        elif defuse_unsuccessful is False:
            pass
        elif defuse_unsuccessful == "fizzle":
            raise RuntimeError(
                "VaspToDb indicates that job is not successful "
                "(perhaps your job did not converge within the "
                "limit of electronic/ionic iterations)!")
        else:
            raise RuntimeError("Unknown option for defuse_unsuccessful: "
                               "{}".format(defuse_unsuccessful))

    task_fields_to_push = self.get("task_fields_to_push", None)
    update_spec = {}
    if task_fields_to_push:
        if isinstance(task_fields_to_push, dict):
            for key, path_in_task_doc in task_fields_to_push.items():
                if has(task_doc, path_in_task_doc):
                    update_spec[key] = get(task_doc, path_in_task_doc)
                else:
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning(
                        "Could not find {} in task document. Unable to push to next firetask/firework"
                        .format(path_in_task_doc))
        else:
            # The literal braces must be doubled: an unescaped {key: path}
            # is read by str.format as a field named "key" and raises
            # KeyError instead of this RuntimeError.
            raise RuntimeError(
                "Inappropriate type {} for task_fields_to_push. It must be a "
                "dictionary of format: {{key: path}} where key refers to a field "
                "in the spec and path is a full mongo-style path to a "
                "field in the task document".format(
                    type(task_fields_to_push)))

    return FWAction(stored_data={"task_id": task_doc.get("task_id", None)},
                    defuse_children=defuse_children, update_spec=update_spec)
def run_task(self, fw_spec):
    """
    Parses a VASP calculation directory and stores the resulting task doc.

    The directory is ``calc_dir`` if given, else the location named by
    ``calc_loc``, else the current working directory. The task document is
    inserted into the database named by ``db_file``, or dumped to
    ``task.json`` when no database file is configured.

    Args:
        fw_spec (dict): the firework spec

    Returns:
        FWAction: stores the task id, optionally defuses children for an
            unsuccessful calculation and pushes ``task_fields_to_push``
            into the spec of the next firetask/firework

    Raises:
        RuntimeError: if the calculation was unsuccessful and
            ``defuse_unsuccessful`` is "fizzle" or an unknown option, or if
            ``task_fields_to_push`` is not a dictionary
    """
    # get the directory that contains the VASP dir to parse
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    # parse the VASP directory
    logger.info("PARSING DIRECTORY: {}".format(calc_dir))

    drone = VaspDrone(additional_fields=self.get("additional_fields"),
                      parse_dos=self.get("parse_dos", False),
                      bandstructure_mode=self.get("bandstructure_mode", False),
                      parse_chgcar=self.get("parse_chgcar", False),
                      parse_aeccar=self.get("parse_aeccar", False))

    # assimilate (i.e., parse)
    task_doc = drone.assimilate(calc_dir)

    # Check for additional keys to set based on the fw_spec
    if self.get("fw_spec_field"):
        task_doc.update(fw_spec[self.get("fw_spec_field")])

    # get the database connection
    db_file = env_chk(self.get('db_file'), fw_spec)

    # db insertion or taskdoc dump
    if not db_file:
        with open("task.json", "w") as f:
            f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
    else:
        mmdb = VaspCalcDb.from_db_file(db_file, admin=True)
        t_id = mmdb.insert_task(
            task_doc,
            use_gridfs=self.get("parse_dos", False)
            or bool(self.get("bandstructure_mode", False))
            or self.get("parse_chgcar", False)
            or self.get("parse_aeccar", False))
        logger.info("Finished parsing with task_id: {}".format(t_id))

    defuse_children = False
    if task_doc["state"] != "successful":
        defuse_unsuccessful = self.get("defuse_unsuccessful",
                                       DEFUSE_UNSUCCESSFUL)
        if defuse_unsuccessful is True:
            defuse_children = True
        elif defuse_unsuccessful is False:
            pass
        elif defuse_unsuccessful == "fizzle":
            raise RuntimeError(
                "VaspToDb indicates that job is not successful "
                "(perhaps your job did not converge within the "
                "limit of electronic/ionic iterations)!")
        else:
            raise RuntimeError("Unknown option for defuse_unsuccessful: "
                               "{}".format(defuse_unsuccessful))

    task_fields_to_push = self.get("task_fields_to_push", None)
    update_spec = {}
    if task_fields_to_push:
        if isinstance(task_fields_to_push, dict):
            for key, path_in_task_doc in task_fields_to_push.items():
                if has(task_doc, path_in_task_doc):
                    update_spec[key] = get(task_doc, path_in_task_doc)
                else:
                    # Logger.warn is a deprecated alias of Logger.warning
                    logger.warning("Could not find {} in task document. Unable to push to next firetask/firework".format(path_in_task_doc))
        else:
            # The literal braces must be doubled: an unescaped {key: path}
            # is read by str.format as a field named "key" and raises
            # KeyError instead of this RuntimeError.
            raise RuntimeError("Inappropriate type {} for task_fields_to_push. It must be a "
                               "dictionary of format: {{key: path}} where key refers to a field "
                               "in the spec and path is a full mongo-style path to a "
                               "field in the task document".format(type(task_fields_to_push)))

    return FWAction(stored_data={"task_id": task_doc.get("task_id", None)},
                    defuse_children=defuse_children, update_spec=update_spec)