def run_task(self, fw_spec):
    """
    Parse the BoltzTraP output in ``./boltztrap`` and store the result.

    The document starts from the task's "additional_fields", is updated with
    the BoltztrapAnalyzer dict, and then gets structure and spacegroup
    metadata from the VASP run in the current working directory. If no
    db_file is resolved, the document (without the DOS) is written to
    ``boltztrap/boltztrap.json``; otherwise the DOS is stored in GridFS and
    the document is inserted into the ``boltztrap`` collection.

    Args:
        fw_spec: the Firework spec; passed to env_chk to resolve "db_file".
    """
    additional_fields = self.get("additional_fields", {})

    # pass the additional_fields first to avoid overriding BoltztrapAnalyzer items
    d = additional_fields.copy()

    btrap_dir = os.path.join(os.getcwd(), "boltztrap")
    d["boltztrap_dir"] = btrap_dir

    bta = BoltztrapAnalyzer.from_files(btrap_dir)
    d.update(bta.as_dict())
    d["scissor"] = bta.intrans["scissor"]

    # trim the output
    for x in ['cond', 'seebeck', 'kappa', 'hall', 'mu_steps', 'mu_doping', 'carrier_conc']:
        del d[x]

    # hall_doping is only kept when the task explicitly asks for it
    if not self.get("hall_doping"):
        del d["hall_doping"]

    bandstructure_dir = os.getcwd()
    d["bandstructure_dir"] = bandstructure_dir

    # add the structure (the OUTCAR half of the pair is not needed here)
    v, _ = get_vasprun_outcar(bandstructure_dir, parse_eigen=False, parse_dos=False)
    structure = v.final_structure
    d["structure"] = structure.as_dict()
    d["formula_pretty"] = structure.composition.reduced_formula
    d.update(get_meta_from_structure(structure))

    # add the spacegroup
    sg = SpacegroupAnalyzer(Structure.from_dict(d["structure"]), 0.1)
    d["spacegroup"] = {"symbol": sg.get_space_group_symbol(),
                       "number": sg.get_space_group_number(),
                       "point_group": sg.get_point_group_symbol(),
                       "source": "spglib",
                       "crystal_system": sg.get_crystal_system(),
                       "hall": sg.get_hall()}

    d["created_at"] = datetime.utcnow()

    db_file = env_chk(self.get('db_file'), fw_spec)

    if not db_file:
        # the DOS is dropped from the JSON dump
        del d["dos"]
        with open(os.path.join(btrap_dir, "boltztrap.json"), "w") as f:
            f.write(json.dumps(d, default=DATETIME_HANDLER))
    else:
        mmdb = VaspCalcDb.from_db_file(db_file, admin=True)

        # dos gets inserted into GridFS
        dos = json.dumps(d["dos"], cls=MontyEncoder)
        fsid, _ = mmdb.insert_gridfs(dos, collection="dos_boltztrap_fs",
                                     compress=True)
        d["dos_boltztrap_fs_id"] = fsid
        del d["dos"]

        # Collection.insert is deprecated since PyMongo 3 and removed in
        # PyMongo 4; insert_one is the single-document replacement.
        mmdb.db.boltztrap.insert_one(d)
def run_task(self, fw_spec):
    """
    Parse the BoltzTraP output in ``./boltztrap`` and store the result.

    The document starts from the task's "additional_fields", is updated with
    the BoltztrapAnalyzer dict, and then gets structure and spacegroup
    metadata from the VASP run in the current working directory. If no
    db_file is resolved, the document (without the DOS) is written to
    ``boltztrap/boltztrap.json``; otherwise the DOS is stored in GridFS and
    the document is inserted into the ``boltztrap`` collection.

    Args:
        fw_spec: the Firework spec; passed to env_chk to resolve "db_file".
    """
    additional_fields = self.get("additional_fields", {})

    # pass the additional_fields first to avoid overriding BoltztrapAnalyzer items
    d = additional_fields.copy()

    btrap_dir = os.path.join(os.getcwd(), "boltztrap")
    d["boltztrap_dir"] = btrap_dir

    bta = BoltztrapAnalyzer.from_files(btrap_dir)
    d.update(bta.as_dict())
    d["scissor"] = bta.intrans["scissor"]

    # trim the output
    for x in ['cond', 'seebeck', 'kappa', 'hall', 'mu_steps', 'mu_doping', 'carrier_conc']:
        del d[x]

    # hall_doping is only kept when the task explicitly asks for it
    if not self.get("hall_doping"):
        del d["hall_doping"]

    bandstructure_dir = os.getcwd()
    d["bandstructure_dir"] = bandstructure_dir

    # add the structure (the OUTCAR half of the pair is not needed here)
    v, _ = get_vasprun_outcar(bandstructure_dir, parse_eigen=False, parse_dos=False)
    structure = v.final_structure
    d["structure"] = structure.as_dict()
    d["formula_pretty"] = structure.composition.reduced_formula
    d.update(get_meta_from_structure(structure))

    # add the spacegroup
    sg = SpacegroupAnalyzer(Structure.from_dict(d["structure"]), 0.1)
    d["spacegroup"] = {"symbol": sg.get_space_group_symbol(),
                       "number": sg.get_space_group_number(),
                       "point_group": sg.get_point_group_symbol(),
                       "source": "spglib",
                       "crystal_system": sg.get_crystal_system(),
                       "hall": sg.get_hall()}

    d["created_at"] = datetime.utcnow()

    db_file = env_chk(self.get('db_file'), fw_spec)

    if not db_file:
        # the DOS is dropped from the JSON dump
        del d["dos"]
        with open(os.path.join(btrap_dir, "boltztrap.json"), "w") as f:
            f.write(json.dumps(d, default=DATETIME_HANDLER))
    else:
        mmdb = VaspCalcDb.from_db_file(db_file, admin=True)

        # dos gets inserted into GridFS
        dos = json.dumps(d["dos"], cls=MontyEncoder)
        fsid, _ = mmdb.insert_gridfs(dos, collection="dos_boltztrap_fs",
                                     compress=True)
        d["dos_boltztrap_fs_id"] = fsid
        del d["dos"]

        # Collection.insert is deprecated since PyMongo 3 and removed in
        # PyMongo 4; insert_one is the single-document replacement.
        mmdb.db.boltztrap.insert_one(d)
def add_wf_metadata(original_wf, structure):
    """
    Attach a structure and its derived metadata to a workflow's metadata.

    The workflow is modified in place: its ``metadata`` gains a "structure"
    entry (``structure.as_dict()``) plus every key returned by
    get_meta_from_structure.

    Args:
        original_wf: (Workflow)
        structure: (Structure) the structure being run by this workflow

    Returns:
        Workflow: the same ``original_wf`` object that was passed in
    """
    structure_dict = structure.as_dict()
    wf_metadata = original_wf.metadata
    wf_metadata["structure"] = structure_dict
    wf_metadata.update(get_meta_from_structure(structure))
    return original_wf
def add_wf_metadata(original_wf, structure):
    """
    Attach a structure and its derived metadata to a workflow's metadata.

    The workflow's ``metadata`` is modified in place: it gains a "structure"
    entry (``structure.as_dict()``) plus every key returned by
    get_meta_from_structure. The workflow is then passed through update_wf.

    Args:
        original_wf: (Workflow)
        structure: (Structure) the structure being run by this workflow

    Returns:
        The result of ``update_wf(original_wf)``.
    """
    structure_dict = structure.as_dict()
    wf_metadata = original_wf.metadata
    wf_metadata["structure"] = structure_dict
    wf_metadata.update(get_meta_from_structure(structure))
    return update_wf(original_wf)
def process_item(self, item):
    """
    Build the basic-descriptors document for one material.

    Args:
        item (dict): must hold a 'structure' entry (a Structure dict) and the
            id stored under ``self.materials.key``

    Returns:
        dict: the descriptor document with 'structure', 'meta',
            'site_descriptors', 'statistics' and the id under
            ``self.descriptors.key``. 'composition_descriptors' is present
            only when the Magpie featurization succeeded.
    """
    self.logger.debug(
        "Calculating basic descriptors for {}".format(item[self.materials.key]))

    structure = Structure.from_dict(item['structure'])
    doc = {
        'structure': structure.copy(),
        'meta': {'atomate': get_meta_from_structure(structure)},
    }

    # Magpie composition features are best effort: a failure is logged and
    # the document is still produced without them.
    try:
        magpie = {'name': 'magpie'}
        feature_names = self.cds["magpie"].feature_labels()
        feature_values = self.cds["magpie"].featurize(structure.composition)
        magpie.update(zip(feature_names, feature_values))
        doc['composition_descriptors'] = [magpie]
    except Exception as exc:
        self.logger.error("Failed getting Magpie descriptors: {}".format(exc))

    doc['site_descriptors'] = self.get_site_descriptors_from_struct(
        doc['structure'])
    doc['statistics'] = self.get_statistics(doc['site_descriptors'])
    doc[self.descriptors.key] = item[self.materials.key]
    return doc
def get_slab_fw(slab, transmuter=False, db_file=None, vasp_input_set=None,
                parents=None, vasp_cmd="vasp", name="", add_slab_metadata=True,
                user_incar_settings=None):
    """
    Function to generate a slab firework. Returns a TransmuterFW if
    transmuter is set, constructing the necessary transformations from the
    slab and slab generator parameters, or an OptimizeFW otherwise.

    Args:
        slab (Slab or Structure): structure or slab corresponding
            to the slab to be calculated
        transmuter (bool): whether or not to use a TransmuterFW based
            on slab params, if this option is selected, input slab must
            be a Slab object (as opposed to Structure)
        db_file (string): path to database file
        vasp_input_set (VaspInputSet): vasp_input_set corresponding to
            the slab calculation; defaults to an MPSurfaceSet for the slab
        parents (Fireworks or list of ints): parent FWs
        vasp_cmd (string): vasp command
        name (string): name of firework
        add_slab_metadata (bool): whether to add slab metadata to task doc
        user_incar_settings (dict): passed to MPSurfaceSet; only used when
            vasp_input_set is not given

    Returns:
        Firework corresponding to slab calculation

    Raises:
        ValueError: if transmuter is set and slab is not a Slab
    """
    vasp_input_set = vasp_input_set or MPSurfaceSet(
        slab, user_incar_settings=user_incar_settings)

    # In transmuter mode, generate the set of transformations,
    # else just create an optimize FW with the slab
    if transmuter:
        if not isinstance(slab, Slab):
            raise ValueError(
                "transmuter mode requires slab to be a Slab object")

        # Get transformation from oriented bulk and slab
        oriented_bulk = slab.oriented_unit_cell
        slab_trans_params = get_slab_trans_params(slab)
        trans_struct = SlabTransformation(**slab_trans_params)
        slab_from_bulk = trans_struct.apply_transformation(oriented_bulk)

        # Ensures supercell construction
        supercell_trans = SupercellTransformation.from_scaling_factors(
            round(slab.lattice.a / slab_from_bulk.lattice.a),
            round(slab.lattice.b / slab_from_bulk.lattice.b))

        # Get site properties, set velocities to zero if not set to avoid
        # custodian issue
        site_props = slab.site_properties
        if 'velocities' not in site_props:
            site_props['velocities'] = [0. for _ in slab]

        # Get adsorbates for InsertSitesTransformation
        if "adsorbate" in slab.site_properties.get("surface_properties", ""):
            ads_sites = [
                site for site in slab
                if site.properties["surface_properties"] == "adsorbate"
            ]
        else:
            ads_sites = []
        transformations = [
            "SlabTransformation", "SupercellTransformation",
            "InsertSitesTransformation", "AddSitePropertyTransformation"
        ]
        # one parameter dict per entry of `transformations`, in the same order
        trans_params = [
            slab_trans_params,
            {"scaling_matrix": supercell_trans.scaling_matrix},
            {
                "species": [site.species_string for site in ads_sites],
                "coords": [site.frac_coords for site in ads_sites]
            },
            {"site_properties": site_props}
        ]
        fw = TransmuterFW(name=name,
                          structure=oriented_bulk,
                          transformations=transformations,
                          transformation_params=trans_params,
                          copy_vasp_outputs=True,
                          db_file=db_file,
                          vasp_cmd=vasp_cmd,
                          parents=parents,
                          vasp_input_set=vasp_input_set)
    else:
        fw = OptimizeFW(name=name,
                        structure=slab,
                        vasp_input_set=vasp_input_set,
                        vasp_cmd=vasp_cmd,
                        db_file=db_file,
                        parents=parents,
                        job_type="normal")

    # Add slab metadata
    if add_slab_metadata:
        slab_metadata = {"slab": slab}
        # A plain Structure is accepted outside transmuter mode but has no
        # oriented_unit_cell, so the parent-structure fields are only added
        # when that attribute is available.
        parent_structure = getattr(slab, "oriented_unit_cell", None)
        if parent_structure is not None:
            parent_structure_metadata = get_meta_from_structure(
                parent_structure)
            slab_metadata["parent_structure"] = parent_structure
            slab_metadata["parent_structure_metadata"] = \
                parent_structure_metadata
        fw.tasks[-1]["additional_fields"].update(slab_metadata)
    return fw
def run_task(self, fw_spec):
    """
    Parse the Lobster run in the current directory and store a task document.

    The document holds the parsed ``lobsterout`` ("output"), ``lobsterin``
    ("input") and, when it can be read, ``lobsterin.orig`` ("orig_input"),
    plus custodian details, the task's "additional_fields", structure
    metadata from the POSCAR, and the VASP and Lobster directory names.
    If no db_file is resolved, the document is written to
    ``task_lobster.json``; otherwise it is inserted into the "lobster"
    collection, with any "additional_outputs" files stored in GridFS
    ("lobster_files").

    Args:
        fw_spec: the Firework spec; read for "calc_locs", the optional
            "fw_spec_field" entry, and passed to env_chk for "db_file".

    Returns:
        FWAction: an empty FWAction.
    """
    vasp_calc_dir = self.get("calc_dir", None)
    vasp_calc_loc = (
        get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])
        if self.get("calc_loc")
        else {}
    )

    # get the directory that contains the Lobster dir to parse
    current_dir = os.getcwd()
    # parse the Lobster directory
    logger.info("PARSING DIRECTORY: {}".format(current_dir))
    task_doc = {}
    struct = Structure.from_file(self._find_gz_file("POSCAR"))
    lobsterout_here = Lobsterout(self._find_gz_file("lobsterout"))
    task_doc["output"] = lobsterout_here.get_doc()
    lobsterin_here = Lobsterin.from_file(self._find_gz_file("lobsterin"))
    task_doc["input"] = lobsterin_here
    try:
        lobsterin_orig = Lobsterin.from_file(self._find_gz_file("lobsterin.orig"))
        task_doc["orig_input"] = lobsterin_orig
    except ValueError:
        # lobsterin.orig is optional: a ValueError here just leaves
        # "orig_input" out (presumably raised when the file is missing;
        # confirm against _find_gz_file)
        pass

    # save custodian details
    if os.path.exists("custodian.json"):
        task_doc["custodian"] = loadfn("custodian.json")

    additional_fields = self.get("additional_fields", {})
    if additional_fields:
        task_doc.update(additional_fields)

    task_doc.update(get_meta_from_structure(struct))
    # an explicit calc_dir wins over the path resolved from calc_loc
    if vasp_calc_dir is not None:
        task_doc["vasp_dir_name"] = vasp_calc_dir
    else:
        task_doc["vasp_dir_name"] = vasp_calc_loc["path"]
    task_doc["dir_name"] = current_dir

    # Check for additional keys to set based on the fw_spec
    if self.get("fw_spec_field"):
        task_doc.update(fw_spec[self.get("fw_spec_field")])

    task_doc["state"] = "successful"

    task_doc = jsanitize(task_doc)
    # get the database connection
    db_file = env_chk(self.get("db_file"), fw_spec)

    # db insertion or taskdoc dump
    if not db_file:
        with open("task_lobster.json", "w") as f:
            f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
    else:
        db = VaspCalcDb.from_db_file(db_file, admin=True)
        db.collection = db.db["lobster"]
        additional_outputs = self.get("additional_outputs", None)
        if additional_outputs:
            for filename in additional_outputs:
                fs_id = None
                if os.path.isfile(filename):
                    fs_id = put_file_in_gridfs(
                        filename, db, collection_name="lobster_files", compress=True
                    )
                elif os.path.isfile(filename + ".gz"):
                    # the .gz variant is stored without further compression
                    fs_id = put_file_in_gridfs(
                        filename + ".gz",
                        db,
                        collection_name="lobster_files",
                        compress=False,
                        compression_type="zlib",
                    )

                if fs_id:
                    # e.g. "ICOHPLIST.lobster" is recorded as "icohplist_id"
                    key_name = filename.split(".")[0].lower() + "_id"
                    task_doc[key_name] = fs_id

        db.insert(task_doc)
    return FWAction()
def _assimilate_from_cif(self, cif_path):
    """
    Parse a CIF file into a structure plus CIF-level and atomate metadata.

    Warnings emitted while parsing are logged and collected under
    ``cif_meta['cifwarnings']``. If ``CifParser.get_structures`` raises a
    ValueError, the error is logged and appended to the ``Error_Record``
    file in the working directory.

    Args:
        cif_path (str): path to the CIF file; the file name up to its first
            "." is used as the record id.

    Returns:
        tuple: ``(struc, cif_meta, atomate_meta)``. ``struc`` and
            ``atomate_meta`` are None when the structure could not be
            parsed; ``cif_meta`` is always a dict.
    """
    file_ID = cif_path.split('/')[-1].split(".")[0]
    cif_meta = {}
    # Bound up front so the final return is valid even when parsing fails.
    struc = None
    atomate_meta = None

    # capture any warnings generated by parsing cif file
    with warnings.catch_warnings(record=True) as w:
        cif_parser = CifParser(cif_path)
        for warn in w:
            cif_meta.setdefault('cifwarnings', []).append(str(warn.message))
            logger.warning('{}: {}'.format(file_ID, warn.message))

    cif_dict = cif_parser.as_dict()
    # only the first data block of the CIF is inspected
    orig_id = list(cif_dict.keys())[0]
    easy_dict = cif_dict[orig_id]

    if '_chemical_name_mineral' in easy_dict:
        cif_meta['min_name'] = easy_dict['_chemical_name_mineral']
    if '_chemical_name_systematic' in easy_dict:
        cif_meta['chem_name'] = easy_dict['_chemical_name_systematic']
    if '_cell_measurement_pressure' in easy_dict:
        cif_meta['pressure'] = float(
            easy_dict['_cell_measurement_pressure']) / 1000
    else:
        # NOTE(review): presumably ambient pressure in MPa (CIF value in
        # kPa divided by 1000); confirm units.
        cif_meta['pressure'] = .101325

    with warnings.catch_warnings(record=True) as w:
        try:
            struc = cif_parser.get_structures()[0]
        except ValueError as err:
            # if cif parsing raises error, write icsd_id to Error_Record and do NOT add structure to mongo database
            logger.error(
                file_ID + ': {}'.format(err) +
                "\nDid not insert structure into Mongo Collection")
            with open('Error_Record', 'a') as err_rec:
                err_rec.write(str(file_ID) + ': {}\n'.format(err))
        else:
            references = self.bibtex_from_cif(cif_path)
            history = [{
                'name': 'ICSD',
                'url': 'https://icsd.fiz-karlsruhe.de/',
                'description': {
                    'id': file_ID
                }
            }]

            cif_meta['references'] = references
            cif_meta['history'] = history

            atomate_meta = get_meta_from_structure(struc)

        # unfortunately any warnings are logged after any errors. Not too big of an issue
        for warn in w:
            cif_meta.setdefault('cifwarnings', []).append(str(warn.message))
            logger.warning('{}: {}'.format(file_ID, warn.message))

    return (struc, cif_meta, atomate_meta)
def assimilate(self, path, dbhost='localhost', dbport=27017, dbname='ICSD',
               collection_name='ICSD_files', store_mongo=True):
    """
    Assimilate the ICSD CIF stored in a directory into a plain data dict.

    The last component of ``path`` is taken as the integer ICSD id, and the
    CIF is read from ``<path>/<id>.cif``. Warnings emitted while parsing are
    logged and collected under ``data['cifwarnings']``. If
    ``CifParser.get_structures`` raises a ValueError, the error is logged and
    appended to the ``Error_Record`` file in the working directory, and
    nothing is written to MongoDB.

    Args:
        path (str): directory path; its name must be an integer ICSD id
        dbhost (str): MongoDB host
        dbport (int): MongoDB port
        dbname (str): MongoDB database name
        collection_name (str): collection the document is upserted into
        store_mongo (bool): whether to upsert the document into MongoDB

    Returns:
        dict: the assimilated data. It always holds 'icsd_id' and
            'pressure'; 'snl' and the formula/metadata keys are present
            only when the structure was parsed.

    Raises:
        ValueError: if the directory name is not an integer.
    """
    # normpath drops a trailing separator, so "some/dir/123/" still yields
    # "123" instead of an empty name.
    file_ID = os.path.basename(os.path.normpath(path))
    print(file_ID)
    data = {'icsd_id': int(file_ID)}

    cif_path = os.path.join(path, file_ID + '.cif')

    # capture any warnings generated by parsing cif file
    with warnings.catch_warnings(record=True) as w:
        cif_parser = CifParser(cif_path)
        for warn in w:
            data.setdefault('cifwarnings', []).append(str(warn.message))
            logger.warning('{}: {}'.format(file_ID, warn.message))

    cif_dict = cif_parser.as_dict()
    # only the first data block of the CIF is inspected
    orig_id = list(cif_dict.keys())[0]
    easy_dict = cif_dict[orig_id]

    if '_chemical_name_mineral' in easy_dict:
        data['min_name'] = easy_dict['_chemical_name_mineral']
    if '_chemical_name_systematic' in easy_dict:
        data['chem_name'] = easy_dict['_chemical_name_systematic']
    if '_cell_measurement_pressure' in easy_dict:
        data['pressure'] = float(
            easy_dict['_cell_measurement_pressure']) / 1000
    else:
        # NOTE(review): presumably ambient pressure in MPa (CIF value in
        # kPa divided by 1000); confirm units.
        data['pressure'] = .101325

    with warnings.catch_warnings(record=True) as w:
        try:
            struc = cif_parser.get_structures()[0]
        except ValueError as err:
            # if cif parsing raises error, write icsd_id to Error_Record and do NOT add structure to mongo database
            logger.error(
                file_ID + ': {}'.format(err) +
                "\nDid not insert structure into Mongo Collection")
            with open('Error_Record', 'a') as err_rec:
                err_rec.write(str(file_ID) + ': {}\n'.format(err))
        else:
            authors = 'Donny Winston<*****@*****.**>, Joseph Palakapilly<*****@*****.**>'
            references = self.bibtex_from_cif(cif_path)
            history = [{
                'name': 'ICSD',
                'url': 'https://icsd.fiz-karlsruhe.de/',
                'description': {
                    'icsd_id': file_ID
                }
            }]
            snl = StructureNL(struc, authors=authors,
                              references=references, history=history)
            data['snl'] = snl.as_dict()

            meta = get_meta_from_structure(struc)
            data['nsites'] = meta['nsites']
            data['elements'] = meta['elements']
            data['nelements'] = meta['nelements']
            data['formula'] = meta['formula']
            data['formula_reduced'] = meta['formula_pretty']
            data['formula_reduced_abc'] = meta['formula_reduced_abc']
            data['formula_anonymous'] = meta['formula_anonymous']
            data['chemsys'] = meta['chemsys']
            data['is_valid'] = meta['is_valid']
            data['is_ordered'] = meta['is_ordered']

        # unfortunately any warnings are logged after any errors. Not too big of an issue
        for warn in w:
            data.setdefault('cifwarnings', []).append(str(warn.message))
            logger.warning('{}: {}'.format(file_ID, warn.message))

    # 'snl' is only present when the structure parsed, so failed CIFs are
    # never written to the collection.
    if 'snl' in data and store_mongo:
        client = MongoClient(dbhost, dbport)
        try:
            col = client[dbname][collection_name]
            col.update_one({'icsd_id': data['icsd_id']}, {'$set': data},
                           upsert=True)
        finally:
            # release the connection instead of leaking one client per call
            client.close()

    return data
def get_slab_fw(slab, transmuter=False, db_file=None, vasp_input_set=None,
                parents=None, vasp_cmd="vasp", name="", add_slab_metadata=True):
    """
    Function to generate a slab firework. Returns a TransmuterFW if
    transmuter is set, constructing the necessary transformations from the
    slab and slab generator parameters, or an OptimizeFW otherwise.

    Args:
        slab (Slab or Structure): structure or slab corresponding
            to the slab to be calculated
        transmuter (bool): whether or not to use a TransmuterFW based
            on slab params, if this option is selected, input slab must
            be a Slab object (as opposed to Structure)
        db_file (string): path to database file
        vasp_input_set (VaspInputSet): vasp_input_set corresponding to
            the slab calculation; defaults to ``MPSurfaceSet(slab)``
        parents (Fireworks or list of ints): parent FWs
        vasp_cmd (string): vasp command
        name (string): name of firework
        add_slab_metadata (bool): whether to add slab metadata to task doc

    Returns:
        Firework corresponding to slab calculation

    Raises:
        ValueError: if transmuter is set and slab is not a Slab
    """
    vasp_input_set = vasp_input_set or MPSurfaceSet(slab)

    # In transmuter mode, generate the set of transformations,
    # else just create an optimize FW with the slab
    if transmuter:
        if not isinstance(slab, Slab):
            raise ValueError("transmuter mode requires slab to be a Slab object")

        # Get transformation from oriented bulk and slab
        oriented_bulk = slab.oriented_unit_cell
        slab_trans_params = get_slab_trans_params(slab)
        trans_struct = SlabTransformation(**slab_trans_params)
        slab_from_bulk = trans_struct.apply_transformation(oriented_bulk)

        # Ensures supercell construction
        supercell_trans = SupercellTransformation.from_scaling_factors(
            round(slab.lattice.a / slab_from_bulk.lattice.a),
            round(slab.lattice.b / slab_from_bulk.lattice.b))

        # Get site properties, set velocities to zero if not set to avoid
        # custodian issue
        site_props = slab.site_properties
        if 'velocities' not in site_props:
            site_props['velocities'] = [0. for _ in slab]

        # Get adsorbates for InsertSitesTransformation
        if "adsorbate" in slab.site_properties.get("surface_properties", ""):
            ads_sites = [site for site in slab
                         if site.properties["surface_properties"] == "adsorbate"]
        else:
            ads_sites = []
        transformations = [
            "SlabTransformation", "SupercellTransformation",
            "InsertSitesTransformation", "AddSitePropertyTransformation"]
        # one parameter dict per entry of `transformations`, in the same order
        trans_params = [slab_trans_params,
                        {"scaling_matrix": supercell_trans.scaling_matrix},
                        {"species": [site.species_string for site in ads_sites],
                         "coords": [site.frac_coords for site in ads_sites]},
                        {"site_properties": site_props}]
        fw = TransmuterFW(name=name, structure=oriented_bulk,
                          transformations=transformations,
                          transformation_params=trans_params,
                          copy_vasp_outputs=True, db_file=db_file,
                          vasp_cmd=vasp_cmd, parents=parents,
                          vasp_input_set=vasp_input_set)
    else:
        fw = OptimizeFW(name=name, structure=slab,
                        vasp_input_set=vasp_input_set, vasp_cmd=vasp_cmd,
                        db_file=db_file, parents=parents, job_type="normal")

    # Add slab metadata
    if add_slab_metadata:
        slab_metadata = {"slab": slab}
        # A plain Structure is accepted outside transmuter mode but has no
        # oriented_unit_cell, so the parent-structure fields are only added
        # when that attribute is available.
        parent_structure = getattr(slab, "oriented_unit_cell", None)
        if parent_structure is not None:
            parent_structure_metadata = get_meta_from_structure(
                parent_structure)
            slab_metadata["parent_structure"] = parent_structure
            slab_metadata["parent_structure_metadata"] = \
                parent_structure_metadata
        fw.tasks[-1]["additional_fields"].update(slab_metadata)
    return fw