def run_task(self, fw_spec):
    # get the directory that contains the dir to parse
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    # parse the calc directory
    logger.info("PARSING DIRECTORY: {} USING DRONE: {}".format(
        calc_dir, self['drone'].__class__.__name__))

    # get the database connection
    db_file = env_chk(self.get('db_file'), fw_spec)

    # create a fresh drone instance and parse the directory into a task doc
    drone = self['drone'].__class__()
    task_doc = drone.assimilate(calc_dir)

    if not db_file:
        # no database configured: dump the task doc to a JSON file instead
        with open("task.json", "w") as f:
            f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
    else:
        mmdb = self["mmdb"]
        db = mmdb.__class__.from_db_file(db_file)
        # insert the task document
        t_id = db.insert(task_doc)
        logger.info("Finished parsing with task_id: {}".format(t_id))

    # defuse downstream fireworks if the calculation did not finish successfully
    return FWAction(stored_data={"task_id": task_doc.get("task_id", None)},
                    defuse_children=(task_doc["state"] != "successful"))
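# Usage sketch (illustrative, not from the source): how a drone-based parse
# task like the one above is typically placed in a Firework. The class name
# `ToDbTask` is an assumption for the class this run_task belongs to, and
# `drone`/`mmdb` stand in for concrete drone and database objects.
def _example_to_db_firework(drone, mmdb):
    from fireworks import Firework
    # calc_loc=True means "use the most recent entry in fw_spec['calc_locs']";
    # ">>db_file<<" is resolved against the worker environment by env_chk().
    return Firework([ToDbTask(drone=drone, mmdb=mmdb, calc_loc=True,
                              db_file=">>db_file<<")],
                    name="parse and insert")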
def test_passcalclocs(self):
    fw1 = Firework([PassCalcLocs(name="fw1")], name="fw1")
    fw2 = Firework([PassCalcLocs(name="fw2")], name="fw2", parents=fw1)
    fw3 = Firework([PassCalcLocs(name="fw3")], name="fw3", parents=fw2)

    wf = Workflow([fw1, fw2, fw3])
    self.lp.add_wf(wf)
    rapidfire(self.lp)

    fw2 = self.lp.get_fw_by_id(self.lp.get_fw_ids({"name": "fw2"})[0])
    fw3 = self.lp.get_fw_by_id(self.lp.get_fw_ids({"name": "fw3"})[0])

    self.assertEqual(len(fw2.spec["calc_locs"]), 1)
    self.assertEqual(len(fw3.spec["calc_locs"]), 2)
    self.assertEqual(fw3.spec["calc_locs"][0]["name"], "fw1")
    self.assertEqual(fw3.spec["calc_locs"][1]["name"], "fw2")
    self.assertNotEqual(fw3.spec["calc_locs"][0]["path"],
                        fw3.spec["calc_locs"][1]["path"])

    calc_locs = fw3.spec["calc_locs"]
    self.assertEqual(get_calc_loc("fw1", calc_locs), calc_locs[0])
    self.assertEqual(get_calc_loc("fw2", calc_locs), calc_locs[1])
    self.assertEqual(get_calc_loc(True, calc_locs), calc_locs[1])
def run_task(self, fw_spec):
    lammps_input = self["lammps_input"]
    diffusion_params = self.get("diffusion_params", {})

    # get the directory that contains the LAMMPS dir to parse
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    # parse the directory
    logger.info("PARSING DIRECTORY: {}".format(calc_dir))

    d = {}
    d["dir_name"] = os.path.abspath(os.getcwd())
    d["last_updated"] = datetime.today()
    d["input"] = lammps_input.as_dict()

    # the dump filename is the fifth token of the LAMMPS "dump" command
    log_file = lammps_input.config_dict["log"]
    if isinstance(lammps_input.config_dict["dump"], list):
        dump_file = lammps_input.config_dict["dump"][0].split()[4]
    else:
        dump_file = lammps_input.config_dict["dump"].split()[4]

    is_forcefield = hasattr(lammps_input.lammps_data, "bonds_data")
    lammpsrun = LammpsRun(lammps_input.data_filename, dump_file, log_file,
                          is_forcefield=is_forcefield)
    d["natoms"] = lammpsrun.natoms
    d["nmols"] = lammpsrun.nmols
    d["box_lengths"] = lammpsrun.box_lengths
    d["mol_masses"] = lammpsrun.mol_masses
    d["mol_config"] = lammpsrun.mol_config
    if diffusion_params:
        diffusion_analyzer = lammpsrun.get_diffusion_analyzer(**diffusion_params)
        # initialize the "analysis" sub-document (assigning to
        # d["analysis"]["diffusion"] directly would raise a KeyError)
        d["analysis"] = {"diffusion": diffusion_analyzer.get_summary_dict()}

    db_file = env_chk(self.get('db_file'), fw_spec)

    # db insertion
    if not db_file:
        with open("task.json", "w") as f:
            f.write(json.dumps(d, default=DATETIME_HANDLER))
    else:
        mmdb = MMLammpsDb.from_db_file(db_file)
        # insert the task document
        t_id = mmdb.insert(d)
        logger.info("Finished parsing with task_id: {}".format(t_id))

    return FWAction(stored_data={"task_id": d.get("task_id", None)})
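# Why split()[4] in the dump handling above: a LAMMPS dump command has the
# form "dump ID group-ID style N file [args...]", so the fifth whitespace
# token is the dump filename. A minimal, self-contained illustration (the
# command string below is hypothetical):
def _example_dump_filename():
    dump_cmd = "dump 1 all custom 1000 traj.lammpstrj id type x y z"
    return dump_cmd.split()[4]  # -> "traj.lammpstrj"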
def run_task(self, fw_spec):
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    logger.info("PARSING DIRECTORY: {}".format(calc_dir))

    db_file = env_chk(self.get('db_file'), fw_spec)

    cluster_dict = None
    tags = Tags.from_file(filename="feff.inp")
    if "RECIPROCAL" not in tags:
        cluster_dict = Atoms.cluster_from_file("feff.inp").as_dict()
    doc = {"input_parameters": tags.as_dict(),
           "cluster": cluster_dict,
           "structure": self["structure"].as_dict(),
           "absorbing_atom": self["absorbing_atom"],
           "spectrum_type": self["spectrum_type"],
           "spectrum": np.loadtxt(os.path.join(calc_dir, self["output_file"])).tolist(),
           "edge": self.get("edge", None),
           "metadata": self.get("metadata", None),
           "dir_name": os.path.abspath(os.getcwd()),
           "last_updated": datetime.today()}

    # db insertion
    if not db_file:
        with open("feff_task.json", "w") as f:
            f.write(json.dumps(doc, default=DATETIME_HANDLER))
    else:
        db = MMFeffDb.from_db_file(db_file, admin=True)
        db.insert(doc)

    logger.info("Finished parsing the spectrum")

    return FWAction(stored_data={"task_id": doc.get("task_id", None)})
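# Sketch (illustrative) of what the "spectrum" field above holds: the raw
# columns of the FEFF output file as JSON-serializable nested lists. The
# default filename "xmu.dat" is just an example of a typical output_file value.
def _example_load_spectrum(calc_dir, output_file="xmu.dat"):
    import os
    import numpy as np
    # np.loadtxt skips '#' comment lines by default and returns a 2D array
    return np.loadtxt(os.path.join(calc_dir, output_file)).tolist()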
def run_task(self, fw_spec): if self.get("calc_dir"): # direct setting of calc dir - no calc_locs or filesystem! calc_dir = self["calc_dir"] filesystem = None elif self.get("calc_loc"): # search for calc dir and filesystem within calc_locs calc_loc = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"]) calc_dir = calc_loc["path"] filesystem = calc_loc["filesystem"] else: raise ValueError("Must specify either calc_dir or calc_loc!") fileclient = FileClient(filesystem=filesystem) calc_dir = fileclient.abspath(calc_dir) contcar_to_poscar = self.get("contcar_to_poscar", True) all_files = fileclient.listdir(calc_dir) # determine what files need to be copied if "$ALL" in self.get("additional_files", []): files_to_copy = all_files else: files_to_copy = ['INCAR', 'POSCAR', 'KPOINTS', 'POTCAR', 'OUTCAR', 'vasprun.xml'] if self.get("additional_files"): files_to_copy.extend(self["additional_files"]) if contcar_to_poscar and "CONTCAR" not in files_to_copy: files_to_copy.append("CONTCAR") files_to_copy = [f for f in files_to_copy if f != 'POSCAR'] # remove POSCAR # start file copy for f in files_to_copy: prev_path_full = os.path.join(calc_dir, f) # prev_path = os.path.join(os.path.split(calc_dir)[1], f) dest_fname = 'POSCAR' if f == 'CONTCAR' and contcar_to_poscar else f dest_path = os.path.join(os.getcwd(), dest_fname) relax_ext = "" relax_paths = sorted(fileclient.glob(prev_path_full+".relax*"), reverse=True) if relax_paths: if len(relax_paths) > 9: raise ValueError("CopyVaspOutputs doesn't properly handle >9 relaxations!") m = re.search('\.relax\d*', relax_paths[0]) relax_ext = m.group(0) # detect .gz extension if needed - note that monty zpath() did not seem useful here gz_ext = "" if not (f + relax_ext) in all_files: for possible_ext in [".gz", ".GZ"]: if (f + relax_ext + possible_ext) in all_files: gz_ext = possible_ext if not (f + relax_ext + gz_ext) in all_files: raise ValueError("Cannot find file: {}".format(f)) # copy the file (minus the relaxation extension) fileclient.copy(prev_path_full + relax_ext + gz_ext, dest_path + gz_ext) # unzip the .gz if needed if gz_ext in ['.gz', ".GZ"]: # unzip dest file f = gzip.open(dest_path + gz_ext, 'rt') file_content = f.read() with open(dest_path, 'w') as f_out: f_out.writelines(file_content) f.close() os.remove(dest_path + gz_ext)
def run_task(self, fw_spec):
    # get the directory that contains the VASP dir to parse
    calc_dir = os.getcwd()
    if "calc_dir" in self:
        calc_dir = self["calc_dir"]
    elif self.get("calc_loc"):
        calc_dir = get_calc_loc(self["calc_loc"], fw_spec["calc_locs"])["path"]

    # parse the VASP directory
    logger.info("PARSING DIRECTORY: {}".format(calc_dir))

    # get the database connection
    db_file = env_chk(self.get('db_file'), fw_spec)

    drone = VaspDrone(additional_fields=self.get("additional_fields"),
                      parse_dos=self.get("parse_dos", False), compress_dos=1,
                      bandstructure_mode=self.get("bandstructure_mode", False),
                      compress_bs=1)

    # assimilate (i.e., parse)
    task_doc = drone.assimilate(calc_dir)

    # check the fw_spec for additional fields to add to the task doc
    if self.get("fw_spec_field"):
        task_doc.update(fw_spec[self.get("fw_spec_field")])

    # db insertion
    if not db_file:
        with open("task.json", "w") as f:
            f.write(json.dumps(task_doc, default=DATETIME_HANDLER))
    else:
        mmdb = MMVaspDb.from_db_file(db_file, admin=True)

        # insert dos into GridFS; strip it from the task doc for every
        # ionic step so the MongoDB document stays small
        if self.get("parse_dos") and "calcs_reversed" in task_doc:
            for idx, x in enumerate(task_doc["calcs_reversed"]):
                if "dos" in task_doc["calcs_reversed"][idx]:
                    if idx == 0:  # only store most recent DOS
                        dos = json.dumps(task_doc["calcs_reversed"][idx]["dos"],
                                         cls=MontyEncoder)
                        gfs_id, compression_type = mmdb.insert_gridfs(dos, "dos_fs")
                        task_doc["calcs_reversed"][idx]["dos_compression"] = compression_type
                        task_doc["calcs_reversed"][idx]["dos_fs_id"] = gfs_id
                    del task_doc["calcs_reversed"][idx]["dos"]

        # insert band structure into GridFS, same pattern as the DOS
        if self.get("bandstructure_mode") and "calcs_reversed" in task_doc:
            for idx, x in enumerate(task_doc["calcs_reversed"]):
                if "bandstructure" in task_doc["calcs_reversed"][idx]:
                    if idx == 0:  # only store most recent band structure
                        bs = json.dumps(task_doc["calcs_reversed"][idx]["bandstructure"],
                                        cls=MontyEncoder)
                        gfs_id, compression_type = mmdb.insert_gridfs(bs, "bandstructure_fs")
                        task_doc["calcs_reversed"][idx]["bandstructure_compression"] = compression_type
                        task_doc["calcs_reversed"][idx]["bandstructure_fs_id"] = gfs_id
                    del task_doc["calcs_reversed"][idx]["bandstructure"]

        # insert the task document
        t_id = mmdb.insert(task_doc)
        logger.info("Finished parsing with task_id: {}".format(t_id))

    if self.get("defuse_unsuccessful", True):
        defuse_children = (task_doc["state"] != "successful")
    else:
        defuse_children = False

    return FWAction(stored_data={"task_id": task_doc.get("task_id", None)},
                    defuse_children=defuse_children)
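# Usage sketch (the class name VaspToDbTask is an assumption for the run_task
# above): with parse_dos=True the most recent DOS ends up in GridFS ("dos_fs")
# rather than in the task document itself, as implemented above.
def _example_vasp_db_task():
    return VaspToDbTask(db_file=">>db_file<<", parse_dos=True,
                        additional_fields={"task_label": "static run"})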