def run_task(self, fw_spec):
    """Register existing simulation documents with the current workflow.

    For every id in ``self["db_ids_lst"]`` the matching document is read
    from the ``simulations`` collection, tagged with its origin
    (``source_id`` and ``nanoclusters[0]["reference_id"]``) and with the
    current workflow id, and passed to ``update_simulations_collection``.
    The ``_id`` of each document returned by that call is collected in
    ``fw_spec["temp"]["calc_ids"]``.

    Args:
        fw_spec (dict): Firework spec. ``extdb_connect`` is read for the
            database connection; ``workflow._id`` is optional and
            defaults to -1. The dict is modified in place.

    Returns:
        FWAction: action whose ``update_spec`` is the modified spec
        without the ``_category`` and ``name`` entries.

    Raises:
        ValueError: if one of the requested ids has no document in the
            ``simulations`` collection.
    """
    db_ids_lst = self["db_ids_lst"]
    ext_db = self.get("ext_db", None)
    if ext_db is None:
        ext_db = get_external_database(fw_spec["extdb_connect"])
    workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
    logging.debug(fw_spec)
    # NOTE: alias, not a copy -- the incoming spec itself is updated.
    update_spec = fw_spec
    simulations = ext_db["simulations"]

    calc_ids = []
    for db_id in db_ids_lst:
        simulation = simulations.find_one({"_id": int(db_id)})
        if simulation is None:
            # find_one returns None for an unknown id; fail with a clear
            # message instead of an opaque subscript error on None.
            raise ValueError(
                "simulation with _id {} not found in external "
                "database".format(db_id))
        simulation["nanoclusters"][0]["reference_id"] = int(db_id)
        simulation["source_id"] = db_id
        simulation["workflow_id"] = workflow_id
        dct = update_simulations_collection(
            extdb_connect=fw_spec["extdb_connect"], **simulation)
        calc_ids.append(dct["_id"])

    # update temp workflow data
    update_spec.setdefault("temp", {})["calc_ids"] = calc_ids
    # Drop the per-firework entries from the forwarded spec; tolerate
    # specs that never carried them.
    update_spec.pop("_category", None)
    update_spec.pop("name", None)
    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    """Upload a list of atomic structures as new simulation documents.

    Each entry of ``self["ase_atoms_lst"]`` is converted with
    ``atoms_dict_to_ase``; its total energy is looked up in ``atoms.info``
    under several aliases. A fresh id is requested from the id counter
    and one document per structure is inserted into the ``simulations``
    collection. The new ids are stored in ``fw_spec["temp"]["calc_ids"]``.

    Args:
        fw_spec (dict): Firework spec. ``extdb_connect`` is read for the
            database connection; ``workflow._id`` is optional and
            defaults to -1. The dict is modified in place.

    Returns:
        FWAction: action whose ``update_spec`` is the modified spec
        without the ``_category`` and ``name`` entries.
    """
    ase_atoms_lst = self["ase_atoms_lst"]
    workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
    logging.debug(fw_spec)
    # NOTE: alias, not a copy -- the incoming spec itself is updated.
    update_spec = fw_spec

    # The connection does not depend on the structure, so open it once
    # instead of once per loop iteration.
    db = get_external_database(fw_spec["extdb_connect"])
    simulations = db['simulations']

    # Accepted spellings of the energy key, in order of precedence.
    energy_keys = ("E", "energy", "total_energy", "TotalEnergy",
                   "totalenergy")

    calc_ids = []
    for atoms_dict in ase_atoms_lst:
        atoms = atoms_dict_to_ase(atoms_dict)
        info = atoms.info
        for key in energy_keys:
            if key in info:
                total_energy = info[key]
                break
        else:
            total_energy = "UNKNOWN"
            logging.warning(
                "total energy of cluster not specified in structure file")

        # request id counter
        simulation_id = _query_id_counter_and_increment('simulations', db)
        nanocluster = {
            "atom_ids": list(range(len(atoms))),
            # a freshly uploaded cluster is its own reference
            "reference_id": simulation_id,
        }
        document = {
            "_id": simulation_id,
            "atoms": atoms_dict,
            "source_id": -1,
            "workflow_id": workflow_id,
            "nanoclusters": [nanocluster],
            "adsorbates": [],
            "substrates": [],
            "operations": [],
            "inp": {},
            "output": {
                "total_energy": total_energy
            },
        }
        # update external database
        simulations.insert_one(document)
        calc_ids.append(simulation_id)

    # update temp workflow data
    update_spec.setdefault("temp", {})["calc_ids"] = calc_ids
    # Drop the per-firework entries from the forwarded spec; tolerate
    # specs that never carried them.
    update_spec.pop("_category", None)
    update_spec.pop("name", None)
    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    """Generate nanoclusters and upload them as simulation documents.

    Reads the generation parameters from the task, delegates to
    ``self.generate`` and inserts all returned documents into the
    ``simulations`` collection in one batch. The ids returned by
    ``self.generate`` are stored in ``fw_spec["calc_ids"]``.

    Args:
        fw_spec (dict): Firework spec. ``extdb_connect`` is read for the
            database connection; ``workflow._id`` is optional and
            defaults to -1. The dict is modified in place.

    Returns:
        FWAction: action whose ``update_spec`` is the modified spec
        without the ``_category`` and ``name`` entries.
    """
    workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
    n_initial_configurations = self["n_initial_configurations"]
    n_configurations = self["n_configurations"]
    shape = self["shape"]
    nanocluster_size = self["nanocluster_size"]
    compositions = self["compositions"]
    elements = self["elements"]
    # No trailing comma here: it would wrap the flag in a 1-tuple, which
    # is always truthy regardless of the configured value.
    generate_pure_nanoclusters = self["generate_pure_nanoclusters"]
    bondlength_dct = self["bondlength_dct"]

    db = get_external_database(fw_spec["extdb_connect"])
    simulations = db['simulations']

    # generate clusters
    nanoclusters, calc_ids = self.generate(
        n_initial_configurations,
        n_configurations,
        shape,
        nanocluster_size,
        compositions,
        elements,
        generate_pure_nanoclusters=generate_pure_nanoclusters,
        bondlength_dct=bondlength_dct,
        db=db,
        workflow_id=workflow_id)

    # upload all simulations at once; insert_many rejects an empty batch
    if len(nanoclusters) > 0:
        simulations.insert_many(nanoclusters)

    # fireworks
    # NOTE(review): the ids go to the top-level "calc_ids" key here, not
    # to spec["temp"]["calc_ids"] -- confirm downstream tasks expect that.
    update_spec = fw_spec
    update_spec["calc_ids"] = calc_ids
    # Drop the per-firework entries from the forwarded spec; tolerate
    # specs that never carried them.
    update_spec.pop("_category", None)
    update_spec.pop("name", None)
    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    """Place an adsorbate on every requested site of every nanocluster.

    For each id in ``fw_spec["temp"]["calc_ids"]`` the cluster is loaded,
    its adsorption sites of the requested types are enumerated with
    cluskit, and one new simulation document (cluster + adsorbate) is
    created per site. The site descriptors are stacked into one matrix
    which is handed to ``write_descmatrix``. A reference document holding
    the adsorbate's ``reference_energy`` is created first, so that every
    adsorbate entry can point to it through ``reference_id``.

    Args:
        fw_spec (dict): Firework spec. Reads ``temp.calc_ids`` and
            ``extdb_connect``; ``workflow._id`` is optional and defaults
            to -1. The dict is modified in place: ``temp.calc_ids`` is
            replaced by the ids of the new documents and
            ``temp.descmatrix`` receives the value returned by
            ``write_descmatrix``.

    Returns:
        FWAction: action whose ``update_spec`` is the modified spec
        without the ``_category`` and ``name`` entries.

    Raises:
        ValueError: if ``adsite_types`` contains anything other than
            "top", "bridge" or "hollow". Checked before any database
            write so that a bad parameter leaves no partial data behind.
    """
    descriptor = self["descriptor"]
    descriptor_params = self["descriptor_params"]
    adsorbate_name = self["adsorbate_name"]
    adsite_types = self["adsite_types"]
    reference_energy = self["reference_energy"]
    calc_ids = fw_spec["temp"]["calc_ids"]

    # integer codes handed to cluskit for each site class
    site_type_codes = {"top": 1, "bridge": 2, "hollow": 3}
    for adsite_type in adsite_types:
        if adsite_type not in site_type_codes:
            logging.error(
                "adsorption site type unknown, known types are: top, bridge, hollow")
            raise ValueError(
                "unknown adsorption site type: {!r}".format(adsite_type))

    simulations = fetch_simulations(fw_spec["extdb_connect"], calc_ids)
    workflow_id = fw_spec.get("workflow", {"_id": -1}).get("_id", -1)
    # NOTE: alias, not a copy -- the incoming spec itself is updated.
    update_spec = fw_spec
    logging.debug(fw_spec)

    desc_lst = []
    new_calc_ids = []
    db = get_external_database(fw_spec["extdb_connect"])

    # create reference of adsorbate in order to store its total energy
    # for later constructing adsorption energies
    reference_simulation = update_simulations_collection(
        extdb_connect=fw_spec["extdb_connect"],
        atoms={},
        source_id=-1,
        workflow_id=workflow_id,
        nanoclusters=[],
        adsorbates=[],
        substrates=[],
        operations=[""],
        inp={"adsorbate_name": adsorbate_name},
        output={"total_energy": reference_energy},
    )
    reference_id = reference_simulation["_id"]

    all_atomtypes = gather_all_atom_types(calc_ids, simulations)

    # looping over nc atoms
    for calc_id in calc_ids:
        # documents of this cluster, uploaded together in one batch
        simulations_chunk_list = []

        # get source simulation
        source_simulation = copy.deepcopy(simulations[str(calc_id)])
        atoms = atoms_dict_to_ase(source_simulation["atoms"])
        logging.debug(atoms)

        # running cluskit on cluster
        cluster = cluskit.Cluster(atoms)
        cluster.get_surface_atoms()
        cluster.descriptor_setup = _setup_descriptor(
            all_atomtypes, descriptor, **descriptor_params)

        # looping over adsorption site type
        for adsite_type in adsite_types:
            adsite_type_int = site_type_codes[adsite_type]

            # get adsorption sites for a nanocluster
            adspos = cluster.get_sites(adsite_type_int)
            sites_surface_atoms = cluster.site_surface_atom_ids[
                adsite_type_int]

            # get descriptor, one row per site
            desc = cluster.get_sites_descriptor(adsite_type_int)
            desc_lst.extend(desc[i] for i in range(desc.shape[0]))

            adsorbate_lst = adsorbate_pos_to_atoms_lst(adspos, adsorbate_name)

            # loop over each adsorbate
            for adsorbate, surface_atoms in zip(adsorbate_lst,
                                                sites_surface_atoms):
                joint_atoms, cluster_ids, adsorbate_ids = \
                    join_cluster_adsorbate(atoms, adsorbate)
                site_ids = surface_atoms.tolist()

                # start from the source document so unrelated fields
                # carry over
                dct = copy.deepcopy(source_simulation)
                # calculation originated from this:
                dct["source_id"] = calc_id
                dct["workflow_id"] = workflow_id
                dct["atoms"] = ase_to_atoms_dict(joint_atoms)
                dct["operations"] = [{"add_adsorbate": 1}]
                dct["adsorbates"].append({
                    "atom_ids": adsorbate_ids,
                    "reference_id": reference_id,
                    "site_class": adsite_type,
                    "site_ids": site_ids,
                })
                # empty previous input
                dct["inp"] = {
                    "adsite_type": adsite_type,
                    "adsorbate": adsorbate_name,
                }
                # empty previous output
                dct["output"] = {"surface_atoms": site_ids}

                # getting only id for uploading simulations in chunks
                dct["_id"] = _query_id_counter_and_increment(
                    'simulations', db)
                simulations_chunk_list.append(dct)

                # update internal workflow data
                new_calc_ids.append(dct["_id"])

        # insert_many rejects an empty batch (e.g. no site types given)
        if simulations_chunk_list:
            db["simulations"].insert_many(simulations_chunk_list)

    descmatrix = np.array(desc_lst)

    # saves descmatrix as a path to a numpy array
    update_spec["temp"]["descmatrix"] = write_descmatrix(descmatrix)
    update_spec["temp"]["calc_ids"] = new_calc_ids
    # Drop the per-firework entries from the forwarded spec; tolerate
    # specs that never carried them.
    update_spec.pop("_category", None)
    update_spec.pop("name", None)
    return FWAction(update_spec=update_spec)
def run_task(self, fw_spec):
    """Collect reaction energies and site information of finished runs.

    The analysis ids recorded in ``temp.calc_analysis_ids_dict`` are put
    in the order of ``temp.calc_ids`` and written into ``calc_ids``
    (replacing the whole list when ``chunk_size`` is -1, otherwise the
    slice ``[n_calcs_started - chunk_size, n_calcs_started)``). For each
    of those simulations the task

    * determines the adsorption site closest to the final adsorbate
      position, compares it with the initially stored site and writes
      the result back to the database document,
    * records ``output.is_converged``,
    * computes a reaction energy as the simulation's total energy minus
      the total energy of every referenced simulation (adsorbates,
      nanoclusters, substrates).

    Args:
        fw_spec (dict): Firework spec. Reads ``temp.calc_ids``,
            ``temp.calc_analysis_ids_dict``, ``n_calcs_started`` and
            ``extdb_connect``. Modified in place: ``temp.property``,
            ``temp.is_converged_list``, ``temp.is_same_site_list`` and
            ``temp.calc_ids`` are updated; ``temp.analysis_ids`` and
            ``temp.calc_analysis_ids_dict`` are reset.

    Returns:
        FWAction: action whose ``update_spec`` is the modified spec
        without the ``_category`` and ``name`` entries.
    """
    temp = fw_spec["temp"]
    calc_analysis_ids_dict = temp["calc_analysis_ids_dict"]
    chunk_size = int(self["chunk_size"])
    n_calcs_started = int(fw_spec["n_calcs_started"])
    calc_ids = temp["calc_ids"]
    n_calcs = len(calc_ids)
    reaction_energies_list = temp.get("property",
                                      np.zeros(n_calcs).tolist())
    is_converged_list = temp.get("is_converged_list",
                                 np.zeros(n_calcs).tolist())
    is_same_site_list = temp.get("is_same_site_list",
                                 np.zeros(n_calcs).tolist())

    # analysis ids become part of calc_ids; bring them into calc_ids order
    analysis_ids = [
        calc_analysis_ids_dict[str(calc_id)]
        for calc_id in calc_ids
        if str(calc_id) in calc_analysis_ids_dict
    ]

    logging.debug("chunk_size %s %s", chunk_size, type(chunk_size))
    if chunk_size == -1:
        calc_ids = analysis_ids
        id_range = range(len(calc_ids))
    else:
        # overwrite the ids of the chunk that was just processed
        calc_ids[n_calcs_started - chunk_size:n_calcs_started] = analysis_ids
        id_range = range(n_calcs_started - chunk_size, n_calcs_started)
    calc_ids_chunk = analysis_ids

    simulations = fetch_simulations(fw_spec["extdb_connect"], calc_ids_chunk)
    logging.info("Gather Properties of following calculations:")
    logging.info(calc_ids_chunk)
    ext_db = get_external_database(fw_spec["extdb_connect"])

    # compute reaction energy and store them as lists for ml
    logging.debug("id_range %s", id_range)
    for idx, calc_id in zip(id_range, calc_ids_chunk):
        simulation = simulations[str(calc_id)]

        # get closest site classified
        cluster_atoms, adsorbate_atoms, _, _, _, _ = \
            split_nanocluster_and_adsorbates(simulation)
        cluster = cluskit.Cluster(cluster_atoms)
        cluster.get_sites(-1)
        # assumes only one adsorbate
        final_position = adsorbate_atoms.get_positions()[-1]
        closest_sitetype, closest_site_id = cluster.find_closest_site(
            final_position)
        closest_site = cluster.site_surface_atom_ids[closest_sitetype][
            closest_site_id]
        # a single atom index is wrapped so it can be treated like the
        # multi-atom site index arrays below
        if isinstance(closest_site, (int, np.integer)):
            closest_site = np.array([closest_site])

        adsorbates = simulation["adsorbates"]
        initial_site = adsorbates[0].get("site_ids", [])
        if isinstance(initial_site, (int, np.integer)):
            initial_site = np.array([initial_site])
        logging.debug("closest_site %s initial_site %s", closest_site,
                      initial_site)
        logging.debug("closest_sitetype %s %s", closest_sitetype,
                      type(closest_sitetype))

        # same site if every atom of the final site was already part of
        # the initial site
        is_same_site = set(closest_site).issubset(set(initial_site))
        adsorbates[0]["site_ids"] = closest_site
        adsorbates[0]["site_class"] = closest_sitetype
        if len(initial_site) == 0:
            logging.warning(
                "No initial site information found! Could not verify if adsorbate moved to different adsorption site"
            )
        logging.debug("calc_id %s is_same_site %s", calc_id, is_same_site)

        ext_db["simulations"].update_one({"_id": int(calc_id)}, {
            "$set": {
                "adsorbates.0.site_class": int(closest_sitetype),
                "adsorbates.0.site_ids": closest_site.tolist(),
                "output.is_same_site": is_same_site
            }
        })

        is_converged_list[idx] = simulation["output"]["is_converged"]
        is_same_site_list[idx] = is_same_site
        logging.debug("is_converged %s idx %s", is_converged_list[idx], idx)

        # start from the simulation's own total energy and subtract the
        # total energy of every referenced component
        reaction_energy = simulation["output"].get("total_energy", 0.0)
        logging.debug("energy before adding references %s", reaction_energy)
        component_types = (
            simulation["adsorbates"],
            simulation["nanoclusters"],
            simulation["substrates"],
        )
        for components in component_types:
            for component in components:
                reference_id = component["reference_id"]
                try:
                    reference_simulation = simulations[str(reference_id)]
                except KeyError:
                    # reference was not part of this chunk
                    logging.info("getting reference from database")
                    reference_simulation = ext_db["simulations"].find_one(
                        {"_id": reference_id})
                try:
                    total_energy = reference_simulation["output"][
                        "total_energy"]
                except (KeyError, TypeError):
                    # TypeError: find_one returned None (unknown id)
                    logging.warning(
                        "total_energy not found! Not contributing to reaction energy!"
                    )
                    total_energy = 0.0
                try:
                    reaction_energy -= float(total_energy)
                except (TypeError, ValueError):
                    logging.warning("Energy not understood!")
                    logging.warning(total_energy)
                logging.debug("%s reference %s", reaction_energy,
                              reference_id)
        reaction_energies_list[idx] = reaction_energy

    temp["calc_analysis_ids_dict"] = {}
    temp["property"] = reaction_energies_list
    temp["is_converged_list"] = is_converged_list
    temp["is_same_site_list"] = is_same_site_list
    temp["analysis_ids"] = []
    temp["calc_ids"] = calc_ids
    logging.debug("reaction_energies_list %s (%s entries)",
                  reaction_energies_list, len(reaction_energies_list))
    logging.debug("is_converged_list %s", is_converged_list)
    logging.debug("calc_ids %s", calc_ids)

    # Drop the per-firework entries from the forwarded spec; tolerate
    # specs that never carried them.
    fw_spec.pop("_category", None)
    fw_spec.pop("name", None)
    return FWAction(update_spec=fw_spec)