def get_singlesites_workflow(template_path,
                             username,
                             password,
                             worker_target_path=None,
                             structures=None,
                             extdb_ids=None,
                             source_path=None,
                             reference_energy=0.0,
                             adsorbate_name='H',
                             chunk_size=100,
                             max_calculations=10000,
                             adsite_types=None,
                             threshold=0.1,
                             n_max_restarts=1,
                             skip_dft=False,
                             is_safeguard=True,
                             extdb_connect=None):
    """
    Attention! This workflow is for atomic adsorbates only!

    Workflow to determine the adsorption sites and energies of a set of
    nanocluster structures. The adsorption sites are determined by the
    python package cluskit and then ranked by farthest point sampling
    based on their structural local dissimilarity.
    The adsorption energy is determined by a simulation code (e.g. CP2K)
    in chunks in a loop. The adsorption energies of the uncalculated
    structures are inferred by machine learning.
    Once the generalization error is low enough, the workflow stops.

    Args:
        template_path (str) :   absolute path to input file for
                                calculations. It works as a template
                                which is later modified by the
                                simulation-specific Firework.
        username (str) :        user who executed the workflow
        password (str) :        password for user to upload to the
                                database
        worker_target_path (str) :  absolute path on computing resource,
                                    directory needs to exist
        structures (list) : list of ase.Atoms objects from where the
                            workflow is started.
        extdb_ids (list) :  unique identifiers of the simulations
                            collection which are used to start the
                            workflow
        source_path (str) : absolute path on the computing resource to
                            the directory where to read the structures
                            from
        reference_energy (float) :  reference energy for the adsorbate.
                                    Can be the total energy of the
                                    isolated adsorbate molecule or a
                                    different reference point
        adsorbate_name (str) :  element symbol of the adsorbed atom
        chunk_size (int) :  number of calculations to be run
                            simultaneously (default 100). -1 means all
                            calculations are run at once, in a single
                            iteration.
        max_calculations (int) :    upper bound on the number of
                                    calculations. The loop is unrolled
                                    into
                                    int(max_calculations / chunk_size)
                                    iterations.
        adsite_types (list) :   adsorption site types, can contain any
                                combination of "top", "bridge", "hollow".
                                Defaults to all three.
        threshold (float) :     ML accuracy of convergence criterion.
                                When below, the workflow is defused.
        n_max_restarts (int) :  number of times the calculation is
                                restarted upon failure
        skip_dft (bool) :   If set to true, the simulation step is
                            skipped in all following simulation runs.
                            Instead the structure is returned unchanged.
        is_safeguard (bool) :   if False, the workflow is not paused when
                                not all simulation jobs converge properly
                                after the maximum number of restarts.
        extdb_connect (dict) :  dictionary containing the keys host,
                                username, password, authsource and
                                db_name. Defaults to an empty dict.

    Returns:
        fireworks.Workflow : singlesites Fireworks Workflow object

    Raises:
        ValueError : if chunk_size is neither -1 nor a positive number,
                     or if none of structures, extdb_ids and source_path
                     is given
    """
    # Validate the chunking first, before any file is read or any
    # Firework is created. Previously chunk_size=-1 produced a negative
    # iteration count (no calculation loop at all) and chunk_size=0
    # raised a bare ZeroDivisionError halfway through the setup.
    if chunk_size == -1:
        # everything is calculated in one single chunk
        max_iterations = 1
    elif chunk_size < 1:
        raise ValueError(
            'chunk_size has to be -1 (all at once) or a positive number!')
    else:
        max_iterations = int(max_calculations / chunk_size)

    # Fresh objects per call instead of mutable default arguments
    if adsite_types is None:
        adsite_types = ["top", "bridge", "hollow"]
    if extdb_connect is None:
        extdb_connect = {}

    with open(template_path, "r") as f:
        template = f.read()

    # FireWork: Initialize workflow with workflow_id from external database
    parameters = {
        "template": template,
        "template_path": template_path,
        "worker_target_path": worker_target_path,
        "extdb_ids": extdb_ids,
        "source_path": source_path,
        "reference_energy": reference_energy,
        "max_calculations": max_calculations,
        "adsorbate_name": adsorbate_name,
        "chunk_size": chunk_size,
        "adsite_types": adsite_types,
        "descriptor": "soap",
        "descriptor_params": {
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
            "crossover": True,
            "sparse": False
        },
        "simulation_method": "cp2k",
        "threshold": threshold,
        "n_max_restarts": n_max_restarts,
        "workflow_type": "singlesites",
    }

    fw_init = initialize_workflow_data(username,
                                       password,
                                       parameters,
                                       name="UNNAMED",
                                       workflow_type="singlesites",
                                       extdb_connect=extdb_connect)

    # FireWork: Read nanocluster structures and initialise a database
    # object containing set information.
    # Precedence of the inputs: structures, then extdb_ids, then source_path
    if structures is not None:
        jsonified_structures = [
            ase_to_atoms_dict(atoms) for atoms in structures
        ]
        fw_get_structures = start_from_structures(jsonified_structures)
    elif extdb_ids is not None:
        fw_get_structures = start_from_database(extdb_ids)
    elif source_path is not None:
        fw_get_structures = read_structures(source_path)
    else:
        raise ValueError(
            'structures, extdb_ids or source_path contain no entries!')

    # FireWork: Determine adsites and add to database
    fw_get_adsites = get_adsites(
        reference_energy=reference_energy,
        adsorbate_name=adsorbate_name,
        adsite_types=adsite_types,
        descriptor="soap",
        descriptor_params={
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
        },
    )

    # FireWork: FPS ranking
    fw_rank_adsites = rank_adsites()

    # Firework: setup folders for DFT calculations
    fw_setup_folders = setup_folders(target_path=worker_target_path,
                                     name="cp2k_singlesites_id")

    # add above Fireworks with links
    workflow_list = [
        fw_init,
        fw_get_structures,
        fw_get_adsites,
        fw_rank_adsites,
        fw_setup_folders,
    ]

    links_dict = {
        fw_init: [fw_get_structures],
        fw_get_structures: [fw_get_adsites],
        fw_get_adsites: [fw_rank_adsites],
        fw_rank_adsites: [fw_setup_folders],
    }

    ### loop starts ###
    # Firework the next chunk calculation has to wait for: the folder
    # setup in the first iteration, the previous convergence check later on
    fw_previous = fw_setup_folders
    for _ in range(max_iterations):
        # FireWork: setup, run and extract DFT calculation
        # (involves checking for errors in DFT and rerunning)
        fw_chunk_calculations = chunk_calculations(
            template=template,
            target_path=worker_target_path,
            chunk_size=chunk_size,
            n_max_restarts=n_max_restarts,
            simulation_method="cp2k",
            skip_dft=skip_dft,
            is_safeguard=is_safeguard)
        workflow_list.append(fw_chunk_calculations)
        links_dict[fw_previous] = [fw_chunk_calculations]

        # FireWork: update database,
        # (includes reading relaxed structure and energy)
        fw_update_converged_data = update_converged_data(
            chunk_size=chunk_size)
        workflow_list.append(fw_update_converged_data)
        links_dict[fw_chunk_calculations] = [fw_update_converged_data]

        # FireWork: machine learning from database
        fw_get_mae = get_mae(target_path=worker_target_path)
        workflow_list.append(fw_get_mae)
        links_dict[fw_update_converged_data] = [fw_get_mae]

        # FireWork: check if converged, give intermediary overview.
        # give summary when finished
        fw_check_convergence = check_convergence(threshold=threshold)
        workflow_list.append(fw_check_convergence)
        links_dict[fw_get_mae] = [fw_check_convergence]

        fw_previous = fw_check_convergence
    ### loop ends ###

    wf = Workflow(workflow_list, links_dict)
    return wf
def get_coverage_ladder_workflow(template_path,
                                 username,
                                 password,
                                 worker_target_path=None,
                                 start_ids=None,
                                 reference_energy=0.0,
                                 free_energy_correction=0.0,
                                 adsorbate_name='H',
                                 max_iterations=100,
                                 n_max_restarts=1,
                                 skip_dft=False,
                                 is_safeguard=True,
                                 bond_length=1.5,
                                 d=4,
                                 l=2,
                                 k=7,
                                 initial_direction=1,
                                 ranking_metric="similarity",
                                 extdb_connect=None):
    """
    Workflow to determine a stable coverage of a nanocluster with single
    adsorbate atoms. One adsorbate at a time is added or removed until
    certain break criteria are met. Currently only d and max_iterations
    are stopping criteria.
    d, l, k, initial_direction and ranking_metric are parameters specific
    to the coverage ladder workflow.

    Args:
        template_path (str) :   absolute path to input file for
                                calculations. It works as a template
                                which is later modified by the
                                simulation-specific Firework.
        username (str) :        user who executed the workflow
        password (str) :        password for user to upload to the
                                database
        worker_target_path (str) :  absolute path on computing resource,
                                    directory needs to exist
        start_ids (list) :  unique identifiers of the simulations
                            collection which are used to start the
                            workflow
        reference_energy (float) :  reference energy for the adsorbate.
                                    Can be the total energy of the
                                    isolated adsorbate molecule or a
                                    different reference point
        free_energy_correction (float) :    free energy correction of the
                                            adsorption reaction at hand
        adsorbate_name (str) :  element symbol of the adsorbed atom
        max_iterations (int) :  maximum number of iterations in the
                                workflow
        n_max_restarts (int) :  number of times the calculation is
                                restarted upon failure
        skip_dft (bool) :   If set to true, the simulation step is
                            skipped in all following simulation runs.
                            Instead the structure is returned unchanged.
        is_safeguard (bool) :   if False, the workflow is not paused when
                                not all simulation jobs converge properly
                                after the maximum number of restarts.
        bond_length (float) :   distance in angstrom under which two
                                adsorbed atoms are considered bound,
                                hence too close
        d (int) :   maximum depth of the coverage ladder
                    (termination criterion)
        l (int) :   number of low-energy structures to carry over to the
                    next step
        k (int) :   number of empty candidate sites for adding /
                    adsorbed atoms for removing to consider per step
        initial_direction (bool) :  True will force the initial step to
                                    add an adsorbate, False will force
                                    the initial step to remove an
                                    adsorbate
        ranking_metric (str) :  'similarity' or 'distance'. Metric based
                                on which to choose k candidates
                                (empty sites / adsorbates)
        extdb_connect (dict) :  dictionary containing the keys host,
                                username, password, authsource and
                                db_name. Defaults to an empty dict.

    Returns:
        fireworks.Workflow : coverageladder Fireworks Workflow object
    """
    # Fresh object per call instead of a mutable default argument
    if extdb_connect is None:
        extdb_connect = {}

    with open(template_path, "r") as f:
        template = f.read()

    # FireWork: Initialize workflow with workflow_id from external database
    parameters = {
        "template": template,
        "template_path": template_path,
        "worker_target_path": worker_target_path,
        "start_ids": start_ids,
        "reference_energy": reference_energy,
        "max_iterations": max_iterations,
        "adsorbate_name": adsorbate_name,
        "descriptor": "soap",
        "descriptor_params": {
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
            "crossover": True,
            "sparse": False
        },
        "simulation_method": "cp2k",
        "n_max_restarts": n_max_restarts,
        "workflow_type": "coverageladder",
        "d": d,
        "l": l,
        "k": k,
        "bond_length": bond_length,
        "ranking_metric": ranking_metric,
    }

    fw_init = initialize_workflow_data(username,
                                       password,
                                       parameters,
                                       name="UNNAMED",
                                       workflow_type="coverageladder",
                                       extdb_connect=extdb_connect)

    # FireWork: Initialize coverage ladder
    fw_start_coverage_ladder = start_coverage_ladder(
        start_ids,
        initial_direction=initial_direction,
        reference_energy=reference_energy,
        free_energy_correction=free_energy_correction)

    # add above Fireworks with links
    workflow_list = [
        fw_init,
        fw_start_coverage_ladder,
    ]

    links_dict = {
        fw_init: [fw_start_coverage_ladder],
    }

    ### loop starts ###
    # Firework each add/remove step has to wait for: the ladder start in
    # the first iteration, the previous ladder decision afterwards
    fw_previous = fw_start_coverage_ladder
    for i in range(max_iterations):
        # the folder setup and the calculations of one iteration share
        # the same name
        iteration_name = "cp2k_ladder_iter_" + str(i)

        # Firework: add or remove one adsorbate, several times
        fw_add_remove_adsorbate = add_remove_adsorbate(
            bond_length=bond_length,
            k=k,
            ranking_metric=ranking_metric)
        workflow_list.append(fw_add_remove_adsorbate)
        links_dict[fw_previous] = [fw_add_remove_adsorbate]

        # Firework: setup folders for DFT calculations
        fw_setup_folders = setup_folders(target_path=worker_target_path,
                                         name=iteration_name)
        workflow_list.append(fw_setup_folders)
        links_dict[fw_add_remove_adsorbate] = [fw_setup_folders]

        # FireWork: setup, run and extract DFT calculation
        # (involves checking for errors in DFT and rerunning)
        fw_chunk_calculations = chunk_calculations(
            template=template,
            target_path=worker_target_path,
            chunk_size=-1,
            n_max_restarts=n_max_restarts,
            simulation_method="cp2k",
            name=iteration_name,
            skip_dft=skip_dft,
            is_safeguard=is_safeguard)
        workflow_list.append(fw_chunk_calculations)
        links_dict[fw_setup_folders] = [fw_chunk_calculations]

        # FireWork: update ladder
        fw_gather_ladder = gather_ladder()
        workflow_list.append(fw_gather_ladder)
        links_dict[fw_chunk_calculations] = [fw_gather_ladder]

        # Firework: Decision to go up or down the coverage ladder
        fw_step_coverage_ladder = step_coverage_ladder(
            l=l,
            d=d,
        )
        workflow_list.append(fw_step_coverage_ladder)
        links_dict[fw_gather_ladder] = [fw_step_coverage_ladder]

        fw_previous = fw_step_coverage_ladder
    ### loop ends ###

    wf = Workflow(workflow_list, links_dict)
    return wf
def get_nanoclusters_workflow(template_path,
                              username,
                              password,
                              worker_target_path=None,
                              structures=None,
                              extdb_ids=None,
                              source_path=None,
                              reference_energy=0.0,
                              atomic_energies=None,
                              n_max_restarts=1,
                              skip_dft=False,
                              is_safeguard=True,
                              extdb_connect=None):
    """
    Workflow to relax the structure of a set of nanoclusters using a
    simulation software (e.g. CP2K). The cohesive energies are calculated
    and summarized.

    Args:
        template_path (str) :   absolute path to input file for
                                calculations. It works as a template
                                which is later modified by the
                                simulation-specific Firework.
        username (str) :        user who executed the workflow
        password (str) :        password for user to upload to the
                                database
        worker_target_path (str) :  absolute path on computing resource,
                                    directory needs to exist
        structures (list) : list of ase.Atoms objects from where the
                            workflow is started.
        extdb_ids (list) :  unique identifiers of the simulations
                            collection which are used to start the
                            workflow
        source_path (str) : absolute path on the computing resource to
                            the directory where to read the structures
                            from
        reference_energy (float) :  reference energy for the adsorbate.
                                    Can be the total energy of the
                                    isolated adsorbate molecule or a
                                    different reference point. Here it is
                                    only recorded in the workflow
                                    parameters.
        atomic_energies (dict) :    used for computing cohesive energies,
                                    not required. Defaults to an empty
                                    dict.
        n_max_restarts (int) :  number of times the calculation is
                                restarted upon failure
        skip_dft (bool) :   If set to true, the simulation step is
                            skipped in all following simulation runs.
                            Instead the structure is returned unchanged.
        is_safeguard (bool) :   if False, the workflow is not paused when
                                not all simulation jobs converge properly
                                after the maximum number of restarts.
        extdb_connect (dict) :  dictionary containing the keys host,
                                username, password, authsource and
                                db_name. Defaults to an empty dict.

    Returns:
        fireworks.Workflow : nanocluster Fireworks Workflow object

    Raises:
        ValueError : if none of structures, extdb_ids and source_path is
                     given
    """
    # Fresh objects per call instead of mutable default arguments
    if atomic_energies is None:
        atomic_energies = {}
    if extdb_connect is None:
        extdb_connect = {}

    with open(template_path, "r") as f:
        template = f.read()

    # FireWork: Initialize workflow with workflow_id from external database
    parameters = {
        "template": template,
        "template_path": template_path,
        "worker_target_path": worker_target_path,
        "extdb_ids": extdb_ids,
        "source_path": source_path,
        "reference_energy": reference_energy,
        "simulation_method": "cp2k",
        # record the value which is actually handed to chunk_calculations
        # (this used to be hard-coded to 1)
        "n_max_restarts": n_max_restarts,
        "workflow_type": "relax_nanoclusters",
        "atomic_energies": atomic_energies,
    }

    fw_init = initialize_workflow_data(username,
                                       password,
                                       parameters,
                                       name="UNNAMED",
                                       workflow_type="relax_nanoclusters",
                                       extdb_connect=extdb_connect)

    # FireWork: Read nanocluster structures and initialise a database
    # object containing set information.
    # Precedence of the inputs: structures, then extdb_ids, then source_path
    if structures is not None:
        jsonified_structures = [
            ase_to_atoms_dict(atoms) for atoms in structures
        ]
        fw_get_structures = start_from_structures(jsonified_structures)
    elif extdb_ids is not None:
        fw_get_structures = start_from_database(extdb_ids)
    elif source_path is not None:
        fw_get_structures = read_structures(source_path)
    else:
        raise ValueError(
            'structures, extdb_ids or source_path contain no entries!')

    # Firework: setup folders for DFT calculations
    fw_setup_folders = setup_folders(target_path=worker_target_path,
                                     name="cp2k_nanoclusters_id")

    # FireWork: setup, run and extract DFT calculation
    # (involves checking for errors in DFT and rerunning)
    fw_chunk_calculations = chunk_calculations(
        template=template,
        target_path=worker_target_path,
        n_max_restarts=n_max_restarts,
        simulation_method="cp2k",
        skip_dft=skip_dft,
        is_safeguard=is_safeguard)

    # FireWork: compare stability of nanoclusters.
    # Computes cohesive energies if atomic_energies of all involved
    # elements are given
    fw_compare_nanoclusters = compare_nanoclusters(
        atomic_energies=atomic_energies)

    # add above Fireworks with links
    workflow_list = [
        fw_init,
        fw_get_structures,
        fw_setup_folders,
        fw_chunk_calculations,
        fw_compare_nanoclusters,
    ]

    links_dict = {
        fw_init: [fw_get_structures],
        fw_get_structures: [fw_setup_folders],
        fw_setup_folders: [fw_chunk_calculations],
        fw_chunk_calculations: [fw_compare_nanoclusters],
    }

    wf = Workflow(workflow_list, links_dict)
    return wf
def get_coverage_workflow(template_path,
                          username,
                          password,
                          worker_target_path=None,
                          structures=None,
                          extdb_ids=None,
                          source_path=None,
                          reference_energy=0.0,
                          adsorbate_name='H',
                          max_iterations=10000,
                          adsite_types=None,
                          n_max_restarts=1,
                          skip_dft=False,
                          is_safeguard=True,
                          bond_length=1.4,
                          n_remaining="",
                          extdb_connect=None):
    """
    Workflow to determine a stable coverage of a nanocluster with single
    adsorbate atoms. As a first step, adsorbates are put on top, bridge
    and hollow sites. Once the structure is relaxed by DFT, formed
    adsorbate molecules (pairs of atoms) are replaced by a single
    adsorbate. The procedure is repeated until no adsorbate molecules
    form.

    Args:
        template_path (str) :   absolute path to input file for
                                calculations. It works as a template
                                which is later modified by the
                                simulation-specific Firework.
        username (str) :        user who executed the workflow
        password (str) :        password for user to upload to the
                                database
        worker_target_path (str) :  absolute path on computing resource,
                                    directory needs to exist
        structures (list) : list of ase.Atoms objects from where the
                            workflow is started.
        extdb_ids (list) :  unique identifiers of the simulations
                            collection which are used to start the
                            workflow
        source_path (str) : absolute path on the computing resource to
                            the directory where to read the structures
                            from
        reference_energy (float) :  reference energy for the adsorbate.
                                    Can be the total energy of the
                                    isolated adsorbate molecule or a
                                    different reference point
        adsorbate_name (str) :  element symbol of the adsorbed atom
        max_iterations (int) :  maximum number of iterations in the
                                workflow
        adsite_types (list) :   adsorption site types, can contain any
                                combination of "top", "bridge", "hollow".
                                Defaults to all three.
        n_max_restarts (int) :  number of times the calculation is
                                restarted upon failure
        skip_dft (bool) :   If set to true, the simulation step is
                            skipped in all following simulation runs.
                            Instead the structure is returned unchanged.
        is_safeguard (bool) :   if False, the workflow is not paused when
                                not all simulation jobs converge properly
                                after the maximum number of restarts.
        bond_length (float) :   distance in angstrom under which two
                                adsorbed atoms are considered bound,
                                hence too close
        n_remaining (int) : number of adsorbates which should remain
                            after the first pre-DFT pruning of the
                            adsorbate coverage. Any falsy value (the
                            default "" as well as 0) selects pruning by
                            bond_length instead.
        extdb_connect (dict) :  dictionary containing the keys host,
                                username, password, authsource and
                                db_name. Defaults to an empty dict.

    Returns:
        fireworks.Workflow : coverage Fireworks Workflow object

    Raises:
        ValueError : if none of structures, extdb_ids and source_path is
                     given
    """
    # Fresh objects per call instead of mutable default arguments
    if adsite_types is None:
        adsite_types = ["top", "bridge", "hollow"]
    if extdb_connect is None:
        extdb_connect = {}

    with open(template_path, "r") as f:
        template = f.read()

    # FireWork: Initialize workflow with workflow_id from external database
    # NOTE(review): the workflow_type recorded here ("pertype_coverage")
    # differs from the one passed to initialize_workflow_data ("coverage")
    # - confirm which one is intended
    parameters = {
        "template": template,
        "template_path": template_path,
        "worker_target_path": worker_target_path,
        "extdb_ids": extdb_ids,
        "source_path": source_path,
        "reference_energy": reference_energy,
        "max_iterations": max_iterations,
        "adsorbate_name": adsorbate_name,
        "adsite_types": adsite_types,
        "descriptor": "soap",
        "descriptor_params": {
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
            "crossover": True,
            "sparse": False
        },
        "simulation_method": "cp2k",
        "n_max_restarts": n_max_restarts,
        "workflow_type": "pertype_coverage",
    }

    fw_init = initialize_workflow_data(username,
                                       password,
                                       parameters,
                                       name="UNNAMED",
                                       workflow_type="coverage",
                                       extdb_connect=extdb_connect)

    # FireWork: Read nanocluster structures and initialise a database
    # object containing set information.
    # Precedence of the inputs: structures, then extdb_ids, then source_path
    if structures is not None:
        jsonified_structures = [
            ase_to_atoms_dict(atoms) for atoms in structures
        ]
        fw_get_structures = start_from_structures(jsonified_structures)
    elif extdb_ids is not None:
        fw_get_structures = start_from_database(extdb_ids)
    elif source_path is not None:
        fw_get_structures = read_structures(source_path)
    else:
        raise ValueError(
            'structures, extdb_ids or source_path contain no entries!')

    # FireWork: Determine adsites and add to database
    # create structure with coverage.
    # The caller's adsorbate_name and adsite_types are forwarded; they
    # used to be hard-coded to 'H' and all three site types here.
    fw_get_per_type_coverage = get_per_type_coverage(
        reference_energy=reference_energy,
        adsorbate_name=adsorbate_name,
        adsite_types=adsite_types,
        descriptor="soap",
        descriptor_params={
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
            "crossover": True,
            "sparse": False
        },
    )

    # FireWork: before running DFT eliminate too close adsorbates
    # eliminate adsorbate pairs too close
    if n_remaining:
        fw_eliminate_pairs = eliminate_closest(adsorbate_name=adsorbate_name,
                                               n_remaining=n_remaining)
    else:
        fw_eliminate_pairs = eliminate_pairs(adsorbate_name=adsorbate_name,
                                             bond_length=bond_length)

    # add above Fireworks with links
    workflow_list = [
        fw_init,
        fw_get_structures,
        fw_get_per_type_coverage,
        fw_eliminate_pairs,
    ]

    links_dict = {
        fw_init: [fw_get_structures],
        fw_get_structures: [fw_get_per_type_coverage],
        fw_get_per_type_coverage: [fw_eliminate_pairs],
    }

    ### loop starts ###
    for i in range(max_iterations):
        # the folder setup and the calculations of one iteration share
        # the same name
        iteration_name = "cp2k_coverage_iter_" + str(i)

        # Firework: setup folders for DFT calculations.
        # fw_eliminate_pairs is the pre-DFT pruning step in the first
        # iteration and the pruning step of the previous iteration
        # afterwards, since it is reassigned at the end of the loop body.
        fw_setup_folders = setup_folders(target_path=worker_target_path,
                                         name=iteration_name)
        workflow_list.append(fw_setup_folders)
        links_dict[fw_eliminate_pairs] = [fw_setup_folders]

        # FireWork: setup, run and extract DFT calculation
        # (involves checking for errors in DFT and rerunning)
        fw_chunk_calculations = chunk_calculations(
            template=template,
            target_path=worker_target_path,
            chunk_size=-1,
            n_max_restarts=n_max_restarts,
            simulation_method="cp2k",
            name=iteration_name,
            skip_dft=skip_dft,
            is_safeguard=is_safeguard)
        workflow_list.append(fw_chunk_calculations)
        links_dict[fw_setup_folders] = [fw_chunk_calculations]

        # FireWork: update database,
        # (includes reading relaxed structure and energy)
        fw_update_converged_data = update_converged_data(chunk_size=-1)
        workflow_list.append(fw_update_converged_data)
        links_dict[fw_chunk_calculations] = [fw_update_converged_data]

        # eliminate adsorbate pairs too close
        # early exit here
        fw_eliminate_pairs = eliminate_pairs(adsorbate_name=adsorbate_name,
                                             bond_length=bond_length)
        workflow_list.append(fw_eliminate_pairs)
        links_dict[fw_update_converged_data] = [fw_eliminate_pairs]
    ### loop ends ###

    wf = Workflow(workflow_list, links_dict)
    return wf
def get_uniquemolsites_workflow(template_path,
                                username,
                                password,
                                worker_target_path=None,
                                structures=None,
                                extdb_ids=None,
                                source_path=None,
                                reference_energy=0.0,
                                adsorbate=None,
                                adsite_types=None,
                                threshold=0.001,
                                n_max_restarts=1,
                                skip_dft=False,
                                is_safeguard=True,
                                extdb_connect=None):
    """
    Workflow to determine the adsorption sites and energies of a set of
    nanocluster structures. The adsorption sites are determined by the
    python package cluskit and then ranked by farthest point sampling
    based on their structural local dissimilarity. Only sites which are
    more dissimilar than the given threshold are computed.
    The adsorption energy is determined by a simulation code (e.g. CP2K).

    Args:
        template_path (str) :   absolute path to input file for
                                calculations. It works as a template
                                which is later modified by the
                                simulation-specific Firework.
        username (str) :        user who executed the workflow
        password (str) :        password for user to upload to the
                                database
        worker_target_path (str) :  absolute path on computing resource,
                                    directory needs to exist
        structures (list) : list of ase.Atoms objects from where the
                            workflow is started.
        extdb_ids (list) :  unique identifiers of the simulations
                            collection which are used to start the
                            workflow
        source_path (str) : absolute path on the computing resource to
                            the directory where to read the structures
                            from
        reference_energy (float) :  reference energy for the adsorbate.
                                    Can be the total energy of the
                                    isolated adsorbate molecule or a
                                    different reference point
        adsorbate (dict) :  adsorbed molecule as atoms dict. Contains an
                            "X" dummy atom which indicates the anchor
                            point to the nanocluster. Defaults to an
                            empty dict.
                            NOTE(review): the value is passed through
                            ase_to_atoms_dict, so presumably an ase.Atoms
                            object is expected - confirm.
        adsite_types (list) :   adsorption site types, can contain any
                                combination of "top", "bridge", "hollow".
                                Defaults to all three.
        threshold (float) :     threshold of similarity metric between
                                the local structures of the adsorption
                                sites. Only sites which are more
                                dissimilar than the given threshold are
                                computed
        n_max_restarts (int) :  number of times the calculation is
                                restarted upon failure
        skip_dft (bool) :   If set to true, the simulation step is
                            skipped in all following simulation runs.
                            Instead the structure is returned unchanged.
        is_safeguard (bool) :   if False, the workflow is not paused when
                                not all simulation jobs converge properly
                                after the maximum number of restarts.
        extdb_connect (dict) :  dictionary containing the keys host,
                                username, password, authsource and
                                db_name. Defaults to an empty dict.

    Returns:
        fireworks.Workflow : uniquemolsites Fireworks Workflow object

    Raises:
        ValueError : if none of structures, extdb_ids and source_path is
                     given
    """
    # Fresh objects per call instead of mutable default arguments
    if adsorbate is None:
        adsorbate = {}
    if adsite_types is None:
        adsite_types = ["top", "bridge", "hollow"]
    if extdb_connect is None:
        extdb_connect = {}

    with open(template_path, "r") as f:
        template = f.read()

    # translate adsorbate molecule to json format
    adsorbate_dict = ase_to_atoms_dict(adsorbate)

    # FireWork: Initialize workflow with workflow_id from external database
    # NOTE(review): the workflow_type recorded here ("molsinglesites")
    # differs from the one passed to initialize_workflow_data
    # ("uniquemolsites") - confirm which one is intended
    parameters = {
        "template": template,
        "template_path": template_path,
        "worker_target_path": worker_target_path,
        "extdb_ids": extdb_ids,
        "source_path": source_path,
        "reference_energy": reference_energy,
        "adsorbate": adsorbate_dict,
        "adsite_types": adsite_types,
        "descriptor": "soap",
        "descriptor_params": {
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0,
            "crossover": True,
            "sparse": False
        },
        "simulation_method": "cp2k",
        "threshold": threshold,
        "n_max_restarts": n_max_restarts,
        "workflow_type": "molsinglesites",
    }

    fw_init = initialize_workflow_data(username,
                                       password,
                                       parameters,
                                       name="UNNAMED",
                                       workflow_type="uniquemolsites",
                                       extdb_connect=extdb_connect)

    # FireWork: Read nanocluster structures and initialise a database
    # object containing set information.
    # Precedence of the inputs: structures, then extdb_ids, then source_path
    if structures is not None:
        jsonified_structures = [
            ase_to_atoms_dict(atoms) for atoms in structures
        ]
        fw_get_structures = start_from_structures(jsonified_structures)
    elif extdb_ids is not None:
        fw_get_structures = start_from_database(extdb_ids)
    elif source_path is not None:
        fw_get_structures = read_structures(source_path)
    else:
        raise ValueError(
            'structures, extdb_ids or source_path contain no entries!')

    # FireWork: Determine adsites and add to database
    fw_get_adsites = get_monodentate_unique_adsites(
        reference_energy=reference_energy,
        adsorbate=adsorbate_dict,
        adsite_types=adsite_types,
        descriptor="soap",
        descriptor_params={
            "nmax": 9,
            "lmax": 6,
            "rcut": 5.0
        },
        threshold=threshold,
    )

    # Firework: setup folders for DFT calculations
    fw_setup_folders = setup_folders(target_path=worker_target_path,
                                     name="cp2k_uniquemolsites_id")

    # FireWork: setup, run and extract DFT calculation
    # (involves checking for errors in DFT and rerunning)
    fw_chunk_calculations = chunk_calculations(
        template=template,
        target_path=worker_target_path,
        n_max_restarts=n_max_restarts,
        simulation_method="cp2k",
        skip_dft=skip_dft,
        is_safeguard=is_safeguard)

    # FireWork: update database,
    # (includes reading relaxed structure and energy)
    fw_update_converged_data = update_converged_data(chunk_size=-1)

    # all Fireworks in execution order, each one linked to its successor
    workflow_list = [
        fw_init,
        fw_get_structures,
        fw_get_adsites,
        fw_setup_folders,
        fw_chunk_calculations,
        fw_update_converged_data,
    ]

    links_dict = {
        fw_init: [fw_get_structures],
        fw_get_structures: [fw_get_adsites],
        fw_get_adsites: [fw_setup_folders],
        fw_setup_folders: [fw_chunk_calculations],
        fw_chunk_calculations: [fw_update_converged_data],
    }

    wf = Workflow(workflow_list, links_dict, name="uniquemolsites")
    return wf