def solvate(seeds, monomer, aggregate_size, hm_orientations, qc_params,
            maximum_number_of_seeds, tabu_on=None, grid_on=None, site=None):
    """
    Add the monomer to the seeds, one monomer per aggregation cycle.

    Cycles are numbered from 2 and each one runs inside its own
    ``aggregate_NNN`` directory.  The molecules returned by add_one() for
    one cycle become the seeds of the next cycle.

    :param seeds: seed molecules for the first cycle.
    :param monomer: monomer handed unchanged to add_one().
    :param aggregate_size: number of aggregation cycles to run.
    :param hm_orientations: ``'auto'`` to start with 8 orientations and
        double the number after every cycle while it is <= 256; any other
        value is converted with int() and used for every cycle.
    :param qc_params: parameters handed unchanged to add_one().
    :param maximum_number_of_seeds: handed unchanged to add_one().
    :param tabu_on: handed unchanged to add_one().
    :param grid_on: handed unchanged to add_one().
    :param site: handed unchanged to add_one().
    :return: the ``StopIteration`` class (returned, not raised) when a stop
        signal is seen here or reported by add_one(); otherwise None.
    """
    if check_stop_signal():
        aggregator_logger.info("Function: solvate")
        return StopIteration

    auto_orientations = hm_orientations == 'auto'
    number_of_orientations = 8 if auto_orientations else int(hm_orientations)

    starting_directory = os.getcwd()
    aggregator_logger.info(
        "Starting Aggregation in\n {}".format(starting_directory))

    for aggregation_counter in range(2, aggregate_size + 2):
        if len(seeds) == 0:
            aggregator_logger.info("No seeds to process")
            return

        aggregate_id = "{:03d}".format(aggregation_counter)
        aggregate_home = 'aggregate_' + aggregate_id
        file_manager.make_directories(aggregate_home)
        os.chdir(aggregate_home)
        try:
            aggregator_logger.info(
                " Starting aggregation cycle: {}".format(aggregation_counter))
            seeds = add_one(aggregate_id, seeds, monomer,
                            number_of_orientations, qc_params,
                            maximum_number_of_seeds, tabu_on, grid_on, site)
        finally:
            # Always return to the start directory, even if add_one() raises,
            # so a failure does not leave the process inside aggregate_NNN.
            os.chdir(starting_directory)

        # add_one() signals a stop request by returning StopIteration (or
        # None when interrupted between seeds); neither supports len(), so
        # stop here instead of failing at the top of the next cycle.
        if seeds is None or seeds is StopIteration:
            aggregator_logger.info("Function: solvate")
            return StopIteration

        aggregator_logger.info(
            " Aggregation cycle: {} completed\n".format(aggregation_counter))
        if auto_orientations and number_of_orientations <= 256:
            number_of_orientations *= 2
    return
def aggregate(molecules, aggregate_sizes, hm_orientations, qc_params,
              maximum_number_of_seeds, first_pathway, number_of_pathways,
              tabu_on, grid_on, site):
    """
    Build aggregates from several kinds of molecules along Aufbau pathways.

    The work is done inside the ``aggregates`` directory, which is created
    unless read_old_path() reports a previous run (restart).  The function
    returns with that directory as the working directory.

    :param molecules: molecules or atoms for aggregation or cluster
        formation.  Each one is renamed to a lowercase letter (a, b, ...).
    :type molecules: list(Molecules)
    :param aggregate_sizes: the number of each molecule in the final cluster.
    :type aggregate_sizes: list(int)
    :param hm_orientations: number of trial orientations, or ``'auto'`` to
        start with 8 and add 8 after each pathway while the number is <= 256.
    :param qc_params: parameters for Quantum Chemistry calculations.
    :type qc_params: dict
    :param maximum_number_of_seeds: the maximum number of seeds to be
        selected for the next cycle.
    :type maximum_number_of_seeds: int
    :param first_pathway: the starting pathway number, used in directory
        names.  This helps in restarting a broken job.
    :type first_pathway: int
    :param number_of_pathways: how many pathways to explore when more than
        one kind of molecule is present.
    :type number_of_pathways: int
    :param tabu_on: toggle use of the Tabu list in generating trial
        geometries.
    :param grid_on: toggle use of the grid in generating trial geometries.
    :param site: not used now, but needed for create_trial_molecules().
    :return: the ``StopIteration`` class (returned, not raised) when a stop
        signal is seen here or reported by add_one(); otherwise None.
    """
    if check_stop_signal():
        aggregator_logger.info("Function: aggregate")
        return StopIteration

    auto_orientations = hm_orientations == 'auto'
    number_of_orientations = 8 if auto_orientations else int(hm_orientations)

    parent_folder = 'aggregates'
    old_path = read_old_path()
    restart = bool(old_path)
    if not restart:
        file_manager.make_directories(parent_folder)
    os.chdir(parent_folder)
    starting_directory = os.getcwd()

    if restart:
        aggregator_logger.info(
            f"Restarting Aggregation in\n {starting_directory}")
    else:
        aggregator_logger.info(
            f"Starting Aggregation in\n {starting_directory}")

    seed_names = string.ascii_lowercase
    ag_id = "ag"

    # One list entry per monomer to add; the id gets one "_<name>_000"
    # counter field per kind of molecule.
    monomers_to_be_added = []
    for seed_molecule, seed_name, size_of_this_seed in zip(
            molecules, seed_names, aggregate_sizes):
        seed_molecule.name = seed_name
        monomers_to_be_added.extend([seed_molecule] * size_of_this_seed)
        ag_id += f"_{seed_name}_000"

    if len(molecules) == 1:
        pathways_to_calculate = [monomers_to_be_added]
    elif restart:
        pathways_to_calculate = old_path_to_new_path(monomers_to_be_added,
                                                     old_path)
    else:
        pathways_to_calculate = select_pathways(monomers_to_be_added,
                                                number_of_pathways)

    aggregator_logger.info(
        " The following Afbau paths will be carried out")
    for path_number, path in enumerate(pathways_to_calculate):
        path_names = ''.join(member.name for member in path)
        aggregator_logger.info(f' {path_number:03d}: ' + path_names)

    initial_aggregate_id = ag_id
    outside_counter = first_pathway
    for pathway in pathways_to_calculate:
        aggregator_logger.info(f" Path: {pathway}")
        # Every pathway starts from an empty store and the initial id.
        seed_storage = OrderedDict()
        ag_id = initial_aggregate_id
        for this_monomer in pathway:
            if len(seed_storage) < 1:
                # The first monomer of a pathway is the seed itself.
                ag_id = update_id(ag_id, this_monomer.name)
                seed_storage[ag_id] = [this_monomer]
                continue

            this_seed = seed_storage[ag_id]
            ag_id = update_id(ag_id, this_monomer.name)
            ag_home = "{}_{:03d}".format(ag_id, outside_counter)
            if not os.path.exists(ag_home):
                file_manager.make_directories(ag_home)
            os.chdir(ag_home)
            try:
                new_seeds = add_one(ag_id, this_seed, this_monomer,
                                    number_of_orientations, qc_params,
                                    maximum_number_of_seeds, tabu_on,
                                    grid_on, site)
            finally:
                # Restore the directory even when add_one() raises.
                os.chdir(starting_directory)

            # add_one() reports a stop request with StopIteration (or None
            # when interrupted between seeds); len() would fail on either.
            if new_seeds is None or new_seeds is StopIteration:
                aggregator_logger.info("Function: aggregate")
                return StopIteration

            seed_storage[ag_id] = new_seeds
            if len(new_seeds) == 0:
                aggregator_logger.info(f"No molecules were found from {ag_id}"
                                       f" to continue this pathway.")
                aggregator_logger.info('Breaking! 😟')
                break
            # Drop the oldest entry; only the newest seeds are needed.
            seed_storage.popitem(last=False)

        outside_counter += 1
        # NOTE(review): nesting reconstructed from a collapsed source line;
        # the increment is assumed to happen once per pathway - confirm.
        if auto_orientations and number_of_orientations <= 256:
            number_of_orientations += 8
    return
def add_one(aggregate_id, seeds, monomer, hm_orientations, qc_params,
            maximum_number_of_seeds, tabu_on, grid_on, site):
    """
    Add one monomer to all the seed molecules.

    For every seed a ``seed_NNN`` directory is used.  Trial geometries are
    created there (or re-read when every ``trial_*.xyz`` file already
    exists) and optimised with the 'loose' threshold for up to 10 rounds.
    The loosely optimised molecules of all seeds are then reduced with
    clustering.choose_geometries() and optimised again with the 'normal'
    threshold inside the ``selected`` directory.

    Side effects: ``qc_params["opt_threshold"]`` is overwritten, and on a
    normal return the working directory is ``selected``.

    :param aggregate_id: an id for the aggregate, used in molecule ids.
    :type aggregate_id: str
    :param seeds: seed molecules to which the monomer will be added.
    :type seeds: list[Molecule]
    :param monomer: monomer molecule.
    :type monomer: Molecule
    :param hm_orientations: number of orientations to be used.
    :type hm_orientations: int
    :param qc_params: parameters needed for the calculation.
    :type qc_params: dict
    :param maximum_number_of_seeds: the maximum number of seeds to be
        selected for the next cycle.
    :type maximum_number_of_seeds: int
    :param tabu_on: toggle the use of the Tabu list.
    :param grid_on: toggle the use of the grid.
    :param site: not used here; passed on to create_trial_geometries().
    :return: the molecules refined with the 'normal' threshold; if none
        could be refined, copies of the loosely optimised molecules.  On a
        stop signal the ``StopIteration`` class is returned before the
        first seed, and None between seeds.
    """
    if check_stop_signal():
        aggregator_logger.info("Function: add_one")
        return StopIteration
    aggregator_logger.info(
        f' There are {len(seeds)} seed molecules in {aggregate_id}')
    cwd = os.getcwd()

    list_of_optimized_molecules = []
    for seed_count, each_seed in enumerate(seeds):
        if check_stop_signal():
            aggregator_logger.info("Function: add_one")
            return
        aggregator_logger.info(' Seed: {}'.format(seed_count))
        seed_id = "{:03d}".format(seed_count)
        seeds_home = 'seed_' + seed_id
        if not os.path.exists(seeds_home):
            file_manager.make_directories(seeds_home)
        os.chdir(seeds_home)
        each_seed.mol_to_xyz('seed.xyz')
        monomer.mol_to_xyz('monomer.xyz')
        mol_id = '{0}_{1}'.format(seed_id, aggregate_id)

        aggregator_logger.debug('Making orientations')
        trials_exist = all(
            os.path.exists(f"trial_{i:03d}_{mol_id}.xyz")
            for i in range(hm_orientations))
        if trials_exist:
            # Restart: every trial geometry is already on disk.
            all_orientations = read_orientations(mol_id, hm_orientations)
        else:
            all_orientations = tabu.create_trial_geometries(
                mol_id, each_seed, monomer, hm_orientations,
                tabu_on, grid_on, site)
            aggregator_logger.debug('Orientations are made.')

        not_converged = all_orientations[:]
        for round_number in range(1, 11):
            if len(not_converged) == 0:
                break
            aggregator_logger.info(
                f" Round {round_number:d} of block optimizations with"
                f" {len(not_converged):d} molecules")
            qc_params["opt_threshold"] = 'loose'
            status_list = [
                optimise(each_mol, qc_params) for each_mol in not_converged
            ]
            list_of_optimized_molecules.extend(
                mol for mol, status in zip(not_converged, status_list)
                if status is True)
            # Only intact molecules that ran out of cycles get another round.
            not_converged = [
                mol for mol, status in zip(not_converged, status_list)
                if status == 'CycleExceeded' and not tabu.broken(mol)
            ]
            not_converged = clustering.remove_similar(not_converged)

        if len(not_converged) == 0:
            aggregator_logger.info(" All molecules are processed")
        else:
            # not_converged is already filtered, so every entry is reported.
            aggregator_logger.info(
                " The following molecules are not converged"
                " after 10 rounds")
            for unfinished in not_converged:
                aggregator_logger.info(" %s", unfinished.name)
        os.chdir(cwd)

    # An existing 'selected' directory (restart) needs no bookkeeping here:
    # optimise() returns True straight away for any job whose result file
    # is already present.
    if not os.path.exists('selected'):
        file_manager.make_directories('selected')

    if len(list_of_optimized_molecules) < 2:
        selected_seeds = list_of_optimized_molecules
    else:
        aggregator_logger.info(" Clustering")
        selected_seeds = clustering.choose_geometries(
            list_of_optimized_molecules,
            maximum_number_of_seeds=maximum_number_of_seeds)

    os.chdir('selected')
    qc_params["opt_threshold"] = 'normal'
    aggregator_logger.info("Optimizing the selected molecules with higher "
                           "threshold")
    # Collect into new lists instead of removing from the list being
    # iterated, which would skip the element after every removal.
    refined = []
    less_than_ideal = []
    for each_file in selected_seeds:
        not_refined = copy.deepcopy(each_file)
        status = optimise(each_file, qc_params)
        if status is True:
            xyz_file = ('job_' + each_file.name + '/result_'
                        + each_file.name + '.xyz')
            shutil.copy(xyz_file, '.')
            refined.append(each_file)
        else:
            less_than_ideal.append(not_refined)

    if len(refined) != 0:
        return refined
    aggregator_logger.info(" The optimization could not be refined, \n"
                           " so sending the loosely optimised molecules")
    return less_than_ideal
def react(reactant_a, reactant_b, gamma_min, gamma_max, hm_orientations,
          qc_params, site, proximity_factor, tabu_on=None, grid_on=None):
    """
    The Reactor module, with checkpointing.

    This is the outer loop.  It generates all the orientations (or reloads
    them from a checkpoint), loops over all the gamma values and optimises
    the surviving orientations at each gamma with optimize_all().

    Side effects: sets the module-level ``workdir`` to the directory the
    function was called from, overwrites ``qc_params['gamma']`` with the
    zero-padded gamma string of each cycle, and removes ``jobs.pkl`` from
    ``workdir`` after the last gamma cycle.

    :param reactant_a: first reactant.
    :param reactant_b: second reactant.
    :param gamma_min: lowest gamma value.
    :param gamma_max: highest gamma value; 10 values are taken from
        np.linspace(gamma_min, gamma_max).
    :param hm_orientations: number of trial orientations.
    :param qc_params: parameters for the calculations; ``'software'`` is
        read on a fresh start.
    :param site: None to use tabu.create_trial_geometries(); otherwise
        ``site[0]`` and ``site[1]`` are passed to
        pyar.scan.generate_guess_for_bonding().
    :param proximity_factor: passed as ``d_scale`` when ``site`` is given.
    :param tabu_on: passed to tabu.create_trial_geometries().
    :param grid_on: passed to tabu.create_trial_geometries().
    :return: None
    """
    global workdir
    workdir = os.getcwd()

    # Read the checkpoint once and reuse it for both the test and the data.
    chk = readchk(workdir)
    if chk is not None:
        reactor_logger.info(
            '====================Reading from Checkpoint====================')
        gamma_list = list(chk)
        orientations_to_optimize = chk[gamma_list[0]].copy()
        os.chdir('reaction')
        cwd = os.getcwd()
        product_dir = cwd + '/products'
    else:
        file_manager.make_directories('reaction')
        os.chdir('reaction')
        cwd = os.getcwd()
        reactor_logger.info('Starting Reactor')
        reactor_logger.info(f'{hm_orientations} orientations will be tried')
        reactor_logger.info(f' Gamma (min): {gamma_min}')
        reactor_logger.info(f' Gamma (max): {gamma_max}')
        reactor_logger.debug(f'Current working directory: {cwd}')

        software = qc_params['software']
        print_header(gamma_max, gamma_min, hm_orientations, software)

        # prepare job directories
        product_dir = cwd + '/products'
        reactor_logger.debug(f'Product directory: {product_dir}')
        file_manager.make_directories(product_dir)

        file_manager.make_directories('trial_geometries')
        os.chdir('trial_geometries')
        if site is None:
            all_orientations = tabu.create_trial_geometries(
                'geom', reactant_a, reactant_b, hm_orientations,
                tabu_on, grid_on, site)
        else:
            all_orientations = pyar.scan.generate_guess_for_bonding(
                'geom', reactant_a, reactant_b, site[0], site[1],
                hm_orientations, d_scale=proximity_factor)
        os.chdir(cwd)

        # Gamma values are kept as zero-padded strings because they are
        # the keys of the checkpoint dictionary.
        gamma_list = np.linspace(gamma_min, gamma_max, num=10, dtype=float)
        gamma_list = [f"{int(gamma):04d}" for gamma in gamma_list]
        orientations_to_optimize = all_orientations[:]
        chk = {gamma: orientations_to_optimize.copy()
               for gamma in gamma_list}
        dumpchk(chk, workdir, reactor_logger)

    for en, gamma in enumerate(gamma_list):
        qc_params['gamma'] = gamma
        reactor_logger.info(f' Current gamma : {gamma}')
        gamma_id = f"{int(gamma):04d}"
        gamma_home = cwd + '/gamma_' + gamma_id
        if not os.path.exists(gamma_home):
            file_manager.make_directories(gamma_home)
        os.chdir(gamma_home)

        optimized_molecules = optimize_all(gamma_id,
                                           orientations_to_optimize,
                                           chk, product_dir, qc_params)

        reactor_logger.info(
            f" {len(optimized_molecules)} geometries from this gamma cycle"
        )
        if len(optimized_molecules) == 0:
            reactor_logger.info(
                "No orientations to be optimized for the next gamma cycle.")
            chk.clear()
            return
        if len(optimized_molecules) == 1:
            orientations_to_optimize = optimized_molecules[:]
        else:
            orientations_to_optimize = clustering.remove_similar(
                optimized_molecules)

        # Record the survivors as the starting set of the next gamma.
        if en != len(gamma_list) - 1:
            chk[gamma_list[en + 1]] = orientations_to_optimize

        reactor_logger.info(
            "Number of products found from gamma:{} = {}".format(
                gamma, len(saved_inchi_strings)))
        reactor_logger.info("{} geometries are considered for the next gamma "
                            "cycle".format(len(orientations_to_optimize)))
        reactor_logger.debug("the keys of the molecules for next gamma cycle")
        for this_orientation in orientations_to_optimize:
            reactor_logger.debug("{}".format(this_orientation.name))
        updtchk(chk, 'gamma', gamma, reactor_logger, workdir)

    os.chdir(workdir)
    os.remove('jobs.pkl')
    reactor_logger.info("Removed checkpoints!!")
    return
def optimize_all(gamma_id, orientations, chkdict, product_dir, qc_param):
    """
    Optimise every orientation at the current gamma and sort out products.

    Each molecule is optimised in its own ``orientation_<key>`` directory.
    If the optimised fragments are bonded, the geometry is relaxed and its
    InChI and SMILES strings are compared with those of the starting
    geometry: an unchanged molecule is kept for the next gamma, a changed
    one is treated as a product and saved if it is new.

    Uses the module-level ``saved_products``, ``saved_inchi_strings``,
    ``saved_smile_strings`` and ``workdir``.  The ``name`` of every
    molecule in ``orientations`` is overwritten.

    :param gamma_id: zero-padded gamma label used as the job name prefix.
    :param orientations: molecules to optimise.
    :param chkdict: checkpoint dictionary handed to updtchk().
    :param product_dir: directory that receives the xyz files of products.
    :param qc_param: parameters handed to optimise().
    :return: list of molecules to optimise at the next (higher) gamma.
    """
    # optimise() in this file reports 'CycleExceeded'; the lower-case
    # spelling tested here originally is still accepted.
    cycle_exceeded = ('cycle_exceeded', 'CycleExceeded')
    cwd = os.getcwd()
    table_of_optimized_molecules = []
    for this_molecule in orientations:
        job_key = this_molecule.name
        reactor_logger.info(' Orientation: {}'.format(job_key))
        # The last 8 characters of the name identify the orientation.
        o_key = "_{}".format(job_key[-8:])
        orientations_home = 'orientation' + o_key
        file_manager.make_directories(orientations_home)
        os.chdir(orientations_home)
        job_name = gamma_id + o_key
        this_molecule.name = job_name
        reactor_logger.info('Optimizing {}'.format(this_molecule.name))
        start_xyz_file_name = 'trial_' + this_molecule.name + '.xyz'
        this_molecule.mol_to_xyz(start_xyz_file_name)
        start_inchi = pyar.interface.babel.make_inchi_string_from_xyz(
            start_xyz_file_name)
        start_smile = pyar.interface.babel.make_smile_string_from_xyz(
            start_xyz_file_name)
        status = optimise(this_molecule, qc_param)
        # Snapshot taken before relaxation; this is what goes on to the
        # next gamma when no product is formed.
        before_relax = copy.copy(this_molecule)
        this_molecule.name = job_name
        reactor_logger.info('... completed')
        if (status is True or status == 'converged'
                or status in cycle_exceeded):
            reactor_logger.info(" E({}): {:12.7f}".format(
                job_name, this_molecule.energy))
            if this_molecule.is_bonded():
                reactor_logger.info(
                    "The fragments have close contracts. Going for relaxation")
                this_molecule.mol_to_xyz('trial_relax.xyz')
                this_molecule.name = 'relax'
                status = optimise(this_molecule, qc_param)
                this_molecule.name = job_name
                if status is True or status == 'converged':
                    this_molecule.mol_to_xyz('result_relax.xyz')
                    current_inchi = \
                        pyar.interface.babel.make_inchi_string_from_xyz(
                            'result_relax.xyz')
                    current_smile = \
                        pyar.interface.babel.make_smile_string_from_xyz(
                            'result_relax.xyz')
                    reactor_logger.info('geometry relaxed')
                    reactor_logger.info(
                        "Checking for product formation with SMILE and InChi strings"
                    )
                    reactor_logger.info(
                        "Start SMILE: {} Current SMILE: {}".format(
                            start_smile, current_smile))
                    reactor_logger.info(
                        "Start InChi: {} Current InChi: {}".format(
                            start_inchi, current_inchi))

                    if (start_inchi == current_inchi
                            and start_smile == current_smile):
                        table_of_optimized_molecules.append(before_relax)
                        reactor_logger.info(f'{job_name} is added to the table'
                                            f' to optimize with higher gamma')
                    else:
                        saved_products[job_name] = this_molecule
                        reactor_logger.info(" The geometry is different "
                                            "from the stating structure.")
                        reactor_logger.info(
                            " Checking if this is a (new)"
                            " products")
                        if (current_inchi in saved_inchi_strings.values()
                                or current_smile
                                in saved_smile_strings.values()):
                            reactor_logger.info("Both strings matches with "
                                                "those of already saved "
                                                "products. Discarded")
                        else:
                            reactor_logger.info(" New Product! Saving")
                            saved_inchi_strings[job_name] = current_inchi
                            saved_smile_strings[job_name] = current_smile
                            saved_products[job_name] = this_molecule
                            shutil.copy('result_relax.xyz',
                                        product_dir + '/' + job_name + '.xyz')
                            # NOTE(review): nesting of this early exit was
                            # reconstructed from a collapsed source line;
                            # it is assumed to belong to the new-product
                            # branch - confirm.
                            os.chdir(cwd)
                            updtchk(chkdict, 'ori', job_name, reactor_logger,
                                    workdir)
                            continue
                elif status in cycle_exceeded:
                    table_of_optimized_molecules.append(before_relax)
                    reactor_logger.info(f'{job_name} is added to the table to '
                                        f'optimize with higher gamma')
            else:
                table_of_optimized_molecules.append(this_molecule)
                reactor_logger.info(' no close contacts found')
                reactor_logger.info(f' {job_name} is added to '
                                    f'the table to optimize with higher gamma')
        updtchk(chkdict, 'ori', job_name, reactor_logger, workdir)
        os.chdir(cwd)
        sys.stdout.flush()
    return table_of_optimized_molecules
def optimise(molecule, qc_params, max_scf_retries=10):
    """
    Optimise one molecule with the program named in ``qc_params``.

    The job runs in ``job_<molecule.name>``; the working directory is
    restored before returning.  If ``result_<molecule.name>.xyz`` already
    exists there, its energy and coordinates are loaded and no calculation
    is run.

    :param molecule: molecule to optimise.  Its ``energy`` and
        ``coordinates`` are updated in place; both are set to None when the
        optimisation fails.  An empty ``name`` is replaced by 'Opt job'.
    :param qc_params: must contain 'gamma', 'opt_cycles', 'opt_threshold',
        'custom_keyword' and 'software'.
    :type qc_params: dict
    :param max_scf_retries: how many times a job reporting 'SCFFailed' is
        repeated with randomly displaced coordinates before giving up.
    :type max_scf_retries: int
    :return: True when a stored result was reused, otherwise the status
        returned by the interface's ``optimize()``.
    :raises NotImplementedError: if ``qc_params['software']`` is not one of
        the supported programs.
    """
    opt_options = {
        option: qc_params[option]
        for option in ['gamma', 'opt_cycles', 'opt_threshold']
    }

    gamma = qc_params['gamma']
    custom_keyword = qc_params['custom_keyword']

    cwd = os.getcwd()
    if molecule.name == '':
        molecule.name = 'Opt job'
    job_dir = 'job_' + molecule.name
    if not os.path.exists(job_dir):
        file_manager.make_directories(job_dir)
    os.chdir(job_dir)

    result_file = f'result_{molecule.name}.xyz'
    if os.path.exists(result_file):
        read_molecule = Molecule.from_xyz(result_file)
        molecule.energy = read_molecule.energy
        molecule.optimized_coordinates = read_molecule.coordinates
        # Also update the working geometry, as a fresh run does, so that
        # callers see the optimised structure after a restart.
        molecule.coordinates = read_molecule.coordinates
        optimiser_logger.info(
            f' {molecule.name:35s}: {molecule.energy:15.6f}')
        os.chdir(cwd)
        return True

    software = qc_params['software']
    if software == 'xtb':
        from pyar.interface import xtb
        geometry = xtb.Xtb(molecule, qc_params)
    elif software == 'xtb_turbo':
        # gamma may arrive as a number or as a zero-padded string such as
        # "0000", so compare numerically where possible.
        try:
            gamma_is_zero = float(gamma) == 0.0
        except (TypeError, ValueError):
            gamma_is_zero = False
        if gamma_is_zero:
            from pyar.interface import xtb
            geometry = xtb.Xtb(molecule, qc_params)
        else:
            from pyar.interface import xtbturbo
            geometry = xtbturbo.XtbTurbo(molecule, qc_params)
    elif software == 'turbomole':
        from pyar.interface import turbomole
        geometry = turbomole.Turbomole(molecule, qc_params)
    elif software == "mopac":
        from pyar.interface import mopac
        geometry = mopac.Mopac(molecule, qc_params)
    elif software == "orca":
        from pyar.interface import orca
        geometry = orca.Orca(molecule, qc_params,
                             custom_keyword=custom_keyword)
    elif software == 'obabel':
        from pyar.interface import babel
        geometry = babel.OBabel(molecule)
    elif software == 'psi4':
        from pyar.interface import psi4
        geometry = psi4.Psi4(molecule, qc_params)
    elif software == 'gaussian':
        from pyar.interface import gaussian
        geometry = gaussian.Gaussian(molecule, qc_params)
    else:
        # Fail with a clear message instead of an unbound 'geometry', and
        # do not leave the process inside the job directory.
        os.chdir(cwd)
        raise NotImplementedError(f"{software} is not implemented yet")

    optimize_status = geometry.optimize(opt_options)
    if optimize_status is True \
            or optimize_status == 'converged' \
            or optimize_status == 'CycleExceeded':
        molecule.energy = geometry.energy
        molecule.coordinates = geometry.optimized_coordinates
        optimiser_logger.info(f' {molecule.name:35s}: {geometry.energy:15.6f}')
    elif optimize_status == 'SCFFailed' and max_scf_retries > 0:
        # Nudge the atoms slightly and try again, a bounded number of times.
        from numpy.random import uniform
        molecule.coordinates += uniform(-0.1, 0.1,
                                        (molecule.number_of_atoms, 3))
        os.chdir(cwd)
        optimize_status = optimise(molecule, qc_params, max_scf_retries - 1)
    else:
        molecule.energy = None
        molecule.coordinates = None

    os.chdir(cwd)
    return optimize_status
def react(reactant_a, reactant_b, gamma_min, gamma_max, hm_orientations,
          qc_params, site, proximity_factor, tabu_on=None, grid_on=None):
    """
    The Reactor module (version without checkpointing).

    Creates the ``reaction`` directory, generates the trial orientations
    in ``reaction/trial_geometries`` and then walks through 10 gamma values
    between ``gamma_min`` and ``gamma_max``.  At every gamma the current
    orientations are handed to optimize_all() inside ``gamma_NNNN``; the
    molecules it returns (with similar ones removed when there is more
    than one) are the input of the next gamma.  The loop ends early when
    nothing is returned.

    ``qc_params['gamma']`` is overwritten at every cycle.

    :return: None
    """
    file_manager.make_directories('reaction')
    os.chdir('reaction')
    reaction_dir = os.getcwd()

    reactor_logger.info('Starting Reactor')
    reactor_logger.info(f'{hm_orientations} orientations will be tried')
    reactor_logger.info(f' Gamma (min): {gamma_min}')
    reactor_logger.info(f' Gamma (max): {gamma_max}')
    reactor_logger.debug(f'Current working directory: {reaction_dir}')

    print_header(gamma_max, gamma_min, hm_orientations,
                 qc_params['software'])

    # Directory that collects the products found at any gamma.
    product_dir = f'{reaction_dir}/products'
    reactor_logger.debug(f'Product directory: {product_dir}')
    file_manager.make_directories(product_dir)

    # Trial geometries are written in their own directory.
    file_manager.make_directories('trial_geometries')
    os.chdir('trial_geometries')
    if site is not None:
        all_orientations = pyar.scan.generate_guess_for_bonding(
            'geom', reactant_a, reactant_b, site[0], site[1],
            hm_orientations, d_scale=proximity_factor)
    else:
        all_orientations = tabu.create_trial_geometries(
            'geom', reactant_a, reactant_b, hm_orientations,
            tabu_on, grid_on, site)
    os.chdir(reaction_dir)

    current_set = all_orientations[:]
    for gamma in np.linspace(gamma_min, gamma_max, num=10, dtype=float):
        qc_params['gamma'] = gamma
        reactor_logger.info(f' Current gamma : {gamma}')
        gamma_id = f"{int(gamma):04d}"
        gamma_home = f'{reaction_dir}/gamma_{gamma_id}'
        file_manager.make_directories(gamma_home)
        os.chdir(gamma_home)

        # NOTE(review): this call passes four arguments, while the
        # optimize_all() visible in this file takes five (it also expects a
        # checkpoint dictionary) - confirm which definition is meant.
        optimized_molecules = optimize_all(gamma_id, current_set,
                                           product_dir, qc_params)

        how_many = len(optimized_molecules)
        reactor_logger.info(
            f" {how_many} geometries from this gamma cycle"
        )
        if how_many == 0:
            reactor_logger.info(
                "No orientations to be optimized for the next gamma cycle.")
            return

        if how_many != 1:
            current_set = clustering.remove_similar(optimized_molecules)
        else:
            current_set = optimized_molecules[:]

        reactor_logger.info(
            "Number of products found from gamma:{} = {}".format(
                gamma, len(saved_inchi_strings)))
        reactor_logger.info("{} geometries are considered for the next gamma "
                            "cycle".format(len(current_set)))
        reactor_logger.debug("the keys of the molecules for next gamma cycle")
        for survivor in current_set:
            reactor_logger.debug("{}".format(survivor.name))

    os.chdir(reaction_dir)
    return