def perform_ga(autotst_object,
               initial_pop=None,
               top_percent=0.3,
               tolerance=0.0001,
               max_generations=500,
               store_generations=False,
               store_directory=".",
               mutation_probability=0.2,
               delta=30):
    """
    Performs a genetic algorithm to determine the lowest energy conformer of a TS or molecule.

    Each generation builds `population_size` children. For every torsion a
    child either mutates (a random angle drawn from the `delta` grid) or
    inherits the angle from one of two parents sampled from the current top
    population. The loop stops when `max_generations` is reached or when the
    relative spread of `constrained_energy` within the top population drops
    below `tolerance`.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param tolerance: float of one of the possible cut off points for the analysis
    :param max_generations: int of one of the possible cut off points for the analysis
    :param store_generations: do you want to store a CSV file of each generation
    :param store_directory: the directory where you want the CSV files stored
    :param mutation_probability: float of the chance of mutation
    :param delta: the degree change in dihedral angle between each possible dihedral angle

    :return results: a DataFrame containing the final generation
    :return unique_conformers: a dictionary with indices of unique torsion combinations and entries of energy of those torsions

    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters...")
        # Forward `delta` so the initial population lies on the same angle grid
        # as the mutations below (matches `perform_simple_es`).
        initial_pop = create_initial_population(autotst_object, delta=delta)

    possible_dihedrals = np.arange(0, 360, delta)
    top = select_top_population(initial_pop, top_percent=top_percent)

    population_size = initial_pop.shape[0]

    results = initial_pop

    # Independent checks (not elif) are kept on purpose: if an object matches
    # more than one type the last match wins, as before.
    recognised = False
    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        logging.info("The object given is a `AutoTST_Molecule` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles
        recognised = True

    if isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        logging.info("The object given is a `AutoTST_Reaction` object")
        torsions = autotst_object.ts.torsions
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label
        recognised = True

    if isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        logging.info("The object given is a `AutoTST_TS` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_ts
        label = autotst_object.label
        recognised = True

    if not recognised:
        # Previously this fell through to an UnboundLocalError on `ase_object`.
        raise TypeError(
            "Unsupported object of type {}; expected an AutoTST molecule, "
            "reaction or TS.".format(type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use GA, you must attach an ASE calculator to the `ase_molecule`."

    # Column layout is the same for every generation, so build it once.
    n_torsions = len(torsions)
    columns = (["constrained_energy", "relaxed_energy"]
               + ["torsion_" + str(i) for i in range(n_torsions)]
               + ["relaxed_torsion_" + str(i) for i in range(n_torsions)])

    gen_number = 0
    complete = False
    unique_conformers = {}
    while not complete:
        gen_number += 1
        logging.info("Performing GA on generation {}".format(gen_number))

        r = []
        # Cache of energies keyed by the torsion tuple, so identical children
        # within one generation are only evaluated once.
        relaxations = {}

        # `random.sample` needs a real sequence on Python 3; a pandas Index
        # is not one, so materialise the labels once per generation.
        parent_pool = list(top.index)

        for individual in range(population_size):
            parent_0, parent_1 = random.sample(parent_pool, 2)
            dihedrals = []
            for index, torsion in enumerate(torsions):
                if random.random() < mutation_probability:
                    dihedral = np.random.choice(possible_dihedrals)
                elif 0.5 > random.random():
                    dihedral = results["torsion_" + str(index)].loc[parent_0]
                else:
                    dihedral = results["torsion_" + str(index)].loc[parent_1]

                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask

                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            dihed = tuple(dihedrals)
            if dihed in relaxations:
                logging.info("Found previous relaxations, using it to save time...")
                constrained_energy, relaxed_energy, ase_copy = relaxations[dihed]
            else:
                constrained_energy, relaxed_energy, ase_copy = get_energies(
                    autotst_object)
                relaxations[dihed] = (constrained_energy, relaxed_energy, ase_copy)

            relaxed_torsions = []
            for torsion in torsions:
                i, j, k, l = torsion.indices
                # Snap the relaxed angle to the nearest multiple of 30 degrees
                # and wrap negative values by adding 360.
                # NOTE(review): the 30 is hard-coded rather than taken from
                # `delta` -- confirm whether that is intended.
                angle = round(ase_copy.get_dihedral(i, j, k, l), -1)
                angle = int(30 * round(float(angle) / 30))
                if angle < 0:
                    angle += 360
                relaxed_torsions.append(angle)

            r.append([constrained_energy, relaxed_energy] +
                     dihedrals + relaxed_torsions)

        results = pd.DataFrame(r)
        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(gen_number))
        results.columns = columns
        results = results.sort_values("constrained_energy")

        unique_conformers = get_unique_conformers(results, unique_conformers)

        if store_generations:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_ga_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        top = select_top_population(results, top_percent)

        stats = top.describe()

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. GA complete.")
        # Relative spread of the constrained energy within the top population.
        if abs((stats["constrained_energy"]["max"] - stats["constrained_energy"]["min"]) / stats["constrained_energy"]["min"]) < tolerance:
            complete = True
            logging.info("Cutoff criteria reached. GA complete.")

    return results, unique_conformers
def perform_simple_es(autotst_object,
                      initial_pop=None,
                      top_percent=0.3,
                      min_rms=60,
                      max_generations=500,
                      store_generations=False,
                      store_directory=".",
                      delta=30):
    """
    Performs evolutionary strategy to determine the lowest energy conformer of a TS or molecule.

    Each generation first partially relaxes every member of the top
    population, then draws `population_size` new individuals whose
    non-terminal torsions are sampled from a normal distribution with the
    per-torsion mean and standard deviation of that relaxed top population.
    The loop stops when `max_generations` is reached or when the mean squared
    difference between the best and worst member of the new top population
    drops below `min_rms`.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param min_rms: float of one of the possible cut off points for the analysis
    :param max_generations: int of one of the possible cut off points for the analysis
    :param store_generations: do you want to store CSV files of each generation
    :param store_directory: the directory where you want the CSV files stored
    :param delta: the degree change in dihedral angle between each possible dihedral angle

    :return results: a DataFrame containing the final generation
    :return unique_conformers: a dictionary with indices of unique torsion combinations and entries of energy of those torsions

    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters..."
        )
        initial_pop = create_initial_population(autotst_object, delta=delta)

    top = select_top_population(initial_pop, top_percent=top_percent)

    population_size = initial_pop.shape[0]

    results = initial_pop

    # Independent checks (not elif) are kept on purpose: if an object matches
    # more than one type the last match wins, as before.
    recognised = False
    if isinstance(autotst_object, autotst.species.Species):
        logging.info("The object given is a `Molecule` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles
        recognised = True

    if isinstance(autotst_object, autotst.reaction.Reaction):
        logging.info("The object given is a `Reaction` object")
        torsions = autotst_object.ts.torsions
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label
        recognised = True

    if isinstance(autotst_object, autotst.reaction.TS):
        logging.info("The object given is a `TS` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_ts
        label = autotst_object.label
        recognised = True

    if not recognised:
        # Previously this fell through to an UnboundLocalError on `ase_object`.
        raise TypeError(
            "Unsupported object of type {}; expected an AutoTST species, "
            "reaction or TS.".format(type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use ES, you must attach an ASE calculator to the `ase_molecule`."

    gen_number = 0
    complete = False
    unique_conformers = {}

    # Only the non-terminal torsions are varied by this strategy.
    _, non_terminal_torsions = find_terminal_torsions(autotst_object)

    while not complete:
        # --- Step 1: partially relax every member of the top population ---
        relaxed_top = []
        # Column 0 holds the energy; the remaining columns are torsion angles.
        for combo in top.iloc[:, 1:].values:
            for index, torsion in enumerate(non_terminal_torsions):
                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask
                dihedral = combo[index]
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            update_from_ase(autotst_object)

            relaxed_e, relaxed_object = partial_optimize_mol(autotst_object)

            new_dihedrals = []
            for torsion in non_terminal_torsions:
                i, j, k, l = torsion.indices
                d = relaxed_object.get_dihedral(a1=i, a2=j, a3=k, a4=l)
                new_dihedrals.append(d)

            relaxed_top.append([relaxed_e] + new_dihedrals)

        columns = top.columns
        top = pd.DataFrame(relaxed_top, columns=columns)

        if store_generations:
            save_name = "{}_relaxed_top_es_generation_{}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, save_name)
            top.to_csv(f)

        # --- Step 2: sample a new population around the relaxed top ---
        gen_number += 1
        logging.info("Performing ES on generation {}".format(gen_number))

        # `top` does not change while the population is generated, so compute
        # its statistics once instead of once per individual and torsion.
        top_means = top.mean()
        top_stds = top.std()

        r = []
        for individual in range(population_size):
            dihedrals = []
            for index, torsion in enumerate(non_terminal_torsions):
                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask
                column = "torsion_" + str(index)
                dihedral = random.gauss(top_means[column], top_stds[column])
                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            energy = get_energy(autotst_object)

            r.append([energy] + dihedrals)

        results = pd.DataFrame(r)
        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(
                gen_number))

        results.columns = top.columns
        results = results.sort_values("energy")

        unique_conformers = get_unique_conformers(results, unique_conformers,
                                                  min_rms)

        if store_generations:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_es_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        # --- Step 3: select the next top population and test convergence ---
        top = select_top_population(results, top_percent)

        best = top.iloc[0, 1:]
        worst = top.iloc[-1, 1:]

        # NOTE(review): despite the name this is the mean *squared* difference
        # (no square root) and it does not account for the 360-degree
        # periodicity of the angles -- confirm this is the intended criterion.
        rms = ((best - worst)**2).mean()

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. ES complete.")
        if rms < min_rms:
            complete = True
            logging.info("Cutoff criteria reached. ES complete.")

    return results, unique_conformers
def perform_brute_force(autotst_object,
                        delta=float(30),
                        store_results=True,
                        store_directory="."):
    """
    Performs a brute force conformer analysis of a molecule or a transition state

    Every combination of torsion angles on the `delta` grid is applied to the
    structure, the constrained and relaxed energies are obtained from
    `get_energies`, and the results are collected in a DataFrame.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param delta: the degree change in dihedral angle between each possible dihedral angle
    :param store_results: do you want to store the results DataFrame as a CSV file
    :param store_directory: the directory where you want the CSV file stored

    :return results: a DataFrame containing one row per torsion combination
    :return unique_conformers: a dictionary with indices of unique torsion combinations and entries of energy of those torsions

    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    # Takes each of the molecule objects
    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        ase_object = autotst_object.ase_molecule
        torsions = autotst_object.torsions
        file_name = autotst_object.smiles + "_brute_force.csv"

    elif isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        ase_object = autotst_object.ts.ase_ts
        torsions = autotst_object.ts.torsions
        file_name = autotst_object.label + "_brute_force.csv"

    elif isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        ase_object = autotst_object.ase_ts
        torsions = autotst_object.torsions
        file_name = autotst_object.label + "_brute_force.csv"

    else:
        # Previously this fell through to an UnboundLocalError on `torsions`.
        raise TypeError(
            "Unsupported object of type {}; expected an AutoTST molecule, "
            "reaction or TS.".format(type(autotst_object).__name__))

    torsion_angles = np.arange(0, 360, delta)

    # Full Cartesian product of the angle grid over all torsions. The earlier
    # union of `combinations_with_replacement` on the forward and reversed
    # grids only produced monotone tuples, so with three or more torsions
    # combinations such as (0, 30, 0) were never sampled.
    torsion_combos = list(
        itertools.product(torsion_angles, repeat=len(torsions)))

    results = []
    for index, combo in enumerate(torsion_combos):
        logging.info("Generating conformer {}".format(index))
        logging.info(
            "Applying the torsion combo {0} to the molecule or TS.".format(
                combo))

        for tor, angle in zip(torsions, combo):
            i, j, k, l = tor.indices
            right_mask = tor.right_mask
            ase_object.set_dihedral(a1=i,
                                    a2=j,
                                    a3=k,
                                    a4=l,
                                    angle=float(angle),
                                    mask=right_mask)

        update_from_ase(autotst_object)

        constrained_energy, relaxed_energy, ase_copy = get_energies(
            autotst_object)

        relaxed_torsions = []
        for torsion in torsions:
            i, j, k, l = torsion.indices
            # Snap the relaxed angle to the nearest multiple of 30 degrees
            # and wrap negative values by adding 360.
            # NOTE(review): the 30 is hard-coded rather than taken from
            # `delta` -- confirm whether that is intended.
            angle = round(ase_copy.get_dihedral(i, j, k, l), -1)
            angle = int(30 * round(float(angle) / 30))
            if angle < 0:
                angle += 360
            relaxed_torsions.append(angle)

        results.append([constrained_energy, relaxed_energy] + list(combo) +
                       relaxed_torsions)

    brute_force = pd.DataFrame(results)
    n_torsions = len(torsions)
    columns = (["constrained_energy", "relaxed_energy"]
               + ["torsion_" + str(i) for i in range(n_torsions)]
               + ["relaxed_torsion_" + str(i) for i in range(n_torsions)])

    brute_force.columns = columns

    if store_results:
        f = os.path.join(store_directory, file_name)
        brute_force.to_csv(f)

    unique_conformers = get_unique_conformers(brute_force)

    return brute_force, unique_conformers