Beispiel #1
0
def perform_ga(autotst_object,
               initial_pop=None,
               top_percent=0.3,
               tolerance=0.0001,
               max_generations=500,
               store_generations=False,
               store_directory=".",
               mutation_probability=0.2,
               delta=30):
    """
    Performs a genetic algorithm to determine the lowest energy conformer of a TS or molecule.

    Every generation breeds as many children as there are rows in the initial
    population. For each torsion a child either mutates to a random angle taken
    from the `delta` grid, or inherits the angle of one of two parents sampled
    from the current top population. The loop stops once `max_generations` is
    reached or the relative spread of `constrained_energy` within the top
    population drops below `tolerance`.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with
        `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param tolerance: float cut off for the relative energy spread of the top population
    :param max_generations: int cut off for the number of generations
    :param store_generations: if truthy, each generation's DataFrame is written to a CSV file
    :param store_directory: the directory where the CSV files are stored
    :param mutation_probability: float of the chance of mutation
    :param delta: the degree change in dihedral angle between each possible dihedral angle

    :return results: a DataFrame containing the final generation
    :return unique_conformers: the accumulated result of `get_unique_conformers`
        (a dictionary of unique torsion combinations and their energies)

    :raises AssertionError: if no object is given or no ASE calculator is attached
    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters...")
        initial_pop = create_initial_population(autotst_object)

    possible_dihedrals = np.arange(0, 360, delta)
    top = select_top_population(initial_pop,
                                top_percent=top_percent
                                )

    population_size = initial_pop.shape[0]

    results = initial_pop

    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        logging.info("The object given is a `AutoTST_Molecule` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles

    elif isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        logging.info("The object given is a `AutoTST_Reaction` object")
        torsions = autotst_object.ts.torsions
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label

    elif isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        logging.info("The object given is a `AutoTST_TS` object")
        torsions = autotst_object.torsions
        ase_object = autotst_object.ase_ts
        label = autotst_object.label

    else:
        # Fail with a clear message instead of an unbound-local error later on.
        raise TypeError(
            "Unsupported object type for GA: {}".format(
                type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use GA, you must attach an ASE calculator to the `ase_molecule`."

    # The column layout only depends on the number of torsions, so build it once.
    torsion_count = len(torsions)
    columns = (["constrained_energy", "relaxed_energy"] +
               ["torsion_" + str(i) for i in range(torsion_count)] +
               ["relaxed_torsion_" + str(i) for i in range(torsion_count)])

    gen_number = 0
    complete = False
    unique_conformers = {}
    while not complete:
        gen_number += 1
        logging.info("Performing GA on generation {}".format(gen_number))

        r = []
        # Energies already computed for a torsion tuple within this generation.
        relaxations = {}
        # random.sample needs a real sequence; a pandas Index is not one.
        parent_pool = list(top.index)
        for individual in range(population_size):
            parent_0, parent_1 = random.sample(parent_pool, 2)
            dihedrals = []
            for index, torsion in enumerate(torsions):

                if random.random() < mutation_probability:
                    # Mutation: pick any angle from the allowed grid.
                    dihedral = np.random.choice(possible_dihedrals)
                else:
                    # Crossover: inherit from either parent with equal chance.
                    parent = parent_0 if 0.5 > random.random() else parent_1
                    dihedral = results["torsion_" + str(index)].loc[parent]

                i, j, k, l = torsion.indices
                right_mask = torsion.right_mask

                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            dihed = tuple(dihedrals)

            if dihed in relaxations:
                logging.info("Found previous relaxations, using it to save time...")
                constrained_energy, relaxed_energy, ase_copy = relaxations[dihed]
            else:
                constrained_energy, relaxed_energy, ase_copy = get_energies(
                    autotst_object)

                relaxations[dihed] = (constrained_energy, relaxed_energy, ase_copy)

            relaxed_torsions = []

            for torsion in torsions:

                i, j, k, l = torsion.indices

                # Snap the relaxed dihedral to the nearest 10, then the nearest
                # 30 degrees. The modulo folds negative values and 360 back
                # into [0, 360) so that 0 and 360 are not treated as distinct.
                angle = round(ase_copy.get_dihedral(i, j, k, l), -1)
                angle = int(30 * round(float(angle) / 30)) % 360
                relaxed_torsions.append(angle)

            r.append([constrained_energy, relaxed_energy] +
                     dihedrals + relaxed_torsions)

        results = pd.DataFrame(r, columns=columns)
        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(gen_number))

        results = results.sort_values("constrained_energy")

        unique_conformers = get_unique_conformers(results, unique_conformers)

        if store_generations:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_ga_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        top = select_top_population(results, top_percent)

        energy_stats = top.describe()["constrained_energy"]

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. GA complete.")
        # Relative spread between the best and worst energy of the top population.
        if abs((energy_stats["max"] - energy_stats["min"]) / energy_stats["min"]) < tolerance:
            complete = True
            logging.info("Cutoff criteria reached. GA complete.")

    return results, unique_conformers
Beispiel #2
0
def perform_simple_es(autotst_object,
                      initial_pop=None,
                      top_percent=0.3,
                      min_rms=60,
                      max_generations=500,
                      store_generations=False,
                      store_directory=".",
                      delta=30):
    """
    Performs evolutionary strategy to determine the lowest energy conformer of a TS or molecule.

    Each iteration first partially optimizes every member of the current top
    population, then draws a new population by sampling each non-terminal
    torsion from a Gaussian whose mean and standard deviation are taken from
    the relaxed top population. The loop stops once `max_generations` is
    reached or the best/worst difference of the new top population drops
    below `min_rms`.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param initial_pop: a DataFrame containing the initial population; created with
        `create_initial_population` when None
    :param top_percent: float of the top percentage of conformers you want to select
    :param min_rms: float cut off compared against the best/worst torsion difference;
        also forwarded to `get_unique_conformers`
    :param max_generations: int cut off for the number of generations
    :param store_generations: if truthy, the relaxed top population and each generation
        are written to CSV files
    :param store_directory: the directory where the CSV files are stored
    :param delta: the degree change in dihedral angle between each possible dihedral angle
        (only used when the initial population has to be created)

    :return results: a DataFrame containing the final generation
    :return unique_conformers: the accumulated result of `get_unique_conformers`
        (a dictionary of unique torsion combinations and their energies)

    :raises AssertionError: if no object is given or no ASE calculator is attached
    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    assert autotst_object, "No AutoTST object provided..."
    if initial_pop is None:
        logging.info(
            "No initial population provided, creating one using base parameters..."
        )
        initial_pop = create_initial_population(autotst_object, delta=delta)

    top = select_top_population(initial_pop, top_percent=top_percent)

    population_size = initial_pop.shape[0]

    results = initial_pop

    if isinstance(autotst_object, autotst.species.Species):
        logging.info("The object given is a `Molecule` object")
        ase_object = autotst_object.ase_molecule
        label = autotst_object.smiles

    elif isinstance(autotst_object, autotst.reaction.Reaction):
        logging.info("The object given is a `Reaction` object")
        ase_object = autotst_object.ts.ase_ts
        label = autotst_object.label

    elif isinstance(autotst_object, autotst.reaction.TS):
        logging.info("The object given is a `TS` object")
        ase_object = autotst_object.ase_ts
        label = autotst_object.label

    else:
        # Fail with a clear message instead of an unbound-local error later on.
        raise TypeError(
            "Unsupported object type for ES: {}".format(
                type(autotst_object).__name__))

    assert ase_object.get_calculator(
    ), "To use ES, you must attach an ASE calculator to the `ase_molecule`."
    gen_number = 0
    complete = False
    unique_conformers = {}

    # Only the non-terminal torsions are varied by this search.
    _, non_terminal_torsions = find_terminal_torsions(autotst_object)

    while not complete:

        # Step 1: relax every member of the current top population.
        relaxed_top = []
        for combo in top.iloc[:, 1:].values:
            for index, torsion in enumerate(non_terminal_torsions):
                i, j, k, l = torsion.indices
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(combo[index]),
                                        mask=torsion.right_mask)
            update_from_ase(autotst_object)

            relaxed_e, relaxed_object = partial_optimize_mol(autotst_object)

            new_dihedrals = []
            for torsion in non_terminal_torsions:
                i, j, k, l = torsion.indices
                # Positional arguments keep this call independent of the
                # method's keyword names and match the other search routines.
                new_dihedrals.append(relaxed_object.get_dihedral(i, j, k, l))

            relaxed_top.append([relaxed_e] + new_dihedrals)

        top = pd.DataFrame(relaxed_top, columns=top.columns)

        if store_generations:
            save_name = "{}_relaxed_top_es_generation_{}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, save_name)
            top.to_csv(f)

        gen_number += 1
        logging.info("Performing ES on generation {}".format(gen_number))

        # Step 2: sample a new population around the relaxed top population.
        # The statistics do not change while sampling, so compute them once.
        torsion_means = top.mean()
        # With a single row the sample standard deviation is NaN; use 0 so the
        # sampled angle collapses onto the mean instead of becoming NaN.
        torsion_stds = top.std().fillna(0.0)

        r = []

        for individual in range(population_size):
            dihedrals = []
            for index, torsion in enumerate(non_terminal_torsions):
                i, j, k, l = torsion.indices
                column = "torsion_" + str(index)

                dihedral = random.gauss(torsion_means[column],
                                        torsion_stds[column])
                dihedrals.append(dihedral)
                ase_object.set_dihedral(a1=i,
                                        a2=j,
                                        a3=k,
                                        a4=l,
                                        angle=float(dihedral),
                                        mask=torsion.right_mask)

            # Updating the molecule
            update_from_ase(autotst_object)

            energy = get_energy(autotst_object)

            r.append([energy] + dihedrals)

        results = pd.DataFrame(r, columns=top.columns)
        logging.info(
            "Creating the DataFrame of results for the {}th generation".format(
                gen_number))

        results = results.sort_values("energy")

        unique_conformers = get_unique_conformers(results, unique_conformers,
                                                  min_rms)

        if store_generations:
            # This portion stores each generation if desired
            logging.info("Saving the results DataFrame")

            generation_name = "{0}_es_generation_{1}.csv".format(
                label, gen_number)
            f = os.path.join(store_directory, generation_name)
            results.to_csv(f)

        top = select_top_population(results, top_percent)

        # Compare the torsions (all columns but the first) of the best and
        # worst member of the new top population.
        best = top.iloc[0, 1:]
        worst = top.iloc[-1, 1:]

        # NOTE(review): no square root is taken, so this is the mean squared
        # difference rather than a true RMS; left as-is because `min_rms`
        # callers may already be tuned to it -- confirm the intent.
        rms = ((best - worst)**2).mean()

        if gen_number >= max_generations:
            complete = True
            logging.info("Max generations reached. ES complete.")
        if rms < min_rms:
            complete = True
            logging.info("Cutoff criteria reached. ES complete.")

    return results, unique_conformers
Beispiel #3
0
def perform_brute_force(autotst_object,
                        delta=float(30),
                        store_results=True,
                        store_directory="."):
    """
    Performs a brute force conformer analysis of a molecule or a transition state

    Every torsion is set to every angle on the `delta` grid in [0, 360), and
    the energies of each resulting geometry are computed with `get_energies`.
    The number of evaluated conformers is therefore
    `len(grid) ** len(torsions)`.

    :param autotst_object: an autotst_ts, autotst_rxn, or autotst_molecule that you want to perform conformer analysis on
       * the ase_object of the autotst_object must have a calculator attached to it.
    :param delta: the degree change in dihedral angle between each possible dihedral angle
    :param store_results: if truthy, the results DataFrame is written to a CSV file
    :param store_directory: the directory where the CSV file is stored

    :return results: a DataFrame with one row per evaluated torsion combination
    :return unique_conformers: the result of `get_unique_conformers` for that DataFrame
        (a dictionary of unique torsion combinations and their energies)

    :raises TypeError: if `autotst_object` is not one of the supported AutoTST types
    """
    # Takes each of the molecule objects
    if isinstance(autotst_object, autotst.molecule.AutoTST_Molecule):
        ase_object = autotst_object.ase_molecule
        torsions = autotst_object.torsions
        file_name = autotst_object.smiles + "_brute_force.csv"

    elif isinstance(autotst_object, autotst.reaction.AutoTST_Reaction):
        ase_object = autotst_object.ts.ase_ts
        torsions = autotst_object.ts.torsions
        file_name = autotst_object.label + "_brute_force.csv"

    elif isinstance(autotst_object, autotst.reaction.AutoTST_TS):
        ase_object = autotst_object.ase_ts
        torsions = autotst_object.torsions
        file_name = autotst_object.label + "_brute_force.csv"

    else:
        # Fail with a clear message instead of an unbound-local error later on.
        raise TypeError(
            "Unsupported object type for brute force: {}".format(
                type(autotst_object).__name__))

    torsion_angles = np.arange(0, 360, delta)
    # The full Cartesian product is required: combinations_with_replacement
    # (even merged with its reverse) only yields monotone tuples and silently
    # skips mixed combinations such as (0, 30, 0) for three or more torsions.
    torsion_combos = list(
        itertools.product(torsion_angles, repeat=len(torsions)))

    results = []
    for index, combo in enumerate(torsion_combos):
        logging.info("Generating conformer {}".format(index))
        logging.info(
            "Applying the torsion combo {0} to the molecule or TS.".format(
                combo))
        for tor, angle in zip(torsions, combo):
            i, j, k, l = tor.indices
            ase_object.set_dihedral(a1=i,
                                    a2=j,
                                    a3=k,
                                    a4=l,
                                    angle=float(angle),
                                    mask=tor.right_mask)

        update_from_ase(autotst_object)

        constrained_energy, relaxed_energy, ase_copy = get_energies(
            autotst_object)

        relaxed_torsions = []

        for torsion in torsions:

            i, j, k, l = torsion.indices

            # Snap the relaxed dihedral to the nearest 10, then the nearest
            # 30 degrees. The modulo folds negative values and 360 back into
            # [0, 360) so that 0 and 360 are not treated as distinct.
            angle = round(ase_copy.get_dihedral(i, j, k, l), -1)
            angle = int(30 * round(float(angle) / 30)) % 360
            relaxed_torsions.append(angle)

        results.append([constrained_energy, relaxed_energy] + list(combo) +
                       relaxed_torsions)

    torsion_count = len(torsions)
    columns = (["constrained_energy", "relaxed_energy"] +
               ["torsion_" + str(i) for i in range(torsion_count)] +
               ["relaxed_torsion_" + str(i) for i in range(torsion_count)])

    brute_force = pd.DataFrame(results)
    brute_force.columns = columns

    if store_results:
        f = os.path.join(store_directory, file_name)
        brute_force.to_csv(f)

    unique_conformers = get_unique_conformers(brute_force)

    return brute_force, unique_conformers