Esempio n. 1
0
def pick_lowest_enrgy_mols(mol_lst, num, thoroughness):
    """Pick molecules with low energies. If necessary, the definition also
       makes a conformer without minimization (so not too computationally
       expensive).

    Duplicates are removed first. If no more than ``num`` unique molecules
    remain, they are all returned without any energy evaluation. Otherwise a
    random sample of ``num * thoroughness`` molecules is drawn, the energy of
    each sampled molecule's first conformer is read, and the ``num``
    lowest-energy sampled molecules are returned (lowest energy first).
    Sampled molecules that end up with no conformers are dropped.

    :param mol_lst: The list of MyMol.MyMol objects.
    :type mol_lst: list
    :param num: The number of the lowest-energy ones to keep.
    :type num: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :return: Returns a list of MyMol.MyMol, the best ones.
    :rtype: list
    """

    # Remove identical entries.
    mol_lst = list(set(mol_lst))

    # If the length of the mol_lst is less than num, just return them all.
    if len(mol_lst) <= num:
        return mol_lst

    # First, generate 3D structures. How many? num * thoroughness. mols_3d is
    # a list of Gypsum-DL MyMol.MyMol objects.
    mols_3d = Utils.random_sample(mol_lst, num * thoroughness, "")

    # Now get the energies. Each entry is (energy, index into mols_3d).
    data = []
    for i, mol in enumerate(mols_3d):
        # Make sure at least one conformer exists.
        mol.make_first_3d_conf_no_min()

        if len(mol.conformers) > 0:
            data.append((mol.conformers[0].energy, i))

    # Lowest energy first; the unique index breaks ties between equal energies.
    data.sort()

    # Keep only the best few. The stored indices refer to positions in the
    # sampled list (mols_3d), NOT in mol_lst, so the molecules must be looked
    # up in mols_3d. Indexing mol_lst here would return arbitrary molecules
    # whose energies were never ranked.
    return [mols_3d[idx] for _, idx in data[:num]]
def parallel_get_chiral(mol, max_variants_per_compound, thoroughness):
    """Enumerate chirality variants of a molecule (parallelizable).

    Chiral centers reported by ``mol.chiral_cntrs_w_unasignd()`` with a "?"
    label are treated as unassigned. If there are none, the input molecule is
    returned unchanged as the only result. Otherwise R/S combinations are
    built for those centers, a random subset of at most
    ``thoroughness * max_variants_per_compound`` combinations is kept, and one
    new molecule is created per kept combination.

    :param mol: The input molecule.
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. Together with max_variants_per_compound this
       sets how many chirality combinations are sampled here
       (thoroughness * max_variants_per_compound). Values > 1 cost more
       computation but give later steps more candidates to choose from.
    :type thoroughness: int
    :return: A list of MyMol.MyMol objects.
    :rtype: list
    """

    # Atom indices of the chiral centers whose chirality is not explicitly
    # assigned in the input molecule.
    unassigned_idxs = [
        center[0]
        for center in mol.chiral_cntrs_w_unasignd()
        if center[1] == "?"
    ]
    num_unassigned = len(unassigned_idxs)

    # Nothing to enumerate: keep the existing molecule as is.
    if num_unassigned == 0:
        return [mol]

    if num_unassigned == 1:
        # A single center. Each option is a one-character string, which the
        # zip below iterates as a single label.
        options = ["R", "S"]
    else:
        # Several centers: grow the label lists one center at a time.
        starting = [["R"], ["S"]]
        options = [["R"], ["S"]]
        for _ in range(num_unassigned - 1):
            # Stop growing once there are already more combinations than will
            # be kept. This bounds the combinatorial explosion, at the price
            # that the remaining centers are never enumerated.
            if len(options) > thoroughness * max_variants_per_compound:
                break
            options = [
                list(itertools.chain(head, tail))
                for head, tail in itertools.product(options, starting)
            ]

    # Let the user know the number of chiral centers.
    message = "".join(
        (
            "\t",
            mol.smiles(True),
            " (",
            mol.name,
            ") has ",
            str(2 ** num_unassigned),
            " enantiomers when chiral centers with ",
            "no specified chirality are systematically varied.",
        )
    )
    Utils.log(message)

    # Randomly keep only a few of the combinations, again to limit the
    # potential combinatorial explosion.
    sample_size = thoroughness * max_variants_per_compound
    sampled = Utils.random_sample(options, sample_size, "")

    # Label -> RDKit chiral tag applied for that label.
    chiral_tags = {
        "R": Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
        "S": Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
    }

    results = []
    for assignment in sampled:
        # Work on a copy of the initial rdkit molecule.
        rd_copy = copy.copy(mol.rdkit_mol)

        # Apply the labels; zip stops at the shorter of the two sequences.
        for atom_idx, label in zip(unassigned_idxs, assignment):
            if label in chiral_tags:
                rd_copy.GetAtomWithIdx(atom_idx).SetChiralTag(
                    chiral_tags[label]
                )

        # Wrap the modified rdkit molecule in a new MyMol.MyMol object.
        candidate = MyMol.MyMol(rd_copy)

        # Skip candidates flagged as having a bizarre substructure.
        if candidate.remove_bizarre_substruc():
            continue

        # Carry over bookkeeping from the parent and record this step.
        candidate.contnr_idx = mol.contnr_idx
        candidate.name = mol.name
        candidate.genealogy = mol.genealogy[:]
        candidate.genealogy.append(candidate.smiles(True) + " (chirality)")
        results.append(candidate)

    # Return the results.
    return results