Ejemplo n.º 1
0
def enumerate_chiral_molecules(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Enumerates all possible enantiomers of a molecule. If the chirality of
       an atom is given, that chiral center is not varied. Only the chirality
       of unspecified chiral centers is varied.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step. If 0, the function returns immediately without doing anything.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multiprocess mode.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. If None, the molecules
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :return: None. The generated molecules are handed, together with
       contnrs, to ChemUtils.bst_for_each_contnr_no_opt.
    :rtype: None
    """

    # No point in continuing if no variants were requested.
    if max_variants_per_compound == 0:
        return

    Utils.log("Enumerating all possible enantiomers for all molecules...")

    # Group the molecules so you can feed them to parallelizer.
    # NOTE(review): the argument order here is (mol, thoroughness,
    # max_variants_per_compound), whereas the sibling ring-conformer step
    # passes (mol, max_variants_per_compound, thoroughness, ...). Confirm
    # against the signature of parallel_get_chiral.
    params = tuple(
        (mol, thoroughness, max_variants_per_compound)
        for contnr in contnrs
        for mol in contnr.mols
    )

    # Run it through the parallelizer (or serially if there is none).
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(
            params, parallel_get_chiral, num_procs, job_manager
        )
    else:
        tmp = [parallel_get_chiral(*args) for args in params]

    # Remove Nones (failed molecules)
    clean = Parallelizer.strip_none(tmp)

    # Flatten the data into a single list.
    flat = Parallelizer.flatten_list(clean)

    # Get the indexes of the ones that failed to generate.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, flat)

    # Go through the missing ones and throw a message.
    for miss_indx in contnr_idxs_of_failed:
        failed_contnr = contnrs[miss_indx]
        Utils.log(
            "\tCould not generate valid enantiomers for "
            + failed_contnr.orig_smi
            + " ("
            + failed_contnr.name
            + "), so using existing "
            + "(unprocessed) structures."
        )
        for mol in failed_contnr.mols:
            mol.genealogy.append("(WARNING: Unable to generate enantiomers)")
            # Add the unprocessed molecule to the list that is actually
            # passed on below. (Appending to `clean` at this point would have
            # no effect, because `flat` has already been derived from it.)
            flat.append(mol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, flat, max_variants_per_compound, thoroughness
    )
Ejemplo n.º 2
0
def generate_alternate_3d_nonaromatic_ring_confs(contnrs,
                                                 max_variants_per_compound,
                                                 thoroughness, num_procs,
                                                 second_embed, job_manager,
                                                 parallelizer_obj):
    """Docking programs like Vina rotate chemical moieties around their
       rotatable bonds, so it's not necessary to generate a larger rotomer
       library for each molecule. The one exception to this rule is
       non-aromatic rings, which can assume multiple conformations (boat vs.
       chair, etc.). This function generates a few low-energy ring structures
       for each molecule with a non-aromatic ring(s).

    :param contnrs: A list of containers (MolContainer.MolContainer). The
       mols of containers with non-aromatic rings are replaced in place by
       the lowest-energy conformers found.
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param second_embed: Whether to try to generate 3D coordinates using an
        older algorithm if the better (default) algorithm fails. This can add
        run time, but sometimes converts certain molecules that would
        otherwise fail.
    :type second_embed: bool
    :param job_manager: The multiprocess mode.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. If None, the molecules
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :return: Always None. Returns early if no container has a non-aromatic
       ring.
    :rtype: None
    """

    # Let the user know you've started this step.
    Utils.log("Generating several conformers of molecules with non-aromatic " +
              "rings (boat vs. chair, etc.)...")

    # Create parameters (inputs) to feed to the parallelizer, keeping track
    # of which containers have non-aromatic rings.
    params = []
    ones_with_nonaro_rngs = set()
    for contnr_idx, contnr in enumerate(contnrs):
        if contnr.num_nonaro_rngs > 0:
            ones_with_nonaro_rngs.add(contnr_idx)
            for mol in contnr.mols:
                params.append((mol, max_variants_per_compound, thoroughness,
                               second_embed))
    params = tuple(params)

    # If there are no compounds with non-aromatic rings, no need to continue.
    if not ones_with_nonaro_rngs:
        return  # There are no such ligands to process.

    # Run it through the parallelizer (or serially if there is none).
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_get_ring_confs, num_procs,
                                   job_manager)
    else:
        tmp = [parallel_get_ring_confs(*args) for args in params]

    # Flatten the results.
    results = Parallelizer.flatten_list(tmp)

    # Group by mol. You can't use existing functions because they would
    # require you to recalculate already calculated energies. Key is the
    # container index; value is a list of (energy, mol) pairs.
    grouped = {}
    for mol in results:
        # Save the energy as a prop while you're here.
        energy = mol.conformers[0].energy
        mol.mol_props["Energy"] = energy

        # Add the mol with its energy to the appropriate entry in grouped,
        # making that entry if needed.
        grouped.setdefault(mol.contnr_idx, []).append((energy, mol))

    # Now, for each container that got results, keep only the best ones.
    # Every list in grouped has at least one entry by construction.
    for contnr_idx, lst_enrgy_mol_pairs in grouped.items():
        # Note that this only affects containers that had non-aromatic rings.
        contnrs[contnr_idx].mols = []

        # Sorting by energy (first item in pair).
        # NOTE(review): on an energy tie the tuple comparison falls through
        # to comparing the mol objects themselves; confirm they are
        # orderable.
        lst_enrgy_mol_pairs.sort()

        # Add only the top ones to the container mol list.
        for energy, mol in lst_enrgy_mol_pairs[:max_variants_per_compound]:
            contnrs[contnr_idx].add_mol(mol)

    # Containers with non-aromatic rings that are absent from grouped did not
    # yield any alternate conformers. Record that in the genealogy of their
    # (unchanged) molecules. This has to be a separate pass: iterating over
    # grouped alone can never reach a container that has no results.
    for contnr_idx in sorted(ones_with_nonaro_rngs):
        if contnr_idx in grouped:
            continue
        for mol in contnrs[contnr_idx].mols:
            mol.genealogy.append(
                "(WARNING: Could not generate alternate conformations " +
                "of nonaromatic ring)")
Ejemplo n.º 3
0
def make_tauts(contnrs, max_variants_per_compound, thoroughness, num_procs,
               job_manager, let_tautomers_change_chirality, parallelizer_obj):
    """Generates tautomers of the molecules. Note that some of the generated
    tautomers are not realistic. If you find a certain improbable
    substructure keeps popping up, add it to the list in the
    `prohibited_substructures` definition found with MyMol.py, in the function
    remove_bizarre_substruc().

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step. If 0, the function returns immediately without doing anything.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param let_tautomers_change_chirality: Whether to allow tautomers that
      change the total number of chiral centers.
    :type let_tautomers_change_chirality: bool
    :param parallelizer_obj: The Parallelizer object. If None, the molecules
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :return: None. The surviving tautomers are handed, together with
       contnrs, to ChemUtils.bst_for_each_contnr_no_opt.
    :rtype: None
    """

    # No need to proceed if there are no max variants.
    if max_variants_per_compound == 0:
        return

    Utils.log("Generating tautomers for all molecules...")

    # Create the parameters to feed into the parallelizer object: one entry
    # per molecule, identified by its container and its index within that
    # container.
    params = tuple(
        (contnr, mol_index, max_variants_per_compound)
        for contnr in contnrs
        for mol_index, _ in enumerate(contnr.mols)
    )

    # Run the tautomizer through the parallel object (or serially if there
    # is none).
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_make_taut, num_procs,
                                   job_manager)
    else:
        tmp = [parallel_make_taut(*args) for args in params]

    # Flatten the resulting list of lists.
    taut_data = Parallelizer.flatten_list(tmp)

    # Remove bad tautomers.
    taut_data = tauts_no_break_arom_rngs(contnrs, taut_data, num_procs,
                                         job_manager, parallelizer_obj)

    if not let_tautomers_change_chirality:
        taut_data = tauts_no_elim_chiral(contnrs, taut_data, num_procs,
                                         job_manager, parallelizer_obj)

    # NOTE: the following additional filter is currently disabled.
    # taut_data = tauts_no_change_hs_to_cs_unless_alpha_to_carbnyl(
    #    contnrs, taut_data, num_procs, job_manager, parallelizer_obj
    # )

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(contnrs, taut_data,
                                         max_variants_per_compound,
                                         thoroughness)
Ejemplo n.º 4
0
def add_hydrogens(contnrs, min_pH, max_pH, st_dev, max_variants_per_compound,
                  thoroughness, num_procs, job_manager,
                  parallelizer_obj):
    """Adds hydrogen atoms to molecule containers, as appropriate for a given
       pH.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param min_pH: The minimum pH to consider.
    :type min_pH: float
    :param max_pH: The maximum pH to consider.
    :type max_pH: float
    :param st_dev: The standard deviation. See Dimorphite-DL paper.
    :type st_dev: float
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. If None, the containers
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :return: None. The ionized molecules are handed, together with contnrs,
       to ChemUtils.bst_for_each_contnr_no_opt.
    :rtype: None
    """

    Utils.log("Ionizing all molecules...")

    # Make a simple dictionary with the ionization parameters. The same
    # dictionary object is shared by every input below.
    protonation_settings = {
        "min_ph": min_pH,
        "max_ph": max_pH,
        "pka_precision": st_dev,
        "max_variants": thoroughness * max_variants_per_compound,
    }

    # Format the inputs for use in the parallelizer. Containers whose
    # canonical SMILES is not a string are skipped here; they are then
    # handled by the fallback below if they end up unrepresented in the
    # results.
    inputs = tuple(
        (cont, protonation_settings)
        for cont in contnrs
        if isinstance(cont.orig_smi_canonical, str)
    )

    # Run the parallelizer (or process serially) and collect the results.
    if parallelizer_obj is not None:
        results = parallelizer_obj.run(
            inputs, parallel_add_H, num_procs, job_manager
        )
    else:
        results = [parallel_add_H(*args) for args in inputs]

    results = Parallelizer.flatten_list(results)

    # Dimorphite-DL might not have generated ionization states for some
    # molecules. Identify those that are missing.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, results)

    # For those molecules, just use the molecule built from the original
    # input SMILES string.
    for miss_indx in contnr_idxs_of_failed:
        failed_contnr = contnrs[miss_indx]
        Utils.log(
            "\tWARNING: Gypsum-DL produced no valid ionization states for " +
            failed_contnr.orig_smi + " (" +
            failed_contnr.name + "), so using the original " +
            "smiles."
        )

        amol = failed_contnr.mol_orig_frm_inp_smi
        amol.contnr_idx = miss_indx

        # Save this failure to the genealogy record (replacing whatever
        # genealogy the molecule had before).
        amol.genealogy = [
            amol.orig_smi + " (source)",
            amol.orig_smi_deslt + " (desalted)",
            "(WARNING: Gypsum-DL could not assign ionization states)"
        ]

        # Save this one to the results too, even though not processed
        # properly.
        results.append(amol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, results, max_variants_per_compound, thoroughness
    )