def enumerate_chiral_molecules(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Enumerates all possible enantiomers of a molecule. If the chirality of
       an atom is given, that chiral center is not varied. Only the chirality
       of unspecified chiral centers is varied.

    Every molecule of every container is passed to parallel_get_chiral. If a
    container ends up with no generated enantiomers, a warning is logged, its
    existing molecules are tagged in their genealogy, and those molecules are
    added to the candidate list so they are still considered when the best
    variants are picked.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step. If 0, the function returns immediately without doing anything.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multiprocess mode.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. If None, the molecules
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :returns: None. The candidates are handed to
       ChemUtils.bst_for_each_contnr_no_opt (presumably updating the
       containers in place -- confirm against that helper).
    """

    # No point in continuing if none were requested.
    if max_variants_per_compound == 0:
        return

    Utils.log("Enumerating all possible enantiomers for all molecules...")

    # Group the molecules so you can feed them to parallelizer: one argument
    # tuple per molecule, across all containers.
    params = tuple(
        (mol, thoroughness, max_variants_per_compound)
        for contnr in contnrs
        for mol in contnr.mols
    )

    # Run it through the parallelizer, or serially if there is none.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_get_chiral, num_procs, job_manager)
    else:
        tmp = [parallel_get_chiral(*args) for args in params]

    # Remove Nones (failed molecules)
    clean = Parallelizer.strip_none(tmp)

    # Flatten the data into a single list.
    flat = Parallelizer.flatten_list(clean)

    # Get the indexes of the ones that failed to generate.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, flat)

    # Go through the missing ones and throw a message.
    for miss_indx in contnr_idxs_of_failed:
        failed_contnr = contnrs[miss_indx]
        Utils.log(
            "\tCould not generate valid enantiomers for "
            + failed_contnr.orig_smi
            + " ("
            + failed_contnr.name
            + "), so using existing "
            + "(unprocessed) structures."
        )
        for mol in failed_contnr.mols:
            mol.genealogy.append("(WARNING: Unable to generate enantiomers)")
            # Append to flat (the list passed on below), not to clean, which
            # is no longer read after flattening. Otherwise this fallback
            # would have no effect.
            flat.append(mol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, flat, max_variants_per_compound, thoroughness
    )
# Example #2
# 0
def add_hydrogens(contnrs, min_pH, max_pH, st_dev, max_variants_per_compound,
                  thoroughness, num_procs, job_manager,
                  parallelizer_obj):
    """Adds hydrogen atoms to molecule containers, as appropriate for a given
       pH.

    Each container whose orig_smi_canonical is a string is passed to
    parallel_add_H together with the protonation settings. Containers that
    end up with no result (including those skipped because their canonical
    SMILES is not a string) fall back to the molecule built from the input
    SMILES, with a warning logged and recorded in its genealogy.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: A list.
    :param min_pH: The minimum pH to consider.
    :type min_pH: float
    :param max_pH: The maximum pH to consider.
    :type max_pH: float
    :param st_dev: The standard deviation. See Dimorphite-DL paper.
    :type st_dev: float
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. If None, the containers
       are processed serially in this process.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :returns: None. The candidates are handed to
       ChemUtils.bst_for_each_contnr_no_opt (presumably updating the
       containers in place -- confirm against that helper).
    """

    Utils.log("Ionizing all molecules...")

    # Make a simple dictionary with the ionization parameters. The variant
    # cap is scaled by thoroughness so extra candidates are generated before
    # the best ones are selected below.
    protonation_settings = {
        "min_ph": min_pH,
        "max_ph": max_pH,
        "pka_precision": st_dev,
        "max_variants": thoroughness * max_variants_per_compound,
    }

    # Format the inputs for use in the parallelizer. Containers without a
    # string canonical SMILES are skipped here; they are picked up by the
    # "not represented" fallback further down.
    inputs = tuple(
        (cont, protonation_settings)
        for cont in contnrs
        if isinstance(cont.orig_smi_canonical, str)
    )

    # Run the parallelizer (or a serial loop) and collect the results.
    if parallelizer_obj is not None:
        results = parallelizer_obj.run(inputs, parallel_add_H, num_procs, job_manager)
    else:
        results = [parallel_add_H(cont, settings) for cont, settings in inputs]

    results = Parallelizer.flatten_list(results)

    # Dimorphite-DL might not have generated ionization states for some
    # molecules. Identify those that are missing.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, results)

    # For those molecules, just use the molecule built from the original
    # SMILES string instead.
    for miss_indx in contnr_idxs_of_failed:
        failed_contnr = contnrs[miss_indx]
        Utils.log(
            "\tWARNING: Gypsum-DL produced no valid ionization states for " +
            failed_contnr.orig_smi + " (" +
            failed_contnr.name + "), so using the original " +
            "smiles."
        )

        amol = failed_contnr.mol_orig_frm_inp_smi
        amol.contnr_idx = miss_indx

        # Save this failure to the genealogy record (replaces any previous
        # genealogy on this molecule).
        amol.genealogy = [
            amol.orig_smi + " (source)",
            amol.orig_smi_deslt + " (desalted)",
            "(WARNING: Gypsum-DL could not assign ionization states)"
        ]

        # Save this one to the results too, even though not processed
        # properly.
        results.append(amol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, results, max_variants_per_compound, thoroughness
    )