Beispiel #1
0
def parallel_durrant_lab_filter(contnr, prohibited_substructs):
    """A parallelizable helper function that discards every molecule in a
       container that contains a prohibited substructure.

    A molecule is discarded when, for some pattern in prohibited_substructs,
    either durrant_lab_contains_bad_substr flags the molecule's
    orig_smi_deslt string or the molecule's rdkit_mol matches the pattern.
    Each discarded molecule is logged exactly once.

    :param contnr: The molecule container. Its mols list is modified in
       place.
    :type contnr: MolContainer.MolContainer
    :param prohibited_substructs: A list of the prohibited substructures
       (objects accepted by rdkit_mol.HasSubstructMatch).
    :type prohibited_substructs: list
    :return: Either the container with bad molecules removed, or a None
      object if no molecules remain.
    :rtype: MolContainer.MolContainer | None
    """

    # Replace any molecules that have prohibited substructure with None.
    for mi, m in enumerate(contnr.mols):
        for pattrn in prohibited_substructs:
            if durrant_lab_contains_bad_substr(
                    m.orig_smi_deslt) or m.rdkit_mol.HasSubstructMatch(pattrn):
                Utils.log("\t" + m.smiles(True) + ", a variant generated " +
                          "from " + contnr.orig_smi + " (" + m.name +
                          "), contains a prohibited substructure, so I'm " +
                          "discarding it.")

                contnr.mols[mi] = None

                # One hit is enough. Stop checking the remaining patterns so
                # the molecule is not re-tested and re-logged once per
                # pattern.
                break

    # Now go back and remove those Nones
    contnr.mols = Parallelizer.strip_none(contnr.mols)

    # If there are no molecules, mark this container for deletion.
    if len(contnr.mols) == 0:
        return None

    # Return the container
    return contnr
def desalt_orig_smi(contnrs, num_procs, job_manager, parallelizer_obj):
    """If an input molecule has multiple unconnected fragments, this removes
       all but the largest fragment.

    Each container is passed to desalter. Every non-None result is added to
    the container it was generated from; if the result's orig_smi differs
    from the container's, the container's original SMILES is updated and a
    note is added to the molecule's genealogy. Returns nothing.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param num_procs: The number of processors to use. Currently unused
       (the step always runs serially).
    :type num_procs: int
    :param job_manager: The multiprocess mode. Currently unused.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. Currently unused.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Desalting all molecules (i.e., keeping only largest fragment).")

    # Desalt each of the molecule containers. This step is very fast, so let's
    # just run it on a single processor always.
    desalted = [desalter(x) for x in contnrs]

    # Go through each contnr and update the orig_smi_deslt. If we update it,
    # also add a note in the genealogy record.
    #
    # Pair every result with its own container BEFORE dropping the Nones.
    # Stripping first and then indexing by position would shift every
    # molecule that follows a failed desalting onto the wrong container.
    for cont, desalt_mol in zip(contnrs, desalted):
        if desalt_mol is None:
            # Desalting failed for this container; nothing to add.
            continue

        if cont.orig_smi != desalt_mol.orig_smi:
            desalt_mol.genealogy.append(desalt_mol.orig_smi_deslt +
                                        " (desalted)")
            cont.update_orig_smi(desalt_mol.orig_smi_deslt)
        cont.add_mol(desalt_mol)
Beispiel #3
0
def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
    """Removes any molecules that contain prohibited substructures, per the
    durrant-lab filters.

    Each container is run through parallel_durrant_lab_filter (in parallel
    if a parallelizer object is given, serially otherwise). The surviving
    molecules are then handed, together with contnrs, to
    ChemUtils.bst_for_each_contnr_no_opt. Returns nothing.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: A list.
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Applying Durrant-lab filters to all molecules...")

    # Get the substructures you won't permit.
    prohibited_smi_substrs = [
        "C=[N-]",
        "[N-]C=[N+]",
        "[nH+]c[n-]",
        "[#7+]~[#7+]",
        "[#7-]~[#7-]",
        "[!#7]~[#7+]~[#7-]~[!#7]"  # Doesn't hit azide.
    ]
    prohibited_substructs = [
        Chem.MolFromSmarts(s) for s in prohibited_smi_substrs
    ]

    # Get the parameters to pass to the parallelizer object. Each entry is
    # the positional argument list for parallel_durrant_lab_filter.
    params = [[c, prohibited_substructs] for c in contnrs]

    # Run the filter, through the parallelizer object if there is one.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_durrant_lab_filter,
                                   num_procs, job_manager)
    else:
        # Unpack each parameter pair. Passing the pair itself as the
        # container would hand the helper a list instead of a container.
        tmp = [
            parallel_durrant_lab_filter(contnr, substructs)
            for contnr, substructs in params
        ]

    # Note that results is a list of containers.

    # Stripping out None values (containers with no molecules left).
    results = Parallelizer.strip_none(tmp)

    # You need to get the molecules as a flat array so you can run it through
    # bst_for_each_contnr_no_opt
    mols = []
    for contnr in results:
        mols.extend(contnr.mols)

    # Using this function just to make the changes.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs,
        mols,
        1000,
        1000  # max_variants_per_compound, thoroughness
    )
Beispiel #4
0
def convert_2d_to_3d(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Builds 3D small-molecule models for every molecule in every container.

    Every molecule found in the containers is passed to parallel_make_3d
    (through the parallelizer object when one is supplied, otherwise in a
    plain serial loop). None results are dropped, and the remaining ones are
    handed to ChemUtils.bst_for_each_contnr_no_opt together with contnrs.
    Returns nothing.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. With max_variants_per_compound = 5 and
       thoroughness = 2, ten molecules are generated and the best five are
       advanced. Values above 1 cost more computation but raise the chances
       of finding good molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Converting all molecules to 3D structures.")

    # One single-element argument tuple per molecule, across all containers.
    mol_args = tuple(
        (mol,) for contnr in contnrs for mol in contnr.mols
    )

    # Build the 3D models, in parallel when possible.
    if parallelizer_obj != None:
        made = parallelizer_obj.run(mol_args, parallel_make_3d, num_procs,
                                    job_manager)
    else:
        made = [parallel_make_3d(args[0]) for args in mol_args]

    # A None in the output represents a molecule that failed; drop those.
    survivors = Parallelizer.strip_none(made)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion. The trailing False is passed positionally; its
    # meaning is defined by bst_for_each_contnr_no_opt.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs,
        survivors,
        max_variants_per_compound,
        thoroughness,
        False,
    )
Beispiel #5
0
def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
    """Removes any molecules that contain prohibited substructures, per the
    durrant-lab filters.

    The SMARTS strings in the module-level
    prohibited_smi_substrs_for_substruc are compiled with
    Chem.MolFromSmarts, and each container is run through
    parallel_durrant_lab_filter (in parallel if a parallelizer object is
    given, serially otherwise). The surviving molecules are then handed,
    together with contnrs, to ChemUtils.bst_for_each_contnr_no_opt. Returns
    nothing.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: A list.
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Applying Durrant-lab filters to all molecules...")

    prohibited_substructs = [
        Chem.MolFromSmarts(s) for s in prohibited_smi_substrs_for_substruc
    ]

    # Get the parameters to pass to the parallelizer object. Each entry is
    # the positional argument list for parallel_durrant_lab_filter.
    params = [[c, prohibited_substructs] for c in contnrs]

    # Run the filter, through the parallelizer object if there is one.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_durrant_lab_filter,
                                   num_procs, job_manager)
    else:
        # Unpack each parameter pair. Passing the pair itself as the
        # container would hand the helper a list instead of a container.
        tmp = [
            parallel_durrant_lab_filter(contnr, substructs)
            for contnr, substructs in params
        ]

    # Note that results is a list of containers.

    # Stripping out None values (containers with no molecules left).
    results = Parallelizer.strip_none(tmp)

    # You need to get the molecules as a flat array so you can run it through
    # bst_for_each_contnr_no_opt
    mols = []
    for contnr in results:
        mols.extend(contnr.mols)

    # Using this function just to make the changes. Doesn't do energy
    # minimization or anything (as it does later) because max variants
    # and thoroughness maxed out.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs,
        mols,
        1000,
        1000  # max_variants_per_compound, thoroughness
    )
Beispiel #6
0
def tauts_no_break_arom_rngs(contnrs, taut_data, num_procs, job_manager,
                             parallelizer_obj):
    """For a given molecule, the number of aromatic rings should never change
       regardless of tautomerization, ionization, etc. Any tautomer that
       breaks aromaticity is unlikely to be worth pursuing. So remove it.

    Each tautomer is paired with the container whose contnr_idx matches its
    own and passed to parallel_check_nonarom_rings; None results are
    stripped from the returned list.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: A list.
    :param taut_data: A list of MyMol.MyMol objects.
    :type taut_data: list
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :raises ValueError: If a tautomer's contnr_idx matches no container.
    :return: A list of MyMol.MyMol objects, with certain bad ones removed.
    :rtype: list
    """

    # Index the containers by contnr_idx once, rather than rescanning the
    # whole container list for every tautomer. If two containers share an
    # index the later one wins, as with a linear scan that does not stop at
    # the first match.
    contnr_by_idx = {}
    for contnr in contnrs:
        contnr_by_idx[contnr.contnr_idx] = contnr

    # You need to pair each tautomer with its container to pass it to the
    # parallelizer.
    params = []
    for taut_mol in taut_data:
        if taut_mol.contnr_idx not in contnr_by_idx:
            # Fail loudly: silently reusing the container matched for the
            # previous tautomer would check this one against the wrong
            # parent.
            raise ValueError(
                "No container found with contnr_idx "
                + str(taut_mol.contnr_idx)
            )
        params.append((taut_mol, contnr_by_idx[taut_mol.contnr_idx]))
    params = tuple(params)

    # Check every tautomer against its container, through the parallelizer
    # object if there is one.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_check_nonarom_rings,
                                   num_procs, job_manager)
    else:
        tmp = [
            parallel_check_nonarom_rings(taut_mol, container)
            for taut_mol, container in params
        ]

    # Stripping out None values (failed).
    results = Parallelizer.strip_none(tmp)

    return results
def enumerate_chiral_molecules(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Enumerates all possible enantiomers of a molecule. If the chirality of
       an atom is given, that chiral center is not varied. Only the chirality
       of unspecified chiral centers is varied.

    Every molecule in every container is passed to parallel_get_chiral. For
    any container that ends up with no generated molecules, its existing
    molecules are tagged with a genealogy warning and carried forward
    instead. The combined list is then handed, together with contnrs, to
    ChemUtils.bst_for_each_contnr_no_opt. Returns nothing.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step. If 0, the function returns immediately.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multiprocess mode.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    # No point in continuing if none requested.
    if max_variants_per_compound == 0:
        return

    Utils.log("Enumerating all possible enantiomers for all molecules...")

    # Group the molecules so you can feed them to parallelizer.
    params = tuple(
        (mol, thoroughness, max_variants_per_compound)
        for contnr in contnrs
        for mol in contnr.mols
    )

    # Run it through the parallelizer.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(
            params, parallel_get_chiral, num_procs, job_manager
        )
    else:
        tmp = [parallel_get_chiral(*args) for args in params]

    # Remove Nones (failed molecules)
    clean = Parallelizer.strip_none(tmp)

    # Flatten the data into a single list.
    flat = Parallelizer.flatten_list(clean)

    # Get the indexes of the ones that failed to generate.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, flat)

    # Go through the missing ones and throw a message.
    # NOTE(review): contnrs is indexed directly with the returned values,
    # which assumes they are positions in contnrs -- confirm.
    for miss_indx in contnr_idxs_of_failed:
        Utils.log(
            "\tCould not generate valid enantiomers for "
            + contnrs[miss_indx].orig_smi
            + " ("
            + contnrs[miss_indx].name
            + "), so using existing "
            + "(unprocessed) structures."
        )
        for mol in contnrs[miss_indx].mols:
            mol.genealogy.append("(WARNING: Unable to generate enantiomers)")

            # Add to the flattened list, which is what gets passed on below.
            # Appending to the pre-flattening list at this point would have
            # no effect on what is advanced.
            flat.append(mol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, flat, max_variants_per_compound, thoroughness
    )