def parallel_durrant_lab_filter(contnr, prohibited_substructs):
    """A parallelizable helper function that removes from a container any
    molecule that contains a prohibited substructure.

    A molecule is discarded when ``durrant_lab_contains_bad_substr`` flags
    its desalted SMILES string (``orig_smi_deslt``) or when its RDKit
    molecule matches one of the prohibited substructure patterns.

    :param contnr: The molecule container.
    :type contnr: MolContainer.MolContainer
    :param prohibited_substructs: A list of the prohibited substructures.
    :type prohibited_substructs: list
    :return: Either the container with bad molecules removed, or a None
       object.
    :rtype: MolContainer.MolContainer | None
    """

    # Replace any molecules that have prohibited substructure with None.
    for mi, m in enumerate(contnr.mols):
        # NOTE(review): the SMILES-string check lives inside the pattern
        # loop, so it is only reached when at least one pattern is given.
        for pattrn in prohibited_substructs:
            if durrant_lab_contains_bad_substr(
                m.orig_smi_deslt
            ) or m.rdkit_mol.HasSubstructMatch(pattrn):
                Utils.log(
                    "\t"
                    + m.smiles(True)
                    + ", a variant generated "
                    + "from "
                    + contnr.orig_smi
                    + " ("
                    + m.name
                    + "), contains a prohibited substructure, so I'm "
                    + "discarding it."
                )

                contnr.mols[mi] = None

                # One hit is enough. Stop testing the remaining patterns so
                # the molecule is reported (and matched) only once.
                break

    # Now go back and remove those Nones
    contnr.mols = Parallelizer.strip_none(contnr.mols)

    # If there are no molecules, mark this container for deletion.
    if len(contnr.mols) == 0:
        return None

    # Return the container
    return contnr
def desalt_orig_smi(contnrs, num_procs, job_manager, parallelizer_obj):
    """If an input molecule has multiple unconnected fragments, this removes
    all but the largest fragment.

    Each container is desalted with ``desalter``. The resulting molecule is
    added to the container it came from, and the container's original SMILES
    is updated when desalting changed it. Containers are modified in place;
    nothing is returned.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param num_procs: The number of processors to use. Not used by this
       function, which always runs serially.
    :type num_procs: int
    :param job_manager: The multiprocess mode. Not used by this function.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object. Not used by this
       function.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Desalting all molecules (i.e., keeping only largest fragment).")

    # Desalt each of the molecule containers. This step is very fast, so
    # let's just run it on a single processor always.
    desalted = [desalter(x) for x in contnrs]

    # Go through each contnr and update the orig_smi_deslt. If we update it,
    # also add a note in the genealogy record. The results are paired with
    # their containers by position *before* dropping failures; stripping the
    # Nones first would shift every later result onto the wrong container.
    for cont, desalt_mol in zip(contnrs, desalted):
        if desalt_mol is None:
            # Desalting failed for this container; leave it untouched.
            continue

        if cont.orig_smi != desalt_mol.orig_smi:
            desalt_mol.genealogy.append(desalt_mol.orig_smi_deslt + " (desalted)")
            cont.update_orig_smi(desalt_mol.orig_smi_deslt)

        cont.add_mol(desalt_mol)
# NOTE(review): this file contains a second, later definition of
# durrant_lab_filters. In Python the later definition replaces this one when
# the module is loaded -- confirm which of the two is meant to be kept.
def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
    """Removes any molecules that contain prohibited substructures, per the
    durrant-lab filters.

    The containers are filtered (in parallel when a parallelizer is given)
    and the surviving molecules are handed to
    ``ChemUtils.bst_for_each_contnr_no_opt`` together with ``contnrs``.
    Nothing is returned.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Applying Durrant-lab filters to all molecules...")

    # Get the substructures you won't permit.
    prohibited_smi_substrs = [
        "C=[N-]",
        "[N-]C=[N+]",
        "[nH+]c[n-]",
        "[#7+]~[#7+]",
        "[#7-]~[#7-]",
        "[!#7]~[#7+]~[#7-]~[!#7]",  # Doesn't hit azide.
    ]
    prohibited_substructs = [Chem.MolFromSmarts(s) for s in prohibited_smi_substrs]

    # Get the parameters to pass to the parallelizer object.
    params = [[c, prohibited_substructs] for c in contnrs]

    # Run the filter, through the parallelizer object if one was provided.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(
            params, parallel_durrant_lab_filter, num_procs, job_manager
        )
    else:
        # Each entry of params is [container, patterns]; unpack it so the
        # helper receives the container itself rather than the pair.
        tmp = [
            parallel_durrant_lab_filter(contnr, substructs)
            for contnr, substructs in params
        ]

    # Note that results is a list of containers.
    # Stripping out None values (failed).
    results = Parallelizer.strip_none(tmp)

    # You need to get the molecules as a flat array so you can run it through
    # bst_for_each_contnr_no_opt
    mols = []
    for contnr in results:
        mols.extend(contnr.mols)

    # Using this function just to make the changes.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, mols, 1000, 1000  # max_variants_per_compound, thoroughness
    )
def convert_2d_to_3d(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Converts the 1D smiles strings into 3D small-molecule models.

    Every molecule of every container is passed to ``parallel_make_3d``;
    the successful results are then handed to
    ``ChemUtils.bst_for_each_contnr_no_opt``. Nothing is returned.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant
       (molecule) retained, for evaluation. For example, perhaps you want to
       advance five molecules (max_variants_per_compound = 5). You could just
       generate five and advance them all. Or you could generate ten and
       advance the best five (so thoroughness = 2). Using thoroughness > 1
       increases the computational expense, but it also increases the chances
       of finding good molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Converting all molecules to 3D structures.")

    # Make the inputs to pass to the parallelizer: one 1-tuple per molecule.
    params = tuple((mol,) for contnr in contnrs for mol in contnr.mols)

    # Run the parallelizer
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_make_3d, num_procs, job_manager)
    else:
        tmp = [parallel_make_3d(*args) for args in params]

    # Remove any Nones from the output, which represent failed molecules.
    clear = Parallelizer.strip_none(tmp)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, clear, max_variants_per_compound, thoroughness, False
    )
def durrant_lab_filters(contnrs, num_procs, job_manager, parallelizer_obj):
    """Removes any molecules that contain prohibited substructures, per the
    durrant-lab filters.

    The prohibited patterns are built from the module-level
    ``prohibited_smi_substrs_for_substruc`` list. The containers are
    filtered (in parallel when a parallelizer is given) and the surviving
    molecules are handed to ``ChemUtils.bst_for_each_contnr_no_opt``
    together with ``contnrs``. Nothing is returned.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    Utils.log("Applying Durrant-lab filters to all molecules...")

    prohibited_substructs = [
        Chem.MolFromSmarts(s) for s in prohibited_smi_substrs_for_substruc
    ]

    # Get the parameters to pass to the parallelizer object.
    params = [[c, prohibited_substructs] for c in contnrs]

    # Run the filter, through the parallelizer object if one was provided.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(
            params, parallel_durrant_lab_filter, num_procs, job_manager
        )
    else:
        # Each entry of params is [container, patterns]; unpack it so the
        # helper receives the container itself rather than the pair.
        tmp = [
            parallel_durrant_lab_filter(contnr, substructs)
            for contnr, substructs in params
        ]

    # Note that results is a list of containers.
    # Stripping out None values (failed).
    results = Parallelizer.strip_none(tmp)

    # You need to get the molecules as a flat array so you can run it through
    # bst_for_each_contnr_no_opt
    mols = []
    for contnr in results:
        mols.extend(contnr.mols)

    # Using this function just to make the changes. Doesn't do energy
    # minimization or anything (as it does later) because max variants
    # and thoroughness maxed out.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, mols, 1000, 1000  # max_variants_per_compound, thoroughness
    )
def tauts_no_break_arom_rngs(
    contnrs, taut_data, num_procs, job_manager, parallelizer_obj
):
    """For a given molecule, the number of aromatic rings should never change
    regardless of tautization, ionization, etc. Any taut that breaks
    aromaticity is unlikely to be worth pursuing. So remove it.

    Each tautomer is paired with the container whose ``contnr_idx`` matches
    its own and the pair is checked with ``parallel_check_nonarom_rings``.
    NOTE(review): the helper's name refers to non-aromatic rings while this
    function's name refers to aromatic rings -- confirm the intended check.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param taut_data: A list of MyMol.MyMol objects.
    :type taut_data: list
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multithread mode to use.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    :return: A list of MyMol.MyMol objects, with certain bad ones removed.
    :rtype: list
    """

    # Index the containers once so each tautomer can be matched directly.
    # If several containers share an index the last one wins, which is what
    # a full scan without an early exit would also select.
    # Assumes contnr_idx values are hashable (e.g., ints) -- TODO confirm.
    contnr_by_idx = {contnr.contnr_idx: contnr for contnr in contnrs}

    # You need to group the taut_data by container to pass it to the
    # parallelizer.
    params = []
    for taut_mol in taut_data:
        container = contnr_by_idx.get(taut_mol.contnr_idx)
        if container is None:
            # Without its own container there is nothing valid to compare
            # against. Skip it rather than silently checking the tautomer
            # against a container that belongs to a different molecule.
            Utils.log(
                "\tWARNING: No container found with index "
                + str(taut_mol.contnr_idx)
                + ", so skipping this tautomer."
            )
            continue
        params.append((taut_mol, container))
    params = tuple(params)

    # Run it through the parallelizer to check the rings of each tautomer.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(
            params, parallel_check_nonarom_rings, num_procs, job_manager
        )
    else:
        tmp = [parallel_check_nonarom_rings(mol, contnr) for mol, contnr in params]

    # Stripping out None values (failed).
    results = Parallelizer.strip_none(tmp)

    return results
def enumerate_chiral_molecules(
    contnrs,
    max_variants_per_compound,
    thoroughness,
    num_procs,
    job_manager,
    parallelizer_obj,
):
    """Enumerates all possible enantiomers of a molecule. If the chirality of
    an atom is given, that chiral center is not varied. Only the chirality of
    unspecified chiral centers is varied.

    Containers for which no enantiomer could be generated fall back to their
    existing molecules, which are tagged with a warning in their genealogy.
    The collected molecules are handed to
    ``ChemUtils.bst_for_each_contnr_no_opt``. Nothing is returned.

    :param contnrs: A list of containers (MolContainer.MolContainer).
    :type contnrs: list
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step. If 0, the function returns immediately without doing anything.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant
       (molecule) retained, for evaluation. For example, perhaps you want to
       advance five molecules (max_variants_per_compound = 5). You could just
       generate five and advance them all. Or you could generate ten and
       advance the best five (so thoroughness = 2). Using thoroughness > 1
       increases the computational expense, but it also increases the chances
       of finding good molecules.
    :type thoroughness: int
    :param num_procs: The number of processors to use.
    :type num_procs: int
    :param job_manager: The multiprocess mode.
    :type job_manager: string
    :param parallelizer_obj: The Parallelizer object, or None to run
       serially.
    :type parallelizer_obj: Parallelizer.Parallelizer
    """

    # No point in continuing if none requested.
    if max_variants_per_compound == 0:
        return

    Utils.log("Enumerating all possible enantiomers for all molecules...")

    # Group the molecules so you can feed them to parallelizer.
    params = tuple(
        (mol, thoroughness, max_variants_per_compound)
        for contnr in contnrs
        for mol in contnr.mols
    )

    # Run it through the parallelizer.
    if parallelizer_obj is not None:
        tmp = parallelizer_obj.run(params, parallel_get_chiral, num_procs, job_manager)
    else:
        tmp = [parallel_get_chiral(*args) for args in params]

    # Remove Nones (failed molecules)
    clean = Parallelizer.strip_none(tmp)

    # Flatten the data into a single list.
    flat = Parallelizer.flatten_list(clean)

    # Get the indexes of the ones that failed to generate.
    contnr_idxs_of_failed = Utils.fnd_contnrs_not_represntd(contnrs, flat)

    # Go through the missing ones and throw a message.
    for miss_indx in contnr_idxs_of_failed:
        Utils.log(
            "\tCould not generate valid enantiomers for "
            + contnrs[miss_indx].orig_smi
            + " ("
            + contnrs[miss_indx].name
            + "), so using existing "
            + "(unprocessed) structures."
        )
        for mol in contnrs[miss_indx].mols:
            mol.genealogy.append("(WARNING: Unable to generate enantiomers)")
            # Add the fallback to the flat list, which is the one passed on
            # below. Appending to the pre-flattened list would drop it.
            flat.append(mol)

    # Keep only the top few compound variants in each container, to prevent a
    # combinatorial explosion.
    ChemUtils.bst_for_each_contnr_no_opt(
        contnrs, flat, max_variants_per_compound, thoroughness
    )