def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness, second_embed):
    """Gets alternate ring conformations. Meant to run with the parallelizer
    class.

    :param mol: The molecule to process (with non-aromatic ring(s)).
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding
       good molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
       older algorithm if the better (default) algorithm fails. This can add
       run time, but sometimes converts certain molecules that would
       otherwise fail.
    :type second_embed: bool
    :return: A list of MyMol.MyMol objects, with alternate ring
       conformations. None is returned instead if the molecule reports no
       non-aromatic rings or if no conformers could be generated.
    :rtype: list or None
    """

    # For every non-aromatic ring, collect the indexes of the bonds that
    # comprise it, i.e. the bonds whose two atoms both belong to that ring.
    # The result is a list of lists (one sorted list of bond indexes per
    # ring).
    rings_by_bond_indexes = []
    for ring_atom_idxs in mol.get_idxs_of_nonaro_rng_atms():
        ring_atoms = set(ring_atom_idxs)  # Fast membership tests below.
        ring_bond_idxs = set()
        for ring_atm_idx in ring_atom_idxs:
            atom = mol.rdkit_mol.GetAtomWithIdx(ring_atm_idx)
            for bond in atom.GetBonds():
                # Find the atom at the other end of this bond.
                begin_idx = bond.GetBeginAtomIdx()
                end_idx = bond.GetEndAtomIdx()
                other_atm_idx = end_idx if begin_idx == ring_atm_idx else begin_idx
                if other_atm_idx in ring_atoms:
                    ring_bond_idxs.add(bond.GetIdx())
        rings_by_bond_indexes.append(sorted(ring_bond_idxs))

    if not rings_by_bond_indexes:
        # Without any ring there are no per-ring RMSDs to cluster on (the
        # point matrix below would have no columns and indexing its shape
        # would raise IndexError). Report failure the same way as the
        # no-conformer case.
        return None

    # Generate a bunch of conformations, ordered from best energy to worst.
    # Note that this is cached. Minimizing too.
    mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, True)

    if not mol.conformers:
        # Sometimes there are no conformers if it's an impossible structure.
        # Like
        # [H]c1nc(N2C(=O)[C@@]3(C([H])([H])[H])[C@@]4([H])O[C@@]([H])(C([H])([H])C4([H])[H])[C@]3(C([H])([H])[H])C2=O)sc1[H]
        # So don't save this one. If you get here, something went wrong.
        return None

    # Make the generated conformers available on the underlying rdkit mol.
    mol.load_conformers_into_rdkit_mol()

    # Extract just the rings.
    ring_mols = [
        Chem.PathToSubmol(mol.rdkit_mol, bond_idxs)
        for bond_idxs in rings_by_bond_indexes
    ]

    # Align and get the RMSDs relative to the first conformation, for each
    # ring separately. AlignMolConformers fills the list passed as RMSlist.
    list_of_rmslists = []
    for ring_mol in ring_mols:
        rmslist = []
        AllChem.AlignMolConformers(ring_mol, RMSlist=rmslist)
        list_of_rmslists.append(rmslist)

    # Get points for each conformer (rmsd_ring1, rmsd_ring2, rmsd_ring3).
    # The first conformer is the alignment reference, so a row of zeros is
    # prepended for it.
    pts = numpy.array(list_of_rmslists).T
    pts = numpy.vstack((numpy.array([[0.0] * pts.shape[1]]), pts))

    # Cluster those points, get lowest-energy member of each. There cannot
    # be more clusters than points.
    num_clusters = min(len(pts), max_variants_per_compound)

    # When kmeans2 runs on insufficient clusters, it can sometimes complain
    # about empty clusters. This is not useful to the user, so it is
    # suppressed here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        groups = kmeans2(pts, num_clusters, minit='points')[1]

    # Note that you have some geometrically diverse conformations here, but
    # there could be other versions (enantiomers, tautomers, etc.) that also
    # contribute similar conformations. In the end, you'll be selecting from
    # all these together, so similar ones could end up together.

    # Key is the group id from kmeans (int). Value is the first conformer
    # (MyMol.MyConformers object) seen for that group; given the
    # best-to-worst ordering noted above, that is the group's best one.
    best_ones = {}
    for conf_idx, grp in enumerate(groups):
        if grp not in best_ones:
            best_ones[grp] = mol.conformers[conf_idx]

    # Wrap each selected conformer in its own copy of the molecule and save
    # those MyMol.MyMol objects for returning.
    results = []
    for conf in best_ones.values():
        new_mol = copy.deepcopy(mol)
        new_conf = MyConformer(new_mol, conf.conformer(), second_embed)
        new_mol.conformers = [new_conf]
        energy = new_conf.energy

        # Save to the genealogy record.
        new_mol.genealogy = mol.genealogy[:]
        new_mol.genealogy.append(
            new_mol.smiles(True) + " (nonaromatic ring conformer: " +
            str(energy) + " kcal/mol)"
        )

        results.append(new_mol)

    return results
def parallel_minit(mol, max_variants_per_compound, thoroughness, second_embed):
    """Minimizes the geometries of a MyMol.MyMol object. Meant to be run
    within parallelizer.

    :param mol: The molecule to minimize.
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding
       good molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
       older algorithm if the better (default) algorithm fails. This can add
       run time, but sometimes converts certain molecules that would
       otherwise fail.
    :type second_embed: bool
    :return: A copy of the molecule holding a single conformer built from the
       first entry of mol.conformers after minimization, or None if no
       conformers could be generated.
    :rtype: MyMol.MyMol or None
    """

    # Not minimizing. Just adding the conformers.
    mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, False)

    if not mol.conformers:
        # It is possible to find a molecule that has no acceptable
        # conformers (i.e., is not possible geometrically). Consider this:
        # O=C([C@@]1([C@@H]2O[C@@H]([C@@]1(C3=O)C)CC2)C)N3c4sccn4
        return None

    # Further minimize the unoptimized conformers that were among the best
    # scoring (the leading max_variants_per_compound entries).
    for conformer in mol.conformers[:max_variants_per_compound]:
        conformer.minimize()

    # NOTE: structurally similar conformers are not removed here; a call to
    # mol.eliminate_structurally_similar_conformers() was disabled by the
    # original author.

    # Take the first conformer as the best scoring (lowest energy) one.
    # NOTE(review): this presumes mol.conformers is still ordered by energy
    # after minimize(); nothing in this function re-sorts it -- confirm.
    new_mol = copy.deepcopy(mol)
    best_conf = MyConformer(new_mol, mol.conformers[0].conformer(), second_embed)
    new_mol.conformers = [best_conf]
    best_energy = best_conf.energy

    # Save to the genealogy record.
    new_mol.genealogy = mol.genealogy[:]
    new_mol.genealogy.append(
        new_mol.smiles(True) + " (optimized conformer: " +
        str(best_energy) + " kcal/mol)"
    )

    # Save best conformation. For some reason molecular properties attached
    # to mol are lost when returning from multiple processors. So save them
    # separately so they can be re-added to the molecule in a bit.
    # JDD: Still any issue?
    return new_mol