コード例 #1
0
def parallel_get_ring_confs(mol, max_variants_per_compound, thoroughness,
                            second_embed):
    """Gets alternate ring conformations. Meant to run with the parallelizer class.

    Conformers are generated for the molecule, each non-aromatic ring is
    aligned across those conformers, and the per-ring RMSD values are
    clustered with k-means. The first conformer encountered in each cluster
    is kept and returned as its own copy of the molecule.

    :param mol: The molecule to process (with non-aromatic ring(s)).
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
        older algorithm if the better (default) algorithm fails. This can add
        run time, but sometimes converts certain molecules that would
        otherwise fail.
    :type second_embed: bool
    :return: A list of MyMol.MyMol objects, with alternate ring conformations,
       or None if no conformers could be generated for the molecule.
    :rtype: list or None
    """

    # All the molecules in this container must have non-aromatic rings
    # (because they are all variants of the same source molecule). So just
    # make a new mols list.

    # Convert the atom indices of each non-aromatic ring into the indices of
    # the bonds that comprise that ring (a list of lists, one per ring).
    rings_by_bond_indexes = []
    for ring_atom_idxs in mol.get_idxs_of_nonaro_rng_atms():
        ring_atoms = set(ring_atom_idxs)
        bond_idxs = set()
        for atm_idx in ring_atom_idxs:
            atom = mol.rdkit_mol.GetAtomWithIdx(atm_idx)
            for bond in atom.GetBonds():
                # One end of the bond is the current atom; find the other.
                begin = bond.GetBeginAtomIdx()
                end = bond.GetEndAtomIdx()
                other_idx = end if begin == atm_idx else begin

                # Keep the bond only if both of its atoms are in this ring.
                if other_idx in ring_atoms:
                    bond_idxs.add(bond.GetIdx())
        rings_by_bond_indexes.append(sorted(bond_idxs))

    # Generate a bunch of conformations, ordered from best energy to worst.
    # Note that this is cached. Minimizing too.
    mol.add_conformers(thoroughness * max_variants_per_compound, 0.1, True)

    if not mol.conformers:
        # Sometimes there are no conformers if it's an impossible structure.
        # Like
        # [H]c1nc(N2C(=O)[C@@]3(C([H])([H])[H])[C@@]4([H])O[C@@]([H])(C([H])([H])C4([H])[H])[C@]3(C([H])([H])[H])C2=O)sc1[H]
        # So don't save this one.
        return None

    # Get the scores (lowest energy) of these minimized conformers.
    mol.load_conformers_into_rdkit_mol()

    # Extract just the rings.
    ring_mols = [
        Chem.PathToSubmol(mol.rdkit_mol, bond_idxs)
        for bond_idxs in rings_by_bond_indexes
    ]

    # Align each ring separately and collect the RMSDs relative to the first
    # conformation. A fresh list is created per ring because
    # AlignMolConformers fills the list it is given in place.
    rms_by_ring = []
    for ring_mol in ring_mols:
        rms_list = []
        AllChem.AlignMolConformers(ring_mol, RMSlist=rms_list)
        rms_by_ring.append(rms_list)

    # Get points for each conformer (rmsd_ring1, rmsd_ring2, rmsd_ring3). The
    # first conformer is the alignment reference, so it gets a row of zeros.
    pts = numpy.array(rms_by_ring).T
    pts = numpy.vstack((numpy.array([[0.0] * pts.shape[1]]), pts))

    # Cluster those points, get lowest-energy member of each. There cannot be
    # more clusters than points.
    num_clusters = min(len(pts), max_variants_per_compound)

    # When kmeans2 runs on insufficient clusters, it can sometimes complain
    # about empty clusters. This is not useful to the user, so it is
    # suppressed here.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        groups = kmeans2(pts, num_clusters, minit='points')[1]

    # Note that you have some geometrically diverse conformations here,
    # but there could be other versions (enantiomers, tautomers, etc.)
    # that also contribute similar conformations. In the end, you'll be
    # selecting from all these together, so similar ones could end up
    # together.

    # Key is group id from kmeans (int). Values are the MyMol.MyConformers
    # objects. Only the first conformer seen for each group is kept.
    best_ones = {}
    for k, grp in enumerate(groups):
        if grp not in best_ones:
            best_ones[grp] = mol.conformers[k]

    # Convert the selected conformers to MyMol.MyMol objects and save those
    # for returning.
    results = []
    for conf in best_ones.values():
        new_mol = copy.deepcopy(mol)
        c = MyConformer(new_mol, conf.conformer(), second_embed)
        new_mol.conformers = [c]
        energy = c.energy

        # Save to the genealogy record.
        new_mol.genealogy = mol.genealogy[:]
        new_mol.genealogy.append(
            new_mol.smiles(True) + " (nonaromatic ring conformer: " +
            str(energy) + " kcal/mol)")

        results.append(new_mol)

    return results
コード例 #2
0
def parallel_minit(mol, max_variants_per_compound, thoroughness, second_embed):
    """Minimizes the geometries of a MyMol.MyMol object. Meant to be run
    within parallelizer.

    :param mol: The molecule to minimize.
    :type mol: MyMol.MyMol
    :param max_variants_per_compound: To control the combinatorial explosion,
       only this number of variants (molecules) will be advanced to the next
       step.
    :type max_variants_per_compound: int
    :param thoroughness: How many molecules to generate per variant (molecule)
       retained, for evaluation. For example, perhaps you want to advance five
       molecules (max_variants_per_compound = 5). You could just generate five
       and advance them all. Or you could generate ten and advance the best
       five (so thoroughness = 2). Using thoroughness > 1 increases the
       computational expense, but it also increases the chances of finding good
       molecules.
    :type thoroughness: int
    :param second_embed: Whether to try to generate 3D coordinates using an
        older algorithm if the better (default) algorithm fails. This can add
        run time, but sometimes converts certain molecules that would
        otherwise fail.
    :type second_embed: bool
    :return: A copy of the molecule holding a single conformer built from the
       first conformer of mol, or None if no conformers could be generated.
    :rtype: MyMol.MyMol or None
    """

    # Not minimizing. Just adding the conformers.
    mol.add_conformers(
        thoroughness * max_variants_per_compound,
        0.1, False
    )

    if not mol.conformers:
        # It is possible to find a molecule that has no acceptable
        # conformers (i.e., is not possible geometrically). Consider this:
        # O=C([C@@]1([C@@H]2O[C@@H]([C@@]1(C3=O)C)CC2)C)N3c4sccn4
        return None

    # Further minimize the unoptimized conformers that were among the best
    # scoring.
    for conformer in mol.conformers[:max_variants_per_compound]:
        conformer.minimize()

    # Get the best scoring (lowest energy) of these minimized conformers.
    # NOTE(review): this takes the first conformer without re-ranking after
    # minimization; presumably the list is still ordered by energy -- TODO
    # confirm.
    new_mol = copy.deepcopy(mol)
    c = MyConformer(new_mol, mol.conformers[0].conformer(), second_embed)
    new_mol.conformers = [c]
    best_energy = c.energy

    # Save to the genealogy record.
    new_mol.genealogy = mol.genealogy[:]
    new_mol.genealogy.append(
        new_mol.smiles(True) + " (optimized conformer: " +
        str(best_energy) + " kcal/mol)"
    )

    # Save best conformation. For some reason molecular properties
    # attached to mol are lost when returning from multiple
    # processors. So save them separately so they can be re-added to
    # the molecule in a bit.
    # JDD: Still any issue?

    return new_mol