예제 #1
0
    def complete_labels(mol: Chem.rdchem.Mol,
                        mollabels_dict: Dict,
                        mark_upmatched: bool = True) -> List:
        """
        Build a full list of atom labels, inventing names for atoms that have none.

        Atoms whose index is a key of ``mollabels_dict`` keep the supplied label.
        Every other atom is named after its upper-cased element symbol followed by
        a per-element running number starting at 1, e.g. ``CX1`` (marked) or ``C1``.
        The molecule is only read here; it is not modified.

        :param mol: the molecule whose atoms are to be named.
        :type mol: Chem.rdchem.Mol
        :param mollabels_dict: atom index (int) -> label, for the atoms already named
        :type mollabels_dict: Dict
        :param mark_upmatched: if true, put an ``X`` between the symbol and the number
        :type mark_upmatched: bool
        :return: one label per atom, ordered by atom index
        :rtype: List[str]
        """
        marker = 'X' if mark_upmatched else ''
        seen_per_element = {}
        completed = []
        for atom_idx in range(mol.GetNumAtoms()):
            if atom_idx in mollabels_dict:
                # A label was supplied for this atom: use it verbatim.
                completed.append(mollabels_dict[atom_idx])
                continue
            symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol().upper()
            ordinal = seen_per_element.get(symbol, 0) + 1
            seen_per_element[symbol] = ordinal
            completed.append(f'{symbol}{marker}{ordinal}')
        return completed
예제 #2
0
 def display(mol: Chem.rdchem.Mol, show='name'):
     """
     Draw a molecule, optionally annotating each atom.

     When ``show`` is truthy the molecule is deep-copied before annotation, so
     the caller's molecule is left untouched. When ``show`` is falsy the
     molecule is drawn as is.

     :param mol: the molecule to draw.
     :type mol: Chem.rdchem.Mol
     :param show: ``'index'`` writes each atom's index into its
         ``molAtomMapNumber`` property; ``'name'`` is not implemented yet;
         any falsy value draws without annotation.
     :return: None
     :raises NotImplementedError: if ``show == 'name'``.
     :raises ValueError: if ``show`` is truthy but neither ``'index'`` nor ``'name'``.
     """
     if show:
         # Validate the mode once, up front, instead of once per atom.
         if show == 'name':
             # NOTE(review): the 'AtomLabel' property holds the name, but
             # molAtomMapNumber only accepts an integer string; a different
             # drawing property is needed -- confirm which one RDKit offers.
             raise NotImplementedError(
                 'I need to figure out what property is needed as molAtomMapNumber is an str(int)'
             )
         if show != 'index':
             raise ValueError(
                 f"show must be 'index', 'name' or a falsy value, got {show!r}")
         mol = copy.deepcopy(mol)
         for idx in range(mol.GetNumAtoms()):
             mol.GetAtomWithIdx(idx).SetProp('molAtomMapNumber', str(idx))
     # NOTE(review): `display` here resolves to whatever the enclosing module
     # scope provides (presumably IPython's display) -- confirm.
     display(Draw.MolToImage(mol))
     return None
예제 #3
0
def trim_side_chain(mol: Chem.rdchem.Mol, core, unwanted_side_chains):
    """Trim a list of side chains from a molecule.

    Hydrogens are added, ``core`` is matched onto the molecule, and every core
    atom whose atom-map number also appears in one of the unwanted side-chain
    patterns is located in the molecule. For each such atom the bond to its
    first neighbour inside the core match is cut and the neighbour is capped
    with a dummy atom (``*``) carrying the same map number. Hydrogens are then
    removed, fragments matching the unwanted patterns are deleted, and the
    largest remaining fragment is returned.

    Args:
        mol: molecule to trim; it is not modified, a new molecule is returned.
        core: query molecule with atom-map numbers marking attachment points.
        unwanted_side_chains: iterable of pattern molecules; their non-zero
            atom-map numbers select which attachment points are cut.

    Returns:
        Whatever ``dm.keep_largest_fragment`` returns for the trimmed molecule.

    Raises:
        IndexError: if ``core`` does not match ``mol`` (empty match) while a
            core atom carries one of the unwanted map numbers.
    """

    mol = Chem.AddHs(mol)

    match = mol.GetSubstructMatch(core)
    matched_atoms = set(match)

    unwanted2map = {}
    for patt in unwanted_side_chains:
        unwanted2map[patt] = [
            a.GetAtomMapNum() for a in patt.GetAtoms() if a.GetAtomMapNum()
        ]
    unwanted_mapping = set(
        itertools.chain.from_iterable(unwanted2map.values()))

    # molecule atom index -> map number / -> neighbours that belong to the match
    map2idx = {}
    map2nei = {}
    for atom in core.GetAtoms():
        num = atom.GetAtomMapNum()
        if num and num in unwanted_mapping:
            mol_atom_idx = match[atom.GetIdx()]
            map2idx[mol_atom_idx] = num
            nei_atoms = mol.GetAtomWithIdx(mol_atom_idx).GetNeighbors()
            map2nei[mol_atom_idx] = [
                n.GetIdx() for n in nei_atoms if n.GetIdx() in matched_atoms
            ]

    emol = Chem.EditableMol(mol)
    for atom_idx, atom_map in map2idx.items():
        neighbours = map2nei.get(atom_idx)
        if not neighbours:
            # No neighbour inside the core match: nothing to cut here.
            continue
        # Atom index 0 is a valid neighbour, so do not test the index for
        # truthiness.
        nei_idx = neighbours[0]
        dummy = Chem.rdchem.Atom("*")
        dummy.SetAtomMapNum(atom_map)
        bond = mol.GetBondBetweenAtoms(atom_idx, nei_idx)
        emol.RemoveBond(atom_idx, nei_idx)
        # New atoms are appended, so existing atom indices stay valid.
        new_ind = emol.AddAtom(dummy)
        emol.AddBond(nei_idx, new_ind, bond.GetBondType())

    mol = emol.GetMol()
    mol = Chem.RemoveHs(mol)
    # NOTE(review): an AdjustQueryParameters object used to be configured here
    # but was never passed to any call, so it had no effect -- confirm whether
    # the patterns were meant to be adjusted before deletion.
    for patt in unwanted2map:
        cur_frag = dm.fix_mol(patt)
        mol = Chem.DeleteSubstructs(mol, cur_frag, onlyFrags=True)

    return dm.keep_largest_fragment(mol)
예제 #4
0
    def label(mol: Chem.rdchem.Mol,
              atomlabels: List) -> None:  # -> mol inplace.
        """
        Store one label per atom in the atom property ``AtomLabel``.

        See https://www.rdkit.org/docs/RDKit_Book.html for atom properties.

        :param mol: the molecule to be labelled _in place_.
        :type mol: Chem.rdchem.Mol
        :param atomlabels: atom labels, one per atom, ordered by atom index
        :type atomlabels: List[str]
        :return: None
        """
        atom_count = mol.GetNumAtoms()
        assert len(atomlabels) == atom_count, \
            'the number of atoms in mol has to be the same as atomlabels. Hydrogens? dehydrogenate!'
        for position in range(atom_count):
            atom = mol.GetAtomWithIdx(position)
            atom.SetProp('AtomLabel', atomlabels[position])
예제 #5
0
    def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
        """Fill the TITLE, POSITION and GENBOX attributes from an RDKit molecule.

        The first conformer of ``mol`` supplies the coordinates. If the molecule
        has no conformer, hydrogens are added and one is embedded and
        UFF-optimised; the generated atoms then include those hydrogens.

        Parameters
        ----------
        mol : Chem.rdchem.Mol
            Molecule, possibly with a conformation

        conversionFactor  :  float
            factor applied to every RDKit coordinate before it is stored
            (default: angstrom -> nano meter = 0.1)
        """
        # Second "/"-separated InChI field is used as the name; fall back to "XXX".
        inchi_layers = Chem.MolToInchi(mol).split("/")
        name = inchi_layers[1] if len(inchi_layers) >= 2 else "XXX"
        setattr(self, "TITLE", TITLE("\t" + name + " created from RDKit"))

        # Make sure there is at least one conformer to read coordinates from.
        if mol.GetNumConformers() < 1:
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol)
            AllChem.UFFOptimizeMolecule(mol)
        conformer = mol.GetConformer(0)

        # One atomP entry per atom; atom IDs are 1-based.
        atom_entries = []
        for idx in range(mol.GetNumAtoms()):
            position = conformer.GetAtomPosition(idx)
            scaled_x = conversionFactor * position.x
            scaled_y = conversionFactor * position.y
            scaled_z = conversionFactor * position.z
            element = mol.GetAtomWithIdx(idx).GetSymbol()
            atom_entries.append(
                blocks.atomP(resID=1, resName=name, atomType=element, atomID=idx + 1,
                             xp=scaled_x, yp=scaled_y, zp=scaled_z))

        setattr(self, "POSITION", blocks.POSITION(atom_entries))
        # Default box for GENBOX - for liquid simulations adjust manually.
        setattr(self, "GENBOX", blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90]))
예제 #6
0
def featurization(r_mol: Chem.rdchem.Mol,
                  p_mol: Chem.rdchem.Mol,
                  ):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected: every ordered pair of distinct atoms gets an
    edge. Node features come from ``atom_features`` applied to the reactant
    atoms; each edge carries two values, the 3D distance between the two atoms
    in the reactant and in the product.

    Args:
        r_mol: RDKit molecule object for the reactant.
        p_mol: RDKit molecule object for the product. It is indexed with the
            reactant's atom indices, so it presumably has the same atoms in
            the same order -- confirm against the caller.

    Returns:
        data: Torch Geometric Data object with ``x`` (node features),
            ``edge_index`` of shape (2, E) and ``edge_attr`` of shape (E, 2).
    """

    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    n_atoms = r_mol.GetNumAtoms()

    # 3d distance matrices of reactant and product
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    # atom (node) features
    f_atoms = [atom_features(r_mol.GetAtomWithIdx(a1)) for a1 in range(n_atoms)]

    edge_index = []     # (source, target) pairs, both directions
    f_bonds = []        # edge features, aligned with edge_index
    for a1 in range(n_atoms):
        for a2 in range(a1 + 1, n_atoms):
            # fully connected graph
            edge_index.extend([(a1, a2), (a2, a1)])
            # Bond-type features are not included; for now each edge naively
            # carries only the reactant and product distances.
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # Explicit reshape keeps the (2, E) / (E, 2) layout even when there are no
    # edges (fewer than two atoms); a bare empty tensor would be 1-D.
    data.edge_index = torch.tensor(
        edge_index, dtype=torch.long).reshape(len(edge_index), 2).t().contiguous()
    data.edge_attr = torch.tensor(
        f_bonds, dtype=torch.float).reshape(len(f_bonds), 2)

    return data