def complete_labels(mol: Chem.rdchem.Mol, mollabels_dict: Dict, mark_upmatched: bool = True) -> List:
    """
    Fill the gaps of a partial index-to-name mapping and return one label per atom.

    Atoms whose index is a key of ``mollabels_dict`` keep the supplied name.
    Every other atom receives a generated name made of its upper-cased element
    symbol, an optional ``X`` and a per-element running number starting at 1
    (e.g. ``CX1``, ``CLX2``). The running number only advances for atoms that
    needed a generated name. The molecule is only read, never modified.

    NOTE: generated names are not checked against the supplied ones.

    :param mol: the molecule whose atoms are to be named
    :type mol: Chem.rdchem.Mol
    :param mollabels_dict: key is the atom index (int) and value is the atom name (gaps allowed)
    :type mollabels_dict: Dict
    :param mark_upmatched: add an ``X`` between the symbol and the number of generated names
    :type mark_upmatched: bool
    :return: atom labels, ordered by atom index
    :rtype: List[str]
    """
    # Marker placed between element symbol and running number.
    separator = 'X' if mark_upmatched else ''
    seen_per_element = {}
    completed = []
    for atom_idx in range(mol.GetNumAtoms()):
        if atom_idx in mollabels_dict:
            completed.append(mollabels_dict[atom_idx])
            continue
        element = mol.GetAtomWithIdx(atom_idx).GetSymbol().upper()
        seen_per_element[element] = seen_per_element.get(element, 0) + 1
        completed.append(f'{element}{separator}{seen_per_element[element]}')
    return completed
def display(mol: Chem.rdchem.Mol, show='name'):
    """
    Draw the molecule, optionally annotating every atom.

    Annotations are written to a deep copy of ``mol``, not to the molecule
    that was passed in.

    :param mol: the molecule to draw
    :type mol: Chem.rdchem.Mol
    :param show: ``'index'`` writes each atom's index into its
        ``molAtomMapNumber`` property before drawing; ``'name'`` is not
        implemented and raises as soon as the first atom is visited; a falsy
        value draws the molecule without annotation.
    :raises NotImplementedError: ``show == 'name'`` and the molecule has at least one atom
    :raises ValueError: ``show`` is any other truthy value and the molecule has at least one atom
    :return: None
    """
    if show:
        n_atoms = mol.GetNumAtoms()
        mol = copy.deepcopy(mol)
        for atom_idx in range(n_atoms):
            if show == 'name':
                # Showing names would mean copying each atom's 'AtomLabel'
                # into 'molAtomMapNumber', which only accepts an str(int).
                raise NotImplementedError(
                    'I need to figure out what property is needed as molAtomMapNumber is an str(int)'
                )
            if show != 'index':
                raise ValueError
            mol.GetAtomWithIdx(atom_idx).SetProp('molAtomMapNumber', str(atom_idx))
    # NOTE(review): if this function lives at module level, ``display`` below
    # resolves to this very function rather than to a notebook display helper
    # -- confirm which ``display`` is meant to be in scope here.
    display(Draw.MolToImage(mol))
    return None
def trim_side_chain(mol: Chem.rdchem.Mol, core, unwanted_side_chains):
    """Trim a list of side chains from a molecule.

    The hydrogen-expanded molecule is matched against ``core``. Every core atom
    whose non-zero atom-map number also occurs in one of the
    ``unwanted_side_chains`` patterns is disconnected from its first neighbour
    that is itself part of the match; that neighbour receives a dummy atom
    (``*``) carrying the same atom-map number, bonded with the original bond
    type. Hydrogens are then removed, each unwanted pattern is deleted with
    ``onlyFrags=True`` and the largest remaining fragment is returned.

    Args:
        mol: Molecule to trim. It is not modified; work happens on copies.
        core: Atom-mapped query used to locate the attachment points in ``mol``.
        unwanted_side_chains: Iterable of atom-mapped patterns to remove.

    Returns:
        The largest fragment left after the unwanted side chains are deleted.

    Raises:
        IndexError: If ``core`` does not match ``mol`` while one of its atoms
            carries an atom-map number listed in an unwanted pattern.
    """
    mol = Chem.AddHs(mol)
    match = mol.GetSubstructMatch(core)
    matched_atoms = set(match)

    # Atom-map numbers carried by each unwanted pattern (0 means "unmapped").
    unwanted2map = {}
    for patt in unwanted_side_chains:
        unwanted2map[patt] = [
            a.GetAtomMapNum() for a in patt.GetAtoms() if a.GetAtomMapNum()
        ]
    unwanted_mapping = set(itertools.chain.from_iterable(unwanted2map.values()))

    # For each flagged core atom: its index in ``mol`` -> map number, and
    # -> the neighbours that also belong to the core match.
    map2idx = {}
    map2nei = {}
    for atom in core.GetAtoms():
        num = atom.GetAtomMapNum()
        if num and num in unwanted_mapping:
            mol_atom_idx = match[atom.GetIdx()]
            map2idx[mol_atom_idx] = num
            nei_atoms = mol.GetAtomWithIdx(mol_atom_idx).GetNeighbors()
            map2nei[mol_atom_idx] = [
                n.GetIdx() for n in nei_atoms if n.GetIdx() in matched_atoms
            ]

    emol = Chem.EditableMol(mol)
    for atom_idx, atom_map in map2idx.items():
        neighbours = map2nei.get(atom_idx)
        if not neighbours:
            # No matched neighbour to cut against; leave this atom untouched.
            continue
        # Atom index 0 is a valid neighbour, so do not test the index for
        # truthiness here.
        nei_idx = neighbours[0]
        bond = mol.GetBondBetweenAtoms(atom_idx, nei_idx)
        dummy = Chem.rdchem.Atom("*")
        dummy.SetAtomMapNum(atom_map)
        emol.RemoveBond(atom_idx, nei_idx)
        new_ind = emol.AddAtom(dummy)
        emol.AddBond(nei_idx, new_ind, bond.GetBondType())
    mol = emol.GetMol()
    mol = Chem.RemoveHs(mol)

    # NOTE(review): ``query_param`` is configured but never passed to any call
    # below -- confirm whether a query adjustment step is missing.
    query_param = AdjustQueryParameters()
    query_param.makeDummiesQueries = False
    query_param.adjustDegree = False
    query_param.aromatizeIfPossible = True

    for patt in unwanted2map:
        cur_frag = dm.fix_mol(patt)
        mol = Chem.DeleteSubstructs(mol, cur_frag, onlyFrags=True)
    return dm.keep_largest_fragment(mol)
def label(mol: Chem.rdchem.Mol, atomlabels: List) -> None:  # -> mol inplace.
    """
    Store one name per atom in the atom property ``AtomLabel``.

    The molecule is modified in place; nothing is returned.
    https://www.rdkit.org/docs/RDKit_Book.html

    :param mol: the molecule to be labelled _in place_.
    :type mol: Chem.rdchem.Mol
    :param atomlabels: atom labels, one per atom, indexed by atom index
    :type atomlabels: List[str]
    :raises AssertionError: the number of labels differs from the number of atoms
    :return: None
    """
    n_atoms = mol.GetNumAtoms()
    assert n_atoms == len(atomlabels), \
        'the number of atoms in mol has to be the same as atomlabels. Hydrogens? dehydrogenate!'
    for atom_idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(atom_idx)
        atom.SetProp('AtomLabel', atomlabels[atom_idx])
    return None
def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
    """Fill the TITLE, POSITION and GENBOX attributes from an RDKit molecule.

    If the molecule already has a conformation, the first one is used as is.
    Otherwise hydrogens are added, a conformation is embedded and it is
    optimised with UFF before the coordinates are read.

    Parameters
    ----------
    mol : Chem.rdchem.Mol
        Molecule, possibly with a conformation
    conversionFactor : float
        the factor used to convert length from rdkit to Gromos
        (default: angstrom -> nano meter = 0.1)
    """
    # The second "/"-separated InChI field serves as title and residue name;
    # "XXX" is the fallback when the InChI has fewer than two fields.
    inchi_fields = Chem.MolToInchi(mol).split("/")
    name = inchi_fields[1] if len(inchi_fields) >= 2 else "XXX"
    self.TITLE = TITLE("\t" + name + " created from RDKit")

    # Generate a conformation when the molecule carries none.
    if mol.GetNumConformers() < 1:
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.UFFOptimizeMolecule(mol)
    conformer = mol.GetConformer(0)

    # One atomP entry per atom, with scaled coordinates and 1-based atom IDs.
    atomList = []
    for atom_idx in range(mol.GetNumAtoms()):
        position = conformer.GetAtomPosition(atom_idx)
        scaled_x = conversionFactor * position.x
        scaled_y = conversionFactor * position.y
        scaled_z = conversionFactor * position.z
        symbol = mol.GetAtomWithIdx(atom_idx).GetSymbol()
        atomList.append(
            blocks.atomP(
                resID=1,
                resName=name,
                atomType=symbol,
                atomID=atom_idx + 1,
                xp=scaled_x,
                yp=scaled_y,
                zp=scaled_z,
            )
        )
    self.POSITION = blocks.POSITION(atomList)

    # Defaults set for GENBOX - for liquid sim adjust manually
    self.GENBOX = blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90])
def featurization(r_mol: Chem.rdchem.Mol,
                  p_mol: Chem.rdchem.Mol,
                  ):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected without self-loops: every unordered atom pair
    contributes two directed edges. Each edge carries two features, the 3D
    distance between the two atoms in the reactant and in the product.

    Args:
        r_mol: RDKit molecule object for the reactant.
        p_mol: RDKit molecule object for the product.
            Assumed to have the same atom count and atom ordering as ``r_mol``
            -- TODO confirm against the dataset preparation.

    Returns:
        data: Torch Geometric Data object, storing the atom and bond features:
            ``x`` holds ``atom_features`` of every reactant atom,
            ``edge_index`` has shape ``(2, n_edges)`` and
            ``edge_attr`` has shape ``(n_edges, 2)``.
    """
    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    # The reactant's atom count defines the size of the graph.
    n_atoms = r_mol.GetNumAtoms()

    # 3D distance matrices of reactant and product
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    # atom (node) features, taken from the reactant only
    f_atoms = [atom_features(r_mol.GetAtomWithIdx(a1)) for a1 in range(n_atoms)]

    edge_index = []  # list of (source, target) tuples
    f_bonds = []  # edge features, aligned with edge_index
    for a1 in range(n_atoms):
        for a2 in range(a1 + 1, n_atoms):
            # fully connected graph: add both directions of the pair
            edge_index.extend([(a1, a2), (a2, a1)])
            # for now, naively include both reac and prod distances
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    n_edges = len(edge_index)
    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # Explicit reshapes keep the (2, 0) / (0, 2) shapes when there are no
    # edges (fewer than two atoms); they are no-ops otherwise.
    data.edge_index = torch.tensor(edge_index, dtype=torch.long).reshape(n_edges, 2).t().contiguous()
    data.edge_attr = torch.tensor(f_bonds, dtype=torch.float).reshape(n_edges, 2)

    return data