def copy_origins(cls, annotated: Chem.Mol, target: Chem.Mol):
    """
    Copy the ``_Origin`` atom annotation from ``annotated`` onto ``target``.

    The atom prop ``_Origin`` is a JSON list of ``<mol _Name>.<AtomIdx>`` entries.
    Atoms are paired through the maximum common substructure of the two
    molecules (elements must match, any bond order is accepted, ring atoms only
    match ring atoms), so the transfer does not rely on both molecules sharing
    the same atom order or the same number of atoms.

    :param annotated: molecule whose atoms carry the origin annotation
    :param target: molecule to annotate; its atoms are modified in place
    :return: a list with one entry per atom of ``target`` (in atom-index order):
        the origin copied onto that atom, or an empty list when the atom is
        not part of the common substructure.
    """
    mcs = rdFMCS.FindMCS([target, annotated],
                         atomCompare=rdFMCS.AtomCompare.CompareElements,
                         bondCompare=rdFMCS.BondCompare.CompareAny,
                         ringMatchesRingOnly=True)
    common = Chem.MolFromSmarts(mcs.smartsString)
    # target atom index -> annotated atom index
    dmapping = dict(zip(target.GetSubstructMatch(common),
                        annotated.GetSubstructMatch(common)))
    origins = []
    for i in range(target.GetNumAtoms()):
        if i not in dmapping:
            origins.append([])
            continue
        atom = annotated.GetAtomWithIdx(dmapping[i])
        tatom = target.GetAtomWithIdx(i)
        o = cls._get_origin(atom)
        tatom.SetProp('_Origin', json.dumps(o))
        # the list used to be returned without ever being filled
        origins.append(o)
    return origins
def copy_all_possible_origins(cls, annotated: Chem.Mol,
                              target: Chem.Mol) -> Tuple[List[Chem.Mol], List[list]]:
    """
    Copy the ``_Origin`` annotation (and coordinates) for every possible atom pairing.

    The atom prop ``_Origin`` is a JSON list of ``<mol _Name>.<AtomIdx>`` entries.
    The maximum common substructure of ``target`` and ``annotated`` is computed
    (elements must match, any bond order, ring atoms only match ring atoms) and
    every combination of a substructure match in ``target`` with a substructure
    match in ``annotated`` yields one annotated copy of ``target``.
    ``target`` itself is left untouched.

    :param annotated: molecule whose atoms carry the origin annotation
    :param target: molecule to make annotated copies of
    :return: a list of annotated copies of ``target`` and, parallel to it, a list
        holding for each copy one entry per atom (in atom-index order): the
        origin copied onto that atom, or an empty list for atoms outside the match.
    """
    mcs = rdFMCS.FindMCS([target, annotated],
                         atomCompare=rdFMCS.AtomCompare.CompareElements,
                         bondCompare=rdFMCS.BondCompare.CompareAny,
                         ringMatchesRingOnly=True)
    common = Chem.MolFromSmarts(mcs.smartsString)
    # hoisted: the matches do not change between iterations
    target_matches = target.GetSubstructMatches(common)
    annotated_matches = annotated.GetSubstructMatches(common)
    options = []
    originss = []
    for target_match in target_matches:
        for anno_match in annotated_matches:
            # target atom index -> annotated atom index for this pairing
            dmapping = dict(zip(target_match, anno_match))
            origins = []
            option = Chem.Mol(target)
            for i in range(option.GetNumAtoms()):
                if i not in dmapping:
                    origins.append([])
                    continue
                atom = annotated.GetAtomWithIdx(dmapping[i])
                tatom = option.GetAtomWithIdx(i)
                o = cls._get_origin(atom)
                tatom.SetProp('_Origin', json.dumps(o))
                # the per-option list used to be returned without ever being filled
                origins.append(o)
                xyz = cls._get_xyz(atom)
                if xyz:
                    cls._set_xyz(tatom, xyz)
            options.append(option)
            originss.append(origins)
    return options, originss
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the property-weighted Burden matrix of ``mol`` and return its eigenvalues.

    Hydrogens are added before the matrix is built. The matrix is filled as follows:

    * diagonal: the relative atomic property ``propertylabel`` of each atom
      (as given by ``GetRelativeAtomicProperty``), rounded to 3 decimals;
    * bonded pairs: square root of the bond order (1, 2, 3, or 1.5 for
      aromatic bonds), rounded to 3 decimals; any other bond type keeps the
      adjacency value;
    * non-bonded pairs: 0.001.

    :param mol: molecule to describe
    :param propertylabel: name of the atomic property used on the diagonal
    :return: the real part of the eigenvalues of the matrix
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal: weighting property of each atom.
    for idx in range(mol.GetNumAtoms()):
        symbol = mol.GetAtomWithIdx(idx).GetSymbol()
        weight = GetRelativeAtomicProperty(element=symbol,
                                           propertyname=propertylabel)
        burden[idx, idx] = round(weight, 3)

    # Bonded pairs: square root of the conventional bond order.
    bond_weights = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for row, col in numpy.argwhere(adjacency):
        bond = mol.GetBondBetweenAtoms(int(row), int(col))
        bond_name = bond.GetBondType().name
        if bond_name in bond_weights:
            burden[row, col] = bond_weights[bond_name]

    # Non-bonded, off-diagonal pairs get a small constant.
    for row, col in numpy.argwhere(adjacency == 0):
        if row != col:
            burden[row, col] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
def __extract_subgraph(mol: Chem.Mol, selected_atoms: Set[int]) -> Tuple[Chem.Mol, List[int]]:
    """
    Cut the sub-molecule made of ``selected_atoms`` out of ``mol``.

    A selected atom is a "root" when at least one of its neighbours is not
    selected. Roots are tagged with atom map number 1 and lose their aromatic
    flag when none of their aromatic bonds stays inside the selection.

    :param mol: molecule to extract from (not modified; a copy is edited)
    :param selected_atoms: indices of the atoms to keep
    :return: the extracted molecule and the list of root atom indices
    """
    selected_atoms = set(selected_atoms)

    # Roots: selected atoms that touch the rest of the molecule.
    roots = [
        idx for idx in selected_atoms
        if any(nei.GetIdx() not in selected_atoms
               for nei in mol.GetAtomWithIdx(idx).GetNeighbors())
    ]

    new_mol = Chem.RWMol(mol)
    aromatic = Chem.rdchem.BondType.AROMATIC
    for root_idx in roots:
        root = new_mol.GetAtomWithIdx(root_idx)
        root.SetAtomMapNum(1)
        keeps_aromatic_bond = any(
            bond.GetBondType() == aromatic
            and bond.GetBeginAtom().GetIdx() in selected_atoms
            and bond.GetEndAtom().GetIdx() in selected_atoms
            for bond in root.GetBonds()
        )
        if not keeps_aromatic_bond:
            root.SetIsAromatic(False)

    # Delete from the highest index down so the remaining indices stay valid.
    discarded = sorted(
        (atom.GetIdx() for atom in new_mol.GetAtoms()
         if atom.GetIdx() not in selected_atoms),
        reverse=True)
    for idx in discarded:
        new_mol.RemoveAtom(idx)
    return new_mol.GetMol(), roots
def from_annotated_mols(cls, annotated_followup: Chem.Mol,
                        hits: Sequence[Chem.Mol]) -> mRSMD:
    """
    Build an instance from a followup that carries ``_Origin`` atom annotations.

    The atom prop ``_Origin`` is a JSON list of ``<mol _Name>.<AtomIdx>`` entries.
    For each hit, every followup atom whose origin names that hit contributes
    a ``(followup atom index, hit atom index)`` pair to that hit's mapping.
    A hit with an empty ``_Name`` is reported on stdout and gets an empty mapping.

    :param annotated_followup: followup molecule with ``_Origin`` atom props
    :param hits: the hits the followup derives from, each with a ``_Name`` prop
    :return: ``cls(annotated_followup, hits, mappings)`` where ``mappings`` holds
        one list of index pairs per hit, in the order of ``hits``
    """
    mappings = []
    for hit in hits:
        hname = hit.GetProp('_Name')
        mapping = []
        if hname == '':
            print(f'{hit} has no name!')
        else:
            # The name is matched literally (escaped), so regex metacharacters
            # in a hit name cannot alter the pattern; compiled once per hit.
            origin_pattern = re.compile(re.escape(hname) + r'\.(\d+)')
            for i in range(annotated_followup.GetNumAtoms()):
                atom = annotated_followup.GetAtomWithIdx(i)
                for oel in cls._get_origin(atom):
                    rex = origin_pattern.match(oel)
                    if rex is not None:
                        mapping.append((i, int(rex.group(1))))
        mappings.append(mapping)
    return cls(annotated_followup, hits, mappings)
def _categorise(self, mol: Chem.Mol,
                uniques: set) -> Dict[str, Union[set, Dict]]:
    """
    Classify the novel atoms of ``mol`` by how they connect to the rest.

    The result (called ``categories`` in the other methods) has these keys:

    * ``uniques``: the set of atom indices that was passed in
    * ``internals``: unique atoms bonded only to unique atoms
    * ``attachments``: non-unique atoms bonded to a unique atom
    * ``pairs``: unique atom index --> list of dicts, one per attachment,
      with ``idx`` (attachment atom index) and ``type`` (bond type)
    * ``dummies``: unique atoms whose symbol equals ``self.dummy_symbol``

    Every atom of ``mol`` additionally gets the prop ``_Category``.

    :param mol: molecule to describe (its atoms are annotated in place)
    :param uniques: set of indices that are new to this molecule
    :return: the dictionary described above
    """
    pairs = {}
    internals = set()
    attachments = set()
    dummies = set()
    for idx in uniques:
        novel_atom = mol.GetAtomWithIdx(idx)
        if novel_atom.GetSymbol() == self.dummy_symbol:
            dummies.add(idx)
        neighbours = {nbr.GetIdx() for nbr in novel_atom.GetNeighbors()}
        outside = neighbours - uniques
        if not outside:
            # every neighbour is novel too
            internals.add(idx)
            continue
        attachments |= outside
        pairs[idx] = [
            {'idx': j, 'type': mol.GetBondBetweenAtoms(idx, j).GetBondType()}
            for j in outside
        ]
    # Only needed by the debug drawing that used to live here (now disabled).
    anchors = uniques - internals
    # Store the classification on the atoms themselves.
    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        if idx in internals:
            label = 'internal'  # novel, bonded to novel atoms only
        elif idx in attachments:
            label = 'overlapping-attachment'  # not novel, bonded to a novel atom
        elif idx in pairs:
            label = 'internal-attachment'  # novel, bonded to a non-novel atom
        else:
            label = 'overlapping'
        atom.SetProp('_Category', label)
    return dict(uniques=uniques,
                internals=internals,
                attachments=attachments,
                pairs=pairs,
                dummies=dummies)
def _CalculateMoranAutocorrelation(mol: Chem.Mol, lag: int = 1,
                                   propertylabel: str = 'm') -> float:
    """Calculate the weighted Moran autocorrelation descriptor.

    The property of every atom (from ``GetRelativeAtomicProperty``) is centred
    on the molecular mean. The products of the centred values are averaged
    over all ordered atom pairs at topological distance ``lag`` and divided
    by the mean squared deviation of the property.

    :param mol: molecule to describe
    :param lag: topological distance between atom i and atom j.
    :param propertylabel: type of weighted property
    :return: the descriptor rounded to 3 decimals; 0 when the molecule has no
        atoms, when no atom pair lies at distance ``lag`` or when the
        property has no variance.
    """
    Natom = mol.GetNumAtoms()
    if Natom == 0:
        # nothing to average over; avoids a division by zero below
        return 0.0

    prolist = [
        GetRelativeAtomicProperty(atom.GetSymbol(), propertyname=propertylabel)
        for atom in mol.GetAtoms()
    ]
    aveweight = sum(prolist) / Natom
    # The centred values are reused for every pair instead of looking the
    # property up again inside the double loop.
    centred = [x - aveweight for x in prolist]
    squared_deviation = sum(numpy.square(c) for c in centred)

    distance_matrix = Chem.GetDistanceMatrix(mol)
    res = 0.0
    index = 0
    for i in range(Natom):
        for j in range(Natom):
            if distance_matrix[i, j] == lag:
                res = res + centred[i] * centred[j]
                index += 1

    if squared_deviation == 0 or index == 0:
        result = 0
    else:
        result = (res / index) / (squared_deviation / Natom)
    return round(result, 3)
def find_symmetry_classes(rdkit_mol: Chem.Mol) -> Dict[int, str]:
    """
    Label every atom with its symmetry class, derived from the CIP rank.

    Atoms sharing a ``_CIPRank`` are treated as symmetry-equivalent; approach from
    https://sourceforge.net/p/rdkit/mailman/message/27897393/
    Our thanks to Dr Michal Krompiec for the symmetrisation method and its implementation.

    :param rdkit_mol: molecule to find symmetry classes for (rdkit mol class object)
    :return: A dict where the keys are the atom indices and the values are the
        class of the atom as a str (the CIP rank itself; only consistency
        matters, not the specific value). Entries are ordered by class,
        then by atom index.
    """
    # Only the first atom is checked; the ranks are assigned for all atoms at once.
    if not rdkit_mol.GetAtomWithIdx(0).HasProp("_CIPRank"):
        Chem.AssignStereochemistry(rdkit_mol,
                                   cleanIt=True,
                                   force=True,
                                   flagPossibleStereoCenters=True)

    ranks = [int(atom.GetProp("_CIPRank")) for atom in rdkit_mol.GetAtoms()]

    # Stable sort: atoms grouped by rank, ascending atom index within a rank.
    atoms_by_rank = sorted(range(len(ranks)), key=ranks.__getitem__)
    return {atom_idx: str(ranks[atom_idx]) for atom_idx in atoms_by_rank}
def subset_rdmol(rdmol: Chem.Mol,
                 atom_indices: Iterable[int],
                 check_bonds: bool = True,
                 return_atom_indices: bool = False) -> Chem.Mol:
    """
    Return a copy of ``rdmol`` reduced to the given atoms.

    :param rdmol: molecule to take the subset from (not modified)
    :param atom_indices: indices of the atoms to keep; any iterable of ints
        (list, tuple, set, array, generator) is accepted
    :param check_bonds: if True, an unselected atom that is bonded to more than
        one selected atom is kept as well, so it keeps linking the selection
    :param return_atom_indices: if True, also return the indices that were kept
    :return: the reduced molecule (an ``RWMol``), or a tuple of that molecule
        and the list of kept atom indices when ``return_atom_indices`` is True.
        With ``check_bonds`` the list is sorted and includes the extra linking
        atoms; otherwise it holds the indices in the order given.
    """
    rdmol = Chem.RWMol(rdmol)
    # Materialise once: the input may be a one-shot iterable, and only a list
    # supports the concatenation below.
    atom_indices = list(atom_indices)
    selected = set(atom_indices)
    to_remove = [i for i in range(rdmol.GetNumAtoms()) if i not in selected]
    if check_bonds:
        multiple_bonds = []
        for i in to_remove:
            atom = rdmol.GetAtomWithIdx(i)
            n_bonds = sum(1 for bond in atom.GetBonds()
                          if bond.GetOtherAtomIdx(i) in selected)
            if n_bonds > 1:
                multiple_bonds.append(i)
        atom_indices = sorted(atom_indices + multiple_bonds)
        linking = set(multiple_bonds)
        to_remove = [i for i in to_remove if i not in linking]
    # Remove from the highest index down so the remaining indices stay valid.
    for i in reversed(to_remove):
        rdmol.RemoveAtom(i)
    rdmol.UpdatePropertyCache()
    if return_atom_indices:
        return rdmol, atom_indices
    return rdmol
def __extract_subgraph(mol: Chem.Mol, selected_atoms: Set[int]) -> Tuple[Chem.Mol, List[int]]:
    """
    Extracts a subgraph from an RDKit molecule given a set of atom indices.

    Selected atoms with at least one neighbour outside the selection are the
    roots: they get atom map number 1, and their aromatic flag is cleared
    when none of their aromatic bonds lies entirely within the selection.

    :param mol: An RDKit molecule from which to extract a subgraph.
    :param selected_atoms: The atoms which form the subgraph to be extracted.
    :return: A tuple containing an RDKit molecule representing the subgraph
             and a list of root atom indices from the selected indices.
    """
    selected_atoms = set(selected_atoms)

    def touches_outside(atom_idx: int) -> bool:
        """True when the atom has a neighbour that is not selected."""
        neighbours = mol.GetAtomWithIdx(atom_idx).GetNeighbors()
        return any(nei.GetIdx() not in selected_atoms for nei in neighbours)

    roots = [idx for idx in selected_atoms if touches_outside(idx)]

    new_mol = Chem.RWMol(mol)
    for root_idx in roots:
        root = new_mol.GetAtomWithIdx(root_idx)
        root.SetAtomMapNum(1)
        inner_aromatic_bonds = [
            bond for bond in root.GetBonds()
            if bond.GetBondType() == Chem.rdchem.BondType.AROMATIC
            and bond.GetBeginAtom().GetIdx() in selected_atoms
            and bond.GetEndAtom().GetIdx() in selected_atoms
        ]
        if not inner_aromatic_bonds:
            root.SetIsAromatic(False)

    # Drop unselected atoms, highest index first, so indices stay valid.
    unselected = [atom.GetIdx() for atom in new_mol.GetAtoms()
                  if atom.GetIdx() not in selected_atoms]
    unselected.sort(reverse=True)
    for idx in unselected:
        new_mol.RemoveAtom(idx)

    return new_mol.GetMol(), roots
def _CalculateEState(mol: Chem.Mol, skipH: bool = True) -> numpy.ndarray:
    """Get the EState value of each atom in the molecule.

    Each atom with at least one neighbour first gets an intrinsic state
    ``(4 / N**2 * dv + 1) / d`` where ``N`` comes from
    ``_GetPrincipleQuantumNumber``, ``dv`` is the number of outer electrons
    minus the total number of hydrogens and ``d`` is the degree of the atom.
    The intrinsic states are then perturbed pairwise by
    ``(I_i - I_j) / p**2`` with ``p`` the topological distance plus one.

    :param mol: molecule to describe
    :param skipH: if True, explicit hydrogens are removed again after being
        added, so the result has one value per heavy atom
    :return: array with one EState value per atom of the processed molecule
    """
    mol = Chem.AddHs(mol)
    if skipH:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    # ``numpy.float`` was removed from NumPy; the builtin is the same dtype.
    Is = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            # presumably very large entries mark atom pairs with no path
            # between them -- confirm against the RDKit distance matrix docs
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res
def rename_from_template(self, template: Chem.Mol, overwrite: bool = True):
    """
    Assigns to the atoms in self.mol the names based on the template,
    which does not need to be a perfect match.
    See ``_fix_atom_names`` for example usage.

    Atoms are paired through the maximum common substructure of ``self.mol``
    and ``template`` (elements must match, any bond order is accepted, ring
    atoms only match ring atoms). The name is read from the PDB residue
    info of the template atom; template atoms without it are logged at debug
    level and skipped.

    :param template: mol object with atom names. It is sanitised in place.
    :param overwrite: passed on to ``self.rename_atom``
    :return: None for now.
    """
    AllChem.SanitizeMol(template)
    mcs = rdFMCS.FindMCS([self.mol, template],
                         atomCompare=rdFMCS.AtomCompare.CompareElements,
                         bondCompare=rdFMCS.BondCompare.CompareAny,
                         ringMatchesRingOnly=True)
    common = Chem.MolFromSmarts(mcs.smartsString)
    for acceptor, donor in zip(self.mol.GetSubstructMatch(common),
                               template.GetSubstructMatch(common)):
        a_atom = self.mol.GetAtomWithIdx(acceptor)
        d_atom = template.GetAtomWithIdx(donor)
        info = d_atom.GetPDBResidueInfo()
        if info:
            self.rename_atom(a_atom, info.GetName(), overwrite=overwrite)
        else:
            # was ``self.log.debug.info(...)``: an attribute lookup on the
            # ``debug`` method, which fails instead of logging
            self.log.debug(
                f'No info in template for atom {d_atom.GetSymbol()} #{donor}'
            )
def guess_origins(self, mol: Chem.Mol = None,
                  hits: Optional[List[Chem.Mol]] = None):
    """
    Given a positioned mol, guess the origin of its atoms from the hits.

    For every hit, the positional mapping between the hit and ``mol`` is
    computed and each mapped atom of ``mol`` gets ``<hit _Name>.<hit atom idx>``
    appended to the JSON list stored in its ``_Origin`` prop. Atoms flagged
    with a true ``_Novel`` prop are skipped. An ``_Origin`` of ``'none'`` is
    treated like a missing one.

    :param mol: the positioned molecule; its atoms are annotated in place.
        NOTE(review): the default of None is not handled in this method.
    :param hits: the hits to compare against; defaults to ``self.hits``
    :return: None
    """
    if hits is None:
        hits = self.hits
    for hit in hits:
        hname = hit.GetProp('_Name')
        positional_mapping = self.get_positional_mapping(hit, mol)
        for hi, mi in positional_mapping.items():
            atom = mol.GetAtomWithIdx(mi)
            if atom.HasProp('_Novel') and atom.GetBoolProp('_Novel'):
                continue  # flagged to avoid.
            has_origin = (atom.HasProp('_Origin')
                          and atom.GetProp('_Origin') != 'none')
            origin = json.loads(atom.GetProp('_Origin')) if has_origin else []
            origin.append(f'{hname}.{hi}')
            atom.SetProp('_Origin', json.dumps(origin))
def _pre_fragment_pairs(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, A2B_mapping: Optional = None) \
        -> Dict[int, List[Dict]]:
    """
    Describe how the novel atoms of ``fragmentanda`` attach to ``scaffold``.

    Returns

        {4: [{'idx': 5,
              'type': rdkit.Chem.rdchem.BondType.SINGLE,
              'idx_F': 5,
              'idx_S': 1}], ...}

    which is the ``pairs`` entry of ``_categorise`` with two extra keys
    per attachment:

    * ``idx_F``: fragmentanda index of the attachment atom (same as ``idx``)
    * ``idx_S``: scaffold index of that same atom

    Required for self.merge; the key is the index of the anchoring atom.
    Calls get_positional_mapping (unless a mapping is given) and _categorise.
    As a side effect, scaffold atoms mapped onto a fragmentanda atom of a
    different element get that symbol stored in the prop ``_AltSymbol``.

    :param scaffold: mol to be added to.
    :param fragmentanda: mol to be fragmented
    :param A2B_mapping: see ``get_positional_mapping``
    :return: the augmented ``pairs`` dictionary
    :raises ConnectionError: if the mapping is empty
    """
    if A2B_mapping is None:
        A2B_mapping = self.get_positional_mapping(scaffold, fragmentanda)
    if len(A2B_mapping) == 0:
        raise ConnectionError('No overlap!')

    def scaffold_index_of(fragment_index):
        """Reverse lookup: first scaffold index mapped to the fragmentanda index."""
        scaffold_indices = list(A2B_mapping.keys())
        fragment_indices = list(A2B_mapping.values())
        return scaffold_indices[fragment_indices.index(fragment_index)]

    # Store alternative atom symbols.
    for si, fi in A2B_mapping.items():
        scaffold_atom = scaffold.GetAtomWithIdx(si)
        fragment_symbol = fragmentanda.GetAtomWithIdx(fi).GetSymbol()
        if scaffold_atom.GetSymbol() != fragment_symbol:
            scaffold_atom.SetProp('_AltSymbol', fragment_symbol)

    # Atoms of fragmentanda without a counterpart in the scaffold.
    uniques = set(range(fragmentanda.GetNumAtoms())) - set(
        A2B_mapping.values())
    pairs = self._categorise(fragmentanda, uniques)['pairs']
    for attachment_list in pairs.values():
        for attachment in attachment_list:
            attachment['idx_F'] = attachment['idx']
            attachment['idx_S'] = scaffold_index_of(attachment['idx'])
    return pairs
def find_closest_to_ligand(cls, pdb: Chem.Mol,
                           ligand_resn: str) -> Tuple[Chem.Atom, Chem.Atom]:
    """
    Find the pair of closest atoms between the ligand and the rest.

    :param pdb: a rdkit Chem object
    :param ligand_resn: 3 letter code
    :return: tuple of non-ligand atom and ligand atom
    :raises ValueError: if no atom has the residue name ``ligand_resn``
    """
    ligand = []
    for atom in pdb.GetAtoms():
        info = atom.GetPDBResidueInfo()
        # atoms without PDB residue info cannot belong to the ligand
        if info is not None and info.GetResidueName() == ligand_resn:
            ligand.append(atom.GetIdx())
    if not ligand:
        raise ValueError(
            f'No atom with residue name {ligand_resn!r} in the molecule.')
    dm = Chem.Get3DDistanceMatrix(pdb)
    # rows: ligand atoms, columns: all atoms
    mini = np.take(dm, ligand, 0)
    mini[mini == 0] = np.nan  # zero distances (such as an atom to itself) are ignored
    mini[:, ligand] = np.nan  # ligand-to-ligand distances are ignored
    a, b = np.where(mini == np.nanmin(mini))
    lig_atom = pdb.GetAtomWithIdx(ligand[int(a[0])])
    nonlig_atom = pdb.GetAtomWithIdx(int(b[0]))
    return (nonlig_atom, lig_atom)
def _get_substruct(mol: Chem.Mol, atoms: List[int]) -> str:
    """Convert a list of atom indices to the SMILES of that substructure.

    A one-atom molecule is written out whole. Otherwise the fragment made
    of ``atoms`` is written as kekulised SMILES; a single-atom selection is
    first extended with the neighbours of that atom. The SMILES is finally
    re-parsed without sanitisation and written again via ``_copy_mol`` and
    ``_mol_to_smi``.

    :param mol: molecule the indices refer to
    :param atoms: indices of the atoms of the substructure (not modified)
    :return: SMILES of the substructure
    """
    if mol.GetNumAtoms() == 1:
        smiles = _mol_to_smi(mol)
    else:
        if len(atoms) == 1:
            # For single-atom cliques, the substructure should contain the neighbors.
            centre = mol.GetAtomWithIdx(atoms[0])
            expanded = atoms[:]
            expanded.extend(nei.GetIdx() for nei in centre.GetNeighbors())
            atoms = expanded
        smiles = Chem.MolFragmentToSmiles(mol, atoms, kekuleSmiles=True)
    reparsed = Chem.MolFromSmiles(smiles, sanitize=False)
    return _mol_to_smi(_copy_mol(reparsed))
def _CalculateAtomEState(mol: Chem.Mol, AtomicNum=6) -> float:
    """Calculate the sum of the EState indices over all atoms with specified atomic number.

    :param mol: molecule to describe
    :param AtomicNum: atomic number of the atoms to sum over
    :return: sum of the EState values (from ``_CalculateEState``) of the
        matching atoms; 0.0 when no atom matches.
        NOTE(review): the values are indexed with the atom indices of ``mol``,
        which assumes ``_CalculateEState`` returns one value per atom of
        ``mol`` in the same order -- confirm for molecules with explicit
        hydrogens.
    """
    nAtoms = mol.GetNumAtoms()
    # ``numpy.float`` was removed from NumPy; the builtin is the same dtype.
    Is = numpy.zeros(nAtoms, float)
    Estate = _CalculateEState(mol)
    for i in range(nAtoms):
        if mol.GetAtomWithIdx(i).GetAtomicNum() == AtomicNum:
            Is[i] = Estate[i]
    return sum(Is)
def _categorise(self, mol: Chem.Mol,
                uniques: set) -> Dict[str, Union[set, Dict]]:
    """
    Classify the novel atoms of ``mol`` by how they connect to the rest.

    The result (called ``categories`` in the other methods) has these keys:

    * ``uniques``: the set of atom indices that was passed in
    * ``internals``: unique atoms bonded only to unique atoms
    * ``attachments``: non-unique atoms bonded to a unique atom
    * ``pairs``: unique atom index --> list of dicts, one per attachment,
      with ``idx`` (attachment atom index) and ``type`` (bond type)
    * ``dummies``: unique atoms whose symbol equals ``self.dummy_symbol``

    When ``self._debug_draw`` is set the molecule is also drawn with
    internals, attachments and anchors (unique atoms that are not internal)
    highlighted.

    :param mol: molecule to describe
    :param uniques: set of indices that are new to this molecule
    :return: the dictionary described above
    """
    pairs = {}
    internals = set()
    attachments = set()
    dummies = set()
    for idx in uniques:
        novel_atom = mol.GetAtomWithIdx(idx)
        if novel_atom.GetSymbol() == self.dummy_symbol:
            dummies.add(idx)
        neighbours = {nbr.GetIdx() for nbr in novel_atom.GetNeighbors()}
        outside = neighbours - uniques
        if not outside:
            # every neighbour is novel too
            internals.add(idx)
            continue
        attachments |= outside
        pairs[idx] = [
            {'idx': j, 'type': mol.GetBondBetweenAtoms(idx, j).GetBondType()}
            for j in outside
        ]
    anchors = uniques - internals
    if self._debug_draw:
        high = [*internals, *attachments, *anchors]
        # Later groups win when an index belongs to more than one group.
        color = {}
        color.update((i, (0, 0.8, 0)) for i in internals)
        color.update((i, (0, 0, 0.8)) for i in attachments)
        color.update((i, (0.8, 0, 0.8)) for i in anchors)
        self.draw_nicely(mol, highlightAtoms=high, highlightAtomColors=color)
    return dict(uniques=uniques,
                internals=internals,
                attachments=attachments,
                pairs=pairs,
                dummies=dummies)
def process_mol(self, test_mol: Chem.Mol) -> list:
    """
    Decompose molecule in sidechains

    The scaffold ``self.rg_mol`` is matched onto the molecule; for each
    match the atom map numbers of the scaffold are pushed to the neighbouring
    non-scaffold atoms, the scaffold is deleted and the remaining fragments
    are written out as labelled R-groups. Atom map numbers of ``test_mol``
    are overwritten in the process.

    @param test_mol: input molecule
    @return: list of R-groups as SMILES, one per entry of ``self.rg_idx_lst``
        (``[H][*:n]`` where no substituent was found), taken from the scaffold
        mapping with the lowest ``count_hydrogens`` value; an empty list if the
        scaffold does not match
    """
    # The subgraph match of the scaffold onto the molecule
    match_list = test_mol.GetSubstructMatches(self.rg_mol, False)
    if not match_list:
        return []

    # One list of R-group SMILES per match, to take care of all symmetry mappings
    rgroup_smiles_lst = []
    for match in match_list:
        for atm in test_mol.GetAtoms():
            atm.SetAtomMapNum(0)
        match_set = set(match)
        # Map atom map numbers from the scaffold to the molecule
        for test_idx, query_idx in zip(match, self.rg_map_lst):
            match_atm = test_mol.GetAtomWithIdx(test_idx)
            match_atm.SetAtomMapNum(query_idx)
            # Push the atom map numbers to the non-scaffold neighbors
            for nbr in match_atm.GetNeighbors():
                if nbr.GetAtomMapNum() == 0 and (int(nbr.GetIdx())
                                                 not in match_set):
                    nbr.SetAtomMapNum(query_idx)

        # Delete the scaffold, should only leave labeled R-groups
        rgroup_mol = Chem.DeleteSubstructs(test_mol, self.rg_mol)
        for atm in rgroup_mol.GetAtoms():
            # Get rid of implicit hydrogens on the terminal atoms of the substituents
            if atm.GetAtomMapNum() > 0:
                atm.SetNoImplicit(True)

        # Start from hydrogen substituents [[H:1],[H:2],...] and overwrite the
        # positions for which a labelled fragment exists
        rgroup_smiles = ["[H][*:%d]" % x for x in self.rg_idx_lst]
        for frag in Chem.GetMolFrags(rgroup_mol,
                                     asMols=True,
                                     sanitizeFrags=False):
            frag_idx = get_fragment_idx(frag)
            # Stray fragments without an R-group label are skipped
            if frag_idx > 0:
                new_frag = grow_rgroup_atoms(frag)
                rgroup_smiles[frag_idx - 1] = Chem.MolToSmiles(new_frag, True)
        rgroup_smiles_lst.append(rgroup_smiles)

    # There may be multiple ways to map the scaffold onto the molecule. Pick
    # the mapping that uses the fewest hydrogens as R-groups, i.e. the one with
    # the largest number of non-hydrogen R-groups; the first one wins on ties.
    return min(rgroup_smiles_lst, key=count_hydrogens)
def _recruit_team(self, mol: Chem.Mol, starting: int, categories: dict,
                  team: Optional[set] = None) -> set:
    """
    Collect ``starting`` and every internal atom reachable from it.

    Starting from the atom ``starting``, the bonds are followed through atoms
    listed in ``categories['internals']``; atoms already in ``team`` are not
    visited again.

    :param mol: molecule the indices refer to
    :param starting: index of the atom to start from
    :param categories: dictionary with an ``internals`` entry holding atom indices
    :param team: atoms collected so far; extended in place when given
    :return: the team, including ``starting``
    """
    if team is None:
        team = set()
    internals = categories['internals']
    team.add(starting)
    # Explicit stack instead of recursion, so large connected groups of
    # atoms cannot exhaust the recursion limit.
    pending = [starting]
    while pending:
        current = pending.pop()
        for atom in mol.GetAtomWithIdx(current).GetNeighbors():
            i = atom.GetIdx()
            if i in internals and i not in team:
                team.add(i)
                pending.append(i)
    return team
def _recruit_team(self, mol: Chem.Mol, starting: int, uniques: set,
                  team: Optional[set] = None) -> set:
    """
    Collect ``starting`` and every unique atom reachable from it.

    Starting from the atom ``starting``, the bonds are followed recursively
    through atoms listed in ``uniques``; atoms already in ``team`` are not
    visited again.

    :param mol: molecule the indices refer to
    :param starting: index of the atom to start from
    :param uniques: set of atom indices that may be recruited
    :param team: atoms collected so far; extended in place when given
    :return: the team, including ``starting``
    """
    team = set() if team is None else team
    team.add(starting)
    neighbour_indices = [
        atom.GetIdx() for atom in mol.GetAtomWithIdx(starting).GetNeighbors()
    ]
    for idx in neighbour_indices:
        if idx not in team and idx in uniques:
            team = self._recruit_team(mol, idx, uniques, team)
    return team
def mol_with_atom_index(mol: Chem.Mol) -> Chem.Mol:
    """
    Returns a copy of the molecule whose atoms carry their own index in the
    ``molAtomMapNumber`` prop, so the indices show up when it is displayed.

    :param mol: target molecule (left unchanged)
    :return: labelled molecule
    """
    labelled = Chem.Mol(mol)
    for atom in labelled.GetAtoms():
        atom.SetProp('molAtomMapNumber', str(atom.GetIdx()))
    return labelled
def CalculateChiv4pc(mol: Chem.Mol) -> float:
    """Calculate valence molecular connectivity chi index for path/cluster.

    Every match of the SMARTS ``*~*(~*)~*~*`` contributes
    ``1 / sqrt(product of the deltas of its atoms)``, the deltas coming from
    ``_AtomHallKierDeltas``; deltas equal to 0 are left out of the product.

    :param mol: molecule to describe
    :return: the sum of the contributions; 0.0 when the pattern does not match
    """
    accum = 0.0
    patt = Chem.MolFromSmarts('*~*(~*)~*~*')
    for cluster in mol.GetSubstructMatches(patt):
        deltas = [_AtomHallKierDeltas(mol.GetAtomWithIdx(x)) for x in cluster]
        while 0 in deltas:
            deltas.remove(0)
        if deltas != []:
            # ``numpy.float`` was removed from NumPy; the builtin is the same dtype.
            deltas1 = numpy.array(deltas, float)
            accum = accum + 1. / numpy.sqrt(deltas1.prod())
    return accum
def mol2xyz_by_confid(molecule: Mol, prefix='rdmol', confid=0, comment_line=''):
    """
    Write one conformer of ``molecule`` to the XYZ file ``<prefix>_<confid>.xyz``.

    The file holds the number of atoms, the comment line, then one line per
    atom with its symbol, a tab and the three coordinates formatted with
    6 significant digits.

    :param molecule: molecule holding the conformer
    :param prefix: first part of the file name (may include a directory)
    :param confid: id of the conformer to write; also used in the file name
    :param comment_line: text of the second line of the file
    :return: None
    """
    natoms = molecule.GetNumAtoms()
    filename = "{}_{}.xyz".format(prefix, confid)
    # Looked up once rather than once per atom.
    conformer = molecule.GetConformer(confid)
    lines = ["{}\n{}\n".format(natoms, comment_line)]
    for i in range(natoms):
        position = conformer.GetAtomPosition(i)
        symbol = molecule.GetAtomWithIdx(i).GetSymbol()
        lines.append("{}\t{:.6} {:.6} {:.6}\n".format(symbol, position.x,
                                                      position.y, position.z))
    with open(filename, 'w') as f:
        f.write("".join(lines))
def get_combined_rmsd(cls,
                      followup_moved: Chem.Mol,
                      followup_placed: Optional[Chem.Mol] = None,
                      hits: Optional[List[Chem.Mol]] = None) -> float:
    """
    Deprecated.
    The inbuilt RMSD calculations in RDKit align the two molecules, this does not align them.
    This deals with the case of multiple hits.
    For euclidean distance the square root of the sum of the differences in each coordinates is taken.
    For a regular RMSD the still-squared distance is averaged before taking the root.
    Here the average is done across all the atom pairs between each hit and the followup.
    Therefore, atoms in followup that derive in the blended molecule by multiple atom are scored multiple times.

    As a classmethod ``followup_placed`` and ``hits`` must be provided. But as an instance method they don't.

    :param followup_moved: followup compound moved by Igor or similar
    :param followup_placed: followup compound as placed by Monster;
        defaults to ``cls.positioned_mol``
    :param hits: list of hits; defaults to ``cls.hits``
    :return: combined RMSD: the square root of the summed square deviations
        divided by the number of mapped atom pairs
    :raises AssertionError: if the element of an atom differs between
        ``followup_placed`` and ``followup_moved`` at the same index
    :raises ZeroDivisionError: if no atom of any hit maps onto the followup
    """
    # class or instance?
    if followup_placed is None:  # instance
        assert hasattr(
            cls, '__class__'
        ), 'if called as a classmethod the list of hits need to be provided.'
        followup_placed = cls.positioned_mol
    if hits is None:  # instance
        assert hasattr(
            cls, '__class__'
        ), 'if called as a classmethod the list of hits need to be provided.'
        hits = cls.hits
    for i in range(followup_placed.GetNumAtoms()):
        assert followup_placed.GetAtomWithIdx(
            i).GetSymbol() == followup_moved.GetAtomWithIdx(
                i).GetSymbol(), 'The atoms order is changed.'
    if followup_moved.GetNumAtoms() > followup_placed.GetNumAtoms():
        warn(
            f'Followup moved {followup_moved.GetNumAtoms()} has more atoms that followup placed {followup_placed.GetNumAtoms()}. Assuming these are hydrogens.'
        )
    # calculate
    tatoms = 0
    d = 0
    for hit in hits:
        mapping = list(
            cls.get_positional_mapping(followup_placed, hit).items())
        tatoms += len(mapping)
        if len(mapping) == 0:
            continue
        d += cls._get_square_deviation(followup_moved, hit, mapping)
    # Average first, then take the root. ``d / tatoms**0.5`` binds the power
    # to ``tatoms`` only and returned the sum divided by the root of the count.
    return (d / tatoms) ** 0.5
def copy_names(cls, acceptor_mol: Chem.Mol, donor_mol: Chem.Mol):
    """
    Copy names from donor to acceptor by finding MCS.
    Does it properly and uses ``PDBResidueInfo``.

    Atoms are paired through the maximum common substructure of the two
    molecules (elements and bond orders must match, ring atoms only match
    ring atoms); each paired acceptor atom receives the PDB residue info of
    its donor atom as monomer info.

    :param acceptor_mol: needs atomnames; modified in place
    :param donor_mol: has atomnames
    :return: None
    :raises AssertionError: if a paired acceptor and donor atom differ in element
    """
    mcs = rdFMCS.FindMCS([acceptor_mol, donor_mol],
                         atomCompare=rdFMCS.AtomCompare.CompareElements,
                         bondCompare=rdFMCS.BondCompare.CompareOrder,
                         ringMatchesRingOnly=True)
    common = Chem.MolFromSmarts(mcs.smartsString)
    # Match on the molecule itself: the indices are used on ``acceptor_mol``
    # below, and a ``Chem.Mol`` has no ``positioned_mol`` attribute.
    pos_match = acceptor_mol.GetSubstructMatch(common)
    pdb_match = donor_mol.GetSubstructMatch(common)
    for m, p in zip(pos_match, pdb_match):
        ma = acceptor_mol.GetAtomWithIdx(m)
        pa = donor_mol.GetAtomWithIdx(p)
        assert ma.GetSymbol() == pa.GetSymbol(), 'The indices do not align! ' + \
                                                 f'{ma.GetIdx()}:{ma.GetSymbol()} vs. ' + \
                                                 f'{pa.GetIdx()}:{pa.GetSymbol()}'
        ma.SetMonomerInfo(pa.GetPDBResidueInfo())
def conformer_to_xyz(molecule: Mol, conf_id=0, comment=None) -> str:
    """
    Render one conformer of ``molecule`` as XYZ-formatted text.

    The text starts with the number of atoms, followed by the comment line
    (left blank without a comment) and one ``symbol x y z`` line per atom.
    There is no trailing newline.

    :param molecule: molecule holding the conformer
    :param conf_id: id of the conformer to render
    :param comment: optional text for the comment line
    :return: the XYZ text
    """
    num_atoms = molecule.GetNumAtoms()
    pieces = [f'{num_atoms}\n']
    if comment:
        pieces.append(comment)
    conformer = molecule.GetConformer(conf_id)
    for atom in molecule.GetAtoms():
        position = conformer.GetAtomPosition(atom.GetIdx())
        pieces.append(
            f'\n{atom.GetSymbol()} {position.x} {position.y} {position.z}')
    return ''.join(pieces)
def assign_features(self, mol: Mol):
    """
    Flag the hydrogen-bond acceptor and donor atoms of ``mol``.

    Every atom gets the props ``_Feature_Acceptor`` and ``_Feature_Donor`` set
    to ``'0'``; the atoms of each feature of family ``Acceptor`` or ``Donor``
    found by ``self.feature_factory`` then get the matching prop set to ``'1'``.

    :param mol: molecule to annotate in place
    :return: None
    """
    prop_for_family = {
        'Acceptor': '_Feature_Acceptor',
        'Donor': '_Feature_Donor',
    }
    for atom in mol.GetAtoms():
        for prop in prop_for_family.values():
            atom.SetProp(prop, '0')
    for feature in self.feature_factory.GetFeaturesForMol(mol):
        prop = prop_for_family.get(feature.GetFamily())
        for atom_idx in feature.GetAtomIds():
            atom = mol.GetAtomWithIdx(atom_idx)
            if prop is not None:
                atom.SetProp(prop, '1')
def convert_to_graph(mol: Chem.Mol, scaffold_ids: t.Tuple[int],
                     anchors: t.Dict[int, int], hba_ids: t.Tuple[int],
                     hbd_ids: t.Tuple[int]) -> nx.Graph:
    """
    Convert `Chem.Mol` object to `nx.Graph` object

    Nodes are the atom indices and edges the bonds. Every node carries a
    `symbol` attribute; the attributes `anchor`, `is_hba`, `is_hbd` and
    `is_scaffold` are only present on the nodes they apply to.

    Args:
        mol (Chem.Mol): The molecule object to be converted
        scaffold_ids (t.Tuple[int]): The atoms that correspond to the scaffold
        anchors (t.Dict[int, int]): The mapping from atom in the molecule to
            atom in scaffold where it is attached to
        hba_ids (t.Tuple[int]): The atoms corresponding to hydrogen acceptors
        hbd_ids (t.Tuple[int]): The atoms corresponding to hydrogen donors

    Returns:
        nx.Graph: The graph converted
    """
    graph = nx.Graph()
    graph.add_nodes_from(range(mol.GetNumAtoms()))
    graph.add_edges_from((bond.GetBeginAtomIdx(), bond.GetEndAtomIdx())
                         for bond in mol.GetBonds())

    # Element symbol on every node
    for atom in mol.GetAtoms():
        graph.nodes[atom.GetIdx()]['symbol'] = atom.GetSymbol()

    # Scaffold atom each anchored atom is attached to
    for node_id, scaffold_atom in anchors.items():
        graph.nodes[node_id]['anchor'] = scaffold_atom

    # Boolean flags, set only where they apply
    flagged_groups = (
        ('is_hba', hba_ids),
        ('is_hbd', hbd_ids),
        ('is_scaffold', scaffold_ids),
    )
    for flag, node_ids in flagged_groups:
        for node_id in node_ids:
            graph.nodes[node_id][flag] = True
    return graph
def get_scaffold_anchors(mol: Chem.Mol,
                         scaffold_ids: t.Tuple[int]) -> t.Dict[int, int]:
    """
    Get the indices of the atoms bonded to a scaffold atom.

    The dictionary returned maps the index (in the molecule) of each neighbour
    of a scaffold atom to the position of that scaffold atom within
    `scaffold_ids`. When an atom neighbours several scaffold atoms, the one
    that comes first in `scaffold_ids` is kept. Neighbours are not filtered,
    so scaffold atoms bonded to other scaffold atoms are included as well.
    """
    anchors = {}
    for scaffold_id, scaffold_id_in_mol in enumerate(scaffold_ids):
        scaffold_atom = mol.GetAtomWithIdx(scaffold_id_in_mol)
        for neighbor in scaffold_atom.GetNeighbors():
            # first scaffold atom wins
            anchors.setdefault(neighbor.GetIdx(), scaffold_id)
    return anchors