def from_unannotated_mols(cls,
                          moved_followup: Chem.Mol,
                          hits: Sequence[Chem.Mol],
                          placed_followup: Chem.Mol):
    """
    Build an instance from molecules that carry no mapping annotations.

    Each hit is mapped onto ``placed_followup`` by positional overlap
    (``Fragmenstein.get_positional_mapping``); those mappings are then handed
    to the constructor together with ``moved_followup``.

    :param moved_followup: the mol to be scored
    :param hits: the hits to score against
    :param placed_followup: the mol used to work out the mapping
    :return: ``cls(moved_followup, hits, mappings)`` where ``moved_followup``
        has had its dummy atoms (``*``) deleted and ``mappings`` holds, per
        hit, the list of items of its positional mapping
    """
    dummy_query = Chem.MolFromSmiles('*')
    moved_followup = AllChem.DeleteSubstructs(moved_followup, dummy_query)
    placed_followup = AllChem.DeleteSubstructs(placed_followup, dummy_query)

    counts_differ = (moved_followup.GetNumAtoms()
                     != placed_followup.GetNumAtoms())
    if counts_differ:
        # the two may differ merely because only one carries explicit protons
        placed_followup = Chem.AddHs(placed_followup)
    assert (moved_followup.GetNumAtoms()
            == placed_followup.GetNumAtoms()), 'moved and placed are different!'

    mappings = [
        list(Fragmenstein.get_positional_mapping(hit, placed_followup).items())
        for hit in hits
    ]
    return cls(moved_followup, hits, mappings)
def get_possible_map(
        self,
        other: Chem.Mol,
        label: str,
        o_map: Dict[int, int],  # followup -> other
        inter_map: Dict[int, int],  # other -> combined
        combined: Chem.Mol,
        combined_map: Dict[int, int]) -> Dict[int, int]:
    """
    Analyse a single map (``o_map``) and return the acceptable part of it.

    Every followup atom in ``o_map`` is either consistent with what is already
    in ``combined_map``, a new contribution (recorded in the result), or a
    conflict (a "strike").

    :param other: passed on to ``check_possible_distances``
    :param label: used only in the debug log message
    :param o_map: followup -> other
    :param inter_map: other -> combined
    :param combined: the combined mol; its atom count is the offset added to
        the indices of ``other``
    :param combined_map: followup -> combined
    :return: followup index -> ``combined.GetNumAtoms() + other index`` for
        the newly contributed atoms; an empty dict if the strike count reaches
        ``self.max_strikes`` or the distance check fails
    """
    candidate = {}
    strike_count = 0  # reaching self.max_strikes discards the map
    already_mapped = set(combined_map.keys())
    claimed_combined = combined_map.values()

    for followup_idx, other_idx in o_map.items():
        overlapping = other_idx in inter_map
        if followup_idx in already_mapped:
            # Acceptable only if the overlapped combined atom is in use and
            # is the very atom this followup atom was already mapped to.
            if (overlapping
                    and inter_map[other_idx] in claimed_combined
                    and self.get_key(combined_map,
                                     inter_map[other_idx]) == followup_idx):
                continue
            strike_count += 1
        elif not overlapping or inter_map[other_idx] not in claimed_combined:
            # Either a brand new position, or an overlap nobody has claimed.
            candidate[followup_idx] = combined.GetNumAtoms() + other_idx
        else:
            # The overlapped atom already belongs to another followup atom.
            log.debug(f'{label} - {followup_idx} mismatch')
            strike_count += 1

    if strike_count >= self.max_strikes:
        return {}
    if not self.check_possible_distances(other,
                                         candidate,
                                         combined,
                                         combined_map,
                                         cutoff=self.distance_cutoff):
        return {}
    return candidate
def CalculateHeteroNumber(mol: Chem.Mol) -> float:
    """Calculate the number of heteroatoms.

    An atom counts as a heteroatom when its atomic number is neither 1
    (hydrogen) nor 6 (carbon).

    The previous implementation counted exactly these atoms and then
    subtracted the count from the total, which returned the number of H/C
    atoms instead.
    """
    return sum(1 for atom in mol.GetAtoms()
               if atom.GetAtomicNum() not in (1, 6))
def to_dgl(self: GraphFeaturiser, mol: Mol) -> dgl.DGLGraph:
    """Generates a DGL graph from a molecule.

    Atoms become nodes and every bond becomes an edge from its begin atom to
    its end atom. Node and edge features are attached under the keys of
    ``self.atom_featurisers`` / ``self.bond_featurisers``, after which the
    reverse edges are added (copying the edge data) and, if
    ``self.add_self_loops`` is set, self loops as well.

    Args:
        mol: The molecule to featurise.

    Returns:
        A DGL graph of the featurised molecule.
    """
    atom_count = mol.GetNumAtoms()
    bond_seq = mol.GetBonds()
    sources = torch.tensor([b.GetBeginAtomIdx() for b in bond_seq])
    targets = torch.tensor([b.GetEndAtomIdx() for b in bond_seq])
    graph = dgl.graph((sources, targets), num_nodes=atom_count)

    for name, featuriser in self.atom_featurisers.items():
        graph.ndata[name] = torch.tensor(featuriser.process_molecule(mol),
                                         dtype=torch.float)

    for name, featuriser in self.bond_featurisers.items():
        per_bond = [featuriser.process_bond(b) for b in bond_seq]
        graph.edata[name] = torch.tensor(per_bond, dtype=torch.float)

    # Features are set while there is one edge per bond; the reverse edges
    # receive copies of them here.
    graph = dgl.add_reverse_edges(graph, copy_edata=True)
    if self.add_self_loops:
        graph = dgl.add_self_loop(graph)
    return graph
def _CalculateEState(mol: Chem.Mol, skipH: bool = True) -> numpy.ndarray:
    """Get the EState value of each atom in the molecule.

    :param mol: the molecule; hydrogens are added first and, when ``skipH``
        is true, removed again before anything is computed
    :param skipH: whether to remove hydrogens before the calculation
    :return: array with one value per atom of the processed molecule: the
        intrinsic state of the atom plus the accumulated perturbation from
        every other atom

    Uses the builtin ``float`` as dtype: the ``numpy.float`` alias used
    previously was removed in NumPy 1.24 and raises ``AttributeError`` there.
    """
    mol = Chem.AddHs(mol)
    if skipH:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()

    # Intrinsic state of each atom; atoms of degree 0 keep the value 0.
    Is = numpy.zeros(nAtoms, dtype=float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d

    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1  # the perturbation term is divided by (distance + 1) squared

    accum = numpy.zeros(nAtoms, dtype=float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            # Pairs at 1e6 or beyond are skipped - presumably the sentinel
            # for disconnected atoms; confirm against RDKit.
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                # The perturbation is antisymmetric between the two atoms.
                accum[i] += temp
                accum[j] -= temp

    return accum + Is
def copy_origins(cls, annotated: Chem.Mol, target: Chem.Mol):
    """
    Copy the ``_Origin`` atom annotations from ``annotated`` onto ``target``.

    Fragmenstein leaves a note of what it did: the atom prop ``_Origin`` is a
    json of a list of mol _Name dot AtomIdx. The atom order seems to be
    maintained between the two mols, but that is not relied upon (dummy atoms
    are also stripped), so the atoms are paired up via the maximum common
    substructure instead (elements must match, any bond order, ring atoms
    only match ring atoms).

    :param annotated: the mol whose atoms carry the origins
    :param target: the mol to annotate; its matched atoms get the ``_Origin``
        prop set (as a json string), i.e. it is modified in place
    :return: a list of the origins that were copied, one entry per matched
        atom of ``target`` in ascending atom index order. Atoms of ``target``
        outside the common substructure are skipped. (Previously this list
        was never filled and so was always empty.)
    """
    mcs = rdFMCS.FindMCS([target, annotated],
                         atomCompare=rdFMCS.AtomCompare.CompareElements,
                         bondCompare=rdFMCS.BondCompare.CompareAny,
                         ringMatchesRingOnly=True)
    common = Chem.MolFromSmarts(mcs.smartsString)
    # target atom index -> annotated atom index
    dmapping = dict(
        zip(target.GetSubstructMatch(common),
            annotated.GetSubstructMatch(common)))
    origins = []
    for target_idx in sorted(dmapping):
        source_atom = annotated.GetAtomWithIdx(dmapping[target_idx])
        target_atom = target.GetAtomWithIdx(target_idx)
        origin = cls._get_origin(source_atom)
        target_atom.SetProp('_Origin', json.dumps(origin))
        origins.append(origin)
    return origins
def build_bond_features_and_mappings(
        mol: Chem.Mol, f_atoms: List) -> Tuple[list, list, list, list]:
    """Build directed-bond features and the atom/bond index mappings.

    Every bond of ``mol`` yields two directed bonds stored at consecutive
    indices: begin -> end first, then end -> begin.

    :param mol: the molecule whose bonds are processed
    :param f_atoms: per-atom features, indexed by atom index; each entry is
        concatenated (``+``) with the bond features
    :return: ``(f_bonds, a2b, b2a, b2revb)`` where ``f_bonds`` holds the
        features of each directed bond (source atom features + bond
        features), ``a2b`` maps an atom index to its incoming directed bond
        indices, ``b2a`` maps a directed bond index to its source atom index
        and ``b2revb`` maps a directed bond index to its reverse bond index
    """
    f_bonds, b2a, b2revb = [], [], []
    a2b = [[] for _ in range(mol.GetNumAtoms())]

    for bond in mol.GetBonds():
        begin = bond.GetBeginAtom().GetIdx()
        end = bond.GetEndAtom().GetIdx()
        shared = get_bond_features(bond)

        forward = len(f_bonds)  # begin --> end
        backward = forward + 1  # end --> begin
        f_bonds.extend((f_atoms[begin] + shared, f_atoms[end] + shared))

        b2a.extend((begin, end))
        a2b[end].append(forward)
        a2b[begin].append(backward)
        b2revb.extend((backward, forward))

    return f_bonds, a2b, b2a, b2revb
def process(self, mol: chem.Mol, atom_map: Dict[int, int]) -> GCNGraph:
    """Turn a molecule into a ``GCNGraph`` with one extra node.

    The extra node (index ``n - 1``) is allocated for the graph embedding
    and is linked to every atom.

    :param mol: the molecule to convert
    :param atom_map: atomic number -> embedding index; an atomic number
        missing from it raises ``KeyError``. ``len(atom_map)`` is used as the
        embedding index of the extra node.
    :return: ``GCNGraph(n, adj, num)`` with ``n`` the node count (atoms + 1),
        ``adj`` the ``n x n`` coefficient matrix and ``num`` the per-node
        embedding indices

    The matrices are indexed with explicit row and column lists. The earlier
    ``adj[[rows, cols]]`` form relied on PyTorch treating a list of lists as
    a tuple of indices, which is deprecated.
    """
    n = mol.GetNumAtoms() + 1  # allocate a new node for graph embedding
    extra = n - 1
    bonds = list(mol.GetBonds())
    atoms = list(mol.GetAtoms())

    # bonds, plus one edge from the extra node to each atom
    begin_idx = [b.GetBeginAtomIdx() for b in bonds] + [extra] * extra
    end_idx = [b.GetEndAtomIdx() for b in bonds] + list(range(extra))
    assert len(begin_idx) == len(end_idx)

    # every edge in both directions, plus the self-loop of every node
    loops = list(range(n))
    rows = begin_idx + end_idx + loops
    cols = end_idx + begin_idx + loops

    # The "+ 2" presumably accounts for the self-loop and the link to the
    # extra node - confirm against the model definition.
    deg = torch.tensor(
        [sqrt(1 / (len(a.GetNeighbors()) + 2)) for a in atoms]
        + [sqrt(1 / n)],
        device=self.device)
    coeff = deg.reshape(-1, 1) @ deg[None, :]  # pairwise coefficients
    adj = torch.zeros((n, n), device=self.device)
    adj[rows, cols] = coeff[rows, cols]

    # node embedding
    num = torch.tensor(
        [atom_map[a.GetAtomicNum()] for a in atoms] + [len(atom_map)],
        device=self.device)
    return GCNGraph(n, adj, num)
def _GetBurdenMatrix(mol: Chem.Mol, propertylabel: str = 'm') -> numpy.matrix:
    """Build the weighted Burden matrix and return its eigenvalues.

    Hydrogens are added to the molecule first.

    :param mol: the molecule
    :param propertylabel: name of the atomic property placed on the diagonal
        (passed to ``GetRelativeAtomicProperty``)
    :return: the real parts of the eigenvalues of the Burden matrix
    """
    mol = Chem.AddHs(mol)
    adjacency = Chem.GetAdjacencyMatrix(mol)
    burden = numpy.array(adjacency, dtype=numpy.float32)

    # Diagonal elements Bii: the relative atomic property of atom i
    # (e.g. carbon normalized atomic mass, van der Waals volume, Sanderson
    # electronegativity or polarizability), rounded to 3 decimals.
    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        value = GetRelativeAtomicProperty(element=atom.GetSymbol(),
                                          propertyname=propertylabel)
        burden[idx, idx] = round(value, 3)

    # Elements Bij of bonded atoms: square root of the bond order. Bond
    # types not listed here keep the adjacency value.
    sqrt_bond_order = {
        'SINGLE': round(numpy.sqrt(1), 3),
        'DOUBLE': round(numpy.sqrt(2), 3),
        'TRIPLE': round(numpy.sqrt(3), 3),
        'AROMATIC': round(numpy.sqrt(1.5), 3),
    }
    for row, col in numpy.argwhere(adjacency):
        bond = mol.GetBondBetweenAtoms(int(row), int(col))
        kind = bond.GetBondType().name
        if kind in sqrt_bond_order:
            burden[row, col] = sqrt_bond_order[kind]

    # All other off-diagonal elements (non bonded atom pairs) are 0.001.
    off_diagonal = ~numpy.eye(adjacency.shape[0], dtype=bool)
    burden[(adjacency == 0) & off_diagonal] = 0.001

    return numpy.real(numpy.linalg.eigvals(burden))
def _compute_sas(mol: Mol, sa_model: Dict[int, float]) -> float:
    """Compute the synthetic accessibility score of a molecule.

    The raw score is the sum of a fragment score (from radius-2 Morgan
    fingerprint features), a complexity penalty and a symmetry correction;
    it is then rescaled and clamped to the range 1 to 10.

    :param mol: the molecule to score
    :param sa_model: fingerprint feature id -> contribution; features missing
        from it contribute -4
    :return: the score, between 1.0 and 10.0
    """
    fingerprint = rdMolDescriptors.GetMorganFingerprint(mol, 2)
    feature_counts = fingerprint.GetNonzeroElements()

    # features score: count-weighted mean contribution
    weighted_sum = 0.
    total_count = 0
    for feature_id, count in feature_counts.items():
        total_count += count
        weighted_sum += sa_model.get(feature_id, -4) * count
    fragment_score = weighted_sum / total_count

    # complexity penalties
    atom_count = mol.GetNumAtoms()
    chiral_count = len(FindMolChiralCenters(mol, includeUnassigned=True))
    ring_info = mol.GetRingInfo()
    spiro_count = rdMolDescriptors.CalcNumSpiroAtoms(mol)
    bridgehead_count = rdMolDescriptors.CalcNumBridgeheadAtoms(mol)
    has_macrocycle = any(len(ring) > 8 for ring in ring_info.AtomRings())

    size_penalty = atom_count**1.005 - atom_count
    stereo_penalty = math.log10(chiral_count + 1)
    spiro_penalty = math.log10(spiro_count + 1)
    bridge_penalty = math.log10(bridgehead_count + 1)
    # This differs from the paper, which defines the macrocycle penalty as
    # log10(nMacrocycles + 1). A fixed penalty generates better results when
    # 2 or more macrocycles are present.
    macrocycle_penalty = math.log10(2) if has_macrocycle else 0.

    complexity_score = (0. - size_penalty - stereo_penalty - spiro_penalty
                        - bridge_penalty - macrocycle_penalty)

    # Correction for the fingerprint density: not in the original
    # publication, added in version 1.1 to make highly symmetrical molecules
    # easier to synthetise.
    symmetry_score = 0.
    if atom_count > len(feature_counts):
        symmetry_score = math.log(float(atom_count) / len(feature_counts)) * .5

    raw_score = fragment_score + complexity_score + symmetry_score

    # transform the "raw" value into a scale between 1 and 10
    raw_floor = -4.0
    raw_ceiling = 2.5
    scaled = 11. - (raw_score - raw_floor + 1) / (raw_ceiling - raw_floor) * 9.

    # smooth the 10-end
    if scaled > 8.:
        scaled = 8. + math.log(scaled + 1. - 9.)

    if scaled > 10.:
        return 10.0
    if scaled < 1.:
        return 1.0
    return scaled
def construct_discrete_edge_matrix(mol: Chem.Mol):
    """Build one adjacency matrix per bond type for a molecule.

    :param mol: the molecule, or ``None``
    :return: ``None`` when ``mol`` is ``None``; otherwise a float32 array of
        shape ``(4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM)`` in which channel 0 is
        single, 1 double, 2 triple and 3 aromatic, with a 1.0 at ``[i, j]``
        for every bonded atom pair. An unknown bond type is printed and
        trips an assertion.
    """
    if mol is None:
        return None

    channel_of = {'SINGLE': 0, 'DOUBLE': 1, 'TRIPLE': 2, 'AROMATIC': 3}
    atom_count = mol.GetNumAtoms()
    adjs = numpy.zeros((4, MAX_NUMBER_ATOM, MAX_NUMBER_ATOM),
                       dtype=numpy.float32)

    for i in range(atom_count):
        for j in range(atom_count):
            bond = mol.GetBondBetweenAtoms(i, j)  # type: Chem.Bond
            if bond is None:
                continue
            bondType = str(bond.GetBondType())
            channel = channel_of.get(bondType)
            if channel is None:
                print("[ERROR] Unknown bond type", bondType)
                assert False  # Should not come here
            else:
                adjs[channel, i, j] = 1.0
    return adjs
def subset_rdmol(rdmol: Chem.Mol,
                 atom_indices: Iterable[int],
                 check_bonds: bool = True,
                 return_atom_indices: bool = False) -> Chem.Mol:
    """Return a copy of ``rdmol`` reduced to the given atoms.

    :param rdmol: the molecule to subset; it is copied into an ``RWMol``
        first, so the input is left untouched
    :param atom_indices: indices of the atoms to keep. Any iterable is
        accepted; it is materialised into a list once. (Previously anything
        but a list raised ``TypeError`` when ``check_bonds`` was true.)
    :param check_bonds: if true, an atom that was not requested but is
        bonded to more than one requested atom is kept as well
    :param return_atom_indices: if true, also return the kept indices
    :return: the reduced molecule, or ``(molecule, atom_indices)`` when
        ``return_atom_indices`` is true. The returned indices are a list;
        with ``check_bonds`` it is sorted and includes the extra kept atoms.
    """
    rdmol = Chem.RWMol(rdmol)
    atom_indices = list(atom_indices)
    requested = set(atom_indices)  # constant-time membership tests
    to_remove = [i for i in range(rdmol.GetNumAtoms()) if i not in requested]

    if check_bonds:
        multiple_bonds = []
        for i in to_remove:
            atom = rdmol.GetAtomWithIdx(i)
            n_bonds = sum(1 for bond in atom.GetBonds()
                          if bond.GetOtherAtomIdx(i) in requested)
            if n_bonds > 1:
                multiple_bonds.append(i)
        atom_indices = sorted(atom_indices + multiple_bonds)
        rescued = set(multiple_bonds)
        to_remove = [i for i in to_remove if i not in rescued]

    # Highest index first, so the indices still waiting to be removed are
    # not shifted by an earlier removal.
    for i in reversed(to_remove):
        rdmol.RemoveAtom(i)

    rdmol.UpdatePropertyCache()
    if return_atom_indices:
        return rdmol, atom_indices
    return rdmol
def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
          anchor_index: int, attachment_details: List[Dict]) -> Chem.Mol:
    """
    Graft the anchor-side piece of ``fragmentanda`` onto ``scaffold``.

    For each attachment detail, ``fragmentanda`` is cut at the bond between
    the anchor atom and the attachment atom, the piece containing the anchor
    is combined with the current scaffold, and a bond is added between the
    anchor (in the combined mol) and the scaffold attachment atom. The result
    becomes the scaffold for the next detail.

    :param scaffold: the mol to add to
    :param fragmentanda: the mol to take the fragment from
    :param anchor_index: index in ``fragmentanda`` of the anchor atom
    :param attachment_details: dicts with the keys ``idx_F`` (attachment atom
        index in ``fragmentanda``), ``idx_S`` (attachment atom index in the
        scaffold) and ``type`` (bond type of the new bond)
    :return: the combined mol, or ``scaffold`` itself if there are no details
    :raises Exception: if no fragment contains the anchor atom
    """
    for detail in attachment_details:
        fragment_attachment = detail['idx_F']
        scaffold_attachment = detail['idx_S']
        bond_type = detail['type']

        severed_bond = fragmentanda.GetBondBetweenAtoms(anchor_index,
                                                        fragment_attachment)
        cut = Chem.FragmentOnBonds(fragmentanda, [severed_bond.GetIdx()],
                                   addDummies=False)
        atom_groups = []  # filled by GetMolFrags: original indices per piece
        pieces = Chem.GetMolFrags(cut,
                                  asMols=True,
                                  fragsMolAtomMapping=atom_groups,
                                  sanitizeFrags=False)
        if self._debug_draw:
            print(atom_groups)

        # Get the fragment of interest: the first one holding the anchor.
        holders = [k for k, members in enumerate(atom_groups)
                   if anchor_index in members]
        if not holders:
            raise Exception
        piece_no = holders[0]
        frag = pieces[piece_no]
        frag_anchor_index = atom_groups[piece_no].index(anchor_index)
        if self._debug_draw:
            self.draw_nicely(frag)

        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # the fragment's atoms follow the scaffold's atoms in the combination
        combined_anchor = frag_anchor_index + scaffold.GetNumAtoms()
        if self._debug_draw:
            print(combined_anchor, scaffold_attachment, anchor_index,
                  scaffold.GetNumAtoms())
            self.draw_nicely(combo)

        combo.AddBond(combined_anchor, scaffold_attachment, bond_type)
        Chem.SanitizeMol(
            combo,
            sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
            Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
            catchErrors=True)
        if self._debug_draw:
            self.draw_nicely(combo)
        scaffold = combo
    return scaffold
def CalculateMeanWeiner(mol: Chem.Mol) -> float:
    """Get Mean Weiner index of a molecule.

    Or AW. Computed as ``2 * W / (N * (N - 1))`` with ``W`` the Weiner index
    and ``N`` the number of atoms.

    A molecule with fewer than two atoms has no atom pairs, so 0.0 is
    returned for it; this used to raise ``ZeroDivisionError``.
    """
    N = mol.GetNumAtoms()
    if N < 2:
        return 0.0
    WeinerNumber = CalculateWeiner(mol)
    return 2.0 * WeinerNumber / (N * (N - 1))
def CalculateQuadratic(mol: Chem.Mol) -> float:
    """Get Quadratic index.

    Or Qindex. Computed as ``3 - 2 * N + M / 2`` with ``N`` the number of
    atoms and ``M`` the value of ``CalculateZagreb1``.
    """
    zagreb1 = CalculateZagreb1(mol)
    atom_count = mol.GetNumAtoms()
    return 3 - 2 * atom_count + zagreb1 / 2.0
def CalculateArithmeticTopoIndex(mol: Chem.Mol) -> float:
    """Get Arithmetic topological index.

    Or Arto: twice the number of bonds divided by the number of atoms.

    From Narumi H., MATCH (Comm. Math. Comp. Chem.), (1987), 22,195-207.
    """
    atom_count = mol.GetNumAtoms()
    bond_count = mol.GetNumBonds()
    return 2. * bond_count / atom_count
def match(self, mol: Chem.Mol) -> List[np.ndarray]:
    """Return the combined matches of this rule on ``mol``.

    Each substructure match is converted with ``_sparse_to_dense``; the
    result holds one ``_reduce_logical_or`` combination for every subset
    yielded by ``_nonnull_powerset`` over those converted matches.
    """
    hits = self.substruct_matches(mol)
    atom_count = mol.GetNumAtoms()
    dense_hits = [_sparse_to_dense(hit, atom_count) for hit in hits]
    return [
        _reduce_logical_or(subset, atom_count)
        for subset in _nonnull_powerset(dense_hits)
    ]
def _CalculateAtomEState(mol: Chem.Mol, AtomicNum=6) -> float:
    """Calculate the sum of the EState indices over all atoms with specified atomic number.

    :param mol: the molecule
    :param AtomicNum: atomic number of the atoms to sum over (default 6)
    :return: the sum of the EState values of the matching atoms

    Uses the builtin ``float`` as dtype: the ``numpy.float`` alias used
    previously was removed in NumPy 1.24 and raises ``AttributeError`` there.
    """
    nAtoms = mol.GetNumAtoms()
    Is = numpy.zeros(nAtoms, dtype=float)
    # NOTE(review): _CalculateEState adds and then strips hydrogens before
    # computing, so the indices are assumed to line up with those of ``mol``
    # as passed in - confirm for inputs carrying explicit hydrogens.
    Estate = _CalculateEState(mol)

    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        if at.GetAtomicNum() == AtomicNum:
            Is[i] = Estate[i]
    return sum(Is)
def match(self, mol: Chem.Mol) -> List[np.ndarray]:
    """Return the composite matches of the sub-rules on ``mol``.

    Every combination that takes one match from each entry of
    ``self.fragment_logics`` is considered. For an ``'AND'`` rule each
    combination is merged with ``_reduce_logical_or``; for an ``'OR'`` rule
    every subset yielded by ``_nonnull_powerset`` for the combination is
    merged instead. Any other ``rule_type`` yields an empty list.
    """
    per_rule = [logic.match(mol) for logic in self.fragment_logics]
    atom_count = mol.GetNumAtoms()
    combinations = itertools.product(*per_rule)

    if self.rule_type == 'OR':
        return [
            _reduce_logical_or(subset, atom_count)
            for combination in combinations
            for subset in _nonnull_powerset(combination)
        ]
    if self.rule_type == 'AND':
        return [
            _reduce_logical_or(combination, atom_count)
            for combination in combinations
        ]
    return []
def CalculateGutmanTopo(mol: Chem.Mol) -> float:
    """Get Gutman molecular topological simple vertex index.

    Or GMTI: the base-10 logarithm of the sum, over all atom pairs, of the
    product of the two atom degrees and the distance-matrix entry between
    the two atoms.
    """
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    distances = Chem.GetDistanceMatrix(mol)
    pair_terms = (degrees[a] * degrees[b] * distances[a, b]
                  for a in range(atom_count)
                  for b in range(a + 1, atom_count))
    return numpy.log10(sum(pair_terms, 0.0))
def add_names(cls,
              mol: Chem.Mol,
              names: List[str],
              name: Optional[str] = None) -> Chem.Mol:
    """
    Quick way to add atom names to a mol object --adds them the normal way.

    A fresh instance of the class is created, given the mol (and the residue
    name, if any) and ``fix_mol`` is called on it before the names are set on
    the PDB residue info of each atom, in atom order.

    :param mol: Chem.Mol, will actually be edited in place.
    :param names: list of unique names. If there are fewer names than atoms a
        warning is issued and only the first atoms are named.
    :param name: 3letter code for the molecule.
    :return: the mol held by the instance
    :raises ValueError: if there are more names than atoms
    """
    assert len(set(names)) == len(names), 'Atom Names are repeated.'
    atom_count = mol.GetNumAtoms()
    if atom_count > len(names):
        warn('There are more atoms in mol than were provided.')
    elif atom_count < len(names):
        raise ValueError('There are less atoms in mol than were provided.')

    instance = cls()
    if name is not None:
        instance.NAME = name
    instance.mol = mol
    instance.fix_mol()
    for atom_name, atom in zip(names, instance.mol.GetAtoms()):
        atom.GetPDBResidueInfo().SetName(atom_name)
    return instance.mol