def CalculateGravitationalTopoIndex(mol):
    """
    Gravitational topological index based on topological distance
    instead of intermolecular distance.

    ---->Gravto

    For every unordered atom pair (i, j) the product of the two values
    returned by ``GetMass()`` is divided by the squared entry of the matrix
    returned by ``Chem.GetDistanceMatrix``; the accumulated sum is divided
    by 100.

    Usage:
        result = CalculateGravitationalTopoIndex(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    atoms = mol.GetAtoms()
    topo_dist = Chem.GetDistanceMatrix(mol)
    atom_count = mol.GetNumAtoms()

    total = 0.0
    # Visit each unordered pair exactly once (first < second).
    for first in range(atom_count - 1):
        for second in range(first + 1, atom_count):
            mass_product = atoms[first].GetMass() * atoms[second].GetMass()
            total += mass_product / numpy.power(topo_dist[first][second], 2)
    return total / 100
def CalculateXuIndex(mol):
    """
    Calculation of Xu index.

    ---->Xu

    With ``delta_i`` the degree of atom i and ``sigma_i`` the sum of row i of
    the distance matrix, the value returned is::

        sqrt(nAT) * ln( sum(delta_i * sigma_i**2) / sum(delta_i * sigma_i) )

    Usage:
        result = CalculateXuIndex(mol)
    Input: mol is a molecule object
    Output: result is a numeric value

    NOTE(review): when every ``delta_i * sigma_i`` is zero (e.g. a single
    atom) the ratio is 0/0; no special handling is applied here.
    """
    nAT = mol.GetNumAtoms()
    deltas = [x.GetDegree() for x in mol.GetAtoms()]
    Distance = Chem.GetDistanceMatrix(mol)
    # numpy.sum is used instead of scipy.sum: the NumPy aliases in the
    # top-level scipy namespace are deprecated and absent from newer SciPy.
    sigma = numpy.sum(Distance, axis=1)

    temp1 = 0.0  # sum of delta_i * sigma_i**2
    temp2 = 0.0  # sum of delta_i * sigma_i
    for delta, row_sum in zip(deltas, sigma):
        temp1 = temp1 + delta * (row_sum ** 2)
        temp2 = temp2 + delta * row_sum
    return numpy.sqrt(nAT) * numpy.log(temp1 / temp2)
def construct_pair_feature(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Build the atom-pair feature matrix of a molecule.

    Three blocks are stacked horizontally: a distance block of width
    ``MAX_DISTANCE`` (rows from ``construct_distance_vec``), a bond block of
    width 4 (rows from ``construct_bond_vec``) and the array returned by
    ``construct_ring_feature_vec``.

    Args:
        mol (Mol): mol instance
        num_max_atoms (int): number of max atoms

    Returns (numpy.ndarray): 2 dimensional array. First axis size is
        `num_max_atoms` ** 2, representing index of each atom pair.
        Second axis for feature.

    """
    atom_count = mol.GetNumAtoms()
    pair_slots = num_max_atoms ** 2
    topo_dist = Chem.GetDistanceMatrix(mol)

    # NOTE(review): the flat row index uses the actual atom count as stride
    # (row * atom_count + col), not num_max_atoms -- confirm that consumers
    # and construct_ring_feature_vec expect the same layout.
    dist_block = numpy.zeros((pair_slots, MAX_DISTANCE), dtype=numpy.float32)
    for row in range(atom_count):
        base = row * atom_count
        for col in range(atom_count):
            dist_block[base + col] = construct_distance_vec(
                topo_dist, row, col)

    bond_block = numpy.zeros((pair_slots, 4), dtype=numpy.float32)
    for row in range(atom_count):
        base = row * atom_count
        for col in range(atom_count):
            bond_block[base + col] = construct_bond_vec(mol, row, col)

    ring_block = construct_ring_feature_vec(mol, num_max_atoms=num_max_atoms)
    return numpy.hstack((dist_block, bond_block, ring_block))
def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """
    Featurize the molecule.

    Parameters
    ----------
    datapoint: RDKitMol
        RDKit mol object.
    **kwargs
        Only the deprecated ``mol`` keyword is inspected; when present it
        overrides ``datapoint`` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    MATEncoding
        A MATEncoding dataclass instance consisting of processed node_features,
        adjacency_matrix and distance_matrix.
        NOTE(review): the signature is annotated ``np.ndarray`` although a
        ``MATEncoding`` is returned; annotation left unchanged here.
    """
    if 'mol' in kwargs:
        import warnings

        datapoint = kwargs.get("mol")
        # Warn instead of raising: raising made the fallback assignment
        # above unreachable in practice.
        warnings.warn(
            'Mol is being phased out as a parameter, please pass "datapoint" instead.',
            DeprecationWarning,
            stacklevel=2)

    from rdkit import Chem

    datapoint = self.construct_mol(datapoint)

    node_features = self.construct_node_features_matrix(datapoint)
    adjacency_matrix = Chem.GetAdjacencyMatrix(datapoint)
    distance_matrix = Chem.GetDistanceMatrix(datapoint)

    # The dummy node is added before padding so all three arrays stay aligned.
    node_features, adjacency_matrix, distance_matrix = self._add_dummy_node(
        node_features, adjacency_matrix, distance_matrix)

    node_features = self._pad_sequence(node_features)
    adjacency_matrix = self._pad_sequence(adjacency_matrix)
    distance_matrix = self._pad_sequence(distance_matrix)

    return MATEncoding(node_features, adjacency_matrix, distance_matrix)
def CalculateGraphDistance(mol):
    """
    Calculation of graph distance index.

    ---->Tigdi (log value)

    For every integer k from 1 up to the largest distance-matrix entry, the
    number of matrix entries equal to k is halved and squared; the base-10
    logarithm of the sum of those squares is returned.

    Usage:
        result = CalculateGraphDistance(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    topo_dist = Chem.GetDistanceMatrix(mol)
    longest = int(topo_dist.max())

    total = 0.0
    for k in range(1, longest + 1):
        # The matrix lists each pair twice, hence the factor 1/2.
        pair_count = 1. / 2 * sum(sum(topo_dist == k))
        total = total + pair_count ** 2
    return numpy.log10(total)
def CalculateRadius(mol):
    """
    Calculation of radius based on topology.

    If ri is the largest matrix entry in row i of the distance matrix D,
    then the radius is defined as the smallest of the ri [Petitjean 1992].

    ---->radiust

    Usage:
        result = CalculateRadius(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    topo_dist = Chem.GetDistanceMatrix(mol)
    row_maxima = [max(row) for row in topo_dist]
    return min(row_maxima)
def CalculateBalaban(mol):
    """
    Calculation of Balaban index in a molecule.

    ---->J

    With ``S_i`` the sum of row i of the distance matrix, the value is
    ``Nbond / (mu + 1) * sum(1 / sqrt(S_i * S_j))`` over adjacent pairs
    (adjacency entry equal to 1), where ``mu = Nbond - Natom + 1``.
    Returns 0 when ``mu + 1`` is zero.

    Usage:
        result = CalculateBalaban(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    topo_dist = Chem.GetDistanceMatrix(mol)
    bond_count = mol.GetNumBonds()
    atom_count = mol.GetNumAtoms()
    row_sums = numpy.sum(topo_dist, axis=1)
    mu = bond_count - atom_count + 1

    size = len(topo_dist)
    edge_sum = 0.
    # Scan only the upper triangle (diagonal included) so each bond counts once.
    for a in range(size):
        sum_a = row_sums[a]
        for b in range(a, size):
            if adjacency[a, b] == 1:
                edge_sum += 1. / numpy.sqrt(sum_a * row_sums[b])

    if mu + 1 == 0:
        return 0
    return float(bond_count) / float(mu + 1) * edge_sum
def _CalculateMoreauBrotoAutocorrelation(mol, lag=1, propertylabel='m'):
    """
    **Internal used only**

    Calculation of Moreau-Broto autocorrelation descriptors based on
    different property weights.

    Every ordered atom pair whose distance-matrix entry equals ``lag``
    contributes the product of the two atoms' values from
    ``AtomProperty.GetRelativeAtomicProperty``. The result is
    ``round(ln(total / 2 + 1), 3)``.
    """
    atom_count = mol.GetNumAtoms()
    topo_dist = Chem.GetDistanceMatrix(mol)

    total = 0.0
    for a in range(atom_count):
        for b in range(atom_count):
            if topo_dist[a, b] != lag:
                continue
            atom_a = mol.GetAtomWithIdx(a)
            atom_b = mol.GetAtomWithIdx(b)
            weight_a = AtomProperty.GetRelativeAtomicProperty(
                element=atom_a.GetSymbol(), propertyname=propertylabel)
            weight_b = AtomProperty.GetRelativeAtomicProperty(
                element=atom_b.GetSymbol(), propertyname=propertylabel)
            total = total + weight_a * weight_b

    # Ordered pairs are visited in both directions, hence total / 2.
    return round(numpy.log(total / 2 + 1), 3)
def _main_chain_len(s):
    """Return the distance-matrix entry between the first two ``*`` atoms.

    ``s`` is parsed with ``Chem.MolFromSmiles``; atoms whose symbol is ``'*'``
    are collected in atom order and the entry for the first two is returned.
    An ``IndexError`` results if fewer than two such atoms exist.
    """
    mol = Chem.MolFromSmiles(s)
    star_inds = [
        atom.GetIdx() for atom in mol.GetAtoms() if atom.GetSymbol() == '*'
    ]
    topo_dist = Chem.GetDistanceMatrix(mol)
    head = star_inds[0]
    tail = star_inds[1]
    return topo_dist[head][tail]
def EStateIndices(mol, force=True):
    """ returns an array of EState indices for the molecule

    Reference: Hall, Mohney and Kier. JCICS _31_ 76-81 (1991)

    The result is a numpy float64 array with one entry per atom. It is
    stored on the molecule as ``_eStateIndices``; with ``force`` false a
    previously stored value is returned without recomputation.
    """
    if not force and hasattr(mol, '_eStateIndices'):
        return mol._eStateIndices

    periodic_table = Chem.GetPeriodicTable()
    n_atoms = mol.GetNumAtoms()

    # Intrinsic state per atom; atoms with degree 0 keep the value 0.
    intrinsic = numpy.zeros(n_atoms, dtype=numpy.float64)
    for idx in range(n_atoms):
        atom = mol.GetAtomWithIdx(idx)
        degree = atom.GetDegree()
        if degree <= 0:
            continue
        atomic_num = atom.GetAtomicNum()
        delta_v = periodic_table.GetNOuterElecs(atomic_num) - atom.GetTotalNumHs()
        quantum_n = GetPrincipleQuantumNumber(atomic_num)
        intrinsic[idx] = (4. / (quantum_n * quantum_n) * delta_v + 1) / degree

    # Distances are shifted by one before being squared in the denominator.
    separation = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0) + 1
    perturbation = numpy.zeros(n_atoms, dtype=numpy.float64)
    for a in range(n_atoms):
        for b in range(a + 1, n_atoms):
            sep = separation[a, b]
            # Entries of 1e6 or more are skipped (presumably the marker for
            # unconnected atom pairs -- confirm).
            if sep < 1e6:
                delta = (intrinsic[a] - intrinsic[b]) / (sep * sep)
                perturbation[a] += delta
                perturbation[b] -= delta

    result = perturbation + intrinsic
    mol._eStateIndices = result
    return result
def CalculateGutmanTopo(mol):
    """
    Calculation of Gutman molecular topological index based on simple
    vertex degree.

    ---->GMTI (log value)

    Returns ``log10`` of the sum over unordered atom pairs (i, j) of
    ``degree_i * degree_j * distance_ij``.

    Usage:
        result = CalculateGutmanTopo(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    topo_dist = Chem.GetDistanceMatrix(mol)

    total = 0.0
    for a in range(atom_count):
        degree_a = degrees[a]
        for b in range(a + 1, atom_count):
            total = total + degree_a * degrees[b] * topo_dist[a, b]
    return numpy.log10(total)
def CalculateDistanceEqualityTotalInf(mol):
    """
    Total information index on distance equality.

    -->DET

    With ``n = nAT * (nAT - 1) / 2`` the number of atom pairs and ``c_k``
    the number of pairs at distance k, the value is::

        n * log2(n) - sum(c_k * log2(c_k))

    Usage:
        result = CalculateDistanceEqualityTotalInf(mol)
    Input: mol is a molecule object
    Output: result is a numeric value (0.0 when there are no atom pairs)
    """
    Distance = Chem.GetDistanceMatrix(mol)
    nAT = mol.GetNumAtoms()

    # Number of unordered atom pairs. The previous expression
    # ``1./2*nAT**2-nAT`` evaluated to nAT**2/2 - nAT because of operator
    # precedence (e.g. 0 instead of 1 for two atoms).
    n = nAT * (nAT - 1) / 2.0
    if n == 0:
        return 0.0

    # Count each distinct distance over the upper triangle only, so every
    # pair is seen once and only distances that actually occur are visited
    # (no 0 * log2(0) terms, no loop up to the maximum distance value).
    pair_distances = Distance[numpy.triu_indices_from(Distance, k=1)]
    _, counts = numpy.unique(pair_distances, return_counts=True)
    res = numpy.sum(counts * numpy.log2(counts))

    return n * numpy.log2(n) - res
def _CalculateEState(mol, skipH=1):
    """
    **Internal used only**

    Get the EState value of each atom in a molecule.

    Hydrogens are added with ``Chem.AddHs`` and, when ``skipH == 1``,
    removed again with ``Chem.RemoveHs`` before the calculation. Returns a
    numpy float64 array with one entry per atom of the resulting molecule.
    """
    mol = Chem.AddHs(mol)
    if skipH == 1:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()

    # numpy.float64 replaces the removed ``numpy.float`` alias.
    Is = numpy.zeros(nAtoms, numpy.float64)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            # Intrinsic state of the atom.
            Is[i] = (4.0 / (N * N) * dv + 1) / d

    # Distances are shifted by one before being squared in the denominator.
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, numpy.float64)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            # Entries of 1e6 or more are skipped (presumably the marker for
            # unconnected atom pairs -- confirm).
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res
def generateChiralDescriptorsForAllCenters(mol, verbose=False):
    """
    Generates descriptors for all chiral centers in the molecule.
    Details of these descriptors are described in:
    Schneider et al., Chiral Cliffs: Investigating the Influence of Chirality on Binding Affinity
    https://doi.org/10.1002/cmdc.201700798.

    Arguments:
      - mol: the molecule to analyse
      - verbose: forwarded to calculateChiralDescriptors for every center

    Returns a dict mapping the atom index of each center reported by
    Chem.FindMolChiralCenters to the result of calculateChiralDescriptors.

    >>> # test molecules are taken from the publication above (see Figure 3 and Figure 8)
    >>> testmols = {
    ...   "CHEMBL319180" : 'CCCN1C(=O)[C@@H](NC(=O)Nc2cccc(C)c2)N=C(N3CCN(C)CC3)c4ccccc14',
    ...   }
    >>> mol = Chem.MolFromSmiles(testmols['CHEMBL319180'])
    >>> desc = generateChiralDescriptorsForAllCenters(mol)
    >>> desc.keys()
    dict_keys([6])
    >>> desc[6]['arLevel2']
    0
    >>> desc[6]['s4_pathLength']
    7
    >>> desc[6]['maxDist']
    14
    >>> desc[6]['maxDistfromCC']
    7

    """
    desc = {}
    # The distance matrix is computed once and shared by all centers.
    dists = Chem.GetDistanceMatrix(mol)
    for idxChiral, _ in Chem.FindMolChiralCenters(mol):
        # Pass the caller's verbose flag through; it used to be hard-coded
        # to False, which silently ignored the parameter.
        desc[idxChiral] = calculateChiralDescriptors(mol, idxChiral, dists, verbose=verbose)
    return desc
def test_mat_encoder_layer():
    """Test invoking MATEncoderLayer."""
    # Seed first so the layer's parameter initialisation is reproducible.
    torch.manual_seed(0)
    from rdkit import Chem

    features = torch.Tensor([[1., 2.], [5., 6.]])
    attention_mask = torch.Tensor([[1., 1.], [1., 1.]])

    two_carbon_mol = Chem.MolFromSmiles("CC")
    adj_matrix = Chem.GetAdjacencyMatrix(two_carbon_mol)
    distance_matrix = Chem.GetDistanceMatrix(two_carbon_mol)

    layer_config = dict(
        dist_kernel='softmax',
        lambda_attention=0.33,
        lambda_distance=0.33,
        h=2,
        sa_hsize=2,
        sa_dropout_p=0.0,
        output_bias=True,
        d_input=2,
        d_hidden=2,
        d_output=2,
        activation='relu',
        n_layers=2,
        ff_dropout_p=0.0,
        encoder_hsize=2,
        encoder_dropout_p=0.0,
    )
    layer = torch_layers.MATEncoderLayer(**layer_config)

    actual = layer(features, attention_mask, adj_matrix, distance_matrix, 0.0)
    expected = torch.tensor([[[0.9988, 2.0012], [-0.9999, 3.9999],
                              [0.9988, 2.0012], [-0.9999, 3.9999]],
                             [[5.0000, 6.0000], [3.0000, 8.0000],
                              [5.0000, 6.0000], [3.0000, 8.0000]]])
    assert torch.allclose(actual, expected, rtol=1e-4)
def CalculateDistanceEqualityMeanInf(mol):
    """
    Mean information index on distance equality.

    -->IDE

    For every integer k from 1 to the largest distance-matrix entry the
    number of atom pairs at distance k is counted; the counts divided by the
    total number of atom pairs ``n = nAT * (nAT - 1) / 2`` are passed to
    ``_CalculateEntropy``.

    Usage:
        result = CalculateDistanceEqualityMeanInf(mol)
    Input: mol is a molecule object
    Output: result is a numeric value
    """
    Distance = Chem.GetDistanceMatrix(mol)
    nAT = mol.GetNumAtoms()

    # Number of unordered atom pairs. The previous expression
    # ``1./2*nAT**2-nAT`` evaluated to nAT**2/2 - nAT because of operator
    # precedence, so the frequencies did not sum to one.
    n = nAT * (nAT - 1) / 2.0

    DisType = int(Distance.max())
    # numpy.float64 replaces the removed ``numpy.float`` alias.
    cc = numpy.zeros(DisType, numpy.float64)
    for i in range(DisType):
        # The matrix lists each pair twice, hence the factor 1/2.
        cc[i] = numpy.count_nonzero(Distance == i + 1) / 2.0

    return _CalculateEntropy(cc / n)
def _AssignSymmetryClasses(mol, vdList, bdMat, forceBDMat, numAtoms, cutoff): """ Used by BertzCT vdList: the number of neighbors each atom has bdMat: "balaban" distance matrix """ if forceBDMat: bdMat = Chem.GetDistanceMatrix(mol, useBO=1, useAtomWts=0, force=1, prefix="Balaban") mol._balabanMat = bdMat atomIdx = 0 keysSeen = [] symList = [0] * numAtoms for i in range(numAtoms): tmpList = bdMat[i].tolist() tmpList.sort() theKey = tuple(['%.4f' % x for x in tmpList[:cutoff]]) try: idx = keysSeen.index(theKey) except ValueError: idx = len(keysSeen) keysSeen.append(theKey) symList[i] = idx + 1 return tuple(symList)
def CalculateHarary(mol: Chem.Mol) -> float:
    """Get Harary number.

    Or Thara. Half the sum of the reciprocals of all non-zero
    distance-matrix entries.
    """
    topo_dist = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    # Zero entries are excluded before inverting.
    reciprocals = 1.0 / topo_dist[topo_dist != 0]
    return 1.0 / 2 * sum(reciprocals)
def _CalculateGearyAutocorrelation(mol, lag=1, propertylabel="m"):
    """
    **Internal used only**

    Calculation of Geary autocorrelation descriptors based on
    different property weights.

    Usage:
        res = _CalculateGearyAutocorrelation(mol, lag=1, propertylabel='m')
    Input: mol is a molecule object.
        lag is the topological distance between atom i and atom j.
        propertylabel is the weighted property.
    Output: res is a numeric value rounded to three decimals; 0 when no
        atom pair is at distance ``lag`` or when all atoms share the same
        property value.
    """
    atom_count = mol.GetNumAtoms()
    weights = [
        GetRelativeAtomicProperty(atom.GetSymbol(), propertyname=propertylabel)
        for atom in mol.GetAtoms()
    ]
    mean_weight = sum(weights) / atom_count
    squared_deviation = sum([numpy.square(w - mean_weight) for w in weights])

    topo_dist = Chem.GetDistanceMatrix(mol)
    numerator = 0.0
    matched_pairs = 0
    for a in range(atom_count):
        for b in range(atom_count):
            if topo_dist[a, b] != lag:
                continue
            atom_a = mol.GetAtomWithIdx(a)
            atom_b = mol.GetAtomWithIdx(b)
            weight_a = GetRelativeAtomicProperty(
                element=atom_a.GetSymbol(), propertyname=propertylabel
            )
            weight_b = GetRelativeAtomicProperty(
                element=atom_b.GetSymbol(), propertyname=propertylabel
            )
            numerator = numerator + numpy.square(weight_a - weight_b)
            matched_pairs = matched_pairs + 1

    if squared_deviation == 0 or matched_pairs == 0:
        result = 0
    else:
        result = (numerator / matched_pairs / 2) / (
            squared_deviation / (atom_count - 1))
    return round(result, 3)
def CalculatePolarityNumber(mol: Chem.Mol) -> float:
    """Get Polarity number.

    Or Pol. Half the number of distance-matrix entries equal to 3.
    """
    at_distance_three = Chem.GetDistanceMatrix(mol) == 3
    # Sum over rows, then over the resulting per-column counts.
    entry_count = sum(sum(at_distance_three))
    return 1. / 2 * entry_count
def CalculateDiameter(mol: Chem.Mol) -> float:
    """Get largest value of the distance matrix.

    Or diametert.
    From Petitjean, M. J. Chem. Inf. Comput. Sci. 1992, 32, 4, 331-337.
    """
    return Chem.GetDistanceMatrix(mol).max()
def CalculateXuIndex(mol):
    """
    Calculation of Xu index

    Gathers the explicit atom count, the per-atom degrees and the distance
    matrix, and delegates the computation to ``_Xu``.
    """
    degrees = np.array([atom.GetDegree() for atom in mol.GetAtoms()])
    explicit_atom_count = mol.GetNumAtoms(onlyExplicit=True)
    topo_dist = Chem.GetDistanceMatrix(mol)
    return _Xu(topo_dist, explicit_atom_count, degrees)
def getDistances(mol, fp_dict):
    """Register every atom pair closer than 11 in ``fp_dict`` via ``addBond``.

    For each unordered pair of atom indices whose distance-matrix entry is
    below 11, ``addBond`` is called with the pair, the molecule, the integer
    distance minus one, and ``fp_dict``. Returns ``fp_dict``.
    """
    topo_dist = Chem.GetDistanceMatrix(mol)
    atom_indices = range(topo_dist.shape[0])
    for point in itertools.combinations(atom_indices, 2):
        separation = topo_dist[point[0]][point[1]]
        if not separation < 11:
            continue
        addBond(point, mol, int(separation) - 1, fp_dict)
    return fp_dict
def createCorrespondence(self, penalty=3.0):
    """Populate this graph with atom-pair nodes and distance-matching edges.

    Nodes are tuples ``(idx1, idx2, score)`` for every pair of atoms (one
    from ``self._mol1``, one from ``self._mol2``) whose score is below 1.
    The score is the number of differing criteria (implicit valence, atomic
    number, degree, ring membership, CIP code) divided by ``penalty`` and
    capped at 1. An edge joins two nodes when the distance between their
    atoms in the first molecule equals the distance between their atoms in
    the second molecule.
    """
    mol1 = self._mol1
    mol2 = self._mol2

    for atom1 in mol1.GetAtoms():
        for atom2 in mol2.GetAtoms():
            # Cache the CIP code as a plain attribute so that a missing
            # property compares as None instead of raising.
            for atom in (atom1, atom2):
                try:
                    atom._CIPCode = atom.GetProp('_CIPCode')
                except KeyError:
                    atom._CIPCode = None

            # One strike per criterion on which the two atoms differ.
            strikes = sum((
                atom1.GetImplicitValence() != atom2.GetImplicitValence(),
                atom1.GetAtomicNum() != atom2.GetAtomicNum(),
                atom1.GetDegree() != atom2.GetDegree(),
                atom1.IsInRing() != atom2.IsInRing(),
                atom1._CIPCode != atom2._CIPCode,
            ))
            # The score never exceeds 1; a score of 1 rejects the pair.
            score = min(1, strikes / penalty)
            candidate = (atom1.GetIdx(), atom2.GetIdx(), score)
            if score < 1:
                self.add_node(candidate)

    dist1 = Chem.GetDistanceMatrix(mol1)
    dist2 = Chem.GetDistanceMatrix(mol2)

    # Compare every ordered pair of nodes, a node with itself included.
    for node_a in self.nodes():
        for node_b in self.nodes():
            same_separation = (
                dist1[node_a[0]][node_b[0]] == dist2[node_a[1]][node_b[1]])
            if same_separation:
                self.add_edge(node_a, node_b)
def CalculateWeiner(mol):
    """
    Calculation of Weiner number in a molecule

    Returns ``log10`` of half the sum of all distance-matrix entries;
    ``MINVALUE`` is substituted when that half-sum is zero.
    """
    half_total = 1.0 / 2 * Chem.GetDistanceMatrix(mol).sum()
    if half_total == 0:
        # Substitute MINVALUE so log10 is never applied to zero.
        return np.log10(MINVALUE)
    return np.log10(half_total)
def getGMTI(mol):
    """Sum ``degree_i * degree_j * distance_ij`` over unordered atom pairs.

    Degrees come from ``GetDegree()`` and distances from
    ``Chem.GetDistanceMatrix``. Returns 0.0 when there are fewer than two
    atoms.
    """
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    topo_dist = Chem.GetDistanceMatrix(mol)

    total = 0.0
    for a in range(atom_count):
        degree_a = degrees[a]
        for b in range(a + 1, atom_count):
            total = total + degree_a * degrees[b] * topo_dist[a, b]
    return total
def CalculateSchiultz(mol: Chem.Mol) -> float:
    """Get Schiultz number.

    Or Tsch. Computed as the sum of the entries of ``(D + A) . v``, where
    ``D`` is the distance matrix, ``A`` the adjacency matrix and ``v`` the
    vector obtained by summing the rows of ``A``.
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    # Built-in sum adds the rows of the matrix element-wise.
    VertexDegree = sum(Adjacent)
    # numpy.dot is used instead of scipy.dot: the NumPy aliases in the
    # top-level scipy namespace are deprecated and absent from newer SciPy.
    return sum(numpy.dot((Distance + Adjacent), VertexDegree))
def get_surface_area(smile):
    """Return per-atom charges, surface contributions and MMFF atom types.

    The SMILES string is parsed, hydrogens are added and 2D coordinates are
    computed before the properties are read.

    Args:
        smile: SMILES string of the molecule.

    Returns:
        A tuple ``(charges, surf, atoms)`` where ``charges`` is a numpy array
        of the ``_GasteigerCharge`` atom properties, ``surf`` is the result of
        ``Chem.MolSurf._LabuteHelper`` without its first entry, and ``atoms``
        is the list of MMFF atom types in atom order.
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smile))
    AllChem.Compute2DCoords(mol)
    # The two distance-matrix masks formerly computed here were never read
    # and have been removed.
    n_atoms = mol.GetNumAtoms()

    mmff_props = AllChem.MMFFGetMoleculeProperties(mol)
    atoms = [mmff_props.GetMMFFAtomType(idx) for idx in range(n_atoms)]

    AllChem.ComputeGasteigerCharges(mol)
    charges = np.array([
        float(mol.GetAtomWithIdx(idx).GetProp('_GasteigerCharge'))
        for idx in range(n_atoms)
    ])

    # The first entry is dropped (presumably it is not a per-atom value --
    # confirm against _LabuteHelper).
    surf = np.array(Chem.MolSurf._LabuteHelper(mol))
    return (charges, surf[1:], atoms)
def GetChemicalNonequivs(atom, themol):
    """Count the distinct substituents of ``atom`` by element-symbol sequence.

    The first element of the result of
    ``ChiralDescriptors.determineAtomSubstituents`` maps a key to the atom
    indices of one substituent. Each substituent is reduced to the tuple of
    its atoms' element symbols; the number of distinct non-empty tuples is
    returned. At most four substituents are supported.

    NOTE: substituents with identical symbol sequences are counted as equal
    even if their connectivity differs (e.g. cyclopentyl vs. pentyl).
    """
    # Computed once; previously the distance matrix and the substituent
    # lookup were re-evaluated for every substituent.
    branches = ChiralDescriptors.determineAtomSubstituents(
        atom.GetIdx(), themol, Chem.GetDistanceMatrix(themol))[0]

    substituents = [[], [], [], []]
    for slot, key in enumerate(branches):
        for subatom in branches[key]:
            substituents[slot].append(
                themol.GetAtomWithIdx(subatom).GetSymbol())

    # Empty slots (fewer than four substituents) are ignored.
    return len({tuple(symbols) for symbols in substituents if symbols})
def CalculateDiameter(mol):
    """
    Calculation of diameter, which is the largest value
    in the distance matrix [Petitjean 1992].

    Returns ``log10`` of that value; ``MINVALUE`` is substituted when the
    largest value is zero.
    """
    largest = Chem.GetDistanceMatrix(mol).max()
    if largest == 0:
        # Substitute MINVALUE so log10 is never applied to zero.
        return np.log10(MINVALUE)
    return np.log10(largest)