Example #1
0
def CalculateGravitationalTopoIndex(mol):
    """
    #################################################################
    Gravitational topological index (Gravto).

    For every unordered atom pair (i, j) the product of the two atomic
    masses is divided by the squared topological distance taken from
    Chem.GetDistanceMatrix (used in place of a geometric distance).
    The pair terms are summed and the sum is divided by 100.

    Usage:

        result=CalculateGravitationalTopoIndex(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    #################################################################
    """
    atom_count = mol.GetNumAtoms()
    topo_dist = Chem.GetDistanceMatrix(mol)
    total = 0.0
    atoms = mol.GetAtoms()
    for first in range(atom_count):
        # Only pairs with second > first, so each pair is counted once.
        for second in range(first + 1, atom_count):
            mass_product = atoms[first].GetMass() * atoms[second].GetMass()
            total += mass_product / numpy.power(topo_dist[first][second], 2)

    return total / 100
Example #2
0
def CalculateXuIndex(mol):
    """
    #################################################################
    Calculation of Xu index

    ---->Xu

    With delta_i the degree of atom i and sigma_i the sum of row i of
    the topological distance matrix, the value returned is

        sqrt(nAT) * ln( sum(delta_i * sigma_i**2) / sum(delta_i * sigma_i) )

    Usage:

        result=CalculateXuIndex(mol)

        Input: mol is a molecule object

        Output: result is a numeric value (NaN when the denominator
        is zero, e.g. for a molecule without bonds)
    #################################################################
    """
    nAT = mol.GetNumAtoms()
    deltas = [x.GetDegree() for x in mol.GetAtoms()]
    Distance = Chem.GetDistanceMatrix(mol)
    # numpy.sum replaces scipy.sum: the NumPy aliases in the SciPy root
    # namespace are deprecated and not guaranteed to exist any more.
    sigma = numpy.sum(Distance, axis=1)
    temp1 = 0.0
    temp2 = 0.0
    for delta, row_sum in zip(deltas, sigma):
        temp1 = temp1 + delta * (row_sum ** 2)
        temp2 = temp2 + delta * row_sum
    Xu = numpy.sqrt(nAT) * numpy.log(temp1 / temp2)

    return Xu
Example #3
0
def construct_pair_feature(mol, num_max_atoms=WEAVE_DEFAULT_NUM_MAX_ATOMS):
    """Assemble the atom-pair feature matrix of a molecule.

    Args:
        mol (Mol): mol instance
        num_max_atoms (int): number of max atoms

    Returns (numpy.ndarray): 2 dimensional array with
        `num_max_atoms` ** 2 rows. The pair (i, j) is written to row
        ``i * n_atom + j`` where ``n_atom`` is the atom count of `mol`.
        Columns are, in order, the distance vector (`MAX_DISTANCE`
        wide), the bond vector (4 wide) and the ring feature returned
        by `construct_ring_feature_vec`.

    """
    atom_total = mol.GetNumAtoms()
    topo_dist = Chem.GetDistanceMatrix(mol)
    pair_total = atom_total * atom_total

    # Rows beyond pair_total are left as zero padding.
    dist_block = numpy.zeros((num_max_atoms**2, MAX_DISTANCE),
                             dtype=numpy.float32)
    for row in range(pair_total):
        i, j = divmod(row, atom_total)
        dist_block[row] = construct_distance_vec(topo_dist, i, j)

    bond_block = numpy.zeros((num_max_atoms**2, 4), dtype=numpy.float32)
    for row in range(pair_total):
        i, j = divmod(row, atom_total)
        bond_block[row] = construct_bond_vec(mol, i, j)

    ring_block = construct_ring_feature_vec(mol, num_max_atoms=num_max_atoms)
    return numpy.hstack((dist_block, bond_block, ring_block))
Example #4
0
  def _featurize(self, datapoint: RDKitMol, **kwargs) -> np.ndarray:
    """
    Featurize the molecule.

    Parameters
    ----------
    datapoint: RDKitMol
      RDKit mol object.
    **kwargs
      Only the deprecated keyword ``mol`` is inspected. When present it
      replaces `datapoint` and a ``DeprecationWarning`` is emitted.

    Returns
    -------
    MATEncoding
      A MATEncoding dataclass instance consisting of processed node_features, adjacency_matrix and distance_matrix.
    """
    if 'mol' in kwargs:
      import warnings

      datapoint = kwargs.get("mol")
      # Warn rather than raise: raising aborted the call and made the
      # assignment above unreachable in practice, which defeats the
      # purpose of keeping the old keyword working during deprecation.
      warnings.warn(
          'Mol is being phased out as a parameter, please pass "datapoint" instead.',
          DeprecationWarning,
          stacklevel=2)
    from rdkit import Chem

    datapoint = self.construct_mol(datapoint)

    node_features = self.construct_node_features_matrix(datapoint)
    adjacency_matrix = Chem.GetAdjacencyMatrix(datapoint)
    distance_matrix = Chem.GetDistanceMatrix(datapoint)

    # The dummy node is added before padding so all three arrays are
    # extended consistently.
    node_features, adjacency_matrix, distance_matrix = self._add_dummy_node(
        node_features, adjacency_matrix, distance_matrix)

    node_features = self._pad_sequence(node_features)
    adjacency_matrix = self._pad_sequence(adjacency_matrix)
    distance_matrix = self._pad_sequence(distance_matrix)

    return MATEncoding(node_features, adjacency_matrix, distance_matrix)
Example #5
0
def CalculateGraphDistance(mol):
    """
    #################################################################
    Graph distance index, returned as a log10 value (Tigdi).

    For every integer distance k from 1 up to the largest entry of the
    topological distance matrix, the number of unordered atom pairs at
    distance k is squared; the squares are summed and log10 of the sum
    is returned.

    Usage:

        result=CalculateGraphDistance(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    #################################################################
    """
    topo_dist = Chem.GetDistanceMatrix(mol)
    longest = int(topo_dist.max())
    total = 0.0
    for step in range(1, longest + 1):
        # The matrix is symmetric, so halve the number of matching cells.
        pair_count = 1. / 2 * (topo_dist == step).sum()
        total += pair_count ** 2

    return numpy.log10(total)
Example #6
0
def CalculateRadius(mol):
    """
    #################################################################
    Topological radius (radiust) [Petitjean 1992].

    Each row of the distance matrix D contributes its largest entry
    r_i; the radius is the smallest of those r_i.

    Usage:

        result=CalculateRadius(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    #################################################################
    """
    row_maxima = [max(row) for row in Chem.GetDistanceMatrix(mol)]
    return min(row_maxima)
Example #7
0
def CalculateBalaban(mol):
    """
    #################################################################
    Balaban index of a molecule (J).

    With S_i the row sums of the distance matrix and
    mu = Nbond - Natom + 1, the result is

        Nbond / (mu + 1) * sum over bonded pairs of 1 / sqrt(S_i * S_j)

    and 0 when mu + 1 is zero.

    Usage:

        result=CalculateBalaban(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    #################################################################
    """
    adjacency = Chem.GetAdjacencyMatrix(mol)
    topo_dist = Chem.GetDistanceMatrix(mol)
    bond_count = mol.GetNumBonds()
    atom_count = mol.GetNumAtoms()
    dist_sums = numpy.sum(topo_dist, axis=1)
    cyclomatic = bond_count - atom_count + 1

    size = len(topo_dist)
    edge_total = 0.
    for a in range(size):
        sum_a = dist_sums[a]
        # Upper triangle only, so each bond contributes once.
        for b in range(a, size):
            if adjacency[a, b] != 1:
                continue
            edge_total += 1. / numpy.sqrt(sum_a * dist_sums[b])

    if cyclomatic + 1 == 0:
        return 0
    return float(bond_count) / float(cyclomatic + 1) * edge_total
Example #8
0
def _CalculateMoreauBrotoAutocorrelation(mol, lag=1, propertylabel='m'):
    """
    **Internal used only**
    Moreau-Broto autocorrelation descriptor for one property weight.

    Sums the product of the relative atomic property of both atoms over
    every ordered atom pair whose topological distance equals `lag`,
    then returns ln(sum / 2 + 1) rounded to three decimals.
    """
    atom_count = mol.GetNumAtoms()
    topo_dist = Chem.GetDistanceMatrix(mol)

    total = 0.0
    for a in range(atom_count):
        for b in range(atom_count):
            if topo_dist[a, b] != lag:
                continue
            weight_a = AtomProperty.GetRelativeAtomicProperty(
                element=mol.GetAtomWithIdx(a).GetSymbol(),
                propertyname=propertylabel)
            weight_b = AtomProperty.GetRelativeAtomicProperty(
                element=mol.GetAtomWithIdx(b).GetSymbol(),
                propertyname=propertylabel)
            total += weight_a * weight_b

    # Ordered pairs were visited, hence the division by two.
    return round(numpy.log(total / 2 + 1), 3)
Example #9
0
def _main_chain_len(s):
    """Return the topological distance between the first two '*' atoms.

    `s` is parsed with Chem.MolFromSmiles; an IndexError is raised when
    the molecule holds fewer than two '*' atoms.
    """
    mol = Chem.MolFromSmiles(s)
    star_inds = [
        atom.GetIdx() for atom in mol.GetAtoms() if atom.GetSymbol() == '*'
    ]
    topo_dist = Chem.GetDistanceMatrix(mol)
    return topo_dist[star_inds[0]][star_inds[1]]
Example #10
0
def EStateIndices(mol, force=True):
    """ returns a tuple of EState indices for the molecule

    Reference: Hall, Mohney and Kier. JCICS _31_ 76-81 (1991)

    The result is stored on the molecule as `_eStateIndices`; that
    stored value is returned directly when `force` is false and the
    attribute already exists.

  """
    if not force and hasattr(mol, '_eStateIndices'):
        return mol._eStateIndices

    periodic = Chem.GetPeriodicTable()
    atom_count = mol.GetNumAtoms()

    # Intrinsic state per atom; atoms without neighbours keep 0.
    intrinsic = numpy.zeros(atom_count, dtype=numpy.float64)
    for idx in range(atom_count):
        atom = mol.GetAtomWithIdx(idx)
        degree = atom.GetDegree()
        if degree > 0:
            atomic_num = atom.GetAtomicNum()
            valence_delta = (periodic.GetNOuterElecs(atomic_num) -
                             atom.GetTotalNumHs())
            shell = GetPrincipleQuantumNumber(atomic_num)
            intrinsic[idx] = (4. / (shell * shell) * valence_delta + 1) / degree

    # Bond-count distances shifted by one.
    separation = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0) + 1

    perturbation = numpy.zeros(atom_count, dtype=numpy.float64)
    for a in range(atom_count):
        for b in range(a + 1, atom_count):
            r = separation[a, b]
            # Pairs at or beyond 1e6 are skipped.
            if not r < 1e6:
                continue
            delta = (intrinsic[a] - intrinsic[b]) / (r * r)
            perturbation[a] += delta
            perturbation[b] -= delta

    result = perturbation + intrinsic
    mol._eStateIndices = result
    return result
Example #11
0
def CalculateGutmanTopo(mol):
    """
    #################################################################
    Gutman molecular topological index on simple vertex degrees,
    returned as a log10 value (GMTI).

    Sums degree_i * degree_j * distance_ij over every unordered atom
    pair and returns log10 of that sum.

    Usage:

        result=CalculateGutmanTopo(mol)

        Input: mol is a molecule object

        Output: result is a numeric value
    #################################################################
    """
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    topo_dist = Chem.GetDistanceMatrix(mol)
    total = 0.0
    for a in range(atom_count):
        deg_a = degrees[a]
        for b in range(a + 1, atom_count):
            total += deg_a * degrees[b] * topo_dist[a, b]

    return numpy.log10(total)
Example #12
0
def CalculateDistanceEqualityTotalInf(mol):

    """
    #################################################################
    Total information index on distance equality

    -->DET

    With n = nAT*(nAT-1)/2 the number of atom pairs and c_k the number
    of pairs at topological distance k, the value returned is

        n*log2(n) - sum_k c_k*log2(c_k)

    Usage:

        result=CalculateDistanceEqualityTotalInf(mol)

        Input: mol is a molecule object

        Output: result is a numeric value (0.0 when the molecule has
        fewer than two atoms)
    #################################################################
    """
    Distance = Chem.GetDistanceMatrix(mol)
    nAT = mol.GetNumAtoms()
    # Number of unordered atom pairs. The former expression
    # 1./2*nAT**2-nAT lacked parentheses and evaluated to nAT*(nAT-2)/2,
    # which is 0 for two atoms and turned the result into NaN.
    n = 1. / 2 * (nAT ** 2 - nAT)
    if n == 0:
        return 0.0

    # Off-diagonal entries only; every pair shows up twice (symmetry).
    pair_distances = Distance[Distance > 0]
    res = 0.0
    # Iterate over the distances that actually occur: this avoids
    # 0*log2(0) = NaN for missing distances and avoids stepping through
    # every integer up to the very large value a distance matrix uses
    # for unconnected atoms.
    for d in sorted(set(pair_distances.tolist())):
        cc = 1. / 2 * (pair_distances == d).sum()
        res += cc * numpy.log2(cc)

    return n * numpy.log2(n) - res
Example #13
0
def _CalculateEState(mol, skipH=1):
    """
    **Internal used only**
    Get the EState value of each atom in a molecule

    Hydrogens are added first and removed again when `skipH` is 1, so
    the returned array has one entry per atom of the resulting molecule.
    Each entry is the atom's intrinsic state plus the sum of the
    pairwise perturbations (I_i - I_j) / (d_ij + 1)**2.
    """
    mol = Chem.AddHs(mol)
    if skipH == 1:
        mol = Chem.RemoveHs(mol)
    tb1 = Chem.GetPeriodicTable()
    nAtoms = mol.GetNumAtoms()
    # Builtin float instead of the numpy.float alias, which has been
    # removed from NumPy and raises AttributeError there.
    Is = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        at = mol.GetAtomWithIdx(i)
        atNum = at.GetAtomicNum()
        d = at.GetDegree()
        # Atoms without neighbours keep an intrinsic state of 0.
        if d > 0:
            h = at.GetTotalNumHs()
            dv = tb1.GetNOuterElecs(atNum) - h
            N = _GetPrincipleQuantumNumber(atNum)
            Is[i] = (4.0 / (N * N) * dv + 1) / d
    dists = Chem.GetDistanceMatrix(mol, useBO=0, useAtomWts=0)
    dists += 1
    accum = numpy.zeros(nAtoms, float)
    for i in range(nAtoms):
        for j in range(i + 1, nAtoms):
            p = dists[i, j]
            # Pairs at or beyond 1e6 are skipped.
            if p < 1e6:
                temp = (Is[i] - Is[j]) / (p * p)
                accum[i] += temp
                accum[j] -= temp
    res = accum + Is
    return res
Example #14
0
def generateChiralDescriptorsForAllCenters(mol, verbose=False):
    """
    Generates descriptors for all chiral centers in the molecule.
    Details of these descriptors are described in: 
    Schneider et al., Chiral Cliffs: Investigating the Influence of Chirality on Binding Affinity
    https://doi.org/10.1002/cmdc.201700798. 

    `verbose` is forwarded unchanged to calculateChiralDescriptors.
    Returns a dict that maps the atom index of every chiral center
    reported by Chem.FindMolChiralCenters to its descriptors.

    >>> # test molecules are taken from the publication above (see Figure 3 and Figure 8)
    >>> testmols = {
    ...   "CHEMBL319180" : 'CCCN1C(=O)[C@@H](NC(=O)Nc2cccc(C)c2)N=C(N3CCN(C)CC3)c4ccccc14',
    ...   }
    >>> mol = Chem.MolFromSmiles(testmols['CHEMBL319180'])
    >>> desc = generateChiralDescriptorsForAllCenters(mol)
    >>> desc.keys()
    dict_keys([6])
    >>> desc[6]['arLevel2']
    0
    >>> desc[6]['s4_pathLength']
    7
    >>> desc[6]['maxDist']
    14
    >>> desc[6]['maxDistfromCC']
    7
    """

    desc = {}
    # The distance matrix is computed once and shared by all centers.
    dists = Chem.GetDistanceMatrix(mol)
    for idxChiral, _ in Chem.FindMolChiralCenters(mol):
        # Pass the caller's flag through; it used to be hard-coded to
        # False, which silently ignored verbose=True.
        desc[idxChiral] = calculateChiralDescriptors(mol,
                                                     idxChiral,
                                                     dists,
                                                     verbose=verbose)
    return desc
Example #15
0
def test_mat_encoder_layer():
    """Check the forward pass of MATEncoderLayer on ethane ("CC")."""
    torch.manual_seed(0)
    from rdkit import Chem

    ethane = Chem.MolFromSmiles("CC")
    adj_matrix = Chem.GetAdjacencyMatrix(ethane)
    distance_matrix = Chem.GetDistanceMatrix(ethane)

    features = torch.Tensor([[1., 2.], [5., 6.]])
    mask = torch.Tensor([[1., 1.], [1., 1.]])

    layer_config = {
        'dist_kernel': 'softmax',
        'lambda_attention': 0.33,
        'lambda_distance': 0.33,
        'h': 2,
        'sa_hsize': 2,
        'sa_dropout_p': 0.0,
        'output_bias': True,
        'd_input': 2,
        'd_hidden': 2,
        'd_output': 2,
        'activation': 'relu',
        'n_layers': 2,
        'ff_dropout_p': 0.0,
        'encoder_hsize': 2,
        'encoder_dropout_p': 0.0,
    }
    # The seed is fixed above so the layer weights are reproducible.
    layer = torch_layers.MATEncoderLayer(**layer_config)

    result = layer(features, mask, adj_matrix, distance_matrix, 0.0)
    expected = torch.tensor([[[0.9988, 2.0012], [-0.9999, 3.9999],
                              [0.9988, 2.0012], [-0.9999, 3.9999]],
                             [[5.0000, 6.0000], [3.0000, 8.0000],
                              [5.0000, 6.0000], [3.0000, 8.0000]]])
    assert torch.allclose(result, expected, rtol=1e-4)
Example #16
0
def CalculateDistanceEqualityMeanInf(mol):

    """
    #################################################################
    Mean information index on distance equality

    -->IDE

    The number of atom pairs at each occurring topological distance is
    divided by the total number of pairs n = nAT*(nAT-1)/2 and the
    resulting fractions are handed to _CalculateEntropy.

    Usage:

        result=CalculateDistanceEqualityMeanInf(mol)

        Input: mol is a molecule object

        Output: result is a numeric value (0.0 when the molecule has
        fewer than two atoms)
    #################################################################
    """
    Distance = Chem.GetDistanceMatrix(mol)

    nAT = mol.GetNumAtoms()
    # Number of unordered atom pairs. The former expression
    # 1./2*nAT**2-nAT lacked parentheses, so the fractions did not sum
    # to one and two-atom molecules divided by zero.
    n = 1. / 2 * (nAT ** 2 - nAT)
    if n == 0:
        return 0.0

    # Off-diagonal entries only; every pair shows up twice (symmetry).
    pair_distances = Distance[Distance > 0]
    # Count only distances that occur, so no huge array is allocated
    # for the very large value used between unconnected atoms. Builtin
    # float replaces the removed numpy.float alias.
    cc = numpy.array(
        [1. / 2 * (pair_distances == d).sum()
         for d in sorted(set(pair_distances.tolist()))],
        float)

    res = _CalculateEntropy(cc / n)

    return res
Example #17
0
def _AssignSymmetryClasses(mol, vdList, bdMat, forceBDMat, numAtoms, cutoff):
    """
     Used by BertzCT

     vdList: the number of neighbors each atom has
     bdMat: "balaban" distance matrix
     
  """
    if forceBDMat:
        bdMat = Chem.GetDistanceMatrix(mol,
                                       useBO=1,
                                       useAtomWts=0,
                                       force=1,
                                       prefix="Balaban")
        mol._balabanMat = bdMat

    atomIdx = 0
    keysSeen = []
    symList = [0] * numAtoms
    for i in range(numAtoms):
        tmpList = bdMat[i].tolist()
        tmpList.sort()
        theKey = tuple(['%.4f' % x for x in tmpList[:cutoff]])
        try:
            idx = keysSeen.index(theKey)
        except ValueError:
            idx = len(keysSeen)
            keysSeen.append(theKey)
        symList[i] = idx + 1
    return tuple(symList)
Example #18
0
def CalculateHarary(mol: Chem.Mol) -> float:
    """Return the Harary number (Thara).

    Half the sum of the reciprocals of all non-zero entries of the
    topological distance matrix.
    """
    topo_dist = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    reciprocals = 1.0 / topo_dist[topo_dist != 0]
    return 1.0 / 2 * (sum(reciprocals))
Example #19
0
def _CalculateGearyAutocorrelation(mol, lag=1, propertylabel="m"):
    """
    #################################################################
    **Internal used only**

    Calculation of Geary autocorrelation descriptors based on

    different property weights.

    Usage:

    res=_CalculateGearyAutocorrelation(mol,lag=1,propertylabel='m')

    Input: mol is a molecule object.

    lag is the topological distance between atom i and atom j.

    propertylabel is the weighted property.

    Output: res is a numeric value rounded to three decimals; 0 when
    all atoms share the same property value or no atom pair lies at
    distance lag.
    #################################################################
    """

    Natom = mol.GetNumAtoms()

    # One property lookup per atom; the pair loop below reuses these
    # values instead of querying the table again for every pair.
    # Assumes mol.GetAtoms() yields atoms in index order, matching
    # the row/column order of the distance matrix.
    prolist = [
        GetRelativeAtomicProperty(atom.GetSymbol(), propertyname=propertylabel)
        for atom in mol.GetAtoms()
    ]

    aveweight = sum(prolist) / Natom

    # Sum of squared deviations from the mean property value.
    deviation_sum = sum(numpy.square(x - aveweight) for x in prolist)

    distance_matrix = Chem.GetDistanceMatrix(mol)
    res = 0.0
    index = 0
    for i in range(Natom):
        for j in range(Natom):
            if distance_matrix[i, j] == lag:
                res = res + numpy.square(prolist[i] - prolist[j])
                index = index + 1

    if deviation_sum == 0 or index == 0:
        result = 0
    else:
        result = (res / index / 2) / (deviation_sum / (Natom - 1))

    return round(result, 3)
Example #20
0
def CalculatePolarityNumber(mol: Chem.Mol) -> float:
    """Return the polarity number (Pol).

    Half the number of distance-matrix entries equal to 3, i.e. the
    number of unordered atom pairs three bonds apart.
    """
    is_three_apart = Chem.GetDistanceMatrix(mol) == 3
    per_column = sum(is_three_apart)
    return 1. / 2 * sum(per_column)
Example #21
0
def CalculateDiameter(mol: Chem.Mol) -> float:
    """Return the topological diameter (diametert).

    This is the largest entry of the distance matrix.
    From Petitjean, M. J. Chem. Inf. Comput. Sci. 1992, 32, 4, 331-337.
    """
    return Chem.GetDistanceMatrix(mol).max()
Example #22
0
def CalculateXuIndex(mol):
    """
    Xu index of a molecule.

    Collects the explicit atom count, the per-atom degrees and the
    topological distance matrix and delegates the computation to _Xu.
    """
    explicit_atoms = mol.GetNumAtoms(onlyExplicit=True)
    degrees = np.array([atom.GetDegree() for atom in mol.GetAtoms()])
    topo_dist = Chem.GetDistanceMatrix(mol)
    return _Xu(topo_dist, explicit_atoms, degrees)
Example #23
0
def getDistances(mol, fp_dict):
    """Register every atom pair closer than 11 bonds in `fp_dict`.

    For each unordered pair of atom indices with topological distance
    below 11, addBond is called with the pair, the molecule, the
    distance minus one (as int) and `fp_dict`. `fp_dict` is returned.
    """
    topo_dist = Chem.GetDistanceMatrix(mol)
    atom_count = topo_dist.shape[0]
    for pair in itertools.combinations(range(atom_count), 2):
        first, second = pair
        separation = topo_dist[first][second]
        if not separation < 11:
            continue
        addBond(pair, mol, int(separation) - 1, fp_dict)
    return fp_dict
Example #24
0
    def createCorrespondence(self, penalty=3.0):
        """Build the correspondence graph between the two molecules.

        Nodes are tuples (atom index in mol1, atom index in mol2, score).
        The score is the number of mismatching atom features (implicit
        valence, atomic number, degree, ring membership, CIP code)
        divided by `penalty` and capped at 1; only pairs scoring below 1
        become nodes. Two nodes are connected when the topological
        distance between their mol1 atoms equals the distance between
        their mol2 atoms.
        """
        first_mol = self._mol1
        second_mol = self._mol2

        for atom1 in first_mol.GetAtoms():
            for atom2 in second_mol.GetAtoms():

                # Keep the CIP code as a plain attribute (None when the
                # property is missing) so the comparison cannot raise.
                for atom in (atom1, atom2):
                    try:
                        atom._CIPCode = atom.GetProp('_CIPCode')
                    except KeyError:
                        atom._CIPCode = None

                mismatches = [
                    atom1.GetImplicitValence() != atom2.GetImplicitValence(),
                    atom1.GetAtomicNum() != atom2.GetAtomicNum(),
                    atom1.GetDegree() != atom2.GetDegree(),
                    atom1.IsInRing() != atom2.IsInRing(),
                    atom1._CIPCode != atom2._CIPCode,
                ]

                # Cap the penalty at 1; a capped pair is discarded.
                score = min(1, sum(mismatches) / penalty)
                node = (atom1.GetIdx(), atom2.GetIdx(), score)
                if score < 1:
                    self.add_node(node)

        first_dist = Chem.GetDistanceMatrix(first_mol)
        second_dist = Chem.GetDistanceMatrix(second_mol)

        # Connect node pairs whose atoms are equally far apart in both
        # molecules.
        for node_a in self.nodes():
            for node_b in self.nodes():
                in_first = first_dist[node_a[0]][node_b[0]]
                in_second = second_dist[node_a[1]][node_b[1]]
                if in_first == in_second:
                    self.add_edge(node_a, node_b)
Example #25
0
def CalculateWeiner(mol):
    """
    Weiner number of a molecule, returned as a log10 value.

    Half the sum of all distance-matrix entries; a sum of zero is
    replaced by MINVALUE before the logarithm is taken.
    """
    half_sum = 1.0 / 2 * Chem.GetDistanceMatrix(mol).sum()
    return np.log10(MINVALUE if half_sum == 0 else half_sum)
Example #26
0
def getGMTI(mol):
    """Return the sum of degree_i * degree_j * distance_ij over all
    unordered atom pairs (no logarithm is applied)."""
    atom_count = mol.GetNumAtoms()
    degrees = [atom.GetDegree() for atom in mol.GetAtoms()]
    topo_dist = Chem.GetDistanceMatrix(mol)
    total = 0.0
    for a in range(atom_count):
        deg_a = degrees[a]
        for b in range(a + 1, atom_count):
            total += deg_a * degrees[b] * topo_dist[a, b]
    return total
Example #27
0
def CalculateSchiultz(mol: Chem.Mol) -> float:
    """Get Schiultz number.

    Or Tsch.

    Computed as the sum of the entries of (D + A) . v, where D is the
    topological distance matrix, A the adjacency matrix and v the
    vector of column sums of A (the vertex degrees).
    """
    Distance = numpy.array(Chem.GetDistanceMatrix(mol), 'd')
    Adjacent = numpy.array(Chem.GetAdjacencyMatrix(mol), 'd')
    # Builtin sum over the rows of a 2-D array yields the column sums.
    VertexDegree = sum(Adjacent)
    # numpy.dot replaces scipy.dot: the NumPy aliases in the SciPy root
    # namespace are deprecated and not guaranteed to exist any more.
    return sum(numpy.dot((Distance + Adjacent), VertexDegree))
Example #28
0
def get_surface_area(smile):
    """Return per-atom charges, surface contributions and MMFF types.

    The SMILES string is parsed, explicit hydrogens are added and 2D
    coordinates are computed. Returns a tuple
    ``(charges, surf[1:], atoms)``:

    * charges -- numpy array of the '_GasteigerCharge' property of
      every atom, as float
    * surf[1:] -- output of Chem.MolSurf._LabuteHelper without its
      first element (presumably the per-atom contributions; confirm
      against the RDKit source)
    * atoms -- list of MMFF atom types, one per atom
    """
    mol = Chem.AddHs(Chem.MolFromSmiles(smile))
    AllChem.Compute2DCoords(mol)
    # The distance-1 and distance-2 masks that used to be built here
    # were never read, so the two distance-matrix computations are gone.
    molMMFF = AllChem.MMFFGetMoleculeProperties(mol)
    n_atoms = mol.GetNumAtoms()
    atoms = [molMMFF.GetMMFFAtomType(idx) for idx in range(n_atoms)]
    AllChem.ComputeGasteigerCharges(mol)
    charges = np.array([
        float(mol.GetAtomWithIdx(idx).GetProp('_GasteigerCharge'))
        for idx in range(n_atoms)
    ])
    surf = np.array(Chem.MolSurf._LabuteHelper(mol))
    return (charges, surf[1:], atoms)
def GetChemicalNonequivs(atom, themol):
    """Count the distinct substituent compositions around `atom`.

    The first element returned by
    ChiralDescriptors.determineAtomSubstituents is iterated; for each
    of its keys the element symbols of the listed atoms are collected
    in order. The result is the number of distinct, non-empty symbol
    sequences. At most four substituents are supported (IndexError
    beyond that).
    """
    substituents = [[], [], [], []]
    # Computed once: this call and the distance matrix were previously
    # re-evaluated on every iteration of the outer loop.
    branches = ChiralDescriptors.determineAtomSubstituents(
        atom.GetIdx(), themol, Chem.GetDistanceMatrix(themol))[0]
    for slot, key in enumerate(branches):
        for subatom in branches[key]:
            substituents[slot].append(
                themol.GetAtomWithIdx(subatom).GetSymbol())
    #
    # Logic to determine e.g. whether repeats of CCCCC are cyclopentyl and pentyl or two of either
    #
    # Counting once at the end equals the last value of the former
    # per-append recount, because the lists only ever grow.
    return len({tuple(symbols) for symbols in substituents if symbols})
Example #30
0
def CalculateDiameter(mol):
    """
    Topological diameter, returned as a log10 value.

    The diameter is the largest value in the distance matrix
    [Petitjean 1992]; a diameter of zero is replaced by MINVALUE
    before the logarithm is taken.
    """
    largest = Chem.GetDistanceMatrix(mol).max()
    return np.log10(MINVALUE if largest == 0 else largest)