Beispiel #1
0
def Molecule_volume(mol=None, gv=None):
    """Build the Gaussian volume description of a molecule.

    Each heavy (non-hydrogen) atom is represented by one ``AtomGaussian``.
    Intersections of two up to ``LEVEL`` atoms are then appended after the
    atom Gaussians and added to the running totals with alternating sign
    (inclusion-exclusion): intersections of an even number of atoms are
    subtracted, intersections of an odd number are added.

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol, optional
        Molecule to describe; its conformer is read via ``GetConformer()``.
        When omitted a new empty ``Chem.rdchem.Mol()`` is used.
    gv : GaussianVolume, optional
        Container that receives the result. The function appends to its
        ``gaussians``, ``childOverlaps`` and ``levels`` lists and indexes
        the atom Gaussians from 0, so it is expected to be freshly
        constructed. A new ``GaussianVolume()`` is created when omitted, so
        separate calls never share state.

    Returns
    -------
    GaussianVolume
        The same ``gv`` object with ``gaussians``, ``childOverlaps``,
        ``levels``, ``volume``, ``centroid`` and ``overlap`` filled in.
        ``centroid`` is the signed, volume-weighted sum of Gaussian centres;
        it is not divided by the volume here.
    """
    # Defaults are created per call: a default instance in the signature
    # would be built once at definition time and mutated by every call.
    if mol is None:
        mol = Chem.rdchem.Mol()
    if gv is None:
        gv = GaussianVolume()

    EPS = 0.03  # minimum relative overlap for an intersection to be kept
    LEVEL = 6  # highest intersection order that is evaluated
    gaussian_weight = 2.828427125

    # Hydrogens are ignored. Slots are allocated for heavy atoms only so that
    # indices 0..N-1 are atom Gaussians and every index >= N is an overlap
    # Gaussian with a valid entry in ``parents``.
    heavy_atoms = [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() != 1]
    N = len(heavy_atoms)

    for _ in range(N):
        gv.childOverlaps.append([])
        gv.gaussians.append(AtomGaussian())

    gv.levels.append(N)
    gv.volume = 0.0
    gv.centroid = np.array([0.0, 0.0, 0.0])

    # parents[i] holds the two indices gv.gaussians[i] was built from
    # (empty for atom Gaussians).
    parents = [[] for _ in range(N)]

    # overlaps[i] holds the atom indices that intersect gv.gaussians[i].
    overlaps = [set() for _ in range(N)]

    # Position in gv.gaussians at which the next overlap Gaussian is stored.
    vecIndex = N

    conf = mol.GetConformer()
    for atomIndex, atom in enumerate(heavy_atoms):
        gaussian = gv.gaussians[atomIndex]

        # Use the atom's own index in the molecule: the heavy-atom counter
        # differs from it as soon as a hydrogen precedes this atom.
        gaussian.centre = np.array(conf.GetAtomPosition(atom.GetIdx()))
        gaussian.alpha = GAlpha(atom.GetAtomicNum())
        gaussian.weight = gaussian_weight
        # Self-overlap of the atom Gaussian is used as its volume.
        gaussian.volume = SHoverlap(gaussian, gaussian)[0]
        gaussian.n = 1

        # First-order (single atom) contribution.
        gv.volume += gaussian.volume
        gv.centroid += gaussian.volume * gaussian.centre

        # Second-order overlaps with every previously processed atom.
        for i in range(atomIndex):
            ga = atomIntersection(gv.gaussians[i], gaussian)

            # Skip intersections that are too small relative to the union.
            if ga.volume / (gv.gaussians[i].volume + gaussian.volume -
                            ga.volume) < EPS:
                continue

            gv.gaussians.append(ga)
            # Empty list that will receive the children of this overlap.
            gv.childOverlaps.append([])

            parents.append([i, atomIndex])
            overlaps.append(set())

            # Pair overlaps are subtracted.
            gv.volume -= ga.volume
            gv.centroid -= ga.volume * ga.centre

            overlaps[i].add(atomIndex)
            # Record the position of the child (vecIndex) on its root (i).
            gv.childOverlaps[i].append(vecIndex)

            vecIndex += 1

    startLevel = N
    nextLevel = len(gv.gaussians)
    gv.levels.append(nextLevel)

    # Higher-order overlaps: extend each Gaussian of the previous level by
    # one more atom.
    for _level in range(2, LEVEL):
        for i in range(startLevel, nextLevel):
            # parents[i] is a pair [a1, a2]
            a1, a2 = parents[i]

            # Atoms that overlap with both parents.
            overlaps[i] = overlaps[a1] & overlaps[a2]

            for element in overlaps[i]:
                # Only extend with atoms of higher index than the last one
                # added, so that each combination is generated once.
                if element <= a2:
                    continue

                ga = atomIntersection(gv.gaussians[i], gv.gaussians[element])

                if ga.volume / (gv.gaussians[i].volume +
                                gv.gaussians[element].volume -
                                ga.volume) < EPS:
                    continue

                gv.gaussians.append(ga)
                # Empty list that will receive the children of this overlap.
                gv.childOverlaps.append([])

                parents.append([i, element])
                overlaps.append(set())

                if ga.n % 2 == 0:
                    # Even-order intersections give a negative contribution.
                    gv.volume -= ga.volume
                    gv.centroid -= ga.volume * ga.centre
                else:
                    # Odd-order intersections give a positive contribution.
                    gv.volume += ga.volume
                    gv.centroid += ga.volume * ga.centre

                # Record the position of the child (vecIndex) on its root (i).
                gv.childOverlaps[i].append(vecIndex)

                vecIndex += 1

        startLevel = nextLevel
        nextLevel = len(gv.gaussians)
        gv.levels.append(nextLevel)

    # Self-overlap of the finished volume.
    gv.overlap = Molecule_overlap(gv, gv)

    return gv
Beispiel #2
0
Datei: ob.py Projekt: joskid/oddt
    def _dicts(self):
        """Build the per-atom, per-ring and (for proteins) per-residue tables.

        Results are stored as read-only NumPy structured arrays on
        ``self._atom_dict``, ``self._ring_dict`` and, when ``self.protein``
        is truthy, ``self._res_dict``. Nothing is returned.
        """
        max_neighbors = 6  # max of 6 neighbors should be enough
        # Atoms
        # NOTE: the field order below must match the positional tuple that is
        # assigned to ``atom_dict[i]`` further down.
        atom_dtype = [
            ('id', np.uint32),
            # atom info
            ('coords', np.float32, 3),
            ('radius', np.float32),
            ('charge', np.float32),
            ('atomicnum', np.int8),
            ('atomtype', 'U5' if PY3 else 'a5'),
            ('hybridization', np.int8),
            ('numhs', np.uint8),
            ('formalcharge', np.int8),
            ('neighbors_id', np.int16, max_neighbors),
            ('neighbors', np.float32, (max_neighbors, 3)),
            # residue info
            ('resid', np.int16),
            ('resnum', np.int16),
            ('resname', 'U3' if PY3 else 'a3'),
            ('isbackbone', bool),
            # atom properties
            ('isacceptor', bool),
            ('isdonor', bool),
            ('isdonorh', bool),
            ('ismetal', bool),
            ('ishydrophobe', bool),
            ('isaromatic', bool),
            ('isminus', bool),
            ('isplus', bool),
            ('ishalogen', bool),
            # secondary structure
            ('isalpha', bool),
            ('isbeta', bool)
        ]

        # One record per atom; np.empty leaves the memory uninitialised, every
        # row is filled in the loop below (assumes self.atoms yields exactly
        # OBMol.NumAtoms() atoms - TODO confirm).
        atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
        # Atomic numbers that are flagged as metals in the 'ismetal' field.
        metals = [
            3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
            31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56,
            57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
            74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93,
            94, 95, 96, 97, 98, 99, 100, 101, 102, 103
        ]
        for i, atom in enumerate(self.atoms):

            atomicnum = atom.atomicnum
            # skip non-polar hydrogens for performance
            #            if atomicnum == 1 and atom.OBAtom.IsNonPolarHydrogen():
            #                continue
            atomtype = typetable.Translate(atom.type)  # sybyl atom type
            partialcharge = atom.partialcharge
            coords = atom.coords

            # Residue data is only looked up for proteins; ``False`` acts as
            # the "no residue" marker for the conditional fields below.
            if self.protein:
                residue = Residue(atom.OBAtom.GetResidue())
            else:
                residue = False

            # get neighbors, but only for those atoms which realy need them
            # Unused slots keep id 0, NaN coordinates and atomic number 0.
            neighbors = np.zeros(max_neighbors,
                                 dtype=[('id', np.int16),
                                        ('coords', np.float32, 3),
                                        ('atomicnum', np.int8)])
            neighbors['coords'].fill(np.nan)
            for n, nbr_atom in enumerate(atom.neighbors):
                # Neighbors beyond max_neighbors are dropped with a warning.
                if n >= max_neighbors:
                    warnings.warn(
                        'Error while parsing molecule "%s" '
                        'for `atom_dict`. Atom #%i (%s) has %i '
                        'neighbors (max_neighbors=%i). Additional '
                        'neighbors are ignored.' %
                        (self.title, atom.idx0, atomtype, len(
                            atom.neighbors), max_neighbors), UserWarning)
                    break
                # Hydrogen neighbors are not stored; slot ``n`` is left at its
                # initial (id 0 / NaN / atomic number 0) value.
                if nbr_atom.atomicnum == 1:
                    continue
                neighbors[n] = (nbr_atom.idx0, nbr_atom.coords,
                                nbr_atom.atomicnum)
            # The row index is used as the atom id, so both must agree.
            assert i == atom.idx0
            atom_dict[i] = (
                i,
                coords,
                ob.GetVdwRad(atomicnum),
                partialcharge,
                atomicnum,
                atomtype,
                atom.OBAtom.GetHyb(),
                # number of attached hydrogens: total degree minus heavy degree
                atom.OBAtom.GetTotalDegree() - atom.OBAtom.GetHvyDegree(),
                atom.formalcharge,
                neighbors['id'],
                neighbors['coords'],
                # residue info
                residue.idx0 if residue else 0,
                residue.number if residue else 0,
                residue.name if residue else '',
                # NOTE(review): property code 2 is presumably Open Babel's
                # BACKBONE flag - confirm against OBResidueAtomProperty.
                residue.OBResidue.GetAtomProperty(atom.OBAtom, 2)
                if residue else False,  # is backbone
                # atom properties
                # acceptor/donor flags start as False and are set from the
                # SMARTS matches below
                False,  # atom.OBAtom.IsHbondAcceptor(),
                False,  # atom.OBAtom.IsHbondDonor(),
                False,  # atom.OBAtom.IsHbondDonorH(),
                atomicnum in metals,
                # carbon whose stored neighbors are all carbon, hydrogen or
                # empty slots (atomic number 0)
                atomicnum == 6 and np.in1d(neighbors['atomicnum'],
                                           [6, 1, 0]).all(),  # hydrophobe
                atom.OBAtom.IsAromatic(),
                atom.formalcharge < 0,  # is charged (minus)
                atom.formalcharge > 0,  # is charged (plus)
                atomicnum in [9, 17, 35, 53],  # is halogen?
                False,  # alpha
                False  # beta
            )

        # Row indices of atoms that are neither hydrogen (1) nor carbon (6);
        # every SMARTS-derived flag below is restricted to these atoms.
        not_carbon = np.argwhere(
            ~np.in1d(atom_dict['atomicnum'], [1, 6])).flatten()
        # Acceptors
        patt = Smarts('[$([O;H1;v2]),'
                      '$([O;H0;v2;!$(O=N-*),'
                      '$([O;-;!$(*-N=O)]),'
                      '$([o;+0])]),'
                      '$([n;+0;!X3;!$([n;H1](cc)cc),'
                      '$([$([N;H0]#[C&v4])]),'
                      '$([N&v3;H0;$(Nc)])]),'
                      '$([F;$(F-[#6]);!$(FC[F,Cl,Br,I])])]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            # Match indices are shifted by -1 before indexing atom_dict
            # (findall results are treated as 1-based here).
            atom_dict['isacceptor'][np.intersect1d(matches - 1,
                                                   not_carbon)] = True

        # Donors
        patt = Smarts(
            '[$([N&!H0&v3,N&!H0&+1&v4,n&H1&+0,$([$([Nv3](-C)(-C)-C)]),'
            '$([$(n[n;H1]),'
            '$(nc[n;H1])])]),'
            # Guanidine can be tautormeic - e.g. Arginine
            '$([NX3,NX2]([!O,!S])!@C(!@[NX3,NX2]([!O,!S]))!@[NX3,NX2]([!O,!S])),'
            '$([O,S;H1;+0])]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isdonor'][np.intersect1d(matches - 1,
                                                not_carbon)] = True
            # Donor hydrogens: every hydrogen bonded to an atom flagged as
            # donor above.
            atom_dict['isdonorh'][[
                n.idx0 for idx in np.argwhere(atom_dict['isdonor']).flatten()
                for n in self.atoms[int(idx)].neighbors if n.atomicnum == 1
            ]] = True

        # Basic group
        # Adds to 'isplus', which already holds atoms with a positive formal
        # charge.
        patt = Smarts(
            '[$([N;H2&+0][$([C,a]);!$([C,a](=O))]),'
            '$([N;H1&+0]([$([C,a]);!$([C,a](=O))])[$([C,a]);!$([C,a](=O))]),'
            '$([N;H0&+0]([C;!$(C(=O))])([C;!$(C(=O))])[C;!$(C(=O))]),'
            '$([N,n;X2;+0])]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isplus'][np.intersect1d(matches - 1, not_carbon)] = True

        # Acidic group
        # Adds to 'isminus'; the matched carbon is removed by the
        # intersection with not_carbon, leaving the oxygens.
        patt = Smarts('[CX3](=O)[OX1H0-,OX2H1]')
        matches = np.array(patt.findall(self)).flatten()
        if len(matches) > 0:
            atom_dict['isminus'][np.intersect1d(matches - 1,
                                                not_carbon)] = True

        if self.protein:
            # Protein Residues (alpha helix and beta sheet)
            res_dtype = [('id', np.int16), ('resnum', np.int16),
                         ('resname', 'U3' if PY3 else 'a3'),
                         ('N', np.float32, 3), ('CA', np.float32, 3),
                         ('C', np.float32, 3), ('O', np.float32, 3),
                         ('isalpha', bool), ('isbeta', bool)]  # N, CA, C, O

            b = []
            for residue in self.residues:
                # Collect the coordinates of the four backbone atoms.
                backbone = {}
                for atom in residue:
                    # NOTE(review): property code 1 is presumably Open Babel's
                    # AMINO_BACKBONE flag - confirm.
                    if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                        if atom.atomicnum == 7:
                            backbone['N'] = atom.coords
                        elif atom.atomicnum == 6:
                            # atom type 'C3' is taken as the alpha carbon, any
                            # other backbone carbon as the carbonyl carbon
                            if atom.type == 'C3':
                                backbone['CA'] = atom.coords
                            else:
                                backbone['C'] = atom.coords
                        elif atom.atomicnum == 8:
                            backbone['O'] = atom.coords
                # Residues lacking any of N, CA, C, O are left out.
                if len(backbone.keys()) == 4:
                    b.append((residue.idx0, residue.number, residue.name,
                              backbone['N'], backbone['CA'], backbone['C'],
                              backbone['O'], False, False))
            res_dict = np.array(b, dtype=res_dtype)
            res_dict = detect_secondary_structure(res_dict)
            # Copy the per-residue secondary structure flags onto the atoms
            # of the corresponding residues.
            alpha_mask = np.in1d(atom_dict['resid'],
                                 res_dict[res_dict['isalpha']]['id'])
            atom_dict['isalpha'][alpha_mask] = True
            beta_mask = np.in1d(atom_dict['resid'],
                                res_dict[res_dict['isbeta']]['id'])
            atom_dict['isbeta'][beta_mask] = True

        # Aromatic Rings
        r = []
        for ring in self.sssr:
            if ring.IsAromatic():
                path = [x - 1 for x in ring._path]  # NOTE: mol.sssr is 1-based
                atoms = atom_dict[canonize_ring_path(path)]
                if len(atoms):
                    # Residue and secondary structure info of the ring is
                    # taken from its first atom.
                    atom = atoms[0]
                    coords = atoms['coords']
                    centroid = coords.mean(axis=0)
                    # get vector perpendicular to ring
                    # (mean cross product of consecutive centroid-to-atom
                    # vectors)
                    ring_vectors = coords - centroid
                    vector = np.cross(ring_vectors,
                                      np.roll(ring_vectors, shift=-1,
                                              axis=0)).mean(axis=0)
                    r.append(
                        (centroid, vector, atom['resid'], atom['resnum'],
                         atom['resname'], atom['isalpha'], atom['isbeta']))
        ring_dict = np.array(r,
                             dtype=[('centroid', np.float32, 3),
                                    ('vector', np.float32, 3),
                                    ('resid', np.int16), ('resnum', np.int16),
                                    ('resname', 'U3' if PY3 else 'a3'),
                                    ('isalpha', bool), ('isbeta', bool)])

        # Store the tables on the instance and make them read-only.
        self._atom_dict = atom_dict
        self._atom_dict.setflags(write=False)
        self._ring_dict = ring_dict
        self._ring_dict.setflags(write=False)
        if self.protein:
            self._res_dict = res_dict
            self._res_dict.setflags(write=False)
Beispiel #3
0
 def vdw_radius(atomic_num):
     """Return the value of ``ob.GetVdwRad`` for the given atomic number."""
     radius = ob.GetVdwRad(atomic_num)
     return radius