Example #1
0
def test_dihedral():
    """Check dihedral() on two hand-built point sets and on a benzene ring."""
    # The first three points are the same in both synthetic cases; only the
    # fourth point (and therefore the expected torsion) differs.
    synthetic_cases = (
        ((1, 1, 0), 0),
        ((1, 1, 1), -45),
    )
    for fourth_point, expected in synthetic_cases:
        points = [np.array(xyz)
                  for xyz in ((1, 0, 0), (0, 0, 0), (0, 1, 0), fourth_point)]
        assert_array_almost_equal(dihedral(*points), expected)

    # Benzene ring: torsion over the first four atoms must stay below 2
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    benzene.make3D()
    ring_torsion = dihedral(*benzene.coords[:4])
    assert abs(ring_torsion) < 2.
Example #2
0
def test_dihedral():
    """Verify dihedral() for two synthetic point sets and a benzene ring."""

    def torsion_to(fourth):
        # Both synthetic cases share the first three points
        return dihedral(np.array((1, 0, 0)),
                        np.array((0, 0, 0)),
                        np.array((0, 1, 0)),
                        np.array(fourth))

    # Synthetic dihedrals
    assert_array_almost_equal(torsion_to((1, 1, 0)), 0)
    assert_array_almost_equal(torsion_to((1, 1, 1)), -45)

    # Benzene ring dihedral: expected to be 0 to one decimal place
    ring = oddt.toolkit.readstring('smi', 'c1ccccc1')
    ring.make3D()
    first_four = [ring.coords[k] for k in range(4)]
    assert_array_almost_equal(dihedral(*first_four), 0, decimal=1)
Example #3
0
def test_dihedral():
    """Exercise dihedral() with explicit points and with benzene coordinates."""

    def shared_points():
        # Fresh arrays for the three points common to both synthetic cases
        return np.array((1, 0, 0)), np.array((0, 0, 0)), np.array((0, 1, 0))

    # Fourth point (1, 1, 0): expected torsion 0
    p1, p2, p3 = shared_points()
    flat_torsion = dihedral(p1, p2, p3, np.array((1, 1, 0)))
    assert_array_almost_equal(flat_torsion, 0)

    # Fourth point (1, 1, 1): expected torsion -45
    p1, p2, p3 = shared_points()
    tilted_torsion = dihedral(p1, p2, p3, np.array((1, 1, 1)))
    assert_array_almost_equal(tilted_torsion, -45)

    # Check benzene ring dihedral (0 to one decimal place)
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    benzene.make3D()
    ring_torsion = dihedral(benzene.coords[0], benzene.coords[1],
                            benzene.coords[2], benzene.coords[3])
    assert_array_almost_equal(ring_torsion, 0, decimal=1)
Example #4
0
def test_spatial():
    """Test angle, dihedral, rotate and rmsd from the spatial module."""

    def vec(*xyz):
        # Fresh coordinate array for every argument
        return np.array(xyz)

    # --- Angles -----------------------------------------------------------
    right_angle = angle(vec(1, 0, 0), vec(0, 0, 0), vec(0, 1, 0))
    assert_array_almost_equal(right_angle, 90)

    half_right_angle = angle(vec(1, 0, 0), vec(0, 0, 0), vec(1, 1, 0))
    assert_array_almost_equal(half_right_angle, 45)

    mol = oddt.toolkit.readstring('smi', 'c1ccccc1')
    mol.make3D()

    # Benzene ring angle (120 to one decimal place)
    ring_angle = angle(mol.coords[0], mol.coords[1], mol.coords[2])
    assert_array_almost_equal(ring_angle, 120, decimal=1)

    # --- Dihedrals --------------------------------------------------------
    flat_torsion = dihedral(vec(1, 0, 0), vec(0, 0, 0), vec(0, 1, 0),
                            vec(1, 1, 0))
    assert_array_almost_equal(flat_torsion, 0)

    tilted_torsion = dihedral(vec(1, 0, 0), vec(0, 0, 0), vec(0, 1, 0),
                              vec(1, 1, 1))
    assert_array_almost_equal(tilted_torsion, -45)

    # Benzene ring dihedral (0 to one decimal place)
    ring_torsion = dihedral(mol.coords[0], mol.coords[1],
                            mol.coords[2], mol.coords[3])
    assert_array_almost_equal(ring_torsion, 0, decimal=1)

    # --- Rotation and RMSD on a fresh benzene and its clone -----------------
    mol = oddt.toolkit.readstring('smi', 'c1ccccc1')
    mol.make3D()
    mol2 = mol.clone

    # Rotating by pi around every axis must leave the coordinates unchanged
    fully_rotated = rotate(mol2.coords, np.pi, np.pi, np.pi)
    assert_almost_equal(mol2.coords, fully_rotated)

    # Rotate perpendicular to ring
    mol2.coords = rotate(mol2.coords, 0, 0, np.pi)

    # Plain RMSD
    plain_rmsd = rmsd(mol, mol2, method=None)
    assert_almost_equal(plain_rmsd, 2.77, decimal=1)
    # Hungarian must be close to zero (RDKit is 0.3)
    hungarian_rmsd = rmsd(mol, mol2, method='hungarian')
    assert_almost_equal(hungarian_rmsd, 0, decimal=0)

    # --- RMSD between docked poses of a single molecule ---------------------
    docked_path = os.path.join(test_data_dir,
                               'data/dude/xiap/actives_docked.sdf')
    all_poses = list(oddt.toolkit.readfile('sdf', docked_path))
    mols = [pose for pose in all_poses if pose.title == '312335']
    reference, others = mols[0], mols[1:]

    default_rmsds = [rmsd(reference, pose) for pose in others]
    assert_array_almost_equal(
        default_rmsds,
        [4.753552, 2.501487, 2.7941732, 1.1281863, 0.74440968,
         1.6256877, 4.762476, 2.7167852, 2.5504358, 1.9303833,
         2.6200771, 3.1741529, 3.225431, 4.7784939, 4.8035369,
         7.8962774, 2.2385094, 4.8625236, 3.2036853])

    hungarian_rmsds = [rmsd(reference, pose, method='hungarian')
                       for pose in others]
    assert_array_almost_equal(
        hungarian_rmsds,
        [2.5984519, 1.7295024, 1.1268076, 1.0285776, 0.73529714,
         1.4094033, 2.5195069, 1.7449125, 1.5116163, 1.7796179,
         2.6064286, 3.1576841, 3.2135022, 3.1675091, 2.7001681,
         5.1263351, 2.0836117, 3.542397, 3.1873631])
Example #5
0
    def _dicts(self):
        """Build the atom, ring and (for proteins) residue lookup tables.

        The tables are NumPy structured arrays stored on the instance as
        ``_atom_dict``, ``_ring_dict`` and, when ``self.protein`` is truthy,
        ``_res_dict``. Nothing is returned.
        """
        # Atoms
        atom_dtype = [('id', 'int16'),
                 # atom info
                 ('coords', 'float16', 3),
                 ('charge', 'float16'),
                 ('atomicnum', 'int8'),
                 ('atomtype','a4'),
                 ('hybridization', 'int8'),
                 ('neighbors', 'float16', (4,3)), # coordinates of the first 4 neighbors, used for angles
                 # residue info
                 ('resid', 'int16'),
                 ('resname', 'a3'),
                 ('isbackbone', 'bool'),
                 # atom properties
                 ('isacceptor', 'bool'),
                 ('isdonor', 'bool'),
                 ('isdonorh', 'bool'),
                 ('ismetal', 'bool'),
                 ('ishydrophobe', 'bool'),
                 ('isaromatic', 'bool'),
                 ('isminus', 'bool'),
                 ('isplus', 'bool'),
                 ('ishalogen', 'bool'),
                 # secondary structure
                 ('isalpha', 'bool'),
                 ('isbeta', 'bool')
                 ]

        atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
        # Atomic numbers treated as metals; a frozenset gives O(1) membership tests
        metals = frozenset([3,4,11,12,13,19,20,21,22,23,24,25,26,27,28,29,30,31,
                            37,38,39,40,41,42,43,44,45,46,47,48,49,50,
                            55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,
                            73,74,75,76,77,78,79,80,81,82,83,
                            87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103])
        for i, atom in enumerate(self.atoms):

            atomicnum = atom.atomicnum
            atomtype = typetable.Translate(atom.type) # sybyl atom type
            partialcharge = atom.partialcharge
            coords = atom.coords

            if self.protein:
                residue = pybel.Residue(atom.OBAtom.GetResidue())
            else:
                residue = False

            # Neighbor table: unused slots keep NaN coordinates and atomic
            # number 0. (Filling the whole record with NaN would push NaN
            # into the int8 field, whose result is undefined.)
            neighbors = np.zeros(4, dtype=[('coords', 'float16', 3),('atomicnum', 'int8')])
            neighbors['coords'].fill(np.nan)
            for n, nbr_atom in enumerate(atom.neighbors):
                # consider raising neighbors list to 6, but must do some benchmarks
                if n > 3:
                    break
                neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)
            atom_dict[i] = (atom.idx,
                      coords,
                      partialcharge,
                      atomicnum,
                      atomtype,
                      atom.OBAtom.GetHyb(),
                      neighbors['coords'],
                      # residue info
                      residue.idx if residue else 0,
                      residue.name if residue else '',
                      residue.OBResidue.GetAtomProperty(atom.OBAtom, 2) if residue else False, # is backbone
                      # atom properties
                      atom.OBAtom.IsHbondAcceptor(),
                      atom.OBAtom.IsHbondDonor(),
                      atom.OBAtom.IsHbondDonorH(),
                      atomicnum in metals,
                      # hydrophobe: carbon whose recorded neighbors are all C or H
                      # (0 marks an empty neighbor slot)
                      atomicnum == 6 and np.in1d(neighbors['atomicnum'], [6,1,0]).all(),
                      atom.OBAtom.IsAromatic(),
                      # is charged (minus); the comma after '02-' was missing, which
                      # silently merged the last two entries into '02-O-'.
                      # NOTE(review): '02-' starts with a digit zero - probably meant 'O2-'; confirm
                      atomtype in ['O3-', '02-', 'O-'],
                      atomtype in ['N3+', 'N2+', 'Ng+'], # is charged (plus)
                      atomicnum in [9,17,35,53], # is halogen?
                      False, # alpha
                      False # beta
                      )

        if self.protein:
            # Protein Residues (alpha helix and beta sheet)
            res_dtype = [('id', 'int16'),
                         ('resname', 'a3'),
                         ('N', 'float16', 3),
                         ('CA', 'float16', 3),
                         ('C', 'float16', 3),
                         ('isalpha', 'bool'),
                         ('isbeta', 'bool')
                         ] # N, CA, C

            b = []
            for residue in self.residues:
                backbone = {}
                for atom in residue:
                    # property code 1 - presumably Open Babel's amino-backbone flag; confirm
                    if residue.OBResidue.GetAtomProperty(atom.OBAtom,1):
                        if atom.atomicnum == 7:
                            backbone['N'] = atom.coords
                        elif atom.atomicnum == 6:
                            if atom.type == 'C3':
                                backbone['CA'] = atom.coords
                            else:
                                backbone['C'] = atom.coords
                # keep only residues for which N, CA and C were all found
                if len(backbone) == 3:
                    b.append((residue.idx, residue.name, backbone['N'],  backbone['CA'], backbone['C'], False, False))
            res_dict = np.array(b, dtype=res_dtype)

            # detect secondary structure by phi and psi angles
            first = res_dict[:-1]
            second = res_dict[1:]
            psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
            phi = dihedral(first['C'], second['N'], second['CA'], second['C'])
            # mark residues and their atoms belonging to alpha and beta
            res_mask_alpha = np.where(((phi > -145) & (phi < -35) & (psi > -70) & (psi < 50))) # alpha
            res_dict['isalpha'][res_mask_alpha] = True
            atom_dict['isalpha'][np.in1d(atom_dict['resid'], res_dict[res_mask_alpha]['id'])] = True

            res_mask_beta = np.where(((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) | ((phi >= -180) & (phi < -70) & (psi <= -165))) # beta
            res_dict['isbeta'][res_mask_beta] = True
            atom_dict['isbeta'][np.in1d(atom_dict['resid'], res_dict[res_mask_beta]['id'])] = True

        # Aromatic Rings
        r = []
        for ring in self.sssr:
            if ring.IsAromatic():
                path = ring._path
                # the first atom of the ring supplies the ring's alpha/beta flags
                atom = atom_dict[atom_dict['id'] == path[0]]
                coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
                centroid = coords.mean(axis=0)
                # get vector perpendicular to ring
                vector = np.cross(coords - np.vstack((coords[1:],coords[:1])), np.vstack((coords[1:],coords[:1])) - np.vstack((coords[2:],coords[:2]))).mean(axis=0) - centroid
                r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
        ring_dict = np.array(r, dtype=[('centroid', 'float16', 3),('vector', 'float16', 3),('isalpha', 'bool'),('isbeta', 'bool'),])

        self._atom_dict = atom_dict
        self._ring_dict = ring_dict
        if self.protein:
            self._res_dict = res_dict
Example #6
0
def detect_secondary_structure(res_dict):
    """Flag alpha-helix and beta-sheet residues in ``res_dict``.

    Candidates are picked from the phi/psi angles of successive residues
    (only where their ``id`` values differ by exactly 1), then filtered by
    N-O (and, for sheets, CA-CA) distance criteria. Runs shorter than three
    residues are discarded both before and after that filtering.

    The ``isalpha`` and ``isbeta`` fields of ``res_dict`` are modified in
    place and ``res_dict`` is returned. Fields read: ``id``, ``N``, ``CA``,
    ``C`` and ``O``.
    """

    def consecutive_runs(indices):
        # Cut a sorted index array wherever two neighbours are not adjacent
        cut_points = np.flatnonzero(np.diff(indices) != 1) + 1
        return np.split(indices, cut_points)

    def flag_long_runs(field, pair_mask):
        # NOTE(review): the original unions argwhere(mask) with itself, which
        # only flattens/uniquifies it - confirm no "+ 1" offset was intended
        candidates = np.unique(np.argwhere(pair_mask))
        for run in consecutive_runs(candidates):
            if len(run) >= 3:
                res_dict[field][run] = True

    def unflag(field, keep):
        # Clear the flag on currently flagged residues not selected by `keep`
        flagged = np.flatnonzero(res_dict[field])
        res_dict[field][flagged[~keep]] = False

    def unflag_short_runs(field):
        for run in consecutive_runs(np.flatnonzero(res_dict[field])):
            if 0 < len(run) < 3:
                res_dict[field][run] = False

    current, following = res_dict[:-1], res_dict[1:]
    psi = dihedral(current['N'], current['CA'], current['C'], following['N'])
    phi = dihedral(current['C'], following['N'], following['CA'],
                   following['C'])
    sequential = (following['id'] - current['id']) == 1

    # ---- Alpha helices ----
    helix_pairs = ((phi > -145) & (phi < -35) &
                   (psi > -70) & (psi < 50) & sequential)
    flag_long_runs('isalpha', helix_pairs)

    # Alpha helices have to form H-Bonds
    helix = res_dict[res_dict['isalpha']]
    far_in_sequence = np.abs(helix['id'] - helix['id'][:, np.newaxis]) >= 3
    # distance() presumably returns the pairwise distance matrix - confirm
    n_o_close = distance(helix['N'], helix['O']) < 3.5
    bonded = n_o_close & far_in_sequence
    unflag('isalpha', bonded.any(axis=0) | bonded.any(axis=1))
    unflag_short_runs('isalpha')

    # ---- Beta sheets ----
    wide_region = (phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)
    narrow_region = (phi >= -180) & (phi < -70) & (psi <= -165)
    sheet_pairs = (wide_region & sequential) | (narrow_region & sequential)
    flag_long_runs('isbeta', sheet_pairs)

    # Beta strands have to be alongside each other
    strand = res_dict[res_dict['isbeta']]
    far_in_sequence = np.abs(strand['id'] - strand['id'][:, np.newaxis]) >= 4
    n_o_close = distance(strand['N'], strand['O']) < 3.5
    ca_close = distance(strand['CA'], strand['CA']) < 4.5
    paired = ((n_o_close & far_in_sequence).any(axis=0) |
              (n_o_close & far_in_sequence).any(axis=1) |
              (ca_close & far_in_sequence).any(axis=0))
    unflag('isbeta', paired)
    unflag_short_runs('isbeta')

    return res_dict
Example #7
0
File: common.py Project: oddt/oddt
def detect_secondary_structure(res_dict):
    """Mark alpha helices and beta sheets in ``res_dict`` from phi/psi angles.

    Successive residues (``id`` differing by 1) whose phi/psi angles fall in
    the helix or sheet windows are flagged when they form runs of at least
    three. Flags are then removed from residues that fail the N-O (and, for
    sheets, CA-CA) distance checks against residues far enough away in
    ``resnum``, and finally from runs left shorter than three.

    ``res_dict['isalpha']`` and ``res_dict['isbeta']`` are updated in place;
    the same ``res_dict`` is returned.
    """

    def split_runs(idx):
        # Break a sorted index array into stretches of consecutive integers
        return np.split(idx, np.flatnonzero(np.diff(idx) != 1) + 1)

    prev_res = res_dict[:-1]
    next_res = res_dict[1:]
    psi = dihedral(prev_res['N'], prev_res['CA'], prev_res['C'], next_res['N'])
    phi = dihedral(prev_res['C'], next_res['N'], next_res['CA'], next_res['C'])
    adjacent = (next_res['id'] - prev_res['id']) == 1

    # Angle windows for the two structure types
    helix_window = (phi > -145) & (phi < -35) & (psi > -70) & (psi < 50)
    sheet_window = (((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) |
                    ((phi >= -180) & (phi < -70) & (psi <= -165)))

    # ------------------------------ Alpha helices ------------------------------
    # NOTE(review): the original takes union1d of argwhere(mask) with itself,
    # i.e. just the sorted unique indices - confirm that is what was intended
    helix_idx = np.unique(np.argwhere(helix_window & adjacent))

    # Ignore groups smaller than 3
    for run in split_runs(helix_idx):
        if len(run) >= 3:
            res_dict['isalpha'][run] = True

    # Alpha helices have to form H-Bonds
    helix_res = res_dict[res_dict['isalpha']]
    seq_gap_ok = np.abs(helix_res['resnum'] -
                        helix_res['resnum'][:, np.newaxis]) >= 3
    # distance() presumably yields a pairwise distance matrix - confirm
    hbond = (distance(helix_res['N'], helix_res['O']) < 3.5) & seq_gap_ok
    has_partner = hbond.any(axis=0) | hbond.any(axis=1)
    flagged = np.flatnonzero(res_dict['isalpha'])
    res_dict['isalpha'][flagged[~has_partner]] = False

    # Ignore groups smaller than 3
    for run in split_runs(np.flatnonzero(res_dict['isalpha'])):
        if 0 < len(run) < 3:
            res_dict['isalpha'][run] = False

    # ------------------------------- Beta sheets -------------------------------
    sheet_idx = np.unique(np.argwhere(sheet_window & adjacent))

    # Ignore groups smaller than 3
    for run in split_runs(sheet_idx):
        if len(run) >= 3:
            res_dict['isbeta'][run] = True

    # Beta strands have to be alongside each other
    strand_res = res_dict[res_dict['isbeta']]
    seq_gap_ok = np.abs(strand_res['resnum'] -
                        strand_res['resnum'][:, np.newaxis]) >= 4
    hbond = (distance(strand_res['N'], strand_res['O']) < 3.5) & seq_gap_ok
    ca_near = (distance(strand_res['CA'], strand_res['CA']) < 4.5) & seq_gap_ok
    has_partner = hbond.any(axis=0) | hbond.any(axis=1) | ca_near.any(axis=0)
    flagged = np.flatnonzero(res_dict['isbeta'])
    res_dict['isbeta'][flagged[~has_partner]] = False

    # Ignore groups smaller than 3
    for run in split_runs(np.flatnonzero(res_dict['isbeta'])):
        if 0 < len(run) < 3:
            res_dict['isbeta'][run] = False

    return res_dict
Example #8
0
    def _dicts(self):
        """Build the atom, ring and (for proteins) residue lookup tables.

        The tables are NumPy structured arrays stored on the instance as
        ``_atom_dict`` and ``_ring_dict`` (both made read-only) and, when
        ``self.protein`` is truthy, ``_res_dict``. Nothing is returned.
        """
        # Atoms
        atom_dtype = [('id', 'int16'),
                 # atom info
                 ('coords', 'float32', 3),
                 ('radius', 'float32'),
                 ('charge', 'float32'),
                 ('atomicnum', 'int8'),
                 ('atomtype','a4'),
                 ('hybridization', 'int8'),
                 ('neighbors', 'float32', (4,3)), # coordinates of the first 4 neighbors, used for angles
                 # residue info
                 ('resid', 'int16'),
                 ('resname', 'a3'),
                 ('isbackbone', 'bool'),
                 # atom properties
                 ('isacceptor', 'bool'),
                 ('isdonor', 'bool'),
                 ('isdonorh', 'bool'),
                 ('ismetal', 'bool'),
                 ('ishydrophobe', 'bool'),
                 ('isaromatic', 'bool'),
                 ('isminus', 'bool'),
                 ('isplus', 'bool'),
                 ('ishalogen', 'bool'),
                 # secondary structure
                 ('isalpha', 'bool'),
                 ('isbeta', 'bool')
                 ]

        atom_dict = np.empty(self.Mol.GetNumAtoms(), dtype=atom_dtype)
        # Atomic numbers treated as metals; a frozenset gives O(1) membership tests
        metals = frozenset([3,4,11,12,13,19,20,21,22,23,24,25,26,27,28,29,30,31,
                            37,38,39,40,41,42,43,44,45,46,47,48,49,50,
                            55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,
                            73,74,75,76,77,78,79,80,81,82,83,
                            87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103])
        for i, atom in enumerate(self.atoms):

            atomicnum = atom.atomicnum
            partialcharge = atom.partialcharge
            coords = atom.coords
            atomtype = atom.Atom.GetProp("_TriposAtomType") if atom.Atom.HasProp("_TriposAtomType") else atom.Atom.GetSymbol()
            if self.protein:
                residue = atom.Atom.GetMonomerInfo()
            else:
                residue = False

            # Neighbor table: unused slots keep NaN coordinates and atomic number 0
            neighbors = np.zeros(4, dtype=[('coords', 'float32', 3),('atomicnum', 'int8')])
            neighbors['coords'].fill(np.nan)
            for n, nbr_atom in enumerate(atom.neighbors):
                # only 4 slots exist; without this guard an atom with more
                # neighbors would raise IndexError
                if n > 3:
                    break
                neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)
            atom_dict[i] = (atom.idx,
                      coords,
                      elementtable.GetRvdw(atomicnum),
                      partialcharge,
                      atomicnum,
                      atomtype,
                      np.clip(atom.Atom.GetHybridization()-1, 0, 3),
                      neighbors['coords'],
                      # residue info
                      residue.GetResidueNumber() if residue else 0,
                      residue.GetResidueName() if residue else '',
                      False, # is backbone (filled in below from the SMARTS match)
                      # atom properties
                      False, #IsHbondAcceptor (filled in below from features)
                      False, #IsHbondDonor (filled in below from features)
                      False, #IsHbondDonorH,
                      atomicnum in metals,
                      # hydrophobe: carbon whose recorded neighbors are all C or H
                      # (0 marks an empty neighbor slot)
                      atomicnum == 6 and np.in1d(neighbors['atomicnum'], [6,1,0]).all(),
                      atom.Atom.GetIsAromatic(),
                      # is charged (minus); the comma after '02-' was missing, which
                      # silently merged the last two entries into '02-O-'.
                      # NOTE(review): '02-' starts with a digit zero - probably meant 'O2-'; confirm
                      atomtype in ['O3-', '02-', 'O-'] or atom.formalcharge < 0,
                      atomtype in ['N3+', 'N2+', 'Ng+'] or atom.formalcharge > 0, # is charged (plus)
                      atomicnum in [9,17,35,53], # is halogen?
                      False, # alpha
                      False # beta
                      )

        # Match features and mark them in atom_dict
        translate_feats = {
                   'Donor':'isdonor',
                   'Acceptor':'isacceptor',
                   'NegIonizable':'isminus',
                   'PosIonizable':'isplus',
                   }

        # build residue dictionary
        if self.protein:
            # for protein finding features per residue is much faster
            for res in self.residues:
                # .items() works on both Python 2 and 3 (.iteritems() is Python 2 only)
                for feat_name, field in translate_feats.items():
                    feats = base_feature_factory.GetFeaturesForMol(res.Residue,includeOnly=feat_name)
                    # res.atommap translates residue-local atom indices to molecule indices
                    atom_dict[field][[res.atommap[idx] for feat in feats for idx in feat.GetAtomIds()]] = True
            # Protein Residues (alpha helix and beta sheet)
            res_dtype = [('id', 'int16'),
                         ('resname', 'a3'),
                         ('N', 'float32', 3),
                         ('CA', 'float32', 3),
                         ('C', 'float32', 3),
                         ('isalpha', 'bool'),
                         ('isbeta', 'bool')
                         ] # N, CA, C
            b = []
            aa = Chem.MolFromSmarts('[NX3,NX4+][CX4H,CX4H2][CX3](=[OX1])[O,N]') # amino backbone SMARTS
            conf = self.Mol.GetConformer()
            for path in self.Mol.GetSubstructMatches(aa):
                atom_dict['isbackbone'][np.array(path)] = True
                # the first three matched atoms are N, CA and C, in SMARTS order
                residue = self.Mol.GetAtomWithIdx(path[0]).GetMonomerInfo()
                b.append((residue.GetResidueNumber(), residue.GetResidueName(), conf.GetAtomPosition(path[0]), conf.GetAtomPosition(path[1]), conf.GetAtomPosition(path[2]), False, False))
            res_dict = np.array(b, dtype=res_dtype)

            # detect secondary structure by phi and psi angles
            first = res_dict[:-1]
            second = res_dict[1:]
            psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
            phi = dihedral(first['C'], second['N'], second['CA'], second['C'])
            # mark residues and their atoms belonging to alpha and beta
            res_mask_alpha = np.where(((phi > -145) & (phi < -35) & (psi > -70) & (psi < 50))) # alpha
            res_dict['isalpha'][res_mask_alpha] = True
            atom_dict['isalpha'][np.in1d(atom_dict['resid'], res_dict[res_mask_alpha]['id'])] = True

            res_mask_beta = np.where(((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) | ((phi >= -180) & (phi < -70) & (psi <= -165))) # beta
            res_dict['isbeta'][res_mask_beta] = True
            atom_dict['isbeta'][np.in1d(atom_dict['resid'], res_dict[res_mask_beta]['id'])] = True
        else:
            # find features for ligands
            for feat_name, field in translate_feats.items():
                feats = base_feature_factory.GetFeaturesForMol(self.Mol,includeOnly=feat_name)
                atom_dict[field][[idx for feat in feats for idx in feat.GetAtomIds()]] = True

        ### FIX: remove acidic carbons from isminus group (they are part of smarts)
        atom_dict['isminus'][atom_dict['isminus'] & (atom_dict['atomicnum'] == 6)] = False

        # Aromatic Rings
        r = []
        for path in self.sssr:
            if self.Mol.GetAtomWithIdx(path[0]).GetIsAromatic():
                # the first atom of the ring supplies the ring's alpha/beta flags
                atom = atom_dict[atom_dict['id'] == path[0]]
                coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
                centroid = coords.mean(axis=0)
                # get vector perpendicular to ring
                vector = np.cross(coords - np.vstack((coords[1:],coords[:1])), np.vstack((coords[1:],coords[:1])) - np.vstack((coords[2:],coords[:2]))).mean(axis=0) - centroid
                r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
        ring_dict = np.array(r, dtype=[('centroid', 'float32', 3),('vector', 'float32', 3),('isalpha', 'bool'),('isbeta', 'bool'),])

        self._atom_dict = atom_dict
        self._atom_dict.setflags(write=False)
        self._ring_dict = ring_dict
        self._ring_dict.setflags(write=False)
        if self.protein:
            self._res_dict = res_dict
Example #9
0
    def _dicts(self):
        """Build the atom, ring and (for proteins) residue lookup tables.

        The tables are NumPy structured arrays stored on the instance as
        ``_atom_dict``, ``_ring_dict`` and, when ``self.protein`` is truthy,
        ``_res_dict``. Nothing is returned.
        """
        # Atoms
        atom_dtype = [
            ('id', 'int16'),
            # atom info
            ('coords', 'float16', 3),
            ('charge', 'float16'),
            ('atomicnum', 'int8'),
            ('atomtype', 'a4'),
            ('hybridization', 'int8'),
            # coordinates of the first 4 neighbors, used for angles
            ('neighbors', 'float16', (4, 3)),
            # residue info
            ('resid', 'int16'),
            ('resname', 'a3'),
            ('isbackbone', 'bool'),
            # atom properties
            ('isacceptor', 'bool'),
            ('isdonor', 'bool'),
            ('isdonorh', 'bool'),
            ('ismetal', 'bool'),
            ('ishydrophobe', 'bool'),
            ('isaromatic', 'bool'),
            ('isminus', 'bool'),
            ('isplus', 'bool'),
            ('ishalogen', 'bool'),
            # secondary structure
            ('isalpha', 'bool'),
            ('isbeta', 'bool')
        ]

        atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
        # Atomic numbers treated as metals; a frozenset gives O(1) membership
        metals = frozenset([
            3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
            31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56,
            57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73,
            74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93,
            94, 95, 96, 97, 98, 99, 100, 101, 102, 103
        ])
        for i, atom in enumerate(self.atoms):

            atomicnum = atom.atomicnum
            atomtype = typetable.Translate(atom.type)  # sybyl atom type
            partialcharge = atom.partialcharge
            coords = atom.coords

            if self.protein:
                residue = pybel.Residue(atom.OBAtom.GetResidue())
            else:
                residue = False

            # Neighbor table: unused slots keep NaN coordinates and atomic
            # number 0. (Filling the whole record with NaN would push NaN
            # into the int8 field, whose result is undefined.)
            neighbors = np.zeros(4,
                                 dtype=[('coords', 'float16', 3),
                                        ('atomicnum', 'int8')])
            neighbors['coords'].fill(np.nan)
            for n, nbr_atom in enumerate(atom.neighbors):
                # consider raising neighbors list to 6, but must do some benchmarks
                if n > 3:
                    break
                neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)
            atom_dict[i] = (
                atom.idx,
                coords,
                partialcharge,
                atomicnum,
                atomtype,
                atom.OBAtom.GetHyb(),
                neighbors['coords'],
                # residue info
                residue.idx if residue else 0,
                residue.name if residue else '',
                residue.OBResidue.GetAtomProperty(atom.OBAtom, 2)
                if residue else False,  # is backbone
                # atom properties
                atom.OBAtom.IsHbondAcceptor(),
                atom.OBAtom.IsHbondDonor(),
                atom.OBAtom.IsHbondDonorH(),
                atomicnum in metals,
                # hydrophobe: carbon whose recorded neighbors are all C or H
                # (0 marks an empty neighbor slot)
                atomicnum == 6 and np.in1d(neighbors['atomicnum'],
                                           [6, 1, 0]).all(),
                atom.OBAtom.IsAromatic(),
                # is charged (minus); the comma after '02-' was missing, which
                # silently merged the last two entries into '02-O-'.
                # NOTE(review): '02-' starts with a digit zero - probably
                # meant 'O2-'; confirm
                atomtype in ['O3-', '02-', 'O-'],
                atomtype in ['N3+', 'N2+', 'Ng+'],  # is charged (plus)
                atomicnum in [9, 17, 35, 53],  # is halogen?
                False,  # alpha
                False  # beta
            )

        if self.protein:
            # Protein Residues (alpha helix and beta sheet)
            res_dtype = [('id', 'int16'), ('resname', 'a3'),
                         ('N', 'float16', 3), ('CA', 'float16', 3),
                         ('C', 'float16', 3), ('isalpha', 'bool'),
                         ('isbeta', 'bool')]  # N, CA, C

            b = []
            for residue in self.residues:
                backbone = {}
                for atom in residue:
                    # property code 1 - presumably Open Babel's
                    # amino-backbone flag; confirm
                    if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                        if atom.atomicnum == 7:
                            backbone['N'] = atom.coords
                        elif atom.atomicnum == 6:
                            if atom.type == 'C3':
                                backbone['CA'] = atom.coords
                            else:
                                backbone['C'] = atom.coords
                # keep only residues for which N, CA and C were all found
                if len(backbone) == 3:
                    b.append((residue.idx, residue.name, backbone['N'],
                              backbone['CA'], backbone['C'], False, False))
            res_dict = np.array(b, dtype=res_dtype)

            # detect secondary structure by phi and psi angles
            first = res_dict[:-1]
            second = res_dict[1:]
            psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
            phi = dihedral(first['C'], second['N'], second['CA'], second['C'])
            # mark residues and their atoms belonging to alpha and beta
            res_mask_alpha = np.where(((phi > -145) & (phi < -35) & (psi > -70)
                                       & (psi < 50)))  # alpha
            res_dict['isalpha'][res_mask_alpha] = True
            atom_dict['isalpha'][np.in1d(atom_dict['resid'],
                                         res_dict[res_mask_alpha]['id'])] = True

            res_mask_beta = np.where(
                ((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) |
                ((phi >= -180) & (phi < -70) & (psi <= -165)))  # beta
            res_dict['isbeta'][res_mask_beta] = True
            atom_dict['isbeta'][np.in1d(atom_dict['resid'],
                                        res_dict[res_mask_beta]['id'])] = True

        # Aromatic Rings
        r = []
        for ring in self.sssr:
            if ring.IsAromatic():
                path = ring._path
                # the first atom of the ring supplies the ring's alpha/beta flags
                atom = atom_dict[atom_dict['id'] == path[0]]
                coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
                centroid = coords.mean(axis=0)
                # get vector perpendicular to ring
                vector = np.cross(
                    coords - np.vstack((coords[1:], coords[:1])),
                    np.vstack((coords[1:], coords[:1])) - np.vstack(
                        (coords[2:], coords[:2]))).mean(axis=0) - centroid
                r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
        ring_dict = np.array(r,
                             dtype=[
                                 ('centroid', 'float16', 3),
                                 ('vector', 'float16', 3),
                                 ('isalpha', 'bool'),
                                 ('isbeta', 'bool'),
                             ])

        self._atom_dict = atom_dict
        self._ring_dict = ring_dict
        if self.protein:
            self._res_dict = res_dict