def test_dihedral():
    """Test dihedrals on hand-built points and on a 3D benzene ring."""
    # Dihedrals: the first three points are fixed, only the fourth varies.
    # The second case lifts the last point out of the z=0 plane.
    fixed_cases = (
        ((1, 1, 0), 0),
        ((1, 1, 1), -45),
    )
    for fourth_point, expected in fixed_cases:
        torsion = dihedral(np.array((1, 0, 0)),
                           np.array((0, 0, 0)),
                           np.array((0, 1, 0)),
                           np.array(fourth_point))
        assert_array_almost_equal(torsion, expected)

    # Check benzene ring dihedral: the first four atoms must give a value
    # whose magnitude is below 2.
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    benzene.make3D()
    ring_torsion = dihedral(*benzene.coords[:4])
    assert abs(ring_torsion) < 2.
def test_dihedral():
    """Test dihedrals on hand-built points and on a 3D benzene ring."""
    # Dihedrals: the first three points are fixed, only the fourth varies.
    # The second case lifts the last point out of the z=0 plane.
    fixed_cases = (
        ((1, 1, 0), 0),
        ((1, 1, 1), -45),
    )
    for fourth_point, expected in fixed_cases:
        torsion = dihedral(np.array((1, 0, 0)),
                           np.array((0, 0, 0)),
                           np.array((0, 1, 0)),
                           np.array(fourth_point))
        assert_array_almost_equal(torsion, expected)

    # Check benzene ring dihedral: the first four atoms must give 0 to one
    # decimal place.
    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    benzene.make3D()
    ring_torsion = dihedral(benzene.coords[0],
                            benzene.coords[1],
                            benzene.coords[2],
                            benzene.coords[3])
    assert_array_almost_equal(ring_torsion, 0, decimal=1)
def test_dihedral():
    """Test dihedrals on hand-built points and on a 3D benzene ring."""

    def _torsion_with_last_point(last_point):
        # The first three points never change between the two checks, so
        # only the fourth one is parameterised; fresh arrays on every call.
        return dihedral(np.array((1, 0, 0)),
                        np.array((0, 0, 0)),
                        np.array((0, 1, 0)),
                        np.array(last_point))

    # Dihedrals
    assert_array_almost_equal(_torsion_with_last_point((1, 1, 0)), 0)
    assert_array_almost_equal(_torsion_with_last_point((1, 1, 1)), -45)

    # Check benzene ring dihedral (first four atoms, one decimal place)
    ring = oddt.toolkit.readstring('smi', 'c1ccccc1')
    ring.make3D()
    observed = dihedral(ring.coords[0], ring.coords[1],
                        ring.coords[2], ring.coords[3])
    assert_array_almost_equal(observed, 0, decimal=1)
def test_spatial():
    """Test spatial computations: angles, dihedrals, rotation and RMSD."""
    # Angles: first point and vertex are fixed, the third point varies.
    for third_point, expected_angle in (((0, 1, 0), 90), ((1, 1, 0), 45)):
        measured = angle(np.array((1, 0, 0)),
                         np.array((0, 0, 0)),
                         np.array(third_point))
        assert_array_almost_equal(measured, expected_angle)

    benzene = oddt.toolkit.readstring('smi', 'c1ccccc1')
    benzene.make3D()

    # Check benzene ring angle
    ring_angle = angle(benzene.coords[0], benzene.coords[1], benzene.coords[2])
    assert_array_almost_equal(ring_angle, 120, decimal=1)

    # Dihedrals: only the fourth point varies between the two checks.
    for fourth_point, expected_torsion in (((1, 1, 0), 0), ((1, 1, 1), -45)):
        measured = dihedral(np.array((1, 0, 0)),
                            np.array((0, 0, 0)),
                            np.array((0, 1, 0)),
                            np.array(fourth_point))
        assert_array_almost_equal(measured, expected_torsion)

    # Check benzene ring dihedral
    ring_torsion = dihedral(benzene.coords[0], benzene.coords[1],
                            benzene.coords[2], benzene.coords[3])
    assert_array_almost_equal(ring_torsion, 0, decimal=1)

    # A fresh molecule is generated for the rotation / RMSD checks.
    original = oddt.toolkit.readstring('smi', 'c1ccccc1')
    original.make3D()
    rotated = original.clone

    # Test rotation: turning by pi around every axis must give back the
    # same coordinates.
    assert_almost_equal(rotated.coords,
                        rotate(rotated.coords, np.pi, np.pi, np.pi))

    # Rotate perpendicular to ring
    rotated.coords = rotate(rotated.coords, 0, 0, np.pi)

    # RMSD
    assert_almost_equal(rmsd(original, rotated, method=None), 2.77, decimal=1)
    # Hungarian must be close to zero (RDKit is 0.3)
    assert_almost_equal(rmsd(original, rotated, method='hungarian'), 0,
                        decimal=0)

    # pick one molecule from docked poses
    docked_path = os.path.join(test_data_dir,
                               'data/dude/xiap/actives_docked.sdf')
    all_poses = list(oddt.toolkit.readfile('sdf', docked_path))
    poses = [pose for pose in all_poses if pose.title == '312335']
    reference_pose = poses[0]
    other_poses = poses[1:]

    expected_plain = [4.753552, 2.501487, 2.7941732, 1.1281863, 0.74440968,
                      1.6256877, 4.762476, 2.7167852, 2.5504358, 1.9303833,
                      2.6200771, 3.1741529, 3.225431, 4.7784939, 4.8035369,
                      7.8962774, 2.2385094, 4.8625236, 3.2036853]
    plain_rmsd = [rmsd(reference_pose, pose) for pose in other_poses]
    assert_array_almost_equal(plain_rmsd, expected_plain)

    expected_hungarian = [2.5984519, 1.7295024, 1.1268076, 1.0285776,
                          0.73529714, 1.4094033, 2.5195069, 1.7449125,
                          1.5116163, 1.7796179, 2.6064286, 3.1576841,
                          3.2135022, 3.1675091, 2.7001681, 5.1263351,
                          2.0836117, 3.542397, 3.1873631]
    hungarian_rmsd = [rmsd(reference_pose, pose, method='hungarian')
                      for pose in other_poses]
    assert_array_almost_equal(hungarian_rmsd, expected_hungarian)
def _dicts(self):
    """Build the atom, aromatic-ring and (for proteins) residue record arrays.

    The results are stored on the instance as ``_atom_dict`` and
    ``_ring_dict`` and, when ``self.protein`` is truthy, ``_res_dict``.
    Nothing is returned.
    """
    # Atoms
    # NOTE(review): float16 keeps roughly three significant decimal digits;
    # confirm this precision is acceptable for coordinates and charges.
    atom_dtype = [('id', 'int16'),
                  # atom info
                  ('coords', 'float16', 3),
                  ('charge', 'float16'),
                  ('atomicnum', 'int8'),
                  ('atomtype', 'a4'),
                  ('hybridization', 'int8'),
                  # coordinates of up to 4 neighbors, NaN-padded
                  ('neighbors', 'float16', (4, 3)),
                  # residue info
                  ('resid', 'int16'),
                  ('resname', 'a3'),
                  ('isbackbone', 'bool'),
                  # atom properties
                  ('isacceptor', 'bool'),
                  ('isdonor', 'bool'),
                  ('isdonorh', 'bool'),
                  ('ismetal', 'bool'),
                  ('ishydrophobe', 'bool'),
                  ('isaromatic', 'bool'),
                  ('isminus', 'bool'),
                  ('isplus', 'bool'),
                  ('ishalogen', 'bool'),
                  # secondary structure
                  ('isalpha', 'bool'),
                  ('isbeta', 'bool'),
                  ]
    neighbor_dtype = [('coords', 'float16', 3), ('atomicnum', 'int8')]

    atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
    metals = [3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
              30, 31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
              55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
              71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89,
              90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]
    for i, atom in enumerate(self.atoms):
        atomicnum = atom.atomicnum
        atomtype = typetable.Translate(atom.type)  # sybyl atom type
        partialcharge = atom.partialcharge
        coords = atom.coords

        residue = pybel.Residue(atom.OBAtom.GetResidue()) if self.protein else False

        # Neighbor slots start as "no atom": atomic number 0 and NaN
        # coordinates. Only the coordinates are filled with NaN because the
        # atomic-number field is an integer and cannot hold NaN.
        neighbors = np.zeros(4, dtype=neighbor_dtype)
        neighbors['coords'].fill(np.nan)
        for n, nbr_atom in enumerate(atom.neighbors):
            # only four slots are available; further neighbors are ignored
            if n > 3:
                break
            neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)

        atom_dict[i] = (atom.idx,
                        coords,
                        partialcharge,
                        atomicnum,
                        atomtype,
                        atom.OBAtom.GetHyb(),
                        neighbors['coords'],
                        # residue info
                        residue.idx if residue else 0,
                        residue.name if residue else '',
                        residue.OBResidue.GetAtomProperty(atom.OBAtom, 2) if residue else False,  # is backbone
                        # atom properties
                        atom.OBAtom.IsHbondAcceptor(),
                        atom.OBAtom.IsHbondDonor(),
                        atom.OBAtom.IsHbondDonorH(),
                        atomicnum in metals,
                        # hydrophobe: carbon whose stored neighbors are all
                        # carbon, hydrogen or empty slots (atomic number 0)
                        atomicnum == 6 and np.in1d(neighbors['atomicnum'], [6, 1, 0]).all(),
                        atom.OBAtom.IsAromatic(),
                        atomtype in ('O3-', 'O2-', 'O-'),  # is charged (minus)
                        atomtype in ('N3+', 'N2+', 'Ng+'),  # is charged (plus)
                        atomicnum in (9, 17, 35, 53),  # is halogen?
                        False,  # alpha
                        False,  # beta
                        )

    if self.protein:
        # Protein Residues (alpha helix and beta sheet)
        res_dtype = [('id', 'int16'),
                     ('resname', 'a3'),
                     ('N', 'float16', 3),
                     ('CA', 'float16', 3),
                     ('C', 'float16', 3),
                     ('isalpha', 'bool'),
                     ('isbeta', 'bool'),
                     ]  # N, CA, C

        b = []
        for residue in self.residues:
            backbone = {}
            for atom in residue:
                # presumably property 1 selects amino-backbone atoms;
                # verify against the Open Babel residue property enum
                if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                    if atom.atomicnum == 7:
                        backbone['N'] = atom.coords
                    elif atom.atomicnum == 6:
                        if atom.type == 'C3':
                            backbone['CA'] = atom.coords
                        else:
                            backbone['C'] = atom.coords
            # keep only residues for which N, CA and C were all found
            if len(backbone) == 3:
                b.append((residue.idx, residue.name, backbone['N'],
                          backbone['CA'], backbone['C'], False, False))
        res_dict = np.array(b, dtype=res_dtype)

        # detect secondary structure by phi and psi angles computed between
        # each residue and the next one in the array
        first = res_dict[:-1]
        second = res_dict[1:]
        psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
        phi = dihedral(first['C'], second['N'], second['CA'], second['C'])

        # mark residues and their atoms belonging to alpha and beta
        res_mask_alpha = np.where((phi > -145) & (phi < -35) &
                                  (psi > -70) & (psi < 50))  # alpha
        res_dict['isalpha'][res_mask_alpha] = True
        atom_dict['isalpha'][np.in1d(atom_dict['resid'],
                                     res_dict[res_mask_alpha]['id'])] = True

        res_mask_beta = np.where(
            ((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) |
            ((phi >= -180) & (phi < -70) & (psi <= -165)))  # beta
        res_dict['isbeta'][res_mask_beta] = True
        atom_dict['isbeta'][np.in1d(atom_dict['resid'],
                                    res_dict[res_mask_beta]['id'])] = True

    # Aromatic Rings
    r = []
    for ring in self.sssr:
        if ring.IsAromatic():
            path = ring._path
            # record of the ring's first atom; its secondary-structure flags
            # are copied onto the ring
            atom = atom_dict[atom_dict['id'] == path[0]]
            coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
            centroid = coords.mean(axis=0)
            # get vector perpendicular to ring: mean cross product of
            # consecutive bond vectors around the ring.
            # NOTE(review): the centroid is subtracted from this direction
            # vector as in the original code; confirm consumers expect that.
            shifted_once = np.vstack((coords[1:], coords[:1]))
            shifted_twice = np.vstack((coords[2:], coords[:2]))
            vector = np.cross(coords - shifted_once,
                              shifted_once - shifted_twice).mean(axis=0) - centroid
            r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
    ring_dict = np.array(r, dtype=[('centroid', 'float16', 3),
                                   ('vector', 'float16', 3),
                                   ('isalpha', 'bool'),
                                   ('isbeta', 'bool')])

    self._atom_dict = atom_dict
    self._ring_dict = ring_dict
    if self.protein:
        self._res_dict = res_dict
def detect_secondary_structure(res_dict):
    """Detect alpha helices and beta sheets in res_dict by phi and psi angles

    Reads the ``'N'``, ``'CA'``, ``'C'``, ``'O'`` and ``'id'`` fields of
    ``res_dict``, updates its ``'isalpha'`` and ``'isbeta'`` fields in place
    and returns the same array.
    """

    def _consecutive_runs(indices):
        # Cut a sorted index array wherever two neighbouring entries are not
        # consecutive integers.
        cut_points = np.argwhere(np.diff(indices) != 1).flatten() + 1
        return np.split(indices, cut_points)

    def _flagged(field):
        # Positions in res_dict whose boolean `field` is currently set.
        return np.argwhere(res_dict[field]).flatten()

    prev_res = res_dict[:-1]
    next_res = res_dict[1:]
    psi = dihedral(prev_res['N'], prev_res['CA'], prev_res['C'], next_res['N'])
    phi = dihedral(prev_res['C'], next_res['N'], next_res['CA'], next_res['C'])
    # Angle criteria only count when the two residues have consecutive ids.
    sequential = (next_res['id'] - prev_res['id']) == 1

    # ---- Alpha helices ----
    helix_angles = ((phi > -145) & (phi < -35) &
                    (psi > -70) & (psi < 50) & sequential)
    helix_idx = np.unique(np.argwhere(helix_angles))

    # Only runs of at least 3 consecutive positions are flagged
    for run in _consecutive_runs(helix_idx):
        if len(run) >= 3:
            res_dict['isalpha'][run] = True

    # Alpha helices have to form H-Bonds: keep a residue only if it has an
    # N...O value below 3.5 with a helix residue at least 3 ids away.
    # (presumably `distance` returns a pairwise matrix; verify against its
    # definition)
    helix = res_dict[res_dict['isalpha']]
    id_gap_ok = np.abs(helix['id'] - helix['id'][:, np.newaxis]) >= 3
    n_o_close = distance(helix['N'], helix['O']) < 3.5
    helix_pairs = n_o_close & id_gap_ok
    helix_keep = helix_pairs.any(axis=0) | helix_pairs.any(axis=1)
    res_dict['isalpha'][_flagged('isalpha')[~helix_keep]] = False

    # Drop leftover runs shorter than 3
    for run in _consecutive_runs(_flagged('isalpha')):
        if 0 < len(run) < 3:
            res_dict['isalpha'][run] = False

    # ---- Beta sheets ----
    strand_region_a = ((phi >= -180) & (phi < -40) &
                       (psi <= 180) & (psi > 90) & sequential)
    strand_region_b = ((phi >= -180) & (phi < -70) &
                       (psi <= -165) & sequential)
    strand_idx = np.unique(np.argwhere(strand_region_a | strand_region_b))

    # Only runs of at least 3 consecutive positions are flagged
    for run in _consecutive_runs(strand_idx):
        if len(run) >= 3:
            res_dict['isbeta'][run] = True

    # Beta strands have to be alongside each other: keep a residue only if
    # it pairs (N...O below 3.5, or CA...CA below 4.5) with a strand residue
    # at least 4 ids away.
    strand = res_dict[res_dict['isbeta']]
    strand_gap_ok = np.abs(strand['id'] - strand['id'][:, np.newaxis]) >= 4
    strand_n_o_close = distance(strand['N'], strand['O']) < 3.5
    strand_ca_close = distance(strand['CA'], strand['CA']) < 4.5
    strand_hbonds = strand_n_o_close & strand_gap_ok
    strand_keep = (strand_hbonds.any(axis=0) |
                   strand_hbonds.any(axis=1) |
                   (strand_ca_close & strand_gap_ok).any(axis=0))
    res_dict['isbeta'][_flagged('isbeta')[~strand_keep]] = False

    # Drop leftover runs shorter than 3
    for run in _consecutive_runs(_flagged('isbeta')):
        if 0 < len(run) < 3:
            res_dict['isbeta'][run] = False

    return res_dict
def detect_secondary_structure(res_dict):
    """Detect alpha helices and beta sheets in res_dict by phi and psi angles

    Reads the ``'N'``, ``'CA'``, ``'C'``, ``'O'``, ``'id'`` and ``'resnum'``
    fields of ``res_dict``, updates its ``'isalpha'`` and ``'isbeta'`` fields
    in place and returns the same array.
    """

    def _split_runs(sorted_idx):
        # Break a sorted index array into pieces of consecutive integers.
        gaps = np.argwhere(np.diff(sorted_idx) != 1).flatten() + 1
        return np.split(sorted_idx, gaps)

    def _positions(flag_name):
        # Positions in res_dict where the boolean field is currently True.
        return np.argwhere(res_dict[flag_name]).flatten()

    lead = res_dict[:-1]
    follow = res_dict[1:]
    psi = dihedral(lead['N'], lead['CA'], lead['C'], follow['N'])
    phi = dihedral(lead['C'], follow['N'], follow['CA'], follow['C'])
    # Angle criteria only count when the two entries have consecutive ids.
    adjacent = (follow['id'] - lead['id']) == 1

    # ---- Alpha helices ----
    alpha_candidates = np.unique(np.argwhere(
        (phi > -145) & (phi < -35) & (psi > -70) & (psi < 50) & adjacent))

    # Only runs of at least 3 consecutive positions are flagged
    for group in _split_runs(alpha_candidates):
        if len(group) >= 3:
            res_dict['isalpha'][group] = True

    # Alpha helices have to form H-Bonds: a residue stays flagged only if it
    # has an N...O value below 3.5 with a helix residue whose residue number
    # differs by at least 3.
    # (presumably `distance` returns a pairwise matrix; verify against its
    # definition)
    alpha_res = res_dict[res_dict['isalpha']]
    alpha_sep = np.abs(alpha_res['resnum'] -
                       alpha_res['resnum'][:, np.newaxis]) >= 3
    alpha_hbond = distance(alpha_res['N'], alpha_res['O']) < 3.5
    alpha_pairs = alpha_hbond & alpha_sep
    alpha_ok = alpha_pairs.any(axis=0) | alpha_pairs.any(axis=1)
    res_dict['isalpha'][_positions('isalpha')[~alpha_ok]] = False

    # Drop leftover runs shorter than 3
    for group in _split_runs(_positions('isalpha')):
        if 0 < len(group) < 3:
            res_dict['isalpha'][group] = False

    # ---- Beta sheets ----
    beta_window_1 = ((phi >= -180) & (phi < -40) &
                     (psi <= 180) & (psi > 90) & adjacent)
    beta_window_2 = ((phi >= -180) & (phi < -70) &
                     (psi <= -165) & adjacent)
    beta_candidates = np.unique(np.argwhere(beta_window_1 | beta_window_2))

    # Only runs of at least 3 consecutive positions are flagged
    for group in _split_runs(beta_candidates):
        if len(group) >= 3:
            res_dict['isbeta'][group] = True

    # Beta strands have to be alongside each other: a residue stays flagged
    # only if it pairs (N...O below 3.5, or CA...CA below 4.5) with a strand
    # residue whose residue number differs by at least 4.
    beta_res = res_dict[res_dict['isbeta']]
    beta_sep = np.abs(beta_res['resnum'] -
                      beta_res['resnum'][:, np.newaxis]) >= 4
    beta_hbond = distance(beta_res['N'], beta_res['O']) < 3.5
    beta_ca_near = distance(beta_res['CA'], beta_res['CA']) < 4.5
    beta_pairs = beta_hbond & beta_sep
    beta_ok = (beta_pairs.any(axis=0) |
               beta_pairs.any(axis=1) |
               (beta_ca_near & beta_sep).any(axis=0))
    res_dict['isbeta'][_positions('isbeta')[~beta_ok]] = False

    # Drop leftover runs shorter than 3
    for group in _split_runs(_positions('isbeta')):
        if 0 < len(group) < 3:
            res_dict['isbeta'][group] = False

    return res_dict
def _dicts(self):
    """Build the atom, aromatic-ring and (for proteins) residue record arrays.

    The results are stored on the instance as ``_atom_dict`` and
    ``_ring_dict`` (both made read-only) and, when ``self.protein`` is
    truthy, ``_res_dict``. Nothing is returned.
    """
    # Atoms
    atom_dtype = [('id', 'int16'),
                  # atom info
                  ('coords', 'float32', 3),
                  ('radius', 'float32'),
                  ('charge', 'float32'),
                  ('atomicnum', 'int8'),
                  ('atomtype', 'a4'),
                  ('hybridization', 'int8'),
                  # coordinates of up to 4 neighbors, NaN-padded
                  ('neighbors', 'float32', (4, 3)),
                  # residue info
                  ('resid', 'int16'),
                  ('resname', 'a3'),
                  ('isbackbone', 'bool'),
                  # atom properties
                  ('isacceptor', 'bool'),
                  ('isdonor', 'bool'),
                  ('isdonorh', 'bool'),
                  ('ismetal', 'bool'),
                  ('ishydrophobe', 'bool'),
                  ('isaromatic', 'bool'),
                  ('isminus', 'bool'),
                  ('isplus', 'bool'),
                  ('ishalogen', 'bool'),
                  # secondary structure
                  ('isalpha', 'bool'),
                  ('isbeta', 'bool'),
                  ]
    neighbor_dtype = [('coords', 'float32', 3), ('atomicnum', 'int8')]

    atom_dict = np.empty(self.Mol.GetNumAtoms(), dtype=atom_dtype)
    metals = [3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
              30, 31, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
              55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70,
              71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89,
              90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103]
    for i, atom in enumerate(self.atoms):
        atomicnum = atom.atomicnum
        partialcharge = atom.partialcharge
        coords = atom.coords
        # use the Tripos type when the atom carries one, else the symbol
        if atom.Atom.HasProp("_TriposAtomType"):
            atomtype = atom.Atom.GetProp("_TriposAtomType")
        else:
            atomtype = atom.Atom.GetSymbol()

        residue = atom.Atom.GetMonomerInfo() if self.protein else False

        # Neighbor slots start as "no atom": atomic number 0 and NaN
        # coordinates.
        neighbors = np.zeros(4, dtype=neighbor_dtype)
        neighbors['coords'].fill(np.nan)
        for n, nbr_atom in enumerate(atom.neighbors):
            # only four slots are available; further neighbors are ignored
            # instead of raising IndexError
            if n > 3:
                break
            neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)

        atom_dict[i] = (atom.idx,
                        coords,
                        elementtable.GetRvdw(atomicnum),
                        partialcharge,
                        atomicnum,
                        atomtype,
                        np.clip(atom.Atom.GetHybridization() - 1, 0, 3),
                        neighbors['coords'],
                        # residue info
                        residue.GetResidueNumber() if residue else 0,
                        residue.GetResidueName() if residue else '',
                        False,  # is backbone (set below from SMARTS matches)
                        # atom properties
                        False,  # IsHbondAcceptor (set below from features)
                        False,  # IsHbondDonor (set below from features)
                        False,  # IsHbondDonorH
                        atomicnum in metals,
                        # hydrophobe: carbon whose stored neighbors are all
                        # carbon, hydrogen or empty slots (atomic number 0)
                        atomicnum == 6 and np.in1d(neighbors['atomicnum'], [6, 1, 0]).all(),
                        atom.Atom.GetIsAromatic(),
                        atomtype in ('O3-', 'O2-', 'O-') or atom.formalcharge < 0,  # is charged (minus)
                        atomtype in ('N3+', 'N2+', 'Ng+') or atom.formalcharge > 0,  # is charged (plus)
                        atomicnum in (9, 17, 35, 53),  # is halogen?
                        False,  # alpha
                        False,  # beta
                        )

    # Match features and mark them in atom_dict
    translate_feats = {
        'Donor': 'isdonor',
        'Acceptor': 'isacceptor',
        'NegIonizable': 'isminus',
        'PosIonizable': 'isplus',
    }

    # build residue dictionary
    if self.protein:
        # for protein finding features per residue is much faster
        for res in self.residues:
            for feat_name, field in translate_feats.items():
                feats = base_feature_factory.GetFeaturesForMol(res.Residue, includeOnly=feat_name)
                # res.atommap translates residue-local atom ids into indices
                # of atom_dict
                atom_dict[field][[res.atommap[idx] for feat in feats for idx in feat.GetAtomIds()]] = True

        # Protein Residues (alpha helix and beta sheet)
        res_dtype = [('id', 'int16'),
                     ('resname', 'a3'),
                     ('N', 'float32', 3),
                     ('CA', 'float32', 3),
                     ('C', 'float32', 3),
                     ('isalpha', 'bool'),
                     ('isbeta', 'bool'),
                     ]  # N, CA, C

        b = []
        aa = Chem.MolFromSmarts('[NX3,NX4+][CX4H,CX4H2][CX3](=[OX1])[O,N]')  # amino backbone SMARTS
        conf = self.Mol.GetConformer()
        for path in self.Mol.GetSubstructMatches(aa):
            atom_dict['isbackbone'][np.array(path)] = True
            # the first three matched atoms are N, CA and C in SMARTS order
            residue = self.Mol.GetAtomWithIdx(path[0]).GetMonomerInfo()
            b.append((residue.GetResidueNumber(), residue.GetResidueName(),
                      conf.GetAtomPosition(path[0]),
                      conf.GetAtomPosition(path[1]),
                      conf.GetAtomPosition(path[2]),
                      False, False))
        res_dict = np.array(b, dtype=res_dtype)

        # detect secondary structure by phi and psi angles computed between
        # each residue and the next one in the array
        first = res_dict[:-1]
        second = res_dict[1:]
        psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
        phi = dihedral(first['C'], second['N'], second['CA'], second['C'])

        # mark residues and their atoms belonging to alpha and beta
        res_mask_alpha = np.where((phi > -145) & (phi < -35) &
                                  (psi > -70) & (psi < 50))  # alpha
        res_dict['isalpha'][res_mask_alpha] = True
        atom_dict['isalpha'][np.in1d(atom_dict['resid'],
                                     res_dict[res_mask_alpha]['id'])] = True

        res_mask_beta = np.where(
            ((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) |
            ((phi >= -180) & (phi < -70) & (psi <= -165)))  # beta
        res_dict['isbeta'][res_mask_beta] = True
        atom_dict['isbeta'][np.in1d(atom_dict['resid'],
                                    res_dict[res_mask_beta]['id'])] = True
    else:
        # find features for ligands
        for feat_name, field in translate_feats.items():
            feats = base_feature_factory.GetFeaturesForMol(self.Mol, includeOnly=feat_name)
            atom_dict[field][[idx for feat in feats for idx in feat.GetAtomIds()]] = True

    # FIX: remove acidic carbons from isminus group (they are part of smarts)
    atom_dict['isminus'][atom_dict['isminus'] & (atom_dict['atomicnum'] == 6)] = False

    # Aromatic Rings
    r = []
    for path in self.sssr:
        if self.Mol.GetAtomWithIdx(path[0]).GetIsAromatic():
            # record of the ring's first atom; its secondary-structure flags
            # are copied onto the ring
            atom = atom_dict[atom_dict['id'] == path[0]]
            coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
            centroid = coords.mean(axis=0)
            # get vector perpendicular to ring: mean cross product of
            # consecutive bond vectors around the ring.
            # NOTE(review): the centroid is subtracted from this direction
            # vector as in the original code; confirm consumers expect that.
            shifted_once = np.vstack((coords[1:], coords[:1]))
            shifted_twice = np.vstack((coords[2:], coords[:2]))
            vector = np.cross(coords - shifted_once,
                              shifted_once - shifted_twice).mean(axis=0) - centroid
            r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
    ring_dict = np.array(r, dtype=[('centroid', 'float32', 3),
                                   ('vector', 'float32', 3),
                                   ('isalpha', 'bool'),
                                   ('isbeta', 'bool')])

    self._atom_dict = atom_dict
    self._atom_dict.setflags(write=False)
    self._ring_dict = ring_dict
    self._ring_dict.setflags(write=False)
    if self.protein:
        self._res_dict = res_dict
def _dicts(self):
    """Build the atom, aromatic-ring and (for proteins) residue record arrays.

    The results are stored on the instance as ``_atom_dict`` and
    ``_ring_dict`` and, when ``self.protein`` is truthy, ``_res_dict``.
    Nothing is returned.
    """
    # Atoms
    # NOTE(review): float16 keeps roughly three significant decimal digits;
    # confirm this precision is acceptable for coordinates and charges.
    atom_dtype = [
        ('id', 'int16'),
        # atom info
        ('coords', 'float16', 3),
        ('charge', 'float16'),
        ('atomicnum', 'int8'),
        ('atomtype', 'a4'),
        ('hybridization', 'int8'),
        # coordinates of up to 4 neighbors, NaN-padded
        ('neighbors', 'float16', (4, 3)),
        # residue info
        ('resid', 'int16'),
        ('resname', 'a3'),
        ('isbackbone', 'bool'),
        # atom properties
        ('isacceptor', 'bool'),
        ('isdonor', 'bool'),
        ('isdonorh', 'bool'),
        ('ismetal', 'bool'),
        ('ishydrophobe', 'bool'),
        ('isaromatic', 'bool'),
        ('isminus', 'bool'),
        ('isplus', 'bool'),
        ('ishalogen', 'bool'),
        # secondary structure
        ('isalpha', 'bool'),
        ('isbeta', 'bool'),
    ]
    neighbor_dtype = [('coords', 'float16', 3), ('atomicnum', 'int8')]

    atom_dict = np.empty(self.OBMol.NumAtoms(), dtype=atom_dtype)
    metals = [
        3, 4, 11, 12, 13, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
        37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 55, 56, 57,
        58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74,
        75, 76, 77, 78, 79, 80, 81, 82, 83, 87, 88, 89, 90, 91, 92, 93, 94,
        95, 96, 97, 98, 99, 100, 101, 102, 103
    ]
    for i, atom in enumerate(self.atoms):
        atomicnum = atom.atomicnum
        atomtype = typetable.Translate(atom.type)  # sybyl atom type
        partialcharge = atom.partialcharge
        coords = atom.coords

        if self.protein:
            residue = pybel.Residue(atom.OBAtom.GetResidue())
        else:
            residue = False

        # Neighbor slots start as "no atom": atomic number 0 and NaN
        # coordinates. Only the coordinates are filled with NaN because the
        # atomic-number field is an integer and cannot hold NaN.
        neighbors = np.zeros(4, dtype=neighbor_dtype)
        neighbors['coords'].fill(np.nan)
        for n, nbr_atom in enumerate(atom.neighbors):
            # only four slots are available; further neighbors are ignored
            if n > 3:
                break
            neighbors[n] = (nbr_atom.coords, nbr_atom.atomicnum)

        atom_dict[i] = (
            atom.idx,
            coords,
            partialcharge,
            atomicnum,
            atomtype,
            atom.OBAtom.GetHyb(),
            neighbors['coords'],
            # residue info
            residue.idx if residue else 0,
            residue.name if residue else '',
            residue.OBResidue.GetAtomProperty(atom.OBAtom, 2) if residue else False,  # is backbone
            # atom properties
            atom.OBAtom.IsHbondAcceptor(),
            atom.OBAtom.IsHbondDonor(),
            atom.OBAtom.IsHbondDonorH(),
            atomicnum in metals,
            # hydrophobe: carbon whose stored neighbors are all carbon,
            # hydrogen or empty slots (atomic number 0)
            atomicnum == 6 and np.in1d(neighbors['atomicnum'], [6, 1, 0]).all(),
            atom.OBAtom.IsAromatic(),
            atomtype in ('O3-', 'O2-', 'O-'),  # is charged (minus)
            atomtype in ('N3+', 'N2+', 'Ng+'),  # is charged (plus)
            atomicnum in (9, 17, 35, 53),  # is halogen?
            False,  # alpha
            False,  # beta
        )

    if self.protein:
        # Protein Residues (alpha helix and beta sheet)
        res_dtype = [('id', 'int16'), ('resname', 'a3'),
                     ('N', 'float16', 3), ('CA', 'float16', 3),
                     ('C', 'float16', 3), ('isalpha', 'bool'),
                     ('isbeta', 'bool')]  # N, CA, C

        b = []
        for residue in self.residues:
            backbone = {}
            for atom in residue:
                # presumably property 1 selects amino-backbone atoms;
                # verify against the Open Babel residue property enum
                if residue.OBResidue.GetAtomProperty(atom.OBAtom, 1):
                    if atom.atomicnum == 7:
                        backbone['N'] = atom.coords
                    elif atom.atomicnum == 6:
                        if atom.type == 'C3':
                            backbone['CA'] = atom.coords
                        else:
                            backbone['C'] = atom.coords
            # keep only residues for which N, CA and C were all found
            if len(backbone) == 3:
                b.append((residue.idx, residue.name, backbone['N'],
                          backbone['CA'], backbone['C'], False, False))
        res_dict = np.array(b, dtype=res_dtype)

        # detect secondary structure by phi and psi angles computed between
        # each residue and the next one in the array
        first = res_dict[:-1]
        second = res_dict[1:]
        psi = dihedral(first['N'], first['CA'], first['C'], second['N'])
        phi = dihedral(first['C'], second['N'], second['CA'], second['C'])

        # mark residues and their atoms belonging to alpha and beta
        res_mask_alpha = np.where((phi > -145) & (phi < -35) &
                                  (psi > -70) & (psi < 50))  # alpha
        res_dict['isalpha'][res_mask_alpha] = True
        atom_dict['isalpha'][np.in1d(atom_dict['resid'],
                                     res_dict[res_mask_alpha]['id'])] = True

        res_mask_beta = np.where(
            ((phi >= -180) & (phi < -40) & (psi <= 180) & (psi > 90)) |
            ((phi >= -180) & (phi < -70) & (psi <= -165)))  # beta
        res_dict['isbeta'][res_mask_beta] = True
        atom_dict['isbeta'][np.in1d(atom_dict['resid'],
                                    res_dict[res_mask_beta]['id'])] = True

    # Aromatic Rings
    r = []
    for ring in self.sssr:
        if ring.IsAromatic():
            path = ring._path
            # record of the ring's first atom; its secondary-structure flags
            # are copied onto the ring
            atom = atom_dict[atom_dict['id'] == path[0]]
            coords = atom_dict[np.in1d(atom_dict['id'], path)]['coords']
            centroid = coords.mean(axis=0)
            # get vector perpendicular to ring: mean cross product of
            # consecutive bond vectors around the ring.
            # NOTE(review): the centroid is subtracted from this direction
            # vector as in the original code; confirm consumers expect that.
            shifted_once = np.vstack((coords[1:], coords[:1]))
            shifted_twice = np.vstack((coords[2:], coords[:2]))
            vector = np.cross(coords - shifted_once,
                              shifted_once - shifted_twice).mean(axis=0) - centroid
            r.append((centroid, vector, atom['isalpha'], atom['isbeta']))
    ring_dict = np.array(r, dtype=[
        ('centroid', 'float16', 3),
        ('vector', 'float16', 3),
        ('isalpha', 'bool'),
        ('isbeta', 'bool'),
    ])

    self._atom_dict = atom_dict
    self._ring_dict = ring_dict
    if self.protein:
        self._res_dict = res_dict