def SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5):
    """Compute the structural protein-ligand interaction fingerprint (SPLIF).

    Based on http://pubs.acs.org/doi/abs/10.1021/ci500319f.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    depth : int (default = 1)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 4096)
        SPLIF is folded to given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    Returns
    -------
    SPLIF : numpy array
        Sorted structured array with one record per atom pair returned by
        ``close_contacts``. Every record has three fields:
        ``hash`` - folded hash of the (ligand, protein) atom environments,
        ``ligand_coords`` - shape (5, 3): the ligand atom's coordinates
        followed by its ``neighbors`` entry,
        ``protein_coords`` - shape (5, 3): the same for the protein atom.
    """
    # Hydrogens do not take part in the fingerprint.
    heavy_protein = protein.atom_dict[protein.atom_dict['atomicnum'] != 1]
    heavy_ligand = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]
    protein_atoms, ligand_atoms = close_contacts(heavy_protein,
                                                 heavy_ligand,
                                                 cutoff=distance_cutoff)

    record_dtype = [('hash', int),
                    ('ligand_coords', np.float32, (5, 3)),
                    ('protein_coords', np.float32, (5, 3))]
    splif = np.zeros((len(ligand_atoms)), dtype=record_dtype)

    contact_pairs = zip(ligand_atoms, protein_atoms)
    for row, (lig_atom, prot_atom) in enumerate(contact_pairs):
        # Rows belonging to hydrogens (if any slipped through) stay zeroed.
        if lig_atom['atomicnum'] == 1 or prot_atom['atomicnum'] == 1:
            continue

        # Only the outermost (deepest) environment hash of each atom is used.
        lig_env = _ECFP_atom_hash(ligand, int(lig_atom['id']),
                                  depth=depth)[-1]
        prot_env = _ECFP_atom_hash(protein, int(prot_atom['id']),
                                   depth=depth)[-1]
        # Sorting makes the hash independent of the argument order, i.e.
        # SPLIF(protein, ligand) hashes the same pair identically.
        pair_hash = hash32(tuple(sorted((lig_env, prot_env))))

        lig_xyz = np.vstack((lig_atom['coords'].reshape((1, 3)),
                             lig_atom['neighbors']))
        prot_xyz = np.vstack((prot_atom['coords'].reshape((1, 3)),
                              prot_atom['neighbors']))
        splif[row] = (pair_hash, lig_xyz, prot_xyz)

    # folding
    splif['hash'] = fold(splif['hash'], size)
    return np.sort(splif)
def test_close_contacts():
    """Check the number of heavy-atom close contacts (cutoff=3) per ligand."""
    expected_counts = [
        5, 7, 6, 5, 3, 6, 5, 6, 6, 6,
        5, 4, 7, 6, 6, 6, 7, 5, 6, 5,
        5, 7, 4, 5, 6, 7, 6, 5, 7, 5,
        6, 4, 5, 4, 3, 7, 6, 6, 3, 5,
        4, 3, 1, 7, 3, 2, 4, 1, 2, 7,
        4, 4, 6, 4, 6, 7, 7, 6, 6, 6,
        5, 6, 5, 4, 4, 7, 3, 6, 6, 4,
        7, 7, 4, 5, 4, 7, 3, 6, 6, 6,
        5, 6, 4, 5, 4, 4, 6, 5, 5, 7,
        6, 2, 6, 5, 1, 8, 6, 5, 7, 4,
    ]

    cc = []
    for mol in mols:
        # Hydrogens are dropped from both partners before counting.
        rec_heavy = rec.atom_dict[rec.atom_dict['atomicnum'] != 1]
        mol_heavy = mol.atom_dict[mol.atom_dict['atomicnum'] != 1]
        contacts = close_contacts(rec_heavy, mol_heavy, cutoff=3)
        # First element holds the receptor-side atoms of every contact.
        cc.append(len(contacts[0]))

    assert_array_equal(cc, expected_counts)
def test_close_contacts():
    """Close contacts test: heavy-atom contact counts for every ligand."""

    def _heavy_atoms(molecule):
        # Keep every atom whose atomic number is not 1 (i.e. drop hydrogens).
        return molecule.atom_dict[molecule.atom_dict['atomicnum'] != 1]

    def _contact_count(mol):
        pairs = close_contacts(_heavy_atoms(rec), _heavy_atoms(mol), cutoff=3)
        return len(pairs[0])

    cc = [_contact_count(mol) for mol in mols]

    reference = (
        [5, 7, 6, 5, 3, 6, 5, 6, 6, 6, 5, 4, 7, 6, 6, 6, 7, 5, 6, 5]
        + [5, 7, 4, 5, 6, 7, 6, 5, 7, 5, 6, 4, 5, 4, 3, 7, 6, 6, 3, 5]
        + [4, 3, 1, 7, 3, 2, 4, 1, 2, 7, 4, 4, 6, 4, 6, 7, 7, 6, 6, 6]
        + [5, 6, 5, 4, 4, 7, 3, 6, 6, 4, 7, 7, 4, 5, 4, 7, 3, 6, 6, 6]
        + [5, 6, 4, 5, 4, 4, 6, 5, 5, 7, 6, 2, 6, 5, 1, 8, 6, 5, 7, 4]
    )
    assert_array_equal(cc, reference)
def build(self, ligands, protein=None):
    """Descriptor building method

    Parameters
    ----------
    ligands: array-like
        An array or generator of oddt.toolkit.Molecule objects for which
        the descriptor is computed

    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.
        If falsy, the protein already stored on the instance
        (``self.protein``) is used. Otherwise it is handed to
        ``self.set_protein`` before the descriptors are computed.

    Returns
    -------
    descs: numpy array, shape=[n_samples, n_features]
        An array of binana descriptors, aligned with input ligands (the
        original documentation states 351 features). When ``ligands``
        yields no molecules an empty float array of shape (0, 0) is
        returned instead of raising.
    """
    if protein:
        self.set_protein(protein)
    else:
        protein = self.protein
    protein_dict = protein.atom_dict

    # One float row per ligand. Rows are stacked once at the end, which
    # avoids re-copying the whole matrix with np.vstack on every iteration.
    rows = []
    for mol in ligands:
        mol_dict = mol.atom_dict
        vec = tuple()

        # Vina
        # TODO: Asynchronous output from vina, push command to score and
        #       retrieve at the end?
        # TODO: Check if ligand has vina scores
        vec += tuple(self.vina.build(mol).flatten())

        # Close Contacts (<4A)
        vec += tuple(self.cc_4.build(mol).flatten())

        # Electrostatics (<4A)
        ele_rec_types, ele_lig_types = zip(*self.ele_types)
        ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                      'atom_types_ad4')
        ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                      'atom_types_ad4')
        ele = tuple()
        for r_t, m_t in self.ele_types:
            mol_ele_dict, rec_ele_dict = close_contacts(
                ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
            if len(mol_ele_dict) and len(rec_ele_dict):
                # NOTE(review): sqrt is applied before the sum, so this is
                # the sum of absolute coordinate differences rather than
                # the Euclidean distance. Left unchanged on purpose:
                # altering it would change the descriptor values.
                pair_dist = np.sqrt((mol_ele_dict['coords'] -
                                     rec_ele_dict['coords'])**2).sum(axis=-1)
                ele += (mol_ele_dict['charge'] *
                        rec_ele_dict['charge'] /
                        pair_dist *
                        138.94238460104697e4).sum(),  # convert to J/mol
            else:
                ele += 0,
        # nan_to_num replaces NaN/inf entries with finite numbers.
        vec += tuple(np.nan_to_num(ele))

        # Ligand Atom Types
        atoms = atoms_by_type(mol_dict, self.ligand_atom_types,
                              'atom_types_ad4')
        vec += tuple([len(atoms[t]) for t in self.ligand_atom_types])

        # Close Contacts (<2.5A)
        vec += tuple(self.cc_25.build(mol).flatten())

        # H-Bonds (<4A)
        hbond_mol, hbond_rec, strict = hbonds(mol, protein, 4)
        # Retain only strict hbonds
        hbond_mol = hbond_mol[strict]
        hbond_rec = hbond_rec[strict]
        backbone = hbond_rec['isbackbone']
        alpha = hbond_rec['isalpha']
        beta = hbond_rec['isbeta']
        other = ~alpha & ~beta
        donor_mol = hbond_mol['isdonor']
        donor_rec = hbond_rec['isdonor']
        hbond_vec = ((donor_mol & backbone & alpha).sum(),
                     (donor_mol & backbone & beta).sum(),
                     (donor_mol & backbone & other).sum(),
                     (donor_mol & ~backbone & alpha).sum(),
                     (donor_mol & ~backbone & beta).sum(),
                     (donor_mol & ~backbone & other).sum(),
                     (donor_rec & backbone & alpha).sum(),
                     (donor_rec & backbone & beta).sum(),
                     (donor_rec & backbone & other).sum(),
                     (donor_rec & ~backbone & alpha).sum(),
                     (donor_rec & ~backbone & beta).sum(),
                     (donor_rec & ~backbone & other).sum())
        vec += tuple(hbond_vec)

        # Hydrophobic contacts (<4A)
        hydrophobic = hydrophobic_contacts(mol, protein, 4)[1]
        backbone = hydrophobic['isbackbone']
        alpha = hydrophobic['isalpha']
        beta = hydrophobic['isbeta']
        other = ~alpha & ~beta
        hyd_vec = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(hydrophobic))
        vec += tuple(hyd_vec)

        # Pi-stacking (<7.5A)
        _, pi_rec, pi_paralel, pi_tshaped = pi_stacking(mol, protein, 7.5)
        alpha = pi_rec['isalpha'] & pi_paralel
        beta = pi_rec['isbeta'] & pi_paralel
        other = ~alpha & ~beta & pi_paralel
        pi_vec = (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_vec)

        # T-shaped Pi-Pi interaction; computed here while pi_rec still
        # refers to the stacking result, appended after the Pi-cation block.
        alpha = pi_rec['isalpha'] & pi_tshaped
        beta = pi_rec['isbeta'] & pi_tshaped
        other = ~alpha & ~beta & pi_tshaped
        pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

        # Pi-cation (<6A): protein ring / ligand cation first ...
        pi_rec, _, strict = pi_cation(protein, mol, 6)
        alpha = pi_rec['isalpha'] & strict
        beta = pi_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

        # ... then ligand ring / protein cation.
        _, cat_rec, strict = pi_cation(mol, protein, 6)
        alpha = cat_rec['isalpha'] & strict
        beta = cat_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_cat_vec)

        # T-shape (perpendicular Pi's) (<7.5A)
        vec += tuple(pi_t_vec)

        # Active site flexibility (<4A), heavy atoms only
        acitve_site = close_contacts(
            mol_dict[mol_dict['atomicnum'] != 1],
            protein_dict[protein_dict['atomicnum'] != 1],
            cutoff=4)[1]
        backbone = acitve_site['isbackbone']
        alpha = acitve_site['isalpha']
        beta = acitve_site['isbeta']
        other = ~alpha & ~beta
        as_flex = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(acitve_site))
        vec += tuple(as_flex)

        # Salt bridges (<5.5)
        salt_bridge_dict = salt_bridges(mol, protein, 5.5)[1]
        vec += (salt_bridge_dict['isalpha'].sum(),
                salt_bridge_dict['isbeta'].sum(),
                (~salt_bridge_dict['isalpha'] &
                 ~salt_bridge_dict['isbeta']).sum(),
                len(salt_bridge_dict))

        # Rotatable bonds
        vec += mol.num_rotors,

        rows.append(np.array(vec, dtype=float))

    if not rows:
        # Nothing to describe: the feature count is unknown without
        # processing at least one ligand, hence the (0, 0) shape.
        return np.zeros((0, 0), dtype=float)
    return np.vstack(rows)
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4,
         distance_cutoff=4.5, size=16384, count_bits=True, sparse=True,
         ignore_hoh=True, bits_info=None):
    """Protein ligand extended connectivity fingerprint.

    For every pair of atoms in contact, compute the ECFP environments of
    both atoms and hash them together depth by depth.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 16384)
        PLEC is folded to given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just
        the on bits).

    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array
        if False.

    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name
        of the residue ('HOH').

    bits_info : dict or None (default = None)
        If a dictionary is provided it is filled, in-place, with
        information about bit contents: every folded bit maps to a set of
        ``PLEC_bit_info_record`` entries holding the root atom index and
        depth for both ligand and protein.

    Returns
    -------
    PLEC : numpy array
        Sorted fingerprint. Before the optional ``count_bits`` /
        ``sparse`` post-processing it holds one entry per contact pair
        and depth level.
    """
    hashed_pairs = []
    bit_records = []

    # Hydrogens never take part; waters are optionally masked out as well.
    protein_no_h = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        # `&` builds a new mask, so protein_no_h itself stays untouched
        protein_mask = protein_no_h & (protein.atom_dict['resname'] != 'HOH')
    else:
        protein_mask = protein_no_h
    protein_dict = protein.atom_dict[protein_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    # Atom representations are computed once and shared by all hash calls.
    lig_atom_repr = {}
    for aidx in ligand_dict['id'].tolist():
        lig_atom_repr[aidx] = _ECFP_atom_repr(ligand, aidx)

    # Built from every non-hydrogen protein atom, waters included:
    # HOH residues might be connected to metal atoms.
    prot_atom_repr = {}
    for aidx in protein.atom_dict[protein_no_h]['id'].tolist():
        prot_atom_repr[aidx] = _ECFP_atom_repr(protein, aidx)

    ligand_ids = ligand_atoms['id'].tolist()
    protein_ids = protein_atoms['id'].tolist()
    for ligand_atom, protein_atom in zip(ligand_ids, protein_ids):
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      ligand_atom,
                                      depth=depth_ligand,
                                      atom_repr_dict=lig_atom_repr)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       protein_atom,
                                       depth=depth_protein,
                                       atom_repr_dict=prot_atom_repr)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # zip_longest pads the shorter environment list; the padding is the
        # (depth, hash) of the deepest environment of the shallower side.
        fillvalue = ((depth_ligand, ligand_ecfp[-1])
                     if depth_ligand < depth_protein
                     else (depth_protein, protein_ecfp[-1]))

        levels = zip_longest(enumerate(ligand_ecfp),
                             enumerate(protein_ecfp),
                             fillvalue=fillvalue)
        for (lig_level, lig_bit), (prot_level, prot_bit) in levels:
            hashed_pairs.append(hash32((lig_bit, prot_bit)))
            if bits_info is not None:
                bit_records.append(PLEC_bit_info_record(
                    ligand_root_atom_idx=ligand_atom,
                    ligand_depth=lig_level,
                    protein_root_atom_idx=protein_atom,
                    protein_depth=prot_level
                ))

    # folding and sorting
    plec = fold(np.array(hashed_pairs), size=size)

    if bits_info is None:
        plec = np.sort(plec).astype(np.min_scalar_type(size))
    else:
        # argsort instead of sort so the records can follow their bits
        order = np.argsort(plec)
        plec = plec[order].astype(np.min_scalar_type(size))
        for bit_number, record_idx in zip(plec, order):
            bits_info.setdefault(bit_number, set()).add(
                bit_records[record_idx])

    # count_bits
    if not count_bits:
        plec = np.unique(plec)

    # sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4,
         distance_cutoff=4.5, size=16384, count_bits=True, sparse=True,
         ignore_hoh=True):
    """Protein ligand extended connectivity fingerprint.

    For every pair of atoms in contact, compute the ECFP environments of
    both atoms and hash them together depth by depth.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 16384)
        PLEC is folded to given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just
        the on bits).

    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array
        if False.

    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name
        of the residue ('HOH').

    Returns
    -------
    PLEC : numpy array
        Sorted fingerprint. Before the optional ``count_bits`` /
        ``sparse`` post-processing it holds one entry per contact pair
        and depth level.
    """
    hashed_pairs = []

    # Hydrogens never take part; waters are optionally masked out as well.
    protein_no_h = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        # `&` builds a new mask, so protein_no_h itself stays untouched
        protein_mask = protein_no_h & (protein.atom_dict['resname'] != 'HOH')
    else:
        protein_mask = protein_no_h
    protein_dict = protein.atom_dict[protein_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    # Atom representations are computed once and shared by all hash calls.
    lig_atom_repr = {}
    for aidx in ligand_dict['id'].tolist():
        lig_atom_repr[aidx] = _ECFP_atom_repr(ligand, aidx)

    # Built from every non-hydrogen protein atom, waters included:
    # HOH residues might be connected to metal atoms.
    prot_atom_repr = {}
    for aidx in protein.atom_dict[protein_no_h]['id'].tolist():
        prot_atom_repr[aidx] = _ECFP_atom_repr(protein, aidx)

    ligand_ids = ligand_atoms['id'].tolist()
    protein_ids = protein_atoms['id'].tolist()
    for ligand_atom, protein_atom in zip(ligand_ids, protein_ids):
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      ligand_atom,
                                      depth=depth_ligand,
                                      atom_repr_dict=lig_atom_repr)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       protein_atom,
                                       depth=depth_protein,
                                       atom_repr_dict=prot_atom_repr)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # zip_longest pads the shorter environment list with the deepest
        # environment hash of the shallower side.
        fillvalue = (ligand_ecfp[-1] if depth_ligand < depth_protein
                     else protein_ecfp[-1])

        hashed_pairs.extend(
            hash32(pair)
            for pair in zip_longest(ligand_ecfp, protein_ecfp,
                                    fillvalue=fillvalue))

    # folding and sorting
    folded = fold(np.array(hashed_pairs), size=size)
    plec = np.sort(folded)

    # count_bits
    if not count_bits:
        plec = np.unique(plec)

    # sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
def SPLIF(ligand, protein, depth=1, size=4096, distance_cutoff=4.5):
    """Compute the structural protein-ligand interaction fingerprint (SPLIF).

    Based on http://pubs.acs.org/doi/abs/10.1021/ci500319f.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    depth : int (default = 1)
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 4096)
        SPLIF is folded to given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    Returns
    -------
    SPLIF : numpy array
        Sorted structured array with one record per atom pair returned by
        ``close_contacts``. Every record has three fields:
        ``hash`` - folded hash of the (ligand, protein) atom environments,
        ``ligand_coords`` - shape (7, 3): the ligand atom's coordinates
        followed by its ``neighbors`` entry,
        ``protein_coords`` - shape (7, 3): the same for the protein atom.
    """
    # Hydrogens do not take part in the fingerprint.
    heavy_protein = protein.atom_dict[protein.atom_dict['atomicnum'] != 1]
    heavy_ligand = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]
    protein_atoms, ligand_atoms = close_contacts(
        heavy_protein, heavy_ligand, cutoff=distance_cutoff)

    record_dtype = [('hash', int),
                    ('ligand_coords', np.float32, (7, 3)),
                    ('protein_coords', np.float32, (7, 3))]
    splif = np.zeros((len(ligand_atoms)), dtype=record_dtype)

    # Atom representations are computed once and shared by all hash calls.
    lig_atom_repr = {}
    for atom_id in heavy_ligand['id']:
        lig_atom_repr[atom_id] = _ECFP_atom_repr(ligand, int(atom_id))
    prot_atom_repr = {}
    for atom_id in heavy_protein['id']:
        prot_atom_repr[atom_id] = _ECFP_atom_repr(protein, int(atom_id))

    contact_pairs = zip(ligand_atoms, protein_atoms)
    for row, (lig_atom, prot_atom) in enumerate(contact_pairs):
        # Rows belonging to hydrogens (if any slipped through) stay zeroed.
        if lig_atom['atomicnum'] == 1 or prot_atom['atomicnum'] == 1:
            continue

        # Only the outermost (deepest) environment hash of each atom is used.
        lig_env = _ECFP_atom_hash(ligand,
                                  int(lig_atom['id']),
                                  depth=depth,
                                  atom_repr_dict=lig_atom_repr)[-1]
        prot_env = _ECFP_atom_hash(protein,
                                   int(prot_atom['id']),
                                   depth=depth,
                                   atom_repr_dict=prot_atom_repr)[-1]
        # Sorting makes the hash independent of the argument order, i.e.
        # SPLIF(protein, ligand) hashes the same pair identically.
        pair_hash = hash32(tuple(sorted((lig_env, prot_env))))

        lig_xyz = np.vstack((lig_atom['coords'].reshape((1, 3)),
                             lig_atom['neighbors']))
        prot_xyz = np.vstack((prot_atom['coords'].reshape((1, 3)),
                              prot_atom['neighbors']))
        splif[row] = (pair_hash, lig_xyz, prot_xyz)

    # folding
    splif['hash'] = fold(splif['hash'], size)
    return np.sort(splif)
def build(self, ligands, protein=None):
    """Descriptor building method

    Parameters
    ----------
    ligands: array-like
        An array or generator of oddt.toolkit.Molecule objects for which
        the descriptor is computed

    protein: oddt.toolkit.Molecule object (default=None)
        Protein object to be used while generating descriptors.
        If falsy, the protein already stored on the instance
        (``self.protein``) is used. Otherwise it is handed to
        ``self.set_protein`` before the descriptors are computed.

    Returns
    -------
    descs: numpy array, shape=[n_samples, n_features]
        An array of binana descriptors, aligned with input ligands (the
        original documentation states 351 features). When ``ligands``
        yields no molecules an empty float array of shape (0, 0) is
        returned instead of raising.
    """
    if protein:
        self.set_protein(protein)
    else:
        protein = self.protein
    protein_dict = protein.atom_dict

    # Constant lookup tables, built once instead of once per ligand.
    vina_scores = ['vina_affinity',
                   'vina_gauss1',
                   'vina_gauss2',
                   'vina_repulsion',
                   'vina_hydrophobic',
                   'vina_hydrogen']
    # (receptor atom type, ligand atom type) pairs, AutoDock4 type names
    ele_types = (
        ('A', 'A'), ('A', 'C'), ('A', 'CL'), ('A', 'F'), ('A', 'FE'),
        ('A', 'HD'), ('A', 'MG'), ('A', 'MN'), ('A', 'N'), ('A', 'NA'),
        ('A', 'OA'), ('A', 'SA'), ('A', 'ZN'), ('BR', 'C'), ('BR', 'HD'),
        ('BR', 'OA'), ('C', 'C'), ('C', 'CL'), ('C', 'F'), ('C', 'HD'),
        ('C', 'MG'), ('C', 'MN'), ('C', 'N'), ('C', 'NA'), ('C', 'OA'),
        ('C', 'SA'), ('C', 'ZN'), ('CL', 'FE'), ('CL', 'HD'), ('CL', 'MG'),
        ('CL', 'N'), ('CL', 'OA'), ('CL', 'ZN'), ('F', 'HD'), ('F', 'N'),
        ('F', 'OA'), ('F', 'SA'), ('F', 'ZN'), ('FE', 'HD'), ('FE', 'N'),
        ('FE', 'OA'), ('HD', 'HD'), ('HD', 'I'), ('HD', 'MG'), ('HD', 'MN'),
        ('HD', 'N'), ('HD', 'NA'), ('HD', 'OA'), ('HD', 'P'), ('HD', 'S'),
        ('HD', 'SA'), ('HD', 'ZN'), ('MG', 'NA'), ('MG', 'OA'), ('MN', 'N'),
        ('MN', 'OA'), ('N', 'N'), ('N', 'NA'), ('N', 'OA'), ('N', 'SA'),
        ('N', 'ZN'), ('NA', 'OA'), ('NA', 'SA'), ('NA', 'ZN'), ('OA', 'OA'),
        ('OA', 'SA'), ('OA', 'ZN'), ('S', 'ZN'), ('SA', 'ZN'), ('A', 'BR'),
        ('A', 'I'), ('A', 'P'), ('A', 'S'), ('BR', 'N'), ('BR', 'SA'),
        ('C', 'FE'), ('C', 'I'), ('C', 'P'), ('C', 'S'), ('CL', 'MN'),
        ('CL', 'NA'), ('CL', 'P'), ('CL', 'S'), ('CL', 'SA'), ('CU', 'HD'),
        ('CU', 'N'), ('FE', 'NA'), ('FE', 'SA'), ('I', 'N'), ('I', 'OA'),
        ('MG', 'N'), ('MG', 'P'), ('MG', 'S'), ('MG', 'SA'), ('MN', 'NA'),
        ('MN', 'P'), ('MN', 'S'), ('MN', 'SA'), ('N', 'P'), ('N', 'S'),
        ('NA', 'P'), ('NA', 'S'), ('OA', 'P'), ('OA', 'S'), ('P', 'S'),
        ('P', 'SA'), ('P', 'ZN'), ('S', 'SA'), ('SA', 'SA'),
    )
    ele_rec_types, ele_lig_types = zip(*ele_types)
    ligand_atom_types = ['A', 'BR', 'C', 'CL', 'F', 'HD', 'I', 'N', 'NA',
                         'OA', 'P', 'S', 'SA']

    # One float row per ligand. Rows are stacked once at the end, which
    # avoids re-copying the whole matrix with np.vstack on every iteration.
    rows = []
    for mol in ligands:
        mol_dict = mol.atom_dict
        vec = tuple()

        # Vina
        # TODO: Asynchronous output from vina, push command to score and
        #       retrieve at the end?
        # TODO: Check if ligand has vina scores
        scored_mol = self.vina.score(mol, single=True)[0].data
        vec += tuple([scored_mol[key] for key in vina_scores])

        # Close Contacts (<4A)
        vec += tuple(self.cc_4.build(mol, single=True).flatten())

        # Electrostatics (<4A)
        ele_mol_atoms = atoms_by_type(mol_dict, ele_lig_types,
                                      'atom_types_ad4')
        ele_rec_atoms = atoms_by_type(protein_dict, ele_rec_types,
                                      'atom_types_ad4')
        ele = tuple()
        for r_t, m_t in ele_types:
            mol_ele_dict, rec_ele_dict = interactions.close_contacts(
                ele_mol_atoms[m_t], ele_rec_atoms[r_t], 4)
            if len(mol_ele_dict) and len(rec_ele_dict):
                # NOTE(review): sqrt is applied before the sum, so this is
                # the sum of absolute coordinate differences rather than
                # the Euclidean distance. Left unchanged on purpose:
                # altering it would change the descriptor values.
                pair_dist = np.sqrt((mol_ele_dict['coords'] -
                                     rec_ele_dict['coords'])**2).sum(axis=-1)
                ele += (mol_ele_dict['charge'] *
                        rec_ele_dict['charge'] /
                        pair_dist *
                        138.94238460104697e4).sum(),  # convert to J/mol
            else:
                ele += 0,
        vec += tuple(ele)

        # Ligand Atom Types
        atoms = atoms_by_type(mol_dict, ligand_atom_types, 'atom_types_ad4')
        atoms_counts = [len(atoms[t]) for t in ligand_atom_types]
        vec += tuple(atoms_counts)

        # Close Contacts (<2.5A)
        vec += tuple(self.cc_25.build(mol, single=True).flatten())

        # H-Bonds (<4A)
        hbond_mol, hbond_rec, strict = interactions.hbond(mol, protein, 4)
        # Retain only strict hbonds
        hbond_mol = hbond_mol[strict]
        hbond_rec = hbond_rec[strict]
        backbone = hbond_rec['isbackbone']
        alpha = hbond_rec['isalpha']
        beta = hbond_rec['isbeta']
        other = ~alpha & ~beta
        donor_mol = hbond_mol['isdonor']
        donor_rec = hbond_rec['isdonor']
        hbond_vec = ((donor_mol & backbone & alpha).sum(),
                     (donor_mol & backbone & beta).sum(),
                     (donor_mol & backbone & other).sum(),
                     (donor_mol & ~backbone & alpha).sum(),
                     (donor_mol & ~backbone & beta).sum(),
                     (donor_mol & ~backbone & other).sum(),
                     (donor_rec & backbone & alpha).sum(),
                     (donor_rec & backbone & beta).sum(),
                     (donor_rec & backbone & other).sum(),
                     (donor_rec & ~backbone & alpha).sum(),
                     (donor_rec & ~backbone & beta).sum(),
                     (donor_rec & ~backbone & other).sum())
        vec += tuple(hbond_vec)

        # Hydrophobic contacts (<4A)
        hydrophobic = interactions.hydrophobic_contacts(mol, protein, 4)[1]
        backbone = hydrophobic['isbackbone']
        alpha = hydrophobic['isalpha']
        beta = hydrophobic['isbeta']
        other = ~alpha & ~beta
        hyd_vec = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(hydrophobic))
        vec += tuple(hyd_vec)

        # Pi-stacking (<7.5A)
        _, pi_rec, pi_paralel, pi_tshaped = interactions.pi_stacking(
            mol, protein, 7.5)
        alpha = pi_rec['isalpha'] & pi_paralel
        beta = pi_rec['isbeta'] & pi_paralel
        other = ~alpha & ~beta & pi_paralel
        pi_vec = (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_vec)

        # count T-shaped Pi-Pi interaction; computed here while pi_rec
        # still refers to the stacking result, appended after Pi-cation.
        alpha = pi_rec['isalpha'] & pi_tshaped
        beta = pi_rec['isbeta'] & pi_tshaped
        other = ~alpha & ~beta & pi_tshaped
        pi_t_vec = (alpha.sum(), beta.sum(), other.sum())

        # Pi-cation (<6A): protein ring / ligand cation first ...
        pi_rec, _, strict = interactions.pi_cation(protein, mol, 6)
        alpha = pi_rec['isalpha'] & strict
        beta = pi_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec = (alpha.sum(), beta.sum(), other.sum())

        # ... then ligand ring / protein cation.
        _, cat_rec, strict = interactions.pi_cation(mol, protein, 6)
        alpha = cat_rec['isalpha'] & strict
        beta = cat_rec['isbeta'] & strict
        other = ~alpha & ~beta & strict
        pi_cat_vec += (alpha.sum(), beta.sum(), other.sum())
        vec += tuple(pi_cat_vec)

        # T-shape (perpendicular Pi's) (<7.5A)
        vec += tuple(pi_t_vec)

        # Active site flexibility (<4A)
        acitve_site = interactions.close_contacts(mol_dict, protein_dict, 4)[1]
        backbone = acitve_site['isbackbone']
        alpha = acitve_site['isalpha']
        beta = acitve_site['isbeta']
        other = ~alpha & ~beta
        as_flex = ((backbone & alpha).sum(),
                   (backbone & beta).sum(),
                   (backbone & other).sum(),
                   (~backbone & alpha).sum(),
                   (~backbone & beta).sum(),
                   (~backbone & other).sum(),
                   len(acitve_site))
        vec += tuple(as_flex)

        # Salt bridges (<5.5); protein-side atoms of every bridge
        salt_bridge_dict = interactions.salt_bridges(mol, protein, 5.5)[1]
        vec += (salt_bridge_dict['isalpha'].sum(),
                salt_bridge_dict['isbeta'].sum(),
                (~salt_bridge_dict['isalpha'] &
                 ~salt_bridge_dict['isbeta']).sum(),
                len(salt_bridge_dict))

        # Rotatable bonds
        vec += mol.num_rotors,

        rows.append(np.array(vec, dtype=float))

    if not rows:
        # Nothing to describe: the feature count is unknown without
        # processing at least one ligand, hence the (0, 0) shape.
        return np.zeros((0, 0), dtype=float)
    return np.vstack(rows)
def time_close_contacts(self):
    """Run close_contacts (cutoff=10.) for every molecule in self.mols
    against self.protein; the results are discarded."""
    for ligand in self.mols:
        close_contacts(ligand.atom_dict,
                       self.protein.atom_dict,
                       cutoff=10.)
def PLEC(ligand, protein, depth_ligand=2, depth_protein=4,
         distance_cutoff=4.5, size=16384, count_bits=True, sparse=True,
         ignore_hoh=True):
    """Protein ligand extended connectivity fingerprint.

    For every pair of atoms in contact, compute the ECFP environments of
    both atoms and hash them together depth by depth.

    Parameters
    ----------
    ligand, protein : oddt.toolkit.Molecule object
        Molecules, which are analysed in order to find interactions.

    depth_ligand, depth_protein : int (default = (2, 4))
        The depth of the fingerprint, i.e. the number of bonds in Morgan
        algorithm. Note: For ECFP2: depth = 1, ECFP4: depth = 2, etc.

    size : int (default = 16384)
        PLEC is folded to given size.

    distance_cutoff : float (default = 4.5)
        Cutoff distance for close contacts.

    sparse : bool (default = True)
        Should fingerprints be dense (contain all bits) or sparse (just
        the on bits).

    count_bits : bool (default = True)
        Should the bits be counted or unique. In dense representation it
        translates to integer array (count_bits=True) or boolean array
        if False.

    ignore_hoh : bool (default = True)
        Should the water molecules be ignored. This is based on the name
        of the residue ('HOH').

    Returns
    -------
    PLEC : numpy array
        Sorted fingerprint. Before the optional ``count_bits`` /
        ``sparse`` post-processing it holds one entry per contact pair
        and depth level.
    """
    hashed_pairs = []

    # Hydrogens never take part; waters are optionally masked out as well.
    protein_mask = (protein.atom_dict['atomicnum'] != 1)
    if ignore_hoh:
        protein_mask = protein_mask & (protein.atom_dict['resname'] != 'HOH')
    protein_dict = protein.atom_dict[protein_mask]
    ligand_dict = ligand.atom_dict[ligand.atom_dict['atomicnum'] != 1]

    # atoms in contact
    protein_atoms, ligand_atoms = close_contacts(
        protein_dict, ligand_dict, cutoff=distance_cutoff)

    contact_ids = zip(ligand_atoms['id'], protein_atoms['id'])
    for ligand_atom, protein_atom in contact_ids:
        ligand_ecfp = _ECFP_atom_hash(ligand,
                                      int(ligand_atom),
                                      depth=depth_ligand)
        protein_ecfp = _ECFP_atom_hash(protein,
                                       int(protein_atom),
                                       depth=depth_protein)
        assert len(ligand_ecfp) == depth_ligand + 1
        assert len(protein_ecfp) == depth_protein + 1

        # zip_longest pads the shorter environment list with the deepest
        # environment hash of the shallower side.
        fillvalue = (ligand_ecfp[-1] if depth_ligand < depth_protein
                     else protein_ecfp[-1])

        hashed_pairs.extend(
            hash32(pair)
            for pair in zip_longest(ligand_ecfp, protein_ecfp,
                                    fillvalue=fillvalue))

    # folding and sorting
    folded = fold(np.array(hashed_pairs), size=size)
    plec = np.sort(folded)

    # count_bits
    if not count_bits:
        plec = np.unique(plec)

    # sparse or dense FP
    if not sparse:
        plec = sparse_to_dense(plec, size=size)
    return plec
def time_close_contacts(self):
    """Call close_contacts with cutoff=10. once per molecule in self.mols,
    pairing each molecule's atoms with the protein's atoms."""
    for mol in self.mols:
        mol_atoms = mol.atom_dict
        protein_atoms = self.protein.atom_dict
        close_contacts(mol_atoms, protein_atoms, cutoff=10.)