def __init__(self, pdb_structure, config, use_cache=None, write_cache=True):
    """Voxelize a protein structure, going through an on-disk ``.npy`` cache.

    The result is stored on ``self.prot_vox_t``. When freshly computed it has
    shape ``(1, nchannels, N[0], N[1], N[2])`` (see the reshape below); when
    loaded from the cache it is whatever array was saved there.

    Parameters
    ----------
    pdb_structure : str
        Path of the structure file. Its base name up to the first ``.`` is
        used as the cache key.
    config : mapping
        Keys read here: ``'use_cache_voxels'``, ``'cache'``, ``'voxelsize'``,
        ``'bp_dimension'`` and ``'bp_centers'``.
    use_cache : bool or None
        ``None`` (default) defers to ``config['use_cache_voxels']``; an
        explicit ``True``/``False`` overrides the config value.
    write_cache : bool
        Save freshly computed voxels to the cache file. The file is also
        written whenever caching is enabled.
    """
    import os.path

    self.config = config

    # An explicit ``use_cache=False`` must win over the config value; the
    # former ``use_cache or config[...]`` silently ignored it.
    if use_cache is None:
        use_cache_voxels = config['use_cache_voxels']
    else:
        use_cache_voxels = use_cache

    file_name = str(os.path.basename(pdb_structure))
    # Plain string concatenation: config['cache'] has to carry its own
    # trailing path separator (or is deliberately used as a file prefix).
    check_oeb = self.config['cache'] + file_name.split(".")[0] + ".npy"

    if use_cache_voxels and os.path.isfile(check_oeb):
        self.prot_vox_t = np.load(check_oeb)
        return

    # Imported only on the compute path so a cache hit does not need
    # moleculekit at all.
    from moleculekit.molecule import Molecule
    from moleculekit.tools.atomtyper import prepareProteinForAtomtyping

    prot = Molecule(pdb_structure)
    prot = prepareProteinForAtomtyping(prot, verbose=False)
    prot_vox, prot_centers, prot_N = getVoxelDescriptors(
        prot,
        buffer=0,
        voxelsize=config['voxelsize'],
        boxsize=config['bp_dimension'],
        center=config['bp_centers'],
        validitychecks=False)

    nchannels = prot_vox.shape[1]
    self.prot_vox_t = prot_vox.transpose().reshape(
        [1, nchannels, prot_N[0], prot_N[1], prot_N[2]])

    # The cache is only (re)written after a fresh computation.
    if write_cache or use_cache_voxels:
        np.save(check_oeb, self.prot_vox_t)
def get_voxel_test(test_pdb, k, voxel_size):
    """
    Voxelize a cubic box around every residue of a test structure.

    The structure is parsed with ``PDBParser`` to enumerate its residues, and
    loaded a second time as a ``Molecule`` (path resolved against the data
    directory returned by ``home``) for the voxelization itself. One box of
    edge ``voxel_size`` is centred on the ``CA`` atom of each residue.

    ``k`` is accepted but not used by this function.

    Returns the list of voxel feature arrays, one per residue. If the number
    of residues differs from the length of ``get_sequence(structure)`` a
    message is printed and the interpreter is asked to exit via ``quit()``.
    """
    tut_data = home(dataDir='/home/sdv/m2bi/gollitrault/M2BI/projet_long/src')
    cube = [voxel_size] * 3
    voxels = []
    pdb_parser = PDBParser()
    structure = pdb_parser.get_structure('test', test_pdb)
    all_residues = list(structure.get_residues())

    # Guard clause: bail out when residues and sequence disagree.
    if len(all_residues) != len(get_sequence(structure)):
        print("BAD number of residues do not correspond dont know why")
        quit()
        return voxels

    molecule = Molecule(os.path.join(tut_data, test_pdb))
    molecule = prepareProteinForAtomtyping(molecule)
    for residue in all_residues:
        ca_position = list(residue["CA"].get_vector())
        descriptors, _centers, _counts = getVoxelDescriptors(
            molecule,
            boxsize=cube,
            center=ca_position,
            validitychecks=False)
        voxels.append(descriptors)
    return voxels
def __init__(self, pdb_structure, config):
    """Voxelize ``pdb_structure`` and keep the grid on ``self.prot_vox_t``.

    ``config`` is stored on the instance; the keys read here are
    ``'voxelsize'``, ``'bp_dimension'`` (box size) and ``'bp_centers'``
    (box centre). The voxel features are transposed and reshaped to
    ``[1, nchannels, N[0], N[1], N[2]]``.
    """
    from moleculekit.molecule import Molecule
    from moleculekit.tools.atomtyper import prepareProteinForAtomtyping

    self.config = config

    protein = prepareProteinForAtomtyping(Molecule(pdb_structure), verbose=False)
    voxels, _centers, grid = getVoxelDescriptors(
        protein,
        buffer=0,
        voxelsize=config['voxelsize'],
        boxsize=config['bp_dimension'],
        center=config['bp_centers'],
        validitychecks=False,
    )

    # Channels-first layout with a leading axis of length one.
    channel_count = voxels.shape[1]
    target_shape = [1, channel_count, grid[0], grid[1], grid[2]]
    self.prot_vox_t = voxels.transpose().reshape(target_shape)
def get_voxel(self, path):
    """Build the stacked ligand + protein voxel tensor of one complex.

    The directory ``os.path.join(self.root, path)`` is scanned for a
    ``*_protein.pdb`` and a ``*_ligand.mol2`` file. Both are voxelized in a
    24 x 24 x 24 box centred on the origin with ``buffer=1``. If several
    files match a pattern, the last one listed wins.

    Parameters
    ----------
    path : str
        Complex directory, relative to ``self.root``.

    Returns
    -------
    torch.Tensor
        Ligand channels followed by protein channels, concatenated along
        dimension 1 and then squeezed on dimension 0.

    Raises
    ------
    FileNotFoundError
        If the directory holds no ``*_protein.pdb`` or no ``*_ligand.mol2``
        file (this previously surfaced as an ``UnboundLocalError``).
    """
    complex_dir = os.path.join(self.root, path)

    def _to_tensor(vox, counts):
        # Each grid is reshaped with its OWN channel count; the ligand used
        # to be reshaped with the protein's.
        nchannels = vox.shape[1]
        grid = vox.transpose().reshape(
            [1, nchannels, counts[0], counts[1], counts[2]])
        return torch.tensor(grid.astype(np.float32))

    prot_vox_t = None
    for ele in os.listdir(complex_dir):
        if not fnmatch.fnmatch(ele, '*_protein.pdb'):
            continue
        prot = Molecule(os.path.join(complex_dir, ele))
        prot.filter('protein')
        # If your structure is fully protonated and contains all bond
        # information in prot.bonds skip this step!
        prot = prepareProteinForAtomtyping(prot)
        # NOTE(review): the view calls look like leftovers from interactive
        # use; kept so behaviour is unchanged - confirm they are wanted.
        prot.view(guessBonds=False)
        prot_vox, prot_centers, prot_N = getVoxelDescriptors(
            prot, boxsize=[24, 24, 24], center=[0, 0, 0], buffer=1)
        prot.view(guessBonds=False)
        viewVoxelFeatures(prot_vox, prot_centers, prot_N)
        prot_vox_t = _to_tensor(prot_vox, prot_N)

    if prot_vox_t is None:
        raise FileNotFoundError(
            "no '*_protein.pdb' file found in " + str(complex_dir))

    lig_vox_t = None
    for ele in os.listdir(complex_dir):
        if not fnmatch.fnmatch(ele, '*_ligand.mol2'):
            continue
        slig = SmallMol(os.path.join(complex_dir, ele), force_reading=True)
        slig.view(guessBonds=False)
        # For the ligand, since it's small, the voxel resolution could be
        # increased to 0.5 A instead of the default 1 A if so desired.
        lig_vox, lig_centers, lig_N = getVoxelDescriptors(
            slig, boxsize=[24, 24, 24], center=[0, 0, 0], voxelsize=1,
            buffer=1)
        slig.view(guessBonds=False)
        viewVoxelFeatures(lig_vox, lig_centers, lig_N)
        lig_vox_t = _to_tensor(lig_vox, lig_N)

    if lig_vox_t is None:
        raise FileNotFoundError(
            "no '*_ligand.mol2' file found in " + str(complex_dir))

    x = torch.cat((lig_vox_t, prot_vox_t), 1)
    x.squeeze_(0)
    return x
def __init__(self, obs_config: Config):
    """
    Voxelize the protein named by the observation config.

    The parent initializer is called with ``obs_config`` first. The protein
    at ``obs_config.pdb`` is then prepared for atom typing and voxelized, and
    the result of ``self.reshape`` is stored on ``self.prot_vox_t``.

    :param obs_config: configuration object; the attributes read here are
        ``pdb``, ``voxelsize``, ``boxsize``, ``centers``, ``method`` and
        ``validity_check``
    """
    super().__init__(obs_config)
    self.obs_config = obs_config
    settings = self.obs_config

    protein = prepareProteinForAtomtyping(Molecule(settings.pdb), verbose=False)
    voxels, _centers, grid = getVoxelDescriptors(
        protein,
        buffer=0,
        voxelsize=settings.voxelsize,
        boxsize=settings.boxsize,
        center=settings.centers,
        method=settings.method,
        validitychecks=settings.validity_check,
    )
    self.prot_vox_t = self.reshape(voxels, grid)
def compute_voxel_features(mutation_site, pdb_file, verbose=False, boxsize=16, voxelsize=1, rotations=None):
    """Compute voxel features around the mutation site.

    The box is centred on the CB atom of the residue selected by
    ``resid <mutation_site>``. When that selection is empty (a residue
    without CB, e.g. glycine) the CA atom is used instead.

    Parameters
    ----------
    mutation_site : int
        Residue sequence position where the mutation took place.
    pdb_file : str
        Structure file loaded with ``Molecule``.
    verbose : bool
        Forwarded to ``prepareProteinForAtomtyping``.
    boxsize : number
        Edge length of the cubic box.
    voxelsize : number
        Edge length of one voxel.
    rotations : list, optional
        Rotation angles in radian around x, y, and z axes. When given, the
        voxel centres are rotated around the box centre before the features
        are computed.

    Returns
    -------
    NumPy nd-array
        Features reshaped to ``(nchannels, n, n, n)`` with
        ``n = int(boxsize / voxelsize)``.

    Raises
    ------
    ValueError
        If neither a CB nor a CA atom is found for ``mutation_site``.
    """
    mol = Molecule(pdb_file)
    prot = prepareProteinForAtomtyping(mol, verbose=verbose)

    center = mol.get('coords', 'resid ' + str(mutation_site) + ' and name CB')
    if center.size == 0:
        center = mol.get('coords', 'resid ' + str(mutation_site) + ' and name CA')
    if center.size == 0:
        # Fail here with a clear message instead of deep inside the
        # voxelizer with an empty centre.
        raise ValueError(
            'no CB or CA atom found for resid ' + str(mutation_site)
            + ' in ' + str(pdb_file))
    # NOTE(review): the selection is by resid only; presumably a multi-chain
    # file can match several atoms and give more than three values - confirm.
    center = center.flatten()

    box = [boxsize, boxsize, boxsize]
    if rotations is None:
        features, _, _ = getVoxelDescriptors(
            prot, center=center, boxsize=box, voxelsize=voxelsize,
            validitychecks=False)
    else:
        voxel_centers = getCenters(
            prot, boxsize=box, center=center, voxelsize=voxelsize)
        rotated_voxel_centers = rotateCoordinates(
            voxel_centers[0], rotations, center)
        # Only two values are unpacked when user centres are passed.
        features, _ = getVoxelDescriptors(
            prot, usercenters=rotated_voxel_centers, validitychecks=False)

    nchannels = features.shape[1]
    n_voxels = int(boxsize / voxelsize)
    features = features.transpose().reshape(
        (nchannels, n_voxels, n_voxels, n_voxels))
    return features
def generateProteinDescriptor(pdb_file, voxel_size=64, mode='standard'):
    """Voxelize a protein file into a channels-first grid.

    Parameters
    ----------
    pdb_file : str
        Path of the structure file.
    voxel_size : int
        Edge length of the cubic box passed as ``boxsize``. The voxel
        resolution itself is fixed to ``voxelsize=1``.
    mode : str
        ``'scPDB'`` reads ``pdb_file`` directly as a ``SmallMol``. Any other
        value strips HET atoms with ``removeHETAtoms``, prepares the protein
        for atom typing, centres it, and writes it back to the HET-free file
        before re-reading it.

    Returns
    -------
    numpy.ndarray or bool
        Voxel features reshaped to ``[1, nchannels, N[0], N[1], N[2]]``, or
        ``False`` when the voxelization raised an error.
    """
    # Remove HetAtoms
    pdb_id = os.path.basename(pdb_file).split('.')[0]

    # If your structure is fully protonated and contains all bond information
    # in prot.bonds skip this step!
    if mode != 'scPDB':
        pdb_file_nonhet = removeHETAtoms(pdb_file, pdb_id)
        prot = Molecule(pdb_file_nonhet)
        prot = prepareProteinForAtomtyping(prot)
        prot.center()
        # The centred structure is written out and re-read from disk.
        prot.write(pdb_file_nonhet)
        prot = Molecule(pdb_file_nonhet)
        prot.view(guessBonds=False)
    else:
        prot = SmallMol(pdb_file)

    try:
        prot_vox, prot_centers, prot_N = getVoxelDescriptors(
            prot, voxelsize=1, buffer=1, center=(0, 0, 0),
            boxsize=(voxel_size, voxel_size, voxel_size))
    except Exception:
        # Best effort: callers get False for structures that cannot be
        # voxelized. ``Exception`` (not a bare except) keeps Ctrl-C and
        # SystemExit working.
        return False

    nchannels = prot_vox.shape[1]
    # Reshape Voxels
    prot_vox_t = prot_vox.transpose().reshape(
        [1, nchannels, prot_N[0], prot_N[1], prot_N[2]])
    return prot_vox_t
def _calc_features_ligand(lig_path):
    """Load a ligand; return its mean x/y/z coordinate, channels and molecule."""
    sm = SmallMol(lig_path, force_reading=True, fixHs=False)
    coords = sm.get('coords')
    x = np.mean(coords[:, 0])
    y = np.mean(coords[:, 1])
    z = np.mean(coords[:, 2])
    smallChannels, sm = voxeldescriptors.getChannels(sm)
    return x, y, z, smallChannels, sm


def _calc_features_crop(prot, x, y, z, boxsize):
    """Filter a protein of more than 50000 atoms to 2.5 box sizes around (x, y, z)."""
    if prot.numAtoms > 50000:
        factorx = boxsize[0] * 2.5
        factory = boxsize[1] * 2.5
        factorz = boxsize[2] * 2.5
        prot.filter('z < ' + format(z + factorz) + ' and z > ' + format(z - factorz))
        prot.filter('x < ' + format(x + factorx) + ' and x > ' + format(x - factorx))
        prot.filter('y < ' + format(y + factory) + ' and y > ' + format(y - factory))


def _calc_features_atomtyped(prot_path, x, y, z, boxsize):
    """Prepare a protein with prepareProteinForAtomtyping; return (channels, prot)."""
    prot = Molecule(prot_path)
    _calc_features_crop(prot, x, y, z, boxsize)
    prot.filter('protein')
    prot.bonds = prot._getBonds()
    prot = prepareProteinForAtomtyping(prot)
    prot.set(value='Se', field='element', sel='name SE')
    protChannels, prot = voxeldescriptors.getChannels(prot)
    return protChannels, prot


def _calc_features_charmm(prot_path, x, y, z, boxsize):
    """Prepare a protein with proteinPrepare + charmm.build; return (channels, prot)."""
    prot = Molecule(prot_path)
    _calc_features_crop(prot, x, y, z, boxsize)
    prot.filter('protein')
    prot.filter('not resname 3EB')
    prot = proteinPrepare(prot)
    prot = autoSegment(prot)
    # Residues are not supported: mutate them to their standard counterparts.
    # Each mutation is best effort, a failure must not abort the preparation.
    for modified, standard in (('TPO', 'THR'), ('MSE', 'MET'), ('SEP', 'SER')):
        try:
            prot.mutateResidue('resname ' + modified, standard)
        except Exception:
            pass
    prot = charmm.build(prot, ionize=False)
    protChannels, prot = voxeldescriptors.getChannels(prot)
    return protChannels, prot


def calcFeatures(number, ligPath, altLigPath, protPath, altProtPath, boxsize, targetpath):
    """Compute voxel channels for a ligand and its protein.

    The ligand is read from ``ligPath``, falling back to ``altLigPath``; an
    error on the fallback propagates. The protein is then prepared with up to
    three attempts, in this order:

    1. ``protPath`` with ``prepareProteinForAtomtyping``
    2. ``altProtPath`` with ``prepareProteinForAtomtyping``
    3. ``protPath`` with ``proteinPrepare`` + ``charmm.build``

    Proteins with more than 50000 atoms are first filtered to the region
    within 2.5 box sizes of the ligand's mean coordinate.

    Parameters
    ----------
    number
        Identifier written to the log line when all protein attempts fail.
    ligPath, altLigPath : str
        Primary and fallback ligand files.
    protPath, altProtPath : str
        Primary and fallback protein files.
    boxsize : sequence of three numbers
        Box size per axis, used only for the filtering of large proteins.
    targetpath
        Not used by this function.

    Returns
    -------
    dict
        Keys ``'smallChannels'``, ``'sm'``, ``'protChannels'`` and ``'prot'``.
        When every protein attempt fails, a line is appended to
        ``../../Data/prep_log.txt`` and both ``'protChannels'`` and ``'prot'``
        are ``None`` (``'prot'`` used to be unbound or half-prepared).
    """
    features = {}

    try:
        x, y, z, smallChannels, sm = _calc_features_ligand(ligPath)
    except Exception:
        x, y, z, smallChannels, sm = _calc_features_ligand(altLigPath)
    features['smallChannels'] = smallChannels
    features['sm'] = sm

    attempts = (
        (_calc_features_atomtyped, protPath),
        (_calc_features_atomtyped, altProtPath),
        (_calc_features_charmm, protPath),
    )
    protChannels = None
    prot = None
    for prepare, prot_path in attempts:
        try:
            protChannels, prot = prepare(prot_path, x, y, z, boxsize)
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still
            # interrupts a long preparation run.
            continue
        break
    else:
        with open("../../Data/prep_log.txt", "a") as log_file:
            log_file.write('Protein ' + protPath + ' leads to errors! Proteinnumber: ' + str(number) + '\n')
        protChannels = None
        prot = None

    features['protChannels'] = protChannels
    features['prot'] = prot
    return features
def calcProtVoxel(self, x, y, z, protPath, number, altProtPath):
    """Voxelize a protein around ``(x, y, z)`` with several fallbacks.

    Attempts, in order:

    1. ``protPath`` prepared with ``prepareProteinForAtomtyping``
    2. ``protPath`` prepared with ``proteinPrepare`` + ``charmm.build``
    3. ``altProtPath`` prepared with ``prepareProteinForAtomtyping``

    Proteins with more than 50000 atoms are first filtered to the region
    within 2.5 box sizes of ``(x, y, z)``.

    Parameters
    ----------
    x, y, z : number
        Centre of the voxel box.
    protPath, altProtPath : str
        Primary and fallback protein files.
    number
        Identifier written to the log line when every attempt fails.

    Returns
    -------
    tuple
        ``(features, centers, N)`` as unpacked from ``getVoxelDescriptors``.
        When every attempt fails, a line is appended to
        ``../../Data/prep_log.txt`` and random placeholders of shape
        ``(13824, 8)`` / ``(13824, 3)`` with ``N = [24, 24, 24]`` are
        returned instead.
    """
    boxsize = self.boxsize

    def crop(prot):
        # Shrink very large structures before the expensive preparation.
        if prot.numAtoms > 50000:
            factorx = boxsize[0] * 2.5
            factory = boxsize[1] * 2.5
            factorz = boxsize[2] * 2.5
            prot.filter('z < ' + format(z + factorz) + ' and z > ' + format(z - factorz))
            prot.filter('x < ' + format(x + factorx) + ' and x > ' + format(x - factorx))
            prot.filter('y < ' + format(y + factory) + ' and y > ' + format(y - factory))

    def voxelize(prot):
        return voxeldescriptors.getVoxelDescriptors(
            prot, center=[x, y, z], boxsize=boxsize)

    def via_atomtyping(path):
        prot = Molecule(path)
        crop(prot)
        prot.filter('protein')
        prot.bonds = prot._getBonds()
        prot = prepareProteinForAtomtyping(prot)
        prot.set(value='Se', field='element', sel='name SE')
        return voxelize(prot)

    def via_charmm(path):
        prot = Molecule(path)
        crop(prot)
        prot.filter('protein')
        prot.filter('not resname 3EB')
        prot = proteinPrepare(prot)
        prot = autoSegment(prot)
        prot.set(value='Se', field='element', sel='name SE')
        # Best effort: a failed mutation must not abort the preparation.
        for modified, standard in (('TPO', 'THR'), ('MSE', 'MET'), ('SEP', 'SER')):
            try:
                prot.mutateResidue('resname ' + modified, standard)
            except Exception:
                pass
        prot = charmm.build(prot, ionize=False)
        return voxelize(prot)

    attempts = (
        (via_atomtyping, protPath),
        (via_charmm, protPath),
        (via_atomtyping, altProtPath),
    )
    for prepare, path in attempts:
        try:
            f, c, n = prepare(path)
        except Exception:
            # ``Exception`` rather than a bare except, so Ctrl-C still works.
            continue
        return f, c, n

    # The log handle has its own name and is closed by ``with``; it used to
    # share the name ``f`` with the feature array.
    with open("../../Data/prep_log.txt", "a") as log_file:
        log_file.write('Protein ' + protPath + ' leads to errors! Proteinnumber: ' + str(number) + '\n')

    # Random placeholders; 13824 == 24 ** 3 matches the grid reported in ``n``.
    # NOTE(review): these sizes are hard-coded and do not follow
    # self.boxsize - confirm that the box is always 24 x 24 x 24.
    f = np.random.rand(13824, 8)
    c = np.random.rand(13824, 3)
    n = [24, 24, 24]
    return f, c, n
def get_voxel_data(path_bound, list_bound_pdb_file, k, voxel_size):
    """
    Compute a voxel around each residue of the bound structures and
    compute the voxelization.

    For each of the first ``k`` entries of ``list_bound_pdb_file`` the two
    files ``<path_bound>/templates/<entry>_1.pdb`` and ``..._2.pdb`` are
    processed. A cubic box of edge ``voxel_size`` is centred on the ``CA``
    atom of every residue, first for structure 1 and then for structure 2.
    After each pair the accumulated list is passed to
    ``make_voxel_npy_data``.

    Parameters
    ----------
    path_bound : str
        Directory containing the ``templates`` folder.
    list_bound_pdb_file : sequence of str
        Entry names. The last character of each entry is dropped
        (presumably a trailing newline - verify against the caller).
    k : int
        Number of entries to process.
    voxel_size : number
        Edge length of the cubic box.

    Returns
    -------
    list
        Voxel feature arrays in processing order. Processing is best effort:
        on the first loading, preparation or voxelization error the list
        collected so far is returned. Pairs whose residue count does not
        match their sequence length are reported and skipped.
    """
    tut_data = home(dataDir='/home/sdv/m2bi/gollitrault/M2BI/projet_long/src')
    boxsize = [voxel_size, voxel_size, voxel_size]
    list_prot_vox = []
    parser = PDBParser()

    for idx in range(k):
        stem = path_bound + "/templates/" + list_bound_pdb_file[idx][0:-1]
        pdb_1 = stem + '_1.pdb'
        pdb_2 = stem + '_2.pdb'

        structure_1 = parser.get_structure('test_bound_1', pdb_1)
        structure_2 = parser.get_structure('test_bound_2', pdb_2)
        residues_1 = list(structure_1.get_residues())
        # Fixed: this list used to be built from structure_1 again, so the
        # second partner was never checked or voxelized with its own residues.
        residues_2 = list(structure_2.get_residues())

        if (len(residues_1) != len(get_sequence(structure_1))
                or len(residues_2) != len(get_sequence(structure_2))):
            print("BAD number of residues do not correspond dont know why")
            continue

        try:
            prot_1 = Molecule(os.path.join(tut_data, pdb_1))
            prot_2 = Molecule(os.path.join(tut_data, pdb_2))
            prot_1 = prepareProteinForAtomtyping(prot_1)
            prot_2 = prepareProteinForAtomtyping(prot_2)
        except Exception:
            return list_prot_vox

        try:
            # Each partner is voxelized over its own residue list; the second
            # loop used to be bounded by len(residues_1).
            for prot, residues in ((prot_1, residues_1), (prot_2, residues_2)):
                for residue in residues:
                    prot_vox, prot_centers, prot_N = getVoxelDescriptors(
                        prot,
                        boxsize=boxsize,
                        center=list(residue["CA"].get_vector()),
                        validitychecks=False)
                    list_prot_vox.append(prot_vox)
        except Exception:
            return list_prot_vox

        make_voxel_npy_data(list_prot_vox)

    return list_prot_vox