예제 #1
0
    def __init__(self,
                 pdb_structure,
                 config,
                 use_cache=None,
                 write_cache=True):
        """Load or compute the voxel grid of a protein structure.

        The voxels are looked up in a ``.npy`` cache file whose path is
        ``config['cache']`` directly followed by the structure's base name
        (up to its first dot) and ``.npy``.  On a cache miss the structure is
        loaded, prepared for atom typing and voxelized, and the result is
        stored in ``self.prot_vox_t`` reshaped to
        ``[1, nchannels, N[0], N[1], N[2]]``.

        :param pdb_structure: path of the structure file passed to ``Molecule``.
        :param config: mapping providing ``'cache'``, ``'use_cache_voxels'``,
            ``'voxelsize'``, ``'bp_dimension'`` and ``'bp_centers'``.
        :param use_cache: ``True``/``False`` to force the cache on or off;
            ``None`` (default) defers to ``config['use_cache_voxels']``.
        :param write_cache: when true, freshly computed voxels are saved to
            the cache file (they are also saved whenever caching is enabled).
        """
        from moleculekit.molecule import Molecule
        from moleculekit.tools.atomtyper import prepareProteinForAtomtyping
        import os.path

        self.config = config

        # `use_cache or config[...]` would silently discard an explicit
        # use_cache=False, so only fall back to the config value for None.
        if use_cache is None:
            use_cache_voxels = config['use_cache_voxels']
        else:
            use_cache_voxels = use_cache

        file_name = str(os.path.basename(pdb_structure))
        # NOTE: plain string concatenation - config['cache'] is expected to
        # already end with a path separator (or be a file-name prefix).
        cache_path = self.config['cache'] + file_name.split(".")[0] + ".npy"

        if use_cache_voxels and os.path.isfile(cache_path):
            self.prot_vox_t = np.load(cache_path)
            return

        prot = Molecule(pdb_structure)
        prot = prepareProteinForAtomtyping(prot, verbose=False)
        prot_vox, _, prot_N = getVoxelDescriptors(
            prot,
            buffer=0,
            voxelsize=config['voxelsize'],
            boxsize=config['bp_dimension'],
            center=config['bp_centers'],
            validitychecks=False)
        nchannels = prot_vox.shape[1]

        self.prot_vox_t = prot_vox.transpose().reshape(
            [1, nchannels, prot_N[0], prot_N[1], prot_N[2]])

        if write_cache or use_cache_voxels:
            np.save(cache_path, self.prot_vox_t)
예제 #2
0
def get_voxel_test(test_pdb, k, voxel_size):
    """
    Compute a voxel box around each residue of the structure and return the
    voxelization of every box.

    The box for a residue is a cube of edge ``voxel_size`` centred on that
    residue's ``CA`` atom.  If the number of parsed residues differs from the
    length of ``get_sequence(structure)`` a message is printed and the
    interpreter exits.

    :param test_pdb: path of the PDB file (parsed with ``PDBParser`` and also
        joined onto the data directory returned by ``home`` for ``Molecule``).
    :param k: unused; kept so existing callers keep working.
    :param voxel_size: edge length of the cubic box.
    :return: list with one voxel-feature array per residue.
    """
    # Local import: quit() is an interactive-shell helper injected by `site`
    # and is not meant for programs; sys.exit() raises the same SystemExit.
    import sys

    tut_data = home(dataDir='/home/sdv/m2bi/gollitrault/M2BI/projet_long/src')
    boxsize = [voxel_size, voxel_size, voxel_size]
    structure = PDBParser().get_structure('test', test_pdb)
    residues = list(structure.get_residues())

    if len(residues) != len(get_sequence(structure)):
        print("BAD number of residues do not correspond dont know why")
        sys.exit()

    prot = Molecule(os.path.join(tut_data, test_pdb))
    prot = prepareProteinForAtomtyping(prot)

    list_prot_vox = []
    for residue in residues:
        prot_vox, _, _ = getVoxelDescriptors(
            prot,
            boxsize=boxsize,
            center=list(residue["CA"].get_vector()),
            validitychecks=False)
        list_prot_vox.append(prot_vox)
    return list_prot_vox
예제 #3
0
    def __init__(self, pdb_structure, config):
        """Voxelize a protein structure and keep the result on the instance.

        :param pdb_structure: path of the structure file passed to ``Molecule``.
        :param config: mapping providing ``'voxelsize'``, ``'bp_dimension'``
            and ``'bp_centers'`` for the voxel grid.

        The voxel features end up in ``self.prot_vox_t`` with shape
        ``[1, nchannels, N[0], N[1], N[2]]``.
        """
        from moleculekit.molecule import Molecule
        from moleculekit.tools.atomtyper import prepareProteinForAtomtyping

        self.config = config

        typed_protein = prepareProteinForAtomtyping(
            Molecule(pdb_structure), verbose=False)
        voxels, _, grid_shape = getVoxelDescriptors(
            typed_protein,
            buffer=0,
            voxelsize=config['voxelsize'],
            boxsize=config['bp_dimension'],
            center=config['bp_centers'],
            validitychecks=False)

        # Channels first, then the three grid dimensions, with a leading
        # singleton axis.
        target_shape = [1, voxels.shape[1],
                        grid_shape[0], grid_shape[1], grid_shape[2]]
        self.prot_vox_t = voxels.transpose().reshape(target_shape)
예제 #4
0
    def get_voxel(self, path):
        """Build the stacked ligand + protein voxel tensor of one complex.

        The directory ``self.root/path`` is scanned for ``*_protein.pdb`` and
        ``*_ligand.mol2`` files.  Both molecules are voxelized on a 24x24x24
        box centred on the origin and concatenated along the channel axis
        (ligand channels first).  If several files match, the last one
        processed wins.

        :param path: directory of the complex, relative to ``self.root``.
        :return: tensor of shape ``(lig_channels + prot_channels, N0, N1, N2)``.
        :raises FileNotFoundError: if the directory holds no protein or no
            ligand file (previously an ``UnboundLocalError``).
        """
        complex_dir = os.path.join(self.root, path)
        prot_vox_t = None
        lig_vox_t = None

        for prot_name in os.listdir(complex_dir):
            if not fnmatch.fnmatch(prot_name, '*_protein.pdb'):
                continue
            prot = Molecule(os.path.join(complex_dir, prot_name))
            prot.filter('protein')

            # If your structure is fully protonated and contains all bond
            # information in prot.bonds skip this step!
            prot = prepareProteinForAtomtyping(prot)

            prot.view(guessBonds=False)
            prot_vox, prot_centers, prot_N = getVoxelDescriptors(
                prot, boxsize=[24, 24, 24], center=[0, 0, 0], buffer=1)
            prot.view(guessBonds=False)
            viewVoxelFeatures(prot_vox, prot_centers, prot_N)

            prot_channels = prot_vox.shape[1]
            prot_vox_t = prot_vox.transpose().reshape(
                [1, prot_channels, prot_N[0], prot_N[1], prot_N[2]])
            prot_vox_t = torch.tensor(prot_vox_t.astype(np.float32))

            # The ligand scan stays nested so that it only runs (and only
            # opens its viewers) once a protein file has been processed.
            for lig_name in os.listdir(complex_dir):
                if not fnmatch.fnmatch(lig_name, '*_ligand.mol2'):
                    continue
                slig = SmallMol(os.path.join(complex_dir, lig_name),
                                force_reading=True)
                slig.view(guessBonds=False)

                # For the ligand since it's small we could increase the voxel
                # resolution if we so desire to 0.5 A instead of the default
                # 1 A.
                lig_vox, lig_centers, lig_N = getVoxelDescriptors(
                    slig,
                    boxsize=[24, 24, 24],
                    center=[0, 0, 0],
                    voxelsize=1,
                    buffer=1)
                slig.view(guessBonds=False)
                viewVoxelFeatures(lig_vox, lig_centers, lig_N)

                # Use the ligand's own channel count rather than assuming it
                # equals the protein's.
                lig_channels = lig_vox.shape[1]
                lig_vox_t = lig_vox.transpose().reshape(
                    [1, lig_channels, lig_N[0], lig_N[1], lig_N[2]])
                lig_vox_t = torch.tensor(lig_vox_t.astype(np.float32))

        if prot_vox_t is None:
            raise FileNotFoundError(
                "no '*_protein.pdb' file found in " + complex_dir)
        if lig_vox_t is None:
            raise FileNotFoundError(
                "no '*_ligand.mol2' file found in " + complex_dir)

        x = torch.cat((lig_vox_t, prot_vox_t), 1)
        x.squeeze_(0)
        return x
예제 #5
0
    def __init__(self, obs_config: Config):
        """
        Voxelize the protein referenced by the observation configuration.

        :param obs_config: configuration object; it is forwarded to the parent
            initializer and read for ``pdb``, ``voxelsize``, ``boxsize``,
            ``centers``, ``method`` and ``validity_check``.

        The reshaped voxel features are stored in ``self.prot_vox_t``.
        """
        super().__init__(obs_config)
        self.obs_config = obs_config

        cfg = self.obs_config
        typed_protein = prepareProteinForAtomtyping(Molecule(cfg.pdb),
                                                    verbose=False)

        descriptor_options = dict(
            buffer=0,
            voxelsize=cfg.voxelsize,
            boxsize=cfg.boxsize,
            center=cfg.centers,
            method=cfg.method,
            validitychecks=cfg.validity_check,
        )
        voxels, _, grid_shape = getVoxelDescriptors(typed_protein,
                                                    **descriptor_options)

        self.prot_vox_t = self.reshape(voxels, grid_shape)
예제 #6
0
def compute_voxel_features(mutation_site, pdb_file, verbose=False, 
        boxsize=16, voxelsize=1, rotations=None):
    """Compute voxel features in a cubic box around the mutation site.

    The box is centred on the ``CB`` atom of the residue, falling back to
    ``CA`` when the ``CB`` selection is empty.

    Parameters
    ----------
    mutation_site : int
        Residue sequence position where the mutation took place.
    pdb_file : str
        Structure file loaded with ``Molecule``.
    verbose : bool
        Forwarded to ``prepareProteinForAtomtyping``.
    boxsize : number
        Edge length of the cubic box.
    voxelsize : number
        Edge length of a single voxel.
    rotations : list, optional
        Rotation angles in radian around x, y, and z axes.  When given, the
        voxel centres are rotated about the box centre before the features
        are computed.

    Returns
    -------
    NumPy nd-array
        Features reshaped to ``(nchannels, n, n, n)`` with
        ``n = int(boxsize / voxelsize)``.

    """
    mol = Molecule(pdb_file)
    prot = prepareProteinForAtomtyping(mol, verbose=verbose)

    residue_sel = 'resid ' + str(mutation_site)
    center = mol.get('coords', residue_sel + ' and name CB')
    if center.size == 0:
        center = mol.get('coords', residue_sel + ' and name CA')

    origin = center.flatten()
    cube = [boxsize, boxsize, boxsize]

    if rotations is not None:
        grid = getCenters(prot, boxsize=cube, center=origin,
                          voxelsize=voxelsize)
        rotated_centers = rotateCoordinates(grid[0], rotations, origin)
        # With user-supplied centres only two values come back.
        features, _ = getVoxelDescriptors(prot, usercenters=rotated_centers,
                                          validitychecks=False)
    else:
        features, _, _ = getVoxelDescriptors(prot, center=origin,
                                             boxsize=cube,
                                             voxelsize=voxelsize,
                                             validitychecks=False)

    side = int(boxsize / voxelsize)
    return features.transpose().reshape((features.shape[1], side, side, side))
def generateProteinDescriptor(pdb_file, voxel_size=64, mode='standard'):
    """Voxelize a protein on a cubic box centred on the origin.

    For every mode except ``'scPDB'`` the HETATM-free copy produced by
    ``removeHETAtoms`` is loaded, prepared for atom typing, centred, written
    back to that same file, reloaded and shown in the viewer.  In ``'scPDB'``
    mode the file is read directly as a ``SmallMol``.

    :param pdb_file: path of the input structure.
    :param voxel_size: edge length of the cubic box (voxel size is 1).
    :param mode: ``'scPDB'`` skips the preparation steps described above.
    :return: array of shape ``[1, nchannels, N[0], N[1], N[2]]``, or ``False``
        when the voxelization raises.
    """
    pdb_id = os.path.basename(pdb_file).split('.')[0]

    if mode != 'scPDB':
        # Remove HetAtoms
        pdb_file_nonhet = removeHETAtoms(pdb_file, pdb_id)
        prot = Molecule(pdb_file_nonhet)
        # If your structure is fully protonated and contains all bond
        # information in prot.bonds skip this step!
        prot = prepareProteinForAtomtyping(prot)
        prot.center()
        prot.write(pdb_file_nonhet)
        prot = Molecule(pdb_file_nonhet)
        prot.view(guessBonds=False)
    else:
        prot = SmallMol(pdb_file)

    try:
        prot_vox, _, prot_N = getVoxelDescriptors(
            prot,
            voxelsize=1,
            buffer=1,
            center=(0, 0, 0),
            boxsize=(voxel_size, voxel_size, voxel_size))
    except Exception:
        # Best effort: callers test for False.  Exception (not a bare
        # except) so Ctrl-C and SystemExit still propagate.
        return False

    nchannels = prot_vox.shape[1]
    # Reshape Voxels
    return prot_vox.transpose().reshape(
        [1, nchannels, prot_N[0], prot_N[1], prot_N[2]])
예제 #8
0
    def calcFeatures(number, ligPath, altLigPath, protPath, altProtPath,
                     boxsize, targetpath):
        """Compute voxel channels for a ligand and its protein.

        The ligand is read from ``ligPath`` (``altLigPath`` if that fails).
        The protein is then processed with three strategies, in order, until
        one succeeds:

        1. ``protPath`` prepared with ``prepareProteinForAtomtyping``
        2. ``altProtPath`` prepared the same way
        3. ``protPath`` prepared with ``proteinPrepare`` / ``autoSegment`` /
           ``charmm.build``

        If all fail, a line is appended to ``../../Data/prep_log.txt`` and
        ``protChannels`` is ``None``.

        :param number: protein number, only used in the log message.
        :param ligPath: primary ligand file.
        :param altLigPath: fallback ligand file.
        :param protPath: primary protein file.
        :param altProtPath: fallback protein file.
        :param boxsize: three box edge lengths; used to crop very large
            proteins around the ligand.
        :param targetpath: unused; kept so existing callers keep working.
        :return: dict with keys ``'smallChannels'``, ``'sm'``,
            ``'protChannels'`` and ``'prot'``.  On failure ``'prot'`` is the
            molecule in whatever state the last attempt left it, or ``None``
            if no protein file could be loaded at all.
        """

        def _ligandFeatures(path):
            # Load a ligand; return the mean of each coordinate column plus
            # its channels and the molecule returned by getChannels.
            mol = SmallMol(path, force_reading=True, fixHs=False)
            coords = mol.get('coords')
            cx = np.mean(coords[:, 0])
            cy = np.mean(coords[:, 1])
            cz = np.mean(coords[:, 2])
            channels, mol = voxeldescriptors.getChannels(mol)
            return cx, cy, cz, channels, mol

        def _cropAroundLigand(mol):
            # Structures above 50000 atoms are cut down in place to
            # +/- 2.5 * boxsize around the ligand centre.
            if mol.numAtoms > 50000:
                factorx = boxsize[0] * 2.5
                factory = boxsize[1] * 2.5
                factorz = boxsize[2] * 2.5
                mol.filter('z < ' + format(z + factorz) + ' and z > ' +
                           format(z - factorz))
                mol.filter('x < ' + format(x + factorx) + ' and x > ' +
                           format(x - factorx))
                mol.filter('y < ' + format(y + factory) + ' and y > ' +
                           format(y - factory))

        features = {}
        try:
            x, y, z, smallChannels, sm = _ligandFeatures(ligPath)
        except Exception:
            x, y, z, smallChannels, sm = _ligandFeatures(altLigPath)
        features['smallChannels'] = smallChannels
        features['sm'] = sm

        # Pre-bind so the final assignment cannot hit an unbound local when
        # every Molecule(...) load fails.
        prot = None
        try:
            prot = Molecule(protPath)
            _cropAroundLigand(prot)
            prot.filter('protein')
            prot.bonds = prot._getBonds()
            prot = prepareProteinForAtomtyping(prot)
            prot.set(value='Se', field='element', sel='name SE')
            protChannels, prot = voxeldescriptors.getChannels(prot)
        except Exception:
            try:
                prot = Molecule(altProtPath)
                _cropAroundLigand(prot)
                prot.filter('protein')
                prot.bonds = prot._getBonds()
                prot = prepareProteinForAtomtyping(prot)
                prot.set(value='Se', field='element', sel='name SE')
                protChannels, prot = voxeldescriptors.getChannels(prot)
            except Exception:
                try:
                    prot = Molecule(protPath)
                    _cropAroundLigand(prot)
                    prot.filter('protein')
                    prot.filter('not resname 3EB')
                    prot = proteinPrepare(prot)
                    prot = autoSegment(prot)
                    # Residues are not supported: mutate them to their
                    # standard counterparts, best effort.
                    for resname, standard in (('TPO', 'THR'),
                                              ('MSE', 'MET'),
                                              ('SEP', 'SER')):
                        try:
                            prot.mutateResidue('resname ' + resname, standard)
                        except Exception:
                            # Deliberately ignored, as before.
                            pass
                    prot = charmm.build(prot, ionize=False)
                    protChannels, prot = voxeldescriptors.getChannels(prot)
                except Exception:
                    with open("../../Data/prep_log.txt", "a") as log:
                        log.write('Protein ' + protPath +
                                  ' leads to errors! Proteinnumber: ' +
                                  str(number) + '\n')
                    protChannels = None
        features['protChannels'] = protChannels
        features['prot'] = prot
        return features
예제 #9
0
    def calcProtVoxel(self, x, y, z, protPath, number, altProtPath):
        """Voxelize a protein in a box of ``self.boxsize`` centred on (x, y, z).

        Three strategies are tried in order until one succeeds:

        1. ``protPath`` prepared with ``prepareProteinForAtomtyping``
        2. ``protPath`` prepared with ``proteinPrepare`` / ``autoSegment`` /
           ``charmm.build``
        3. ``altProtPath`` prepared with ``prepareProteinForAtomtyping``

        If all fail, a line is appended to ``../../Data/prep_log.txt`` and
        random placeholder arrays are returned.

        :param x, y, z: box centre; also used to crop very large proteins.
        :param protPath: primary protein file.
        :param number: protein number, only used in the log message.
        :param altProtPath: fallback protein file.
        :return: ``(features, centers, N)`` as returned by
            ``voxeldescriptors.getVoxelDescriptors``, or the placeholders
            ``(rand(13824, 8), rand(13824, 3), [24, 24, 24])``.
        """

        def _loadCropped(path):
            # Load a structure, crop structures above 50000 atoms to
            # +/- 2.5 * boxsize around the centre, keep protein atoms only.
            mol = Molecule(path)
            if mol.numAtoms > 50000:
                factorx = self.boxsize[0] * 2.5
                factory = self.boxsize[1] * 2.5
                factorz = self.boxsize[2] * 2.5
                mol.filter('z < ' + format(z + factorz) + ' and z > ' +
                           format(z - factorz))
                mol.filter('x < ' + format(x + factorx) + ' and x > ' +
                           format(x - factorx))
                mol.filter('y < ' + format(y + factory) + ' and y > ' +
                           format(y - factory))
            mol.filter('protein')
            return mol

        def _voxelize(mol):
            # Voxel descriptors on the requested box.
            return voxeldescriptors.getVoxelDescriptors(
                mol, center=[x, y, z], boxsize=self.boxsize)

        def _viaAtomtyping(path):
            # Strategy based on prepareProteinForAtomtyping.
            mol = _loadCropped(path)
            mol.bonds = mol._getBonds()
            mol = prepareProteinForAtomtyping(mol)
            mol.set(value='Se', field='element', sel='name SE')
            return _voxelize(mol)

        def _viaCharmm(path):
            # Strategy based on proteinPrepare + charmm.build.
            mol = _loadCropped(path)
            mol.filter('not resname 3EB')
            mol = proteinPrepare(mol)
            mol = autoSegment(mol)
            mol.set(value='Se', field='element', sel='name SE')
            for resname, standard in (('TPO', 'THR'),
                                      ('MSE', 'MET'),
                                      ('SEP', 'SER')):
                try:
                    mol.mutateResidue('resname ' + resname, standard)
                except Exception:
                    # Best effort, deliberately ignored as before.
                    pass
            mol = charmm.build(mol, ionize=False)
            return _voxelize(mol)

        attempts = ((_viaAtomtyping, protPath),
                    (_viaCharmm, protPath),
                    (_viaAtomtyping, altProtPath))
        for strategy, path in attempts:
            try:
                f, c, n = strategy(path)
            except Exception:
                # Exception (not a bare except) so Ctrl-C / SystemExit are
                # not mistaken for a preparation failure.
                continue
            return f, c, n

        with open("../../Data/prep_log.txt", "a") as log:
            log.write('Protein ' + protPath +
                      ' leads to errors! Proteinnumber: ' +
                      str(number) + '\n')
        # NOTE(review): the placeholder is random data fixed at 24**3 voxels
        # and 8 channels regardless of self.boxsize; kept unchanged because
        # callers may rely on it.
        f = np.random.rand(13824, 8)
        c = np.random.rand(13824, 3)
        n = [24, 24, 24]
        return f, c, n
예제 #10
0
def get_voxel_data(path_bound, list_bound_pdb_file, k, voxel_size):
    """
    Compute a voxel box around each residue of the first ``k`` bound
    complexes and return the voxelization of every box.

    For each entry, the partner structures
    ``<path_bound>/templates/<entry without its last character>_1.pdb`` and
    ``..._2.pdb`` are parsed.  When the residue counts of both partners match
    ``get_sequence``, each residue's ``CA`` atom becomes the centre of a cubic
    box of edge ``voxel_size`` that is voxelized.  After each complex the
    accumulated list is passed to ``make_voxel_npy_data``.

    Any failure while loading, preparing or voxelizing a structure ends the
    run early and returns what has been collected so far.

    :param path_bound: directory containing the ``templates`` folder.
    :param list_bound_pdb_file: entries naming the complexes; the last
        character of each entry is dropped (presumably a trailing newline).
    :param k: number of entries to process.
    :param voxel_size: edge length of the cubic box.
    :return: list of voxel-feature arrays, partner 1 residues first, then
        partner 2, for each processed complex.
    """
    tut_data = home(dataDir='/home/sdv/m2bi/gollitrault/M2BI/projet_long/src')
    boxsize = [voxel_size, voxel_size, voxel_size]
    list_prot_vox = []
    parser = PDBParser()

    for idx in range(k):
        stem = path_bound + "/templates/" + list_bound_pdb_file[idx][0:-1]
        pdb_1 = stem + '_1.pdb'
        pdb_2 = stem + '_2.pdb'

        structure_1 = parser.get_structure('test_bound_1', pdb_1)
        structure_2 = parser.get_structure('test_bound_2', pdb_2)

        residues_1 = list(structure_1.get_residues())
        # Fixed: this used to be built from structure_1 as well.
        residues_2 = list(structure_2.get_residues())

        if len(residues_1) != len(get_sequence(structure_1)) or \
           len(residues_2) != len(get_sequence(structure_2)):
            print("BAD number of residues do not correspond dont know why")
            continue

        try:
            prot_1 = Molecule(os.path.join(tut_data, pdb_1))
            prot_2 = Molecule(os.path.join(tut_data, pdb_2))
            prot_1 = prepareProteinForAtomtyping(prot_1)
            prot_2 = prepareProteinForAtomtyping(prot_2)
        except Exception:
            return list_prot_vox

        # Fixed: partner 2 is now voxelized around its own residues instead
        # of reusing the residue count of partner 1.
        for prot, residues in ((prot_1, residues_1), (prot_2, residues_2)):
            for residue in residues:
                try:
                    prot_vox, _, _ = getVoxelDescriptors(
                        prot,
                        boxsize=boxsize,
                        center=list(residue["CA"].get_vector()),
                        validitychecks=False)
                except Exception:
                    return list_prot_vox
                list_prot_vox.append(prot_vox)

        make_voxel_npy_data(list_prot_vox)

    return list_prot_vox