예제 #1
0
    def _add_colors(self):
        """Write the native/prediction deviation of each residue pair.

        Residues of ``self._native`` and ``self._prediction`` are walked in
        lockstep. For every pair, the norm of the coordinate difference is
        passed to ``setTempFactors`` of the predicted residue.
        """
        native_residues = prody.HierView(self._native).iterResidues()
        predicted_residues = prody.HierView(self._prediction).iterResidues()

        for reference, predicted in zip(native_residues, predicted_residues):
            # NOTE(review): the subtraction presumably requires both residues
            # to hold the same number of atoms - confirm with the callers.
            displacement = reference.getCoords() - predicted.getCoords()
            predicted.setTempFactors(numpy.linalg.norm(displacement))
 def must_be_filtered(cls, pdb):
     """
     Tell whether the protein chains of ``pdb`` fail to share one sequence.

     Returns True when the number of distinct chain sequences found in the
     "protein" selection is different from 1, and False otherwise.
     """
     protein_view = prody.HierView(pdb.select("protein"))
     distinct_sequences = {
         chain.getSequence() for chain in protein_view.iterChains()
     }
     return len(distinct_sequences) != 1
예제 #3
0
    def register_all_ligand_onsite(self, hetero_part, OUT=True):
        """Bundle every residue of ``hetero_part`` holding more than 3 atoms.

        :param hetero_part: atoms to split into residues with ``pd.HierView``
        :param OUT: forwarded unchanged to ``bundle_ligand_data``
        """
        for candidate in pd.HierView(hetero_part).iterResidues():
            # Residues with 3 atoms or fewer are not treated as ligands.
            if candidate.numAtoms() > 3:
                self.bundle_ligand_data(candidate, fake_ligand=False, OUT=OUT)
예제 #4
0
    def get_residue_onfly(self, resid):
        '''
        Bundle the residue(s) of ``self.hetero`` whose index matches ``resid``.

        :param resid: residue index to look for. It is compared as a string,
            so both ``5`` and ``'5'`` select the residue with index 5.
        :return: None
        '''
        # Normalise once so that an integer argument matches as well.
        wanted = str(resid)
        for pick_one in pd.HierView(self.hetero).iterResidues():
            if str(pick_one.getResindex()) == wanted:
                # Trace output kept from the original; written as a call so
                # the line parses under both Python 2 and Python 3.
                print('here')
                self.bundle_ligand_data(pick_one, fake_ligand=False, OUT=True)
def process_water_structures(initial_pdb, main_chains, ligand):
    """
    Detects the waters we have to keep (important for the simulation) and
    returns a structure holding them.

    For every main chain, the water oxygen that minimises the sum of its
    distances to the chain's reference residue and to the binding site is
    kept. The reference residue is chosen by position (index 50 of the chain
    iteration) rather than by residue name, because the amino acid at that
    position is not guaranteed to be conserved.

    :param initial_pdb: The pdb (prody structure) we want to extract the
        waters from.
    :param main_chains: Chain ids of the chains to process.
    :param ligand: Atoms whose center defines the binding site. If None, the
        center of the whole structure is used instead.

    :return: A dictionary indexed by the water id ("<res. num.>:<chain id>")
        holding the prody pdb structure of that water.
    """
    hw = prody.HierView(initial_pdb.select("protein"))
    water_structs = {}

    waters = initial_pdb.select("name O and water")
    if waters is None:
        # Nothing to choose from, whatever the chains are.
        return water_structs

    # Everything below depends only on the waters and the ligand, not on the
    # chain being processed, so it is computed once.
    water_coords = waters.getCoords()
    water_resnums = waters.getResnums()
    water_chids = waters.getChids()
    if ligand is None:
        ligand_com = prody.calcCenter(initial_pdb)
    else:
        ligand_com = prody.calcCenter(ligand)
    distance_to_bind_site = numpy.sqrt(
        ((ligand_com - water_coords)**2).sum(axis=1))

    for chain in hw.iterChains():
        if chain.getChid() not in main_chains:
            continue

        # We cannot do a direct selection, instead we iterate. The loop stops
        # at index 50; for shorter chains `residue` ends up being the last
        # residue of the chain.
        for i, residue in enumerate(chain.iterResidues()):
            if i == 50:
                break

        residue_com = prody.calcCenter(residue)
        distance_to_r50 = numpy.sqrt(
            ((residue_com - water_coords)**2).sum(axis=1))

        # argmin yields a single scalar index (the first one on ties), so the
        # "%d" formatting below always receives a plain integer.
        closest = int(numpy.argmin(distance_to_r50 + distance_to_bind_site))
        water_resnum = int(water_resnums[closest])
        water_chid = water_chids[closest]

        # We use a dict in order to get rid of repeats
        water_id = "%d:%s" % (water_resnum, water_chid)
        selection_string = "resnum %d and chain %s" % (water_resnum,
                                                       water_chid)
        water_structs[water_id] = initial_pdb.water.select(
            selection_string).copy()

    return water_structs
예제 #6
0
 def align_and_color(self, native, prediction):
     '''
     Align both structures and store energies in the B-factor column.

     :param native: forwarded to ``self.align``
     :param prediction: forwarded to ``self.align``
     :return: tuple ``(self._align_results, self._native, self._prediction)``
     '''
     self.align(native, prediction)

     enm = hamiltonian.EDENMHamiltonian(self._native.getCoords())
     # The returned energy is not needed here. The call is kept because it
     # precedes get_energy_matrix(); presumably it fills that matrix.
     enm.evaluate_energy(self._prediction.getCoords())
     column_energy = numpy.sum(enm.get_energy_matrix(), axis=0)

     residues = prody.HierView(self._prediction).iterResidues()
     for position, residue in enumerate(residues):
         # NOTE(review): indexing by residue position assumes one energy
         # entry per residue - confirm against the hamiltonian module.
         residue.setBetas(column_energy[position])

     return self._align_results, self._native, self._prediction
예제 #7
0
def split_structure(pdb_path):
    """
    Split a PDB file into one receptor file and one file per ligand.

    Ligands are the residues of the hetero selection holding at least
    ``config.heavy_atom_threshold`` non-hydrogen atoms. The receptor
    ("protein or nucleic") is only written when at least one ligand was
    written. Failures are reported through ``log`` and end the function
    without raising.

    :param pdb_path: path of the PDB file; its base name up to the first dot,
        lower-cased, names the output files.
    :return: None
    """
    pdb_name = os.path.basename(pdb_path).split('.')[0].lower()

    try:
        parsed = prody.parsePDB(pdb_path)
    except Exception as e:
        log('parse_failed.log', '{},{}\n'.format(pdb_name, str(e)))
        return

    # The original selection spelled one keyword "sesname"; it is "resname".
    hetero = parsed.select(
        '(hetero and not water) or resname ATP or resname ADP '
        'or resname AMP or resname GTP or resname GDP or resname GMP')
    receptor = parsed.select('protein or nucleic')
    if receptor is None:
        log("select_failed.log",
            "{},doesn't have receptor.\n".format(pdb_name))
        return
    if hetero is None:
        log("select_failed.log", "{},doesn't have ligand.\n".format(pdb_name))
        return

    # write ligand into file
    ligand_flags = False
    for each in prody.HierView(hetero).iterResidues():
        if each.select(
                'not hydrogen').numAtoms() < config.heavy_atom_threshold:
            continue

        ligand_flags = True
        ResId = each.getResindex()
        ligand_path = os.path.join(
            config.splited_ligand_folder, pdb_name,
            "{}_{}_ligand.pdb".format(pdb_name, ResId))
        mkdir(os.path.dirname(ligand_path))
        prody.writePDB(ligand_path, each)

    # if have valid ligand, write down receptor
    if ligand_flags:
        receptor_path = os.path.join(config.splited_receptor_folder,
                                     pdb_name + '.pdb')
        prody.writePDB(receptor_path, receptor)
    else:
        log(
            "threshold_failed.log",
            "{}, no ligand above threshold {}.\n".format(
                pdb_name, config.heavy_atom_threshold))
예제 #8
0
def get_protein_sequence(pdb):
    """
    Build the one-letter-per-residue sequence of a structure.

    Each residue name is lower-cased and looked up in ``aa_dic_standard``;
    names missing from that mapping contribute an "X".
    Source:
        - Biskit (http://biskit.pasteur.fr/)

    @param pdb: A prody pdb data structure.

    @return: A string with the sequence of this protein.
    """
    one_letter_codes = []
    for residue in prody.HierView(pdb).iterResidues():
        three_letter = residue.getResname().lower()
        one_letter_codes.append(aa_dic_standard.get(three_letter, "X"))
    return "".join(one_letter_codes)
def choose_main_chains(initial_pdb):
    """
    Pick the (at most) two longest protein chains of a structure.

    A model can contain attached complexes or duplicated chains covering the
    same space; only two main chains have to be left. Chains are ranked by
    sequence length, with the chain id as tie-breaker, and the last two
    entries of that ranking are returned.

    :param initial_pdb: The pdb (prody structure) we want to extract the
        chains from.

    :return: A list containing the chain ids of the main chains, in
        increasing ranking order.
    """
    protein_view = prody.HierView(initial_pdb.select("protein"))
    ranked = sorted(
        (len(chain.getSequence()), chain.getChid())
        for chain in protein_view.iterChains())
    return [chain_id for _, chain_id in ranked[-2:]]
예제 #10
0
    def __init__(self, PDB, filepos=None):
        """
        Parse a PDB entry and write its receptor and ligand files.

        Files are written below ``data/<PDB>/``: the parsed structure, the
        protein selection (then passed to ``repair_pdbfile``) and one file
        per hetero residue holding more than 3 atoms.

        :param PDB: name of the PDB entry; also used to name the output files
        :param filepos: path of a local PDB file. When None, ``PDB`` itself is
            handed to ``pd.parsePDB``.
        """
        self.PDBname = PDB
        self.heterodict = {}
        self.ct = 0
        self.sequence = ''

        # filepos is to determine whether we download pdb files from wwPDB
        # or use what we have
        # Using downloaded is better
        try:
            if filepos is not None:
                parse = pd.parsePDB(filepos)
            else:
                parse = pd.parsePDB(PDB)
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate.
            logging.warning(
                'PDB {} is ignored due to file-not-found error'.format(PDB))
            return

        # makedirs also creates a missing 'data' parent directory.
        pdb_dir = 'data/' + PDB
        if not os.path.exists(pdb_dir):
            os.makedirs(pdb_dir)
        pd.writePDB('data/{0}/{0}.pdb'.format(PDB), parse)

        receptor = parse.select('protein')
        if receptor is None:
            # select() returns None when nothing matches; there is no
            # receptor to write or repair in that case.
            logging.warning(
                'PDB {} is ignored because it has no protein atoms'.format(
                    PDB))
            return
        pd.writePDB('data/{0}/{0}_hydro_receptor.pdb'.format(PDB), receptor)
        repair_pdbfile('data/{0}/{0}_hydro_receptor.pdb'.format(PDB), PDB)

        hetero = parse.select(
            '(hetero and not water) or resname ATP or resname ADP')
        if hetero is None:
            # No ligand candidates at all.
            return

        for pick_one in pd.HierView(hetero).iterResidues():
            # 3 atoms or fewer may be not ok
            if pick_one.numAtoms() <= 3:
                continue

            ResId = str(pick_one.getResindex())

            # Extract this ligand from protein (as input for openbabel)
            filename = 'data/{0}/{0}_{1}_ligand.pdb'.format(PDB, ResId)

            if not os.path.exists(filename):
                pd.writePDB(filename, pick_one)
예제 #11
0
파일: downloadPDB.py 프로젝트: LXander/dock
    def downloads(self,item):
        """
        Download a PDB entry and split it into receptor and ligand files.

        Nothing is done when ``<item>.pdb`` already exists in
        ``FLAGS.rowdata_folder``. Ligands are the hetero residues holding
        more than 10 atoms; the receptor ("protein or nucleic") is written
        only when at least one ligand was written. Problems are reported
        through ``self.error_log``.

        :param item: PDB identifier used to build the download address and
            the file names.
        :return: None
        """
        download_address = self.get_address(item)
        if os.path.exists(os.path.join(FLAGS.rowdata_folder,item+'.pdb')):
            # Same output as the Python 2 statement `print item," exists"`.
            print('{}  exists'.format(item))
            return None
        print('download  {}'.format(item))
        # NOTE(review): the command line is built by string formatting and run
        # through the shell; `item` must come from a trusted source.
        os.system('wget -P {}  {}'.format(FLAGS.rowdata_folder,download_address))

        pdbname = item.lower()
        ligand_folder = os.path.join(FLAGS.splited_ligand_folder,pdbname)
        try_create_chain_folder(ligand_folder)

        try:
            parsed = prody.parsePDB(os.path.join(FLAGS.rowdata_folder,item+'.pdb'))
        except Exception:
            self.error_log('can not parse {}.\n'.format(item))
            return None

        # The original selection spelled one keyword "sesname"; it is "resname".
        hetero = parsed.select(
            '(hetero and not water) or resname ATP or resname ADP '
            'or resname AMP or resname GTP or resname GDP or resname GMP')
        receptor = parsed.select('protein or nucleic')
        if receptor is None:
            self.error_log("{} doesn't have receptor.\n".format(item))
            return None
        if hetero is None:
            self.error_log("{} doesn't have ligand.\n".format(item))
            return None

        ligand_flags = False
        for each in prody.HierView(hetero).iterResidues():
            if each.numAtoms() <= 10:
                continue

            ligand_flags = True
            ResId = each.getResindex()
            ligand_path = os.path.join(FLAGS.splited_ligand_folder,pdbname,"{}_{}_ligand.pdb".format(pdbname,ResId))
            try_create_chain_parent_folder(ligand_path)
            prody.writePDB(ligand_path,each)

        if ligand_flags:
            receptor_path = os.path.join(FLAGS.splited_receptor_folder,pdbname+'.pdb')
            prody.writePDB(receptor_path,receptor)
        else:
            self.error_log("{} doesn't convert, not ligand have more than 10 atoms.\n".format(item))
def curate_struct(initial_pdb, main_chains, pdb_alignment, parameters):
    """
    Returns the "curated" pdb. A curated pdb has potentially 2 waters around
    residue 50 of each chain, a ligand and two main (symmetric) chains;
    everything else must be deleted. This function also works when the
    ligand or the waters are not present, which can happen when processing
    any of the "mandatory" structures (those can pass the filters
    automatically).

    :param initial_pdb: The prody pdb structure we want to extract the chains
        from.
    :param main_chains: Chain ids of the chains to keep.
    :param pdb_alignment: Dictionary updated in place:
        ``["pdb"]["num_chains"]`` receives the number of protein chains and
        ``["pdb"]["waters"]`` the list of ids of the added waters.
    :param parameters: Dictionary read for ``"min_ligand_atoms"``, the
        minimum atom count a ligand needs in order to be added.

    :return: The "curated" pdb and the ligand selection (None if nothing was
        selected; it is returned even when too small to be added).
    """
    # Get chain info (without ligand or waters)
    hw = prody.HierView(initial_pdb.select("protein"))
    pdb_alignment["pdb"]["num_chains"] = hw.numChains()

    # Pick main chains
    prot_struct = initial_pdb.select(
        CurationSelections.PROTEIN_CHAIN_TEMPLATE %
        (" ".join(main_chains))).copy()

    # Add the ligand (if found), must be part of other chains (not main_chains)
    ligand_struct = initial_pdb.select(CurationSelections.LIGAND_SELECTION)
    if (ligand_struct is not None
            and ligand_struct.numAtoms() >= parameters["min_ligand_atoms"]):
        tmp_struct = prot_struct + ligand_struct.copy()
    else:
        tmp_struct = prot_struct

    # Add "important" waters, if found
    water_structs = process_water_structures(initial_pdb, main_chains,
                                             ligand_struct)
    # Keep track of added waters in the alignment file. A real list is stored
    # because on Python 3 keys() is only a view tied to the dictionary.
    pdb_alignment["pdb"]["waters"] = list(water_structs)
    for water_struct in water_structs.values():
        tmp_struct = tmp_struct + water_struct

    return tmp_struct, ligand_struct
 def must_be_filtered(cls, pdb, num_chains):
     """
     Tell whether the structure's protein chain count differs from a target.

     Returns True when the "protein" selection of ``pdb`` does not contain
     exactly ``num_chains`` chains, and False otherwise.
     """
     protein_view = prody.HierView(pdb.select("protein"))
     chain_count = protein_view.numChains()
     return chain_count != num_chains
예제 #14
0
    def __init__(self, PDB, filepos=None, OUT=True, **kwargs):
        '''
        Parse a PDB entry, keep a repaired working copy and bundle its ligands.

        Entries containing nucleic atoms are skipped. For the others, the
        file is copied to ``<temp_pdb_PREFIX>/<PDB>/``, repaired, re-parsed,
        its chains and sequences are recorded, the receptor is written and
        every hetero residue holding more than 3 atoms is handed to
        ``bundle_ligand_data``.

        :param PDB: name of PDB
        :param filepos: path of the PDB file. When None, ``PDB`` itself is
            handed to ``pd.parsePDB`` and ``<PDB>.pdb.gz`` is used as path.
        :param OUT: forwarded to ``bundle_ligand_data`` (originally documented
            as "if true, splitted files will be output in './data' folder")
        :param kwargs: optional ``BOX`` (default 20) and ``Size`` (default 1)
        :raises IOError: if the repaired copy cannot be parsed
        '''
        self.PDBname = PDB
        self.heterodict = {}
        self.ct = 0
        self.sequence = {}
        self.pure_protein = None
        self.pure_nucleic = None
        # filepos defaults to None, so it cannot be split unconditionally.
        # The fallback is the path assigned below when no filepos is given.
        if filepos is not None:
            self.pdb_filename = filepos.split('/')[-1]
        else:
            self.pdb_filename = PDB + '.pdb.gz'

        self.BOX_range = kwargs.get('BOX', 20)
        self.BOX_size = kwargs.get('Size', 1)

        pdb_store_dir = os.path.join(temp_pdb_PREFIX, PDB)

        # makedirs also creates missing parent directories.
        if not os.path.exists(pdb_store_dir):
            os.makedirs(pdb_store_dir)

        # filepos is to determine whether we download pdb files from wwPDB
        # or use what we have
        # Using downloaded is better
        # parse header for first time

        try:
            if filepos is not None:
                parse, header = pd.parsePDB(filepos, header=True)
            else:
                parse, header = pd.parsePDB(PDB, header=True)
                filepos = PDB + '.pdb.gz'
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit still propagate. print is written as a call so the
            # line parses under both Python 2 and Python 3.
            print(filepos)
            logging.warning(
                'PDB {} is ignored due to file-not-found error'.format(PDB))
            return

        # Save resolution
        try:
            self.resolution = header['resolution']
        except (KeyError, TypeError):
            self.resolution = 'NA'

        self.pure_protein = parse.select('protein')
        self.pure_nucleic = parse.select('nucleic')

        # dirty way to throw away nucleic one
        if self.pure_nucleic is not None:
            return

        # Copy the file
        stored_pdb = pdb_store_dir + '/{0}.pdb'.format(PDB)
        copy_pdbfile(filepos,
                     stored_pdb,
                     zipped=filepos.split('.')[-1] == 'gz')

        # repair by guess, i think
        repair_pdbfile(stored_pdb, PDB)

        # Generating sequence here
        # split files by chain
        try:
            parse = pd.parsePDB(stored_pdb)
        except Exception:
            raise IOError('Cannot parse added H')

        self.chain_list = []
        for chain in parse.getHierView():
            self.chain_list.append(chain.getChid())
            self.sequence[chain.getChid()] = chain.getSequence()

        # now try to fix the pdb from autodock tools

        hetero = parse.select(
            '(hetero and not water) or resname ATP or resname ADP')

        other = parse.select('protein or nucleic')
        self.receptor = other

        if other is None:
            return
        pd.writePDB(pdb_store_dir + '/{0}_receptor.pdb'.format(PDB), other)

        if hetero is None:
            # select() returns None when nothing matches: no ligand to bundle.
            return

        # Make vectors for every single hetero parts
        # Their values will be stored in a dict

        for pick_one in pd.HierView(hetero).iterResidues():
            # 3 atoms or fewer may be not ok
            if pick_one.numAtoms() <= 3:
                continue

            self.bundle_ligand_data(pick_one, fake_ligand=False, OUT=OUT)
예제 #15
0
파일: docking.py 프로젝트: LXander/dock
    def downloads(self, item):
        '''
        Download pdb from rcsb and split it into receptor and ligand

        Ligands are the hetero residues holding more than
        ``FLAGS.atom_num_threahold`` atoms; the receptor ("protein or
        nucleic") is written only when at least one ligand was written.
        Problems are reported through ``self.error_log``.

        :param item: 4 letter PDB ID '3EML'
        :return: None
        '''

        # Download pdb to rowdata_folder
        # NOTE(review): the command line is built by string formatting and run
        # through the shell; `item` must come from a trusted source.
        download_address = 'https://files.rcsb.org/download/' + item + '.pdb'
        os.system('wget -P {} {}'.format(FLAGS.rowdata_folder,
                                         download_address))

        # create folder to store ligand
        pdbname = item.lower()
        ligand_folder = os.path.join(FLAGS.splited_ligand_folder, pdbname)
        if not os.path.exists(ligand_folder):
            os.mkdir(ligand_folder)

        # parse pdb
        try:
            parsed = prody.parsePDB(
                os.path.join(FLAGS.rowdata_folder, item + '.pdb'))
        except Exception:
            self.error_log('can not parse {}.\n'.format(item))
            return None

        # select receptor and ligand
        hetero = parsed.select(
            '(hetero and not water) or resname ATP or resname ADP')
        receptor = parsed.select('protein or nucleic')

        if receptor is None:
            self.error_log("{} doesn't have receptor.\n".format(item))
            return None

        if hetero is None:
            self.error_log("{} doesn't have ligand.\n".format(item))
            return None

        ligand_flags = False

        for each in prody.HierView(hetero).iterResidues():
            if each.numAtoms() <= FLAGS.atom_num_threahold:
                # ignore ligand if atom num is not above the threshold
                continue

            ligand_flags = True
            ResId = each.getResindex()
            # Every ligand file goes into ligand_folder, created above.
            ligand_path = os.path.join(
                ligand_folder, "{}_{}_ligand.pdb".format(pdbname, ResId))
            prody.writePDB(ligand_path, each)

        if ligand_flags:
            receptor_path = os.path.join(FLAGS.splited_receptor_folder,
                                         pdbname + '.pdb')
            prody.writePDB(receptor_path, receptor)
        else:
            # The placeholder used to be logged literally because the message
            # was never formatted.
            self.error_log(
                "{} doesn't convert, no ligand have more than 10 atoms.\n"
                .format(item))
예제 #16
0
def get_str(index):
    """Make ``index`` the active coordinate set of the shared structure.

    The module-level ``structure`` object is modified in place through
    ``setACSIndex`` and that same object is returned.
    """
    structure.setACSIndex(index)
    return structure


def phi_psi_pair(phis, psis):
    """Placeholder for pairing phi/psi lists by residue for a given frame.

    The pairing is not implemented: both arguments are ignored and the
    constant 1 is returned.
    """
    return 1


# Generates lists of phi and psi angles for all frames.
# Only importable, named callables are handed to the pool: the standard
# multiprocessing module pickles the function it sends to the workers, and
# lambdas cannot be pickled.
# NOTE(review): with the "spawn" start method this section should run under
# an `if __name__ == "__main__":` guard - confirm how the script is launched.
with mp.Pool() as pool:
    frame_list = pool.map(int, frame_list)
    structure_list = pool.map(get_str, frame_list)
    hier_list = pool.map(prd.HierView, structure_list)
    # Materialised once as a list: a bare map() iterator would be exhausted
    # after the first pool.map call.
    res_list = [
        hier.getResidue('A', resnum)
        for hier, resnum in zip(hier_list, core_res)
    ]
    phi_list = list(pool.map(prd.calcPhi, res_list))
    psi_list = list(pool.map(prd.calcPsi, res_list))

# Column labels for the angle dataframe: one 'phiN'/'psiN' pair per entry of
# init_core_res, numbered from 1. `rows` starts out empty.
clmns = []
rows = {}
for i in range(len(init_core_res)):
    clmns += [f'phi{i+1}', f'psi{i+1}']

#Generates a dataframe and stores all angle values
예제 #17
0
파일: downloader.py 프로젝트: ellsh/core
    def downloads(self, item):
        '''
        Download pdb from rcsb and split it into receptor and ligand

        Ligands are the hetero residues holding more than
        ``FLAGS.atom_num_threahold`` atoms; the receptor ("protein or
        nucleic") is written only when at least one ligand was written.
        Problems are reported through ``self.error_log``.

        :param item: 4 letter PDB ID '3EML'
        :return: None
        '''
        # NOTE(review) #4: the name of the function is not informative.

        # Download pdb to rowdata_folder
        # NOTE(review): the command line is built by string formatting and run
        # through the shell; `item` must come from a trusted source.
        download_address = self.get_address(item)
        os.system('wget -P {} {}'.format(FLAGS.rowdata_folder,
                                         download_address))

        # create folder to store ligand
        pdbname = item.lower()
        ligand_folder = os.path.join(FLAGS.splited_ligand_folder, pdbname)
        if not os.path.exists(ligand_folder):
            os.mkdir(ligand_folder)

        # parse pdb
        try:
            parsed = prody.parsePDB(
                os.path.join(FLAGS.rowdata_folder, item + '.pdb'))
        except Exception:
            self.error_log('can not parse {}.\n'.format(item))
            return None

        # select receptor and ligand
        hetero = parsed.select(
            '(hetero and not water) or resname ATP or resname ADP')
        receptor = parsed.select('protein or nucleic')

        if receptor is None:
            self.error_log("{} doesn't have receptor.\n".format(item))
            return None

        if hetero is None:
            self.error_log("{} doesn't have ligand.\n".format(item))
            return None

        # NOTE(review) #5: a printable "statistics" class could collect these
        # outcomes.

        ligand_flags = False

        for each in prody.HierView(hetero).iterResidues():
            # NOTE(review) #6: there will be many thresholds; organize them
            # together in FLAGS.
            if each.numAtoms() <= FLAGS.atom_num_threahold:
                # ignore ligand if atom num is not above the threshold
                continue

            ligand_flags = True
            ResId = each.getResindex()
            # Every ligand file goes into ligand_folder, created above.
            ligand_path = os.path.join(
                ligand_folder, "{}_{}_ligand.pdb".format(pdbname, ResId))
            prody.writePDB(ligand_path, each)

        if ligand_flags:
            # NOTE(review) #7: "splited receptor folder" is a bad name.
            receptor_path = os.path.join(FLAGS.splited_receptor_folder,
                                         pdbname + '.pdb')
            prody.writePDB(receptor_path, receptor)
        else:
            # The placeholder used to be logged literally because the message
            # was never formatted.
            # NOTE(review) #8: see #5, a single "statistics" class would help.
            self.error_log(
                "{} doesn't convert, no ligand have more than 10 atoms.\n"
                .format(item))