コード例 #1
0
    def _get_proteins_by_structure(self, pdb_structure, model, file_path):
        """
            _get_proteins_by_structure: Build one protein record per chain of a pdb_structure.

            Chains that evaluate as falsy are skipped. For every other chain the
            sequences of its polypeptide fragments (as built by PPBuilder) are
            concatenated, and a dict is emitted holding the base name of
            file_path ('id'), the given model ('model_id'), the chain id
            ('chain_id'), the joined sequence ('sequence') and the MD5 hex
            digest of that sequence ('md5').
        """
        peptide_builder = PPBuilder()
        records = []

        for chain in pdb_structure.get_chains():
            if not chain:
                continue
            # Join every fragment of the chain into a single sequence string
            chain_seq = ''.join(
                str(fragment.get_sequence())
                for fragment in peptide_builder.build_peptides(chain))
            records.append({
                'id': os.path.basename(file_path),
                'model_id': model,
                'chain_id': chain.get_id(),
                'sequence': chain_seq,
                'md5': hashlib.md5(chain_seq.encode()).hexdigest(),
            })

        return records
コード例 #2
0
ファイル: showPymolPath.py プロジェクト: rsanchezgarc/BIPSPI
def getBoundResList(fname_bound, fname_unbound, listOfDictsChainToResId):
    """Translate residue ids of the unbound structure into ids of the bound one.

    Both PDB files are parsed, their polypeptides are handed to a
    BoundUnboundMapper, and every (chain id, residue id) pair found in
    listOfDictsChainToResId is looked up through that mapper.

    :param fname_bound: path of the bound PDB file
    :param fname_unbound: path of the unbound PDB file
    :param listOfDictsChainToResId: list of dicts {unbound chain id: residue ids};
                                    a chain id of "*" is looked up as " "
    :return: list (same length and order as the input) of dicts
             {bound chain id: [bound residue ids]}; residues for which the
             mapper returns None are left out
    """
    pdb_parser = PDBParser(QUIET=True)
    unbound_structure = pdb_parser.get_structure(fname_unbound, fname_unbound)
    bound_structure = pdb_parser.get_structure(fname_bound, fname_bound)
    builder = PPBuilder()
    unbound_peptides = builder.build_peptides(unbound_structure, aa_only=False)
    bound_peptides = builder.build_peptides(bound_structure, aa_only=False)
    mapper = BoundUnboundMapper(unbound_peptides, bound_peptides)
    mapper.build_correspondence()

    translated = []
    for chain_to_res in listOfDictsChainToResId:
        bound_residues = {}
        for unbound_chain, unbound_ids in chain_to_res.items():
            lookup_chain = " " if unbound_chain == "*" else unbound_chain
            for unbound_res in sorted(unbound_ids):
                match = mapper.mapUnboundToBoundUsingId(lookup_chain,
                                                        unbound_res)
                if match is None:
                    continue
                bound_chain, bound_res = match
                bound_residues.setdefault(bound_chain, []).append(bound_res)
        translated.append(bound_residues)
    return translated
コード例 #3
0
ファイル: worker.py プロジェクト: animesh/scop3d
def doChainAlignments(pdbID, structure, consensusFile, chainSequencesDir,
                      verbose):
    """Write each chain's sequence to FASTA and align it against the consensus.

    For every chain of ``structure`` the polypeptide fragments are joined into
    one sequence, written to ``<chainSequencesDir>/<pdbID>_<chain>.fasta`` and
    passed, together with ``consensusFile``, to the external ``needle``
    program. The process exits as soon as a chain yields an empty sequence.

    :param pdbID: identifier used as prefix of all generated names
    :param structure: parsed structure providing ``get_chains()``
    :param consensusFile: path given to needle as ``-bsequence``
    :param chainSequencesDir: directory for the FASTA files and needle output
    :param verbose: when true, print each chain id and its sequence
    """
    print('Pairwise alignment of chain sequences with consensus...')
    peptide_builder = PPBuilder()
    for chain in list(structure.get_chains()):
        sequence = "".join(
            str(fragment.get_sequence())
            for fragment in peptide_builder.build_peptides(chain))
        chain_label = pdbID + '_' + chain.get_id()
        fasta_path = os.path.join(chainSequencesDir, chain_label + '.fasta')
        if not sequence:
            print("ERROR: Unable to get chain sequence from PDB file.")
            exit("Selected PDB-file does not contain protein structure.")
        with open(fasta_path, 'w') as fasta:
            fasta.write('>' + chain_label + '\n' + sequence)
        if verbose:
            print('Chain ' + chain.get_id())
            print(sequence)
        needle_cmd = [
            'needle',
            '-asequence', fasta_path,
            '-bsequence', consensusFile,
            '-noendweight',
            '-endopen', '10.0',
            '-endextend', '0.5',
            '-brief',
            '-aformat', 'srspair',
            '-auto',
            '-aname_outfile', chain_label,
            '-adirectory_outfile', chainSequencesDir,
        ]
        subprocess.check_call(needle_cmd)
    print('OK')
コード例 #4
0
    def __init__(self,
                 prefix,
                 computedFeatsRootDir,
                 areForTrainAndTest=True,
                 boundAvailable=True,
                 res2res_dist=6.0,
                 statusManager=None):
        '''
        Store the configuration of a complex and create its output directories.

        :param prefix: str. An id for the complex
        :param computedFeatsRootDir: str. path where features will be stored
        :param areForTrainAndTest: boolean. True if ligand and receptor are posed in interacting
                                   coordinates and thus the label is known. False if they are for
                                   prediction and thus labels cannot be assigned.
        :param boundAvailable: boolean. True if there is a bound and unbound pdb for each complex.
                               False otherwise. Must not be True when areForTrainAndTest is False.
        :param res2res_dist: float. max distance between any heavy atoms of 2 amino acids to be
                             considered as interacting (Angstrom)
        :param statusManager: class that implements .setStatus(msg) to communicate
        '''
        ToolManager.__init__(
            self, computedFeatsRootDir, statusManager=statusManager)
        self.prefix = prefix
        self.areForTrainAndTest = areForTrainAndTest
        self.res2res_dist = res2res_dist
        self.boundAvailable = boundAvailable
        # Bound structures without known labels is the one combination that is rejected
        assert not (boundAvailable == True and areForTrainAndTest == False), \
            "Error parameters in CMap: boundAvailable==True and areForTrainAndTest==False"
        # radius set to 200 to not worry about broken chains
        self.ppb = PPBuilder(radius=200)
        # All outputs live below <computedFeatsRootDir>/common
        common_dir = myMakeDir(computedFeatsRootDir, "common")
        self.outPath = common_dir
        self.outPathCM = myMakeDir(common_dir, "contactMaps")
        self.outPathResDict = myMakeDir(common_dir, "includedResidues")
        self.outPathNeigs = myMakeDir(common_dir, "voroNeigs")
コード例 #5
0
 def read_structure_seqs(self, strucm):
     """Store the PDB-derived sequence fragments of every chain in self.data.

     Each polypeptide fragment of each chain of each model in ``strucm.st``
     becomes a SeqRecord labelled ``pdbsq_<chain>:<start>-<end>`` carrying one
     SeqFeature that spans the fragment's first and last residue numbers.
     The records are saved under ``self.data[chain]['pdb'][model id]``; the
     ``'wrong_order'`` flag of the chain is set when any fragment's first
     residue number is not smaller than its last one.
     """
     builder = PPBuilder()
     for model in strucm.st:
         for chain in model.get_chains():
             chain_id = chain.id
             records = []
             reversed_numbering = False
             for fragment in builder.build_peptides(chain):
                 first_num = fragment[0].get_id()[1]
                 last_num = fragment[-1].get_id()[1]
                 label = '{}:{}-{}'.format(chain_id, first_num, last_num)
                 record = SeqRecord(
                     fragment.get_sequence(),
                     'pdbsq_' + label,
                     'pdbsq_' + label,
                     'PDB sequence chain ' + label,
                 )
                 if first_num < last_num:
                     location = FeatureLocation(first_num, last_num)
                 else:
                     print("Warning: unusual residue numbering at chain ",
                           chain_id)
                     print(
                         "Warning: chain reconstruction may not be available"
                     )
                     # Swap the bounds so the location is still well ordered
                     location = FeatureLocation(last_num, first_num)
                     reversed_numbering = True
                 record.features.append(SeqFeature(location))
                 records.append(record)
             if chain_id not in self.data:
                 self.add_empty_chain(chain_id)
             chain_pdb = self.data[chain_id]['pdb']
             chain_pdb[model.id] = records
             chain_pdb['wrong_order'] = reversed_numbering
コード例 #6
0
def main():
    """Extract the sequence of the first model of a PDB file and write it as FASTA.

    Command-line options:
      -p / --pdb        path to the input PDB file
      -f / --pdb_fasta  path of the FASTA file to write

    The sequences of all polypeptides of all chains of model 0 are
    concatenated (PPBuilder with radius=1000 and aa_only=False), echoed to
    stdout and written to the output file. The header carries the PDB file's
    base name (up to the first dot) and the sequence length.
    """
    opt_parser = optparse.OptionParser()
    opt_parser.add_option("-p",
                          "--pdb",
                          dest="pdb",
                          help="path to PDB file",
                          metavar="STRING")
    opt_parser.add_option("-f",
                          "--pdb_fasta",
                          dest="pdb_fasta",
                          help="path to PDB fasta file (out)",
                          metavar="STRING")

    (options, args) = opt_parser.parse_args()
    # Fail with a usage message instead of a traceback on a missing path
    if not options.pdb or not options.pdb_fasta:
        opt_parser.error("both --pdb and --pdb_fasta are required")
    pdb_fasta = options.pdb_fasta
    pdb_file = options.pdb

    pdb_name = os.path.basename(pdb_file).split(".")[0]

    pdb_parser = BP.PDBParser()
    ppb = PPBuilder(radius=1000)  # retrieve all amino acids
    structure = pdb_parser.get_structure(pdb_name, pdb_file)
    model = structure[0]
    pdbseq = "".join(
        str(pp.get_sequence())
        for chain in model
        for pp in ppb.build_peptides(chain, aa_only=False))

    # Formatted strings keep the output identical on Python 2 and Python 3
    print("> %s %i" % (pdb_name, len(pdbseq)))
    print(pdbseq)

    with open(pdb_fasta, "w") as o:
        o.write(">%s %i\n%s\n" % (pdb_name, len(pdbseq), pdbseq))
コード例 #7
0
def fetch_protein(pdb_id: str) -> Tuple[List[str], np.ndarray]:
    """Download a PDB entry and return its sequence and backbone angles.

    The file ``<pdb_id>.pdb`` is fetched from files.rcsb.org into the current
    working directory, parsed, and deleted again (also when parsing fails).
    Only the first polypeptide built by PPBuilder is used.

    :param pdb_id: PDB identifier used in the download URL and file name
    :return: tuple ``(aa_sequence, phi_psi_angles)`` where ``aa_sequence`` is
             the list of one-letter residue codes and ``phi_psi_angles`` is a
             2 x N array of phi (row 0) and psi (row 1) in degrees; angles
             reported as ``None`` are replaced by 180
    :raises requests.HTTPError: if the download does not succeed
    :raises IndexError: if no polypeptide can be built from the structure
    """
    # retrieve pdb file from Protein Data Bank
    pdb_file = f"{pdb_id}.pdb"
    pdb_file_path = os.path.join(os.getcwd(), pdb_file)
    protein_url = f"https://files.rcsb.org/download/{pdb_file}"
    req = requests.get(protein_url)
    # Do not hand an HTTP error page to the PDB parser
    req.raise_for_status()
    with open(pdb_file_path, "w") as f:
        f.write(req.text)

    # parse pdb file; the temporary download is removed no matter what
    try:
        structure = PDBParser().get_structure(pdb_id, pdb_file_path)
        peptide = PPBuilder().build_peptides(structure)[0]
    finally:
        os.remove(pdb_file_path)

    def _to_degrees(angle):
        # Only a missing angle (None) gets the placeholder; 0.0 is a real value
        return 180 if angle is None else np.rad2deg(angle)

    # extract amino acid sequence and phi/psi angles
    aa_sequence = list(peptide.get_sequence())
    phi_psi_angles = np.array([(_to_degrees(phi), _to_degrees(psi))
                               for phi, psi in peptide.get_phi_psi_list()]).T

    return aa_sequence, phi_psi_angles
コード例 #8
0
ファイル: test.py プロジェクト: mfawzysami/3D-Protein-Search
def get_chain_position(input_file, global_index):
    """Locate a residue given its 0-based index over all polypeptides of a file.

    The polypeptides of ``input_file`` (built with aa_only=False) are wrapped
    in EPolyPeptide and walked in order while their lengths are accumulated.

    :param input_file: path of the PDB file
    :param global_index: 0-based index into the concatenation of all polypeptides
    :return: tuple ``(chain_id, position)`` where ``position`` is the 1-based
             offset inside the polypeptide that contains the index, or
             ``(None, -1)`` when the index is not covered
    """
    structure_id = os.path.splitext(input_file)[0].replace('./', '')
    structure = PDBParser().get_structure(structure_id, input_file)
    peptides = PPBuilder().build_peptides(structure, aa_only=False)
    wrapped = [EPolyPeptide(pp) for pp in peptides]
    if global_index >= sum(len(pp) for pp in wrapped):
        return None, -1

    # Work with a 1-based target so it can be compared to cumulative lengths
    target = int(global_index) + 1
    if target <= 0:
        return None, -1

    consumed = 0
    for pp in wrapped:
        preceding = consumed
        consumed += len(pp)
        if target <= consumed:
            return pp.chain_id, target - preceding
    # Not reachable: the range check above guarantees target <= total length
    return None, -1
コード例 #9
0
ファイル: editPDB.py プロジェクト: mtrellet/Parsing-PDB-files
def deleteChain():
	"""Interactively delete chains from the structure in ``pdb_name`` and save it.

	The user is asked how many chains to remove and for each chain id; the
	chain is detached from every model that contains it. The result is saved
	either as ``<lower-case sequence>_bound.pdb`` (answer 'y' to the last
	question) or as ``<name>_without<last chain id entered>.pdb``.

	Reads the module-level ``pdb_name`` for the input path.
	Raises ValueError if the number of chains entered is not an integer.
	"""
	# raw_input only exists on Python 2; fall back to input on Python 3
	try:
		read_line = raw_input
	except NameError:
		read_line = input

	parser = PDBParser()
	nameStruct = pdb_name.partition('.')[0]
	structure = parser.get_structure(nameStruct, pdb_name)

	# int() instead of Python 2's evaluating input(): never eval user text
	nb_chain = int(read_line('How many chain do you want to delete : '))
	rm_chain = ''  # stays empty when no chain is requested
	for _ in range(nb_chain):
		rm_chain = read_line('What chain you want to delete : ')
		for model in structure:
			# Look the chain up by id rather than detaching while iterating
			if model.has_id(rm_chain):
				model.detach_child(rm_chain)
	pept = read_line('Do you want to get a pdb with the sequence in its name : ')
	w = PDBIO()
	w.set_structure(structure)
	if pept == 'y':
		ppb = PPBuilder()
		seq = ''.join(str(pp.get_sequence()) for pp in ppb.build_peptides(structure))
		w.save(seq.lower() + '_bound.pdb')
	else:
		w.save(nameStruct + '_without' + rm_chain + '.pdb')
コード例 #10
0
def match_pdb_residue_num_to_seq(model, ref=None):
    """Map positions of a reference sequence to PDB residue identifiers.

    Every polypeptide of the model's parent structure is aligned against the
    reference sequence of its chain; aligned positions are translated into
    the residue numbering used in the PDB file.

    The reference sequence is 1-indexed (and is indexed as such in the output).

    Args:
        model: A biostructmap Model object.
        ref (dict): Reference protein sequence for each chain of the
            structure. Defaults to the sequences given in the PDB file
            (``model.parent().sequences``).
    Returns:
        dict: Keys are ``(chain id, reference index)``, values are
            ``(chain id, Bio.PDB residue id)``. For example the key
            ('A', 17) - the 17th residue of the reference sequence of chain
            'A' - may map to ('A', (' ', 273, ' ')).
    """
    peptides = PPBuilder().build_peptides(model.parent().structure)
    reference = model.parent().sequences if ref is None else ref

    mapping = {}
    for peptide in peptides:
        sequence = peptide.get_sequence()
        # Presume that peptide belongs to a single chain
        chain_id = peptide[0].get_full_id()[2]
        _, ref_to_pdb = align_protein_sequences(sequence, reference[chain_id])
        mapping.update(
            ((chain_id, ref_index), peptide[pdb_index - 1].get_full_id()[2:4])
            for ref_index, pdb_index in ref_to_pdb.items())
    return mapping
コード例 #11
0
    def get_contact_map(self, chain_id):
        '''
            Input:
                self: Use Biopython.PDB structure which has been stored in an object variable
                chain_id  : String (usually in ['A','B', 'C' ...]. The number of chains
                        depends on the specific protein and the resulting structure)
            Return:
                Return a complete contact map (see description in exercise sheet)
                for a given chain in a Biopython.PDB structure as numpy array.
                The values in the matrix describe the c-alpha distance between all residues
                in a chain of a Biopython.PDB structure.
                The distances are cast to integers (astype drops the fractional part).
        '''

        length = len(self.get_sequence(chain_id))
        ppb = PPBuilder()
        contact_map = np.zeros((length, length), dtype=np.float32)

        # NOTE(review): indices restart at 0 for every polypeptide, so for a
        # chain split into several fragments later fragments overwrite earlier
        # ones and cross-fragment distances stay 0 - confirm this is intended.
        for pp in ppb.build_peptides(self.structure[0][chain_id],
                                     aa_only=True):
            ca_atoms = [residue['CA'] for residue in pp]
            for i, atom_1 in enumerate(ca_atoms):
                for j, atom_2 in enumerate(ca_atoms):
                    contact_map[i, j] = atom_1 - atom_2

        # np.int was only an alias of the builtin int and no longer exists
        return contact_map.astype(int)
コード例 #12
0
ファイル: FragmentMapper.py プロジェクト: xnlsbunyu/biopython
    def _map(self, model):
        """Map (PRIVATE).

        Build the polypeptides of the model, cut each into fragments and
        classify the fragments against ``self.reflist``. The first and last
        ``self.edge`` residues of every polypeptide get no fragment.

        :param model: the model that will be mapped
        :type model: L{Model}
        :return: dict mapping residue -> classified fragment
        :raises PDBException: any PDBException other than 'CHAINBREAK' raised
            while making or classifying the fragments
        """
        ppb = PPBuilder()
        ppl = ppb.build_peptides(model)
        fd = {}
        for pp in ppl:
            try:
                # make fragments
                flist = _make_fragment_list(pp, self.flength)
                # classify fragments
                mflist = _map_fragment_list(flist, self.reflist)
            except PDBException as why:
                # Compare the message: an exception object never equals a str
                if str(why) == 'CHAINBREAK':
                    # Funny polypeptide - skip
                    continue
                raise
            end = len(pp) - self.edge
            for i, res in enumerate(pp):
                if i < self.edge or i >= end:
                    # start / end residues carry no fragment
                    continue
                fd[res] = mflist[i - self.edge]
        return fd
コード例 #13
0
ファイル: testlist.py プロジェクト: alashkov83/hydrocluster
def download(filelist: list, q: Queue, lock: Lock, cursor: sqlite3.Cursor, conn: sqlite3.Connection, dir_name: str):
    """
    Download PDB entries, summarise each one and insert the summary into ``Structures``.

    For every id the PDB file is retrieved into ``dir_name/<id>``, parsed, and
    its header fields plus residue count and summed molecular weight are
    inserted as one row. Ids already handled during this call are skipped via
    the temporary file ``status_tmp.txt``, which is removed at the end.

    :param filelist: PDB ids to process
    :param q: queue that receives every successfully stored id and finally ``None``
    :param lock: lock held while the database is written
    :param cursor: cursor used for the INSERT
    :param conn: connection committed after each successful INSERT
    :param dir_name: base directory for the downloaded files
    """
    status_path = 'status_tmp.txt'
    # Placeholders let sqlite3 quote the values; header text may contain quotes
    insert_sql = """INSERT INTO Structures (IDPDB, NAME, HEAD, METHOD, RESOLUTION, NCOMP, NCHAIN,
NRES, MMASS, EC) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"""
    with open(status_path, 'w') as f:
        f.write('')
    for pdb_id in filelist:
        # splitlines drops the newline so the membership test can match
        with open(status_path) as f:
            processed = f.read().splitlines()
        if pdb_id in processed:
            continue
        entry_dir = os.path.join(dir_name, pdb_id)
        entry_path = os.path.join(entry_dir, 'pdb{:s}.ent'.format(pdb_id))
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_id, pdir=entry_dir, file_format='pdb')
        if not os.path.exists(entry_path):
            print("File with ID PDB: {:s} not found!".format(pdb_id))
            continue
        parser = PDBParser()
        structure = parser.get_structure(pdb_id, entry_path)
        header = parser.header
        name = header.get('name', '')
        head = header.get('head', '')
        method = header.get('structure_method', '')
        # A missing resolution is stored as NULL instead of crashing
        try:
            resolution = round(float(header.get('resolution', '')), 2)
        except (TypeError, ValueError):
            resolution = None
        compounds = header.get('compound', {})
        ncomp = len(compounds)
        nchain = 0
        eclist = []
        for values in compounds.values():
            chains = values.get('chain')
            if chains:
                nchain += len(chains.split(','))
            eclist.append(values.get('ec', '') or values.get('ec_number', ''))
        ec = ", ".join(eclist)
        nres = 0
        mmass = 0
        ppb = PPBuilder()
        for pp in ppb.build_peptides(structure):
            seq = pp.get_sequence()
            nres += len(seq)
            mmass += int(ProteinAnalysis(str(seq)).molecular_weight())
        try:
            with lock:
                try:
                    cursor.execute(insert_sql,
                                   (pdb_id, name, head, method, resolution,
                                    ncomp, nchain, nres, mmass, ec))
                except sqlite3.DatabaseError as err:
                    print("Error: ", err)
                else:
                    print("Download Done for ID PDB: {:s}".format(pdb_id))
                    conn.commit()
                    q.put(pdb_id)
        finally:
            # Record the id whether or not the INSERT succeeded
            with open(status_path, 'at') as f:
                f.write(pdb_id + '\n')
    os.remove(status_path)
    q.put(None)
コード例 #14
0
ファイル: blastp.py プロジェクト: Wangzzzzzzzz/chain_cluster
def generate_seq_file(score_file, save_file):
    """Write the mutated chain sequence for every mutation listed in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) holds
    names of the form ``<pdb>_<chain>_<wt><pos><mut>`` with three-letter
    residue codes. For each distinct name the chain sequence is rebuilt from
    the PDB file (downloaded into ``./dataFile/PDB_dl`` when missing), the
    wild-type residue at the position is asserted, replaced by the mutant
    residue, and ``name<TAB>sequence`` is written to ``save_file``.
    """
    score_file = './dataFile/' + score_file
    score_table = pd.read_csv(score_file, sep='\t')

    # Group the distinct mutations by PDB id, keeping first-seen order
    mutations_by_pdb = dict()
    seen_names = set()
    for name in score_table.iloc[:, 0]:
        fields = name.split('_')
        pdb_id, chain_id, change = fields[0], fields[1], fields[2]
        record = {
            'chain_id': chain_id,
            'wt_aa': change[0:3],
            'mu_aa': change[-3:],
            'mu_pos': int(''.join(filter(str.isdigit, change))),
            'name': name,
        }
        if name in seen_names:
            continue
        seen_names.add(name)
        mutations_by_pdb.setdefault(pdb_id, []).append(record)
    del seen_names

    parser = PDBParser()
    seq_builder = PPBuilder()
    pdb_dl_handle = PDBList()
    PDB_DIR = './dataFile/PDB_dl'
    mutated = dict()
    for pdb_id, mutations in mutations_by_pdb.items():
        pdb_file = PDB_DIR + '/pdb' + pdb_id.lower() + '.ent'
        # check if pdb file exists
        if not os.path.exists(pdb_file):
            pdb_dl_handle.retrieve_pdb_file(pdb_code=pdb_id, file_format='pdb', overwrite=False, pdir=PDB_DIR)
        model = parser.get_structure(pdb_id, pdb_file)[0]

        for mutation in mutations:
            fragments = seq_builder.build_peptides(model[mutation['chain_id']])
            sequence = "".join(str(pp.get_sequence()) for pp in fragments)
            sequence = sequence.replace('\n', '').replace(' ', '')
            index = mutation['mu_pos'] - 1
            assert sequence[index] == three_to_one(mutation['wt_aa']), 'Wt amino acid failed to match'
            residues = list(sequence)
            residues[index] = three_to_one(mutation['mu_aa'])
            mutated[mutation['name']] = ''.join(residues)

    with open(save_file, 'w') as out:
        for name, mutated_seq in mutated.items():
            out.write(name + '\t' + mutated_seq + '\n')
コード例 #15
0
 def real_seq():
     """Return the concatenated sequence of all polypeptides in ``<protein_id>.pdb``.

     Reads ``protein.protein_id`` from the enclosing scope.
     """
     pdb_path = protein.protein_id + '.pdb'
     structure = PDBParser().get_structure(protein.protein_id, pdb_path)
     joined = ''
     for peptide in PPBuilder().build_peptides(structure):
         joined += peptide.get_sequence()
     return joined
コード例 #16
0
def polypeptide(pdbfile):
	"""Return the single polypeptide contained in ``pdbfile``.

	The unpacking raises ValueError unless PPBuilder yields exactly one
	polypeptide for the structure.
	"""
	structure = PDBParser().get_structure('test', pdbfile)
	(only_peptide,) = PPBuilder().build_peptides(structure)
	return only_peptide
コード例 #17
0
def get_pdb_amino_acid_sequences(pdb_path):
    """Return the sequence of every polypeptide in a PDB file as a list of str.

    The structure id is ``pdb_path`` without its last four characters.
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_path[:-4], pdb_path)
    return [
        str(peptide.get_sequence())
        for peptide in PPBuilder().build_peptides(structure)
    ]
コード例 #18
0
ファイル: freesasa_rsa.py プロジェクト: smaiti7/molmod-data
def parse_structure(path):
    """
    Parse a PDB formatted structure with the module-level parser ``P`` and clean it.

    Disordered atoms are replaced by their selected alternative, residues
    whose hetero flag starts with 'W' or 'H' are removed, and any remaining
    residue that is not a standard amino acid raises ValueError. When the
    number of polypeptides differs from the number of distinct chain ids the
    fragments are reported on stderr (the calculation still proceeds).

    Returns a tuple ``(structure, n_chains, n_res)`` where ``n_res`` is the
    residue count taken before the hetero residues are removed.
    """

    print('[+] Reading structure file: {0}'.format(path))
    sname = '.'.join(os.path.basename(path).split('.')[:-1])

    try:
        s = P.get_structure(sname, path)
    except Exception as e:
        print('[!] Structure \'{0}\' could not be parsed'.format(sname),
              file=sys.stderr)
        raise Exception(e)

    # Double occupancy check: keep only the selected alternative location
    for atom in list(s.get_atoms()):
        if not atom.is_disordered():
            continue
        residue = atom.parent
        chosen = atom.selected_child
        chosen.altloc = ' '
        chosen.disordered_flag = 0
        residue.detach_child(atom.id)
        residue.add(chosen)

    # Remove HETATMs and solvent
    all_residues = list(s.get_residues())
    n_res = len(all_residues)
    for res in all_residues:
        hetero_flag = res.id[0][0]
        if hetero_flag == 'W' or hetero_flag == 'H':
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                'Unsupported non-standard amino acid found: {0}'.format(
                    res.resname))

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_chains = len({c.id for c in s.get_chains()})

    if len(peptides) != n_chains:
        # Gaps are only reported; they do not abort the calculation
        print('[!] Structure contains gaps:', file=sys.stderr)
        for i_pp, pp in enumerate(peptides):
            print(
                '\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}'
                .format(i_pp, pp[0], pp[-1]),
                file=sys.stderr)

    return (s, n_chains, n_res)
コード例 #19
0
def find_pdb_limits(pdb_path):
    """Return ``(start, end, seq)`` for the first polypeptide of a PDB file.

    ``start`` and ``end`` are the residue numbers of the polypeptide's first
    and last residue, ``seq`` is its sequence.
    """
    structure = PDBParser().get_structure('', pdb_path)
    # takes the first (and only) polypeptide
    first_peptide = PPBuilder().build_peptides(structure)[0]
    first_residue, last_residue = first_peptide[0], first_peptide[-1]
    return (first_residue.get_id()[1], last_residue.get_id()[1],
            first_peptide.get_sequence())
コード例 #20
0
    def _model_file_to_data(self, file_path, params):
        """
            _model_file_to_data:
                Do the PDB conversion--parse the model pdb file for creating a pdb data object

            Returns a tuple (data, pp_no, params):
                data   -- dict describing the structure, or {} when parsing fails,
                          when any extraction step fails, or when the matched
                          params carry no 'sequence_identities' under 'pdb_info'
                pp_no  -- number of polypeptides built from the structure (0 when
                          the file could not be parsed)
                params -- the params as returned by self._match_features (the
                          input params when that step was not reached)

            Failures are logged and reported through the empty data dict rather
            than raised, so callers always receive the tuple.
        """
        logging.info(
            f'Parsing pdb file {file_path} to a pdb structure with params: {params}'
        )

        parser = PDB.PDBParser(PERMISSIVE=1)
        pp_no = 0

        try:
            structure = parser.get_structure("test", file_path)
        except Exception as e:
            # Exceptions carry no .message attribute on Python 3; format e itself
            logging.info(f'PDBParser errored with message: {e}')
            return {}, pp_no, params

        try:
            pp_no = len(PPBuilder().build_peptides(structure))

            (compound, source) = self._get_compound_source(structure)
            # The helpers are still all called; only the counts are used below
            self._get_models_from_structure(structure)
            (num_chains, _) = self._get_chains_from_structure(structure)
            (num_residues, _) = self._get_residues_from_structure(structure)
            (num_atoms, _) = self._get_atoms_from_structure(structure)
            model = structure[0]
            protein_data = self._get_proteins_by_structure(
                structure, model.get_id(), file_path)
            (protein_data, params) = self._match_features(params, protein_data)

            pdb_info = params.get('pdb_info', None)
            if pdb_info and pdb_info.get('sequence_identities', None):
                data = {
                    'name': structure.header.get('name', ''),
                    'num_chains': num_chains,
                    'num_residues': num_residues,
                    'num_atoms': num_atoms,
                    'compound': compound,
                    'source': source,
                    'proteins': protein_data
                }
            else:
                logging.info(
                    f'Parsing pdb file {file_path} failed to match KBase genome/features!'
                )
                data = {}
        except Exception:
            # Keep the "empty data on failure" contract, but leave a traceback in the log
            logging.exception(
                f'Converting pdb file {file_path} to a data object failed')
            return {}, pp_no, params

        return data, pp_no, params
コード例 #21
0
ファイル: test.py プロジェクト: mfawzysami/3D-Protein-Search
def get_primary_sequence(input_file):
    """Return the concatenated sequence of all polypeptides of a PDB file.

    Polypeptides are built with aa_only=False. The structure id is the file
    path without extension and with './' removed.
    """
    structure_id = os.path.splitext(input_file)[0].replace('./', '')
    structure = PDBParser().get_structure(structure_id, input_file)
    primary = ""
    for peptide in PPBuilder().build_peptides(structure, aa_only=False):
        primary += peptide.get_sequence()
    return primary
コード例 #22
0
def get_aa_encoded(protein_file):
    """Return the integer encoding of the polypeptide sequences of a PDB file.

    :param protein_file: path of the PDB file; the structure id is the path
                         without its last four characters
    :return: result of ``int_encoding`` applied to the list of polypeptide
             sequences (as str) and ``AA_CODES``
    """
    # Use the function's own parameter; the body previously read an
    # undefined name `pdb_path`
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        protein_file[:-4], protein_file)
    pdb_aas = [
        str(pp.get_sequence()) for pp in PPBuilder().build_peptides(structure)
    ]
    return int_encoding(pdb_aas, AA_CODES)
コード例 #23
0
def get_pdb_torsion_angles(pdb_path, chain_index):
    """Return the phi and psi angle lists of one polypeptide of a PDB file.

    :param pdb_path: path of the PDB file
    :param chain_index: index into the list of polypeptides built by PPBuilder
    :return: tuple ``(phis, psis)`` taken from the polypeptide's phi/psi list
    """
    parser = Bio.PDB.PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_path[:-4], pdb_path)
    peptides = PPBuilder().build_peptides(structure)
    angle_pairs = peptides[chain_index].get_phi_psi_list()
    phis = [pair[0] for pair in angle_pairs]
    psis = [pair[1] for pair in angle_pairs]
    return phis, psis
コード例 #24
0
def obtian_seq_wo_seq_file(score_file):
    """Build the sequence of every chain referenced in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) is read;
    the first four characters of an entry are taken as PDB id and the first
    six as chain name (its last character being the chain id). Missing PDB
    files are downloaded into ``./dataFile/PDB_dl``.

    :return: dict {chain name: sequence string}
    """
    score_file = './dataFile/' + score_file
    score_table = pd.read_csv(score_file, sep='\t')

    # PDB id -> set of chain names that belong to it
    chains_by_pdb = dict()
    for entry in score_table.iloc[:, 0]:
        chains_by_pdb.setdefault(entry[0:4], set()).add(entry[0:6])

    # Fetch every PDB file that is not yet stored under ./dataFile/PDB_dl/
    PDB_DIR = './dataFile/PDB_dl'
    if not os.path.exists(PDB_DIR):
        os.mkdir(PDB_DIR)
    pdb_dl_handle = PDBList()
    for pdb_name in chains_by_pdb:
        if os.path.exists(PDB_DIR + '/pdb' + pdb_name.lower() + '.ent'):
            continue
        pdb_dl_handle.retrieve_pdb_file(pdb_code=pdb_name,
                                        file_format='pdb',
                                        overwrite=False,
                                        pdir=PDB_DIR)

    # Rebuild the sequence of each requested chain
    seq_dict = dict()
    parser = PDBParser()
    seq_builder = PPBuilder()
    for pdb_id, chain_names in chains_by_pdb.items():
        pdb_file = PDB_DIR + '/pdb' + pdb_id.lower() + '.ent'
        model = parser.get_structure(pdb_id, pdb_file)[0]
        for chain in chain_names:
            # the last letter of the chain name is the chain id
            fragments = seq_builder.build_peptides(model[chain[-1]])
            sequence = "".join(str(pp.get_sequence()) for pp in fragments)
            # clean the bad chars
            seq_dict[chain] = sequence.replace('\n', '').replace(' ', '')

    return seq_dict
コード例 #25
0
def FASTA_Gen(pdb_name, pdb_id):
    """Return a SeqRecord holding the concatenated polypeptide sequences of a PDB file.

    :param pdb_name: path of the PDB file, parsed with the module-level parser ``p``
    :param pdb_id: structure id, also used as the record id
    :return: SeqRecord with empty description
    """
    structure = p.get_structure(pdb_id, pdb_name)
    seq = ''
    for fragment in PPBuilder().build_peptides(structure):
        seq += fragment.get_sequence()
    return SeqRecord(seq, id=pdb_id, description='')
コード例 #26
0
 def get_secondary_structure_details(self, name, pdb_file, aa_only=False):
     """Collect sequence, secondary structure and surface values of a PDB file.

     DSSP is run on the first model with acc_array="Wilke". Field 2 of every
     DSSP entry is joined into the ``ss`` string; ``sasa`` multiplies field 3
     by the ``residues`` lookup of field 1 (presumably relative accessibility
     times a per-residue maximum - confirm against the ``residues`` table).

     :return: tuple ``(name, seq, ss, sasa, structure)``
     """
     structure = PDBParser().get_structure(name, pdb_file)
     dssp = DSSP(structure[0], pdb_file, acc_array="Wilke")
     ss = "".join(entry[2] for entry in dssp)
     sasa = [residues[entry[1]] * entry[3] for entry in dssp]
     seq = ""
     for peptide in PPBuilder().build_peptides(structure, aa_only=aa_only):
         seq += peptide.get_sequence()
     return name, seq, ss, sasa, structure
コード例 #27
0
def avgenergy(seq, N):
    """Return the negated average contact energy over a set of model files.

    Model files are named ``<seq>_<t>.BL000<i>0001.pdb`` for t in 1..10 and
    i in 1..N; files that cannot be parsed are skipped. In every model the
    first chain is scanned against the residues of the second chain whose
    shifted number (residue number - 352) is listed in ``ruleresidues``.
    Each residue pair closer than its non-strict cutoff contributes its pair
    energy multiplied by the ``ruleresidues`` weight.

    :param seq: prefix of the model file names
    :param N: number of models per t
    :return: ``-sum / (N * 10)``; note that skipped files still count in the divisor
    :raises ZeroDivisionError: if N is 0
    """
    p = PDBParser(QUIET=True)
    a = "%s_%d.BL000%d0001.pdb"
    avg = 0.0
    for t in range(1, 11):
        for i in range(1, N + 1):
            aa = a % (seq, t, i)
            try:
                s = p.get_structure(aa, aa)
            except Exception:
                # Best effort: a missing or unreadable model is skipped
                continue

            chains = s[0].get_list()

            ccr5 = chains[0]
            gp120 = chains[1]

            total_energy = 0
            for r1 in gp120:
                position = r1.get_id()[1] - 352
                if position not in ruleresidues:
                    continue
                r1Code = str(protein_letters_3to1.get(r1.resname.title()))
                for r2 in ccr5:
                    k = distanceBetweenCOM(r1, r2)
                    r2Code = str(protein_letters_3to1.get(r2.resname.title()))
                    # The tables store each pair once; try both key orders.
                    # The strict table is consulted only because a miss there
                    # also selects the swapped key.
                    pair = r1Code + "_" + r2Code
                    try:
                        distancias_estrictas["distances"][pair]
                        cutoff_no_estricto = distancias_no_estrictas[
                            "distances"][pair]
                    except KeyError:
                        pair = r2Code + "_" + r1Code
                        distancias_estrictas["distances"][pair]
                        cutoff_no_estricto = distancias_no_estrictas[
                            "distances"][pair]

                    if k < cutoff_no_estricto:
                        try:
                            en = energia["energy"][r1Code + "_" + r2Code]
                        except KeyError:
                            en = energia["energy"][r2Code + "_" + r1Code]

                        total_energy += float(en) * ruleresidues[position]

            avg += total_energy

    return -avg / (N * 10)
コード例 #28
0
 def calculate_RMSD(self,
                    row,
                    source_position,
                    fragment_length,
                    aa_only=False):
     """Superimpose a target fragment on a source fragment and store the RMSD on ``row``.

     ``row.rmsd`` is set to the RMSD rounded to 4 decimals, or to -1 when no
     source is configured, no target file is found, or the two CA windows are
     empty or differ in length. On success the target structure's first model
     is transformed in place by the superimposition.

     :param row: object providing ``pos`` and ``protein_id``; receives ``rmsd``
     :param source_position: index of the first residue of the source window
     :param fragment_length: number of residues in each window
     :param aa_only: forwarded to ``PPBuilder.build_peptides``
     """
     # Without a source there is nothing to compare against
     if self.args.source is None:
         setattr(row, "rmsd", -1)
         return
     target_position = row.pos
     builder = PPBuilder()

     def ca_window(structure, start):
         # CA atoms of `fragment_length` residues, counted over all polypeptides
         all_residues = [
             res
             for pp in builder.build_peptides(structure, aa_only=aa_only)
             for res in pp
         ]
         return [
             res['CA'] for res in all_residues[start:start + fragment_length]
         ]

     source_structure = self.__get_structure__(self.args.source)
     fixed = ca_window(source_structure, source_position)
     target_file = self.get_target_file(row.protein_id)
     if target_file is None:
         setattr(row, "rmsd", -1)
         return
     target_structure = self.__get_structure__(target_file)
     moving = ca_window(target_structure, target_position)
     # find RMSD; the superimposer needs two equally long, non-empty lists
     if not fixed or len(fixed) != len(moving):
         setattr(row, "rmsd", -1)
         return
     sup = Bio.PDB.Superimposer()
     sup.set_atoms(fixed, moving)
     sup.apply(target_structure[0].get_atoms())
     RMSD = round(sup.rms, 4)
     setattr(row, "rmsd", RMSD)
コード例 #29
0
def test_add_residue():
    """Build a 20-residue peptide one residue at a time and check the result."""
    structure = PeptideBuilder.initialize_res("A")
    for letter in "CDEFGHIKLMNPQRSTVWY":
        structure = PeptideBuilder.add_residue(structure, letter)

    # extract peptide from structure and compare to expected
    peptides = PPBuilder().build_peptides(structure)
    first_peptide = next(iter(peptides))
    assert first_peptide.get_sequence() == "ACDEFGHIKLMNPQRSTVWY"

    # now compare to saved reference structure
    assert compare_to_reference(structure, "extended.pdb")
コード例 #30
0
 def get_sequence(self, chain_id):
   '''
   Input:
     self:     Use Biopython.PDB structure which has been stored in an object
               variable
     chain_id: String (usually in ['A','B', 'C' ...]. The number of chains
               depends on the specific protein and the resulting structure)
   Return:
     Return the amino acid sequence (single-letter alphabet!) of a given chain
     (chain_id) in a Biopython.PDB structure as a string. The sequences of all
     polypeptide fragments of the chain are concatenated in order; a chain
     without any polypeptide yields the empty string.
   '''
   chain = self.structure[0][chain_id]
   # Join every fragment and convert to str, as the contract promises a string
   return ''.join(
       str(pp.get_sequence()) for pp in PPBuilder().build_peptides(chain))