def _get_proteins_by_structure(self, pdb_structure, model, file_path):
    """
        _get_proteins_by_structure: Build one protein record per non-empty chain.

        For every chain yielded by ``pdb_structure.get_chains()`` that evaluates
        truthy, the polypeptide fragments found by PPBuilder are concatenated
        into a single sequence string.

        :param pdb_structure: object exposing ``get_chains()``
        :param model: value stored unchanged under ``'model_id'``
        :param file_path: path whose base name is stored under ``'id'``
        :return: list of dicts with the keys 'id', 'model_id', 'chain_id',
                 'sequence' and 'md5' (hex MD5 digest of the sequence)
    """
    builder = PPBuilder()
    records = []

    for chain in pdb_structure.get_chains():
        if not chain:
            continue
        # A chain may be split into several fragments; glue them together.
        chain_seq = ''.join(
            str(fragment.get_sequence())
            for fragment in builder.build_peptides(chain))
        records.append({
            'id': os.path.basename(file_path),
            'model_id': model,
            'chain_id': chain.get_id(),
            'sequence': chain_seq,
            'md5': hashlib.md5(chain_seq.encode()).hexdigest()
        })

    return records
def getBoundResList(fname_bound, fname_unbound, listOfDictsChainToResId):
    """Translate unbound-structure residue ids into bound-structure ids.

    :param fname_bound: path of the bound PDB file
    :param fname_unbound: path of the unbound PDB file
    :param listOfDictsChainToResId: list of dicts mapping an unbound chain id
        to an iterable of residue ids; the chain id "*" is looked up as " "
    :return: list with one dict per input dict, mapping bound chain id to the
        list of mapped residue ids. Residues for which the mapper returns
        None are left out.
    """
    pdb_parser = PDBParser(QUIET=True)
    unbound = pdb_parser.get_structure(fname_unbound, fname_unbound)
    bound = pdb_parser.get_structure(fname_bound, fname_bound)

    builder = PPBuilder()
    unbound_pps = builder.build_peptides(unbound, aa_only=False)
    bound_pps = builder.build_peptides(bound, aa_only=False)

    mapper = BoundUnboundMapper(unbound_pps, bound_pps)
    mapper.build_correspondence()

    translated = []
    for chain_to_res in listOfDictsChainToResId:
        bound_res_by_chain = {}
        for chain_u in chain_to_res:
            lookup_chain = " " if chain_u == "*" else chain_u
            for res_u in sorted(chain_to_res[chain_u]):
                hit = mapper.mapUnboundToBoundUsingId(lookup_chain, res_u)
                if hit is None:
                    continue
                chain_b, res_b = hit
                bound_res_by_chain.setdefault(chain_b, []).append(res_b)
        translated.append(bound_res_by_chain)
    return translated
def doChainAlignments(pdbID, structure, consensusFile, chainSequencesDir,
                      verbose):
    """Write each chain sequence to FASTA and align it against the consensus.

    For every chain of ``structure`` the polypeptide sequences are
    concatenated, written to ``<chainSequencesDir>/<pdbID>_<chain>.fasta``
    and passed to the external ``needle`` program together with
    ``consensusFile``.

    :param pdbID: identifier used as prefix of all generated file names
    :param structure: object exposing ``get_chains()``
    :param consensusFile: path handed to ``needle`` as ``-bsequence``
    :param chainSequencesDir: directory receiving FASTA and alignment files
    :param verbose: when truthy, print each chain id and its sequence

    Exits the interpreter (via ``exit``) when a chain yields an empty
    sequence; ``subprocess.CalledProcessError`` propagates if ``needle`` fails.
    """
    print('Pairwise alignment of chain sequences with consensus...')
    builder = PPBuilder()

    for chain in list(structure.get_chains()):
        sequence = "".join(
            str(pp.get_sequence()) for pp in builder.build_peptides(chain))
        chain_tag = pdbID + '_' + chain.get_id()
        fasta_path = os.path.join(chainSequencesDir, chain_tag + '.fasta')

        if not sequence:
            print("ERROR: Unable to get chain sequence from PDB file.")
            exit("Selected PDB-file does not contain protein structure.")

        with open(fasta_path, 'w') as fasta:
            fasta.write('>' + chain_tag + '\n' + sequence)

        if verbose:
            print('Chain ' + chain.get_id())
            print(sequence)

        needle_cmd = [
            'needle',
            '-asequence', fasta_path,
            '-bsequence', consensusFile,
            '-noendweight',
            '-endopen', '10.0',
            '-endextend', '0.5',
            '-brief',
            '-aformat', 'srspair',
            '-auto',
            '-aname_outfile', pdbID + '_' + chain.get_id(),
            '-adirectory_outfile', chainSequencesDir,
        ]
        subprocess.check_call(needle_cmd)

    print('OK')
def __init__(self,
             prefix,
             computedFeatsRootDir,
             areForTrainAndTest=True,
             boundAvailable=True,
             res2res_dist=6.0,
             statusManager=None):
    '''
      Set up the contact-map tool and create its output directories.

      :param prefix: str. An id for the complex
      :param computedFeatsRootDir: str. Path where features will be stored
      :param areForTrainAndTest: boolean. True if ligand and receptor are posed
                                 in interacting coordinates, so the label is
                                 known. False if they are for prediction, so
                                 labels cannot be assigned.
      :param boundAvailable: boolean. True if there is a bound and an unbound
                             pdb for each complex. False otherwise
      :param res2res_dist: float. Max distance between any heavy atoms of 2
                           amino acids to be considered as interacting
                           (Angstrom)
      :param statusManager: class that implements .setStatus(msg) to communicate
    '''
    ToolManager.__init__(self, computedFeatsRootDir,
                         statusManager=statusManager)

    self.prefix = prefix
    self.areForTrainAndTest = areForTrainAndTest
    self.res2res_dist = res2res_dist
    self.boundAvailable = boundAvailable

    # Bound structures only make sense when labels are known.
    assert not (boundAvailable == True) or not (areForTrainAndTest == False), \
        "Error parameters in CMap: boundAvailable==True and areForTrainAndTest==False"

    # radius set to 200 to not worry about broken chains
    self.ppb = PPBuilder(radius=200)

    self.outPath = myMakeDir(computedFeatsRootDir, "common")
    for attr_name, subdir in (("outPathCM", "contactMaps"),
                              ("outPathResDict", "includedResidues"),
                              ("outPathNeigs", "voroNeigs")):
        setattr(self, attr_name, myMakeDir(self.outPath, subdir))
def read_structure_seqs(self, strucm):
    """Extract per-chain fragment sequences from every model of a structure.

    For each model in ``strucm.st`` and each chain in it, one SeqRecord is
    built per polypeptide fragment and the list is stored in
    ``self.data[chain_id]['pdb'][model.id]``. The flag
    ``self.data[chain_id]['pdb']['wrong_order']`` is set to True when any
    fragment of the chain does not have start < end.
    """
    for mod in strucm.st:
        builder = PPBuilder()
        for chn in mod.get_chains():
            ch_id = chn.id
            records = []
            wrong_order = False

            for frag in builder.build_peptides(chn):
                start = frag[0].get_id()[1]
                end = frag[-1].get_id()[1]
                frid = '{}:{}-{}'.format(ch_id, start, end)
                record_id = 'pdbsq_' + frid
                record = SeqRecord(
                    frag.get_sequence(),
                    record_id,
                    record_id,
                    'PDB sequence chain ' + frid
                )

                in_order = start < end
                if not in_order:
                    print("Warning: unusual residue numbering at chain ", ch_id)
                    print("Warning: chain reconstruction may not be available")
                # The feature location always runs from the smaller to the
                # larger residue number.
                low, high = (start, end) if in_order else (end, start)
                record.features.append(SeqFeature(FeatureLocation(low, high)))
                if not in_order:
                    wrong_order = True

                records.append(record)

            if ch_id not in self.data:
                self.add_empty_chain(ch_id)
            pdb_entry = self.data[ch_id]['pdb']
            pdb_entry[mod.id] = records
            pdb_entry['wrong_order'] = wrong_order
def main():
    """Extract the amino-acid sequence of a PDB file and write it as FASTA.

    Command-line options:
      -p / --pdb        path to the input PDB file
      -f / --pdb_fasta  path of the FASTA file to write

    The sequences of all polypeptides of all chains in the first model are
    concatenated, printed to stdout and written to the FASTA file with a
    header of the form ``>name length``.
    """
    opt_parser = optparse.OptionParser()
    opt_parser.add_option("-p", "--pdb", dest="pdb",
                          help="path to PDB file", metavar="STRING")
    opt_parser.add_option("-f", "--pdb_fasta", dest="pdb_fasta",
                          help="path to PDB fasta file (out)",
                          metavar="STRING")
    options, _args = opt_parser.parse_args()

    # Both paths are needed; fail with a usage message instead of a TypeError.
    if not options.pdb or not options.pdb_fasta:
        opt_parser.error("both --pdb and --pdb_fasta are required")

    pdb_fasta = options.pdb_fasta
    pdb_file = options.pdb
    pdb_name = os.path.basename(pdb_file).split(".")[0]

    pdb_parser = BP.PDBParser()
    ppb = PPBuilder(radius=1000)  # retrieve all amino acids

    structure = pdb_parser.get_structure(pdb_name, pdb_file)
    model = structure[0]
    pdbseq = "".join(
        str(pp.get_sequence())
        for chain in model
        for pp in ppb.build_peptides(chain, aa_only=False))

    # print() calls replace the Python 2 print statements.
    print(">", pdb_name, len(pdbseq))
    print(pdbseq)

    with open(pdb_fasta, "w") as o:
        o.write(">%s %i\n%s\n" % (pdb_name, len(pdbseq), pdbseq))
def fetch_protein(pdb_id: str) -> Tuple[List[str], np.ndarray]:
    """Download a PDB entry and return its sequence and backbone angles.

    The file is fetched from files.rcsb.org, stored temporarily in the
    current working directory, parsed, and removed again even when parsing
    fails. Only the first polypeptide found by PPBuilder is used.

    :param pdb_id: PDB identifier used to build the file name and URL
    :return: ``(aa_sequence, phi_psi_angles)``: the one-letter residues as a
        list, and an array with phi in row 0 and psi in row 1 (degrees).
        Undefined angles (None in Biopython's phi/psi list) are reported
        as 180.
    :raises requests.HTTPError: if the download returns an error status
    :raises IndexError: if the structure contains no polypeptide
    """
    # retrieve pdb file from Protein Data Bank
    pdb_file = f"{pdb_id}.pdb"
    pdb_file_path = os.path.join(os.getcwd(), pdb_file)
    protein_url = f"https://files.rcsb.org/download/{pdb_file}"

    req = requests.get(protein_url)
    # Do not write an HTML error page to disk and try to parse it as PDB.
    req.raise_for_status()

    try:
        with open(pdb_file_path, "w") as f:
            f.write(req.text)

        # parse pdb file
        structure = PDBParser().get_structure(pdb_id, pdb_file_path)
        peptides = PPBuilder().build_peptides(structure)[0]

        # extract amino acid sequence and phi/psi angles
        aa_sequence = list(peptides.get_sequence())
        # Test for None explicitly: an angle of exactly 0.0 rad is valid data.
        phi_psi_angles = np.array([
            (180 if phi is None else np.rad2deg(phi),
             180 if psi is None else np.rad2deg(psi))
            for phi, psi in peptides.get_phi_psi_list()
        ]).T
    finally:
        # remove pdb file (portable, and also runs when parsing failed)
        if os.path.exists(pdb_file_path):
            os.remove(pdb_file_path)

    return aa_sequence, phi_psi_angles
def get_chain_position(input_file, global_index):
    """Locate a 0-based global residue index within the polypeptides of a file.

    All polypeptides (``aa_only=False``) of the structure are laid end to end;
    ``global_index`` addresses a residue in that concatenation.

    :param input_file: path of the PDB file
    :param global_index: 0-based index into the concatenated polypeptides
    :return: ``(chain_id, position)`` where position is 1-based within the
        polypeptide that contains the index, or ``(None, -1)`` when the index
        is not covered
    """
    structure_id = os.path.splitext(input_file)[0].replace('./', '')
    structure = PDBParser().get_structure(structure_id, input_file)
    peptides = PPBuilder().build_peptides(structure, aa_only=False)
    wrapped = [EPolyPeptide(pp) for pp in peptides]

    if global_index >= sum(len(pp) for pp in wrapped):
        return None, -1

    # Work with a 1-based target so it can be compared to running lengths.
    target = int(global_index) + 1
    covered = 0
    cursor = 0
    while covered < target:
        current = wrapped[cursor]
        cursor += 1
        before = covered
        covered += len(current)
        if target <= covered:
            return current.chain_id, target - before
    return None, -1
def deleteChain():
    """Interactively delete whole chains from a PDB file and save the result.

    Reads the structure named by the module-level ``pdb_name``, asks how many
    chains to delete and which ones, then asks whether the output file should
    be named after the remaining sequence.

    Output file:
      * ``<lower-case sequence>_bound.pdb`` when the answer is ``y``
      * ``<name>_without<last chain entered>.pdb`` otherwise

    Raises ValueError if the number of chains entered is not an integer.
    """
    parser = PDBParser()
    nameStruct = pdb_name.partition('.')[0]
    structure = parser.get_structure(nameStruct, pdb_name)

    # Stays empty when the user asks for zero deletions, so the file name
    # below can still be built.
    rm_chain = ''
    # input() returns a string on Python 3; convert explicitly.
    nb_chain = int(input('How many chain do you want to delete : '))
    for _ in range(nb_chain):
        rm_chain = input('What chain you want to delete : ')
        for model in structure:
            # Iterate over a snapshot: detaching while iterating the model
            # itself would skip chains.
            for chain in list(model):
                if chain.id == rm_chain:
                    model.detach_child(chain.id)

    pept = input('Do you want to get a pdb with the sequence in its name : ')

    writer = PDBIO()
    writer.set_structure(structure)
    if pept == 'y':
        seq = ''.join(
            str(pp.get_sequence())
            for pp in PPBuilder().build_peptides(structure)).lower()
        writer.save(seq + '_bound.pdb')
    else:
        writer.save(nameStruct + '_without' + rm_chain + '.pdb')
def match_pdb_residue_num_to_seq(model, ref=None):
    """Map reference-sequence positions to PDB residue identifiers.

    Each polypeptide built from the parent structure is aligned against the
    reference sequence of its chain. The reference sequence is 1-indexed, and
    is indexed as such in the output.

    Args:
        model: A biostructmap Model object.
        ref (dict): Reference protein sequences keyed by chain id. Defaults
            to ``model.parent().sequences``.

    Returns:
        dict: Keys are ``(chain_id, reference_index)``; values are the
        ``(chain_id, residue_id)`` part of the Bio.PDB full id of the matching
        residue, e.g. key ``('A', 17)`` with value ``('A', (' ', 273, ' '))``.
    """
    polypeptides = PPBuilder().build_peptides(model.parent().structure)
    if ref is None:
        ref = model.parent().sequences

    mapping = {}
    for peptide in polypeptides:
        pep_seq = peptide.get_sequence()
        # Presume that peptide belongs to a single chain
        chain_id = peptide[0].get_full_id()[2]
        _, ref_to_pdb = align_protein_sequences(pep_seq, ref[chain_id])
        mapping.update(
            ((chain_id, ref_pos), peptide[pdb_pos - 1].get_full_id()[2:4])
            for ref_pos, pdb_pos in ref_to_pdb.items())
    return mapping
def get_contact_map(self, chain_id):
    '''
        Input:
            self: Use Biopython.PDB structure which has been stored in an object variable
            chain_id  : String (usually in ['A','B', 'C' ...]. The number of chains
                        depends on the specific protein and the resulting structure)
        Return:
            A square numpy integer array holding the C-alpha distance between
            residues i and j of the chain. The matrix side length is
            ``len(self.get_sequence(chain_id))``. Distances are converted with
            ``astype(int)``, i.e. truncated toward zero.
    '''
    length = len(self.get_sequence(chain_id))
    ppb = PPBuilder()
    contact_map = np.zeros((length, length), dtype=np.float32)

    # NOTE(review): indices restart at 0 for every polypeptide, so a chain
    # split into several fragments overwrites the same top-left entries.
    # Confirm whether chains with gaps are expected here.
    for pp in ppb.build_peptides(self.structure[0][chain_id], aa_only=True):
        for i, residue_1 in enumerate(pp):
            for j, residue_2 in enumerate(pp):
                # Subtracting two Bio.PDB atoms yields their distance.
                contact_map[i, j] = residue_1['CA'] - residue_2['CA']

    # np.int was removed in NumPy 1.24; the builtin int is the same dtype.
    return contact_map.astype(int)
def _map(self, model):
    """Map (PRIVATE).

    Build the polypeptides of ``model``, cut each into fragments, classify
    the fragments against ``self.reflist`` and return a dict mapping each
    residue to its classified fragment. The first and last ``self.edge``
    residues of every polypeptide get no entry.

    :param model: the model that will be mapped
    :type model: L{Model}
    :raises PDBException: for any PDBException other than 'CHAINBREAK'
    """
    ppb = PPBuilder()
    ppl = ppb.build_peptides(model)
    fd = {}
    for pp in ppl:
        try:
            # make fragments
            flist = _make_fragment_list(pp, self.flength)
            # classify fragments
            mflist = _map_fragment_list(flist, self.reflist)
            n_res = len(pp)
            for i, res in enumerate(pp):
                # skip start and end residues, which have no full fragment
                if self.edge <= i < n_res - self.edge:
                    fd[res] = mflist[i - self.edge]
        except PDBException as why:
            # An exception instance never equals a string, so compare its
            # message; otherwise the skip branch can never be taken.
            if str(why) == 'CHAINBREAK':
                # Funny polypeptide - skip
                continue
            raise
    return fd
def download(filelist: list, q: Queue, lock: Lock, cursor: sqlite3.Cursor,
             conn: sqlite3.Connection, dir_name: str):
    """Download PDB entries, summarise them and store one row per entry.

    For each id the PDB file is retrieved into ``<dir_name>/<id>/``, parsed,
    and a row with header fields plus residue count and summed molecular mass
    is inserted into the ``Structures`` table. Ids processed without a
    database error are put on ``q`` and appended to ``status_tmp.txt``; ids
    already listed there are skipped. ``None`` is put on ``q`` at the end.

    :param filelist: iterable of PDB ids (strings)
    :param q: queue receiving each stored id and a final ``None`` sentinel
    :param lock: lock guarding the database access
    :param cursor: sqlite3 cursor used for the INSERT
    :param conn: sqlite3 connection that is committed after each INSERT
    :param dir_name: root directory for the downloaded files
    """
    status_path = 'status_tmp.txt'
    with open(status_path, 'w') as f:
        f.write('')

    # Placeholders let sqlite3 do the quoting, so names containing quotes
    # cannot break or inject into the statement.
    insert_sql = (
        "INSERT INTO Structures (IDPDB, NAME, HEAD, METHOD, RESOLUTION, "
        "NCOMP, NCHAIN, NRES, MMASS, EC) "
        "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)")

    pdbl = PDBList()
    parser = PDBParser()
    ppb = PPBuilder()

    for file in filelist:
        # Strip the line endings, otherwise an id never matches a stored line.
        with open(status_path) as f:
            done = {line.strip() for line in f}
        if file in done:
            continue

        file_dir = os.path.join(dir_name, file)
        ent_path = os.path.join(file_dir, 'pdb{:s}.ent'.format(file))
        pdbl.retrieve_pdb_file(file, pdir=file_dir, file_format='pdb')
        if not os.path.exists(ent_path):
            print("File with ID PDB: {:s} not found!".format(file))
            continue

        structure = parser.get_structure(file, ent_path)
        header = parser.header
        name = header.get('name', '')
        head = header.get('head', '')
        method = header.get('structure_method', '')
        res = header.get('resolution')
        # A missing resolution is stored as NULL instead of crashing the
        # number formatting.
        resolution = round(res, 2) if isinstance(res, (int, float)) else None

        compounds = list(header['compound'].values())
        ncomp = len(compounds)
        nchain = sum(len(c['chain'].split(',')) for c in compounds)
        ec = ", ".join(
            c.get('ec', '') or c.get('ec_number', '') for c in compounds)

        nres = 0
        mmass = 0
        for pp in ppb.build_peptides(structure):
            seq = pp.get_sequence()
            nres += len(seq)
            mmass += int(ProteinAnalysis(str(seq)).molecular_weight())

        with lock:
            try:
                cursor.execute(insert_sql, (
                    file, name, head, method, resolution,
                    ncomp, nchain, nres, mmass, ec))
            except sqlite3.DatabaseError as err:
                print("Error: ", err)
                continue
            print("Download Done for ID PDB: {:s}".format(file))
            conn.commit()
            q.put(file)

        with open(status_path, 'at') as f:
            f.write(file + '\n')

    os.remove(status_path)
    q.put(None)
def generate_seq_file(score_file, save_file):
    """Write the mutated sequence for every mutation listed in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) holds
    names of the form ``<pdb>_<chain>_<wt3><position><mut3>``. For each
    distinct name the chain sequence is read from the PDB file (downloaded
    into ``./dataFile/PDB_dl`` if absent), the residue at ``position`` is
    checked against the wild type and replaced by the mutant residue.

    :param score_file: file name inside ``./dataFile/``
    :param save_file: output path; one ``name<TAB>sequence`` line per mutation
    :raises AssertionError: if the wild-type residue does not match
    """
    score_file = './dataFile/' + score_file
    sf = pd.read_csv(score_file, sep='\t')

    # Group the distinct mutation names by PDB id.
    mut_dict = dict()
    seen_names = set()
    for chain in sf.iloc[:, 0]:
        info = chain.split('_')
        pdb_id = info[0]
        chain_id = info[1]
        change = info[2]
        wt_aa = change[0:3]
        mu_aa = change[-3:]
        mu_pos = int(''.join(ch for ch in change if ch.isdigit()))
        if chain in seen_names:
            continue
        seen_names.add(chain)
        mut_dict.setdefault(pdb_id, []).append({
            'chain_id': chain_id,
            'wt_aa': wt_aa,
            'mu_aa': mu_aa,
            'mu_pos': mu_pos,
            'name': chain
        })

    parser = PDBParser()
    seq_builder = PPBuilder()
    pdb_dl_handle = PDBList()
    PDB_DIR = './dataFile/PDB_dl'

    mut_collect = dict()
    for pdb_id, mutations in mut_dict.items():
        pdb_file = PDB_DIR+'/pdb'+pdb_id.lower()+'.ent'
        # check if pdb file exists
        if not os.path.exists(pdb_file):
            pdb_dl_handle.retrieve_pdb_file(pdb_code=pdb_id,
                                            file_format='pdb',
                                            overwrite=False,
                                            pdir=PDB_DIR)
        model = parser.get_structure(pdb_id, pdb_file)[0]

        for mutation in mutations:
            protein_chain = model[mutation['chain_id']]
            fragments = [str(pp.get_sequence())
                         for pp in seq_builder.build_peptides(protein_chain)]
            sequence = "".join(fragments).replace('\n', '').replace(' ', '')
            site = mutation['mu_pos']-1
            assert sequence[site] == three_to_one(mutation['wt_aa']), 'Wt amino acid failed to match'
            residues = list(sequence)
            residues[site] = three_to_one(mutation['mu_aa'])
            mut_collect[mutation['name']] = ''.join(residues)

    with open(save_file, 'w') as output_hl:
        for name, mutated in mut_collect.items():
            output_hl.write(name+'\t'+mutated+'\n')
def real_seq():
    """Return the concatenated polypeptide sequences of ``<protein_id>.pdb``.

    Uses the module-level ``protein`` object for the id and file name.
    Returns the empty string when no polypeptide is found.
    """
    pdb_id = protein.protein_id
    structure = PDBParser().get_structure(pdb_id, pdb_id + '.pdb')
    joined = ''
    for peptide in PPBuilder().build_peptides(structure):
        joined = joined + peptide.get_sequence()
    return joined
def polypeptide(pdbfile):
    """Parse ``pdbfile`` and return its single polypeptide.

    Raises ValueError (from the unpacking) unless PPBuilder finds exactly
    one polypeptide in the structure.
    """
    structure = PDBParser().get_structure('test', pdbfile)
    [only_pp] = PPBuilder().build_peptides(structure)
    return only_pp
def get_pdb_amino_acid_sequences(pdb_path):
    """Return the sequence of every polypeptide in a PDB file as strings.

    The structure id is ``pdb_path`` without its last four characters.
    """
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    structure = pdb_parser.get_structure(pdb_path[:-4], pdb_path)
    return [str(peptide.get_sequence())
            for peptide in PPBuilder().build_peptides(structure)]
def parse_structure(path):
    """
    Parse a PDB-formatted structure with Biopython's PDB parser.

    Disordered atoms are replaced by their selected alternate location,
    water and HETATM residues are removed, and a warning listing the
    fragments is printed to stderr when the number of polypeptides differs
    from the number of chains (i.e. the structure has gaps).

    Returns a tuple ``(structure, n_chains, n_res)``; ``n_res`` is counted
    before water/HETATM residues are removed.

    Raises ValueError on a non-standard amino acid and Exception when the
    file cannot be parsed.
    """
    print('[+] Reading structure file: {0}'.format(path))
    sname = os.path.basename(path).rpartition('.')[0]

    try:
        s = P.get_structure(sname, path)
    except Exception as e:
        print("[!] Structure '{0}' could not be parsed".format(sname),
              file=sys.stderr)
        raise Exception(e)

    # Double occupancy check
    for atom in list(s.get_atoms()):
        if not atom.is_disordered():
            continue
        residue = atom.parent
        kept = atom.selected_child
        kept.altloc = ' '
        kept.disordered_flag = 0
        residue.detach_child(atom.id)
        residue.add(kept)

    # Remove HETATMs and solvent
    all_residues = list(s.get_residues())
    n_res = len(all_residues)
    for res in all_residues:
        if res.id[0][0] in ('W', 'H'):
            res.parent.detach_child(res.id)
        elif not is_aa(res, standard=True):
            raise ValueError(
                'Unsupported non-standard amino acid found: {0}'.format(
                    res.resname))

    # Detect gaps and compare with no. of chains
    peptides = PPBuilder().build_peptides(s)
    n_chains = len({c.id for c in s.get_chains()})

    if len(peptides) != n_chains:
        print('[!] Structure contains gaps:', file=sys.stderr)
        gap_line = '\t{1.parent.id} {1.resname}{1.id[1]} < Fragment {0} > {2.parent.id} {2.resname}{2.id[1]}'
        for i_pp, pp in enumerate(peptides):
            print(gap_line.format(i_pp, pp[0], pp[-1]), file=sys.stderr)
        #raise Exception('Calculation cannot proceed')

    return (s, n_chains, n_res)
def find_pdb_limits(pdb_path):
    """Return ``(start, end, seq)`` for the first polypeptide of a PDB file.

    ``start`` and ``end`` are element 1 of the residue id of the first and
    last residue; ``seq`` is the polypeptide's sequence. Only the first
    polypeptide found is considered; IndexError is raised if there is none.
    """
    structure = PDBParser().get_structure('', pdb_path)
    # takes the first (and only) polypeptide
    first_pp = PPBuilder().build_peptides(structure)[0]
    first_res, last_res = first_pp[0], first_pp[-1]
    return (first_res.get_id()[1],
            last_res.get_id()[1],
            first_pp.get_sequence())
def _model_file_to_data(self, file_path, params):
    """
        _model_file_to_data: Do the PDB conversion--parse the model pdb file for creating a pdb data object

        :param file_path: path of the PDB file to parse
        :param params: dict handed to ``self._match_features``; the (possibly
                       updated) dict is returned
        :return: tuple ``(data, pp_no, params)``. ``data`` is ``{}`` when
                 parsing fails, when any later step raises, or when the
                 matched ``params['pdb_info']`` has no 'sequence_identities';
                 ``pp_no`` is the number of polypeptides found.

        This method never raises for ordinary errors: they are logged and the
        values gathered so far are returned.
    """
    logging.info(
        f'Parsing pdb file {file_path} to a pdb structure with params: {params}'
    )

    parser = PDB.PDBParser(PERMISSIVE=1)
    pp_no = 0
    data = {}

    try:
        structure = parser.get_structure("test", file_path)
    except Exception as e:
        # Exceptions have no .message attribute on Python 3; format the
        # exception itself.
        logging.info(f'PDBParser errored with message: {e}')
        return data, pp_no, params

    # An explicit handler replaces ``return`` inside ``finally``, which also
    # swallowed KeyboardInterrupt/SystemExit and hid the reason for failure.
    try:
        pp_no = len(PPBuilder().build_peptides(structure))

        # logging.info(f'Getting pdb structure data for {structure}!')
        (compound, source) = self._get_compound_source(structure)
        self._get_models_from_structure(structure)
        (num_chains, _chain_ids) = self._get_chains_from_structure(structure)
        (num_residues, _residue_ids) = self._get_residues_from_structure(structure)
        (num_atoms, _atom_ids) = self._get_atoms_from_structure(structure)

        model = structure[0]
        protein_data = self._get_proteins_by_structure(
            structure, model.get_id(), file_path)
        (protein_data, params) = self._match_features(params, protein_data)

        pdb_info = params.get('pdb_info', None)
        if pdb_info and pdb_info.get('sequence_identities', None):
            data = {
                'name': structure.header.get('name', ''),
                'num_chains': num_chains,
                'num_residues': num_residues,
                'num_atoms': num_atoms,
                'compound': compound,
                'source': source,
                'proteins': protein_data
            }
        else:
            logging.info(
                f'Parsing pdb file {file_path} failed to match KBase genome/features!'
            )
            data = {}
    except Exception:
        logging.exception(f'Extracting data from pdb file {file_path} failed')

    return data, pp_no, params
def get_primary_sequence(input_file):
    """Return the concatenated sequence of all polypeptides in a PDB file.

    Polypeptides are built with ``aa_only=False``. The structure id is the
    file name without extension and with any './' removed. Returns the empty
    string when no polypeptide is found.
    """
    structure_id = os.path.splitext(input_file)[0].replace('./', '')
    structure = PDBParser().get_structure(structure_id, input_file)

    primary = ""
    for peptide in PPBuilder().build_peptides(structure, aa_only=False):
        primary = primary + peptide.get_sequence()
    return primary
def get_aa_encoded(protein_file):
    """Return the integer encoding of the polypeptide sequences of a PDB file.

    The sequences of all polypeptides found by PPBuilder are passed as a list
    of strings to ``int_encoding`` together with ``AA_CODES``.

    :param protein_file: path of the PDB file; the structure id is the path
        without its last four characters
    :return: whatever ``int_encoding`` returns for the sequence list
    """
    # The body previously read the undefined name ``pdb_path`` instead of
    # the ``protein_file`` parameter.
    structure = Bio.PDB.PDBParser(QUIET=True).get_structure(
        protein_file[:-4], protein_file)
    pdb_aas = [str(pp.get_sequence())
               for pp in PPBuilder().build_peptides(structure)]
    return int_encoding(pdb_aas, AA_CODES)
def get_pdb_torsion_angles(pdb_path, chain_index):
    """Return the phi and psi angle lists of one polypeptide of a PDB file.

    :param pdb_path: path of the PDB file
    :param chain_index: index into the list of polypeptides built by
        PPBuilder (not a chain id)
    :return: ``(phis, psis)``, two lists taken from the polypeptide's
        phi/psi list
    """
    pdb_parser = Bio.PDB.PDBParser(QUIET=True)
    structure = pdb_parser.get_structure(pdb_path[:-4], pdb_path)
    polypeptides = PPBuilder().build_peptides(structure)
    angle_pairs = polypeptides[chain_index].get_phi_psi_list()
    phis = [pair[0] for pair in angle_pairs]
    psis = [pair[1] for pair in angle_pairs]
    return phis, psis
def obtian_seq_wo_seq_file(score_file):
    """Build a dict of chain sequences for the chains named in a score file.

    The first column of ``./dataFile/<score_file>`` (tab separated) is read;
    characters 0-3 of each entry are taken as the PDB id and characters 0-5
    as the chain name, whose last character is the chain id. Missing PDB
    files are downloaded into ``./dataFile/PDB_dl``.

    :param score_file: file name inside ``./dataFile/``
    :return: dict mapping chain name to its sequence string
    """
    score_file = './dataFile/' + score_file
    sf = pd.read_csv(score_file, sep='\t')

    # Group the chain names by the PDB entry they belong to.
    chains_by_pdb = dict()
    for entry in sf.iloc[:, 0]:
        chains_by_pdb.setdefault(entry[0:4], set()).add(entry[0:6])

    # create the link to the PDB database and retrieve all the files,
    # storing them locally under ./dataFile/PDB_dl/
    PDB_DIR = './dataFile/PDB_dl'
    if not os.path.exists(PDB_DIR):
        os.mkdir(PDB_DIR)

    pdb_dl_handle = PDBList()
    for pdb_name in chains_by_pdb:
        if os.path.exists(PDB_DIR + '/pdb' + pdb_name.lower() + '.ent'):
            continue
        pdb_dl_handle.retrieve_pdb_file(pdb_code=pdb_name,
                                        file_format='pdb',
                                        overwrite=False,
                                        pdir=PDB_DIR)

    # for each pdb, construct the sequence of every requested chain
    parser = PDBParser()
    seq_builder = PPBuilder()
    seq_dict = dict()
    for pdb_id, chain_names in chains_by_pdb.items():
        pdb_file = PDB_DIR + '/pdb' + pdb_id.lower() + '.ent'
        model = parser.get_structure(pdb_id, pdb_file)[0]
        for chain_name in chain_names:
            # the last letter is the chain id
            protein_chain = model[chain_name[-1]]
            fragments = [str(pp.get_sequence())
                         for pp in seq_builder.build_peptides(protein_chain)]
            # clean the bad chars
            seq_dict[chain_name] = "".join(fragments).replace(
                '\n', '').replace(' ', '')

    return seq_dict
def FASTA_Gen(pdb_name, pdb_id):
    """Return a SeqRecord with the concatenated polypeptide sequences.

    The structure is parsed with the module-level parser ``p``.

    :param pdb_name: path of the PDB file
    :param pdb_id: id used for the structure and for the SeqRecord
    :return: SeqRecord with ``id=pdb_id`` and an empty description
    """
    # print('\n ## Convert PDB into FASTA for: \033[31m{0} - {1}\033[0m\n'.format(pdb_name, pdb_id))
    structure = p.get_structure(pdb_id, pdb_name)
    joined = ''
    for polypeptide in PPBuilder().build_peptides(structure):
        joined = joined + polypeptide.get_sequence()
    return SeqRecord(joined, id=pdb_id, description='')
def get_secondary_structure_details(self, name, pdb_file, aa_only=False):
    """Run DSSP on a PDB file and collect sequence and per-residue data.

    :param name: id given to the parsed structure and returned unchanged
    :param pdb_file: path of the PDB file
    :param aa_only: forwarded to ``PPBuilder.build_peptides``
    :return: ``(name, seq, ss, sasa, structure)`` where ``ss`` joins element 2
        of every DSSP entry and ``sasa`` holds, per DSSP entry, element 3
        multiplied by the module-level ``residues`` value for element 1
    """
    structure = PDBParser().get_structure(name, pdb_file)
    dssp = DSSP(structure[0], pdb_file, acc_array="Wilke")

    ss = "".join(entry[2] for entry in dssp)
    sasa = [residues[entry[1]] * entry[3] for entry in dssp]

    seq = ""
    for peptide in PPBuilder().build_peptides(structure, aa_only=aa_only):
        seq = seq + peptide.get_sequence()

    return name, seq, ss, sasa, structure
def avgenergy(seq, N):
    """Return the negated mean contact energy over a set of model files.

    Files named ``<seq>_<t>.BL000<i>0001.pdb`` are read for t in 1..10 and
    i in 1..N. In each file the first chain is paired with the second chain:
    for every residue of the second chain whose number minus 352 is a key of
    the module-level ``ruleresidues``, the pair energy is added (weighted by
    the ``ruleresidues`` value) for every residue of the first chain closer
    than the non-strict cutoff. Lookup tables are keyed ``"X_Y"`` and are
    tried in both orders.

    :param seq: prefix of the model file names
    :param N: number of models per value of t
    :return: ``-sum(energies) / (N * 10)``; files that cannot be parsed are
        skipped but still count in the divisor
    :raises ZeroDivisionError: if N is 0
    """
    p = PDBParser(QUIET=True)
    name_template = "%s_%d.BL000%d0001.pdb"
    avg = 0.0

    for t in range(1, 11):
        for i in range(1, N + 1):
            fname = name_template % (seq, t, i)
            try:
                s = p.get_structure(fname, fname)
            except Exception:
                # Best effort: missing or unparsable models are skipped, but
                # Ctrl-C is no longer swallowed.
                continue

            chains = s[0].get_list()
            ccr5 = chains[0]
            gp120 = chains[1]
            total_energy = 0

            for r1 in gp120:
                r1Code = str(protein_letters_3to1.get(r1.resname.title()))
                rule_pos = r1.get_id()[1] - 352
                if rule_pos not in ruleresidues:
                    continue
                for r2 in ccr5:
                    k = distanceBetweenCOM(r1, r2)
                    r2Code = str(protein_letters_3to1.get(r2.resname.title()))
                    pair = r1Code + "_" + r2Code
                    reverse_pair = r2Code + "_" + r1Code

                    # The strict table is looked up too, although its value
                    # is unused: a miss there also selects the reversed key.
                    try:
                        distancias_estrictas["distances"][pair]
                        cutoff_no_estricto = distancias_no_estrictas[
                            "distances"][pair]
                    except KeyError:
                        distancias_estrictas["distances"][reverse_pair]
                        cutoff_no_estricto = distancias_no_estrictas[
                            "distances"][reverse_pair]

                    if k < cutoff_no_estricto:
                        try:
                            en = energia["energy"][pair]
                        except KeyError:
                            en = energia["energy"][reverse_pair]
                        total_energy += float(en) * ruleresidues[rule_pos]

            avg += total_energy

    return -avg / (N * 10)
def calculate_RMSD(self, row, source_position, fragment_length, aa_only=False):
    """Superimpose a source fragment on a target fragment and store the RMSD.

    C-alpha atoms of ``fragment_length`` residues are taken from the source
    structure (``self.args.source``) starting at ``source_position`` and from
    the target structure of ``row.protein_id`` starting at ``row.pos``. Both
    positions index the concatenation of all polypeptides. The result is
    written to ``row.rmsd`` (rounded to 4 decimals).

    ``row.rmsd`` is set to -1 when no source is configured, when no target
    file is found, or when the two fragments are empty or differ in length.

    :param row: object with ``pos`` and ``protein_id``; receives ``rmsd``
    :param source_position: start index of the fragment in the source
    :param fragment_length: number of residues to compare
    :param aa_only: forwarded to ``PPBuilder.build_peptides``
    """
    def ca_window(structure, start):
        # Flatten all polypeptides, then take CA atoms of the window only, so
        # a residue without CA outside the window cannot raise KeyError.
        peptides = builder.build_peptides(structure, aa_only=aa_only)
        window = [res for pp in peptides for res in pp]
        return [res['CA'] for res in window[start:start + fragment_length]]

    if self.args.source is None:
        setattr(row, "rmsd", -1)
        # This guard previously fell through and tried to load a None path.
        return

    target_position = row.pos
    builder = PPBuilder()

    source_structure = self.__get_structure__(self.args.source)
    fixed = ca_window(source_structure, source_position)

    target_file = self.get_target_file(row.protein_id)
    if target_file is None:
        setattr(row, "rmsd", -1)
        return
    target_structure = self.__get_structure__(target_file)
    moving = ca_window(target_structure, target_position)

    # find RMSD
    if not fixed or len(fixed) != len(moving):
        setattr(row, "rmsd", -1)
        return

    sup = Bio.PDB.Superimposer()
    sup.set_atoms(fixed, moving)
    sup.apply(target_structure[0].get_atoms())
    setattr(row, "rmsd", round(sup.rms, 4))
def test_add_residue():
    """Build a 20-residue peptide one residue at a time and verify it."""
    expected_sequence = "ACDEFGHIKLMNPQRSTVWY"

    structure = PeptideBuilder.initialize_res("A")
    for letter in "CDEFGHIKLMNPQRSTVWY":
        structure = PeptideBuilder.add_residue(structure, letter)

    # extract peptide from structure and compare to expected
    first_peptide = next(iter(PPBuilder().build_peptides(structure)))
    assert first_peptide.get_sequence() == expected_sequence

    # now compare to saved reference structure
    assert compare_to_reference(structure, "extended.pdb")
def get_sequence(self, chain_id):
    '''
        Input:
            self: Use Biopython.PDB structure which has been stored in an object variable
            chain_id  : String (usually in ['A','B', 'C' ...]. The number of chains
                        depends on the specific protein and the resulting structure)
        Return:
            The single-letter amino acid sequence of the first polypeptide
            that PPBuilder finds in chain ``chain_id`` of the first model, as
            returned by the polypeptide's ``get_sequence()``.
            Raises IndexError when the chain contains no polypeptide.
    '''
    chain = self.structure[0][chain_id]
    fragments = PPBuilder().build_peptides(chain)
    return fragments[0].get_sequence()