def __call__(self, code, pull_outcome):
    """Record the outcome of a PDB pull and, on success, store chain features.

    :param code: Identifier whose first four characters are used as the
        structure id and whose fifth character (``code[4]``) is the chain
        letter; ``code[0:5]`` is the key written to ``self.data``.
    :param pull_outcome: ``(filename, outcome)`` pair. Only the outcome
        ``"Pull Successful"`` triggers parsing; any other outcome is logged
        verbatim.
    :return: None. Results are written to ``self.data`` and ``self.log``.
    """
    filename, outcome = pull_outcome
    if outcome != "Pull Successful":
        self.log.append(code + " >>> " + outcome)
        return

    structure = BPDB.PDBParser().get_structure(code[0:4], filename)
    # The pulled file and its containing directory are removed once parsed.
    os.remove(filename)
    os.rmdir("/".join(filename.split("/")[:-1]))

    builder = BPDB.PPBuilder()
    try:
        # Preferred: the chain named in the code, first model, first peptide.
        chain = builder.build_peptides(structure[0][code[4].upper()])[0]
    except Exception:
        try:
            # Fallback: the first chain of the first model.
            chain = builder.build_peptides(
                next(structure[0].get_chains()))[0]
        except Exception:
            self.log.append(code + " >>> Pull Failed on Chain")
            return

    try:
        self.data[code[0:5]] = self.__compose__(chain)
        self.log.append(code + " >>> " + outcome)
    except Exception:
        # Best-effort: a failing feature computation is logged, not raised.
        self.log.append(code + " >>> Pull Failed on a Feature")
def main():
    """Run a FoldX point mutation for every residue numbered above 156.

    Parses ``2eke_optimized.bak``, concatenates the sequences of all peptide
    fragments, and for each residue whose number exceeds 156 builds a mutant
    descriptor (residue letter + second chain letter + residue number + 'a')
    that is handed to ``runFoldxSimpleMutator``.
    """
    parser = PDB.PDBParser()
    structure = parser.get_structure('working_pdb', '2eke_optimized.bak')

    # Concatenate the sequence of every peptide fragment in order.
    total_sequence = ''
    for fragment in PDB.PPBuilder().build_peptides(structure):
        total_sequence += fragment.get_sequence()

    chain_letters = ''.join(
        chain.get_id() for chain in structure.get_chains())
    residue_numbers = [
        residue.get_id()[1] for residue in structure.get_residues()]

    for position, number in enumerate(residue_numbers):
        if number <= 156:
            continue
        # Residue letter, second chain letter, residue number, target 'a'.
        mutant = (total_sequence[position] + chain_letters[1]
                  + str(number) + 'a')
        runFoldxSimpleMutator(mutant, ['2eke_optimized.bak'])
def get_phi_psi(structure):
    """Collect the phi and psi dihedral angles of every polypeptide.

    Polypeptides are built with ``PDB.PPBuilder``. The result is a pair
    ``(phi_angles_list, psi_angles_list)``; each holds one inner list per
    polypeptide with the values from ``get_phi_psi_list()`` in order.
    """
    phi_angles_list = []
    psi_angles_list = []
    for polypeptide in PDB.PPBuilder().build_peptides(structure):
        angle_pairs = polypeptide.get_phi_psi_list()
        # Split the (phi, psi) tuples into two parallel lists.
        phi_angles_list.append([phi for phi, _ in angle_pairs])
        psi_angles_list.append([psi for _, psi in angle_pairs])
    return phi_angles_list, psi_angles_list
def chain_to_fasta(chain):
    """Return the amino-acid sequence of *chain*.

    The chain is split into peptide fragments by ``PPBuilder``; the
    sequences of all fragments are concatenated in order, so a chain with
    gaps is no longer truncated after its first fragment.

    :param chain: Entity accepted by ``PPBuilder.build_peptides``.
    :return: The concatenated sequence object, or ``None`` when no peptide
        could be built (same as before for that case).
    """
    peptides = pdb.PPBuilder().build_peptides(chain)
    if not peptides:
        return None
    sequence = peptides[0].get_sequence()
    for peptide in peptides[1:]:
        sequence += peptide.get_sequence()
    return sequence
def get_seq(name):
    """Return the peptide sequence of the PDB file *name* as one string.

    *name* is used both as the structure id and as the path to parse. The
    sequences of all peptide fragments are joined without a separator.
    """
    from Bio import PDB

    structure = PDB.PDBParser().get_structure(name, name)
    fragments = PDB.PPBuilder().build_peptides(structure)
    return ''.join(str(fragment.get_sequence()) for fragment in fragments)
def extract_pdb(path):
    """Extract the amino-acid sequence from a PDB file.

    Uses Biopython to parse the file and build its peptide fragments. The
    sequences of all fragments are concatenated, instead of keeping a single
    fragment only.

    :param path: Path of the PDB file.
    :return: The sequence as a ``str``; empty when no peptide is found.
    """
    structure = PDB.PDBParser().get_structure("test", path)
    peptides = PDB.PPBuilder().build_peptides(structure)
    return "".join(str(pep.get_sequence()) for pep in peptides)
def write_pdb_seq_to_file(pdb_file):
    '''Return the sequence of a single-chain PDB file.

    IN:  (path to) PDB-file with only one chain
    OUT: sequence of PDB-file (from actual structure, not header)

    Raises AssertionError when the structure does not contain exactly one
    chain. The check is an explicit ``raise`` rather than an ``assert``
    statement so that it is still enforced when Python runs with ``-O``.
    '''
    struct = PDB.PDBParser().get_structure('current', pdb_file)
    chain_count = len(list(struct.get_chains()))
    if chain_count != 1:
        raise AssertionError(
            'WARINING: There are more than one chains in structure %s. '
            '\n It will be excluded from analysis.' % struct.get_id())

    # Concatenate the fragments built from the actual coordinates.
    seq = ''
    for pp in PDB.PPBuilder().build_peptides(struct):
        seq += pp.get_sequence()
    return seq
def pdb2fasta(pdbfilename):
    """Convert a PDB file into its amino-acid sequence.

    The structure id is the file name with ``".pdb"`` removed. The sequences
    of all peptide fragments are concatenated in order, instead of keeping
    only a single fragment.

    :param pdbfilename: Path of the PDB file.
    :return: The concatenated sequence object.
    :raises ValueError: If no peptide can be built from the file (this used
        to surface as an unbound-local error).
    """
    structure_id = pdbfilename.replace(".pdb", "")
    structure = PDB.PDBParser().get_structure(structure_id, pdbfilename)
    peptides = PDB.PPBuilder().build_peptides(structure)
    if not peptides:
        raise ValueError("no peptide found in %s" % pdbfilename)

    ppstring = peptides[0].get_sequence()
    for pp in peptides[1:]:
        ppstring += pp.get_sequence()
    return ppstring
def getSequencefromPDB(pdbfile, chain='C', index=0):
    """Return the amino-acid sequence of one chain of a PDB file.

    The chain is taken from the first model (``struct[0]``), and the
    sequences of its peptide fragments are joined into a single string.
    The ``index`` parameter is accepted but not used.
    """
    structure = PDB.PDBParser(QUIET=True).get_structure(pdbfile, pdbfile)
    builder = PDB.PPBuilder()
    first_model = structure[0]
    fragments = builder.build_peptides(first_model[chain])
    return ''.join(str(fragment.get_sequence()) for fragment in fragments)
def calc_ramachandran(file_name_list):
    """Classify the backbone torsion angles of the given PDB files.

    :param file_name_list: List of PDB file paths; paths that are not
        existing files are skipped silently.
    :return: ``(normals, outliers)``. Both are dicts keyed like
        ``RAMA_PREFERENCES``; each value is ``{"x": [...], "y": [...]}``
        holding phi (x) and psi (y) in degrees.
    """
    # ``global`` must precede the first use of the name inside the function,
    # otherwise Python 3 rejects the function with a SyntaxError.
    global RAMA_PREF_VALUES
    if RAMA_PREF_VALUES is None:
        RAMA_PREF_VALUES = _cache_RAMA_PREF_VALUES()

    normals = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}
    outliers = {key: {"x": [], "y": []} for key in RAMA_PREFERENCES}

    # Calculate the torsion angles of the inputs.
    for inp in file_name_list:
        if not os.path.isfile(inp):
            continue
        structure = PDB.PDBParser().get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in PDB.PPBuilder().build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        phi, psi = phi_psi[res_index]
                        # Compare against None so that an angle of exactly
                        # 0.0 is not mistaken for a missing one.
                        if phi is None or psi is None:
                            continue
                        res_name = "{}".format(residue.resname)
                        # psi is defined, so a following residue exists.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"

                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        preference = RAMA_PREF_VALUES[aa_type][
                            int(psi_deg) + 180][int(phi_deg) + 180]
                        # Below the second bound the residue is an outlier.
                        if preference < RAMA_PREFERENCES[aa_type]["bounds"][1]:
                            target = outliers[aa_type]
                        else:
                            target = normals[aa_type]
                        target["x"].append(phi_deg)
                        target["y"].append(psi_deg)
    return normals, outliers
def get_chain_sequence(chain):
    """Return the polypeptide or nucleotide sequence of a chain object.

    The amino-acid sequence built by ``PPBuilder`` is returned when it is
    not empty. Otherwise the chain is read as a nucleic acid: the last
    character of each stripped residue name is kept when it is one of
    ``A``, ``T``, ``G``, ``C`` or ``U``.

    The last character is used rather than index 2 so that both padded
    (``" DA"``) and stripped (``"DA"``, ``"A"``) residue names are handled
    without an IndexError.
    """
    sequence = ""
    ppb = pdb.PPBuilder()
    for pp in ppb.build_peptides(chain):
        sequence = sequence + pp.get_sequence()

    if not sequence:
        for residue in chain.get_residues():
            base = residue.get_resname().strip()[-1:]
            if base and base in 'ATGCU':
                sequence = sequence + base
    return sequence
def get_seq_from_tar(self, tar):
    """Return the sequence of the first peptide of ``model1.pdb`` in *tar*.

    For each member iterated from *tar*, ``model1.pdb`` is extracted. An
    extraction error is reported through ``warn`` and the next member is
    tried; a falsy stream prints a notice when ``self.verbose`` is set. On
    the first usable stream the structure is parsed, the archive is closed
    and the sequence is returned. ``None`` is returned implicitly when no
    attempt succeeds.
    """
    for _member in tar:
        try:
            stream = tar.extractfile('model1.pdb')
        except Exception as err:
            warn(str(err))
            continue

        if not stream:
            if self.verbose:
                print('IO Stream is None from tarball.')
            continue

        # stream is a readable file object at this point
        structure = PDB.PDBParser().get_structure_from_stream(
            'model', stream)
        first_peptide = PDB.PPBuilder().build_peptides(structure)[0]
        tar.close()
        return first_peptide.get_sequence()
def SeqFromPDBCode(code):
    """Return the concatenated protein sequence for a PDB code.

    Reads ``<code>_pocket.pdb`` and ``<code>_protein.pdb`` from
    ``DATADIR / code`` and joins the sequences of all peptide fragments of
    every chain of the protein file.

    :param code: Directory name and file-name prefix under ``DATADIR``.
    :return: A ``Seq`` with the joined sequence, or ``None`` when either
        file cannot be parsed or the protein contains no peptide.
    """
    protein_pdb = DATADIR / code / (code + '_protein.pdb')
    pocket_pdb = DATADIR / code / (code + '_pocket.pdb')
    parser = PDB.PDBParser(QUIET=True)
    try:
        # The pocket file is parsed too, so an unreadable pocket still
        # makes the whole entry fail as before.
        parser.get_structure(code, pocket_pdb)
        protein = parser.get_structure(code, protein_pdb)
    except Exception:
        print('fail to read {}'.format(code))
        return None

    ppb = PDB.PPBuilder()
    seqs = []
    for chain in protein.get_chains():
        seqs.extend(pp.get_sequence() for pp in ppb.build_peptides(chain))
    if not seqs:
        # Previously an IndexError on seqs[0].
        return None

    seq_str = ''.join(str(seq) for seq in seqs)
    # Older Biopython Seq objects carry an alphabet; newer releases do not.
    alphabet = getattr(seqs[0], 'alphabet', None)
    if alphabet is None:
        return Seq(seq_str)
    return Seq(seq_str, alphabet)
def get_pdb_sequence(prefix):
    """Parse ``<prefix>.pdb`` and return its sequence and length summary.

    :param prefix: File name without the ``.pdb`` extension.
    :return: ``(total_sequence, total_length, first_chain_length,
        structure)``. ``total_sequence`` concatenates all peptide fragments,
        ``total_length`` is the summed fragment length, and
        ``first_chain_length`` is the length of the first fragment
        (``0`` when no fragment exists; previously this was an unbound
        local at ``return``).
    """
    start_name = prefix + '.pdb'
    structure = PDB.PDBParser().get_structure('working_pdb', start_name)

    total_sequence = ''
    total_length = 0
    first_chain_length = 0
    for index, pp in enumerate(PDB.PPBuilder().build_peptides(structure)):
        fragment = pp.get_sequence()
        total_sequence += fragment
        total_length += len(fragment)
        if index == 0:
            first_chain_length = total_length
    return (total_sequence, total_length, first_chain_length, structure)
def import_protein_structure(inputs, wt_protein_fasta_file):
    """Collect protein sequences from every PDB file below *inputs*.

    Walks *inputs* recursively, parses each ``.pdb`` / ``.pdb.gz`` file and
    builds its peptide fragments.

    :param inputs: Root directory to search.
    :param wt_protein_fasta_file: When truthy, the wild-type sequence is
        loaded with ``import_wt_protein_sequence`` and combined with the
        first two fragments through ``combine_chains``; the result is stored
        under the structure name. When falsy, every fragment is stored
        under ``<name>_chain_<index>``.
    :return: Dict mapping names to sequence strings.
    """
    from Bio import PDB
    import gzip

    file_list = []
    for subdir, dirs, files in os.walk(inputs):
        for file_name in files:
            if file_name.endswith('.pdb') or file_name.endswith('.pdb.gz'):
                file_list.append(os.path.join(subdir, file_name))

    protein_sequences = {}
    for path in file_list:
        # Name = parent directory name + file name up to the first dot.
        parent = path.split('/')[-2]
        name = parent + os.path.basename(path).split('.')[0]

        parser = PDB.PDBParser()
        opener = gzip.open if path.endswith('.gz') else open
        # The handle is closed as soon as the structure has been parsed.
        # NOTE(review): mode 'r' on gzip.open yields bytes under Python 3;
        # confirm which interpreter this runs on before changing the mode.
        with opener(path, 'r') as handle:
            struct = parser.get_structure(name, handle)

        ppb = PDB.PPBuilder()
        chains = [list(pp.get_sequence()) for pp in ppb.build_peptides(struct)]

        if wt_protein_fasta_file:
            wt_seq_list = list(
                import_wt_protein_sequence(wt_protein_fasta_file))
            # Requires at least two fragments (chains[0] and chains[1]).
            final_design_seq = combine_chains(chains[0], chains[1],
                                              wt_seq_list)
            protein_sequences[name] = "".join(final_design_seq)
        else:
            if len(chains) > 1:
                # Single-argument call: valid as a Python 2 print statement
                # and as a Python 3 function call.
                print("Warning: Multiple chains found. Splitting sequence "
                      "into %d DNA sequences for ordering." % len(chains))
            for index, chain in enumerate(chains):
                key = name + "_chain_" + str(index)
                protein_sequences[key] = "".join(chain)
    return protein_sequences
def get_sequence( self, chain_id ):
    '''
        Input:
            self: Use Biopython.PDB structure which has been stored in an
                  object variable (``self.structure``)
            chain_id : String (usually in ['A','B', 'C' ...]. The number of
                       chains depends on the specific protein and the
                       resulting structure)
        Return:
            The amino acid sequence (single-letter alphabet!) of the given
            chain as a string. All peptide fragments of the chain are
            concatenated.
        Raises:
            KeyError: if no chain with ``chain_id`` exists in the structure
                      (previously an unbound-local error).
    '''
    selected = None
    for model in self.structure:
        for chain in model:
            if chain.get_id() == chain_id:
                # As before, the last matching chain across models wins.
                selected = chain
    if selected is None:
        raise KeyError(chain_id)

    ppb = PDB.PPBuilder()
    return ''.join(str(pp.get_sequence())
                   for pp in ppb.build_peptides(selected))
def __get_length_and_resolution(self, file):
    """
    Determine the resolution, total sequence length and id of a .pdb file.

    :param file: pdb file path.
    :return: pandas series holding, in order, the header resolution, the
        summed length of all peptide fragments and the structure id (the
        file name without directory and extension).
    """
    parser = bp.PDBParser()
    ppb = bp.PPBuilder()
    structure_id = os.path.splitext(os.path.basename(file))[0]
    structure = parser.get_structure(structure_id, file)

    # Total length = sum over every peptide fragment of the structure.
    seq_len = sum(
        len(pp.get_sequence()) for pp in ppb.build_peptides(structure))

    # Resolution comes from the parsed header, the id from the structure.
    return pd.Series(
        [structure.header['resolution'], seq_len, structure.id])
def SeqFromPDBCode(code):
    """Return the sequence of the protein chain that dominates the pocket.

    The longest chain of ``<code>_pocket.pdb`` (ignoring chains whose id is
    a single blank) selects, by chain id, the chain of
    ``<code>_protein.pdb`` whose peptide fragments are joined.

    :param code: Directory name and file-name prefix under ``DATADIR``.
    :return: A ``Seq`` with the joined sequence, or ``None`` when a file
        cannot be parsed, the pocket has no usable chain, the protein has
        no chain with that id, or that chain contains no peptide.
    """
    protein_pdb = DATADIR / code / (code + '_protein.pdb')
    pocket_pdb = DATADIR / code / (code + '_pocket.pdb')
    parser = PDB.PDBParser(QUIET=True)
    try:
        pocket = parser.get_structure(code, pocket_pdb)
        protein = parser.get_structure(code, protein_pdb)
    except Exception:
        return None

    # Pick the pocket chain with the most residues, skipping blank ids.
    longest_chain = None
    for chain in pocket.get_chains():
        if chain.id == ' ':
            continue
        if longest_chain is None or len(chain) > len(longest_chain):
            longest_chain = chain
    if longest_chain is None:
        return None

    ppb = PDB.PPBuilder()
    for chain in protein.get_chains():
        if chain.id != longest_chain.id:
            continue
        seqs = [pp.get_sequence() for pp in ppb.build_peptides(chain)]
        if not seqs:
            # Previously an IndexError on seqs[0].
            return None
        seq_str = ''.join(str(seq) for seq in seqs)
        # Older Biopython Seq objects carry an alphabet; newer ones do not.
        alphabet = getattr(seqs[0], 'alphabet', None)
        if alphabet is None:
            return Seq(seq_str)
        return Seq(seq_str, alphabet)
    return None
def plot_ramachandran(file):
    """Draw a single-panel Ramachandran plot for one PDB file.

    The background preferences were calculated from the following article:
    Lovell et al. Structure validation by Calpha geometry: phi,psi and
    Cbeta deviation. 2003. DOI: 10.1002/prot.10286

    Each residue with both torsion angles defined is classified as
    ``PRE-PRO`` (followed by a proline), ``PRO``, ``GLY`` or ``General``
    and counted as an outlier when its preference value lies below the
    second bound of its class. Outliers are drawn in red and annotated
    with ``<resname>_<resnum>``.

    :param file: Path of the PDB file to analyse.
    :return: None; the figure is shown with ``plt.show()``.
    """
    # General variable for the background preferences.
    # NOTE(review): "General" and "GLY" use an empty ListedColormap (N == 0);
    # recent matplotlib releases may reject that in BoundaryNorm - confirm.
    rama_preferences = {
        "General": {
            "file": os.path.join('data', "rama500-general.data"),
            "cmap": colors.ListedColormap([]),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "GLY": {
            "file": os.path.join('data', "rama500-gly-sym.data"),
            "cmap": colors.ListedColormap([]),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRO": {
            "file": os.path.join('data', "rama500-pro.data"),
            "cmap": colors.ListedColormap(
                ['#FFFFFF00', 'skyblue', 'deepskyblue']),
            "bounds": [0, 0.0005, 0.02, 1],
        },
        "PRE-PRO": {
            "file": os.path.join('data', "rama500-prepro.data"),
            "cmap": colors.ListedColormap(
                ['#FFFFFF', '#FFE8C5', '#FFCC7F']),
            "bounds": [0, 0.002, 0.02, 1],
        },
    }

    # Preference tables are located relative to this module's directory.
    r_path = os.path.abspath(os.path.dirname(__file__))
    rama_pref_values = {}
    for key, val in rama_preferences.items():
        table = np.full((360, 360), 0, dtype=np.float64)
        with open(os.path.join(r_path, val["file"])) as fn:
            for line in fn:
                if line.startswith("#"):
                    continue
                fields = line.split()
                phi_idx = int(float(fields[0])) + 180
                psi_idx = int(float(fields[1])) + 180
                value = float(fields[2])
                # Preference file has values for every second position
                # only, so each value also fills its three neighbours.
                table[psi_idx][phi_idx] = value
                table[psi_idx - 1][phi_idx - 1] = value
                table[psi_idx - 1][phi_idx] = value
                table[psi_idx][phi_idx - 1] = value
        rama_pref_values[key] = table

    normals = {key: {"x": [], "y": []} for key in rama_preferences}
    outliers = {key: {"x": [], "y": [], 'Res': []}
                for key in rama_preferences}

    # Calculate the torsion angles of the PDB file.
    structure = PDB.PDBParser().get_structure('input_structure', file)
    for model in structure:
        for chain in model:
            for poly in PDB.PPBuilder().build_peptides(chain):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    phi, psi = phi_psi[res_index]
                    # Compare against None so that 0.0 is a valid angle.
                    if phi is None or psi is None:
                        continue
                    res_name = "{}".format(residue.resname)
                    res_num = residue.id[1]
                    # psi is defined, so a following residue exists.
                    if str(poly[res_index + 1].resname) == "PRO":
                        aa_type = "PRE-PRO"
                    elif res_name == "PRO":
                        aa_type = "PRO"
                    elif res_name == "GLY":
                        aa_type = "GLY"
                    else:
                        aa_type = "General"

                    phi_deg = math.degrees(phi)
                    psi_deg = math.degrees(psi)
                    preference = rama_pref_values[aa_type][
                        int(psi_deg) + 180][int(phi_deg) + 180]
                    if preference < rama_preferences[aa_type]["bounds"][1]:
                        outliers[aa_type]["x"].append(phi_deg)
                        outliers[aa_type]["y"].append(psi_deg)
                        outliers[aa_type]['Res'].append(
                            res_name + '_' + str(res_num))
                    else:
                        normals[aa_type]["x"].append(phi_deg)
                        normals[aa_type]["y"].append(psi_deg)

    # Generate the plot: all four backgrounds share one axes.
    plt.figure(figsize=(10, 10))
    for key, val in sorted(rama_preferences.items(),
                           key=lambda item: item[0].lower()):
        plt.imshow(rama_pref_values[key],
                   cmap=val["cmap"],
                   norm=colors.BoundaryNorm(val["bounds"], val["cmap"].N),
                   extent=(-180, 180, 180, -180),
                   alpha=0.7)
        plt.scatter(outliers[key]["x"], outliers[key]["y"],
                    color="red", s=[15], marker=',')

    # Markers for the different residue classes. The class is named
    # explicitly instead of relying on a variable left over from the
    # classification loop.
    marker_styles = (
        ("GLY", 10, 'o'),
        ("General", 35, '^'),
        ("PRE-PRO", 35, 'x'),
        ("PRO", 25, '+'),
    )
    for key, size, marker in marker_styles:
        plt.scatter(normals[key]["x"], normals[key]["y"],
                    color="k", s=[size], marker=marker)

    for key in outliers:
        for i, name in enumerate(outliers[key]['Res']):
            plt.annotate(name, (outliers[key]["x"][i], outliers[key]["y"][i]))

    plt.xlim([-180, 180])
    plt.ylim([-180, 180])
    ax = plt.gca()
    ax.set_xlim(-180, 180)
    ax.set_ylim(-180, 180)
    # Fixed tick positions on both axes.
    ticks = [-180, -135, -90, -45, 0, 45, 90, 135, 180]
    ax.set_xticks(ticks, minor=False)
    ax.set_yticks(ticks, minor=False)
    plt.plot([-180, 180], [0, 0], linewidth=1, color="k", alpha=0.2)
    plt.plot([0, 0], [-180, 180], linewidth=1, color="k", alpha=0.2)
    plt.xlabel(r'$\phi$', fontsize=14, color="k", alpha=1)
    plt.ylabel(r'$\psi$', fontsize=14, color="k", alpha=1)
    plt.grid(linestyle='--', color="k", alpha=0.4)
    plt.title('Ramachandran Plot', fontsize=15, color="k", alpha=1)

    # Legend handles, listed in the order they are passed to plt.legend.
    legend_handles = [
        mpatches.Patch(color='deepskyblue', lw=15),
        mpatches.Patch(color='skyblue', lw=15),
        mpatches.Patch(color='#FFCC7F', lw=15),
        mpatches.Patch(color='#FFE8C5', lw=15),
        mlines.Line2D([], [], color='red', marker='s', linestyle='None',
                      markersize=10),
        mlines.Line2D([], [], color='black', marker='o', linestyle='None',
                      markersize=7, label=" "),
        mlines.Line2D([], [], color='black', marker='o', linestyle='None',
                      markersize=7, label=" "),
        mlines.Line2D([], [], color='black', marker='x', linestyle='None',
                      markersize=7, label=" "),
        mlines.Line2D([], [], color='black', marker='x', linestyle='None',
                      markersize=7),
        mlines.Line2D([], [], color='red', marker='', linestyle='None',
                      markersize=7, label=" "),
        mlines.Line2D([], [], color='black', marker='^', linestyle='None',
                      markersize=7,
                      label="General/Pre-Pro/Proline Favoured"),
        mlines.Line2D([], [], color='black', marker='^', linestyle='None',
                      markersize=7,
                      label="General/Pre-Pro/Proline Allowed"),
        mlines.Line2D([], [], color='black', marker='', linestyle='None',
                      markersize=7, label="Glycien Favoured"),
        mlines.Line2D([], [], color='black', marker='', linestyle='None',
                      markersize=7, label="Glycien Allowed"),
        mlines.Line2D([], [], color='black', marker='', linestyle='None',
                      markersize=7, label="Outliers"),
    ]
    plt.legend(frameon=False, handles=legend_handles, loc='upper left',
               labelspacing=2, fontsize=10, ncol=3, columnspacing=-2.8,
               bbox_to_anchor=(0.01, -0.06))
    # To save the plot to a specific location, call e.g.
    # plt.savefig("asd.png", dpi=300) before plt.show().
    plt.show()
def get_PDB_info(dir):
    """Build a machine-learning dataset from the PDB files in a directory.

    For every entry of ``os.listdir(dir)`` (except ``.DS_Store``) the first
    model is parsed, a contact map is derived from the residue distance
    matrix, DSSP and TMHMM secondary-structure information is obtained via
    the project helpers, and the three result vectors are extended through
    ``generate_ML_dataset``.

    :param dir: Directory containing the PDB files.
    :return: ``(DSSP_vector, TMHMM_vector, oracle)`` as returned by the last
        ``generate_ML_dataset`` call (three empty lists if nothing was
        processed).
    """
    # The three vectors that are filled incrementally.
    DSSP_vector, TMHMM_vector, oracle = [], [], []

    print("There are", len(os.listdir(dir)), "PDB files to parse")

    # Assemble the dataset incrementally, one PDB file at a time.
    for ind, PDB_file in enumerate(os.listdir(dir)):
        if ind % 10 == 0:
            print("Working on structure", ind)

        # Skip the macOS folder-metadata file.
        if (str(PDB_file) == ".DS_Store"):
            continue

        # Step 1: parse the PDB file with Biopython to obtain a model object.
        # The structure id is the file name without its last four characters,
        # upper-cased.
        p = PDB.PDBParser()
        structure = p.get_structure(PDB_file[:-4].upper(), dir + "/" + PDB_file)
        model = structure[0]

        # List of residues extracted from the model.
        residues = extract_residues(model)
        print("file", PDB_file, len(residues))

        # Distance matrix between the residues.
        matrix = compute_distance_matrix(residues)

        # Contact map derived from the distance matrix. According to the
        # original notes it should be True for residues within 5 angstroms
        # in 3D but at least 10 apart in sequence - presumably enforced by
        # removeConsecutives; verify against that helper.
        contact_map = removeConsecutives(matrix)
        # One flag per entry of contact_map: does it contain a True?
        has_contact = [
            True if True in contact_map[residue] else False
            for residue in contact_map
        ]

        # Proportion of residues with an intramolecular contact, per the
        # original notes (the value is not used further in this function).
        contact_info = get_contact_numbers(contact_map)

        # Secondary-structure information of the model from DSSP.
        dssp_info = get_dssp_info(PDB_file, model, dir)

        # Sequence of the structure: concatenation of all peptide fragments.
        sequence = ""
        ppb = PDB.PPBuilder()
        for pp in ppb.build_peptides(structure):
            sequence += pp.get_sequence()

        # Walk the DSSP entries in sorted key order; index 2 is accumulated
        # as the secondary-structure string (ss) and index 1 as the DSSP
        # sequence.
        dssp_ss = ""
        dssp_seq = ""
        dssp_keys = sorted(dssp_info.keys())
        for key in dssp_keys:
            curr_ss = dssp_info[key][2]
            dssp_ss += curr_ss
            dssp_seq += dssp_info[key][1]

        converted = convert_info(dssp_ss)

        # Write the sequence to a FASTA file so TMHMM can be run on it.
        filename = write_fasta(sequence, PDB_file)

        # Secondary-structure prediction for the FASTA file from TMHMM.
        tm_ss = run_tmhmm(filename, PDB_file)

        DSSP_vector, TMHMM_vector, oracle = generate_ML_dataset(
            sequence, converted, tm_ss, has_contact, DSSP_vector,
            TMHMM_vector, oracle)

    return DSSP_vector, TMHMM_vector, oracle
def ramachandran(file_name_list):
    """Compute and plot Ramachandran diagrams for a list of PDB files.

    One subplot is drawn per residue class (General, GLY, PRE-PRO, PRO).
    Residues whose preference value is below the second bound of their
    class are reported on stdout and plotted in red. The figure is saved
    next to the first input as ``<first input without extension>.png`` and
    then shown.

    :param file_name_list: List of PDB files to plot; missing files are
        reported and skipped. Must contain at least one entry, since the
        first one names the output image.
    :return: Nothing
    """
    # General variable for the background preferences
    rama_preferences = {
        "General": {
            "file": "data/pref_general.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.0005, 0.02, 1],
        },
        "GLY": {
            "file": "data/pref_glycine.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#FFE8C5', '#FFCC7F']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRO": {
            "file": "data/pref_proline.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#D0FFC5', '#7FFF8C']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRE-PRO": {
            "file": "data/pref_preproline.data",
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.002, 0.02, 1],
        },
    }

    # Read in the expected torsion angles; the data files are resolved
    # relative to the current working directory.
    __location__ = os.path.realpath(os.getcwd())
    rama_pref_values = {}
    for key, val in rama_preferences.items():
        table = np.full((360, 360), 0, dtype=np.float64)
        with open(os.path.join(__location__, val["file"])) as fn:
            for line in fn:
                if line.startswith("#"):
                    continue
                fields = line.split()
                phi_idx = int(float(fields[0])) + 180
                psi_idx = int(float(fields[1])) + 180
                value = float(fields[2])
                # Preference file has values for every second position
                # only, so each value also fills its three neighbours.
                table[psi_idx][phi_idx] = value
                table[psi_idx - 1][phi_idx - 1] = value
                table[psi_idx - 1][phi_idx] = value
                table[psi_idx][phi_idx - 1] = value
        rama_pref_values[key] = table

    normals = {key: {"x": [], "y": []} for key in rama_preferences}
    outliers = {key: {"x": [], "y": []} for key in rama_preferences}

    # Calculate the torsion angle of the inputs
    for inp in file_name_list:
        if not os.path.isfile(inp):
            print("{} not found!".format(inp))
            continue
        structure = PDB.PDBParser().get_structure('input_structure', inp)
        for model in structure:
            for chain in model:
                for poly in PDB.PPBuilder().build_peptides(chain):
                    phi_psi = poly.get_phi_psi_list()
                    for res_index, residue in enumerate(poly):
                        phi, psi = phi_psi[res_index]
                        # Compare against None so that 0.0 is a valid angle.
                        if phi is None or psi is None:
                            continue
                        res_name = "{}".format(residue.resname)
                        res_num = residue.id[1]
                        # psi is defined, so a following residue exists.
                        if str(poly[res_index + 1].resname) == "PRO":
                            aa_type = "PRE-PRO"
                        elif res_name == "PRO":
                            aa_type = "PRO"
                        elif res_name == "GLY":
                            aa_type = "GLY"
                        else:
                            aa_type = "General"

                        phi_deg = math.degrees(phi)
                        psi_deg = math.degrees(psi)
                        preference = rama_pref_values[aa_type][
                            int(psi_deg) + 180][int(phi_deg) + 180]
                        if preference < rama_preferences[aa_type]["bounds"][1]:
                            print("{} {} {} {}{} is an outlier".format(
                                inp, model, chain, res_name, res_num))
                            outliers[aa_type]["x"].append(phi_deg)
                            outliers[aa_type]["y"].append(psi_deg)
                        else:
                            normals[aa_type]["x"].append(phi_deg)
                            normals[aa_type]["y"].append(psi_deg)

    # Generate the plots
    for idx, (key, val) in enumerate(
            sorted(rama_preferences.items(), key=lambda x: x[0].lower())):
        plt.subplot(2, 2, idx + 1)
        plt.title(key)
        plt.imshow(rama_pref_values[key],
                   cmap=val["cmap"],
                   norm=colors.BoundaryNorm(val["bounds"], val["cmap"].N),
                   extent=(-180, 180, 180, -180))
        plt.scatter(normals[key]["x"], normals[key]["y"])
        plt.scatter(outliers[key]["x"], outliers[key]["y"], color="red")
        plt.xlim([-180, 180])
        plt.ylim([-180, 180])
        plt.plot([-180, 180], [0, 0], color="black")
        plt.plot([0, 0], [-180, 180], color="black")
        plt.locator_params(axis='x', nbins=7)
        plt.xlabel(r'$\phi$')
        plt.ylabel(r'$\psi$')
        plt.grid()

    plt.tight_layout()
    # Strip the extension of the first input file. The previous slice used
    # the length of the list instead of the length of the file name.
    output_stem = os.path.splitext(file_name_list[0])[0]
    plt.savefig("{0}.png".format(output_stem), dpi=300)
    plt.show()
def prepare_top_dihedrals(top):
    """Return the phi/psi angles (degrees) of a topology, grouped by class.

    Residues with both angles defined are grouped as pre-proline (followed
    by a proline), proline, glycine or general.

    :param top: Path of the PDB file to parse.
    :return: ``(phi_gen, psi_gen, phi_pro, psi_pro, phi_gly, psi_gly,
        phi_pre, psi_pre)`` - eight lists of angles in degrees.
    """
    from Bio import PDB
    import math

    structure = PDB.PDBParser().get_structure('input_structure', top)
    phi_gen, psi_gen = [], []
    phi_pre, psi_pre = [], []
    phi_pro, psi_pro = [], []
    phi_gly, psi_gly = [], []

    for model in structure:
        for chain in model:
            for poly in PDB.PPBuilder().build_peptides(chain):
                phi_psi = poly.get_phi_psi_list()
                for res_index, residue in enumerate(poly):
                    phi, psi = phi_psi[res_index]
                    # Compare against None so that 0.0 is a valid angle.
                    if phi is None or psi is None:
                        continue
                    res_name = "{}".format(residue.resname)
                    # psi is defined, so a following residue exists.
                    if str(poly[res_index + 1].resname) == "PRO":
                        phi_bucket, psi_bucket = phi_pre, psi_pre
                    elif res_name == "PRO":
                        phi_bucket, psi_bucket = phi_pro, psi_pro
                    elif res_name == "GLY":
                        phi_bucket, psi_bucket = phi_gly, psi_gly
                    else:
                        phi_bucket, psi_bucket = phi_gen, psi_gen
                    phi_bucket.append(math.degrees(phi))
                    psi_bucket.append(math.degrees(psi))

    return phi_gen, psi_gen, phi_pro, psi_pro, phi_gly, psi_gly, phi_pre, psi_pre

# Previous attempt: for some reason MDAnalysis calculates torsion angles
# wrong when the .pdb file is too long, or maybe if a chain is discontinuous.
# I have not seen a pattern in this.
# I switched to the dihedral method that was used in PyRAMA, which seems to
# be a better solution.
#I still wanted to leave this here in case I can fix this, which means that there's one import less ''' from MDAnalysis.analysis.dihedrals import Ramachandran r_general = u.select_atoms("backbone and segid B and resname VAL PHE ALA LYS ARG CYS GLU LEU MET HIS TYR TRP SER ASN GLN THR ASP ILE") r_pro = u.select_atoms("resname PRO") r_gly = u.select_atoms("resname GLY") R_general = Ramachandran(r_general).run() R_pro = Ramachandran(r_pro).run() R_gly = Ramachandran(r_gly).run() for atom in u.select_atoms("backbone"): print(atom) phi_general = [] psi_general = [] for line in list(R_general.angles): for entry in line: splitted_entry = entry.tolist() phi_general.append(splitted_entry[0]) psi_general.append(splitted_entry[1]) phi_pro = [] psi_pro = [] for line in list(R_pro.angles): for entry in line: splitted_entry = entry.tolist() phi_pro.append(splitted_entry[0]) psi_pro.append(splitted_entry[1]) phi_gly = [] psi_gly = [] for line in list(R_gly.angles): for entry in line: splitted_entry = entry.tolist() phi_gly.append(splitted_entry[0]) psi_gly.append(splitted_entry[1]) ''' '''
from .IonComplex import IonComplex
from ..PolyIon import Peptide
from ..Ion import fixed_state
import tempfile
from string import ascii_uppercase
from Bio import PDB

# Module-level Bio.PDB helpers, created once at import time.
lister = PDB.PDBList(obsolete_pdb='override')
parser = PDB.PDBParser()
builder = PDB.PPBuilder()


@fixed_state
class Protein(IonComplex):
    """Protein represents an ion composed of a complex of peptides.

    :param name: Name of the protein.
    :param ids: Names of the peptide members.
    :param sequences: Sequences of the peptide members.
    :param members: An iterable of the peptide members.

    If members and sequences are not provided, the name will be searched in
    the Protein DataBase (PDB). If a protein of the same name is available,
    the sequences of the peptides will be gathered from the PDB.
    """

    # State fields declared for this class, each with a short description.
    # Presumably consumed by the ``fixed_state`` decorator - verify there.
    _state = {
        'name': 'Protein name.',
        'members': 'Name of the peptide members.'
    }
data = [] #initializes a list called data for row in datareader: data.append(row) #adds an element to data for each row in structures.csv #parses csv data using PDB_info class pdb_info = [PDB_info(item) for item in data] for i in range(1, len(pdb_info)): #assigns variable names to pdb_info elements pdb_name = pdb_info[i].id #saves given pdb name as a variable protein_name = pdb_info[i].protein #saves given protein name as a variable complete = pdb_info[i].complete #saves yes or no for complete structure_conf = pdb_info[ i].conformation #saves active or inactive for conformation mutation = pdb_info[i].mutation ppb = pdb.PPBuilder() #peptide class to get sequence last = 10000 #gives location of the pdb file pdb_file = './PDBs/' + pdb_name + '.pdb' parser = pdb.PDBParser() struct = parser.get_structure("name", pdb_file) #read in pdb file using PDBParser #gets name of the structure file if structure_conf == 'active': if complete == 'yes': structure_file = './actives/complete/' + protein_name + '_active.pdb' else: structure_file = './actives/incomplete/' + protein_name + '_active.pdb' else:
from Bio import PDB

# Print the peptide sequences of 2FH7 twice: first built from the whole
# structure, then from its first model only.
parser = PDB.PDBParser()
structure = parser.get_structure('2FH7', '2FH7.pdb')
ppb = PDB.PPBuilder()
model = structure[0]

for entity in (structure, model):
    for pp in ppb.build_peptides(entity):
        print(pp.get_sequence())
def getAminoAcids(structure):
    """Return the residues of *structure* as a list of one-letter codes.

    The sequences of all peptide fragments built by ``PPBuilder`` are
    flattened, in order, into a single list of characters.
    """
    builder = PDB.PPBuilder()
    return [
        letter
        for fragment in builder.build_peptides(structure)
        for letter in str(fragment.get_sequence())
    ]
normals = {} outliers = {} for key, val in rama_preferences.items(): normals[key] = {"x": [], "y": []} outliers[key] = {"x": [], "y": []} # Calculate the torsion angle of the inputs for inp in sys.argv[1:]: if not os.path.isfile(inp): print("{} not found!".format(inp)) continue structure = PDB.PDBParser().get_structure('input_structure', inp) for model in structure: for chain in model: polypeptides = PDB.PPBuilder().build_peptides(chain) for poly_index, poly in enumerate(polypeptides): phi_psi = poly.get_phi_psi_list() for res_index, residue in enumerate(poly): res_name = "{}".format(residue.resname) res_num = residue.id[1] phi, psi = phi_psi[res_index] if phi and psi: aa_type = "" if str(poly[res_index + 1].resname) == "PRO": aa_type = "PRE-PRO" elif res_name == "PRO": aa_type = "PRO" elif res_name == "GLY": aa_type = "GLY" else: