def run_dssp(self):
    pdb = PDB.PDBList()
    pdb.retrieve_pdb_file(self.pdb_code, pdir='./', file_format="pdb")
    p = PDB.PDBParser()
    f = 'pdb{}.ent'.format(self.pdb_code.lower())
    wt_residues = [i for i in Residue.objects.filter(protein_conformation__protein=self.protein).exclude(protein_segment__slug__in=['N-term', 'C-term'])]
    gn_residues = [i.sequence_number for i in wt_residues if i.generic_number and i.protein_segment.slug not in ['ECL1', 'ECL2', 'ICL3', 'ECL3']]
    structure = p.get_structure(self.pdb_code, f)
    for chain in structure[0]:
        ch = chain.get_id()
        self.chains.append(ch)
        self.dssp_dict[ch] = OrderedDict()
        self.dssp_info[ch] = OrderedDict([('H', 0), ('B', 0), ('E', 0), ('G', 0), ('I', 0), ('T', 0), ('S', 0), ('-', 0)])
    if len(self.dssp_dict) > 1:
        dssp = PDB.DSSP(structure[0], f, dssp='/env/bin/dssp')
        for key in dssp.keys():
            if int(key[1][1]) in gn_residues:
                self.dssp_dict[key[0]][key[1][1]] = dssp[key]
                self.dssp_info[key[0]][dssp[key][2]] = self.dssp_info[key[0]][dssp[key][2]] + 1
    os.remove(f)
def align_structs(id1, chain1, id2, chain2):
    """
    The main function. Takes the two ids and chain names, finds the alignment
    with the best RMSD, prints that RMSD, and saves the aligned structures to
    files in cif format.
    :param id1: the first file id
    :param chain1: the first protein's chain
    :param id2: the second file id
    :param chain2: the second protein's chain
    """
    # retrieve and parse the relevant structures
    lst = pdb.PDBList()
    protein1 = lst.retrieve_pdb_file(id1)
    protein2 = lst.retrieve_pdb_file(id2)
    parser = pdb.MMCIFParser()
    struct1 = parser.get_structure("p1", protein1)
    struct2 = parser.get_structure("p2", protein2)
    # create lists of CA atoms to align
    atoms1 = create_atoms_list(struct1, chain1)
    atoms2 = create_atoms_list(struct2, chain2)
    if len(atoms1) != len(atoms2):
        atoms1, atoms2 = bonus_9_2(chain1, chain2, struct1, struct2)
    # compute and apply the superposition
    super_imposer = pdb.Superimposer()
    super_imposer.set_atoms(atoms1, atoms2)
    super_imposer.apply(struct2[0].get_atoms())
    print(super_imposer.rms)
    # save the aligned structures to files
    saving_file(id1, struct1)
    saving_file(id2, struct2)
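# create_atoms_list is called above but not shown. A minimal sketch of what it
# might look like, assuming it collects the CA atoms of the named chain from
# the first model (the helper's actual behavior in the source may differ):
def create_atoms_list(struct, chain_name):
    """Return the list of CA atoms for one chain of the first model."""
    chain = struct[0][chain_name]
    return [residue['CA'] for residue in chain if 'CA' in residue]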
def download_pdb(pdb):
    """ Downloads PDB files from the web. """
    if not os.path.isfile("archive_pdb/" + pdb + ".pdb"):
        PDB.PDBList().retrieve_pdb_file(pdb, pdir="archive_pdb")
        os.system("cd archive_pdb \n mv pdb" + pdb.lower() + ".ent " + pdb + ".pdb")
def download_pdblist(pdb_dir, list_file):
    '''
    Uses Biopython to download a list of PDBs into specified directory. (Option 3)
    Called by: AccessPDB.py:main()
    '''
    pdb_codes = read_list_file(list_file)
    pdbl = bpp.PDBList()
    pdbl.download_pdb_files(pdb_codes, pdir=pdb_dir, file_format='pdb')
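# read_list_file is called above but not shown. A minimal sketch, assuming the
# list file holds one PDB code per line (the real helper in AccessPDB.py may
# do more validation):
def read_list_file(list_file):
    """Return the PDB codes listed one per line in list_file."""
    with open(list_file) as handle:
        return [line.strip() for line in handle if line.strip()]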
def download_nr(pdb_dir, cutoff):
    '''
    Downloads representative PDB files into specified directory. (Option 1)
    Called by: AccessPDB.py:main()
    '''
    pdb_codes = get_nr_list(cutoff)
    pdbl = bpp.PDBList()
    pdbl.download_pdb_files(pdb_codes, pdir=pdb_dir, file_format='pdb')
def download_cifs(downld_dir, pdbid):
    print('Downloading MMCIFs...')
    if not os.path.isfile(downld_dir + "/" + str(pdbid).lower() + ".cif.gz"):
        # Download if file does not exist
        pdb1 = PDB.PDBList()
        pdb1.retrieve_pdb_file(pdbid, pdir=downld_dir)
    if os.path.isfile(downld_dir + "/" + str(pdbid).lower() + ".cif"):
        # gzip the downloaded file
        cmd = ("gzip -f " + downld_dir + "/" + str(pdbid).lower() + ".cif")
        subprocess.call(cmd, shell=True)
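# The compression step above shells out to gzip. A portable alternative,
# sketched with the standard-library gzip and shutil modules (the helper name
# is illustrative; unlike `gzip -f`, it removes the original only on request):
import gzip
import shutil

def gzip_file(path, remove_original=True):
    """Compress path to path + '.gz' without calling an external tool."""
    with open(path, 'rb') as src, gzip.open(path + '.gz', 'wb') as dst:
        shutil.copyfileobj(src, dst)
    if remove_original:
        os.remove(path)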
def download(self, pdbIDList, overrideAll=False):
    """
    Download PDB files from ftp://ftp.wwpdb.org
    :param pdbIDList: list of structures to download
    :param overrideAll: Overrides all already downloaded files if set to True
    :return: None
    """
    pdbList = PDB.PDBList()
    failedDownloadsLogFile = self.downloadDirectory + "/failedLog"
    if not os.path.exists(self.downloadDirectory):
        os.makedirs(self.downloadDirectory)
    if not overrideAll and os.path.exists(failedDownloadsLogFile):
        failedLog = open(failedDownloadsLogFile, 'r')
        try:
            self.downloadFailed = self.downloadFailed.union(json.load(failedLog))
        except Exception:
            print("no FailLog detected")
        failedLog.close()
    for pdbID in set(pdbIDList) - self.downloadFailed:
        if not overrideAll:
            if os.path.exists(self._getNameFromPdbID(pdbID)):
                self.downloadSkipped.append(pdbID)
                continue
        try:
            dl_name = pdbList.retrieve_pdb_file(pdbID, pdir=self.downloadDirectory)
            os.rename(dl_name, self.downloadDirectory + "/" + pdbID + ".pdb")
            self.downloadSuccessful.append(pdbID)
            print("Downloaded", pdbID)
        except IOError:
            # self.downloadFailed.add(pdbID)
            print("Failed to download", pdbID)
    # TODO: write a proper manager
    try:
        failedLog = open(failedDownloadsLogFile, 'w')
        failedLog.write(json.dumps(list(self.downloadFailed)))
    except Exception:
        pass
def checkAndGetPDB(path):
    """
    Check for a pdb file, querying the PDB if it is missing.

    :param path: Path to the pdb file; its basename is used as the PDB code to query.
    :type path: str

    :return: Path to the checked pdb file.
    """
    if path is None:
        raise IOError("The parameter 'path' must be a str.")
    if not isinstance(path, str):
        raise IOError("The parameter 'path' must be a str.")

    # Setup paths and filenames.
    pdb_target_name = os.path.basename(path).split('.pdb')[0]
    pdb_target_dir = os.path.dirname(path)

    if not os.path.isfile(path):
        # Query from pdb.org
        pdb_list = PDB.PDBList()
        if pdb_target_dir == '':
            pdb_target_dir = '.'
        try:
            print("PDB file %s could not be found. Attempting to query from protein database server." % (path))
            download_target = pdb_list.retrieve_pdb_file(pdb_target_name, pdir=pdb_target_dir, file_format='pdb')
            # retrieve_pdb_file returns a full path; compare only the basename.
            if os.path.basename(download_target) in os.listdir(pdb_target_dir):
                print("Successfully downloaded structure from PDB.")
        except:
            raise

        # Move and rename the downloaded file.
        shutil.move(download_target, path)

        # Cleanup.
        shutil.rmtree('obsolete')

    return path
def get(PDB_id, directory=None, format='pdb'):
    """
    gets a structure from the PDB using its PDB id

    :param PDB_id: PDB id code
    :type PDB_id: str
    :param format: File format
        * "mmCif" (PDBx/mmCif file),
        * "pdb" (default, format PDB),
        * "xml" (PDBML/XML format),
        * "mmtf" (highly compressed),
        * "bundle" (PDB formatted archive for large structure)
    :param directory: directory in which to save pdb file
    :return: path to the downloaded structure file
    """
    pdb = PDB.PDBList()
    if directory is not None:
        return pdb.retrieve_pdb_file(PDB_id, pdir=directory, file_format=format, overwrite=True)
    return pdb.retrieve_pdb_file(PDB_id, file_format=format, overwrite=True)
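# A usage sketch: download 1TUP next to the script and parse it. The exact
# filename written ('pdb1tup.ent' for the 'pdb' format in current Biopython)
# depends on the requested format, which is why reusing the returned path is
# safer than hard-coding it.
from Bio import PDB

path = get('1TUP', directory='.', format='pdb')
structure = PDB.PDBParser(QUIET=True).get_structure('1TUP', path)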
def get_pdb_files2(id_list, job="J0"):
    """ Returns a list of {"name": pdb_id, "folder": folder} entries for the
    structures that could be retrieved. """
    result = []
    pdbl = bpdb.PDBList()
    for i, pid in enumerate(id_list, 1):
        filename = pdbl.retrieve_pdb_file(pid, pdir="P%05d" % i, file_format="pdb")
        if isfile(filename):
            result += [{"name": pid, "folder": "P%05d" % i}]
        else:
            logging.warning("No structure found in PDB for the ID '" + pid + "'")
            try:
                rmdir(pid)
            except:
                logging.error("Couldn't delete directory for " + str(pid))
    return result
def from_pdb_code_to_structure(code):
    """
    From a specific pdb code this function retrieves the structure file from
    the web database and generates a structure instance.
    """
    if pdb_code_check(code):
        pdbl = pdb.PDBList()
        parser = pdb.PDBParser(QUIET=True)
        try:
            structure = parser.get_structure(code, pdbl.retrieve_pdb_file(code, pdir="pdbfiles/"))
        except urllib.error.URLError:
            sys.stderr.write("There is no structure with the pdb code {} in the database\n".format(code))
            raise ValueError
        if nmr_check(structure):
            return structure
        raise ValueError
    raise ValueError
def PointCloudData(pdbid, chainid):
    """
    Get C-alpha coordinates for the given pdbid and chainid along with
    the temperature factors and residue names.
    """
    pc = []
    bf = []
    resnames = []
    filename = 'pdb' + pdbid.lower() + '.ent'  # name PDBList uses for the 'pdb' format
    if not os.path.exists(os.getcwd() + '/' + filename):
        pdbl = PDB.PDBList()
        pdbl.retrieve_pdb_file(pdbid, obsolete=False, pdir=os.getcwd(), file_format='pdb', overwrite=True)
    parser = PDB.PDBParser(PERMISSIVE=1)
    structure = parser.get_structure(pdbid, filename)
    model = structure[0]
    chain = model[chainid]
    for residue in chain:
        for atom in residue:
            if atom.get_id() == "CA":
                resnames.append(residue.get_resname())
                bf.append(atom.get_bfactor())
                pc.append(atom.get_coord())
    pointcloud = np.asarray(pc)
    return pointcloud, bf, resnames
def _retrieve_chain(pdb_code_input, model_id=0):
    import Bio.PDB as pdb
    import tempfile
    import os
    import shutil
    from io import StringIO

    pdb_code, chain_code = pdb_code_input.split("_")
    temp_dir = tempfile.mkdtemp()
    os.chdir(temp_dir)
    pdb_fetcher = pdb.PDBList()
    pdb_filepath = pdb_fetcher.retrieve_pdb_file(pdb_code)
    parser = pdb.PDBParser()
    structure = parser.get_structure(pdb_code, pdb_filepath)
    chain_result = structure[model_id][chain_code]
    outval = StringIO()
    io = pdb.PDBIO()
    io.set_structure(chain_result)
    io.save(outval)
    outval.seek(0)
    shutil.rmtree(temp_dir)
    return outval
def test_chemical_composition(protein_id):
    """Test that crystals.PDBParser returns the same chemical composition as
    the Bio.PDB.PDBParser implementation, i.e. the same elements in the right
    proportions."""
    pdb_list = biopdb.PDBList(verbose=False, obsolete_pdb=gettempdir())
    biopdb_parser = biopdb.PDBParser()
    with catch_warnings():
        filterwarnings("ignore", category=biopdb.PDBExceptions.PDBConstructionWarning)
        with tempfile.TemporaryDirectory() as temp_dir:
            with PDBParser(protein_id, download_dir=temp_dir) as parser:
                fname = pdb_list.retrieve_pdb_file(protein_id, pdir=temp_dir, file_format="pdb")
                # Note: Bio.PDB atoms store element as uppercase strings.
                # Thus, they must be changed to titlecase.
                crystals_chemical_composition = Counter([atm.element for atm in parser.atoms()])
                biopdb_chemical_composition = Counter([atm.element.title() for atm in biopdb_parser.get_structure(protein_id, fname).get_atoms()])
                assert biopdb_chemical_composition == crystals_chemical_composition
def setUp(self):
    self.pdb_list = biopdb.PDBList(verbose=False, obsolete_pdb=gettempdir())
    self.biopdb_parser = biopdb.PDBParser()
                     'a PDB file'))
if mdl.pdb_code_check(options.code):
    pdb_id = options.code
else:
    raise ValueError('Input code is not a PDB code')
if options.infile:
    pdbfile = options.infile
else:
    pdbfile = 'pdb' + pdb_id + '.ent'
pdbalignedfile = pdb_id + 'align.pdb'
pdb_superimp = pathname + pdb_id + 'superimp.pdb'
if not os.path.exists(pathname + pdbfile):
    pdbobj = pdb.PDBList()
    pdbobj.retrieve_pdb_file(pdb_id, pdir=pathname)
if not (pdbfile.endswith('pdb') or pdbfile.endswith('ent')):
    raise ValueError(('Your input file is not a valid PDB file, please use a '
                      'pdb or ent file'))
atom_list = []
if options.atom == 'CA':
    atom_list = ['CA']
elif options.atom == 'Back':
    atom_list = ['N', 'CA', 'C', 'O']
if options.verb:
    print("Initializing analysis information")
def new_xtals(self, uniprot):
    ''' List GPCR crystal structures missing from GPCRdb and the yaml files.
        Adds missing structures to DB.
    '''
    structs = self.pdb_request_by_uniprot(uniprot)
    try:
        protein = Protein.objects.get(accession=uniprot)
    except:
        protein = None
    try:
        x50s = Residue.objects.filter(protein_conformation__protein=protein, generic_number__label__in=['1x50', '2x50', '3x50', '4x50', '5x50', '6x50', '7x50'])
    except:
        x50s = None
    if structs != ['null']:
        for s in structs:
            missing_from_db, missing_yaml = False, False
            try:
                st_obj = Structure.objects.get(pdb_code__index=s)
            except:
                if s not in self.exceptions:
                    check = self.pdb_request_by_pdb(s)
                    if check == 1:
                        self.db_list.append(s)
                        missing_from_db = True
            if s not in self.yamls and s not in self.exceptions:
                if s not in self.db_list:
                    check = self.pdb_request_by_pdb(s)
                else:
                    check = 1
                if check == 1:
                    self.yaml_list.append(s)
                    missing_yaml = True
            if not missing_from_db:
                continue
            try:
                pdb_data_dict = fetch_pdb_info(s, protein, new_xtal=True)
                exp_method = pdb_data_dict['experimental_method']
                if exp_method == 'Electron Microscopy':
                    st_type = StructureType.objects.get(slug='electron-microscopy')
                elif exp_method == 'X-ray diffraction':
                    st_type = StructureType.objects.get(slug='x-ray-diffraction')
                if 'deletions' in pdb_data_dict:
                    for d in pdb_data_dict['deletions']:
                        presentx50s = []
                        for x in x50s:
                            if not d['start'] < x.sequence_number < d['end']:
                                presentx50s.append(x)
                        # Filter out ones without all 7 x50 positions present in the xtal
                        if len(presentx50s) != 7:
                            try:
                                del self.db_list[self.db_list.index(s)]
                                missing_from_db = False
                                del self.yaml_list[self.yaml_list.index(s)]
                            except:
                                pass
                else:
                    print('Warning: no deletions in pdb info, check {}'.format(s))
                    continue
                if missing_from_db:
                    pref_chain = ''
                    resolution = pdb_data_dict['resolution']
                    pdb_code, created = WebLink.objects.get_or_create(index=s, web_resource=WebResource.objects.get(slug='pdb'))
                    pdbl = PDB.PDBList()
                    pdbl.retrieve_pdb_file(s, pdir='./', file_format="pdb")
                    with open('./pdb{}.ent'.format(s).lower(), 'r') as f:
                        lines = f.readlines()
                    pdb_file = ''
                    publication_date, pubmed, doi = '', '', ''
                    state = ProteinState.objects.get(slug='inactive')
                    new_prot, created = Protein.objects.get_or_create(entry_name=s.lower(), accession=None, name=s.lower(), sequence=pdb_data_dict['wt_seq'],
                                                                      family=protein.family, parent=protein, residue_numbering_scheme=protein.residue_numbering_scheme,
                                                                      sequence_type=ProteinSequenceType.objects.get(slug='mod'),
                                                                      source=ProteinSource.objects.get(name='OTHER'), species=protein.species)
                    new_prot_conf, created = ProteinConformation.objects.get_or_create(protein=new_prot, state=state, template_structure=None)
                    for line in lines:
                        if line.startswith('REVDAT   1'):
                            publication_date = line[13:22]
                        if line.startswith('JRNL        PMID'):
                            pubmed = line[19:].strip()
                        if line.startswith('JRNL        DOI'):
                            doi = line[19:].strip()
                        pdb_file += line
                    pdb_data, created = PdbData.objects.get_or_create(pdb=pdb_file)
                    d = datetime.strptime(publication_date, '%d-%b-%y')
                    publication_date = d.strftime('%Y-%m-%d')
                    try:
                        if doi != '':
                            try:
                                publication = Publication.objects.get(web_link__index=doi)
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=doi, web_resource__slug='doi')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=doi, web_resource=WebResource.objects.get(slug='doi'))
                                    p.web_link = wl
                                p.update_from_doi(doi=doi)
                                p.save()
                                publication = p
                        elif pubmed != '':
                            try:
                                publication = Publication.objects.get(web_link__index=pubmed)
                            except Publication.DoesNotExist as e:
                                p = Publication()
                                try:
                                    p.web_link = WebLink.objects.get(index=pubmed, web_resource__slug='pubmed')
                                except WebLink.DoesNotExist:
                                    wl = WebLink.objects.create(index=pubmed, web_resource=WebResource.objects.get(slug='pubmed'))
                                    p.web_link = wl
                                p.update_from_pubmed_data(index=pubmed)
                                p.save()
                                publication = p
                    except:
                        pass
                    pcs = PdbChainSelector(s, protein)
                    pcs.run_dssp()
                    preferred_chain = pcs.select_chain()

                    # Run state identification
                    # Create yaml files
                    with open('../../data/protwis/gpcr/structure_data/constructs/{}.yaml'.format(pdb_code.index), 'w') as construct_file:
                        yaml.dump({'name': pdb_code.index.lower(), 'protein': protein.entry_name}, construct_file, indent=4)
                    with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as structure_file:
                        struct_yaml_dict = {'construct': pdb_code.index.lower(), 'pdb': pdb_code.index, 'preferred_chain': preferred_chain,
                                            'auxiliary_protein': '',
                                            'ligand': {'name': 'None', 'pubchemId': 'None', 'title': 'None', 'role': '.nan', 'type': 'None'},
                                            'signaling_protein': 'None', 'state': 'Inactive'}
                        auxiliary_proteins, ligands = [], []
                        if pdb_data_dict['ligands'] != 'None':
                            for key, values in pdb_data_dict['ligands'].items():
                                if key in ['SO4', 'NA', 'CLR', 'OLA', 'OLB', 'OLC', 'TAR', 'NAG', 'EPE', 'BU1', 'ACM', 'GOL', 'PEG', 'PO4', 'TLA', 'BOG', 'CIT', 'PLM', 'BMA', 'MAN', 'MLI', 'PGE']:
                                    continue
                                else:
                                    ligands.append({'name': key, 'pubchemId': 'None', 'title': pdb_data_dict['ligands'][key]['comp_name'], 'role': '.nan', 'type': 'None'})
                        for key, values in pdb_data_dict['auxiliary'].items():
                            if pdb_data_dict['auxiliary'][key]['subtype'] in ['Expression tag', 'Linker']:
                                continue
                            else:
                                auxiliary_proteins.append(pdb_data_dict['auxiliary'][key]['subtype'])
                        for key, values in pdb_data_dict['construct_sequences'].items():
                            if key != protein.entry_name and key not in struct_yaml_dict['auxiliary_protein']:
                                if 'arrestin' in key:
                                    struct_yaml_dict['signaling_protein'] = key
                        if len(auxiliary_proteins) > 1:
                            struct_yaml_dict['auxiliary_protein'] = ', '.join(auxiliary_proteins)
                        if len(ligands) > 1:
                            struct_yaml_dict['ligand'] = ligands
                        yaml.dump(struct_yaml_dict, structure_file, indent=4, default_flow_style=False)

                    # Build residue table for structure
                    build_structure_command = shlex.split('/env/bin/python3 manage.py build_structures -f {}.yaml'.format(pdb_code.index))
                    subprocess.call(build_structure_command)

                    # Check state
                    struct = Structure.objects.get(pdb_code__index=pdb_code.index)
                    pi = PdbStateIdentifier(struct)
                    pi.run()
                    if pi.state is not None:
                        Structure.objects.filter(pdb_code__index=pdb_code.index).update(state=pi.state)
                        print(pi.state, pi.activation_value)
                        with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'r') as yf:
                            struct_yaml = yaml.load(yf)
                        struct_yaml['state'] = pi.state.name
                        try:
                            struct_yaml['distance'] = round(float(pi.activation_value), 2)
                        except:
                            struct_yaml['distance'] = None
                        with open('../../data/protwis/gpcr/structure_data/structures/{}.yaml'.format(pdb_code.index), 'w') as struct_yaml_file:
                            yaml.dump(struct_yaml, struct_yaml_file, indent=4, default_flow_style=False)

                    # Check sodium pocket
                    new_prot_conf.sodium_pocket()
                    print('{} added to db (preferred chain: {})'.format(s, preferred_chain))
            except Exception as msg:
                print(s, msg)
def analysis(self):
    """
    Contains all the computations needed to perform the EDA, following the
    same steps used in the main CLI. It also generates the plots at the end
    and then moves the application to the plot_window.
    """
    pdb_id = self.controller.app_data["pdbid"]
    pathname = self.controller.app_data["pathname"]
    pdbfile = self.controller.app_data["pdbfilename"]
    atom = self.controller.app_data["atom"]
    mode = self.controller.app_data["mode"]
    sys.stderr.write("the selected mode is: {} ".format(mode))
    pdbalignedfile = str(pdb_id) + 'align.pdb'
    pdb_superimp = str(pathname) + str(pdb_id) + 'superimp.pdb'
    if not os.path.exists(str(pathname) + str(pdbfile)):
        pdbobj = pdb.PDBList()
        pdbobj.retrieve_pdb_file(pdb_id, pdir=str(pathname))
        sys.stderr.write("The structure {} has been retrieved.\n".format(pdb_id))
    atom_list = []
    if atom == 'CA':
        atom_list = ['CA']
    elif atom == 'Back':
        atom_list = ['N', 'CA', 'C', 'O']
    if mode == 'MD':
        pdbref = pdb.PDBList()
        ref_file = pdbref.retrieve_pdb_file(pdb_id, pdir=pathname)
        parser = pdb.PDBParser(QUIET=True)
        reference = parser.get_structure(pdb_id + 'ref', ref_file)
        try:
            ED = eda.EDAnalysis(pdb_id, mode, atom_list, pathname + pdbfile, reference=reference)
        except (eda.WrongModeException, KeyError, ValueError):
            pass
    else:
        ED = eda.EDAnalysis(pdb_id, mode, atom_list, pathname + pdbfile)
    ED.superimpose_models()
    if mode == 'NMR':
        sys.stderr.write("Writing the superimposed file.\n")
        head = mdl.store_header_text(pathname + pdbfile)
        self.controller.app_data["head"] = head
        io = pdb.PDBIO()
        io.set_structure(ED.structure)
        io.save(pdb_superimp)
        mdl.merge_the_header(pdb_superimp, head, pathname + pdbalignedfile)
        os.remove(pdb_superimp)
    sys.stderr.write("Calculating means and coordinates\n")
    ED.createcordsarray()
    sys.stderr.write("Calculating covariance matrix\n")
    sys.stderr.write("Calculating eigenvalues and eigenvectors\n")
    ED.cal_cov()
    sys.stderr.write("Plotting eigenvalues\n")
    self.controller.app_data["ED"] = ED
    # pathplots = self.controller.app_data["pathplots"]
    n_plot = 30
    if ED.n < n_plot:
        n_plot = ED.n
    pathplots = pathname + 'plots/'
    plot = ED.plot_eig_wosv(n_plot)
    self.controller.app_data["plot"] = plot
    RMSD_plot = ED.RMSD_res_plot(4, pathplots, origin='interface')
    self.controller.app_data["RMSD_plot"] = RMSD_plot
    self.controller.show_frame("plot_window")
def consumer(code, result_queue):
    try:
        result_queue.put((BPDB.PDBList().retrieve_pdb_file(pdb_code=code[0:4]), "Pull Successful"))
    except Exception:
        result_queue.put(("", "Pull Failed at Download"))
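# A sketch of how consumer might be driven, assuming one worker thread per PDB
# code and a shared queue collecting the (path, status) tuples (the producer
# side is not shown in the source, so the thread setup here is an assumption):
import threading
from queue import Queue

codes = ['1TUP', '2DN1']
results = Queue()
threads = [threading.Thread(target=consumer, args=(c, results)) for c in codes]
for t in threads:
    t.start()
for t in threads:
    t.join()
while not results.empty():
    print(results.get())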
import numpy as np
import pandas as pd
from Bio import PDB

repository = PDB.PDBList()
parser = PDB.PDBParser()
repository.retrieve_pdb_file('1TUP', pdir='.', file_format='pdb')
p53_1tup = parser.get_structure('P 53', 'pdb1tup.ent')

my_residues = set()
for residue in p53_1tup.get_residues():
    my_residues.add(residue.id[0])
print(my_residues)

def get_mass(atoms, accept_fun=lambda atom: atom.parent.id[0] != 'W'):
    return sum([atom.mass for atom in atoms if accept_fun(atom)])

chain_names = [chain.id for chain in p53_1tup.get_chains()]
my_mass = np.ndarray((len(chain_names), 3))
for i, chain in enumerate(p53_1tup.get_chains()):
    my_mass[i, 0] = get_mass(chain.get_atoms())
    my_mass[i, 1] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] not in [' ', 'W'])
    my_mass[i, 2] = get_mass(chain.get_atoms(), accept_fun=lambda atom: atom.parent.id[0] == 'W')
masses = pd.DataFrame(my_mass, index=chain_names, columns=['No Water', 'Zincs', 'Water'])
# Biopython includes a function that downloads every file in a list, but it
# inserts no delay between downloads, and you can be banned for fetching many
# files without timeouts.
#
# This script downloads entries.idx,
# creates a pdb_index file (a clean file for a dataframe),
# and creates a directory with the downloaded pdb files.
#
# Note: in this script I've downloaded the files containing RNA in their title.
# To change the subset of files, change the line with the 'mask' variable
# assignment.

# Absolute or relative path for directory to store pdbs
path = input("Enter path to directory to store files: ")
os.makedirs(path, exist_ok=True)

# Initialize downloader class
load_struct = PDB.PDBList()

# Parse command to download the pdb index file
command = shlex.split('wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx')
# Run command
sp.check_output(command, universal_newlines=True)

# Delete junk --lines in the file, make the header tab-separated like the main
# part of the file
with open('entries.idx', 'r') as source, open('pdb_index', 'w') as target:
    for ind, line in enumerate(source):
        if ind == 0:
            line = line.replace(', ', '\t')
        if not line.startswith('-'):
            target.write(line)
from .IonComplex import IonComplex
from ..PolyIon import Peptide
from ..Ion import fixed_state
import tempfile
from string import ascii_uppercase
from Bio import PDB

lister = PDB.PDBList(obsolete_pdb='override')
parser = PDB.PDBParser()
builder = PDB.PPBuilder()


@fixed_state
class Protein(IonComplex):
    """Protein represents an ion composed of a complex of peptides.

    :param name: Name of the protein.
    :param ids: Names of the peptide members.
    :param sequences: Sequences of the peptide members.
    :param members: An iterable of the peptide members.

    If members and sequences are not provided, the name will be searched in
    the Protein DataBase (PDB). If a protein of the same name is available,
    the sequences of the peptides will be gathered from the PDB.
    """

    _state = {'name': 'Protein name.',
              'members': 'Name of the peptide members.'}
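# A sketch of how the module-level lister, parser, and builder above might
# combine to pull peptide sequences for a named protein, per the class
# docstring (the helper name _sequences_from_pdb and the temp-dir layout are
# assumptions for illustration, not part of the original class):
def _sequences_from_pdb(name):
    """Download a PDB entry and return one sequence string per peptide."""
    pdb_dir = tempfile.mkdtemp()
    fname = lister.retrieve_pdb_file(name, pdir=pdb_dir, file_format='pdb')
    structure = parser.get_structure(name, fname)
    return [str(peptide.get_sequence()) for peptide in builder.build_peptides(structure)]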
def main():
    """
    Download entries.idx from the PDB, parse it, and download all pdb files
    containing 'RNA' in the header. Perform base pair annotation of all rna
    pdb files in the rna_ids file via rnaview.
    :return:
    """
    # Downloading
    # Initialize downloader class
    load_struct = PDB.PDBList()
    # Parse command to download the pdb index file
    command = shlex.split('wget ftp://ftp.wwpdb.org/pub/pdb/derived_data/index/entries.idx')
    # Run command
    sp.check_output(command, universal_newlines=True)
    # Delete junk --lines in the file, make the header tab-separated like the
    # rest of the file
    with open('entries.idx', 'r') as source, open('pdb_index', 'w') as target:
        for ind, line in enumerate(source):
            if ind == 0:
                line = line.replace(', ', '\t')
            if not line.startswith('-'):
                target.write(line)
    # Create dataframe from the full pdb index file
    with open('pdb_index', 'r') as source:
        data = pd.read_csv(source, sep='\t')
    print(data.shape, data.columns)
    # Checking data
    print(data.isnull().any(), data.shape)
    # Drop entries with an empty header
    data.dropna(subset=['HEADER'], inplace=True)
    print(data.shape)
    # Filter the subset of data with RNA in the header
    mask = data['HEADER'].str.contains('RNA')
    rna = data[mask]
    # Create a list with the PDB ids of files with RNA
    rna_ids = rna['IDCODE'].unique().tolist()
    rna_length = len(rna_ids)
    rna_length, rna.head(), rna_ids
    with open('rna_ids', 'w') as file:
        for entry in rna_ids:
            file.write('{}\n'.format(entry))
    # Download the pdb files from the RNA list one at a time, waiting
    # 30 seconds between downloads
    for ind, file in enumerate(rna_ids, 1):
        load_struct.retrieve_pdb_file(file, file_format='pdb', pdir='/home/arleg/PycharmProjects/Bioinformatics/RNA/pdb')
        print("{} is loaded, {} from {}".format(file, ind, rna_length))
        time.sleep(30)
    # Base pair annotation of all rna pdb files in the rna_ids file.
    # Add the rnaview environment variable to the environment in this script
    # (needed even though the tool's path and the variable are recorded in
    # .bashrc)
    envir = os.environ
    envir['RNAVIEW'] = '/home/arleg/RNATools/RNAVIEW'
    # Call rnaview on all files listed in rna_ids.
    # rnaview has an option to read all pdbs in one call, but it did not work
    # in this script.
    with open('rna_ids', 'r') as file:
        for line in file:
            line = line.strip().lower()
            # Parse the command for the shell
            command = shlex.split('/home/arleg/RNATools/RNAVIEW/bin/rnaview pdb/pdb{}.ent'.format(line))
            # Run it
            sp.check_output(command, env=envir, universal_newlines=True)
def __gatherAtoms(self):
    # try:
    bfactorCount = 0
    bfactorTotal = 0
    if True:
        import Bio.PDB as bio
        self.hasPDB = True
        pdbCode = self.pdbCode.lower()
        print('PSU: load from BioPython', self.pdbCode)
        parser = bio.PDBParser()
        biodl = bio.PDBList()
        structure = None
        gotPdb = False
        try:
            # print('debug get pdb from', self.pdbDataPath + 'pdb' + pdbCode + '.ent')
            structure = parser.get_structure(pdbCode, self.pdbDataPath + 'pdb' + pdbCode + '.ent')
            gotPdb = True
        except:
            if '_ADJ' not in self.pdbDataPath:  # never download the pdb to an adjusted directory
                import time
                print('!!! Downloading from pdb: ', self.pdbDataPath, pdbCode)
                biodl.download_pdb_files([pdbCode], pdir=self.pdbDataPath, file_format='pdb')
                time.sleep(1)
                try:
                    structure = parser.get_structure(pdbCode, self.pdbDataPath + 'pdb' + pdbCode + '.ent')
                    gotPdb = True
                except:
                    import time
                    time.sleep(10)
                    structure = parser.get_structure(pdbCode, self.pdbDataPath + 'pdb' + pdbCode + '.ent')
                    gotPdb = True
        if gotPdb:
            resolution = structure.header['resolution']
            atomNo = 0
            resnum = 1
            for model in structure:
                for chain in model:
                    for residue in chain:
                        r = residue.get_resname()
                        # print('Residue:', r)
                        rid = residue.get_full_id()[3][1]
                        chain = residue.get_full_id()[2]
                        hetatm = residue.get_full_id()[3][0]
                        ridx = resnum
                        resnum = resnum + 1
                        # decision as to whether r is to be used: for density maps yes, for geometry no
                        # print(residue.get_full_id())
                        # print(r, hetatm)
                        if (r in self.getAAList() and 'H' not in hetatm) or self.useAll:  # and r != 'HOH'  # bio.is_aa(residue)
                            for atom in residue:
                                disordered = 'N'
                                useAtom = True
                                if atom.is_disordered():
                                    disordered = 'Y'
                                    if self.keepDisordered:
                                        if atom.disordered_has_id("A"):
                                            atom.disordered_select("A")
                                        else:
                                            useAtom = False
                                if not self.keepDisordered and useAtom:
                                    if atom.get_occupancy() < 1:
                                        useAtom = False
                                        print('debug not passed disordered', atom, atom.get_occupancy())
                                if useAtom:
                                    atomID = atom.get_full_id()[0] + chain + str(rid) + atom.get_name()
                                    if atomID in self.badAtoms:
                                        # print(atomID)
                                        useAtom = False
                                if useAtom:
                                    oneAtom = atm.GeoAtom()
                                    oneAtom.setStructureInfo(pdbCode, resolution)
                                    oneAtom.setResidueInfo(chain, rid, ridx, r)
                                    atomNo += 1
                                    name = atom.get_name()
                                    occupant = atom.get_full_id()[4][1]
                                    if occupant == ' ':
                                        occupant = 'A'
                                    x = atom.get_vector()[0]
                                    y = atom.get_vector()[1]
                                    z = atom.get_vector()[2]
                                    bfactor = atom.get_bfactor()
                                    if name == 'CA':
                                        bfactorCount += 1
                                        bfactorTotal += bfactor
                                    occupancy = atom.get_occupancy()
                                    oneAtom.setAtomInfo(r, name, atomNo, x, y, z, bfactor, occupant, occupancy, disordered)
                                    # if rid < 3:
                                    #     print(oneAtom)
                                    # add density if we can
                                    if self.hasDensity:
                                        tFoFc, FoFc, Fo, Fc = self.geoDen.getDensityXYZ(x, y, z)
                                        oneAtom.setDensityInfo(tFoFc, FoFc, Fo, Fc)
                                    # print('Atom:', atomNo)
                                    if r in self.getAAList():
                                        self.atoms.append(oneAtom)
                                    elif r == 'HOH':
                                        self.water.append(oneAtom)
                                    else:
                                        self.hetatms.append(oneAtom)
            if bfactorCount > 0:
                self.averageBfactor = bfactorTotal / bfactorCount
                # Now set the bFactorRatio for all atoms
                for atom in self.atoms:
                    atom.values['bfactorRatio'] = atom.values['bfactor'] / self.averageBfactor
            else:
                self.averageBfactor = 0
            print('PSU: loaded successfully from BioPython', self.pdbCode)
            self.hasPDB = True
        else:
            print('!!! PSU: failed to load', self.pdbCode, 'from', self.pdbDataPath)
            self.hasPDB = False
    # except:
    #     self.hasPDB = False
    return (self.hasPDB)
def analyse_pucker_from_pdbs(pdbinputfilename, ligandinputfilename=None, outputfile="tessellate_report", output_format="json", output_dir=""):
    try:
        import Bio.PDB as bp
        import tessellate.utils.pucker as puc
        import numpy as np
        import json
        import tessellate.utils.helperfunctions as helperfunctions
        import tessellate.utils.getRing as getRing
        import os
    except Exception as e:
        print("Error - Cannot import module %s", e)
        exit(1)
    # . workaround stdout annoyance, biopdb sometimes uses print
    try:
        import sys
        from io import StringIO
    except Exception as e:
        print("Error - Cannot import module ", e)
        exit(1)
    all_pucker_json = helperfunctions.init_all_pucker_dictionary()
    all_macro_pucker_json = helperfunctions.init_all_pucker_dictionary()
    # PDBDATA = {}
    outputfile = ".".join([outputfile, output_format])
    txt = False
    if output_format == "txt":
        txt = True
        outputfile = open(os.path.join(output_dir, outputfile), 'w')
        outputfile.write("tessellate " + __version__ + " txt\n")
        outputfile.write("PDBID RESNAME CHAIN RESID RINGATOMSORDER CONFORMER CONTEXTUAL_CONFORMER ANGULAR_PUCKER_COORDS ORIG_CONFORMER RING_SIZE\n")
        logger.critical("to be improved, does not match json output at present")
    nodejson = []
    # . read in the list of pdbs to process (format is one column of pdb ids)
    pdblist = []
    inputfile = open(pdbinputfilename, 'r')
    for line in inputfile:
        logger.debug('Read from pdbnames %s', line)
        pdblist.append(line.strip())
    # . read in ligands with preferred ring ordering (format is name, ringsize,
    #   numrings, then numrings sets of N atom names)
    if ligandinputfilename is None:
        ligand_dict = {
            '3DR': {'num': 1, 'ringids': [("C2'", "C3'", "C4'", "O4'", "C1'")], 'ringsize': 5},
            'AVU': {'num': 2, 'ringids': [("C2'", "C3'", "C4'", "O4'", "C1'"), ('C2R', 'C3R', 'C4R', 'O4R', 'C1R')], 'ringsize': 5},
            'NAG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'CTR': {'num': 3, 'ringids': [('C3A', 'C4A', 'C5A', 'O5A', 'C1A', 'C2A'), ('C3B', 'C4B', 'C5B', 'O5B', 'C1B', 'C2B'), ('C3C', 'C4C', 'C5C', 'O5C', 'C1C', 'C2C')], 'ringsize': 6},
            'PSG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'MAN': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'BMA': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'NDG': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O', 'C1', 'C2')], 'ringsize': 6},
            'BGP': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'G6P': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'ACR': {'num': 4, 'ringids': [('C3A', 'C4A', 'C5A', 'C7A', 'C1A', 'C2A'), ('C3B', 'C4B', 'C5B', 'O5B', 'C1B', 'C2B'), ('C3C', 'C4C', 'C5C', 'O5C', 'C1C', 'C2C'), ('C3D', 'C4D', 'C5D', 'O5D', 'C1D', 'C2D')], 'ringsize': 6},
            'LAK': {'num': 2, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2'), ("C3'", "C4'", "C5'", "O5'", "C1'", "C2'")], 'ringsize': 6},
            'GAL': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'BGC': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'NGA': {'num': 1, 'ringids': [('C3', 'C4', 'C5', 'O5', 'C1', 'C2')], 'ringsize': 6},
            'UPG': {'num': 1, 'ringids': [("C3'", "C4'", "C5'", "O5'", "C1'", "C2'")], 'ringsize': 6},
            'BBA': {'num': 1, 'ringids': [('C1', 'C2', 'C3', 'C', 'C4', 'C5', 'C6')], 'ringsize': 7},
            'H52': {'num': 1, 'ringids': [('N21', 'C22', 'C23', 'N24', 'C25', 'C26', 'C27')], 'ringsize': 7},
            '0J0': {'num': 1, 'ringids': [('C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21')], 'ringsize': 7},
            '13U': {'num': 1, 'ringids': [('C42', 'C43', 'C44', 'C45', 'C46', 'C47', 'C48', 'C49')], 'ringsize': 8},
            'PS9': {'num': 1, 'ringids': [('S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9')], 'ringsize': 8},
        }
    else:
        ligfile = open(ligandinputfilename, 'r')
        ligand_dict = {}
        for line in ligfile:
            chunked = line.split()
            id = chunked[0]
            ringsize = int(chunked[1])
            numrings = int(chunked[2])
            columns = int(3)  # columns in addition to atoms
            if len(chunked) == (columns + int(numrings) * int(ringsize)):
                ligand_dict[id] = {}
                ligand_dict[id]["num"] = numrings
                ligand_dict[id]["ringids"] = []
                ligand_dict[id]["ringsize"] = ringsize
                for i in range(0, numrings):
                    templist = []
                    for j in range(0, ringsize):
                        templist.append(chunked[columns + i * ringsize + int(j)])
                    ligand_dict[id]["ringids"].append(tuple(templist))
                logger.debug('Creation of liganddict %s', ligand_dict[id])
            else:
                logger.error('atom ids relative to number of rings do not match in ligands file %s %s %s %s', chunked, len(chunked), numrings, ringsize)
    logger.debug("Ligand Dict used: %s", ligand_dict)
    # . ignore these residues
    aminoacids = ["GLY", "ALA", "SER", "MET", "LYS", "GLU", "PRO", "ASP", "VAL", "PHE", "ASN", "ILE", "TRP", "CYS", "HIS", "LEU", "GLN", "ARG", "TYR", "THR"]
    water = ["HOH"]
    other = []
    soup = aminoacids + water + other
    # . download pdbs (files already present will not be re-downloaded)
    for pdbid in pdblist:
        logger.debug('Ids in downloads list %s', pdbid)
        pdbl = bp.PDBList()
        pdbl.retrieve_pdb_file(pdbid, file_format="pdb", pdir=os.path.join(output_dir, "pdb"))
    # . parse pdbs for ligands; when found, calc pucker
    p = bp.PDBParser()
    # .. loop over all pdbs
    for pdbid in pdblist:
        pdbpath = os.path.join(output_dir, "pdb") + "/pdb" + pdbid.lower() + ".ent"
        # else:
        #     pdbpath = os.path.join(upload_folder, pdbid)
        logger.debug('path to pdbids downloaded %s', pdbid)
        structure = p.get_structure(pdbid, pdbpath)
        # .. get all the residues
        res_list = bp.Selection.unfold_entities(structure, 'R')
        # .. loop over the residues
        for resi in res_list:
            SSSR = []
            rname = resi.get_resname()
            # .. if this residue is in the ligands list, calc pucker
            if rname in ligand_dict.keys():
                logger.debug('Ligand %s appears in the ligand dict ', rname)
                # .. get atom coords
                for nrings in range(0, ligand_dict[rname]["num"]):
                    try:
                        listallcoors = []
                        # .. some pdbs have missing atoms; for this case, check the ring atoms exist in the PDB
                        check_atoms = []
                        for i in resi.get_list():
                            check_atoms.append(i.get_name())
                        missingatomtest = False
                        for atomindex in range(0, ligand_dict[rname]["ringsize"]):
                            if ligand_dict[rname]["ringids"][nrings][atomindex] not in check_atoms:
                                missingatomtest = True
                        if missingatomtest:
                            logger.debug('Missing atoms for %s', rname)
                            pass  # without notifying user!
                        else:
                            for atomindex in range(0, ligand_dict[rname]["ringsize"]):
                                listallcoors = listallcoors + list(resi[ligand_dict[rname]["ringids"][nrings][atomindex]].get_coord())
                            pobj = puc.Pucker(tuple(listallcoors))
                            if pobj and pobj.isvalid:
                                try:
                                    thisframeTD = pobj.calculate_triangular_tessellation()
                                    logger.debug(thisframeTD)
                                    conformer = pobj.deduce_canonical_conformation()
                                    nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                    pconf = pobj.contextualise_conformer(conformer[0], ligand_dict[rname]["ringids"][nrings])
                                    node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1],
                                                               ligand_dict[rname]["ringids"][nrings], conformer[0], pconf,
                                                               thisframeTD, nextconformer[0], pobj.ringsize, pobj.ttnum[conformer[0]])
                                    logger.info(log)
                                    if txt:
                                        outputfile.write(log)
                                    if not nodejson == [] and node in nodejson:
                                        logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                    else:
                                        nodejson.append(node)
                                except Exception as e:
                                    logger.error('In known ligs, Pucker object valid, but calc, classify etc. failed %s %s', str(pobj._coords), e)
                                    raise e
                            else:
                                logger.info("pobj is None or not valid in ligand ring find %s ", listallcoors)
                    except Exception as e:
                        logger.error("pdb file may be missing coordinates..")
                        raise  # not happy about how I raise this error but OK for now
            # .. Calculate SSSR regardless of whether this ligand is in the dict, then calc pucker
            # .. this may make sense but will cause duplicates ......
            if rname not in soup and len(resi.get_list()) > 5:
                atomlist = []
                if resi.is_disordered():
                    for atom in resi.get_list():  # use get_list instead of unpacked list
                        if atom.is_disordered():
                            list_of_disorder = atom.disordered_get_id_list()
                            # just using the last one for now (whatever that is)
                            selected = list_of_disorder[-1]
                            if atom.disordered_has_id(selected):
                                atom.disordered_select(selected)
                for i in resi.get_list():
                    atomlist.append([i.get_name(), i.get_coord()])
                logger.debug("resi %s atomlist %s length %s", resi, atomlist, len(atomlist))
                SSSR = getRing.create_graph_and_find_rings_suite(atomlist, mineuclid=1.0, maxeuclid=2.2)
                try:
                    # hack to fix this in python3 - working on it
                    for iidx in range(0, len(atomlist), 12):
                        logger.debug("individual items atomlist %s", atomlist[iidx:iidx + 12])
                        SSSR_item = getRing.create_graph_and_find_rings_suite(atomlist[iidx:iidx + 12], mineuclid=1.0, maxeuclid=2.2)
                        logger.debug("individual items %s", SSSR_item)
                        if SSSR_item in SSSR:
                            pass
                        else:
                            SSSR.extend(SSSR_item)
                            logger.debug("Extending SSSR %s", SSSR_item)
                except Exception as e:
                    logger.error("Error in SSSR extend %s", e)
                logger.debug("SSSR rings %s", SSSR)
                if SSSR:
                    for ring in SSSR:
                        nring = SSSR.index(ring)
                        logger.debug('FOUND %i rings in resi %s ', nring + 1, rname)
                        alpharing = getRing.getcommonring(ring)
                        if alpharing is not None:
                            ring = alpharing
                        else:
                            # try to match it against the common-rings dict
                            for common in getRing.commonrings:
                                if sorted(common) == sorted(ring):
                                    ring = common
                                    break
                        listallcoors = []
                        for atomindex in ring:
                            listallcoors = listallcoors + list(resi[atomindex].get_coord())
                        logger.debug("listallcoors %s", listallcoors)
                        pobj = puc.Pucker(tuple(listallcoors))
                        if pobj and pobj.isvalid:
                            try:
                                thisframeTD = pobj.calculate_triangular_tessellation()
                                conformer = pobj.deduce_canonical_conformation()
                                pconf = pobj.contextualise_conformer(conformer[0], ring)
                                nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1], ring,
                                                           conformer[0], pconf, thisframeTD, nextconformer[0],
                                                           pobj.ringsize, pobj.ttnum[conformer[0]])
                                logger.info(log)
                                if txt:
                                    outputfile.write(log)
                                if not nodejson == [] and node in nodejson:
                                    logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                else:
                                    nodejson.append(node)
                            except Exception as e:
                                logger.error('In SSSR Pucker object valid, but calc, classify etc. failed %s %s', str(pobj._coords), e)
                                raise e
                        else:
                            logger.info("pobj is None or not valid in SSSR ring find %s %s %s %s", listallcoors, rname, resi.get_id()[1], ring)
                else:
                    logger.debug('%s has no rings', rname)
                # . get all rings in this resi, get com
                macroatomlist = []
                if SSSR:
                    logger.debug("SSSR possible macro list %s %s", pdbid, SSSR)
                    for aring in SSSR:
                        ringcoords = []
                        for ringatom in aring:
                            for itm in resi.get_list():
                                if itm.get_name() == ringatom:
                                    ringcoords.append(itm.get_coord())
                        # print(aring, np.array(np.add.reduce(ringcoords) / len(ringcoords)), "\n")
                        if len(ringcoords) < 9 and len(ringcoords) > 4:  # ignore too large or too small cycles
                            # print(len(ringcoords))
                            macroatomlist.append(["".join(aring), np.array(np.add.reduce(ringcoords) / len(ringcoords))])
                            # macroatomlist.append(["".join(aring), np.array(np.add.reduce(ringcoords))])
                        logger.debug("resi %s rings %s macroatomlist %s", resi, aring, macroatomlist)
                if len(macroatomlist) > 4:  # need at least five cycles to calculate macropucker
                    # print("macroatom ", macroatomlist)
                    # import itertools
                    # for a, b in itertools.combinations(macroatomlist, 2):
                    #     # work out euclidean distance; call this an edge if mineuclid < dist < maxeuclid
                    #     print(a, b, np.linalg.norm(a[1] - b[1]))
                    logger.debug("Macroatoms list %s %s", pdbid, macroatomlist)
                    macroSSSR = getRing.create_graph_and_find_rings_suite(macroatomlist, maxeuclid=8.0)
                    logger.debug(macroSSSR)
                    if macroSSSR:
                        # print("mS ", macroSSSR)
                        for ring in macroSSSR:
                            nring = macroSSSR.index(ring)
                            logger.debug('FOUND %i macro rings in resi %s ', nring + 1, rname)
                            alpharing = getRing.getcommonring(ring)
                            if alpharing is not None:
                                ring = alpharing
                            else:
                                # try to match it against the common-rings dict
                                for common in getRing.commonrings:
                                    if sorted(common) == sorted(ring):
                                        ring = common
                                        break
                            listallcoors = []
                            # print(macroatomlist)
                            for atoms in ring:
                                listallcoors.extend(list((list(x[1]) for x in macroatomlist if x[0] in atoms)))
                            # now flatten the list
                            listallcoors = [y for x in listallcoors for y in x]
                            logger.debug("macro listallcoors %s", listallcoors)
                            pobj = puc.Pucker(tuple(listallcoors))
                            if pobj and pobj.isvalid:
                                try:
                                    thisframeTD = pobj.calculate_triangular_tessellation()
                                    conformer = pobj.deduce_canonical_conformation()
                                    pconf = pobj.contextualise_conformer(conformer[0], ring)
                                    nextconformer = pobj.deduce_canonical_conformation(nextguess=True)
                                    logger.debug("Macrocycles %s %s %s %s", thisframeTD, conformer, pconf, nextconformer)
                                    node, log = createnodejson(pdbid, rname, resi.get_parent().get_id(), resi.get_id()[1], ring,
                                                               conformer[0], pconf, thisframeTD, nextconformer[0],
                                                               pobj.ringsize, pobj.ttnum[conformer[0]], True)
                                    logger.info(log)
                                    if txt:
                                        outputfile.write(log)
                                    if not nodejson == [] and node in nodejson:
                                        logger.debug('ENTRY EXISTS : %s %s', rname, resi.get_id()[1])
                                    else:
                                        nodejson.append(node)
                                except Exception as e:
                                    logger.error('In macrocyc, Pucker object valid, but calc, classify etc. failed %s %s', str(pobj._coords), e)
                                    pass
                            else:
                                logger.error("pobj is None or not valid in macrocyc %s", listallcoors)
                    else:
                        logger.debug('%s has no macro rings', rname)
    if output_format == "json":
        d2 = [key for key in nodejson]
        helperfunctions.write_to_json(d2, os.path.join(output_dir, outputfile), input_format="pdblist")
    elif output_format == "bson":
        d2 = [key for key in nodejson]
        helperfunctions.write_to_bson(d2, os.path.join(output_dir, outputfile))
    elif output_format == "pandas":
        d2 = [key for key in nodejson]
        helperfunctions.write_to_pandas_dataframe(d2, os.path.join(output_dir, outputfile))
    inputfile.close()
    if txt:
        outputfile.close()
    return
class SelectChains(PDB.Select):
    """ Only accept the specified chains when saving. """
    def __init__(self, chain_letters):
        self.chain_letters = chain_letters

    def accept_chain(self, chain):
        return (chain.get_id() in self.chain_letters)


if __name__ == "__main__":
    """ Parses PDB ids and desired chains, and creates new PDB structures. """
    import sys
    if not len(sys.argv) == 2:
        print("Usage: $ python %s 'pdb.txt'" % __file__)
        sys.exit()
    pdb_textfn = sys.argv[1]
    pdbList = PDB.PDBList()
    splitter = ChainSplitter("/home/steve/chain_pdbs")  # Change me.
    with open(pdb_textfn) as pdb_textfile:
        for line in pdb_textfile:
            pdb_id = line[:4].lower()
            chain = line[4]
            pdb_fn = pdbList.retrieve_pdb_file(pdb_id)
            splitter.make_pdb(pdb_fn, chain)
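# SelectChains can also be used directly with PDBIO, independent of the
# ChainSplitter helper above. A minimal sketch, assuming an already-parsed
# structure and chain 'A' (the file names are illustrative):
from Bio import PDB

structure = PDB.PDBParser(QUIET=True).get_structure('1tup', 'pdb1tup.ent')
io = PDB.PDBIO()
io.set_structure(structure)
io.save('1tup_A.pdb', select=SelectChains('A'))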
def download_pdb(self, info):
    pdb_id, chain_id = info

    ## Check whether an atom has alternative positions; if so, keep the 'A'
    ## position and remove the flag (somehow this class doesn't seem to
    ## function well)
    class NotDisordered(Select):
        def accept_atom(self, atom):
            if not atom.is_disordered() or atom.get_altloc() == 'A':
                atom.set_altloc(' ')
                return True
            else:
                return False

    ## BioPython downloads the PDB but gives it a lowercase name in pdb{}.ent format
    biopdb_name = '{0}/pdb{1}.ent'.format(self.work_dir, pdb_id.lower())
    biopdb_modf = '{0}/pdb{1}.mod.ent'.format(self.work_dir, pdb_id.lower())
    if not os.path.isfile(biopdb_modf):
        try:
            PDB.PDBList(verbose=False).retrieve_pdb_file(pdb_id, pdir=self.work_dir, obsolete=False, file_format='pdb')
        except FileNotFoundError:
            print('  \033[31m> ERROR: BioPython cannot download PDB: \033[0m' + pdb_id)
            return None
        ## Replace modified AA to avoid mis-recognition in the biopython read-in
        ## Replace disordered atoms and keep only the "A" variant
        ReplacePDBModifiedAA(biopdb_name, biopdb_modf)
        os.system('grep "REMARK " {0} > {0}.remark'.format(biopdb_modf))
    with open(biopdb_modf, 'r') as fi:
        remarks = [l for l in fi if re.search('REMARK HET ', l)]

    ## Read the PDB file and extract the chain from structure[0]
    try:
        model = PDB.PDBParser(PERMISSIVE=1, QUIET=1).get_structure(pdb_id, biopdb_modf)[0]
    except KeyError:
        print('  \033[31m> ERROR: BioPython cannot read in PDB: \033[0m' + biopdb_modf)
        return None
    except ValueError:
        print('  \033[31m> ERROR: PDB file is empty: \033[0m' + biopdb_modf)
        return None

    ### Bug alert: as of 20.02.18, the Biopython devs haven't come up with a
    ### good strategy to fix the 'atom.disordered_get_list()' issue with
    ### alternative positions of residue side chains. To work around this,
    ### physically remove the "B" variant and keep only the "A" variant.
    io = PDB.PDBIO()
    io.set_structure(model[chain_id])
    io.save('{0}/{1}_{2}.pdb'.format(self.work_dir, pdb_id, chain_id), select=NotDisordered())

    # Attach REMARK to end of PDB as safekeeping
    os.system('cat {0}/{1}_{2}.pdb {3}.remark > {1}.temp'.format(self.work_dir, pdb_id, chain_id, biopdb_modf))
    os.system('mv {1}.temp {0}/{1}_{2}.pdb'.format(self.work_dir, pdb_id, chain_id))
    # os.system('mv {1} {0}/{2}.ent'.format(self.work_dir, biopdb_name, pdb_id))
    # os.system('bzip2 -f {0}/{1}.ent'.format(self.work_dir, pdb_id))
    # os.system('rm {0} {0}.remark'.format(biopdb_modf))
    return '{0}/{1}_{2}.pdb'.format(self.work_dir, pdb_id, chain_id)
'''
==========================
calculate the S-S
==========================
'''
from Bio import PDB
from Bio.PDB import Vector

pdb1 = PDB.PDBList()
pdb1.retrieve_pdb_file('1C9X')
parser = PDB.PDBParser()
# PDBList writes lowercase filenames, so the path must be 'pdb1c9x.ent'
structure = parser.get_structure('1C9X', 'c9/pdb1c9x.ent')

Atom1 = []  # CB atoms of each CYS
Atom2 = []  # SG atoms of each CYS
resid = []
list = []
dict = {}
for model in structure:
    for chain in model:
        # print(chain)
        for residue in chain:
            # print(i, residue.resname, residue.id[1])
            if residue.resname == 'CYS':
                list.append(residue.id[1])
                for atom in residue:
                    if atom.name == 'CB':
                        Atom1.append(atom)
                    elif atom.name == 'SG':
                        Atom2.append(atom)

i = 0
for i in range(len(Atom1)):
    # Assumed completion of the loop, which is truncated in the original:
    # print each SG-SG distance; a disulfide bond is typically ~2.05 A.
    for j in range(i + 1, len(Atom2)):
        print(list[i], list[j], Atom2[i] - Atom2[j])
'''
Retrieve a PDB structure file from the web and parse it with Biopython.
-----------------------------------------------------------
(c) 2013 Allegra Via and Kristian Rother
Licensed under the conditions of the Python License

This code appears in section 21.2.2 of the book
"Managing Biological Data with Python".
-----------------------------------------------------------
'''
from Bio import PDB

pdbl = PDB.PDBList()
pdbl.retrieve_pdb_file("2DN1")
parser = PDB.PDBParser()
structure = parser.get_structure("2DN1", "dn/pdb2dn1.ent")
for model in structure:
    for chain in model:
        print(chain)
        for residue in chain:
            print(residue.resname, residue.id[1])
            for atom in residue:
                print(atom.name, atom.coord)
def download_PDB_struct(query: str = '1AXC'):
    """Obtain PDB structural models"""
    pdbl = PDB.PDBList()
    pdbl.retrieve_pdb_file(query, pdir='.')