def test_get_recent_changes(self):
    """Tests the Bio.PDB.PDBList.get_recent_changes method."""
    # obsolete_pdb declared to prevent creating the "obsolete" directory
    pdblist = PDBList(obsolete_pdb="unimportant")
    url = pdblist.pdb_server + "/pub/pdb/data/status/latest/added.pdb"
    entries = pdblist.get_status_list(url)
    self.assertIsNotNone(entries)
def DoFetchObsoletes(filename):
    obs = []
    fetch_ok = True
    try:
        pdblist = PDBList()
        sys.stdout.write("INFO: Fetching obsolete structure information online...\n")
        obs = pdblist.get_all_obsolete()
    except Exception:  # narrowed from a bare except; network errors are the expected failure
        fetch_ok = False
        sys.stderr.write(
            "[FAILED]\nUnable to fetch obsolete structures information online.\n")
    if fetch_ok:
        sys.stdout.write("[OK].\n")
        try:
            with open(filename, 'w') as f:
                for ob in obs:
                    f.write("%s\n" % ob)
            fetch_ok = True
        except IOError:
            sys.stderr.write(
                "ERROR: Could not write obsoletes into file: '%s'.\n" % filename)
            fetch_ok = False
    return fetch_ok, obs
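# A minimal usage sketch for DoFetchObsoletes() above; the cache filename is
# an illustrative assumption, not taken from the original script.
ok, obsolete_codes = DoFetchObsoletes("obsoletes.txt")
if ok:
    sys.stdout.write("INFO: %d obsolete entries cached.\n" % len(obsolete_codes))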
def test_get_all_entries(self):
    """Tests the Bio.PDB.PDBList.get_all_entries method."""
    # obsolete_pdb declared to prevent creating the "obsolete" directory
    pdblist = PDBList(obsolete_pdb="unimportant")
    entries = pdblist.get_all_entries()
    # As the number of entries grows constantly, the test checks whether a
    # certain number was exceeded
    self.assertGreater(len(entries), 100000)
def test_get_all_obsolete(self):
    """Tests the Bio.PDB.PDBList.get_all_obsolete method."""
    # obsolete_pdb declared to prevent creating the "obsolete" directory
    pdblist = PDBList(obsolete_pdb="unimportant")
    entries = pdblist.get_all_obsolete()
    # As the number of obsolete entries grows constantly, the test checks
    # whether a certain number was exceeded
    self.assertGreater(len(entries), 3000)
def check(self, structure, filename, file_format, obsolete=False, pdir=None):
    with self.make_temp_directory(os.getcwd()) as tmp:
        pdblist = PDBList(pdb=tmp, obsolete_pdb=os.path.join(tmp, "obsolete"))
        path = os.path.join(tmp, filename)
        if pdir:
            pdir = os.path.join(tmp, pdir)
        pdblist.retrieve_pdb_file(structure, obsolete=obsolete, pdir=pdir,
                                  file_format=file_format)
        self.assertTrue(os.path.isfile(path))
        os.remove(path)
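# A minimal usage sketch for the check() helper above; the structure code,
# expected filename, and file format are illustrative assumptions, not taken
# from the original test suite (PDBList names PDB-format downloads
# "pdb<code>.ent").
def test_retrieve_pdb_format(self):
    """Sketch: a PDB-format download should land at the expected path."""
    self.check("127d", "pdb127d.ent", "pdb")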
def download_PDB_file():
    fichiers = os.listdir("balibase/RV11.unaligned")
    pdbl = PDBList()  # one PDBList is enough; no need to recreate it per id
    for file in fichiers:
        records = saveFASTA("balibase/RV11.unaligned/" + file)
        ids = []
        for record in records:
            ids.append(record.id.split("_")[0])
        for i in ids:
            pdbl.retrieve_pdb_file(i, pdir="PDB")
            poly[residue].id)][6:])
        featuresList = [
            resCode, phi, psi, depth, len(residues), len(curCharged),
            len(curPolar), len(curNonPolar)
        ] + [secondary] + energyList
        # drop residues for which phi/psi cannot be calculated
        if None not in featuresList:
            result[poly[residue].id[1]] = featuresList
    return result


pdbList = PDBList()


# returns a dict mapping PDB ID to (input array, binding output list,
# catalytic output list)
def readAnnotations(path):
    result = dict()
    string = readFile(path)
    for protein in string.splitlines():
        # parsing the ANNOTATIONS file table; the filter below already drops
        # empty fields, so the original elems.remove("") (which raises
        # ValueError when no empty string is present) is not needed
        elems = protein.split(" ")
        elems = [i for i in elems if i != ""]
        PDBId = elems[0]
        print("extracting features for PDB ID: ", PDBId)
def getpdbs(names):
    cwd = os.getcwd()
    pdbl = PDBList()
    pdbl.download_pdb_files(names, obsolete=False, file_format="pdb", pdir=cwd)
    pdbl.download_pdb_files(names, obsolete=True, file_format="pdb", pdir=cwd)
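# A minimal usage sketch for getpdbs() above; the PDB codes are illustrative
# assumptions. Calling with both obsolete=False and obsolete=True fetches
# current and obsolete entries into the working directory.
getpdbs(["1fat", "1ehz"])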
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
parser = PDBParser()
for i in ["ID"]:  # replace "ID" with actual PDB codes
    pdbl.retrieve_pdb_file(pdb_code=i, file_format="pdb", pdir="./")
    structure_id = i
    filename = "pdb" + i.lower() + ".ent"
    structure = parser.get_structure(structure_id, filename)
    print("id: ", structure_id)
    print("name: ", structure.header["name"])
    print("deposition date :", structure.header["deposition_date"])
    print("release date :", structure.header["release_date"])
    print("structure method : ", structure.header["structure_method"])
    print("resolution : ", structure.header["resolution"])
    print("")
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
import urllib
import xml.etree.ElementTree as ET

parser = PDBParser()
pdbl = PDBList()
structure = parser.get_structure(
    '4igk', 'p3_website/p3_app/static/p3_app/pdb_files/1JM7.pdb')
#structure = pdbl.retrieve_pdb_file('4igk')
residues = structure.get_residues()
compound = structure.header['compound']
title = structure.header['name']
journal = structure.header['journal_reference']
dict_keys = ['synonym', 'chain', 'fragment', 'molecule']
print(compound)
if 'unp residues' in compound['1']:
    start_pos, end_pos = compound['1']['fragment'].split(
        'unp residues ')[1].split('-')
    start_pos = int(start_pos)
    end_pos = int(end_pos)
print(journal.split())
pdb_dict = {}
for comp in range(0, len(compound)):
    key = str(comp + 1)
    for dkey in dict_keys:
        compound[key][dkey]
# if residue is within start and end positions
def index(request):
    errors = []
    which_error = None
    if request.POST:
        try:
            rsid = request.POST['rs_id']
        except MultiValueDictKeyError:
            rsid = False
        try:
            chr_num = request.POST['chr_num']
            chr_loc = request.POST['chr_loc']
        except MultiValueDictKeyError:
            chr_num = False
            chr_loc = False
        snpsub = request.POST.get('snpsubstitution', '')
        try:
            protein_id = request.POST['protein_id']
            aa_change = request.POST['aa_change']
        except MultiValueDictKeyError:
            protein_id = False
            aa_change = False
        try:
            gene_id = request.POST['gene_id']
        except MultiValueDictKeyError:
            gene_id = False
        if rsid is not False:
            if not rsid.startswith('rs'):
                errors.append('Please enter a valid rs number')
            elif not Brca1New.objects.filter(rsid=rsid).exists():
                errors.append('This ID is not currently in our database.')
            if not errors:
                brca1_objects = Brca1New.objects.filter(rsid=rsid)
                brca1_object = ''
                for obj in brca1_objects:
                    sub = obj.hgvs_cdna.split('>')[1]
                    if sub == snpsub:
                        brca1_object = obj
                        break
                if brca1_object == '':
                    errors.append(
                        'You have not entered a missense mutation for this '
                        'position.')
        elif chr_loc is not False:
            try:
                int(chr_loc)
            except ValueError:
                errors.append(
                    'You have not entered a valid integer for the chromosome '
                    'location.')
            try:
                int(chr_num)
            except ValueError:
                errors.append(
                    'You have not entered a valid integer for the chromosome '
                    'number.')
            if not Brca1New.objects.filter(hg19_pos=chr_loc).exists():
                errors.append(
                    'This position does not currently exist in our database.')
            var_loc_objects = Brca1New.objects.filter(hg19_pos=chr_loc)
            brca1_object = ''
            for obj in var_loc_objects:
                # QuerySets have no remove(); assuming the intent was simply
                # to skip rows on the wrong chromosome
                if obj.hg19_chr != chr_num:
                    continue
                sub = obj.hgvs_cdna.split('>')[1]
                if sub == snpsub:
                    brca1_object = obj
                    break
            if brca1_object == '':
                errors.append(
                    'You have not entered a missense mutation for this '
                    'position.')
        elif protein_id is not False:
            if not Brca1New.objects.filter(gene=protein_id).exists():
                errors.append(
                    'You have entered an ID that does not currently exist in '
                    'our database.')
            else:
                brca1_object = None
                for obj in Brca1New.objects.filter(gene=protein_id):
                    if (aa_change == obj.hgvs_prot.split('.')[1]
                            or aa_change == obj.hgvs_prot_code1.split('.')[1]):
                        brca1_object = obj
                        break
                if brca1_object is None:
                    errors.append(
                        'That amino acid change was not found in the database.')
        elif gene_id is not False:
            if not Brca1New.objects.filter(gene=gene_id).exists():
                errors.append(
                    'You have entered an ID that does not currently exist in '
                    'our database.')
        if errors:
            if rsid is not False:
                which_error = 'rsid'
            elif chr_loc is not False:
                which_error = 'chr'
            elif protein_id is not False:
                which_error = 'protein_id'
            elif gene_id is not False:
                which_error = 'gene_id'
            return render(request, 'p3_app/index.html', {
                'errors': errors,
                'which_error': which_error
            })
        context = {}
        context['rsid'] = rsid
        context['chr_loc'] = chr_loc
        context['chr_num'] = chr_num
        context['protein_id'] = protein_id
        context['gene_id'] = gene_id
        context['aa_change'] = aa_change
        # get the right gene dict based on gene_id -- dictionary?
        context['gene_dict'] = gene_dict
        pdb_list_dict = {
            'BRCA1': ['1JM7', '4IGK'],
        }
        pdb_stop_start_dict = {
            '1JM7': [1, 103],
            '4IGK': [1646, 1859],
        }
        context['pdb_stop_start_dict'] = pdb_stop_start_dict
        if gene_id is False:
            resi_num = brca1_object.hgvs_prot_code1.split('.')[1][:-1][1:]
            context['resi_num'] = resi_num
            resi_string = 'resi:' + str(resi_num) + ';chain:A'
            context['resi_string'] = resi_string
            pdb_entry_list = pdb_list_dict[brca1_object.gene]
            pdb_entry = None
            for pdbe in pdb_entry_list:
                pdb_stop_start_list = pdb_stop_start_dict[pdbe]
                if (int(resi_num) >= pdb_stop_start_list[0]
                        and int(resi_num) <= pdb_stop_start_list[1]):
                    pdb_entry = pdbe
        else:
            pdb_entry = pdb_list_dict[gene_id]
            resi_num = 0
        # create dictionary for this list
        context['pdb_entry'] = pdb_entry
        # list of pdb information for each gene for the protein sequence diagram
        pdb_translate_list = [
            'ring', 'brca1', 'bard1', 'brct', 'unp', 'atrip', 'atm', 'rad3'
        ]
        do_not_translate_list = ['of', 'being', 'and', 'in', 'with', 'the']

        def correct_pdb_capitalization(title):
            title_split = title.split()
            title_list = []
            for t in title_split:
                if '/' in t:
                    t = t.split('/')
                    for ts in t:
                        for x in range(0, len(pdb_translate_list)):
                            if pdb_translate_list[x] == ts:
                                index = t.index(ts)
                                ts = ts.upper()
                                t[index] = ts
                        if (ts not in do_not_translate_list
                                and ts not in pdb_translate_list):
                            ts = ts.title()
                    t = "/".join(t)
                    title_list.append(t)
                elif '-' in t:
                    t = t.split('-')
                    for ts in t:
                        for x in range(0, len(pdb_translate_list)):
                            if pdb_translate_list[x] == ts:
                                index = t.index(ts)
                                ts = ts.upper()
                                t[index] = ts
                        if (ts not in do_not_translate_list
                                and ts not in pdb_translate_list):
                            ts = ts.title()
                    t = '-'.join(t)
                    title_list.append(t)
                else:
                    if (t not in do_not_translate_list
                            and t not in pdb_translate_list):
                        t = t.title()
                    if t in pdb_translate_list:
                        t = t.upper()
                    title_list.append(t)
            title = ' '.join(title_list)
            return title

        if pdb_entry is not None:
            parser = PDBParser()
            pdbl = PDBList()
            module_dir = os.path.dirname(__file__)  # get current directory
            if type(pdb_entry) == str:
                file_path = os.path.join(
                    module_dir, 'static/p3_app/pdb_files/' + pdb_entry + '.pdb')
                pdb_file = open(file_path)
                structure = parser.get_structure(pdb_entry, pdb_file)
                compound = structure.header['compound']
                title = structure.header['name']
                title = correct_pdb_capitalization(title)
                for keys, values in compound.items():
                    for k, v in values.items():
                        if k != 'other_details':
                            values[k] = correct_pdb_capitalization(v)
                journal = structure.header['journal_reference']
                pdb_list = [compound, title, journal]
            elif type(pdb_entry) == list:
                pdb_dict = {}
                for pdbe in pdb_entry:
                    file_path = os.path.join(
                        module_dir, 'static/p3_app/pdb_files/' + pdbe + '.pdb')
                    pdb_file = open(file_path)
                    # assuming each structure should be parsed under its own
                    # id; the original passed the whole pdb_entry list here
                    structure = parser.get_structure(pdbe, pdb_file)
                    compound = structure.header['compound']
                    title = structure.header['name']
                    title = correct_pdb_capitalization(title)
                    journal = structure.header['journal_reference']
                    pdb_list = [compound, title, journal]
                    pdb_dict[pdbe] = pdb_list
                    pdb_list = []
                context['pdb_dict'] = pdb_dict
        else:
            pdb_list = []
        context['pdb_list'] = pdb_list
        # if gene_id is not False: get gene list
        # create a list of synonyms and fragments so they're properly
        # capitalized for the site
        pfam_id = 'P38398'
        # create dictionary for uniprot/gene names when all gene names are present
        module_dir = os.path.dirname(__file__)  # get current directory
        file_path = os.path.join(
            module_dir, 'static/p3_app/pfam_files/' + pfam_id + '.xml')
        xml_file = open(file_path)
        tree = ET.parse(xml_file)
        root = tree.getroot()
        count = 0
        match_dict = {}
        resi_in_domain = None
        for child in root.iter('match'):
            type_attr = child.attrib['type']
            acc_attr = child.attrib['accession']
            id_attr = child.attrib['id']
            for grandchild in child.iter('location'):
                start = int(grandchild.attrib['start'])
                end = int(grandchild.attrib['end'])
                # the original comparison (resi_num > end and resi_num < start)
                # could never be true; assuming an in-range check was intended
                if start <= int(resi_num) <= end:
                    resi_in_domain = id_attr
                match_dict[count] = [type_attr, acc_attr, id_attr, start, end]
                count += 1
        for seq in root.iter('sequence'):
            sequence = seq.text
        context['sequence'] = sequence
        context['match_dict'] = match_dict
        context['resi_in_domain'] = resi_in_domain
        # additional pfam info for the mouseover feature
        file_path = os.path.join(
            module_dir, 'static/p3_app/uniprot_files/' + pfam_id + '.xml')
        record = SeqIO.read(open(file_path), 'uniprot-xml')
        uniprot_comment_dict = {}
        uniprot_comment_dict['domain'] = record.annotations['comment_domain']
        uniprot_comment_dict['tissue_specificity'] = record.annotations[
            'comment_tissuespecificity']
        uniprot_comment_dict['enzyme_regulation'] = record.annotations[
            'comment_enzymeregulation']
        uniprot_comment_dict['disease'] = record.annotations['comment_disease']
        oi_dict = {}
        for oi in record.annotations['comment_onlineinformation']:
            oi = oi.split('@')
            oi_dict[oi[0]] = oi[1]
        uniprot_comment_dict['subunit'] = record.annotations['comment_subunit']
        uniprot_comment_dict['function'] = record.annotations[
            'comment_function']
        uniprot_comment_dict['polymorphism'] = record.annotations[
            'comment_polymorphism']
        uniprot_comment_dict['ptm'] = record.annotations['comment_PTM']
        context['uniprot_comment_dict'] = uniprot_comment_dict
        context['oi_dict'] = oi_dict
        uniprot_references = record.annotations['references']
        context['uniprot_references'] = uniprot_references
        secondary_structure_features = []
        variant_features = []
        other_uniprot_features = []
        for feature in record.features:
            if feature.qualifiers['type'] in ('strand', 'helix', 'turn'):
                if feature.qualifiers['type'] == 'strand':
                    feature.type = 'beta_strand'
                secondary_structure_features.append(feature)
            elif feature.type in ('cross-link', 'modified residue',
                                  'mutagenesis site', 'sequence variant',
                                  'sequence conflict'):
                variant_features.append(feature)
            else:
                other_uniprot_features.append(feature)
        context['uniprot_features'] = record.features
        context['secondary_structure_features'] = secondary_structure_features
        context['variant_features'] = variant_features
        context['other_uniprot_features'] = other_uniprot_features
        variation_dict = {}
        for rsid in Brca1New.objects.values('rsid').distinct():
            rsid = rsid['rsid']
            if rsid.startswith('rs'):
                variation_dict[rsid] = True
        context['variation_dict'] = variation_dict
        # add info for pubmed list - turn into dictionary of citation values
        # add checkpoint to make sure the residue of the pdb file matches the
        # residue of the variation in the database
        count = 0
        swissprot_dict = {}
        for sp_ran in Brca1New.objects.values_list('swissprot_range').distinct():
            sp_ran = ''.join(sp_ran)
            obj = Brca1New.objects.filter(swissprot_range=sp_ran)[0]
            sp_type = obj.swissprot_type.encode('ascii')
            sp_desc = obj.swissprot_desc.encode('ascii')
            if sp_desc == '':
                continue
            sp_ran = sp_ran.split('[')[1].split(']')[0]
            if '-' in sp_ran:
                start, end = sp_ran.split('-')
                start = int(start)
                end = int(end)
            else:
                start = int(sp_ran)
                end = int(sp_ran)
            swissprot_dict[count] = [sp_type, start, end, sp_desc]
            count += 1
        context['swissprot_dict'] = swissprot_dict

        cdna_pos_list = []
        for cdna_pos in Brca1New.objects.values_list('codon'):
            cdna_pos_list.append(''.join(cdna_pos))

        def build_alamut_domain_dict(field_name):
            # collapse runs of identical domain annotations into
            # (domain, start, end) entries keyed by an integer index; this
            # replaces the four copy-pasted loops of the original
            ala_list = []
            for ala_dom in Brca1New.objects.values_list(field_name):
                ala_list.append(''.join(ala_dom))
            domain_dict = {}
            count = 0
            end = 0
            for ala_ind in range(1, len(ala_list)):
                if ala_list[ala_ind - 1] == ala_list[ala_ind]:
                    continue
                if end == 0:
                    for ai in range(1, len(ala_list)):
                        if ala_list[ai] == ala_list[ala_ind - 1]:
                            start = int(cdna_pos_list[ai])
                            break
                else:
                    start = end + 1
                end = int(cdna_pos_list[ala_ind])
                domain = ala_list[ala_ind - 1]
                if domain != '':
                    domain_dict[count] = [domain, start, end]
                    count += 1
            return domain_dict

        # TODO (from the original): handle the case where the last entry is
        # part of a domain
        context['alamut_pd1_dict'] = build_alamut_domain_dict(
            'alamut_proteindomain1')
        context['alamut_pd2_dict'] = build_alamut_domain_dict(
            'alamut_proteindomain2')
        context['alamut_pd3_dict'] = build_alamut_domain_dict(
            'alamut_proteindomain3')
        context['alamut_pd4_dict'] = build_alamut_domain_dict(
            'alamut_proteindomain4')
        if gene_id is not False:
            return render(request, 'p3_app/gene_results.html', context)
        if brca1_object.hgmd_pubmed == 'Pubmed':
            hgmd_pubmed_list = brca1_object.hgmd_pubmed_list.split(' ')
        else:
            hgmd_pubmed_list = None
        context['hgmd_pubmed_list'] = hgmd_pubmed_list
        pubmed_dict = {}
        if hgmd_pubmed_list is not None:
            for pmid in hgmd_pubmed_list:
                if pmid in pmid_dict.keys():
                    pubmed_dict[pmid] = pmid_dict[pmid]
        context['pubmed_dict'] = pubmed_dict
        if brca1_object.alamut_siftprediction == '':
            brca1_object.alamut_siftprediction = None
        if '_' in brca1_object.muttaster_prediction:
            brca1_object.muttaster_prediction = \
                brca1_object.muttaster_prediction.replace('_', ' ')
        if brca1_object.muttaster_features != '':
            brca1_object.muttaster_features = \
                brca1_object.muttaster_features.split(',')
        ss_img_loc = int(brca1_object.suspect_score) * 4
        context['ss_img_loc'] = ss_img_loc
        agvgd_dict = {
            'C0': 40, 'C15': 92, 'C25': 148, 'C35': 205,
            'C45': 261, 'C55': 317, 'C65': 375
        }
        context['agvgd_dict'] = agvgd_dict
        muttaster_model_dict = {
            'complex_aa': 'mutation introducing a premature stop codon',
            'simple_aae': 'substitution/insertion/deletion of a single amino acid'
        }
        context['muttaster_model_dict'] = muttaster_model_dict
        context['brca1_object'] = brca1_object
        return render(request, 'p3_app/results_page2.html', context)
    return render(request, 'p3_app/index.html', {})
""" prots,enzsites = findRestr("orf_coding_all.fasta") print("Non restrictive proteins : ",prots) for enz in enzsites: print("ID ",enz[0]) print("EcoRI ",enz[1][0]) print("XhoI ",enz[1][1]) print("TaqI ",enz[1][2])""" #######################################################################################################"" from Bio.PDB.MMCIFParser import MMCIFParser from Bio.PDB.PDBList import PDBList from Bio.PDB.MMCIF2Dict import MMCIF2Dict pdbl = PDBList() pdbl.retrieve_pdb_file("2GAA") def readPDBFile(filename): mmcif_dict = MMCIF2Dict(filename) nbchains, nbres, nbatoms, res = mmcif_dict[ '_struct_sheet.number_strands'], mmcif_dict[ '_struct_site.pdbx_num_residues'], mmcif_dict[ '_refine_hist.number_atoms_total'], mmcif_dict['_exptl.method'] return sum([int(nbchains[i]) for i in range(len(nbchains))]), nbres, nbatoms, res print(readPDBFile("ga/2gaa.cif"))
def DoRetrievePDBFile(aPDB_Code, aFolder):
    global USE_ALT_PDB_SERVER
    done = False
    errors_before_quit = 20
    seconds_between_retries = 30
    fetchedfile = ""
    alt_server = "http://www.rcsb.org/pdb/files/"
    while not done:
        if USE_ALT_PDB_SERVER:
            pdblist = PDBList(server=alt_server)
        else:
            pdblist = PDBList()
        # other mirrors: 'ftp://ftp.wwpdb.org', 'ftp://ftp.rcsb.org',
        # 'ftp.ebi.ac.uk/pub/databases/pdb/'
        try:
            fetchedfile = pdblist.retrieve_pdb_file(
                pdb_code=aPDB_Code, pdir=aFolder, file_format="pdb",
                obsolete=False)
            done = True
            if fetchedfile and (fetchedfile.find(".ent") > 0
                                or fetchedfile.find(".pdb") > 0):
                print("INFO: Structure " + aPDB_Code + " fetched. [OK]")
            else:
                print("WARNING: Fetch failed [FAIL]")
        except IOError as ex:
            sys.stderr.write(
                "WARNING: Could not download structure {0}. An exception of "
                "type {1} occurred.\n Arguments: {2!r}\n".format(
                    aPDB_Code, type(ex).__name__, ex.args))
            sys.stderr.write("INFO: Retrying connection in %i seconds...\n"
                             % seconds_between_retries)
            for a in ex.args:
                # Downloading too many structures too fast?
                if str(a).lower().find("too many") >= 0:
                    seconds_between_retries += 10
                    break
                # the original searched a lowercased string for
                # "No such file", which could never match; use a lowercase
                # needle instead
                if str(a).lower().find("no such file") >= 0:
                    # No need to retry
                    return fetchedfile
                if str(a).lower().find("did not properly respond") >= 0:
                    # No need to retry on this server
                    sys.stderr.write(
                        "INFO: Switching download thread to alternative "
                        "server '%s'.\n" % alt_server)
                    USE_ALT_PDB_SERVER = True
            time.sleep(seconds_between_retries)
            done = False
            errors_before_quit -= 1
            if errors_before_quit <= 0:
                sys.stderr.write("ERROR: Failed too many times. Quitting...\n")
                break
    return fetchedfile
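# A minimal usage sketch for DoRetrievePDBFile() above; the PDB code and
# target folder are illustrative assumptions. USE_ALT_PDB_SERVER must exist
# at module level before the first call.
USE_ALT_PDB_SERVER = False
fetched = DoRetrievePDBFile("1FDN", "./PDB")
if fetched:
    print("INFO: Downloaded to " + fetched)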
def generate_structural_statistics(jobId, dom, pdb_code, selchain,
                                   uploaded_str, modeled_str=False,
                                   savequeue="jobinfo"):
    try:
        tdata = TripleMapping.objects.get(pk=jobId)
    except (KeyError, TripleMapping.DoesNotExist):
        return "str stats gen error!"
    threeList = [
        "ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS", "LEU",
        "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL", "TRP", "TYR"
    ]
    if uploaded_str is False:
        pdbl = PDBList()
        pdbl.retrieve_pdb_file(pdb_code, pdir='./PDB', file_format="pdb")
        pdb_filename = "./PDB/pdb" + pdb_code.lower() + ".ent"
    else:
        if modeled_str is False:
            pdb_filename = "./PDB/" + jobId + "___" + pdb_code
        else:
            pdb_filename = "./PDB/model/" + pdb_code
    pdbsequencefull = []
    pdbsequencenum = []
    structure = Bio.PDB.PDBParser().get_structure(pdb_code, pdb_filename)
    model = structure[0]
    dssp = DSSP(model, pdb_filename, dssp='mkdssp', acc_array="Wilke")
    for chain in model:
        if chain.id == selchain:
            for residue in chain:
                if Bio.PDB.Polypeptide.is_aa(residue):
                    number = residue.get_id()
                    try:
                        num = str(number[1]) + str((number[2].rstrip())[0])
                    except IndexError:
                        num = str(number[1])
                    pdbsequencenum.append(residue.get_resname() + num)
                    pdbsequencefull.append(residue.get_id())
    pdbsequencenum = pdbsequencenum[1:-1]
    pdbsequencefull = pdbsequencefull[1:-1]
    dssp_info = []
    for i in range(0, len(pdbsequencenum)):
        chain_res = pdbsequencenum[i]
        residue_key = pdbsequencefull[i]
        if chain_res[0:3] in threeList:
            dssp_res = dssp[selchain, residue_key]
            dssp_info.append({
                "name": chain_res,
                "sec": str(dssp_res[2]),
                "phi": str(dssp_res[4]),
                "psi": str(dssp_res[5]),
                "depth": str(dssp_res[3])
            })
    pdb_coded = pdb_code
    if modeled_str is True:
        pdb_coded = pdb_code.split("_")[3] + "_" + pdb_code.split("_")[4]
    full_dssp_info = {"_".join([dom, pdb_coded, selchain]): dssp_info}
    prev_dsspinfo = getattr(tdata, "dsspinfo")
    if prev_dsspinfo:
        prev_dsspinfo = prev_dsspinfo.split("]}]")[0] + "]},"
    else:
        prev_dsspinfo = "["
    setattr(tdata, "dsspinfo", prev_dsspinfo + str(full_dssp_info) + "]")
    tdata.save()
    # run the RING software and obtain results
    process = Popen([
        "./bin/Ring", "-i", pdb_filename, "-c", selchain,
        "-N", "./jobs/nodes/" + jobId + "_" + dom + "_" + pdb_code + "_"
        + selchain + ".nds",
        "-E", "./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_"
        + selchain + ".eds",
        "-g", "1"
    ], stdout=PIPE)
    (output, err) = process.communicate()
    exit_code = process.wait()
    # read the RING results and generate json objects
    f1 = open("./jobs/edges/" + jobId + "_" + dom + "_" + pdb_code + "_"
              + selchain + ".eds", "r+")
    lines = f1.readlines()
    G = nx.MultiGraph()
    G2 = nx.Graph()
    pairs = []
    singlegraph = {}
    for l in range(1, len(lines)):
        line = lines[l]
        res1 = line.split()[0].split(":")[-1] + line.split()[0].split(":")[1]
        res2 = line.split()[2].split(":")[-1] + line.split()[2].split(":")[1]
        order_pair = sorted([res1, res2])
        interaction = line.split()[1]
        energy = float(line.split()[5])
        # the original tested res1 twice; res2 must be checked as well
        if "NLA" not in res1 and "NLA" not in res2:
            G.add_edge(res1, res2, weight=energy, itype=interaction)
            if order_pair not in pairs:
                singlegraph["".join(order_pair)] = (res1, res2, energy)
                pairs.append(order_pair)
            else:
                new_energy = singlegraph["".join(order_pair)][2] + energy
                singlegraph["".join(order_pair)] = (res1, res2, new_energy)
    G2.add_weighted_edges_from(singlegraph.values())
    g_distance_dict1 = {(e1, e2, w): 1 / w
                        for e1, e2, w in G.edges(data='weight')}
    nx.set_edge_attributes(G, g_distance_dict1, 'distance')
    g_distance_dict = {(e1, e2): 1 / weight
                       for e1, e2, weight in G2.edges(data='weight')}
    nx.set_edge_attributes(G2, g_distance_dict, 'distance')
    graph_stats = []
    weighted_degree = G.degree(weight='weight')
    between = nx.betweenness_centrality(G2, weight='weight')
    closeness = nx.closeness_centrality(G, distance='distance')
    mutstats = None
    for k in between:
        graph_stats.append({
            "res": k,
            "betweeness": between[k],
            "closeness": closeness[k],
            "wdegree": weighted_degree[k]
        })
        if (modeled_str is True and k == pdb_code.split("_")[5]
                + pdb_code.split("_")[6].split(".")[0]):
            mutstats = {
                "res": k,
                "betweeness": between[k],
                "closeness": closeness[k],
                "wdegree": weighted_degree[k]
            }
    graph_stats_full = {"_".join([dom, pdb_coded, selchain]): graph_stats}
    if modeled_str is False:
        prev_gstats = getattr(tdata, "graph_stats")
        if prev_gstats:
            prev_gstats = prev_gstats.replace("];", ", ")
        else:
            prev_gstats = "["
        setattr(tdata, "graph_stats",
                prev_gstats + str(graph_stats_full) + "];")
        tdata.save()
    else:
        prev_muts = getattr(tdata, "mut_stats")
        if prev_muts:
            prev_muts = prev_muts.replace("]", " , ")
        else:
            prev_muts = "["
        setattr(tdata, "mut_stats", prev_muts + str(mutstats) + "]")
        tdata.save()
    datag = json_graph.node_link_data(G)
    s = json.dumps(datag)
    datag_full = {"_".join([dom, pdb_coded, selchain]): s}
    if modeled_str is False:
        prev_datag = getattr(tdata, "graph_json")
        if prev_datag:
            prev_datag = prev_datag.replace("];", ", ")
        else:
            prev_datag = "["
        setattr(tdata, "graph_json", prev_datag + str(datag_full) + "];")
        tdata.save()
    else:
        # the original built mNode as a dict, which never matched the string
        # node ids; assuming the plain residue id string was intended
        mNode = pdb_code.split("_")[5] + pdb_code.split("_")[6].split(".")[0]
        nodesAt5 = [x for x in G.nodes() if x == mNode]
        H = G.subgraph(nodesAt5)
        datam = json_graph.node_link_data(H)
        sm = json.dumps(datam)
        # assuming the subgraph json (sm) was intended here; the original
        # stored the full-graph json (s)
        datam_full = {"_".join([dom, pdb_coded, mNode]): sm}
        prevdatam = getattr(tdata, "mut_json")
        if prevdatam:
            prevdatam = prevdatam.replace("];end;", ", ")
        else:
            prevdatam = "["
        setattr(tdata, "mut_json", prevdatam + str(datam_full) + "];end;")
        tdata.save()
    f1.close()
    # mark this job as done in the saved queue
    jobs = getattr(tdata, savequeue)
    job_this = dom + "_" + pdb_code + "_" + selchain
    if modeled_str is True:
        job_this = (dom + "_" + pdb_code.split("_")[3] + "_"
                    + pdb_code.split("_")[4] + "_"
                    + pdb_code.split("_")[6].split(".")[0] + "_"
                    + pdb_code.split("_")[5])
    new_jobs = []
    for job in jobs.split(","):
        if job_this in job:
            new_jobs.append("_".join(job.split("_")[:-1]) + "_done")
        else:
            new_jobs.append(job)
    setattr(tdata, savequeue, ",".join(new_jobs))
    tdata.save()
    return "str stats gen!"
from Bio.PDB.PDBList import PDBList

pdblist = PDBList()
# downloads structure 127D in PDBx/mmCIF format
pdblist.retrieve_pdb_file("127d")
# downloads structure 127D in PDB format
pdblist.retrieve_pdb_file("127d", file_format="pdb")
# downloads structure 127D in PDBML/XML format
pdblist.retrieve_pdb_file("127d", file_format="xml")
# downloads structure 127D in MMTF format
pdblist.retrieve_pdb_file("127d", file_format="mmtf")
# downloads large structure 3K1Q as a PDB-like bundle
pdblist.retrieve_pdb_file("3k1q", file_format="bundle")
# downloads obsolete structure 347D in PDBx/mmCIF format
pdblist.retrieve_pdb_file("347d", obsolete=True)
# downloads structures 1ESY and 127D in PDBx/mmCIF format
# (download_pdb_files expects a list of codes, not separate arguments)
pdblist.download_pdb_files(["1esy", "127D"])
# downloads the entire PDB database in PDBx/mmCIF format
pdblist.download_entire_pdb()
# performs a weekly update of the database
pdblist.update_pdb()
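# retrieve_pdb_file() returns the local path where the file was (expected to
# be) saved, so the result can be verified before parsing; a minimal sketch,
# with an illustrative download directory:
import os
local_path = pdblist.retrieve_pdb_file("127d", pdir="./structures")
if not os.path.isfile(local_path):
    raise IOError("download failed for 127d")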
#!/usr/bin/env python
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
from Bio.PDB import vectors
import numpy as np
import os

# Reading a PDB file
# get PDB from rcsb.org
basePath = '/home/kenneth/proj/proMin/code/biopython'
filename = 'pdb1fdn.ent'
pdb = PDBList().retrieve_pdb_file("1FDN", file_format='pdb')

# Create a PDBParser object
parser = PDBParser()  # PERMISSIVE=0 will list all errors with the PDB file
structure = parser.get_structure("1FDN", os.path.join(basePath, filename))

# print(type(structure))  # what type of object did the parser return
# <class 'Bio.PDB.Structure.Structure'>
# print(dir(structure))  # check what attributes exist
# ['__class__', ..., '_generate_full_id', '_id', '_reset_full_id', 'add',
#  'child_dict', 'child_list', 'copy', 'detach_child', 'detach_parent',
#  'full_id', 'get_atoms', 'get_chains', 'get_full_id', 'get_id',
#  'get_iterator', 'get_level', 'get_list', 'get_models', 'get_parent',
#  'get_residues', 'has_id', 'header', 'id', 'insert', 'level', 'parent',
#  'set_parent', 'transform', 'xtra']

# PDB Structure object layers: 1) model, which contains 2) chains, which
# contain 3) residues, which contain 4) atoms
model = structure[0]
chain = model["A"]
# print(list(chain.get_residues()))
# dealing with hetero atoms:
# http://biopython.org/DIST/docs/tutorial/Tutorial.html#sec201
residue = chain[(
from autopack.Ingredient import MultiSphereIngr
from upy import hostHelper

autopack.helper = hostHelper.Helper()
autopack.helper.host = "none"
autopack.forceFetch = False

from scipy.cluster.vq import kmeans, vq
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList
from Bio.SeqUtils.ProtParam import ProteinAnalysis
from Bio.PDB.Polypeptide import three_to_one
from Bio.PDB.Polypeptide import is_aa

fetch = PDBList(pdb=data_folder)
p = PDBParser(PERMISSIVE=1)


def getMWFromSequence(sequence):
    X = ProteinAnalysis(sequence)
    mw = X.molecular_weight()
    return mw


def getSequenceStructure(s):
    seq = ""
    for r in s.get_residues():
        if is_aa(r.get_resname(), standard=True):
            seq += three_to_one(r.get_resname())
        else:
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
pdbl.retrieve_pdb_file("6WO1", file_format="mmtf", pdir="/home/koreanraichu/")
# If no format is given, the structure is downloaded as a CIF file.
# Passing file_format="<format>" downloads it in that specific file format.
# Passing pdir="<path>" also lets you choose the download directory.
from Bio.PDB.PDBParser import PDBParser
from Bio.PDB.PDBList import PDBList

pdbl = PDBList()
for vrstica in open('./structures lists/new structures.txt'):
    structure_id = vrstica.strip('\n')
    pdbl.retrieve_pdb_file(structure_id, file_format='pdb',
                           pdir='pdb structures')