def analyze_construct(self, filenames=None):
    self.logger.info("ANALYZING CONSTRUCT STRUCTURES")

    # read source files
    if not filenames:
        filenames = os.listdir(self.construct_data_dir)

    for filename in filenames:
        if filename[-3:] != 'pdb' and filename[-3:] != 'ent':
            continue
        root, ext = os.path.splitext(os.path.basename(filename))
        print(filename)
        print(root)
        filepath = os.sep.join([self.construct_data_dir, filename])
        self.logger.info("Working on a file: {}".format(filename))

        header = parse_pdb_header(filepath)
        parser = SequenceParser(filepath)

        json_data = OrderedDict()
        json_data["header"] = header
        json_data.update(parser.get_fusions())
        json_data.update(parser.get_mutations())
        json_data.update(parser.get_deletions())

        # a context manager ensures the output file handle is closed
        with open(os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)]), 'w') as outfile:
            json.dump(json_data, outfile, indent=4, separators=(',', ': '))
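# Standalone sketch of the dictionary parse_pdb_header() returns; the path
# "1abc.pdb" is a placeholder. The function accepts either a file path or an
# open handle.
from Bio.PDB import parse_pdb_header

header = parse_pdb_header("1abc.pdb")
# typical keys: 'name', 'head', 'idcode', 'deposition_date', 'release_date',
# 'structure_method', 'resolution', 'compound'
print(header['structure_method'], header['resolution'])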
def handle(self, *args, **options):
    q = QueryPDB()
    q.list_xtals(verbose=False)

    for record in q.new_structures:
        pdb_code = record[0]
        wt_id = Protein.objects.get(entry_name=record[1]).id
        pdb_path = os.sep.join([self.pdb_data_dir, "{}.pdb".format(pdb_code)])
        if not os.path.exists(pdb_path):
            self.download_pdb(pdb_code)
        self.parser = SequenceParser(pdb_path, wt_protein_id=wt_id)
        header = parse_pdb_header(pdb_path)
        self.create_yaml(pdb_code, record[1], header)
def handle(self, *args, **options):
    print("Working on file {}".format(options['pdb_file']))
    header = parse_pdb_header(options['pdb_file'])
    print(header['compound'])

    sp = SequenceParser(options['pdb_file'])
    c = list(sp.mapping.keys())[0]
    poly = sp.get_chain_peptides(c)
    for peptide in poly:
        print("Start: {} Stop: {} Len: {}".format(peptide[0].id[1], peptide[-1].id[1], len(peptide)))
        sp.map_to_wt_blast(c, peptide, None, int(peptide[0].id[1]))
    sp.map_seqres()
    sp.save_excel_report("test.xlsx")
    # sp.get_report()
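# Why peptide[0].id[1] gives the start position: in Bio.PDB a residue id is
# the tuple (hetfield, resseq, icode), so index 1 is the author-assigned
# residue number. Standalone illustration with a placeholder file name.
from Bio.PDB import PDBParser

structure = PDBParser(QUIET=True).get_structure("demo", "1abc.pdb")
first_residue = next(structure.get_residues())
hetfield, resseq, icode = first_residue.id
print(resseq)  # the integer printed and passed to map_to_wt_blast() above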
def post(self, request):
    # root, ext = os.path.splitext(request._request.FILES['pdb_file'].name)
    pdb_file = StringIO(
        request._request.FILES['pdb_file'].file.read().decode('UTF-8', "ignore"))

    header = parse_pdb_header(pdb_file)
    pdb_file.seek(0)  # parse_pdb_header consumes the handle; rewind for the parser

    parser = SequenceParser(pdb_file)

    json_data = OrderedDict()
    json_data["header"] = header
    json_data.update(parser.get_fusions())
    json_data.update(parser.get_mutations())
    json_data.update(parser.get_deletions())

    return Response(json_data)
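# Hypothetical client call for the view above: POST a PDB file and read the
# JSON back. Only the 'pdb_file' field name comes from the handler; the host
# and route are assumptions.
import requests

with open("1abc.pdb", "rb") as f:
    resp = requests.post("http://localhost:8000/structure/parse/", files={"pdb_file": f})
print(resp.json()["header"]["resolution"])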
def handle(self, *args, **options):
    root, ext = os.path.splitext(os.path.basename(options['pdb_file']))
    print("Working on file {}".format(options['pdb_file']))

    header = parse_pdb_header(options['pdb_file'])
    sp = SequenceParser(options['pdb_file'])

    print(sp.get_fusions())
    print(sp.get_mutations())
    print(sp.get_deletions())

    json_data = {}
    json_data["header"] = header
    json_data.update(sp.get_fusions())
    json_data.update(sp.get_mutations())
    json_data.update(sp.get_deletions())

    # a context manager ensures the output file handle is closed
    with open(os.sep.join([settings.DATA_DIR, "{}_auto.json".format(root)]), 'w') as outfile:
        json.dump(json_data, outfile, indent=4, separators=(',', ': '))
    # json.dump(json_data, open("test.json", 'w'), indent=4, separators=(',', ': '))
def Header_Data(AAAB14, pdbloc, ion_name):
    """
    Returns crucial Header Data for a specific pdbid.
    Somewhat non-specific to the Ion Environments project.
    """
    Data = []
    Head = AAAB14[0]
    Tail = AAAB14[1:]
    Head.insert(1, 'StructMethod')
    Head.insert(1, 'Resolution')
    Head.insert(len(Head), 'ECNum')
    Head.insert(len(Head), 'Type')
    Head.insert(len(Head), 'Ion')
    Data.append(Head)

    for item in Tail:
        fname = item[0]
        fname = fname.split('_')
        pdbid = fname[0]
        pdb = 'pdb' + pdbid + '.ent'
        chain = fname[1]

        with open(pdbloc + pdb, 'r') as handle:
            header_dict = parse_pdb_header(handle)

        name = header_dict['name']
        head = header_dict['head']
        method = header_dict['structure_method']
        reso = header_dict['resolution']
        Comp = header_dict['compound']
        ec = FindEC(chain.lower(), 'ec_number', Comp)

        # classify by the first digit of the EC number
        if ec.startswith('1.'):
            txt = 'Oxidoreductase'
        elif ec.startswith('2.'):
            txt = 'Transferase'
        elif ec.startswith('3.'):
            txt = 'Hydrolase'
        elif ec.startswith('4.'):
            txt = 'Lyase'
        elif ec.startswith('5.'):
            txt = 'Isomerase'
        elif ec.startswith('6.'):
            txt = 'Ligase'
        else:
            txt = 'Non_Enzyme'

        item.insert(1, method)
        item.insert(1, reso)
        item.insert(len(item), ec)
        item.insert(len(item), txt)
        item.insert(len(item), ion_name)
        Data.append(item)

    return Data
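# Sketch of the input shape Header_Data() expects, inferred from the code
# above: row 0 is a header row, and each data row starts with a
# '<pdbid>_<chain>_...' name. The directory, file names and ion are invented.
table = [
    ['File', 'SomeMetric'],   # AAAB14[0], the header row
    ['1abc_A_site1', 0.75],   # item[0] is split on '_' for pdbid and chain
]
rows = Header_Data(table, '/data/pdb/', 'ZN')
# the header row becomes:
# ['File', 'Resolution', 'StructMethod', 'SomeMetric', 'ECNum', 'Type', 'Ion']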
def get_pdb(pss_result, if_full_match=True, if_best_resolution=True, if_download=True):
    outdir_pdb = './pdb_download'
    if not path.exists(outdir_pdb):
        makedirs(outdir_pdb)

    pdb_reso = []
    for r in pss_result:
        pdb_id = None
        pdb_full = None

        # fully matched or not
        if if_full_match:
            info = r['services'][0]['nodes'][0]['match_context'][0]
            if info['mismatches'] == 0 \
                    and info['gaps_opened'] == 0 \
                    and info['query_length'] == info['subject_length']:
                pdb_full = r['identifier']
                pdb_id = pdb_full.split('_')[0]
        else:
            pdb_full = r['identifier']
            pdb_id = pdb_full.split('_')[0]

        # if match, download pdb file
        if pdb_id and pdb_full:
            outfile = path.join(outdir_pdb, str(pdb_id) + '.pdb') if if_download \
                else path.join(outdir_pdb, 'tmp.pdb')
            if download_pdb(pdb_id, outfile):
                structure = parse_pdb_header(outfile)
                pdb_reso.append((pdb_full, structure['resolution']))

    if if_best_resolution:
        # find the pdb with the best resolution; a lower Angstrom value is
        # better, so take min
        tmp_dict = {r: p for p, r in pdb_reso}
        best_pdb_id = tmp_dict[min(tmp_dict.keys())]
        return [(best_pdb_id, tmp_dict[best_pdb_id])]

    # write to file
    # with open('./dataset_pos.csv', 'a') as f:
    #     f.write("{}, {}, {}\n".format(best_pdb_id, seq, pdb_reso))
    # print("{} - {}".format(i, pdb_reso))

    return pdb_reso
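# Minimal fake of the search-result records get_pdb() walks, mirroring only
# the fields read above; all values are invented. Running this would trigger
# a download of '1ABC' via the download_pdb() helper.
fake_pss_result = [{
    'identifier': '1ABC_1',
    'services': [{'nodes': [{'match_context': [{
        'mismatches': 0, 'gaps_opened': 0,
        'query_length': 120, 'subject_length': 120,
    }]}]}],
}]
print(get_pdb(fake_pss_result, if_best_resolution=False))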
from Bio.PDB.PDBParser import PDBParser

parser = PDBParser(PERMISSIVE=1)
structure_id = "2b10"
filename = "/home/koreanraichu/2b10.pdb"
structure = parser.get_structure(structure_id, filename)
print(structure)
# Is this loading a file that is on my PC?
# FileNotFoundError: [Errno 2] No such file or directory: 'pdb1fat.ent'

from Bio.PDB import parse_pdb_header

with open(filename, "r") as handle:
    header_dict = parse_pdb_header(handle)
print(header_dict)
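# Note: parse_pdb_header() also accepts a file path directly, so the explicit
# open() above is optional.
header_dict = parse_pdb_header(filename)
print(header_dict["idcode"], header_dict["resolution"])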
def main_func(self, positions, iteration):
    # filenames
    if not positions[1]:
        filenames = self.filenames[positions[0]:]
    else:
        filenames = self.filenames[positions[0]:positions[1]]

    for source_file in filenames:
        source_file_path = os.sep.join([self.structure_data_dir, source_file])
        if os.path.isfile(source_file_path) and source_file[0] != '.':
            self.logger.info('Reading file {}'.format(source_file_path))

            # read the yaml file
            with open(source_file_path, 'r') as f:
                sd = yaml.load(f, Loader=yaml.FullLoader)  # explicit loader for PyYAML >= 5.1

            # is this a representative structure (will be used to guide structure-based alignments)?
            representative = False
            if 'representative' in sd and sd['representative']:
                representative = True

            # only process representative structures on first iteration
            if not representative and iteration == 1:
                continue

            # skip representative structures on second iteration
            if representative and iteration == 2:
                continue

            # is there a construct?
            if 'construct' not in sd:
                self.logger.error('No construct specified, skipping!')
                continue

            # does the construct exist?
            try:
                con = Protein.objects.get(entry_name=sd['construct'])
            except Protein.DoesNotExist:
                self.logger.error('Construct {} does not exist, skipping!'.format(sd['construct']))
                continue

            # create a structure record
            try:
                s = Structure.objects.get(protein_conformation__protein=con)
            except Structure.DoesNotExist:
                s = Structure()
            s.representative = representative

            # protein state
            if 'state' not in sd:
                self.logger.warning('State not defined, using default state {}'.format(
                    settings.DEFAULT_PROTEIN_STATE))
                state = settings.DEFAULT_STATE.title()
            else:
                state = sd['state']
            state_slug = slugify(state)
            try:
                ps, created = ProteinState.objects.get_or_create(slug=state_slug, defaults={'name': state})
                if created:
                    self.logger.info('Created protein state {}'.format(ps.name))
            except IntegrityError:
                ps = ProteinState.objects.get(slug=state_slug)
            s.state = ps

            # protein conformation
            try:
                s.protein_conformation = ProteinConformation.objects.get(protein=con)
            except ProteinConformation.DoesNotExist:
                self.logger.error('Protein conformation for construct {} does not exist'.format(con))
                continue
            if s.protein_conformation.state != ps:
                ProteinConformation.objects.filter(protein=con).update(state=ps)

            # get the PDB file and save to DB
            sd['pdb'] = sd['pdb'].upper()
            if not os.path.exists(self.pdb_data_dir):
                os.makedirs(self.pdb_data_dir)
            pdb_path = os.sep.join([self.pdb_data_dir, sd['pdb'] + '.pdb'])
            if not os.path.isfile(pdb_path):
                self.logger.info('Fetching PDB file {}'.format(sd['pdb']))
                url = 'http://www.rcsb.org/pdb/files/%s.pdb' % sd['pdb']
                pdbdata_raw = urlopen(url).read().decode('utf-8')
                with open(pdb_path, 'w') as f:
                    f.write(pdbdata_raw)
            else:
                with open(pdb_path, 'r') as pdb_file:
                    pdbdata_raw = pdb_file.read()
            pdbdata, created = PdbData.objects.get_or_create(pdb=pdbdata_raw)
            s.pdb_data = pdbdata

            # UPDATE HETSYN with its PDB reference instead + GRAB PUB DATE, PMID, DOI AND RESOLUTION
            hetsyn = {}
            hetsyn_reverse = {}
            for line in pdbdata_raw.splitlines():
                if line.startswith('HETSYN'):
                    ### need to fix bad PDB formatting where col4 and col5 are put together
                    ### for some reason -- usually seen when the id is +1000
                    m = re.match(r"HETSYN[\s]+([\w]{3})[\s]+(.+)", line)
                    if m:
                        hetsyn[m.group(2).strip()] = m.group(1).upper()
                        hetsyn_reverse[m.group(1)] = m.group(2).strip().upper()
                if line.startswith('HETNAM'):
                    ### need to fix bad PDB formatting where col4 and col5 are put together
                    ### for some reason -- usually seen when the id is +1000
                    m = re.match(r"HETNAM[\s]+([\w]{3})[\s]+(.+)", line)
                    if m:
                        hetsyn[m.group(2).strip()] = m.group(1).upper()
                        hetsyn_reverse[m.group(1)] = m.group(2).strip().upper()
                # the prefixes below follow the fixed-width PDB record columns
                if line.startswith('REVDAT   1'):
                    sd['publication_date'] = line[13:22]
                if line.startswith('JRNL        PMID'):
                    sd['pubmed_id'] = line[19:].strip()
                if line.startswith('JRNL        DOI'):
                    sd['doi_id'] = line[19:].strip()

            if len(hetsyn) == 0:
                self.logger.info("PDB file contained NO hetsyn")

            with open(pdb_path, 'r') as header:
                header_dict = parse_pdb_header(header)
            sd['publication_date'] = header_dict['release_date']
            sd['resolution'] = str(header_dict['resolution']).strip()
            sd['structure_method'] = header_dict['structure_method']

            # structure type
            if 'structure_method' in sd and sd['structure_method']:
                structure_type = sd['structure_method'].capitalize()
                structure_type_slug = slugify(sd['structure_method'])
                try:
                    st, created = StructureType.objects.get_or_create(slug=structure_type_slug,
                        defaults={'name': structure_type})
                    if created:
                        self.logger.info('Created structure type {}'.format(st))
                except IntegrityError:
                    st = StructureType.objects.get(slug=structure_type_slug)
                s.structure_type = st
            else:
                self.logger.warning('No structure type specified in PDB file {}'.format(sd['pdb']))

            matched = 0
            if 'ligand' in sd and sd['ligand']:
                if isinstance(sd['ligand'], list):
                    ligands = sd['ligand']
                else:
                    ligands = [sd['ligand']]
                for ligand in ligands:
                    if 'name' in ligand:
                        if ligand['name'].upper() in hetsyn:
                            self.logger.info('Ligand {} matched to PDB records'.format(ligand['name']))
                            matched = 1
                            ligand['name'] = hetsyn[ligand['name'].upper()]
                        elif ligand['name'].upper() in hetsyn_reverse:
                            matched = 1
            if matched == 0 and len(hetsyn) > 0:
                self.logger.info('No ligand names found in HET in structure {}'.format(sd['pdb']))

            # REMOVE? can be used to dump structure files with updated ligands
            # yaml.dump(sd, open(source_file_path, 'w'), indent=4)

            # pdb code
            if 'pdb' in sd:
                try:
                    web_resource = WebResource.objects.get(slug='pdb')
                except:
                    # abort if pdb resource is not found
                    raise Exception('PDB resource not found, aborting!')
                s.pdb_code, created = WebLink.objects.get_or_create(index=sd['pdb'], web_resource=web_resource)
            else:
                self.logger.error('PDB code not specified for structure {}, skipping!'.format(sd['pdb']))
                continue

            # insert into plain text fields
            if 'preferred_chain' in sd:
                s.preferred_chain = sd['preferred_chain']
            else:
                self.logger.warning('Preferred chain not specified for structure {}'.format(sd['pdb']))
            if 'resolution' in sd:
                s.resolution = float(sd['resolution'])
            else:
                self.logger.warning('Resolution not specified for structure {}'.format(sd['pdb']))
            if 'publication_date' in sd:
                s.publication_date = sd['publication_date']
            else:
                self.logger.warning('Publication date not specified for structure {}'.format(sd['pdb']))

            # publication
            try:
                if 'doi_id' in sd:
                    try:
                        s.publication = Publication.objects.get(web_link__index=sd['doi_id'])
                    except Publication.DoesNotExist as e:
                        p = Publication()
                        try:
                            p.web_link = WebLink.objects.get(index=sd['doi_id'], web_resource__slug='doi')
                        except WebLink.DoesNotExist:
                            wl = WebLink.objects.create(index=sd['doi_id'],
                                web_resource=WebResource.objects.get(slug='doi'))
                            p.web_link = wl
                        p.update_from_doi(doi=sd['doi_id'])
                        p.save()
                        s.publication = p
                elif 'pubmed_id' in sd:
                    try:
                        s.publication = Publication.objects.get(web_link__index=sd['pubmed_id'])
                    except Publication.DoesNotExist as e:
                        p = Publication()
                        try:
                            p.web_link = WebLink.objects.get(index=sd['pubmed_id'],
                                web_resource__slug='pubmed')
                        except WebLink.DoesNotExist:
                            wl = WebLink.objects.create(index=sd['pubmed_id'],
                                web_resource=WebResource.objects.get(slug='pubmed'))
                            p.web_link = wl
                        p.update_from_pubmed_data(index=sd['pubmed_id'])
                        p.save()
                        s.publication = p
            except:
                self.logger.error('Error saving publication')

            # save structure before adding M2M relations
            s.save()

            # delete previous interaction data to prevent errors
            ResidueFragmentInteraction.objects.filter(structure_ligand_pair__structure=s).delete()
            StructureLigandInteraction.objects.filter(structure=s).delete()
            # remove previous rotamers/residues to prepare for repopulation
            Fragment.objects.filter(structure=s).delete()
            Rotamer.objects.filter(structure=s).all().delete()
            Residue.objects.filter(protein_conformation=s.protein_conformation).all().delete()

            # endogenous ligand(s)
            default_ligand_type = 'Small molecule'
            if representative and 'endogenous_ligand' in sd and sd['endogenous_ligand']:
                if isinstance(sd['endogenous_ligand'], list):
                    endogenous_ligands = sd['endogenous_ligand']
                else:
                    endogenous_ligands = [sd['endogenous_ligand']]
                for endogenous_ligand in endogenous_ligands:
                    if endogenous_ligand['type']:
                        lt, created = LigandType.objects.get_or_create(
                            slug=slugify(endogenous_ligand['type']),
                            defaults={'name': endogenous_ligand['type']})
                    else:
                        lt, created = LigandType.objects.get_or_create(
                            slug=slugify(default_ligand_type),
                            defaults={'name': default_ligand_type})
                    ligand = Ligand()
                    if 'iupharId' not in endogenous_ligand:
                        endogenous_ligand['iupharId'] = 0
                    ligand = ligand.load_by_gtop_id(endogenous_ligand['name'],
                        endogenous_ligand['iupharId'], lt)
                    try:
                        s.protein_conformation.protein.parent.endogenous_ligands.add(ligand)
                    except IntegrityError:
                        self.logger.info('Endogenous ligand for protein {}, already added. Skipping.'.format(
                            s.protein_conformation.protein.parent))

            # ligands
            peptide_chain = ""  # default, in case no ligand defines a chain below
            if 'ligand' in sd and sd['ligand']:
                if isinstance(sd['ligand'], list):
                    ligands = sd['ligand']
                else:
                    ligands = [sd['ligand']]
                for ligand in ligands:
                    l = False
                    peptide_chain = ""
                    if 'chain' in ligand:
                        peptide_chain = ligand['chain']
                        ligand['name'] = 'pep'
                    if ligand['name'] and ligand['name'] != 'None':  # some inserted as none
                        # use annotated ligand type or default type
                        if ligand['type']:
                            lt, created = LigandType.objects.get_or_create(slug=slugify(ligand['type']),
                                defaults={'name': ligand['type']})
                        else:
                            lt, created = LigandType.objects.get_or_create(slug=slugify(default_ligand_type),
                                defaults={'name': default_ligand_type})

                        # set pdb reference for structure-ligand interaction
                        pdb_reference = ligand['name']

                        # use pubchem_id
                        if 'pubchemId' in ligand and ligand['pubchemId'] and ligand['pubchemId'] != 'None':
                            # create ligand
                            l = Ligand()
                            # update ligand by pubchem id
                            ligand_title = False
                            if 'title' in ligand and ligand['title']:
                                ligand_title = ligand['title']
                            l = l.load_from_pubchem('cid', ligand['pubchemId'], lt, ligand_title)
                        # if no pubchem id is specified, use name
                        else:
                            # use ligand title, if specified
                            if 'title' in ligand and ligand['title']:
                                ligand['name'] = ligand['title']
                            # create empty properties
                            lp = LigandProperities.objects.create()
                            # create the ligand
                            try:
                                l, created = Ligand.objects.get_or_create(name=ligand['name'], canonical=True,
                                    defaults={'properities': lp, 'ambigious_alias': False})
                                if created:
                                    self.logger.info('Created ligand {}'.format(ligand['name']))
                            except IntegrityError:
                                l = Ligand.objects.get(name=ligand['name'], canonical=True)
                            # save ligand
                            l.save()
                    else:
                        continue

                    # structure-ligand interaction
                    if l and ligand['role']:
                        role_slug = slugify(ligand['role'])
                        try:
                            lr, created = LigandRole.objects.get_or_create(slug=role_slug,
                                defaults={'name': ligand['role']})
                            if created:
                                self.logger.info('Created ligand role {}'.format(ligand['role']))
                        except IntegrityError:
                            lr = LigandRole.objects.get(slug=role_slug)
                        i, created = StructureLigandInteraction.objects.get_or_create(structure=s,
                            ligand=l, ligand_role=lr, annotated=True,
                            defaults={'pdb_reference': pdb_reference})
                        if i.pdb_reference != pdb_reference:
                            i.pdb_reference = pdb_reference
                            i.save()

            # structure segments
            if 'segments' in sd and sd['segments']:
                for segment, positions in sd['segments'].items():
                    # fetch (create if needed) sequence segment
                    try:
                        protein_segment = ProteinSegment.objects.get(slug=segment)
                    except ProteinSegment.DoesNotExist:
                        self.logger.error('Segment {} not found'.format(segment))
                        continue
                    struct_seg, created = StructureSegment.objects.update_or_create(structure=s,
                        protein_segment=protein_segment,
                        defaults={'start': positions[0], 'end': positions[1]})
            # all representative structures should have defined segments
            elif representative:
                self.logger.warning('Segments not defined for representative structure {}'.format(sd['pdb']))

            # structure segments for modeling
            if 'segments_in_structure' in sd and sd['segments_in_structure']:
                for segment, positions in sd['segments_in_structure'].items():
                    # fetch (create if needed) sequence segment
                    try:
                        protein_segment = ProteinSegment.objects.get(slug=segment)
                    except ProteinSegment.DoesNotExist:
                        self.logger.error('Segment {} not found'.format(segment))
                        continue
                    struct_seg_mod, created = StructureSegmentModeling.objects.update_or_create(structure=s,
                        protein_segment=protein_segment,
                        defaults={'start': positions[0], 'end': positions[1]})

            # structure coordinates
            if 'coordinates' in sd and sd['coordinates']:
                for segment, coordinates in sd['coordinates'].items():
                    # fetch (create if needed) sequence segment
                    try:
                        protein_segment = ProteinSegment.objects.get(slug=segment)
                    except ProteinSegment.DoesNotExist:
                        self.logger.error('Segment {} not found'.format(segment))
                        continue
                    # fetch (create if needed) coordinates description
                    try:
                        description, created = StructureCoordinatesDescription.objects.get_or_create(
                            text=coordinates)
                        if created:
                            self.logger.info('Created structure coordinate description {}'.format(coordinates))
                    except IntegrityError:
                        description = StructureCoordinatesDescription.objects.get(text=coordinates)
                    sc = StructureCoordinates()
                    sc.structure = s
                    sc.protein_segment = protein_segment
                    sc.description = description
                    sc.save()

            # structure engineering
            if 'engineering' in sd and sd['engineering']:
                for segment, engineering in sd['engineering'].items():
                    # fetch (create if needed) sequence segment
                    try:
                        protein_segment = ProteinSegment.objects.get(slug=segment)
                    except ProteinSegment.DoesNotExist:
                        self.logger.error('Segment {} not found'.format(segment))
                        continue
                    # fetch (create if needed) engineering description
                    try:
                        description, created = StructureEngineeringDescription.objects.get_or_create(
                            text=engineering)
                        if created:
                            self.logger.info('Created structure engineering description {}'.format(engineering))
                    except IntegrityError:
                        description = StructureEngineeringDescription.objects.get(text=engineering)
                    se = StructureEngineering()
                    se.structure = s
                    se.protein_segment = protein_segment
                    se.description = description
                    se.save()

            # protein anomalies
            scheme = s.protein_conformation.protein.residue_numbering_scheme
            if 'bulges' in sd and sd['bulges']:
                pa_slug = 'bulge'
                try:
                    pab, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug,
                        defaults={'name': 'Bulge'})
                    if created:
                        self.logger.info('Created protein anomaly type {}'.format(pab))
                except IntegrityError:
                    pab = ProteinAnomalyType.objects.get(slug=pa_slug)
                for segment, bulges in sd['bulges'].items():
                    for bulge in bulges:
                        try:
                            gn, created = ResidueGenericNumber.objects.get_or_create(label=bulge,
                                scheme=scheme,
                                defaults={'protein_segment': ProteinSegment.objects.get(slug=segment)})
                            if created:
                                self.logger.info('Created generic number {}'.format(gn))
                        except IntegrityError:
                            gn = ResidueGenericNumber.objects.get(label=bulge, scheme=scheme)
                        try:
                            pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pab,
                                generic_number=gn)
                            if created:
                                self.logger.info('Created protein anomaly {}'.format(pa))
                        except IntegrityError:
                            pa = ProteinAnomaly.objects.get(anomaly_type=pab, generic_number=gn)
                        s.protein_anomalies.add(pa)
            if 'constrictions' in sd and sd['constrictions']:
                pa_slug = 'constriction'
                try:
                    pac, created = ProteinAnomalyType.objects.get_or_create(slug=pa_slug,
                        defaults={'name': 'Constriction'})
                    if created:
                        self.logger.info('Created protein anomaly type {}'.format(pac))
                except IntegrityError:
                    pac = ProteinAnomalyType.objects.get(slug=pa_slug)
                for segment, constrictions in sd['constrictions'].items():
                    for constriction in constrictions:
                        try:
                            gn, created = ResidueGenericNumber.objects.get_or_create(label=constriction,
                                scheme=scheme,
                                defaults={'protein_segment': ProteinSegment.objects.get(slug=segment)})
                            if created:
                                self.logger.info('Created generic number {}'.format(gn))
                        except IntegrityError:
                            gn = ResidueGenericNumber.objects.get(label=constriction, scheme=scheme)
                        try:
                            pa, created = ProteinAnomaly.objects.get_or_create(anomaly_type=pac,
                                generic_number=gn)
                            if created:
                                self.logger.info('Created protein anomaly {}'.format(pa))
                        except IntegrityError:
                            pa = ProteinAnomaly.objects.get(anomaly_type=pac, generic_number=gn)
                        s.protein_anomalies.add(pa)

            # stabilizing agents, FIXME - redesign this!
            # fusion proteins moved to constructs, use this for G-proteins and other agents?
            aux_proteins = []
            if 'signaling_protein' in sd and sd['signaling_protein'] and sd['signaling_protein'] != 'None':
                aux_proteins.append('signaling_protein')
            if 'auxiliary_protein' in sd and sd['auxiliary_protein'] and sd['auxiliary_protein'] != 'None':
                aux_proteins.append('auxiliary_protein')
            for index in aux_proteins:
                if isinstance(sd[index], list):
                    aps = sd[index]
                else:
                    aps = [sd[index]]
                for aux_protein in aps:
                    aux_protein_slug = slugify(aux_protein)[:50]
                    try:
                        sa, created = StructureStabilizingAgent.objects.get_or_create(
                            slug=aux_protein_slug, defaults={'name': aux_protein})
                    except IntegrityError:
                        sa = StructureStabilizingAgent.objects.get(slug=aux_protein_slug)
                    s.stabilizing_agents.add(sa)

            # save structure
            s.save()

            self.logger.info('Calculate rotamers / residues')
            self.create_rotamers(s, pdb_path)

            self.logger.info('Calculate interactions')
            # should not error anymore; if it does, fix
            runcalculation(sd['pdb'], peptide_chain)
            parsecalculation(sd['pdb'], False)
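# Quick standalone check of the HETNAM/HETSYN regex used in main_func() on a
# sample record line (RET is the PDB chemical component id for retinal).
import re

line = "HETNAM     RET RETINAL"
m = re.match(r"HETNAM[\s]+([\w]{3})[\s]+(.+)", line)
if m:
    print(m.group(1).upper(), "->", m.group(2).strip().upper())  # RET -> RETINAL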
from Bio.PDB import *
from Bio.PDB import parse_pdb_header
from numpy import loadtxt
import numpy as np
from Bio.PDB import PDBList
from Bio.PDB.Entity import Entity
from Bio.PDB.Residue import Residue

parser = PDBParser()

# taking a pdb file of choice from the user;
# strip() removes surrounding spaces from the input so there is less error,
# and lower() converts the input to lowercase to reduce the error rate,
# as most pdb files are saved with lowercase names
pdb_file = input("Select a pdb file of your choice?").strip().lower()

# "with open" so the header dictionary can be used (and looped over) later
with open(pdb_file, "r") as file:
    dict_file = parse_pdb_header(file)
# parse the structure from the file name (the header handle is already consumed)
structure = parser.get_structure(pdb_file, pdb_file)

for key in dict_file:
    if key == "idcode":
        id = dict_file[key]
        # print(id)

# calling a class in Biopython
ppb = PPBuilder()
# using the class to get the sequence of the protein (pdb file)
for pp in ppb.build_peptides(structure):
    seq1 = pp.get_sequence()
    print("The sequence of the structure is : " + str(seq1))

# storing the model of the structure of the first pdb file
model = structure[0]
# asking the user for the second file
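# Continuing the snippet above: a model indexes chains, and each chain holds
# residues, so the stored model can be inspected like this.
for chain in model:
    print(chain.id, len(chain))  # chain id and its residue count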
# fragment: the start of this snippet is truncated; it continues the same
# full-match test used in get_pdb() above, inside a try block and a loop
# over search records
if info['mismatches'] == 0 and info['gaps_opened'] == 0 \
        and info['query_length'] == info['subject_length']:
    pdb_id = r['identifier'].split('_')[0]
    pdb_full = r['identifier']

# if match, download pdb file
if pdb_id and pdb_full:
    page = 'http://files.rcsb.org/view/{}.pdb'.format(pdb_id)
    req = requests.get(page)
    if req.status_code == 200:
        response = req.text
        outfile = 'tmp.pdb'
        if outfile:
            with open(outfile, 'w') as f:
                f.write(response)
        # parse to get the resolution
        structure = parse_pdb_header(outfile)
        pdb_reso.append((pdb_full, structure['resolution']))

# append to dataset file
if pdb_reso:
    # find the pdb with the best resolution (lowest Angstrom value), hence min
    tmp_dict = {r: p for p, r in pdb_reso}
    best_pdb_id = tmp_dict[min(tmp_dict.keys())]
    # write to file
    with open('./all_targets.csv', 'a') as f:
        f.write("{}, {}, {}, {}\n".format(best_pdb_id, pdb_reso, records_all[i].description, seq))
    print("{} - {}".format(i, pdb_reso))

# the matching try: opens in the truncated part above
except:
    pass
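# Sketch of the best-resolution pick used above, guarding against entries for
# which parse_pdb_header() has no numeric resolution (e.g. NMR structures);
# the example pairs are invented.
pairs = [('1ABC_1', 2.8), ('2XYZ_1', 1.9), ('3NMR_1', None)]
usable = [(p, r) for p, r in pairs if r is not None]
best = min(usable, key=lambda pr: pr[1])  # lower Angstrom value = better
print(best)  # ('2XYZ_1', 1.9)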