def getTemplates(self):
    """Run a BLAST search for ``self.seq`` and record the resulting templates.

    Sends ``self.seq`` to ``NCBIWWW.qblast`` (program ``blastp``, database
    ``pdb``, ``expect=0.01``), then for every alignment in every returned
    record:

    * builds a ``Template`` keyed by the alignment accession and stores it
      in ``self.templates`` (the fasta goes into ``self.fastas``);
    * builds one ``Alignment`` per HSP, pads its target/template strings,
      and appends it to both the template and ``self.alignments``.

    Finally writes each fasta to ``self.fastasfolder`` and each alignment
    (as JSON) to ``self.alignmentsfolder``; files that already exist are
    left untouched.

    Returns:
        The keys view of ``self.templates``.
    """
    if self.debug:
        print(self.seq)

    result_handle = NCBIWWW.qblast("blastp", "pdb", str(self.seq), expect=0.01)
    blast_records = NCBIXML.parse(result_handle)

    if self.debug:
        print("BLAST Request Finished")
        print()

    seq_length = len(self.seq)

    for record in blast_records:
        for alignment in record.alignments:
            # The accession doubles as the key into templates/fastas.
            template_id = alignment.accession
            fasta = self.getFastaFromId(template_id)
            title = alignment.title
            length = alignment.length

            self.templates[template_id] = Template(
                id=template_id,
                fasta=fasta,
                sequence=title,
                length=length,
                alignments=[],
            )
            self.fastas[template_id] = fasta

            for hsp in alignment.hsps:
                a = Alignment(
                    id=template_id,
                    title=title,
                    expect=hsp.expect,
                    score=hsp.score,
                    identities=hsp.identities,
                    similarity=(100 * hsp.identities / seq_length),
                    target=hsp.query,
                    targetstart=hsp.query_start,
                    match=hsp.match,
                    template=hsp.sbjct,
                    templatestart=hsp.sbjct_start,
                    length=length,
                )

                # Pad the aligned target with the unaligned parts of the
                # full sequence on either side.
                # NOTE(review): a.length at this point is the value passed
                # to the constructor (alignment.length); presumably the end
                # offset should come from the HSP instead - confirm.
                target_front = str(self.seq[:a.targetstart - 1])
                target_end = str(self.seq[(a.targetstart + a.length):])
                a.target = target_front + a.target + target_end
                a.length = len(a.target)

                # Pad the template with gap characters. The trailing pad is
                # computed from the *updated* a.length; a negative count
                # simply yields no padding.
                template_front = '-' * (a.targetstart - 1)
                template_end = '-' * (seq_length - (a.targetstart + a.length))
                a.template = template_front + a.template + template_end

                self.templates[template_id].alignments.append(a)
                self.alignments.append(a)

    # Persist fastas; the context manager closes the file even if the
    # write raises.
    for template_id, fasta in self.fastas.items():
        fname = '%s/%s.fasta' % (self.fastasfolder, template_id)
        if not os.path.exists(fname):
            with open(fname, 'w') as f:
                SeqIO.write(fasta, f, 'fasta')

    # Persist alignments, one JSON file per alignment, indexed by position
    # in self.alignments.
    for i, a in enumerate(self.alignments):
        fname = '%s/%s-%s.alignment' % (self.alignmentsfolder, a.id, str(i))
        if not os.path.exists(fname):
            with open(fname, 'w') as f:
                json.dump(a.toJSON(), f)

    return self.templates.keys()
def getTemplates(self):
    """Find template structures for ``self.seq`` via a remote BLAST query.

    The sequence is submitted with ``NCBIWWW.qblast`` using the ``blastp``
    program against the ``pdb`` database (``expect=0.01``). Each alignment
    in the parsed records becomes a ``Template`` stored in
    ``self.templates`` under its accession, with its fasta kept in
    ``self.fastas``. Each HSP of an alignment becomes an ``Alignment``
    appended to the template and to ``self.alignments``.

    After processing, fastas are written to ``self.fastasfolder`` and
    alignments (JSON) to ``self.alignmentsfolder``, skipping any file that
    already exists. When ``self.debug`` is set, progress details are
    printed.

    See http://biopython.org/DIST/docs/api/Bio.Blast.NCBIWWW-module.html

    Returns:
        The keys view of ``self.templates``.
    """
    if self.debug:
        print(self.seq)

    # Send BLAST request to server: blastp (protein) method, pdb database.
    result_handle = NCBIWWW.qblast("blastp", "pdb", str(self.seq), expect=0.01)

    # Parse the results into blast records.
    blast_records = NCBIXML.parse(result_handle)

    if self.debug:
        print("BLAST Request Finished")

    seq_length = len(self.seq)

    for record in blast_records:
        for alignment in record.alignments:
            # Use the alignment accession as the template key.
            template_id = alignment.accession
            fasta = self.getFastaFromId(template_id)
            title = alignment.title
            length = alignment.length

            # Set up and store the template object for this accession.
            self.templates[template_id] = Template(
                id=template_id,
                fasta=fasta,
                sequence=title,
                length=length,
                alignments=[],
            )
            self.fastas[template_id] = fasta

            # One Alignment per HSP of this template.
            for hsp in alignment.hsps:
                a = Alignment(
                    id=template_id,
                    title=title,
                    expect=hsp.expect,
                    score=hsp.score,
                    identities=hsp.identities,
                    similarity=(100 * hsp.identities / seq_length),
                    target=hsp.query,
                    targetstart=hsp.query_start,
                    match=hsp.match,
                    template=hsp.sbjct,
                    templatestart=hsp.sbjct_start,
                    length=length,
                )

                # The alignment isn't necessarily the same size as the
                # sequence, so pad the target with the surrounding residues.
                # NOTE(review): the end offset uses a.length as given to the
                # constructor (alignment.length), which looks like the
                # template length rather than the HSP extent - confirm.
                target_front = str(self.seq[:a.targetstart - 1])
                target_end = str(self.seq[(a.targetstart + a.length):])
                a.target = target_front + a.target + target_end
                a.length = len(a.target)

                # Pad the template with gaps; the trailing count is based on
                # the a.length just reassigned above, and a negative count
                # produces no padding.
                template_front = '-' * (a.targetstart - 1)
                template_end = '-' * (seq_length - (a.targetstart + a.length))
                a.template = template_front + a.template + template_end

                if self.debug:
                    print("Seq vs Target Length:", len(self.seq), a.length)

                # Register the alignment on its template and globally.
                self.templates[template_id].alignments.append(a)
                self.alignments.append(a)

                if self.debug:
                    print()
                    print('****ALIGNMENT***')
                    print('id:', template_id)
                    print('sequence:', title)
                    print('length:', length)
                    print('e value:', hsp.expect)
                    print('score:', hsp.score)
                    print('identities:', (100 * hsp.identities / len(self.seq)))
                    # need to print percentage of similarities
                    print("Target :" + hsp.query[0:75] + '...')
                    print("Match :" + hsp.match[0:75] + '...')
                    print("Template:" + hsp.sbjct[0:75] + '...')
                    print()

    # Save off the fasta files; `with` guarantees the handle is closed even
    # when the write fails.
    for template_id, fasta in self.fastas.items():
        fname = '%s/%s.fasta' % (self.fastasfolder, template_id)
        if not os.path.exists(fname):
            with open(fname, 'w') as f:
                SeqIO.write(fasta, f, 'fasta')

    # Save off the alignments, named by template id and list position.
    for i, a in enumerate(self.alignments):
        fname = '%s/%s-%s.alignment' % (self.alignmentsfolder, a.id, str(i))
        if not os.path.exists(fname):
            with open(fname, 'w') as f:
                json.dump(a.toJSON(), f)

    return self.templates.keys()