Beispiel #1
0
    def getTemplates(self):
        """Find template structures for ``self.seq`` with a remote BLAST search.

        The query sequence is submitted to NCBI BLAST (``blastp`` against the
        ``pdb`` database, expect threshold 0.01). For every hit a ``Template``
        is stored in ``self.templates`` and the value returned by
        ``self.getFastaFromId`` is stored in ``self.fastas``, both keyed by
        the hit accession. For every HSP of a hit an ``Alignment`` is built,
        padded so that it spans the whole query sequence, and appended to
        both the template's ``alignments`` list and ``self.alignments``.

        Afterwards each FASTA entry is written to
        ``<self.fastasfolder>/<id>.fasta`` and each alignment to
        ``<self.alignmentsfolder>/<id>-<index>.alignment`` (JSON). Files that
        already exist are not overwritten.

        Returns:
            The keys of ``self.templates`` (hit accessions).
        """
        if self.debug:
            print(self.seq)

        result_handle = NCBIWWW.qblast("blastp",
                                       "pdb",
                                       str(self.seq),
                                       expect=0.01)
        blast_records = NCBIXML.parse(result_handle)
        if self.debug:
            print("BLAST Request Finished")
            print()

        # The query length is constant for the whole search.
        seq_len = len(self.seq)

        for record in blast_records:
            for alignment in record.alignments:
                template_id = alignment.accession
                fasta = self.getFastaFromId(template_id)
                title = alignment.title
                length = alignment.length

                self.templates[template_id] = Template(id=template_id,
                                                       fasta=fasta,
                                                       sequence=title,
                                                       length=length,
                                                       alignments=[])
                self.fastas[template_id] = fasta

                for hsp in alignment.hsps:
                    a = Alignment(id=template_id,
                                  title=title,
                                  expect=hsp.expect,
                                  score=hsp.score,
                                  identities=hsp.identities,
                                  similarity=100 * hsp.identities / seq_len,
                                  target=hsp.query,
                                  targetstart=hsp.query_start,
                                  match=hsp.match,
                                  template=hsp.sbjct,
                                  templatestart=hsp.sbjct_start,
                                  length=length)

                    # The HSP usually covers only part of the query. Extend
                    # the target with the uncovered query residues and the
                    # template with gap characters so both strings span the
                    # full query and stay equally long.
                    # query_start/query_end are 1-based inclusive positions
                    # in the query; the hit length (alignment.length) says
                    # nothing about where the HSP ends in the query, so it
                    # must not be used to locate the tail.
                    lead = a.targetstart - 1
                    tail_start = hsp.query_end
                    a.target = (str(self.seq[:lead]) + a.target +
                                str(self.seq[tail_start:]))
                    a.template = ('-' * lead + a.template +
                                  '-' * (seq_len - tail_start))
                    a.length = len(a.target)

                    self.templates[template_id].alignments.append(a)
                    self.alignments.append(a)

        # Persist FASTA records; existing files are kept as they are.
        for template_id, fasta in self.fastas.items():
            fname = '%s/%s.fasta' % (self.fastasfolder, template_id)
            if not os.path.exists(fname):
                with open(fname, 'w') as f:
                    SeqIO.write(fasta, f, 'fasta')

        # Persist alignments as JSON; existing files are kept as they are.
        for i, a in enumerate(self.alignments):
            fname = '%s/%s-%s.alignment' % (self.alignmentsfolder, a.id,
                                            str(i))
            if not os.path.exists(fname):
                with open(fname, 'w') as f:
                    json.dump(a.toJSON(), f)

        return self.templates.keys()
	def getTemplates(self):
		"""Find template structures for ``self.seq`` with a remote BLAST search.

		The query sequence is submitted to NCBI BLAST (``blastp`` against the
		``pdb`` database, expect threshold 0.01). For every hit a ``Template``
		is stored in ``self.templates`` and the value returned by
		``self.getFastaFromId`` is stored in ``self.fastas``, both keyed by
		the hit accession. For every HSP of a hit an ``Alignment`` is built,
		padded so that it spans the whole query sequence, and appended to
		both the template's ``alignments`` list and ``self.alignments``.

		Afterwards each FASTA entry is written to
		``<self.fastasfolder>/<id>.fasta`` and each alignment to
		``<self.alignmentsfolder>/<id>-<index>.alignment`` (JSON). Files that
		already exist are not overwritten. When ``self.debug`` is set,
		progress and per-HSP details are printed.

		Returns:
			The keys of ``self.templates`` (hit accessions).
		"""
		# http://biopython.org/DIST/docs/api/Bio.Blast.NCBIWWW-module.html
		if self.debug:
			print(self.seq)
		# Send BLAST request to server:
		# blastp (protein) as the method, pdb as the database
		result_handle = NCBIWWW.qblast("blastp", "pdb", str(self.seq), expect=0.01)
		# Parse the results into blast records
		blast_records = NCBIXML.parse(result_handle)
		if self.debug:
			print("BLAST Request Finished")

		# The query length is constant for the whole search.
		seq_len = len(self.seq)

		# Read through each blast record
		for record in blast_records:
			# Grab the alignments from each record
			for alignment in record.alignments:
				# Use the alignment accession as the template key
				template_id = alignment.accession
				fasta = self.getFastaFromId(template_id)
				title = alignment.title
				length = alignment.length
				# Store the template object and its fasta under this key
				self.templates[template_id] = Template(
					id=template_id, fasta=fasta, sequence=title,
					length=length, alignments=[]
				)
				self.fastas[template_id] = fasta
				# Get all alignments (HSPs) for this template
				for hsp in alignment.hsps:
					similarity = 100 * hsp.identities / seq_len
					a = Alignment(
						id=template_id, title=title, expect=hsp.expect, score=hsp.score,
						identities=hsp.identities, similarity=similarity,
						target=hsp.query, targetstart=hsp.query_start, match=hsp.match,
						template=hsp.sbjct, templatestart=hsp.sbjct_start, length=length
					)
					# The HSP usually covers only part of the query. Extend
					# the target with the uncovered query residues and the
					# template with gap characters so both strings span the
					# full query and stay equally long.
					# query_start/query_end are 1-based inclusive positions
					# in the query; the hit length (alignment.length) says
					# nothing about where the HSP ends in the query, so it
					# must not be used to locate the tail.
					lead = a.targetstart - 1
					tail_start = hsp.query_end
					a.target = str(self.seq[:lead]) + a.target + str(self.seq[tail_start:])
					a.template = '-' * lead + a.template + '-' * (seq_len - tail_start)
					a.length = len(a.target)

					if self.debug:
						print("Seq vs Target Length:", seq_len, a.length)

					# Append the alignment to the template's alignments
					self.templates[template_id].alignments.append(a)
					self.alignments.append(a)

					if self.debug:
						print()
						print('****ALIGNMENT***')
						print('id:', template_id)
						print('sequence:', title)
						print('length:', length)
						print('e value:', hsp.expect)
						print('score:', hsp.score)
						print('identities:', similarity) # need to print percentage of similarities
						print("Target  :" + hsp.query[0:75] + '...')
						print("Match   :" + hsp.match[0:75] + '...')
						print("Template:" + hsp.sbjct[0:75] + '...')
						print()

		# Save off the fasta files; existing files are kept as they are
		for template_id, fasta in self.fastas.items():
			fname = '%s/%s.fasta' % (self.fastasfolder, template_id)
			if not os.path.exists(fname):
				with open(fname, 'w') as f:
					SeqIO.write(fasta, f, 'fasta')

		# Save off the alignments as JSON; existing files are kept as they are
		for i, a in enumerate(self.alignments):
			fname = '%s/%s-%s.alignment' % (self.alignmentsfolder, a.id, str(i))
			if not os.path.exists(fname):
				with open(fname, 'w') as f:
					json.dump(a.toJSON(), f)

		return self.templates.keys()