Esempio n. 1
0
    def calc_reconstruction(self, exons, blocks, size=None):
        """Calculate reconstruction given lists of alignment blocks and exons.

        Args:
            exons: exon spans; passed to ``intspan.intersect`` and, when
                ``size`` is not supplied, to ``intspan.cardinality_multi``.
            blocks: alignment block spans intersected with ``exons``.
            size: denominator of the reconstructed fraction.  When None it
                is computed as ``intspan.cardinality_multi(exons)``.

        Returns:
            Tuple ``(bases_reconstructed, fraction_reconstructed)`` where the
            first item is the value returned by ``intspan.intersect`` and the
            second is that value divided by ``size`` as a float.  The
            fraction is 0.0 when ``size`` is zero.
        """
        bases_reconstructed = intspan.intersect(exons, blocks)
        if size is None:
            size = intspan.cardinality_multi(exons)

        denominator = float(size)
        # A zero-sized feature has nothing to reconstruct; report 0.0 rather
        # than failing with ZeroDivisionError.
        if denominator == 0.0:
            return bases_reconstructed, 0.0

        fraction_reconstructed = float(bases_reconstructed) / denominator
        return bases_reconstructed, fraction_reconstructed
Esempio n. 2
0
    def process_single_model(self, matches, gene2txt, contig_coverage=None):
        """Summarize reconstruction per gene and per transcript for one model.

        The model is taken from the first match.  Matches are grouped with
        ``self.group_matches``; for every gene for which
        ``self.get_all_transcripts`` returns transcripts, one 'gene' record
        is appended, followed by one 'transcript' record per matched
        transcript of that gene (in sorted transcript-key order).

        Args:
            matches: sequence of match objects; each is read for ``.model``,
                ``.align`` (and ``.align.query``), ``.txt`` and ``.coverage``.
            gene2txt: forwarded unchanged to ``self.get_all_transcripts``.
            contig_coverage: optional coverage data forwarded to
                ``self.calc_coverage``.  When falsy, 'num_reads' and
                'bases_reads' are reported as '-'.

        Returns:
            List of dicts, one per reported gene or transcript.  Empty when
            ``matches`` is empty.
        """
        # Nothing to report without matches; also avoids IndexError below.
        if not matches:
            return []

        model = matches[0].model
        matches_by_transcript, matches_by_gene, gene2transcript = self.group_matches(matches)

        results = []

        for gene in gene2transcript:
            matches_gene = matches_by_gene[gene]
            all_txts = self.get_all_transcripts(gene, model, gene2txt)

            # Genes without any known transcript cannot be summarized.
            if not all_txts:
                continue

            # Gene level: compare the union of all transcripts' exons with
            # the union of all alignment blocks hitting the gene.
            collapsed_exons = self.collapse_txts(all_txts)
            collapsed_blocks = self.collapse_align_blocks(
                [match.align for match in matches_gene])
            exons_size = intspan.cardinality_multi(collapsed_exons)
            bases_reconstructed, fraction_reconstructed = self.calc_reconstruction(
                collapsed_exons, collapsed_blocks, size=exons_size)

            queries = [match.align.query for match in matches_gene]
            contigs = ','.join(queries)

            if contig_coverage:
                nreads, nbases = self.calc_coverage(queries, contig_coverage)
            else:
                nreads, nbases = '-', '-'

            results.append({
                'feature': 'gene',
                'model': model,
                'gene': gene,
                'strand': all_txts[0].strand,
                # Span from the first collapsed exon's start to the last one's end.
                'coord': '%s:%s-%s' % (all_txts[0].chrom,
                                       collapsed_exons[0][0],
                                       collapsed_exons[-1][1]),
                'feature_size': exons_size,
                'bases_reconstructed': bases_reconstructed,
                'reconstruction': "%.3f" % (fraction_reconstructed),
                'contigs': contigs,
                'num_contigs': len(matches_gene),
                'align_blocks': self.blocks_as_string(collapsed_blocks),
                'exons': self.blocks_as_string(collapsed_exons),
                'num_reads': str(nreads),
                'bases_reads': str(nbases),
            })

            # Transcript level: one record per matched transcript of this gene.
            for transcript in sorted(gene2transcript[gene]):
                matches_txt = matches_by_transcript[transcript]
                txt = matches_txt[0].txt

                # Highest-coverage match; max() keeps the first one on ties,
                # like a strict '>' scan would.
                best_match = max(matches_txt, key=lambda match: match.coverage)

                collapsed_blocks = self.collapse_align_blocks(
                    [match.align for match in matches_txt])
                exons_size = intspan.cardinality_multi(txt.exons)
                bases_reconstructed, fraction_reconstructed = self.calc_reconstruction(
                    txt.exons, collapsed_blocks, size=exons_size)

                queries = [match.align.query for match in matches_txt]
                contigs = ','.join(queries)

                if contig_coverage:
                    nreads, nbases = self.calc_coverage(queries, contig_coverage)
                else:
                    nreads, nbases = '-', '-'

                results.append({
                    'feature': 'transcript',
                    'model': txt.model,
                    'transcript': txt.name,
                    'gene': txt.alias,
                    'strand': txt.strand,
                    # NOTE(review): the +1 presumably converts a 0-based
                    # txStart to a 1-based coordinate -- confirm.
                    'coord': '%s:%s-%s' % (txt.chrom, int(txt.txStart) + 1, txt.txEnd),
                    'feature_size': txt.length,
                    'bases_reconstructed': bases_reconstructed,
                    'reconstruction': "%.3f" % (fraction_reconstructed),
                    'contigs': contigs,
                    'num_contigs': len(matches_txt),
                    'best_contig': best_match.align.query,
                    'best_contig_reconstruction': "%.3f" % (best_match.coverage),
                    'align_blocks': self.blocks_as_string(collapsed_blocks),
                    'exons': self.blocks_as_string(txt.exons),
                    'num_reads': str(nreads),
                    'bases_reads': str(nbases),
                })

        return results