def calc_reconstruction(self, exons, blocks, size=None):
    """Calculate reconstruction given a list of alignment blocks and exons.

    Args:
        exons: exon intervals; passed to ``intspan.intersect`` and, when
            ``size`` is not supplied, to ``intspan.cardinality_multi``.
        blocks: alignment blocks intersected with ``exons``.
        size: denominator for the reconstructed fraction. When None it is
            computed as ``intspan.cardinality_multi(exons)``.

    Returns:
        Tuple ``(bases_reconstructed, fraction_reconstructed)``, where
        ``bases_reconstructed`` is the value returned by
        ``intspan.intersect(exons, blocks)`` and ``fraction_reconstructed``
        is that value divided by ``size`` as a float. The fraction is 0.0
        when ``size`` is zero.
    """
    bases_reconstructed = intspan.intersect(exons, blocks)
    if size is None:
        size = intspan.cardinality_multi(exons)

    denominator = float(size)
    if denominator == 0.0:
        # A zero-sized feature has nothing to reconstruct; report 0.0
        # instead of raising ZeroDivisionError.
        return bases_reconstructed, 0.0

    fraction_reconstructed = float(bases_reconstructed) / denominator
    return bases_reconstructed, fraction_reconstructed
def process_single_model(self, matches, gene2txt, contig_coverage=None):
    """Build gene-level and transcript-level reconstruction records.

    The matches are grouped with ``self.group_matches``. For every gene that
    ``self.get_all_transcripts`` returns transcripts for, one ``'gene'``
    record is emitted, followed by one ``'transcript'`` record per matched
    transcript of that gene (transcript keys visited in sorted order).

    Args:
        matches: sequence of match objects. Each is read for ``.model``,
            ``.align`` (with ``.query``), ``.txt`` and ``.coverage``.
        gene2txt: passed through unchanged to ``self.get_all_transcripts``.
        contig_coverage: optional; when truthy it is handed to
            ``self.calc_coverage`` together with the contig names to obtain
            read/base counts. Otherwise both counts are reported as ``'-'``.

    Returns:
        List of dicts, one per reported gene or transcript. An empty list is
        returned when ``matches`` is empty.
    """
    # Without any match there is no model to report on; indexing
    # matches[0] would raise IndexError.
    if not matches:
        return []

    model = matches[0].model
    matches_by_transcript, matches_by_gene, gene2transcript = self.group_matches(matches)

    def coverage_of(queries):
        # Read/base counts for the given contigs, or dashes when no
        # coverage information was supplied.
        if contig_coverage:
            return self.calc_coverage(queries, contig_coverage)
        return '-', '-'

    results = []
    for gene in gene2transcript:
        matches_gene = matches_by_gene[gene]
        all_txts = self.get_all_transcripts(gene, model, gene2txt)
        if not all_txts:
            # get_all_transcripts returned nothing for this gene: skip both
            # its gene record and its transcript records.
            continue

        # ---- gene-level record: all transcripts collapsed together ----
        collapsed_exons = self.collapse_txts(all_txts)
        collapsed_blocks = self.collapse_align_blocks(
            [match.align for match in matches_gene])
        exons_size = intspan.cardinality_multi(collapsed_exons)
        bases_reconstructed, fraction_reconstructed = self.calc_reconstruction(
            collapsed_exons, collapsed_blocks, size=exons_size)

        queries = [match.align.query for match in matches_gene]
        contigs = ','.join(queries)
        nreads, nbases = coverage_of(queries)

        first_txt = all_txts[0]
        results.append({
            'feature': 'gene',
            'model': model,
            'gene': gene,
            'strand': first_txt.strand,
            'coord': '%s:%s-%s' % (first_txt.chrom,
                                   collapsed_exons[0][0],
                                   collapsed_exons[-1][1]),
            'feature_size': exons_size,
            'bases_reconstructed': bases_reconstructed,
            'reconstruction': "%.3f" % (fraction_reconstructed),
            'contigs': contigs,
            'num_contigs': len(matches_gene),
            'align_blocks': self.blocks_as_string(collapsed_blocks),
            'exons': self.blocks_as_string(collapsed_exons),
            'num_reads': str(nreads),
            'bases_reads': str(nbases),
        })

        # ---- transcript-level records ----
        for transcript in sorted(gene2transcript[gene]):
            matches_txt = matches_by_transcript[transcript]
            txt = matches_txt[0].txt

            # Match with the highest coverage; on ties the earliest match
            # wins, as with a strict '>' scan.
            best_match = max(matches_txt, key=lambda match: match.coverage)

            collapsed_blocks = self.collapse_align_blocks(
                [match.align for match in matches_txt])
            exons_size = intspan.cardinality_multi(txt.exons)
            bases_reconstructed, fraction_reconstructed = self.calc_reconstruction(
                txt.exons, collapsed_blocks, size=exons_size)

            queries = [match.align.query for match in matches_txt]
            contigs = ','.join(queries)
            nreads, nbases = coverage_of(queries)

            results.append({
                'feature': 'transcript',
                'model': txt.model,
                'transcript': txt.name,
                'gene': txt.alias,
                'strand': txt.strand,
                # NOTE(review): start is shifted by +1 here but the gene
                # record uses the collapsed exon start as-is - confirm the
                # two coordinate conventions are intended.
                'coord': '%s:%s-%s' % (txt.chrom, int(txt.txStart) + 1, txt.txEnd),
                'feature_size': txt.length,
                'bases_reconstructed': bases_reconstructed,
                'reconstruction': "%.3f" % (fraction_reconstructed),
                'contigs': contigs,
                'num_contigs': len(matches_txt),
                'best_contig': best_match.align.query,
                'best_contig_reconstruction': "%.3f" % (best_match.coverage),
                'align_blocks': self.blocks_as_string(collapsed_blocks),
                'exons': self.blocks_as_string(txt.exons),
                'num_reads': str(nreads),
                'bases_reads': str(nbases),
            })

    return results