def calc_reconstruction(self, exons, blocks, size=None):
    """Calculate the reconstruction of exons by a list of alignment blocks.

    Args:
        exons: exon spans, passed to ``intspan.intersect`` (and to
            ``intspan.cardinality_multi`` when ``size`` is not given).
        blocks: alignment block spans, passed to ``intspan.intersect``.
        size: denominator for the fraction; when None it is taken from
            ``intspan.cardinality_multi(exons)``.

    Returns:
        A ``(bases_reconstructed, fraction_reconstructed)`` tuple, where the
        first item is the result of ``intspan.intersect(exons, blocks)`` and
        the second is that value divided by the size, as a float.
    """
    overlap = intspan.intersect(exons, blocks)
    # Only fall back to measuring the exons when no explicit size was given.
    denominator = intspan.cardinality_multi(exons) if size is None else size
    return overlap, float(overlap) / float(denominator)
def combine_results(self, records, add_reads=False):
    """Pool match records into one row per gene and one per transcript.

    The records are grouped with ``self.group_results``.  For every gene of
    every model a pooled gene row is built, together with one pooled row
    for each of the gene's transcripts.  Each pooled row carries the
    combined contig list, the best contig, the pooled read count and depth,
    and the reconstruction computed from the union of all alignment blocks
    that contributed to the row.

    Args:
        records: match records, handed unchanged to ``self.group_results``.
        add_reads: when true, read counts of pooled records are summed;
            otherwise the largest read count is kept.

    Returns:
        None.  The pooled rows are stored in ``self.results``: models are
        visited in ``self.models`` order (models without pooled rows are
        skipped) and, within a model, every gene row is followed by the
        rows of its transcripts.
    """
    # Fields that are recomputed while pooling and therefore must not be
    # copied verbatim from the first record of a group.
    pool_headers = ('num_reads', 'depth', 'contigs', 'num_contigs',
                    'best_contig', 'best_contig_reconstruction')
    grouped_records = self.group_results(records)

    pooled = {}
    for model in grouped_records:
        pooled[model] = []
        genes = grouped_records[model]['genes']
        transcripts_by_gene = grouped_records[model]['transcripts']

        for gene in genes:
            gene_records = genes[gene]
            pooled_gene = self._new_pooled_entry(gene_records[0], pool_headers)
            exons_gene = self.string_as_blocks(gene_records[0]['exons'])
            blocks_gene = []
            pooled_transcripts = []

            transcripts = transcripts_by_gene[gene]
            for transcript in transcripts:
                transcript_records = transcripts[transcript]
                pooled_transcript = self._new_pooled_entry(
                    transcript_records[0], pool_headers)
                exons_transcript = self.string_as_blocks(
                    transcript_records[0]['exons'])
                blocks_transcript = []

                for record in transcript_records:
                    # Contigs count towards both the transcript and its gene.
                    contigs = record['contigs'].split(',')
                    pooled_transcript['contigs_list'].extend(contigs)
                    pooled_gene['contigs_list'].extend(contigs)

                    self._pool_best_contig(pooled_transcript, record)
                    self._pool_num_reads(pooled_transcript, record, add_reads)

                    # Alignment blocks feed both reconstruction levels.
                    blocks = self.string_as_blocks(record['align_blocks'])
                    blocks_transcript.append(blocks)
                    blocks_gene.append(blocks)

                self._finalize_pooled_entry(
                    pooled_transcript, exons_transcript, blocks_transcript)
                pooled_transcripts.append(pooled_transcript)

            # The gene row is pooled from its already-pooled transcripts.
            for pooled_transcript in pooled_transcripts:
                self._pool_best_contig(pooled_gene, pooled_transcript)
                self._pool_num_reads(pooled_gene, pooled_transcript, add_reads)
            self._finalize_pooled_entry(pooled_gene, exons_gene, blocks_gene)

            pooled[model].append(pooled_gene)
            pooled[model].extend(pooled_transcripts)

    self.results = []
    for model in self.models:
        # ``in`` instead of ``dict.has_key`` so this runs on Python 2 and 3.
        if model in pooled:
            self.results.extend(pooled[model])


def _new_pooled_entry(self, template, pool_headers):
    """Create an empty pooled row seeded with the non-pooled report fields of ``template``."""
    entry = {
        'contigs_list': [],
        'best_contig': None,
        'best_contig_reconstruction': None,
        'num_reads': None,
        'depth': None,
        'align_blocks': None,
        'exons': None,
    }
    for field in self.report_fields:
        if field not in pool_headers:
            entry[field] = template[field]
    return entry


def _pool_best_contig(self, entry, source):
    """Adopt the best contig of ``source`` if it beats the one held by ``entry``."""
    if (entry['best_contig'] is None or
            float(source['best_contig_reconstruction']) >
            float(entry['best_contig_reconstruction'])):
        entry['best_contig'] = source['best_contig']
        entry['best_contig_reconstruction'] = source['best_contig_reconstruction']


def _pool_num_reads(self, entry, source, add_reads):
    """Fold the read count of ``source`` into ``entry`` (sum or maximum)."""
    placeholders = ('-', 'na')

    if entry['num_reads'] is None:
        # First contribution: taken as is, together with its depth.
        entry['num_reads'] = source['num_reads']
        entry['depth'] = source['depth']
        return

    if source['num_reads'] in placeholders:
        # Nothing numeric to contribute.
        return

    incoming = int(source['num_reads'])
    if entry['num_reads'] in placeholders:
        # The running value is only a placeholder, so the first real count
        # replaces it instead of being fed to int(), which would raise.
        entry['num_reads'] = incoming
        return

    # Combine with the running value of ``entry``.  The gene-level code used
    # to overwrite the running value with ``source`` here, which lost all
    # previously pooled reads.
    current = int(entry['num_reads'])
    if add_reads:
        entry['num_reads'] = current + incoming
    else:
        entry['num_reads'] = max(current, incoming)


def _finalize_pooled_entry(self, entry, exons, block_lists):
    """Fill in contigs, depth, reconstruction and align blocks of a pooled row."""
    union_blocks = intspan.union(block_lists)
    bases_reconstructed, fraction_reconstructed = self.calc_reconstruction(
        exons, union_blocks, size=entry['feature_size'])

    entry['contigs'] = ','.join(entry['contigs_list'])
    entry['num_contigs'] = len(entry['contigs_list'])
    # Depth can only be recomputed from a real read count; with a
    # placeholder the depth copied from the first record is kept.
    if entry['num_reads'] not in ('-', 'na'):
        entry['depth'] = "%.3f" % (float(entry['num_reads']) /
                                   float(entry['feature_size']))
    entry['bases_reconstructed'] = bases_reconstructed
    entry['reconstruction'] = "%.3f" % (fraction_reconstructed)
    entry['align_blocks'] = self.blocks_as_string(union_blocks)