def output_filtered_comparisons(self, output_dir=None,
                                sort_column="bayes_factor",
                                columns_to_write=[
                                    # "event_name" is left out of the
                                    # default columns (it was commented
                                    # out of this list).
                                    "gene_id",
                                    "gene_symbol",
                                    "sample1_posterior_mean",
                                    "sample1_ci_low",
                                    "sample1_ci_high",
                                    "sample2_posterior_mean",
                                    "sample2_ci_low",
                                    "sample2_ci_high",
                                    "diff",
                                    "bayes_factor",
                                    "isoforms",
                                    "sample1_counts",
                                    "sample1_assigned_counts",
                                    "sample2_counts",
                                    "sample2_assigned_counts",
                                    "chrom",
                                    "strand",
                                    "mRNA_starts",
                                    "mRNA_ends"]):
    """
    Output filtered comparisons table.

    For every event type in ``self.filtered_events`` and every
    comparison label found in the first index level of that event
    type's table, write one file to:

      <output_dir>/filtered_events/<event_type>/<label>/
          <label>.<event_type>.filtered.miso_bf

    Arguments:

    - output_dir: base output directory. When None,
      ``self.misowrap_obj.comparisons_dir`` is used.
    - sort_column: column the rows of each file are sorted by, in
      descending order.
    - columns_to_write: columns handed to ``to_csv``. The default list
      is shared between calls; it is only read here, never modified.

    Fields are separated by ``self.delimiter`` and floats are written
    with four decimal places.
    """
    # Identity test: None is a singleton, and '==' can be overridden.
    if output_dir is None:
        output_dir = self.misowrap_obj.comparisons_dir
    # Output each file by event type
    output_dir = os.path.join(output_dir, "filtered_events")
    # Single-argument print(...) gives the same output whether it is
    # parsed as a Python 2 print statement or a Python 3 function call.
    print("Outputting filtered events...")
    print(" - Output dir: %s" % (output_dir))
    utils.make_dir(output_dir)
    for event_type, filtered_df in self.filtered_events.iteritems():
        curr_output_dir = os.path.join(output_dir, event_type)
        print("Event type: %s" % (event_type))
        # View by comparison
        comparison_labels = \
            utils.unique_list(filtered_df.index.get_level_values(0))
        print("Outputting %d comparisons" % (len(comparison_labels)))
        for label in comparison_labels:
            print("Comparison: %s" % (label))
            comparison_output_dir = os.path.join(curr_output_dir, label)
            utils.make_dir(comparison_output_dir)
            output_filename = os.path.join(
                comparison_output_dir,
                "%s.%s.filtered.miso_bf" % (label, event_type))
            print("Outputting to: %s" % (output_filename))
            # NOTE(review): .ix, sort_index(by=...) and to_csv(cols=...)
            # are legacy pandas spellings; confirm the pandas version
            # this project pins before modernizing them.
            curr_df = filtered_df.ix[label].sort_index(by=sort_column,
                                                       ascending=False)
            curr_df.to_csv(output_filename,
                           sep=self.delimiter,
                           float_format="%.4f",
                           cols=columns_to_write)
def new_recs():
    """
    Generate the records of every gene group in ``events_db``, with the
    first record of each group annotated with gene identifiers.

    For each group returned by ``events_db.iter_by_parent_childs()``
    the first record is treated as the gene record. Its "ensg_id",
    "refseq_id" and "gsymbol" attributes are each set to a one-element
    list holding, in order of preference:

      1. the comma-joined unique values stored under that key in
         ``event_genes_to_info[<gene record id>]``, when that entry
         exists and its first unique value is not "NA";
      2. the first value already stored on the record under that key;
      3. the string "NA".

    ``events_db``, ``event_genes_to_info`` and ``utils`` are not defined
    here; they are read from the surrounding scope.

    Yields every record of each group, in the group's own order.
    """
    # Attribute keys written on the gene record. The same keys are used
    # to look up pre-existing values and the entries of
    # event_genes_to_info, so a lookup can never test one key and read
    # another.
    id_keys = ("ensg_id", "refseq_id", "gsymbol")
    # The groups are collected into a list before any record is
    # modified, as the original code did.
    # NOTE(review): presumably so the database iteration has finished
    # before records are changed -- confirm before making this lazy.
    for gene_recs in list(events_db.iter_by_parent_childs()):
        gene_rec = gene_recs[0]
        event_id = gene_rec.id
        # Use existing IDs if present
        id_values = {}
        for key in id_keys:
            if key in gene_rec.attributes:
                id_values[key] = gene_rec.attributes[key][0]
            else:
                id_values[key] = "NA"
        # IDs collected for this event take precedence over existing
        # ones, unless they are empty or start with "NA"
        if event_id in event_genes_to_info:
            event_info = event_genes_to_info[event_id]
            for key in id_keys:
                unique_ids = utils.unique_list(event_info[key])
                if len(unique_ids) > 0 and unique_ids[0] != "NA":
                    id_values[key] = ",".join(unique_ids)
        for key in id_keys:
            gene_rec.attributes[key] = [id_values[key]]
        # Yield all the gene's records
        for g in gene_recs:
            yield g
def output_filtered_comparisons(
        self,
        output_dir=None,
        sort_column="bayes_factor",
        columns_to_write=[
            # "event_name" is left out of the default columns (it was
            # commented out of this list).
            "gene_id",
            "gene_symbol",
            "sample1_posterior_mean",
            "sample1_ci_low",
            "sample1_ci_high",
            "sample2_posterior_mean",
            "sample2_ci_low",
            "sample2_ci_high",
            "diff",
            "bayes_factor",
            "isoforms",
            "sample1_counts",
            "sample1_assigned_counts",
            "sample2_counts",
            "sample2_assigned_counts",
            "chrom",
            "strand",
            "mRNA_starts",
            "mRNA_ends"
        ]):
    """
    Output filtered comparisons table.

    Walks ``self.filtered_events`` (event type -> table). For each
    comparison label in the first index level of a table, the rows for
    that label are sorted by ``sort_column`` (descending) and written
    to:

      <output_dir>/filtered_events/<event_type>/<label>/
          <label>.<event_type>.filtered.miso_bf

    Arguments:

    - output_dir: base output directory; falls back to
      ``self.misowrap_obj.comparisons_dir`` when None.
    - sort_column: name of the column to sort each file by.
    - columns_to_write: columns passed on to ``to_csv``. The default
      list object is shared between calls and is only read here.

    Files use ``self.delimiter`` as the separator and the "%.4f" float
    format.
    """
    # None is a singleton: compare by identity rather than with '=='.
    if output_dir is None:
        output_dir = self.misowrap_obj.comparisons_dir
    # Output each file by event type
    output_dir = os.path.join(output_dir, "filtered_events")
    # print(...) with one argument produces identical output under the
    # Python 2 print statement and the Python 3 print function.
    print("Outputting filtered events...")
    print(" - Output dir: %s" % (output_dir))
    utils.make_dir(output_dir)
    for event_type, filtered_df in self.filtered_events.iteritems():
        curr_output_dir = os.path.join(output_dir, event_type)
        print("Event type: %s" % (event_type))
        # View by comparison
        comparison_labels = utils.unique_list(
            filtered_df.index.get_level_values(0))
        print("Outputting %d comparisons" % (len(comparison_labels)))
        for label in comparison_labels:
            print("Comparison: %s" % (label))
            comparison_output_dir = os.path.join(curr_output_dir, label)
            utils.make_dir(comparison_output_dir)
            output_basename = "%s.%s.filtered.miso_bf" % (label, event_type)
            output_filename = os.path.join(comparison_output_dir,
                                           output_basename)
            print("Outputting to: %s" % (output_filename))
            # NOTE(review): .ix, sort_index(by=...) and to_csv(cols=...)
            # are legacy pandas spellings; check the pandas version in
            # use before replacing them.
            curr_df = filtered_df.ix[label].sort_index(by=sort_column,
                                                       ascending=False)
            curr_df.to_csv(output_filename,
                           sep=self.delimiter,
                           float_format="%.4f",
                           cols=columns_to_write)