def compare(settings, logs_outdir, delay=5, dry_run=False):
    """
    Run a MISO samples comparison between all pairs of samples.

    For each comparison group in the misowrap settings, every pair of
    samples returned by utils.get_pairwise_comparisons is compared once
    per event type. Each comparison command is logged and then either
    submitted through the cluster object or run locally via os.system.

    Parameters:
      settings: path to the misowrap settings file.
      logs_outdir: directory passed to MISOWrap for its logs.
      delay: seconds to sleep after each cluster job submission.
      dry_run: if True, commands are only logged; nothing is submitted
        or executed (and no delay is applied).
    """
    settings_filename = utils.pathify(settings)
    # Normalize and create the logs directory before handing it to
    # MISOWrap, the same way combine_comparisons does.
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="compare")
    logger = misowrap_obj.logger
    miso_output_dir = misowrap_obj.miso_outdir
    comparisons_dir = misowrap_obj.comparisons_dir
    utils.make_dir(comparisons_dir)
    logger.info("Running MISO comparisons...")
    ##
    ## Compute comparisons between all pairs
    ## in a sample group
    ##
    for comp_group in misowrap_obj.comparison_groups:
        sample_pairs = utils.get_pairwise_comparisons(comp_group)
        # Single-argument print call: same output on Python 2, and
        # still valid syntax on Python 3.
        print(" - Total of %d comparisons" % (len(sample_pairs)))
        for sample1, sample2 in sample_pairs:
            # For each pair of samples, compare their output
            # along each event type
            logger.info("Comparing %s %s" % (sample1, sample2))
            # Directories for each sample
            sample1_dir = os.path.join(miso_output_dir, sample1)
            sample2_dir = os.path.join(miso_output_dir, sample2)
            for event_type in misowrap_obj.event_types:
                sample1_event_dir = os.path.join(sample1_dir, event_type)
                sample2_event_dir = os.path.join(sample2_dir, event_type)
                job_name = "compare_%s_%s_%s" % (sample1, sample2,
                                                 event_type)
                event_comparisons_dir = os.path.join(comparisons_dir,
                                                     event_type)
                compare_cmd = ("%s --compare-samples %s %s %s "
                               "--comparison-labels %s %s"
                               % (misowrap_obj.compare_miso_cmd,
                                  sample1_event_dir,
                                  sample2_event_dir,
                                  event_comparisons_dir,
                                  sample1,
                                  sample2))
                logger.info("Executing: %s" % (compare_cmd))
                if dry_run:
                    # Command has been logged; do not run anything.
                    continue
                if misowrap_obj.use_cluster:
                    misowrap_obj.my_cluster.launch_job(compare_cmd,
                                                       job_name,
                                                       ppn=1)
                    # Space out submissions to the cluster scheduler.
                    time.sleep(delay)
                else:
                    os.system(compare_cmd)
def compare(settings, logs_outdir, delay=5, dry_run=False):
    """
    Run a MISO samples comparison between all pairs of samples.

    For each comparison group in the misowrap settings, every pair of
    samples returned by utils.get_pairwise_comparisons is compared once
    per event type. Each comparison command is logged and then either
    submitted through the cluster object or run locally via os.system.

    Parameters:
      settings: path to the misowrap settings file.
      logs_outdir: directory passed to MISOWrap for its logs.
      delay: seconds to sleep after each cluster job submission.
      dry_run: if True, commands are only logged; nothing is submitted
        or executed (and no delay is applied).
    """
    settings_filename = utils.pathify(settings)
    # Normalize and create the logs directory before handing it to
    # MISOWrap, the same way combine_comparisons does.
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="compare")
    logger = misowrap_obj.logger
    miso_output_dir = misowrap_obj.miso_outdir
    comparisons_dir = misowrap_obj.comparisons_dir
    utils.make_dir(comparisons_dir)
    logger.info("Running MISO comparisons...")
    ##
    ## Compute comparisons between all pairs
    ## in a sample group
    ##
    for comp_group in misowrap_obj.comparison_groups:
        sample_pairs = utils.get_pairwise_comparisons(comp_group)
        # Single-argument print call: same output on Python 2, and
        # still valid syntax on Python 3.
        print(" - Total of %d comparisons" % (len(sample_pairs)))
        for sample1, sample2 in sample_pairs:
            # For each pair of samples, compare their output
            # along each event type
            logger.info("Comparing %s %s" % (sample1, sample2))
            # Directories for each sample
            sample1_dir = os.path.join(miso_output_dir, sample1)
            sample2_dir = os.path.join(miso_output_dir, sample2)
            for event_type in misowrap_obj.event_types:
                sample1_event_dir = os.path.join(sample1_dir, event_type)
                sample2_event_dir = os.path.join(sample2_dir, event_type)
                job_name = "compare_%s_%s_%s" % (sample1, sample2,
                                                 event_type)
                event_comparisons_dir = os.path.join(comparisons_dir,
                                                     event_type)
                compare_cmd = ("%s --compare-samples %s %s %s "
                               "--comparison-labels %s %s"
                               % (misowrap_obj.compare_miso_cmd,
                                  sample1_event_dir,
                                  sample2_event_dir,
                                  event_comparisons_dir,
                                  sample1,
                                  sample2))
                logger.info("Executing: %s" % (compare_cmd))
                if dry_run:
                    # Command has been logged; do not run anything.
                    continue
                if misowrap_obj.use_cluster:
                    misowrap_obj.my_cluster.launch_job(compare_cmd,
                                                       job_name,
                                                       ppn=1)
                    # Space out submissions to the cluster scheduler.
                    time.sleep(delay)
                else:
                    os.system(compare_cmd)
def combine_comparisons(settings,
                        logs_outdir,
                        common_cols=["isoforms",
                                     "chrom",
                                     "strand",
                                     "mRNA_starts",
                                     "mRNA_ends",
                                     "gene_id",
                                     "gene_symbol"],
                        delay=5,
                        dry_run=False,
                        NA_VAL="NA"):
    """
    Output combined MISO comparisons. For each event type,
    combine the MISO comparisons for the relevant groups
    based on the 'comparison_groups' in the misowrap
    settings file.

    Two directories under the comparisons directory are processed if
    they exist: 'combined_comparisons' and 'filtered_events'. For each
    one, and for each event type that has a subdirectory there, the
    per-pair MISO Bayes factor tables are loaded, merged with
    pandas_utils.combine_dfs and written as a tab-separated
    '<event_type>.miso_bf' file.

    Parameters:
      settings: path to the misowrap settings file.
      logs_outdir: directory for logs; created if needed.
      common_cols: currently unused by this function; kept for
        interface compatibility.
      delay: currently unused by this function; kept for interface
        compatibility.
      dry_run: if True, the merge is computed and logged but no file
        is written.
      NA_VAL: string written in place of missing values.

    Exits the process with status 1 if the comparisons directory
    itself does not exist.
    """
    settings_filename = utils.pathify(settings)
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="combine_comparisons")
    logger = misowrap_obj.logger
    comparisons_dir = misowrap_obj.comparisons_dir
    if not os.path.isdir(comparisons_dir):
        logger.critical("Comparisons directory %s not found. "
                        % (comparisons_dir))
        sys.exit(1)
    # Comparison types to combine: unfiltered comparisons and filtered
    # comparisons (if available)
    unfiltered_comp_dir = os.path.join(comparisons_dir,
                                       "combined_comparisons")
    filtered_comp_dir = os.path.join(comparisons_dir, "filtered_events")
    comparison_groups = misowrap_obj.comparison_groups
    for curr_comp_dir in (unfiltered_comp_dir, filtered_comp_dir):
        if not os.path.isdir(curr_comp_dir):
            # Single-argument print call: same output on Python 2, and
            # still valid syntax on Python 3.
            print("Comparisons directory %s not found, skipping"
                  % (curr_comp_dir))
            continue
        # For each event type, output the sample comparisons
        for event_type in misowrap_obj.event_types:
            # Collection of MISO comparison dataframes (to be merged
            # later) for the current event type
            comparison_dfs = []
            event_dir = os.path.join(curr_comp_dir, event_type)
            if not os.path.isdir(event_dir):
                logger.info("Cannot find event type %s dir, "
                            "skipping..." % (event_type))
                continue
            # Look only at sample comparisons within each sample group
            for comp_group in comparison_groups:
                sample_pairs = utils.get_pairwise_comparisons(comp_group)
                logger.info(" - Total of %d comparisons"
                            % (len(sample_pairs)))
                for sample1, sample2 in sample_pairs:
                    # Load miso_bf file for the current comparison
                    # and queue it for the merge below
                    comparison_name = "%s_vs_%s" % (sample1, sample2)
                    bf_data = miso_utils.load_miso_bf_file(
                        event_dir,
                        comparison_name,
                        substitute_labels=True)
                    if bf_data is None:
                        logger.warning("Could not find comparison %s"
                                       % (comparison_name))
                        continue
                    comparison_dfs.append(bf_data)
            if not comparison_dfs:
                # Nothing was loaded for this event type, so there is
                # nothing to merge or write.
                logger.warning("No comparisons loaded for %s, skipping"
                               % (event_type))
                continue
            # Merge the comparison dfs together
            print("Merging comparisons for %s" % (event_type))
            combined_df = pandas_utils.combine_dfs(comparison_dfs)
            # The original referenced an undefined 'output_dir' here.
            # NOTE(review): writing next to the event-type directories
            # inside curr_comp_dir (known to exist at this point) --
            # confirm this is the intended output location.
            output_filename = os.path.join(curr_comp_dir,
                                           "%s.miso_bf" % (event_type))
            logger.info("Outputting %s results to: %s"
                        % (event_type, output_filename))
            if not dry_run:
                combined_df.to_csv(output_filename,
                                   float_format="%.4f",
                                   sep="\t",
                                   na_rep=NA_VAL,
                                   index=True)
def combine_comparisons(settings,
                        logs_outdir,
                        common_cols=["isoforms",
                                     "chrom",
                                     "strand",
                                     "mRNA_starts",
                                     "mRNA_ends",
                                     "gene_id",
                                     "gene_symbol"],
                        delay=5,
                        dry_run=False,
                        NA_VAL="NA"):
    """
    Output combined MISO comparisons. For each event type,
    combine the MISO comparisons for the relevant groups
    based on the 'comparison_groups' in the misowrap
    settings file.

    Two directories under the comparisons directory are processed if
    they exist: 'combined_comparisons' and 'filtered_events'. For each
    one, and for each event type that has a subdirectory there, the
    per-pair MISO Bayes factor tables are loaded, merged with
    pandas_utils.combine_dfs and written as a tab-separated
    '<event_type>.miso_bf' file.

    Parameters:
      settings: path to the misowrap settings file.
      logs_outdir: directory for logs; created if needed.
      common_cols: currently unused by this function; kept for
        interface compatibility.
      delay: currently unused by this function; kept for interface
        compatibility.
      dry_run: if True, the merge is computed and logged but no file
        is written.
      NA_VAL: string written in place of missing values.

    Exits the process with status 1 if the comparisons directory
    itself does not exist.
    """
    settings_filename = utils.pathify(settings)
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="combine_comparisons")
    logger = misowrap_obj.logger
    comparisons_dir = misowrap_obj.comparisons_dir
    if not os.path.isdir(comparisons_dir):
        logger.critical("Comparisons directory %s not found. "
                        % (comparisons_dir))
        sys.exit(1)
    # Comparison types to combine: unfiltered comparisons and filtered
    # comparisons (if available)
    unfiltered_comp_dir = os.path.join(comparisons_dir,
                                       "combined_comparisons")
    filtered_comp_dir = os.path.join(comparisons_dir, "filtered_events")
    comparison_groups = misowrap_obj.comparison_groups
    for curr_comp_dir in (unfiltered_comp_dir, filtered_comp_dir):
        if not os.path.isdir(curr_comp_dir):
            # Single-argument print call: same output on Python 2, and
            # still valid syntax on Python 3.
            print("Comparisons directory %s not found, skipping"
                  % (curr_comp_dir))
            continue
        # For each event type, output the sample comparisons
        for event_type in misowrap_obj.event_types:
            # Collection of MISO comparison dataframes (to be merged
            # later) for the current event type
            comparison_dfs = []
            event_dir = os.path.join(curr_comp_dir, event_type)
            if not os.path.isdir(event_dir):
                logger.info("Cannot find event type %s dir, "
                            "skipping..." % (event_type))
                continue
            # Look only at sample comparisons within each sample group
            for comp_group in comparison_groups:
                sample_pairs = utils.get_pairwise_comparisons(comp_group)
                logger.info(" - Total of %d comparisons"
                            % (len(sample_pairs)))
                for sample1, sample2 in sample_pairs:
                    # Load miso_bf file for the current comparison
                    # and queue it for the merge below
                    comparison_name = "%s_vs_%s" % (sample1, sample2)
                    bf_data = miso_utils.load_miso_bf_file(
                        event_dir,
                        comparison_name,
                        substitute_labels=True)
                    if bf_data is None:
                        logger.warning("Could not find comparison %s"
                                       % (comparison_name))
                        continue
                    comparison_dfs.append(bf_data)
            if not comparison_dfs:
                # Nothing was loaded for this event type, so there is
                # nothing to merge or write.
                logger.warning("No comparisons loaded for %s, skipping"
                               % (event_type))
                continue
            # Merge the comparison dfs together
            print("Merging comparisons for %s" % (event_type))
            combined_df = pandas_utils.combine_dfs(comparison_dfs)
            # The original referenced an undefined 'output_dir' here.
            # NOTE(review): writing next to the event-type directories
            # inside curr_comp_dir (known to exist at this point) --
            # confirm this is the intended output location.
            output_filename = os.path.join(curr_comp_dir,
                                           "%s.miso_bf" % (event_type))
            logger.info("Outputting %s results to: %s"
                        % (event_type, output_filename))
            if not dry_run:
                combined_df.to_csv(output_filename,
                                   float_format="%.4f",
                                   sep="\t",
                                   na_rep=NA_VAL,
                                   index=True)