Esempio n. 1
0
def compare(settings, logs_outdir, delay=5, dry_run=False):
    """
    Run a MISO samples comparison between all pairs of samples.

    For every comparison group in the misowrap settings, each pair of
    samples is compared once per event type. The comparison command is
    submitted to the cluster when the settings enable it, and is run
    locally through the shell otherwise.

    Args:
        settings: Path to the misowrap settings file.
        logs_outdir: Logs directory passed on to MISOWrap.
        delay: Seconds to sleep after each cluster job submission.
        dry_run: If true, only log the commands without running them.
    """
    settings_filename = utils.pathify(settings)
    misowrap_obj = mw.MISOWrap(settings_filename, logs_outdir, logger_label="compare")
    logger = misowrap_obj.logger
    miso_output_dir = misowrap_obj.miso_outdir
    comparisons_dir = misowrap_obj.comparisons_dir
    utils.make_dir(comparisons_dir)
    logger.info("Running MISO comparisons...")
    ##
    ## Compute comparisons between all pairs
    ## in a sample group
    ##
    for comp_group in misowrap_obj.comparison_groups:
        sample_pairs = utils.get_pairwise_comparisons(comp_group)
        # Parenthesized single-argument form prints the same text under
        # both the Python 2 print statement and the Python 3 function.
        print("  - Total of %d comparisons" % (len(sample_pairs)))
        for sample1, sample2 in sample_pairs:
            logger.info("Comparing %s %s" % (sample1, sample2))
            # Per-sample MISO output directories
            sample1_dir = os.path.join(miso_output_dir, sample1)
            sample2_dir = os.path.join(miso_output_dir, sample2)
            for event_type in misowrap_obj.event_types:
                job_name = "compare_%s_%s_%s" % (sample1, sample2, event_type)
                compare_cmd = "%s --compare-samples %s %s %s --comparison-labels %s %s" % (
                    misowrap_obj.compare_miso_cmd,
                    os.path.join(sample1_dir, event_type),
                    os.path.join(sample2_dir, event_type),
                    os.path.join(comparisons_dir, event_type),
                    sample1,
                    sample2,
                )
                logger.info("Executing: %s" % (compare_cmd))
                if dry_run:
                    # Dry runs stop after logging the command.
                    continue
                if misowrap_obj.use_cluster:
                    misowrap_obj.my_cluster.launch_job(compare_cmd, job_name, ppn=1)
                    # Space out consecutive job submissions.
                    time.sleep(delay)
                else:
                    exit_status = os.system(compare_cmd)
                    if exit_status != 0:
                        # Keep going with the remaining comparisons, but
                        # leave a trace of the failure in the log.
                        logger.warning(
                            "Comparison command exited with non-zero status %d: %s"
                            % (exit_status, compare_cmd)
                        )
Esempio n. 2
0
def compare(settings,
            logs_outdir,
            delay=5,
            dry_run=False):
    """
    Run a MISO samples comparison between all pairs of samples.

    Each pair of samples within a comparison group is compared once
    for every event type listed in the misowrap settings. Commands are
    launched as cluster jobs when 'use_cluster' is set on the MISOWrap
    object and executed with os.system otherwise.

    Args:
        settings: Path to the misowrap settings file.
        logs_outdir: Logs directory passed on to MISOWrap.
        delay: Seconds to wait after launching each cluster job.
        dry_run: If true, commands are logged but never executed.
    """
    settings_filename = utils.pathify(settings)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="compare")
    miso_output_dir = misowrap_obj.miso_outdir
    comparison_groups = misowrap_obj.comparison_groups
    comparisons_dir = misowrap_obj.comparisons_dir
    utils.make_dir(comparisons_dir)
    misowrap_obj.logger.info("Running MISO comparisons...")
    ##
    ## Compute comparisons between all pairs
    ## in a sample group
    ##
    for comp_group in comparison_groups:
        sample_pairs = utils.get_pairwise_comparisons(comp_group)
        # A single parenthesized argument behaves identically as a
        # Python 2 print statement and a Python 3 print() call.
        print("  - Total of %d comparisons" % (len(sample_pairs)))
        for sample1, sample2 in sample_pairs:
            # For each pair of samples, compare their output
            # along each event type
            misowrap_obj.logger.info("Comparing %s %s" % (sample1,
                                                          sample2))
            for event_type in misowrap_obj.event_types:
                # Event-specific directory of each sample
                sample1_event_dir = os.path.join(miso_output_dir,
                                                 sample1,
                                                 event_type)
                sample2_event_dir = os.path.join(miso_output_dir,
                                                 sample2,
                                                 event_type)
                event_comparisons_dir = os.path.join(comparisons_dir,
                                                     event_type)
                job_name = "compare_%s_%s_%s" % (sample1,
                                                 sample2,
                                                 event_type)
                compare_cmd = "%s --compare-samples %s %s %s " \
                    "--comparison-labels %s %s" \
                    % (misowrap_obj.compare_miso_cmd,
                       sample1_event_dir,
                       sample2_event_dir,
                       event_comparisons_dir,
                       sample1,
                       sample2)
                misowrap_obj.logger.info("Executing: %s" % (compare_cmd))
                if dry_run:
                    # Nothing is executed on a dry run
                    continue
                if misowrap_obj.use_cluster:
                    misowrap_obj.my_cluster.launch_job(compare_cmd,
                                                       job_name,
                                                       ppn=1)
                    # Wait before submitting the next job
                    time.sleep(delay)
                else:
                    status = os.system(compare_cmd)
                    if status != 0:
                        # Report the failure instead of silently
                        # dropping the exit status; later comparisons
                        # still run.
                        misowrap_obj.logger.warning(
                            "Comparison command exited with non-zero "
                            "status %d: %s" % (status, compare_cmd))
Esempio n. 3
0
def combine_comparisons(
    settings,
    logs_outdir,
    common_cols=["isoforms", "chrom", "strand", "mRNA_starts", "mRNA_ends", "gene_id", "gene_symbol"],
    delay=5,
    dry_run=False,
    NA_VAL="NA",
):
    """
    Output combined MISO comparisons. For each event type,
    combine the MISO comparisons for the relevant groups
    based on the 'comparison_groups' in the misowrap
    settings file.

    Both the 'combined_comparisons' and 'filtered_events' subdirectories
    of the comparisons directory are processed; a missing one is skipped.
    The process exits with status 1 if the comparisons directory itself
    does not exist.

    Args:
        settings: Path to the misowrap settings file.
        logs_outdir: Logs directory; passed to utils.make_dir and MISOWrap.
        common_cols: Not used by this function; kept so existing callers
            keep working.
        delay: Not used by this function; kept so existing callers keep
            working.
        dry_run: If true, the combined files are not written.
        NA_VAL: Passed to to_csv as 'na_rep' (text for missing values).
    """
    settings_filename = utils.pathify(settings)
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename, logs_outdir, logger_label="combine_comparisons")
    logger = misowrap_obj.logger
    comparisons_dir = misowrap_obj.comparisons_dir
    if not os.path.isdir(comparisons_dir):
        logger.critical("Comparisons directory %s not found. " % (comparisons_dir))
        sys.exit(1)
    # Comparison types to combine: unfiltered comparisons and filtered comparisons
    # (if available)
    dirs_to_process = [
        os.path.join(comparisons_dir, "combined_comparisons"),
        os.path.join(comparisons_dir, "filtered_events"),
    ]
    comparison_groups = misowrap_obj.comparison_groups
    for curr_comp_dir in dirs_to_process:
        if not os.path.isdir(curr_comp_dir):
            # Parenthesized so the line is valid on Python 2 and Python 3.
            print("Comparisons directory %s not found, skipping" % (curr_comp_dir))
            continue
        # The original code referenced an `output_dir` name that was never
        # assigned in this function. Combined files are written into the
        # directory currently being processed, which is known to exist.
        # NOTE(review): confirm this is the intended destination.
        output_dir = curr_comp_dir
        # For each event type, output the sample comparisons
        for event_type in misowrap_obj.event_types:
            event_dir = os.path.join(curr_comp_dir, event_type)
            if not os.path.isdir(event_dir):
                logger.info("Cannot find event type %s dir, skipping..." % (event_type))
                continue
            # MISO comparison dataframes for the current event type
            # (merged below)
            comparison_dfs = []
            # Look only at sample comparisons within each sample group
            for comp_group in comparison_groups:
                sample_pairs = utils.get_pairwise_comparisons(comp_group)
                logger.info("  - Total of %d comparisons" % (len(sample_pairs)))
                for sample1, sample2 in sample_pairs:
                    comparison_name = "%s_vs_%s" % (sample1, sample2)
                    bf_data = miso_utils.load_miso_bf_file(event_dir, comparison_name, substitute_labels=True)
                    if bf_data is None:
                        logger.warning("Could not find comparison %s" % (comparison_name))
                        continue
                    comparison_dfs.append(bf_data)
            # Merge the comparison dfs together
            print("Merging comparisons for %s" % (event_type))
            combined_df = pandas_utils.combine_dfs(comparison_dfs)
            output_filename = os.path.join(output_dir, "%s.miso_bf" % (event_type))
            logger.info("Outputting %s results to: %s" % (event_type, output_filename))
            if not dry_run:
                combined_df.to_csv(output_filename, float_format="%.4f", sep="\t", na_rep=NA_VAL, index=True)
Esempio n. 4
0
def combine_comparisons(settings,
                        logs_outdir,
                        common_cols=["isoforms",
                                     "chrom",
                                     "strand",
                                     "mRNA_starts",
                                     "mRNA_ends",
                                     "gene_id",
                                     "gene_symbol"],
                        delay=5,
                        dry_run=False,
                        NA_VAL="NA"):
    """
    Output combined MISO comparisons. For each event type,
    combine the MISO comparisons for the relevant groups
    based on the 'comparison_groups' in the misowrap
    settings file.

    The 'combined_comparisons' and 'filtered_events' subdirectories of
    the comparisons directory are each processed if present. If the
    comparisons directory itself is missing, the process exits with
    status 1.

    Args:
        settings: Path to the misowrap settings file.
        logs_outdir: Logs directory; passed to utils.make_dir and MISOWrap.
        common_cols: Not read anywhere in this function; retained for
            backward compatibility.
        delay: Not read anywhere in this function; retained for
            backward compatibility.
        dry_run: If true, skip writing the combined output files.
        NA_VAL: Passed to to_csv as 'na_rep' (text for missing values).
    """
    settings_filename = utils.pathify(settings)
    logs_outdir = utils.pathify(logs_outdir)
    utils.make_dir(logs_outdir)
    misowrap_obj = mw.MISOWrap(settings_filename,
                               logs_outdir,
                               logger_label="combine_comparisons")
    comparisons_dir = misowrap_obj.comparisons_dir
    if not os.path.isdir(comparisons_dir):
        misowrap_obj.logger.critical("Comparisons directory %s not found. " \
                                     % (comparisons_dir))
        sys.exit(1)
    # Comparison types to combine: unfiltered comparisons and filtered comparisons
    # (if available)
    unfiltered_comp_dir = os.path.join(comparisons_dir,
                                       "combined_comparisons")
    filtered_comp_dir = os.path.join(comparisons_dir,
                                     "filtered_events")
    comparison_groups = misowrap_obj.comparison_groups
    for curr_comp_dir in (unfiltered_comp_dir, filtered_comp_dir):
        if not os.path.isdir(curr_comp_dir):
            # Single parenthesized argument: same output on Python 2 and 3
            print("Comparisons directory %s not found, skipping" % (curr_comp_dir))
            continue
        # `output_dir` was used below without ever being assigned in this
        # function. Write the combined files into the directory being
        # processed, which has just been checked to exist.
        # NOTE(review): confirm this is the intended destination.
        output_dir = curr_comp_dir
        # For each event type, output the sample comparisons
        for event_type in misowrap_obj.event_types:
            # Collection of MISO comparison dataframes (to be merged later)
            # for the current event type
            comparison_dfs = []
            event_dir = os.path.join(curr_comp_dir, event_type)
            if not os.path.isdir(event_dir):
                misowrap_obj.logger.info("Cannot find event type %s dir, " \
                                         "skipping..." % (event_type))
                continue
            # Look only at sample comparisons within each sample group
            for comp_group in comparison_groups:
                sample_pairs = utils.get_pairwise_comparisons(comp_group)
                misowrap_obj.logger.info("  - Total of %d comparisons" \
                                         % (len(sample_pairs)))
                for sample1, sample2 in sample_pairs:
                    # Load miso_bf file for the current comparison
                    # and keep it for the merge below
                    comparison_name = "%s_vs_%s" % (sample1, sample2)
                    bf_data = miso_utils.load_miso_bf_file(event_dir,
                                                           comparison_name,
                                                           substitute_labels=True)
                    if bf_data is None:
                        misowrap_obj.logger.warning("Could not find comparison %s" \
                                                    % (comparison_name))
                        continue
                    comparison_dfs.append(bf_data)
            # Merge the comparison dfs together
            print("Merging comparisons for %s" % (event_type))
            combined_df = pandas_utils.combine_dfs(comparison_dfs)
            output_filename = os.path.join(output_dir,
                                           "%s.miso_bf" % (event_type))
            misowrap_obj.logger.info("Outputting %s results to: %s" \
                                     % (event_type, output_filename))
            if not dry_run:
                combined_df.to_csv(output_filename,
                                   float_format="%.4f",
                                   sep="\t",
                                   na_rep=NA_VAL,
                                   index=True)