Beispiel #1
0
def modisco_table(modisco_dir,
                  contrib_scores,
                  output_dir,
                  report_url=None,
                  contribsf=None,
                  footprint_width=200):
    """Write the pattern table to as .html and .csv
    """
    plt.switch_backend('agg')
    from bpnet.modisco.table import ModiscoData, modisco_table, write_modisco_table
    from bpnet.modisco.motif_clustering import hirearchically_reorder_table
    add_file_logging(output_dir, logger, 'modisco-table')
    print("Loading required data")
    data = ModiscoData.load(modisco_dir,
                            contrib_scores,
                            contribsf=contribsf,
                            footprint_width=footprint_width)

    print("Generating the table")
    df = modisco_table(data)

    print("Writing the results")
    write_modisco_table(df, output_dir, report_url, 'pattern_table')

    print("Writing clustered table")
    write_modisco_table(hirearchically_reorder_table(df, data.tasks),
                        output_dir, report_url, 'pattern_table.sorted')

    print("Writing footprints")
    profiles = OrderedDict([(pattern, {
        task: data.get_profile_wide(pattern, task).mean(axis=0)
        for task in data.tasks
    }) for pattern in data.mf.pattern_names()])
    write_pkl(profiles, Path(output_dir) / 'footprints.pkl')
    print("Done!")
Beispiel #2
0
def chip_nexus_analysis(modisco_dir,
                        trim_frac=0.08,
                        num_workers=20,
                        run_cwm_scan=False,
                        force=False,
                        footprint_width=200):
    """Compute all the results for modisco specific for ChIP-nexus/exo data. Runs:
    - modisco_plot
    - modisco_report
    - modisco_table
    - modisco_export_patterns
    - cwm_scan
    - modisco_export_seqlets

    Note:
      All the sub-commands are only executed if they have not been ran before. Use --force override this.
      Whether the commands have been run before is deterimined by checking if the following file exists:
        `{modisco_dir}/.modisco_report_all/{command}.done`.
    """
    plt.switch_backend('agg')
    from bpnet.utils import ConditionalRun

    modisco_dir = Path(modisco_dir)
    # figure out the contribution scores used
    kwargs = read_json(modisco_dir / "modisco-run.kwargs.json")
    contrib_scores = kwargs["contrib_file"]

    mf = ModiscoFile(f"{modisco_dir}/modisco.h5")
    all_patterns = mf.pattern_names()
    mf.close()
    if len(all_patterns) == 0:
        print("No patterns found.")
        # Touch modisco-chip.html for snakemake
        open(modisco_dir / 'modisco-chip.html', 'a').close()
        open(modisco_dir / 'seqlets/scored_regions.bed', 'a').close()
        return

    # class determining whether to run the command or not (poor-man's snakemake)
    cr = ConditionalRun("modisco_report_all", None, modisco_dir, force=force)

    sync = []
    # --------------------------------------------
    if (not cr.set_cmd('modisco_plot').done()
            or not cr.set_cmd('modisco_enrich_patterns').done()):
        # load ContribFile and pass it to all the functions
        logger.info("Loading ContribFile")
        contribsf = ContribFile.from_modisco_dir(modisco_dir)
        contribsf.cache()
    else:
        contribsf = None
    # --------------------------------------------
    # Basic reports
    if not cr.set_cmd('modisco_plot').done():
        modisco_plot(modisco_dir,
                     modisco_dir / 'plots',
                     heatmap_width=footprint_width,
                     figsize=(10, 10),
                     contribsf=contribsf)
        cr.write()
    sync.append("plots")

    if not cr.set_cmd('modisco_report').done():
        modisco_report(str(modisco_dir), str(modisco_dir))
        cr.write()
    sync.append("modisco-chip.html")

    if not cr.set_cmd('modisco_table').done():
        modisco_table(modisco_dir,
                      contrib_scores,
                      modisco_dir,
                      report_url=None,
                      contribsf=contribsf,
                      footprint_width=footprint_width)
        cr.write()
    sync.append("footprints.pkl")
    sync.append("pattern_table.*")

    if not cr.set_cmd('modisco_export_patterns').done():
        modisco_export_patterns(modisco_dir,
                                output_file=modisco_dir / 'patterns.pkl',
                                contribsf=contribsf)
        cr.write()
    sync.append("patterns.pkl")

    # --------------------------------------------
    # Finding new instances
    if run_cwm_scan:
        if not cr.set_cmd('cwm_scan').done():
            cwm_scan(modisco_dir,
                     modisco_dir / 'instances.bed.gz',
                     trim_frac=trim_frac,
                     contrib_file=None,
                     num_workers=num_workers)
            cr.write()

    # --------------------------------------------
    # Export bed-files and bigwigs

    # Seqlets
    if not cr.set_cmd('modisco_export_seqlets').done():
        modisco_export_seqlets(str(modisco_dir),
                               str(modisco_dir / 'seqlets'),
                               trim_frac=trim_frac)
        cr.write()
    sync.append("seqlets")

    # print the rsync command to run in order to sync the output
    # directories to the webserver
    logger.info("Run the following command to sync files to the webserver")
    dirs = " ".join(sync)
    print(f"rsync -av --progress {dirs} <output_dir>/")