Ejemplo n.º 1
0
 def save(self, file_path):
     """Write this object to `file_path` via `write_pkl`.

     Before writing, gin's module-level `_OPERATIVE_CONFIG_LOCK` is replaced
     with a fresh `SerializableLock`.

     Args:
       file_path: destination path handed to `write_pkl`
     """
     from bpnet.utils import SerializableLock
     from bpnet.utils import write_pkl

     # Swap in a serializable lock first: this fixes the serialization of
     # gin's _OPERATIVE_CONFIG_LOCK.
     serializable_lock = SerializableLock()
     gin.config._OPERATIVE_CONFIG_LOCK = serializable_lock

     write_pkl(self, file_path)
Ejemplo n.º 2
0
def modisco_table(modisco_dir,
                  contrib_scores,
                  output_dir,
                  report_url=None,
                  contribsf=None,
                  footprint_width=200):
    """Write the pattern table as .html and .csv

    Steps, in order: load the modisco data, build the pattern table, write
    it out as 'pattern_table', write a hierarchically re-ordered copy as
    'pattern_table.sorted', and finally pickle the per-pattern, per-task
    footprints to `<output_dir>/footprints.pkl`.

    Args:
      modisco_dir: forwarded to `ModiscoData.load`
      contrib_scores: forwarded to `ModiscoData.load`
      output_dir: directory receiving the log file, tables and footprints
      report_url: forwarded to `write_modisco_table`
      contribsf: forwarded to `ModiscoData.load` as `contribsf`
      footprint_width: forwarded to `ModiscoData.load` as `footprint_width`
    """
    plt.switch_backend('agg')
    # The table builder is aliased because it has the same name as this
    # function.
    from bpnet.modisco.table import ModiscoData
    from bpnet.modisco.table import modisco_table as build_table
    from bpnet.modisco.table import write_modisco_table
    from bpnet.modisco.motif_clustering import hirearchically_reorder_table

    add_file_logging(output_dir, logger, 'modisco-table')

    print("Loading required data")
    load_kwargs = dict(contribsf=contribsf, footprint_width=footprint_width)
    data = ModiscoData.load(modisco_dir, contrib_scores, **load_kwargs)

    print("Generating the table")
    table = build_table(data)

    print("Writing the results")
    write_modisco_table(table, output_dir, report_url, 'pattern_table')

    print("Writing clustered table")
    reordered = hirearchically_reorder_table(table, data.tasks)
    write_modisco_table(reordered, output_dir, report_url,
                        'pattern_table.sorted')

    print("Writing footprints")
    # pattern name -> {task -> wide profile averaged over axis 0}
    footprints = OrderedDict()
    for pattern in data.mf.pattern_names():
        per_task = {}
        for task in data.tasks:
            wide_profile = data.get_profile_wide(pattern, task)
            per_task[task] = wide_profile.mean(axis=0)
        footprints[pattern] = per_task
    write_pkl(footprints, Path(output_dir) / 'footprints.pkl')
    print("Done!")
Ejemplo n.º 3
0
def modisco_export_patterns(modisco_dir, output_file, contribsf=None):
    """Export all patterns to a pkl file without clustering them.

    Each exported pattern is a copy of the one stored in the modisco file,
    with `stacked_seqlet_contrib` and `n_seqlets` added to its `attrs`.

    Args:
      modisco_dir: modisco directory containing `modisco.h5`
      output_file: output file path for patterns.pkl
      contribsf: ContribFile to extract contribution scores from. If None,
        one is created with `ContribFile.from_modisco_dir(modisco_dir)` and
        its `cache()` method is called before use.
    """
    from bpnet.cli.contrib import ContribFile

    logger.info("Loading patterns")
    modisco_dir = Path(modisco_dir)

    modisco_file = ModiscoFile(modisco_dir / 'modisco.h5')
    patterns = [modisco_file.get_pattern(name)
                for name in modisco_file.pattern_names()]

    if contribsf is not None:
        logger.info("Using the provided ContribFile")
        scores = contribsf
    else:
        scores = ContribFile.from_modisco_dir(modisco_dir)
        logger.info("Loading ContribFile into memory")
        scores.cache()

    def _with_seqlet_attrs(pattern):
        # Work on a copy so the pattern obtained from the modisco file is
        # left untouched.
        extended = pattern.copy()

        # get seqlets
        seqlets = modisco_file._get_seqlets(extended.name)

        # extract the contribution scores
        stacked = scores.extract(seqlets, profile_width=None)
        stacked.dfi = modisco_file.get_seqlet_intervals(extended.name,
                                                        as_df=True)

        extended.attrs['stacked_seqlet_contrib'] = stacked
        extended.attrs['n_seqlets'] = modisco_file.n_seqlets(extended.name)
        return extended

    logger.info("Extracting profile and contribution scores")
    extended_patterns = [_with_seqlet_attrs(p) for p in tqdm(patterns)]

    write_pkl(extended_patterns, output_file)