def save(self, file_path):
    """Pickle this model to disk.

    Args:
        file_path: destination path for the pickle file.
    """
    from bpnet.utils import write_pkl, SerializableLock

    # gin's operative-config lock is not picklable by default; replace it
    # with a serializable equivalent so the whole object can be pickled.
    gin.config._OPERATIVE_CONFIG_LOCK = SerializableLock()
    write_pkl(self, file_path)
def modisco_table(modisco_dir, contrib_scores, output_dir, report_url=None, contribsf=None, footprint_width=200):
    """Write the pattern table to as .html and .csv

    Also writes a hierarchically clustered variant of the table and the
    per-pattern average footprints (as a pickle) into `output_dir`.
    """
    plt.switch_backend('agg')
    # local imports: keep heavy modisco deps out of module import time
    from bpnet.modisco.table import ModiscoData, modisco_table, write_modisco_table
    from bpnet.modisco.motif_clustering import hirearchically_reorder_table
    add_file_logging(output_dir, logger, 'modisco-table')

    print("Loading required data")
    data = ModiscoData.load(modisco_dir, contrib_scores,
                            contribsf=contribsf,
                            footprint_width=footprint_width)

    print("Generating the table")
    pattern_df = modisco_table(data)

    print("Writing the results")
    write_modisco_table(pattern_df, output_dir, report_url, 'pattern_table')

    print("Writing clustered table")
    clustered_df = hirearchically_reorder_table(pattern_df, data.tasks)
    write_modisco_table(clustered_df, output_dir, report_url, 'pattern_table.sorted')

    print("Writing footprints")
    # Average wide profile for each (pattern, task), keeping pattern order.
    profiles = OrderedDict()
    for pattern in data.mf.pattern_names():
        profiles[pattern] = {task: data.get_profile_wide(pattern, task).mean(axis=0)
                             for task in data.tasks}
    write_pkl(profiles, Path(output_dir) / 'footprints.pkl')
    print("Done!")
def modisco_export_patterns(modisco_dir, output_file, contribsf=None):
    """Export patterns to a pkl file. Don't cluster them

    Adds `stacked_seqlet_contrib` and `n_seqlets` to pattern `attrs`

    Args:
      modisco_dir: modisco directory containing
      output_file: output file path for patterns.pkl
    """
    from bpnet.cli.contrib import ContribFile

    logger.info("Loading patterns")
    modisco_dir = Path(modisco_dir)
    mf = ModiscoFile(modisco_dir / 'modisco.h5')
    patterns = [mf.get_pattern(pname) for pname in mf.pattern_names()]

    if contribsf is not None:
        logger.info("Using the provided ContribFile")
        contrib_file = contribsf
    else:
        contrib_file = ContribFile.from_modisco_dir(modisco_dir)
        logger.info("Loading ContribFile into memory")
        contrib_file.cache()

    logger.info("Extracting profile and contribution scores")
    extended_patterns = []
    for pattern in tqdm(patterns):
        # work on a copy so the patterns held by the ModiscoFile stay untouched
        pattern = pattern.copy()
        # seqlet coordinates belonging to this pattern
        valid_seqlets = mf._get_seqlets(pattern.name)
        # contribution scores stacked across those seqlets
        sti = contrib_file.extract(valid_seqlets, profile_width=None)
        sti.dfi = mf.get_seqlet_intervals(pattern.name, as_df=True)
        pattern.attrs['stacked_seqlet_contrib'] = sti
        pattern.attrs['n_seqlets'] = mf.n_seqlets(pattern.name)
        extended_patterns.append(pattern)
    write_pkl(extended_patterns, output_file)