import json
import logging

import common


def run(params):
    """Case-control comparison of locus-level (anchored in-repeat read) counts."""
    with open(params.multisample_profile_path, "r") as profile_file:
        multisample_profile = json.load(profile_file)
    count_table = generate_table_with_anchor_counts(multisample_profile["Counts"])
    logging.info("Loaded %i regions", len(count_table))

    logging.info("Normalizing counts")
    sample_stats = multisample_profile["Parameters"]
    common.depth_normalize_counts(sample_stats, count_table)

    logging.info("Filtering counts")
    count_table = common.filter_counts_by_magnitude(
        count_table, params.min_inrepeat_reads
    )
    if params.target_region_path:
        target_regions = load_target_regions(params.target_region_path)
        logging.info("Restricting analysis to %i regions", len(target_regions))
        count_table = common.filter_counts_by_region(count_table, target_regions)
    logging.info("%i regions left after filtering", len(count_table))

    manifest = common.load_manifest(params.manifest_path)
    sample_status = common.extract_case_control_assignments(manifest)

    logging.info("Comparing counts")
    common.compare_counts(params.test_params, sample_status, count_table)

    logging.info("Correcting p-values")
    common.correct_pvalues(count_table)
    output_results(count_table, params.output_path)
    logging.info("Done")
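# The helpers above come from this script's own `common` module. To make the
# final step concrete, here is a minimal self-contained sketch of a p-value
# correction pass, assuming Benjamini-Hochberg FDR control and "pvalue" /
# "adjusted_pvalue" row keys; the keys, the helper name, and BH itself are
# illustrative assumptions, and common.correct_pvalues may use a different
# procedure.

def bh_correct_pvalues_sketch(count_table):
    """Attach a BH-adjusted p-value to every row of the count table (sketch)."""
    num_tests = len(count_table)
    ordered = sorted(count_table, key=lambda row: row["pvalue"])
    running_min = 1.0
    # Walk from the largest raw p-value down so adjusted values stay monotone.
    for rank in range(num_tests, 0, -1):
        row = ordered[rank - 1]
        running_min = min(running_min, row["pvalue"] * num_tests / rank)
        row["adjusted_pvalue"] = running_min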
def run(params):
    """Outlier (z-score) analysis of locus-level counts; writes a TSV report."""
    with open(params.multisample_profile_path, "r") as profile_file:
        multisample_profile = json.load(profile_file)
    count_table = generate_table_with_anchor_counts(multisample_profile["Counts"])
    logging.info("Loaded %i regions", len(count_table))

    logging.info("Normalizing counts")
    sample_stats = multisample_profile["Parameters"]
    common.depth_normalize_counts(sample_stats, count_table)

    if params.target_region_path:
        target_regions = load_target_regions(params.target_region_path)
        logging.info("Restricting analysis to %i regions", len(target_regions))
        count_table = common.filter_counts_by_region(count_table, target_regions)

    manifest = common.load_manifest(params.manifest_path)
    sample_status = common.extract_case_control_assignments(manifest)

    header = "contig\tstart\tend\tmotif\ttop_case_zscore\thigh_case_counts\tcounts"
    with open(params.output_path, "w") as results_file:
        print(header, file=results_file)
        for row in count_table:
            region_encoding = row["region"]
            # Skip the pseudo-region holding reads that could not be placed.
            if region_encoding == "unaligned":
                continue
            contig, coords = region_encoding.rsplit(":", 1)
            start, end = coords.split("-")
            start, end = int(start), int(end)
            top_case_zscore, cases_with_high_counts = common.run_zscore_analysis(
                sample_status, row["sample_counts"]
            )
            if len(cases_with_high_counts) == 0:
                continue
            encoded_case_info = ",".join(
                "{}:{:.2f}".format(s, c) for s, c in cases_with_high_counts.items()
            )
            count_encoding = ",".join(
                "{:.2f}".format(c) for c in row["sample_counts"].values()
            )
            print(
                contig,
                start,
                end,
                row["unit"],
                "{:.2f}".format(top_case_zscore),
                encoded_case_info,
                count_encoding,
                sep="\t",
                file=results_file,
            )
    logging.info("Done")
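# The report above is driven entirely by the two values returned from
# common.run_zscore_analysis. One plausible reading of that contract, inferred
# from how the values are used, is sketched below; the z-score cutoff of 1.0,
# the use of controls as the reference distribution, and the "case"/"control"
# label strings are all assumptions, not necessarily the package's exact rule.

import statistics

def zscore_analysis_sketch(sample_status, sample_counts, cutoff=1.0):
    """Return (top case z-score, {case sample: count}) for high-count cases."""
    control_counts = [
        count for sample, count in sample_counts.items()
        if sample_status[sample] == "control"
    ]
    mean = statistics.mean(control_counts)
    std = statistics.pstdev(control_counts) or 1.0  # guard a flat distribution
    top_case_zscore = float("-inf")
    cases_with_high_counts = {}
    for sample, count in sample_counts.items():
        if sample_status[sample] != "case":
            continue
        zscore = (count - mean) / std
        top_case_zscore = max(top_case_zscore, zscore)
        if zscore > cutoff:
            cases_with_high_counts[sample] = count
    return top_case_zscore, cases_with_high_counts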
def main():
    common.init_logger()
    parameters = load_parameters()
    samples = common.load_manifest(parameters['manifest_path'])
    sample_depths = {}
    combined_regions = process_samples(parameters, samples, sample_depths)
    normalize_counts(sample_depths, combined_regions)
    output_json = create_json(combined_regions)
    write_json(parameters, output_json)
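# normalize_counts rescales each sample's raw counts by that sample's
# sequencing depth so samples of different coverage become comparable. A
# minimal sketch, assuming sample_depths maps sample ids to mean depth,
# combined_regions maps region ids to per-sample count dicts, and a fixed
# reference depth (all three are assumptions about this script's data layout):

REFERENCE_DEPTH = 40.0  # assumed normalization target

def normalize_counts_sketch(sample_depths, combined_regions):
    for counts_by_sample in combined_regions.values():
        for sample, count in counts_by_sample.items():
            counts_by_sample[sample] = count * REFERENCE_DEPTH / sample_depths[sample]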
def main():
    common.init_logger()
    parameters = load_parameters()
    combined_counts = common.load_combined_json(parameters['counts_path'])
    samples = common.load_manifest(parameters['manifest_path'])

    count_table = generate_count_table(combined_counts)
    count_table = common.filter_counts(parameters['min_count'], count_table)

    sample_status = common.extract_case_control_assignments(samples)
    common.compare_counts(sample_status, count_table)
    common.correct_pvalues(count_table)
    output_results(count_table, parameters['output_path'])
    logging.info('Done')
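# common.compare_counts attaches a case-vs-control statistic to every row of
# the count table. The sketch below stands in a Wilcoxon rank-sum test from
# scipy for whatever statistic the package actually computes (which may well
# differ, e.g. a permutation test); the "sample_counts" and "pvalue" row keys
# mirror those used elsewhere in these scripts but are assumptions here.

from scipy.stats import ranksums

def compare_counts_sketch(sample_status, count_table):
    for row in count_table:
        case_counts = [count for sample, count in row["sample_counts"].items()
                       if sample_status[sample] == "case"]
        control_counts = [count for sample, count in row["sample_counts"].items()
                          if sample_status[sample] == "control"]
        # One-sided: repeat expansions should inflate counts in cases only.
        _, pvalue = ranksums(case_counts, control_counts, alternative="greater")
        row["pvalue"] = pvalue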
def main():
    common.init_logger()
    parameters = load_parameters()
    combined_counts = common.load_combined_json(parameters['counts_path'])
    samples = common.load_manifest(parameters['manifest_path'])

    target_regions = None
    if parameters['target_regions']:
        target_regions = load_target_regions(parameters['target_regions'])

    count_table = generate_count_table(combined_counts)
    count_table = common.filter_counts(
        count_table, parameters['min_count'], target_regions)
    logging.info('%i regions left after initial filtering', len(count_table))

    sample_status = common.extract_case_control_assignments(samples)
    common.compare_counts(parameters['test_params'], sample_status, count_table)
    common.correct_pvalues(count_table)
    output_results(count_table, parameters['output_path'])
    logging.info('Done')
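# The target-region restriction inside common.filter_counts reduces to an
# interval overlap test. A sketch of that check, reusing the "contig:start-end"
# region encoding parsed elsewhere in these scripts and assuming target_regions
# is a list of (contig, start, end) tuples (the tuple layout is an assumption):

def overlaps_target_sketch(region_encoding, target_regions):
    contig, coords = region_encoding.rsplit(":", 1)
    start, end = (int(pos) for pos in coords.split("-"))
    return any(
        contig == target_contig and start <= target_end and target_start <= end
        for target_contig, target_start, target_end in target_regions
    )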
def run(params):
    """Outlier (z-score) analysis of motif-level (IRR pair) counts."""
    with open(params.multisample_profile_path, "r") as profile_file:
        multisample_profile = json.load(profile_file)
    count_table = common.generate_table_with_irr_pair_counts(
        multisample_profile["Counts"])
    logging.info("Loaded %i motifs", len(count_table))

    logging.info("Normalizing counts")
    sample_stats = multisample_profile["Parameters"]
    common.depth_normalize_counts(sample_stats, count_table)

    manifest = common.load_manifest(params.manifest_path)
    sample_status = common.extract_case_control_assignments(manifest)

    header = "motif\ttop_case_zscore\thigh_case_counts\tcounts"
    with open(params.output_path, "w") as results_file:
        print(header, file=results_file)
        for row in count_table:
            top_case_zscore, cases_with_high_counts = common.run_zscore_analysis(
                sample_status, row["sample_counts"]
            )
            if len(cases_with_high_counts) == 0:
                continue
            encoded_case_info = ",".join(
                "{}:{:.2f}".format(s, c) for s, c in cases_with_high_counts.items()
            )
            count_encoding = ",".join(
                "{:.2f}".format(c) for c in row["sample_counts"].values()
            )
            motif = row["unit"]
            print(
                motif,
                "{:.2f}".format(top_case_zscore),
                encoded_case_info,
                count_encoding,
                sep="\t",
                file=results_file,
            )
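# run() above only needs attribute-style access to its three inputs, so for a
# quick ad-hoc call (outside the real command-line entry point, which is not
# shown here) an argparse.Namespace is enough; the file names are placeholders:

from argparse import Namespace

run(Namespace(
    multisample_profile_path="multisample_profile.json",
    manifest_path="manifest.tsv",
    output_path="motif_results.tsv",
))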