Ejemplo n.º 1
0
def run(params):
    """Compare anchored counts between sample groups and write the results.

    Steps, in order:
      1. Load the JSON multisample profile from
         ``params.multisample_profile_path``.
      2. Build the count table from the profile's "Counts" entry.
      3. Depth-normalize the table using the profile's "Parameters" entry.
      4. Filter the table by ``params.min_inrepeat_reads``.
      5. If ``params.target_region_path`` is truthy, restrict the table to the
         regions loaded from that path.
      6. Load the manifest from ``params.manifest_path``, extract the
         case/control assignments, compare counts using
         ``params.test_params`` and correct the p-values.
      7. Pass the table and ``params.output_path`` to ``output_results``.

    Args:
        params: Object exposing the attributes named above.
    """
    with open(params.multisample_profile_path, "r") as handle:
        profile = json.load(handle)

    table = generate_table_with_anchor_counts(profile["Counts"])
    logging.info("Loaded %i regions", len(table))

    # The return values of the normalization, comparison and correction
    # helpers are discarded; presumably they update the table in place
    # (confirm against the `common` module).
    logging.info("Normalizing counts")
    depth_stats = profile["Parameters"]
    common.depth_normalize_counts(depth_stats, table)

    logging.info("Filtering counts")
    table = common.filter_counts_by_magnitude(table, params.min_inrepeat_reads)

    if params.target_region_path:
        regions = load_target_regions(params.target_region_path)
        logging.info("Restricting analysis to %i regions", len(regions))
        table = common.filter_counts_by_region(table, regions)

    logging.info("%i regions left after filtering", len(table))

    manifest = common.load_manifest(params.manifest_path)
    assignments = common.extract_case_control_assignments(manifest)

    logging.info("Comparing counts")
    common.compare_counts(params.test_params, assignments, table)

    logging.info("Correcting p-values")
    common.correct_pvalues(table)

    output_results(table, params.output_path)
    logging.info("Done")
def run(params):
    """Write a tab-separated z-score table for anchored counts.

    Loads the JSON multisample profile from
    ``params.multisample_profile_path``, builds the count table from its
    "Counts" entry and depth-normalizes it with its "Parameters" entry.  When
    ``params.target_region_path`` is truthy, the table is restricted to the
    regions loaded from that path.  The manifest at ``params.manifest_path``
    supplies the case/control assignments.

    One line per retained row is written to ``params.output_path`` with the
    columns contig, start, end, motif, top case z-score, the cases with high
    counts (``sample:count`` pairs joined by commas) and all sample counts
    (joined by commas).  Rows whose region is "unaligned" and rows for which
    the z-score analysis reports no cases with high counts are skipped.

    Args:
        params: Object exposing the attributes named above.
    """
    with open(params.multisample_profile_path, "r") as handle:
        profile = json.load(handle)

    table = generate_table_with_anchor_counts(profile["Counts"])
    logging.info("Loaded %i regions", len(table))

    logging.info("Normalizing counts")
    depth_stats = profile["Parameters"]
    common.depth_normalize_counts(depth_stats, table)

    if params.target_region_path:
        regions = load_target_regions(params.target_region_path)
        logging.info("Restricting analysis to %i regions", len(regions))
        table = common.filter_counts_by_region(table, regions)

    manifest = common.load_manifest(params.manifest_path)
    assignments = common.extract_case_control_assignments(manifest)

    header = "contig\tstart\tend\tmotif\ttop_case_zscore\thigh_case_counts\tcounts"
    with open(params.output_path, "w") as out:
        print(header, file=out)

        for record in table:
            region = record["region"]
            if region == "unaligned":
                continue

            # Split on the last ":" only, so a contig name that itself
            # contains ":" stays intact.
            contig, span = region.rsplit(":", 1)
            start_text, end_text = span.split("-")
            start = int(start_text)
            end = int(end_text)

            top_zscore, high_cases = common.run_zscore_analysis(
                assignments, record["sample_counts"]
            )
            if len(high_cases) == 0:
                continue

            case_parts = []
            for sample, count in high_cases.items():
                case_parts.append("{}:{:.2f}".format(sample, count))
            case_field = ",".join(case_parts)

            count_parts = []
            for _, count in record["sample_counts"].items():
                count_parts.append("{:.2f}".format(count))
            count_field = ",".join(count_parts)

            fields = (
                contig,
                start,
                end,
                record["unit"],
                "{:.2f}".format(top_zscore),
                case_field,
                count_field,
            )
            print(*fields, sep="\t", file=out)

    logging.info("Done")
Ejemplo n.º 3
0
def main():
    """Build the combined profile from the manifest samples and write it.

    Initializes logging, loads the run parameters and the manifest named by
    ``parameters['manifest_path']``, processes the samples into combined
    regions, normalizes the counts, converts the result with ``create_json``
    and hands it to ``write_json``.
    """
    common.init_logger()
    parameters = load_parameters()
    manifest_samples = common.load_manifest(parameters['manifest_path'])

    # Starts empty and is handed to process_samples before being used for
    # normalization; presumably process_samples fills it in (confirm).
    depth_by_sample = {}
    merged_regions = process_samples(
        parameters, manifest_samples, depth_by_sample
    )
    normalize_counts(depth_by_sample, merged_regions)

    payload = create_json(merged_regions)
    write_json(parameters, payload)
def main():
    """Run the case-control comparison on combined counts.

    Initializes logging, loads the run parameters, the combined counts from
    ``parameters['counts_path']`` and the manifest from
    ``parameters['manifest_path']``.  The generated count table is filtered
    with ``parameters['min_count']``, compared across the case/control
    assignments, p-value corrected, and passed to ``output_results`` together
    with ``parameters['output_path']``.
    """
    common.init_logger()
    parameters = load_parameters()

    merged_counts = common.load_combined_json(parameters['counts_path'])
    manifest_samples = common.load_manifest(parameters['manifest_path'])

    table = generate_count_table(merged_counts)
    table = common.filter_counts(parameters['min_count'], table)

    assignments = common.extract_case_control_assignments(manifest_samples)
    # Return values of the next two calls are discarded; presumably they
    # annotate the table in place (confirm against the `common` module).
    common.compare_counts(assignments, table)
    common.correct_pvalues(table)

    output_results(table, parameters['output_path'])
    logging.info('Done')
def main():
    """Run the case-control comparison, optionally limited to target regions.

    Initializes logging, loads the run parameters, the combined counts from
    ``parameters['counts_path']`` and the manifest from
    ``parameters['manifest_path']``.  When ``parameters['target_regions']``
    is truthy, the target regions are loaded from it; otherwise ``None`` is
    passed to the filter.  The count table is filtered with
    ``parameters['min_count']`` and the target regions, compared across the
    case/control assignments using ``parameters['test_params']``, p-value
    corrected, and passed to ``output_results`` together with
    ``parameters['output_path']``.
    """
    common.init_logger()
    parameters = load_parameters()

    merged_counts = common.load_combined_json(parameters['counts_path'])
    manifest_samples = common.load_manifest(parameters['manifest_path'])

    region_source = parameters['target_regions']
    regions = load_target_regions(region_source) if region_source else None

    table = generate_count_table(merged_counts)
    table = common.filter_counts(table, parameters['min_count'], regions)
    logging.info('%i regions left after initial filtering', len(table))

    assignments = common.extract_case_control_assignments(manifest_samples)
    # Return values of the next two calls are discarded; presumably they
    # annotate the table in place (confirm against the `common` module).
    common.compare_counts(parameters['test_params'], assignments, table)
    common.correct_pvalues(table)

    output_results(table, parameters['output_path'])
    logging.info('Done')
Ejemplo n.º 6
0
def run(params):
    """Write a tab-separated z-score table for IRR-pair counts.

    Loads the JSON multisample profile from
    ``params.multisample_profile_path``, builds the count table from its
    "Counts" entry via ``common.generate_table_with_irr_pair_counts`` and
    depth-normalizes it with its "Parameters" entry.  The manifest at
    ``params.manifest_path`` supplies the case/control assignments.

    One line per retained row is written to ``params.output_path`` with the
    columns motif, top case z-score, the cases with high counts
    (``sample:count`` pairs joined by commas) and all sample counts (joined
    by commas).  Rows for which the z-score analysis reports no cases with
    high counts are skipped.

    Args:
        params: Object exposing the attributes named above.
    """
    with open(params.multisample_profile_path, "r") as handle:
        profile = json.load(handle)

    table = common.generate_table_with_irr_pair_counts(profile["Counts"])
    logging.info("Loaded %i regions", len(table))

    logging.info("Normalizing counts")
    depth_stats = profile["Parameters"]
    common.depth_normalize_counts(depth_stats, table)

    manifest = common.load_manifest(params.manifest_path)
    assignments = common.extract_case_control_assignments(manifest)

    header = "motif\ttop_case_zscore\thigh_case_counts\tcounts"
    with open(params.output_path, "w") as out:
        print(header, file=out)

        for record in table:
            top_zscore, high_cases = common.run_zscore_analysis(
                assignments, record["sample_counts"]
            )
            if len(high_cases) == 0:
                continue

            case_parts = []
            for sample, count in high_cases.items():
                case_parts.append("{}:{:.2f}".format(sample, count))
            case_field = ",".join(case_parts)

            count_parts = []
            for _, count in record["sample_counts"].items():
                count_parts.append("{:.2f}".format(count))
            count_field = ",".join(count_parts)

            fields = (
                record["unit"],
                "{:.2f}".format(top_zscore),
                case_field,
                count_field,
            )
            print(*fields, sep="\t", file=out)