def script__summarize_depth(depth_file, summarize_contig_depth_yaml,
                            summarize_binned_depth_yaml):
    """Summarize a per-position depth file into two YAML reports.

    The depth file is read as tab-separated text in which column 0 is the
    contig name and column 2 is an integer depth; lines starting with ``#``
    and blank lines are ignored.  (Presumably ``samtools depth``-style
    output -- confirm with the caller.)

    Two documents are written through ``datahandling.save_yaml``:

    * ``{"contig_depth": {contig: {"coverage", "total_depth",
      "total_length"}}}`` to *summarize_contig_depth_yaml*;
    * ``{"binned_depth": [...]}`` to *summarize_binned_depth_yaml*, a list
      of 100 counts where entry ``i - 1`` is the number of positions with
      depth ``>= i``.

    Args:
        depth_file: Path of the depth file (coerced with ``str``).
        summarize_contig_depth_yaml: Output path for the per-contig summary
            (coerced with ``str``).
        summarize_binned_depth_yaml: Output path for the binned depth list
            (coerced with ``str``).

    Returns:
        Always ``0``.
    """
    depth_file = str(depth_file)
    summarize_contig_depth_yaml = str(summarize_contig_depth_yaml)
    summarize_binned_depth_yaml = str(summarize_binned_depth_yaml)

    # NOTE(review): the returned config is no longer used here (the depth
    # range that used to come from it was dropped in favour of fixed bins).
    # The call is kept because load_config is opaque from this file --
    # remove it once confirmed to have no side effects callers rely on.
    datahandling.load_config()

    depth_dict = _read_depth_histograms(depth_file)

    # Compute both reports before writing anything, so a failure in either
    # computation leaves no partial output behind.
    contig_depth_summary_dict = _summarize_contig_depths(depth_dict)
    binned_depth = _bin_depths(depth_dict)

    datahandling.save_yaml({"contig_depth": contig_depth_summary_dict},
                           summarize_contig_depth_yaml)
    datahandling.save_yaml({"binned_depth": binned_depth},
                           summarize_binned_depth_yaml)

    return 0


def _read_depth_histograms(depth_file):
    """Return ``{contig: {depth: position_count}}`` parsed from *depth_file*."""
    depth_dict = {}
    with open(depth_file, "r") as input_file:
        for line in input_file:
            # Skip comment lines, and blank lines which would otherwise
            # fail on the column lookup below.
            if line.startswith("#") or not line.strip():
                continue
            fields = line.split("\t")
            contig = fields[0]
            depth = int(fields[2].strip())
            histogram = depth_dict.setdefault(contig, {})
            histogram[depth] = histogram.get(depth, 0) + 1
    return depth_dict


def _summarize_contig_depths(depth_dict):
    """Return per-contig coverage, total depth and length from the histograms."""
    summary = {}
    for contig, histogram in depth_dict.items():
        # Each histogram entry is (depth -> number of positions at that depth).
        total_length = sum(histogram.values())
        total_depth = sum(depth * count for depth, count in histogram.items())
        summary[contig] = {
            "coverage": total_depth / total_length,
            "total_depth": total_depth,
            "total_length": total_length,
        }
    return summary


def _bin_depths(depth_dict, bin_count=100):
    """Return a list whose entry ``i - 1`` counts positions with depth ``>= i``."""
    binned_depth = [0] * bin_count
    for histogram in depth_dict.values():
        for depth, count in histogram.items():
            # A position at depth d satisfies every threshold 1..d, capped at
            # bin_count.  The cap is inclusive so the last bin is filled too
            # (it previously always stayed 0).
            for index in range(min(depth, bin_count)):
                binned_depth[index] += count
    return binned_depth
# Example #2
# 0
import pkg_resources
from ruamel.yaml import YAML
import os
import re
from bifrostlib import datahandling
import sys

# Configuration is loaded once, when this module is imported.
config = datahandling.load_config()

# Coverage thresholds iterated by extract_contigs_sum_cov; each value N
# yields the "bin_*_at_Nx" summary entries.  No ``global`` statement is
# needed: a module-level assignment already creates a global name.
GLOBAL_BIN_VALUES = [1, 10, 25]


def extract_contigs_sum_cov(file_path, key, data_dict):
    """Store the contig depth YAML under *key* and add per-bin summaries.

    The document loaded from *file_path* is stored unchanged in
    ``data_dict["results"][key]``.  For every threshold ``N`` in
    ``GLOBAL_BIN_VALUES`` the contigs whose ``coverage`` is at least ``N``
    are aggregated and written to ``data_dict["summary"]`` as
    ``bin_contigs_at_Nx`` (number of contigs), ``bin_length_at_Nx`` (sum of
    ``total_length``) and ``bin_coverage_at_Nx`` (sum of ``total_depth``
    divided by sum of ``total_length``).

    Args:
        file_path: Path passed to ``datahandling.load_yaml``.  The loaded
            document must contain a ``contig_depth`` mapping whose values
            have ``coverage``, ``total_length`` and ``total_depth`` entries.
        key: Key under ``data_dict["results"]`` that receives the document.
        data_dict: Mapping with existing ``"results"`` and ``"summary"``
            sub-mappings; it is modified in place.
    """
    contig_depth_doc = datahandling.load_yaml(file_path)
    data_dict["results"][key] = contig_depth_doc
    for bin_value in GLOBAL_BIN_VALUES:
        threshold = float(bin_value)
        selected = [
            stats for stats in contig_depth_doc["contig_depth"].values()
            if stats["coverage"] >= threshold
        ]
        total_length = sum(stats["total_length"] for stats in selected)
        total_depth = sum(stats["total_depth"] for stats in selected)
        data_dict["summary"]["bin_contigs_at_{}x".format(
            bin_value)] = len(selected)
        data_dict["summary"]["bin_length_at_{}x".format(
            bin_value)] = total_length
        # When no contig reaches the threshold the length is 0; report a
        # coverage of 0.0 rather than raising ZeroDivisionError.
        data_dict["summary"]["bin_coverage_at_{}x".format(bin_value)] = (
            float(total_depth / total_length) if total_length else 0.0)