def init_param_truth_file(truth_file, out_dir, hg_file):
    """Build the complemented, total-copy-number version of a truth BED file.

    The truth file is read as tab-separated text. When it carries five
    columns (chromosome, start, end and two copy-number values), a temporary
    four-column copy named ``cntot_<basename>`` is written to ``out_dir`` with
    the two copy numbers summed, and that copy is used instead. The file is
    then handed to ``add_BED_complement`` (``sort=True``, ``hap_split=False``)
    and the intermediate files are removed.

    Args:
        truth_file: path of the tab-separated truth file.
        out_dir: directory receiving the temporary and output files.
        hg_file: genome file forwarded unchanged to ``add_BED_complement``.

    Returns:
        The value returned by ``add_BED_complement``; it is treated here as
        the path of the resulting file.

    Raises:
        ValueError: if a non-blank line of a five-column file does not have
            exactly five fields or its copy numbers are not integers.
    """
    new_truth_file = os.path.join(out_dir,
                                  "cntot_" + os.path.basename(truth_file))
    with open(truth_file, "r") as tf:
        tf_lines = tf.readlines()

    # The layout is probed on the second line, as before; a one-line (or
    # empty) file falls back to its only line instead of raising IndexError.
    probe = tf_lines[1] if len(tf_lines) > 1 else "".join(tf_lines[:1])

    leftovers = []
    if len(probe.strip().split("\t")) == 5:
        new_tf_lines = []
        for tf_line in tf_lines:
            if not tf_line.strip():
                continue  # tolerate blank lines
            (chr_id, start, end, cnA, cnB) = tf_line.strip().split("\t")
            new_tf_lines.append("\t".join(
                [chr_id, start, end,
                 str(int(cnA) + int(cnB))]))
        with open(new_truth_file, "w") as new_tf:
            new_tf.write("\n".join(new_tf_lines))
        # Only switch to (and later delete) the temporary file when it was
        # actually written; other layouts are used as they are.
        truth_file = new_truth_file
        leftovers.append(new_truth_file)

    cnv_file = add_BED_complement(truth_file,
                                  hg_file,
                                  sort=True,
                                  out_dir=out_dir,
                                  hap_split=False)

    # Intermediate "_compl.bed" file whose name is derived from the result.
    # If the substitution matches nothing the name equals the result itself,
    # which must not be deleted.
    compl_file = re.sub("_sorted.bed", "_compl.bed", cnv_file)
    if compl_file != cnv_file:
        leftovers.append(compl_file)

    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            # Cleanup is best effort: a missing intermediate file is not an
            # error for the caller.
            pass

    return cnv_file
Example #2
0
def init_param_truth_file(truth_file, out_dir, hg_file):
    """Build the complemented, total-copy-number version of a truth BED file.

    The truth file is read as tab-separated text. When it carries five
    columns (chromosome, start, end and two copy-number values), a temporary
    four-column copy named ``cntot_<basename>`` is written to ``out_dir`` with
    the two copy numbers summed, and that copy is used instead. The file is
    then handed to ``add_BED_complement`` (``sort=True``, ``hap_split=False``)
    and the intermediate files are removed.

    Args:
        truth_file: path of the tab-separated truth file.
        out_dir: directory receiving the temporary and output files.
        hg_file: genome file forwarded unchanged to ``add_BED_complement``.

    Returns:
        The value returned by ``add_BED_complement``; it is treated here as
        the path of the resulting file.

    Raises:
        ValueError: if a non-blank line of a five-column file does not have
            exactly five fields or its copy numbers are not integers.
    """
    new_truth_file = os.path.join(out_dir, "cntot_" + os.path.basename(truth_file))
    with open(truth_file, "r") as tf:
        tf_lines = tf.readlines()

    # The layout is probed on the second line, as before; a one-line (or
    # empty) file falls back to its only line instead of raising IndexError.
    probe = tf_lines[1] if len(tf_lines) > 1 else "".join(tf_lines[:1])

    leftovers = []
    if len(probe.strip().split("\t")) == 5:
        new_tf_lines = []
        for tf_line in tf_lines:
            if not tf_line.strip():
                continue  # tolerate blank lines
            (chr_id, start, end, cnA, cnB) = tf_line.strip().split("\t")
            new_tf_lines.append("\t".join([chr_id, start, end, str(int(cnA) + int(cnB))]))
        with open(new_truth_file, "w") as new_tf:
            new_tf.write("\n".join(new_tf_lines))
        # Only switch to (and later delete) the temporary file when it was
        # actually written; other layouts are used as they are.
        truth_file = new_truth_file
        leftovers.append(new_truth_file)

    cnv_file = add_BED_complement(truth_file, hg_file, sort=True, out_dir=out_dir, hap_split=False)

    # Intermediate "_compl.bed" file whose name is derived from the result.
    # If the substitution matches nothing the name equals the result itself,
    # which must not be deleted.
    compl_file = re.sub("_sorted.bed", "_compl.bed", cnv_file)
    if compl_file != cnv_file:
        leftovers.append(compl_file)

    for leftover in leftovers:
        try:
            os.remove(leftover)
        except OSError:
            # Cleanup is best effort: a missing intermediate file is not an
            # error for the caller.
            pass

    return cnv_file
def main():
    """Evaluate Canvas results on simulated data.

    Command-line options (exactly one is required):
        -s / --canvas_sim_id:  prefix used to select result directories
            inside the hard-coded Canvas results root.
        -d / --canvas_sim_dir: a single Canvas results directory.

    For every selected results directory the matching simulation directory
    is derived by replacing "/Canvas/" with "/simulation/" in its path.
    Truth and variant-percentage files are created in a per-result output
    directory, the simulation parameters are written, and ``evaluate_CNV``
    is run twice: once with the default exclusion file and once with an
    extended exclusion file ("only heterogeneous variants"). Each step is
    skipped when its output file already exists.
    """

    parser = OptionParser()
    parser.add_option("-s", "--canvas_sim_id", dest="canvas_sim_id", type="string", help="simulation id (find Canvas results directories strating with sim_id")
    parser.add_option("-d", "--canvas_sim_dir", dest="canvas_sim_dir", type="string", help="Canvas results directory")
    (options, args) = parser.parse_args()

    # parser.error() terminates the program, so past these checks exactly one
    # of the two options is set.
    if options.canvas_sim_dir is None and options.canvas_sim_id is None:
        parser.error("Specify Canvas results simulation id or directory")
    if options.canvas_sim_dir is not None and options.canvas_sim_id is not None:
        parser.error("Canvas results simulation id and directory are mutually exclusive options")

    if options.canvas_sim_dir is not None:
        res_dirs = [options.canvas_sim_dir]
    else:
        canvas_dir = "/illumina/scratch/tmp/users/ccolombo/Canvas/"
        res_dirs = [canvas_dir + cdir for cdir in os.listdir(canvas_dir)
                    if os.path.isdir(canvas_dir + cdir) and cdir.startswith(options.canvas_sim_id)]
        if len(res_dirs) == 0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No Canvas results directories found")
    hg_file = "/home/ccolombo/filtered_human.hg19.genome"

    for res_dir in res_dirs:

        print(res_dir)

        sim_dir = re.sub("/Canvas/", "/simulation/", res_dir)
        res_id = re.sub("sim", "ev", res_dir.split("/")[-1])

        if not os.path.exists(res_dir):
            print("\nCanvas results directory %s does not exist\n\n" % res_dir)
            continue
        # The simulation directory itself must be tested here (the previous
        # code re-tested res_dir, so this branch could never trigger).
        if not os.path.exists(sim_dir):
            print("\nSimulation directory %s does not exist\n\n" % sim_dir)
            continue
        # Results named "simNorm*" or ending in "_purity100" are skipped.
        if os.path.basename(res_dir).startswith("simNorm") or res_dir.endswith("_purity100"):
            continue

        out_dir = os.path.join("/illumina/scratch/tmp/users/ccolombo/evaluation/EvaluateCNV", res_id)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        truth_file = create_het_cn_file(sim_dir, os.path.join(out_dir, "het_truth_file.bed"), no_dipl=True, hap_split=True, round=True)
        truth_file = add_BED_complement(truth_file, hg_file, sort=False, out_dir=out_dir, hap_split=True)
        perc_file = create_var_perc_file(sim_dir, os.path.join(out_dir, "var_perc.bed"), no_dipl=True)

        # Write simulation parameters to file
        out_file = os.path.join(out_dir, res_id + "_par.txt")
        if not os.path.exists(out_file):
            get_sim_params(sim_dir, truth_file, perc_file, out_file, hg_file)

        # Run EvaluateCNV
        out_file = os.path.join(out_dir, res_id + ".txt")
        excl_file = "/illumina/scratch/tmp/users/ccolombo/evaluation/sim_filter.bed"
        if not os.path.exists(out_file):
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)

        # Run EvaluateCNV only on heterogeneous variants
        out_file = os.path.join(out_dir, res_id + "_onlyhet.txt")
        if not os.path.exists(out_file):
            with open(excl_file, "r") as ef:
                excl_vars = ef.readlines()
            # Make sure the first appended region starts on its own line even
            # if the exclusion file does not end with a newline.
            if excl_vars and not excl_vars[-1].endswith("\n"):
                excl_vars[-1] += "\n"
            with open(perc_file, "r") as pf:
                for line in pf:
                    if not line.strip():
                        continue  # tolerate blank lines
                    (chr_id, start, end, cnA, cnB, perc) = line.strip().split("\t")
                    # Additionally exclude regions whose last column is at
                    # least 0.8, except on chrX/chrY/chrM.
                    if float(perc) >= 0.8 and chr_id not in ["chrX", "chrY", "chrM"]:
                        excl_vars.append("\t".join([chr_id, start, end]) + "\n")
            # Delete directly instead of building an "rm" shell command
            # from the path.
            os.remove(perc_file)
            excl_file = os.path.join(out_dir, "filter_onlyhet.bed")
            with open(excl_file, "w") as wf:
                wf.writelines(excl_vars)

            evaluate_CNV(res_dir, truth_file, excl_file, out_file)
def main():
    """Evaluate Canvas results on simulated data.

    Command-line options (exactly one is required):
        -s / --canvas_sim_id:  prefix used to select result directories
            inside the hard-coded Canvas results root.
        -d / --canvas_sim_dir: a single Canvas results directory.

    For every selected results directory the matching simulation directory
    is derived by replacing "/Canvas/" with "/simulation/" in its path.
    Truth and variant-percentage files are created in a per-result output
    directory, the simulation parameters are written, and ``evaluate_CNV``
    is run twice: once with the default exclusion file and once with an
    extended exclusion file ("only heterogeneous variants"). Each step is
    skipped when its output file already exists.
    """

    parser = OptionParser()
    parser.add_option(
        "-s",
        "--canvas_sim_id",
        dest="canvas_sim_id",
        type="string",
        help=
        "simulation id (find Canvas results directories strating with sim_id")
    parser.add_option("-d",
                      "--canvas_sim_dir",
                      dest="canvas_sim_dir",
                      type="string",
                      help="Canvas results directory")
    (options, args) = parser.parse_args()

    # parser.error() terminates the program, so past these checks exactly one
    # of the two options is set.
    if options.canvas_sim_dir is None and options.canvas_sim_id is None:
        parser.error("Specify Canvas results simulation id or directory")
    if options.canvas_sim_dir is not None and options.canvas_sim_id is not None:
        parser.error(
            "Canvas results simulation id and directory are mutually exclusive options"
        )

    if options.canvas_sim_dir is not None:
        res_dirs = [options.canvas_sim_dir]
    else:
        canvas_dir = "/illumina/scratch/tmp/users/ccolombo/Canvas/"
        res_dirs = [
            canvas_dir + cdir for cdir in os.listdir(canvas_dir)
            if os.path.isdir(canvas_dir + cdir)
            and cdir.startswith(options.canvas_sim_id)
        ]
        if len(res_dirs) == 0:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("No Canvas results directories found")
    hg_file = "/home/ccolombo/filtered_human.hg19.genome"

    for res_dir in res_dirs:

        print(res_dir)

        sim_dir = re.sub("/Canvas/", "/simulation/", res_dir)
        res_id = re.sub("sim", "ev", res_dir.split("/")[-1])

        if not os.path.exists(res_dir):
            print("\nCanvas results directory %s does not exist\n\n" % res_dir)
            continue
        # The simulation directory itself must be tested here (the previous
        # code re-tested res_dir, so this branch could never trigger).
        if not os.path.exists(sim_dir):
            print("\nSimulation directory %s does not exist\n\n" % sim_dir)
            continue
        # Results named "simNorm*" or ending in "_purity100" are skipped.
        if os.path.basename(res_dir).startswith("simNorm") or res_dir.endswith(
                "_purity100"):
            continue

        out_dir = os.path.join(
            "/illumina/scratch/tmp/users/ccolombo/evaluation/EvaluateCNV",
            res_id)
        if not os.path.exists(out_dir):
            os.mkdir(out_dir)
        truth_file = create_het_cn_file(sim_dir,
                                        os.path.join(out_dir,
                                                     "het_truth_file.bed"),
                                        no_dipl=True,
                                        hap_split=True,
                                        round=True)
        truth_file = add_BED_complement(truth_file,
                                        hg_file,
                                        sort=False,
                                        out_dir=out_dir,
                                        hap_split=True)
        perc_file = create_var_perc_file(sim_dir,
                                         os.path.join(out_dir, "var_perc.bed"),
                                         no_dipl=True)

        # Write simulation parameters to file
        out_file = os.path.join(out_dir, res_id + "_par.txt")
        if not os.path.exists(out_file):
            get_sim_params(sim_dir, truth_file, perc_file, out_file, hg_file)

        # Run EvaluateCNV
        out_file = os.path.join(out_dir, res_id + ".txt")
        excl_file = "/illumina/scratch/tmp/users/ccolombo/evaluation/sim_filter.bed"
        if not os.path.exists(out_file):
            evaluate_CNV(res_dir, truth_file, excl_file, out_file)

        # Run EvaluateCNV only on heterogeneous variants
        out_file = os.path.join(out_dir, res_id + "_onlyhet.txt")
        if not os.path.exists(out_file):
            with open(excl_file, "r") as ef:
                excl_vars = ef.readlines()
            # Make sure the first appended region starts on its own line even
            # if the exclusion file does not end with a newline.
            if excl_vars and not excl_vars[-1].endswith("\n"):
                excl_vars[-1] += "\n"
            with open(perc_file, "r") as pf:
                for line in pf:
                    if not line.strip():
                        continue  # tolerate blank lines
                    (chr_id, start, end, cnA, cnB,
                     perc) = line.strip().split("\t")
                    # Additionally exclude regions whose last column is at
                    # least 0.8, except on chrX/chrY/chrM.
                    if float(perc) >= 0.8 and chr_id not in [
                            "chrX", "chrY", "chrM"
                    ]:
                        excl_vars.append("\t".join([chr_id, start, end]) +
                                         "\n")
            # Delete directly instead of building an "rm" shell command
            # from the path.
            os.remove(perc_file)
            excl_file = os.path.join(out_dir, "filter_onlyhet.bed")
            with open(excl_file, "w") as wf:
                wf.writelines(excl_vars)

            evaluate_CNV(res_dir, truth_file, excl_file, out_file)