def plot_vcf_comp(tsv_path, options):
    """Make precision/recall plots from the big vcf comparison table.

    For each TOT, SNP and INDEL category found in the header returned by
    vcf_dist_header(), the recall, precision and quality columns of tsv_path
    are extracted with awk into their own tsv, which is plotted with
    scripts/scatter.py.  make_max_f1_tsv() is then called on that tsv and the
    three tsvs it is given as outputs are plotted as a bar chart, a scatter
    plot and a second bar chart.

    Args:
        tsv_path: path of the comparison table.  Its base name (extension
            removed) names the outputs, and the text after its last "-" is
            upper-cased and used as the region in plot titles and f1 file
            names.
        options: must provide comp_dir (outputs go to <comp_dir>/comp_plots)
            and top (the zoomed plots are only made when it is exactly True).
            It is also forwarded to vcf_dist_header() and make_max_f1_tsv().

    Every plot command is echoed to stdout and executed with os.system(); the
    exit status of the plotting scripts is not checked.
    """
    out_dir = os.path.join(options.comp_dir, "comp_plots")
    robust_makedirs(out_dir)
    out_name = os.path.basename(os.path.splitext(tsv_path)[0])
    out_base_path = os.path.join(out_dir, out_name)
    name_toks = out_name.split("-")
    region = name_toks[-1].upper()
    out_base_path_f1 = os.path.join(out_dir, "-".join(name_toks[:-1]) + "--f1-" + region)

    params = " ".join(PLOT_PARAMS)

    def run_plot(cmd):
        # Echo the command, then run it through the shell.  A single-argument
        # print() gives the same output as a Python 2 print statement.
        print(cmd)
        os.system(cmd)

    def scatter(in_tsv, out_png, plot_title, lo, hi, width="18", height="9"):
        # Precision/recall scatter plot; both axes span lo..hi.  Numbers are
        # passed as strings so they land in the command line verbatim.
        template = (
            'scripts/scatter.py {} --save {} --title "{}" --x_label "Recall" --y_label "Precision"'
            " --width {} --height {} {} --lines --no_n --line_width 1.5 --marker_size 5"
            " --min_x {} --max_x {} --min_y {} --max_y {}"
        )
        run_plot(template.format(in_tsv, out_png, plot_title, width, height, params, lo, hi, lo, hi))

    def barchart(in_tsv, out_png, plot_title, y_label, extra=""):
        # Bar chart with one bar per graph; extra is appended after params.
        template = 'scripts/barchart.py {} --save {} --title "{}" --x_sideways --x_label "Graph" --y_label "{}" {}{}'
        run_plot(template.format(in_tsv, out_png, plot_title, y_label, params, extra))

    # precision recall scatter plot
    # strip qual: the last header entry is the quality column, the rest are
    # (precision, recall) pairs
    header = vcf_dist_header(options)[:-1]
    # the quality column sits right after the stripped pairs
    qual_idx = len(header)
    # floor division keeps the pair count an int under true division as well
    for i in range(len(header) // 2):
        prec_idx = 2 * i
        rec_idx = prec_idx + 1
        print("{} {} {} {}".format(prec_idx, header[prec_idx], rec_idx, header[rec_idx]))
        ptoks = header[prec_idx].split("-")
        rtoks = header[rec_idx].split("-")
        assert ptoks[1] == "Precision"
        assert rtoks[1] == "Recall"
        assert ptoks[:1] == rtoks[:1]
        comp_cat = ptoks[0]
        if comp_cat not in ["TOT", "SNP", "INDEL"]:
            continue
        label = header[prec_idx].replace("Precision", "acc")
        acc_tsv = out_base_path + "_" + label + ".tsv"
        print("Make {} tsv with cols {} {}".format(label, rec_idx, prec_idx))
        # +1 to convert to awk 1-base coordinates. +1 again since header doesnt include row_label col
        # ("\t" is deliberately a literal tab inside the awk program)
        awkcmd = 'if (NR!=1) print $1 "\t" ${} "\t" ${} "\t" ${}'.format(rec_idx + 2, prec_idx + 2, qual_idx + 2)
        awkstr = "awk '{" + awkcmd + "}'"
        run("{} {} > {}".format(awkstr, tsv_path, acc_tsv))
        acc_png = out_base_path + "_" + label + ".png"

        title = "VCF"
        if comp_cat == "TOT":
            title += " Total Accuracy"
        else:
            title += " {} Accuracy".format(comp_cat)
        title += " for {}".format(region)
        scatter(acc_tsv, acc_png, title, "-0.01", "1.01")

        # flatten to max f1 tsv and plot as bars
        f1_stem = out_base_path_f1 + "_" + label
        f1_pr_stem = out_base_path_f1.replace("-f1-", "-f1--pr-") + "_" + label
        f1_qual_stem = out_base_path_f1.replace("-f1-", "-f1-qual-") + "_" + label
        f1_tsv = f1_stem + ".tsv"
        f1_pr_tsv = f1_pr_stem + ".tsv"
        f1_qual_tsv = f1_qual_stem + ".tsv"

        make_max_f1_tsv(acc_tsv, f1_tsv, f1_pr_tsv, f1_qual_tsv, options)
        barchart(f1_tsv, f1_stem + ".png", title, "Max F1")
        scatter(f1_pr_tsv, f1_pr_stem + ".png", title, "-0.01", "1.01")
        barchart(f1_qual_tsv, f1_qual_stem + ".png", title, "Quality for Max F1", " --max 20")

        if options.top is True:
            # zoomed views of the accuracy scatter: top 20, top 20 inset
            # (smaller figure) and top 40
            for suffix, lo, hi, width, height in (
                ("_top20.png", "0.798", "1.002", "18", "9"),
                ("_top20_inset.png", "0.796", "1.004", "11", "5.5"),
                ("_top40.png", "0.596", "1.004", "18", "9"),
            ):
                scatter(acc_tsv, acc_png.replace(".png", suffix), title, lo, hi, width, height)
def plot_vcf_comp(tsv_path, options):
    """Make precision/recall plots from the big vcf comparison table.

    For each TOT, SNP and INDEL category found in the header returned by
    vcf_dist_header(), the recall, precision and quality columns of tsv_path
    are extracted with awk into their own tsv, which is plotted with
    scripts/scatter.py.  make_max_f1_tsv() is then called on that tsv and the
    three tsvs it is given as outputs are plotted as a bar chart, a scatter
    plot and a second bar chart.

    Outputs live under <comp_dir>/comp_plots: every tsv goes to the "tsv"
    sub-directory and every image to a sub-directory named after its plot tag
    ("pr", "f1bar", "f1pr" or "f1qual").

    Args:
        tsv_path: path of the comparison table.  Its base name (extension
            removed) is split on "-": the last token is the sample and the
            one before it the region, so at least two tokens are required
            (an IndexError is raised otherwise).
        options: must provide comp_dir and top (the zoomed plots are only
            made when it is exactly True).  It is also forwarded to
            vcf_dist_header() and make_max_f1_tsv().

    Every plot command is echoed to stdout and executed with os.system(); the
    exit status of the plotting scripts is not checked.
    """
    out_dir = os.path.join(options.comp_dir, "comp_plots")
    robust_makedirs(out_dir)
    out_name = os.path.basename(os.path.splitext(tsv_path)[0])
    name_toks = out_name.split("-")
    sample = name_toks[-1].upper()
    region = name_toks[-2].upper()
    # everything but the sample token; still ends with the original region token
    name_stem = "-".join(name_toks[:-1])

    def out_base_path(tag, label, extension):
        # tsvs share one directory, images are grouped by plot tag
        subdir = "tsv" if extension == ".tsv" else tag
        file_name = "{}-{}-{}-{}_{}{}".format(name_stem, sample, tag, region, label, extension)
        path = os.path.join(out_dir, subdir, file_name)
        robust_makedirs(os.path.dirname(path))
        return path

    params = " ".join(PLOT_PARAMS)

    def run_plot(cmd):
        # Echo the command, then run it through the shell.  A single-argument
        # print() gives the same output as a Python 2 print statement.
        print(cmd)
        os.system(cmd)

    def scatter(in_tsv, out_png, plot_title, limits=None, width="18", height="9"):
        # Precision/recall scatter plot.  limits is an optional (lo, hi) pair
        # of strings applied to both axes; without it no axis bounds are passed.
        template = (
            'scripts/scatter.py {} --save {} --title "{}" --x_label "Recall" --y_label "Precision"'
            " --width {} --height {} {} --lines --no_n --line_width 1.5 --marker_size 5"
        )
        cmd = template.format(in_tsv, out_png, plot_title, width, height, params)
        if limits is not None:
            cmd += " --min_x {0} --max_x {1} --min_y {0} --max_y {1}".format(*limits)
        run_plot(cmd)

    def barchart(in_tsv, out_png, plot_title, y_label, min_val=None):
        # Ascending bar chart with one bar per graph; min_val (a string)
        # optionally adds a --min bound.
        template = (
            'scripts/barchart.py {} --ascending --no_n --save {} --title "{}"'
            ' --x_sideways --x_label "Graph" --y_label "{}" {}'
        )
        cmd = template.format(in_tsv, out_png, plot_title, y_label, params)
        if min_val is not None:
            cmd += " --min {}".format(min_val)
        run_plot(cmd)

    # precision recall scatter plot
    # strip qual: the last header entry is the quality column, the rest are
    # (precision, recall) pairs
    header = vcf_dist_header(options)[:-1]
    # the quality column sits right after the stripped pairs
    qual_idx = len(header)
    # floor division keeps the pair count an int under true division as well
    for i in range(len(header) // 2):
        prec_idx = 2 * i
        rec_idx = prec_idx + 1
        print("{} {} {} {}".format(prec_idx, header[prec_idx], rec_idx, header[rec_idx]))
        ptoks = header[prec_idx].split("-")
        rtoks = header[rec_idx].split("-")
        assert ptoks[1] == "Precision"
        assert rtoks[1] == "Recall"
        assert ptoks[:1] == rtoks[:1]
        comp_cat = ptoks[0]
        if comp_cat not in ["TOT", "SNP", "INDEL"]:
            continue
        label = header[prec_idx].replace("Precision", "acc")
        acc_tsv = out_base_path("pr", label, ".tsv")
        print("Make {} tsv with cols {} {}".format(label, rec_idx, prec_idx))
        # +1 to convert to awk 1-base coordinates. +1 again since header doesnt include row_label col
        # ("\t" is deliberately a literal tab inside the awk program)
        awkcmd = 'if (NR!=1) print $1 "\t" ${} "\t" ${} "\t" ${}'.format(rec_idx + 2, prec_idx + 2, qual_idx + 2)
        awkstr = "awk '{" + awkcmd + "}'"
        run("{} {} > {}".format(awkstr, tsv_path, acc_tsv))
        acc_png = out_base_path("pr", label, ".png")

        # "<SAMPLE> <Category> Accuracy, <region>" with a single space after
        # the sample (the title used to be assembled with two)
        category = "Total" if comp_cat == "TOT" else comp_cat.title()
        where = "all regions" if region == "TOTAL" else region
        title = "{} {} Accuracy, {}".format(sample, category, where)
        scatter(acc_tsv, acc_png, title, ("-0.01", "1.01"))

        # flatten to max f1 tsv and plot as bars
        f1_tsv = out_base_path("f1bar", label, ".tsv")
        f1_png = out_base_path("f1bar", label, ".png")
        f1_pr_tsv = out_base_path("f1pr", label, ".tsv")
        f1_pr_png = out_base_path("f1pr", label, ".png")
        f1_qual_tsv = out_base_path("f1qual", label, ".tsv")
        f1_qual_png = out_base_path("f1qual", label, ".png")

        make_max_f1_tsv(acc_tsv, f1_tsv, f1_pr_tsv, f1_qual_tsv, options)
        barchart(f1_tsv, f1_png, title, "Max F1")
        scatter(f1_pr_tsv, f1_pr_png, title)
        barchart(f1_qual_tsv, f1_qual_png, title, "Quality for Max F1")

        if options.top is True:
            # zoomed accuracy scatters: top 20, top 20 inset (smaller figure), top 40
            for suffix, lo, hi, width, height in (
                ("_top20.png", "0.798", "1.002", "18", "9"),
                ("_top20_inset.png", "0.796", "1.004", "11", "5.5"),
                ("_top40.png", "0.596", "1.004", "18", "9"),
            ):
                scatter(acc_tsv, acc_png.replace(".png", suffix), title, (lo, hi), width, height)

            # max f1 bars cut off at .5, .6, .7 and .85
            for suffix, min_val in (
                ("_top50.png", "0.5"),
                ("_top60.png", "0.6"),
                ("_top70.png", "0.7"),
                ("_top85.png", "0.85"),
            ):
                barchart(f1_tsv, f1_png.replace(".png", suffix), title, "Max F1", min_val)

            # zoomed f1pr scatters: top .25, .50 and .65
            for suffix, lo in (
                ("_top25.png", "0.746"),
                ("_top50.png", "0.496"),
                ("_top65.png", "0.646"),
            ):
                scatter(f1_pr_tsv, f1_pr_png.replace(".png", suffix), title, (lo, "1.004"))
def plot_vcf_comp(tsv_path, options):
    """Make precision/recall plots from the big vcf comparison table.

    For each TOT, SNP and INDEL category found in the header returned by
    vcf_dist_header(), the recall, precision and quality columns of tsv_path
    are extracted with awk into their own tsv, which is plotted with
    scripts/scatter.py.  make_max_f1_tsv() is then called on that tsv and the
    three tsvs it is given as outputs are plotted as a bar chart, a scatter
    plot and a second bar chart.

    Outputs live under <comp_dir>/comp_plots: every tsv goes to the "tsv"
    sub-directory and every image to a sub-directory named after its plot tag
    ("pr", "f1bar", "f1pr" or "f1qual").

    Args:
        tsv_path: path of the comparison table.  Its base name (extension
            removed) is split on "-": the last token is the sample and the
            one before it the region, so at least two tokens are required
            (an IndexError is raised otherwise).
        options: must provide comp_dir and top (the zoomed plots are only
            made when it is exactly True).  It is also forwarded to
            vcf_dist_header() and make_max_f1_tsv().

    Every plot command is echoed to stdout and executed with os.system(); the
    exit status of the plotting scripts is not checked.
    """
    out_dir = os.path.join(options.comp_dir, "comp_plots")
    robust_makedirs(out_dir)
    out_name = os.path.basename(os.path.splitext(tsv_path)[0])
    name_toks = out_name.split("-")
    sample = name_toks[-1].upper()
    region = name_toks[-2].upper()
    # everything but the sample token; still ends with the original region token
    name_stem = "-".join(name_toks[:-1])

    def out_base_path(tag, label, extension):
        # tsvs share one directory, images are grouped by plot tag
        subdir = "tsv" if extension == ".tsv" else tag
        file_name = "{}-{}-{}-{}_{}{}".format(name_stem, sample, tag, region, label, extension)
        path = os.path.join(out_dir, subdir, file_name)
        robust_makedirs(os.path.dirname(path))
        return path

    params = " ".join(PLOT_PARAMS)

    def run_plot(cmd):
        # Echo the command, then run it through the shell.  A single-argument
        # print() gives the same output as a Python 2 print statement.
        print(cmd)
        os.system(cmd)

    def scatter(in_tsv, out_png, plot_title, limits=None, width="18", height="9"):
        # Precision/recall scatter plot.  limits is an optional (lo, hi) pair
        # of strings applied to both axes; without it no axis bounds are passed.
        template = (
            'scripts/scatter.py {} --save {} --title "{}" --x_label "Recall" --y_label "Precision"'
            " --width {} --height {} {} --lines --no_n --line_width 1.5 --marker_size 5"
        )
        cmd = template.format(in_tsv, out_png, plot_title, width, height, params)
        if limits is not None:
            cmd += " --min_x {0} --max_x {1} --min_y {0} --max_y {1}".format(*limits)
        run_plot(cmd)

    def barchart(in_tsv, out_png, plot_title, y_label, min_val=None):
        # Ascending bar chart with one bar per graph; min_val (a string)
        # optionally adds a --min bound.
        template = (
            'scripts/barchart.py {} --ascending --no_n --save {} --title "{}"'
            ' --x_sideways --x_label "Graph" --y_label "{}" {}'
        )
        cmd = template.format(in_tsv, out_png, plot_title, y_label, params)
        if min_val is not None:
            cmd += " --min {}".format(min_val)
        run_plot(cmd)

    # precision recall scatter plot
    # strip qual: the last header entry is the quality column, the rest are
    # (precision, recall) pairs
    header = vcf_dist_header(options)[:-1]
    # the quality column sits right after the stripped pairs
    qual_idx = len(header)
    # floor division keeps the pair count an int under true division as well
    for i in range(len(header) // 2):
        prec_idx = 2 * i
        rec_idx = prec_idx + 1
        print("{} {} {} {}".format(prec_idx, header[prec_idx], rec_idx, header[rec_idx]))
        ptoks = header[prec_idx].split("-")
        rtoks = header[rec_idx].split("-")
        assert ptoks[1] == "Precision"
        assert rtoks[1] == "Recall"
        assert ptoks[:1] == rtoks[:1]
        comp_cat = ptoks[0]
        if comp_cat not in ["TOT", "SNP", "INDEL"]:
            continue
        label = header[prec_idx].replace("Precision", "acc")
        acc_tsv = out_base_path("pr", label, ".tsv")
        print("Make {} tsv with cols {} {}".format(label, rec_idx, prec_idx))
        # +1 to convert to awk 1-base coordinates. +1 again since header doesnt include row_label col
        # ("\t" is deliberately a literal tab inside the awk program)
        awkcmd = 'if (NR!=1) print $1 "\t" ${} "\t" ${} "\t" ${}'.format(rec_idx + 2, prec_idx + 2, qual_idx + 2)
        awkstr = "awk '{" + awkcmd + "}'"
        run("{} {} > {}".format(awkstr, tsv_path, acc_tsv))
        acc_png = out_base_path("pr", label, ".png")

        # "<SAMPLE> <Category> Accuracy, <region>" with a single space after
        # the sample (the title used to be assembled with two)
        category = "Total" if comp_cat == "TOT" else comp_cat.title()
        where = "all regions" if region == "TOTAL" else region
        title = "{} {} Accuracy, {}".format(sample, category, where)
        scatter(acc_tsv, acc_png, title, ("-0.01", "1.01"))

        # flatten to max f1 tsv and plot as bars
        f1_tsv = out_base_path("f1bar", label, ".tsv")
        f1_png = out_base_path("f1bar", label, ".png")
        f1_pr_tsv = out_base_path("f1pr", label, ".tsv")
        f1_pr_png = out_base_path("f1pr", label, ".png")
        f1_qual_tsv = out_base_path("f1qual", label, ".tsv")
        f1_qual_png = out_base_path("f1qual", label, ".png")

        make_max_f1_tsv(acc_tsv, f1_tsv, f1_pr_tsv, f1_qual_tsv, options)
        barchart(f1_tsv, f1_png, title, "Max F1")
        scatter(f1_pr_tsv, f1_pr_png, title)
        barchart(f1_qual_tsv, f1_qual_png, title, "Quality for Max F1")

        if options.top is True:
            # zoomed accuracy scatters: top 20, top 20 inset (smaller figure), top 40
            for suffix, lo, hi, width, height in (
                ("_top20.png", "0.798", "1.002", "18", "9"),
                ("_top20_inset.png", "0.796", "1.004", "11", "5.5"),
                ("_top40.png", "0.596", "1.004", "18", "9"),
            ):
                scatter(acc_tsv, acc_png.replace(".png", suffix), title, (lo, hi), width, height)

            # max f1 bars cut off at .5, .6, .7 and .85
            for suffix, min_val in (
                ("_top50.png", "0.5"),
                ("_top60.png", "0.6"),
                ("_top70.png", "0.7"),
                ("_top85.png", "0.85"),
            ):
                barchart(f1_tsv, f1_png.replace(".png", suffix), title, "Max F1", min_val)

            # zoomed f1pr scatters: top .25, .50 and .65
            for suffix, lo in (
                ("_top25.png", "0.746"),
                ("_top50.png", "0.496"),
                ("_top65.png", "0.646"),
            ):
                scatter(f1_pr_tsv, f1_pr_png.replace(".png", suffix), title, (lo, "1.004"))