Beispiel #1
0
def main():
    """Run the totals tool that was selected on the command line.

    The parameters come from ``parpar.get_totals_parameters`` and are checked
    with ``parpar.parameters_are_ok``. If that check reports anything, the
    reported parameters are printed and no tool is run; otherwise the handler
    matching ``totalsparams["tool"]`` is called.
    """
    totalsparams = parpar.get_totals_parameters(TOOL_CHOICES)
    incorrect_parameters = parpar.parameters_are_ok(
        totalsparams, REQUIRED_PARAMS, PARAM_TYPES)

    # Guard clause: report the rejected parameters and stop.
    if len(incorrect_parameters) != 0:
        print(f"Missing the following parameters: {incorrect_parameters}")
        return

    selected_tool = totalsparams["tool"]

    if selected_tool == "arraycnv":
        # Total number of array CNVs found and missed
        run_arraycnv(totalsparams)
    elif selected_tool == "classification":
        # Totals per classification label
        run_classification(totalsparams)
    elif selected_tool == "fpregions":
        # Number of unique and duplicate FP regions
        run_fpregions(totalsparams)
    elif selected_tool == "numofcalls":
        # Total number of GATK4 calls
        gttc.get_total_calls(totalsparams["infile"])
    elif selected_tool == "numofna":
        # Total number of GATK4 calls that overlap with an array CNV
        gttc.get_total_calls(totalsparams["infile"], True)
    elif selected_tool == "dualbed_arraycnv":
        # Total number of array CNVs found and missed for dualBED
        run_dualbed_arraycnv(totalsparams)
    elif selected_tool == "dualbed_classification":
        run_dualbed_classification_ratios(totalsparams)
Beispiel #2
0
def main():
    """Run the dualBED tool that was selected on the command line.

    The parameters come from ``parpar.get_dualbed_parameters`` and are
    checked with ``parpar.parameters_are_ok``. If that check reports
    anything, the reported parameter names are printed and no tool is run.

    Only the ``combine`` tool does real work here; ``classification`` and
    ``filter`` just print a placeholder message.
    """
    cbd_params = parpar.get_dualbed_parameters(TOOL_CHOICES)
    incorrect_parameters = parpar.parameters_are_ok(
        cbd_params, REQUIRED_PARAMS, PARAM_TYPES)

    # Guard clause: tell the user what still has to be set and stop.
    if len(incorrect_parameters) != 0:
        to_set = ", ".join(incorrect_parameters)
        print("Please set the following parameters: " + to_set)
        return

    selected_tool = cbd_params["tool"]

    if selected_tool == "combine":
        # Combine two dualBED result files
        print("...Start combining two dualBED files...")
        wrote_file = combine_dualbed_files(
            cbd_params["infile1"],
            cbd_params["infile2"],
            cbd_params["outfile"],
            cbd_params["no-unique"],
        )
        print(f"...Wrote combined output file?: {wrote_file}...")
    elif selected_tool in ("classification", "filter"):
        # dualBED classification and dualBED filtering are placeholders
        print("...Implementing...")
Beispiel #3
0
def main():
    """Run the utility tool that was selected on the command line.

    The parameters come from ``parpar.get_util_parameters`` and are checked
    with ``parpar.parameters_are_ok``. If that check reports anything, the
    reported parameters are printed, the usage of the selected tool is shown
    via ``parpar.display_tool_usage``, and no tool is run.
    """
    utilparams = parpar.get_util_parameters(TOOL_CHOICES)
    incorrectparams = parpar.parameters_are_ok(
        utilparams, REQUIRED_PARAMS, PARAM_TYPES)

    # Guard clause: report the problem, show the tool usage and stop.
    if len(incorrectparams) != 0:
        print(f"The following parameters are incorrect: {incorrectparams}")
        parpar.display_tool_usage(utilparams["tool"], TOOL_USAGE)
        return

    selected_tool = utilparams["tool"]

    if selected_tool == "arraybedregion":
        # Display BED regions overlapping with an array CNV region
        run_arraycnv_bedregions(utilparams)
    elif selected_tool == "filterxy":
        # Filter X and Y chromosomes from an interval list file
        ufxyfi.filter_intervallist(
            utilparams["intervallist"], utilparams["outfile"])
    elif selected_tool == "fixarray":
        # Fix incorrect array CNV notation (current script versions do not
        # need this!)
        ufac.fix_array_cnv_notation(
            utilparams["infile"], utilparams["outfile"])
    elif selected_tool == "selectplotregion":
        # Select data for a specified plot region
        run_selectplotregion(utilparams)
    elif selected_tool == "getsnplog2ratios":
        # Get interval log2 ratios for SNP positions
        run_getsnplog2ratios(utilparams)
Beispiel #4
0
def main():
    """Compare the classification results of two tools.

    The parameters come from ``parpar.get_comparison_parameters`` and are
    checked with ``parpar.parameters_are_ok``. If that check reports
    anything, the reported parameter names are printed and nothing else is
    done.

    Otherwise both classification files are read, the comparison selected by
    ``compare_parameters["tool"]`` (``arraycnvs``, ``false_positives`` or
    ``true_positives``) is performed, and the result is written to
    ``<outdir>/<output-prefix>.txt``.
    """
    compare_parameters = parpar.get_comparison_parameters(TOOL_CHOICES)
    incorrect_parameters = parpar.parameters_are_ok(
        compare_parameters, REQUIRED_PARAMS, PARAM_TYPES)

    # Guard clause: tell the user what still has to be set and stop.
    if len(incorrect_parameters) != 0:
        to_set = ", ".join(incorrect_parameters)
        print("Please set the following parameters: " + to_set)
        return

    def build_outfilepath():
        # Evaluated lazily inside each branch, exactly where the path is
        # needed, so the parameters are looked up at the same moment as
        # before.
        out_dir = compare_parameters["outdir"]
        file_stem = compare_parameters["output-prefix"]
        return out_dir + "/" + file_stem + ".txt"

    tool1_label = compare_parameters["label1"]
    tool2_label = compare_parameters["label2"]

    print(f"...Reading {tool1_label} classification data...")
    tool1data = ufr.read_classification_file(compare_parameters["file1"])
    print(f"...Reading {tool2_label} classification data...")
    tool2data = ufr.read_classification_file(compare_parameters["file2"])

    comparison_kind = compare_parameters["tool"]

    if comparison_kind == "arraycnvs":
        # Compare the two tools on the array CNVs they found
        print("...Reading array CNV data...")
        arraydata = ufr.read_array_cnvs(compare_parameters["arraycnvs"])
        print(
            f"...Perform the comparison between {tool1_label} and {tool2_label}..."
        )
        comparisondata = comcom.perform_comparison(
            tool1_label,
            tool1data,
            tool2_label,
            tool2data,
            arraydata,
            compare_parameters["tp-per-acnv"],
        )

        outfilepath = build_outfilepath()
        print(f"...Writing comparison data to output file {outfilepath}...")
        wrote_file = ufw.write_comparison_data(
            outfilepath, comparisondata, tool1_label, tool2_label)
        print(f"...Wrote comparison output file?: {wrote_file}...")
    elif comparison_kind == "false_positives":
        # Compare the two tools on their False Positives
        comparisondata = comcom.compare_fps(
            tool1_label, tool1data, tool2_label, tool2data)
        outfilepath = build_outfilepath()
        wrote_file = ufw.write_fp_comparison(
            outfilepath, comparisondata, tool1_label, tool2_label)
        print(f"...Wrote comparison output file?: {wrote_file}...")
    elif comparison_kind == "true_positives":
        # Compare the two tools on their True Positives
        comparisondata = comcom.compare_tps(
            tool1_label, tool1data, tool2_label, tool2data)
        outfilepath = build_outfilepath()
        wrote_file = ufw.write_tp_comparison(
            outfilepath, comparisondata, tool1_label, tool2_label)
        print(f"...Wrote comparison output file?: {wrote_file}...")
Beispiel #5
0
def main():
    """Run the filtering tool that was selected on the command line.

    The parameters come from ``parpar.get_filtering_parameters`` and are
    checked with ``parpar.parameters_are_ok``. If that check reports
    anything, the reported parameters are printed, the usage of the selected
    tool is shown via ``parpar.display_tool_usage``, and no tool is run.
    """
    filterparams = parpar.get_filtering_parameters(TOOL_CHOICES)
    incorrect_params = parpar.parameters_are_ok(
        filterparams, REQUIRED_PARAMS, PARAM_TYPES)

    # Guard clause: report the problem, show the tool usage and stop.
    if len(incorrect_params) != 0:
        print(f"Missing parameters: {incorrect_params}")
        parpar.display_tool_usage(filterparams["tool"], TOOL_USAGE)
        return

    selected_tool = filterparams["tool"]

    if selected_tool == "ccnvfiltering":
        # Filtering with a Common CNV list
        run_ccnvfiltering(filterparams)
    elif selected_tool == "conradfiltering":
        # Filtering with Conrad CNVs
        run_conradfiltering(filterparams)
    elif selected_tool == "nafiltering":
        # Remove GATK4 CNV calls without an overlapping Array CNV call
        run_nafiltering(filterparams)
    elif selected_tool == "sizefiltering":
        # Filter CNV calls by size
        run_sizefiltering(filterparams)
Beispiel #6
0
def main():
    """Classify the CNV calls of the selected tool against the array CNVs.

    Takes no arguments: the command line values are obtained from
    ``parpar.get_classification_parameters`` and checked with
    ``parpar.parameters_are_ok``.

    When the check reports no problems, the sample table, the probe file,
    the exon data and the array CNVs are read. Only if all four are
    non-empty is the workflow of the selected tool run: read the tool's
    calls, evaluate them against the array CNVs, classify the leftover
    array CNVs and write the classifications. ``DONE!`` is printed
    afterwards.

    Returns
    -------
    None
    """
    cmd_argvalues = parpar.get_classification_parameters(TOOL_CHOICES)
    incorrect_parameters = parpar.parameters_are_ok(cmd_argvalues,
                                                    REQUIRED_PARAMS,
                                                    PARAM_TYPES)

    if len(incorrect_parameters) == 0:
        print("...Reading the sample table...")
        sample_data = ufr.read_sample_table(cmd_argvalues["samples"])
        print("...Reading the probe file...")
        probe_data = ufr.read_probes_data(cmd_argvalues["probesfile"])
        print("...Reading the exon data...")
        exon_data = ufr.read_exon_data(cmd_argvalues["exonsfile"])
        print("...Reading the array data...")
        array_data = read_array_cnvs(cmd_argvalues["arrayfile"], exon_data)

        # Every reference data set must contain something, otherwise no
        # classification is attempted.
        if len(sample_data) > 0 and len(probe_data) > 0 and len(
                exon_data) > 0 and len(array_data) > 0:
            # GATK4 CNV CLASSIFICATION
            # NOTE(review): only this branch compares the tool name
            # case-insensitively; the Conifer and ExomeDepth branches below
            # require the exact lowercase names.
            if cmd_argvalues["tool"].upper() == "GATK":
                print("...Start combining GATK4 .called.seg files...")
                gatk4_cnv_data = gatk4_combine_seg_files(
                    cmd_argvalues["indir"], sample_data, probe_data, exon_data,
                    "")
                print(
                    f"...Read GATK4 CNV data for {len(gatk4_cnv_data)} samples"
                )
                print("...Evaluating GATK4 CNVs...")
                gatk4_cnv_data = gatk4_evaluate(sample_data, probe_data,
                                                exon_data, array_data,
                                                gatk4_cnv_data)
                print("...Classifying leftover array CNVs...")
                array_classify_leftovers(array_data)
                print("...Writing all GATK4 CNV classifications to file...")
                write_cnv_results_2(gatk4_cnv_data, cmd_argvalues["output"],
                                    cmd_argvalues["filterneutrals"],
                                    cmd_argvalues["cnvsize"])

            # CONIFER CNV CLASSIFICATION
            if cmd_argvalues["tool"] == "conifer":
                print("...Start reading the Conifer data...")
                conifer_data = read_conifer_data(cmd_argvalues["infile"],
                                                 sample_data, exon_data,
                                                 probe_data)
                print(
                    f"...Read Conifer CNV data for {len(conifer_data)} samples..."
                )

                print("...Evaluating Conifer CNVs...")
                evaluate_cnv_calls(conifer_data, array_data,
                                   cmd_argvalues["numofexons"],
                                   cmd_argvalues["numofprobes"],
                                   cmd_argvalues["percentoverlap"],
                                   CONIFER_CALL_TRANSLATIONS)

                print("...Classifying leftover array CNVs...")
                array_classify_leftovers(array_data)

                print("...Writing all Conifer classifications to file...")
                file_written = write_cnv_classifications(
                    conifer_data, cmd_argvalues["output"],
                    cmd_argvalues["filterneutrals"], cmd_argvalues["cnvsize"],
                    CONIFER_CALL_TRANSLATIONS, "Conifer")
                print(f"...Output file written?: {file_written}...")

            # EXOMEDEPTH CNV CLASSIFICATION
            if cmd_argvalues["tool"] == "exomedepth":
                print("...Start reading the ExomeDepth data...")
                exomedepth_data = read_combined_exomedepth_data(
                    cmd_argvalues["infile"], sample_data, exon_data,
                    probe_data)
                print(
                    f"...Read ExomeDepth CNV data for {len(exomedepth_data)} samples..."
                )

                print("...Evaluating ExomeDepth CNVs...")
                evaluate_cnv_calls(exomedepth_data, array_data,
                                   cmd_argvalues["numofexons"],
                                   cmd_argvalues["numofprobes"],
                                   cmd_argvalues["percentoverlap"],
                                   EXOMEDEPTH_CALL_TRANSLATIONS)

                print("...Classifying leftover array CNVs...")
                array_classify_leftovers(array_data)

                print("...Writing all ExomeDepth classifications to file...")
                file_written = write_cnv_classifications(
                    exomedepth_data, cmd_argvalues["output"],
                    cmd_argvalues["filterneutrals"], cmd_argvalues["cnvsize"],
                    EXOMEDEPTH_CALL_TRANSLATIONS, "ExomeDepth")
                # Message text matches the Conifer branch (was "Outout").
                print(f"...Output file written?: {file_written}...")
            print("DONE!")