Example #1
0
def compare_to_rm(data):
    """Compare final variant calls against reference materials of known calls.

    The item to validate is looked up with ``_get_validate``. When one is
    found, the truth set (``config -> algorithm -> validate``) and the optional
    ``validate_regions`` file are resolved, and the evaluation selected by
    ``config -> algorithm -> validate_method`` (default ``"rtg"``) is run.
    Its result is stored under ``data["validate"]``.

    Args:
        data: sample dictionary.
            NOTE(review): presumably the standard pipeline sample dict; only
            the keys read below are visible from this function.

    Returns:
        ``[[data]]``. ``data["validate"]`` is only assigned when there is
        something to validate and the method is ``"rtg"`` or
        ``"bcbio.variation"``; any other method falls through silently.

    Raises:
        NotImplementedError: if ``vrn_file`` of the item to validate is a
            list or tuple.
    """
    toval_data = _get_validate(data)
    if toval_data:
        if isinstance(toval_data["vrn_file"], (list, tuple)):
            # Wrap the value in a one-element tuple: a bare tuple on the right
            # of ``%`` is unpacked as the argument list, which raises TypeError
            # (or prints only the single element) instead of this error.
            raise NotImplementedError("Multiple input files for validation: %s" % (toval_data["vrn_file"],))
        vrn_file = os.path.abspath(toval_data["vrn_file"])
        rm_file = normalize_input_path(toval_data["config"]["algorithm"]["validate"], toval_data)
        # validate_regions is optional, hence ``.get``.
        rm_interval_file = _gunzip(normalize_input_path(toval_data["config"]["algorithm"].get("validate_regions"),
                                                        toval_data),
                                   toval_data)
        caller = _get_caller(toval_data)
        sample = dd.get_sample_name(toval_data)
        # Work happens under <work>/validate/<sample>/<caller>.
        base_dir = utils.safe_makedir(os.path.join(toval_data["dirs"]["work"], "validate", sample, caller))
        rm_file = naming.handle_synonyms(rm_file, dd.get_ref_file(data), data["genome_build"], base_dir, data)
        # Only pass the interval file through synonym handling when one is present.
        rm_interval_file = (naming.handle_synonyms(rm_interval_file, dd.get_ref_file(data),
                                                   data["genome_build"], base_dir, data)
                            if rm_interval_file else None)
        vmethod = tz.get_in(["config", "algorithm", "validate_method"], data, "rtg")
        if vmethod == "rtg":
            eval_files = _run_rtg_eval(vrn_file, rm_file, rm_interval_file, base_dir, toval_data)
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir, toval_data)
        elif vmethod == "bcbio.variation":
            data["validate"] = _run_bcbio_variation(vrn_file, rm_file, rm_interval_file, base_dir,
                                                    sample, caller, toval_data)
    return [[data]]
Example #2
0
def compare_to_rm(data):
    """Compare final variant calls against reference materials of known calls.

    A list/tuple input is first passed through ``_normalize_cwl_inputs``. The
    item to validate is then looked up with ``_get_validate``. When one is
    found, the truth set (``config -> algorithm -> validate``) and the
    optional ``validate_regions`` file are resolved and cleaned, and one of
    the evaluation branches below stores its result under
    ``data["validate"]``.

    Args:
        data: sample dictionary, or a list/tuple of inputs that
            ``_normalize_cwl_inputs`` accepts.
            NOTE(review): exact input schema is not visible from this
            function.

    Returns:
        ``[[data]]``. ``data["validate"]`` is not assigned when there is
        nothing to validate, when the call file has no variants, or when the
        method is not one handled here.

    Raises:
        NotImplementedError: if ``vrn_file`` of the item to validate is a
            list or tuple.
    """
    if isinstance(data, (list, tuple)):
        data = _normalize_cwl_inputs(data)
    toval_data = _get_validate(data)
    if toval_data:
        caller = _get_caller(toval_data)
        sample = dd.get_sample_name(toval_data)
        # Work happens under <work>/validate/<sample>/<caller>.
        base_dir = utils.safe_makedir(
            os.path.join(toval_data["dirs"]["work"], "validate", sample,
                         caller))

        if isinstance(toval_data["vrn_file"], (list, tuple)):
            # Wrap the value in a one-element tuple: a bare tuple on the right
            # of ``%`` is unpacked as the argument list, which raises TypeError
            # (or prints only the single element) instead of this error.
            raise NotImplementedError(
                "Multiple input files for validation: %s" %
                (toval_data["vrn_file"],))
        vrn_file = os.path.abspath(toval_data["vrn_file"])
        rm_file = normalize_input_path(
            toval_data["config"]["algorithm"]["validate"], toval_data)
        # validate_regions is optional, hence ``.get``.
        rm_interval_file = _gunzip(
            normalize_input_path(
                toval_data["config"]["algorithm"].get("validate_regions"),
                toval_data), toval_data)
        rm_interval_file = bedutils.clean_file(
            rm_interval_file,
            toval_data,
            bedprep_dir=utils.safe_makedir(os.path.join(base_dir, "bedprep")))
        rm_file = naming.handle_synonyms(rm_file, dd.get_ref_file(data),
                                         data.get("genome_build"), base_dir,
                                         data)
        # Only pass the interval file through synonym handling when one is present.
        rm_interval_file = (naming.handle_synonyms(
            rm_interval_file, dd.get_ref_file(data), data.get("genome_build"),
            base_dir, data) if rm_interval_file else None)
        vmethod = tz.get_in(["config", "algorithm", "validate_method"], data,
                            "rtg")
        if not vcfutils.vcf_has_variants(vrn_file):
            # RTG can fail on totally empty files. Skip these since we have nothing.
            pass
        # empty validation file, every call is a false positive
        elif not vcfutils.vcf_has_variants(rm_file):
            eval_files = _setup_call_fps(vrn_file, rm_interval_file, base_dir,
                                         toval_data)
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir,
                                                     toval_data)
        elif vmethod == "rtg":
            eval_files = _run_rtg_eval(vrn_file, rm_file, rm_interval_file,
                                       base_dir, toval_data)
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir,
                                                     toval_data)
        elif vmethod == "hap.py":
            data["validate"] = _run_happy_eval(vrn_file, rm_file,
                                               rm_interval_file, base_dir,
                                               toval_data)
        elif vmethod == "bcbio.variation":
            data["validate"] = _run_bcbio_variation(vrn_file, rm_file,
                                                    rm_interval_file, base_dir,
                                                    sample, caller, toval_data)
    return [[data]]
Example #3
0
def compare_to_rm(data):
    """Compare final variant calls against reference materials of known calls.

    A list/tuple input whose first element is a CWL run (per
    ``cwlutils.is_cwl_run``) is first passed through
    ``_normalize_cwl_inputs``. The item to validate is then looked up with
    ``_get_validate`` and run through ``cwlutils.unpack_tarballs``. When there
    is one, the truth set (``config -> algorithm -> validate``) and the
    optional ``validate_regions`` file are resolved and cleaned, and one of
    the evaluation branches below stores its result under
    ``data["validate"]``.

    Args:
        data: sample dictionary, or a list/tuple of CWL inputs.
            NOTE(review): exact input schema is not visible from this
            function.

    Returns:
        ``[[data]]``. ``data["validate"]`` is not assigned when there is
        nothing to validate or when the method is not one handled here.

    Raises:
        NotImplementedError: if ``vrn_file`` of the item to validate is a
            list or tuple.
    """
    if isinstance(data, (list, tuple)) and cwlutils.is_cwl_run(utils.to_single_data(data[0])):
        data = _normalize_cwl_inputs(data)
    toval_data = _get_validate(data)
    toval_data = cwlutils.unpack_tarballs(toval_data, toval_data)
    if toval_data:
        caller = _get_caller(toval_data)
        sample = dd.get_sample_name(toval_data)
        # Work happens under <work>/validate/<sample>/<caller>.
        base_dir = utils.safe_makedir(os.path.join(toval_data["dirs"]["work"], "validate", sample, caller))

        if isinstance(toval_data["vrn_file"], (list, tuple)):
            # Wrap the value in a one-element tuple: a bare tuple on the right
            # of ``%`` is unpacked as the argument list, which raises TypeError
            # (or prints only the single element) instead of this error.
            raise NotImplementedError("Multiple input files for validation: %s" % (toval_data["vrn_file"],))
        vrn_file = os.path.abspath(toval_data["vrn_file"])
        rm_file = normalize_input_path(toval_data["config"]["algorithm"]["validate"], toval_data)
        # validate_regions is optional, hence ``.get``.
        rm_interval_file = _gunzip(normalize_input_path(toval_data["config"]["algorithm"].get("validate_regions"),
                                                        toval_data),
                                   toval_data)
        rm_interval_file = bedutils.clean_file(rm_interval_file, toval_data, prefix="validateregions-",
                                               bedprep_dir=utils.safe_makedir(os.path.join(base_dir, "bedprep")))
        rm_file = naming.handle_synonyms(rm_file, dd.get_ref_file(toval_data), data.get("genome_build"),
                                         base_dir, data)
        # Only pass the interval file through synonym handling when one is present.
        rm_interval_file = (naming.handle_synonyms(rm_interval_file, dd.get_ref_file(toval_data),
                                                   data.get("genome_build"), base_dir, data)
                            if rm_interval_file else None)
        vmethod = tz.get_in(["config", "algorithm", "validate_method"], data, "rtg")
        # RTG can fail on totally empty files. Call everything in truth set as false negatives
        if not vcfutils.vcf_has_variants(vrn_file):
            eval_files = _setup_call_false(rm_file, rm_interval_file, base_dir, toval_data, "fn")
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir, toval_data)
        # empty validation file, every call is a false positive
        elif not vcfutils.vcf_has_variants(rm_file):
            eval_files = _setup_call_fps(vrn_file, rm_interval_file, base_dir, toval_data, "fp")
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir, toval_data)
        elif vmethod in ["rtg", "rtg-squash-ploidy"]:
            eval_files = _run_rtg_eval(vrn_file, rm_file, rm_interval_file, base_dir, toval_data, vmethod)
            eval_files = _annotate_validations(eval_files, toval_data)
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir, toval_data)
        elif vmethod == "hap.py":
            data["validate"] = _run_happy_eval(vrn_file, rm_file, rm_interval_file, base_dir, toval_data)
        elif vmethod == "bcbio.variation":
            data["validate"] = _run_bcbio_variation(vrn_file, rm_file, rm_interval_file, base_dir,
                                                    sample, caller, toval_data)
    return [[data]]
Example #4
0
def compare_to_rm(data):
    """Compare final variant calls against reference materials of known calls.

    The item to validate is looked up with ``_get_validate``. When one is
    found, the truth set (``config -> algorithm -> validate``) and the optional
    ``validate_regions`` file are resolved, and the evaluation selected by
    ``config -> algorithm -> validate_method`` (default ``"rtg"``) is run.
    Its result is stored under ``data["validate"]``.

    Args:
        data: sample dictionary.
            NOTE(review): presumably the standard pipeline sample dict; only
            the keys read below are visible from this function.

    Returns:
        ``[[data]]``. ``data["validate"]`` is only assigned when there is
        something to validate and the method is ``"rtg"`` or
        ``"bcbio.variation"``; any other method falls through silently.

    Raises:
        NotImplementedError: if ``vrn_file`` of the item to validate is a
            list or tuple.
    """
    toval_data = _get_validate(data)
    if toval_data:
        if isinstance(toval_data["vrn_file"], (list, tuple)):
            # Wrap the value in a one-element tuple: a bare tuple on the right
            # of ``%`` is unpacked as the argument list, which raises TypeError
            # (or prints only the single element) instead of this error.
            raise NotImplementedError(
                "Multiple input files for validation: %s" %
                (toval_data["vrn_file"],))
        vrn_file = os.path.abspath(toval_data["vrn_file"])
        rm_file = normalize_input_path(
            toval_data["config"]["algorithm"]["validate"], toval_data)
        # validate_regions is optional, hence ``.get``.
        rm_interval_file = _gunzip(
            normalize_input_path(
                toval_data["config"]["algorithm"].get("validate_regions"),
                toval_data), toval_data)
        caller = _get_caller(toval_data)
        sample = dd.get_sample_name(toval_data)
        # Work happens under <work>/validate/<sample>/<caller>.
        base_dir = utils.safe_makedir(
            os.path.join(toval_data["dirs"]["work"], "validate", sample,
                         caller))
        rm_file = naming.handle_synonyms(rm_file, dd.get_ref_file(data),
                                         data["genome_build"], base_dir, data)
        # Only pass the interval file through synonym handling when one is present.
        rm_interval_file = (naming.handle_synonyms(
            rm_interval_file, dd.get_ref_file(data), data["genome_build"],
            base_dir, data) if rm_interval_file else None)
        vmethod = tz.get_in(["config", "algorithm", "validate_method"], data,
                            "rtg")
        if vmethod == "rtg":
            eval_files = _run_rtg_eval(vrn_file, rm_file, rm_interval_file,
                                       base_dir, toval_data)
            data["validate"] = _rtg_add_summary_file(eval_files, base_dir,
                                                     toval_data)
        elif vmethod == "bcbio.variation":
            data["validate"] = _run_bcbio_variation(vrn_file, rm_file,
                                                    rm_interval_file, base_dir,
                                                    sample, caller, toval_data)
    return [[data]]