예제 #1
0
def _organize_variants(samples, batch_id):
    """Retrieve variant calls for all samples, merging batched samples into single VCF.

    Args:
        samples: Non-empty list of sample dictionaries. Each must provide a
            "variants" list whose items carry "variantcaller" and "vrn_file"
            keys. "work_bam" and "metadata" are optional; samples whose
            metadata phenotype starts with "tumor" must also provide "name"
            and "config".
        batch_id: Batch identifier, passed through to
            population.get_multisample_vcf when a caller has several files.

    Returns:
        A tuple (caller_names, vrn_files, bam_files). caller_names follows
        the order of the first sample's variants, vrn_files holds one entry
        per caller name, and bam_files lists the distinct "work_bam" values
        (in arbitrary order, since they are collected through a set).
    """
    bam_files = set()
    # The first sample defines which callers are reported and in what order.
    caller_names = [x["variantcaller"] for x in samples[0]["variants"]]
    calls = collections.defaultdict(list)
    for data in samples:
        if "work_bam" in data:
            bam_files.add(data["work_bam"])
        for vrn in data["variants"]:
            # for somatic ensemble, discard normal samples and filtered
            # variants from vcfs
            vrn_file = vrn["vrn_file"]
            metadata = data.get("metadata", False)
            if metadata and metadata.get("phenotype", "normal").lower().startswith("tumor"):
                # Derive the output name from the input file. The fallback
                # must build on vrn_file: building on vrn_file_temp itself
                # raised UnboundLocalError for the first tumor variant and
                # reused a stale name from an earlier iteration afterwards.
                if ".vcf" in vrn_file:
                    vrn_file_temp = vrn_file.replace(".vcf", "_tumorOnly_noFilteredCalls.vcf")
                else:
                    vrn_file_temp = vrn_file + "_tumorOnly_noFilteredCalls.vcf.gz"
                # Select tumor sample and keep only PASS and . calls
                # NOTE(review): data["name"][1] is presumably the sample name
                # inside a (group, name) pair -- confirm against the caller.
                vrn_file = vcfutils.select_sample(in_file=vrn_file, sample=data["name"][1],
                                                  out_file=vrn_file_temp,
                                                  config=data["config"], filters="PASS,.")
            calls[vrn["variantcaller"]].append(vrn_file)
    # The first sample is handed to the multi-sample merge as its data argument.
    data = samples[0]
    vrn_files = []
    for caller in caller_names:
        fnames = calls[caller]
        if len(fnames) == 1:
            # A single file for this caller needs no merging.
            vrn_files.append(fnames[0])
        else:
            vrn_files.append(population.get_multisample_vcf(fnames, batch_id, caller, data))
    return caller_names, vrn_files, list(bam_files)
예제 #2
0
def _organize_variants(samples, batch_id):
    """Retrieve variant calls for all samples, merging batched samples into single VCF.

    Args:
        samples: Non-empty list of sample dictionaries. Each must provide a
            "variants" list whose items carry "variantcaller" and "vrn_file"
            keys. "work_bam" and "metadata" are optional; samples whose
            metadata phenotype starts with "tumor" must also provide "name"
            and "config".
        batch_id: Batch identifier, passed through to
            population.get_multisample_vcf when a caller has several files.

    Returns:
        A tuple (caller_names, vrn_files, bam_files). caller_names follows
        the order of the first sample's variants, vrn_files holds one entry
        per caller name, and bam_files lists the distinct "work_bam" values
        (in arbitrary order, since they are collected through a set).
    """
    suffix = "_tumorOnly_noFilteredCalls.vcf"
    bam_files = set()
    # The first sample defines which callers are reported and in what order.
    caller_names = [x["variantcaller"] for x in samples[0]["variants"]]
    calls = collections.defaultdict(list)
    for data in samples:
        if "work_bam" in data:
            bam_files.add(data["work_bam"])
        for vrn in data["variants"]:
            # for somatic ensemble, discard normal samples and filtered
            # variants from vcfs
            vrn_file = vrn["vrn_file"]
            if data.get("metadata", False) and data["metadata"].get(
                    "phenotype", "normal").lower().startswith("tumor"):
                # The fallback name is built from vrn_file. It previously
                # referenced vrn_file_temp inside its own assignment, which
                # raised UnboundLocalError on the first tumor variant and
                # picked up a stale name from an earlier iteration afterwards.
                vrn_file_temp = (vrn_file.replace(".vcf", suffix)
                                 if ".vcf" in vrn_file
                                 else vrn_file + suffix + ".gz")
                # Select tumor sample and keep only PASS and . calls
                # NOTE(review): data["name"][1] is presumably the sample name
                # inside a (group, name) pair -- confirm against the caller.
                vrn_file = vcfutils.select_sample(in_file=vrn_file,
                                                  sample=data["name"][1],
                                                  out_file=vrn_file_temp,
                                                  config=data["config"],
                                                  filters="PASS,.")
            calls[vrn["variantcaller"]].append(vrn_file)
    # The first sample is handed to the multi-sample merge as its data argument.
    data = samples[0]
    vrn_files = []
    for caller in caller_names:
        fnames = calls[caller]
        if len(fnames) == 1:
            # A single file for this caller needs no merging.
            vrn_files.append(fnames[0])
        else:
            vrn_files.append(
                population.get_multisample_vcf(fnames, batch_id, caller, data))
    return caller_names, vrn_files, list(bam_files)
예제 #3
0
def _organize_variants(samples, batch_id):
    """Collect per-caller variant files across samples, merging where needed.

    The first sample's "variants" list fixes the set and order of callers.
    Every sample's "vrn_file" entries are grouped by "variantcaller". A
    caller with exactly one file yields that file directly; otherwise the
    grouped files are handed to population.get_multisample_vcf together
    with batch_id, the caller name and the first sample.

    Returns a (caller_names, vrn_files) tuple of equal-length lists.
    """
    first_sample = samples[0]
    caller_names = [variant["variantcaller"] for variant in first_sample["variants"]]

    # Group every variant file under the caller that produced it.
    files_by_caller = collections.defaultdict(list)
    for sample in samples:
        for variant in sample["variants"]:
            files_by_caller[variant["variantcaller"]].append(variant["vrn_file"])

    def _single_or_merged(caller):
        # One file passes through untouched; anything else goes to the merge.
        caller_files = files_by_caller[caller]
        if len(caller_files) != 1:
            return population.get_multisample_vcf(caller_files, batch_id,
                                                  caller, first_sample)
        return caller_files[0]

    vrn_files = [_single_or_merged(caller) for caller in caller_names]
    return caller_names, vrn_files