def _organize_variants(samples, batch_id):
    """Retrieve variant calls for all samples, merging batched samples into single VCF.

    For samples whose metadata phenotype starts with "tumor" (case
    insensitive), every variant file is first passed through
    ``vcfutils.select_sample`` with ``filters="PASS,."`` and the file it
    returns is used in place of the original.

    Args:
        samples: non-empty sequence of sample dictionaries. Each needs a
            "variants" list whose items carry "variantcaller" and "vrn_file";
            "work_bam" and "metadata" are optional. Tumor samples also need
            "name" and "config".
        batch_id: batch identifier handed to
            ``population.get_multisample_vcf`` when a caller has anything
            other than exactly one file.

    Returns:
        Tuple ``(caller_names, vrn_files, bam_files)``: the callers in the
        order listed by the first sample, one variant file per caller, and
        the unique "work_bam" values (order not guaranteed, they are
        collected in a set).
    """
    bam_files = set()
    caller_names = [x["variantcaller"] for x in samples[0]["variants"]]
    calls = collections.defaultdict(list)
    for data in samples:
        if "work_bam" in data:
            bam_files.add(data["work_bam"])
        for vrn in data["variants"]:
            # for somatic ensemble, discard normal samples and filtered
            # variants from vcfs
            vrn_file = vrn["vrn_file"]
            if (data.get("metadata", False) and
                    data["metadata"].get("phenotype", "normal").lower().startswith("tumor")):
                if ".vcf" in vrn_file:
                    vrn_file_temp = vrn_file.replace(".vcf", "_tumorOnly_noFilteredCalls.vcf")
                else:
                    # Derive the output path from the input file itself. The
                    # previous code read vrn_file_temp here, which is either
                    # unbound (first file) or left over from an earlier file.
                    vrn_file_temp = vrn_file + "_tumorOnly_noFilteredCalls.vcf.gz"
                # Select tumor sample and keep only PASS and . calls
                # NOTE(review): data["name"][1] is presumably the sample
                # name inside a (batch, sample) pair -- confirm with callers.
                vrn_file = vcfutils.select_sample(in_file=vrn_file,
                                                  sample=data["name"][1],
                                                  out_file=vrn_file_temp,
                                                  config=data["config"],
                                                  filters="PASS,.")
            calls[vrn["variantcaller"]].append(vrn_file)
    # The first sample supplies the context for multi-sample merging.
    data = samples[0]
    vrn_files = []
    for caller in caller_names:
        fnames = calls[caller]
        if len(fnames) == 1:
            vrn_files.append(fnames[0])
        else:
            vrn_files.append(population.get_multisample_vcf(fnames, batch_id, caller, data))
    return caller_names, vrn_files, list(bam_files)
def _organize_variants(samples, batch_id):
    """Retrieve variant calls for all samples, merging batched samples into single VCF.

    For samples whose metadata phenotype starts with "tumor" (case
    insensitive), every variant file is first passed through
    ``vcfutils.select_sample`` with ``filters="PASS,."`` and the file it
    returns is used in place of the original.

    Args:
        samples: non-empty sequence of sample dictionaries. Each needs a
            "variants" list whose items carry "variantcaller" and "vrn_file";
            "work_bam" and "metadata" are optional. Tumor samples also need
            "name" and "config".
        batch_id: batch identifier handed to
            ``population.get_multisample_vcf`` when a caller has anything
            other than exactly one file.

    Returns:
        Tuple ``(caller_names, vrn_files, bam_files)``: the callers in the
        order listed by the first sample, one variant file per caller, and
        the unique "work_bam" values (order not guaranteed, they are
        collected in a set).
    """
    bam_files = set()
    caller_names = [x["variantcaller"] for x in samples[0]["variants"]]
    calls = collections.defaultdict(list)
    for data in samples:
        if "work_bam" in data:
            bam_files.add(data["work_bam"])
        for vrn in data["variants"]:
            # for somatic ensemble, discard normal samples and filtered
            # variants from vcfs
            vrn_file = vrn["vrn_file"]
            if data.get("metadata", False) and data["metadata"].get(
                    "phenotype", "normal").lower().startswith("tumor"):
                if ".vcf" in vrn_file:
                    vrn_file_temp = vrn_file.replace(
                        ".vcf", "_tumorOnly_noFilteredCalls.vcf")
                else:
                    # Build the output name from the input file. The earlier
                    # expression referenced vrn_file_temp before assigning
                    # it, raising UnboundLocalError on the first such file
                    # or reusing a stale path from a previous iteration.
                    vrn_file_temp = (
                        vrn_file + "_tumorOnly_noFilteredCalls.vcf.gz")
                # Select tumor sample and keep only PASS and . calls
                # NOTE(review): data["name"][1] is presumably the sample
                # name inside a (batch, sample) pair -- confirm with callers.
                vrn_file = vcfutils.select_sample(in_file=vrn_file,
                                                  sample=data["name"][1],
                                                  out_file=vrn_file_temp,
                                                  config=data["config"],
                                                  filters="PASS,.")
            calls[vrn["variantcaller"]].append(vrn_file)
    # The first sample supplies the context for multi-sample merging.
    data = samples[0]
    vrn_files = []
    for caller in caller_names:
        fnames = calls[caller]
        if len(fnames) == 1:
            vrn_files.append(fnames[0])
        else:
            vrn_files.append(
                population.get_multisample_vcf(fnames, batch_id, caller, data))
    return caller_names, vrn_files, list(bam_files)
def _organize_variants(samples, batch_id):
    """Collect per-caller variant files across samples, merging batches into one VCF.

    Callers are taken, in order, from the first sample's "variants" list.
    A caller with exactly one file across all samples contributes that file
    unchanged; otherwise its files are handed to
    ``population.get_multisample_vcf`` together with ``batch_id``, the
    caller name and the first sample.

    Returns a ``(caller_names, vrn_files)`` tuple with one variant file per
    caller, in matching order.
    """
    first_sample = samples[0]
    caller_names = [variant["variantcaller"] for variant in first_sample["variants"]]

    # Group every sample's variant files under the caller that produced them.
    files_by_caller = collections.defaultdict(list)
    for sample in samples:
        for variant in sample["variants"]:
            files_by_caller[variant["variantcaller"]].append(variant["vrn_file"])

    def _single_or_merged(caller):
        caller_files = files_by_caller[caller]
        if len(caller_files) != 1:
            return population.get_multisample_vcf(caller_files, batch_id,
                                                  caller, first_sample)
        return caller_files[0]

    vrn_files = [_single_or_merged(caller) for caller in caller_names]
    return caller_names, vrn_files