# Build the validation report for the current sample: keep every variant that
# hits at least one of the sample's configured validation targets. Targets are
# optional comma-separated strings stored under the 'cosmic_ids', 'rs_ids' and
# 'amplicons' keys of the sample's configuration.
sample_config = samples[sample]

# The target lists depend only on the sample, so parse them once here instead
# of once per variant. A key is present in `targets` only if it is configured.
targets = {key: set(sample_config[key].split(','))
           for key in ('cosmic_ids', 'rs_ids', 'amplicons')
           if key in sample_config}

validation_variants = list()
for variant in ordered_variants:
    matched = False

    if 'cosmic_ids' in targets and variant.cosmic_ids:
        matched = any(cosmic_id in targets['cosmic_ids']
                      for cosmic_id in variant.cosmic_ids)

    if not matched and 'rs_ids' in targets and variant.rs_ids:
        matched = any(rs_id in targets['rs_ids'] for rs_id in variant.rs_ids)

    if not matched and 'amplicons' in targets:
        # Check for a value before splitting: the amplicon field may be empty
        # or None for variants that do not overlap any amplicon.
        overlapping = variant.amplicon_data['amplicon']
        if overlapping:
            # Compare each individual amplicon name against the targets (the
            # previous code compared the target list with itself and so could
            # never match).
            matched = any(amplicon in targets['amplicons']
                          for amplicon in overlapping.split(','))

    # Append once per variant, even when it matches several target types, so
    # the report does not contain duplicate rows.
    if matched:
        validation_variants.append(variant)

sys.stdout.write("Retrieved {} total variants\n".format(variants.count()))
sys.stdout.write("Writing {} variants to sample report\n".format(len(validation_variants)))
utils.write_sample_variant_report(args.report, sample, validation_variants, args.variant_callers)
# Fetch the sambamba coverage record(s) for every targeted amplicon of this
# sample / run / library and store the read count and mean coverage per
# amplicon in target_amplicon_coverage.
for amplicon in target_amplicons:
    coverage_data = SampleCoverage.objects.timeout(None).filter(
        SampleCoverage.sample == samples[sample]['sample_name'],
        SampleCoverage.amplicon == amplicon,
        SampleCoverage.run_id == samples[sample]['run_id'],
        SampleCoverage.library_name == samples[sample]['library_name'],
        SampleCoverage.program_name == "sambamba"
    )
    # If the query yields several rows, the last one iterated wins.
    for result in coverage_data:
        target_amplicon_coverage[amplicon]['num_reads'] = result.num_reads
        target_amplicon_coverage[amplicon]['mean_coverage'] = result.mean_coverage

# The explicit limit is sized from the row count plus headroom -- presumably
# to lift the query set's default row cap so no variant is dropped.
ordered_variants = variants.order_by('library_name', 'chr', 'pos',
                                     'ref', 'alt').limit(variants.count() + 1000)

# Keep variants that overlap at least one targeted amplicon AND were reported
# by at least one of the requested callers.
filtered_variants = list()
for variant in ordered_variants:
    overlapping = variant.amplicon_data['amplicon']
    if not overlapping:
        continue

    on_target = any(name in target_amplicons for name in overlapping.split(','))
    # Append at most once per variant: the previous nested loops only broke out
    # of the caller loop, so a variant overlapping several targeted amplicons
    # was added (and reported) once per amplicon.
    if on_target and any(caller in variant.callers for caller in callers):
        filtered_variants.append(variant)

sys.stdout.write("Retrieved {} total variants\n".format(variants.count()))
sys.stdout.write("Sending {} variants to reporting\n".format(len(filtered_variants)))
utils.write_sample_variant_report(args.report, sample, filtered_variants,
                                  target_amplicon_coverage, callers)
'max_maf': args.max_pop_freq, 'depth': args.depth} callers = args.variant_callers.split(',') sys.stdout.write("Processing samples\n") for sample in samples: sys.stdout.write("Running Cassandra query for sample {}\n".format(sample)) variants = SampleVariant.objects.timeout(None).filter(SampleVariant.reference_genome == config['genome_version'], SampleVariant.sample == samples[sample]['sample_name'], SampleVariant.run_id == samples[sample]['run_id'], SampleVariant.library_name == samples[sample]['library_name'], SampleVariant.max_som_aaf >= thresholds['min_saf'], SampleVariant.max_maf_all <= thresholds['max_maf'], SampleVariant.max_depth >= thresholds['depth'] ).allow_filtering() ordered_variants = variants.order_by('library_name', 'chr', 'pos', 'ref', 'alt').limit(variants.count() + 1000) filtered_variants = list() for variant in ordered_variants: if variant.amplicon_data['amplicon']: for caller in callers: if caller in variant.callers: filtered_variants.append(variant) break sys.stdout.write("Retrieved {} total variants\n".format(variants.count())) sys.stdout.write("Writing {} variants to sample report\n".format(len(filtered_variants))) utils.write_sample_variant_report(args.report, sample, filtered_variants, callers)