def process_sample(sample_name, fastq_files, info, bam_files, dirs, config, config_file):
    """Finalize processing for a sample, potentially multiplexed.

    Customizes the configuration for this sample, combines the fastq inputs,
    merges the BAM files and builds a wig file from the merged BAM. When the
    ``recalibrate`` algorithm option is set the merged BAM is recalibrated;
    genotyping and variation-effect calculation only happen inside that
    branch, and only when ``snpcall`` is also set. Alignment summaries are
    generated when a reference was found for the genome build.

    sample_name -- sample identifier; used in log messages and handed to the
      summary step.
    fastq_files -- fastq inputs handed to ``combine_fastq_files``.
    info -- per-sample mapping; ``genome_build`` is read from it and it is
      used to customize ``config``.
    bam_files -- BAM inputs handed to ``merge_bam_files``.
    dirs -- mapping providing the ``galaxy`` and ``work`` entries.
    config -- configuration mapping whose ``algorithm`` section supplies
      ``aligner``, ``recalibrate`` and ``snpcall``.
    config_file -- forwarded to the wig and summary steps.

    Returns None.
    """
    config = _update_config_w_custom(config, info)
    build = info.get("genome_build")
    _, reference = get_genome_ref(build,
                                  config["algorithm"]["aligner"],
                                  dirs["galaxy"])
    reads1, reads2 = combine_fastq_files(fastq_files, dirs["work"])

    label = str(sample_name)
    log.info("Combining and preparing wig file %s" % label)
    merged_bam = merge_bam_files(bam_files, dirs["work"], config)
    bam_to_wig(merged_bam, config, config_file)

    if config["algorithm"]["recalibrate"]:
        log.info("Recalibrating %s with GATK" % label)
        recal_bam = recalibrate_quality(merged_bam, reads1, reads2,
                                        reference, config)
        # Genotyping consumes the recalibrated BAM, so it lives in this branch.
        if config["algorithm"]["snpcall"]:
            log.info("SNP genotyping %s with GATK" % label)
            variants = run_genotyper(recal_bam, reference, config)
            log.info("Calculating variation effects for %s" % label)
            variation_effects(variants, build, reference, config)

    # Without a reference there is nothing to summarize against.
    if reference is None:
        return
    log.info("Generating summary files: %s" % label)
    is_paired = reads2 is not None
    generate_align_summary(merged_bam, is_paired, reference, config,
                           sample_name, config_file)
def process_sample(data):
    """Finalize processing for a sample, potentially multiplexed.

    ``data`` is a mapping describing one sample; it is updated in place.

    - When the ``snpcall`` algorithm option is set, the variant calls in
      ``data["vrn_file"]`` are finalized and variation effects are
      calculated. If an annotated variant file comes back it replaces
      ``data["vrn_file"]`` and the effects file is stored under
      ``data["effects_file"]``.
    - When the optional ``transcript_assemble`` option is set, the result of
      transcript assembly is stored under ``data["tx_file"]``.
    - When ``data["sam_ref"]`` is not None, alignment summaries are
      generated.

    Returns the updated mapping wrapped as ``[[data]]``.
    """
    config = data["config"]

    if config["algorithm"]["snpcall"]:
        logger.info("Finalizing variant calls: %s" % str(data["name"]))
        finalized = finalize_genotyper(data["vrn_file"],
                                       data["work_bam"],
                                       data["sam_ref"],
                                       config)
        data["vrn_file"] = finalized
        logger.info("Calculating variation effects for %s" % str(data["name"]))
        annotated, effects = variation_effects(data["vrn_file"],
                                               data["sam_ref"],
                                               data["genome_build"],
                                               config)
        # Keep the finalized calls when no annotated file was produced.
        if annotated:
            data["vrn_file"] = annotated
            data["effects_file"] = effects

    wants_transcripts = config["algorithm"].get("transcript_assemble", False)
    if wants_transcripts:
        data["tx_file"] = assemble_transcripts(data["work_bam"],
                                               data["sam_ref"],
                                               config)

    if data["sam_ref"] is not None:
        logger.info("Generating summary files: %s" % str(data["name"]))
        generate_align_summary(
            data["work_bam"],
            data["fastq2"] is not None,
            data["sam_ref"],
            data["name"],
            config,
            data["dirs"],
        )

    return [[data]]
def postprocess_variants(data):
    """Provide post-processing of variant calls.

    ``data`` is a mapping describing one sample; it is updated in place.
    Nothing is done unless the ``snpcall`` algorithm option is set. When it
    is, the calls in ``data["vrn_file"]`` are finalized and variation effects
    are calculated; a truthy annotated file returned by that step replaces
    ``data["vrn_file"]``.

    Returns the mapping wrapped as ``[[data]]``.
    """
    config = data["config"]
    if not config["algorithm"]["snpcall"]:
        return [[data]]

    logger.info("Finalizing variant calls: %s" % str(data["name"]))
    finalized = finalize_genotyper(data["vrn_file"],
                                   data["work_bam"],
                                   data["sam_ref"],
                                   config)
    data["vrn_file"] = finalized

    logger.info("Calculating variation effects for %s" % str(data["name"]))
    annotated = variation_effects(data["vrn_file"],
                                  data["sam_ref"],
                                  data["genome_build"],
                                  config)
    # Keep the finalized calls when no annotated file was produced.
    if annotated:
        data["vrn_file"] = annotated
    return [[data]]
def process_sample(sample_name, fastq_files, info, bam_files, dirs, config, config_file):
    """Finalize processing for a sample, potentially multiplexed.

    Customizes the configuration for this sample, combines the fastq inputs,
    optionally screens them for contaminants, merges the BAM files and then
    runs the post-alignment steps. Every post-alignment step is skipped when
    the merged BAM does not exist on disk. Genotyping and variation-effect
    calculation only run inside the recalibration branch, and only when
    ``snpcall`` is set.

    sample_name -- sample identifier; used in log messages, handed to the
      summary step and echoed in the result.
    fastq_files -- fastq inputs handed to ``combine_fastq_files``.
    info -- per-sample mapping; ``genome_build`` is read from it and it is
      used to customize ``config``.
    bam_files -- BAM inputs handed to ``merge_bam_files``.
    dirs -- mapping providing the ``galaxy`` and ``work`` entries; also
      forwarded to the recalibration and summary steps.
    config -- configuration mapping whose ``algorithm`` section supplies
      ``aligner``, ``recalibrate`` and ``snpcall`` plus the optional
      ``screen_contaminants`` and ``transcript_assemble`` flags.
    config_file -- forwarded to the wig generation step.

    Returns ``[sample_name, fastq_files, info, sort_bam, gatk_bam, vrn_file,
    effects_file]``; the last three are empty strings for steps that did not
    run.
    """
    config = _update_config_w_custom(config, info)
    genome_build = info.get("genome_build", None)
    fastq1, fastq2 = combine_fastq_files(fastq_files, dirs["work"])

    # Optional flag: read with a default, like transcript_assemble below, so a
    # configuration without the key skips screening instead of raising.
    if config["algorithm"].get("screen_contaminants", False):
        log.info("Screening for contaminants on sample %s with genome %s"
                 % (str(sample_name), str(genome_build)))
        screen_for_contamination(fastq1, fastq2, config)

    (_, sam_ref) = get_genome_ref(genome_build,
                                  config["algorithm"]["aligner"],
                                  dirs["galaxy"])
    log.info("Combining and preparing wig file %s" % str(sample_name))
    sort_bam = merge_bam_files(bam_files, dirs["work"], config)

    # Defaults reported for any step that does not run.
    (gatk_bam, vrn_file, effects_file) = ("", "", "")
    if config["algorithm"]["recalibrate"] and os.path.exists(sort_bam):
        log.info("Recalibrating %s with GATK" % str(sample_name))
        gatk_bam = recalibrate_quality(sort_bam, fastq1, fastq2, sam_ref,
                                       dirs, config)
        # Genotyping consumes the recalibrated BAM, so it lives in this branch.
        if config["algorithm"]["snpcall"]:
            log.info("SNP genotyping %s with GATK" % str(sample_name))
            vrn_file = run_genotyper(gatk_bam, sam_ref, config)
            log.info("Calculating variation effects for %s" % str(sample_name))
            effects_file = variation_effects(vrn_file, genome_build, config)

    if (config["algorithm"].get("transcript_assemble", False)
            and os.path.exists(sort_bam)):
        # NOTE(review): the assembly result is not part of the returned list,
        # so it is not bound to a local here.
        assemble_transcripts(sort_bam, sam_ref, config)

    if sam_ref is not None and os.path.exists(sort_bam):
        log.info("Generating summary files: %s" % str(sample_name))
        generate_align_summary(sort_bam, fastq2 is not None, sam_ref,
                               sample_name, config, dirs)

    if os.path.exists(sort_bam):
        bam_to_wig(sort_bam, config, config_file)

    return [sample_name, fastq_files, info, sort_bam, gatk_bam, vrn_file,
            effects_file]