def scalpel_indel(pairs, log_dir, config_file, ref_mnt):
    """Run Scalpel in somatic mode for every tumor/normal pair in a pairs file.

    For each pair the Scalpel command is executed through the shell with its
    stderr appended to a per-pair log file, the contents of ``outdir/main``
    are moved into a directory named after the pair, and, when the configured
    dustmask flag is ``'Y'``, ``filter_indel`` is run on the pair.

    Args:
        pairs: Path to a tab-separated file; column 1 is the pair name,
            column 2 the tumor sample id and column 3 the normal sample id.
            BAM names are built as ``<sample id>.merged.final.bam``.
        log_dir: Prefix for the per-pair log file. It is concatenated
            directly with ``<pair>.scalpel.log``, so it must end with a path
            separator (e.g. ``'LOGS/'``).
        config_file: Configuration file handed to ``parse_config`` and
            ``get_merged_bams``.
        ref_mnt: Directory that is prepended to the bed, fasta and dustmask
            bed paths read from the configuration.

    Returns:
        0 when every pair has been processed.

    Raises:
        SystemExit: With status 1 if the Scalpel command or the dustmask
            filter reports a non-zero status for any pair.
    """
    (scalpel, bedtools, bed, fasta, cpus, dustmask_flag, dustmask_bed) = parse_config(config_file)
    bed = ref_mnt + '/' + bed
    fasta = ref_mnt + '/' + fasta
    dustmask_bed = ref_mnt + '/' + dustmask_bed
    # use get_merged_bams api
    sample_list = 'sample_list.txt'
    if not os.path.isfile(sample_list):
        # No sample list on disk: build it from the pairs file and fetch the
        # merged BAMs it names before calling indels.
        create_sample_list(pairs)
        sys.stderr.write(date_time() + 'Sample pairs list not created - creating one since this is being run likely '
                                       'outside of pipeline\n')
        get_merged_bams(config_file, sample_list)
    with open(pairs, 'r') as fh:
        for line in fh:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            cur = line.rstrip('\n').split('\t')
            loc = log_dir + cur[0] + '.scalpel.log'
            tumor_bam = cur[1] + '.merged.final.bam'
            normal_bam = cur[2] + '.merged.final.bam'
            # The shell is required here because of the stderr redirection.
            scalpel_cmd = scalpel + ' --somatic --logs --numprocs ' + cpus + ' --tumor ' + tumor_bam + ' --normal ' \
                + normal_bam + ' --bed ' + bed + ' --ref ' + fasta + ' 2>> ' + loc
            sys.stderr.write(date_time() + 'Starting indel calls for ' + cur[0] + '\n')
            log(loc, date_time() + 'Starting indel calls for ' + cur[0] + ' with command:\n' + scalpel_cmd + '\n')
            check = call(scalpel_cmd, shell=True)
            if check != 0:
                sys.stderr.write(date_time() + 'Indel calling failed for pair ' + cur[0] + ' with command:\n'
                                 + scalpel_cmd + '\n')
                # Stop here: falling through would log the call as complete,
                # move partial output and report success to the caller.
                sys.exit(1)
            log(loc, date_time() + 'Indel calling complete for pair ' + cur[0] + ' moving output files\n')
            mv_cmd = 'mkdir ' + cur[0] + '; mv outdir/main/* ' + cur[0] + '; rm -rf outdir/main;'
            log(loc, date_time() + mv_cmd + '\n')
            call(mv_cmd, shell=True)
            sys.stderr.write(date_time() + 'Completed indel calls for ' + cur[0] + '\n')
            if dustmask_flag == 'Y':
                log(loc, date_time() + 'Filter dustmask flag given\n')
                check = filter_indel(bedtools, dustmask_bed, cur[0])
                if check != 0:
                    sys.stderr.write(date_time() + 'Dustmask failed for ' + cur[0] + '\n')
                    sys.exit(1)
                log(loc, date_time() + 'Dustmask complete for ' + cur[0] + '\n')
    sys.stderr.write(date_time() + 'Indel call completed\n')
    return 0
def variant_annot_pipe(config_file, sample_pairs, kflag, ref_mnt, wg):
    """Drive variant calling, annotation and upload for a set of sample pairs.

    Stages, in order: create the output directories, write the de-duplicated
    sample list, obtain any BAMs reported missing, optionally reorder the
    BAMs (``kflag == 'y'``), run mutect and merge its output, run scalpel,
    annotate with snpEff or vep according to the configuration, optionally
    run germline calling (configured germ flag ``'Y'``), relocate the result
    files and upload them.

    Args:
        config_file: Configuration file handed to ``parse_config`` and to
            each pipeline stage.
        sample_pairs: Path to a tab-separated file whose second and third
            columns are the two sample ids of a pair.
        kflag: ``'y'`` to run ``ksort`` on all ``*.merged.final.bam`` files
            before variant calling.
        ref_mnt: Reference location forwarded to the individual stages.
        wg: Forwarded unchanged to ``snpEff`` and ``platypus_germline``.

    Returns:
        0 when every stage has completed.

    Raises:
        SystemExit: With status 1 when ksort, mutect, the mutect merge,
            scalpel, the germline stage or the upload reports failure.
    """
    # create eventual output location directories
    mk_dir = 'mkdir BAM LOGS ANALYSIS ANNOTATION'
    call(mk_dir, shell=True)
    (novosort, obj, cont, analysis, annotation, germ_flag, indel_flag, annot_used) = parse_config(config_file)

    # create sample list: each sample id once, in order of first appearance
    sample_list = 'sample_list.txt'
    seen = set()
    with open(sample_pairs, 'r') as fh, open(sample_list, 'w') as sl:
        for line in fh:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at end of file).
                continue
            cur = line.rstrip('\n').split('\t')
            for sample in (cur[1], cur[2]):
                if sample not in seen:
                    sl.write(sample + '\n')
                    seen.add(sample)
    del seen

    # download and merge (if necessary) bam files
    temp_list = check_existing_bams(sample_list)
    if len(temp_list) > 0:
        sys.stderr.write(date_time() + 'Missing files detected, downloading merged bam files\n')
        temp_fn = 'temp_samp_list.txt'
        with open(temp_fn, 'w') as temp_fh:
            temp_fh.write('\n'.join(temp_list))
        miss_list = get_merged_bams(config_file, temp_fn)
        if len(miss_list) == 0:
            sys.stderr.write(date_time() + 'Merged bam files successfully download\n')
        else:
            sys.stderr.write(date_time() + 'Some merged bams appear to be missing, trying downloading constituent bams '
                                           'then merging\n')
            missing_fn = 'to_merge.txt'
            with open(missing_fn, 'w') as missing_fh:
                missing_fh.write('\n'.join(miss_list))
            run_novosort(config_file, missing_fn, obj)
    else:
        sys.stderr.write(date_time() + 'All bams found. Moving on\n')

    if kflag == 'y':
        # create bam list for ksort
        bam_list = 'bam_list.txt'
        blist_cmd = 'ls *.merged.final.bam > ' + bam_list
        call(blist_cmd, shell=True)
        check = ksort(config_file, bam_list, kflag, ref_mnt)
        if check == 0:
            sys.stderr.write(date_time() + 'Karyotypic reorder of BAM files completed\n')
        else:
            sys.stderr.write(date_time() + 'Karyotypic reorder of BAM files failed.\n')
            sys.exit(1)

    # quick check to see if just need to restart pipeline from mutect, or actually get merged bams
    check = mutect_pipe(config_file, sample_pairs, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'Mutect variant calls successful\n')
    else:
        sys.stderr.write(date_time() + 'Mutect variant calls failed.\n')
        sys.exit(1)

    check = mutect_merge_sort(config_file, sample_pairs, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'Mutect file merge successful.\n')
    else:
        sys.stderr.write(date_time() + 'Mutect file merge failed.\n')
        # A failed merge is fatal like every other stage; continuing would
        # run the later stages after a reported failure.
        sys.exit(1)

    # create def to do vep or snpeff mode for annotation
    check = scalpel_indel(sample_pairs, 'LOGS/', config_file, ref_mnt)
    if check == 0:
        sys.stderr.write(date_time() + 'scalpel successful.\n')
    else:
        sys.stderr.write(date_time() + 'scalpel failed.\n')
        sys.exit(1)

    if annot_used == 'snpEff':
        snpEff(config_file, sample_pairs, ref_mnt, wg)
    if annot_used == 'vep':
        vep(config_file, sample_pairs, ref_mnt, '.vcf.keep', '.snv.vep.vcf', 'mutect')
        vep(config_file, sample_pairs, ref_mnt, '.indel.vcf', '.somatic.indel.vep.vcf', 'scalpel')

    if germ_flag == 'Y':
        sys.stderr.write(date_time() + 'Germ line call flag indicated\n')
        # Both steps always run; their statuses are summed so that a failure
        # in either one is detected below.
        check = platypus_germline(config_file, sample_pairs, 'LOGS/', wg, ref_mnt)
        check += annot_platypus(config_file, sample_pairs, ref_mnt)
        if check == 0:
            sys.stderr.write(date_time() + 'Germ line call complete\n')
        else:
            sys.stderr.write(date_time() + 'Error during germline calls. Check output\n')
            sys.exit(1)

    # relocate stuff, then upload
    mv_cmds = 'rm -rf outdir; mv *.bai *.bam BAM; mv *.xls *eff* *sift* *vep* ANNOTATION; mv *out* *vcf* ANALYSIS;'
    call(mv_cmds, shell=True)
    check = upload_variants_to_swift(cont, obj, sample_list, sample_pairs, analysis, annotation, annot_used)
    if check == 0:
        sys.stderr.write(date_time() + 'Uploading data to swift successful!\n')
    else:
        sys.stderr.write(date_time() + 'Uploading data to swift failed!\n')
        sys.exit(1)
    return 0