organism = param['organism']

##***************** Part 0. Build index file for bwa and GATK ******
##================= Part I. Preprocess ============================
#======== 1. map and deduplication =================================
Message(startMessage, email)

#======== (0) enter the directory ========================
# The index directory is the parent of the bwa index prefix.
# os.path.dirname is used instead of slicing at rfind('/'): with no '/'
# in the prefix, rfind returns -1 and the slice would silently drop the
# last character of the prefix. A bare prefix maps to the current directory.
bwa_path = os.path.dirname(bwaIndex) or '.'
if not os.path.exists(bwa_path):
    # makedirs also creates missing parent directories.
    os.makedirs(bwa_path)
# Build the index only when the index directory is still empty.
if not os.listdir(bwa_path):
    bwa_Db(bwa_path, ref_fa)
os.chdir(file_path)

#======== (1) read files ================================
fastqFiles = list_files(file_path)
# `trim` is compared against the string 'True', not a boolean.
if trim == 'True':
    trim_fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred,
                                  trimmoAdapter, batch=6)
    # NOTE(review): presumably discards the untrimmed inputs -- confirm
    # against the definition of remove().
    remove(fastqFiles)
else:
    trim_fastqFiles = fastqFiles
# Single-argument print calls give the same output under Python 2 and
# also parse under Python 3; the two spaces after the colon reproduce
# what the comma-separated print statement emitted.
print('list file succeed')
print('fastqFiles is:  {0}'.format(trim_fastqFiles))

#======== (2) define group ===============================
# defined above

#======== (3) align using bwa ============================
try:
    map_sam = bwa_vari(read_group, trim_fastqFiles, bwaIndex, thread)
    print('align succeed')
    print('map_sam is:  {0}'.format(map_sam))
except:
    # Bare except kept on purpose: every failure is announced through
    # Message() and then re-raised unchanged, so nothing is swallowed.
    print('align failed')
    Message('align failed', email)
    raise
Message(startMessage, email)

#======== (0) enter the directory ========================
# The index directory is the parent of the bwa index prefix.
# os.path.dirname replaces slicing at rfind('/'): when the prefix holds
# no '/', rfind returns -1 and the slice would cut off the last character
# of the prefix. A bare prefix maps to the current directory.
bwa_path = os.path.dirname(bwaIndex) or '.'
if not os.path.exists(bwa_path):
    # makedirs also creates missing parent directories.
    os.makedirs(bwa_path)
# Build the index only when the index directory is still empty.
if not os.listdir(bwa_path):
    bwa_Db(bwa_path, ref_fa)
os.chdir(file_path)

#======== (1) read files ================================
fastqFiles = list_files(file_path)
# `trim` is compared against the string 'True', not a boolean.
if trim == 'True':
    trim_fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred,
                                  trimmoAdapter, batch=6)
    # NOTE(review): presumably discards the untrimmed inputs -- confirm
    # against the definition of remove().
    remove(fastqFiles)
else:
    trim_fastqFiles = fastqFiles
# Single-argument print calls give the same output under Python 2 and
# also parse under Python 3; the two spaces after the colon reproduce
# what the comma-separated print statement emitted.
print('list file succeed')
print('fastqFiles is:  {0}'.format(trim_fastqFiles))

#======== (2) define group ===============================
# defined above

#======== (3) align using bwa ============================
try:
    map_sam = bwa_vari(read_group, trim_fastqFiles, bwaIndex, thread)
    print('align succeed')
    print('map_sam is:  {0}'.format(map_sam))
except:
    # Bare except kept on purpose: every failure is announced through
    # Message() and then re-raised unchanged, so nothing is swallowed.
    print('align failed')
    Message('align failed', email)
    raise
# Each step below follows the same pattern: run the step, log the result
# to stdout, and on any failure log it, send the notification through
# Message() and exit with status 1. Because sys.exit(1) replaces the
# original exception, the cause is written to stderr first; otherwise the
# reason for the failure would be lost entirely.

# Add read-group information to the sorted BAM files.
try:
    group_bams = addReadGroup(picard, sort_bams, read_group)
    sys.stdout.write("add group succeed\n")
    sys.stdout.write("group_bams is: {group}\n".format(group=group_bams))
except BaseException as err:
    # BaseException keeps the breadth of the former bare `except:`.
    sys.stdout.write("add group failed\n")
    sys.stderr.write("add group failed: {err!r}\n".format(err=err))
    Message("add group failed", email)
    sys.exit(1)

# ======== (2) mark duplicates ============================
try:
    dedup_bams = markduplicates(picard, group_bams)
    sys.stdout.write("mark duplicate succeed\n")
    sys.stdout.write("dedup_bams is: {dedup}\n".format(dedup=dedup_bams))
    # The read-group BAMs are passed to remove() once deduplication is done.
    remove(group_bams)
except BaseException as err:
    sys.stdout.write("mark duplicate failed\n")
    sys.stderr.write("mark duplicate failed: {err!r}\n".format(err=err))
    Message("mark duplicate failed", email)
    sys.exit(1)

# ======== 3. Split 'N' Trim and reassign mapping qualities
try:
    split_bams = splitN(gatk, dedup_bams, ref_fa)
    sys.stdout.write("split N succeed\n")
    sys.stdout.write("split N is: {N}\n".format(N=split_bams))
    # The deduplicated BAMs are passed to remove() once splitting is done.
    remove(dedup_bams)
except BaseException as err:
    sys.stdout.write("split N failed\n")
    sys.stderr.write("split N failed: {err!r}\n".format(err=err))
    Message("split N failed", email)
    sys.exit(1)

# ======== 4. Indel realignment ===========================
raise #======== (3) sam to bam and sort ================================ try: sorted_bams = sam2bam_sort(map_files,thread) # [file.sort.bam] print 'host sorted succeed' print 'sorted_bam is: ',sorted_bams except: print 'host sorted failed' Message('host sorted failed',email) raise #======== (4) extract reads that unmapped to host ===== try: unmap2host_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap.bam] print 'extract unmap2host_bams succeed' print 'unmap2host_bams is: ',unmap2host_bams remove(sorted_bams) # rename files for f in unmap2host_bams: os.rename(f,f[:-4]+'2host.bam') unmap2host_bams = [f[:-4]+'2host.bam' for f in unmap2host_bams] # [file.sort.unmap2host.bam] except: print 'extract unmap2host_bams failed' Message('extract unmap2host_bams failed',email) raise #======== (6) unmap2host_bams to fastq.gz ========================= try: unmap2host_fq_gzs = sam2fastq(picard,unmap2host_bams,seqType) # [[file.sort.unmap2host.fq.gz]] # compress to gz file #for fq in unmap2host_fqs: ('gzip {fq}').format(fq=fq) #unmap2host_fq_gzs = [f+'.gz' for f in unmap2host_fqs] # file.sort.unmap2host.fq.gz print 'unmap2host_fq_gzs succeed' print 'unmap2host_fq_gzs is: ',unmap2host_fq_gzs
except: print 'virus sorted failed' Message('virus sorted failed',email) raise #======== (3) extract reads that mapped and unmapped to virus ============================ try: map2virus_bams = extract_bam(sorted_bams,'map',seqType,thread) # [file.sort.unmap2host.sort.map.bam] unmap2virus_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap2host.sort.unmap.bam] # rename files for f in map2virus_bams: os.rename(f,f[:-29]+'.only2virus.bam') map2virus_bams = [f[:-29]+'.only2virus.bam' for f in map2virus_bams] # [file.only2virus.bam] for f in unmap2virus_bams: os.rename(f,f[:-31]+'.map2neither.bam') # [file.map2neither.bam] unmap2virus_bams = [f[:-31]+'.map2neither.bam' for f in unmap2virus_bams] print 'extract map and unmap2virus_bams succeed' print 'map2virus_bams is: ',map2virus_bams,'unmap2virus_bams is: ',unmap2virus_bams remove(sorted_bams) except: print 'extract map and unmap2virus_bams failed' Message('extract map and unmap2virus_bams failed',email) raise #======== (4) transfer the mapped and unmapped to virus bam to fastq ======= try: map2virus_fq_gzs = sam2fastq(picard,map2virus_bams,seqType) # [[file.only2virus.fq.gz]] unmap2virus_fq_gzs = sam2fastq(picard,unmap2virus_bams,seqType) # [[file.map2neither.fq.gz]] print 'transfer from bam to fq succeed' print 'map2virus_fq_gzs is: ',map2virus_fq_gzs,'unmap2virus_fq_gzs',unmap2virus_fq_gzs remove(map2virus_bams);remove(unmap2virus_bams) except: print 'transfer from bam to fq failed' Message('transfer from bam to fq failed',email) raise
# Pull the run settings out of the parameter dictionary.
file_path = param['filePath']
starDb = param['alignerDb']
trim = param['trim']
phred = param['phred']
picard = param['picard']
trimmomatic = param['trimmomatic']
trimmoAdapter = param['trimmoAdapter']
gatk = param['gatk']
read_group = param['readGroup']
organism = param['organism']

##***************** Part 0. Build index file for bwa and GATK ******
##***************** Part I. Preprocess ============================
#======== 1. map and deduplication =================================
#======== (0) enter the directory ========================
os.chdir(file_path)
Message(startMessage, email)

#======== (1) read files ================================
fastqFiles = list_files(file_path)
# `trim` holds the string 'True' when trimming is requested; any other
# value means the listed files are used as they are.
if trim != 'True':
    trim_fastqFiles = fastqFiles
else:
    trim_fastqFiles = Trimmomatic(
        trimmomatic, fastqFiles, phred, trimmoAdapter, batch=6
    )
    # The untrimmed inputs are handed to remove() after trimming.
    remove(fastqFiles)
sys.stdout.write('list file succeed\n')
sys.stdout.write('fastqFiles is: {fq}\n'.format(fq=trim_fastqFiles))
raise #======== (4) Convert sam to sorted bam ================== try: sort_bams = sam2bam_sort(map_sam,thread) print 'sort bam files succeed' print 'sort_bams is: ',sort_bams except: print 'sort bam files failed' Message('sort bam files failed',email) raise #======== (5) Markduplicates using picard ================ try: dedup_files = markduplicates(picard,sort_bams) print 'mark duplicates succeed' print 'dedup_files is: ',dedup_files remove(sort_bams) except: print 'mark duplicates failed' Message('mark duplicates failed',email) raise #======== 2. Indel realignment ==================================== #======== (6) Create a target list of intervals=========== try: interval = RealignerTargetCreator(gatk,dedup_files,ref_fa,thread,phaseINDEL,gold_indel) print 'RealignerTarget Creator succeed' print 'interval is: ',interval except: print 'RealignerTarget Creator failed' Message('RealignerTarget Creator failed',email) raise #======== (7) realignment of target intervals ============
# Every step below logs its outcome to stdout; on failure it also sends
# the notification through Message() and re-raises the original exception,
# so the traceback is preserved.

# Add read-group information to the sorted BAM files.
try:
    group_bams = addReadGroup(picard, sort_bams, read_group)
    sys.stdout.write('add group succeed\n')
    sys.stdout.write('group_bams is: {group}\n'.format(group=group_bams))
except:
    sys.stdout.write('add group failed\n')
    Message('add group failed', email)
    raise

#======== (2) mark duplicates ============================
try:
    dedup_bams = markduplicates(picard, group_bams)
    sys.stdout.write('mark duplicate succeed\n')
    sys.stdout.write('dedup_bams is: {dedup}\n'.format(dedup=dedup_bams))
    # The read-group BAMs are passed to remove() once deduplication is done.
    remove(group_bams)
except:
    sys.stdout.write('mark duplicate failed\n')
    Message('mark duplicate failed', email)
    raise

#======== 3. Split 'N' Trim and reassign mapping qualities
try:
    split_bams = splitN(gatk, dedup_bams, ref_fa)
    sys.stdout.write('split N succeed\n')
    sys.stdout.write('split N is: {N}\n'.format(N=split_bams))
    # The deduplicated BAMs are passed to remove() once splitting is done.
    remove(dedup_bams)
except:
    sys.stdout.write('split N failed\n')
    Message('split N failed', email)
    raise

#======== 4. Indel realignment ===========================
Message('host sorted failed',email) raise #======== (4) get htseq Count to host ============================ try: htseq_count(sorted_bams,host_annotation,host_htseqFolder,host_AnnotationSource) print 'host htseqCount succeed' except: print 'host htseq count failed' Message('host htseq count failed',email) raise #======== (5) extract unmapped reads ============================= try: unmap2host_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap.bam] print 'extract unmap2host_bams succeed' print 'unmap2host_bams is: ',unmap2host_bams remove(sorted_bams) # rename files for f in unmap2host_bams: os.rename(f,f[:-4]+'2host.bam') unmap2host_bams = [f[:-4]+'2host.bam' for f in unmap2host_bams] # [file.sort.unmap2host.bam] except: print 'extract unmap2host_bams failed' Message('extract unmap2host_bams failed',email) raise #======== (6) unmap2host_bams to fastq ============================ try: unmap2host_fqs = sam2fastq(picard,unmap2host_bams,seqType) # [[file.sort.unmap2host.fq.gz]] print 'unmap2host_fq succeed' print 'unmap2host_fqs is: ',unmap2host_fqs remove(unmap2host_bams) except: print 'unmap2host_fq failed'
Dict = param['symbolIDFile'] inputpath = file_path #=========== (0) enter the directory ================ os.chdir(file_path) Message(startMessage,email) #=========== (1) reads files and trim =============== fastqFiles = list_files(file_path) print 'list file succeed' if trim == 'True': try: trim_fastqFiles = Trimmomatic(trimmomatic,fastqFiles,phred,trimmoAdapter,batch=6) print 'trim succeed' print 'fastqFiles is: ',fastqFiles remove(fastqFiles) except: print 'trim failed' Message('trim failed',email) raise else: trim_fastqFiles = fastqFiles #=========== (2) run STAR to do the mapping ======== try: if aligner == 'gsnap': map_files = gsnap(trim_fastqFiles,db_path, db_name,gsnap_annotation,thread) elif aligner == 'STAR': if not os.path.exists(db_path): os.mkdir(db_path) if os.listdir(db_path) == []: STAR_Db(db_path,ref_fa,thread) map_files = STAR(trim_fastqFiles,db_path,thread,annotation,['--outSAMtype BAM SortedByCoordinate','--quantMode GeneCounts'])