organism = param['organism']
##*****************  Part 0. Build index file for bwa and GATK ******
##=================  Part I. Preprocess  ============================
#========  1. map and dedupping =====================================
Message(startMessage,email)
#========  (0) enter the directory ========================
bwa_path = bwaIndex[:bwaIndex.rfind('/')]
if not os.path.exists(bwa_path): os.mkdir(bwa_path)
if os.listdir(bwa_path) == []:
    bwa_Db(bwa_path,ref_fa)
os.chdir(file_path)
#========  (1) read files  ================================
fastqFiles = list_files(file_path)
if trim == 'True':
    trim_fastqFiles = Trimmomatic(trimmomatic,fastqFiles,phred,trimmoAdapter,batch=6) 
    remove(fastqFiles)
else:
    trim_fastqFiles = fastqFiles
print 'list file succeed'
print 'fastqFiles is: ',trim_fastqFiles
#========  (2) define group ===============================
#defined above
#========  (3) align using bwa ============================
try:
    map_sam = bwa_vari(read_group,trim_fastqFiles,bwaIndex,thread)
    print 'align succeed'
    print 'map_sam is: ',map_sam
except:
    print 'align failed'
    Message('align failed',email)
    raise
Beispiel #2
0
Message(startMessage, email)
#========  (0) enter the directory ========================
bwa_path = bwaIndex[:bwaIndex.rfind('/')]
if not os.path.exists(bwa_path): os.mkdir(bwa_path)
if os.listdir(bwa_path) == []:
    bwa_Db(bwa_path, ref_fa)
os.chdir(file_path)
#========  (1) read files  ================================
fastqFiles = list_files(file_path)
if trim == 'True':
    trim_fastqFiles = Trimmomatic(trimmomatic,
                                  fastqFiles,
                                  phred,
                                  trimmoAdapter,
                                  batch=6)
    remove(fastqFiles)
else:
    trim_fastqFiles = fastqFiles
print 'list file succeed'
print 'fastqFiles is: ', trim_fastqFiles
#========  (2) define group ===============================
#defined above
#========  (3) align using bwa ============================
try:
    map_sam = bwa_vari(read_group, trim_fastqFiles, bwaIndex, thread)
    print 'align succeed'
    print 'map_sam is: ', map_sam
except:
    print 'align failed'
    Message('align failed', email)
    raise
# Add read-group information to sort_bams; the helper receives the picard
# setting and read_group.
# NOTE(review): sort_bams is not assigned in the lines visible above this
# step -- presumably produced by an earlier sort step; confirm.
# Each step below follows the same pattern: on failure, write a line to
# stdout, send a Message, and exit with status 1.  The bare except also
# catches KeyboardInterrupt/SystemExit, and the original traceback is not
# printed.
try:
    group_bams = addReadGroup(picard, sort_bams, read_group)
    sys.stdout.write("add group succeed\n")
    sys.stdout.write("group_bams is: {group}\n".format(group=group_bams))
except:
    sys.stdout.write("add group failed\n")
    Message("add group failed", email)
    sys.exit(1)

# ========  (2) mark duplicates ============================
try:
    dedup_bams = markduplicates(picard, group_bams)
    sys.stdout.write("mark duplicate succeed\n")
    sys.stdout.write("dedup_bams is: {dedup}\n".format(dedup=dedup_bams))
    # The inputs of this step are passed to remove() only after it succeeded.
    remove(group_bams)
except:
    sys.stdout.write("mark duplicate failed\n")
    Message("mark duplicate failed", email)
    sys.exit(1)
# ========  3. Split 'N' Trim and reassign mapping qualities
try:
    split_bams = splitN(gatk, dedup_bams, ref_fa)
    sys.stdout.write("split N succeed\n")
    sys.stdout.write("split N is: {N}\n".format(N=split_bams))
    # The inputs of this step are passed to remove() only after it succeeded.
    remove(dedup_bams)
except:
    sys.stdout.write("split N failed\n")
    Message("split N failed", email)
    sys.exit(1)
# ========  4. Indel realignment ===========================
Beispiel #4
0
    raise
#========  (3) sam to bam and sort  ================================
# Each step reports success/failure on stdout; on failure a Message is sent
# and the exception is re-raised.
try:
    sorted_bams = sam2bam_sort(map_files,thread)  # [file.sort.bam]
    print 'host sorted succeed'
    print 'sorted_bam is: ',sorted_bams
except:
    print 'host sorted failed'
    Message('host sorted failed',email)
    raise
#========  (4) extract reads that unmapped to host  =====
try:
    unmap2host_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap.bam]
    print 'extract unmap2host_bams succeed'
    # Printed before the rename below, so this shows the pre-rename names.
    print 'unmap2host_bams is: ',unmap2host_bams
    remove(sorted_bams)
    # rename files: f[:-4] drops the last four characters ('.bam' per the
    # naming note above) and '2host.bam' is appended in their place
    for f in unmap2host_bams: os.rename(f,f[:-4]+'2host.bam')
    # Rebuild the list with the same expression so it matches the new names.
    unmap2host_bams = [f[:-4]+'2host.bam' for f in unmap2host_bams] # [file.sort.unmap2host.bam]
except:
    print 'extract unmap2host_bams failed'
    Message('extract unmap2host_bams failed',email)
    raise
#========  (6) unmap2host_bams to fastq.gz =========================
try:
    unmap2host_fq_gzs = sam2fastq(picard,unmap2host_bams,seqType) # [[file.sort.unmap2host.fq.gz]]
    # compress to gz file
    #for fq in unmap2host_fqs: ('gzip {fq}').format(fq=fq)
    #unmap2host_fq_gzs = [f+'.gz' for f in unmap2host_fqs]  # file.sort.unmap2host.fq.gz
    print 'unmap2host_fq_gzs succeed'
    print 'unmap2host_fq_gzs is: ',unmap2host_fq_gzs
Beispiel #5
0
except:
    print 'virus sorted failed'
    Message('virus sorted failed',email)
    raise
#========  (3) extract reads that mapped and unmapped to virus  ============================
try:
    map2virus_bams = extract_bam(sorted_bams,'map',seqType,thread)     # [file.sort.unmap2host.sort.map.bam]
    unmap2virus_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap2host.sort.unmap.bam]
    # rename files
    for f in map2virus_bams: os.rename(f,f[:-29]+'.only2virus.bam')
    map2virus_bams = [f[:-29]+'.only2virus.bam' for f in map2virus_bams]     # [file.only2virus.bam]
    for f in unmap2virus_bams: os.rename(f,f[:-31]+'.map2neither.bam')        # [file.map2neither.bam]
    unmap2virus_bams = [f[:-31]+'.map2neither.bam' for f in unmap2virus_bams]
    print 'extract map and unmap2virus_bams succeed'
    print 'map2virus_bams is: ',map2virus_bams,'unmap2virus_bams is: ',unmap2virus_bams
    remove(sorted_bams)
except:
    print 'extract map and unmap2virus_bams failed'
    Message('extract map and unmap2virus_bams failed',email)
    raise
#========  (4) transfer the mapped and unmapped to virus bam to fastq  =======
try:
    map2virus_fq_gzs = sam2fastq(picard,map2virus_bams,seqType)     # [[file.only2virus.fq.gz]]
    unmap2virus_fq_gzs = sam2fastq(picard,unmap2virus_bams,seqType) # [[file.map2neither.fq.gz]]
    print 'transfer from bam to fq succeed'
    print 'map2virus_fq_gzs is: ',map2virus_fq_gzs,'unmap2virus_fq_gzs',unmap2virus_fq_gzs
    remove(map2virus_bams);remove(unmap2virus_bams)
except:
    print 'transfer from bam to fq failed'
    Message('transfer from bam to fq failed',email)
    raise
Beispiel #6
0
# Pull the pipeline settings out of the param mapping into module-level names.
file_path = param['filePath']
starDb = param['alignerDb']
trim = param['trim']  # compared against the string 'True' below
phred = param['phred']

picard = param['picard']
trimmomatic = param['trimmomatic']
trimmoAdapter = param['trimmoAdapter']
gatk = param['gatk']
read_group = param['readGroup']
organism = param['organism']

##*****************  Part 0. Build index file for bwa and GATK ******
##*****************  Part I. Preprocess  ============================
#========  1. map and dedupping =====================================
#========  (0) enter the directory ========================
os.chdir(file_path)
# Send startMessage through the Message helper before any work begins.
Message(startMessage, email)
#========  (1) read files  ================================
fastqFiles = list_files(file_path)
if trim == 'True':
    trim_fastqFiles = Trimmomatic(trimmomatic,
                                  fastqFiles,
                                  phred,
                                  trimmoAdapter,
                                  batch=6)
    # The untrimmed inputs are handed to remove() once trimming returned.
    remove(fastqFiles)
else:
    # No trimming requested: downstream steps use the listed files directly.
    trim_fastqFiles = fastqFiles
sys.stdout.write('list file succeed\n')
sys.stdout.write('fastqFiles is: {fq}\n'.format(fq=trim_fastqFiles))
Beispiel #7
0
    raise
#========  (4) Convert sam to sorted bam ==================
# Each step reports success/failure on stdout; on failure a Message is sent
# and the exception is re-raised.
try:
    sort_bams = sam2bam_sort(map_sam,thread)
    print 'sort bam files succeed'
    print 'sort_bams is: ',sort_bams
except:
    print 'sort bam files failed'
    Message('sort bam files failed',email)
    raise
#========  (5) Markduplicates using picard ================
try:
    dedup_files = markduplicates(picard,sort_bams)
    print 'mark duplicates succeed'
    print 'dedup_files is: ',dedup_files
    # The inputs of this step are passed to remove() only after it succeeded.
    remove(sort_bams)
except:
    print 'mark duplicates failed'
    Message('mark duplicates failed',email)
    raise
#========  2. Indel realignment  ====================================
#========  (6) Create a target list of intervals===========
try:
    interval = RealignerTargetCreator(gatk,dedup_files,ref_fa,thread,phaseINDEL,gold_indel)
    print 'RealignerTarget Creator succeed'
    print 'interval is: ',interval
except:
    print 'RealignerTarget Creator failed'
    Message('RealignerTarget Creator failed',email)
    raise
#========  (7) realignment of target intervals ============
Beispiel #8
0
# Add read-group information to sort_bams; the helper receives the picard
# setting and read_group.
# NOTE(review): sort_bams is not assigned in the lines visible above this
# step -- presumably produced by an earlier sort step; confirm.
# Each step below follows the same pattern: on failure, write a line to
# stdout, send a Message, and re-raise the exception.
try:
    group_bams = addReadGroup(picard, sort_bams, read_group)
    sys.stdout.write('add group succeed\n')
    sys.stdout.write('group_bams is: {group}\n'.format(group=group_bams))
except:
    sys.stdout.write('add group failed\n')
    Message('add group failed', email)
    raise

#========  (2) mark duplicates ============================
try:
    dedup_bams = markduplicates(picard, group_bams)
    sys.stdout.write('mark duplicate succeed\n')
    sys.stdout.write('dedup_bams is: {dedup}\n'.format(dedup=dedup_bams))
    # The inputs of this step are passed to remove() only after it succeeded.
    remove(group_bams)
except:
    sys.stdout.write('mark duplicate failed\n')
    Message('mark duplicate failed', email)
    raise
#========  3. Split 'N' Trim and reassign mapping qualities
try:
    split_bams = splitN(gatk, dedup_bams, ref_fa)
    sys.stdout.write('split N succeed\n')
    sys.stdout.write('split N is: {N}\n'.format(N=split_bams))
    # The inputs of this step are passed to remove() only after it succeeded.
    remove(dedup_bams)
except:
    sys.stdout.write('split N failed\n')
    Message('split N failed', email)
    raise
#========  4. Indel realignment ===========================
Beispiel #9
0
    Message('host sorted failed',email)
    raise
#========  (4) get htseq Count to host  ============================
# The return value of htseq_count is not used here.
try:
    htseq_count(sorted_bams,host_annotation,host_htseqFolder,host_AnnotationSource)
    print 'host htseqCount succeed'
except:
    print 'host htseq count failed'
    Message('host htseq count failed',email)
    raise
#========  (5) extract unmapped reads  =============================
try:
    unmap2host_bams = extract_bam(sorted_bams,'unmap',seqType,thread)  # [file.sort.unmap.bam]
    print 'extract unmap2host_bams succeed'
    # Printed before the rename below, so this shows the pre-rename names.
    print 'unmap2host_bams is: ',unmap2host_bams
    remove(sorted_bams)
    # rename files: f[:-4] drops the last four characters ('.bam' per the
    # naming note above) and '2host.bam' is appended in their place
    for f in unmap2host_bams: os.rename(f,f[:-4]+'2host.bam')
    # Rebuild the list with the same expression so it matches the new names.
    unmap2host_bams = [f[:-4]+'2host.bam' for f in unmap2host_bams]    # [file.sort.unmap2host.bam]
except:
    print 'extract unmap2host_bams failed'
    Message('extract unmap2host_bams failed',email)
    raise
#========  (6) unmap2host_bams to fastq ============================
try:
    unmap2host_fqs = sam2fastq(picard,unmap2host_bams,seqType)    # [[file.sort.unmap2host.fq.gz]]
    print 'unmap2host_fq succeed'
    print 'unmap2host_fqs is: ',unmap2host_fqs
    # The BAM inputs are passed to remove() only after the conversion succeeded.
    remove(unmap2host_bams)
except:
    # NOTE(review): in the visible lines this handler only prints; unlike the
    # handlers above it neither sends a Message nor re-raises.  The excerpt
    # may be cut off here -- confirm against the full file.
    print 'unmap2host_fq failed'
Beispiel #10
0
Dict = param['symbolIDFile']
# inputpath is another name for the same directory as file_path.
inputpath = file_path

#=========== (0) enter the directory ================
os.chdir(file_path)
# Send startMessage through the Message helper before any work begins.
Message(startMessage,email)
#=========== (1) reads files and trim ===============

fastqFiles = list_files(file_path)
print 'list file succeed'
# trim is compared against the string 'True', not a boolean.
if trim == 'True':
    # On failure: report on stdout, send a Message, then re-raise.
    try:
        trim_fastqFiles = Trimmomatic(trimmomatic,fastqFiles,phred,trimmoAdapter,batch=6)
        print 'trim succeed'
        # This prints the original (untrimmed) list, which is then passed to
        # remove() on the next line.
        print 'fastqFiles is: ',fastqFiles
        remove(fastqFiles)
    except:
        print 'trim failed'
        Message('trim failed',email)
        raise
else:
    # No trimming requested: downstream steps use the listed files directly.
    trim_fastqFiles = fastqFiles
#=========== (2) run STAR to do the mapping ========
try:
    if aligner == 'gsnap':
        map_files = gsnap(trim_fastqFiles,db_path, db_name,gsnap_annotation,thread)
    elif aligner == 'STAR':
        if not os.path.exists(db_path): os.mkdir(db_path)
        if os.listdir(db_path) == []:
            STAR_Db(db_path,ref_fa,thread)
        map_files = STAR(trim_fastqFiles,db_path,thread,annotation,['--outSAMtype BAM SortedByCoordinate','--quantMode GeneCounts'])