예제 #1
0
# Progress report for the file-listing step (the step itself is above this
# excerpt).  Python 2 print statement: the comma form writes both items
# separated by a single space.
print 'list file succeed'
print 'fastqFiles is: ',trim_fastqFiles
#========  (2) define group ===============================
#defined above
#========  (3) align using bwa ============================
# Call bwa_vari with the read group, the trimmed FASTQ list, the bwa index and
# the thread count, and keep the result in map_sam (consumed by the sorting
# step).  On any failure: print a marker, call Message() with the failure text
# and `email`, then re-raise the original exception.
try:
    map_sam = bwa_vari(read_group,trim_fastqFiles,bwaIndex,thread)
    print 'align succeed'
    print 'map_sam is: ',map_sam
# Bare except: also intercepts KeyboardInterrupt/SystemExit; nothing is
# swallowed because the handler ends with a bare raise.
except:
    print 'align failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('align failed',email)
    raise
#========  (4) Convert sam to sorted bam ==================
# Feed the alignment output (map_sam) and the thread count to sam2bam_sort and
# keep the result in sort_bams.  Failure handling mirrors the other steps:
# print, Message(), re-raise.
try:
    sort_bams = sam2bam_sort(map_sam,thread)
    print 'sort bam files succeed'
    print 'sort_bams is: ',sort_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'sort bam files failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('sort bam files failed',email)
    raise
#========  (5) Markduplicates using picard ================
try:
    dedup_files = markduplicates(picard,sort_bams)
    print 'mark duplicates succeed'
    print 'dedup_files is: ',dedup_files
    remove(sort_bams)
except:
    print 'mark duplicates failed'
    Message('mark duplicates failed',email)
예제 #2
0
        map_files = gsnap(unmap2host_fq_gzs,virus_alignerDb, virus_gsnapDbName,virus_gsnapAnnotation,thread)
    else:
        map_files = STAR(unmap2host_fq_gzs,virus_alignerDb,thread,'',['--outSAMunmapped Within'])  # [file.sam] 
        new_map_files = [f[:-3]+'sort.unmap2host.sam' for f in map_files]
        for f1,f2 in zip(map_files,new_map_files):
            os.rename(f1,f2)                                        # [file.sort.unmap2host.sam]
    print 'virus align succeed'
    print 'map_files is: ',new_map_files
#     remove(unmap2host_fq_gzs)
except:
    print 'virus align failed'
    Message('virus align failed',email)
    raise
#========  (2) sam to bam and sort  ================================
# Pass new_map_files and the thread count to sam2bam_sort and keep the result
# in sorted_bams.  On any failure: print, call Message() with the failure text
# and `email`, then re-raise.
# NOTE(review): in the alignment code visible just above, new_map_files is
# bound only on the non-gsnap path (the gsnap path assigns map_files only).
# Confirm it is also defined when gsnap is used, otherwise this raises
# NameError.
try:
    sorted_bams = sam2bam_sort(new_map_files,thread)    # [file.sort.bam] [file.sort.unmap2host.sort.bam]
    print 'virus sorted succeed'
    print 'sorted_bams is: ',sorted_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'virus sorted failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('virus sorted failed',email)
    raise
#========  (3) extract reads that mapped and unmapped to virus  ============================
try:
    map2virus_bams = extract_bam(sorted_bams,'map',seqType,thread)     # [file.sort.unmap2host.sort.map.bam]
    unmap2virus_bams = extract_bam(sorted_bams,'unmap',seqType,thread) # [file.sort.unmap2host.sort.unmap.bam]
    # rename files
    for f in map2virus_bams: os.rename(f,f[:-29]+'.only2virus.bam')
    map2virus_bams = [f[:-29]+'.only2virus.bam' for f in map2virus_bams]     # [file.only2virus.bam]
    for f in unmap2virus_bams: os.rename(f,f[:-31]+'.map2neither.bam')        # [file.map2neither.bam]
    unmap2virus_bams = [f[:-31]+'.map2neither.bam' for f in unmap2virus_bams]
예제 #3
0
# Progress report for the file-listing step (the step itself is above this
# excerpt).  Python 2 print statement: the comma form writes both items
# separated by a single space.
print 'list file succeed'
print 'fastqFiles is: ', trim_fastqFiles
#========  (2) define group ===============================
#defined above
#========  (3) align using bwa ============================
# Call bwa_vari with the read group, the trimmed FASTQ list, the bwa index and
# the thread count, and keep the result in map_sam (consumed by the sorting
# step).  On any failure: print a marker, call Message() with the failure text
# and `email`, then re-raise the original exception.
try:
    map_sam = bwa_vari(read_group, trim_fastqFiles, bwaIndex, thread)
    print 'align succeed'
    print 'map_sam is: ', map_sam
# Bare except: also intercepts KeyboardInterrupt/SystemExit; nothing is
# swallowed because the handler ends with a bare raise.
except:
    print 'align failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('align failed', email)
    raise
#========  (4) Convert sam to sorted bam ==================
# Feed the alignment output (map_sam) and the thread count to sam2bam_sort and
# keep the result in sort_bams.  Failure handling mirrors the other steps:
# print, Message(), re-raise.
try:
    sort_bams = sam2bam_sort(map_sam, thread)
    print 'sort bam files succeed'
    print 'sort_bams is: ', sort_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'sort bam files failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('sort bam files failed', email)
    raise
#========  (5) Markduplicates using picard ================
try:
    dedup_files = markduplicates(picard, sort_bams)
    print 'mark duplicates succeed'
    print 'dedup_files is: ', dedup_files
    remove(sort_bams)
except:
    print 'mark duplicates failed'
    Message('mark duplicates failed', email)
예제 #4
0
# Pull the settings used by the later steps out of the `param` mapping
# (built above this excerpt).  A missing key raises KeyError here.
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']

# Dict holds the value stored under 'symbolIDFile'; it is later passed as the
# first argument to ID_Convert.
Dict = param['symbolIDFile']
# inputpath is just another name for file_path (same object, no copy).
inputpath = file_path

# Linear pipeline: each step consumes the previous step's result.  None of the
# steps is wrapped in try/except, so any exception propagates immediately and
# the final Message(endMessage, email) is not reached.
#=========== (0) enter the directory ================
Message(startMessage, email)
# Change the process working directory to file_path for the rest of the run.
os.chdir(file_path)
#=========== (1) reads files and trim ===============
fastqFiles = list_files(file_path)
# `trim` is compared with the string 'True', not the boolean True.
if trim == 'True':
    # The trimmed result replaces the original file list.
    fastqFiles = Trimmomatic(trimmomatic, fastqFiles, phred, trimmoAdapter)
print 'list file succeed'
#=========== (2) run gsnap to do the mapping ========
# Despite the heading, gsnap is used only when aligner == 'gsnap'; every other
# value of `aligner` falls through to STAR.
if aligner == 'gsnap':
    map_files = gsnap(fastqFiles, db_path, db_name, gsnap_annotation, thread)
else:
    map_files = STAR(fastqFiles, db_path, thread)
print 'align succeed'
#=========== (3) samtools to sort the file ==========
sorted_bam = sam2bam_sort(map_files, thread)
print 'sorted succeed'
#=========== (4) htseq_count ========================
# Called for its side effects only; any return value is discarded.
htseq_count(sorted_bam, annotation, file_path)
print 'htseq count succeed'
#=========== (5) htseq symbol to id =================
# inputpath is the same value as file_path (aliased earlier in the script).
ID_Convert(Dict, output_path, inputpath)
print 'id convert succeed'
Message(endMessage, email)
예제 #5
0
    if aligner == 'gsnap':
        # check index
        if os.listdir(alignerDb) == []:
            gsnap_Db(ref_fa,alignerDb,gsnapDbName,gsnapAnnotation)
        map_files = gsnap(fastqFiles,alignerDb,gsnapDbName,gsnapAnnotation,thread) # [file.sam]
    else:
        map_files = STAR(fastqFiles,alignerDb,thread)
    print 'align succeed'
    print 'map_files is: ',map_files
except:
    print 'align failed'
    Message('host align failed',email)
    raise
#========  (3) sam to bam and sort  ================================
# Pass the alignment output (map_files) and the thread count to sam2bam_sort
# and keep the result in sorted_bams.  On any failure: print, call Message()
# with the failure text and `email`, then re-raise.
try:
    sorted_bams = sam2bam_sort(map_files,thread)  # [file.sort.bam]
    print 'bam sorted succeed'
    print 'sorted_bam is: ',sorted_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'bam sorted failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('bam sorted failed',email)
    raise
"""
#========  (2) use gsnap map to bacteria ========
map_files = gsnap(fastqFiles,microDb_path,microDb_name,'',thread)
print 'mapping succeed'
#========  (3) sam2Bam and sort bam =============
sorted_bam = sam2bam_sort(map_files)
print 'sortting succeed'
#========  (4) extract mapped reads =============
mapped_files = extract_mapped(sorted_bam)
예제 #6
0
        # check index
        if os.listdir(alignerDb) == []:
            gsnap_Db(ref_fa, alignerDb, gsnapDbName, gsnapAnnotation)
        map_files = gsnap(fastqFiles, alignerDb, gsnapDbName, gsnapAnnotation,
                          thread)  # [file.sam]
    else:
        map_files = STAR(fastqFiles, alignerDb, thread)
    print 'align succeed'
    print 'map_files is: ', map_files
except:
    print 'align failed'
    Message('host align failed', email)
    raise
#========  (3) sam to bam and sort  ================================
# Pass the alignment output (map_files) and the thread count to sam2bam_sort
# and keep the result in sorted_bams.  On any failure: print, call Message()
# with the failure text and `email`, then re-raise.
try:
    sorted_bams = sam2bam_sort(map_files, thread)  # [file.sort.bam]
    print 'bam sorted succeed'
    print 'sorted_bam is: ', sorted_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'bam sorted failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('bam sorted failed', email)
    raise
"""
#========  (2) use gsnap map to bacteria ========
map_files = gsnap(fastqFiles,microDb_path,microDb_name,'',thread)
print 'mapping succeed'
#========  (3) sam2Bam and sort bam =============
sorted_bam = sam2bam_sort(map_files)
print 'sortting succeed'
#========  (4) extract mapped reads =============
mapped_files = extract_mapped(sorted_bam)
예제 #7
0
# Pull the settings used by the later steps out of the `param` mapping
# (built above this excerpt).  A missing key raises KeyError here.
output_path = param['htseqOutPath']
db_name = param['gsnapDbName']
gsnap_annotation = param['gsnapAnnotation']

# Dict holds the value stored under 'symbolIDFile'; it is later passed as the
# first argument to ID_Convert.
Dict = param['symbolIDFile']
# inputpath is just another name for file_path (same object, no copy).
inputpath = file_path

# Linear pipeline: each step consumes the previous step's result.  None of the
# steps is wrapped in try/except, so any exception propagates immediately and
# the final Message(endMessage,email) is not reached.
#=========== (0) enter the directory ================
Message(startMessage,email)
# Change the process working directory to file_path for the rest of the run.
os.chdir(file_path)
#=========== (1) reads files and trim ===============
fastqFiles = list_files(file_path)
# `trim` is compared with the string 'True', not the boolean True.
if trim == 'True':
    # The trimmed result replaces the original file list.
    fastqFiles = Trimmomatic(trimmomatic,fastqFiles,phred,trimmoAdapter)
print 'list file succeed'
#=========== (2) run gsnap to do the mapping ========
# Despite the heading, gsnap is used only when aligner == 'gsnap'; every other
# value of `aligner` falls through to STAR.
if aligner == 'gsnap':
    map_files = gsnap(fastqFiles,db_path, db_name,gsnap_annotation,thread)
else:
    map_files = STAR(fastqFiles,db_path,thread)
print 'align succeed'
#=========== (3) samtools to sort the file ==========
sorted_bam = sam2bam_sort(map_files,thread)
print 'sorted succeed'
#=========== (4) htseq_count ========================
# Called for its side effects only; any return value is discarded.
htseq_count(sorted_bam,annotation,file_path)
print 'htseq count succeed'
#=========== (5) htseq symbol to id =================
# inputpath is the same value as file_path (aliased earlier in the script).
ID_Convert(Dict,output_path,inputpath)
print 'id convert succeed'
Message(endMessage,email)
예제 #8
0
    elif aligner == 'STAR':
        if not os.path.exists(db_path): os.mkdir(db_path)
        if os.listdir(db_path) == []:
            STAR_Db(db_path,ref_fa,thread)
        map_files = STAR(trim_fastqFiles,db_path,thread,annotation,['--outSAMtype BAM SortedByCoordinate','--quantMode GeneCounts'])
    elif aligner == 'bowtie':
        map_files = bowtie(trim_fastqFiles,db_path,thread=1,otherParameters=[''])
    print 'align succeed'
    print 'map_files is: ',map_files
except:
    print 'align failed'
    Message('align failed',email)
    raise
#=========== (3) samtools to sort the file ==========
# Pass the alignment output, the thread count and the literal 'name' to
# sam2bam_sort and keep the result in sorted_bams.  On any failure: print,
# call Message() with the failure text and `email`, then re-raise.
# NOTE(review): the third argument 'name' presumably selects sorting by read
# name -- confirm against sam2bam_sort's signature.
try:
    sorted_bams = sam2bam_sort(map_files,thread,'name')
    print 'sorted succeed'
    print 'sorted_bam is: ',sorted_bams
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'sorted failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('sorted failed',email)
    raise
#=========== (4) get mapping stats ==================
# NOTE(review): this step shares the number (4) with the htseq_count heading
# that follows it; renumber the headings when convenient.
# Run flagstat on the sorted BAM list.  Any return value is discarded, so the
# call is used for its side effects only.  On any failure: print, call
# Message() with the failure text and `email`, then re-raise.
try:
    flagstat(sorted_bams)
    print 'flagstat succeed'
# Bare except: catches everything, but the bare raise below propagates it
# unchanged.
except:
    print 'flagstat failed'
    # Presumably notifies the address in `email`; Message is defined elsewhere.
    Message('flagstat failed',email)
    raise
#=========== (4) htseq_count ========================