def post_aln_qc(args, bam_file, logger=None):
    """Perform the post-alignment quality checks on a BAM file.

    Steps, in order:
      1. Fix mate information; when that step reports exit code 0 its
         output file replaces ``bam_file`` in place.
      2. Validate the BAM file.
      3. Collect RNA-seq metrics.
      4. Index the BAM and run rna_seq_qc. When rna_seq_qc reports a
         non-zero exit code, the BAM is reordered against the reference
         genome, indexed, and rna_seq_qc is run once more on the
         reordered copy.

    Validation and metrics output goes to ``<workDir>/post_alignment_qc``;
    rna_seq_qc output goes to its ``rna_seq_qc`` sub-directory. Both
    directories are created when missing.

    NOTE(review): a second ``post_aln_qc`` is defined later in this file;
    at module level the later definition is the one the name ends up bound
    to -- confirm which version is meant to be live.

    Args:
        args: Parsed options. Attributes read here: ``workDir``,
            ``picard``, ``id``, ``ref_flat``, ``rna_seq_qc_path``,
            ``ref_genome`` and ``rna_seq_qc_annotation``.
        bam_file: Path of the aligned BAM file to check.
        logger: Passed through unchanged to every helper call.

    Returns:
        None.
    """
    post_aln_dir = os.path.join(args.workDir, 'post_alignment_qc')
    if not os.path.isdir(post_aln_dir):
        os.mkdir(post_aln_dir)

    # Fix mate information for bam file
    exit_code, fix_mate_out = post_alignment_qc.fix_mate_information(
        args.picard, bam_file, args.id, args.workDir, logger)
    if exit_code == 0:
        # Swap the fixed BAM into the original path so every later step
        # keeps working with ``bam_file``.
        os.remove(bam_file)
        assert not os.path.isfile(bam_file)
        os.rename(fix_mate_out, bam_file)
        assert os.path.isfile(bam_file)

    # Validate the post-alignment BAM file
    post_alignment_qc.validate_bam_file(
        args.picard, bam_file, args.id, post_aln_dir, logger)

    # Collect RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(
        args.picard, bam_file, args.id, post_aln_dir, args.ref_flat, logger)

    # Run rna_seq_qc from broad institute
    post_alignment_qc.bam_index(bam_file, args.id, logger)
    rna_seq_qc_dir = os.path.join(post_aln_dir, 'rna_seq_qc')
    if not os.path.isdir(rna_seq_qc_dir):
        os.mkdir(rna_seq_qc_dir)

    exit_code = post_alignment_qc.rna_seq_qc(
        args.rna_seq_qc_path, bam_file, args.id, rna_seq_qc_dir,
        args.ref_genome, args.rna_seq_qc_annotation, logger)

    if exit_code != 0:
        # Retry on a copy reordered against the reference genome
        # (presumably the first failure is a contig-order mismatch --
        # TODO confirm).
        reordered_bam = post_alignment_qc.reorder_bam(
            args.picard, bam_file, args.id, args.workDir,
            args.ref_genome, logger)
        try:
            post_alignment_qc.bam_index(reordered_bam, args.id, logger)
            post_alignment_qc.rna_seq_qc(
                args.rna_seq_qc_path, reordered_bam, args.id, rna_seq_qc_dir,
                args.ref_genome, args.rna_seq_qc_annotation, logger)
        finally:
            # The reordered BAM and its index are temporary; remove them
            # even when indexing or the retry raises, so a failed run does
            # not leave them behind in the work directory.
            for leftover in (reordered_bam, '%s.bai' % reordered_bam):
                if os.path.isfile(leftover):
                    os.remove(leftover)
def post_aln_qc(args, bam_file, logger=None):
    """Run the post-alignment quality checks, writing into ``args.workDir``.

    The BAM file is validated, RNA-seq metrics are collected, the BAM is
    indexed and rna_seq_qc is run. When rna_seq_qc reports a non-zero exit
    code, the BAM is reordered against the reference genome, the reordered
    copy is indexed, and rna_seq_qc is run once more on that copy. The
    reordered copy is left in place afterwards.

    NOTE(review): ``post_aln_qc`` is also defined earlier in this file; at
    module level this later definition is the one the name ends up bound
    to -- confirm which version is meant to be live.

    Args:
        args: Parsed options. Attributes read here: ``picard``, ``id``,
            ``workDir``, ``ref_flat``, ``rna_seq_qc_path``, ``ref_genome``
            and ``rna_seq_qc_annotation``.
        bam_file: Path of the aligned BAM file to check.
        logger: Passed through unchanged to every helper call.

    Returns:
        None.
    """
    picard = args.picard
    sample_id = args.id
    out_dir = args.workDir

    def run_rna_seq_qc(bam_path):
        # Single place for the rna_seq_qc invocation; only the BAM differs
        # between the first attempt and the retry.
        return post_alignment_qc.rna_seq_qc(
            args.rna_seq_qc_path, bam_path, sample_id, out_dir,
            args.ref_genome, args.rna_seq_qc_annotation, logger)

    # Validate the post-alignment BAM file
    post_alignment_qc.validate_bam_file(
        picard, bam_file, sample_id, out_dir, logger)

    # Collect RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(
        picard, bam_file, sample_id, out_dir, args.ref_flat, logger)

    # Run rna_seq_qc from broad institute
    post_alignment_qc.bam_index(bam_file, sample_id, logger)
    status = run_rna_seq_qc(bam_file)
    if status == 0:
        return

    # First attempt failed: retry on a copy reordered against the reference.
    reordered_bam = post_alignment_qc.reorder_bam(
        picard, bam_file, sample_id, out_dir, args.ref_genome, logger)
    post_alignment_qc.bam_index(reordered_bam, sample_id, logger)
    run_rna_seq_qc(reordered_bam)
def run_pipeline(args, workdir, analysis_id, fastq_dir, logger):
    """Align datasets using STAR and compute expression using cufflinks.

    Steps, in order:
      1. Decompress ``args.input_file`` into ``fastq_dir`` and pick the
         ``_1`` / ``_2`` FASTQ files found there.
      2. Run FastQC on both read files (output in ``<workdir>/qc``).
      3. Recreate ``<workdir>/star_2_pass`` and run the STAR pipeline
         script to produce ``<analysis_id>_star.bam`` inside it.
      4. Fix mate information; on exit code 0 the fixed BAM replaces the
         STAR BAM in place.
      5. Validate the BAM and collect RNA-seq metrics (output in the qc
         directory).
      6. Run the cufflinks pipeline script on the BAM.

    Args:
        args: Parsed options. Attributes read here: ``input_file``,
            ``fastqc_path``, ``star_pipeline``, ``genome_dir``, ``p``,
            ``genome_fasta_file``, ``gtf``, ``quantMode``, ``picard``,
            ``ref_flat`` and ``cufflinks_pipeline``.
        workdir: Working directory that receives the ``qc`` and
            ``star_2_pass`` sub-directories.
        analysis_id: Identifier used to name the BAM and passed to the
            helper calls.
        fastq_dir: Directory the input archive is decompressed into.
        logger: Passed through unchanged to every helper call.

    Returns:
        None.

    Raises:
        ValueError: If ``fastq_dir`` holds no ``*_1.fastq[.gz]`` or no
            ``*_2.fastq[.gz]`` file after decompression.
    """
    tar_file_in = args.input_file

    qc_dir = os.path.join(workdir, 'qc')
    if not os.path.isdir(qc_dir):
        os.mkdir(qc_dir)

    decompress(tar_file_in, fastq_dir)

    # Locate the two mate files. When several names match, the last one
    # returned by os.listdir wins.
    reads_1 = None
    reads_2 = None
    for fname in os.listdir(fastq_dir):
        if fname.endswith(("_1.fastq.gz", "_1.fastq")):
            reads_1 = os.path.join(fastq_dir, fname)
        if fname.endswith(("_2.fastq.gz", "_2.fastq")):
            reads_2 = os.path.join(fastq_dir, fname)

    # FastQC below needs both mates; fail with a clear message instead of
    # an unbound-variable error when either one is missing.
    if reads_1 is None or reads_2 is None:
        raise ValueError(
            "could not find both _1 and _2 FASTQ files in %s" % fastq_dir)

    qc.fastqc(args.fastqc_path, reads_1, reads_2, qc_dir, analysis_id, logger)

    # Always start from an empty STAR output directory.
    star_output_dir = os.path.join(workdir, 'star_2_pass')
    if os.path.isdir(star_output_dir):
        pipelineUtil.remove_dir(star_output_dir)
    os.mkdir(star_output_dir)

    bam = "%s_star.bam" % os.path.join(star_output_dir, analysis_id)
    if not os.path.isfile(bam):
        # NOTE(review): the command starts with both 'time' and
        # '/usr/bin/time' -- confirm the double wrapper is intended.
        star_cmd = [
            'time', '/usr/bin/time', 'python', args.star_pipeline,
            '--genomeDir', args.genome_dir,
            '--runThreadN', args.p,
            '--tarFileIn', tar_file_in,
            '--workDir', workdir,
            '--out', bam,
            '--genomeFastaFile', args.genome_fasta_file,
            '--sjdbGTFfile', args.gtf,
        ]
        if args.quantMode != "":
            star_cmd.append('--quantMode')
            star_cmd.append(args.quantMode)
        pipelineUtil.log_function_time("STAR", analysis_id, star_cmd, logger)

    # Fix mate information for BAM
    exit_code, fix_mate_out = post_alignment_qc.fix_mate_information(
        args.picard, bam, analysis_id, workdir, logger)
    if exit_code == 0:
        # Swap the fixed BAM into the original path so the remaining steps
        # keep working with ``bam``.
        os.remove(bam)
        assert not os.path.isfile(bam)
        os.rename(fix_mate_out, bam)
        assert os.path.isfile(bam)

    # Validate the post alignment BAM file
    post_alignment_qc.validate_bam_file(
        args.picard, bam, analysis_id, qc_dir, logger)

    # Collect RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(
        args.picard, bam, analysis_id, qc_dir, args.ref_flat, logger)

    # Quantify using cufflinks
    cufflinks_cmd = [
        'time', '/usr/bin/time', 'python', args.cufflinks_pipeline,
        '--bam', bam,
        '--gtf', args.gtf,
        '--analysis_id', analysis_id,
        '--out', star_output_dir,
        '--p', args.p,
        '--multi_read_correct', 'True',
    ]
    pipelineUtil.log_function_time(
        "CUFFLINKS", analysis_id, cufflinks_cmd, logger)