Example #1
0
def post_aln_qc(args, bam_file, logger=None):
    """ perform post alignment quality check

    Steps, in order:
      1. create ``<args.workDir>/post_alignment_qc`` if it does not exist
      2. run ``post_alignment_qc.fix_mate_information``; when it reports exit
         code 0, its output file replaces ``bam_file`` on disk
      3. validate the BAM and collect RNA-seq metrics
      4. index the BAM and run ``post_alignment_qc.rna_seq_qc`` with
         ``<post_aln_dir>/rna_seq_qc`` as its directory argument
      5. if rna_seq_qc returns a non-zero exit code, reorder the BAM, index
         the reordered copy, run rna_seq_qc once more on it, and delete the
         reordered BAM and its ``.bai`` index afterwards

    Args:
        args: object providing ``workDir``, ``picard``, ``id``, ``ref_flat``,
            ``rna_seq_qc_path``, ``ref_genome`` and ``rna_seq_qc_annotation``.
        bam_file: path of the BAM file to check; may be replaced in place by
            the fix-mate output (step 2).
        logger: passed through unchanged to every ``post_alignment_qc`` call.
    """

    post_aln_dir = os.path.join(args.workDir, 'post_alignment_qc')
    if not os.path.isdir(post_aln_dir):
        os.mkdir(post_aln_dir)

    #Fix mate information for bam file
    exit_code, fix_mate_out = post_alignment_qc.fix_mate_information(args.picard, bam_file,
                                                                    args.id, args.workDir, logger)
    if exit_code == 0:
        # Swap the fixed BAM in under the original name so every later step
        # keeps working with the same path.
        os.remove(bam_file)
        assert not os.path.isfile(bam_file)
        os.rename(fix_mate_out, bam_file)
        assert os.path.isfile(bam_file)

    #validate the post-alignment BAM file
    post_alignment_qc.validate_bam_file(args.picard, bam_file, args.id, post_aln_dir, logger)

    #collect RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(args.picard, bam_file, args.id,
                                              post_aln_dir, args.ref_flat, logger)

    #run rna_seq_qc from broad institute
    post_alignment_qc.bam_index(bam_file, args.id, logger)
    rna_seq_qc_dir = os.path.join(post_aln_dir, 'rna_seq_qc')
    if not os.path.isdir(rna_seq_qc_dir):
        os.mkdir(rna_seq_qc_dir)

    exit_code = post_alignment_qc.rna_seq_qc(args.rna_seq_qc_path, bam_file, args.id,
                                             rna_seq_qc_dir, args.ref_genome,
                                             args.rna_seq_qc_annotation, logger)

    if exit_code != 0:
        #first attempt failed: retry once on a reordered copy of the BAM
        reordered_bam = post_alignment_qc.reorder_bam(args.picard, bam_file, args.id,
                                                      args.workDir, args.ref_genome, logger)
        try:
            post_alignment_qc.bam_index(reordered_bam, args.id, logger)
            post_alignment_qc.rna_seq_qc(args.rna_seq_qc_path, reordered_bam, args.id,
                                         rna_seq_qc_dir, args.ref_genome,
                                         args.rna_seq_qc_annotation, logger)
        finally:
            # The reordered BAM and its index are temporary; remove them even
            # when the retry raises so they are not left behind in workDir.
            for leftover in (reordered_bam, '%s.bai' % reordered_bam):
                if os.path.isfile(leftover):
                    os.remove(leftover)
Example #2
0
def post_aln_qc(args, bam_file, logger=None):
    """ run the post alignment quality checks on a BAM file

    The BAM is validated and its RNA-seq metrics are collected, then it is
    indexed and handed to rna_seq_qc. If rna_seq_qc returns a non-zero exit
    code, the BAM is reordered with post_alignment_qc.reorder_bam and the
    index + rna_seq_qc steps are repeated once on the reordered file.
    args.workDir is passed to every step as its directory argument.
    """

    def _index_and_run_rna_seq_qc(bam_path):
        """ index bam_path, run rna_seq_qc on it and return its exit code """
        post_alignment_qc.bam_index(bam_path, args.id, logger)
        return post_alignment_qc.rna_seq_qc(
            args.rna_seq_qc_path, bam_path, args.id, args.workDir,
            args.ref_genome, args.rna_seq_qc_annotation, logger)

    # validation of the aligned BAM comes first
    post_alignment_qc.validate_bam_file(
        args.picard, bam_file, args.id, args.workDir, logger)

    # then the picard RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(
        args.picard, bam_file, args.id, args.workDir, args.ref_flat, logger)

    # rna_seq_qc (broad institute) on the BAM as given
    first_attempt = _index_and_run_rna_seq_qc(bam_file)
    if first_attempt == 0:
        return

    # non-zero exit code: retry once on a reordered copy of the BAM
    retry_bam = post_alignment_qc.reorder_bam(
        args.picard, bam_file, args.id, args.workDir, args.ref_genome, logger)
    _index_and_run_rna_seq_qc(retry_bam)
Example #3
0
def run_pipeline(args, workdir, analysis_id, fastq_dir, logger):
    """ align datasets using STAR and compute expression using cufflinks

    Steps, in order:
      1. decompress ``args.input_file`` into ``fastq_dir`` and locate the
         ``_1`` / ``_2`` FASTQ files there
      2. run FastQC on both read files, with ``<workdir>/qc`` as its directory
      3. recreate ``<workdir>/star_2_pass`` and run the STAR pipeline script,
         asking it to write ``<analysis_id>_star.bam`` in that directory
      4. fix mate information; on exit code 0 the fixed file replaces the BAM
      5. validate the BAM and collect RNA-seq metrics into ``<workdir>/qc``
      6. run the cufflinks pipeline script on the BAM

    Args:
        args: object providing ``input_file``, ``fastqc_path``,
            ``star_pipeline``, ``genome_dir``, ``p``, ``genome_fasta_file``,
            ``gtf``, ``quantMode``, ``picard``, ``ref_flat`` and
            ``cufflinks_pipeline``.
        workdir: directory under which ``qc`` and ``star_2_pass`` are created.
        analysis_id: identifier used for the BAM file name and passed to the
            helper calls.
        fastq_dir: directory the input archive is decompressed into.
        logger: passed through unchanged to the helper calls.

    Raises:
        IOError: if ``fastq_dir`` does not contain both a ``_1`` and a ``_2``
            FASTQ file (``.fastq`` or ``.fastq.gz``) after decompression.
    """

    tar_file_in = args.input_file

    qc_dir = os.path.join(workdir, 'qc')
    if not os.path.isdir(qc_dir):
        os.mkdir(qc_dir)

    decompress(tar_file_in, fastq_dir)

    # Start from None so a missing mate file is reported explicitly instead
    # of surfacing as an unbound local at the fastqc call.
    reads_1 = None
    reads_2 = None
    for fname in os.listdir(fastq_dir):
        if fname.endswith(("_1.fastq.gz", "_1.fastq")):
            reads_1 = os.path.join(fastq_dir, fname)
        if fname.endswith(("_2.fastq.gz", "_2.fastq")):
            reads_2 = os.path.join(fastq_dir, fname)
    if reads_1 is None or reads_2 is None:
        raise IOError("could not find both _1 and _2 fastq files in %s" % fastq_dir)

    qc.fastqc(args.fastqc_path, reads_1, reads_2, qc_dir, analysis_id, logger)

    star_output_dir = os.path.join(workdir, 'star_2_pass')
    if os.path.isdir(star_output_dir):
        pipelineUtil.remove_dir(star_output_dir)
    os.mkdir(star_output_dir)
    bam = "%s_star.bam" %os.path.join(star_output_dir, analysis_id)

    if not os.path.isfile(bam):
        star_cmd = ['time', '/usr/bin/time', 'python', args.star_pipeline,
                    '--genomeDir', args.genome_dir,
                    '--runThreadN', args.p,
                    '--tarFileIn', tar_file_in,
                    '--workDir', workdir,
                    '--out', bam,
                    '--genomeFastaFile', args.genome_fasta_file,
                    '--sjdbGTFfile', args.gtf
                   ]
        if args.quantMode != "":
            star_cmd.append('--quantMode')
            star_cmd.append(args.quantMode)

        # Kept inside the guard: star_cmd only exists on this branch.
        pipelineUtil.log_function_time("STAR", analysis_id, star_cmd, logger)

    #Fix mate information for BAM
    exit_code, fix_mate_out = post_alignment_qc.fix_mate_information(args.picard, bam,
                                                                    analysis_id, workdir, logger)
    if exit_code == 0:
        # Swap the fixed BAM in under the original name so the remaining
        # steps keep using the same path.
        os.remove(bam)
        assert not os.path.isfile(bam)
        os.rename(fix_mate_out, bam)
        assert os.path.isfile(bam)

    #validate the post alignment BAM file
    post_alignment_qc.validate_bam_file(args.picard, bam, analysis_id, qc_dir, logger)

    #collect RNA-seq metrics
    post_alignment_qc.collect_rna_seq_metrics(args.picard, bam, analysis_id,
                                              qc_dir, args.ref_flat, logger)

    #quantify using cufflinks
    cufflinks_cmd = ['time', '/usr/bin/time', 'python', args.cufflinks_pipeline,
                     '--bam', bam,
                     '--gtf', args.gtf,
                     '--analysis_id', analysis_id,
                     '--out', star_output_dir,
                     '--p', args.p,
                     '--multi_read_correct', 'True'
                    ]

    pipelineUtil.log_function_time("CUFFLINKS", analysis_id, cufflinks_cmd, logger)