Example #1
0
def _fastqc_single_fastq(uuid, fq_path, thread_count, engine, logger):
    """Run the per-file QC steps on one FASTQ, in their required order.

    Calls do_fastqc, fastqc_to_db, do_guess_encoding and guess_enc_db
    (all defined elsewhere in this project) on ``fq_path``.
    """
    do_fastqc(uuid, fq_path, thread_count, engine, logger)
    fastqc_to_db(uuid, fq_path, engine, logger)
    do_guess_encoding(uuid, fq_path, engine, logger)
    guess_enc_db(uuid, fq_path, engine, logger)


def fastqc_validate(uuid, bam_path, thread_count, engine, logger):
    """Run FastQC and encoding detection on every FASTQ found next to a BAM.

    The FASTQ files are looked up in ``<dirname(bam_path)>/fastq`` and
    classified by ``fastq_util`` into paired-end (read1 -> read2 mapping),
    single-end, and the two orphan categories (o1, o2). Each file goes
    through the same four QC steps.

    Args:
        uuid: identifier forwarded unchanged to every helper.
        bam_path: path of the BAM; only its directory is used here.
        thread_count: forwarded to do_fastqc.
        engine: forwarded to the helpers (presumably a database engine --
            TODO confirm against the helper definitions).
        logger: logger used for progress messages and forwarded to helpers.

    Returns:
        The value returned by the last ``fastq_util.get_fastq_length`` call,
        or None when no FASTQ file was found at all.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (not the logging module) like every other message.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    # Stays None when there is nothing to process, instead of leaving the
    # name unbound at the return statement.
    fastq_length = None

    for read1 in sorted(pefastqdict):
        # read1
        _fastqc_single_fastq(uuid, os.path.join(fastq_dir, read1),
                             thread_count, engine, logger)
        # Length is queried for read1 only; the original author marked the
        # get_fastq_length calls in this function as "removable".
        fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, read1, engine, logger)
        # read2
        _fastqc_single_fastq(uuid, os.path.join(fastq_dir, pefastqdict[read1]),
                             thread_count, engine, logger)

    # Single-end, then orphan-1, then orphan-2 files all get identical
    # treatment; the length is queried for the file being processed.
    for unpaired_fastqs in (sefastqlist, o1fastqlist, o2fastqlist):
        for fastq_name in unpaired_fastqs:
            _fastqc_single_fastq(uuid, os.path.join(fastq_dir, fastq_name),
                                 thread_count, engine, logger)
            fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, fastq_name, engine, logger)

    if fastq_length is None:
        logger.warning('no fastq files found in fastq_dir=%s', fastq_dir)
    return fastq_length
Example #2
0
def bwa(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger):
    """Align every FASTQ found next to a BAM and collect the resulting BAM paths.

    FASTQ files are read from ``<dirname(bam_path)>/fastq`` and outputs go
    to ``<dirname(bam_path)>/realn`` (created if missing). For each file (or
    read pair) the value from ``fastq_util.get_fastq_length`` selects the
    aligner: below ``MEM_ALN_CUTOFF`` the ``bwa_aln_*`` helper is used,
    otherwise the ``bwa_mem_*`` helper.

    Args:
        uuid: identifier forwarded unchanged to every helper.
        bam_path: path of the input BAM; its directory anchors the fastq/ and
            realn/ directories, and it is forwarded to every aligner call.
        reference_fasta_path: forwarded to the aligner helpers.
        readgroup_path_dict: forwarded to ``bam_util.get_readgroup_str`` to
            obtain the read-group string for each FASTQ.
        thread_count: forwarded to the aligner helpers.
        engine: forwarded to the helpers (presumably a database engine --
            TODO confirm against the helper definitions).
        logger: logger used for progress messages and forwarded to helpers.

    Returns:
        List of the values returned by the aligner helpers, in processing
        order: paired-end (sorted by read1 name), single-end, o1, o2.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info('uuid_dir=%s', uuid_dir)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    logger.info('fastq_dir=%s', fastq_dir)
    realn_dir = os.path.join(uuid_dir, 'realn')
    logger.info('realn_dir=%s', realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (not the logging module) like every other message.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    bam_path_list = []

    # NOTE: the aligner output is kept in its own local so that the caller's
    # bam_path is what every aligner call receives; previously the parameter
    # was overwritten and later calls were handed the previous output BAM.
    for read1 in sorted(pefastqdict):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, read1, engine, logger)
        if fastq_length < MEM_ALN_CUTOFF:
            align_paired = bwa_aln_paired
        else:
            align_paired = bwa_mem_paired
        out_bam_path = align_paired(uuid, bam_path, fastq_dir, read1, pefastqdict[read1],
                                    realn_dir, reference_fasta_path,
                                    rg_str, thread_count, engine, logger)
        bam_path_list.append(out_bam_path)

    # Single-end and orphan reads differ only in the type tag handed to the
    # aligner; order (s, o1, o2) matches the original sequence of loops.
    unpaired_groups = (
        (sefastqlist, 's'),
        (o1fastqlist, 'o1'),
        (o2fastqlist, 'o2'),
    )
    for fastq_names, fastq_type in unpaired_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(fastq_name, readgroup_path_dict, logger)
            fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, fastq_name, engine, logger)
            if fastq_length < MEM_ALN_CUTOFF:
                align_single = bwa_aln_single
            else:
                align_single = bwa_mem_single
            out_bam_path = align_single(uuid, bam_path, fastq_dir, fastq_name, realn_dir,
                                        fastq_type, reference_fasta_path,
                                        rg_str, thread_count, engine, logger)
            bam_path_list.append(out_bam_path)

    return bam_path_list