Example #1
0
def bwa_aln(bam_path, reference_fasta_path, logger):
    """Run the ``bwa aln`` helpers on every FASTQ found beside ``bam_path``.

    FASTQ files are looked up in ``<dirname(bam_path)>/fastq``. The directory
    ``<dirname(bam_path)>/realn`` is created if missing and handed to the
    helpers.

    Args:
        bam_path: Path of the input BAM. Its directory anchors the ``fastq``
            and ``realn`` directories, and the path itself is forwarded
            unchanged to every helper call.
        reference_fasta_path: Forwarded to the alignment helpers.
        logger: Object exposing ``info``; also forwarded to the helpers.

    Returns:
        A list of the values returned by ``bwa_aln_paired`` and
        ``bwa_aln_single`` (presumably output BAM paths -- confirm against
        the helpers), ordered: paired entries sorted by read1 name, then the
        ``'s'``, ``'o1'`` and ``'o2'`` groups in list order.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    realn_dir = os.path.join(uuid_dir, 'realn')
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    # Results are collected under their own name so that ``bam_path`` keeps
    # referring to the input BAM for every helper call, not to the output of
    # the previous alignment.
    bam_path_list = []
    for read1 in sorted(pefastqdict):
        realn_bam_path = bwa_aln_paired(bam_path, fastq_dir, read1, pefastqdict[read1],
                                        realn_dir, reference_fasta_path, logger)
        bam_path_list.append(realn_bam_path)

    # The three unpaired groups differ only in the tag passed to the helper.
    unpaired_groups = ((sefastqlist, 's'), (o1fastqlist, 'o1'), (o2fastqlist, 'o2'))
    for fastq_names, tag in unpaired_groups:
        for fastq_name in fastq_names:
            realn_bam_path = bwa_aln_single(bam_path, fastq_dir, fastq_name, realn_dir,
                                            tag, reference_fasta_path, logger)
            bam_path_list.append(realn_bam_path)
    return bam_path_list
Example #2
0
def fastqc_validate(uuid, bam_path, thread_count, engine, logger):
    """Run the FastQC and encoding-guess steps on every FASTQ beside ``bam_path``.

    FASTQ files are looked up in ``<dirname(bam_path)>/fastq``. For each file
    the four steps ``do_fastqc``, ``fastqc_to_db``, ``do_guess_encoding`` and
    ``guess_enc_db`` are called in that order. ``fastq_util.get_fastq_length``
    is additionally called for read1 of each pair and for every unpaired
    FASTQ.

    Args:
        uuid: Identifier forwarded to every step.
        bam_path: Path whose directory contains the ``fastq`` directory.
        thread_count: Forwarded to ``do_fastqc``.
        engine: Forwarded to every step (presumably a database engine --
            confirm against the helpers).
        logger: Object exposing ``info``; also forwarded to every step.

    Returns:
        The value of the last ``get_fastq_length`` call, or ``None`` when no
        FASTQ files were found.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the supplied logger, not the root ``logging`` module, so this line
    # ends up in the same place as the rest of the messages.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    def run_checks(fastq_name):
        # The four per-file steps, in the order the pipeline expects.
        fq_path = os.path.join(fastq_dir, fastq_name)
        do_fastqc(uuid, fq_path, thread_count, engine, logger)
        fastqc_to_db(uuid, fq_path, engine, logger)
        do_guess_encoding(uuid, fq_path, engine, logger)
        guess_enc_db(uuid, fq_path, engine, logger)

    # Bound up front so the return below is defined even with no FASTQ files.
    fastq_length = None

    for read1 in sorted(pefastqdict):
        run_checks(read1)
        # Length is only measured on read1 of a pair.
        fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, read1, engine, logger)  # removable
        run_checks(pefastqdict[read1])

    for fastq_names in (sefastqlist, o1fastqlist, o2fastqlist):
        for fastq_name in fastq_names:
            run_checks(fastq_name)
            # Measure the file just checked (not a leftover name from
            # another group's loop).
            fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, fastq_name, engine, logger)  # removable
    return fastq_length
Example #3
0
def bwa_mem(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger):
    """Run the ``bwa mem`` helpers on every FASTQ found beside ``bam_path``.

    FASTQ files are looked up in ``<dirname(bam_path)>/fastq``. The directory
    ``<dirname(bam_path)>/realn`` is created if missing and handed to the
    helpers. A read-group string is obtained from
    ``bam_util.get_readgroup_str`` for each FASTQ (read1 for pairs) before
    the matching helper is called.

    Args:
        uuid: Identifier forwarded to the alignment helpers.
        bam_path: Path of the input BAM. Its directory anchors the ``fastq``
            and ``realn`` directories, and the path itself is forwarded
            unchanged to every helper call.
        reference_fasta_path: Forwarded to the alignment helpers.
        readgroup_path_dict: Forwarded to ``bam_util.get_readgroup_str``.
        thread_count: Forwarded to the alignment helpers.
        engine: Forwarded to the alignment helpers (presumably a database
            engine -- confirm against the helpers).
        logger: Object exposing ``info``; also forwarded to the helpers.

    Returns:
        A list of the values returned by ``bwa_mem_paired`` and
        ``bwa_mem_single`` (presumably output BAM paths -- confirm against
        the helpers), ordered: paired entries sorted by read1 name, then the
        ``"s"``, ``"o1"`` and ``"o2"`` groups in list order.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info("uuid_dir=%s", uuid_dir)
    fastq_dir = os.path.join(uuid_dir, "fastq")
    logger.info("fastq_dir=%s", fastq_dir)
    realn_dir = os.path.join(uuid_dir, "realn")
    logger.info("realn_dir=%s", realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the supplied logger, not the root ``logging`` module, so this line
    # ends up in the same place as the rest of the messages.
    logger.info("fastqlist=%s", fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info("pefastqdict=%s", pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info("sefastqlist=%s", sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info("o1fastqlist=%s", o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info("o2fastqlist=%s", o2fastqlist)

    # Results are collected under their own name so that ``bam_path`` keeps
    # referring to the input BAM for every helper call, not to the output of
    # the previous alignment.
    bam_path_list = []
    for read1 in sorted(pefastqdict):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        realn_bam_path = bwa_mem_paired(
            uuid,
            bam_path,
            fastq_dir,
            read1,
            pefastqdict[read1],
            realn_dir,
            reference_fasta_path,
            rg_str,
            thread_count,
            engine,
            logger,
        )
        bam_path_list.append(realn_bam_path)

    # The three unpaired groups differ only in the tag passed to the helper.
    unpaired_groups = ((sefastqlist, "s"), (o1fastqlist, "o1"), (o2fastqlist, "o2"))
    for fastq_names, tag in unpaired_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(fastq_name, readgroup_path_dict, logger)
            realn_bam_path = bwa_mem_single(
                uuid,
                bam_path,
                fastq_dir,
                fastq_name,
                realn_dir,
                tag,
                reference_fasta_path,
                rg_str,
                thread_count,
                engine,
                logger,
            )
            bam_path_list.append(realn_bam_path)
    return bam_path_list
Example #4
0
def bwa(uuid, bam_path, reference_fasta_path, readgroup_path_dict, thread_count, engine, logger):
    """Align every FASTQ beside ``bam_path``, choosing aln or mem per file.

    FASTQ files are looked up in ``<dirname(bam_path)>/fastq``. The directory
    ``<dirname(bam_path)>/realn`` is created if missing and handed to the
    helpers. For each FASTQ (read1 for pairs) the read-group string and the
    value of ``fastq_util.get_fastq_length`` are obtained. When that value is
    below ``MEM_ALN_CUTOFF`` the ``bwa_aln_*`` helper is called, otherwise the
    ``bwa_mem_*`` helper.

    Args:
        uuid: Identifier forwarded to the helpers.
        bam_path: Path of the input BAM. Its directory anchors the ``fastq``
            and ``realn`` directories, and the path itself is forwarded
            unchanged to every helper call.
        reference_fasta_path: Forwarded to the alignment helpers.
        readgroup_path_dict: Forwarded to ``bam_util.get_readgroup_str``.
        thread_count: Forwarded to the alignment helpers.
        engine: Forwarded to the helpers (presumably a database engine --
            confirm against the helpers).
        logger: Object exposing ``info``; also forwarded to the helpers.

    Returns:
        A list of the values returned by the alignment helpers (presumably
        output BAM paths -- confirm against the helpers), ordered: paired
        entries sorted by read1 name, then the ``'s'``, ``'o1'`` and ``'o2'``
        groups in list order.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info('uuid_dir=%s', uuid_dir)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    logger.info('fastq_dir=%s', fastq_dir)
    realn_dir = os.path.join(uuid_dir, 'realn')
    logger.info('realn_dir=%s', realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the supplied logger, not the root ``logging`` module, so this line
    # ends up in the same place as the rest of the messages.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    # Results are collected under their own name so that ``bam_path`` keeps
    # referring to the input BAM for every helper call, not to the output of
    # the previous alignment.
    bam_path_list = []
    for read1 in sorted(pefastqdict):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, read1, engine, logger)
        align_paired = bwa_aln_paired if fastq_length < MEM_ALN_CUTOFF else bwa_mem_paired
        realn_bam_path = align_paired(uuid, bam_path, fastq_dir, read1, pefastqdict[read1],
                                      realn_dir, reference_fasta_path,
                                      rg_str, thread_count, engine, logger)
        bam_path_list.append(realn_bam_path)

    # The three unpaired groups differ only in the tag passed to the helper.
    unpaired_groups = ((sefastqlist, 's'), (o1fastqlist, 'o1'), (o2fastqlist, 'o2'))
    for fastq_names, tag in unpaired_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(fastq_name, readgroup_path_dict, logger)
            fastq_length = fastq_util.get_fastq_length(uuid, fastq_dir, fastq_name, engine, logger)
            align_single = bwa_aln_single if fastq_length < MEM_ALN_CUTOFF else bwa_mem_single
            realn_bam_path = align_single(uuid, bam_path, fastq_dir, fastq_name, realn_dir, tag,
                                          reference_fasta_path,
                                          rg_str, thread_count, engine, logger)
            bam_path_list.append(realn_bam_path)
    return bam_path_list