def _validate_fastq(uuid, fq_path, thread_count, engine, logger):
    """Run the per-file QC steps on a single FASTQ.

    Calls, in order: ``do_fastqc``, ``fastqc_to_db``, ``do_guess_encoding``
    and ``guess_enc_db``, all on ``fq_path``.
    """
    do_fastqc(uuid, fq_path, thread_count, engine, logger)
    fastqc_to_db(uuid, fq_path, engine, logger)
    do_guess_encoding(uuid, fq_path, engine, logger)
    guess_enc_db(uuid, fq_path, engine, logger)


def fastqc_validate(uuid, bam_path, thread_count, engine, logger):
    """Run the QC steps on every FASTQ found next to ``bam_path``.

    The FASTQ files are looked up in the ``fastq`` subdirectory of the
    directory containing ``bam_path`` and are classified by ``fastq_util``
    into a read1 -> read2 mapping and three unpaired lists (``se``, ``o1``,
    ``o2``).  Each file goes through ``_validate_fastq``.

    Args:
        uuid: identifier passed through to every helper call.
        bam_path: path of the BAM; only its directory name is used here.
        thread_count: passed through to ``do_fastqc``.
        engine: passed through to the helpers (presumably a database
            engine -- confirm against the helpers).
        logger: logger used for all progress messages.

    Returns:
        The value returned by the last ``fastq_util.get_fastq_length`` call,
        or ``None`` when no FASTQ file was found at all.
    """
    uuid_dir = os.path.dirname(bam_path)
    fastq_dir = os.path.join(uuid_dir, 'fastq')

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (not the root ``logging`` module) so this
    # message ends up in the same place as the ones below.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    # Stays None when there is nothing to validate, instead of failing with
    # UnboundLocalError at the return statement.
    fastq_length = None

    for read1 in sorted(pefastqdict.keys()):
        _validate_fastq(uuid, os.path.join(fastq_dir, read1),
                        thread_count, engine, logger)
        # Only read1 is measured for a pair; read2 is validated but not measured.
        fastq_length = fastq_util.get_fastq_length(
            uuid, fastq_dir, read1, engine, logger)
        _validate_fastq(uuid, os.path.join(fastq_dir, pefastqdict[read1]),
                        thread_count, engine, logger)

    # The three unpaired categories are handled identically.  The length is
    # measured on the file being validated (the o1/o2 loops used to pass the
    # stale ``seread`` name from the single-end loop).
    for unpaired_fastqs in (sefastqlist, o1fastqlist, o2fastqlist):
        for fastq_name in unpaired_fastqs:
            _validate_fastq(uuid, os.path.join(fastq_dir, fastq_name),
                            thread_count, engine, logger)
            fastq_length = fastq_util.get_fastq_length(
                uuid, fastq_dir, fastq_name, engine, logger)

    return fastq_length
def bwa(uuid, bam_path, reference_fasta_path, readgroup_path_dict,
        thread_count, engine, logger):
    """Align every FASTQ found next to ``bam_path`` and collect the outputs.

    FASTQ files are read from the ``fastq`` subdirectory of the directory
    containing ``bam_path``; a ``realn`` subdirectory is created alongside it
    and handed to the aligners.  For each paired read1/read2 entry and each
    unpaired file (``se``, ``o1``, ``o2``) the read-group string and the
    FASTQ length are looked up, then the file is dispatched to a
    ``bwa_aln_*`` function when the length is below ``MEM_ALN_CUTOFF`` and to
    a ``bwa_mem_*`` function otherwise.

    Args:
        uuid: identifier passed through to every helper call.
        bam_path: path of the input BAM.  It is forwarded unchanged to every
            aligner call.
        reference_fasta_path: reference passed to the aligners.
        readgroup_path_dict: passed to ``bam_util.get_readgroup_str``.
        thread_count: passed through to the aligners.
        engine: passed through to the helpers (presumably a database
            engine -- confirm against the helpers).
        logger: logger used for all progress messages.

    Returns:
        A list with the return value of each aligner call, paired files
        first, then ``se``, ``o1`` and ``o2`` files.
    """
    uuid_dir = os.path.dirname(bam_path)
    logger.info('uuid_dir=%s', uuid_dir)
    fastq_dir = os.path.join(uuid_dir, 'fastq')
    logger.info('fastq_dir=%s', fastq_dir)
    realn_dir = os.path.join(uuid_dir, 'realn')
    logger.info('realn_dir=%s', realn_dir)
    os.makedirs(realn_dir, exist_ok=True)

    fastqlist = fastq_util.buildfastqlist(fastq_dir)
    # Use the injected logger (not the root ``logging`` module) so this
    # message ends up in the same place as the others.
    logger.info('fastqlist=%s', fastqlist)
    pefastqdict = fastq_util.buildpefastqdict(fastqlist)
    logger.info('pefastqdict=%s', pefastqdict)
    sefastqlist = fastq_util.buildsefastqlist(fastqlist)
    logger.info('sefastqlist=%s', sefastqlist)
    o1fastqlist = fastq_util.buildo1fastqlist(fastqlist)
    logger.info('o1fastqlist=%s', o1fastqlist)
    o2fastqlist = fastq_util.buildo2fastqlist(fastqlist)
    logger.info('o2fastqlist=%s', o2fastqlist)

    # NOTE: aligner results are appended directly and never assigned back to
    # ``bam_path``; previously the parameter was overwritten on each
    # iteration, so later aligner calls received the previous output path.
    bam_path_list = []

    for read1 in sorted(pefastqdict.keys()):
        rg_str = bam_util.get_readgroup_str(read1, readgroup_path_dict, logger)
        fastq_length = fastq_util.get_fastq_length(
            uuid, fastq_dir, read1, engine, logger)
        if fastq_length < MEM_ALN_CUTOFF:
            align_paired = bwa_aln_paired
        else:
            align_paired = bwa_mem_paired
        bam_path_list.append(
            align_paired(uuid, bam_path, fastq_dir, read1, pefastqdict[read1],
                         realn_dir, reference_fasta_path, rg_str,
                         thread_count, engine, logger))

    # The unpaired categories differ only in the tag handed to the aligner.
    unpaired_groups = (
        (sefastqlist, 's'),
        (o1fastqlist, 'o1'),
        (o2fastqlist, 'o2'),
    )
    for fastq_names, read_tag in unpaired_groups:
        for fastq_name in fastq_names:
            rg_str = bam_util.get_readgroup_str(
                fastq_name, readgroup_path_dict, logger)
            fastq_length = fastq_util.get_fastq_length(
                uuid, fastq_dir, fastq_name, engine, logger)
            if fastq_length < MEM_ALN_CUTOFF:
                align_single = bwa_aln_single
            else:
                align_single = bwa_mem_single
            bam_path_list.append(
                align_single(uuid, bam_path, fastq_dir, fastq_name, realn_dir,
                             read_tag, reference_fasta_path, rg_str,
                             thread_count, engine, logger))

    return bam_path_list