def merge_fastq_files(
        technology_platform, barcode_fastq: [str], output_stem: str,
        genomic_fastq: [str]) -> (str, int):
    """Annotate genomic fastq with barcode information, merging the two files.

    After the merged file has been requested from ``fastq.merge_paired``, an
    ``rm`` command for the genomic and barcode input files is handed to
    ``io.ProcessManager``.

    :param technology_platform: class from platforms.py that defines the
      characteristics of the data being processed; its ``merge_function``
      attribute is forwarded to ``fastq.merge_paired``
    :param barcode_fastq: list of str names of fastq files containing barcode
      information
    :param output_stem: str, stem for output files; the merged output name is
      ``output_stem + '_merged.fastq'``
    :param genomic_fastq: list of str names of fastq files containing genomic
      information
    :returns str merged_fastq: name of merged fastq file (the value returned by
      ``fastq.merge_paired``, passed through unchanged)
    """
    # Local import: only needed to quote file names for the rm command below.
    import shlex

    log.info('Merging genomic reads and barcode annotations.')
    merged_fastq = fastq.merge_paired(
        merge_function=technology_platform.merge_function,
        fout=output_stem + '_merged.fastq',
        genomic=genomic_fastq,
        barcode=barcode_fastq)

    # delete genomic/barcode fastq files after merged.fastq creation
    originals = genomic_fastq + barcode_fastq
    if originals:  # a bare `rm` with no operands would only produce an error
        log.info('Removing original fastq file for memory management.')
        # Quote every name so a path containing spaces or shell metacharacters
        # stays a single rm argument; names that are already safe are left
        # unchanged by shlex.quote.
        # NOTE(review): assumes io.ProcessManager tokenizes the command
        # shell-style -- confirm against its implementation.
        delete_fastq = ' '.join(['rm'] + [shlex.quote(name) for name in originals])
        io.ProcessManager(delete_fastq).run_all()

    # NOTE(review): the signature is annotated `-> (str, int)` while the
    # docstring documents a single str; confirm against fastq.merge_paired.
    return merged_fastq
def merge_fastq_files(
    technology_platform,
    barcode_fastq: [str],
    output_stem: str,
    genomic_fastq: [str],
) -> (str, int):
    """Annotate genomic fastq with barcode information, merging the two files.

    Both input lists are sorted before merging, and each barcode/genomic
    pairing is written to the log. The input fastq files are left on disk;
    this function does not delete them.

    :param technology_platform: class from platforms.py that defines the
      characteristics of the data being processed; its ``merge_function``
      attribute is forwarded to ``fastq.merge_paired``
    :param barcode_fastq: list of str names of fastq files containing barcode
      information
    :param output_stem: str, stem for output files; the merged output name is
      ``output_stem + "_merged.fastq"``
    :param genomic_fastq: list of str names of fastq files containing genomic
      information
    :returns str merged_fastq: name of merged fastq file (the value returned by
      ``fastq.merge_paired``, passed through unchanged)
    """
    # HACK: glob ordering is not platform agnostic, so L001_R1 could end up
    # merged with L002_R2 instead of L001_R2. Sorting both lists first keeps
    # matching lanes aligned. This is a temporary solution.
    barcode_fastq = sorted(barcode_fastq)
    genomic_fastq = sorted(genomic_fastq)

    log.info("Merging genomic reads and barcode annotations.")

    # zip() stops at the shorter list, so without this message a count
    # mismatch would be invisible in the pairing log below.
    n_barcode, n_genomic = len(barcode_fastq), len(genomic_fastq)
    if n_barcode != n_genomic:
        log.info(
            "Warning: {} barcode fastq file(s) but {} genomic fastq file(s); "
            "the pairing logged below is incomplete.".format(n_barcode, n_genomic)
        )

    for bar_fq, gen_fq in zip(barcode_fastq, genomic_fastq):
        log.info(
            "Merge {} with {}".format(
                os.path.basename(bar_fq), os.path.basename(gen_fq)
            )
        )

    merged_fastq = fastq.merge_paired(
        merge_function=technology_platform.merge_function,
        fout=output_stem + "_merged.fastq",
        genomic=genomic_fastq,
        barcode=barcode_fastq,
    )

    # NOTE(review): the signature is annotated `-> (str, int)` while the
    # docstring documents a single str; confirm against fastq.merge_paired.
    return merged_fastq