Exemple #1
0
    def merge_fastq_files(
            technology_platform, barcode_fastq: [str], output_stem: str,
            genomic_fastq: [str]) -> str:
        """Annotate genomic fastq with barcode information, merging the two files.

        After the merged file has been produced, the genomic and barcode input
        fastq files are removed with an ``rm`` command run through
        ``io.ProcessManager``.

        :param technology_platform: class from platforms.py that defines the
          characteristics of the data being processed
        :param barcode_fastq: list of str names of fastq files containing barcode
          information
        :param output_stem: str, stem for output files
        :param genomic_fastq: list of str names of fastq files containing genomic
          information
        :returns str merged_fastq: name of merged fastq file
        """
        # Function-scope import: only this method needs shlex.
        import shlex

        log.info('Merging genomic reads and barcode annotations.')
        merged_fastq = fastq.merge_paired(
            merge_function=technology_platform.merge_function,
            fout=output_stem + '_merged.fastq',
            genomic=genomic_fastq,
            barcode=barcode_fastq)

        # delete genomic/barcode fastq files after merged.fastq creation
        inputs = list(genomic_fastq) + list(barcode_fastq)
        if inputs:  # a bare `rm` with no operands would only produce an error
            log.info('Removing original fastq file for memory management.')
            # Quote every path so names containing spaces or shell
            # metacharacters are passed to `rm` as single arguments.
            # NOTE(review): assumes io.ProcessManager tokenizes the command
            # shell-style (shlex.split or a shell) -- confirm.
            delete_fastq = ' '.join(
                ['rm'] + [shlex.quote(name) for name in inputs])
            io.ProcessManager(delete_fastq).run_all()

        return merged_fastq
Exemple #2
0
    def merge_fastq_files(
        technology_platform,
        barcode_fastq: [str],
        output_stem: str,
        genomic_fastq: [str],
    ) -> str:
        """Annotate genomic fastq with barcode information, merging the two files.

        Both input lists are sorted before merging so that files are paired by
        position in a deterministic order, and each pairing is logged.

        :param technology_platform: class from platforms.py that defines the
          characteristics of the data being processed
        :param barcode_fastq: list of str names of fastq files containing barcode
          information
        :param output_stem: str, stem for output files
        :param genomic_fastq: list of str names of fastq files containing genomic
          information
        :returns str merged_fastq: name of merged fastq file
        """

        # HACK:
        # Due to the non-platform agnostic glob behavior,
        # it is possible that L001_R1 is merged with L002_R2 (not L001_R2).
        # To avoid this problem, we first sort both lists.
        # This is a temporary solution.
        barcode_fastq = sorted(barcode_fastq)
        genomic_fastq = sorted(genomic_fastq)

        log.info("Merging genomic reads and barcode annotations.")
        # Log each pairing; zip stops at the shorter list, so surplus files in
        # the longer list are not reported here.
        for bar_fq, gen_fq in zip(barcode_fastq, genomic_fastq):
            log.info("Merge {} with {}".format(os.path.basename(bar_fq),
                                               os.path.basename(gen_fq)))

        merged_fastq = fastq.merge_paired(
            merge_function=technology_platform.merge_function,
            fout=output_stem + "_merged.fastq",
            genomic=genomic_fastq,
            barcode=barcode_fastq,
        )

        # NOTE: the genomic/barcode input fastq files are left on disk; the
        # former `rm` cleanup step after merged.fastq creation is disabled.

        return merged_fastq