def test_humann2_rna_dna_norm_witten_bell_tsv(self):
        """
        Run humann2_rna_dna_norm on the dna and rna tsv inputs with the
        witten_bell method and compare every output with its expected file
        """

        # scratch folder; all outputs share the same basename inside it
        scratch_dir = utils.create_temp_folder("rna_dna_norm_witten_bell")
        output_prefix = os.path.join(scratch_dir, "rna_dna_norm")

        # build the command piece by piece, then run it
        norm_command = ["humann2_rna_dna_norm"]
        norm_command += ["--input_dna", cfg.rna_dna_norm_dna_input]
        norm_command += ["--input_rna", cfg.rna_dna_norm_rna_input]
        norm_command += ["--output_basename", output_prefix]
        norm_command += ["--method", "witten_bell"]
        utils.run_command(norm_command)

        # pair each output extension with the expected file for this method
        extension_to_expected = zip(cfg.rna_dna_norm_file_names,
                                    cfg.rna_dna_norm_witten_bell_output_files)
        for extension, expected_file in extension_to_expected:
            produced_file = output_prefix + extension
            matches = utils.files_almost_equal(produced_file, expected_file)
            self.assertTrue(matches)

        # clean up the scratch folder
        utils.remove_temp_folder(scratch_dir)
    def test_humann2_rna_dna_norm_log_10_tsv(self):
        """
        Run humann2_rna_dna_norm on the dna and rna tsv inputs with the log
        transform enabled (base 10) and compare every output with its
        expected file
        """

        # scratch folder; all outputs share the same basename inside it
        scratch_dir = utils.create_temp_folder("rna_dna_norm_log_10")
        output_prefix = os.path.join(scratch_dir, "rna_dna_norm")

        # build the command piece by piece, then run it
        norm_command = ["humann2_rna_dna_norm"]
        norm_command += ["--input_dna", cfg.rna_dna_norm_dna_input]
        norm_command += ["--input_rna", cfg.rna_dna_norm_rna_input]
        norm_command += ["--output_basename", output_prefix]
        norm_command += ["--log_transform", "--log_base", "10"]
        utils.run_command(norm_command)

        # files_almost_equal is used (rather than an exact comparison) to
        # allow for varying precision in the calculations
        extension_to_expected = zip(cfg.rna_dna_norm_file_names,
                                    cfg.rna_dna_norm_log_10_output_files)
        for extension, expected_file in extension_to_expected:
            produced_file = output_prefix + extension
            matches = utils.files_almost_equal(produced_file, expected_file)
            self.assertTrue(matches)

        # clean up the scratch folder
        utils.remove_temp_folder(scratch_dir)
Beispiel #3
0
    def test_trimmomatic_fastqc_start_no_reference_database_paired_end(self):
        """
        Run kneaddata on paired end input without a reference database and
        with fastqc requested at the start of the workflow, then check the
        fastqc, log and paired trim outputs
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # the same fastq file is supplied for both ends of the pair
        kneaddata_args = ["kneaddata"]
        kneaddata_args.extend(["--input", cfg.fastq_file])
        kneaddata_args.extend(["--input", cfg.fastq_file])
        kneaddata_args.extend(["--output", output_dir, "--run-fastqc-start"])
        utils.run_kneaddata(kneaddata_args)

        # all output names are derived from the input file basename
        stem = utils.file_basename(cfg.fastq_file)

        # fastqc reports are looked up in the "fastqc" subfolder
        wanted_files = [
            os.path.join("fastqc", stem + cfg.fastqc_extensions[index])
            for index in (0, 1)
        ]
        wanted_files.append(stem + cfg.log_extension)
        wanted_files.extend(
            stem + cfg.paired_trim_extensions[index] for index in (0, 1))

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #4
0
    def test_humann_split_tables_tsv(self):
        """
        Test splitting a tsv file with humann_split_tables

        Every file written to the output folder is matched to the expected
        split table of its sample (1 or 2) and compared with
        utils.files_almost_equal. The test fails if no output file can be
        matched, so it cannot pass without comparing anything.
        """

        input_file = cfg.multi_sample_genefamilies

        # create a temp directory
        temp_directory = utils.create_temp_folder("split_tables_tsv")

        # split the file
        utils.run_command([
            "humann_split_table", "--input", input_file, "--output",
            temp_directory, "--verbose"
        ])

        # expected split table for each sample number; the keys are strings
        # because they are compared with characters of the file name (the
        # previous comparison against the ints 1 and 2 could never match)
        expected_by_sample = {
            "1": cfg.multi_sample_genefamilies_split1,
            "2": cfg.multi_sample_genefamilies_split2,
        }

        # pair each output file with the expected file for its sample
        file_pairs = []
        for output_name in sorted(os.listdir(temp_directory)):
            # NOTE(review): the naming of the split files is not visible
            # here. The sample number is taken as the last digit of the name
            # with the extension removed, which includes the case of a name
            # ending in the digit -- confirm against humann_split_table.
            name_without_extension = os.path.splitext(output_name)[0]
            digits = [char for char in name_without_extension
                      if char.isdigit()]
            sample_number = digits[-1] if digits else None
            if sample_number in expected_by_sample:
                file_pairs.append([
                    os.path.join(temp_directory, output_name),
                    expected_by_sample[sample_number]
                ])

        # guard against silently passing with nothing to compare
        self.assertTrue(
            file_pairs,
            "No split output files were matched to a sample in: " +
            temp_directory)

        for temp_file, expected_file in file_pairs:
            self.assertTrue(utils.files_almost_equal(temp_file, expected_file))

        # remove the temp folder
        utils.remove_temp_folder(temp_directory)
Beispiel #5
0
    def test_humann2_fastq_biom_output_pathways(self):
        """
        Run humann2 on the demo fastq file with biom output and gap fill
        turned off, then compare the pathway identifiers found in the
        pathway abundance table with the expected set
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("fastq")

        # run humann2 with biom output
        humann2_args = ["humann2", "--input", cfg.demo_fastq]
        humann2_args += ["--output", output_dir]
        humann2_args += ["--output-format", "biom", "--gap-fill", "off"]
        utils.run_humann2(humann2_args)

        # read the pathway abundance table written by the run
        pathabundance_path = os.path.join(output_dir,
                                          "demo_pathabundance.biom")
        table_rows = utils.read_biom_table(pathabundance_path)

        # keep rows mentioning "PWY"; the identifier is the text of the
        # first tab separated column up to the first colon
        identifiers = {
            row.split("\t")[0].split(":")[0]
            for row in table_rows if "PWY" in row
        }

        self.assertEqual(identifiers,
                         cfg.expected_demo_output_files_biom_pathways)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #6
0
    def test_trf_only_paired_end(self):
        """
        Run kneaddata on paired end input with trf enabled and the trim
        step bypassed, then check the log and paired repeats removed outputs
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # the same fastq file is supplied for both ends of the pair
        input_options = ["--input", cfg.fastq_file, "--input", cfg.fastq_file]
        run_options = ["--output", output_dir, "--run-trf", "--bypass-trim"]
        utils.run_kneaddata(["kneaddata"] + input_options + run_options)

        # all output names are derived from the input file basename
        stem = utils.file_basename(cfg.fastq_file)
        suffixes = [
            cfg.log_extension,
            cfg.paired_repeats_removed_extensions[0],
            cfg.paired_repeats_removed_extensions[1],
        ]
        wanted_files = [stem + suffix for suffix in suffixes]

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #7
0
    def test_humann2_gene_families_biom_input(self):
        """
        Run humann2 with the biom formatted demo gene families file as the
        input and check the expected outputs for a gene families input
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("gene_families")

        # run humann2 on the gene families table
        humann2_args = ["humann2"]
        humann2_args += ["--input", cfg.demo_gene_families_biom]
        humann2_args += ["--output", output_dir]
        utils.run_humann2(humann2_args)

        # the expected list is the one for a gene families input: the gene
        # families output itself is not part of it since that file was the
        # input of the run
        wanted_files = cfg.expected_demo_output_files_genefamilies_input
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #8
0
    def test_trimmomatic_and_trf_no_reference_database_single_end(self):
        """
        Run kneaddata on single end input without a reference database and
        with trf enabled, then check the log, trim and repeats removed
        outputs
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # single input file, trf switched on
        kneaddata_args = ["kneaddata", "--input", cfg.fastq_file]
        kneaddata_args += ["--output", output_dir, "--run-trf"]
        utils.run_kneaddata(kneaddata_args)

        # all output names are derived from the input file basename
        stem = utils.file_basename(cfg.fastq_file)
        suffixes = (cfg.log_extension, cfg.single_trim_extension,
                    cfg.repeats_removed_extension)
        wanted_files = [stem + suffix for suffix in suffixes]

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #9
0
    def test_trimmomatic_bowtie2_database_and_trf_paired_end_remove_intermedite_temp_output(
            self):
        """
        Run kneaddata on paired end input with a bowtie2 database, trf and
        the no-discordant option, without storing temp output
        Check the expected outputs and the number of entries left in the
        output folder
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # the same fastq file is supplied for both ends of the pair
        kneaddata_args = ["kneaddata"]
        kneaddata_args += ["--input", cfg.fastq_file, "--input", cfg.fastq_file]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--run-trf", "--no-discordant"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names
        stem = utils.file_basename(cfg.fastq_file)
        filtered_stem = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2", True)

        # outputs handed to check_output
        non_empty_files = [stem + cfg.log_extension]
        for paired_extensions in (cfg.paired_trim_extensions,
                                  cfg.final_extensions_paired,
                                  cfg.paired_repeats_removed_extensions):
            non_empty_files.append(stem + paired_extensions[0])
            non_empty_files.append(stem + paired_extensions[1])

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(non_empty_files, output_dir):
            self.assertTrue(passed, detail)

        # the contaminated files are expected too, but they can be empty
        all_expected_files = non_empty_files + [
            filtered_stem + cfg.paired_contaminated_extension[0],
            filtered_stem + cfg.paired_contaminated_extension[1]
        ]

        # the output folder must hold exactly as many entries as the full
        # expected list
        entries = os.listdir(output_dir)
        self.assertEqual(len(entries), len(all_expected_files))

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #10
0
    def test_trimmomatic_bowtie2_paired_end_remove_intermedite_temp_output_discordant_trf(
            self):
        """
        Run kneaddata on a pair of fastq files with the bowtie2 database
        given twice and trf enabled, without storing temp output and without
        the no-discordant option
        Check the expected outputs and the number of non-empty entries left
        in the output folder
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # paired input, reference database passed twice
        kneaddata_args = ["kneaddata"]
        kneaddata_args += ["--input", cfg.fastq_file]
        kneaddata_args += ["--input", cfg.fastq_pair_file]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--run-trf"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names; the filtered
        # basename is computed but not used by the checks in this test
        stem = utils.file_basename(cfg.fastq_file)
        filtered_file_basename = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2")

        # outputs handed to check_output
        non_empty_files = [stem + cfg.log_extension]
        for paired_extensions in (cfg.paired_trim_extensions,
                                  cfg.final_extensions_paired,
                                  cfg.paired_repeats_removed_extensions):
            non_empty_files.append(stem + paired_extensions[0])
            non_empty_files.append(stem + paired_extensions[1])

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(non_empty_files, output_dir):
            self.assertTrue(passed, detail)

        # count the entries of the output folder with a non-zero size; this
        # count has to equal the number of expected files
        entry_paths = [
            os.path.join(output_dir, entry) for entry in os.listdir(output_dir)
        ]
        sized_entries = [path for path in entry_paths if os.path.getsize(path)]
        self.assertEqual(len(sized_entries), len(non_empty_files))

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #11
0
    def test_trimmomatic_bowtie2_two_databases_paired_end_serial(self):
        """
        Run kneaddata on paired end input with the bowtie2 database given
        twice (so both databases have the same name), with the no-discordant
        and serial options
        Check the expected outputs and the number of entries in the output
        folder
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # the same fastq file is supplied for both ends of the pair
        kneaddata_args = ["kneaddata"]
        kneaddata_args += ["--input", cfg.fastq_file, "--input", cfg.fastq_file]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--no-discordant", "--serial"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names
        stem = utils.file_basename(cfg.fastq_file)
        filtered_stem = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2")

        # outputs handed to check_output
        non_empty_files = [stem + cfg.log_extension]
        for paired_extensions in (cfg.paired_trim_extensions,
                                  cfg.final_extensions_paired):
            non_empty_files.append(stem + paired_extensions[0])
            non_empty_files.append(stem + paired_extensions[1])

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(non_empty_files, output_dir):
            self.assertTrue(passed, detail)

        # the contaminated files are expected too, but they can be empty
        all_expected_files = non_empty_files + [
            filtered_stem + cfg.paired_contaminated_extension[0],
            filtered_stem + cfg.paired_contaminated_extension[1]
        ]

        # the folder must contain strictly more entries than the expected
        # list
        # NOTE(review): the earlier comment here said "at least", which
        # would be assertGreaterEqual -- confirm which one is intended
        entries = os.listdir(output_dir)
        self.assertGreater(len(entries), len(all_expected_files))

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #12
0
    def test_humann_m8(self):
        """
        Run humann with the demo m8 file as input and check the expected
        demo output files
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("m8")

        # run humann on the m8 input
        humann_args = ["humann"]
        humann_args += ["--input", cfg.demo_m8]
        humann_args += ["--output", output_dir]
        utils.run_humann(humann_args)

        # assert every (expression, message) result from check_output
        results = utils.check_output(cfg.expected_demo_output_files,
                                     output_dir)
        for passed, detail in results:
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #13
0
    def test_trimmomatic_bowtie2_database_and_trf_single_end_gzipped_input(
            self):
        """
        Run kneaddata on a gzipped single end fastq file with a bowtie2
        database, trf enabled and temp output stored
        Check the expected outputs, which include a file named after the
        basename of the gzipped input
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # gzipped input, temp files kept, trf switched on
        kneaddata_args = ["kneaddata", "--input", cfg.fastq_file_gzipped]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--store-temp-output", "--run-trf"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names; the other outputs
        # are named after the plain fastq file, not the gzipped one
        gzipped_stem = utils.file_basename(cfg.fastq_file_gzipped)
        stem = utils.file_basename(cfg.fastq_file)
        filtered_stem = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2")

        wanted_files = [
            gzipped_stem,
            stem + cfg.log_extension,
            stem + cfg.single_trim_extension,
        ]
        wanted_files += [
            filtered_stem + suffix
            for suffix in (cfg.clean_extension, cfg.contaminated_extension,
                           cfg.sam_extension)
        ]
        wanted_files += [
            stem + cfg.final_extension,
            stem + cfg.repeats_removed_extension,
        ]

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #14
0
    def test_humann_fasta_bypass_prescreen(self):
        """
        Run humann with the demo fasta file as input and the prescreen
        bypassed, then check the expected demo output files
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("fasta_bypass_prescreen")

        # run humann without the prescreen step
        humann_args = ["humann"]
        humann_args += ["--input", cfg.demo_fasta]
        humann_args += ["--output", output_dir]
        humann_args += ["--bypass-prescreen"]
        utils.run_humann(humann_args)

        # assert every (expression, message) result from check_output
        results = utils.check_output(cfg.expected_demo_output_files,
                                     output_dir)
        for passed, detail in results:
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #15
0
    def test_trimmomatic_bowtie2_database_fastqc_end_single_end(self):
        """
        Run kneaddata on single end input with a bowtie2 database, temp
        output stored and fastqc requested at the end of the workflow
        Check the fastqc, log, trim, filtered and final outputs
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # single input file, temp files kept, fastqc at the end
        kneaddata_args = ["kneaddata", "--input", cfg.fastq_file]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--store-temp-output", "--run-fastqc-end"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names; the fastqc
        # reports are named after the final output file
        stem = utils.file_basename(cfg.fastq_file)
        final_stem = utils.file_basename(stem + cfg.final_extension)
        filtered_stem = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2")

        # fastqc reports are looked up in the "fastqc" subfolder
        wanted_files = [
            os.path.join("fastqc", final_stem + cfg.fastqc_extensions[index])
            for index in (0, 1)
        ]
        wanted_files += [
            stem + cfg.log_extension,
            stem + cfg.single_trim_extension,
        ]
        wanted_files += [
            filtered_stem + suffix
            for suffix in (cfg.clean_extension, cfg.contaminated_extension,
                           cfg.sam_extension)
        ]
        wanted_files.append(stem + cfg.final_extension)

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
    def test_humann_fastq_biom_output(self):
        """
        Run humann with the demo fastq file as input and biom as the output
        format, then check the expected biom demo output files
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("fastq")

        # run humann with biom output
        humann_args = ["humann"]
        humann_args += ["--input", cfg.demo_fastq]
        humann_args += ["--output", output_dir]
        humann_args += ["--output-format", "biom"]
        utils.run_humann(humann_args)

        # assert every (expression, message) result from check_output
        results = utils.check_output(cfg.expected_demo_output_files_biom,
                                     output_dir)
        for passed, detail in results:
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #17
0
    def test_humann_fastq_custom_taxonomic_profile(self):
        """
        Run humann with the demo fastq file as input and the demo bugs list
        supplied as the taxonomic profile, then check the expected demo
        output files
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("fastq_custom_taxonomic_profile")

        # run humann with a custom taxonomic profile
        humann_args = ["humann"]
        humann_args += ["--input", cfg.demo_fastq]
        humann_args += ["--output", output_dir]
        humann_args += ["--taxonomic-profile", cfg.demo_bugs_list]
        utils.run_humann(humann_args)

        # assert every (expression, message) result from check_output
        results = utils.check_output(cfg.expected_demo_output_files,
                                     output_dir)
        for passed, detail in results:
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
    def test_humann2_split_tables_tsv(self):
        """
        Test splitting a multi-sample table with humann2_split_table

        The input used here is the biom gene families table and the expected
        files are the biom split files. The method name still says tsv; it
        is left unchanged so the test keeps its identifier.
        """

        input_file = cfg.multi_sample_genefamilies_biom

        # create a temp directory
        temp_directory = utils.create_temp_folder("split_tables_biom")

        # split the file
        utils.run_command([
            "humann2_split_table", "--input", input_file, "--output",
            temp_directory, "--verbose"
        ])

        # test the split files are as expected
        # check_output is consumed as an iterable of (expression, message)
        # pairs by the other tests in this file, so every pair is asserted
        # here as well; asserting the returned object itself only tests its
        # truthiness and does not look at the individual checks
        for expression, message in utils.check_output(
                cfg.multi_sample_split_files_biom, temp_directory):
            self.assertTrue(expression, message)

        # remove the temp folder
        utils.remove_temp_folder(temp_directory)
Beispiel #19
0
    def test_bowtie2_only_single_end(self):
        """
        Run kneaddata on single end input with a bowtie2 database, temp
        output stored and the trim step bypassed
        Check the log, filtered and final outputs
        """

        # output folder for this run
        output_dir = tempfile.mkdtemp(suffix="test_kneaddata_")

        # single input file, temp files kept, no trimming
        kneaddata_args = ["kneaddata", "--input", cfg.fastq_file]
        kneaddata_args += ["--output", output_dir]
        kneaddata_args += ["--reference-db", cfg.bowtie2_db_folder]
        kneaddata_args += ["--store-temp-output", "--bypass-trim"]
        utils.run_kneaddata(kneaddata_args)

        # basenames used to derive the output file names
        stem = utils.file_basename(cfg.fastq_file)
        filtered_stem = utils.get_filtered_file_basename(
            stem, cfg.bowtie2_db_folder, "bowtie2")

        wanted_files = [stem + cfg.log_extension]
        wanted_files += [
            filtered_stem + suffix
            for suffix in (cfg.clean_extension, cfg.contaminated_extension,
                           cfg.sam_extension)
        ]
        wanted_files.append(stem + cfg.final_extension)

        # assert every (expression, message) result from check_output
        for passed, detail in utils.check_output(wanted_files, output_dir):
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
Beispiel #20
0
    def test_humann2_fastq_bypass_translated_search(self):
        """
        Run humann2 with the demo fastq file as input and the translated
        search bypassed, then check the expected demo output files
        """

        # output folder for this run
        output_dir = utils.create_temp_folder("fastq_bypass_translated_search")

        # run humann2 without the translated search
        humann2_args = ["humann2"]
        humann2_args += ["--input", cfg.demo_fastq]
        humann2_args += ["--output", output_dir]
        humann2_args += ["--bypass-translated-search"]
        utils.run_humann2(humann2_args)

        # assert every (expression, message) result from check_output
        results = utils.check_output(cfg.expected_demo_output_files,
                                     output_dir)
        for passed, detail in results:
            self.assertTrue(passed, detail)

        # clean up the output folder
        utils.remove_temp_folder(output_dir)
    def test_humann2_strain_profile_tsv(self):
        """
        Test the tsv file entries running humann2_strain_profile
        Test with critical mean and critical count values

        The command is run from inside a temp folder. The original working
        directory is restored even when the command or an assertion fails,
        so a failure here does not leave the rest of the test run inside
        the temp folder.
        """

        # create a temp folder
        tempdir = utils.create_temp_folder("strain_profile")

        # move to this folder as the output files will be created in the current working folder
        current_working_directory = os.getcwd()
        try:
            os.chdir(tempdir)
        except EnvironmentError:
            # best effort: keep going and let the file checks below report
            # the problem if the outputs do not end up in the temp folder
            print("Warning: Unable to move to temp directory: " + tempdir)

        try:
            # run the command
            utils.run_command([
                "humann2_strain_profiler", "--input", cfg.strain_profile_input,
                "--critical_mean", "1", "--critical_count", "2"
            ])

            # check the output files are as expected
            # allow for varying precision in the calculations with almost equal
            for file_name, expected_output_file in zip(
                    cfg.strain_profile_file_names,
                    cfg.strain_profile_m1_n2_output_files):
                self.assertTrue(
                    utils.files_almost_equal(os.path.join(tempdir, file_name),
                                             expected_output_file))
        finally:
            # the working directory is process-wide state, always return to
            # the original one
            os.chdir(current_working_directory)

        # remove the temp file
        utils.remove_temp_folder(tempdir)