コード例 #1
0
    def test_tabular_output(self):
        """Check that SortMeRNA writes a BLAST tabular file on request.

        The reference sequences are indexed, the reads are clustered with
        ``tabular=True``, and the test then asserts that
        ``sortmerna_otus.blast`` exists in ``self.output_dir``.
        """
        # Index the reference sequences and record the index files in
        # self.files_to_remove.
        reference_fp = abspath(self.file_reference_seq_fp)
        index_result = build_database_sortmerna(
            reference_fp, max_pos=250, output_dir=self.output_dir)
        sortmerna_db, index_files = index_result
        self.files_to_remove.extend(index_files)

        # Cluster the reads against the reference, asking for tabular output.
        # The three return values are not inspected here; unpacking them only
        # checks that the call returns a 3-tuple.
        otu_map_fp = join(self.output_dir, "sortmerna_otus.txt")
        _clusters, _failures, _smr_files = sortmerna_ref_cluster(
            seq_path=self.file_read_seqs_fp,
            sortmerna_db=sortmerna_db,
            refseqs_fp=self.file_reference_seq_fp,
            result_path=otu_map_fp,
            tabular=True)

        # The tabular alignments must be present alongside the OTU map.
        blast_fp = join(self.output_dir, "sortmerna_otus.blast")
        self.assertTrue(exists(blast_fp))
コード例 #2
0
    def test_sortmerna_default_param(self):
        """SortMeRNA v2.0 reference OTU picking works with default settings.

        Indexes the reference sequences, clusters the reads with
        ``sortmerna_ref_cluster`` without overriding any optional parameter,
        and cross-checks the four output files (OTU map, log, de novo FASTA,
        aligned FASTA) and the returned cluster map against the expected
        read IDs.
        """
        # NOTE(review): the "U" open mode and dict.itervalues() used below are
        # Python 2 idioms and are kept as-is; both must be replaced if this
        # test is ported to Python 3.

        # Rebuild the index
        sortmerna_db, db_files_to_remove = build_database_sortmerna(
            abspath(self.file_reference_seq_fp),
            max_pos=250,
            output_dir=self.output_dir)

        # Files created by indexdb_rna to be deleted
        self.files_to_remove.extend(db_files_to_remove)

        # Run SortMeRNA. Only the cluster map is inspected; the other two
        # return values are unpacked to keep the 3-tuple arity check.
        cluster_map, _failures, _smr_files = sortmerna_ref_cluster(
            seq_path=self.file_read_seqs_fp,
            sortmerna_db=sortmerna_db,
            refseqs_fp=self.file_reference_seq_fp,
            result_path=join(self.output_dir, "sortmerna_otus.txt"))

        # Name every expected output file instead of addressing them by
        # list index later on.
        otu_map_fp = join(self.output_dir, 'sortmerna_otus_otus.txt')
        log_fp = join(self.output_dir, 'sortmerna_otus.log')
        denovo_fp = join(self.output_dir, 'sortmerna_otus_denovo.fasta')
        aligned_fp = join(self.output_dir, 'sortmerna_otus.fasta')
        output_files = [otu_map_fp, log_fp, denovo_fp, aligned_fp]

        # Check all sortmerna output files exist
        for fp in output_files:
            self.assertTrue(exists(fp))

        # Files created sortmerna to be deleted (StdErr and StdOut were already
        # removed in sortmerna_ref_cluster)
        self.files_to_remove.extend(output_files)

        # Random reads that should not appear in any output file
        random_reads = ['simulated_random_reads.fa.000000000',
                        'simulated_random_reads.fa.000000001',
                        'simulated_random_reads.fa.000000002',
                        'simulated_random_reads.fa.000000003',
                        'simulated_random_reads.fa.000000004',
                        'simulated_random_reads.fa.000000005',
                        'simulated_random_reads.fa.000000006',
                        'simulated_random_reads.fa.000000007',
                        'simulated_random_reads.fa.000000008',
                        'simulated_random_reads.fa.000000009']

        # Reads passing E-value threshold and with similarity/coverage >=97%
        otu_reads = ['HMPMockV1.2.Staggered2.673827_47',
                     'HMPMockV1.2.Staggered2.673827_115',
                     'HMPMockV1.2.Staggered2.673827_122',
                     'HMPMockV1.2.Staggered2.673827_161',
                     'HMPMockV1.2.Staggered2.673827_180',
                     'HMPMockV1.2.Staggered2.673827_203',
                     'HMPMockV1.2.Staggered2.673827_207',
                     'HMPMockV1.2.Staggered2.673827_215',
                     'HMPMockV1.2.Staggered2.673827_218',
                     'HMPMockV1.2.Staggered2.673827_220']

        # Reads passing E-value threshold and with similarity/coverage <97%
        denovo_reads = ['HMPMockV1.2.Staggered2.673827_0',
                        'HMPMockV1.2.Staggered2.673827_1',
                        'HMPMockV1.2.Staggered2.673827_2',
                        'HMPMockV1.2.Staggered2.673827_3',
                        'HMPMockV1.2.Staggered2.673827_4',
                        'HMPMockV1.2.Staggered2.673827_5',
                        'HMPMockV1.2.Staggered2.673827_6',
                        'HMPMockV1.2.Staggered2.673827_7',
                        'HMPMockV1.2.Staggered2.673827_8',
                        'HMPMockV1.2.Staggered2.673827_9']

        # The only OTU cluster ID expected in the OTU map
        otu_clusters = ['295053']

        # Every aligned read is either an OTU read or a de novo read; build
        # the combined list once rather than once per FASTA record.
        aligned_reads = otu_reads + denovo_reads

        # Each file is opened in its own ``with`` block so the handle is
        # closed even when one of the assertions inside the loop fails.

        # Verify the aligned FASTA file
        with open(aligned_fp, "U") as f_aligned:
            for label, _seq in parse_fasta(f_aligned):
                read_id = label.split()[0]
                # Read is not random
                self.assertNotIn(read_id, random_reads)
                # Read is either in otu_reads or denovo_reads
                self.assertIn(read_id, aligned_reads)

        # Verify the de novo reads FASTA file
        with open(denovo_fp, "U") as f_denovo:
            for label, _seq in parse_fasta(f_denovo):
                read_id = label.split()[0]
                # Read is not random
                self.assertNotIn(read_id, random_reads)
                # Read is not an OTU read
                self.assertNotIn(read_id, otu_reads)
                # Read is a de novo read
                self.assertIn(read_id, denovo_reads)

        # Check the OTU map: first field is the cluster ID, the remaining
        # whitespace-separated fields are the reads assigned to it.
        with open(otu_map_fp, "U") as f_otumap:
            for line in f_otumap:
                otu_entry = line.split()
                # Cluster ID is correct
                self.assertIn(otu_entry[0], otu_clusters)
                # Each read in the cluster must exclusively be an OTU read
                for read in otu_entry[1:]:
                    self.assertNotIn(read, random_reads)
                    self.assertNotIn(read, denovo_reads)
                    self.assertIn(read, otu_reads)

        # Check returned cluster map: a single cluster holding exactly the
        # OTU reads. Both sides are compared in sorted order because the
        # order of reads inside a cluster is not what is being tested.
        expected_cluster = sorted(otu_reads)
        self.assertEqual(1, len(cluster_map))
        for actual_cluster in cluster_map.itervalues():
            self.assertEqual(sorted(actual_cluster), expected_cluster)

        # Check log file number of clusters and failures corresponds to
        # the results in the output files. The counts are read from the
        # text following ' = ' on the matching log lines and stay 0 when
        # no such line is present.
        otus_prefix = " Total OTUs"
        denovo_prefix = "    Total reads for de novo clustering"
        num_clusters = 0
        num_failures = 0
        with open(log_fp, "U") as f_log:
            for line in f_log:
                if line.startswith(otus_prefix):
                    num_clusters = int(line.split(' = ')[1].strip())
                elif line.startswith(denovo_prefix):
                    num_failures = int(line.split(' = ')[1].strip())

        self.assertEqual(num_clusters, len(otu_clusters))
        self.assertEqual(num_failures, len(denovo_reads))