예제 #1
0
    def test_get_clusters_from_fasta_filepath_rev_strand_match(self):
        """Check clustering of a sequence and its reverse complement.

        The file at self.tmp_raw_dna_seqs_rc_filepath is clustered twice with
        percent_ID=0.90: once with enable_rev_strand_matching=False and once
        with enable_rev_strand_matching=True. Each time the returned
        (clusters, failures, new seeds) lists are sorted and compared with
        the values expected for that setting.
        """
        scenarios = (
            # seq and its rc don't cluster when
            # enable_rev_strand_matching = False
            (False,
             [['uclust_test_seqs_0'], ['uclust_test_seqs_0_rc']],
             [],
             ['uclust_test_seqs_0', 'uclust_test_seqs_0_rc']),
            # seq and its rc cluster together when
            # enable_rev_strand_matching = True
            (True,
             [['uclust_test_seqs_0', 'uclust_test_seqs_0_rc']],
             [],
             ['uclust_test_seqs_0']),
        )

        for rev_match, want_clusters, want_failures, want_seeds in scenarios:
            observed = get_clusters_from_fasta_filepath(
                self.tmp_raw_dna_seqs_rc_filepath,
                original_fasta_path=None,
                save_uc_files=False,
                percent_ID=0.90,
                enable_rev_strand_matching=rev_match)

            # Top-level order is not significant, so normalise both sides
            # before comparing.
            for wanted in (want_clusters, want_failures, want_seeds):
                wanted.sort()
            for position in range(3):
                observed[position].sort()

            self.assertEqual(
                observed, (want_clusters, want_failures, want_seeds))
예제 #2
0
    def test_get_clusters_from_fasta_filepath(self):
        """Check the lists returned for the unsorted test FASTA file.

        Calls get_clusters_from_fasta_filepath on
        self.tmp_unsorted_fasta_filepath with percent_ID=0.90 and compares
        the result with (expected_cluster_list, expected_failure_list,
        expected_new_seed_list), ignoring the top-level order of each list.
        """
        clusters_res = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            percent_ID=0.90,
            save_uc_files=False)

        # The result is owned by this test, so sorting it in place is safe.
        clusters_res[0].sort()
        clusters_res[1].sort()
        clusters_res[2].sort()

        # The expected_* lists are defined outside this method; compare
        # against sorted copies so this test does not reorder them for any
        # other code that uses them.
        self.assertEqual(clusters_res, (sorted(expected_cluster_list),
                                        sorted(expected_failure_list),
                                        sorted(expected_new_seed_list)))
예제 #3
0
    def test_get_clusters_from_fasta_filepath_optimal(self):
        """Check the lists returned when clustering with optimal=True.

        Calls get_clusters_from_fasta_filepath on
        self.tmp_unsorted_fasta_filepath with percent_ID=0.90 and
        optimal=True, and compares the result with (expected_cluster_list,
        expected_failure_list, expected_new_seed_list), ignoring the
        top-level order of each list.
        """
        # need to compile a small test where optimal has an effect --
        # this currently is only testing that we don't get a failure with
        # optimal
        clusters_res = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            save_uc_files=False,
            percent_ID=0.90,
            optimal=True)

        # The result is owned by this test, so sorting it in place is safe.
        clusters_res[0].sort()
        clusters_res[1].sort()
        clusters_res[2].sort()

        # The expected_* lists are defined outside this method; compare
        # against sorted copies so this test does not reorder them for any
        # other code that uses them.
        self.assertEqual(clusters_res, (sorted(expected_cluster_list),
                                        sorted(expected_failure_list),
                                        sorted(expected_new_seed_list)))
예제 #4
0
    def test_get_clusters_from_fasta_filepath_suppress_sort(self):
        """Check the lists returned when clustering with suppress_sort=True.

        Calls get_clusters_from_fasta_filepath on
        self.tmp_unsorted_fasta_filepath with percent_ID=0.90 and
        suppress_sort=True, and compares the result with
        (expected_cluster_list, expected_failure_list,
        expected_new_seed_list), ignoring the top-level order of each list.
        """
        clusters_res = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            percent_ID=0.90,
            suppress_sort=True,
            save_uc_files=False)

        # The result is owned by this test, so sorting it in place is safe.
        clusters_res[0].sort()
        clusters_res[1].sort()
        clusters_res[2].sort()

        # The expected_* lists are defined outside this method; compare
        # against sorted copies so this test does not reorder them for any
        # other code that uses them.
        self.assertEqual(clusters_res, (sorted(expected_cluster_list),
                                        sorted(expected_failure_list),
                                        sorted(expected_new_seed_list)))
예제 #5
0
    def test_get_clusters_from_fasta_filepath_extending_reference_db(self):
        """Check clustering against a reference db with new clusters allowed.

        Clusters self.tmp_unsorted_fasta_filepath against
        self.ref_dna_seqs_fp with suppress_new_clusters=False and
        enable_rev_strand_matching=True, then compares the sorted result
        with the sorted self.ref_test_clusters2, self.ref_test_failures2 and
        self.ref_test_new_seeds2 fixtures.
        """
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            max_accepts=7,
            max_rejects=12,
            percent_ID=0.90,
            subject_fasta_filepath=self.ref_dna_seqs_fp,
            suppress_new_clusters=False,
            enable_rev_strand_matching=True,
            HALT_EXEC=False,
            save_uc_files=False)

        wanted = (self.ref_test_clusters2,
                  self.ref_test_failures2,
                  self.ref_test_new_seeds2)

        # Top-level order is not significant; normalise both sides in place.
        for fixture in wanted:
            fixture.sort()
        for position in range(3):
            observed[position].sort()

        self.assertEqual(observed, wanted)
예제 #6
0
    def test_get_clusters_from_fasta_filepath_reference_db_only(self):
        """Check clustering against a reference db with new clusters suppressed.

        Clusters self.tmp_unsorted_fasta_filepath against
        self.ref_dna_seqs_fp with suppress_new_clusters=True, then compares
        the sorted result with the sorted self.ref_test_clusters1,
        self.ref_test_failures1 and self.ref_test_new_seeds1 fixtures.
        """
        observed = get_clusters_from_fasta_filepath(
            self.tmp_unsorted_fasta_filepath,
            original_fasta_path=None,
            save_uc_files=False,
            max_accepts=7,
            max_rejects=12,
            percent_ID=0.90,
            subject_fasta_filepath=self.ref_dna_seqs_fp,
            suppress_new_clusters=True,
            HALT_EXEC=False)

        wanted = (self.ref_test_clusters1,
                  self.ref_test_failures1,
                  self.ref_test_new_seeds1)

        # Top-level order is not significant; normalise both sides in place.
        for fixture in wanted:
            fixture.sort()
        for position in range(3):
            observed[position].sort()

        self.assertEqual(observed, wanted)