Python assign_taxonomyの例、cogent.app.rdp_classifier.assign_taxonomy Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: ctSkennerton/pycogent

    def test_assign_taxonomy_file_output(self):
        """assign_taxonomy writes the expected lines when given output_fp.

        Runs assign_taxonomy with ``output_fp`` set and checks that:

        * the call returns None,
        * the file holds as many lines as there are expected sequence ids,
        * each line (after sorting) starts with the matching sequence id.

        The taxonomy strings and confidence values are not checked here;
        they vary between runs and are tested elsewhere.
        """
        output_fp = get_tmp_filename(
            prefix='RDPAssignTaxonomyTests', suffix='.txt')
        # sorted() accepts whatever keys() returns (list or view), unlike
        # calling .sort() on the result of keys().
        expected_file_headers = sorted(self.expected_assignments1.keys())

        actual_return_value = assign_taxonomy(
            self.test_input1, min_confidence=0.95, output_fp=output_fp)

        # Read through a context manager so the handle is closed before
        # the file is removed.
        with open(output_fp) as output_file:
            actual_file_output = sorted(output_file)

        # remove the output_fp before running the assertions, so if they
        # fail the output file is still cleaned up
        remove(output_fp)

        # None return value on write to file
        self.assertEqual(actual_return_value, None)

        # zip() stops at the shorter input, so the number of lines has to
        # be compared explicitly for a short or empty file to be noticed.
        self.assertEqual(
            len(actual_file_output), len(expected_file_headers))

        # check that each line starts with the correct seq_id
        for a, e in zip(actual_file_output, expected_file_headers):
            self.assertTrue(a.startswith(e))

コード例 #2

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: ctSkennerton/pycogent

    def test_assign_taxonomy(self):
        """assign_taxonomy yields every expected lineage at least once.

        The classifier is run up to ``self.num_trials`` times. A sequence
        id counts as verified as soon as one run returns its expected
        lineage. This test may fail periodically, but failure should be
        rare.
        """
        expected = self.expected_assignments1
        pending_ids = set(expected.keys())
        for _trial in range(self.num_trials):
            obs_assignments = assign_taxonomy(self.test_input1)
            # iterate over a snapshot: ids are dropped from the set below
            for seq_id in tuple(pending_ids):
                lineage, _confidence = obs_assignments[seq_id]
                if lineage == expected[seq_id]:
                    pending_ids.discard(seq_id)
            if len(pending_ids) == 0:
                break

        # describe every id that never matched, using the last run's output
        messages = []
        for seq_id in pending_ids:
            observed = obs_assignments[seq_id]
            messages.extend([
                "Unable to verify %s trials" % self.num_trials,
                "  Sequence ID: %s" % seq_id,
                "  Expected: %s" % expected[seq_id],
                "  Observed: %s" % observed[0],
                "  Confidence: %s" % observed[1],
            ])

        # make sure all taxonomic results were correct at least once
        self.assertFalse(pending_ids, msg='\n'.join(messages))

コード例 #3

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: cxhernandez/pycogent

    def test_assign_taxonomy_file_output(self):
        """assign_taxonomy writes the expected lines when given output_fp.

        Runs assign_taxonomy with ``output_fp`` set and checks that:

        * the call returns None,
        * the file holds as many lines as there are expected sequence ids,
        * each line (after sorting) starts with the matching sequence id.

        The taxonomy strings and confidence values are not checked here;
        they vary between runs and are tested elsewhere.
        """
        output_fp = get_tmp_filename(
            prefix='RDPAssignTaxonomyTests', suffix='.txt')
        # sorted sequence ids, to line up with the sorted file lines
        expected_file_headers = sorted(self.expected_assignments1)

        actual_return_value = assign_taxonomy(
            self.test_input1, min_confidence=0.95, output_fp=output_fp)

        # Read through a context manager so the handle is closed before
        # the file is removed.
        with open(output_fp) as output_file:
            actual_file_output = sorted(output_file)

        # remove the output_fp before running the assertions, so if they
        # fail the output file is still cleaned up
        remove(output_fp)

        # None return value on write to file
        self.assertIsNone(actual_return_value)

        # zip() stops at the shorter input, so the number of lines has to
        # be compared explicitly for a short or empty file to be noticed.
        self.assertEqual(
            len(actual_file_output), len(expected_file_headers))

        # check that each line starts with the correct seq_id
        for a, e in zip(actual_file_output, expected_file_headers):
            self.assertTrue(a.startswith(e))

コード例 #4

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: cxhernandez/pycogent

    def test_assign_taxonomy(self):
        """Check that each expected lineage is observed in some trial.

        assign_taxonomy is called at most ``self.num_trials`` times on
        ``self.test_input1``; ids whose observed lineage equals the
        expected one are dropped from the outstanding set after each call.
        This test may fail periodically, but failure should be rare.
        """
        remaining = set(self.expected_assignments1.keys())
        for _ in range(self.num_trials):
            obs_assignments = assign_taxonomy(self.test_input1)
            confirmed = set()
            for seq_id in remaining:
                obs_lineage, _ = obs_assignments[seq_id]
                if obs_lineage == self.expected_assignments1[seq_id]:
                    confirmed.add(seq_id)
            # drop the ids confirmed in this trial in one step
            remaining -= confirmed
            if not remaining:
                break

        # one five-line report per id that was never confirmed
        messages = []
        for seq_id in remaining:
            last_lineage = obs_assignments[seq_id][0]
            last_confidence = obs_assignments[seq_id][1]
            messages += [
                "Unable to verify %s trials" % self.num_trials,
                "  Sequence ID: %s" % seq_id,
                "  Expected: %s" % self.expected_assignments1[seq_id],
                "  Observed: %s" % last_lineage,
                "  Confidence: %s" % last_confidence,
            ]

        # make sure all taxonomic results were correct at least once
        self.assertFalse(remaining, msg='\n'.join(messages))

コード例 #5

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: ctSkennerton/pycogent

 def test_assign_taxonomy_short_sequence(self):
     """assign_taxonomy reports a too-short sequence as 'Unassignable'.

     The expected result maps the sequence id to ('Unassignable', 1.0).
     """
     short_fasta = [
         '>MySeq 1',
         'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGA',
     ]
     expected = {'MySeq 1': ('Unassignable', 1.0)}
     observed = assign_taxonomy(short_fasta)
     self.assertEqual(observed, expected)

コード例 #6

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: cxhernandez/pycogent

 def test_assign_taxonomy_short_sequence(self):
     """A too-short input sequence is assigned ('Unassignable', 1.0)."""
     seq_id = 'MySeq 1'
     fasta_lines = ['>MySeq 1', 'TTCCGGTTGATCCTGCCGGACCCGACTGCTATCCGGA']
     self.assertEqual(
         assign_taxonomy(fasta_lines),
         {seq_id: ('Unassignable', 1.0)},
     )

コード例 #7

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: ctSkennerton/pycogent

    def test_assign_taxonomy_alt_confidence(self):
        """assign_taxonomy behaves as expected with min_confidence=0.95.

        Every observed lineage must be a prefix of the expected lineage or
        be the literal "Unclassified", and every observed confidence must
        be at least 0.95.
        """
        threshold = 0.95
        obs_assignments = assign_taxonomy(
            self.test_input1, min_confidence=threshold)

        for seq_id, assignment in obs_assignments.items():
            lineage, confidence = assignment
            expected_lineage = self.expected_assignments1[seq_id]
            message = "Sequence ID: %s, assignment: %s" % (seq_id, assignment)

            lineage_ok = (expected_lineage.startswith(lineage)
                          or lineage == "Unclassified")
            self.assertTrue(lineage_ok, msg=message)
            self.assertTrue(confidence >= threshold, msg=message)

コード例 #8

0

ファイルを表示

ファイル: test_rdp_classifier.py プロジェクト: cxhernandez/pycogent

    def test_assign_taxonomy_alt_confidence(self):
        """Check lineages and confidences returned for min_confidence=0.95.

        For each returned sequence id the observed lineage has to be a
        prefix of the expected lineage, or else equal "Unclassified"; the
        observed confidence has to be 0.95 or higher.
        """
        results = assign_taxonomy(self.test_input1, min_confidence=0.95)

        # snapshot of the (id, assignment) pairs
        pairs = list(results.items())
        for seq_id, assignment in pairs:
            obs_lineage, obs_confidence = assignment
            exp_lineage = self.expected_assignments1[seq_id]
            message = "Sequence ID: %s, assignment: %s" % (seq_id, assignment)

            is_prefix = exp_lineage.startswith(obs_lineage)
            self.assertTrue(
                is_prefix or (obs_lineage == "Unclassified"),
                msg=message,
            )
            self.assertTrue(obs_confidence >= 0.95, msg=message)

コード例 #9

0

ファイルを表示

 def test_assign_taxonomy(self):
     """assign_taxonomy matches each expected assignment at least once.

     The classifier is run up to ten times because taxon assignments
     vary between runs. Each run must return the expected sequence ids
     and confidences of at least 0.80; the test passes once every
     sequence has received its expected taxonomy in some run. This test
     may fail periodically, but failure should be rare.
     """
     # sorted (id, taxonomy, confidence) rows give a stable order
     expected_assignments = sorted(
         (seq_id, value[0], value[1])
         for seq_id, value in self.expected_assignments1.items())

     # verified[j] becomes True once row j got its expected taxonomy
     verified = [False] * len(expected_assignments)
     for _attempt in range(10):
         raw_assignments = assign_taxonomy(self.test_input1)
         actual_assignments = sorted(
             (seq_id, value[0], value[1])
             for seq_id, value in raw_assignments.items())

         for j, e in enumerate(expected_assignments):
             a = actual_assignments[j]
             # same description fields
             self.assertEqual(a[0], e[0])

             # same taxonomic assignment
             if a[1] == e[1]:
                 verified[j] = True

             # confidence >= 0.80
             self.assertTrue(a[2] >= 0.80)

         if all(verified):
             # all sequences have been correctly assigned at
             # least once -- bail out
             break

     # make sure all taxonomic results were correct at least once
     self.assertTrue(all(verified))

コード例 #10

0

ファイルを表示

 def test_assign_taxonomy_alt_confidence(self):
     """assign_taxonomy behaves as expected with min_confidence=0.95.

     Observed and expected assignments are each turned into a sorted
     list of (id, taxonomy, confidence) rows and compared pairwise: the
     ids must agree and every observed confidence must be at least 0.95.
     Pairing uses zip(), so it stops at the shorter of the two lists.
     """
     def as_sorted_rows(assignments):
         # flatten {id: value} into sorted (id, value[0], value[1]) rows
         return sorted(
             (seq_id, value[0], value[1])
             for seq_id, value in assignments.items())

     actual_rows = as_sorted_rows(
         assign_taxonomy(self.test_input1, min_confidence=0.95))
     expected_rows = as_sorted_rows(self.expected_assignments1)

     for actual, expected in zip(actual_rows, expected_rows):
         # same description fields
         self.assertEqual(actual[0], expected[0])
         # confidence >= 0.95
         self.assertTrue(actual[2] >= 0.95)