def test_generate_training_files(self):
        """_generate_training_files should yield the expected taxonomy file
        """
        params = {
            'id_to_taxonomy_fp': self.id_to_taxonomy_file.name,
            'reference_sequences_fp': self.reference_seqs_file.name,
        }
        assigner = RdpTaxonAssigner(params)

        # Two values are returned; only the taxonomy file is inspected here,
        # but the unpacking still requires exactly two items.
        generated = assigner._generate_training_files()
        taxonomy_file, _training_seqs_file = generated

        # see note in test_build_tree()
        self.assertEqual(taxonomy_file.read(), rdp_expected_taxonomy)
    def test_log(self):
        """RdpTaxonAssigner should write correct message to log file"""
        # No result_path is provided, so only the log output is checked.
        assigner = RdpTaxonAssigner({})
        assigner(seq_path=self.tmp_seq_filepath,
                 result_path=None,
                 log_path=self.tmp_log_filepath)

        # Read the observed log lines inside a context manager so the file
        # handle is always closed, even if reading raises.
        with open(self.tmp_log_filepath, 'r') as log_file:
            obs = [line.strip() for line in log_file]
        exp = rdp_test1_log_file_contents.split('\n')

        # sort the lists as the entries are written from a dict,
        # so order may vary
        obs.sort()
        exp.sort()
        self.assertEqual(obs, exp)
    def test_train_on_the_fly(self):
        """Training on-the-fly classifies reference sequence correctly with 100% certainty
        """
        # Supplying both a taxonomy map and reference sequences is what this
        # test varies relative to an RdpTaxonAssigner built with empty params.
        # NOTE(review): a temporary FASTA file built from test_seq_coll used
        # to be created here but was never passed to the app or read back, so
        # it was dropped; the app is run on self.tmp_seq_filepath as before.
        app = RdpTaxonAssigner({
            'id_to_taxonomy_fp':
            self.id_to_taxonomy_file.name,
            'reference_sequences_fp':
            self.reference_seqs_file.name,
        })
        actual = app(self.tmp_seq_filepath)

        expected = rdp_trained_test1_expected_dict

        # Only this single sequence's assignment is compared.
        key = 'X67228 some description'
        self.assertEqual(actual[key], expected[key])
    def setUp(self):
        """Create the temporary files and default app used by the tests.

        Sets tmp_seq_filepath (input FASTA), tmp_res_filepath and
        tmp_log_filepath (empty placeholder files), _paths_to_clean_up,
        id_to_taxonomy_file and reference_seqs_file (open temporary files),
        and default_app.
        """
        # Temporary input file
        self.tmp_seq_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTest_', suffix='.fasta')
        # The context manager closes the handle even if the write fails.
        with open(self.tmp_seq_filepath, 'w') as seq_file:
            seq_file.write(rdp_test1_fasta)

        # Temporary results filename
        self.tmp_res_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTestResult_',
            suffix='.tsv',
        )
        # touch the file so we don't get an error trying to close it
        open(self.tmp_res_filepath, 'w').close()

        # Temporary log filename
        self.tmp_log_filepath = get_tmp_filename(
            prefix='RdpTaxonAssignerTestLog_',
            suffix='.txt',
        )
        # touch the file so we don't get an error trying to close it
        open(self.tmp_log_filepath, 'w').close()

        # Paths created by name above; presumably removed in tearDown,
        # which is not visible here -- TODO confirm.
        self._paths_to_clean_up = [
            self.tmp_seq_filepath,
            self.tmp_res_filepath,
            self.tmp_log_filepath,
        ]

        # These handles are deliberately kept open on self: by default a
        # NamedTemporaryFile is deleted as soon as it is closed, and the
        # tests pass its .name to the application.
        self.id_to_taxonomy_file = NamedTemporaryFile(
            prefix='RdpTaxonAssignerTest_', suffix='.txt')
        self.id_to_taxonomy_file.write(rdp_id_to_taxonomy)
        # Rewind after writing.
        self.id_to_taxonomy_file.seek(0)

        self.reference_seqs_file = NamedTemporaryFile(
            prefix='RdpTaxonAssignerTest_', suffix='.fasta')
        self.reference_seqs_file.write(rdp_reference_seqs)
        # Rewind after writing.
        self.reference_seqs_file.seek(0)

        self.default_app = RdpTaxonAssigner({})
 def test_init(self):
     """RdpTaxonAssigner built with empty params should expose its Name
     """
     expected_name = 'RdpTaxonAssigner'
     assigner = RdpTaxonAssigner({})
     self.assertEqual(assigner.Name, expected_name)