Example #1
    def test_token_stream(self):
        # Stream two fixture files through the shared tokenizer.
        stream = TextFileStreamer(path_list=[self.doc1, self.doc2],
                                  tokenizer=self.tokenizer)
        token_benchmark = [["doomed", "failure"], ["set", "success"]]
        id_benchmark = ["doc1", "doc2"]

        # Consume the generator; cache_list=["doc_id"] tells the streamer
        # to record each document's id as a side effect of iteration.
        token_result = list(stream.token_stream(cache_list=["doc_id"]))

        self.assertEqual(token_benchmark, token_result)
        # The cached ids land on the doc_id_cache attribute; plain attribute
        # access is clearer than reaching into __dict__.
        self.assertEqual(id_benchmark, stream.doc_id_cache)
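
For context, here is a minimal self-contained sketch of the same test. It assumes the snippet comes from the rosetta text-processing library (TextFileStreamer from rosetta.text.streamers, TokenizerBasic from rosetta.text.text_processors), that doc ids are derived from the fixture file names, and that the fixture texts below reduce to the benchmark token lists after lowercasing and stopword removal. All of those specifics are assumptions, not taken from the excerpt above.

    import os
    import shutil
    import tempfile
    import unittest

    # Assumed import locations: TextFileStreamer and TokenizerBasic as
    # shipped in the rosetta library (rosetta.text.*).
    from rosetta.text.streamers import TextFileStreamer
    from rosetta.text.text_processors import TokenizerBasic


    class TestTokenStream(unittest.TestCase):
        def setUp(self):
            # Hypothetical fixtures: the doc_id of each file is assumed to
            # be derived from its file name, so doc1.txt yields "doc1".
            self.temp_dir = tempfile.mkdtemp()
            self.doc1 = os.path.join(self.temp_dir, "doc1.txt")
            self.doc2 = os.path.join(self.temp_dir, "doc2.txt")
            with open(self.doc1, "w") as f:
                f.write("doomed to failure")  # assumed fixture content
            with open(self.doc2, "w") as f:
                f.write("set for success")    # assumed fixture content
            self.tokenizer = TokenizerBasic()

        def tearDown(self):
            shutil.rmtree(self.temp_dir)

        def test_token_stream(self):
            stream = TextFileStreamer(path_list=[self.doc1, self.doc2],
                                      tokenizer=self.tokenizer)
            token_result = list(stream.token_stream(cache_list=["doc_id"]))
            self.assertEqual([["doomed", "failure"], ["set", "success"]],
                             token_result)
            self.assertEqual(["doc1", "doc2"], stream.doc_id_cache)


    if __name__ == "__main__":
        unittest.main()

Run it with python -m unittest. If the real tokenizer keeps stopwords such as "to" and "for", the fixture text would need adjusting to match the benchmark lists.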