Beispiel #1
0
    def test_to_vw(self):
        """Check that ``to_vw`` writes the expected text to a file.

        The streamer's cursor is replaced with ``self.mock_cursor``; the
        output is written to ``self.temp_vw_path`` and read back so it can
        be compared against a literal benchmark string.
        """
        stream = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        stream.cursor = self.mock_cursor

        # Close the output handle before reading so the written data is
        # flushed deterministically instead of relying on garbage collection.
        with open(self.temp_vw_path, 'w') as out_file:
            stream.to_vw(out_file)
        with open(self.temp_vw_path) as in_file:
            result = in_file.read()

        benchmark = " 1 a| failure:1 doomed:1\n 1 1| set:1 success:1\n"
        self.assertEqual(benchmark, result)
Beispiel #2
0
    def test_to_scipyspare(self):
        """Compare ``to_scipysparse`` output with a hand-built CSR matrix."""
        streamer = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        streamer.cursor = self.mock_cursor

        actual = streamer.to_scipysparse()
        expected = sparse.csr_matrix([[1, 1, 0, 0], [0, 0, 1, 1]])

        # Densify both sides and require every element to agree.
        elementwise_match = actual.toarray() == expected.toarray()
        self.assertTrue(elementwise_match.all())
Beispiel #3
0
    def test_to_vw(self):
        """Verify the text produced by ``to_vw`` against a fixed benchmark.

        ``self.mock_cursor`` is installed as the streamer's cursor, the
        output goes to ``self.temp_vw_path``, and the file contents are then
        read back and compared with the expected string.
        """
        stream = MySQLStreamer(self.db_setup,
                               tokenizer=self.tokenizer)
        stream.cursor = self.mock_cursor

        # Use context managers so both handles are closed; the writer must be
        # closed (and therefore flushed) before the file is read back.
        with open(self.temp_vw_path, 'w') as vw_out:
            stream.to_vw(vw_out)
        with open(self.temp_vw_path) as vw_in:
            result = vw_in.read()

        benchmark = " 1 a| failure:1 doomed:1\n 1 1| set:1 success:1\n"
        self.assertEqual(benchmark, result)
Beispiel #4
0
    def test_to_scipyspare(self):
        """Check ``to_scipysparse`` against an expected 2x4 CSR matrix."""
        source = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        source.cursor = self.mock_cursor

        produced = source.to_scipysparse()
        reference = sparse.csr_matrix([[1, 1, 0, 0], [0, 0, 1, 1]])

        # Element-wise comparison on the dense forms; all cells must match.
        self.assertTrue((produced.toarray() == reference.toarray()).all())
Beispiel #5
0
    def test_token_stream(self):
        """Check ``token_stream`` output and the cached document ids."""
        streamer = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        streamer.cursor = self.mock_cursor

        expected_tokens = [['doomed', 'failure'], ['set', 'success']]
        expected_ids = ['a', '1']

        # Exhaust the stream in one pass, asking for 'doc_id' to be cached.
        observed_tokens = list(streamer.token_stream(cache_list=['doc_id']))

        self.assertEqual(expected_tokens, observed_tokens)
        # The test reads the cache from the instance dict under
        # 'doc_id_cache' after the stream has been consumed.
        self.assertEqual(expected_ids, streamer.__dict__['doc_id_cache'])
Beispiel #6
0
    def test_info_stream(self):
        """Check the 'tokens' and 'text' fields yielded by ``info_stream``."""
        streamer = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        streamer.cursor = self.mock_cursor

        expected_tokens = [['doomed', 'failure'], ['set', 'success']]
        expected_texts = ['doomed to failure', 'set for success']

        # Pull both fields from each record as it is yielded (single pass),
        # then split the pairs into two parallel lists.
        pairs = [(info['tokens'], info['text'])
                 for info in streamer.info_stream()]
        observed_tokens = [tokens for tokens, _ in pairs]
        observed_texts = [text for _, text in pairs]

        self.assertEqual(expected_tokens, observed_tokens)
        self.assertEqual(expected_texts, observed_texts)
Beispiel #7
0
    def test_token_stream(self):
        """Verify streamed token lists and the ids cached alongside them."""
        source = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        source.cursor = self.mock_cursor

        # Consume the whole generator, requesting that 'doc_id' be cached.
        produced = list(source.token_stream(cache_list=['doc_id']))

        wanted_tokens = [['doomed', 'failure'], ['set', 'success']]
        self.assertEqual(wanted_tokens, produced)

        # The cached ids are looked up in the instance dict as
        # 'doc_id_cache' once the stream is exhausted.
        wanted_ids = ['a', '1']
        self.assertEqual(wanted_ids, source.__dict__['doc_id_cache'])
Beispiel #8
0
    def test_info_stream(self):
        """Verify the token lists and raw text carried by ``info_stream``."""
        source = MySQLStreamer(self.db_setup, tokenizer=self.tokenizer)
        source.cursor = self.mock_cursor

        # Read 'tokens' then 'text' from every record during one iteration
        # of the stream, keeping the two fields paired until afterwards.
        collected = [(record['tokens'], record['text'])
                     for record in source.info_stream()]
        seen_tokens = [pair[0] for pair in collected]
        seen_texts = [pair[1] for pair in collected]

        wanted_tokens = [['doomed', 'failure'], ['set', 'success']]
        wanted_texts = ['doomed to failure', 'set for success']
        self.assertEqual(wanted_tokens, seen_tokens)
        self.assertEqual(wanted_texts, seen_texts)