Exemple #1
0
 def __init__(self, tokenizer=None, cache_path='/tmp/'):
     """
     Build a classifier with an empty category store and no
     computed probabilities.

     :param tokenizer: A tokenizer override; when omitted (or falsy)
         SimpleBayes.tokenize_text is used
     :type tokenizer: function (optional)
     :param cache_path: path to data storage
     :type cache_path: str
     """
     # Fall back to the built-in tokenizer when no override was given.
     if not tokenizer:
         tokenizer = SimpleBayes.tokenize_text

     self.tokenizer = tokenizer
     self.cache_path = cache_path
     self.probabilities = {}
     self.categories = BayesCategories()
Exemple #2
0
    def test_persist_cache(self, dump_mock, open_mock):
        # Sentinel handed back by the open mock so we can check that the
        # very same object is forwarded to the dump mock.
        file_handle = 'opened'
        open_mock.return_value = file_handle

        stored = BayesCategories()
        stored.categories = {'foo': 'bar'}

        classifier = SimpleBayes()
        classifier.cache_path = '/tmp/'
        classifier.categories = stored
        classifier.cache_persist()

        # The cache file must be opened exactly once, for binary writing...
        open_mock.assert_called_once_with('/tmp/_simplebayes.pickle', 'wb')
        # ...and the categories object dumped exactly once to that handle.
        dump_mock.assert_called_once_with(stored, file_handle)
Exemple #3
0
    def test_cache_train(self, exists_mock, load_mock, open_mock, calc_mock):
        # Values the mocks are expected to see / hand back.
        pickle_path = 'foo/_simplebayes.pickle'
        file_handle = 'opened'

        cached = BayesCategories()
        cached.categories = {'foo': 'bar'}

        # Simulate an existing cache file whose content is `cached`.
        load_mock.return_value = cached
        open_mock.return_value = file_handle
        exists_mock.return_value = True

        classifier = SimpleBayes(cache_path='foo')
        classifier.cache_train()

        # Existence check, binary read and load each happen exactly once,
        # followed by a single argument-less call on calc_mock.
        exists_mock.assert_called_once_with(pickle_path)
        open_mock.assert_called_once_with(pickle_path, 'rb')
        load_mock.assert_called_once_with(file_handle)
        calc_mock.assert_called_once_with()

        # The loaded object must end up as the classifier's categories.
        self.assertEqual(classifier.categories, cached)
Exemple #4
0
 def __init__(self,
              tokenizer=None,
              cache_path='/tmp/',
              cache_data=None,
              cache_in_memory=False):
     """
     Build a classifier with an empty category store and no
     computed probabilities.

     :param tokenizer: A tokenizer override; when omitted (or falsy)
         SimpleBayes.tokenize_text is used
     :type tokenizer: function (optional)
     :param cache_path: path to data storage
     :type cache_path: str
     :param cache_data: from an existing cache
     :type cache_data: pickle.dumps object
     :param cache_in_memory: True if the persistent cache is kept in
         memory; forced to True whenever cache_data is supplied
     :type cache_in_memory: boolean
     """
     # Supplying cache data implies the cache lives in memory, regardless
     # of the flag the caller passed.
     if cache_data is not None:
         cache_in_memory = True

     # Fall back to the built-in tokenizer when no override was given.
     if not tokenizer:
         tokenizer = SimpleBayes.tokenize_text

     self.tokenizer = tokenizer
     self.cache_path = cache_path
     self.cache_data = cache_data
     self.cache_in_memory = cache_in_memory
     self.probabilities = {}
     self.categories = BayesCategories()
Exemple #5
0
 def test_get_categories(self):
     # get_categories() must compare equal to the `categories` attribute.
     registry = BayesCategories()
     registry.add_category('foo')
     returned = registry.get_categories()
     self.assertEqual(returned, registry.categories)
Exemple #6
0
 def test_get_category(self):
     # Looking up a previously added name must yield a BayesCategory.
     registry = BayesCategories()
     registry.add_category('foo')
     fetched = registry.get_category('foo')
     self.assertIsInstance(fetched, BayesCategory)
Exemple #7
0
 def test_add_category(self):
     # add_category() must register the name in `categories` and map it
     # to a BayesCategory instance.
     name = 'foo'
     registry = BayesCategories()
     registry.add_category(name)
     self.assertIn(name, registry.categories)
     self.assertIsInstance(registry.categories[name], BayesCategory)
Exemple #8
0
 def flush(self):
     """
     Discards all tokens & categories by replacing the category
     store with a brand-new, empty BayesCategories instance
     """
     empty_store = BayesCategories()
     self.categories = empty_store