def __init__(self, tokenizer=None, cache_path='/tmp/'):
    """
    Initialise the instance with a new ``BayesCategories`` object and an
    empty ``probabilities`` dict.

    :param tokenizer: Callable used in place of the default
        ``SimpleBayes.tokenize_text``; any falsy value selects the default
    :type tokenizer: function (optional)
    :param cache_path: path to data storage
    :type cache_path: str
    """
    self.categories = BayesCategories()

    # Fall back to the class-level tokenizer when no override is supplied.
    if not tokenizer:
        tokenizer = SimpleBayes.tokenize_text
    self.tokenizer = tokenizer

    self.cache_path = cache_path
    self.probabilities = {}
def test_persist_cache(self, dump_mock, open_mock):
    # cache_persist() is expected to open the pickle file under cache_path
    # for binary writing and dump the categories object to the handle that
    # open() returned.
    # NOTE(review): dump_mock / open_mock are presumably injected by patch
    # decorators outside this view -- confirm against the decorators.
    file_handle = 'opened'
    expected_path = '/tmp/_simplebayes.pickle'
    open_mock.return_value = file_handle

    stored = BayesCategories()
    stored.categories = {'foo': 'bar'}

    classifier = SimpleBayes()
    classifier.cache_path = '/tmp/'
    classifier.categories = stored

    classifier.cache_persist()

    open_mock.assert_called_once_with(expected_path, 'wb')
    dump_mock.assert_called_once_with(stored, file_handle)
def test_cache_train(self, exists_mock, load_mock, open_mock, calc_mock):
    # When the cache file is reported to exist, cache_train() is expected to
    # open it for binary reading, load the categories from that handle, call
    # calc_mock once with no arguments, and store the loaded object on the
    # instance.
    # NOTE(review): the four mocks are presumably injected by patch
    # decorators outside this view -- confirm what each one replaces.
    pickle_path = 'foo/_simplebayes.pickle'
    file_handle = 'opened'

    cached = BayesCategories()
    cached.categories = {'foo': 'bar'}

    load_mock.return_value = cached
    open_mock.return_value = file_handle
    exists_mock.return_value = True

    classifier = SimpleBayes(cache_path='foo')
    classifier.cache_train()

    exists_mock.assert_called_once_with(pickle_path)
    open_mock.assert_called_once_with(pickle_path, 'rb')
    load_mock.assert_called_once_with(file_handle)
    calc_mock.assert_called_once_with()
    self.assertEqual(classifier.categories, cached)
def __init__(
    self,
    tokenizer=None,
    cache_path='/tmp/',
    cache_data=None,
    cache_in_memory=False,
):
    """
    Initialise the instance with a new ``BayesCategories`` object and an
    empty ``probabilities`` dict.

    :param tokenizer: Callable used in place of the default
        ``SimpleBayes.tokenize_text``; any falsy value selects the default
    :type tokenizer: function (optional)
    :param cache_path: path to data storage
    :type cache_path: str
    :param cache_data: data from an existing cache
    :type cache_data: pickle.dumps object
    :param cache_in_memory: True if the persistent cache is kept in memory;
        forced to True whenever ``cache_data`` is supplied
    :type cache_in_memory: boolean
    """
    self.categories = BayesCategories()

    # Fall back to the class-level tokenizer when no override is supplied.
    if not tokenizer:
        tokenizer = SimpleBayes.tokenize_text
    self.tokenizer = tokenizer

    self.cache_path = cache_path
    self.cache_data = cache_data

    # Supplying cache data implies the in-memory mode regardless of the flag.
    if cache_data is not None:
        cache_in_memory = True
    self.cache_in_memory = cache_in_memory

    self.probabilities = {}
def test_get_categories(self):
    # get_categories() should compare equal to the `categories` attribute
    # after a category has been added.
    registry = BayesCategories()
    registry.add_category('foo')

    returned = registry.get_categories()

    self.assertEqual(returned, registry.categories)
def test_get_category(self):
    # Looking up a category that was just added should yield a BayesCategory.
    registry = BayesCategories()
    registry.add_category('foo')

    fetched = registry.get_category('foo')

    self.assertIsInstance(fetched, BayesCategory)
def test_add_category(self):
    # add_category() should register the name as a key of `categories` and
    # map it to a BayesCategory instance.
    registry = BayesCategories()
    registry.add_category('foo')

    self.assertIn('foo', registry.categories)
    created = registry.categories['foo']
    self.assertIsInstance(created, BayesCategory)
def flush(self):
    """
    Discard all tokens & categories by swapping in a newly constructed
    ``BayesCategories`` object.
    """
    replacement = BayesCategories()
    self.categories = replacement