def test_cmp(self):
    """Two bags compare equal only while they hold the same word counts."""
    a = BagOfWords('car', 'chair', 'chicken')
    b = BagOfWords('car', 'chair', 'chicken')
    # Both bags were built from identical words, so ``==`` must hold.
    self.assertTrue(a == b)
    # One extra 'car' in ``a`` must break the equality.
    a.add('car')
    self.assertFalse(a == b)
def test_join_sub(self):
    """Check ``-`` between bags and with plain strings/lists as operands.

    The resulting bags are compared with ``assertEqual`` on their ``dict``
    form: ``assertCountEqual`` iterates its arguments, so on two dicts it
    would compare the keys only and leave the counts unchecked.
    """
    a = BagOfWords('car', 'chair', 'chicken')
    b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
    c = BagOfWords('plane')

    # Bag minus bag, in three different operand orders.
    self.assertEqual(dict(a - b - c), {'car': 1, 'chair': 1})
    self.assertEqual(dict(c - b - a), {'plane': 1})
    self.assertEqual(
        dict(b - c - a), {'chicken': 1, 'eye': 1, 'ugly': 1})

    # A plain string on the left-hand side of ``-``.
    total = b - c - a
    total = 'eye' - total
    self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})

    # Subtracting 'eye' a second time, once it is gone, changes nothing.
    total = b - c - a
    total = 'eye' - total
    total = total - 'eye'
    self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})

    # A list of words on the right-hand side of ``-``.
    total = b - c - a
    total = total - ['chicken', 'ugly']
    self.assertEqual(dict(total), {'eye': 1})
preprocessor=dummy, max_features=2000) preprocessor_setup = { "pre__use_standartize": [True, False], "pre__use_slang": [True, False], "pre__use_stopword": [True, False], "pre__use_lemmatization": [True, False], "pre__use_stemmer": [True, False], "pre__use_lowercase": [True, False], "pre__use_punctation": [True, False] } # Create the pipelines rnn_pipeline = Pipeline([("pre", preprocessor), ("bow", BagOfWords(max_features=2000)), ("rnn", RnnClassifier())], memory='cache') bayes_pipeline = Pipeline([("pre", preprocessor), ("vectorizer", vectorizer), ("bayes", MultinomialNB())], memory='cache') forest_pipeline = Pipeline([("pre", preprocessor), ("vectorizer", vectorizer), ("forest", RandomForestClassifier())], memory='cache') # Load the datasets datasets = [ SingleFile('data/Youtube01-Psy.csv'), SplittedFile('data/Youtube01-Psy.csv', 'data/Youtube02-KatyPerry.csv'),
def test_join_add(self):
    """Check ``+`` between bags and with plain strings/lists as operands.

    The resulting bags are compared with ``assertEqual`` on their ``dict``
    form: ``assertCountEqual`` iterates its arguments, so on two dicts it
    would compare the keys only and leave the counts unchecked.
    """
    a = BagOfWords('car', 'chair', 'chicken')
    b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
    c = BagOfWords('plane')

    # The sum of the three bags must not depend on operand order.
    merged = {
        'car': 1, 'chair': 1, 'eye': 1,
        'chicken': 3, 'plane': 1, 'ugly': 1,
    }
    self.assertEqual(dict(a + b + c), merged)
    self.assertEqual(dict(c + b + a), merged)
    self.assertEqual(dict(b + c + a), merged)

    # A plain string on the left-hand side of ``+``.
    total = a + b + c
    total = 'ugly' + total
    self.assertEqual(dict(total), {
        'car': 1, 'chair': 1, 'eye': 1,
        'chicken': 3, 'plane': 1, 'ugly': 2,
    })

    # Plain strings on both the left-hand and the right-hand side.
    total = a + b + c
    total = 'ugly' + total
    total = total + 'plane'
    self.assertEqual(dict(total), {
        'car': 1, 'chair': 1, 'eye': 1,
        'chicken': 3, 'plane': 2, 'ugly': 2,
    })

    # Lists of words; repeated entries in a list count once each.
    total = a + b + c
    total = (total
             + ['car', 'chair', 'chicken']
             + ['chicken', 'chicken', 'eye'])
    self.assertEqual(dict(total), {
        'car': 2, 'chair': 2, 'eye': 2,
        'chicken': 6, 'plane': 1, 'ugly': 1,
    })
def test_copy(self):
    """A bag returned by ``copy()`` compares equal to its source."""
    a = BagOfWords('car', 'chair', 'chicken')
    b = a.copy()
    self.assertTrue(a == b)
def setUp(self):
    """Store a ``BagOfWords`` built with no arguments as ``self.bow``."""
    bag = BagOfWords()
    self.bow = bag
class BagOfWordsTest(TestCase):
    """Unit tests for the ``BagOfWords`` container.

    Word/count mappings are compared with ``assertEqual`` rather than
    ``assertCountEqual``: the latter iterates its arguments, so on two dicts
    it would compare the keys only and leave the counts unchecked.
    ``assertCountEqual`` is kept where an unordered comparison of a plain
    sequence is what is wanted.
    """

    def setUp(self):
        """Store a ``BagOfWords`` built with no arguments as ``self.bow``."""
        self.bow = BagOfWords()

    def test_add_one_word(self):
        """Adding one word as a string and as a mapping sums its count."""
        self.bow.add('David')
        self.bow.add({'David': 2})
        self.assertCountEqual(self.bow.words(), ['David'])
        self.assertEqual(len(self.bow), 1)
        self.assertEqual(self.bow.num(), 3)
        self.assertEqual(self.bow.freq('David'), 3)
        self.assertEqual(dict(self.bow), {'David': 3})

    def test_add_two_words(self):
        """A single ``add`` call accepts a string and a list together."""
        self.bow.add('David', ['David', 'Álex'])
        self.assertCountEqual(self.bow.words(), ['Álex', 'David'])
        self.assertEqual(len(self.bow), 2)
        self.assertEqual(self.bow.num(), 3)
        self.assertEqual(self.bow.freq('David'), 2)
        self.assertEqual(dict(self.bow), {'Álex': 1, 'David': 2})

    def test_del_one_word(self):
        """Deleting one word lowers its count by one, never below zero."""
        # Deleting from the initial bag leaves it empty.
        self.bow.delete('David')
        self.assertEqual(dict(self.bow), {})

        # Deleting the only occurrence removes the word.
        self.bow.add('David')
        self.bow.delete('David')
        self.assertEqual(dict(self.bow), {})

        # Deleting one of two occurrences keeps the other.
        self.bow.add('David', 'David')
        self.bow.delete('David')
        self.assertCountEqual(self.bow.words(), ['David'])
        self.assertEqual(len(self.bow), 1)
        self.assertEqual(self.bow.num(), 1)
        self.assertEqual(self.bow.freq('David'), 1)
        self.assertEqual(dict(self.bow), {'David': 1})

    def test_del_two_word(self):
        """``delete`` accepts several words in one call."""
        # Deleting from the initial bag leaves it empty.
        self.bow.delete('David', 'Álex')
        self.assertEqual(dict(self.bow), {})

        # Deleting exactly what was added empties the bag again.
        self.bow.add('David', 'Álex')
        self.bow.delete('David', 'Álex')
        self.assertEqual(dict(self.bow), {})

        # Mixed adds and deletes end with one occurrence of each word.
        self.bow.add({'David': 2})
        self.bow.delete('David')
        self.bow.add('Álex')
        self.assertCountEqual(self.bow.words(), ['Álex', 'David'])
        self.assertEqual(len(self.bow), 2)
        self.assertEqual(self.bow.num(), 2)
        self.assertEqual(self.bow.freq('David'), 1)
        self.assertEqual(dict(self.bow), {'Álex': 1, 'David': 1})

    def test_join_add(self):
        """Check ``+`` between bags and with plain strings/lists."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
        c = BagOfWords('plane')

        # The sum of the three bags must not depend on operand order.
        merged = {
            'car': 1, 'chair': 1, 'eye': 1,
            'chicken': 3, 'plane': 1, 'ugly': 1,
        }
        self.assertEqual(dict(a + b + c), merged)
        self.assertEqual(dict(c + b + a), merged)
        self.assertEqual(dict(b + c + a), merged)

        # A plain string on the left-hand side of ``+``.
        total = a + b + c
        total = 'ugly' + total
        self.assertEqual(dict(total), {
            'car': 1, 'chair': 1, 'eye': 1,
            'chicken': 3, 'plane': 1, 'ugly': 2,
        })

        # Plain strings on both the left-hand and the right-hand side.
        total = a + b + c
        total = 'ugly' + total
        total = total + 'plane'
        self.assertEqual(dict(total), {
            'car': 1, 'chair': 1, 'eye': 1,
            'chicken': 3, 'plane': 2, 'ugly': 2,
        })

        # Lists of words; repeated entries in a list count once each.
        total = a + b + c
        total = (total
                 + ['car', 'chair', 'chicken']
                 + ['chicken', 'chicken', 'eye'])
        self.assertEqual(dict(total), {
            'car': 2, 'chair': 2, 'eye': 2,
            'chicken': 6, 'plane': 1, 'ugly': 1,
        })

    def test_join_sub(self):
        """Check ``-`` between bags and with plain strings/lists."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords({'chicken': 2}, ['eye', 'ugly'])
        c = BagOfWords('plane')

        # Bag minus bag, in three different operand orders.
        self.assertEqual(dict(a - b - c), {'car': 1, 'chair': 1})
        self.assertEqual(dict(c - b - a), {'plane': 1})
        self.assertEqual(
            dict(b - c - a), {'chicken': 1, 'eye': 1, 'ugly': 1})

        # A plain string on the left-hand side of ``-``.
        total = b - c - a
        total = 'eye' - total
        self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})

        # Subtracting 'eye' a second time, once it is gone, changes nothing.
        total = b - c - a
        total = 'eye' - total
        total = total - 'eye'
        self.assertEqual(dict(total), {'chicken': 1, 'ugly': 1})

        # A list of words on the right-hand side of ``-``.
        total = b - c - a
        total = total - ['chicken', 'ugly']
        self.assertEqual(dict(total), {'eye': 1})

    def test_clear(self):
        """``clear`` drops every word and every count."""
        self.bow.add('item', 'item')
        self.bow.clear()
        self.assertEqual(len(self.bow), 0)
        self.assertEqual(self.bow.num(), 0)
        self.assertEqual(self.bow.freq('item'), 0)
        self.assertEqual(dict(self.bow), {})

    def test_item(self):
        """Indexing the bag by a word yields that word's count."""
        self.bow.add('item1', 'item2', 'item2', 'item3')
        self.assertEqual(self.bow['item2'], 2)
        self.assertEqual(self.bow['item3'], 1)
        self.assertEqual(self.bow['item1'], 1)

    def test_copy(self):
        """A bag returned by ``copy()`` compares equal to its source."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = a.copy()
        self.assertTrue(a == b)

    def test_del(self):
        """``del bag[word]`` removes the word and keeps the others."""
        self.bow.add(['car', 'chair', 'chicken'])
        del self.bow['car']
        self.assertEqual(dict(self.bow), {'chair': 1, 'chicken': 1})

    def test_cmp(self):
        """Two bags compare equal only while they hold the same counts."""
        a = BagOfWords('car', 'chair', 'chicken')
        b = BagOfWords('car', 'chair', 'chicken')
        self.assertTrue(a == b)
        # One extra 'car' in ``a`` must break the equality.
        a.add('car')
        self.assertFalse(a == b)

    def test_has_key(self):
        """``in`` reports whether a word is present in the bag."""
        self.bow.add('car', 'chair', 'chicken')
        self.assertIn('car', self.bow)
        # The original repeated the positive check; also cover a word that
        # was never added.
        self.assertNotIn('plane', self.bow)

    def test_rate(self):
        """Rates are each word's share of the total number of words."""
        self.bow.add(['b', 'a', 'a', 'a'])
        # ``rates`` is presumably a word -> rate mapping; ``dict()`` keeps
        # the comparison on values as well as keys.
        self.assertEqual(dict(self.bow.rates), {'a': 0.75, 'b': 0.25})
        # NOTE(review): ordering of ``sorted_rates`` is not checked here;
        # switch to ``assertEqual`` if a specific order is guaranteed.
        self.assertCountEqual(
            self.bow.sorted_rates, [('a', 0.75), ('b', 0.25)])
        self.assertEqual(self.bow.rate('a'), 0.75)
        self.assertEqual(self.bow.rate('b'), 0.25)
        self.assertEqual(self.bow.rate('c'), 0)
        # After ``clear`` a previously counted word has rate 0.
        self.bow.clear()
        self.assertEqual(self.bow.rate('a'), 0)