def create_stop_word_remover(self, isDev=False):
    """Build a StopWordRemover around a stop-word dictionary.

    When ``isDev`` is truthy the dictionary is an ArrayDictionary created
    from ``self.get_stop_words()``; otherwise it is whatever
    ``self.get_prod_stop_word_dictionary()`` returns.
    """
    if not isDev:
        stop_dict = self.get_prod_stop_word_dictionary()
    else:
        stop_list = self.get_stop_words()
        stop_dict = ArrayDictionary(stop_list)
    return StopWordRemover(stop_dict)
def create_stemmer(self, isDev=False):
    """Return a CachedStemmer wrapping a freshly built Stemmer.

    The Stemmer uses an ArrayDictionary built from
    ``self.get_words(isDev)``; results are cached in a new ArrayCache.
    """
    word_dict = ArrayDictionary(self.get_words(isDev))
    base_stemmer = Stemmer(word_dict)
    # The cache is created after the stemmer, as in the original ordering.
    return CachedStemmer(ArrayCache(), base_stemmer)
def create_stemmer(self, isDev=False):
    """Return a Stemmer instance.

    With ``isDev`` truthy the dictionary is an ArrayDictionary built from
    ``self.get_words_from_file()``; otherwise it comes from
    ``self.get_prod_words_dictionary()``.
    """
    word_dict = (
        ArrayDictionary(self.get_words_from_file())
        if isDev
        else self.get_prod_words_dictionary()
    )
    return Stemmer(word_dict)
def setUp(self):
    """Create a StopWordRemover over the stop words 'di' and 'ke'."""
    stop_words = ['di', 'ke']
    self.dictionary = ArrayDictionary(stop_words)
    self.stopWordRemover = StopWordRemover(self.dictionary)
    return super(Test_StopWordRemoverTest, self).setUp()
def setUp(self):
    """Create the Stemmer under test from a fixed list of root words."""
    # Order and duplicates are kept exactly as in the original fixture.
    root_words = [
        'hancur', 'benar', 'apa', 'siapa', 'jubah', 'baju', 'beli',
        'celana', 'hantu', 'jual', 'buku', 'milik', 'kulit', 'sakit',
        'kasih', 'buang', 'suap', 'nilai', 'beri', 'rambut', 'adu',
        'suara', 'daerah', 'ajar', 'kerja', 'ternak', 'asing', 'raup',
        'gerak', 'puruk', 'terbang', 'lipat', 'ringkas', 'warna', 'yakin',
        'bangun', 'fitnah', 'vonis', 'baru', 'ajar', 'tangkap', 'kupas',
        'minum', 'pukul', 'cinta', 'dua', 'jauh', 'ziarah', 'nuklir',
        'gila', 'hajar', 'qasar', 'udara', 'populer', 'warna', 'yoga',
        'adil', 'rumah', 'muka', 'labuh', 'tarung', 'tebar', 'indah',
        'daya', 'untung', 'sepuluh', 'ekonomi', 'makmur', 'telah', 'serta',
        'percaya', 'pengaruh', 'kritik', 'seko', 'sekolah', 'tahan',
        'capa', 'capai', 'mula', 'mulai', 'petan', 'tani', 'aba', 'abai',
        'balas', 'balik', 'peran', 'medan', 'syukur', 'syarat', 'bom',
        'promosi', 'proteksi', 'prediksi', 'kaji', 'sembunyi', 'langgan',
        'laku', 'baik', 'terang', 'iman', 'bisik', 'taat', 'puas', 'makan',
        'nyala', 'nyanyi', 'nyata', 'nyawa', 'rata', 'lembut', 'ligas',
        'budaya', 'karya', 'ideal', 'final', 'taat', 'tiru', 'sepak',
        'kuasa', 'malaikat', 'nikmat',
        # sastrawi additional rules
        'lewat', 'nganga', 'allah',
    ]
    self.dictionary = ArrayDictionary(root_words)
    self.stemmer = Stemmer(self.dictionary)
    return super(Test_StemmerTest, self).setUp()
def create_stop_word_remover(self):
    """Return a StopWordRemover backed by ``self.get_stop_words()``.

    The stop words are wrapped in an ArrayDictionary before being handed
    to the remover.
    """
    stop_dict = ArrayDictionary(self.get_stop_words())
    return StopWordRemover(stop_dict)
def get_prod_words_dictionary(self):
    """Return an ArrayDictionary built from ``self.get_words_from_file()``."""
    return ArrayDictionary(self.get_words_from_file())
def setUp(self):
    """Give each test a fresh ArrayDictionary created with no arguments."""
    fresh_dictionary = ArrayDictionary()
    self.dictionary = fresh_dictionary
    return super(Test_ArrayDictionaryTest, self).setUp()
class Test_ArrayDictionaryTest(unittest.TestCase):
    """Unit tests for ArrayDictionary: adding, counting and lookup."""

    def setUp(self):
        """Give each test a fresh ArrayDictionary created with no arguments."""
        self.dictionary = ArrayDictionary()
        return super(Test_ArrayDictionaryTest, self).setUp()

    def test_add_and_contain(self):
        """A word is reported as contained only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_add_count_word(self):
        """Adding one word raises the count from 0 to 1."""
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_add_word_ignore_empty_string(self):
        """Adding an empty string leaves the count at 0."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_add_words(self):
        """add_words stores every word of the given list."""
        words = ['word1', 'word2']
        self.dictionary.add_words(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructor_preserve_words(self):
        """Words passed to the constructor are present in the dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))
def test_constructor_preserve_words(self):
    """Words passed to the constructor are present in the dictionary."""
    words = ['word1', 'word2']
    dictionary = ArrayDictionary(words)
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, dictionary.count())
    self.assertTrue(dictionary.contains('word1'))
    self.assertTrue(dictionary.contains('word2'))
def test_non_dict_list(self):
    """An ArrayDictionary constructed from a plain string has count 0."""
    not_a_word_list = '$$%&**&(^&'
    built = ArrayDictionary(not_a_word_list)
    self.assertEqual(0, built.count())
def test_dict_param(self):
    """ArrayDictionary accepts a dict as its constructor argument.

    Both keys must be found and counted; afterwards adding 'word3' and a
    single-space string must leave the count at 3.
    """
    seed = {'word1':'word1', 'word2':'word2'}
    dictionary = ArrayDictionary(seed)

    for expected in ('word1', 'word2'):
        self.assertTrue(dictionary.contains(expected))
    self.assertFalse(dictionary.contains('word3'))
    self.assertEqual(2, dictionary.count())

    for extra in ('word3', ' '):
        dictionary.add(extra)

    self.assertTrue(dictionary.contains('word3'))
    self.assertEqual(3, dictionary.count())
class Test_ArrayDictionaryTest(unittest.TestCase):
    """Unit tests for ArrayDictionary: adding, counting, lookup and
    construction from lists, dicts and other values."""

    def setUp(self):
        """Give each test a fresh ArrayDictionary created with no arguments."""
        self.dictionary = ArrayDictionary()
        return super(Test_ArrayDictionaryTest, self).setUp()

    def test_add_and_contain(self):
        """A word is reported as contained only after it has been added."""
        self.assertFalse(self.dictionary.contains('word'))
        self.dictionary.add('word')
        self.assertTrue(self.dictionary.contains('word'))

    def test_add_count_word(self):
        """Adding one word raises the count from 0 to 1."""
        # assertEqual replaces the deprecated assertEquals alias, which
        # was removed from unittest in Python 3.12.
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('word')
        self.assertEqual(1, self.dictionary.count())

    def test_add_word_ignore_empty_string(self):
        """Adding an empty string leaves the count at 0."""
        self.assertEqual(0, self.dictionary.count())
        self.dictionary.add('')
        self.assertEqual(0, self.dictionary.count())

    def test_add_words(self):
        """add_words stores every word of the given list."""
        words = ['word1', 'word2']
        self.dictionary.add_words(words)
        self.assertEqual(2, self.dictionary.count())
        self.assertTrue(self.dictionary.contains('word1'))
        self.assertTrue(self.dictionary.contains('word2'))

    def test_constructor_preserve_words(self):
        """Words passed to the constructor are present in the dictionary."""
        words = ['word1', 'word2']
        dictionary = ArrayDictionary(words)
        self.assertEqual(2, dictionary.count())
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))

    # Test ArrayDictionary with dict-typed data
    # @author Mufid Jamaluddin
    def test_dict_param(self):
        """A dict argument is accepted; adding a single-space string
        afterwards does not change the count."""
        dictionary = ArrayDictionary({'word1':'word1', 'word2':'word2'})
        self.assertTrue(dictionary.contains('word1'))
        self.assertTrue(dictionary.contains('word2'))
        self.assertFalse(dictionary.contains('word3'))
        self.assertEqual(2, dictionary.count())
        dictionary.add('word3')
        dictionary.add(' ')
        self.assertTrue(dictionary.contains('word3'))
        self.assertEqual(3, dictionary.count())

    def test_non_dict_list(self):
        """An ArrayDictionary constructed from a plain string has count 0."""
        dictionary = ArrayDictionary('$$%&**&(^&')
        self.assertEqual(0, dictionary.count())
def get_prod_stop_word_dictionary(self):
    """Return an ArrayDictionary built from ``self.get_stop_words()``."""
    stop_list = self.get_stop_words()
    stop_dict = ArrayDictionary(stop_list)
    return stop_dict
def create_custom_stemmer(self, isDev=False, words): dictionary = ArrayDictionary(words)