def cat_dog_generation():
    """Train a fresh predictor on synthetic text and print a generated sample.

    The training text comes from ``simple_generation(999)``. Four seed strings
    are registered with the pop manager before training. The text is then
    trained on three times, with the pattern store written to
    ``PatternStore/cat_dog_<round>.txt`` after every round.
    """
    training_text = simple_generation(999)
    predictor = StreamPredictor()

    # Seed strings are registered in this exact order before any training.
    for seed in ('cat', 'dog', ' ate food. ', 'The '):
        predictor.pop_manager.add_pop_string(seed)

    for round_index in range(3):
        predictor.train_characters(training_text)
        snapshot_path = 'PatternStore/cat_dog_' + str(round_index) + '.txt'
        predictor.pop_manager.save_pb_plain(snapshot_path)

    # NOTE(review): generation is assumed to run once, after the final
    # training round, rather than inside the loop -- confirm.
    print(predictor.generate_stream(100))
def test_sequence_learn(self):
    """Check the next-word distribution of 'that' after one and two sequences.

    After training on 'that is good' the only follower of 'that' must be
    'is' with probability 1.0. After additionally training on
    'that isnt good' the followers must be 'is' and 'isnt', each with
    probability 0.5.
    """
    predictor = StreamPredictor()

    predictor.train(['that', 'is', 'good'])
    next_words, probabilities = (
        predictor.pop_manager.get('that').get_next_smallest_distribution()
    )
    self.assertEqual(['is'], next_words)
    self.assertEqual([1.0], probabilities)

    predictor.train(['that', 'isnt', 'good'])
    next_words, probabilities = (
        predictor.pop_manager.get('that').get_next_smallest_distribution()
    )
    # Order of the followers is not asserted, only the set of words.
    self.assertEqual({'is', 'isnt'}, set(next_words))
    self.assertListEqual([0.5, 0.5], list(probabilities))
def form_simple_tree(self):
    """Build a small hand-wired pattern tree used as a test fixture.

    Two four-letter pops ('abxe' with strength 600, 'abyd' with strength 400)
    share the first component 'ab' and differ in the second ('xe' / 'yd').

    Returns:
        Tuple ``(sp, ab, abxe, abyd, xe, yd)``.
    """
    sp = StreamPredictor()

    abxe = Pop('abxe')
    abxe.strength = 600
    abyd = Pop('abyd')
    abyd.strength = 400
    ab, xe, yd = Pop('ab'), Pop('xe'), Pop('yd')
    a, b, d, e, x, y = (Pop(letter) for letter in 'abdexy')

    # NOTE(review): 'abyd' is not added to the vocabulary here, and 'ab'
    # never gets components set -- presumably intentional for the tests
    # that use this fixture; confirm.
    for pop in (abxe, ab, xe, yd, a, b, d, e, x, y):
        sp.pop_manager.add_pop_to_vocabulary(pop)

    # Wire the leaves first, then the two top-level pops.
    xe.set_components(x, e)
    yd.set_components(y, d)
    abxe.set_components(ab, xe)
    abyd.set_components(ab, yd)

    return sp, ab, abxe, abyd, xe, yd
def fruit_generalization():
    """Run the fruit experiment: train on a text file, refactor, and persist.

    Seeds the pop manager with 'apple' and 'banana', trains on up to 100000
    characters of ``data/Experimental/case.txt``, refactors the pattern
    store, prints the category of every pattern that has one, and finally
    saves and re-loads the store at ``PatternStore/fruit_experiment.txt``.
    """
    print('hello')

    predictor = StreamPredictor()
    for seed in ('apple', 'banana'):
        predictor.pop_manager.add_pop_string(seed)

    training_text = data_fetcher.get_clean_text_from_file(
        'data/Experimental/case.txt', 100000
    )
    predictor.train_characters(training_text)
    predictor.pop_manager.refactor()

    # NOTE: earlier revisions alternated sp.generalize() and sp.train(text)
    # several times at this point; those steps are currently disabled.

    categorized = [
        repr(pattern.belongs_to_category)
        for pattern in predictor.pop_manager.patterns_collection.values()
        if pattern.belongs_to_category is not None
    ]
    print(categorized)

    store_path = 'PatternStore/fruit_experiment.txt'
    predictor.pop_manager.save_pb_plain(store_path)
    # Loading right after saving exercises the save/load round trip.
    predictor.pop_manager.load_pb_plain(store_path)
def test_load(self):
    """Saving a trained sample and loading it into a fresh predictor works.

    Verifies that ``save_tsv`` creates ``save_filename``, that a fresh
    ``StreamPredictor`` holds at most 10 patterns, and that it holds more
    than 10 after ``load_tsv``.

    The temporary file is removed in a ``finally`` block so that a failing
    assertion does not leave it behind for later tests.
    """
    sample = self.get_sample()
    try:
        sample.file_manager.save_tsv(save_filename)
        self.assertTrue(os.path.isfile(save_filename))

        empty_sample = StreamPredictor()
        # assertLessEqual / assertGreater report the actual sizes on failure.
        self.assertLessEqual(
            len(empty_sample.pop_manager.pattern_collection), 10
        )

        empty_sample.file_manager.load_tsv(save_filename)
        self.assertGreater(
            len(empty_sample.pop_manager.pattern_collection), 10
        )
    finally:
        # Guarded so a failed save does not mask the real error.
        if os.path.isfile(save_filename):
            os.remove(save_filename)
def test_save_load_equal(self):
    """A predictor loaded from a saved TSV matches the one that was saved.

    Adds an extra 'karma' pop to the sample, saves it to ``save_filename``,
    loads it into a fresh ``StreamPredictor`` and compares both the
    ``status()`` strings and the pattern collection sizes.

    The temporary file is always removed afterwards, matching the cleanup
    done by ``test_load``.
    """
    import os  # local import: this block did not previously reference os

    original = self.get_sample()
    original.pop_manager.pattern_collection['karma'] = Pop('karma')
    original_string = original.pop_manager.status()

    try:
        original.file_manager.save_tsv(save_filename)

        loaded = StreamPredictor()
        loaded.file_manager.load_tsv(save_filename)
        loaded_string = loaded.pop_manager.status()

        self.assertEqual(original_string, loaded_string)
        self.assertEqual(
            len(original.pop_manager.pattern_collection),
            len(loaded.pop_manager.pattern_collection),
        )
    finally:
        # Guarded so a failed save does not mask the real error.
        if os.path.isfile(save_filename):
            os.remove(save_filename)
def setup_simple_patterns(self):
    """Build a minimal fixture around the pop 'abc' and its sub-patterns.

    Six pops ('abc', 'bc', 'ab', 'a', 'b', 'c') are added to the vocabulary
    in that order; only 'abc' gets components, namely ('a', 'bc').

    Returns:
        Tuple ``(a, ab, abc, sp, b, c, bc)``.
    """
    sp = StreamPredictor()

    abc, ab, bc = Pop('abc'), Pop('ab'), Pop('bc')
    a, b, c = Pop('a'), Pop('b'), Pop('c')

    # Registration order is kept as abc, bc, ab, a, b, c.
    for pop in (abc, bc, ab, a, b, c):
        sp.pop_manager.add_pop_to_vocabulary(pop)

    abc.set_components(a, bc)
    return a, ab, abc, sp, b, c, bc
def test_sequence_learn(self):
    """Verify how the followers of 'that' change as sequences are learned.

    One training sequence ('that is good') must yield 'is' with probability
    1.0; adding 'that isnt good' must yield 'is' and 'isnt' at 0.5 each.
    """
    predictor = StreamPredictor()

    def followers_of_that():
        # Looked up afresh on every call, after each training step.
        return predictor.pop_manager.get(
            'that').get_next_smallest_distribution()

    predictor.train(['that', 'is', 'good'])
    words_after_first, probs_after_first = followers_of_that()
    self.assertEqual(['is'], words_after_first)
    self.assertEqual([1.0], probs_after_first)

    predictor.train(['that', 'isnt', 'good'])
    words_after_second, probs_after_second = followers_of_that()
    # Only set membership is checked for the words, not their order.
    self.assertEqual({'is', 'isnt'}, set(words_after_second))
    self.assertListEqual([0.5, 0.5], list(probs_after_second))
import sys

import matplotlib.pyplot as plt

# Make the package importable when the script is run from its own directory.
sys.path.insert(0, '../streampredictor/')
sys.path.insert(0, '../')

from streampredictor.stream_predictor import StreamPredictor
from streampredictor import data_fetcher

# --- Configuration ---------------------------------------------------------
input_text_file = '../Data/ptb.test.txt'
max_input_length = 10 ** 4
# Negative index: the last 1000 words are held out, the rest is trained on.
test_length = -1000

# --- Train on everything except the held-out tail ---------------------------
words = data_fetcher.get_clean_words_from_file(
    input_text_file, max_input_length
)
sp = StreamPredictor()
sp.train(words[:test_length])
sp.file_manager.save_tsv('../PatternStore/ptb.tsv')

# --- Evaluate on the held-out tail and plot ---------------------------------
perplexity_list = sp.calculate_perplexity(
    words=words[test_length:], verbose=True
)

plt.plot(perplexity_list)
plt.xlabel('Time')
plt.ylabel('Perplexity')
# NOTE(review): the title says "training" although the curve is computed on
# the held-out words -- confirm which wording is intended.
plt.title('Perplexity during training')
plt.show()
def cat_dog_train_category():
    """Continue training a previously saved pattern store on generated text.

    Loads ``PatternStore/cat_dog_gen.txt``, trains on the output of
    ``simple_generation(999)`` and writes the result to
    ``PatternStore/cat_dog_gen2.txt``.
    """
    # The training text is generated before the predictor is created.
    training_text = simple_generation(999)

    predictor = StreamPredictor()
    predictor.pop_manager.load_pb_plain('PatternStore/cat_dog_gen.txt')
    predictor.train_characters(training_text)
    predictor.pop_manager.save_pb_plain('PatternStore/cat_dog_gen2.txt')
def cat_dog_generalization():
    """Generalize a saved pattern store and save the result under a new name.

    Reads ``PatternStore/cat_dog_0.txt``, calls ``generalize()`` on the pop
    manager and writes ``PatternStore/cat_dog_gen.txt``.
    """
    predictor = StreamPredictor()
    predictor.pop_manager.load_pb_plain('PatternStore/cat_dog_0.txt')
    predictor.pop_manager.generalize()
    predictor.pop_manager.save_pb_plain('PatternStore/cat_dog_gen.txt')
def simple_generation(generation_length=100):
    """Generate a stream from the saved synthetic fruit-experiment patterns.

    Args:
        generation_length: Value forwarded to ``generate_stream``.

    Returns:
        Whatever ``StreamPredictor.generate_stream`` returns for that length.
    """
    predictor = StreamPredictor()
    predictor.pop_manager.load_pb_plain(
        'PatternStore/fruit_experiment_synthetic.txt'
    )
    return predictor.generate_stream(generation_length)
import sys

# Make the package importable when the script is run from its own directory.
sys.path.insert(0, '../streampredictor/')
sys.path.insert(0, '../')

from streampredictor.stream_predictor import StreamPredictor

sp = StreamPredictor()
# Set before training; its exact effect is defined by StreamPredictor.
sp.occasional_step = 100

# Repeat one short sentence pair 1000 times to form the training stream.
input_text = 'hello how are you hello who are you'
input_words = input_text.split(' ') * 1000
print(input_words[:20])

sp.train(input_words, verbose=True)

print('\n\nThe generated words are ')
print(sp.generate(20))
print('\nEnd of generated words\n')

sp.file_manager.save_tsv('../PatternStore/simple_generate.tsv')
print(sp.pop_manager.pattern_collection)
def get_sample():
    """Return a new StreamPredictor trained on ``training_words``.

    ``training_words`` is not defined in this function; it is read from the
    enclosing scope.
    """
    # NOTE(review): other tests call this as ``self.get_sample()``;
    # presumably it is decorated with @staticmethod outside this view --
    # confirm.
    trained_predictor = StreamPredictor()
    trained_predictor.train(training_words)
    return trained_predictor