def main():
    """Command-line entry point for the self-descriptive sentence generator.

    Parses the command line, builds the requested language object, asks
    ``SentenceGenerator.generate`` for a set of counts and prints the
    composed sentence, or ``Generation failed.`` when the result is falsy.

    The ``down_to_zero`` value handed to the generator is ``True`` for
    ``-0``, ``False`` for ``-1`` and otherwise whatever ``--seed`` holds
    (``None`` when no seed was given).

    Raises:
        ValueError: if the selected language is not handled here.
        Exception: if the printed sentence does not pass
            ``SentenceGenerator.verify``.
    """
    parser = argparse.ArgumentParser(
        description='Self-Descriptive Sentence Generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # The ``version=`` constructor keyword is deprecated in Python 2.7 and
    # rejected by Python 3.  An explicit ``version`` action registers the same
    # -v/--version flags and works on both.
    parser.add_argument('-v', '--version', action='version',
                        version=__version__)
    parser.add_argument('text', nargs='?', default='',
                        help='a text will be included in the sentence')
    parser.add_argument('-l', '--language', default='chinese',
                        choices=('chinese', 'number'),
                        help='choose a language to generate sentence')
    parser.add_argument('-a', '--attempts', type=int,
                        default=SentenceGenerator.MAX_ATTEMPTS,
                        help='the maximum number of attempts')
    parser.add_argument('-i', '--iterations', type=int,
                        default=SentenceGenerator.MAX_ITERATIONS,
                        help='the maximum number of iterations in each attempt')
    parser.add_argument('-V', '--no-verbose', action='store_true',
                        help='disable debug messages')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-0', '--down-to-zero', action='store_true',
                       help='prefer zero when performing a decrease operation on a count=2 character')
    group.add_argument('-1', '--down-to-one', action='store_true',
                       help='prefer one when performing a decrease operation on a count=2 character')
    group.add_argument('-s', '--seed', type=int,
                       help='use a seeded Random object to randomly choose between zero and one'
                            ' when performing a decrease operation on a count=2 character')

    # Python 2 delivers byte strings in sys.argv, so decode them to text.
    # Python 3 already provides text, and ``unicode`` does not exist there.
    argv = sys.argv[1:]
    if sys.version_info[0] < 3:
        # getfilesystemencoding() may return None on Python 2.
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        argv = [arg.decode(encoding) for arg in argv]
    args = parser.parse_args(argv)

    if args.down_to_zero:
        down_to_zero = True
    elif args.down_to_one:
        down_to_zero = False
    else:
        # Neither preference flag given: pass the seed through (may be None).
        down_to_zero = args.seed

    if args.language == 'chinese':
        language = chinese.Chinese()
    elif args.language == 'number':
        language = number.Number()
    else:
        # Guards against ``choices`` and this chain drifting apart.
        raise ValueError('unknown language {}'.format(args.language))

    counts = SentenceGenerator.generate(language,
                                        user_text=args.text,
                                        attempts=args.attempts,
                                        iterations=args.iterations,
                                        down_to_zero=down_to_zero,
                                        verbose=not args.no_verbose)
    if counts:
        print(language.compose_sentence(counts.total, counts,
                                        user_text=args.text))
        # The sentence is printed first, then checked, so a wrong result is
        # still visible when the check fails.
        if not SentenceGenerator.verify(language, args.text, counts):
            raise Exception('Oops, something is wrong, we got an incorrect sentence!')
    else:
        print('Generation failed.')
def main():
    """Read lines from stdin and print the tracker's response to each one.

    Configuration is loaded from ``CONFIG_FN``.  The module-level
    ``sent_groups`` global is rebound to the ``"groups"`` entry of the file
    named by ``conf["sent_groups"]``.  The loop ends when ``readline``
    returns an empty string.  The hidden state returned by the model for one
    line is passed back in for the next line.
    """
    global sent_groups

    with open(CONFIG_FN) as conf_file:
        conf = json.load(conf_file)

    with open(conf["sent_groups"]) as groups_file:
        sent_groups = json.load(groups_file)["groups"]

    kb = load_kb(conf["kb"], 'name')

    sys_vocab, sys_word2idx = load_sys_vocab(conf["sys_vocab"])
    # Built but not used again in this function.
    sys_codec = Codec(sys_vocab, sys_word2idx)

    onto, onto_idx = load_ontology(conf["ontology"])

    word2idx, embed = load_embed(**conf)
    usr_codec = Codec([], word2idx)

    trk_model, slot_len_sum = load_tracker_model(onto, embed, conf, kb)
    trk_model.eval()

    hidden = trk_model.state_tracker.init_hidden()
    kb_vec = Variable(torch.zeros(1, conf["kb_indicator_len"]))

    sentence_generator = SentenceGenerator(kb, onto, sent_groups)

    while True:
        line = sys.stdin.readline()
        if line == '':
            # readline() returns an empty string only at end of input.
            break

        token_ids = usr_codec.encode(line.strip())
        batch = Variable(torch.LongTensor([token_ids, ]))

        model_out = trk_model(batch, None, hidden)
        sentvecs, states_reps, states_preds, hidden, sent_grp_preds = model_out

        criteria = to_search_criteria(states_preds, onto)
        ret, kb_vec = get_kb_result(kb, criteria, conf["kb_indicator_len"])

        # Flattened to a single row; not read again in this function.
        sentvecs = sentvecs.view(1, -1)
        states_reps = states_reps.view(1, -1)

        print_ret(states_preds, sent_grp_preds, onto, sentence_generator)
def _build_arg_parser():
    """Return the argument parser for the sentence generator command line."""
    parser = argparse.ArgumentParser(
        description='Self-Descriptive Sentence Generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # ``ArgumentParser(version=...)`` is deprecated in Python 2.7 and is a
    # TypeError on Python 3.  The explicit action keeps the same -v/--version
    # flags on both.
    parser.add_argument('-v', '--version', action='version',
                        version=__version__)
    parser.add_argument('text',
                        nargs='?',
                        default='',
                        help='a text will be included in the sentence')
    parser.add_argument('-l',
                        '--language',
                        default='chinese',
                        choices=('chinese', 'number'),
                        help='choose a language to generate sentence')
    parser.add_argument('-a',
                        '--attempts',
                        type=int,
                        default=SentenceGenerator.MAX_ATTEMPTS,
                        help='the maximum number of attempts')
    parser.add_argument(
        '-i',
        '--iterations',
        type=int,
        default=SentenceGenerator.MAX_ITERATIONS,
        help='the maximum number of iterations in each attempt')
    parser.add_argument('-V',
                        '--no-verbose',
                        action='store_true',
                        help='disable debug messages')

    # -0, -1 and -s are alternatives, so argparse rejects combinations.
    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-0',
        '--down-to-zero',
        action='store_true',
        help='prefer zero when performing a decrease operation on a count=2 character')
    group.add_argument(
        '-1',
        '--down-to-one',
        action='store_true',
        help='prefer one when performing a decrease operation on a count=2 character')
    group.add_argument(
        '-s',
        '--seed',
        type=int,
        help='use a seeded Random object to randomly choose between zero and one'
        ' when performing a decrease operation on a count=2 character')
    return parser


def _command_line_args():
    """Return the arguments after the program name as text strings."""
    raw_args = sys.argv[1:]
    if sys.version_info[0] >= 3:
        # Python 3 already hands over text; ``unicode`` does not exist there.
        return raw_args
    # getfilesystemencoding() may return None on Python 2.
    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
    return [raw.decode(encoding) for raw in raw_args]


def main():
    """Generate and print a self-descriptive sentence from the command line.

    The ``down_to_zero`` argument passed to ``SentenceGenerator.generate`` is
    ``True`` with ``-0``, ``False`` with ``-1``, and otherwise the value of
    ``--seed`` (``None`` when it was not supplied).  A truthy result is
    composed into a sentence, printed and then verified.  A falsy result
    prints ``Generation failed.``.

    Raises:
        ValueError: if the selected language is not handled here.
        Exception: if ``SentenceGenerator.verify`` rejects the result.
    """
    args = _build_arg_parser().parse_args(_command_line_args())

    if args.down_to_zero:
        down_to_zero = True
    elif args.down_to_one:
        down_to_zero = False
    else:
        down_to_zero = args.seed

    if args.language == 'chinese':
        language = chinese.Chinese()
    elif args.language == 'number':
        language = number.Number()
    else:
        # Not reachable while ``choices`` matches the branches above; kept
        # as a guard.
        raise ValueError('unknown language {}'.format(args.language))

    counts = SentenceGenerator.generate(language,
                                        user_text=args.text,
                                        attempts=args.attempts,
                                        iterations=args.iterations,
                                        down_to_zero=down_to_zero,
                                        verbose=not args.no_verbose)
    if not counts:
        print('Generation failed.')
        return

    print(
        language.compose_sentence(counts.total, counts, user_text=args.text))
    # Verification runs after printing so the faulty sentence remains visible.
    if not SentenceGenerator.verify(language, args.text, counts):
        raise Exception(
            'Oops, something is wrong, we got an incorrect sentence!')
def test_get_sentence(self):
    """Check ``get_sentence`` against a table of clock times.

    Each expected sentence is split on single spaces and compared with the
    word list returned for the given time.  The ``fünf2`` and ``zehn2``
    tokens are intentional here; presumably they name a second occurrence
    of the word, to be confirmed against the generator.
    """
    # arrange
    gen = SentenceGenerator()
    expectations = [
        ('es ist zwölf uhr', time(12, 0)),
        ('es ist zwölf uhr', time(0, 0)),
        ('es ist sechs uhr', time(6, 0)),
        ('es ist sechs uhr', time(18, 0)),
        ('es ist fünf nach eins', time(1, 5)),
        ('es ist zehn nach zwei', time(2, 11)),
        ('es ist viertel nach drei', time(3, 14)),
        ('es ist zwanzig nach vier', time(4, 22)),
        ('es ist fünf vor halb fünf2', time(4, 25)),
        ('es ist halb sechs', time(5, 30)),
        ('es ist fünf nach halb sieben', time(6, 34)),
        ('es ist zwanzig vor acht', time(7, 42)),
        ('es ist viertel vor neun', time(20, 45)),
        ('es ist zehn vor zehn2', time(21, 49)),
        ('es ist fünf vor elf', time(10, 55)),
        ('es ist fünf vor eins', time(12, 55)),
    ]

    # act, assert
    for phrase, moment in expectations:
        self.assertEqual(phrase.split(' '), gen.get_sentence(moment))
embed_model = SentenceTransformer('bert-base-nli-mean-tokens') embedding_fn = lambda s: embed_model.encode( [s.replace("@@ ", "").replace("@@", "")])[0] # Load or create the model. model_save_path = "drive/My Drive/sentence_generator-{}.pickle".format( model_id) if os.path.isfile(model_save_path): print("Loading sentence generator.") sentence_generator = pickle.load(open(model_save_path, "rb")) # Flattening is different depending on the decoder used in the sentence generator. # sentence_generator._decoder.decoder.lstm.flatten_parameters() # VanillaRNNDecoder sentence_generator._decoder.decoder.rnn.flatten_parameters() # DecoderRNN print("Loaded sentence generator.") else: sentence_generator = SentenceGenerator(embedding_fn, id=model_id) if train: # Note: pickled vocab is only used if a vocab does not already exist (i.e. # when training a model from scratch). if not use_pickled_vocab: pickled_vocab = "" # Note: the training data used is either pickled_shards, pickled_pairs, # or all_sentences (in that priority). if not use_pickled_shards: pickled_shards = [] if not use_pickled_pairs: pickled_pairs = "" sentence_generator.train_generator(all_sentences, num_train_iters, pickled_pairs=pickled_pairs,
def test_generated_sentence(self):
    """One word from each inner list, in order, yields every sentence."""
    expected = ['eat code sleep', 'eat commit sleep']
    actual = SentenceGenerator.generate_sentences(
        [['eat'], ['code', 'commit'], ['sleep']])
    self.assertEqual(actual, expected,
                     "Should return all generated sentences")
def test_empty_words_list(self):
    """An empty words list produces an empty list of sentences."""
    no_words = []
    result = SentenceGenerator.generate_sentences(no_words)
    self.assertEqual(result, [],
                     "Should return all generated sentences")
def setUp(self):
    """Give each test its own ``SentenceGenerator`` as ``self.gen``."""
    fresh_generator = SentenceGenerator()
    self.gen = fresh_generator
class TestSentenceGenerator(unittest.TestCase):
    """
    Tests external functionality of the SentenceGenerator class.
    """

    def setUp(self):
        """ Create a fresh, untrained generator for every test. """
        self.gen = SentenceGenerator()

    # NOTE: the assertions use ``assertEqual``; the ``assertEquals`` alias is
    # deprecated and was removed from unittest in Python 3.12.

    def test_train_model_single_words(self):
        """
        Test single word mappings so that each word has only one possible
        word that follows it.
        """
        self.gen.train_model("The brown fox.")
        self.assertEqual(self.gen.model,
                         {"The": ["brown"], "brown": ["fox."]})

    def test_train_model_multi_words(self):
        """
        Test multi word mappings such that a word can have two possible
        words that follow it.
        """
        self.gen.train_model("The brown brown fox.")
        self.assertEqual(self.gen.model,
                         {"The": ["brown"], "brown": ["brown", "fox."]})

    def test_train_model_no_end_word(self):
        """
        Test that when the input has no end punctuation (".", "?", "!" or
        ":"), a "." is appended to the last word.
        """
        self.gen.train_model("The brown fox")
        self.assertEqual(self.gen.model,
                         {"The": ["brown"], "brown": ["fox."]})

    def test_train_model_empty_input(self):
        """
        Test that the empty input does not modify the model.
        """
        self.gen.train_model("")
        self.assertEqual(self.gen.model, {})

    def test_generate_sentence_invalid_key(self):
        """
        Test that a ValueError is raised if the initial word is not a key
        in the model.
        """
        self.gen.train_model("The brown fox.")
        with self.assertRaises(ValueError):
            self.gen.generate_sentences(1, "wolf")

    def test_generate_sentence_initial_word(self):
        """
        Test that the initial word is applied when it is valid and
        specified.
        """
        self.gen.train_model(
            "The brown fox jumped over the lazy fat dog and the big log.")
        # Generate many sentences so that the test does not succeed by chance.
        generated_sentences = self.gen.generate_sentences(100, "The")
        for sentence in generated_sentences:
            self.assertEqual(sentence.split(" ", 1)[0], "The")

    def test_length_generated_sentences(self):
        """
        Test that the number of generated sentences specified is the number
        of sentences returned.
        """
        self.gen.train_model(
            "The brown fox jumped over the lazy fat dog and the big log.")
        self.assertEqual(len(self.gen.generate_sentences(100)), 100)

    def test_json_representation(self):
        """
        Test that the correct json representation is being returned from
        the model.
        """
        self.gen.train_model("The brown fox.")
        expected_structure = {"The": ["brown"], "brown": ["fox."]}
        self.assertEqual(self.gen.get_json_rep(),
                         json.dumps(expected_structure))