Python SentenceGeneratorの例、sentence_generator.SentenceGenerator Pythonの例

コード例 #1

0

ファイルを表示

ファイル: self_descriptive_sentence.py プロジェクト: calfzhou/self-descriptive-sentence

def main():
    """Command-line entry point of the self-descriptive sentence generator.

    Parses the command line, asks ``SentenceGenerator.generate`` for a set of
    counts in the selected language and, when generation succeeds, prints the
    composed sentence and verifies it. Prints ``Generation failed.`` when no
    counts are returned.

    The ``down_to_zero`` value forwarded to the generator is ``True`` for
    ``-0``, ``False`` for ``-1`` and otherwise the ``--seed`` value (``None``
    when no seed was given).

    Raises:
        ValueError: if the selected language has no matching implementation.
        Exception: if the generated sentence does not pass verification.
    """
    parser = argparse.ArgumentParser(
        description='Self-Descriptive Sentence Generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    # The ``version=`` constructor keyword only exists in Python 2's argparse
    # (and is deprecated there). An explicit ``version`` action yields the same
    # -v/--version option and also works on Python 3.
    parser.add_argument('-v', '--version', action='version', version=__version__)
    parser.add_argument('text', nargs='?', default='', help='a text will be included in the sentence')
    parser.add_argument('-l', '--language', default='chinese', choices=('chinese', 'number'),
                        help='choose a language to generate sentence')

    parser.add_argument('-a', '--attempts', type=int, default=SentenceGenerator.MAX_ATTEMPTS,
                        help='the maximum number of attempts')
    parser.add_argument('-i', '--iterations', type=int, default=SentenceGenerator.MAX_ITERATIONS,
                        help='the maximum number of iterations in each attempt')
    parser.add_argument('-V', '--no-verbose', action='store_true',
                        help='disable debug messages')

    group = parser.add_mutually_exclusive_group()
    group.add_argument('-0', '--down-to-zero', action='store_true',
                       help='prefer zero when performing a decrease operation on a count=2 character')
    group.add_argument('-1', '--down-to-one', action='store_true',
                       help='prefer one when performing a decrease operation on a count=2 character')
    group.add_argument('-s', '--seed', type=int,
                       help='use a seeded Random object to randomly choose between zero and one'
                            ' when performing a decrease operation on a count=2 character')

    argv = sys.argv[1:]
    if sys.version_info[0] < 3:
        # Python 2 hands over byte strings; decode them so the user text is
        # processed as unicode. Python 3 arguments are already text.
        encoding = sys.getfilesystemencoding()
        argv = [arg.decode(encoding) for arg in argv]
    args = parser.parse_args(argv)

    if args.down_to_zero:
        down_to_zero = True
    elif args.down_to_one:
        down_to_zero = False
    else:
        down_to_zero = args.seed

    if args.language == 'chinese':
        language = chinese.Chinese()
    elif args.language == 'number':
        language = number.Number()
    else:
        # Unreachable through argparse ``choices``; guards future additions.
        raise ValueError('unknown language {}'.format(args.language))

    counts = SentenceGenerator.generate(language, user_text=args.text,
                                        attempts=args.attempts, iterations=args.iterations,
                                        down_to_zero=down_to_zero, verbose=not args.no_verbose)
    if counts:
        print(language.compose_sentence(counts.total, counts, user_text=args.text))
        if not SentenceGenerator.verify(language, args.text, counts):
            raise Exception('Oops, something is wrong, we got an incorrect sentence!')
    else:
        print('Generation failed.')

コード例 #2

0

ファイルを表示

def main():
    """Load the configured resources and run a line-by-line tracking loop.

    Reads the JSON configuration at ``CONFIG_FN``, loads the sentence groups
    (stored in the module-level ``sent_groups``), knowledge base, vocabularies,
    ontology, embeddings and tracker model, then processes every line read
    from standard input until EOF: the line is encoded, fed to the tracker
    model together with the running hidden state, and the predictions are
    printed through ``print_ret``.
    """
    global sent_groups

    with open(CONFIG_FN) as config_file:
        conf = json.load(config_file)

    with open(conf["sent_groups"]) as groups_file:
        sent_groups = json.load(groups_file)["groups"]

    kb = load_kb(conf["kb"], 'name')

    sys_vocab, sys_word2idx = load_sys_vocab(conf["sys_vocab"])
    sys_codec = Codec(sys_vocab, sys_word2idx)

    onto, onto_idx = load_ontology(conf["ontology"])

    word2idx, embed = load_embed(**conf)
    usr_codec = Codec([], word2idx)

    trk_model, slot_len_sum = load_tracker_model(onto, embed, conf, kb)
    trk_model.eval()

    hidden = trk_model.state_tracker.init_hidden()
    kb_vec = Variable(torch.zeros(1, conf["kb_indicator_len"]))

    sentence_generator = SentenceGenerator(kb, onto, sent_groups)

    while True:
        line = sys.stdin.readline()
        if line == '':
            # readline() returns an empty string only at EOF.
            break

        encoded = usr_codec.encode(line.strip())
        batch = Variable(torch.LongTensor([encoded]))

        # The hidden state is carried over from one input line to the next.
        model_out = trk_model(batch, None, hidden)
        sentvecs, states_reps, states_preds, hidden, sent_grp_preds = model_out

        criteria = to_search_criteria(states_preds, onto)
        ret, kb_vec = get_kb_result(kb, criteria, conf["kb_indicator_len"])

        sentvecs = sentvecs.view(1, -1)
        states_reps = states_reps.view(1, -1)

        print_ret(states_preds, sent_grp_preds, onto, sentence_generator)

コード例 #3

0

ファイルを表示

def main():
    """Run the self-descriptive sentence generator from the command line.

    Builds the argument parser, resolves the language object and the
    ``down_to_zero`` preference (``True`` for ``-0``, ``False`` for ``-1``,
    otherwise the ``--seed`` value, which is ``None`` when omitted), and calls
    ``SentenceGenerator.generate``. A successful result is printed as a
    sentence and verified; otherwise ``Generation failed.`` is printed.

    Raises:
        ValueError: if the selected language has no matching implementation.
        Exception: if the generated sentence does not pass verification.
    """
    parser = argparse.ArgumentParser(
        description='Self-Descriptive Sentence Generator',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # ``ArgumentParser(version=...)`` is a Python 2-only (deprecated) keyword.
    # Registering the ``version`` action explicitly gives the same
    # -v/--version option and keeps the parser usable on Python 3.
    parser.add_argument('-v',
                        '--version',
                        action='version',
                        version=__version__)
    parser.add_argument('text',
                        nargs='?',
                        default='',
                        help='a text will be included in the sentence')
    parser.add_argument('-l',
                        '--language',
                        default='chinese',
                        choices=('chinese', 'number'),
                        help='choose a language to generate sentence')

    parser.add_argument('-a',
                        '--attempts',
                        type=int,
                        default=SentenceGenerator.MAX_ATTEMPTS,
                        help='the maximum number of attempts')
    parser.add_argument(
        '-i',
        '--iterations',
        type=int,
        default=SentenceGenerator.MAX_ITERATIONS,
        help='the maximum number of iterations in each attempt')
    parser.add_argument('-V',
                        '--no-verbose',
                        action='store_true',
                        help='disable debug messages')

    group = parser.add_mutually_exclusive_group()
    group.add_argument(
        '-0',
        '--down-to-zero',
        action='store_true',
        help=
        'prefer zero when performing a decrease operation on a count=2 character'
    )
    group.add_argument(
        '-1',
        '--down-to-one',
        action='store_true',
        help=
        'prefer one when performing a decrease operation on a count=2 character'
    )
    group.add_argument(
        '-s',
        '--seed',
        type=int,
        help='use a seeded Random object to randomly choose between zero and one'
        ' when performing a decrease operation on a count=2 character')

    cli_args = sys.argv[1:]
    if sys.version_info[0] < 3:
        # Only Python 2 passes byte strings; Python 3 arguments are text
        # already and have no ``decode`` method.
        fs_encoding = sys.getfilesystemencoding()
        cli_args = [raw.decode(fs_encoding) for raw in cli_args]
    args = parser.parse_args(cli_args)

    if args.down_to_zero:
        down_to_zero = True
    elif args.down_to_one:
        down_to_zero = False
    else:
        down_to_zero = args.seed

    if args.language == 'chinese':
        language = chinese.Chinese()
    elif args.language == 'number':
        language = number.Number()
    else:
        # Unreachable through argparse ``choices``; guards future additions.
        raise ValueError('unknown language {}'.format(args.language))

    counts = SentenceGenerator.generate(language,
                                        user_text=args.text,
                                        attempts=args.attempts,
                                        iterations=args.iterations,
                                        down_to_zero=down_to_zero,
                                        verbose=not args.no_verbose)
    if counts:
        print(
            language.compose_sentence(counts.total,
                                      counts,
                                      user_text=args.text))
        if not SentenceGenerator.verify(language, args.text, counts):
            raise Exception(
                'Oops, something is wrong, we got an incorrect sentence!')
    else:
        print('Generation failed.')

コード例 #4

0

ファイルを表示

ファイル: test_sentence_generator.py プロジェクト: olexs/wordclock

    def test_get_sentence(self):
        """Check the word list returned by get_sentence for a range of times."""
        # arrange
        gen = SentenceGenerator()
        cases = [
            ('es ist zwölf uhr', time(12, 0)),
            ('es ist zwölf uhr', time(0, 0)),
            ('es ist sechs uhr', time(6, 0)),
            ('es ist sechs uhr', time(18, 0)),
            ('es ist fünf nach eins', time(1, 5)),
            ('es ist zehn nach zwei', time(2, 11)),
            ('es ist viertel nach drei', time(3, 14)),
            ('es ist zwanzig nach vier', time(4, 22)),
            ('es ist fünf vor halb fünf2', time(4, 25)),
            ('es ist halb sechs', time(5, 30)),
            ('es ist fünf nach halb sieben', time(6, 34)),
            ('es ist zwanzig vor acht', time(7, 42)),
            ('es ist viertel vor neun', time(20, 45)),
            ('es ist zehn vor zehn2', time(21, 49)),
            ('es ist fünf vor elf', time(10, 55)),
            ('es ist fünf vor eins', time(12, 55)),
        ]

        # act, assert (same order as listed; stops at the first mismatch)
        for expected_sentence, moment in cases:
            self.assertEqual(expected_sentence.split(' '),
                             gen.get_sentence(moment))

コード例 #5

0

ファイルを表示

embed_model = SentenceTransformer('bert-base-nli-mean-tokens')
# Embeds a single sentence: the "@@ " / "@@" markers are removed first, the
# text is encoded as a one-element batch and that batch's only result returned.
embedding_fn = lambda s: embed_model.encode(
    [s.replace("@@ ", "").replace("@@", "")])[0]

# Load or create the model.
model_save_path = "drive/My Drive/sentence_generator-{}.pickle".format(
    model_id)
if os.path.isfile(model_save_path):
    print("Loading sentence generator.")
    # NOTE(review): unpickling executes arbitrary code from the file; only
    # load checkpoints from a trusted location.
    # The context manager closes the checkpoint file once loading finishes
    # (the handle was previously left open).
    with open(model_save_path, "rb") as model_file:
        sentence_generator = pickle.load(model_file)
    # Flattening is different depending on the decoder used in the sentence generator.
    # sentence_generator._decoder.decoder.lstm.flatten_parameters() # VanillaRNNDecoder
    sentence_generator._decoder.decoder.rnn.flatten_parameters()  # DecoderRNN
    print("Loaded sentence generator.")
else:
    sentence_generator = SentenceGenerator(embedding_fn, id=model_id)
if train:
    # Note: pickled vocab is only used if a vocab does not already exist (i.e.
    # when training a model from scratch).
    if not use_pickled_vocab:
        pickled_vocab = ""
    # Note: the training data used is either pickled_shards, pickled_pairs,
    # or all_sentences (in that priority).
    if not use_pickled_shards:
        pickled_shards = []
    if not use_pickled_pairs:
        pickled_pairs = ""
    sentence_generator.train_generator(all_sentences,
                                       num_train_iters,
                                       pickled_pairs=pickled_pairs,

コード例 #6

0

ファイルを表示

ファイル: test.py プロジェクト: quar17esma/ak-phrase.py

 def test_generated_sentence(self):
     """generate_sentences yields one sentence per combination of the word options."""
     expected = ['eat code sleep', 'eat commit sleep']
     actual = SentenceGenerator.generate_sentences(
         [['eat'], ['code', 'commit'], ['sleep']])
     self.assertEqual(actual, expected,
                      "Should return all generated sentences")

コード例 #7

0

ファイルを表示

ファイル: test.py プロジェクト: quar17esma/ak-phrase.py

 def test_empty_words_list(self):
     """generate_sentences returns an empty list when given no word lists."""
     result = SentenceGenerator.generate_sentences([])
     self.assertEqual(result, [], "Should return all generated sentences")

コード例 #8

0

ファイルを表示

ファイル: test_sentence_generator.py プロジェクト: gestone/TechGen

 def setUp(self):
     """Store a newly constructed SentenceGenerator on ``self.gen``."""
     self.gen = SentenceGenerator()

コード例 #9

0

ファイルを表示

ファイル: test_sentence_generator.py プロジェクト: gestone/TechGen

class TestSentenceGenerator(unittest.TestCase):
    """
    Tests external functionality of the SentenceGenerator class.

    Uses ``assertEqual`` rather than the deprecated ``assertEquals`` alias,
    which was removed from unittest in Python 3.12.
    """

    def setUp(self):
        """
        Create a fresh generator for every test.
        """
        self.gen = SentenceGenerator()

    def test_train_model_single_words(self):
        """
        Test single word mappings so that each word
        has only one possible word that follows it.
        """
        self.gen.train_model("The brown fox.")
        self.assertEqual(self.gen.model, {"The": ["brown"], "brown": ["fox."]})

    def test_train_model_multi_words(self):
        """
        Test multi word mappings such that a repeated word
        has two possible words that follow it.
        """
        self.gen.train_model("The brown brown fox.")
        self.assertEqual(self.gen.model, {"The": ["brown"], "brown": ["brown", "fox."]})

    def test_train_model_no_end_word(self):
        """
        Test that if the input has no end punctuation, ".", "?", "!", or ":",
        a "." is appended to the last word.
        """
        self.gen.train_model("The brown fox")
        self.assertEqual(self.gen.model, {"The": ["brown"], "brown": ["fox."]})

    def test_train_model_empty_input(self):
        """
        Test that the empty input does not modify the model.
        """
        self.gen.train_model("")
        self.assertEqual(self.gen.model, {})

    def test_generate_sentence_invalid_key(self):
        """
        Test that a ValueError is thrown if the key
        is not present in the model.
        """
        self.gen.train_model("The brown fox.")
        with self.assertRaises(ValueError):
            self.gen.generate_sentences(1, "wolf")

    def test_generate_sentence_initial_word(self):
        """
        Test that the initial word is being applied if
        it is valid and is specified.
        """
        self.gen.train_model("The brown fox jumped over the lazy fat dog and the big log.")

        generated_sentences = self.gen.generate_sentences(100, "The")

        # Generate many sentences so that the test does not succeed by chance.
        for sentence in generated_sentences:
            self.assertEqual(sentence.split(" ", 1)[0], "The")

    def test_length_generated_sentences(self):
        """
        Test that the number of generated sentences
        specified is the number of sentences returned.
        """
        self.gen.train_model("The brown fox jumped over the lazy fat dog and the big log.")
        self.assertEqual(len(self.gen.generate_sentences(100)), 100)

    def test_json_representation(self):
        """
        Test that the correct json representation is being
        returned from the model.
        """
        self.gen.train_model("The brown fox.")
        expected_structure = {"The": ["brown"], "brown": ["fox."]}
        self.assertEqual(self.gen.get_json_rep(), json.dumps(expected_structure))