Example #1
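This example uses a SentenceGenerator inside an interactive dialogue loop: it loads a configuration file, sentence groups, a knowledge base, vocabularies, an ontology, and word embeddings, restores a state-tracking model, and then reads user utterances from stdin, predicts dialogue states, queries the knowledge base, and prints a generated system response.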
import json
import sys

import torch
from torch.autograd import Variable

# CONFIG_FN and the project helpers used below (load_kb, load_sys_vocab, Codec,
# load_ontology, load_embed, load_tracker_model, SentenceGenerator,
# to_search_criteria, get_kb_result, print_ret) are defined elsewhere in the
# original module and are not shown here.


def main():
    with open(CONFIG_FN) as f:
        conf = json.load(f)

    # Sentence groups are loaded from JSON and shared as a module-level global.
    global sent_groups

    with open(conf["sent_groups"]) as f:
        sent_groups = json.load(f)["groups"]

    # Load the knowledge base, system-side vocabulary, ontology and embeddings.
    kb = load_kb(conf["kb"], 'name')
    sys_vocab, sys_word2idx = load_sys_vocab(conf["sys_vocab"])

    sys_codec = Codec(sys_vocab, sys_word2idx)

    onto, onto_idx = load_ontology(conf["ontology"])

    word2idx, embed = load_embed(**conf)

    usr_codec = Codec([], word2idx)

    # Restore the dialogue state-tracking model and switch it to evaluation mode.
    trk_model, slot_len_sum = load_tracker_model(onto, embed, conf, kb)

    trk_model.eval()

    # Initial recurrent hidden state and an all-zero KB indicator vector.
    hidden = trk_model.state_tracker.init_hidden()
    kb_vec = Variable(torch.zeros(1, conf["kb_indicator_len"]))

    sentence_generator = SentenceGenerator(kb, onto, sent_groups)

    # Interactive loop: read one user utterance per line from stdin.
    for line in iter(sys.stdin.readline, ''):
        # Encode the utterance and wrap it in a batch of size one.
        inp = usr_codec.encode(line.strip())
        inp = Variable(torch.LongTensor([inp]))

        sentvecs, states_reps, states_preds, hidden, sent_grp_preds = trk_model(
            inp, None, hidden)

        # Turn the predicted dialogue state into search criteria and query the KB.
        criteria = to_search_criteria(states_preds, onto)
        ret, kb_vec = get_kb_result(kb, criteria, conf["kb_indicator_len"])

        # print criteria, kb_vec

        sentvecs = sentvecs.view(1, -1)
        states_reps = states_reps.view(1, -1)

        # Generate and print the system response for the predicted state.
        print_ret(states_preds, sent_grp_preds, onto, sentence_generator)
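Example #2
This example is a unit test for a German "speaking clock" style SentenceGenerator: get_sentence converts a datetime.time into the sequence of words used to say that time aloud.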
import unittest
from datetime import time

# The enclosing test class and the SentenceGenerator import are not shown in
# the original snippet; the wrapper below is a minimal reconstruction.


class TestSentenceGenerator(unittest.TestCase):
    def test_get_sentence(self):
        # arrange
        gen = SentenceGenerator()

        # act, assert
        self.assertEqual('es ist zwölf uhr'.split(' '),
                         gen.get_sentence(time(12, 0)))
        self.assertEqual('es ist zwölf uhr'.split(' '),
                         gen.get_sentence(time(0, 0)))
        self.assertEqual('es ist sechs uhr'.split(' '),
                         gen.get_sentence(time(6, 0)))
        self.assertEqual('es ist sechs uhr'.split(' '),
                         gen.get_sentence(time(18, 0)))
        self.assertEqual('es ist fünf nach eins'.split(' '),
                         gen.get_sentence(time(1, 5)))
        self.assertEqual('es ist zehn nach zwei'.split(' '),
                         gen.get_sentence(time(2, 11)))
        self.assertEqual('es ist viertel nach drei'.split(' '),
                         gen.get_sentence(time(3, 14)))
        self.assertEqual('es ist zwanzig nach vier'.split(' '),
                         gen.get_sentence(time(4, 22)))
        self.assertEqual('es ist fünf vor halb fünf2'.split(' '),
                         gen.get_sentence(time(4, 25)))
        self.assertEqual('es ist halb sechs'.split(' '),
                         gen.get_sentence(time(5, 30)))
        self.assertEqual('es ist fünf nach halb sieben'.split(' '),
                         gen.get_sentence(time(6, 34)))
        self.assertEqual('es ist zwanzig vor acht'.split(' '),
                         gen.get_sentence(time(7, 42)))
        self.assertEqual('es ist viertel vor neun'.split(' '),
                         gen.get_sentence(time(20, 45)))
        self.assertEqual('es ist zehn vor zehn2'.split(' '),
                         gen.get_sentence(time(21, 49)))
        self.assertEqual('es ist fünf vor elf'.split(' '),
                         gen.get_sentence(time(10, 55)))
        self.assertEqual('es ist fünf vor eins'.split(' '),
                         gen.get_sentence(time(12, 55)))
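The expected outputs show that the generator rounds minutes to the nearest five (for example, 2:11 becomes "zehn nach zwei" and 3:14 becomes "viertel nach drei") and speaks hours on a 12-hour clock. Tokens with a trailing "2", such as "fünf2" and "zehn2", are not typos; they presumably refer to a second instance of that word in the generator's output vocabulary, as on a word-clock face.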
Example #3
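This example, apparently taken from a Colab notebook, loads a pickled SentenceGenerator from Google Drive if one exists (flattening the decoder's RNN parameters after unpickling), or creates a new one backed by a SentenceTransformer embedding function, and then optionally trains it.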
import os
import pickle

from sentence_transformers import SentenceTransformer

# model_id, train, the use_pickled_* flags, the pickled_* inputs, all_sentences,
# num_train_iters and the SentenceGenerator class come from earlier cells of the
# original notebook and are not shown here.

embed_model = SentenceTransformer('bert-base-nli-mean-tokens')
# Embedding function for the generator; BPE continuation markers ("@@") are
# stripped before encoding.
embedding_fn = lambda s: embed_model.encode(
    [s.replace("@@ ", "").replace("@@", "")])[0]

# Load or create the model.
model_save_path = "drive/My Drive/sentence_generator-{}.pickle".format(
    model_id)
if os.path.isfile(model_save_path):
    print("Loading sentence generator.")
    sentence_generator = pickle.load(open(model_save_path, "rb"))
    # Flattening is different depending on the decoder used in the sentence generator.
    # sentence_generator._decoder.decoder.lstm.flatten_parameters() # VanillaRNNDecoder
    sentence_generator._decoder.decoder.rnn.flatten_parameters()  # DecoderRNN
    print("Loaded sentence generator.")
else:
    sentence_generator = SentenceGenerator(embedding_fn, id=model_id)

if train:
    # Note: pickled vocab is only used if a vocab does not already exist (i.e.
    # when training a model from scratch).
    if not use_pickled_vocab:
        pickled_vocab = ""
    # Note: the training data used is either pickled_shards, pickled_pairs,
    # or all_sentences (in that priority).
    if not use_pickled_shards:
        pickled_shards = []
    if not use_pickled_pairs:
        pickled_pairs = ""
    sentence_generator.train_generator(all_sentences,
                                       num_train_iters,
                                       pickled_pairs=pickled_pairs,