Exemple #1
0
    def test_train_bilm_chars(self):
        vocab, data, options = self._get_vocab_data_options(True, True)
        train(options, data, 1, self.tmp_dir, self.tmp_dir)

        # now test
        tf.reset_default_graph()
        options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
        data_test, vocab_test = self._get_data(True, True, True)
        perplexity = test(options, ckpt_file, data_test, batch_size=1)
        self.assertTrue(perplexity < 20.0)
Exemple #2
0
def main(args):
    options, ckpt_file = load_options_latest_checkpoint(args.save_dir)

    # load the vocab
    if 'char_cnn' in options:
        max_word_length = options['char_cnn']['max_characters_per_token']
    else:
        max_word_length = None
    vocab = load_vocab(args.vocab_file, max_word_length)

    test_prefix = args.test_prefix

    kwargs = {
        'test': True,
        'shuffle_on_load': False,
    }

    if options.get('bidirectional'):
        data = BidirectionalLMDataset(test_prefix, vocab, **kwargs)
    else:
        data = LMDataset(test_prefix, vocab, **kwargs)

    test(options, ckpt_file, data, batch_size=args.batch_size)
Exemple #3
0
    def test_train_skip_connections(self):
        bidirectional = True
        use_chars = False
        vocab, data, options = self._get_vocab_data_options(
            bidirectional, use_chars)
        options['lstm']['use_skip_connections'] = True
        train(options, data, 1, self.tmp_dir, self.tmp_dir)

        # now test
        tf.reset_default_graph()
        options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
        data_test, vocab_test = self._get_data(bidirectional,
                                               use_chars,
                                               test=True)
        perplexity = test(options, ckpt_file, data_test, batch_size=1)
        self.assertTrue(perplexity < 20.0)
Exemple #4
0
    def test_train_shared_softmax_embedding(self):
        bidirectional = True
        use_chars = False

        vocab, data, options = self._get_vocab_data_options(
            bidirectional, use_chars, share_embedding_softmax=True)
        train(options, data, 1, self.tmp_dir, self.tmp_dir)

        # now test
        tf.reset_default_graph()
        options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
        data_test, vocab_test = self._get_data(bidirectional,
                                               use_chars,
                                               test=True)
        perplexity = test(options, ckpt_file, data_test, batch_size=1)
        self.assertTrue(perplexity < 20.0)