def test_train_shared_softmax_no_chars(self):
    bidirectional = True
    use_chars = True
    vocab, data, options = self._get_vocab_data_options(
        bidirectional, use_chars, share_embedding_softmax=True)

    # character inputs and sharing weights not supported
    with self.assertRaises(ValueError):
        train(options, data, 1, self.tmp_dir, self.tmp_dir)
def test_train_bilm_chars(self):
    vocab, data, options = self._get_vocab_data_options(True, True)
    train(options, data, 1, self.tmp_dir, self.tmp_dir)

    # now test
    tf.reset_default_graph()
    options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
    data_test, vocab_test = self._get_data(True, True, True)
    perplexity = test(options, ckpt_file, data_test, batch_size=1)
    self.assertTrue(perplexity < 20.0)
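# The test methods in this section are unittest.TestCase methods: they use
# self.tmp_dir for checkpoints/logs and call class helpers such as
# _get_vocab_data_options and _get_data that are defined elsewhere in the test
# module. A minimal sketch of the surrounding harness -- the class name, import
# paths, and setUp/tearDown bodies are assumptions, not taken from the source:
import shutil
import tempfile
import unittest

import tensorflow as tf

from bilm.training import train, test, load_options_latest_checkpoint


class TestTrainBilm(unittest.TestCase):
    def setUp(self):
        # fresh scratch directory for each test's checkpoints and logs
        self.tmp_dir = tempfile.mkdtemp(prefix='bilm_test_')

    def tearDown(self):
        shutil.rmtree(self.tmp_dir, ignore_errors=True)

    # ... the test_* methods shown in this section would live on this class ...


if __name__ == '__main__':
    unittest.main()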
def main(args):
    # load the vocab
    vocab = load_vocab(args.vocab_file, None)

    # define the options
    batch_size = 256  # batch size for each GPU
    n_gpus = 1

    # number of tokens in training data (this for 1B Word Benchmark)
    n_train_tokens = 14273184  # 100000*16

    options = {
        'bidirectional': True,

        # 'char_cnn': {'activation': 'relu',
        #              'embedding': {'dim': 200},
        #              'filters': [[1, 32],
        #                          [2, 32],
        #                          [3, 64],
        #                          [4, 128],
        #                          [5, 256],
        #                          [6, 512],
        #                          [7, 1024]],
        #              'max_characters_per_token': 50,
        #              'n_characters': 261,
        #              'n_highway': 2},

        'dropout': 0.1,

        'lstm': {
            'cell_clip': 3,
            'dim': 4096,
            'n_layers': 2,
            'proj_clip': 3,
            'projection_dim': 200,  # 512
            'use_skip_connections': True},

        'all_clip_norm_val': 10.0,

        'n_epochs': 3,
        'n_train_tokens': n_train_tokens,
        'batch_size': batch_size,
        'n_tokens_vocab': vocab.size,
        'unroll_steps': 20,  # 5
        'n_negative_samples_batch': 8192,
    }

    prefix = args.train_prefix  # '../corpus_me/wd_fact_cut.txt'
    data = BidirectionalLMDataset(prefix, vocab, test=False,
                                  shuffle_on_load=True)
    print('load data BidirectionalLMDataset')

    tf_save_dir = args.save_dir
    tf_log_dir = args.save_dir

    # load the model and train
    train(options, data, n_gpus, tf_save_dir=tf_save_dir,
          tf_log_dir=tf_log_dir, restart_ckpt_file=args.save_dir)
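# main(args) above reads args.vocab_file, args.train_prefix and args.save_dir,
# so it expects an argparse-style namespace. A minimal sketch of a matching
# command-line entry point -- the help strings and the script name in the
# usage example are assumptions, not taken from the source:
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Train a bidirectional LM.')
    parser.add_argument('--save_dir',
                        help='Directory for checkpoints and TensorBoard logs')
    parser.add_argument('--vocab_file', help='Path to the vocabulary file')
    parser.add_argument('--train_prefix',
                        help='Prefix/glob for the training data files')

    main(parser.parse_args())

# Example invocation (assuming the script is saved as train_elmo.py):
#   python train_elmo.py --save_dir ./ckpt --vocab_file vocab.txt \
#       --train_prefix 'data/*.txt'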
def test_train_skip_connections(self):
    bidirectional = True
    use_chars = False
    vocab, data, options = self._get_vocab_data_options(
        bidirectional, use_chars)
    options['lstm']['use_skip_connections'] = True
    train(options, data, 1, self.tmp_dir, self.tmp_dir)

    # now test
    tf.reset_default_graph()
    options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
    data_test, vocab_test = self._get_data(bidirectional, use_chars,
                                           test=True)
    perplexity = test(options, ckpt_file, data_test, batch_size=1)
    self.assertTrue(perplexity < 20.0)
def test_train_shared_softmax_embedding(self):
    bidirectional = True
    use_chars = False
    vocab, data, options = self._get_vocab_data_options(
        bidirectional, use_chars, share_embedding_softmax=True)
    train(options, data, 1, self.tmp_dir, self.tmp_dir)

    # now test
    tf.reset_default_graph()
    options, ckpt_file = load_options_latest_checkpoint(self.tmp_dir)
    data_test, vocab_test = self._get_data(bidirectional, use_chars,
                                           test=True)
    perplexity = test(options, ckpt_file, data_test, batch_size=1)
    self.assertTrue(perplexity < 20.0)
def main(args):
    options, ckpt_file = load_options_latest_checkpoint(args.save_dir)

    if 'char_cnn' in options:
        max_word_length = options['char_cnn']['max_characters_per_token']
    else:
        max_word_length = None
    vocab = load_vocab(args.vocab_file, max_word_length)

    prefix = args.train_prefix

    kwargs = {
        'test': False,
        'shuffle_on_load': True,
    }

    if options.get('bidirectional'):
        data = BidirectionalLMDataset(prefix, vocab, **kwargs)
    else:
        data = LMDataset(prefix, vocab, **kwargs)

    tf_save_dir = args.save_dir
    tf_log_dir = args.save_dir

    # set optional inputs
    if args.n_train_tokens > 0:
        options['n_train_tokens'] = args.n_train_tokens
    if args.n_epochs > 0:
        options['n_epochs'] = args.n_epochs
    if args.batch_size > 0:
        options['batch_size'] = args.batch_size

    train(options, data, args.n_gpus, tf_save_dir, tf_log_dir,
          restart_ckpt_file=ckpt_file)
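# This restart-style main(args) additionally reads args.n_gpus and treats
# non-positive args.n_train_tokens / args.n_epochs / args.batch_size as
# "keep the value stored with the checkpointed options". A sketch of a
# matching entry point -- defaults and help strings are assumptions:
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(
        description='Restart LM training from the latest checkpoint.')
    parser.add_argument('--save_dir',
                        help='Directory holding the checkpoint to resume from')
    parser.add_argument('--vocab_file', help='Path to the vocabulary file')
    parser.add_argument('--train_prefix',
                        help='Prefix/glob for the training data files')
    parser.add_argument('--n_gpus', type=int, default=1,
                        help='Number of GPUs to train on')
    # Non-positive values leave the corresponding checkpointed option untouched
    # (see the overrides in main above).
    parser.add_argument('--n_train_tokens', type=int, default=-1)
    parser.add_argument('--n_epochs', type=int, default=-1)
    parser.add_argument('--batch_size', type=int, default=-1)

    main(parser.parse_args())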
def test_shared_variables(self):
    vocab, data, options = self._get_vocab_data_options(True, True)
    options['n_epochs'] = 1
    train(options, data, 2, self.tmp_dir, self.tmp_dir)
    # variables should be shared across the two GPU towers, so the global
    # variable count matches a single-tower model
    self.assertEqual(len(tf.global_variables()), 64)