def dump_bilm_embeddings(vocab_file, dataset_file, options_file,
                         weight_file, outfile):
    with open(options_file, 'r') as fin:
        options = json.load(fin)
    max_word_length = options['char_cnn']['max_characters_per_token']

    vocab = UnicodeCharsVocabulary(vocab_file, max_word_length)
    batcher = Batcher(vocab_file, max_word_length)

    ids_placeholder = tf.placeholder('int32',
                                     shape=(None, None, max_word_length))
    model = BidirectionalLanguageModel(options_file, weight_file)
    ops = model(ids_placeholder)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        sentence_id = 0
        with open(dataset_file, 'r') as fin, h5py.File(outfile, 'w') as fout:
            for line in fin:
                sentence = line.strip().split()
                char_ids = batcher.batch_sentences([sentence])
                embeddings = sess.run(
                    ops['lm_embeddings'],
                    feed_dict={ids_placeholder: char_ids}
                )
                ds = fout.create_dataset(
                    '{}'.format(sentence_id),
                    embeddings.shape[1:], dtype='float32',
                    data=embeddings[0, :, :, :]
                )
                sentence_id += 1
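# A minimal sketch (not part of the original code) showing how the HDF5 file
# written by dump_bilm_embeddings above could be read back.  It only assumes
# h5py and the layout produced above: one float32 dataset per input sentence,
# keyed by the sentence index and shaped (n_lm_layers, n_tokens, embedding_dim).
def inspect_bilm_dump(outfile):
    import h5py
    with h5py.File(outfile, 'r') as fin:
        for sentence_id in sorted(fin.keys(), key=int):
            emb = fin[sentence_id][...]  # (n_lm_layers, n_tokens, embedding_dim)
            print(sentence_id, emb.shape)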
def load_batcher(data_params, cuda):
    languages, Lang_name = [], []
    # Load the data into languages
    data_dir = data_params['data_dir']
    for w in data_params['languages']:
        lang = Language(
            name=w['name'],
            cuda=cuda,
            mode=data_params['mode'],
            mean_center=data_params['mean_center'],
            unit_norm=data_params['unit_norm']
        )
        Lang_name.append(w['name'])
        lang.load(w['filename'], data_dir, max_freq=data_params['max_freq'])
        languages.append(lang)
    batcher = Batcher(languages)
    if 'supervised' in data_params:
        filename = data_params['supervised']['fname']
        random = data_params['supervised']['random']
        max_count = data_params['supervised']['max_count']
        if data_params["data_dir"] == "./muse_data/":
            sup_dir_name = os.path.join(data_dir, "crosslingual", "dictionaries")
        elif data_params["data_dir"] == "./vecmap_data/":
            sup_dir_name = os.path.join(data_dir, "dictionaries")
        batcher.load_from_supervised(
            filename, Lang_name[0], Lang_name[1], sup_dir_name,
            random=random, max_count=max_count)
    return batcher
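# A hypothetical data_params dict, inferred only from the keys that
# load_batcher reads above.  The language names, filenames, mode value and
# counts are placeholders, not values from the original project.
EXAMPLE_DATA_PARAMS = {
    'data_dir': './muse_data/',
    'mode': 'default',        # placeholder; must be a mode understood by Language()
    'mean_center': True,
    'unit_norm': True,
    'max_freq': 200000,       # placeholder vocabulary cutoff passed to lang.load()
    'languages': [
        {'name': 'en', 'filename': 'wiki.en.vec'},   # placeholder embedding files
        {'name': 'es', 'filename': 'wiki.es.vec'},
    ],
    'supervised': {           # optional: seeds the batcher with a bilingual dictionary
        'fname': 'en-es.0-5000.txt',                 # placeholder dictionary file
        'random': False,
        'max_count': 5000,
    },
}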
def dump_embeddings_from_dynamic_bilm(option_file, weight_file, word_file,
                                      char_file, data_file, output_file,
                                      sent_vec=False, sent_vec_type='last',
                                      cell_reset=False):
    """
    Get ELMo embeddings
    """
    with open(option_file, 'r') as fin:
        options = json.load(fin)
    # add one so that 0 is the mask value
    options['char_cnn']['n_characters'] += 1
    max_word_length = options['char_cnn']['max_characters_per_token']
    batcher = Batcher(word_file, char_file, max_word_length)

    # 1D: batch_size, 2D: time_steps, 3D: max_characters_per_token
    ids_placeholder = tf.placeholder('int32',
                                     shape=(None, None, max_word_length))
    model = DynamicLanguageModel(options, weight_file, cell_reset=cell_reset)
    ops = model(ids_placeholder)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())
        print('Computing ELMo...')

        sentence_id = 0
        with open(data_file, 'r') as fin, h5py.File(output_file, 'w') as fout:
            for line in fin:
                if (sentence_id + 1) % 100 == 0:
                    print("%d" % (sentence_id + 1), flush=True, end=" ")

                sentence = line.rstrip().split()
                char_ids = batcher.batch_sentences([sentence])
                embeddings = sess.run(ops['lm_embeddings'],
                                      feed_dict={ids_placeholder: char_ids})
                # 1D: 3 (ELMo layers), 2D: n_words, 3D: vector dim
                embeddings = embeddings[0, :, :, :]

                if sent_vec:
                    embeddings = np.mean(embeddings, axis=1)
                    if sent_vec_type == 'last':
                        embeddings = embeddings[-1]
                    else:
                        embeddings = np.mean(embeddings, axis=0)
                else:
                    # 1D: n_words, 2D: 3 (ELMo layers), 3D: vector dim
                    embeddings = np.transpose(embeddings, (1, 0, 2))

                fout.create_dataset(name=str(sentence_id), data=embeddings)
                sentence_id += 1
    print('Finished')
def __init__(self, model_file_path):
    model_name = re.findall(r'train_\d+', model_file_path)[0] + '_' + \
        re.findall(r'model_\d+_\d+\.\d+', model_file_path)[0]
    self._decode_dir = os.path.join(config.log_root, 'decode_%s' % (model_name))
    self._rouge_ref_dir = os.path.join(self._decode_dir, 'rouge_ref')
    self._rouge_dec_dir = os.path.join(self._decode_dir, 'rouge_dec_dir')
    for p in [self._decode_dir, self._rouge_ref_dir, self._rouge_dec_dir]:
        if not os.path.exists(p):
            os.mkdir(p)

    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    self.batcher = Batcher(config.decode_data_path, self.vocab, mode='decode',
                           batch_size=config.beam_size, single_pass=True)
    self.model = Model(model_file_path, is_eval=True)
def main(unused_argv):
    if len(unused_argv) != 1:  # prints a message if you've entered flags incorrectly
        raise Exception("Problem with flags: %s" % unused_argv)

    tf.logging.set_verbosity(tf.logging.INFO)  # choose what level of logging you want
    tf.logging.info('Starting seq2seq_attention in %s mode...', (PARAMS.mode))

    # Change log_root to PARAMS.log_root/PARAMS.exp_name and create the dir if necessary
    PARAMS.log_root = os.path.join(PARAMS.log_root, PARAMS.exp_name)
    if not os.path.exists(PARAMS.log_root):
        if PARAMS.mode == "train":
            os.makedirs(PARAMS.log_root)
        else:
            raise Exception("Logdir %s doesn't exist. Run in train mode to create it."
                            % (PARAMS.log_root))

    vocab = Vocab(PARAMS.vocab_path, PARAMS.vocab_size)  # create a vocabulary

    # If in decode mode, set batch_size = beam_size
    # Reason: in decode mode, we decode one example at a time.
    # On each step, we have beam_size-many hypotheses in the beam, so we need to
    # make a batch of these hypotheses.
    if PARAMS.mode == 'decode':
        PARAMS.batch_size = PARAMS.beam_size

    # If single_pass=True, check we're in decode mode
    if PARAMS.single_pass and PARAMS.mode != 'decode':
        raise Exception("The single_pass flag should only be True in decode mode")

    # Make a namedtuple hps, containing the values of the hyperparameters that the model needs
    hparam_list = ['mode', 'lr', 'adagrad_init_acc', 'rand_unif_init_mag',
                   'trunc_norm_init_std', 'max_grad_norm', 'hidden_dim', 'emb_dim',
                   'batch_size', 'max_dec_steps', 'max_enc_steps', 'coverage',
                   'cov_loss_wt', 'pointer_gen']
    hps_dict = {}
    for key, val in PARAMS.__flags.items():  # for each flag
        if key in hparam_list:  # if it's in the list
            hps_dict[key] = val  # add it to the dict
    hps = namedtuple("HParams", hps_dict.keys())(**hps_dict)

    # Create a batcher object that will create minibatches of data
    batcher = Batcher(PARAMS.data_path, vocab, hps, single_pass=PARAMS.single_pass)

    tf.set_random_seed(111)  # a seed value for randomness

    if hps.mode == 'train':
        print("creating model...")
        model = AttHistCopyModel(hps, vocab)
        setup_training(model, batcher)
    elif hps.mode == 'eval':
        model = AttHistCopyModel(hps, vocab)
        run_eval(model, batcher, vocab)
    elif hps.mode == 'decode':
        decode_model_hps = hps  # This will be the hyperparameters for the decoder model
        # The model is configured with max_dec_steps=1 because we only ever run one step
        # of the decoder at a time (to do beam search). Note that the batcher is initialized
        # with max_dec_steps equal to e.g. 100 because the batches need to contain the
        # full summaries.
        decode_model_hps = hps._replace(max_dec_steps=1)
        model = AttHistCopyModel(decode_model_hps, vocab)
        decoder = BeamSearchDecoder(model, batcher, vocab)
        # decode indefinitely (unless single_pass=True, in which case decode the dataset exactly once)
        decoder.decode()
    else:
        raise ValueError("The 'mode' flag must be one of train/eval/decode")
def __init__(self):
    self.vocab = Vocab(config.vocab_path, config.vocab_size)
    self.batcher = Batcher(config.train_data_path, self.vocab, mode='train',
                           batch_size=config.batch_size, single_pass=False)

    train_dir = os.path.join(config.log_root, 'train_%d' % (int(time.time())))
    if not os.path.exists(config.log_root):
        os.mkdir(config.log_root)
    if not os.path.exists(train_dir):
        os.mkdir(train_dir)

    self.model_dir = os.path.join(train_dir, 'model')
    if not os.path.exists(self.model_dir):
        os.mkdir(self.model_dir)
def dump_token_embeddings(vocab_file, options_file, weight_file, outfile):
    '''
    Given an input vocabulary file, dump all the token embeddings to the
    outfile.  The result can be used as the embedding_weight_file when
    constructing a BidirectionalLanguageModel.
    '''
    with open(options_file, 'r') as fin:
        options = json.load(fin)
    max_word_length = options['char_cnn']['max_characters_per_token']

    vocab = UnicodeCharsVocabulary(vocab_file, max_word_length)
    batcher = Batcher(vocab_file, max_word_length)

    ids_placeholder = tf.placeholder('int32',
                                     shape=(None, None, max_word_length))
    model = BidirectionalLanguageModel(options_file, weight_file)
    embedding_op = model(ids_placeholder)['token_embeddings']

    n_tokens = vocab.size
    embed_dim = int(embedding_op.shape[2])

    embeddings = np.zeros((n_tokens, embed_dim), dtype=DTYPE)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        for k in range(n_tokens):
            token = vocab.id_to_word(k)  # TODO: look up the actual word for this id
            char_ids = batcher.batch_sentences([[token]])[0, 1, :].reshape(
                1, 1, -1)
            embeddings[k, :] = sess.run(
                embedding_op, feed_dict={ids_placeholder: char_ids}
            )

    with h5py.File(outfile, 'w') as fout:
        ds = fout.create_dataset(
            'embedding', embeddings.shape, dtype='float32', data=embeddings
        )
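# Example call for dump_token_embeddings above (a sketch; the file names are
# placeholders standing in for an actual bilm-tf vocabulary/options/weights
# checkpoint).  Per the docstring, the resulting HDF5 file can then be passed
# as embedding_weight_file when constructing a BidirectionalLanguageModel.
if __name__ == '__main__':
    dump_token_embeddings(
        vocab_file='vocab.txt',
        options_file='options.json',
        weight_file='lm_weights.hdf5',
        outfile='vocab_embeddings.hdf5',
    )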
class BeamSearch(object):
    def __init__(self, model_file_path):
        model_name = re.findall(r'train_\d+', model_file_path)[0] + '_' + \
            re.findall(r'model_\d+_\d+\.\d+', model_file_path)[0]
        print('o MODEL NAME: ', model_name)
        self._decode_dir = os.path.join(config.log_root, 'decode_%s' % (model_name))
        self._rouge_ref_dir = os.path.join(self._decode_dir, 'rouge_ref')
        self._rouge_dec_dir = os.path.join(self._decode_dir, 'rouge_dec_dir')
        for p in [self._decode_dir, self._rouge_ref_dir, self._rouge_dec_dir]:
            if not os.path.exists(p):
                os.mkdir(p)

        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(config.decode_data_path, self.vocab, mode='decode',
                               batch_size=config.beam_size, single_pass=True)
        self.model = Model(model_file_path, is_eval=True)

    def sort_beams(self, beams):
        return sorted(beams, key=lambda h: h.avg_log_prob, reverse=True)

    def decode(self):
        start = time.time()
        counter = 0
        batch = self.batcher.next_batch()
        while batch is not None:  # and counter <= 100  # 11490
            # Run beam search to get best Hypothesis
            best_summary = self.beam_search(batch)

            # Extract the output ids from the hypothesis and convert back to words
            output_ids = [int(t) for t in best_summary.tokens[1:]]
            decoded_words = data.outputids2words(
                output_ids, self.vocab,
                (batch.art_oovs[0] if config.pointer_gen else None))

            # Remove the [STOP] token from decoded_words, if necessary
            try:
                fst_stop_idx = decoded_words.index(data.STOP_DECODING)
                decoded_words = decoded_words[:fst_stop_idx]
            except ValueError:
                decoded_words = decoded_words

            original_abstract_sents = batch.original_abstracts_sents[0]

            write_for_rouge(original_abstract_sents, decoded_words, counter,
                            self._rouge_ref_dir, self._rouge_dec_dir)
            counter += 1
            if counter % 10 == 0:
                print('%d example in %d sec' % (counter, time.time() - start))
                start = time.time()
            batch = self.batcher.next_batch()

        print("Decoder has finished reading dataset for single_pass.")
        print("Now starting ROUGE eval...")
        results_dict = rouge_eval(self._rouge_ref_dir, self._rouge_dec_dir)
        rouge_log(results_dict, self._decode_dir)

    def beam_search(self, batch):
        # The batch should have only one example
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_0, coverage_t_0 = \
            get_input_from_batch(batch)

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        s_t_0 = self.model.reduce_state(encoder_hidden)

        dec_h, dec_c = s_t_0  # 1 x 2*hidden_size
        dec_h = dec_h.squeeze()
        dec_c = dec_c.squeeze()

        # Prepare decoder batch
        beams = [
            Beam(tokens=[self.vocab.word2id(data.START_DECODING)],
                 log_probs=[0.0],
                 state=(dec_h[0], dec_c[0]),
                 context=c_t_0[0],
                 coverage=(coverage_t_0[0] if config.is_coverage else None))
            for _ in range(config.beam_size)
        ]
        results = []
        steps = 0
        while steps < config.max_dec_steps and len(results) < config.beam_size:
            latest_tokens = [h.latest_token for h in beams]
            latest_tokens = [t if t < self.vocab.size()
                             else self.vocab.word2id(data.UNKNOWN_TOKEN)
                             for t in latest_tokens]
            y_t_1 = paddle.to_tensor(latest_tokens)
            all_state_h = []
            all_state_c = []
            all_context = []

            for h in beams:
                state_h, state_c = h.state
                all_state_h.append(state_h)
                all_state_c.append(state_c)
                all_context.append(h.context)

            s_t_1 = (paddle.stack(all_state_h, 0).unsqueeze(0),
                     paddle.stack(all_state_c, 0).unsqueeze(0))
            c_t_1 = paddle.stack(all_context, 0)

            coverage_t_1 = None
            if config.is_coverage:
                all_coverage = []
                for h in beams:
                    all_coverage.append(h.coverage)
                coverage_t_1 = paddle.stack(all_coverage, 0)

            final_dist, s_t, c_t, attn_dist, p_gen, coverage_t = self.model.decoder(
                y_t_1, s_t_1, encoder_outputs, encoder_feature, enc_padding_mask,
                c_t_1, extra_zeros, enc_batch_extend_vocab, coverage_t_1, steps)
            log_probs = paddle.log(final_dist)
            topk_log_probs, topk_ids = paddle.topk(log_probs, config.beam_size * 2)

            dec_h, dec_c = s_t
            dec_h = dec_h.squeeze()
            dec_c = dec_c.squeeze()

            all_beams = []
            num_orig_beams = 1 if steps == 0 else len(beams)
            for i in range(num_orig_beams):
                h = beams[i]
                state_i = (dec_h[i], dec_c[i])
                context_i = c_t[i]
                coverage_i = (coverage_t[i] if config.is_coverage else None)

                for j in range(config.beam_size * 2):  # for each of the top 2*beam_size hyps:
                    new_beam = h.extend(token=topk_ids[i, j].numpy()[0],
                                        log_prob=topk_log_probs[i, j].numpy()[0],
                                        state=state_i,
                                        context=context_i,
                                        coverage=coverage_i)
                    all_beams.append(new_beam)

            beams = []
            for h in self.sort_beams(all_beams):
                if h.latest_token == self.vocab.word2id(data.STOP_DECODING):
                    if steps >= config.min_dec_steps:
                        results.append(h)
                else:
                    beams.append(h)
                if len(beams) == config.beam_size or len(results) == config.beam_size:
                    break

            steps += 1

        if len(results) == 0:
            results = beams

        beams_sorted = self.sort_beams(results)

        return beams_sorted[0]
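# A hypothetical driver for the BeamSearch class above.  The checkpoint path is
# a placeholder and only needs to match the 'train_<ts>/model/model_<iter>_<loss>'
# naming that __init__ parses with its regular expressions.
if __name__ == '__main__':
    beam_processor = BeamSearch('log/train_1620000000/model/model_0050000_2.50000000')
    beam_processor.decode()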
class Trainer(object):
    def __init__(self):
        self.vocab = Vocab(config.vocab_path, config.vocab_size)
        self.batcher = Batcher(
            config.train_data_path,
            self.vocab,
            mode='train',
            batch_size=config.batch_size,
            single_pass=False)

        train_dir = os.path.join(config.log_root, 'train_%d' % (int(time.time())))
        if not os.path.exists(config.log_root):
            os.mkdir(config.log_root)
        if not os.path.exists(train_dir):
            os.mkdir(train_dir)

        self.model_dir = os.path.join(train_dir, 'model')
        if not os.path.exists(self.model_dir):
            os.mkdir(self.model_dir)

    def save_model(self, running_avg_loss, iter):
        state = {
            'encoder': self.model.encoder.state_dict(),
            'decoder': self.model.decoder.state_dict(),
            'reduce_state': self.model.reduce_state.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }
        model_save_dir = os.path.join(self.model_dir,
                                      'model_%06d_%.8f' % (iter, running_avg_loss))
        for k in state:
            model_save_path = os.path.join(model_save_dir, '%s.params' % k)
            paddle.save(state[k], model_save_path)
        return model_save_dir

    def setup_train(self, model_file_path=None):
        self.model = Model(model_file_path)
        initial_lr = config.lr_coverage if config.is_coverage else config.lr

        params = list(self.model.encoder.parameters()) + \
            list(self.model.decoder.parameters()) + \
            list(self.model.reduce_state.parameters())
        assert len(params) == 31
        self.optimizer = Adagrad(
            parameters=params,
            learning_rate=initial_lr,
            initial_accumulator_value=config.adagrad_init_acc,
            epsilon=1.0e-10,
            grad_clip=paddle.nn.ClipGradByGlobalNorm(
                clip_norm=config.max_grad_norm))

        start_iter, start_loss = 0, 0

        if model_file_path is not None:
            start_iter = int(model_file_path.split('_')[-2])
            start_loss = float(
                model_file_path.split('_')[-1].replace(os.sep, ''))

            if not config.is_coverage:
                self.optimizer.set_state_dict(
                    paddle.load(
                        os.path.join(model_file_path, 'optimizer.params')))

        return start_iter, start_loss

    def train_one_batch(self, batch, iter):
        enc_batch, enc_padding_mask, enc_lens, enc_batch_extend_vocab, extra_zeros, c_t_1, coverage = \
            get_input_from_batch(batch)
        dec_batch, dec_padding_mask, max_dec_len, dec_lens_var, target_batch = \
            get_output_from_batch(batch)

        self.optimizer.clear_gradients()

        encoder_outputs, encoder_feature, encoder_hidden = self.model.encoder(
            enc_batch, enc_lens)
        s_t_1 = self.model.reduce_state(encoder_hidden)

        step_losses = []
        for di in range(min(max_dec_len, config.max_dec_steps)):
            y_t_1 = dec_batch[:, di]
            final_dist, s_t_1, c_t_1, attn_dist, p_gen, next_coverage = \
                self.model.decoder(y_t_1, s_t_1, encoder_outputs, encoder_feature,
                                   enc_padding_mask, c_t_1, extra_zeros,
                                   enc_batch_extend_vocab, coverage, di)
            target = target_batch[:, di]
            add_index = paddle.arange(0, target.shape[0])
            new_index = paddle.stack([add_index, target], axis=1)
            gold_probs = paddle.gather_nd(final_dist, new_index).squeeze()
            step_loss = -paddle.log(gold_probs + config.eps)
            if config.is_coverage:
                step_coverage_loss = paddle.sum(
                    paddle.minimum(attn_dist, coverage), 1)
                step_loss = step_loss + config.cov_loss_wt * step_coverage_loss
                coverage = next_coverage

            step_mask = dec_padding_mask[:, di]
            step_loss = step_loss * step_mask
            step_losses.append(step_loss)

        sum_losses = paddle.sum(paddle.stack(step_losses, 1), 1)
        batch_avg_loss = sum_losses / dec_lens_var
        loss = paddle.mean(batch_avg_loss)

        loss.backward()

        self.optimizer.minimize(loss)

        return loss.numpy()[0]

    def trainIters(self, n_iters, model_file_path=None):
        iter, running_avg_loss = self.setup_train(model_file_path)
        start = time.time()
        while iter < n_iters:
            batch = self.batcher.next_batch()
            loss = self.train_one_batch(batch, iter)

            running_avg_loss = calc_running_avg_loss(loss, running_avg_loss, iter)
            iter += 1

            print(
                'global step %d/%d, step loss: %.8f, running avg loss: %.8f, speed: %.2f step/s'
                % (iter, n_iters, loss, running_avg_loss,
                   1.0 / (time.time() - start)))
            start = time.time()
            if iter % 5000 == 0 or iter == 1000:
                model_save_dir = self.save_model(running_avg_loss, iter)
                print(
                    'Saved model for iter %d with running avg loss %.8f to directory: %s'
                    % (iter, running_avg_loss, model_save_dir))
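# A hypothetical entry point for the Trainer class above.  n_iters and the
# optional resume path are placeholders; trainIters saves a checkpoint every
# 5000 iterations (and at iteration 1000) under log_root/train_<timestamp>/model.
if __name__ == '__main__':
    trainer = Trainer()
    trainer.trainIters(n_iters=500000, model_file_path=None)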
def test_batch_sentences(self):
    batcher = Batcher(os.path.join(DATA_FIXTURES, 'vocab_test.txt'), 50)
    sentences = [['The', 'first', 'sentence'], ['Second', '.']]
    x_char_ids = batcher.batch_sentences(sentences)
    self.assertTrue((x_char_ids == self._expected_char_ids).all())
print('Success rate: %d / %d' % (success_rate, len(train_sentences)))


if __name__ == "__main__":
    config = json.load(open('config.json', 'r'))

    data_path = '/dev/shm/coco/'
    # data_path = 'coco/'
    train_dir = 'summaries/Caption_training' + datetime.datetime.strftime(
        datetime.datetime.today(), '%d%m%Y%H%M%S')

    vocab = Vocab('vocab')
    model = CaptioningNetwork(config, vocab)
    batcher = Batcher(data_path, config, vocab)

    tf.set_random_seed(111)

    # Setup training
    tf.logging.info('Building graph...')
    model.build_graph()

    # print(tf.GraphKeys.GLOBAL_VARIABLES)
    # print(tf.GraphKeys.TRAINABLE_VARIABLES)

    # Feed forward test
    # with sess:
    #     sess.run(...)
    #     output_shape = ...
    #     print('Feed forward OK! Output shape: %s' % str(output_shape))
def _check_weighted_layer(self, l2_coef, do_layer_norm, use_top_only):
    # create the Batcher
    vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
    batcher = Batcher(vocab_file, 50)

    # load the model
    options_file = os.path.join(FIXTURES, 'options.json')
    weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
    character_ids = tf.placeholder('int32', (None, None, 50))
    model = BidirectionalLanguageModel(
        options_file, weight_file, max_batch_size=4)
    bilm_ops = model(character_ids)

    weighted_ops = []
    for k in range(2):
        ops = weight_layers(str(k), bilm_ops, l2_coef=l2_coef,
                            do_layer_norm=do_layer_norm,
                            use_top_only=use_top_only)
        weighted_ops.append(ops)

    # initialize
    self.sess.run(tf.global_variables_initializer())

    n_expected_trainable_weights = 2 * (1 + int(not use_top_only))
    self.assertEqual(len(tf.trainable_variables()),
                     n_expected_trainable_weights)
    # and one regularizer per weighted layer
    n_expected_reg_losses = 2 * int(not use_top_only)
    self.assertEqual(
        len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)),
        n_expected_reg_losses,
    )

    # Set the variables.
    weights = [[np.array([0.1, 0.3, 0.5]), np.array([1.1])],
               [np.array([0.2, 0.4, 0.6]), np.array([0.88])]]
    for k in range(2):
        with tf.variable_scope('', reuse=True):
            if not use_top_only:
                W = tf.get_variable('{}_ELMo_W'.format(k))
                _ = self.sess.run([W.assign(weights[k][0])])
            gamma = tf.get_variable('{}_ELMo_gamma'.format(k))
            _ = self.sess.run([gamma.assign(weights[k][1])])

    # make some data
    sentences = [
        ['The', 'first', 'sentence', '.'],
        ['The', 'second'],
        ['Third']
    ]
    X_chars = batcher.batch_sentences(sentences)

    ops = model(character_ids)
    lm_embeddings, mask, weighted0, weighted1 = self.sess.run(
        [ops['lm_embeddings'], ops['mask'],
         weighted_ops[0]['weighted_op'], weighted_ops[1]['weighted_op']],
        feed_dict={character_ids: X_chars}
    )
    actual_elmo = [weighted0, weighted1]

    # check the mask first
    expected_mask = [[True, True, True, True],
                     [True, True, False, False],
                     [True, False, False, False]]
    self.assertTrue((expected_mask == mask).all())

    # Now compute the actual weighted layers
    for k in range(2):
        normed_weights = np.exp(weights[k][0] + 1.0 / 3) / np.sum(
            np.exp(weights[k][0] + 1.0 / 3))
        # masked layer normalization
        expected_elmo = np.zeros((3, 4, lm_embeddings.shape[-1]))
        if not use_top_only:
            for j in range(3):  # number of LM layers
                if do_layer_norm:
                    mean = np.mean(lm_embeddings[:, j, :, :][mask])
                    std = np.std(lm_embeddings[:, j, :, :][mask])
                    normed_lm_embed = (lm_embeddings[:, j, :, :] - mean) / (
                        std + 1E-12)
                    expected_elmo += normed_weights[j] * normed_lm_embed
                else:
                    expected_elmo += normed_weights[j] * lm_embeddings[:, j, :, :]
        else:
            expected_elmo += lm_embeddings[:, -1, :, :]

        # the scale parameter
        expected_elmo *= weights[k][1]
        self.assertTrue(
            np.allclose(expected_elmo, actual_elmo[k], atol=1e-6)
        )
def test_bilm(self):
    sentences, expected_lm_embeddings = _load_sentences_embeddings()

    # create the Batcher
    vocab_file = os.path.join(FIXTURES, 'vocab_test.txt')
    batcher = Batcher(vocab_file, 50)

    # load the model
    options_file = os.path.join(FIXTURES, 'options.json')
    weight_file = os.path.join(FIXTURES, 'lm_weights.hdf5')
    character_ids = tf.placeholder('int32', (None, None, 50))
    model = BidirectionalLanguageModel(options_file, weight_file,
                                       max_batch_size=4)

    # get the ops to compute embeddings
    ops = model(character_ids)

    # initialize
    self.sess.run(tf.global_variables_initializer())

    # We shouldn't have any trainable variables
    self.assertEqual(len(tf.trainable_variables()), 0)

    # will run 10 batches of 3 sentences
    for i in range(10):
        # make a batch of sentences
        batch_sentences = []
        for k in range(3):
            sentence = sentences[k][i].strip().split()
            batch_sentences.append(sentence)

        X = batcher.batch_sentences(batch_sentences)

        lm_embeddings, lengths = self.sess.run(
            [ops['lm_embeddings'], ops['lengths']],
            feed_dict={character_ids: X})

        # TODO: the true (unpadded) lengths of the sentences
        actual_lengths = [len(sent) for sent in batch_sentences]
        self.assertEqual(actual_lengths, list(lengths))

        # get the expected embeddings and compare!
        expected_y = [expected_lm_embeddings[k][i] for k in range(3)]
        for k in range(3):
            self.assertTrue(
                np.allclose(lm_embeddings[k, 2, :lengths[k], :],
                            expected_y[k], atol=1.0e-6))

    # Finally, check that the states are being updated properly.
    # All batches were size=3, so last element of states should always
    # be zero.
    third_states = []
    for direction in ['forward', 'backward']:
        states = self.sess.run(
            model._graphs[character_ids].lstm_init_states[direction])
        for i in range(2):
            for state in states[i]:
                self.assertTrue(np.sum(np.abs(state[-1, :])) < 1e-7)
                third_states.append(state[2, :])

    # Run a batch with size=2, the third state should not have been updated
    _ = self.sess.run(
        ops['lm_embeddings'],
        feed_dict={character_ids: np.ones((2, 5, 50), dtype=np.int32)})

    k = 0
    for direction in ['forward', 'backward']:
        states = self.sess.run(
            model._graphs[character_ids].lstm_init_states[direction])
        for i in range(2):
            for state in states[i]:
                self.assertTrue(
                    np.allclose(third_states[k], state[2, :], atol=1e-6))
                k += 1