def get_feed_dict(self, word_idx_seqs, task, label_seqs=None, lr=None, dropout=None):
    word_idx_seqs = [list(word_idxs) for word_idxs in word_idx_seqs]
    word_ids, sequence_lengths = pad_sequences(word_idx_seqs, 0)

    # build feed dictionary
    feed = {
        self.word_idxs: word_ids,
        self.sequence_lengths: sequence_lengths
    }

    if label_seqs is not None:
        label_seqs = [list(labels) for labels in label_seqs]
        labels, _ = pad_sequences(label_seqs, 0)
        if task == 'src1':
            feed[self.labels_src1] = labels
        elif task == 'src2':
            feed[self.labels_src2] = labels
        else:
            feed[self.labels_tar] = labels

    feed[self.lr] = lr
    feed[self.dropout] = dropout
    return feed, sequence_lengths
def get_feed_dict(self, word_idx_seqs, label_seqs=None, lr=None, dropout=None, manual_feat=None):
    word_idx_seqs = [list(word_idxs) for word_idxs in word_idx_seqs]
    word_ids, sequence_lengths = utils.pad_sequences(word_idx_seqs, 0)
    # print(len(word_ids))

    # build feed dictionary
    feed = {
        self.word_idxs: word_ids,
        self.sequence_lengths: sequence_lengths
    }

    if label_seqs is not None:
        label_seqs = [list(labels) for labels in label_seqs]
        labels, _ = utils.pad_sequences(label_seqs, 0)
        feed[self.labels] = labels

    # guard on the batch argument as well as the placeholder: padding a None batch would fail
    if self.manual_feat is not None and manual_feat is not None:
        manual_feat, lens = utils.pad_feat_sequence(manual_feat, manual_feat[0].shape[1])
        feed[self.manual_feat] = manual_feat

    feed[self.lr] = lr
    feed[self.dropout] = dropout
    return feed, sequence_lengths
def get_feed_dict(self, word_embeddings, label_seqs=None, lr=None, dropout=None):
    word_embed_seqs, sequence_lengths = utils.pad_embed_sequences(
        word_embeddings, self.word_embed_pad)
    word_embed_seqs = np.array(word_embed_seqs, np.float32)
    # print(word_embed_seqs.shape)

    # build feed dictionary
    feed = {
        self.word_embeddings_input: word_embed_seqs,
        self.sequence_lengths: sequence_lengths
    }

    if label_seqs is not None:
        label_seqs = [list(labels) for labels in label_seqs]
        labels, _ = utils.pad_sequences(label_seqs, 0)
        feed[self.labels] = labels

    feed[self.lr] = lr
    feed[self.dropout] = dropout
    return feed, sequence_lengths
def get_feed_dict_ol(self, embed_arr, seq_lens, lr, dropout, task, label_seqs=None):
    feed = {
        self.word_embeddings_input: embed_arr,
        self.sequence_lengths: seq_lens
    }

    if label_seqs is not None:
        label_seqs = [list(labels) for labels in label_seqs]
        labels, _ = utils.pad_sequences(label_seqs, 0)
        if task == 'src1':
            feed[self.labels_src1] = labels
        elif task == 'src2':
            feed[self.labels_src2] = labels
        else:
            feed[self.labels_tar] = labels

    feed[self.lr] = lr
    feed[self.dropout] = dropout
    return feed
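# The feed builders above all rely on a pad_sequences helper (called directly or
# via utils) that returns a (padded_sequences, sequence_lengths) pair. The sketch
# below is an assumed re-implementation inferred from those call sites, not the
# original helper; the name pad_sequences_sketch is introduced here for illustration.
def pad_sequences_sketch(sequences, pad_tok=0):
    """Pad every sequence to the batch maximum and also return the true lengths."""
    max_len = max(len(seq) for seq in sequences)
    padded, lengths = [], []
    for seq in sequences:
        seq = list(seq)
        lengths.append(len(seq))
        padded.append(seq + [pad_tok] * (max_len - len(seq)))
    return padded, lengths

# Hypothetical usage mirroring get_feed_dict:
#   word_ids, sequence_lengths = pad_sequences_sketch([[4, 8, 15], [16, 23]], 0)
#   -> word_ids == [[4, 8, 15], [16, 23, 0]], sequence_lengths == [3, 2]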
def ubuntu_data_load(params):
    """
    Load the Ubuntu dialogue corpus pickles and pad them into fixed-size arrays.

    :param params: dict overriding the defaults below ("dataset_dir" is required)
    :return: dict mapping field names to padded numpy arrays (wrapped with a
             "type" tag in the training/validation phase)
    """
    default_params = {
        "dataset_dir": None,
        "phase": "training",
        "training_files": ["responses.pkl", "utterances.pkl"],
        "evaluate_files": ["Evaluate.pkl"],
        "max_num_utterance": 10,
        "max_sentence_len": 50
    }
    default_params.update(params)
    assert default_params["dataset_dir"] and os.path.exists(default_params["dataset_dir"])

    np_dtype = np.int64
    if default_params["phase"] in ["training", "validation"]:
        training_files = default_params["training_files"]
        with open(os.path.join(default_params["dataset_dir"], training_files[0]), 'rb') as f:
            actions = pickle.load(f)
        with open(os.path.join(default_params["dataset_dir"], training_files[1]), 'rb') as f:
            history, true_utt = pickle.load(f)

        # prepare tf dataset
        history, history_len = utils.multi_sequences_padding(
            history,
            max_num_utterance=default_params["max_num_utterance"],
            max_sentence_len=default_params["max_sentence_len"])
        true_utt_len = np.array(utils.get_sequences_length(
            true_utt, maxlen=default_params["max_sentence_len"]), dtype=np_dtype)
        true_utt = np.array(pad_sequences(
            true_utt, default_params["max_sentence_len"], padding='post'), dtype=np_dtype)
        actions_len = np.array(utils.get_sequences_length(
            actions, maxlen=default_params["max_sentence_len"]), dtype=np_dtype)
        actions = np.array(pad_sequences(
            actions, default_params["max_sentence_len"], padding='post'), dtype=np_dtype)
        history, history_len = np.array(history, dtype=np_dtype), np.array(history_len, dtype=np_dtype)

        return {
            "history": {"data": history, "type": "normal"},
            "history_len": {"data": history_len, "type": "normal"},
            "true_utt": {"data": true_utt, "type": "normal"},
            "true_utt_len": {"data": true_utt_len, "type": "normal"},
            "actions": {"data": actions, "type": "share"},
            "actions_len": {"data": actions_len, "type": "share"}
        }
    else:
        evaluate_files = default_params["evaluate_files"]
        with open(os.path.join(default_params["dataset_dir"], evaluate_files[0]), 'rb') as f:
            history, true_utt, labels = pickle.load(f)

        history, history_len = utils.multi_sequences_padding(
            history,
            max_num_utterance=default_params["max_num_utterance"],
            max_sentence_len=default_params["max_sentence_len"])
        true_utt_len = np.array(utils.get_sequences_length(
            true_utt, maxlen=default_params["max_sentence_len"]), dtype=np_dtype)
        true_utt = np.array(pad_sequences(
            true_utt, default_params["max_sentence_len"], padding='post'), dtype=np_dtype)
        history, history_len = np.array(history, dtype=np_dtype), np.array(history_len, dtype=np_dtype)
        labels = np.array(labels, dtype=np_dtype)

        return {
            "history": history,
            "history_len": history_len,
            "true_utt": true_utt,
            "true_utt_len": true_utt_len,
            "labels": labels
        }
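# Hypothetical call to ubuntu_data_load. The directory name "data/ubuntu" is
# illustrative only and assumes the pickled files listed in default_params exist
# there; the helper name _example_ubuntu_data_load is not part of the original code.
def _example_ubuntu_data_load():
    train_tensors = ubuntu_data_load({
        "dataset_dir": "data/ubuntu",
        "phase": "training",
        "max_num_utterance": 10,
        "max_sentence_len": 50,
    })
    history = train_tensors["history"]["data"]    # shape: (n_samples, 10, 50)
    actions = train_tensors["actions"]["data"]    # candidate responses, tagged "share"
    return history, actions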
def numpy_process():
    max_batch_size = 100000
    np_cache_file = None
    results = {}
    keys = ["history_np_list", "history_len", "response_features_np_list", "true_utt",
            "true_utt_len", "labels", "history_bert_id_np_list", "history_bert_len",
            "history_bert_mask_np_list", "history_bert_segment_np_list",
            "history_alignment_np_list", "true_utt_bert_id", "true_utt_bert_len",
            "true_utt_bert_mask", "true_utt_bert_segment", "true_utt_alignment"]

    if default_params["phase"] in ["training", "validation"]:
        np_cache_file = os.path.join(default_params["dataset_dir"],
                                     "numpy_training_" + training_files[0])
    else:
        np_cache_file = os.path.join(default_params["dataset_dir"],
                                     "numpy_evaluate_" + evaluate_files[0][5:])

    if os.path.isfile(np_cache_file):
        results = pickle.load(open(np_cache_file, 'rb'))

    if (use_bert_embeddings and all(key in results for key in keys)) or \
            (not use_bert_embeddings and all(key in results for key in keys[:6])):
        # every required field is already cached
        pass
    else:
        inputs = process()

        if "history_np_list" not in results:
            history, history_len = utils.multi_sequences_padding(
                tqdm(inputs['c'], desc="Sequence Padding"),
                max_num_utterance=default_params["max_num_utterance"],
                max_sentence_len=default_params["max_sentence_len"])
            results["history_np_list"] = [np.array(history[i: i + max_batch_size], dtype=np_dtype)
                                          for i in range(0, len(history), max_batch_size)]
            results["history_len"] = np.array(history_len, dtype=np_dtype)

        if "response_features_np_list" not in results:
            feature_len = len(inputs["r_feature"][0][0][0])
            response_features, _ = utils.multi_sequences_padding(
                tqdm(inputs["r_feature"], desc="Feature Sequence Padding"),
                max_num_utterance=default_params["max_num_utterance"],
                max_sentence_len=default_params["max_sentence_len"],
                padding_element=[0] * feature_len)
            results["response_features_np_list"] = [
                np.array(response_features[i:i + max_batch_size], dtype=np_float_dtype)
                for i in range(0, len(response_features), max_batch_size)]

        if "true_utt_len" not in results:
            results["true_utt_len"] = np.array(
                utils.get_sequences_length(inputs['r'], maxlen=default_params["max_sentence_len"]),
                dtype=np_dtype)
        if "true_utt" not in results:
            results["true_utt"] = np.array(
                pad_sequences(inputs['r'], default_params["max_sentence_len"], padding='post'),
                dtype=np_dtype)
        if "labels" not in results:
            results["labels"] = np.array(inputs['y'], dtype=np_dtype)

        if use_bert_embeddings:
            if "history_bert_id_np_list" not in results:
                history_bert_id, history_bert_len = utils.multi_sequences_padding(
                    tqdm(inputs["c_bert"]["id"], desc="Bert Sequence Padding"),
                    max_num_utterance=default_params["max_num_utterance"],
                    max_sentence_len=default_params["bert_max_sentence_len"])
                results["history_bert_id_np_list"] = [
                    np.array(history_bert_id[i: i + max_batch_size], dtype=np_dtype)
                    for i in range(0, len(history_bert_id), max_batch_size)]
                results["history_bert_len"] = np.array(history_bert_len, dtype=np_dtype)

            if "history_bert_mask_np_list" not in results:
                history_bert_mask, _ = utils.multi_sequences_padding(
                    tqdm(inputs["c_bert"]["mask"], desc="Bert Mask Padding"),
                    max_num_utterance=default_params["max_num_utterance"],
                    max_sentence_len=default_params["bert_max_sentence_len"])
                results["history_bert_mask_np_list"] = [
                    np.array(history_bert_mask[i: i + max_batch_size], dtype=np_dtype)
                    for i in range(0, len(history_bert_mask), max_batch_size)]

            if "history_bert_segment_np_list" not in results:
                history_bert_segment, _ = utils.multi_sequences_padding(
                    tqdm(inputs["c_bert"]["segment"], desc="Bert Segment Padding"),
                    max_num_utterance=default_params["max_num_utterance"],
                    max_sentence_len=default_params["bert_max_sentence_len"])
                results["history_bert_segment_np_list"] = [
                    np.array(history_bert_segment[i: i + max_batch_size], dtype=np_dtype)
                    for i in range(0, len(history_bert_segment), max_batch_size)]

            if "history_alignment_np_list" not in results:
                history_alignment, _ = utils.multi_sequences_padding(
                    tqdm(inputs["c_bert"]["alignment"], desc="Alignment Padding"),
                    max_num_utterance=default_params["max_num_utterance"],
                    max_sentence_len=default_params["max_sentence_len"])
                results["history_alignment_np_list"] = [
                    np.array(history_alignment[i: i + max_batch_size], dtype=np_dtype)
                    for i in range(0, len(history_alignment), max_batch_size)]

            if "true_utt_bert_id" not in results:
                results["true_utt_bert_id"] = np.array(
                    pad_sequences(inputs["r_bert"]["id"], default_params["bert_max_sentence_len"], padding='post'),
                    dtype=np_dtype)
            if "true_utt_bert_len" not in results:
                results["true_utt_bert_len"] = np.array(
                    utils.get_sequences_length(inputs["r_bert"]["id"], default_params["bert_max_sentence_len"]),
                    dtype=np_dtype)
            if "true_utt_bert_mask" not in results:
                results["true_utt_bert_mask"] = np.array(
                    pad_sequences(inputs["r_bert"]["mask"], default_params["bert_max_sentence_len"], padding='post'),
                    dtype=np_dtype)
            if "true_utt_bert_segment" not in results:
                results["true_utt_bert_segment"] = np.array(
                    pad_sequences(inputs["r_bert"]["segment"], default_params["bert_max_sentence_len"], padding='post'),
                    dtype=np_dtype)
            if "true_utt_alignment" not in results:
                results["true_utt_alignment"] = np.array(
                    pad_sequences(inputs["r_bert"]["alignment"], default_params["max_sentence_len"], padding='post'),
                    dtype=np_dtype)

        with open(np_cache_file, 'wb') as f:
            pickle.dump(results, f)

    results["history"] = np.concatenate(results["history_np_list"], axis=0)
    results["response_features"] = np.concatenate(results["response_features_np_list"], axis=0)
    if use_bert_embeddings:
        results["history_bert_id"] = np.concatenate(results["history_bert_id_np_list"], axis=0)
        results["history_bert_mask"] = np.concatenate(results["history_bert_mask_np_list"], axis=0)
        results["history_bert_segment"] = np.concatenate(results["history_bert_segment_np_list"], axis=0)
        results["history_alignment"] = np.concatenate(results["history_alignment_np_list"], axis=0)
    return results
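# A minimal sketch of what utils.multi_sequences_padding is assumed to do, inferred
# from the call sites above: pad each dialogue to a fixed number of utterances and
# each utterance to a fixed token length, returning the padded structure plus
# per-utterance lengths. The truncation rules (keep the most recent turns / leading
# tokens) and the function name with the _sketch suffix are assumptions, not taken
# from the original utils module.
def multi_sequences_padding_sketch(dialogues, max_num_utterance=10,
                                   max_sentence_len=50, padding_element=0):
    padded_dialogues, dialogue_lengths = [], []
    for utterances in dialogues:
        utterances = utterances[-max_num_utterance:]          # keep the latest turns
        lengths, padded_utts = [], []
        for utt in utterances:
            utt = list(utt)[:max_sentence_len]                # truncate long turns
            lengths.append(len(utt))
            padded_utts.append(utt + [padding_element] * (max_sentence_len - len(utt)))
        while len(padded_utts) < max_num_utterance:           # pad missing turns
            padded_utts.append([padding_element] * max_sentence_len)
            lengths.append(0)
        padded_dialogues.append(padded_utts)
        dialogue_lengths.append(lengths)
    return padded_dialogues, dialogue_lengths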
def main():
    # word vector
    embedding_vec = None
    ## model testing doesn't need data
    if TEST_MODE != 1:
        if TEST_MODE == 2:
            print 'INTO TEST_MODE 2'
        ## load the dataset
        print 'Load data...'
        X_train, y_train_sent, y_train_chunk, train_av, train_lex, train_en,\
        X_val, y_val_sent, y_val_chunk, val_av, val_lex, val_en,\
        X_test, y_test_sent, y_test_chunk, test_av, test_lex, test_en,\
        word_dict, tag_dict, label_dict = load_data()

        # read wordvec file
        if wordvec_init == True:
            embedding_vec = numpy.empty((0, WORD_DIM), float)
            wordvec_dict = cPickle.load(open(WORD_VEC_FILE, 'rb'))
            for i in range(VOCA_SIZE):
                if wordvec_dict.has_key(i):
                    embedding_vec = numpy.append(
                        embedding_vec, wordvec_dict[i].reshape(1, WORD_DIM), axis=0)
                else:
                    embedding_vec = numpy.append(
                        embedding_vec,
                        lasagne.random.get_rng().normal(0.0, 0.01, size=(1, WORD_DIM)),
                        axis=0)
            #print embedding_vec.shape, type(embedding_vec)
            #print embedding_vec[0][:10], len(embedding_vec[0])
        print 'Ok.'
    else:
        print 'INTO TEST_MODE 1'

    ### model inputs
    sents = theano.tensor.itensor3('sents')
    bigrams = theano.tensor.itensor3('bigrams')
    masks = theano.tensor.imatrix('masks')
    av_features = theano.tensor.itensor3('av')
    lex_features = theano.tensor.imatrix('lex')
    en_features = theano.tensor.itensor3('en')
    #masks_seg = theano.tensor.imatrix('masks_seg')
    lr = theano.tensor.fscalar('lr')

    ### model target outputs
    # chunk label target
    chunk_labels = theano.tensor.imatrix()

    def expandMatrix(X, dim=None, exflag=True):
        ## exflag: has padding or not.
        # (batch size, max length) -> (batch size, max length, dim)
        _eye = None
        if exflag:
            _x = theano.tensor.eye(dim)
            _y = theano.tensor.zeros((1, dim))
            _eye = theano.tensor.concatenate([_y, _x], axis=0)
        else:
            _eye = theano.tensor.eye(dim)
        return theano.tensor.cast(_eye[X], dtype='int32')

    chunk_targets = expandMatrix(chunk_labels, N_CHUNK_LABEL)

    ## build model
    print 'Build model...'
    # the model has two outputs
    chunk_out = build_model(sents, bigrams, av_features, lex_features, en_features,
                            masks, chunk_labels, embedding_vec)

    # whether or not to use a trained model
    if reuse_mode == True:
        with numpy.load('model_best_c_test.npz') as f:
            param_values = [f['arr_%d' % i] for i in range(len(f.files))]
        lasagne.layers.set_all_param_values(chunk_out, param_values)

    # (batch size, )
    chunk_label_mass = lasagne.layers.get_output(chunk_out)
    chunk_loss = theano.tensor.mean(chunk_label_mass[0] / theano.tensor.sum(masks, axis=1))
    chunk_label_chain = chunk_label_mass[1] * masks

    # l2 penalty
    loss_penalty = lasagne.regularization.regularize_layer_params(
        chunk_out, lasagne.regularization.l2) * LAMBDA
    loss = chunk_loss + loss_penalty

    all_params = lasagne.layers.get_all_params(chunk_out, trainable=True)
    #print all_params

    ## set constraints for Tag Inference Layer
    tag_inference_layer_params = chunk_out.get_params()
    init_tran = tag_inference_layer_params[0]
    tran = tag_inference_layer_params[1]
    halt_tran = tag_inference_layer_params[2]

    def l1_unit_norm(p):
        epsilon = 10e-8
        p = p * theano.tensor.cast(p >= 0., 'float64')
        return p / (epsilon + theano.tensor.sum(p, axis=-1, keepdims=True))

    constraints = {
        init_tran: l1_unit_norm,
        tran: l1_unit_norm,
        halt_tran: l1_unit_norm
    }
    updates = adagrad_norm(loss, all_params, learning_rate=lr, constraints=constraints)

    ## for validation and test
    chunk_pred_label_mass = lasagne.layers.get_output(chunk_out, deterministic=True)
    chunk_pred_loss = theano.tensor.mean(chunk_pred_label_mass[0] / theano.tensor.sum(masks, axis=1))
    chunk_pred_label_chain = chunk_pred_label_mass[1] * masks
    val_loss = chunk_pred_loss

    # for train
    train_fn = theano.function(
        [sents, bigrams, av_features, lex_features, en_features, masks, chunk_labels, lr],
        loss, updates=updates)
    # validation or test
    val_fn = theano.function(
        [sents, bigrams, av_features, lex_features, en_features, masks, chunk_labels],
        [val_loss, chunk_pred_label_chain])

    if TEST_MODE == 1:
        print 'MODEL BUILDING PASS.'
        sys.exit()
    print 'Ok.'

    best_val_c_f1 = 0
    best_val = 0
    best_test_c_f1 = 0
    data = {'best_val_c_f1': [], 'best_val': [], 'best_test_c_f1': []}
    lr_decayed = numpy.float32(LR)
    train_losses = []

    # Finally, launch the training loop.
    print "Starting training..."
    for epoch in range(NUM_EPOCHS):
        print 'Epoch', epoch
        progbar = generic_utils.Progbar(X_train.shape[0])
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batchs = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train_sent, y_train_chunk,
                                         train_av, train_lex, train_en,
                                         N_BATCH, shuffle=True):
            inputs, bigrams, av_features, lex_features, en_features, masks, \
                sentiment_targets, chunk_targets = batch
            err = train_fn(inputs, bigrams, av_features, lex_features, en_features,
                           masks, chunk_targets, lr_decayed)
            train_err += err
            train_batchs += 1
            progbar.add(inputs.shape[0], values=[('train loss', err)])
        train_loss = train_err / train_batchs

        # Decrease learning rate if no improvement was seen over the last 3 epochs.
        if len(train_losses) > 3 and train_loss > max(train_losses[-3:]):
            lr_decayed = numpy.float32(lr_decayed * 0.5)
        train_losses.append(train_loss)

        val_cf1 = 0
        test_err = 0
        # And a full pass over the val data:
        inputs = utils.pad_sequences(X_val, MAX_LENGTH)
        masks = numpy.int32(numpy.ones_like(inputs) * (1 - numpy.equal(inputs, 0)))
        inputscw, bigrams = contextwin_bigram(inputs, WINDOW_SIZE)
        chunk_targets = utils.pad_sequences(y_val_chunk, MAX_LENGTH)
        val_av_batch = [
            utils.pad_matrix(val_av[j], sent_maxlen=MAX_LENGTH, feature_dim=5)
            for j in range(len(val_av))
        ]
        val_lex_batch = utils.pad_sequences(val_lex, MAX_LENGTH)
        val_en_batch = [
            utils.pad_matrix(val_en[j], sent_maxlen=MAX_LENGTH, feature_dim=2)
            for j in range(len(val_en))
        ]
        val_err, val_chunk_label = val_fn(inputscw, bigrams, val_av_batch,
                                          val_lex_batch, val_en_batch, masks, chunk_targets)
        if BIO_C_FLAG:
            c_res_val = utils.cwsEalve(inputs, chunk_targets, val_chunk_label,
                                       word_dict, tag_dict, VAL_GS_FILE, DICTIONARY, False)
            val_cf1 = c_res_val

        if best_val <= val_cf1:
            best_val = val_cf1
            best_val_c_f1 = val_cf1

            test_cf1 = 0
            # And a full pass over the test data:
            inputs = utils.pad_sequences(X_test, MAX_LENGTH)
            masks = numpy.int32(numpy.ones_like(inputs) * (1 - numpy.equal(inputs, 0)))
            inputscw, bigrams = contextwin_bigram(inputs, WINDOW_SIZE)
            chunk_targets = utils.pad_sequences(y_test_chunk, MAX_LENGTH)
            test_av_batch = [
                utils.pad_matrix(test_av[j], sent_maxlen=MAX_LENGTH, feature_dim=5)
                for j in range(len(test_av))
            ]
            test_lex_batch = utils.pad_sequences(test_lex, MAX_LENGTH)
            test_en_batch = [
                utils.pad_matrix(test_en[j], sent_maxlen=MAX_LENGTH, feature_dim=2)
                for j in range(len(test_en))
            ]
            test_err, test_chunk_label = val_fn(inputscw, bigrams, test_av_batch,
                                                test_lex_batch, test_en_batch, masks, chunk_targets)
            if BIO_C_FLAG:
                # c_res_val = utils.cwsEalve(inputs, chunk_targets, test_chunk_label,
                #                            word_dict, tag_dict, TEST_GS_FILE, DICTIONARY, False)
                utils.save_test(inputs, chunk_targets, test_chunk_label,
                                word_dict, tag_dict, TEST_GS_FILE, DICTIONARY, False)
                #best_test_c_f1 = c_res_val
                #numpy.savez('model_best_c_test.npz', *lasagne.layers.get_all_param_values(chunk_out))
            # utils.id2original(inputs, chunk_targets,
            #                   utils.pad_sequences(y_test_sent, MAX_LENGTH, value=1), test_chunk_label,
            #                   utils.pad_sequences(y_test_sent, MAX_LENGTH, value=1),
            #                   word_dict=word_dict, tag_dict=tag_dict, label_dict=label_dict,
            #                   output_file='test_c_result.txt')

        # Then we print the results for this epoch:
        print "val: %.4f c_f1: %.4f best: %.4f test: %.4f c_f1: %.4f" \
            % (val_err, val_cf1, best_val, test_err, best_test_c_f1)
        data['best_val_c_f1'].append(best_val_c_f1)
        data['best_val'].append(best_val)
        data['best_test_c_f1'].append(best_test_c_f1)
        data['args'] = args_hash
        cPickle.dump(data, open('result_data.pkl', 'wb'))
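# adagrad_norm is not defined in this file. Below is a minimal sketch of how such
# an update rule could combine Lasagne's stock Adagrad with the per-parameter
# constraints passed in main() (projecting init_tran / tran / halt_tran back onto
# the simplex with l1_unit_norm). This is an assumed construction built from
# standard Lasagne primitives, not the repository's implementation; it reuses the
# module-level lasagne import.
def adagrad_norm_sketch(loss, params, learning_rate, constraints=None):
    updates = lasagne.updates.adagrad(loss, params, learning_rate=learning_rate)
    if constraints:
        for param, constraint in constraints.items():
            # apply the constraint to the updated value of the constrained parameter
            updates[param] = constraint(updates[param])
    return updates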
def iterate_minibatches(inputs, sent_targets, chunk_targets, train_av, train_lex,
                        train_en, batchsize, shuffle=False):
    assert len(inputs) == len(sent_targets)
    assert len(inputs) == len(chunk_targets)

    def gene_mask(X):
        return numpy.int32(numpy.ones_like(X) * (1 - numpy.equal(X, 0)))

    # assign every sentence to a length bucket
    index_set = [[] for _ in BUCKTES]
    for i in range(len(inputs)):
        x = inputs[i]
        for bucket_id, (min_size, max_size) in enumerate(BUCKTES):
            if len(x) >= min_size and len(x) < max_size:
                index_set[bucket_id].append(i)
                break

    if index_set and len(index_set) > 1:
        # the user set multiple buckets
        for i in range(len(index_set)):
            bucket_max_length = BUCKTES[i][1] - 1
            index_bucket = index_set[i]
            if len(index_bucket) == 0:
                # empty bucket
                #print 'empty'
                continue
            #print index_bucket
            index_bucket_shuffled = index_bucket[:]
            if shuffle:
                lasagne.random.get_rng().shuffle(index_bucket_shuffled)
            for start_idx in range(0, len(index_bucket), batchsize):
                end_idx = min(start_idx + batchsize, len(index_bucket))
                if shuffle:
                    excerpt = index_bucket_shuffled[start_idx:end_idx]
                else:
                    excerpt = index_bucket[start_idx:end_idx]
                sents_one_batch = utils.pad_sequences(
                    [inputs[j] for j in excerpt], bucket_max_length)
                masks_one_batch = gene_mask(sents_one_batch)
                sentscw_one_batch = contextwin(sents_one_batch, WINDOW_SIZE)
                sent_targets_one_batch = utils.pad_sequences(
                    [sent_targets[j] for j in excerpt], bucket_max_length)
                chunk_targets_one_batch = utils.pad_sequences(
                    [chunk_targets[j] for j in excerpt], bucket_max_length)
                yield sentscw_one_batch, masks_one_batch, sent_targets_one_batch, chunk_targets_one_batch
    else:
        # only one default bucket: (0, max_length), or no buckets at all
        if shuffle:
            indices = numpy.arange(len(inputs))
            lasagne.random.get_rng().shuffle(indices)
            #print indices
        for start_idx in range(0, len(inputs), batchsize):
            end_idx = min(start_idx + batchsize, len(inputs))
            if shuffle:
                excerpt = indices[start_idx:end_idx]
            else:
                excerpt = range(start_idx, end_idx)
            sents_one_batch = utils.pad_sequences([inputs[j] for j in excerpt], MAX_LENGTH)
            masks_one_batch = gene_mask(sents_one_batch)
            sentscw_one_batch, bigram_one_batch = contextwin_bigram(sents_one_batch, WINDOW_SIZE)
            train_av_one_batch = [
                utils.pad_matrix(train_av[j], sent_maxlen=MAX_LENGTH, feature_dim=5)
                for j in excerpt
            ]
            train_lex_one_batch = utils.pad_sequences([train_lex[j] for j in excerpt], MAX_LENGTH)
            train_en_one_batch = [
                utils.pad_matrix(train_en[j], sent_maxlen=MAX_LENGTH, feature_dim=2)
                for j in excerpt
            ]
            sent_targets_one_batch = utils.pad_sequences(
                [sent_targets[j] for j in excerpt], MAX_LENGTH)
            chunk_targets_one_batch = utils.pad_sequences(
                [chunk_targets[j] for j in excerpt], MAX_LENGTH)
            #yield sentscw_one_batch, masks_one_batch, masks_seg_one_batch, sent_targets_one_batch, chunk_targets_one_batch
            yield (sentscw_one_batch, bigram_one_batch, train_av_one_batch, train_lex_one_batch,
                   train_en_one_batch, masks_one_batch, sent_targets_one_batch, chunk_targets_one_batch)
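# A minimal sketch of the context-window expansion the generator relies on, assuming
# contextwin() behaves like the classic slot-filling recipe: each position is replaced
# by a window of win neighbouring indices, padded with 0 (the same id used for
# sequence padding above). The behaviour and the _sketch name are assumptions drawn
# from the call sites, not the original implementation.
def contextwin_sketch(batch, win):
    assert win % 2 == 1, "window size must be odd"
    half = win // 2
    windows = []
    for sent in batch:
        padded = [0] * half + list(sent) + [0] * half
        windows.append([padded[i:i + win] for i in range(len(sent))])
    return windows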
def numpy_process():
    max_batch_size = 100000
    np_cache_file = None
    if default_params["phase"] in ["training", "validation"]:
        np_cache_file = os.path.join(
            default_params["dataset_dir"],
            "numpy_" + training_files[0].rsplit(".", 1)[0] + ".pkl")
    else:
        np_cache_file = os.path.join(
            default_params["dataset_dir"],
            "numpy_" + evaluate_files[0].rsplit(".", 1)[0] + ".pkl")

    if os.path.isfile(np_cache_file):
        context_id_np_list, context_len, context_mask_np_list, context_segment_np_list, \
            response_features_np_list, response_id, response_len, response_mask, \
            response_segment, labels = pickle.load(open(np_cache_file, 'rb'))
    else:
        inputs = bert_process()
        context_id, context_len = utils.multi_sequences_padding(
            tqdm(inputs["context_id"], desc="Sequence Padding"),
            max_num_utterance=max_num_utterance,
            max_sentence_len=max_sentence_len)
        context_mask, _ = utils.multi_sequences_padding(
            tqdm(inputs["context_mask"], desc="Sequence Mask Padding"),
            max_num_utterance=max_num_utterance,
            max_sentence_len=max_sentence_len)
        context_segment, _ = utils.multi_sequences_padding(
            tqdm(inputs["context_segment"], desc="Sequence Segment Padding"),
            max_num_utterance=max_num_utterance,
            max_sentence_len=max_sentence_len)

        feature_len = len(inputs["r_feature"][0][0][0])
        response_features, _ = utils.multi_sequences_padding(
            tqdm(inputs["r_feature"], desc="Feature Sequence Padding"),
            max_num_utterance=default_params["max_num_utterance"],
            max_sentence_len=default_params["max_sentence_len"],
            padding_element=[0] * feature_len)

        context_id_np_list = [
            np.array(context_id[i:i + max_batch_size], dtype=np_dtype)
            for i in range(0, len(context_id), max_batch_size)
        ]
        context_len = np.array(context_len, dtype=np_dtype)
        context_mask_np_list = [
            np.array(context_mask[i:i + max_batch_size], dtype=np_dtype)
            for i in range(0, len(context_mask), max_batch_size)
        ]
        context_segment_np_list = [
            np.array(context_segment[i:i + max_batch_size], dtype=np_dtype)
            for i in range(0, len(context_segment), max_batch_size)
        ]
        response_features_np_list = [
            np.array(response_features[i:i + max_batch_size], dtype=np_float_dtype)
            for i in range(0, len(response_features), max_batch_size)
        ]

        response_id = np.array(
            pad_sequences(inputs["response_id"], max_sentence_len, padding='post'), dtype=np_dtype)
        response_len = np.array(inputs["response_len"], dtype=np_dtype)
        response_mask = np.array(
            pad_sequences(inputs["response_mask"], max_sentence_len, padding='post'), dtype=np_dtype)
        response_segment = np.array(
            pad_sequences(inputs["response_segment"], max_sentence_len, padding='post'), dtype=np_dtype)
        labels = np.array(inputs["labels"], dtype=np_dtype)

        with open(np_cache_file, 'wb') as f:
            pickle.dump([
                context_id_np_list, context_len, context_mask_np_list, context_segment_np_list,
                response_features_np_list, response_id, response_len, response_mask,
                response_segment, labels
            ], f)

    context_id = np.concatenate(context_id_np_list, axis=0)
    context_mask = np.concatenate(context_mask_np_list, axis=0)
    context_segment = np.concatenate(context_segment_np_list, axis=0)
    response_features = np.concatenate(response_features_np_list, axis=0)

    return [
        context_id, context_len, context_mask, context_segment, response_features,
        response_id, response_len, response_mask, response_segment, labels
    ]
for curr_epoch in range(num_epochs):
    train_cost = train_ler = train_cost1 = train_ler1 = 0
    start = time.time()

    for batch in range(num_batches_per_epoch):
        # Get the batch indices, wrapping around the dataset via modulo
        indexes = [
            i % num_examples
            for i in range(batch * batch_size, (batch + 1) * batch_size)
        ]
        batch_train_inputs = train_inputs[indexes]

        # Pad inputs to the max_time_step of this batch
        batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)

        # Convert targets to a sparse representation so they can feed the SparseTensor input
        batch_train_targets = sparse_tuple_from(train_targets[indexes])

        feed = {
            inputs: batch_train_inputs,
            targets: batch_train_targets,
            seq_len: batch_train_seq_len
        }

        train_ler1_part = session.run(ler, feed_dict=feed) * batch_size
        train_ler1 += train_ler1_part

        train_cost1_part = session.run(cost, feed) * batch_size
        train_cost1 += train_cost1_part
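# A minimal sketch of sparse_tuple_from as it is typically written for CTC targets:
# convert a list of label sequences into the (indices, values, shape) triple expected
# by tf.SparseTensor. Treat this as an assumed re-implementation inferred from the
# call above, not the repository's own helper.
import numpy as np

def sparse_tuple_from_sketch(sequences, dtype=np.int32):
    indices, values = [], []
    for n, seq in enumerate(sequences):
        # one (batch_index, time_index) pair per label
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    shape = np.asarray([len(sequences), max(len(seq) for seq in sequences)], dtype=np.int64)
    return indices, values, shape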