def run_epoch(self, sess, train):
    """Train once over every minibatch of ``train``.

    Returns:
        (losses, grad_norms): per-batch training losses and gradient norms.
    """
    batch_size = self.config.batch_size
    progress = Progbar(target=1 + int(len(train) / batch_size))
    losses = []
    grad_norms = []
    for step, minibatch in enumerate(minibatches(train, batch_size), start=1):
        loss, grad_norm = self.train_on_batch(sess, *minibatch)
        losses.append(loss)
        grad_norms.append(grad_norm)
        progress.update(step, [("train loss", loss)])
    return losses, grad_norms
def output(self, sess, inputs_raw, inputs=None):
    """Run the model over ``inputs_raw`` and consolidate its predictions.

    If ``inputs`` (preprocessed data) is not supplied, it is built by
    vectorizing ``inputs_raw`` with the helper.
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    batch_size = self.config.batch_size
    progress = Progbar(target=1 + int(len(inputs) / batch_size))
    predictions = []
    # shuffle=False keeps predictions aligned with the input order.
    for step, minibatch in enumerate(minibatches(inputs, batch_size, shuffle=False), start=1):
        # Drop the label column (index 1); the model only needs features.
        feature_batch = minibatch[:1] + minibatch[2:]
        predictions.extend(self.predict_on_batch(sess, *feature_batch))
        progress.update(step, [])
    return self.consolidate_predictions(inputs_raw, inputs, predictions)
def output(self, sess, inputs_raw, inputs=None):
    """Report the model's predictions for ``inputs_raw``.

    Featurizes the raw examples through the helper when no preprocessed
    ``inputs`` are given, predicts batch by batch without shuffling, and
    hands everything to ``consolidate_predictions``.
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    preds = []
    bar = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    batch_iter = minibatches(inputs, self.config.batch_size, shuffle=False)
    for n, batch in enumerate(batch_iter):
        # Skip over the gold-label column; pass only feature columns.
        batch = batch[:1] + batch[2:]
        batch_preds = self.predict_on_batch(sess, *batch)
        preds += list(batch_preds)
        bar.update(n + 1, [])
    return self.consolidate_predictions(inputs_raw, inputs, preds)
def train(self, train_examples, dev_examples):
    """Train the model for ``config.n_epochs`` epochs.

    After each epoch the model is evaluated on ``dev_examples``; whenever
    the entity-level F1 improves and ``config.model_output`` is set, the
    state dict is saved there.

    Fixes vs. original: the unused ``step`` counter is removed; the
    average-loss division no longer raises NameError when the training
    set yields zero minibatches; the progress bar is actually updated.

    Returns:
        Best entity-level F1 observed on the dev set.
    """
    model = self._model
    config = self._config
    logger = self._logger
    best_score = 0.
    preprocessed_train_examples = train_examples['preprocessed']
    for epoch in range(config.n_epochs):
        model.train()
        logger.info("Epoch %d out of %d", epoch + 1, config.n_epochs)
        prog = Progbar(
            target=1 + int(len(preprocessed_train_examples) / config.batch_size))
        avg_loss = 0
        num_batches = 0
        for i, minibatch in enumerate(
                minibatches(preprocessed_train_examples, config.batch_size)):
            sentences = torch.tensor(minibatch[0], device=config.device)
            labels = torch.tensor(minibatch[1], device=config.device)
            masks = torch.tensor(minibatch[2], device=config.device)
            loss = self._train_on_batch(sentences, labels, masks)
            avg_loss += loss
            num_batches += 1
            prog.update(i + 1, [("train loss", loss)])
        # Guard: an empty training set previously left `i` undefined here.
        avg_loss /= max(num_batches, 1)
        logger.info("Training average loss: %.5f", avg_loss)
        model.eval()
        with torch.no_grad():
            logger.info("Evaluating on development data")
            token_cm, entity_scores = self._evaluator.evaluate(dev_examples)
            logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: {:.2f}/{:.2f}/{:.2f}".format(
                *entity_scores))
            score = entity_scores[-1]
            if score > best_score and config.model_output:
                best_score = score
                logger.info("New best score! Saving model in %s",
                            config.model_output)
                torch.save(model.state_dict(), config.model_output)
            print("")
    return best_score
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    """Train for ``config.n_epochs`` epochs, evaluating on dev data after each.

    Saves the model through ``saver`` whenever the entity-level F1 on the
    dev set improves; returns the best F1 observed.
    """
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        progress = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        for step, minibatch in enumerate(
                minibatches(train_examples, self.config.batch_size), start=1):
            loss = self.train_on_batch(sess, *minibatch)
            progress.update(step, [("train loss", loss)])
            if self.report:
                self.report.log_train_loss(loss)
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s",
                            self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def evaluate(sess, examples_set, examples_raw, examples_vec, pred, input_placeholder, mask_placeholder, labels_placeholder):
    """Evaluate the model on ``examples_raw`` / predict its label classes.

    Prints accuracy, micro-F1, the confusion matrix and a classification
    report, then returns ``[(sentence, [label names]), ...]`` for every raw
    example.
    """
    predictions = []  # argmax predictions accumulated over all batches
    for j, batch in enumerate(minibatches(examples_set, batch_size, shuffle=False)):
        inputs_batch, mask_batch = batch[0], batch[2]
        feed = create_feed_dict(input_placeholder, mask_placeholder,
                                labels_placeholder,
                                inputs_batch=inputs_batch,
                                mask_batch=mask_batch)
        # Per-batch predictions: argmax over the class axis.
        batch_preds = sess.run(tf.argmax(pred, axis=2), feed_dict=feed)
        predictions += list(batch_preds)
        total_batch = 1 + int(len(examples_set) / batch_size)
        print(progress_bar(j, total_batch, "batch"))

    gold_labels = []       # reference answers
    predicted_labels = []  # model predictions
    for i, (sentence, labels) in enumerate(examples_vec):
        _, _, mask = examples_set[i]  # per-sentence mask
        # Keep only positions whose mask flag is True.
        masked_preds = [l for l, m in zip(predictions[i], mask) if m]
        if len(masked_preds) == len(labels):  # guards padded tail of last batch
            gold_labels += labels
            predicted_labels += masked_preds

    cm = confusion_matrix(gold_labels, predicted_labels)
    acc = accuracy_score(gold_labels, predicted_labels)
    f_val = f1_score(gold_labels, predicted_labels, average="micro")
    report = classification_report(gold_labels, predicted_labels,
                                   target_names=LBLS)
    print("准确率:", acc)
    print("F值:", f_val)
    print("混淆矩阵:\n", cm)
    print("分类结果:\n", report)

    result = []
    for i, (sentence, labels) in enumerate(examples_raw):
        _, _, mask = examples_set[i]
        masked_preds = [l for l, m in zip(predictions[i], mask) if m]
        # Map numeric labels back to their string names.
        result.append((sentence, [LBLS[l] for l in masked_preds]))
    return result
def generate_answers(sess, model, dataset, rev_vocab):
    """Generate an answer string for every question in ``dataset``.

    Output format is ``answers[uuid] = "real answer"`` — a string of words,
    not index pairs — as required for the JSON dump consumed by evaluate.py
    (leaderboard submission).

    :param sess: active TF session
    :param model: a built QASystem model
    :param rev_vocab: list mapping index -> word
    :return: dict of uuid -> predicted answer string
    """
    answers = {}
    (context, question, question_uuid_data) = dataset
    context_data = convert_data_to_list(context)
    question_data = convert_data_to_list(question)
    context_padded, context_mask = pad_sequence(context_data,
                                                FLAGS.max_context_len)
    question_padded, question_mask = pad_sequence(question_data,
                                                  FLAGS.max_question_len)
    input_data = vectorize(context_padded, context_mask, question_padded,
                           question_mask, question_uuid_data)

    batch_size = 32
    num_batches = int(len(input_data) / batch_size) + 1
    progress = Progbar(target=num_batches)
    for step, batch in enumerate(minibatches(input_data, batch_size), start=1):
        starts, ends = model.answer(sess, batch)
        progress.update(step)
        for a_s, a_e, ctx, uuid in zip(starts, ends, batch[0], batch[4]):
            if a_s > a_e:
                a_s, a_e = a_e, a_s  # ensure start index <= end index
            answers[uuid] = model.formulate_answer(ctx, rev_vocab, a_s, a_e)
    return answers
def output(self, sess, inputs_raw, inputs=None):
    """Report model predictions for examples (featurized via the helper).

    ``inputs`` may be passed already preprocessed; otherwise it is built
    from ``inputs_raw`` (the prediction-time path).
    """
    if inputs is None:
        # Prediction-time path: vectorize and preprocess the raw examples.
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    results = []
    # Progbar: set the total once, then just report the current batch index.
    bar = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    # shuffle=False: predictions must stay aligned with the input order.
    for n, batch in enumerate(
            minibatches(inputs, self.config.batch_size, shuffle=False), start=1):
        # Drop the label column (index 1); only features go to the model.
        batch = batch[:1] + batch[2:]
        results += list(self.predict_on_batch(sess, *batch))
        bar.update(n, [])
    return self.consolidate_predictions(inputs_raw, inputs, results)
def eval_batches(self, sess, eval_set, num_batches):
    """Average the loss over at most the first ``num_batches`` minibatches.

    Args:
        sess: tf.Session()
        eval_set: full dataset, as passed to run_epoch
        num_batches: number of batches to evaluate
    Returns:
        Mean loss over the evaluated batches (a scalar).
    """
    losses = []
    batch_stream = minibatches(eval_set, self.config.batch_size)
    for idx, (inputs_batch, outputs_batch) in enumerate(batch_stream):
        if idx >= num_batches:
            break
        losses.append(self.eval_on_batch(sess, inputs_batch, outputs_batch))
    return np.mean(losses)
def evaluate(self, sess, examples, examples_raw, last_epoch):
    """Return the mean KL divergence between gold and predicted distributions."""
    total_divergence = 0.0
    num_batches = 0
    batch_stream = minibatches(examples, self.config.batch_size, shuffle=False)
    for idx, batch in enumerate(batch_stream):
        predicted = self.predict_on_batch(sess, batch[0])
        gold = batch[1]
        # On the final epoch, visualize the first 20 predictions of batch 0.
        if last_epoch and idx == 0:
            self.visualize_distributions(
                predicted[:20], gold[:20],
                [example[0] for example in examples_raw[:20]])
        total_divergence += np_kl_divergence(gold, predicted)
        num_batches += 1
    return total_divergence / num_batches
def predict(self, examples, use_str_labels=False):
    """Predict tag sequences for ``examples`` and consolidate the results."""
    config = self._config
    preprocessed = examples['preprocessed']
    progress = Progbar(target=1 + int(len(preprocessed) / config.batch_size))
    predictions = []
    # shuffle=False keeps predictions aligned with the example order.
    for step, minibatch in enumerate(
            minibatches(preprocessed, config.batch_size, shuffle=False), start=1):
        sentences = torch.tensor(minibatch[0], device=config.device)
        tag_probs = self._model(sentences)
        predictions.extend(torch.argmax(tag_probs, dim=-1))
        progress.update(step, [])
    return self.consolidate_predictions(examples, predictions, use_str_labels)
def run_epoch(self, sess, train_examples, dev_set):
    """Train on all minibatches, then score on both train and dev sets.

    NOTE: Python 2 syntax (print statements, integer division in the
    Progbar target).

    Returns:
        The dev-set score from report_score.
    """
    prog = Progbar(target=1 + train_examples[0].shape[0] / self.config.batch_size)
    for i, (articles_batch, headlines_batch, labels_batch) in enumerate(minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, articles_batch, headlines_batch, labels_batch)
        prog.update(i + 1, [("train loss", loss)])
    print "Evaluating on train set"
    # Column 2 holds the stance labels; columns 0-1 are the model inputs.
    train_actual = vectorize_stances(train_examples[2])
    train_preds = list(self.predict_on_batch(sess, *train_examples[:2]))
    train_score, _ = report_score(train_actual, train_preds)
    print "Evaluating on dev set"
    actual = vectorize_stances(dev_set[2])
    preds = list(self.predict_on_batch(sess, *dev_set[:2]))
    dev_score, _ = report_score(actual, preds)
    print "- train Score {:.2f}".format(train_score)
    print "- dev Score: {:.2f}".format(dev_score)
    return dev_score
def run_epoch(self, session, dataset, rev_vocab, train_dir, batch_size):
    """Run one training epoch over ``dataset``.

    Fixes vs. original: the NaN check used the ``x != x`` self-comparison
    trick — replaced with the explicit ``np.isnan``; the accuracy/loss
    averages are guarded against an empty dataset.

    Returns:
        (train_accuracy, epoch_mean_loss, diverged): ``diverged`` is True
        when the mean loss is NaN, signalling the caller to abort.
    """
    tic = time.time()
    num_correct = 0
    num_batches = 0
    total_loss = 0
    with tqdm(total=int(len(dataset[0]))) as pbar:
        for i, batch in enumerate(
                minibatches(dataset, batch_size, bucket=self.bucket)):
            self.iteration += batch_size  # for tensorboard
            if self.verbose and (i % 10 == 0):
                sys.stdout.write(str(i) + "...")
                sys.stdout.flush()
            premises, premise_lens, hypotheses, hypothesis_lens, goldlabels = batch
            loss, probs, error = self.optimize(session, rev_vocab, premises,
                                               premise_lens, hypotheses,
                                               hypothesis_lens, goldlabels)
            total_loss += loss
            num_batches += 1
            # Record correctness of training predictions.
            correct_predictions = np.equal(np.argmax(probs, axis=1),
                                           np.argmax(goldlabels, axis=1))
            num_correct += np.sum(correct_predictions)
            pbar.update(batch_size)
    toc = time.time()
    # Guard: with zero batches the averages below would divide by zero.
    if num_batches == 0:
        return 0.0, 0.0, False
    train_accuracy = num_correct / float(len(dataset[0]))
    epoch_mean_loss = total_loss / float(num_batches)
    if np.isnan(epoch_mean_loss):  # training diverged
        print('\nMEAN LOSS IS NAN!! Printing out...')
        print('Mean Loss:', epoch_mean_loss, '\n')
        return -1, -1, True
    print("Amount of time to run this epoch: " + str(toc - tic) + " secs")
    print("Training accuracy for this epoch: " + str(train_accuracy))
    print("Mean loss for this epoch: " + str(epoch_mean_loss))
    return train_accuracy, epoch_mean_loss, False
def output(self, sess, inputs_raw, inputs=None):
    """Report the model's per-batch predictions for evaluation.

    Generates batched entity-recognition predictions, then groups them by
    sentence alongside the gold labels via ``consolidate_predictions`` so
    the evaluation step can score them.
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    predictions = []
    bar = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    batch_iter = minibatches(inputs, self.config.batch_size, shuffle=False)
    for n, batch in enumerate(batch_iter):
        # Ignore the gold-label column; keep only the feature columns.
        batch = batch[:1] + batch[2:]
        batch_preds = self.predict_on_batch(sess, *batch)
        predictions += list(batch_preds)
        bar.update(n + 1, [])
    return self.consolidate_predictions(inputs_raw, inputs, predictions)
def fit(self, sess, saver, train_examples_raw, dev_set_raw, writer=None):
    """Train for ``config.n_epochs`` epochs; return the best dev F1.

    When ``writer`` is given, per-batch summaries are logged to it.

    Fixes vs. original: the minibatch was rebuilt as
    ``minibatch[:1] + minibatch[1:2] + minibatch[2:]`` — which is just the
    minibatch itself — so it is now passed through directly; the progress
    bar was created but never updated, so it is now driven each batch.
    """
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    step = 0
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        for i, minibatch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            if writer is None:
                self.train_on_batch(sess, *minibatch)
                prog.update(i + 1)
            else:
                loss, summary = self.train_on_batch(sess, *minibatch,
                                                    summarize=True)
                writer.add_summary(summary, step)
                step += 1
                prog.update(i + 1, [("train loss", loss)])
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s",
                            self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def run_epoch(self, session, train_examples, dev_examples, epoch_num, train_log):
    """One training pass over ``train_examples``; returns dev-set cost."""
    num_batches = int(len(train_examples) / self.batch_size)
    progress = Progbar(target=num_batches)
    for step, batch in enumerate(minibatches(train_examples, self.batch_size)):
        global_batch = epoch_num * num_batches + step
        loss, summary, current_lr = self.optimize(
            session, batch, global_batch_num=global_batch)
        # Logging format: (epoch, loss).
        train_log.write("{},{}\n".format(epoch_num + 1, loss))
        progress.update(step + 1,
                        exact=[("train loss", loss), ("current LR", current_lr)])
        if self.summary_flag:
            self.train_writer.add_summary(summary, step)
    print("")
    logging.info("Evaluating on development data")
    return self.test(session, dev_examples)
def _output(self, sess, inputs, extra_fetch=None):
    """Run the model over ``inputs`` (unshuffled) and collect its outputs.

    Args:
        sess: active tf.Session.
        inputs: preprocessed dataset understood by ``create_feed_dict``.
        extra_fetch: optional list of extra tensors to fetch per batch.
            Defaults to None (fix: the original used the mutable default
            argument ``[]``, a classic Python pitfall).

    Returns:
        (preds, logits, mean loss, extras), where ``extras`` is the
        batch-concatenated extra fetches, or an empty list when none
        were requested.
    """
    if extra_fetch is None:
        extra_fetch = []
    preds = []
    logits = []
    loss_record = []
    extras = []
    prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    for i, batch in enumerate(
            minibatches(inputs, self.config.batch_size, shuffle=False)):
        feed = self.create_feed_dict(*batch)
        preds_, logits_, loss_, extra_ = sess.run(
            [self.predictions, self.pred, self.loss, extra_fetch],
            feed_dict=feed)
        preds += list(preds_)
        loss_record.append(loss_)
        logits += list(logits_)
        if extra_fetch:
            extras.append(extra_)
        prog.update(i + 1, [])
    if extra_fetch:
        extras = np.concatenate(extras)
    return preds, logits, np.mean(loss_record), extras
def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw):
    """Train for one epoch, then report entity-level F1 on the dev set."""
    bar = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
    for n, batch in enumerate(
            minibatches(train_examples, self.config.batch_size), start=1):
        batch_loss = self.train_on_batch(sess, *batch)
        bar.update(n, [("train loss", batch_loss)])
        if self.report:
            self.report.log_train_loss(batch_loss)
    print("")
    logger.info("Evaluating on development data")
    token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
    logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
    logger.debug("Token-level scores:\n" + token_cm.summary())
    logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
    return entity_scores[-1]
def run_epoch(self, sess, train_data, val_data, epoch_num, train_log):
    """One epoch of training on ``train_data``; returns validation loss."""
    num_batches = int(len(train_data) / self.batch_size) + 1
    logging.info("Evaluating on training data")
    progress = Progbar(target=num_batches)
    for step, batch in enumerate(minibatches(train_data, self.batch_size)):
        global_batch_num = int(epoch_num * num_batches + step)
        loss, current_lr, summary = self.optimize(sess, batch, global_batch_num)
        progress.update(step + 1,
                        [("train loss", loss), ("current LR", current_lr)])
        # Logging format: (epoch, loss).
        train_log.write("{},{}\n".format(epoch_num + 1, loss))
        if self.summary_flag:
            self.train_writer.add_summary(summary, step)
    print("")
    logging.info("Evaluating on development data")
    return self.validate(sess, val_data)
def test(self, session, dev_examples):
    """Compute the mean loss over the validation set.

    Use this cost to tune hyperparameters against validation performance.

    Fix vs. original: the average was ``total_cost / (i + 1)`` using the
    loop variable after the loop, which raises UnboundLocalError when
    ``dev_examples`` yields no batches; batches are now counted explicitly
    and an empty set returns 0.0.

    :return: mean dev loss (a scalar)
    """
    num_batches = int(len(dev_examples) / self.batch_size)
    prog = Progbar(target=num_batches)
    total_cost = 0
    batches_seen = 0
    for i, batch in enumerate(minibatches(dev_examples, self.batch_size)):
        input_feed = self.create_feed_dict(batch, dropout=1)
        outputs = session.run([self.loss], input_feed)
        prog.update(i + 1, exact=[("dev loss", outputs[0])])
        total_cost += outputs[0]
        batches_seen += 1
    print("")
    return total_cost / batches_seen if batches_seen else 0.0
def output(self, sess, inputs):
    """Predict stances for ``inputs``.

    Returns:
        ((headlines, bodies), stances, preds) — the unzipped inputs, the
        gold stances, and the per-example predictions.
    """
    preds = []
    if self.config.similarity_metric_feature:
        headlines, bodies, stances, sim_scores = zip(*inputs)
        data = zip(headlines, bodies, sim_scores)
    else:
        headlines, bodies, stances = zip(*inputs)
        data = zip(headlines, bodies)
    progress = Progbar(target=1 + int(len(stances) / self.config.batch_size))
    # NOTE(review): assumes minibatches() accepts this zipped structure —
    # confirm (the original carried the same TODO).
    for step, batch in enumerate(
            minibatches(data, self.config.batch_size, shuffle=False), start=1):
        preds.extend(self.predict_on_batch(sess, *batch))
        progress.update(step, [])
    return (headlines, bodies), stances, preds
def output(self, sess, inputs_raw, inputs=None):
    """Predict labels for ``inputs_raw``.

    Returns:
        (y_true, preds): gold labels and predictions, aligned by example
        order (batches are not shuffled).
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(
            word2index(self.tokens, inputs_raw)[1])
    gold = []
    predictions = []
    progress = Progbar(target=math.ceil(len(inputs) / self.config.batch_size))
    for step, batch in enumerate(
            minibatches(inputs, self.config.batch_size, shuffle=False), start=1):
        # Keep the gold labels before dropping them from the feature batch.
        gold.extend(batch[1])
        feature_batch = batch[:1] + batch[2:]
        predictions += list(self.predict_on_batch(sess, *feature_batch))
        progress.update(step, [])
    return gold, predictions
def run_epoch(self, sess, train_examples, dev_set, logfile=None):
    """One epoch of training; evaluates on train and dev subsets.

    Fixes vs. original: the Progbar target was a float under Python 3's
    true division (now wrapped in int()); ``logfile`` was written to
    unconditionally even though it defaults to None (now guarded).

    Returns:
        The dev-set loss for this epoch.
    """
    prog = Progbar(
        target=1 + int(train_examples[0].shape[0] / self.config.batch_size))
    for i, (inputs_batch, outputs_batch) in enumerate(
            minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, inputs_batch, outputs_batch,
                                   get_loss=True)
        prog.update(i + 1, [("train loss", loss)])
    print("")
    print("Evaluating on train set...")
    train_loss = self.eval_batches(sess, train_examples,
                                   self.config.n_eval_batches)
    print("Train Loss: {0:.6f}".format(train_loss))
    print("Evaluating on dev set...")
    dev_loss = self.eval_batches(sess, dev_set, self.config.n_eval_batches)
    print("Dev Loss: {0:.6f}".format(dev_loss))
    if logfile is not None:
        logfile.write(",{0:.5f},{1:.5f}\n".format(float(train_loss),
                                                  float(dev_loss)))
    return dev_loss
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    """Run the full training loop; return the best dev entity-level F1.

    Preprocesses both datasets once, trains epoch by epoch, and saves the
    model whenever the dev F1 improves.
    """
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    n_epochs = self.config.n_epochs
    batch_size = self.config.batch_size
    for epoch in range(n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, n_epochs)
        bar = Progbar(target=1 + int(len(train_examples) / batch_size))
        for n, batch in enumerate(minibatches(train_examples, batch_size), start=1):
            batch_loss = self.train_on_batch(sess, *batch)
            bar.update(n, [("train loss", batch_loss)])
            if self.report:
                self.report.log_train_loss(batch_loss)
        print("")
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s",
                            self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw, epoch, last_epoch):
    """One training epoch; returns the dev-set KL divergence.

    Also appends the epoch's mean training loss / dev divergence to the
    module-level ``train_loss`` / ``dev_loss`` lists.
    """
    bar = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
    epoch_loss = 0.
    batches_seen = 0
    for n, batch in enumerate(
            minibatches(train_examples, self.config.batch_size), start=1):
        loss = self.train_on_batch(sess, *batch)
        bar.update(n, [("train loss", loss)])
        epoch_loss += loss
        batches_seen += 1
        if self.report:
            self.report.log_train_loss(loss)
    # NOTE: train_loss / dev_loss are module-level lists shared with callers.
    train_loss.append(epoch_loss / batches_seen)
    print("")
    logger.info("Evaluating on development data")
    divergence = self.evaluate(sess, dev_set, dev_set_raw, last_epoch)
    logger.info("KL- divergence: %.2f", divergence)
    dev_loss.append(divergence)
    return divergence
def run_epoch(self, sess, train_examples, dev_set, train_examples_raw, dev_set_raw):
    """One training epoch followed by dev-set evaluation; returns dev F1."""
    batch_size = self.config.batch_size
    bar = Progbar(target=1 + int(len(train_examples) / batch_size))
    for n, batch in enumerate(minibatches(train_examples, batch_size), start=1):
        batch_loss = self.train_on_batch(sess, *batch)
        bar.update(n, [("train loss", batch_loss)])
        if self.report:
            self.report.log_train_loss(batch_loss)
    print("")
    logger.info("Evaluating on development data")
    token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
    logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
    logger.debug("Token-level scores:\n" + token_cm.summary())
    logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
    return entity_scores[-1]
def train_on_epoch(self, sess, summary, summary_writer):
    """Train over one epoch of self.train_set in minibatches.

    Each batch holds batch_size sentences, labels and masks; TF summaries
    are flushed every 10 batches and a textual progress bar is printed.
    """
    # Train batch by batch; each batch contains batch_size sentences,
    # labels and masks.
    for i, batch in enumerate(minibatches(self.train_set, self.batch_size)):
        inputs_batch, labels_batch, mask_batch = batch[0], batch[1], batch[2]
        feed = self.create_feed_dict(inputs_batch=inputs_batch,
                                     mask_batch=mask_batch,
                                     labels_batch=labels_batch,
                                     dropout=self.dropout)
        _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
        if i % 10 == 0:
            summary_str = sess.run(summary, feed_dict=feed)
            summary_writer.add_summary(summary_str, i)
            summary_writer.flush()
        # NOTE(review): the source formatting was flattened; the progress
        # print is assumed to run every batch (outside the summary `if`)
        # — confirm against the original layout.
        total_batch = 1 + int(len(self.train_set) / self.batch_size)
        print(
            progress_bar(i, total_batch, "batch") +
            " train_loss:{:.8f}".format(loss))
def validate(self, sess, val_data):
    """Average validation cost across all minibatches of ``val_data``.

    Calls self.test() per batch, which explicitly computes the validation
    cost; how you iterate depends on your data-iteration design.

    :return: mean validation cost
    """
    total = 0
    num_batches = int(len(val_data) / self.batch_size) + 1
    progress = Progbar(target=num_batches)
    last_index = 0
    for idx, batch in enumerate(minibatches(val_data, self.batch_size)):
        batch_loss = self.test(sess, batch)
        progress.update(idx + 1, [("val loss", batch_loss)])
        total += batch_loss
        last_index = idx
    print("")
    return total / (last_index + 1)
def run_epoch_1(self, train, dev, epoch):
    """One complete pass over the train set, then evaluation on dev.

    Args:
        train: dataset yielding (sentences, tags) tuples
        dev: dev dataset
        epoch: (int) index of the current epoch
    Returns:
        f1: (python float) dev F1, higher is better — used for model
        selection.
    """
    # Progress-bar bookkeeping.
    batch_size = Config.batch_size
    nbatches = (len(train) + batch_size - 1) // batch_size
    progress = Progbar(target=nbatches)

    for step, (words, labels) in enumerate(minibatches(train, batch_size)):
        fd, _ = self.get_feed_dict(words, labels, Config.lr, Config.dropout)
        _, train_loss, summary = self.sess.run(
            [self.train_op, self.loss, self.merged], feed_dict=fd)
        progress.update(step + 1, [("train loss", train_loss)])
        # Tensorboard summaries every 10 batches.
        if step % 10 == 0:
            self.file_writer.add_summary(summary, epoch * nbatches + step)

    metrics = self.run_evaluate(dev)
    msg = " - ".join(
        ["{} {:04.2f}".format(k, v) for k, v in metrics.items()])
    self.logger.info(msg)
    return metrics["f1"]
def output(self, sess, inputs):
    """Run the model over ``inputs`` in order.

    Returns:
        A list of (true, pred) pairs, one per example.
    """
    gold = []
    predicted = []
    # shuffle=False keeps predictions aligned with the input order.
    batch_iter = minibatches(inputs, self.config.batch_size, shuffle=False)
    for batch in batch_iter:
        # The last two columns are the gold indexes; everything before
        # them is model input.
        feature_batch = batch[:-2]
        batch_preds = self.predict_on_batch(sess, *feature_batch)
        predicted += list(np.transpose(batch_preds))
        gold += list(np.transpose(batch[-2:]))
    # Pair up gold and predicted values per example.
    return [(gold[k], predicted[k]) for k in range(len(gold))]
sess.run(init_l) print(80 * "=") print("TRAINING") print(80 * "=") losses, dev_losses, accs, dev_accs = model.fit(sess, saver, train, dev) print(80 * "=") print("TESTING") print(80 * "=") print("Restoring the best model weights found on the dev set") saver.restore(sess, './data/weights/model.weights') print("Final evaluation on test set") preds = [] refs = [] test_losses = [] test_accs = [] for batch in minibatches(test, model.config.batch_size): inputs_batch, targets_batch = batch pred, test_loss, test_acc, _, _ = model.predict_on_batch( sess, *batch) pred = list(pred) preds += pred refs += list(targets_batch) test_losses.append(test_loss) test_accs.append(test_acc) mean_test_loss = np.mean(np.asarray(test_losses)) preds = [ tokens_to_sentences(pred, model.config.idx2word) for pred in preds ] refs = [ tokens_to_sentences(ref, model.config.idx2word) for ref in refs
def main(debug=True):
    """Entry point: parse CLI args, build and train the BasicLSTM stance
    model, then evaluate on the test set and pickle the predictions.

    NOTE: Python 2 code (print statements, cPickle).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--epoch', type=int, default=5)
    parser.add_argument('--restore', action='store_true')
    args = parser.parse_args()
    # Ensure all output directories exist before training starts.
    if not os.path.exists('./data/weights/'):
        os.makedirs('./data/weights/')
    if not os.path.exists('./data/predictions/'):
        os.makedirs('./data/predictions/')
    if not os.path.exists('./data/plots/'):
        os.makedirs('./data/plots/')
    with tf.Graph().as_default():
        print 80 * "="
        print "INITIALIZING"
        print 80 * "="
        config = Config()
        if args.epoch:
            config.n_epochs = args.epoch
        # Load Data
        # Note: X_train_input, X_dev_input, X_test_input are lists where each item is an example.
        # Each example is a sparse representation of a headline + article, where the text
        # is encoded as a series of indices into the glove-vectors.
        # y_train_input, y_dev_input, y_test_input are matrices (num_examples, num_classes)
        X_train_input, X_dev_input, X_test_input, y_train_input, y_dev_input, y_test_input, glove_matrix, max_lengths = create_inputs_by_glove()
        config.max_length = max_lengths[0] + max_lengths[1]
        print "Max Length is {}".format(config.max_length)
        # Create Basic LSTM Model
        config.pretrained_embeddings = glove_matrix
        model = BasicLSTM(config)
        # Create Data Lists
        train_examples = [X_train_input, y_train_input]
        dev_set = [X_dev_input, y_dev_input]
        test_set = [X_test_input, y_test_input]
        print "Building model...",
        start = time.time()
        print "took {:.2f} seconds\n".format(time.time() - start)
        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)
            # Exclude the (large) embedding matrix from checkpoint saving.
            exclude_names = set([
                "embedding_matrix:0", "embedding_matrix/Adam:0",
                "embedding_matrix/Adam_1:0"
            ])
            saver = create_tensorflow_saver(exclude_names)
            if args.restore:
                saver.restore(session, './data/weights/basic_lstm_curr_stance.weights')
                print "Restored weights from ./data/weights/basic_lstm_curr_stance.weights"
                print "-------------------------------------------"
            session.graph.finalize()
            print 80 * "="
            print "TRAINING"
            print 80 * "="
            model.fit(session, saver, train_examples, dev_set)
            if saver:
                print 80 * "="
                print "TESTING"
                print 80 * "="
                print "Restoring the best model weights found on the dev set"
                saver.restore(session, './data/weights/basic_lstm_best_stance.weights')
                print "Final evaluation on test set",
                prog = Progbar(target=1 + len(test_set[0]) / config.batch_size)
                actual = vectorize_stances(test_set[1])
                preds = []
                for i, (inputs_batch, labels_batch) in enumerate(
                        minibatches(test_set, config.batch_size)):
                    predictions_batch = list(
                        model.predict_on_batch(session, inputs_batch))
                    preds.extend(predictions_batch)
                    prog.update(i + 1)
                test_score, test_lines = report_score(actual, preds)
                print "- test Score: {:.2f}".format(test_score)
                # Persist test predictions for later analysis.
                print "Writing predictions"
                with open('./data/predictions/basic_lstm_predicted.pkl', 'w') as f:
                    cPickle.dump(preds, f, -1)
                print "Done!"
def main(_):
    """Produce SQuAD dev-set answer predictions from an ensemble of QA models.

    Pipeline:
      1. Set up dirs/logging, tokenize the dev json into id files.
      2. Load and pad the dev data (treated as a test set here).
      3. Load per-model start/end probability dicts from pickles, re-evaluate
         the models listed in `models` with restored checkpoints, and re-save.
      4. Combine the models' probabilities multiplicatively and pick the
         (start, end) span maximizing the joint product.
      5. Write the answers to dev-prediction.json.

    Args:
        _: ignored (tf.app.run-style entry point signature).
    """
    vocab, rev_vocab = initialize_vocab(FLAGS.vocab_path)
    # NOTE(review): embed_path is computed but never used below — dead unless
    # something outside this view depends on the side effect (there is none).
    embed_path = FLAGS.embed_path or pjoin("data", "squad", "glove.trimmed.{}.npz".format(FLAGS.embedding_size))
    global_train_dir = '/tmp/cs224n-squad-train'
    # Adds symlink to {train_dir} from /tmp/cs224n-squad-train to canonicalize the
    # file paths saved in the checkpoint. This allows the model to be reloaded even
    # if the location of the checkpoint files has moved, allowing usage with CodaLab.
    # This must be done on both train.py and qa_answer.py in order to work.
    if not os.path.exists(FLAGS.train_dir):
        os.makedirs(FLAGS.train_dir)
    if os.path.exists(global_train_dir):
        os.unlink(global_train_dir)
    #os.symlink(os.path.abspath(FLAGS.train_dir), global_train_dir)
    # NOTE(review): the symlink creation is commented out, so train_dir points
    # at a path that was just unlinked; it is later overwritten per model anyway.
    train_dir = global_train_dir
    if not os.path.exists(FLAGS.log_dir):
        os.makedirs(FLAGS.log_dir)
    # Mirror all logging to log_dir/log.txt and persist the flags for the run.
    file_handler = logging.FileHandler(pjoin(FLAGS.log_dir, "log.txt"))
    logging.getLogger().addHandler(file_handler)
    print(vars(FLAGS))
    with open(os.path.join(FLAGS.log_dir, "flags.json"), 'w') as fout:
        json.dump(FLAGS.__flags, fout)
    # ========= Download Dataset json =========
    # You can change this code to load dataset in your own way
    #dev_dirname = os.path.dirname(os.path.abspath(FLAGS.dev_path))
    #dev_filename = os.path.basename(FLAGS.dev_path)
    #_, _, _ = prepare_dev(dev_dirname, dev_filename, vocab)
    # ========= Process input json =========
    # for codalab
    prefix = os.path.join("data", "squad")
    # writes dev.answer, dev.context, dev.question, dev.span
    # NOTE(review): dev_path is assigned but unused; dev_filename is the full
    # path unless FLAGS.dev_path contains "/" separators.
    dev_path = FLAGS.dev_path
    dev_filename = FLAGS.dev_path.split("/")[-1]
    if FLAGS.download:
        dev_data = data_from_json(os.path.join(prefix, dev_filename))
    else:
        dev_data = data_from_json(dev_filename)
    dev_num_questions, dev_num_answers = read_write_dataset(dev_data, 'dev', prefix="")
    print("Processed {} questions and {} answers in dev".format(dev_num_questions, dev_num_answers))
    # writes dev.ids.context, dev.ids.question
    vocab_path = pjoin(os.path.join("data", "squad"), "vocab.dat")
    # Deposit path resolves to "./dev" (current directory), not data/squad.
    dev_deposit_path = pjoin(os.path.join("", ""), "dev") #pjoin(os.path.join("data", "squad"), "dev")
    x_dis_path = dev_deposit_path + ".ids.context"
    y_ids_path = dev_deposit_path + ".ids.question"
    data_to_token_ids(dev_deposit_path + ".context", x_dis_path, vocab_path)
    data_to_token_ids(dev_deposit_path + ".question", y_ids_path, vocab_path)
    # load data sets
    #Q_test, P_test, A_start_test, A_end_test, A_len_test, P_raw_test, A_raw_test, Q_len_test, P_len_test = load_data(os.path.join("data", "squad"), "dev") # for our purposes this is as test set.
    Q_test, P_test, A_start_test, A_end_test, A_len_test, P_raw_test, A_raw_test, Q_len_test, P_len_test = load_data_home(dev_deposit_path) # for our purposes this is as test set.
    # Question UUIDs, one per line (kept with trailing newline; stripped later).
    question_uuid_data = []
    with open(dev_deposit_path + ".quid") as f:
        for line in f:
            question_uuid_data.append((line))
    # pad the data at load-time. So, we don't need to do any masking later!!!
    # ref: https://keras.io/preprocessing/sequence/
    # if len < maxlen, pad with specified val
    # elif len > maxlen, truncate
    QMAXLEN = FLAGS.QMAXLEN
    PMAXLEN = FLAGS.PMAXLEN
    Q_test = pad_sequences(Q_test, maxlen=QMAXLEN, value=PAD_ID, padding='post')
    P_test = pad_sequences(P_test, maxlen=PMAXLEN, value=PAD_ID, padding='post')
    A_start_test = pad_sequences(A_start_test, maxlen=PMAXLEN, value=0, padding='post')
    A_end_test = pad_sequences(A_end_test, maxlen=PMAXLEN, value=0, padding='post')
    # Python 2 zip -> list of 10-tuples; indices 7/8/9 below are
    # P_raw, A_raw and the question uuid respectively.
    test_data = zip(P_test, Q_test, P_len_test, Q_len_test, A_start_test, A_end_test, A_len_test, P_raw_test, A_raw_test, question_uuid_data)
    # ========= Model-specific =========
    # You must change the following code to adjust to your model
    # The triple-quoted block below is the full seven-model evaluation loop,
    # disabled (commented out via string literal) because its outputs were
    # already pickled; only COATT_fixed_200 is re-run further down.
    """models = [ 'MPCM', 'COATT', 'COATT_fixed', 'COATT_mix','COATT_fixed_mix', 'COATT_fixed_200_mix'] # 'COATT_fixed_200', leave out to save time
    predictions_start = {}; predictions_end = {}
    with open("preds_dev.txt", "a") as f:
        f.write("model" + "," + "pred_raw" + "," + "a_raw")
        for model in models:
            FLAGS.model_type = model
            FLAGS.train_dir = "train/ensemble_train_" + model
            train_dir = "train/ensemble_train_" + model
            # define sizes etc. for different models.
            if model == 'COATT_fixed_200' or model == 'COATT_fixed_200_mix' :
                FLAGS.embedding_size = 200
                FLAGS.lstm_units = 200
            elif model == "MPCM_p100":
                FLAGS.embedding_size = 100
                FLAGS.lstm_units = 100
                FLAGS.perspective_units = 100
            else:
                FLAGS.embedding_size = 100
                FLAGS.lstm_units = 100
                FLAGS.perspective_units = 50
            with tf.Graph().as_default():
                with tf.Session() as sess:
                    embeddings = np.load(FLAGS.data_dir + '/glove.trimmed.' + str(FLAGS.embedding_size) + '.npz')
                    pretrained_embeddings = embeddings['glove']
                    qa = QASystem(FLAGS, pretrained_embeddings, vocab_dim=len(vocab.keys()))
                    initialize_model(sess, qa, train_dir)
                    # get predicted start-end indices
                    a_s_l = []
                    a_e_l = []
                    f1 = exact_match = total = 0; answers = {}; prob_start = {}; prob_end = {}; p_raw_mapping= {}
                    prog = Progbar(target=1 + int(len(test_data) / FLAGS.batch_size))
                    for i, batch in enumerate(minibatches(test_data, FLAGS.batch_size, shuffle = False)):
                        batch_test = batch[:4]
                        (ys, ye) = qa.predict_on_batch(sess, *batch_test)
                        a_s = (np.argmax(ys, axis=1))
                        a_e = (np.argmax(ye, axis=1))
                        a_s_l = a_s_l + list(a_s)
                        a_e_l = a_e_l + list(a_e)
                        print(len(a_s))
                        for j in range(len(a_s)):
                            p_raw = batch[7][j]
                            a_raw = batch[8][j]
                            s = a_s[j]
                            e = a_e[j]
                            pred_raw = ' '.join(p_raw.split()[s:e + 1])
                            p_raw_mapping[batch[9][j].strip("\n")] = p_raw
                            #answers[batch[9][j].strip("\n")] = pred_raw.strip("\n")
                            prob_start[batch[9][j].strip("\n")] = ys[j]
                            prob_end[batch[9][j].strip("\n")] = ye[j]
                            f.write(model + "," + pred_raw + "," + a_raw )
                        prog.update(i + 1, [("processed", i + 1)])
                    predictions_start[model] = prob_start
                    predictions_end[model] = prob_end
    f.close()
    # save
    dropPickle(predictions_start, "preds_start.pkl")
    dropPickle(predictions_end, "preds_end.pkl")
    dropPickle(p_raw_mapping, "p_raw_mapping.pkl")"""
    # Restore the per-model probability dicts produced by the (now disabled)
    # loop above, then re-evaluate just COATT_fixed_200 and merge it in.
    predictions_start = loadPickle("preds_start.pkl")
    predictions_end = loadPickle("preds_end.pkl")
    p_raw_mapping = loadPickle("p_raw_mapping.pkl")
    models = ['COATT_fixed_200']
    #predictions_start = {}; predictions_end = {}
    with open("preds_dev.txt", "a") as f:
        f.write("model" + "," + "pred_raw" + "," + "a_raw")
        for model in models:
            # Point FLAGS/checkpoint dir at this ensemble member.
            FLAGS.model_type = model
            FLAGS.train_dir = "train/ensemble_train_" + model
            train_dir = "train/ensemble_train_" + model
            # Per-model hyperparameter sizes (must match how it was trained).
            if model == 'COATT_fixed_200' or model == 'COATT_fixed_200_mix' :
                FLAGS.embedding_size = 200
                FLAGS.lstm_units = 200
            elif model == "MPCM_p100":
                FLAGS.embedding_size = 100
                FLAGS.lstm_units = 100
                FLAGS.perspective_units = 100
            else:
                FLAGS.embedding_size = 100
                FLAGS.lstm_units = 100
                FLAGS.perspective_units = 50
            # Fresh graph/session per model so restored variables don't clash.
            with tf.Graph().as_default():
                with tf.Session() as sess:
                    embeddings = np.load(FLAGS.data_dir + '/glove.trimmed.' + str(FLAGS.embedding_size) + '.npz')
                    pretrained_embeddings = embeddings['glove']
                    qa = QASystem(FLAGS, pretrained_embeddings, vocab_dim=len(vocab.keys()))
                    initialize_model(sess, qa, train_dir)
                    # get predicted start-end indices
                    a_s_l = []
                    a_e_l = []
                    # NOTE(review): p_raw_mapping loaded from pickle above is
                    # reset here; it ends up holding only this loop's entries
                    # (harmless if every model covers the same questions —
                    # confirm).
                    f1 = exact_match = total = 0; answers = {}; prob_start = {}; prob_end = {}; p_raw_mapping= {}
                    prog = Progbar(target=1 + int(len(test_data) / FLAGS.batch_size))
                    # shuffle=False keeps batches aligned with uuid order.
                    for i, batch in enumerate(minibatches(test_data, FLAGS.batch_size, shuffle = False)):
                        # First four fields (P, Q, P_len, Q_len) feed the model.
                        batch_test = batch[:4]
                        (ys, ye) = qa.predict_on_batch(sess, *batch_test)
                        a_s = (np.argmax(ys, axis=1))
                        a_e = (np.argmax(ye, axis=1))
                        a_s_l = a_s_l + list(a_s)
                        a_e_l = a_e_l + list(a_e)
                        print(len(a_s))
                        for j in range(len(a_s)):
                            p_raw = batch[7][j]
                            a_raw = batch[8][j]
                            s = a_s[j]
                            e = a_e[j]
                            print(s,e)# comment this out
                            # Answer text = paragraph tokens s..e inclusive.
                            pred_raw = ' '.join(p_raw.split()[s:e + 1])
                            # Key everything by the uuid with its newline
                            # stripped.
                            p_raw_mapping[batch[9][j].strip("\n")] = p_raw
                            #answers[batch[9][j].strip("\n")] = pred_raw.strip("\n")
                            prob_start[batch[9][j].strip("\n")] = ys[j]
                            prob_end[batch[9][j].strip("\n")] = ye[j]
                            f.write(model + "," + pred_raw + "," + a_raw )
                        prog.update(i + 1, [("processed", i + 1)])
                    # Merge this model's probabilities into the ensemble dicts.
                    predictions_start[model] = prob_start
                    predictions_end[model] = prob_end
    # NOTE(review): redundant — the `with` block already closed the file.
    f.close()
    dropPickle(predictions_start, "preds_start.pkl")
    dropPickle(predictions_end, "preds_end.pkl")
    dropPickle(p_raw_mapping, "p_raw_mapping.pkl")
    # combine the predictions of the two models (while making independent start, end predictions)
    """answers = {}
    for qkey in predictions_start['MPCM'].keys():
        ys = predictions_start['MPCM'][qkey]*predictions_start['COATT'][qkey]*predictions_start['COATT_fixed'][qkey]
        ye = predictions_end['MPCM'][qkey]*predictions_end['COATT'][qkey]*predictions_end['COATT_fixed'][qkey]
        s = (np.argmax(ys))
        arr = ye.copy()
        arr[0:s] = 0
        e = (np.argmax(arr))
        #e = (np.argmax(ye))
        pred_raw = ' '.join(p_raw_mapping[qkey].split()[s:e + 1])
        answers[qkey] = pred_raw.strip("\n")"""
    # predict span with max predicted probability (make joint prediction rather than independently predicting start and end indices)
    answers = {}
    for qkey in predictions_start['MPCM'].keys():
        # Element-wise product of the seven models' start (resp. end)
        # probability vectors for this question.
        ys = predictions_start['MPCM'][qkey]*predictions_start['COATT'][qkey]*predictions_start['COATT_fixed'][qkey]\
            *predictions_start['COATT_mix'][qkey]*predictions_start['COATT_fixed_mix'][qkey]\
            *predictions_start['COATT_fixed_200_mix'][qkey]*predictions_start['COATT_fixed_200'][qkey] #to save time
        ye = predictions_end['MPCM'][qkey]*predictions_end['COATT'][qkey]*predictions_end['COATT_fixed'][qkey]\
            *predictions_end['COATT_mix'][qkey]*predictions_end['COATT_fixed_mix'][qkey]\
            *predictions_end['COATT_fixed_200_mix'][qkey]*predictions_end['COATT_fixed_200'][qkey] #to save time
        # Exhaustive O(P^2) search over valid spans (end >= start) for the
        # maximum joint start*end probability.
        s = 0; e = 0; prodmax = 0
        for si in range(0, len(ys)):
            for ei in range(si, len(ye)):
                prod = ys[si]*ye[ei]
                if prod > prodmax:
                    s = si
                    e = ei
                    prodmax = prod
        print(s,e, prodmax)
        pred_raw = ' '.join(p_raw_mapping[qkey].split()[s:e + 1]); print(pred_raw)
        answers[qkey] = pred_raw.strip("\n")
    # write to json file to root dir
    # Python 2: write a unicode JSON payload (non-ASCII preserved).
    with io.open('dev-prediction.json', 'w', encoding='utf-8') as f:
        f.write(unicode(json.dumps(answers, ensure_ascii=False)))