def run_epoch(self, sess, train):
    prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
    losses, grad_norms = [], []
    for i, batch in enumerate(minibatches(train, self.config.batch_size)):
        loss, grad_norm = self.train_on_batch(sess, *batch)
        losses.append(loss)
        grad_norms.append(grad_norm)
        prog.update(i + 1, [("train loss", loss)])
    return losses, grad_norms
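# Several snippets here rely on a minibatches / get_minibatches utility. For
# reference, a minimal sketch of what such a helper might look like, assuming
# `data` is a list of parallel arrays; this is an assumption, not the actual
# utils implementation.
import numpy as np

def minibatches_sketch(data, batch_size, shuffle=True):
    """Yield lists of per-column batches: [features_batch, labels_batch, ...]."""
    num_examples = len(data[0])
    indices = np.arange(num_examples)
    if shuffle:
        np.random.shuffle(indices)
    for start in range(0, num_examples, batch_size):
        idx = indices[start:start + batch_size]
        yield [np.asarray(column)[idx] for column in data]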
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        # You may use the progress bar to monitor the training progress.
        # The progress bar will not be graded, but may help when debugging.
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        # The general idea is to loop over minibatches from train_examples and run
        # train_on_batch inside the loop.
        # Hint: train_examples could be a list containing the feature data and label data.
        # Read the doc for utils.get_minibatches to find out how to use it.
        # Note that get_minibatches could return either a list or a list of lists
        # [features, labels], which makes expanding tuples into arguments (* operator) handy.
        ### YOUR CODE HERE (2-3 lines)
        for batch_set in minibatches(train_examples, self.config.batch_size):
            inputs_batch, labels_batch = batch_set[:2]
            # The mask batch is optional.
            mask_batch = batch_set[2] if len(batch_set) > 2 else None
            if mask_batch is None:
                self.train_on_batch(sess, inputs_batch, labels_batch.reshape(-1,))
            else:
                self.train_on_batch(sess, inputs_batch, labels_batch, mask_batch)
        ### END YOUR CODE
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s", self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def evaluate(self, sess, examples):
    """
    Args:
        sess: a TF Session
        examples: [
            numpy array (num_examples, max_length) of all sentence 1,
            numpy array (num_examples, max_length) of all sentence 2,
            numpy array (num_examples,) of all labels
        ]
    Returns:
        (preds, accuracy, precision, recall, f1)
    """
    preds = []
    confusion_matrix = np.zeros((2, 2), dtype=np.float64)
    num_examples = len(examples[0])
    num_batches = int(np.ceil(num_examples * 1.0 / self.config.batch_size))
    prog = Progbar(target=num_batches)
    for i, batch in enumerate(self.minibatch(examples, shuffle=False)):
        sentence1_batch, sentence2_batch, labels_batch = batch
        preds_ = self.predict_on_batch(sess, sentence1_batch, sentence2_batch)
        preds += list(preds_)
        labels_batch = np.array(labels_batch)
        for j in range(preds_.shape[0]):
            confusion_matrix[labels_batch[j], preds_[j]] += 1
        prog.update(i + 1)
    ## CONFUSION MATRIX (is indeed hella confusing)
    #            pred -   pred +
    # label - |   tn   |   fp   |
    # label + |   fn   |   tp   |
    tn = confusion_matrix[0, 0]
    fp = confusion_matrix[0, 1]
    fn = confusion_matrix[1, 0]
    tp = confusion_matrix[1, 1]
    correct_preds = tp + tn
    accuracy = correct_preds / num_examples
    precision = tp / (tp + fp) if tp > 0 else 0
    recall = tp / (tp + fn) if tp > 0 else 0
    print("\ntp: %f, fp: %f, fn: %f" % (tp, fp, fn))
    f1 = 2 * precision * recall / (precision + recall) if tp > 0 else 0
    return (preds, accuracy, precision, recall, f1)
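# A tiny worked check of the confusion-matrix arithmetic above, with made-up
# counts (illustrative only): tn=50, fp=10, fn=5, tp=35 over 100 examples.
tn, fp, fn, tp = 50.0, 10.0, 5.0, 35.0
accuracy = (tp + tn) / (tn + fp + fn + tp)          # (35 + 50) / 100 = 0.85
precision = tp / (tp + fp)                          # 35 / 45 ~= 0.778
recall = tp / (tp + fn)                             # 35 / 40 = 0.875
f1 = 2 * precision * recall / (precision + recall)  # ~= 0.824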
def run_epoch(self, sess, train_examples, dev_set):
    prog = Progbar(target=1 + int(len(train_examples[0]) / self.config.batch_size))
    for i, (inputs_batch, labels_batch) in enumerate(
            minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, inputs_batch, labels_batch)
        prog.update(i + 1, [("train loss", loss)])
    print("Evaluating on dev set")
    prog = Progbar(target=1 + int(len(dev_set[0]) / self.config.batch_size))
    actual = []
    preds = []
    for i, (inputs_batch, labels_batch) in enumerate(
            minibatches(dev_set, self.config.batch_size)):
        predictions_batch = list(self.predict_on_batch(sess, inputs_batch))
        preds.extend(predictions_batch)
        actual.extend(vectorize_stances(labels_batch))
        prog.update(i + 1)
    dev_score, _ = report_score(actual, preds)
    print("- dev Score: {:.2f}".format(dev_score))
    return dev_score
def run_epoch(self, sess, train, label):
    prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
    losses, grad_norms = [], []
    for i, (inputs_minibatch, labels_minibatch) in enumerate(
            get_minibatches([train, label], self.config.batch_size)):
        loss, grad_norm = self.train_on_batch(sess, inputs_minibatch, labels_minibatch)
        losses.append(loss)
        grad_norms.append(grad_norm)
        prog.update(i + 1, [("train loss", loss)])
    return losses, grad_norms
def answer(self, session, data):
    scores = []
    prog_train = Progbar(target=1 + int(len(data[0]) / self.flags.batch_size))
    for i, batch in enumerate(
            self.minibatches(data, self.flags.batch_size, shuffle=False)):
        score = self.forward_pass(session, *batch)
        scores.append(score)
        prog_train.update(i + 1, [("Predicting Images....", 0.0)])
    print("")
    scores = np.vstack(scores)
    predictions = np.argmax(scores, axis=-1)
    return predictions
def run_epoch(self, sess, train):
    prog = Progbar(target=1 + int(len(train) / self.config.batch_size))
    losses, grad_norms = [], []
    for i, batch in enumerate(minibatches(train, self.config.batch_size)):
        # Skip the final, incomplete batch.
        if batch[0].shape[0] != self.config.batch_size:
            continue
        loss, grad_norm = self.train_on_batch(sess, *batch)
        losses.append(loss)
        grad_norms.append(grad_norm)
        prog.update(i + 1, [("train loss", loss), ("train grad", grad_norm)])
    return losses, grad_norms
def run_epoch(self, sess, batch_gen, info):
    # use 3301 for 24 batch size
    # use 2476 for 32 batch size
    prog = Progbar(target=4952)
    (i1, i2, i3, i4, i5, i6) = info
    batch_epoch = batch_gen(i1, i2, i3, i4, i5, i6)
    for i in range(4952):
        batch = next(batch_epoch)
        loss, grad_norm, EM = self.train_on_batch(sess, batch)
        logging.info("loss is %f, grad_norm is %f" % (loss, grad_norm))
        prog.update(i + 1, [("train loss", loss), ("grad_norm", grad_norm), ("EM", EM)])
        if math.isnan(loss):
            logging.info("loss nan")
            assert False
def summary_success_epoch(train_data, model, session):
    num_train_batches = int(len(train_data['q']) / FLAGS.batch_size)
    prog = Progbar(target=num_train_batches)
    permutation = np.random.permutation(num_train_batches * FLAGS.batch_size)
    successes = []
    for i in range(num_train_batches):
        if i >= FLAGS.train_batch >= 0:
            break
        data_batch = get_batch(train_data, i, permutation=permutation)
        successes.append(model.summary_success(sess=session, data_batch=data_batch))
        prog.update(i + 1, [("retained", sum(successes))])
    logger.debug("Summarization: %d out of %d answers are retained",
                 sum(successes), int(len(train_data['q'])))
    logger.debug("Retain rate: %.2f%%", 100. * sum(successes) / len(train_data['q']))
    return sum(successes)
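# For reference, a minimal sketch of what the get_batch helper used above might
# look like, given that it takes a batch index and an optional permutation over
# examples. The dict-of-arrays layout with fields like 'q' follows the snippets
# here; everything else (signature, batch_size default) is an assumption.
import numpy as np

def get_batch_sketch(data, batch_index, permutation=None, batch_size=32):
    """Slice one minibatch out of a dict of parallel arrays, optionally shuffled."""
    start = batch_index * batch_size
    idx = np.arange(start, start + batch_size)
    if permutation is not None:
        idx = permutation[idx]
    return {key: np.asarray(values)[idx] for key, values in data.items()}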
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        # You may use the progress bar to monitor the training progress.
        # The progress bar will not be graded, but may help when debugging.
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        # The general idea is to loop over minibatches from train_examples and run
        # train_on_batch inside the loop.
        # Hint: train_examples could be a list containing the feature data and label data.
        # Read the doc for utils.get_minibatches to find out how to use it.
        # Note that get_minibatches could return either a list or a list of lists
        # [features, labels], which makes expanding tuples into arguments (* operator) handy.
        ### YOUR CODE HERE (2-3 lines)
        # get_minibatches actually returns a generator, which can be treated as an
        # iterator; the difference is that it produces each result on request
        # instead of storing everything in memory.
        for i, batch in enumerate(minibatches(train_examples, self.config.batch_size)):
            # q1_window.py and q2_rnn.py have different train_on_batch functions that
            # accept different numbers of arguments, so it is cleaner to leave the
            # implementation to the subclass.
            loss = self.train_on_batch(sess, *batch)
            prog.update(current=i + 1, values=[('loss', loss)])
        ### END YOUR CODE
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s", self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    print(train_examples[0])
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        # You may use the progress bar to monitor the training progress.
        # The progress bar will not be graded, but may help when debugging.
        prog = Progbar(
            target=math.ceil(len(train_examples) / self.config.batch_size))
        # The general idea is to loop over minibatches from train_examples and run
        # train_on_batch inside the loop.
        # Hint: train_examples could be a list containing the feature data and label data.
        # Read the doc for utils.get_minibatches to find out how to use it.
        # Note that get_minibatches could return either a list or a list of lists
        # [features, labels], which makes expanding tuples into arguments (* operator) handy.
        ### YOUR CODE HERE (2-3 lines)
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size, shuffle=True)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
            if self.report:
                self.report.log_train_loss(loss)
        ### END YOUR CODE
        logger.info("Evaluating on development data")
        cm, score = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("confusion matrix:\n" + str(cm))
        logger.info("f1 score: %.2f", score)
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s", self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def compute_dev_loss(self, sess, input_batches, labels_batches, mask_batches):
    prog = Progbar(target=1 + len(input_batches))
    total_dev_loss = 0
    for i, input_batch in enumerate(input_batches):
        # An earlier approach ran self.dev_loss directly through a feed dict, but
        # the labels had shape [batch_size x max_sentence_length] when the
        # opposite was expected, so the loss is computed via predict_on_batch.
        dev_loss = self.predict_on_batch(sess, inputs_batch=input_batch,
                                         labels_batch=labels_batches[i],
                                         mask_batch=mask_batches[i],
                                         num_of_batch=i, using_dev=True)
        total_dev_loss += dev_loss
        prog.update(i + 1, [("dev loss", dev_loss)])
        if i == len(input_batches) - 1:
            logger.info("Last batch dev loss: " + str(dev_loss))
    return total_dev_loss
def run_epoch(self, sess, train):
    batches = self.build_batches(self.train_qas)
    if not FLAGS.is_prod:
        batches = batches[:5]
    prog = Progbar(target=len(batches))
    losses = []
    for i, batch in enumerate(batches):
        loss = self.train_on_batch(sess, zip(*batch))
        losses.append(loss)
        prog.update(i + 1, [("train loss", loss)])
    logging.info("Evaluation on training data")
    self.evaluate_answer(sess, self.train_qas, log=True)
    logging.info("Evaluation on dev data")
    f1, em = self.evaluate_answer(sess, self.dev_qas, log=True)
    return f1
def output(self, sess, inputs_raw, inputs=None):
    """
    Reports the output of the model on examples (uses helper to featurize each example).
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    preds = []
    prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    for i, batch in enumerate(minibatches(inputs, self.config.batch_size, shuffle=False)):
        # Drop the labels column; prediction only needs the remaining columns.
        batch = batch[:1] + batch[2:]
        preds_ = self.predict_on_batch(sess, *batch)
        preds += list(preds_)
        prog.update(i + 1, [])
    return self.consolidate_predictions(inputs_raw, inputs, preds)
def generate_answers(session, model, dataset, rev_vocab):
    answers = {}
    num_dev_batches = int(len(dataset['q']) / FLAGS.batch_size) + 1
    prog = Progbar(target=num_dev_batches)
    for i in range(num_dev_batches):
        data_batch = du.get_batch(dataset, i)
        pred = model.predict_on_batch(sess=session, data_batch=data_batch,
                                      rev_vocab=rev_vocab)
        for j, document in enumerate(data_batch['c']):
            answers[data_batch['q_uuids'][j]] = " ".join(
                [rev_vocab[document[index]] for index in pred[j]])
        prog.update(i + 1, [])
    return answers
def generate_answers(sess, model, dataset, rev_vocab):
    """
    Loop over the dev or test dataset and generate answers.

    Note: the output format must be answers[uuid] = "real answer"; you must
    provide a string of words, not just a list or start and end indices.

    In the main() function we dump the answers to a JSON file; evaluate.py then
    takes the output JSON along with the original JSON file and reports F1 and EM.

    You must implement this function in order to submit to the leaderboard.

    :param sess: active TF session
    :param model: a built QASystem model
    :param rev_vocab: a list of vocabulary that maps index to actual words
    :return: the answers dict
    """
    answers = {}
    (context, question, question_uuid_data) = dataset
    context_data = convert_data_to_list(context)
    question_data = convert_data_to_list(question)
    context_padded, context_mask = pad_sequence(context_data, FLAGS.max_context_len)
    question_padded, question_mask = pad_sequence(question_data, FLAGS.max_question_len)
    input_data = vectorize(context_padded, context_mask, question_padded,
                           question_mask, question_uuid_data)
    batch_size = 32
    num_batches = int(len(input_data) / batch_size) + 1
    prog = Progbar(target=num_batches)
    for i, batch in enumerate(minibatches(input_data, batch_size)):
        a_s_vec, a_e_vec = model.answer(sess, batch)
        prog.update(i + 1)
        for (a_s, a_e, context, uuid) in zip(a_s_vec, a_e_vec, batch[0], batch[4]):
            if a_s > a_e:
                a_s, a_e = a_e, a_s
            predicted_answer = model.formulate_answer(context, rev_vocab, a_s, a_e)
            answers[uuid] = predicted_answer
    return answers
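# For reference, a minimal sketch of what the pad_sequence helper used above
# might do: pad or truncate each token-id list to a fixed length and return a
# parallel mask. The signature and the padding id 0 are assumptions, not the
# project's actual implementation.
def pad_sequence_sketch(data, max_length, pad_id=0):
    """Pad/truncate each token-id list to max_length; mask marks real tokens."""
    padded, masks = [], []
    for tokens in data:
        tokens = list(tokens)[:max_length]
        pad_len = max_length - len(tokens)
        padded.append(tokens + [pad_id] * pad_len)
        masks.append([True] * len(tokens) + [False] * pad_len)
    return padded, masks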
def train(self, train_examples, dev_examples):
    model = self._model
    config = self._config
    logger = self._logger
    best_score = 0.
    preprocessed_train_examples = train_examples['preprocessed']
    for epoch in range(config.n_epochs):
        model.train()
        logger.info("Epoch %d out of %d", epoch + 1, config.n_epochs)
        prog = Progbar(
            target=1 + int(len(preprocessed_train_examples) / config.batch_size))
        avg_loss = 0
        for i, minibatch in enumerate(
                minibatches(preprocessed_train_examples, config.batch_size)):
            sentences = torch.tensor(minibatch[0], device=config.device)
            labels = torch.tensor(minibatch[1], device=config.device)
            masks = torch.tensor(minibatch[2], device=config.device)
            avg_loss += self._train_on_batch(sentences, labels, masks)
        avg_loss /= i + 1
        logger.info("Training average loss: %.5f", avg_loss)
        model.eval()
        with torch.no_grad():
            logger.info("Evaluating on development data")
            token_cm, entity_scores = self._evaluator.evaluate(dev_examples)
            logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
            logger.debug("Token-level scores:\n" + token_cm.summary())
            logger.info("Entity level P/R/F1: {:.2f}/{:.2f}/{:.2f}".format(
                *entity_scores))
        score = entity_scores[-1]
        if score > best_score and config.model_output:
            best_score = score
            logger.info("New best score! Saving model in %s", config.model_output)
            torch.save(model.state_dict(), config.model_output)
        print("")
    return best_score
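# The PyTorch variant above delegates the optimization step to _train_on_batch.
# A minimal sketch of what that method might look like, assuming the model
# returns per-token tag scores, masks is a boolean tensor, and an optimizer is
# held in self._optimizer (all of these are assumptions):
import torch.nn.functional as F

def _train_on_batch_sketch(self, sentences, labels, masks):
    """One step: forward pass, masked cross-entropy, backward pass, update."""
    self._optimizer.zero_grad()
    tag_scores = self._model(sentences)        # (batch, seq_len, n_tags)
    loss = F.cross_entropy(tag_scores[masks],  # keep only unmasked tokens
                           labels[masks])
    loss.backward()
    self._optimizer.step()
    return loss.item()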
def run_epoch(self, sess, train_examples, dev_set):
    prog = Progbar(target=1 + int(train_examples[0].shape[0] / self.config.batch_size))
    for i, (articles_batch, headlines_batch, labels_batch) in enumerate(
            minibatches(train_examples, self.config.batch_size)):
        loss = self.train_on_batch(sess, articles_batch, headlines_batch, labels_batch)
        prog.update(i + 1, [("train loss", loss)])
    print("Evaluating on train set")
    train_actual = vectorize_stances(train_examples[2])
    train_preds = list(self.predict_on_batch(sess, *train_examples[:2]))
    train_score, _ = report_score(train_actual, train_preds)
    print("Evaluating on dev set")
    actual = vectorize_stances(dev_set[2])
    preds = list(self.predict_on_batch(sess, *dev_set[:2]))
    dev_score, _ = report_score(actual, preds)
    print("- train Score: {:.2f}".format(train_score))
    print("- dev Score: {:.2f}".format(dev_score))
    return dev_score
def predict(self, examples, use_str_labels=False):
    """
    Reports the output of the model on examples (uses helper to featurize each example).
    """
    config = self._config
    preprocessed_examples = examples['preprocessed']
    preds = []
    prog = Progbar(target=1 + int(len(preprocessed_examples) / config.batch_size))
    for i, minibatch in enumerate(
            minibatches(preprocessed_examples, config.batch_size, shuffle=False)):
        sentences = torch.tensor(minibatch[0], device=config.device)
        tag_probs = self._model(sentences)
        preds_ = torch.argmax(tag_probs, dim=-1)
        preds += list(preds_)
        prog.update(i + 1, [])
    return self.consolidate_predictions(examples, preds, use_str_labels)
def output(self, sess, inputs_raw, inputs=None):
    """
    Reports the output of the model on examples (uses helper to featurize each example).
    """
    if inputs is None:
        # Used at prediction time.
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    preds = []
    # Set the total once; afterwards only the current count needs updating.
    prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    for i, batch in enumerate(
            minibatches(inputs, self.config.batch_size, shuffle=False)):
        # Note shuffle=False here; shuffling is not needed for the RNN at prediction time.
        # Why append batch[2:] as well? To ignore the labels, wouldn't keeping just
        # the first column suffice? (The later columns, e.g. the mask, are still used.)
        batch = batch[:1] + batch[2:]
        preds_ = self.predict_on_batch(sess, *batch)
        preds += list(preds_)
        prog.update(i + 1, [])
    return self.consolidate_predictions(inputs_raw, inputs, preds)
def fit(self, sess, saver, train_examples_raw, dev_set_raw):
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        # You may use the progress bar to monitor the training progress.
        # The progress bar will not be graded, but may help when debugging.
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        # Hint: train_examples is a list containing the feature data and label data.
        # Read the doc for utils.get_minibatches to find out how to use it.
        # The general idea is to loop over minibatches from train_examples and run
        # train_on_batch inside the loop.
        ### YOUR CODE HERE (2-3 lines)
        for i, batch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            loss = self.train_on_batch(sess, *batch)
            prog.update(i + 1, [("train loss", loss)])
        ### END YOUR CODE
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s", self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def train_epoch(train_data, model, session, losses, grad_norms):
    num_train_batches = int(len(train_data['q']) / FLAGS.batch_size)
    prog = Progbar(target=num_train_batches)
    permutation = np.random.permutation(num_train_batches * FLAGS.batch_size)
    retain_sum = 0
    for i in range(num_train_batches):
        if i >= FLAGS.train_batch >= 0:
            break
        data_batch = get_batch(train_data, i, permutation=permutation)
        (grad_norm, loss, retain) = model.train_on_batch(sess=session,
                                                         data_batch=data_batch)
        retain_sum += retain
        losses.append(loss)
        for j, grad in enumerate(grad_norm):
            grad_norms[j].append(grad)
        prog.update(i + 1, [("grad_norm", np.sum(grad_norm)), ("loss", loss)])
    logger.info("{} out of {} ground truth answers are retained.".format(
        retain_sum, int(len(train_data['q']))))
    return grad_norms, losses
def answer(self, session, data):
    yp_lst = []
    yp2_lst = []
    prog_train = Progbar(target=1 + int(len(data[0]) / self.flags.batch_size))
    for i, batch in enumerate(
            self.minibatches(data, self.flags.batch_size, shuffle=False)):
        yp, yp2 = self.decode(session, *batch)
        yp_lst.append(yp)
        yp2_lst.append(yp2)
        prog_train.update(i + 1, [("computing F1...", 1)])
    print("")
    yp_all = np.concatenate(yp_lst, axis=0)
    yp2_all = np.concatenate(yp2_lst, axis=0)
    a_s = np.argmax(yp_all, axis=1)
    a_e = np.argmax(yp2_all, axis=1)
    return (a_s, a_e)
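# Taking the start and end argmax independently, as in answer() above, can
# produce a_s > a_e (which generate_answers later repairs by swapping). A
# hedged alternative is to decode the pair jointly under the constraint
# s <= e; a minimal sketch, assuming start/end probability arrays of shape
# (num_examples, context_len). This is not the snippets' actual method.
import numpy as np

def decode_spans_jointly(start_probs, end_probs, max_span_len=30):
    """Pick (s, e) maximizing start_probs[s] * end_probs[e] with s <= e."""
    starts, ends = [], []
    for p_s, p_e in zip(start_probs, end_probs):
        best_score, best_pair = -1.0, (0, 0)
        for s in range(len(p_s)):
            # Only consider ends within a bounded window after the start.
            e_hi = min(len(p_e), s + max_span_len)
            e = s + int(np.argmax(p_e[s:e_hi]))
            score = p_s[s] * p_e[e]
            if score > best_score:
                best_score, best_pair = score, (s, e)
        starts.append(best_pair[0])
        ends.append(best_pair[1])
    return np.array(starts), np.array(ends)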
def output(self, sess, inputs_raw, inputs=None):
    """
    Reports the output of the model on examples (uses helper to featurize each example).

    Generates the model's per-batch entity-recognition predictions, groups the
    results by sentence, attaches the gold labels, and hands everything to the
    evaluation step.
    """
    if inputs is None:
        inputs = self.preprocess_sequence_data(self.helper.vectorize(inputs_raw))
    preds = []
    prog = Progbar(target=1 + int(len(inputs) / self.config.batch_size))
    for i, batch in enumerate(minibatches(inputs, self.config.batch_size, shuffle=False)):
        # Ignore the gold label column for the window; keep only the window data.
        batch = batch[:1] + batch[2:]  # e.g. [2048 x 6]
        preds_ = self.predict_on_batch(sess, *batch)  # 2048 predictions
        preds += list(preds_)
        prog.update(i + 1, [])
    return self.consolidate_predictions(inputs_raw, inputs, preds)
def run_epoch(self, session, train_examples, dev_examples, epoch_num, train_log):
    num_batches = int(len(train_examples) / self.batch_size)
    prog = Progbar(target=num_batches)
    for i, batch in enumerate(minibatches(train_examples, self.batch_size)):
        loss, summary, current_lr = self.optimize(
            session, batch, global_batch_num=epoch_num * num_batches + i)
        # Logging format: (epoch, loss)
        train_log.write("{},{}\n".format(epoch_num + 1, loss))
        prog.update(i + 1, exact=[("train loss", loss), ("current LR", current_lr)])
        if self.summary_flag:
            self.train_writer.add_summary(summary, i)
    print("")
    logging.info("Evaluating on development data")
    validate_cost = self.test(session, dev_examples)
    return validate_cost
def test(self, session, dev_examples):
    """
    Computes a cost for the validation set; use it to tune hyperparameters
    according to validation-set performance.

    :return: the average dev loss over all batches
    """
    num_batches = int(len(dev_examples) / self.batch_size)
    prog = Progbar(target=num_batches)
    total_cost = 0
    for i, batch in enumerate(minibatches(dev_examples, self.batch_size)):
        input_feed = self.create_feed_dict(batch, dropout=1)
        output_feed = [self.loss]
        outputs = session.run(output_feed, input_feed)
        prog.update(i + 1, exact=[("dev loss", outputs[0])])
        total_cost += outputs[0]
    print("")
    return total_cost / (i + 1)
def fit(self, sess, saver, train_examples_raw, dev_set_raw, writer=None):
    best_score = 0.
    train_examples = self.preprocess_sequence_data(train_examples_raw)
    dev_set = self.preprocess_sequence_data(dev_set_raw)
    step = 0
    for epoch in range(self.config.n_epochs):
        logger.info("Epoch %d out of %d", epoch + 1, self.config.n_epochs)
        prog = Progbar(target=1 + int(len(train_examples) / self.config.batch_size))
        for i, minibatch in enumerate(
                minibatches(train_examples, self.config.batch_size)):
            # All columns (inputs, labels, mask, ...) are passed to train_on_batch.
            data = minibatch
            if writer is None:
                self.train_on_batch(sess, *data)
            else:
                loss, summary = self.train_on_batch(sess, *data, summarize=True)
                writer.add_summary(summary, step)
            step += 1
        logger.info("Evaluating on development data")
        token_cm, entity_scores = self.evaluate(sess, dev_set, dev_set_raw)
        logger.debug("Token-level confusion matrix:\n" + token_cm.as_table())
        logger.debug("Token-level scores:\n" + token_cm.summary())
        logger.info("Entity level P/R/F1: %.2f/%.2f/%.2f", *entity_scores)
        score = entity_scores[-1]
        if score > best_score:
            best_score = score
            if saver:
                logger.info("New best score! Saving model in %s", self.config.model_output)
                saver.save(sess, self.config.model_output)
        print("")
        if self.report:
            self.report.log_epoch()
            self.report.save()
    return best_score
def generate_answers(sess, model, dataset, rev_vocab, batch_size):
    """
    Loop over the dev or test dataset and generate answers.

    Note: the output format must be answers[uuid] = "real answer"; you must
    provide a string of words, not just a list or start and end indices.

    In the main() function we dump the answers to a JSON file; evaluate.py then
    takes the output JSON along with the original JSON file and reports F1 and EM.

    You must implement this function in order to submit to the leaderboard.

    :param sess: active TF session
    :param model: a built QASystem model
    :param rev_vocab: a list of vocabulary that maps index to actual words
    :return: the answers dict
    """
    answers = {}
    all_contexts, all_questions, all_question_uuids = dataset
    prog = Progbar(target=1 + int(len(dataset[0]) / batch_size))
    for i, batch in enumerate(
            get_minibatches([all_contexts, all_questions, all_question_uuids],
                            batch_size, shuffle=False)):
        contexts, questions, question_uuids = batch
        context_lengths = (contexts != 0).sum(1)
        pred_spans = model.answer(sess, [questions, contexts])
        # Clamp the span: the start cannot come after the end, and the end
        # cannot run past the (unpadded) context length.
        pred_spans[:, 0] = np.minimum(pred_spans[:, 0], pred_spans[:, 1])
        pred_spans[:, 1] = np.minimum(pred_spans[:, 1], context_lengths)
        for (s, e), uuid, context in zip(pred_spans, question_uuids, contexts):
            answers[uuid] = " ".join(
                [rev_vocab[context[c]] for c in range(s, e + 1)])
        prog.update(i + 1, [])
    return answers
def run_epoch(self, sess, train_data, dev_data):
    train_input_batches, train_truth_batches, train_mask_batches = train_data
    dev_input_batches, dev_truth_batches, dev_mask_batches = dev_data
    logger.info("number of train input batches: %d", len(train_input_batches))
    prog = Progbar(target=1 + len(train_input_batches))
    loss = 0
    for i, input_batch in enumerate(train_input_batches):
        loss = self.train_on_batch(sess, input_batch, train_truth_batches[i],
                                   train_mask_batches[i])
        prog.update(i + 1, [("train loss", loss)])
    logger.info("\nTrain loss: " + str(loss))
    # Compute (and return) the loss on the dev set.
    dev_loss = self.compute_dev_loss(sess, dev_input_batches, dev_truth_batches,
                                     dev_mask_batches)
    return dev_loss  # TODO: check where the return value is used
def run_epoch(self, sess, train_data, val_data, epoch_num, train_log):
    num_batches = int(len(train_data) / self.batch_size) + 1
    logging.info("Training on training data")
    prog = Progbar(target=num_batches)
    for i, batch in enumerate(minibatches(train_data, self.batch_size)):
        global_batch_num = int(epoch_num * num_batches + i)
        loss, current_lr, summary = self.optimize(sess, batch, global_batch_num)
        prog.update(i + 1, [("train loss", loss), ("current LR", current_lr)])
        train_log.write("{},{}\n".format(epoch_num + 1, loss))
        if self.summary_flag:
            self.train_writer.add_summary(summary, i)
    print("")
    logging.info("Evaluating on development data")
    val_loss = self.validate(sess, val_data)
    return val_loss