def predict(task, encoding_scheme, embedding_type, tf_session, batch_size, sentence_file, mention_idx_file, feature_file, feature_meta_file, scores_file=None, log=None): global CLASSES_CARD, CLASSES_VISUAL classes = None if task == 'nonvis': classes = CLASSES_VISUAL elif task == 'card': classes = CLASSES_CARD n_classes = len(classes) # Load the data log.info("Loading data from " + sentence_file + " and " + mention_idx_file) data_dict = nn_data.load_sentences(sentence_file, embedding_type) data_dict.update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, n_classes)) # Get the predicted scores, given our arguments mentions = data_dict['mention_indices'].keys() pred_scores, gold_label_dict = \ nn_util.get_pred_scores_mcc(task, encoding_scheme, tf_session, batch_size, mentions, data_dict, n_classes, log) # If we do an argmax on the scores, we get the predicted labels pred_labels = list() gold_labels = list() for m in mentions: pred_labels.append(np.argmax(pred_scores[m])) gold_labels.append(np.argmax(gold_label_dict[m])) #endfor # Evaluate the predictions nn_eval.evaluate_multiclass(gold_labels, pred_labels, classes, log) # If a scores file was specified, write the scores if scores_file is not None: log.info("Writing scores file to " + scores_file) with open(scores_file, 'w') as f: for pair_id in pred_scores.keys(): score_line = list() score_line.append(pair_id) for score in pred_scores[pair_id]: if score == 0: score = np.nextafter(0, 1) score_line.append(str(np.log(score))) f.write(",".join(score_line) + "\n") f.close()
def train(task, encoding_scheme, embedding_type, sentence_file, mention_idx_file, feature_file, feature_meta_file, epochs, batch_size, lstm_hidden_width, start_hidden_width, hidden_depth, weighted_classes, lstm_input_dropout, dropout, lrn_rate, adam_epsilon, clip_norm, data_norm, activation, model_file=None, eval_sentence_file=None, eval_mention_idx_file=None, eval_feature_file=None, eval_feature_meta_file=None, early_stopping=False, log=None): """ Trains a nonvis or cardinality model :param task: {nonvis, card} :param encoding_scheme: {first_last_sentence, first_last_mention} :param embedding_type: {w2v, glove} :param sentence_file: File with captions :param mention_idx_file: File with mention pair word indices :param feature_file: File with sparse mention pair features :param feature_meta_file: File associating sparse indices with feature names :param epochs: Number of times to run over the data :param batch_size: Number of mention pairs to run each batch :param lstm_hidden_width: Number of hidden units in the lstm cells :param start_hidden_width: Number of hidden units to which the mention pairs' representation is passed :param hidden_depth: Number of hidden layers after the lstm :param weighted_classes: Whether to weight the examples by their class inversely with the frequency of that class :param lstm_input_dropout: Probability to keep for lstm inputs :param dropout: Probability to keep for all other nodes :param lrn_rate: Learning rate of the optimizer :param clip_norm: Global gradient clipping norm :param adam_epsilon: Adam optimizer epsilon value :param activation: Nonlinear activation function (sigmoid,tanh,relu) :param model_file: File to which the model is periodically saved :param eval_sentence_file: Sentence file against which the model should be evaluated :param eval_mention_idx_file: Mention index file against which the model should be evaluated :return: """ global CLASSES_CARD, CLASSES_VISUAL # Retrieve the correct set of classes classes = None if task == 'nonvis': classes = CLASSES_VISUAL elif task == 'card': classes = CLASSES_CARD n_classes = len(classes) log.info("Loading data from " + sentence_file + " and " + mention_idx_file) data_dict = nn_data.load_sentences(sentence_file, embedding_type) data_dict.update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, n_classes)) log.info("Loading data from " + eval_sentence_file + " and " + eval_mention_idx_file) eval_data_dict = nn_data.load_sentences(eval_sentence_file, embedding_type) eval_data_dict.update( nn_data.load_mentions(eval_mention_idx_file, task, eval_feature_file, eval_feature_meta_file, n_classes)) mentions = list(data_dict['mention_indices'].keys()) n_pairs = len(mentions) log.info("Setting up network architecture") with tf.variable_scope('bidirectional_lstm'): nn_util.setup_bidirectional_lstm(lstm_hidden_width, data_norm) nn_util.setup_core_architecture(task, encoding_scheme, batch_size, start_hidden_width, hidden_depth, weighted_classes, activation, n_classes, data_dict['n_mention_feats']) loss = tf.get_collection('loss')[0] accuracy = tf.get_collection('accuracy')[0] nn_util.add_train_op(loss, lrn_rate, adam_epsilon, clip_norm) train_op = tf.get_collection('train_op')[0] nn_util.dump_tf_vars() # We want to keep track of the best scores with # the epoch that they originated from best_avg_score = -1 best_epoch = -1 log.info("Training") saver = tf.train.Saver(max_to_keep=100) with tf.Session() as sess: # Initialize all our variables sess.run(tf.global_variables_initializer()) # Iterate through the data [epochs] number of times for i in range(0, epochs): log.info(None, "--- Epoch %d ----", i + 1) losses = list() accuracies = list() # Shuffle the data once for this epoch np.random.shuffle(mentions) # Iterate through the entirety of the data start_idx = 0 end_idx = start_idx + batch_size n_iter = n_pairs / batch_size for j in range(0, n_iter): log.log_status('info', None, 'Training; %d (%.2f%%) batches complete', j, 100.0 * j / n_iter) # Retrieve this batch batch_mentions = mentions[start_idx:end_idx] batch_tensors = nn_data.load_batch(batch_mentions, data_dict, task, n_classes) # Train nn_util.run_op(sess, train_op, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True) # Store the losses and accuracies every 100 batches if (j + 1) % 100 == 0: losses.append( nn_util.run_op(sess, loss, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True)) accuracies.append( nn_util.run_op(sess, accuracy, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True)) #endif start_idx = end_idx end_idx = start_idx + batch_size #endfor # Every epoch, evaluate and save the model log.info(None, "Saving model; Average Loss: %.2f; Acc: %.2f%%", sum(losses) / float(len(losses)), 100.0 * sum(accuracies) / float(len(accuracies))) saver.save(sess, model_file) if ( i + 1 ) % 10 == 0 and eval_sentence_file is not None and eval_mention_idx_file is not None: eval_mentions = eval_data_dict['mention_indices'].keys() pred_scores, gold_label_dict = \ nn_util.get_pred_scores_mcc(task, encoding_scheme, sess, batch_size, eval_mentions, eval_data_dict, n_classes, log) # If we do an argmax on the scores, we get the predicted labels eval_mentions = list(pred_scores.keys()) pred_labels = list() gold_labels = list() for m in eval_mentions: pred_labels.append(np.argmax(pred_scores[m])) gold_labels.append(np.argmax(gold_label_dict[m])) #endfor # Evaluate the predictions score_dict = nn_eval.evaluate_multiclass( gold_labels, pred_labels, classes, log) # Get the current scores and see if their average beats our best # by half a point (if we're stopping early) avg = score_dict.get_score(0).f1 + score_dict.get_score(1).f1 avg /= 2.0 if avg >= best_avg_score - 0.005: log.info( None, "Previous best score average F1 of %.2f%% after %d epochs", 100.0 * best_avg_score, best_epoch) best_avg_score = avg best_epoch = i log.info(None, "New best at current epoch (%.2f%%)", 100.0 * best_avg_score) #endif # Implement early stopping; if it's been 10 epochs since our best, stop if early_stopping and i >= (best_epoch + 10): log.info(None, "Stopping early; best scores at %d epochs", best_epoch) break #endif #endif #endfor log.info("Saving final model") saver.save(sess, model_file)
def predict(encoding_scheme, embedding_type, tf_session, batch_size, sentence_file, mention_idx_file, feature_file, feature_meta_file, box_dir, mention_box_label_file, box_category_file=None, scores_file=None, log=None): """ :param encoding_scheme: :param embedding_type: :param tf_session: :param batch_size: :param sentence_file: :param mention_idx_file: :param feature_file: :param feature_meta_file: :param box_dir: :param mention_box_label_file: :param scores_file: :param log: :return: """ global CLASSES, task n_classes = len(CLASSES) # Load the data log.info("Loading data") data_dict = nn_data.load_sentences(sentence_file, embedding_type) data_dict.update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, n_classes)) data_dict.update( nn_data.load_boxes(mention_box_label_file, box_dir, box_category_file)) # Get the predicted scores, given our arguments mention_box_pairs = get_valid_mention_box_pairs(data_dict) pred_scores, gold_label_dict = \ nn_util.get_pred_scores_mcc(task, encoding_scheme, tf_session, batch_size, mention_box_pairs, data_dict, n_classes, log) # If we do an argmax on the scores, we get the predicted labels mentions = list(pred_scores.keys()) pred_labels = list() gold_labels = list() for m in mentions: pred_labels.append(np.argmax(pred_scores[m])) gold_labels.append(np.argmax(gold_label_dict[m])) #endfor # Evaluate the predictions nn_eval.evaluate_multiclass(gold_labels, pred_labels, CLASSES, log) # If a scores file was specified, write the scores log.info("Writing scores file") if scores_file is not None: with open(scores_file, 'w') as f: for pair_id in pred_scores.keys(): score_line = list() score_line.append(pair_id) for score in pred_scores[pair_id]: if score == 0: score = np.nextafter(0, 1) score_line.append(str(np.log(score))) f.write(",".join(score_line) + "\n") f.close()
def predict(rel_type, encoding_scheme, embedding_type, tf_session, batch_size, sentence_file, mention_idx_file, feature_file, feature_meta_file, label_file, scores_file=None, ordered_pairs=False, log=None): """ Wrapper for making predictions on a pre-trained model, already loaded into the session :param rel_type: :param encoding_scheme: :param embedding_type: :param tf_session: :param batch_size: :param sentence_file: :param mention_idx_file: :param feature_file: :param feature_meta_file: :param label_file: :param scores_file: :return: """ global CLASSES n_classes = len(CLASSES) task = "rel_" + rel_type # Load the data log.info("Loading data from " + sentence_file + " and " + mention_idx_file) data_dict = nn_data.load_sentences(sentence_file, embedding_type) data_dict.update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, n_classes)) # Get the predicted scores, given our arguments log.info("Predictiong scores") mention_pairs = data_dict['mention_indices'].keys() pred_scores, _ = nn_util.get_pred_scores_mcc(task, encoding_scheme, tf_session, batch_size, mention_pairs, data_dict, n_classes, log) if ordered_pairs: pred_scores = induce_ji_predictions(pred_scores) log.info("Loading data from " + label_file) gold_label_dict = nn_data.load_relation_labels(label_file) # If we do an argmax on the scores, we get the predicted labels log.info("Getting labels from scores") pred_labels = list() for pair in pred_scores.keys(): pred_labels.append(np.argmax(pred_scores[pair])) # Evaluate the predictions log.info("Evaluating against the gold") nn_eval.evaluate_relations(pred_scores.keys(), pred_labels, gold_label_dict, log) # If a scores file was specified, write the scores log.info("Writing scores file " + scores_file) if scores_file is not None: with open(scores_file, 'w') as f: for pair_id in pred_scores.keys(): score_line = list() score_line.append(pair_id) for score in pred_scores[pair_id]: if score == 0: score = np.nextafter(0, 1) score_line.append(str(np.log(score))) f.write(",".join(score_line) + "\n") f.close()
def train(rel_type, encoding_scheme, embedding_type, sentence_file, mention_idx_file, feature_file, feature_meta_file, epochs, batch_size, lstm_hidden_width, start_hidden_width, hidden_depth, weighted_classes, lstm_input_dropout, dropout, lrn_rate, adam_epsilon, clip_norm, data_norm, activation, model_file=None, eval_sentence_file=None, eval_mention_idx_file=None, eval_feature_file=None, eval_feature_meta_file=None, eval_label_file=None, early_stopping=False, ordered_pairs=False, log=None): """ Trains a relation model :param sentence_file: File with captions :param mention_idx_file: File with mention pair word indices :param feature_file: File with sparse mention pair features :param feature_meta_file: File associating sparse indices with feature names :param epochs: Number of times to run over the data :param batch_size: Number of mention pairs to run each batch :param lstm_hidden_width: Number of hidden units in the lstm cells :param start_hidden_width: Number of hidden units to which the mention pairs' representation is passed :param hidden_depth: Number of hidden layers after the lstm :param weighted_classes: Whether to weight the examples by their class inversely with the frequency of that class :param lstm_input_dropout: Probability to keep for lstm inputs :param dropout: Probability to keep for all other nodes :param lrn_rate: Learning rate of the optimizer :param clip_norm: Global gradient clipping norm :param adam_epsilon: Adam optimizer epsilon value :param activation: Nonlinear activation function (sigmoid,tanh,relu) :param model_file: File to which the model is periodically saved :param eval_sentence_file: Sentence file against which the model should be evaluated :param eval_mention_idx_file: Mention index file against which the model should be evaluated :param eval_label_file: Relation label file for eval data :return: """ global CLASSES task = "rel_" + rel_type n_classes = len(CLASSES) log.info("Loading data from " + sentence_file + " and " + mention_idx_file) data_dict = nn_data.load_sentences(sentence_file, embedding_type) data_dict.update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, n_classes)) log.info("Loading data from " + eval_sentence_file + " and " + eval_mention_idx_file) eval_data_dict = nn_data.load_sentences(eval_sentence_file, embedding_type) eval_data_dict.update( nn_data.load_mentions(eval_mention_idx_file, task, eval_feature_file, eval_feature_meta_file, n_classes)) mentions = list(data_dict['mention_indices'].keys()) n_pairs = len(mentions) # Load the gold labels from the label file once, and we can just reuse them every epoch gold_label_dict = nn_data.load_relation_labels(eval_label_file) # We want to keep track of the best coref and subset scores, along # with the epoch that they originated from best_coref_subset_avg = -1 best_coref_subset_epoch = -1 log.info("Setting up network architecture") # Set up the bidirectional LSTM with tf.variable_scope('bidirectional_lstm'): nn_util.setup_bidirectional_lstm(lstm_hidden_width, data_norm) nn_util.setup_core_architecture(task, encoding_scheme, batch_size, start_hidden_width, hidden_depth, weighted_classes, activation, n_classes, data_dict['n_mention_feats']) loss = tf.get_collection('loss')[0] accuracy = tf.get_collection('accuracy')[0] nn_util.add_train_op(loss, lrn_rate, adam_epsilon, clip_norm) train_op = tf.get_collection('train_op')[0] nn_util.dump_tf_vars() log.info("Training") saver = tf.train.Saver(max_to_keep=100) with tf.Session() as sess: # Initialize all our variables sess.run(tf.global_variables_initializer()) # Iterate through the data [epochs] number of times for i in range(0, epochs): log.info(None, "--- Epoch %d ----", i + 1) losses = list() accuracies = list() # Shuffle the data once for this epoch np.random.shuffle(mentions) # Iterate through the entirety of the data start_idx = 0 end_idx = start_idx + batch_size n_iter = n_pairs / batch_size for j in range(0, n_iter): log.log_status('info', None, 'Training; %d (%.2f%%) batches complete', j, 100.0 * j / n_iter) # Retrieve this batch batch_mentions = mentions[start_idx:end_idx] batch_tensors = nn_data.load_batch(batch_mentions, data_dict, task, n_classes) # Train nn_util.run_op(sess, train_op, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True) # Store the losses and accuracies every 100 batches if (j + 1) % 100 == 0: losses.append( nn_util.run_op(sess, loss, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True)) accuracies.append( nn_util.run_op(sess, accuracy, [batch_tensors], lstm_input_dropout, dropout, encoding_scheme, [task], [""], True)) #endif start_idx = end_idx end_idx = start_idx + batch_size #endfor # Every epoch, evaluate and save the model log.info(None, "Saving model; Average Loss: %.2f; Acc: %.2f%%", sum(losses) / float(len(losses)), 100.0 * sum(accuracies) / float(len(accuracies))) saver.save(sess, model_file) if ( i + 1 ) % 10 == 0 and eval_sentence_file is not None and eval_mention_idx_file is not None: # We want to predict over all mentions unless this is our weird # ij intra caption case, in which case we want predictions # only for the ij pairs eval_mention_pairs = eval_data_dict['mention_indices'].keys() if ordered_pairs: eval_mention_pairs = get_ij_pairs(eval_mention_pairs) # Predict scores pred_scores, _ = nn_util.get_pred_scores_mcc( task, encoding_scheme, sess, batch_size, eval_mention_pairs, eval_data_dict, n_classes, log) # If this is our ij intra case, we need to induce scores # for ji pairs and reset what we consider as the complete set # of mention pairs if ordered_pairs: pred_scores = induce_ji_predictions(pred_scores) eval_mention_pairs = eval_data_dict[ 'mention_indices'].keys() pred_labels = list() for pair in eval_mention_pairs: pred_labels.append(np.argmax(pred_scores[pair])) # Evaluate the predictions score_dict = \ nn_eval.evaluate_relations(eval_mention_pairs, pred_labels, gold_label_dict) # Get the current coref / subset and see if their average beats our best coref_subset_avg = score_dict.get_score('coref').f1 + \ score_dict.get_score('subset').f1 coref_subset_avg /= 2.0 if coref_subset_avg >= best_coref_subset_avg - 0.005: log.info( None, "Previous best coref/subset average F1 of %.2f%% after %d epochs", 100.0 * best_coref_subset_avg, best_coref_subset_epoch) best_coref_subset_avg = coref_subset_avg best_coref_subset_epoch = i log.info(None, "New best at current epoch (%.2f%%)", 100.0 * best_coref_subset_avg) #endif # Implement early stopping; if it's been 10 epochs since our best, stop if early_stopping and i >= (best_coref_subset_epoch + 10): log.info(None, "Stopping early; best scores at %d epochs", best_coref_subset_epoch) break #endif #endif #endfor log.info("Saving final model") saver.save(sess, model_file)
def load_data(data_dir, data, split, embedding_type, mention_box_label_file=None, box_category_file=None, log=None): """ Loads all of the data for all tasks :param data_dir: :param data: :param split: :param embedding_type: :param log: :return: """ global TASKS, TASK_CLASS_DICT, N_EMBEDDING_WIDTH task_data_dicts = dict() data_root = data + "_" + split for task in TASKS: # Retrieve the input files for these tasks sentence_file = data_dir + "raw/" + data_root + "_captions.txt" mention_idx_file = data_dir + "raw/" + data_root + "_" feature_file = data_dir + "feats/" + data_root + "_" feature_meta_file = data_dir + "feats/" + data_root + "_" label_file = None if 'rel' in task: # Relation files are a little weird, # since we have some mixed intra/cross files mention_idx_file += "mentionPairs_" + task.split("_")[1] feature_file += "relation" feature_meta_file += "relation" label_file = data_dir + "raw/" + data_root + "_mentionPair_labels.txt" else: mention_idx_file += "mentions_" + task feature_file += task feature_meta_file += task if task == 'affinity': label_file = data_dir + "raw/" + data_root + "_mention_box_labels.txt" if mention_box_label_file is not None: label_file = mention_box_label_file #endif mention_idx_file += ".txt" feature_file += ".feats" feature_meta_file += "_meta.json" log.info(None, "Loading data for %s: %s, %s, %s", task, sentence_file, mention_idx_file, feature_file) task_data_dicts[task] = nn_data.load_sentences(sentence_file, embedding_type) task_data_dicts[task].update( nn_data.load_mentions(mention_idx_file, task, feature_file, feature_meta_file, len(TASK_CLASS_DICT[task]))) if "rel" in task: task_data_dicts[task]['gold_label_dict'] = \ nn_data.load_relation_labels(label_file) elif task == "affinity": box_dir = data_dir + "feats/" + data + "_boxes/" + split + "/" task_data_dicts[task].update( nn_data.load_boxes(label_file, box_dir, box_category_file)) #endif #endfor return task_data_dicts