def do_evaluate(args):
    """
    Score a previously saved model on the evaluation data.

    Loads the model selected by ``args.model`` from ``args.model_path``,
    runs it batch-by-batch over the examples read from ``args.eval_data``,
    writes one tab-separated row per example to ``args.output`` and finally
    prints the confusion matrix over gold vs. predicted labels.
    """
    logging.info("Evaluating the model.")

    model = get_model_factory(args.model).load(args.model_path)
    data = list(process_snli_data(args.eval_data))
    X1, X2, Y = vectorize_data(data, args.input_length)

    emb = WordEmbeddings()
    cm = ConfusionMatrix(LABELS)

    # Header: the two sentences, gold and guessed label names, then one
    # column per entry of the model's output vector.
    writer = csv.writer(args.output, delimiter="\t")
    header = [
        "sentence1",
        "sentence2",
        "gold_label",
        "guess_label",
        "neutral",
        "contradiction",
        "entailment",
    ]
    writer.writerow(header)

    batches = grouper(args.batch_size, zip(data, X1, X2, Y))
    expected_batches = int(len(data) / args.batch_size)
    for batch in tqdm(batches, total=expected_batches):
        examples, left_rows, right_rows, gold_rows = zip(*batch)

        # Index the embedding matrix with each vectorized sentence.
        left_input = array([emb.weights[row, :] for row in left_rows])
        right_input = array([emb.weights[row, :] for row in right_rows])
        gold_batch = array(gold_rows)

        predicted_batch = model.predict_on_batch([left_input, right_input])

        for example, gold, predicted in zip(examples, gold_batch, predicted_batch):
            gold_idx = np.argmax(gold)
            guess_idx = np.argmax(predicted)

            row = [
                example.sentence1,
                example.sentence2,
                LABELS[gold_idx],
                LABELS[guess_idx],
            ]
            row.extend(list(predicted))
            writer.writerow(row)

            cm.update(gold_idx, guess_idx)

    cm.print_table()
    cm.summary()
    logging.info("Done.")
def evaluate(args, emb, model, X1X2Y, total=None):
    """
    Run @model over the examples in @X1X2Y and build a confusion matrix.

    Args:
        args: options object; only ``args.batch_size`` is read here.
        emb: embeddings object whose ``weights`` is indexed with each
            vectorized sentence.
        model: object exposing ``predict_on_batch``.
        X1X2Y: iterable of ``(x1, x2, y)`` triples.
        total: number of examples in @X1X2Y, used only to size the progress
            bar. May be None when the count is unknown.

    Returns:
        The populated ConfusionMatrix (also printed before returning).
    """
    # The default total=None used to crash in the division below; tqdm
    # accepts total=None and simply shows an open-ended progress bar.
    expected_batches = None if total is None else int(total / args.batch_size)

    cm = ConfusionMatrix(LABELS)
    for batch in tqdm(grouper(args.batch_size, X1X2Y), total=expected_batches):
        X1_batch, X2_batch, y_batch = zip(*batch)
        X1_batch = array([emb.weights[x, :] for x in X1_batch])
        X2_batch = array([emb.weights[x, :] for x in X2_batch])
        y_batch = array(y_batch)

        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])

        # Compare the arg-max of each gold row with that of its prediction.
        for y, y_ in zip(y_batch, y_batch_):
            cm.update(np.argmax(y), np.argmax(y_))

    cm.print_table()
    cm.summary()
    return cm
def evaluate_prediction(self, session, batch_size, dataset):
    """
    Evaluate the model on ``dataset`` and print accuracy, loss and a
    confusion matrix.

    Args:
        session: passed through unchanged to ``self.predict``.
        batch_size: size of the minibatches drawn from ``dataset``.
        dataset: data to evaluate on. ``len(dataset[0])`` is used as the
            number of examples, and each minibatch unpacks into five items
            of which the last holds the gold labels.

    Returns:
        A tuple ``(accuracy, average_loss, cm)``. Accuracy and average loss
        are reported as 0.0 when there are no examples / no batches rather
        than raising ZeroDivisionError.
    """
    print("\nEVALUATING")
    cm = ConfusionMatrix(labels=self.LBLS)
    total_loss = 0
    total_correct = 0
    num_batches = 0

    for batch in minibatches(dataset, batch_size, bucket=self.bucket):
        probs, loss = self.predict(session, batch_size, batch)
        _, _, _, _, goldlabels = batch

        # Pairwise iteration instead of xrange(), which is undefined on
        # Python 3.
        for prob, gold in zip(probs, goldlabels):
            if label_to_name(prob) == label_to_name(gold):
                total_correct += 1
            gold_idx = np.argmax(gold)
            predicted_idx = np.argmax(prob)
            cm.update(gold_idx, predicted_idx)

        total_loss += loss
        num_batches += 1

    # Guard both divisions so an empty evaluation set does not crash.
    num_examples = len(dataset[0])
    accuracy = total_correct / float(num_examples) if num_examples else 0.0
    print("Accuracy: " + str(accuracy))
    average_loss = total_loss / float(num_batches) if num_batches else 0.0
    print("Average Loss: " + str(average_loss))
    print("Token-level confusion matrix:\n" + cm.as_table())
    print("Token-level scores:\n" + cm.summary())
    return (accuracy, average_loss, cm)
def evaluate(model, X, Y):
    """
    Compare the model's predictions on X against the gold rows in Y.

    Each row of Y and of ``model.predict(X)`` is reduced to its arg-max
    index; the pairs are accumulated in a confusion matrix, which is printed
    as a table. Returns whatever ``ConfusionMatrix.summary()`` returns.
    """
    cm = ConfusionMatrix(labels=LBLS)
    predictions = model.predict(X)

    num_examples = Y.shape[0]
    for row in range(num_examples):
        gold_idx = np.argmax(Y[row])
        guess_idx = np.argmax(predictions[row])
        cm.update(gold_idx, guess_idx)

    cm.print_table()
    return cm.summary()
def evaluate(model, X, Y):
    """
    Build and print a confusion matrix for the model's predictions on X.

    Gold labels come from the rows of Y; both gold and predicted rows are
    collapsed to a class index with arg-max. The result of
    ``ConfusionMatrix.summary()`` is returned.
    """
    cm = ConfusionMatrix(labels=LBLS)
    predicted = model.predict(X)

    # One confusion-matrix update per row of Y.
    for n in range(Y.shape[0]):
        cm.update(np.argmax(Y[n]), np.argmax(predicted[n]))

    cm.print_table()
    summary = cm.summary()
    return summary
def train(args, emb, model, X1X2Y, total=None):
    """
    Train the model using the embeddings @emb and input data batch X1X2Y.

    Args:
        args: options object; only ``args.batch_size`` is read here.
        emb: embeddings object whose ``weights`` is indexed with each
            vectorized sentence.
        model: object exposing ``metrics_names``, ``train_on_batch`` and
            ``predict_on_batch``.
        X1X2Y: iterable of ``(x1, x2, y)`` triples.
        total: number of examples in @X1X2Y, used only to size the progress
            bar. May be None when the count is unknown.

    Returns:
        The ConfusionMatrix accumulated over all training batches (also
        printed before returning).
    """
    # The default total=None used to crash in the division below; tqdm
    # accepts total=None and simply shows an open-ended progress bar.
    expected_batches = None if total is None else int(total / args.batch_size)

    cm = ConfusionMatrix(LABELS)
    scorer = Scorer(model.metrics_names)
    for batch in tqdm(grouper(args.batch_size, X1X2Y), total=expected_batches):
        X1_batch, X2_batch, y_batch = zip(*batch)
        X1_batch = array([emb.weights[x, :] for x in X1_batch])
        X2_batch = array([emb.weights[x, :] for x in X2_batch])
        y_batch = array(y_batch)

        score = model.train_on_batch([X1_batch, X2_batch], y_batch)
        scorer.update(score, len(y_batch))

        # Predictions are taken after the training step on the same batch.
        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])
        for y, y_ in zip(y_batch, y_batch_):
            cm.update(np.argmax(y), np.argmax(y_))

    logging.info("train error: %s", scorer)
    cm.print_table()
    cm.summary()
    return cm