def evaluate_on_batch(self, sess, inputs_batch, labels_batch, log=True):
    """Return the classification error rate after evaluating on the provided batch of data.

    Args:
        sess: tf.Session()
        inputs_batch: np.ndarray of shape (n_samples, n_features)
        labels_batch: np.ndarray of shape (n_samples,)
    Returns:
        error: misclassification rate over the batch (a scalar), i.e. 1 - accuracy
    """
    feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch)
    # Take the arg-max over the class dimension to get hard predictions.
    output_pred = tf.argmax(self.pred, axis=1)
    output = sess.run(output_pred, feed_dict=feed)
    num_correct = 0
    if log:
        confusion_matrix = ConfusionMatrix(np.sort(np.unique(labels_batch)))
    for i in range(inputs_batch.shape[0]):
        y = labels_batch[i]
        y_hat = output[i]
        if log:
            confusion_matrix.update(y, y_hat)
        if y == y_hat:
            num_correct += 1
        # else:
        #     print("pred was {}, truth was {}".format(y_hat, y))
    if log:
        print(confusion_matrix.as_table())
    return 1 - (1.0 * num_correct / inputs_batch.shape[0])
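# --- Illustrative sketch, not from the original code: both evaluators in this
# section rely on a ConfusionMatrix helper exposing update / as_table / summary.
# The project's real helper is not shown here; the minimal stand-in below only
# demonstrates one way that assumed interface could be implemented, assuming
# update() receives class indices (which matches the calls above when labels
# are the integers 0..k-1).
from collections import Counter


class ConfusionMatrix(object):
    """Minimal, assumed implementation of the confusion-matrix interface."""

    def __init__(self, labels):
        self.labels = list(labels)
        self.counts = Counter()  # keyed by (gold_index, predicted_index)

    def update(self, gold, guess):
        # Record one (gold, predicted) pair of class indices.
        self.counts[(gold, guess)] += 1

    def as_table(self):
        # Rows are gold labels, columns are predicted labels.
        header = "gold\\pred\t" + "\t".join(str(l) for l in self.labels)
        rows = []
        for i, gold in enumerate(self.labels):
            cells = "\t".join(str(self.counts[(i, j)]) for j in range(len(self.labels)))
            rows.append("{}\t{}".format(gold, cells))
        return header + "\n" + "\n".join(rows)

    def summary(self):
        # Per-class precision, recall, and F1 computed from the raw counts.
        lines = []
        k = len(self.labels)
        for i, label in enumerate(self.labels):
            tp = self.counts[(i, i)]
            fp = sum(self.counts[(g, i)] for g in range(k)) - tp
            fn = sum(self.counts[(i, p)] for p in range(k)) - tp
            prec = tp / float(tp + fp) if tp + fp else 0.0
            rec = tp / float(tp + fn) if tp + fn else 0.0
            f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
            lines.append("{}: prec={:.3f} rec={:.3f} f1={:.3f}".format(label, prec, rec, f1))
        return "\n".join(lines)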
def evaluate_prediction(self, session, batch_size, dataset):
    """Evaluate the model over a full dataset; return (accuracy, average loss,
    confusion matrix) and print token-level scores."""
    print("\nEVALUATING")
    cm = ConfusionMatrix(labels=self.LBLS)
    total_loss = 0
    total_correct = 0
    num_batches = 0
    for batch in minibatches(dataset, batch_size, bucket=self.bucket):
        probs, loss = self.predict(session, batch_size, batch)
        _, _, _, _, goldlabels = batch
        for i in range(len(probs)):
            # Count a prediction as correct when its label name matches the gold label name.
            total_correct += 1 if label_to_name(probs[i]) == label_to_name(goldlabels[i]) else 0
            gold_idx = np.argmax(goldlabels[i])
            predicted_idx = np.argmax(probs[i])
            cm.update(gold_idx, predicted_idx)
        total_loss += loss
        num_batches += 1
    accuracy = total_correct / float(len(dataset[0]))
    print("Accuracy: " + str(accuracy))
    average_loss = total_loss / float(num_batches)
    print("Average Loss: " + str(average_loss))
    print("Token-level confusion matrix:\n" + cm.as_table())
    print("Token-level scores:\n" + cm.summary())
    return (accuracy, average_loss, cm)
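# --- Hypothetical usage sketch, not from the original code: one way
# evaluate_prediction might be invoked on a held-out dev set once a model and
# session exist. `model`, `dev_set`, and the default batch size of 32 are
# assumptions for illustration only.
import tensorflow as tf


def run_dev_evaluation(model, dev_set, batch_size=32):
    with tf.Session() as session:
        session.run(tf.global_variables_initializer())
        # ... training or checkpoint restoration would happen here ...
        accuracy, average_loss, cm = model.evaluate_prediction(session, batch_size, dev_set)
        print("dev accuracy: {:.4f}, dev average loss: {:.4f}".format(accuracy, average_loss))
        return accuracy, average_loss, cm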