Example #1
def do_evaluate(args):
    """
    Evaluate an existing model.
    """
    logging.info("Evaluating the model.")
    model = get_model_factory(args.model).load(args.model_path)

    data = list(process_snli_data(args.eval_data))
    X1, X2, Y = vectorize_data(data, args.input_length)

    emb = WordEmbeddings()
    cm = ConfusionMatrix(LABELS)
    writer = csv.writer(args.output, delimiter="\t")
    writer.writerow(["sentence1", "sentence2", "gold_label", "guess_label", "neutral", "contradiction", "entailment"])
    for batch in tqdm(grouper(args.batch_size, zip(data, X1, X2, Y)), total=int(len(data)/args.batch_size)):
        objs, X1_batch, X2_batch, y_batch = zip(*batch)
        # Look up pretrained word embeddings for every token index in the batch.
        X1_batch = array([emb.weights[x, :] for x in X1_batch])
        X2_batch = array([emb.weights[x, :] for x in X2_batch])
        y_batch = array(y_batch)

        # Predicted class distributions for the batch.
        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])

        # Write each example with its gold and predicted labels plus the
        # predicted class probabilities, and update the confusion matrix.
        for obj, y, y_ in zip(objs, y_batch, y_batch_):
            label = np.argmax(y)
            label_ = np.argmax(y_)
            writer.writerow([
                obj.sentence1,
                obj.sentence2,
                LABELS[label],
                LABELS[label_],
                ] + list(y_))
            cm.update(label, label_)
    cm.print_table()
    cm.summary()
    logging.info("Done.")
Example #2
def evaluate(args, emb, model, X1X2Y, total=None):
    """
    Evaluate the model using the embeddings @emb on the input data batches @X1X2Y.
    """
    cm = ConfusionMatrix(LABELS)
    for batch in tqdm(grouper(args.batch_size, X1X2Y),
                      total=int(total / args.batch_size) if total else None):
        X1_batch, X2_batch, y_batch = zip(*batch)
        # Look up pretrained word embeddings for every token index in the batch.
        X1_batch = array([emb.weights[x, :] for x in X1_batch])
        X2_batch = array([emb.weights[x, :] for x in X2_batch])
        y_batch = array(y_batch)

        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])
        # Compare gold and predicted argmax labels for every example in the batch.
        for y, y_ in zip(y_batch, y_batch_):
            cm.update(np.argmax(y), np.argmax(y_))
    cm.print_table()
    cm.summary()
    return cm
Example #3
    def evaluate_prediction(self, session, batch_size, dataset):
        print("\nEVALUATING")

        cm = ConfusionMatrix(labels=self.LBLS)
        total_loss = 0
        total_correct = 0
        num_batches = 0
        for batch in minibatches(dataset, batch_size, bucket=self.bucket):
            probs, loss = self.predict(session, batch_size, batch)
            _, _, _, _, goldlabels = batch
            for i in range(len(probs)):
                # Count an example as correct when the predicted label name
                # matches the gold label name.
                if label_to_name(probs[i]) == label_to_name(goldlabels[i]):
                    total_correct += 1

                # Update the confusion matrix with gold vs. predicted indices.
                gold_idx = np.argmax(goldlabels[i])
                predicted_idx = np.argmax(probs[i])
                cm.update(gold_idx, predicted_idx)
            total_loss += loss
            num_batches += 1
        accuracy = total_correct / float(len(dataset[0]))
        print("Accuracy: " + str(accuracy))
        average_loss = total_loss / float(num_batches)
        print("Average Loss: " + str(average_loss))
        print("Token-level confusion matrix:\n" + cm.as_table())
        print("Token-level scores:\n" + cm.summary())
        return (accuracy, average_loss, cm)
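Example #3 calls a label_to_name helper that is not defined here. A minimal sketch of one plausible implementation, assuming probs[i] and goldlabels[i] are per-class score (or one-hot) vectors and that the same label list passed to ConfusionMatrix is in scope; this helper is an assumption, not the original code.

import numpy as np

def label_to_name(scores):
    # Hypothetical helper: map a score or one-hot vector to the name of its
    # highest-scoring class. LBLS is assumed to be the same label list passed
    # to ConfusionMatrix(labels=self.LBLS) above.
    return LBLS[int(np.argmax(scores))]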
Example #4
def evaluate(model, X, Y):
    """
    Evaluate @model on inputs @X against one-hot labels @Y and report a confusion matrix.
    """
    cm = ConfusionMatrix(labels=LBLS)
    Y_ = model.predict(X)
    for i in range(Y.shape[0]):
        # Compare gold and predicted argmax labels for each example.
        y, y_ = np.argmax(Y[i]), np.argmax(Y_[i])
        cm.update(y, y_)
    cm.print_table()
    return cm.summary()
Example #5
def train(args, emb, model, X1X2Y, total=None):
    """
    Train the model using the embeddings @emb and the input data batches @X1X2Y.
    """
    cm = ConfusionMatrix(LABELS)
    scorer = Scorer(model.metrics_names)
    for batch in tqdm(grouper(args.batch_size, X1X2Y),
                      total=int(total / args.batch_size) if total else None):
        X1_batch, X2_batch, y_batch = zip(*batch)
        # Look up pretrained word embeddings for every token index in the batch.
        X1_batch = array([emb.weights[x, :] for x in X1_batch])
        X2_batch = array([emb.weights[x, :] for x in X2_batch])
        y_batch = array(y_batch)

        # One gradient update on the batch; track the running training score.
        score = model.train_on_batch([X1_batch, X2_batch], y_batch)
        scorer.update(score, len(y_batch))
        # Also track a confusion matrix of gold vs. predicted labels on the training data.
        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])
        for y, y_ in zip(y_batch, y_batch_):
            cm.update(np.argmax(y), np.argmax(y_))
    logging.info("train error: %s", scorer)
    cm.print_table()
    cm.summary()
    return cm
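None of the examples show how these pieces are driven end to end. The sketch below is a hypothetical driver, assuming the setup from Example #1 (embeddings, model, and the vectorize_data pipeline) and reusing the train and evaluate functions above; do_train, num_epochs, and the per-epoch re-vectorization are illustrative assumptions rather than part of the original source.

def do_train(args, emb, model, train_data, dev_data, num_epochs=10):
    for epoch in range(num_epochs):
        logging.info("Epoch %d", epoch)
        # Recreate the (x1, x2, y) streams each epoch, since train/evaluate consume them.
        train_stream = zip(*vectorize_data(train_data, args.input_length))
        dev_stream = zip(*vectorize_data(dev_data, args.input_length))
        train(args, emb, model, train_stream, total=len(train_data))
        evaluate(args, emb, model, dev_stream, total=len(dev_data))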