Code Example #1
File: ner_model.py Project: wryoung412/cs224n_nlp
    def evaluate(self, sess, examples, examples_raw):
        """Evaluates model performance on @examples.

        This function uses the model to predict labels for @examples and constructs a confusion matrix.

        Args:
            sess: the current TensorFlow session.
            examples: A list of vectorized input/output pairs.
            examples_raw: A list of the original input/output sequence pairs.
        Returns:
            The token-level confusion matrix and the chunk-level
            (precision, recall, F1) scores for named entity prediction.
        """
        token_cm = ConfusionMatrix(labels=LBLS)

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for _, labels, labels_ in self.output(sess, examples_raw, examples):
            for l, l_ in zip(labels, labels_):
                token_cm.update(l, l_)
            gold = set(get_chunks(labels))
            pred = set(get_chunks(labels_))
            correct_preds += len(gold.intersection(pred))
            total_preds += len(pred)
            total_correct += len(gold)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        return token_cm, (p, r, f1)
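Every example on this page leans on a few helpers imported elsewhere in each project (LBLS, ConfusionMatrix, get_chunks). Below is a minimal sketch of what those helpers might look like, written only so the snippets can be read in isolation; the method names follow the calls made in the examples, but the bodies and the label set are assumptions, not the projects' actual util code.

from collections import defaultdict

# Hypothetical label set; the real projects import LBLS from their data utilities.
LBLS = ["PER", "ORG", "LOC", "MISC", "O"]

class ConfusionMatrix(object):
    """Accumulates gold/guess counts keyed by label (sketch of the assumed interface)."""
    def __init__(self, labels, default_label=None):
        self.labels = labels
        self.default_label = default_label
        self.counts = defaultdict(lambda: defaultdict(int))

    def update(self, gold, guess):
        # self.counts[gold][guess] += 1, as noted in code example #7
        self.counts[gold][guess] += 1

    def as_table(self):
        header = "gold\\guess\t" + "\t".join(str(l) for l in self.labels)
        rows = [str(g) + "\t" + "\t".join(str(self.counts[g][p]) for p in self.labels)
                for g in self.labels]
        return "\n".join([header] + rows)

    def print_table(self):
        print(self.as_table())

    def summary(self):
        # Overall accuracy only; the real helper reports per-label scores as well.
        total = sum(sum(row.values()) for row in self.counts.values())
        correct = sum(self.counts[l][l] for l in list(self.counts))
        acc = correct / float(total) if total else 0.0
        return "accuracy: {:.4f}".format(acc)

def get_chunks(seq, none_label="O"):
    """Group a label sequence into (label, start, end) spans; consecutive
    identical non-O labels are treated as one entity chunk."""
    chunks, start, current = [], None, None
    for i, tok in enumerate(seq):
        if tok != current:
            if current is not None and current != none_label:
                chunks.append((current, start, i))
            start, current = i, tok
    if current is not None and current != none_label:
        chunks.append((current, start, len(seq)))
    return chunks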
Code Example #2
File: ner_model.py Project: Da-Capo/mystudyofcnn
    def evaluate(self, sess, examples, examples_raw):
        """Evaluates model performance on @examples.

        This function uses the model to predict labels for @examples and constructs a confusion matrix.

        Args:
            sess: the current TensorFlow session.
            examples: A list of vectorized input/output pairs.
            examples_raw: A list of the original input/output sequence pairs.
        Returns:
            The token-level confusion matrix and the chunk-level
            (precision, recall, F1) scores for named entity prediction.
        """
        token_cm = ConfusionMatrix(labels=LBLS)

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for _, labels, labels_ in self.output(sess, examples_raw, examples):
            for l, l_ in zip(labels, labels_):
                token_cm.update(l, l_)
            gold = set(get_chunks(labels))
            pred = set(get_chunks(labels_))
            correct_preds += len(gold.intersection(pred))
            total_preds += len(pred)
            total_correct += len(gold)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        return token_cm, (p, r, f1)
Code Example #3
File: extras.py Project: baraklevyy/NLP
    def evaluate(self, examples):
        """Evaluates model performance on @examples.

        This function uses the model to predict labels for @examples and constructs a confusion matrix.

        Returns:
            The token-level confusion matrix and the chunk-level
            (precision, recall, F1) scores for named entity prediction.
        """
        token_cm = ConfusionMatrix(labels=LBLS)

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for data in self._predictor.predict(examples):
            (_, labels, labels_) = data

            for l, l_ in zip(labels, labels_):
                token_cm.update(l, l_)
            gold = set(get_chunks(labels))
            pred = set(get_chunks(labels_))
            correct_preds += len(gold.intersection(pred))
            total_preds += len(pred)
            total_correct += len(gold)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        return token_cm, (p, r, f1)
Code Example #4
    def evaluate_on_batch(self, sess, inputs_batch, labels_batch, log=True):
        """Return the loss after evaluating on the provided batch of data

        Args:
            sess: tf.Session()
            inputs_batch: np.ndarray of shape (n_samples, n_features)
            labels_batch: np.ndarray of shape (n_samples,)
        Returns:
            error: fraction of misclassified examples in the batch (a scalar)
        """
        feed = self.create_feed_dict(inputs_batch, labels_batch=labels_batch)
        output_pred = tf.argmax(self.pred, axis=1)
        output = sess.run(output_pred, feed_dict=feed)

        num_correct = 0
        if log:
            confusion_matrix = ConfusionMatrix(np.sort(
                np.unique(labels_batch)))
        for i in range(inputs_batch.shape[0]):
            y = labels_batch[i]
            y_hat = output[i]
            if log: confusion_matrix.update(y, y_hat)
            if y == y_hat:
                num_correct += 1
            # else:
            # print("pred was {}, truth was {}".format(y_hat, y))

        if log: print(confusion_matrix.as_table())

        return 1 - (1.0 * num_correct / inputs_batch.shape[0])
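A hypothetical call site for evaluate_on_batch; clf, X_dev, and y_dev are placeholder names for a model instance and a held-out batch, not identifiers from the original project, and the TF 1.x session API is assumed as in the snippet above.

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Returns 1 - accuracy over the batch, printing the confusion matrix when log=True.
    dev_error = clf.evaluate_on_batch(sess, X_dev, y_dev, log=True)
    print("dev error rate: {:.3f}".format(dev_error))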
Code Example #5
    def evaluate(self, sess, examples):
        """Evaluates model performance on @examples.

        This function uses the model to predict labels for @examples and
        constructs a confusion matrix.

        Args:
            sess: the current TensorFlow session.
            examples: A list of vectorized input/output pairs.
        Returns:
            The F1 score for predicting the relationship between
            headline-body pairs
        """
        # TODO(akshayka): Implement a report that tells us the inputs
        # on which we guessed incorrectly
        token_cm = ConfusionMatrix(labels=LBLS, default_label=UNRELATED)

        correct_guessed_related, total_gold_related, total_guessed_related = (
            0., 0., 0.)
        _, labels, labels_hat = self.output(sess, examples)
        score = 0
        num_unrelated = len([l for l in labels if l == UNRELATED])
        num_related = len(labels) - num_unrelated
        unrelated_score = 0.25 * num_unrelated
        max_score = unrelated_score + 1.0 * num_related
        for l, l_hat in zip(labels, labels_hat):
            token_cm.update(l, l_hat)
            if l == l_hat:
                score += 0.25
                if l != UNRELATED:
                    score += 0.5
            if l in RELATED and l_hat in RELATED:
                score += 0.25

            if l == l_hat and l in RELATED:
                correct_guessed_related += 1
            if l in RELATED:
                total_gold_related += 1
            if l_hat in RELATED:
                total_guessed_related += 1


        p = correct_guessed_related / total_guessed_related if \
            total_guessed_related > 0 else 0
        r = correct_guessed_related / total_gold_related if \
            total_gold_related > 0 else 0

        if total_guessed_related == 0:
            logging.warn("total_guessed_related == 0!")
        if total_gold_related == 0:
            logging.warn("total_gold_related == 0!")
        f1 = 2 * p * r / (p + r) if p + r > 0 else 0

        unrelated_ratio = unrelated_score / max_score
        score_ratio = score / max_score
        return token_cm, (p, r, f1), (unrelated_ratio, score_ratio)
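The weighted score accumulated above follows an FNC-1-style scheme for headline-body stance scoring. A standalone restatement, written only for clarity; UNRELATED and RELATED are assumed to be the project's label constants, with RELATED containing every label except UNRELATED.

def pair_score(gold, guess):
    # Hypothetical helper restating the per-pair scoring used in the loop above.
    s = 0.0
    if gold == guess:
        s += 0.25                      # label exactly right
        if gold != UNRELATED:
            s += 0.50                  # extra credit for getting a related stance right
    if gold in RELATED and guess in RELATED:
        s += 0.25                      # related-vs-unrelated distinction right
    return s

A correctly classified unrelated pair therefore contributes 0.25 and a correctly classified related pair 1.0, which matches the max_score computed above.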
Code Example #6
File: main.py Project: arunchaganty/hypatia
def do_evaluate(args):
    """
    Evaluate an existing model.
    """
    logging.info("Evaluating the model.")
    model = get_model_factory(args.model).load(args.model_path)

    data = list(process_snli_data(args.eval_data))
    X1, X2, Y = vectorize_data(data, args.input_length)

    emb = WordEmbeddings()
    cm = ConfusionMatrix(LABELS)
    writer = csv.writer(args.output, delimiter="\t")
    writer.writerow(["sentence1", "sentence2", "gold_label", "guess_label", "neutral", "contradiction", "entailment"])
    for batch in tqdm(grouper(args.batch_size, zip(data, X1, X2, Y)), total=int(len(data)/args.batch_size)):
        objs, X1_batch, X2_batch, y_batch = zip(*batch)
        X1_batch = array([emb.weights[x,:] for x in X1_batch])
        X2_batch = array([emb.weights[x,:] for x in X2_batch])
        y_batch = array(y_batch)

        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])

        for obj, y, y_ in zip(objs, y_batch, y_batch_):
            label = np.argmax(y)
            label_ = np.argmax(y_)
            writer.writerow([
                obj.sentence1,
                obj.sentence2,
                LABELS[label],
                LABELS[label_],
                ] + list(y_))
            cm.update(label, label_)
    cm.print_table()
    cm.summary()
    logging.info("Done.")
Code Example #7
def generate_cm(real, pred, n_class):
    LBLS = [str(x) for x in xrange(n_class)]
    token_cm = ConfusionMatrix(labels=LBLS)
    for l, l_ in zip(real, pred):
        token_cm.update(l, l_)          # self.counts[gold][guess] += 1
    return token_cm


# for test
# real = [s for s in xrange(50)] * 10
# real.extend([random.randint(0, 49) for r in xrange(1000)])
# pred = [p for p in xrange(50)] * 10
# pred.extend([random.randint(0,49) for r in xrange(1000)])
#
# t_cm = generate_cm(real, pred, 50)
# print t_cm.as_table()
# print t_cm.summary()
# print t_cm.as_matrix()
Code Example #8
def evaluate(model, X, Y):
    cm = ConfusionMatrix(labels=LBLS)
    Y_ = model.predict(X)
    for i in range(Y.shape[0]):
        y, y_ = np.argmax(Y[i]), np.argmax(Y_[i])
        cm.update(y, y_)
    cm.print_table()
    return cm.summary()
Code Example #9
File: model.py Project: theblind/NER
    def evaluate(self, sess, examples, examples_raw):
        """
        Evaluates model performance on @examples.
        """
        token_cm = ConfusionMatrix(labels=LBLS)

        correct_preds, total_correct, total_preds = 0., 0., 0.
        for _, labels, labels_ in self.output(sess, examples_raw, examples):
            for l, l_ in zip(labels, labels_):
                token_cm.update(l, l_)
            gold = set(get_chunks(labels))
            pred = set(get_chunks(labels_))
            correct_preds += len(gold.intersection(pred))
            total_preds += len(pred)
            total_correct += len(gold)

        p = correct_preds / total_preds if correct_preds > 0 else 0
        r = correct_preds / total_correct if correct_preds > 0 else 0
        f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
        return token_cm, (p, r, f1)
Code Example #10
    def evaluate_prediction(self, session, batch_size, dataset):
        print("\nEVALUATING")

        cm = ConfusionMatrix(labels=self.LBLS)
        total_loss = 0
        total_correct = 0
        num_batches = 0
        for batch in minibatches(dataset, batch_size, bucket=self.bucket):
            probs, loss = self.predict(session, batch_size, batch)
            _, _, _, _, goldlabels = batch
            for i in xrange(len(probs)):
                total_correct += 1 if label_to_name(probs[i]) == label_to_name(
                    goldlabels[i]) else 0

                gold_idx = np.argmax(goldlabels[i])
                predicted_idx = np.argmax(probs[i])
                cm.update(gold_idx, predicted_idx)
            total_loss += loss
            num_batches += 1
        accuracy = total_correct / float(len(dataset[0]))
        print("Accuracy: " + str(accuracy))
        average_loss = total_loss / float(num_batches)
        print("Average Loss: " + str(average_loss))
        print("Token-level confusion matrix:\n" + cm.as_table())
        print("Token-level scores:\n" + cm.summary())
        return (accuracy, average_loss, cm)
Code Example #11
File: data_util.py Project: atulkum/ml
def evaluate(model, X, Y):
    cm = ConfusionMatrix(labels=LBLS)
    Y_ = model.predict(X)
    for i in range(Y.shape[0]):
        y, y_ = np.argmax(Y[i]), np.argmax(Y_[i])
        cm.update(y, y_)
    cm.print_table()
    return cm.summary()
Code Example #12
File: main.py Project: arunchaganty/hypatia
def evaluate(args, emb, model, X1X2Y, total=None):
    cm = ConfusionMatrix(LABELS)
    for batch in tqdm(grouper(args.batch_size, X1X2Y), total=int(total/args.batch_size)):
        X1_batch, X2_batch, y_batch = zip(*batch)
        X1_batch = array([emb.weights[x,:] for x in X1_batch])
        X2_batch = array([emb.weights[x,:] for x in X2_batch])
        y_batch = array(y_batch)

        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])
        for y, y_ in zip(y_batch, y_batch_): cm.update(np.argmax(y), np.argmax(y_))
    cm.print_table()
    cm.summary()
    return cm
Code Example #13
File: main.py Project: arunchaganty/hypatia
def train(args, emb, model, X1X2Y, total=None):
    """
    Train the model using the embeddings @emb and input data batch X1X2Y.
    """
    cm = ConfusionMatrix(LABELS)
    scorer = Scorer(model.metrics_names)
    for batch in tqdm(grouper(args.batch_size, X1X2Y), total=int(total/args.batch_size)):
        X1_batch, X2_batch, y_batch = zip(*batch)
        X1_batch = array([emb.weights[x,:] for x in X1_batch])
        X2_batch = array([emb.weights[x,:] for x in X2_batch])
        y_batch = array(y_batch)

        score = model.train_on_batch([X1_batch, X2_batch], y_batch)
        scorer.update(score, len(y_batch))
        y_batch_ = model.predict_on_batch([X1_batch, X2_batch])
        for y, y_ in zip(y_batch, y_batch_): cm.update(np.argmax(y), np.argmax(y_))
    logging.info("train error: %s", scorer)
    cm.print_table()
    cm.summary()
    return cm