def evaluate_all(gt_file_dir, gt_img_dir, ckpt_path, gpuid='0'):
    db = DB(ckpt_path, gpuid)

    img_list = os.listdir(gt_img_dir)

    show = './eva'
    make_dir(show)

    total_TP = 0
    total_gt_care_num = 0
    total_pred_care_num = 0
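    # Run the detector on every test image and score its polygons against the CTW1500 ground truth.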
    for img_name in tqdm.tqdm(img_list):
        img = cv2.imread(os.path.join(gt_img_dir, img_name))

        pred_box_list, pred_score_list, _ = db.detect_img(os.path.join(
            gt_img_dir, img_name),
                                                          ispoly=True,
                                                          show_res=False)

        gt_file_name = os.path.splitext(img_name)[0] + '.txt'

        gt_boxes, tags = load_ctw1500_labels(
            os.path.join(gt_file_dir, gt_file_name))

        gt_care_list = []
        gt_dontcare_list = []

        # tags[i] is True for "don't care" regions, which are excluded from scoring.
        for i, box in enumerate(gt_boxes):
            box = box.reshape((-1, 2)).tolist()
            if not tags[i]:
                gt_care_list.append(box)
            else:
                gt_dontcare_list.append(box)

        precision, recall, f1_score, TP, gt_care_num, pred_care_num, pairs_list = evaluate(
            gt_care_list, gt_dontcare_list, pred_box_list, overlap=0.5)

        # Draw each matched pair: ground-truth polygon in green, prediction in blue (BGR).
        for pair in pairs_list:
            cv2.polylines(img,
                          [np.array(pair['gt'], np.int32).reshape([-1, 1, 2])],
                          True, (0, 255, 0))
            cv2.polylines(img,
                          [np.array(pair['pred'], np.int32).reshape([-1, 1, 2])],
                          True, (255, 0, 0))

        cv2.imwrite(os.path.join(show, img_name), img)

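        # Accumulate counts across images so precision/recall are micro-averaged over the whole set.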
        total_TP += TP
        total_gt_care_num += gt_care_num
        total_pred_care_num += pred_care_num

    # Guard against empty prediction / ground-truth sets to avoid division by zero.
    total_precision = float(total_TP) / total_pred_care_num if total_pred_care_num else 0.0
    total_recall = float(total_TP) / total_gt_care_num if total_gt_care_num else 0.0
    total_f1_score = compute_f1_score(total_precision, total_recall)

    return total_precision, total_recall, total_f1_score
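
# A minimal usage sketch: the dataset and checkpoint paths below are placeholders
# (assumptions, not taken from the source). Running it also requires the project-local
# helpers used above (DB, make_dir, load_ctw1500_labels, quad_iou, compute_f1_score)
# plus the usual os / cv2 / numpy / tqdm imports.
if __name__ == '__main__':
    p, r, f1 = evaluate_all(gt_file_dir='./ctw1500/test_gts',        # placeholder path
                            gt_img_dir='./ctw1500/test_images',      # placeholder path
                            ckpt_path='./checkpoints/db_best.ckpt',  # placeholder path
                            gpuid='0')
    print('precision=%.4f  recall=%.4f  f1=%.4f' % (p, r, f1))
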
Example #2
    def testModel(self):
        n_test_samples, max_length = self.data['X_test'].shape
        accuracy_test = []
        preds_test = []
        self.initModel()
        test_bar = ProgressBar('Testing', max=len(self.data['X_test']))
        for batch in minibatches_iter(self.data['X_test'],
                                      self.data['Y_test'],
                                      masks=self.data['mask_test'],
                                      char_inputs=self.data['C_test'],
                                      lexicons=self.lexicons['lexicons_test'],
                                      batch_size=self.batch_size):
            inputs, targets, masks, char_inputs, lexicons = batch
            test_bar.next(len(inputs))
            corrects = self.model.eval_fn(inputs, targets, masks, lexicons)
            _, preds = self.model.test_fn(inputs, targets, masks, lexicons)
            preds_test.append(preds)
            accuracy_test.append(corrects)
        this_test_accuracy = np.concatenate(
            accuracy_test)[0:n_test_samples].sum() / float(n_test_samples)
        test_bar.finish()
        print("Test accuracy: " + str(this_test_accuracy * 100) + "%")
        compute_f1_score(self.data['Y_test'], preds_test)
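    # Note: the compute_f1_score call above takes gold label sequences and predictions
    # (sequence-labeling F1); it is not the compute_f1_score(precision, recall) helper
    # used by the detection-evaluation example further below.
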
    def compute_metrics(self,
                        predicted_names,
                        test_labels,
                        sample_infs,
                        print_labels=False):

        n_correct, n_nonzero, f1 = 0, 0, 0

        print("Predictions: ", len(predicted_names))
        print("Test labels: ", len(test_labels))

        for i in range(len(predicted_names)):

            if print_labels:

                print("Predicted: ",
                      [sym.encode('utf-8') for sym in predicted_names[i]])
                print("Actual: ",
                      [sym.encode('utf-8') for sym in test_labels[i]])
                print("")
                print("")

            f1 += compute_f1_score(predicted_names[i], test_labels[i])

            if predicted_names[i] == test_labels[i]:
                n_correct += 1
                sample_infs[i].predicted_correctly = True

            else:
                sample_infs[i].predicted_correctly = False

        accuracy = n_correct / len(test_labels) * 100

        f1 = f1 * 100 / len(predicted_names)

        print("Absolute accuracy: ", accuracy)
        print("F1 score: ", f1)

        return accuracy, f1, predicted_names
Example #4
    def trainingModel(self):

        self.initModel()

        best_acc = 0
        best_validation_accuracy = 0
        stop_count = 0
        lr = self.learning_rate
        patience = self.patience
        n_dev_samples, max_length = self.data['X_dev'].shape
        n_test_samples, max_length = self.data['X_test'].shape

        for epoch in range(1, self.num_epochs + 1):
            print('Epoch %d (learning rate=%.4f, decay rate=%.4f): ' %
                  (epoch, lr, self.decay_rate))
            train_err = 0.0
            train_batches = 0
            train_bar = ProgressBar('Training', max=len(self.data['X_train']))
            for batch in minibatches_iter(
                    self.data['X_train'],
                    self.data['Y_train'],
                    masks=self.data['mask_train'],
                    char_inputs=self.data['C_train'],
                    lexicons=self.lexicons['lexicons_train'],
                    batch_size=self.batch_size,
                    shuffle=True):
                inputs, targets, masks, char_inputs, lexicons = batch
                err = self.model.train_fn(inputs, targets, masks, lexicons)
                train_err += err
                train_bar.next(len(inputs))

                if train_batches > 0 and train_batches % self.valid_freq == 0:
                    accuracy_valid = []
                    for batch in minibatches_iter(
                            self.data['X_dev'],
                            self.data['Y_dev'],
                            masks=self.data['mask_dev'],
                            lexicons=self.lexicons['lexicons_dev'],
                            char_inputs=self.data['C_dev'],
                            batch_size=self.batch_size):
                        inputs, targets, masks, char_inputs, lexicons = batch
                        accuracy_valid.append(
                            self.model.eval_fn(inputs, targets, masks,
                                               lexicons))
                    this_validation_accuracy = np.concatenate(accuracy_valid)[
                        0:n_dev_samples].sum() / float(n_dev_samples)

                    if this_validation_accuracy > best_validation_accuracy:
                        print("\nTrain loss, " + str(
                            (train_err / self.valid_freq)) +
                              ", validation accuracy: " +
                              str(this_validation_accuracy * 100) + "%")
                        best_validation_accuracy = this_validation_accuracy
                        preds_test = []
                        accuracy_test = []
                        for batch in minibatches_iter(
                                self.data['X_test'],
                                self.data['Y_test'],
                                masks=self.data['mask_test'],
                                char_inputs=self.data['C_test'],
                                lexicons=self.lexicons['lexicons_test'],
                                batch_size=self.batch_size):
                            inputs, targets, masks, char_inputs, lexicons = batch
                            _, preds = self.model.test_fn(
                                inputs, targets, masks, lexicons)
                            preds_test.append(preds)
                            accuracy_test.append(
                                self.model.eval_fn(inputs, targets, masks,
                                                   lexicons))
                        this_test_accuracy = np.concatenate(accuracy_test)[
                            0:n_test_samples].sum() / float(n_test_samples)
                        print "F1-score: " + str(
                            compute_f1_score(self.data["Y_test"], preds_test,
                                             self.data['label_alphabet']) *
                            100)
                        print("Test accuracy: " +
                              str(this_test_accuracy * 100) + "%")
                        if best_acc < this_test_accuracy:
                            best_acc = this_test_accuracy
                            write_model_data(self.model.network,
                                             self.model_path + '/best_model')

                    train_err = 0
                train_batches += 1

            train_bar.finish()

            # Stop early once validation accuracy has failed to improve `patience` times in a row.
            if stop_count == patience:
                break

            # Recompile the training function with the decayed learning rate.
            if self.update_algo != 'adadelta':
                lr = self.learning_rate / (1.0 + epoch * self.decay_rate)
                updates = utils.create_updates(self.model.loss_train,
                                               self.model.params,
                                               self.update_algo,
                                               lr,
                                               momentum=self.momentum)
                self.model.train_fn = theano.function(
                    [
                        self.model.input_var, self.model.target_var,
                        self.model.mask_var, self.model.lex_var
                    ],
                    outputs=self.model.loss_train,
                    updates=updates,
                    allow_input_downcast=True)

            print("Epoch " + str(epoch) + " finished.")
        print("The final best acc: " + str(best_acc * 100) + "%")

        if self.output_predict:
            f = codecs.open('./results/10-fold.txt', 'a+', 'utf-8')
            f.write(str(best_acc * 100) + '\n')
            f.close()
def evaluate(gt_care_list, gt_dontcare_list, pred_list, overlap=0.5):
    """

    :param gt_care_list: [-1, M, 2]
    :param gt_dontcare_list: [-1, M, 2]
    :param pred_list: [-1, M, 2]
    :param overlap:
    :return:
    """

    pred_care_list = []
    pred_dontcare_list = []

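    # Predictions whose IoU with any "don't care" region exceeds the threshold are
    # excluded from scoring; all remaining predictions are kept.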
    if len(gt_dontcare_list) != 0:
        for pred_box in pred_list:
            flag = False
            for gt_box in gt_dontcare_list:
                if quad_iou(gt_box, pred_box) > overlap:
                    flag = True
                    break

            if not flag:
                pred_care_list.append(pred_box)
            else:
                pred_dontcare_list.append(pred_box)
    else:
        pred_care_list = pred_list

    gt_care_flag_list = [False] * len(gt_care_list)
    pred_care_flag_list = [False] * len(pred_care_list)
    pairs_list = []
    gt_not_pair_list = []
    pred_not_pair_list = []

    # Greedy one-to-one matching: each ground-truth box is paired with the first unmatched
    # prediction whose IoU exceeds the threshold; the break keeps the matching one-to-one.
    for gt_i, gt_box in enumerate(gt_care_list):
        for pred_i, pred_box in enumerate(pred_care_list):
            if pred_care_flag_list[pred_i]:
                continue
            iou = quad_iou(gt_box, pred_box)
            if iou > overlap:
                pairs_list.append({'gt': gt_box, 'pred': pred_box, 'iou': iou})
                pred_care_flag_list[pred_i] = True
                gt_care_flag_list[gt_i] = True
                break  # this ground-truth box is matched; move on to the next one

    TP = len(pairs_list)

    if len(gt_care_list) == 0:
        recall = 1.0
        precision = 1.0 if len(pred_care_list) == 0 else 0.0
    elif len(pred_care_list) == 0:
        recall = 0.0
        precision = 0.0
    else:
        recall = 1.0 * TP / len(gt_care_list)
        precision = 1.0 * TP / len(pred_care_list)

    f1_score = compute_f1_score(precision, recall)

    return precision, recall, f1_score, TP, len(gt_care_list), len(
        pred_care_list), pairs_list
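
# compute_f1_score is not defined in these snippets. A minimal sketch, assuming the
# detection-side helper is simply the harmonic mean of precision and recall (the
# sequence-labeling examples above call a compute_f1_score with a different signature):
def compute_f1_score(precision, recall):
    # Harmonic mean of precision and recall; defined as 0.0 when both are 0.
    if precision + recall == 0:
        return 0.0
    return 2.0 * precision * recall / (precision + recall)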