Ejemplo n.º 1
0
 def __init__(self,
              class_train,
              class_test,
              min_division=2,
              min_info_gain=0.05,
              number_of_trees=15):
     """Load both CSV files, grow the forest and score it on the test rows.

     Args:
         class_train: Path of the training CSV file.
         class_test: Path of the test CSV file.
         min_division: Stored on the instance and forwarded to every
             ``DecisionTree``.
         min_info_gain: Stored on the instance and forwarded to every
             ``DecisionTree``.
         number_of_trees: How many ``DecisionTree`` instances to build.

     The value returned by ``f1_score`` for the test predictions is kept
     in ``self.f1``.
     """
     # Labels are appended to X_train & X_test.
     # The files are closed as soon as their rows have been read.
     with open(class_train, 'r', newline='') as train_file:
         self.X_train = list(csv.reader(train_file))
     with open(class_test, 'r', newline='') as test_file:
         self.X_test = list(csv.reader(test_file))
     # parameter initialization
     self.number_of_trees = number_of_trees
     self.min_info_gain = min_info_gain
     self.min_division = min_division
     # Each tree gets floor(sqrt(7)) == 2 feature indexes.
     feature_number_for_each_tree = math.floor(math.sqrt(7))
     forest = []
     # initialization of decision trees
     for _ in range(number_of_trees):
         # NOTE(review): range(1, 7) yields indexes 1..6 although the
         # sample size above is derived from 7 -- confirm that index 7
         # is meant to be excluded.
         feature_indexes = random.sample(range(1, 7),
                                         feature_number_for_each_tree)
         tree = DecisionTree(self.get_random_subsets(feature_indexes),
                             feature_indexes[0], feature_indexes[1],
                             min_info_gain, min_division)
         # Add tree instance to forest
         forest.append(tree)
     # making predictions and calculating the f1 score
     pred = self.prediction(forest, number_of_trees)
     # Column 8 of every test row is used as the reference label.
     self.f1 = f1_score([row[8] for row in self.X_test], pred)
Ejemplo n.º 2
0
Archivo: test.py Proyecto: sxrczh/FETHI
def test(opt,
         model,
         test_dataloader,
         threshold=config.PRED_THRESHOLD,
         record_result=False,
         analysis_result=False,
         mode=config.TEST):
    """Run ``model`` over ``test_dataloader`` and compute P/R/F1 metrics.

    Args:
        opt: Options object; ``corpus_dir`` and ``batch_size`` are read here
            and the object is forwarded to the loss and metric helpers.
        model: Model to evaluate; it is switched to eval mode.
        test_dataloader: Iterable yielding
            ``(mention, mention_len, lcontext, rcontext, mention_char, y)``.
        threshold: Forwarded to ``bce_loss``.
        record_result: When true, ``util.record_result`` is called per batch.
        analysis_result: When true, ``util.analysis_result`` is called once
            on the concatenated gold/pred tensors.
        mode: Forwarded to ``util.record_result``.

    Returns:
        Three ``(F1, precision, recall)`` tuples: loose-macro, loose-micro
        and strict, each computed over all batches at once.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()
    bgn = 0

    gold_all, pred_all = [], []
    types_path = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at trusted corpus directories.
    with open(types_path, 'rb') as types_file:
        hierarchical_types = pickle.load(types_file)

    # No gradients are needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for batch in test_dataloader:
            mention, mention_len, lcontext, rcontext, mention_char, y = batch

            mention = mention.to(device)
            mention_len = mention_len.to(device)
            lcontext = lcontext.to(device)
            rcontext = rcontext.to(device)
            mention_char = mention_char.to(device)
            y = y.to(device)

            model_output = model(
                [mention, mention_len, lcontext, rcontext, mention_char])

            loss, gold, pred, prob = bce_loss(model, model_output, y, opt,
                                              hierarchical_types, threshold)

            if record_result:
                util.record_result(gold, pred, prob, opt, bgn, mode)

            gold_all.append(gold)
            pred_all.append(pred)

            # Offset of the next batch, passed to util.record_result.
            bgn += opt.batch_size

    gold_all = torch.cat(gold_all)
    pred_all = torch.cat(pred_all)
    if analysis_result:
        util.analysis_result(gold_all, pred_all)

    pmacro, remacro = e.loose_macro_PR(gold_all, pred_all, opt)
    pmicro, remicro = e.loose_micro_PR(gold_all, pred_all, opt)
    pstrict, restrict = e.strict_PR(gold_all, pred_all, opt)
    macro_F1 = e.f1_score(pmacro, remacro)
    micro_F1 = e.f1_score(pmicro, remicro)
    strict_F1 = e.f1_score(pstrict, restrict)

    return (macro_F1, pmacro, remacro), \
           (micro_F1, pmicro, remicro), \
           (strict_F1, pstrict, restrict)
Ejemplo n.º 3
0
    def test_f1_score(self):
        """Check ``f1_score`` on one target with two prediction encodings.

        Both calls are expected to give macro F1 = 7/15 and micro F1 = 2/3.
        """
        expected_macro = 7 / 15
        expected_micro = 2 / 3
        target = FloatTensor([[1, 1, 1, 0, 1], [1, 0, 1, 0, 1]])

        # Case 1: +/-1 scores, evaluated with threshold=0.5.
        signed_scores = FloatTensor([[1, -1, 1, -1, 1], [-1, 1, 1, -1, -1]])
        macro, micro = f1_score(target, signed_scores, 5, threshold=0.5)
        self.assertAlmostEqual(expected_macro, macro, 5)
        self.assertAlmostEqual(expected_micro, micro, 5)

        # Case 2: 0/1 predictions, evaluated with use_threshold=False.
        binary_preds = FloatTensor([[1, 0, 1, 0, 1], [0, 1, 1, 0, 0]])
        macro, micro = f1_score(target, binary_preds, 5, use_threshold=False)
        self.assertAlmostEqual(expected_macro, macro, 5)
        self.assertAlmostEqual(expected_micro, micro, 5)
Ejemplo n.º 4
0
def get_metric(logits, data):
    """Return the mean ``f1_score`` (times 100) of the predicted answer spans.

    Args:
        logits: Pair ``(start_logits, end_logits)``; both are squeezed and
            moved to the CPU before use.
        data: Mapping with the keys ``'answer'``, ``'context'``,
            ``'offset_mapping'`` and ``'cw_ids'``.

    Returns:
        ``100.0 * sum(f1) / len(data['answer'])``.
    """
    raw_start, raw_end = logits
    raw_start = raw_start.squeeze().cpu()
    raw_end = raw_end.squeeze().cpu()

    answers = data['answer']
    contexts = data['context']
    offsets = data['offset_mapping']
    cw_ids = data['cw_ids']

    # 1.0 where cw_ids is non-zero, 0.0 elsewhere.
    keep = (torch.zeros_like(cw_ids) != cw_ids).type(torch.float32)
    # Positions with cw_ids == 0 receive _neg_inf so argmax never picks them.
    fill = (1 - keep) * _neg_inf

    pred_starts = torch.argmax(keep * raw_start + fill, dim=1)
    pred_ends = torch.argmax(keep * raw_end + fill, dim=1)

    def _predicted_text(idx):
        """Slice the context of sample ``idx`` using its predicted tokens."""
        first_tok = pred_starts[idx]
        last_tok = pred_ends[idx]
        char_from = offsets[idx][first_tok][0]
        char_to = offsets[idx][last_tok][1]
        return contexts[idx][char_from:char_to + 1]

    n_samples = len(answers)
    f1 = 0
    for idx in range(n_samples):
        true_answer = answers[idx]
        f1 += f1_score(_predicted_text(idx), true_answer)

    return 100.0 * f1 / n_samples
Ejemplo n.º 5
0
 def evaluate(self, x, y):
     """Score the model's output for ``x`` against ``y``.

     Switches to eval mode, runs the forward pass and calls ``f1_score``
     with ``self.number_of_class`` and ``self.best_threshold``.

     Returns:
         ``(f1_macro, f1_micro)``, each taken as element 0 of the
         corresponding result converted to a NumPy array on the CPU.
     """
     self.eval()
     prediction = self(x)
     macro, micro = f1_score(y,
                             prediction,
                             self.number_of_class,
                             use_threshold=True,
                             threshold=self.best_threshold)
     macro_value = macro.data.cpu().numpy()[0]
     micro_value = micro.data.cpu().numpy()[0]
     return macro_value, micro_value
Ejemplo n.º 6
0
 def __init__(self,
              class_train="Classification_Train.csv",
              class_test="Classification_Test_Data.csv",
              max_depth=5,
              min_info_gain=0.03,
              number_of_trees=15):
     """Load both CSV files, grow the forest and score it on the test rows.

     Args:
         class_train: Path of the training CSV file.
         class_test: Path of the test CSV file.
         max_depth: Stored on the instance and forwarded to every
             ``DecisionTree``.
         min_info_gain: Stored on the instance and forwarded to every
             ``DecisionTree``.
         number_of_trees: How many ``DecisionTree`` instances to build.

     The value returned by ``f1_score`` for the test predictions is kept
     in ``self.f1``.
     """
     # Labels are appended to X_train & X_test.
     # The files are closed as soon as their rows have been read.
     with open(class_train, 'r', newline='') as train_file:
         self.X_train = list(csv.reader(train_file))
     with open(class_test, 'r', newline='') as test_file:
         self.X_test = list(csv.reader(test_file))
     # parameter initialization
     self.number_of_trees = number_of_trees
     self.min_info_gain = min_info_gain
     self.max_depth = max_depth
     # Each tree gets floor(sqrt(7)) == 2 feature indexes drawn from 1..7.
     feature_number_for_each_tree = math.floor(math.sqrt(7))
     forest = []
     # initialization of decision trees
     for _ in range(number_of_trees):
         feature_indexes = random.sample(range(1, 8),
                                         feature_number_for_each_tree)
         tree = DecisionTree(self.get_random_subsets(feature_indexes),
                             feature_indexes[0], feature_indexes[1],
                             min_info_gain, max_depth)
         # Decision tree class automatically calls the train method
         forest.append(tree)
     # making predictions and calculating the f1 score
     pred = self.prediction(forest, number_of_trees)
     # Column 8 of every test row is used as the reference label.
     self.f1 = f1_score([row[8] for row in self.X_test], pred)
Ejemplo n.º 7
0
def SVM_machine(class_train, class_test):
    """Cross-validate an SVM, predict the test set and print the results.

    Args:
        class_train: Path of the training CSV file.
        class_test: Path of the test CSV file.

    Prints the predictions, the value returned by ``classError`` and the
    value returned by ``f1_score``. Returns ``None``.
    """
    print("\nSVM Initialization\n")
    # dividing the train data as input and output for SVM algorithm
    with open(class_train, 'r', newline='') as train_file:
        train_rows = list(csv.reader(train_file))
    # NOTE(review): X and y are not passed to the classifier anywhere in
    # this function -- confirm that SupportVectorMachine obtains its
    # training data by other means.
    X, y = calculateInputOutput(convertSVM(train_rows))
    # creating SVM object from SupportVectorMachine class
    SVM = SupportVectorMachine()
    # calculating the 5 fold cross validation
    SVM.crossValidation()
    # dividing the test data as input and output for SVM algorithm
    with open(class_test, 'r', newline='') as test_file:
        test_rows = list(csv.reader(test_file))
    X_t, y_t = calculateInputOutput(convertSVM(test_rows))
    # classifying the test data with the SVM machine
    y_p = SVM.predict(X_t)
    # printing the label predictions
    print('The SVM predictions are:')
    print(np.array(y_p))
    # calculating the prediction error on the test set and printing
    error = classError(y_p, y_t)
    print('\nThe SVM prediction error percentage is {0:.2f} \n'.format(error))
    score = f1_score(y_t, y_p)
    print('f1(SVM) = {0:.2f} \n'.format(score))
Ejemplo n.º 8
0
    def evaluate(self, criterion):
        """Evaluates ``self.model`` on ``self.val_loader``.

        The model is put in eval mode and every batch is moved to
        ``self.device``. Labels are flattened and cast to ``long``; labels
        and predictions equal to 0 or 1 are counted into a binary
        confusion matrix.

        Args:
            criterion: Loss function, called as ``criterion(outputs, labels)``.

        Returns: A tuple of the (F1-Score, Accuracy, Total Loss) on the
            validation set, where Total Loss is the sum of the per-batch
            loss values.
        """
        total_loss = 0.0
        tp, fp, fn, tn = 0, 0, 0, 0
        self.model.eval()
        with torch.no_grad():
            for images, labels in self.val_loader:
                images = images.to(self.device)
                labels = labels.to(self.device).long().flatten()
                # Forward propagate and evaluate
                outputs = self.model(images)
                loss = criterion(outputs, labels)
                # Predicted class = argmax over the log-softmax outputs
                log_probabilities = self.model.log_softmax(outputs)
                predictions = torch.argmax(log_probabilities, dim=1)
                # Compute confusion matrix terms
                tp += (predictions[labels == 1] == 1).sum().item()
                fp += (predictions[labels == 0] == 1).sum().item()
                fn += (predictions[labels == 1] == 0).sum().item()
                tn += (predictions[labels == 0] == 0).sum().item()
                # Compute total loss
                total_loss += loss.item()
        # Compute model accuracy and f1-score; the small epsilon keeps the
        # division defined when no sample was counted.
        accuracy = (tp + tn) / (tp + fp + fn + tn + 1e-10)
        score = f1_score(tp, fp, fn, tn)
        return score, accuracy, total_loss
Ejemplo n.º 9
0
# Select an image (and its annotation / expected answer) by index
index = 4
image_path = os.path.join(image_dir, image_files[index])
img = cv2.imread(image_path)
gt = annotation[index]
answer = ans[index]

# Predict: keep detections whose score reaches the confidence threshold,
# ordered from highest to lowest score
confidence_threshold = 0.25
out = ssd.predict(image_path)
out = [pred for pred in out if pred["score"]>=confidence_threshold]
out = sorted(out, key=lambda x:x["score"], reverse=True)

# Evaluate: score the predicted bounding boxes against the answer
pred = [p['bbox'] for p in out]
evaluation = f1_score(pred, answer)
print('f1score:', evaluation)

# Display
plot_bbox(img, gt, out)
print("1つ目:正解と予測の重複度を表すIoUが規定値(0.5)に達していないことから, 誤検出(FP)となっており、正解bboxも検出できていない為、未検出(FN)")

##################

# Select an image (and its annotation / expected answer) by index
index = 4
image_path = os.path.join(image_dir, image_files[index])
img = cv2.imread(image_path)
gt = annotation[index]
answer = ans[index]
Ejemplo n.º 10
0
    def fit(self, train, test, verbose=False):
        """
        The general training loop to fit the model

        Every epoch shuffles the training instances, draws fresh negative
        samples and minimises a binary cross-entropy loss over the target
        and negative item scores. With ``verbose`` set, the model is
        evaluated after each epoch, the metrics are printed and recorded,
        and a checkpoint is saved whenever the MAP improves. The recorded
        metrics are written to ``results/ml1m_hold`` as CSV once training
        ends, with columns in the order the keys are inserted below.

        Parameters
        ----------

        train: :class:`spotlight.interactions.Interactions`
            training instances, also contains test sequences
        test: :class:`spotlight.interactions.Interactions`
            only contains targets for test sequences
        verbose: bool, optional
            print the logs
        """

        # convert to sequences, targets and users
        sequences_np = train.sequences.sequences
        targets_np = train.sequences.targets
        users_np = train.sequences.user_ids.reshape(-1, 1)

        n_train = sequences_np.shape[0]

        output_str = 'total training instances: %d' % n_train
        print(output_str)

        if not self._initialized:
            self._initialize(train)

        start_epoch = 0
        best_map = 0

        # create the checkpoint directory if it does not exist
        save_dir = args.save_root + args.dataset + '/'
        os.makedirs(save_dir, exist_ok=True)

        # One dict per evaluated epoch. The DataFrame is built once at the
        # end because DataFrame.append was removed in pandas 2.0 and copied
        # the whole frame on every call.
        epoch_rows = []
        for epoch_num in range(start_epoch, self._n_iter):

            t1 = time()

            # set model to training mode
            self._net.train()

            users_np, sequences_np, targets_np = shuffle(users_np,
                                                         sequences_np,
                                                         targets_np)

            negatives_np = self._generate_negative_samples(users_np, train, n=self._neg_samples)

            # convert numpy arrays to PyTorch tensors and move it to the corresponding devices
            users, sequences, targets, negatives = (torch.from_numpy(users_np).long(),
                                                    torch.from_numpy(sequences_np).long(),
                                                    torch.from_numpy(targets_np).long(),
                                                    torch.from_numpy(negatives_np).long())

            users, sequences, targets, negatives = (users.to(self._device),
                                                    sequences.to(self._device),
                                                    targets.to(self._device),
                                                    negatives.to(self._device))

            epoch_loss = 0.0

            for (minibatch_num,
                 (batch_users,
                  batch_sequences,
                  batch_targets,
                  batch_negatives)) in enumerate(minibatch(users,
                                                           sequences,
                                                           targets,
                                                           negatives,
                                                           batch_size=self._batch_size)):
                # Score targets and negatives in a single forward pass,
                # then split the scores back apart.
                items_to_predict = torch.cat((batch_targets, batch_negatives), 1)
                items_prediction = self._net(batch_sequences,
                                             batch_users,
                                             items_to_predict)

                (targets_prediction,
                 negatives_prediction) = torch.split(items_prediction,
                                                     [batch_targets.size(1),
                                                      batch_negatives.size(1)], dim=1)

                self._optimizer.zero_grad()
                # compute the binary cross-entropy loss
                positive_loss = -torch.mean(
                    torch.log(torch.sigmoid(targets_prediction)))
                negative_loss = -torch.mean(
                    torch.log(1 - torch.sigmoid(negatives_prediction)))
                loss = positive_loss + negative_loss

                epoch_loss += loss.item()

                loss.backward()
                self._optimizer.step()

            # mean loss per minibatch
            epoch_loss /= minibatch_num + 1

            t2 = time()
            if verbose:
                precision, recall, mean_aps = evaluate_ranking(self, test, train, k=[1, 5, 10])
                # Entries 0, 1, 2 correspond to k = 1, 5, 10 requested above.
                prec = [np.mean(precision[i]) for i in range(3)]
                rec = [np.mean(recall[i]) for i in range(3)]
                f1 = [f1_score(p, r) for p, r in zip(prec, rec)]
                train_time = t2 - t1
                eval_time = time() - t2

                output_str = ("Epoch %d [%.1f s]\tloss=%.4f, map=%.4f, "
                              "prec@1=%.4f, prec@5=%.4f, prec@10=%.4f, "
                              "recall@1=%.4f, recall@5=%.4f, recall@10=%.4f,"
                              "f1_score@1=%.4f,f1_score@5=%.4f,f1_score@10=%.4f,[%.1f s]"
                              % (epoch_num + 1,
                                 train_time,
                                 epoch_loss,
                                 mean_aps,
                                 prec[0], prec[1], prec[2],
                                 rec[0], rec[1], rec[2],
                                 f1[0], f1[1], f1[2],
                                 eval_time))

                epoch_rows.append({
                    "Epoch": epoch_num + 1,
                    "time1": train_time,
                    "loss": epoch_loss,
                    "map": mean_aps,
                    "prec@1": prec[0],
                    "prec@5": prec[1],
                    "prec@10": prec[2],
                    "recall@1": rec[0],
                    "recall@5": rec[1],
                    "recall@10": rec[2],
                    "f1_score@1": f1[0],
                    "f1_score@5": f1[1],
                    "f1_score@10": f1[2],
                    "time2": eval_time,
                })

                print(output_str)
                # Keep a checkpoint of the best model seen so far (by MAP).
                if mean_aps > best_map:
                    best_map = mean_aps
                    checkpoint_name = "best_model.pth.tar"
                    save_checkpoint({
                        'epoch': epoch_num + 1,
                        'state_dict': self._net.state_dict(),
                        'optimizer': self._optimizer.state_dict(),
                    }, checkpoint_name, save_dir)

        print ('***** Best map:{0:.4f} *****'.format(best_map))
        results = pd.DataFrame(epoch_rows)
        results.to_csv("results/ml1m_hold", index=False)
Ejemplo n.º 11
0
def train(opt, model, optim, tr_dataloader, test_dataloader, dev_dataloader,
          lr_scheduler, logger):
    """Train ``model`` for ``opt.epochs`` epochs and save checkpoints.

    After every epoch the running training averages are printed, the model
    is evaluated on the dev and test loaders (when given), and the weights
    are saved to ``best_model.pth`` whenever the sum of the three test F1
    values exceeds the best sum so far. The final weights are always saved
    to ``last_model.pth``. Both files live under ``opt.experiment_root``.

    ``lr_scheduler`` and ``logger`` are accepted but not used by the
    active code. The loss and F1 histories are never reset, so the printed
    averages cover every batch seen since the first epoch.
    """
    use_gpu = torch.cuda.is_available() and opt.cuda
    device = torch.device(config.CUDA) if use_gpu else "cpu"

    best_state = None
    train_loss, train_f = [], []
    best_t_macro_f = best_t_micro_f = best_t_strict_f = 0

    best_model_path = os.path.join(opt.experiment_root, "best_model.pth")
    last_model_path = os.path.join(opt.experiment_root, "last_model.pth")

    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"

    def _constant(values, dtype):
        """Build a non-trainable tensor on the training device."""
        return torch.tensor(values, requires_grad=False,
                            dtype=dtype).to(device)

    # Only the commented-out hier_loss call below consumes these tensors.
    prior = _constant(util.create_prior(p), torch.long)
    tune = _constant(util.create_prior(p, config.BETA), torch.float)
    mask = _constant(util.create_mask(p), torch.long)

    for epoch in range(opt.epochs):
        # logger.info(f"epoch: {epoch}")
        print(f"====Epoch: {epoch}====")
        tr_iter = iter(tr_dataloader)
        model.train()
        for batch in tqdm(tr_iter):
            optim.zero_grad()

            (mention, mention_len, mention_neighbor,
             lcontext, rcontext, y) = (field.to(device) for field in batch)

            model_output = model(
                [mention, mention_len, mention_neighbor, lcontext, rcontext])
            # loss, gold, pred = customized_bce_loss(model, model_output, y, opt, hierarchical_types)
            loss, gold, pred = bce_loss(model, model_output, y, opt, "train")
            # loss, gold, pred = hier_loss(model, model_output, y, opt, tune, prior, mask)

            train_loss.append(float(loss.item()))
            precision, recall = e.loose_macro_PR(gold, pred, opt)
            train_f.append(e.f1_score(float(precision), float(recall)))

            loss.backward()
            optim.step()
        # lr_scheduler.step()

        avg_loss = np.mean(train_loss)
        avg_f = np.mean(train_f)
        print(f"Avg train loss: {avg_loss}, Avg train macro-f1 score: {avg_f}")

        if dev_dataloader is not None:
            dev_ma, dev_mi, dev_str = test(opt, model, dev_dataloader)
            print(
                f"Model acc in dev data:\n"
                f" \nmacro: F1: {dev_ma[0]}, P: {dev_ma[1]}, R: {dev_ma[2]}"
                f" \nmicro: F1: {dev_mi[0]}, P: {dev_mi[1]}, R: {dev_mi[2]}"
                f" \nstrict: F1: {dev_str[0]}, P: {dev_str[1]}, R: {dev_str[2]}"
            )

        if test_dataloader is not None:
            test_ma, test_mi, test_str = test(opt,
                                              model,
                                              test_dataloader,
                                              record_result=False)
            print(
                f"Model acc in test data:\n"
                f" \nmacro: F1: {test_ma[0]}, P: {test_ma[1]}, R: {test_ma[2]}"
                f" \nmicro: F1: {test_mi[0]}, P: {test_mi[1]}, R: {test_mi[2]}"
                f" \nstrict: F1: {test_str[0]}, P: {test_str[1]}, R: {test_str[2]}"
            )
            # Model selection: a larger macro + micro + strict F1 sum wins.
            best_total = best_t_macro_f + best_t_micro_f + best_t_strict_f
            new_total = test_ma[0] + test_mi[0] + test_str[0]
            if new_total > best_total:
                best_t_macro_f = test_ma[0]
                best_t_micro_f = test_mi[0]
                best_t_strict_f = test_str[0]

                best_state = model.state_dict()
                print(f"save best model in: {best_model_path}")
                torch.save(best_state, best_model_path)

        print(
            f"Best Model F values:"
            f"\nmacro: {best_t_macro_f}, micro: {best_t_micro_f}, strict: {best_t_strict_f}"
        )

    torch.save(model.state_dict(), last_model_path)
0
def test(opt, model, test_dataloader, record_result=False):
    """Evaluate ``model`` and return batch-averaged P/R/F1 metrics.

    For every batch the loose-macro, loose-micro and strict precision and
    recall are computed together with their F1; the per-batch values are
    summed and finally divided by ``len(test_dataloader)``.

    Returns:
        Three ``(F1, precision, recall)`` tuples in the order macro, micro,
        strict.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()

    # One [F1, precision, recall] accumulator per metric family, in the
    # same order as pr_functions.
    pr_functions = (e.loose_macro_PR, e.loose_micro_PR, e.strict_PR)
    sums = [[0, 0, 0] for _ in pr_functions]

    bgn = 0

    total = len(test_dataloader)
    test_iter = iter(test_dataloader)

    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"

    def _constant(values, dtype):
        """Build a non-trainable tensor on the evaluation device."""
        return torch.tensor(values, requires_grad=False,
                            dtype=dtype).to(device)

    # Only the commented-out hier_loss call below consumes these tensors.
    prior = _constant(util.create_prior(p), torch.long)
    tune = _constant(util.create_prior(p, config.BETA), torch.float)
    mask = _constant(util.create_mask(p), torch.long)

    for batch in test_iter:
        (mention, mention_len, mention_neighbor,
         lcontext, rcontext, y) = (field.to(device) for field in batch)

        model_output = model(
            [mention, mention_len, mention_neighbor, lcontext, rcontext])

        # loss, gold, pred = customized_bce_loss(model, model_output, y, opt, hierarchical_types)
        loss, gold, pred = bce_loss(model, model_output, y, opt, "test")
        # loss, gold, pred = hier_loss(model, model_output, y, opt, tune, prior, mask)

        if record_result:
            util.record_result(gold, pred, opt, bgn)

        bgn += opt.batch_size

        for acc, pr_function in zip(sums, pr_functions):
            batch_p, batch_r = pr_function(gold, pred, opt)
            acc[0] += e.f1_score(batch_p, batch_r)
            acc[1] += batch_p
            acc[2] += batch_r

    # NOTE(review): every batch carries the same weight here, including a
    # shorter final batch -- confirm this is the intended averaging.
    return tuple(tuple(value / total for value in acc) for acc in sums)
Ejemplo n.º 13
0
def test(opt,
         model,
         test_dataloader,
         record_result=False,
         analysis_result=False,
         mode=config.TEST):
    """Run ``model`` over ``test_dataloader`` and compute P/R/F1 metrics.

    Gold labels and predictions of all batches are concatenated and each
    metric is computed once over the complete set, so a shorter final
    batch does not get the same weight as a full one.

    Args:
        opt: Options object; ``corpus_dir`` and ``batch_size`` are read here
            and the object is forwarded to the loss and metric helpers.
        model: Model to evaluate; it is switched to eval mode.
        test_dataloader: Iterable yielding
            ``(mention, mention_len, lcontext, rcontext, mention_char, y)``.
        record_result: When true, ``util.record_result`` is called per batch.
        analysis_result: When true, ``util.analysis_result`` is called once
            on the concatenated gold/pred tensors.
        mode: Accepted for interface compatibility; not used here.

    Returns:
        Three ``(F1, precision, recall)`` tuples: loose-macro, loose-micro
        and strict.
    """
    device = torch.device(config.CUDA) if torch.cuda.is_available() else "cpu"
    model.eval()

    bgn = 0
    gold_all, pred_all = [], []

    p = config.DATA_ROOT + opt.corpus_dir + "hierarchical_types.pkl"
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at trusted corpus directories.
    with open(p, 'rb') as types_file:
        hierarchical_types = pickle.load(types_file)

    # No gradients are needed for evaluation, so skip building the graph.
    with torch.no_grad():
        for batch in test_dataloader:
            # mention, mention_len, mention_neighbor, lcontext, rcontext, y = batch
            mention, mention_len, lcontext, rcontext, mention_char, y = batch

            mention = mention.to(device)
            mention_len = mention_len.to(device)
            lcontext = lcontext.to(device)
            rcontext = rcontext.to(device)
            mention_char = mention_char.to(device)
            y = y.to(device)

            model_output = model(
                [mention, mention_len, lcontext, rcontext, mention_char])

            loss, gold, pred, prob = bce_loss(model, model_output, y, opt,
                                              hierarchical_types)

            if record_result:
                util.record_result(gold, pred, prob, opt, bgn)

            gold_all.append(gold)
            pred_all.append(pred)

            # Offset of the next batch, passed to util.record_result.
            bgn += opt.batch_size

    gold_all = torch.cat(gold_all)
    pred_all = torch.cat(pred_all)
    if analysis_result:
        util.analysis_result(gold_all, pred_all)

    pmacro, remacro = e.loose_macro_PR(gold_all, pred_all, opt)
    pmicro, remicro = e.loose_micro_PR(gold_all, pred_all, opt)
    pstrict, restrict = e.strict_PR(gold_all, pred_all, opt)
    macro_F1 = e.f1_score(pmacro, remacro)
    micro_F1 = e.f1_score(pmicro, remicro)
    strict_F1 = e.f1_score(pstrict, restrict)

    return (macro_F1, pmacro, remacro), \
           (micro_F1, pmicro, remicro), \
           (strict_F1, pstrict, restrict)