Example #1
    def test_score(self):
        ch = hmcdatasets.load_shades_class_hierachy()
        X, y = hmcdatasets.load_shades_data()
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.50,
                                                            random_state=0)
        dt = hmc.DecisionTreeHierarchicalClassifier(ch)
        dt = dt.fit(X_train, y_train)

        y_pred = dt.predict(X_test)

        metrics.classification_report(ch, y_test, pd.DataFrame(y_pred))
Example #2
    def eval(dataset):
        dev_dataloader = DataLoader(dataset, batch_size=args.predict_batch_size, shuffle=False, num_workers=2)
        n_sample = len(dev_dataloader)
        result = dict()

        detection_loss = torch.nn.CrossEntropyLoss().to(device)

        pos.eval()
        E.eval()

        all_detection_preds = []
        all_detection_logit = []

        for sample in tqdm(dev_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, pos1, pos2, pos_mask, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector
            with torch.no_grad():
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                out = pos(pos1, pos2, real_feature)
                all_detection_logit.append(out)
                all_detection_preds.append(torch.argmax(out, 1))

        all_y = LongTensor(dataset.dataset[:, -4].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds, 0).cpu()  # [length, 1]
        # all_detection_binary_preds = convert_to_int_by_threshold(all_detection_preds.squeeze())  # [length, 1]
        all_detection_logit = torch.cat(all_detection_logit, 0).cpu()

        # compute the loss
        detection_loss = detection_loss(all_detection_logit, all_binary_y.long())
        result['detection_loss'] = detection_loss

        logger.info(
            metrics.classification_report(all_binary_y, all_detection_preds, target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_preds, all_binary_y)

        y_score = all_detection_logit.softmax(1)[:, 1].tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)

        return result
Example #3
    def test(dataset):
        load_model(model, path=config['model_save_path'], model_name='bert')
        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()
        model.eval()

        # Loss function
        classified_loss = torch.nn.CrossEntropyLoss().to(device)
        all_pred = []
        total_loss = 0
        all_logit = []
        for sample in tqdm.tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            with torch.no_grad():
                logit = model(token, mask, type_ids)
                all_logit.append(logit)
                all_pred.append(torch.argmax(logit, 1))
                total_loss += classified_loss(logit, y.long())

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_pred = torch.cat(all_pred, 0).cpu()
        all_logit = torch.cat(all_logit, 0).cpu()

        # classification report
        ind_class_acc = metrics.ind_class_accuracy(all_pred, all_y)
        report = metrics.classification_report(all_y,
                                               all_pred,
                                               output_dict=True)
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_pred, all_binary_y)
        result.update(report)
        # EER is only meaningful for binary classification
        y_score = all_logit.softmax(1)[:, 1].tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['ind_class_acc'] = ind_class_acc
        result['loss'] = total_loss / n_sample
        result['all_y'] = all_y.tolist()
        result['all_pred'] = all_pred.tolist()
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        result['y_score'] = y_score
        result['all_binary_y'] = all_binary_y

        freeze_data['test_all_y'] = all_y.tolist()
        freeze_data['test_all_pred'] = all_pred.tolist()
        freeze_data['test_score'] = y_score
        return result
Example #4
def evaluate(val_dataset):
    val_confusion_matrix.reset_states()

    for (batch, (speaker, utterance, emotion)) in enumerate(val_dataset):
        eval_step(speaker, utterance, emotion)

    return metrics.classification_report(val_confusion_matrix)
Example #5
def incremental_evaluate(sess, model, minibatch_iter, size, test=False):
    t_test = time.time()
    val_losses = []
    val_preds = []
    labels = []
    iter_num = 0
    finished = False

    while not finished:
        feed_dict_val, batch_labels, finished, _ = \
            minibatch_iter.incremental_node_val_feed_dict(
                size, iter_num, test=test)

        node_outs_val = sess.run([model.preds, model.loss],
                                 feed_dict=feed_dict_val)

        val_preds.append(node_outs_val[0])
        labels.append(batch_labels)
        val_losses.append(node_outs_val[1])
        iter_num += 1

    # TODO: move this into the model
    val_preds = np.vstack(val_preds)
    labels = np.vstack(labels)
    f1_scores = calc_f1(labels, val_preds)
    report = classification_report(labels, val_preds)

    # precision, recall, thresholds = precision_recall_curve(
    #     labels[:, 1], val_preds[:, 1])
    # area = auc(recall, precision)

    return np.mean(val_losses), f1_scores[0], f1_scores[1], report, (
        time.time() - t_test)  #, area
Example #6
def test(classifier, x_test, y_test):
    prediction = classifier.predict(x_test)
    print("Confusion Matrix for the given Decision Tree Classifier model:")
    print(confusion_matrix(y_test, prediction))
    print("Classification Report for the given Decision Tree Classifier:")
    print(classification_report(y_test, prediction))
    print("Accuracy Score:", accuracy_score(y_test, prediction))
    return
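A minimal, hypothetical invocation of the helper above (the iris data, the split, and the classifier are illustrative assumptions, not from the source):

from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Fit a small decision tree and report on a held-out split.
X, y = load_iris(return_X_y=True)
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = DecisionTreeClassifier(random_state=0).fit(x_train, y_train)
test(clf, x_test, y_test)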
Example #7
def evaluate(model, data_iterator, params, mark='Eval', verbose=True):
    """Evaluate the model on `steps` batches."""
    # set model to evaluation mode
    model.eval()

    # id2tag dict
    idx2tag = {idx: tag for idx, tag in enumerate(params.tags)}

    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()
    for input_ids, input_mask, labels in data_iterator:
        # to device
        input_ids = input_ids.to(params.device)
        input_mask = input_mask.to(params.device)
        labels = labels.to(params.device)

        batch_size, max_len = labels.size()

        # get loss
        loss = model(input_ids, attention_mask=input_mask.bool(), labels=labels)
        loss /= batch_size
        # update the average loss
        loss_avg.update(loss.item())

        # inference
        with torch.no_grad():
            batch_output = model(input_ids, attention_mask=input_mask.bool())

        # recover the real (unpadded) length of each label sequence
        real_batch_tags = []
        for i in range(batch_size):
            real_len = int(input_mask[i].sum())
            real_batch_tags.append(labels[i][:real_len].to('cpu').numpy())

        # List[int]
        pred_tags.extend([idx2tag.get(idx) for indices in batch_output for idx in indices])
        true_tags.extend([idx2tag.get(idx) for indices in real_batch_tags for idx in indices])
    # sanity check
    assert len(pred_tags) == len(true_tags), 'len(pred_tags) is not equal to len(true_tags)!'

    # logging loss, f1 and report
    metrics = {}
    f1 = f1_score(true_tags, pred_tags)
    accuracy = accuracy_score(true_tags, pred_tags)
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1
    metrics['accuracy'] = accuracy
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)

    # f1 classification report
    if verbose:
        report = classification_report(true_tags, pred_tags)
        logging.info(report)
    return metrics
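A hedged usage sketch for evaluate() above; model, val_loader, and params are assumed to come from the surrounding training script:

# Evaluate on the validation iterator and log the tag-level report.
val_metrics = evaluate(model, val_loader, params, mark='Val', verbose=True)
print('loss={loss:.4f} f1={f1:.4f} accuracy={accuracy:.4f}'.format(**val_metrics))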
Example #8
def evaluate(model, data_iterator, params, mark='Test', verbose=False):
    """Evaluate the model on `steps` batches."""
    # set model to evaluation mode
    model.eval()

    idx2tag = params.idx2tag

    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()

    for _ in range(params.eval_steps):
        # fetch the next evaluation batch
        batch_data, batch_tags = next(data_iterator)
        batch_masks = batch_data.gt(0)

        loss = model(batch_data,
                     token_type_ids=None,
                     attention_mask=batch_masks,
                     labels=batch_tags)
        batch_output = model(batch_data,
                             token_type_ids=None,
                             attention_mask=batch_masks
                             )  # shape: (batch_size, max_len, num_labels)

        loss = loss[0]
        batch_output = batch_output[0]

        if params.n_gpu > 1 and params.multi_gpu:
            loss = loss.mean()
        loss_avg.update(loss.item())

        batch_output = batch_output.detach().cpu().numpy()
        batch_tags = batch_tags.to('cpu').numpy()

        pred_tags.extend([
            idx2tag.get(idx) for indices in np.argmax(batch_output, axis=2)
            for idx in indices
        ])
        true_tags.extend(
            [idx2tag.get(idx) for indices in batch_tags for idx in indices])
    assert len(pred_tags) == len(true_tags)

    # logging loss, f1 and report
    metrics = {}
    f1 = f1_score(true_tags, pred_tags)
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v)
                            for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)

    if verbose:
        report = classification_report(true_tags, pred_tags)
        logging.info(report)
    return metrics
Example #9
def test_ffnn():
    params = {
        'n_layers': 4,
        'hidden_nodes': [512, 512, 512, 512],
        'epochs': 10,
        'use_dynamic_features': True,
        'use_mspec': False,
        'as_mat': False,
        'speaker_norm': False,
        'context_length': 17
    }
    net = FFNN(params)
    model = net.train_model()
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("FFNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
Example #10
def test_rnn():
    """Notice as_mat is true here!"""
    params = {
        'n_layers': 2,
        'hidden_nodes': [32, 32],
        'epochs': 100,
        'use_dynamic_features': True,
        'use_mspec': True,
        'as_mat': True,
        'speaker_norm': False,
        'context_length': 35,
        # assumed key: train_model(params['unroll']) below requires it
        'unroll': True
    }
    net = RNN(params)
    model = net.train_model(params['unroll'])
    net.set_model(model)
    y_true, yp = net.predict_on_test()
    print("RNN RESULTS")
    print(get_f1_score(y_true, yp))
    print(get_accuracy(y_true, yp))
    print(classification_report(y_true, yp))
    model.save('rnn-64-64-context-35.h5')
Example #11
def evaluate(test_dataset):
    confusion_matrix = metrics.ConfusionMatrix(model_config.n_classes)

    for (batch, (speaker, utterance, emotion)) in enumerate(test_dataset):
        speaker = tf.squeeze(speaker)  # (batch_size, dial_len)
        emotion = tf.squeeze(emotion)  # (batch_size, dial_len)

        mask = tf.cast(tf.math.not_equal(utterance, 0), dtype=tf.float32)

        utterance = encode_utterance(utterance)

        predictions = model(utterance, False,
                            mask)  # (batch_size, dial_len, n_classes)

        sample_weight = tf.math.not_equal(tf.math.reduce_sum(mask, axis=2), 0)
        sample_weight = tf.cast(sample_weight, dtype=tf.float32)
        pred_emotion = tf.math.argmax(predictions, axis=2)

        confusion_matrix(emotion, pred_emotion, sample_weight=sample_weight)

    return metrics.classification_report(confusion_matrix)
Example #12
def main():
    # check for gpu device
    print(tf.test.gpu_device_name())
    # print tf and keras version
    print(tf.VERSION)
    print(tf.keras.__version__)

    data, labels = data_prep.read_images("data/train",
                                         IMAGE_DIMS,
                                         general=True)
    print("Train Set loaded")
    data_test, labels_test = data_prep.read_images("data/test",
                                                   IMAGE_DIMS,
                                                   general=True)
    print("Test Set loaded")

    lb = LabelBinarizer()
    X_train, y_train = data_prep.binarize(data, labels, lb)
    X_test, y_test = data_prep.binarize(data_test, labels_test, lb)
    print(X_train.shape)
    print(y_train.shape)

    model = VGGNet.vgg_net(N_CLASSES, IMAGE_DIMS)

    # Train the model
    start = time.time()
    history = VGGNet.fit(model, X_train, y_train, X_test, y_test, EPOCHS, BS)
    end = time.time()
    print("Training Time: " + timer(start, end))

    # Metrics
    metrics.plot_evaluation(history)
    y_pred = model.predict(X_test)
    classification_report = metrics.classification_report(y_test, y_pred, lb)
    print("Classification report : \n", classification_report)
    confusion_matrix = metrics.confusion_matrix(y_test, y_pred)
    print("Confusion Matrix : \n", confusion_matrix)
    metrics.print_confusion_matrix(y_test, y_pred, lb)
Example #13
                         class_to_index,
                         subtitles,
                         section=[0, 11])

# %%

metrics.worst_samples(imgs_valid,
                      labels_valid,
                      scores_predict,
                      class_to_index,
                      top=16,
                      names_valid=None)

# %%

metrics.classification_report(labels_valid, label_predict, class_to_index)

# %%

metrics.confusion_matrix(labels_valid, label_predict, class_to_index)

# %%

metrics.ROC(labels_valid, scores_predict, class_to_index, section=[0, 5, 11])

#%%

metrics.ROCCompare(labels_valids,
                   scores_predicts,
                   class_to_index,
                   subtitles,
Example #14
    def test(dataset):
        load_model(model, path=config['model_save_path'], model_name='bert')
        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()
        model.eval()

        # Loss function
        classified_loss = torch.nn.CrossEntropyLoss().to(device)
        detection_loss = torch.nn.BCELoss().to(device)
        all_detection_preds = []
        all_features = []
        all_pred = []
        total_loss = 0
        all_logit = []
        for sample in tqdm.tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            with torch.no_grad():
                f_vector, discriminator_output, classification_output = model(
                    token, mask, type_ids, return_feature=True)
                discriminator_output = discriminator_output.squeeze()
                all_detection_preds.append(discriminator_output)
                if args.do_vis:
                    all_features.append(f_vector)

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(
            all_detection_preds.squeeze())  # [length, 1]

        # compute the loss
        detection_loss = detection_loss(all_detection_preds,
                                        all_binary_y.float())
        result['detection_loss'] = detection_loss

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_binary_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds,
                                         all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        if args.do_vis:
            all_features = torch.cat(all_features, 0).cpu().numpy()
            result['all_features'] = all_features

        ind_class_acc = metrics.ind_class_accuracy(all_detection_binary_preds,
                                                   all_y)

        result['ind_class_acc'] = ind_class_acc
        result['loss'] = total_loss / n_sample

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['all_y'] = all_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['score'] = y_score
        result['y_score'] = y_score
        result['all_pred'] = all_detection_binary_preds
        result['auc'] = roc_auc_score(all_binary_y, y_score)

        freeze_data['test_all_y'] = all_y.tolist()
        freeze_data['test_all_pred'] = all_detection_binary_preds.tolist()
        freeze_data['test_score'] = y_score

        return result
Example #15
    def eval(dataset):
        dev_dataloader = DataLoader(dataset, batch_size=args.predict_batch_size, shuffle=False, num_workers=2)
        n_sample = len(dev_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        G.eval()
        D.eval()
        E.eval()

        all_detection_preds = []
        all_class_preds = []

        for sample in tqdm.tqdm(dev_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                # n_class > 2 means a classifier is trained in addition to the discriminator
                if n_class > 2:
                    f_vector, discriminator_output, classification_output = D(real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)
                    all_class_preds.append(classification_output)

                # discriminator prediction only
                else:
                    f_vector, discriminator_output = D.detect_only(real_feature, return_feature=True)
                    all_detection_preds.append(discriminator_output)

        all_y = LongTensor(dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds, 0).cpu()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(all_detection_preds.squeeze())  # [length, 1]

        # print('all_detection_preds', all_detection_preds.size())
        # print('all_binary_y', all_binary_y.size())
        # compute the loss
        detection_loss = detection_loss(all_detection_preds.squeeze(), all_binary_y.float())
        result['detection_loss'] = detection_loss

        if n_class > 2:
            class_one_hot_preds = torch.cat(all_class_preds, 0).detach().cpu()  # one hot label
            class_loss = classified_loss(class_one_hot_preds, all_y)  # compute loss
            all_class_preds = torch.argmax(class_one_hot_preds, 1)  # label
            class_acc = metrics.ind_class_accuracy(all_class_preds, all_y, oos_index=0)  # accuracy for ind class
            logger.info(metrics.classification_report(all_y, all_class_preds, target_names=processor.id_to_label))

        # logger.info(metrics.classification_report(all_binary_y, all_detection_binary_preds, target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds, all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        if n_class > 2:
            result['class_loss'] = class_loss
            result['class_acc'] = class_acc

        freeze_data['valid_all_y'] = all_y
        freeze_data['vaild_all_pred'] = all_detection_binary_preds
        freeze_data['valid_score'] = y_score

        return result
Example #16
def evaluate_f1_no_mask(model, dl_test, save_dir, criterion_clsf = nn.CrossEntropyLoss().to(device), criterion_tgt = nn.CrossEntropyLoss(ignore_index=PAD).to(device), verbose = False):
    loss_test = 0
    pred_tags = []
    true_tags = []

    pred_clss = []
    true_clss = []
    criterion_clsf = criterion_clsf
    criterion_tgt = criterion_tgt
    idx2lbl = load_obj(save_dir+'idx2lbl.json')
    for enc, tgt, cls in dl_test[:]:
        model.eval()
        with torch.no_grad():
            enc = enc.to(device)
            tgt = tgt.to(device)
            cls = cls.to(device)
            enc_self_attn_mask = get_attn_pad_mask(enc, enc)
            enc_self_attn_mask.to(device)

            logits_tgt, logits_clsf = model(enc,enc_self_attn_mask)
            loss_tgt = criterion_tgt(logits_tgt.transpose(1, 2), tgt) # for masked LM
            loss_tgt = (loss_tgt.float()).mean()
            loss_clsf = criterion_clsf(logits_clsf, cls)# for sentence classification
            loss = loss_clsf + loss_tgt
            # loss = loss_clsf
            loss_test+=loss

        pad_mask = enc.data.eq(0).sum(axis = 1)

        score_tgt, tgt_idx = torch.max(logits_tgt,dim = -1)
        score_cls, cls_idx = torch.max(logits_clsf, dim = -1)

        for pre, true, pad_num in zip(tgt_idx, tgt, pad_mask):
            # slice by explicit length so that pad_num == 0 keeps the whole sequence
            seq_len = pre.size(0) - int(pad_num)
            pred_tags += pre[:seq_len].data.tolist()
            true_tags += true[:seq_len].data.tolist()

        # print(cls_idx.size())
        pred_clss += cls_idx.tolist()
        true_clss += cls.tolist()
        # print(len(pred_tags), len(true_tags))
        # print(pred_tags)
        # print(true_tags)
        # print(len(pred_clss), len(true_clss))
        # print(pred_clss)

        # print(true_clss)
        assert len(pred_tags) == len(true_tags)
        assert len(pred_clss) == len(true_clss)
    # print(pred_clss[-20:])
    # print(true_clss[-20:])
    # print(pred_tags[-20:])
    # print(true_tags[-20:])

    # print(enc[-20:])

    f1_tgt = f1_score(pred_tags, true_tags, average='micro')
    f1_cls = f1_score(pred_clss, true_clss, average='micro')

    # logging loss, f1 and report

    metrics = {}
    true_lbls = []
    pred_lbls = []

    for t,p in zip(true_tags,pred_tags):
        true_lbls.append(idx2lbl[str(t)])
        pred_lbls.append(idx2lbl[str(p)])

    f1_tgt_merged = f1_score_merged(true_lbls, pred_lbls)

    if verbose:
        report = classification_report(true_lbls, pred_lbls)
        print("============no_mask_slot================")
        print(report, flush=True)

    return loss_test/len(dl_test), f1_cls*100, f1_tgt*100, f1_tgt_merged
Example #17
    def test(dataset):
        # # load BERT and GAN
        # load_gan_model(D, G, config['gan_save_path'])
        # if args.fine_tune:
        #     load_model(E, path=config['bert_save_path'], model_name='bert')
        #
        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        pos.eval()
        E.eval()

        all_detection_preds = []
        all_class_preds = []
        all_features = []

        for sample in tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, pos1, pos2, pos_mask, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                out = pos(pos1, pos2, real_feature)
                all_detection_preds.append(out)

        all_y = LongTensor(
            dataset.dataset[:, -4].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(
            all_detection_preds.squeeze())  # [length, 1]

        # compute the loss
        detection_loss = detection_loss(all_detection_preds,
                                        all_binary_y.float())
        result['detection_loss'] = detection_loss

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_binary_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds,
                                         all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)

        return result
Example #18
def evaluate(model, iterator, f, ner_label, verbose = False):
    """Evaluate the model on `steps` batches."""
    # set model to evaluation mode
    model.eval()

    y_true = []
    y_pred = []
    Words, Is_heads, Tags, Y, Y_hat = [], [], [], [], []
    with torch.no_grad():
        for i, batch in enumerate(iterator):
            words, input_ids, is_heads, tags, input_tags, entity_label, seqlens = batch

            _, _, y_hat = model(input_ids, input_tags, entity_label)  # y_hat: (N, T)

            Words.extend(words)
            Is_heads.extend(is_heads)
            Tags.extend(tags)
            Y.extend(input_tags.numpy().tolist())
            Y_hat.extend(y_hat.cpu().numpy().tolist())
    ## get results and save them
    with open("temp", 'w') as fout:
        for words, is_heads, tags, y_hat in zip(Words, Is_heads, Tags, Y_hat):
            y_hat = [hat for head, hat in zip(is_heads, y_hat) if head == 1]
            preds = [ner_label.idx2tag[hat] for hat in y_hat]
            if len(preds[1:-1]) > 0:
                y_pred.append(preds[1:-1])
            if len(tags.split()[1:-1]) > 0:
                y_true.append(tags.split()[1:-1])
            assert len(preds) == len(words.split()) == len(tags.split())
            for w, t, p in zip(words.split()[1:-1], tags.split()[1:-1], preds[1:-1]):
                fout.write(f"{w} {t} {p}\n")
            fout.write("\n")

    assert len(y_pred) == len(y_true)

    # logging loss, f1 and report
    p, r, f1 = f1_score(y_true, y_pred)

    # metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    # logging.info("- {} metrics: ".format(mark) + metrics_str)
    #
    # if verbose:
    #     report = classification_report(true_tags, pred_tags)
    #     logging.info(report)

    final = f + ".P%.4f_R%.4f_F%.4f" %(p, r, f1)
    with open(final, 'w') as fout:
        result = open("temp", "r").read()
        fout.write(f"{result}\n")

        fout.write(f"precision={p}\n")
        fout.write(f"recall={r}\n")
        fout.write(f"f1={f1}\n")
        if verbose:
            report = classification_report(y_true, y_pred)
            print(report)

    os.remove("temp")

    print("precision=%.2f"%p)
    print("recall=%.2f"%r)
    print("f1=%.2f"%f1)
    return p, r, f1
Example #19
def evaluate(args, model, eval_dataloader, params):
    model.eval()
    # track the running average loss
    loss_avg = utils.RunningAverage()
    # init
    pre_result = []
    gold_result = []

    # get data
    for batch in tqdm(eval_dataloader, unit='Batch'):
        # to device
        batch = tuple(t.to(params.device) for t in batch)
        input_ids, input_mask, segment_ids, start_pos, end_pos, ne_cate = batch

        with torch.no_grad():
            # get loss
            loss = model(input_ids,
                         token_type_ids=segment_ids,
                         attention_mask=input_mask,
                         start_positions=start_pos,
                         end_positions=end_pos)
            if params.n_gpu > 1 and args.multi_gpu:
                loss = loss.mean()  # mean() to average on multi-gpu.
            # update the average loss
            loss_avg.update(loss.item())

            # inference
            start_logits, end_logits = model(input_ids=input_ids,
                                             token_type_ids=segment_ids,
                                             attention_mask=input_mask)

        # gold label
        start_pos = start_pos.to("cpu").numpy().tolist()
        end_pos = end_pos.to("cpu").numpy().tolist()
        input_mask = input_mask.to('cpu').numpy().tolist()
        ne_cate = ne_cate.to("cpu").numpy().tolist()

        # predict label
        start_label = start_logits.detach().cpu().numpy().tolist()
        end_label = end_logits.detach().cpu().numpy().tolist()

        # idx to label
        cate_idx2label = {
            idx: value
            for idx, value in enumerate(params.label_list)
        }

        # get bio result
        for start_p, end_p, start_g, end_g, input_mask_s, ne_cate_s in zip(
                start_label, end_label, start_pos, end_pos, input_mask,
                ne_cate):
            ne_cate_str = cate_idx2label[ne_cate_s]
            # length of the query
            q_len = len(IO2QUERY[ne_cate_str])
            # effective length
            act_len = sum(input_mask_s[q_len + 2:-1])
            # get BIO labels
            pre_bio_labels = pointer2bio(start_p[q_len + 2:q_len + 2 +
                                                 act_len],
                                         end_p[q_len + 2:q_len + 2 + act_len],
                                         ne_cate=ne_cate_str)
            gold_bio_labels = pointer2bio(start_g[q_len + 2:q_len + 2 +
                                                  act_len],
                                          end_g[q_len + 2:q_len + 2 + act_len],
                                          ne_cate=ne_cate_str)
            pre_result.append(pre_bio_labels)
            gold_result.append(gold_bio_labels)

    # metrics
    f1 = f1_score(y_true=gold_result, y_pred=pre_result)
    acc = accuracy_score(y_true=gold_result, y_pred=pre_result)

    # f1, acc
    metrics = {'loss': loss_avg(), 'f1': f1, 'acc': acc}
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v)
                            for k, v in metrics.items())
    logging.info("- {} metrics: ".format('Val') + metrics_str)
    # f1 classification report
    report = classification_report(y_true=gold_result, y_pred=pre_result)
    logging.info(report)

    return metrics
Example #20
                            score, preds = model(sents, lens)
                            for i, l in enumerate(lens):
                                true_labels.append(
                                    seqid2text(labs[i, :l], ix_to_lab))
                                pred_labels.append(
                                    seqid2text(preds[i, :l], ix_to_lab))
                        f1 = f1_score(true_labels, pred_labels)
                        if (f1 > best_f1):
                            torch.save(model.state_dict(),
                                       "models/model-27-02-20")
                            best_f1 = f1

                        print("Accuracy: {:.4f}".format(
                            accuracy_score(true_labels, pred_labels)))
                        print("F1 score: {:.4f}".format(f1))
                        print(classification_report(true_labels, pred_labels))
                        model.train(True)
    if args.do_test:
        with torch.no_grad():
            print("Evaluation on test set")
            model.load_state_dict(
                torch.load("models/model-27-02-20", map_location=device))
            model.eval()
            true_labels = []
            pred_labels = []
            word_sents = []
            for batch in test_data_loader:
                sents, labs, lens = batch
                sents = pad_sequence(sents, batch_first=True).to(device)
                labs = pad_sequence(labs, batch_first=True).to(device)
                lens = torch.tensor(lens).to(device)
Example #21
    def test(dataset):
        # # load BERT and GAN
        # load_gan_model(D, G, config['gan_save_path'])
        # if args.fine_tune:
        #     load_model(E, path=config['bert_save_path'], model_name='bert')
        #
        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.CrossEntropyLoss().to(device)

        model.eval()

        all_detection_preds = []
        all_detection_logit = []
        total_loss = 0

        for sample in tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector
            with torch.no_grad():
                logit = model(token, mask, type_ids)
                all_detection_logit.append(logit)
                all_detection_preds.append(torch.argmax(logit, 1))
                total_loss += detection_loss(logit, y.long())

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        # all_detection_binary_preds = convert_to_int_by_threshold(all_detection_preds.squeeze())  # [length, 1]
        all_detection_logit = torch.cat(all_detection_logit, 0).cpu()

        # compute the loss
        result['detection_loss'] = total_loss

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_preds, all_binary_y)

        # y_score = all_detection_preds.squeeze().tolist()
        y_score = all_detection_logit.softmax(1)[:, 1].tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        test_logit = all_detection_logit.tolist()
        result['test_logit'] = test_logit

        result['eer'] = eer
        result['all_detection_preds'] = all_detection_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)

        return result
Example #22
def predict(features, stage, dataframe_path, ch_path, hmc_path):
    """Gets label predictions from the previously fit model

    Parameters
    ----------
    features : str
        The specified type of features - ["bert", "tfidf"]

    stage : str
        Predict stage; which data to use - ["dev", "test"]

    dataframe_path : str
        The path to the Pandas dataframe which contains the
        preprocessed data

    ch_path : str
        Location of the class hierarchy file

    hmc_path : str
        Location of the pretrained HierarchicalClassifier
    """
    df = pd.read_pickle(dataframe_path)
    topics = list(df)[5:]

    df, y = prep_df_for_train(df, None)
    print(y.shape)
    print(df.info())

    df.reset_index(inplace=True, drop=True)

    # load features
    if features == "bert":
        path = f"bert-multilingual/bert_{stage}.npy"
        f = np.load(path, allow_pickle=True)
        print(
            f"Total: {len(f)}, tokens: {len(f[0]['embeddings'][0])}, embeds: ",
            f"{len(f[0]['embeddings'][1])}x{len(f[0]['embeddings'][1][0])}",
        )
        embeddings_list = f

        # parse from pickle
        embeddings = []
        tokens_list = []
        y = []

        for entry in embeddings_list:
            _y = label_ids_to_labels(entry["label_ids"], topics)

            y.append(_y)
            tokens, vectors = entry["embeddings"]

            tokens_list.append(tokens)
            embeddings.append(vectors)
            assert len(vectors) == 4
            assert len(vectors[0]) == 768

        assert len(embeddings) == len(y)
        print(f"Number of examples: {len(embeddings)}")

        f.close()

        X = embeddings
        print(len(X), len(X[0]), len(y))

        X = np.asarray(X).reshape(len(X), 3072)
        print(X.shape)

    elif features == "tfidf":
        y = df["topics"]

        xtrain, xtest, ytrain, ytest = train_test_split(
            df["clean_text_tokenized"], y, test_size=0.2, random_state=42)
        xtrain, xdev, ytrain, ydev = train_test_split(xtrain,
                                                      ytrain,
                                                      test_size=0.25,
                                                      random_state=42)
        print(f"train shape, {xtrain.shape}")
        print(f"dev shape, {xdev.shape}")
        print(f"test shape, {xtest.shape}")

        # create TF-IDF features
        tfidf_vectorizer = TfidfVectorizer(max_df=0.8,
                                           max_features=10000,
                                           preprocessor=" ".join)
        xtrain = tfidf_vectorizer.fit_transform(xtrain)
        xdev = tfidf_vectorizer.transform(xdev)
        xtest = tfidf_vectorizer.transform(xtest)

        print(xdev.shape, xtest.shape)

        if stage == "dev":
            X = pd.DataFrame(xdev.todense())
            y = ydev
        elif stage == "test":
            X = pd.DataFrame(xtest.todense())
            y = ytest

        X.reset_index(inplace=True, drop=True)
        y.reset_index(inplace=True, drop=True)

    # load trained model (dill-pickled for both feature types)
    with open(hmc_path, "rb") as f:
        clf = dill.load(f)

    # load class hierarchy
    ch = None
    with open(ch_path, "rb") as f:
        ch = pickle.load(f)

    # get predictions
    ypred = clf.predict(X)

    metrics.classification_report(ch, y, pd.DataFrame(ypred))
    metrics.EXTEND_PRED = False
    print("=" * 100)
    metrics.classification_report(ch, y, pd.DataFrame(ypred))
    metrics.EXTEND_PRED = True

    ypred_topk = clf.predict_topk(X, 5)
    metrics.classification_report_topk(ch, y, ypred_topk, 1)
    metrics.classification_report_topk(ch, y, ypred_topk, 3)
    metrics.classification_report_topk(ch, y, ypred_topk, 5)
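A hypothetical invocation of predict(); every path below is a placeholder, not taken from the source:

predict(features="tfidf",
        stage="dev",
        dataframe_path="data/preprocessed.pkl",
        ch_path="models/class_hierarchy.pkl",
        hmc_path="models/hmc_classifier.dill")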

Example #23
best_weighted_f1 = 0.

for epoch in range(train_config.n_epochs):
    start = time.time()

    train_loss.reset_states()
    # train_accuracy.reset_states()
    train_confusion_matrix.reset_states()

    for (batch, (speaker, utterance, emotion)) in enumerate(train_dataset):
        train_step(speaker, utterance, emotion)

        if batch % 20 == 0:
            report = metrics.classification_report(train_confusion_matrix)
            print(
                'Epoch {} Batch {} Loss {:.4f} Micro-f1 {:.4f} Macro-f1 {:.4f}'
                ' Weighted-f1 {:.4f} Accuracy {:.4f}'.format(
                    epoch + 1, batch, train_loss.result(), report[1].numpy(),
                    report[2].numpy(), report[3].numpy(), report[4].numpy()))
            with np.printoptions(precision=4, suppress=True):
                print('Metrics of classes:\n', report[0].numpy())

    if (epoch + 1) % 5 == 0:
        ckpt_save_path = ckpt_manager.save()
        print('Saving checkpoint for epoch {} at {}'.format(
            epoch + 1, ckpt_save_path))

    report = metrics.classification_report(train_confusion_matrix)
    print('Epoch {} Loss {:.4f} Micro-f1 {:.4f} Macro-f1 {:.4f}'
          ' Weighted-f1 {:.4f} Accuracy {:.4f}'.format(
              epoch + 1, train_loss.result(), report[1].numpy(),
              report[2].numpy(), report[3].numpy(), report[4].numpy()))
Example #24
    def test(dataset):
        # load BERT and GAN
        load_gan_model(D, G, config['gan_save_path'])
        if args.fine_tune:
            load_model(E, path=config['bert_save_path'], model_name='bert')

        test_dataloader = DataLoader(dataset, batch_size=args.predict_batch_size, shuffle=False, num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        G.eval()
        D.eval()
        E.eval()

        all_detection_preds = []
        all_class_preds = []
        all_features = []

        for sample in tqdm.tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            if args.dataset == 'smp':
                token, mask, type_ids, knowledge_tag, y = sample
            if args.dataset == 'oos-eval':
                token, mask, type_ids, y = sample
            batch = len(token)

            anchor_ood = torch.zeros(args.num_outcomes, dtype=torch.float).to(device) + torch.tensor(anchor0, dtype=torch.float).to(device)
            anchor_ind = torch.zeros(args.num_outcomes, dtype=torch.float).to(device) + torch.tensor(anchor1, dtype=torch.float).to(device)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                # n_class > 2 means a classifier is trained in addition to the discriminator
                if n_class > 2:
                    f_vector, discriminator_output, classification_output = D(real_feature, return_feature=True)
                    all_class_preds.append(classification_output)

                # discriminator prediction only
                else:
                    f_vector, discriminator_output = D.detect_only(real_feature, return_feature=True)

                discriminator_output = discriminator_output.log_softmax(1).exp()

                if args.do_vis:
                    all_features.append(f_vector)

                divergence_to_prediction = []

                # logger.info('discriminator_output: {}'.format(discriminator_output))

                for output in discriminator_output:
                    d_ood = triplet_loss(anchor_ood, output, skewness=args.positive_skew)
                    d_ind = triplet_loss(anchor_ind, output, skewness=args.negative_skew)
                    # logger.info('d_ood : d_ind = {} : {}'.format(d_ood, d_ind))
                    # divergence_to_prediction.append(1 if d_ind < d_ood else 0)
                    divergence_to_prediction.append(d_ood / (d_ind + d_ood))
                all_detection_preds.extend(divergence_to_prediction)

        all_y = LongTensor(dataset.dataset[:, -1].astype(int)).cpu()  # [length, n_class]
        all_binary_y = (all_y != 0).long()  # [length, 1] label 0 is oos

        # use realness_D for OOD detection
        # all_detection_preds = torch.cat(all_detection_preds, 0).cpu()  # [length, 1]
        # all_detection_binary_preds = convert_to_int_by_threshold(all_detection_preds.squeeze())  # [length, 1]
        all_detection_preds = FloatTensor(all_detection_preds).cpu()
        # all_detection_binary_preds = all_detection_preds.squeeze()  # [length, 1]
        all_detection_binary_preds = convert_to_int_by_threshold(all_detection_preds.squeeze())  # [length, 1]

        # logger.info('all_detection_preds: {}'.format(all_detection_preds))
        # logger.info('all_binary_y: {}'.format(all_binary_y))

        # compute the loss
        detection_loss = detection_loss(all_detection_preds, all_binary_y.float())
        result['detection_loss'] = detection_loss

        if n_class > 2:
            class_one_hot_preds = torch.cat(all_class_preds, 0).detach().cpu()  # one hot label
            class_loss = classified_loss(class_one_hot_preds, all_y)  # compute loss
            all_class_preds = torch.argmax(class_one_hot_preds, 1)  # label
            class_acc = metrics.ind_class_accuracy(all_class_preds, all_y, oos_index=0)  # accuracy for ind class
            logger.info(metrics.classification_report(all_y, all_class_preds, target_names=processor.id_to_label))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds, all_binary_y)

        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['all_y'] = all_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['score'] = y_score
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        result['fpr95'] = ErrorRateAt95Recall(all_binary_y, y_score)
        if n_class > 2:
            result['class_loss'] = class_loss
            result['class_acc'] = class_acc
        if args.do_vis:
            all_features = torch.cat(all_features, 0).cpu().numpy()
            result['all_features'] = all_features

        freeze_data['test_all_y'] = all_y.tolist()
        freeze_data['test_all_pred'] = all_detection_binary_preds.tolist()
        freeze_data['test_score'] = y_score

        return result
Example #25
def evaluate(dataloader,
             model,
             word_vocab,
             label_vocab,
             output_path,
             prefix,
             use_gpu=False):
    model.eval()
    prediction = []
    trues_list = []
    preds_list = []
    for batch in dataloader:
        batch_text, seq_length, word_perm_idx = batch['text']
        batch_label, _, _ = batch['label']
        char_inputs = batch['char']
        char_inputs = char_inputs[word_perm_idx]
        char_dim = char_inputs.size(-1)
        char_inputs = char_inputs.contiguous().view(-1, char_dim)
        if use_gpu:
            batch_text = batch_text.cuda()
            batch_label = batch_label.cuda()
            char_inputs = char_inputs.cuda()
        mask = get_mask(batch_text)
        with torch.no_grad():
            tag_seq = model(batch_text, seq_length, char_inputs, batch_label,
                            mask)

        for line_tensor, labels_tensor, predicts_tensor in zip(
                batch_text, batch_label, tag_seq):
            for word_tensor, label_tensor, predict_tensor in zip(
                    line_tensor, labels_tensor, predicts_tensor):
                if word_tensor.item() == 0:
                    break
                line = [
                    word_vocab.id_to_word(word_tensor.item()),
                    label_vocab.id_to_label(label_tensor.item()),
                    label_vocab.id_to_label(predict_tensor.item())
                ]
                trues_list.append(line[1])
                preds_list.append(line[2])
                prediction.append(' '.join(line))
            prediction.append('')

    true_entities = get_entities_bio(trues_list)
    pred_entities = get_entities_bio(preds_list)
    print(len(trues_list), len(preds_list), len(prediction))

    results = {
        "f1": f1_score(true_entities, pred_entities),
        'report': classification_report(true_entities, pred_entities)
    }

    with open(os.path.join(output_path, '%s_pred.txt' % prefix),
              'w',
              encoding='utf-8') as f:
        f.write('\n'.join(prediction))

    with open(os.path.join(output_path, '%s_score.txt' % prefix),
              "a") as writer:
        writer.write("***** Eval results {} *****\n".format(prefix))
        for key in sorted(results.keys()):
            if key == 'report_dict':
                continue
            writer.write("{} = {}\n".format(key, str(results[key])))

    return results["f1"]
Example #26
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn import metrics
import joblib

## load dataset.csv
dataframe = pd.read_csv("csv/dataset.csv")
print(dataframe.head())

# Split into training and test data
data_x = dataframe.drop(["Label"], axis=1)
data_y = dataframe["Label"]
trained_x, test_x, trained_y, test_y = train_test_split(data_x, data_y, test_size=0.2, random_state=4)

## Build the model
model = RandomForestClassifier(n_estimators=100, max_depth=5)
model.fit(trained_x, trained_y)
joblib.dump(model, "rf_malaria_100_5")

# Make predictions and print a classification report on the held-out test data
predictions = model.predict(test_x)
print(metrics.classification_report(test_y, predictions))
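Because the model is persisted with joblib.dump above, a later session can reload it for inference; a minimal sketch reusing the filename and test split from this script:

# Reload the persisted random forest and reproduce the report.
loaded_model = joblib.load("rf_malaria_100_5")
print(metrics.classification_report(test_y, loaded_model.predict(test_x)))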
Example #27
def evaluate(args, model, eval_dataloader, params):
    model.eval()
    # track the running average loss
    loss_avg = utils.RunningAverage()
    # init
    pre_result = []
    gold_result = []

    # get data
    for batch in tqdm(eval_dataloader, unit='Batch', ascii=True):
        # fetch the next training batch
        batch = tuple(
            t.to(params.device) if isinstance(t, torch.Tensor) else t
            for t in batch)
        input_ids, input_mask, tags, cls_labels, random_cls_ids, \
        random_start_posis, random_end_posis, _, _ = batch

        with torch.no_grad():
            # get loss
            loss = model(input_ids,
                         attention_mask=input_mask,
                         cls_labels=cls_labels,
                         cls_ids=random_cls_ids,
                         start_positions=random_start_posis,
                         end_positions=random_end_posis)
            if params.n_gpu > 1 and args.multi_gpu:
                loss = loss.mean()  # mean() to average on multi-gpu.
            # update the average loss
            loss_avg.update(loss.item())

            # inference
            cls_pre, start_pre, end_pre = model(input_ids=input_ids,
                                                attention_mask=input_mask)

        # gold label
        tags = tags[0]

        # predict label
        start_pre = start_pre.detach().cpu().numpy().tolist()
        end_pre = end_pre.detach().cpu().numpy().tolist()

        # idx to label
        cate_idx2label = {
            idx: str(idx + 1)
            for idx, _ in enumerate(params.label_list)
        }

        # get bio result
        # effective length
        act_len = sum(input_mask[0])
        # merge the results for one sample (used for metrics)
        old_bio_labels = ['O'] * act_len
        for start_p, end_p, cls_p in zip(start_pre, end_pre, cls_pre):
            pre_bio_labels = pointer2bio(start_p[:act_len],
                                         end_p[:act_len],
                                         ne_cate=cate_idx2label[cls_p])
            old_bio_labels = [
                new if old == 'O' else old
                for old, new in zip(old_bio_labels, pre_bio_labels)
            ]

        pre_result.append(old_bio_labels)
        gold_result.append(tags[:act_len])

    # metrics
    f1 = f1_score(y_true=gold_result, y_pred=pre_result)
    acc = accuracy_score(y_true=gold_result, y_pred=pre_result)

    # f1, acc
    metrics = {'loss': loss_avg(), 'f1': f1, 'acc': acc}
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v)
                            for k, v in metrics.items())
    logging.info("- {} metrics: ".format('Val') + metrics_str)
    # f1 classification report
    report = classification_report(y_true=gold_result, y_pred=pre_result)
    logging.info(report)

    return metrics
Example #28
    def test(dataset):
        # load BERT and GAN
        load_gan_model(D, G, config['gan_save_path'])
        if args.fine_tune:
            load_model(E, path=config['bert_save_path'], model_name='bert')

        test_dataloader = DataLoader(dataset,
                                     batch_size=args.predict_batch_size,
                                     shuffle=False,
                                     num_workers=2)
        n_sample = len(test_dataloader)
        result = dict()

        # Loss function
        detection_loss = torch.nn.BCELoss().to(device)
        detection_loss_v2 = torch.nn.CrossEntropyLoss().to(device)
        classified_loss = torch.nn.CrossEntropyLoss(ignore_index=0).to(device)

        G.eval()
        D.eval()
        E.eval()
        detector.eval()

        all_detection_preds = []
        all_class_preds = []
        all_features = []
        all_logit = []

        for sample in tqdm.tqdm(test_dataloader):
            sample = (i.to(device) for i in sample)
            token, mask, type_ids, y = sample
            batch = len(token)

            # -------------------------evaluate D------------------------- #
            # BERT encode sentence to feature vector

            with torch.no_grad():
                sequence_output, pooled_output = E(token, mask, type_ids)
                real_feature = pooled_output

                # n_class > 2 means a classifier is trained in addition to the discriminator
                if n_class > 2:
                    # f_vector, discriminator_output, classification_output = D(real_feature, return_feature=True)
                    # all_detection_preds.append(discriminator_output)
                    # all_class_preds.append(classification_output)
                    pass

                else:
                    if args.loss == 'v1':
                        detector_out = detector(real_feature)
                        all_detection_preds.append(detector_out)
                    else:
                        detector_out = detector(real_feature)
                        all_logit.append(detector_out)
                        all_detection_preds.append(
                            torch.argmax(detector_out, 1))
                # if args.do_vis:
                #     all_features.append(f_vector)

        all_y = LongTensor(
            dataset.dataset[:, -1].astype(int)).cpu()  # [length] class labels
        all_binary_y = (all_y != 0).long()  # [length]; label 0 is oos
        all_detection_preds = torch.cat(all_detection_preds,
                                        0).cpu()  # [length, 1]
        if args.loss == 'v1':
            all_detection_binary_preds = convert_to_int_by_threshold(
                all_detection_preds.squeeze())  # [length, 1]
        else:
            all_detection_binary_preds = all_detection_preds
            all_logit = torch.cat(all_logit, 0).cpu()

        # compute the detection loss
        if args.loss == 'v1':
            loss = detection_loss(all_detection_preds, all_binary_y.float())
        else:
            loss = detection_loss_v2(all_logit, all_y.long())
        result['detection_loss'] = loss

        if n_class > 2:
            class_one_hot_preds = torch.cat(all_class_preds,
                                            0).detach().cpu()  # one hot label
            class_loss = classified_loss(class_one_hot_preds,
                                         all_y)  # compute loss
            all_class_preds = torch.argmax(class_one_hot_preds, 1)  # label
            class_acc = metrics.ind_class_accuracy(
                all_class_preds, all_y, oos_index=0)  # accuracy for ind class
            logger.info(
                metrics.classification_report(
                    all_y, all_class_preds,
                    target_names=processor.id_to_label))

        logger.info(
            metrics.classification_report(all_binary_y,
                                          all_detection_binary_preds,
                                          target_names=['oos', 'in']))

        # report
        oos_ind_precision, oos_ind_recall, oos_ind_fscore, _ = metrics.binary_recall_fscore(
            all_detection_binary_preds, all_binary_y)
        detection_acc = metrics.accuracy(all_detection_binary_preds,
                                         all_binary_y)

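        # NOTE: with the v2 loss all_detection_preds holds hard 0/1
        # predictions rather than probabilities, so the EER and AUC
        # computed below collapse to a single operating point.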
        y_score = all_detection_preds.squeeze().tolist()
        eer = metrics.cal_eer(all_binary_y, y_score)

        result['eer'] = eer
        result['all_detection_binary_preds'] = all_detection_binary_preds
        result['detection_acc'] = detection_acc
        result['all_binary_y'] = all_binary_y
        result['all_y'] = all_y
        result['oos_ind_precision'] = oos_ind_precision
        result['oos_ind_recall'] = oos_ind_recall
        result['oos_ind_f_score'] = oos_ind_fscore
        result['score'] = y_score
        result['y_score'] = y_score
        result['auc'] = roc_auc_score(all_binary_y, y_score)
        if n_class > 2:
            result['class_loss'] = class_loss
            result['class_acc'] = class_acc
        if args.do_vis:
            all_features = torch.cat(all_features, 0).cpu().numpy()
            result['all_features'] = all_features

        freeze_data['test_all_y'] = all_y.tolist()
        freeze_data['test_all_pred'] = all_detection_binary_preds.tolist()
        freeze_data['test_score'] = y_score

        return result
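Note: convert_to_int_by_threshold and metrics.cal_eer are not defined in these snippets. A minimal sketch of both, assuming the v1 detector emits sigmoid probabilities binarized at 0.5 and that the equal error rate is read off the ROC curve; the helper bodies below are assumptions, not the original implementations.

import numpy as np
import torch
from sklearn.metrics import roc_curve

def convert_to_int_by_threshold(scores, threshold=0.5):
    # Binarize detector probabilities; the 0.5 threshold is assumed.
    return (torch.as_tensor(scores) >= threshold).long()

def cal_eer(y_true, y_score):
    # Equal error rate: the ROC operating point where the false
    # positive rate equals the false negative rate (1 - TPR).
    fpr, tpr, _ = roc_curve(y_true, y_score)
    fnr = 1 - tpr
    idx = int(np.nanargmin(np.abs(fpr - fnr)))
    return float((fpr[idx] + fnr[idx]) / 2)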
Example #29
0
def evaluate(args, model, eval_dataloader, params):
    model.eval()
    # track the running average of the loss
    loss_avg = utils.RunningAverage()
    # init
    pre_result = []
    gold_result = []

    # get data
    for batch in tqdm(eval_dataloader, unit='Batch', ascii=True):
        # fetch the next evaluation batch
        batch = tuple(t.to(params.device) for t in batch)
        input_ids, input_mask, start_pos, end_pos, _, _ = batch

        with torch.no_grad():
            # get loss
            loss = model(input_ids, attention_mask=input_mask,
                         start_positions=start_pos, end_positions=end_pos)
            if params.n_gpu > 1 and args.multi_gpu:
                loss = loss.mean()  # mean() to average on multi-gpu.
            # update the average loss
            loss_avg.update(loss.item())

            # inference
            start_pre, end_pre = model(input_ids=input_ids, attention_mask=input_mask)

        # gold label
        start_pos = start_pos.to("cpu").numpy().transpose((0, 2, 1)).tolist()  # (batch_size, tag_size, seq_len)
        end_pos = end_pos.to("cpu").numpy().transpose((0, 2, 1)).tolist()
        input_mask = input_mask.to('cpu').numpy().tolist()

        # predict label
        start_label = start_pre.detach().cpu().numpy().transpose((0, 2, 1)).tolist()
        end_label = end_pre.detach().cpu().numpy().transpose((0, 2, 1)).tolist()

        # idx to label
        cate_idx2label = {idx: str(idx + 1) for idx, _ in enumerate(params.label_list)}

        # get bio result
        for start_p_s, end_p_s, start_g_s, end_g_s, input_mask_s in zip(start_label, end_label,
                                                                        start_pos, end_pos, input_mask):
            # effective (non-padded) sequence length
            act_len = sum(input_mask_s)
            for idx, (start_p, end_p, start_g, end_g) in enumerate(zip(start_p_s,
                                                                       end_p_s, start_g_s, end_g_s)):
                pre_bio_labels = pointer2bio(start_p[:act_len], end_p[:act_len],
                                             ne_cate=cate_idx2label[idx])
                gold_bio_labels = pointer2bio(start_g[:act_len], end_g[:act_len],
                                              ne_cate=cate_idx2label[idx])
                pre_result.append(pre_bio_labels)
                gold_result.append(gold_bio_labels)

    # metrics
    f1 = f1_score(y_true=gold_result, y_pred=pre_result)
    acc = accuracy_score(y_true=gold_result, y_pred=pre_result)

    # f1, acc
    metrics = {'loss': loss_avg(), 'f1': f1, 'acc': acc}
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    logging.info("- {} metrics: ".format('Val') + metrics_str)
    # f1 classification report
    report = classification_report(y_true=gold_result, y_pred=pre_result)
    logging.info(report)

    return metrics
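Both evaluate snippets rely on pointer2bio, which is not shown here. A plausible sketch, assuming start_seq/end_seq are parallel 0/1 pointer sequences over tokens and ne_cate is the category suffix for the B-/I- tags; the pairing rule (nearest end pointer at or after each start) is an assumption:

def pointer2bio(start_seq, end_seq, ne_cate):
    # Convert 0/1 start/end pointer sequences into a BIO tag sequence.
    bio_labels = ['O'] * len(start_seq)
    i = 0
    while i < len(start_seq):
        if start_seq[i] == 1:
            # pair each start with the nearest end at or after it
            for j in range(i, len(end_seq)):
                if end_seq[j] == 1:
                    bio_labels[i] = 'B-' + ne_cate
                    for k in range(i + 1, j + 1):
                        bio_labels[k] = 'I-' + ne_cate
                    i = j  # resume scanning after this entity
                    break
        i += 1
    return bio_labels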
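The metric calls above (f1_score, accuracy_score and classification_report taking y_true/y_pred as lists of tag sequences) match the interface of the seqeval package. Assuming that is the library in use, a toy usage example:

from seqeval.metrics import accuracy_score, classification_report, f1_score

gold = [['B-1', 'I-1', 'O', 'O'], ['O', 'B-2', 'I-2', 'O']]
pred = [['B-1', 'I-1', 'O', 'O'], ['O', 'B-2', 'O', 'O']]

print(f1_score(y_true=gold, y_pred=pred))        # entity-level micro F1
print(accuracy_score(y_true=gold, y_pred=pred))  # token-level accuracy
print(classification_report(y_true=gold, y_pred=pred))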