def for_each_fold(fold, folds, data, labels, model, error_function):

    (x_train, y_train), (x_test, y_test) = partition_data(data, labels, fold, folds)
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # Choose the metric based on the error_function passed
    if error_function is None:  # default to precision
        error = precision_score(y_test, y_pred)
    elif error_function == 'precision':
        error = precision_score(y_test, y_pred)
    elif error_function == 'accuracy':
        error = accuracy_score(y_test, y_pred)
    elif error_function == 'recall':
        error = recall_score(y_test, y_pred)
    elif error_function == 'f1':
        error = f1_score(y_test, y_pred)
    else:
        raise ValueError('%s error function is not defined.' % error_function)

    return {'expected labels': y_test,
            'predicted labels': y_pred,
            'errors': [error]}
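
A minimal usage sketch, assuming partition_data returns ((x_train, y_train), (x_test, y_test)) for the given fold and that the metric functions come from sklearn.metrics (neither is shown in the snippet):

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score)

# toy binary data for illustration only
rng = np.random.default_rng(0)
data = rng.normal(size=(100, 4))
labels = (data[:, 0] > 0).astype(int)

folds = 5
model = LogisticRegression()
results = [for_each_fold(fold, folds, data, labels, model, 'accuracy')
           for fold in range(folds)]
print(sum(r['errors'][0] for r in results) / folds)  # mean CV accuracy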
Example No. 2
def test_accuracy_score_simple():
    y_true_np = np.array([1, 0, 1, 0])
    y_pred_np = np.array([1, 1, 0, 0])
    y_true = torch.Tensor(y_true_np)
    y_pred = torch.Tensor(y_pred_np)
    acc = accuracy_score(y_pred, y_true)
    assert acc == 0.5
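
The snippet never defines the tensor-friendly accuracy_score it tests; a minimal sketch that would satisfy this test (an assumption, since the project's own implementation isn't shown):

import torch

def accuracy_score(y_pred: torch.Tensor, y_true: torch.Tensor) -> float:
    # fraction of positions where prediction and truth agree
    return (y_pred == y_true).float().mean().item()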
Example No. 3
def test(classifier, x_test, y_test):
    prediction = classifier.predict(x_test)
    print("Confusion Matrix for the given Decision Tree Classifier model:")
    print(confusion_matrix(y_test, prediction))
    print("Classification Report for the given Decision Tree Classifier:")
    print(classification_report(y_test, prediction))
    print("Accuracy Score:", accuracy_score(y_test, prediction))
    return
Example No. 4
File: main.py  Project: crab-a/lab4
def run_1nn(points):
    m = KNN(1)
    m.train(points)
    predicted = m.predict(points)
    real = []
    for point in points:
        real.append(point.label)
    print(accuracy_score(real, predicted))
Example No. 5
File: hmc.py  Project: zbxzc35/hmc
 def score(self, X, y):
     """
     Returns the mean accuracy on the given test data (X, y).
     """
     # Check that the trees have been fit
     self._check_fit()
     y_pred = pd.DataFrame(self.predict(X), columns=['y_hat'], index=y.index)
     return metrics.accuracy_score(self.class_hierarchy, y, y_pred)
Example No. 6
def get_best_model(X, y, clf, kf, clf_name, fitur, filename, show=False):
    """
	fungsi untuk mendapatkan model terbaik dari hasil k-fold

	return best_model: model terbaik, dengan tolak ukur gmean
	
	parameter:
	X = data per jenis fitur
	y = label dari data
	clf = object classifier
	kf = object K-Fold
	show = boolean untuk mencetak proses pencarian model terbaik
	"""

    performance_total = 0
    best_fold_performance = -100
    best_fold_index = -1
    best_model = None

    if show: print('\t\t', end='')

    performances = []
    performances.append(clf_name.upper() + '-' + fitur.upper())

    for index, (train_index, test_index) in enumerate(kf):
        X_train_fold, X_test_fold = X[train_index], X[test_index]
        y_train_fold, y_test_fold = y[train_index], y[test_index]
        clf_now = clf.fit(X_train_fold, y_train_fold)
        pred = clf.predict(X_test_fold)
        acc = round(accuracy_score(y_test_fold, pred) * 100, 2)
        gmean_score = round(gmean(y_test_fold, pred) * 100, 2)
        selected_metric_score = gmean_score

        if selected_metric_score > best_fold_performance:
            best_fold_performance = selected_metric_score
            best_fold_index = index
            best_model = clf_now

        if show: print(selected_metric_score, end=' ')

        performance_total += selected_metric_score

        performances.append(selected_metric_score)

    performance_avg = round(performance_total / (index + 1), 2)  # average over the actual number of folds
    performances.append(performance_avg)

    if show:
        print(
            '\n\t\tbest index: {}, best performance: {}, performance avg: {}\n'
            .format(best_fold_index + 1, best_fold_performance,
                    performance_avg))

    with open(filename, 'a', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(performances)

    return best_model
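
A usage sketch, assuming gmean is the project's own geometric-mean scoring helper and that accuracy_score and csv are imported in the module (none of these imports are shown in the snippet):

import numpy as np
from sklearn.model_selection import KFold
from sklearn.svm import SVC

X = np.random.rand(100, 20)
y = np.random.randint(0, 2, size=100)
splits = KFold(n_splits=10).split(X)  # iterable of (train_index, test_index)

best_model = get_best_model(X, y, SVC(), splits, clf_name='svm',
                            fitur='tfidf', filename='kfold_results.csv',
                            show=True)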
Example No. 7
def evaluate(model, data_iterator, params, mark='Eval', verbose=True):
    """Evaluate the model on `steps` batches."""
    # set model to evaluation mode
    model.eval()

    # id2tag dict
    idx2tag = {idx: tag for idx, tag in enumerate(params.tags)}

    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()
    for input_ids, input_mask, labels in data_iterator:
        # to device
        input_ids = input_ids.to(params.device)
        input_mask = input_mask.to(params.device)
        labels = labels.to(params.device)

        batch_size, max_len = labels.size()

        # get loss
        loss = model(input_ids, attention_mask=input_mask.bool(), labels=labels)
        loss /= batch_size
        # update the average loss
        loss_avg.update(loss.item())

        # inference
        with torch.no_grad():
            batch_output = model(input_ids, attention_mask=input_mask.bool())

        # recover the real (unpadded) length of each tag sequence
        real_batch_tags = []
        for i in range(batch_size):
            real_len = int(input_mask[i].sum())
            real_batch_tags.append(labels[i][:real_len].to('cpu').numpy())

        # List[int]
        pred_tags.extend([idx2tag.get(idx) for indices in batch_output for idx in indices])
        true_tags.extend([idx2tag.get(idx) for indices in real_batch_tags for idx in indices])
    # sanity check
    assert len(pred_tags) == len(true_tags), 'len(pred_tags) is not equal to len(true_tags)!'

    # logging loss, f1 and report
    metrics = {}
    f1 = f1_score(true_tags, pred_tags)
    accuracy = accuracy_score(true_tags, pred_tags)
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1
    metrics['accuracy'] = accuracy
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v) for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)

    # f1 classification report
    if verbose:
        report = classification_report(true_tags, pred_tags)
        logging.info(report)
    return metrics
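
The f1_score, accuracy_score, and classification_report calls here match the seqeval API, which scores NER tags at the entity level (f1_score) and token level (accuracy_score); a small illustration under that assumption:

from seqeval.metrics import accuracy_score, classification_report, f1_score

true_tags = [['B-PER', 'I-PER', 'O', 'B-LOC', 'O']]
pred_tags = [['B-PER', 'I-PER', 'O', 'O', 'O']]  # the LOC entity is missed

print(f1_score(true_tags, pred_tags))        # entity-level F1 ≈ 0.67
print(accuracy_score(true_tags, pred_tags))  # token-level accuracy = 0.8
print(classification_report(true_tags, pred_tags))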
Example No. 8
def run_1nn(points):
    m = KNN(1)
    m.train(points)
    predicts = []
    real = []
    for point in points:
        predicts.append(m.predict(point)[0])
        real.append(point.label)
    print(accuracy_score(real, predicts))
Example No. 9
def ques_one(points):
    k = KNN(1)
    k.train(points)
    real = [0] * len(points)
    predicted = [0] * len(points)
    for i in range(len(points)):
        real[i] = points[i].label
        predicted[i] = k.predict(points[i])[0]
    print("question 1 answer: ", accuracy_score(real, predicted))
Example No. 10
def evaluate(model, data_iterator, params, mark='Eval', verbose=False):
    """Evaluate the model on `steps` batches."""
    # set model to evaluation mode
    model.eval()

    idx2tag = params.idx2tag

    true_tags = []
    pred_tags = []

    # a running average object for loss
    loss_avg = utils.RunningAverage()

    for _ in range(params.eval_steps):
        # fetch the next evaluation batch
        batch_data, batch_token_starts, batch_tags = next(data_iterator)
        batch_masks = batch_data.gt(0)

        loss = model((batch_data, batch_token_starts),
                     token_type_ids=None,
                     attention_mask=batch_masks,
                     labels=batch_tags)[0]
        loss_avg.update(loss.item())

        batch_output = model((batch_data, batch_token_starts),
                             token_type_ids=None,
                             attention_mask=batch_masks)[
                                 0]  # shape: (batch_size, max_len, num_labels)

        batch_output = batch_output.detach().cpu().numpy()
        batch_tags = batch_tags.to('cpu').numpy()

        pred_tags.extend([[idx2tag.get(idx) for idx in indices]
                          for indices in np.argmax(batch_output, axis=2)])
        true_tags.extend(
            [[idx2tag.get(idx) if idx != -1 else 'O' for idx in indices]
             for indices in batch_tags])

    assert len(pred_tags) == len(true_tags)

    # logging loss, f1 and report
    metrics = {}
    f1 = f1_score(true_tags, pred_tags)
    metrics['loss'] = loss_avg()
    metrics['f1'] = f1
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v)
                            for k, v in metrics.items())
    logging.info("- {} metrics: ".format(mark) + metrics_str)

    if verbose:
        report_acc = accuracy_score(true_tags, pred_tags)
        report = classification_report(true_tags, pred_tags)
        logging.info(report_acc)
        logging.info(report)
    return metrics
Example No. 11
def run_knn_kpoints(points, k):
    # leave-one-out evaluation: hold each point out, train on the rest,
    # then average the per-point scores
    m = KNN(k)
    sum_ = 0
    for _ in range(len(points)):
        point = points[0]
        points.remove(point)
        m.train(points)
        cl = m.predict(point)
        sum_ += accuracy_score(point.label, cl)
        points.append(point)
    return sum_/len(points)
Example No. 12
def run_knn1(points):
    """
    a function for question 1
    :param points: list of Point
    """
    m = KNN(1)
    m.train(points)
    predicted = m.predict(points)
    true_labels = []
    for point in points:
        true_labels.append(point.label)
    print("accuracy_score for k=1:", accuracy_score(true_labels, predicted))
Example No. 13
def cross_validation_whole():
    X_train = []
    X_test = []
    y_train = []
    y_test = []

    indexes = [[0, 36], [37, 73], [74, 110], [111, 146],
               [147, 182], [183, 216], [217, 253], [254, 290], [291, 328],
               [329, 365]]  #indexes of specified groups

    res_accuracy = []
    res_precision = []

    for i in range(0, 10):
        X_test = data[np.arange(indexes[i][0], indexes[i][1] + 1), :48]
        X_test = X_test[:, [0, 1, 38, 39, 40, 41, 42, 43]]  #F1+F12
        X_test = X_test.astype(np.float64)
        X_train = np.delete(data,
                            np.arange(indexes[i][0], indexes[i][1] + 1),
                            axis=0)[:, :48]
        X_train = X_train[:, [0, 1, 38, 39, 40, 41, 42, 43]]  #F1+F12
        X_train = X_train.astype(np.float64)
        y_test = data[np.arange(indexes[i][0], indexes[i][1] + 1), 48]
        y_train = np.delete(data,
                            np.arange(indexes[i][0], indexes[i][1] + 1),
                            axis=0)[:, 48]
        #clf = svm.SVC(kernel='rbf', probability=0, C=1).fit(X_train, y_train)
        #clf = LogisticRegression(solver='liblinear', C=10).fit(X_train, y_train)
        clf = RandomForestClassifier(criterion='gini',
                                     n_estimators=10,
                                     min_samples_leaf=1).fit(X_train, y_train)
        #clf = DecisionTreeClassifier(criterion='entropy', max_features='auto', min_samples_leaf=2).fit(X_train, y_train)
        y_pred = clf.predict(X_test)
        res_accuracy.append(
            metrics.accuracy_score(y_test, y_pred, normalize=True))
        res_precision.append(
            metrics.precision_score(y_test, y_pred, pos_label="1"))
    res_accuracy = np.array(res_accuracy)
    res_precision = np.array(res_precision)
    print(res_accuracy.min())
    print(statistics.median(res_accuracy))
    print(res_accuracy.max())
    print(res_precision.min())
    print(statistics.median(res_precision))
    print(res_precision.max())
    print("max acc: " + str(res_accuracy.max()) + " min acc: " +
          str(res_accuracy.min()) + " mediana acc: " +
          str(statistics.median(res_accuracy)))
    print("max prec: " + str(res_precision.max()) + " min prec: " +
          str(res_precision.min()) + " mediana prec: " +
          str(statistics.median(res_precision)))
Example No. 14
    def evaluate(self, darray, thr):

        batch_index = 0
        X_batch, P_batch, y_batch = self.get_batch(darray, self.batch_size,
                                                   batch_index)
        y_pred = None
        y_label = None
        while len(X_batch) > 0:
            num_batch = len(y_batch)
            feed_dict = {
                self.vocab_index: X_batch,
                self.props: P_batch,
                self.label: y_batch,
                self.first_level_lstm_dropout_p: [1.0] * len(self.first_level_lstm_dropout),
                self.deep_dropout_p: [1.0] * len(self.deep_dropout),
                self.conv_pool_dropout_p: [1.0] * len(self.conv_pool_dropout),
                self.second_level_lstm_dropout_p: [1.0] * len(self.second_level_lstm_dropout),
                self.train_phase: False
            }
            batch_out = self.sess.run(self.out, feed_dict=feed_dict)

            if batch_index == 0:
                y_pred = np.reshape(batch_out, (num_batch, ))
                y_label = np.reshape(y_batch, (num_batch, ))
            else:
                y_pred = np.concatenate(
                    (y_pred, np.reshape(batch_out, (num_batch, ))))
                y_label = np.concatenate(
                    (y_label, np.reshape(y_batch, (num_batch, ))))

            batch_index += 1
            X_batch, P_batch, y_batch = self.get_batch(darray, self.batch_size,
                                                       batch_index)

        pred = [1 if y_pred[i] > thr else 0 for i in range(len(y_pred))]
        accuracy = metrics.accuracy_score(y_label, pred)
        precision = metrics.precision_score(y_label, pred)
        recall = metrics.recall_score(y_label, pred)
        f1 = metrics.f1_score(y_label, pred)

        return accuracy, precision, recall, f1
Example No. 15
    def on_batch_close(self, loss: torch.Tensor, np_probs: torch.Tensor,
                       targets: torch.Tensor):
        # np_probs N*2*H*W      targets: N*H*W
        # targets = torch.zeros(size=np_probs.shape).scatter_(dim=1, index=targets.unsqueeze(dim=1).long(), value=1)
        np_preds = torch.argmax(np_probs, dim=1).squeeze()
        assert np_preds.shape == targets.shape
        self.batch_num += 1
        if not torch.isnan(loss):
            self.metrics['loss'] += float(loss)

        dice: torch.Tensor = metrics.dice_score(np_preds, targets)
        if not torch.isnan(dice):
            self.metrics['dice'] += float(dice)

        # iou: torch.Tensor = metrics.iou_score(np_preds, targets)
        # if not torch.isnan(iou):
        #     self.metrics['iou'] += float(iou)

        acc: torch.Tensor = metrics.accuracy_score(np_preds, targets)
        if not torch.isnan(acc):
            self.metrics['acc'] += float(acc)
Example No. 16
    def step(self):
        """ Epochs step, training and validation.
            Return:
                training_loss, validation_loss, accuracy, precision, recall
        """
        # Training loop
        batch_loss, batch_val_loss = [], []
        batch_accuracy, batch_precision, batch_recall = [], [], []

        for x_batch, y_batch in self.train_loader:
            x_batch = x_batch.to(self.device)
            y_batch = y_batch.to(self.device)

            loss = self.train_step(x_batch, y_batch)
            batch_loss.append(loss)

        with torch.no_grad():
            # Validation loop
            for i, (x_val, y_val) in enumerate(self.val_loader):
                x_val = x_val.to(self.device)
                y_val = y_val.to(self.device)

                self.model.eval()
                yhat = self.model(x_val)
                val_loss = self.criterion(yhat, y_val)
                batch_val_loss.append(val_loss)

                # compute metrics on class indices (y_true first, per the sklearn convention)
                y_true_np = np.argmax(y_val.cpu().detach().numpy(), axis=1)
                y_pred_np = np.argmax(yhat.cpu().detach().numpy(), axis=1)
                batch_accuracy.append(accuracy_score(y_true_np, y_pred_np))
                batch_precision.append(precision_score(y_true_np, y_pred_np))
                batch_recall.append(recall_score(y_true_np, y_pred_np))

        # step the lr scheduler using the last validation batch's loss
        if self.scheduler is not None:
            self.scheduler.step(val_loss)

        return [    torch.mean(torch.Tensor(batch_loss)),
                    torch.mean(torch.Tensor(batch_val_loss)),
                    torch.mean(torch.Tensor(batch_accuracy)),
                    torch.mean(torch.Tensor(batch_precision)),
                    torch.mean(torch.Tensor(batch_recall))
                ]
Example No. 17
def generate_classification_perf(truths, pred_probs, multiclass=False):
    """Given truths, and predicted probabilities, generate ModelPerf object"""
    pred_classes = np.round(pred_probs).astype(int)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        retval = ClassificationModelPerf(
            auroc=metrics.roc_auc_score(truths, pred_probs),
            auroc_curve=metrics.roc_curve(truths, pred_probs)
            if not multiclass else None,
            auprc=metrics.average_precision_score(truths, pred_probs),
            accuracy=metrics.accuracy_score(truths, pred_classes)
            if not multiclass else None,
            recall=metrics.recall_score(truths, pred_classes)
            if not multiclass else None,
            precision=metrics.precision_score(truths, pred_classes)
            if not multiclass else None,
            f1=metrics.f1_score(truths, pred_classes)
            if not multiclass else None,
            ce_loss=metrics.log_loss(truths, pred_probs, normalize=False) /
            np.prod(truths.shape),
        )
    return retval
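
A short usage sketch, assuming ClassificationModelPerf is a namedtuple/dataclass with the keyword fields passed above (it is not defined in the snippet):

import numpy as np

truths = np.array([0, 1, 1, 0, 1])
pred_probs = np.array([0.2, 0.9, 0.6, 0.4, 0.3])

perf = generate_classification_perf(truths, pred_probs)
print(perf.auroc, perf.accuracy, perf.f1)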
Example No. 18
from bayes import NaiveBayes
from util import FileOperate
from util import train_test_split
from metrics import accuracy_score

# When running this code, set the playML folder as the source-code root folder

if __name__ == '__main__':
    # 1. Load the data: spam marks a spam message (1), ham a normal message (0)
    data_path = '../input/SMSSpamCollection'
    label = '\t'
    fo = FileOperate(data_path, label)
    X, y = fo.load_data()

    # 2. Split the dataset into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=.25,
                                                        random_state=666)

    # Train
    nb = NaiveBayes()
    nb.fit(X_train, y_train)

    # Predict
    y_pred = nb.predict(X_test)

    # Compute the score
    score = accuracy_score(y_test, y_pred)
    print('Accuracy:', score)
Example No. 19
In [31]:
# 3.3 Validate the model's offline metrics on the test set
# Training-set AUC
probs_train= lr_model.predict_proba(X_train)  
AUC1 = metrics.roc_auc_score(Y_train, probs_train[:,1])
print("Train Auc: %s"%(AUC1))

# Test-set AUC
probs_test= lr_model.predict_proba(X_test)  
predict_test = lr_model.predict(X_test)
AUC2 = metrics.roc_auc_score(Y_test, probs_test[:,1])
print("Test Auc: %s"%(AUC2))

# Accuracy
accuracy = metrics.accuracy_score(Y_test, predict_test) 
print("Test Accuracy: %s"%(accuracy))

# Recall
recall = metrics.recall_score(Y_test, predict_test) 
print("Test Recall: %s"%(recall))

# F1 score
f1 = metrics.f1_score(Y_test, predict_test) 
print("Test F1: %s"%(f1))

In [42]:
# 3.4 Print the model parameters
w=lr_model.coef_
print("参数大小:")
print(w.shape)
Example No. 20
 def score(self, X_test, y_test):
     '''Compute the prediction accuracy on the test set'''
     y_predict = self.predict(X_test)
     return accuracy_score(y_test, y_predict)
Example No. 21
 def score(self, X_test, y_test):
     y_predict = self.predict(X_test)
     return accuracy_score(y_test, y_predict)
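
Several of these score methods import accuracy_score from a local metrics module rather than from scikit-learn; a minimal sketch of what such a helper typically looks like (an assumption, since the module itself isn't shown):

import numpy as np

def accuracy_score(y_true, y_predict):
    """Fraction of predictions that exactly match the true labels."""
    y_true, y_predict = np.asarray(y_true), np.asarray(y_predict)
    assert y_true.shape[0] == y_predict.shape[0], \
        "the size of y_true must be equal to the size of y_predict"
    return np.sum(y_true == y_predict) / len(y_true)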
Example No. 22
 lens, idx = torch.sort(lens, descending=True)
 sents = sents[idx]
 labs = labs[idx]
 loss = model.neg_log_likelihood(sents, labs, lens)
 loss.backward()
 optimizer.step()
 score, preds = model(sents, lens)
 true_labs = [
     seqid2text(labs[i, :l], ix_to_lab)
     for i, l in enumerate(lens)
 ]
 pred_labs = [
     seqid2text(preds[i, :l], ix_to_lab)
     for i, l in enumerate(lens)
 ]
 acc = accuracy_score(true_labs, pred_labs)
 f1 = f1_score(true_labs, pred_labs)
 print(
     "Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}, train f1 {:.4f} "
     .format(epoch, i, loss.item(), acc, f1))
 if ((i + 1) % 50 == 0):
     with torch.no_grad():
         model.eval()
         print("Evaluation on validation set")
         true_labels = []
         pred_labels = []
         for batch in val_data_loader:
             sents, labs, lens = batch
             sents = pad_sequence(sents,
                                  batch_first=True).to(device)
             labs = pad_sequence(labs,
                                 batch_first=True).to(device)
Example No. 23
# Load the iris dataset
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, 0.2)

# Standardize the data (fit on the training set, then transform both sets)
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)
print(X_test)

# Train and predict with the data above
kcf = kNNClassifier(3)
kcf.fit(X_train, y_train)
y_predict = kcf.predict(X_test)

# Compute the prediction accuracy
accuracy = accuracy_score(y_test, y_predict)
print('Origin: ', y_test)
print('Predict:', y_predict)
print('Accuracy:', accuracy)

print(kcf.score(X_test, y_test))
Example No. 24
    def score(self, X, y, sample_weight=None):

        from metrics import accuracy_score
        return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
Example No. 25
 def score(self, X, y, sample_weight=None):
     
     from metrics import accuracy_score
     return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
Example No. 26
 def score(self, x_test, y_test):
     assert self._x_train is not None and self._y_train is not None, "must fit before score"
     y_predict = self.predict(x_test)
     return accuracy_score(y_test, y_predict)
Example No. 27
                    data[:, :48],
                    columns=columns)  # change nr of columns in file
                y = data[:, 48]  # last column in file
                X_train, X_test, y_train, y_test = train_test_split(
                    df, y, test_size=0.2, random_state=0)
                clf = RandomForestClassifier(n_estimators=ne,
                                             criterion=crit,
                                             max_features=mf,
                                             min_samples_leaf=msl)
                clf.fit(X_train, y_train)
                y_pred = clf.predict(X_test)
                results.append(
                    "criterion " + crit + ", n_estimators " + str(ne) +
                    ", max_features " + mf + ", min_samples_leaf " + str(msl) +
                    ", accuracy " +
                    str(metrics.accuracy_score(
                        y_test, y_pred, normalize=True)) + ", precision " +
                    str(metrics.precision_score(y_test, y_pred, pos_label="1"))
                )

for res in results:
    print(res)

#Decision Tree
"""
criterion = ['gini', 'entropy']
max_features = ['auto', 'sqrt', 'log2']
min_samples_leaf = [ 1, 2, 3, 4, 5]

for crit in criterion:
    for mf in max_features:
        for msl in min_samples_leaf:
Example No. 28
def evaluate(args, model, eval_dataloader, params):
    model.eval()
    # track the average loss
    loss_avg = utils.RunningAverage()
    # init
    pre_result = []
    gold_result = []

    # get data
    for batch in tqdm(eval_dataloader, unit='Batch'):
        # to device
        batch = tuple(t.to(params.device) for t in batch)
        input_ids, input_mask, segment_ids, start_pos, end_pos, ne_cate = batch

        with torch.no_grad():
            # get loss
            loss = model(input_ids,
                         token_type_ids=segment_ids,
                         attention_mask=input_mask,
                         start_positions=start_pos,
                         end_positions=end_pos)
            if params.n_gpu > 1 and args.multi_gpu:
                loss = loss.mean()  # mean() to average on multi-gpu.
            # update the average loss
            loss_avg.update(loss.item())

            # inference
            start_logits, end_logits = model(input_ids=input_ids,
                                             token_type_ids=segment_ids,
                                             attention_mask=input_mask)

        # gold label
        start_pos = start_pos.to("cpu").numpy().tolist()
        end_pos = end_pos.to("cpu").numpy().tolist()
        input_mask = input_mask.to('cpu').numpy().tolist()
        ne_cate = ne_cate.to("cpu").numpy().tolist()

        # predict label
        start_label = start_logits.detach().cpu().numpy().tolist()
        end_label = end_logits.detach().cpu().numpy().tolist()

        # idx to label
        cate_idx2label = {
            idx: value
            for idx, value in enumerate(params.label_list)
        }

        # get bio result
        for start_p, end_p, start_g, end_g, input_mask_s, ne_cate_s in zip(
                start_label, end_label, start_pos, end_pos, input_mask,
                ne_cate):
            ne_cate_str = cate_idx2label[ne_cate_s]
            # query length
            q_len = len(IO2QUERY[ne_cate_str])
            # effective sequence length
            act_len = sum(input_mask_s[q_len + 2:-1])
            # get BIO labels
            pre_bio_labels = pointer2bio(
                start_p[q_len + 2:q_len + 2 + act_len],
                end_p[q_len + 2:q_len + 2 + act_len],
                ne_cate=ne_cate_str)
            gold_bio_labels = pointer2bio(
                start_g[q_len + 2:q_len + 2 + act_len],
                end_g[q_len + 2:q_len + 2 + act_len],
                ne_cate=ne_cate_str)
            pre_result.append(pre_bio_labels)
            gold_result.append(gold_bio_labels)

    # metrics
    f1 = f1_score(y_true=gold_result, y_pred=pre_result)
    acc = accuracy_score(y_true=gold_result, y_pred=pre_result)

    # f1, acc
    metrics = {'loss': loss_avg(), 'f1': f1, 'acc': acc}
    metrics_str = "; ".join("{}: {:05.2f}".format(k, v)
                            for k, v in metrics.items())
    logging.info("- {} metrics: ".format('Val') + metrics_str)
    # f1 classification report
    report = classification_report(y_true=gold_result, y_pred=pre_result)
    logging.info(report)

    return metrics
Example No. 29
 def score(self, X_test, Y_test):
     Y_predict = self.predict(X_test)
     accuracy = accuracy_score(Y_test, Y_predict)
     return accuracy
Example No. 30
    def score(self, X_test, y_test):
        """根据测试数据集 X_test 和 y_test 确定当前模型的准确度"""

        y_predict = self.predict(X_test)
        return accuracy_score(y_test, y_predict)
Example No. 31
    y_fold_train = y_train[train_fold_idx]

    if kernel_method:
        K_fold_test = K_train[test_fold_idx, :][:, train_fold_idx]
    else:
        X_fold_test = X_train[test_fold_idx]
    y_fold_test = y_train[test_fold_idx]

    if kernel_method:
        kernel_model.fit(K_fold_train, y_fold_train)
        y_pred = kernel_model.predict(K_fold_test)
    else:
        kernel_model.fit(X_fold_train, y_fold_train)
        y_pred = kernel_model.predict(X_fold_test)

    fold_score = metrics.accuracy_score(y_pred, y_fold_test)

    print(fold_score)
    cv_scores.append(fold_score)

cv_score_mean = round(np.mean(cv_scores), 5)
cv_score_std = round(np.std(cv_scores), 5)
cv_score_min = round(np.min(cv_scores), 5)
cv_score_max = round(np.max(cv_scores), 5)

print("\nMean score :", cv_score_mean)
print("Standard deviation :", cv_score_std)
print("Min score :", cv_score_min)
print("Max score :", cv_score_max)