Code Example #1
 def metrics(self):
     # All four metrics are derived from the same confusion matrix self.cm.
     tpr = TruePositiveRate(self.cm).get()
     fpr = FalsePositiveRate(self.cm).get()
     acc = Accuracy(self.cm).get()
     f1 = round(F1(self.cm).get(), 2)
     return tpr, fpr, acc, f1
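Each wrapper above reads from a shared confusion matrix self.cm. A minimal sketch of what one such wrapper could look like, assuming a 2x2 matrix laid out as [[TN, FP], [FN, TP]] (the layout and the real class internals are not shown in the snippet):

class F1:
    def __init__(self, cm):
        # Hypothetical layout: cm == [[TN, FP], [FN, TP]].
        (self.tn, self.fp), (self.fn, self.tp) = cm

    def get(self):
        # F1 = 2*TP / (2*TP + FP + FN); defined as 0.0 on an empty denominator.
        denom = 2 * self.tp + self.fp + self.fn
        return 2 * self.tp / denom if denom else 0.0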
Code Example #2
 def metrics_dict(self, prefix="train"):
     if self.tusk == "classification":
         return {
             f"{prefix}_top1": Top(n=1),
             f"{prefix}_top5": Top(n=5),
             f"{prefix}_MRR": MRR()
         }
     elif self.tusk == "generation":
         ignore_idxs = (self.dm.target_eos_idx, self.dm.target_pad_idx)
         return {
             f"{prefix}_accuracy": Accuracy(),
             f"{prefix}_precision": Precision(ignore_idxs),
             f"{prefix}_recall": Recall(ignore_idxs),
             f"{prefix}_F1": F1(ignore_idxs)
         }
     else:
         return ValueError(f"{self.tusk} tusk is not supported")
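The generation branch passes ignore_idxs so EOS and PAD positions do not count toward the token-level scores. A minimal sketch of an ignore-aware metric in that style (class name and API are assumptions, not the project's code):

class IgnoreAwareAccuracy:
    def __init__(self, ignore_idxs=()):
        self.ignore_idxs = set(ignore_idxs)
        self.correct = 0
        self.total = 0

    def update(self, preds, targets):
        # Skip positions whose target id is EOS/PAD; count the rest.
        for p, t in zip(preds, targets):
            if t in self.ignore_idxs:
                continue
            self.correct += int(p == t)
            self.total += 1

    def get(self):
        return self.correct / self.total if self.total else 0.0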
Code Example #3
File: trainer.py Project: KUAN-HSUN-LI/SDML
    def run_epoch(self, epoch, data, training):
        if epoch == self.freeze_epoch:
            self.model.freeze_bert_encoder()

        self.model.train(training)
        description = 'Train' if training else 'Valid'
        dataset = data
        shuffle = training
        dataloader = DataLoader(dataset=dataset,
                                batch_size=self.batch_size,
                                shuffle=shuffle,
                                collate_fn=dataset.collate_fn,
                                num_workers=1)

        trange = tqdm(enumerate(dataloader), total=len(dataloader), desc=description)
        loss = 0
        f1_score = F1()
        for step, (tokens, segments, masks, node_vec, tfidf, labels) in trange:
            o_labels, batch_loss = self._run_iter(tokens, segments, masks, node_vec, tfidf, labels)
            if training:
                if self.gradient_accumulation_steps > 1:
                    batch_loss = batch_loss / self.gradient_accumulation_steps
                batch_loss.backward()
                # clip_grad_norm_(self.model.parameters(), self.grad_clip)
                if (step + 1) % self.gradient_accumulation_steps == 0:
                    self.opt.step()
                    self.opt.zero_grad()

            loss += batch_loss.item()
            f1_score.update(o_labels.cpu(), labels)

            trange.set_postfix(
                loss=loss / (step + 1), f1=f1_score.print_score())
        split = 'train' if training else 'valid'
        self.history[split].append({'f1': f1_score.get_score(), 'loss': loss / len(trange)})

        self.scheduler.step()
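run_epoch only needs three methods from the F1 object: update, print_score and get_score. A minimal streaming tracker that satisfies that interface, assuming micro-averaged F1 over model outputs thresholded at 0.5 (the threshold and averaging scheme are guesses; the real class is not shown):

import torch

class F1:
    def __init__(self, threshold=0.5):
        self.threshold = threshold
        self.tp = self.fp = self.fn = 0

    def update(self, outputs, labels):
        # Accumulate micro-averaged counts across batches.
        preds = (outputs > self.threshold).long()
        labels = labels.long()
        self.tp += int(((preds == 1) & (labels == 1)).sum())
        self.fp += int(((preds == 1) & (labels == 0)).sum())
        self.fn += int(((preds == 0) & (labels == 1)).sum())

    def get_score(self):
        denom = 2 * self.tp + self.fp + self.fn
        return 2 * self.tp / denom if denom else 0.0

    def print_score(self):
        return '{:.4f}'.format(self.get_score())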
Code Example #4
File: test.py Project: vladostan/segmification
    y2_true = y2_true.astype('int64')
    y2_pred = np.squeeze(y2_pred > 0.5).astype('int64')

    TP, FP, FN, TN = tpfpfn(y1_pred, y1_true)
    TP_1 += TP
    FP_1 += FP
    FN_1 += FN
    TN_1 += TN

    TP, FP, FN, TN = tpfpfn(y2_pred, y2_true)

    mAccuracy_2 += Accuracy(TP, FP, FN, TN) / dlina
    mPrecision_2 += Precision(TP, FP) / dlina
    mRecall_2 += Recall(TP, FN) / dlina
    mIU_2 += IU(TP, FP, FN) / dlina
    mF1_2 += F1(TP, FP, FN) / dlina

mAccuracy_1 = Accuracy(TP_1, FP_1, FN_1, TN_1)
mPrecision_1 = Precision(TP_1, FP_1)
mRecall_1 = Recall(TP_1, FN_1)
mIU_1 = IU(TP_1, FP_1, FN_1)
mF1_1 = F1(TP_1, FP_1, FN_1)

print("CLASS accuracy: {}".format(mAccuracy_1))
print("CLASS precision: {}".format(mPrecision_1))
print("CLASS recall: {}".format(mRecall_1))
print("CLASS iu: {}".format(mIU_1))
print("CLASS f1: {}".format(mF1_1))

print("MASK accuracy: {}".format(mAccuracy_2))
print("MASK precision: {}".format(mPrecision_2))
Code Example #5
    cfg = edict(json.load(f))

if isinstance(cfg.batch_size, list) and isinstance(cfg.long_side, list):
    list_batch = cfg.batch_size
    list_res = cfg.long_side
elif isinstance(cfg.batch_size, int) and isinstance(cfg.long_side, int):
    list_batch = [cfg.batch_size]
    list_res = [cfg.long_side]
else:
    raise Exception("'batch_size' and 'long_side' in the config file must be the same type (both lists or both ints)!")

loss_func = BCEWithLogitsLoss()

# data_dir = '/home/tungthanhlee/bdi_xray/data/images'
data_dir = '/home/dual1/thanhtt/assigned_jpeg'
metrics_dict = {
    'acc': ACC(),
    'auc': AUC(),
    'precision': Precision(),
    'recall': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}

model_names = [
    'dense',
    'dense',
    'dense',
    # 'resnet',
    # 'dense',
    # 'efficient',
    # 'resnest'
]
ids = [
    '121',
    '121',
    '121',
    # '101',
Code Example #6
val_loader = create_loader(cfg.dev_csv,
                           data_dir,
                           cfg,
                           mode='val',
                           dicom=False,
                           type=cfg.type)

# loss_func = BCELoss()
# loss_func = BCEWithLogitsLoss()
loss_func = MSELoss()

metrics_dict = {
    'auc': AUC(),
    'sensitivity': Recall(),
    'specificity': Specificity(),
    'f1': F1()
}
loader_dict = {'train': train_loader, 'val': val_loader}

#------------------------------- additional config for ensemble ---------------------------------------
model_names = [
    'dense',
    'resnet',
    'dense',
    # 'efficient',
    # 'resnest'
]
ids = [
    '121',
    '101',
    '169',
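This metrics_dict maps sensitivity to Recall and adds Specificity, which is not defined in the snippet. Under the usual definition it tracks the true-negative rate, TN / (TN + FP); a minimal sketch with an assumed update/get API:

class Specificity:
    def __init__(self):
        self.tn = 0
        self.fp = 0

    def update(self, preds, labels):
        # Binary labels/preds; only negative ground truth contributes.
        for p, t in zip(preds, labels):
            if t == 0:
                if p == 0:
                    self.tn += 1
                else:
                    self.fp += 1

    def get(self):
        denom = self.tn + self.fp
        return self.tn / denom if denom else 0.0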
Code Example #7
parser.add_argument("--weight_decay", default=0.01, type=float, help="Weight decay if we apply some.")
parser.add_argument("--warmup_proportion", default=0.1, type=float, help="Linear warmup proption over the training process.")
parser.add_argument("--dataset", default="imdb", choices=["imdb", "iflytek", "thucnews", "hyp"], type=str, help="The training dataset")
parser.add_argument("--layerwise_decay", default=1.0, type=float, help="Layerwise decay ratio")
parser.add_argument("--max_steps", default=-1, type=int, help="If > 0: set total number of training steps to perform. Override num_train_epochs.",)
# yapf: enable
args = parser.parse_args()

# tokenizer, eval_dataset, test_dataset, preprocess_text_fn, metric
# BPETokenizer for English Tasks
# ErnieDocTokenizer for Chinese Tasks

DATASET_INFO = {
    "imdb":
    (ErnieDocBPETokenizer, "test", "test", ImdbTextPreprocessor(), Accuracy()),
    "hyp": (ErnieDocBPETokenizer, "dev", "test", HYPTextPreprocessor(), F1()),
    "iflytek": (ErnieDocTokenizer, "dev", "dev", None, Accuracy()),
    "thucnews": (ErnieDocTokenizer, "dev", "test", None, Accuracy())
}


def set_seed(args):
    # Use the same data seed (for data shuffling) across all procs to
    # guarantee data consistency after sharding.
    random.seed(args.seed)
    np.random.seed(args.seed)
    # Different op seeds (for dropout) per proc might work better, e.g.:
    # `paddle.seed(args.seed + paddle.distributed.get_rank())`
    paddle.seed(args.seed)
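Downstream code presumably unpacks DATASET_INFO in the order given by the comment above the dict; a hedged usage sketch (the variable names are assumptions):

tokenizer_class, eval_split, test_split, preprocess_text_fn, metric = \
    DATASET_INFO[args.dataset]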

Code Example #8
    # max_depths = [32, 64, 128, 256, 512]
    # num_rounds = [32, 64, 128, 256, 512]
    # learning_rates = [0.8, 1]
    max_depths = [64]
    num_rounds = [64]
    learning_rates = [0.8]
    for max_depth in max_depths:
        for num_round in num_rounds:
            for learning_rate in learning_rates:
                param = {
                    'max_depth': max_depth,
                    'eta': learning_rate,
                    'silent': 1,
                    'objective': 'multi:softmax',
                    'num_class': 4
                }
                # bst = xgb.train(param, dtrain, num_round)
                # bst.save_model('xgboost.model')
                bst = xgb.Booster({'nthread': 4})
                bst.load_model('xgboost.model')
                predictions = bst.predict(dtest)
                if os.path.isfile('predicted_labels.txt'):
                    os.remove('predicted_labels.txt')
                with open('predicted_labels.txt', 'w') as writer:
                    for prediction in predictions:
                        writer.write(str(prediction) + '\n')
                print('Max depth: {} Num round: {} Learning rate: {} F1 Score: {}'.format(
                    max_depth, num_round, learning_rate, F1()))
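F1() is called with no arguments here, so it presumably reads predictions from module-level state. A self-contained alternative (an assumption, not this project's helper) that computes macro F1 directly from the label lists:

def macro_f1(y_true, y_pred, num_classes=4):
    # Per-class F1 from pairwise label comparison, averaged over classes.
    scores = []
    for c in range(num_classes):
        tp = sum(1 for t, p in zip(y_true, y_pred) if t == c and p == c)
        fp = sum(1 for t, p in zip(y_true, y_pred) if t != c and p == c)
        fn = sum(1 for t, p in zip(y_true, y_pred) if t == c and p != c)
        denom = 2 * tp + fp + fn
        scores.append(2 * tp / denom if denom else 0.0)
    return sum(scores) / num_classes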
Code Example #9
def predict_worker(proc_id,
                   output_file,
                   classes,
                   model_params,
                   batch_size,
                   que,
                   lock,
                   status_que,
                   gpu_id=0,
                   evaluate=True,
                   framework='mxnet'):
    """ get data from batch loader and make predictions, predictions will be saved in output_file
        if evaluate, will evaluate recall, precision, f1_score and recall_top5 """

    logging.info('Predictor #{}: Loading model...'.format(proc_id))
    model = load_model(proc_id,
                       model_params,
                       batch_size,
                       classes,
                       gpu_id,
                       framework=framework)
    if model is None:
        status_que.put('Error')
        raise ValueError('No model created! Exit')
    logging.info('Predictor #{}: Model loaded'.format(proc_id))
    status_que.put('OK')

    if evaluate:
        from metrics import F1, ConfusionMatrix, MisClassified, RecallTopK
        evaluator = F1(len(classes))
        misclassified = MisClassified(len(classes))
        cm = ConfusionMatrix(classes)
        recall_topk = RecallTopK(len(classes), top_k=5)

    f = open(output_file, 'w')
    batch_idx = 0
    logging.info('Predictor #{} starts'.format(proc_id))
    start = time.time()
    while True:
        # get a batch from data loader via a queue
        lock.acquire()
        batch = que.get()
        lock.release()
        if batch == 'FINISH':
            logging.info(
                'Predictor #{} has received all batches, exit'.format(proc_id))
            break

        # predict
        im_names, batch, gt_list = batch
        logging.debug('Predictor #{}: predict'.format(proc_id))
        pred, prob = model.predict(batch)
        pred_labels, top_probs = model.get_label_prob(top_k=5)

        # write prediction to file
        for im_name, label, top_prob in zip(im_names, pred_labels, top_probs):
            if im_name is None:
                continue
            top_prob = [str(p) for p in top_prob]
            f.write('{} labels:{} prob:{}\n'.format(im_name, ','.join(label),
                                                    ','.join(top_prob)))

        # update metrics if evaluation mode is set
        if evaluate:
            assert gt_list is not None and gt_list != [] and gt_list[0] is not None
            top1_int = [p[0] for p in pred]
            assert len(top1_int) == len(gt_list), '{} != {}'.format(
                len(top1_int), len(gt_list))
            evaluator.update(top1_int, gt_list)
            misclassified.update(top1_int, gt_list, prob, im_names)
            cm.update(top1_int, gt_list)

            top5_int = [p[:5] for p in pred]
            assert len(top5_int) == len(gt_list), '{} != {}'.format(
                len(top5_int), len(gt_list))
            recall_topk.update(top5_int, gt_list)

        batch_idx += 1
        if batch_idx % 50 == 0 and batch_idx != 0:
            elapsed = time.time() - start
            logging.info(
                'Predictor #{}: Tested {} batches of {} images, elapsed {}s'.
                format(proc_id, batch_idx, batch_size, elapsed))

    # evaluation after prediction if set
    if evaluate:
        logging.info('Evaluating...')
        recall, precision, f1_score = evaluator.get()
        for rec, prec, f1, cls in zip(recall, precision, f1_score, classes):
            print(
                'Class {:<20}: recall: {:<12}, precision: {:<12}, f1 score: {:<12}'
                .format(cls, rec, prec, f1))
            f.write(
                'Class {:<20}: recall: {:<12}, precision: {:<12}, f1 score: {:<12}\n'
                .format(cls, rec, prec, f1))
        topk_recall = recall_topk.get()
        for rec, cls in zip(topk_recall, classes):
            print('Class {:<20}: recall-top-5: {:<12}'.format(cls, rec))
            f.write('Class {:<20}: recall-top-5: {:<12}\n'.format(cls, rec))

        fp_images, fn_images = misclassified.get()
        g = open(output_file + '.fp', 'w')
        for cls, fp_cls in zip(classes, fp_images):
            for fp in fp_cls:
                g.write('{} pred:{} prob:{} gt:{} prob:{}\n'.format(
                    fp[0], cls, fp[2], classes[fp[1]], fp[3]))
        g.close()
        g = open(output_file + '.fn', 'w')
        for cls, fn_cls in zip(classes, fn_images):
            for fn in fn_cls:
                g.write('{} gt:{} prob:{} pred:{} prob:{}\n'.format(
                    fn[0], cls, fn[3], classes[fn[1]], fn[2]))
        g.close()

        cm.normalize()
        plt_name = output_file + '_cm.jpg'
        cm.draw(plt_name)
    f.close()
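Here F1 is constructed with the class count, updated with integer labels, and get() returns per-class recall, precision and f1 lists. A minimal sketch consistent with that usage (the real metrics module is not shown):

class F1:
    def __init__(self, num_classes):
        self.tp = [0] * num_classes
        self.fp = [0] * num_classes
        self.fn = [0] * num_classes

    def update(self, preds, gts):
        # preds and gts are integer class ids, one per image.
        for p, g in zip(preds, gts):
            if p == g:
                self.tp[g] += 1
            else:
                self.fp[p] += 1
                self.fn[g] += 1

    def get(self):
        recall, precision, f1 = [], [], []
        for tp, fp, fn in zip(self.tp, self.fp, self.fn):
            r = tp / (tp + fn) if tp + fn else 0.0
            p = tp / (tp + fp) if tp + fp else 0.0
            f = 2 * p * r / (p + r) if p + r else 0.0
            recall.append(r)
            precision.append(p)
            f1.append(f)
        return recall, precision, f1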
Code Example #10
            TN_direct += TN
        elif cl == 2:
            TP_alternative += TP
            FP_alternative += FP
            FN_alternative += FN
            TN_alternative += TN
        TP_mean_segm += TP
        FP_mean_segm += FP
        FN_mean_segm += FN
        TN_mean_segm += TN
   
mAccuracy_0 = Accuracy(TP_0, FP_0, FN_0, TN_0)
mPrecision_0 = Precision(TP_0, FP_0)
mRecall_0 = Recall(TP_0, FN_0)
mIU_0 = IU(TP_0, FP_0, FN_0)
mF1_0 = F1(TP_0, FP_0, FN_0)

mAccuracy_1 = Accuracy(TP_1, FP_1, FN_1, TN_1)
mPrecision_1 = Precision(TP_1, FP_1)
mRecall_1 = Recall(TP_1, FN_1)
mIU_1 = IU(TP_1, FP_1, FN_1)
mF1_1 = F1(TP_1, FP_1, FN_1)

mAccuracy_2 = Accuracy(TP_2, FP_2, FN_2, TN_2)
mPrecision_2 = Precision(TP_2, FP_2)
mRecall_2 = Recall(TP_2, FN_2)
mIU_2 = IU(TP_2, FP_2, FN_2)
mF1_2 = F1(TP_2, FP_2, FN_2)

mAccuracy_3 = Accuracy(TP_3, FP_3, FN_3, TN_3)
mPrecision_3 = Precision(TP_3, FP_3)