Ejemplo n.º 1
0
def valid_epoch(summary, summary_writer, epoch, model, loss_fn, dataloader_valid, cfg):
    """Run one masked single-label validation epoch.

    Evaluates ``model`` on ``dataloader_valid`` under ``torch.no_grad()``,
    computing a mask-filtered loss/accuracy and an all-rank-reduced
    confusion matrix.

    Args:
        summary: dict accumulating 'step', 'loss', 'acc', 'confusion_matrix'.
        summary_writer: TensorBoard writer; unused here, kept for interface
            parity with train_epoch.
        epoch: current epoch index; unused here, kept for interface parity.
        model: network to evaluate (switched to eval mode).
        loss_fn: criterion applied to the masked predictions/targets.
        dataloader_valid: validation DataLoader.
        cfg: config dict; reads 'num_classes' and 'labels'.

    Returns:
        The updated ``summary`` dict (metrics are only written on rank 0).
    """
    model.eval()
    num_classes = cfg['num_classes']

    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)

    # Kept as a list so further validation loaders could be appended later.
    dataloader = [dataloader_valid]

    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # torch.no_grad() disables autograd tracking during evaluation.
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target, mask = prefetcher.next()

            for step in range(steps):
                data = img.to(device)
                target = target.to(device)

                output = model(data)
                output = output.view(int(batch_size), num_classes)
                target = target.view(int(batch_size))
                mask = mask.view(int(batch_size))

                # Only positions selected by the mask contribute to the loss.
                conf_targets = target[mask]
                conf_preds = output[mask]
                loss = loss_fn(conf_preds, conf_targets)

                torch.cuda.synchronize()
                probs = F.softmax(output, dim=1)
                _, predicts = torch.max(probs, 1)

                acc = (predicts[mask] == conf_targets).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)
                # Accumulate per-(target, prediction) counts, summed over ranks.
                for t in range(num_classes):
                    for p in range(num_classes):
                        count = (predicts[mask][conf_targets == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(
                            count.data, reduction=False)

                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)

                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'
                        .format(
                            str(i),
                            summary['step'] + 1, reduced_loss, reduced_acc, time_spent))
                    summary['step'] += 1

                img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['loss'] = eval_loss.avg
        summary['acc'] = eval_acc.avg

    return summary
Ejemplo n.º 2
0
def valid_epoch(summary, summary_writer, epoch, model, loss_fn, dataloader_valid, cfg):
    """Run one multi-label validation epoch; record loss, accuracy and recall.

    Evaluates ``model`` under ``torch.no_grad()`` using per-class sigmoid
    outputs thresholded at 0.5; a zero "background" column is prepended
    before picking the winning class via ``MaxIndex``. Also tracks recall
    over samples whose ``label_degree`` is >= 20.

    Args:
        summary: dict accumulating 'step', 'loss', 'acc', 'recall',
            'confusion_matrix'.
        summary_writer: TensorBoard writer; unused here, kept for interface
            parity with train_epoch.
        epoch: current epoch index; unused here, kept for interface parity.
        model: network to evaluate (switched to eval mode).
        loss_fn: criterion applied to sigmoid outputs vs. multi-hot targets.
        dataloader_valid: validation DataLoader.
        cfg: config dict; reads 'num_classes' and 'labels'.

    Returns:
        The updated ``summary`` dict (metrics are only written on rank 0).
    """
    logger = log.logger()
    model.eval()
    num_classes = cfg['num_classes']

    eval_loss = AverageMeter()
    eval_acc = AverageMeter()
    eval_pred_posit = AverageMeter()   # reduced count of predicted positives (degree >= 20 samples)
    eval_label_posit = AverageMeter()  # reduced count of actual positives (degree >= 20 samples)
    # One extra row/column for the "background" class prepended below.
    confusion_matrix = ConfusionMatrix(num_classes=(num_classes)+1)

    # Kept as a list so further validation loaders could be appended later.
    dataloader = [dataloader_valid]

    time_now = time.time()
    for i in range(len(dataloader)):
        steps = len(dataloader[i])
        batch_size = dataloader[i].batch_size
        dataiter = iter(dataloader[i])
        # torch.no_grad() disables autograd tracking during evaluation.
        with torch.no_grad():
            prefetcher = data_prefetcher(dataiter)
            img, target, label, label_degree = prefetcher.next()

            for step in range(steps):
                data = img.to(device)
                target = target.to(device)

                output = model(data)
                output = output.view(img.size(0), num_classes)
                target = target.view(img.size(0), num_classes)
                label = label.view(img.size(0))

                # Multi-label setup: independent per-class probabilities.
                conf_preds = torch.sigmoid(output)
                loss = loss_fn(conf_preds, target)

                torch.cuda.synchronize()

                # Threshold at 0.5, prepend an all-zero background column,
                # then reduce to a single class index per sample.
                predicts = (conf_preds >= 0.5)
                d = torch.Tensor([0] * img.size(0)
                                 ).reshape(-1, 1).to(device)
                predicts = torch.cat((d, predicts.float()), 1)
                logger.get_info(predicts)
                predicts = MaxIndex(predicts, batch_size)

                acc = (predicts == label).type(
                    torch.cuda.FloatTensor).sum() * 1.0 / img.size(0)
                # Recall bookkeeping on samples with degree >= 20: counts of
                # predicted positives (class index > 1) vs. actual positives.
                recall_pred = (predicts[label_degree >= 20] > 1).type(
                    torch.cuda.FloatTensor).sum() * 1.0
                recall_label = (label_degree >= 20).sum()

                # Accumulate per-(label, prediction) counts, summed over ranks.
                for t in range(num_classes+1):
                    for p in range(num_classes+1):
                        count = (predicts[label == t] == p).type(
                            torch.cuda.FloatTensor).sum()
                        reduced_count = reduce_tensor(
                            count.data, reduction=False)

                        confusion_matrix.update(t, p,
                                                to_python_float(reduced_count))

                reduced_loss = reduce_tensor(loss.data)
                reduced_acc = reduce_tensor(acc.data)
                reduced_pred_20 = reduce_tensor(recall_pred.data)
                reduced_label_20 = reduce_tensor(recall_label)

                eval_loss.update(to_python_float(reduced_loss))
                eval_acc.update(to_python_float(reduced_acc))
                eval_pred_posit.update(to_python_float(reduced_pred_20))
                eval_label_posit.update(to_python_float(reduced_label_20))

                if args.local_rank == 0:
                    time_spent = time.time() - time_now
                    time_now = time.time()
                    logging.info(
                        'data_num : {}, Step : {}, Testing Loss : {:.5f}, '
                        'Testing Acc : {:.3f}, Run Time : {:.2f}'
                        .format(
                            str(i),
                            summary['step'] + 1, reduced_loss, reduced_acc, time_spent))
                    summary['step'] += 1

                img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        # NOTE(review): raises ZeroDivisionError if no sample in the whole
        # epoch had label_degree >= 20 — confirm that cannot happen.
        recall = eval_pred_posit.sum/float(eval_label_posit.sum)
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='Confusion matrix')
        summary['loss'] = eval_loss.avg
        summary['recall'] = recall
        summary['acc'] = eval_acc.avg
        print("Recall >=20:", recall)
    return summary
Ejemplo n.º 3
0
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer, dataloader_train, cfg):
    """Run one masked single-label training epoch with apex AMP.

    Trains ``model`` over ``dataloader_train``, computing a mask-filtered
    loss/accuracy, scaling the loss through ``amp.scale_loss`` for
    mixed precision, and accumulating an all-rank-reduced confusion matrix.

    Args:
        epoch: current epoch index, stored into ``summary['epoch']``.
        summary: dict accumulating 'step', 'epoch', 'confusion_matrix'.
        summary_writer: TensorBoard writer; receives epoch-level scalars.
        model: network to train (switched to train mode).
        loss_fn: criterion applied to the masked predictions/targets.
        optimizer: optimizer stepped once per batch.
        dataloader_train: training DataLoader.
        cfg: config dict; reads 'num_classes' and 'labels'.

    Returns:
        The updated ``summary`` dict (scalars only written on rank 0).
    """
    model.train()
    num_classes = cfg['num_classes']

    train_loss = AverageMeter()
    train_acc = AverageMeter()
    confusion_matrix = ConfusionMatrix(num_classes=num_classes)

    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size

    dataiter = iter(dataloader_train)
    time_now = time.time()

    summary['epoch'] = epoch

    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, mask = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)

        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size))
        mask = mask.view(int(batch_size))

        # Only positions selected by the mask contribute to the loss.
        conf_targets = target[mask]
        conf_preds = output[mask]
        loss = loss_fn(conf_preds, conf_targets)

        optimizer.zero_grad()
        # apex AMP loss scaling for mixed-precision training.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()
        probs = F.softmax(output, dim=1)
        # torch.max(x, 1) returns (row-max values, their column indices).
        _, predicts = torch.max(probs, 1)

        acc = (predicts[mask] == conf_targets).type(
            torch.cuda.FloatTensor).sum() * 1.0 / conf_targets.size(0)
        # Accumulate per-(target, prediction) counts, summed over ranks.
        for t in range(num_classes):
            for p in range(num_classes):
                count = (predicts[mask][conf_targets == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)

                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)

        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'
                .format(
                    summary['epoch'] + 1,
                    summary['step'] + 1, train_loss.avg, train_acc.avg, time_spent))

            summary['step'] += 1

        img, target, mask = prefetcher.next()

    if args.local_rank == 0:
        # NOTE(review): .val is the LAST batch's value; .avg would be the
        # epoch average — confirm which is intended for the epoch scalar.
        summary_writer.add_scalar(
            'train/loss', train_loss.val,  epoch)
        summary_writer.add_scalar(
            'train/acc', train_acc.val, epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['epoch'] = epoch

    return summary
Ejemplo n.º 4
0
def train_epoch(epoch, summary, summary_writer, model, loss_fn, optimizer, dataloader_train, cfg):
    """Run one multi-label training epoch with apex AMP; track recall.

    Trains ``model`` using per-class sigmoid outputs against multi-hot
    targets, scaling the loss through ``amp.scale_loss`` for mixed
    precision. Predictions are thresholded at 0.5, prefixed with a zero
    "background" column, and reduced to one class index via ``MaxIndex``.
    Also tracks recall over samples whose ``label_degree`` is >= 20.

    Args:
        epoch: current epoch index, stored into ``summary['epoch']``.
        summary: dict accumulating 'step', 'epoch', 'confusion_matrix'.
        summary_writer: TensorBoard writer; receives epoch-level scalars.
        model: network to train (switched to train mode).
        loss_fn: criterion applied to sigmoid outputs vs. multi-hot targets.
        optimizer: optimizer stepped once per batch.
        dataloader_train: training DataLoader.
        cfg: config dict; reads 'num_classes' and 'labels'.

    Returns:
        The updated ``summary`` dict (scalars only written on rank 0).
    """
    model.train()
    num_classes = cfg['num_classes']

    train_loss = AverageMeter()
    train_acc = AverageMeter()
    train_pred_posit = AverageMeter()   # reduced count of predicted positives (degree >= 20 samples)
    train_label_posit = AverageMeter()  # reduced count of actual positives (degree >= 20 samples)

    # One extra row/column for the "background" class prepended below.
    confusion_matrix = ConfusionMatrix(num_classes=(num_classes)+1)

    steps = len(dataloader_train)
    batch_size = dataloader_train.batch_size

    dataiter = iter(dataloader_train)
    time_now = time.time()

    summary['epoch'] = epoch

    if args.local_rank == 0:
        print("steps:", steps)
    prefetcher = data_prefetcher(dataiter)
    img, target, label, label_degree = prefetcher.next()
    for step in range(steps):
        data = img.to(device)
        target = target.to(device)

        output = model(data)
        output = output.view(int(batch_size), num_classes)
        target = target.view(int(batch_size), num_classes)
        label = label.view(int(batch_size))
        # Multi-label setup: independent per-class probabilities.
        conf_preds = torch.sigmoid(output)

        loss = loss_fn(conf_preds, target)

        optimizer.zero_grad()
        # apex AMP loss scaling for mixed-precision training.
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        optimizer.step()
        torch.cuda.synchronize()

        # Threshold at 0.5, prepend an all-zero background column, then
        # reduce to a single class index per sample.
        predicts = (conf_preds >= 0.5)
        d = torch.Tensor([0] * int(batch_size)).reshape(-1, 1).to(device)
        predicts = torch.cat((d, predicts.float()), 1)
        predicts = MaxIndex(predicts, batch_size)

        acc = (predicts == label).type(
            torch.cuda.FloatTensor).sum() * 1.0 / label.size(0)

        # Recall bookkeeping on samples with degree >= 20: counts of
        # predicted positives (class index > 1) vs. actual positives.
        recall_pred = (predicts[label_degree >= 20] > 1).type(
            torch.cuda.FloatTensor).sum() * 1.0
        recall_label = (label_degree >= 20).sum()

        # Accumulate per-(label, prediction) counts, summed over ranks.
        for t in range(num_classes+1):
            for p in range(num_classes+1):
                count = (predicts[label == t] == p).type(
                    torch.cuda.FloatTensor).sum()
                reduced_count = reduce_tensor(count.data, reduction=False)

                confusion_matrix.update(t, p, to_python_float(reduced_count))

        reduced_loss = reduce_tensor(loss.data)
        reduced_acc = reduce_tensor(acc.data)
        reduced_pred_20 = reduce_tensor(recall_pred.data)
        reduced_label_20 = reduce_tensor(recall_label)

        train_loss.update(to_python_float(reduced_loss))
        train_acc.update(to_python_float(reduced_acc))
        train_pred_posit.update(to_python_float(reduced_pred_20))
        train_label_posit.update(to_python_float(reduced_label_20))

        if args.local_rank == 0:
            time_spent = time.time() - time_now
            time_now = time.time()

            logging.info(
                'Epoch : {}, Step : {}, Training Loss : {:.5f}, '
                'Training Acc : {:.3f}, Run Time : {:.2f}'
                .format(
                    summary['epoch'] + 1,
                    summary['step'] + 1, train_loss.avg, train_acc.avg, time_spent))

            summary['step'] += 1

        img, target, label, label_degree = prefetcher.next()

    if args.local_rank == 0:
        # NOTE(review): raises ZeroDivisionError if no sample in the whole
        # epoch had label_degree >= 20 — confirm that cannot happen.
        recall = train_pred_posit.sum/float(train_label_posit.sum)
        # NOTE(review): .val is the LAST batch's value; .avg would be the
        # epoch average — confirm which is intended for the epoch scalar.
        summary_writer.add_scalar(
            'train/loss', train_loss.val,  epoch)
        summary_writer.add_scalar(
            'train/acc', train_acc.val, epoch)
        summary_writer.add_scalar('train/recall', recall, epoch)
        summary_writer.flush()
        summary['confusion_matrix'] = plot_confusion_matrix(
            confusion_matrix.matrix,
            cfg['labels'],
            tensor_name='train/Confusion matrix')
        summary['epoch'] = epoch
        print("Recall >=20:", recall)
    return summary