Example #1
    def train(self, epoch, print_step=100):
        msglogger.info("Epoch: {}".format(epoch))
        self.model.train()
        classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
        train_loss = 0
        correct = 0
        total = 0
        for batch_idx, (inputs, targets) in enumerate(self.trainloader):
            inputs, targets = inputs.to(self.device), targets.to(self.device)
            self.optimizer.zero_grad()
            outputs = self.model(inputs)
            loss = self.criterion(outputs, targets)
            loss.backward()
            self.optimizer.step()

            train_loss += loss.item()
            # _, predicted = outputs.max(1)
            # total += targets.size(0)
            # correct += predicted.eq(targets).sum().item()
            classerr.add(outputs.detach(), targets)
            if ((batch_idx + 1) % print_step) == 0:
                msglogger.info(
                    '[%d / %d] ==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                    batch_idx + 1, len(self.trainloader),
                    classerr.value()[0],
                    classerr.value()[1], train_loss / (batch_idx + 1))
Example #2
def test(model, criterion):
    dump_act = 2
    correct = 0
    total = 0
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    losses = {'objective_loss': tnt.AverageValueMeter()}
    with torch.no_grad():
        for batch_idx, (images, labels) in enumerate(testloader):
            if dump_act is None or batch_idx == dump_act:
                images, labels = images.cuda(), labels.cuda()
                # dump_to_npy(name= 'input.activation.int8.'+str(batch_idx), tensor=images)
                outputs = model(images)
                classerr.add(outputs.data, labels)

                loss = criterion(outputs, labels)
                losses['objective_loss'].add(loss.item())
                _, predicted = torch.max(outputs.data, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
                if total % 1000 == 0:
                    print('[{0}] accuracy {1:.2f}%'.format(total, correct / total * 100))

    acc = correct / total
    print('Accuracy of the network on the %d test images: %.2f %%' % (total, 100 * acc))
    
    top1, top5 = classerr.value()[0], classerr.value()[1]
    print("Top1 = %.3f, Top5 = %.3f, loss = %.3f\n"%(top1, top5, losses["objective_loss"].mean))
    
    return top1, top5, losses['objective_loss'].mean
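
A side note on the tnt.AverageValueMeter used above: its value() returns a (mean, std) tuple, while the running mean is also exposed as the .mean attribute that this and later examples read directly. A minimal sketch, assuming torchnet is importable:

from torchnet import meter as tnt

loss_meter = tnt.AverageValueMeter()
for batch_loss in (0.9, 0.7, 0.5):   # e.g. successive loss.item() values
    loss_meter.add(batch_loss)

mean, std = loss_meter.value()       # value() returns (mean, std)
print(mean, loss_meter.mean)         # the same running mean, two ways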
Example #3
def run_epoch(stage, state, data_loader):
    """stage = 'train' or 'test' or 'val' or anything"""
    if stage=='train':
        state.model.train()
    else:
        state.model.eval()

    pbar = tqdm(total=len(data_loader), leave=False)

    _loss = meter.AverageValueMeter()
    _acc = meter.ClassErrorMeter(accuracy=True)
    _conf = meter.ConfusionMeter(k=10, normalized=True)

    for batch_idx, (data, target) in enumerate(data_loader):
        data, target = data.to(state.args.device), target.to(state.args.device)
        if stage=='train':
            state.optimizer.zero_grad()
        output = state.model(data)
        loss = F.nll_loss(output, target)
        if stage=='train':
            loss.backward()
            state.optimizer.step()
            state.writer.add_scalar(stage+'/loss-iter', loss.mean(),
                (batch_idx + state.epoch*len(data_loader)) )
                # * data.size()[0]  )


        _loss.add(loss.mean().item())
        _acc.add(output, target)
        _conf.add(output, target)

        if batch_idx % state.args.pbar_interval == 0:
            pbar.desc = '{:6s}'.format(stage)
            pbar.postfix = 'Loss {:.4f} Acc {:.4f}%'.format(_loss.value()[0], _acc.value()[0])
            pbar.update(state.args.pbar_interval)

    if stage=='train':
        state.scheduler.step()

    pbar.close()

    # if stage != 'train' or 'train_test' not in stage:
    state.epoch_pbar.desc += ' {:6s}: loss {:.4f}, Acc {:.4f}% |'.format(stage, _loss.value()[0], _acc.value()[0])
    state.epoch_pbar.update()

    # if stage!='train':
    state.writer.add_scalar(stage+'/avg_loss-epoch', _loss.value()[0], state.epoch)
    state.writer.add_scalar(stage+'/avg_acc-epoch',  _acc.value()[0],  state.epoch)
    state.writer.add_heatmap(stage+'/conf_matrix-epoch', _conf.value(), state.epoch,
        y_title=data_loader.dataset.classes, x_title=data_loader.dataset.classes )

    result = {
        'loss': _loss.value()[0],
        'acc': _acc.value()[0]
    }

    return result
Example #4
    def testClassErrorMeteri_batch1(self):
        mtr = meter.ClassErrorMeter(topk=[1])
        output = torch.tensor([1, 0, 0])
        if hasattr(torch, "arange"):
            target = torch.arange(0, 1)
        else:
            target = torch.range(0, 0)
        mtr.add(output, target)
        err = mtr.value()
        self.assertEqual(err, [0], "All should be correct")
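
For reference, a minimal self-contained sketch of the semantics this test exercises, assuming torchnet is installed: add() takes an N x K score matrix plus N integer targets, and value(k) reports the top-k error in percent, or top-k accuracy when the meter is built with accuracy=True.

import torch
from torchnet import meter

scores = torch.tensor([[0.9, 0.1, 0.0],   # predicted class 0 (correct)
                       [0.2, 0.7, 0.1],   # predicted class 1 (correct)
                       [0.6, 0.3, 0.1]])  # predicted class 0 (target is 2)
targets = torch.tensor([0, 1, 2])

err = meter.ClassErrorMeter(topk=[1])
acc = meter.ClassErrorMeter(topk=[1], accuracy=True)
err.add(scores, targets)
acc.add(scores, targets)
print(err.value(1))   # ~33.33: one of three samples misclassified
print(acc.value(1))   # ~66.67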
Example #5
def train(train_loader, model, criterion, optimizer, epoch, loggers, args):
    losses = OrderedDict([('Overall Loss', meter.AverageValueMeter()),
                          ('Objective Loss', meter.AverageValueMeter())])
    classerr = meter.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = meter.AverageValueMeter()
    data_time = meter.AverageValueMeter()

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)

    msglogger.info("{} samples ({} per mini-batch)".format(
        total_samples, batch_size))

    model.train()
    acc_stats = []
    end = time.time()
    for train_step, (inputs, target) in enumerate(train_loader):
        data_time.add(time.time() - end)

        inputs, target = inputs.to(args.device), target.to(args.device)
        output = model(inputs)
        loss = criterion(output, target)

        classerr.add(output.data, target)
        acc_stats.append([classerr.value(1), classerr.value(5)])
        losses['Objective Loss'].add(loss.item())
        losses['Overall Loss'].add(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.add(time.time() - end)
        steps_completed = train_step + 1

        if steps_completed % args.print_freq == 0:
            errs = OrderedDict()
            errs['Top1'] = classerr.value(1)
            errs['Top5'] = classerr.value(5)

            stats_dict = OrderedDict()
            for loss_name, loss_value in losses.items():
                stats_dict[loss_name] = loss_value.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)
            msglogger.info(
                'Train epoch: %d [%5d/%5d]  Top1: %.3f  Top5: %.3f  Loss: %.3f',
                epoch, steps_completed, steps_per_epoch, errs['Top1'],
                errs['Top5'], losses['Objective Loss'].mean)

        end = time.time()
    return acc_stats
def _validate(data_loader, model, criterion, loggers, print_freq, epoch=-1):
    """Execute the validation/test loop."""
    losses = {'objective_loss': tnt.AverageValueMeter()}
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    # if nclasses<=10:
    #     # Log the confusion matrix only if the number of classes is small
    #     confusion = tnt.ConfusionMeter(10)

    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    total_steps = total_samples / batch_size
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to evaluation mode
    model.eval()

    end = time.time()
    for validation_step, (inputs, target) in enumerate(data_loader):
        with PytorchNoGrad():
            target = target.cuda(non_blocking=True)
            input_var = get_inference_var(inputs)
            target_var = get_inference_var(target)

            # compute output
            output = model(input_var)
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            losses['objective_loss'].add(loss.item())
            classerr.add(output.data, target)
            # if confusion:
            #     confusion.add(output.data, target)

            # measure elapsed time
            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step + 1)
            if steps_completed % print_freq == 0:
                stats = ('',
                         OrderedDict([('Loss', losses['objective_loss'].mean),
                                      ('Top1', classerr.value(1)),
                                      ('Top5', classerr.value(5))]))
                distiller.log_training_progress(stats, None, epoch,
                                                steps_completed, total_steps,
                                                print_freq, loggers)

    msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                   classerr.value()[0],
                   classerr.value()[1], losses['objective_loss'].mean)

    # if confusion:
    #     msglogger.info('==> Confusion:\n%s', str(confusion.value()))
    return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
Example #7
def test(model, dataloader, num_workers, batch_size, resultpath):
    print("num test = {}".format(len(dataloader.dataset)))
    """
    测试指标:
    1、 准确率(Accuracy): 模型预测正确样本数占总样本数的比例。test_acc
    2、 各个类的精度: 模型对各个类别的预测准确率。
    3、 AUC
    4、 混淆矩阵: 用于计算各种指标(包括灵敏性,特异性等)
    """
    # 整个测试数据集的准确率
    test_acc = meter.ClassErrorMeter(topk=[1], accuracy=True)
    # 每一类的精度
    test_ap = meter.APMeter()
    # AUC指标,AUC要求输入样本预测为正例的概率
    """根据我的数据集文件命名,0表示阴性,1表示阳性(即1表示正例)"""
    test_auc = meter.AUCMeter()
    # 混淆矩阵
    test_conf = meter.ConfusionMeter(k=2, normalized=False)

    result_writer = ResultsWriter(str(resultpath), overwrite=False)

    with torch.no_grad():

        for inputs, labels in tqdm(dataloader, desc="Test"):

            # inputs[B,C,H,W]
            inputs = inputs.cuda() if torch.cuda.is_available() else inputs
            # labels[B, numclasses]
            labels = labels.cuda() if torch.cuda.is_available() else labels

            # outputs[B,numclasses]
            outputs = model(inputs)

            # compute the metrics
            pred_proc = F.softmax(outputs.detach(), dim=1)
            test_acc.add(pred_proc, labels.detach())
            test_ap.add(pred_proc, labels.detach())
            # take column 1 of the output: the probability of the positive class, i.e. label 1 (diseased)
            test_auc.add(pred_proc[:, 1], labels.detach())
            test_conf.add(pred_proc, labels.detach())

    # Save the results so that evaluate.py can compute metrics and draw plots
    result_writer.update(
        "test", {
            "acc": test_acc.value(),
            "ap": test_ap.value(),
            "test_auc": test_auc.value()[0],
            "test_tpr": test_auc.value()[1],
            "test_fpr": test_auc.value()[2],
            "test_conf": test_conf.value()
        })

    return test_acc, test_ap, test_auc
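
For context on the AUC bookkeeping above: torchnet's AUCMeter expects a 1-D vector of positive-class scores together with binary 0/1 targets, and value() returns an (auc, tpr, fpr) triple, which is why the example indexes column 1 of the softmax output and unpacks three values when writing results. A minimal sketch, assuming a binary problem where label 1 is the positive class:

import torch
from torchnet import meter

auc_meter = meter.AUCMeter()
scores = torch.tensor([0.9, 0.8, 0.3, 0.1])  # P(class == 1) for each sample
labels = torch.tensor([1, 1, 0, 0])          # binary ground-truth labels
auc_meter.add(scores, labels)

area, tpr, fpr = auc_meter.value()
print(area)  # 1.0 for this perfectly separable toy batch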
Example #8
    def __init__(self, name=None, n_classes=2):
        self.name = name
        self.n_classes = n_classes
        self.path = os.path.join('log', name)
        self.conf_mtr = meter.ConfusionMeter(n_classes)
        self.auc_mtr = meter.AUCMeter()
        self.err_mtr = meter.ClassErrorMeter(topk=[1], accuracy=True)
        saveMkdir(self.path)

        self.fp = open(os.path.join(self.path, 'res.log'), 'w')
        self.y_scores = np.array([], dtype=np.float32).reshape(0, 1)
        self.y_true = np.array([], dtype=np.float32).reshape(0, 1)
Example #9
    def testClassErrorMeter(self):
        mtr = meter.ClassErrorMeter(topk=[1])
        output = torch.eye(3)
        target = torch.arange(0, 3)
        mtr.add(output, target)
        err = mtr.value()

        self.assertEqual(err, [0], "All should be correct")

        target[0] = 1
        target[1] = 0
        target[2] = 0
        mtr.add(output, target)
        err = mtr.value()
        self.assertEqual(err, [50.0], "Half should be correct")
Example #10
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    losses = {'objective_loss': meter.AverageValueMeter()}
    classerr = meter.ClassErrorMeter(accuracy=True, topk=(1, 5))
    print(type(meter))
    if args.earlyexit_thresholds:
        raise ValueError('Error: earlyexit function has not been completed')

    batch_time = meter.AverageValueMeter()
    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    total_steps = total_samples / batch_size
    msglogger.info("{} samples ({} per mini-batch)".format(
        total_samples, batch_size))

    model.eval()

    end = time.time()

    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to(args.device), target.to(args.device)
            output = model(inputs)

            loss = criterion(output, target)
            losses['objective_loss'].add(loss.item())
            classerr.add(output.data, target)
        batch_time.add(time.time() - end)
        end = time.time()
        steps_completed = validation_step + 1
        if steps_completed % args.print_freq == 0:
            if not args.earlyexit_thresholds:
                stats = ('',
                         OrderedDict([('Loss', losses['objective_loss'].mean),
                                      ('Top1', classerr.value(1)),
                                      ('Top5', classerr.value(5))]))
                msglogger.info("Validation epoch: %d [%d/%d]", epoch,
                               validation_step, total_steps)
            else:
                pass

    if not args.earlyexit_thresholds:
        msglogger.info(
            '==> Validation epoch: %d Top1: %.3f  Top5: %.3f  Loss: %.3f',
            epoch,
            classerr.value()[0],
            classerr.value()[1], losses['objective_loss'].mean)
        return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
def validate(model, criterion, data_loader, args):
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    model.eval()
    if args.cpu:
        model = model.cpu()

    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            if args.cpu:
                inputs, target = inputs.cpu(), target.cpu()
            else:
                inputs, target = inputs.to('cuda'), target.to('cuda')

            output = model(inputs)
            classerr.add(output.data, target)
    return classerr.value(1)
Example #12
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    """Execute the validation/test loop."""
    losses = {'objective_loss': tnt.AverageValueMeter()}
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))

    batch_time = tnt.AverageValueMeter()
    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    if args.display_confusion:
        confusion = tnt.ConfusionMeter(args.num_classes)
    total_steps = total_samples / batch_size
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to evaluation mode
    model.eval()

    end = time.time()
    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to(args.device), target.to(args.device)
            # compute output from model
            output = model(inputs)

            # compute loss
            loss = criterion(output, target)
            # measure accuracy and record loss
            losses['objective_loss'].add(loss.item())
            classerr.add(output.data, target)
            if args.display_confusion:
                confusion.add(output.data, target)
            # measure elapsed time
            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step + 1)
            if steps_completed % args.print_freq == 0:
                stats = ('',
                         OrderedDict([('Loss', losses['objective_loss'].mean),
                                      ('Top1', classerr.value(1)),
                                      ('Top5', classerr.value(5))]))
                distiller.log_training_progress(stats, None, epoch,
                                                steps_completed, total_steps,
                                                args.print_freq, loggers)
    if args.display_confusion:
        msglogger.info('==> Confusion:\n%s\n', str(confusion.value()))
    return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
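
As used behind args.display_confusion above, tnt.ConfusionMeter(k) accumulates N x K scores (or N predicted labels) against integer targets, and value() yields a k x k matrix whose rows are ground-truth classes and whose columns are predictions. A short sketch, assuming torchnet:

import torch
from torchnet import meter

confusion = meter.ConfusionMeter(3)
scores = torch.tensor([[2.0, 0.1, 0.1],
                       [0.1, 2.0, 0.1],
                       [0.1, 2.0, 0.1]])   # last sample is predicted as class 1
targets = torch.tensor([0, 1, 2])          # ... but its ground truth is class 2
confusion.add(scores, targets)
print(confusion.value())
# [[1 0 0]
#  [0 1 0]
#  [0 1 0]]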
Example #13
    def test(self, epoch, print_step=100):
        self.model.eval()
        classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
        test_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for batch_idx, (inputs, targets) in enumerate(self.testloader):
                inputs, targets = inputs.to(self.device), targets.to(
                    self.device)
                outputs = self.model(inputs)
                loss = self.criterion(outputs, targets)

                test_loss += loss.item()
                classerr.add(outputs.detach(), targets)
                if ((batch_idx + 1) % print_step) == 0:
                    msglogger.info(
                        '[%d / %d] ==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                        batch_idx + 1, len(self.testloader),
                        classerr.value()[0],
                        classerr.value()[1], test_loss / (batch_idx + 1))

        # Save checkpoint.
        acc = classerr.value()[0]
        save_path = './logs/' + self.log_time + '/checkpoint/ckpt.pth'
        if acc > self.best_acc:
            save_path = './logs/' + self.log_time + '/checkpoint/best.pth'
            self.best_acc = acc

        print('Saving..')
        state = {
            'net': self.model.state_dict(),
            'acc': acc,
            'epoch': epoch,
        }
        if not os.path.isdir('./logs/' + self.log_time + '/checkpoint'):
            os.mkdir('./logs/' + self.log_time + '/checkpoint')
        torch.save(state, save_path)

        return acc
Example #14
def val(model, dataloader, criterion):
    model.eval() if opt.gpus <= 1 else model.module.eval()
    loss_meter = meter.AverageValueMeter()
    accuracy_meter = meter.ClassErrorMeter(accuracy=True)
    for ii, data in enumerate(dataloader):
        input_, label = data
        input_, label = input_.to(device), label.to(device)
        score = model(input_)
        accuracy_meter.add(score.data.squeeze(), label.long())
        loss = criterion(score, label)
        loss_meter.add(loss.item())

    for (i, num) in enumerate(model.get_activated_neurons() if opt.gpus <= 1 else model.module.get_activated_neurons()):
        vis.plot("val_layer/{}".format(i), num)

    for (i, z_phi) in enumerate(model.z_phis()):
        if opt.hardsigmoid:
            vis.hist("hard_sigmoid(phi)/{}".format(i), F.hardtanh(opt.k * z_phi / 7. + .5, 0, 1).cpu().detach().numpy())
        else:
            vis.hist("sigmoid(phi)/{}".format(i), torch.sigmoid(opt.k * z_phi).cpu().detach().numpy())

    vis.plot("prune_rate", model.prune_rate() if opt.gpus <= 1 else model.module.prune_rate())
    return accuracy_meter.value()[0], loss_meter.value()[0]
Example #15
def _validate(data_loader, model, criterion, loggers, args, epoch=-1):
    """Execute the validation/test loop."""
    losses = {'objective_loss': tnt.AverageValueMeter()}
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))

    if args.earlyexit_thresholds:
        # for Early Exit, we have a list of errors and losses for each of the exits.
        args.exiterrors = []
        args.losses_exits = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))
            args.losses_exits.append(tnt.AverageValueMeter())
        args.exit_taken = [0] * args.num_exits

    batch_time = tnt.AverageValueMeter()
    total_samples = len(data_loader.sampler)
    batch_size = data_loader.batch_size
    if args.display_confusion:
        confusion = tnt.ConfusionMeter(args.num_classes)
    total_steps = total_samples / batch_size
    msglogger.info('%d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to evaluation mode
    model.eval()

    end = time.time()
    for validation_step, (inputs, target) in enumerate(data_loader):
        with torch.no_grad():
            inputs, target = inputs.to(args.device), target.to(args.device)
            # compute output from model
            output = model(inputs)

            if not args.earlyexit_thresholds:
                # compute loss
                loss = criterion(output, target)
                # measure accuracy and record loss
                losses['objective_loss'].add(loss.item())
                classerr.add(output.data, target)
                if args.display_confusion:
                    confusion.add(output.data, target)
            else:
                earlyexit_validate_loss(output, target, criterion, args)

            # measure elapsed time
            batch_time.add(time.time() - end)
            end = time.time()

            steps_completed = (validation_step+1)
            if steps_completed % args.print_freq == 0:
                if not args.earlyexit_thresholds:
                    stats = ('',
                            OrderedDict([('Loss', losses['objective_loss'].mean),
                                         ('Top1', classerr.value(1)),
                                         ('Top5', classerr.value(5))]))
                else:
                    stats_dict = OrderedDict()
                    stats_dict['Test'] = validation_step
                    for exitnum in range(args.num_exits):
                        la_string = 'LossAvg' + str(exitnum)
                        stats_dict[la_string] = args.losses_exits[exitnum].mean
                        # Because of the nature of ClassErrorMeter, if an exit is never taken during the batch,
                        # then accessing the value(k) will cause a divide by zero. So we'll build the OrderedDict
                        # accordingly and we will not print for an exit error when that exit is never taken.
                        if args.exit_taken[exitnum]:
                            t1 = 'Top1_exit' + str(exitnum)
                            t5 = 'Top5_exit' + str(exitnum)
                            stats_dict[t1] = args.exiterrors[exitnum].value(1)
                            stats_dict[t5] = args.exiterrors[exitnum].value(5)
                    stats = ('Performance/Validation/', stats_dict)

                distiller.log_training_progress(stats, None, epoch, steps_completed,
                                                total_steps, args.print_freq, loggers)
    if not args.earlyexit_thresholds:
        msglogger.info('==> Top1: %.3f    Top5: %.3f    Loss: %.3f\n',
                       classerr.value()[0], classerr.value()[1], losses['objective_loss'].mean)

        if args.display_confusion:
            msglogger.info('==> Confusion:\n%s\n', str(confusion.value()))
        return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
    else:
        total_top1, total_top5, losses_exits_stats = earlyexit_validate_stats(args)
        return total_top1, total_top5, losses_exits_stats[args.num_exits-1]
Example #16
def train_epoch(epoch, data_loader, model, criterion, optimizer, opt, vis,
                trainlogwindow):
    print('train at epoch {}'.format(epoch))

    model.train()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()

    mmap = meter.mAPMeter()
    top = meter.ClassErrorMeter(topk=[1, 3, 5], accuracy=True)
    mmap.reset()
    top.reset()
    end_time = time.time()
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        targets = targets.cuda()
        if type(inputs) is list:
            inputs = [Variable(inputs[ii]).cuda() for ii in range(len(inputs))]
        else:
            inputs = inputs.cuda()
            #inputs, targets_a, targets_b, lam = mixup_data(inputs, targets, opt.DATASET.ALPHA, True)
            #inputs, targets_a, targets_b = Variable(inputs), Variable(targets_a), Variable(targets_b)
            inputs = Variable(inputs)
        #print(targets)
        targets = Variable(targets)

        outputs, context = model(inputs)
        #loss_func = mixup_criterion(targets_a, targets_b, lam)
        #loss = loss_func(criterion, outputs)
        loss = criterion(outputs, targets)
        #print(outputs.shape)
        #print(targets)
        acc = calculate_accuracy(outputs, targets)
        one_hot = torch.zeros_like(outputs).cuda().scatter_(
            1, targets.view(-1, 1), 1)
        mmap.add(outputs.detach(), one_hot.detach())
        top.add(outputs.detach(), targets.detach())
        losses.update(loss.data.item(), targets.detach().size(0))
        accuracies.update(acc, targets.detach().size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end_time)
        end_time = time.time()

        vis.text(
            "gpu{}, epoch: {},batch:{},iter: {},loss: {},acc:{},lr: {}\n".format(
                torch.cuda.current_device(), epoch, i + 1,
                (epoch - 1) * len(data_loader) + (i + 1),
                losses.val, accuracies.val, optimizer.param_groups[0]['lr']),
            win=trainlogwindow, append=True)

        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'mmap {mmap}\t'
              'top1 3 5: {top}\t'.format(epoch,
                                         i + 1,
                                         len(data_loader),
                                         batch_time=batch_time,
                                         data_time=data_time,
                                         loss=losses,
                                         acc=accuracies,
                                         mmap=mmap.value(),
                                         top=top.value()))
    vis.text(
        "total:\n gpu:{} epoch: {},loss: {},lr: {}, accu:{},mAP:{}, top135 {}\n"
        .format(torch.cuda.current_device(), epoch, losses.avg,
                optimizer.param_groups[0]['lr'], accuracies.avg, mmap.value(),
                top.value()),
        win=trainlogwindow,
        append=True)
    if torch.cuda.current_device() == 0:
        print("saveing ckp ########################################")
        if epoch % opt.MODEL.CKP_DURING == 0:
            save_file_path = os.path.join(opt.MODEL.RESULT, opt.MODEL.NAME,
                                          'save_{}.pth'.format(epoch))
            if not os.path.exists(
                    os.path.join(opt.MODEL.RESULT, opt.MODEL.NAME)):
                os.makedirs(os.path.join(opt.MODEL.RESULT, opt.MODEL.NAME))
            states = {
                'epoch': epoch + 1,
                'arch': opt.MODEL.NAME,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)
    return losses.avg, mmap.value()
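
The scatter_ call above builds the one-hot {0,1} target matrix that meter.APMeter and meter.mAPMeter expect (N x K scores against N x K binary targets); APMeter.value() returns per-class average precision and mAPMeter.value() their mean. A minimal sketch, assuming torchnet:

import torch
from torchnet import meter

ap, mAP = meter.APMeter(), meter.mAPMeter()
scores = torch.tensor([[0.8, 0.1, 0.1],
                       [0.2, 0.6, 0.2]])
targets = torch.tensor([0, 1])
one_hot = torch.zeros_like(scores).scatter_(1, targets.view(-1, 1), 1)
ap.add(scores, one_hot)
mAP.add(scores, one_hot)
print(ap.value())   # per-class average precision (zero for classes never seen)
print(mAP.value())  # mean over the classes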
Example #17
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    acc_stats = []
    end = time.time()
    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to(args.device), target.to(args.device)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if not hasattr(args, 'kd_policy') or args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)

        if not args.earlyexit_lossweights:
            loss = criterion(output, target)
            # Measure accuracy
            classerr.add(output.data, target)
            acc_stats.append([classerr.value(1), classerr.value(5)])
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)
        # Record loss
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())
        else:
            losses[OVERALL_LOSS_KEY].add(loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        if compression_scheduler:
            compression_scheduler.before_parameter_optimization(epoch, train_step, steps_per_epoch, optimizer)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats,
                                            params,
                                            epoch, steps_completed,
                                            steps_per_epoch, args.print_freq,
                                            loggers)
        end = time.time()
    return acc_stats
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training-with-compression loop for one epoch.
    
    For each training step in epoch:
        compression_scheduler.on_minibatch_begin(epoch)
        output = model(input)
        loss = criterion(output, target)
        compression_scheduler.before_backward_pass(epoch)
        loss.backward()
        compression_scheduler.before_parameter_optimization(epoch)
        optimizer.step()
        compression_scheduler.on_minibatch_end(epoch)
    """
    def _log_training_progress():
        # Log some statistics
        errs = OrderedDict()
        if not early_exit_mode(args):
            errs['Top1'] = classerr.value(1)
            errs['Top5'] = classerr.value(5)
        else:
            # For Early Exit case, the Top1 and Top5 stats are computed for each exit.
            for exitnum in range(args.num_exits):
                errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

        stats_dict = OrderedDict()
        for loss_name, meter in losses.items():
            stats_dict[loss_name] = meter.mean
        stats_dict.update(errs)
        stats_dict['LR'] = optimizer.param_groups[0]['lr']
        stats_dict['Time'] = batch_time.mean
        stats = ('Performance/Training/', stats_dict)

        params = model.named_parameters() if args.log_params_histograms else None
        distiller.log_training_progress(stats,
                                        params,
                                        epoch, steps_completed,
                                        steps_per_epoch, args.print_freq,
                                        loggers)

    OVERALL_LOSS_KEY = 'Overall Loss'
    OBJECTIVE_LOSS_KEY = 'Objective Loss'

    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit, so
    # `exiterrors` is analogous to `classerr` in the non-Early Exit case
    if early_exit_mode(args):
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    acc_stats = []
    end = time.time()
    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to(args.device), target.to(args.device)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if not hasattr(args, 'kd_policy') or args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)

        if not early_exit_mode(args):
            # Handle loss calculation for inception models separately due to auxiliary outputs
            # if user turned off auxiliary classifiers by hand, then loss should be calculated normally,
            # so, we have this check to ensure we only call this function when output is a tuple
            if models.is_inception(args.arch) and isinstance(output, tuple):
                loss = inception_training_loss(output, target, criterion, args)
            else:
                loss = criterion(output, target)
            # Measure accuracy
            # For inception models, we only consider accuracy of main classifier
            if isinstance(output, tuple):
                classerr.add(output[0].detach(), target)
            else:
                classerr.add(output.detach(), target)
            acc_stats.append([classerr.value(1), classerr.value(5)])
        else:
            # Measure accuracy and record loss
            classerr.add(output[args.num_exits-1].detach(), target) # add the last exit (original exit)
            loss = earlyexit_loss(output, target, criterion, args)
        # Record loss
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())
        else:
            losses[OVERALL_LOSS_KEY].add(loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        if compression_scheduler:
            compression_scheduler.before_parameter_optimization(epoch, train_step, steps_per_epoch, optimizer)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            _log_training_progress()

        end = time.time()
    #return acc_stats
    # NOTE: this breaks previous behavior, which returned a history of (top1, top5) values
    return classerr.value(1), classerr.value(5), losses[OVERALL_LOSS_KEY]
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, print_freq, log_params_hist):
    """Training loop for one epoch."""
    losses = {
        'objective_loss': tnt.AverageValueMeter(),
        'regularizer_loss': tnt.AverageValueMeter()
    }
    if compression_scheduler is None:
        # Initialize the regularizer loss to zero
        losses['regularizer_loss'].add(0)

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)',
                   total_samples, batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(inputs)
        target_var = torch.autograd.Variable(target)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step,
                                                     steps_per_epoch)
        output = model(input_var)
        loss = criterion(output, target_var)

        # Measure accuracy and record loss
        classerr.add(output.data, target)
        losses['objective_loss'].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we add any regularization loss computed by the scheduler
            regularizer_loss = compression_scheduler.before_backward_pass(
                epoch, train_step, steps_per_epoch, loss)
            loss += regularizer_loss
            losses['regularizer_loss'].add(regularizer_loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step,
                                                   steps_per_epoch)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step + 1)

        if steps_completed % print_freq == 0:
            # Log some statistics
            lr = optimizer.param_groups[0]['lr']
            stats = ('Performance/Training/',
                     OrderedDict([('Loss', losses['objective_loss'].mean),
                                  ('Reg Loss',
                                   losses['regularizer_loss'].mean),
                                  ('Top1', classerr.value(1)),
                                  ('Top5', classerr.value(5)), ('LR', lr),
                                  ('Time', batch_time.mean)]))

            distiller.log_training_progress(
                stats,
                model.named_parameters() if log_params_hist else None, epoch,
                steps_completed, steps_per_epoch, print_freq, loggers)
        end = time.time()
def val_epoch(epoch, data_loader, model, criterion, opt, vis,vallogwindow):
    print('validation at epoch {}'.format(epoch))

    model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    accuracies = AverageMeter()
    mmap = meter.mAPMeter()
    AP = meter.APMeter()
    top = meter.ClassErrorMeter(topk=[1, 3, 5], accuracy=True)
    mmap.reset()
    AP.reset()
    top.reset()
    end_time = time.time()
    for i, (inputs, targets) in enumerate(data_loader):
        data_time.update(time.time() - end_time)
        if type(inputs) is list:
            inputs = [Variable(inputs[ii].cuda()) for ii in range(len(inputs))]
        else:
            inputs = Variable(inputs.cuda())
        targets = targets.cuda()
        with torch.no_grad():
            #inputs = Variable(inputs)
            targets = Variable(targets)
            outputs ,context= model(inputs)
            #if i %5==0:
            #for jj in range(num):
            #    org_img = inverse_normalize(inputs[0,jj,:,:,:].detach().cpu().numpy())
            #    show_keypoint(org_img, context[0].detach().cpu(),vis=vis,title = str(jj+1))

            loss = criterion(outputs, targets)
            acc = calculate_accuracy(outputs, targets)

            losses.update(loss.data.item(), targets.detach().size(0))
            accuracies.update(acc, targets.detach().size(0))
            one_hot = torch.zeros_like(outputs).cuda().scatter_(1, targets.view(-1, 1), 1)
            mmap.add(outputs.detach(), one_hot.detach())
            top.add(outputs.detach(), targets.detach())
            AP.add(outputs.detach(), one_hot.detach())
        batch_time.update(time.time() - end_time)
        end_time = time.time()
        print('Epoch: [{0}][{1}/{2}]\t'
              'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
              'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
              'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
              'Acc {acc.val:.3f} ({acc.avg:.3f})\t'
              'mmap {mmap}\t'
              'top1 3 5: {top}\t'.format(
            epoch,
            i + 1,
            len(data_loader),
            batch_time=batch_time,
            data_time=data_time,
            loss=losses,
            acc=accuracies,
            mmap=mmap.value(),
            top=top.value() ))

    vis.text("gpu:{}, epoch: {},loss: {},accu:{},mAP:{}, top135 {}\nAP:{}".format(torch.cuda.current_device(),epoch,losses.avg,accuracies.avg,mmap.value(),top.value(),AP.value())
    ,win=vallogwindow,append=True)
    #exit()
    #if epoch==10:
    #    exit()
    return losses.avg, mmap.value()
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, args):
    """Training loop for one epoch."""
    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    # For Early Exit, we define statistics for each exit
    # So exiterrors is analogous to classerr for the non-Early Exit case
    if args.earlyexit_lossweights:
        args.exiterrors = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)
    epoch_frac = args.partial_epoch
    steps_per_frac_epoch = math.ceil((total_samples*epoch_frac) / batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)
        inputs, target = inputs.to('cuda'), target.to('cuda')

        if train_step == steps_per_frac_epoch:
            break
        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)

        if args.kd_policy is None:
            output = model(inputs)
        else:
            output = args.kd_policy.forward(inputs)
        if not args.earlyexit_lossweights:
            # ------------------------------------------------------------------ AHMED edit sin2-reg - April19
            """ adding sin2 regularization here"""
            qbits_dict = {}
            sin2_reg_loss = 0
            #print('weights:', (model.module.conv2.weight.size()))
            bw = 3
            qbits_dict['conv1'] = bw
            qbits_dict['conv2'] = bw
            qbits_dict['fc1'] = bw
            qbits_dict['fc2'] = bw
            qbits_dict['fc3'] = bw

            # ---------------------
            #kernel = model.module.features[0].float_weight
            kernel1 = model.module.conv1.weight
            kernel2 = model.module.conv2.weight
            kernel3 = model.module.fc1.weight
            kernel4 = model.module.fc2.weight
            kernel5 = model.module.fc3.weight
            last_epoch = 999
            if (train_step == last_epoch):
                w1 = kernel1.data.cpu().numpy()
                w2 = kernel2.data.cpu().numpy()
                w3 = kernel3.data.cpu().numpy()
                w4 = kernel4.data.cpu().numpy()
                w5 = kernel5.data.cpu().numpy()
                np.save('weights_sin2Reg/cifar10_L1_weights'+str(last_epoch), w1)
                np.save('weights_sin2Reg/cifar10_L2_weights'+str(last_epoch), w2)
                np.save('weights_sin2Reg/cifar10_L3_weights'+str(last_epoch), w3)
                np.save('weights_sin2Reg/cifar10_L4_weights'+str(last_epoch), w4)
                np.save('weights_sin2Reg/cifar10_L5_weights'+str(last_epoch), w5)
                print('++++saving weights+++++++++++++++++++++++++++')
            # ---------------------



            # ----------------------------------
            q = 2
            power = 2
            step = 1/(2**(q)-0.5) # dorefa 
            shift = step/2 

            #step = 1/(2**(q)-1) # wrpn
            #shift = 0 
            
            #amplitude   = (np.sin(pi*(weight+step/2)/(step)))**2
            
            step = 1/(2**(model.module.B1.clone())-0.5) # dorefa 
            #step = 1/(2**(5)-0.5) # dorefa 
            shift = step/2 
            #kernel = model.module.conv1.float_weight
            kernel = model.module.conv1.weight
            #sin2_func_1 = torch.mean(torch.pow(torch.sin(pi*kernel/(2**(-(qbits_dict['conv1']))-1)),2))
            sin2_func_1 =torch.mean((torch.sin(pi*(kernel+shift)/(step)))**power)  # dorefa
            #print(sin2_func_1.data[0])

            step = 1/(2**(model.module.B2.clone())-0.5) # dorefa 
            #step = 1/(2**(3)-0.5) # dorefa 
            shift = step/2 
            #kernel = model.module.conv2.float_weight
            kernel = model.module.conv2.weight
            #sin2_func_2 = torch.mean(torch.pow(torch.sin(pi*kernel/(2**(-(qbits_dict['conv2']))-1)),2))
            sin2_func_2 = torch.mean(torch.pow(torch.sin(pi*(kernel+shift)/step),power)) # dorefa

            step = 1/(2**(model.module.B3.clone())-0.5) # dorefa 
            #step = 1/(2**(3)-0.5) # dorefa 
            shift = step/2 
            #kernel = model.module.fc1.float_weight
            kernel = model.module.fc1.weight
            #sin2_func_3 = torch.mean(torch.pow(torch.sin(pi*kernel/(2**(-(qbits_dict['fc1']))-1)),2))
            sin2_func_3 = torch.mean(torch.pow(torch.sin(pi*(kernel+shift)/step),power)) # dorefa

            step = 1/(2**(model.module.B4.clone())-0.5) # dorefa 
            #step = 1/(2**(3)-0.5) # dorefa 
            shift = step/2 
            #kernel = model.module.fc2.float_weight
            kernel = model.module.fc2.weight
            #sin2_func_4 = torch.mean(torch.pow(torch.sin(pi*kernel/(2**(-(qbits_dict['fc2']))-1)),2))
            sin2_func_4 = torch.mean(torch.pow(torch.sin(pi*(kernel+shift)/step),power)) # dorefa

            step = 1/(2**(model.module.B5.clone())-0.5) # dorefa 
            #step = 1/(2**(4)-0.5) # dorefa 
            shift = step/2 
            #kernel = model.module.fc3.float_weight
            kernel = model.module.fc3.weight
            #sin2_func_5 = torch.mean(torch.pow(torch.sin(pi*kernel/(2**(-(qbits_dict['fc3']))-1)),2))
            sin2_func_5 = torch.mean(torch.pow(torch.sin(pi*(kernel+shift)/step),power)) # dorefa

            # ----------------------------------

            sin2_reg_loss = sin2_func_1 + sin2_func_2 + sin2_func_3 + sin2_func_4 + sin2_func_5 
            freq_loss = model.module.B1 + model.module.B2 + model.module.B3 + model.module.B4 + model.module.B5
            #sin2_reg_loss =  sin2_func_1 + sin2_func_3 + sin2_func_4  

            #loss = criterion(output, target) 

            
            """ settings 0 """ 
            #if train_step > 100: 
            #   lambda_q = 1
            #   lambda_f = 0.05
            #else:
            #   lambda_q = 0
            #   lambda_f = 0
            
            """ settings 1 """ 
            #lambda_q = (1/torch.exp(torch.tensor(4.0))).to('cuda')*torch.exp(torch.tensor(4*int(epoch)/1000)).to('cuda')# rising1
            #lambda_f =  0.05
            #lambda_qp = (1/np.exp(4))*torch.exp(torch.from_numpy(np.array(4*epoch/500))).cpu().numpy().data # rising1
            #lambda_fp = lambda_f
            
            """ settings  2:  step-like lambda """
            r = 0.2*args.epochs
            d = 0.8*args.epochs
            s = 20
            f1 = 0.5 * (1+torch.tanh(torch.tensor((epoch-r)/s).to('cuda')));
            f2 = 0.5 * (1+torch.tanh(torch.tensor((epoch-d)/s).to('cuda')));
            lambda_q = f1
            #lambda_f_value = 0.02*(f1-f2)
            lambda_f = 0.03
            
            reg_loss = lambda_q * sin2_reg_loss
            loss = criterion(output, target) + reg_loss + (lambda_f * freq_loss)

            #print('sin2_reg_LOSS:', sin2_reg_loss.data[0])
            #print('total_LOSS:', loss.data[0])
            #print('MODEL:', (model.state_dict()))
            # ------------------------------------------------------------------ AHMED edit sin2-reg - April19
            # Measure accuracy and record loss
            classerr.add(output.data, target)
        else:
            # Measure accuracy and record loss
            loss = earlyexit_loss(output, target, criterion, args)
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())
        #print('sin2_reg_LOSS:', sin2_reg_loss.data[0])

        if compression_scheduler:
            # Before running the backward phase, we allow the scheduler to modify the loss
            # (e.g. add regularization loss)
            agg_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss,
                                                                  optimizer=optimizer, return_loss_components=True)
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())
            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward(retain_graph=True)
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % args.print_freq == 0:
            # Log some statistics
            errs = OrderedDict()
            if not args.earlyexit_lossweights:
                errs['Top1'] = classerr.value(1)
                errs['Top5'] = classerr.value(5)
            else:
                # for Early Exit case, the Top1 and Top5 stats are computed for each exit.
                for exitnum in range(args.num_exits):
                    errs['Top1_exit' + str(exitnum)] = args.exiterrors[exitnum].value(1)
                    errs['Top5_exit' + str(exitnum)] = args.exiterrors[exitnum].value(5)

            stats_dict = OrderedDict()
            for loss_name, meter in losses.items():
                stats_dict[loss_name] = meter.mean
            stats_dict.update(errs)
            stats_dict['LR'] = optimizer.param_groups[0]['lr']
            stats_dict['Time'] = batch_time.mean
            stats = ('Performance/Training/', stats_dict)

            params = model.named_parameters() if args.log_params_histograms else None
            distiller.log_training_progress(stats,
                                            params,
                                            epoch, steps_completed,
                                            steps_per_epoch, args.print_freq,
                                            loggers)
        end = time.time()

    kernel = model.module.conv1.weight
    #kernel = model.module.conv1.float_weight
    print('00000000000000000000')
    w1 = kernel.data.cpu().numpy()
    np.save('w1_cifar', w1)

    print('======================================', reg_loss.item())
    print('learned bitwidths', model.module.B1.data.cpu().numpy()[0], model.module.B2.data.cpu().numpy()[0], model.module.B3.data.cpu().numpy()[0], model.module.B4.data.cpu().numpy()[0], model.module.B5.data.cpu().numpy()[0])
from torch.autograd import Variable as V

from torchnet import meter
from config.config import cfg
from util.visualize import Visualizer
from util.show_masked_image import show_masked_image
from mmcv.runner import save_checkpoint, load_checkpoint
import cv2
from util.show_masked_image import tensor_to_np
import numpy as np
#cfg.merge_from_file("config/un_att_pascal_0001.yaml")
cfg.freeze()  # freeze the config parameters
vis = Visualizer("newvis", port=8097)
AP = meter.APMeter()
mAP = meter.mAPMeter()
top3 = meter.ClassErrorMeter(topk=[1, 3, 5], accuracy=True)

Loss_meter = meter.AverageValueMeter()
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"
num = 30


def visualize_func(result):
    pass


def inverse_normalize(img):
    #if opt.caffe_pretrain:
    #    img = img + (np.array([122.7717, 115.9465, 102.9801]).reshape(3, 1, 1))
    #    return img[::-1, :, :]
    # approximate un-normalize for visualize
def main():
    if not os.path.exists(opt.save):
        os.mkdir(opt.save)

    if opt.scat > 0:
        model, params, stats = models.__dict__[opt.model](N=opt.N, J=opt.scat)
    else:
        model, params, stats = models.__dict__[opt.model]()

    def create_optimizer(opt, lr):
        print('creating optimizer with lr = %f' % lr)
        return torch.optim.SGD(params.values(),
                               lr,
                               opt.momentum,
                               weight_decay=opt.weightDecay)

    def get_iterator(mode):
        ds = create_dataset(opt, mode)
        return ds.parallel(batch_size=opt.batchSize,
                           shuffle=mode,
                           num_workers=opt.nthread,
                           pin_memory=False)

    optimizer = create_optimizer(opt, opt.lr)

    iter_test = get_iterator(False)
    iter_train = get_iterator(True)

    if opt.scat > 0:
        scat = Scattering(M=opt.N, N=opt.N, J=opt.scat, pre_pad=False).cuda()

    epoch = 0
    if opt.resume != '':
        resumeFile = opt.resume
        if not resumeFile.endswith('pt7'):
            resumeFile = torch.load(opt.resume + '/latest.pt7')['latest_file']
        state_dict = torch.load(resumeFile)
        epoch = state_dict['epoch']
        params_tensors, stats = state_dict['params'], state_dict['stats']
        for k, v in params.items():
            v.data.copy_(params_tensors[k])
        optimizer.load_state_dict(state_dict['optimizer'])
        print('model was restored from epoch:', epoch)

    print('\nParameters:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v.data))
                      for key, v in params.items()]))
    print('\nAdditional buffers:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v))
                      for key, v in stats.items()]))
    n_parameters = sum(
        [p.numel() for p in list(params.values()) + list(stats.values())])
    print('\nTotal number of parameters: %f' % n_parameters)

    meter_loss = meter.AverageValueMeter()
    classacc = meter.ClassErrorMeter(topk=[1, 5], accuracy=False)
    timer_data = meter.TimeMeter('s')
    timer_sample = meter.TimeMeter('s')
    timer_train = meter.TimeMeter('s')
    timer_test = meter.TimeMeter('s')

    def h(sample):
        inputs = sample[0].cuda()
        if opt.scat > 0:
            inputs = scat(inputs)
        inputs = Variable(inputs)
        targets = Variable(sample[1].cuda().long())
        if sample[2]:
            model.train()
        else:
            model.eval()
        y = torch.nn.parallel.data_parallel(model, inputs,
                                            np.arange(opt.ngpu).tolist())
        return F.cross_entropy(y, targets), y

    def log(t, state):
        if (t['epoch'] > 0 and t['epoch'] % opt.frequency_save == 0):
            torch.save(
                dict(params={k: v.data.cpu()
                             for k, v in params.items()},
                     stats=stats,
                     optimizer=state['optimizer'].state_dict(),
                     epoch=t['epoch']),
                open(os.path.join(opt.save, 'epoch_%i_model.pt7' % t['epoch']),
                     'wb'))
            torch.save(
                dict(
                    latest_file=os.path.join(opt.save, 'epoch_%i_model.pt7' %
                                             t['epoch'])),
                open(os.path.join(opt.save, 'latest.pt7'), 'wb'))

        z = vars(opt).copy()
        z.update(t)
        logname = os.path.join(opt.save, 'log.txt')
        with open(logname, 'a') as f:
            f.write('json_stats: ' + json.dumps(z) + '\n')
        print(z)

    def on_sample(state):
        global data_time
        data_time = timer_data.value()
        timer_sample.reset()
        state['sample'].append(state['train'])

    def on_forward(state):
        prev_sum5 = classacc.sum[5]
        prev_sum1 = classacc.sum[1]
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].item())

        next_sum5 = classacc.sum[5]
        next_sum1 = classacc.sum[1]
        n = state['output'].data.size(0)
        curr_top5 = 100.0 * (next_sum5 - prev_sum5) / n
        curr_top1 = 100.0 * (next_sum1 - prev_sum1) / n
        sample_time = timer_sample.value()
        timer_data.reset()
        if (state['train']):
            txt = 'Train:'
        else:
            txt = 'Test'
        if (state['t'] % opt.frequency_print == 0 and state['t'] > 0):
            print(
                '%s [%i,%i/%i] ; loss: %.3f (%.3f) ; acc5: %.2f (%.2f) ; acc1: %.2f (%.2f) ; data %.3f ; time %.3f'
                % (txt, state['epoch'], state['t'] % len(state['iterator']),
                   len(state['iterator']), state['loss'].item(),
                   meter_loss.value()[0], curr_top5, classacc.value(5),
                   curr_top1, classacc.value(1), data_time, sample_time))

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()

        state['iterator'] = iter_train

        epoch = state['epoch'] + 1
        if epoch in epoch_step:
            print('changing LR')
            lr = state['optimizer'].param_groups[0]['lr']
            state['optimizer'] = create_optimizer(opt, lr * opt.lr_decay_ratio)

    def on_end_epoch(state):
        if (state['t'] % opt.frequency_test == 0 and state['t'] > 0):
            train_loss = meter_loss.value()
            train_acc = classacc.value()
            train_time = timer_train.value()
            meter_loss.reset()
            classacc.reset()
            timer_test.reset()

            engine.test(h, iter_test)

            log(
                {
                    "train_loss": train_loss[0],
                    "train_acc": 100 - train_acc[0],
                    "test_loss": meter_loss.value()[0],
                    "test_acc": 100 - classacc.value()[0],
                    "epoch": state['epoch'],
                    "n_parameters": n_parameters,
                    "train_time": train_time,
                    "test_time": timer_test.value(),
                }, state)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.train(h, iter_train, opt.epochs, optimizer)
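# Hedged sketch of the torchnet Engine contract used in main() above: the closure passed
# to engine.train()/engine.test() receives a sample and must return (loss, output); hooks
# then read state['output'] / state['sample'] to update meters. The toy model, data and
# names below are illustrative only.
import torch
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
from torchnet import meter
from torchnet.engine import Engine

toy_model = torch.nn.Linear(32, 10)
toy_loader = DataLoader(TensorDataset(torch.randn(64, 32), torch.randint(0, 10, (64,))),
                        batch_size=16)
toy_acc = meter.ClassErrorMeter(topk=[1], accuracy=True)

def toy_step(sample):
    inputs, targets = sample[0], sample[1]
    output = toy_model(inputs)
    return F.cross_entropy(output, targets), output

def toy_on_forward(state):
    toy_acc.add(state['output'].detach(), state['sample'][1])

toy_engine = Engine()
toy_engine.hooks['on_forward'] = toy_on_forward
toy_engine.test(toy_step, toy_loader)
print('top-1 accuracy on random data: %.2f%%' % toy_acc.value(1))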
Beispiel #24
0
def _validate(data_group, model, criterion, device):
    # torchnet meters track the accuracy and loss statistics.
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=[1, 5])
    losses = {'objective_loss': tnt.AverageValueMeter()}
    """
    if _is_earlyexit(args):
        # for Early Exit, we have a list of errors and losses for each of the exits.
        args.exiterrors = []
        args.losses_exits = []
        for exitnum in range(args.num_exits):
            args.exiterrors.append(tnt.ClassErrorMeter(accuracy=True, topk=(1, 5)))
            args.losses_exits.append(tnt.AverageValueMeter())
        args.exit_taken = [0] * args.num_exits
    """
    batch_time = tnt.AverageValueMeter()
    total_samples = len(dataloaders[data_group].sampler)
    batch_size = dataloaders[data_group].batch_size
    total_steps = total_samples / batch_size
    # The display-confusion option should be implemented in the near future.
    """
    if args.display_confusion:
        confusion = tnt.ConfusionMeter(args.num_classes)
    """

    # Switch to evaluation mode.
    model.eval()
    end = time.time()
    # Start the primary testing loop here.
    with torch.no_grad():
        for validation_step, data in enumerate(dataloaders[data_group]):
            inputs = data[0].to(device)
            labels = data[1].to(device)
            output = model(inputs)

            # Early-exit mode is not handled in this first version.
            '''
            if not _is_earlyexit(args):
                # compute loss
                loss = criterion(output, target)
                # measure accuracy and record loss
                losses['objective_loss'].add(loss.item())
                classerr.add(output.detach(), target)
                if args.display_confusion:
                    confusion.add(output.detach(), target)
            else:
                earlyexit_validate_loss(output, target, criterion, args)
            '''

            loss = criterion(output, labels)
            losses['objective_loss'].add(loss.item())
            classerr.add(output.detach(), labels)
            steps_completed = (validation_step + 1)

            batch_time.add(time.time() - end)
            end = time.time()
            #Record log using _log_validation_progress function
            # "\033[0;37;40m\tExample\033[0m"
            if steps_completed % 200 == 0:
                print('Test [{:5d}/{:5d}] \033[0;37;41mLoss {:.5f}\033[0m'
                      '\033[0;37;42m\tTop1 {:.5f}  Top5 {:.5f}\033[m'
                      '\tTime {:.5f}.'.format(steps_completed,
                                              int(total_steps),
                                              losses['objective_loss'].mean,
                                              classerr.value(1),
                                              classerr.value(5),
                                              batch_time.mean))

        print('==> \033[0;37;42mTop1 {:.5f}  Top5 {:.5f}\033[m'
              '\033[0;37;41m\tLoss: {:.5f}\n\033[m.'.format(
                  classerr.value(1), classerr.value(5),
                  losses['objective_loss'].mean))

    return classerr.value(1), classerr.value(5), losses['objective_loss'].mean
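# Side note (a minimal sketch, not from the original example): tnt.AverageValueMeter
# exposes both .value(), which returns (mean, std), and a .mean attribute -- which is
# why some snippets read losses[...].mean and others read .value()[0].
import torchnet.meter as tnt

loss_meter_demo = tnt.AverageValueMeter()
for v in (0.9, 0.7, 0.8):
    loss_meter_demo.add(v)
mean, std = loss_meter_demo.value()
print(mean, loss_meter_demo.mean)   # the same running average, reported two ways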
Beispiel #25
0
def train(train_loader, model, criterion, optimizer, epoch,
          compression_scheduler, loggers, print_freq, log_params_hist, teacher_model=None,
          temperature_distillation=2, weight_distillation_loss=0.7):
    """Training loop for one epoch. If teacher_model is not None, distillation will be used"""
    losses = {'objective_loss':   tnt.AverageValueMeter(),
              'regularizer_loss': tnt.AverageValueMeter()}
    if compression_scheduler is None:
        # Initialize the regularizer loss to zero
        losses['regularizer_loss'].add(0)

    if teacher_model is not None:
        softmax_function = nn.Softmax(dim=1).cuda()
        log_softmax_function = nn.LogSoftmax(dim=1).cuda()
        kldiv_loss = nn.KLDivLoss(size_average=False).cuda()  # see https://github.com/pytorch/pytorch/issues/6622

    classerr = tnt.ClassErrorMeter(accuracy=True, topk=(1, 5))
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    total_samples = len(train_loader.sampler)
    batch_size = train_loader.batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)
    msglogger.info('Training epoch: %d samples (%d per mini-batch)', total_samples, batch_size)

    # Switch to train mode
    model.train()
    end = time.time()

    for train_step, (inputs, target) in enumerate(train_loader):
        # Measure data loading time
        data_time.add(time.time() - end)

        target = target.cuda(non_blocking=True)
        input_var = torch.autograd.Variable(inputs)
        target_var = torch.autograd.Variable(target)

        # Execute the forward phase, compute the output and measure loss
        if compression_scheduler:
            compression_scheduler.on_minibatch_begin(epoch, train_step, steps_per_epoch, optimizer)
        output = model(input_var)
        loss = criterion(output, target_var)
        if teacher_model is not None:
            with PytorchNoGrad():
                input_var_teacher = get_inference_var(inputs)
                output_teacher = teacher_model(input_var_teacher)
            loss_distilled = (temperature_distillation**2) * kldiv_loss(
                log_softmax_function(output / temperature_distillation),
                softmax_function(output_teacher / temperature_distillation)) / output.size(0)
            loss = weight_distillation_loss*loss_distilled + (1-weight_distillation_loss)*loss

        # Measure accuracy and record loss
        classerr.add(output.data, target)
        losses['objective_loss'].add(loss.item())

        if compression_scheduler:
            # Before running the backward phase, we add any regularization loss computed by the scheduler
            regularizer_loss = compression_scheduler.before_backward_pass(epoch, train_step, steps_per_epoch, loss, optimizer)
            loss += regularizer_loss
            losses['regularizer_loss'].add(regularizer_loss.item())

        # Compute the gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if compression_scheduler:
            compression_scheduler.on_minibatch_end(epoch, train_step, steps_per_epoch, optimizer)

        # measure elapsed time
        batch_time.add(time.time() - end)
        steps_completed = (train_step+1)

        if steps_completed % print_freq == 0:
            # Log some statistics
            lr = optimizer.param_groups[0]['lr']
            stats = ('Performance/Training/',
                     OrderedDict([
                         ('Loss', losses['objective_loss'].mean),
                         ('Reg Loss', losses['regularizer_loss'].mean),
                         ('Top1', classerr.value(1)),
                         ('Top5', classerr.value(5)),
                         ('LR', lr),
                         ('Time', batch_time.mean)]))

            distiller.log_training_progress(stats,
                                            model.named_parameters() if log_params_hist else None,
                                            epoch, steps_completed,
                                            steps_per_epoch, print_freq,
                                            loggers)
        end = time.time()
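# Standalone sketch of the temperature-scaled distillation term computed in the training
# loop above, assuming generic student/teacher logit tensors (all names are illustrative):
# KL(softened teacher || softened student) scaled by T^2 and averaged over the batch.
import torch
import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, T=2.0):
    log_p_student = F.log_softmax(student_logits / T, dim=1)
    p_teacher = F.softmax(teacher_logits / T, dim=1)
    return (T ** 2) * F.kl_div(log_p_student, p_teacher, reduction='sum') / student_logits.size(0)

student_logits = torch.randn(4, 10)
teacher_logits = torch.randn(4, 10)
hard_loss = F.cross_entropy(student_logits, torch.randint(0, 10, (4,)))
# Mixing weight 0.7 mirrors the default weight_distillation_loss above.
total_loss = 0.7 * distillation_loss(student_logits, teacher_logits) + 0.3 * hard_loss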
Beispiel #26
0
def light_train_with_distiller(model,
                               criterion,
                               optimizer,
                               compress_scheduler,
                               device,
                               epoch=1):

    total_samples = dataset_sizes['train']
    batch_size = dataloaders["train"].batch_size
    steps_per_epoch = math.ceil(total_samples / batch_size)

    # Note: binary classification cannot use top-5 accuracy (topk=[1, 5]).
    classerr = tnt.ClassErrorMeter(accuracy=True, topk=[1, 5])
    batch_time = tnt.AverageValueMeter()
    data_time = tnt.AverageValueMeter()

    OVERALL_LOSS_KEY = 'Overall Loss'
    OBJECTIVE_LOSS_KEY = 'Objective Loss'

    losses = OrderedDict([(OVERALL_LOSS_KEY, tnt.AverageValueMeter()),
                          (OBJECTIVE_LOSS_KEY, tnt.AverageValueMeter())])

    model.train()
    acc_stats = []
    end = time.time()
    for train_step, data in enumerate(dataloaders["train"], 0):
        inputs = data[0].to(device)
        labels = data[1].to(device)

        if compress_scheduler:
            compress_scheduler.on_minibatch_begin(epoch, train_step,
                                                  steps_per_epoch, optimizer)
        output = model(inputs)
        loss = criterion(output, labels)

        # Early-exit mode is dropped in this first version.
        classerr.add(output.detach(), labels)
        acc_stats.append([classerr.value(1), classerr.value(5)])
        losses[OBJECTIVE_LOSS_KEY].add(loss.item())
        """
        if not early_exit_mode(args):
        loss = criterion(output, target)
        # Measure accuracy
        classerr.add(output.detach(), target)
        acc_stats.append([classerr.value(1), classerr.value(5)])
        else:
        # Measure accuracy and record loss
        classerr.add(output[args.num_exits-1].detach(), target) # add the last exit (original exit)
        loss = earlyexit_loss(output, target, criterion, args)
        """

        if compress_scheduler:
            agg_loss = compress_scheduler.before_backward_pass(
                epoch,
                train_step,
                steps_per_epoch,
                loss,
                optimizer=optimizer,
                return_loss_components=True)
            # This may be modified or extended in the future.
            loss = agg_loss.overall_loss
            losses[OVERALL_LOSS_KEY].add(loss.item())

            for lc in agg_loss.loss_components:
                if lc.name not in losses:
                    losses[lc.name] = tnt.AverageValueMeter()
                losses[lc.name].add(lc.value.item())
        else:
            losses[OVERALL_LOSS_KEY].add(loss.item())

        optimizer.zero_grad()
        loss.backward()
        if compress_scheduler:
            compress_scheduler.before_parameter_optimization(
                epoch, train_step, steps_per_epoch, optimizer)
        optimizer.step()
        if compress_scheduler:
            compress_scheduler.on_minibatch_end(epoch, train_step,
                                                steps_per_epoch, optimizer)
        batch_time.add(time.time() - end)
        steps_completed = (train_step + 1)

        # "\033[0;37;40m\tExample\033[0m"
        if steps_completed % 1000 == 0:
            print(
                'Epoch: [{}][{:5d}/{:5d}]  \033[0;37;41mOverall Loss {:.5f}  Objective Loss {:.5f}\033[0m'
                '\033[0;37;42m\tTop 1 {:.5f}  Top 5 {:.5f}\033[0m'
                '\033[0;37;40m\tLR {:.5f}  Time {:.5f}\033[0m.'.format(
                    epoch, steps_completed, int(steps_per_epoch),
                    losses['Overall Loss'].mean, losses['Objective Loss'].mean,
                    classerr.value(1), classerr.value(5),
                    optimizer.param_groups[0]['lr'], batch_time.mean))
            t, total = summary.weights_sparsity_tbl_summary(
                model, return_total_sparsity=True)
            print('Total sparsity: {:0.2f}\n'.format(total))
            #df = summary.masks_sparsity_tbl_summary(net, compress_scheduler)
            #print(df)
        end = time.time()

    return classerr.value(1), classerr.value(5), losses[OVERALL_LOSS_KEY].mean
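# Hedged helper sketch of the loss bookkeeping above: distiller's before_backward_pass()
# with return_loss_components=True yields an aggregate whose .overall_loss drives the
# backward pass and whose .loss_components (name/value pairs) can be folded into the same
# meters dict. `agg_loss` and `losses` follow the shapes used in the example.
import torchnet.meter as tnt

def record_loss_components(agg_loss, losses, overall_key='Overall Loss'):
    losses[overall_key].add(agg_loss.overall_loss.item())
    for lc in agg_loss.loss_components:
        if lc.name not in losses:
            losses[lc.name] = tnt.AverageValueMeter()
        losses[lc.name].add(lc.value.item())
    return agg_loss.overall_loss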
Beispiel #27
0
def train(args, model, dataloaders, criterion, optimizer, scheduler, logger, epochs=25, is_inception=False):
    """
    args: 从键盘接收的参数
    model: 将被训练的模型
    dataloaders: 数据加载器
    criterion: 损失函数
    optimizer: 训练时的优化器
    scheduler: 学习率调整机制
    logger: 日志
    epochs: 训练周期数
    is_inception: 是否为inception模型的标志
    """

    # Number of training epochs
    epochs = epochs or args.epochs

    # Training mode (determines the save directory)
    if args.pretrained and args.feature:
        mode = "feature_extractor" # pretrained=True, feature=True
    elif args.pretrained and not args.feature:
        mode = "fine_tuning" # pretrained=True, feature=False
    else:
        mode = "from_scratch" # pretrained=False, feature=False
    # Path for the latest model checkpoint
    model_path = Path(args.output) / args.arch / mode / "model.pt"
    # Path for the best-accuracy model checkpoint
    best_modelpath = Path(args.output) / args.arch / mode / "bestmodel.pt"

    # Resume training from a checkpoint if one exists
    if (model_path.exists()):
        state = torch.load(str(model_path))

        epoch = state["epoch"]
        model.load_state_dict(state["model"])
        best_acc = state["best_acc"]

        logger.info("Loading epoch {} checkpoint ...".format(epoch))
        print("Restored model, epoch {}".format(epoch))
    else:
        epoch = 0
        best_acc = 0.0

    # `save` lambda: call save(epoch) to write a checkpoint
    save = lambda epoch: torch.save({
        "model":model.state_dict(),
        "epoch":epoch,
        "best_acc": best_acc,
        }, str(model_path))

    # Training metrics
    running_loss_meter = meter.AverageValueMeter() # running average of the loss
    # running_acc_meter = meter.mAPMeter() # mean average precision over all classes
    running_acc_meter = meter.ClassErrorMeter(topk=[1], accuracy=True) # top-1 accuracy
    time_meter = meter.TimeMeter(unit=True)  # measure training time

    # Results file
    resultpath = Path(args.output) / args.arch / mode / "train_result.pkl"
    result_writer = ResultsWriter(resultpath, overwrite=False)

    for epoch in range(epoch, epochs):
        print("Epoch {}/{}".format(epoch, epochs-1))
        print("-" * 10)

        # Each epoch has a training and a validation phase
        for phase in ["train", "val"]:
            if phase == "train":
                model.train() # Set model to training mode
            else:
                model.eval() # Set model to evaluate mode

            # Reset the loss and accuracy meters for each phase of each epoch
            running_loss_meter.reset()
            running_acc_meter.reset()

            random.seed(args.seed)
            tq = tqdm.tqdm(total=len(dataloaders[phase].dataset))
            tq.set_description("{} for Epoch {}/{}".format(phase, epoch+1, epochs))

            try:
                # Iterate over the data
                for inputs, labels in dataloaders[phase]:
                    # Move the inputs and labels to the GPU (or keep them on the CPU)
                    inputs = inputs.cuda() if torch.cuda.is_available() else inputs
                    labels = labels.cuda() if torch.cuda.is_available() else labels

                    # Zero the parameter gradients
                    optimizer.zero_grad()

                    # Forward pass
                    # track history only if in train
                    with torch.set_grad_enabled(phase=="train"):
                        # Inception training differs from validation (auxiliary outputs)
                        if is_inception and phase == "train":
                            outputs, aux_outputs = model(inputs)
                            loss1 = criterion(outputs, labels)
                            loss2 = criterion(aux_outputs, labels)
                            loss = loss1 + 0.4 * loss2
                        else:
                            outputs = model(inputs)
                            loss = criterion(outputs, labels) # compute the loss

                        # backward + optimize only if in training phase
                        if phase == "train":
                            # Backpropagation
                            loss.backward()
                            # Update the weights
                            optimizer.step()

                    tq.update(inputs.size(0))

                    # Per-step meter updates
                    running_loss_meter.add(loss.item())
                    running_acc_meter.add(F.softmax(outputs.detach(), dim=1), labels.detach())

                # Learning-rate adjustment (per epoch)
                if phase == "train":
                    # Step the learning-rate scheduler
                    scheduler.step()
                    save(epoch+1)

                tq.close()
                print("{} Loss: {:.4f} Acc: {:.4f}".format(phase, running_loss_meter.value()[0], running_acc_meter.value()))

                # copy the best model
                if phase == "val" and running_acc_meter.value()[0] > best_acc:
                    best_acc = running_acc_meter.value()[0]
                    shutil.copy(str(model_path), str(best_modelpath))

                """记录epoch的loss和acc,不记录step的"""
                # 记录到日志中
                logger.info("\n phase: {phase}, epoch: {epoch}, lr: {lr}, loss: {loss}, acc: {acc}".format(
                    phase = phase, epoch = epoch+1, lr = scheduler.get_lr(),
                    loss = running_loss_meter.value()[0], acc = running_acc_meter.value()))

                # ResultWriter记录
                result_writer.update(epoch, {"phase":phase, "loss": running_loss_meter.value()[0],
                    "acc":running_acc_meter.value()})

            except KeyboardInterrupt:
                tq.close()
                print("Ctrl+C", saving snapshot)
                save(epoch)

        print()

    # Total training time
    time_elapsed = time_meter.value()
    print("Training complete in {:.0f}m {:.0f}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc: {:.4f}".format(best_acc))
Beispiel #28
0
def main():
    model, params, stats = models.__dict__[opt.model](N=opt.N, J=opt.scat)

    iter_test = get_iterator(False, opt)

    scat = Scattering(M=opt.N, N=opt.N, J=opt.scat, pre_pad=False).cuda()

    epoch = 0
    if opt.resume != '':
        resumeFile = opt.resume
        if not resumeFile.endswith('pt7'):
            resumeFile = torch.load(opt.resume + '/latest.pt7')['latest_file']
        state_dict = torch.load(resumeFile)

        model.load_state_dict(state_dict['state_dict'])
        print('model was restored from epoch:', epoch)

    print('\nParameters:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v.data))
                      for key, v in params.items()]))
    print('\nAdditional buffers:')
    print(
        pd.DataFrame([(key, v.size(), torch.typename(v))
                      for key, v in stats.items()]))
    n_parameters = sum(
        [p.numel() for p in list(params.values()) + list(stats.values())])
    print('\nTotal number of parameters: %f' % n_parameters)

    meter_loss = meter.AverageValueMeter()
    classacc = meter.ClassErrorMeter(topk=[1, 5], accuracy=False)
    timer_data = meter.TimeMeter('s')
    timer_sample = meter.TimeMeter('s')
    timer_train = meter.TimeMeter('s')
    timer_test = meter.TimeMeter('s')

    def h(sample):
        inputs = sample[0].cuda()
        if opt.scat > 0:
            inputs = scat(inputs)
        inputs = Variable(inputs)
        targets = Variable(sample[1].cuda().long())
        if sample[2]:
            model.train()
        else:
            model.eval()

    # y = model.forward(inputs)
        y = torch.nn.parallel.data_parallel(model, inputs,
                                            np.arange(opt.ngpu).tolist())
        return F.cross_entropy(y, targets), y

    def on_sample(state):
        global data_time
        data_time = timer_data.value()
        timer_sample.reset()
        state['sample'].append(state['train'])

    def on_forward(state):
        prev_sum5 = classacc.sum[5]
        prev_sum1 = classacc.sum[1]
        classacc.add(state['output'].data,
                     torch.LongTensor(state['sample'][1]))
        meter_loss.add(state['loss'].item())

        next_sum5 = classacc.sum[5]
        next_sum1 = classacc.sum[1]
        n = state['output'].data.size(0)
        curr_top5 = 100.0 * (next_sum5 - prev_sum5) / n
        curr_top1 = 100.0 * (next_sum1 - prev_sum1) / n
        sample_time = timer_sample.value()
        timer_data.reset()
        if (state['train']):
            txt = 'Train:'
        else:
            txt = 'Test'

        print(
            '%s [%i,%i/%i] ; loss: %.3f (%.3f) ; err5: %.2f (%.2f) ; err1: %.2f (%.2f) ; data %.3f ; time %.3f'
            % (txt, state['epoch'], state['t'] % len(state['iterator']),
               len(state['iterator']), state['loss'].item(),
               meter_loss.value()[0], curr_top5, classacc.value(5), curr_top1,
               classacc.value(1), data_time, sample_time))

    def on_start(state):
        state['epoch'] = epoch

    def on_start_epoch(state):
        classacc.reset()
        meter_loss.reset()
        timer_train.reset()

        epoch = state['epoch'] + 1

    def on_end_epoch(state):
        train_loss = meter_loss.value()
        train_acc = classacc.value()
        train_time = timer_train.value()
        meter_loss.reset()
        classacc.reset()
        timer_test.reset()

        engine.test(h, iter_test)

    engine = Engine()
    engine.hooks['on_sample'] = on_sample
    engine.hooks['on_forward'] = on_forward
    engine.hooks['on_start_epoch'] = on_start_epoch
    engine.hooks['on_end_epoch'] = on_end_epoch
    engine.hooks['on_start'] = on_start
    engine.test(h, iter_test)
    print(classacc.value())
Beispiel #29
0
def train(**kwargs):
    global device, vis
    if opt.seed is not None:
        setup_seed(opt.seed)
    config_str = opt.parse(kwargs)
    device = torch.device("cuda" if opt.use_gpu else "cpu")

    vis = Visualizer(opt.log_dir, opt.model, current_time, opt.title_note)
    # log all configs
    vis.log('config', config_str)

    # load data set
    train_loader, val_loader, num_classes = getattr(dataset, opt.dataset)(opt.batch_size * opt.gpus)
    # load model
    model = getattr(models, opt.model)(lambas=opt.lambas, num_classes=num_classes, weight_decay=opt.weight_decay).to(
        device)

    if opt.gpus > 1:
        model = nn.DataParallel(model)

    # define loss function
    def criterion(output, target_var):
        loss = nn.CrossEntropyLoss().to(device)(output, target_var)
        reg_loss = model.regularization() if opt.gpus <= 1 else model.module.regularization()
        total_loss = (loss + reg_loss).to(device)
        return total_loss

    # load optimizer and scheduler
    if opt.optimizer == 'adam':
        optimizer = torch.optim.Adam(model.parameters() if opt.gpus <= 1 else model.module.parameters(), opt.lr)
        # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=opt.lr_decay, patience=15)
        scheduler = None
        print('Optimizer: Adam, lr={}'.format(opt.lr))
    elif opt.optimizer == 'momentum':
        optimizer = torch.optim.SGD(model.parameters() if opt.gpus <= 1
                                    else model.module.parameters(), opt.lr, momentum=opt.momentum, nesterov=True)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=opt.schedule_milestone,
                                                         gamma=opt.lr_decay)
        print('Optimizer: Momentum, lr={}, momentum={}'.format(opt.lr, opt.momentum))
    else:
        print('No optimizer')
        return

    loss_meter = meter.AverageValueMeter()
    accuracy_meter = meter.ClassErrorMeter(accuracy=True)
    # create checkpoints dir
    directory = '{}/{}_{}'.format(opt.checkpoints_dir, opt.model, current_time)
    if not os.path.exists(directory):
        os.makedirs(directory)
    total_steps = 0
    for epoch in range(opt.start_epoch, opt.max_epoch) if opt.verbose else tqdm(range(opt.start_epoch, opt.max_epoch)):
        model.train() if opt.gpus <= 1 else model.module.train()
        loss_meter.reset()
        accuracy_meter.reset()
        for ii, (input_, target) in enumerate(train_loader):
            input_, target = input_.to(device), target.to(device)
            optimizer.zero_grad()
            score = model(input_, target)
            loss = criterion(score, target)
            loss.backward()
            optimizer.step()

            loss_meter.add(loss.item())
            accuracy_meter.add(score.data, target.data)

            e_fl, e_l0 = model.get_exp_flops_l0() if opt.gpus <= 1 else model.module.get_exp_flops_l0()
            vis.plot('stats_comp/exp_flops', e_fl, total_steps)
            vis.plot('stats_comp/exp_l0', e_l0, total_steps)
            total_steps += 1

            if (model.beta_ema if opt.gpus <= 1 else model.module.beta_ema) > 0.:
                model.update_ema() if opt.gpus <= 1 else model.module.update_ema()

            if ii % opt.print_freq == opt.print_freq - 1:
                vis.plot('train/loss', loss_meter.value()[0])
                vis.plot('train/accuracy', accuracy_meter.value()[0])
                if opt.verbose:
                    print("epoch:{epoch},lr:{lr},loss:{loss:.2f},train_acc:{train_acc:.2f}"
                      .format(epoch=epoch, loss=loss_meter.value()[0],
                              train_acc=accuracy_meter.value()[0],
                              lr=optimizer.param_groups[0]['lr']))

        # save model
        if epoch % 10 == 0 or epoch == opt.max_epoch - 1:
            torch.save(model.state_dict(), directory + '/{}.model'.format(epoch))
        # validate model
        val_accuracy, val_loss = val(model, val_loader, criterion)

        vis.plot('val/loss', val_loss)
        vis.plot('val/accuracy', val_accuracy)

        # update lr
        if scheduler is not None:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step(epoch)
        if opt.verbose:
            print("epoch:{epoch},lr:{lr},loss:{loss:.2f},val_acc:{val_acc:.2f},prune_rate:{pr:.2f}"
                  .format(epoch=epoch, loss=loss_meter.value()[0], val_acc=val_accuracy, lr=optimizer.param_groups[0]['lr'],
                          pr=model.prune_rate() if opt.gpus <= 1 else model.module.prune_rate()))
        for (i, num) in enumerate(model.get_expected_activated_neurons() if opt.gpus <= 1
                                  else model.module.get_expected_activated_neurons()):
            vis.plot("Training_layer/{}".format(i), num)
        vis.plot('lr', optimizer.param_groups[0]['lr'])
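# Hedged sketch of the LR-scheduling dispatch used above: ReduceLROnPlateau must be
# stepped with the validation metric, while epoch-based schedulers such as MultiStepLR
# are stepped once per epoch. The optimizer and schedulers here are toy stand-ins.
import torch

demo_params = torch.nn.Linear(4, 2).parameters()
demo_optimizer = torch.optim.SGD(demo_params, lr=0.1)
demo_schedulers = [
    torch.optim.lr_scheduler.ReduceLROnPlateau(demo_optimizer, factor=0.5, patience=2),
    torch.optim.lr_scheduler.MultiStepLR(demo_optimizer, milestones=[10, 20], gamma=0.1),
]

demo_val_loss = 1.0
for sched in demo_schedulers:
    if isinstance(sched, torch.optim.lr_scheduler.ReduceLROnPlateau):
        sched.step(demo_val_loss)
    else:
        sched.step()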
Beispiel #30
0
def test(models,
         weights,
         gpu_ids,
         iterator,
         topk,
         num_classes,
         environment='main'):
    print(
        '=========================Start Testing at {}==========================='
        .format(time.strftime('%c')))

    # TODO: serialization
    classerr_meters = [meter.ClassErrorMeter(topk) for i in models]
    ap_meters = [APMeter(num_classes) for i in models]

    # multiple gpu support
    if gpu_ids is not None:
        for i in range(len(models)):
            models[i].cuda(gpu_ids[0])
            models[i] = torch.nn.DataParallel(models[i], device_ids=gpu_ids)

    # set eval() to freeze running mean and running var
    for m in models:
        m.eval()

    with torch.no_grad():
        for sample in tqdm(iterator):
            # wrap data
            for i in range(2):
                if gpu_ids is not None:
                    sample[i] = sample[i].cuda(gpu_ids[0], non_blocking=True)

            ipt, target = sample[0], sample[1]

            opt = None
            for i in range(len(models)):
                if opt is None:
                    opt = weights[i] * functional.softmax(models[i](ipt), dim=1)

                else:
                    opt += weights[i] * functional.softmax(models[i](ipt), dim=1)

                classerr_meters[i].add(opt.data, target.data)
                ap_meters[i].add(opt.data, target.data)

    # sorting w.r.t the first weak learner
    index = numpy.argsort(ap_meters[0].value())

    classerrs = []
    for i in topk:
        classerrs.append([meter.value(i) for meter in classerr_meters])
    ap = [meter.value()[index] for meter in ap_meters]
    ap = numpy.stack(ap)

    x = [
        numpy.linspace(0, num_classes, num=num_classes, endpoint=False)
        for i in ap_meters
    ]
    x = numpy.stack(x)

    vis = visdom.Visdom(server='http://localhost', env=environment)
    vis.line(X=x.transpose(), Y=ap.transpose(), opts={'title': 'Class AP'})
    for i in range(len(topk)):
        vis.line(numpy.asarray(classerrs[i]),
                 opts={'title': 'Class Top {} Error'.format(topk[i])})

    print(
        '========================Testing Done at {}==========================='
        .format(time.strftime('%c')))
    print('******************Top {} Error: {}*****************'.format(
        topk, classerrs))
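# Hedged sketch of the weighted soft-voting ensemble evaluated above, assuming a list of
# models and per-model weights (the toy models and shapes are illustrative only):
import torch
from torch.nn import functional

def ensemble_predict(model_list, weight_list, ipt):
    combined = None
    for m, w in zip(model_list, weight_list):
        p = w * functional.softmax(m(ipt), dim=1)
        combined = p if combined is None else combined + p
    return combined   # accumulated, weighted class probabilities

demo_models = [torch.nn.Linear(16, 5), torch.nn.Linear(16, 5)]
demo_probs = ensemble_predict(demo_models, [0.6, 0.4], torch.randn(2, 16))
print(demo_probs.argmax(dim=1))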