Example #1
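# NOTE (assumed context): the snippet below omits its module-level imports and helpers.
# It relies on os, time, random, shutil, logging, numpy as np, torch, torch.nn as nn,
# torch.backends.cudnn as cudnn, functools.reduce, torch.autograd.Variable, a
# SummaryWriter, project utilities (models, AverageMeter, ListAverageMeter, accuracy,
# adjust_learning_rate, prepare_train_data, prepare_test_data, save_checkpoint,
# validate) and the module-level counters `skip_count` and `training_cost`.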
def run_training(args):

    writer_path = os.path.join('runs', args.exp_desc + '-' + time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime()))
    writer = SummaryWriter(writer_path)

    signsgd_config = {
        'num_bits': args.num_bits,
        'num_bits_weight': args.num_bits_weight,
        'num_bits_grad': args.num_bits_grad,
        'biprecision': args.biprecision,
        'predictive_forward': args.predictive_forward,
        'predictive_backward': args.predictive_backward,
        'msb_bits': args.msb_bits,
        'msb_bits_weight': args.msb_bits_weight,
        'msb_bits_grad': args.msb_bits_grad,
        'threshold': args.threshold,
        'sparsify': args.sparsify,
        'sign': args.sign,
        'writer': writer,
    }

    # create model
    model = models.__dict__[args.arch](args.pretrained, **signsgd_config)
    model.install_gate()
    model = torch.nn.DataParallel(model).cuda()
    best_prec1 = 0

    # optionally resume from a checkpoint
    if args.resume:
        # only load the checkpoint after confirming the file exists
        if os.path.isfile(args.resume):
            logging.info('=> loading checkpoint `{}`'.format(args.resume))
            checkpoint = torch.load(args.resume)

            args.start_iter = 0
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'], strict=True)
            # translate(model, checkpoint)
            logging.info('=> loaded checkpoint `{}` (iter: {})'.format(
                args.resume, checkpoint['iter']
            ))
        else:
            logging.info('=> no checkpoint found at `{}`'.format(args.resume))

    cudnn.benchmark = True
    train_loader = prepare_train_data(dataset=args.dataset,
                                      batch_size=args.batch_size,
                                      shuffle=True,
                                      num_workers=args.workers)
    test_loader = prepare_test_data(dataset=args.dataset,
                                    batch_size=args.batch_size,
                                    shuffle=False,
                                    num_workers=args.workers)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                       model.parameters()), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    cp_energy_record = AverageMeter()
    skip_ratios = ListAverageMeter()

    end = time.time()
    dataloader_iterator = iter(train_loader)

    for i in range(0, args.iters):

        rand_flag = random.uniform(0, 1) > 0.5
        model.train()
        adjust_learning_rate(args, optimizer, i)

        try:
            input, target = next(dataloader_iterator)
        except StopIteration:
            dataloader_iterator = iter(train_loader)
            input, target = next(dataloader_iterator)

        # measuring data loading time
        data_time.update(time.time() - end)

        target = target.cuda()
        input_var = Variable(input, requires_grad=True).cuda()
        target_var = Variable(target).cuda()

        # randomly skip roughly half of the iterations (tracked via skip_count)
        if rand_flag:
            optimizer.zero_grad()
            # optimizer.step()
            global skip_count
            skip_count += 1
            continue

        # compute output
        output, masks, _, has_ds = model(input_var)

        # energy_parameter = np.ones(35,)
        energy_parameter = np.ones(len(masks),)
        for iii, flag in enumerate(has_ds):
            if flag:
                energy_parameter[iii] = 0.75
        energy_parameter /= energy_parameter.max()

        # weighted count of executed positions vs. all positions, expressed as
        # the percentage of computation actually used for this batch
        energy_cost = 0
        energy_all = 0
        for layer in range(len(energy_parameter)):
            energy_cost += masks[layer].sum() * energy_parameter[layer]
            energy_all += reduce((lambda x, y: x * y), masks[layer].shape) * energy_parameter[layer]

        cp_energy = (energy_cost.item() / energy_all.item()) * 100
        global training_cost
        training_cost += (cp_energy / 100) * 0.51 * args.batch_size
        energy_cost *= args.beta
        if cp_energy <= args.minimum:
        # if cp_energy > args.minimum:
            reg = -1
        else:
            reg = 1
        if args.energy:
            loss = criterion(output, target_var) + energy_cost * reg
        else:
            loss = criterion(output, target_var)

        # collect skip ratio of each layer
        skips = [mask.data.le(0.5).float().mean() for mask in masks]
        if skip_ratios.len != len(skips):
            skip_ratios.set_len(len(skips))

        # measure accuracy and record loss
        prec1, = accuracy(output.data, target, topk=(1,))
        writer.add_scalar('data/train_error', 100 - prec1, i-skip_count)
        writer.add_scalar('data/train_comp_using', cp_energy, i-skip_count)
        writer.add_scalar('data/train_cost_Gops', training_cost, i-skip_count)
        losses.update(loss.data.item(), input.size(0))
        top1.update(prec1.item(), input.size(0))
        cp_energy_record.update(cp_energy, 1)
        skip_ratios.update(skips, input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # repackage hidden units for RNN Gate
        model.module.control.repackage_hidden()

        batch_time.update(time.time() - end)
        end = time.time()

        # print log
        if i % args.print_freq == 0 or i == (args.iters - 1):
            logging.info("Iter: [{0}/{1}]\t"
                         "Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t"
                         "Data {data_time.val:.3f} ({data_time.avg:.3f})\t"
                         "Loss {loss.val:.3f} ({loss.avg:.3f})\t"
                         "Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t"
                         'Energy_ratio: {cp_energy_record.val:.3f}({cp_energy_record.avg:.3f})\t'.format(
                            i,
                            args.iters,
                            batch_time=batch_time,
                            data_time=data_time,
                            loss=losses,
                            top1=top1,
                            cp_energy_record=cp_energy_record)
            )

        # evaluate every 1000 steps
        if (i % args.eval_every == 0 and i > 0) or (i == (args.iters-1)):
            prec1 = validate(args, test_loader, model, criterion)
            writer.add_scalar('data/test_error', 100 - prec1, i-skip_count)
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
            checkpoint_path = os.path.join(args.save_path,
                                           'checkpoint_{:05d}.pth.tar'.format(
                                               i))
            save_checkpoint({
                'iter': i,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            },
                is_best, filename=checkpoint_path)
            shutil.copyfile(checkpoint_path, os.path.join(args.save_path,
                                                          'checkpoint_latest'
                                                          '.pth.tar'))
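
AverageMeter and ListAverageMeter are project utilities that are not shown on this page. A minimal sketch of the interface the examples rely on (val/avg/update, plus len/set_len/avg for the per-layer variant) could look like the following; any field beyond the ones actually read above is an assumption:

import numpy as np

class AverageMeter(object):
    """Tracks the latest value and the running average of a scalar."""
    def __init__(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

class ListAverageMeter(object):
    """Per-layer variant: keeps running averages for a list of scalars."""
    def __init__(self):
        self.len = 0

    def set_len(self, n):
        self.len = n
        self.sum = np.zeros(n)
        self.count = 0
        self.avg = np.zeros(n)

    def update(self, vals, n=1):
        self.val = np.array([float(v) for v in vals])
        self.sum += self.val * n
        self.count += n
        self.avg = self.sum / self.count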
Example #2
def validate(args, test_loader, model, criterion):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    skip_ratios = ListAverageMeter()
    cp_energy_record = AverageMeter()

    # switch to evaluation mode
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(test_loader):
            if i == len(test_loader) - 1:
                break
            target = target.cuda()
            input_var = Variable(input).cuda()
            target_var = Variable(target).cuda()
            # compute output
            output, masks, logprobs, has_ds = model(input_var)

            # energy_parameter = np.ones(35, )
            energy_parameter = np.ones(len(masks), )
            for iii, flag in enumerate(has_ds):
                if flag:
                    energy_parameter[iii] = 0.75
            energy_parameter /= energy_parameter.max()

            energy_cost = 0
            energy_all = 0
            for layer in range(len(energy_parameter)):
                energy_cost += masks[layer].sum() * energy_parameter[layer]
                energy_all += reduce((lambda x, y: x * y), masks[layer].shape) * energy_parameter[layer]
            cp_energy = (energy_cost.item() / energy_all.item()) * 100

            skips = [mask.data.le(0.5).float().mean().item() for mask in masks]

            if skip_ratios.len != len(skips):
                skip_ratios.set_len(len(skips))
            loss = criterion(output, target_var)

            # measure accuracy and record loss
            prec1, prec5 = accuracy(output.data, target, topk=(1,5))
            top1.update(prec1.item(), input.size(0))
            top5.update(prec5.item(), input.size(0))
            skip_ratios.update(skips, input.size(0))
            losses.update(loss.data.item(), input.size(0))
            batch_time.update(time.time() - end)
            cp_energy_record.update(cp_energy, 1)
            end = time.time()

            if i % args.print_freq == 0 or (i == (len(test_loader) - 1)):
                logging.info(
                    'Test: [{}/{}]\t'
                    'Time: {batch_time.val:.4f}({batch_time.avg:.4f})\t'
                    'Loss: {loss.val:.3f}({loss.avg:.3f})\t'
                    'Prec@1: {top1.val:.3f}({top1.avg:.3f})\t'
                    'Prec@5: {top5.val:.3f}({top5.avg:.3f})\t'
                    'Energy_ratio: {cp_energy_record.val:.3f}({cp_energy_record.avg:.3f})\t'.format(
                        i, len(test_loader), batch_time=batch_time,
                        loss=losses,
                        top1=top1, top5=top5,
                        cp_energy_record=cp_energy_record,
                    )
                )
        logging.info(' * Prec@1 {top1.avg:.3f}, Loss {loss.avg:.3f}'.format(
            top1=top1, loss=losses))

        skip_summaries = []
        for idx in range(skip_ratios.len):
            skip_summaries.append(1-skip_ratios.avg[idx])
        # compute `computational percentage`
        cp = ((sum(skip_summaries) + 1) / (len(skip_summaries) + 1)) * 100
        logging.info('*** Computation Percentage: {:.3f} %'.format(cp))

    return top1.avg
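
All three examples also call an accuracy helper that is not reproduced here. A sketch under the assumption that it follows the common top-k precision routine from the torchvision reference scripts (returning precisions in percent, one tensor per k):

import torch

def accuracy(output, target, topk=(1,)):
    """Computes precision@k for the given values of k, in percent."""
    maxk = max(topk)
    batch_size = target.size(0)

    # indices of the top-k predictions, one column per sample
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res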
Example #3
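# NOTE (assumed context): this example reads a module-level `args` (log_interval,
# grad_hist, model_type) and uses AverageMeter, accuracy and logging defined elsewhere.
# For the 'int' and 'hybrid' paths the model is assumed to implement its own backward()
# for the integer layers, and the optimizer to expose an update() scheduling hook.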
def forward(data_loader,
            model,
            criterion,
            epoch,
            training,
            model_type,
            optimizer=None,
            writer=None):
    if training:
        model.train()
    else:
        model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()

    total_steps = len(data_loader)

    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        inputs = inputs.to('cuda:0')
        target = target.to('cuda:0')

        # compute output
        output = model(inputs)
        if model_type == 'int':
            # the integer model returns (output, output_exp); rescale by the exponent before the loss
            output, output_exp = output
            output = output.float()
            loss = criterion(output * (2**output_exp.float()), target)
        else:
            output_exp = 0
            loss = criterion(output, target)

        # measure accuracy and record loss
        losses.update(float(loss), inputs.size(0))
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        if training:
            if model_type == 'int':
                model.backward(target)

            elif model_type == 'hybrid':
                # float backward
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                #int8 backward
                model.backward()
            else:
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.log_interval == 0 and training:
            logging.info('{model_type} [{0}][{1}/{2}] '
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                         'Data {data_time.val:.2f} '
                         'loss {loss.val:.3f} ({loss.avg:.3f}) '
                         'e {output_exp:d} '
                         '@1 {top1.val:.3f} ({top1.avg:.3f}) '
                         '@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch,
                             i,
                             len(data_loader),
                             model_type=model_type,
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             output_exp=output_exp,
                             top1=top1,
                             top5=top5))

            if args.grad_hist:
                if args.model_type == 'int':
                    for idx, l in enumerate(model.forward_layers):
                        if hasattr(l, 'weight'):
                            grad = l.grad_int32acc
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), grad, epoch * total_steps + i)

                elif args.model_type == 'float':
                    for idx, l in enumerate(model.layers):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), l.weight.grad,
                                epoch * total_steps + i)
                    for idx, l in enumerate(model.classifier):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), l.weight.grad,
                                epoch * total_steps + i)

    return losses.avg, top1.avg, top5.avg
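
A typical driver loop around forward might look like the sketch below; the names args.epochs, train_loader, val_loader, optimizer and writer are hypothetical placeholders for whatever the surrounding script constructs:

for epoch in range(args.epochs):
    # one training epoch
    train_loss, train_top1, train_top5 = forward(
        train_loader, model, criterion, epoch, training=True,
        model_type=args.model_type, optimizer=optimizer, writer=writer)

    # one evaluation epoch (no optimizer needed)
    with torch.no_grad():
        val_loss, val_top1, val_top5 = forward(
            val_loader, model, criterion, epoch, training=False,
            model_type=args.model_type)

    logging.info('epoch {}: train@1 {:.3f}, val@1 {:.3f}'.format(
        epoch, train_top1, val_top1))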