Example #1
@torch.no_grad()
def infer(valid_queue, model, epoch, criterion, writer, model_name):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    model.eval()

    progress = utils.ProgressMeter(len(valid_queue),
                                   batch_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix='Test: ')
    cur_step = epoch * len(valid_queue)

    end = time.time()
    for step, (input, target) in enumerate(valid_queue):
        # Variable(..., volatile=True) is deprecated; plain tensors under the
        # @torch.no_grad() decorator give the same inference-only behaviour.
        input = input.cuda()
        target = target.cuda(non_blocking=True)
        if model_name == "ShuffleNas":
            block_choices = model.module.random_block_choices(
                select_predefined_block=False)
            ignore_first_two_cs = True  # 0.2 and 0.4 scales are ignored
            if config.cs_warm_up:
                full_channel_mask, _ = model.module.random_channel_mask(
                    select_all_channels=config.use_all_channels,
                    epoch_after_cs=epoch - config.epoch_start_cs,
                    ignore_first_two_cs=ignore_first_two_cs)
            else:
                full_channel_mask, _ = model.module.random_channel_mask(
                    select_all_channels=config.use_all_channels)
            logits = model(input, block_choices, full_channel_mask)
        else:
            logits = model(input, None, None)
        loss = criterion(logits, target)
        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        reduced_loss = reduce_tensor(loss.data, world_size=config.world_size)
        acc1 = reduce_tensor(acc1, world_size=config.world_size)
        acc5 = reduce_tensor(acc5, world_size=config.world_size)
        losses.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(acc1), n)
        top5.update(to_python_float(acc5), n)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0:
            progress.print(step)
            logger.info('valid %03d %e %f %f', step, losses.avg, top1.avg,
                        top5.avg)

    writer.add_scalar('val/loss', losses.avg, cur_step)
    writer.add_scalar('val/top1', top1.avg, cur_step)
    writer.add_scalar('val/top5', top5.avg, cur_step)
    return top1.avg, losses.avg
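These examples call `reduce_tensor` and `to_python_float` without defining them. A minimal sketch of what such helpers typically look like, assuming `torch.distributed` has already been initialized (the averaging behaviour and both bodies are assumptions, not code from this repository):

import torch
import torch.distributed as dist

def reduce_tensor(tensor, world_size):
    # Sum the metric across all ranks, then average (assumed behaviour).
    rt = tensor.clone()
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= world_size
    return rt

def to_python_float(t):
    # Pull a 0-dim (or 1-element) tensor back to a plain Python number.
    return t.item() if hasattr(t, 'item') else t[0]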
Example #2
@torch.no_grad()
def infer(valid_queue, model, epoch, criterion, criterion_latency, writer):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    latency_losses = utils.AverageMeters('Latency', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')
    model.eval()

    progress = utils.ProgressMeter(len(valid_queue),
                                   batch_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix='Test: ')
    cur_step = epoch * len(valid_queue)

    end = time.time()
    for step, (input, target) in enumerate(valid_queue):
        # Variable(..., volatile=True) is deprecated; plain tensors under the
        # @torch.no_grad() decorator give the same inference-only behaviour.
        input = input.cuda()
        target = target.cuda(non_blocking=True)
        logits = model(input)
        loss = criterion(logits, target)
        latency_loss = criterion_latency(
            model.arch_parameters()[0:5]) * config.lambda1
        loss += latency_loss
        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
        n = input.size(0)
        latency_losses.update(to_python_float(latency_loss.data), n)
        reduced_loss = reduce_tensor(loss.data, world_size=config.world_size)
        acc1 = reduce_tensor(acc1, world_size=config.world_size)
        acc5 = reduce_tensor(acc5, world_size=config.world_size)
        losses.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(acc1), n)
        top5.update(to_python_float(acc5), n)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0:
            progress.print(step)
            logger.info('valid %03d %e %e %f %f', step, losses.avg,
                        latency_losses.avg, top1.avg, top5.avg)

    writer.add_scalar('val/loss', losses.avg, cur_step)
    writer.add_scalar('val/top1', top1.avg, cur_step)
    writer.add_scalar('val/top5', top5.avg, cur_step)
    return top1.avg, losses.avg
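`utils.accuracy` is also used but never shown. A common top-k accuracy helper, sketched under the assumption that it returns percentages as small tensors (so they can be passed through `reduce_tensor` as above):

import torch

def accuracy(output, target, topk=(1,)):
    # Fraction of samples whose true label appears in the top-k predictions.
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res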
Example #3
def train(train_queue, valid_queue, model, criterion, optimizer, lr, epoch,
          writer, model_name):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    data_time = utils.AverageMeters('Data', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')

    progress = utils.ProgressMeter(len(train_queue),
                                   batch_time,
                                   data_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix="Epoch: [{}]".format(epoch))
    cur_step = epoch * len(train_queue)
    writer.add_scalar('train/lr', lr, cur_step)

    model.train()
    end = time.time()
    if config.mixup:
        beta_distribution = torch.distributions.beta.Beta(
            config.mixup_alpha, config.mixup_alpha)

    for step, (input, target) in enumerate(train_queue):
        # measure data loading time
        data_time.update(time.time() - end)

        n = input.size(0)
        # Variable is deprecated; move tensors to the GPU directly.
        input = input.cuda()
        target = target.cuda(non_blocking=True)
        optimizer.zero_grad()

        if config.mixup:
            # Mixup: blend random pairs of images; the matching loss
            # interpolation happens after the forward pass below, so the
            # mixup loss is no longer overwritten by the plain loss.
            lambda_ = beta_distribution.sample([]).item()
            index = torch.randperm(input.size(0)).cuda()
            input = lambda_ * input + (1 - lambda_) * input[index, :]

        if model_name == "ShuffleNas":
            block_choices = model.module.random_block_choices(
                select_predefined_block=False)
            if config.cs_warm_up:
                ignore_first_two_cs = True
                full_channel_mask, _ = model.module.random_channel_mask(
                    select_all_channels=config.use_all_channels,
                    epoch_after_cs=epoch - config.epoch_start_cs,
                    ignore_first_two_cs=ignore_first_two_cs)
            else:
                full_channel_mask, _ = model.module.random_channel_mask(
                    select_all_channels=config.use_all_channels)
            logits = model(input, block_choices, full_channel_mask)
        else:
            logits = model(input, None, None)
        if config.mixup:
            # Mixup loss: interpolate between the two label assignments.
            loss = (lambda_ * criterion(logits, target) +
                    (1 - lambda_) * criterion(logits, target[index]))
        else:
            loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), config.w_grad_clip)
        if model_name == "ShuffleNas":
            weight = model.parameters()
            optimizer = torch.optim.SGD(weight,
                                        config.w_lr,
                                        momentum=config.w_momentum,
                                        weight_decay=config.w_weight_decay)
            optimizer.step()
        else:
            optimizer.step()

        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))
        reduced_loss = reduce_tensor(loss.data, world_size=config.world_size)
        acc1 = reduce_tensor(acc1, world_size=config.world_size)
        acc5 = reduce_tensor(acc5, world_size=config.world_size)

        losses.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(acc1), n)
        top5.update(to_python_float(acc5), n)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0 or step == len(train_queue) - 1:
            logger.info('train step:%03d %03d  loss:%e top1:%05f top5:%05f',
                        step, len(train_queue), losses.avg, top1.avg, top5.avg)
            progress.print(step)
        writer.add_scalar('train/loss', losses.avg, cur_step)
        writer.add_scalar('train/top1', top1.avg, cur_step)
        writer.add_scalar('train/top5', top5.avg, cur_step)

    return top1.avg, losses.avg
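The mixup branch above implements the standard formulation: with lambda ~ Beta(alpha, alpha), inputs are blended as x_mix = lambda * x_i + (1 - lambda) * x_j and the loss is interpolated with the same coefficient. A minimal standalone sketch (the function name and shapes are assumptions for illustration):

import torch
import torch.nn.functional as F

def mixup_step(model, images, labels, alpha=0.2):
    # Sample the blending coefficient and a random pairing of the batch.
    lam = torch.distributions.Beta(alpha, alpha).sample().item()
    index = torch.randperm(images.size(0), device=images.device)
    mixed = lam * images + (1 - lam) * images[index]
    logits = model(mixed)
    # Interpolate the two cross-entropy losses with the same coefficient.
    return (lam * F.cross_entropy(logits, labels) +
            (1 - lam) * F.cross_entropy(logits, labels[index]))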
Example #4
def train(train_queue, valid_queue, model, architect, criterion,
          criterion_latency, optimizer, lr, epoch, writer):
    batch_time = utils.AverageMeters('Time', ':6.3f')
    data_time = utils.AverageMeters('Data', ':6.3f')
    losses = utils.AverageMeters('Loss', ':.4e')
    top1 = utils.AverageMeters('Acc@1', ':6.2f')
    top5 = utils.AverageMeters('Acc@5', ':6.2f')

    model.train()
    progress = utils.ProgressMeter(len(train_queue),
                                   batch_time,
                                   data_time,
                                   losses,
                                   top1,
                                   top5,
                                   prefix="Epoch: [{}]".format(epoch))
    cur_step = epoch * len(train_queue)
    writer.add_scalar('train/lr', lr, cur_step)

    end = time.time()
    for step, (input, target) in enumerate(train_queue):
        # measure data loading time
        data_time.update(time.time() - end)

        n = input.size(0)
        # Variable is deprecated; move tensors to the GPU directly.
        input = input.cuda()
        target = target.cuda(non_blocking=True)

        # get a random minibatch from the search queue with replacement
        input_search, target_search = next(iter(valid_queue))
        input_search = input_search.cuda()
        target_search = target_search.cuda(non_blocking=True)

        # let the network weights warm up before architecture steps begin
        if epoch >= 15:
            architect.step(input,
                           target,
                           input_search,
                           target_search,
                           lr,
                           optimizer,
                           unrolled=config.unrolled)

        optimizer.zero_grad()
        logits = model(input)

        loss = criterion(logits, target)
        latency_loss = criterion_latency(
            model.module.arch_parameters()[0:5]) * config.lambda1

        loss += latency_loss
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), config.grad_clip)
        optimizer.step()

        acc1, acc5 = utils.accuracy(logits, target, topk=(1, 5))

        reduced_loss = reduce_tensor(loss.data, world_size=config.world_size)
        acc1 = reduce_tensor(acc1, world_size=config.world_size)
        acc5 = reduce_tensor(acc5, world_size=config.world_size)

        losses.update(to_python_float(reduced_loss), n)
        top1.update(to_python_float(acc1), n)
        top5.update(to_python_float(acc5), n)

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if step % config.print_freq == 0 or step == len(train_queue) - 1:
            logger.info('train step:%03d %03d  loss:%e top1:%05f top5:%05f',
                        step, len(train_queue), losses.avg, top1.avg, top5.avg)
            progress.print(step)
        writer.add_scalar('train/loss', losses.avg, cur_step)
        writer.add_scalar('train/top1', top1.avg, cur_step)
        writer.add_scalar('train/top5', top5.avg, cur_step)

    return top1.avg, losses.avg
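For context, a hypothetical epoch loop wiring the Example #4 train and Example #2 infer functions together (every name here is an assumption about the surrounding script, not code from it):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(config.save_path)  # assumed TensorBoard log dir
for epoch in range(config.epochs):
    lr = scheduler.get_last_lr()[0]  # assumed torch.optim LR scheduler
    train_top1, train_loss = train(train_queue, valid_queue, model, architect,
                                   criterion, criterion_latency, optimizer,
                                   lr, epoch, writer)
    val_top1, val_loss = infer(valid_queue, model, epoch, criterion,
                               criterion_latency, writer)
    scheduler.step()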