Example #1
def run(args, myargs):
    # select the sub-config that matches the requested sub-command
    my_config = getattr(myargs.config, args.command)
    config = SearchConfig()
    # copy parsed CLI arguments onto the config; names must not collide
    for k, v in args.items():
        assert not hasattr(config, k)
        setattr(config, k, v)

    # overlay the per-command config, warning about keys SearchConfig does not define
    for k, v in my_config.items():
        if not hasattr(config, k):
            print('* config does not have %s' % k)
        setattr(config, k, v)
    device = torch.device("cuda")
    writer = myargs.writer
    writer.add_text('all_config', config.as_markdown(), 0)
    logger = myargs.logger
    config.print_params(logger.info_msg)

    config.data_path = os.path.expanduser(config.data_path)
    config.plot_path = os.path.join(args.outdir, 'plot')
    config.path = args.outdir
    main(config=config, logger=logger, device=device, myargs=myargs)
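
Example #1 copies every parsed argument and every key of a per-command config dict onto the SearchConfig instance, asserting that argument names never shadow existing fields. A minimal, self-contained sketch of that merge pattern on plain objects (the names below are illustrative, not taken from the source):

class Config:
    pass

def merge(config, args, overrides):
    # first pass: CLI arguments must not collide with existing config fields
    for k, v in args.items():
        assert not hasattr(config, k), "argument %s would shadow a config field" % k
        setattr(config, k, v)
    # second pass: per-command overrides may add or replace fields, with a warning
    for k, v in overrides.items():
        if not hasattr(config, k):
            print('* config does not have %s' % k)
        setattr(config, k, v)
    return config

cfg = merge(Config(), {'command': 'search'}, {'batch_size': 64})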
Example #2
def main():
    config = SearchConfig(section='fine-tune')

    device = torch.device("cuda")

    # tensorboard
    writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_tune.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=True)

    logger.debug('loading checkpoint')
    best_path = os.path.join(config.path, 'best.pth.tar')

    model = torch.load(best_path)

    model.prune()

    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)

    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)
    model.print_alphas(logger)
    first_top1 = validate(valid_loader, model, -1, 0, device, config, logger,
                          writer)
    os.makedirs(config.fine_tune_path, exist_ok=True)
    # training loop
    best_top1 = 0.
    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        train(train_loader, model, architect, w_optim, lr, epoch, writer,
              device, config, logger)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, epoch, cur_step, device, config,
                        logger, writer)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.fine_tune_path, is_best)
        print("")

    logger.info("Initial best Prec@1 = {:.4%}".format(first_top1))
    logger.info("Final best Prec@1 = {:.4%}".format(best_top1))
Example #3
config = SearchConfig()

device = torch.device("cuda")

# tensorboard
writer = SummaryWriter(log_dir=os.path.join(config.path, "tb"))
writer.add_text('config', config.as_markdown(), 0)

logger = utils.get_logger(
    os.path.join(config.path, "{}.log".format(config.name)))
config.print_params(logger.info)


def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True
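
Examples #2 through #4 all repeat the same seeding and cudnn setup. A minimal helper capturing that reproducibility boilerplate (the function name is hypothetical, not from the source):

import numpy as np
import torch

def set_reproducible(seed):
    # seed NumPy and every PyTorch RNG (CPU and all visible GPUs)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # benchmark=True lets cudnn pick the fastest kernels per input shape,
    # trading exact run-to-run determinism for speed, as in the examples above
    torch.backends.cudnn.benchmark = True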
Example #4
def main():
    config = SearchConfig()

    device = torch.device("cuda")

    # tensorboard
    tb_path = os.path.join(config.path, "tb")
    shutil.rmtree(tb_path, ignore_errors=True)  # clear any previous tensorboard run
    writer = SummaryWriter(log_dir=tb_path)
    writer.add_text('config', config.as_markdown(), 0)

    logger = utils.get_logger(
        os.path.join(config.path, "{}_train.log".format(config.name)))
    config.print_params(logger.info)

    logger.info("Logger is set - training start")
    if int(config.profile) != 0:
        logger.info('entering profile mode')
        profile = True
        config.epochs = 1
        max_batches = config.print_freq
    else:
        profile = False
        max_batches = None
    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data = utils.get_data(
        config.dataset, config.data_path, cutout_length=0, validation=False)

    module_name, class_name = config.controller_class.rsplit('.', 1)
    controller_cls = getattr(import_module(module_name), class_name)
    model = controller_cls(device, **config.__dict__)
    model = model.to(device)

    # weights optimizer
    w_optim = torch.optim.SGD(model.weights(),
                              config.w_lr,
                              momentum=config.w_momentum,
                              weight_decay=config.w_weight_decay)
    # alphas optimizer
    alpha_optim = torch.optim.Adam(model.alphas(),
                                   config.alpha_lr,
                                   betas=(0.5, 0.999),
                                   weight_decay=config.alpha_weight_decay)

    # split data to train/validation
    n_train = len(train_data)
    split = int(n_train * config.validate_split)
    indices = list(range(n_train))
    if split <= 0:
        logger.debug('using train as validation')
        valid_sampler = train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            indices)
    else:
        train_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            indices[:split])
        valid_sampler = torch.utils.data.sampler.SubsetRandomSampler(
            indices[split:])

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=train_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               sampler=valid_sampler,
                                               num_workers=config.workers,
                                               pin_memory=True)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        w_optim, config.epochs, eta_min=config.w_lr_min)
    architect = Architect(model, config.w_momentum, config.w_weight_decay)

    # training loop
    best = 0
    best_genotype = None

    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]

        model.print_alphas(logger)

        # training
        if profile:
            with torch.autograd.profiler.profile(use_cuda=True) as prof:
                train_qual = train(train_loader,
                                   valid_loader,
                                   model,
                                   architect,
                                   w_optim,
                                   alpha_optim,
                                   lr,
                                   epoch,
                                   writer,
                                   device,
                                   config,
                                   logger,
                                   max_batches=max_batches)
            print('cpu')
            print(prof.key_averages().table(sort_by="cpu_time_total",
                                            row_limit=10))
            print(prof.key_averages().table(sort_by="cpu_time", row_limit=10))
            print('cuda')
            print(prof.key_averages().table(sort_by="cuda_time_total",
                                            row_limit=10))
            print(prof.key_averages().table(sort_by="cuda_time", row_limit=10))
            break

        model.new_epoch(epoch, writer)
        train_qual = train(train_loader,
                           valid_loader,
                           model,
                           architect,
                           w_optim,
                           alpha_optim,
                           lr,
                           epoch,
                           writer,
                           device,
                           config,
                           logger,
                           max_batches=max_batches)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        val_qual = validate(valid_loader, model, epoch, cur_step, device,
                            config, logger, writer)

        # log
        # genotype
        genotype = model.genotype()
        logger.info("genotype = {}".format(genotype))

        # genotype as a image
        plot_path = os.path.join(config.plot_path,
                                 "EP{:02d}".format(epoch + 1))
        caption = "Epoch {}".format(epoch + 1)
        model.plot_genotype(plot_path, caption)
        #plot(genotype.normal, plot_path + "-normal", caption)
        #plot(genotype.reduce, plot_path + "-reduce", caption)

        if config.use_train_quality != 0:
            cur_qual = train_qual
        else:
            cur_qual = val_qual

        # save
        if best < cur_qual:
            best = cur_qual
            best_genotype = genotype
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)
        logger.info("Quality{}: {} \n\n".format('*' if is_best else '',
                                                cur_qual))

    logger.info("Final best =  {}".format(best))
    logger.info("Best Genotype = {}".format(best_genotype))