Example #1
    def default_mutator_test_pipeline(self, mutator_cls):
        for model_cls in self.default_cls:
            for cuda_test in self.cuda_test:
                # sample-and-forward with a freshly constructed mutator
                _reset_global_mutable_counting()
                model = model_cls(self)
                mutator = mutator_cls(model)
                if cuda_test:
                    model.cuda()
                    mutator.cuda()
                    if cuda_test > 1:
                        model = nn.DataParallel(model)
                self.iterative_sample_and_forward(model,
                                                  mutator,
                                                  use_cuda=cuda_test)
                # export the chosen architecture and apply it to a fresh model
                _reset_global_mutable_counting()
                model_fixed = model_cls(self)
                if cuda_test:
                    model_fixed.cuda()
                    if cuda_test > 1:
                        model_fixed = nn.DataParallel(model_fixed)
                with torch.no_grad():
                    arc = mutator.export()
                apply_fixed_architecture(model_fixed, arc)
                self.iterative_sample_and_forward(model_fixed,
                                                  n_iters=1,
                                                  use_cuda=cuda_test)
Example #2
def get_model(embedding, num_layers):
    logger.info("num layers: {0}".format(num_layers))
    assert FLAGS.child_fixed_arc is not None, "Architecture should be provided."

    child_model = Model(embedding=embedding,
                        hidden_units=FLAGS.child_out_filters_scale *
                        FLAGS.child_out_filters,
                        num_layers=num_layers,
                        num_classes=FLAGS.class_num,
                        choose_from_k=5 if FLAGS.multi_path else 1,
                        lstm_keep_prob=FLAGS.lstm_out_keep_prob,
                        cnn_keep_prob=FLAGS.cnn_keep_prob,
                        att_keep_prob=FLAGS.attention_keep_prob,
                        att_mask=FLAGS.is_mask,
                        embed_keep_prob=FLAGS.embed_keep_prob,
                        final_output_keep_prob=FLAGS.final_output_keep_prob,
                        global_pool=FLAGS.output_type)

    apply_fixed_architecture(child_model, FLAGS.child_fixed_arc)
    return child_model
Example #3
    def compare(self):
        self.logger.info("=" * 20)
        self.logger.info("Selecting the best architecture ...")
        self.enable_writter = False
        # split train dataset into train and valid dataset
        train_size = int(0.8 * len(self.train_dataset))
        valid_size = len(self.train_dataset) - train_size
        self.train_dataset_part, self.valid_dataset_part = torch.utils.data.random_split(
            self.train_dataset, [train_size, valid_size])

        # dataloader
        self.train_loader_part = torch.utils.data.DataLoader(
            self.train_dataset_part,
            batch_size=self.cfg.dataset.batch_size,
            shuffle=True,
            num_workers=self.cfg.dataset.workers,
            pin_memory=True)
        self.valid_loader_part = torch.utils.data.DataLoader(
            self.valid_dataset_part,
            batch_size=self.cfg.dataset.batch_size,
            shuffle=True,
            num_workers=self.cfg.dataset.workers,
            pin_memory=True)

        # choose the best architecture
        for arc in self.arcs:
            self.reset()
            self.mutator = apply_fixed_architecture(self.model, arc)
            size = self.model_size()
            arc_name = os.path.basename(arc)
            self.logger.info(f"{arc} Model size={size*4/1024**2} MB")

            # train
            for epoch in range(self.train_epochs):
                self.train_one_epoch(epoch, self.train_loader_part)
            val_acc = self.valid_one_epoch(-1, self.valid_loader_part)
            self.size_acc[arc_name] = {
                'size': size,
                'val_acc': val_acc,
                'arc': arc
            }
        sorted_size_acc = sorted(
            self.size_acc.items(),
            key=lambda x: x[1]['val_acc']['save_metric'].avg,
            reverse=True)
        return sorted_size_acc[0][1]
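A brief usage sketch for compare(): assuming the same class also defines the run() method shown in Example #9 below, the best architecture it selects can be retrained from scratch directly; the attribute and key names are taken from the snippets themselves.

# trainer: an instance of the class defining compare() and run()
best = trainer.compare()                  # returns {'size': ..., 'val_acc': ..., 'arc': <json path>}
trainer.run(best['arc'], validate=True)   # retrain the winning architecture from scratch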
Example #4
    parser.add_argument("--batch-size", default=96, type=int)
    parser.add_argument("--log-frequency", default=10, type=int)
    parser.add_argument("--epochs", default=600, type=int)
    parser.add_argument("--aux-weight", default=0.4, type=float)
    parser.add_argument("--drop-path-prob", default=0.2, type=float)
    parser.add_argument("--workers", default=4)
    parser.add_argument("--grad-clip", default=5., type=float)
    parser.add_argument("--arc-checkpoint",
                        default="./checkpoints/epoch_0.json")

    args = parser.parse_args()
    dataset_train, dataset_valid = datasets.get_dataset("cifar10",
                                                        cutout_length=16)

    model = CNN(32, 3, 36, 10, args.layers, auxiliary=True)
    apply_fixed_architecture(model, args.arc_checkpoint, device=device)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    criterion.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                0.025,
                                momentum=0.9,
                                weight_decay=3.0E-4)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              args.epochs,
                                                              eta_min=1E-6)

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
Example #5
    parser.add_argument('--ending_lr',
                        default=0,
                        type=float,
                        help='ending learning rate')
    parser.add_argument('--cutout',
                        default=0,
                        type=int,
                        help='cutout length in data augmentation')
    parser.add_argument("--channels", default=16, type=int)

    args = parser.parse_args()
    dataset_train, dataset_valid = datasets.get_dataset(
        "cifar10", cutout_length=args.cutout)

    model = CNN(32, 3, args.channels, 10, args.layers, auxiliary=True)
    apply_fixed_architecture(model, args.arc_checkpoint)
    criterion = nn.CrossEntropyLoss()

    model.to(device)
    criterion.to(device)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.initial_lr,
                                momentum=0.9,
                                weight_decay=args.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, args.epochs, eta_min=args.ending_lr)

    train_loader = torch.utils.data.DataLoader(dataset_train,
                                               batch_size=args.batch_size,
                                               shuffle=True,
Example #6
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join(config.output_path, config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(
        model, criterion,
        [3, 32, 32] if 'cifar' in config.dataset else [3, 224, 224])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif 3.0 < param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              config.epochs,
                                                              eta_min=1E-6)

    best_top1 = best_top5 = 0.
    for epoch in range(config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)

        # validation
        top1, top5 = validate(logger, config, valid_loader, model, criterion,
                              epoch, main_proc)
        best_top1 = max(best_top1, top1)
        best_top5 = max(best_top5, top5)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = %.4f Prec@5 = %.4f", best_top1, best_top5)
Example #7
                               num_modules_per_stack=args.num_modules_per_stack,
                               bn_affine=args.bn_affine,
                               bn_momentum=args.bn_momentum,
                               bn_track_running_stats=args.bn_track_running_stats)

    optim = torch.optim.SGD(model.parameters(), 0.025)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optim, args.epochs, eta_min=0.001)
    criterion = nn.CrossEntropyLoss()

    if args.arch is not None:
        logger.info('model retraining...')
        with open(args.arch, 'r') as f:
            arch = json.load(f)
        for trial in query_nb201_trial_stats(arch, 200, 'cifar100'):
            pprint.pprint(trial)
        apply_fixed_architecture(model, args.arch)
        dataloader_train = DataLoader(dataset_train, batch_size=args.batch_size, shuffle=True, num_workers=0)
        dataloader_valid = DataLoader(dataset_valid, batch_size=args.batch_size, shuffle=True, num_workers=0)
        train(args, model, dataloader_train, dataloader_valid, criterion, optim,
              torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
        exit(0)

    trainer = enas.EnasTrainer(model,
                               loss=criterion,
                               metrics=lambda output, target: accuracy(output, target, topk=(1,)),
                               reward_function=reward_accuracy,
                               optimizer=optim,
                               callbacks=[LRSchedulerCallback(lr_scheduler), ArchitectureCheckpoint("./checkpoints")],
                               batch_size=args.batch_size,
                               num_epochs=args.epochs,
                               dataset_train=dataset_train,
Example #8
    if args.train_mode == 'search':
        # this is architecture search
        logger.info('Creating ProxylessNasTrainer...')
        trainer = ProxylessNasTrainer(model,
                                      model_optim=optimizer,
                                      train_loader=data_provider.train,
                                      valid_loader=data_provider.valid,
                                      device=device,
                                      warmup=args.warmup,
                                      ckpt_path=args.checkpoint_path,
                                      arch_path=args.arch_path)

        logger.info('Start to train with ProxylessNasTrainer...')
        trainer.train()
        logger.info('Training done')
        trainer.export(args.arch_path)
        logger.info('Best architecture exported in %s', args.arch_path)
    elif args.train_mode == 'retrain':
        # this is retrain
        from nni.nas.pytorch.fixed import apply_fixed_architecture
        assert os.path.isfile(args.exported_arch_path), \
            "exported_arch_path {} should be a file.".format(args.exported_arch_path)
        apply_fixed_architecture(model, args.exported_arch_path, device=device)
        trainer = Retrain(model,
                          optimizer,
                          device,
                          data_provider,
                          n_epochs=300)
        trainer.run()
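Stripped of the project-specific Retrain helper and data provider, the retrain branch above reduces to a guard-and-apply step. A minimal sketch under those assumptions; build_search_space and the checkpoint path are placeholders, while the assert and the call signature mirror the snippet.

import os
import torch
from nni.nas.pytorch.fixed import apply_fixed_architecture

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = build_search_space()                          # placeholder: same search space used during search
exported_arch_path = './checkpoints/final_arch.json'  # placeholder path to the exported architecture

assert os.path.isfile(exported_arch_path), \
    "exported_arch_path {} should be a file.".format(exported_arch_path)
apply_fixed_architecture(model, exported_arch_path, device=device)
model.to(device)
# from here on, train model with an ordinary PyTorch loop, as the Retrain helper does above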
Example #9
    def run(self, arc, validate=True, test=False):
        '''Retrain the best-performing architecture from scratch.
            arc: path to the JSON file of the best-performing architecture
        '''
        self.logger.info("=" * 20)
        self.logger.info("Retraining the best architecture ...")
        self.enable_writter = True
        self.reset()

        # init model and mutator
        self.mutator = apply_fixed_architecture(self.model, arc)
        size = self.model_size()
        arc_name = os.path.basename(arc)
        self.logger.info(f"{arc_name} Model size={size*4/1024**2} MB")

        # callbacks
        for callback in self.callbacks:
            callback.build(self.model, self.mutator, self)

        # resume
        self.start_epoch = 0
        self.resume()

        # finetune
        # TODO: improve robustness; resuming the optimizer state still has a bug
        # if self.cfg.model.finetune:
        #     self.logger.info("Freezing params of conv part ...")
        #     for name, param in self.model.named_parameters():
        #         if 'dense' not in name:
        #             param.requires_grad = False

        # dataparallel
        if len(self.cfg.trainer.device_ids) > 1:
            device_ids = self.cfg.trainer.device_ids
            num_gpus_available = torch.cuda.device_count()
            assert num_gpus_available >= len(
                device_ids), "you can only use {} device(s)".format(
                    num_gpus_available)
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=device_ids)
            if self.kd_model:
                self.kd_model = torch.nn.DataParallel(self.kd_model,
                                                      device_ids=device_ids)

        if test:
            meters = self.test_one_epoch(-1, self.test_loader)
            self.logger.info(f"Final test metrics= {meters}")
            return meters

        # start training
        for epoch in range(self.start_epoch, self.cfg.evaluator.num_epochs):
            for callback in self.callbacks:
                callback.on_epoch_begin(epoch)

            self.logger.info("Epoch %d Training", epoch)
            self.train_one_epoch(epoch, self.train_loader)

            if validate:
                self.logger.info("Epoch %d Validating", epoch)
                self.valid_one_epoch(epoch, self.test_loader)

            self.lr_scheduler.step()

            self.cur_meters = getattr(self, 'valid_meters', self.train_meters)
            for callback in self.callbacks:
                if isinstance(callback, CheckpointCallback):
                    callback.update_best_metric(
                        self.cur_meters.meters['save_metric'].avg)
                callback.on_epoch_end(epoch)

        self.logger.info("Final best Prec@1 = {:.4%}".format(self.best_metric))
Example #10
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers
    train_loader = CyclicIterator(train_loader, train_sampler)
    # valid_loader = CyclicIterator(valid_loader, valid_sampler, False)

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join('', config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(model, criterion, [3, 512, 512])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif 3.0 < param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs, eta_min=1E-6)

    best_top1 = 0.
    epoch = 0
    try:
        checkpoint = torch.load(config.model_checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        model.eval()
        print("----------------------------")
        print("MODEL LOADED FROM CHECKPOINT " + config.model_checkpoint)
        print("----------------------------")
    except Exception:
        print("----------------------------")
        print("MODEL NOT LOADED FROM CHECKPOINT")
        print("----------------------------")

    # for epoch in range(0, epoch):
    # lr_scheduler.step()

    for epoch in range(epoch, config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)
        if epoch % config.log_frequency == 0:
            # validation
            top1 = validate(logger, config, valid_loader, model, criterion,
                            epoch, main_proc)
            best_top1 = max(best_top1, top1)
            # lr_scheduler.step()
            logger.info("Final best Prec@1 = %.4f", best_top1)
Example #11
def main(args):
    reset_seed(args.seed)
    prepare_logger(args)

    logger.info("These are the hyper-parameters you want to tune:\n%s",
                pprint.pformat(vars(args)))

    if args.model == 'nas':
        logger.info("Using NAS.\n")
        if args.fix_arch:
            if not os.path.exists(args.arc_checkpoint):
                print(args.arc_checkpoint,
                      'does not exist, not fixing the architecture')
                args.fix_arch = False

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    if args.model == 'nas':
        if not args.fix_arch:
            model = CNN(32, 3, args.channels, 10, args.layers)
            trainset, testset = data_preprocess(args)
        else:
            model = CNN(32, 3, args.channels, 10, args.layers)
            apply_fixed_architecture(model, args.arc_checkpoint)
            model.to(device)
            train_loader, test_loader = data_preprocess(args)
    else:
        train_loader, test_loader = data_preprocess(args)
        model = models.__dict__[args.model]()
        model.to(device)

    criterion = nn.CrossEntropyLoss()
    if args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(),
                               lr=args.initial_lr,
                               weight_decay=args.weight_decay)
    else:
        if args.optimizer == 'sgd':
            optimizer_cls = optim.SGD
        elif args.optimizer == 'rmsprop':
            optimizer_cls = optim.RMSprop
        optimizer = optimizer_cls(model.parameters(),
                                  lr=args.initial_lr,
                                  momentum=args.momentum,
                                  weight_decay=args.weight_decay)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                     args.epochs,
                                                     eta_min=args.ending_lr)

    if args.model == 'nas' and not args.fix_arch:
        trainer = DartsTrainer(model,
                               loss=criterion,
                               metrics=lambda output, target: accuracyTopk(
                                   output, target, topk=(1, )),
                               optimizer=optimizer,
                               num_epochs=args.epochs,
                               dataset_train=trainset,
                               dataset_valid=testset,
                               batch_size=args.batch_size,
                               log_frequency=args.log_frequency,
                               unrolled=args.unrolled,
                               callbacks=[
                                   LRSchedulerCallback(scheduler),
                                   ArchitectureCheckpoint("./checkpoints")
                               ])
        if args.visualization:
            trainer.enable_visualization()
        trainer.train()
        trainer.export("final_arch.json")
    else:
        for epoch in range(1, args.epochs + 1):
            train(model, train_loader, criterion, optimizer, scheduler, args,
                  epoch, device)
            top1, _ = test(model, test_loader, criterion, args, epoch, device)
            nni.report_intermediate_result(top1)
        logger.info("Final accuracy is: %.6f", top1)
        nni.report_final_result(top1)
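The two branches above compose into a two-stage workflow: search and export first, then retrain against the exported JSON. A condensed sketch reusing only names that appear in the snippet (CNN, DartsTrainer, accuracyTopk, apply_fixed_architecture, "final_arch.json"); the omitted DartsTrainer arguments are assumed to keep their defaults.

# stage 1: architecture search, then export the best architecture
model = CNN(32, 3, args.channels, 10, args.layers)
trainer = DartsTrainer(model,
                       loss=criterion,
                       metrics=lambda output, target: accuracyTopk(output, target, topk=(1,)),
                       optimizer=optimizer,
                       num_epochs=args.epochs,
                       dataset_train=trainset,
                       dataset_valid=testset,
                       batch_size=args.batch_size)
trainer.train()
trainer.export("final_arch.json")

# stage 2: rebuild the search space and pin it to the exported architecture
retrain_model = CNN(32, 3, args.channels, 10, args.layers)
apply_fixed_architecture(retrain_model, "final_arch.json")
# retrain_model is then trained with the plain train()/test() loop shown above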
Example #12
            {'params': get_parameters(model, keys, mode='include'), 'weight_decay': 0},
        ], lr=0.05, momentum=momentum, nesterov=nesterov)
    else:
        optimizer = torch.optim.SGD(get_parameters(model), lr=0.05, momentum=momentum, nesterov=nesterov, weight_decay=4e-5)

    if args.train_mode == 'search':
        # this is architecture search
        logger.info('Creating ProxylessNasTrainer...')
        trainer = ProxylessNasTrainer(model,
                                      model_optim=optimizer,
                                      train_loader=data_provider.train,
                                      valid_loader=data_provider.valid,
                                      device=device,
                                      warmup=args.warmup,
                                      ckpt_path=args.checkpoint_path,
                                      arch_path=args.arch_path)

        logger.info('Start to train with ProxylessNasTrainer...')
        trainer.train()
        logger.info('Training done')
        trainer.export(args.arch_path)
        logger.info('Best architecture exported in %s', args.arch_path)
    elif args.train_mode == 'retrain':
        # this is retrain
        from nni.nas.pytorch.fixed import apply_fixed_architecture
        assert os.path.isfile(args.exported_arch_path), \
            "exported_arch_path {} should be a file.".format(args.exported_arch_path)
        apply_fixed_architecture(model, args.exported_arch_path)
        trainer = Retrain(model, optimizer, device, data_provider, n_epochs=300)
        trainer.run()
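Taken together, the examples in this listing call apply_fixed_architecture in two forms: with the path of an exported JSON file, optionally passing device= (Examples #4 and #8), or with the dict returned by mutator.export() (Example #1); Examples #3 and #9 also keep the returned mutator object. A closing sketch of both forms, with model, model_fixed, mutator, device and the file path treated as placeholders.

from nni.nas.pytorch.fixed import apply_fixed_architecture

# 1) from an exported JSON file, optionally targeting a specific device
apply_fixed_architecture(model, "./checkpoints/final_arch.json", device=device)

# 2) from the dict returned by mutator.export()
arc = mutator.export()
fixed_mutator = apply_fixed_architecture(model_fixed, arc)   # the return value may also be ignored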