Code Example #1
File: trainer.py Project: araujoalexandre/neuralnet
    def _run_training(self):

        if self.params.lb_smooth == 0:
            self.criterion = torch.nn.CrossEntropyLoss().cuda()
        else:
            if self.local_rank == 0:
                logging.info(
                    "Using CrossEntropyLoss with label smooth {}.".format(
                        self.params.lb_smooth))
            self.criterion = utils.CrossEntropyLabelSmooth(
                self.reader.n_classes, self.params.lb_smooth)

        # if start_new_model is True, global_step = 0
        # else we get global step from checkpoint
        if self.start_new_model:
            start_epoch = 0
            global_step = 0
        else:
            start_epoch = self.checkpoint['epoch']
            global_step = self.checkpoint['global_step']

        data_loader, sampler = self.reader.load_dataset()
        if sampler is not None:
            assert sampler.num_replicas == self.world_size

        batch_size = self.batch_size
        if self.is_distributed:
            n_files = sampler.num_samples
        else:
            n_files = self.reader.n_train_files

        if self.local_rank == 0:
            logging.info("Number of files on worker: {}".format(n_files))
            logging.info("Start training")

        profile_enabled = False
        for epoch_id in range(start_epoch, self.params.num_epochs):
            if self.is_distributed:
                sampler.set_epoch(epoch_id)
            for n_batch, data in enumerate(data_loader):
                epoch = (int(global_step) * batch_size) / n_files
                with torch.autograd.profiler.profile(enabled=profile_enabled,
                                                     use_cuda=True) as prof:
                    self._training(data, epoch, global_step)
                if profile_enabled:
                    logging.info(prof.key_averages().table(
                        sort_by="self_cpu_time_total"))
                    # prof.export_chrome_trace(join(
                    #   self.train_dir+'_logs', 'trace_{}.json'.format(global_step)))
                self._save_ckpt(global_step, epoch_id)
                global_step += 1

            self.scheduler.step()
        self._save_ckpt(global_step, epoch_id, final=True)
        logging.info("Done training -- epoch limit reached.")
Code Example #2
File: test.py Project: susan0199/StacNAS
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)

    valid_loader = torch.utils.data.DataLoader(dataset=valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.num_workers,
                                               pin_memory=True)

    logger.info("loading model...")
    if config.load_model_dir is not None:
        model = torch.load(config.load_model_dir)
    else:
        model = utils.load_checkpoint(config.model_dir)
    model = model.to(device)

    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(num_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    logger.info("start testing...")
    best_top1 = test(valid_loader, model, criterion)

    logger.info("Final Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))
Code Example #3
File: retrain.py Project: xxlya/COS598D_Assignment3
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join(config.output_path, config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(
        model, criterion,
        [3, 32, 32] if 'cifar' in config.dataset else [3, 224, 224])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.SGD(model.parameters(),
                                config.lr,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)
    lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,
                                                              config.epochs,
                                                              eta_min=1E-6)

    best_top1 = best_top5 = 0.
    for epoch in range(config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)

        # validation
        top1, top5 = validate(logger, config, valid_loader, model, criterion,
                              epoch, main_proc)
        best_top1 = max(best_top1, top1)
        best_top5 = max(best_top5, top5)
        lr_scheduler.step()

    logger.info("Final best Prec@1 = %.4f Prec@5 = %.4f", best_top1, best_top5)
Code Example #4
def build_imagenet(model_state_dict, optimizer_state_dict, **kwargs):
    valid_ratio = kwargs.pop('valid_ratio', None)
    valid_num = kwargs.pop('valid_num', None)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
            hue=0.1),
        transforms.ToTensor(),
        normalize,
    ])
    valid_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if args.zip_file:
        logging.info('Loading data from zip file')
        traindir = os.path.join(args.data, 'train.zip')
        if args.lazy_load:
            data = utils.ZipDataset(traindir)
        else:
            logging.info('Loading data into memory')
            data = utils.InMemoryZipDataset(traindir, num_workers=args.num_workers)
    else:
        logging.info('Loading data from directory')
        traindir = os.path.join(args.data, 'train')
        if args.lazy_load:
            data = dset.ImageFolder(traindir)
        else:
            logging.info('Loading data into memory')
            data = utils.InMemoryDataset(traindir, num_workers=args.num_workers)
       
    num_data = len(data)
    indices = list(range(num_data))
    np.random.shuffle(indices)
    if valid_ratio is not None:
        split = int(np.floor((1 - valid_ratio) * num_data))
        train_indices = sorted(indices[:split])
        valid_indices = sorted(indices[split:])
    else:
        assert valid_num is not None
        train_indices = sorted(indices[valid_num:])
        valid_indices = sorted(indices[:valid_num])

    train_data = utils.WrappedDataset(data, train_indices, train_transform)
    valid_data = utils.WrappedDataset(data, valid_indices, valid_transform)
    logging.info('train set = %d', len(train_data))
    logging.info('valid set = %d', len(valid_data))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(train_indices),
        pin_memory=True, num_workers=args.num_workers, drop_last=False)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.eval_batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(valid_indices),
        pin_memory=True, num_workers=args.num_workers, drop_last=False)
    
    model = NASNet(args.width_stages, args.n_cell_stages, args.stride_stages, args.dropout)
    model.init_model(args.model_init)
    model.set_bn_param(0.1, 0.001)
    logging.info("param size = %d", utils.count_parameters(model))

    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)

    if args.no_decay_keys:
        keys = args.no_decay_keys.split('#')
        net_params = [model.get_parameters(keys, mode='exclude'),
                      model.get_parameters(keys, mode='include')]
        optimizer = torch.optim.SGD([
            {'params': net_params[0], 'weight_decay': args.weight_decay},
            {'params': net_params[1], 'weight_decay': 0},],
            args.lr,
            momentum=0.9,
            nesterov=True,
        )
    else:
        optimizer = torch.optim.SGD(
            model.parameters(),
            args.lr,
            momentum=0.9,
            weight_decay=args.weight_decay,
            nesterov=True,
        )
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, gamma=args.gamma)

    if torch.cuda.device_count() > 1:
        logging.info("Use %d %s", torch.cuda.device_count(), "GPUs !")
        model = nn.DataParallel(model)
    model = model.cuda()

    train_criterion = utils.CrossEntropyLabelSmooth(1000, args.label_smooth).cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()
    return train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler
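
The train/valid split sizes follow from split = int(np.floor((1 - valid_ratio) * num_data)); note the parentheses around (1 - valid_ratio). A quick sanity check with hypothetical numbers:

import numpy as np

num_data, valid_ratio = 1000, 0.1                    # hypothetical sizes
split = int(np.floor((1 - valid_ratio) * num_data))  # 900
# indices[:split] -> 900 training samples, indices[split:] -> 100 validation samples
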
Code Example #5
def build_imagenet(model_config, model_state_dict, optimizer_state_dict, **kwargs):
    epoch = kwargs.pop('epoch')
    step = kwargs.pop('step')

    # build model
    logging.info('Building Model')
    model = NASNet.build_from_config(model_config)
    model.init_model(args.model_init)
    model.set_bn_param(model_config['bn']['momentum'], model_config['bn']['eps'])
    print(model.config)
    logging.info("param size = %d", utils.count_parameters(model))
    logging.info("multi adds = %fM", model.get_flops(torch.ones(1, 3, 224, 224).float())[0] / 1000000)
    if model_state_dict is not None:
        model.load_state_dict(model_state_dict)

    if torch.cuda.device_count() > 1:
        logging.info("Use %d %s", torch.cuda.device_count(), "GPUs !")
        model = nn.DataParallel(model)
    model = model.cuda()

    # build criterion
    logging.info('Building Criterion')
    train_criterion = utils.CrossEntropyLabelSmooth(1000, args.label_smooth).cuda()
    eval_criterion = nn.CrossEntropyLoss().cuda()

    # build optimizer
    logging.info('Building Optimizer')
    if args.no_decay_keys:
        keys = args.no_decay_keys.split('#')
        net_params = [model.module.get_parameters(keys, mode='exclude'),
                      model.module.get_parameters(keys, mode='include')]
        optimizer = torch.optim.SGD([
            {'params': net_params[0], 'weight_decay': args.weight_decay},
            {'params': net_params[1], 'weight_decay': 0},],
            args.lr,
            momentum=0.9,
            nesterov=True,
        )
    else:
        optimizer = torch.optim.SGD(
            model.parameters(),
            args.lr,
            momentum=0.9,
            weight_decay=args.weight_decay,
            nesterov=True,
        )
    if optimizer_state_dict is not None:
        optimizer.load_state_dict(optimizer_state_dict)

    # build data loader
    logging.info('Building Data')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(
            brightness=0.4,
            contrast=0.4,
            saturation=0.4,
            hue=0.1),
        transforms.ToTensor(),
        normalize,
    ])
    valid_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])
    if args.zip_file:
        logging.info('Loading data from zip file')
        traindir = os.path.join(args.data, 'train.zip')
        validdir = os.path.join(args.data, 'valid.zip')
        if args.lazy_load:
            train_data = utils.ZipDataset(traindir, train_transform)
            valid_data = utils.ZipDataset(validdir, valid_transform)
        else:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryZipDataset(traindir, train_transform, num_workers=args.num_workers)
            valid_data = utils.InMemoryZipDataset(validdir, valid_transform, num_workers=args.num_workers)
    else:
        logging.info('Loading data from directory')
        traindir = os.path.join(args.data, 'train')
        validdir = os.path.join(args.data, 'val')
        if args.lazy_load:
            train_data = dset.ImageFolder(traindir, train_transform)
            valid_data = dset.ImageFolder(validdir, valid_transform)
        else:
            logging.info('Loading data into memory')
            train_data = utils.InMemoryDataset(traindir, train_transform, num_workers=args.num_workers)
            valid_data = utils.InMemoryDataset(validdir, valid_transform, num_workers=args.num_workers)
    
    logging.info('Found %d in training data', len(train_data))
    logging.info('Found %d in validation data', len(valid_data))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.num_workers)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.eval_batch_size, shuffle=False, pin_memory=True, num_workers=args.num_workers)
    
    # build lr scheduler
    logging.info('Building LR Scheduler')
    if args.lr_scheduler == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)*len(train_queue), 0, step)
    else:
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, args.decay_period, args.gamma, epoch)
    return train_queue, valid_queue, model, train_criterion, eval_criterion, optimizer, scheduler
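
Note that the cosine schedule above is sized in iterations (float(args.epochs) * len(train_queue)), so it presumably advances once per batch, while the StepLR branch is sized in epochs; both receive step/epoch as last_epoch for resuming. A sketch of the per-batch stepping this implies (the actual training loop is not shown):

for e in range(epoch, args.epochs):
    for x, y in train_queue:
        # forward / backward / optimizer.step() would run here
        scheduler.step()  # advance once per batch: T_max = epochs * len(train_queue)
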
Code Example #6
def main(args=None):
    if args is None:
        args = get_parameter()

    if args.dataset == 'dali' and not dali_enable:
        args.case = args.case.replace('dali', 'imagenet')
        args.dataset = 'imagenet'
        args.workers = 12

    # log_dir
    if not os.path.exists(args.log_dir):
        os.makedirs(args.log_dir)

    model_arch = args.model
    model_name = model_arch
    if args.evaluate:
        log_suffix = 'eval-' + model_arch + '-' + args.case
    else:
        log_suffix = model_arch + '-' + args.case
    utils.setup_logging(os.path.join(args.log_dir, log_suffix + '.txt'),
                        resume=args.resume)

    logging.info("current folder: %r", os.getcwd())
    logging.info("alqnet plugins: %r", plugin_enable)
    logging.info("apex available: %r", apex_enable)
    logging.info("dali available: %r", dali_enable)
    for x in vars(args):
        logging.info("config %s: %r", x, getattr(args, x))

    torch.manual_seed(args.seed)
    if torch.cuda.is_available() and len(args.device_ids) > 0:
        args.device_ids = [
            x for x in args.device_ids
            if x < torch.cuda.device_count() and x >= 0
        ]
        if len(args.device_ids) == 0:
            args.device_ids = None
        else:
            logging.info("training on %d gpu", len(args.device_ids))
    else:
        args.device_ids = None

    if args.device_ids is not None:
        torch.cuda.manual_seed_all(args.seed)
        cudnn.benchmark = True
        torch.backends.cudnn.deterministic = True  #https://github.com/pytorch/pytorch/issues/8019
    else:
        logging.info(
            "no gpu available, try CPU version, lots of functions limited")
        #return

    if model_name in models.model_zoo:
        model, args = models.get_model(args)
    else:
        logging.error("model(%s) not support, available models: %r" %
                      (model_name, models.model_zoo))
        return
    criterion = nn.CrossEntropyLoss()
    if 'label-smooth' in args.keyword:
        criterion_smooth = utils.CrossEntropyLabelSmooth(
            args.num_classes, args.label_smooth)

    # load policy for initial phase
    models.policy.deploy_on_init(model, getattr(args, 'policy', ''))
    # load policy for epoch updating
    epoch_policies = models.policy.read_policy(getattr(args, 'policy', ''),
                                               section='epoch')
    # print model
    logging.info("models: %r" % model)
    logging.info("epoch_policies: %r" % epoch_policies)

    utils.check_folder(args.weights_dir)
    args.weights_dir = os.path.join(args.weights_dir, model_name)
    utils.check_folder(args.weights_dir)
    args.resume_file = os.path.join(args.weights_dir,
                                    args.case + "-" + args.resume_file)
    args.pretrained = os.path.join(args.weights_dir, args.pretrained)
    epoch = 0
    lr = args.lr
    best_acc = 0
    scheduler = None
    checkpoint = None
    # resume training
    if args.resume:
        if utils.check_file(args.resume_file):
            logging.info("resuming from %s" % args.resume_file)
            if torch.cuda.is_available():
                checkpoint = torch.load(args.resume_file)
            else:
                checkpoint = torch.load(args.resume_file, map_location='cpu')
            if 'epoch' in checkpoint:
                epoch = checkpoint['epoch']
                logging.info("resuming ==> last epoch: %d" % epoch)
                epoch = epoch + 1
                logging.info("updating ==> epoch: %d" % epoch)
            if 'best_acc' in checkpoint:
                best_acc = checkpoint['best_acc']
                logging.info("resuming ==> best_acc: %f" % best_acc)
            if 'learning_rate' in checkpoint:
                lr = checkpoint['learning_rate']
                logging.info("resuming ==> learning_rate: %f" % lr)
            if 'state_dict' in checkpoint:
                utils.load_state_dict(model, checkpoint['state_dict'])
                logging.info("resumed from %s" % args.resume_file)
        else:
            logging.info("warning: *** resume file not exists({})".format(
                args.resume_file))
            args.resume = False
    else:
        if utils.check_file(args.pretrained):
            logging.info("load pretrained from %s" % args.pretrained)
            if torch.cuda.is_available():
                checkpoint = torch.load(args.pretrained)
            else:
                checkpoint = torch.load(args.pretrained, map_location='cpu')
            logging.info("load pretrained ==> last epoch: %d" %
                         checkpoint.get('epoch', 0))
            logging.info("load pretrained ==> last best_acc: %f" %
                         checkpoint.get('best_acc', 0))
            logging.info("load pretrained ==> last learning_rate: %f" %
                         checkpoint.get('learning_rate', 0))
            #if 'learning_rate' in checkpoint:
            #    lr = checkpoint['learning_rate']
            #    logging.info("resuming ==> learning_rate: %f" % lr)
            try:
                utils.load_state_dict(
                    model,
                    checkpoint.get('state_dict',
                                   checkpoint.get('model', checkpoint)))
            except RuntimeError as err:
                logging.info("Loading pretrained model failed %r" % err)
        else:
            logging.info(
                "no pretrained file exists ({}), initializing model with the "
                "default initializer".format(args.pretrained))

    if args.device_ids is not None:
        torch.cuda.set_device(args.device_ids[0])
        if not isinstance(model, nn.DataParallel) and len(args.device_ids) > 1:
            model = nn.DataParallel(model, args.device_ids).cuda()
        else:
            model = model.cuda()
        criterion = criterion.cuda()
        if 'label-smooth' in args.keyword:
            criterion_smooth = criterion_smooth.cuda()

    if 'label-smooth' in args.keyword:
        train_criterion = criterion_smooth
    else:
        train_criterion = criterion

    # move after to_cuda() for speedup
    if args.re_init and not args.resume:
        for m in model.modules():
            if hasattr(m, 'init_after_load_pretrain'):
                m.init_after_load_pretrain()

    # dataset
    data_path = args.root
    dataset = args.dataset
    logging.info("loading dataset with batch_size {} and val-batch-size {}. "
                 "dataset: {}, resolution: {}, path: {}".format(
                     args.batch_size, args.val_batch_size, dataset,
                     args.input_size, data_path))

    if args.val_batch_size < 1:
        val_loader = None
    else:
        if args.evaluate:
            val_batch_size = (args.batch_size // 100) * 100
            if val_batch_size > 0:
                args.val_batch_size = val_batch_size
            logging.info("update val_batch_size to %d in evaluate mode" %
                         args.val_batch_size)
        val_loader = datasets.data_loader(args.dataset)('val', args)

    if args.evaluate and val_loader is not None:
        if args.fp16 and torch.backends.cudnn.enabled and apex_enable and args.device_ids is not None:
            logging.info("training with apex fp16 at opt_level {}".format(
                args.opt_level))
        else:
            args.fp16 = False
            logging.info("training without apex")

        if args.fp16:
            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)  #
            model, optimizer = amp.initialize(model,
                                              optimizer,
                                              opt_level=args.opt_level)

        logging.info("evaluate the dataset on pretrained model...")
        result = validate(val_loader, model, criterion, args)
        top1, top5, loss = result
        logging.info('evaluate accuracy on dataset: top1(%f) top5(%f)' %
                     (top1, top5))
        return

    train_loader = datasets.data_loader(args.dataset)('train', args)
    if isinstance(train_loader, torch.utils.data.dataloader.DataLoader):
        train_length = len(train_loader)
    else:
        train_length = getattr(train_loader, '_size', 0) / getattr(
            train_loader, 'batch_size', 1)

    # sample several iteration / epoch to calculate the initial value of quantization parameters
    if args.stable_epoch > 0 and args.stable <= 0:
        args.stable = train_length * args.stable_epoch
        logging.info("update stable: %d" % args.stable)

    # fix learning rate at the beginning to warmup
    if args.warmup_epoch > 0 and args.warmup <= 0:
        args.warmup = train_length * args.warmup_epoch
        logging.info("update warmup: %d" % args.warmup)

    params_dict = dict(model.named_parameters())
    params = []
    quant_wrapper = []
    for key, value in params_dict.items():
        #print(key)
        if 'quant_weight' in key and 'quant_weight' in args.custom_lr_list:
            to_be_quant = key.split('.quant_weight')[0] + '.weight'
            if to_be_quant not in quant_wrapper:
                quant_wrapper += [to_be_quant]
    if len(quant_wrapper) > 0 and args.verbose:
        logging.info("quant_wrapper: {}".format(quant_wrapper))

    for key, value in params_dict.items():
        shape = value.shape
        custom_hyper = dict()
        custom_hyper['params'] = value
        if not value.requires_grad:
            continue

        found = False
        for i in args.custom_decay_list:
            if i in key and len(i) > 0:
                found = True
                break
        if found:
            custom_hyper['weight_decay'] = args.custom_decay
        elif (not args.decay_small and args.no_decay_small) and (
            (len(shape) == 4 and shape[1] == 1) or (len(shape) == 1)):
            custom_hyper['weight_decay'] = 0.0

        found = False
        for i in args.custom_lr_list:
            if i in key and len(i) > 0:
                found = True
                break
        if found:
            #custom_hyper.setdefault('lr_constant', args.custom_lr) # 2019.11.25
            custom_hyper['lr'] = args.custom_lr
        elif key in quant_wrapper:
            custom_hyper.setdefault('lr_constant', args.custom_lr)
            custom_hyper['lr'] = args.custom_lr

        params += [custom_hyper]

        if 'debug' in args.keyword:
            logging.info("{}, decay {}, lr {}, constant {}".format(
                key, custom_hyper.get('weight_decay', "default"),
                custom_hyper.get('lr', "default"),
                custom_hyper.get('lr_constant', "No")))

    optimizer = None
    if args.optimizer == "ADAM":
        optimizer = torch.optim.Adam(params,
                                     lr=args.lr,
                                     weight_decay=args.weight_decay)

    if args.optimizer == "SGD":
        optimizer = torch.optim.SGD(params,
                                    lr=args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay,
                                    nesterov=args.nesterov)

    if args.resume and checkpoint is not None:
        try:
            optimizer.load_state_dict(checkpoint['optimizer'])
        except RuntimeError as error:
            logging.info("Restore optimizer state failed %r" % error)

    if args.fp16 and torch.backends.cudnn.enabled and apex_enable and args.device_ids is not None:
        logging.info("training with apex fp16 at opt_level {}".format(
            args.opt_level))
    else:
        args.fp16 = False
        logging.info("training without apex")

    if args.sync_bn:
        logging.info("sync_bn to be supported, currently not yet")

    if args.fp16:
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level=args.opt_level)
        if args.resume and checkpoint is not None:
            try:
                amp.load_state_dict(checkpoint['amp'])
            except RuntimeError as error:
                logging.info("Restore amp state failed %r" % error)

    # start tensorboard as late as possible
    if args.tensorboard and not args.evaluate:
        tb_log = os.path.join(args.log_dir, log_suffix)
        args.tensorboard = SummaryWriter(tb_log,
                                         filename_suffix='.' + log_suffix)
    else:
        args.tensorboard = None

    logging.info("start to train network " + model_name + ' with case ' +
                 args.case)
    while epoch < (args.epochs + args.extra_epoch):
        if 'proxquant' in args.keyword:
            if args.proxquant_step < 10:
                if args.lr_policy in ['sgdr', 'sgdr_step', 'custom_step']:
                    index = len([x for x in args.lr_custom_step if x <= epoch])
                    for m in model.modules():
                        if hasattr(m, 'prox'):
                            m.prox = 1.0 - 1.0 / args.proxquant_step * (index +
                                                                        1)
            else:
                for m in model.modules():
                    if hasattr(m, 'prox'):
                        m.prox = 1.0 - 1.0 / args.proxquant_step * epoch
                        if m.prox < 0:
                            m.prox = 0
        if epoch < args.epochs:
            lr, scheduler = utils.setting_learning_rate(
                optimizer, epoch, train_length, checkpoint, args, scheduler)
        if lr is None:
            logging.info('lr is invalid at epoch %d' % epoch)
            return
        else:
            logging.info('[epoch %d]: lr %e', epoch, lr)

        loss = 0
        top1, top5, eloss = 0, 0, 0
        is_best = top1 > best_acc
        # leverage policies on epoch
        models.policy.deploy_on_epoch(model,
                                      epoch_policies,
                                      epoch,
                                      optimizer=optimizer,
                                      verbose=logging.info)

        if 'lr-test' not in args.keyword:  # otherwise only print the learning rate in each epoch
            # training
            loss = train(train_loader, model, train_criterion, optimizer, args,
                         scheduler, epoch, lr)
            #for i in range(train_length):
            #  scheduler.step()
            logging.info('[epoch %d]: train_loss %.3f' % (epoch, loss))

            # validate
            top1, top5, eloss = 0, 0, 0
            top1, top5, eloss = validate(val_loader, model, criterion, args)
            is_best = top1 > best_acc
            if is_best:
                best_acc = top1
            logging.info('[epoch %d]: test_acc %f %f, best top1: %f, loss: %f',
                         epoch, top1, top5, best_acc, eloss)

        if args.tensorboard is not None:
            args.tensorboard.add_scalar(log_suffix + '/train-loss', loss,
                                        epoch)
            args.tensorboard.add_scalar(log_suffix + '/eval-top1', top1, epoch)
            args.tensorboard.add_scalar(log_suffix + '/eval-top5', top5, epoch)
            args.tensorboard.add_scalar(log_suffix + '/lr', lr, epoch)

        utils.save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'scheduler':
                None if scheduler is None else scheduler.state_dict(),
                'best_acc': best_acc,
                'learning_rate': lr,
                'amp': None if not args.fp16 else amp.state_dict(),
            }, is_best, args)

        epoch = epoch + 1
        if epoch == 1:
            logging.info(utils.gpu_info())
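
The params list built above relies on PyTorch's per-parameter-group options: each dict may override lr or weight_decay, and any key a group omits falls back to the optimizer-level defaults. A minimal illustration (model names hypothetical):

import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(8, 8), nn.Linear(8, 2))  # hypothetical two-block model
optimizer = torch.optim.SGD(
    [{'params': net[0].parameters(), 'lr': 0.01},   # overrides the default lr
     {'params': net[1].parameters()}],              # inherits lr and weight_decay below
    lr=0.1, momentum=0.9, weight_decay=1e-4)
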
Code Example #7
def main():
    config = RetrainConfig()
    main_proc = not config.distributed or config.local_rank == 0
    if config.distributed:
        torch.cuda.set_device(config.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method=config.dist_url,
                                             rank=config.local_rank,
                                             world_size=config.world_size)
    if main_proc:
        os.makedirs(config.output_path, exist_ok=True)
    if config.distributed:
        torch.distributed.barrier()
    logger = utils.get_logger(os.path.join(config.output_path, 'search.log'))
    if main_proc:
        config.print_params(logger.info)
    utils.reset_seed(config.seed)

    loaders, samplers = get_augment_datasets(config)
    train_loader, valid_loader = loaders
    train_sampler, valid_sampler = samplers
    train_loader = CyclicIterator(train_loader, train_sampler)
    # valid_loader = CyclicIterator(valid_loader, valid_sampler, False)

    model = Model(config.dataset,
                  config.layers,
                  in_channels=config.input_channels,
                  channels=config.init_channels,
                  retrain=True).cuda()
    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(config.n_classes,
                                                  config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()

    fixed_arc_path = os.path.join('', config.arc_checkpoint)
    with open(fixed_arc_path, "r") as f:
        fixed_arc = json.load(f)
    fixed_arc = utils.encode_tensor(fixed_arc, torch.device("cuda"))
    genotypes = utils.parse_results(fixed_arc, n_nodes=4)
    genotypes_dict = {i: genotypes for i in range(3)}
    apply_fixed_architecture(model, fixed_arc_path)
    param_size = utils.param_size(model, criterion, [3, 512, 512])

    if main_proc:
        logger.info("Param size: %.6f", param_size)
        logger.info("Genotype: %s", genotypes)

    # change training hyper parameters according to cell type
    if 'cifar' in config.dataset:
        if param_size < 3.0:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.2
        elif param_size < 3.5:
            config.weight_decay = 3e-4
            config.drop_path_prob = 0.3
        else:
            config.weight_decay = 5e-4
            config.drop_path_prob = 0.3

    if config.distributed:
        model = apex.parallel.convert_syncbn_model(model)
        model = DistributedDataParallel(model, delay_allreduce=True)

    optimizer = torch.optim.AdamW(model.parameters(), config.lr)
    # lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, config.epochs, eta_min=1E-6)

    best_top1 = 0.
    epoch = 0
    try:
        checkpoint = torch.load(config.model_checkpoint)
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch = checkpoint['epoch']
        loss = checkpoint['loss']

        model.eval()
        print("----------------------------")
        print("MODEL LOADED FROM CHECKPOINT" + config.model_checkpoint)
        print("----------------------------")
    except Exception:
        print("----------------------------")
        print("MODEL NOT LOADED FROM CHECKPOINT")
        print("----------------------------")

    # for epoch in range(0, epoch):
    # lr_scheduler.step()

    for epoch in range(epoch, config.epochs):
        drop_prob = config.drop_path_prob * epoch / config.epochs
        if config.distributed:
            model.module.drop_path_prob(drop_prob)
        else:
            model.drop_path_prob(drop_prob)
        # training
        if config.distributed:
            train_sampler.set_epoch(epoch)
        train(logger, config, train_loader, model, optimizer, criterion, epoch,
              main_proc)
        if epoch % config.log_frequency == 0:
            # validation
            top1 = validate(logger, config, valid_loader, model, criterion,
                            epoch, main_proc)
            best_top1 = max(best_top1, top1)
            # lr_scheduler.step()
            logger.info("Final best Prec@1 = %.4f", best_top1)
Code Example #8
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device %s' % args.gpu)
    logging.info('regime (small or regular): %s' % args.regime)
    logging.info("args = %s", args)

    # read graph info
    if args.regime:
        graph_info_dict3 = read_graph_info(args.graph_txt + "3.txt")
        graph_info_dict4 = read_graph_info(args.graph_txt + "4.txt")
        graph_info_dict5 = read_graph_info(args.graph_txt + "5.txt")
        graph_info_dicts = [graph_info_dict3, graph_info_dict4, graph_info_dict5]
    else:
        graph_info_dict2 = read_graph_info(args.graph_txt + "2.txt")
        graph_info_dict3 = read_graph_info(args.graph_txt + "3.txt")
        graph_info_dict4 = read_graph_info(args.graph_txt + "4.txt")
        graph_info_dict5 = read_graph_info(args.graph_txt + "5.txt")
        graph_info_dicts = [graph_info_dict2, graph_info_dict3, graph_info_dict4, graph_info_dict5]

    writer = SummaryWriter(log_dir=args.save)

    # CrossEntropyLabelSmooth for train, CrossEntropyLoss for val
    criterion_smooth = utils.CrossEntropyLabelSmooth(NUM_CLASSES, args.label_smooth)
    criterion_smooth = criterion_smooth.cuda()
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    model = RandomlyWiredNN(args.base_channels, NUM_CLASSES, args.output_channels, args.regime, graph_info_dicts)
    x = torch.randn(2, 3, 224, 224)
    writer.add_graph(model, (x,))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    logging.info("FLOPs = %fMB", utils.model_param_flops_in_MB(model, input_res=[224, 224], multiply_adds=False))

    if args.parallel:
        model = nn.DataParallel(model).cuda()
        print("multi GPUs")
    else:
        model = model.cuda()
        print("single GPU")

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_queue, valid_queue = preprocess_imagenet(args)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    best_acc_top1 = 0.0
    best_acc_top5 = 0.0
    for epoch in range(args.epochs):
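        # scheduler.step() at the top of the epoch (before training) follows the
        # pre-1.1 PyTorch convention; current releases expect step() to run after
        # the optimizer updates of each epoch.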
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc_top1, train_acc_top5, train_obj, train_speed = train(train_queue, model, criterion_smooth, optimizer)
        logging.info('train_acc %f', train_acc_top1)
        logging.info('train_speed_per_image %f', train_speed)
        writer.add_scalar('train_loss', train_obj, epoch)

        valid_acc_top1, valid_acc_top5, valid_obj, valid_speed = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc_top1)
        logging.info('valid_speed_per_image %f', valid_speed)
        writer.add_scalar('val_loss', valid_obj, epoch)

        if valid_acc_top1 > best_acc_top1:
            best_acc_top1 = valid_acc_top1
            utils.save_parm(model, os.path.join(args.save, 'model_top1.pt'))
        if valid_acc_top5 > best_acc_top5:
            best_acc_top5 = valid_acc_top5
            utils.save_parm(model, os.path.join(args.save, 'model_top5.pt'))
    writer.close()
Code Example #9
def main():
    logger.info("Logger is set - training start")

    # set default gpu device id
    torch.cuda.set_device(config.gpus[0])

    # set seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    input_size, input_channels, n_classes, train_data, valid_data = utils.get_data(
        config.dataset,
        config.data_path,
        config.cutout_length,
        validation=True,
        autoaugment=config.autoaugment)

    if config.label_smooth != 0:
        criterion = utils.CrossEntropyLabelSmooth(
            n_classes, config.label_smooth).to(device)
    else:
        criterion = nn.CrossEntropyLoss().to(device)
    use_aux = config.aux_weight > 0.
    if config.dataset in utils.LARGE_DATASETS:
        model = AugmentCNNImageNet(input_size, input_channels,
                                   config.init_channels, n_classes,
                                   config.layers, use_aux, config.genotype)
    else:
        model = AugmentCNN(input_size,
                           input_channels,
                           config.init_channels,
                           n_classes,
                           config.layers,
                           use_aux,
                           config.genotype,
                           SSC=config.SSC)
    model = nn.DataParallel(model, device_ids=config.gpus).to(device)
    # model size
    mb_params = utils.param_size(model)
    logger.info("Model size = {:.3f} MB".format(mb_params))

    # weights optimizer
    if config.p != 1:
        optimizer = torch.optim.SGD(model.parameters(),
                                    1.,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)
    else:
        optimizer = torch.optim.SGD(model.parameters(),
                                    config.lr,
                                    momentum=config.momentum,
                                    weight_decay=config.weight_decay)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.workers,
                                               pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data,
                                               batch_size=config.batch_size,
                                               shuffle=False,
                                               num_workers=config.workers,
                                               pin_memory=True)
    if config.p == 1:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, config.epochs)
    else:
        lr_cpa = utils.cosine_power_annealing_lr(nepochs=config.epochs,
                                                 min_lr=config.lr_min,
                                                 max_lr=config.lr,
                                                 p=config.p)
        lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [lr_cpa])
    best_top1 = 0.
    # training loop
    for epoch in range(config.epochs):
        lr_scheduler.step()
        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.module.drop_path_prob(drop_prob)

        # training
        train(train_loader, model, optimizer, criterion, epoch)

        # validation
        cur_step = (epoch + 1) * len(train_loader)
        top1 = validate(valid_loader, model, criterion, epoch, cur_step)

        # save
        if best_top1 < top1:
            best_top1 = top1
            is_best = True
        else:
            is_best = False
        utils.save_checkpoint(model, config.path, is_best)

        print("")
    logger.info("Final best Prec@1 = {:.4%} for job {}".format(
        best_top1, config.name))
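
One subtlety above: when config.p != 1, the optimizer is built with a base lr of 1.0 so that the LambdaLR multiplier can act as the absolute learning rate (LambdaLR scales the initial lr by whatever the lambda returns). utils.cosine_power_annealing_lr is not shown; the sketch below only illustrates those mechanics under the assumption that the helper returns an epoch -> lr callable, and the exact power-annealing curve may differ.

import math

def cosine_power_annealing_lr(nepochs, min_lr, max_lr, p):
    # assumed shape of the helper; the real curve may be defined differently
    def lr_at(epoch):
        cos = 0.5 * (1.0 + math.cos(math.pi * epoch / max(1, nepochs - 1)))
        return min_lr + (max_lr - min_lr) * cos ** p  # absolute lr, since base lr is 1.0
    return lr_at
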
Code Example #10
File: augment.py Project: susan0199/StacNAS
def main():
    if not torch.cuda.is_available():
        logger.info("no gpu device available")
        sys.exit(1)

    logger.info("*** Begin {} ***".format(config.stage))

    # set default gpu device
    torch.cuda.set_device(config.gpus[0])

    # set random seed
    np.random.seed(config.seed)
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed_all(config.seed)

    torch.backends.cudnn.benchmark = True

    # get data with meta info
    logger.info("preparing data...")
    input_size, channels_in, num_classes, train_data, valid_data = \
        load_dataset(dataset=config.dataset,
                     data_dir=config.data_dir,
                     cutout_length=config.cutout_length,
                     validation=True,
                     auto_aug=config.auto_aug)

    train_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        shuffle=True,
        num_workers=config.num_workers,
        pin_memory=True)

    valid_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        shuffle=False,
        num_workers=config.num_workers,
        pin_memory=True)

    logger.info("parsing genotypes...")
    genotypes = parse_genotypes()
    logger.info(genotypes)
    
    logger.info("building model...")
    model = AugmentCNN(input_size=input_size,
                       channels_in=channels_in,
                       channels_init=config.init_channels,
                       num_cells=config.num_cells,
                       num_nodes=config.num_nodes,
                       num_classes=num_classes, 
                       stem_multiplier=3,
                       auxiliary=(config.aux_weight > 0),
                       genotypes=genotypes,
                       alpha_share=config.alpha_share)
    model = model.to(device)

    model_size = utils.param_size(model)
    logger.info("model_size: {:.3f} MB".format(model_size))

    if config.label_smooth > 0:
        criterion = utils.CrossEntropyLabelSmooth(
            num_classes, config.label_smooth)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.to(device)

    optimizer = torch.optim.SGD(params=model.parameters(),
                                lr=config.learning_rate,
                                momentum=config.momentum,
                                weight_decay=config.weight_decay)

    if config.power_lr:
        lr_scheduler = utils.CosinePowerAnnealingLR(
            optimizer=optimizer, T_max=config.epochs, p=2)
    else:
        lr_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer=optimizer, T_max=config.epochs)

    logger.info("start training...")
    history_top1 = []
    best_top1 = 0.0

    for epoch in range(config.epochs):
        lr_scheduler.step()
        lr = lr_scheduler.get_lr()[0]
        logger.info("epoch: {:d}, lr: {:e}".format(epoch, lr))

        drop_prob = config.drop_path_prob * epoch / config.epochs
        model.drop_path_prob(drop_prob)

        train(train_loader, model, criterion, optimizer, epoch)

        global_step = (epoch + 1) * len(train_loader) - 1
        valid_top1 = valid(valid_loader, model, criterion, epoch, global_step)
        history_top1.append(valid_top1)

        if epoch == 0 or best_top1 < valid_top1:
            best_top1 = valid_top1
            is_best = True
        else:
            is_best = False

        utils.save_checkpoint(model, config.model_dir, is_best=is_best)

    with open(os.path.join(config.stage_dir, "history_top1.pk"), "wb") as f:
        pickle.dump(history_top1, f)

    logger.info("Final best valid Prec@1: {:.4%}".format(best_top1))
    logger.info("*** Finish {} ***".format(config.stage))