logging.info("Network Structure: \n" + '|\n'.join(map(str, model.net_config))) if args.meas_lat: latency_cpu = utils.latency_measure(model, (3, 224, 224), 1, 2000, mode='cpu') logging.info('latency_cpu (batch 1): %.2fms' % latency_cpu) latency_gpu = utils.latency_measure(model, (3, 224, 224), 32, 5000, mode='gpu') logging.info('latency_gpu (batch 32): %.2fms' % latency_gpu) params = utils.count_parameters_in_MB(model) logging.info("Params = %.2fMB" % params) mult_adds = comp_multadds(model, input_size=config.data.input_size) logging.info("Mult-Adds = %.2fMB" % mult_adds) model = nn.DataParallel(model) # whether to resume from a checkpoint if config.optim.if_resume: utils.load_model(model, config.optim.resume.load_path) start_epoch = config.optim.resume.load_epoch + 1 else: start_epoch = 0 model = model.cuda() if config.optim.label_smooth: criterion = utils.cross_entropy_with_label_smoothing
search_optim = Optimizer(super_model, criterion, config)
scheduler = get_lr_scheduler(config, search_optim.weight_optimizer,
                             imagenet.train_num_examples)
scheduler.last_step = start_epoch * (
    imagenet.train_num_examples // config.data.batch_size + 1)
search_trainer = SearchTrainer(train_queue, valid_queue, search_optim,
                               criterion, scheduler, config, args)

betas, head_alphas, stack_alphas = super_model.module.display_arch_params()
derived_archs = arch_gener.derive_archs(betas, head_alphas, stack_alphas)
derived_model = der_Net('|'.join(map(str, derived_archs)))
logging.info("Derived Model Mult-Adds = %.2fMB" %
             comp_multadds(derived_model, input_size=config.data.input_size))
logging.info("Derived Model Num Params = %.2fMB",
             utils.count_parameters_in_MB(derived_model))

best_epoch = [0, 0, 0]  # [epoch, acc_top1, acc_top5]
rec_list = []
for epoch in range(start_epoch, config.train_params.epochs):
    # training part 1: update the architecture parameters
    if epoch >= config.search_params.arch_update_epoch:
        search_stage = 1
        search_optim.set_param_grad_state('Arch')
        train_acc_top1, train_acc_top5, train_obj, sub_obj, batch_time = search_trainer.train(
            super_model, epoch, 'Arch', search_stage)
        logging.info(
            'EPOCH%d Arch Train_acc top1 %.2f top5 %.2f loss %.4f %s %.2f batch_time %.3f',
            epoch, train_acc_top1, train_acc_top5, train_obj,
            config.optim.sub_obj.type, sub_obj, batch_time)
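
# ----------------------------------------------------------------------------
# Hedged sketch: `utils.cross_entropy_with_label_smoothing` is referenced above
# but its body is not part of this excerpt. A typical label-smoothing cross
# entropy looks like the following (an assumed form, not necessarily this
# repo's exact implementation).
# ----------------------------------------------------------------------------
import torch
import torch.nn.functional as F

def cross_entropy_with_label_smoothing(pred, target, eps=0.1):
    """Cross entropy where the one-hot target is mixed with a uniform prior."""
    n_classes = pred.size(1)
    one_hot = torch.zeros_like(pred).scatter_(1, target.view(-1, 1), 1)
    smoothed = one_hot * (1.0 - eps) + eps / n_classes
    log_prob = F.log_softmax(pred, dim=1)
    return (-smoothed * log_prob).sum(dim=1).mean()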
cudnn.benchmark = True
cudnn.enabled = True
logging.info("args = %s", args)
logging.info('Training with config:')
logging.info(pprint.pformat(config))

config.net_config, net_type = utils.load_net_config(
    os.path.join(args.load_path, 'net_config'))
derivedNetwork = getattr(model_derived, '%s_Net' % net_type.upper())
model = derivedNetwork(config.net_config, config=config)

logging.info("Network Structure: \n" + '\n'.join(map(str, model.net_config)))
logging.info("Params = %.2fMB" % utils.count_parameters_in_MB(model))
logging.info("Mult-Adds = %.2fMB" % comp_multadds(model, input_size=config.data.input_size))

model = model.cuda()
model = nn.DataParallel(model)
utils.load_model(model, os.path.join(args.load_path, 'weights.pt'))

imagenet = imagenet_data.ImageNet12(trainFolder=os.path.join(args.data_path, 'train'),
                                    testFolder=os.path.join(args.data_path, 'val'),
                                    num_workers=config.data.num_workers,
                                    data_config=config.data)
valid_queue = imagenet.getTestLoader(config.data.batch_size)

trainer = Trainer(None, valid_queue, None, None, None, config, args.report_freq)
with torch.no_grad():
    val_acc_top1, val_acc_top5, valid_obj, batch_time = trainer.infer(model)
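
# ----------------------------------------------------------------------------
# Hedged sketch: the top-1/top-5 numbers returned by `trainer.infer` are
# commonly produced with a helper like the one below. This is an assumed
# utility (the repo's own `Trainer`/`utils` code is not shown in this excerpt),
# included only to make the reported metrics concrete.
# ----------------------------------------------------------------------------
def accuracy(output, target, topk=(1, 5)):
    """Compute the precision@k for the specified values of k."""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True)   # indices of the top-k logits
    pred = pred.t()
    correct = pred.eq(target.view(1, -1).expand_as(pred))

    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res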
def main():
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, NUM_CLASSES, args.layers,
                    config.optim.auxiliary, genotype)
    start_epoch = 0

    model.eval()
    model.drop_path_prob = args.drop_path_prob * 0
    # compute the params as well as the mult-adds
    params = count_parameters_in_MB(model)
    logging.info("Params = %.2fMB" % params)
    mult_adds = comp_multadds(model, input_size=config.data.input_size)
    logging.info("Mult-Adds = %.2fMB" % mult_adds)
    model.train()

    if len(args.gpus) > 1:
        model = nn.DataParallel(model)
    model = model.cuda()

    if config.optim.label_smooth:
        criterion = CrossEntropyLabelSmooth(NUM_CLASSES, config.optim.smooth_alpha)
    else:
        criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    optimizer = torch.optim.SGD(model.parameters(),
                                config.optim.init_lr,
                                momentum=config.optim.momentum,
                                weight_decay=config.optim.weight_decay)

    imagenet = imagenet_data.ImageNet12(
        trainFolder=os.path.join(args.data_path, 'train'),
        testFolder=os.path.join(args.data_path, 'val'),
        num_workers=config.data.num_workers,
        type_of_data_augmentation=config.data.type_of_data_aug,
        data_config=config.data,
        size_images=config.data.input_size[1],
        scaled_size=config.data.scaled_size[1])
    train_queue, valid_queue = imagenet.getTrainTestLoader(config.data.batch_size)

    if config.optim.lr_schedule == 'cosine':
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(config.train_params.epochs))

    trainer = Trainer(train_queue, valid_queue, criterion, config, args.report_freq)

    best_epoch = [0, 0, 0]  # [epoch, acc_top1, acc_top5]
    lr = config.optim.init_lr
    for epoch in range(start_epoch, config.train_params.epochs):
        if config.optim.lr_schedule == 'cosine':
            scheduler.step()
            current_lr = scheduler.get_lr()[0]
        elif config.optim.lr_schedule == 'linear':  # linear decay with initial warmup
            optimizer, current_lr = adjust_lr(optimizer, config.train_params.epochs,
                                              lr, epoch)
        else:
            print('Wrong lr type, exit')
            sys.exit(1)

        if epoch < 5:  # warm up over the first 5 epochs
            current_lr = lr * (epoch + 1) / 5.0
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr
            logging.info('Warming-up Epoch: %d, LR: %e', epoch, lr * (epoch + 1) / 5.0)
        logging.info('Epoch: %d lr %e', epoch, current_lr)

        if len(args.gpus) > 1:
            model.module.drop_path_prob = args.drop_path_prob * epoch / config.train_params.epochs
        else:
            model.drop_path_prob = args.drop_path_prob * epoch / config.train_params.epochs

        train_acc_top1, train_acc_top5, train_obj, batch_time, data_time = trainer.train(
            model, optimizer, epoch)
        with torch.no_grad():
            val_acc_top1, val_acc_top5, batch_time, data_time = trainer.infer(model, epoch)

        if val_acc_top1 > best_epoch[1]:
            best_epoch = [epoch, val_acc_top1, val_acc_top5]
            if epoch >= 0:  # 120
                utils.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.module.state_dict(),
                        'best_acc_top1': val_acc_top1,
                        'optimizer': optimizer.state_dict(),
                    },
                    save_path=args.save,
                    epoch=epoch,
                    is_best=True)
                if len(args.gpus) > 1:
                    utils.save(
                        model.module.state_dict(),
                        os.path.join(args.save,
                                     'weights_{}_{}.pt'.format(epoch, val_acc_top1)))
                else:
                    utils.save(
                        model.state_dict(),
                        os.path.join(args.save,
                                     'weights_{}_{}.pt'.format(epoch, val_acc_top1)))
        logging.info('BEST EPOCH %d val_top1 %.2f val_top5 %.2f',
                     best_epoch[0], best_epoch[1], best_epoch[2])
        logging.info(
            'epoch: {} \t train_acc_top1: {:.4f} \t train_loss: {:.4f} \t val_acc_top1: {:.4f}'
            .format(epoch, train_acc_top1, train_obj, val_acc_top1))

    logging.info("Params = %.2fMB" % params)
    logging.info("Mult-Adds = %.2fMB" % mult_adds)
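
# ----------------------------------------------------------------------------
# Hedged sketch: `adjust_lr` is called above for the 'linear' schedule but is
# not defined in this excerpt. Its signature is inferred from the call site
# `optimizer, current_lr = adjust_lr(optimizer, config.train_params.epochs, lr, epoch)`;
# a plain linear decay toward zero is assumed here, and the real helper may differ.
# ----------------------------------------------------------------------------
def adjust_lr(optimizer, total_epochs, base_lr, epoch):
    """Linearly decay the learning rate from base_lr to 0 over total_epochs."""
    current_lr = base_lr * (1.0 - epoch / float(total_epochs))
    for param_group in optimizer.param_groups:
        param_group['lr'] = current_lr
    return optimizer, current_lr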