def train_model(args):
    if not os.path.isdir(args.save):
        os.makedirs(args.save)
    save_dir = '{}eval-{}-{}'.format(args.save, args.note, time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py'))

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    if args.cifar100:
        CIFAR_CLASSES = 100
        data_folder = 'cifar-100-python'
    else:
        CIFAR_CLASSES = 10
        data_folder = 'cifar-10-batches-py'

    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    if args.arch in genotypes.__dict__.keys():
        genotype = eval("genotypes.%s" % args.arch)
    else:
        genotype = eval(args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                              pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False,
                                              pin_memory=True, num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('Best_acc: %f', best_acc)

        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model.module, os.path.join(save_dir, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    device = torch.device("cuda:{}".format(args.gpu))
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    cudnn.deterministic = True
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if args.arch is not None:
        genotype = eval("genotypes.%s" % args.arch)
    if args.dir is not None:
        with open(os.path.join(args.dir, "genotype.pickle"), 'rb') as f:
            genotype = pickle.load(f)
            print("Unpickling genotype.pickle")
    logging.info(genotype)

    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    train_transform, valid_transform = utils._data_transforms_cifar100(args)
    train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
    logging.info("[INFO] len(train_data): {}, len(valid_data): {}".format(len(train_data), len(valid_data)))

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                              pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False,
                                              pin_memory=True, num_workers=2)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    test_error = []
    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('[INFO] epoch %d lr %e', epoch + 1, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('[INFO] train_acc %f', train_acc)
        writer.add_scalar("train_acc", train_acc, epoch + 1)
        writer.add_scalar("train_obj", train_obj, epoch + 1)
        scheduler.step()

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
        logging.info('[INFO] valid_acc %f', valid_acc)
        writer.add_scalar("valid_acc", valid_acc, epoch + 1)
        writer.add_scalar("valid_obj", valid_obj, epoch + 1)
        writer.add_scalar("test_error", 100 - valid_acc, epoch + 1)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        test_error.append(100 - valid_acc)

    logging.info('[INFO] best_acc %f', best_acc)
    with open("{}/test_error.pickle".format(args.save), 'wb') as f:
        pickle.dump(test_error, f)
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)

    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    switches = []
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)
    # eps_no_archs = [10, 10, 10]
    eps_no_archs = [2, 2, 2]

    for sp in range(len(num_to_keep)):
        # if sp < 1:
        #     continue
        model = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]), criterion,
                        switches_normal=switches_normal, switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)

        optimizer = torch.optim.SGD(network_params, args.learning_rate,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        # optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
        #     lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay)
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate, betas=(0, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        # cur_sub_model = get_cur_model(model, switches_normal, switches_reduce, num_to_keep, num_to_drop, sp)

        for epoch in range(epochs):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:  # if 0:
                model.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)

        if sp == len(num_to_keep) - 1:
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0]:
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])
                if switches_reduce_2[i][0]:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                # keep the two incoming edges with the largest weights
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according to the ranking of arch parameters
            for i in range(14):
                if i not in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if i not in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            # restrict skip-connect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generate genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
def __init__(self, args: Namespace, genotype: Genotype, my_dataset: MyDataset, choose_cell=False):
    self.__args = args
    self.__dataset = my_dataset
    self.__previous_epochs = 0

    if args.seed is None:
        raise Exception('designate seed.')
    elif args.epochs is None:
        raise Exception('designate epochs.')
    if not (args.arch or args.arch_path):
        raise Exception('need to designate arch.')

    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    np.random.seed(args.seed)
    cudnn.benchmark = True
    cudnn.enabled = True
    torch.manual_seed(args.seed)
    logging.info(f'gpu device = {args.gpu}')
    logging.info(f'args = {args}')
    logging.info(f'Train genotype: {genotype}')

    if my_dataset == MyDataset.CIFAR10:
        self.model = NetworkCIFAR(args.init_ch, 10, args.layers, args.auxiliary, genotype)
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
    elif my_dataset == MyDataset.CIFAR100:
        self.model = NetworkCIFAR(args.init_ch, 100, args.layers, args.auxiliary, genotype)
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
    elif my_dataset == MyDataset.ImageNet:
        self.model = NetworkImageNet(args.init_ch, 1000, args.layers, args.auxiliary, genotype)
        self.__criterion_smooth = CrossEntropyLabelSmooth(1000, args.label_smooth).to(device)
        traindir = os.path.join(args.data, 'train')
        validdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        train_data = dset.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(224),
                transforms.RandomHorizontalFlip(),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.2),
                transforms.ToTensor(),
                normalize,
            ]))
        valid_data = dset.ImageFolder(
            validdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                normalize,
            ]))
    else:
        raise Exception('No match Dataset')

    checkpoint = None
    if use_DataParallel:
        print('use Data Parallel')
        if args.checkpoint_path:
            checkpoint = torch.load(args.checkpoint_path)
            utils.load(self.model, checkpoint['state_dict'], args.to_parallel)
            self.__previous_epochs = checkpoint['epoch']
            args.epochs -= self.__previous_epochs
            if args.epochs <= 0:
                raise Exception('args.epochs is too small.')
        self.model = nn.DataParallel(self.model)
        self.__module = self.model.module
        torch.cuda.manual_seed_all(args.seed)
    else:
        if args.checkpoint_path:
            checkpoint = torch.load(args.checkpoint_path)
            utils.load(self.model, checkpoint['state_dict'], args.to_parallel)
            args.epochs -= checkpoint['epoch']
            if args.epochs <= 0:
                raise Exception('args.epochs is too small.')
        torch.cuda.manual_seed(args.seed)
        self.__module = self.model
    self.model.to(device)

    param_size = utils.count_parameters_in_MB(self.model)
    logging.info(f'param size = {param_size}MB')

    self.__criterion = nn.CrossEntropyLoss().to(device)
    self.__optimizer = torch.optim.SGD(self.__module.parameters(), args.lr,
                                       momentum=args.momentum, weight_decay=args.wd)
    if checkpoint:
        self.__optimizer.load_state_dict(checkpoint['optimizer'])

    num_workers = torch.cuda.device_count() * 4
    if choose_cell:
        num_train = len(train_data)  # 50000
        indices = list(range(num_train))
        split = int(np.floor(args.train_portion * num_train))  # 25000
        self.__train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batchsz,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
            pin_memory=True, num_workers=num_workers)
        self.__valid_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batchsz,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
            pin_memory=True, num_workers=num_workers)
    else:
        self.__train_queue = torch.utils.data.DataLoader(
            train_data, batch_size=args.batchsz, shuffle=True,
            pin_memory=True, num_workers=num_workers)
        self.__valid_queue = torch.utils.data.DataLoader(
            valid_data, batch_size=args.batchsz, shuffle=False,
            pin_memory=True, num_workers=num_workers)

    # membership test, not `== CIFAR10 or CIFAR100` (the latter is always truthy)
    if my_dataset in (MyDataset.CIFAR10, MyDataset.CIFAR100):
        self.__scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.__optimizer, args.epochs)
    elif my_dataset == MyDataset.ImageNet:
        self.__scheduler = torch.optim.lr_scheduler.StepLR(self.__optimizer, args.decay_period, gamma=args.gamma)
    else:
        raise Exception('No match Dataset')
    if checkpoint:
        self.__scheduler.load_state_dict(checkpoint['scheduler'])
def main():
    # if not torch.cuda.is_available():
    #     logging.info('no gpu device available')
    #     sys.exit(1)
    np.random.seed(args.seed)
    # torch.cuda.set_device(args.gpu)
    # cudnn.benchmark = True
    # torch.manual_seed(args.seed)
    # cudnn.enabled = True
    # torch.cuda.manual_seed(args.seed)
    # logging.info('gpu device = %d' % args.gpu)
    # logging.info("args = %s", args)
    torch.manual_seed(args.seed)
    logging.info('use cpu')
    logging.info("args = %s", args)

    criterion = nn.CrossEntropyLoss()
    # criterion = criterion.cuda()
    criterion.to(device)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion, learnable_bn=args.learnable_bn)
    # model = model.cuda()
    model.to(device)
    a = list(model.parameters())
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    # optimizer = torch.optim.SGD(
    #     model.parameters(),
    #     args.learning_rate,
    #     momentum=args.momentum,
    #     weight_decay=args.weight_decay)
    ############################################################################
    # AdaS: optimizer and scheduler
    optimizer = SGDVec(params=model.parameters(), lr=args.learning_rate,
                       momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = AdaS(parameters=list(model.parameters()),
                     init_lr=args.learning_rate,
                     # min_lr=kwargs['min_lr'],
                     # zeta=kwargs['zeta'],
                     p=args.scheduler_p,
                     beta=args.scheduler_beta)
    ############################################################################

    # train_transform, valid_transform = utils._data_transforms_cifar100(args)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    """From https://github.com/chenxin061/pdarts/"""
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)

    # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
    #     optimizer, float(args.epochs), eta_min=args.learning_rate_min)

    architect = Architect(model, args)

    """Hessian"""
    analyser = Analyzer(model, args)

    """adaptive stopping"""
    stop_checker = StopChecker()

    METRICS = Metrics(list(model.parameters()), p=1)
    PERFORMANCE_STATISTICS = {}
    ARCH_STATISTICS = {}
    GENOTYPE_STATISTICS = {}
    metrics_path = './metrics_stat_test_adas.xlsx'
    weights_path = './weights_stat_test_adas.xlsx'
    genotypes_path = './genotypes_stat_test_adas.xlsx'

    for epoch in range(args.epochs):
        # scheduler.step()
        # lr = scheduler.get_lr()[0]
        # logging.info
        genotype = model.genotype()
        logging.info('genotype = %s', genotype)
        if epoch % 5 == 0 or epoch == args.epochs - 1:
            GENOTYPE_STATISTICS[f'epoch_{epoch}'] = [genotype]
            genotypes_df = pd.DataFrame(data=GENOTYPE_STATISTICS)
            genotypes_df.to_excel(genotypes_path)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))

        # training
        train_acc, train_obj = train(epoch, train_queue, valid_queue, model, architect,
                                     criterion, optimizer, METRICS, scheduler, analyser)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)

        # metrics
        io_metrics = METRICS.evaluate(epoch)
        PERFORMANCE_STATISTICS[f'in_S_epoch_{epoch}'] = io_metrics.input_channel_S
        PERFORMANCE_STATISTICS[f'out_S_epoch_{epoch}'] = io_metrics.output_channel_S
        PERFORMANCE_STATISTICS[f'fc_S_epoch_{epoch}'] = io_metrics.fc_S
        PERFORMANCE_STATISTICS[f'in_rank_epoch_{epoch}'] = io_metrics.input_channel_rank
        PERFORMANCE_STATISTICS[f'out_rank_epoch_{epoch}'] = io_metrics.output_channel_rank
        PERFORMANCE_STATISTICS[f'fc_rank_epoch_{epoch}'] = io_metrics.fc_rank
        PERFORMANCE_STATISTICS[f'in_condition_epoch_{epoch}'] = io_metrics.input_channel_condition
        PERFORMANCE_STATISTICS[f'out_condition_epoch_{epoch}'] = io_metrics.output_channel_condition

        ########################################################################
        # AdaS: update learning rates
        lr_metrics = scheduler.step(epoch, METRICS)
        PERFORMANCE_STATISTICS[f'rank_velocity_epoch_{epoch}'] = lr_metrics.rank_velocity
        PERFORMANCE_STATISTICS[f'learning_rate_epoch_{epoch}'] = lr_metrics.r_conv
        ########################################################################

        # write metrics data to xls file
        metrics_df = pd.DataFrame(data=PERFORMANCE_STATISTICS)
        metrics_df.to_excel(metrics_path)

        # weights
        weights_normal = F.softmax(model.alphas_normal, dim=-1).detach().cpu().numpy()
        weights_reduce = F.softmax(model.alphas_reduce, dim=-1).detach().cpu().numpy()
        # normal
        ARCH_STATISTICS[f'normal_none_epoch{epoch}'] = weights_normal[:, 0]
        ARCH_STATISTICS[f'normal_max_epoch{epoch}'] = weights_normal[:, 1]
        ARCH_STATISTICS[f'normal_avg_epoch{epoch}'] = weights_normal[:, 2]
        ARCH_STATISTICS[f'normal_skip_epoch{epoch}'] = weights_normal[:, 3]
        ARCH_STATISTICS[f'normal_sep_3_epoch{epoch}'] = weights_normal[:, 4]
        ARCH_STATISTICS[f'normal_sep_5_epoch{epoch}'] = weights_normal[:, 5]
        ARCH_STATISTICS[f'normal_dil_3_epoch{epoch}'] = weights_normal[:, 6]
        ARCH_STATISTICS[f'normal_dil_5_epoch{epoch}'] = weights_normal[:, 7]
        # reduce
        ARCH_STATISTICS[f'reduce_none_epoch{epoch}'] = weights_reduce[:, 0]
        ARCH_STATISTICS[f'reduce_max_epoch{epoch}'] = weights_reduce[:, 1]
        ARCH_STATISTICS[f'reduce_avg_epoch{epoch}'] = weights_reduce[:, 2]
        ARCH_STATISTICS[f'reduce_skip_epoch{epoch}'] = weights_reduce[:, 3]
        ARCH_STATISTICS[f'reduce_sep_3_epoch{epoch}'] = weights_reduce[:, 4]
        ARCH_STATISTICS[f'reduce_sep_5_epoch{epoch}'] = weights_reduce[:, 5]
        ARCH_STATISTICS[f'reduce_dil_3_epoch{epoch}'] = weights_reduce[:, 6]
        ARCH_STATISTICS[f'reduce_dil_5_epoch{epoch}'] = weights_reduce[:, 7]
        # write weights data to xls file
        weights_df = pd.DataFrame(data=ARCH_STATISTICS)
        weights_df.to_excel(weights_path)

        # adaptive stopping criterion
        if args.adaptive_stop and epoch >= 10:
            # apply local stopping criterion
            stop_checker.local_stop(METRICS, epoch)
            # freeze some edges based on their knowledge gains
            iteration_p = 0
            for p in model.parameters():
                if ~METRICS.layers_index_todo[iteration_p]:
                    p.requires_grad = False
                    p.grad = None
                iteration_p += 1

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    torch.set_num_threads(3)
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    if not 'debug' in args.save:
        api = API('pth file path')

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()

    if args.method == 'snas':
        # Create the decrease step for the gumbel softmax temperature
        args.epochs = 100
        tau_step = (args.tau_min - args.tau_max) / args.epochs
        tau_epoch = args.tau_max
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='gumbel')
    elif args.method == 'dirichlet':
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='dirichlet')
    elif args.method == 'darts':
        model = TinyNetwork(C=args.init_channels, N=5, max_nodes=4, num_classes=n_classes,
                            criterion=criterion, search_space=NAS_BENCH_201, k=args.k, species='softmax')
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    optimizer = torch.optim.SGD(model.get_weights(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    elif args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    elif args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(args)
        train_data = dset.SVHN(root=args.data, split='train', download=True, transform=train_transform)
    elif args.dataset == 'imagenet16-120':
        import torchvision.transforms as transforms
        from nasbench201.DownsampledImageNet import ImageNet16
        mean = [x / 255 for x in [122.68, 116.66, 104.01]]
        std = [x / 255 for x in [63.22, 61.26, 65.09]]
        lists = [transforms.RandomHorizontalFlip(), transforms.RandomCrop(16, padding=2),
                 transforms.ToTensor(), transforms.Normalize(mean, std)]
        train_transform = transforms.Compose(lists)
        train_data = ImageNet16(root=os.path.join(args.data, 'imagenet16'), train=True,
                                transform=train_transform, use_num_of_class_only=120)
        assert len(train_data) == 151700

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True)

    architect = Architect(model, args)

    # configure progressive parameter
    epoch = 0
    ks = [4, 2]
    num_keeps = [5, 3]
    train_epochs = [2, 2] if 'debug' in args.save else [50, 50]
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min)

    for i, current_epochs in enumerate(train_epochs):
        for e in range(current_epochs):
            lr = scheduler.get_lr()[0]
            logging.info('epoch %d lr %e', epoch, lr)
            genotype = model.genotype()
            logging.info('genotype = %s', genotype)
            model.show_arch_parameters()

            # training
            train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, e)
            logging.info('train_acc %f', train_acc)

            # validation
            valid_acc, valid_obj = infer(valid_queue, model, criterion)
            logging.info('valid_acc %f', valid_acc)

            if not 'debug' in args.save:
                # nasbench201
                result = api.query_by_arch(model.genotype())
                logging.info('{:}'.format(result))
                cifar10_train, cifar10_test, cifar100_train, cifar100_valid, \
                    cifar100_test, imagenet16_train, imagenet16_valid, imagenet16_test = distill(result)
                logging.info('cifar10 train %f test %f', cifar10_train, cifar10_test)
                logging.info('cifar100 train %f valid %f test %f', cifar100_train, cifar100_valid, cifar100_test)
                logging.info('imagenet16 train %f valid %f test %f', imagenet16_train, imagenet16_valid, imagenet16_test)

                # tensorboard
                writer.add_scalars('accuracy', {'train': train_acc, 'valid': valid_acc}, epoch)
                writer.add_scalars('loss', {'train': train_obj, 'valid': valid_obj}, epoch)
                writer.add_scalars('nasbench201/cifar10', {'train': cifar10_train, 'test': cifar10_test}, epoch)
                writer.add_scalars('nasbench201/cifar100', {'train': cifar100_train, 'valid': cifar100_valid, 'test': cifar100_test}, epoch)
                writer.add_scalars('nasbench201/imagenet16', {'train': imagenet16_train, 'valid': imagenet16_valid, 'test': imagenet16_test}, epoch)

            utils.save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'alpha': model.arch_parameters()
            }, False, args.save)

            epoch += 1
            scheduler.step()
            if args.method == 'snas':
                # Decrease the temperature for the gumbel softmax linearly
                tau_epoch += tau_step
                logging.info('tau %f', tau_epoch)
                model.set_tau(tau_epoch)

        if not i == len(train_epochs) - 1:
            model.pruning(num_keeps[i + 1])
            # architect.pruning([model._mask])
            model.wider(ks[i + 1])
            optimizer = configure_optimizer(optimizer, torch.optim.SGD(
                model.get_weights(), args.learning_rate,
                momentum=args.momentum, weight_decay=args.weight_decay))
            scheduler = configure_scheduler(scheduler, torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, float(sum(train_epochs)), eta_min=args.learning_rate_min))
            logging.info('pruning finish, %d ops left per edge', num_keeps[i + 1])
            logging.info('network wider finish, current pc parameter %d', ks[i + 1])

    genotype = model.genotype()
    logging.info('genotype = %s', genotype)
    model.show_arch_parameters()
    writer.close()
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)

    num_gpus = torch.cuda.device_count()
    logging.info('Training with %d GPU(s)', num_gpus)

    model = eval("se_resnet%s(num_classes=CIFAR_CLASSES)" % args.resnet_type)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                              pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False,
                                              pin_memory=True, num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[90, 135], gamma=0.1)

    best_acc = 0.0
    results = {'tr_acc': [], 'tr_loss': [], 'val_acc': [], 'val_loss': []}
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])

        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)
        results['tr_acc'].append(train_acc)
        results['tr_loss'].append(train_obj)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            utils.save(model, os.path.join(args.save, 'best_weights.pt'))
        logging.info('Valid_acc: %f', valid_acc)
        results['val_acc'].append(valid_acc)
        results['val_loss'].append(valid_obj)

        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model, os.path.join(args.save, 'final_weights.pt'))

    with open('{}/train_loss.txt'.format(args.save), 'w') as file:
        for item in results['tr_loss']:
            file.write(str(item) + '\n')
    with open('{}/train_acc.txt'.format(args.save), 'w') as file:
        for item in results['tr_acc']:
            file.write(str(item) + '\n')
    logging.info('Best testing accuracy is: %f\n___________________________________END_____________________________', best_acc)
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)

    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.set == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
    # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)

    # train_queue = torch.utils.data.DataLoader(
    #     train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2)
    # valid_queue = torch.utils.data.DataLoader(
    #     valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=4)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = 45000  # int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=4)
    train_valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:]),
        pin_memory=True, num_workers=4)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_acc = 0.0
    for epoch in range(args.epochs):
        logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0])
        model.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        scheduler.step()
        logging.info('train_acc %f', train_acc)

        valid_acc, valid_obj = infer(train_valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
            test_acc, _ = infer(valid_queue, model, criterion)
            logging.info('Test_acc: %f', test_acc)
        logging.info('Valid_acc: %f', valid_acc)
        logging.info('Best_acc: %f', best_acc)
        # valid_acc, valid_obj = infer(valid_queue, model, criterion)
        # if valid_acc > best_acc:
        #     best_acc = valid_acc
        # logging.info('valid_acc %f, best_acc %f', valid_acc, best_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    os.environ['PYTHONHASHSEED'] = str(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()

    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')

    resnet_types = {'resnet20': 3, 'resnet32': 5, 'resnet44': 7, 'resnet56': 9, 'resnet110': 18}
    n_sizes = resnet_types[args.net_type]
    logging.info('Number of attentional residual block(s): %s', n_sizes * 3)

    model = att_resnet_cifar(genotype, n_size=n_sizes, num_classes=CIFAR_CLASSES)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))

    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate,
                                momentum=args.momentum, weight_decay=args.weight_decay)

    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True,
                                              pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False,
                                              pin_memory=True, num_workers=args.workers)

    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs))

    best_acc = 0.0
    for epoch in range(args.epochs):
        scheduler.step()
        logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0])
        if num_gpus > 1:
            model.module._block.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        else:
            model._block.drop_path_prob = args.drop_path_prob * epoch / args.epochs

        start_time = time.time()
        train_acc, train_obj = train(train_queue, model, criterion, optimizer)
        logging.info('Train_acc: %f', train_acc)

        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        if valid_acc > best_acc:
            best_acc = valid_acc
        logging.info('Valid_acc: %f', valid_acc)

        end_time = time.time()
        duration = end_time - start_time
        print('Epoch time: %ds.' % duration)
        utils.save(model, os.path.join(args.save, 'weights.pt'))

    logging.info("_____________________________________\nBest Valid Accuracy is: %f\n______________________END_____________________", best_acc)
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True  # let cuDNN pick the fastest convolution algorithms
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)

    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=False, transform=train_transform)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=False, transform=train_transform)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))

    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)  # pin_memory page-locks host buffers for faster GPU transfer

    # Stage 2: configure the network and losses
    # criterion = nn.CrossEntropyLoss()  # a specific training-stage loss can be chosen here
    # criterion = criterion.cuda()       # 0-1 style loss
    criterion_train = (ConvSeparateLoss(weight=args.aux_loss_weight) if args.sep_loss == 'l2'
                       else TriSeparateLoss(weight=args.aux_loss_weight))
    criterion_val = nn.CrossEntropyLoss()
    criterion_train = criterion_train.cuda()
    criterion_val = nn.CrossEntropyLoss().cuda()

    switches = []  # flags marking which candidate operations are still active
    for i in range(14):
        switches.append([True for j in range(len(PRIMITIVES))])
    switches_normal = copy.deepcopy(switches)
    switches_reduce = copy.deepcopy(switches)

    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [2, 2, 2]  # number of operations dropped per stage
    if len(args.add_width) == args.stages:
        add_width = args.add_width
    else:
        add_width = [[0, 16], [0, 8, 16]][args.stages - 2]  # default add_width
    if len(args.add_layers) == args.stages:
        add_layers = args.add_layers
    else:
        add_layers = [[0, 7], [0, 6, 12]][args.stages - 2]  # default add_layers
    if len(args.dropout_rate) == args.stages:
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0] * args.stages  # dropout schedule
    eps_no_archs = [args.noarc] * args.stages  # first few epochs update only network weights, not architecture
    if len(args.sample) == args.stages:
        sample = args.sample
    else:
        sample = [[4, 8], [4, 4, 4]][args.stages - 2]
    epochs = [25, 25, 25]

    # Stage 3: search loop
    for sp in range(len(num_to_keep)):
        model = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]), criterion_val,
                        switches_normal=switches_normal, switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]), K=int(sample[sp]),
                        use_baidu=args.use_baidu, use_EN=args.use_EN)
        model = nn.DataParallel(model)  # multi-GPU data parallelism
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        logging.info("layers=%d", args.layers + int(add_layers[sp]))
        logging.info("channels=%d", args.init_channels + int(add_width[sp]))
        logging.info("K=%d", int(sample[sp]))

        network_params = []
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')
                    or k.endswith('betas_reduce') or k.endswith('betas_normal')):  # exclude architecture parameters
                network_params.append(v)

        optimizer = torch.optim.SGD(network_params, args.learning_rate,
                                    momentum=args.momentum, weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(model.module.arch_parameters(),
                                       lr=args.arch_learning_rate, betas=(0.5, 0.999),
                                       weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)

        sm_dim = -1
        # epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2

        for epoch in range(epochs[sp]):
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:
                model.module.p = float(drop_rate[sp]) * (epochs[sp] - epoch - 1) / epochs[sp]
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion_train, optimizer, optimizer_a, lr, train_arch=False)
            else:
                model.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion_train, optimizer, optimizer_a, lr, train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # print("beats", model.module.arch_parameters()[1])
            # validation
            if epochs[sp] - epoch < 5:
                valid_acc, valid_obj = infer(valid_queue, model, criterion_val)
                logging.info('Valid_acc %f', valid_acc)
            # print("epoch=", epoch, 'weights_normal=', model.module.weights_normal, 'weights_reduce=', model.module.weights_reduce)
            # print('weights2_normal=', model.module.weights2_normal, '\n', 'weights2_reduce=', model.module.weights2_reduce)

        arch_normal = model.module.arch_parameters()[0]
        arch_reduce = model.module.arch_parameters()[1]
        betas_nor = model.module.weights2_normal
        betas_redu = model.module.weights2_reduce
        shengcheng(arch_normal, arch_reduce, switches_normal, switches_reduce, betas_nor, betas_redu)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
        print('------Dropping %d paths------' % num_to_drop[sp])

        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:
            switches_normal_2 = copy.deepcopy(switches_normal)
            switches_reduce_2 = copy.deepcopy(switches_reduce)

        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()
        normal_prob = F.sigmoid(arch_param[0]).data.cpu().numpy()  # convert logits to probabilities
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_normal[i][j]:
                    idxs.append(j)
            # for the last stage, drop all Zero operations
            # drop1 = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])
            drop2 = get_min_k(normal_prob[i, :], num_to_drop[sp])
            # if sp == len(num_to_keep) - 1:
            #     for idx in drop1:
            #         switches_normal[i][idxs[idx]] = False
            # else:
            for idx in drop2:
                switches_normal[i][idxs[idx]] = False  # progressively disable weak operations (a pruning-style regularizer)
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)

        if args.use_baidu == False:
            reduce_prob = F.sigmoid(arch_param[1]).data.cpu().numpy()
            # reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
            for i in range(14):
                idxs = []
                for j in range(len(PRIMITIVES)):
                    if switches_reduce[i][j]:
                        idxs.append(j)
                if sp == len(num_to_keep) - 1:
                    drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
                else:
                    drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
                for idx in drop:
                    switches_reduce[i][idxs[idx]] = False
            logging.info('switches_reduce = %s', switches_reduce)
            logging_switches(switches_reduce)
            train_data = CIFAR10_bad(root=args.data, train=True, download=True,
                                     transform=train_transform, bad_rate=args.bad_rate)
        else:
            train_data = CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
        test_data = CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
    else:
        train_transform, valid_transform = _data_transforms_cifar100(args)
        if args.bad_rate is not None:
            train_data = CIFAR100_bad(root=args.data, train=True, download=True,
                                      transform=train_transform, bad_rate=args.bad_rate)
        else:
            train_data = CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
        test_data = CIFAR100(root=args.data, train=False, download=True, transform=valid_transform)
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled=True torch.cuda.manual_seed(args.seed) logging.info("args = %s", args) # prepare dataset if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100(args) else: # train_transform, valid_transform = utils._data_transforms_cifar10(args) train_transform = transforms.Compose([transforms.Resize(32), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) valid_transform = transforms.Compose([transforms.Resize(32), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) label_dim = 10 image_size = 32 # label preprocess onehot = torch.zeros(label_dim, label_dim) onehot = onehot.scatter_(1, torch.LongTensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]).view(label_dim, 1), 1).view(label_dim, label_dim, 1, 1) fill = torch.zeros([label_dim, label_dim, image_size, image_size]) for i in range(label_dim): fill[i, i, :, :] = 1 num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]), pin_memory=True, num_workers=args.workers) adversarial_loss = nn.MSELoss() adversarial_loss.cuda() # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() switches = [] for i in range(14): switches.append([True for j in range(len(PRIMITIVES))]) switches_normal = copy.deepcopy(switches) switches_reduce = copy.deepcopy(switches) # To be moved to args num_to_keep = [5, 3, 1] num_to_drop = [3, 2, 2] if len(args.add_width) == 3: add_width = args.add_width else: add_width = [0, 0, 0] if len(args.add_layers) == 3: add_layers = args.add_layers else: add_layers = [0, 6, 12] if len(args.dropout_rate) ==3: drop_rate = args.dropout_rate else: drop_rate = [0.0, 0.0, 0.0] eps_no_archs = [10, 10, 10] # gen = Generator(100) # gen.cuda() # gen.apply(weights_init) # logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen)) # optimizer_gen = torch.optim.Adam(gen.parameters(), lr=args.lr, # betas=(args.b1, args.b2)) # sp = 0 # disc = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp])) # disc = nn.DataParallel(disc) # disc = disc.cuda() # logging.info("param size disc= %fMB", utils.count_parameters_in_MB(disc)) # network_params = [] # for k, v in disc.named_parameters(): # if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')): # network_params.append(v) # optimizer_disc = torch.optim.SGD( # network_params, # args.learning_rate, # momentum=args.momentum, # weight_decay=args.weight_decay) # scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( # optimizer_disc, float(args.epochs), eta_min=args.learning_rate_min) # for epoch in range(100): # logging.info('Epoch: %d', 
epoch) # epoch_start = time.time() # train_acc, train_obj = train_gan(train_queue, valid_queue, gen, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, 0, 0, 0, 0, train_arch=True) # epoch_duration = time.time() - epoch_start # logging.info('Epoch time: %ds', epoch_duration) # # utils.save(disc, os.path.join(args.save, 'disc_dump.pt')) # utils.save(gen, os.path.join(args.save, 'gen_dump.pt')) for sp in range(len(num_to_keep)): gen = Generator(100) gen.cuda() model = Resnet18() model.cuda() logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen)) logging.info("param size model= %fMB", utils.count_parameters_in_MB(model)) optimizer_gen = torch.optim.Adam(gen.parameters(), lr=args.lr, betas=(args.b1, args.b2)) optimizer_model = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4) scheduler_model = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_model, T_max=200) sp = 0 disc = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp])) disc = nn.DataParallel(disc) disc = disc.cuda() logging.info("param size disc= %fMB", utils.count_parameters_in_MB(disc)) network_params = [] for k, v in disc.named_parameters(): if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')): network_params.append(v) # optimizer_disc = torch.optim.SGD( # network_params, # args.learning_rate, # momentum=args.momentum, # weight_decay=args.weight_decay) optimizer_disc = torch.optim.Adam(network_params, lr=args.lr, betas=(args.b1, args.b2)) optimizer_a = torch.optim.Adam(disc.module.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer_disc, float(args.epochs), eta_min=args.learning_rate_min) sm_dim = -1 epochs = args.epochs eps_no_arch = eps_no_archs[sp] scale_factor = 0.2 # utils.load(disc, 'disc_dump.pt') # utils.load(gen, os.path.join(args.save, 'gen_dump.pt')) architect = Architect(gen, disc, model, network_params, criterion, adversarial_loss, CIFAR_CLASSES, args) for epoch in range(100): logging.info('Epoch: %d', epoch) epoch_start = time.time() train_acc, train_obj = train_gan(epoch, train_queue, valid_queue, gen, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, 0, 0, 0, 0, train_arch=True) epoch_duration = time.time() - epoch_start logging.info('Epoch time: %ds', epoch_duration) # for epoch in range(epochs): for epoch in range(0): scheduler.step() scheduler_model.step() lr_gen = args.lr lr_disc = args.learning_rate lr = scheduler.get_lr()[0] lr_model = scheduler_model.get_lr()[0] logging.info('Epoch: %d lr: %e lr_model: %e', epoch, lr, lr_model) epoch_start = time.time() # training if epoch < eps_no_arch: disc.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs disc.module.update_p() train_acc, train_obj = train(train_queue, valid_queue, architect, gen, model, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, optimizer_model, optimizer_a, lr, lr_model, lr_gen, lr_disc, train_arch=False) else: disc.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor) disc.module.update_p() train_acc, train_obj = train(train_queue, valid_queue, architect, gen, model, disc, network_params, criterion, adversarial_loss, optimizer_gen, optimizer_disc, optimizer_model, optimizer_a, lr, lr_model, 
lr_gen, lr_disc, train_arch=True) logging.info('Train_acc %f', train_acc) epoch_duration = time.time() - epoch_start logging.info('Epoch time: %ds', epoch_duration) # validation if epochs - epoch < 5: valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('Valid_acc %f', valid_acc) utils.save(disc, os.path.join(args.save, 'disc.pt')) utils.save(gen, os.path.join(args.save, 'gen.pt')) utils.save(model, os.path.join(args.save, 'model.pt')) print('------Dropping %d paths------' % num_to_drop[sp]) # Save switches info for s-c refinement. if sp == len(num_to_keep) - 1: switches_normal_2 = copy.deepcopy(switches_normal) switches_reduce_2 = copy.deepcopy(switches_reduce) # drop operations with low architecture weights arch_param = disc.module.arch_parameters() normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy() for i in range(14): idxs = [] for j in range(len(PRIMITIVES)): if switches_normal[i][j]: idxs.append(j) if sp == len(num_to_keep) - 1: # for the last stage, drop all Zero operations drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp]) else: drop = get_min_k(normal_prob[i, :], num_to_drop[sp]) for idx in drop: switches_normal[i][idxs[idx]] = False reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy() for i in range(14): idxs = [] for j in range(len(PRIMITIVES)): if switches_reduce[i][j]: idxs.append(j) if sp == len(num_to_keep) - 1: drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp]) else: drop = get_min_k(reduce_prob[i, :], num_to_drop[sp]) for idx in drop: switches_reduce[i][idxs[idx]] = False logging.info('switches_normal = %s', switches_normal) logging_switches(switches_normal) logging.info('switches_reduce = %s', switches_reduce) logging_switches(switches_reduce) if sp == len(num_to_keep) - 1: arch_param = disc.module.arch_parameters() normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy() reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy() normal_final = [0 for idx in range(14)] reduce_final = [0 for idx in range(14)] # remove all Zero operations for i in range(14): if switches_normal_2[i][0] == True: normal_prob[i][0] = 0 normal_final[i] = max(normal_prob[i]) if switches_reduce_2[i][0] == True: reduce_prob[i][0] = 0 reduce_final[i] = max(reduce_prob[i]) # Generate Architecture, similar to DARTS keep_normal = [0, 1] keep_reduce = [0, 1] n = 3 start = 2 for i in range(3): end = start + n tbsn = normal_final[start:end] tbsr = reduce_final[start:end] edge_n = sorted(range(n), key=lambda x: tbsn[x]) keep_normal.append(edge_n[-1] + start) keep_normal.append(edge_n[-2] + start) edge_r = sorted(range(n), key=lambda x: tbsr[x]) keep_reduce.append(edge_r[-1] + start) keep_reduce.append(edge_r[-2] + start) start = end n = n + 1 # set switches according the ranking of arch parameters for i in range(14): if not i in keep_normal: for j in range(len(PRIMITIVES)): switches_normal[i][j] = False if not i in keep_reduce: for j in range(len(PRIMITIVES)): switches_reduce[i][j] = False # translate switches into genotype genotype = parse_network(switches_normal, switches_reduce) logging.info(genotype) ## restrict skipconnect (normal cell only) logging.info('Restricting skipconnect...') # generating genotypes with different numbers of skip-connect operations for sks in range(0, 9): max_sk = 8 - sks num_sk = check_sk_number(switches_normal) if not num_sk > max_sk: continue while num_sk > max_sk: normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob) switches_normal = 
keep_1_on(switches_normal_2, normal_prob) switches_normal = keep_2_branches(switches_normal, normal_prob) num_sk = check_sk_number(switches_normal) logging.info('Number of skip-connect: %d', max_sk) genotype = parse_network(switches_normal, switches_reduce) logging.info(genotype)
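# The op-dropping loops above rely on two helpers, get_min_k and get_min_k_no_zero, that are not
# shown in this section. The sketch below illustrates what they are assumed to do, following the
# usual P-DARTS convention (positions of the k lowest-probability ops on an edge; the *_no_zero
# variant always discards the Zero op first when it is still switched on). The repository's own
# implementations may differ in detail; the _sketch suffix marks these as illustrations.
def get_min_k_sketch(probs, k):
    # positions of the k smallest probabilities on one edge
    order = sorted(range(len(probs)), key=lambda j: probs[j])
    return order[:k]

def get_min_k_no_zero_sketch(probs, idxs, k):
    # drop the Zero op (PRIMITIVES index 0) first if it is still a candidate,
    # then fill the remaining slots with the lowest-probability ops
    drop = []
    if 0 in idxs:
        drop.append(idxs.index(0))
        k = k - 1
    rest = [j for j in range(len(probs)) if j not in drop]
    rest.sort(key=lambda j: probs[j])
    return drop + rest[:k]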
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    logging.info("unparsed args = %s", unparsed)
    num_gpus = torch.cuda.device_count()
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    disc = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype)
    disc = torch.nn.DataParallel(disc)
    disc = disc.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(disc))
    adversarial_loss = nn.MSELoss()
    adversarial_loss.cuda()
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    optimizer_disc = torch.optim.SGD(disc.parameters(),
                                     args.learning_rate,
                                     momentum=args.momentum,
                                     weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer_disc, float(args.epochs))
    gen = Generator(100)
    gen.cuda()
    model = Resnet18()
    model.cuda()
    logging.info("param size gen= %fMB", utils.count_parameters_in_MB(gen))
    logging.info("param size model= %fMB", utils.count_parameters_in_MB(model))
    optimizer_gen = torch.optim.Adam(gen.parameters(), lr=args.lr, betas=(args.b1, args.b2))
    optimizer_model = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
    scheduler_model = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer_model, T_max=200)
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
        valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform)
    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size,
                                              shuffle=False, pin_memory=True, num_workers=args.workers)
    best_acc = 0.0
    for epoch in range(args.epochs):
        # scheduler_model.step()
        # lr_gen = args.lr
        # lr_disc = args.learning_rate
        # lr_model = scheduler_model.get_lr()[0]
        # logging.info('Epoch: %d lr_model %e', epoch, lr_model)
        # disc.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        # disc.drop_path_prob = args.drop_path_prob * epoch / args.epochs
        # start_time = time.time()
        # train_acc, train_obj = train(train_queue, gen, disc, model, criterion, adversarial_loss,
        #                              optimizer_disc, optimizer_gen, optimizer_model)
        # logging.info('Train_acc: %f', train_acc)
        # valid_acc, valid_obj = infer(valid_queue, model, criterion)
        # if valid_acc > best_acc:
        #     best_acc = valid_acc
        # logging.info('Valid_acc: %f', valid_acc)
        logging.info('Epoch: %d', epoch)
        epoch_start = time.time()
        train_acc, train_obj = train_gan(epoch, train_queue, valid_queue, gen, disc,
                                         criterion, adversarial_loss,
                                         optimizer_gen, optimizer_disc,
                                         0, 0, 0, 0, train_arch=True)
        epoch_duration = time.time() - epoch_start
        logging.info('Epoch time: %ds', epoch_duration)
def main_worker(gpu, ngpus_per_node, args): if args.distributed: if args.dist_url == "env://" and args.rank == -1: args.rank = int(os.environ["RANK"]) if args.multiprocessing_distributed: # For multiprocessing distributed training, rank needs to be the # global rank among all the processes args.rank = args.rank * ngpus_per_node + gpu dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank) logging.info(("=> load data '{}'".format(args.dtype))) if args.dtype == 'cifar10': train_transform, valid_transform = utils._data_transforms_cifar10(args, cutout=True) train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform) num_classes = 10 update_lrs = [150, 250, 350] elif args.dtype == 'cifar100': train_transform, valid_transform = utils._data_transforms_cifar100(args, cutout=True) train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=valid_transform) num_classes = 100 update_lrs = [40, 80, 160, 300] else: logging.info('no data type available') sys.exit(1) logging.info("update lrs: '{}'".format(update_lrs)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers) logging.info(("=> creating model '{}'".format(args.arch))) blocks_args, global_params = mixnet_builder.get_model_params(args.arch) model = MixNet(input_size=32, num_classes=num_classes, blocks_args=blocks_args, global_params=global_params) # print(model) # exit(0) logging.info("args = %s", args) # logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) logging.info("param size = %fMB", model._num_params / 1e6) # exit(0) if args.distributed: # For multiprocessing distributed, DistributedDataParallel constructor # should always set the single device scope, otherwise, # DistributedDataParallel will use all available devices. 
if args.gpu is not None: torch.cuda.set_device(args.gpu) model.cuda(args.gpu) # When using a single GPU per process and per # DistributedDataParallel, we need to divide the batch size # ourselves based on the total number of GPUs we have args.batch_size = int(args.batch_size / ngpus_per_node) args.workers = int(args.workers / ngpus_per_node) model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) else: model.cuda() # DistributedDataParallel will divide and allocate batch_size to all # available GPUs if device_ids are not set model = torch.nn.parallel.DistributedDataParallel(model) elif args.gpu is not None: torch.cuda.set_device(args.gpu) model = model.cuda(args.gpu) else: # DataParallel will divide and allocate batch_size to all available GPUs if args.arch.startswith('alexnet') or args.arch.startswith('vgg'): model.features = torch.nn.DataParallel(model.features) model.cuda() else: # model.make_cuda_and_parallel() # model.avgpool = torch.nn.DataParallel(model.avgpool) # model.classifier = torch.nn.DataParallel(model.classifier) model = torch.nn.DataParallel(model) model = model.cuda() criterion = nn.CrossEntropyLoss().cuda() if args.optim == 'adam': optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay) elif args.optim == 'rmsprop': optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.momentum, eps=args.eps, weight_decay=args.weight_decay) else: optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) cudnn.benchmark = True # scaled_lr = args.lr * args.batch_size / 256 # optim = { # "adam" : lambda : torch.optim.Adam(model.parameters()), # "rmsprop" : lambda : torch.optim.RMSprop(model.parameters(), lr=scaled_lr, momentum=args.momentum, eps=args.eps, weight_decay=args.weight_decay) # }[args.optim]() # scheduler = get_scheduler(optim, args.scheduler, int(2.4*len(train_queue)), args.epochs * len(train_queue)) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)) best_acc = 0.0 cur_lr = args.lr for epoch in range(args.epochs): scheduler.step() logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) # cur_lr = adjust_learning_rate(optimizer, epoch, cur_lr, update_lrs) # logging.info('Epoch: %d lr %e', epoch, cur_lr) start_time = time.time() train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('Train_acc: %f', train_acc) valid_acc, valid_obj = test(valid_queue, model, criterion) if valid_acc > best_acc: best_acc = valid_acc logging.info('Valid_acc: %f', valid_acc) end_time = time.time() duration = end_time - start_time print('Epoch time: %ds.' % duration ) utils.save(model, os.path.join(args.save, 'weights.pt'))
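# main_worker builds `update_lrs` milestone lists, but the step-decay branch that would consume
# them is commented out in favour of cosine annealing. For reference, a minimal version of the
# adjust_learning_rate helper that branch assumes (decay the LR by a factor at each milestone
# epoch) is sketched here; it is an illustration, not necessarily the repository's own code.
def adjust_learning_rate_sketch(optimizer, epoch, cur_lr, update_lrs, gamma=0.1):
    if epoch in update_lrs:
        cur_lr = cur_lr * gamma
        for param_group in optimizer.param_groups:
            param_group['lr'] = cur_lr
    return cur_lr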
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    np.random.seed(args.seed)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info("args = %s", args)
    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=args.workers)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=args.workers)
    # build Network
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    switches = []  # switch flags marking which ops on each path are enabled
    for i in range(14):  # a cell has 4 intermediate nodes and 2 input nodes, so 14 paths in total
        switches.append([True for j in range(len(PRIMITIVES))])  # every path starts with all len(PRIMITIVES) candidate ops enabled
    switches_normal = copy.deepcopy(switches)  # op switch states of the normal cell
    switches_reduce = copy.deepcopy(switches)  # op switch states of the reduce cell
    # To be moved to args
    num_to_keep = [5, 3, 1]
    num_to_drop = [3, 2, 2]
    if len(args.add_width) == 3:  # defaults to [0, 0, 0]
        add_width = args.add_width
    else:
        add_width = [0, 0, 0]
    if len(args.add_layers) == 3:  # args.add_layers values passed on the command line
        add_layers = args.add_layers
    else:
        add_layers = [0, 6, 12]
    if len(args.dropout_rate) == 3:  # three dropout rates passed on the command line
        drop_rate = args.dropout_rate
    else:
        drop_rate = [0.0, 0.0, 0.0]
    eps_no_archs = [10, 10, 10]
    for sp in range(len(num_to_keep)):  # the search is split into 3 stages
        # args.init_channels defaults to 16, i.e. the stem outputs 16 channels; args.add_width can widen it
        # args.layers defaults to 5 (3 normal cells + 2 reduce cells); add_layers raises it to 11 in stage 2 and 17 in stage 3
        model = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES,
                        args.layers + int(add_layers[sp]), criterion,
                        switches_normal=switches_normal,
                        switches_reduce=switches_reduce,
                        p=float(drop_rate[sp]))
        model = nn.DataParallel(model)
        model = model.cuda()
        logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
        network_params = []  # collect the network weights
        for k, v in model.named_parameters():
            if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')):
                network_params.append(v)
        optimizer = torch.optim.SGD(
            network_params,  # updates the network weights
            args.learning_rate,
            momentum=args.momentum,
            weight_decay=args.weight_decay)
        optimizer_a = torch.optim.Adam(
            model.module.arch_parameters(),  # updates the architecture parameters
            lr=args.arch_learning_rate,
            betas=(0.5, 0.999),
            weight_decay=args.arch_weight_decay)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
            optimizer, float(args.epochs), eta_min=args.learning_rate_min)
        sm_dim = -1
        epochs = args.epochs
        eps_no_arch = eps_no_archs[sp]
        scale_factor = 0.2
        for epoch in range(epochs):  # epochs defaults to 25; each stage trains for 25 epochs
            scheduler.step()
            lr = scheduler.get_lr()[0]
            logging.info('Epoch: %d lr: %e', epoch, lr)
            epoch_start = time.time()
            # training
            if epoch < eps_no_arch:  # the first eps_no_arch (10) epochs train only the network weights
                model.module.p = float(drop_rate[sp]) * (epochs - epoch - 1) / epochs
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr,
                                             train_arch=False)
            else:  # afterwards the architecture parameters are trained as well
                model.module.p = float(drop_rate[sp]) * np.exp(-(epoch - eps_no_arch) * scale_factor)
                model.module.update_p()
                train_acc, train_obj = train(train_queue, valid_queue, model, network_params,
                                             criterion, optimizer, optimizer_a, lr,
                                             train_arch=True)
            logging.info('Train_acc %f', train_acc)
            epoch_duration = time.time() - epoch_start
            logging.info('Epoch time: %ds', epoch_duration)
            # validation
            if epochs - epoch < 5:  # with epochs=25, only the last 5 epochs are evaluated on the validation split
                valid_acc, valid_obj = infer(valid_queue, model, criterion)
                logging.info('Valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save, 'weights.pt'))  # save the model at the end of each stage
        # after a stage finishes, drop some ops
        print('------Dropping %d paths------' % num_to_drop[sp])  # num_to_drop=[3,2,2]: of the 8 candidate ops, drop 3, then 2, then 2, so one op remains
        # Save switches info for s-c refinement.
        if sp == len(num_to_keep) - 1:  # num_to_keep=[5,3,1]; only executed in the last stage (sp=2)
            switches_normal_2 = copy.deepcopy(switches_normal)  # each path still has 3 ops here; 2 more will be removed, leaving one
            switches_reduce_2 = copy.deepcopy(switches_reduce)
        # drop operations with low architecture weights
        arch_param = model.module.arch_parameters()  # fetch the architecture parameters
        # process arch_normal
        normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()  # softmax over arch_normal
        for i in range(14):  # a cell has 14 paths
            idxs = []  # indices of the ops still enabled on this path
            for j in range(len(PRIMITIVES)):  # iterate over the ops on this path
                if switches_normal[i][j]:  # True means the op is still enabled
                    idxs.append(j)  # idxs holds 3 elements in the last stage
            if sp == len(num_to_keep) - 1:
                # for the last stage, drop all Zero operations
                drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp])  # num_to_drop[2] = 2, leaving one op per path
            else:
                drop = get_min_k(normal_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_normal[i][idxs[idx]] = False  # switch off the k lowest-probability ops; note that switches_normal is updated here
        # process arch_reduce
        reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy()
        for i in range(14):
            idxs = []
            for j in range(len(PRIMITIVES)):
                if switches_reduce[i][j]:
                    idxs.append(j)
            if sp == len(num_to_keep) - 1:
                drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp])
            else:
                drop = get_min_k(reduce_prob[i, :], num_to_drop[sp])
            for idx in drop:
                switches_reduce[i][idxs[idx]] = False  # note that switches_reduce is updated here
        logging.info('switches_normal = %s', switches_normal)
        logging_switches(switches_normal)
        logging.info('switches_reduce = %s', switches_reduce)
        logging_switches(switches_reduce)
        if sp == len(num_to_keep) - 1:  # last stage
            arch_param = model.module.arch_parameters()
            normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy()  # op probabilities
            reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy()
            normal_final = [0 for idx in range(14)]  # best remaining op probability per path
            reduce_final = [0 for idx in range(14)]
            # remove all Zero operations
            for i in range(14):
                if switches_normal_2[i][0] == True:  # if the Zero op survived the last stage, zero out its probability
                    normal_prob[i][0] = 0
                normal_final[i] = max(normal_prob[i])  # record the highest op probability on path i
                if switches_reduce_2[i][0] == True:
                    reduce_prob[i][0] = 0
                reduce_final[i] = max(reduce_prob[i])
            # Generate Architecture, similar to DARTS
            # pick two input paths per intermediate node; node 0's inputs are fixed, so no selection is needed there
            # node 1 has 3 candidate paths, node 2 has 4, node 3 has 5
            keep_normal = [0, 1]
            keep_reduce = [0, 1]
            n = 3
            start = 2
            for i in range(3):
                end = start + n
                tbsn = normal_final[start:end]
                tbsr = reduce_final[start:end]
                edge_n = sorted(range(n), key=lambda x: tbsn[x])  # pick two of the candidate paths
                keep_normal.append(edge_n[-1] + start)
                keep_normal.append(edge_n[-2] + start)
                edge_r = sorted(range(n), key=lambda x: tbsr[x])
                keep_reduce.append(edge_r[-1] + start)
                keep_reduce.append(edge_r[-2] + start)
                start = end
                n = n + 1
            # set switches according the ranking of arch parameters
            # for every path that was not kept, switch off all of its ops
            for i in range(14):
                if not i in keep_normal:
                    for j in range(len(PRIMITIVES)):
                        switches_normal[i][j] = False
                if not i in keep_reduce:
                    for j in range(len(PRIMITIVES)):
                        switches_reduce[i][j] = False
            # translate switches into genotype
            genotype = parse_network(switches_normal, switches_reduce)
            logging.info(genotype)
            ## restrict skipconnect (normal cell only)
            logging.info('Restricting skipconnect...')
            # generating genotypes with different numbers of skip-connect operations
            for sks in range(0, 9):
                max_sk = 8 - sks
                num_sk = check_sk_number(switches_normal)
                if not num_sk > max_sk:
                    continue
                while num_sk > max_sk:  # remove surplus skip-connections
                    normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob)
                    switches_normal = keep_1_on(switches_normal_2, normal_prob)
                    switches_normal = keep_2_branches(switches_normal, normal_prob)
                    num_sk = check_sk_number(switches_normal)
                logging.info('Number of skip-connect: %d', max_sk)
                genotype = parse_network(switches_normal, switches_reduce)
                logging.info(genotype)
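# The candidate-op dropout probability in the stage loop above follows two regimes: a linear decay
# while only the weights are trained, then an exponential decay (scale_factor = 0.2) once the
# architecture parameters are updated as well. A standalone sketch of that schedule, useful for
# sanity checks or plotting; the helper name is ours, not the repository's.
def op_dropout_schedule(epoch, epochs, base_rate, eps_no_arch, scale_factor=0.2):
    if epoch < eps_no_arch:
        return base_rate * (epochs - epoch - 1) / epochs
    return base_rate * np.exp(-(epoch - eps_no_arch) * scale_factor)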
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) if not args.is_parallel: torch.cuda.set_device(int(args.gpu)) logging.info('gpu device = %d' % int(args.gpu)) else: logging.info('gpu device = %s' % args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() if args.is_cifar100: model = Network(args.init_channels, CIFAR100_CLASSES, args.layers, criterion) model1 = Network(args.init_channels, CIFAR100_CLASSES, args.layers, criterion) # important for initializing the two models differently. # model1.init_weights() else: model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model1 = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) # model1.init_weights() model = model.cuda() model1 = model1.cuda() logging.info("param size of model1 = %fMB", utils.count_parameters_in_MB(model)) logging.info("param size of model2 = %fMB", utils.count_parameters_in_MB(model1)) # if args.is_parallel: # # import ipdb; ipdb.set_trace() # gpus = [int(i) for i in args.gpu.split(',')] # model = nn.parallel.DataParallel( # model, device_ids=gpus, output_device=gpus[0]) # model = model.module optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer1 = torch.optim.SGD(model1.parameters(), args.learning_rate1, momentum=args.momentum, weight_decay=args.weight_decay) if args.is_cifar100: train_transform, valid_transform = utils._data_transforms_cifar100( args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.is_cifar100: train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=False, num_workers=4) external_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=False, num_workers=4) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=False, num_workers=4) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer1, float(args.epochs), eta_min=args.learning_rate_min) architect = Architect(model, model1, args) for epoch in range(args.epochs): lr = scheduler.get_lr()[0] lr1 = scheduler1.get_lr()[0] logging.info('epoch %d lr %e lr1 %e', epoch, lr, lr1) genotype = model.genotype() genotype1 = model1.genotype() logging.info('genotype1 = %s', genotype) logging.info('genotype2 = %s', genotype1) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) print(F.softmax(model1.alphas_normal, dim=-1)) print(F.softmax(model1.alphas_reduce, dim=-1)) # training train_acc, train_obj, train_acc1, train_obj1 = train( train_queue, valid_queue, external_queue, model, model1, 
architect, criterion, optimizer, optimizer1, lr, lr1) logging.info('train_acc %f train_acc1 %f', train_acc, train_acc1) scheduler.step() scheduler1.step() # validation valid_acc, valid_obj, valid_acc1, valid_obj1 = infer( valid_queue, model, model1, criterion) logging.info('valid_acc %f valid_acc1 %f', valid_acc, valid_acc1) utils.save(model, os.path.join(args.save, 'weights.pt')) utils.save(model1, os.path.join(args.save, 'weights1.pt'))
def __init__(self, test_args: Namespace, my_dataset: MyDataset, model: nn.Module = None): self.__device = torch.device( 'cuda' if torch.cuda.is_available() else 'cpu') log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') np.random.seed(test_args.seed) torch.manual_seed(test_args.seed) cudnn.benchmark = True cudnn.enabled = True logging.info(f'gpu device = {test_args.gpu}') logging.info(f'args = {test_args}') if model is None: # equal to: genotype = genotypes.DARTS_v2 if not (test_args.arch or test_args.arch_path): logging.info('need to designate arch.') sys.exit(1) genotype = eval( f'genotypes.{test_args.arch}' ) if not test_args.arch_path else utils.load_genotype( test_args.arch_path) print('Load genotype:', genotype) if my_dataset is MyDataset.CIFAR10: model = NetworkCIFAR(test_args.init_ch, 10, test_args.layers, test_args.auxiliary, genotype).to(self.__device) elif my_dataset is MyDataset.CIFAR100: model = NetworkCIFAR(test_args.init_ch, 100, test_args.layers, test_args.auxiliary, genotype).to(self.__device) elif my_dataset is MyDataset.ImageNet: model = NetworkImageNet(test_args.init_ch, 1000, test_args.layers, test_args.auxiliary, genotype).to(self.__device) else: raise Exception('No match MyDataset') utils.load(model, test_args.model_path, False) model = model.to(self.__device) param_size = utils.count_parameters_in_MB(model) logging.info(f'param size = {param_size}MB') model.drop_path_prob = test_args.drop_path_prob self.__model = model self.__args = test_args self.__criterion = nn.CrossEntropyLoss().to(self.__device) if my_dataset is MyDataset.CIFAR10: _, test_transform = utils._data_transforms_cifar10(test_args) test_data = dset.CIFAR10(root=test_args.data, train=False, download=True, transform=test_transform) elif my_dataset is MyDataset.CIFAR100: _, test_transform = utils._data_transforms_cifar100(test_args) test_data = dset.CIFAR100(root=test_args.data, train=False, download=True, transform=test_transform) elif my_dataset is MyDataset.ImageNet: validdir = test_args.data / 'val' normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) valid_data = dset.ImageFolder( validdir, transforms.Compose([ transforms.Resize(256), transforms.CenterCrop(224), transforms.ToTensor(), normalize, ])) test_data = valid_data else: raise Exception('No match MyDataset') self.__test_queue = torch.utils.data.DataLoader( test_data, batch_size=test_args.batchsz, shuffle=False, pin_memory=True, num_workers=4)
def model_compress(args): if os.path.isdir(args.save) == False: os.makedirs(args.save) save_dir = '{}compress-{}-{}'.format(args.save, args.note, time.strftime("%Y%m%d-%H%M%S")) utils.create_exp_dir(save_dir, scripts_to_save=glob.glob('*.py')) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(save_dir, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) if args.cifar100: CIFAR_CLASSES = 100 data_folder = 'cifar-100-python' else: CIFAR_CLASSES = 10 data_folder = 'cifar-10-batches-py' if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info("args = %s", args) # prepare dataset if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100( args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.train_data_dir, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.train_data_dir, train=True, download=True, transform=train_transform) num_train = len(train_data) iter_per_one_epoch = num_train // (2 * args.batch_size) if iter_per_one_epoch >= 100: train_extend_rate = 1 else: train_extend_rate = (100 // iter_per_one_epoch) + 1 iter_per_one_epoch = iter_per_one_epoch * train_extend_rate logging.info('num original train data: %d', num_train) logging.info('iter per one epoch: %d', iter_per_one_epoch) indices = list(range(num_train)) random.shuffle(indices) split = int(np.floor(args.train_portion * num_train)) train_set = torch.utils.data.Subset(train_data, indices[:split]) valid_set = torch.utils.data.Subset(train_data, indices[split:num_train]) train_set = torch.utils.data.ConcatDataset([train_set] * train_extend_rate) # valid_set = torch.utils.data.ConcatDataset([valid_set]*train_extend_rate) train_queue = torch.utils.data.DataLoader( train_set, batch_size=args.batch_size, sampler=torch.utils.data.sampler.RandomSampler(train_set), pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( valid_set, batch_size=args.batch_size, sampler=torch.utils.data.sampler.RandomSampler(valid_set), pin_memory=True, num_workers=args.workers) # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() eps_no_arch = args.eps_no_archs epochs = args.epochs if args.arch in genotypes.__dict__.keys(): genotype = eval("genotypes.%s" % args.arch) else: genotype = eval(args.arch) model = Network(genotype, args.init_channels, CIFAR_CLASSES, args.layers, criterion, steps=args.inter_nodes, multiplier=args.inter_nodes, stem_multiplier=args.stem_multiplier, residual_connection=args.residual_connection) model = nn.DataParallel(model) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) network_params = [] for k, v in model.named_parameters(): if not (k.endswith('alphas_normal') or k.endswith('alphas_reduce')): network_params.append(v) optimizer = torch.optim.SGD(network_params, args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer_a = torch.optim.Adam(model.module.arch_parameters(), lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( 
optimizer, float(epochs), eta_min=args.learning_rate_min) scheduler_a = torch.optim.lr_scheduler.StepLR(optimizer_a, 30, gamma=0.2) train_epoch_record = -1 arch_train_count = 0 prev_geno = '' prev_rank = None rank_geno = None result_geno = None arch_stable = 0 best_arch_stable = 0 for epoch in range(epochs): lr = scheduler.get_lr()[0] logging.info('Epoch: %d lr: %e', epoch, lr) epoch_start = time.time() # training if epoch < eps_no_arch: train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=False) else: ops, probs = compressing_parse(model) concat = range(2, 2 + model.module._steps) genotype = Genotype( normal=ops[0], normal_concat=concat, reduce=ops[1], reduce_concat=concat, ) if str(prev_geno) != str(genotype): prev_geno = genotype logging.info(genotype) # early stopping stable_cond = True rank = [] for i in range(len(probs)): rank_tmp = ranking(probs[i]) rank.append(rank_tmp) if prev_rank != rank: stable_cond = False arch_stable = 0 prev_rank = rank rank_geno = genotype logging.info('rank: %s', rank) if stable_cond: arch_stable += 1 if arch_stable > best_arch_stable: best_arch_stable = arch_stable result_geno = rank_geno logging.info('arch_stable: %d', arch_stable) logging.info('best genotype: %s', rank_geno) if arch_stable >= args.stable_arch - 1: logging.info('stable genotype: %s', rank_geno) result_geno = rank_geno break train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=True) arch_train_count += 1 scheduler_a.step() scheduler.step() logging.info('Train_acc %f, Objs: %e', train_acc, train_obj) epoch_duration = time.time() - epoch_start logging.info('Epoch time: %ds', epoch_duration) # validation if epoch >= eps_no_arch: valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('Valid_acc %f, Objs: %e', valid_acc, valid_obj) # # early arch training # if train_epoch_record == -1: # if train_acc > 70: # arch_train_num = args.epochs - args.eps_no_archs # eps_no_arch = 0 # train_epoch_record = epoch # else: # if epoch >= train_epoch_record + arch_train_num: # break utils.save(model, os.path.join(save_dir, 'weights.pt')) # last geno parser ops, probs = compressing_parse(model) concat = range(2, 2 + model.module._steps) genotype = Genotype( normal=ops[0], normal_concat=concat, reduce=ops[1], reduce_concat=concat, ) logging.info('Last geno: %s', genotype) if result_geno == None: result_geno = genotype return result_geno, best_arch_stable
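# The early-stopping logic in model_compress compares per-edge op rankings between consecutive
# epochs through a `ranking` helper that is not defined in this section. A plausible minimal
# version (op indices ordered by descending probability) is sketched below as an assumption;
# the actual helper may differ.
def ranking_sketch(edge_probs):
    return sorted(range(len(edge_probs)), key=lambda j: edge_probs[j], reverse=True)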
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() arch1, alphas_normal1, alphas_reduce1,\ betas_normal1, betas_reduce1 = initialize_alphas() arch2, alphas_normal2, alphas_reduce2,\ betas_normal2, betas_reduce2 = initialize_alphas() model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model = model.cuda() model1 = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model1 = model1.cuda() # model for pretraining. model_pretrain = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model_pretrain = model_pretrain.cuda() model1_pretrain = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion) model1_pretrain = model1_pretrain.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) model._arch_parameters = arch1 model1._arch_parameters = arch2 model.alphas_reduce = alphas_reduce1 model.alphas_normal = alphas_normal1 model1.alphas_reduce = alphas_reduce2 model1.alphas_normal = alphas_normal2 model.betas_reduce = betas_reduce1 model.betas_normal = betas_normal1 model1.betas_reduce = betas_reduce2 model1.betas_normal = betas_normal2 model_pretrain._arch_parameters = arch1 model1_pretrain._arch_parameters = arch2 model_pretrain.alphas_reduce = alphas_reduce1 model_pretrain.alphas_normal = alphas_normal1 model1_pretrain.alphas_reduce = alphas_reduce2 model1_pretrain.alphas_normal = alphas_normal2 model_pretrain.betas_reduce = betas_reduce1 model_pretrain.betas_normal = betas_normal1 model1_pretrain.betas_reduce = betas_reduce2 model1_pretrain.betas_normal = betas_normal2 optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer1 = torch.optim.SGD(model1.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer_pretrain = torch.optim.SGD(model_pretrain.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer1_pretrain = torch.optim.SGD(model1_pretrain.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) if args.set == 'cifar100': train_transform, valid_transform = utils._data_transforms_cifar100( args) train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) external_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=2) scheduler = 
torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) scheduler1 = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer1, float(args.epochs), eta_min=args.learning_rate_min) scheduler_pretrain = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer_pretrain, float(args.epochs + args.pretrain_steps), eta_min=args.learning_rate_min) scheduler1_pretrain = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer1_pretrain, float(args.epochs + args.pretrain_steps), eta_min=args.learning_rate_min) architect = Architect(model, model1, args) for epoch in range(args.epochs + args.pretrain_steps): lr = scheduler.get_lr()[0] lr1 = scheduler1.get_lr()[0] lr_pretrain = scheduler_pretrain.get_lr()[0] lr1_pretrain = scheduler1_pretrain.get_lr()[0] logging.info('epoch %d lr %e lr1 %e lr_pretrain %e lr1_pretrain %e', epoch, lr, lr1, lr_pretrain, lr1_pretrain) if epoch >= args.pretrain_steps: genotype = model.genotype() genotype1 = model1.genotype() logging.info('genotype1 = %s', genotype) logging.info('genotype2 = %s', genotype1) print(F.softmax(model.alphas_normal, dim=-1)) print(F.softmax(model.alphas_reduce, dim=-1)) print(F.softmax(model1.alphas_normal, dim=-1)) print(F.softmax(model1.alphas_reduce, dim=-1)) # training train_acc, train_obj, train_acc1, train_obj1 = train( args, epoch, train_queue, valid_queue, external_queue, model, model1, model_pretrain, model1_pretrain, architect, criterion, optimizer, optimizer1, optimizer_pretrain, optimizer1_pretrain, lr, lr1, lr_pretrain, lr1_pretrain) if epoch >= args.pretrain_steps: logging.info('train_acc %f train_acc1 %f', train_acc, train_acc1) else: logging.info('pretrain_acc %f pretrain_acc1 %f', train_acc, train_acc1) if epoch >= args.pretrain_steps: scheduler_pretrain.step() scheduler1_pretrain.step() scheduler.step() scheduler1.step() else: scheduler_pretrain.step() scheduler1_pretrain.step() # validation if epoch >= args.pretrain_steps and (args.epochs + args.pretrain_steps) - epoch <= 1: valid_acc, valid_obj, valid_acc1, valid_obj1 = infer( valid_queue, model, model1, criterion) logging.info('valid_acc %f valid_acc1 %f', valid_acc, valid_acc1) utils.save(model, os.path.join(args.save, 'weights.pt')) utils.save(model1, os.path.join(args.save, 'weights1.pt'))
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) seed = args.seed logging.info('Using the random seed of %d for searching...' % seed) np.random.seed(seed) cudnn.benchmark = True torch.manual_seed(seed) cudnn.enabled = True torch.cuda.manual_seed(seed) torch.cuda.manual_seed_all(seed) os.environ['PYTHONHASHSEED'] = str(seed) logging.info("args = %s", args) num_gpus = torch.cuda.device_count() logging.info('Training with %d GPU(s)', num_gpus) # build Network # default as ResNet20 since the constrain of GPU memory when doing search process resnet_types = { 'resnet20': 3, 'resnet32': 5, 'resnet44': 7, 'resnet56': 9, 'resnet110': 18 } n_sizes = resnet_types[args.net_type] logging.info('Number of attentional residual block(s): %s', n_sizes * 3) model = att_resnet_cifar(n_size=n_sizes, no_gpus=num_gpus, num_classes=CIFAR_CLASSES) if num_gpus > 1: model = nn.DataParallel(model) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) # prepare dataset if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100( args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=args.workers) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() if num_gpus > 1: optimizer = torch.optim.SGD(model.module.net_parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) architect = Architect_m(model, args) else: optimizer = torch.optim.SGD(model.net_parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) architect = Architect_s(model, args) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) epochs = args.epochs scale_factor = 0.19 BEST_accVal = 0.0 for epoch in range(epochs): scheduler.step() lr = scheduler.get_lr()[0] logging.info('Epoch: %d lr: %e', epoch, lr) epoch_start = time.time() # training if args.dropout_rate > 0.: drop_rate = args.dropout_rate * np.exp(-epoch * scale_factor) if num_gpus > 1: model.module.update_p(drop_rate) else: model.update_p(drop_rate) train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr, num_gpus) logging.info('Train_acc %f', train_acc) epoch_duration = time.time() - epoch_start logging.info('Epoch time: %ds', epoch_duration) # validation if epochs - epoch < 10: valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('Valid_acc %f', valid_acc) if valid_acc > BEST_accVal: BEST_accVal = valid_acc utils.save(model, os.path.join(args.save, 'weights.pt')) logging.info('BEST VALID ACCURACY IS: %f', BEST_accVal) if num_gpus > 1: genotype = model.module.genotype() else: genotype = model.genotype() logging.info( 
'______________________________________________\nFinal genotype = %s', genotype) with open('{}/result.txt'.format(args.save), 'w') as file: file.write(str(genotype)) logging.info('____________________END_______________________')
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) # torch.autograd.set_detect_anomaly(True) logging.info("args = %s", args) # prepare dataset if args.cifar100: train_transform, test_transform = utils._data_transforms_cifar100(args) else: train_transform, test_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=True, transform=train_transform) test_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=test_transform) num_train = len(train_data) indices = list(range(num_train)) split = int(np.floor(args.train_portion * num_train)) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]), pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, sampler=torch.utils.data.sampler.SubsetRandomSampler( indices[split:num_train]), pin_memory=True, num_workers=args.workers) test_queue = torch.utils.data.DataLoader(test_data, batch_size=args.batch_size, pin_memory=True, num_workers=args.workers) # build Network criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() path_num = sum(1 for i in range(args.nodes) for n in range(2 + i)) switches = [] for i in range(path_num): switches.append([True for j in range(len(PRIMITIVES))]) if args.drop_none: switches[i][0] = False # switch off zero operator if args.drop_skip: switches[i][3] = False # switch off identity operator switches_normal = copy.deepcopy(switches) switches_reduce = copy.deepcopy(switches) # To be moved to args num_to_keep = [5, 3, 1] num_to_drop = [2, 2, 2] if len(args.add_width) == 3: add_width = args.add_width else: add_width = [0, 0, 0] if len(args.add_layers) == 3: add_layers = args.add_layers else: add_layers = [0, 6, 12] if len(args.dropout_rate) == 3: drop_rate = args.dropout_rate else: drop_rate = [0.0, 0.0, 0.0] eps_no_archs = [10, 10, 10] for sp in range(len(num_to_keep)): # if sp == len(num_to_keep)-1: # switch on zero operator in the last stage # for i in range(path_num): # switches_normal[i][0]=True # for i in range(path_num): # switches_reduce[i][0]=True model = Network(args.init_channels + int(add_width[sp]), CIFAR_CLASSES, args.layers + int(add_layers[sp]), criterion, steps=args.nodes, multiplier=args.nodes, switches_normal=switches_normal, switches_reduce=switches_reduce, p=float(drop_rate[sp])) model = nn.DataParallel(model) # print(model) # if sp==0: # utils.save(model, os.path.join(args.save, 'cell_weights.pt')) # keep initial weights # else: # utils.load(model.module.cells, os.path.join(args.save, 'cell_weights.pt')) # strict=False # print('copying weight....') # state_dict = torch.load(os.path.join(args.save, 'cell_weights.pt')) # for key in state_dict.keys(): # print(key) # for key in state_dict.keys(): # if 'm_ops' in key and 'op0' not in key: # s = re.split('op\d', key) # copy_key = s[0]+'op0'+s[1] # state_dict[key] = state_dict[copy_key] # print(key) # model.load_state_dict(state_dict) # print('done!') model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) network_params = [] arch_params = [] for k, v in 
model.named_parameters(): if 'alpha' in k: print(k) arch_params.append(v) else: network_params.append(v) # if not (k.endswith('alphas_normal_source') or k.endswith('alphas_reduce')): # network_params.append(v) optimizer = torch.optim.SGD(network_params, args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) optimizer_a = torch.optim.Adam(arch_params, lr=args.arch_learning_rate, betas=(0.5, 0.999), weight_decay=args.arch_weight_decay) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs), eta_min=args.learning_rate_min) sm_dim = -1 epochs = args.epochs eps_no_arch = eps_no_archs[sp] scale_factor = 0.2 for epoch in range(epochs): #epochs scheduler.step() lr = scheduler.get_lr()[0] #args.learning_rate# logging.info('Epoch: %d lr: %e', epoch, lr) epoch_start = time.time() # training if epoch < eps_no_arch: model.module.p = float( drop_rate[sp]) * (epochs - epoch - 1) / epochs model.module.update_p() train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=False, train_weight=True) elif epoch < epochs: model.module.p = float(drop_rate[sp]) * np.exp( -(epoch - eps_no_arch) * scale_factor) model.module.update_p() train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=True, train_weight=True) else: # train arch only train_acc, train_obj = train(train_queue, valid_queue, model, network_params, criterion, optimizer, optimizer_a, lr, train_arch=True, train_weight=False) logging.info('Train_acc %f', train_acc) epoch_duration = time.time() - epoch_start logging.info('Epoch time: %ds', epoch_duration) # validation # if epochs - epoch < 5: valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('Valid_acc %f', valid_acc) test_acc, test_obj = infer(test_queue, model, criterion) logging.info('Test_acc %f', test_acc) utils.save(model, os.path.join(args.save, 'weights.pt')) print('------Dropping %d paths------' % num_to_drop[sp]) # Save switches info for s-c refinement. 
if sp == len(num_to_keep) - 1: switches_normal_2 = copy.deepcopy(switches_normal) switches_reduce_2 = copy.deepcopy(switches_reduce) # drop operations with low architecture weights arch_param = model.module.arch_parameters() # n = 3 # start = 2 # weightsn2 = F.softmax(arch_param[2][0:2], dim=-1) # weightsr2 = F.softmax(arch_param[3][0:2], dim=-1) weightsn2 = F.sigmoid(arch_param[2]) weightsr2 = F.sigmoid(arch_param[3]) # for i in range(args.nodes-1): # end = start + n # tn2 = F.softmax(arch_param[2][start:end], dim=-1) # tr2 = F.softmax(arch_param[3][start:end], dim=-1) # start = end # n += 1 # weightsn2 = torch.cat([weightsn2, tn2],dim=0) # weightsr2 = torch.cat([weightsr2, tr2],dim=0) weightsn2 = weightsn2.data.cpu().numpy() weightsr2 = weightsr2.data.cpu().numpy() normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy() for i in range(path_num): normal_prob[i] = normal_prob[i] * weightsn2[i] idxs = [] for j in range(len(PRIMITIVES)): if switches_normal[i][j]: idxs.append(j) if sp == len(num_to_keep) - 1: # for the last stage, drop all Zero operations drop = get_min_k_no_zero(normal_prob[i, :], idxs, num_to_drop[sp]) else: drop = get_min_k(normal_prob[i, :], num_to_drop[sp]) for idx in drop: switches_normal[i][idxs[idx]] = False reduce_prob = F.softmax(arch_param[1], dim=-1).data.cpu().numpy() for i in range(path_num): reduce_prob[i] = reduce_prob[i] * weightsr2[i] idxs = [] for j in range(len(PRIMITIVES)): if switches_reduce[i][j]: idxs.append(j) if sp == len(num_to_keep) - 1: drop = get_min_k_no_zero(reduce_prob[i, :], idxs, num_to_drop[sp]) else: drop = get_min_k(reduce_prob[i, :], num_to_drop[sp]) for idx in drop: switches_reduce[i][idxs[idx]] = False logging.info('switches_normal = %s', switches_normal) logging_switches(switches_normal) logging.info('switches_reduce = %s', switches_reduce) logging_switches(switches_reduce) if sp == len(num_to_keep) - 1: # n = 3 # start = 2 # weightsn2 = F.softmax(arch_param[2][0:2], dim=-1) # weightsr2 = F.softmax(arch_param[3][0:2], dim=-1) weightsn2 = F.sigmoid(arch_param[2]) weightsr2 = F.sigmoid(arch_param[3]) # for i in range(args.nodes-1): # end = start + n # tn2 = F.softmax(arch_param[2][start:end], dim=-1) # tr2 = F.softmax(arch_param[3][start:end], dim=-1) # start = end # n += 1 # weightsn2 = torch.cat([weightsn2, tn2],dim=0) # weightsr2 = torch.cat([weightsr2, tr2],dim=0) weightsn2 = weightsn2.data.cpu().numpy() weightsr2 = weightsr2.data.cpu().numpy() arch_param = model.module.arch_parameters() normal_prob = F.softmax(arch_param[0], dim=sm_dim).data.cpu().numpy() reduce_prob = F.softmax(arch_param[1], dim=sm_dim).data.cpu().numpy() normal_final = [0 for idx in range(path_num)] reduce_final = [0 for idx in range(path_num)] # remove all Zero operations for i in range(path_num): normal_prob[i] = normal_prob[i] * weightsn2[i] if switches_normal_2[i][0] == True: normal_prob[i][0] = 0 normal_final[i] = max(normal_prob[i]) reduce_prob[i] = reduce_prob[i] * weightsr2[i] if switches_reduce_2[i][0] == True: reduce_prob[i][0] = 0 reduce_final[i] = max(reduce_prob[i]) # Generate Architecture, similar to DARTS keep_normal = [0, 1] keep_reduce = [0, 1] n = 3 start = 2 for i in range(args.nodes - 1): end = start + n tbsn = normal_final[start:end] tbsr = reduce_final[start:end] edge_n = sorted(range(n), key=lambda x: tbsn[x]) keep_normal.append(edge_n[-1] + start) keep_normal.append(edge_n[-2] + start) edge_r = sorted(range(n), key=lambda x: tbsr[x]) keep_reduce.append(edge_r[-1] + start) keep_reduce.append(edge_r[-2] + start) start = end n 
= n + 1 # set switches according the ranking of arch parameters for i in range(path_num): if not i in keep_normal: for j in range(len(PRIMITIVES)): switches_normal[i][j] = False if not i in keep_reduce: for j in range(len(PRIMITIVES)): switches_reduce[i][j] = False # translate switches into genotype genotype = parse_network(switches_normal, switches_reduce, steps=args.nodes) logging.info(genotype) ## restrict skipconnect (normal cell only) logging.info('Restricting skipconnect...') # generating genotypes with different numbers of skip-connect operations for sks in range(0, 9): max_sk = 8 - sks num_sk = check_sk_number(switches_normal) if not num_sk > max_sk: continue while num_sk > max_sk: normal_prob = delete_min_sk_prob(switches_normal, switches_normal_2, normal_prob) switches_normal = keep_1_on(switches_normal_2, normal_prob) switches_normal = keep_2_branches(switches_normal, normal_prob) num_sk = check_sk_number(switches_normal) logging.info('Number of skip-connect: %d', max_sk) genotype = parse_network(switches_normal, switches_reduce, steps=args.nodes) logging.info(genotype)
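# In the variant above, each edge carries an extra scalar gate (arch_param[2] for normal cells,
# arch_param[3] for reduce cells) that is squashed with a sigmoid and multiplied into the softmax
# op probabilities before any op dropping or edge selection. A compact sketch of that scoring
# step; the function name is ours, and shapes assume `alphas` is [num_edges, num_ops] and
# `betas` is [num_edges].
def gated_edge_scores(alphas, betas):
    op_prob = F.softmax(alphas, dim=-1)             # per-edge op probabilities
    edge_gate = torch.sigmoid(betas).unsqueeze(-1)  # per-edge scalar gate in (0, 1)
    return (op_prob * edge_gate).data.cpu().numpy()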
def main(): if not torch.cuda.is_available(): logging.info('No GPU device available') sys.exit(1) np.random.seed(args.seed) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info("args = %s", args) logging.info("unparsed args = %s", unparsed) num_gpus = torch.cuda.device_count() genotype = eval("genotypes.%s" % args.arch) print('---------Genotype---------') logging.info(genotype) print('--------------------------') model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = torch.nn.DataParallel(model) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100( args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.tmp_data_dir, train=True, download=False, transform=train_transform) valid_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) else: train_data = dset.CIFAR10(root=args.tmp_data_dir, train=True, download=False, transform=train_transform) valid_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=False, transform=valid_transform) train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=args.workers) valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=args.workers) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR( optimizer, float(args.epochs)) best_acc = 0.0 for epoch in range(args.epochs): scheduler.step() logging.info('Epoch: %d lr %e', epoch, scheduler.get_lr()[0]) model.module.drop_path_prob = args.drop_path_prob * epoch / args.epochs model.drop_path_prob = args.drop_path_prob * epoch / args.epochs start_time = time.time() train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('Train_acc: %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) if valid_acc > best_acc: best_acc = valid_acc utils.save(model.module, os.path.join(args.save, 'weights.pt')) logging.info('Valid_acc: %f', valid_acc) logging.info('best_acc:%f', best_acc) end_time = time.time() duration = end_time - start_time print('Epoch time: %ds.' % duration)
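# The evaluation loop above ramps drop_path_prob linearly from 0 towards args.drop_path_prob over
# training. For reference, this is the standard DARTS-style drop-path transform the Network is
# assumed to apply at that probability to each cell branch; it is shown only as an illustration,
# not as this repository's exact implementation.
def drop_path_sketch(x, drop_prob):
    if drop_prob > 0.:
        keep_prob = 1. - drop_prob
        # per-sample Bernoulli mask, broadcast over channels and spatial dims
        mask = torch.zeros(x.size(0), 1, 1, 1, device=x.device).bernoulli_(keep_prob)
        x = x.div(keep_prob)
        x = x.mul(mask)
    return x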
def main():
    if not torch.cuda.is_available():
        logging.info('No GPU device available')
        sys.exit(1)
    cudnn.enabled = True
    logging.info("args = %s", args)
    num_gpus = torch.cuda.device_count()
    genotype = eval("genotypes.%s" % args.arch)
    print('---------Genotype---------')
    logging.info(genotype)
    print('--------------------------')
    resnet_types = {
        'resnet20': 3,
        'resnet32': 5,
        'resnet44': 7,
        'resnet56': 9,
        'resnet110': 18
    }
    n_sizes = resnet_types[args.net_type]
    logging.info('Number of attentional residual block(s): %s', n_sizes * 3)
    model = att_resnet_cifar(genotype, n_size=n_sizes, num_classes=CIFAR_CLASSES)
    if num_gpus > 1:
        model = torch.nn.DataParallel(model)
    model = model.cuda()
    try:
        utils.load(model, args.model_path)
    except:
        model = model.module
        utils.load(model, args.model_path)
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    if args.cifar100:
        _, test_transform = utils._data_transforms_cifar100(args)
    else:
        _, test_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        test_data = dset.CIFAR100(root=args.tmp_data_dir, train=False, download=True, transform=test_transform)
    else:
        test_data = dset.CIFAR10(root=args.tmp_data_dir, train=False, download=True, transform=test_transform)
    test_queue = torch.utils.data.DataLoader(test_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=2)
    if num_gpus > 1:
        model.module._block.drop_path_prob = 0.
    else:
        model._block.drop_path_prob = 0.
    test_acc, test_obj = infer(test_queue, model, criterion)
    logging.info('TEST ACCURACY: --- %f%% ---', test_acc)
def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed) logging.info('gpu device = %d' % args.gpu) logging.info("args = %s", args) genotype = eval("genotypes.%s" % args.arch) logging.info('genotype = %s', genotype) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) model = model.cuda() logging.info("param size = %fMB", utils.count_parameters_in_MB(model)) criterion = nn.CrossEntropyLoss() criterion = criterion.cuda() optimizer = torch.optim.SGD( model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay ) # train_transform, valid_transform = utils._data_transforms_cifar10(args) # # train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) # valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) """From https://github.com/chenxin061/pdarts/""" if args.cifar100: train_transform, valid_transform = utils._data_transforms_cifar100(args) else: train_transform, valid_transform = utils._data_transforms_cifar10(args) if args.cifar100: train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR100(root=args.data, train=False, download=True, transform=valid_transform) else: train_data = dset.CIFAR10(root=args.data, train=True, download=True, transform=train_transform) valid_data = dset.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform) train_queue = torch.utils.data.DataLoader( train_data, batch_size=args.batch_size, shuffle=True, pin_memory=True, num_workers=2) valid_queue = torch.utils.data.DataLoader( valid_data, batch_size=args.batch_size, shuffle=False, pin_memory=True, num_workers=2) scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, float(args.epochs)) for epoch in range(args.epochs): scheduler.step() logging.info('epoch %d lr %e', epoch, scheduler.get_lr()[0]) model.drop_path_prob = args.drop_path_prob * epoch / args.epochs train_acc, train_obj = train(train_queue, model, criterion, optimizer) logging.info('train_acc %f', train_acc) valid_acc, valid_obj = infer(valid_queue, model, criterion) logging.info('valid_acc %f', valid_acc) utils.save(model, os.path.join(args.save, 'weights.pt'))
def main():
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
    logging.info('gpu device = %d' % args.gpu)
    logging.info("args = %s", args)
    criterion = nn.CrossEntropyLoss()
    criterion = criterion.cuda()
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers, criterion)
    model = model.cuda()
    logging.info("param size = %fMB", utils.count_parameters_in_MB(model))
    optimizer = torch.optim.SGD(
        model.parameters(),  # SGD is used here to train the model weights
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    # prepare dataset
    if args.cifar100:
        train_transform, valid_transform = utils._data_transforms_cifar100(args)
    else:
        train_transform, valid_transform = utils._data_transforms_cifar10(args)
    if args.cifar100:
        train_data = dset.CIFAR100(root=args.data, train=True, download=True, transform=train_transform)
    else:
        train_data = dset.CIFAR10(root=args.data, train=True, download=False, transform=train_transform)
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(args.train_portion * num_train))  # split the train set into train/valid according to train_portion
    train_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),  # training-split indices: from 0 to the split point
        pin_memory=True,
        num_workers=4)
    valid_queue = torch.utils.data.DataLoader(
        train_data,
        batch_size=args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),  # validation-split indices: from the split point to the end
        pin_memory=True,
        num_workers=4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.epochs), eta_min=args.learning_rate_min)
    architect = Architect(model, args)
    for epoch in range(args.epochs):
        lr = scheduler.get_last_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)
        # training
        train_acc, train_obj = train(train_queue, valid_queue, model, architect, criterion, optimizer, lr)
        logging.info('train_acc %f', train_acc)
        genotype = model.genotype()  # log the architecture found at this epoch; copy the logged genotype into genotypes to train it from scratch
        logging.info('genotype = %s', genotype)
        print(F.softmax(model.alphas_normal, dim=-1))
        print(F.softmax(model.alphas_reduce, dim=-1))
        # validation
        valid_acc, valid_obj = infer(valid_queue, model, criterion)
        logging.info('valid_acc %f', valid_acc)
        utils.save(model, os.path.join(args.save, 'weights.pt'))
        scheduler.step()
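# For context, the train(...) call above is assumed to follow the standard DARTS pattern: one
# architecture update on a batch from the validation split, then one weight update on the
# training batch. This compressed sketch relies on the usual DARTS helpers (utils.AvgrageMeter,
# utils.accuracy), a first-order architect.step, and a grad-clip value of 5.0 -- all of which are
# assumptions rather than this repository's exact code.
def train_sketch(train_queue, valid_queue, model, architect, criterion, optimizer, lr):
    objs, top1 = utils.AvgrageMeter(), utils.AvgrageMeter()
    valid_iter = iter(valid_queue)
    for step, (x, target) in enumerate(train_queue):
        model.train()
        x, target = x.cuda(), target.cuda(non_blocking=True)
        try:
            x_search, target_search = next(valid_iter)
        except StopIteration:
            valid_iter = iter(valid_queue)
            x_search, target_search = next(valid_iter)
        x_search = x_search.cuda()
        target_search = target_search.cuda(non_blocking=True)
        # architecture step on the held-out split
        architect.step(x, target, x_search, target_search, lr, optimizer, unrolled=False)
        # weight step on the training split
        optimizer.zero_grad()
        logits = model(x)
        loss = criterion(logits, target)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 5.0)
        optimizer.step()
        prec1, _ = utils.accuracy(logits, target, topk=(1, 5))
        objs.update(loss.item(), x.size(0))
        top1.update(prec1.item(), x.size(0))
    return top1.avg, objs.avg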