def evolution_search():
    for exp_type in config_dict()['exp_order']:
        save_dir = f'{os.path.dirname(os.path.abspath(__file__))}/search-{args.save}-{exp_type}-{dataset}-{time.strftime("%Y%m%d-%H%M%S")}'
        utils.create_exp_dir(save_dir)
        fh = logging.FileHandler(os.path.join(save_dir, 'log.txt'))
        fh.setFormatter(logging.Formatter(log_format))
        logging.getLogger().addHandler(fh)
        np.random.seed(args.seed)
        logging.info("args = %s", args)

        # set up the NAS search problem
        if exp_type == 'micro':  # NASNet search space
            n_var, lb, ub = set_micro_exp(args)
        elif exp_type == 'macro':  # modified GeneticCNN search space
            n_var, lb, ub = set_macro_exp(args)
        elif exp_type in ('micromacro', 'micro_garbage', 'macro_garbage'):
            # combined search space: macro genes followed by micro genes
            n_var_mac, lb_mac, ub_mac = set_macro_exp(args)
            n_var_mic, lb_mic, ub_mic = set_micro_exp(args)
            n_var = n_var_mic + n_var_mac
            lb = np.array([*lb_mac, *lb_mic])
            ub = np.array([*ub_mac, *ub_mic])
        else:
            raise NameError('Unknown search space type')

        problem = NAS(n_var=n_var, search_space=exp_type,
                      n_obj=2, n_constr=0, lb=lb, ub=ub,
                      init_channels=args.init_channels, layers=args.layers,
                      epochs=args.epochs, save_dir=save_dir,
                      batch_size=args.batch_size)

        # configure the NSGA-Net method
        method = engine.nsganet(pop_size=args.pop_size,
                                n_offsprings=args.n_offspring,
                                eliminate_duplicates=True)

        if args.termination == 'ngens':
            termination = ('n_gen', args.n_gens)
        elif args.termination == 'time':
            termination = TimeTermination(time.time(), args.max_time)
        else:
            raise NameError('Unknown termination criterion')

        res = minimize(problem,
                       method,
                       callback=do_every_generations,
                       termination=termination)

        # the first objective is (100 - validation accuracy), so smaller is better
        val_accs = res.pop.get('F')[:, 0]
        if exp_type in ('micromacro', 'micro'):
            best_idx = np.where(val_accs == np.min(val_accs))[0][0]
            best_genome = res.pop[best_idx].X
            with open(f'{save_dir}/best_genome.pkl', 'wb') as pkl_file:
                pickle.dump(best_genome, pkl_file)
            if exp_type == 'micromacro':
                # reuse the best micro genome when building macro-stage models
                set_config('micro_creator', make_micro_creator(best_genome))
    return (100 - np.min(val_accs)) / 100
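# For reference, a minimal sketch of the per-generation callback handed to
# minimize() above. pymoo invokes the callback with the algorithm object once
# per generation; n_gen and pop are standard pymoo attributes, while the
# logging format and the snapshot filename here are illustrative assumptions.
def do_every_generations_sketch(algorithm):
    gen = algorithm.n_gen
    pop_obj = algorithm.pop.get('F')
    # objective 0 is (100 - validation accuracy); objective 1 is complexity
    logging.info('gen = %d: error best/mean/worst = %.3f / %.3f / %.3f',
                 gen, np.min(pop_obj[:, 0]), np.mean(pop_obj[:, 0]),
                 np.max(pop_obj[:, 0]))
    # snapshot the objectives so an interrupted search can still be inspected
    with open(os.path.join(args.save, f'gen_{gen}_objs.pkl'), 'wb') as f:
        pickle.dump(pop_obj, f)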
def __init__(self, save_to=None, continue_from=None, args=None, **kwargs):
    kwargs["individual"] = Individual(rank=np.inf, crowding=-1)
    super().__init__(**kwargs)
    logger.info("new version")
    self.tournament_type = "comp_by_dom_and_crowding"
    self.func_display_attrs = disp_multi_objective
    self.continue_from = continue_from
    self.args = args
    # note: the save_to argument is ignored; the path is derived from args
    self.save_to = os.path.join(args.save, args.code)
    utils.create_exp_dir(self.save_to)
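# A hypothetical construction of the engine this __init__ backs. The
# pop_size/n_offsprings/eliminate_duplicates keywords appear in the search
# driver above; continue_from and args are assumed to be forwarded to
# __init__, and args must carry save and code since save_to is derived from
# them. The checkpoint path is purely illustrative.
method = engine.nsganet(pop_size=args.pop_size,
                        n_offsprings=args.n_offspring,
                        eliminate_duplicates=True,
                        continue_from='checkpoints/gen_0020.pkl',
                        args=args)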
    (`RankAndCrowdingSurvival` in this implementation, CHANGED)
    3. call the callback function (`do_every_generations` in this implementation)
    4. call `GeneticAlgorithm.selection` to make the selection; func_comp serves
       as the comparison function (`binary_tournament` in this implementation,
       same as NSGA-II)
    then go back to step 1
    '''
    res = minimize(problem,
                   method,
                   callback=do_every_generations,
                   termination=('n_gen', args.n_gens))
    return


if __name__ == "__main__":
    args.save = 'search-{}-{}-{}'.format(args.save, args.search_space,
                                         time.strftime("%Y%m%d-%H%M%S"))
    utils.create_exp_dir(args.save)
    log_format = '%(asctime)s %(message)s'
    logging.basicConfig(stream=sys.stdout, level=logging.INFO,
                        format=log_format, datefmt='%m/%d %I:%M:%S %p')
    fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
    fh.setFormatter(logging.Formatter(log_format))
    logging.getLogger().addHandler(fh)

    pop_hist = []  # keep track of every evaluated architecture
    main()
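# A minimal sketch of the comparison step 4 describes: prefer the individual
# on the better nondominated front, and break ties with the larger crowding
# distance. The rank/crowding attributes match the Individual constructed in
# __init__ above; the real pymoo binary_tournament has a different signature.
def binary_tournament_sketch(pop, pairs):
    winners = []
    for a, b in pairs:
        if pop[a].rank < pop[b].rank:            # a lies on a better front
            winners.append(a)
        elif pop[b].rank < pop[a].rank:          # b lies on a better front
            winners.append(b)
        elif pop[a].crowding > pop[b].crowding:  # same front: prefer less crowded
            winners.append(a)
        else:
            winners.append(b)
    return np.array(winners)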
def main(args):
    save_dir = f'{os.path.dirname(os.path.abspath(__file__))}/../train/train-{args.save}-{time.strftime("%Y%m%d-%H%M%S")}'
    utils.create_exp_dir(save_dir)
    data_root = '../data'
    CIFAR_CLASSES = config_dict()['n_classes']
    INPUT_CHANNELS = config_dict()['n_channels']

    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)
    if args.auxiliary and args.net_type == 'macro':
        logging.info('auxiliary head classifier not supported for macro search space models')
        sys.exit(1)
    logging.info("args = %s", args)

    cudnn.enabled = True
    cudnn.benchmark = True
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    best_acc = 0  # best validation accuracy seen so far

    # Data
    train_transform, valid_transform = utils._data_transforms_cifar10(args)
    # train_data = torchvision.datasets.CIFAR10(root=args.data, train=True, download=True, transform=train_transform)
    # valid_data = torchvision.datasets.CIFAR10(root=args.data, train=False, download=True, transform=valid_transform)
    train_data = my_cifar10.CIFAR10(root=data_root, train=True, download=False, transform=train_transform)
    valid_data = my_cifar10.CIFAR10(root=data_root, train=False, download=False, transform=valid_transform)

    train_queue = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,
                                              shuffle=True, pin_memory=True, num_workers=1)
    valid_queue = torch.utils.data.DataLoader(valid_data, batch_size=128,
                                              shuffle=False, pin_memory=True, num_workers=1)

    # Model
    if args.net_type == 'micro':
        logging.info("==> Building micro search space encoded architectures")
        genotype = eval("genotypes.%s" % args.arch)
        net = NetworkCIFAR(args.init_channels, num_classes=CIFAR_CLASSES,
                           num_channels=INPUT_CHANNELS, layers=args.layers,
                           auxiliary=args.auxiliary, genotype=genotype, SE=args.SE)
    elif args.net_type == 'macro':
        genome = eval("macro_genotypes.%s" % args.arch)
        channels = [(INPUT_CHANNELS, 128), (128, 128), (128, 128)]
        net = EvoNetwork(genome, channels, CIFAR_CLASSES,
                         (config_dict()['INPUT_HEIGHT'], config_dict()['INPUT_WIDTH']),
                         decoder='dense')
    else:
        raise NameError('Unknown network type, please only use supported network types')

    # logging.info("{}".format(net))
    logging.info("param size = %fMB", utils.count_parameters_in_MB(net))

    net = net.to(device)
    n_epochs = args.epochs
    parameters = filter(lambda p: p.requires_grad, net.parameters())
    criterion = nn.CrossEntropyLoss()
    criterion.to(device)
    optimizer = optim.SGD(parameters, lr=args.learning_rate,
                          momentum=args.momentum, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs,
                                                           eta_min=args.min_learning_rate)

    for epoch in range(n_epochs):
        # get_last_lr replaces the deprecated get_lr
        logging.info('epoch %d lr %e', epoch, scheduler.get_last_lr()[0])
        net.droprate = args.droprate * epoch / args.epochs  # linearly ramp drop-path rate
        train(args, train_queue, net, criterion, optimizer)
        _, valid_acc = infer(args, valid_queue, net, criterion)
        # step the scheduler after the epoch's optimizer updates, as recent PyTorch expects
        scheduler.step()
        if valid_acc > best_acc:
            utils.save(net, os.path.join(save_dir, 'weights.pt'))
            best_acc = valid_acc

    return best_acc
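# For reference, the closed form of the schedule CosineAnnealingLR applies in
# the loop above: the learning rate decays from args.learning_rate to
# args.min_learning_rate along a half cosine over n_epochs. The sample values
# below are illustrative, not the script's defaults.
import math

def cosine_lr(epoch, n_epochs, lr_max, lr_min):
    return lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(math.pi * epoch / n_epochs))

# cosine_lr(0, 600, 0.025, 0.0)   -> 0.025   (start of training)
# cosine_lr(300, 600, 0.025, 0.0) -> 0.0125  (halfway)
# cosine_lr(600, 600, 0.025, 0.0) -> 0.0     (end of training)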
def train_and_evaluate(
    genome: tuple,
    individual=None,
    args: argparse.Namespace = None,
    first_gen: bool = True,
    save: str = None,
    client_id: str = None,
):
    """Train and evaluate an individual on a TPU or GPU worker.

    Results are always saved in the save dir to make distributed data
    management easier.

    Args:
        genome: encoded architecture to decode and train.
        individual: the population member being evaluated; must carry an `id`,
            and `parents` after the first generation.
        args: parsed command-line arguments.
        first_gen: whether this is the first generation (no parents yet).
        save: name of the experiment directory to save results in.
        client_id: identifier of the worker evaluating this individual.

    Returns:
        A dict with the individual's id, save path, validation accuracy,
        parameter count, FLOPs, and (optionally) the weight blob name.
    """
    if args.stream == "tpu":
        # must warm up the TPU runtime before spawning workers
        import torch_xla

    auxiliary = False
    assert hasattr(individual, "id")
    if not first_gen:
        # this is not the first generation, so mating should have occurred
        assert hasattr(individual, "parents")

    expr_root = ""
    save_pth = os.path.join(expr_root, "{}".format(save))
    utils.create_exp_dir(save_pth)

    CIFAR_CLASSES = 10
    learning_rate = 0.025
    momentum = 0.9
    weight_decay = 3e-4
    data_root = "../data"
    batch_size = args.batch_size
    auxiliary_weight = 0.4
    grad_clip = 5
    report_freq = 50
    train_params = {
        "auxiliary": auxiliary,
        "auxiliary_weight": auxiliary_weight,
        "grad_clip": grad_clip,
        "report_freq": report_freq,
    }

    if args.search_space == "micro":
        genotype = micro_encoding.decode(genome)
        model = Network(args.init_channels, CIFAR_CLASSES, args.layers, auxiliary, genotype)
        if not first_gen:
            # inherit weights from parents instead of training from scratch
            model = manage_weights(model, individual, expr_root, args)
    elif args.search_space == "macro":
        raise NotImplementedError("Not supported")
    else:
        raise NameError("Unknown search space type")

    logger.info("Architecture = %s", genotype)

    try:
        max_weight = args.max_weight
    except AttributeError:
        print("Could not determine maximum weight argument")
        max_weight = 1e20
    clip = weightClip(max_weight=max_weight, min_weight=-max_weight)

    if args.stream == "tpu":
        from projectcode.training.tpu import get_map_fn
        import torch_xla.distributed.xla_multiprocessing as xmp

        WRAPPED_MODEL = xmp.MpModelWrapper(model)
        logger.info("Executing TPU Training")
        map_fn = get_map_fn(model, train_params, data_root, momentum, weight_decay,
                            CIFAR_CLASSES, learning_rate, args.layers, batch_size,
                            epochs=args.epochs, save_pth=save_pth, args=args,
                            WRAPPED_MODEL=WRAPPED_MODEL, clip=clip)
        FLAGS = {}
        xmp.spawn(map_fn, args=(FLAGS,), nprocs=1, start_method="fork")
        valid_acc, n_flops = torch.load("results.pt")
    elif args.stream == "gpu":
        from projectcode.training.gpu import train_gpu

        logger.info("Executing GPU Training")
        valid_acc, n_flops = train_gpu(model, train_params, data_root, momentum,
                                       weight_decay, CIFAR_CLASSES, learning_rate,
                                       args.layers, batch_size, epochs=args.epochs,
                                       save_pth=save_pth, args=args, clip=clip)
    else:
        raise NameError("Unrecognized client stream")

    # parameter count in millions, trainable parameters only
    n_params = (np.sum(np.prod(v.size()) for v in
                       filter(lambda p: p.requires_grad, model.parameters())) / 1e6)

    if main_config.distributed_cloud and args.weight_init == "lammarckian":
        wt_path = f"{args.code}_{client_id}_weights_{individual.id:05d}.pt"
        torch.save(model.state_dict(), wt_path)
        blob_name = upload_blob(wt_path)
    else:
        blob_name = None
    torch.save(model.state_dict(), os.path.join(save_pth, "weights.pt"))

    result_dict = {
        "id": individual.id,
        "save_path": save_pth,
        "valid_acc": valid_acc,
        "params": n_params,
        "flops": n_flops,
        "wt_blob_name": blob_name,
    }
    dump(result_dict, os.path.join(save_pth, "result.pkl"))

    return result_dict
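# weightClip's definition is not shown above; this sketch is a guess at its
# contract: a callable that clamps layer weights into [min_weight, max_weight],
# applied during training (e.g. model.apply(clip) after each optimizer step).
# The real interface may differ.
import torch

class WeightClipSketch:
    def __init__(self, max_weight: float, min_weight: float):
        self.max_weight = max_weight
        self.min_weight = min_weight

    @torch.no_grad()
    def __call__(self, module: torch.nn.Module):
        # clamp in place any module that owns a weight tensor
        if getattr(module, "weight", None) is not None:
            module.weight.clamp_(self.min_weight, self.max_weight)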