def sample_from_pretrained_controller(args):
    """
    Experimental Implementation.

    Load the controller's weights stored as 'controller_params.h5' under
    args.model_save_path, then let the controller sample
    args.num_sampling architectures. Every sampled architecture is shown
    and saved to 'sampled_micro_arch_<index>.npy' (index starts at 0).
    """
    assert args.num_sampling > 0, "num_sampling must be > 0."
    weights_file = os.path.join(args.model_save_path, 'controller_params.h5')
    assert os.path.exists(
        weights_file), "controller's weights seem to be missing!"
    nn.parameter.load_parameters(weights_file)

    for idx in range(args.num_sampling):
        banner = " Sampled Architecture {} / {} ".format(
            idx + 1, args.num_sampling)
        print("\n{0:-^80s}\n".format(banner))

        # only the sampled architecture is needed here; the other two
        # values returned by the controller are discarded.
        with nn.auto_forward():
            both_archs, _, _ = sample_from_controller(args)

        show_arch(both_archs)

        # the same name is used for saving and for the hint printed below.
        filename = "sampled_micro_arch_{}.npy".format(idx)
        np.save(filename, np.array(both_archs))

        print(
            "when you want to train the sampled network from scratch,\n"
            " type like 'python micro_retrain.py <OPTION> --recommended-arch {}'"
            .format(filename))
def main():
    """
    Train the network whose architecture is stored in the file given by
    args.recommended_arch.

    The architecture is loaded with np.load, displayed, and handed to
    CNN_run together with the CIFAR-10 iterators and their statistics.

    Raises:
        ValueError: when no architecture file is given. Without this check
            the problem only surfaced as an unbound-variable error at
            np.load, after the context and the datasets were prepared.
    """
    args = get_micro_args()

    # Fail fast before any expensive setup.
    if not args.recommended_arch:
        raise ValueError(
            "recommended_arch must be specified to retrain a network.")
    filename = args.recommended_arch

    # NOTE(review): the reason for subtracting 2 is not visible in this
    # function — presumably the nodes used as cell inputs; confirm.
    args.num_nodes = args.num_nodes - 2

    ctx = get_extension_context(
        args.context, device_id=args.device_id, type_config=args.type_config)
    nn.set_default_context(ctx)
    # The returned module object is not used in this function, so it is
    # not bound to a name; the call itself is kept.
    nn.ext_utils.import_extension_module(args.context)

    data_iterator = data_iterator_cifar10
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    (mean_val_train, std_val_train,
     channel, img_height, img_width, num_class) = get_data_stats(tdata)
    mean_val_valid, std_val_valid, _, _, _, _ = get_data_stats(vdata)

    data_dict = {
        "train_data": (tdata, mean_val_train, std_val_train),
        "valid_data": (vdata, mean_val_valid, std_val_valid),
        "basic_info": (channel, img_height, img_width, num_class),
    }

    check_arch = np.load(filename)
    print("Train the model whose architecture is:")
    show_arch(check_arch)

    # The value returned by CNN_run is not used by this entry point.
    CNN_run(args, check_arch.tolist(), data_dict,
            with_train=True, after_search=True)
def sample_arch_and_train(args, data_dict, controller_weights_dict):
    """
    Run one round of controller training followed by child training.
        1. Let the controller sample args.num_candidate architectures and
           evaluate each of them (through get_sample_and_feedback).
        2. Update the controller with the gradients accumulated from the
           losses of those samples.
        3. Pick the architecture with the highest validation accuracy and
           train its parameters.
    Returns the selected architecture and the value returned by CNN_run
    for it.
    """
    # solver dedicated to the controller's weights
    controller_solver = S.Momentum(args.control_lr)
    controller_solver.set_parameters(
        controller_weights_dict, reset=False, retain_state=True)
    controller_solver.zero_grad()

    accuracies = []
    candidates = []

    with nn.auto_forward():
        for step in range(1, args.num_candidate + 1):
            header = " Architecture {} / {} ".format(step, args.num_candidate)
            print("{0:-^80s}".format(header))

            # sample one architecture; its feedback is returned as the RL loss
            loss, val_acc, both_archs = get_sample_and_feedback(
                args, data_dict)

            accuracies.append(val_acc)
            candidates.append(both_archs)
            # gradients pile up over candidates (zero_grad is called only once)
            loss.backward()

    print("{0:-^80s}\n".format(" Reinforcement Learning Phase "))
    # NOTE: `loss` here is the one from the last sampled candidate.
    print("current accumulated loss:", loss.d)

    controller_solver.weight_decay(0.025)
    controller_solver.update()  # controller's weights are updated here

    print("\n{0:-^80s}\n".format(" CNN Learning Phase "))
    winner = np.argmax(accuracies)
    sample_arch = candidates[winner]
    print("Train the model whose architecture is:")
    show_arch(sample_arch)
    print("and its accuracy is: {:.2f} %\n".format(100 * np.max(accuracies)))
    print("Learnable Parameters:", params_count(nn.get_parameters()))

    # the child network with the best validation accuracy gets trained.
    trained_acc = CNN_run(args, sample_arch, data_dict, with_train=True)

    return sample_arch, trained_acc
def get_sample_and_feedback(args, data_dict):
    """
    Let the controller predict one architecture and test its performance
    to get feedback. Here the feedback is validation accuracy and will be
    reused to train the controller.

    Returns a tuple (loss, val_acc, both_archs):
        loss: the sampled log probability scaled by 1 / args.num_candidate
              and multiplied by the negated reward.
        val_acc: the value returned by CNN_run for the sampled architecture.
        both_archs: the architecture returned by sample_from_controller.
    """
    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    both_archs, log_probs, entropys = sample_from_controller(args)
    show_arch(both_archs)

    # CNN_run is executed with auto-forward switched off. The switch is
    # turned back on in `finally` so that an exception raised inside
    # CNN_run cannot leave auto-forward disabled for the caller.
    nn.set_auto_forward(False)
    try:
        val_acc = CNN_run(args, both_archs, data_dict)
    finally:
        nn.set_auto_forward(True)
    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc
    if entropy_weight is not None:
        # entropy bonus: reward = entropy * entropy_weight + val_acc
        reward = F.add_scalar(
            F.mul_scalar(entropys, entropy_weight), reward).d

    sample_log_prob = F.mul_scalar(log_probs, (1 / args.num_candidate))

    if args.use_variance_reduction:
        # NOTE(review): the baseline restarts from 0.0 on every call, so
        # the update below reduces to baseline = (1 - bl_dec) * reward and
        # the reward becomes bl_dec * reward. A baseline carried over
        # between calls may have been intended — confirm.
        baseline = 0.0
        # variance reduction
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)

    return loss, val_acc, both_archs
def main():
    """
    Start architecture search and save the architecture found by the
    controller during the search.

    When args.sampling_only is set, architectures are only sampled from a
    previously saved controller and the function returns right away.
    Otherwise the controller's weights are created, the search is run, the
    selected architecture is saved to args.recommended_arch, and the
    controller's weights are saved as 'controller_params.h5' under
    args.model_save_path.
    """
    args = get_micro_args()
    arguments_assertion(args)
    # NOTE(review): the reason for subtracting 2 is not visible in this
    # function — presumably the nodes used as cell inputs; confirm.
    args.num_nodes = args.num_nodes - 2

    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    # `ext` is only referenced by the optional retraining snippet at the
    # end of this function.
    ext = nn.ext_utils.import_extension_module(args.context)

    if args.sampling_only:
        sample_from_pretrained_controller(args)
        return

    data_iterator = data_iterator_cifar10
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    (mean_val_train, std_val_train,
     channel, img_height, img_width, num_class) = get_data_stats(tdata)
    mean_val_valid, std_val_valid, _, _, _, _ = get_data_stats(vdata)

    data_dict = {
        "train_data": (tdata, mean_val_train, std_val_train),
        "valid_data": (vdata, mean_val_valid, std_val_valid),
        "basic_info": (channel, img_height, img_width, num_class)
    }

    initializer = I.UniformInitializer((-0.1, 0.1))

    # Prepare all the weights in advance
    controller_weights_and_shape = {
        'controller_lstm/0/lstm/affine/W':
            (2 * args.lstm_size, 4, args.lstm_size),
        'controller_lstm/0/lstm/affine/b': (4, args.lstm_size),
        'controller_lstm/1/lstm/affine/W':
            (2 * args.lstm_size, 4, args.lstm_size),
        'controller_lstm/1/lstm/affine/b': (4, args.lstm_size),
        'ops/affine/W': (args.lstm_size, args.num_ops),
        'skip_affine_1/affine/W': (args.lstm_size, args.lstm_size),
        'skip_affine_2/affine/W': (args.lstm_size, 1),
        'skip_affine_3/affine/W': (args.lstm_size, args.lstm_size)
    }
    for w_name, w_shape in controller_weights_and_shape.items():
        nn.parameter.get_parameter_or_create(w_name,
                                             w_shape,
                                             initializer=initializer,
                                             need_grad=True)

    # create dictionary of controller's weights
    # (the registry is queried once instead of once per weight name)
    registered_params = nn.get_parameters()
    controller_weights_dict = {
        w_name: registered_params[w_name]
        for w_name in controller_weights_and_shape
    }

    arch_change, best_arch = search_architecture(args, data_dict,
                                                 controller_weights_dict)

    if args.select_strategy == "best":
        print(
            "saving the model which achieved the best validation accuracy as {}."
            .format(args.recommended_arch))
        check_arch = best_arch
    else:
        # Use the latest architecture. It is not necessarily the one with
        # the best validation accuracy.
        print("saving the latest model recommended by the controller as {}.".
              format(args.recommended_arch))
        check_arch = arch_change[-1]
    np.save(args.recommended_arch, np.array(check_arch))

    print("The saved architecture is;")
    show_arch(check_arch)
    print(
        "when you want to train the network from scratch,\n"
        " type 'python micro_retrain.py <OPTION> --recommended-arch {}'"
        .format(args.recommended_arch))

    # save the controller's weights so that other architectures can be made.
    # Every parameter that is not part of the controller is removed first.
    # The names to keep include "w_emb", "anchors" and "anchors_w_1" in
    # addition to the weights created above; filtering only on the latter
    # (as was done before) dropped those three from the saved file.
    controller_weights = set(controller_weights_and_shape) | {
        "w_emb", "anchors", "anchors_w_1"
    }
    # iterate over a snapshot of the names because parameters are popped
    # inside the loop.
    for param_name in list(nn.get_parameters(grad_only=False)):
        if param_name not in controller_weights:
            nn.parameter.pop_parameter(param_name)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'controller_params.h5'))

    # If you want to retrain the model recommended by the controller
    # right after architecture search, uncomment the lines below.
    # nn.clear_parameters()
    # ext.clear_memory_cache()  # Clear all the Variables.
    # val_acc = CNN_run(args, both_archs, data_dict, with_train=True, is_retrain=True)

    return