def main():
    """
        Train, from scratch, the architecture stored in the file given by
        `args.recommended_arch`.

        Raises:
            ValueError: if no architecture file was specified.
    """
    args = get_macro_args()

    # Fail early with a clear message; previously a missing value surfaced
    # only later as an unbound `filename`, after the datasets were loaded.
    filename = args.recommended_arch
    if not filename:
        raise ValueError(
            "no architecture file given; specify it with --recommended-arch.")

    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    # The returned extension module is not used in this function.
    nn.ext_utils.import_extension_module(args.context)

    data_iterator = data_iterator_cifar10
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    mean_val_train, std_val_train, channel, img_height, img_width, num_class = get_data_stats(
        tdata)
    mean_val_valid, std_val_valid, _, _, _, _ = get_data_stats(vdata)

    data_dict = {
        "train_data": (tdata, mean_val_train, std_val_train),
        "valid_data": (vdata, mean_val_valid, std_val_valid),
        "basic_info": (channel, img_height, img_width, num_class)
    }

    # NOTE(security): allow_pickle=True lets numpy unpickle arbitrary objects,
    # which can execute code. Only load architecture files you trust.
    check_arch = np.load(filename, allow_pickle=True)
    print("Train the model whose architecture is:")
    show_arch(check_arch)

    # The accuracy returned by CNN_run is not used here.
    CNN_run(args,
            check_arch.tolist(),
            data_dict,
            with_train=True,
            after_search=True)
Exemple #2
0
def sample_from_pretrained_controller(args):
    """
        Experimental Implementation.

        Load the controller's weights from 'controller_params.h5' under
        `args.model_save_path`, sample `args.num_sampling` architectures
        from the controller, and save each one as
        "sampled_macro_arch_<index>.npy" in the current directory.
    """
    assert args.num_sampling > 0, "num_sampling must be > 0."
    weights_path = os.path.join(args.model_save_path, 'controller_params.h5')
    assert os.path.exists(
        weights_path), "controller's weights seem to be missing!"
    nn.parameter.load_parameters(weights_path)

    for i in range(args.num_sampling):
        header = " Sampled Architecture {} / {} ".format(
            i + 1, args.num_sampling)
        print("\n{0:-^80s}\n".format(header))

        # only the sampled sequence is needed; the other outputs are dropped
        with nn.auto_forward():
            arc_seq, _, _, _ = sample_from_controller(args)

        # flatten the per-step arrays into one plain list
        sample_arch = [item for arc in arc_seq for item in arc.tolist()]

        show_arch(sample_arch)
        filename = "sampled_macro_arch_{}.npy".format(i)
        np.save(filename, np.array(sample_arch))

    # `filename` here is the file written in the last iteration
    print("when you want to train the sampled network from scratch,\n\
    type like 'python macro_retrain.py <OPTION> --recommended-arch {}'".format(
        filename))
Exemple #3
0
def sample_arch_and_train(args, data_dict, controller_weights_dict):
    """
        Run one round of the search.
        1. Sample `args.num_candidate` architectures with get_sample_and_feedback
           and back-propagate each returned loss so that gradients accumulate.
        2. Update the controller's weights with the solver.
        3. Pick the sampled architecture with the highest validation accuracy
           and train it with CNN_run.

        Returns the chosen architecture and the value returned by CNN_run.
    """

    # solver dedicated to the controller's weights
    controller_solver = S.Momentum(args.control_lr)
    controller_solver.set_parameters(controller_weights_dict,
                                     reset=False,
                                     retain_state=True)
    controller_solver.zero_grad()

    accuracies = []
    candidates = []

    with nn.auto_forward():
        for order in range(1, args.num_candidate + 1):
            banner = " Architecture {} / {} ".format(order, args.num_candidate)
            print("{0:-^80s}".format(banner))

            # one sampled architecture, plus its RL feedback expressed as a loss
            loss, accuracy, candidate = get_sample_and_feedback(
                args, data_dict)

            accuracies.append(accuracy)
            candidates.append(candidate)
            loss.backward()  # gradients add up over the candidates

        print("{0:-^80s}\n".format(" Reinforcement Learning Phase "))
        # `loss` is the one returned for the last candidate
        print("current accumulated loss:", loss.d)

        # update the controller
        controller_solver.weight_decay(0.025)
        controller_solver.update()

        print("\n{0:-^80s}\n".format(" CNN Learning Phase "))
        best_idx = np.argmax(accuracies)
        sample_arch = candidates[best_idx]
        print("Train the model whose architecture is:")
        show_arch(sample_arch)
        best_percent = 100 * np.max(accuracies)
        print("and its accuracy is: {:.2f} %\n".format(best_percent))
        print("Learnable Parameters:", params_count(nn.get_parameters()))

    # train the child network that scored best on validation
    val_acc = CNN_run(args, sample_arch, data_dict, with_train=True)

    return sample_arch, val_acc
Exemple #4
0
def get_sample_and_feedback(args, data_dict):
    """
        Let the controller predict one architecture and test its performance to get feedback.
        Here the feedback is validation accuracy and will be reused to train the controller.

        Returns a tuple (loss, val_acc, sample_arch): the loss built from the
        controller's log-probability and the reward, the value returned by
        CNN_run, and the sampled architecture as a flat list.
    """

    skip_weight = args.skip_weight
    entropy_weight = args.entropy_weight
    bl_dec = args.baseline_decay

    arc_seq, log_probs, entropys, skip_penaltys = sample_from_controller(args)

    sample_arch = list()
    for arc in arc_seq:
        sample_arch.extend(arc.tolist())

    show_arch(sample_arch)

    # Evaluate the child network with auto-forward switched off. The
    # `finally` clause switches it back on even if the evaluation raises,
    # so a failure cannot leave the global flag disabled.
    nn.set_auto_forward(False)
    try:
        val_acc = CNN_run(args, sample_arch,
                          data_dict)  # Execute Evaluation Only
    finally:
        nn.set_auto_forward(True)

    print("Accuracy on Validation: {:.2f} %\n".format(100 * val_acc))

    reward = val_acc  # use validation accuracy as reward

    if entropy_weight is not None:
        # add the weighted entropy term; `.d` extracts the computed data
        reward = F.add_scalar(F.mul_scalar(entropys, entropy_weight),
                              reward).d

    # scale by the number of candidates sampled per round
    sample_log_prob = F.mul_scalar(log_probs, (1 / args.num_candidate))

    if args.use_variance_reduction:
        # NOTE(review): the baseline starts from 0.0 on every call, so it is
        # not carried over between samples; the two lines below reduce to
        # reward * bl_dec. Confirm whether a persistent baseline was intended.
        baseline = 0.0
        # variance reduction
        baseline = baseline - ((1 - bl_dec) * (baseline - reward))
        reward = reward - baseline

    loss = F.mul_scalar(sample_log_prob, (-1) * reward)

    if skip_weight is not None:
        adding_penalty = F.mul_scalar(skip_penaltys, skip_weight)
        loss = F.add2(loss, adding_penalty)

    return loss, val_acc, sample_arch
Exemple #5
0
def main():
    """
        Start architecture search and save the architecture found by the controller during the search.

        When `args.sampling_only` is set, only sample architectures from an
        already saved controller and return. Otherwise run the search, save
        the selected architecture to `args.recommended_arch`, and save the
        controller's weights as 'controller_params.h5' under
        `args.model_save_path`.
    """
    args = get_macro_args()
    arguments_assertion(args)

    ctx = get_extension_context(args.context,
                                device_id=args.device_id,
                                type_config=args.type_config)
    nn.set_default_context(ctx)
    # kept because the optional retraining snippet at the end refers to it
    ext = nn.ext_utils.import_extension_module(args.context)

    if args.sampling_only:
        sample_from_pretrained_controller(args)
        return

    data_iterator = data_iterator_cifar10
    tdata = data_iterator(args.batch_size, True)
    vdata = data_iterator(args.batch_size, False)
    mean_val_train, std_val_train, channel, img_height, img_width, num_class = get_data_stats(
        tdata)
    mean_val_valid, std_val_valid, _, _, _, _ = get_data_stats(vdata)

    data_dict = {
        "train_data": (tdata, mean_val_train, std_val_train),
        "valid_data": (vdata, mean_val_valid, std_val_valid),
        "basic_info": (channel, img_height, img_width, num_class)
    }

    initializer = I.UniformInitializer((-0.1, 0.1))

    # Prepare all the weights in advance
    controller_weights_and_shape = {
        'controller_lstm/0/lstm/affine/W':
        (2 * args.lstm_size, 4, args.lstm_size),
        'controller_lstm/0/lstm/affine/b': (4, args.lstm_size),
        'controller_lstm/1/lstm/affine/W':
        (2 * args.lstm_size, 4, args.lstm_size),
        'controller_lstm/1/lstm/affine/b': (4, args.lstm_size),
        'ops/affine/W': (args.lstm_size, args.num_ops),
        'skip_affine_1/affine/W': (args.lstm_size, args.lstm_size),
        'skip_affine_2/affine/W': (args.lstm_size, 1),
        'skip_affine_3/affine/W': (args.lstm_size, args.lstm_size)
    }
    for w_name, w_shape in controller_weights_and_shape.items():
        nn.parameter.get_parameter_or_create(w_name,
                                             w_shape,
                                             initializer=initializer,
                                             need_grad=True)

    # create dictionary of controller's weights
    # (the parameters are fetched once instead of once per weight name)
    registered_params = nn.get_parameters()
    controller_weights_dict = {
        w_name: registered_params[w_name]
        for w_name in controller_weights_and_shape
    }

    arch_change, best_arch = search_architecture(args, data_dict,
                                                 controller_weights_dict)

    if args.select_strategy == "best":
        print(
            "saving the model which achieved the best validation accuracy as {}."
            .format(args.recommended_arch))
        check_arch = best_arch
    else:
        # Use the latest architecture. it's not necessarily the one with the best architecture.
        print("saving the latest model recommended by the controller as {}.".
              format(args.recommended_arch))
        check_arch = arch_change[-1]
    # Save for both strategies. This call used to sit inside the `else`
    # branch, so the "best" strategy announced a save that never happened.
    np.save(args.recommended_arch, np.array(check_arch))

    print("The saved architecture is;")
    show_arch(check_arch)
    print("when you want to train the network from scratch,\n\
    type 'python macro_retrain.py <OPTION> --recommended-arch {}'".format(
        args.recommended_arch))

    # save the controller's weights so that another architectures can be made.
    all_params = nn.get_parameters(grad_only=False)
    controller_weights = set(controller_weights_and_shape) | {"w_emb"}
    # iterate over a snapshot of the names, since parameters are popped
    # from the registry inside the loop
    for param_name in list(all_params):
        if param_name not in controller_weights:
            nn.parameter.pop_parameter(param_name)
    nn.save_parameters(
        os.path.join(args.model_save_path, 'controller_params.h5'))

    # If you want to train the model recommended by the controller from scratch
    # right after architecture search, uncomment the lines below
    # nn.clear_parameters()
    # ext.clear_memory_cache()  # clear all the Variables
    # val_acc = CNN_run(args, check_arch, data_dict, with_train=True, after_search=True)
    return