Example #1
# Context: policy_sess, state_space, Controller, NetworkManager, and the
# upper-case hyperparameters (NUM_LAYERS, MAX_TRIALS, ...) are module-level
# globals defined elsewhere in the source project.
import csv
from keras import backend as K  # or tensorflow.keras.backend, per the project's setup
def train(dataset1, dataset2, initial_state, if_restore):
    """Run the two-phase search: the first half of the trials samples
    architectures locally around `initial_state` on dataset1; the second
    half lets the controller propose architectures on dataset2."""
    total_reward = 0.0
    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess,
                                NUM_LAYERS,
                                state_space,
                                reg_param=REGULARIZATION,
                                exploration=EXPLORATION,
                                controller_cells=CONTROLLER_CELLS,
                                embedding_dim=EMBEDDING_DIM,
                                restore_controller=if_restore)
    # clear the previous files
    controller.remove_files()
    # create the Network Manager
    manager1 = NetworkManager(dataset1,
                              epochs=MAX_EPOCHS,
                              child_batchsize=CHILD_BATCHSIZE,
                              clip_rewards=CLIP_REWARDS,
                              acc_beta=ACCURACY_BETA)
    manager2 = NetworkManager(dataset2,
                              epochs=MAX_EPOCHS,
                              child_batchsize=CHILD_BATCHSIZE,
                              clip_rewards=CLIP_REWARDS,
                              acc_beta=ACCURACY_BETA)

    result_reward = []
    result_total_reward = []
    result_acc = []
    result_moving_acc = []
    result_explore_acc = []
    result_exploit_acc = []

    flag = None
    manager = None
    for trial in range(MAX_TRIALS):
        print("\nTrial %d:" % (trial + 1))
        if 2 * trial < MAX_TRIALS:
            # first half of the trials: warm up on dataset1 by sampling in the
            # local neighbourhood of the initial state, alternating between the
            # "add" variant and the plain local state space
            manager = manager1
            if trial % 2 == 0:
                actions = state_space.get_local_state_space_add(
                    trial // 2, initial_state)
            else:
                actions = state_space.get_local_state_space(
                    trial // 2, initial_state)
        else:
            # second half: the controller proposes actions on dataset2; flag
            # records whether the action was exploitative (True) or exploratory
            # (False), which decides the accuracy bucket at the end of the loop
            manager = manager2
            with policy_sess.as_default():
                K.set_session(policy_sess)
                # get an action for the previous state
                flag, actions = controller.get_action(state)

        # print the action probabilities
        # state_space.print_actions(actions)
        print("Actions : ", state_space.parse_state_space_list(actions))
        # build a model, train and get reward and accuracy from the network manager
        reward, previous_acc, moving_acc = manager.get_rewards(
            state_space.parse_state_space_list(actions))
        print("Rewards : ", reward)
        print("Accuracy : ", previous_acc)
        print("Movingacc :", moving_acc)

        with policy_sess.as_default():
            K.set_session(policy_sess)

            total_reward += reward
            print("Total reward : ", total_reward)

            # actions and states are equivalent, save the state and reward
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step()
            print("Controller loss : %0.6f" % (loss))

            # write the results of this trial into a file
            with open('train_history.csv', mode='a+', newline='') as f:
                data = [previous_acc, reward]
                data.extend(state_space.parse_state_space_list(state))
                writer = csv.writer(f)
                writer.writerow(data)
        print()
        result_reward.append(reward)
        result_total_reward.append(total_reward)
        result_acc.append(previous_acc)
        result_moving_acc.append(moving_acc)
        if 2 * trial >= MAX_TRIALS:
            # controller phase: split accuracies by exploration vs. exploitation
            if not flag:
                result_explore_acc.append(previous_acc)
            else:
                result_exploit_acc.append(previous_acc)

    print("Rewards : ", result_reward)
    print("Total Rewards :", result_total_reward)
    print("Accuracy : ", result_acc)
    print("Moving acc : ", result_moving_acc)
    print("Explore acc :", result_explore_acc)
    print("Exploit acc : ", result_exploit_acc)
Example #2
File: core.py Project: biyifang/NAS
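    # Fragment: body of the surrounding `for trial in range(MAX_TRIALS):` loop;
    # the controller/manager setup is elided in this excerpt.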
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)

        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # write the results of this trial into a file
        with open('train_history.csv', mode='a+', newline='') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)
    print()

print("Total Reward : ", total_reward)