Example #1
import sys


class Game(object):
    def __init__(self):
        self.input = Controller()
        self.world = World(debug=True)

    def output(self, o):
        # push the previous output off the screen, then show the new text
        print('\n' * 100, o)

    def play(self):
        world_output = self.world.start
        while True:
            try:
                self.output(world_output)
                action = self.input.get_action()
                world_output = self.world.make_action(action)
            except SystemExit:
                print "\nBye Bye"
                sys.exit(0)
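
This snippet depends on `Controller` and `World` classes defined elsewhere in the source project. As a rough sketch, stubs like the following would let the loop run end to end; only the interface (`get_action`, `start`, `make_action`) comes from the example above, the method bodies are purely illustrative assumptions:

# Illustrative stubs only -- the real Controller and World live elsewhere in
# the project; these merely satisfy the interface Game.play() relies on.
class Controller(object):
    def get_action(self):
        # read a command from the player; typing "quit" ends the game
        action = input('> ').strip()
        if action.lower() == 'quit':
            raise SystemExit
        return action


class World(object):
    def __init__(self, debug=False):
        self.debug = debug
        self.start = 'You wake up in a dark room.'

    def make_action(self, action):
        # a real World would advance the game state here
        return 'You tried to: %s' % action


if __name__ == '__main__':
    Game().play()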
Example #2
import sys


class Game(object):
    def __init__(self):
        self.input  = Controller()
        self.world  = World(debug=True)

    def output(self, o):
        # push the previous output off the screen, then show the new text
        print('\n'*100, o)

    def play(self):
        world_output = self.world.start
        while True:
            try:
                self.output(world_output)
                action = self.input.get_action()
                world_output = self.world.make_action(action)
            except SystemExit:
                print "\nBye Bye"
                sys.exit(0)
Example #3
def train(dataset1, dataset2, initial_state, if_restore):
    total_reward = 0.0
    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess,
                                NUM_LAYERS,
                                state_space,
                                reg_param=REGULARIZATION,
                                exploration=EXPLORATION,
                                controller_cells=CONTROLLER_CELLS,
                                embedding_dim=EMBEDDING_DIM,
                                restore_controller=if_restore)
    # clear the previous files
    controller.remove_files()
    # create the Network Manager
    manager1 = NetworkManager(dataset1,
                              epochs=MAX_EPOCHS,
                              child_batchsize=CHILD_BATCHSIZE,
                              clip_rewards=CLIP_REWARDS,
                              acc_beta=ACCURACY_BETA)
    manager2 = NetworkManager(dataset2,
                              epochs=MAX_EPOCHS,
                              child_batchsize=CHILD_BATCHSIZE,
                              clip_rewards=CLIP_REWARDS,
                              acc_beta=ACCURACY_BETA)

    result_reward = []
    result_total_reward = []
    result_acc = []
    result_moving_acc = []
    result_explore_acc = []
    result_exploit_acc = []

    flag = None
    manager = None
    for trial in range(MAX_TRIALS):
        print("\nTrial %d:" % (trial + 1))
        if 2 * trial < MAX_TRIALS:
            manager = manager1
            if trial % 2 == 0:
                actions = state_space.get_local_state_space_add(
                    int(trial / 2), initial_state)
            else:
                actions = state_space.get_local_state_space(
                    int(trial / 2), initial_state)
        else:
            manager = manager2
            with policy_sess.as_default():
                K.set_session(policy_sess)
                flag, actions = controller.get_action(
                    state)  # get an action for the previous state

        # print the action probabilities
        # state_space.print_actions(actions)
        print("Actions : ", state_space.parse_state_space_list(actions))
        # build a model, train and get reward and accuracy from the network manager
        reward, previous_acc, moving_acc = manager.get_rewards(
            state_space.parse_state_space_list(actions))
        print("Rewards : ", reward)
        print("Accuracy : ", previous_acc)
        print("Movingacc :", moving_acc)

        with policy_sess.as_default():
            K.set_session(policy_sess)

            total_reward += reward
            print("Total reward : ", total_reward)

            # actions and states are equivalent, save the state and reward
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step()
            print("Controller loss : %0.6f" % (loss))

            # write the results of this trial into a file
            with open('train_history.csv', mode='a+') as f:
                data = [previous_acc, reward]
                data.extend(state_space.parse_state_space_list(state))
                writer = csv.writer(f)
                writer.writerow(data)
        print()
        result_reward.append(reward)
        result_total_reward.append(total_reward)
        result_acc.append(previous_acc)
        result_moving_acc.append(moving_acc)
        if 2 * trial >= MAX_TRIALS:
            if not flag:
                result_explore_acc.append(previous_acc)
            else:
                result_exploit_acc.append(previous_acc)

    print("Rewards : ", result_reward)
    print("Total Rewards :", result_total_reward)
    print("Accuracy : ", result_acc)
    print("Moving acc : ", result_moving_acc)
    print("Explore acc :", result_explore_acc)
    print("Exploit acc : ", result_exploit_acc)
Example #4
File: core.py Project: biyifang/NAS
# create the Network Manager
manager = NetworkManager(dataset,
                         epochs=MAX_EPOCHS,
                         child_batchsize=CHILD_BATCHSIZE,
                         clip_rewards=CLIP_REWARDS,
                         acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()

# train for the configured number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(
            state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)

        total_reward += reward
        print("Total reward : ", total_reward)