Example #1
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess,
                            NUM_LAYERS,
                            state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# seed the controller with an initial state; this example uses a fixed
# architecture instead of a random one
# state = state_space.get_random_state_space(NUM_LAYERS)  # random initialization
state = state_space.get_state([3, 'relu', 3, 'linear'])
print("Initial State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()
is_first = True
# train for the number of trials
for trial in range(MAX_TRIALS):
    if is_first:
        # the first trial uses the seeded initial state
        actions = state
        is_first = False
    else:
        with policy_sess.as_default():
            K.set_session(policy_sess)
            # get an action for the previous state
            actions = controller.get_action(state)
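
The seeded state [3, 'relu', 3, 'linear'] implies a state space with alternating kernel-size and activation states. A hypothetical definition consistent with that call, mirroring the add_state usage shown in Example #2 below; the value lists here are illustrative, not the original configuration:

# Hypothetical state space matching the seeded state above:
# two (kernel, activation) pairs flattened into one list.
state_space = StateSpace()
state_space.add_state(name='kernel', values=[1, 3])
state_space.add_state(name='activation', values=['relu', 'linear'])
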
Example #2
def main(_):
    # create a shared session between Keras and Tensorflow
    policy_sess = tf.Session()
    K.set_session(policy_sess)

    NUM_LAYERS = 3  # number of layers of the state space
    MAX_TRIALS = 250  # maximum number of models generated

    MAX_EPOCHS = 60  # maximum number of epochs to train
    BATCHSIZE = 100  # batchsize
    EXPLORATION = 0.5  # high exploration for the first 1000 steps
    REGULARIZATION = 1e-3  # regularization strength
    CONTROLLER_CELLS = 32  # number of cells in RNN controller
    CLIP_REWARDS = False  # clip rewards in the [-0.05, 0.05] range
    RESTORE_CONTROLLER = True  # restore controller to continue training

    # construct a state space
    state_space = StateSpace()

    # add states
    #state_space.add_state(name='kernel', values=[3])
    state_space.add_state(name='filters', values=[30, 60, 100, 144])
    #state_space.add_state(name='stride', values=[1])

    # print the state space being searched
    state_space.print_state_space()

    previous_acc = 0.0
    total_reward = 0.0

    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess,
                                NUM_LAYERS,
                                state_space,
                                reg_param=REGULARIZATION,
                                exploration=EXPLORATION,
                                controller_cells=CONTROLLER_CELLS,
                                restore_controller=RESTORE_CONTROLLER)
    print('done')
    # create the Network Manager
    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # get an initial random state space if controller needs to predict an
    # action from the initial state
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))
    #print()

    # train for the number of trials
    for trial in range(MAX_TRIALS):
        with policy_sess.as_default():
            # get an action for the previous state
            actions = controller.get_action(state)

        # print the action probabilities
        state_space.print_actions(actions)
        print("Predicted actions : ",
              state_space.parse_state_space_list(actions))

        # build a model, train and get reward and accuracy from the network manager
        reward, previous_acc = manager.get_rewards(
            model_fn_cnn, state_space.parse_state_space_list(actions))
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        with policy_sess.as_default():

            total_reward += reward
            print("Total reward : ", total_reward)

            # actions and states are equivalent, save the state and reward
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step()
            print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

            # write the results of this trial into a file
            with open('train_history.csv', mode='a+', newline='') as f:
                data = [previous_acc, reward]
                data.extend(state_space.parse_state_space_list(state))
                writer = csv.writer(f)
                writer.writerow(data)
        print()

    print("Total Reward : ", total_reward)
Example #3
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess, NUM_LAYERS, state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, child_batchsize=CHILD_BATCHSIZE, clip_rewards=CLIP_REWARDS,
                         acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()

# train for the number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))
Example #4
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess,
                            NUM_LAYERS,
                            state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# create the Manager
manager = RewardManager(clip_rewards=CLIP_REWARDS, acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# train for the number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        # get an action for the previous state
        actions = controller.get_action(state)
        # if trial == 3:
        #     ini_action = actions
        # if trial == 3005:
        #     final_action = actions

    # print the action probabilities
    state_space.print_actions(actions)
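
The exploration parameter handed to the Controller in these examples suggests epsilon-greedy action selection: with probability `exploration` the controller samples a random architecture, otherwise it follows its policy network. A sketch of that decision, where `policy_predict` is a hypothetical callable standing in for the controller's internal RNN forward pass:

import numpy as np

def get_action_sketch(epsilon, state_space, state, num_layers, policy_predict):
    # explore: with probability epsilon, sample a random architecture
    if np.random.random() < epsilon:
        return state_space.get_random_state_space(num_layers)
    # exploit: follow the learned policy (hypothetical forward pass)
    return policy_predict(state)
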
Example #5
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess, NUM_LAYERS, state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, batchsize=BATCHSIZE, clip_rewards=CLIP_REWARDS)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# train for the number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)
Example #6
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess, NUM_LAYERS, state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset,
                         epochs=MAX_EPOCHS,
                         child_batchsize=CHILD_BATCHSIZE,
                         clip_rewards=CLIP_REWARDS,
                         acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()

old_acc = 0.7
ite_count = 0
acc_list = []
per_list = []
rew_list = []
old_action = []
old_action_cnt = 0
# train for the number of trials
for trial in range(MAX_TRIALS):
    G = nx.DiGraph()
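
This variant begins each trial by creating a networkx DiGraph, presumably to represent the sampled architecture as a graph. The snippet cuts off before the graph is populated; a minimal sketch of building such a graph from the parsed actions, where the chain-of-layers node/edge scheme is an assumption rather than the original code:

import networkx as nx

def actions_to_graph(parsed_actions):
    # Represent a sampled architecture as a simple chain:
    # input -> layer_0 -> layer_1 -> ..., with action values as node attributes.
    G = nx.DiGraph()
    G.add_node('input')
    prev = 'input'
    for i, value in enumerate(parsed_actions):
        node = 'layer_%d' % i
        G.add_node(node, action=value)
        G.add_edge(prev, node)
        prev = node
    return G
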
Example #7
with policy_sess.as_default():
    # create the Controller and build the internal policy network
    controller = Controller(policy_sess, NUM_LAYERS, state_space,
                            reg_param=REGULARIZATION,
                            exploration=EXPLORATION,
                            controller_cells=CONTROLLER_CELLS,
                            embedding_dim=EMBEDDING_DIM,
                            restore_controller=RESTORE_CONTROLLER)

# create the Network Manager
manager = NetworkManager(dataset,
                         epochs=MAX_EPOCHS,
                         child_batchsize=CHILD_BATCHSIZE,
                         clip_rewards=CLIP_REWARDS,
                         acc_beta=ACCURACY_BETA)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# clear the previous files
controller.remove_files()

best_acc = 0.0
best_state_space = []
# used to dedup action info
# action_history_dict = {}

start_time = datetime.datetime.now()
print(start_time.strftime('%H:%M:%S'))
# train for the number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
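
best_acc and best_state_space are initialized above, but the snippet cuts off before they are updated. A plausible sketch of the bookkeeping inside the trial loop, assuming the manager returns accuracy the same way as in the other examples on this page:

# inside the trial loop, after the child network has been evaluated
# (a sketch of how best_acc / best_state_space are likely maintained)
if previous_acc > best_acc:
    best_acc = previous_acc
    best_state_space = state_space.parse_state_space_list(actions)
    print("New best accuracy %.4f at trial %d" % (best_acc, trial + 1))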