# clear the previous files
controller.remove_files()

isFirst = True

# train for number of trials
for trial in range(MAX_TRIALS):
    if isFirst:
        actions = state
        isFirst = False
    else:
        with policy_sess.as_default():
            K.set_session(policy_sess)
            actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    # (only loosely coupled to the child model: it just needs to return the reward
    # and accuracy, where the reward is the exponential moving average of the accuracy)
    # if isFirst:
    #     reward, previous_acc = manager.main(action=[1, 'relu', 1, 'linear'])
    #     isFirst = False
    # else:
    reward, previous_acc = manager.main(action=state_space.parse_state_space_list(actions))
    # reward, previous_acc = (1, 1)
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
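The comment above describes the reward as an exponentially smoothed moving average of the child network's accuracy. The actual `manager.main` implementation is not shown in this section; the following is a minimal sketch of that smoothing, assuming a hand-picked decay factor `beta`:

# Sketch of an EMA-based reward, per the comment above.
# `beta` and the class name are illustrative assumptions.
class EMAReward:
    def __init__(self, beta=0.8):
        self.beta = beta
        self.moving_acc = 0.0

    def __call__(self, acc):
        # exponential moving average of accuracy, used as the reward
        self.moving_acc = self.beta * self.moving_acc + (1 - self.beta) * acc
        return self.moving_acc

ema_reward = EMAReward()
print(ema_reward(0.90))  # 0.18 for the first model (EMA starts at 0)
print(ema_reward(0.92))  # 0.328, smoothed toward recent accuracies

Smoothing the raw accuracy this way acts as a baseline: the controller is rewarded for models that beat the running average rather than for absolute accuracy, which reduces the variance of the policy gradient.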
def main(_):
    # create a shared session between Keras and Tensorflow
    policy_sess = tf.Session()
    K.set_session(policy_sess)

    NUM_LAYERS = 3            # number of layers of the state space
    MAX_TRIALS = 250          # maximum number of models generated
    MAX_EPOCHS = 60           # maximum number of epochs to train
    BATCHSIZE = 100           # batchsize
    EXPLORATION = 0.5         # high exploration for the first 1000 steps
    REGULARIZATION = 1e-3     # regularization strength
    CONTROLLER_CELLS = 32     # number of cells in RNN controller
    CLIP_REWARDS = False      # clip rewards in the [-0.05, 0.05] range
    RESTORE_CONTROLLER = True # restore controller to continue training

    # construct a state space
    state_space = StateSpace()

    # add states
    # state_space.add_state(name='kernel', values=[3])
    state_space.add_state(name='filters', values=[30, 60, 100, 144])
    # state_space.add_state(name='stride', values=[1])

    # print the state space being searched
    state_space.print_state_space()

    previous_acc = 0.0
    total_reward = 0.0

    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess, NUM_LAYERS, state_space,
                                reg_param=REGULARIZATION,
                                exploration=EXPLORATION,
                                controller_cells=CONTROLLER_CELLS,
                                restore_controller=RESTORE_CONTROLLER)
        print('done')

    # create the Network Manager
    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # get an initial random state space if controller needs to predict an
    # action from the initial state
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))
    # print()

    # train for number of trials
    for trial in range(MAX_TRIALS):
        with policy_sess.as_default():
            actions = controller.get_action(state)  # get an action for the previous state

        # print the action probabilities
        state_space.print_actions(actions)
        print("Predicted actions : ", state_space.parse_state_space_list(actions))

        # build a model, train and get reward and accuracy from the network manager
        reward, previous_acc = manager.get_rewards(
            model_fn_cnn, state_space.parse_state_space_list(actions))
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        with policy_sess.as_default():
            total_reward += reward
            print("Total reward : ", total_reward)

            # actions and states are equivalent, save the state and reward
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step()
            print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # write the results of this trial into a file
        with open('train_history.csv', mode='a+') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)
        print()

    print("Total Reward : ", total_reward)
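`model_fn_cnn` is passed to `manager.get_rewards` but is not defined in this section. A plausible sketch is below, assuming it receives the parsed action list (one `filters` value per layer, since `NUM_LAYERS = 3` and only the `filters` state is active) and returns a compiled Keras model; the input shape, layer choices, and output size are illustrative assumptions, not the repository's actual builder:

from keras.models import Model
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Dense

def model_fn_cnn(actions):
    # `actions` is the parsed state-space list, e.g. [30, 60, 144]
    # (one filter count per layer); 32x32 RGB input is assumed here
    filters_1, filters_2, filters_3 = actions

    ip = Input(shape=(32, 32, 3))
    x = Conv2D(filters_1, (3, 3), padding='same', activation='relu')(ip)
    x = Conv2D(filters_2, (3, 3), padding='same', activation='relu')(x)
    x = Conv2D(filters_3, (3, 3), padding='same', activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    model = Model(ip, x)
    model.compile('adam', 'categorical_crossentropy', metrics=['accuracy'])
    return model

The manager trains whatever model this function returns for `MAX_EPOCHS` epochs and converts the resulting validation accuracy into the controller's reward.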
manager = NetworkManager(dataset, epochs=MAX_EPOCHS, batchsize=BATCHSIZE,
                         clip_rewards=CLIP_REWARDS)

# get an initial random state space if controller needs to predict an
# action from the initial state
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# train for number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent, save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
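Each trial in the first variant appends a row of the form `[previous_acc, reward, *state]` to `train_history.csv`. Assuming that layout, a small sketch for recovering the best-scoring architecture after a run:

import csv

def best_architecture(path='train_history.csv'):
    # rows are [accuracy, reward, state_0, state_1, ...] as written above
    best = None
    with open(path) as f:
        for row in csv.reader(f):
            if not row:
                continue
            acc = float(row[0])
            if best is None or acc > best[0]:
                best = (acc, row[2:])
    return best

acc, arch = best_architecture()
print("Best accuracy %0.4f with architecture %s" % (acc, arch))

Since the controller's policy is stochastic, the highest-accuracy row in the log, rather than the final sampled model, is the natural choice of architecture to retrain from scratch.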