args = parser.parse_args()

# The architecture and network config arrive as JSON-like strings; swap single
# quotes for double quotes so they can be parsed downstream.
arch = args.architecture.replace("'", '"')
print(">>> arch received by trial")
print(arch)

nn_config = args.nn_config.replace("'", '"')
print(">>> nn_config received by trial")
print(nn_config)

num_epochs = args.num_epochs
print(">>> num_epochs received by trial")
print(num_epochs)

# Build the child model from the received architecture and config.
print(">>> Constructing Model...")
constructor = ModelConstructor(arch, nn_config)
test_model = constructor.build_model()
print(">>> Model Constructed Successfully")
test_model.summary()

test_model.compile(loss=keras.losses.categorical_crossentropy,
                   optimizer=keras.optimizers.Adam(lr=1e-3, decay=1e-4),
                   metrics=['accuracy'])

# Load CIFAR-10, scale pixels to [0, 1] and one-hot encode the labels.
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
y_train = to_categorical(y_train)
y_test = to_categorical(y_test)
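# Example invocation of this trial script (a sketch only; the actual argparse
# flag names are defined earlier in this file and the architecture/config
# strings shown here are hypothetical):
#
#   python trial.py \
#       --architecture "[['conv', 3, 32], ['conv', 3, 64]]" \
#       --nn_config "{'dense_units': 128, 'dropout': 0.5}" \
#       --num_epochs 10
#
# The quote replacement above presumably exists so that single-quoted strings
# passed on the command line become valid JSON before ModelConstructor parses them.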
def train(self):
    if not os.path.exists('weights/'):
        os.makedirs('weights/')

    previous_acc = 0.0
    total_reward = 0.0

    policy_sess = tf.Session()
    K.set_session(policy_sess)

    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess, self.num_conv_layers, self.state_space,
                                reg_param=self.regularization,
                                exploration=self.exploration,
                                controller_cells=self.num_control_cells,
                                embedding_dim=self.embedding_dim,
                                restore_controller=self.restore_model)

    # create the Network Manager
    manager = NetworkManager(self.dataset, epochs=self.num_epochs,
                             child_batchsize=self.batch_size,
                             clip_rewards=self.clip_rewards,
                             acc_beta=self.accuracy_beta)

    # create the Model Constructor
    constructor = ModelConstructor(self.input_size, self.output_size,
                                   self.num_conv_layers, self.num_dense_layers,
                                   self.dense_config)

    # get an initial random state space if the controller needs to predict an
    # action from the initial state
    state = self.state_space.init_random_states(self.num_conv_layers)
    print(state)
    print("Initial Random State : ", self.state_space.parse_state_space_list(state))
    print()

    # clear the previous files
    controller.remove_files()

    # train for the given number of trials
    for trial in range(self.num_trials):
        with policy_sess.as_default():
            K.set_session(policy_sess)
            # get an action for the previous state
            actions = controller.get_action(state)

            # print the action probabilities
            self.state_space.print_actions(actions)
            print("Predicted actions: ",
                  self.state_space.parse_state_space_list(actions, local=True))

        # build a model, train it and get reward and accuracy from the network manager
        reward, previous_acc = manager.get_rewards(
            constructor, self.state_space.parse_state_space_list(actions, local=True))
        print("Rewards: ", reward, "Accuracy: ", previous_acc)

        with policy_sess.as_default():
            K.set_session(policy_sess)

            total_reward += reward
            print("Total reward: ", total_reward)

            # actions and states are equivalent, save the state and reward
            first_state = state[0]
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step(first_state)
            print("Trial %d: Controller loss: %0.6f" % (trial + 1, loss))
            print()

    print("Total Reward: ", total_reward)
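# Usage sketch (hypothetical: the enclosing class name and its constructor
# arguments are defined elsewhere in this repository and may differ):
#
#   search = ArchitectureSearch(dataset=dataset,
#                               num_trials=100,
#                               num_epochs=10,
#                               batch_size=128)
#   search.train()
#
# Each trial samples an architecture from the controller, trains the resulting
# child model via the NetworkManager, and uses the returned reward to update
# the controller's policy.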