def main(_):
    action_reward_dict = {}
    policy_sess = tf.Session()
    # K.set_session(policy_sess)

    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # enumerate every architecture in the grid: len(LAYER_SIZES) choices per
    # layer, NUM_LAYERS layers, each evaluated NUM_ENUM times
    size = [len(LAYER_SIZES)] * NUM_LAYERS
    reward_space = np.zeros(size)

    for i in range(NUM_ENUM):
        for idx, val in np.ndenumerate(reward_space):
            action = [LAYER_SIZES[j] for j in idx]
            with policy_sess.as_default():
                _, acc = manager.get_rewards(model_fn, action)
                print(action, acc)
            acc = round(acc * JSON_SCALE, 2)
            action = str(tuple(action))
            if action not in action_reward_dict:
                action_reward_dict[action] = [acc]
            else:
                action_reward_dict[action].append(acc)

    # average the repeated evaluations per architecture
    action_average_reward_dict = {}
    for k in action_reward_dict:
        action_average_reward_dict[k] = round(np.mean(action_reward_dict[k]), 2)

    # the with-blocks close the file handles; no explicit close() is needed
    with open('action_reward_dict.json', 'w') as f:
        json.dump(action_reward_dict, f)
    with open('action_average_reward_dict.json', 'w') as f:
        json.dump(action_average_reward_dict, f)
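# NOTE (illustrative, not from the source): the enumeration script above relies
# on module-level imports and constants that the excerpt never defines. A
# minimal sketch of what they might look like; the values are assumptions, and
# model_fn / FLAGS / NetworkManager come from the surrounding project.
import json

import numpy as np
import tensorflow as tf

LAYER_SIZES = [30, 60, 100, 144]  # candidate layer widths (assumed)
NUM_LAYERS = 3                    # depth of each enumerated architecture (assumed)
NUM_ENUM = 5                      # repeated evaluations per architecture (assumed)
JSON_SCALE = 100000               # scale so accuracies serialize as readable numbers (assumed)
CLIP_REWARDS = False              # mirrors the other scripts in this collection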
def main(_):
    CLIP_REWARDS = False

    # enumerate all 3-layer architectures over the candidate layer sizes
    value = [30, 60, 100, 144]
    state_space = [v for v in itertools.product(value, repeat=3)]

    data = defaultdict(list)
    for itr in range(500):
        for state in state_space:
            states = list(state)
            manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)
            reward, previous_acc = manager.get_rewards(model_fn, states)
            previous_acc = round(previous_acc * 100000, 2)
            data[str(state)].append(previous_acc)

            # re-dump after every evaluation so progress survives interruption
            with open('data.json', 'w') as outfile:
                json.dump(data, outfile)
def main(_):
    CLIP_REWARDS = False
    # filter_space = [v for v in itertools.product(filter_val, repeat=3)]
    # stride_space = [v for v in itertools.product(stride_val, repeat=3)]
    # kernel_space = [v for v in itertools.product(kernel_val, repeat=3)]

    data = defaultdict(list)
    with open('result.txt', 'w') as out:
        for state in states:
            print(state)
            manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)
            reward, previous_acc = manager.get_rewards(model_fn_cnn, state)
            previous_acc = round(previous_acc * 100000, 2)
            print(previous_acc)
            data[str(state)].append(previous_acc)
            out.write("{} {}\n".format(state, previous_acc))

    with open('data.json', 'w') as outfile:
        json.dump(data, outfile)
def main(_):
    CLIP_REWARDS = False
    data = defaultdict(list)
    with open('main_result.txt', 'w') as out:
        for ite in range(1, 4):  # three passes over the full state list
            print('outer iteration:', ite)
            for iteration, state in enumerate(states, start=1):
                print(iteration, state)
                manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)
                reward, previous_acc = manager.get_rewards(model_fn_cnn, state)
                previous_acc = round(previous_acc * 100000, 2)
                print(previous_acc)
                data[str(state)].append(previous_acc)
                out.write("{} {}\n".format(state, previous_acc))

    with open('main_sample.json', 'w') as outfile:
        json.dump(data, outfile)
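# NOTE (illustrative, not from the source): the two sweep scripts above iterate
# over a module-level `states` list that the excerpts never define. By analogy
# with the commented-out lines in the first sweep, a plausible construction is
# a cartesian product over per-layer choices (values assumed):
import itertools

filter_val = [30, 60, 100, 144]
states = [list(v) for v in itertools.product(filter_val, repeat=3)]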
def main(_):
    # create a shared session between Keras and TensorFlow
    policy_sess = tf.Session()
    K.set_session(policy_sess)

    NUM_LAYERS = 3             # number of layers of the state space
    MAX_TRIALS = 250           # maximum number of models generated
    MAX_EPOCHS = 60            # maximum number of epochs to train
    BATCHSIZE = 100            # batch size
    EXPLORATION = 0.5          # high exploration for the first 1000 steps
    REGULARIZATION = 1e-3      # regularization strength
    CONTROLLER_CELLS = 32      # number of cells in the RNN controller
    CLIP_REWARDS = False       # clip rewards in the [-0.05, 0.05] range
    RESTORE_CONTROLLER = True  # restore controller to continue training

    # construct a state space
    state_space = StateSpace()

    # add states
    # state_space.add_state(name='kernel', values=[3])
    state_space.add_state(name='filters', values=[30, 60, 100, 144])
    # state_space.add_state(name='stride', values=[1])

    # print the state space being searched
    state_space.print_state_space()

    previous_acc = 0.0
    total_reward = 0.0

    with policy_sess.as_default():
        # create the Controller and build the internal policy network
        controller = Controller(policy_sess, NUM_LAYERS, state_space,
                                reg_param=REGULARIZATION,
                                exploration=EXPLORATION,
                                controller_cells=CONTROLLER_CELLS,
                                restore_controller=RESTORE_CONTROLLER)
        print('done')

    # create the Network Manager
    manager = NetworkManager(FLAGS, clip_rewards=CLIP_REWARDS)

    # get an initial random state space if the controller needs to predict an
    # action from the initial state
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))

    # train for the given number of trials
    for trial in range(MAX_TRIALS):
        with policy_sess.as_default():
            actions = controller.get_action(state)  # get an action for the previous state

            # print the action probabilities
            state_space.print_actions(actions)
            print("Predicted actions : ", state_space.parse_state_space_list(actions))

        # build a model, train it, and get reward and accuracy from the network manager
        reward, previous_acc = manager.get_rewards(
            model_fn_cnn, state_space.parse_state_space_list(actions))
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        with policy_sess.as_default():
            total_reward += reward
            print("Total reward : ", total_reward)

            # actions and states are equivalent; save the state and reward
            state = actions
            controller.store_rollout(state, reward)

            # train the controller on the saved state and the discounted rewards
            loss = controller.train_step()
            print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # write the results of this trial into a file
        with open('train_history.csv', mode='a+') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)
        print()

    print("Total Reward : ", total_reward)
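# NOTE (illustrative, not from the source): the loops above only require that
# model_fn / model_fn_cnn take the parsed action list and return a compiled
# Keras model for NetworkManager to train. A minimal sketch of such a builder,
# assuming one 'filters' state per layer and CIFAR-10-sized inputs; the actual
# architecture used by the project is not shown in these excerpts.
from tensorflow.keras.layers import Conv2D, Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model


def model_fn_cnn(actions):
    # with a single 'filters' state per layer, actions is e.g. [30, 60, 100]
    filters_1, filters_2, filters_3 = actions

    ip = Input(shape=(32, 32, 3))
    x = Conv2D(filters_1, (3, 3), strides=(2, 2), padding='same', activation='relu')(ip)
    x = Conv2D(filters_2, (3, 3), strides=(2, 2), padding='same', activation='relu')(x)
    x = Conv2D(filters_3, (3, 3), strides=(2, 2), padding='same', activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(10, activation='softmax')(x)

    model = Model(ip, x)
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model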
# clear the previous files
controller.remove_files()

# train for the given number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

        # print the action probabilities
        state_space.print_actions(actions)
        print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train it, and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent; save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
else:
    k = K_

actions = controller.get_actions(top_k=k)  # get all actions for the previous state
rewards = []

for t, action in enumerate(actions):
    # print the action probabilities
    state_space.print_actions(action)
    print("Model #%d / #%d" % (t + 1, len(actions)))
    print("Predicted actions : ", state_space.parse_state_space_list(action))

    # build a model, train it, and get the reward from the network manager
    reward = manager.get_rewards(
        model_fn, state_space.parse_state_space_list(action))
    print("Final Accuracy : ", reward)

    rewards.append(reward)
    print("\nFinished %d out of %d models ! \n" % (t + 1, len(actions)))

    # write the results of this trial into a file
    with open('train_history.csv', mode='a+', newline='') as f:
        data = [reward]
        data.extend(state_space.parse_state_space_list(action))
        writer = csv.writer(f)
        writer.writerow(data)

with policy_sess.as_default():
    K.set_session(policy_sess)
    # train the controller on the saved state and the discounted rewards
state = state_space.get_random_state_space(NUM_LAYERS)
print("Initial Random State : ", state_space.parse_state_space_list(state))
print()

# train for the given number of trials
for trial in range(MAX_TRIALS):
    with policy_sess.as_default():
        K.set_session(policy_sess)
        actions = controller.get_action(state)  # get an action for the previous state

    # print the action probabilities
    state_space.print_actions(actions)
    print("Predicted actions : ", state_space.parse_state_space_list(actions))

    # build a model, train it, and get reward and accuracy from the network manager
    reward, previous_acc = manager.get_rewards(model_fn, state_space.parse_state_space_list(actions))
    print("Rewards : ", reward, "Accuracy : ", previous_acc)

    with policy_sess.as_default():
        K.set_session(policy_sess)
        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent; save the state and reward
        state = actions
        controller.store_rollout(state, reward)

        # train the controller on the saved state and the discounted rewards
        loss = controller.train_step()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))
    k = None
else:
    k = K

actions = controller.get_actions(top_k=k)  # get all actions for the previous state
rewards = []

for t, action in enumerate(actions):
    # print the action probabilities
    state_space.print_actions(action)
    print("Model #%d / #%d" % (t + 1, len(actions)))
    print(" ", state_space.parse_state_space_list(action))

    # build a model, train it, and get the reward from the network manager
    reward = manager.get_rewards(
        ModelGenerator, state_space.parse_state_space_list(action))
    print("Final Accuracy : ", reward)

    rewards.append(reward)
    print("\nFinished %d out of %d models ! \n" % (t + 1, len(actions)))

    # write the results of this trial into a file
    with open('train_history.csv', mode='a+', newline='') as f:
        data = [reward]
        data.extend(state_space.parse_state_space_list(action))
        writer = csv.writer(f)
        writer.writerow(data)

# train the controller on the rewards from all top-k models
loss = controller.train_step(rewards)
print("Trial %d: ControllerManager loss : %0.6f" % (trial + 1, loss))
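# NOTE (illustrative, not from the source): train_step(rewards) above is only
# shown by its call signature. Controllers of this kind typically perform a
# REINFORCE update with a moving-average baseline; a toy sketch of that
# baseline/advantage computation (the real ControllerManager may differ):
import numpy as np


class ControllerBaselineSketch:
    def __init__(self, beta=0.8):
        self.beta = beta      # smoothing factor for the moving-average baseline
        self.baseline = 0.0

    def advantages(self, rewards):
        # advantage = reward - running baseline, which reduces gradient
        # variance; the actual update would scale the log-prob gradients of
        # the sampled actions by these values
        self.baseline = self.beta * self.baseline + (1.0 - self.beta) * np.mean(rewards)
        return np.asarray(rewards) - self.baseline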
print("This is time to explore design", end - start, "\n") start = time.time() time_performance = M_SCHEDULE.schedule_run(layers, layersname) # We set performance according to the previous iteration #OPT_TIMEPERFORMANCE = min(time_performance,OPT_TIMEPERFORMANCE) end = time.time() print("PPPPPPPPPPPPPPPPPPPPP This is time performance", time_performance, "\n", "This is time to evaluate time formance", end - start, "\n", "\n", "\n") per_list.append(time_performance) # build a model, train and get reward and accuracy from the network manager reward, previous_acc = manager.get_rewards( model_fn, state_space.parse_state_space_list(actions), time_performance, OPT_TIMEPERFORMANCE) # CNN train and return the best accura print("Rewards : ", reward, "Accuracy : ", previous_acc) acc_list.append(previous_acc) rew_list.append(reward) print("===============+WWW+=================") print("++++++++++Acc History", acc_list) print("++++++++++Per History", per_list) print("++++++++++Rew History", rew_list) print("=====================================") #OPT_TIMEPERFORMANCE = min(time_performance*(1+(old_acc-previous_acc)),OPT_TIMEPERFORMANCE) old_acc = previous_acc with policy_sess.as_default(): K.set_session(policy_sess)
def main_training():
    # number of GPUs available; use 0 for CPU mode
    ngpu = torch.cuda.device_count()
    # decide which device we want to run on
    device = torch.device("cuda:0" if (torch.cuda.is_available() and ngpu > 0) else "cpu")

    train_loader, val_loader, test_loader = get_dataset(CHILD_BATCHSIZE, '/tmp/datasets/cifar-10')
    dataloaders = [train_loader, val_loader, test_loader]

    # construct a state space
    state_space = StateSpace()

    # add states
    state_space.add_state(name='kernel', values=[1, 3, 5, 7])
    state_space.add_state(name='filters', values=[24, 36, 48, 64])

    previous_acc = 0.0
    total_reward = 0.0

    controller = Controller(NUM_LAYERS, state_space)
    manager = NetworkManager(dataloaders, device, epochs=2)

    # get an initial random state space if the controller needs to predict an
    # action from the initial state
    state = state_space.get_random_state_space(NUM_LAYERS)
    print("Initial Random State : ", state_space.parse_state_space_list(state))

    for trial in range(MAX_TRIALS):
        actions, prob_actions = controller.get_action(state)  # get an action for the previous state

        # print the action probabilities
        state_space.print_actions(actions)
        print("Predicted actions : ", state_space.parse_state_space_list(actions))

        # build a model, train it, and get reward and accuracy from the network manager
        model = ChildNetwork(state_space.parse_state_space_list(actions))
        reward, previous_acc = manager.get_rewards(model)
        print("Rewards : ", reward, "Accuracy : ", previous_acc)

        total_reward += reward
        print("Total reward : ", total_reward)

        # actions and states are equivalent; save the state, reward, and action probabilities
        state = actions
        prob_state = prob_actions
        controller.store_rollout(state, reward, prob_state)

        # train the controller on the saved states and the discounted rewards
        loss = controller.update_policy()
        print("Trial %d: Controller loss : %0.6f" % (trial + 1, loss))

        # write the results of this trial into a file
        with open('train_history.csv', mode='a+') as f:
            data = [previous_acc, reward]
            data.extend(state_space.parse_state_space_list(state))
            writer = csv.writer(f)
            writer.writerow(data)
        print()

    print("Total Reward : ", total_reward)
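# NOTE (illustrative, not from the source): update_policy() above is only shown
# by its call site. A minimal PyTorch sketch of the REINFORCE update such a
# controller typically applies, assuming store_rollout kept the sampled
# actions' log-probabilities; the discount factor and optimizer are assumptions.
import torch


def update_policy_sketch(log_probs, rewards, optimizer, gamma=0.99):
    # discounted returns, computed backwards over the rollout
    returns, running = [], 0.0
    for r in reversed(rewards):
        running = r + gamma * running
        returns.insert(0, running)
    returns = torch.tensor(returns)
    # normalize returns for numerical stability (needs more than one sample)
    returns = (returns - returns.mean()) / (returns.std() + 1e-8)

    # policy-gradient loss: -sum(log pi(a|s) * return)
    loss = -(torch.stack(log_probs) * returns).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()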