def central_agent(net_params_queues, exp_queues):
    """Coordinator process.

    Broadcasts the current actor weights to every worker, gathers one
    rollout per worker, runs several PPO passes over the combined batch,
    and periodically checkpoints the model and evaluates it.
    """
    assert len(net_params_queues) == NUM_AGENTS
    assert len(exp_queues) == NUM_AGENTS

    with tf.Session() as sess, \
            open(LOG_FILE + '_test.txt', 'w') as test_log_file:
        actor = network.Network(sess,
                                state_dim=S_DIM,
                                action_dim=A_DIM,
                                learning_rate=ACTOR_LR_RATE)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=1000)  # checkpoint manager

        # Optionally resume training from a previous checkpoint.
        if NN_MODEL is not None:  # NN_MODEL is a checkpoint file path
            saver.restore(sess, NN_MODEL)
            print("Model restored.")

        for epoch in range(TRAIN_EPOCH):
            # Synchronize the worker agents with the current weights.
            current_params = actor.get_network_params()
            for params_queue in net_params_queues:
                params_queue.put(current_params)

            # Collect one experience batch from every worker and merge.
            states, actions, probs, returns = [], [], [], []
            for experience_queue in exp_queues:
                s_, a_, p_, g_ = experience_queue.get()
                states.extend(s_)
                actions.extend(a_)
                probs.extend(p_)
                returns.extend(g_)

            s_batch = np.stack(states, axis=0)
            a_batch = np.vstack(actions)
            p_batch = np.vstack(probs)
            v_batch = np.vstack(returns)

            # Multiple PPO update passes over the same batch.
            for _ in range(PPO_TRAINING_EPO):
                actor.train(s_batch, a_batch, p_batch, v_batch, epoch)

            if epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk, then evaluate
                # the freshly saved checkpoint.
                ckpt_path = SUMMARY_DIR + "/nn_model_ep_" + str(epoch) + ".ckpt"
                save_path = saver.save(sess, ckpt_path)
                testing(epoch, ckpt_path, test_log_file)
def agent(agent_id, net_params_queue, exp_queue):
    """Worker process.

    Rolls out the current policy in its own ABR environment and ships
    (state, action, prob, value-target) batches back to the coordinator
    through ``exp_queue``; receives fresh weights via ``net_params_queue``
    after every rollout.
    """
    env = ABREnv(agent_id)
    with tf.Session() as sess, open(
            SUMMARY_DIR + '/log_agent_' + str(agent_id), 'w') as log_file:
        actor = network.Network(sess,
                                state_dim=S_DIM,
                                action_dim=A_DIM,
                                learning_rate=ACTOR_LR_RATE)

        # Initial synchronization of the network parameters from the
        # coordinator.
        actor.set_network_params(net_params_queue.get())

        time_stamp = 0
        for epoch in range(TRAIN_EPOCH):
            obs = env.reset()
            s_batch, a_batch, p_batch, r_batch = [], [], [], []
            for step in range(TRAIN_SEQ_LEN):
                s_batch.append(obs)

                action_prob = actor.predict(
                    np.reshape(obs, (1, S_DIM[0], S_DIM[1])))

                # Sample an action with the Gumbel-max trick (replaces the
                # older cumsum/uniform sampling).
                # NOTE(review): if predict() returns a (1, A_DIM) array,
                # len(action_prob) is 1 and the same noise is broadcast to
                # every action — confirm predict() returns a flat vector.
                noise = np.random.gumbel(size=len(action_prob))
                bit_rate = np.argmax(np.log(action_prob) + noise)

                obs, rew, done, info = env.step(bit_rate)

                # One-hot encode the chosen action for the training batch.
                one_hot = np.zeros(A_DIM)
                one_hot[bit_rate] = 1
                a_batch.append(one_hot)
                r_batch.append(rew)
                p_batch.append(action_prob)
                if done:
                    break

            # Bootstrap value targets for the collected trajectory.
            v_batch = actor.compute_v(s_batch, a_batch, r_batch, done)
            exp_queue.put([s_batch, a_batch, p_batch, v_batch])

            # Refresh weights from the coordinator before the next rollout.
            actor.set_network_params(net_params_queue.get())
def main():
    """Run a single rollout of the (optionally restored) policy in Env,
    printing the value derived from each chosen action."""
    env = Env()
    with tf.Session() as sess:
        actor = network.Network(sess,
                                state_dim=S_DIM,
                                action_dim=A_DIM,
                                learning_rate=ACTOR_LR_RATE)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(max_to_keep=1000)  # checkpoint manager

        # Restore pretrained weights when a checkpoint path is configured.
        nn_model = NN_MODEL  # nn_model is the path to a checkpoint file
        if nn_model is not None:
            saver.restore(sess, nn_model)

        time_stamp = 0
        obs = env.reset()
        s_batch, a_batch, p_batch, r_batch = [], [], [], []
        for step in range(TRAIN_SEQ_LEN):
            s_batch.append(obs)
            probs = actor.predict(np.reshape(obs, (-1, S_DIM[0], S_DIM[1])))

            # Gumbel-max sampling from the policy distribution.
            gumbel = np.random.gumbel(size=len(probs))
            act = np.argmax(np.log(probs) + gumbel)

            obs, rew, done, info = env.step(act)
            # Presumably maps the discrete action index onto a physical
            # setting in [0.14, 0.54) — verify units against Env.
            print(0.14 + act / A_DIM * 0.4)

            onehot = np.zeros(A_DIM)
            onehot[act] = 1
            a_batch.append(onehot)
            r_batch.append(rew)
            p_batch.append(probs)
            if done:
                break
def central_agent(net_params_queues, exp_queues):
    """Coordinator process (TensorBoard-instrumented variant).

    Broadcasts actor weights to the workers, merges their rollouts, runs
    PPO updates, checkpoints/evaluates periodically, and triggers entropy
    decay when test reward plateaus.

    Args:
        net_params_queues: one queue per worker for pushing weights out.
        exp_queues: one queue per worker for pulling experience batches in.
    """
    assert len(net_params_queues) == NUM_AGENTS
    assert len(exp_queues) == NUM_AGENTS
    # Cap TF thread pools so the coordinator doesn't starve the workers.
    tf_config = tf.ConfigProto(intra_op_parallelism_threads=5,
                               inter_op_parallelism_threads=5)
    with tf.Session(config=tf_config) as sess, open(
            LOG_FILE + '_test.txt', 'w') as test_log_file:
        summary_ops, summary_vars = build_summaries()

        actor = network.Network(sess,
                                state_dim=S_DIM,
                                action_dim=A_DIM,
                                learning_rate=ACTOR_LR_RATE)

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)  # training monitor
        saver = tf.train.Saver(max_to_keep=1000)  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        # Best test reward seen so far and the epoch it occurred at;
        # tick_gap counts consecutive evaluations without improvement.
        max_reward, max_epoch = -10000., 0
        tick_gap = 0

        # while True:  # assemble experiences from agents, compute the gradients
        for epoch in range(TRAIN_EPOCH):
            # synchronize the network parameters of work agent
            actor_net_params = actor.get_network_params()
            for i in range(NUM_AGENTS):
                net_params_queues[i].put(actor_net_params)

            # Merge one rollout per worker into flat lists.
            s, a, p, g = [], [], [], []
            for i in range(NUM_AGENTS):
                s_, a_, p_, g_ = exp_queues[i].get()
                s += s_
                a += a_
                p += p_
                g += g_
            s_batch = np.stack(s, axis=0)
            a_batch = np.vstack(a)
            p_batch = np.vstack(p)
            v_batch = np.vstack(g)

            # Several PPO passes over the same merged batch.
            for _ in range(PPO_TRAINING_EPO):
                actor.train(s_batch, a_batch, p_batch, v_batch, epoch)
                # actor.train(s_batch, a_batch, v_batch, epoch)

            if epoch % MODEL_SAVE_INTERVAL == 0:
                # Save the neural net parameters to disk.
                save_path = saver.save(
                    sess,
                    SUMMARY_DIR + "/nn_model_ep_" + str(epoch) + ".ckpt")
                # Evaluate the freshly saved checkpoint.
                avg_reward, avg_entropy = testing(
                    epoch,
                    SUMMARY_DIR + "/nn_model_ep_" + str(epoch) + ".ckpt",
                    test_log_file)

                # Track the best test reward; reset the plateau counter on
                # improvement, otherwise advance it.
                if avg_reward > max_reward:
                    max_reward = avg_reward
                    max_epoch = epoch
                    tick_gap = 0
                else:
                    tick_gap += 1

                # After 10 evaluations without improvement, decay the
                # entropy weight instead of rolling back the model.
                if tick_gap >= 10:
                    # saver.restore(sess, SUMMARY_DIR + "/nn_model_ep_" + str(max_epoch) + ".ckpt")
                    actor.set_entropy_decay()
                    tick_gap = 0

                # Log entropy weight, test reward, and test entropy to
                # TensorBoard (order must match build_summaries()).
                summary_str = sess.run(summary_ops, feed_dict={
                    summary_vars[0]: actor.get_entropy(epoch),
                    summary_vars[1]: avg_reward,
                    summary_vars[2]: avg_entropy
                })
                writer.add_summary(summary_str, epoch)
                writer.flush()
def main():
    """Evaluate a trained policy over every test trace.

    Steps the ABR environment chunk by chunk, builds the (S_INFO, S_LEN)
    state history, samples bitrates from the policy, and writes one log
    file per trace; stops after all traces have been played.
    """
    np.random.seed(RANDOM_SEED)
    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    # One log file per trace, named after the current trace.
    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    with tf.Session() as sess:
        actor = network.Network(sess,
                                state_dim=[S_INFO, S_LEN],
                                action_dim=A_DIM,
                                learning_rate=ACTOR_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                - REBUF_PENALTY * rebuf \
                - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                          VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp / M_IN_K) + '\t' +
                str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record: shift the window left by one column
            # so the newest observation lands in the last column.
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            print(np.round(action_prob, 2))
            # Sample the next bitrate by inverse-CDF sampling over the
            # policy's action distribution.
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()

            s_batch.append(state)
            entropy_record.append(0.)  # entropy not computed in testing

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                # Reset per-video history before the next trace.
                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1
                if video_count >= len(all_file_names):
                    break

                # Open a fresh log file for the next trace.
                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'w')