def load_environment(self):
    DEBUG = False
    random_seed = 2
    # Control the subdirectory where log files will be stored.
    LOG_FILE_PATH = './log/'
    # create result directory
    if not os.path.exists(LOG_FILE_PATH):
        os.makedirs(LOG_FILE_PATH)

    # NETWORK_TRACE = 'fixed'
    VIDEO_TRACE = 'AsianCup_China_Uzbekistan'
    # network_trace_dir = './dataset/network_trace/' + NETWORK_TRACE + '/'
    # all_cooked_time, all_cooked_bw, self.all_file_names = load_trace.load_trace(network_trace_dir)
    network_trace = ['fixed', 'high']
    video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'
    network_trace_dir_list = ['./dataset/network_trace/' + trace + '/' for trace in network_trace]
    all_cooked_time, all_cooked_bw, self.all_file_names = load_trace.load_trace_list(network_trace_dir_list)

    # Initialize the video streaming environment and feed in all the loaded network trace data.
    self.net_env = fixed_env.Environment(all_cooked_time=all_cooked_time,
                                         all_cooked_bw=all_cooked_bw,
                                         random_seed=random_seed,
                                         logfile_path=LOG_FILE_PATH,
                                         VIDEO_SIZE_FILE=video_trace_prefix,
                                         Debug=DEBUG)
    return
def testing(tabular_q, epoch):
    os.system('rm -r ' + TEST_LOG_FOLDER)
    os.system('mkdir ' + TEST_LOG_FOLDER)

    all_cooked_time, all_cooked_bw, all_file_names = \
        load_trace.load_trace('./cooked_test_traces/')
    test_net_env = fixed_env.Environment(all_cooked_time=all_cooked_time,
                                         all_cooked_bw=all_cooked_bw)

    log_path = TEST_LOG_FOLDER + 'log_' + all_file_names[test_net_env.trace_idx]
    log_file = open(log_path, 'w')  # text mode: plain strings are written below

    time_stamp = 0
    video_count = 0
    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY
    state = [0, 0, 0, 0]

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            test_net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        last_bit_rate = bit_rate

        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                       str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                       str(buffer_size) + '\t' +
                       str(rebuf) + '\t' +
                       str(video_chunk_size) + '\t' +
                       str(delay) + '\t' +
                       str(reward) + '\n')
        log_file.flush()

        # discretize the observation into the tabular Q-learning state
        bw = float(video_chunk_size) / float(delay) / M_IN_K * BITS_IN_BYTE  # Mbit/sec
        bw = min(int(bw / D_BW) * D_BW, BW_MAX)
        bf = min(int(buffer_size / D_BF) * D_BF, BF_MAX)
        br = bit_rate
        c = min(video_chunk_remain, N_CHUNK - 1)
        state = [bw, bf, br, c]

        bit_rate = tabular_q.get_q_action(state, deterministic=True)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            state = [0, 0, 0, 0]

            video_count += 1
            if video_count >= len(all_file_names):
                break

            log_path = TEST_LOG_FOLDER + 'log_' + all_file_names[test_net_env.trace_idx]
            log_file = open(log_path, 'w')

    with open(TEST_LOG_PATH, 'a') as log_file:
        # append test performance to the log
        rewards = []
        test_log_files = os.listdir(TEST_LOG_FOLDER)
        for test_log_file in test_log_files:
            reward = []
            with open(TEST_LOG_FOLDER + test_log_file, 'r') as f:
                for line in f:
                    parse = line.split()
                    try:
                        reward.append(float(parse[-1]))
                    except IndexError:
                        break
            rewards.append(np.sum(reward[1:]))

        rewards = np.array(rewards)
        rewards_min = np.min(rewards)
        rewards_5per = np.percentile(rewards, 5)
        rewards_mean = np.mean(rewards)
        rewards_median = np.percentile(rewards, 50)
        rewards_95per = np.percentile(rewards, 95)
        rewards_max = np.max(rewards)

        log_file.write(str(epoch) + '\t' +
                       str(rewards_min) + '\t' +
                       str(rewards_5per) + '\t' +
                       str(rewards_mean) + '\t' +
                       str(rewards_median) + '\t' +
                       str(rewards_95per) + '\t' +
                       str(rewards_max) + '\n')
        log_file.flush()
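# The loop above discretizes (bandwidth, buffer, last bitrate, chunks left) and asks
# tabular_q.get_q_action(state, deterministic=True) for the next bitrate. A minimal
# sketch of such a tabular policy is shown here for reference only; it assumes the grid
# constants (D_BW, BW_MAX, ...) defined in this file, and the real TabularQ class used
# above lives elsewhere.
from collections import defaultdict
import numpy as np


class TabularQSketch(object):
    """Hypothetical tabular Q policy keyed on the discretized state tuple."""

    def __init__(self, n_actions, epsilon=0.1):
        self.q = defaultdict(lambda: np.zeros(n_actions))
        self.n_actions = n_actions
        self.epsilon = epsilon

    def get_q_action(self, state, deterministic=False):
        key = tuple(state)  # states arrive as lists, so convert to a hashable key
        if not deterministic and np.random.rand() < self.epsilon:
            return np.random.randint(self.n_actions)  # exploration during training
        return int(np.argmax(self.q[key]))  # greedy action at test time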
def main():
    # utility_offset = -math.log(VIDEO_BIT_RATE[0])  # so utilities[0] = 0
    # utilities = [math.log(b) + utility_offset for b in VIDEO_BIT_RATE]

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    load_trace.plot_bandwidth(all_cooked_time, all_cooked_bw, _)

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'w') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR, sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_counter, throughput, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)
            # print(net_env.get_video_chunk(bit_rate))

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)
            # print(state)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain,
                                      CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            # print('state', state)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            rand = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
            print(action_cumsum, action_cumsum > rand, (action_cumsum > rand).argmax())
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE))
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            # compute Vp and map bitrate
            # bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            Vp_index = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            Vp = BUFFER_PARAMETER[Vp_index]
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            config = {
                'buffer_size': env.BUFFER_THRESH,
                'gp': GP,
                'Vp': Vp,
                'abr_osc': False,
                'abr_basic': False,
                'no_ibr': False
            }
            bola = get_bitrate.Bola(config=config)
            bit_rate = bola.get_quality(Vp,
                                        buffer_size * env.MILLISECONDS_IN_SECOND,
                                        last_bit_rate,
                                        throughput)

            # information available before the decision
            print('[%d]: thrput=%.2f, download time %.2fms, chunk size %d, buffer=%.2fs, last bitrate=%d'
                  % (video_chunk_counter, throughput, delay, video_chunk_size,
                     buffer_size, last_bit_rate))

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once
                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),  # ignore the first chunk
                                          a_batch=np.vstack(a_batch[1:]),  # since we don't have the
                                          r_batch=np.vstack(r_batch[1:]),  # control over it
                                          terminal=end_of_video,
                                          actor=actor,
                                          critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print("====")
                print("Epoch", epoch)
                print("TD_loss", td_loss,
                      "Avg_reward", np.mean(r_batch),
                      "Avg_entropy", np.mean(entropy_record))
                print("====")

                summary_str = sess.run(summary_ops, feed_dict={
                    summary_vars[0]: td_loss,
                    summary_vars[1]: np.mean(r_batch),
                    summary_vars[2]: np.mean(entropy_record)
                })
                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:
                    assert len(actor_gradient_batch) == len(critic_gradient_batch)
                    # assembled_actor_gradient = actor_gradient_batch[0]
                    # assembled_critic_gradient = critic_gradient_batch[0]
                    # for i in xrange(len(actor_gradient_batch) - 1):
                    #     for j in xrange(len(actor_gradient)):
                    #         assembled_actor_gradient[j] += actor_gradient_batch[i][j]
                    #         assembled_critic_gradient[j] += critic_gradient_batch[i][j]
                    # actor.apply_gradients(assembled_actor_gradient)
                    # critic.apply_gradients(assembled_critic_gradient)

                    for i in range(len(actor_gradient_batch)):
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(sess,
                                               SUMMARY_DIR + "/nn_model_ep_" + str(epoch) + ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                # print(bit_rate)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
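# The reward used above (and in the other train/test loops in this file) is the linear
# QoE metric: bitrate utility minus a rebuffering penalty minus a smoothness penalty.
# A small self-contained helper, written only to make the formula explicit; the default
# values below are placeholders, the real ones are the module-level constants.
import numpy as np

def linear_qoe(bit_rate, last_bit_rate, rebuf,
               video_bit_rate=(300., 750., 1200., 1850., 2850., 4300.),  # kbps, example ladder
               rebuf_penalty=4.3, smooth_penalty=1.0, m_in_k=1000.0):
    """QoE = quality (Mbps) - rebuf_penalty * rebuffer (s) - smooth_penalty * |delta quality| (Mbps)."""
    quality = video_bit_rate[bit_rate] / m_in_k
    smoothness = np.abs(video_bit_rate[bit_rate] - video_bit_rate[last_bit_rate]) / m_in_k
    return quality - rebuf_penalty * rebuf - smooth_penalty * smoothness

# e.g. linear_qoe(bit_rate=3, last_bit_rate=1, rebuf=0.5) = 1.85 - 2.15 - 1.1 = -1.4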
def test(user_id,ABR_NAME_,QoE_,NETWORK_TRACE_,VIDEO_TRACE_): #1 Algorithm Setting: RBA, BBA, DYNAMIC, PDDQN, Pensieve ABR_NAME = ABR_NAME_ #2 QoE Setting: ar, al, hd, b, max QoE = QoE_ #3 Network Dataset: high, medium, low, fixed NETWORK_TRACE = NETWORK_TRACE_ #4 Video Dataset: AsianCup_China_Uzbekistan, Fengtimo_2018_11_3, YYF_2018_08_12 VIDEO_TRACE = VIDEO_TRACE_ model_name = "" if ABR_NAME == 'BBA': import BBA as ABR if ABR_NAME == 'RBA': import RBA as ABR if ABR_NAME == 'DYNAMIC': import DYNAMIC as ABR if ABR_NAME == 'PDDQN': model_name = "./PDDQN_models/PDDQN_b/" import PDDQN_ as ABR if ABR_NAME == 'PDDQN-R': model_name = "./PDDQN_models/"+QoE+'/' import PDDQN_R as ABR if ABR_NAME == 'Pensieve': model_name = "./Pensieve_models/"+QoE+'/' import Pensieve as ABR SMOOTH_PENALTY = 0 REBUF_PENALTY = 0.0 LANTENCY_PENALTY = 0.0 SKIP_PENALTY = 0.0 BITRATE_REWARD = 0.0 if QoE == 'al': SMOOTH_PENALTY = 0.01 REBUF_PENALTY = 1.5 LANTENCY_PENALTY = 0.01 BITRATE_REWARD = 0.001 SKIP_PENALTY = 1 if QoE == 'ar': SMOOTH_PENALTY = 0.0 REBUF_PENALTY = 3 LANTENCY_PENALTY = 0.0 BITRATE_REWARD = 0.001 SKIP_PENALTY = 0.0 if QoE == 'b': SMOOTH_PENALTY = 0.02 REBUF_PENALTY = 1.5 LANTENCY_PENALTY = 0.005 BITRATE_REWARD = 0.001 SKIP_PENALTY = 0.5 if QoE == 'hd': SMOOTH_PENALTY = 0.0 REBUF_PENALTY = 0.5 LANTENCY_PENALTY = 0.0 BITRATE_REWARD = 0.001 SKIP_PENALTY = 0.0 if QoE == 'max': SMOOTH_PENALTY = 0 REBUF_PENALTY = 0.0 LANTENCY_PENALTY = 0.0 SKIP_PENALTY = 0.0 BITRATE_REWARD = 0.001 FILE_NAME = './'+'result/'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv' else: FILE_NAME = './'+'result/'+ABR_NAME+'_'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv' FILE_NAME = './' + 'result/Startup/' + NETWORK_TRACE +'/'+ABR_NAME+ '/QoE.csv' out = open(FILE_NAME, 'w', newline='') w = csv.writer(out) DEBUG = False LOG_FILE_PATH = './log/' # create result directory if not os.path.exists(LOG_FILE_PATH): os.makedirs(LOG_FILE_PATH) # -- End Configuration -- network_trace_dir = './dataset/new_network_trace/' + NETWORK_TRACE + '/' video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_' # load the trace all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(network_trace_dir) start_avgbw = (sum(all_cooked_bw[0][0:10])/10) *1000 # random_seed random_seed = 2 count = 0 trace_count = 1 FPS = 25 frame_time_len = 0.04 reward_all_sum = 0 run_time = 0 net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, random_seed=random_seed, logfile_path=LOG_FILE_PATH, VIDEO_SIZE_FILE=video_trace_prefix, Debug=DEBUG) abr = ABR.Algorithm() abr_init = abr.Initial(model_name) BIT_RATE = [500.0, 850.0, 1200.0, 1850.0] # kpbs TARGET_BUFFER = [0.5,0.75,1,1.25] # seconds # ABR setting RESEVOIR = 0.5 CUSHION = 2 cnt = 0 # defalut setting last_bit_rate = 0 bit_rate = 0 target_buffer = 0 latency_limit = 4 # reward setting reward_frame = 0 reward_all = 0 # past_info setting past_frame_num = 200 S_time_interval = [0] * past_frame_num S_send_data_size = [0] * past_frame_num S_chunk_len = [0] * past_frame_num S_rebuf = [0] * past_frame_num S_buffer_size = [0] * past_frame_num S_end_delay = [0] * past_frame_num S_chunk_size = [0] * past_frame_num S_play_time_len = [0] * past_frame_num S_decision_flag = [0] * past_frame_num S_buffer_flag = [0] * past_frame_num S_cdn_flag = [0] * past_frame_num S_skip_time = [0] * past_frame_num # params setting call_time_sum = 0 reward_chunk = 0 while True: reward_frame = 0 time, time_interval, send_data_size, chunk_len, \ rebuf, buffer_size, play_time_len, end_delay, \ 
cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \ buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit) # S_info is sequential order S_time_interval.pop(0) S_send_data_size.pop(0) S_chunk_len.pop(0) S_buffer_size.pop(0) S_rebuf.pop(0) S_end_delay.pop(0) S_play_time_len.pop(0) S_decision_flag.pop(0) S_buffer_flag.pop(0) S_cdn_flag.pop(0) S_skip_time.pop(0) S_time_interval.append(time_interval) S_send_data_size.append(send_data_size) S_chunk_len.append(chunk_len) S_buffer_size.append(buffer_size) S_rebuf.append(rebuf) S_end_delay.append(end_delay) S_play_time_len.append(play_time_len) S_decision_flag.append(decision_flag) S_buffer_flag.append(buffer_flag) S_cdn_flag.append(cdn_flag) S_skip_time.append(skip_frame_time_len) # QOE setting # if end_delay <= 1.0: # LANTENCY_PENALTY = 0.005 # else: # LANTENCY_PENALTY = 0.01 if not cdn_flag: reward_frame = frame_time_len * float(BIT_RATE[ bit_rate]) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LANTENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len else: reward_frame = -(REBUF_PENALTY * rebuf) if decision_flag or end_of_video: reward_frame += -1 * SMOOTH_PENALTY * (abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000) reward_chunk += reward_frame w.writerow([ABR_NAME,reward_chunk]) reward_chunk = 0 last_bit_rate = bit_rate # ----------------- Your Algorithm --------------------- cnt += 1 timestamp_start = tm.time() bit_rate, target_buffer, latency_limit = abr.run(time, S_time_interval, S_send_data_size, S_chunk_len, S_rebuf, S_buffer_size, S_play_time_len, S_end_delay, S_decision_flag, S_buffer_flag, S_cdn_flag, S_skip_time, end_of_video, cdn_newest_id, download_id, cdn_has_frame, abr_init, start_avgbw) start_avgbw = -1 timestamp_end = tm.time() call_time_sum += timestamp_end - timestamp_start # -------------------- End -------------------------------- else: reward_chunk += reward_frame if end_of_video: break # print("network traceID, network_reward, avg_running_time", trace_count, reward_all, call_time_sum / cnt) reward_all = reward_all/cnt reward_all_sum += reward_all run_time += call_time_sum / cnt if trace_count >= len(all_file_names): break trace_count += 1 cnt = 0 call_time_sum = 0 last_bit_rate = 0 reward_all = 0 bit_rate = 0 target_buffer = 0 S_time_interval = [0] * past_frame_num S_send_data_size = [0] * past_frame_num S_chunk_len = [0] * past_frame_num S_rebuf = [0] * past_frame_num S_buffer_size = [0] * past_frame_num S_end_delay = [0] * past_frame_num S_chunk_size = [0] * past_frame_num S_play_time_len = [0] * past_frame_num S_decision_flag = [0] * past_frame_num S_buffer_flag = [0] * past_frame_num S_cdn_flag = [0] * past_frame_num reward_all += reward_frame return [reward_all_sum / trace_count, run_time / trace_count]
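# A possible driver for test() above, sweeping a subset of the options listed in its
# comments (algorithm, QoE weighting, network trace, video trace). Illustrative only:
# the user_id value and the exact trace names are taken from the comments in test().
if __name__ == '__main__':
    algorithms = ['RBA', 'BBA', 'DYNAMIC', 'PDDQN', 'Pensieve']
    qoe_metrics = ['ar', 'al', 'hd', 'b']
    network_traces = ['fixed', 'high', 'medium', 'low']
    video_traces = ['AsianCup_China_Uzbekistan', 'Fengtimo_2018_11_3', 'YYF_2018_08_12']

    for abr in algorithms:
        for qoe in qoe_metrics:
            for net in network_traces:
                for video in video_traces:
                    avg_reward, avg_runtime = test(0, abr, qoe, net, video)
                    print(abr, qoe, net, video, avg_reward, avg_runtime)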
saver1.restore(sess, nn_model)
print("Model restored.")
chunk_reward = 0

for i_eps in range(50):
    video_count = 0
    is_first = True
    video_id = i_eps % 5
    VIDEO_TRACE = VIDEO_TRACE_list[video_id]
    video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        network_trace_dir)
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=random_seed,
                              logfile_path=LOG_FILE_PATH,
                              VIDEO_SIZE_FILE=video_trace_prefix,
                              Debug=DEBUG)
    pre_ac = 0
    while True:
        timestamp_start = tm.time()
        reward_frame = 0
        time, time_interval, send_data_size, frame_time_len, \
            rebuf, buffer_size, play_time_len, end_delay, \
            cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
            buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate,
                                                                                     target_buffer,
                                                                                     latency_limit)
        # QOE setting
        if end_delay <= 1.0:
            LANTENCY_PENALTY = 0.005
def main(): torch.set_num_threads(1) np.random.seed(RANDOM_SEED) torch.manual_seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( TEST_TRACES) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w') # all models have same actor network # so model_type can be anything net = ActorNetwork([S_INFO, S_LEN], A_DIM) # restore neural net parameters net.load_state_dict(torch.load(ACTOR_MODEL)) print("Testing model restored.") time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY video_count = 0 state = torch.zeros((S_INFO, S_LEN)) weights = np.array([0.2, 0.3, 0.5]) while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms w1 = weights[0] w2 = weights[1] w3 = weights[2] reward = w1 * VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - w2 * REBUF_PENALTY * rebuf \ - w3 * SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state state = torch.roll(state, -1, dims=-1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float( np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = torch.tensor( next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, -1] = min( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) with torch.no_grad(): probability = net.forward(state.unsqueeze(0)) m = Categorical(probability) bit_rate = m.sample().item() # Note: we need to discretize the probability into 1/RAND_RANGE steps, # because there is an intrinsic discrepancy in passing single state and batch states if end_of_video: weights = np.random.randn(3) # Normalization weights = np.abs(weights) / np.linalg.norm(weights, ord=1) log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here state = torch.zeros((S_INFO, S_LEN)) video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w')
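# At each video boundary the loop above resamples the QoE preference weights (w1, w2, w3)
# by taking the absolute value of a Gaussian draw and L1-normalizing it, so the weights
# always sum to 1. A tiny standalone restatement of that step:
import numpy as np

def sample_preference_weights(dim=3):
    weights = np.abs(np.random.randn(dim))
    return weights / np.linalg.norm(weights, ord=1)  # lies on the probability simplex

# sample_preference_weights(3) -> e.g. array([0.52, 0.31, 0.17]); entries are non-negative and sum to 1.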
def TestRun(sess, actor, critic, epoch):
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    net_env = env.Environment()

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []

    reward_sum_all = []
    reward_video_all = []
    reward_sum_per_video = []
    reward_mean_cur = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        assert bit_rate >= 0
        assert bit_rate < A_DIM

        bitrate_send_last, lossrate_recv_last, bitrate_real_recovery, \
            bitrate_send_last_probe, lossrate_recv_last_probe, bitrate_real_recovery_probe, \
            end_of_video, end_of_validation \
            = net_env.action_dispatch_and_report_svr(VIDEO_BIT_RATE[bit_rate])

        time_stamp += 2  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = bitrate_real_recovery / M_IN_K  # 0.1 0.2 ... 1.1 1.2

        r_batch.append(reward)
        last_bit_rate = bit_rate
        reward_sum_per_video.append(reward)

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = bitrate_send_last / 1000.0  # last quality
        state[1, -1] = lossrate_recv_last  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
        state[2, -1] = bitrate_real_recovery / 1000.0  # kilo byte / ms

        state = np.roll(state, -1, axis=1)

        state[0, -1] = bitrate_send_last_probe / 1000.0  # last quality
        state[1, -1] = lossrate_recv_last_probe  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
        state[2, -1] = bitrate_real_recovery_probe / 1000.0  # kilo byte / ms
        state[3, :A_DIM] = np.array(VIDEO_BIT_RATE[:]) / 1000.0  # kilo byte / ms
        state[4, -1] = bitrate_send_last / 1000.0  # kilo byte / ms

        action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
        # log_file.write('action_prob: ' + str(action_prob) + '\n')
        action_cumsum = np.cumsum(action_prob)
        # log_file.write('action_cumsum: ' + str(action_cumsum) + '\n')
        random_value = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
        decision_array = (action_cumsum > random_value)
        bit_rate = decision_array.argmax()
        # log_file.write('decision: ' + str(bit_rate) + ' random value: ' + str(random_value) +
        #                ' decision_array: ' + str(decision_array) + '\n')
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        entropy_record.append(a3c.compute_entropy(action_prob[0]))

        if end_of_video:
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            reward_sum_all.append(reward_sum_per_video[1:])
            video_reward_sum = np.sum(reward_sum_per_video[1:])
            reward_video_all.append(video_reward_sum)
            meanvalue = np.mean(reward_sum_per_video)
            stdvalue = np.std(reward_sum_per_video)

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []
            reward_sum_per_video = []

            # print("video count: %d video_reward_sum:%.3f meanvalue:%.3f stdvalue:%.3f" %
            #       (video_count, video_reward_sum, meanvalue, stdvalue))

            if end_of_validation:
                mean_all_video_reward = np.mean(reward_video_all)
                sum_all_video_reward = np.sum(reward_video_all)
                std_all_video_reward = np.std(reward_video_all)
                reward_mean_cur = mean_all_video_reward
                # print("video total count: %d reward_sum:%.3f reward_mean:%.3f reward_std:%.3f" % (
                #     video_count, sum_all_video_reward, mean_all_video_reward, std_all_video_reward))
                print('epoch:', epoch, ' reward_mean: ', reward_mean_cur)
                break

    return reward_mean_cur
def main():
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')  # text mode: plain strings are written below

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
                video_chunk_size, next_video_chunk_sizes, \
                end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain,
                                      CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            DECISIONS.append(bit_rate)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'w')

    print("Decisions: {}".format(Counter(DECISIONS)))
def main():
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    epoch = 0
    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    r_batch = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file_write = (str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' +
                          str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' +
                          str(delay) + '\t' +
                          str(reward) + '\n')
        log_file.write(log_file_write.encode('utf-8'))  # 'utf-8' is a registered codec; 'ANSI' is not
        log_file.flush()

        # buffer-based decision: reservoir / cushion with a linear ramp in between
        if buffer_size < RESEVOIR:
            bit_rate = 0
        elif buffer_size >= RESEVOIR + CUSHION:
            bit_rate = A_DIM - 1
        else:
            bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION)

        bit_rate = int(bit_rate)

        if end_of_video:
            log_file.write('\n'.encode('utf-8'))
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            r_batch = []

            print("video count: " + str(video_count))
            video_count += 1

            if video_count > len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
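# The bitrate choice above is the classic buffer-based (BBA-style) rule: lowest quality
# below the reservoir, highest above reservoir + cushion, and a linear ramp in between.
# Restated as a standalone helper for clarity; the reservoir/cushion values here are
# placeholders, the script itself uses the RESEVOIR and CUSHION constants (spelling as
# in this file).
def buffer_based_bitrate(buffer_size, n_levels, reservoir=5.0, cushion=10.0):
    if buffer_size < reservoir:
        return 0
    if buffer_size >= reservoir + cushion:
        return n_levels - 1
    return int((n_levels - 1) * (buffer_size - reservoir) / float(cushion))

# e.g. with 6 levels, a 5 s reservoir and a 10 s cushion, a 9 s buffer maps to
# int(5 * 4 / 10) = 2.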
def main():
    # check that the constant definitions are valid
    assert len(bitRatesOptions) == bitRatesTypes

    # load the traces
    allCookedTime, allCookedBW, allFileNames = load_trace.load_trace()

    # set the environment
    netEnvironment = env.Environment(all_cooked_time=allCookedTime,
                                     all_cooked_bw=allCookedBW)

    # open the output log file to write
    outputFileName = outputFilePrefix + "_" + allFileNames[netEnvironment.trace_idx]
    outputFilePointer = open(outputFileName, "wb")

    # initialize the local variables
    timeStamp = 0
    lastBitRateOption = defaultBitRateOption
    currentBitRateOption = defaultBitRateOption
    videoCount = 0
    historyState = np.zeros((stateInfoLength, pastFramesLength))

    # initialize the look-up table
    initialLookUpTable()

    # computing kernel:
    while True:
        # get the video chunk according to the current bitrate option
        assert currentBitRateOption >= 0
        delay, sleepTime, currentBufferSize, rebuffer, currentVideoChunkSize, \
            nextVideoChunkSize, endFlag, chunkRemainCount = netEnvironment.get_video_chunk(currentBitRateOption)

        # update the time stamp because of the delay and sleeping time
        timeStamp += delay + sleepTime  # ms

        # calculate the reward value according to the formula
        qualityValue = bitRatesOptions[currentBitRateOption] / bitsFactor  # kb to Mb
        smoothValue = np.abs(bitRatesOptions[currentBitRateOption]
                             - bitRatesOptions[lastBitRateOption]) / bitsFactor
        rewardValue = qualityValue \
                      - rebufferFactor * rebuffer \
                      - smoothFactor * smoothValue

        # write the output file
        outputItemStr = str(timeStamp / millsecondsPerSecond) + '\t' \
                        + str(bitRatesOptions[currentBitRateOption]) + '\t' \
                        + str(currentBufferSize) + '\t' \
                        + str(rebuffer) + '\t' \
                        + str(currentVideoChunkSize) + '\t' \
                        + str(delay) + '\t' \
                        + str(rewardValue) + '\n'
        outputFilePointer.write(outputItemStr.encode('utf-8'))
        outputFilePointer.flush()

        # update the bit rate option
        lastBitRateOption = currentBitRateOption

        # update the history state information like a sliding window
        historyState = np.roll(historyState, -1, axis=1)
        historyState[0, -1] = bitRatesOptions[currentBitRateOption] / float(maxBitRate)
        historyState[1, -1] = currentBufferSize / bufferNormFactor
        historyState[2, -1] = rebuffer
        historyState[3, -1] = float(currentVideoChunkSize) / float(delay) / bitsFactor
        historyState[4, -1] = np.minimum(chunkRemainCount, defaultChunkCountToEnd) / float(defaultChunkCountToEnd)

        # MPC kernel begin
        # calculate the normalized estimation error of the bandwidth prediction
        currentError = 0.
        if len(pastBWEsts) > 0:
            currentError = abs(pastBWEsts[-1] - historyState[3, -1]) / float(historyState[3, -1])
        pastErrors.append(currentError)

        # calculate the harmonic mean of the last 5 history bandwidths
        # Step 1: collect the last 5 history bandwidths
        pastRealBWArray = historyState[3, -5:]
        while pastRealBWArray[0] == 0.0:
            pastRealBWArray = pastRealBWArray[1:]
        # Step 2: calculate the harmonic mean
        pastRealBWSum = 0.0
        for pastRealBWItems in pastRealBWArray:
            pastRealBWSum += (1 / float(pastRealBWItems))
        harmonicBW = 1.0 / (pastRealBWSum / len(pastRealBWArray))

        # calculate the predicted future bandwidth according to the est. error and harmonic mean
        errorIndex = min(5, len(pastErrors))
        maxError = float(max(pastErrors[-errorIndex:]))
        currentPredBW = harmonicBW / (1 + maxError)
        pastBWEsts.append(currentPredBW)

        # use the predicted bandwidth and the next chunk sizes to calculate the estimated download times
        allDownloadTime = []
        for option in range(0, bitRatesTypes):
            allDownloadTime.append((float(nextVideoChunkSize[option]) / (bitsFactor * bitsFactor)) / currentPredBW)

        finalOption = Decision(currentBufferSize, allDownloadTime[0], currentBitRateOption)
        currentBitRateOption = finalOption
        assert finalOption >= 0

        if endFlag:
            outputFilePointer.write("\n".encode('utf-8'))
            outputFilePointer.close()

            lastBitRateOption = defaultBitRateOption
            currentBitRateOption = defaultBitRateOption
            historyState = np.zeros((stateInfoLength, pastFramesLength))

            print("video count", videoCount)
            videoCount += 1

            if videoCount >= len(allFileNames):
                break

            outputFileName = outputFilePrefix + "_" + allFileNames[netEnvironment.trace_idx]
            outputFilePointer = open(outputFileName, "wb")
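# The bandwidth predictor used above: harmonic mean of the last (up to) five observed
# throughputs, discounted by the largest recent relative prediction error (the robustMPC
# idea). Restated as a standalone helper; argument names here are chosen for clarity only.
def predict_bandwidth(past_throughputs, past_errors, window=5):
    recent = [t for t in past_throughputs[-window:] if t > 0.0]
    if not recent:
        return 0.0  # no valid samples yet
    harmonic = len(recent) / sum(1.0 / t for t in recent)
    max_error = max(past_errors[-min(window, len(past_errors)):]) if past_errors else 0.0
    return harmonic / (1.0 + max_error)

# e.g. predict_bandwidth([2.0, 4.0, 4.0], [0.1, 0.25]) = 3.0 / 1.25 = 2.4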
def main(): os.system('rm -r ' + TEST_LOG_FOLDER) os.system('mkdir ' + TEST_LOG_FOLDER) np.random.seed(RANDOM_SEED) all_user_pos, all_file_names = load_trace.load_trace(TEST_TRACES) net_env = fixed_env.Environment(all_user_pos=all_user_pos) log_path = TEST_LOG_FOLDER + 'log_sim_rl_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') with tf.Session() as sess: actor = a3c.ActorNetwork(sess, state_dim=[S_INFO, S_LEN], action_dim=A_DIM, learning_rate=ACTOR_LR_RATE) critic = a3c.CriticNetwork(sess, state_dim=[S_INFO, S_LEN], learning_rate=CRITIC_LR_RATE) sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() # save neural net parameters # restore neural net parameters if NN_MODEL is not None: # NN_MODEL is the path to file saver.restore(sess, NN_MODEL) print("Testing model restored.") # initializing association = one_hot().T num_shared = 50 trace_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real channel_gain, num_user_bs, rate, end_of_trace = \ net_env.scheduling_and_association(association, num_shared) reward = np.mean(np.log(rate)) # log time_stamp, bit_rate, buffer_size, reward log_file.write(str(reward) + '\n') log_file.flush() state_p1 = (channel_gain-np.mean(channel_gain.reshape((-1))))/(np.std(channel_gain.reshape((-1)))+1e-6) state_p2 = ((num_user_bs-np.mean(num_user_bs))/(np.std(num_user_bs)+1e-6)).reshape((7,1)) #state = np.concatenate([state_p1,state_p2],axis = 1) # state shape (7, 91) state = state_p1 # compute action probability vector action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN))) action = epsilon_greedy(action_prob, 0) # set epsilon to zero when testing association, num_shared = rl_scheduling(channel_gain, action) if end_of_trace: print all_file_names[net_env.trace_idx-1],net_env.scheduling_ptr,'number of shared subchannels:', num_shared, 'SINR threshold:', BETA_SET[np.argmax(action[K_DIM:A_DIM])] #plot_cellular_network(net_env.macrocell, net_env.picocells, net_env.current_user_pos, association) log_file.write('\n') log_file.close() association = one_hot().T num_shared = 50 trace_count += 1 if trace_count >= len(all_file_names): break log_path = TEST_LOG_FOLDER + 'log_sim_rl_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') # append test performance to the log with open(LOG_FILE + '_rl_test', 'ab') as log_file: rewards = [] test_log_files = os.listdir(TEST_LOG_FOLDER) for test_log_file in test_log_files: reward = [] with open(TEST_LOG_FOLDER + test_log_file, 'rb') as f: for line in f: parse = line.split() try: reward.append(float(parse[0])) except IndexError: break rewards.append(np.sum(reward[1:])) rewards = np.array(rewards) rewards_min = np.min(rewards) rewards_5per = np.percentile(rewards, 5) rewards_mean = np.mean(rewards) rewards_median = np.percentile(rewards, 50) rewards_95per = np.percentile(rewards, 95) rewards_max = np.max(rewards) log_file.write(str(rewards_min) + '\t' + str(rewards_5per) + '\t' + str(rewards_mean) + '\t' + str(rewards_median) + '\t' + str(rewards_95per) + '\t' + str(rewards_max) + '\n') log_file.flush() print 'testing results' + '\t average rewards: ' + str(rewards_mean)
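# The test loop above calls epsilon_greedy(action_prob, 0), i.e. purely greedy selection
# at evaluation time, and later reads the chosen action as a vector (action[K_DIM:A_DIM]).
# The real helper is defined elsewhere; a plausible minimal version is sketched here for
# reference only, returning a one-hot action vector.
import numpy as np

def epsilon_greedy_sketch(action_prob, epsilon):
    action_prob = np.asarray(action_prob).ravel()
    if np.random.rand() < epsilon:
        choice = np.random.randint(len(action_prob))  # explore
    else:
        choice = int(np.argmax(action_prob))  # exploit
    one_hot_action = np.zeros(len(action_prob))
    one_hot_action[choice] = 1.0
    return one_hot_action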
def main():
    run_id = 0

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)
    if not os.path.exists(TRANS_DIR):
        os.makedirs(TRANS_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(cooked_trace_folder=TRACE_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    log_file = open(log_path, 'w')  # text mode: plain strings are written below
    trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    trans_file = open(trans_path, 'w')

    model = abr_agent_sim.discrete_BCQ()

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                       str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                       str(buffer_size) + '\t' +
                       str(rebuf) + '\t' +
                       str(video_chunk_size) + '\t' +
                       str(delay) + '\t' +
                       str(reward) + '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
            old_state = np.zeros((S_INFO, S_LEN), dtype=np.float64)
        else:
            state = np.array(s_batch[-1], copy=True)
            old_state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(video_chunk_remain,
                                  CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        bit_rate = model.select_action(np.reshape(state, (-1)))

        action_prob = np.zeros((len(VIDEO_BIT_RATE)), dtype=np.float64)
        action_prob[int(bit_rate)] = 1.0
        send_data = str(bit_rate)

        # dump the (old state, action, new state, reward, action sent) transition
        trans_file.write('|'.join([str(list(old_state.reshape(-1))),
                                   str(list(action_prob.reshape(-1))),
                                   str(list(state.reshape(-1))),
                                   str(reward),
                                   str(send_data)]))
        trans_file.write('\n')
        trans_file.flush()

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()
            trans_file.write('\n')
            trans_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)

            print("video count", video_count)
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
            log_file = open(log_path, 'w')
            trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
            trans_file = open(trans_path, 'w')
def train(epoch, train_trace): # path setting TRAIN_TRACES = train_trace video_size_file = './dataset/video_trace/sports/frame_trace_' # video trace path setting, LogFile_Path = "./log/" # log file trace path setting, # load the trace all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( TRAIN_TRACES) # random_seed random_seed = 2 video_count = 0 frame_time_len = 0.04 reward_all_sum = 0 # init the environment net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, random_seed=random_seed, logfile_path=LogFile_Path, VIDEO_SIZE_FILE=video_size_file, Debug=False) BIT_RATE = [500.0, 850.0, 1200.0, 1850.0] # kpbs # ABR setting cnt = 0 # defalut setting bit_rate = 0 last_bit_rate = 0 target_buffer = 1 latency_limit = 7 # QOE setting reward_frame = 0 reward_all = 0 reward = 0 SMOOTH_PENALTY = 0.01 REBUF_PENALTY = 1.5 LANTENCY_PENALTY = 0.01 BITRATE_REWARD = 0.001 SKIP_PENALTY = 1 switch_num = 0 rebuf_time = 0 buffer_flag = 0 cdn_flag = 0 S_time_interval = [0] * 100 S_send_data_size = [0] * 100 S_buffer_size = [0] * 100 S_end_delay = [0] * 100 S_rebuf = [0] * 100 flag = False n = 0 mark = 0 marks = 0 while True: if len(agent.memory) > BATCH_SIZE and cnt % 1000 == 0: agent.replay(BATCH_SIZE) reward_frame = 0 time, time_interval, send_data_size, chunk_len, \ rebuf, buffer_size, play_time_len, end_delay, \ cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \ buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit) cnt += 1 S_time_interval.append(time_interval) S_time_interval.pop(0) S_buffer_size.append(buffer_size) S_buffer_size.pop(0) S_send_data_size.append(send_data_size) S_send_data_size.pop(0) S_end_delay.append(end_delay) S_end_delay.pop(0) S_rebuf.append(rebuf) S_rebuf.pop(0) # # QOE setting # if end_delay <= 1.0: # LANTENCY_PENALTY = 0.005 # else: # LANTENCY_PENALTY = 0.01 if not cdn_flag: reward_frame = frame_time_len * float( BIT_RATE[bit_rate] ) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LANTENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len else: reward_frame = -(REBUF_PENALTY * rebuf) rebuf_time += rebuf n += 1 reward += reward_frame if decision_flag and not end_of_video: reward_frame = -1 * SMOOTH_PENALTY * ( abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000) last_bit_rate = bit_rate reward += reward_frame length = len(S_buffer_size) if flag: next_state = [] for i in S_buffer_size[length - history_len:]: next_state.append(i * 0.1) for i in S_send_data_size[length - history_len:]: next_state.append(i * 0.00001) for i in S_time_interval[length - history_len:]: next_state.append(i * 10) for i in S_end_delay[length - history_len:]: next_state.append(i * 0.1) for i in S_rebuf[length - history_len:]: next_state.append(i) marks += 1 if (n >= history_len - 40): next_state = np.reshape(next_state, [1, STATE_SIZE]) agent.remember(state, action, reward, next_state, done) reward = 0 else: mark += 1 n = 0 flag = True state = [] for i in S_buffer_size[length - history_len:]: state.append(i * 0.1) for i in S_send_data_size[length - history_len:]: state.append(i * 0.00001) for i in S_time_interval[length - history_len:]: state.append(i * 10) for i in S_end_delay[length - history_len:]: state.append(i * 0.1) for i in S_rebuf[length - history_len:]: state.append(i) state = np.reshape(state, [1, STATE_SIZE]) action = agent.act(state) bit_rate = ACTION_SAPCE[action][0] target_buffer = ACTION_SAPCE[action][1] latency_limit = ACTION_SAPCE[action][2] 
switch_num = 0 rebuf_time = 0 reward_all += reward_frame if end_of_video: agent.update_target_model() # Narrow the range of results print("video count", video_count, reward_all, mark, marks) reward_all_sum += reward_all / 20 video_count += 1 if video_count >= len(all_file_names): agent.save("save/" + str(epoch) + ".h5") break reward_all = 0 bit_rate = 0 target_buffer = 1 S_time_interval = [0] * 100 S_send_data_size = [0] * 100 S_buffer_size = [0] * 100 S_end_delay = [0] * 100 S_rebuf = [0] * 100 rebuf_time = 0 buffer_flag = 0 cdn_flag = 0 reward = 0 flag = False n = 0 mark = 0 marks = 0 return reward_all_sum
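# train() above indexes ACTION_SAPCE[action] as (bit_rate, target_buffer, latency_limit),
# so the action space is presumably the Cartesian product of those three discrete sets.
# The actual table is defined elsewhere (with that spelling); the construction below is
# only a guess at its shape, with placeholder values for the buffer and latency levels.
import itertools

BIT_RATE_LEVELS = [0, 1, 2, 3]        # indices into BIT_RATE
TARGET_BUFFER_LEVELS = [0, 1]         # placeholder target-buffer settings
LATENCY_LIMIT_LEVELS = [4, 7]         # placeholder latency limits in seconds

ACTION_SPACE_SKETCH = list(itertools.product(BIT_RATE_LEVELS,
                                             TARGET_BUFFER_LEVELS,
                                             LATENCY_LIMIT_LEVELS))
# len(ACTION_SPACE_SKETCH) == 16; agent.act(state) would return an index into this list.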
def main(): for num_shared in range(5, 100, 10): for beta in range(-6, 14, 2): #num_shared = 55 #beta = 2 print "num_shared, beta: ", num_shared, beta os.system('rm -r ' + TEST_LOG_FOLDER) os.system('mkdir ' + TEST_LOG_FOLDER) np.random.seed(RANDOM_SEED) all_user_pos, all_file_names = load_trace.load_trace(TEST_TRACES) net_env = fixed_env.Environment(all_user_pos=all_user_pos) log_path = TEST_LOG_FOLDER + 'log_sim_pf_' + all_file_names[ net_env.trace_idx] log_file = open(log_path, 'wb') association = one_hot().T trace_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real channel_gain, num_user_bs, rate, end_of_trace = \ net_env.scheduling_and_association(association, num_shared) reward = np.mean(np.log(rate)) # log time_stamp, bit_rate, buffer_size, reward log_file.write(str(reward) + '\n') log_file.flush() association = picocell_first(channel_gain, num_shared, beta) if end_of_trace: #plot_cellular_network(net_env.macrocell, net_env.picocells, net_env.current_user_pos, association) log_file.write('\n') log_file.close() association = one_hot().T print "trace_count", trace_count, all_file_names[ net_env.trace_idx] trace_count += 1 if trace_count >= len(all_file_names): break log_path = TEST_LOG_FOLDER + 'log_sim_pf_' + all_file_names[ net_env.trace_idx] log_file = open(log_path, 'wb') # append test performance to the log with open(LOG_FILE + '_test', 'ab') as log_file: rewards = [] test_log_files = os.listdir(TEST_LOG_FOLDER) for test_log_file in test_log_files: reward = [] with open(TEST_LOG_FOLDER + test_log_file, 'rb') as f: for line in f: parse = line.split() try: reward.append(float(parse[0])) except IndexError: break rewards.append(np.sum(reward[1:])) rewards = np.array(rewards) rewards_min = np.min(rewards) rewards_5per = np.percentile(rewards, 5) rewards_mean = np.mean(rewards) rewards_median = np.percentile(rewards, 50) rewards_95per = np.percentile(rewards, 95) rewards_max = np.max(rewards) log_file.write( str(num_shared) + '\t' + str(beta) + '\t' + str(rewards_mean) + '\n') ''' log_file.write(str(num_shared) + '\t' + str(beta) + '\t' + str(rewards_min) + '\t' + str(rewards_5per) + '\t' + str(rewards_mean) + '\t' + str(rewards_median) + '\t' + str(rewards_95per) + '\t' + str(rewards_max) + '\n') ''' log_file.flush() print 'testing results' + '\t average rewards: ' + str( rewards_mean)
def main(self, args, net_env=None, policy=None): np.random.seed(RANDOM_SEED) viper_flag = True assert len(VIDEO_BIT_RATE) == A_DIM log_f = LOG_FILE if net_env is None: viper_flag = False all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( args.traces) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) # if args.update: # log_f = log_f.replace('dt', 'du') if not viper_flag and args.log: log_path = LOG_FILE + '_' + net_env.all_file_names[ net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY s_batch = [np.zeros((S_INFO, S_LEN))] # a_batch = np.zeros((TOTAL_VIDEO_CHUNKS, 3)) r_batch = [] rollout = [] video_count = 0 reward_sum = 0 in_compute = [] # load dt policy if policy is None: with open(args.dt, 'rb') as f: policy = pk.load(f) policy = fsm.FSM(policy) # ========= @ zili: debug ======== # with open('decision_tree_ready/robustmpc_norway_500.pk3', 'rb') as f: # baseline = pk.load(f) while True: # serve video forever delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \ video_chunk_remain = net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric) r_batch.append(reward) reward_sum += reward last_bit_rate = bit_rate if args.log: # log time_stamp, bit_rate, buffer_size, reward log_file.write( bytes(str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n', encoding='utf-8')) log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float( np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR state[2, -1] = rebuf state[3, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[4, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K serialized_state = serial(state) bit_rate = int(policy.predict([serialized_state])[0]) rollout.append((state, bit_rate, serialized_state)) s_batch.append(state) # ======== @ zili: debug ======== # if video_chunk_remain > 0: # a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][0] = bit_rate # a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][2] = int(baseline.predict([serialized_state])[0]) # if args.update: # chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain) # policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1)) # if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON: # in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN, # last_bit_rate, state, args)) # in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN, # last_bit_rate, state, args)) # in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN, # last_bit_rate, state, args)) # # for traj in in_compute: # this_chunk_size = video_chunk_size # this_delay = delay # while True: # if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH: 
# new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0]) # traj.next_chunk(new_bitrate) # this_chunk_size, this_delay = traj.trans_msg # else: # break # # while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end: # r_below = sum([get_reward(in_compute[0].quality[i], in_compute[0].rebuf[i], # in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) # r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i], # in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) # r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i], # in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) # if r_above == max(r_below, r_normal, r_above): # policy.update(in_compute[0].chunk_index, 1) # # a_batch[in_compute[0].chunk_index][1] = in_compute[0].chunk_init_bitrate # elif r_normal == max(r_below, r_normal, r_above): # policy.update(in_compute[0].chunk_index, -1) # # a_batch[in_compute[1].chunk_index][1] = in_compute[1].chunk_init_bitrate # else: # policy.update(in_compute[0].chunk_index, 0) # # a_batch[in_compute[2].chunk_index][1] = in_compute[2].chunk_init_bitrate # # in_compute.pop(0) # in_compute.pop(0) # in_compute.pop(0) if end_of_video: # print(a_batch) if args.log: log_file.write(bytes('\n', encoding='utf-8')) log_file.close() print("video count", video_count) last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here r_batch = [] in_compute = [] if viper_flag: return rollout else: video_count += 1 if video_count >= len(net_env.all_file_names): break if args.log: log_path = log_f + '_' + net_env.all_file_names[ net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') return reward_sum
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace() net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w') # epoch = 0 time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY r_batch = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() # buffer based # if buffer_size < RESEVOIR: # bit_rate = 0 # elif buffer_size >= RESEVOIR + CUSHION: # bit_rate = A_DIM - 1 # else: # bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION) # bola utils = [ np.log(s / next_video_chunk_sizes[-1]) for s in next_video_chunk_sizes ] V = 5.2 # control parameter p = 4 # chunk size in seconds gamma = 5.0 / p Q = buffer_size # Q is buffer size score = [] for i in range(A_DIM): score.append((V * utils[i] + V * gamma * p - buffer_size) / next_video_chunk_sizes[i]) bit_rate = np.argmax(score) bit_rate = int(bit_rate) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here r_batch = [] print("video count", video_count) video_count += 1 if video_count > len(all_file_names): break # stop test break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w')
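# A worked instance of the BOLA score computed above, with made-up chunk sizes, to show
# how the buffer level shifts the argmax between qualities. The chunk sizes are
# illustrative only; V, gamma and p match the constants used in main().
import numpy as np

def bola_choice(next_video_chunk_sizes, buffer_size, V=5.2, p=4, gamma=5.0 / 4):
    utils = [np.log(s / next_video_chunk_sizes[-1]) for s in next_video_chunk_sizes]
    score = [(V * utils[i] + V * gamma * p - buffer_size) / next_video_chunk_sizes[i]
             for i in range(len(next_video_chunk_sizes))]
    return int(np.argmax(score))

sizes = [400000., 800000., 1600000.]         # bytes for the next chunk at each quality
print(bola_choice(sizes, buffer_size=4.0))   # small buffer  -> quality 0
print(bola_choice(sizes, buffer_size=20.0))  # large buffer  -> quality 2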
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace() net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] entropy_record = [] video_count = 0 # make chunk combination options for combo in itertools.product([0, 1, 2, 3, 4, 5], repeat=5): CHUNK_COMBO_OPTIONS.append(combo) while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K # log scale reward # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0])) # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0])) # reward = log_bit_rate \ # - REBUF_PENALTY * rebuf \ # - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate) # reward = BITRATE_REWARD[bit_rate] \ # - 8 * rebuf - np.abs(BITRATE_REWARD[bit_rate] - BITRATE_REWARD[last_bit_rate]) r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float( np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR state[2, -1] = rebuf state[3, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[4, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K # ================== MPC ========================= curr_error = 0 # defualt assumes that this is the first request so error is 0 since we have never predicted bandwidth if (len(past_bandwidth_ests) > 0): curr_error = abs(past_bandwidth_ests[-1] - state[3, -1]) / float( state[3, -1]) past_errors.append(curr_error) # pick bitrate according to MPC # first get harmonic mean of last 5 bandwidths past_bandwidths = state[3, -5:] while past_bandwidths[0] == 0.0: past_bandwidths = past_bandwidths[1:] #if ( len(state) < 5 ): # past_bandwidths = state[3,-len(state):] #else: # past_bandwidths = state[3,-5:] bandwidth_sum = 0 for past_val in past_bandwidths: bandwidth_sum += (1 / float(past_val)) harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths)) # future bandwidth prediction # divide by 1 + max of last 5 (or up to 5) errors max_error = 0 error_pos = -5 if (len(past_errors) < 5): error_pos = -len(past_errors) max_error = 
float(max(past_errors[error_pos:])) future_bandwidth = harmonic_bandwidth / (1 + max_error ) # robustMPC here past_bandwidth_ests.append(harmonic_bandwidth) # future chunks length (try 4 if that many remaining) last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain) future_chunk_length = MPC_FUTURE_CHUNK_COUNT if (TOTAL_VIDEO_CHUNKS - last_index < 5): future_chunk_length = TOTAL_VIDEO_CHUNKS - last_index # all possible combinations of 5 chunk bitrates (9^5 options) # iterate over list and for each, compute reward and store max reward combination max_reward = -100000000 best_combo = () start_buffer = buffer_size #start = time.time() for full_combo in CHUNK_COMBO_OPTIONS: combo = full_combo[0:future_chunk_length] # calculate total rebuffer time for this combination (start with start_buffer and subtract # each download time and add 2 seconds in that order) curr_rebuffer_time = 0 curr_buffer = start_buffer bitrate_sum = 0 smoothness_diffs = 0 last_quality = int(bit_rate) for position in range(0, len(combo)): chunk_quality = combo[position] index = last_index + position + 1 # e.g., if last chunk is 3, then first iter is 3+0+1=4 download_time = ( get_chunk_size(chunk_quality, index) / 1000000.) / future_bandwidth # this is MB/MB/s --> seconds if (curr_buffer < download_time): curr_rebuffer_time += (download_time - curr_buffer) curr_buffer = 0 else: curr_buffer -= download_time curr_buffer += 4 bitrate_sum += VIDEO_BIT_RATE[chunk_quality] smoothness_diffs += abs(VIDEO_BIT_RATE[chunk_quality] - VIDEO_BIT_RATE[last_quality]) # bitrate_sum += BITRATE_REWARD[chunk_quality] # smoothness_diffs += abs(BITRATE_REWARD[chunk_quality] - BITRATE_REWARD[last_quality]) last_quality = chunk_quality # compute reward for this combination (one reward per 5-chunk combo) # bitrates are in Mbits/s, rebuffer in seconds, and smoothness_diffs in Mbits/s reward = (bitrate_sum / 1000.) - ( REBUF_PENALTY * curr_rebuffer_time) - (smoothness_diffs / 1000.) # reward = bitrate_sum - (8*curr_rebuffer_time) - (smoothness_diffs) if (reward >= max_reward): if (best_combo != ()) and best_combo[0] < combo[0]: best_combo = combo else: best_combo = combo max_reward = reward # send data to html side (first chunk of best combo) send_data = 0 # no combo had reward better than -1000000 (ERROR) so send 0 if (best_combo != ()): # some combo was good send_data = best_combo[0] bit_rate = send_data # hack # if bit_rate == 1 or bit_rate == 2: # bit_rate = 0 # ================================================ # Note: we need to discretize the probability into 1/RAND_RANGE steps, # because there is an intrinsic discrepancy in passing single state and batch states s_batch.append(state) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] print "video count", video_count video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
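# --- The robustMPC throughput estimator above can be factored into a small helper:
# --- harmonic mean of the recent samples, discounted by the largest recent normalized
# --- prediction error.  A sketch with illustrative names; the original keeps
# --- past_errors / past_bandwidth_ests as module-level lists instead.
def robust_bandwidth_estimate(past_bandwidths, past_errors, window=5):
    """Return (future_bandwidth, harmonic_bandwidth) as in the robustMPC step above."""
    samples = [bw for bw in past_bandwidths[-window:] if bw > 0.0]
    if not samples:
        return 0.0, 0.0
    harmonic = len(samples) / sum(1.0 / bw for bw in samples)
    max_error = max(past_errors[-window:]) if past_errors else 0.0
    return harmonic / (1.0 + max_error), harmonic

# usage: feed the last few throughput samples (e.g. MB/s) and the history of
# |estimate - actual| / actual errors accumulated so far
# future_bw, harmonic_bw = robust_bandwidth_estimate([1.2, 0.9, 1.1], [0.1, 0.3])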
def main(): args = parser.parse_args() if args.lin: qoe_metric = 'results_lin' elif args.log: qoe_metric = 'results_log' else: print('Please select the QoE Metric!') if args.FCC: dataset = 'fcc' elif args.HSDPA: dataset = 'HSDPA' elif args.Oboe: dataset = 'Oboe' else: print('Please select the dataset!') dataset_path = './traces_' + dataset + '/' Log_file_path = './' + qoe_metric + '/' + dataset + '/log_sim_rb' np.random.seed(RANDOM_SEED) # if not os.path.exists(SUMMARY_DIR): # os.makedirs(SUMMARY_DIR) all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( dataset_path) past_bandwidths = np.zeros(6) opt_ptr = 0 net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # current_psnr = DEFAULT_PSNR # last_psnr = DEFAULT_PSNR video_count = 0 while True: # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) # throuput_e = np.roll(throuput_e, -1) # throuput_e[-1] = float(video_chunk_size) / float(delay) * M_IN_K # byte/s # while throuput_e[0] == 0.0: # throuput_e = throuput_e[1:] # bandwidth_sum = 0 # for past_val in throuput_e: # bandwidth_sum += (1/float(past_val)) # harmonic_bandwidth = 1.0/(bandwidth_sum/len(throuput_e)) # throuput_a = harmonic_bandwidth past_bandwidths = np.roll(past_bandwidths, -1) past_bandwidths[-1] = float(video_chunk_size) / float( delay) * M_IN_K # byte/s while past_bandwidths[0] == 0.0: past_bandwidths = past_bandwidths[1:] curr_error = 0 # defualt assumes that this is the first request so error is 0 since we have never predicted bandwidth if (len(past_bandwidth_ests) > 0): curr_error = abs(past_bandwidth_ests[-1] - past_bandwidths[-1]) / float(past_bandwidths[-1]) past_errors.append(curr_error) # pick bitrate according to MPC # first get harmonic mean of last 5 bandwidths # if ( len(state) < 5 ): # past_bandwidths = state[3,-len(state):] # else: # past_bandwidths = state[3,-5:] bandwidth_sum = 0 for past_val in past_bandwidths: bandwidth_sum += (1 / float(past_val)) harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths)) # future bandwidth prediction # divide by 1 + max of last 5 (or up to 5) errors max_error = 0 error_pos = -5 if (len(past_errors) < 5): error_pos = -len(past_errors) max_error = float(max(past_errors[error_pos:])) future_bandwidth = harmonic_bandwidth / (1 + max_error ) # robustMPC here past_bandwidth_ests.append(harmonic_bandwidth) chunksize_min = next_video_chunk_sizes[0] time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty if qoe_metric == 'results_lin': REBUF_PENALTY = 4.3 reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K else: REBUF_PENALTY = 2.66 log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0])) log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0])) reward = log_bit_rate \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate) last_bit_rate = bit_rate ## last_psnr = current_psnr # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + 
str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() all_reward = [] all_quality_tuple = [] ptr = 0 # RB-algorithm bit_rate = 0 for q in xrange(5, -1, -1): next_size = next_video_chunk_sizes[q] if next_size / future_bandwidth - (buffer_size) <= 0: bit_rate = q break #next_psnr = next_chunk_psnr[q] # if throuput_a * 2 < next_size: # reward = 0 # else: # reward = VIDEO_BIT_RATE[q] / M_IN_K \ # - REBUF_PENALTY * np.maximum(next_size/future_bandwidth - buffer_size, 0) \ # - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[q] - # VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K # log_bit_rate = np.log(VIDEO_BIT_RATE[q] / float(VIDEO_BIT_RATE[0])) # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0])) # reward = log_bit_rate \ # - REBUF_PENALTY * np.maximum(next_size/future_bandwidth - buffer_size, 0) \ # - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate) # all_reward.append(reward) # all_quality_tuple.append(q) # ptr += 1 # all_reward = np.asarray(all_reward) # if all_reward.all() == 0 : # bit_rate = 0 # #current_psnr = next_chunk_psnr[bit_rate] # else: # opt_ptr = all_reward.argmax() # bit_rate = all_quality_tuple[opt_ptr] #current_psnr = next_chunk_psnr[bit_rate] if end_of_video: log_file.write('\n') log_file.close() # bit_rate = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here #current_psnr = DEFAULT_PSNR del past_bandwidth_ests[:] print "video count", video_count video_count += 1 if video_count >= len(all_file_names): break log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
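# --- The rate-based (RB) rule above picks the highest quality whose estimated download
# --- time does not exceed the current buffer.  A compact sketch, assuming next_chunk_sizes
# --- is ordered from lowest to highest quality and future_bandwidth uses the same
# --- size-per-second units as the chunk sizes.
def rb_select(next_chunk_sizes, future_bandwidth, buffer_size):
    for q in range(len(next_chunk_sizes) - 1, -1, -1):
        if next_chunk_sizes[q] / future_bandwidth <= buffer_size:
            return q
    return 0  # fall back to the lowest quality

# e.g. rb_select([0.4e6, 0.8e6, 1.2e6, 2.4e6, 3.6e6, 4.8e6], 1.5e6, 2.0) -> 3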
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM if not os.path.exists(SUMMARY_DIR): os.makedirs(SUMMARY_DIR) all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace() net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') with torch.no_grad(): model = a3c.ActorCritic(state_dim=[S_INFO, S_LEN], action_dim=A_DIM, learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE],islstm = islstm) nn_model = NN_MODEL if nn_model is not None: # nn_model is the path to file model.load_state_dict(torch.load(nn_model, map_location=torch.device('cpu'))) print("Model restored.") state = torch.zeros(S_INFO, S_LEN) time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = torch.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [torch.zeros(S_INFO, S_LEN)] a_batch = [action_vec] r_batch = [] entropy_record = [] video_count = 0 cx = torch.zeros(1, 128) hx = torch.zeros(1, 128) while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty - smoothness reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write((str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n').encode("utf-8")) log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [torch.zeros((S_INFO, S_LEN))] state = torch.roll(state, -1) # Fill in the state vector with normalization state[0, -1] = torch.Tensor([VIDEO_BIT_RATE[last_bit_rate] / float(max(VIDEO_BIT_RATE))]) # last quality state[1, -1] = torch.Tensor([buffer_size / BUFFER_NORM_FACTOR]) # buffer size state[2, -1] = torch.Tensor([float(video_chunk_size) / float(delay) / M_IN_K]) # kilo byte / ms state[3, -1] = torch.Tensor([float(delay) / M_IN_K / BUFFER_NORM_FACTOR]) # /10 sec state[4, :A_DIM] = torch.Tensor([next_video_chunk_sizes]) / M_IN_K / M_IN_K # mega byte # remaining chunk number state[5, -1] = torch.Tensor([min(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)]) if islstm == 0: logits, value = model(state.unsqueeze(dim=0)) else: logits, value, hx, cx = model((state.unsqueeze(dim=0),hx,cx)) # print(f"index {index}, state {state}, logits {logits}, value {value}",sep="\n") # print(state,logits) try: cate = Categorical(logits) bit_rate = cate.sample().item() except Exception as e: print(e) print(f"walking into an error of all null distribution") print(logits, state) exit() policy = logits log_policy = torch.log(logits) entropy = (policy * log_policy).sum(1, keepdim=True) s_batch.append(state) entropy_record.append(entropy) if end_of_video: log_file.write('\n'.encode("utf-8")) log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] cx = cx.detach() hx = hx.detach() action_vec = torch.zeros(A_DIM) 
action_vec[bit_rate] = 1 s_batch.append(torch.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] print ("video count", video_count) video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
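# --- Minimal sketch of the action-selection step used above: the actor outputs a
# --- probability vector over A_DIM bitrates and the test script samples from it with
# --- torch.distributions.Categorical.  The tiny two-layer "actor" below is a stand-in
# --- for illustration, not the a3c.ActorCritic model that the script restores.
import torch
from torch.distributions import Categorical

S_DIM, A_DIM = 6 * 8, 6
actor = torch.nn.Sequential(torch.nn.Linear(S_DIM, 64), torch.nn.ReLU(),
                            torch.nn.Linear(64, A_DIM), torch.nn.Softmax(dim=-1))

with torch.no_grad():
    state = torch.zeros(1, S_DIM)                        # flattened (S_INFO, S_LEN) history
    probs = actor(state)                                 # shape (1, A_DIM), rows sum to 1
    bit_rate = Categorical(probs).sample().item()        # stochastic, as in the loop above
    greedy_bit_rate = int(torch.argmax(probs, dim=-1))   # deterministic alternative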
def main(): np.random.seed(RANDOOM_SEED) assert len(TILES_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_throughput_trace.load_throughput_trace( ) all_cooked_tiles = load_fov_traces.load_fov_traces() all_tile_chunk_video_size = load_tile_chunk_video_size.load_tile_chunk_video_size( ) net_env = env.Environment( all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_cooked_tiles=all_cooked_tiles, all_tile_chunk_video_size=all_tile_chunk_video_size) log_path = LOG_FILE + '_' + all_file_names[net_env.bw_trace_idx] log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] video_count = 0 # make chunk combination options # this combo is used for future optimization for combo in itertools.product(list(range(MPC_FUTURE_CHUNK_COUNT + 1)), repeat=MPC_FUTURE_CHUNK_COUNT): CHUNK_COMBO_OPTIONS.append(combo) # print(combo) while True: # serve video forever # the action is from the last decision if net_env.video_chunk_counter == 0: print(all_file_names[net_env.bw_trace_idx]) delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, \ end_of_video, video_chunk_remain, \ video_chunk_quality, \ basic_video_chunk_quality, \ highest_video_chunk_quality = \ net_env.fetch_video_chunk(bit_rate) time_stamp += delay # ms time_stamp += sleep_time # in ms # initialize the last_video chunk quality if (net_env.video_chunk_counter == 1): last_video_chunk_quality = video_chunk_quality # reward is video quality - rebuffer_penalty reward = video_chunk_quality / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(last_video_chunk_quality - video_chunk_quality) / M_IN_K # log scale reward # log_chunk_quality = np.log(video_chunk_quality / float(basic_video_chunk_quality)) # log_last_chunk_quality = np.log(last_video_chunk_quality / float(basic_video_chunk_quality)) # reward = log_chunk_quality \ # - REBUF_PENALTY * rebuf \ # - SMOOTH_PENALTY * np.abs( log_chunk_quality - log_last_chunk_quality) r_batch.append(reward) last_video_chunk_quality = video_chunk_quality # log time_stamp, video_chunk_quality, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + # unit: sec str(video_chunk_quality) + '\t' + # unit: Kbps str(buffer_size) + '\t' + # unit: sec str(rebuf) + '\t' + # unit: sec str(video_chunk_size) + '\t' + # unit: Bytes str(delay) + '\t' + # unit: ms str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # each row left-shift one # this should be S_INFO number of terms state[0, -1] = video_chunk_quality / float( highest_video_chunk_quality) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR state[2, -1] = rebuf state[3, -1] = float(video_chunk_size) / float(delay) / M_IN_K # Mbyte/s state[4, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) # ===================================MPC =================================================== curr_error = 0 # default assumes that this is the first request so error is 0 since we have never predicted bandwidth if (len(past_bandwidth_ests) > 0): curr_error = abs(past_bandwidth_ests[-1] - state[3, -1]) / float( state[3, -1]) past_errors.append(curr_error) # pick bitrate according to MPC # first get harmonic mean of last n 
bandwidths past_bandwidths = state[3, -PAST_BW_TO_PREDICT:] # cut the meaning throughput while past_bandwidths[0] == 0.0: past_bandwidths = past_bandwidths[1:] bandwidth_sum = 0 for past_val in past_bandwidths: bandwidth_sum += (1 / float(past_val)) harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths)) # future bandwidth prediction # divide by (1+max) of last PAST_BW_TO_PREDICT max_error = 0 error_pos = -PAST_BW_TO_PREDICT if (len(past_errors) < PAST_BW_TO_PREDICT): error_pos = -len(past_errors) max_error = float(max(past_errors[error_pos:])) future_bandwidth = harmonic_bandwidth / (1 + max_error ) # robustMPC here past_bandwidth_ests.append(harmonic_bandwidth) # future chunks length last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1) future_chunk_length = MPC_FUTURE_CHUNK_COUNT # if future chunk num less than PAST_BW_TO_PREDICT if TOTAL_VIDEO_CHUNKS - 1 - last_index < MPC_FUTURE_CHUNK_COUNT: future_chunk_length = int(TOTAL_VIDEO_CHUNKS - last_index - 1) # all possible combinations of MPC_FUTURE_CHUNK_COUNT chunk video qualitys # iterate over list and for each, compute reward and store max reward combination max_reward = -100000000 best_combo = () start_buffer = buffer_size for full_combo in CHUNK_COMBO_OPTIONS: combo = full_combo[0:future_chunk_length] curr_rebuffer_time = 0 curr_buffer = start_buffer quality_sum = 0 smoothness_diffs = 0 last_quality = video_chunk_quality for position in range(0, len(combo)): chunk_quality = combo[position] index = last_index + position + 1 # decide all LT or only FoV if curr_buffer <= BUFFER_THRESH_FOV: curr_video_chunk_size, curr_video_chunk_quality = net_env.get_video_chunk_size_quality( quality_in_fov=chunk_quality, quality_out_fov=-1, chunk_index=index) else: curr_video_chunk_size, curr_video_chunk_quality = net_env.get_video_chunk_size_quality( quality_in_fov=0, quality_out_fov=0, chunk_index=index) download_time = (curr_video_chunk_size / 1000000.0) / future_bandwidth if curr_buffer < download_time: curr_rebuffer_time += (download_time - curr_buffer) curr_buffer = 0 else: curr_buffer -= download_time curr_buffer += VIDEO_CHUNK_LEN quality_sum += curr_video_chunk_quality smoothness_diffs += abs(curr_video_chunk_quality - last_quality) last_quality = curr_video_chunk_quality reward = quality_sum/1000.0 \ - REBUF_PENALTY * curr_rebuffer_time \ - SMOOTH_PENALTY * smoothness_diffs / 1000.0 if reward >= max_reward: if best_combo != () and best_combo[0] < combo[0]: best_combo = combo else: best_combo = combo max_reward = reward send_data = 0 if best_combo != (): send_data = best_combo[0] bit_rate = send_data s_batch.append(state) # ===================================MPC end ================================================== if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] print("trace count" + str(video_count)) video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.bw_trace_idx] log_file = open(log_path, 'wb')
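# --- The MPC search above enumerates every quality combination for the lookahead window
# --- with itertools.product, shrinking the window near the end of the video.  A sketch of
# --- that enumeration; the constant names are illustrative stand-ins for the module-level
# --- ones used above.
import itertools

MPC_FUTURE_CHUNK_COUNT = 5
QUALITY_LEVELS = 6

def future_combos(chunks_remaining):
    """Yield every quality combination for the (possibly shortened) horizon."""
    horizon = min(MPC_FUTURE_CHUNK_COUNT, chunks_remaining)
    for combo in itertools.product(range(QUALITY_LEVELS), repeat=horizon):
        yield combo

# 6^5 = 7776 combinations for a full window, fewer near the end of the video
# print(sum(1 for _ in future_combos(5)), sum(1 for _ in future_combos(2)))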
def main(self, args, net_env=None, policy=None): viper_flag = True assert len(VIDEO_BIT_RATE) == A_DIM log_f = LOG_FILE if net_env is None: viper_flag = False all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) if args.update: log_f = log_f.replace('dt', 'du') if not viper_flag and args.log: log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] rollout = [] video_count = 0 reward_sum = 0 in_compute = [] # load dt policy if policy is None: with open(args.dt, 'rb') as f: policy = pk.load(f) policy = fsm.FSM(policy) while True: # serve video forever delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \ video_chunk_remain = net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric) r_batch.append(reward) reward_sum += reward last_bit_rate = bit_rate if args.log: log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n', encoding='utf-8')) log_file.flush() # select bit_rate according to decision tree if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) serialized_state = serial(state) bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0]) rollout.append((state, bit_rate, serialized_state)) s_batch.append(state) if args.update: chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1) policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1)) if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON: in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN, last_bit_rate, state, args)) in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN, last_bit_rate, state, args)) in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN, last_bit_rate, state, args)) for traj in in_compute: this_chunk_size = video_chunk_size this_delay = delay while True: if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH: new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0]) traj.next_chunk(new_bitrate) this_chunk_size, this_delay = traj.trans_msg else: break while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end: r_below = sum([get_reward(in_compute[0].quality[i], 
in_compute[0].rebuf[i], in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i], in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i], in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)]) if r_above == max(r_below, r_normal, r_above): policy.update(in_compute[0].chunk_index, 1) elif r_normal == max(r_below, r_normal, r_above): policy.update(in_compute[0].chunk_index, -1) else: policy.update(in_compute[0].chunk_index, 0) in_compute.pop(0) in_compute.pop(0) in_compute.pop(0) if end_of_video: if args.log: log_file.write(bytes('\n', encoding='utf-8')) log_file.close() print("video count", video_count) last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here r_batch = [] in_compute = [] if viper_flag: return rollout else: video_count += 1 if video_count >= len(net_env.all_file_names): break if args.log: log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') return reward_sum
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( TEST_TRACES) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') gpu_options = tf.GPUOptions(allow_growth=True) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) bba = bbaplus(sess) #sess.run(tf.global_variables_initializer()) time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] #alpha_prob = np.zeros(S_INFO) #action_prob = np.zeros(A_DIM) #action_prob[bit_rate] = 1. action_prob = 5., 10. #r_batch = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty - smoothness reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K # r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(np.round(action_prob, 2)) + '\t' + #str(np.round(alpha_prob[0], 2)) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / \ float(np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / \ float(delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = np.array( next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) s_batch.append(state) action_prob = bba.predict(state) RESEVOIR, CUSHION = action_prob if buffer_size < RESEVOIR: bit_rate = 0 elif buffer_size >= RESEVOIR + CUSHION: bit_rate = A_DIM - 1 else: bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION) bit_rate = int(bit_rate) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) #entropy_record = [] video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
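# --- The BBA-style mapping used above: below the reservoir stay at the lowest quality,
# --- above reservoir + cushion take the highest, and interpolate linearly in between.
# --- A sketch with illustrative default thresholds; in the script above the
# --- (reservoir, cushion) pair comes from the learned bba predictor instead.
def buffer_based_select(buffer_size, n_levels, reservoir=5.0, cushion=10.0):
    if buffer_size < reservoir:
        return 0
    if buffer_size >= reservoir + cushion:
        return n_levels - 1
    return int((n_levels - 1) * (buffer_size - reservoir) / float(cushion))

# e.g. with 6 levels: buffer 3s -> 0, buffer 9s -> 2, buffer 20s -> 5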
def agent(agent_id, all_cooked_time, all_cooked_bw, all_file_names, video_size_file, net_params_queue, exp_queue): net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, random_seed=agent_id, VIDEO_SIZE_FILE=video_size_file, Debug=False) with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id), 'wb') as log_file: actor = a3c.ActorNetwork(sess, state_dim=[S_INFO, S_LEN], action_dim=A_DIM, learning_rate=ACTOR_LR_RATE) critic = a3c.CriticNetwork(sess, state_dim=[S_INFO, S_LEN], learning_rate=CRITIC_LR_RATE) # initial synchronization of the network parameters from the coordinator actor_net_params, critic_net_params = net_params_queue.get() actor.set_network_params(actor_net_params) critic.set_network_params(critic_net_params) bit_rate = DEFAULT_QUALITY target_buffer = DEFAULT_QUALITY latency_limit = 4 index = 1 action_vec = np.zeros(A_DIM) action_vec[index] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] entropy_record = [] video_count = 0 reward_all_sum = 0 reward_all = 0 reward = 0 switch_num = 0 SMOOTH_PENALTY = 0.0 REBUF_PENALTY = 3 LANTENCY_PENALTY = 0.0 BITRATE_REWARD = 0.001 SKIP_PENALTY = 0.0 epoch = 0 n = 0 state = np.array(s_batch[-1], copy=True) frame_time_len = 0.04 last_bit_rate = DEFAULT_QUALITY while True: # experience video streaming forever # the action is from the last decision # this is to make the framework similar to the real time, time_interval, send_data_size, chunk_len, \ rebuf, buffer_size, play_time_len, end_delay, \ cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \ buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit) # # QOE setting # if end_delay <= 1.0: # LANTENCY_PENALTY = 0.005 # else: # LANTENCY_PENALTY = 0.01 reward_frame = 0 epoch += 1 if not cdn_flag: reward_frame = frame_time_len * float( BIT_RATE[bit_rate] ) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LANTENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len else: reward_frame = -(REBUF_PENALTY * rebuf) reward += reward_frame # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = buffer_size * 0.1 state[1, -1] = send_data_size * 0.00001 state[2, -1] = time_interval * 10 # kilo byte / ms state[3, -1] = end_delay * 0.1 # 10 sec state[4, -1] = rebuf # mega byte if decision_flag and not end_of_video: reward_frame = -1 * SMOOTH_PENALTY * ( abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000) reward += reward_frame last_bit_rate = bit_rate r_batch.append(reward) reward = 0 # compute action probability vector action_prob = actor.predict( np.reshape(state, (1, S_INFO, S_LEN))) action_cumsum = np.cumsum(action_prob) temp = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE) index = (action_cumsum > temp).argmax() bit_rate = ACTION_SAPCE[index][0] target_buffer = ACTION_SAPCE[index][1] latency_limit = ACTION_SAPCE[index][2] # Note: we need to discretize the probability into 1/RAND_RANGE steps, # because there is an intrinsic discrepancy in passing single state and batch states entropy_record.append(a3c.compute_entropy(action_prob[0])) # report experience to the coordinator if len(r_batch) >= TRAIN_SEQ_LEN: exp_queue.put([ s_batch[1:], # ignore the first chuck a_batch[1:], # since we don't have the r_batch[1:], # control over it end_of_video, { 'entropy': entropy_record } ]) # synchronize the network parameters from the coordinator actor_net_params, critic_net_params = 
net_params_queue.get( ) actor.set_network_params(actor_net_params) critic.set_network_params(critic_net_params) del s_batch[:] del a_batch[:] del r_batch[:] del entropy_record[:] s_batch.append(state) action_vec = np.zeros(A_DIM) action_vec[index] = 1 a_batch.append(action_vec) reward_all += reward_frame # store the state and action into batches if end_of_video: r_batch.append(reward) reward_all_sum += reward_all / 20 video_count += 1 if video_count >= len(all_file_names): n += 1 video_count = 0 print(n, "agent_id ", agent_id, "reward_all_sum:", reward_all_sum) w.writerow([n, reward_all_sum]) out.flush() reward_all_sum = 0 net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, random_seed=epoch, VIDEO_SIZE_FILE=video_size_file, Debug=False) if n == NUM_EPOCH: break reward_all = 0 reward = 0 switch_num = 0 bit_rate = DEFAULT_QUALITY # use the default action here target_buffer = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec)
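# --- The training agent above draws its action by inverse-transform sampling on the
# --- actor's probability vector: take the cumulative sum and find the first index that
# --- exceeds a uniform threshold discretized into 1/RAND_RANGE steps (matching the
# --- comment in the code).  Standalone sketch:
import numpy as np

RAND_RANGE = 1000

def sample_action(action_prob):
    action_cumsum = np.cumsum(action_prob)
    threshold = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
    return int((action_cumsum > threshold).argmax())

# e.g. sample_action(np.array([0.1, 0.2, 0.7])) returns 2 roughly 70% of the time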
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM if not os.path.exists(SUMMARY_DIR): os.makedirs(SUMMARY_DIR) all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace() net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') with open(DTModel, 'rb') as f: policy = pk.load(f) time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty - smoothness reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float( np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = np.array( next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) serialized_state = serial(state) bit_rate = int( policy.predict(np.array(serialized_state).reshape(1, -1))[0]) # Note: we need to discretize the probability into 1/RAND_RANGE steps, # because there is an intrinsic discrepancy in passing single state and batch states s_batch.append(state) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) print "video count", video_count video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
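# --- The pickled policy above only needs a scikit-learn-style predict() on the flattened
# --- (serialized) state.  A hedged sketch of producing such a policy by imitation on
# --- logged (state, action) pairs; DecisionTreeClassifier is an assumption here, the
# --- original DTModel may wrap a different object.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

def fit_dt_policy(serialized_states, actions, max_leaves=64):
    policy = DecisionTreeClassifier(max_leaf_nodes=max_leaves)
    policy.fit(np.asarray(serialized_states), np.asarray(actions))
    return policy

# usage mirrors the loop above:
#   bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])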
max_iters = args.iters max_pts = 200000 train_frac = 0.8 np.random.seed(RANDOM_SEED) states, actions, serials = [], [], [] precision = [] #trees = [] all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( args.traces) if args.abr == 'hotdash': net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) else: net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) if args.abr == 'pensieve': teacher = pensieve.Pensieve() student = pensilin.Pensilin() #test = pensieve.Pensieve() elif args.abr == 'robustmpc': teacher = robustmpc.RobustMPC() student = robustlin.Robustlin() elif args.abr == 'hotdash': teacher = hotdash.Hotdash() student = hotdlin.Hotdlin() else: raise NotImplementedError
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM if not os.path.exists(SUMMARY_DIR): os.makedirs(SUMMARY_DIR) all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace() net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] entropy_record = [] count = 0 video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, \ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms count += 1 # reward is video quality - rebuffer penalty - smoothness reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K r_batch.append(reward) last_bit_rate = bit_rate log_file.write(str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(rebuf) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) if state[1,-1] <= 2: if bit_rate == 0: bit_rate = bit_rate else: bit_rate = bit_rate - 1 elif state[1,-1] >2 and state[1,-1]<=5: bit_rate = bit_rate else: if bit_rate == 4: bit_rate = bit_rate else: bit_rate = bit_rate + 1 s_batch.append(state) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here time_stamp = 0 del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] print ("video count", video_count) video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb')
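# --- The heuristic above moves the quality one step at a time based on buffer occupancy.
# --- Restated as a pure function; note that the thresholds in the loop above are applied
# --- to the normalized buffer state[1, -1] (buffer_size / BUFFER_NORM_FACTOR), so the
# --- same normalization is assumed for the argument here.
def step_policy(bit_rate, normalized_buffer, max_level=4):
    if normalized_buffer <= 2:
        return max(bit_rate - 1, 0)         # low buffer: step down
    if normalized_buffer <= 5:
        return bit_rate                     # middle region: hold
    return min(bit_rate + 1, max_level)     # large buffer: step up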
def main(): # check the constant defination is valid or not assert len(bitRatesOptions) == bitRatesTypes # load the traces allCookedTime, allCookedBW, allFileNames = load_trace.load_trace() # set the environment netEnvironment = env.Environment(all_cooked_time=allCookedTime, all_cooked_bw=allCookedBW) # open the output log file to write outputFileName = outputFilePrefix + "_" + allFileNames[ netEnvironment.trace_idx] outputFilePointer = open(outputFileName, "wb") # initial the local variables timeStamp = 0 lastBitRateOption = defaultBitRateOption currentBitRateOption = defaultBitRateOption videoCount = 0 historyState = np.zeros((stateInfoLength, pastFramesLength)) # enum all possible solutions of future chunks for solution in itertools.product([i for i in range(bitRatesTypes)], repeat=defaultFutureChunkCount): chunkOptionsSet.append(solution) # computing kernel: while True: # get the video chunk according to the current bitrate option assert currentBitRateOption >= 0 delay, sleepTime, currentBufferSize, rebuffer, currentVideoChunkSize, \ nextVideoChunkSize, endFlag, chunkRemainCount = netEnvironment.get_video_chunk(currentBitRateOption) # update the time stamp because of the delay and sleeping time timeStamp += delay + sleepTime # ms # calculate the reward value according to the formula qualityValue = bitRatesOptions[ currentBitRateOption] / bitsFactor # kb to Mb smoothValue = np.abs(bitRatesOptions[currentBitRateOption] \ - bitRatesOptions[lastBitRateOption]) / bitsFactor rewardValue = qualityValue \ - rebufferFactor * rebuffer \ - smoothFactor * smoothValue # write the output file outputItemStr = str(timeStamp / millsecondsPerSecond) + '\t' \ + str(bitRatesOptions[currentBitRateOption]) + '\t' \ + str(currentBufferSize) + '\t' \ + str(rebuffer) + '\t' \ + str(currentVideoChunkSize) + '\t' \ + str(delay) + '\t' \ + str(rewardValue) + '\n' outputFilePointer.write(outputItemStr.encode('utf-8')) outputFilePointer.flush() # update the bit rate option lastBitRateOption = currentBitRateOption # update the history state information like a sliding window historyState = np.roll(historyState, -1, axis=1) historyState[ 0, -1] = bitRatesOptions[currentBitRateOption] / float(maxBitRate) historyState[1, -1] = currentBufferSize / bufferNormFactor historyState[2, -1] = rebuffer historyState[ 3, -1] = float(currentVideoChunkSize) / float(delay) / bitsFactor historyState[4, -1] = np.minimum( chunkRemainCount, defaultChunkCountToEnd) / float(defaultChunkCountToEnd) # MPC kernel begin # calculate the normaliztion estimated error of bandwidth currentError = 0. if (len(pastBWEsts) > 0): currentError = abs(pastBWEsts[-1] - historyState[3, -1]) / float( historyState[3, -1]) pastErrors.append(currentError) # calculate the harmonic mean of last 5 history bandwidths # Step 1: collect the last 5 history bandwidths pastRealBWArray = historyState[3, -5:] while pastRealBWArray[0] == 0.0: pastRealBWArray = pastRealBWArray[1:] # Step 2: calculate the harmonic mean pastRealBWSum = 0.0 for pastRealBWItems in pastRealBWArray: pastRealBWSum += (1 / float(pastRealBWItems)) harmonicBW = 1.0 / (pastRealBWSum / len(pastRealBWArray)) # calculate the predicted future bandwidth according to the est. 
error and harmonic mean errorIndex = min(5, len(pastErrors)) maxError = float(max(pastErrors[-errorIndex:])) currentPredBW = harmonicBW / (1 + maxError) pastBWEsts.append(currentPredBW) # fixed this bug, reward increases # get the video chunks information of this round prediction currentLastIndex = totalChunksCount - chunkRemainCount currentFutureChunkCount = min(chunkRemainCount, defaultFutureChunkCount) # enumerate all the possible solutions and pick the best one bestReward = -INF bestSolution = () finalOption = -1 startBufferSize = currentBufferSize for solution in chunkOptionsSet: localSolution = solution[0:currentFutureChunkCount] localRebufferTime = 0.0 localCurrentBufferSize = startBufferSize localBitRateSum = 0. localSmoothDiffs = 0. localLastChunkOption = currentBitRateOption # the 5 future chunks loop for pos in range(0, currentFutureChunkCount): thisChunkOption = localSolution[pos] thisIndex = currentLastIndex + pos + 1 thisChunkSize = getChunkSize(thisChunkOption, thisIndex) downloadTime = (float(thisChunkSize) / (bitsFactor * bitsFactor) ) / currentPredBW # Bytes to MBytes if localCurrentBufferSize < downloadTime: localRebufferTime += downloadTime - localCurrentBufferSize localCurrentBufferSize = 0 else: localCurrentBufferSize -= downloadTime # This 4 means the play speed localCurrentBufferSize += 4 localBitRateSum += bitRatesOptions[thisChunkOption] localSmoothDiffs += abs(bitRatesOptions[thisChunkOption] - bitRatesOptions[localLastChunkOption]) localLastChunkOption = thisChunkOption localReward = float(localBitRateSum) / bitsFactor \ - rebufferFactor * localRebufferTime \ - float(localSmoothDiffs) / bitsFactor if localReward >= bestReward: if bestSolution != () and bestSolution[0] < localSolution[0]: bestSolution = localSolution else: bestSolution = localSolution bestReward = localReward if bestSolution != (): finalOption = bestSolution[0] currentBitRateOption = finalOption if endFlag: outputFilePointer.write("\n".encode('utf-8')) outputFilePointer.close() lastBitRateOption = defaultBitRateOption currentBitRateOption = defaultBitRateOption historyState = np.zeros((stateInfoLength, pastFramesLength)) print("video count", videoCount) videoCount += 1 if videoCount >= len(allFileNames): break outputFileName = outputFilePrefix + "_naive_" + allFileNames[ netEnvironment.trace_idx] outputFilePointer = open(outputFileName, "wb")
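# --- The linear QoE objective evaluated after every chunk in these scripts: bitrate
# --- utility minus a rebuffering penalty minus a smoothness penalty.  Helper sketch with
# --- the commonly used defaults (rebuffer penalty 4.3, smoothness weight 1); scripts that
# --- use other values would pass them in explicitly.
def linear_qoe(bitrate_kbps, last_bitrate_kbps, rebuf_sec,
               rebuf_penalty=4.3, smooth_penalty=1.0, m_in_k=1000.0):
    return (bitrate_kbps / m_in_k
            - rebuf_penalty * rebuf_sec
            - smooth_penalty * abs(bitrate_kbps - last_bitrate_kbps) / m_in_k)

# e.g. linear_qoe(2850, 1200, 0.5) = 2.85 - 2.15 - 1.65 = -0.95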
def main(): np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( TEST_TRACES) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w') with tf.Session() as sess: actor = libcomyco.libcomyco(sess, S_INFO, S_LEN, A_DIM, LR_RATE=1e-4) sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() # save neural net parameters # restore neural net parameters if NN_MODEL is not None: # NN_MODEL is the path to file saver.restore(sess, NN_MODEL) print("Testing model restored.") time_stamp = 0 bit_rate = DEFAULT_QUALITY last_chunk_vmaf = None action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] entropy_record = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, next_video_chunk_sizes, next_video_chunk_vmaf, \ end_of_video, video_chunk_remain, video_chunk_vmaf = \ net_env.get_video_chunk(bit_rate) if last_chunk_vmaf is None: last_chunk_vmaf = video_chunk_vmaf time_stamp += delay # in ms time_stamp += sleep_time # in ms reward = 0.8469011 * video_chunk_vmaf - 28.79591348 * rebuf + 0.29797156 * \ np.abs(np.maximum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - 1.06099887 * \ np.abs(np.minimum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - \ 2.661618558192494 r_batch.append(reward) last_chunk_vmaf = video_chunk_vmaf # log time_stamp, bit_rate, buffer_size, reward log_file.write( str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = video_chunk_vmaf / 100. state[1, -1] = buffer_size / BUFFER_NORM_FACTOR # 10 sec state[2, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR # 10 sec state[4, :A_DIM] = np.array( next_video_chunk_sizes) / M_IN_K / M_IN_K # mega byte state[5, :A_DIM] = np.array( next_video_chunk_vmaf) / 100. # mega byte state[6, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) action_prob, _ = actor.predict( np.reshape(state, (-1, S_INFO, S_LEN))) bit_rate = np.argmax(action_prob[0]) s_batch.append(state) entropy_record.append(actor.compute_entropy(action_prob[0])) if end_of_video: log_file.write('\n') log_file.close() bit_rate = DEFAULT_QUALITY # use the default action here last_chunk_vmaf = None del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] video_count += 1 if video_count >= len(all_file_names): break log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'w')
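# --- The VMAF-based QoE used in the loop above, factored into a helper.  The coefficients
# --- are copied verbatim from the reward expression: a quality term, a large rebuffering
# --- penalty, a small bonus for positive quality switches, a penalty for negative
# --- switches, and a constant offset.
def vmaf_qoe(chunk_vmaf, last_chunk_vmaf, rebuf_sec):
    delta = chunk_vmaf - last_chunk_vmaf
    return (0.8469011 * chunk_vmaf
            - 28.79591348 * rebuf_sec
            + 0.29797156 * max(delta, 0.0)
            - 1.06099887 * abs(min(delta, 0.0))
            - 2.661618558192494)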
def main(self, args, net_env=None): self.args = args np.random.seed(RANDOM_SEED) viper_flag = True assert len(VIDEO_BIT_RATE) == A_DIM if net_env is None: viper_flag = False all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace( args.traces) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw, all_file_names=all_file_names) if not viper_flag and args.log: log_path = LOG_FILE + '_' + net_env.all_file_names[ net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] rollout = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \ video_chunk_remain = net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric) r_batch.append(reward) last_bit_rate = bit_rate if args.log: # log time_stamp, bit_rate, buffer_size, reward log_file.write( bytes(str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\n', encoding='utf-8')) log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float( np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR state[2, -1] = rebuf state[3, -1] = float(video_chunk_size) / float( delay) / M_IN_K # kilo byte / ms state[4, -1] = np.minimum( video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) bit_rate = self.predict(state) serialized_state = [] # Log input of neural network serialized_state.append(state[0, -1]) serialized_state.append(state[1, -1]) serialized_state.append(state[2, -1]) for i in range(5): serialized_state.append(state[3, i]) serialized_state.append(state[4, -1]) #print(serialized_state) #print(state) rollout.append((state, bit_rate, serialized_state)) if end_of_video: if args.log: log_file.write(bytes('\n', encoding='utf-8')) log_file.close() print("video count", video_count) last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] if viper_flag: break else: video_count += 1 if video_count >= len(net_env.all_file_names): break if args.log: log_path = LOG_FILE + '_' + net_env.all_file_names[ net_env.trace_idx] + '_' + args.qoe_metric log_file = open(log_path, 'wb') return rollout
def main(): args = parser.parse_args() if args.cb or args.lin: qoe_metric = 'results_lin' elif args.log: qoe_metric = 'results_log' else: print('Please select the QoE Metric!') if args.FCC: dataset = 'fcc' elif args.HSDPA: dataset = 'HSDPA' elif args.Oboe: dataset = 'Oboe' else: print('Please select the dataset!') dataset_path = './traces_' + dataset + '/' if args.cb: Log_file_path = './' + qoe_metric + '/cb_' + dataset + '/log_sim_mpc' else: Log_file_path = './' + qoe_metric + '/' + dataset + '/log_sim_mpc' start = time.time() np.random.seed(RANDOM_SEED) assert len(VIDEO_BIT_RATE) == A_DIM all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(dataset_path) net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw) log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') chunk_size_info = video_size() chunk_size_info.store_size() time_stamp = 0 last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY harmonic_bandwidth = 0 future_bandwidth = 0 action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch = [np.zeros((S_INFO, S_LEN))] a_batch = [action_vec] r_batch = [] # entropy_record = [] video_count = 0 while True: # serve video forever # the action is from the last decision # this is to make the framework similar to the real delay, sleep_time, buffer_size, rebuf, \ video_chunk_size, _,\ end_of_video, video_chunk_remain = \ net_env.get_video_chunk(bit_rate) time_stamp += delay # in ms time_stamp += sleep_time # in ms # reward is video quality - rebuffer penalty if qoe_metric == 'results_lin': REBUF_PENALTY = 4.3 reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K else:# log scale reward REBUF_PENALTY = 2.66 log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0])) log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0])) reward = log_bit_rate \ - REBUF_PENALTY * rebuf \ - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate) # reward = BITRATE_REWARD[bit_rate] \ # - 8 * rebuf - np.abs(BITRATE_REWARD[bit_rate] - BITRATE_REWARD[last_bit_rate]) r_batch.append(reward) last_bit_rate = bit_rate # log time_stamp, bit_rate, buffer_size, reward log_file.write(str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) + '\t' + str(harmonic_bandwidth) + '\t' + str(harmonic_bandwidth - future_bandwidth) + '\t' + str(future_bandwidth) + '\t' + str(float(video_chunk_size) / float(delay) / M_IN_K) + '\n') log_file.flush() # retrieve previous state if len(s_batch) == 0: state = [np.zeros((S_INFO, S_LEN))] else: state = np.array(s_batch[-1], copy=True) # dequeue history record state = np.roll(state, -1, axis=1) # this should be S_INFO number of terms state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE)) # last quality state[1, -1] = buffer_size / BUFFER_NORM_FACTOR state[2, -1] = rebuf state[3, -1] = float(video_chunk_size) / float(delay) / M_IN_K # kilo byte / ms state[4, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP) # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K # ================== MPC ========================= curr_error = 0 # defualt assumes that this is the first request so error is 0 since we have never predicted bandwidth if ( len(past_bandwidth_ests) > 
0 ): curr_error = abs(past_bandwidth_ests[-1]-state[3,-1])/float(state[3,-1]) past_errors.append(curr_error) # pick bitrate according to MPC # first get harmonic mean of last 5 bandwidths past_bandwidths = state[3,-5:] while past_bandwidths[0] == 0.0: past_bandwidths = past_bandwidths[1:] #if ( len(state) < 5 ): # past_bandwidths = state[3,-len(state):] #else: # past_bandwidths = state[3,-5:] bandwidth_sum = 0 for past_val in past_bandwidths: bandwidth_sum += (1/float(past_val)) harmonic_bandwidth = 1.0/(bandwidth_sum/len(past_bandwidths)) # future bandwidth prediction # divide by 1 + max of last 5 (or up to 5) errors max_error = 0 error_pos = -5 if ( len(past_errors) < 5 ): error_pos = -len(past_errors) max_error = float(max(past_errors[error_pos:])) future_bandwidth = harmonic_bandwidth/(1+max_error) # robustMPC here past_bandwidth_ests.append(harmonic_bandwidth) # future chunks length (try 4 if that many remaining) last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain -1) future_chunk_length = MPC_FUTURE_CHUNK_COUNT if ( TOTAL_VIDEO_CHUNKS - last_index < MPC_FUTURE_CHUNK_COUNT ): future_chunk_length = TOTAL_VIDEO_CHUNKS - last_index # all possible combinations of 5 chunk bitrates (9^5 options) # iterate over list and for each, compute reward and store max reward combination max_reward = -100000000 # best_combo = () start_buffer = buffer_size #start = time.time() download_time_every_step = [] for position in range(future_chunk_length): download_time_current = [] for action in range(0, A_DIM): index = last_index + position + 1 # e.g., if last chunk is 3, then first iter is 3+0+1=4 download_time = (chunk_size_info.get_chunk_size(action, index)/1000000.)/future_bandwidth # this is MB/MB/s --> seconds download_time_current.append(download_time) download_time_every_step.append(download_time_current) reward_comparison = False send_data = 0 parents_pool = [[0.0, start_buffer, int(bit_rate)]] for position in range(future_chunk_length): if position == future_chunk_length-1: reward_comparison = True children_pool = [] for parent in parents_pool: action = 0 curr_buffer = parent[1] last_quality = parent[-1] curr_rebuffer_time = 0 chunk_quality = action download_time = download_time_every_step[position][chunk_quality] if ( curr_buffer < download_time ): curr_rebuffer_time += (download_time - curr_buffer) curr_buffer = 0.0 else: curr_buffer -= download_time curr_buffer += 4 # reward bitrate_sum = VIDEO_BIT_RATE[chunk_quality] smoothness_diffs = abs(VIDEO_BIT_RATE[chunk_quality] - VIDEO_BIT_RATE[last_quality]) reward = (bitrate_sum/1000.) - (REBUF_PENALTY*curr_rebuffer_time) - (SMOOTH_PENALTY*smoothness_diffs/1000.) reward += parent[0] children = parent[:] children[0] = reward children[1] = curr_buffer children.append(action) children_pool.append(children) if (reward >= max_reward) and reward_comparison: if send_data > children[3] and reward == max_reward: send_data = send_data else: send_data = children[3] max_reward = reward # criterion terms # theta = SMOOTH_PENALTY * (VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.) rebuffer_term = REBUF_PENALTY * (max(download_time_every_step[position][action+1] - parent[1], 0) - max(download_time_every_step[position][action] - parent[1], 0)) if (action + 1 <= parent[-1]): High_Maybe_Superior = ((1.0 + 2 * SMOOTH_PENALTY)*(VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0.0) else: High_Maybe_Superior = ((VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) 
+ rebuffer_term < 0.0) # while REBUF_PENALTY*(download_time_every_step[position][action+1] - parent[1]) <= ((VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.)-(abs(VIDEO_BIT_RATE[action+1] - VIDEO_BIT_RATE[parent[-1]]) - abs(VIDEO_BIT_RATE[action] - VIDEO_BIT_RATE[parent[-1]]))/1000.): while High_Maybe_Superior: curr_buffer = parent[1] last_quality = parent[-1] curr_rebuffer_time = 0 chunk_quality = action + 1 download_time = download_time_every_step[position][chunk_quality] if ( curr_buffer < download_time ): curr_rebuffer_time += (download_time - curr_buffer) curr_buffer = 0 else: curr_buffer -= download_time curr_buffer += 4 # reward bitrate_sum = VIDEO_BIT_RATE[chunk_quality] smoothness_diffs = abs(VIDEO_BIT_RATE[chunk_quality] - VIDEO_BIT_RATE[last_quality]) reward = (bitrate_sum/1000.) - (REBUF_PENALTY*curr_rebuffer_time) - (SMOOTH_PENALTY*smoothness_diffs/1000.) reward += parent[0] children = parent[:] children[0] = reward children[1] = curr_buffer children.append(chunk_quality) children_pool.append(children) if (reward >= max_reward) and reward_comparison: if send_data > children[3] and reward == max_reward: send_data = send_data else: send_data = children[3] max_reward = reward action += 1 if action + 1 == A_DIM: break # criterion terms # theta = SMOOTH_PENALTY * (VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.) rebuffer_term = REBUF_PENALTY * (max(download_time_every_step[position][action+1] - parent[1], 0) - max(download_time_every_step[position][action] - parent[1], 0)) if (action + 1 <= parent[-1]): High_Maybe_Superior = ((1.0 + 2 * SMOOTH_PENALTY)*(VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0) else: High_Maybe_Superior = ((VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0) parents_pool = children_pool bit_rate = send_data # hack # if bit_rate == 1 or bit_rate == 2: # bit_rate = 0 # ================================================ # Note: we need to discretize the probability into 1/RAND_RANGE steps, # because there is an intrinsic discrepancy in passing single state and batch states s_batch.append(state) if end_of_video: log_file.write('\n') log_file.close() last_bit_rate = DEFAULT_QUALITY bit_rate = DEFAULT_QUALITY # use the default action here del s_batch[:] del a_batch[:] del r_batch[:] del past_bandwidth_ests[:] time_stamp = 0 action_vec = np.zeros(A_DIM) action_vec[bit_rate] = 1 s_batch.append(np.zeros((S_INFO, S_LEN))) a_batch.append(action_vec) entropy_record = [] print("video count", video_count) video_count += 1 if video_count >= len(all_file_names): end = time.time() print(end - start) break log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx] log_file = open(log_path, 'wb') end = time.time() print(end - start)