# Trace set to replay; argparse restricts the value to the listed choices.
parser.add_argument('-t', '--traces', choices=['norway', 'fcc', 'oboe'])
args = parser.parse_args()

# Run configuration.
# NOTE(review): the consumers of these values are outside this excerpt; the
# names suggest rollouts per iteration, an iteration cap, a cap on collected
# points and a train/test split fraction -- confirm against their use sites.
n_batch_rollouts = 10
max_iters = args.iters
max_pts = 200000
train_frac = 0.8

# Seed NumPy's global random number generator.
np.random.seed(RANDOM_SEED)

# Accumulators that start empty here (presumably filled by later code that is
# not visible in this excerpt).
states, actions, serials = [], [], []
precision = []
#trees = []

# Load the network traces selected on the command line.
all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
    args.traces)

# 'hotdash' gets its own Environment class; every other ABR value uses the
# default one. Both are built from the same loaded traces.
if args.abr == 'hotdash':
    net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)
else:
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              all_file_names=all_file_names)

# Pick the objects for the selected ABR algorithm: a `teacher` and, for the
# branches fully visible here, a matching `student`.
if args.abr == 'pensieve':
    teacher = pensieve.Pensieve()
    student = pensilin.Pensilin()
    #test = pensieve.Pensieve()
elif args.abr == 'robustmpc':
    teacher = robustmpc.RobustMPC()
    student = robustlin.Robustlin()
elif args.abr == 'hotdash':
    teacher = hotdash.Hotdash()
def main(self, args, net_env=None, policy=None):
    """Roll out a decision-tree policy on the hotspot-aware streaming simulator.

    Every step executes the current ``(prefetch, normal bitrate, hotspot
    bitrate)`` action on ``net_env``, computes the QoE reward for the chunk
    that was fetched, rebuilds the observation and asks ``policy`` for the
    next action.

    Two modes are supported:

    * Standalone (``net_env is None``): traces are loaded from
      ``args.traces``, a new ``env.Environment`` is created, one video is
      played per entry of ``net_env.all_file_names`` and, when ``args.log``
      is truthy, one tab-separated log file is written per video.
    * Viper (``net_env`` supplied by the caller): exactly one video is
      played on the given environment and nothing is logged, regardless of
      ``args.log``.

    Args:
        self: Not used by this function.
        args: Namespace providing ``qoe_metric`` (``'lin'``, ``'log'`` or
            ``'hd'``) and, depending on the mode, ``traces``, ``log`` and
            ``dt``.
        net_env: Optional environment exposing ``execute_action``.
        policy: Optional model exposing ``predict``; when ``None`` it is
            unpickled from the file named by ``args.dt``.

    Returns:
        A list holding one ``(state_list, action, serialized_state)`` tuple
        per executed step. ``state_list`` is ``[state, normal Pensieve state,
        hotspot Pensieve state]``, ``action`` is ``[prefetch_decision,
        next_normal_bitrate, next_hotspot_bitrate]`` and ``serialized_state``
        is the flat feature list that was passed to ``policy.predict``.

    Raises:
        NotImplementedError: If ``args.qoe_metric`` is not recognised.
    """
    assert len(VIDEO_BIT_RATE) == A_DIM

    # The per-level utility table depends only on the metric, so build it
    # once and reject an unknown metric before anything is opened or stepped.
    if args.qoe_metric == 'lin':
        util_array = [util / M_IN_K for util in VIDEO_BIT_RATE]
    elif args.qoe_metric == 'log':
        util_array = [
            np.log(util / VIDEO_BIT_RATE[-1]) for util in VIDEO_BIT_RATE
        ]
    elif args.qoe_metric == 'hd':
        util_array = HD_REWARD
    else:
        raise NotImplementedError

    # A caller-supplied environment selects viper mode.
    viper_flag = net_env is not None
    if not viper_flag:
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
            args.traces)
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    # Single source of truth for logging. The log file only exists in
    # standalone mode, so every write/close must use this same guard;
    # checking ``args.log`` alone would touch an unbound ``log_file`` in
    # viper mode.
    logging_enabled = (not viper_flag) and bool(args.log)

    def _open_trace_log():
        # One log file per trace, named after the trace and the QoE metric.
        log_path = LOG_FILE + '_' + net_env.all_file_names[
            net_env.trace_idx] + '_' + args.qoe_metric
        return open(log_path, 'wb')

    max_bitrate = float(np.max(VIDEO_BIT_RATE))

    time_stamp = 0
    prefetch_decision = DEFAULT_PREFETCH
    next_normal_bitrate = DEFAULT_QUALITY
    next_hotspot_bitrate = DEFAULT_QUALITY

    # NOTE(review): nothing is ever appended to these history lists, so each
    # step starts from the initial all-zero matrix and only the newest column
    # carries data. Kept as-is to preserve the policy inputs -- confirm
    # whether history accumulation was intended.
    s_batch = [np.zeros((S_INFO, S_LEN))]
    s_batch_pensieve1 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
    s_batch_pensieve2 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]

    rollout = []
    video_count = 0
    log_file = None

    try:
        if logging_enabled:
            log_file = _open_trace_log()

        # Load the decision-tree policy when the caller did not pass one.
        # NOTE: unpickling can execute arbitrary code; ``args.dt`` must point
        # to a trusted file.
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)

        while True:  # serve video until an end-of-video break below
            state_data_for_action = net_env.execute_action(
                prefetch_decision, next_normal_bitrate, next_hotspot_bitrate)

            # normal chunk state information
            delay = state_data_for_action['delay']
            sleep_time = state_data_for_action['sleep_time']
            last_bit_rate = state_data_for_action['last_bit_rate']
            play_buffer_size = state_data_for_action['play_buffer_size']
            rebuf = state_data_for_action['rebuf']
            video_chunk_size = state_data_for_action['video_chunk_size']
            next_video_chunk_sizes = state_data_for_action[
                'next_video_chunk_sizes']
            end_of_video = state_data_for_action['end_of_video']
            video_chunk_remain = state_data_for_action['video_chunk_remain']
            current_seq_no = state_data_for_action['current_seq_no']
            log_prefetch_decision = state_data_for_action[
                'log_prefetch_decision']

            # hotspot chunk state information
            was_hotspot_chunk = int(state_data_for_action['was_hotspot_chunk'])
            hotspot_chunks_remain = state_data_for_action[
                'hotspot_chunks_remain']
            chunks_till_played = state_data_for_action['chunks_till_played']
            total_buffer_size = state_data_for_action['total_buffer_size']
            last_hotspot_bit_rate = state_data_for_action[
                'last_hotspot_bit_rate']
            next_hotspot_chunk_sizes = state_data_for_action[
                'next_hotspot_chunk_sizes']
            dist_from_hotspot_chunks = state_data_for_action[
                'dist_from_hotspot_chunks']
            smoothness_eval_bitrates = state_data_for_action[
                'smoothness_eval_bitrates']

            # abr decision state information
            normal_bitrate_pensieve = state_data_for_action[
                'normal_bitrate_pensieve']
            hotspot_bitrate_pensieve = state_data_for_action[
                'hotspot_bitrate_pensieve']

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward_br = util_array[int(last_hotspot_bit_rate)
                                   if was_hotspot_chunk else int(last_bit_rate)]
            reward_rebuffering = REBUF_PENALTY * rebuf * 1.0
            # Penalise every bitrate switch between consecutive entries.
            reward_smoothness = 0.0
            for i in range(len(smoothness_eval_bitrates) - 1):
                reward_smoothness += 1.0 * SMOOTH_PENALTY * (1.0 * np.abs(
                    VIDEO_BIT_RATE[int(smoothness_eval_bitrates[i + 1])] -
                    VIDEO_BIT_RATE[int(smoothness_eval_bitrates[i])]) / M_IN_K)
            reward = (1.0 * reward_br) - (1.0 * reward_rebuffering) - (
                1.0 * reward_smoothness)

            # The chunk just fetched was a hotspot chunk iff the executed
            # action asked for a prefetch.
            last_overall_bitrate = last_bit_rate
            if prefetch_decision == 1:
                last_overall_bitrate = last_hotspot_bit_rate

            if logging_enabled:
                log_fields = (
                    time_stamp,
                    VIDEO_BIT_RATE[int(last_overall_bitrate)],
                    play_buffer_size,
                    rebuf,
                    video_chunk_size,
                    delay,
                    reward,
                    log_prefetch_decision,
                    int(was_hotspot_chunk),
                    current_seq_no,
                )
                log_file.write(
                    bytes('\t'.join(str(field) for field in log_fields) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # Values shared by the combined state and both Pensieve states.
            buffer_norm = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            throughput = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            delay_norm = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            chunks_left_norm = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            # Start from the latest stored state and shift the history left
            # by one column so the newest sample goes into column -1.
            if s_batch:
                state = np.array(s_batch[-1], copy=True)
            else:
                state = np.zeros((S_INFO, S_LEN))
            state = np.roll(state, -1, axis=1)

            # Normal state S_ABR_INFO
            state[0, -1] = VIDEO_BIT_RATE[int(
                last_overall_bitrate)] / max_bitrate  # last quality
            state[1, -1] = buffer_norm
            state[2, -1] = throughput
            state[3, -1] = delay_norm
            state[4, :BITRATE_LEVELS] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = chunks_left_norm

            # Hotspot state S_HOT_INFO
            state[6, -1] = np.minimum(
                hotspot_chunks_remain,
                NUM_HOTSPOT_CHUNKS) / float(NUM_HOTSPOT_CHUNKS)
            state[7, -1] = np.minimum(
                chunks_till_played,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            state[8, -1] = total_buffer_size / BUFFER_NORM_FACTOR
            state[9, -1] = last_hotspot_bit_rate / max_bitrate
            state[10, :BITRATE_LEVELS] = np.array(
                next_hotspot_chunk_sizes) / M_IN_K / M_IN_K
            state[11, :NUM_HOTSPOT_CHUNKS] = (
                np.array(dist_from_hotspot_chunks) +
                CHUNK_TIL_VIDEO_END_CAP) / float(2 * CHUNK_TIL_VIDEO_END_CAP)

            # Bitrate actions state S_BRT_INFO
            state[12, -1] = normal_bitrate_pensieve / max_bitrate
            state[13, -1] = hotspot_bitrate_pensieve / max_bitrate

            # The normal and hotspot Pensieve-style states differ only in
            # their history list and in which upcoming chunk sizes they see.
            # NOTE(review): the hotspot variant also uses ``last_bit_rate``
            # for row 0, exactly as the original code did -- confirm.
            pensieve_states = []
            for history, upcoming_sizes in (
                (s_batch_pensieve1, next_video_chunk_sizes),
                (s_batch_pensieve2, next_hotspot_chunk_sizes),
            ):
                if history:
                    sub_state = np.array(history[-1], copy=True)
                else:
                    sub_state = np.zeros((S_INFO_PENSIEVE, S_LEN))
                sub_state = np.roll(sub_state, -1, axis=1)
                sub_state[0, -1] = VIDEO_BIT_RATE[int(
                    last_bit_rate)] / max_bitrate  # last quality
                sub_state[1, -1] = buffer_norm
                sub_state[2, -1] = throughput
                sub_state[3, -1] = delay_norm
                sub_state[4, :BITRATE_LEVELS] = np.array(
                    upcoming_sizes) / M_IN_K / M_IN_K  # mega byte
                sub_state[5, -1] = chunks_left_norm
                pensieve_states.append(sub_state)

            state_list = [state, pensieve_states[0], pensieve_states[1]]

            # Flatten the rows into the feature vector given to the policy.
            # Scalar rows contribute their newest value; the other rows
            # contribute a fixed number of leading columns.
            serialized_state = [state[0, -1], state[1, -1]]
            serialized_state += [state[2, i] for i in range(S_LEN)]
            serialized_state += [state[3, i] for i in range(S_LEN)]
            serialized_state += [state[4, i] for i in range(A_DIM)]
            serialized_state.append(state[5, -1])
            serialized_state += [state[6, i] for i in range(S_LEN)]
            serialized_state += [state[7, -1], state[8, -1], state[9, -1]]
            serialized_state += [state[10, i] for i in range(BITRATE_LEVELS)]
            serialized_state += [
                state[11, i] for i in range(NUM_HOTSPOT_CHUNKS)
            ]
            serialized_state += [state[12, -1], state[13, -1]]

            # select the next action according to the decision tree
            tmp = policy.predict(np.array(serialized_state).reshape(1, -1))
            prefetch_decision = tmp[0][0]
            next_normal_bitrate = tmp[0][1]
            next_hotspot_bitrate = tmp[0][2]

            rollout.append((state_list, [
                prefetch_decision, next_normal_bitrate, next_hotspot_bitrate
            ], serialized_state))

            if end_of_video:
                if logging_enabled:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    log_file = None

                print("video count", video_count)

                # Only the prefetch decision is reset between videos; the
                # bitrates chosen by the policy carry over.
                prefetch_decision = DEFAULT_PREFETCH
                s_batch = [np.zeros((S_INFO, S_LEN))]
                s_batch_pensieve1 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
                s_batch_pensieve2 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]

                # Viper mode plays a single video per call.
                if viper_flag:
                    break

                video_count += 1
                if video_count >= len(net_env.all_file_names):
                    break

                if logging_enabled:
                    log_file = _open_trace_log()
    finally:
        # Do not leak the log handle if the rollout is interrupted.
        if log_file is not None:
            log_file.close()

    return rollout