Beispiel #1
0
    parser.add_argument('-t', '--traces', choices=['norway', 'fcc', 'oboe'])

    args = parser.parse_args()
    n_batch_rollouts = 10
    max_iters = args.iters
    max_pts = 200000
    train_frac = 0.8
    np.random.seed(RANDOM_SEED)
    states, actions, serials = [], [], []
    precision = []
    #trees = []
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        args.traces)
    if args.abr == 'hotdash':
        net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          all_file_names=all_file_names)
    else:
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    if args.abr == 'pensieve':
        teacher = pensieve.Pensieve()
        student = pensilin.Pensilin()
        #test = pensieve.Pensieve()
    elif args.abr == 'robustmpc':
        teacher = robustmpc.RobustMPC()
        student = robustlin.Robustlin()
    elif args.abr == 'hotdash':
        teacher = hotdash.Hotdash()
Beispiel #2
0
    def main(self, args, net_env=None, policy=None):
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
                args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)

        if not viper_flag and args.log:
            log_path = LOG_FILE + '_' + net_env.all_file_names[
                net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        prefetch_decision = DEFAULT_PREFETCH
        next_normal_bitrate = DEFAULT_QUALITY
        next_hotspot_bitrate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM_prefetch)
        action_vec[prefetch_decision] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        s_batch_pensieve1 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
        s_batch_pensieve2 = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []
        video_count = 0

        # load dt policy
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)

        while True:  # serve video forever
            state_data_for_action = net_env.execute_action(
                prefetch_decision, next_normal_bitrate, next_hotspot_bitrate)

            # normal chunk state information
            delay = state_data_for_action['delay']
            sleep_time = state_data_for_action['sleep_time']
            last_bit_rate = state_data_for_action['last_bit_rate']
            play_buffer_size = state_data_for_action['play_buffer_size']
            rebuf = state_data_for_action['rebuf']
            video_chunk_size = state_data_for_action['video_chunk_size']
            next_video_chunk_sizes = state_data_for_action[
                'next_video_chunk_sizes']
            end_of_video = state_data_for_action['end_of_video']
            video_chunk_remain = state_data_for_action['video_chunk_remain']
            current_seq_no = state_data_for_action['current_seq_no']
            log_prefetch_decision = state_data_for_action[
                'log_prefetch_decision']

            # hotspot chunk state information
            was_hotspot_chunk = int(state_data_for_action['was_hotspot_chunk'])
            hotspot_chunks_remain = state_data_for_action[
                'hotspot_chunks_remain']
            chunks_till_played = state_data_for_action['chunks_till_played']
            total_buffer_size = state_data_for_action['total_buffer_size']
            last_hotspot_bit_rate = state_data_for_action[
                'last_hotspot_bit_rate']
            next_hotspot_chunk_sizes = state_data_for_action[
                'next_hotspot_chunk_sizes']
            dist_from_hotspot_chunks = state_data_for_action[
                'dist_from_hotspot_chunks']
            smoothness_eval_bitrates = state_data_for_action[
                'smoothness_eval_bitrates']

            # abr decision state information
            normal_bitrate_pensieve = state_data_for_action[
                'normal_bitrate_pensieve']
            hotspot_bitrate_pensieve = state_data_for_action[
                'hotspot_bitrate_pensieve']

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            if args.qoe_metric == 'lin':
                util_array = [util / M_IN_K for util in VIDEO_BIT_RATE]
            elif args.qoe_metric == 'log':
                util_array = [
                    np.log(util / VIDEO_BIT_RATE[-1])
                    for util in VIDEO_BIT_RATE
                ]
            elif args.qoe_metric == 'hd':
                util_array = HD_REWARD
            else:
                raise NotImplementedError
            reward_br = util_array[int(last_hotspot_bit_rate) if
                                   was_hotspot_chunk else int(last_bit_rate)]
            reward_rebuffering = REBUF_PENALTY * rebuf * 1.0
            reward_smoothness = 0.0
            if len(smoothness_eval_bitrates) > 1:
                for i in range(len(smoothness_eval_bitrates) - 1):
                    reward_smoothness += 1.0 * SMOOTH_PENALTY * (1.0 * np.abs(
                        VIDEO_BIT_RATE[int(smoothness_eval_bitrates[i + 1])] -
                        VIDEO_BIT_RATE[int(smoothness_eval_bitrates[i])]) /
                                                                 M_IN_K)

            reward = (1.0 * reward_br) - (1.0 * reward_rebuffering) - (
                1.0 * reward_smoothness)
            r_batch.append(reward)
            last_overall_bitrate = last_bit_rate
            if prefetch_decision == 1:
                last_overall_bitrate = last_hotspot_bit_rate

            if args.log:
                log_file.write(
                    bytes(str(time_stamp) + '\t' +
                          str(VIDEO_BIT_RATE[int(last_overall_bitrate)]) +
                          '\t' + str(play_buffer_size) + '\t' + str(rebuf) +
                          '\t' + str(video_chunk_size) + '\t' + str(delay) +
                          '\t' + str(reward) + '\t' +
                          str(log_prefetch_decision) + '\t' +
                          str(int(was_hotspot_chunk)) + '\t' +
                          str(current_seq_no) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # select bit_rate according to decision tree
            if len(s_batch) == 0:
                state = np.zeros((S_INFO, S_LEN))
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            # Normal state S_ABR_INFO
            state[0, -1] = VIDEO_BIT_RATE[int(last_overall_bitrate)] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :BITRATE_LEVELS] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            # Hotspot state S_HOT_INFO
            state[6, -1] = np.minimum(
                hotspot_chunks_remain,
                NUM_HOTSPOT_CHUNKS) / float(NUM_HOTSPOT_CHUNKS)
            state[7, -1] = np.minimum(
                chunks_till_played,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            state[8, -1] = total_buffer_size / BUFFER_NORM_FACTOR
            state[9,
                  -1] = last_hotspot_bit_rate / float(np.max(VIDEO_BIT_RATE))
            state[10, :BITRATE_LEVELS] = np.array(
                next_hotspot_chunk_sizes) / M_IN_K / M_IN_K
            state[11, :NUM_HOTSPOT_CHUNKS] = (
                np.array(dist_from_hotspot_chunks) +
                CHUNK_TIL_VIDEO_END_CAP) / float(2 * CHUNK_TIL_VIDEO_END_CAP)

            # Bitrate actions state S_BRT_INFO
            state[12,
                  -1] = normal_bitrate_pensieve / float(np.max(VIDEO_BIT_RATE))
            state[13, -1] = hotspot_bitrate_pensieve / float(
                np.max(VIDEO_BIT_RATE))

            if len(s_batch_pensieve1) == 0:
                state_info_pensieve_n = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
            else:
                state_info_pensieve_n = np.array(s_batch_pensieve1[-1],
                                                 copy=True)

            state_info_pensieve_n = np.roll(state_info_pensieve_n, -1, axis=1)
            state_info_pensieve_n[
                0, -1] = VIDEO_BIT_RATE[int(last_bit_rate)] / float(
                    np.max(VIDEO_BIT_RATE))  # last quality
            state_info_pensieve_n[
                1, -1] = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state_info_pensieve_n[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state_info_pensieve_n[
                3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state_info_pensieve_n[4, :BITRATE_LEVELS] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state_info_pensieve_n[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            if len(s_batch_pensieve2) == 0:
                state_info_pensieve_h = [np.zeros((S_INFO_PENSIEVE, S_LEN))]
            else:
                state_info_pensieve_h = np.array(s_batch_pensieve2[-1],
                                                 copy=True)

            state_info_pensieve_h = np.roll(state_info_pensieve_h, -1, axis=1)
            state_info_pensieve_h[
                0, -1] = VIDEO_BIT_RATE[int(last_bit_rate)] / float(
                    np.max(VIDEO_BIT_RATE))  # last quality
            state_info_pensieve_h[
                1, -1] = play_buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state_info_pensieve_h[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state_info_pensieve_h[
                3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state_info_pensieve_h[4, :BITRATE_LEVELS] = np.array(
                next_hotspot_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state_info_pensieve_h[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            state_list = [state, state_info_pensieve_n, state_info_pensieve_h]

            serialized_state = []
            serialized_state.append(state[0, -1])
            serialized_state.append(state[1, -1])
            for i in range(S_LEN):
                serialized_state.append(state[2, i])
            for i in range(S_LEN):
                serialized_state.append(state[3, i])
            for i in range(A_DIM):
                serialized_state.append(state[4, i])
            serialized_state.append(state[5, -1])
            for i in range(S_LEN):
                serialized_state.append(state[6, i])
            serialized_state.append(state[7, -1])
            serialized_state.append(state[8, -1])
            serialized_state.append(state[9, -1])
            for i in range(BITRATE_LEVELS):
                serialized_state.append(state[10, i])
            for i in range(NUM_HOTSPOT_CHUNKS):
                serialized_state.append(state[11, i])
            serialized_state.append(state[12, -1])
            serialized_state.append(state[13, -1])

            tmp = policy.predict(np.array(serialized_state).reshape(1, -1))
            prefetch_decision = tmp[0][0]
            next_normal_bitrate = tmp[0][1]
            next_hotspot_bitrate = tmp[0][2]

            rollout.append((state_list, [
                prefetch_decision, next_normal_bitrate, next_hotspot_bitrate
            ], serialized_state))
            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                prefetch_decision = DEFAULT_PREFETCH

                del s_batch[:]
                del s_batch_pensieve1[:]
                del s_batch_pensieve2[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[prefetch_decision] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                s_batch_pensieve1.append(np.zeros((S_INFO_PENSIEVE, S_LEN)))
                s_batch_pensieve2.append(np.zeros((S_INFO_PENSIEVE, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                if viper_flag:
                    break
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = LOG_FILE + '_' + net_env.all_file_names[
                            net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return rollout