def load_environment(self):
     DEBUG = False
     random_seed = 2
     # Control the subdirectory where log files will be stored.
     LOG_FILE_PATH = './log/'
     # create result directory
     if not os.path.exists(LOG_FILE_PATH):
         os.makedirs(LOG_FILE_PATH)
     # NETWORK_TRACE = 'fixed'
     VIDEO_TRACE = 'AsianCup_China_Uzbekistan'
     # network_trace_dir = './dataset/network_trace/' + NETWORK_TRACE + '/'
     # all_cooked_time, all_cooked_bw, self.all_file_names = load_trace.load_trace(network_trace_dir)
     network_trace = ['fixed', 'high']
     video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'
     network_trace_dir_list = ['./dataset/network_trace/' + trace + '/' for trace in network_trace]
     all_cooked_time, all_cooked_bw, self.all_file_names = load_trace.load_trace_list(network_trace_dir_list)
     self.net_env = fixed_env.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          random_seed=random_seed,
                                          logfile_path=LOG_FILE_PATH,
                                          VIDEO_SIZE_FILE=video_trace_prefix,
                                          Debug=DEBUG)  # initialize the video environment, feeding in all of the loaded network trace data
     return
def testing(tabular_q, epoch):

    os.system('rm -r ' + TEST_LOG_FOLDER)
    os.system('mkdir ' + TEST_LOG_FOLDER)

    all_cooked_time, all_cooked_bw, all_file_names = \
        load_trace.load_trace('./cooked_test_traces/')
    test_net_env = fixed_env.Environment(
        all_cooked_time=all_cooked_time,
        all_cooked_bw=all_cooked_bw)

    log_path = TEST_LOG_FOLDER + 'log_' + all_file_names[test_net_env.trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0
    video_count = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    state = [0, 0, 0, 0]

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            test_net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
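        # Worked example of the QoE reward above (hypothetical constants, since
        # VIDEO_BIT_RATE, REBUF_PENALTY and SMOOTH_PENALTY are defined elsewhere in this file):
        # with VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300] kbps, REBUF_PENALTY = 4.3,
        # SMOOTH_PENALTY = 1 and M_IN_K = 1000, downloading the chunk at index 3 (1850 kbps)
        # after index 2 (1200 kbps) with 0.5 s of rebuffering gives
        # reward = 1.85 - 4.3 * 0.5 - |1850 - 1200| / 1000 = 1.85 - 2.15 - 0.65 = -0.95.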

        last_bit_rate = bit_rate

        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
        log_file.flush()

        bw = float(video_chunk_size) / float(delay) / M_IN_K * BITS_IN_BYTE # Mbit/sec
        bw = min(int(bw / D_BW) * D_BW, BW_MAX)
        bf = min(int(buffer_size / D_BF) * D_BF, BF_MAX)
        br = bit_rate
        c = min(video_chunk_remain, N_CHUNK - 1)
        state = [bw, bf, br, c]
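        # Sketch of how the continuous observations are discretized into a tabular state
        # (D_BW, BW_MAX, D_BF, BF_MAX and N_CHUNK are defined elsewhere; the numbers below are
        # purely illustrative): with D_BW = 0.5 and BW_MAX = 10, a throughput of 3.7 Mbit/s
        # maps to int(3.7 / 0.5) * 0.5 = 3.5; with D_BF = 1 and BF_MAX = 20, a 12.3 s buffer
        # maps to 12. Binning keeps the state space small enough for a lookup table.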

        bit_rate = tabular_q.get_q_action(state, deterministic=True)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            state = [0, 0, 0, 0]

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = TEST_LOG_FOLDER + 'log_' + all_file_names[test_net_env.trace_idx]
            log_file = open(log_path, 'wb')

    with open(TEST_LOG_PATH, 'ab') as log_file:
        # append test performance to the log
        rewards = []
        test_log_files = os.listdir(TEST_LOG_FOLDER)
        for test_log_file in test_log_files:
            reward = []
            with open(TEST_LOG_FOLDER + test_log_file, 'rb') as f:
                for line in f:
                    parse = line.split()
                    try:
                        reward.append(float(parse[-1]))
                    except IndexError:
                        break
            rewards.append(np.sum(reward[1:]))

        rewards = np.array(rewards)

        rewards_min = np.min(rewards)
        rewards_5per = np.percentile(rewards, 5)
        rewards_mean = np.mean(rewards)
        rewards_median = np.percentile(rewards, 50)
        rewards_95per = np.percentile(rewards, 95)
        rewards_max = np.max(rewards)

        log_file.write(str(epoch) + '\t' +
                       str(rewards_min) + '\t' +
                       str(rewards_5per) + '\t' +
                       str(rewards_mean) + '\t' +
                       str(rewards_median) + '\t' +
                       str(rewards_95per) + '\t' +
                       str(rewards_max) + '\n')
        log_file.flush()
Example #3
def main():
    # utility_offset = -math.log(VIDEO_BIT_RATE[0])  # so utilities[0] = 0
    # utilities = [math.log(b) + utility_offset for b in VIDEO_BIT_RATE]

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, _ = load_trace.load_trace()
    load_trace.plot_bandwidth(all_cooked_time, all_cooked_bw, _)
    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    with tf.Session() as sess, open(LOG_FILE, 'w') as log_file:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        summary_ops, summary_vars = a3c.build_summaries()

        sess.run(tf.global_variables_initializer())
        writer = tf.summary.FileWriter(SUMMARY_DIR,
                                       sess.graph)  # training monitor
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            saver.restore(sess, nn_model)
            print("Model restored.")

        epoch = 0
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        actor_gradient_batch = []
        critic_gradient_batch = []

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_counter, throughput, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)
            #print(net_env.get_video_chunk(bit_rate))
            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smooth penalty
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
            r_batch.append(reward)

            last_bit_rate = bit_rate

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)
            # print(state)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            # print('state',state)
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            rand = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
            print(action_cumsum, action_cumsum > rand,
                  (action_cumsum > rand).argmax())
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE))
            # print(action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            #compute Vp and map bitrate
            # bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()

            Vp_index = (action_cumsum > np.random.randint(1, RAND_RANGE) /
                        float(RAND_RANGE)).argmax()
            Vp = BUFFER_PARAMETER[Vp_index]
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states
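            # Sketch of the sampling step above, assuming action_prob sums to 1:
            # with action_prob = [0.1, 0.6, 0.3], action_cumsum = [0.1, 0.7, 1.0];
            # a uniform draw of 0.42 yields (action_cumsum > 0.42) = [False, True, True],
            # so .argmax() returns 1, i.e. inverse-CDF sampling over the actions. Here the
            # sampled index selects a BOLA parameter Vp from BUFFER_PARAMETER, not a bitrate.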

            config = {
                'buffer_size': env.BUFFER_THRESH,
                'gp': GP,
                'Vp': Vp,
                'abr_osc': False,
                'abr_basic': False,
                'no_ibr': False
            }
            bola = get_bitrate.Bola(config=config)
            bit_rate = bola.get_quality(
                Vp, buffer_size * env.MILLISECONDS_IN_SECOND, last_bit_rate,
                throughput)
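            # BOLA (roughly) picks the quality m that maximizes
            # (Vp * (utility_m + gp) - buffer) / chunk_size_m, so a larger Vp or a fuller
            # buffer pushes the choice toward higher bitrates. This is only a summary of the
            # rule implemented in get_bitrate.Bola; see that module for the exact objective.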

            # information available before making the decision
            print(
                '[%d]: thrput=%.2f, download time %.2f ms, chunk size %d, buffer=%.2fs, bitrate=%d'
                % (video_chunk_counter, throughput, delay, video_chunk_size,
                   buffer_size, last_bit_rate))

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                str(buffer_size) + '\t' + str(rebuf) + '\t' +
                str(video_chunk_size) + '\t' + str(delay) + '\t' +
                str(reward) + '\n')
            log_file.flush()

            if len(r_batch) >= TRAIN_SEQ_LEN or end_of_video:  # do training once

                actor_gradient, critic_gradient, td_batch = \
                    a3c.compute_gradients(s_batch=np.stack(s_batch[1:], axis=0),  # ignore the first chuck
                                          a_batch=np.vstack(a_batch[1:]),  # since we don't have the
                                          r_batch=np.vstack(r_batch[1:]),  # control over it
                                          terminal=end_of_video, actor=actor, critic=critic)
                td_loss = np.mean(td_batch)

                actor_gradient_batch.append(actor_gradient)
                critic_gradient_batch.append(critic_gradient)

                print("====")
                print("Epoch", epoch)
                print("TD_loss", td_loss, "Avg_reward", np.mean(r_batch),
                      "Avg_entropy", np.mean(entropy_record))
                print("====")

                summary_str = sess.run(summary_ops,
                                       feed_dict={
                                           summary_vars[0]: td_loss,
                                           summary_vars[1]: np.mean(r_batch),
                                           summary_vars[2]:
                                           np.mean(entropy_record)
                                       })

                writer.add_summary(summary_str, epoch)
                writer.flush()

                entropy_record = []

                if len(actor_gradient_batch) >= GRADIENT_BATCH_SIZE:

                    assert len(actor_gradient_batch) == len(
                        critic_gradient_batch)
                    # assembled_actor_gradient = actor_gradient_batch[0]
                    # assembled_critic_gradient = critic_gradient_batch[0]
                    # assert len(actor_gradient_batch) == len(critic_gradient_batch)
                    # for i in xrange(len(actor_gradient_batch) - 1):
                    #     for j in xrange(len(actor_gradient)):
                    #         assembled_actor_gradient[j] += actor_gradient_batch[i][j]
                    #         assembled_critic_gradient[j] += critic_gradient_batch[i][j]
                    # actor.apply_gradients(assembled_actor_gradient)
                    # critic.apply_gradients(assembled_critic_gradient)

                    for i in range(len(actor_gradient_batch)):
                        actor.apply_gradients(actor_gradient_batch[i])
                        critic.apply_gradients(critic_gradient_batch[i])

                    actor_gradient_batch = []
                    critic_gradient_batch = []

                    epoch += 1
                    if epoch % MODEL_SAVE_INTERVAL == 0:
                        # Save the neural net parameters to disk.
                        save_path = saver.save(
                            sess, SUMMARY_DIR + "/nn_model_ep_" + str(epoch) +
                            ".ckpt")
                        print("Model saved in file: %s" % save_path)

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

            if end_of_video:
                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

            else:
                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                # print(bit_rate)
                action_vec[bit_rate] = 1
                a_batch.append(action_vec)
Example #4
def test(user_id, ABR_NAME_, QoE_, NETWORK_TRACE_, VIDEO_TRACE_):
    #1  Algorithm Setting:  RBA, BBA, DYNAMIC, PDDQN, Pensieve
    ABR_NAME = ABR_NAME_
    #2  QoE Setting:  ar, al, hd, b, max
    QoE = QoE_
    #3  Network Dataset: high,  medium, low, fixed
    NETWORK_TRACE = NETWORK_TRACE_
    #4  Video Dataset: AsianCup_China_Uzbekistan, Fengtimo_2018_11_3, YYF_2018_08_12
    VIDEO_TRACE = VIDEO_TRACE_

    model_name = ""

    if ABR_NAME == 'BBA':
        import BBA as ABR
    if ABR_NAME == 'RBA':
        import RBA as ABR
    if ABR_NAME == 'DYNAMIC':
        import DYNAMIC as ABR
    if ABR_NAME == 'PDDQN':
        model_name = "./PDDQN_models/PDDQN_b/"
        import PDDQN_ as ABR
    if ABR_NAME == 'PDDQN-R':
        model_name = "./PDDQN_models/"+QoE+'/'
        import PDDQN_R as ABR
    if ABR_NAME == 'Pensieve':
        model_name = "./Pensieve_models/"+QoE+'/'
        import Pensieve as ABR

    SMOOTH_PENALTY = 0
    REBUF_PENALTY = 0.0
    LANTENCY_PENALTY = 0.0
    SKIP_PENALTY = 0.0
    BITRATE_REWARD = 0.0

    if QoE == 'al':
        SMOOTH_PENALTY = 0.01
        REBUF_PENALTY = 1.5
        LANTENCY_PENALTY = 0.01
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 1
    if QoE == 'ar':
        SMOOTH_PENALTY = 0.0
        REBUF_PENALTY = 3
        LANTENCY_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.0
    if QoE == 'b':
        SMOOTH_PENALTY = 0.02
        REBUF_PENALTY = 1.5
        LANTENCY_PENALTY = 0.005
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.5
    if QoE == 'hd':
        SMOOTH_PENALTY = 0.0
        REBUF_PENALTY = 0.5
        LANTENCY_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.0

    if QoE == 'max':
        SMOOTH_PENALTY = 0
        REBUF_PENALTY = 0.0
        LANTENCY_PENALTY = 0.0
        SKIP_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        FILE_NAME = './'+'result/'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv'
    else:
        FILE_NAME = './'+'result/'+ABR_NAME+'_'+QoE+'_'+NETWORK_TRACE+'_'+VIDEO_TRACE+'.csv'

    # NOTE: this overrides the FILE_NAME chosen in the QoE branches above
    FILE_NAME = './' + 'result/Startup/' + NETWORK_TRACE + '/' + ABR_NAME + '/QoE.csv'
    out = open(FILE_NAME, 'w', newline='')
    w = csv.writer(out)

    DEBUG = False

    LOG_FILE_PATH = './log/'

    # create result directory
    if not os.path.exists(LOG_FILE_PATH):
        os.makedirs(LOG_FILE_PATH)

    # -- End Configuration --

    network_trace_dir = './dataset/new_network_trace/' + NETWORK_TRACE + '/'
    video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'

    # load the trace
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(network_trace_dir)
    start_avgbw = (sum(all_cooked_bw[0][0:10])/10) *1000

    # random_seed
    random_seed = 2
    count = 0
    trace_count = 1
    FPS = 25
    frame_time_len = 0.04
    reward_all_sum = 0
    run_time = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                                    all_cooked_bw=all_cooked_bw,
                                    random_seed=random_seed,
                                    logfile_path=LOG_FILE_PATH,
                                    VIDEO_SIZE_FILE=video_trace_prefix,
                                    Debug=DEBUG)

    abr = ABR.Algorithm()
    abr_init = abr.Initial(model_name)

    BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kbps
    TARGET_BUFFER = [0.5,0.75,1,1.25]  # seconds
    # ABR setting
    RESEVOIR = 0.5
    CUSHION = 2

    cnt = 0
    # default setting
    last_bit_rate = 0
    bit_rate = 0
    target_buffer = 0
    latency_limit = 4

    # reward setting
    reward_frame = 0
    reward_all = 0

    # past_info setting
    past_frame_num = 200
    S_time_interval = [0] * past_frame_num
    S_send_data_size = [0] * past_frame_num
    S_chunk_len = [0] * past_frame_num
    S_rebuf = [0] * past_frame_num
    S_buffer_size = [0] * past_frame_num
    S_end_delay = [0] * past_frame_num
    S_chunk_size = [0] * past_frame_num
    S_play_time_len = [0] * past_frame_num
    S_decision_flag = [0] * past_frame_num
    S_buffer_flag = [0] * past_frame_num
    S_cdn_flag = [0] * past_frame_num
    S_skip_time = [0] * past_frame_num
    # params setting
    call_time_sum = 0
    reward_chunk = 0
    while True:

        reward_frame = 0

        time, time_interval, send_data_size, chunk_len, \
        rebuf, buffer_size, play_time_len, end_delay, \
        cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
        buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit)
        # S_info is sequential order
        S_time_interval.pop(0)
        S_send_data_size.pop(0)
        S_chunk_len.pop(0)
        S_buffer_size.pop(0)
        S_rebuf.pop(0)
        S_end_delay.pop(0)
        S_play_time_len.pop(0)
        S_decision_flag.pop(0)
        S_buffer_flag.pop(0)
        S_cdn_flag.pop(0)
        S_skip_time.pop(0)

        S_time_interval.append(time_interval)
        S_send_data_size.append(send_data_size)
        S_chunk_len.append(chunk_len)
        S_buffer_size.append(buffer_size)
        S_rebuf.append(rebuf)
        S_end_delay.append(end_delay)
        S_play_time_len.append(play_time_len)
        S_decision_flag.append(decision_flag)
        S_buffer_flag.append(buffer_flag)
        S_cdn_flag.append(cdn_flag)
        S_skip_time.append(skip_frame_time_len)

        # QOE setting
        # if end_delay <= 1.0:
        #     LANTENCY_PENALTY = 0.005
        # else:
        #     LANTENCY_PENALTY = 0.01

        if not cdn_flag:
            reward_frame = frame_time_len * float(BIT_RATE[bit_rate]) * BITRATE_REWARD \
                           - REBUF_PENALTY * rebuf \
                           - LANTENCY_PENALTY * end_delay \
                           - SKIP_PENALTY * skip_frame_time_len
        else:
            reward_frame = -(REBUF_PENALTY * rebuf)

        if decision_flag or end_of_video:
            reward_frame += -1 * SMOOTH_PENALTY * (abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
            reward_chunk += reward_frame
            w.writerow([ABR_NAME,reward_chunk])
            reward_chunk = 0
            last_bit_rate = bit_rate
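            # Example of the chunk-level smoothness term applied above: under the 'b' QoE
            # profile (SMOOTH_PENALTY = 0.02), switching from 1850 kbps to 850 kbps costs
            # 0.02 * |1850 - 850| / 1000 = 0.02, which is folded into reward_chunk before
            # the per-chunk value is written to the CSV.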

            # ----------------- Your Algorithm ---------------------

            cnt += 1
            timestamp_start = tm.time()
            bit_rate, target_buffer, latency_limit = abr.run(time,
                                                             S_time_interval,
                                                             S_send_data_size,
                                                             S_chunk_len,
                                                             S_rebuf,
                                                             S_buffer_size,
                                                             S_play_time_len,
                                                             S_end_delay,
                                                             S_decision_flag,
                                                             S_buffer_flag,
                                                             S_cdn_flag,
                                                             S_skip_time,
                                                             end_of_video,
                                                             cdn_newest_id,
                                                             download_id,
                                                             cdn_has_frame,
                                                             abr_init,
                                                             start_avgbw)
            start_avgbw = -1
            timestamp_end = tm.time()
            call_time_sum += timestamp_end - timestamp_start
            # -------------------- End --------------------------------
        else:
            reward_chunk += reward_frame
        if end_of_video:
            # print("network traceID, network_reward, avg_running_time", trace_count, reward_all, call_time_sum / cnt)
            reward_all = reward_all / cnt
            reward_all_sum += reward_all
            run_time += call_time_sum / cnt
            if trace_count >= len(all_file_names):
                break
            trace_count += 1
            cnt = 0

            call_time_sum = 0
            last_bit_rate = 0
            reward_all = 0
            bit_rate = 0
            target_buffer = 0

            S_time_interval = [0] * past_frame_num
            S_send_data_size = [0] * past_frame_num
            S_chunk_len = [0] * past_frame_num
            S_rebuf = [0] * past_frame_num
            S_buffer_size = [0] * past_frame_num
            S_end_delay = [0] * past_frame_num
            S_chunk_size = [0] * past_frame_num
            S_play_time_len = [0] * past_frame_num
            S_decision_flag = [0] * past_frame_num
            S_buffer_flag = [0] * past_frame_num
            S_cdn_flag = [0] * past_frame_num

        reward_all += reward_frame

    return [reward_all_sum / trace_count, run_time / trace_count]
Example #5
        saver1.restore(sess, nn_model)
        print("Model restored.")

    chunk_reward = 0
    for i_eps in range(50):
        video_count = 0
        is_first = True

        video_id = i_eps % 5
        VIDEO_TRACE = VIDEO_TRACE_list[video_id]
        video_trace_prefix = './dataset/video_trace/' + VIDEO_TRACE + '/frame_trace_'
        all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
            network_trace_dir)
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  random_seed=random_seed,
                                  logfile_path=LOG_FILE_PATH,
                                  VIDEO_SIZE_FILE=video_trace_prefix,
                                  Debug=DEBUG)
        pre_ac = 0
        while True:
            timestamp_start = tm.time()
            reward_frame = 0

            time, time_interval, send_data_size, frame_time_len, \
            rebuf, buffer_size, play_time_len, end_delay, \
            cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
            buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit)

            # QOE setting
            if end_delay <= 1.0:
                LANTENCY_PENALTY = 0.005
def main():
    torch.set_num_threads(1)

    np.random.seed(RANDOM_SEED)
    torch.manual_seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    # all models have same actor network
    # so model_type can be anything
    net = ActorNetwork([S_INFO, S_LEN], A_DIM)

    # restore neural net parameters
    net.load_state_dict(torch.load(ACTOR_MODEL))
    print("Testing model restored.")

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    video_count = 0
    state = torch.zeros((S_INFO, S_LEN))

    weights = np.array([0.2, 0.3, 0.5])

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        w1 = weights[0]
        w2 = weights[1]
        w3 = weights[2]

        reward = w1 * VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - w2 * REBUF_PENALTY * rebuf \
                 - w3 * SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                                VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        # retrieve previous state
        state = torch.roll(state, -1, dims=-1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = torch.tensor(
            next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = min(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        with torch.no_grad():
            probability = net.forward(state.unsqueeze(0))
            m = Categorical(probability)
            bit_rate = m.sample().item()
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        if end_of_video:
            weights = np.random.randn(3)  # draw new random preference weights
            weights = np.abs(weights) / np.linalg.norm(weights, ord=1)  # L1-normalize so they sum to 1
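            # Example of the re-weighting above: if randn draws [0.5, -1.0, 1.5], then
            # abs -> [0.5, 1.0, 1.5] with L1 norm 3.0, giving weights [0.167, 0.333, 0.5]
            # that sum to 1 and re-balance the quality, rebuffering and smoothness terms
            # of the reward for the next video.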
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            state = torch.zeros((S_INFO, S_LEN))

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
def TestRun(sess, actor, critic, epoch):

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM


    net_env = env.Environment()


    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []

    reward_sum_all = []
    reward_video_all = []
    reward_sum_per_video = []

    reward_mean_cur = 0



    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        assert bit_rate >= 0
        assert bit_rate < A_DIM

        bitrate_send_last, lossrate_recv_last, bitrate_real_recovery, \
        bitrate_send_last_probe, lossrate_recv_last_probe, bitrate_real_recovery_probe, \
        end_of_video, end_of_validation \
            = net_env.action_dispatch_and_report_svr(VIDEO_BIT_RATE[bit_rate])

        time_stamp += 2  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = bitrate_real_recovery / M_IN_K  # 0.1 0.2 ... 1.1 1.2

        r_batch.append(reward)

        last_bit_rate = bit_rate
        reward_sum_per_video.append(reward)


        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record

        state = np.roll(state, -1, axis=1)
        # this should be S_INFO number of terms
        state[0, -1] = bitrate_send_last / 1000.0  # last quality
        state[1, -1] = lossrate_recv_last  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
        state[2, -1] = bitrate_real_recovery / 1000.0  # kilo byte / ms

        state = np.roll(state, -1, axis=1)
        state[0, -1] = bitrate_send_last_probe / 1000.0  # last quality
        state[1, -1] = lossrate_recv_last_probe  # packet loss rate, e.g. 0.1 0.2 0.3 0.4
        state[2, -1] = bitrate_real_recovery_probe / 1000.0  # kilo byte / ms

        state[3, :A_DIM] = np.array(VIDEO_BIT_RATE[:]) / 1000.0  # kilo byte / ms
        state[4, -1] = bitrate_send_last / 1000.0  # kilo byte / ms


        action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
        # log_file.write('action_prob: '+ str(action_prob)+'\n')
        action_cumsum = np.cumsum(action_prob)
        # log_file.write('action_cumsum: ' + str(action_cumsum)+'\n')
        random_value = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
        decision_arrary = (action_cumsum > random_value)
        bit_rate = decision_arrary.argmax()
        # log_file.write('decision: ' + str(bit_rate) + ' random value: ' + str(random_value) + ' decision_arrary: ' + str(decision_arrary)+'\n')
        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        entropy_record.append(a3c.compute_entropy(action_prob[0]))

        if end_of_video:

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            reward_sum_all.append(reward_sum_per_video[1:])
            video_reward_sum = np.sum(reward_sum_per_video[1:])
            reward_video_all.append(video_reward_sum)
            meanvalue = np.mean(reward_sum_per_video)
            stdvalue = np.std(reward_sum_per_video)

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []
            reward_sum_per_video = []

            # print "video count", video_count, 'video_reward_sum:%.3f', video_reward_sum, ' meanvalue:', meanvalue, ' stdvalue:',stdvalue
            # print ("video count: %d video_reward_sum:%.3f meanvalue:%.3f stdvalue:%.3f"%(video_count, video_reward_sum, meanvalue, stdvalue))



            if end_of_validation:
                mean_all_video_reward = np.mean(reward_video_all)
                sum_all_video_reward = np.sum(reward_video_all)
                std_all_video_reward = np.std(reward_video_all)
                reward_mean_cur = mean_all_video_reward
                # print ("video total count: %d reward_sum:%.3f reward_mean:%.3f reward_std:%.3f" % (
                #     video_count, sum_all_video_reward, mean_all_video_reward, std_all_video_reward))

                print('epoch:', epoch, ' reward_mean: ', reward_mean_cur)
                break
    return reward_mean_cur
Example #8
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))
            action_cumsum = np.cumsum(action_prob)
            bit_rate = (action_cumsum > np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)).argmax()
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            DECISIONS.append(bit_rate)

            s_batch.append(state)

            entropy_record.append(a3c.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')

    print "Decisions: {}".format(Counter(DECISIONS))
Example #9
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    epoch = 0
    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    r_batch = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file_write = (str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n')
        log_file.write(log_file_write.encode('utf-8'))
        log_file.flush()

        if buffer_size < RESEVOIR:
            bit_rate = 0
        elif buffer_size >= RESEVOIR + CUSHION:
            bit_rate = A_DIM - 1
        else:
            bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION)

        bit_rate = int(bit_rate)
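        # Buffer-based (BBA-style) mapping illustrated with hypothetical constants
        # (RESEVOIR, CUSHION and A_DIM are defined elsewhere): with RESEVOIR = 5 s,
        # CUSHION = 10 s and A_DIM = 6, a 10 s buffer gives
        # bit_rate = int((6 - 1) * (10 - 5) / 10) = 2, while buffers below 5 s pin the
        # lowest quality and buffers above 15 s pin the highest.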

        if end_of_video:
            log_file.write('\n'.encode('utf-8'))
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            r_batch = []

            print("video count: " + str(video_count))
            video_count += 1

            if video_count > len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
Example #10
def main():
    # check that the constant definitions are valid
    assert len(bitRatesOptions) == bitRatesTypes
    
    # load the traces
    allCookedTime, allCookedBW, allFileNames = load_trace.load_trace()

    # set the environment
    netEnvironment = env.Environment(all_cooked_time=allCookedTime,
                                      all_cooked_bw=allCookedBW)

    # open the output log file to write
    outputFileName = outputFilePrefix + "_" + allFileNames[netEnvironment.trace_idx]
    outputFilePointer = open(outputFileName, "wb")

    # initialize the local variables
    timeStamp = 0
    lastBitRateOption = defaultBitRateOption
    currentBitRateOption = defaultBitRateOption
    videoCount = 0
    historyState = np.zeros((stateInfoLength, pastFramesLength))

    # initialize the look-up table
    initialLookUpTable()
    # computing kernel:
    while True:
        # get the video chunk according to the current bitrate option
        assert currentBitRateOption >= 0

        delay, sleepTime, currentBufferSize, rebuffer, currentVideoChunkSize, \
            nextVideoChunkSize, endFlag, chunkRemainCount = netEnvironment.get_video_chunk(currentBitRateOption)
        
        # update the time stamp because of the delay and sleeping time
        timeStamp += delay + sleepTime  # ms

        # calculate the reward value according to the formula
        qualityValue = bitRatesOptions[currentBitRateOption] / bitsFactor # kb to Mb
        smoothValue = np.abs(bitRatesOptions[currentBitRateOption] \
                    - bitRatesOptions[lastBitRateOption]) / bitsFactor
        rewardValue =  qualityValue \
                    - rebufferFactor * rebuffer \
                    - smoothFactor * smoothValue
        
        # write the output file
        outputItemStr = str(timeStamp / millsecondsPerSecond) + '\t' \
                    + str(bitRatesOptions[currentBitRateOption]) + '\t' \
                    + str(currentBufferSize) + '\t' \
                    + str(rebuffer) + '\t' \
                    + str(currentVideoChunkSize) + '\t' \
                    + str(delay) + '\t' \
                    + str(rewardValue) + '\n'
        outputFilePointer.write(outputItemStr.encode('utf-8'))
        outputFilePointer.flush()

        # update the bit rate option
        lastBitRateOption = currentBitRateOption

        # update the history state information like a sliding window
        historyState = np.roll(historyState, -1, axis=1)
        historyState[0, -1] = bitRatesOptions[currentBitRateOption] / float(maxBitRate)
        historyState[1, -1] = currentBufferSize / bufferNormFactor
        historyState[2, -1] = rebuffer
        historyState[3, -1] = float(currentVideoChunkSize) / float(delay) / bitsFactor
        historyState[4, -1] = np.minimum(chunkRemainCount, defaultChunkCountToEnd) / float(defaultChunkCountToEnd)

        # MPC kernel begin
        # calculate the normalized estimation error of the bandwidth prediction
        currentError = 0.
        if(len(pastBWEsts) > 0):
            currentError = abs(pastBWEsts[-1] - historyState[3, -1]) / float(historyState[3, -1])
        pastErrors.append(currentError)
        
        # calculate the harmonic mean of last 5 history bandwidths
        # Step 1: collect the last 5 history bandwidths
        pastRealBWArray = historyState[3, -5:]
        while pastRealBWArray[0] == 0.0:
            pastRealBWArray = pastRealBWArray[1:]
        
        # Step 2: calculate the harmonic mean
        pastRealBWSum = 0.0
        for pastRealBWItems in pastRealBWArray:
            pastRealBWSum += (1 / float(pastRealBWItems))
        harmonicBW = 1.0 / (pastRealBWSum / len(pastRealBWArray))

        # calculate the predicted future bandwidth according to the est. error and harmonic mean
        errorIndex = min(5, len(pastErrors))
        maxError = float(max(pastErrors[-errorIndex:]))
        currentPredBW = harmonicBW / (1 + maxError)
        pastBWEsts.append(currentPredBW) 
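        # Worked example of the robust-MPC style estimate above: if the last three non-zero
        # bandwidth samples are [2.0, 4.0, 4.0], the harmonic mean is
        # 3 / (1/2 + 1/4 + 1/4) = 3.0; with a maximum recent relative error of 0.2,
        # the conservative prediction becomes 3.0 / (1 + 0.2) = 2.5.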


        # use the predicted bandwidth and the next chunk size to calculate the estimated download time
        allDownloadTime = []
        for option in range(0, bitRatesTypes):
            allDownloadTime.append((float(nextVideoChunkSize[option]) / (bitsFactor * bitsFactor))/ currentPredBW)

        finalOption = Decision(currentBufferSize, allDownloadTime[0], currentBitRateOption)
        currentBitRateOption = finalOption

        assert finalOption >= 0
        if endFlag:
            outputFilePointer.write("\n".encode('utf-8'))
            outputFilePointer.close()

            lastBitRateOption = defaultBitRateOption
            currentBitRateOption = defaultBitRateOption
            historyState = np.zeros((stateInfoLength, pastFramesLength))

            print("video count", videoCount)
            videoCount += 1

            if videoCount >= len(allFileNames):
                break

            outputFileName = outputFilePrefix + "_" + allFileNames[netEnvironment.trace_idx]
            outputFilePointer = open(outputFileName, "wb")
Example #11
def main():
    os.system('rm -r ' + TEST_LOG_FOLDER)
    os.system('mkdir ' + TEST_LOG_FOLDER)
    
    np.random.seed(RANDOM_SEED)
    all_user_pos, all_file_names = load_trace.load_trace(TEST_TRACES)
    net_env = fixed_env.Environment(all_user_pos=all_user_pos)
    log_path = TEST_LOG_FOLDER + 'log_sim_rl_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')
    
    with tf.Session() as sess:

        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN], action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)

        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")
        
        # initializing
        association = one_hot().T 
        num_shared = 50
        trace_count = 0
        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            channel_gain, num_user_bs, rate, end_of_trace = \
                net_env.scheduling_and_association(association, num_shared)

            reward = np.mean(np.log(rate)) 



            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(str(reward) + '\n')
            log_file.flush()

            state_p1 = (channel_gain-np.mean(channel_gain.reshape((-1))))/(np.std(channel_gain.reshape((-1)))+1e-6)
            state_p2 = ((num_user_bs-np.mean(num_user_bs))/(np.std(num_user_bs)+1e-6)).reshape((7,1))
            #state = np.concatenate([state_p1,state_p2],axis = 1)     # state shape (7, 91)
            state = state_p1


            # compute action probability vector
            action_prob = actor.predict(np.reshape(state, (1, S_INFO, S_LEN)))  
            action = epsilon_greedy(action_prob, 0)          # set epsilon to zero when testing

            association, num_shared = rl_scheduling(channel_gain, action)

            if end_of_trace:
                print(all_file_names[net_env.trace_idx - 1], net_env.scheduling_ptr,
                      'number of shared subchannels:', num_shared,
                      'SINR threshold:', BETA_SET[np.argmax(action[K_DIM:A_DIM])])
                #plot_cellular_network(net_env.macrocell, net_env.picocells, net_env.current_user_pos, association)
                log_file.write('\n')
                log_file.close()
                association = one_hot().T 
                num_shared = 50
                
                trace_count += 1
                if trace_count >= len(all_file_names):
                    break

                log_path = TEST_LOG_FOLDER + 'log_sim_rl_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')

        # append test performance to the log
    with open(LOG_FILE + '_rl_test', 'ab') as log_file:        
        rewards = []
        test_log_files = os.listdir(TEST_LOG_FOLDER)
        for test_log_file in test_log_files:
            reward = []
            with open(TEST_LOG_FOLDER + test_log_file, 'rb') as f:
                for line in f:
                    parse = line.split()
                    try:
                        reward.append(float(parse[0]))
                    except IndexError:
                        break
            rewards.append(np.sum(reward[1:]))

        rewards = np.array(rewards)
        rewards_min = np.min(rewards)
        rewards_5per = np.percentile(rewards, 5)
        rewards_mean = np.mean(rewards)
        rewards_median = np.percentile(rewards, 50)
        rewards_95per = np.percentile(rewards, 95)
        rewards_max = np.max(rewards)

        log_file.write(str(rewards_min) + '\t' +
                       str(rewards_5per) + '\t' +
                       str(rewards_mean) + '\t' +
                       str(rewards_median) + '\t' +
                       str(rewards_95per) + '\t' +
                       str(rewards_max) + '\n')
        log_file.flush()
        
        print('testing results' + '\t average rewards: ' + str(rewards_mean))
Example #12
def main():
    run_id = 0
    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)
    if not os.path.exists(TRANS_DIR):
        os.makedirs(TRANS_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(cooked_trace_folder=TRACE_DIR)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    log_file = open(log_path, 'wb')
    trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
    trans_file = open(trans_path, 'wb')

    model = abr_agent_sim.discrete_BCQ()

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                       str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                       str(buffer_size) + '\t' +
                       str(rebuf) + '\t' +
                       str(video_chunk_size) + '\t' +
                       str(delay) + '\t' +
                       str(reward) + '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
            old_state = np.zeros((S_INFO, S_LEN), dtype=np.float64)
        else:
            state = np.array(s_batch[-1], copy=True)
            old_state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        bit_rate = model.select_action(np.reshape(state, (-1)))
        action_prob = np.zeros((len(VIDEO_BIT_RATE)), dtype=np.float64)
        action_prob[int(bit_rate)] = 1.0

        send_data = str(bit_rate)
        trans_file.write('|'.join([str(list(old_state.reshape(-1))),
                                      str(list(action_prob.reshape(-1))),
                                      str(list(state.reshape(-1))),
                                      str(reward), str(send_data)]))
        trans_file.write('\n')
        trans_file.flush()
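        # Each record written above is one '|'-separated line:
        # flattened previous state | one-hot action probabilities | flattened next state | reward | chosen action,
        # presumably so an offline learner (e.g. the discrete_BCQ agent above) can replay
        # these transitions later; the exact consumer is not shown here.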

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            trans_file.write('\n')
            trans_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)

            print "video count", video_count
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
            log_file = open(log_path, 'wb')
            trans_path = TRANS_FILE + '_' + all_file_names[net_env.trace_idx] + '_' + str(run_id)
            trans_file = open(trans_path, 'wb')
Example #13
def train(epoch, train_trace):
    # path setting
    TRAIN_TRACES = train_trace
    video_size_file = './dataset/video_trace/sports/frame_trace_'  # video trace path setting,
    LogFile_Path = "./log/"  # log file trace path setting,
    # load the trace
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TRAIN_TRACES)
    # random_seed
    random_seed = 2
    video_count = 0
    frame_time_len = 0.04
    reward_all_sum = 0
    # init the environment
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=random_seed,
                              logfile_path=LogFile_Path,
                              VIDEO_SIZE_FILE=video_size_file,
                              Debug=False)
    BIT_RATE = [500.0, 850.0, 1200.0, 1850.0]  # kbps
    # ABR setting
    cnt = 0
    # default setting
    bit_rate = 0
    last_bit_rate = 0
    target_buffer = 1
    latency_limit = 7
    # QOE setting
    reward_frame = 0
    reward_all = 0
    reward = 0
    SMOOTH_PENALTY = 0.01
    REBUF_PENALTY = 1.5
    LATENCY_PENALTY = 0.01
    BITRATE_REWARD = 0.001
    SKIP_PENALTY = 1
    switch_num = 0
    rebuf_time = 0
    buffer_flag = 0
    cdn_flag = 0
    S_time_interval = [0] * 100
    S_send_data_size = [0] * 100
    S_buffer_size = [0] * 100
    S_end_delay = [0] * 100
    S_rebuf = [0] * 100

    flag = False
    n = 0
    mark = 0
    marks = 0
    while True:
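        # periodically fit the DQN from replay memory once enough transitions are stored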

        if len(agent.memory) > BATCH_SIZE and cnt % 1000 == 0:
            agent.replay(BATCH_SIZE)

        reward_frame = 0
        time, time_interval, send_data_size, chunk_len, \
        rebuf, buffer_size, play_time_len, end_delay, \
        cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
        buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer, latency_limit)

        cnt += 1

        S_time_interval.append(time_interval)
        S_time_interval.pop(0)
        S_buffer_size.append(buffer_size)
        S_buffer_size.pop(0)
        S_send_data_size.append(send_data_size)
        S_send_data_size.pop(0)
        S_end_delay.append(end_delay)
        S_end_delay.pop(0)
        S_rebuf.append(rebuf)
        S_rebuf.pop(0)

        # # QOE setting
        # if end_delay <= 1.0:
        #     LATENCY_PENALTY = 0.005
        # else:
        #     LATENCY_PENALTY = 0.01

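        # frame-level QoE: bitrate utility minus rebuffering, end-to-end latency and
        # frame-skip penalties; during a CDN stall (cdn_flag), only rebuffering is penalized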
        if not cdn_flag:
            reward_frame = frame_time_len * float(
                BIT_RATE[bit_rate]
            ) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LATENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len
        else:
            reward_frame = -(REBUF_PENALTY * rebuf)
        rebuf_time += rebuf
        n += 1
        reward += reward_frame
        if decision_flag and not end_of_video:
            reward_frame = -1 * SMOOTH_PENALTY * (
                abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
            last_bit_rate = bit_rate
            reward += reward_frame
            length = len(S_buffer_size)
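            # the (next_)state is the most recent history_len samples of buffer size,
            # sent data size, download interval, end-to-end delay and rebuffering time,
            # each rescaled to a comparable magnitude before being fed to the agent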
            if flag:
                next_state = []

                for i in S_buffer_size[length - history_len:]:
                    next_state.append(i * 0.1)
                for i in S_send_data_size[length - history_len:]:
                    next_state.append(i * 0.00001)
                for i in S_time_interval[length - history_len:]:
                    next_state.append(i * 10)
                for i in S_end_delay[length - history_len:]:
                    next_state.append(i * 0.1)
                for i in S_rebuf[length - history_len:]:
                    next_state.append(i)
                marks += 1
                if (n >= history_len - 40):
                    next_state = np.reshape(next_state, [1, STATE_SIZE])
                    agent.remember(state, action, reward, next_state, done)
                    reward = 0
                else:
                    mark += 1
                n = 0

            flag = True
            state = []

            for i in S_buffer_size[length - history_len:]:
                state.append(i * 0.1)
            for i in S_send_data_size[length - history_len:]:
                state.append(i * 0.00001)
            for i in S_time_interval[length - history_len:]:
                state.append(i * 10)
            for i in S_end_delay[length - history_len:]:
                state.append(i * 0.1)
            for i in S_rebuf[length - history_len:]:
                state.append(i)

            state = np.reshape(state, [1, STATE_SIZE])
            action = agent.act(state)
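            # each discrete action indexes a (bit_rate, target_buffer, latency_limit) tuple in ACTION_SAPCE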
            bit_rate = ACTION_SAPCE[action][0]
            target_buffer = ACTION_SAPCE[action][1]
            latency_limit = ACTION_SAPCE[action][2]
            switch_num = 0
            rebuf_time = 0

        reward_all += reward_frame
        if end_of_video:
            agent.update_target_model()

            # scale down the per-video reward to keep the accumulated sum in a narrow range
            print("video count", video_count, reward_all, mark, marks)
            reward_all_sum += reward_all / 20
            video_count += 1
            if video_count >= len(all_file_names):
                agent.save("save/" + str(epoch) + ".h5")
                break
            reward_all = 0
            bit_rate = 0
            target_buffer = 1
            S_time_interval = [0] * 100
            S_send_data_size = [0] * 100
            S_buffer_size = [0] * 100
            S_end_delay = [0] * 100
            S_rebuf = [0] * 100
            rebuf_time = 0
            buffer_flag = 0
            cdn_flag = 0
            reward = 0
            flag = False
            n = 0
            mark = 0
            marks = 0

    return reward_all_sum
Example #14
def main():
    for num_shared in range(5, 100, 10):
        for beta in range(-6, 14, 2):
            #num_shared = 55
            #beta = 2
            print "num_shared, beta: ", num_shared, beta
            os.system('rm -r ' + TEST_LOG_FOLDER)
            os.system('mkdir ' + TEST_LOG_FOLDER)

            np.random.seed(RANDOM_SEED)
            all_user_pos, all_file_names = load_trace.load_trace(TEST_TRACES)
            net_env = fixed_env.Environment(all_user_pos=all_user_pos)
            log_path = TEST_LOG_FOLDER + 'log_sim_pf_' + all_file_names[
                net_env.trace_idx]
            log_file = open(log_path, 'wb')

            association = one_hot().T
            trace_count = 0

            while True:  # serve video forever
                # the action is from the last decision
                # this is to make the framework similar to the real
                channel_gain, num_user_bs, rate, end_of_trace = \
                    net_env.scheduling_and_association(association, num_shared)
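                # proportional-fairness style objective: mean of the log user rates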

                reward = np.mean(np.log(rate))

                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(str(reward) + '\n')
                log_file.flush()

                association = picocell_first(channel_gain, num_shared, beta)

                if end_of_trace:
                    #plot_cellular_network(net_env.macrocell, net_env.picocells, net_env.current_user_pos, association)
                    log_file.write('\n')
                    log_file.close()
                    association = one_hot().T

                    print "trace_count", trace_count, all_file_names[
                        net_env.trace_idx]
                    trace_count += 1
                    if trace_count >= len(all_file_names):
                        break

                    log_path = TEST_LOG_FOLDER + 'log_sim_pf_' + all_file_names[
                        net_env.trace_idx]
                    log_file = open(log_path, 'wb')

            # append test performance to the log
            with open(LOG_FILE + '_test', 'ab') as log_file:
                rewards = []
                test_log_files = os.listdir(TEST_LOG_FOLDER)
                for test_log_file in test_log_files:
                    reward = []
                    with open(TEST_LOG_FOLDER + test_log_file, 'rb') as f:
                        for line in f:
                            parse = line.split()
                            try:
                                reward.append(float(parse[0]))
                            except IndexError:
                                break
                    rewards.append(np.sum(reward[1:]))

                rewards = np.array(rewards)
                rewards_min = np.min(rewards)
                rewards_5per = np.percentile(rewards, 5)
                rewards_mean = np.mean(rewards)
                rewards_median = np.percentile(rewards, 50)
                rewards_95per = np.percentile(rewards, 95)
                rewards_max = np.max(rewards)

                log_file.write(
                    str(num_shared) + '\t' + str(beta) + '\t' +
                    str(rewards_mean) + '\n')
                '''
                log_file.write(str(num_shared) + '\t' +
                               str(beta) + '\t' +
                               str(rewards_min) + '\t' +
                               str(rewards_5per) + '\t' +
                               str(rewards_mean) + '\t' +
                               str(rewards_median) + '\t' +
                               str(rewards_95per) + '\t' +
                               str(rewards_max) + '\n')
                '''
                log_file.flush()

                print 'testing results' + '\t average rewards: ' + str(
                    rewards_mean)
Example #15
    def main(self, args, net_env=None, policy=None):
        np.random.seed(RANDOM_SEED)
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM
        log_f = LOG_FILE

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
                args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)

        # if args.update:
        #     log_f = log_f.replace('dt', 'du')

        if not viper_flag and args.log:
            log_path = LOG_FILE + '_' + net_env.all_file_names[
                net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        s_batch = [np.zeros((S_INFO, S_LEN))]
        # a_batch = np.zeros((TOTAL_VIDEO_CHUNKS, 3))
        r_batch = []
        rollout = []
        video_count = 0
        reward_sum = 0
        in_compute = []

        # load dt policy
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)
        policy = fsm.FSM(policy)
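        # the loaded decision tree is wrapped in the FSM helper; chunk-level bitrate
        # decisions below come from policy.predict() on the serialized state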

        # ========= @ zili: debug ========
        # with open('decision_tree_ready/robustmpc_norway_500.pk3', 'rb') as f:
        #     baseline = pk.load(f)

        while True:  # serve video forever

            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate,
                                args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if args.log:
                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    bytes(str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = rebuf
            state[3, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[4, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
            # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K

            serialized_state = serial(state)
            bit_rate = int(policy.predict([serialized_state])[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            # ======== @ zili: debug ========
            # if video_chunk_remain > 0:
            #     a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][0] = bit_rate
            #     a_batch[TOTAL_VIDEO_CHUNKS - video_chunk_remain][2] = int(baseline.predict([serialized_state])[0])

            # if args.update:
            #     chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain)
            #     policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1))
            #     if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON:
            #         in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #         in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #         in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN,
            #                                          last_bit_rate, state, args))
            #
            #     for traj in in_compute:
            #         this_chunk_size = video_chunk_size
            #         this_delay = delay
            #         while True:
            #             if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH:
            #                 new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0])
            #                 traj.next_chunk(new_bitrate)
            #                 this_chunk_size, this_delay = traj.trans_msg
            #             else:
            #                 break
            #
            #         while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end:
            #             r_below = sum([get_reward(in_compute[0].quality[i], in_compute[0].rebuf[i],
            #                                       in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i],
            #                                       in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i],
            #                                       in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
            #             if r_above == max(r_below, r_normal, r_above):
            #                 policy.update(in_compute[0].chunk_index, 1)
            #                 # a_batch[in_compute[0].chunk_index][1] = in_compute[0].chunk_init_bitrate
            #             elif r_normal == max(r_below, r_normal, r_above):
            #                 policy.update(in_compute[0].chunk_index, -1)
            #                 # a_batch[in_compute[1].chunk_index][1] = in_compute[1].chunk_init_bitrate
            #             else:
            #                 policy.update(in_compute[0].chunk_index, 0)
            #                 # a_batch[in_compute[2].chunk_index][1] = in_compute[2].chunk_init_bitrate
            #
            #             in_compute.pop(0)
            #             in_compute.pop(0)
            #             in_compute.pop(0)

            if end_of_video:
                # print(a_batch)
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                r_batch = []
                in_compute = []

                if viper_flag:
                    return rollout
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = log_f + '_' + net_env.all_file_names[
                            net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return reward_sum
Example #16
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    # epoch = 0
    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    r_batch = []

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
            - REBUF_PENALTY * rebuf \
            - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                      VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()
        # buffer based
        # if buffer_size < RESEVOIR:
        #     bit_rate = 0
        # elif buffer_size >= RESEVOIR + CUSHION:
        #     bit_rate = A_DIM - 1
        # else:
        #     bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION)

        # bola
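        # BOLA: pick the bitrate m maximizing (V * (utility_m + gamma * p) - Q) / size_m,
        # where utility_m is the log of the chunk size relative to the reference chunk
        # and Q is the current buffer level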
        utils = [
            np.log(s / next_video_chunk_sizes[-1])
            for s in next_video_chunk_sizes
        ]
        V = 5.2  # control parameter
        p = 4  # chunk size in seconds
        gamma = 5.0 / p
        Q = buffer_size  # Q is buffer size
        score = []
        for i in range(A_DIM):
            score.append((V * utils[i] + V * gamma * p - buffer_size) /
                         next_video_chunk_sizes[i])
        bit_rate = np.argmax(score)
        bit_rate = int(bit_rate)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            r_batch = []

            print("video count", video_count)
            video_count += 1

            if video_count > len(all_file_names):
                break
            # stop test
            break
            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'w')
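
# A minimal standalone sketch of the BOLA scoring rule used inline above (the helper
# name and default parameters are ours, for illustration only): for each bitrate m it
# evaluates (V * (utility_m + gamma * p) - Q) / size_m and returns the best index.
import numpy as np

def bola_select_bitrate(next_chunk_sizes, buffer_size, V=5.2, p=4.0, gamma=5.0 / 4.0):
    utils = [np.log(s / float(next_chunk_sizes[-1])) for s in next_chunk_sizes]
    scores = [(V * utils[i] + V * gamma * p - buffer_size) / next_chunk_sizes[i]
              for i in range(len(next_chunk_sizes))]
    return int(np.argmax(scores))

# e.g. bit_rate = bola_select_bitrate(next_video_chunk_sizes, buffer_size)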
Example #17
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []

    video_count = 0

    # make chunk combination options
    for combo in itertools.product([0, 1, 2, 3, 4, 5], repeat=5):
        CHUNK_COMBO_OPTIONS.append(combo)

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                           VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        # log scale reward
        # log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
        # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

        # reward = log_bit_rate \
        #          - REBUF_PENALTY * rebuf \
        #          - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

        # reward = BITRATE_REWARD[bit_rate] \
        #          - 8 * rebuf - np.abs(BITRATE_REWARD[bit_rate] - BITRATE_REWARD[last_bit_rate])

        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
            np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3, -1] = float(video_chunk_size) / float(
            delay) / M_IN_K  # kilo byte / ms
        state[4, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
        # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K

        # ================== MPC =========================
        curr_error = 0  # default: assumes this is the first request, so error is 0 since we have never predicted bandwidth
        if (len(past_bandwidth_ests) > 0):
            curr_error = abs(past_bandwidth_ests[-1] - state[3, -1]) / float(
                state[3, -1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC
        # first get harmonic mean of last 5 bandwidths
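        # the harmonic mean (n / sum(1 / x_i)) is dominated by the smallest samples,
        # so the throughput estimate stays conservative against short spikes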
        past_bandwidths = state[3, -5:]
        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]
        #if ( len(state) < 5 ):
        #    past_bandwidths = state[3,-len(state):]
        #else:
        #    past_bandwidths = state[3,-5:]
        bandwidth_sum = 0
        for past_val in past_bandwidths:
            bandwidth_sum += (1 / float(past_val))
        harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths))

        # future bandwidth prediction
        # divide by 1 + max of last 5 (or up to 5) errors
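        # robustMPC: the larger the recent relative prediction error, the more the
        # bandwidth estimate is discounted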
        max_error = 0
        error_pos = -5
        if (len(past_errors) < 5):
            error_pos = -len(past_errors)
        max_error = float(max(past_errors[error_pos:]))
        future_bandwidth = harmonic_bandwidth / (1 + max_error
                                                 )  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)

        # future chunk horizon (shortened if fewer chunks remain)
        last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain)
        future_chunk_length = MPC_FUTURE_CHUNK_COUNT
        if (TOTAL_VIDEO_CHUNKS - last_index < 5):
            future_chunk_length = TOTAL_VIDEO_CHUNKS - last_index

        # all possible combinations of 5 chunk bitrates (6^5 = 7776 options)
        # iterate over list and for each, compute reward and store max reward combination
        max_reward = -100000000
        best_combo = ()
        start_buffer = buffer_size
        #start = time.time()
        for full_combo in CHUNK_COMBO_OPTIONS:
            combo = full_combo[0:future_chunk_length]
            # calculate total rebuffer time for this combination (start with start_buffer and subtract
            # each download time and add 4 seconds of content per chunk, in that order)
            curr_rebuffer_time = 0
            curr_buffer = start_buffer
            bitrate_sum = 0
            smoothness_diffs = 0
            last_quality = int(bit_rate)
            for position in range(0, len(combo)):
                chunk_quality = combo[position]
                index = last_index + position + 1  # e.g., if last chunk is 3, then first iter is 3+0+1=4
                download_time = (
                    get_chunk_size(chunk_quality, index) /
                    1000000.) / future_bandwidth  # this is MB/MB/s --> seconds
                if (curr_buffer < download_time):
                    curr_rebuffer_time += (download_time - curr_buffer)
                    curr_buffer = 0
                else:
                    curr_buffer -= download_time
                curr_buffer += 4
                bitrate_sum += VIDEO_BIT_RATE[chunk_quality]
                smoothness_diffs += abs(VIDEO_BIT_RATE[chunk_quality] -
                                        VIDEO_BIT_RATE[last_quality])
                # bitrate_sum += BITRATE_REWARD[chunk_quality]
                # smoothness_diffs += abs(BITRATE_REWARD[chunk_quality] - BITRATE_REWARD[last_quality])
                last_quality = chunk_quality
            # compute reward for this combination (one reward per 5-chunk combo)
            # bitrates are in Mbits/s, rebuffer in seconds, and smoothness_diffs in Mbits/s

            reward = (bitrate_sum / 1000.) - (
                REBUF_PENALTY * curr_rebuffer_time) - (smoothness_diffs /
                                                       1000.)
            # reward = bitrate_sum - (8*curr_rebuffer_time) - (smoothness_diffs)

            if (reward >= max_reward):
                best_combo = combo
                max_reward = reward
                # send data to html side (first chunk of best combo)
                send_data = 0  # no combo had reward better than -1000000 (ERROR) so send 0
                if (best_combo != ()):  # some combo was good
                    send_data = best_combo[0]

        bit_rate = send_data
        # hack
        # if bit_rate == 1 or bit_rate == 2:
        #    bit_rate = 0

        # ================================================

        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            print "video count", video_count
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
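
# A small self-contained sketch of the robustMPC throughput predictor that the loop
# above implements inline (the function name and signature are ours, for illustration
# only): the harmonic mean of recent non-zero throughput samples, discounted by
# (1 + the largest relative prediction error) observed over the same window.
def predict_future_bandwidth(past_throughputs, past_errors, window=5):
    recent = [t for t in past_throughputs[-window:] if t > 0.0]
    if not recent:
        return 0.0
    harmonic = len(recent) / sum(1.0 / t for t in recent)
    max_error = max(past_errors[-window:]) if past_errors else 0.0
    return harmonic / (1.0 + max_error)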
Example #18
def main():
    args = parser.parse_args()
    if args.lin:
        qoe_metric = 'results_lin'
    elif args.log:
        qoe_metric = 'results_log'
    else:
        print('Please select the QoE Metric!')

    if args.FCC:
        dataset = 'fcc'
    elif args.HSDPA:
        dataset = 'HSDPA'
    elif args.Oboe:
        dataset = 'Oboe'
    else:
        print('Please select the dataset!')

    dataset_path = './traces_' + dataset + '/'
    Log_file_path = './' + qoe_metric + '/' + dataset + '/log_sim_rb'

    np.random.seed(RANDOM_SEED)

    # if not os.path.exists(SUMMARY_DIR):
    #     os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        dataset_path)
    past_bandwidths = np.zeros(6)
    opt_ptr = 0

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY
    # current_psnr = DEFAULT_PSNR
    # last_psnr = DEFAULT_PSNR

    video_count = 0

    while True:
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, next_video_chunk_sizes, \
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        # throuput_e = np.roll(throuput_e, -1)
        # throuput_e[-1] = float(video_chunk_size) / float(delay) * M_IN_K  # byte/s
        # while throuput_e[0] == 0.0:
        #     throuput_e = throuput_e[1:]
        # bandwidth_sum = 0
        # for past_val in throuput_e:
        #     bandwidth_sum += (1/float(past_val))
        # harmonic_bandwidth = 1.0/(bandwidth_sum/len(throuput_e))
        # throuput_a = harmonic_bandwidth

        past_bandwidths = np.roll(past_bandwidths, -1)
        past_bandwidths[-1] = float(video_chunk_size) / float(
            delay) * M_IN_K  # byte/s

        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]

        curr_error = 0  # default: assumes this is the first request, so error is 0 since we have never predicted bandwidth
        if (len(past_bandwidth_ests) > 0):
            curr_error = abs(past_bandwidth_ests[-1] -
                             past_bandwidths[-1]) / float(past_bandwidths[-1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC
        # first get harmonic mean of last 5 bandwidths
        # if ( len(state) < 5 ):
        #    past_bandwidths = state[3,-len(state):]
        # else:
        #    past_bandwidths = state[3,-5:]
        bandwidth_sum = 0
        for past_val in past_bandwidths:
            bandwidth_sum += (1 / float(past_val))
        harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths))

        # future bandwidth prediction
        # divide by 1 + max of last 5 (or up to 5) errors
        max_error = 0
        error_pos = -5
        if (len(past_errors) < 5):
            error_pos = -len(past_errors)
        max_error = float(max(past_errors[error_pos:]))
        future_bandwidth = harmonic_bandwidth / (1 + max_error
                                                 )  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)

        chunksize_min = next_video_chunk_sizes[0]

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        if qoe_metric == 'results_lin':
            REBUF_PENALTY = 4.3
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                            VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
        else:
            REBUF_PENALTY = 2.66
            log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] /
                                  float(VIDEO_BIT_RATE[0]))
            log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] /
                                       float(VIDEO_BIT_RATE[0]))

            reward = log_bit_rate \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

        last_bit_rate = bit_rate
        ## last_psnr = current_psnr

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' + str(reward) +
            '\n')
        log_file.flush()

        all_reward = []
        all_quality_tuple = []
        ptr = 0
        # RB-algorithm
        bit_rate = 0
        for q in xrange(5, -1, -1):
            next_size = next_video_chunk_sizes[q]
            if next_size / future_bandwidth - (buffer_size) <= 0:
                bit_rate = q
                break
            #next_psnr = next_chunk_psnr[q]
            # if throuput_a * 2 < next_size:
            #     reward = 0
            # else:
            # reward = VIDEO_BIT_RATE[q] / M_IN_K \
            #             - REBUF_PENALTY * np.maximum(next_size/future_bandwidth - buffer_size, 0) \
            #             - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[q] -
            #                                VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            # log_bit_rate = np.log(VIDEO_BIT_RATE[q] / float(VIDEO_BIT_RATE[0]))
            # log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

            # reward = log_bit_rate \
            #             - REBUF_PENALTY * np.maximum(next_size/future_bandwidth - buffer_size, 0) \
            #             - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)
            # all_reward.append(reward)
            # all_quality_tuple.append(q)
            # ptr += 1
        # all_reward = np.asarray(all_reward)
        # if all_reward.all() == 0 :
        #     bit_rate = 0
        #     #current_psnr = next_chunk_psnr[bit_rate]
        # else:
        #     opt_ptr = all_reward.argmax()
        #     bit_rate = all_quality_tuple[opt_ptr]
        #current_psnr = next_chunk_psnr[bit_rate]

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            # bit_rate = 0
            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            #current_psnr = DEFAULT_PSNR
            del past_bandwidth_ests[:]

            print "video count", video_count
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
Example #19
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    with torch.no_grad():
        model = a3c.ActorCritic(state_dim=[S_INFO, S_LEN],
                                action_dim=A_DIM,
                                learning_rate=[ACTOR_LR_RATE, CRITIC_LR_RATE],islstm = islstm)

        nn_model = NN_MODEL
        if nn_model is not None:  # nn_model is the path to file
            model.load_state_dict(torch.load(nn_model, map_location=torch.device('cpu')))
            print("Model restored.")

        state = torch.zeros(S_INFO, S_LEN)
        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = torch.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [torch.zeros(S_INFO, S_LEN)]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0


        cx = torch.zeros(1, 128)
        hx = torch.zeros(1, 128)
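        # hidden (hx) and cell (cx) state for the 128-unit LSTM actor-critic variant,
        # only used when islstm != 0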

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write((str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(buffer_size) + '\t' +
                           str(rebuf) + '\t' +
                           str(video_chunk_size) + '\t' +
                           str(delay) + '\t' +
                           str(reward) + '\n').encode("utf-8"))
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = torch.zeros(S_INFO, S_LEN)
            else:
                state = s_batch[-1].clone()

            state = torch.roll(state, -1, dims=1)

            # Fill in the state vector with normalization
            state[0, -1] = torch.Tensor([VIDEO_BIT_RATE[last_bit_rate] / float(max(VIDEO_BIT_RATE))])  # last quality
            state[1, -1] = torch.Tensor([buffer_size / BUFFER_NORM_FACTOR])  # buffer size
            state[2, -1] = torch.Tensor([float(video_chunk_size) / float(delay) / M_IN_K])  # kilo byte / ms
            state[3, -1] = torch.Tensor([float(delay) / M_IN_K / BUFFER_NORM_FACTOR])  # /10 sec
            state[4, :A_DIM] = torch.Tensor([next_video_chunk_sizes]) / M_IN_K / M_IN_K  # mega byte
            # remaining chunk number
            state[5, -1] = torch.Tensor([min(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)])

            if islstm == 0:
                logits, value = model(state.unsqueeze(dim=0))
            else:
                logits, value, hx, cx = model((state.unsqueeze(dim=0),hx,cx))
            # print(f"index {index}, state {state}, logits {logits}, value {value}",sep="\n")
            # print(state,logits)
            try:
                cate         = Categorical(logits)
                bit_rate     = cate.sample().item()
            except Exception as e:
                print(e)
                print("ran into an all-zero / invalid action distribution")
                print(logits, state)
                exit()

            policy       = logits
            log_policy   = torch.log(logits)
            entropy      = (policy * log_policy).sum(1, keepdim=True)

            s_batch.append(state)
            entropy_record.append(entropy)

            if end_of_video:
                log_file.write('\n'.encode("utf-8"))
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                cx = cx.detach()
                hx = hx.detach()

                action_vec = torch.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(torch.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                print ("video count", video_count)
                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
Example #20
def main():
    np.random.seed(RANDOOM_SEED)

    assert len(TILES_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_throughput_trace.load_throughput_trace(
    )
    all_cooked_tiles = load_fov_traces.load_fov_traces()
    all_tile_chunk_video_size = load_tile_chunk_video_size.load_tile_chunk_video_size(
    )

    net_env = env.Environment(
        all_cooked_time=all_cooked_time,
        all_cooked_bw=all_cooked_bw,
        all_cooked_tiles=all_cooked_tiles,
        all_tile_chunk_video_size=all_tile_chunk_video_size)

    log_path = LOG_FILE + '_' + all_file_names[net_env.bw_trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0
    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []

    video_count = 0

    # make chunk combination options
    # this combo is used for future optimization
    for combo in itertools.product(list(range(MPC_FUTURE_CHUNK_COUNT + 1)),
                                   repeat=MPC_FUTURE_CHUNK_COUNT):
        CHUNK_COMBO_OPTIONS.append(combo)
        # print(combo)

    while True:  # serve video forever
        # the action is from the last decision
        if net_env.video_chunk_counter == 0:
            print(all_file_names[net_env.bw_trace_idx])

        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, \
            end_of_video, video_chunk_remain, \
            video_chunk_quality, \
            basic_video_chunk_quality, \
            highest_video_chunk_quality = \
            net_env.fetch_video_chunk(bit_rate)

        time_stamp += delay  # ms
        time_stamp += sleep_time  # in ms

        # initialize the last_video chunk quality
        if (net_env.video_chunk_counter == 1):
            last_video_chunk_quality = video_chunk_quality

        # reward is video quality - rebuffer_penalty
        reward = video_chunk_quality / M_IN_K \
                 - REBUF_PENALTY * rebuf \
                 - SMOOTH_PENALTY * np.abs(last_video_chunk_quality -
                                           video_chunk_quality) / M_IN_K

        # log scale reward
        # log_chunk_quality = np.log(video_chunk_quality / float(basic_video_chunk_quality))
        # log_last_chunk_quality = np.log(last_video_chunk_quality / float(basic_video_chunk_quality))
        # reward = log_chunk_quality \
        #             - REBUF_PENALTY * rebuf \
        #             - SMOOTH_PENALTY * np.abs( log_chunk_quality - log_last_chunk_quality)

        r_batch.append(reward)
        last_video_chunk_quality = video_chunk_quality

        # log time_stamp, video_chunk_quality, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' +  # unit: sec
            str(video_chunk_quality) + '\t' +  # unit: Kbps
            str(buffer_size) + '\t' +  # unit: sec
            str(rebuf) + '\t' +  # unit: sec
            str(video_chunk_size) + '\t' +  # unit: Bytes
            str(delay) + '\t' +  # unit: ms
            str(reward) + '\n')

        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)  # each row left-shift one
        # this should be S_INFO number of terms
        state[0, -1] = video_chunk_quality / float(
            highest_video_chunk_quality)  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3,
              -1] = float(video_chunk_size) / float(delay) / M_IN_K  # Mbyte/s
        state[4, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        # ===================================MPC ===================================================
        curr_error = 0  # default assumes that this is the first request so error is 0 since we have never predicted bandwidth
        if (len(past_bandwidth_ests) > 0):
            curr_error = abs(past_bandwidth_ests[-1] - state[3, -1]) / float(
                state[3, -1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC
        # first get harmonic mean of last n bandwidths
        past_bandwidths = state[3, -PAST_BW_TO_PREDICT:]
        # drop the leading zero (not yet measured) throughput samples
        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]

        bandwidth_sum = 0
        for past_val in past_bandwidths:
            bandwidth_sum += (1 / float(past_val))
        harmonic_bandwidth = 1.0 / (bandwidth_sum / len(past_bandwidths))

        # future bandwidth prediction
        # divide by (1+max)  of last PAST_BW_TO_PREDICT
        max_error = 0
        error_pos = -PAST_BW_TO_PREDICT
        if (len(past_errors) < PAST_BW_TO_PREDICT):
            error_pos = -len(past_errors)
        max_error = float(max(past_errors[error_pos:]))
        future_bandwidth = harmonic_bandwidth / (1 + max_error
                                                 )  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)

        # future chunks length
        last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1)

        future_chunk_length = MPC_FUTURE_CHUNK_COUNT

        # if fewer than MPC_FUTURE_CHUNK_COUNT chunks remain, shorten the horizon
        if TOTAL_VIDEO_CHUNKS - 1 - last_index < MPC_FUTURE_CHUNK_COUNT:
            future_chunk_length = int(TOTAL_VIDEO_CHUNKS - last_index - 1)

        # all possible combinations of MPC_FUTURE_CHUNK_COUNT chunk qualities
        # iterate over list and for each, compute reward and store max reward combination
        max_reward = -100000000
        best_combo = ()
        start_buffer = buffer_size
        for full_combo in CHUNK_COMBO_OPTIONS:
            combo = full_combo[0:future_chunk_length]
            curr_rebuffer_time = 0
            curr_buffer = start_buffer
            quality_sum = 0
            smoothness_diffs = 0
            last_quality = video_chunk_quality
            for position in range(0, len(combo)):
                chunk_quality = combo[position]
                index = last_index + position + 1

                # decide all LT or only FoV
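                # below BUFFER_THRESH_FOV, only tiles inside the predicted field of view are
                # fetched at the candidate quality (quality_out_fov = -1); otherwise every
                # tile is fetched at the base quality 0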
                if curr_buffer <= BUFFER_THRESH_FOV:
                    curr_video_chunk_size, curr_video_chunk_quality = net_env.get_video_chunk_size_quality(
                        quality_in_fov=chunk_quality,
                        quality_out_fov=-1,
                        chunk_index=index)
                else:
                    curr_video_chunk_size, curr_video_chunk_quality = net_env.get_video_chunk_size_quality(
                        quality_in_fov=0, quality_out_fov=0, chunk_index=index)

                download_time = (curr_video_chunk_size /
                                 1000000.0) / future_bandwidth
                if curr_buffer < download_time:
                    curr_rebuffer_time += (download_time - curr_buffer)
                    curr_buffer = 0
                else:
                    curr_buffer -= download_time

                curr_buffer += VIDEO_CHUNK_LEN
                quality_sum += curr_video_chunk_quality
                smoothness_diffs += abs(curr_video_chunk_quality -
                                        last_quality)
                last_quality = curr_video_chunk_quality

            reward = quality_sum/1000.0 \
                     - REBUF_PENALTY * curr_rebuffer_time \
                     - SMOOTH_PENALTY * smoothness_diffs / 1000.0

            if reward >= max_reward:
                best_combo = combo
                max_reward = reward

                send_data = 0
                if best_combo != ():
                    send_data = best_combo[0]
        bit_rate = send_data

        s_batch.append(state)

        # ===================================MPC end ==================================================

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            print("trace count" + str(video_count))
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.bw_trace_idx]
            log_file = open(log_path, 'wb')
Example #21
    def main(self, args, net_env=None, policy=None):
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM
        log_f = LOG_FILE

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time, all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)
        if args.update:
            log_f = log_f.replace('dt', 'du')

        if not viper_flag and args.log:
            log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []
        video_count = 0
        reward_sum = 0
        in_compute = []

        # load dt policy
        if policy is None:
            with open(args.dt, 'rb') as f:
                policy = pk.load(f)
        policy = fsm.FSM(policy)

        while True:  # serve video forever
            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate, args.qoe_metric)
            r_batch.append(reward)
            reward_sum += reward
            last_bit_rate = bit_rate

            if args.log:
                log_file.write(bytes(str(time_stamp / M_IN_K) + '\t' +
                               str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                               str(buffer_size) + '\t' +
                               str(rebuf) + '\t' +
                               str(video_chunk_size) + '\t' +
                               str(delay) + '\t' +
                               str(reward) + '\n', encoding='utf-8'))
                log_file.flush()


            # select bit_rate according to decision tree
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            serialized_state = serial(state)
            bit_rate = int(policy.predict(np.array(serialized_state).reshape(1, -1))[0])
            rollout.append((state, bit_rate, serialized_state))
            s_batch.append(state)

            if args.update:
                chunk_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain - 1)
                policy.chunk_leaf[chunk_index] = policy.tree.apply(np.array(serialized_state).reshape(1, -1))
                if chunk_index < CHUNK_TIL_VIDEO_END_CAP - HORIZON:
                    in_compute.append(fsm.Trajectory(chunk_index, max(0, bit_rate - 1), buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))
                    in_compute.append(fsm.Trajectory(chunk_index, bit_rate, buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))
                    in_compute.append(fsm.Trajectory(chunk_index, min(5, bit_rate + 1), buffer_size - CHUNK_LEN,
                                                     last_bit_rate, state, args))

                for traj in in_compute:
                    this_chunk_size = video_chunk_size
                    this_delay = delay
                    while True:
                        if traj.apply(this_chunk_size, this_delay) == CHUNK_SWITCH:
                            new_bitrate = int(policy.predict(np.array(serial(traj.states)).reshape(1, -1))[0])
                            traj.next_chunk(new_bitrate)
                            this_chunk_size, this_delay = traj.trans_msg
                        else:
                            break

                    while len(in_compute) > 1 and in_compute[0].end and in_compute[1].end and in_compute[2].end:
                        r_below = sum([get_reward(in_compute[0].quality[i], in_compute[0].rebuf[i],
                                                  in_compute[0].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        r_normal = sum([get_reward(in_compute[1].quality[i], in_compute[1].rebuf[i],
                                                  in_compute[1].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        r_above = sum([get_reward(in_compute[2].quality[i], in_compute[2].rebuf[i],
                                                  in_compute[2].last_bitrate[i], args.qoe_metric) for i in range(HORIZON)])
                        if r_above == max(r_below, r_normal, r_above):
                            policy.update(in_compute[0].chunk_index, 1)
                        elif r_normal == max(r_below, r_normal, r_above):
                            policy.update(in_compute[0].chunk_index, -1)
                        else:
                            policy.update(in_compute[0].chunk_index, 0)

                        in_compute.pop(0)
                        in_compute.pop(0)
                        in_compute.pop(0)

            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here
                r_batch = []
                in_compute = []

                if viper_flag:
                    return rollout
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = log_f + '_' + net_env.all_file_names[net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return reward_sum
Example #22
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')
    gpu_options = tf.GPUOptions(allow_growth=True)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    bba = bbaplus(sess)
    #sess.run(tf.global_variables_initializer())

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    #alpha_prob = np.zeros(S_INFO)
    #action_prob = np.zeros(A_DIM)
    #action_prob[bit_rate] = 1.
    action_prob = 5., 10.  # initial (RESEVOIR, CUSHION) pair used before the first prediction
    #r_batch = []
    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
            - REBUF_PENALTY * rebuf \
            - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                      VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        # r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(
            str(time_stamp / M_IN_K) + '\t' + str(VIDEO_BIT_RATE[bit_rate]) +
            '\t' + str(buffer_size) + '\t' + str(rebuf) + '\t' +
            str(video_chunk_size) + '\t' + str(delay) + '\t' +
            str(np.round(action_prob, 2)) + '\t' +
            #str(np.round(alpha_prob[0], 2)) + '\t' +
            str(reward) + '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / \
            float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / \
            float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(
            next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(
            video_chunk_remain,
            CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        s_batch.append(state)

        action_prob = bba.predict(state)
        RESEVOIR, CUSHION = action_prob
        if buffer_size < RESEVOIR:
            bit_rate = 0
        elif buffer_size >= RESEVOIR + CUSHION:
            bit_rate = A_DIM - 1
        else:
            bit_rate = (A_DIM - 1) * (buffer_size - RESEVOIR) / float(CUSHION)

        bit_rate = int(bit_rate)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            #entropy_record = []

            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
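
# A minimal sketch of the buffer-based (BBA) rate map applied above: below the
# reservoir pick the lowest bitrate, above reservoir + cushion pick the highest,
# and interpolate linearly in between. The reservoir/cushion defaults here are
# illustrative; the example above predicts them with a small network instead.
def bba_bitrate(buffer_size, reservoir=5.0, cushion=10.0, num_levels=6):
    """Map buffer occupancy (seconds) to a bitrate index in [0, num_levels - 1]."""
    if buffer_size < reservoir:
        return 0
    if buffer_size >= reservoir + cushion:
        return num_levels - 1
    return int((num_levels - 1) * (buffer_size - reservoir) / float(cushion))

if __name__ == '__main__':
    for b in (2.0, 7.5, 12.0, 20.0):
        print(b, '->', bba_bitrate(b))
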
Example #23
def agent(agent_id, all_cooked_time, all_cooked_bw, all_file_names,
          video_size_file, net_params_queue, exp_queue):
    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw,
                              random_seed=agent_id,
                              VIDEO_SIZE_FILE=video_size_file,
                              Debug=False)

    with tf.Session() as sess, open(LOG_FILE + '_agent_' + str(agent_id),
                                    'wb') as log_file:
        actor = a3c.ActorNetwork(sess,
                                 state_dim=[S_INFO, S_LEN],
                                 action_dim=A_DIM,
                                 learning_rate=ACTOR_LR_RATE)
        critic = a3c.CriticNetwork(sess,
                                   state_dim=[S_INFO, S_LEN],
                                   learning_rate=CRITIC_LR_RATE)

        # initial synchronization of the network parameters from the coordinator
        actor_net_params, critic_net_params = net_params_queue.get()
        actor.set_network_params(actor_net_params)
        critic.set_network_params(critic_net_params)

        bit_rate = DEFAULT_QUALITY
        target_buffer = DEFAULT_QUALITY
        latency_limit = 4
        index = 1
        action_vec = np.zeros(A_DIM)
        action_vec[index] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0
        reward_all_sum = 0
        reward_all = 0
        reward = 0
        switch_num = 0
        SMOOTH_PENALTY = 0.0
        REBUF_PENALTY = 3
        LANTENCY_PENALTY = 0.0
        BITRATE_REWARD = 0.001
        SKIP_PENALTY = 0.0
        epoch = 0
        n = 0
        state = np.array(s_batch[-1], copy=True)
        frame_time_len = 0.04
        last_bit_rate = DEFAULT_QUALITY
        while True:  # experience video streaming forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            time, time_interval, send_data_size, chunk_len, \
            rebuf, buffer_size, play_time_len, end_delay, \
            cdn_newest_id, download_id, cdn_has_frame, skip_frame_time_len, decision_flag, \
            buffer_flag, cdn_flag, skip_flag, end_of_video = net_env.get_video_frame(bit_rate, target_buffer,
                                                                                     latency_limit)
            # # QOE setting
            # if end_delay <= 1.0:
            #     LANTENCY_PENALTY = 0.005
            # else:
            #     LANTENCY_PENALTY = 0.01

            reward_frame = 0
            epoch += 1
            if not cdn_flag:
                reward_frame = frame_time_len * float(
                    BIT_RATE[bit_rate]
                ) * BITRATE_REWARD - REBUF_PENALTY * rebuf - LANTENCY_PENALTY * end_delay - SKIP_PENALTY * skip_frame_time_len
            else:
                reward_frame = -(REBUF_PENALTY * rebuf)
            reward += reward_frame

            # dequeue history record
            state = np.roll(state, -1, axis=1)
            # this should be S_INFO number of terms
            state[0, -1] = buffer_size * 0.1          # buffer occupancy (scaled)
            state[1, -1] = send_data_size * 0.00001   # size of the last sent frame (scaled)
            state[2, -1] = time_interval * 10         # download interval of the last frame (scaled)
            state[3, -1] = end_delay * 0.1            # end-to-end latency (scaled)
            state[4, -1] = rebuf                      # rebuffering time of the last frame

            if decision_flag and not end_of_video:

                reward_frame = -1 * SMOOTH_PENALTY * (
                    abs(BIT_RATE[bit_rate] - BIT_RATE[last_bit_rate]) / 1000)
                reward += reward_frame
                last_bit_rate = bit_rate
                r_batch.append(reward)

                reward = 0

                # compute action probability vector
                action_prob = actor.predict(
                    np.reshape(state, (1, S_INFO, S_LEN)))
                action_cumsum = np.cumsum(action_prob)
                temp = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
                index = (action_cumsum > temp).argmax()

                bit_rate = ACTION_SAPCE[index][0]
                target_buffer = ACTION_SAPCE[index][1]
                latency_limit = ACTION_SAPCE[index][2]
                # Note: we need to discretize the probability into 1/RAND_RANGE steps,
                # because there is an intrinsic discrepancy in passing single state and batch states

                entropy_record.append(a3c.compute_entropy(action_prob[0]))

                # report experience to the coordinator
                if len(r_batch) >= TRAIN_SEQ_LEN:
                    exp_queue.put([
                        s_batch[1:],  # ignore the first chunk
                        a_batch[1:],  # since we don't have the
                        r_batch[1:],  # control over it
                        end_of_video,
                        {
                            'entropy': entropy_record
                        }
                    ])

                    # synchronize the network parameters from the coordinator
                    actor_net_params, critic_net_params = net_params_queue.get(
                    )
                    actor.set_network_params(actor_net_params)
                    critic.set_network_params(critic_net_params)

                    del s_batch[:]
                    del a_batch[:]
                    del r_batch[:]
                    del entropy_record[:]

                s_batch.append(state)

                action_vec = np.zeros(A_DIM)
                action_vec[index] = 1
                a_batch.append(action_vec)

            reward_all += reward_frame

            # store the state and action into batches
            if end_of_video:
                r_batch.append(reward)

                reward_all_sum += reward_all / 20
                video_count += 1
                if video_count >= len(all_file_names):
                    n += 1
                    video_count = 0
                    print(n, "agent_id ", agent_id, "reward_all_sum:",
                          reward_all_sum)
                    w.writerow([n, reward_all_sum])
                    out.flush()
                    reward_all_sum = 0
                    net_env = env.Environment(all_cooked_time=all_cooked_time,
                                              all_cooked_bw=all_cooked_bw,
                                              random_seed=epoch,
                                              VIDEO_SIZE_FILE=video_size_file,
                                              Debug=False)
                    if n == NUM_EPOCH:
                        break

                reward_all = 0
                reward = 0
                switch_num = 0

                bit_rate = DEFAULT_QUALITY  # use the default action here
                target_buffer = DEFAULT_QUALITY

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
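
# A minimal sketch of the action-sampling trick the A3C agent above uses: draw
# a uniform threshold on a 1/RAND_RANGE grid and take the first index whose
# cumulative probability exceeds it. RAND_RANGE is an assumed constant chosen
# to match how the example discretizes the probability.
import numpy as np

RAND_RANGE = 1000

def sample_action(action_prob):
    """action_prob: 1-D array of action probabilities summing to ~1."""
    cumsum = np.cumsum(action_prob)
    threshold = np.random.randint(1, RAND_RANGE) / float(RAND_RANGE)
    return int((cumsum > threshold).argmax())

if __name__ == '__main__':
    np.random.seed(0)
    probs = np.array([0.1, 0.2, 0.4, 0.2, 0.1])
    print([sample_action(probs) for _ in range(5)])
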
Example #24
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')
    with open(DTModel, 'rb') as f:
        policy = pk.load(f)

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            # reward is video quality - rebuffer penalty - smoothness
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

            r_batch.append(reward)

            last_bit_rate = bit_rate

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp / M_IN_K) + '\t' +
                str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) +
                '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' +
                str(delay) + '\t' + str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            serialized_state = serial(state)
            bit_rate = int(
                policy.predict(np.array(serialized_state).reshape(1, -1))[0])
            # Note: we need to discretize the probability into 1/RAND_RANGE steps,
            # because there is an intrinsic discrepancy in passing single state and batch states

            s_batch.append(state)

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)

                print "video count", video_count
                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'wb')
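
# A minimal sketch of how the (S_INFO, S_LEN) rolling state can be flattened
# before being handed to a scikit-learn style tree policy, as the example above
# does with serial(state) and policy.predict(...). The exact feature selection
# here (last column of most rows plus a short throughput history) is an
# assumption for illustration, not the serial() used above.
import numpy as np

S_INFO, S_LEN = 6, 8

def serialize_state(state):
    """Flatten the parts of the state a tree policy typically looks at."""
    features = [state[0, -1],        # last normalized quality
                state[1, -1],        # buffer occupancy
                state[2, -1]]        # last throughput sample
    features.extend(state[2, -5:])   # short throughput history
    features.append(state[5, -1])    # fraction of chunks remaining
    return np.asarray(features, dtype=float)

if __name__ == '__main__':
    state = np.zeros((S_INFO, S_LEN))
    x = serialize_state(state).reshape(1, -1)   # shape expected by tree.predict
    print(x.shape)
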
Example #25
    max_iters = args.iters
    max_pts = 200000
    train_frac = 0.8
    np.random.seed(RANDOM_SEED)
    states, actions, serials = [], [], []
    precision = []
    #trees = []
    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        args.traces)
    if args.abr == 'hotdash':
        net_env = env_hotdash.Environment(all_cooked_time=all_cooked_time,
                                          all_cooked_bw=all_cooked_bw,
                                          all_file_names=all_file_names)
    else:
        net_env = env.Environment(all_cooked_time=all_cooked_time,
                                  all_cooked_bw=all_cooked_bw,
                                  all_file_names=all_file_names)

    if args.abr == 'pensieve':
        teacher = pensieve.Pensieve()
        student = pensilin.Pensilin()
        #test = pensieve.Pensieve()
    elif args.abr == 'robustmpc':
        teacher = robustmpc.RobustMPC()
        student = robustlin.Robustlin()
    elif args.abr == 'hotdash':
        teacher = hotdash.Hotdash()
        student = hotdlin.Hotdlin()
    else:
        raise NotImplementedError
Example #26
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    if not os.path.exists(SUMMARY_DIR):
        os.makedirs(SUMMARY_DIR)

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace()

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    entropy_record = []
    count = 0

    video_count = 0

    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, \
            end_of_video, video_chunk_remain = \
                net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms
        count += 1

        # reward is video quality - rebuffer penalty - smoothness
        reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                     - REBUF_PENALTY * rebuf \
                     - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                               VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K

        r_batch.append(reward)

        last_bit_rate = bit_rate

        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                           str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                           str(rebuf) + '\t' +
                           str(reward) + '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
        state[2, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
        state[4, :A_DIM] = np.array(next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
        state[5, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

        if state[1, -1] <= 2:
            bit_rate = max(bit_rate - 1, 0)
        elif state[1, -1] <= 5:
            pass  # keep the current bitrate while the buffer is moderate
        else:
            bit_rate = min(bit_rate + 1, 4)

        s_batch.append(state)


        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here
            time_stamp = 0

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            print ("video count", video_count)
            video_count += 1

            if video_count >= len(all_file_names):
                break

            log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')
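
# A minimal sketch of the buffer-threshold heuristic above as a standalone
# function: step the bitrate down when the normalized buffer is low, hold it in
# the middle band, and step it up when the buffer is comfortable. The thresholds
# and the 5-level ladder mirror the example; they are not tuned values.
def threshold_bitrate(buffer_level, bit_rate, max_level=4):
    """buffer_level: buffer_size / BUFFER_NORM_FACTOR, as stored in the state."""
    if buffer_level <= 2:
        return max(bit_rate - 1, 0)
    if buffer_level <= 5:
        return bit_rate
    return min(bit_rate + 1, max_level)

if __name__ == '__main__':
    print(threshold_bitrate(1.5, 2))   # -> 1
    print(threshold_bitrate(3.0, 2))   # -> 2
    print(threshold_bitrate(6.0, 2))   # -> 3
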
Example #27
def main():
    # check the constant defination is valid or not
    assert len(bitRatesOptions) == bitRatesTypes

    # load the traces
    allCookedTime, allCookedBW, allFileNames = load_trace.load_trace()

    # set the environment
    netEnvironment = env.Environment(all_cooked_time=allCookedTime,
                                     all_cooked_bw=allCookedBW)

    # open the output log file to write
    outputFileName = outputFilePrefix + "_" + allFileNames[
        netEnvironment.trace_idx]
    outputFilePointer = open(outputFileName, "wb")

    # initial the local variables
    timeStamp = 0
    lastBitRateOption = defaultBitRateOption
    currentBitRateOption = defaultBitRateOption
    videoCount = 0
    historyState = np.zeros((stateInfoLength, pastFramesLength))

    # enum all possible solutions of future chunks
    for solution in itertools.product([i for i in range(bitRatesTypes)],
                                      repeat=defaultFutureChunkCount):
        chunkOptionsSet.append(solution)

    # computing kernel:
    while True:
        # get the video chunk according to the current bitrate option
        assert currentBitRateOption >= 0
        delay, sleepTime, currentBufferSize, rebuffer, currentVideoChunkSize, \
            nextVideoChunkSize, endFlag, chunkRemainCount = netEnvironment.get_video_chunk(currentBitRateOption)

        # update the time stamp because of the delay and sleeping time
        timeStamp += delay + sleepTime  # ms

        # calculate the reward value according to the formula
        qualityValue = bitRatesOptions[
            currentBitRateOption] / bitsFactor  # kb to Mb
        smoothValue = np.abs(bitRatesOptions[currentBitRateOption] \
                    - bitRatesOptions[lastBitRateOption]) / bitsFactor
        rewardValue =  qualityValue \
                    - rebufferFactor * rebuffer \
                    - smoothFactor * smoothValue

        # write the output file
        outputItemStr = str(timeStamp / millsecondsPerSecond) + '\t' \
                    + str(bitRatesOptions[currentBitRateOption]) + '\t' \
                    + str(currentBufferSize) + '\t' \
                    + str(rebuffer) + '\t' \
                    + str(currentVideoChunkSize) + '\t' \
                    + str(delay) + '\t' \
                    + str(rewardValue) + '\n'
        outputFilePointer.write(outputItemStr.encode('utf-8'))
        outputFilePointer.flush()

        # update the bit rate option
        lastBitRateOption = currentBitRateOption

        # update the history state information like a sliding window
        historyState = np.roll(historyState, -1, axis=1)
        historyState[
            0, -1] = bitRatesOptions[currentBitRateOption] / float(maxBitRate)
        historyState[1, -1] = currentBufferSize / bufferNormFactor
        historyState[2, -1] = rebuffer
        historyState[
            3, -1] = float(currentVideoChunkSize) / float(delay) / bitsFactor
        historyState[4, -1] = np.minimum(
            chunkRemainCount,
            defaultChunkCountToEnd) / float(defaultChunkCountToEnd)

        # MPC kernel begin
        # calculate the normaliztion estimated error of bandwidth
        currentError = 0.
        if (len(pastBWEsts) > 0):
            currentError = abs(pastBWEsts[-1] - historyState[3, -1]) / float(
                historyState[3, -1])
        pastErrors.append(currentError)

        # calculate the harmonic mean of last 5 history bandwidths
        # Step 1: collect the last 5 history bandwidths
        pastRealBWArray = historyState[3, -5:]
        while pastRealBWArray[0] == 0.0:
            pastRealBWArray = pastRealBWArray[1:]

        # Step 2: calculate the harmonic mean
        pastRealBWSum = 0.0
        for pastRealBWItems in pastRealBWArray:
            pastRealBWSum += (1 / float(pastRealBWItems))
        harmonicBW = 1.0 / (pastRealBWSum / len(pastRealBWArray))

        # calculate the predicted future bandwidth according to the est. error and harmonic mean
        errorIndex = min(5, len(pastErrors))
        maxError = float(max(pastErrors[-errorIndex:]))
        currentPredBW = harmonicBW / (1 + maxError)
        pastBWEsts.append(currentPredBW)  # fixed this bug, reward increases

        # get the video chunks information of this round prediction
        currentLastIndex = totalChunksCount - chunkRemainCount
        currentFutureChunkCount = min(chunkRemainCount,
                                      defaultFutureChunkCount)

        # enumerate all the possible solutions and pick the best one
        bestReward = -INF
        bestSolution = ()
        finalOption = -1
        startBufferSize = currentBufferSize

        for solution in chunkOptionsSet:
            localSolution = solution[0:currentFutureChunkCount]
            localRebufferTime = 0.0
            localCurrentBufferSize = startBufferSize
            localBitRateSum = 0.
            localSmoothDiffs = 0.
            localLastChunkOption = currentBitRateOption
            # the 5 future chunks loop
            for pos in range(0, currentFutureChunkCount):
                thisChunkOption = localSolution[pos]
                thisIndex = currentLastIndex + pos + 1
                thisChunkSize = getChunkSize(thisChunkOption, thisIndex)
                downloadTime = (float(thisChunkSize) /
                                (bitsFactor * bitsFactor)
                                ) / currentPredBW  # Bytes to MBytes
                if localCurrentBufferSize < downloadTime:
                    localRebufferTime += downloadTime - localCurrentBufferSize
                    localCurrentBufferSize = 0
                else:
                    localCurrentBufferSize -= downloadTime
                # each downloaded chunk adds 4 seconds of playable video to the buffer
                localCurrentBufferSize += 4
                localBitRateSum += bitRatesOptions[thisChunkOption]
                localSmoothDiffs += abs(bitRatesOptions[thisChunkOption] -
                                        bitRatesOptions[localLastChunkOption])
                localLastChunkOption = thisChunkOption

            localReward = float(localBitRateSum) / bitsFactor \
                             - rebufferFactor * localRebufferTime \
                             - float(localSmoothDiffs) / bitsFactor
            if localReward >= bestReward:
                bestSolution = localSolution
                bestReward = localReward
                finalOption = bestSolution[0]
        currentBitRateOption = finalOption

        if endFlag:
            outputFilePointer.write("\n".encode('utf-8'))
            outputFilePointer.close()

            lastBitRateOption = defaultBitRateOption
            currentBitRateOption = defaultBitRateOption
            historyState = np.zeros((stateInfoLength, pastFramesLength))

            print("video count", videoCount)
            videoCount += 1

            if videoCount >= len(allFileNames):
                break

            outputFileName = outputFilePrefix + "_naive_" + allFileNames[
                netEnvironment.trace_idx]
            outputFilePointer = open(outputFileName, "wb")
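
# A minimal sketch of the robust bandwidth predictor used by the MPC examples:
# harmonic mean of the last few throughput samples, discounted by the largest
# recent relative prediction error. The 5-sample / 5-error windows follow the
# examples; zero samples are skipped rather than inverted.
def predict_bandwidth(past_bandwidths, past_errors, window=5):
    samples = [bw for bw in past_bandwidths[-window:] if bw > 0.0]
    if not samples:
        return 0.0
    harmonic = len(samples) / sum(1.0 / bw for bw in samples)
    max_error = max(past_errors[-window:]) if past_errors else 0.0
    return harmonic / (1.0 + max_error)

if __name__ == '__main__':
    bws = [0.0, 1.2, 1.0, 0.8, 1.1]        # throughput samples (MB per second)
    errs = [0.05, 0.20, 0.10]
    print(predict_bandwidth(bws, errs))    # harmonic mean discounted by the 0.20 error
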
Example #28
def main():

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
        TEST_TRACES)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'w')

    with tf.Session() as sess:
        actor = libcomyco.libcomyco(sess, S_INFO, S_LEN, A_DIM, LR_RATE=1e-4)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()  # save neural net parameters

        # restore neural net parameters
        if NN_MODEL is not None:  # NN_MODEL is the path to file
            saver.restore(sess, NN_MODEL)
            print("Testing model restored.")

        time_stamp = 0

        bit_rate = DEFAULT_QUALITY
        last_chunk_vmaf = None

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        entropy_record = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real
            delay, sleep_time, buffer_size, rebuf, \
            video_chunk_size, next_video_chunk_sizes, next_video_chunk_vmaf, \
            end_of_video, video_chunk_remain, video_chunk_vmaf = \
                net_env.get_video_chunk(bit_rate)

            if last_chunk_vmaf is None:
                last_chunk_vmaf = video_chunk_vmaf

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = 0.8469011 * video_chunk_vmaf - 28.79591348 * rebuf + 0.29797156 * \
                np.abs(np.maximum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - 1.06099887 * \
                np.abs(np.minimum(video_chunk_vmaf - last_chunk_vmaf, 0.)) - \
                2.661618558192494
            r_batch.append(reward)

            last_chunk_vmaf = video_chunk_vmaf

            # log time_stamp, bit_rate, buffer_size, reward
            log_file.write(
                str(time_stamp / M_IN_K) + '\t' +
                str(VIDEO_BIT_RATE[bit_rate]) + '\t' + str(buffer_size) +
                '\t' + str(rebuf) + '\t' + str(video_chunk_size) + '\t' +
                str(delay) + '\t' + str(reward) + '\n')
            log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = video_chunk_vmaf / 100.
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR  # 10 sec
            state[2, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[3, -1] = float(delay) / M_IN_K / BUFFER_NORM_FACTOR  # 10 sec
            state[4, :A_DIM] = np.array(
                next_video_chunk_sizes) / M_IN_K / M_IN_K  # mega byte
            state[5, :A_DIM] = np.array(
                next_video_chunk_vmaf) / 100.  # predicted VMAF of the next chunks
            state[6, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            action_prob, _ = actor.predict(
                np.reshape(state, (-1, S_INFO, S_LEN)))
            bit_rate = np.argmax(action_prob[0])

            s_batch.append(state)

            entropy_record.append(actor.compute_entropy(action_prob[0]))

            if end_of_video:
                log_file.write('\n')
                log_file.close()

                bit_rate = DEFAULT_QUALITY  # use the default action here
                last_chunk_vmaf = None

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                video_count += 1

                if video_count >= len(all_file_names):
                    break

                log_path = LOG_FILE + '_' + all_file_names[net_env.trace_idx]
                log_file = open(log_path, 'w')
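
# A minimal sketch of the VMAF-based QoE used above, wrapped as a function. The
# coefficients are taken verbatim from the example's reward line; they are
# treated here as given constants, not re-derived.
import numpy as np

def vmaf_qoe(chunk_vmaf, last_chunk_vmaf, rebuf):
    gain = np.maximum(chunk_vmaf - last_chunk_vmaf, 0.0)   # quality improvement
    loss = np.minimum(chunk_vmaf - last_chunk_vmaf, 0.0)   # quality drop
    return (0.8469011 * chunk_vmaf
            - 28.79591348 * rebuf
            + 0.29797156 * np.abs(gain)
            - 1.06099887 * np.abs(loss)
            - 2.661618558192494)

if __name__ == '__main__':
    print(vmaf_qoe(85.0, 80.0, 0.0))   # smooth quality increase
    print(vmaf_qoe(60.0, 85.0, 0.5))   # quality drop plus rebuffering
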
Example #29
    def main(self, args, net_env=None):
        self.args = args
        np.random.seed(RANDOM_SEED)
        viper_flag = True
        assert len(VIDEO_BIT_RATE) == A_DIM

        if net_env is None:
            viper_flag = False
            all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(
                args.traces)
            net_env = env.Environment(all_cooked_time=all_cooked_time,
                                      all_cooked_bw=all_cooked_bw,
                                      all_file_names=all_file_names)

        if not viper_flag and args.log:
            log_path = LOG_FILE + '_' + net_env.all_file_names[
                net_env.trace_idx] + '_' + args.qoe_metric
            log_file = open(log_path, 'wb')

        time_stamp = 0

        last_bit_rate = DEFAULT_QUALITY
        bit_rate = DEFAULT_QUALITY

        action_vec = np.zeros(A_DIM)
        action_vec[bit_rate] = 1

        s_batch = [np.zeros((S_INFO, S_LEN))]
        a_batch = [action_vec]
        r_batch = []
        rollout = []

        video_count = 0

        while True:  # serve video forever
            # the action is from the last decision
            # this is to make the framework similar to the real

            delay, sleep_time, buffer_size, rebuf, video_chunk_size, next_video_chunk_sizes, end_of_video, \
            video_chunk_remain = net_env.get_video_chunk(bit_rate)

            time_stamp += delay  # in ms
            time_stamp += sleep_time  # in ms

            reward = get_reward(bit_rate, rebuf, last_bit_rate,
                                args.qoe_metric)
            r_batch.append(reward)
            last_bit_rate = bit_rate

            if args.log:
                # log time_stamp, bit_rate, buffer_size, reward
                log_file.write(
                    bytes(str(time_stamp / M_IN_K) + '\t' +
                          str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                          str(buffer_size) + '\t' + str(rebuf) + '\t' +
                          str(video_chunk_size) + '\t' + str(delay) + '\t' +
                          str(reward) + '\n',
                          encoding='utf-8'))
                log_file.flush()

            # retrieve previous state
            if len(s_batch) == 0:
                state = [np.zeros((S_INFO, S_LEN))]
            else:
                state = np.array(s_batch[-1], copy=True)

            # dequeue history record
            state = np.roll(state, -1, axis=1)

            # this should be S_INFO number of terms
            state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(
                np.max(VIDEO_BIT_RATE))  # last quality
            state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
            state[2, -1] = rebuf
            state[3, -1] = float(video_chunk_size) / float(
                delay) / M_IN_K  # kilo byte / ms
            state[4, -1] = np.minimum(
                video_chunk_remain,
                CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)

            bit_rate = self.predict(state)
            serialized_state = []
            # Log input of neural network
            serialized_state.append(state[0, -1])
            serialized_state.append(state[1, -1])
            serialized_state.append(state[2, -1])
            for i in range(5):
                serialized_state.append(state[3, i])
            serialized_state.append(state[4, -1])
            #print(serialized_state)
            #print(state)
            rollout.append((state, bit_rate, serialized_state))

            if end_of_video:
                if args.log:
                    log_file.write(bytes('\n', encoding='utf-8'))
                    log_file.close()
                    print("video count", video_count)

                last_bit_rate = DEFAULT_QUALITY
                bit_rate = DEFAULT_QUALITY  # use the default action here

                del s_batch[:]
                del a_batch[:]
                del r_batch[:]

                action_vec = np.zeros(A_DIM)
                action_vec[bit_rate] = 1

                s_batch.append(np.zeros((S_INFO, S_LEN)))
                a_batch.append(action_vec)
                entropy_record = []

                if viper_flag:
                    break
                else:
                    video_count += 1
                    if video_count >= len(net_env.all_file_names):
                        break
                    if args.log:
                        log_path = LOG_FILE + '_' + net_env.all_file_names[
                            net_env.trace_idx] + '_' + args.qoe_metric
                        log_file = open(log_path, 'wb')

        return rollout
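
# A minimal sketch of a get_reward(...) helper compatible with how it is called
# above (get_reward(bit_rate, rebuf, last_bit_rate, qoe_metric)) and with the
# two QoE metrics the examples switch between: linear and log-scaled. The
# signature and the bitrate ladder are assumptions based on the surrounding
# code, not the project's actual helper.
import numpy as np

VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]  # Kbps, as in the examples
M_IN_K = 1000.0
SMOOTH_PENALTY = 1.0

def get_reward(bit_rate, rebuf, last_bit_rate, qoe_metric):
    if qoe_metric == 'results_lin':
        rebuf_penalty = 4.3
        return (VIDEO_BIT_RATE[bit_rate] / M_IN_K
                - rebuf_penalty * rebuf
                - SMOOTH_PENALTY * abs(VIDEO_BIT_RATE[bit_rate]
                                       - VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K)
    # log-scale QoE
    rebuf_penalty = 2.66
    log_br = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
    log_last = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))
    return log_br - rebuf_penalty * rebuf - SMOOTH_PENALTY * abs(log_br - log_last)

if __name__ == '__main__':
    print(get_reward(3, 0.0, 2, 'results_lin'))
    print(get_reward(3, 0.0, 2, 'results_log'))
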
Example #30
def main():
    args = parser.parse_args()
    if args.cb or args.lin:
        qoe_metric = 'results_lin'
    elif args.log:
        qoe_metric = 'results_log'
    else:
        print('Please select the QoE metric!')
        return

    if args.FCC:
        dataset = 'fcc'
    elif args.HSDPA:
        dataset = 'HSDPA'
    elif args.Oboe:
        dataset = 'Oboe'
    else:
        print('Please select the dataset!')
        return

    dataset_path = './traces_' + dataset + '/'
    if args.cb:
        Log_file_path = './' + qoe_metric + '/cb_' + dataset + '/log_sim_mpc'
    else:
        Log_file_path = './' + qoe_metric + '/' + dataset + '/log_sim_mpc'
    
    start = time.time()

    np.random.seed(RANDOM_SEED)

    assert len(VIDEO_BIT_RATE) == A_DIM

    all_cooked_time, all_cooked_bw, all_file_names = load_trace.load_trace(dataset_path)

    net_env = env.Environment(all_cooked_time=all_cooked_time,
                              all_cooked_bw=all_cooked_bw)

    log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
    log_file = open(log_path, 'wb')

    chunk_size_info = video_size()
    chunk_size_info.store_size()

    time_stamp = 0

    last_bit_rate = DEFAULT_QUALITY
    bit_rate = DEFAULT_QUALITY
    harmonic_bandwidth = 0 
    future_bandwidth = 0

    action_vec = np.zeros(A_DIM)
    action_vec[bit_rate] = 1

    s_batch = [np.zeros((S_INFO, S_LEN))]
    a_batch = [action_vec]
    r_batch = []
    # entropy_record = []

    video_count = 0
    while True:  # serve video forever
        # the action is from the last decision
        # this is to make the framework similar to the real
        delay, sleep_time, buffer_size, rebuf, \
        video_chunk_size, _,\
        end_of_video, video_chunk_remain = \
            net_env.get_video_chunk(bit_rate)

        time_stamp += delay  # in ms
        time_stamp += sleep_time  # in ms

        # reward is video quality - rebuffer penalty
        if qoe_metric == 'results_lin':
            REBUF_PENALTY = 4.3
            reward = VIDEO_BIT_RATE[bit_rate] / M_IN_K \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(VIDEO_BIT_RATE[bit_rate] -
                                            VIDEO_BIT_RATE[last_bit_rate]) / M_IN_K
        else:# log scale reward
            REBUF_PENALTY = 2.66
            log_bit_rate = np.log(VIDEO_BIT_RATE[bit_rate] / float(VIDEO_BIT_RATE[0]))
            log_last_bit_rate = np.log(VIDEO_BIT_RATE[last_bit_rate] / float(VIDEO_BIT_RATE[0]))

            reward = log_bit_rate \
                    - REBUF_PENALTY * rebuf \
                    - SMOOTH_PENALTY * np.abs(log_bit_rate - log_last_bit_rate)

        # reward = BITRATE_REWARD[bit_rate] \
        #          - 8 * rebuf - np.abs(BITRATE_REWARD[bit_rate] - BITRATE_REWARD[last_bit_rate])


        r_batch.append(reward)

        last_bit_rate = bit_rate

        # log time_stamp, bit_rate, buffer_size, reward
        log_file.write(str(time_stamp / M_IN_K) + '\t' +
                       str(VIDEO_BIT_RATE[bit_rate]) + '\t' +
                       str(buffer_size) + '\t' +
                       str(rebuf) + '\t' +
                       str(video_chunk_size) + '\t' +
                       str(delay) + '\t' +
                       str(reward) + '\t' + 
                       str(harmonic_bandwidth) + '\t' + 
                       str(harmonic_bandwidth - future_bandwidth) + '\t' + 
                       str(future_bandwidth) + '\t' +
                       str(float(video_chunk_size) / float(delay) / M_IN_K) + '\n')
        log_file.flush()

        # retrieve previous state
        if len(s_batch) == 0:
            state = [np.zeros((S_INFO, S_LEN))]
        else:
            state = np.array(s_batch[-1], copy=True)

        # dequeue history record
        state = np.roll(state, -1, axis=1)

        # this should be S_INFO number of terms
        state[0, -1] = VIDEO_BIT_RATE[bit_rate] / float(np.max(VIDEO_BIT_RATE))  # last quality
        state[1, -1] = buffer_size / BUFFER_NORM_FACTOR
        state[2, -1] = rebuf
        state[3, -1] = float(video_chunk_size) / float(delay) / M_IN_K  # kilo byte / ms
        state[4, -1] = np.minimum(video_chunk_remain, CHUNK_TIL_VIDEO_END_CAP) / float(CHUNK_TIL_VIDEO_END_CAP)
        # state[5: 10, :] = future_chunk_sizes / M_IN_K / M_IN_K

        # ================== MPC =========================
        curr_error = 0  # default: assume this is the first request, so the error is 0 since we have never predicted the bandwidth
        if ( len(past_bandwidth_ests) > 0 ):
            curr_error  = abs(past_bandwidth_ests[-1]-state[3,-1])/float(state[3,-1])
        past_errors.append(curr_error)

        # pick bitrate according to MPC           
        # first get harmonic mean of last 5 bandwidths
        past_bandwidths = state[3,-5:]
        while past_bandwidths[0] == 0.0:
            past_bandwidths = past_bandwidths[1:]
        #if ( len(state) < 5 ):
        #    past_bandwidths = state[3,-len(state):]
        #else:
        #    past_bandwidths = state[3,-5:]
        bandwidth_sum = 0
        for past_val in past_bandwidths:
            bandwidth_sum += (1/float(past_val))
        harmonic_bandwidth = 1.0/(bandwidth_sum/len(past_bandwidths))

        # future bandwidth prediction
        # divide by 1 + max of last 5 (or up to 5) errors
        max_error = 0
        error_pos = -5
        if ( len(past_errors) < 5 ):
            error_pos = -len(past_errors)
        max_error = float(max(past_errors[error_pos:]))
        future_bandwidth = harmonic_bandwidth/(1+max_error)  # robustMPC here
        past_bandwidth_ests.append(harmonic_bandwidth)


        # future chunks length (try 4 if that many remaining)
        last_index = int(CHUNK_TIL_VIDEO_END_CAP - video_chunk_remain -1)
        future_chunk_length = MPC_FUTURE_CHUNK_COUNT
        if ( TOTAL_VIDEO_CHUNKS - last_index < MPC_FUTURE_CHUNK_COUNT ):
            future_chunk_length = TOTAL_VIDEO_CHUNKS - last_index

        # all possible combinations of 5 chunk bitrates (9^5 options)
        # iterate over list and for each, compute reward and store max reward combination
        max_reward = -100000000
        # best_combo = ()
        start_buffer = buffer_size
        #start = time.time()
        download_time_every_step = []
        for position in range(future_chunk_length):
            download_time_current = []
            for action in range(0, A_DIM):
                index = last_index + position + 1 # e.g., if last chunk is 3, then first iter is 3+0+1=4
                download_time = (chunk_size_info.get_chunk_size(action, index)/1000000.)/future_bandwidth # this is MB/MB/s --> seconds
                download_time_current.append(download_time)
            download_time_every_step.append(download_time_current)

        reward_comparison = False
        send_data = 0
        parents_pool = [[0.0, start_buffer, int(bit_rate)]]
        for position in range(future_chunk_length):
            if position == future_chunk_length-1:
                reward_comparison = True
            children_pool = []
            for parent in parents_pool:
                action = 0
                curr_buffer = parent[1]
                last_quality = parent[-1]
                curr_rebuffer_time = 0
                chunk_quality = action
                download_time = download_time_every_step[position][chunk_quality]
                if ( curr_buffer < download_time ):
                    curr_rebuffer_time += (download_time - curr_buffer)
                    curr_buffer = 0.0
                else:
                    curr_buffer -= download_time
                curr_buffer += 4

                # reward
                bitrate_sum = VIDEO_BIT_RATE[chunk_quality]
                smoothness_diffs = abs(VIDEO_BIT_RATE[chunk_quality] - VIDEO_BIT_RATE[last_quality])
                reward = (bitrate_sum/1000.) - (REBUF_PENALTY*curr_rebuffer_time) - (SMOOTH_PENALTY*smoothness_diffs/1000.)
                reward += parent[0]

                children = parent[:]
                children[0] = reward
                children[1] = curr_buffer
                children.append(action)
                children_pool.append(children)
                if (reward >= max_reward) and reward_comparison:
                    if send_data > children[3] and reward == max_reward:
                        send_data = send_data
                    else:
                        send_data = children[3]
                    max_reward = reward

                # criterion terms
                # theta = SMOOTH_PENALTY * (VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.)
                rebuffer_term = REBUF_PENALTY * (max(download_time_every_step[position][action+1] - parent[1], 0) - max(download_time_every_step[position][action] - parent[1], 0))
                if (action + 1 <= parent[-1]):
                    High_Maybe_Superior = ((1.0 + 2 * SMOOTH_PENALTY)*(VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0.0)
                else:
                    High_Maybe_Superior = ((VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0.0)



                # while REBUF_PENALTY*(download_time_every_step[position][action+1] - parent[1]) <= ((VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.)-(abs(VIDEO_BIT_RATE[action+1] - VIDEO_BIT_RATE[parent[-1]]) - abs(VIDEO_BIT_RATE[action] - VIDEO_BIT_RATE[parent[-1]]))/1000.):
                while High_Maybe_Superior:
                    curr_buffer = parent[1]
                    last_quality = parent[-1]
                    curr_rebuffer_time = 0
                    chunk_quality = action + 1
                    download_time = download_time_every_step[position][chunk_quality]
                    if ( curr_buffer < download_time ):
                        curr_rebuffer_time += (download_time - curr_buffer)
                        curr_buffer = 0
                    else:
                        curr_buffer -= download_time
                    curr_buffer += 4

                    # reward
                    bitrate_sum = VIDEO_BIT_RATE[chunk_quality]
                    smoothness_diffs = abs(VIDEO_BIT_RATE[chunk_quality] - VIDEO_BIT_RATE[last_quality])
                    reward = (bitrate_sum/1000.) - (REBUF_PENALTY*curr_rebuffer_time) - (SMOOTH_PENALTY*smoothness_diffs/1000.)
                    reward += parent[0]

                    children = parent[:]
                    children[0] = reward
                    children[1] = curr_buffer
                    children.append(chunk_quality)
                    children_pool.append(children)
                    if (reward >= max_reward) and reward_comparison:
                        if send_data > children[3] and reward == max_reward:
                            send_data = send_data
                        else:
                            send_data = children[3]
                        max_reward = reward

                    action += 1
                    if action + 1 == A_DIM:
                        break
                    # criterion terms
                    # theta = SMOOTH_PENALTY * (VIDEO_BIT_RATE[action+1]/1000. - VIDEO_BIT_RATE[action]/1000.)
                    rebuffer_term = REBUF_PENALTY * (max(download_time_every_step[position][action+1] - parent[1], 0) - max(download_time_every_step[position][action] - parent[1], 0))
                    if (action + 1 <= parent[-1]):
                        High_Maybe_Superior = ((1.0 + 2 * SMOOTH_PENALTY)*(VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0)
                    else:
                        High_Maybe_Superior = ((VIDEO_BIT_RATE[action]/1000. - VIDEO_BIT_RATE[action+1]/1000.) + rebuffer_term < 0)

            parents_pool = children_pool

        bit_rate = send_data
        # hack
        # if bit_rate == 1 or bit_rate == 2:
        #    bit_rate = 0

        # ================================================

        # Note: we need to discretize the probability into 1/RAND_RANGE steps,
        # because there is an intrinsic discrepancy in passing single state and batch states

        s_batch.append(state)

        if end_of_video:
            log_file.write('\n')
            log_file.close()

            last_bit_rate = DEFAULT_QUALITY
            bit_rate = DEFAULT_QUALITY  # use the default action here

            del s_batch[:]
            del a_batch[:]
            del r_batch[:]
            del past_bandwidth_ests[:]

            time_stamp = 0

            action_vec = np.zeros(A_DIM)
            action_vec[bit_rate] = 1

            s_batch.append(np.zeros((S_INFO, S_LEN)))
            a_batch.append(action_vec)
            entropy_record = []

            print("video count", video_count)
            video_count += 1

            if video_count >= len(all_file_names):
                end = time.time()
                print(end - start)
                break

            log_path = Log_file_path + '_' + all_file_names[net_env.trace_idx]
            log_file = open(log_path, 'wb')

            end = time.time()
            print(end - start)
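
# A minimal sketch of the plain (unpruned) MPC search that Example #27 performs
# and that Example #30 prunes: enumerate every bitrate combination for the next
# few chunks, simulate the buffer against the predicted bandwidth, and keep the
# first bitrate of the best-scoring combination. get_chunk_size is an assumed
# callable (quality, index) -> bytes; the 4-second chunk length matches the
# buffer update used in the examples.
import itertools

VIDEO_BIT_RATE = [300, 750, 1200, 1850, 2850, 4300]  # Kbps
REBUF_PENALTY = 4.3
SMOOTH_PENALTY = 1.0
CHUNK_LEN = 4.0

def mpc_first_bitrate(get_chunk_size, start_index, start_buffer, last_quality,
                      predicted_bw_mbps, horizon=5):
    best_reward, best_first = float('-inf'), 0
    for combo in itertools.product(range(len(VIDEO_BIT_RATE)), repeat=horizon):
        buffer_s, rebuf_s, last_q = start_buffer, 0.0, last_quality
        bitrate_sum, smooth_sum = 0.0, 0.0
        for step, quality in enumerate(combo):
            size_mb = get_chunk_size(quality, start_index + step + 1) / 1e6
            download_time = size_mb / predicted_bw_mbps
            if buffer_s < download_time:
                rebuf_s += download_time - buffer_s
                buffer_s = 0.0
            else:
                buffer_s -= download_time
            buffer_s += CHUNK_LEN
            bitrate_sum += VIDEO_BIT_RATE[quality]
            smooth_sum += abs(VIDEO_BIT_RATE[quality] - VIDEO_BIT_RATE[last_q])
            last_q = quality
        reward = (bitrate_sum / 1000.0
                  - REBUF_PENALTY * rebuf_s
                  - SMOOTH_PENALTY * smooth_sum / 1000.0)
        if reward > best_reward:
            best_reward, best_first = reward, combo[0]
    return best_first

if __name__ == '__main__':
    # hypothetical chunk sizes: bitrate (Kbps) converted to bytes over 4 seconds
    fake_size = lambda q, idx: VIDEO_BIT_RATE[q] * 1000 / 8.0 * CHUNK_LEN
    print(mpc_first_bitrate(fake_size, 10, 8.0, 2, 1.5))
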