Example 1
    def _process_user(self, user, cp_time, s, action, next_user_id, queue_len,
                      done):

        # If we wake from sleep and find the env has already been reset, return right away and stop running this user_thread:
        # if user.user_id not in self.running_users:
        #     return
        # self.running_users.remove(user.user_id)
        cloud = Cloud(core_num=user.assign_n_core)
        trans = Trans(cloud, band_width=user.assign_n_bandwidth)
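        # estimate the schedule length (cp_len) of this user's task graph
        # under the assigned core / bandwidth budget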
        cp_len, last = rp_time.ours(user.graph[0], user.graph[1],
                                    user.graph[2], cloud, trans)

        # is_done = len(self.queue) == 0 and self.is_stop_generate_user

        if not done:
            # There may still be a bug here: once we enter the busy-wait below, shutting down the env leaves this loop spinning forever.
            # while (next_user_id is None) and (not self.is_stop_generate_user):
            # if (next_user_id is None) and (not self.is_stop_generate_user):
            #     next_user_id = self._choose_user_from_queue()
            # # When to update the current experience: after the current user finishes (yielding its run time)
            # # and the next user starts running (yielding the next user's queue time).
            # # Wait for the next user to start running.
            # while next_user_id != -1 and (self.users[next_user_id].queue_time is None):
            #     pass
            # print("next_user_id : %d" % next_user_id)
            # if next_user_id == -1:
            #     self.store_transition(done, user, s, cp_len, 0,
            #                       user.state,
            #                       queue_len, a=action)
            # else:
            #     next_user_queue_time = self.users[next_user_id].queue_time
            #     self.store_transition(done, user, s, cp_len, next_user_queue_time, self.users[next_user_id].state, queue_len, a=action)
            #
            self.store_transition(done,
                                  user,
                                  s,
                                  cp_len,
                                  0,
                                  user.state,
                                  queue_len,
                                  a=action)
            time.sleep(cp_time)
            self._release_user_resources(user)
        else:
            self.store_transition(done,
                                  user,
                                  s,
                                  cp_len,
                                  0,
                                  user.state,
                                  queue_len,
                                  a=action)
            # self.store_transition(done, user, s, cp_len, 0, [user.state[0] - user.assign_n_core, user.state[1] - user.assign_n_bandwidth], queue_len, a=action)
        logger.info(
            'end process user : %d, n_core_left : %f, n_bandwidth_left : %f, queue : %s'
            % (user.user_id, self.n_core_left, self.n_bandwidth_left,
               self.queue))
        self.running_users.remove(user.user_id)
        self.processed_user += 1
Example 2
    def store_transition(self, done, user, s, run_time, queue_time, s_,
                         queue_len, a):
        if not hasattr(self, 'memory_counter'):
            self.memory_counter = 0

        punish = (600 - user.user_id) if done else 0
        if done:
            print('punish : %d' % punish)
        # experience = np.hstack([s, (a[0] - 1) * 10.0 + (a[1] - 1), 300.0 - (20 * run_time) - 10 * punish, s_])
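        # experience layout: [s, flat_action_index, reward, s_],
        # where reward = 1000 - 20 * run_time - punish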
        experience = np.hstack([
            s, (a[0] - 1) * 10.0 + (a[1] - 1),
            1000.0 - (20 * run_time) - punish, s_
        ])
        if punish != 0:
            self.score = 1000.0 - (20 * run_time) - punish
        if np.shape(experience)[0] == 6:
            raise TypeError('experience : %s, user.id : %d, state : %s' %
                            (experience, user.user_id, user.state))
        if self.prioritized:
            self._store_transition_with_prioritized(experience)
        else:
            self._store_transition_without_prioritized(experience)
        # logger.info('user : %d, action : <%d, %d>, time : %f + %f = %f' % (user.user_id, user.assign_n_core, user.assign_n_bandwidth, run_time, queue_time, run_time + queue_time))

        self.memory_counter += 1

        index = self.memory_counter % self.experience_pool_size
        self.transitions[index] = {
            'user': user,
            'user_id': user.user_id,
            'run_time': run_time,
            # 'next_user_queue_time': queue_time
        }

        if self.memory_counter % 100 == 0:
            total_time = 0
            total_users = 0
            total_queue_time = 0
            for e in self.transitions:
                if e != 0:
                    total_time += e['run_time'] + e['user'].queue_time
                    total_queue_time += e['user'].queue_time
                    total_users += 1
            logger.info('epoch %d : average time : %f , queue time occupy %f' %
                        (int(self.memory_counter / 300), total_time /
                         total_users * 1.0, total_queue_time / total_time))
            self.plt_record.append([
                total_time / total_users * 1.0,
                total_queue_time / total_users * 1.0
            ])
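
The flat action index (a[0] - 1) * 10.0 + (a[1] - 1) in store_transition packs the (core, bandwidth) pair into one scalar with a width-10 bandwidth axis. A minimal encode/decode sketch, assuming 1-based actions and exactly 10 bandwidth levels (the width 10 is taken from the code above; the total level count is an assumption):

def encode_action(a):
    # a = (assign_n_core, assign_n_bandwidth), both 1-based
    return (a[0] - 1) * 10 + (a[1] - 1)

def decode_action(idx):
    # inverse of encode_action; assumes 10 bandwidth levels
    return idx // 10 + 1, idx % 10 + 1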
Example 3
def run_env(env, agent):
    step = 0

    observation = env.reset()

    done = False

    # fig = plt.figure()
    # ax = fig.add_subplot(1, 1, 1)
    # plt.ion()
    # plt.show()

    for episode in range(500):

        # observation = env.reset()

        # action = agent.choose_action(observation)
        #
        # observation_, reward, done = env.step(action)

        # RL.store_transition(observation, action, reward, observation_)
        #
        # if step > 200 and step % 5 == 0:
        #     RL.learn()

        observation_, reward, done = agent.take_a_step(observation)

        observation = observation_

        # if step % 150 == 0:
        #     try:
        #         ax.lines.remove(lines[0])
        #     except Exception:
        #         pass
        #     lines = ax.plot([_ for _ in range(len(env.plt_record))], [_[0] for _ in env.plt_record], 'r-', lw=5)
        #     plt.pause(0.0001)

        step += 1

    env.shutdown_generate_user()

    cp_time_record = [_[0] for _ in env.plt_record]
    queue_time_record = [_[1] for _ in env.plt_record]
    logger.info('average time : %f, queue_time : %f' %
                (sum(cp_time_record) / len(cp_time_record),
                 sum(queue_time_record) / len(queue_time_record)))

    while not done:
        observation_, reward, done = agent.take_a_step(observation)

        observation = observation_
Example 4
    def step(self, action):
        assign_n_core, assign_n_bandwidth = action
        print('start step')
        user_id = self._choose_user_from_queue()
        next_user_id = self._choose_next_user_from_queue()

        # graph_vec = self._choose_user_from_queue()
        logger.info('choose user %d from queue' % user_id)
        user = self.users[user_id]
        s = [
            float(user.graph_id),
            float(self.n_core_left),
            float(self.n_bandwidth_left)
        ]
        self.n_core_left -= assign_n_core
        self.n_bandwidth_left -= assign_n_bandwidth
        user.state = s
        # graph_id = np.argmax(graph_vec)
        user.assign_n_core = assign_n_core
        user.assign_n_bandwidth = assign_n_bandwidth

        logger.info(
            'assign user n_core : %f, n_bandwidth : %f; n_core_left : %f, n_bandwidth_left : %f'
            % (assign_n_core, assign_n_bandwidth, self.n_core_left,
               self.n_bandwidth_left))
        done = self.is_stop_generate_user
        self._start_process_user(user, s, action, next_user_id,
                                 len(self.queue), done)

        # is_done = (len(self.queue) == 0) and self.is_stop_generate_user

        next_user_id = -1
        if not done:
            print('not done')
            next_user_id = self._choose_user_from_queue()
            # if not is_done or not self.is_stop_generate_user:
            #     next_user_id = self._choose_user_from_queue()
            # if len(self.queue) > MAX_QUEUE_SIZE:
            #     is_done = True
            if (len(self.queue) >= MAX_QUEUE
                    or self.processed_user >= MAX_PROCESS_USER):
                self.shutdown_generate_user()
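        # observation: [next user's graph_id (None when done), cores left, bandwidth left]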
        return [
            self.users[next_user_id].graph_id if next_user_id != -1 else None,
            self.n_core_left, self.n_bandwidth_left
        ], None, done
Example 5
    def _start_process_user(self, user, s, action, next_user_id, queue_len,
                            done):
        user.queue_time = time.time() - user.generate_time
        graph = self.graphs[user.graph_id]

        cp_time = self._computation_time(user, graph)

        self.queue.pop(0)
        logger.info(
            'start process user : %d, cp_time : %f, queue : %s ; action : <%d, %d>'
            % (user.user_id, cp_time, self.queue, action[0], action[1]))

        t = threading.Thread(target=self._process_user,
                             args=(user, cp_time, s, action, next_user_id,
                                   queue_len, done))
        self.running_users.append(user.user_id)
        t.start()
Example 6
 def _generate_user(self, thread_id):
     user_id = 0
     self.out = False
     while not self.is_stop_generate_user:
         nt = self._next_time(POISSON_RATE)
         time.sleep(nt)
         if self.is_stop_generate_user:
             break
         # print('thread id : %d, shutdown flag : %d' % (thread_id, self.shutdown_flag))
         # if thread_id == self.shutdown_flag:
         #     break
         self._assert_graphs()
         graph_id = random.randint(0, len(self.graphs) - 1)
         user = User(user_id=user_id,
                     graph=self.graphs[graph_id],
                     graph_id=graph_id)
         self.users[user.user_id] = user
         self.queue.append(user.user_id)
         logger.info('thread id : %d , generate user : %d; queue : %s' %
                     (thread_id, user_id, self.queue))
         user_id += 1
     self.out = True
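
_next_time(POISSON_RATE) is not shown in these examples; for a Poisson arrival process the inter-arrival gap is exponentially distributed, so a minimal sketch (assuming POISSON_RATE is the mean arrival rate in users per second) could be:

import random

def _next_time(self, rate):
    # exponential inter-arrival gap with mean 1 / rate seconds
    return random.expovariate(rate)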
Example 7
 def _init_graph_threshold(self):
     for i in range(self.env.total_graphs):
         graph = self.env.graphs[i]
         time_matrix = self._time_matrix(graph)
         core_threshold = -1
         bandwidth_threshold = -1
         for c in range(MAX_CORE - 1):
             if time_matrix[c][0] == time_matrix[c + 1][0]:
                 core_threshold = c + 1
         core_threshold = core_threshold if core_threshold != -1 else MAX_CORE - 1
         for j in range(MAX_BANDWIDTH - 1):
             if time_matrix[core_threshold][j] == time_matrix[
                     core_threshold][j + 1]:
                 bandwidth_threshold = j
         bandwidth_threshold = bandwidth_threshold if bandwidth_threshold != -1 else MAX_BANDWIDTH - 1
         # self.graph_threshold.append([4, 2])
         core_threshold = max(1, int((core_threshold + 1) * DISCOUNT))
         bandwidth_threshold = max(1, int((bandwidth_threshold + 1) * DISCOUNT))
         self.graph_threshold.append([core_threshold, bandwidth_threshold])
     logger.info('graph_threshold : %s' % self.graph_threshold)
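
Note that the core_threshold scan above keeps the last plateau it finds, not the first. A toy trace with an invented column of time_matrix values:

times = [10.0, 8.0, 8.0, 8.0, 7.0]  # hypothetical schedule lengths for 1..5 cores
core_threshold = -1
for c in range(len(times) - 1):
    if times[c] == times[c + 1]:
        core_threshold = c + 1      # hits 2, then 3; ends at 3 (the last plateau)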
Example 8
 def restore_model(self):
     self.saver.restore(self.sess, 'my_prio_net/save_net.ckpt')
     logger.info('restore model from my_prio_net/save_net.ckpt')
     s = np.array([[12.0, 31.0, 15.0]])
     action_value = self.sess.run(self.q_eval, feed_dict={self.s: s})
     print(action_value)
Example 9
def run_env(env, agent):
    step = 0
    observation = env.reset()
    done = False
    # plt_thread = threading.Thread(target=plot_record, args=(env, ))
    # plt_thread.start()
    if plt_flag:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        plt.ion()
        plt.show()

    for episode in range(1000):
        action = trans_action(agent.choose_action(observation))

        observation_, reward, done = agent.take_a_step(observation, action)

        # if done:
        #     print('env reset')
        #     observation = env.reset()
        #     continue

        # agent.store_transition(observation, action, reward, observation_)

        if TRAINING and step > 50 and step % 50 == 0:
            agent.learn()

        observation = observation_

        if step % 150 == 0 and plt_flag:
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            lines = ax.plot([_ for _ in range(len(env.plt_record))],
                            [10 - _[0] for _ in env.plt_record],
                            'r-',
                            lw=5)
            plt.pause(0.0001)

        step += 1

        if (len(env.queue) > 50 or env.processed_user >= 500) and TRAINING:
            # env.stop_generate_user()
            env.shutdown_generate_user()

            while not done:
                action = trans_action(agent.choose_action(observation))
                observation_, reward, done = agent.take_a_step(
                    observation, action)
                observation = observation_

            observation = env.reset()
            print('env reset, thread id : %d' % env.thread_id)

    env.stop_generate_user()

    agent.plot_cost()

    cp_time_record = [_[0] for _ in env.plt_record]
    queue_time_record = [_[1] for _ in env.plt_record]
    logger.info('average time : %f, queue_time : %f' %
                (sum(cp_time_record) / len(cp_time_record),
                 sum(queue_time_record) / len(queue_time_record)))

    if TRAINING:
        agent.store_model()
        summarize_experience(env.experience_pool)

    while not done:

        action = trans_action(agent.choose_action(observation))

        observation_, reward, done = agent.take_a_step(observation, action)

        # agent.store_transition(observation, action, reward, observation_)

        # if step > 200 and step % 5 == 0:
        #     agent.learn()

        observation = observation_

        step += 1

    print('game over')
Example 10
 def shutdown_generate_user(self):
     print('shutdown generate user')
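     # keep only the user at the head of the queue, then stop generating new users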
     self.queue = [self._choose_user_from_queue()]
     logger.info('shutdown_env : %s' % self.queue)
     self.stop_generate_user()
Example 11
 def restore_model(self):
     self.saver.restore(self.sess, 'my_net/save_net.ckpt')
     logger.info('restore model from my_net/save_net.ckpt')
Example 12
 def store_model(self):
     self.saver.save(self.sess, 'my_net/save_net.ckpt')
     logger.info('store model to my_net/save_net.ckpt')
Example 13
def run_env(env, agent):
    step = 0
    done = False
    if plt_flag:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        plt.ion()
        plt.show()

    for episode in range(20):
        observation = env.reset()
        done = False
        while not done:

            action = trans_action(agent.choose_action(observation))

            observation_, reward, done = agent.take_a_step(observation, action)

            if TRAINING and step > MEMORY_SIZE and step % 50 == 0:
                agent.learn()

            observation = observation_

            if step % 150 == 0 and plt_flag:
                try:
                    ax.lines.remove(lines[0])
                except Exception:
                    pass
                lines = ax.plot([_ for _ in range(len(env.plt_record))],
                                [10 - _[0] for _ in env.plt_record],
                                'r-',
                                lw=5)
                plt.pause(0.0001)

            step += 1

        agent.summary_score(env.score)

            # if len(env.queue) > 50 and TRAINING:
            #     env.shutdown_generate_user()
            #
            #     while not done:
            #         action = trans_action(agent.choose_action(observation))
            #         observation_, reward, done = agent.take_a_step(observation, action)
            #         observation = observation_
            #
            #     observation = env.reset()
            #     print('env reset, thread id : %d' % env.thread_id)

    # env.stop_generate_user()
    # env.shutdown_generate_user()

    # if plt_flag:
    #     agent.plot_cost()

    cp_time_record = [_[0] for _ in env.plt_record]
    queue_time_record = [_[1] for _ in env.plt_record]
    logger.info('average time : %f, queue_time : %f' %
                (sum(cp_time_record) / len(cp_time_record),
                 sum(queue_time_record) / len(queue_time_record)))

    if TRAINING:
        agent.store_model()
        summarize_experience(env.experience_pool)

    # while not done:
    #
    #     action = trans_action(agent.choose_action(observation))
    #
    #     observation_, reward, done = agent.take_a_step(observation, action)
    #
    #     observation = observation_
    #
    #     step += 1

    print('game over')