def _process_user(self, user, cp_time, s, action, next_user_id, queue_len, done):
    # If the env has already been reset by the time this thread wakes from sleep,
    # return immediately instead of continuing to run this user_thread.
    # if user.user_id not in self.running_users:
    #     return
    # self.running_users.remove(user.user_id)
    cloud = Cloud(core_num=user.assign_n_core)
    trans = Trans(cloud, band_width=user.assign_n_bandwidth)
    cp_len, last = rp_time.ours(user.graph[0], user.graph[1], user.graph[2], cloud, trans)
    # is_done = len(self.queue) == 0 and self.is_stop_generate_user
    if not done:
        # Still looks buggy: once we enter the busy-wait loop below, shutting down the
        # env can leave this thread spinning forever.
        # while (next_user_id is None) and (not self.is_stop_generate_user):
        # if (next_user_id is None) and (not self.is_stop_generate_user):
        #     next_user_id = self._choose_user_from_queue()
        # # The experience should be updated once the current user has finished (giving its
        # # run time) and the next user has started running (giving that user's queue time).
        # # Wait for the next user to start executing.
        # while next_user_id != -1 and (self.users[next_user_id].queue_time is None):
        #     pass
        # print("next_user_id : %d" % next_user_id)
        # if next_user_id == -1:
        #     self.store_transition(done, user, s, cp_len, 0, user.state, queue_len, a=action)
        # else:
        #     next_user_queue_time = self.users[next_user_id].queue_time
        #     self.store_transition(done, user, s, cp_len, next_user_queue_time,
        #                           self.users[next_user_id].state, queue_len, a=action)
        # self.store_transition(done, user, s, cp_len, 0, user.state, queue_len, a=action)
        time.sleep(cp_time)
        self._release_user_resources(user)
    else:
        self.store_transition(done, user, s, cp_len, 0, user.state, queue_len, a=action)
        # self.store_transition(done, user, s, cp_len, 0,
        #                       [user.state[0] - user.assign_n_core, user.state[1] - user.assign_n_bandwidth],
        #                       queue_len, a=action)
    logger.info('end process user : %d, n_core_left : %f, n_bandwidth_left : %f, queue : %s' %
                (user.user_id, self.n_core_left, self.n_bandwidth_left, self.queue))
    self.running_users.remove(user.user_id)
    self.processed_user += 1
def store_transition(self, done, user, s, run_time, queue_time, s_, queue_len, a):
    if not hasattr(self, 'memory_counter'):
        self.memory_counter = 0
    punish = 600 - user.user_id if done else 0
    if done:
        print('punish : %d' % punish)
    # experience = np.hstack([s, (a[0] - 1) * 10.0 + (a[1] - 1), 300.0 - (20 * run_time) - 10 * punish, s_])
    # Experience layout: state (3), flat action index, reward, next state (3).
    experience = np.hstack([s, (a[0] - 1) * 10.0 + (a[1] - 1), 1000.0 - (20 * run_time) - punish, s_])
    if punish != 0:
        self.score = 1000.0 - (20 * run_time) - punish
    if np.shape(experience)[0] == 6:
        # Guard against a malformed experience vector (a complete one has 3 + 1 + 1 + 3 = 8 entries).
        raise TypeError('experience : %s, user.id : %d, state : %s' % (experience, user.user_id, user.state))
    if self.prioritized:
        self._store_transition_with_prioritized(experience)
    else:
        self._store_transition_without_prioritized(experience)
    # logger.info('user : %d, action : <%d, %d>, time : %f + %f = %f' %
    #             (user.user_id, user.assign_n_core, user.assign_n_bandwidth,
    #              run_time, queue_time, run_time + queue_time))
    self.memory_counter += 1
    index = self.memory_counter % self.experience_pool_size
    self.transitions[index] = {
        'user': user,
        'user_id': user.user_id,
        'run_time': run_time,
        # 'next_user_queue_time': queue_time
    }
    if self.memory_counter % 100 == 0:
        total_time = 0
        total_users = 0
        total_queue_time = 0
        for e in self.transitions:
            if e != 0:
                total_time += e['run_time'] + e['user'].queue_time
                total_queue_time += e['user'].queue_time
                total_users += 1
        logger.info('epoch %d : average time : %f , queue time occupy %f' %
                    (int(self.memory_counter / 300), total_time / total_users * 1.0,
                     total_queue_time / total_time))
        self.plt_record.append([total_time / total_users * 1.0, total_queue_time / total_users * 1.0])
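# The experience above packs the 2-D action <n_core, n_bandwidth> into one flat index
# via (a[0] - 1) * 10 + (a[1] - 1), i.e. row-major over the bandwidth dimension.
# The helper below is only a minimal sketch of the inverse mapping, assuming 10
# bandwidth levels and 1-based action components; the repo's own trans_action()
# used in run_env() may map the agent's output differently.
def _decode_action_index(index, n_bandwidth_levels=10):
    # Recover the <n_core, n_bandwidth> pair from a flat action index.
    n_core = int(index) // n_bandwidth_levels + 1
    n_bandwidth = int(index) % n_bandwidth_levels + 1
    return n_core, n_bandwidth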
def run_env(env, agent):
    step = 0
    observation = env.reset()
    done = False
    # fig = plt.figure()
    # ax = fig.add_subplot(1, 1, 1)
    # plt.ion()
    # plt.show()
    for episode in range(500):
        # observation = env.reset()
        # action = agent.choose_action(observation)
        # observation_, reward, done = env.step(action)
        # RL.store_transition(observation, action, reward, observation_)
        # if step > 200 and step % 5 == 0:
        #     RL.learn()
        observation_, reward, done = agent.take_a_step(observation)
        observation = observation_
        # if step % 150 == 0:
        #     try:
        #         ax.lines.remove(lines[0])
        #     except Exception:
        #         pass
        #     lines = ax.plot([_ for _ in range(len(env.plt_record))],
        #                     [_[0] for _ in env.plt_record], 'r-', lw=5)
        #     plt.pause(0.0001)
        step += 1
    env.shutdown_generate_user()
    cp_time_record = [_[0] for _ in env.plt_record]
    queue_time_record = [_[1] for _ in env.plt_record]
    logger.info('average time : %f, queue_time : %f' %
                (sum(cp_time_record) / len(cp_time_record),
                 sum(queue_time_record) / len(queue_time_record)))
    while not done:
        observation_, reward, done = agent.take_a_step(observation)
        observation = observation_
def step(self, action):
    assign_n_core, assign_n_bandwidth = action
    print('start step')
    user_id = self._choose_user_from_queue()
    next_user_id = self._choose_next_user_from_queue()
    # graph_vec = self._choose_user_from_queue()
    logger.info('choose user %d from queue' % user_id)
    user = self.users[user_id]
    s = [float(user.graph_id), float(self.n_core_left), float(self.n_bandwidth_left)]
    self.n_core_left -= assign_n_core
    self.n_bandwidth_left -= assign_n_bandwidth
    user.state = s
    # graph_id = np.argmax(graph_vec)
    user.assign_n_core = assign_n_core
    user.assign_n_bandwidth = assign_n_bandwidth
    logger.info('assign user n_core : %f, n_bandwidth : %f; n_core_left : %f, n_bandwidth_left : %f' %
                (assign_n_core, assign_n_bandwidth, self.n_core_left, self.n_bandwidth_left))
    done = self.is_stop_generate_user
    self._start_process_user(user, s, action, next_user_id, len(self.queue), done)
    # is_done = (len(self.queue) == 0) and self.is_stop_generate_user
    next_user_id = -1
    if not done:
        print('not done')
        next_user_id = self._choose_user_from_queue()
    # if not is_done or not self.is_stop_generate_user:
    #     next_user_id = self._choose_user_from_queue()
    # if len(self.queue) > MAX_QUEUE_SIZE:
    #     is_done = True
    if len(self.queue) >= MAX_QUEUE or self.processed_user >= MAX_PROCESS_USER:
        self.shutdown_generate_user()
    # Observation: [graph_id of the next user (None if there is none), cores left, bandwidth left].
    return [self.users[next_user_id].graph_id if next_user_id != -1 else None,
            self.n_core_left, self.n_bandwidth_left], None, done
def _start_process_user(self, user, s, action, next_user_id, queue_len, done):
    user.queue_time = time.time() - user.generate_time
    graph = self.graphs[user.graph_id]
    cp_time = self._computation_time(user, graph)
    self.queue.pop(0)
    logger.info('start process user : %d, cp_time : %f, queue : %s ; action : <%d, %d>' %
                (user.user_id, cp_time, self.queue, action[0], action[1]))
    t = threading.Thread(target=self._process_user,
                         args=(user, cp_time, s, action, next_user_id, queue_len, done))
    self.running_users.append(user.user_id)
    t.start()
def _generate_user(self, thread_id):
    user_id = 0
    self.out = False
    while not self.is_stop_generate_user:
        nt = self._next_time(POISSON_RATE)
        time.sleep(nt)
        if self.is_stop_generate_user:
            break
        # print('thread id : %d, shutdown flag : %d' % (thread_id, self.shutdown_flag))
        # if thread_id == self.shutdown_flag:
        #     break
        self._assert_graphs()
        graph_id = random.randint(0, len(self.graphs) - 1)
        user = User(user_id=user_id, graph=self.graphs[graph_id], graph_id=graph_id)
        self.users[user.user_id] = user
        self.queue.append(user.user_id)
        logger.info('thread id : %d , generate user : %d; queue : %s' % (thread_id, user_id, self.queue))
        user_id += 1
    self.out = True
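# _next_time(POISSON_RATE) is not shown in this section; for a Poisson arrival process the
# inter-arrival gaps are exponentially distributed, so a minimal sketch (assuming
# POISSON_RATE is the arrival rate lambda, in users per second) could look like the
# hypothetical helper below rather than the repo's actual implementation.
import random

def _next_time_sketch(rate):
    # Delay until the next user arrival; mean inter-arrival time is 1 / rate seconds.
    return random.expovariate(rate)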
def _init_graph_threshold(self):
    for graph_idx in range(self.env.total_graphs):
        graph = self.env.graphs[graph_idx]
        time_matrix = self._time_matrix(graph)
        core_threshold = -1
        bandwidth_threshold = -1
        # Look for a core count beyond which adding another core no longer changes the run time.
        for i in range(MAX_CORE - 1):
            if time_matrix[i][0] == time_matrix[i + 1][0]:
                core_threshold = i + 1
        core_threshold = core_threshold if core_threshold != -1 else MAX_CORE - 1
        # At that core count, look for a bandwidth beyond which the run time stops improving.
        for j in range(MAX_BANDWIDTH - 1):
            if time_matrix[core_threshold][j] == time_matrix[core_threshold][j + 1]:
                bandwidth_threshold = j
        bandwidth_threshold = bandwidth_threshold if bandwidth_threshold != -1 else MAX_BANDWIDTH - 1
        # self.graph_threshold.append([4, 2])
        core_threshold = int((core_threshold + 1) * DISCOUNT) if int((core_threshold + 1) * DISCOUNT) > 0 else 1
        bandwidth_threshold = int((bandwidth_threshold + 1) * DISCOUNT) if int((bandwidth_threshold + 1) * DISCOUNT) > 0 else 1
        self.graph_threshold.append([core_threshold, bandwidth_threshold])
    logger.info('graph_threshold : %s' % self.graph_threshold)
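# Worked example with hypothetical numbers: if a graph's time matrix satisfies
# time_matrix[3][0] == time_matrix[4][0] (no gain beyond 4 cores) and, at that core count,
# time_matrix[4][1] == time_matrix[4][2] (no gain beyond 2 bandwidth units), then the raw
# thresholds are core_threshold = 4 and bandwidth_threshold = 1; with DISCOUNT = 0.5 the
# stored pair becomes [int((4 + 1) * 0.5), int((1 + 1) * 0.5)] = [2, 1].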
def restore_model(self):
    self.saver.restore(self.sess, 'my_prio_net/save_net.ckpt')
    logger.info('restore model from my_prio_net/save_net.ckpt')
    # Sanity-check the restored network on a dummy state [graph_id, n_core_left, n_bandwidth_left].
    s = np.array([[12.0, 31.0, 15.0]])
    action_value = self.sess.run(self.q_eval, feed_dict={self.s: s})
    print(action_value)
def run_env(env, agent):
    step = 0
    observation = env.reset()
    done = False
    # plt_thread = threading.Thread(target=plot_record, args=(env, ))
    # plt_thread.start()
    if plt_flag:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        plt.ion()
        plt.show()
    for episode in range(1000):
        action = trans_action(agent.choose_action(observation))
        observation_, reward, done = agent.take_a_step(observation, action)
        # if done:
        #     print('env reset')
        #     observation = env.reset()
        #     continue
        # agent.store_transition(observation, action, reward, observation_)
        if TRAINING and step > 50 and step % 50 == 0:
            agent.learn()
        observation = observation_
        if step % 150 == 0 and plt_flag:
            try:
                ax.lines.remove(lines[0])
            except Exception:
                pass
            lines = ax.plot([_ for _ in range(len(env.plt_record))],
                            [10 - _[0] for _ in env.plt_record], 'r-', lw=5)
            plt.pause(0.0001)
        step += 1
        if (len(env.queue) > 50 or env.processed_user >= 500) and TRAINING:
            # env.stop_generate_user()
            env.shutdown_generate_user()
            # Drain the remaining users before resetting the env.
            while not done:
                action = trans_action(agent.choose_action(observation))
                observation_, reward, done = agent.take_a_step(observation, action)
                observation = observation_
            observation = env.reset()
            print('env reset, thread id : %d' % env.thread_id)
            env.stop_generate_user()
            agent.plot_cost()
            cp_time_record = [_[0] for _ in env.plt_record]
            queue_time_record = [_[1] for _ in env.plt_record]
            logger.info('average time : %f, queue_time : %f' %
                        (sum(cp_time_record) / len(cp_time_record),
                         sum(queue_time_record) / len(queue_time_record)))
            if TRAINING:
                agent.store_model()
            summarize_experience(env.experience_pool)
    while not done:
        action = trans_action(agent.choose_action(observation))
        observation_, reward, done = agent.take_a_step(observation, action)
        # agent.store_transition(observation, action, reward, observation_)
        # if step > 200 and step % 5 == 0:
        #     agent.learn()
        observation = observation_
        step += 1
    print('game over')
def shutdown_generate_user(self):
    print('shutdown generate user')
    # Keep only the user currently at the head of the queue, then stop generating new users.
    self.queue = [self._choose_user_from_queue()]
    logger.info('shutdown_env : %s' % self.queue)
    self.stop_generate_user()
def restore_model(self):
    self.saver.restore(self.sess, 'my_net/save_net.ckpt')
    logger.info('restore model from my_net/save_net.ckpt')
def store_model(self):
    self.saver.save(self.sess, 'my_net/save_net.ckpt')
    logger.info('store model to my_net/save_net.ckpt')
def run_env(env, agent):
    step = 0
    done = False
    if plt_flag:
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        plt.ion()
        plt.show()
    for episode in range(20):
        observation = env.reset()
        done = False
        while not done:
            action = trans_action(agent.choose_action(observation))
            observation_, reward, done = agent.take_a_step(observation, action)
            if TRAINING and step > MEMORY_SIZE and step % 50 == 0:
                agent.learn()
            observation = observation_
            if step % 150 == 0 and plt_flag:
                try:
                    ax.lines.remove(lines[0])
                except Exception:
                    pass
                lines = ax.plot([_ for _ in range(len(env.plt_record))],
                                [10 - _[0] for _ in env.plt_record], 'r-', lw=5)
                plt.pause(0.0001)
            step += 1
        agent.summary_score(env.score)
        # if len(env.queue) > 50 and TRAINING:
        #     env.shutdown_generate_user()
        #     while not done:
        #         action = trans_action(agent.choose_action(observation))
        #         observation_, reward, done = agent.take_a_step(observation, action)
        #         observation = observation_
        #     observation = env.reset()
        #     print('env reset, thread id : %d' % env.thread_id)
        #     env.stop_generate_user()
    # env.shutdown_generate_user()
    # if plt_flag:
    #     agent.plot_cost()
    cp_time_record = [_[0] for _ in env.plt_record]
    queue_time_record = [_[1] for _ in env.plt_record]
    logger.info('average time : %f, queue_time : %f' %
                (sum(cp_time_record) / len(cp_time_record),
                 sum(queue_time_record) / len(queue_time_record)))
    if TRAINING:
        agent.store_model()
    summarize_experience(env.experience_pool)
    # while not done:
    #     action = trans_action(agent.choose_action(observation))
    #     observation_, reward, done = agent.take_a_step(observation, action)
    #     observation = observation_
    #     step += 1
    print('game over')