currIt = 0
rd = []
plt.ion()
try:
    while True:
        print("Start iteration: ", currIt)
        obs = env.reset()
        print("Step: ", stepIdx)
        print("---obs:", obs)
        while True:
            reward = 0
            matrixOfChanAlloc = np.zeros((nOfenb, nOfchannel))
            stepIdx += 1
            if stepIdx % 100 == 0:
                PG.ep = PG.ep * 0.7  # decay the exploration rate every 100 steps
            ax.append(stepIdx)
            print("stepIdx: ", stepIdx)
            observation = []  # observed state of the environment
            for j in range(len(obs) // 4):  # split the flat ns-3 observation into groups of 4
                observation.append([
                    obs[4 * j], obs[4 * j + 1],
                    obs[4 * j + 2], obs[4 * j + 3]
                ])
            action_list = []
            print("obs: ", obs)
            if len(observation) == 0:
                observation_step = [0, 0, 0, 0]
            ss = observation[k].copy()
plt.ion()
try:
    while True:
        print("Start iteration: ", currIt)
        obs = env.reset()
        print("Step: ", stepIdx)
        print("---obs:", obs)
        flag = False
        while True:
            reward = 0
            matrixOfChanAlloc = np.zeros((nOfenb, nOfchannel))
            stepIdx += 1
            if stepIdx % 100 == 0:
                PG.ep = PG.ep * 0.95  # decay the exploration rate every 100 steps
            ax.append(stepIdx)
            print("stepIdx: ", stepIdx)
            print("obs: ", obs)
            observation = []  # observed state of the environment
            observation, numue = getObservation(observation, obs)  # convert the ns-3 observation into a gym-style state
            action_list = []  # actions to send in this step
            if numue == 0:  # no valid requests: send an empty (all-zero) action
                addaction(0, 0, 0, action_list)
                action_tuple = listTotuple(action_list)
                obs, reward_step, done, info = env.step(action_tuple)  # get the reward for this episode step
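# The helpers getObservation, addaction, and listTotuple are called above but not
# defined in this listing. The sketch below is a minimal, assumed implementation,
# inferred from the call sites and the manual 4-value parsing loop shown earlier;
# the actual definitions in the original script may differ.
def getObservation(observation, obs):
    """Split the flat ns-3 observation into [v0, v1, v2, v3] groups and count valid requests."""
    for j in range(len(obs) // 4):
        observation.append([obs[4 * j], obs[4 * j + 1], obs[4 * j + 2], obs[4 * j + 3]])
    # Assumption: a group of all zeros means no pending request from that UE.
    numue = sum(1 for group in observation if any(group))
    return observation, numue

def addaction(enb, ue, channel, action_list):
    """Append one assignment to the action list (the meaning of the three values is assumed)."""
    action_list.append(enb)
    action_list.append(ue)
    action_list.append(channel)

def listTotuple(action_list):
    """Convert the action list to a tuple before passing it to env.step()."""
    return tuple(action_list)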