예제 #1
0
# Excerpt of a training-loop script: repeatedly resets `env`, then steps
# through it while regrouping the raw observation into 4-value records.
# NOTE: `stepIdx`, `ax`, `env`, `PG`, `np`, `plt`, `nOfenb`, `nOfchannel`
# and `k` are not defined in this excerpt; they must come from elsewhere.
currIt = 0  # iteration number printed at the start of every outer pass
rd = []
# Presumably matplotlib.pyplot interactive mode (import not visible) -- confirm.
plt.ion()
# NOTE: the matching except/finally clause lies outside this excerpt.
try:
    while True:
        print("Start iteration: ", currIt)
        # Start a new run; `obs` is whatever env.reset() returns.
        obs = env.reset()
        print("Step: ", stepIdx)
        print("---obs:", obs)
        while True:
            # Per-step scratch state, re-created on every inner iteration.
            reward = 0
            matrixOfChanAlloc = np.zeros((nOfenb, nOfchannel))

            stepIdx += 1
            # Every 100th step, scale PG.ep by 0.7.
            # Presumably an exploration rate -- verify against PG's definition.
            if stepIdx % 100 == 0:
                PG.ep = PG.ep * 0.7
            # Record the step index in `ax`.
            ax.append(stepIdx)
            print("stepIdx: ", stepIdx)
            # ax.append(stepIdx)
            # ---------------------------------------------------------------------------------------
            observation = []  # observed values of the environment, i.e. the state
            # Split the flat `obs` into consecutive groups of four values;
            # trailing elements that do not fill a whole group are dropped.
            for j in range((int)(len(obs) / 4)):
                # one state record
                observation.append([
                    obs[4 * j], obs[4 * j + 1], obs[4 * j + 2], obs[4 * j + 3]
                ])
            action_list = []
            print("obs: ", obs)
            if (len(observation) == 0):
                # No complete 4-value group was available: use an all-zero record.
                observation_step = [0, 0, 0, 0]
                # NOTE(review): `observation` is empty in this branch, so indexing
                # it below would raise IndexError; lines may be missing from this
                # excerpt -- confirm against the full source.
                ss = observation[k].copy()
예제 #2
0
# Excerpt of a training-loop script: repeatedly resets `env`, converts each
# observation with getObservation(), and steps the environment.
# NOTE: `currIt`, `stepIdx`, `ax`, `env`, `PG`, `np`, `plt`, `nOfenb`,
# `nOfchannel`, `getObservation`, `addaction` and `listTotuple` are not
# defined in this excerpt; they must come from elsewhere.
# Presumably matplotlib.pyplot interactive mode (import not visible) -- confirm.
plt.ion()

# NOTE: the matching except/finally clause lies outside this excerpt.
try:
    while True:
        print("Start iteration: ", currIt)
        # Start a new run; `obs` is whatever env.reset() returns.
        obs = env.reset()
        print("Step: ", stepIdx)
        print("---obs:", obs)
        flag = False  # not read anywhere in the visible excerpt
        while True:
            # Per-step scratch state, re-created on every inner iteration.
            reward = 0
            matrixOfChanAlloc = np.zeros((nOfenb, nOfchannel))

            stepIdx += 1
            # Every 100th step, scale PG.ep by 0.95.
            # Presumably an exploration rate -- verify against PG's definition.
            if stepIdx % 100 == 0:
                PG.ep = PG.ep * 0.95

            # Record the step index in `ax`.
            ax.append(stepIdx)
            print("stepIdx: ", stepIdx)
            print("obs: ", obs)
            observation = []  # observed values of the environment, i.e. the state
            observation, numue = getObservation(observation,
                                                obs)  # convert the ns3 observation into a form usable by gym

            action_list = []  # list that stores the actions

            if numue == 0:  # if the number of valid requests is 0, return an empty action
                # Presumably appends an all-zero action to action_list -- confirm
                # against addaction's definition.
                addaction(0, 0, 0, action_list)
                # Presumably converts the list to a tuple -- TODO confirm.
                action_tuple = listTotuple(action_list)
                # env.step() result is unpacked into four values here.
                obs, reward_step, done, info = env.step(
                    action_tuple)  # get the reward for this episode