# Module-level imports required by the methods below:
import numpy as np
from gym import spaces

import env  # scenario / path-loss generator (env.Scenario)
import tho  # throughput calculator (tho.ThoughtOutCal)


def __init__(self):
    self.Num_AP = 1
    self.Num_UE = 50
    self.channel = [1]
    self.oriTHO = np.zeros(self.Num_AP)  # per-AP throughput from the previous step
    loss_cal = env.Scenario(self.Num_AP, self.Num_UE, freq=2, avr_ap=1)
    self.contact, self.placeAP, self.placeUE, self.Loss = loss_cal.sendout()
    # Per-AP action: a bounded adjustment in [-0.2, 0.2];
    # per-AP observation: a value in [0, 1].
    self.action_space = spaces.Box(low=-0.2, high=0.2,
                                   shape=(self.Num_AP,), dtype=np.float32)
    self.observation_space = spaces.Box(low=0.0, high=1.0,
                                        shape=(self.Num_AP,), dtype=np.float32)
    # ndarray so that `self.state * 60` scales element-wise
    self.state = np.full(self.Num_AP, 0.5)
    envir = tho.ThoughtOutCal(self.channel, self.state * 60,
                              self.Num_AP, self.Num_UE)
    RSSI, Speed, self.connection = envir.subspeed(self.Loss)
    thought_out_ue, P = envir.thomain(Speed, self.connection)
    # Aggregate per-UE throughput into per-AP throughput
    thought_out_AP = np.zeros(self.Num_AP)
    for kki in range(self.Num_AP):
        tempN = np.argwhere(self.connection == kki)
        for kkj in tempN:
            thought_out_AP[kki] += thought_out_ue[kkj]
    self.oriTHO[:] = thought_out_AP
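
# The UE-to-AP aggregation loop in __init__ is repeated in step(); the helper
# below is a sketch of the same reduction done with np.bincount, assuming
# `connection` holds one non-negative integer AP index per UE (hypothetical
# helper name; not part of the original code):
def aggregate_ue_to_ap(thought_out_ue, connection, num_ap):
    # weights= accumulates thought_out_ue[i] into bin connection[i];
    # minlength= keeps APs with no attached UEs at zero throughput.
    return np.bincount(np.asarray(connection, dtype=int).ravel(),
                       weights=np.asarray(thought_out_ue, dtype=float).ravel(),
                       minlength=num_ap)
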
def step(self, u):
    # Proposed next state before clipping; actions that push a component
    # outside [0, 1] are clipped here and penalized below.
    raw = np.asarray(self.state) + np.asarray(u)
    s_ = np.clip(raw, 0.0, 1.0)
    envir = tho.ThoughtOutCal(self.channel, s_ * 60, self.Num_AP, self.Num_UE)
    RSSI, Speed, connection = envir.subspeed(self.Loss)
    thought_out_ue, P = envir.thomain(Speed, connection)
    # Aggregate per-UE throughput into per-AP throughput
    thought_out_AP = np.zeros(self.Num_AP)
    for kki in range(self.Num_AP):
        tempN = np.argwhere(connection == kki)
        for kkj in tempN:
            thought_out_AP[kki] += thought_out_ue[kkj]
    # Per-AP reward: a fixed penalty for out-of-range actions, otherwise
    # the AP's throughput scaled by 10.
    reward = np.zeros(self.Num_AP)
    for kk in range(self.Num_AP):
        if raw[kk] < 0 or raw[kk] > 1:
            reward[kk] = -100
        else:
            reward[kk] = thought_out_AP[kk] * 10
            # reward[kk] = (thought_out_AP[kk] - self.oriTHO[kk]) * 10
    self.oriTHO[:] = thought_out_AP
    self.state = s_  # persist the clipped state for the next step
    return s_, np.sum(reward), False, {}
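
# A minimal usage sketch (hypothetical: assumes the enclosing class is a
# gym-style environment named `APEnv`; the name and the loop are illustrative):
if __name__ == "__main__":
    environment = APEnv()
    state = environment.state
    for _ in range(5):
        action = environment.action_space.sample()  # random adjustment in [-0.2, 0.2]
        state, reward, done, info = environment.step(action)
        print(state, reward)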