Exemple #1
0
    def get_Exh_Rate(self, state):
        '''
        (best_beam, best_rate) = get_Exh_Rate(state)
        - Tries every receive beam in self.BeamSet and reports the one with the largest rate

        Parameters:
        state - normalised observation; it is scaled by self.high_obs and unpacked into
                three components (named dist, ue_ang and rbd in this file)

        Output:
        best_beam - entry of self.BeamSet that produced the largest rate
        best_rate - that largest rate
        '''
        # The third component (receive beam of the state) plays no role in the search.
        dist, ue_ang, _ = state * self.high_obs
        # presumably sph2cart(azimuth, elevation, radius) -> cartesian position; verify against helper
        ue_pos = np.array(sph2cart(ue_ang, 0, dist))

        model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)

        # Calc_Rate yields an (SNR, rate) pair per beam; only the rate is ranked.
        per_beam = (model.Calc_Rate(self.SF_time, np.array([beam, 0])) for beam in self.BeamSet)
        rates = [rate for _snr, rate in per_beam]

        winner = np.argmax(rates)
        return self.BeamSet[winner], np.max(rates)
Exemple #2
0
    def get_Exh_Rate(self, state):
        '''
        (best_beam, best_rate) = get_Exh_Rate(state)
        - Tries every receive beam in self.BeamSet at the given UE location and reports
          the one with the largest rate

        Parameters:
        state - normalised (x, y) observation; it is scaled by self.high_obs and rounded
                to the nearest integer before use

        Output:
        best_beam - entry of self.BeamSet that produced the largest rate
        best_rate - that largest rate
        '''
        ue_xloc, ue_yloc = np.rint(state * self.high_obs)
        ue_pos = np.array([ue_xloc, ue_yloc, 0])  # z is fixed at 0

        model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)

        # Calc_Rate yields an (SNR, rate) pair per beam; only the rate is ranked.
        per_beam = (model.Calc_Rate(self.SF_time, np.array([beam, 0])) for beam in self.BeamSet)
        rates = [rate for _snr, rate in per_beam]

        winner = np.argmax(rates)
        return self.BeamSet[winner], np.max(rates)
Exemple #3
0
class RFBeamEnv:
    """Receive-beam selection environment for a UE served by the first gNB.

    Each call to step() shifts the UE x-coordinate by ``ue_v``, builds a MIMO
    model between the UE and ``gNB[0]`` and evaluates the rate obtained with the
    receive beam carried by the action. The reward is that rate, and the episode
    is over when the UE position equals the goal set with set_goal().

    Typical use: ``set_goal(dest)``, ``reset(ue, vel)``, then repeated ``step(action)``.
    """

    def __init__(self, sc_xyz, ch_model):
        """Store the channel description and initialise the bookkeeping fields.

        Parameters:
        sc_xyz - scatterer description, passed unchanged to MIMO
        ch_model - channel model identifier, passed unchanged to MIMO
        """
        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0  # steps taken since the last reset()
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # UE position and per-step displacement along x; both are filled by reset()
        self.ue_s = None
        self.ue_v = None

        # gNB locations: one serving gNB followed by two visible gNBs
        self.gNB = [[0, 0, 0], [20, 30, 0], [40, 60, 0]]
        self.sc_xyz = sc_xyz
        self.ch_model = ch_model

        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        self.obs = None
        self.rate = None
        self.goal_diff = None

        # Defined up front so Game_Over()/step() never hit a missing attribute
        # when set_goal()/reset() have not been called yet.
        self.goal = None
        self.cum_rate = 0
        self.cum_Los_rate = 0

        self.rate_threshold = 0.7  # good enough QoS val (Rate)

    def GenAction_Sample(self):
        """Return a random action: the result of Gen_RandomBeams(1, self.N)."""
        return Gen_RandomBeams(1, self.N)

    def step(self, action):
        """Advance the UE by one step and evaluate the chosen receive beam.

        Parameters:
        action - sequence whose first element is the receive beam direction

        Output:
        obs - [UE position, action as ndarray]
        reward - value of Reward() for this step
        done - value of Game_Over() for this step

        Raises:
        RuntimeError - if reset() has not been called yet
        """
        if self.ue_s is None:
            raise RuntimeError("reset() must be called before step()")

        self.ue_s[0] += self.ue_v
        self.obs[1] = np.array(action)
        self.obs[0] = np.array(self.ue_s)  # copy, so later moves do not alter this observation

        self.mimo_model = MIMO(self.ue_s, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)
        self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([self.obs[1][0], 0]))

        self.cum_rate += self.rate
        self.count += 1

        rwd = self.Reward()
        done = self.Game_Over()
        return self.obs, rwd, done

    def Reward(self):
        """Return the reward of the latest step, which is the latest rate."""
        return self.rate

    def Game_Over(self):
        """Return True when the UE position equals the goal, element for element.

        With no goal set the comparison is against None and the result is False.
        """
        return np.array_equal(self.ue_s, self.goal)

    def reset(self, ue, vel):
        """Restart an episode at position ``ue`` with per-step displacement ``vel``.

        Parameters:
        ue - starting UE position
        vel - amount added to the UE x-coordinate on every step

        Output:
        ndarray built from [UE position, random receive beam]
        """
        self.RB_r = Gen_RandomBeams(1, self.N)  # one random RX beam

        self.obs = [np.array(ue), self.RB_r]
        self.ue_s = np.array(ue)
        self.ue_v = vel
        self.count = 0
        self.rate = 0
        self.cum_rate = 0
        self.cum_Los_rate = 0

        try:
            return np.array(self.obs)
        except ValueError:
            # Position and beam generally differ in length. NumPy >= 1.24 refuses
            # to build such a ragged array implicitly, so request the object
            # array that older releases produced.
            return np.array(self.obs, dtype=object)

    def set_goal(self, ue_d):
        """Set the destination position that ends the episode."""
        self.goal = np.array(ue_d)
        return

    def set_velocity(self, vel):
        """Set the per-step displacement along x."""
        self.ue_v = vel
        return

    def set_rate_threshold(self, rate_th):
        """Set the rate regarded as good enough QoS."""
        self.rate_threshold = rate_th
        return

    def get_Rate(self):
        """Return the rate of the latest step."""
        return self.rate

    def get_LoS_Rate(self, ue_s):
        """Return the rate reported by MIMO.Los_Rate() for a UE at ``ue_s``.

        The model is built with the 'fsp' channel and an empty scatterer array,
        regardless of the channel configured on this environment.
        """
        sc_xyz = np.array([])
        ch_model = 'fsp'
        mimo_model = MIMO(ue_s, self.gNB[0], sc_xyz, ch_model, self.ptx, self.N_tx, self.N_rx)
        Los_SNR, Los_rate = mimo_model.Los_Rate()
        return Los_rate

    def get_Exh_Rate(self, ue_s):
        """Try every receive beam for a UE at ``ue_s`` and report the best one.

        Output:
        best_beam - entry of Generate_BeamDir(self.N) with the largest rate
        best_rate - that largest rate
        """
        self.mimo_exh_model = MIMO(ue_s, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)
        rbeam_vec = Generate_BeamDir(self.N)

        exh_rates = []
        for rbeam in rbeam_vec:
            # Calc_Rate returns (SNR, rate); only the rate is ranked.
            _, rate = self.mimo_exh_model.Calc_Rate(self.SF_time, np.array([rbeam[0], 0]))
            exh_rates.append(rate)

        best_rbeam_ndx = np.argmax(exh_rates)
        return rbeam_vec[best_rbeam_ndx], np.max(exh_rates)
Exemple #4
0
class UAV_Env_v2(gym.Env):
    """
    Description:
    A UAV moves on a grid of locations around the base station gNB[0]. On every step the
    agent picks a receive beam direction and a one-cell move of the UAV.

    Observation:
        step() and reset() return np.array([x, y]) / self.high_obs (grid coordinates scaled
        by the largest x and y grid values).
        self.obs_space is MultiDiscrete([len(ue_xloc), len(ue_yloc)]) and indexes the grid:
        Num Observation     Min     Max     Step
        0   UAV_xloc       -500     500     50
        1   UAV_yloc         50     500     50

    Action:
        Type: Discrete(N * len(ue_moves))
        action = beam_ndx * len(ue_moves) + move_ndx, with moves ('L', 'R', 'U', 'D')

    Reward:
        +1.0  destination reached and rate >= rate_threshold (episode ends)
        -1.0  destination reached and rate <  rate_threshold (episode ends)
        exp(-(steps_done - 1) / 50) * log10(rate + 1)  elsewhere when rate >= rate_threshold,
              where steps_done counts the steps completed before the current one
        -1.0  otherwise

    Starting State:
        A grid location drawn with obs_space.sample()

    Episode Termination:
        When the UAV is closer than 50 units to (ue_xdest, ue_ydest)
    """

    def __init__(self):

        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # (x1,y1,z1) of UE_source location
        self.ue_s = None

        self.gNB = np.array([[0, 0, 0]])
        self.sc_xyz = np.array([])
        self.ch_model = 'fsp'
        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        self.state = None
        self.rate = None
        self.rate_threshold = None  # good enough QoS val (Rate); set by reset()
        self.Nhops = 5

        # UE information
        self.ue_step = 50
        self.ue_xloc = np.arange(-500, 550, 50)  # 21 grid columns: -500 ... 500
        self.ue_yloc = np.arange(50, 550, 50)  # 10 grid rows: 50 ... 500
        self.ue_vx = np.array([50, 100])  # choose_vel() only uses entry 0
        self.ue_vy = np.array([50, 100])  # choose_vel() only uses entry 0
        self.ue_xdest = np.array([np.min(self.ue_xloc)])  # 1 x-dest loc
        self.ue_ydest = np.array([np.min(self.ue_yloc)])  # 1 y-dest loc
        self.ue_xsrc = np.array([np.max(self.ue_xloc)])  # overwritten with a scalar by reset()
        self.ue_ysrc = np.array([np.max(self.ue_yloc)])  # overwritten with a scalar by reset()
        self.ue_moves = np.array(['L', 'R', 'U', 'D'])  # moving direction of UAV

        self.seed()
        self.high_obs = np.array([np.max(self.ue_xloc), np.max(self.ue_yloc)])
        self.obs_space = spaces.MultiDiscrete([len(self.ue_xloc),  # ue_xloc
                                               len(self.ue_yloc),  # ue_yloc
                                               ])

        self.act_space = spaces.Discrete(self.N * len(self.ue_moves))  # n(RBD) * n(moves)

    def seed(self, seed=None):
        """Create self.np_random from ``seed`` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Apply one (beam, move) action.

        Parameters:
        action - index accepted by self.act_space; see decode_action()

        Output:
        state - new normalised (x, y) location
        reward, done - as produced by _gameover()
        {} - empty info dict
        """
        assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

        # rint removes the floating point residue left by the normalisation round trip
        ue_xloc, ue_yloc = np.rint(self.state * self.high_obs)

        rbd_ndx, ue_mv_ndx = self.decode_action(action)
        ue_vx, ue_vy = self.choose_vel(ue_mv_ndx)
        rbs = self.BeamSet[rbd_ndx]
        ue_xdest = self.ue_xdest[0]
        ue_ydest = self.ue_ydest[0]

        # Only one of ue_vx/ue_vy is non-zero; a move that would leave the grid
        # is clamped to the border, so the UAV stays where it is.
        new_ue_xloc = np.clip(ue_xloc + ue_vx, np.min(self.ue_xloc), np.max(self.ue_xloc))
        new_ue_yloc = np.clip(ue_yloc + ue_vy, np.min(self.ue_yloc), np.max(self.ue_yloc))

        new_ue_pos = np.array([new_ue_xloc, new_ue_yloc, 0])
        self.mimo_model = MIMO(new_ue_pos, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)

        self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([rbs, 0]))

        self.cur_rate = self.rate
        # NOTE(review): measured from the location *before* the move - confirm this is intended
        self.cur_dist = np.sqrt((ue_xloc - ue_xdest) ** 2 + (ue_yloc - ue_ydest) ** 2)
        self.state = np.array([new_ue_xloc, new_ue_yloc]) / self.high_obs
        rwd, done = self._gameover(rbs, self.mimo_model.az_aod)

        self.ue_path_rates.append(self.rate)
        self.ue_path.append(np.array([new_ue_xloc, new_ue_yloc]))

        self.steps_done += 1

        return self.state, rwd, done, {}

    def reset(self, rate_thr):
        """Start a new episode at a random grid location.

        Parameters:
        rate_thr - rate threshold used by the reward for this episode

        Output:
        normalised (x, y) start location
        """
        xloc_ndx, yloc_ndx = self.obs_space.sample()

        self.state = np.array([self.ue_xloc[xloc_ndx],
                               self.ue_yloc[yloc_ndx]
                               ])

        self.steps_done = 0
        self.rate = 0.0
        self.cur_dist = np.inf  # np.Inf was removed in NumPy 2.0
        self.cur_rate = 0.0
        self.ue_path = []
        self.ue_path.append(self.state)  # raw coordinates; self.state is rebound below
        self.ue_xsrc = self.state[0]
        self.ue_ysrc = self.state[1]
        self.ue_path_rates = []

        self.rate_threshold = rate_thr

        self.state = self.state / self.high_obs
        return self.state

    def render(self, mode='human', close=False):
        """Plot the path flown so far, annotated with source, destination and per-step rates."""
        from matplotlib.path import Path
        import matplotlib.patches as patches

        verts = [(int(x[0]), int(x[1])) for x in self.ue_path]

        codes = [Path.LINETO] * len(verts)
        codes[0] = Path.MOVETO
        codes[-1] = Path.STOP

        path = Path(verts, codes)

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        patch = patches.PathPatch(path, facecolor='none', lw=2)
        ax.add_patch(patch)

        xs, ys = zip(*verts)
        ax.plot(xs, ys, 'x--', lw=2, color='black')

        bbox = dict(boxstyle="round", fc="0.8")
        arrowprops = dict(
            arrowstyle="->",
            connectionstyle="angle,angleA=0,angleB=90,rad=10")

        offset = 40
        ax.annotate('Src = (%d, %d)' % (self.ue_xsrc, self.ue_ysrc),
                    (self.ue_xsrc, self.ue_ysrc), xytext=(-2 * offset, offset), textcoords='offset points',
                    bbox=bbox, arrowprops=arrowprops)

        ax.annotate('Dest = (%d, %d)' % (self.ue_xdest[0], self.ue_ydest[0]),
                    (self.ue_xdest[0], self.ue_ydest[0]), xytext=(0.5 * offset, -offset),
                    textcoords='offset points',
                    bbox=bbox, arrowprops=arrowprops)

        offset = 10
        bbox = dict(boxstyle="round", facecolor='yellow', edgecolor='none')
        # ue_path_rates[k] belongs to the location reached by step k, i.e. verts[k + 1]
        for i, rate in enumerate(self.ue_path_rates, start=1):
            ax.annotate('%.2f' % np.around(rate, decimals=2),
                        (verts[i][0], verts[i][1]), xytext=(-2 * offset, offset), textcoords='offset points',
                        bbox=bbox, arrowprops=arrowprops)

        ax.grid()
        ax.set_xticks(self.ue_xloc)
        ax.set_yticks(self.ue_yloc)
        ax.set_title("UAV graph w.r.t gNB [0,0,0]")
        ax.set_xlabel("X direction")
        ax.set_ylabel("Y direction")

        plt.show()

        return

    def _dist_to_dest(self):
        """Return the distance from the current (de-normalised) location to the destination."""
        state = np.rint(self.state * self.high_obs)
        return np.sqrt((state[0] - self.ue_xdest[0]) ** 2 + (state[1] - self.ue_ydest[0]) ** 2)

    # Not using this function
    def _reward(self, prev_dist):
        """Return 10*rate + 3 when the rate meets the threshold and the UAV did not move away
        from the destination (distance <= prev_dist); otherwise 0.0.

        The distance is measured in grid units, like dest_check(), rather than
        mixing the normalised state with the raw destination coordinates.
        """
        ue_dist = self._dist_to_dest()

        if (self.rate >= self.rate_threshold) and (ue_dist <= prev_dist):
            return 10 * self.rate + 3
        return 0.0

    def dest_check(self):
        """Return True when the UAV is closer than 50 units to the destination."""
        return bool(self._dist_to_dest() < 50)

    def _gameover(self, aoa, aod):
        """Compute (reward, done) for the step just taken and record the two angles.

        Parameters:
        aoa, aod - stored on self.aoa / self.aod; they do not influence the reward
        """
        reached = self.dest_check()
        rate_ok = self.rate >= self.rate_threshold

        if reached:
            rwd = 1.0 if rate_ok else -1.0
            done = True
        elif rate_ok:
            # bonus shrinks with the number of steps already taken
            rwd = 1.0 * np.exp(-1 * (self.steps_done - 1) / 50) * np.log10(self.rate + 1)
            done = False
        else:
            rwd = -1.0
            done = False

        self.aoa = aoa
        self.aod = aod
        return rwd, done

    def decode_action(self, action_ndx):
        """Split an action index into (beam_ndx, ue_mv_ndx).

        The move index is the least significant digit (base len(ue_moves)),
        the beam index the next one (base N).
        """
        ue_mv_ndx = action_ndx % len(self.ue_moves)
        action_ndx = action_ndx // len(self.ue_moves)

        beam_ndx = action_ndx % self.N
        action_ndx = action_ndx // self.N

        # what is left after removing both digits; 0 for every index inside act_space
        assert 0 <= action_ndx <= self.act_space.n
        return (beam_ndx, ue_mv_ndx)

    # Not using this function
    def encode_action(self, beam_ndx, ue_vx_ndx, ue_vy_ndx):
        """Combine a beam index and two velocity indices into one integer.

        NOTE(review): this is not the inverse of decode_action(), which splits an
        action into (beam, move); confirm before using it.
        """
        i = beam_ndx
        i *= self.N

        i += ue_vx_ndx
        i *= len(self.ue_vx)

        i += ue_vy_ndx
        i *= len(self.ue_vy)

        return i

    def choose_vel(self, ue_mv_ndx):
        """Return the (ue_vx, ue_vy) displacement for the move at index ``ue_mv_ndx``."""
        ue_mv = self.ue_moves[ue_mv_ndx]

        if ue_mv == 'L':  # move left
            ue_vx = -1 * self.ue_vx[0]
            ue_vy = 0
        elif ue_mv == 'U':  # move up
            ue_vx = 0
            ue_vy = self.ue_vy[0]
        elif ue_mv == 'D':  # move down
            ue_vx = 0
            ue_vy = -1 * self.ue_vy[0]
        else:  # move right
            ue_vx = self.ue_vx[0]
            ue_vy = 0

        return ue_vx, ue_vy

    def get_Los_Rate(self, state):
        """Return (SNR, rate) from MIMO.Los_Rate() for the UE location in ``state``.

        The model uses the 'fsp' channel and an empty scatterer array.
        """
        state = np.rint(state * self.high_obs)
        ue_xloc, ue_yloc = state

        sc_xyz = np.array([])
        ch_model = 'fsp'
        ue_pos = np.array([ue_xloc, ue_yloc, 0])

        mimo_model = MIMO(ue_pos, self.gNB[0], sc_xyz, ch_model, self.ptx, self.N_tx, self.N_rx)
        SNR, rate = mimo_model.Los_Rate()
        return SNR, rate

    def get_Exh_Rate(self, state):
        """Try every beam of self.BeamSet at the UE location in ``state``.

        Output:
        (best beam, best rate)
        """
        state = np.rint(state * self.high_obs)
        ue_xloc, ue_yloc = state
        ue_pos = np.array([ue_xloc, ue_yloc, 0])

        mimo_exh_model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)

        exh_rates = []
        for rbeam in self.BeamSet:
            # Calc_Rate returns (SNR, rate); only the rate is ranked.
            _, rate = mimo_exh_model.Calc_Rate(self.SF_time, np.array([rbeam, 0]))
            exh_rates.append(rate)

        best_rbeam_ndx = np.argmax(exh_rates)
        return self.BeamSet[best_rbeam_ndx], np.max(exh_rates)  # (Best RBS, Best Rate)

    def get_Rate(self):
        """Return the rate of the latest step."""
        return self.rate
Exemple #5
0
class UAV_Env(gym.Env):
    """
    Description:
    A UAV moves in a region around the base station gNB[0]; on every step the agent
    chooses the receive beam direction used for that step.

    Observation:
        np.array([distance, angle, receive beam]) / self.high_obs, with
        self.obs_space = Box(low=[30.0, 0.0, 0.0], high=[100.0, 3.14159, 3.14159])

    Action:
        Type: Discrete(N) - index into self.BeamSet

    Reward:
        10 * rate + 3 when rate > rate_threshold, otherwise 0

    Starting State:
        distance drawn uniformly from [30.0, 50.0), angle drawn uniformly from
        [0.0, 3.14159), receive beam from Gen_RandomBeams(1, N)

    Episode Termination:
        When steps_done equals Nhops
    """

    def __init__(self):
        # Antenna and link parameters
        self.N_tx = 1  # Num of transmitter antenna elements
        self.N_rx = 8  # Num of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # UE source location placeholder and displacement added to x on every step
        self.ue_s = None
        self.ue_v = 10

        # Base station, channel and beam codebook
        self.gNB = np.array([[0, 0, 0]])
        self.sc_xyz = np.array([])
        self.ch_model = 'fsp'
        self.N = self.N_rx  # Overall beam directions
        self.BeamSet = Generate_BeamDir(self.N)  # Set of all beam directions

        # Episode bookkeeping
        self.state = None
        self.rate = None
        self.rate_threshold = 0.07  # good enough QoS val (Rate)
        self.Nhops = 5

        self.seed()

        lower_bounds = np.array([30.0, 0.0, 0.0])
        self.high_obs = np.array([100.0, 3.14159, 3.14159])
        self.obs_space = spaces.Box(low=lower_bounds, high=self.high_obs)
        self.act_space = spaces.Discrete(self.N)

    def seed(self, seed=None):
        """Create self.np_random from ``seed`` and return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def step(self, action):
        """Move the UE by ue_v along x and evaluate the beam selected by ``action``.

        Output:
        state - new normalised observation
        reward - value of _reward()
        done - value of _gameover()
        {} - empty info dict
        """
        assert self.act_space.contains(action), "%r (%s) invalid" % (action, type(action))

        prev_dist, ue_ang, _ = self.state * self.high_obs
        beam = self.BeamSet[action]

        position = np.array(sph2cart(ue_ang, 0, prev_dist))
        position[0] += self.ue_v

        # Re-express the shifted position as (distance, angle) for the next observation
        new_dist = np.sqrt(position[0]**2 + position[1]**2)
        new_ang = np.arctan2(position[1], position[0])
        self.state = np.array([new_dist, new_ang, beam]) / self.high_obs

        self.mimo_model = MIMO(position, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)
        prev_rate = self.rate
        self.SNR, self.rate = self.mimo_model.Calc_Rate(self.SF_time, np.array([beam, 0]))

        self.steps_done += 1

        reward = self._reward(prev_rate, prev_dist, new_dist)
        finished = self._gameover()
        return self.state, reward, finished, {}

    def reset(self):
        """Draw a random start state and return a copy of its normalised form."""
        start_dist = np.random.uniform(low=30.0, high=50.0)
        start_ang = np.random.uniform(low=0.0, high=3.14159)
        start_beam = Gen_RandomBeams(1, self.N)  # one random RX beam
        self.state = np.array([start_dist, start_ang, start_beam]) / self.high_obs

        self.steps_done = 0
        self.rate = 0

        return np.array(self.state)

    def render(self, mode='human', close=False):
        """Rendering is not implemented for this environment."""
        pass

    def _reward(self, prev_rate, prev_dist, cur_dist):
        """Return 10*rate + 3 when the rate exceeds the threshold, else 0.

        The three arguments are accepted but do not influence the result.
        """
        if self.rate > self.rate_threshold:
            return 10*self.rate + 3
        return 0

    def _gameover(self):
        """Return True once Nhops steps have been taken."""
        return self.Nhops == self.steps_done

    def get_Los_Rate(self, state):
        """Return (SNR, rate) from MIMO.Los_Rate() for the UE described by ``state``.

        The model uses the 'fsp' channel and an empty scatterer array.
        """
        dist, ue_ang, _ = state * self.high_obs
        ue_pos = np.array(sph2cart(ue_ang, 0, dist))

        los_model = MIMO(ue_pos, self.gNB[0], np.array([]), 'fsp', self.ptx, self.N_tx, self.N_rx)
        SNR, rate = los_model.Los_Rate()
        return SNR, rate

    def get_Exh_Rate(self, state):
        """Try every beam of self.BeamSet for the UE described by ``state``.

        Output:
        (best beam, best rate)
        """
        dist, ue_ang, _ = state * self.high_obs
        ue_pos = np.array(sph2cart(ue_ang, 0, dist))

        model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model, self.ptx, self.N_tx, self.N_rx)

        # Calc_Rate yields an (SNR, rate) pair per beam; only the rate is ranked.
        per_beam = (model.Calc_Rate(self.SF_time, np.array([beam, 0])) for beam in self.BeamSet)
        rates = [rate for _snr, rate in per_beam]

        winner = np.argmax(rates)
        return self.BeamSet[winner], np.max(rates)  # (Best RBS, Best Rate)

    def get_Rate(self):
        """Return the rate of the latest step."""
        return self.rate
Exemple #6
0
class UAV_Env_v3(gym.Env):
    """
    Description:
    A UAV moves in a region within the coverage area of the base station (gNB).
    The objective is for the gNB to guide the UAV along a path that satisfies the rate requirement,
    reaching the destination as early as possible while minimizing energy use.

    Observation:
        Type: MultiDiscrete(2,)
        Num Observation     Min     Max     Step
        1   UAV_xloc       -500.0   500.0   50.0
        2   UAV_yloc       -500.0   500.0   50.0

    Action:
        Type:Discrete(Nrx*num(uav_moves))
        Num                   Action
        0                   Bdir0, mov0
        1                   Bdir0, mov1
        2                   Bdir0, mov2
        3                   Bdir0, mov3
        4                   Bdir 1, ...
        ...                     ....
        (Nrx-1)*uav_moves   Bdir{Nrx-1}, mov3

    Reward:
        Reward is value computed based on rate measurements and energy minimization conditions. Range [-1.0, 1.0]

    Starting State:
        Obs_space.sample() - Any random location with the Observation range

    Episode Termination:
        When UAV reaches the defined destination D
    """
    def __init__(self):
        """Set up antenna, channel, UE-grid and space parameters.

        Per-episode attributes such as ``measure``, ``done``, ``rwd``,
        ``steps_done`` and the path logs are not created here; ``reset``
        assigns them.
        """
        # Antenna modelling: uniform linear arrays (ULA) are considered.
        self.N_tx = 8  # number of transmitter antenna elements
        self.N_rx = 8  # number of receiver antenna elements
        self.count = 0
        self.ptx = 30  # dB
        self.SF_time = 20  # msec - for 60KHz carrier frequency in 5G
        self.alpha = 0

        # Base station locations (a single gNB at the origin).
        self.gNB = np.array([[0, 0, 0]])

        # Channel: no scatterers, 'uma-los' model, one beam per RX element.
        self.sc_xyz = np.array([])
        self.ch_model = 'uma-los'
        self.N = self.N_rx
        self.BeamSet = Generate_BeamDir(self.N)

        # Filled in by reset() / step().
        self.state = None
        self.rate = None
        self.rate_threshold = None  # good-enough QoS value (rate)

        # UE grid: 21 points per axis, from -500 to 500 in steps of ue_step.
        self.ue_step = 50
        self.ue_xloc = np.arange(-500, 550, self.ue_step)
        self.ue_yloc = np.arange(-500, 550, self.ue_step)

        # Two speed options per axis; choose_vel() only reads the first one.
        self.ue_vx = np.array([50, 100])
        self.ue_vy = np.array([50, 100])

        # Destination and default source (reset() overwrites the source).
        self.ue_xdest = np.array([self.ue_xloc.min()])
        self.ue_ydest = np.array([400])
        self.ue_xsrc = np.array([self.ue_xloc.max()])
        self.ue_ysrc = np.array([self.ue_yloc.max()])

        # Moving directions of the UAV: left, right, up, down.
        self.ue_moves = np.array(['L', 'R', 'U', 'D'])

        self.seed()

        # Observation and action spaces. high_obs is the scale used to
        # normalise grid coordinates into the state vector.
        self.high_obs = np.array([self.ue_xloc.max(), self.ue_yloc.max()])
        self.obs_space = spaces.MultiDiscrete(
            [len(self.ue_xloc), len(self.ue_yloc)])
        # One action per (beam direction, move) pair.
        self.act_space = spaces.Discrete(self.N * len(self.ue_moves))

    def seed(self, seed=None):
        """Create the environment's random generator.

        Args:
            seed: Value forwarded to ``seeding.np_random``; ``None`` by
                default.

        Returns:
            A one-element list holding the seed returned by
            ``seeding.np_random``.
        """
        generator, used_seed = seeding.np_random(seed)
        self.np_random = generator
        return [used_seed]

    def step(self, action):
        """Apply one (beam, move) action and return ``(state, rwd, done, info)``.

        The action index is decoded into a beam index and a move index. The
        UAV moves one step in the chosen direction, clamped to the grid limit
        along the axis of motion. The rate at the new position is evaluated
        with the chosen beam, and the reward for ``self.measure`` is computed.

        Args:
            action: Integer contained in ``self.act_space``.

        Returns:
            Tuple ``(state, reward, done, info)``. ``state`` is the new
            position divided element-wise by ``self.high_obs`` and ``info``
            is an empty dict. If the episode was already done, the stored
            state, reward and done flag are returned unchanged.

        Raises:
            AssertionError: If ``action`` is not in ``self.act_space``.
        """
        assert self.act_space.contains(
            action), "%r (%s) invalid" % (action, type(action))

        # De-normalise the current observation back to grid coordinates.
        state = np.rint(self.state * self.high_obs)
        rbd_ndx, ue_mv_ndx = self.decode_action(action)
        ue_vx, ue_vy = self.choose_vel(ue_mv_ndx)
        rbs = self.BeamSet[rbd_ndx]
        ue_xloc, ue_yloc = state

        # Distance from the pre-move position to the destination. Note that
        # dest_check() reads this value, so termination is reported on the
        # step taken *from* the destination cell.
        self.cur_dist = np.sqrt((ue_xloc - self.ue_xdest[0])**2 +
                                (ue_yloc - self.ue_ydest[0])**2)
        self.cur_state = state

        if self.done:  # already terminal: repeat the last result
            return self.state, self.rwd, self.done, {}

        # choose_vel() returns signed velocities, so the move is a plain
        # addition; only the axis of motion needs clamping to the grid.
        new_ue_xloc = ue_xloc + ue_vx
        new_ue_yloc = ue_yloc + ue_vy
        ue_mv = self.ue_moves[ue_mv_ndx]
        if ue_mv == 'L':
            new_ue_xloc = max(new_ue_xloc, np.min(self.ue_xloc))
        elif ue_mv == 'R':
            new_ue_xloc = min(new_ue_xloc, np.max(self.ue_xloc))
        elif ue_mv == 'U':
            new_ue_yloc = min(new_ue_yloc, np.max(self.ue_yloc))
        elif ue_mv == 'D':
            new_ue_yloc = max(new_ue_yloc, np.min(self.ue_yloc))

        # Approximate (0, 0) by (40, 40): the UAV would otherwise coincide
        # with the gNB and the rate would go to infinity.
        if (new_ue_xloc == 0) and (new_ue_yloc == 0):
            link_pos = np.array([40, 40, 0])
        else:
            link_pos = np.array([new_ue_xloc, new_ue_yloc, 0])

        self.mimo_model = MIMO(link_pos, self.gNB[0], self.sc_xyz,
                               self.ch_model, self.ptx, self.N_tx, self.N_rx)
        self.SNR, self.rate = self.mimo_model.Calc_Rate(
            self.SF_time, np.array([rbs, 0]))

        # Commit the post-move state BEFORE scoring. rate_path_gameover() and
        # short_path_gameover() derive their "next" distance from self.state;
        # with the old state still in place they would compare the pre-move
        # distance with itself and always fall into their penalty branch.
        self.state = np.array([new_ue_xloc, new_ue_yloc]) / self.high_obs

        if self.measure == 'rate_thr_path':
            self.rwd, self.done = self._gameover()
        elif self.measure == 'rate_path':
            self.rwd, self.done = self.rate_path_gameover()
        elif self.measure == 'short_path':
            self.rwd, self.done = self.short_path_gameover()
        else:
            print("Err: Incorrect measure str\n")
            self.rwd, self.done = -100.0, True

        # Log the visited position and the rate obtained there.
        self.ue_path_rates.append(self.rate)
        self.ue_path.append(np.array([new_ue_xloc, new_ue_yloc]))

        # Updated only after scoring: _gameover() uses cur_rate and
        # prev_dist as the values left over from the previous step.
        self.cur_rate = self.rate
        self.prev_dist = self.cur_dist

        self.steps_done += 1

        return self.state, self.rwd, self.done, {}

    def beyond_border(self, ue_xpos, ue_ypos):
        if (ue_xpos == np.min(self.ue_xloc)) or (ue_xpos == np.max(
                self.ue_xloc)) or (ue_ypos == np.min(
                    self.ue_yloc)) or (ue_ypos == np.max(self.ue_yloc)):
            return True
        else:
            return False

    def reset(self, rate_thr, meas, state_indices):

        #state_indices = self.obs_space.sample()
        #xloc_ndx, yloc_ndx = self.obs_space.sample()
        xloc_ndx, yloc_ndx = state_indices

        #Start from a random start location
        self.state = np.array([self.ue_xloc[xloc_ndx], self.ue_yloc[yloc_ndx]])
        #self.state = np.array([self.ue_xsrc[0],
        #                       self.ue_ysrc[0]
        #                       ])

        self.steps_done = 0
        self.rate = 0.0

        self.ue_path = []
        #self.ue_path.append(self.state)
        self.ue_xsrc = self.state[0]
        self.ue_ysrc = self.state[1]
        self.ue_path_rates = []
        self.measure = meas
        self.rwd = 0.0
        self.done = False
        #self.ue_path_rates = []

        self.rate_threshold = rate_thr  #np.max(dest_rates)

        self.state = self.state / self.high_obs
        self.prev_dist = np.Inf

        #_, self.cur_rate = self.get_Exh_Rate(self.state)
        self.cur_rate = 0.0
        #self.state = self.state.reshape((1, len(self.state)))
        return self.state

    def render(self, mode='human', close=False):
        """Plot the path flown so far and the rate recorded at each point.

        Draws ``self.ue_path`` on a 10x10 figure, annotates the source and
        destination, labels each logged position with its entry from
        ``self.ue_path_rates`` and shows the figure.

        Args:
            mode: Accepted for interface compatibility; not used here.
            close: Accepted for interface compatibility; not used here.
        """
        from matplotlib.path import Path
        import matplotlib.patches as patches

        # Integer vertices of the recorded path.
        verts = [(int(point[0]), int(point[1])) for point in self.ue_path]

        # Path codes: move to the first vertex, line to the others, and mark
        # the last one as the stop marker.
        codes = [Path.LINETO] * len(verts)
        codes[0] = Path.MOVETO
        codes[-1] = Path.STOP
        trace = Path(verts, codes)

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111)
        ax.add_patch(patches.PathPatch(trace, facecolor='none', lw=2))

        xs, ys = zip(*verts)
        ax.plot(xs, ys, 'x--', lw=2, color='black')

        arrow_style = {
            "arrowstyle": "->",
            "connectionstyle": "angle,angleA=0,angleB=90,rad=10",
        }

        # Source and destination call-outs.
        endpoint_box = {"boxstyle": "round", "fc": "0.8"}
        offset = 40
        endpoints = (
            ('Src = (%d, %d)' % (self.ue_xsrc, self.ue_ysrc),
             (self.ue_xsrc, self.ue_ysrc),
             (-2 * offset, offset)),
            ('Dest = (%d, %d)' % (self.ue_xdest[0], self.ue_ydest[0]),
             (self.ue_xdest[0], self.ue_ydest[0]),
             (0.5 * offset, -offset)),
        )
        for label, anchor, text_shift in endpoints:
            ax.annotate(label,
                        anchor,
                        xytext=text_shift,
                        textcoords='offset points',
                        bbox=endpoint_box,
                        arrowprops=arrow_style)

        # One rate label per logged step, attached to the matching vertex.
        rate_box = {
            "boxstyle": "round",
            "facecolor": 'yellow',
            "edgecolor": 'none',
        }
        offset = 10
        for i, step_rate in enumerate(self.ue_path_rates):
            ax.annotate('%.2f' % np.around(step_rate, decimals=2),
                        (verts[i][0], verts[i][1]),
                        xytext=(-2 * offset, offset),
                        textcoords='offset points',
                        bbox=rate_box,
                        arrowprops=arrow_style)

        ax.grid()
        ax.set_xticks(self.ue_xloc)
        ax.set_yticks(self.ue_yloc)
        ax.set_title("UAV graph w.r.t gNB [0,0,0]")
        ax.set_xlabel("X direction")
        ax.set_ylabel("Y direction")

        plt.show()

    def dest_check(self):
        reached = False
        #state = self.cur_state#np.rint(self.cur_state * self.high_obs)
        #curr_dist = np.sqrt((state[0] - self.ue_xdest[0]) ** 2 + (state[1] - self.ue_ydest[0]) ** 2)

        if self.cur_dist < self.ue_step:
            reached = True
        return reached

    def _gameover(self):
        #state = np.rint(self.state * self.high_obs)
        #curr_dist = np.sqrt((state[0] - self.ue_xdest[0]) ** 2 + (state[1] - self.ue_ydest[0]) ** 2)

        #if (self.dest_check()) and (self.rate >= self.rate_threshold):
        #    rwd = 1.0#3.1#2.1#self.rate + 2.0#2000.0
        #    done = True

        if self.dest_check():
            rwd = 1.0 * np.log10(
                8 * self.rate +
                1)  #*np.log10(self.rate + 1) #*np.exp(-self.steps_done/100)
            done = True
        elif (self.cur_rate >=
              self.rate_threshold):  #and (self.cur_dist < self.prev_dist):

            #rwd = 1.0*np.exp(-1*(self.steps_done-1)/50)*np.log10(max(21-self.rate,0)+1)#*np.exp(self.rate/10)/20#np.log10(max(21.5-self.rate, 0)+1)/3#*np.log10(self.rate+1)# np.exp(self.rate/50)#1.0#self.rate+1.0#self.rate + 2.0 #10*np.log10(val+1) + 2.0
            rwd = 0.6 * np.exp(
                -self.cur_dist / 1000
            ) * np.exp(-2 * (self.steps_done - 1) / 50) * np.log10(
                8 * self.rate + 1
            )  #*np.exp(-2*(self.steps_done-1)/50)#*np.log10(self.rate + 1)#*np.exp(self.rate/20)#*min(np.exp(self.rate/20), np.exp((self.rate_threshold-self.rate)/20.0))#0.5 * np.exp(-1 * (self.steps_done - 1) / 50) *(1-self.rate/30)
            #print(rwd)
            done = False
        elif (self.cur_dist < self.prev_dist):
            rwd = 0.8 * np.exp(-self.cur_dist / 1000) * np.exp(
                -2 * (self.steps_done - 1) / 50) * np.log10(
                    8 * self.rate + 1
                )  #*np.exp(-self.rate/20)#-self.rate-1.0#-self.rate -2.0#-20.0
            done = False

        elif (self.cur_dist >
              self.prev_dist):  #self.cur_rate >= self.rate_threshold) and
            rwd = 0.2 * np.exp(
                -self.cur_dist / 1000
            ) * np.exp(-2 * (self.steps_done - 1) / 50) * np.log10(
                8 * self.rate + 1
            )  #*np.log10(self.rate + 1)#*np.exp(self.rate/20)#*min(np.exp(self.rate/20), np.exp((self.rate_threshold-self.rate)/20.0))
            done = False

        #elif (self.beyond_border(self.cur_state[0], self.cur_state[1])):
        #    rwd = 0.3*np.exp(-curr_dist/1000)*np.exp(-2*(self.steps_done-1)/50)*np.log10(8*self.rate + 1)
        #    done = False
        #elif (self.cur_dist >= self.prev_dist):
        #    rwd = 0.2*np.exp(-self.cur_dist/1000)*np.exp(-2*(self.steps_done-1)/50)*np.log10(8*self.cur_rate + 1)#*np.exp(-self.rate/20)#-self.rate-1.0#-self.rate -2.0#-20.0
        #    done = False
        else:
            rwd = 0.0  #0.2*np.exp(-self.cur_dist/1000)*np.exp(-2*(self.steps_done-1)/50)*np.log10(8*self.rate + 1)
            done = False

        #done = False
        #if self.dest_check():
        #    done = True
        #rwd = np.log10(8*self.rate + 1) + np.exp(-self.cur_dist/1000)*np.exp(-2*(self.steps_done-1)/50)
        return rwd, done

    #Reward Function for max rate path
    def rate_path_gameover(self):
        state = np.rint(self.state * self.high_obs)
        next_dist = np.sqrt((state[0] - self.ue_xdest[0])**2 +
                            (state[1] - self.ue_ydest[0])**2)

        if (self.dest_check()):
            rwd = 1.0  # 3.1#2.1#self.rate + 2.0#2000.0
            done = True

        elif (next_dist < self.cur_dist):

            # rwd = 1.0*np.exp(-1*(self.steps_done-1)/50)*np.log10(max(21-self.rate,0)+1)#*np.exp(self.rate/10)/20#np.log10(max(21.5-self.rate, 0)+1)/3#*np.log10(self.rate+1)# np.exp(self.rate/50)#1.0#self.rate+1.0#self.rate + 2.0 #10*np.log10(val+1) + 2.0
            rwd = 0.5 * np.exp(-1 * (self.steps_done - 1) / 50) * np.exp(
                self.rate / 20
            )  # *min(np.exp(self.rate/20), np.exp((self.rate_threshold-self.rate)/20.0))#0.5 * np.exp(-1 * (self.steps_done - 1) / 50) *(1-self.rate/30)
            # print(rwd)
            done = False

        elif (next_dist > self.cur_dist):
            rwd = 0.2 * np.exp(-1 * (self.steps_done - 1) / 50) * np.exp(
                self.rate / 20
            )  # *min(np.exp(self.rate/20), np.exp((self.rate_threshold-self.rate)/20.0))
            done = False
        else:
            rwd = -1.0 * np.exp(-1 * (self.steps_done - 1) / 50) * np.exp(
                -self.rate / 20)  # -self.rate-1.0#-self.rate -2.0#-20.0
            done = False

        return rwd, done

    #Reward function for shortest path
    def short_path_gameover(self):
        state = np.rint(self.state * self.high_obs)
        next_dist = np.sqrt((state[0] - self.ue_xdest[0])**2 +
                            (state[1] - self.ue_ydest[0])**2)

        if (self.dest_check()):
            rwd = 1.0  # 3.1#2.1#self.rate + 2.0#2000.0
            done = True
        elif (next_dist < self.cur_dist):
            rwd = 0.2
            done = False
        else:
            rwd = -1.0
            done = False

        return rwd, done

    def decode_action(self, action_ndx):
        #ue_vy_ndx = action_ndx % len(self.ue_vy)
        #action_ndx = action_ndx // len(self.ue_vy)
        #ue_v_ndx = action_ndx % len(self.ue_vx)
        #action_ndx = action_ndx // len(self.ue_vx)

        ue_mv_ndx = action_ndx % len(self.ue_moves)
        action_ndx = action_ndx // len(self.ue_moves)

        beam_ndx = action_ndx % self.N
        action_ndx = action_ndx // self.N

        assert 0 <= action_ndx <= self.act_space.n
        return (beam_ndx, ue_mv_ndx)

    #Not using this function
    def encode_action(self, beam_ndx, ue_vx_ndx, ue_vy_ndx):
        i = beam_ndx
        i *= self.N

        i += ue_vx_ndx
        i *= len(self.ue_vx)

        i += ue_vy_ndx
        i *= len(self.ue_vy)

        return i

    def choose_vel(self, ue_mv_ndx):
        ue_mv = self.ue_moves[ue_mv_ndx]

        if ue_mv == 'L':  #move left
            ue_vx = -1 * self.ue_vx[0]
            ue_vy = 0
        elif ue_mv == 'U':  #move up
            ue_vx = 0
            ue_vy = self.ue_vy[0]
        elif ue_mv == 'D':  #move down
            ue_vx = 0
            ue_vy = -1 * self.ue_vy[0]
        else:  #move right
            ue_vx = self.ue_vx[0]
            ue_vy = 0

        return ue_vx, ue_vy

    def get_Los_Rate(self, state):
        """Return the (SNR, rate) reported by ``MIMO.Los_Rate`` for a state.

        The model is built with an empty scatterer set and the 'fsp' channel
        model instead of the environment's own channel settings.

        Args:
            state: Normalised ``(x, y)`` observation; it is multiplied by
                ``self.high_obs`` and rounded to recover grid coordinates.

        Returns:
            Tuple ``(SNR, rate)`` as produced by ``MIMO.Los_Rate``.
        """
        ue_xloc, ue_yloc = np.rint(state * self.high_obs)

        # The origin is shifted to (40, 40) so the UE does not coincide
        # with the gNB location.
        if (ue_xloc == 0) and (ue_yloc == 0):
            ue_pos = np.array([ue_xloc + 40, ue_yloc + 40, 0])
        else:
            ue_pos = np.array([ue_xloc, ue_yloc, 0])

        no_scatterers = np.array([])
        los_model = MIMO(ue_pos, self.gNB[0], no_scatterers, 'fsp', self.ptx,
                         self.N_tx, self.N_rx)

        snr, los_rate = los_model.Los_Rate()
        return snr, los_rate

    def get_Exh_Rate(self, state):
        """Try every beam in ``self.BeamSet`` and return the best one.

        Each beam is scored with ``Calc_ExhRate``. The beam with the highest
        rate is then evaluated once more with ``noise_flag=False``, and that
        second rate is the one returned.

        Args:
            state: Normalised ``(x, y)`` observation; it is multiplied by
                ``self.high_obs`` and rounded to recover grid coordinates.

        Returns:
            Tuple ``(best_beam, rate_max)``.
        """
        ue_xloc, ue_yloc = np.rint(state * self.high_obs)

        # The origin is shifted to (40, 40) so the UE does not coincide
        # with the gNB location.
        if (ue_xloc == 0) and (ue_yloc == 0):
            ue_pos = np.array([ue_xloc + 40, ue_yloc + 40, 0])
        else:
            ue_pos = np.array([ue_xloc, ue_yloc, 0])

        search_model = MIMO(ue_pos, self.gNB[0], self.sc_xyz, self.ch_model,
                            self.ptx, self.N_tx, self.N_rx)

        # One evaluation per candidate beam; only the rate is ranked.
        beam_rates = []
        for beam in self.BeamSet:
            _snr, beam_rate = search_model.Calc_ExhRate(
                self.SF_time, np.array([beam, 0]))
            beam_rates.append(beam_rate)

        best_beam = self.BeamSet[np.argmax(beam_rates)]

        # Re-evaluate the winner with noise_flag=False.
        _snr_max, rate_max = search_model.Calc_ExhRate(
            self.SF_time, np.array([best_beam, 0]), noise_flag=False)
        return best_beam, rate_max

    def get_Rate(self):
        return self.cur_rate