def testEnv():
    env = Env()
    channelThroughPut = 0  # fraction of time that packets are successfully delivered over the channel,
    # i.e. no collisions or idle time slots
    for iteration in range(config.Iterations):
        for t in range(config.TimeSlots):
            initialState = env.reset()
            for user in range(config.N):
                action = slottedAlohaProtocol()
                env.step(action=action, user=user)
                # each user changes the inner state of the environment; the environment uses this
                # inner state to keep track of the channels and the ACK signals for each user
            nextStateForEachUser, rewardForEachUser = env.getNextState()
            # a reward of one means that a packet was successfully delivered over the channel;
            # the sum is therefore at most the number of channels -> config.K
            channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
    # measuring the expected value
    channelThroughPut = channelThroughPut / (config.Iterations * config.TimeSlots)
    print("Channel Utilization average {}".format(channelThroughPut))
    ToPlotX = range(config.Iterations * config.TimeSlots)
    ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
    plot_graph(data=[ToPlotX, ToPlotY], filename="Aloha", title="Aloha",
               xlabel="Time slot", ylabel="Average channel utilization", legend="SlottedAloha")


# def testTimeEnv():
#     env = TimeDependentEnv()
#     channelThroughPut = 0  # fraction of time that packets are successfully delivered over the channel,
#     # i.e. no collisions or idle time slots
#     for iteration in range(config.Iterations):
#         TimeSPU = env.reset()
#         for t in range(config.TimeSlots):
#             env.resetTimeStep()
#             # reset the internal state of the environment,
#             # which keeps track of the users' actions throughout the time step
#             for user in range(config.N):
#                 action = slottedAlohaProtocol()
#                 env.step(action=action, user=user)
#                 # each user changes the inner state of the environment; the environment uses this
#                 # inner state to keep track of the channels and the ACK signals for each user
#             nextStateForEachUser, rewardForEachUser = env.tstep(timestep=t)
#             # a reward of one means that a packet was successfully delivered over the channel;
#             # the sum is therefore at most the number of channels -> config.K
#             channelThroughPut = channelThroughPut + np.sum(rewardForEachUser)
#     # measuring the expected value
#     channelThroughPut = channelThroughPut / (config.Iterations * config.TimeSlots)
#     print("Channel Utilization average {}".format(channelThroughPut))
#     ToPlotX = range(config.Iterations * config.TimeSlots)
#     ToPlotY = np.ones_like(ToPlotX) * channelThroughPut
#     plot_graph(data=[ToPlotX, ToPlotY], filename="Aloha", title="Aloha",
#                xlabel="Time slot", ylabel="Average channel utilization", legend="SlottedAloha")
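# For reference, a minimal sketch of the transmit rule that slottedAlohaProtocol() is assumed
# to implement above: in slotted Aloha every user independently transmits in a slot with a
# fixed probability and otherwise stays idle. The helper name and the probability value are
# illustrative assumptions, not the project's actual implementation.
def slottedAlohaProtocolSketch(transmitProbability=0.5):
    # return 1 (transmit in this time slot) with probability transmitProbability, else 0 (stay idle);
    # if the environment expects a channel index instead, the same Bernoulli draw would be
    # followed by a uniform choice among the config.K channels
    return 1 if np.random.rand() < transmitProbability else 0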
def transcate_DDPG(self):
    BATCH_SIZE = 32
    total_steps = 0  # step counter; one day is one step
    profit_list = []  # total profit of each game
    profitAdvanced_list = []
    actions = 2  # number of actions
    s_dim = 87
    a_dim = 1
    brain = DDPG(
        a_dim=a_dim,
        s_dim=s_dim,
        a_bound=1.,
        LR_A=0.001,
        LR_C=0.001,
        GAMMA=.99,
        TAU=0.01,
        # replacement=REPLACEMENT,
    )
    gameNum = 0  # number of games played
    ex_steps = 500  # number of rounds over which exploration decays
    epsilon = self.epsilon
    last_remainder = 0
    reward_list = [0]  # store each reward, used to compute the baseline
    Loss_list = []  # store the loss values during training
    wait_list = []  # waiting days of each of the N games
    gameSplit = 5000  # plot every this many games
    while total_steps < 60000:
        # initialize the game
        # routeId = random.randrange(0, 49, 1)
        routeId = 21
        self.routeline = self.allRoute[routeId]
        # print(self.routeline)
        env = Env(self.routeline)
        gameNum += 1
        # state = env.getState()  # accessed as state[0], state[1]
        today = env.getToday()
        terminal = False
        order_accepted = False
        isExploration = False
        create_date = 1
        end_date = 0
        stay_num = 0
        # one game (episode)
        # print("GAME#:", gameNum)
        baseline = 0
        tao_prob = []
        tao_reward = []
        wait_day = []  # record on which days the agent waited in this game
        while today < self.routeline[-1] and terminal == False:
            # a new order is generated (once there are already 10 orders, no new order is received here)
            if order_accepted == False:
                self.orderSelect(self.routeline, 60)
                # print(self.order)
                env.setOrder(self.order)
                order_accepted = True
            # iterate over the self.orders dict (i.e. state[0]) and handle each order
            state = env.getState()  # current state
            state_tf = np.mat(state)
            # print(state_tf, len(state_tf))
            # the neural network chooses the action
            if random.random() < epsilon and isExploration == False:
                isExploration = True
                # end_date = random.randrange(env.getTodayIndex(), 87, 1)
                end_date = 60
            if isExploration:
                if env.getTodayIndex() == end_date:
                    action_model = 1
                    if ex_steps > 0:
                        ex_steps -= 1
                else:
                    action_model = 0
            else:
                # action from learning
                action_model = brain.choose_action(state_tf)
                # print(action_model)
            wait_day.append(env.getTodayIndex())
            # order dict, history curve, reward
            reward = env.getReward(action_model)
            tao_reward.append(reward)
            # the order is completed or the last day is reached
            terminal = env.isTerminal(action_model)
            state_ = env.getNextState(action_model)
            if len(state_) == 1:
                state_ = copy.deepcopy(state)
            brain.store_transition(state, action_model, reward, state_)
            # profitAdvanced_list.append(td_error[0][0])
            if brain.pointer > brain.MEMORY_CAPACITY:
                # print(b_s_)
                brain.learn()
            total_steps += 1
            if terminal:
                # wait_list.append(wait_day[-1])
                # loss = brain.learn()
                # Loss_list.append(loss)
                break
            # step: advance by one day
            env.nextStep()
        # total profit of this game
        epsilon = self.epsilon * (ex_steps / 500)
        print("epsilon:", epsilon)
        print("TD_Error:", baseline)
        profit = env.getTotalReward()
        profit_list.append(profit)
        print("total_steps:", total_steps)
        print("profit_list", profit_list)
        print("profit:", profit, "profitAvg:", np.mean(profit_list))
        print("action-prob:", tao_prob)
        print("Reward:", tao_reward)
        print("wait_day:", wait_day)
        self.writeHistory('./picture/history.txt', epsilon, baseline, total_steps, profit_list,
                          profit, tao_prob, tao_reward, wait_day, gameNum)
        print("########################" + str(gameNum) + "###########################")
        if len(profit_list) >= gameSplit:
            plt.figure()
            plt.plot(profit_list, 'r-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_profit_PG.jpg')
            plt.figure()
            plt.scatter(np.arange(gameSplit), profit_list)
            plt.savefig('./picture/' + str(gameNum) + 'scatter_profit_PG.jpg')
            plt.figure()
            plt.plot(profitAdvanced_list, 'g-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_advanced_PG.jpg')
            plt.figure()
            plt.plot(Loss_list, 'y-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_loss_PG.jpg')
            plt.figure()
            plt.scatter(np.arange(gameSplit), wait_list, c='r')
            plt.savefig('./picture/' + str(gameNum) + 'scatter_waitDay_PG.jpg')
        if len(profit_list) >= 500:
            profit_list.clear()
            wait_list.clear()
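# Note on the action space: DDPG(a_dim=1, a_bound=1.) produces a continuous action in [-1, 1],
# while the trading environment above only distinguishes "wait" (0) from "finish the order" (1).
# The discretization is not shown in this file; the helper below is a plausible mapping given
# purely as an assumption (name and threshold are illustrative, not the project's actual code).
def ddpgOutputToDiscreteAction(continuousAction, threshold=0.0):
    # continuousAction: scalar or 1-element array in [-1, 1] returned by the DDPG actor
    # returns 1 (finish the order) if the actor output exceeds the threshold, else 0 (keep waiting)
    value = float(np.asarray(continuousAction).reshape(-1)[0])
    return 1 if value > threshold else 0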
def transcate_AC(self):
    total_steps = 0  # step counter; one day is one step
    profit_list = []  # total profit of each game
    profitAdvanced_list = []
    actions = 2  # number of actions
    brain = ActorCritic(
        n_actions=2,
        n_features=87,
        LR_A=0.001,
        LR_C=0.01,
        reward_decay=1.,
        prob_clip=0.,
    )
    gameNum = 0  # number of games played
    ex_steps = 500  # number of rounds over which exploration decays
    epsilon = self.epsilon
    last_remainder = 0
    reward_list = [0]  # store each reward, used to compute the baseline
    Loss_list = []  # store the loss values during training
    wait_list = []  # waiting days of each of the N games
    gameSplit = 500  # plot every this many games
    while total_steps < 60000:
        # initialize the game
        # routeId = random.randrange(0, 49, 1)
        routeId = 21
        self.routeline = self.allRoute[routeId]
        # print(self.routeline)
        env = Env(self.routeline)
        gameNum += 1
        # state = env.getState()  # accessed as state[0], state[1]
        today = env.getToday()
        terminal = False
        order_accepted = False
        isExploration = False
        create_date = 1
        end_date = 0
        stay_num = 0
        # one game (episode)
        # print("GAME#:", gameNum)
        baseline = 0
        tao_prob = []
        tao_reward = []
        wait_day = []  # record on which days the agent waited in this game
        while today < self.routeline[-1] and terminal == False:
            # a new order is generated (once there are already 10 orders, no new order is received here)
            if order_accepted == False:
                self.orderSelect(self.routeline, 60)
                # print(self.order)
                env.setOrder(self.order)
                order_accepted = True
            # iterate over the self.orders dict (i.e. state[0]) and handle each order
            state = env.getState()  # current state
            state_tf = np.mat(state)
            # print(state_tf, len(state_tf))
            # the neural network chooses the action
            if random.random() < epsilon and isExploration == False:
                isExploration = True
                end_date = random.randrange(env.getTodayIndex(), 87, 1)
                # end_date = 60
            if isExploration:
                if env.getTodayIndex() == end_date:
                    action_model = 1
                    if ex_steps > 0:
                        ex_steps -= 1
                else:
                    action_model = 0
            else:
                # action from learning
                action_model, p = brain.choose_action(state_tf, env.getTodayIndex())
                tao_prob.append(p)
            if action_model == 0:
                action_finishOrder = [1, 0]
            else:
                action_finishOrder = [0, 1]
            wait_day.append(env.getTodayIndex())
            # order dict, history curve, reward
            reward = env.getReward(action_model)
            tao_reward.append(reward)
            # the order is completed or the last day is reached
            terminal = env.isTerminal(action_model)
            state_ = env.getNextState(action_model)
            # print(state_tf)
            # print(state_)
            td_error = brain.criticLearn(state_tf, reward, state_)
            baseline = td_error
            profitAdvanced_list.append(td_error[0][0])
            loss = brain.actorLearn(state_tf, action_model, td_error)
            # print(loss)
            Loss_list.append(loss)
            # store the record in the replay memory
            # print("this is store arg:", state_tf, ";", action_model, ";", reward, ";", env.getTodayIndex())
            # brain.store_transition(state_tf, action_model, reward, env.getTodayIndex())
            # print(action_model)
            total_steps += 1
            if terminal:
                wait_list.append(wait_day[-1])
                break
            # step: advance by one day
            env.nextStep()
        # total profit of this game
        # epsilon = self.epsilon * (ex_steps / 500)
        print("epsilon:", epsilon)
        print("TD_Error:", baseline)
        profit = env.getTotalReward()
        profit_list.append(profit)
        print("total_steps:", total_steps)
        print("profit_list", profit_list)
        print("profit:", profit, "profitAvg:", np.mean(profit_list))
        print("action-prob:", tao_prob)
        print("Reward:", tao_reward)
        print("wait_day:", wait_day)
        self.writeHistory('./picture/history.txt', epsilon, baseline, total_steps, profit_list,
                          profit, tao_prob, tao_reward, wait_day, gameNum)
        print("########################" + str(gameNum) + "###########################")
        if len(profit_list) >= gameSplit:
            plt.figure()
            plt.plot(profit_list, 'r-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_profit_PG.jpg')
            plt.figure()
            plt.scatter(np.arange(gameSplit), profit_list)
            plt.savefig('./picture/' + str(gameNum) + 'scatter_profit_PG.jpg')
            plt.figure()
            plt.plot(profitAdvanced_list, 'g-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_advanced_PG.jpg')
            plt.figure()
            plt.plot(Loss_list, 'y-')
            plt.savefig('./picture/' + str(gameNum) + 'liner_loss_PG.jpg')
            plt.figure()
            plt.scatter(np.arange(gameSplit), wait_list, c='r')
            plt.savefig('./picture/' + str(gameNum) + 'scatter_waitDay_PG.jpg')
            profit_list.clear()
            wait_list.clear()
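# The critic/actor updates above are driven by a one-step TD error used as the advantage.
# The ActorCritic class is not shown in this file; the sketch below, assuming a standard TD(0)
# actor-critic with value estimates V(s) and V(s'), only illustrates the quantities that
# criticLearn() and actorLearn() are expected to compute (name and signature are hypothetical).
def tdActorCriticTargets(reward, vState, vNextState, actionProb, gamma=1.0):
    # one-step TD error: delta = r + gamma * V(s') - V(s); gamma defaults to 1.0 to match
    # reward_decay=1. used when constructing ActorCritic above
    tdError = reward + gamma * vNextState - vState
    # actor objective for the taken action: minimize -log(pi(a|s)) * delta,
    # i.e. a policy-gradient step weighted by the baseline-corrected advantage
    actorLoss = -np.log(actionProb) * tdError
    return tdError, actorLoss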