Example #1
    def __init__(self):
        # define publisher to control start or stop vrep
        self.pub_start_signal = rospy.Publisher("/startSimulation",
                                                Bool,
                                                queue_size=1)
        self.pub_stop_signal = rospy.Publisher("/stopSimulation",
                                               Bool,
                                               queue_size=1)

        # maybe starting the simulation by hand would be a better approach
        time.sleep(2)
        start_signal = Bool()
        start_signal.data = True
        self.pub_start_signal.publish(start_signal)

        # define DQN algorithm
        tensorflow.reset_default_graph()
        self.RL1 = DeepQNetwork(
            n_actions=len(robot1.action_space),
            n_features=len(robot1.observation_space),
            learning_rate=0.01,
            e_greedy=0.9,
            replace_target_iter=100,
            memory_size=2000,
            e_greedy_increment=0.0008,
        )

        self.total_steps = 0
        self.rsrvl = 0.05  # to check
        self.train()
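
The constructor above creates a stop publisher but never uses it. As a follow-up, here is a minimal sketch of a matching stop method; the method name stop() is hypothetical, but the message type and publisher mirror the start-signal code in the snippet.

    def stop(self):
        # mirror the start-signal publish above to stop the V-REP simulation
        stop_signal = Bool()
        stop_signal.data = True
        self.pub_stop_signal.publish(stop_signal)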
Example #2
def load_DQN(epsilon_start=0.5):
    # Build the two agents' networks. If constructing RL_1 raises, rebuild both
    # agents from scratch; otherwise only RL_0 still needs to be created.
    global RL_0
    global RL_1
    try:
        RL_1 = DeepQNetwork(
            env.n_action,
            env.n_features,
            'player_1',
            'player_0',
            # replace_target_iter = 200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
    except Exception:
        RL_0 = DeepQNetwork(
            env.n_action,
            env.n_features,
            'player_0',
            'player_1',
            # replace_target_iter = 200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
        RL_1 = DeepQNetwork(
            env.n_action,
            env.n_features,
            'player_1',
            'player_0',
            # replace_target_iter = 200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
    else:
        RL_0 = DeepQNetwork(
            env.n_action,
            env.n_features,
            'player_0',
            'player_1',
            # replace_target_iter = 200,
            memory_size=4000,
            batch_size=500,
            epsilon_start=epsilon_start,
            e_greedy_increment=0.01)
Example #3
def step(action):
    # environment transition (body omitted in this snippet); must return
    # (observation_, reward, done) as used by the loop below
    ...


def run_maze():
    step_counter = 0
    for episode in range(300):
        # initial observation
        observation = update()

        while True:
            # fresh env
            render()
            # generate a random environment
            # RL choose action based on observation
            action = RL.choose_action(observation)

            # RL take action and get next observation and reward
            observation_, reward, done = step(action)

            RL.store_transition(observation, action, reward, observation_)

            if (step_counter > 200) and (step_counter % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step_counter += 1
            time.sleep(0.01)
        time.sleep(0.02)


    # end of game
    print('game over')

if __name__ == "__main__":
    # maze game
    RL = DeepQNetwork(n_actions=8, n_features=30,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      # output_graph=True
                      )
    #env.after(100, run_maze)
    run_maze()
    RL.plot_cost()
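
Since step() is only stubbed out above, the following is a small, hypothetical grid-world version of the update()/render()/step() trio, shown purely to illustrate the (observation_, reward, done) contract the loop expects. It is deliberately tiny (4 actions, 2 features) and does not match the 8-action, 30-feature network configured above; every name and shape in it is an assumption.

import numpy as np

GRID = 6                              # hypothetical maze size
goal = np.array([GRID - 1, GRID - 1])
agent = np.array([0, 0])

def update():
    """Reset the toy agent and return the initial observation."""
    global agent
    agent = np.array([0, 0])
    return (agent - goal).astype(np.float32)      # relative position as features

def render():
    pass  # no visualization in this toy environment

def step(action):
    """Apply one of 4 moves and return (observation_, reward, done)."""
    global agent
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}   # up, down, left, right
    agent = np.clip(agent + moves[action], 0, GRID - 1)
    done = bool((agent == goal).all())
    reward = 1.0 if done else -0.01                          # small step penalty
    return (agent - goal).astype(np.float32), reward, done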
Example #4
    def __init__(self):

        # define DQN algorithm
        tensorflow.reset_default_graph()
        self.RL1 = DeepQNetwork(
            n_actions=len(robot1.action_space),
            n_features=len(robot1.observation_space),
            learning_rate=0.0001,
            e_greedy=0.9,
            replace_target_iter=100,
            memory_size=2000,
            e_greedy_increment=0.008,
        )  # e_greedy_increment was 0.0008 previously

        self.total_steps = 0
        self.rsrvl = 0.05  # to check
        self.train()
Example #5
def hello():
    #print("hello")
    from maze_env import Maze
    from RL_brain import DeepQNetwork
    global env
    env = Maze()
    global RL
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
Example #6
def run_snake():
    brain = DeepQNetwork(4, "")
    snakeGame = SnakeEnv()
    # feed an initial "move right" action to start the game
    observation, reward, terminal, score = snakeGame.step(np.array([0, 0, 0, 1]))
    observation = pre_process(observation)
    brain.set_init_state(observation[:, :, 0])

    # start the actual game
    i = 1  # step counter
    while i<=500000:
        i = i + 1
        action = brain.choose_action()
        next_observation, reward, terminal, score = snakeGame.step(action)
        # print(reward)
        
        next_observation = pre_process(next_observation)
        brain.learn(next_observation, action, reward, terminal)
        if i % 100 == 0:
            print(i)
    
    # plot the loss and round-step curves
    brain.plot_cost()
    snakeGame.plot_cost()
Example #7
# automatically creates a local docker container
env.configure(remotes=1, fps=5,
              vnc_driver='go', 
              vnc_kwargs={'encoding': 'tight', 'compress_level': 0, 
                          'fine_quality_level': 100, 'subsample_level': 0})
observation_n = env.reset()
#
step = 0
n_act = 3
n_features = 2
cur_y, cur_x = resetY, resetX
RL = DeepQNetwork(n_act, n_features,
              learning_rate=0.11,
              reward_decay=0.9,
              e_greedy=0.9,
              hidden_layers=[10, 10],
              replace_target_iter=200,
              memory_size=4000,
              # output_graph=True
              )
#

while True:
  goal_y, goal_x = centre_button(observation_n[0])
  if goal_y == -1:
    observation_n, reward_n, done_n, info = env.step([[universe.spaces.PointerEvent(resetX, resetY, 0)]])
    cur_x, cur_y = resetX, resetY
    env.render()
    continue
  state = [goal_y - cur_y, goal_x - cur_x]
  #state = [int(round(n)) for n in state] 
Example #8
import gym
import numpy as np
import time
from RL_brain import DeepQNetwork

env = gym.make('sheep-v0')
env = env.unwrapped

print(env.action_space)
print(env.observation_space)

RL = DeepQNetwork(
    n_actions=env.DISCRETE_Action_Count,
    n_features=env.FEATURE_Count,
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
)

total_steps = 0
REWARD_DISTANCE = 100000
REWARD_RADIUS = 50

for i_episode in range(1000):

    # reset() is not working correctly, so call the private _reset() instead
    observation = env._reset()
    observation = np.asarray(observation)
    ep_r = 0
Example #9
    NUM_IOT = 50
    NUM_FOG = 5
    NUM_EPISODE = 1000
    NUM_TIME_BASE = 100
    MAX_DELAY = 10
    NUM_TIME = NUM_TIME_BASE + MAX_DELAY

    # GENERATE ENVIRONMENT
    env = Offload(NUM_IOT, NUM_FOG, NUM_TIME, MAX_DELAY)

    # GENERATE MULTIPLE CLASSES FOR RL
    iot_RL_list = list()
    for iot in range(NUM_IOT):
        iot_RL_list.append(
            DeepQNetwork(
                env.n_actions,
                env.n_features,
                env.n_lstm_state,
                env.n_time,
                learning_rate=0.01,
                reward_decay=0.9,
                e_greedy=0.99,
                replace_target_iter=200,  # each 200 steps, update target net
                memory_size=500,  # maximum of memory
            ))

    # TRAIN THE SYSTEM
    train(iot_RL_list, NUM_EPISODE)
    print('Training Finished')
Example #10
            'show_deck_public':
            ['+3', '+11', '+15', 'dog', 'DOG', '-5', '+5', '+8', '-8']
        }]
    }

    action = decide(inputDic)

    print(action)
    input("Testing Action")


if __name__ == "__main__":

    # Parse the data properly
    # data_parser.rl_parse_raw_data()

    RL = DeepQNetwork(SELL_ACTIONS,
                      SELL_FEATURES,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=False)

    test()

    train_from_data()

    print("Displaying the cost...")
    RL.plot_cost()
Example #11
    index_ = '_4'
    save_list = [
        10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000, 5000, 10000, 50000,
        100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000,
        1000000
    ]

    train = True
    env = envR(show=False)

    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      rows=env.rows,
                      cols=env.cols,
                      learning_rate=0.00001,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=5000,
                      e_greedy_increment=0.0005,
                      output_graph=False)

    step = 0
    succ = 0
    total_cost = 0
    for episode in range(int(r)):
        pre_maps = env.reset()
        for i in range(100):

            isVisited = True
            # print('episode:',episode)
Example #12

import gym
from gym import wrappers
from RL_brain import DeepQNetwork

env = gym.make('MountainCar-v0')
env = wrappers.Monitor(env, "./gym-results", force=True)

print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(n_actions=3, n_features=2, learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=300, memory_size=3000,
                  e_greedy_increment=0.0002,)

total_steps = 0


for i_episode in range(300):

    observation = env.reset()
    ep_r = 0
    while True:
        env.render()

        action = RL.choose_action(observation)

        observation_, reward, done, info = env.step(action)
Example #13
vrep.simxFinish(-1)  #clean up the previous stuff
clientID = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5)
if clientID == -1:
    print("Could not connect to server")
    sys.exit()

print(robot1.action_space)  # inspect how many actions are available in this environment
print(robot1.observation_space)  # inspect how many state observations are available

# define the DQN algorithm
RL1 = DeepQNetwork(
    n_actions=len(robot1.action_space),
    n_features=len(robot1.observation_space),
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0008,
)

RL2 = DeepQNetwork(
    n_actions=len(robot2.action_space),
    n_features=len(robot2.observation_space),
    learning_rate=0.01,
    e_greedy=0.9,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.0008,
)
Example #14
import numpy as np
import torch as t
from gym import make

from RL_brain import DeepQNetwork

np.set_printoptions(precision=2, suppress=True)

env = make("MountainCar-v0")
# env = env.unwrapped
print("Observation_space:{}\nAction_space:{}".format(env.observation_space,
                                                     env.action_space))
RL = DeepQNetwork(env.action_space.n,
                  2,
                  learning_rate=0.01,
                  e_greedy_increment=0.001,
                  double_q=True,
                  prioritized=True,
                  dueling=True)

for i in range(500):
    observation = env.reset()
    while True:
        env.render()
        action = RL.choose_action(observation)
        observation_, reward, done, _ = env.step(action)
        reward = observation_[0]  # use the car's position as a shaped reward
        RL.store_transition(observation, action, reward, observation_)
        if RL.memory_counter > RL.memory_size:
            RL.learn()
        if done:
Example #15
                break
            step += 1  # total step count

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,  # attributes of the observation/state, e.g. length, width, height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        prioritized=True,
        replace_target_iter=200,  # replace the target_net parameters every 200 steps
        memory_size=2000,  # memory capacity
        # output_graph=True   # whether to write a TensorBoard file
    )
    env.after(100, run_maze)  # run the reinforcement-learning training
    env.mainloop()

    # view the training-time curve
    his_prioritize = np.vstack((episodes, steps))

    file = open('his_prioritize.pickle', 'wb')
    pickle.dump(his_prioritize, file)
    file.close()
Example #16
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(env.n_actions, env.n_features,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,    # replace the target_net parameters every 200 steps
                      memory_size=2000,           # memory capacity
                      # output_graph=True
                      )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Example #17
def main():

    #env = grc.RemoteEnv('tmp/sock')

    #env = TrackedEnv(env)

    env = retro.make(game='SonicTheHedgehog-Genesis',
                     state='GreenHillZone.Act1')

    total_steps = 0  # total step count

    sess = tf.Session()
    env = DeepQNetwork(n_actions=9, double_q=False, sess=sess, env=env)  # initialize the DQN class (it wraps the retro env)
    '''
    Composite actions are involved, e.g. moving right while jumping. The gym retro
    environment's env.action_space.sample() is a 12-dimensional one-hot array where
    each position set to 1 stands for a single action; e.g. [0,0,0,0,0,0,1,0,0,0,0,0]
    means "move right" only. To obtain composite actions, the following action set is
    designed, and the designed actions are converted to and from retro's default ones:
                  [[1,0,0,0,0,0,0],  stand still
                  [0,1,0,0,0,0,0],   move left
                  [0,0,1,0,0,0,0],   move right
                  [0,0,0,0,1,0,0],   roll left
                  [0,0,0,0,0,1,0],   roll right
                  [0,0,0,1,0,0,0],   crouch
                  [0,0,0,0,0,0,1]]
    During training the action space contains only these designed actions; once an
    action is chosen it is converted back to the corresponding retro action.
    '''

    # Initialize all variables. Variable initialization must be done explicitly
    # before running any other model operation; the simplest way is to add an op
    # that initializes every variable and run it before using the model.
    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver()  # saver for all trained variables
    # trained models go into the saved_networks folder so training can resume from there
    checkpoint = tf.train.get_checkpoint_state('saved_networks')

    if checkpoint and checkpoint.model_checkpoint_path:
        # if the model was trained before, resume from the checkpoint
        saver.restore(sess, checkpoint.model_checkpoint_path)
        print('Successfully loaded: ', checkpoint.model_checkpoint_path)
    else:
        print('load failed')  # if no saved model exists, train from scratch

    # for episode in range(2000):
    action_stack = deque()  # stack holding the remaining repeats of the current action
    info_rings = deque()  # queue tracking changes in the rings count reported in info
    info_rings.append(0)  # seed it with a 0

    # total_reward = 0  # cumulative reward
    # reset the environment and get the first observation; it is a color frame
    # that is not used directly for now
    observation = env.reset()

    # observation, reward, done, _ = env.step(env.action_space.sample())  # take a random action
    '''At the start of an episode there are no 4 frames yet, so the first frame is stacked 4 times.'''
    # x_t_1, r_0, terminal, _ = env.step(env.action_space.sample())  # random action; returns frame x_t, reward r_0, terminal flag
    # resize the frame to 80x80 and convert it to grayscale
    x_t_2 = cv2.cvtColor(cv2.resize(observation, (80, 80)), cv2.COLOR_BGR2GRAY)
    # binarize the image to black and white
    ret, x_t = cv2.threshold(x_t_2, 1, 255, cv2.THRESH_BINARY)

    # only one frame exists at the start, but a state needs 4 stacked frames,
    # so this frame is repeated to form an 80x80x4 array
    s_t = np.stack((x_t, x_t, x_t, x_t), axis=2)
    #x=cv2.cvtColor(cv2.resize(observation_, (80,80)), cv2.COLOR_BGR2YCR_CB)
    for episode in range(3000):
        while True:
            env.render()  # render the environment

            # Action selection: since 4 stacked frames form one state, the same
            # action is used for all 4 frames, i.e. each action is repeated 4 times.

            if len(action_stack) > 0:  # if the action stack is not empty, pop the next repeat
                action_exute = action_stack.pop()  # pop one of the 4 repeated actions
            else:  # the repeats are used up, so choose a new action from the state
                # choose a designed composite action from the 4-frame state
                action_button = env.choose_action(s_t)
                # convert it to an action the emulator recognizes
                action_system = env.action_button_2_onehot(action_button)
                for i in range(3):
                    # push 3 copies onto the stack; with the current one that makes 4 repeats
                    action_stack.append(action_system)
                action_exute = action_system

            # take the action to get the next observation_, the reward, etc.
            observation_, reward, done, info = env.step(action_exute)
            # append the rings count from info to the info_rings queue
            info_rings.append(info['rings'])

            if info_rings[1] - info_rings[0] < 0:
                # the latest action lost rings, so decrease the reward
                reward -= 50
            elif info_rings[1] - info_rings[0] > 0:
                # the latest action gained rings, so increase the reward
                reward += 50
            else:
                pass

            info_rings.popleft()

            # total_reward += reward
            # print(total_reward)

            # encourage moving right by giving that action a small extra reward
            # if np.argmax(action_system) == 7:
            #     reward = 0.1

            if done:  # if the agent dies mid-episode, give a negative reward
                reward = -100
                env.reset()

            # resize the new frame to 80x80 and convert it to grayscale
            x_t1_pre = cv2.cvtColor(cv2.resize(observation_, (80, 80)), cv2.COLOR_BGR2GRAY)
            # binarize the image to black and white
            ret, x_t1 = cv2.threshold(x_t1_pre, 1, 255, cv2.THRESH_BINARY)
            x_t1 = np.reshape(x_t1, (80, 80, 1))  # reshape the new frame to 80x80x1

            # x_t1 = RL.image_process(observation_[0])  # process the color frame into a black-and-white 80x80x1 image
            # append the new frame to the last 3 frames to form the next state
            s_t1 = np.append(s_t[:, :, :3], x_t1, axis=2)

            # Store the current state s_t, the action taken, the reward, the next
            # state s_t1, and the done flag. Note that s_t and s_t1 are 4-frame
            # stacks, hence the preprocessing above.
            env.store_memory(s_t, action_button, reward, s_t1, done)  # store the designed composite action button

            # start learning once total_steps exceeds 3000, then learn every 100 steps
            if (total_steps > 3000) and (total_steps % 100 == 0):
                with tf.device("/gpu:0"):
                    env.trainNet()  # train the network

            if done:
                break

            s_t = s_t1

            total_steps += 1

            if total_steps % 50000 == 0:
                saver.save(sess, './', global_step=total_steps)

    print('Game over')
    env.close()
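
The docstring in the example above describes mapping a small set of designed composite actions onto retro's 12-button one-hot array, but the conversion helper itself (action_button_2_onehot) is not shown. The sketch below is a hypothetical stand-in for that idea, not the author's implementation: the Genesis button order and the button choices per action (e.g. DOWN+LEFT for a left roll, B for jump) are assumptions.

import numpy as np

# gym-retro's Genesis button layout (assumed here)
RETRO_BUTTONS = ["B", "A", "MODE", "START", "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z"]

# hypothetical mapping: designed composite action index -> buttons pressed
DESIGNED_ACTIONS = {
    0: [],                  # stand still
    1: ["LEFT"],            # move left
    2: ["RIGHT"],           # move right
    3: ["DOWN"],            # crouch
    4: ["DOWN", "LEFT"],    # roll left
    5: ["DOWN", "RIGHT"],   # roll right
    6: ["B"],               # jump (the unlabeled row in the docstring)
}

def designed_action_to_retro(action_index):
    """Convert a designed composite action into retro's 12-length button array."""
    onehot = np.zeros(len(RETRO_BUTTONS), dtype=np.int8)
    for button in DESIGNED_ACTIONS[action_index]:
        onehot[RETRO_BUTTONS.index(button)] = 1
    return onehot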
Example #18
import gym
import gym_abr
import numpy as np
from RL_brain import DeepQNetwork
import os

S_INFO = 6  # bit_rate, buffer_size, next_chunk_size, bandwidth_measurement(throughput and time), chunk_til_video_end
S_LEN = 8  # take how many frames in the past
A_DIM = 6

env = gym.make('ABR-v0')

RL = DeepQNetwork(
    n_actions=A_DIM,
    n_features=S_INFO * S_LEN,
    learning_rate=1e-4,
    e_greedy=0.99,
    replace_target_iter=100,
    memory_size=3000,
    e_greedy_increment=1e-6,
)

_file = open('test.csv', 'w')
step = 0
episode = 0
while True:
    #for episode in range(3000):
    # initial observation
    ep_r = 0.
    fetch = 0.
    observation = env.reset()
    observation = np.reshape(observation, (S_INFO * S_LEN))
Example #19
            slice_user_seq, max_index = env.generate_subframe_index(
                association_ues, lamda_k=env.lamda_k, data_num=data_num)

            # virtual resource allocation
            r_sk = np.ones([4, 4]) * 0.25

            # physical resource allocation
            total_subframe = max_index
            pr = PhysicalResource(TPs=TPs, user_qos=env.user_qos, env=env)
            slice_sat_ratio, slice_avg_RU = pr.allocate(
                association_ues, r_sk, total_subframe)

            print('slice_sat_ratio: ', slice_sat_ratio, ' \n slice_avg_RU: ',
                  slice_avg_RU)
            print('-------end----')


if __name__ == '__main__':
    # main()
    RL = DeepQNetwork(
        n_actions=256,
        n_features=12,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,  # replace the target_net parameters every 200 steps
        memory_size=3000,  # memory capacity
        # output_graph=True   # whether to write a TensorBoard file
    )
    train()
Example #20
            # swap observation
            observation = observation_
            # break while loop when end of this episode
            if done:
                break
            step += 1

            #if LastTime > 50: return(LastTime);

    # end of game
    print('game over')
    #env.destroy()


if __name__ == "__main__":
    # bird game
    env = game.GameState()  # define the game environment -> jump to game
    actions = 2
    features = 5
    RL = DeepQNetwork(actions,
                      features,
                      learning_rate=10**-2,
                      reward_decay=1.0,
                      e_greedy=0.6,
                      replace_target_iter=200,
                      memory_size=50,
                      output_graph=True)
    time.sleep(0.5)
    run_bird()
    #env.mainloop()
    RL.plot_cost()
Example #21
            #swap observation
            observation = observation_

            # break the while loop at the end of this episode
            if done:
                break

            step += 1

    print('game over!')
    env.destroy()


if __name__ == '__main__':
    env = Maze()

    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Example #22
    Num_Training = timesteps - Num_Exploration
    ratio_total_reward = 0.2

    RL_set = []
    graph_set = []
    sess_set = []
    for i in range(n_agents):
        g = tf.Graph()
        sess = tf.Session(graph=g)

        with sess.as_default():
            with g.as_default():

                RL = DeepQNetwork(n_actions=n_actions,
                                  n_features=vector_obs_len,
                                  sess=sess,
                                  agent_id=i,
                                  num_training=Num_Training,
                                  learning_rate=0.00025,   #0.002
                                  reward_decay=0.99,
                                  replace_target_iter=5000,
                                  memory_size=Num_Exploration,
                                  batch_size=32,
                                  save_model_freq=10000,
                                  load_model=False,
                                  )

                RL_set.append(RL)

    # run_this is written as a single function executed for all agents
    run_this(RL_set, n_episode, learn_freq, Num_Exploration, n_agents, ratio_total_reward)
Example #23
 env_list = []
 env_list2 = []
 for file_path in file_path_list:
     
     df = pd.read_csv(file_path)
     df = df.sort_values('trade_date', ascending=True)
     df = df.iloc[22:].reset_index(drop=True)  # drop the first few days that lack moving-average data
     env_list.append(stock(df.iloc[0:1500], init_money=1000000, window_size=60))
     print(env_list)
     env_list2.append(stock(df.iloc[1500:].reset_index(drop=True), init_money=1000000, window_size=60))
     
 RL = DeepQNetwork(env_list[0].n_actions, env_list[0].n_features,
                   learning_rate=0.002,
                   reward_decay=0.9,
                   e_greedy=0.9,
                   replace_target_iter=300,
                   memory_size=7000,
                   batch_size=256,
                   # output_graph=True
                   )
 
 run(env_list, max_round)
 
 # env = stock(df)
 # env = BackTest(env, show_log=True)
 # env.draw('trade.png', 'profit.png')
 
 i = 0
 for env in env_list2:
     BackTest(env, show_log=False)
     name1 = 'trade1_' + str(i) + '.png'
Example #24
from RL_brain import DeepQNetwork
from env import Env
from visual import read_log
import matplotlib.pyplot as plt
import numpy as np
import os

if __name__ == "__main__":
    if os.path.exists("log.txt"):
        os.remove("log.txt")
    ENV = Env()
    RL = DeepQNetwork(
        4,
        2,
        learning_rate=0.01,
        reward_decay=0.75,
        e_greedy=0.8,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    step = 1
    reward_his = []
    for episode in range(200):
        ENV.reset()  # reset the environment
        re_hi = 0.0
        # print("episode:", episode)
        with open('log.txt', 'a+') as f:
            f.write("episode:" + str(episode) + "\n")
        while True:
            observation = ENV.refresh_env()  # get the current environment observation
Example #25
import gym
from RL_brain import DeepQNetwork
import time

env = gym.make('CartPole-v0')
print(env.action_space)
print(env.observation_space)
print(env.observation_space.high)
print(env.observation_space.low)

RL = DeepQNetwork(
    n_actions=env.action_space.n,
    n_features=len(env.observation_space.high),
    learning_rate=0.01,
    e_greedy=0.99,
    replace_target_iter=100,
    memory_size=2000,
    e_greedy_increment=0.001,
    hidden_layers=[20, 20],
)

total_steps = 0

for i_episode in range(100):

    observation = env.reset()

    while True:
        env.render()

        action = RL.choose_action(observation)
Example #26
    # create world
    world = scenario.make_world()
    # create multiagent environment
    env = MultiAgentEnv(world,
                        scenario.reset_world,
                        scenario.reward,
                        scenario.observation,
                        info_callback=None,
                        shared_viewer=False)

    # define the DQN algorithm
    RL = DeepQNetwork(
        n_actions=env.action_space[0].n,
        n_features=env.observation_space[0].shape[0],
        learning_rate=0.01,
        e_greedy=0.9,
        replace_target_iter=100,
        memory_size=2000,
        e_greedy_increment=0.0008,
    )

    total_steps = 0
    for i_episode in range(100):

        # get the first observation of episode i_episode
        observation = env.reset()
        observation = observation[0]
        ep_r = 0
        cnt = 0
        while True:
            print("i_episode: " + str(i_episode) + "    cnt: " + str(cnt))
Example #27
    #    servo[7] = servo_max
    #else:
    #    servo[7] = servo_mid
    # Iterate through the positions sequence 3 times.
    
    
    for i in range(4):
        pwm.set_pwm(i, 0, servo[i])


action_num = 16
observation_num = 4
distance_riq = 1
RL = DeepQNetwork(n_actions=action_num,
                  n_features=observation_num,
                  learning_rate=0.01, e_greedy=0.9,
                  replace_target_iter=100, memory_size=2000,
                  e_greedy_increment=0.001,)

total_steps = 0
actionDrive = [0,0,0,0]


def convert(action):
    
    actionDrive = '{0:04b}'.format(action)
    actionDrive = list(actionDrive)
    
    drive(map(int,actionDrive))
##  if action == 0:
##    actionDrive = [0,0,0,0]
Example #28
            # break while loop when end of this episode
            if done:
                break
            step += 1

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        output_graph=True  # whether to output the graph, True or False
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()

# How to launch TensorBoard directly from VS Code:
# https://devblogs.microsoft.com/python/python-in-visual-studio-code-february-2021-release/
Example #29
                RL.learn()

            # update the state
            observation = observation_

            if done:
                break
            step += 1

    print("game over")
    env.destroy()


if __name__ == "__main__":
    # maze game
    env = Maze()
    RL = DeepQNetwork(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        e_greedy_increment=0.01,
        # output_graph=True
    )
    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
Example #30
def topology():
    "Create a network."
    net = Mininet_wifi(controller=Controller, accessPoint=OVSKernelAP)

    print "*** Creating nodes"
    h1 = net.addHost('h1', mac='00:00:00:00:00:01', ip='10.0.0.1/8')
    sta14 = net.addStation('sta1',
                           mac='00:00:00:00:00:02',
                           ip='10.0.0.2/8',
                           position='70,50,0')

    ap1 = net.addAccessPoint('ap1',
                             ssid='ssid-ap1',
                             mode='g',
                             channel='1',
                             position='50,50,0')
    ap2 = net.addAccessPoint('ap2',
                             ssid='ssid-ap2',
                             mode='g',
                             channel='6',
                             position='70,50,0',
                             range=30)
    ap3 = net.addAccessPoint('ap3',
                             ssid='ssid-ap3',
                             mode='g',
                             channel='11',
                             position='90,50,0')
    c1 = net.addController('c1', controller=Controller)

    net.setPropagationModel(model="logDistance", exp=5)
    print "*** Configuring wifi nodes"
    net.configureWifiNodes()

    print "*** Associating and Creating links"
    net.addLink(h1, ap1)
    net.addLink(ap1, ap2)
    net.addLink(ap2, ap3)
    # net.addLink(ap1, sta14)
    """uncomment to plot graph"""
    # net.plotGraph(max_x=400, max_y=400)
    net.plotGraph(max_x=120, max_y=120)

    net.setMobilityModel(time=0,
                         model='RandomWayPoint',
                         max_x=120,
                         max_y=120,
                         min_v=30,
                         max_v=50,
                         seed=5,
                         ac_method='ssf')

    print "*** Starting network"
    net.build()
    c1.start()
    ap1.start([c1])
    ap2.start([c1])
    ap3.start([c1])

    print "*** Running CLI"
    # CLI_wifi(net)
    second = sleeptime(0, 0, 1)

    new_rssi = [
        chanFunt(ap1, sta14),
        chanFunt(ap2, sta14),
        chanFunt(ap3, sta14)
    ]
    print(new_rssi)

    n_actions, n_APs = len(new_rssi), len(new_rssi)
    brain = DeepQNetwork(n_actions, n_APs, param_file=None)

    state = new_rssi
    print('initial observation:' + str(state))

    try:
        while True:
            time.sleep(second)
            new_rssi = [
                chanFunt(ap1, sta14),
                chanFunt(ap2, sta14),
                chanFunt(ap3, sta14)
            ]
            #         # print new_rssi, rssi_tag(sta14)
            #         # print _getreward(sta14,h1)
            #         # print iperf([sta14, h1], seconds=0.0000001)
            #         # print '*********############*'
            action, q_value = brain.choose_action(state)
            reward, nextstate = step(rssi_tag(sta14), action, sta14, ap1, ap2,
                                     ap3, h1)
            #
            #         print 'iperf' + iperf([sta14, h1])
            #         print new_rssi
            brain.setPerception(state, action, reward, nextstate)
            state = nextstate
    except KeyboardInterrupt:
        print('saving replayMemory...')
        brain.saveReplayMemory()
    pass
    # # print new_rssi
    # # snr_dict = map(setSNR,new_rssi)
    #
    # print "*** Stopping network"
    net.stop()