Example #1
def return_DQN(name_, fea_num):
    with tf.variable_scope(name_):
        dueling_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                                 n_features=fea_num,
                                 memory_size=MEMORY_SIZE,
                                 e_greedy_increment=0.001,
                                 sess=sess,
                                 dueling=True,
                                 output_graph=True)
    return dueling_DQN
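The helper above assumes that sess, ACTION_SPACE and MEMORY_SIZE are already defined in the enclosing module. A minimal sketch of that assumed surrounding setup, borrowing the constant values and the feature count from Example #3, could look like the following (illustrative only, not part of the original file):

# Illustrative sketch: the session and constants that return_DQN relies on.
# The values below mirror Example #3 and are assumptions, not the original code.
import tensorflow as tf
from RL_brain import DuelingDQN

MEMORY_SIZE = 3000   # assumed replay-memory capacity
ACTION_SPACE = 25    # assumed number of discrete actions

sess = tf.Session()
dueling_DQN = return_DQN('dueling', fea_num=3)  # builds the agent in its own variable scope
sess.run(tf.global_variables_initializer())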
Example #2
    def __init__(self, gateway):
        self.gateway = gateway
        self.DuelingDQN = DuelingDQN(actions, 141)
        self.actionMap = ActionMap()
        self.R = 0  # total reward in a round
        self.action = 0
        self.MaxPoint = 120  # max projectile damage (ver 4.10)
        self.SubPoint = 0  # max damage in usual action (ver 4.10)
        self.countProcess = 0
        self.frameData = None
        self.nonDelay = None
        self.currentFrameNum = None
        self.inputKey = None
        self.cc = None
        self.player = None
        self.simulator = None
        self.lastHp_opp = None
        self.lastHp_my = None
        self.isGameJustStarted = None
        self.currentRoundNum = None
        self.isFinishd = None
        self.reward = None
        self.state = []
        self.frame_per_action = self.DuelingDQN.frame_per_action
Example #3
from RL_brain import DuelingDQN
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import gym  # required for gym.make('Pendulum-v0') below

env = gym.make('Pendulum-v0')
env = env.unwrapped
env.seed(1)
MEMORY_SIZE = 3000
ACTION_SPACE = 25

sess = tf.Session()
with tf.variable_scope('natural'):
    natural_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=3,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.001,
                             sess=sess,
                             dueling=False)

with tf.variable_scope('dueling'):
    dueling_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=3,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.001,
                             sess=sess,
                             dueling=True,
                             output_graph=True)

sess.run(tf.global_variables_initializer())
Example #4

                break
            step += 1  # total number of steps

    # end of game
    print('game over')
    env.destroy()


if __name__ == "__main__":
    env = Maze()
    RL = DuelingDQN(
        env.n_actions,
        env.n_features,  # number of observation/state features, e.g. length, width, height
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        dueling=True,
        replace_target_iter=200,  # replace the target_net parameters every 200 steps
        memory_size=2000,  # replay-memory capacity
        # output_graph=True   # whether to write a TensorBoard file
    )
    env.after(100, run_maze)  # start the reinforcement-learning training
    env.mainloop()
    # plot the training-time curve
    his_dueling = np.vstack((episodes, steps))
    file = open('his_dueling', 'wb')
    pickle.dump(his_dueling, file)
    file.close()

    plt.plot(his_dueling[0, :],
             his_dueling[1, :] - his_dueling[1, 0],
Example #5
        if total_steps > MEMORY_SIZE:
            RL.learn()

        if total_steps - MEMORY_SIZE > 15000:
            break

        observation = observation_
        total_steps += 1
    return RL.cost_his, acc_r


if __name__ == "__main__":
    dueling_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=3,
                             learning_rate=0.01,
                             e_greedy=0.9,
                             replace_target_iter=100,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.00005,
                             dueling=True)

    natural_DQN = DuelingDQN(n_actions=ACTION_SPACE,
                             n_features=3,
                             learning_rate=0.01,
                             e_greedy=0.9,
                             replace_target_iter=100,
                             memory_size=MEMORY_SIZE,
                             e_greedy_increment=0.00005,
                             dueling=False)

    c_natural, r_natural = train(natural_DQN)
    c_dueling, r_dueling = train(dueling_DQN)
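Since train() returns each agent's loss history (RL.cost_his) together with the accumulated reward, the natural and dueling runs can be compared directly. A short plotting sketch, assuming matplotlib.pyplot as imported in Example #3, might look like this:

# Sketch only: compare the two runs returned by train().
import matplotlib.pyplot as plt

plt.figure(1)
plt.plot(c_natural, c='r', label='natural')
plt.plot(c_dueling, c='b', label='dueling')
plt.legend(loc='best')
plt.ylabel('cost')
plt.xlabel('training steps')

plt.figure(2)
plt.plot(r_natural, c='r', label='natural')
plt.plot(r_dueling, c='b', label='dueling')
plt.legend(loc='best')
plt.ylabel('accumulated reward')
plt.xlabel('training steps')
plt.show()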
Example #6
from env import environment
from RL_brain import DuelingDQN
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import algorithm_naive_1 as naive

env = environment(number_of_sbs=9)  # select the environment to use
env_naive = naive.environment(bs_list=env.bs_list)
number = env.number_of_sbs
RL = DuelingDQN(n_actions=2 ** number,
                n_features=4 * number + 2,
                learning_rate=0.01, e_greedy=0.9,
                replace_target_iter=100, memory_size=2000,
                e_greedy_increment=0.0008,
                output_graph=True)
total_steps = 0  # step counter
a = pd.DataFrame(columns=['energy_cost'])
plt.figure()
plt.ion()
plt.show()
ep_r_total = []
count_time = 0
energy = []
EE_rate_total = np.zeros(50)
EE_rate_mean = []
counter = 0
mean_min = 10
min_index = 0
for i_episode in range(1000):
    print('iteration is %d' % i_episode)
Example #7
import os
import tensorflow as tf

from VCM_environment import VCMEN
from RL_brain import DuelingDQN

MEMORY_SIZE = 1000
ACTION_SPACE = 8

if __name__ == "__main__":
    env = VCMEN()
    load_model_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                  "models")
    agent = DuelingDQN(n_actions=ACTION_SPACE,
                       n_features=144,
                       memory_size=MEMORY_SIZE,
                       environment_name=env.name,
                       dueling=True,
                       load_model_dir=load_model_dir)
    state_t, reward_t, win = env.observe()

    step = 0
    while not win:
        step += 1
        print(state_t)
        # choose
        observation = state_t.flatten()
        action_t = agent.choose_action(observation)
        # act
        env.execute_action(action_t)
        state_t_1, reward_t, win = env.observe()
        state_t = state_t_1
Example #8
                break
            step += 1
            time += 1
    # end of training
    print('Training over')
    env.get_data_info()


if __name__ == "__main__":
    if len(sys.argv) == 1:
        num = 0
    else:
        num = int(sys.argv[1])
    env = elev_sys(num=num, oddeven=False)
    if num / 2 < 1:
        #RL = DeepQNetwork(9,len(env._step(0,0)[0]),batch_size=64,e_greedy_increment=0.001)
        RL = DuelingDQN(9,
                        len(env._step(0, 0)[0]),
                        memory_size=10000,
                        dueling=False,
                        e_greedy_increment=0.00005)
    else:
        RL = DuelingDQN(9,
                        len(env._step(0, 0)[0]),
                        memory_size=10000,
                        dueling=True,
                        e_greedy_increment=0.0001)
    #RL.load(3)
    train(env, RL, num=num)
    RL.save(num)
Example #9
            if (step > 200) and (step % 5 == 0):
                RL.learn()

            # swap observation
            observation = observation_

            # break while loop when end of this episode
            if done:
                break
            step += 1

    print('game over')
    env.destroy()


if __name__ == '__main__':
    env = Maze()
    RL = DuelingDQN(
        env.n_actions,
        env.n_features,
        learning_rate=0.01,
        reward_decay=0.9,
        e_greedy=0.9,
        replace_target_iter=200,
        memory_size=2000,
        # output_graph=True
    )

    env.after(100, run_maze)
    env.mainloop()
    RL.plot_cost()
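The snippet in this example starts inside the training loop that env.after(100, run_maze) schedules, so the opening of the loop is not shown. A sketch of what such an opening typically looks like with this API (choose_action, step, store_transition) is given below; the episode budget and the env.reset / env.render calls are assumptions, not the original code:

# Illustrative sketch only, not the original run_maze.
def run_maze():
    step = 0
    for episode in range(300):                 # assumed episode budget
        observation = env.reset()              # assumed: the Maze env exposes reset()
        while True:
            env.render()                       # assumed: the Maze env exposes render()
            action = RL.choose_action(observation)
            observation_, reward, done = env.step(action)
            RL.store_transition(observation, action, reward, observation_)
            # learning, the observation swap, and the break-on-done check then
            # follow exactly as shown in the snippet above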
Example #10
            # swap observation
            observation = observation_

            step += 1
            # break while loop when end of this episode
            if done:
                break
        scores.append(env.score)

        if episode % 5 == 0:
            print("#" * 80)
            print(episode, ",", int(step / 10), ",score:", env.score, ",e:",
                  RL.epsilon)
            print("avg-score: {}".format(np.mean(list(scores)[-1500:])))

        if episode % 100 == 0:
            print(observation)
            env.show()


if __name__ == "__main__":
    env = Game()
    RL = DuelingDQN(env.n_actions,
                    env.n_features,
                    learning_rate=1e-4,
                    reward_decay=0.95,
                    e_greedy=0.99,
                    start_epsilon=0.5,
                    e_greedy_increment=1e-5)
    train_2048()
Example #11
import tensorflow as tf
import matplotlib.pyplot as plt

from RL_brain import DuelingDQN
from VCM_environment import VCMEN

N_EPOCHS = 1500
MEMORY_SIZE = 500
ACTION_SPACE = 8

if __name__ == "__main__":

    env = VCMEN()
    agent = DuelingDQN(n_actions=ACTION_SPACE,
                       n_features=144,
                       memory_size=MEMORY_SIZE,
                       environment_name=env.name,
                       e_greedy_increment=0.01,
                       dueling=True)

    win_cnt = 0
    acc_r = [0]
    for foo in range(N_EPOCHS):
        step = 0
        env.reset()
        state_t, reward_t, win = env.observe()
        while True:
            step += 1
            # choose
            observation = state_t.flatten()
            action_t = agent.choose_action(observation)
            # act
Example #12
from env import environment
from RL_brain import DuelingDQN
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

env = environment()  # select the environment to use

RL1 = DuelingDQN(n_actions=2,
                 n_features=6,
                 learning_rate=0.01,
                 e_greedy=0.9,
                 replace_target_iter=100,
                 memory_size=2000,
                 e_greedy_increment=0.0008,
                 output_graph=True)
RL2 = DuelingDQN(n_actions=2,
                 n_features=6,
                 learning_rate=0.01,
                 e_greedy=0.9,
                 replace_target_iter=100,
                 memory_size=2000,
                 e_greedy_increment=0.0008,
                 output_graph=True)
RL3 = DuelingDQN(n_actions=2,
                 n_features=6,
                 learning_rate=0.01,
                 e_greedy=0.9,
                 replace_target_iter=100,
                 memory_size=2000,
                 e_greedy_increment=0.0008,
Example #13
            action_ = (action) * 50 + 300
            # action = state * press_coefficient
            state_, reward, done = env.step(action_)
            if not done:
                reward += 0.05 * (tmp + 1)
            RL.store_transition(state, action, np.float64(reward), state_)

            if done:
                print('...... died')
                RL.learn()
                env.touch_the_restart()
                break
            tmp += 1
            max_ = max(max_, tmp)
            state = state_
        print('Your alpha jump-jump agent jumped at most:', max_, 'hops')


env = Env()
if __name__ == '__main__':
    with tf.Session() as sess:
        with tf.variable_scope('dueling'):
            RL = DuelingDQN(n_actions=14,
                            n_features=1,
                            memory_size=5000000,
                            e_greedy_increment=0.0001,
                            sess=sess,
                            dueling=True,
                            output_graph=True)
        sess.run(tf.global_variables_initializer())
        tf.app.run()