Example 1
    def __init__(self):
        self.train_episode = 1000
        self.r = False  # render or not
        self.u = False  # update or not
        self.env = envR.envR(rows=10, cols=10, n_features=10)
        self.max_steps = 30  # (self.env.maze.c - 2) * (self.env.maze.r - 2)
        self.brain = PolicyGradient(n_actions=4,
                                    n_features=(self.env.maze.c *
                                                self.env.maze.r),
                                    learning_rate=0.0001,
                                    reward_decay=0.95,
                                    output_graph=False,
                                    restore=True)

        # used for evaluation
        self.evaluate = Evaluate(rows=10, cols=10, start_pos=(10, 1))
        self.num_fail = 0
        self.num_find_target = 0
        self.cost, self.density = [], []  # per-episode cost and density
        self.opt_cost, self.opt_dp = [], []  # optimal deceptive path; dp is deceptive_percentage
        self.path = []
        self.reward = []
Example 2
import numpy as np

from envR import envR
# Evaluate and evaluation() are assumed to be imported from elsewhere in this project.


def test(RL):
    env = envR(show=False)
    path, cost, density, num_find_target, opt_cost = [], [], [], 0, []
    evaluate = Evaluate(rows=10, cols=10)
    train = False
    succ = 0
    print("****************************************************")
    for episode in range(100):
        pre_maps = env.reset()
        step = 0
        evaluate.set_start(start_pos=env.agent)
        evaluate.set_goals(real_pos=env.maze.food_pos[0],
                           fake_pos=env.maze.food_pos[1])
        # print("****************************************************")
        # print("EPISODE ", episode)
        # start_test = time.time()
        for step in range(100):

            action = RL.choose_action(str(pre_maps), train)

            reward, done, action_ = env.step(action)

            path.append(action_)

            step += 1  # keep the step count 1-indexed for the summary print below
            if done:
                succ += 1
                cost, density, num_find_target, opt_cost = evaluation(
                    evaluate, cost, density, num_find_target, opt_cost, path)
                path = []
                break
            pre_maps = env.get_maps()
    print('This is ', episode, 'cost:', step, 'succ', succ)
    print('average cost:', np.mean(cost), ' average density:',
          np.mean(density), ' deceptive extent:',
          num_find_target / succ if succ else 0.0)  # avoid dividing by zero when no episode succeeds
    print('optimal cost:', np.mean(opt_cost))
    print()
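A minimal way to drive this test function, assuming the DeepQNetwork constructor shown in Example 3 and an agent that restores its checkpoint internally (as Example 4 suggests); the hyperparameter values are copied from Example 3, not prescribed:

from envR import envR
from RL_brain import DeepQNetwork

env = envR(show=False)  # used only to read n_actions / n_features; test() builds its own env
RL = DeepQNetwork(env.n_actions,
                  env.n_features,
                  rows=env.rows,
                  cols=env.cols,
                  learning_rate=0.00001,
                  reward_decay=0.9,
                  e_greedy=0.9,
                  replace_target_iter=200,
                  memory_size=5000,
                  output_graph=False)
test(RL)  # runs 100 evaluation episodes and prints the averaged metrics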
Example 3
from envR import envR
from RL_brain import DeepQNetwork

import time

if __name__ == "__main__":
    r = 1000000  # presumably the total number of training episodes
    index_ = '_4'  # suffix used to tag this training run's checkpoints
    # episode counts at which the model is saved
    save_list = [
        10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 1000, 5000, 10000, 50000,
        100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000,
        1000000
    ]

    train = True
    env = envR(show=False)

    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      rows=env.rows,
                      cols=env.cols,
                      learning_rate=0.00001,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=5000,
                      e_greedy_increment=0.0005,
                      output_graph=False)

    step = 0
    succ = 0
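The snippet stops right before the training loop. A rough sketch of how it might continue, under stated assumptions: choose_action and the env.reset/step/get_maps calls mirror the usage in Example 2, while RL.store_transition and RL.learn are assumed DeepQNetwork methods that do not appear in these excerpts:

    for episode in range(r):
        pre_maps = env.reset()
        for _ in range(100):
            action = RL.choose_action(str(pre_maps), train)
            reward, done, action_ = env.step(action)  # step() signature as used in Example 2
            maps = env.get_maps()
            RL.store_transition(str(pre_maps), action, reward, str(maps))  # assumed API
            step += 1
            if step > 200 and step % 5 == 0:
                RL.learn()  # assumed API
            if done:
                succ += 1
                break
            pre_maps = maps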
Example 4
                self.sess,
                tf.train.latest_checkpoint(
                    '/home/yiranruan/cnn_s/data/checkpoint_dir_' + name))

    # def get_steps(self):
    #     print(self.sess.run(self.steps))

    def plot_cost(self, name):
        # appends the recorded cost history to a per-run text log rather than plotting it
        self.f = open("./cost_2_" + name + ".txt", 'a')
        self.f.write(str(self.cost_his) + '\n')
        self.f.close()


if __name__ == "__main__":
    from envR import envR
    env = envR(True)
    env.reset()
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      rows=env.rows,
                      cols=env.cols,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=True)
    for i in range(100):
        action = input('actions: ')
        reward = 0
        pre_maps = env.get_maps()
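The manual-control loop is cut off here; one hedged way it could continue, reusing the env.step signature from Example 2 (the int() cast and the print are illustrative assumptions, not part of the original):

        reward, done, action_ = env.step(int(action))  # apply the typed action
        print('reward:', reward, 'done:', done)
        if done:
            env.reset()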