def __init__(self):
    # Runner configuration for a policy-gradient agent on a 10x10 maze.
    # NOTE(review): chunk is whitespace-mangled; reformatted only, no token changed.
    self.train_episode = 1000  # number of training episodes to run
    self.r = False  # render or not
    self.u = False  # update or not
    # Project-local environment; presumably a 10x10 grid world — TODO confirm envR API.
    self.env = envR.envR(rows=10, cols=10, n_features=10)
    self.max_steps = 30  # (self.env.maze.c - 2) * (self.env.maze.r - 2)
    # Agent must be built AFTER self.env: n_features is derived from the maze size.
    self.brain = PolicyGradient(n_actions=4,
                                n_features=(self.env.maze.c * self.env.maze.r),
                                learning_rate=0.0001,
                                reward_decay=0.95,
                                output_graph=False,
                                restore=True)
    # used for evaluation
    self.evaluate = Evaluate(rows=10, cols=10, start_pos=(10, 1))
    # Episode statistics accumulated across runs.
    self.num_fail = 0
    self.num_find_target = 0
    self.cost, self.density = [], []
    # dp is deceptive_percentage
    self.opt_cost, self.opt_dp = [], []  # optimal deceptive path
    self.path = []  # actions taken in the current episode
    self.reward = []  # per-episode rewards
def test(RL):
    """Run 100 greedy evaluation episodes of a trained agent.

    Args:
        RL: trained agent exposing ``choose_action(observation, train)``.

    Prints per-episode step counts plus running averages of path cost,
    density, deceptive extent and optimal cost (from ``evaluation``).
    """
    env = envR(show=False)
    path, cost, density, num_find_target, opt_cost = [], [], [], 0, []
    evaluate = Evaluate(rows=10, cols=10)
    train = False  # greedy action selection during evaluation
    succ = 0
    print("****************************************************")
    for episode in range(100):
        pre_maps = env.reset()
        # Bug fix: clear the action log every episode so steps from a
        # failed (timed-out) episode don't leak into the next evaluation.
        path = []
        step = 0
        evaluate.set_start(start_pos=env.agent)
        evaluate.set_goals(real_pos=env.maze.food_pos[0],
                           fake_pos=env.maze.food_pos[1])
        for step in range(100):
            action = RL.choose_action(str(pre_maps), train)
            reward, done, action_ = env.step(action)
            path.append(action_)
            step += 1  # so the printed cost counts the terminal step
            if done:
                succ += 1
                cost, density, num_find_target, opt_cost = evaluation(
                    evaluate, cost, density, num_find_target, opt_cost, path)
                path = []
                break
            pre_maps = env.get_maps()
        print('This is ', episode, 'cost:', step, 'succ', succ)
        # Bug fix: guard against ZeroDivisionError (and nan means over
        # empty lists) before any episode has succeeded.
        if succ:
            print('average cost:', np.mean(cost),
                  ' average density:', np.mean(density),
                  ' deceptive extent:', num_find_target / succ)
            print('optimal cost:', np.mean(opt_cost))
        else:
            print('no successful episodes yet; statistics unavailable')
        print()
from envR import envR
from RL_brain import DeepQNetwork
import time

if __name__ == "__main__":
    # Training-script setup for a DeepQNetwork agent.
    # NOTE(review): the training loop itself continues beyond this chunk;
    # reformatted only, no token changed.
    r = 1000000  # presumably the total number of training episodes — TODO confirm
    index_ = '_4'  # suffix used to tag this run's artifacts — TODO confirm
    # Episode counts at which checkpoints/metrics are presumably saved.
    save_list = [
        10, 20, 30, 40, 50, 60, 70, 80, 90, 100,
        1000, 5000, 10000, 50000, 100000, 200000, 300000, 400000, 500000,
        600000, 700000, 800000, 900000, 1000000
    ]
    train = True  # learning enabled (contrast with evaluation runs)
    env = envR(show=False)
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      rows=env.rows,
                      cols=env.cols,
                      learning_rate=0.00001,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=5000,
                      e_greedy_increment=0.0005,
                      output_graph=False)
    step = 0  # global step counter
    succ = 0  # successful-episode counter
        # NOTE(review): the two lines below are the tail of a call (most
        # likely saver.restore) whose opening line lies outside this chunk —
        # they restore the session from the latest checkpoint in the given dir.
        self.sess, tf.train.latest_checkpoint(
            '/home/yiranruan/cnn_s/data/checkpoint_dir_' + name))

    # def get_steps(self):
    #     print(self.sess.run(self.steps))

    def plot_cost(self, name):
        """Append the recorded training-cost history to ./cost_2_<name>.txt.

        NOTE(review): opens the file without a ``with`` block and keeps the
        handle on ``self.f`` — consider ``with open(...)``; left unchanged here.
        """
        self.f = open("./cost_2_" + name + ".txt", 'a')
        self.f.write(str(self.cost_his) + '\n')
        self.f.close()


if __name__ == "__main__":
    # Interactive driver: step the environment manually from keyboard input.
    from envR import envR
    env = envR(True)
    env.reset()
    RL = DeepQNetwork(env.n_actions,
                      env.n_features,
                      rows=env.rows,
                      cols=env.cols,
                      learning_rate=0.01,
                      reward_decay=0.9,
                      e_greedy=0.9,
                      replace_target_iter=200,
                      memory_size=2000,
                      output_graph=True)
    for i in range(100):
        action = input('actions: ')
        reward = 0
        pre_maps = env.get_maps()
        # NOTE(review): the loop body continues beyond this chunk.