# NOTE(review): this line is a whitespace-mangled paste of a whole script section:
# the maze Map literal, env/render setup, a `ch` helper mapping action letters
# ('l','r','u','d','b','s') to action indices 0-5, and the start of a Tk-style
# `read_action(event)` key handler that mutates act[0].
# The paste is TRUNCATED mid-statement ("if event.keysym == 'Right':" has no body,
# and the Up/Down/second-player handlers that presumably follow are missing), so
# the original line structure cannot be reconstructed here without guessing —
# recover it from version control before running. Also `Map=\ [` (backslash
# followed by a space) is itself a syntax error even before the collapse.
Map=\ ['#o#*#o#*#', 'o1o *o^o', '# # # #o#', 'o #####^o', '#*#####o#', 'oo^oo^ o', '#oo^#oo0#' ] human_play=False have_render=True env = Maze(Map) if(have_render): Image = env.render() act=[0,0] score=[0,0] def ch(x): if(x=='l'):return 0 if(x=='r'):return 1 if(x=='u'):return 2 if(x=='d'):return 3 if(x=='b'):return 4 if(x=='s'):return 5 def read_action(event): if event.keysym == 'Left': act[0]='l' if event.keysym == 'Right':
# --- Distributed PPO training: workers collect experience, one thread updates ---
# Reconstructed line/indent structure: the original had been collapsed onto a
# single line (a syntax error in Python). Statement order and tokens are
# preserved; only formatting and comments were added.
# Requires tf, queue, threading, workers, GLOBAL_PPO, Maze, Map, PPO, base, U
# to be defined elsewhere in this file/project.
GLOBAL_UPDATE_COUNTER, GLOBAL_EP = 0, 0
GLOBAL_RUNNING_R = []
COORD = tf.train.Coordinator()
QUEUE = queue.Queue()  # workers putting data in this queue

# Start one collection thread per worker, plus a single PPO updating thread.
threads = []
for worker in workers:  # worker threads
    t = threading.Thread(target=worker.work, args=())
    t.start()  # training
    threads.append(t)
# add a PPO updating thread
threads.append(threading.Thread(target=GLOBAL_PPO.update, ))
threads[-1].start()
COORD.join(threads)  # block until all workers and the updater finish
print('aasdas')  # NOTE(review): leftover debug marker — kept to preserve behavior

# --- Evaluation: reload the trained policy and play it against a baseline ---
env = Maze(Map)
tf.reset_default_graph()  # clear the training graph before rebuilding for loading
load_PPO = PPO(Load=True)  # restore saved weights
while True:  # play episodes forever; Ctrl-C to stop
    s = env.reset()
    for t in range(100):  # cap each episode at 100 steps
        env.render()
        a = load_PPO.choose_action(s)
        baseline_a = base.choose_action(env, 1)
        # NOTE(review): this passes a SET of (agent_id, action) tuples — sets are
        # unordered; if env.step expects per-agent actions, a dict like
        # {0: a, 1: U.ch(baseline_a)} is probably intended. Confirm against
        # Maze.step before changing.
        s, r, done = env.step({(0, a), (1, U.ch(baseline_a))})
        if (r[0] != -1):  # only log non-default (interesting) rewards
            print(r)
        if (done):
            break