Ejemplo n.º 1
0
# Grid layout passed to Maze(): seven row strings of nine characters each.
# NOTE(review): the meaning of the individual symbols ('#', 'o', '*', '^',
# '0', '1', ' ') is defined by the Maze class, which is not visible here.
Map = [
    '#o#*#o#*#',
    'o1o  *o^o',
    '# # # #o#',
    'o #####^o',
    '#*#####o#',
    'oo^oo^  o',
    '#oo^#oo0#',
]

# Run-mode switches.
human_play = False
have_render = True

env = Maze(Map)
if have_render:
    # Keep whatever the first render call returns for later use.
    Image = env.render()

# Two-slot lists, both starting at zero.
# NOTE(review): presumably one slot per player -- confirm against the callers.
act, score = [0, 0], [0, 0]

def ch(x):
    """Translate a one-letter action key into its integer action code.

    The letters 'l', 'r', 'u', 'd', 'b' and 's' map to 0 through 5, in
    that order.  Any other value yields None.
    """
    letters = ('l', 'r', 'u', 'd', 'b', 's')
    # Compare in the fixed order so the position doubles as the action code.
    for code, letter in enumerate(letters):
        if x == letter:
            return code
    return None
def read_action(event):
    if event.keysym == 'Left':
        act[0]='l'
    if event.keysym == 'Right':
Ejemplo n.º 2
0
    GLOBAL_UPDATE_COUNTER, GLOBAL_EP = 0, 0
    GLOBAL_RUNNING_R = []
    COORD = tf.train.Coordinator()
    QUEUE = queue.Queue()  # workers putting data in this queue
    threads = []
    for worker in workers:  # worker threads
        t = threading.Thread(target=worker.work, args=())
        t.start()  # training
        threads.append(t)
    # add a PPO updating thread
    threads.append(threading.Thread(target=GLOBAL_PPO.update, ))
    threads[-1].start()

    COORD.join(threads)
    print('aasdas')

    env = Maze(Map)
    tf.reset_default_graph()
    load_PPO = PPO(Load=True)
    while True:
        s = env.reset()
        for t in range(100):
            env.render()
            a = load_PPO.choose_action(s)
            baseline_a = base.choose_action(env, 1)
            s, r, done = env.step({(0, a), (1, U.ch(baseline_a))})
            if (r[0] != -1):
                print(r)
            if (done):
                break