Example #1
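# Worker thread for distributed PPO: each worker owns its own Maze environment
# but shares the global PPO policy/updater.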
class Worker(object):
    def __init__(self, wid):
        self.wid = wid
        self.env = Maze(Map)
        self.ppo = GLOBAL_PPO

    def work(self):
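        # Repeatedly roll out episodes with the current shared policy and push
        # transition batches to the global queue for the PPO updater thread.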
        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
        while not COORD.should_stop():
            s = self.env.reset()
            ep_r = 0
            buffer_s, buffer_a, buffer_r = [], [], []
            t = 0
            while True:
                if not ROLLING_EVENT.is_set():  # while global PPO is updating
                    ROLLING_EVENT.wait()  # wait until PPO is updated
                    buffer_s, buffer_a, buffer_r = [], [], []  # clear history buffer, use new policy to collect data
                a = self.ppo.choose_action(s)
                baseline_a = base.choose_action(self.env, 1)
                s_, r, done = self.env.step({(0, a), (1, U.ch(baseline_a))})
                r = r[0]
                buffer_s.append(s.flatten())
                buffer_a.append(a)
                buffer_r.append(r)  # store the reward for this step
                s = s_
                ep_r += r

                t += 1
                #print('step : %d, reward : %d, done : %d' % (t, r, done))

                GLOBAL_UPDATE_COUNTER += 1  # count toward the minimum batch size; no need to wait for other workers
                if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE or done:
                    print(GLOBAL_EP)
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        v_s_ = self.ppo.get_v(s_)
                    discounted_r = []  # compute discounted reward
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        discounted_r.append(v_s_)
                    discounted_r.reverse()

                    bs = np.vstack(buffer_s)
                    ba = np.vstack(buffer_a)
                    br = np.array(discounted_r)[:, np.newaxis]
                    buffer_s, buffer_a, buffer_r = [], [], []
                    QUEUE.put(np.hstack((bs, ba, br)))  # put data in the queue
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE or GLOBAL_EP >= EP_MAX:
                        print('update')
                        ROLLING_EVENT.clear()  # stop collecting data
                        UPDATE_EVENT.set()  # trigger the global PPO update
                    if GLOBAL_EP >= EP_MAX:  # stop training
                        COORD.request_stop()
                        break

                if done:
                    # record reward changes, plot later
                    if len(GLOBAL_RUNNING_R) == 0:
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 +
                                                ep_r * 0.1)
                    GLOBAL_EP += 1
                    print(
                        '{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
                        '|W%i' % self.wid,
                        '|Ep_r: %.2f' % ep_r,
                    )
                    break
Example #2
        act[1] = 0
    else:
        if act[0] == 0 and act[1] == 0:
            return
        if act[0] == 0:
            print('Wait for P0')
        else:
            print('Wait for P1')
    
    print('You pressed: ' + event.keysym)


if human_play:
    Image.bind('<Key>', read_action)
else:
    while True:
        env.reset()
        stepcnt = 0
        start = time.time()
        score = [0, 0]
        if have_render:
            env.render()  # render the environment
        while True:
            stepcnt += 1
            # both players act according to the baseline policy
            act[0] = baseline.choose_action(env, 0)
            act[1] = baseline.choose_action(env, 1)
            res = env.step({(0, ch(act[0])), (1, ch(act[1]))})

            cv2.imshow("Image", res[0])
            cv2.waitKey()  # block until a key is pressed before the next step
            cv2.destroyAllWindows()
            
Example #3
    GLOBAL_UPDATE_COUNTER, GLOBAL_EP = 0, 0
    GLOBAL_RUNNING_R = []
    COORD = tf.train.Coordinator()
    QUEUE = queue.Queue()  # workers put collected data into this queue
    threads = []
    for worker in workers:  # worker threads
        t = threading.Thread(target=worker.work, args=())
        t.start()  # training
        threads.append(t)
    # add a PPO updating thread
    threads.append(threading.Thread(target=GLOBAL_PPO.update))
    threads[-1].start()

    COORD.join(threads)
    print('training finished')

    env = Maze(Map)
    tf.reset_default_graph()
    load_PPO = PPO(Load=True)
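    # evaluation: run the loaded policy against the baseline opponent and render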
    while True:
        s = env.reset()
        for t in range(100):
            env.render()
            a = load_PPO.choose_action(s)
            baseline_a = base.choose_action(env, 1)
            s, r, done = env.step({(0, a), (1, U.ch(baseline_a))})
            if r[0] != -1:
                print(r)
            if done:
                break