Python Osillator.render 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: env

클래스/타입: Osillator

메소드/함수: render

hotexamples.com에서의 예제들: 2

Python Osillator.render - 2개의 예제가 발견되었습니다. 아래는 오픈소스 프로젝트에서 추출한 Python env.Osillator.render의 실제 사용 예제 중 가장 높은 평가를 받은 것들입니다. 예제를 평가해 주시면 예제 품질 향상에 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

Osillator(10)

reset(10)

step(10)

close(2)

render(2)

예제 #1

파일 보기

파일: ppo.py 프로젝트: wangyixu14/MultiControllerAdapter

def train():
    """Train a PPO agent on the ``Osillator`` environment.

    Runs ``EP_MAX`` episodes of exactly ``EP_LEN`` steps each (the step loop
    does not stop early when the environment reports ``done``). At every
    step the PPO policy output ``a`` is turned into a control input ``u`` by
    ``gene_u(s, a, model_1, model_2)`` and applied to the environment.

    The reward returned by ``env.step`` is discarded; a shaped reward is
    computed here instead:

    * start from ``10``;
    * subtract ``WEIGHT * |clip(u, -1, 1)| * 20`` (control penalty);
    * subtract ``(|s_[0]| + |s_[1]|) / WEIGHT`` (next-state penalty);
    * subtract ``100`` when ``done`` is set before step index 95.

    Side effects:
        * Rebinds the module-level ``all_ep_r`` to a fresh list holding an
          exponentially smoothed episode reward (0.9 old / 0.1 new).
        * Sets ``update_plot`` after each episode and ``stop_plot`` on exit
          when ``PLOT_RESULT`` is truthy.
        * Saves the model every 500 episodes once ``ep >= 3000``.
        * Prints a progress line per episode.

    The environment is always closed (and ``stop_plot`` signalled) on exit,
    including when training raises.

    NOTE(review): ``s_`` and ``done`` are only bound inside the step loop,
    so this assumes ``EP_LEN >= 1`` - confirm against the configuration.
    """
    global all_ep_r, update_plot, stop_plot

    env = Osillator()
    try:
        state_dim = 2
        action_dim = 2

        # Seed NumPy and PyTorch for reproducibility (the environment itself
        # is not seeded here).
        np.random.seed(RANDOMSEED)
        torch.manual_seed(RANDOMSEED)

        ppo = PPO(state_dim, action_dim, method=METHOD)
        all_ep_r = []
        for ep in range(EP_MAX):
            s = env.reset()
            ep_r = 0
            t0 = time.time()
            for t in range(EP_LEN):
                if RENDER:
                    env.render()
                a = ppo.choose_action(s)
                u = gene_u(s, a, model_1, model_2)
                # The environment's own reward is ignored on purpose; the
                # shaped reward below is what the agent learns from.
                s_, _, done = env.step(u)

                r = 10
                r -= WEIGHT * abs(np.clip(u, -1, 1)) * 20
                r -= 1 / WEIGHT * (abs(s_[0]) + abs(s_[1]))
                # NOTE(review): 95 is a hard-coded step threshold; presumably
                # tied to EP_LEN - confirm.
                if done and t < 95:
                    r -= 100

                # The raw shaped reward is stored as-is (no normalization).
                ppo.store_transition(s, a, r)
                s = s_
                ep_r += r

                # Update the policy as soon as a full batch is buffered.
                if len(ppo.state_buffer) == BATCH_SIZE:
                    ppo.finish_path(s_, done)
                    ppo.update()
                # NOTE: there is intentionally no `break` on `done`; every
                # episode runs for the full EP_LEN steps.

            # Close out whatever partial trajectory remains in the buffer.
            ppo.finish_path(s_, done)
            print(
                'Episode: {}/{}  | Episode Reward: {:.4f}  | Running Time: {:.4f}'.
                format(ep + 1, EP_MAX, ep_r,
                       time.time() - t0))

            # Exponential moving average of the episode reward for plotting.
            if ep == 0:
                all_ep_r.append(ep_r)
            else:
                all_ep_r.append(all_ep_r[-1] * 0.9 + ep_r * 0.1)
            if PLOT_RESULT:
                update_plot.set()
            if (ep + 1) % 500 == 0 and ep >= 3000:
                ppo.save_model(path='ppo', ep=ep, weight=WEIGHT)
    finally:
        # Run the shutdown steps even if training failed, so a consumer
        # waiting on `stop_plot` is released and the environment's resources
        # are not leaked.
        if PLOT_RESULT:
            stop_plot.set()
        env.close()

예제 #2

파일 보기

파일: ppo.py 프로젝트: wangyixu14/MultiControllerAdapter

 # Driver fragment: run training, then (nominally) evaluate a saved policy.
 # The enclosing statement header is not part of this excerpt.
 #
 # Disabled variant that ran train() on a background thread while a Drawer
 # plotted progress on the main thread:
 #     thread = threading.Thread(target=train)
 #     thread.daemon = True
 #     thread.start()
 #     if PLOT_RESULT:
 #         drawer = Drawer()
 #         drawer.plot()
 #         drawer.save()
 #     thread.join()
 train()
 # Hard stop after training: this always raises AssertionError, so the
 # evaluation code below is unreachable unless assertions are disabled
 # (e.g. running Python with -O).
 assert False
 # Evaluation: load a saved PPO model and report the average accumulated
 # environment reward per test episode.
 env = Osillator()
 state_dim = 2
 action_dim = 2
 ppo = PPO(state_dim, action_dim, method=METHOD)
 ppo.load_model()
 # Sum of env rewards over every step of every test episode.
 mean_epoch_reward = 0
 for _ in range(TEST_EP):
     state = env.reset()
     for i in range(EP_LEN):
         if RENDER:
             env.render()
         # NOTE(review): the second positional argument is presumably a
         # deterministic/greedy-action flag - confirm in PPO.choose_action.
         action = ppo.choose_action(state, True)
         u = gene_u(state, action, model_1, model_2)
         next_state, reward, done = env.step(u)
         mean_epoch_reward += reward
         state = next_state
         # Unlike the training loop, evaluation ends the episode on `done`.
         if done:
             break
 # Average total reward per episode.
 print(mean_epoch_reward / TEST_EP)
 env.close()