Example #1
import gym
import tensorflow as tf

from tf_rl.common.monitor import Monitor

env = gym.make(params.env_name)
env = Monitor(env, params.video_dir, force=True)

random_process = GaussianNoise(mu=0.0, sigma=0.0)  # zero noise: evaluation uses the deterministic policy
agent = DDPG(Actor, Critic, env.action_space.shape[0], random_process, params)

global_timestep = tf.compat.v1.train.get_or_create_global_step()

all_distances, all_rewards, all_actions = list(), list(), list()
distance_func = get_distance(agent.params.env_name)  # create the distance measure func
print("=== Evaluation Mode ===")
for ep in range(params.n_trial):
    env.record_start()
    obs = env.reset()
    state = obs["flat_obs"]
    done = False
    episode_reward = 0
    while not done:
        action = agent.eval_predict(state)
        # action = env.action_space.sample()

        # scale for execution in env (in DDPG, every action is clipped between [-1, 1] in agent.predict)
        obs, reward, done, info = env.step(action * env.action_space.high)
        # print(action, reward)
        next_flat_state, next_graph_state = obs["flat_obs"], obs["graph_obs"]
        distance = distance_func(action, reward, info)
        all_actions.append(action.mean() ** 2)  # square of the mean action value
        all_distances.append(distance)
        episode_reward += reward
        state = next_flat_state
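
Example #1 relies on get_distance to turn each step's (action, reward, info) into a progress measure. Its real implementation is not shown here; the following is a minimal sketch, assuming a factory keyed on the environment name and a hypothetical info["distance"] field, purely to illustrate the interface the evaluation loop above expects.

def get_distance(env_name):
    # Sketch only (not the real tf_rl implementation): return a callable
    # with the signature used above, i.e. (action, reward, info) -> float.
    def distance_fn(action, reward, info):
        # assumes the env reports travelled distance in info; falls back to the reward
        return info.get("distance", reward)
    return distance_fn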
Example #2
# run this from the terminal and make sure the appropriate environment variables are loaded
# $ echo $LD_LIBRARY_PATH

import gym
from tf_rl.common.monitor import Monitor
import environments.register as register

video_dir = "./video/"
temp = 5  # record a video once every "temp" episodes

env = gym.make("CentipedeSix-v1")
env = Monitor(env, video_dir, force=True)

for ep in range(10):
    if ep % temp == 0:
        print("recording")
        env.record_start()

    env.reset()
    done = False
    while not done:
        # env.render()
        action = env.action_space.sample()
        s, r, done, info = env.step(action)  # take a random action
    if ep % temp == 0:
        env.record_end()
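
record_start() and record_end() have to be paired by hand in the loop above. A small context manager can make that pairing automatic; this is an optional sketch built only on the two Monitor methods already used in Example #2, not an API provided by tf_rl.

from contextlib import contextmanager

@contextmanager
def maybe_record(env, enabled):
    # start the Monitor's recorder only when requested ...
    if enabled:
        env.record_start()
    try:
        yield env
    finally:
        # ... and always stop it, even if the episode raises
        if enabled:
            env.record_end()

With this helper the episode body becomes "with maybe_record(env, ep % temp == 0):" followed by the reset/step loop, and the recorder is closed even when an episode fails.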
Example #3
import itertools
import time
from collections import deque

import tensorflow as tf

# Invoke the agent
agent = DDPG(GGNN, Critic, node_info, env.action_space.shape[0], params)
""" === Training Phase === """
get_ready(agent.params)

global_timestep = tf.compat.v1.train.get_or_create_global_step()
time_buffer = deque(maxlen=agent.params.reward_buffer_ep)
log = logger(agent.params)
action_buffer, distance_buffer, eval_epochs = list(), list(), list()

with summary_writer.as_default():
    # for summary purposes, all training code lives inside this context
    with tf.contrib.summary.always_record_summaries():

        for i in itertools.count():
            state = env.reset()
            total_reward = 0
            start = time.time()
            done = False
            episode_len = 0
            while not done:
                if global_timestep.numpy() < agent.params.learning_start:
                    action = env.action_space.sample()
                else:
                    action = agent.predict(state)
                # scale for execution in env (in DDPG, every action is clipped between [-1, 1] in agent.predict)
                next_state, reward, done, info = env.step(
                    action * env.action_space.high)
                replay_buffer.add(state, action, reward, next_state, done)
                """
                === Update the models