Example 1
import sys

import numpy as np

from agents.policy_search import PolicySearch_Agent
from task import Task

# Modify the values below to give the quadcopter a different starting position.
runtime = 5.  # time limit of the episode
init_pose = np.array([5., 5., 5., 0., 0., 0.])  # initial pose: (x, y, z) position and (roll, pitch, yaw) Euler angles
init_velocities = np.array([0., 0., 0.])  # initial velocities
init_angle_velocities = np.array([0., 0., 0.])  # initial angle velocities
file_output = 'data.txt'  # file name for saved results (not used in this example)

num_episodes = 1000
target_pos = np.array([5., 5., 5.])  # same as the initial position, so the episode begins at the target
task = Task(init_pose, init_velocities, init_angle_velocities, runtime,
            target_pos)
agent = PolicySearch_Agent(task)
rewards = []

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(reward, done)
        state = next_state
        if done:
            rewards.append(agent.total_reward)
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.score, agent.best_score,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()
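
The rewards list accumulated above is never used inside the loop itself; plotting it is an easy way to check whether the agent is actually improving. A minimal sketch, assuming matplotlib is installed (it is not imported by the example itself):

import matplotlib.pyplot as plt

# Raw episode rewards, plus a moving average to make the trend visible.
window = 20
smoothed = [np.mean(rewards[max(0, i - window + 1):i + 1])
            for i in range(len(rewards))]

plt.plot(rewards, alpha=0.4, label='episode reward')
plt.plot(smoothed, label='moving average ({} episodes)'.format(window))
plt.xlabel('episode')
plt.ylabel('total reward')
plt.legend()
plt.show()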
Example 2

import sys

import numpy as np

from agents.policy_search import PolicySearch_Agent
from task import Task

num_episodes = 1000
target_pos = np.array([0., 0., 10.])
task = Task(target_pos=target_pos)
agent = PolicySearch_Agent(task)

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(reward, done)
        state = next_state
        if done:
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.score, agent.best_score,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()
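
Both training loops rely on the same agent interface: reset_episode() returns the initial state, act(state) returns an action, step(reward, done) accumulates reward and triggers learning at the end of an episode, and score, best_score, and noise_scale are exposed for logging. The sketch below is a hypothetical hill-climbing implementation of that interface, not the actual agents.policy_search code; it assumes the Task exposes state_size, action_size, action_high, and reset().

import numpy as np

class SimplePolicySearchAgent:
    """Hypothetical hill-climbing agent matching the interface used above."""

    def __init__(self, task):
        self.task = task
        # Assumed Task attributes: state_size, action_size, action_high, reset().
        self.w = np.random.normal(
            size=(task.state_size, task.action_size),
            scale=(task.action_high / (2.0 * task.state_size)))  # linear policy weights
        self.best_w = self.w
        self.best_score = -np.inf
        self.noise_scale = 0.1
        self.reset_episode()

    def reset_episode(self):
        self.total_reward = 0.0
        self.count = 0
        return self.task.reset()

    def act(self, state):
        # Deterministic linear policy; exploration comes from perturbing the weights.
        return np.dot(state, self.w)

    def step(self, reward, done):
        self.total_reward += reward
        self.count += 1
        if done:
            self.learn()

    def learn(self):
        # Score the episode by its average reward, then hill-climb on the weights:
        # keep improvements and shrink the noise, otherwise revert and widen it.
        self.score = self.total_reward / float(self.count) if self.count else 0.0
        if self.score > self.best_score:
            self.best_score = self.score
            self.best_w = self.w
            self.noise_scale = max(0.5 * self.noise_scale, 0.01)
        else:
            self.w = self.best_w
            self.noise_scale = min(2.0 * self.noise_scale, 3.2)
        self.w = self.w + self.noise_scale * np.random.normal(size=self.w.shape)

With this class, agent = SimplePolicySearchAgent(task) would be a drop-in replacement for PolicySearch_Agent in either loop above.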