def interact(env: Env, agent: Agent, start_obs: Arrayable) -> Tuple[array, array, array]:
    """One step interaction between env and agent.

    :args env: environment
    :args agent: agent
    :args start_obs: initial observation
    :return: (next observation, reward, terminal?)
    """
    action = agent.step(start_obs)
    next_obs, reward, done, information = env.step(action)
    # Idiom fix: dict.get replaces the `key in d` ternary — one lookup,
    # same result (the value when present, None otherwise).
    time_limit = information.get('time_limit')
    agent.observe(next_obs, reward, done, time_limit)
    return next_obs, reward, done
# Training loop: run episodes of the task, letting the agent learn from each
# transition, and record per-episode scores and rotor-speed variance for
# later plotting.  (Task, Agent, init_pos, num_episodes are defined elsewhere.)
target_pos = np.array([0., 0., 150.])
task = Task(init_pose=init_pos, target_pos=target_pos, runtime=10.)
agent = Agent(task)

# save rewards for plotting
rewards = []            # final score of each episode
rotor_speeds_var = []   # variance across the four rotor speeds of the last action

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    step = 0
    while True:
        step += 1
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(action, reward, next_state, done)
        state = next_state
        if done:
            rewards.append(agent.score)
            rotor_speeds_var.append(np.var(action))
            print(
                "\r\nEp={:4d}, score={:7.3f} (top={:7.3f}) pos={} {} {} {} {} {} {}"
                .format(i_episode, agent.score, agent.top_score,
                        round(task.sim.pose[:3][0], 2),
                        round(task.sim.pose[:3][1], 2),
                        round(task.sim.pose[:3][2], 2),
                        round(task.sim.pose[3:6][0], 2),
                        round(task.sim.pose[3:6][1], 2),
                        round(task.sim.pose[3:6][2], 2),
                        # BUG FIX: ndigits was outside round() — it arrived as
                        # an extra, silently-ignored .format() argument.
                        round(abs(task.sim.v).sum(), 2)),
                end="")  # [debug]
            # BUG FIX: without this break the loop kept stepping an already
            # finished episode forever (the analogous loop below does break).
            break
#Results with the conditions of the quadcopter labels = [ 'time', 'x', 'y', 'z', 'phi', 'theta', 'psi', 'x_velocity', 'y_velocity', 'z_velocity', 'phi_velocity', 'theta_velocity', 'psi_velocity', 'rotor_speed1', 'rotor_speed2', 'rotor_speed3', 'rotor_speed4' ] results = {x: [] for x in labels} num_episodes = 1000 for i_episode in range(1, num_episodes + 1): state = agent.reset_episode() # start a new episode while True: action = agent.act(state) next_state, reward, done = takeoff.step(action) agent.step(reward, done) state = next_state to_write = [takeoff.sim.time] + list(takeoff.sim.pose) + list( takeoff.sim.v) + list(takeoff.sim.angular_v) + list(action) for ii in range(len(labels)): results[labels[ii]].append(to_write[ii]) if done: print( "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}" .format(i_episode, agent.score, agent.best_score, agent.noise_scale), end="") # [debug] break sys.stdout.flush() ''' Shows the results of the control plt.plot(results['time'], results['x'], label='x')