Example #1
def init(target_pos, init_pose, init_angle_velocities, init_velocities, runtime, action_low, action_high, agent_type,
         action_repeat, action_size, success_mem_len,
         gamma=0.9, tau=0.1, buffer_size=100000, batch_size=128, exploration_mu=0,
         exploration_theta=0.15, exploration_sigma=0.2, success_distance=1):
    """Build a TakeOff_Task and the requested agent type, configured to match."""

    task = TakeOff_Task(target_pos=target_pos, init_pose=init_pose,
                init_angle_velocities=init_angle_velocities, init_velocities=init_velocities,
                runtime=runtime)

    task.configure(action_repeat=action_repeat, action_low=action_low, action_high=action_high, action_size=action_size,
                   target_pos=target_pos, init_velocities=init_velocities, init_angle_velocities=init_angle_velocities,
                   init_pose=init_pose, success_distance=success_distance)

    if agent_type == 'DDPG':
        agent = DDPG_Agent(task)
        agent.configure(gamma, tau, buffer_size, batch_size, exploration_mu, exploration_theta, exploration_sigma)
    elif agent_type == 'Policy_Search':
        agent = PolicySearch_Agent(task)
    elif agent_type == 'Random_Binary':
        agent = Random_Binary_Agent(task)
        agent.configure(success_mem_len)
    elif agent_type == 'Simple':
        agent = DDPG_Agent_Simple(task)
        agent.configure(gamma, tau, buffer_size, batch_size, exploration_mu, exploration_theta, exploration_sigma)
    else:
        raise ValueError("Unknown agent_type: {}".format(agent_type))

    return task, agent
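
The gamma/tau/buffer/batch arguments above are standard DDPG hyperparameters, and the exploration_mu, exploration_theta, and exploration_sigma trio are the usual parameters of an Ornstein-Uhlenbeck process, DDPG's customary exploration noise. A minimal sketch of such a process, assuming only those three parameters (the class itself is illustrative, not the project's actual agents code):

import numpy as np

class OUNoise:
    """Ornstein-Uhlenbeck process: temporally correlated exploration noise."""

    def __init__(self, size, mu=0.0, theta=0.15, sigma=0.2):
        self.mu = mu * np.ones(size)
        self.theta = theta
        self.sigma = sigma
        self.reset()

    def reset(self):
        # Restart each episode from the long-run mean.
        self.state = self.mu.copy()

    def sample(self):
        # Mean-reverting update: dx = theta * (mu - x) + sigma * N(0, 1)
        dx = self.theta * (self.mu - self.state) \
             + self.sigma * np.random.randn(len(self.state))
        self.state = self.state + dx
        return self.state

A typical use would be action = actor(state) + noise.sample(), with noise.reset() called at the start of each episode.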
Example #2
# The sample agent given in `agents/policy_search.py` uses a very simplistic linear policy to directly compute the action vector as a dot product of the state vector and a matrix of weights. Then, it randomly perturbs the parameters by adding some Gaussian noise, to produce a different policy. Based on the average reward obtained in each episode (`score`), it keeps track of the best set of parameters found so far, how the score is changing, and accordingly tweaks a scaling factor to widen or tighten the noise.
# 
# Run the code cell below to see how the agent performs on the sample task.

import sys
import numpy as np
import pandas as pd
from agents.policy_search import PolicySearch_Agent
from task import Task

num_episodes = 1000
target_pos = np.array([0., 0., 10.])
task = Task(target_pos=target_pos)
agent = PolicySearch_Agent(task) 

for i_episode in range(1, num_episodes+1):
    state = agent.reset_episode() # start a new episode
    while True:
        action = agent.act(state) 
        next_state, reward, done = task.step(action)
        agent.step(reward, done)
        state = next_state
        if done:
            print("\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}".format(
                i_episode, agent.score, agent.best_score, agent.noise_scale), end="")  # [debug]
            break
    sys.stdout.flush()
    
print("\nfinished running agent")
Example #3
import sys
from agents.policy_search import PolicySearch_Agent
from task import Task
import numpy as np
import matplotlib.pyplot as plt

# Modify the values below to give the quadcopter a different starting position.
runtime = 5.  # time limit of the episode
init_pose = np.array([5., 5., 5., 0., 0., 0.])  # initial pose
init_velocities = np.array([0., 0., 0.])  # initial velocities
init_angle_velocities = np.array([0., 0., 0.])  # initial angular velocities
file_output = 'data.txt'  # file name for saved results

num_episodes = 1000
target_pos = np.array([5., 5., 5.])
task = Task(init_pose, init_velocities, init_angle_velocities, runtime,
            target_pos)
agent = PolicySearch_Agent(task)
rewards = []

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()  # start a new episode
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(reward, done)
        state = next_state
        if done:
            rewards.append(agent.total_reward)
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.total_reward, agent.best_reward,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()

# Plot the total reward collected in each episode.
plt.plot(rewards)
plt.xlabel('Episode')
plt.ylabel('Total reward')
plt.show()
Example #4
import sys
from agents.policy_search import PolicySearch_Agent
from task import Task
import numpy as np

num_episodes = 1000
target_pos = np.array([0., 0., 10.])
task = Task(target_pos=target_pos)
agent = PolicySearch_Agent(task)

for i_episode in range(1, num_episodes + 1):
    state = agent.reset_episode()
    while True:
        action = agent.act(state)
        next_state, reward, done = task.step(action)
        agent.step(reward, done)  # PolicySearch_Agent.step takes only (reward, done)
        state = next_state
        if done:
            print(
                "\rEpisode = {:4d}, score = {:7.3f} (best = {:7.3f}), noise_scale = {}"
                .format(i_episode, agent.score, agent.best_score,
                        agent.noise_scale),
                end="")  # [debug]
            break
    sys.stdout.flush()
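
All of the examples drive the agent through the same small task contract: reset() yields the initial state, and step(action) yields (next_state, reward, done). A hypothetical stub with that interface, handy for smoke-testing an agent loop without the quadcopter physics (every name and constant below is made up for illustration):

import numpy as np

class StubTask:
    """Stand-in task exposing the reset()/step() contract used above."""

    def __init__(self, state_size=6, action_size=4, runtime=5., dt=0.06):
        self.state_size = state_size
        self.action_size = action_size
        self.runtime = runtime
        self.dt = dt  # seconds advanced per control step
        self.reset()

    def reset(self):
        self.time = 0.
        self.state = np.zeros(self.state_size)
        return self.state

    def step(self, action):
        # Advance the dummy dynamics one step.
        self.time += self.dt
        next_state = self.state + 0.01 * np.random.randn(self.state_size)
        reward = -np.linalg.norm(next_state[:3])  # e.g. penalize drift from origin
        done = self.time >= self.runtime
        self.state = next_state
        return next_state, reward, done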