import numpy as np
import pandas as pd

df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor', 'process gain forward', 'process gain angular', 'process noise std forward',
                          'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
                          'obs noise std angular', 'goal radius'])

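# The training log supplies the discount factor and the min/max of each model
# parameter; these are used below as the ranges from which candidate parameters
# are drawn for the inverse fit.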
DISCOUNT_FACTOR = df['discount_factor'][0]
arg.gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
                   np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]

arg.std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
                 df['process noise std angular'].min(), df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]


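# Rebuild the environment and the agent with the same settings used in training,
# then restore the agent state saved under `filename`.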
env = Model(arg) # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)


true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

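# For each of 10 randomly drawn ground-truth parameter sets, roll out trajectories
# with the loaded agent and evaluate the loss at the true parameters, which serves
# as a reference value for the inverse estimation.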
for num_thetas in range(10):

    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range) # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range) # this is the lower bound of loss?
Example #2
from collections import deque
import gym

rewards = deque(maxlen=100)
video_path = './pretrained/ddpg_minhae/video.mp4'

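# Evaluate a pretrained DDPG agent ('ddpg_minhae') in the FireflyTorch-v0 gym
# environment for TOT_T time steps. Agent, Noise and the constants PROC_NOISE_STD,
# OBS_NOISE_STD, gains, obs_gains, rew_std and EPISODE_LEN are defined elsewhere
# in the project.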
TOT_T = 500
env = gym.make('FireflyTorch-v0')
#rec = VideoRecorder(env, video_path, enabled=video_path is not None) #for video
state_dim = env.state_dim
action_dim = env.action_dim

std = 0.05
noise = Noise(action_dim, mean=0., std=std)
agent = Agent(PROC_NOISE_STD,
              OBS_NOISE_STD,
              gains,
              obs_gains,
              rew_std,
              state_dim,
              action_dim,
              hidden_dim=128,
              tau=0.001)
agent.load('pretrained/ddpg_minhae/ddpg_model_EE.pth.tar')

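# Evaluation loop: each new episode resets the environment to a new firefly target,
# and the agent selects actions from the current state with a small amount of
# added noise.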
tot_t = 0.
episode = 0.
while tot_t <= TOT_T:
    episode += 1  # every episode starts a new firefly
    t, x, P, ox, b, state = env.reset()
    episode_reward = 0.

    while t < EPISODE_LEN:
        action = agent.select_action(state, noise)
Example #3
env.setup(arg)
env.model.box = arg.WORLD_SIZE
env.model.min_goal_radius = goal_radius_range[0]
x, b, state, pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius = env.reset(
    gains_range, std_range, goal_radius_range)
state_dim = env.model.state_dim
action_dim = env.model.action_dim

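# Action noise is set close to zero, so the policy is evaluated almost
# deterministically (the commented-out 0.05 below is presumably the value used
# during training).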
MAX_EPISODE = 1000
std = 0.00001  #0.05
noise = Noise(action_dim, mean=0., std=std)

agent = Agent(state_dim,
              action_dim,
              arg,
              filename,
              hidden_dim=128,
              gamma=DISCOUNT_FACTOR,
              tau=0.001)
agent.load(filename)

tot_t = 0.
episode = 0.

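# Column names for the per-time-step evaluation log (the list is truncated in
# this excerpt).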
COLUMNS = [
    'total time', 'ep', 'time step', 'reward', 'goal', 'a_vel', 'a_ang',
    'true_r', 'true_rel_ang', 'r', 'rel_ang', 'vel', 'ang_vel', 'vecL1',
    'vecL2', 'vecL3', 'vecL4', 'vecL5', 'vecL6', 'vecL7', 'vecL8', 'vecL9',
    'vecL10', 'vecL11', 'vecL12', 'vecL13', 'vecL14', 'vecL15',
    'process gain forward', 'process gain angular',
    'process noise std forward', 'process noise std angular',
Example #4
    tot_t = 0.  # number of total time steps
    episode = 0.  # number of fireflies
    int_t = 1  # variable for changing the world setting every EPISODE_LEN time steps

    state_dim = env.state_dim
    action_dim = env.action_dim
    filename = arg.filename

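    # Save the run's arguments alongside the data so the experiment settings can
    # be recovered later.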
    argument = arg.__dict__
    torch.save(argument, arg.data_path + 'data/' + filename + '_arg.pkl')

    agent = Agent(state_dim,
                  action_dim,
                  arg,
                  filename,
                  hidden_dim=128,
                  gamma=arg.DISCOUNT_FACTOR,
                  tau=0.001)

    #"""
    # if you want to use pretrained agent, load the data as below
    # if not, comment it out
    #agent.load('20191004-160540')
    #"""

    b, state, obs_gains, obs_noise_ln_vars = agent.Bstep.reset(
        x, torch.zeros(1), pro_gains, pro_noise_ln_vars, goal_radius,
        arg.gains_range, arg.noise_range)  # reset monkey's internal model

    # action space noise