import numpy as np
import pandas as pd

df = pd.read_csv('../firefly-inverse-data/data/' + filename + '_log.csv',
                 usecols=['discount_factor', 'process gain forward', 'process gain angular', 'process noise std forward',
                          'process noise std angular', 'obs gain forward', 'obs gain angular', 'obs noise std forward',
                          'obs noise std angular', 'goal radius'])

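# The training log supplies the discount factor and the min/max of each model
# parameter; these are used below as the ranges from which candidate parameters
# are drawn for the inverse fit.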
DISCOUNT_FACTOR = df['discount_factor'][0]
arg.gains_range = [np.floor(df['process gain forward'].min()), np.ceil(df['process gain forward'].max()),
                   np.floor(df['process gain angular'].min()), np.ceil(df['process gain angular'].max())]

arg.std_range = [df['process noise std forward'].min(), df['process noise std forward'].max(),
                 df['process noise std angular'].min(), df['process noise std angular'].max()]
arg.goal_radius_range = [df['goal radius'].min(), df['goal radius'].max()]


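# Rebuild the environment and the agent with the same settings used in training,
# then restore the agent state saved under `filename`.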
env = Model(arg) # build an environment
agent = Agent(env.state_dim, env.action_dim, arg, filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device="cpu")
agent.load(filename)


true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

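# For each of 10 randomly drawn ground-truth parameter sets, roll out trajectories
# with the loaded agent and evaluate the loss at the true parameters, which serves
# as a reference value for the inverse estimation.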
for num_thetas in range(10):

    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range) # generate true trajectory
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range) # this is the lower bound of loss?
Example #2
from collections import deque
import gym

rewards = deque(maxlen=100)
video_path = './pretrained/ddpg_minhae/video.mp4'

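# Evaluate a pretrained DDPG agent ('ddpg_minhae') in the FireflyTorch-v0 gym
# environment for TOT_T time steps. Agent, Noise and the constants PROC_NOISE_STD,
# OBS_NOISE_STD, gains, obs_gains, rew_std and EPISODE_LEN are defined elsewhere
# in the project.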
TOT_T = 500
env = gym.make('FireflyTorch-v0')
#rec = VideoRecorder(env, video_path, enabled=video_path is not None) #for video
state_dim = env.state_dim
action_dim = env.action_dim

std = 0.05
noise = Noise(action_dim, mean=0., std=std)
agent = Agent(PROC_NOISE_STD,
              OBS_NOISE_STD,
              gains,
              obs_gains,
              rew_std,
              state_dim,
              action_dim,
              hidden_dim=128,
              tau=0.001)
agent.load('pretrained/ddpg_minhae/ddpg_model_EE.pth.tar')

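# Evaluation loop: each new episode resets the environment to a new firefly target,
# and the agent selects actions from the current state with a small amount of
# added noise.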
tot_t = 0.
episode = 0.
while tot_t <= TOT_T:
    episode += 1  # every episode starts a new firefly
    t, x, P, ox, b, state = env.reset()
    episode_reward = 0.

    while t < EPISODE_LEN:
        action = agent.select_action(state, noise)
Example #3
env.setup(arg)
env.model.box = arg.WORLD_SIZE
env.model.min_goal_radius = goal_radius_range[0]
x, b, state, pro_gains, pro_noise_stds, obs_gains, obs_noise_stds, goal_radius = env.reset(
    gains_range, std_range, goal_radius_range)
state_dim = env.model.state_dim
action_dim = env.model.action_dim

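# Action noise is set close to zero, so the policy is evaluated almost
# deterministically (the commented-out 0.05 below is presumably the value used
# during training).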
MAX_EPISODE = 1000
std = 0.00001  #0.05
noise = Noise(action_dim, mean=0., std=std)

agent = Agent(state_dim,
              action_dim,
              arg,
              filename,
              hidden_dim=128,
              gamma=DISCOUNT_FACTOR,
              tau=0.001)
agent.load(filename)

tot_t = 0.
episode = 0.

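# Column names for the per-time-step evaluation log (the list is truncated in
# this excerpt).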
COLUMNS = [
    'total time', 'ep', 'time step', 'reward', 'goal', 'a_vel', 'a_ang',
    'true_r', 'true_rel_ang', 'r', 'rel_ang', 'vel', 'ang_vel', 'vecL1',
    'vecL2', 'vecL3', 'vecL4', 'vecL5', 'vecL6', 'vecL7', 'vecL8', 'vecL9',
    'vecL10', 'vecL11', 'vecL12', 'vecL13', 'vecL14', 'vecL15',
    'process gain forward', 'process gain angular',
    'process noise std forward', 'process noise std angular',
Example #4
    tot_t = 0.  # number of total time steps
    episode = 0.  # number of fireflies
    int_t = 1  # variable for changing the world setting every EPISODE_LEN time steps

    state_dim = env.state_dim
    action_dim = env.action_dim
    filename = arg.filename

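    # Save the run's arguments alongside the data so the experiment settings can
    # be recovered later.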
    argument = arg.__dict__
    torch.save(argument, arg.data_path + 'data/' + filename + '_arg.pkl')

    agent = Agent(state_dim,
                  action_dim,
                  arg,
                  filename,
                  hidden_dim=128,
                  gamma=arg.DISCOUNT_FACTOR,
                  tau=0.001)

    #"""
    # if you want to use pretrained agent, load the data as below
    # if not, comment it out
    #agent.load('20191004-160540')
    #"""

    b, state, obs_gains, obs_noise_ln_vars = agent.Bstep.reset(
        x, torch.zeros(1), pro_gains, pro_noise_ln_vars, goal_radius,
        arg.gains_range, arg.noise_range)  # reset monkey's internal model

    # action space noise