Ejemplo n.º 1
0
def compute_loss(env, agent, theta_estimation, true_theta, phi, trajectory_data=None, num_episodes=100,is1d=False):
    """Evaluate the inverse-model loss of ``theta_estimation``.

    If ``trajectory_data`` is None, fresh trajectories are generated with
    ``trajectory`` using ``phi`` and ``true_theta``; otherwise the recorded
    ``'actions'`` and ``'tasks'`` entries of ``trajectory_data`` are reused.

    Args:
        env: environment forwarded to ``trajectory`` and ``getLoss``.
        agent: policy forwarded to ``trajectory`` and ``getLoss``.
        theta_estimation: array-like; wrapped in a ``torch.nn.Parameter``
            before the loss is evaluated.
        true_theta: array-like; only used when trajectories are generated.
        phi: array-like; converted with ``torch.Tensor``.
        trajectory_data: optional mapping with ``'actions'`` and ``'tasks'``
            (and, if available, ``'states'``).
        num_episodes: number of episodes passed to ``trajectory``.
        is1d: flag passed to ``trajectory``.

    Returns:
        Whatever ``getLoss`` returns for the given data.
    """
    if trajectory_data is None:
        states, actions, tasks = trajectory(
            agent, torch.Tensor(phi), torch.Tensor(true_theta), env,
            num_episodes, is1d=is1d)
    else:
        actions = trajectory_data['actions']
        tasks = trajectory_data['tasks']
        # Previously ``states`` was left unbound on this path, so the getLoss
        # call below always raised UnboundLocalError for recorded data.
        # NOTE(review): assumes recorded states live under the 'states' key
        # and that getLoss accepts states=None when they are absent - confirm.
        states = trajectory_data['states'] if 'states' in trajectory_data else None

    theta_param = torch.nn.Parameter(torch.Tensor(theta_estimation))
    return getLoss(agent, actions, tasks, torch.Tensor(phi), theta_param, env,
                   states=states)
# Script: set up the environment and a pre-trained agent, then for several
# ground-truth thetas prepare an optimisation run that starts from a freshly
# drawn theta. NOTE(review): `arg`, `filename` and `DISCOUNT_FACTOR` are
# defined outside this excerpt.
env = Model(arg) # build an environment
agent = Agent(env.state_dim, env.action_dim, arg,  filename, hidden_dim=128, gamma=DISCOUNT_FACTOR, tau=0.001, device = "cpu")
# Restore the agent's saved state from `filename`.
agent.load(filename)


# Per-run logs; only true_theta_log is appended to in the visible part.
true_theta_log = []
final_theta_log = []
stderr_log = []
result_log = []

# Repeat the experiment for 10 ground-truth parameter vectors.
for num_thetas in range(10):

    # presumably reset_theta draws a theta inside the given ranges - verify
    # against its definition.
    true_theta = reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range)
    true_theta_log.append(true_theta.data.clone())
    x_traj, obs_traj, a_traj, _ = trajectory(agent, true_theta, arg.INVERSE_BATCH_SIZE, env, arg, arg.gains_range, arg.std_range, arg.goal_radius_range) # generate true trajectory
    # Loss evaluated at the generating theta itself, kept as a reference value.
    true_loss = getLoss(agent, x_traj, obs_traj, a_traj, true_theta, env, arg.gains_range, arg.std_range) # this is the lower bound of loss?


    # Disabled alternative: start from true_theta scaled by 1.5.
    #theta = nn.Parameter(true_theta.data.clone()+0.5*true_theta.data.clone())
    # Starting point of the optimisation: an independently drawn theta,
    # wrapped as a Parameter so Adam can update it.
    theta = nn.Parameter(reset_theta(arg.gains_range, arg.std_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()


    # Bounded histories sized to the 5000 iterations of the inner loop.
    loss_log = deque(maxlen=5000)
    theta_log = deque(maxlen=5000)
    optT = torch.optim.Adam([theta], lr=1e-3)
    # Large initial value; how prev_loss and loss_diff are used is in the
    # loop body, which is not visible in this excerpt.
    prev_loss = 100000
    loss_diff = deque(maxlen=5)


    # Inner optimisation loop - its body is missing from this excerpt.
    for num_batches in range(5000):
Ejemplo n.º 3
0
# Script: load a finished inverse run, regenerate trajectories under the true
# theta, and compute the Hessian of the loss at the final theta estimate.
arg.DELTA_T = 0.2
arg.EPISODE_LEN = 35
a = load_inverse_data('17_21_34')
theta_trajectory = a['theta_estimations']
true_theta = a['true_theta']
theta_estimation = theta_trajectory[-1]  # last recorded estimate
phi = np.array(a['phi'])
# no bg, faster
env = ffac_1d.FireflyTrue1d_cpu(arg)
baselines_mlp_model = TD3_torch.TD3.load('trained_agent/1d_1000000_9_16_22_20.zip')
agent = baselines_mlp_model.actor

agent.cpu()
# `agent.requires_grad = False` merely set a plain attribute on the module and
# froze nothing; requires_grad_(False) turns off gradients for every parameter.
# NOTE(review): assumes `agent` is a torch.nn.Module (it is used via .cpu()).
agent.requires_grad_(False)

is1d = True
H_dim = 7  # number of theta entries differentiated below
num_episodes = 100
states, actions, tasks = trajectory(
    agent, torch.Tensor(phi), torch.Tensor(true_theta), env, num_episodes,
    is1d=is1d)
theta_estimation = torch.nn.Parameter(torch.Tensor(theta_estimation))
# phi is held fixed: only theta_estimation is differentiated.
phi = torch.nn.Parameter(torch.Tensor(phi), requires_grad=False)
loss = getLoss(agent, actions, tasks, phi, theta_estimation, env,
               states=states, gpu=False)

# First-order gradient, keeping the graph so it can be differentiated again.
grads = torch.autograd.grad(loss, theta_estimation, create_graph=True,
                            allow_unused=True)[0]
print(grads)
if grads is None:
    # allow_unused=True yields None when the loss never touches theta; fail
    # with a clear message instead of a TypeError on grads[i] below.
    raise RuntimeError(
        "loss does not depend on theta_estimation; cannot build the Hessian")

# Build the Hessian one row at a time: row i = d(grads[i]) / d(theta).
H = torch.zeros(H_dim, H_dim)
for i in range(H_dim):
    print(i)
    row = torch.autograd.grad(grads[i], theta_estimation, retain_graph=True,
                              allow_unused=True)[0]
    # A None row means grads[i] is independent of theta, i.e. a zero row,
    # which H already holds.
    if row is not None:
        H[i] = row.view(-1)

Ejemplo n.º 4
0
def single_inverse(true_theta, arg, env, agent, x_traj, a_traj,  true_loss, filename, n, Pro_Noise = None, Obs_Noise = None, part_theta=False):
    """Fit theta to one set of trajectories with Adam and save the run.

    Starting from a theta produced by ``reset_theta``, runs ``arg.NUM_IT``
    Adam steps on the loss returned by ``getLoss``, passing theta through
    ``theta_range`` after every step. Progress is printed every 5 iterations,
    a loss-curve PNG is written every 50 iterations, and the final result
    dict is stored with ``torch.save``.

    Args:
        true_theta: ground-truth parameters; used for logging and as the
            source of the fixed noise entries (slices ``[2:4]`` and ``[6:8]``).
        arg: configuration object (reads ``gains_range``, ``noise_range``,
            ``goal_radius_range``, ``NUM_IT``, ``ADAM_LR``, ``PI_STD``,
            ``NUM_SAMPLES``, ``NUM_thetas``, ``NUM_EP``, ``SEED_NUMBER``).
        env, agent: forwarded to ``getLoss``.
        x_traj, a_traj: trajectory data forwarded to ``getLoss``.
        true_loss: reference loss tensor, printed and stored in the result.
        filename: prefix for the output files.
        n: index of this theta, used in log messages and file names.
        Pro_Noise, Obs_Noise: when not None, replaced by the corresponding
            slice of ``true_theta`` and passed to ``theta_range`` if
            ``part_theta`` is set.
        part_theta: if truthy, call ``theta_range`` with the noise values.

    Returns:
        dict with the true/initial/final theta, logs, timing and arguments.
    """
    tic = time.time()
    data_dir = '../firefly-inverse-data/data/'

    # NOTE(review): any non-None argument is overwritten with the matching
    # slice of true_theta, so the passed value only acts as a switch - confirm
    # this is intended.
    if Pro_Noise is not None:
        Pro_Noise = true_theta[2:4]
    if Obs_Noise is not None:
        Obs_Noise = true_theta[6:8]

    theta = nn.Parameter(reset_theta(arg.gains_range, arg.noise_range, arg.goal_radius_range))
    ini_theta = theta.data.clone()

    loss_log = deque(maxlen=arg.NUM_IT)
    theta_log = deque(maxlen=arg.NUM_IT)

    optT = torch.optim.Adam([theta], lr=arg.ADAM_LR)

    # Keeps `it` defined for the result dict even when arg.NUM_IT == 0.
    it = -1
    for it in tqdm(range(arg.NUM_IT)):
        loss, _, _ = getLoss(agent, x_traj, a_traj, theta, env, arg.gains_range,
                             arg.noise_range, arg.PI_STD, arg.NUM_SAMPLES)
        loss_log.append(loss.data)

        optT.zero_grad()  # clear gradients from the previous step
        loss.backward(retain_graph=True)
        optT.step()  # single optimisation step: this changes theta

        # Keep theta inside the trained range.
        # NOTE(review): theta is rebound to theta_range's return value while
        # optT still references the original Parameter; this is only correct
        # if theta_range returns the same object - confirm.
        if part_theta:
            theta = theta_range(theta, arg.gains_range, arg.noise_range,
                                arg.goal_radius_range, Pro_Noise, Obs_Noise)
        else:
            theta = theta_range(theta, arg.gains_range, arg.noise_range,
                                arg.goal_radius_range)

        theta_log.append(theta.data.clone())

        if it % 5 == 0:
            print("num_theta:{}, num:{}, loss:{}, true loss:{},\n true_theta:{}, \n converged_theta:{}\n".format(n, it,np.round(loss.data.item(), 6),np.round(true_loss.data.item(), 6),true_theta.data, theta.data))

        if it % 50 == 0 and it > 0:
            # Use a dedicated figure and close it afterwards; plotting onto the
            # implicit current figure stacked a new curve on every snapshot
            # and never released the figure.
            fig, ax = plt.subplots()
            ax.plot(loss_log)
            ax.set_title("it:{}".format(it))
            fig.savefig(data_dir + filename + str(n) + '_loss.png')
            plt.close(fig)

    toc = time.time()

    # Standard-error estimation from the Hessian of the loss is not performed
    # here, so the result carries no 'stderr' entries.
    result = {'true_theta': true_theta,
              'initial_theta': ini_theta,
              'x_traj': x_traj,
              'a_traj': a_traj,
              'theta': theta,
              'theta_log': theta_log,
              'loss_log': loss_log,
              'true_loss': true_loss,
              'filename': filename,
              'num_theta': n,
              'converging_it': it,
              'duration': toc-tic,
              'arguments': arg}

    torch.save(result, data_dir + filename + str(n)+ str(arg.NUM_thetas) + "EP" + str(arg.NUM_EP) + str(np.around(arg.PI_STD, decimals=2)) + str(arg.NUM_SAMPLES) + "IT" + str(arg.NUM_IT) + str(arg.SEED_NUMBER) +'_single_result_part.pkl')

    return result