def Breshape(self, b, time, theta):
    """Flatten a belief, the time and the model parameters for the policy.

    Args:
        b: Belief pair ``(x, P)``. ``x`` is flattened and must contain
            exactly five elements, read in order as px, py, ang, vel,
            ang_vel. ``P`` is handed to ``vectorLowerCholesky``
            (presumably the belief covariance -- confirm with the caller).
        time: Tensor inserted unchanged after the four state features; it
            is expected to be 1-D so it can be concatenated with them.
        theta: Five-item sequence ``(pro_gains, pro_noise_ln_vars,
            obs_gains, obs_noise_ln_vars, goal_radius)``.

    Returns:
        Tensor of shape ``(1, N)`` laid out as: distance, relative angle,
        vel, ang_vel, time, vectorised Cholesky factor, then the flattened
        entries of ``theta``.
    """
    (pro_gains, pro_noise_ln_vars, obs_gains, obs_noise_ln_vars,
     goal_radius) = theta
    mean, cov = b

    # One single-element tensor per state component.
    px, py, ang, vel, ang_vel = mean.view(-1).split(1)

    # Length of (px, py): the relative distance to the firefly.
    dist = torch.norm(torch.cat([px, py])).view(-1)

    # Heading measured against the direction of (-px, -py); range_angle is
    # expected to wrap the difference into the [-pi, pi] range.
    bearing = torch.atan2(-py, -px).view(-1)
    heading_err = range_angle(ang - bearing)

    # Vector form of the lower-triangular factor of the second belief item.
    chol_vec = vectorLowerCholesky(cov)

    pieces = [
        dist,
        heading_err,
        vel,
        ang_vel,
        time,
        chol_vec,
        pro_gains.view(-1),
        pro_noise_ln_vars.view(-1),
        obs_gains.view(-1),
        obs_noise_ln_vars.view(-1),
        torch.ones(1) * goal_radius,
    ]
    # A reduced layout holding only the first four entries was used for
    # simpler experiments; the full layout above is the original one.
    return torch.cat(pieces).view(1, -1)
def Breshape(self, b, time, theta):
    """Reshape a belief into the single-row state tensor fed to the policy.

    ``b`` is unpacked as ``(x, P)`` and ``theta`` as ``(pro_gains,
    pro_noise_ln_vars, obs_gains, obs_noise_ln_vars, goal_radius)``.
    ``x`` is flattened and must hold exactly five elements
    (px, py, ang, vel, ang_vel).

    The returned tensor has shape ``(1, N)`` and concatenates, in order:
    the norm of (px, py), the wrapped relative angle, vel, ang_vel,
    ``time``, the output of ``vectorLowerCholesky(P)``, the four flattened
    gain/noise parameter tensors, and ``goal_radius`` as a one-element
    tensor.
    """
    pro_gains, pro_noise_ln_vars, obs_gains, obs_noise_ln_vars, goal_radius = (
        theta
    )
    x, P = b

    flat_x = x.view(-1)
    px, py, ang, vel, ang_vel = torch.split(flat_x, 1)

    # Distance of the (px, py) position from the origin.
    position = torch.cat([px, py])
    r = torch.norm(position).view(-1)

    # Angle relative to the direction of (-px, -py), then passed through
    # range_angle (presumably wrapping into [-pi, pi] -- defined elsewhere).
    rel_ang = range_angle(ang - torch.atan2(-py, -px).view(-1))

    vecL = vectorLowerCholesky(P)

    # goal_radius is scaled onto a one-element tensor so it can be
    # concatenated with the rest.
    goal = torch.ones(1) * goal_radius
    theta_flat = [
        pro_gains.view(-1),
        pro_noise_ln_vars.view(-1),
        obs_gains.view(-1),
        obs_noise_ln_vars.view(-1),
        goal,
    ]

    state = torch.cat([r, rel_ang, vel, ang_vel, time, vecL] + theta_flat)
    return state.view(1, -1)
#time.sleep(0.1) # delay for 0.005 sec if info['stop']: time.sleep(1) # check time limit TimeEnd = ( t + 1 == arg.EPISODE_LEN ) # if the monkey can't catch the firefly in EPISODE_LEN, reset the game. mask = torch.tensor([1 - float(TimeEnd)]) # mask = 0: episode is over data = np.array([[ tot_t, episode, t, reward, reached_target.item(), action[0][0].item(), action[0][1].item(), torch.norm(x.view(-1)[0:2]).item(), range_angle( x.view(-1)[2] - torch.atan2(-x.view(-1)[1], -x.view(-1)[0]).view(-1)).item(), state[0][0].item(), state[0][1].item(), state[0][2].item(), state[0][3].item(), state[0][5].item(), state[0][6].item(), state[0][7].item(), state[0][8].item(), state[0][9].item(), state[0][10].item(), state[0][11].item(), state[0][12].item(), state[0][13].item(), state[0][14].item(), state[0][15].item(), state[0][16].item(), state[0][17].item(), state[0][18].item(), state[0][19].item(), pro_gains[0].item(), pro_gains[1].item(), pro_noise_stds[0].item(), pro_noise_stds[1].item(), obs_gains[0].item(), obs_gains[1].item(), obs_noise_stds[0].item(), obs_noise_stds[1].item(), goal_radius.item(), arg.WORLD_SIZE, DISCOUNT_FACTOR ]]) df1 = pd.DataFrame(data, columns=COLUMNS)