import sys
import time

import numpy as np

# TrajOpt, Trajectory, and Task are assumed to be defined elsewhere in this
# repository; import them from the project's local modules.


def get_optimal(theta):
    # Solve the trajectory optimization problem for the given theta.
    opt = TrajOpt(theta=theta)
    xi, res, solve_time = opt.optimize()

    # Roll out the optimized trajectory in the simulated task for T seconds.
    T = 10.0
    traj = Trajectory(xi, T)
    env = Task()
    state = env.reset()
    start_time = time.time()
    curr_time = time.time() - start_time
    while curr_time < T + 0.5:
        # Simple proportional controller: command joint velocities toward
        # the desired joint position from the trajectory.
        q_des = traj.get(curr_time)
        qdot = 10 * (q_des - state["joint_position"][0:7])
        next_state, reward, done, info = env.step(qdot)
        state = next_state
        curr_time = time.time() - start_time
    env.close()
    return xi
def replay_demo(xi):
    # Replay an optimized trajectory and record evenly spaced joint waypoints.
    T = 10.0
    traj = Trajectory(xi, T)
    env = Task()
    state = env.reset()
    count = 0
    max_count = 10001
    timesteps = np.linspace(0, T, max_count)
    waypoints = []  # renamed from xi so the input trajectory is not shadowed
    while count < max_count:
        curr_time = timesteps[count]
        # Record the current joint configuration every 1000 steps
        # (11 waypoints in total, including the first and last step).
        if count % 1000 == 0:
            waypoints.append(state["joint_position"][0:7].tolist())
        count += 1
        q_des = traj.get(curr_time)
        qdot = 10 * (q_des - state["joint_position"][0:7])
        next_state, reward, done, info = env.step(qdot)
        state = next_state
    env.close()
    return waypoints
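# A minimal usage sketch for the two functions above (the theta value is
# illustrative, not taken from the original code):
#   xi = get_optimal(0.5)        # solve and roll out the optimal trajectory
#   waypoints = replay_demo(xi)  # replay it and collect 11 joint waypoints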
def main():
    # Read theta from the command line, solve, and report the solve time.
    theta = float(sys.argv[1])
    opt = TrajOpt(theta=theta)
    xi, res, solve_time = opt.optimize()
    opt.trajcost(xi)  # evaluate the cost of the solution
    print("solve time:", solve_time)

    # Roll out the solution in the simulated task, as in get_optimal above.
    T = 10.0
    traj = Trajectory(xi, T)
    env = Task()
    state = env.reset()
    start_time = time.time()
    curr_time = time.time() - start_time
    while curr_time < T + 0.5:
        q_des = traj.get(curr_time)
        qdot = 10 * (q_des - state["joint_position"][0:7])
        next_state, reward, done, info = env.step(qdot)
        state = next_state
        curr_time = time.time() - start_time
    env.close()


if __name__ == "__main__":
    main()
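# To run from the command line (the filename is hypothetical):
#   python main.py 0.5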