# Imports shared by the snippets below (collected from several files; some
# snippets also rely on module-level globals such as model, terminal_model,
# N, T, NX, NU, MAXITER, CALLBACKS and ROBOT, defined where they originally lived).
import random
import time
import pickle as cPickle  # stand-in for the py2-era cPickle name used below
from math import sqrt, floor

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.optimize import minimize

import crocoddyl
import pinocchio
import torch


def plot_trajectories():
    df = pd.read_csv("warmstart.csv")
    data = df.values
    row = data[0, :]

    init_xs = []
    init_us = []
    starting_state = row[0:3]
    init_xs.append(np.array(starting_state))

    full_state = np.array(row[3:]).reshape(30, 6)
    state = full_state[:, 0:3]
    control = full_state[:, 3:5]
    assert state.shape[0] == control.shape[0]
    assert state.shape[1] == 3
    assert control.shape[1] == 2
    for x in state:
        init_xs.append(np.array(x))
    for u in control:
        init_us.append(np.array(u))

    # ...... WARMSTARTING CROCODDYL
    model = crocoddyl.ActionModelUnicycle()
    model.costWeights = np.matrix([1, 0.3]).T
    problem = crocoddyl.ShootingProblem(
        np.matrix(starting_state).T, [model] * 30, model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.solve(init_xs, [], 1000)
    ddp_xs = np.array(ddp.xs)

    # ..... COLDSTARTING CROCODDYL
    model2 = crocoddyl.ActionModelUnicycle()
    model2.costWeights = np.matrix([1, 0.3]).T
    problem2 = crocoddyl.ShootingProblem(
        np.matrix(starting_state).T, [model2] * 30, model2)
    ddp2 = crocoddyl.SolverDDP(problem2)
    ddp2.solve([], [], 1000)
    ddp2_xs = np.array(ddp2.xs)

    print("Plotting 2 trajectories for comparison\n")
    print("Value functions and iterations for the warmstarted and coldstarted runs:",
          "Cost:", ddp.cost, ",", ddp2.cost, "\n",
          "Iterations:", ddp.iter, ",", ddp2.iter)

    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    ax.set_xlim(xmin=-2.1, xmax=2.1)
    ax.set_ylim(ymin=-2.1, ymax=2.1)
    ax.plot(ddp_xs[:, 0], ddp_xs[:, 1], c='red', label='Warmstarted crocoddyl')
    ax.scatter(ddp2_xs[:, 0], ddp2_xs[:, 1], c='green',
               label='Coldstarted crocoddyl')
    plt.legend()
    plt.show()
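# Hedged sketch (not from the original code): build a "warmstart.csv" row in
# the layout plot_trajectories() expects -- 3 entries of starting state
# followed by 30 knots of 6 values (x, y, theta, v, w, cost). The cost column
# is a zero placeholder; only columns 0:5 of each knot are read back above.
def make_dummy_warmstart_csv(path="warmstart.csv"):
    start = np.array([1.0, 1.0, 0.5])
    model = crocoddyl.ActionModelUnicycle()
    model.costWeights = np.matrix([1, 0.3]).T
    problem = crocoddyl.ShootingProblem(np.matrix(start).T, [model] * 30, model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.solve([], [], 1000)
    xs = np.squeeze(np.asarray(ddp.xs))[1:]  # (30, 3)
    us = np.squeeze(np.asarray(ddp.us))      # (30, 2)
    cost = np.zeros((30, 1))                 # placeholder cost column
    row = np.concatenate([start, np.hstack([xs, us, cost]).ravel()])
    pd.DataFrame([row]).to_csv(path, index=False)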
def warm(self):
    net = train_net()
    neuralNet = net.generate_trained_net()
    """
    state_weight: float = 1.,    # can use random.uniform(1, 3)
    control_weight: float = 0.3, # can use random.uniform(0, 1)
    nodes: int = 20,
    """
    for _ in range(self.__n_trajectories):
        initial_config = [
            random.uniform(-2.1, 2.),
            random.uniform(-2.1, 2.),
            random.uniform(0, 1)
        ]
        model = crocoddyl.ActionModelUnicycle()
        model2 = crocoddyl.ActionModelUnicycle()
        model.costWeights = np.matrix([1., .3]).T
        model2.costWeights = np.matrix([1., .3]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * 20, model)
        problem2 = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model2] * 20, model2)
        ddp = crocoddyl.SolverDDP(problem)
        ddp2 = crocoddyl.SolverDDP(problem2)

        # Predict a full (state, control) rollout from the initial state
        x_data = np.matrix(initial_config).reshape(1, 3)
        prediction = neuralNet.predict(x_data).reshape(20, 5)
        xs = np.matrix(prediction[:, 0:3])
        us = np.matrix(prediction[:, 3:5])

        # Note: ddp_xs is built but only the control warmstart is passed below
        ddp_xs = [np.matrix(initial_config).reshape(1, 3).T]
        for i in range(xs.shape[0]):
            ddp_xs.append(np.matrix(xs[i]).T)
        ddp_us = []
        for i in range(us.shape[0]):
            ddp_us.append(np.matrix(us[i]).T)

        # Warmstart with the predicted controls; coldstart for comparison
        ddp.solve([], ddp_us)
        ddp2.solve()
        print(" With warmstart, without warmstart ")
        print(ddp.iter, " ", ddp2.iter)
def solveDdp(pos_i, pos_f, graph_disp):
    # print("--- Ddp ---")
    # Express the goal relative to the initial position
    final_state = [(pos_f[0] - pos_i[0]), (pos_f[1] - pos_i[1]), pos_f[2]]
    model.finalState = np.matrix(
        [final_state[0], final_state[1], final_state[2]]).T
    terminal_model.finalState = np.matrix(
        [final_state[0], final_state[1], final_state[2]]).T
    init_state = np.matrix([0, 0, pos_i[2], 0, 0, 0]).T

    # Initial guess for the horizon length, refined with Nelder-Mead
    distance = sqrt((pos_f[0] - pos_i[0])**2 + (pos_f[1] - pos_i[1])**2)
    T_guess = int(distance * 100 / model.alpha * 2 / 3)
    optimal = minimize(optimizeT, T_guess,
                       args=(init_state, distance * 100 / model.alpha),
                       method='Nelder-Mead',
                       options={'xtol': 0.01, 'ftol': 0.001})
    T_opt = int(optimal.x)
    # print("----", T_guess, T_opt)

    problem = crocoddyl.ShootingProblem(init_state, [model] * T_opt,
                                        terminal_model)
    ddp = crocoddyl.SolverDDP(problem)
    done = ddp.solve()

    title = "DdpResult_from_" + str(pos_i[0]) + "," + str(pos_i[1]) + "," + \
        str(floor(pos_i[2] * 100) / 100) + "_to_0,0," + \
        str(floor(pos_f[2] * 100) / 100)
    x, y, theta = translate(ddp.xs, pos_i)
    if graph_disp:
        plotDdpResult(x, y, theta)
    path = "data/DdpResult/" + title + "_pos.dat"
    sol = [x, y, theta]
    np.savetxt(path, np.transpose(sol))
def _solveProblem(self, initial_config, logger: bool = False):
    """Solve one unicycle problem"""
    if not isinstance(initial_config, np.ndarray):
        initial_config = np.array(initial_config).reshape(-1, 1)

    model = crocoddyl.ActionModelUnicycle()
    model.costWeights = np.array([*self.weights]).T
    if self.terminal_model is None:
        problem = crocoddyl.ShootingProblem(initial_config,
                                            [model] * self.horizon, model)
    else:
        problem = crocoddyl.ShootingProblem(initial_config,
                                            [model] * self.horizon,
                                            self.terminal_model)
    ddp = crocoddyl.SolverDDP(problem)
    if logger:
        log = crocoddyl.CallbackLogger()
        ddp.setCallbacks([log])
    ddp.th_stop = self.precision
    ddp.solve([], [], self.maxiters)
    if logger:
        # print("\n Returning logs and ddp")
        return ddp, log
    else:
        return ddp
def createProblem(self):
    for i in range(int(self.T_mpc / self.dt)):
        model = ActionModel(12, 12, self.mu)
        model.createModel()
        # Weight on the state
        model.stateWeight = np.array(
            [1, 1, 150, 35, 30, 8, 20, 20, 15, 4, 4, 8])
        # Weight on the friction cone penalisation
        model.frictionWeight = 10
        # Add model to the list of models
        self.ListAction.append(model)

    # Terminal model
    self.terminalModel = ActionModel()
    self.terminalModel.createModel()
    # Weight on the state for the terminal node
    self.terminalModel.stateWeight = np.array(
        [1, 1, 150, 35, 30, 8, 20, 20, 15, 4, 4, 8])
    # No weight on the command for the terminal node
    self.terminalModel.frictionWeight = 0.0

    self.problem = crocoddyl.ShootingProblem(np.zeros(12), self.ListAction,
                                             self.terminalModel)
    self.ddp = crocoddyl.SolverDDP(self.problem)
def get_trajectories(ntraj: int = 2):
    """
    In the cartpole system each state is 4-dimensional and each control is
    1-dimensional, so with T = 100 knots every row of the trajectory data
    holds 100 * 5 = 500 entries. Shapes: initial_state (ntraj, 4),
    trajectory (ntraj, 500).
    """
    initial_state = []
    trajectory = []
    for _ in range(ntraj):
        model = cartpole_model()
        x0 = [
            random.uniform(0, 1),
            random.uniform(-3.14, 3.14),
            random.uniform(0., 0.99),
            random.uniform(0., .99)
        ]
        T = 100
        problem = crocoddyl.ShootingProblem(
            np.matrix(x0).T, [model] * T, model)
        ddp = crocoddyl.SolverDDP(problem)
        # Solving this problem
        done = ddp.solve([], [], 100)
        # Drop the initial state so that xs and us align knot by knot
        del ddp.xs[0]
        y = [np.append(np.array(i).T, j) for i, j in zip(ddp.xs, ddp.us)]
        initial_state.append(x0)
        trajectory.append(np.array(y).flatten())
    initial_state = np.asarray(initial_state)
    trajectory = np.asarray(trajectory)
    return initial_state, trajectory
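# Hedged usage sketch for get_trajectories(); the shapes follow the docstring
# above (100 knots * (4 states + 1 control) = 500 columns per row).
def _demo_get_trajectories():
    x_data, y_data = get_trajectories(ntraj=5)
    print(x_data.shape, y_data.shape)  # expected: (5, 4) (5, 500)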
def runBenchmark(model):
    robot_model = ROBOT.model
    q0 = np.matrix([0.173046, 1., -0.52366, 0., 0., 0.1, -0.005]).T
    x0 = np.vstack([q0, np.zeros((robot_model.nv, 1))])

    # Note that we need to include a cost model (i.e. a set of cost functions)
    # in order to fully define the action model for our optimal control
    # problem. For this particular example, we formulate three running-cost
    # functions: goal-tracking cost, state and control regularization; and one
    # terminal cost: the goal cost. First, let's create the common cost
    # functions.
    state = crocoddyl.StateMultibody(robot_model)
    Mref = crocoddyl.FramePlacement(
        robot_model.getFrameId("gripper_left_joint"),
        pinocchio.SE3(np.eye(3), np.matrix([[.0], [.0], [.4]])))
    goalTrackingCost = crocoddyl.CostModelFramePlacement(state, Mref)
    xRegCost = crocoddyl.CostModelState(state)
    uRegCost = crocoddyl.CostModelControl(state)

    # Create a cost model for the running and terminal action models
    runningCostModel = crocoddyl.CostModelSum(state)
    terminalCostModel = crocoddyl.CostModelSum(state)

    # Then let's add the running and terminal cost functions
    runningCostModel.addCost("gripperPose", goalTrackingCost, 1e-3)
    runningCostModel.addCost("xReg", xRegCost, 1e-7)
    runningCostModel.addCost("uReg", uRegCost, 1e-7)
    terminalCostModel.addCost("gripperPose", goalTrackingCost, 1)

    # Next, we need to create an action model for the running and terminal
    # knots. The forward dynamics (computed using ABA) are implemented inside
    # DifferentialActionModelFullyActuated.
    runningModel = crocoddyl.IntegratedActionModelEuler(
        model(state, runningCostModel), 1e-3)
    runningModel.differential.armature = np.matrix(
        [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.]).T
    terminalModel = crocoddyl.IntegratedActionModelEuler(
        model(state, terminalCostModel), 1e-3)
    terminalModel.differential.armature = np.matrix(
        [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.]).T

    # For this optimal control problem, we define 100 knots (or running action
    # models) plus a terminal knot
    problem = crocoddyl.ShootingProblem(x0, [runningModel] * N, terminalModel)

    # Creating the DDP solver for this OC problem and timing repeated solves
    ddp = crocoddyl.SolverDDP(problem)
    duration = []
    for i in range(T):
        c_start = time.time()
        ddp.solve([], [], MAXITER)
        c_end = time.time()
        duration.append(1e3 * (c_end - c_start))
    avrg_duration = sum(duration) / len(duration)
    min_duration = min(duration)
    max_duration = max(duration)
    return avrg_duration, min_duration, max_duration
def stateData(nTraj: int = 10000,
              modelWeight1: float = 1,
              modelWeight2: float = 0.4,
              timeHorizon=30,
              initialTheta: float = 0.,
              fddp: bool = False,
              varyingInitialTheta: bool = False,
              saveData: bool = False):
    trajectory = []
    model = crocoddyl.ActionModelUnicycle()
    for _ in range(nTraj):
        if varyingInitialTheta:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                random.uniform(0, 1)
            ]
        else:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                initialTheta
            ]
        model.costWeights = np.matrix([modelWeight1, modelWeight2]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * timeHorizon, model)
        if fddp:
            ddp = crocoddyl.SolverFDDP(problem)
        else:
            ddp = crocoddyl.SolverDDP(problem)
        # Will need to include an assertion check for done in more
        # complicated examples
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            # Store the trajectory, flattened as (x, y, theta)..(x, y, theta)..
            a = np.array(ddp.xs).flatten()
            # Append the total cost at the end of each trajectory
            b = np.append(a, sum(d.cost for d in ddp.datas()))
            trajectory.append(b)

    trajectory_ = np.array(trajectory)
    if saveData:
        df = pd.DataFrame(trajectory_)
        df.to_csv("trajectories.csv")
    print(trajectory_.shape)
    print("# Dataset 2: shape (nTraj, 3 * (timeHorizon + 1) + 1); the columns "
          "recur as (x, y, theta), (x, y, theta), ... The first two columns "
          "form the training input; the last column is the cost of each "
          "trajectory.")
    return trajectory_
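# Hedged usage sketch for stateData(): split the returned matrix the way the
# printout above describes. The names x_train/y_train are illustrative.
def _demo_stateData_split():
    data = stateData(nTraj=100)
    x_train = data[:, 0:2]  # starting (x, y) of each trajectory
    y_train = data[:, -1]   # total cost of each trajectory
    return x_train, y_train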
def cartpole_data(ntraj: int = 1):
    cartpoleDAM = DifferentialActionModelCartpole()
    cartpoleData = cartpoleDAM.createData()
    x = cartpoleDAM.state.rand()
    u = np.zeros(1)
    cartpoleDAM.calc(cartpoleData, x, u)
    # Differentiate the cartpole dynamics numerically, then integrate with Euler
    cartpoleND = crocoddyl.DifferentialActionModelNumDiff(cartpoleDAM, True)
    timeStep = 5e-2
    cartpoleIAM = crocoddyl.IntegratedActionModelEuler(cartpoleND, timeStep)
    T = 50
    data = []
    for _ in range(ntraj):
        x0 = [
            random.uniform(0, 1),
            random.uniform(-3.14, 3.14),
            random.uniform(0., 1),
            0.
        ]
        problem = crocoddyl.ShootingProblem(
            np.array(x0).T, [cartpoleIAM] * T, cartpoleIAM)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            ddp_xs = np.array(ddp.xs)
            # Append the value function to the initial state
            x0.append(ddp.cost)
            # Get us, xs for T timesteps
            us = np.array(ddp.us)
            xs = ddp_xs[1:, :]
            cost = []
            for d in ddp.datas():
                cost.append(d.cost)
            # Remove the cost of the first node
            cost.pop(0)
            cost = np.array(cost).reshape(-1, 1)
            info = np.hstack((xs, us, cost))
            info = info.ravel()
            state = np.concatenate((x0, info))
            data.append(state)
    data = np.array(data)
    print("Each row: x_train -> indices 0:4, value function at index 4, "
          "then (state(4), control, cost) repeated per knot")
    print("T = 50, so 50 x 6 = 300 columns, plus the leading 5")
    # np.savetxt("foo.csv", data, delimiter=",")
    return data
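# Hedged decode sketch for one cartpole_data() row, per the layout printed
# above: [x0 (4 values), total cost, then 50 knots of (state(4), control, cost)].
def _demo_cartpole_decode(row):
    x0, value = row[0:4], row[4]
    knots = row[5:].reshape(50, 6)  # per knot: 4 states, 1 control, 1 cost
    return x0, value, knots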
def solver(starting_condition, T=30, precision=1e-9, maxiters=1000):
    """Solve one crocoddyl problem"""
    model = crocoddyl.ActionModelUnicycle()
    model.costWeights = np.array([1., 1.]).T
    problem = crocoddyl.ShootingProblem(starting_condition, [model] * T, model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.th_stop = precision
    ddp.solve([], [], maxiters)
    return ddp
def solver_ddp(self, xyz):
    """Return the solved ddp"""
    model = crocoddyl.ActionModelUnicycle()
    T = 30
    model.costWeights = np.matrix([1, 1]).T
    problem = crocoddyl.ShootingProblem(np.array(xyz).T, [model] * T, model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.solve([], [], 1000)
    return ddp
def optimizeT(T_min, T_max, X0):
    T_opt = T_min
    cost_min = -1
    for T in range(T_min, T_max, 5):
        problem = crocoddyl.ShootingProblem(X0, [model] * T, model)
        ddp = crocoddyl.SolverDDP(problem)
        done = ddp.solve()
        cost = costFunction(model.costWeights, ddp.xs, ddp.us,
                            model.finalState)
        # Keep the horizon with the lowest |cost| among runs that converged
        # quickly enough
        if (cost_min < 0 or cost_min > abs(cost)) and ddp.iter < 80:
            cost_min = abs(cost)
            T_opt = T
    return T_opt
def getTrainingData(nTraj: int = 10000,
                    thetaVal: bool = False,
                    fddp: bool = False):
    model = crocoddyl.ActionModelUnicycle()
    trajectory = []
    for _ in range(nTraj):
        if thetaVal:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                random.uniform(0., 1.)
            ]
        else:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                0.0
            ]
        model.costWeights = np.matrix([1, 0.3]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * 30, model)
        if fddp:
            ddp = crocoddyl.SolverFDDP(problem)
        else:
            ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            state = []
            # Attach x, y, theta: columns 0, 1, 2 of x_training
            state.extend(i for i in ddp.xs[0])
            # Attach control (linear velocity, angular velocity): columns 3, 4
            state.extend(i for i in ddp.us[0])
            # Attach the value function (total cost): column 5
            state.append(sum(d.cost for d in ddp.datas()))
            # Attach the rest of the trajectory, 30 x 5: columns 6 to -1
            trajectory_control = np.hstack(
                (np.array(ddp.xs[1:]), np.array(ddp.us)))
            state.extend(i for i in trajectory_control.flatten())
            trajectory.append(state)
    data = np.array(trajectory)
    df = pd.DataFrame(data)
    # ....................................
    # x_train = data[:, 0:3]   x, y, theta
    # y_train = data[:, 3:]    lv, av, cost, (x, y, theta, control1, control2)
    # ....................................
    return data
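# Hedged usage sketch following the split described in the comments above;
# the names x_train/y_train are illustrative.
def _demo_getTrainingData_split():
    data = getTrainingData(nTraj=100)
    x_train = data[:, 0:3]  # x, y, theta
    y_train = data[:, 3:]   # v, w, cost, then (x, y, theta, v, w) per knot
    return x_train, y_train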
def solve_problem(terminal_model=None,
                  initial_configuration=None,
                  horizon: int = 100,
                  precision: float = 1e-9,
                  maxiters: int = 1000,
                  state_weight: float = 1.,
                  control_weight: float = 1.):
    """
    Solve the problem for a given initial position.

    @params:
        1: terminal_model        = terminal model (with the neural network
                                   inside it) for the crocoddyl problem. If
                                   None, the crocoddyl action model is used
                                   as the terminal model.
        2: initial_configuration = initial position for the unicycle: a list,
                                   a numpy array or a torch tensor.
        3: horizon               = time horizon for the unicycle. Defaults to 100.
        4: precision             = ddp.th_stop. Defaults to 1e-9.
        5: maxiters              = maximum iterations allowed for crocoddyl.
                                   Defaults to 1000.
        6: state_weight          = defaults to 1.
        7: control_weight        = defaults to 1.

    @returns:
        ddp
    """
    if isinstance(initial_configuration, list):
        initial_configuration = np.array(initial_configuration)
    elif isinstance(initial_configuration, torch.Tensor):
        initial_configuration = initial_configuration.numpy()

    model = crocoddyl.ActionModelUnicycle()
    model.costWeights = np.matrix([state_weight, control_weight]).T

    if terminal_model is None:
        problem = crocoddyl.ShootingProblem(initial_configuration.T,
                                            [model] * horizon, model)
    else:
        problem = crocoddyl.ShootingProblem(initial_configuration.T,
                                            [model] * horizon, terminal_model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.th_stop = precision
    ddp.solve([], [], maxiters)
    return ddp
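# Minimal call sketch for solve_problem(); the starting configuration and
# weights are arbitrary illustrative values.
def _demo_solve_problem():
    ddp = solve_problem(initial_configuration=[1.0, 1.0, 0.3],
                        horizon=50,
                        state_weight=1.,
                        control_weight=0.3)
    print(ddp.iter, ddp.cost)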
def solve_crocoddyl(xyz):
    """xyz should be either a list or an array"""
    model = crocoddyl.ActionModelUnicycle()
    T = 30
    model.costWeights = np.matrix([1, 1]).T
    problem = crocoddyl.ShootingProblem(m2a(xyz).T, [model] * T, model)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.solve([], [], 1000)
    return ddp
def create_List_model(self):
    """Create the list of action models.

    The same model cannot be reused as [model] * (T_mpc/dt) because the
    dynamics change at each node.
    """
    j = 0
    k_cum = 0
    # Iterate over all phases of the gait;
    # the first column of xref corresponds to the current state
    while (self.gait[j, 0] != 0):
        for i in range(k_cum, k_cum + int(self.gait[j, 0])):
            model = quadruped_walkgen.ActionModelQuadrupedAugmented()
            # Update intern parameters
            self.update_model_augmented(model)
            # Add model to the list of models
            self.ListAction.append(model)
        if np.sum(self.gait[j + 1, 1:]) == 4:
            # No optimisation on the first line
            model = quadruped_walkgen.ActionModelQuadrupedStep()
            # Update intern parameters
            self.update_model_step(model)
            # Add model to the list of models
            self.ListAction.append(model)
        k_cum += int(self.gait[j, 0])
        j += 1

    # Model parameters of the terminal node
    self.terminalModel = quadruped_walkgen.ActionModelQuadrupedAugmented()
    self.update_model_augmented(self.terminalModel)
    # Weight vectors of the terminal node
    self.terminalModel.forceWeights = np.zeros(12)
    self.terminalModel.frictionWeights = 0.
    self.terminalModel.shoulderWeights = np.full(8, 0.0)
    self.terminalModel.lastPositionWeights = np.full(8, 0.0)

    # Shooting problem
    self.problem = crocoddyl.ShootingProblem(np.zeros(20), self.ListAction,
                                             self.terminalModel)
    # DDP solver
    self.ddp = crocoddyl.SolverDDP(self.problem)
    return 0
def generate_trajectories_random_xyz(self, save: bool = False):
    starting_configurations = []
    optimal_trajectories = []
    controls = []
    for _ in range(self.n_trajectories):
        initial_config = [
            random.uniform(-2.1, 2.1),
            random.uniform(-2.1, 2.1),
            random.uniform(0, 1)
        ]
        model = crocoddyl.ActionModelUnicycle()
        model.costWeights = np.matrix(
            [self.state_weight, self.control_weight]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * self.knots, model)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], self.maxiter)
        if ddp.isFeasible:
            state = np.array(ddp.xs)
            control = np.array(ddp.us)
            starting_configurations.append(state[0, :])
            optimal_trajectories.append(state[1:, :])
            controls.append(control)
    if save:
        f = open('./data/variable_initial_state/starting_configurations.pkl',
                 'wb')
        cPickle.dump(starting_configurations, f,
                     protocol=cPickle.HIGHEST_PROTOCOL)
        g = open("./data/variable_initial_state/optimal_trajectories.pkl",
                 "wb")
        cPickle.dump(optimal_trajectories, g,
                     protocol=cPickle.HIGHEST_PROTOCOL)
        h = open("./data/variable_initial_state/control.pkl", "wb")
        cPickle.dump(controls, h, protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
        g.close()
        h.close()
    else:
        return starting_configurations, optimal_trajectories, controls
def _generate_trajectories(self):
    """
    This could be done better with a pool, but since we generate at most 10K
    trajectories, there's no need for one.
    """
    starting_configurations = []
    optimal_trajectories = []
    feasible_trajectories = 0
    for _ in range(self.__n_trajectories):
        initial_config = np.matrix([
            random.uniform(-2.1, 2.),
            random.uniform(-2.1, 2.),
            random.uniform(0, 1)
        ]).T
        model = crocoddyl.ActionModelUnicycle()
        model.costWeights = np.matrix(
            [self.__state_weight, self.__control_weight]).T
        problem = crocoddyl.ShootingProblem(initial_config,
                                            [model] * self.__nodes, model)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve()
        if ddp.isFeasible:
            ddp_xs = np.squeeze(np.asarray(ddp.xs))
            ddp_us = np.squeeze(np.asarray(ddp.us))
            feasible_trajectories += 1
            x = ddp_xs[1:, 0]
            y = ddp_xs[1:, 1]
            theta = ddp_xs[1:, 2]
            velocity = ddp_us[:, 0]
            torque = ddp_us[:, 1]
            optimal_trajectory = np.hstack((x, y, theta, velocity, torque))
            starting_configurations.append(
                np.squeeze(np.asarray(initial_config)))
            optimal_trajectories.append(optimal_trajectory)
    starting_configurations = np.asarray(starting_configurations)
    optimal_trajectories = np.asarray(optimal_trajectories)
    if self.save_trajectories:
        f = open('../../data/x_data.pkl', 'wb')
        cPickle.dump(starting_configurations, f,
                     protocol=cPickle.HIGHEST_PROTOCOL)
        g = open("../../data/y_data.pkl", "wb")
        cPickle.dump(optimal_trajectories, g,
                     protocol=cPickle.HIGHEST_PROTOCOL)
        f.close()
        g.close()
    return starting_configurations, optimal_trajectories, feasible_trajectories
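# Hedged load sketch for the pickles written above; the paths mirror those
# used in _generate_trajectories().
def _demo_load_training_data():
    with open('../../data/x_data.pkl', 'rb') as f:
        x_data = cPickle.load(f)
    with open('../../data/y_data.pkl', 'rb') as g:
        y_data = cPickle.load(g)
    return x_data, y_data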
def runDDPSolveBenchmark(xs, us, problem):
    ddp = crocoddyl.SolverDDP(problem)
    duration = []
    for _ in range(T):
        c_start = time.time()
        ddp.solve(xs, us, MAXITER, False, 0.1)
        c_end = time.time()
        duration.append(1e3 * (c_end - c_start))
    avrg_duration = sum(duration) / len(duration)
    min_duration = min(duration)
    max_duration = max(duration)
    return avrg_duration, min_duration, max_duration
def runDDPSolveBenchmark(xs, us, problem):
    ddp = crocoddyl.SolverDDP(problem)
    if CALLBACKS:
        ddp.setCallbacks([crocoddyl.CallbackVerbose()])
    duration = []
    for i in range(T):
        c_start = time.time()
        ddp.solve(xs, us, MAXITER)
        c_end = time.time()
        duration.append(1e3 * (c_end - c_start))
    avrg_duration = sum(duration) / len(duration)
    min_duration = min(duration)
    max_duration = max(duration)
    return avrg_duration, min_duration, max_duration
def solveProblem(self, starting_config):
    """Solve one unicycle problem and return the ddp"""
    model = crocoddyl.ActionModelUnicycle()
    if self.terminal_model is not None:
        terminal_unicycle = self.terminal_model
    else:
        terminal_unicycle = crocoddyl.ActionModelUnicycle()
    assert terminal_unicycle is not None
    model.costWeights = np.array([*self.weights]).T
    problem = crocoddyl.ShootingProblem(starting_config,
                                        [model] * self.horizon,
                                        terminal_unicycle)
    ddp = crocoddyl.SolverDDP(problem)
    ddp.th_stop = self.precision
    ddp.solve([], [], self.maxiters)
    del model, terminal_unicycle, problem
    return ddp
def generate_data(ntrajectories: int = 100000,
                  maxiter: int = 100,
                  knots: int = 20,
                  state_weight: float = 1.,    # can use random.uniform(1, 3)
                  control_weight: float = 0.3  # can use random.uniform(0, 1)
                  ):
    """
    @ Arguments:
        ntrajectories : number of trajectories to generate
        maxiter       : number of iterations crocoddyl may take per trajectory
        knots         : number of knots, e.g. T = 20, 30

    @ Returns:
        starting_configurations : matrix of starting points,
                                  shape (ntrajectories, 3) .... x_data
        optimal_trajectories    : matrix of trajectories,
                                  shape (ntrajectories, 5 * knots) .... y_data
        Each row is: x1, y1, theta1, v1, w1, x2, y2, theta2, v2, w2, ... and so on
    """
    starting_configurations = []
    optimal_trajectories = []
    for _ in range(ntrajectories):
        initial_config = [
            random.uniform(-2.1, 2.),
            random.uniform(-2.1, 2.),
            random.uniform(0, 1)
        ]
        model = crocoddyl.ActionModelUnicycle()
        model.costWeights = np.matrix([state_weight, control_weight]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * knots, model)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], maxiter)
        if ddp.isFeasible:
            state = np.array(ddp.xs)
            control = np.array(ddp.us)
            optimal_trajectory = np.hstack((state[1:, :], control))
            optimal_trajectories.append(np.ravel(optimal_trajectory))
            starting_configurations.append(initial_config)
    optimal_trajectories = np.array(optimal_trajectories)
    starting_configurations = np.array(starting_configurations)
    return starting_configurations, optimal_trajectories
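# Hedged usage sketch for generate_data(): each y row flattens `knots` knots
# of (x, y, theta, v, w), so it can be reshaped back per trajectory.
def _demo_generate_data():
    x_data, y_data = generate_data(ntrajectories=10, knots=20)
    first_traj = y_data[0].reshape(20, 5)  # columns: x, y, theta, v, w
    return x_data.shape, first_traj.shape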
def optimizeT(T, x0, T_guess):
    T = int(T[0])
    if 0 < T < 2 * T_guess:
        problem = crocoddyl.ShootingProblem(x0, [model] * T, terminal_model)
        ddp = crocoddyl.SolverDDP(problem)
        done = ddp.solve()
        # print(T, done, ddp.iter)
        if done:
            cost = costFunction(model.costWeights, terminal_model.costWeights,
                                ddp.xs, ddp.us, model.finalState) / T
            # print(cost)
        elif ddp.iter < 50:
            # Not converged but close: penalise the cost by a factor of 10
            cost = costFunction(model.costWeights, terminal_model.costWeights,
                                ddp.xs, ddp.us, model.finalState) / T * 10
        else:
            cost = 1e4
    else:
        # Horizon outside the admissible range
        cost = 1e8
    return cost
def get_trajectories(ntraj: int = 2):
    """
    In the cartpole system each state is 4-dimensional and each control is
    1-dimensional, so with T = 100 knots every row of y_data holds
    100 * 5 = 500 entries. Shapes: x_data (ntraj, 4), y_data (ntraj, 500).
    """
    initial_state = []
    trajectory = []
    for _ in range(ntraj):
        model = cartpole_model()
        x0 = [
            random.uniform(0, 1),
            random.uniform(-3.14, 3.14),
            random.uniform(0., 0.99),
            random.uniform(0., .99)
        ]
        T = 100
        problem = crocoddyl.ShootingProblem(
            np.matrix(x0).T, [model] * T, model)
        ddp = crocoddyl.SolverDDP(problem)
        # Solving this problem
        done = ddp.solve([], [], 100)
        # Drop the initial state so that xs and us align knot by knot
        del ddp.xs[0]
        y = [np.append(np.array(i).T, j) for i, j in zip(ddp.xs, ddp.us)]
        initial_state.append(x0)
        trajectory.append(np.array(y).flatten())
    initial_state = np.asarray(initial_state)
    trajectory = np.asarray(trajectory)

    f = open('x_data.pkl', 'wb')
    cPickle.dump(initial_state, f, protocol=cPickle.HIGHEST_PROTOCOL)
    g = open("y_data.pkl", "wb")
    cPickle.dump(trajectory, g, protocol=cPickle.HIGHEST_PROTOCOL)
    f.close()
    g.close()
    print("Generated data")
    print(f"shape of x_data {initial_state.shape}, y_data {trajectory.shape}")
def runBenchmark(model):
    x0 = np.matrix(np.zeros(NX)).T
    runningModels = [model(NX, NU)] * N
    terminalModel = model(NX, NU)
    xs = [x0] * (N + 1)
    us = [np.matrix(np.zeros(NU)).T] * N

    problem = crocoddyl.ShootingProblem(x0, runningModels, terminalModel)
    ddp = crocoddyl.SolverDDP(problem)
    duration = []
    for i in range(T):
        c_start = time.time()
        ddp.solve(xs, us, MAXITER)
        c_end = time.time()
        duration.append(1e3 * (c_end - c_start))
    avrg_duration = sum(duration) / len(duration)
    min_duration = min(duration)
    max_duration = max(duration)
    return avrg_duration, min_duration, max_duration
def data(nTraj: int = 1000, thetaVal: bool = False, fddp: bool = False):
    model = crocoddyl.ActionModelUnicycle()
    x_data = []
    y_data = []
    for _ in range(nTraj):
        if thetaVal:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                random.uniform(0., 1.)
            ]
        else:
            initial_config = [
                random.uniform(-1.99, 1.99),
                random.uniform(-1.99, 1.99),
                0.0
            ]
        model.costWeights = np.matrix([1, 0.3]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * 30, model)
        if fddp:
            ddp = crocoddyl.SolverFDDP(problem)
        else:
            ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            xs = np.array(ddp.xs)
            cost = []
            for d in ddp.datas():
                cost.append(d.cost)
            # 30 running nodes + 1 terminal node -> 31 costs
            cost = np.array(cost).reshape(31, 1)
            state = np.hstack((xs, cost))
            y_data.append(state.ravel())
            x_data.append(initial_config)
    x_data = np.array(x_data)
    y_data = np.array(y_data)
    return x_data, y_data
def writeOptControl(X0, xf, yf, thetaf, T_opt):
    model.finalState = np.matrix([xf, yf, thetaf]).T
    problem = crocoddyl.ShootingProblem(X0, [model] * T_opt, model)
    ddp = crocoddyl.SolverDDP(problem)
    done = ddp.solve()

    title = "OptControl_from_" + str(-xf) + "," + str(-yf) + "," + \
        str(floor((X0[2]) * 100) / 100) + "_to_0,0," + \
        str(floor(thetaf * 100) / 100)
    sol = np.transpose(ddp.xs)[0]
    print(title)
    plotOptControlTranslate(ddp.xs, [xf, yf, thetaf])

    # Translate the solution so that the goal sits at the origin
    sol[0] = np.array(sol[0]) - xf
    sol[1] = np.array(sol[1]) - yf
    plt.title(title)
    plt.plot(sol[0], sol[1])
    path = "../data/OptControl/" + title + "_pos.dat"
    np.savetxt(path, np.transpose(sol))

    # Finite-difference velocities with a 0.01 s timestep
    vsol = np.zeros((len(sol[0]) - 1, 3))
    for i in range(0, len(sol[0]) - 1):
        vsol[i][0] = (sol[0][i + 1] - sol[0][i]) / 0.01
        vsol[i][1] = (sol[1][i + 1] - sol[1][i]) / 0.01
        vsol[i][2] = (sol[2][i + 1] - sol[2][i]) / 0.01
    vpath = "../data/OptControl/" + title + "_vel.dat"
    np.savetxt(vpath, vsol)
def cartpole_data(ntraj: int = 1000):
    cartpoleDAM = DifferentialActionModelCartpole()
    cartpoleData = cartpoleDAM.createData()
    x = cartpoleDAM.state.rand()
    u = np.zeros(1)
    cartpoleDAM.calc(cartpoleData, x, u)
    cartpoleND = crocoddyl.DifferentialActionModelNumDiff(cartpoleDAM, True)
    timeStep = 5e-2
    cartpoleIAM = crocoddyl.IntegratedActionModelEuler(cartpoleND, timeStep)
    T = 50
    data = []
    for _ in range(ntraj):
        cost = []
        x0 = np.array([
            random.uniform(0, 1),
            random.uniform(-3.14, 3.14),
            random.uniform(0., 1),
            0.
        ])
        problem = crocoddyl.ShootingProblem(x0.T, [cartpoleIAM] * T,
                                            cartpoleIAM)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            xs = np.array(ddp.xs)
            us = np.array(ddp.us)
            for d in ddp.datas():
                cost.append(d.cost)
            # Note: us and cost are computed here but only the flattened
            # states are stored
            data.append(xs.ravel())
    data = np.array(data)
    return data
def testData(ntraj: int = 50):
    """
    Each row is (cost, x, y, theta, x, y, theta, ...): the total trajectory
    cost followed by the flattened states.
    """
    model = crocoddyl.ActionModelUnicycle()
    cost_trajectory = []
    for _ in range(ntraj):
        x, y = point(0, 0, 2.1)
        initial_config = [x, y, 0]
        model.costWeights = np.matrix([1, 0.3]).T
        problem = crocoddyl.ShootingProblem(
            np.matrix(initial_config).T, [model] * 30, model)
        ddp = crocoddyl.SolverDDP(problem)
        ddp.solve([], [], 1000)
        if ddp.iter < 1000:
            a = np.array(ddp.xs)
            # Prepend the total cost to the flattened trajectory
            c = np.append(sum(d.cost for d in ddp.datas()), a.flatten())
            cost_trajectory.append(c)
    return np.array(cost_trajectory)
runningModel.differential.armature = np.matrix(
    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.]).T
terminalModel = crocoddyl.IntegratedActionModelEuler(
    crocoddyl.DifferentialActionModelFreeFwdDynamics(state, actuationModel,
                                                     terminalCostModel), 0.)
terminalModel.differential.armature = np.matrix(
    [0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.]).T

# For this optimal control problem, we define 250 knots (or running action
# models) plus a terminal knot
T = 250
q0 = np.matrix([0.173046, 1., -0.52366, 0., 0., 0.1, -0.005]).T
x0 = np.concatenate([q0, pinocchio.utils.zero(robot_model.nv)])
problem = crocoddyl.ShootingProblem(x0, [runningModel] * T, terminalModel)

# Creating the DDP solver for this OC problem, defining a logger
ddp = crocoddyl.SolverDDP(problem)
cameraTF = [2., 2.68, 0.54, 0.2, 0.62, 0.72, 0.22]
if WITHDISPLAY and WITHPLOT:
    display = crocoddyl.GepettoDisplay(talos_arm, 4, 4, cameraTF)
    ddp.setCallbacks([
        crocoddyl.CallbackLogger(),
        crocoddyl.CallbackVerbose(),
        crocoddyl.CallbackDisplay(display)
    ])
elif WITHDISPLAY:
    display = crocoddyl.GepettoDisplay(talos_arm, 4, 4, cameraTF)
    ddp.setCallbacks(
        [crocoddyl.CallbackVerbose(), crocoddyl.CallbackDisplay(display)])
elif WITHPLOT:
    ddp.setCallbacks([