def training(self): print( f"Starting {self.runs} Irepa runs for Feedforward Network.......\n" ) # Instantiate a neural net net = feedForwardNet(fc1_dims=self.fc1_dims, fc2_dims=self.fc2_dims, fc3_dims=self.fc3_dims) # irepa1: xtrain, ytrain = self._training_data(terminal_model=None) print(f"Starting 0th run .......\n") net = self._training(net, xtrain, ytrain) torch.save(net, "./nets/net1.pth") del xtrain, ytrain for i in tqdm(range(self.runs - 1)): # Generate training data with neural network inside crocoddyl print(f" Run {i + 1} .......\n") robot = example_robot_data.loadDoublePendulum() robot_model = robot.model terminal_model = terminalPendulum(net, robot_model) xtrain, ytrain = self._training_data(terminal_model=terminal_model) net = self._training(net=net, xtrain=xtrain, ytrain=ytrain) torch.save(net, './nets/net' + str(i + 2) + '.pth') del terminal_model, xtrain, ytrain print("Done........")
def cartpole(): # Loading the double pendulum model robot = example_robot_data.loadDoublePendulum() robot_model = robot.model state = crocoddyl.StateMultibody(robot_model) actModel = ActuationModelDoublePendulum(state, actLink=1) weights = np.array([1, 1, 1, 1] + [0.1] * 2) runningCostModel = crocoddyl.CostModelSum(state, actModel.nu) terminalCostModel = crocoddyl.CostModelSum(state, actModel.nu) xRegCost = crocoddyl.CostModelState( state, crocoddyl.ActivationModelQuad(state.ndx), state.zero(), actModel.nu) uRegCost = crocoddyl.CostModelControl(state, crocoddyl.ActivationModelQuad(1), actModel.nu) xPendCost = CostModelDoublePendulum( state, crocoddyl.ActivationModelWeightedQuad(np.matrix(weights).T), actModel.nu) dt = 1e-2 runningCostModel.addCost("uReg", uRegCost, 1e-4 / dt) runningCostModel.addCost("xGoal", xPendCost, 1e-5 / dt) terminalCostModel.addCost("xGoal", xPendCost, 1e4) runningModel = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, runningCostModel), dt) terminalModel = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, terminalCostModel), dt) return runningModel, terminalModel
def solver(starting_condition, T=30, precision=1e-9, maxiters=1000): """Solve one pendulum problem""" robot = example_robot_data.loadDoublePendulum() robot_model = robot.model state = crocoddyl.StateMultibody(robot_model) actModel = ActuationModelDoublePendulum(state, actLink=1) weights = np.array([1.5, 1.5, 1, 1] + [0.1] * 2) runningCostModel = crocoddyl.CostModelSum(state, actModel.nu) dt = 1e-2 xRegCost = crocoddyl.CostModelState( state, crocoddyl.ActivationModelQuad(state.ndx), state.zero(), actModel.nu) uRegCost = crocoddyl.CostModelControl(state, crocoddyl.ActivationModelQuad(1), actModel.nu) xPendCost = CostModelDoublePendulum( state, crocoddyl.ActivationModelWeightedQuad(weights), actModel.nu) runningCostModel.addCost("uReg", uRegCost, 1e-4 / dt) runningCostModel.addCost("xGoal", xPendCost, 1e-5 / dt) runningModel = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, runningCostModel), dt) terminalCostModel = crocoddyl.CostModelSum(state, actModel.nu) terminalCostModel.addCost("xGoal", xPendCost, 1e4) terminal_model = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, terminalCostModel), dt) problem = crocoddyl.ShootingProblem(starting_condition, [runningModel] * T, terminal_model) fddp = crocoddyl.SolverFDDP(problem) fddp.th_stop = precision fddp.solve([], [], maxiters)
def optimalSolution(init_states: Union[list, np.ndarray, torch.Tensor], terminal_model: crocoddyl.ActionModelAbstract = None, horizon: int = 150, precision: float = 1e-9, maxiters: int = 1000, use_fddp: bool = True): """Solve double pendulum problem with the given terminal model for the given position Parameters ---------- init_states : list or array or tensor These are the initial, starting configurations for the double pendulum terminal_model: crocoddyl.ActionModelAbstract The terminal model to be used to solve the problem horizon : int Time horizon for the running model precision : float precision for ddp.th_stop maxiters : int Maximum iterations allowed for the problem use_fddp : boolean Solve using ddp or fddp Returns -------- ddp : crocoddyl.Solverddp the optimal ddp or fddp of the prblem """ if isinstance(init_states, torch.Tensor): init_states = init_states.numpy() init_states = np.atleast_2d(init_states) solutions = [] for init_state in init_states: robot = example_robot_data.loadDoublePendulum() robot_model = robot.model state = crocoddyl.StateMultibody(robot_model) actModel = ActuationModelDoublePendulum(state, actLink=1) weights = np.array([1.5, 1.5, 1, 1] + [0.1] * 2) runningCostModel = crocoddyl.CostModelSum(state, actModel.nu) dt = 1e-2 xRegCost = crocoddyl.CostModelState( state, crocoddyl.ActivationModelQuad(state.ndx), state.zero(), actModel.nu) uRegCost = crocoddyl.CostModelControl( state, crocoddyl.ActivationModelQuad(1), actModel.nu) xPendCost = CostModelDoublePendulum( state, crocoddyl.ActivationModelWeightedQuad(weights), actModel.nu) runningCostModel.addCost("uReg", uRegCost, 1e-4 / dt) runningCostModel.addCost("xGoal", xPendCost, 1e-5 / dt) runningModel = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, runningCostModel), dt) if terminal_model is None: terminalCostModel = crocoddyl.CostModelSum(state, actModel.nu) terminalCostModel.addCost("xGoal", xPendCost, 1e4) terminal_model = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, terminalCostModel), dt) problem = crocoddyl.ShootingProblem(init_state, [runningModel] * horizon, terminal_model) if use_fddp: fddp = crocoddyl.SolverFDDP(problem) else: fddp = crocoddyl.SolverDDP(problem) fddp.th_stop = precision fddp.solve([], [], maxiters) solutions.append(fddp) return solutions
import os import sys import crocoddyl import numpy as np import example_robot_data from crocoddyl.utils.pendulum import CostModelDoublePendulum, ActuationModelDoublePendulum WITHDISPLAY = 'display' in sys.argv or 'CROCODDYL_DISPLAY' in os.environ WITHPLOT = 'plot' in sys.argv or 'CROCODDYL_PLOT' in os.environ # Loading the double pendulum model robot = example_robot_data.loadDoublePendulum() robot_model = robot.model state = crocoddyl.StateMultibody(robot_model) actModel = ActuationModelDoublePendulum(state, actLink=1) weights = np.array([1, 1, 1, 1] + [0.1] * 2) runningCostModel = crocoddyl.CostModelSum(state, actModel.nu) terminalCostModel = crocoddyl.CostModelSum(state, actModel.nu) xRegCost = crocoddyl.CostModelState(state, crocoddyl.ActivationModelQuad(state.ndx), state.zero(), actModel.nu) uRegCost = crocoddyl.CostModelControl(state, crocoddyl.ActivationModelQuad(1), actModel.nu) xPendCost = CostModelDoublePendulum( state, crocoddyl.ActivationModelWeightedQuad(weights), actModel.nu) dt = 1e-2
def solve_problem(terminal_model=None, initial_configuration=None, horizon: int = 100, precision: float = 1e-9, maxiters: int = 1000): """ Solve the problem for a given initial_position. @params: 1: terminal_model = Terminal model with neural network inside it, for the crocoddyl problem. If none, then Crocoddyl Integrated Action Model will be used as terminal model. 2: initial_configuration = initial position for the unicycle, either a list or a numpy array or a tensor. 3: horizon = Time horizon for the unicycle. Defaults to 100 4: stop = ddp.th_stop. Defaults to 1e-9 5: maxiters = maximum iterations allowed for crocoddyl.Defaults to 1000 @returns: ddp """ if isinstance(initial_configuration, list): initial_configuration = np.array(initial_configuration) elif isinstance(initial_configuration, torch.Tensor): initial_configuration = initial_configuration.numpy() # Loading the double pendulum model robot = example_robot_data.loadDoublePendulum() robot_model = robot.model state = crocoddyl.StateMultibody(robot_model) actModel = ActuationModelDoublePendulum(state, actLink=1) weights = np.array([1, 1, 1, 1] + [0.1] * 2) runningCostModel = crocoddyl.CostModelSum(state, actModel.nu) xRegCost = crocoddyl.CostModelState( state, crocoddyl.ActivationModelQuad(state.ndx), state.zero(), actModel.nu) uRegCost = crocoddyl.CostModelControl(state, crocoddyl.ActivationModelQuad(1), actModel.nu) xPendCost = CostModelDoublePendulum( state, crocoddyl.ActivationModelWeightedQuad(np.matrix(weights).T), actModel.nu) dt = 1e-2 runningCostModel.addCost("uReg", uRegCost, 1e-4 / dt) runningCostModel.addCost("xGoal", xPendCost, 1e-5 / dt) runningModel = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, runningCostModel), dt) if terminal_model is None: terminalCostModel = crocoddyl.CostModelSum(state, actModel.nu) terminalCostModel.addCost("xGoal", xPendCost, 1e4) terminal_model = crocoddyl.IntegratedActionModelEuler( crocoddyl.DifferentialActionModelFreeFwdDynamics( state, actModel, terminalCostModel), dt) # Creating the shooting problem and the FDDP solver problem = crocoddyl.ShootingProblem(initial_configuration.T, [runningModel] * horizon, terminal_model) fddp = crocoddyl.SolverFDDP(problem) fddp.th_stop = precision fddp.solve([], [], maxiters) return fddp