def __init__(self): # LQR to stabilize pole in the starting phase self.lqr_controller = controller_lqr() self.horizon = mpc_horizon self.Predictor = predictor(horizon=self.horizon, dt=DT) self.rnn_eval_time = [] self.predictor_time = [] self.nfun = [] # I do the norm and unnorm unnecessarilly! # You need only to scale once! """ Get configured do-mpc modules: """ # State of the cart self.s = create_cartpole_state() # s like state self.target_position = 0.0 self.Q_hat = np.zeros(self.horizon) # MPC prediction of future control inputs self.Q_hat0 = np.random.normal(0.0, 0.1 ,self.horizon) # initial guess for future control inputs to be predicted self.Q_previous = 0.0 self.angle_cost = lambda angle: angle ** 2 self.angle_sin_cost = lambda angle_sin: angle_sin ** 2 self.angleD_cost = lambda angleD: angleD ** 2 self.position_cost = lambda position: (position - self.target_position) ** 2 self.positionD_cost = lambda positionD: positionD ** 2 self.Q_bounds = scipy.optimize.Bounds(lb=-1.0, ub=1.0) self.initial_state = create_cartpole_state() self.prediction_features_names = cartpole_state_indices_to_varnames(range(len(self.s))) self.predictions = np.zeros((self.horizon + 1, len(self.prediction_features_names) + 1)) # Array keeping individual costs for every timestep of prediction self.costs_names = ['angle_cost', 'angle_sin_cost', 'angleD_cost', 'position_cost', 'positionD_cost'] self.cost_array = np.zeros((self.horizon + 1, len(self.costs_names) + 1)) # Numbers of samples which should be bridged with output from a P controller # To give RNN time to settle self.warm_up_len = 0 self.sample_counter = 0 # Counting samples for above aim
def __init__(self): """ Get configured do-mpc modules: """ # State of the cart self.s = create_cartpole_state() # s like state self.target_position = 0.0 self.mpc_horizon = mpc_horizon self.dt = dt_mpc_simulation self.yp_hat = np.zeros(self.mpc_horizon, dtype=object) # MPC prediction of future states self.Q_hat = np.zeros( self.mpc_horizon) # MPC prediction of future control inputs self.Q_hat0 = np.zeros( self.mpc_horizon ) # initial guess for future control inputs to be predicted self.Q_previous = 0.0 self.E_kin_cart = lambda s: (s[POSITIOND_IDX] / v_max)**2 self.E_kin_pol = lambda s: (s[ANGLED_IDX] / (2 * np.pi))**2 self.E_pot_cost = lambda s: 1 - np.cos(s[ANGLE_IDX]) self.distance_difference = lambda s: (( (s[POSITION_IDX] - self.target_position) / TrackHalfLength))**2 self.Q_bounds = [(-1, 1)] * self.mpc_horizon
def get_prediction_for_testing_gui_from_euler(a, dataset, dt_sampling, dt_sampling_by_dt_fine=1): # region In either case testing is done on a data collected offline output_array = np.zeros(shape=(a.test_max_horizon + 1, a.test_len, len(a.features) + 1)) Q = dataset['Q'].to_numpy() Q_array = [Q[i:-a.test_max_horizon + i] for i in range(a.test_max_horizon)] Q_array = np.vstack(Q_array) output_array[:-1, :, -1] = Q_array print('Calculating predictions...') FEATURES_INDICES = cartpole_state_varnames_to_indices(a.features) for timestep in trange(a.test_len): state_dict = dataset.loc[dataset.index[[timestep]], :].to_dict( 'records')[0] s = create_cartpole_state(state_dict) output_array[0, timestep, :-1] = s[FEATURES_INDICES] # Progress max_horison steps # save the data for every step in a third dimension of an array for i in range(0, a.test_max_horizon): Q = Q_array[i, timestep] u = Q2u(Q) for _ in range(dt_sampling_by_dt_fine): angleDD, positionDD = cartpole_ode_numba(s, u) t_step = (dt_sampling / float(dt_sampling_by_dt_fine)) # Calculate next state s[POSITION_IDX] += s[POSITIOND_IDX] * t_step s[POSITIOND_IDX] += positionDD * t_step s[ANGLE_IDX] += s[ANGLED_IDX] * t_step s[ANGLED_IDX] += angleDD * t_step s[ANGLE_IDX] = \ wrap_angle_rad(s[ANGLE_IDX]) # Append s to outputs matrix s[ANGLE_COS_IDX] = np.cos(s[ANGLE_IDX]) s[ANGLE_SIN_IDX] = np.sin(s[ANGLE_IDX]) output_array[i + 1, timestep, :-1] = s[FEATURES_INDICES] output_array = np.swapaxes(output_array, 0, 1) # time_axis is a time axis for ground truth return output_array
def __init__(self): """Random number generator""" SEED = config["controller"]["mppi"]["SEED"] if SEED == "None": SEED = int( (datetime.now() - datetime(1970, 1, 1)).total_seconds() * 1000.0) # Fully random self.rng_mppi = Generator(SFC64(SEED)) self.rng_mppi_rnn = Generator( SFC64(SEED * 2) ) # There are some random numbers used at warm up of rnn only. Separate rng prevents a shift global dd_weight, ep_weight, ekp_weight, ekc_weight, cc_weight dd_weight = dd_weight * (1 + dd_noise * self.rng_mppi.uniform(-1.0, 1.0)) ep_weight = ep_weight * (1 + ep_noise * self.rng_mppi.uniform(-1.0, 1.0)) ekp_weight = ekp_weight * ( 1 + ekp_noise * self.rng_mppi.uniform(-1.0, 1.0)) ekc_weight = ekc_weight * ( 1 + ekc_noise * self.rng_mppi.uniform(-1.0, 1.0)) cc_weight = cc_weight * (1 + cc_noise * self.rng_mppi.uniform(-1.0, 1.0)) # State of the cart self.s = create_cartpole_state() self.target_position = 0.0 self.rho_sqrt_inv = 0.01 self.iteration = -1 self.control_enabled = True self.s_horizon = np.zeros((), dtype=np.float32) self.u = np.zeros((mpc_samples), dtype=np.float32) self.u_prev = np.zeros_like(self.u, dtype=np.float32) self.delta_u = np.zeros((num_rollouts, mpc_samples), dtype=np.float32) self.S_tilde_k = np.zeros((num_rollouts), dtype=np.float32) self.wash_out_len = WASH_OUT_LEN self.warm_up_countdown = self.wash_out_len try: from Controllers.controller_lqr import controller_lqr self.auxiliary_controller_available = True self.auxiliary_controller = controller_lqr() except ModuleNotFoundError: self.auxiliary_controller_available = False self.auxiliary_controller = None self.auxiliary_controller_available = False
def cartpole_integration(s, angleDD, positionDD, dt): """ Simple single step integration of CartPole state by dt Takes state as numpy array. :param s: state of the CartPole (position, positionD, angle, angleD must be set). Array order follows global definition. :param dt: time step by which the CartPole state should be integrated """ s_next = create_cartpole_state() s_next[POSITION_IDX] = s[POSITION_IDX] + s[POSITIOND_IDX] * dt s_next[POSITIOND_IDX] = s[POSITIOND_IDX] + positionDD * dt s_next[ANGLE_IDX] = s[ANGLE_IDX] + s[ANGLED_IDX] * dt s_next[ANGLED_IDX] = s[ANGLED_IDX] + angleDD * dt return s_next
def finish_thread(self): self.CartPoleInstance.use_pregenerated_target_position = False self.initial_state = create_cartpole_state() self.initial_position_slider.setValue(0) self.initial_angle_slider.setValue(0) self.CartPoleInstance.s = self.initial_state # Some controllers may collect they own statistics about their usage and print it after experiment terminated if self.simulator_mode != 'Replay': try: self.CartPoleInstance.controller.controller_report() except: pass if self.show_experiment_summary: self.w_summary = SummaryWindow( summary_plots=self.CartPoleInstance.summary_plots) # Reset variables and redraw the figures self.reset_variables(0) # Draw figures self.CartPoleInstance.draw_constant_elements(self.fig, self.fig.AxCart, self.fig.AxSlider) self.canvas.draw() # Enable back all elements of GUI: self.cb_save_history.setEnabled(True) self.cb_show_experiment_summary.setEnabled(True) for rb in self.rbs_simulator_mode: rb.setEnabled(True) for rb in self.rbs_controllers: rb.setEnabled(True) self.start_or_stop_action = "START!" # What should happen when "START! / STOP!" is pushed NEXT time
The convention: Pole upright position defines 0 angle Cart movement to the right is positive Clockwise angle rotation is defined as negative Required angle convention for CartPole GUI: CLOCK-NEG """ ANGLE_CONVENTION = 'CLOCK-NEG' """Defines if a clockwise angle change is negative ('CLOCK-NEG') or positive ('CLOCK-POS') The 0-angle state is always defined as pole in upright position. This currently cannot be changed """ # Create initial state vector s0 = create_cartpole_state() def _cartpole_ode(ca, sa, angleD, positionD, u, k=k, M=M, m=m, g=g, J_fric=J_fric, M_fric=M_fric, L=L): """
def __init__(self, *args, **kwargs): super(MainWindow, self).__init__(*args, **kwargs) # region Create CartPole instance and load initial settings # Create CartPole instance self.initial_state = create_cartpole_state() self.CartPoleInstance = CartPole(initial_state=self.initial_state) # Set timescales self.CartPoleInstance.dt_simulation = dt_simulation self.CartPoleInstance.dt_controller = controller_update_interval self.CartPoleInstance.dt_save = save_interval # set other settings self.CartPoleInstance.set_controller(controller_init) self.CartPoleInstance.stop_at_90 = stop_at_90_init self.set_random_experiment_generator_init_params() # endregion # region Decide whether to save the data in "CartPole memory" or not self.save_history = save_history_init self.show_experiment_summary = show_experiment_summary_init if self.save_history or self.show_experiment_summary: self.CartPoleInstance.save_data_in_cart = True else: self.CartPoleInstance.save_data_in_cart = False # endregion # region Other variables initial values as provided in gui_default_parameters.py # Start user controlled experiment/ start random experiment/ load and replay - on start button self.simulator_mode = simulator_mode_init self.slider_on_click = slider_on_click_init # Update slider on click/update slider while hoovering over it self.speedup = speedup_init # Default simulation speed-up # endregion # region Initialize loop-timer # This timer allows to relate the simulation time to user time # And (if your computer is fast enough) run simulation # slower or faster than real-time by predefined factor (speedup) self.looper = loop_timer( dt_target=(self.CartPoleInstance.dt_simulation / self.speedup)) # endregion # region Variables controlling the state of various processes (DO NOT MODIFY) self.terminate_experiment_or_replay_thread = False # True: gives signal causing thread to terminate self.pause_experiment_or_replay_thread = False # True: gives signal causing the thread to pause self.run_set_labels_thread = True # True if gauges (labels) keep being repeatedly updated # Stop threads by setting False # Flag indicating if the "START! / STOP!" button should act as start or as stop when pressed. # Can take values "START!" or "STOP!" self.start_or_stop_action = "START!" # Flag indicating whether the pause button should pause or unpause. self.pause_or_unpause_action = "PAUSE" # Flag indicating that saving of experiment recording to csv file has finished self.experiment_or_replay_thread_terminated = False self.user_time_counter = 0 # Measures the user time # Slider instant value (which is draw in GUI) differs from value saved in CartPole instance # if the option updating slider "on-click" is enabled. self.slider_instant_value = self.CartPoleInstance.slider_value self.noise = 'OFF' self.CartPoleInstance.NoiseAdderInstance.noise_mode = self.noise # endregion # region Create GUI Layout # region - Create container for top level layout layout = QVBoxLayout() # endregion # region - Change geometry of the main window self.setGeometry(300, 300, 2500, 1000) # endregion # region - Matplotlib figures (CartPole drawing and Slider) # Draw Figure self.fig = Figure( figsize=(25, 10) ) # Regulates the size of Figure in inches, before scaling to window size. self.canvas = FigureCanvas(self.fig) self.fig.AxCart = self.canvas.figure.add_subplot(211) self.fig.AxSlider = self.canvas.figure.add_subplot(212) self.fig.AxSlider.set_ylim(0, 1) self.CartPoleInstance.draw_constant_elements(self.fig, self.fig.AxCart, self.fig.AxSlider) # Attach figure to the layout lf = QVBoxLayout() lf.addWidget(self.canvas) # endregion # region - Radio buttons selecting current controller self.rbs_controllers = [] for controller_name in self.CartPoleInstance.controller_names: self.rbs_controllers.append(QRadioButton(controller_name)) # Ensures that radio buttons are exclusive self.controllers_buttons_group = QButtonGroup() for button in self.rbs_controllers: self.controllers_buttons_group.addButton(button) lr_c = QVBoxLayout() lr_c.addStretch(1) for rb in self.rbs_controllers: rb.clicked.connect(self.RadioButtons_controller_selection) lr_c.addWidget(rb) lr_c.addStretch(1) self.rbs_controllers[self.CartPoleInstance.controller_idx].setChecked( True) # endregion # region - Create central part of the layout for figures and radio buttons and add it to the whole layout lc = QHBoxLayout() lc.addLayout(lf) lc.addLayout(lr_c) layout.addLayout(lc) # endregion # region - Gauges displaying current values of various states and parameters (time, velocity, angle,...) # First row ld = QHBoxLayout() # User time self.labTime = QLabel("User's time (s): ") self.timer = QTimer() self.timer.setInterval(100) # Tick every 1/10 of the second self.timer.timeout.connect(self.set_user_time_label) self.timer.start() ld.addWidget(self.labTime) # Speed, angle, motor power (Q) self.labSpeed = QLabel('Speed (m/s):') self.labAngle = QLabel('Angle (deg):') self.labMotor = QLabel('') self.labTargetPosition = QLabel('') ld.addWidget(self.labSpeed) ld.addWidget(self.labAngle) ld.addWidget(self.labMotor) ld.addWidget(self.labTargetPosition) layout.addLayout(ld) # Second row of labels # Simulation time, Measured (real) speed-up, slider-value ld2 = QHBoxLayout() self.labTimeSim = QLabel('Simulation Time (s):') ld2.addWidget(self.labTimeSim) self.labSpeedUp = QLabel('Speed-up (measured):') ld2.addWidget(self.labSpeedUp) self.labSliderInstant = QLabel('') ld2.addWidget(self.labSliderInstant) layout.addLayout(ld2) # endregion # region - Buttons "START!" / "STOP!", "PAUSE", "QUIT" self.bss = QPushButton("START!") self.bss.pressed.connect(self.start_stop_button) self.bp = QPushButton("PAUSE") self.bp.pressed.connect(self.pause_unpause_button) bq = QPushButton("QUIT") bq.pressed.connect(self.quit_application) lspb = QHBoxLayout() # Sub-Layout for Start/Stop and Pause Buttons lspb.addWidget(self.bss) lspb.addWidget(self.bp) # endregion # region - Sliders setting initial state and buttons for kicking the pole # Sliders setting initial position and angle lb = QVBoxLayout() # Layout for buttons lb.addLayout(lspb) lb.addWidget(bq) ip = QHBoxLayout() # Layout for initial position sliders self.initial_position_slider = QSlider( orientation=Qt.Orientation.Horizontal) self.initial_position_slider.setRange( -int(float(1000 * TrackHalfLength)), int(float(1000 * TrackHalfLength))) self.initial_position_slider.setValue(0) self.initial_position_slider.setSingleStep(1) self.initial_position_slider.valueChanged.connect( self.update_initial_position) self.initial_angle_slider = QSlider( orientation=Qt.Orientation.Horizontal) self.initial_angle_slider.setRange(-int(float(100 * np.pi)), int(float(100 * np.pi))) self.initial_angle_slider.setValue(0) self.initial_angle_slider.setSingleStep(1) self.initial_angle_slider.valueChanged.connect( self.update_initial_angle) ip.addWidget(QLabel("Initial position:")) ip.addWidget(self.initial_position_slider) ip.addWidget(QLabel("Initial angle:")) ip.addWidget(self.initial_angle_slider) ip.addStretch(0.01) # Slider setting latency self.LATENCY_SLIDER_RANGE_INT = 1000 self.latency_slider = QSlider(orientation=Qt.Orientation.Horizontal) self.latency_slider.setRange(0, self.LATENCY_SLIDER_RANGE_INT) self.latency_slider.setValue( int(self.CartPoleInstance.LatencyAdderInstance.latency * self.LATENCY_SLIDER_RANGE_INT / self.CartPoleInstance.LatencyAdderInstance.max_latency)) self.latency_slider.setSingleStep(1) self.latency_slider.valueChanged.connect(self.update_latency) ip.addWidget(QLabel("Latency:")) ip.addWidget(self.latency_slider) self.labLatency = QLabel('Latency (ms): {:.1f}'.format( self.CartPoleInstance.LatencyAdderInstance.latency * 1000)) ip.addWidget(self.labLatency) # Buttons activating noise self.rbs_noise = [] for mode_name in ['ON', 'OFF']: self.rbs_noise.append(QRadioButton(mode_name)) # Ensures that radio buttons are exclusive self.noise_buttons_group = QButtonGroup() for button in self.rbs_noise: self.noise_buttons_group.addButton(button) lr_n = QHBoxLayout() lr_n.addWidget(QLabel('Noise:')) for rb in self.rbs_noise: rb.clicked.connect(self.RadioButtons_noise_on_off) lr_n.addWidget(rb) self.rbs_noise[1].setChecked(True) ip.addStretch(0.01) ip.addLayout(lr_n) ip.addStretch(0.01) # Buttons giving kick to the pole kick_label = QLabel("Kick pole:") kick_left_button = QPushButton() kick_left_button.setText("Left") kick_left_button.adjustSize() kick_left_button.clicked.connect(self.kick_pole) kick_right_button = QPushButton() kick_right_button.setText("Right") kick_right_button.adjustSize() kick_right_button.clicked.connect(self.kick_pole) ip.addWidget(kick_label) ip.addWidget(kick_left_button) ip.addWidget(kick_right_button) lb.addLayout(ip) layout.addLayout(lb) # endregion # region - Text boxes and Combobox to provide settings concerning generation of random experiment l_generate_trace = QHBoxLayout() l_generate_trace.addWidget(QLabel('Random experiment settings:')) l_generate_trace.addWidget(QLabel('Length (s):')) self.textbox_length = QLineEdit() l_generate_trace.addWidget(self.textbox_length) l_generate_trace.addWidget(QLabel('Turning Points (m):')) self.textbox_turning_points = QLineEdit() l_generate_trace.addWidget(self.textbox_turning_points) l_generate_trace.addWidget(QLabel('Interpolation:')) self.cb_interpolation = QComboBox() self.cb_interpolation.addItems( ['0-derivative-smooth', 'linear', 'previous']) self.cb_interpolation.currentIndexChanged.connect( self.cb_interpolation_selectionchange) self.cb_interpolation.setCurrentText( self.CartPoleInstance.interpolation_type) l_generate_trace.addWidget(self.cb_interpolation) layout.addLayout(l_generate_trace) # endregion # region - Textbox to provide csv file name for saving or loading data l_text = QHBoxLayout() textbox_title = QLabel('CSV file name:') self.textbox = QLineEdit() l_text.addWidget(textbox_title) l_text.addWidget(self.textbox) layout.addLayout(l_text) # endregion # region - Make strip of layout for checkboxes l_cb = QHBoxLayout() # endregion # region - Textbox to provide the target speed-up value l_text_speedup = QHBoxLayout() tx_speedup_title = QLabel('Speed-up (target):') self.tx_speedup = QLineEdit() l_text_speedup.addWidget(tx_speedup_title) l_text_speedup.addWidget(self.tx_speedup) self.tx_speedup.setText(str(self.speedup)) l_cb.addLayout(l_text_speedup) self.wrong_speedup_msg = QMessageBox() self.wrong_speedup_msg.setWindowTitle("Speed-up value problem") self.wrong_speedup_msg.setIcon(QMessageBox.Icon.Critical) # endregion # region - Checkboxes # region -- Checkbox: Save/don't save experiment recording self.cb_save_history = QCheckBox('Save results', self) if self.save_history: self.cb_save_history.toggle() self.cb_save_history.toggled.connect(self.cb_save_history_f) l_cb.addWidget(self.cb_save_history) # endregion # region -- Checkbox: Display plots showing dynamic evolution of the system as soon as experiment terminates self.cb_show_experiment_summary = QCheckBox('Show experiment summary', self) if self.show_experiment_summary: self.cb_show_experiment_summary.toggle() self.cb_show_experiment_summary.toggled.connect( self.cb_show_experiment_summary_f) l_cb.addWidget(self.cb_show_experiment_summary) # endregion # region -- Checkbox: Block pole if it reaches +/-90 deg self.cb_stop_at_90_deg = QCheckBox('Stop-at-90-deg', self) if self.CartPoleInstance.stop_at_90: self.cb_stop_at_90_deg.toggle() self.cb_stop_at_90_deg.toggled.connect(self.cb_stop_at_90_deg_f) l_cb.addWidget(self.cb_stop_at_90_deg) # endregion # region -- Checkbox: Update slider on click/update slider while hoovering over it self.cb_slider_on_click = QCheckBox('Update slider on click', self) if self.slider_on_click: self.cb_slider_on_click.toggle() self.cb_slider_on_click.toggled.connect(self.cb_slider_on_click_f) l_cb.addWidget(self.cb_slider_on_click) # endregion # endregion # region - Radio buttons selecting simulator mode: user defined experiment, random experiment, replay # List available simulator modes - constant self.available_simulator_modes = [ 'Slider-Controlled Experiment', 'Random Experiment', 'Replay' ] self.rbs_simulator_mode = [] for mode_name in self.available_simulator_modes: self.rbs_simulator_mode.append(QRadioButton(mode_name)) # Ensures that radio buttons are exclusive self.simulator_mode_buttons_group = QButtonGroup() for button in self.rbs_simulator_mode: self.simulator_mode_buttons_group.addButton(button) lr_sm = QHBoxLayout() lr_sm.addStretch(1) lr_sm.addWidget(QLabel('Simulator mode:')) for rb in self.rbs_simulator_mode: rb.clicked.connect(self.RadioButtons_simulator_mode) lr_sm.addWidget(rb) lr_sm.addStretch(1) self.rbs_simulator_mode[self.available_simulator_modes.index( self.simulator_mode)].setChecked(True) l_cb.addStretch(1) l_cb.addLayout(lr_sm) l_cb.addStretch(1) # endregion # region - Add checkboxes to layout layout.addLayout(l_cb) # endregion # region - Create an instance of a GUI window w = QWidget() w.setLayout(layout) self.setCentralWidget(w) self.show() self.setWindowTitle('CartPole Simulator') # endregion # endregion # region Open controller-specific popup windows self.open_additional_controller_widget() # endregion # region Activate functions capturing mouse movements and clicks over the slider # This line links function capturing the mouse position on the canvas of the Figure self.canvas.mpl_connect("motion_notify_event", self.on_mouse_movement) # This line links function capturing the mouse position on the canvas of the Figure click self.canvas.mpl_connect("button_press_event", self.on_mouse_click) # endregion # region Introducing multithreading # To ensure smooth functioning of the app, # the calculations and redrawing of the figures have to be done in a different thread # than the one capturing the mouse position and running the animation self.threadpool = QThreadPool() # endregion # region Starts a thread repeatedly redrawing gauges (labels) of the GUI # It runs till the QUIT button is pressed worker_labels = Worker(self.set_labels_thread) self.threadpool.start(worker_labels) # endregion # region Start animation repeatedly redrawing changing elements of matplotlib figures (CartPole drawing and slider) # This animation runs ALWAYS when the GUI is open # The buttons of GUI only decide if new parameters are calculated or not self.anim = self.CartPoleInstance.run_animation(self.fig)
def __init__(self, dt, intermediate_steps): self.s = create_cartpole_state() self.intermediate_steps = intermediate_steps self.t_step = np.float32(dt / float(self.intermediate_steps))
def __init__(self, dt, intermediate_steps): self.s = tf.convert_to_tensor(create_cartpole_state()) self.intermediate_steps = tf.convert_to_tensor(intermediate_steps, dtype=tf.int32) self.t_step = tf.convert_to_tensor(dt / float(self.intermediate_steps), dtype=tf.float32)