def start(self, fps, fast_step=False):
    """Start

    Begin the continuous event loop for the simulation. This event loop
    can be exited using the ctrl+c keyboard interrupt. Real-time
    constraints are enforced.

    Before the loop starts, a long-term tooltip path is extrapolated by
    self.rnn from the position of 'gate7', combined with normalized time and
    one set of stored gate data, and written to
    constants.G_RNN_STATIC_PATH_OUT. Inside the loop the pointer follows
    that path, corrected for the actual gate positions and limited to
    constants.G_MAX_ACCEL. When the end of the path is reached the
    corrected path is written once to constants.G_RNN_DYNAMIC_PATH_OUT.
    The loop also ends when self.viewer.is_dead is true.

    Arguments:
        fps: The value of frames per second of the simulation. [Hz]
        fast_step: If True, the ODE fast step algorithm will be used. This
            is faster and requires less memory but is less accurate.
            (Default: False)
    """
    # NOTE: neither flag is reassigned anywhere in this method, so the loop
    # below only exits through the viewer check (or an exception)
    paused = False
    stopped = False

    # Define the total time for the tooltip traversal
    t_total = 20.0  # [s]

    # Define the simulation frame rate
    t = 0.0  # [s]
    dt = 1.0 / float(fps)  # [s]

    # Keep track of time overshoot in the case that simulation time must be
    # increased in order to maintain real-time constraints
    t_overshoot = 0.0

    # Get the initial path position (center of gate7)
    pos_start = self.env.get_body_pos('gate7')  # [m]

    # Get the first position of the PA10 at rest. Currently only needed by
    # the pending TODO below; the query is kept so behavior is unchanged.
    pos_init = self.env.get_body_pos('tooltip')  # [m]

    # TODO: Calculate the required joint angles of the PA10 with
    #       self.kinematics.calc_inverse_kinematics(pos_init, pos_start) and
    #       move the PA10 end-effector to the starting position along the path

    # TODO: TEMP - Move the temporary end-effector pointer to the starting position
    self.env.set_group_pos('pointer', pos_start)

    # Generate long-term path from initial position. np.linspace requires an
    # integer sample count, while t_total / dt is a float; rounding guards
    # against floating-point error in the division (e.g. 1199.9999 -> 1200).
    num_steps = int(round(t_total / dt))
    t_input = np.linspace(start=0.0, stop=1.0, num=num_steps)
    t_input = np.reshape(t_input, (len(t_input), 1))

    rnn_path = self.rnn.extrapolate(t_input, [pos_start], len(t_input) - 1)

    # Add the initial condition point back onto the data
    rnn_path = np.vstack((pos_start, rnn_path))

    # Retrieve one set of standard gate position/orientation data
    file_path = pathutils.list_data_files(constants.G_TRAINING_DATA_DIR)[0]
    gate_data = datastore.retrieve(file_path)

    gate_start_idx = constants.G_GATE_IDX
    gate_end_idx = gate_start_idx + constants.G_NUM_GATE_INPUTS

    # Reshape the gate positions data: take the first row and repeat it for
    # every point of the generated path
    gate_data = gate_data[0:1, gate_start_idx:gate_end_idx]
    gate_data = np.tile(gate_data, (len(rnn_path), 1))

    # Complete the rnn path data (columns: time, gate data, tooltip path)
    rnn_path = np.hstack((t_input, gate_data, rnn_path))

    # Save generated path for later examination
    datastore.store(rnn_path, constants.G_RNN_STATIC_PATH_OUT)

    # Define a variable to hold the final path (with real-time correction).
    # The last row is dropped because the loop stops one index before the end.
    final_path = rnn_path[:-1].copy()
    path_saved = False

    # Detect all path segments between gates in the generated path
    segments = pathutils._detect_segments(rnn_path)

    path_idx = 0

    x_path_offset = np.array([0.0, 0.0, 0.0])  # [m]
    v_curr = np.array([0.0, 0.0, 0.0])  # [m/s]
    a_max = constants.G_MAX_ACCEL  # [m/s^2]

    # Get the static table position
    x_table = self.env.get_body_pos('table')

    while not stopped:
        t_start = time.time()

        # If the last calculation took too long, catch up
        dt_warped = dt + t_overshoot
        self.env.set_dt(dt_warped)

        # Determine if the viewer is stopped. Then we can quit
        if self.viewer.is_dead:
            break

        # Pause the simulation if we are at the end
        if path_idx == len(rnn_path) - 1 or paused:
            self.env.step(paused=True, fast=fast_step)

            # If we have really hit the end of the simulation, save the path
            if not paused and not path_saved:
                # Save the final data to a file
                datastore.store(final_path, constants.G_RNN_DYNAMIC_PATH_OUT)
                path_saved = True

            continue

        # Not a very elegant solution to pausing at the start, but it works.
        # NOTE(review): the world is stepped paused until the simulated clock
        # passes 1000 s. The `continue` skips the real-time sleep at the
        # bottom of the loop, so these steps are not throttled, and the times
        # recorded in final_path start above 1000 s - confirm this is intended.
        if t <= 1000.0:
            self.env.step(paused=True, fast=fast_step)
            t += dt_warped
            continue

        # Determine the current path segment (first segment whose end index
        # has not been passed yet; defaults to segment 0)
        curr_segment_idx = 0

        for segment_idx, segment_end in enumerate(segments):
            if path_idx <= segment_end:
                curr_segment_idx = segment_idx
                break

        x_curr = pathutils.get_path_tooltip_pos(rnn_path, path_idx) + x_path_offset
        x_next = pathutils.get_path_tooltip_pos(rnn_path, path_idx + 1) + x_path_offset

        # Get the expected gate position
        x_gate_expected = pathutils.get_path_gate_pos(
            rnn_path,
            segments[curr_segment_idx],
            curr_segment_idx
        )

        # Get the actual gate position
        x_gate_actual = self.env.get_body_pos('gate%d' % curr_segment_idx)

        # Calculate the new position from change to new gate position
        dx_gate = x_gate_actual - (x_gate_expected + x_path_offset)
        x_new = x_next + dx_gate

        # Calculate the new velocity
        v_new = (x_new - x_curr) / dt_warped

        # Calculate the new acceleration
        a_new = (v_new - v_curr) / dt_warped

        # Calculate the acceleration vector norm
        a_new_norm = np.linalg.norm(a_new)

        # Limit the norm of the acceleration vector
        a_new_norm_clipped = np.clip(a_new_norm, -a_max, a_max)

        # Determine the ratio of the clipped norm, protect against divide by zero
        if a_new_norm != 0.0:
            ratio_unclipped = a_new_norm_clipped / a_new_norm
        else:
            ratio_unclipped = 0.0

        # Scale the acceleration vector by this ratio
        a_new = a_new * ratio_unclipped

        # Calculate the new change in velocity
        dv_new = a_new * dt_warped
        v_new = v_curr + dv_new

        # Calculate the new change in position
        dx_new = v_new * dt_warped
        x_new = x_curr + dx_new

        # Modify final path data with current tooltip and gate positions
        pathutils.set_path_time(final_path, path_idx, t)
        pathutils.set_path_tooltip_pos(final_path, path_idx, x_curr)

        for gate_idx in range(constants.G_NUM_GATES):
            gate_name = 'gate%d' % gate_idx
            x_gate = self.env.get_body_pos(gate_name)
            pathutils.set_path_gate_pos(final_path, path_idx, gate_idx, x_gate)

        # Store this velocity for the next time step
        v_curr = v_new

        # Recalculate the current offset: accumulate how far the limited
        # position deviates from the next planned path point
        x_path_offset += x_new - x_next

        # Perform inverse kinematics to get joint angles
        pa10_joint_angles = self.kinematics.calc_inverse_kinematics(x_curr, x_new)

        # TODO: TEMP - MOVE ONLY POINTER, NO PA10
        self.env.set_group_pos('pointer', x_new)

        if constants.G_TABLE_IS_OSCILLATING:
            # Move the table with y-axis oscillation
            x_table_next = shaker_table(t, x_table)
        else:
            x_table_next = x_table

        self.env.set_body_pos('table', x_table_next)

        # Step through the world by 1 time frame and actuate pa10 joints
        self.env.performAction(pa10_joint_angles, fast=fast_step)

        # Update current time after this step
        t += dt_warped
        path_idx += 1

        # Determine the difference in virtual vs actual time
        t_warped = dt - (time.time() - t_start)

        # Attempt to enforce real-time constraints
        if t_warped >= 0.0:
            # The calculation took less time than the virtual time. Sleep
            # the rest off
            time.sleep(t_warped)
            t_overshoot = 0.0
        else:
            # The calculation took more time than the virtual time. We need
            # to catch up with the virtual time on the next time step
            t_overshoot = -t_warped
def train_path_planning_network():
    """Train Path Planning Network

    Trains an Evolino LSTM neural network for long-term path planning for
    use in the surgical simulator.

    Every data file found in constants.G_TRAINING_DATA_DIR is
    time-normalized and appended to one training dataset; the rating column
    of each file (all rows but the first) is collected as the importance
    weighting passed to the trainer. After every training iteration the
    network extrapolates a test path, seeded from the first row of the last
    training file, which is drawn on a 3D plot.

    Training stops when the fitness has exceeded
    constants.G_RNN_CONVERGENCE_THRESHOLD for
    constants.G_RNN_REQUIRED_CONVERGENCE_STREAK consecutive iterations, or
    when constants.G_RNN_MAX_ITERATIONS iterations have been run. A plot of
    fitness per iteration is shown before returning.

    Returns:
        A copy of the fully-trained path planning neural network.

    Raises:
        ValueError: If no training data files are found.
    """
    # Build up the list of files to use as training set
    training_dir = constants.G_TRAINING_DATA_DIR

    # Find all data files in the training data directory
    training_files = pathutils.list_data_files(training_dir)

    # Get the training data and place it into a dataset
    training_dataset = None
    training_data = None

    input_start_idx = constants.G_RNN_INPUT_IDX
    input_end_idx = input_start_idx + constants.G_RNN_NUM_INPUTS
    output_start_idx = constants.G_RNN_OUTPUT_IDX
    output_end_idx = output_start_idx + constants.G_RNN_NUM_OUTPUTS

    # Store all training set ratings
    ratings = np.array([])

    for training_file in training_files:
        training_data = datastore.retrieve(training_file)

        # Normalize the time input of the data
        training_data = pathutils.normalize_time(
            training_data,
            t_col=constants.G_TIME_IDX
        )

        # Add this data sample to the training dataset
        training_dataset = datastore.list_to_dataset(
            training_data[:, input_start_idx:input_end_idx],
            training_data[:, output_start_idx:output_end_idx],
            dataset=training_dataset
        )

        # Store the rating of the data (first row excluded)
        this_rating = training_data[1:, constants.G_RATING_IDX]
        ratings = np.hstack((ratings, this_rating))

    if training_data is None:
        # Without at least one file there is nothing to train or test on
        raise ValueError(
            'No training data files found in %s' % training_dir
        )

    # Get the starting point information for testing. Only the last training
    # file is used for this, so it is done once after the loop.
    output_initial_condition = training_data[0, output_start_idx:output_end_idx]

    # Generate the time sequence input data for testing
    time_steps = constants.G_RNN_GENERATED_TIME_STEPS
    t_input = np.linspace(start=0.0, stop=1.0, num=time_steps)
    t_input = np.reshape(t_input, (len(t_input), 1))

    gate_start_idx = constants.G_GATE_IDX
    gate_end_idx = gate_start_idx + constants.G_NUM_GATE_INPUTS

    # Pull the gate data from the last training dataset and repeat its first
    # row for every generated time step
    gate_data = training_data[0:1, gate_start_idx:gate_end_idx]
    gate_data = np.tile(gate_data, (time_steps, 1))

    # The network input used for testing never changes between iterations
    generated_input = np.hstack((t_input, gate_data))

    # Build up a full ratings matrix: one row per rating, with the rating
    # repeated for every network output. Stays None if there are no ratings.
    if ratings.size:
        nd_ratings = np.tile(
            np.reshape(ratings, (-1, 1)),
            (1, constants.G_RNN_NUM_OUTPUTS)
        )
    else:
        nd_ratings = None

    # Create network and trainer
    print('>>> Building Network...')
    net = PathPlanningNetwork()

    print('>>> Initializing Trainer...')
    trainer = PathPlanningTrainer(
        evolino_network=net,
        dataset=training_dataset,
        nBurstMutationEpochs=10,
        importance=nd_ratings
    )

    # Begin the training iterations
    fitness_list = []
    max_fitness = None
    max_fitness_epoch = None

    # Draw the generated path plot
    fig = plt.figure(1, facecolor='white')
    testing_axis = fig.add_subplot(111, projection='3d')
    fig.show()

    idx = 0
    current_convergence_streak = 0

    while True:
        print('>>> Training Network (Iteration: %3d)...' % (idx + 1))
        trainer.train()

        # Determine fitness of this network
        current_fitness = trainer.evaluation.max_fitness
        fitness_list.append(current_fitness)

        print('>>> FITNESS: %f' % current_fitness)

        # Determine if this is the best (highest fitness) network so far
        if max_fitness is None or max_fitness < current_fitness:
            # This is the maximum, record it
            max_fitness = current_fitness
            max_fitness_epoch = idx

        # Draw the generated path after training
        print('>>> Testing Network...')
        generated_output = net.extrapolate(
            t_input,
            [output_initial_condition],
            len(t_input) - 1
        )

        # Add the initial condition point back onto the generated output
        generated_output = np.vstack((output_initial_condition, generated_output))

        # Smash together the input and output
        generated_data = np.hstack((generated_input, generated_output))

        print('>>> Drawing Generated Path...')
        pathutils.display_path(testing_axis, generated_data, title='Generated Testing Path')

        plt.draw()

        if current_fitness > constants.G_RNN_CONVERGENCE_THRESHOLD:
            # We've encountered a fitness higher than threshold
            current_convergence_streak += 1
        else:
            # Streak ended. Reset the streak counter
            current_convergence_streak = 0

        if current_convergence_streak == constants.G_RNN_REQUIRED_CONVERGENCE_STREAK:
            # idx is zero-based, so idx + 1 iterations have been completed
            print('>>> Convergence Achieved: %d Iterations' % (idx + 1))
            break
        elif idx == constants.G_RNN_MAX_ITERATIONS - 1:
            print('>>> Reached maximum iterations (%d)' % constants.G_RNN_MAX_ITERATIONS)
            break

        idx += 1

    # Draw the iteration fitness plot
    plt.figure(facecolor='white')
    plt.cla()
    # One fitness value is recorded per completed iteration
    plt.title('Fitness of RNN over %d Iterations' % len(fitness_list))
    plt.xlabel('Training Iterations')
    plt.ylabel('Fitness')
    plt.grid(True)

    plt.plot(range(len(fitness_list)), fitness_list, 'r-')

    # Mark the iteration with the highest recorded fitness
    plt.annotate(
        'local max',
        xy=(max_fitness_epoch, fitness_list[max_fitness_epoch]),
        xytext=(max_fitness_epoch, fitness_list[max_fitness_epoch] + 0.01),
        arrowprops=dict(facecolor='black', shrink=0.05)
    )

    plt.show()

    # Return a full copy of the trained neural network
    return copy.deepcopy(net)
def start(self, fps, fast_step=False):
    """Start

    Begin the continuous event loop for the simulation. This event loop
    can be exited using the ctrl+c keyboard interrupt. Real-time
    constraints are enforced.

    Before the loop starts, a long-term tooltip path is extrapolated by
    self.rnn from the position of 'gate7', combined with normalized time and
    one set of stored gate data, and written to
    constants.G_RNN_STATIC_PATH_OUT. Inside the loop the pointer follows
    that path, corrected for the actual gate positions and limited to
    constants.G_MAX_ACCEL. When the end of the path is reached the
    corrected path is written once to constants.G_RNN_DYNAMIC_PATH_OUT.
    The loop also ends when self.viewer.is_dead is true.

    Arguments:
        fps: The value of frames per second of the simulation. [Hz]
        fast_step: If True, the ODE fast step algorithm will be used. This
            is faster and requires less memory but is less accurate.
            (Default: False)
    """
    # NOTE: neither flag is reassigned anywhere in this method, so the loop
    # below only exits through the viewer check (or an exception)
    paused = False
    stopped = False

    # Define the total time for the tooltip traversal
    t_total = 20.0  # [s]

    # Define the simulation frame rate
    t = 0.0  # [s]
    dt = 1.0 / float(fps)  # [s]

    # Keep track of time overshoot in the case that simulation time must be
    # increased in order to maintain real-time constraints
    t_overshoot = 0.0

    # Get the initial path position (center of gate7)
    pos_start = self.env.get_body_pos('gate7')  # [m]

    # Get the first position of the PA10 at rest. Currently only needed by
    # the pending TODO below; the query is kept so behavior is unchanged.
    pos_init = self.env.get_body_pos('tooltip')  # [m]

    # TODO: Calculate the required joint angles of the PA10 with
    #       self.kinematics.calc_inverse_kinematics(pos_init, pos_start) and
    #       move the PA10 end-effector to the starting position along the path

    # TODO: TEMP - Move the temporary end-effector pointer to the starting position
    self.env.set_group_pos('pointer', pos_start)

    # Generate long-term path from initial position. np.linspace requires an
    # integer sample count, while t_total / dt is a float; rounding guards
    # against floating-point error in the division (e.g. 1199.9999 -> 1200).
    num_steps = int(round(t_total / dt))
    t_input = np.linspace(start=0.0, stop=1.0, num=num_steps)
    t_input = np.reshape(t_input, (len(t_input), 1))

    rnn_path = self.rnn.extrapolate(t_input, [pos_start], len(t_input) - 1)

    # Add the initial condition point back onto the data
    rnn_path = np.vstack((pos_start, rnn_path))

    # Retrieve one set of standard gate position/orientation data
    file_path = pathutils.list_data_files(constants.G_TRAINING_DATA_DIR)[0]
    gate_data = datastore.retrieve(file_path)

    gate_start_idx = constants.G_GATE_IDX
    gate_end_idx = gate_start_idx + constants.G_NUM_GATE_INPUTS

    # Reshape the gate positions data: take the first row and repeat it for
    # every point of the generated path
    gate_data = gate_data[0:1, gate_start_idx:gate_end_idx]
    gate_data = np.tile(gate_data, (len(rnn_path), 1))

    # Complete the rnn path data (columns: time, gate data, tooltip path)
    rnn_path = np.hstack((t_input, gate_data, rnn_path))

    # Save generated path for later examination
    datastore.store(rnn_path, constants.G_RNN_STATIC_PATH_OUT)

    # Define a variable to hold the final path (with real-time correction).
    # The last row is dropped because the loop stops one index before the end.
    final_path = rnn_path[:-1].copy()
    path_saved = False

    # Detect all path segments between gates in the generated path
    segments = pathutils._detect_segments(rnn_path)

    path_idx = 0

    x_path_offset = np.array([0.0, 0.0, 0.0])  # [m]
    v_curr = np.array([0.0, 0.0, 0.0])  # [m/s]
    a_max = constants.G_MAX_ACCEL  # [m/s^2]

    # Get the static table position
    x_table = self.env.get_body_pos('table')

    while not stopped:
        t_start = time.time()

        # If the last calculation took too long, catch up
        dt_warped = dt + t_overshoot
        self.env.set_dt(dt_warped)

        # Determine if the viewer is stopped. Then we can quit
        if self.viewer.is_dead:
            break

        # Pause the simulation if we are at the end
        if path_idx == len(rnn_path) - 1 or paused:
            self.env.step(paused=True, fast=fast_step)

            # If we have really hit the end of the simulation, save the path
            if not paused and not path_saved:
                # Save the final data to a file
                datastore.store(final_path, constants.G_RNN_DYNAMIC_PATH_OUT)
                path_saved = True

            continue

        # Not a very elegant solution to pausing at the start, but it works.
        # NOTE(review): the world is stepped paused until the simulated clock
        # passes 1000 s. The `continue` skips the real-time sleep at the
        # bottom of the loop, so these steps are not throttled, and the times
        # recorded in final_path start above 1000 s - confirm this is intended.
        if t <= 1000.0:
            self.env.step(paused=True, fast=fast_step)
            t += dt_warped
            continue

        # Determine the current path segment (first segment whose end index
        # has not been passed yet; defaults to segment 0)
        curr_segment_idx = 0

        for segment_idx, segment_end in enumerate(segments):
            if path_idx <= segment_end:
                curr_segment_idx = segment_idx
                break

        x_curr = pathutils.get_path_tooltip_pos(rnn_path, path_idx) + x_path_offset
        x_next = pathutils.get_path_tooltip_pos(rnn_path, path_idx + 1) + x_path_offset

        # Get the expected gate position
        x_gate_expected = pathutils.get_path_gate_pos(
            rnn_path,
            segments[curr_segment_idx],
            curr_segment_idx
        )

        # Get the actual gate position
        x_gate_actual = self.env.get_body_pos('gate%d' % curr_segment_idx)

        # Calculate the new position from change to new gate position
        dx_gate = x_gate_actual - (x_gate_expected + x_path_offset)
        x_new = x_next + dx_gate

        # Calculate the new velocity
        v_new = (x_new - x_curr) / dt_warped

        # Calculate the new acceleration
        a_new = (v_new - v_curr) / dt_warped

        # Calculate the acceleration vector norm
        a_new_norm = np.linalg.norm(a_new)

        # Limit the norm of the acceleration vector
        a_new_norm_clipped = np.clip(a_new_norm, -a_max, a_max)

        # Determine the ratio of the clipped norm, protect against divide by zero
        if a_new_norm != 0.0:
            ratio_unclipped = a_new_norm_clipped / a_new_norm
        else:
            ratio_unclipped = 0.0

        # Scale the acceleration vector by this ratio
        a_new = a_new * ratio_unclipped

        # Calculate the new change in velocity
        dv_new = a_new * dt_warped
        v_new = v_curr + dv_new

        # Calculate the new change in position
        dx_new = v_new * dt_warped
        x_new = x_curr + dx_new

        # Modify final path data with current tooltip and gate positions
        pathutils.set_path_time(final_path, path_idx, t)
        pathutils.set_path_tooltip_pos(final_path, path_idx, x_curr)

        for gate_idx in range(constants.G_NUM_GATES):
            gate_name = 'gate%d' % gate_idx
            x_gate = self.env.get_body_pos(gate_name)
            pathutils.set_path_gate_pos(final_path, path_idx, gate_idx, x_gate)

        # Store this velocity for the next time step
        v_curr = v_new

        # Recalculate the current offset: accumulate how far the limited
        # position deviates from the next planned path point
        x_path_offset += x_new - x_next

        # Perform inverse kinematics to get joint angles
        pa10_joint_angles = self.kinematics.calc_inverse_kinematics(x_curr, x_new)

        # TODO: TEMP - MOVE ONLY POINTER, NO PA10
        self.env.set_group_pos('pointer', x_new)

        if constants.G_TABLE_IS_OSCILLATING:
            # Move the table with y-axis oscillation
            x_table_next = shaker_table(t, x_table)
        else:
            x_table_next = x_table

        self.env.set_body_pos('table', x_table_next)

        # Step through the world by 1 time frame and actuate pa10 joints
        self.env.performAction(pa10_joint_angles, fast=fast_step)

        # Update current time after this step
        t += dt_warped
        path_idx += 1

        # Determine the difference in virtual vs actual time
        t_warped = dt - (time.time() - t_start)

        # Attempt to enforce real-time constraints
        if t_warped >= 0.0:
            # The calculation took less time than the virtual time. Sleep
            # the rest off
            time.sleep(t_warped)
            t_overshoot = 0.0
        else:
            # The calculation took more time than the virtual time. We need
            # to catch up with the virtual time on the next time step
            t_overshoot = -t_warped