def parse_data(self): reshape = self.param.get('load_data_reshape') file_name = self.param.get('directory_romi_dataset') + self.param.get( 'romi_dataset_pkl') pixels = self.param.get('image_size') channels = self.param.get('image_channels') images = [] positions = [] commands = [] with gzip.open(file_name, 'rb') as memory_file: memories = pickle.load(memory_file) #, encoding='bytes') print('converting data...') count = 0 for memory in memories: image = memory['image'] if (channels == 1) and (image.ndim == 3): image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) if self.param.get('image_resize'): image = cv2.resize(image, (pixels, pixels)) images.append(np.asarray(image).astype('float32') / 255) cmd = memory['command'] commands.append([ utils.normalise_x(float(cmd.x), self.param), utils.normalise_y(float(cmd.y), self.param) ]) #cmd_p = Position() #cmd_p.x = float(cmd.x) #cmd_p.y = float(cmd.y) #cmd_p.z = -90 #cmd_p.speed = 1400 #commands.append(normalise(cmd_p)) pos = memory['position'] positions.append([ utils.normalise_x(float(pos.x), self.param), utils.normalise_y(float(pos.y), self.param) ]) #pos_p = Position() #pos_p.x = float(pos.x) #pos_p.y = float(pos.y) #pos_p.z = -90 #pos_p.speed = 1400 #positions.append(normalise(pos_p)) count += 1 positions = np.asarray(positions) commands = np.asarray(commands) images = np.asarray(images) if reshape: images = images.reshape((len(images), pixels, pixels, channels)) #print ('images shape ', str(images.shape)) return images, commands, positions
def create_simulated_data(self, pos, cmd, param): self.lock.acquire() a = [int(pos.x), int(pos.y)] b = [int(cmd.x), int(cmd.y)] tr = self.cam_sim.get_trajectory(a, b) trn = self.cam_sim.get_trajectory_names(a, b) #print ('image size ', str(param.get('image_size'))) rounded = self.cam_sim.round2mul(tr, 5) # only images every 5mm for i in range(len(tr)): #pp = utils.Position() #pp.x = float(rounded[i][0]) #pp.y = float(rounded[i][1]) #pp.z = -90 #pp.speed = 1400 #print ('pp ',pp) #self.pos.append(utils.normalise(pp)) self.pos.append([ utils.normalise_x(float(rounded[i][0]), param), utils.normalise_y(float(rounded[i][1]), param) ]) #self.cmd.append([float(int(cmd.x)) / utils.x_lims[1], float(int(cmd.y)) / utils.y_lims[1]] ) self.cmd.append([ utils.normalise_x(float(int(cmd.x)), param), utils.normalise_y(float(int(cmd.y)), param) ]) #self.cmd.append( utils.normalise(cmd) ) cv2_img = cv2.imread(trn[i]) #,1 ) cv2.imshow('image', cv2_img) if param.get('image_channels') == 1: cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) cv2_img = cv2.resize( cv2_img, (param.get('image_size'), param.get('image_size')), interpolation=cv2.INTER_LINEAR) cv2_img = cv2_img.astype('float32') / 255 cv2_img.reshape(1, param.get('image_size'), param.get('image_size'), param.get('image_channels')) self.img.append(cv2_img) # update memory # first update the memory, then update the models observed_pos = self.pos[-1] observed_img = cv2_img observed_img_code = np.asarray( self.models.encoder.predict( observed_img.reshape( 1, param.get('image_size'), param.get('image_size'), param.get('image_channels')))).reshape( param.get('code_size')) self.models.memory_fwd.update(observed_pos, observed_img_code) self.models.memory_inv.update(observed_img_code, observed_pos) self.lock.release()
def generate_simulated_sensorimotor_data(self, pos, cmd): #self.lock.acquire() a = [int(pos.x), int(pos.y)] b = [int(cmd.x), int(cmd.y)] tr = self.cam_sim.get_trajectory(a, b) #trn = self.cam_sim.get_trajectory_names(a,b) tr_img = self.cam_sim.get_trajectory_images(a, b) #print ('image size ', str(param.get('image_size'))) rounded = self.cam_sim.round2mul(tr, 5) # only images every 5mm for i in range(len(tr)): self.pos.append([ utils.normalise_x(float(rounded[i][0]), self.parameters), utils.normalise_y(float(rounded[i][1]), self.parameters) ]) self.cmd.append([ utils.normalise_x(float(int(cmd.x)), self.parameters), utils.normalise_y(float(int(cmd.y)), self.parameters) ]) self.img.append(tr_img[i]) ''' cv2_img = cv2.imread(trn[i])#,1 ) if param.get('image_channels') ==1 and (cv2_img.ndim == 3): cv2_img = cv2.cvtColor(cv2_img, cv2.COLOR_BGR2GRAY) if param.get('image_resize'): cv2_img = cv2.resize(cv2_img,(param.get('image_size'), param.get('image_size')), interpolation = cv2.INTER_LINEAR) cv2_img = cv2_img.astype('float32') / 255 cv2_img.reshape(1, param.get('image_size'), param.get('image_size'), param.get('image_channels')) self.img.append(cv2_img) ''' # update memory # first update the memory, then update the models observed_pos = self.pos[-1] #observed_img = cv2_img observed_img = tr_img[i] observed_img_code = np.asarray( self.models.encoder.predict( observed_img.reshape( 1, self.parameters.get('image_size'), self.parameters.get('image_size'), self.parameters.get('image_channels')))).reshape( self.parameters.get('code_size')) self.models.memory_fwd.update(observed_pos, observed_img_code) self.models.memory_inv.update(observed_img_code, observed_pos)
def run_babbling(self): for _ in range(self.parameters.get('single_run_duration')): print('DOE_id: ', self.parameters.get('doe_experiment_id'), '/', (self.doe_num_of_experiments - 1), '. Run:', self.parameters.get('run_id'), ', iter.', self.iteration, '/', self.parameters.get('single_run_duration')) # log current mean squared error for FWD and INV models self.log_MSE() # get the goal index in the SOM using the intrinsic motivation strategy self.current_goal_idx, self.current_goal_x, self.current_goal_y = self.intrinsic_motivation.select_goal( ) # get the goal coordinates from the selected neuron coordinates in the SOM feature space self.goal_code = self.models.goal_som._weights[ self.current_goal_x, self.current_goal_y].reshape(1, self.parameters.get('code_size')) #print ('goal code ', self.goal_code) # generate a motor command cmd = utils.Position() cmd_wo_noise = utils.Position() # without noise cmd.x, cmd.y, cmd_wo_noise.x, cmd_wo_noise.y = self.create_motor_cmd( ) # execute motor command and generate sensorimotor data self.generate_simulated_sensorimotor_data(self.prev_pos, cmd) # plot the explored points and the goal positions if self.iteration % self.parameters.get( 'plot_exploration_iter') == 0: goals_pos = self.models.inv_model.predict( self.models.goal_som._weights.reshape( len(self.models.goal_som._weights) * len(self.models.goal_som._weights[0]), len(self.models.goal_som._weights[0][0]))) # plot observations and goals plots.plot_exploration( positions=self.pos, goals=goals_pos, iteration=self.iteration, param=self.parameters, memory=False, title=self.parameters.get('goal_selection_mode') + '_' + str(self.iteration)) # plot memory positions and goals plots.plot_exploration( positions=self.models.memory_fwd.input_variables, goals=goals_pos, iteration=self.iteration, param=self.parameters, memory=True, title='memory_inputs_' + str(self.iteration)) # log the last movement if not self.random_cmd_flag: self.intrinsic_motivation.update_movement_dynamics( current_pos=cmd, previous_pos=self.prev_pos) # update mse dynamics if self.iteration % self.parameters.get( 'im_frequency_of_update_mse_dynamics') == 0: self.intrinsic_motivation.update_mse_dynamics( self.models.logger_fwd.get_last_mse()) # update error dynamics of the current goal (it is supposed that at this moment the action is finished #if len(self.img)>0:# and not (self.prev_goal_idx == -1) and not self.random_cmd_flag: #cmd_vector = [ utils.normalise_x(cmd.x, self.parameters), utils.normalise_y(cmd.y, self.parameters)] cmd_vector = [ utils.normalise_x(cmd_wo_noise.x, self.parameters), utils.normalise_y(cmd_wo_noise.y, self.parameters) ] predicted_code = self.models.fwd_model.predict( np.asarray(cmd_vector).reshape((1, 2))) prediction_error = np.linalg.norm( np.asarray(self.goal_code[:]) - np.asarray(predicted_code[:])) #observed_image_code = self.models.encoder.predict(np.asarray(self.img[-1]).reshape(1,self.parameters.get('image_size'),self.parameters.get('image_size'),self.parameters.get('image_channels'))) #prediction_error = np.linalg.norm(np.asarray(observed_image_code[:]) - np.asarray(predicted_code[:])) #self.intrinsic_motivation.update_error_dynamics(self.current_goal_x, self.current_goal_y, prediction_error, _append=(self.current_goal_idx == self.prev_goal_idx)) self.intrinsic_motivation.update_error_dynamics( self.current_goal_x, self.current_goal_y, prediction_error) # fit models if len(self.img) > self.parameters.get( 'batch_size'): # and (len(self.img) == len(self.pos)): # get image codes and position readings from the generated sensorimotor data observed_codes_batch = self.models.encoder.predict( np.asarray(self.img[-( self.parameters.get('batch_size')):]).reshape( self.parameters.get('batch_size'), self.parameters.get('image_size'), self.parameters.get('image_size'), self.parameters.get('image_channels'))) observed_pos_batch = copy.deepcopy( self.pos[-(self.parameters.get('batch_size')):]) #print ('code[0]', observed_codes_batch[0]) #if len(self.models.memory_fwd.output_variables)>0: # print ('mem code[0]', self.models.memory_fwd.output_variables[0]) # fit the model with the current batch of observations and the memory! # create then temporary input and output tensors containing batch and memory obs_and_mem_pos = [] obs_and_mem_img_codes = [] if not self.models.memory_fwd.is_memory_empty(): obs_and_mem_pos = np.vstack( (np.asarray(observed_pos_batch), np.asarray(self.models.memory_fwd.input_variables))) obs_and_mem_img_codes = np.vstack( (np.asarray(observed_codes_batch), np.asarray(self.models.memory_fwd.output_variables))) else: obs_and_mem_pos = np.asarray(observed_pos_batch) obs_and_mem_img_codes = np.asarray(observed_codes_batch) # update forward and inverse models self.models.train_forward_code_model_on_batch( obs_and_mem_pos, obs_and_mem_img_codes) self.models.train_inverse_code_model_on_batch( obs_and_mem_img_codes, obs_and_mem_pos) # update autoencoder #train_autoencoder_on_batch(self.autoencoder, self.encoder, self.decoder, np.asarray(self.img[-32:]).reshape(32, self.image_size, self.image_size, self.channels), batch_size=self.batch_size, cae_epochs=5) # update goals' self organising map if not self.parameters.get('fixed_goal_som'): self.models.update_som( np.asarray(observed_codes_batch).reshape( (self.parameters.get('batch_size'), self.parameters.get('code_size')))) ##### post-process steps # update the previous goal index variable if not self.random_cmd_flag: self.prev_goal_idx = copy.deepcopy(self.current_goal_idx) # update the previous position variable for next iteration self.prev_pos.x = cmd.x self.prev_pos.y = cmd.y self.prev_pos.z = cmd.z self.prev_pos.speed = cmd.speed # increase iteration count self.iteration = self.iteration + 1 if self.iteration % self.parameters.get( 'save_data_every_x_iteration') == 0: self.save_models() ### experiment is finished, save models and plots print('Saving models') self.save_models()
def run_babbling(self, param): p = utils.Position() for _ in range(param.get('max_iterations')): # record logs and data self.log_current_inv_mse(param) self.log_current_fwd_mse(param) #print ('Mode ', self.goal_selection_mode, ' hist_size ', str(self.history_size), ' prob ', str(self.history_buffer_update_prob), ' iteration : ', self.iteration) print('Iteration ', self.iteration) # select a goal using the intrinsic motivation strategy self.current_goal_idx, self.current_goal_x, self.current_goal_y = self.intrinsic_motivation.select_goal( ) if param.get('goal_selection_mode') == 'db' or param.get( 'goal_selection_mode') == 'random': self.goal_image = self.test_images[ self.current_goal_idx].reshape(1, param.get('image_size'), param.get('image_size'), param.get('image_channels')) self.goal_code = self.models.encoder.predict(self.goal_image) elif param.get('goal_selection_mode') == 'som': self.goal_code = self.models.goal_som._weights[ self.current_goal_x, self.current_goal_y].reshape(1, param.get('code_size')) else: print('wrong goal selection mode, exit!') sys.exit(1) # choose random motor commands from time to time ran = random.random() if ran < param.get('random_cmd_rate') or param.get( 'goal_selection_mode' ) == 'random': #or self.models.memory_fwd.is_memory_still_not_full() self.random_cmd_flag = True print('generating random motor command') p.x = random.uniform(utils.x_lims[0], utils.x_lims[1]) p.y = random.uniform(utils.y_lims[0], utils.y_lims[1]) self.prev_goal_idx = -1 else: self.random_cmd_flag = False motor_pred = [] if param.get('goal_selection_mode') == 'db': motor_pred = self.models.inv_model.predict(self.goal_code) print('pred ', motor_pred, ' real ', self.test_pos[self.current_goal_idx]) else: goal_decoded = self.models.decoder.predict(self.goal_code) motor_pred = self.models.inv_model.predict(self.goal_code) image_pred = self.models.decoder.predict( self.models.fwd_model.predict(np.asarray(motor_pred))) noise_x = np.random.normal(0, 0.02) noise_y = np.random.normal(0, 0.02) print('prediction ', motor_pred) p.x = utils.clamp_x( utils.unnormalise_x(motor_pred[0][0] + noise_x, param)) p.y = utils.clamp_y( utils.unnormalise_y(motor_pred[0][1] + noise_y, param)) #p = utils.clamp(utils.unnormalise(motor_pred[0])) # make it possible to add noise #print ('predicted utils.Position ', motor_pred[0], 'p+noise ', motor_pred[0][0]+noise_x, ' ' , motor_pred[0][1]+noise_y, ' utils.clamped ', p.x, ' ' , p.y, ' noise.x ', noise_x, ' n.y ', noise_y) p.z = int(-90) p.speed = int(1400) # generate movement self.create_simulated_data(self.prev_pos, p, param) # store the amplitude of this movement if not self.random_cmd_flag and (self.current_goal_idx == self.prev_goal_idx): self.intrinsic_motivation.log_last_movement(p, self.prev_pos) print('current_p', p.x, ' ', p.y) print('prev_p', self.prev_pos.x, ' ', self.prev_pos.y) # update the variables self.prev_pos.x = p.x self.prev_pos.y = p.y self.prev_pos.z = p.z self.prev_pos.speed = p.speed # plot the explored points and the utils.Position of the goals if self.iteration % param.get('plot_exploration_iter') == 0: if param.get('goal_selection_mode') == 'db' or param.get( 'goal_selection_mode') == 'random': goals_pos = self.test_pos[0:(param.get('goal_size') * param.get('goal_size'))] elif param.get('goal_selection_mode') == 'som': goals_pos = self.models.inv_model.predict( self.models.goal_som._weights.reshape( len(self.models.goal_som._weights) * len(self.models.goal_som._weights[0]), len(self.models.goal_som._weights[0][0]))) plot_exploration(positions=self.pos, goals=goals_pos, iteration=self.iteration, param=param) if self.iteration % param.get( 'im_pe_buffer_size_update_frequency') == 0: self.intrinsic_motivation.update_mse_dynamics( self.models.logger_fwd.get_last_mse()) # update error dynamics of the current goal (it is supposed that at this moment the action is finished if len(self.img) > 0 and not ( param.get('goal_selection_mode') == 'random') and not (self.random_cmd_flag and len( self.intrinsic_motivation.slopes_pe_buffer) > 0): #cmd = utils.normalise(p) # [p.x/float(utils.x_lims[1]), p.y/float(utils.y_lims[1])] cmd = [ utils.normalise_x(p.x, param), utils.normalise_y(p.y, param) ] prediction_code = self.models.fwd_model.predict( np.asarray(cmd).reshape((1, 2))) prediction_error = np.linalg.norm( np.asarray(self.goal_code[:]) - np.asarray(prediction_code[:])) if not (self.prev_goal_idx == -1): self.intrinsic_motivation.update_error_dynamics( self.current_goal_x, self.current_goal_y, prediction_error, _append=(self.current_goal_idx == self.prev_goal_idx)) # fit models if (len(self.img) > param.get('batch_size')) and (len( self.img) == len(self.pos)): observed_codes_batch = self.models.encoder.predict( np.asarray(self.img[-(param.get('batch_size')):]).reshape( param.get('batch_size'), param.get('image_size'), param.get('image_size'), param.get('image_channels'))) observed_pos_batch = self.pos[-(param.get('batch_size')):] # fit the model with the current batch of observations and the memory! # create then temporary input and output tensors containing batch and memory obs_and_mem_pos = [] obs_and_mem_img_codes = [] if not self.models.memory_fwd.is_memory_empty(): obs_and_mem_pos = np.vstack( (np.asarray(observed_pos_batch), np.asarray(self.models.memory_fwd.input_variables))) obs_and_mem_img_codes = np.vstack( (np.asarray(observed_codes_batch), np.asarray(self.models.memory_fwd.output_variables))) else: obs_and_mem_pos = np.asarray(observed_pos_batch) obs_and_mem_img_codes = np.asarray(observed_codes_batch) self.models.train_forward_code_model_on_batch( obs_and_mem_pos, obs_and_mem_img_codes, param) self.models.train_inverse_code_model_on_batch( obs_and_mem_img_codes, obs_and_mem_pos, param) #train_autoencoder_on_batch(self.autoencoder, self.encoder, self.decoder, np.asarray(self.img[-32:]).reshape(32, self.image_size, self.image_size, self.channels), batch_size=self.batch_size, cae_epochs=5) if not self.random_cmd_flag: self.prev_goal_idx = self.current_goal_idx self.iteration = self.iteration + 1 print('Saving models') self.save_models(param)