def __init__(self, buffer_size=1000000, controller=None, array_ix=0, n_samples=30, top_k=5, closeness_bonus = 2.0, knn_image_downscale_factor=4, rng=None, length_history_particle=4): ''' buffer_size: Maximum number of frames to store controller: A controller such as PIDController that can dynamically adjust the steering signal to keep the robot on track array_ix: the index of the motor signals which contains the steering signal n_samples: number of evenly spaced frames to evaluate from the buffer when classifying top_k: pick the top_k frames that have the best response as given by phaseCorrelate, and then evaluate these on the raw pixels closeness_bonus: how big boost to give to frames that are close to the best match from the previous round, compared with other frames knn_image_downscale_factor: images will be downscaled by this factor and a KNN used to get the closest frames to the current one as candidates for best matching frame. rng: random number generator to use, or None to create a new one length_history_particle: Number of frame to keep in memory for each particle ''' if rng is None: rng = np.random.RandomState() self._length_history_particle = length_history_particle self.buffer_size = buffer_size self.controller = controller self.array_ix = array_ix self.n_samples = n_samples self.top_k = top_k self.closeness_bonus = closeness_bonus self.knn_image_downscale_factor = knn_image_downscale_factor self._buffer_append_freq = 1 self._skipped = 0 self._stretches = [] self._stretches.append(Stretch(0)) self._current_stretch = 0 self._leader = None self.n_particles = 3 self._particles = [] self._rng = rng self.new_image_fft = None self._phase_corr_preprocessor = PhaseCorrPreprocessor() self.rebuild_knn()
class Salmon(object): ''' The Salmon is doing three things: - it keeps track of all seen training samples and the order in which they were seen, so that when classifying a given frame you can use as prior information that it is likely to be close to the previous one you classified - it uses a translation invariant metric to compare the new camera image with the ones seen during training, by calculating the cross-correlation between the new and the old image (through FFT for performance reasons), shifting the images according to the peak of the cross-correlation, and then comparing raw pixels - it uses the amount of translation in the horizontal direction to adjust the steering direction in order to stay on track. For example, if the new image is shifted right compared with the image seen during training, then the steering direction will be adjusted leftwards. ''' def __init__(self, buffer_size=1000000, controller=None, array_ix=0, n_samples=30, top_k=5, closeness_bonus = 2.0, knn_image_downscale_factor=4, rng=None, length_history_particle=4): ''' buffer_size: Maximum number of frames to store controller: A controller such as PIDController that can dynamically adjust the steering signal to keep the robot on track array_ix: the index of the motor signals which contains the steering signal n_samples: number of evenly spaced frames to evaluate from the buffer when classifying top_k: pick the top_k frames that have the best response as given by phaseCorrelate, and then evaluate these on the raw pixels closeness_bonus: how big boost to give to frames that are close to the best match from the previous round, compared with other frames knn_image_downscale_factor: images will be downscaled by this factor and a KNN used to get the closest frames to the current one as candidates for best matching frame. rng: random number generator to use, or None to create a new one length_history_particle: Number of frame to keep in memory for each particle ''' if rng is None: rng = np.random.RandomState() self._length_history_particle = length_history_particle self.buffer_size = buffer_size self.controller = controller self.array_ix = array_ix self.n_samples = n_samples self.top_k = top_k self.closeness_bonus = closeness_bonus self.knn_image_downscale_factor = knn_image_downscale_factor self._buffer_append_freq = 1 self._skipped = 0 self._stretches = [] self._stretches.append(Stretch(0)) self._current_stretch = 0 self._leader = None self.n_particles = 3 self._particles = [] self._rng = rng self.new_image_fft = None self._phase_corr_preprocessor = PhaseCorrPreprocessor() self.rebuild_knn() def camera_input(self, new_image): ''' The classification consists of the following steps: A. Convert image to spatial phase representation B. Find the image from training that matches most closely B1. Select up to n_samples images from the buffer of trained images B2. Create a new particle from the best matching image from B1 B3. Compare all particles with the surrounding frames to see if there is a better match B4. Switch the leader particle if there is another one that is 'significantly better' B5. Discard the worst particle if there are more than n_particles C. Pass along the horizontal displacement of the best matching frame from step B to the PID controller, which calculates a correction to apply to the motor signals in order to keep the robot on track. D. Add the correction from C to the motor signals that are associated with the best matching frame from B. E. Return the result from step D. Returns the motor commands to be applied to keep the robot on the path, as a list of numpy arrays corresponding to those given to the train method. If no training data has been provided yet, then None will be returned instead. ''' # Check correct format of input data # We're expecting a colour image represented as a three-dimensional numpy array, # where the last dimension is the color channel. # assert new_image.ndim == 3 # assert new_image.shape[2] == 3 self.new_image = new_image # Do preprocessing of the new frame consisting of: # - convert to grayscale # - apply Hann window function to avoid edge effects # - perform FFT calculation self.new_image_fft = self._phase_corr_preprocessor.preprocess(new_image) self.new_downscaled_image = cv2.resize(new_image, (new_image.shape[1] / self.knn_image_downscale_factor, new_image.shape[0] / self.knn_image_downscale_factor)) # Now find which image seen during training most closely matches the current one, # while taking into account that the image may have a translation in x and y direction allframes = self._stretches[0].frames N = len(allframes) if N == 0: # We have no training samples yet to predict from return None self.candidates_to_clear = [] # Advance all particles to a close frame that best matches the current input for particle in self._particles: self.advance_particle(particle) new_particle = self.create_new_particle() self.advance_particle(new_particle, backwards = 1, forwards = 1) self._particles.append(new_particle) # Sort the contender particles by average self._particles.sort(key = FrameParticle.average) if self._leader is not None: self.advance_particle(self._leader, backwards = 2, forwards = 4) self.remove_identical_particles() # Choose the leader if self._leader is None: # If there is no leader from previous round, # we just choose the current best one self._leader = self._particles[0] else: self.challenge_leader() # Prune the particles list if len(self._particles) > self.n_particles-1: self._particles.pop() best_frame = self._leader.frame if self.controller is None: result = best_frame.output_data else: result = self.apply_controller(best_frame) # Clear out all of the frames we touched for frame in self.candidates_to_clear: frame.clear() return result def apply_controller(self, best_frame): ''' Apply correction from controller (typically a PID controller of some sort) ''' e = best_frame.phase_x result = best_frame.output_data correction = self.controller.process(e) result = [a for a in result] result[self.array_ix] = result[self.array_ix] + correction log_message = "Chose %s, e = %.2f, correction = %.3f, result = %.3f" % (best_frame, e, correction, result[self.array_ix]) logging.getLogger('Salmon').debug(log_message) return result def learn(self, motor_cmd): ''' This method can be called during training of the robot. It informs the Salmon about what motor commands were applied by the trainer at this moment. The Salmon will associate these motor commands with the last image it has received. motor_cmd: a list of numpy arrays. In principle any data can be put in these numpy arrays, and the same data will be returned as the motor_command_out signal during autonomous execution corresponding to what the robot sees. If using a PID controller, then the first numpy array needs to contain only one number, the left-right steering signal. ''' if self.new_image_fft is None: # We have not received any images yet to associate this motor command with return # Currently, all frames go into the same Stretch. At some point we should probably # start actually putting each training segments into different _stretches. self._skipped += 1 if self._skipped < self._buffer_append_freq: # Skipping frame, as we are only picking every n'th frame (n = self._buffer_append_freq) return self._skipped = 0 frame = self._stretches[self._current_stretch].add_frame(self.new_image, self.new_image_fft, self.new_downscaled_image, motor_cmd) self.add_to_knn(frame) current_size = sum(len(stretch.frames) for stretch in self._stretches) logging.getLogger('TranslationClassifier').debug("inserting frame (size=%d)" % current_size) if current_size > self.buffer_size: self._buffer_append_freq *= 2 for p in self._particles + ([self._leader] if self._leader is not None else []): # particles pointing to frames that will be discarded on halving # have to point to the closest even-numbered frame if p.frame.id % 2 != 0: # odd frame ids will be discarded p.frame = self._stretches[p.frame.stretch_id].frames[p.frame.id - 1] for stretch in self._stretches: stretch.halve() new_size = sum(len(stretch.frames) for stretch in self._stretches) self.rebuild_knn() logging.getLogger('TranslationClassifier').debug("halved buffer size from %s to %d, doubled buffer_append_freq to %d" % (current_size, new_size, self._buffer_append_freq)) def advance_particle(self, particle, backwards=0, forwards=2): frame = particle.frame candidates = self._stretches[frame.stretch_id].frames[max(0, frame.id-backwards):frame.id+forwards+1] best_close_frame = self.get_best_frame(candidates) particle.set_new_frame(best_close_frame) def get_best_frame(self, frames): ''' frames: a list of frames from which to choose the best one return: the frame with the best match in raw pixels ''' # Add these frames to the list of frames to clear after we are done with classification of the current frame self.candidates_to_clear += frames # Calculate phase correlate for all frames for frame in frames: if not frame.response: phase_result = PhaseCorrelate.phaseCorrelate(frame.transformed_image, self.new_image_fft) frame.phase_x = phase_result[0] # We're currently not using the y-shift, which is stored in phase_result[1] frame.response = phase_result[2] # Pick frames with highest phase response candidates = heapq.nlargest(self.top_k, frames, lambda frame: frame.response) # Calculate image distance for top candidates for frame in candidates: if not frame.image_dist: frame.image_dist = shift_and_match_distance(frame.image, self.new_image, frame.phase_x) # Pick frame with lowest image distance return min(candidates, key = lambda frame: frame.image_dist) def create_new_particle(self): # Find which images seen during training most closely matches the current one labels = self.knn.find_neighbours(self.new_downscaled_image, self.n_samples) candidates = tuple(self._stretches[stretch_id].frames[frame_id] for stretch_id, frame_id in labels) particle = FrameParticle(self.get_best_frame(candidates), self._length_history_particle) return particle def remove_identical_particles(self): ''' Assumes that the particles have already been sorted by average image_dist ''' self._particles = [p for p in self._particles if p.frame is not self._leader.frame] particles_to_remove = [] for ix, p1 in enumerate(self._particles): for p2 in self._particles[ix+1:]: if p1.frame is p2.frame: # If p2 has longer history than p1, remove p1, # otherwise remove p2 (since p1 has better image_dist average) if len(p1.image_dist_history) < len(p2.image_dist_history): particles_to_remove.append(p1) else: particles_to_remove.append(p2) for particle in particles_to_remove: try: self._particles.remove(particle) except ValueError: # OK - the same particle may occur multiple times in the particles_to_remove list pass def challenge_leader(self): # First pick the contender with long enough history with the best average contender = None for particle in self._particles: if len(particle.image_dist_history) >= len(self._leader.image_dist_history): contender = particle break if contender is None: # No particle has long enough history to challenge the leader return if contender.image_dist_history[-1] < self._leader.image_dist_history[-1] and contender.average() + self.closeness_bonus < self._leader.average(): logging.getLogger('TranslationClassifier').debug("Switching leader from %s to %s" % (self._leader.frame, contender.frame)) # Put the old leader at the front of the particles contender list (so it won't get pruned this round) self._particles.insert(0, self._leader) self._leader = contender self._particles.remove(contender) def add_to_knn(self, frame): self.knn.add_training_data(frame.downscaled_image, np.array([frame.stretch_id, frame.id])) def rebuild_knn(self): self.knn = OnlineKNN(self.buffer_size * 2) for stretch in self._stretches: map(self.add_to_knn, stretch.frames)