def __init__(self, config, maze):
    """Initialise the base mouse, then build the model and its imagination.

    config -- mapping; ``config['serialization']['path']`` is stored as the
              model checkpoint path.
    maze   -- maze object, forwarded unchanged to ``Mouse.__init__``.
    """
    Mouse.__init__(self, config, maze)

    # this mouse uses the CLA
    checkpoint_path = config['serialization']['path']
    self._model_path = checkpoint_path
    self._model_params = MODEL_PARAMS

    # _init_model() populates self._model, which the imagination wraps.
    self._init_model()
    self._imagination = Imagination(self._model)
class SmartMouse(Mouse):
    """Mouse that chooses its moves with a learned sequence model (the CLA).

    In training mode the mouse walks the maze at random so the model can
    learn where the cheese is.  Otherwise it asks the model what each
    candidate move is predicted to lead to and takes the most promising one.
    """

    def __init__(self, config, maze):
        """Initialise the base mouse, then build the model and imagination.

        config -- mapping; ``config['serialization']['path']`` is stored as
                  the model checkpoint path.
        maze   -- maze object, forwarded unchanged to ``Mouse.__init__``.
        """
        Mouse.__init__(self, config, maze)
        # this mouse uses the CLA
        self._model_path = config['serialization']['path']
        self._model_params = MODEL_PARAMS
        self._init_model()
        self._imagination = Imagination(self._model)

    def reset(self, initial_location, training_mode):
        """Restart the mouse at ``initial_location`` and prime the model.

        Delegates to ``Mouse.reset``, forgets the previous location, clears
        the model's sequence state and feeds it the initial location.
        """
        Mouse.reset(self, initial_location, training_mode)
        self._previous_location = None
        # put the initial location into the model,
        # to establish an initial context for prediction
        self._model.resetSequenceStates()
        model_input_data = self._convert_to_model_input(initial_location)
        self._model.run(model_input_data)

    def move(self):
        """Advance the mouse by one step and feed the new location to the model.

        ``before_move`` and ``after_move`` are invoked with the mouse passed
        explicitly, exactly as the original code did.
        """
        self.before_move(self, self.location)

        # Work on a copy: the list is edited below and must not alter
        # whatever sequence the maze handed out.
        possible_moves = list(self._maze.possible_moves(self.location))

        # avoid turning around whenever possible (i.e. be curious!)
        if self._previous_location is not None:
            assert self._previous_location in possible_moves
            if len(possible_moves) >= 2:
                possible_moves.remove(self._previous_location)
            else:
                # the mouse is turning around, use some "fresh thinking".
                self._model.resetSequenceStates()

        self._previous_location = self.location

        if self._training_mode:
            # exploration: randomly walk the maze to learn where the cheese is.
            self.location = random.sample(possible_moves, 1)[0]
        else:
            # exploitation: choose the best possible move
            self.location = self._choose_best_move(possible_moves)

        # update the model with the action we've taken
        model_input_data = self._convert_to_model_input(self.location)
        self._model.run(model_input_data)

        self.after_move(self, self._previous_location, self.location)

    def _init_model(self):
        """Load the model from its checkpoint if one exists, else create it."""
        if os.path.exists(os.path.abspath(self._model_path)):
            self._model = ModelFactory.loadFromCheckpoint(
                os.path.relpath(self._model_path))
        else:
            self._model = ModelFactory.create(self._model_params)

        # NOTE(review): the original indentation was lost; inference is
        # (re)enabled here for both the restored and the freshly created
        # model -- confirm this matches the intended nesting.
        predicted_field = self._model_params['predictedField']
        if predicted_field:
            self._model.enableInference({'predictedField': predicted_field})

    def _convert_to_model_input(self, location):
        """Build the model input record for ``location``.

        Returns a dict with the location rendered as ``'<first>-<second>'``
        and the maze's cheese value at that location.
        """
        return {
            'location': '%s-%s' % (location[0], location[1]),
            'cheese': self._maze.cheese[location],
        }

    def _choose_best_move(self, possible_moves):
        """Return the move from ``possible_moves`` with the highest benefit.

        With a single candidate it is returned immediately.  Otherwise the
        imagination module runs a prediction per candidate, and each one is
        scored as the cheese at that location plus ``_benefit_function`` of
        its multi-step predictions.
        """
        # use the imagination module to make predictions based on
        # a range of possible moves
        self._logger.debug('from location %s, possible moves are %s' %
                           (self.location, possible_moves))

        if len(possible_moves) == 1:
            # no imagination is necessary in this case because there is only
            # one choice
            self._logger.debug('trivial decision, move forward')
            return possible_moves[0]

        def predict_closure(model_input):
            # Bind one input record per candidate; named ``model_input`` so
            # the builtin ``input`` is not shadowed.
            def predict(model_fork):
                model_fork.disableLearning()
                return model_fork.run(model_input)
            return predict

        # apply the function for each action
        funcs = [predict_closure(self._convert_to_model_input(location))
                 for location in possible_moves]
        # presumably results come back in the same order as funcs; the zip
        # below relies on that -- verify against Imagination.imagine
        results = self._imagination.imagine(funcs)

        # get the OPF result for each possible move
        predictions = [(location, r.inferences['multiStepPredictions'])
                       for location, r in zip(possible_moves, results)]

        # evaluate the benefit associated with each possible move
        # (i.e. the cost function)
        benefits = [
            (location,
             self._maze.cheese[location] + self._benefit_function(prediction))
            for location, prediction in predictions
        ]

        # finally, select the best prediction; sort ascending then reverse so
        # ties resolve exactly as they did before
        sorted_predictions = sorted(benefits, key=operator.itemgetter(1))
        sorted_predictions.reverse()
        for l, p in sorted_predictions:
            self._logger.debug('evalation of move %s yields benefit %d' % (l, p))
        return sorted_predictions[0][0]

    def _benefit_function(self, multi_step_predictions):
        """Estimate how much cheese is predicted down a given path.

        multi_step_predictions -- mapping of step -> {value: probability}.

        For every step, takes the largest value whose probability is at
        least ``self._config['min_probability']`` and discounts it by the
        step number; the best discounted value is returned.  Returns 0.0
        when no prediction reaches the threshold, instead of raising
        ``ValueError`` from ``max()`` on an empty list.
        """
        min_p = self._config['min_probability']
        # this algorithm simply looks for the maximum potential cheese
        # (with a probability threshold)
        # TODO replace with a better algorithm
        discounted = []
        for step, probs in multi_step_predictions.items():
            likely = [v for v, p in probs.items() if p >= min_p]
            if likely:
                # float() keeps this a true division even for integer values
                # on Python 2, where ``/`` would otherwise floor.
                discounted.append(float(max(likely)) / step)
        if not discounted:
            return 0.0
        return max(discounted)
class SmartMouse(Mouse):
    """Mouse that chooses its moves with a learned sequence model (the CLA).

    In training mode the mouse walks the maze at random so the model can
    learn where the cheese is.  Otherwise it asks the model what each
    candidate move is predicted to lead to and takes the most promising one.
    """

    def __init__(self, config, maze):
        """Initialise the base mouse, then build the model and imagination.

        config -- mapping; ``config['serialization']['path']`` is stored as
                  the model checkpoint path.
        maze   -- maze object, forwarded unchanged to ``Mouse.__init__``.
        """
        Mouse.__init__(self, config, maze)
        # this mouse uses the CLA
        self._model_path = config['serialization']['path']
        self._model_params = MODEL_PARAMS
        self._init_model()
        self._imagination = Imagination(self._model)

    def reset(self, initial_location, training_mode):
        """Restart the mouse at ``initial_location`` and prime the model.

        Delegates to ``Mouse.reset``, forgets the previous location, clears
        the model's sequence state and feeds it the initial location.
        """
        Mouse.reset(self, initial_location, training_mode)
        self._previous_location = None
        # put the initial location into the model,
        # to establish an initial context for prediction
        self._model.resetSequenceStates()
        model_input_data = self._convert_to_model_input(initial_location)
        self._model.run(model_input_data)

    def move(self):
        """Advance the mouse by one step and feed the new location to the model.

        ``before_move`` and ``after_move`` are invoked with the mouse passed
        explicitly, exactly as the original code did.
        """
        self.before_move(self, self.location)

        # Work on a copy: the list is edited below and must not alter
        # whatever sequence the maze handed out.
        possible_moves = list(self._maze.possible_moves(self.location))

        # avoid turning around whenever possible (i.e. be curious!)
        if self._previous_location is not None:
            assert self._previous_location in possible_moves
            if len(possible_moves) >= 2:
                possible_moves.remove(self._previous_location)
            else:
                # the mouse is turning around, use some "fresh thinking".
                self._model.resetSequenceStates()

        self._previous_location = self.location

        if self._training_mode:
            # exploration: randomly walk the maze to learn where the cheese is.
            self.location = random.sample(possible_moves, 1)[0]
        else:
            # exploitation: choose the best possible move
            self.location = self._choose_best_move(possible_moves)

        # update the model with the action we've taken
        model_input_data = self._convert_to_model_input(self.location)
        self._model.run(model_input_data)

        self.after_move(self, self._previous_location, self.location)

    def _init_model(self):
        """Load the model from its checkpoint if one exists, else create it."""
        if os.path.exists(os.path.abspath(self._model_path)):
            self._model = ModelFactory.loadFromCheckpoint(
                os.path.relpath(self._model_path))
        else:
            self._model = ModelFactory.create(self._model_params)

        # NOTE(review): the original indentation was lost; inference is
        # (re)enabled here for both the restored and the freshly created
        # model -- confirm this matches the intended nesting.
        predicted_field = self._model_params['predictedField']
        if predicted_field:
            self._model.enableInference({'predictedField': predicted_field})

    def _convert_to_model_input(self, location):
        """Build the model input record for ``location``.

        Returns a dict with the location rendered as ``'<first>-<second>'``
        and the maze's cheese value at that location.
        """
        return {
            'location': '%s-%s' % (location[0], location[1]),
            'cheese': self._maze.cheese[location],
        }

    def _choose_best_move(self, possible_moves):
        """Return the move from ``possible_moves`` with the highest benefit.

        With a single candidate it is returned immediately.  Otherwise the
        imagination module runs a prediction per candidate, and each one is
        scored as the cheese at that location plus ``_benefit_function`` of
        its multi-step predictions.
        """
        # use the imagination module to make predictions based on
        # a range of possible moves
        self._logger.debug('from location %s, possible moves are %s' %
                           (self.location, possible_moves))

        if len(possible_moves) == 1:
            # no imagination is necessary in this case because there is only
            # one choice
            self._logger.debug('trivial decision, move forward')
            return possible_moves[0]

        def predict_closure(model_input):
            # Bind one input record per candidate; named ``model_input`` so
            # the builtin ``input`` is not shadowed.
            def predict(model_fork):
                model_fork.disableLearning()
                return model_fork.run(model_input)
            return predict

        # apply the function for each action
        funcs = [predict_closure(self._convert_to_model_input(location))
                 for location in possible_moves]
        # presumably results come back in the same order as funcs; the zip
        # below relies on that -- verify against Imagination.imagine
        results = self._imagination.imagine(funcs)

        # get the OPF result for each possible move
        predictions = [(location, r.inferences['multiStepPredictions'])
                       for location, r in zip(possible_moves, results)]

        # evaluate the benefit associated with each possible move
        # (i.e. the cost function)
        benefits = [
            (location,
             self._maze.cheese[location] + self._benefit_function(prediction))
            for location, prediction in predictions
        ]

        # finally, select the best prediction; sort ascending then reverse so
        # ties resolve exactly as they did before
        sorted_predictions = sorted(benefits, key=operator.itemgetter(1))
        sorted_predictions.reverse()
        for l, p in sorted_predictions:
            self._logger.debug('evalation of move %s yields benefit %d' % (l, p))
        return sorted_predictions[0][0]

    def _benefit_function(self, multi_step_predictions):
        """Estimate how much cheese is predicted down a given path.

        multi_step_predictions -- mapping of step -> {value: probability}.

        For every step, takes the largest value whose probability is at
        least ``self._config['min_probability']`` and discounts it by the
        step number; the best discounted value is returned.  Returns 0.0
        when no prediction reaches the threshold, instead of raising
        ``ValueError`` from ``max()`` on an empty list.
        """
        min_p = self._config['min_probability']
        # this algorithm simply looks for the maximum potential cheese
        # (with a probability threshold)
        # TODO replace with a better algorithm
        discounted = []
        for step, probs in multi_step_predictions.items():
            likely = [v for v, p in probs.items() if p >= min_p]
            if likely:
                # float() keeps this a true division even for integer values
                # on Python 2, where ``/`` would otherwise floor.
                discounted.append(float(max(likely)) / step)
        if not discounted:
            return 0.0
        return max(discounted)