self.model.step(action) if self.iteration_count >= 200: terminal = True else: terminal = star.terminal(self.model.state) reward = star.reward(self.model.state, terminal) brain_state = star.state(self.model.state) return (brain_state, reward, terminal) if __name__ == "__main__": config = bonsai_ai.Config(sys.argv) brain = bonsai_ai.Brain(config) model = CartPole() sim = CartpoleSimulator(brain, 'CartpoleSimulator', config) sim.model = model render = None if '--render' in sys.argv: log.info('rendering') from render import Viewer render = True viewer = Viewer() viewer.model = model log.info('starting simulation...')
# NOTE(review): these two defs take ``self`` and are presumably methods of the
# BasicSimulator class used below; the class header is not visible here, so
# confirm the enclosing indentation when merging.
def simulate(self, action):
    """Return a fixed ``(state, reward, terminal)`` tuple for one step.

    The state is a constant dictionary of test fields, the reward is always
    ``1.0``, and the episode is flagged terminal once
    ``self.iteration_count`` exceeds 5.

    Args:
        action: The action for this step; it is not used.

    Returns:
        A ``(state, reward, terminal)`` tuple.
    """
    print("simulate")
    state = {
        "test_number": 0,
        "test_double": 1,
        "test_float": 2,
        "test_int64": 3,
        "test_int32": 4,
        # NOTE(review): this key used to be listed twice (values 5, then 6).
        # Python keeps only the last value, so the dead ``5`` entry was
        # removed; the resulting dict is unchanged. The second entry may have
        # been meant for a different field (e.g. a 64-bit unsigned one) --
        # confirm against the schema this simulator is paired with.
        "test_uint32": 6,
        # "test_bool": True,
        # "test_string": 0,
    }
    terminal = self.iteration_count > 5
    reward = 1.0
    return (state, reward, terminal)


def episode_finish(self):
    """Log the end of the episode and terminate the process via ``sys.exit()``."""
    print("episode_finish")
    sys.exit()


if __name__ == "__main__":
    config = bonsai_ai.Config()
    brain = bonsai_ai.Brain(config)
    sim = BasicSimulator(brain, "the_simulator")
    # Keep stepping until run() returns a falsy value.
    while sim.run():
        pass
return { "currentState": state, "comfort": self.simulation.comfort, "lastAction": self.simulation.lastAction } def _shape_reward(self): """ Return a reward for approaching the target. Max 1, min -2. """ return 0 def reward_shaped(self): """Reward for approaching target""" return self.simulation.get_subjects_comfort() def _get_numeric_state_from(self, state): if state not in self._statesMap: self._statesMap[state] = len(self._statesMap) return self._statesMap[state] if __name__ == "__main__": logger.info('starting...') config = bsa.Config(sys.argv) brain = bsa.Brain(config) sim = InteractiveHomeBridge(brain, "home_sim") while sim.run(): continue