def test_clipping_per_neuron(self, ctrnn_config, brain_param_identity, box2d):
    """Check that per-neuron state boundaries clip ``brain.y`` to each neuron's own limits.

    The identity brain parameters get distinct ``clip_max``/``clip_min`` values per
    neuron. The brain is then stepped with a very large positive and a very large
    negative observation, and the internal state must equal the configured bound
    after each step.
    """
    per_neuron_config = evolve(ctrnn_config, optimize_state_boundaries="per_neuron")
    params = evolve(
        brain_param_identity,
        clip_max=np.array([2, 3]),
        clip_min=np.array([-4, -5]),
    )

    # Input and output both use the same 2d box fixture.
    spaces = dict(input_space=box2d, output_space=box2d)
    ContinuousTimeRNN.generate_and_set_class_state(config=per_neuron_config, **spaces)
    brain = ContinuousTimeRNN(
        individual=self.param_to_genom(params),
        config=per_neuron_config,
        **spaces,
    )

    unit_observation = np.array([1, 1])
    # First drive the state far up, then far down; each time it must stop at the bound.
    for scale, expected_bound in ((100000, params.clip_max), (-100000, params.clip_min)):
        brain.step(unit_observation * scale)
        assert np.allclose(brain.y, expected_bound)
def test_step(self, ctrnn_config, brain_param_identity, box2d):
    """Check two consecutive steps of an identity-parameter brain with ``delta_t = 1``.

    The config is evolved with ``set_principle_diagonal_elements_of_W_negative=False``
    before the class state is generated. The test then verifies the internal state
    ``brain.y`` and the returned output after the first step, and the internal state
    after the second step.
    """
    config = evolve(ctrnn_config, set_principle_diagonal_elements_of_W_negative=False)
    ContinuousTimeRNN.generate_and_set_class_state(
        config=config,
        input_space=box2d,
        output_space=Box(-1, 1, shape=[2]),
    )
    genome = self.param_to_genom(brain_param_identity)
    brain = ContinuousTimeRNN(
        input_space=box2d,
        output_space=box2d,
        individual=genome,
        config=config,
    )
    brain.delta_t = 1.0
    observation = np.array([1, 1])

    # The state is expected to be all zeros before the first step.
    # NOTE(review): zeros([2, 2]) only compares against a length-2 state via
    # broadcasting -- confirm the intended shape of brain.y.
    assert np.allclose(brain.y, np.zeros([2, 2]))

    first_output = brain.step(observation)
    # Due to the identity matrices, after one iteration the internal state is
    # exactly the observation.
    assert np.allclose(brain.y, observation)
    # Due to the identity matrices, after one iteration the output is just the
    # input, but passed through tanh.
    assert np.allclose(first_output, np.tanh(observation))

    brain.step(observation)
    expected_state = np.tanh(observation) + observation + observation
    assert np.allclose(brain.y, expected_state)
def test_clipping_legacy(self, ctrnn_config, brain_param_identity, box2d):
    """Check output and state clipping with the unmodified (legacy) config.

    The brain is stepped once with a very large observation; the returned output
    and the internal state ``brain.y`` are then compared against their expected
    saturated values.
    """
    ContinuousTimeRNN.generate_and_set_class_state(
        config=ctrnn_config,
        input_space=box2d,
        output_space=Box(-1, 1, shape=[2]),
    )
    brain = ContinuousTimeRNN(
        input_space=box2d,
        output_space=box2d,
        individual=self.param_to_genom(brain_param_identity),
        config=ctrnn_config,
    )

    huge_observation = np.array([1, 1]) * 1000
    output = brain.step(huge_observation)

    # Due to tanh the maximum output is 1.0.
    assert np.allclose(output, np.ones(2))
    # With legacy clipping everything is clipped to the lowest max-value,
    # which is 10 in this genome.
    expected_state = np.ones(2) * 10
    assert np.allclose(brain.y, expected_state)
output_space=experiment.output_space, individual=ind, config=cfg_exp.brain) ob = env.reset() transformed_ob = transform(ob, coming_from_space=experiment.input_space, is_brain_input=True) env.unwrapped.input_data = [0, 1, 0, 1, 1] env.unwrapped.target = env.unwrapped.target_from_input_data( env.unwrapped.input_data) env.render() done = False fitness_current = 0 while not done: brain_output = brain.step(transformed_ob) action = transform(brain_output, coming_from_space=experiment.output_space, is_brain_input=False) print("act: " + str(action)) ob, rew, done, info = env.step(action) transformed_ob = transform(ob, coming_from_space=experiment.input_space, is_brain_input=True) fitness_current += rew if rew < 0: print("error") env.render() env.render() print('score: ' + str(fitness_current))
# Roll out the individual `ind` in the experiment's environment template,
# rendering every step and printing each action and the accumulated reward.
# NOTE(review): the nesting below was reconstructed from whitespace-collapsed
# source -- confirm which render() calls belong to the while/for bodies.
env = experiment.env_template
for i in range(1):
    # Build a brain for this individual from the experiment's spaces and brain config.
    brain = ContinuousTimeRNN(input_space=experiment.input_space,
                              output_space=experiment.output_space,
                              individual=ind, config=cfg_exp.brain)
    ob = env.reset()
    # Override the input sequence after reset with a fixed one and recompute
    # the target from it via the environment's own helper.
    env.unwrapped.input_data = [0, 1, 0, 1, 1]
    env.unwrapped.target = env.unwrapped.target_from_input_data(
        env.unwrapped.input_data)
    env.render()
    done = False
    fitness_current = 0
    while not done:
        # The raw observation is fed to the brain; the brain output is
        # converted to an action for the experiment's output space.
        brain_output = brain.step(ob)
        action = output_to_action(brain_output, experiment.output_space)
        print("act: " + str(action))
        ob, rew, done, info = env.step(action)
        fitness_current += rew
        # A negative reward is reported as an error on stdout.
        if rew < 0:
            print("error")
        env.render()
    env.render()
    print('score: ' + str(fitness_current))

# todo: use brain_vis to visualize this
# NOTE(review): the thread is only constructed here; it is presumably started
# further down in the file -- confirm.
t = threading.Thread(target=experiment.visualize, args=[[ind], BrainVisualizerHandler(), 2, False, False])