def evaluate_fitness(self, params):
    """Evaluate the fitness of a single individual.

    Builds a zero-initialized network, loads the individual's weights,
    and rolls out one episode in the configured gym environment. The
    episode ends when the environment reports done, when the step cap
    ``self._max_episode_len`` is reached, or when the observation has
    stayed (numerically) unchanged for ``self._min_equal_steps``
    consecutive steps — in that last case the reward the agent would
    have kept collecting until the cap is extrapolated from the recent
    average and added on.

    :param params: pair ``(network_weights, seed)`` for the individual.
    :return: Fitness of the individual — the summed episode reward.
    """
    weights, seed = params[0], params[1]

    net = Network(self._input_shape, self._output_shape, seed,
                  nn_width=self.nn_width, initializer="zeros")
    net.set_weights(weights)

    env = GymEnvironment(self._env_name, seed=seed)
    obs, done = env.reset(), False

    step_count = 0
    stagnant_steps = 0
    collected_rewards = []

    while not done:
        if self._render_each and step_count % self._render_each == 0:
            env.render()

        # The network expects a batch dimension; note the expanded obs is
        # also what gets compared against next_obs below (broadcasts fine).
        obs = np.expand_dims(obs, 0)
        action = net(obs).numpy()[0]
        next_obs, reward, done, _ = env.step(action)

        if self._min_equal_steps > 0:
            # Count consecutive steps where the observation did not change.
            stagnant_steps = stagnant_steps + 1 if np.allclose(obs, next_obs) else 0

        collected_rewards.append(reward)
        obs = next_obs
        step_count += 1

        if step_count >= self._max_episode_len:
            done = True
        elif self._min_equal_steps > 0 and stagnant_steps >= self._min_equal_steps:
            done = True
            # Add the expected reward we would have collected had we
            # waited until the episode length cap.
            collected_rewards.append(
                (self._max_episode_len - step_count)
                * np.mean(collected_rewards[-self._min_equal_steps:]))

    # print(f"Total steps {step_count}: ...")  # timing hook kept for reference
    return np.sum(collected_rewards)
self.SPMe.__init__(init_soc=self.SOC_0) # self.state_of_charge = (np.random.uniform(.65, .99, 1)).item() # self.SPMe.__init__(init_soc=self.state_of_charge) # self.state_of_charge = self.SOC_0 # self.SPMe.__init__(init_soc=self.SOC_0) self.sim_state = self.SPMe.full_init_state [ bat_states, new_sen_states, outputs, sensitivity_outputs, soc_new, V_term, theta, docv_dCse, done ] = self.SPMe.SPMe_step(full_sim=True, states=self.sim_state, I_input=0) self.sim_state = [bat_states, new_sen_states] self.state = self.unpack_states(bat_states, new_sen_states, outputs, sensitivity_outputs) self.steps_beyond_done = None return np.array(self.state) if __name__ == '__main__': gym = SPMenv() gym.reset()