예제 #1
0
    def evaluate_fitness(self, params):
        """
        Play a single episode with one individual and report its total reward.

        :param params: Indexable whose first item holds the network weights and whose second
            item holds the seed handed to both the network and the environment.
        :return: Sum of every reward gathered in the episode. When the episode is cut short
            because the observation stopped changing, the sum includes one extra term that
            extrapolates the recent mean reward over the steps that were not played.
        """
        weights, seed = params[0], params[1]

        # Build the policy with the "zeros" initializer, then load the individual's weights.
        policy = Network(
            self._input_shape,
            self._output_shape,
            seed,
            nn_width=self.nn_width,
            initializer="zeros",
        )
        policy.set_weights(weights)

        env = GymEnvironment(self._env_name, seed=seed)
        observation = env.reset()
        finished = False

        steps_taken = 0
        stalled_steps = 0
        collected = []
        while not finished:
            # Rendering is optional: a falsy ``_render_each`` disables it entirely.
            if self._render_each and steps_taken % self._render_each == 0:
                env.render()

            # The network is called on a batch, so add a leading axis and take the
            # first (only) row of its output as the action.
            batched = np.expand_dims(observation, 0)
            action = policy(batched).numpy()[0]
            observation, reward, finished, _ = env.step(action)

            if self._min_equal_steps > 0:
                # Count consecutive steps in which the observation did not change.
                stalled_steps = (stalled_steps + 1) if np.allclose(batched, observation) else 0
            collected.append(reward)
            steps_taken += 1

            # The length cap takes priority over the stall check.
            if steps_taken >= self._max_episode_len:
                finished = True
                continue

            if self._min_equal_steps > 0 and stalled_steps >= self._min_equal_steps:
                finished = True
                # Stop early and stand in for the unplayed steps with the mean of the most
                # recent rewards multiplied by the number of steps left.
                # NOTE(review): this also fires when the environment itself reported the
                # episode as done on this very step - confirm that is intended.
                recent = collected[-self._min_equal_steps:]
                collected.append((self._max_episode_len - steps_taken) * np.mean(recent))

        return np.sum(collected)
예제 #2
0
            self.SPMe.__init__(init_soc=self.SOC_0)

            # self.state_of_charge = (np.random.uniform(.65, .99, 1)).item()
            # self.SPMe.__init__(init_soc=self.state_of_charge)

        # self.state_of_charge = self.SOC_0
        # self.SPMe.__init__(init_soc=self.SOC_0)

        self.sim_state = self.SPMe.full_init_state

        [
            bat_states, new_sen_states, outputs, sensitivity_outputs, soc_new,
            V_term, theta, docv_dCse, done
        ] = self.SPMe.SPMe_step(full_sim=True,
                                states=self.sim_state,
                                I_input=0)

        self.sim_state = [bat_states, new_sen_states]
        self.state = self.unpack_states(bat_states, new_sen_states, outputs,
                                        sensitivity_outputs)

        self.steps_beyond_done = None
        return np.array(self.state)


if __name__ == '__main__':
    # Smoke test: build the environment and reset it once.
    # The instance is deliberately not called ``gym``: at module level that would rebind
    # the global name ``gym`` and shadow the gym package if this file imports it.
    env = SPMenv()

    env.reset()