def _ivariants_sorted(self, sorted_keys):
    """Lazily yield every variant that can be built from ``sorted_keys``.

    Each entry of ``self._variants`` is unpacked as a 3-tuple whose first
    item is the key and whose second item holds the values for that key.
    The values are either an iterable of choices, or a callable that
    returns the choices when called with keyword arguments taken from the
    partially built variant.

    The keys are processed recursively: all variants for
    ``sorted_keys[:-1]`` are generated first and each is then extended
    with every choice of the last key. Every argument name of a callable
    must therefore be one of the earlier keys; otherwise looking it up in
    the partial variant raises ``KeyError``.

    Args:
        sorted_keys: Sequence of variant keys, ordered so that a key
            comes after every key its callable depends on.

    Yields:
        A plain empty ``dict`` when ``sorted_keys`` is empty, otherwise
        one ``AttrDict`` per combination of choices.

    Raises:
        IndexError: If the last key has no entry in ``self._variants``.
    """
    if not sorted_keys:
        # Base case of the recursion: a single, empty variant.
        yield dict()
        return

    first_keys = sorted_keys[:-1]
    last_key = sorted_keys[-1]
    last_vals = [v for k, v, _ in self._variants if k == last_key][0]

    # Decide once, not per variant, whether the values are computed.
    is_dependent = callable(last_vals)
    last_val_keys = None
    if is_dependent:
        # inspect.getargspec was removed in Python 3.11; prefer
        # getfullargspec and only fall back on interpreters lacking it.
        getspec = (getattr(inspect, "getfullargspec", None)
                   or inspect.getargspec)
        last_val_keys = getspec(last_vals).args
        # Bound methods still list their bound first argument; drop it.
        if hasattr(last_vals, 'im_self') or hasattr(last_vals, '__self__'):
            last_val_keys = last_val_keys[1:]

    for variant in self._ivariants_sorted(first_keys):
        if is_dependent:
            last_variants = last_vals(
                **{k: variant[k] for k in last_val_keys})
        else:
            last_variants = last_vals
        for last_choice in last_variants:
            yield AttrDict(variant, **{last_key: last_choice})
return samples def get_action(self, t, observation, policy, **kwargs): #applying MC Dropout and taking the mean action? action, _ = policy.get_action(observation) mc_dropout = 10 all_actions = np.zeros(shape=(mc_dropout, action.shape[0])) for d in range(mc_dropout): action, _ = policy.get_action(observation) all_actions[d, :] = action mean_action = np.mean(all_actions, axis=0) return mean_action if __name__ == "__main__": ou = MCDropout( env_spec=AttrDict(action_space=Box(low=-1, high=1, shape=(1, ))), mu=0, theta=0.15, sigma=0.3) states = [] for i in range(1000): states.append(ou.evolve_state()[0]) import matplotlib.pyplot as plt plt.plot(states) plt.show()