Example #1
    def __init__(self, sess, policy_name, learning_params, curriculum,
                 num_features, num_states, num_actions):
        # initialize attributes
        self.sess = sess
        self.learning_params = learning_params
        self.use_double_dqn = learning_params.use_double_dqn
        self.use_priority = learning_params.prioritized_replay
        self.policy_name = policy_name
        self.tabular_case = learning_params.tabular_case
        # This proxy adds the machine state representation to the MDP state
        self.feature_proxy = FeatureProxy(num_features, num_states,
                                          self.tabular_case)
        self.num_actions = num_actions
        self.num_features = self.feature_proxy.get_num_features()
        # create dqn network
        self._create_network(learning_params.lr, learning_params.gamma,
                             learning_params.num_neurons,
                             learning_params.num_hidden_layers)
        # create experience replay buffer
        if self.use_priority:
            self.replay_buffer = PrioritizedReplayBuffer(
                learning_params.buffer_size,
                alpha=learning_params.prioritized_replay_alpha)
            if learning_params.prioritized_replay_beta_iters is None:
                learning_params.prioritized_replay_beta_iters = curriculum.total_steps
            self.beta_schedule = LinearSchedule(
                learning_params.prioritized_replay_beta_iters,
                initial_p=learning_params.prioritized_replay_beta0,
                final_p=1.0)
        else:
            self.replay_buffer = ReplayBuffer(learning_params.buffer_size)
            self.beta_schedule = None
        # count of the number of environmental steps
        self.step = 0
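
For reference, here is a minimal sketch of what a feature proxy of this kind typically does: it appends a one-hot encoding of the machine (reward machine) state to the environment's feature vector, so the Q-network conditions on both. This is an illustrative assumption, not the repository's actual FeatureProxy implementation (which may, for instance, treat the tabular case differently).

import numpy as np

class FeatureProxySketch:
    """Illustrative stand-in for FeatureProxy (assumed behavior)."""
    def __init__(self, num_features, num_states, tabular_case):
        self.num_features = num_features  # number of MDP features
        self.num_states = num_states      # number of machine states
        self.tabular_case = tabular_case

    def get_num_features(self):
        # MDP features plus one entry per machine state
        return self.num_features + self.num_states

    def add_state_features(self, s_features, u):
        # append a one-hot encoding of the machine state u to the MDP features
        u_features = np.zeros(self.num_states)
        u_features[u] = 1.0
        return np.concatenate((s_features, u_features))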
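
The LinearSchedule above anneals the prioritized-replay importance-sampling exponent beta from beta0 to 1.0 over training. Below is a hedged sketch of how such a schedule is typically consumed when sampling a batch, assuming the OpenAI-baselines-style ReplayBuffer/PrioritizedReplayBuffer interface this code appears to follow (the function and variable names are illustrative, not part of the original class):

def sample_batch(replay_buffer, batch_size, use_priority, beta_schedule, step):
    # Prioritized replay needs the current beta and additionally returns
    # importance-sampling weights and buffer indices.
    if use_priority:
        beta = beta_schedule.value(step)
        s1, a, r, s2, done, weights, idxes = replay_buffer.sample(batch_size, beta=beta)
    else:
        s1, a, r, s2, done = replay_buffer.sample(batch_size)
        weights, idxes = None, None
    return s1, a, r, s2, done, weights, idxes

After the TD-error update, a prioritized buffer's priorities are typically refreshed with replay_buffer.update_priorities(idxes, new_priorities).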

Example #2

    def __init__(self, sess, policy_name, options, option2file, rm, use_rm,
                 learning_params, num_features, num_states, show_print,
                 epsilon=0.1):
        # initialize attributes
        self.show_print = show_print
        self.options = options
        self.option2file = option2file
        self.epsilon = epsilon
        self.gamma = learning_params.gamma
        self.rm = rm 
        self.use_rm = use_rm
        self.tabular_case = learning_params.tabular_case

        # This proxy adds the machine state representation to the MDP state
        self.feature_proxy = FeatureProxy(num_features, num_states, self.tabular_case)
        self.num_actions  = len(options)
        self.num_features = self.feature_proxy.get_num_features()        
        
        # network parameters
        num_hidden_layers = 2                 # has no effect in the tabular case
        num_neurons = 64                      # has no effect in the tabular case
        self.target_network_update_freq = 100 # has no effect in the tabular case
        if self.tabular_case:
            lr = 0.7
            buffer_size = 1
            self.batch_size = 1
            self.learning_starts = 0 
        else:
            lr = 1e-3
            buffer_size = 50000
            self.batch_size = 32
            self.learning_starts = 100

        # create dqn network
        self.neuralnet = MCNet(sess, self.num_actions, self.num_features,
                               policy_name, self.tabular_case,
                               learning_params.use_double_dqn, lr,
                               num_neurons, num_hidden_layers)

        # create experience replay buffer
        self.er_buffer = MCReplayBuffer(buffer_size)
        self.step = 0

        # preprocessing action masks (for pruning useless options)
        self.mask = {}
        for u in self.rm.get_states():
            a_mask = np.ones(self.num_actions, dtype=np.float64)
            if use_rm and not self.rm.is_terminal_state(u):
                a_mask = np.zeros(self.num_actions, dtype=np.float64)
                # Options that would move the RM to another state are useful
                useful_options = self.rm.get_useful_transitions(u)
                # looking for an exact match
                for i in range(self.num_actions):
                    if _is_match(option2file[i].split("&"), useful_options, True):
                        a_mask[i] = 1
                # if no exact match is found, we relax this condition and use any option that might be useful
                if np.sum(a_mask) < 1:
                    a_mask = np.zeros(self.num_actions, dtype=np.float64)
                    for i in range(self.num_actions):
                        if _is_match(option2file[i].split("&"), useful_options, False):
                            a_mask[i] = 1
            self.mask[u] = a_mask
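
The masks built above can then be applied at option-selection time so the meta-controller only considers useful options. The following is a minimal, illustrative sketch (the helper name and the epsilon-greedy scheme are assumptions, not part of the original class):

import numpy as np

def choose_masked_option(q_values, a_mask, epsilon):
    # Epsilon-greedy selection restricted to unmasked (useful) options.
    if np.random.rand() < epsilon:
        valid = np.where(a_mask > 0)[0]
        return int(np.random.choice(valid))
    # Mask out pruned options before taking the greedy choice.
    masked_q = np.where(a_mask > 0, q_values, -np.inf)
    return int(np.argmax(masked_q))

In this class it would be called with something like choose_masked_option(q, self.mask[u], self.epsilon) when picking the next option from RM state u.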