Example #1
def build_subgoal_option_agent(mdp,
                               subgoals,
                               init_region,
                               agent=QLearningAgent,
                               vectors=None,
                               name='-abstr',
                               n_trajs=50,
                               n_steps=100,
                               classifier='list',
                               policy='vi'):
    """Wrap `agent` with options that drive it toward the given subgoals.

    Each entry of `subgoals` yields one option whose policy targets those goal
    states; `init_region` is the set of states from which options may be initiated.
    """
    goal_based_options = aa_helpers.make_subgoal_options(mdp,
                                                         subgoals,
                                                         init_region,
                                                         vectors=vectors,
                                                         n_trajs=n_trajs,
                                                         n_steps=n_steps,
                                                         classifier=classifier,
                                                         policy=policy)
    goal_based_aa = ActionAbstraction(prim_actions=mdp.get_actions(),
                                      options=goal_based_options,
                                      use_prims=True)

    # Wrap the base agent so it can act over both primitive actions and the options.
    option_agent = AbstractionWrapper(
        agent,
        agent_params={"actions": mdp.get_actions()},
        action_abstr=goal_based_aa,
        name_ext=name)

    return option_agent
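
A minimal usage sketch (not taken from the original source): it assumes this project sits on top of simple_rl, which provides GridWorldMDP and GridWorldState as well as the QLearningAgent and AbstractionWrapper used above. The subgoal states below are purely illustrative; the exact state type expected by aa_helpers.make_subgoal_options depends on the rest of the project.

# Usage sketch under the assumptions stated above.
from simple_rl.tasks import GridWorldMDP
from simple_rl.tasks.grid_world.GridWorldStateClass import GridWorldState

mdp = GridWorldMDP(width=5, height=5, init_loc=(1, 1), goal_locs=[(5, 5)])

# Two subgoal "regions" (each a list of states) and the region options may start from.
subgoals = [[GridWorldState(3, 3)], [GridWorldState(5, 1)]]
init_region = [GridWorldState(x, y) for x in range(1, 6) for y in range(1, 6)]

option_agent = build_subgoal_option_agent(mdp, subgoals, init_region, name='-subgoal')
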
    def generate_options(self):
        """Discover options from a graph over the known (visited) states and fit their policies."""
        # TODO: Train the policy using the experience replay buffer instead of sampling new trajectories.

        A, intToS = self.generate_matrix()
        known_region = list(intToS.values())
        if self.method == 'eigen':
            # TODO: how is A represented?
            _, options, vectors = Eigenoptions(A, self.n_ops)
        elif self.method == 'fiedler':
            _, options, _, vectors = FiedlerOptions(A, self.n_ops)
        elif self.method == 'bet':
            _, options, vectors = BetweennessOptions(A, self.n_ops)
        else:
            assert False, 'unknown option-generation method: ' + str(self.method)

        print('generated options:')
        for o in options:
            # An option is either a pair of index lists (init set, goal set) or a
            # single (init index, goal index) pair.
            if isinstance(o[0], list):
                print('inits:')
                for ss in o[0]:
                    print(intToS[ss])
                print('goals:')
                for ss in o[1]:
                    print(intToS[ss])
            else:
                print('init:', intToS[o[0]])
                print('goal:', intToS[o[1]])

        # Use a comprehension here: [[]] * n would alias one shared list, so every
        # append below would land in the same object.
        egoal_list = [[] for _ in range(len(options) * 2)]
        for i, o in enumerate(options):
            if type(o[0]) is list:
                for ss in o[0]:
                    egoal_list[i * 2].append(intToS[ss])
                for ss in o[1]:
                    egoal_list[i * 2 + 1].append(intToS[ss])
            else:
                egoal_list[i * 2] = [intToS[o[0]]]
                egoal_list[i * 2 + 1] = [intToS[o[1]]]

        # The same aliasing caveat applies to the per-option vector dicts.
        evector_list = [dict() for _ in range(len(options) * 2)]
        for i in range(len(options)):
            for j in intToS.keys():
                # The two directions of option i use the same vector with opposite signs.
                evector_list[i * 2][hash(intToS[j])] = -vectors[i][j]
                evector_list[i * 2 + 1][hash(intToS[j])] = vectors[i][j]

        # TODO: policy is computed using vi right now.
        goal_options = aa_helpers.make_subgoal_options(
            self.mdp,
            egoal_list,
            known_region,
            vectors=evector_list,
            n_trajs=self.op_n_episodes,
            n_steps=self.op_n_steps,
            classifier='list',
            policy='vi')

        return goal_options
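
For context, generate_options is an instance method; the skeleton below is a hypothetical sketch of the attributes and helper it relies on. Everything not referenced by the method itself, including the class name and default values, is an assumption, not the class from the original source.

class OptionGeneratorSketch(object):
    # Hypothetical skeleton; only the attribute names used by generate_options()
    # above are taken from the source.
    def __init__(self, mdp, method='fiedler', n_ops=4,
                 op_n_episodes=50, op_n_steps=100):
        self.mdp = mdp                      # simple_rl-style MDP being solved
        self.method = method                # 'eigen', 'fiedler', or 'bet'
        self.n_ops = n_ops                  # number of options to discover
        self.op_n_episodes = op_n_episodes  # trajectories used to fit option policies
        self.op_n_steps = op_n_steps        # max steps per trajectory

    def generate_matrix(self):
        # Must return (A, intToS): an adjacency matrix over visited states and a
        # mapping from matrix indices back to the corresponding state objects.
        raise NotImplementedError

The goal_options returned by generate_options can then be bundled into an ActionAbstraction in the same way build_subgoal_option_agent does above.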