def learn_policy(self):
    # Initialize Q-learner.
    qlearner = QLearner(
        self.state_space, self.actions, self.handle_action,
        self.reset_training_world
    )

    # Initialize goal states: every state whose arrow component is
    # Arrows_Complete counts as a goal.
    goal_states = []
    print "Enumerating goal states..."
    print self.state_space_dim
    for state_index in xrange(qlearner.r_table.size):
        state = numpy.unravel_index(state_index, qlearner.r_table.shape)
        if state[FullTransform.StateOffset.Arrows] == World.ArrowState.Arrows_Complete:
            goal_states.append(tuple(state))
    print "Goal states: %d" % len(goal_states)

    # Assign a reward to each goal state.
    for goal_state in goal_states:
        qlearner.set_r_value(goal_state, 100)
    # print qlearner.r_table

    # Run Q-learner.
    print "Total states: %d" % qlearner.r_table.size
    qlearner.execute(goal_states, 500000, 50)

    # Return policy.
    return qlearner.get_policy()
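# The QLearner class used above is defined elsewhere in this project and is
# not shown here. The sketch below is a minimal, hypothetical reconstruction
# of a tabular Q-learner that is consistent with how it is called above
# (constructor arguments, r_table, set_r_value, execute, get_policy). The
# real implementation may differ; in particular, execute's two numeric
# arguments are assumed to be the episode count and a per-episode step cap,
# and handle_action/reset_world signatures are assumptions.
import random

import numpy

class QLearnerSketch(object):
    def __init__(self, state_space, actions, handle_action, reset_world,
                 learning_rate=0.5, discount=0.9, epsilon=0.1):
        self.actions = actions
        self.handle_action = handle_action  # (state, action) -> next state (assumed)
        self.reset_world = reset_world      # () -> initial state tuple (assumed)
        shape = tuple(len(dim) for dim in state_space)
        self.r_table = numpy.zeros(shape)                    # immediate reward per state
        self.q_table = numpy.zeros(shape + (len(actions),))  # Q(state, action)
        self.learning_rate = learning_rate
        self.discount = discount
        self.epsilon = epsilon

    def set_r_value(self, state, value):
        self.r_table[state] = value

    def execute(self, goal_states, episodes, max_steps):
        goal_states = set(goal_states)
        for _ in xrange(episodes):
            state = self.reset_world()
            for _ in xrange(max_steps):
                # Epsilon-greedy action selection.
                if random.random() < self.epsilon:
                    action_index = random.randrange(len(self.actions))
                else:
                    action_index = int(numpy.argmax(self.q_table[state]))
                next_state = self.handle_action(state, self.actions[action_index])
                # Standard Q-learning update:
                # Q(s,a) += lr * (r + discount * max_a' Q(s',a') - Q(s,a))
                reward = self.r_table[next_state]
                best_next = numpy.max(self.q_table[next_state])
                q_index = state + (action_index,)
                self.q_table[q_index] += self.learning_rate * (
                    reward + self.discount * best_next - self.q_table[q_index])
                state = next_state
                if state in goal_states:
                    break

    def get_policy(self):
        # Greedy policy: index of the best action for every state.
        return numpy.argmax(self.q_table, axis=-1)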
def learn_policy(self):
    # Initialize Q-learner.
    qlearner = QLearner(
        self.state_space, self.actions, self.handle_action,
        self.reset_training_world
    )

    # Initialize reward states.
    goal_states = [(PositionTransform.HorizontalState.At + 1,
                    PositionTransform.VerticleState.At + 1)]
    for goal_state in goal_states:
        qlearner.set_r_value(goal_state, 100)
    # print qlearner.r_table

    # Run Q-learner.
    qlearner.execute(goal_states, 300, 50)

    # Return policy.
    return qlearner.get_policy()
def learn_policy(self):
    # Initialize Q-learner.
    qlearner = QLearner(
        self.state_space, self.actions, self.handle_action,
        self.reset_training_world
    )

    # Initialize reward states.
    goal_states = [(self.state_space[0].index(World.SiteState.Useless),)]
    for goal_state in goal_states:
        qlearner.set_r_value(goal_state, 100)
    # print qlearner.r_table

    # Run Q-learner.
    qlearner.execute(goal_states, 300, 30)

    # Return policy.
    return qlearner.get_policy()
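# A tiny, self-contained demo of the QLearnerSketch above on a hypothetical
# 1-D corridor world (not part of this project): states are positions 0..4,
# actions move left/right, and position 4 is the goal. It mirrors the
# structure of the learn_policy methods above.
def demo_learn_policy():
    positions = [0, 1, 2, 3, 4]
    actions = ["left", "right"]

    def handle_action(state, action):
        pos = state[0]
        if action == "left":
            pos = max(0, pos - 1)
        else:
            pos = min(len(positions) - 1, pos + 1)
        return (pos,)

    def reset_world():
        return (0,)

    # Same pattern as above: reward the goal state, train, return the policy.
    qlearner = QLearnerSketch([positions], actions, handle_action, reset_world)
    goal_states = [(4,)]
    for goal_state in goal_states:
        qlearner.set_r_value(goal_state, 100)
    qlearner.execute(goal_states, 300, 30)
    return qlearner.get_policy()  # expected: "right" (index 1) at positions 0-3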