def __init__(self): """ @brief: Setting up internal parameters for the RL module""" # Navigation Task self._environment = NavigationEnvironment() self._task = NavigationTask(self._environment) # Number of States : (read from params.py) self._states = STATES self._state_limits = LIMITS # Total number of states: self._number_of_states = 1 for i in self._states: self._number_of_states *= i # Number of actions self._actions = ACTION_STATES self._action_limits = ACTION_RANGE # Action Value Table directory self.tables_directory = os.path.dirname(__file__) + "/tables/" self.table_code = "S"+str(self._number_of_states)+"_"+"A"+str(self._actions) self._filename = FILENAME + self.table_code # Action Value Table setup self.load_AV_Table() # Declare ROS Service to store Action Value Table store_service = rospy.Service('store_table', StoreAVTable, self.store_cb) # Set up task parameters: self._task.set_params(COMMAND_DURATION, FUSION_WEIGHTS, TIME_GRANULARITY, self._state_limits, MAX_REWARD, COST_THRESHOLD) # Agent set up self._learner = SARSA(alpha,gamma) self._learner._setExplorer(EpsilonGreedyExplorer(epsilon)) self._agent = LearningAgent(self._av_table, self._learner) # Experiment set up self._experiment = Experiment(self._task,self._agent) self._experiment.set_params(STEP_SIZE) # Start print table thread if VISUALIZATION is True: try: #thread.start_new_thread(self.print_table,()) self.visualization_thread = Thread(target = self.print_table, args = () ) self.visualization_thread.start() except: print "Failed to start visualization thread!" print "Successfully Initialization of RL module! (kappa)"
def __init__(self):
    self.av_table = ActionValueTable(2, 3)
    self.av_table.initialize(0.1)
    learner = SARSA()
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)
    env = HASSHEnv()
    task = HASSHTask(env)
    self.experiment = Experiment(task, self.agent)
def __init__(self):
    self.av_table = ActionValueTable(4, 5)
    self.av_table.initialize(0.1)
    learner = SARSA()
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)
    env = HASSHEnv()
    task = HASSHTask(env)
    self.experiment = Experiment(task, self.agent)
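The two constructors above only wire the pieces together; a minimal sketch of a driver loop, assuming the standard PyBrain Experiment/LearningAgent API, is shown below (the method name run_episodes and the episode counts are illustrative, not part of the original code):

def run_episodes(self, episodes=100, steps_per_episode=10):
    # hypothetical driver method on the same class: gather transitions,
    # then update the SARSA table from the stored history
    for _ in range(episodes):
        self.experiment.doInteractions(steps_per_episode)
        self.agent.learn()
        self.agent.reset()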
def setup_RL():
    # create the maze with walls (1)
    envmatrix = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                          [1, 0, 0, 1, 0, 0, 0, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 0, 1, 0, 1],
                          [1, 0, 0, 1, 0, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 1, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 0, 1],
                          [1, 0, 0, 0, 0, 0, 0, 0, 1],
                          [1, 1, 1, 1, 1, 1, 1, 1, 1]])
    env = Maze(envmatrix, (7, 7))

    # create task
    task = MDPMazeTask(env)

    # create value table (81 states x 4 actions) and initialize with zeros
    table = ActionValueTable(81, 4)
    table.initialize(0.)

    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    # learner = Q()
    learner = SARSA()

    # create agent
    agent = LearningAgent(table, learner)

    # create experiment
    experiment = Experiment(task, agent)

    return experiment, agent, table
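A minimal usage sketch for the objects returned by setup_RL(), assuming the standard PyBrain interaction loop (the iteration counts are illustrative):

experiment, agent, table = setup_RL()
for _ in range(50):
    experiment.doInteractions(100)   # gather transitions in batch mode
    agent.learn()                    # SARSA update from the stored history
    agent.reset()                    # clear the agent's history buffer
# the learned greedy policy can then be read back from the table,
# e.g. table.getMaxAction(state) for a given discrete state index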
def createAgent(module):
    # create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information)
    ## gamma -- discount factor (importance of future reward)
    # learner = Q(0.5, 0.99)
    learner = SARSA(0.5, 0.99)
    # learner = QLambda(0.5, 0.99, 0.9)
    agent = LearningAgent(module, learner)
    return agent
def createAgent(module):
    ### create agent with controller and learner - use SARSA(), Q() or QLambda() here
    ## alpha -- learning rate (preference of new information -- update value factor)
    ## gamma -- discount factor (importance of future reward -- next value factor)
    learner = SARSA(alpha=0.3, gamma=0.99)
    # learner = Q(alpha=0.3, gamma=0.99)
    explorer = learner.explorer
    explorer.epsilon = 0.4
    explorer.decay = 0.9999
    agent = LearningAgent(module, learner)
    return agent
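A short usage sketch for createAgent(), assuming a PyBrain ActionValueTable as the module and an already constructed task (the table dimensions and the name task are illustrative assumptions):

from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.experiments import Experiment

table = ActionValueTable(16, 4)        # e.g. 16 discrete states, 4 actions
table.initialize(0.0)
agent = createAgent(table)
experiment = Experiment(task, agent)   # `task` assumed to exist already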
def testValueBased(self):
    """ Test value-based learner. """
    mkt = SmartMarket(self.case)
    exp = MarketExperiment([], [], mkt)
    for g in self.case.generators:
        env = DiscreteMarketEnvironment([g], mkt)
        dim_state, num_actions = (10, 10)
        exp.tasks.append(ProfitTask(env, dim_state, num_actions))
        module = ActionValueTable(dim_state, num_actions)
        module.initialize(1.0)
        # module = ActionValueNetwork(dimState=1, numActions=4)
        learner = SARSA()  # Q() QLambda()
        # learner.explorer = BoltzmannExplorer()  # default is e-greedy.
        exp.agents.append(LearningAgent(module, learner))

    for _ in range(1000):
        exp.doInteractions(24)  # interact with the env in batch mode
        for agent in exp.agents:
            agent.learn()
            agent.reset()
warnings.filterwarnings("ignore")

# create the maze with walls (1)
envmatrix = array([[1, 1, 1, 1, 1, 1, 1, 1, 1],
                   [1, 0, 0, 1, 0, 0, 0, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 0, 1, 0, 1],
                   [1, 0, 0, 1, 0, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 1, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 0, 1],
                   [1, 0, 0, 0, 0, 0, 0, 0, 1],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]])
env = Maze(envmatrix, (7, 7))

# create task
task = MDPMazeTask(env)

# create value table (81 states x 4 actions) and initialize with ones
table = ActionValueTable(81, 4)
table.initialize(1.)

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
# learner = Q()
learner = SARSA()
# standard exploration is e-greedy, but a different type can be chosen as well
# learner.explorer = BoltzmannExplorer()

# create agent
agent = LearningAgent(table, learner)

# create experiment
# experiment = Experiment(task, agent)
experiment = EpisodicExperiment(task, agent)

# prepare plotting
pylab.gray()
pylab.ion()

for i in range(50):
    # interact with the environment (here in batch mode)
    experiment.doInteractions(100)
    agent.learn()
    agent.reset()
task = GymTask.createTask(gymRawEnv)
env = task.env
env.setTransformation(transformation)
env.setCumulativeRewardMode()

# create value table and initialize with zeros
table = ActionValueTable(observationDigitizer.states, env.numActions)
table.initialize(0.0)
# table.initialize( np.random.rand( table.paramdim ) )

# create agent with controller and learner - use SARSA(), Q() or QLambda() here
## alpha -- learning rate (preference of new information)
## gamma -- discount factor (importance of future reward)
# learner = Q(0.5, 0.99)
learner = SARSA(0.5, 0.99)
# learner = QLambda(0.5, 0.99, 0.9)
explorer = learner.explorer
explorer.decay = 0.999992

agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)

## prevents "ImportError: sys.meta_path is None, Python is likely shutting down"
atexit.register(task.close)

render_demo = False
render_steps = False
imax = 7000
period_print = 100
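The loop that consumes imax and period_print is not part of the excerpt above; a hedged sketch of how it might look, assuming the experiment, agent and explorer built earlier (the progress report is illustrative, not the original script's bookkeeping):

for i in range(1, imax + 1):
    experiment.doInteractions(1)      # one environment step per iteration
    agent.learn()                     # SARSA update from the recorded step
    agent.reset()
    if i % period_print == 0:
        # illustrative progress output; epsilon is decayed by the explorer itself
        print("iteration %d, epsilon %.4f" % (i, explorer.epsilon))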
gymRawEnv = gym.make('FrozenLake-v0')

task = GymTask.createTask(gymRawEnv)
task.env.setTransformation(EnvTransformation())

# create value table and initialize with zeros
table = ActionValueTable(gymRawEnv.observation_space.n, gymRawEnv.action_space.n)
table.initialize(0.0)
# table.initialize( np.random.rand( table.paramdim ) )

### create agent with controller and learner - use SARSA(), Q() or QLambda() here
## alpha -- learning rate (preference of new information -- update value factor)
## gamma -- discount factor (importance of future reward -- next value factor)
learner = SARSA(alpha=0.3, gamma=0.99)
# learner = Q(alpha=0.3, gamma=0.99)
explorer = learner.explorer
explorer.epsilon = 0.4
explorer.decay = 0.9999

agent = LearningAgent(table, learner)
experiment = Experiment(task, agent)

render_steps = False
imax = 5000

# prepare plotting
if render_steps:
    pylab.gray()
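Again, the interaction loop is not shown in the excerpt; a minimal sketch using the objects defined above, plus an illustrative way to read the learned Q-values back out of the table (ActionValueTable stores its parameters row-major as state x action, as the reshape in print_table below also assumes):

for i in range(imax):
    experiment.doInteractions(1)   # one FrozenLake step
    agent.learn()                  # SARSA update
    agent.reset()

# inspect the learned values: one Q-value per (state, action) pair
q_values = table.params.reshape(gymRawEnv.observation_space.n,
                                gymRawEnv.action_space.n)
greedy_policy = q_values.argmax(axis=1)   # best action index per state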
class KinodynamicController(object):
    """ @brief: A class to handle interactions between various components of the RL module"""

    def __init__(self):
        """ @brief: Setting up internal parameters for the RL module"""
        # Navigation Task
        self._environment = NavigationEnvironment()
        self._task = NavigationTask(self._environment)

        # Number of states: (read from params.py)
        self._states = STATES
        self._state_limits = LIMITS

        # Total number of states:
        self._number_of_states = 1
        for i in self._states:
            self._number_of_states *= i

        # Number of actions
        self._actions = ACTION_STATES
        self._action_limits = ACTION_RANGE

        # Action Value Table directory
        self.tables_directory = os.path.dirname(__file__) + "/tables/"
        self.table_code = "S" + str(self._number_of_states) + "_" + "A" + str(self._actions)
        self._filename = FILENAME + self.table_code

        # Action Value Table setup
        self.load_AV_Table()

        # Declare ROS Service to store Action Value Table
        store_service = rospy.Service('store_table', StoreAVTable, self.store_cb)

        # Set up task parameters:
        self._task.set_params(COMMAND_DURATION, FUSION_WEIGHTS, TIME_GRANULARITY,
                              self._state_limits, MAX_REWARD, COST_THRESHOLD)

        # Agent set up
        self._learner = SARSA(alpha, gamma)
        self._learner._setExplorer(EpsilonGreedyExplorer(epsilon))
        self._agent = LearningAgent(self._av_table, self._learner)

        # Experiment set up
        self._experiment = Experiment(self._task, self._agent)
        self._experiment.set_params(STEP_SIZE)

        # Start print table thread
        if VISUALIZATION is True:
            try:
                # thread.start_new_thread(self.print_table, ())
                self.visualization_thread = Thread(target=self.print_table, args=())
                self.visualization_thread.start()
            except Exception:
                print("Failed to start visualization thread!")

        print("Successfully initialized RL module! (kappa)")

    def store_cb(self, req):
        """ @brief: ROS service callback that persists the Action Value Table"""
        storeData(self._av_table, self._filename)
        print("Saved AV Table")
        return True

    def load_AV_Table(self):
        """ @brief: Loads a stored Action Value Table, or creates a new one if none exists"""
        load_D = loadData(self._filename)
        if load_D[1] is True:
            self._av_table = load_D[0]
            print("Found Table!")
        else:
            self._av_table = ActionValueTable(self._number_of_states, self._actions)
            self._av_table.initialize(0.0)
            print("No training for this format. Creating new AV table")

    def print_table(self):
        """ @brief: Visual representation of the Action Value Table
            @return: nothing
        """
        matplotlib.pyplot.ion()
        while True:
            data = self._av_table.params.reshape(self._number_of_states, self._actions)
            matplotlib.pyplot.pcolor(data, cmap=matplotlib.pyplot.cm.RdYlGn,
                                     vmin=-MAX_REWARD, vmax=MAX_REWARD)
            matplotlib.pyplot.draw()
            rospy.sleep(2)

    def __del__(self):
        # Terminate visualization thread
        if VISUALIZATION is True:
            self.visualization_thread.join()
        # Copy learned data to repo
        if add_to_repo():
            print("Copied data to pandora_motion_control")
        else:
            pass
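storeData and loadData are project helpers that are not part of this snippet; the following is a minimal pickle-based sketch consistent with how they are called above (the signatures are inferred from those calls, and the implementation details, including file handling, are assumptions):

import pickle

def storeData(av_table, filename):
    # hypothetical sketch: persist the learned Action Value Table to disk
    with open(filename, "wb") as f:
        pickle.dump(av_table, f)

def loadData(filename):
    # hypothetical sketch: return (table, True) when a stored table exists,
    # (None, False) otherwise, matching how load_AV_Table() checks load_D[1]
    try:
        with open(filename, "rb") as f:
            return pickle.load(f), True
    except IOError:
        return None, False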