def __init__(self, n_threads=4, initial_port=19997, q_table_version=0, batch_size=None, learner=None, explorer=None):
    self.barrier = Barrier(n_threads + 1, timeout=720)
    self.n_threads = n_threads
    self.initial_port = initial_port
    self.batch_size = batch_size
    self.controller = MyActionValueTable(q_table_version)
    if learner is None:
        self.learner = Q(0.5, 0.9)  # alpha, gamma
    else:
        self.learner = learner
    if explorer is None:
        self.explorer = self.learner.explorer = EpsilonGreedyExplorer(0.2, 0.998)  # epsilon, decay
    else:
        self.explorer = self.learner.explorer = explorer
    self.agent = LearningAgent(self.controller, self.learner)
    # Logger initialization
    self.logger = logging.getLogger('master_logger')
    self.logger.setLevel(logging.DEBUG)
    self.logger.addHandler(logging.FileHandler(Utils.DATA_PATH + 'learning-tables/master.log'))
    self.failed_simulations = []
    self.n_episodes = 0
    self.simulations = []
    self.initialize_simulations()
def __init__(self): """ @brief: Setting up internal parameters for the RL module""" # Navigation Task self._environment = NavigationEnvironment() self._task = NavigationTask(self._environment) # Number of States : (read from params.py) self._states = STATES self._state_limits = LIMITS # Total number of states: self._number_of_states = 1 for i in self._states: self._number_of_states *= i # Number of actions self._actions = ACTION_STATES self._action_limits = ACTION_RANGE # Action Value Table directory self.tables_directory = os.path.dirname(__file__) + "/tables/" self.table_code = "S"+str(self._number_of_states)+"_"+"A"+str(self._actions) self._filename = FILENAME + self.table_code # Action Value Table setup self.load_AV_Table() # Declare ROS Service to store Action Value Table store_service = rospy.Service('store_table', StoreAVTable, self.store_cb) # Set up task parameters: self._task.set_params(COMMAND_DURATION, FUSION_WEIGHTS, TIME_GRANULARITY, self._state_limits, MAX_REWARD, COST_THRESHOLD) # Agent set up self._learner = SARSA(alpha,gamma) self._learner._setExplorer(EpsilonGreedyExplorer(epsilon)) self._agent = LearningAgent(self._av_table, self._learner) # Experiment set up self._experiment = Experiment(self._task,self._agent) self._experiment.set_params(STEP_SIZE) # Start print table thread if VISUALIZATION is True: try: #thread.start_new_thread(self.print_table,()) self.visualization_thread = Thread(target = self.print_table, args = () ) self.visualization_thread.start() except: print "Failed to start visualization thread!" print "Successfully Initialization of RL module! (kappa)"
def __init__(self):
    # 2 states x 3 actions, optimistically initialized
    self.av_table = ActionValueTable(2, 3)
    self.av_table.initialize(0.1)

    # SARSA learner with a purely greedy policy (epsilon = 0)
    learner = SARSA()
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)

    env = HASSHEnv()
    task = HASSHTask(env)
    self.experiment = Experiment(task, self.agent)
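# A minimal driver-loop sketch (assumed usage, not part of the original
# source). `module` stands for any object exposing the `experiment` and
# `agent` attributes built in the constructor above.
def train(module, n_interactions=1000):
    for _ in range(n_interactions):
        module.experiment.doInteractions(1)  # act in the HASSH environment
        module.agent.learn()                 # SARSA update from the new sample
        module.agent.reset()                 # clear the agent's history buffer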
def initExperiment(learnalg='Q', history=None, binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):
    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins ** env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)
    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
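# Hypothetical usage sketch showing the round trip through the `history`
# dict (keys taken from the code above: 'data', 'rewards', 'av_table',
# 'nruns'); the attribute paths are assumptions based on PyBrain's
# Experiment/LearningAgent structure, where the experiment keeps `task`
# and `agent`, and the agent keeps its controller in `module`.
experiment = initExperiment(learnalg='Q', binEdges='10s')
experiment.doInteractions(100)
experiment.nruns += 1
history = {
    'data': experiment.task.env.data,
    'rewards': experiment.task.allrewards,
    'av_table': experiment.agent.module,
    'nruns': experiment.nruns,
}
# A later session can then resume from the saved state:
experiment = initExperiment(learnalg='Q', history=history, binEdges='10s')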
def main():
    try:
        n_threads = 4
        initial_port = 8000
        q_table_version = 0
        batch_size = 10
        # explorer = EpsilonGreedyBoltzmannExplorer(0.2, 5, 0.998)
        explorer = EpsilonGreedyExplorer(0.15, 0.999)  # epsilon, decay
        master = SimulationMaster(n_threads, initial_port, q_table_version, batch_size, explorer=explorer)
        master.run()
    except (KeyboardInterrupt, SystemExit):
        with open('data/learning-tables/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(master.save_q_table(), handle)
        master.save_t_table()
        vrep.simxFinish(-1)
def run(): """ number of states is: current value: 0-20 number of actions: Stand=0, Hit=1 """ # define action value table av_table = ActionValueTable(MAX_VAL, MIN_VAL) av_table.initialize(0.) # define Q-learning agent q_learner = Q(Q_ALPHA, Q_GAMMA) q_learner._setExplorer(EpsilonGreedyExplorer(0.0)) agent = LearningAgent(av_table, q_learner) # define the environment env = BlackjackEnv() # define the task task = BlackjackTask(env, verbosity=VERBOSE) # finally, define experiment experiment = Experiment(task, agent) # ready to go, start the process for _ in range(NB_ITERATION): experiment.doInteractions(1) if task.lastreward != 0: if VERBOSE: print "Agent learn" agent.learn() print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|' print '|:-------:|:-------|:-----|:-----|' for i in range(MAX_VAL): print '| %s | %s | %s | %s |' % ( (i + 1), av_table.getActionValues(i)[0], av_table.getActionValues(i)[1], av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1])
def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)
    controller.initialize(agent)

    i = 0
    try:
        while True:
            i += 1
            print('Episode ' + str(i))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()
            print('mean: ' + str(numpy.mean(controller.params)))
            print('max: ' + str(numpy.max(controller.params)))
            print('min: ' + str(numpy.min(controller.params)))
            if i % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()
    except (KeyboardInterrupt, SystemExit):
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()
        vrep.simxFinish(client_id)
def __init__(self, text_to_speech, speech_to_text):
    Feature.__init__(self)

    # set up the AV table
    self.av_table = GameTable(13, 2)
    if not self.av_table.loadParameters():
        self.av_table.initialize(0.)

    # set up a Q-learning agent
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    self.agent = LearningAgent(self.av_table, learner)

    # set up game interaction
    self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

    # set up the environment
    environment = GameEnvironment(self.game_interaction)

    # set up the task
    task = GameTask(environment, self.game_interaction)

    # set up the experiment
    self.experiment = Experiment(task, self.agent)
import numpy as np

from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.learners import Q
from pybrain.rl.explorers import EpsilonGreedyExplorer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import Experiment

from runPacman import RunPacman
from ghost import Ghost
from pacmanEnvironment import Environment
# NOTE: PacmanTask must also be importable from a local module.

###############################################################
# The main function that begins running our Pacman-In-AI game #
###############################################################
if __name__ == "__main__":
    # Initialize our Action-Environment-Reward table: 196 states, 4 actions
    controller = ActionValueTable(196, 4)
    controller.initialize(0.)

    # Initialize the reinforcement learner (greedy: epsilon = 0)
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)

    # Set up the PyBrain and PyGame environments
    environment = Environment()
    game = RunPacman(environment)

    # Create the task for the Pac-Man agent to accomplish and perform
    # the first action
    task = PacmanTask(environment, game)
    task.performAction(np.array([1]))

    # The experiment is the PyBrain link between the task to be completed
    # and the agent completing it
    experiment = Experiment(task, agent)

    currentGame = 1
    # Continue to loop until the 'X' on the GUI is clicked
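    # A minimal sketch (an assumption, not the original source) of the loop
    # the comment above refers to: keep interacting and learning until the
    # pygame window is closed. The pygame event pump is an assumed detail.
    import pygame

    running = True
    while running:
        experiment.doInteractions(1)
        agent.learn()
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False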
def runMainProg():
    # Define Q-learning agents with different attributes to see which one
    # will come out as the better player. It would be possible to loop
    # through the number of players N and create them, however they would
    # all have to be the same, or other code would have to be added to
    # dynamically create their attributes.
    learner = []
    learner.append(Q(0.9, 0.0))
    learner[0]._setExplorer(EpsilonGreedyExplorer(0., 0.5))
    learner.append(Q(0.5, 0.5))
    learner[1]._setExplorer(EpsilonGreedyExplorer(0.29, 0.))
    learner.append(Q(0.1, 0.5))
    learner[2]._setExplorer(EpsilonGreedyExplorer(0.29, 0.5))
    learner.append(Q(0.5, 0.0))
    learner[3]._setExplorer(DiscreteStateDependentExplorer(0., 0.5))
    learner.append(Q(0.5, 0.5))
    learner[4]._setExplorer(DiscreteStateDependentExplorer(0.29, 0.5))

    # Define a blackjack deck
    theDeck = BlackjackCardDeck()

    # Define action value table, agent, environment, and task arrays
    av_table = []
    agent = []
    env = []
    task = []

    # Loop through the number of players and set up the action value table,
    # associated agent, environment, and task, so they can play the game.
    for i in range(0, N):
        av_table.append(ActionValueTable(22, 2))
        av_table[i].initialize(0.)
        agent.append(LearningAgent(av_table[i], learner[i]))
        env.append(BlackjackEnv(theDeck))
        env[i].createHand()
        task.append(BlackjackTask(env[i]))

    # Define a dealer
    dealer = BlackjackDealer(theDeck)

    # Run the game for a total of 1000 games. This value can be changed.
    for i in range(0, 1000):
        # playGame is the function that plays the game (defined below).
        playGame(dealer, task, env, agent)

    # All of the games have been played; now display the results: games
    # won, tied, and lost, plus the total played.
    for i in range(0, N):
        print "Games Player ", i + 1, " Won Against The Dealer: ", GamesAgentWon[i]
        print "Games Player ", i + 1, " Lost Against The Dealer: ", TotalGames - GamesTied[i] - GamesAgentWon[i]
        print "Games Player ", i + 1, " Tied With The Dealer: ", GamesTied[i]
        print
    print "Total Games Played: ", TotalGames
    print

    # Create arrays for the action values, the hits, and the stands. A new
    # array is needed because the AV table used by the program cannot easily
    # be plotted directly, so the AV values are copied into plain lists.
    theAVTables = []
    hits = []
    stands = []

    # Move the values from the AV table into the arrays created above; the
    # values in these lists are used in the plot below.
    for i in range(0, N):
        print "Action Table Values for Player ", i + 1, ":"
        theAVTables.append([])
        hits.append([])
        stands.append([])
        for j in range(0, 22):
            print "The AV Value At ", (j + 1), " for Player ", i + 1, " is: ", av_table[i].getActionValues(j)
            theAVTables[i].append(av_table[i].getActionValues(j))
            hits[i].append(theAVTables[i][j][0])
            stands[i].append(theAVTables[i][j][1])
        print
    print

    # Use matplotlib to display a graph of the results.
    subPlotVal = 511
    for i in range(0, N):
        plt.figure(1)
        plt.subplot(subPlotVal)
        plot(hits[i], label="Hits")
        plot(stands[i], label="Stands")
        plt.ylabel('Probability')
        plt.title('Player ' + str(i + 1))
        plt.axis([0, 30, -3, 3])
        plt.legend()
        subPlotVal += 1
    plt.xlabel('Hand Value')
    plt.show()
    # (tail of the TetrisTask class definition)
    def reset(self):
        EpisodicTask.reset(self)
        self.env.reset()

    @property
    def indim(self):
        return self.env.indim

    @property
    def outdim(self):
        return self.env.outdim


env = TetrisEnv(10, 20)  # 10 x 20 Tetris board
task = TetrisTask(env)
# network input: the 10 x 20 board plus 11 extra features; 6 actions
QNet = ActionValueNetwork(10 * 20 + 11, 6)
learner = NFQ()  # Q()?
learner._setExplorer(EpsilonGreedyExplorer(0.2, decay=0.99))
agent = LearningAgent(QNet, learner)
experiment = EpisodicExperiment(task, agent)

while True:
    experiment.doEpisodes(1)
    agent.learn()
    agent.reset()  # or call more sporadically...?
    task.reset()