def __init__(self, n_threads=4, initial_port=19997, q_table_version=0,
                 batch_size=None, learner=None, explorer=None):
        self.barrier = Barrier(n_threads + 1, timeout=720)  # sync point for the worker threads plus this master
        self.n_threads = n_threads
        self.initial_port = initial_port
        self.batch_size = batch_size

        self.controller = MyActionValueTable(q_table_version)
        if learner is None:
            self.learner = Q(0.5, 0.9)
        else:
            self.learner = learner

        if explorer is None:
            self.explorer = self.learner.explorer = EpsilonGreedyExplorer(0.2, 0.998)  # epsilon, decay
        else:
            self.explorer = self.learner.explorer = explorer
        self.agent = LearningAgent(self.controller, self.learner)
        # Logger initialization
        self.logger = logging.getLogger('master_logger')
        self.logger.setLevel(logging.DEBUG)
        self.logger.addHandler(logging.FileHandler(Utils.DATA_PATH + 'learning-tables/master.log'))
        self.failed_simulations = []
        self.n_episodes = 0
        self.simulations = []
        self.initialize_simulations()
Example #2
    def __init__(self):
        """ @brief: Setting up internal parameters for the RL module"""

        # Navigation Task
        self._environment = NavigationEnvironment()
        self._task = NavigationTask(self._environment)

        # Number of States : (read from params.py)
        self._states = STATES
        self._state_limits = LIMITS

        # Total number of states:
        self._number_of_states = 1
        for i in self._states:
            self._number_of_states *= i

        # Number of actions
        self._actions = ACTION_STATES
        self._action_limits = ACTION_RANGE

        # Action Value Table directory
        self.tables_directory = os.path.dirname(__file__) + "/tables/"
        self.table_code = "S" + str(self._number_of_states) + "_A" + str(self._actions)
        self._filename = FILENAME + self.table_code

        # Action Value Table setup
        self.load_AV_Table()

        # Declare ROS Service to store Action Value Table
        store_service = rospy.Service('store_table', StoreAVTable, self.store_cb)

        # Set up task parameters:
        self._task.set_params(COMMAND_DURATION,
                              FUSION_WEIGHTS,
                              TIME_GRANULARITY,
                              self._state_limits,
                              MAX_REWARD,
                              COST_THRESHOLD)

        # Agent set up
        self._learner = SARSA(alpha, gamma)
        self._learner._setExplorer(EpsilonGreedyExplorer(epsilon))
        self._agent = LearningAgent(self._av_table, self._learner)

        # Experiment set up
        self._experiment = Experiment(self._task, self._agent)
        self._experiment.set_params(STEP_SIZE)

        # Start print table thread
        if VISUALIZATION is True:
            try:
                #thread.start_new_thread(self.print_table,())
                self.visualization_thread = Thread(target=self.print_table, args=())
                self.visualization_thread.start()
            except Exception:
                print "Failed to start visualization thread!"

        print "Successfully Initialization of RL module! (kappa)"
Example #3
    def __init__(self):
        self.av_table = ActionValueTable(2, 3)  # 2 states, 3 actions
        self.av_table.initialize(0.1)

        # SARSA learner with a purely greedy explorer (epsilon = 0)
        learner = SARSA()
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        env = HASSHEnv()

        task = HASSHTask(env)

        self.experiment = Experiment(task, self.agent)
Example #4
def initExperiment(learnalg='Q',
                   history=None,
                   binEdges='10s',
                   scriptfile='./rlRunExperiment_v2.pl',
                   resetscript='./rlResetExperiment.pl'):

    if binEdges == '10s':
        centerBinEdges = centerBinEdges_10s
    elif binEdges == '30s':
        centerBinEdges = centerBinEdges_30s
    elif binEdges == 'lessperturbed':
        centerBinEdges = centerBinEdges_10s_lessperturbed
    elif binEdges is None:
        centerBinEdges = None
    else:
        raise Exception("No bins for given binEdges setting")

    env = OmnetEnvironment(centerBinEdges, scriptfile, resetscript)
    if history is not None:
        env.data = history['data']

    task = OmnetTask(env, centerBinEdges)
    if history is not None:
        task.allrewards = history['rewards']

    if learnalg == 'Q':
        nstates = env.numSensorBins**env.numSensors
        if history is None:
            av_table = ActionValueTable(nstates, env.numActions)
            av_table.initialize(1.)
        else:
            av_table = history['av_table']
        learner = Q(0.1, 0.9)  # alpha, gamma
        learner._setExplorer(EpsilonGreedyExplorer(0.05))  # epsilon
    elif learnalg == 'NFQ':
        av_table = ActionValueNetwork(env.numSensors, env.numActions)
        learner = NFQ()
    else:
        raise Exception("learnalg unknown")

    agent = LearningAgent(av_table, learner)

    experiment = Experiment(task, agent)
    if history is None:
        experiment.nruns = 0
    else:
        experiment.nruns = history['nruns']
    return experiment
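
A minimal usage sketch for initExperiment above (the interaction count and run bookkeeping below are illustrative assumptions, not values from the original script):

# Build a fresh Q-learning experiment with the default 10s bin edges,
# collect a few interactions, and let the agent learn from them.
experiment = initExperiment(learnalg='Q', binEdges='10s')
for _ in range(10):
    experiment.doInteractions(1)
    experiment.agent.learn()
    experiment.agent.reset()
experiment.nruns += 1
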
def main():
    try:
        n_threads = 4
        initial_port = 8000
        q_table_version = 0
        batch_size = 10
        # explorer = EpsilonGreedyBoltzmannExplorer(0.2, 5, 0.998)
        explorer = EpsilonGreedyExplorer(0.15, 0.999)
        master = SimulationMaster(n_threads, initial_port, q_table_version, batch_size, explorer=explorer)

        master.run()

    except (KeyboardInterrupt, SystemExit):
        with open('data/learning-tables/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(master.save_q_table(), handle)
        master.save_t_table()
        vrep.simxFinish(-1)
Example #6
def run():
    """
    number of states is:
    current value: 0-20

    number of actions:
    Stand=0, Hit=1 """

    # define action value table
    av_table = ActionValueTable(MAX_VAL, MIN_VAL)  # ActionValueTable(number of states, number of actions)
    av_table.initialize(0.)

    # define Q-learning agent
    q_learner = Q(Q_ALPHA, Q_GAMMA)
    q_learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(av_table, q_learner)

    # define the environment
    env = BlackjackEnv()

    # define the task
    task = BlackjackTask(env, verbosity=VERBOSE)

    # finally, define experiment
    experiment = Experiment(task, agent)

    # ready to go, start the process
    for _ in range(NB_ITERATION):
        experiment.doInteractions(1)
        if task.lastreward != 0:
            if VERBOSE:
                print "Agent learn"
            agent.learn()

    print '|First State|Choice 0 (Stand)|Choice 1 (Hit)|Relative value of Standing over Hitting|'
    print '|:-------:|:-------|:-----|:-----|'
    for i in range(MAX_VAL):
        print '| %s | %s | %s | %s |' % (
            (i + 1),
            av_table.getActionValues(i)[0], av_table.getActionValues(i)[1],
            av_table.getActionValues(i)[0] - av_table.getActionValues(i)[1])
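
A minimal entry point for the Blackjack example above (assuming MAX_VAL, MIN_VAL, Q_ALPHA, Q_GAMMA, NB_ITERATION and VERBOSE are module-level constants, as the snippet implies):

if __name__ == '__main__':
    run()
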
def main():
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # epsilon=0.15, decay=1 (no decay); alternative: EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    controller.initialize(agent)

    i = 0
    try:
        while True:
            i += 1
            print('Episode ' + str(i))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()
            print('mean: '+str(numpy.mean(controller.params)))
            print('max: '+str(numpy.max(controller.params)))
            print('min: '+str(numpy.min(controller.params)))

            if i % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()

    except (KeyboardInterrupt, SystemExit):
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()

    vrep.simxFinish(client_id)
    def __init__(self, text_to_speech, speech_to_text):
        Feature.__init__(self)

        # setup AV Table
        self.av_table = GameTable(13, 2)
        if not self.av_table.loadParameters():
            self.av_table.initialize(0.)

        # setup a Q-Learning agent
        learner = Q(0.5, 0.0)
        learner._setExplorer(EpsilonGreedyExplorer(0.0))
        self.agent = LearningAgent(self.av_table, learner)

        # setup game interaction
        self.game_interaction = GameInteraction(text_to_speech, speech_to_text)

        # setup environment
        environment = GameEnvironment(self.game_interaction)

        # setup task
        task = GameTask(environment, self.game_interaction)

        # setup experiment
        self.experiment = Experiment(task, self.agent)
Example #9
from runPacman import RunPacman
from ghost import Ghost
from pacmanEnvironment import Environment

# PyBrain components and numpy used below (standard PyBrain import paths);
# PacmanTask comes from this project's own task module, not shown in this snippet.
import numpy as np
from pybrain.rl.learners.valuebased import ActionValueTable
from pybrain.rl.learners import Q
from pybrain.rl.explorers import EpsilonGreedyExplorer
from pybrain.rl.agents import LearningAgent
from pybrain.rl.experiments import Experiment

###############################################################
# The main function that begins running our Pacman-In-AI game #
###############################################################
if __name__ == "__main__":

    # Initialize our Action-Environment-Reward Table
    controller = ActionValueTable(196, 4)
    controller.initialize(0.)

    # Initialize Reinforcement Learning
    learner = Q(0.5, 0.0)
    learner._setExplorer(EpsilonGreedyExplorer(0.0))
    agent = LearningAgent(controller, learner)

    # Setup the PyBrain and PyGame Environments
    environment = Environment()
    game = RunPacman(environment)

    # Create the Task for the Pac-Man Agent to Accomplish and initialize the first Action
    task = PacmanTask(environment, game)
    task.performAction(np.array([1]))

    # The Experiment is the PyBrain link between the task to be completed and the agent completing it
    experiment = Experiment(task, agent)
    currentGame = 1

    # Continue to loop program until the 'X' on the GUI is clicked
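
    # A sketch of that loop (an assumption, not the original code): step the
    # experiment and learn each iteration; the checks for a finished game and
    # for the GUI window being closed are project-specific and only noted here.
    while True:
        experiment.doInteractions(1)
        agent.learn()
        # increment currentGame when the environment reports that a game ended,
        # and break once the GUI 'X' has been clicked (project-specific checks)
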
Example #10
def runMainProg():
    # Define Q-learning agents with different attributes to see
    # which one comes out as the better player.

    # It would be possible to loop over the number of players N and create
    # them that way, but they would then all be identical unless extra code
    # dynamically assigned their attributes (a commented-out sketch of such
    # a loop follows the explicit setup below).
    learner = []
    learner.append(Q(0.9, 0.0))
    learner[0]._setExplorer(EpsilonGreedyExplorer(0., 0.5))
    learner.append(Q(0.5, 0.5))
    learner[1]._setExplorer(EpsilonGreedyExplorer(0.29, 0.))
    learner.append(Q(0.1, 0.5))
    learner[2]._setExplorer(EpsilonGreedyExplorer(0.29, 0.5))
    learner.append(Q(0.5, 0.0))
    learner[3]._setExplorer(DiscreteStateDependentExplorer(0., 0.5))
    learner.append(Q(0.5, 0.5))
    learner[4]._setExplorer(DiscreteStateDependentExplorer(0.29, 0.5))
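
    # A sketch of the loop-based alternative mentioned above (hypothetical
    # learner_params list; kept as comments so the explicit setup stays in use):
    # learner_params = [(0.9, 0.0, EpsilonGreedyExplorer(0., 0.5)),
    #                   (0.5, 0.5, EpsilonGreedyExplorer(0.29, 0.))]
    # learner = []
    # for alpha, gamma, explorer in learner_params:
    #     q = Q(alpha, gamma)
    #     q._setExplorer(explorer)
    #     learner.append(q)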

    #define a blackjack deck
    theDeck = BlackjackCardDeck()

    #define action value table, agent, task, and environment arrays
    av_table = []
    agent = []
    env = []
    task = []

    #Loop through the number of players, and set up the action value table,
    #associated agent, environment, and task, so they can play the game
    for i in range(0, N):
        av_table.append(ActionValueTable(22, 2))
        av_table[i].initialize(0.)
        agent.append(LearningAgent(av_table[i], learner[i]))
        env.append(BlackjackEnv(theDeck))
        env[i].createHand()
        task.append(BlackjackTask(env[i]))

    #define a Dealer
    dealer = BlackjackDealer(theDeck)

    #run the game for a total of 1000 games. This value can be changed.
    for i in range(0, 1000):
        #This is the function that plays the game. The code for it is below.
        playGame(dealer, task, env, agent)

    #All of the games have been played, and now the results of the games played,
    #games won, tied, and lost are displayed.
    for i in range(0, N):
        print "Games Player ", i + 1, " Won Against The Dealer: ", GamesAgentWon[
            i]
        print "Games Player ", i + 1, " Lost Against The Dealer: ", TotalGames - GamesTied[
            i] - GamesAgentWon[i]
        print "Games Player ", i + 1, " Tied With The Dealer: ", GamesTied[i]
        print
    print "Total Games Played: ", TotalGames
    print

    # Create arrays for the action values, the hits, and the stands. A separate
    # array is needed because the AV table used by the program is not in a form
    # that can be plotted directly, so its values are transferred below for processing.
    theAVTables = []
    hits = []
    stands = []

    # Move the values from the AV table to the arrays created above, and populate
    # the hits and stands tables as well. These values are used in the plot below.
    for i in range(0, N):
        print "Action Table Values for Player ", i + 1, ":"
        theAVTables.append([])
        hits.append([])
        stands.append([])
        for j in range(0, 22):
            print "The AV Value At ", (
                j + 1
            ), " for Player ", i + 1, " is: ", av_table[i].getActionValues(j)
            theAVTables[i].append(av_table[i].getActionValues(j))
            hits[i].append(theAVTables[i][j][0])
            stands[i].append(theAVTables[i][j][1])
        print
        print

    subPlotVal = 511

    # The following uses matplotlib to display a graph of the results.
    for i in range(0, N):
        plt.figure(1)
        plt.subplot(subPlotVal)
        plot(hits[i], label="Hits")
        plot(stands[i], label="Stands")
        plt.ylabel('Probability')
        plt.title('Player ' + str(i + 1))
        plt.axis([0, 30, -3, 3])
        plt.legend()
        subPlotVal += 1

    plt.xlabel('Hand Value')

    plt.show()
Example #11
    def reset(self):
        EpisodicTask.reset(self)
        self.env.reset()

    @property
    def indim(self):
        return self.env.indim

    @property
    def outdim(self):
        return self.env.outdim


env = TetrisEnv(10, 20)  #Tetris
task = TetrisTask(env)

QNet = ActionValueNetwork(10 * 20 + 11, 6)  # ActionValueNetwork(state dimension, number of actions)

learner = NFQ()
#Q()?
learner._setExplorer(EpsilonGreedyExplorer(0.2, decay=0.99))

agent = LearningAgent(QNet, learner)

experiment = EpisodicExperiment(task, agent)

while True:
    experiment.doEpisodes(1)
    agent.learn()
    agent.reset()  #or call more sporadically...?
    task.reset()