Example
import pprint
import threading
import time

import boto.sqs
import pylab
from boto.sqs.message import MHMessage
from pybrain.rl.experiments import Experiment
from pybrain.rl.explorers import BoltzmannExplorer
from pybrain.rl.learners import SARSA
from pybrain.rl.learners.valuebased import ActionValueTable

# constants, DogEnv, QuietDogTask and DogAgent are project-local modules;
# their import paths are not shown in this excerpt.

pp = pprint.PrettyPrinter()


class RlOp(threading.Thread):
    episodes = 1   # with a single episode, env.delay is enabled in __init__
    epilen = 200   # episode length (not used in this excerpt)
    def __init__(self, event_queue_name, hub_queue_name):
        super().__init__()
        # create environment
        self.conn = boto.sqs.connect_to_region(constants.REGION)
        self.event_queue = self.conn.get_queue(event_queue_name)
        self.event_queue.set_message_class(MHMessage)
        self.env = DogEnv(DogEnv.ALL_QUIET, DogEnv.ALL_QUIET, self.event_queue, hub_queue_name)
        self.env.delay = (self.episodes == 1)

        # create task
        self.task = QuietDogTask(self.env)

        # create value table and initialize with ones
        # TODO: Get number of states from DogEnv
        self.table = ActionValueTable(2*5*4, 5*4)
        self.table.initialize(1.)

        # create learner - use SARSA(), Q() or QLambda() here
        self.learner = SARSA()

        # standard exploration is e-greedy, but a different type can be chosen as well
        self.learner.explorer = BoltzmannExplorer()

        # create agent
        self.agent = DogAgent(self.table, self.learner)

        # create experiment
        self.experiment = Experiment(self.task, self.agent)

    def run(self):
        self.call_run()

    def call_run(self):
        print('RlOp: running')
        # prepare plotting
        pylab.gray()
        pylab.ion()

        for i in range(1000):

            # interact with the environment (here in batch mode)
            self.experiment.doInteractions(100)
            self.agent.learn()
            self.agent.reset()

            # print the greedy action (argmax over the action axis) for each state
            results = self.table.params.reshape(2, 4, 5, 20)
            pp.pprint(results[0].argmax(2))
            pp.pprint(results[1].argmax(2))

            # optionally draw the value table as heat maps (left disabled)
            #ar=self.table.params.reshape(2,5,4,5,4)
            #for state1 in range(len(constants.SOUNDS)):
            #    for state2 in range(4):
            #        pylab.pcolor(ar[1][state1][state2])
            #        pylab.draw()

        # learning finished - keep the thread alive, printing the learned
        # greedy policy (argmax per state) once a minute
        results0 = self.table.params.reshape(2, 4, 5, 20)[0]
        results1 = self.table.params.reshape(2, 4, 5, 20)[1]
        while True:
            time.sleep(60)
            pp.pprint(results0.argmax(2))
            pp.pprint(results1.argmax(2))
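
For context, a minimal launch sketch: RlOp subclasses threading.Thread, so start() runs call_run() in the background. The SQS queue names below are placeholders, not taken from the source project.

if __name__ == '__main__':
    rl_op = RlOp('dog-event-queue', 'dog-hub-queue')  # placeholder queue names (assumption)
    rl_op.start()   # Thread.start() invokes run(), which delegates to call_run()
    rl_op.join()    # call_run() loops forever, so this blocks indefinitely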