Example #1
from datetime import datetime
from time import sleep

class Simulation(object):
    def __init__(self, location, shape, plot):
        self.location = location
        self.shape = shape
        self.plot = plot
        self.env = Environment(self.shape)
        self.agent = Agent()
        self.child = Child(self.env.state)

    def run(self, duration):
        start = datetime.now()
        # Let the child dirty the environment before the agent starts.
        self.env.state = self.child.run()
        self.agent.on()
        if self.plot:
            save_figure(self.env.x, self.env.y, self.env.state,
                        [self.agent.position[0]], [self.agent.position[1]])
        while self.agent.active:
            # Sense the current state, then act on it.
            self.agent.precepts(self.env.state)
            self.env.state = self.agent.effectors(self.env.state)
            if self.plot:
                save_figure(self.env.x, self.env.y, self.env.state,
                            [self.agent.position[0]], [self.agent.position[1]])
            else:
                sleep(0.5)
            # Once the time budget is exhausted, send the agent home and let it
            # take one final sense/act step (total_seconds(), not .seconds,
            # so the elapsed-time comparison is correct).
            if (datetime.now() - start).total_seconds() >= duration:
                self.agent.gohome()
                self.agent.precepts(self.env.state)
                self.env.state = self.agent.effectors(self.env.state)
                if self.plot:
                    save_figure(self.env.x, self.env.y, self.env.state,
                                [self.agent.position[0]], [self.agent.position[1]])
                else:
                    sleep(0.5)
        # Collect the episode statistics before resetting the agent.
        rwd, clnd = self.agent.reward, self.agent.cleaned
        self.agent.reset()
        return rwd, clnd
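
A minimal driver sketch for this class (Environment, Agent, Child, and save_figure come from the surrounding project; the constructor argument values below are hypothetical):

# Hypothetical usage; the location and shape values are made up for illustration.
sim = Simulation(location="living_room", shape=(20, 20), plot=False)
reward, cleaned = sim.run(duration=30)  # one 30-second episode
print(reward, cleaned)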
Example #2
import numpy as np
from tensorboardX import SummaryWriter  # torch.utils.tensorboard also provides SummaryWriter

# env, Agent, and the constants ITER_BREAK, NUM_EPISODES, BETA, nA
# are defined elsewhere in the project.

# Objects
agent1 = Agent()
agent2 = Agent()

# Initializations
writer = SummaryWriter(comment="-q-iteration")
iter_no = 0

# Q-learning algorithm
profits = np.zeros((ITER_BREAK + 2, NUM_EPISODES + 2))

for ep in range(NUM_EPISODES):
    print(ep)
    # 1: initialise Qs (reset the environment and both agents for a fresh episode)
    env.reset()
    agent1.reset()
    agent2.reset()
    iter_no = 0
    s_next = 0
    while True:
        iter_no += 1
        # eps rises from 0 toward 1 as iterations accumulate (BETA sets the rate).
        eps = 1 - np.exp(-BETA * iter_no)
        # 2: agents choose actions simultaneously
        action1 = agent1.act(eps)
        action2 = agent2.act(eps)
        # Encode the joint action as a single index (nA actions per agent).
        action = action1 * nA + action2
        # 3: outcomes are calculated
        s = s_next
        s_next, reward_n, done, prob = env.step(action)
        # 4: Bellman updates
        agent1.value_update(s, action1, reward_n[0], s_next)
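        # (Assumption, not confirmed by the source) value_update presumably
        # performs the standard tabular Q-learning step, something like:
        #   Q[s, a] += ALPHA * (r + GAMMA * np.max(Q[s_next]) - Q[s, a])
        # where ALPHA (learning rate) and GAMMA (discount factor) are
        # hypothetical names for parameters held by the Agent.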