Example 1
class BlockerEnv():
    def __init__(self):
        self.ev = BlockerTask()
        self.action_dim = len(self.ev.all_actions)  # number of joint actions; each is ((dr_i, dc_i) for i in 1,2,3)
        self.state_dim = 6  # r_i, c_i, i in 1,2,3
        self.state = self.reset()

        self.render_count = 0
Example 2
def detSarsaNNModelMean(path, env="BlockerTask", repeats=5, steps=40000):
    make_env = BlockerTask if env == "BlockerTask" else TransportationTask

    rec_reward = {0: [-1 for i in range(repeats)]}
    for i in tqdm(range(repeats)):
        env = make_env()  # fresh environment each repeat, honoring the env argument
        m = NN_model()
        agent = DetSARSA_Model(env, m)
        rews = agent.run_no_rec(steps)
        rec_reward = updateRewardList(rec_reward, rews)
    rec_reward = computeMean(rec_reward)

    with open(path, 'w') as outfile:
        outfile.write(json.dumps(rec_reward))
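
The averaging routines here and in the later examples all call two helpers, updateRewardList and computeMean, which are not shown in these snippets. A minimal sketch of what they might look like, assuming rews is an iterable of (step, reward) pairs recorded during a run:

def updateRewardList(rec_reward, rews):
    # append this run's reward at every recorded step to the cross-run list
    for t, r in rews:
        rec_reward.setdefault(t, []).append(r)
    return rec_reward

def computeMean(rec_reward):
    # collapse each per-step list into its mean over the repeats
    return {t: sum(rs) / len(rs) for t, rs in rec_reward.items()}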
Example 3
class BlockerEnv():
    def __init__(self):
        self.ev = BlockerTask()
        self.action_dim = len(self.ev.all_actions)  # number of joint actions; each is ((dr_i, dc_i) for i in 1,2,3)
        self.state_dim = 6  # r_i, c_i, i in 1,2,3
        self.state = self.reset()

        self.render_count = 0

    def reset(self):
        self.state = self.ev.reset_state()
        return self.convert_state(self.state)

    def render(self, dpi=40):
        maze_record(self.render_count,
                    'Blocking task',
                    self.state,
                    4,
                    7,
                    self.ev.blockers_state,
                    dpi=dpi)
        self.render_count += 1

    def close(self):
        return

    def convert_state(self, s):
        ((r1, c1), (r2, c2), (r3, c3)) = s
        return np.array([r1, c1, r2, c2, r3, c3])

    def step(self, a):
        # convert action from np to tuple
        action = self.ev.all_actions[a]  # ((dr1, dc1), (dr2, dc2), (dr3, dc3))
        next_state, reward, done = self.ev.step(self.state, action)
        self.state = next_state
        # convert state back from tuple to np
        next_state = self.convert_state(next_state)
        return next_state, reward, done, None

    def output_GIF(self):
        makeGIF('../plots/temp-plots/temp-plots1',
                '../plots/ActorCriticBlockerTaskGIF')
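
BlockerEnv wraps BlockerTask in a Gym-style reset/step/render interface, where an action is an integer index into ev.all_actions. As a quick smoke test under that assumption, a random policy can be rolled out for one 40-step episode (the import of BlockerEnv is omitted, matching the snippet):

import numpy as np

env = BlockerEnv()
state = env.reset()
total = 0.0
for t in range(40):                         # same episode length the tabular agents use
    a = np.random.randint(env.action_dim)   # random joint-action index
    state, reward, done, _ = env.step(a)
    total += reward
    if done:
        break
print('random-policy return:', total)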
Example 4
def sarsaNoBoltzStepMean(path, env="BlockerTask", repeats=5, steps=300000):
    env = BlockerTask() if env == "BlockerTask" else TransportationTask()

    rec_reward = {0: [-1 for i in range(repeats)]}
    for i in tqdm(range(repeats)):
        agent = SARSA_noBoltzStep(env, α=0.5, γ=0.9, ε=0.3)
        rews = agent.run(epLen=40, mxsteps=steps, rec_any=False)
        rec_reward = updateRewardList(rec_reward, rews)
    rec_reward = computeMean(rec_reward)

    with open(path, 'w') as outfile:
        outfile.write(json.dumps(rec_reward))
Example 5
def QlearningMean(path, env="BlockerTask", repeats=5, steps=300000):
    env = BlockerTask() if env == "BlockerTask" else TransportationTask()

    rec_reward = {0: [-1 for i in range(repeats)]}
    for i in tqdm(range(repeats)):
        agent = Qlearning(env, 0.1)
        rews = agent.run(epLen=40, mxsteps=steps, rec_any=False)
        rec_reward = updateRewardList(rec_reward, rews)
    rec_reward = computeMean(rec_reward)

    with open(path, 'w') as outfile:
        outfile.write(json.dumps(rec_reward))
Example 6
def myDetQlearningMean(path, env="BlockerTask", repeats=5, steps=40000):
    env = BlockerTask() if env == "BlockerTask" else TransportationTask()

    rec_reward = {0: [-1 for i in range(repeats)]}
    for i in tqdm(range(repeats)):
        agent = DeterminantalQlearning(env)
        rews = agent.run_no_rec(steps)
        rec_reward = updateRewardList(rec_reward, rews)
    rec_reward = computeMean(rec_reward)

    with open(path, 'w') as outfile:
        outfile.write(json.dumps(rec_reward))
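
Each of these *Mean functions writes its step-to-mean-reward dictionary to the JSON file at path. A small sketch of reading such a file back for plotting (matplotlib assumed; the file name below is only a placeholder):

import json
import matplotlib.pyplot as plt

with open('results.json') as f:        # placeholder; use the same path passed above
    rec_reward = json.load(f)          # keys come back as strings after the JSON round trip

steps = sorted(int(k) for k in rec_reward)
plt.plot(steps, [rec_reward[str(k)] for k in steps])
plt.xlabel('training step')
plt.ylabel('mean reward over repeats')
plt.show()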
Example 7
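# Fragment: tail of the agent's episode() method; run() below adds its return value
# to the global step counter t.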
                return t

            # update variables
            state, action, s_idx, a_idx, nF = next_state, next_action, ns_idx, na_idx, nF
        print(state)
        #        print(self.Wa)
        return self.max_n_ep

    def run(self, max_steps):
        self.rec_reward = []
        self.total_reward = 0

        t, ep = 0, 0
        while t < max_steps:
            ep += 1
            print('time: ' + str(t), flush=True)
            dt = self.episode(t, max_steps)
            t += dt
        return self.rec_reward


env = BlockerTask()
agent = ESARSA(env)
results = agent.run(4e5)
#print(agent.a_idx( ((0,1), (0,0), (0,-1)) ))

#steps = 300000
#env = BlockerTask()
#agent = ESARSA( env )
#rews = agent.run( steps )