Example #1
    def __init__(self,
                 env,
                 kind="diff",
                 gamma=0.8,
                 weight=0.9,
                 advantage=False):
        if kind == "qv":
            qmodel, vmodel = self.create_qv_models(
                env.observation_space.shape[0], env.action_space.n)
            model = (qmodel, vmodel)
        else:
            model = self.create_model(env.observation_space.shape[0],
                                      env.action_space.n)

        brain = QBrain(model,
                       kind=kind,
                       advantage=advantage,
                       gamma=gamma,
                       v_selectivity=False,
                       qnet_soft_update=0.01,
                       diff_qnet_weight=weight)
        brain.compile(Adam(lr=1e-3), ["mse"])
        MultiDQNAgent.__init__(self,
                               env,
                               brain,
                               train_sample_size=1000,
                               train_batch_size=50)
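The create_model and create_qv_models helpers called by this constructor are not part of the excerpt. A minimal Keras sketch of what they plausibly build, written here as free functions; the layer sizes are assumptions, not taken from the source:

from keras.models import Model
from keras.layers import Input, Dense

def create_model(state_dim, n_actions):
    # Plain DQN head: one linear Q-value output per discrete action.
    inp = Input((state_dim,))
    h = Dense(64, activation="relu")(inp)
    h = Dense(64, activation="relu")(h)
    out = Dense(n_actions, activation="linear")(h)
    return Model(inp, out)

def create_qv_models(state_dim, n_actions):
    # For kind="qv": a Q-network plus a separate scalar V-network.
    qmodel = create_model(state_dim, n_actions)
    inp = Input((state_dim,))
    h = Dense(64, activation="relu")(inp)
    vout = Dense(1, activation="linear")(h)
    return qmodel, Model(inp, vout)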
Example #2
 def __init__(self, env, kind="diff", gamma=0.99, diff_qnet_weight=0.7):
     model = self.create_model(env.observation_space.shape[0],
                               env.action_space.n)
     brain = QBrain(model,
                    kind=kind,
                    gamma=gamma,
                    v_selectivity=False,
                    qnet_soft_update=0.01,
                    diff_qnet_weight=diff_qnet_weight)
     brain.compile(Adam(lr=1e-3), ["mse"])
     MultiDQNAgent.__init__(self,
                            env,
                            brain,
                            train_sample_size=1000,
                            train_batch_size=50)
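Nothing in the excerpt shows how this agent is driven; by analogy with the later examples, a hypothetical driver could look like the sketch below. DiffAgent stands in for whatever class owns the __init__ above, and the environment and episode budget are assumptions:

import gym

env = gym.make("CartPole-v1")   # any Gym env with a discrete action space
agent = DiffAgent(env, kind="diff", gamma=0.99)   # hypothetical class name
controller = SynchronousMultiAgentController(env, [agent],
                                             rounds_between_train=10000,
                                             episodes_between_train=1)
controller.fit(max_episodes=100)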
Example #3
        self.T += 1
        self.R += 1
        return [(agent, {}) for agent, action in actions]
        
    def feedback(self, agents):
        return [(agent, self.R, {}) for agent in agents]
        
class SeqAgent(MultiDQNAgent):
    pass
    
# Keras imports assumed for this excerpt (project imports such as QBrain
# and SynchronousMultiAgentController are omitted in the source):
from keras.layers import Input, Dense
from keras.models import Model

def create_model():
    inp = Input((1,))
    dense1 = Dense(5, activation="relu", name="dense1")(inp)
    out = Dense(1, activation="linear", name="out_linear")(dense1)
    model = Model(inp, out)
    print("--- model summary ---")
    model.summary()  # summary() prints itself and returns None
    return model

env = SeqEnv()
model = create_model()
brain = QBrain(model, soft_update=0.0001)
agent = SeqAgent(env, brain)
agents = [agent]
controller = SynchronousMultiAgentController(env, agents)
controller.fit(max_episodes=1)

for tup in sorted(brain.Memory.ShortTermMemory):
    print(tup)
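The soft_update=0.0001 argument suggests Polyak averaging of the target network toward the online network, as in standard DQN variants. A sketch of that textbook update rule, not code taken from QBrain:

import numpy as np

def soft_update(target_weights, online_weights, tau=0.0001):
    # Polyak averaging: target <- tau * online + (1 - tau) * target.
    # A tiny tau makes the target network trail the online one slowly,
    # which stabilizes the bootstrapped Q targets.
    return [tau * w + (1.0 - tau) * tw
            for tw, w in zip(target_weights, online_weights)]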

Example #4
        action_frequencies = self.Actions / np.sum(self.Actions)  # computed but not printed; raw counts shown below
        print("Episode end: %d, rounds: %d, rewards: %s, average q: %s, actions: %s" %
              (episode, logs["nrounds"], rewards, avq, self.Actions))


env = TankTargetEnv()
memory = ReplayMemory(100000, v_selectivity=True)
tanks = []

for i in range(3):
    model = create_model(env.observation_space.shape[-1],
                         env.action_space.shape[-1])
    brain = QBrain(model,
                   typ="diff",
                   memory=memory,
                   soft_update=0.01,
                   gamma=0.99)
    brain.compile(Adam(lr=1e-3), ["mse"])
    if i > 0:
        brain.transfer(tanks[0].Brain)  # make all brains the same initially

    tanks.append(TankAgent(env, brain, train_sample_size=1000))

controller = SynchronousMultiAgentController(env,
                                             tanks,
                                             rounds_between_train=10000,
                                             episodes_between_train=1)

taus = [0.01, 0.1, 1.0, 2.0]
ntaus = len(taus)
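brain.transfer(tanks[0].Brain) starts all three brains from identical parameters. In Keras terms the operation presumably reduces to a weight copy, roughly as below; this is an assumption about QBrain internals:

def transfer_weights(dst_model, src_model):
    # Copy all parameters at once; the two architectures must match.
    dst_model.set_weights(src_model.get_weights())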
Example #5
        for a, action in logs["actions"]:
            self.Actions[action] += 1
    
    def on_episode_end(self, episode, logs):
        avq = self.SumQ/self.NSteps if self.NSteps > 0 else 0.0
        rewards = [r for t, r in logs["episode_rewards"]]
        action_frequencies = self.Actions / np.sum(self.Actions)  # computed but not printed; raw counts shown below
        print("Episode end: %d, rounds: %d, rewards: %s, average q: %s, actions: %s" %
              (episode, logs["nrounds"], rewards, avq, self.Actions))

env = TankDuelEnv()
tanks = []

for _ in (1, 2):
    model = create_model(env.observation_space.shape[-1], env.action_space.shape[-1])
    brain = QBrain(model, kind="diff", v_selectivity=False, gamma=0.99)
    brain.compile(Adam(lr=1e-3), ["mse"])
    tanks.append(TankAgent(env, brain, train_sample_size=1000))

controller = SynchronousMultiAgentController(env, tanks,
                                             rounds_between_train=10000,
                                             episodes_between_train=1)

taus = [2.0, 1.0, 0.1, 0.01]
ntaus = len(taus)
t = 0

test_policy = BoltzmannQPolicy(0.005)

test_run_logger = RunLogger("run_log.csv")
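BoltzmannQPolicy picks actions by a softmax over Q-values with temperature tau, so the taus list above presumably anneals from broad exploration (tau=2.0) toward near-greedy play (tau=0.01), with the very low 0.005 reserved for test runs. The standard selection rule, in generic form rather than QBrain source:

import numpy as np

def boltzmann_action(q_values, tau):
    # Softmax with temperature: large tau -> near-uniform exploration,
    # small tau -> near-greedy exploitation.
    z = (np.asarray(q_values) - np.max(q_values)) / tau  # stabilized exponent
    p = np.exp(z)
    p /= p.sum()
    return np.random.choice(len(p), p=p)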
Example #6
if "-h" in opts or "-?" in opts:
    print """Usage:
         python tanks_target.py [-k kind] [-r <run log CVS file>]
    """
    sys.exit(1)

env = TankTargetEnv(kind)
tanks = []

share_brain = True
if share_brain:
    model = create_model(env.observation_space.shape[-1],
                         env.action_space.shape[-1])
    brain = QBrain(model,
                   kind=kind,
                   v_selectivity=False,
                   qnet_hard_update=100000,
                   gamma=gamma)
    brain.compile(Adam(lr=1e-3), ["mse"])

    tanks = [TankAgent(env, brain, train_sample_size=1000) for _ in range(1)]  # one tank here; a larger range would share the single brain
else:
    for _ in (1, 2, 3):
        model = create_model(env.observation_space.shape[-1],
                             env.action_space.shape[-1])
        brain = QBrain(model,
                       kind=kind,
                       v_selectivity=False,
                       qnet_hard_update=100000,
                       gamma=gamma)
        brain.compile(Adam(lr=1e-3), ["mse"])
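The excerpt ends inside the else branch; by analogy with Example #4, each independently trained brain is presumably wrapped in an agent and collected. The line below is an assumption, not part of the source:

        tanks.append(TankAgent(env, brain, train_sample_size=1000))  # assumed continuation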
Example #7

import sys
import getopt

opts, args = getopt.getopt(sys.argv[1:], "k:r:h?")
opts = dict(opts)
kind = opts.get("-k", "diff")
run_log = opts.get("-r", "run_log.csv")
if "-h" in opts or "-?" in opts:
    print """Usage:
         python cars2d.py [-k kind] [-r <run log CVS file>]
    """
    sys.exit(1)

env = CarsRadEnv()
model = create_model(env.observation_space.shape[-1],
                     env.action_space.shape[-1])
brain = QBrain(model, kind="diff", gamma=0.99)  #, soft_update=0.01)
brain.compile(Adam(lr=1e-3), ["mse"])

cars = [CarAgent(env, brain, train_sample_size=1000) for _ in range(3)]

#cars = []
#for _ in range(3):
#    model = create_model(env.observation_space.shape[-1], env.action_space.shape[-1])
#    brain = QBrain(model, soft_update=0.01)
#    brain.compile(Adam(lr=1e-3), ["mse"])
#    cars.append(CarAgent(env, brain))

controller = SynchronousMultiAgentController(env,
                                             cars,
                                             rounds_between_train=10000,
                                             episodes_between_train=1)
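The excerpt stops after constructing the controller. Following the fit() call shown in Example #3, training would presumably be started like this; the episode budget is an arbitrary assumption:

controller.fit(max_episodes=1000)  # assumed; the source excerpt ends before training starts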