def __init__(self, env, kind="diff", gamma=0.8, weight=0.9, advantage=False): if kind == "qv": qmodel, vmodel = self.create_qv_models( env.observation_space.shape[0], env.action_space.n) model = (qmodel, vmodel) else: model = self.create_model(env.observation_space.shape[0], env.action_space.n) brain = QBrain(model, kind=kind, advantage=advantage, gamma=gamma, v_selectivity=False, qnet_soft_update=0.01, diff_qnet_weight=weight) brain.compile(Adam(lr=1e-3), ["mse"]) MultiDQNAgent.__init__(self, env, brain, train_sample_size=1000, train_batch_size=50)
def __init__(self, env, kind="diff", gamma=0.99, diff_qnet_weight=0.7): model = self.create_model(env.observation_space.shape[0], env.action_space.n) brain = QBrain(model, kind=kind, gamma=gamma, v_selectivity=False, qnet_soft_update=0.01, diff_qnet_weight=diff_qnet_weight) brain.compile(Adam(lr=1e-3), ["mse"]) MultiDQNAgent.__init__(self, env, brain, train_sample_size=1000, train_batch_size=50)
env = TankTargetEnv()
memory = ReplayMemory(100000, v_selectivity=True)

tanks = []
for i in range(3):
    model = create_model(env.observation_space.shape[-1], env.action_space.shape[-1])
    brain = QBrain(model, typ="diff", memory=memory, soft_update=0.01, gamma=0.99)
    brain.compile(Adam(lr=1e-3), ["mse"])
    if i > 0:
        brain.transfer(tanks[0].Brain)      # make all brains the same initially
    tanks.append(TankAgent(env, brain, train_sample_size=1000))

controller = SynchronousMultiAgentController(env, tanks,
                                             rounds_between_train=10000,
                                             episodes_between_train=1)

taus = [0.01, 0.1, 1.0, 2.0]            # Boltzmann exploration temperatures
ntaus = len(taus)
t = 0
test_policy = BoltzmannQPolicy(0.005)   # low-temperature (near-greedy) policy for evaluation
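# The taus list holds Boltzmann temperatures (presumably alternated during training),
# while test_policy uses a very low temperature (0.005), i.e. an almost greedy policy
# for evaluation. As an illustration only (not the library's implementation),
# Boltzmann exploration samples actions with probability proportional to exp(Q / tau):

import numpy as np

def boltzmann_action(q_values, tau):
    # softmax over the Q-values at temperature tau; small tau -> nearly greedy
    q = np.asarray(q_values, dtype=float) / tau
    q -= q.max()                        # subtract max for numerical stability
    p = np.exp(q)
    p /= p.sum()
    return np.random.choice(len(p), p=p)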