Example #1
def test(args):
    # env and random seed
    env = make_env.make_env('simple_tag')
    np.random.seed(int(args['random_seed']))
    tf.set_random_seed(int(args['random_seed']))
    # env.seed(int(args['random_seed']))
    # tensorflow
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
    with tf.Session(config=config) as sess:
        # number of agents
        n = env.n
        # count adversary ("ave") and good agents
        ave_n = 0
        good_n = 0
        for agent in env.agents:
            if agent.adversary:
                ave_n += 1
            else:
                good_n += 1
        # Actor-Critic networks (one actor per agent; centralized critics for the adversaries)
        actors = []
        critics = []
        exploration_noise = []
        observation_dim = []
        action_dim = []
        total_action_dim = 0

        # the centralized (MADDPG) critics take the concatenated actions of all adversary agents
        for i in range(ave_n):
            total_action_dim += env.action_space[i].n
        for i in range(n):
            observation_dim.append(env.observation_space[i].shape[0])
            action_dim.append(
                env.action_space[i].n
            )  # assuming discrete action space here -> otherwise change to something like env.action_space[i].shape[0]
            actors.append(
                ActorNetwork(sess, observation_dim[i], action_dim[i],
                             float(args['actor_lr']), float(args['tau'])))
            if i < ave_n:
                # MADDPG - centralized Critic
                critics.append(
                    CriticNetwork(sess, n, observation_dim[i],
                                  total_action_dim, float(args['critic_lr']),
                                  float(args['tau']), float(args['gamma'])))
            else:
                # DDPG
                critics.append(
                    CriticNetwork(sess, n, observation_dim[i], action_dim[i],
                                  float(args['critic_lr']), float(args['tau']),
                                  float(args['gamma'])))
            exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))
        for i in range(n):
            actors[i].mainModel.load_weights(args["modelFolder"] + str(i) +
                                             '_weights' + '.h5')
        for ep in range(10):
            s = env.reset()
            reward = 0.0
            for step in range(200):
                time.sleep(0.03)
                env.render()
                actions = []
                for i in range(env.n):
                    # deterministic action from the trained actor (no exploration noise at test time)
                    state_input = np.reshape(s[i], (-1, actors[i].mainModel.input_shape[1]))
                    action = actors[i].predict(state_input)
                    actions.append(action.reshape(actors[i].mainModel.output_shape[1], ))
                s, r, d, info = env.step(actions)
                reward += sum(r)
                if np.all(d):
                    break
            print("Episode: {:d}  | Reward: {:f}".format(ep, reward))
        env.close()
        import sys
        sys.exit("test over!")
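
# --- Illustrative sketch (not from the source project): a minimal Ornstein-Uhlenbeck
# noise process compatible with the OUNoise(mu=np.zeros(action_dim)) constructor used
# above. The real OUNoise is imported elsewhere; everything beyond the constructor
# signature (the reset()/__call__() methods and the sigma/theta/dt defaults) is an
# assumption, hence the distinct name.
class OUNoiseSketch:
    """Temporally correlated exploration noise: dx = theta*(mu - x)*dt + sigma*sqrt(dt)*N(0, 1)."""

    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2):
        self.mu = np.asarray(mu, dtype=np.float64)
        self.sigma = sigma
        self.theta = theta
        self.dt = dt
        self.reset()

    def reset(self):
        # restart the process at its mean (typically done at the start of each episode)
        self.x = np.copy(self.mu)

    def __call__(self):
        # one Euler-Maruyama step of the OU stochastic differential equation
        self.x = (self.x
                  + self.theta * (self.mu - self.x) * self.dt
                  + self.sigma * np.sqrt(self.dt) * np.random.randn(*self.mu.shape))
        return self.x
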
def main(args):
    # Master
    if rank == 0:
        #######################
        # Setting up:
        # - environment, random seed
        # - tensorflow option
        # - network
        # - replay
        #########################
        if not os.path.exists(args["modelFolder"]):
            os.makedirs(args["modelFolder"])
        if not os.path.exists(args["summary_dir"]):
            os.makedirs(args["summary_dir"])
        # env and random seed
        env = make_env.make_env('simple_tag')
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # tensorflow
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options, log_device_placement=False)) as sess:
            # number of agents
            n = env.n
            # count adversary ("ave") and good agents
            ave_n = 0
            good_n = 0
            for agent in env.agents:
                if agent.adversary:
                    ave_n += 1
                else:
                    good_n += 1
            # Actor-Critic networks (one actor per agent; centralized critics for the adversaries)
            actors = []
            critics = []
            exploration_noise = []
            observation_dim = []
            action_dim = []
            total_action_dim = 0

            # Adversary agents' action spaces (concatenated for the centralized critics)
            for i in range(ave_n):
                total_action_dim = total_action_dim + env.action_space[i].n
            # print("total_action_dim {} for cooperative agents".format(total_action_dim))
            for i in range(n):
                observation_dim.append(env.observation_space[i].shape[0])
                action_dim.append(
                    env.action_space[i].n
                )  # assuming discrete action space here -> otherwise change to something like env.action_space[i].shape[0]
                actors.append(
                    ActorNetwork(sess, observation_dim[i], action_dim[i],
                                 float(args['actor_lr']), float(args['tau'])))
                # critics.append(CriticNetwork(sess,n,observation_dim[i],total_action_dim,float(args['critic_lr']),float(args['tau']),float(args['gamma'])))
                if i < ave_n:
                    # MADDPG - centralized Critic
                    critics.append(
                        CriticNetwork(sess, n,
                                      observation_dim[i], total_action_dim,
                                      float(args['critic_lr']),
                                      float(args['tau']),
                                      float(args['gamma'])))
                else:
                    # DDPG
                    critics.append(
                        CriticNetwork(sess, n, observation_dim[i],
                                      action_dim[i], float(args['critic_lr']),
                                      float(args['tau']),
                                      float(args['gamma'])))

                exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))

            distributed_train_every_step(sess, env, args, actors, critics,
                                         exploration_noise, ave_n)
    # Worker
    else:
        #######################
        # Setting up:
        # - environment, random seed
        # - tensorflow option
        # - network
        #########################
        env = make_env.make_env('simple_tag')
        np.random.seed(int(args['random_seed']) + rank)
        tf.set_random_seed(int(args['random_seed']) + rank)
        env.seed(int(args['random_seed']) + rank)
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.08)
        with tf.Session(config=tf.ConfigProto(
                gpu_options=gpu_options, log_device_placement=False)) as sess:
            # number of agents
            n = env.n
            # count adversary ("ave") and good agents
            ave_n = 0
            good_n = 0
            for agent in env.agents:
                if agent.adversary:
                    ave_n += 1
                else:
                    good_n += 1
            # Actor networks only (workers collect experience; the critics live on the master)
            actors = []
            exploration_noise = []
            observation_dim = []
            action_dim = []

            for i in range(n):
                observation_dim.append(env.observation_space[i].shape[0])
                action_dim.append(env.action_space[i].n)
                actors.append(
                    ActorNetwork(sess, observation_dim[i], action_dim[i],
                                 float(args['actor_lr']), float(args['tau'])))
                exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))
            collect_batch(env, args, actors, exploration_noise, ave_n)
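
# --- Illustrative sketch (assumption): `rank` above is never defined in the snippet;
# it is presumably the process rank of an MPI communicator (e.g. via mpi4py), with
# rank 0 acting as the learner and the remaining ranks as experience-collecting
# workers. The actual launcher is not shown; this is one way it could look.
# Launched with something like:  mpirun -np 4 python <script>.py
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()   # 0 = master/learner, >0 = worker
size = comm.Get_size()   # total number of MPI processes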
Example #3
def main(args):
    if not os.path.exists(args["modelFolder"]):
        os.makedirs(args["modelFolder"])
    if not os.path.exists(args["summary_dir"]):
        os.makedirs(args["summary_dir"])

    # with tf.device("/gpu:0"):
    # MADDPG critics for the adversary agents, plain DDPG critics for the good agents
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
    with tf.Session(config=config) as sess:
        env = make_env.make_env('simple_tag')

        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))
        # count adversary ("ave") and good agents
        ave_n = 0
        good_n = 0
        for agent in env.agents:
            if agent.adversary:
                ave_n += 1
            else:
                good_n += 1
        print("adversary agents:", ave_n, "good agents:", good_n)
        n = env.n
        actors = []
        critics = []
        exploration_noise = []
        observation_dim = []
        action_dim = []
        total_action_dim = 0

        # Adversary agents' action spaces (concatenated for the centralized critics)
        for i in range(ave_n):
            total_action_dim = total_action_dim + env.action_space[i].n

        print("total_action_dim", total_action_dim)

        for i in range(n):

            observation_dim.append(env.observation_space[i].shape[0])
            action_dim.append(
                env.action_space[i].n
            )  # assuming discrete action space here -> otherwise change to something like env.action_space[i].shape[0]
            actors.append(
                ActorNetwork(sess, observation_dim[i], action_dim[i],
                             float(args['actor_lr']), float(args['tau'])))
            # critics.append(CriticNetwork(sess,n,observation_dim[i],total_action_dim,float(args['critic_lr']),float(args['tau']),float(args['gamma'])))

            if i < ave_n:
                # MADDPG - centralized Critic
                critics.append(
                    CriticNetwork(sess, n, observation_dim[i],
                                  total_action_dim, float(args['critic_lr']),
                                  float(args['tau']), float(args['gamma'])))
            else:
                # DDPG
                critics.append(
                    CriticNetwork(sess, n, observation_dim[i], action_dim[i],
                                  float(args['critic_lr']), float(args['tau']),
                                  float(args['gamma'])))

            exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))

        train(sess, env, args, actors, critics, exploration_noise, ave_n)
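
# --- Illustrative sketch (assumption): a possible argparse entry point that builds the
# `args` dictionary used above. Only the keys ('actor_lr', 'critic_lr', 'gamma', 'tau',
# 'random_seed', 'modelFolder', 'summary_dir') come from the code; the flag spellings
# and default values below are placeholders, not the project's actual settings.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='MADDPG/DDPG on the simple_tag environment')
    parser.add_argument('--actor-lr', dest='actor_lr', default=1e-4)
    parser.add_argument('--critic-lr', dest='critic_lr', default=1e-3)
    parser.add_argument('--gamma', default=0.99)
    parser.add_argument('--tau', default=0.01)
    parser.add_argument('--random-seed', dest='random_seed', default=1234)
    parser.add_argument('--modelFolder', dest='modelFolder', default='./results/model/')
    parser.add_argument('--summary-dir', dest='summary_dir', default='./results/summary/')
    args = vars(parser.parse_args())
    main(args)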