def test(args):
    # env and random seed
    env = make_env.make_env('simple_tag')
    np.random.seed(int(args['random_seed']))
    tf.set_random_seed(int(args['random_seed']))
    # env.seed(int(args['random_seed']))

    # tensorflow
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    # config = tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)
    with tf.Session() as sess:
        # agent number: count adversary ("ave") and good agents
        n = env.n
        ave_n = 0
        good_n = 0
        for i in env.agents:
            if i.adversary:
                ave_n += 1
            else:
                good_n += 1

        # Actor-Critic networks
        actors = []
        critics = []
        exploration_noise = []
        observation_dim = []
        action_dim = []

        # joint action dimension of the adversary agents (input to the centralized critics)
        total_action_dim = 0
        for i in range(ave_n):
            total_action_dim = total_action_dim + env.action_space[i].n

        for i in range(n):
            observation_dim.append(env.observation_space[i].shape[0])
            # assuming a discrete action space; otherwise use env.action_space[i].shape[0]
            action_dim.append(env.action_space[i].n)
            actors.append(ActorNetwork(sess, observation_dim[i], action_dim[i],
                                       float(args['actor_lr']), float(args['tau'])))
            if i < ave_n:
                # MADDPG - centralized critic over all observations and adversary actions
                critics.append(CriticNetwork(sess, n, observation_dim[i], total_action_dim,
                                             float(args['critic_lr']), float(args['tau']),
                                             float(args['gamma'])))
            else:
                # DDPG - critic conditioned on this agent's own action only
                critics.append(CriticNetwork(sess, n, observation_dim[i], action_dim[i],
                                             float(args['critic_lr']), float(args['tau']),
                                             float(args['gamma'])))
            exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))

        # load the trained actor weights; critics are not needed for evaluation
        for i in range(n):
            actors[i].mainModel.load_weights(args["modelFolder"] + str(i) + '_weights' + '.h5')

        # run 10 evaluation episodes with rendering, acting greedily (no exploration noise)
        for ep in range(10):
            s = env.reset()
            reward = 0.0
            for step in range(200):
                time.sleep(0.03)
                env.render()
                actions = []
                for i in range(env.n):
                    state_input = np.reshape(s[i], (-1, actors[i].mainModel.input_shape[1]))
                    actions.append(actors[i].predict(state_input)
                                   .reshape(actors[i].mainModel.output_shape[1],))
                # env.step returns per-agent observations, rewards, dones and infos
                s, r, d, info = env.step(actions)
                for i in range(env.n):
                    reward += r[i]
                if np.all(d):
                    break
            print("Episode: {:d} | Reward: {:f}".format(ep, reward))
        env.close()
        import sys
        sys.exit("test over!")
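# The OUNoise class used above for exploration is defined elsewhere in this project.
# Below is a minimal sketch of a standard Ornstein-Uhlenbeck process, assuming only the
# constructor signature OUNoise(mu=...) seen in this file; the default theta/sigma/dt
# values are illustrative and the project's actual implementation may differ.
class OUNoise:
    """Temporally correlated exploration noise (Ornstein-Uhlenbeck process)."""

    def __init__(self, mu, theta=0.15, sigma=0.2, dt=1e-2):
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.dt = dt
        self.reset()

    def reset(self):
        # restart the process from the mean
        self.x_prev = np.copy(self.mu)

    def __call__(self):
        # dx = theta * (mu - x) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x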
def main(args):
    # Master: builds the full actor-critic setup and runs the learner loop
    if rank == 0:
        #######################
        # Setting up:
        #   - environment, random seed
        #   - tensorflow option
        #   - network
        #   - replay
        #######################
        if not os.path.exists(args["modelFolder"]):
            os.makedirs(args["modelFolder"])
        if not os.path.exists(args["summary_dir"]):
            os.makedirs(args["summary_dir"])

        # env and random seed
        env = make_env.make_env('simple_tag')
        np.random.seed(int(args['random_seed']))
        tf.set_random_seed(int(args['random_seed']))
        env.seed(int(args['random_seed']))

        # tensorflow
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.35)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                              log_device_placement=False)) as sess:
            # agent number: count adversary ("ave") and good agents
            n = env.n
            ave_n = 0
            good_n = 0
            for i in env.agents:
                if i.adversary:
                    ave_n += 1
                else:
                    good_n += 1

            # Actor-Critic networks
            actors = []
            critics = []
            exploration_noise = []
            observation_dim = []
            action_dim = []

            # joint action dimension of the adversary agents (input to the centralized critics)
            total_action_dim = 0
            for i in range(ave_n):
                total_action_dim = total_action_dim + env.action_space[i].n
            # print("total_action_dim {} for cooperative agents".format(total_action_dim))

            for i in range(n):
                observation_dim.append(env.observation_space[i].shape[0])
                # assuming a discrete action space; otherwise use env.action_space[i].shape[0]
                action_dim.append(env.action_space[i].n)
                actors.append(ActorNetwork(sess, observation_dim[i], action_dim[i],
                                           float(args['actor_lr']), float(args['tau'])))
                if i < ave_n:
                    # MADDPG - centralized critic over all observations and adversary actions
                    critics.append(CriticNetwork(sess, n, observation_dim[i], total_action_dim,
                                                 float(args['critic_lr']), float(args['tau']),
                                                 float(args['gamma'])))
                else:
                    # DDPG - critic conditioned on this agent's own action only
                    critics.append(CriticNetwork(sess, n, observation_dim[i], action_dim[i],
                                                 float(args['critic_lr']), float(args['tau']),
                                                 float(args['gamma'])))
                exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))

            distributed_train_every_step(sess, env, args, actors, critics,
                                         exploration_noise, ave_n)

    # Worker: only needs actors and exploration noise to collect experience
    else:
        #######################
        # Setting up:
        #   - tensorflow option
        #   - network
        #######################
        env = make_env.make_env('simple_tag')
        # offset the seeds by rank so each worker explores differently
        np.random.seed(int(args['random_seed']) + rank)
        tf.set_random_seed(int(args['random_seed']) + rank)
        env.seed(int(args['random_seed']) + rank)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.08)
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                              log_device_placement=False)) as sess:
            # agent number: count adversary ("ave") and good agents
            n = env.n
            ave_n = 0
            good_n = 0
            for i in env.agents:
                if i.adversary:
                    ave_n += 1
                else:
                    good_n += 1

            # Actors only: the master does the learning
            actors = []
            exploration_noise = []
            observation_dim = []
            action_dim = []
            for i in range(n):
                observation_dim.append(env.observation_space[i].shape[0])
                action_dim.append(env.action_space[i].n)
                actors.append(ActorNetwork(sess, observation_dim[i], action_dim[i],
                                           float(args['actor_lr']), float(args['tau'])))
                exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i])))

            collect_batch(env, args, actors, exploration_noise, ave_n)
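# The distributed main() above branches on a module-level `rank` and hands off to
# distributed_train_every_step() (master) or collect_batch() (workers), both defined
# elsewhere. A minimal sketch of how `rank` could be obtained, assuming the project
# uses mpi4py for its master/worker layout (an assumption, not confirmed by this file):
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()   # 0 = master/learner, everything else = rollout worker
size = comm.Get_size()   # total number of MPI processes
# launched with something like: mpirun -np 5 python train.py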
def main(args): if not os.path.exists(args["modelFolder"]): os.makedirs(args["modelFolder"]) if not os.path.exists(args["summary_dir"]): os.makedirs(args["summary_dir"]) #with tf.device("/gpu:0"): # MADDPG for Ave Agent # DDPG for Good Agent gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) config = tf.ConfigProto(device_count={'CPU': 0}) # config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False) with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) as sess: env = make_env.make_env('simple_tag') np.random.seed(int(args['random_seed'])) tf.set_random_seed(int(args['random_seed'])) env.seed(int(args['random_seed'])) ave_n = 0 good_n = 0 for i in env.agents: if i.adversary: ave_n += 1 else: good_n += 1 print("adversary ", ave_n, "target ", good_n) # print("ave_n", ave_n) n = env.n actors = [] critics = [] brains = [] exploration_noise = [] observation_dim = [] action_dim = [] total_action_dim = 0 # Aversary Agents action spaces for i in range(ave_n): total_action_dim = total_action_dim + env.action_space[i].n print("total_action_dim", total_action_dim) for i in range(n): observation_dim.append(env.observation_space[i].shape[0]) action_dim.append( env.action_space[i].n ) # assuming discrete action space here -> otherwise change to something like env.action_space[i].shape[0] actors.append( ActorNetwork(sess, observation_dim[i], action_dim[i], float(args['actor_lr']), float(args['tau']))) # critics.append(CriticNetwork(sess,n,observation_dim[i],total_action_dim,float(args['critic_lr']),float(args['tau']),float(args['gamma']))) if i < ave_n: # MADDPG - centralized Critic critics.append( CriticNetwork(sess, n, observation_dim[i], total_action_dim, float(args['critic_lr']), float(args['tau']), float(args['gamma']))) else: # DDPG critics.append( CriticNetwork(sess, n, observation_dim[i], action_dim[i], float(args['critic_lr']), float(args['tau']), float(args['gamma']))) exploration_noise.append(OUNoise(mu=np.zeros(action_dim[i]))) train(sess, env, args, actors, critics, exploration_noise, ave_n)