import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(1, 'env/')
from env import envs
from maddpg import MaDDPG

# Training entry point: build the MaDDPG trainer and the custom environment,
# then iterate over episodes.
state_dim = 5
action_dim = 1
max_edge = 1
num_agents = 3

maddpg = MaDDPG(num_agents, state_dim, action_dim)
Env = envs.Environ(num_agents, max_edge)
obs = Env.reset()
current_state = obs

max_episode = 1000000
done_epoch = 0
#print(current_state)
max_epoch = 1000
catch_time = []

for episode in range(max_episode):
    print('episode', episode)
    #while (True):
    #Env.re_create_env(num_agents)
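    # The file is truncated here. A minimal sketch of the missing inner loop,
    # assuming the Env.step / maddpg.perceive interface used by the sibling
    # scripts in this repo (noise_action adds exploration noise; perceive
    # stores the transition and trains). The body below is an assumption,
    # not the original code.
    for epoch in range(max_epoch):
        action = maddpg.noise_action(current_state)
        next_state, reward, done = Env.step(action)
        maddpg.perceive(current_state, action, reward, next_state, done)
        current_state = next_state
        if done:
            catch_time.append(epoch)
            current_state = Env.reset()
            break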
import numpy as np
import torch
from ma_policy import MaPolicy
from maddpg import MaDDPG

DEFAULT_LEARN_STEPS = 10000
DEFAULT_TEST_STEPS = 10000
ENV_NAMES = "PongDuel-v0"
OUTPUT_FRAMES_DIR = 'output/video.npz'

if __name__ == '__main__':
    # The mapper converts the policy's continuous actions into the discrete
    # action ids PongDuel-v0 expects.
    model = MaDDPG(
        policy=MaPolicy,
        env=ENV_NAMES,
        mapper=lambda actions: torch.round(actions * 1.5 + 1).flatten().tolist(),
        verbose=True)
    model.learn(DEFAULT_LEARN_STEPS)

    # Collect the frame arrays that model.execute yields during evaluation
    # and save them for later playback.
    results = []
    for interact in model.execute(DEFAULT_TEST_STEPS):
        if isinstance(interact, np.ndarray):
            results.append(interact)
    if len(results) != 0:
        np.savez(OUTPUT_FRAMES_DIR, frames=np.array(results))
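# Mapper sanity check (the continuous action range is an assumption; this
# file does not state it). For actions in [-1, 1], round(a * 1.5 + 1) lands
# in PongDuel's discrete set {0, 1, 2}:
#
#     a = torch.tensor([-1.0, 0.0, 1.0])
#     torch.round(a * 1.5 + 1).flatten().tolist()
#     # [-0.0, 1.0, 2.0] -- torch.round rounds halves to the nearest even value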
from math import fabs

import gym
import rospy
from nav_msgs.msg import Odometry

from maddpg import MaDDPG

# payload_euler_angle, payload_height, callback_payload_odometry and
# reward_file are referenced below but defined outside this snippet; the
# callback updates the payload pose from the Odometry topic.


def system_destroyed():
    # The payload counts as lost once roll or pitch exceeds 1 rad or it sinks
    # below the minimum working height.
    disfunction_height = 1.0
    return (fabs(payload_euler_angle[0, 0]) > 1.0
            or fabs(payload_euler_angle[1, 0]) > 1.0
            or payload_height < disfunction_height)


if __name__ == '__main__':
    state_dim = 20
    action_dim = 3
    num_agents = 4
    maddpg = MaDDPG(num_agents, state_dim, action_dim)

    rospy.init_node('multi_UAV_gym', anonymous=False, log_level=rospy.INFO)
    env = gym.make('multi_UAV-v0')

    topic_name_odom = '/payload/ground_truth/odometry'
    sub_payload_odometry = rospy.Subscriber(topic_name_odom, Odometry,
                                            callback_payload_odometry)
    #obs_shape_n = [env.observation_space[i].shape for i in range(4)]
    maddpg.load_network()
    #trainers = get_trainers(env, obs_shape_n, params)

    # Resume the episode counter: the reward log holds one line per finished
    # episode.
    f_reward = open(reward_file, 'r+')
    episode = 0
    for line in f_reward.readlines():
        episode = episode + 1
    f_reward.close()
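# `callback_payload_odometry` is not shown in this snippet. A minimal sketch
# of what it presumably does (an assumption; the quaternion-to-Euler helper
# is the standard ROS tf.transformations one), kept as a comment because the
# real definition must precede the Subscriber call above:
#
#     import numpy as np
#     from tf.transformations import euler_from_quaternion
#
#     def callback_payload_odometry(msg):
#         global payload_euler_angle, payload_height
#         q = msg.pose.pose.orientation
#         roll, pitch, yaw = euler_from_quaternion([q.x, q.y, q.z, q.w])
#         payload_euler_angle = np.array([[roll], [pitch], [yaw]])
#         payload_height = msg.pose.pose.position.z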
from maddpg import MaDDPG
import numpy as np

# Smoke test: drive the trainer with random transitions to exercise
# noise_action and perceive.
state_dim = 3
action_dim = 2
num_agents = 4

maddpg = MaDDPG(num_agents, state_dim, action_dim)

for ii in range(2000):
    print('time step {}'.format(ii))
    state = np.random.rand(num_agents, state_dim)
    next_state = np.random.rand(num_agents, state_dim)
    action = np.random.rand(num_agents, action_dim)
    reward = np.random.rand(num_agents)
    done = 0
    takeaction = maddpg.noise_action(state)
    maddpg.perceive(state, action, reward, next_state, done)

maddpg.close_session()
from maddpg import MaDDPG
import numpy as np

state_dim = 3
action_dim = 2
num_agents = 2

# `states` is (num_agents, state_dim) once two more agents are added below;
# `states_batch` is (batch, num_agents, state_dim).
states = np.ones((4, state_dim))
states_batch = np.ones((2, 4, state_dim))

maddpg = MaDDPG(num_agents, state_dim, action_dim)
maddpg.add_agents(2)  # grow the team from 2 to 4 agents

print(maddpg.action(states))
actions = maddpg.target_actions(states_batch)
noise_action = maddpg.noise_action(states)
print(noise_action)

maddpg.close_session()
#print(maddpg.num_agents)
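# Expected return shapes, assuming the conventions the constructor arguments
# suggest (an assumption; the MaDDPG implementation is not shown here):
#
#     maddpg.action(states)                # (num_agents, action_dim)
#     maddpg.target_actions(states_batch)  # (batch, num_agents, action_dim)
#     maddpg.noise_action(states)          # action() plus exploration noise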
import numpy as np
import time
import sys
sys.path.insert(1, 'env/')
from env import envs
from maddpg import MaDDPG

# Load the pre-trained network and play it back slowly.
state_dim = 5
action_dim = 1
max_edge = 1
num_agents = 3

maddpg = MaDDPG(num_agents, state_dim, action_dim)
# load saved network
maddpg.load_network()

Env = envs.Environ(num_agents, max_edge)
obs = Env.reset()
current_state = obs
max_time = 10000
#print(current_state)

for epoch in range(max_time):
    print('epoch', epoch)
    # Deterministic policy action: no exploration noise and no training
    # during playback.
    action = maddpg.action(current_state)
    #print(action)
    next_state, reward, done = Env.step(action)
    #print(reward)
    #maddpg.perceive(current_state,action,reward,next_state,done)
    current_state = next_state
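    # The header comment promises slow playback, yet no delay appears in the
    # snippet even though `time` is imported. A minimal pacing step (an
    # assumed addition, not original code):
    time.sleep(0.05)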