Example #1
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import sys

sys.path.insert(1, 'env/')
from env import envs
from maddpg import MaDDPG

state_dim = 5
action_dim = 1
max_edge = 1

num_agents = 3
maddpg = MaDDPG(num_agents, state_dim, action_dim)

Env = envs.Environ(num_agents, max_edge)
obs = Env.reset()
current_state = obs

max_episode = 1000000
done_epoch = 0
#print(current_state)
max_epoch = 1000

catch_time = []

for episode in range(max_episode):
    print('episode', episode)
    #while (True):
    #Env.re_create_env(num_agents)
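    # The example is truncated here; a plausible inner loop, following the
    # action -> step -> perceive pattern used by the other examples in this
    # file (an assumption, not the original code):
    current_state = Env.reset()
    for epoch in range(max_epoch):
        action = maddpg.noise_action(current_state)
        next_state, reward, done = Env.step(action)
        maddpg.perceive(current_state, action, reward, next_state, done)
        current_state = next_state
        if done:
            done_epoch += 1
            catch_time.append(epoch)
            break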
Example #2
import numpy as np
from ma_policy import MaPolicy
from maddpg import MaDDPG
import torch

DEFAULT_LEARN_STEPS = 10000
DEFAULT_TEST_STEPS = 10000
ENV_NAMES = "PongDuel-v0"
OUTPUT_FRAMES_DIR = 'output/video.npz'

if __name__ == '__main__':
    model = MaDDPG(
        policy=MaPolicy,
        env=ENV_NAMES,
        # map continuous actions (presumably in [-1, 1]) onto discrete ids {0, 1, 2}
        mapper=lambda actions: torch.round(actions * 1.5 + 1).flatten().tolist(),
        verbose=True)
    model.learn(DEFAULT_LEARN_STEPS)

    results = []

    for interact in model.execute(DEFAULT_TEST_STEPS):
        if isinstance(interact, np.ndarray):
            results.append(interact)

    if len(results) != 0:
        np.savez(OUTPUT_FRAMES_DIR, frames=np.array(results))
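
    # Hypothetical quick check (not in the original): np.savez stores the
    # array under the 'frames' key, so the file can be reloaded with np.load.
    if len(results) != 0:
        replay = np.load(OUTPUT_FRAMES_DIR)
        print(replay['frames'].shape)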
Example #3
from math import fabs

import gym
import rospy
from nav_msgs.msg import Odometry

from maddpg import MaDDPG


def system_destroyed():
    # payload_euler_angle and payload_height are module-level globals filled
    # in by callback_payload_odometry in the full script (truncated here)
    disfunction_height = 1.0
    return (fabs(payload_euler_angle[0, 0]) > 1.0
            or fabs(payload_euler_angle[1, 0]) > 1.0
            or payload_height < disfunction_height)


if __name__ == '__main__':
    state_dim = 20
    action_dim = 3
    num_agents = 4
    maddpg = MaDDPG(num_agents, state_dim, action_dim)
    rospy.init_node('multi_UAV_gym', anonymous=False, log_level=rospy.INFO)
    env = gym.make('multi_UAV-v0')

    topic_name_odom = '/payload/ground_truth/odometry'
    sub_payload_odometry = rospy.Subscriber(topic_name_odom, Odometry,
                                            callback_payload_odometry)
    #obs_shape_n = [env.observation_space[i].shape for i in range(4)]
    maddpg.load_network()
    #trainers = get_trainers(env, obs_shape_n, params)

    # resume the episode counter from the number of reward lines already
    # logged (reward_file is defined elsewhere in the original script)
    with open(reward_file, 'r+') as f_reward:
        episode = sum(1 for _ in f_reward)
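
    # The example is truncated here; a minimal continuation, assuming the
    # standard gym step signature, might look like:
    current_state = env.reset()
    while not rospy.is_shutdown():
        action = maddpg.action(current_state)
        next_state, reward, done, _ = env.step(action)
        current_state = next_state
        if done or system_destroyed():
            current_state = env.reset()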
Example #4
from maddpg import MaDDPG
import numpy as np
state_dim = 3
action_dim = 2
num_agents = 4

maddpg = MaDDPG(num_agents, state_dim, action_dim)

for ii in range(2000):
    print('time step {}'.format(ii))
    # fill the replay buffer with random transitions
    state = np.random.rand(num_agents, state_dim)
    next_state = np.random.rand(num_agents, state_dim)
    action = np.random.rand(num_agents, action_dim)
    reward = np.random.rand(num_agents)
    done = 0

    # exercise the exploration policy; note the stored transition uses the
    # random action above, not this one
    take_action = maddpg.noise_action(state)
    maddpg.perceive(state, action, reward, next_state, done)

maddpg.close_session()
Example #5
from maddpg import MaDDPG
import numpy as np
state_dim = 3
action_dim = 2
num_agents = 2
states = np.ones((4, state_dim))  # one observation per agent, after growing to 4 agents
states_batch = np.ones((2, 4, state_dim))  # a batch of two joint observations
maddpg = MaDDPG(num_agents, state_dim, action_dim)
maddpg.add_agents(2)  # grow the team from 2 to 4 agents
print(maddpg.action(states))
actions = maddpg.target_actions(states_batch)
noise_action = maddpg.noise_action(states)
print(noise_action)
maddpg.close_session()
#print(maddpg.num_agents)
Example #6
import numpy as np
import time
import sys
sys.path.insert(1, 'env/')
from env import envs
from maddpg import MaDDPG

# load the pre-trained network and replay the learned policy slowly
state_dim = 5
action_dim = 1
max_edge = 1

num_agents = 3
maddpg = MaDDPG(num_agents, state_dim, action_dim)

# load saved network
maddpg.load_network()

Env = envs.Environ(num_agents, max_edge)
obs = Env.reset()
current_state = obs
max_time = 10000
#print(current_state)
for epoch in range(max_time):
    print('epoch', epoch)
    action = maddpg.action(current_state)
    #print(action)
    next_state, reward, done = Env.step(action)
    #print(reward)
    #maddpg.perceive(current_state,action,reward,next_state,done)
    current_state = next_state
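    # The example is truncated here; given the unused time import and the
    # comment about slow replay, something like this likely followed
    # (the 0.1 s delay is an assumption):
    time.sleep(0.1)
    if done:
        current_state = Env.reset()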