from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.walker.single_walker import BipedalWalker

# definitions
observe_dim = 24
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_06_21_50_57"
c.max_episodes = 5000
c.max_steps = 1000
c.replay_size = 500000

# or: explore_noise_params = [(0, 0.2)] * action_dim
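# assumed layout: (mean, std) of the Gaussian noise added to actions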
c.explore_noise_params = (0, 0.2)
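# assumed layout: (mean, std, clip_min, clip_max); in TD3-style target
# policy smoothing the sampled noise is clipped to [clip_min, clip_max]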
c.policy_noise_params = (0, 1.0, -0.5, 0.5)
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/walker/naive_ddpg_td3/"

# train configs
# lr: learning rate, int: interval
# warm-up should be less than one epoch
c.ddpg_update_batch_size = 100
c.ddpg_warmup_steps = 200
c.model_save_int = 100  # in episodes
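
# A minimal sketch of how the two noise tuples above might be consumed
# downstream; the helper names and tuple layouts are assumptions, not
# this repository's API. Exploration noise is plain Gaussian, while the
# TD3-style target policy smoothing noise is clipped before being added.
import torch

def add_exploration_noise(action, noise_params=(0, 0.2)):
    # Assumed layout of c.explore_noise_params: (mean, std).
    mean, std = noise_params
    return action + torch.randn_like(action) * std + mean

def smooth_target_action(action, noise_params=(0, 1.0, -0.5, 0.5)):
    # Assumed layout of c.policy_noise_params:
    # (mean, std, clip_min, clip_max).
    mean, std, clip_min, clip_max = noise_params
    noise = torch.clamp(torch.randn_like(action) * std + mean,
                        clip_min, clip_max)
    return action + noise
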
Example #2
from utils.tensor_board import global_board
from utils.helper_classes import Counter, Timer
from utils.conf import Config
from utils.save_env import SaveEnv
from utils.prep import prep_args

from env.walker.carrier import BipedalMultiCarrier

# definitions
observe_dim = 28
action_dim = 4

# configs
c = Config()
# c.restart_from_trial = "2020_05_06_21_50_57"
c.max_episodes = 20000
c.max_steps = 2000
c.replay_size = 500000

c.agent_num = 3       # presumably the number of cooperating walkers
c.sub_policy_num = 1  # presumably the number of sub-policies per agent
c.explore_noise_params = (0, 0.2)
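# presumably asymmetric scaling of Q-value increases vs. decreases in
# the critic update; 1 and 1 keep the update symmetric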
c.q_increase_rate = 1
c.q_decrease_rate = 1
c.device = "cuda:0"
c.root_dir = "/data/AI/tmp/multi_agent/mcarrier/maddpg/"

# train configs
# lr: learning rate, int: interval
# warm-up should be less than one epoch
c.ddpg_update_batch_size = 100
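
# A minimal sketch of how a multi-agent config like this might map onto
# per-agent tensors; the slicing scheme and helper name are assumptions,
# not this repository's API. Each of the agent_num walkers receives its
# own observe_dim slice of the flat joint observation, while a
# centralized MADDPG critic may still see the full vector.
import torch

def split_observations(joint_obs, agent_num=3, observe_dim=28):
    # joint_obs: [batch, agent_num * observe_dim] -> one
    # [batch, observe_dim] view per agent.
    return [joint_obs[:, i * observe_dim:(i + 1) * observe_dim]
            for i in range(agent_num)]

# Usage: three agents, each observing 28 dimensions.
joint_obs = torch.zeros(1, 3 * 28)
per_agent = split_observations(joint_obs)
assert len(per_agent) == 3 and per_agent[0].shape == (1, 28)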