# DDPG driver for the robot-arm environment (ON_TRAIN = False suggests an eval
# configuration; the eval path itself is not visible in this chunk).
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 900   # number of training episodes
MAX_EP_STEPS = 200   # max environment steps per episode
ON_TRAIN = False

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(a_dim, s_dim, a_bound)

steps = []

def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.  # episode-reward accumulator (its accumulation is not visible in this chunk)
        for j in range(MAX_EP_STEPS):
            env.render()
            a = rl.choose_action(s)
            s_, r, done = env.step(a)
            rl.store_transition(s, a, r, s_)
            # NOTE(review): train() appears truncated here in this chunk.
"lr": 0.0001,            # tail of a PARAMS dict whose opening lies before this chunk
"memory_capacity": 9000
}

# set env
print(PARAMS)
env = ArmEnv(n_arms=PARAMS["n_arms"],
             random_goal=PARAMS["random_target"],
             # mouse control only when NOT training (i.e. during evaluation)
             on_mouse=False if PARAMS["training"] else True,
             # NOTE(review): `args` is not defined in this chunk — presumably an
             # argparse namespace created earlier in the file; confirm.
             show_fps=args.show_fps,
             fps=args.fps)
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(
    a_dim,
    s_dim,
    a_bound,
    soft_replace=PARAMS["soft_replace"],
    tau=PARAMS["tau"],
    gamma=PARAMS["gamma"],
    lr=PARAMS["lr"],
)

# checkpoint directory, one per arm count
MODEL_DIR = "models/{}arms".format(PARAMS["n_arms"])

# Dispatch on mode. train()/eval() are not defined in this chunk;
# NOTE(review): a bare `eval()` would hit the builtin unless a local function
# named `eval` is defined earlier in the file — verify.
if PARAMS["training"]:
    train()
else:
    eval()
# DDPG driver for the robot-arm environment with a NaN guard on the action.
from env import ArmEnv
from rl import DDPG
import time as t
import numpy as np

MAX_EPISODES = 500
MAX_EP_STEPS = 200
ON_TRAIN = False

# set env
env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
# NOTE(review): argument order here is (s_dim, a_dim, a_bound); the other
# variants in this file pass (a_dim, s_dim, a_bound) — verify against the
# actual DDPG.__init__ signature for this project.
rl = DDPG(s_dim, a_dim, a_bound)

steps = []

def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):
            env.render()
            #a=env.sample_action()
            a = rl.choose_action(s)
            # replace a NaN first action component with 0 before stepping the env
            if (np.isnan(a[0])):
                a[0] = 0
            # NOTE(review): train() appears truncated here in this chunk.
# Long-run DDPG training driver (50k episodes); rendering is gated on RENDER.
from env import ArmEnv
from rl import DDPG
import time
import numpy as np

MAX_EPISODES = 50000
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()

# set RL method (continuous); this variant's DDPG takes no constructor args
rl = DDPG()

steps = []

def train():
    # start training
    RENDER = False   # rendering off initially; any toggle is not visible in this chunk
    done_cnt = 0     # usage not visible in this chunk
    var = 2.00       # usage not visible in this chunk (presumably exploration variance — confirm)
    for i in range(MAX_EPISODES):
        s = env.reset()
        ep_r = 0.
        for j in range(MAX_EP_STEPS):
            if RENDER:
                env.render()
            # NOTE(review): chunk is truncated inside the suite of this condition.
            if len(rl.memory) <= 9999:
# DDPG driver for a traffic environment; logs four signal columns to CSV.
from env import TrafficEnv
from rl import DDPG
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

MAX_EPOCH = 500
MAX_EVENT = 100

env = TrafficEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound
r_dim = env.reward_dim

rl = DDPG(a_dim, s_dim, r_dim, a_bound)

def save_result(d, id):
    # Write the four logged signal columns of `d` to ./data/result_<id>.csv.
    # `d` is indexed d[:, k] below, so it is assumed to be a 2-D array with at
    # least 4 columns — TODO confirm at the call site.
    # NOTE(review): parameter `id` shadows the builtin of the same name.
    data_dict = {
        'FOT_Control.Speed': d[:, 0],
        'IMU.Accel_X': d[:, 1],
        'SMS.X_Velocity_T0': d[:, 2],
        'SMS.X_RANGE_T0': d[:, 3]
    }
    d_frame = pd.DataFrame(data_dict)
    d_frame.to_csv('./data/result_' + str(id) + '.csv', sep=',')

def plot(data1, data2):
    plt.figure(1)
    # NOTE(review): plot() appears truncated here in this chunk.
###############################################
# Flat DDPG training loop for the robot-arm environment (no train()/eval()
# wrapper in this variant): collect transitions each step and learn once the
# replay memory is full.
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 500   # number of training episodes
MAX_EP_STEPS = 200   # max environment steps per episode

env = ArmEnv()
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound
rl = DDPG(a_dim, s_dim, a_bound)

for i in range(MAX_EPISODES):
    # fixed: was `evn.reset()` — `evn` is undefined (NameError); the
    # environment object is bound to `env` above.
    s = env.reset()
    for j in range(MAX_EP_STEPS):
        env.render()
        # NOTE(review): other variants in this file call `choose_action` and
        # treat `memory_full` as an attribute; this variant uses
        # `choose_actions(...)` / `memory_full()` — verify against this
        # project's rl.py before changing either.
        a = rl.choose_actions(s)
        s_, r, done = env.step(a)
        rl.store_transition(s, a, r, s_)
        if rl.memory_full():
            rl.learn()   # start updating actor/critic once the buffer is full
        s = s_           # roll the state forward for the next step
# DDPG driver with an explicit exploration-noise process.
# NOTE(review): the ArmEnv/DDPG imports are not visible in this chunk.
from noise import noise

MAX_EPISODES = 3000
MAX_EP_STEPS = 100
ON_TRAIN = True

# set env — this ArmEnv variant takes four 3-vector arguments (all zeros here);
# their meaning is defined in env.py, not visible from this chunk.
env = ArmEnv([0., 0., 0.], [0., 0., 0.], [0., 0., 0.], [0., 0., 0.])
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
a_scale = [1000, 3000]   # action scaling passed in place of a_bound
#a_scale = [0, 10]
rl = DDPG(a_dim, s_dim, a_scale)

# noise-process parameters (theta/dt parameters suggest an
# Ornstein–Uhlenbeck-style process — confirm in noise.py)
noise_mean = 0
noise_std_dev = 0.2
noise_theta = 0.15
noise_dt = env.dt
# NOTE(review): this rebinds the imported name `noise` to an instance,
# shadowing the imported class/factory from here on.
noise = noise(a_dim, noise_mean, noise_std_dev, noise_theta, noise_dt)

steps = []

def train():
    # start training
    for i in range(MAX_EPISODES):
        s = env.reset()
        noise.reset()   # reset the noise process at each episode start
        ep_r = 0.
        # NOTE(review): train() appears truncated here in this chunk.
# DDPG driver that trains against a fixed 6x6 grid of goal positions.
# NOTE(review): the ArmEnv import is not visible in this chunk.
from rl import DDPG
import random

MAX_EPISODES = 300
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()
env.get_train_state = ON_TRAIN   # attribute read by the env; semantics defined in env.py
s_dim = env.state_dim
a_dim = env.action_dim
a_bound = env.action_bound

# set RL method (continuous)
rl = DDPG(a_dim, s_dim, a_bound)
rl.get_train_state = ON_TRAIN    # mirror the training flag on the agent

def train():
    # start training
    # Build 36 goal dicts on a 6x6 grid: x and y start at 100. and step by 40,
    # each goal with side length 'l' of 40; stored row-major by (incy, incx).
    sample_goal = [None] * 36
    for incx in range(6):
        for incy in range(6):
            sample_goal[incy * 6 + incx] = {
                'x': (100. + incx * 40),
                'y': (100. + incy * 40),
                'l': 40
            }
    print(len(sample_goal))
    for i in range(MAX_EPISODES):
        # NOTE(review): train() appears truncated here in this chunk.
from env import ArmEnv from rl import DDPG # Gloabel Variable MAX_EPISOSES = 500 MAX_EP_STEPS = 500 # Set the environement env = ArmEnv() s_dim = env.state_dim a_dim = env.action_dim a_bound = env.action_bound # set the RL method rl = DDPG(a_dim, s_dim, a_bound) # start Training for i in range(MAX_EPISOSES): s = env.reset() for j in range(MAX_EP_STEPS): env.render() a = rl.choose_action(s) s_, r, done = env.step(a) rl.store_transitions(s, a, r, s_) if rl.memory_full:
# DDPG driver for the robot-arm environment (descriptive variable names variant).
from env import ArmEnv
from rl import DDPG

MAX_EPISODES = 500
MAX_EP_STEPS = 200
ON_TRAIN = True

# set env
env = ArmEnv()
state_dim = env.state_dim
action_dim = env.action_dim
action_bound = env.action_bound

# set RL method (continuous)
# NOTE(review): argument order here is (state_dim, action_dim, bound); other
# variants in this file pass (a_dim, s_dim, a_bound) — verify against the
# actual DDPG.__init__ signature for this project.
rl = DDPG(state_dim, action_dim, action_bound)

def train():
    # start training
    for i in range(MAX_EPISODES):
        state = env.reset()
        ep_reward = 0.  # episode-reward accumulator (accumulation not visible in this chunk)
        for j in range(MAX_EP_STEPS):
            env.render()
            action = rl.choose_action(state)
            state_, reward, done = env.step(action)
            # memory storage
            rl.store_transition(state, action, reward, state_)
            # NOTE(review): chunk is truncated inside the suite of this condition.
            if rl.memory_full: