class MobileAvoidance(EnvSpace):

    def env_init(self):
        self.env = CarEnv()
        self.state = self.env.reset()
        self.send_state_get_action(self.state)
        self.var = 1  # stddev of the Gaussian exploration noise

    def on_predict_response(self, action):
        # decay the exploration noise once the replay memory is full
        self.var = self.var * 0.9995 if self.ep_use_step > cfg['DDPG']['memory_capacity'] else self.var
        # perturb the predicted action with Gaussian noise and clip it to the action bounds
        a = np.clip(np.random.normal(action, self.var), *self.env.action_bound)
        # step the environment with the noisy, clipped action
        next_state, reward, done, _ = self.env.step(a)
        done = True if self.ep_use_step >= EP_MAXSTEP else done
        self.send_train_get_action(self.state, a, reward, done, next_state)
        self.state = next_state
        if self.ep >= 30 and RENDER:
            self.env.render()
        if done:
            self.state = self.env.reset()
            self.send_state_get_action(self.state)
class Worker(object):

    def __init__(self, wid):
        self.wid = wid
        self.env = CarEnv()
        self.ppo = GLOBAL_PPO

    def work(self):
        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
        while not COORD.should_stop():
            s = self.env.reset()
            ep_r = 0
            buffer_s, buffer_a, buffer_r = [], [], []
            for t in range(EP_LEN):
                if not ROLLING_EVENT.is_set():                 # while the global PPO is updating
                    ROLLING_EVENT.wait()                       # wait until the update is finished
                    buffer_s, buffer_a, buffer_r = [], [], []  # clear the stale history buffer
                a = self.ppo.choose_action(s)
                s_, r, done = self.env.step(a)
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)
                s = s_
                ep_r += r

                GLOBAL_UPDATE_COUNTER += 1                     # count towards the minimum batch size
                if t == EP_LEN - 1 or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE or done == 1:
                    v_s_ = self.ppo.get_v(s_)
                    discounted_r = []                          # compute discounted reward
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        discounted_r.append(v_s_)
                    discounted_r.reverse()

                    bs, ba, br = np.vstack(buffer_s), np.vstack(buffer_a), np.array(discounted_r)[:, np.newaxis]
                    buffer_s, buffer_a, buffer_r = [], [], []
                    QUEUE.put(np.hstack((bs, ba, br)))
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                        ROLLING_EVENT.clear()                  # stop collecting data
                        UPDATE_EVENT.set()                     # trigger the global PPO update
                    if GLOBAL_EP >= EP_MAX:                    # stop training
                        COORD.request_stop()
                        break
                    if t == EP_LEN - 1 or done == 1:
                        break

            # record reward changes for plotting later
            if len(GLOBAL_RUNNING_R) == 0:
                GLOBAL_RUNNING_R.append(ep_r)
            else:
                GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 + ep_r * 0.1)
            GLOBAL_EP += 1
            print(
                '{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
                '|W%i' % self.wid,
                '|Ep_r: %.2f' % ep_r,
            )
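# NOTE: the batch flush above bootstraps the discounted return from the critic's
# value of the last state. For reference, a self-contained sketch of that
# computation (a hypothetical helper, not part of this file):
def discounted_returns(rewards, bootstrap_value, gamma):
    """Return [G_t] with G_t = r_t + gamma * G_{t+1}, seeded by the bootstrap value."""
    returns = []
    running = bootstrap_value
    for r in reversed(rewards):
        running = r + gamma * running
        returns.append(running)
    returns.reverse()
    return returns

# Example: discounted_returns([1.0, 0.0, 2.0], bootstrap_value=0.5, gamma=0.9)
# yields [2.9845, 2.205, 2.45].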
def main():
    global RADAR_MEM
    data = open('radar.txt', 'w')
    try:
        car_env = CarEnv(port=2069)
        sensors = car_env.sensor_list
        car = car_env.vehicle_list[0]
        car.set_autopilot(enabled=True)

        for sensor in sensors:
            if sensor.type_id == 'sensor.other.radar':
                # use a distinct parameter name so the callback does not shadow the file handle
                sensor.listen(lambda radar_data: save_data(radar_data, 0))
        time.sleep(5)

        while True:
            for point in RADAR_MEM:
                if len(RADAR_MEM) % 50 == 0:
                    RADAR_MEM = []
                    data.write('\n')
                if point.size != 0:
                    stuff = str(point).replace('[', '').replace(']', '')
                    print(stuff)
                    data.write(f"{stuff}|")
    except (KeyboardInterrupt, SystemExit):
        data.close()
        car.destroy()
        for sensor in sensors:
            sensor.destroy()
        sys.exit()
def main():
    with tf.Session() as sess:
        env = CarEnv()
        np.random.seed(1)
        tf.set_random_seed(1)

        state_dim = env.state_dim
        action_dim = env.action_dim
        action_bound = env.action_bound_high

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound,
                             0.001, 0.01, 64)
        critic = CriticNetwork(sess, state_dim, action_dim, 0.001, 0.01, 0.9,
                               actor.get_num_trainable_vars())
        action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        train(sess, env, actor, critic, action_noise)
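# NOTE: OrnsteinUhlenbeckActionNoise is used above but not defined in this snippet.
# Below is a minimal sketch of a standard OU process; sigma, theta and dt are
# assumed defaults, not values taken from this repository.
import numpy as np

class OrnsteinUhlenbeckActionNoise(object):
    """Temporally correlated exploration noise commonly added to DDPG actions."""

    def __init__(self, mu, sigma=0.2, theta=0.15, dt=1e-2, x0=None):
        self.mu, self.sigma, self.theta, self.dt, self.x0 = mu, sigma, theta, dt, x0
        self.reset()

    def __call__(self):
        # x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
        x = (self.x_prev
             + self.theta * (self.mu - self.x_prev) * self.dt
             + self.sigma * np.sqrt(self.dt) * np.random.normal(size=self.mu.shape))
        self.x_prev = x
        return x

    def reset(self):
        self.x_prev = self.x0 if self.x0 is not None else np.zeros_like(self.mu)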
class Worker(object):

    def __init__(self, name, globalAC):
        self.env = CarEnv()
        self.name = name
        self.AC = ACNet(name, globalAC)

    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s = self.env.reset()
            ep_r = 0
            for ep_t in range(MAX_EP_STEP):
                # if self.name == 'W_0':
                #     self.env.render()
                a = self.AC.choose_action(s)
                s_, r, done = self.env.step(a)
                if ep_t == MAX_EP_STEP - 1:
                    done = True
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done or ep_t == MAX_EP_STEP - 1:
                    # update the global net and pull the new weights back into the local net
                    if done:
                        v_s_ = 0  # terminal state has zero value
                    else:
                        v_s_ = SESS.run(self.AC.v, {self.AC.s: s_[np.newaxis, :]})[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s, buffer_a, buffer_v_target = (
                        np.vstack(buffer_s), np.vstack(buffer_a), np.vstack(buffer_v_target))
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    test = self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                s = s_
                total_step += 1
                if done or ep_t == MAX_EP_STEP - 1:
                    if len(GLOBAL_RUNNING_R) == 0:  # record the running episode reward
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(0.9 * GLOBAL_RUNNING_R[-1] + 0.1 * ep_r)
                    print(
                        self.name,
                        "Ep:", GLOBAL_EP,
                        "| Ep_r: %i" % GLOBAL_RUNNING_R[-1],
                        '| Var:', test,
                    )
                    GLOBAL_EP += 1
                    break
tf.reset_default_graph()

MAX_GLOBAL_EP = 1000
MAX_EP_STEP = 100
UPDATE_GLOBAL_ITER = 5
N_WORKERS = multiprocessing.cpu_count()
LR_A = 1e-4       # learning rate for actor
LR_C = 2e-4       # learning rate for critic
GAMMA = 0.9       # reward discount
MODE = ['easy', 'hard']
n_model = 1
GLOBAL_NET_SCOPE = 'Global_Net'
ENTROPY_BETA = 0.01
GLOBAL_RUNNING_R = []
GLOBAL_EP = 0

env = CarEnv()
N_S = 2 + env.O_LC
N_A = 1
A_BOUND = env.action_bound
del env


class ACNet(object):
    def __init__(self, scope, globalAC=None):
        if scope == GLOBAL_NET_SCOPE:  # get global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
                self._build_net()
                self.a_params = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
#################################
# setup
#################################

base_output_dir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(base_output_dir)

tensorboard_dir = base_output_dir + "/tensorboard/"
os.makedirs(tensorboard_dir)
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
with summary_writer.as_default():
    tf.summary.text('params', str(args), step=0)

State.setup(args)

environment = CarEnv(args)
replay_memory = replay.ReplayMemory(base_output_dir, args)
dqn = dqn.DeepQNetwork(environment.get_num_actions(),
                       environment.get_state_size(),
                       replay_memory, base_output_dir, tensorboard_dir, args)

train_epsilon = args.epsilon  # don't reset epsilon between epochs
start_time = datetime.datetime.now()
train_episodes = 0
eval_episodes = 0
episode_train_reward_list = []
episode_eval_reward_list = []

#################################
# stop handler
#################################
from car_env import CarEnv
import pygame

env = CarEnv()
state = env.reset()
close_screen = False

while True:
    action = 4  # action used when no arrow key is pressed this frame
    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN and event.key == pygame.K_DOWN:
            action = 0
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RIGHT:
            action = 1
        if event.type == pygame.KEYDOWN and event.key == pygame.K_UP:
            action = 2
        if event.type == pygame.KEYDOWN and event.key == pygame.K_LEFT:
            action = 3
        if event.type == pygame.QUIT:
            close_screen = True
    next_state, reward, done, info = env.step(action)
    env.render()
    if done or close_screen:
        break

pygame.display.quit()
pygame.quit()
def draw_target(screen, env):
    color = (255, 128, 0)
    size = 5
    pygame.draw.rect(screen, color,
                     pygame.Rect(env.x_target, env.y_target, size, size))


def draw_state(screen, env):
    car_info = env.Car.get_info()
    draw_car(screen, car_info)
    draw_target(screen, env)


if __name__ == "__main__":
    env = CarEnv()
    agent = Agent()

    pygame.init()
    screen = pygame.display.set_mode((env.x_upper, env.y_upper))
    clock = pygame.time.Clock()

    done = False
    while not env.is_done():
        agent.step(env)
        screen.fill((0, 0, 0))  # erase screen
        draw_state(screen, env)
        for event in pygame.event.get():
            pass
        pygame.display.flip()  # refresh screen
import threading, queue

from car_env import CarEnv

tf.reset_default_graph()

EP_MAX = 1000
EP_LEN = 100
N_WORKER = 4           # parallel workers
GAMMA = 0.9            # reward discount factor
A_LR = 0.0001          # learning rate for actor
C_LR = 0.0005          # learning rate for critic
MIN_BATCH_SIZE = 64    # minimum batch size for updating PPO
UPDATE_STEP = 5        # loop the update operation for n steps
EPSILON = 0.2          # clipping range for the surrogate objective
n_model = 1

env = CarEnv()
S_DIM = 2 + env.O_LC
A_DIM = 1
A_BOUND = env.action_bound[1]


class PPO(object):
    def __init__(self):
        self.sess = tf.Session()
        self.tfs = tf.placeholder(tf.float32, [None, S_DIM], 'state')

        # critic
        l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu)
        self.v = tf.layers.dense(l1, 1)
        self.tfdc_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r')
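# NOTE: EPSILON = 0.2 above is the clipping range of PPO's clipped surrogate
# objective. A minimal NumPy sketch of that objective for reference (illustrative
# only, not this repository's TensorFlow implementation):
import numpy as np

def clipped_surrogate(ratio, advantage, epsilon=0.2):
    """L = E[min(r * A, clip(r, 1 - eps, 1 + eps) * A)], maximized during the update."""
    unclipped = ratio * advantage
    clipped = np.clip(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantage
    return np.mean(np.minimum(unclipped, clipped))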
MAX_EPISODES = 2000
MAX_EP_STEPS = 1000
LR_A = 1e-4       # learning rate for actor
LR_C = 1e-4       # learning rate for critic
GAMMA = 0.9       # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 2000
BATCH_SIZE = 16
VAR_MIN = 0.1
RENDER = True
LOAD = True
DISCRETE_ACTION = False

env = CarEnv(discrete_action=DISCRETE_ACTION)
env2 = CarEnv(discrete_action=DISCRETE_ACTION, self_obstacle=True)
STATE_DIM = env.state_dim
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholders for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    print("Failed to find carla's .egg directory")

import carla
from carla import *
from car_env import CarEnv
import random
import time

run = False
PORT = 2069
IMG_WIDTH = 640
IMG_HEIGHT = 480

spawn = CarEnv(port=2069)  # <-- remove when not testing
spawn.vehicle_list[0].set_autopilot(enabled=True)

CAMERA_MEM = None  # stores the current frame received from the cameras (starts as None)


def processImage(data, folder):
    global CAMERA_MEM
    i = np.array(data.raw_data)
    i2 = np.reshape(i, (IMG_HEIGHT, IMG_WIDTH, 4))  # BGRA frame from the camera
    i3 = i2[:, :, :3]                               # drop the alpha channel
    CAMERA_MEM = i3
    data.save_to_disk(f'{folder}/%06d.png' % data.frame)
    print(dir(data))          # debug output
    print(type(CAMERA_MEM))   # debug output
def main():
    try:
        global RADAR_MEM
        car_env = CarEnv(port=2069)
        car = car_env.vehicle_list[0]
        sensors = car_env.sensor_list
        car.set_autopilot(enabled=True)

        HFOV = (car_env.hfov * math.pi) / 180  # horizontal field of view in radians
        VFOV = (car_env.vfov * math.pi) / 180  # vertical field of view in radians

        for sensor in sensors:
            if sensor.type_id == 'sensor.other.radar':
                sensor.listen(lambda data: parse_data(data))
        clock.tick(60)

        pygame.init()
        display = (1280, 720)
        pygame.display.set_mode(display, DOUBLEBUF | OPENGL)
        gluPerspective(120, (display[0] / display[1]), 0.1, 200.0)
        glRotate(0, 0, 0, 0)
        glTranslatef(0.0, 0.0, -3)

        while True:
            events = pygame.event.get()
            keys = pygame.key.get_pressed()
            pressed_mouse = pygame.mouse.get_pressed()

            for event in events:
                if event.type == pygame.QUIT:
                    pygame.quit()
                    exit()
                if event.type == pygame.MOUSEBUTTONDOWN:
                    if event.button == 4:   # wheel rolled up
                        glScaled(1.10, 1.10, 1.10)
                    if event.button == 5:   # wheel rolled down
                        glScaled(0.9, 0.9, 0.9)

            if pressed_mouse[1]:
                ms = pygame.mouse.get_rel()
                glRotate(2, ms[1], ms[0], 0)
            if pressed_mouse[2]:
                ms = pygame.mouse.get_rel()
                glTranslatef(ms[0] / 100, -1 * ms[1] / 100, 0)

            if keys[pygame.K_UP]:
                glRotate(0.1, -1, 0, 0)
            if keys[pygame.K_DOWN]:
                glRotate(0.1, 1, 0, 0)
            if keys[pygame.K_LEFT]:
                glRotate(0.1, 0, -1, 0)
            if keys[pygame.K_RIGHT]:
                glRotate(0.1, 0, 1, 0)
            if keys[pygame.K_s]:
                glTranslatef(0.0, 0.0, -1)
            if keys[pygame.K_w]:
                glTranslatef(0.0, 0.0, 1)

            glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
            Render()
            pygame.display.flip()

            if len(RADAR_MEM) >= car_env.radartick:
                RADAR_MEM = []
    except (KeyboardInterrupt, SystemExit):
        car.destroy()
        for sensor in sensors:
            sensor.destroy()
        pygame.quit()
        sys.exit()
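# NOTE: parse_data() and Render() are not shown in this snippet. CARLA radar
# detections carry (depth, azimuth, altitude); a minimal sketch of the
# spherical-to-Cartesian conversion such a parser might use (a hypothetical
# helper, not this repository's code):
import math

def radar_point_to_xyz(depth, azimuth, altitude):
    """Convert one radar detection (metres, radians) to sensor-local x/y/z."""
    x = depth * math.cos(altitude) * math.cos(azimuth)   # forward
    y = depth * math.cos(altitude) * math.sin(azimuth)   # lateral
    z = depth * math.sin(altitude)                       # up
    return x, y, z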
MAX_EPISODES = 500       # maximum number of episodes
MAX_EP_STEPS = 600       # maximum steps per episode
LR_A = 1e-4              # learning rate for actor
LR_C = 1e-4              # learning rate for critic
GAMMA = 0.9              # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 2000   # replay memory capacity
BATCH_SIZE = 16
VAR_MIN = 0.1
RENDER = True            # open the render window
LOAD = False             # train from scratch; do not load a previously trained model
DISCRETE_ACTION = False

env = CarEnv(discrete_action=DISCRETE_ACTION)
STATE_DIM = env.state_dim
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholders for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):  # actor network
    def __init__(self, sess, action_dim, action_bound, learning_rate, t_replace_iter):
        for steps in range(MAX_EP_STEPS):
            if RENDER:
                env.render()

            # Put Your Code Here

            if done:
                agent.append_data(ep_steps=steps)
                break


if __name__ == '__main__':
    global agent, ep, TEST
    env = CarEnv(map_set=0)
    agent = DQN(
        n_input=env.n_sensor,
        n_output=env.n_actions,
        gamma=0.96,
        beta=0.3,
        memory_size=2000,
        batch_size=32,
        epsilon=0.8,
        epsilon_decay=0.996,
        epsilon_min=0.02,
        show=True,
    )
    agent._build_net()
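# NOTE: the DQN above is configured with epsilon=0.8, epsilon_decay=0.996 and
# epsilon_min=0.02. A minimal sketch of the epsilon-greedy action selection such
# an agent typically performs (illustrative, not this repository's DQN internals):
import numpy as np

def epsilon_greedy(q_values, epsilon):
    """Pick a random action with probability epsilon, otherwise the greedy one."""
    if np.random.rand() < epsilon:
        return np.random.randint(len(q_values))
    return int(np.argmax(q_values))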
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    print("Failed to find carla's .egg directory")

import carla
from carla import *
import random

parser = argparse.ArgumentParser(description='loads a model and runs it')
parser.add_argument('model', metavar='-m', type=str, nargs='?',
                    help='directory to get model')
args = parser.parse_args()

# Some variables
IMAGE_MEM = []
car_env = CarEnv(port=2069)
car = car_env.vehicle
sensors = car_env.sensor_list
SCL = 4
img_h = int(car_env.im_height / SCL)
img_w = int(car_env.im_width / SCL)


def clean():
    car_env.destroy()
    sys.exit()


def processimg(data, sensorID):
    i = np.array(data.raw_data)
    i2 = np.reshape(i, (img_h * SCL, img_w * SCL, 4))
                    default=0.1,
                    help="the decayed epsilon never falls below this minimum")
parser.add_argument("--tensorboard-logging-freq", type=int, default=300,
                    help="save training statistics once every X steps")
args = parser.parse_args()
print('Arguments:', args)

baseOutputDir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(baseOutputDir)

State.setup(args)

environment = CarEnv(args, baseOutputDir)
dqn = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args)
replayMemory = replay.ReplayMemory(args)

stop = False


def stop_handler():
    global stop
    while not stop:
        user_input = input()
        if user_input == 'q':
            print("Stopping...")
            stop = True
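# NOTE: the epsilon-minimum flag above floors the decayed epsilon. The decay
# schedule itself is not shown in this snippet; a typical linear annealing helper
# (a hypothetical example with assumed defaults, not this repository's code):
def decayed_epsilon(step, start=1.0, end=0.1, decay_steps=100000):
    """Linearly anneal epsilon from `start` to `end` over `decay_steps` steps."""
    fraction = min(float(step) / decay_steps, 1.0)
    return start + fraction * (end - start)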
tf.set_random_seed(1)

MAX_EPISODES = 4000
MAX_EP_STEPS = 800
LR_A = 1e-4       # learning rate for actor
LR_C = 1e-4       # learning rate for critic
GAMMA = 0.98      # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 4000
BATCH_SIZE = 16
VAR_MIN = 0.02
RENDER = False
LOAD = False
carn = 1

env = CarEnv(False, carn)
STATE_DIM = env.state_dim + 3
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholders for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):
    def __init__(self, sess, action_dim, action_bound, learning_rate,