Example #1
class MobileAvoidance(EnvSpace):
    def env_init(self):
        self.env = CarEnv()
        self.state = self.env.reset()
        self.send_state_get_action(self.state)

        self.var = 1

    def on_predict_response(self, action):
        # decay the exploration noise once the replay memory is full
        if self.ep_use_step > cfg['DDPG']['memory_capacity']:
            self.var *= 0.9995
        # add Gaussian exploration noise and clip to the action bounds
        a = np.clip(np.random.normal(action, self.var), *self.env.action_bound)
        next_state, reward, done, _ = self.env.step(a)  # step with the noisy action
        # print(next_state)
        done = True if self.ep_use_step >= EP_MAXSTEP else done
        self.send_train_get_action(self.state, a, reward, done, next_state)
        self.state = next_state

        # print('self.env_name=',self.env_name)
        if self.ep >= 30 and RENDER:
            self.env.render()
        if done:
            self.state = self.env.reset()
            self.send_state_get_action(self.state)
Example #2
class Worker(object):
    def __init__(self, wid):
        self.wid = wid
        self.env = CarEnv()
        self.ppo = GLOBAL_PPO

    def work(self):
        global GLOBAL_EP, GLOBAL_RUNNING_R, GLOBAL_UPDATE_COUNTER
        while not COORD.should_stop():
            s = self.env.reset()
            ep_r = 0
            buffer_s, buffer_a, buffer_r = [], [], []
            for t in range(EP_LEN):
                if not ROLLING_EVENT.is_set():  # while the global PPO is updating
                    ROLLING_EVENT.wait()  # wait until PPO has been updated
                    buffer_s, buffer_a, buffer_r = [], [], []  # clear the history buffer
                a = self.ppo.choose_action(s)
                s_, r, done = self.env.step(a)
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)  # the reward could also be normalized here, which was found to be useful
                s = s_
                ep_r += r

                GLOBAL_UPDATE_COUNTER += 1  # count to minimum batch size
                if t == EP_LEN - 1 or GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE or done == 1:
                    v_s_ = self.ppo.get_v(s_)
                    discounted_r = []  # compute discounted reward
                    for r in buffer_r[::-1]:
                        v_s_ = r + GAMMA * v_s_
                        discounted_r.append(v_s_)
                    discounted_r.reverse()

                    bs = np.vstack(buffer_s)
                    ba = np.vstack(buffer_a)
                    br = np.array(discounted_r)[:, np.newaxis]
                    buffer_s, buffer_a, buffer_r = [], [], []
                    QUEUE.put(np.hstack((bs, ba, br)))
                    if GLOBAL_UPDATE_COUNTER >= MIN_BATCH_SIZE:
                        ROLLING_EVENT.clear()  # stop collecting data
                        UPDATE_EVENT.set()  # globalPPO update

                    if GLOBAL_EP >= EP_MAX:  # stop training
                        COORD.request_stop()
                        break
                    if t == EP_LEN - 1 or done == 1:
                        break

            # record reward changes, plot later
            if len(GLOBAL_RUNNING_R) == 0:
                GLOBAL_RUNNING_R.append(ep_r)
            else:
                GLOBAL_RUNNING_R.append(GLOBAL_RUNNING_R[-1] * 0.9 + ep_r * 0.1)
            GLOBAL_EP += 1
            print(
                '{0:.1f}%'.format(GLOBAL_EP / EP_MAX * 100),
                '|W%i' % self.wid,
                '|Ep_r: %.2f' % ep_r,
            )
Example #3
def main():
    global RADAR_MEM
    data = open('radar.txt', 'w')
    try:
        car_env = CarEnv(port=2069)
        sensors = car_env.sensor_list
        car = car_env.vehicle_list[0]
        car.set_autopilot(enabled=True)

        for sensor in sensors:
            if sensor.type_id == 'sensor.other.radar':
                sensor.listen(lambda radar_data: save_data(radar_data, 0))

        time.sleep(5)

        while True:
            for point in RADAR_MEM:
                if len(RADAR_MEM) % 50 == 0:
                    RADAR_MEM = []
                    data.write('\n')
                if point.size != 0:
                    stuff = str(point).replace('[', '').replace(']', '')
                    print(stuff)
                    data.write(f"{stuff}|")

    except (KeyboardInterrupt, SystemExit):
        data.close()
        car.destroy()
        for sensor in sensors:
            sensor.destroy()
        sys.exit()
Example #4
def main():

    with tf.Session() as sess:

        env = CarEnv()
        np.random.seed(1)
        tf.set_random_seed(1)

        state_dim = env.state_dim
        action_dim = env.action_dim
        action_bound = env.action_bound_high

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound, 0.001,
                             0.01, 64)

        critic = CriticNetwork(sess, state_dim, action_dim, 0.001, 0.01, 0.9,
                               actor.get_num_trainable_vars())

        action_noise = OrnsteinUhlenbeckActionNoise(mu=np.zeros(action_dim))

        train(sess, env, actor, critic, action_noise)
Example #5
class Worker(object):
    def __init__(self, name, globalAC):
        self.env = CarEnv()
        self.name = name
        self.AC = ACNet(name, globalAC)

    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP
        total_step = 1
        buffer_s, buffer_a, buffer_r = [], [], []
        while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
            s = self.env.reset()
            ep_r = 0
            for ep_t in range(MAX_EP_STEP):
                # if self.name == 'W_0':
                #     self.env.render()
                a = self.AC.choose_action(s)
                s_, r, done = self.env.step(a)
                if ep_t == MAX_EP_STEP - 1: done = True
                ep_r += r
                buffer_s.append(s)
                buffer_a.append(a)
                buffer_r.append(r)

                if total_step % UPDATE_GLOBAL_ITER == 0 or done or ep_t == MAX_EP_STEP - 1:  # update global and assign to local net
                    if done:
                        v_s_ = 0  # terminal
                    else:
                        v_s_ = SESS.run(self.AC.v,
                                        {self.AC.s: s_[np.newaxis, :]})[0, 0]
                    buffer_v_target = []
                    for r in buffer_r[::-1]:  # reverse buffer r
                        v_s_ = r + GAMMA * v_s_
                        buffer_v_target.append(v_s_)
                    buffer_v_target.reverse()

                    buffer_s = np.vstack(buffer_s)
                    buffer_a = np.vstack(buffer_a)
                    buffer_v_target = np.vstack(buffer_v_target)
                    feed_dict = {
                        self.AC.s: buffer_s,
                        self.AC.a_his: buffer_a,
                        self.AC.v_target: buffer_v_target,
                    }
                    test = self.AC.update_global(feed_dict)
                    buffer_s, buffer_a, buffer_r = [], [], []
                    self.AC.pull_global()

                s = s_
                total_step += 1
                if done or ep_t == MAX_EP_STEP - 1:
                    if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
                        GLOBAL_RUNNING_R.append(ep_r)
                    else:
                        GLOBAL_RUNNING_R.append(0.9 * GLOBAL_RUNNING_R[-1] + 0.1 * ep_r)
                    print(
                        self.name,
                        "Ep:",
                        GLOBAL_EP,
                        "| Ep_r: %i" % GLOBAL_RUNNING_R[-1],
                        '| Var:',
                        test,
                    )
                    GLOBAL_EP += 1
                    break
Example #6
tf.reset_default_graph()
MAX_GLOBAL_EP = 1000
MAX_EP_STEP = 100
UPDATE_GLOBAL_ITER = 5
N_WORKERS = multiprocessing.cpu_count()
LR_A = 1e-4  # learning rate for actor
LR_C = 2e-4  # learning rate for critic
GAMMA = 0.9  # reward discount
MODE = ['easy', 'hard']
n_model = 1
GLOBAL_NET_SCOPE = 'Global_Net'
ENTROPY_BETA = 0.01
GLOBAL_RUNNING_R = []
GLOBAL_EP = 0

env = CarEnv()
N_S = 2 + env.O_LC
N_A = 1
A_BOUND = env.action_bound
del env


class ACNet(object):
    def __init__(self, scope, globalAC=None):

        if scope == GLOBAL_NET_SCOPE:  # get global network
            with tf.variable_scope(scope):
                self.s = tf.placeholder(tf.float32, [None, N_S], 'S')
                self._build_net()
                self.a_params = tf.get_collection(
                    tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope + '/actor')
Example #7
#################################
# setup
#################################

base_output_dir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(base_output_dir)

tensorboard_dir = base_output_dir + "/tensorboard/"
os.makedirs(tensorboard_dir)
summary_writer = tf.summary.create_file_writer(tensorboard_dir)
with summary_writer.as_default():
    tf.summary.text('params', str(args), step=0)

State.setup(args)

environment = CarEnv(args)
replay_memory = replay.ReplayMemory(base_output_dir, args)
dqn = dqn.DeepQNetwork(environment.get_num_actions(),
                       environment.get_state_size(), replay_memory,
                       base_output_dir, tensorboard_dir, args)

train_epsilon = args.epsilon  # don't reset epsilon between epochs
start_time = datetime.datetime.now()
train_episodes = 0
eval_episodes = 0
episode_train_reward_list = []
episode_eval_reward_list = []

#################################
# stop handler
#################################
Example #8
from car_env import CarEnv
import pygame

env = CarEnv()

state = env.reset()
close_screen = False

while True:
    action = 4

    for event in pygame.event.get():
        if event.type == pygame.KEYDOWN and event.key == pygame.K_DOWN: action = 0
        if event.type == pygame.KEYDOWN and event.key == pygame.K_RIGHT: action = 1
        if event.type == pygame.KEYDOWN and event.key == pygame.K_UP: action = 2
        if event.type == pygame.KEYDOWN and event.key == pygame.K_LEFT: action = 3
        if event.type == pygame.QUIT:
            close_screen = True
    next_state, reward, done, info = env.step(action)
    env.render()

    if done or close_screen:
        break
pygame.display.quit()
pygame.quit()

Example #9
    color = (255, 128, 0)
    size = 5
    pygame.draw.rect(screen, color,
                     pygame.Rect(env.x_target, env.y_target, size, size))


def draw_state(screen, env):
    car_info = env.Car.get_info()
    draw_car(screen, car_info)

    draw_target(screen, env)


if __name__ == "__main__":

    env = CarEnv()
    agent = Agent()

    pygame.init()
    screen = pygame.display.set_mode((env.x_upper, env.y_upper))
    clock = pygame.time.Clock()
    done = False

    while not env.is_done():
        agent.step(env)
        screen.fill((0, 0, 0))  # erase screen
        draw_state(screen, env)
        for event in pygame.event.get():
            pass

        pygame.display.flip()  # refresh screen
Example #10
import threading, queue
from car_env import CarEnv

tf.reset_default_graph()
EP_MAX = 1000
EP_LEN = 100
N_WORKER = 4  # parallel workers
GAMMA = 0.9  # reward discount factor
A_LR = 0.0001  # learning rate for actor
C_LR = 0.0005  # learning rate for critic
MIN_BATCH_SIZE = 64  # minimum batch size for updating PPO
UPDATE_STEP = 5  # loop update operation n-steps
EPSILON = 0.2  # clipping range for the clipped surrogate objective
n_model = 1

env = CarEnv()
S_DIM = 2 + env.O_LC
A_DIM = 1
A_BOUND = env.action_bound[1]


class PPO(object):
    def __init__(self):
        self.sess = tf.Session()

        self.tfs = tf.placeholder(tf.float32, [None, S_DIM], 'state')

        # critic
        l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu)
        self.v = tf.layers.dense(l1, 1)
        self.tfdc_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r')
Example #11
MAX_EPISODES = 2000
MAX_EP_STEPS = 1000
LR_A = 1e-4  # learning rate for actor
LR_C = 1e-4  # learning rate for critic
GAMMA = 0.9  # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 2000
BATCH_SIZE = 16
VAR_MIN = 0.1
RENDER = True
LOAD = True
DISCRETE_ACTION = False

env = CarEnv(discrete_action=DISCRETE_ACTION)
env2 = CarEnv(discrete_action=DISCRETE_ACTION, self_obstacle=True)
STATE_DIM = env.state_dim
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholder for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):
Example #12
        sys.version_info.minor,
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    print("Failed to find carla's .egg directory")

import carla
from carla import *
from car_env import CarEnv
import random
import time

run = False 
PORT = 2069
IMG_WIDTH = 640
IMG_HEIGHT = 480
spawn = CarEnv(port=2069) # <-- Remove when not testing :D 
spawn.vehicle_list[0].set_autopilot(enabled=True)

#black = np.zeros(shape=(IMG_HEIGHT, IMG_WIDTH,3))
CAMERA_MEM = None  # stores the current frame received from the cameras; starts empty (None)

def processImage(data, folder):
    global CAMERA_MEM
    i = np.array(data.raw_data)
    i2 = np.reshape(i, (IMG_HEIGHT, IMG_WIDTH, 4))
    i3 = i2[:, :, :3]
    CAMERA_MEM = i3
    data.save_to_disk(f'{folder}/%06d.png' % data.frame)
    print(dir(data))
    print(type(CAMERA_MEM))
    # return i3/255.0
Example #13
def main():
    try:
        global RADAR_MEM
        car_env = CarEnv(port=2069)
        car = car_env.vehicle_list[0]
        sensors = car_env.sensor_list
        car.set_autopilot(enabled=True)
        HFOV = (car_env.hfov*math.pi)/180
        VFOV = (car_env.vfov*math.pi)/180

        for sensor in sensors:
            if sensor.type_id == 'sensor.other.radar':
                sensor.listen(lambda data: parse_data(data))

        pygame.init()
        clock = pygame.time.Clock()
        clock.tick(60)
        display = (1280, 720)
        pygame.display.set_mode(display, DOUBLEBUF | OPENGL)

        gluPerspective(120, (display[0]/display[1]), 0.1, 200.0)

        glRotate(0, 0, 0, 0)
        glTranslatef(0.0, 0.0, -3)

        while True:
            events = pygame.event.get()
            keys = pygame.key.get_pressed()
            pressed_mouse = pygame.mouse.get_pressed()

            for event in events:
                if event.type == pygame.QUIT:
                    pygame.quit() 
                    exit()

                if event.type == pygame.MOUSEBUTTONDOWN:
                    if event.button == 4: # wheel rolled up
                        glScaled(1.10, 1.10, 1.10)
                    if event.button == 5: # wheel rolled down
                        glScaled(0.9, 0.9, 0.9)

            if pressed_mouse[1]:
                ms = pygame.mouse.get_rel()
                glRotate(2, ms[1], ms[0], 0)

            if pressed_mouse[2]:
                ms = pygame.mouse.get_rel()
                glTranslatef(ms[0]/100, -1 * ms[1]/100, 0)

            if keys[pygame.K_UP]:
                glRotate(0.1, -1, 0, 0)
            if keys[pygame.K_DOWN]:
                glRotate(0.1, 1, 0, 0)
            if keys[pygame.K_LEFT]:
                glRotate(0.1, 0, -1, 0)
            if keys[pygame.K_RIGHT]:
                glRotate(0.1, 0, 1, 0)
            if keys[pygame.K_s]:
                glTranslatef(0.0, 0.0, -1)
            if keys[pygame.K_w]:
                glTranslatef(0.0, 0.0, 1)
                   

            glClear(GL_COLOR_BUFFER_BIT|GL_DEPTH_BUFFER_BIT)
            Render()
            pygame.display.flip()

            if len(RADAR_MEM) >= car_env.radartick:
                RADAR_MEM = []

    except (KeyboardInterrupt, SystemExit):
        car.destroy()
        for sensor in sensors:
            sensor.destroy()
        pygame.quit()
        sys.exit()
Example #14
    def __init__(self, name, globalAC):
        self.env = CarEnv()
        self.name = name
        self.AC = ACNet(name, globalAC)
Example #15
    def __init__(self, wid):
        self.wid = wid
        self.env = CarEnv()
        self.ppo = GLOBAL_PPO
Example #16
MAX_EPISODES = 500  # maximum number of episodes
MAX_EP_STEPS = 600  # maximum number of steps per episode
LR_A = 1e-4  # learning rate for actor
LR_C = 1e-4  # learning rate for critic
GAMMA = 0.9  # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 2000  # replay memory capacity
BATCH_SIZE = 16
VAR_MIN = 0.1
RENDER = True  # open the render window
LOAD = False  # train from scratch, do not load a previously trained model
DISCRETE_ACTION = False

env = CarEnv(discrete_action=DISCRETE_ACTION)
STATE_DIM = env.state_dim
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholder for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):  # actor network
    def __init__(self, sess, action_dim, action_bound, learning_rate, t_replace_iter):
Example #17
        for steps in range(MAX_EP_STEPS):

            if RENDER: env.render()

            # Put Your Code Here

            if done:
                agent.append_data(ep_steps=steps)
                break


if __name__ == '__main__':

    global agent, ep, TEST

    env = CarEnv(map_set=0)

    agent = DQN(
        n_input=env.n_sensor,
        n_output=env.n_actions,
        gamma=0.96,
        beta=0.3,
        memory_size=2000,
        batch_size=32,
        epsilon=0.8,
        epsilon_decay=0.996,
        epsilon_min=0.02,
        show=True,
    )

    agent._build_net()
Example #18
        'win-amd64' if os.name == 'nt' else 'linux-x86_64'))[0])
except IndexError:
    print("Failed to find carla's .egg directory")
import carla
from carla import *

import random

parser = argparse.ArgumentParser(description='loads a model and runs it')
parser.add_argument('model', metavar='-m', type=str, nargs='?', help='directory to get model')
args = parser.parse_args()

# Some variables
IMAGE_MEM = []

car_env = CarEnv(port=2069)
car = car_env.vehicle
sensors = car_env.sensor_list

SCL = 4
img_h = int(car_env.im_height/SCL)
img_w = int(car_env.im_width/SCL)


def clean():
    car_env.destroy()
    sys.exit()

def processimg(data, sensorID):
    i = np.array(data.raw_data)
    i2 = np.reshape(i, (img_h*SCL, img_w*SCL, 4))
Example #19
                    default=0.1,
                    help="epsilon with decay doesn't fall below epsilon min")
parser.add_argument("--tensorboard-logging-freq",
                    type=int,
                    default=300,
                    help="save training statistics once every X steps")
args = parser.parse_args()

print('Arguments: ', (args))

baseOutputDir = 'run-out-' + time.strftime("%Y-%m-%d-%H-%M-%S")
os.makedirs(baseOutputDir)

State.setup(args)

environment = CarEnv(args, baseOutputDir)
dqn = dqn.DeepQNetwork(environment.getNumActions(), baseOutputDir, args)
replayMemory = replay.ReplayMemory(args)

stop = False


def stop_handler():
    global stop
    while not stop:
        user_input = input()
        if user_input == 'q':
            print("Stopping...")
            stop = True

Example #20
    def env_init(self):
        self.env = CarEnv()
        self.state = self.env.reset()
        self.send_state_get_action(self.state)

        self.var = 1
Example #21
tf.set_random_seed(1)

MAX_EPISODES = 4000
MAX_EP_STEPS = 800
LR_A = 1e-4  # learning rate for actor
LR_C = 1e-4  # learning rate for critic
GAMMA = 0.98  # reward discount
REPLACE_ITER_A = 800
REPLACE_ITER_C = 700
MEMORY_CAPACITY = 4000
BATCH_SIZE = 16
VAR_MIN = 0.02
RENDER = False
LOAD = False
carn = 1
env = CarEnv(False, carn)
STATE_DIM = env.state_dim + 3
ACTION_DIM = env.action_dim
ACTION_BOUND = env.action_bound

# all placeholder for tf
with tf.name_scope('S'):
    S = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s')
with tf.name_scope('R'):
    R = tf.placeholder(tf.float32, [None, 1], name='r')
with tf.name_scope('S_'):
    S_ = tf.placeholder(tf.float32, shape=[None, STATE_DIM], name='s_')


class Actor(object):
    def __init__(self, sess, action_dim, action_bound, learning_rate,