def run():
    """Generate random tracks and report how often they contain intersections.

    A two-track ``CarRacing`` environment is reset ``num_tracks`` times. After
    each reset, ``env.info['x']`` and ``env.info['t']`` are inspected and the
    track is counted if at least one entry is true. A summary is printed at
    the end: number of tracks, measured average time per track in seconds,
    and the percentage of tracks with x and with t intersections.
    """
    import time  # local import: only this helper needs it

    env = CarRacing(
        allow_reverse=False,
        show_info_panel=False,
        num_tracks=2,
        num_lanes=2,
        num_lanes_changes=0,
        num_obstacles=100,
        random_obstacle_x_position=False,
        random_obstacle_shape=False,
    )

    x_count = 0
    t_count = 0
    num_tracks = 300

    start = time.time()
    for i in range(num_tracks):
        env.reset()
        print("track %i / %i" % (i, num_tracks))
        # Element-wise comparison on the info column; kept as ``== True``
        # because the column's dtype is not visible from here.
        if (env.info['x'] == True).sum() > 0:
            x_count += 1
        if (env.info['t'] == True).sum() > 0:
            t_count += 1
        print('')
    elapsed = time.time() - start

    print('num of tracks:', str(num_tracks))
    # Report the measured average instead of a hard-coded placeholder.
    print('time took per track is {0}s'.format(elapsed / num_tracks))
    # The label says "%", so scale the fraction to a percentage.
    print('{0}% of tracks have x intersections'.format(100.0 * x_count / num_tracks))
    print('{0}% of tracks have t intersections'.format(100.0 * t_count / num_tracks))
def find_roads():
    """Generate 100 two-track maps and dump a screenshot plus raw data for each.

    The directory ``./touching_tracks_tests`` is removed if present and then
    created again. For every generated map the tiles of the first track are
    walked in order; ``set_trace()`` is triggered when element 0 of a tile
    differs from element 1 of the tile before it, or when its element 1
    differs from element 0 of the tile after it. Afterwards a high-quality
    screenshot, ``env.info`` and both tracks are written to the directory.
    """
    out_dir = './touching_tracks_tests'

    # Always start from an empty output directory.
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    env = CarRacing(
        allow_reverse=False,
        show_info_panel=False,
        num_tracks=2,
        num_lanes=2,
        num_lanes_changes=0,
        num_obstacles=100,
        random_obstacle_x_position=False,
        random_obstacle_shape=False,
    )
    env.change_zoom()

    for j in range(100):
        env.reset()

        first_track = env.tracks[0]
        for idx in range(len(first_track)):
            # Negative indices wrap around, so the first iterations compare
            # against the tiles at the end of the track.
            before = first_track[idx - 2]
            middle = first_track[idx - 1]
            after = first_track[idx]
            if any(middle[0] != before[1]) or any(middle[1] != after[0]):
                set_trace()

        tag = str(j)
        env.screenshot(out_dir, name=tag, quality='high')
        # np.save appends ".npy" to file names that do not already end in it.
        np.save(out_dir + "/info_" + tag + ".csv", env.info)
        np.save(out_dir + "/track0_" + tag + ".csv", env.tracks[0])
        np.save(out_dir + "/track1_" + tag + ".csv", env.tracks[1])
def __init__(self, name="car_racing", renders=False, record_data=False, is_discrete=True, state_dim=-1,
             learn_states=False, save_path='srl_zoo/data/', srl_model="raw_pixels", env_rank=0, srl_pipe=None,
             lookahead=5, **_):
    """
    Gym wrapper for the racing car environment.

    WARNING: to be compatible with kuka scripts, additional keyword arguments are discarded

    :param name: (str) name of the folder where recorded data will be stored
    :param renders: (bool) Whether to display the GUI or not
    :param record_data: (bool) When true, an EpisodeSaver is created to record frames with the rewards
    :param is_discrete: (bool) Whether to use discrete or continuous actions
    :param state_dim: (int) state dimension, forwarded to the EpisodeSaver when recording
    :param learn_states: (bool) forwarded to the EpisodeSaver when recording
    :param save_path: (str) location where the saved data should go
    :param srl_model: (str) The SRL model used
    :param env_rank: (int) the number ID of the environment
    :param srl_pipe: (Queue, [Queue]) contains the input and output of the SRL model
    :param lookahead: (int) How many segments ahead of the current position of the track should the target be
    """
    SRLGymEnv.__init__(self, srl_model=srl_model, relative_pos=RELATIVE_POS, env_rank=env_rank, srl_pipe=srl_pipe)
    GymCarRacing.__init__(self)

    # Rendering configuration
    self._renders = renders
    self._width, self._height = RENDER_WIDTH, RENDER_HEIGHT

    # Task configuration and per-episode bookkeeping
    self._is_discrete = is_discrete
    self.lookahead = lookahead
    self.relative_pos = RELATIVE_POS
    self._env_step_counter = 0
    self._observation = None

    # Recording is optional: without it no saver is attached.
    self.saver = (
        EpisodeSaver(name, None, state_dim, globals_=getGlobals(), relative_pos=RELATIVE_POS,
                     learn_states=learn_states, path=save_path)
        if record_data else None
    )

    # Accelerate, brake, steer left, steer right
    if not self._is_discrete:
        self.action_space = spaces.Box(low=-1, high=1, shape=(3,), dtype=np.float32)
    else:
        self.action_space = spaces.Discrete(N_DISCRETE_ACTIONS)

    if self.srl_model == "ground_truth":
        self.state_dim = self.getGroundTruthDim()

    if self.srl_model != "raw_pixels":
        # Any learned or ground-truth state: unbounded vector of size state_dim.
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.state_dim,), dtype=np.float32)
    else:
        # Raw pixels: 8-bit image with three channels.
        image_shape = (self._height, self._width, 3)
        self.observation_space = spaces.Box(low=0, high=255, shape=image_shape, dtype=np.uint8)
def test_two_lanes_with_no_lane_changes(self):
    """Check ``_get_extremes_of_position`` on a two-lane track without lane changes."""
    env = CarRacing(num_lanes=2, num_lanes_changes=0)
    env.reset()

    with_border = env._get_extremes_of_position(0, border=1)
    assert with_border == (-3-1/3, 3+1/3)

    without_border = env._get_extremes_of_position(1, border=0)
    assert without_border == (-6-2/3, +6+2/3)

    env.close()
    del env
def evaluate():
    """Drive five episodes of 500 steps with the modular pipeline and print each reward.

    Every step feeds the current action to the environment, detects the two
    lane boundaries in the returned frame, predicts waypoints and a target
    speed, and then overwrites the action in place with the output of the
    lateral (Stanley) and longitudinal controllers. The accumulated reward
    of each episode is printed when the episode ends.
    """
    # action variables (shared across episodes, updated in place)
    action = np.zeros(3)

    # init environment
    env = CarRacing()
    env.render()
    env.reset()

    n_episodes = 5
    n_steps = 500

    for episode in range(n_episodes):
        env.reset()

        # fresh pipeline modules for every episode
        lane_detector = LaneDetection()
        lateral_ctrl = LateralController()
        longitudinal_ctrl = LongitudinalController()

        episode_reward = 0
        for _ in range(n_steps):
            # perform step
            frame, reward, done, speed, info = env.step(action)

            # lane detection
            left_lane, right_lane = lane_detector.lane_detection(frame)

            # waypoint and target_speed prediction
            waypoints = waypoint_prediction(left_lane, right_lane)
            target_speed = target_speed_prediction(waypoints, max_speed=60, exp_constant=4.5)

            # control
            action[0] = lateral_ctrl.stanley(waypoints, speed)
            action[1], action[2] = longitudinal_ctrl.control(speed, target_speed)

            # reward
            episode_reward += reward
            env.render()

        print('episode %d \t reward %f' % (episode, episode_reward))
def game_runner():
    """Let a human drive ``CarRacing`` with the keyboard and print the result.

    LEFT/RIGHT set ``a[0]`` to -1/+1, UP sets ``a[1]`` to 1 and DOWN sets
    ``a[2]`` to 0.8; releasing a key resets the matching entry to 0. The key
    with code ``0xff0d`` requests a restart of the current episode. An
    episode also ends once the step counter reaches 900, at which point the
    step count and total reward are printed. The function loops forever,
    starting a new episode after each one ends.
    """
    from pyglet.window import key

    a = np.array([0.0, 0.0, 0.0])
    restart = False

    def key_press(k, mod):
        # ``restart`` is a local of game_runner, so it must be declared
        # nonlocal. With ``global`` the handler wrote to an unrelated
        # module-level name and the restart key was silently ignored.
        nonlocal restart
        if k == 0xff0d:
            restart = True
        if k == key.LEFT:
            a[0] = -1.0
        if k == key.RIGHT:
            a[0] = +1.0
        if k == key.UP:
            a[1] = +1.0
        if k == key.DOWN:
            a[2] = +0.8

    def key_release(k, mod):
        # Only clear a[0] if the released key is the one that set it, so
        # releasing LEFT while RIGHT is held keeps the RIGHT value.
        if k == key.LEFT and a[0] == -1.0:
            a[0] = 0
        if k == key.RIGHT and a[0] == +1.0:
            a[0] = 0
        if k == key.UP:
            a[1] = 0
        if k == key.DOWN:
            a[2] = 0

    env = CarRacing()
    env.render()  # creates the viewer window the handlers attach to
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release

    while True:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            if steps == 900:
                print("\n")
                print("_______________________________")
                print("\n")
                print("Human Intelligence Result:")
                print("Total Steps: {}".format(steps))
                print("Total Reward: {:.0f}".format(total_reward))
                print("\n")
                print("_______________________________")
                print("\n")
                break
            steps += 1
            env.render()
            if restart:
                break
    # NOTE(review): unreachable, the outer loop above never terminates.
    env.monitor.close()
def test_two_tracks(self):
    """An environment built with ``num_tracks=2`` holds two tracks after a reset."""
    env = CarRacing(num_tracks=2)
    env.reset()

    track_count = len(env.tracks)
    assert track_count == 2

    env.close()
def multiple_runs(v, on):
    """Collect latent codes and actions over ``MAX_RUNS`` rollouts and save them.

    In every step the current state is converted with
    ``state_to_1_batch_tensor`` and passed to ``v``; the fourth value ``v``
    returns is detached, converted to NumPy and reshaped to 32 entries. The
    action comes from ``generate_action()``. Per run, codes and actions are
    stored as float16 arrays; all runs are stacked and written with
    ``np.savez_compressed`` under the keys ``action`` and ``z``.

    :param v: callable returning four values for a batched observation
    :param on: value formatted into the output file name
    """
    env = CarRacing()
    all_latents = []
    all_actions = []

    for run in range(MAX_RUNS):
        run_latents = []
        run_actions = []

        state = env.reset()
        env.render()  # must have!

        for game_time in range(MAX_GAME_TIME):
            action = generate_action()

            batch = state_to_1_batch_tensor(state)
            _, _, _, latent = v(batch)
            run_latents.append(latent.detach().numpy().reshape(32))
            run_actions.append(action)

            state, _reward, _done, _ = env.step(action)

            progress = 'RUN:{},GT:{},DATA:{}'.format(run, game_time, len(run_actions))
            print(progress)

        all_latents.append(np.array(run_latents, dtype=np.float16))
        all_actions.append(np.array(run_actions, dtype=np.float16))

    all_latents = np.array(all_latents)
    all_actions = np.array(all_actions)

    save_name = name_this + '_{}.npz'.format(on)
    np.savez_compressed(dst + '/' + save_name, action=all_actions, z=all_latents)
def test_one_track(self):
    """A default environment has no ``tracks`` before reset and exactly one after."""
    env = CarRacing()

    # Reading the attribute before the first reset must raise.
    with pytest.raises(AttributeError):
        env.tracks

    env.reset()
    track_count = len(env.tracks)
    assert track_count == 1

    env.close()
def run_caracing_by_hunman():
    """Let a human drive ``CarRacing`` with the keyboard, logging progress.

    LEFT/RIGHT set ``a[0]`` to -1/+1, UP sets ``a[1]`` to 1 and DOWN sets
    ``a[2]`` to 0.8; releasing a key resets the matching entry to 0. The key
    with code ``0xff0d`` requests a restart. Every 200 steps, and whenever
    the environment reports ``done``, the current action and the accumulated
    reward are printed. An episode ends on ``done`` or on a restart request,
    after which a new one starts; the function loops forever.
    """
    a = np.array([0.0, 0.0, 0.0])
    restart = False

    def key_press(k, mod):
        # ``restart`` is a local of the enclosing function, so it must be
        # declared nonlocal. With ``global`` the handler wrote to an
        # unrelated module-level name and the restart key had no effect.
        nonlocal restart
        if k == 0xff0d:
            restart = True
        if k == key.LEFT:
            a[0] = -1.0
        if k == key.RIGHT:
            a[0] = +1.0
        if k == key.UP:
            a[1] = +1.0
        if k == key.DOWN:
            a[2] = +0.8  # set 1.0 for wheels to block to zero rotation

    def key_release(k, mod):
        # Only clear a[0] if the released key is the one that set it.
        if k == key.LEFT and a[0] == -1.0:
            a[0] = 0
        if k == key.RIGHT and a[0] == +1.0:
            a[0] = 0
        if k == key.UP:
            a[1] = 0
        if k == key.DOWN:
            a[2] = 0

    env = CarRacing()
    env.render()  # creates the viewer window the handlers attach to
    env.viewer.window.on_key_press = key_press
    env.viewer.window.on_key_release = key_release

    while True:
        env.reset()
        total_reward = 0.0
        steps = 0
        restart = False
        while True:
            s, r, done, info = env.step(a)
            total_reward += r
            if steps % 200 == 0 or done:
                print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
                print("step {} total_reward {:+0.2f}".format(
                    steps, total_reward))
            steps += 1
            env.render()
            if done or restart:
                break
    # NOTE(review): unreachable, the outer loop above never terminates.
    env.monitor.close()
def manual_check_of_not_allowing_touching_tracks():
    """Save 100 high-quality screenshots of generated two-track maps for manual review.

    The directory ``./touching_tracks_tests`` is removed if it exists and
    created again before any screenshot is taken.
    """
    out_dir = './touching_tracks_tests'

    # Always start from an empty output directory.
    if os.path.isdir(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    env = CarRacing(
        allow_reverse=False,
        show_info_panel=False,
        num_tracks=2,
        num_lanes=2,
        num_lanes_changes=0,
        num_obstacles=100,
        random_obstacle_x_position=False,
        random_obstacle_shape=False,
    )
    env.change_zoom()

    for shot in range(100):
        env.reset()
        env.screenshot(out_dir, name=str(shot), quality='high')
from gym.envs.box2d.car_racing import CarRacing

if __name__ == '__main__':
    # Reset a single environment 1.5 million times, printing progress
    # before each reset and an empty line after it.
    env = CarRacing()
    total_resets = 1500000
    for done_so_far in range(total_resets):
        print("%i of %i" % (done_so_far, total_resets))
        env.reset()
        print("")
print('vae load success') # from pyglet.window import key action = np.array( [0.0, 0.0, 0.0] ) # def key_press(k, mod): # global restart # if k==0xff0d: restart = True # if k==key.LEFT: a[0] = -1.0 # if k==key.RIGHT: a[0] = +1.0e # if k==key.UP: a[1] = +1.0 # if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation # def key_release(k, mod): # if k==key.LEFT and a[0]==-1.0: a[0] = 0 # if k==key.RIGHT and a[0]==+1.0: a[0] = 0 # if k==key.UP: a[1] = 0 # if k==key.DOWN: a[2] = 0 env = CarRacing() env.render() # env.viewer.window.on_key_press = key_press # env.viewer.window.on_key_release = key_release while True: env.reset() total_reward = 0.0 steps = 0 restart = False angle=[] while True: obs, reward, done, info = env.step(action) total_reward += reward obs=transform(obs).view(1,3,64,64) recon_c, mu_c, var_c = model(obs) mu, sigma = mu_c, var_c
def key_press(k, mod):
    """Key-press handler: update the shared action ``a`` or request a restart.

    LEFT/RIGHT set ``a[0]`` to -1/+1, UP sets ``a[1]`` to 1, DOWN sets
    ``a[2]`` to 0.8. Key code ``0xff0d`` sets the global ``restart`` flag
    (presumably the Return key, to be confirmed against pyglet's key table).
    """
    global restart
    if k == 0xff0d:
        restart = True
    if k == key.LEFT:
        a[0] = -1.0
    if k == key.RIGHT:
        a[0] = +1.0
    if k == key.UP:
        a[1] = +1.0
    if k == key.DOWN:
        a[2] = +0.8  # set 1.0 for wheels to block to zero rotation


def key_release(k, mod):
    """Key-release handler: reset the action entry driven by the released key."""
    # a[0] is only cleared if it still holds the value set by the released
    # key, so releasing LEFT while RIGHT is active keeps the RIGHT value.
    if k == key.LEFT and a[0] == -1.0:
        a[0] = 0
    if k == key.RIGHT and a[0] == +1.0:
        a[0] = 0
    if k == key.UP:
        a[1] = 0
    if k == key.DOWN:
        a[2] = 0


env = CarRacing()
# Render once before attaching handlers to env.viewer.window.
env.render()
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release
while True:
    # Start a new episode and reset the per-episode bookkeeping.
    env.reset()
    total_reward = 0.0
    steps = 0
    restart = False
    while True:
        s, r, done, info = env.step(a)
        total_reward += r
        # Log the current action and accumulated reward when the step
        # counter is a multiple of 200 or the environment reports done.
        if steps % 200 == 0 or done:
            print("\naction " + str(["{:+0.2f}".format(x) for x in a]))
            print("step {} total_reward {:+0.2f}".format(
                steps, total_reward))
self.z_dim * 2] sigma = torch.exp(sigma) # N * seq_len, n_mixture pi = mixture[..., -self.n_mixture:] pi = F.softmax(pi, -1) # add temperature if self.tau > 0: pi /= self.tau sigma *= self.tau**0.5 return pi, mean, sigma, hidden_state # help(cma) # help(cma.CMAEvolutionStrategy) env = CarRacing() path = '/media/ray/SSD/workspace/python/dataset/save_here/model/' vae_name = 'vae_model.save' V = torch.load(path + vae_name) V = V.cpu() V.train(False) rnn_name = 'rnn.save' M = torch.load(path + rnn_name) M.cpu() M.train(False) num_z = 32 num_h = 256 input_size = num_z + num_h output_size = 3 para = np.random.random(input_size * output_size + output_size) print(para.shape)
def calculate_score_for_leaderboard():
    """
    Evaluate the performance of the network. This is the function to be used for
    the final ranking on the course-wide leader-board, only with a different set
    of seeds. Better not change it.

    Ten episodes of 600 steps are run, one per fixed seed. In every step the
    frame goes through lane detection, waypoint and target-speed prediction,
    and the lateral and longitudinal controllers write the next action in
    place. Each episode's reward is printed; negative episode rewards count
    as zero, and the printed total score is the sum divided by ten.
    """
    # action variables
    a = np.array([0.0, 0.0, 0.0])

    # init environement
    env = CarRacing()
    env.render()
    env.reset()

    seeds = [
        22597174, 68545857, 75568192, 91140053, 86018367,
        49636746, 66759182, 91294619, 84274995, 31531469
    ]

    total_reward = 0

    for episode in range(10):
        env.seed(seeds[episode])
        observation = env.reset()

        # init modules of the pipeline
        LD_module = LaneDetection(gradient_threshold=25, spline_smoothness=20)
        LatC_module = LateralController(gain_constant=1.8, damping_constant=0.05)
        LongC_module = LongitudinalController(KD=0.001)

        reward_per_episode = 0
        for t in range(600):
            # perform step
            s, r, done, speed, info = env.step(a)

            # lane detection
            lane1, lane2 = LD_module.lane_detection(s)

            # waypoint and target_speed prediction
            waypoints = waypoint_prediction(lane1, lane2)
            target_speed = target_speed_prediction(waypoints, max_speed=60, exp_constant=6)

            # control
            a[0] = LatC_module.stanley(waypoints, speed)
            a[1], a[2] = LongC_module.control(speed, target_speed)

            # reward
            reward_per_episode += r

            env.render()

        print('episode %d \t reward %f' % (episode, reward_per_episode))
        # np.inf replaces the np.infty alias, which NumPy 2.0 removed; the
        # clipping (negative episode rewards count as 0) is unchanged.
        total_reward += np.clip(reward_per_episode, 0, np.inf)

    print('---------------------------')
    print(' total score: %f' % (total_reward / 10))
    print('---------------------------')
def key_press(k, mod):
    """Key-press handler: update the shared action ``a`` or request a restart.

    LEFT/RIGHT set ``a[0]`` to -1/+1, UP sets ``a[1]`` to 1, DOWN sets
    ``a[2]`` to 0.8. Key code ``0xff0d`` sets the global ``restart`` flag
    (presumably the Return key, to be confirmed against pyglet's key table).
    """
    global restart
    if k==0xff0d: restart = True
    if k==key.LEFT: a[0] = -1.0
    if k==key.RIGHT: a[0] = +1.0
    if k==key.UP: a[1] = +1.0
    if k==key.DOWN: a[2] = +0.8 # set 1.0 for wheels to block to zero rotation


def key_release(k, mod):
    """Key-release handler: reset the action entry driven by the released key."""
    # a[0] is only cleared if it still holds the value set by the released key.
    if k==key.LEFT and a[0]==-1.0: a[0] = 0
    if k==key.RIGHT and a[0]==+1.0: a[0] = 0
    if k==key.UP: a[1] = 0
    if k==key.DOWN: a[2] = 0


# init environment
env = CarRacing()
# Render once before attaching handlers to env.viewer.window.
env.render()
env.viewer.window.on_key_press = key_press
env.viewer.window.on_key_release = key_release
env.reset()

# define variables
total_reward = 0.0
steps = 0
restart = False

# init modules of the pipeline
LD_module = LaneDetection()

# init extra plot
fig = plt.figure()
from gym.envs.box2d.car_racing import CarRacing
from tqdm import tqdm
import numpy as np

if __name__ == "__main__":
    # Step one freshly reset environment a million times with an all-zero
    # action, showing progress with tqdm; rendering stays disabled.
    env = CarRacing()
    env.reset()

    idle_action = np.zeros(3)
    total_steps = 1000000
    for _ in tqdm(range(total_steps)):
        env.step(idle_action)
        # env.render()
import numpy as np
from gym.envs.box2d.car_racing import CarRacing

if __name__ == '__main__':
    env = CarRacing(load_tracks_from="/hdd/Documents/HRL/tracks")
    # Keep only the rows of tracks_df whose 'x' or 't' column is true.
    env.tracks_df = env.tracks_df[(env.tracks_df['x']) | (env.tracks_df['t'])]

    repeat = 1000
    for i in range(repeat):
        print("%i of %i" % (i, repeat))
        env.reset()

        # Pick a random index whose intersection_id is set (not -1).
        candidates = np.where(env.info['intersection_id'] != -1)[0]
        dictionary = env.understand_intersection(np.random.choice(candidates), 1)
        print(dictionary)

        values = list(dictionary.values())

        # Checking that only one value can be None
        if values.count(None) > 1:
            print("breaking, more than 1 None")
            break

        # Every non-None value must be a list of exactly two items. The
        # original combined the two tests with ``and``, which missed lists
        # of the wrong length and called len() on non-list values; its
        # ``break`` also only left the inner loop.
        malformed = any(
            val is not None and (not isinstance(val, list) or len(val) != 2)
            for val in values
        )
        if malformed:
            print("Breaking, there is some value that "
                  "is not a list or a different kind of list")
            break

        print("")