def validate_network(network_filepath, track, max_speed, reward_writer, n_lap=10):
    print("Validating...")

    def action_limit_function(speed, action, observation):
        # Clamp the throttle and cut it completely once the car exceeds the speed cap
        # (observation[21] is the normalized longitudinal speed, scaled back by 300).
        if action[1] > 1:
            action[1] = 1
        if observation[21] * 300 > speed:
            action[1] = 0
        return action

    load_filepath = network_filepath
    # Use splitext so directories or epsilon values containing dots are not truncated.
    save_filepath = os.path.splitext(network_filepath)[0] + '_validated.h5f'

    print('Validating with speed', max_speed)
    DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                    load_file_path=load_filepath, save_file_path=save_filepath,
                    verbose=1, timeout=40000, epsilon=0, nb_steps=300000,
                    action_limit_function=lambda a, s: action_limit_function(max_speed, a, s),
                    nb_max_episode_steps=1000000, n_lap=n_lap)
    reward_writer.completed_track()
def train_on_chosen_tracks(chosen_tracks, epsilons, steps, root_dir):
    root_dir = 'runs/' + root_dir + '/'
    rewards_filepath = root_dir + 'rewards.csv'
    remaining_tracks_filepath = root_dir + 'tracks_to_test.json'
    last_network_filepath = root_dir + 'last_network.txt'

    # Fresh run: create the run directory. Otherwise resume from the last saved network.
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
        load_filepath = ''
        i = 0
    else:
        load_filepath, i = TrackUtilities.load_last_network_path(last_network_filepath)

    reward_writer = RewardWriter(rewards_filepath)

    tracks = TrackUtilities.load_tracks(remaining_tracks_filepath)
    if not tracks:
        tracks = TrackUtilities.create_tracks_list(chosen_tracks, epsilons)
        TrackUtilities.order_tracks(tracks)

    save_filepath = load_filepath
    for epsilon in epsilons:
        while len(tracks[str(epsilon)]) > 0:
            track = tracks[str(epsilon)][0]
            load_filepath = save_filepath
            save_filepath = root_dir + str(i) + '_' + track + '_' + str(epsilon) + '.h5f'

            reward_writer.write_track(track, epsilon)
            print('Track name:', track)
            print('Epsilon:', epsilon)

            DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                            nb_steps=steps, load_file_path=load_filepath,
                            save_file_path=save_filepath, verbose=1, timeout=40000,
                            epsilon=epsilon)

            # Persist progress so the run can resume after a crash.
            tracks[str(epsilon)].remove(track)
            i += 1
            TrackUtilities.save_remaining_tracks(tracks, remaining_tracks_filepath)
            TrackUtilities.save_last_network_path(last_network_filepath, save_filepath, i)
            reward_writer.completed_track()
            print()
            print()
def convert_all(h5f_folder, dlj4_folder):
    actor_model = DDPGTorcs.get_actor((29,), (2,))
    for file_name in os.listdir(h5f_folder):
        if '.h5f' in file_name:
            out_file = file_name.replace('h5f', 'ffn')
            convert_h5f_dlj4(actor_model, h5f_folder + '/' + file_name,
                             dlj4_folder + '/' + out_file)
def train_on_single_track(root_dir, track='aalborg', epsilon=0.5, steps=500000,
                          load=False, load_filepath='', noise=1, n_lap=None):
    root_dir = 'runs/' + root_dir
    rewards_filepath = root_dir + '/rewards.csv'
    gui = True
    save = True

    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    reward_writer = RewardWriter(rewards_filepath)
    save_file_path = root_dir + '/' + track + '_' + str(epsilon) + '.h5f'

    reward_writer.write_track(track, epsilon)
    print('Track name:', track)
    print('Epsilon:', epsilon)

    DDPGTorcs.train(reward_writer, load=load, gui=gui, save=save, track=track,
                    nb_steps=steps, load_file_path=load_filepath,
                    save_file_path=save_file_path, verbose=1, timeout=40000,
                    epsilon=epsilon, noise=noise, n_lap=n_lap)
    reward_writer.completed_track()
    print()
    print()
def test_network(track, load_filepath, n_lap):
    # DDPGTorcs.test(None, load_filepath, track=track)
    env = TorcsEnv(gui=True, timeout=10000, track=track, reward=DefaultReward(),
                   n_lap=n_lap)
    model = DDPGTorcs.get_loaded_actor(load_filepath, env.observation_space.shape,
                                       env.action_space.shape)

    observation = env.reset()
    # Drive with the loaded actor until the process is stopped.
    while True:
        action = model.predict(np.array([np.array([observation])]))[0]
        observation, reward, done, info = env.step(action)
def test_ensemble(models_filepaths, track, n_lap):
    env = TorcsEnv(gui=True, timeout=10000, track=track, reward=DefaultReward(),
                   n_lap=n_lap)

    accel_dump = open('dumps/accel_dump.dat', 'w')
    steer_dump = open('dumps/steer_dump.dat', 'w')

    models = []
    for filepath in models_filepaths:
        models.append(DDPGTorcs.get_loaded_actor(filepath,
                                                 env.observation_space.shape,
                                                 env.action_space.shape))

    observation = env.reset()
    sensors = open("sensors_python.dat", "w")
    while True:
        # Query every actor in the ensemble on the current observation.
        actions = []
        for model in models:
            result = model.predict(np.array([np.array([observation])]))[0]
            actions.append(result)

        # Dump each member's acceleration and steering for later inspection.
        for action in actions:
            print(action[1], file=accel_dump)
            print(action[0], file=steer_dump)
        print('', file=accel_dump)
        print('', file=steer_dump)

        # action = TrackUtilities.Elaborations.avg_min_elaboration(actions)
        action = TrackUtilities.Elaborations.avg_avg_elaboration(actions)
        observation, reward, done, info = env.step(action)

        for ob in observation:
            sensors.write(" " + str(ob))
        sensors.write("\n")
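# For reference, a minimal sketch of an element-wise averaging elaboration follows.
# It assumes avg_avg_elaboration simply averages each action component (steer, accel)
# across the ensemble members; this is an assumption for illustration, not the
# implementation in TrackUtilities.Elaborations.
def example_avg_elaboration(actions):
    """Hypothetical helper: element-wise mean of the ensemble's actions."""
    stacked = np.array(actions)   # shape: (n_models, n_action_components)
    return stacked.mean(axis=0)   # averaged [steer, accel] command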
def train_on_all_tracks(root_dir='all_tracks'):
    # Set to True to restart from scratch while seeding training with a pre-trained network.
    start_with_trained_network = False

    root_dir = 'runs/' + root_dir + '/'
    epsilons = [0.5, 0.1, 0]
    tracks_to_test_filepath = root_dir + 'tracks_to_test.json'
    network_filepath = root_dir + 'trained_networks/test_'
    last_network_filepath = root_dir + 'trained_networks/last_network.txt'
    rewards_filepath = root_dir + 'rewards.csv'

    reward_writer = RewardWriter(rewards_filepath)

    tracks = TrackUtilities.load_tracks(tracks_to_test_filepath)
    if not tracks:
        tracks = TrackUtilities.create_complete_tracks_list(epsilons)
        TrackUtilities.order_tracks(tracks)

    # Load the right network.
    if start_with_trained_network:
        load_filepath = 'trained_networks/pre_trained.h5f'
        i = 0
    else:
        load_filepath, i = TrackUtilities.load_last_network_path(last_network_filepath)

    save_filepath = load_filepath
    for epsilon in epsilons:
        while len(tracks[str(epsilon)]) > 0:
            track = tracks[str(epsilon)][0]
            if i != 0:
                load_filepath = save_filepath
            save_filepath = network_filepath + str(i) + '_' + track + '_' + str(epsilon) + '.h5f'

            print('Track name:', track)
            print('Epsilon:', epsilon)
            print()

            # Write the track name before the run so a crash still leaves a log entry.
            reward_writer.write_track(track, epsilon)

            try:
                DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                                nb_steps=100000, load_file_path=load_filepath,
                                save_file_path=save_filepath, verbose=1, timeout=40000,
                                epsilon=epsilon)
                i += 1
                tracks[str(epsilon)].remove(track)
                TrackUtilities.save_remaining_tracks(tracks, tracks_to_test_filepath)
                TrackUtilities.save_last_network_path(last_network_filepath, save_filepath, i)
                reward_writer.completed_track()
            except Exception:
                # TORCS crashed: roll back to the last saved network and log the bad run.
                save_filepath = load_filepath
                reward_writer.bad_run()
                reward_writer.completed_track()
def curriculum_learning_on_track(track, root_dir, initial_speed=30, initial_epsilon=0.5,
                                 max_speed=350, speed_step=5, n_lap=2,
                                 validation_lap_number=3, nb_steps=300000):
    speed = initial_speed
    epsilon = initial_epsilon
    last_working_network_filepath = ''

    def action_limit_function(speed, action, observation):
        # De-normalize the sensors used by the heuristic: observation[10] is the frontal
        # track sensor (scaled by 200) and observation[21] the longitudinal speed
        # (scaled by 300).
        forward_distance = observation[10] * 200
        speed_x = observation[21] * 300
        # Empirical braking-distance curve as a function of the current speed.
        braking_space = 0.000851898 * pow(speed_x, 2) + 0.104532 * speed_x - 2.03841

        if forward_distance < braking_space + 15:
            # Not enough room ahead to brake comfortably: release the throttle.
            if action[1] > 0:
                action[1] = 0
        elif speed_x > speed:
            # Above the current curriculum speed cap: release the throttle.
            action[1] = 0
        elif speed_x < 25:
            # Too slow: force full throttle.
            action[1] = 1
        return action

    root_dir = 'runs/' + root_dir + '/'
    rewards_filepath = root_dir + 'rewards.csv'
    last_network_filepath = root_dir + 'last_network.txt'

    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
        load_filepath = ''
    else:
        load_filepath = TrackUtilities.load_last_network_path(last_network_filepath)

    reward_writer = RewardWriter(rewards_filepath)

    save_filepath = load_filepath
    while speed < max_speed:
        load_filepath = save_filepath
        save_filepath = root_dir + track + '_speed' + str(speed) + '.h5f'

        reward_writer.write_track(track, epsilon)
        print('max_speed:', speed)

        laps = DDPGTorcs.train(reward_writer, load=True, gui=False, save=True, track=track,
                               load_file_path=load_filepath, save_file_path=save_filepath,
                               verbose=1, timeout=40000, epsilon=epsilon,
                               action_limit_function=lambda a, s: action_limit_function(speed, a, s),
                               nb_steps=nb_steps, nb_max_episode_steps=1000000, n_lap=n_lap)
        print()
        print()

        if laps == n_lap:
            # The current speed cap was mastered: keep this network and raise the cap.
            reward_writer.completed_track()
            TrackUtilities.save_last_network_path(last_network_filepath, save_filepath)
            speed += speed_step
            last_working_network_filepath = save_filepath
        else:
            break

    TrackUtilities.validate_network(last_working_network_filepath, track, speed - speed_step,
                                    reward_writer, n_lap=validation_lap_number)
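# Hypothetical usage sketch (not part of the original module): one way the routines
# above could be chained from a driver script. The run name, track, and parameter
# values are illustrative assumptions, not values taken from the project.
if __name__ == '__main__':
    # Train on a single track, then replay the saved actor on the same track.
    train_on_single_track(root_dir='example_run', track='aalborg', epsilon=0.5,
                          steps=500000)
    test_network(track='aalborg',
                 load_filepath='runs/example_run/aalborg_0.5.h5f',
                 n_lap=3)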