def validate_network(network_filepath, track, max_speed, reward_writer, n_lap=10):
    print("Validating...")

    def action_limit_function(speed, action, observation):
        # Clamp the throttle and cut it completely once the car exceeds the speed cap
        # (observation[21] is the normalized longitudinal speed, scaled back by 300).
        if action[1] > 1:
            action[1] = 1
        if observation[21] * 300 > speed:
            action[1] = 0
        return action

    load_filepath = network_filepath
    # Use splitext so directories or epsilon values containing dots are not truncated.
    save_filepath = os.path.splitext(network_filepath)[0] + '_validated.h5f'

    print('Validating with speed', max_speed)
    DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                    load_file_path=load_filepath, save_file_path=save_filepath,
                    verbose=1, timeout=40000, epsilon=0, nb_steps=300000,
                    action_limit_function=lambda a, s: action_limit_function(max_speed, a, s),
                    nb_max_episode_steps=1000000, n_lap=n_lap)
    reward_writer.completed_track()
def train_on_chosen_tracks(chosen_tracks, epsilons, steps, root_dir):
    root_dir = 'runs/' + root_dir + '/'
    rewards_filepath = root_dir + 'rewards.csv'
    remaining_tracks_filepath = root_dir + 'tracks_to_test.json'
    last_network_filepath = root_dir + 'last_network.txt'

    # Fresh run: create the run directory. Otherwise resume from the last saved network.
    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
        load_filepath = ''
        i = 0
    else:
        load_filepath, i = TrackUtilities.load_last_network_path(last_network_filepath)

    reward_writer = RewardWriter(rewards_filepath)

    tracks = TrackUtilities.load_tracks(remaining_tracks_filepath)
    if not tracks:
        tracks = TrackUtilities.create_tracks_list(chosen_tracks, epsilons)
        TrackUtilities.order_tracks(tracks)

    save_filepath = load_filepath
    for epsilon in epsilons:
        while len(tracks[str(epsilon)]) > 0:
            track = tracks[str(epsilon)][0]
            load_filepath = save_filepath
            save_filepath = root_dir + str(i) + '_' + track + '_' + str(epsilon) + '.h5f'

            reward_writer.write_track(track, epsilon)
            print('Track name:', track)
            print('Epsilon:', epsilon)

            DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                            nb_steps=steps, load_file_path=load_filepath,
                            save_file_path=save_filepath, verbose=1, timeout=40000,
                            epsilon=epsilon)

            # Persist progress so the run can resume after a crash.
            tracks[str(epsilon)].remove(track)
            i += 1
            TrackUtilities.save_remaining_tracks(tracks, remaining_tracks_filepath)
            TrackUtilities.save_last_network_path(last_network_filepath, save_filepath, i)
            reward_writer.completed_track()
            print()
            print()
def convert_all(h5f_folder, dlj4_folder):
    actor_model = DDPGTorcs.get_actor((29,), (2,))
    for file_name in os.listdir(h5f_folder):
        if '.h5f' in file_name:
            out_file = file_name.replace('h5f', 'ffn')
            convert_h5f_dlj4(actor_model, h5f_folder + '/' + file_name,
                             dlj4_folder + '/' + out_file)
def train_on_single_track(root_dir, track='aalborg', epsilon=0.5, steps=500000,
                          load=False, load_filepath='', noise=1, n_lap=None):
    root_dir = 'runs/' + root_dir
    rewards_filepath = root_dir + '/rewards.csv'
    gui = True
    save = True

    if not os.path.exists(root_dir):
        os.makedirs(root_dir)

    reward_writer = RewardWriter(rewards_filepath)
    save_file_path = root_dir + '/' + track + '_' + str(epsilon) + '.h5f'

    reward_writer.write_track(track, epsilon)
    print('Track name:', track)
    print('Epsilon:', epsilon)

    DDPGTorcs.train(reward_writer, load=load, gui=gui, save=save, track=track,
                    nb_steps=steps, load_file_path=load_filepath,
                    save_file_path=save_file_path, verbose=1, timeout=40000,
                    epsilon=epsilon, noise=noise, n_lap=n_lap)
    reward_writer.completed_track()
    print()
    print()
def test_network(track, load_filepath, n_lap):
    # DDPGTorcs.test(None, load_filepath, track=track)
    env = TorcsEnv(gui=True, timeout=10000, track=track, reward=DefaultReward(),
                   n_lap=n_lap)
    model = DDPGTorcs.get_loaded_actor(load_filepath, env.observation_space.shape,
                                       env.action_space.shape)

    observation = env.reset()
    # Drive with the loaded actor until the process is stopped.
    while True:
        action = model.predict(np.array([np.array([observation])]))[0]
        observation, reward, done, info = env.step(action)
def test_ensemble(models_filepaths, track, n_lap):
    env = TorcsEnv(gui=True, timeout=10000, track=track, reward=DefaultReward(),
                   n_lap=n_lap)

    accel_dump = open('dumps/accel_dump.dat', 'w')
    steer_dump = open('dumps/steer_dump.dat', 'w')

    models = []
    for filepath in models_filepaths:
        models.append(DDPGTorcs.get_loaded_actor(filepath,
                                                 env.observation_space.shape,
                                                 env.action_space.shape))

    observation = env.reset()
    sensors = open("sensors_python.dat", "w")
    while True:
        # Query every actor in the ensemble on the current observation.
        actions = []
        for model in models:
            result = model.predict(np.array([np.array([observation])]))[0]
            actions.append(result)

        # Dump each member's acceleration and steering for later inspection.
        for action in actions:
            print(action[1], file=accel_dump)
            print(action[0], file=steer_dump)
        print('', file=accel_dump)
        print('', file=steer_dump)

        # action = TrackUtilities.Elaborations.avg_min_elaboration(actions)
        action = TrackUtilities.Elaborations.avg_avg_elaboration(actions)
        observation, reward, done, info = env.step(action)

        for ob in observation:
            sensors.write(" " + str(ob))
        sensors.write("\n")
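# For reference, a minimal sketch of an element-wise averaging elaboration follows.
# It assumes avg_avg_elaboration simply averages each action component (steer, accel)
# across the ensemble members; this is an assumption for illustration, not the
# implementation in TrackUtilities.Elaborations.
def example_avg_elaboration(actions):
    """Hypothetical helper: element-wise mean of the ensemble's actions."""
    stacked = np.array(actions)   # shape: (n_models, n_action_components)
    return stacked.mean(axis=0)   # averaged [steer, accel] command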
def train_on_all_tracks(root_dir='all_tracks'):
    # Set to True to restart from scratch while seeding training with a pre-trained network.
    start_with_trained_network = False

    root_dir = 'runs/' + root_dir + '/'
    epsilons = [0.5, 0.1, 0]
    tracks_to_test_filepath = root_dir + 'tracks_to_test.json'
    network_filepath = root_dir + 'trained_networks/test_'
    last_network_filepath = root_dir + 'trained_networks/last_network.txt'
    rewards_filepath = root_dir + 'rewards.csv'

    reward_writer = RewardWriter(rewards_filepath)

    tracks = TrackUtilities.load_tracks(tracks_to_test_filepath)
    if not tracks:
        tracks = TrackUtilities.create_complete_tracks_list(epsilons)
        TrackUtilities.order_tracks(tracks)

    # Load the right network.
    if start_with_trained_network:
        load_filepath = 'trained_networks/pre_trained.h5f'
        i = 0
    else:
        load_filepath, i = TrackUtilities.load_last_network_path(last_network_filepath)

    save_filepath = load_filepath
    for epsilon in epsilons:
        while len(tracks[str(epsilon)]) > 0:
            track = tracks[str(epsilon)][0]
            if i != 0:
                load_filepath = save_filepath
            save_filepath = network_filepath + str(i) + '_' + track + '_' + str(epsilon) + '.h5f'

            print('Track name:', track)
            print('Epsilon:', epsilon)
            print()

            # Write the track name before the run so a crash still leaves a log entry.
            reward_writer.write_track(track, epsilon)

            try:
                DDPGTorcs.train(reward_writer, load=True, gui=True, save=True, track=track,
                                nb_steps=100000, load_file_path=load_filepath,
                                save_file_path=save_filepath, verbose=1, timeout=40000,
                                epsilon=epsilon)
                i += 1
                tracks[str(epsilon)].remove(track)
                TrackUtilities.save_remaining_tracks(tracks, tracks_to_test_filepath)
                TrackUtilities.save_last_network_path(last_network_filepath, save_filepath, i)
                reward_writer.completed_track()
            except Exception:
                # TORCS crashed: roll back to the last saved network and log the bad run.
                save_filepath = load_filepath
                reward_writer.bad_run()
                reward_writer.completed_track()
def curriculum_learning_on_track(track, root_dir, initial_speed=30, initial_epsilon=0.5,
                                 max_speed=350, speed_step=5, n_lap=2,
                                 validation_lap_number=3, nb_steps=300000):
    speed = initial_speed
    epsilon = initial_epsilon
    last_working_network_filepath = ''

    def action_limit_function(speed, action, observation):
        # De-normalize the sensors used by the heuristic: observation[10] is the frontal
        # track sensor (scaled by 200) and observation[21] the longitudinal speed
        # (scaled by 300).
        forward_distance = observation[10] * 200
        speed_x = observation[21] * 300
        # Empirical braking-distance curve as a function of the current speed.
        braking_space = 0.000851898 * pow(speed_x, 2) + 0.104532 * speed_x - 2.03841

        if forward_distance < braking_space + 15:
            # Not enough room ahead to brake comfortably: release the throttle.
            if action[1] > 0:
                action[1] = 0
        elif speed_x > speed:
            # Above the current curriculum speed cap: release the throttle.
            action[1] = 0
        elif speed_x < 25:
            # Too slow: force full throttle.
            action[1] = 1
        return action

    root_dir = 'runs/' + root_dir + '/'
    rewards_filepath = root_dir + 'rewards.csv'
    last_network_filepath = root_dir + 'last_network.txt'

    if not os.path.exists(root_dir):
        os.makedirs(root_dir)
        load_filepath = ''
    else:
        load_filepath = TrackUtilities.load_last_network_path(last_network_filepath)

    reward_writer = RewardWriter(rewards_filepath)

    save_filepath = load_filepath
    while speed < max_speed:
        load_filepath = save_filepath
        save_filepath = root_dir + track + '_speed' + str(speed) + '.h5f'

        reward_writer.write_track(track, epsilon)
        print('max_speed:', speed)

        laps = DDPGTorcs.train(reward_writer, load=True, gui=False, save=True, track=track,
                               load_file_path=load_filepath, save_file_path=save_filepath,
                               verbose=1, timeout=40000, epsilon=epsilon,
                               action_limit_function=lambda a, s: action_limit_function(speed, a, s),
                               nb_steps=nb_steps, nb_max_episode_steps=1000000, n_lap=n_lap)
        print()
        print()

        if laps == n_lap:
            # The current speed cap was mastered: keep this network and raise the cap.
            reward_writer.completed_track()
            TrackUtilities.save_last_network_path(last_network_filepath, save_filepath)
            speed += speed_step
            last_working_network_filepath = save_filepath
        else:
            break

    TrackUtilities.validate_network(last_working_network_filepath, track, speed - speed_step,
                                    reward_writer, n_lap=validation_lap_number)
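# Hypothetical usage sketch (not part of the original module): one way the routines
# above could be chained from a driver script. The run name, track, and parameter
# values are illustrative assumptions, not values taken from the project.
if __name__ == '__main__':
    # Train on a single track, then replay the saved actor on the same track.
    train_on_single_track(root_dir='example_run', track='aalborg', epsilon=0.5,
                          steps=500000)
    test_network(track='aalborg',
                 load_filepath='runs/example_run/aalborg_0.5.h5f',
                 n_lap=3)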