Example #1
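The snippets on this page are shown without their imports. A plausible preamble for the test examples below, assuming the usual rl_coach module layout (the exact paths are a best guess, not confirmed by this page):

import numpy as np
import pytest

from rl_coach.core_types import EnvResponse
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_squeeze_filter import ObservationSqueezeFilter
from rl_coach.filters.observation.observation_crop_filter import ObservationCropFilter
from rl_coach.spaces import ObservationSpace, ImageObservationSpace, VectorObservationSpace
# ...and similarly for the other Observation*Filter classes under rl_coach.filters.observation
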
def test_filter():
    # squeeze singleton axes out of an observation
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze',
                                          ObservationSqueezeFilter())
    squeeze_filter_with_axis = InputFilter()
    squeeze_filter_with_axis.add_observation_filter(
        'observation', 'squeeze', ObservationSqueezeFilter(2))

    observation = np.random.rand(20, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation},
                               reward=0,
                               game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    result_with_axis = squeeze_filter_with_axis.filter(env_response)[0]
    unfiltered_observation_shape = env_response.next_state['observation'].shape
    filtered_observation_shape = result.next_state['observation'].shape
    filtered_observation_with_axis_shape = result_with_axis.next_state[
        'observation'].shape

    # make sure the original observation is unchanged
    assert unfiltered_observation_shape == observation.shape

    # make sure the filtering is done correctly
    assert filtered_observation_shape == (20, 30, 3)
    assert filtered_observation_with_axis_shape == (20, 30, 3)

    observation = np.random.rand(1, 30, 1, 3)
    env_response = EnvResponse(next_state={'observation': observation},
                               reward=0,
                               game_over=False)

    result = squeeze_filter.filter(env_response)[0]
    assert result.next_state['observation'].shape == (30, 3)
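
For reference, the shapes asserted above match NumPy's np.squeeze semantics, which the filter appears to mirror:

import numpy as np

obs = np.random.rand(20, 30, 1, 3)
assert np.squeeze(obs).shape == (20, 30, 3)           # all singleton axes removed
assert np.squeeze(obs, axis=2).shape == (20, 30, 3)   # only axis 2 removed

obs = np.random.rand(1, 30, 1, 3)
assert np.squeeze(obs).shape == (30, 3)               # both singleton axes removed
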
Example #2
def test_filter(env_response):
    crop_low = np.array([0, 5, 10])
    crop_high = np.array([5, 10, 20])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 5, 10)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, 5:10, 10:20])

    # crop with -1 on some axes
    crop_low = np.array([0, 0, 0])
    crop_high = np.array([5, -1, -1])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    result = crop_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (5, 20, 30)

    # validate the content of the filtered observation
    assert np.all(filtered_observation == unfiltered_observation[0:5, :, :])
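
The assertions above imply that the crop filter slices each axis from crop_low to crop_high, with -1 apparently meaning "up to the end of that axis". A minimal NumPy sketch under that assumption:

import numpy as np

def crop(observation, crop_low, crop_high):
    # assumption inferred from the test: -1 in crop_high means the end of that axis
    high = np.where(crop_high == -1, observation.shape, crop_high)
    return observation[tuple(slice(lo, hi) for lo, hi in zip(crop_low, high))]

obs = np.random.rand(5, 20, 30)
assert crop(obs, np.array([0, 5, 10]), np.array([5, 10, 20])).shape == (5, 5, 10)
assert crop(obs, np.array([0, 0, 0]), np.array([5, -1, -1])).shape == (5, 20, 30)
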
Example #3
def test_get_filtered_observation_space():
    # error on wrong number of channels
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale',
        ObservationRescaleSizeByFactorFilter(
            0.5, RescaleInterpolationType.BILINEAR))
    observation_space = ObservationSpace(np.array([10, 20, 5]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space(
            'observation', observation_space)

    # error on wrong number of dimensions
    observation_space = ObservationSpace(np.array([10, 20, 10, 3]))
    with pytest.raises(ValueError):
        filtered_observation_space = rescale_filter.get_filtered_observation_space(
            'observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    observation_space = ObservationSpace(np.array([10, 20, 3]))
    filtered_observation_space = rescale_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([10, 20, 3]))
Example #4
def test_get_filtered_observation_space():
    # error on observation space with shape not matching the filter squeeze axis configuration
    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze',
                                          ObservationSqueezeFilter(axis=3))

    observation_space = ObservationSpace(np.array([20, 1, 30, 3]), 0, 100)
    small_observation_space = ObservationSpace(np.array([20, 1, 30]), 0, 100)
    with pytest.raises(ValueError):
        squeeze_filter.get_filtered_observation_space('observation',
                                                      observation_space)
    # the second call needs its own with-block, otherwise it is never reached
    with pytest.raises(ValueError):
        squeeze_filter.get_filtered_observation_space('observation',
                                                      small_observation_space)

    # verify output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3, 1]), 0, 200)
    result = squeeze_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(result.shape == np.array([1, 2, 3]))

    squeeze_filter = InputFilter()
    squeeze_filter.add_observation_filter('observation', 'squeeze',
                                          ObservationSqueezeFilter())

    result = squeeze_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(result.shape == np.array([2, 3]))
Example #5
def test_filter():
    # make an RGB observation smaller
    env_response = EnvResponse(
        next_state={'observation': np.ones([20, 30, 3])},
        reward=0,
        game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale', ObservationRescaleSizeByFactorFilter(0.5))

    result = rescale_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 15, 3)

    # make a grayscale observation bigger
    env_response = EnvResponse(next_state={'observation': np.ones([20, 30])},
                               reward=0,
                               game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale', ObservationRescaleSizeByFactorFilter(2))
    result = rescale_filter.filter(env_response)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))
Example #6
def test_filter():
    # make an RGB observation smaller
    transition = EnvResponse(next_state={'observation': np.ones([20, 30, 3])},
                             reward=0,
                             game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale',
        ObservationRescaleToSizeFilter(
            ImageObservationSpace(np.array([10, 20, 3]), high=255)))

    result = rescale_filter.filter(transition)[0]
    unfiltered_observation = transition.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (20, 30, 3)

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (10, 20, 3)
    assert np.all(filtered_observation == np.ones([10, 20, 3]))

    # make a grayscale observation bigger
    transition = EnvResponse(next_state={'observation': np.ones([20, 30])},
                             reward=0,
                             game_over=False)
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale',
        ObservationRescaleToSizeFilter(
            ImageObservationSpace(np.array([40, 60]), high=255)))
    result = rescale_filter.filter(transition)[0]
    filtered_observation = result.next_state['observation']

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (40, 60)
    assert np.all(filtered_observation == np.ones([40, 60]))

    # rescale channels -> error
    # with pytest.raises(ValueError):
    #     InputFilter(
    #         observation_filters=OrderedDict([('rescale',
    #                                          ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([10, 20, 1]),
    #                                                                                               high=255)
    #                                                                        ))]))

    # TODO: validate input to filter
    # different number of axes -> error
    # env_response = EnvResponse(state={'observation': np.ones([20, 30, 3])}, reward=0, game_over=False)
    # rescale_filter = ObservationRescaleToSizeFilter(ObservationSpace(np.array([10, 20]))
    #                                                 )
    # with pytest.raises(ValueError):
    #     result = rescale_filter.filter(transition)

    # channels first -> error
    with pytest.raises(ValueError):
        ObservationRescaleToSizeFilter(
            ImageObservationSpace(np.array([3, 10, 20]), high=255))
Example #7
def test_filter():
    # Keep
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

    reduction_filter.get_filtered_observation_space('observation',
                                                    observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3, )

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (1, )

    # Discard
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))
    reduction_filter.get_filtered_observation_space('observation',
                                                    observation_space)
    result = reduction_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.shape == (3, )

    # validate the shape of the filtered observation
    assert filtered_observation.shape == (2, )
Example #8
def test_get_filtered_observation_space():
    # error on observation space with values not matching the filter configuration
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter(
        'observation', 'to_uint8',
        ObservationToUInt8Filter(input_low=0, input_high=200))

    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 100)
    with pytest.raises(ValueError):
        uint8_filter.get_filtered_observation_space('observation',
                                                    observation_space)

    # verify output observation space is correct
    observation_space = ObservationSpace(np.array([1, 2, 3]), 0, 200)
    result = uint8_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(result.high == 255)
    assert np.all(result.low == 0)
    assert np.all(result.shape == observation_space.shape)
Example #9
def test_filter():
    # convert a float observation to uint8
    uint8_filter = InputFilter()
    uint8_filter.add_observation_filter(
        'observation', 'to_uint8',
        ObservationToUInt8Filter(input_low=0, input_high=255))

    observation = np.random.rand(20, 30, 3) * 255.0
    env_response = EnvResponse(next_state={'observation': observation},
                               reward=0,
                               game_over=False)

    result = uint8_filter.filter(env_response)[0]
    unfiltered_observation = env_response.next_state['observation']
    filtered_observation = result.next_state['observation']

    # make sure the original observation is unchanged
    assert unfiltered_observation.dtype == 'float64'

    # make sure the filtering is done correctly
    assert filtered_observation.dtype == 'uint8'
    assert np.all(filtered_observation == observation.astype('uint8'))
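
The two uint8 tests are consistent with a linear mapping of [input_low, input_high] onto [0, 255] followed by a cast; a sketch of that presumed behavior:

import numpy as np

def to_uint8(observation, input_low, input_high):
    # presumed mapping: rescale [input_low, input_high] onto [0, 255], then cast;
    # with input_low=0 and input_high=255 this reduces (up to floating-point
    # rounding) to the plain astype('uint8') asserted in the test above
    rescaled = (observation - input_low) / (input_high - input_low) * 255.0
    return rescaled.astype('uint8')

obs = np.random.rand(20, 30, 3) * 255.0
assert to_uint8(obs, 0, 255).dtype == np.uint8
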
Example #10
def test_get_filtered_observation_space():
    # error on wrong number of channels
    with pytest.raises(ValueError):
        observation_filters = InputFilter()
        observation_filters.add_observation_filter(
            'observation', 'rescale',
            ObservationRescaleToSizeFilter(
                ImageObservationSpace(np.array([5, 10, 5]), high=255),
                RescaleInterpolationType.BILINEAR))

    # mismatch and wrong number of channels
    rescale_filter = InputFilter()
    rescale_filter.add_observation_filter(
        'observation', 'rescale',
        ObservationRescaleToSizeFilter(
            ImageObservationSpace(np.array([5, 10, 3]), high=255),
            RescaleInterpolationType.BILINEAR))

    observation_space = PlanarMapsObservationSpace(np.array([10, 20, 5]),
                                                   low=0,
                                                   high=255)
    with pytest.raises(ValueError):
        rescale_filter.get_filtered_observation_space('observation',
                                                      observation_space)

    # error on wrong number of dimensions
    observation_space = ObservationSpace(np.array([10, 20, 10, 3]), high=255)
    with pytest.raises(ValueError):
        rescale_filter.get_filtered_observation_space('observation',
                                                      observation_space)

    # make sure the new observation space shape is calculated correctly
    observation_space = ImageObservationSpace(np.array([10, 20, 3]), high=255)
    filtered_observation_space = rescale_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 3]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([10, 20, 3]))
Example #11
def test_get_filtered_observation_space():
    # Keep
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Keep))

    filtered_observation_space = reduction_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([1]))
    assert filtered_observation_space.measurements_names == ['a']

    # Discard
    observation_space = VectorObservationSpace(
        3, measurements_names=['a', 'b', 'c'])
    env_response = EnvResponse(next_state={'observation': np.ones([3])},
                               reward=0,
                               game_over=False)
    reduction_filter = InputFilter()
    reduction_filter.add_observation_filter(
        'observation', 'reduce',
        ObservationReductionBySubPartsNameFilter(
            ["a"],
            ObservationReductionBySubPartsNameFilter.ReductionMethod.Discard))

    filtered_observation_space = reduction_filter.get_filtered_observation_space(
        'observation', observation_space)
    assert np.all(filtered_observation_space.shape == np.array([2]))
    assert filtered_observation_space.measurements_names == ['b', 'c']
Example #12
def test_get_filtered_observation_space():
    crop_low = np.array([0, 5, 10])
    crop_high = np.array([5, 10, 20])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    observation_space = ObservationSpace(np.array([5, 10, 20]))
    filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    assert np.all(filtered_observation_space.shape == np.array([5, 5, 10]))

    # make sure the original observation space is unchanged
    assert np.all(observation_space.shape == np.array([5, 10, 20]))

    # crop_high is bigger than the observation space
    high_error_observation_space = ObservationSpace(np.array([3, 8, 14]))
    with pytest.raises(ValueError):
        crop_filter.get_filtered_observation_space('observation', high_error_observation_space)

    # crop_low is bigger than the observation space
    low_error_observation_space = ObservationSpace(np.array([3, 3, 10]))
    with pytest.raises(ValueError):
        crop_filter.get_filtered_observation_space('observation', low_error_observation_space)

    # crop with -1 on some axes
    crop_low = np.array([0, 0, 0])
    crop_high = np.array([5, -1, -1])
    crop_filter = InputFilter()
    crop_filter.add_observation_filter('observation', 'crop', ObservationCropFilter(crop_low, crop_high))

    observation_space = ObservationSpace(np.array([5, 10, 20]))
    filtered_observation_space = crop_filter.get_filtered_observation_space('observation', observation_space)

    # make sure the new observation space shape is calculated correctly
    assert np.all(filtered_observation_space.shape == np.array([5, 10, 20]))
Example #13
def get_graph_manager(hp_dict,
                      agent_list,
                      run_phase_subject,
                      enable_domain_randomization=False,
                      done_condition=any,
                      run_type=str(RunType.ROLLOUT_WORKER),
                      pause_physics=None,
                      unpause_physics=None):
    ####################
    # Hyperparameters #
    ####################
    training_algorithm = agent_list[
        0].ctrl.model_metadata.training_algorithm if agent_list else None
    params = get_updated_hyper_parameters(hp_dict, training_algorithm)
    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params[HyperParameterKeys.TERMINATION_CONDITION_MAX_EPISODES.value])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()

    for agent in agent_list:
        if agent.network_settings:
            if TrainingAlgorithm.SAC.value == training_algorithm:
                agent_params = get_sac_params(DeepRacerSACAgentParams(), agent,
                                              params, run_type)
            else:
                agent_params = get_clipped_ppo_params(
                    DeepRacerClippedPPOAgentParams(), agent, params)
            agent_params.env_agent = agent
            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation in (Input.LEFT_CAMERA.value, Input.CAMERA.value,
                                   Input.OBSERVATION.value):
                    input_filter.add_observation_filter(
                        observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(
                        observation, 'stacking', ObservationStackingFilter(1))

                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))

                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'clipping',
                        ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'binary', ObservationBinarySectorFilter())
            agent_params.input_filter = input_filter()
            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject
    env_params.enable_domain_randomization = enable_domain_randomization
    env_params.done_condition = done_condition
    env_params.pause_physics = pause_physics
    env_params.unpause_physics = unpause_physics
    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(
        agents_params=trainable_agents_list,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params,
        done_condition=done_condition)
    return graph_manager, params_json
Example #14
def rgb_to_y_filter():
    rgb_to_y_filter = InputFilter()
    rgb_to_y_filter.add_observation_filter('observation', 'rgb_to_y',
                                           ObservationRGBToYFilter())
    return rgb_to_y_filter
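
A hypothetical test using this fixture (the space shape is illustrative; RGB-to-Y is expected to keep height and width and drop the channel axis):

import numpy as np
from rl_coach.spaces import ImageObservationSpace

def test_rgb_to_y_space(rgb_to_y_filter):
    space = ImageObservationSpace(np.array([84, 84, 3]), high=255)
    y_space = rgb_to_y_filter.get_filtered_observation_space('observation', space)
    assert np.all(y_space.shape == np.array([84, 84]))
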
Example #15
def get_graph_manager(hp_dict,
                      agent_list,
                      run_phase_subject,
                      enable_domain_randomization=False,
                      done_condition=any,
                      run_type=str(RunType.ROLLOUT_WORKER),
                      pause_physics=None,
                      unpause_physics=None):
    ####################
    # Hyperparameters #
    ####################
    # Note: The following three lines are hard-coded to pick the first agent's training
    # algorithm and dump the hyper-parameters for that particular training algorithm into
    # JSON for training jobs (so that the console displays the training hyper-parameters
    # correctly), since right now we only support training one model at a time.
    # TODO: clean these lines up when we support multi-agent training.
    training_algorithm = agent_list[
        0].ctrl.model_metadata.training_algorithm if agent_list else None
    params = get_updated_hyper_parameters(hp_dict, training_algorithm)
    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params[HyperParameterKeys.TERMINATION_CONDITION_MAX_EPISODES.value])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()

    for agent in agent_list:
        if agent.network_settings:
            training_algorithm = agent.ctrl.model_metadata.training_algorithm
            params = get_updated_hyper_parameters(hp_dict, training_algorithm)
            if TrainingAlgorithm.SAC.value == training_algorithm:
                agent_params = get_sac_params(DeepRacerSACAgentParams(), agent,
                                              params, run_type)
            else:
                agent_params = get_clipped_ppo_params(
                    DeepRacerClippedPPOAgentParams(), agent, params)
            agent_params.env_agent = agent
            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation in (Input.LEFT_CAMERA.value, Input.CAMERA.value,
                                   Input.OBSERVATION.value):
                    input_filter.add_observation_filter(
                        observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(
                        observation, 'stacking', ObservationStackingFilter(1))

                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))

                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'clipping',
                        ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    sector_binary_filter = ObservationSectorDiscretizeFilter(
                        num_sectors=NUMBER_OF_LIDAR_SECTORS,
                        num_values_per_sector=1,
                        clipping_dist=SECTOR_LIDAR_CLIPPING_DIST)
                    input_filter.add_observation_filter(
                        observation, 'binary', sector_binary_filter)
                if observation == Input.DISCRETIZED_SECTOR_LIDAR.value:
                    num_sectors = agent.ctrl.model_metadata.lidar_num_sectors
                    num_values_per_sector = agent.ctrl.model_metadata.lidar_num_values_per_sector
                    clipping_dist = agent.ctrl.model_metadata.lidar_clipping_dist

                    sector_discretize_filter = ObservationSectorDiscretizeFilter(
                        num_sectors=num_sectors,
                        num_values_per_sector=num_values_per_sector,
                        clipping_dist=clipping_dist)
                    input_filter.add_observation_filter(
                        observation, 'discrete', sector_discretize_filter)
            agent_params.input_filter = input_filter()
            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject
    env_params.enable_domain_randomization = enable_domain_randomization
    env_params.done_condition = done_condition
    env_params.pause_physics = pause_physics
    env_params.unpause_physics = unpause_physics
    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(
        agents_params=trainable_agents_list,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params,
        done_condition=done_condition)
    return graph_manager, params_json
Example #16
def get_graph_manager(hp_dict, agent_list, run_phase_subject):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    trainable_agents_list = list()
    non_trainable_agents_list = list()

    for agent in agent_list:
        agent_params = DeepRacerAgentParams()
        if agent.network_settings:
            agent_params.env_agent = agent
            agent_params.network_wrappers['main'].learning_rate = params["lr"]

            agent_params.network_wrappers['main'].input_embedders_parameters = \
                create_input_embedder(agent.network_settings['input_embedders'],
                                      agent.network_settings['embedder_type'],
                                      agent.network_settings['activation_function'])
            agent_params.network_wrappers['main'].middleware_parameters = \
                create_middle_embedder(agent.network_settings['middleware_embedders'],
                                       agent.network_settings['embedder_type'],
                                       agent.network_settings['activation_function'])

            input_filter = InputFilter(is_a_reference_filter=True)
            for observation in agent.network_settings['input_embedders'].keys():
                if observation in (Input.LEFT_CAMERA.value, Input.CAMERA.value,
                                   Input.OBSERVATION.value):
                    input_filter.add_observation_filter(
                        observation, 'to_grayscale', ObservationRGBToYFilter())
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))
                    input_filter.add_observation_filter(
                        observation, 'stacking', ObservationStackingFilter(1))

                if observation == Input.STEREO.value:
                    input_filter.add_observation_filter(
                        observation, 'to_uint8',
                        ObservationToUInt8Filter(0, 255))

                if observation == Input.LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'clipping',
                        ObservationClippingFilter(0.15, 1.0))
                if observation == Input.SECTOR_LIDAR.value:
                    input_filter.add_observation_filter(
                        observation, 'binary', ObservationBinarySectorFilter())
            agent_params.input_filter = input_filter()

            agent_params.network_wrappers['main'].batch_size = params[
                "batch_size"]
            agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
            agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

            if params["loss_type"] == "huber":
                agent_params.network_wrappers[
                    'main'].replace_mse_with_huber_loss = True

            agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
            agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
                1.0, 0, 1000000)
            agent_params.algorithm.beta_entropy = params["beta_entropy"]
            agent_params.algorithm.gae_lambda = 0.95
            agent_params.algorithm.discount = params["discount_factor"]
            agent_params.algorithm.optimization_epochs = params["num_epochs"]
            agent_params.algorithm.estimate_state_value_using_gae = True
            agent_params.algorithm.num_steps_between_copying_online_weights_to_target = \
                EnvironmentEpisodes(params["num_episodes_between_training"])
            agent_params.algorithm.num_consecutive_playing_steps = \
                EnvironmentEpisodes(params["num_episodes_between_training"])

            agent_params.algorithm.distributed_coach_synchronization_type = \
                DistributedCoachSynchronizationType.SYNC

            if params["exploration_type"] == "categorical":
                agent_params.exploration = CategoricalParameters()
            else:
                agent_params.exploration = EGreedyParameters()
                agent_params.exploration.epsilon_schedule = LinearSchedule(
                    1.0, params["e_greedy_value"], params["epsilon_steps"])

            trainable_agents_list.append(agent_params)
        else:
            non_trainable_agents_list.append(agent)

    ###############
    # Environment #
    ###############
    env_params = DeepRacerRacetrackEnvParameters()
    env_params.agents_params = trainable_agents_list
    env_params.non_trainable_agents = non_trainable_agents_list
    env_params.level = 'DeepRacerRacetrackEnv-v0'
    env_params.run_phase_subject = run_phase_subject

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = MultiAgentGraphManager(
        agents_params=trainable_agents_list,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Example #17
# Fetch
gym_fetch_envs = ['reach', 'slide', 'push', 'pick_and_place']
fetch_v1 = {
    e: "{}".format('Fetch' + lower_under_to_upper(e) + '-v1')
    for e in gym_fetch_envs
}
"""
Atari Environment Components
"""

AtariInputFilter = InputFilter(is_a_reference_filter=True)
AtariInputFilter.add_reward_filter('clipping', RewardClippingFilter(-1.0, 1.0))
AtariInputFilter.add_observation_filter(
    'observation',
    'rescaling',
    ObservationRescaleToSizeFilter(
        ImageObservationSpace(
            np.array([84, 84, 3]),  #np.array([224, 224, 3]),
            high=255)))
#AtariInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
#AtariInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
#AtariInputFilter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(4))
AtariOutputFilter = NoOutputFilter()


class Atari(GymEnvironmentParameters):
    def __init__(self, level=None):
        super().__init__(level=level)
        self.frame_skip = 4
        self.max_over_num_frames = 2
        self.random_initialization_steps = 30
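
A hypothetical preset snippet wiring the pieces above together (the level name is illustrative; default_input_filter / default_output_filter follow the pattern used by the other presets on this page):

env_params = Atari(level='BreakoutDeterministic-v4')
env_params.default_input_filter = AtariInputFilter
env_params.default_output_filter = AtariOutputFilter
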
Example #18
    'SELECT_WEAPON5': ord("5"),
    'SELECT_WEAPON6': ord("6"),
    'SELECT_WEAPON7': ord("7"),
    'SELECT_WEAPON8': ord("8"),
    'SELECT_WEAPON9': ord("9"),
    'SPEED': 304,  # shift
    'STRAFE': 9,  # tab
    'TURN180': ord("u"),
    'TURN_LEFT': ord("a"),  # left arrow
    'TURN_RIGHT': ord("d"),  # right arrow
    'USE': ord("f"),
}

DoomInputFilter = InputFilter(is_a_reference_filter=True)
DoomInputFilter.add_observation_filter(
    'observation', 'rescaling',
    ObservationRescaleToSizeFilter(
        ImageObservationSpace(np.array([60, 76, 3]), high=255)))
DoomInputFilter.add_observation_filter('observation', 'to_grayscale',
                                       ObservationRGBToYFilter())
DoomInputFilter.add_observation_filter('observation', 'to_uint8',
                                       ObservationToUInt8Filter(0, 255))
DoomInputFilter.add_observation_filter('observation', 'stacking',
                                       ObservationStackingFilter(3))

DoomOutputFilter = OutputFilter(is_a_reference_filter=True)
DoomOutputFilter.add_action_filter('to_discrete', FullDiscreteActionSpaceMap())


class DoomEnvironmentParameters(EnvironmentParameters):
    def __init__(self, level=None):
        super().__init__(level=level)
Example #19

key_map = {
    'BRAKE': (274, ),  # down arrow
    'GAS': (273, ),  # up arrow
    'TURN_LEFT': (276, ),  # left arrow
    'TURN_RIGHT': (275, ),  # right arrow
    'GAS_AND_TURN_LEFT': (273, 276),
    'GAS_AND_TURN_RIGHT': (273, 275),
    'BRAKE_AND_TURN_LEFT': (274, 276),
    'BRAKE_AND_TURN_RIGHT': (274, 275),
}

CarlaInputFilter = InputFilter(is_a_reference_filter=True)
CarlaInputFilter.add_observation_filter(
    'forward_camera', 'rescaling',
    ObservationRescaleToSizeFilter(
        ImageObservationSpace(np.array([128, 180, 3]), high=255)))
CarlaInputFilter.add_observation_filter('forward_camera', 'to_grayscale',
                                        ObservationRGBToYFilter())
CarlaInputFilter.add_observation_filter('forward_camera', 'to_uint8',
                                        ObservationToUInt8Filter(0, 255))
CarlaInputFilter.add_observation_filter('forward_camera', 'stacking',
                                        ObservationStackingFilter(4))

CarlaOutputFilter = NoOutputFilter()


class CameraTypes(Enum):
    FRONT = "forward_camera"
    LEFT = "left_camera"
    RIGHT = "right_camera"
Example #20
def stack_filter():
    stack_filter = InputFilter()
    stack_filter.add_observation_filter(
        'observation', 'stack', ObservationStackingFilter(4, stacking_axis=-1))
    return stack_filter
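
Conceptually, stacking four grayscale frames along the trailing axis turns (84, 84) observations into one (84, 84, 4) tensor; a NumPy analogy of the same idea (not the filter's internals):

import numpy as np

frames = [np.random.rand(84, 84) for _ in range(4)]
stacked = np.stack(frames, axis=-1)
assert stacked.shape == (84, 84, 4)
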
Example #21
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with single layer Conv2d(32, 3, 1)
#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow

#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
#   agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
#   agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
#   agent_params.network_wrappers['main'].l2_regularization = 2e-5

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Optionally add a color perturbation to the observation image
#   DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to much smaller input when using shallow networks to avoid OOM
#   DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
#                                           ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
#                                                                                            high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
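
A hypothetical invocation (keyword names mirror the hp_dict keys above; values are passed as strings here because training jobs typically deliver hyper-parameters that way, hence the explicit int()/float() coercion inside):

graph_manager, params_json = get_graph_manager(
    batch_size="128",                  # coerced with int(...)
    lr="0.0001",                       # coerced with float(...)
    exploration_type="categorical",
)
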
Example #22
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()

agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

SilverstoneInputFilter.add_observation_filter('front_camera', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('front_camera', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('front_camera', 'stacking',
                                              ObservationStackingFilter(1))
SilverstoneInputFilter.add_observation_filter(
    'lidar', 'clipping', ObservationClippingFilter(0.15, 1.0))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
Example #23
# Starcraft Constants
_NOOP = actions.FUNCTIONS.no_op.id
_MOVE_SCREEN = actions.FUNCTIONS.Move_screen.id
_SELECT_ARMY = actions.FUNCTIONS.select_army.id
_PLAYER_RELATIVE = features.SCREEN_FEATURES.player_relative.index
_NOT_QUEUED = [0]
_SELECT_ALL = [0]


class StarcraftObservationType(Enum):
    Features = 0
    RGB = 1


StarcraftInputFilter = InputFilter(is_a_reference_filter=True)
StarcraftInputFilter.add_observation_filter('screen', 'move_axis',
                                            ObservationMoveAxisFilter(0, -1))
StarcraftInputFilter.add_observation_filter(
    'screen', 'rescaling',
    ObservationRescaleToSizeFilter(
        PlanarMapsObservationSpace(np.array([84, 84, 1]),
                                   low=0,
                                   high=255,
                                   channels_axis=-1)))
StarcraftInputFilter.add_observation_filter('screen', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))

StarcraftInputFilter.add_observation_filter('minimap', 'move_axis',
                                            ObservationMoveAxisFilter(0, -1))
StarcraftInputFilter.add_observation_filter(
    'minimap', 'rescaling',
    ObservationRescaleToSizeFilter(
Example #24
agent_params.network_wrappers[
    'main'].gradients_clipping_method = GradientClippingMethod.ClipByValue

###############
# Environment #
###############
import jsbsim
import gym_jsbsim
from rl_coach.filters.filter import NoInputFilter, NoOutputFilter
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter

vis_params = VisualizationParameters(native_rendering=True)

input = InputFilter(is_a_reference_filter=True)
input.add_observation_filter('observation', 'stacking',
                             ObservationStackingFilter(10))


class MyGymVectorEnvironment(GymVectorEnvironment):
    def __init__(self, level=None):
        super().__init__(level=level)
        self.frame_skip = 1
        self.default_input_filter = NoInputFilter()  # hrmm.. my custom input filter errored out
        self.default_output_filter = NoOutputFilter()


env_params = MyGymVectorEnvironment(
    level='JSBSim-HeadingControlTask-Cessna172P-Shaping.EXTRA-FG-v0')

# this preset is currently broken - no test
Example #25
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters[
        'observation'].activation_function = 'relu'
    agent_params.network_wrappers[
        'main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
        1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation',
                                                  'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'stacking',
        ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Example #26
def get_graph_manager(**hp_dict):
    ####################
    # All Default Parameters #
    ####################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)
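    # heatup_steps is 0 because Clipped PPO is on-policy: training consumes
    # freshly collected episodes instead of a pre-filled replay buffer.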

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC
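    # In distributed Coach, SYNC makes rollout workers wait for each new
    # policy from the trainer before collecting more episodes, keeping the
    # algorithm on-policy across workers.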

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
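A minimal sketch of driving the returned graph manager, assuming the standard Coach entry points; the TaskParameters arguments and the hyper-parameter values here are illustrative, not part of the preset:

from rl_coach.base_parameters import TaskParameters

# Hypothetical overrides; any key not passed falls back to the defaults
# defined in get_graph_manager above.
graph_manager, params_json = get_graph_manager(batch_size=32, lr=1e-4)

# create_graph wires the agent, networks and environment together;
# improve() then runs the heatup/train/evaluate loop from schedule_params.
graph_manager.create_graph(TaskParameters(experiment_path='./experiment'))
graph_manager.improve()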
Example #27
# Huber loss
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

agent_params.exploration = CategoricalParameters()

agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

# SilverstoneInputFilter.add_observation_filter('left_camera', 'to_grayscale', ObservationRGBToYFilter())
# SilverstoneInputFilter.add_observation_filter('left_camera', 'to_uint8', ObservationToUInt8Filter(0, 255))
# SilverstoneInputFilter.add_observation_filter('left_camera', 'stacking', ObservationStackingFilter(1))
SilverstoneInputFilter.add_observation_filter('STEREO_CAMERAS', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter(
    'LIDAR', 'clipping', ObservationClippingFilter(0.1, 0.5))
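# The stereo camera images are only rescaled to uint8 here (no grayscale
# conversion, so both camera channels survive), while the LIDAR readings are
# clipped into the [0.1, 0.5] range to bound outlier distances.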

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
agent_params.algorithm.beta_entropy = 0.01  # also try 0.001
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)
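# Cap the experience buffer at 100,000 individual transitions (as opposed to
# counting whole episodes).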

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter(
    'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'RoboMaker-ObjectTracker-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)

agent_params.exploration = CategoricalParameters()

agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                              ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
agent_params.algorithm.beta_entropy = 0.01  # also try 0.001
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True