Code Example #1
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'RoboMaker-DeepRacer-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False
vis_params.tensorboard = True
vis_params.print_networks_summary = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
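
The presets on this page are built on Intel's Coach (rl_coach) library. The snippets assume imports along the following lines; this is a hedged sketch based on common rl_coach module paths, so verify the exact paths against the rl_coach version each project pins.

# Assumed imports for the presets on this page (verify against your rl_coach version).
import json

from rl_coach.agents.clipped_ppo_agent import ClippedPPOAgentParameters
from rl_coach.base_parameters import (VisualizationParameters, PresetValidationParameters,
                                      DistributedCoachSynchronizationType)
from rl_coach.core_types import TrainingSteps, EnvironmentEpisodes, EnvironmentSteps
from rl_coach.environments.gym_environment import GymVectorEnvironment
from rl_coach.exploration_policies.categorical import CategoricalParameters
from rl_coach.exploration_policies.e_greedy import EGreedyParameters
from rl_coach.filters.filter import InputFilter
from rl_coach.filters.observation.observation_rgb_to_y_filter import ObservationRGBToYFilter
from rl_coach.filters.observation.observation_stacking_filter import ObservationStackingFilter
from rl_coach.filters.observation.observation_to_uint8_filter import ObservationToUInt8Filter
from rl_coach.graph_managers.basic_rl_graph_manager import BasicRLGraphManager
from rl_coach.graph_managers.graph_manager import ScheduleParameters
from rl_coach.memories.memory import MemoryGranularity
from rl_coach.schedules import LinearSchedule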
Code Example #2
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type",
                                              "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type",
                                      "Mean squared error").lower()
    params["num_episodes_between_training"] = int(
        hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(
        hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(
        hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(
        params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters[
        'observation'].activation_function = 'relu'
    agent_params.network_wrappers[
        'main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers[
            'main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(
        1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(
        params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(
            1.0, params["e_greedy_value"], params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation',
                                                  'to_grayscale',
                                                  ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter(
        'observation', 'stacking',
        ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(
        agent_params=agent_params,
        env_params=env_params,
        schedule_params=schedule_params,
        vis_params=vis_params,
        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
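
For context, a graph manager returned by get_graph_manager() is normally driven through Coach's create/improve cycle. The sketch below is illustrative and not part of the original preset; the hyper-parameter overrides and default TaskParameters are assumptions.

from rl_coach.base_parameters import TaskParameters

# Build the graph from the preset above (hyper-parameter overrides are illustrative).
graph_manager, params_json = get_graph_manager(batch_size=64, lr=0.0003)

# Instantiate the graph, then run heatup/training/evaluation as defined by schedule_params.
graph_manager.create_graph(TaskParameters())
graph_manager.improve()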
Code Example #3
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRacerInputFilter
env_params.level = 'SageMaker-DeepRacer-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 4000
preset_validation_params.max_episodes_to_achieve_reward = 20

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #4
File: ADC_ClippedPPO.py  Project: zackzhao1/Distiller
agent_params.algorithm.beta_entropy = 0
agent_params.algorithm.gae_lambda = 0.95
agent_params.algorithm.discount = 1
# How many epochs to train the network using supervised methods
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

agent_params.pre_network_filter = InputFilter()
agent_params.pre_network_filter.add_observation_filter(
    'observation', 'normalize_observation',
    ObservationNormalizationFilter(name='normalize_observation'))

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = './environment.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)
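
As used here, GymVectorEnvironment's level accepts a 'path/to/module.py:ClassName' string for a custom environment, in addition to registered Gym IDs. A minimal sketch of such a wrapper class follows; the class body is illustrative and not taken from the Distiller source.

import gym
import numpy as np

class DistillerWrapperEnvironment(gym.Env):
    # Illustrative skeleton only: the real environment wraps the model-compression loop.
    def __init__(self):
        self.observation_space = gym.spaces.Box(low=0.0, high=1.0, shape=(10,), dtype=np.float32)
        self.action_space = gym.spaces.Box(low=0.0, high=1.0, shape=(1,), dtype=np.float32)

    def reset(self):
        return self.observation_space.sample()

    def step(self, action):
        observation = self.observation_space.sample()
        reward, done, info = 0.0, False, {}
        return observation, reward, done, info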
Code Example #5
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking', ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=vis_params,
                                    preset_validation_params=preset_validation_params)
Code Example #6
agent_params.network_wrappers['critic'].middleware_parameters.scheme = [
    Dense(64)
]

agent_params.input_filter = InputFilter()
agent_params.input_filter.add_observation_filter(
    'observation', 'normalize', ObservationNormalizationFilter())

# Distributed Coach synchronization type.
agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

###############
# Environment #
###############
env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:DistillerWrapperEnvironment'

vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
graph_manager = BasicRLGraphManager(agent_params=agent_params,
                                    env_params=env_params,
                                    schedule_params=schedule_params,
                                    vis_params=vis_params)

########
# Test #
########
# preset_validation_params = PresetValidationParameters()
Code Example #7
File: deeprotor.py  Project: Kolefn/cantina-deeprotor
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
DeepRotorInputFilter = InputFilter(is_a_reference_filter=True)
DeepRotorInputFilter.add_observation_filter('observation', 'to_grayscale',
                                            ObservationRGBToYFilter())
DeepRotorInputFilter.add_observation_filter('observation', 'to_uint8',
                                            ObservationToUInt8Filter(0, 255))
DeepRotorInputFilter.add_observation_filter('observation', 'stacking',
                                            ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = DeepRotorInputFilter
env_params.level = 'RoboMaker-DeepRotor-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #8
agent_params.exploration = CategoricalParameters()

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter(
    'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'SageMaker-TurtleBot3-Discrete-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #9
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["exploration_type"] = (hp_dict.get("exploration_type", "huber")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)
    SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = SilverstoneInputFilter
    env_params.level = 'SilverstoneRacetrack-Discrete-v0'

    vis_params = VisualizationParameters()
    vis_params.dump_mp4 = False

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 1000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Code Example #10
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
turtlebot3_input_filter = InputFilter(is_a_reference_filter=True)
turtlebot3_input_filter.add_observation_filter('observation', 'to_grayscale',
                                               ObservationRGBToYFilter())
turtlebot3_input_filter.add_observation_filter(
    'observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
turtlebot3_input_filter.add_observation_filter('observation', 'stacking',
                                               ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = turtlebot3_input_filter
env_params.level = 'RoboMaker-ObjectTracker-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #11
# agent_params.network_wrappers['actor'].clip_gradients = 100

agent_params.algorithm.rate_for_copying_weights_to_target = 0.01  # Tau pg. 11
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentSteps(1)
agent_params.algorithm.heatup_using_network_decisions = True
agent_params.algorithm.discount = 1
# Replay buffer size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 2000)
agent_params.exploration = TruncatedNormalParameters()
agent_params.exploration.noise_percentage_schedule = PieceWiseSchedule([
    (ConstantSchedule(0.5), EnvironmentSteps(100*steps_per_episode)),
    (ExponentialSchedule(0.5, 0, 0.996), EnvironmentSteps(300*steps_per_episode))])
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentSteps(1)
agent_params.network_wrappers['actor'].learning_rate = 0.0001
agent_params.network_wrappers['critic'].learning_rate = 0.001

##############################
#      Gym                   #
##############################
env_params = GymVectorEnvironment()
env_params.level = '../automated_deep_compression/ADC.py:CNNEnvironment'


vis_params = VisualizationParameters()
vis_params.dump_parameters_documentation = False
vis_params.render = True
vis_params.native_rendering = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=vis_params)
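
This fragment also assumes a steps_per_episode value and several schedule/exploration classes defined earlier in the original preset. A minimal sketch of those assumed definitions (the concrete value is illustrative, not from the source):

from rl_coach.exploration_policies.truncated_normal import TruncatedNormalParameters
from rl_coach.schedules import ConstantSchedule, ExponentialSchedule, PieceWiseSchedule

# Assumed earlier in the preset; for Distiller's ADC this is typically the number of
# compressible layers in the network being pruned (the value here is illustrative).
steps_per_episode = 13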
Code Example #12
def get_graph_manager(**hp_dict):
    ##########################
    # All Default Parameters #
    ##########################
    params = {}
    params["batch_size"] = int(hp_dict.get("batch_size", 64))
    params["num_epochs"] = int(hp_dict.get("num_epochs", 10))
    params["stack_size"] = int(hp_dict.get("stack_size", 1))
    params["lr"] = float(hp_dict.get("lr", 0.0003))
    params["lr_decay_rate"] = float(hp_dict.get("lr_decay_rate", 0))
    params["lr_decay_steps"] = float(hp_dict.get("lr_decay_steps", 0))
    params["exploration_type"] = (hp_dict.get("exploration_type", "categorical")).lower()
    params["e_greedy_value"] = float(hp_dict.get("e_greedy_value", .05))
    params["epsilon_steps"] = int(hp_dict.get("epsilon_steps", 10000))
    params["beta_entropy"] = float(hp_dict.get("beta_entropy", .01))
    params["discount_factor"] = float(hp_dict.get("discount_factor", .999))
    params["loss_type"] = hp_dict.get("loss_type", "Mean squared error").lower()
    params["num_episodes_between_training"] = int(hp_dict.get("num_episodes_between_training", 20))
    params["term_cond_max_episodes"] = int(hp_dict.get("term_cond_max_episodes", 100000))
    params["term_cond_avg_score"] = float(hp_dict.get("term_cond_avg_score", 100000))
    params["tensorboard"] = hp_dict.get("tensorboard", False)
    params["dump_mp4"] = hp_dict.get("dump_mp4", False)
    params["dump_gifs"] = hp_dict.get("dump_gifs", False)

    params_json = json.dumps(params, indent=2, sort_keys=True)
    print("Using the following hyper-parameters", params_json, sep='\n')

    ####################
    # Graph Scheduling #
    ####################
    schedule_params = ScheduleParameters()
    schedule_params.improve_steps = TrainingSteps(params["term_cond_max_episodes"])
    schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(40)
    schedule_params.evaluation_steps = EnvironmentEpisodes(5)
    schedule_params.heatup_steps = EnvironmentSteps(0)

    #########
    # Agent #
    #########
    agent_params = ClippedPPOAgentParameters()

    agent_params.network_wrappers['main'].learning_rate = params["lr"]
    agent_params.network_wrappers['main'].learning_rate_decay_rate = params["lr_decay_rate"]
    agent_params.network_wrappers['main'].learning_rate_decay_steps = params["lr_decay_steps"]
    agent_params.network_wrappers['main'].input_embedders_parameters['observation'].activation_function = 'relu'
    # Replace the default CNN with a single Conv2d(32, 3, 1) layer
#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].scheme = EmbedderScheme.Shallow

#   agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
    agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
#   agent_params.network_wrappers['main'].middleware_parameters.scheme = MiddlewareScheme.Shallow
#   agent_params.network_wrappers['main'].middleware_parameters.dropout_rate = 0.3
    agent_params.network_wrappers['main'].batch_size = params["batch_size"]
    agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
    agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
#   agent_params.network_wrappers['main'].l2_regularization = 2e-5

    if params["loss_type"] == "huber":
        agent_params.network_wrappers['main'].replace_mse_with_huber_loss = True

    agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
    agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
    agent_params.algorithm.beta_entropy = params["beta_entropy"]
    agent_params.algorithm.gae_lambda = 0.95
    agent_params.algorithm.discount = params["discount_factor"]
    agent_params.algorithm.optimization_epochs = params["num_epochs"]
    agent_params.algorithm.estimate_state_value_using_gae = True
    agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
        params["num_episodes_between_training"])
    agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(params["num_episodes_between_training"])

    agent_params.algorithm.distributed_coach_synchronization_type = DistributedCoachSynchronizationType.SYNC

    if params["exploration_type"] == "categorical":
        agent_params.exploration = CategoricalParameters()
    else:
        agent_params.exploration = EGreedyParameters()
        agent_params.exploration.epsilon_schedule = LinearSchedule(1.0,
                                                                   params["e_greedy_value"],
                                                                   params["epsilon_steps"])

    ###############
    # Environment #
    ###############
    DeepRacerInputFilter = InputFilter(is_a_reference_filter=True)
    # Add an observation image perturbation (e.g., color jitter) for robustness
#   DeepRacerInputFilter.add_observation_filter('observation', 'perturb_color', ObservationColorPerturbation(0.2))
    # Rescale to a much smaller input when using shallow networks to avoid OOM
#   DeepRacerInputFilter.add_observation_filter('observation', 'rescaling',
#                                           ObservationRescaleToSizeFilter(ImageObservationSpace(np.array([84, 84, 3]),
#                                                                                            high=255)))
    DeepRacerInputFilter.add_observation_filter('observation', 'to_grayscale', ObservationRGBToYFilter())
    DeepRacerInputFilter.add_observation_filter('observation', 'to_uint8', ObservationToUInt8Filter(0, 255))
    DeepRacerInputFilter.add_observation_filter('observation', 'stacking',
                                                  ObservationStackingFilter(params["stack_size"]))

    env_params = GymVectorEnvironment()
    env_params.default_input_filter = DeepRacerInputFilter
    env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

    vis_params = VisualizationParameters()
    vis_params.tensorboard = params["tensorboard"]
    vis_params.dump_mp4 = params["dump_mp4"]
    vis_params.dump_gifs = params["dump_gifs"]
    # AlwaysDumpFilter, MaxDumpFilter, EveryNEpisodesDumpFilter, SelectedPhaseOnlyDumpFilter
    vis_params.video_dump_filters = [AlwaysDumpFilter()]

    ########
    # Test #
    ########
    preset_validation_params = PresetValidationParameters()
    preset_validation_params.test = True
    preset_validation_params.min_reward_threshold = 400
    preset_validation_params.max_episodes_to_achieve_reward = 10000

    graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                        schedule_params=schedule_params, vis_params=vis_params,
                                        preset_validation_params=preset_validation_params)
    return graph_manager, params_json
Code Example #13
agent_params.algorithm.discount = 0.90
agent_params.algorithm.optimization_epochs = 5
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    10)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(10)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
Training_Ground_Filter = InputFilter(is_a_reference_filter=True)

env_params = GymVectorEnvironment()
env_params.level = 'Mars-v1'

vis_params = VisualizationParameters()
vis_params.dump_csv = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
vis_params.tensorboard = True
vis_params.print_networks_summary = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 2000
preset_validation_params.max_episodes_to_achieve_reward = 1000
Code Example #14
agent_params.algorithm.discount = 0.999
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(
    20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)

###############
# Environment #
###############
MeiroRunnerFilter = InputFilter(is_a_reference_filter=True)

env_params = GymVectorEnvironment()
env_params.level = 'RoboMaker-MeiroRunner-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #15
# NN configuration
agent_params.network_wrappers['main'].learning_rate = 0.00025
agent_params.network_wrappers['main'].replace_mse_with_huber_loss = False

# ER size
agent_params.memory.max_size = (MemoryGranularity.Transitions, 40000)

# E-Greedy schedule
agent_params.exploration.epsilon_schedule = LinearSchedule(1.0, 0.01, 10000)

################
#  Environment #
################
env_params = GymVectorEnvironment()
env_params.level = "env.gym_super_mario_bros.smb_env:SuperMarioBrosEnv"

#################
# Visualization #
#################

vis_params = VisualizationParameters()
vis_params.dump_gifs = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 150
preset_validation_params.max_episodes_to_achieve_reward = 250
Code Example #16
###############
# Environment #
###############
SilverstoneInputFilter = InputFilter(is_a_reference_filter=True)

SilverstoneInputFilter.add_observation_filter('observation', 'to_grayscale',
                                              ObservationRGBToYFilter())
SilverstoneInputFilter.add_observation_filter('observation', 'to_uint8',
                                              ObservationToUInt8Filter(0, 255))
SilverstoneInputFilter.add_observation_filter('observation', 'stacking',
                                              ObservationStackingFilter(1))

env_params = GymVectorEnvironment()
env_params.default_input_filter = SilverstoneInputFilter
env_params.level = 'DeepRacerRacetrackCustomActionSpaceEnv-v0'

vis_params = VisualizationParameters()
vis_params.dump_mp4 = False

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 400
preset_validation_params.max_episodes_to_achieve_reward = 1000

graph_manager = BasicRLGraphManager(
    agent_params=agent_params,
    env_params=env_params,
    schedule_params=schedule_params,
    vis_params=vis_params,
    preset_validation_params=preset_validation_params)
Code Example #17
agent_params.algorithm.optimization_epochs = 10
agent_params.algorithm.estimate_state_value_using_gae = True
agent_params.algorithm.num_steps_between_copying_online_weights_to_target = EnvironmentEpisodes(20)
agent_params.algorithm.num_consecutive_playing_steps = EnvironmentEpisodes(20)
agent_params.exploration = CategoricalParameters()
agent_params.memory.max_size = (MemoryGranularity.Transitions, 10**5)


###############
# Environment #
###############
Training_Ground_Filter = InputFilter(is_a_reference_filter=True)


env_params = GymVectorEnvironment()
env_params.level = 'Rover-TrainingGrounds-v2'


vis_params = VisualizationParameters()
vis_params.dump_csv = True
vis_params.dump_signals_to_csv_every_x_episodes = 1
vis_params.tensorboard = True

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.test = True
preset_validation_params.min_reward_threshold = 2000
preset_validation_params.max_episodes_to_achieve_reward = 1000