Code example #1
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu'),
                                            'measurements': InputEmbedderParameters(activation_function='leaky_relu'),
                                            'goal': InputEmbedderParameters(activation_function='leaky_relu')}

        self.input_embedders_parameters['observation'].scheme = [
            Conv2d(32, 8, 4),
            Conv2d(64, 4, 2),
            Conv2d(64, 3, 1),
            Dense(512),
        ]

        self.input_embedders_parameters['measurements'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.input_embedders_parameters['goal'].scheme = [
            Dense(128),
            Dense(128),
            Dense(128),
        ]

        self.middleware_parameters = FCMiddlewareParameters(activation_function='leaky_relu',
                                                            scheme=MiddlewareScheme.Empty)
        self.heads_parameters = [MeasurementsPredictionHeadParameters(activation_function='leaky_relu')]
        self.async_training = False
        self.batch_size = 64
        self.adam_optimizer_beta1 = 0.95
Code example #2
File: nec_agent.py Project: itaicaspi/coach
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
     self.should_get_softmax_probabilities = False
Code example #3
File: actor_critic_agent.py Project: itaicaspi/coach
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [VHeadParameters(loss_weight=0.5), PolicyHeadParameters(loss_weight=1.0)]
     self.optimizer_type = 'Adam'
     self.clip_gradients = 40.0
     self.async_training = True
Code example #4
File: nec_agent.py Project: sarikayamehmet/coach
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [DNDQHeadParameters()]
     self.optimizer_type = 'Adam'
Code example #5
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [PolicyHeadParameters()]
     self.async_training = True
Code example #6
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
     self.heads_parameters = [RegressionHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 32
     self.replace_mse_with_huber_loss = False
     self.create_target_network = False
Code example #7
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters()}
     self.middleware_parameters = FCMiddlewareParameters()
     self.heads_parameters = [QHeadParameters()]
     self.optimizer_type = 'Adam'
     self.async_training = True
     self.shared_optimizer = True
     self.create_target_network = True
Code example #8
def test_fc_middleware():
    params = FCMiddlewareParameters(scheme=MiddlewareScheme.Medium)
    mid = FCMiddleware(params=params)
    mid.initialize()
    embedded_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 100))
    output = mid(embedded_data)
    assert output.ndim == 2  # since last block was flatten
    assert output.shape[0] == 10  # since batch_size is 10
    assert output.shape[1] == 512  # since last layer of middleware (middle scheme) had 512 units
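In the same spirit, a hedged companion check using an explicit scheme list instead of a named scheme; it assumes the MXNet backend maps each Dense(n) in the scheme to a gluon Dense layer with n units, and the import paths are assumptions based on the rl_coach package layout.

import mxnet as mx
# assumed import paths; adjust to the actual rl_coach / mxnet_components layout
from rl_coach.architectures.layers import Dense
from rl_coach.architectures.middleware_parameters import FCMiddlewareParameters
from rl_coach.architectures.mxnet_components.middlewares.fc_middleware import FCMiddleware

def test_fc_middleware_custom_scheme():
    # with an explicit scheme list, the output width should match the last Dense layer
    params = FCMiddlewareParameters(scheme=[Dense(256), Dense(64)])
    mid = FCMiddleware(params=params)
    mid.initialize()
    embedded_data = mx.nd.random.uniform(low=0, high=1, shape=(10, 100))
    output = mid(embedded_data)
    assert output.shape == (10, 64)  # batch size preserved, width set by the last Dense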
Code example #9
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='leaky_relu',
                                                                               input_rescaling={'image': 1.0})}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
     self.heads_parameters = [RNDHeadParameters()]
     self.create_target_network = False
     self.optimizer_type = 'Adam'
     self.batch_size = 100
     self.learning_rate = 0.0001
     self.should_get_softmax_probabilities = False
Code example #10
File: ddqn_rnd_agent.py Project: shadiendrawis/coach
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters()
     }
     self.middleware_parameters = FCMiddlewareParameters(
         activation_function='none')
     self.heads_parameters = [RNDHeadParameters()]
     self.optimizer_type = 'Adam'
     self.clip_gradients = None
     self.create_target_network = False
Code example #11
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)}
     self.middleware_parameters = FCMiddlewareParameters(scheme=MiddlewareScheme.Empty)
     self.heads_parameters = [SACQHeadParameters()]      # SACQHeadParameters includes the topology of the head
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003
     self.create_target_network = False
Code example #12
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
     self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
     self.heads_parameters = [SACPolicyHeadParameters()]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003
     self.create_target_network = False
     self.l2_regularization = 0      # weight decay regularization. not used in the original paper
Code example #13
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='relu')}
     self.middleware_parameters = FCMiddlewareParameters(activation_function='relu')
     self.heads_parameters = [VHeadParameters(initializer='xavier')]
     self.rescale_gradient_from_head_by_factor = [1]
     self.optimizer_type = 'Adam'
     self.batch_size = 256
     self.async_training = False
     self.learning_rate = 0.0003     # 3e-4 see appendix D in the paper
     # tau is set in SoftActorCriticAlgorithmParameters.rate_for_copying_weights_to_target
     self.create_target_network = True
Code example #14
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(activation_function='tanh')
     }
     self.middleware_parameters = FCMiddlewareParameters(
         activation_function='tanh')
     self.heads_parameters = [VHeadParameters()]
     self.async_training = True
     self.l2_regularization = 0
     self.create_target_network = True
     self.batch_size = 128
Code example #15
File: ddpg_agent.py Project: rickychau2780/deepracer
 def __init__(self):
     super().__init__()
     self.input_embedders_parameters = {
         'observation': InputEmbedderParameters(batchnorm=True)
     }
     self.middleware_parameters = FCMiddlewareParameters(batchnorm=True)
     self.heads_parameters = [DDPGActorHeadParameters()]
     self.optimizer_type = 'Adam'
     self.batch_size = 64
     self.async_training = False
     self.learning_rate = 0.0001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Code example #16
 def __init__(self, num_q_networks):
     super().__init__()
     self.input_embedders_parameters = {'observation': InputEmbedderParameters(),
                                         'action': InputEmbedderParameters(scheme=EmbedderScheme.Shallow)}
     self.middleware_parameters = FCMiddlewareParameters(num_streams=num_q_networks)
     self.heads_parameters = [TD3VHeadParameters()]
     self.optimizer_type = 'Adam'
     self.adam_optimizer_beta2 = 0.999
     self.optimizer_epsilon = 1e-8
     self.batch_size = 100
     self.async_training = False
     self.learning_rate = 0.001
     self.create_target_network = True
     self.shared_optimizer = True
     self.scale_down_gradients_by_number_of_workers_for_sync_training = False
Code example #17
    def __init__(self):
        super().__init__()
        self.input_embedders_parameters = {'observation': InputEmbedderParameters(activation_function='tanh')}
        self.middleware_parameters = FCMiddlewareParameters(activation_function='tanh')
        self.heads_parameters = [VHeadParameters(), PPOHeadParameters()]
        self.batch_size = 64
        self.optimizer_type = 'Adam'
        self.clip_gradients = None
        self.use_separate_networks_per_head = True
        self.async_training = False
        self.l2_regularization = 0

        # The target network is used in order to freeze the old policy, while making updates to the new one
        # in train_network()
        self.create_target_network = True
        self.shared_optimizer = True
        self.scale_down_gradients_by_number_of_workers_for_sync_training = True
Code example #18
def create_middle_embedder(scheme_dict, embedder_type, activation_function):
    """Creates rl coach middleware
    scheme_dict - Dictionary containing all the information required by
                  the scheme creation methods defined above.
    embedder_type - String indicating desired inputembedder type, available
                     types are defined in SCHEME_TYPE
    activation_function - Desired activationfunction for the embdedder
    """
    try:
        if not ActivationFunctions.has_activation_function(activation_function):
            raise Exception("Invalid activation function for middleware")

        scheme = SCHEME_TYPE[embedder_type](scheme_dict)

        return FCMiddlewareParameters(scheme=scheme, activation_function=activation_function)

    except KeyError as err:
        raise Exception("Middleware, key {} not found".format(err.args[0]))
    except Exception as err:
        raise Exception("Error while creating middleware: {}".format(err))
Code example #19
# Agent Params #
################
agent_params = DDPGAgentParameters()

# actor
actor_network = agent_params.network_wrappers['actor']
actor_network.learning_rate = 0.001
actor_network.batch_size = 256
actor_network.optimizer_epsilon = 1e-08
actor_network.adam_optimizer_beta1 = 0.9
actor_network.adam_optimizer_beta2 = 0.999
actor_network.input_embedders_parameters = {
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
actor_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
actor_network.heads_parameters[0].batchnorm = False

# critic
critic_network = agent_params.network_wrappers['critic']
critic_network.learning_rate = 0.001
critic_network.batch_size = 256
critic_network.optimizer_epsilon = 1e-08
critic_network.adam_optimizer_beta1 = 0.9
critic_network.adam_optimizer_beta2 = 0.999
critic_network.input_embedders_parameters = {
    'action': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'desired_goal': InputEmbedderParameters(scheme=EmbedderScheme.Empty),
    'observation': InputEmbedderParameters(scheme=EmbedderScheme.Empty)
}
critic_network.middleware_parameters = FCMiddlewareParameters(scheme=[Dense(256), Dense(256), Dense(256)])
Code example #20
 def __init__(self):
     super().__init__()
     self.heads_parameters = [RainbowQHeadParameters()]
     self.middleware_parameters = FCMiddlewareParameters(
         scheme=MiddlewareScheme.Empty)
Code example #21
            activation_function='none'),
        'lidar': InputEmbedderParameters(
            scheme=[
                Dense(256),
                BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5),
                Dense(256),
                BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5)
             ],
             activation_function='none')      # we define the activation function for each layer explicitly
        }
agent_params.network_wrappers['main'].middleware_parameters = \
    FCMiddlewareParameters(
        scheme=[
            Dense(256),
            BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5),
            Dense(128),
            BatchnormActivationDropout(activation_function='relu', dropout_rate=0.5)
        ],
        activation_function='none'
    )
agent_params.network_wrappers['main'].learning_rate = 0.0003
# agent_params.network_wrappers['main'].input_embedders_parameters['front_camera'].activation_function = 'relu'
# agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 128
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999
agent_params.network_wrappers['main'].learning_rate_decay_steps = 50000
agent_params.network_wrappers['main'].learning_rate_decay_rate = 0.95

agent_params.algorithm.clip_likelihood_ratio_using_epsilon = 0.2
agent_params.algorithm.clipping_decay_schedule = LinearSchedule(1.0, 0, 1000000)
Code example #22
schedule_params = ScheduleParameters()
schedule_params.improve_steps = TrainingSteps(10000000000)
schedule_params.steps_between_evaluation_periods = EnvironmentEpisodes(100)
schedule_params.evaluation_steps = EnvironmentEpisodes(3)
schedule_params.heatup_steps = EnvironmentSteps(0)

#########
# Agent #
#########
agent_params = ActorCriticAgentParameters()

agent_params.algorithm.apply_gradients_every_x_episodes = 1
agent_params.algorithm.num_steps_between_gradient_updates = 20
agent_params.algorithm.beta_entropy = 0.05

agent_params.network_wrappers['main'].middleware_parameters = FCMiddlewareParameters()
agent_params.network_wrappers['main'].learning_rate = 0.0001

###############
# Environment #
###############
env_params = Atari(level=SingleLevelSelection(atari_deterministic_v4))

########
# Test #
########
preset_validation_params = PresetValidationParameters()
preset_validation_params.trace_test_levels = ['breakout', 'pong', 'space_invaders']

graph_manager = BasicRLGraphManager(agent_params=agent_params, env_params=env_params,
                                    schedule_params=schedule_params, vis_params=VisualizationParameters(),
                                    preset_validation_params=preset_validation_params)
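As a usage note, a hedged sketch of running a preset like the one above programmatically instead of through the coach launcher; the experiment path is an arbitrary example and the other TaskParameters fields stay at their defaults.

from rl_coach.base_parameters import TaskParameters

task_parameters = TaskParameters(experiment_path='./experiments/atari_a3c')
graph_manager.create_graph(task_parameters)  # build the agent and environment graph
graph_manager.improve()                      # run heatup, training and evaluation per the schedule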
Code example #23
    InputEmbedderParameters(
        scheme=[Conv2d(32, 8, 4),
                Conv2d(32, 4, 2),
                Conv2d(64, 4, 2)],
        activation_function='relu',
        dropout_rate=0.3),
    'LIDAR':
    InputEmbedderParameters(scheme=[Dense(64), Dense(32)],
                            activation_function='relu',
                            dropout_rate=0.3)
}

agent_params.network_wrappers['main'].middleware_parameters = \
     FCMiddlewareParameters(
         scheme=[
             Dense(256)
         ],
         activation_function='relu', dropout_rate=0.3
     )

agent_params.network_wrappers['main'].learning_rate = 0.0003
#agent_params.network_wrappers['main'].middleware_parameters.activation_function = 'relu'
agent_params.network_wrappers['main'].batch_size = 64
agent_params.network_wrappers['main'].optimizer_epsilon = 1e-5
agent_params.network_wrappers['main'].adam_optimizer_beta2 = 0.999

# agent_params.network_wrappers['main'].learning_rate_decay_steps = 60000
# agent_params.network_wrappers['main'].learning_rate_decay_rate = 0.95
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].batchnorm = True
# agent_params.network_wrappers['main'].input_embedders_parameters['observation'].dropout_rate = 0.3
# agent_params.network_wrappers['main'].l2_regularization = 2e-5
agent_params.algorithm.beta_entropy = 0.001
Code example #24
    Conv2d(64, 3, 1),
    Conv2d(64, 3, 2),
    Conv2d(128, 3, 1)
])
agent_params.network_wrappers['main'].input_embedders_parameters = {
    'observation': input_embedder_params
}
agent_params.network_wrappers['constant'].input_embedders_parameters = {
    'observation': input_embedder_params
}
agent_params.network_wrappers['predictor'].input_embedders_parameters = {
    'observation': input_embedder_params
}
middleware_scheme = MiddlewareScheme.Medium
agent_params.network_wrappers[
    'main'].middleware_parameters = FCMiddlewareParameters(
        scheme=middleware_scheme)
agent_params.network_wrappers[
    'constant'].middleware_parameters = FCMiddlewareParameters(
        activation_function='none', scheme=middleware_scheme)
agent_params.network_wrappers[
    'predictor'].middleware_parameters = FCMiddlewareParameters(
        activation_function='none', scheme=middleware_scheme)
agent_params.network_wrappers['main'].heads_parameters = [
    DuelingQHeadParameters()
]

# ER
agent_params.memory.max_size = (MemoryGranularity.Transitions, 4096)

################
#  Environment #
Code example #25
                                       dropout_rate=0.5),
            Dense(128),
            BatchnormActivationDropout(activation_function='tanh',
                                       dropout_rate=0.5)
        ],
        activation_function='none'  # we define the activation function for each layer explicitly
    )
}

# simple fc middleware
agent_params.network_wrappers['main'].middleware_parameters = \
    FCMiddlewareParameters(
        scheme=[
            Dense(512),
            BatchnormActivationDropout(activation_function='tanh', dropout_rate=0.5)
        ],
        activation_function='none'
    )

# output branches
agent_params.network_wrappers['main'].heads_parameters = [
    RegressionHeadParameters(
        scheme=[
            Dense(256),
            BatchnormActivationDropout(activation_function='tanh',
                                       dropout_rate=0.5),
            Dense(256),
            BatchnormActivationDropout(activation_function='tanh')
        ],
        num_output_head_copies=4  # follow lane, left, right, straight