def __init__(
        self,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        conv_nonlinearity=LN.rectify,
        hidden_sizes=[],
        hidden_nonlinearity=LN.tanh,
        output_pi_nonlinearity=LN.softmax,
        pixel_scale=255.,
        alternating_sampler=False,
        **kwargs
        ):
    """
    The policy consists of several convolution layers followed by
    recurrent layers and a softmax output.

    :param env_spec: A spec for the mdp.
    :param conv_filters, conv_filter_sizes, conv_strides, conv_pads:
        specify the convolutional layers. See rllab.core.network.ConvNetwork
        for details.
    :param hidden_sizes: list of sizes for the fully connected hidden layers
    :param hidden_nonlinearity: nonlinearity used for each hidden layer
    :param prob_network: manually specified network for this policy, other
        network params are ignored
    :param feature_layer_index: index of the feature layer. Default -2 means
        the last layer before fc-softmax
    :param eps: mixture weight on uniform distribution; useful to force
        exploration
    :return:
    """
    save_args(vars())
    super().__init__(**kwargs)
def __init__(
        self,
        algo,
        policy,
        sampler,
        seed,
        affinities,
        rank,
        n_runners,
        use_gpu=True,
        ):
    save_args(vars(), underscore=False)
def __init__(
        self,
        rank,
        envs,
        sync,
        segs_buf,
        step_buf,
        horizon,
        max_path_length,
        discount,
        ):
    save_args(vars(), underscore=False)
    self.n_envs = len(envs)
def __init__(
        self,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=LN.rectify,
        pixel_scale=255.,
        epsilon=1,
        dueling=False,
        shared_last_bias=False,
        **kwargs
        ):
    save_args(vars())
    super().__init__(**kwargs)
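# Hedged sketch of the head combination the `dueling` flag above refers to
# (the dueling architecture of Wang et al., 2016); this helper is for
# illustration only and is not part of this repo. The Q-head is split into a
# scalar state value V(s) and per-action advantages A(s, a), recombined with
# the mean advantage subtracted as a baseline.
import numpy as np

def dueling_q(value, advantages):
    # value: shape (batch, 1); advantages: shape (batch, n_actions)
    return value + advantages - advantages.mean(axis=1, keepdims=True)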
def __init__(
        self,
        discount,
        gae_lambda,
        v_loss_coeff=1,
        ent_loss_coeff=0.01,
        standardize_adv=False,
        lr_schedule=None,
        ):
    if lr_schedule is not None and lr_schedule not in LR_SCHEDULES:
        raise ValueError("Unrecognized lr_schedule: {}, should be None "
            "(for constant) or in: {}".format(lr_schedule, LR_SCHEDULES))
    save_args(vars(), underscore=False)
    self.need_extra_obs = True  # (signal sent to the sampler)
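# Hedged example of what a learning-rate schedule entry might do; the actual
# members of LR_SCHEDULES are not shown in this excerpt, so the "linear"
# schedule below is an assumption for illustration only.
def example_linear_lr(initial_lr, itr, n_itr):
    # Anneal linearly from initial_lr toward 0 over the run.
    return initial_lr * max(0., 1. - itr / n_itr)

# e.g. example_linear_lr(1e-3, itr=500, n_itr=1000) -> 5e-4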
def __init__(
        self,
        discount=0.99,
        batch_size=32,
        min_steps_learn=int(5e4),
        delta_clip=1,
        replay_size=int(1e6),
        training_intensity=8,  # avg number of training uses per datum
        target_update_steps=int(1e4),
        reward_horizon=1,
        OptimizerCls=None,
        optimizer_args=None,
        eps_greedy_args=None,
        double_dqn=False,
        dueling_dqn=False,  # just a shortcut for optimizer args
        prioritized_replay=False,
        priority_args=None,
        ):
    save_args(vars(), underscore=False)
    opt_args, eps_args, pri_args = self._get_default_sub_args()
    if optimizer_args is not None:
        opt_args.update(optimizer_args)
    if OptimizerCls is None:
        OptimizerCls = DqnOptimizer
    self.optimizer = OptimizerCls(**opt_args)
    if eps_greedy_args is not None:
        eps_args.update(eps_greedy_args)
    self._eps_initial = eps_args["initial"]
    self._eps_final = eps_args["final"]
    self._eps_eval = eps_args["eval"]
    self._eps_anneal_steps = eps_args["anneal_steps"]
    if prioritized_replay:
        if priority_args is not None:
            pri_args.update(priority_args)
        self._priority_beta_initial = pri_args["beta_initial"]
        self._priority_beta_final = pri_args["beta_final"]
        self._priority_beta_anneal_steps = pri_args["beta_anneal_steps"]
        self._priority_args = dict(
            alpha=pri_args["alpha"],
            beta_initial=pri_args["beta_initial"],
            default_priority=pri_args["default_priority"],
        )
    self.need_extra_obs = False  # (for the sampler; should clean this up)
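# Illustrative sketch (not from this repo) of the sub-args pattern used above:
# the class supplies default dicts for the optimizer / epsilon-greedy /
# prioritized-replay settings, and a user-supplied dict only overrides the
# keys it names. The default values below are hypothetical.
def _example_merge_eps_args(eps_greedy_args=None):
    eps_args = dict(initial=1.0, final=0.1, eval=0.05, anneal_steps=int(1e6))
    if eps_greedy_args is not None:
        eps_args.update(eps_greedy_args)  # partial overrides keep other defaults
    return eps_args

# e.g. _example_merge_eps_args(dict(final=0.01)) changes only the final
# exploration epsilon and leaves the remaining defaults intact.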
def __init__(
        self,
        learning_rate,
        update_method_name,
        update_method_args,
        grad_norm_clip=None,
        scale_conv_grads=False,
        n_update_chunks=3,
        ):
    assert update_method_name in ["rmsprop", "adam"]
    save_args(vars(), underscore=True)
    self.n_update_chunks = n_update_chunks
    if n_update_chunks == 1:
        self._push_update = self._single_lock_push
    else:
        self._push_update = self._cycle_locks_push
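# Hedged illustration (assumed, not from this file) of the difference between
# the single-lock push and the cycling-locks push selected above: with
# n_update_chunks > 1, the flattened update is split into chunks and each
# chunk is written under its own lock, so concurrent workers contend less
# often on the same region of the shared parameters.
import multiprocessing as mp
import numpy as np

def example_cycle_locks_push(shared_params, update, locks):
    # shared_params, update: 1-D arrays of equal length (hypothetical layout)
    chunks = np.array_split(np.arange(len(update)), len(locks))
    for lock, idx in zip(locks, chunks):
        with lock:
            shared_params[idx] += update[idx]

# locks = [mp.Lock() for _ in range(3)]  # one lock per chunk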
def __init__(
        self,
        conv_filters, conv_filter_sizes, conv_strides, conv_pads,
        hidden_sizes=[],
        hidden_nonlinearity=LN.rectify,
        pixel_scale=255.,
        epsilon=1,
        # shared_last_bias=False,
        factorized=True,
        common_noise=False,
        sigma_0=0.4,
        use_mu_init=True,
        **kwargs
        ):
    save_args(vars())
    super().__init__(**kwargs)
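# Hedged sketch of factorized noisy-layer noise (per Fortunato et al., 2017),
# indicating what the `factorized` and `sigma_0` arguments above refer to;
# this helper is illustrative and not part of the repo.
import numpy as np

def example_factorized_noise(n_in, n_out, rng=np.random):
    # f(x) = sign(x) * sqrt(|x|) applied to one noise vector per side,
    # combined by an outer product into the weight-noise matrix.
    f = lambda x: np.sign(x) * np.sqrt(np.abs(x))
    eps_in = f(rng.randn(n_in))
    eps_out = f(rng.randn(n_out))
    return np.outer(eps_in, eps_out), eps_out  # weight noise, bias noise

# In the factorized case the learnable noise scales are commonly initialized
# to sigma_0 / sqrt(n_in), which is what sigma_0 (default 0.4 above) controls.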
def __init__(
        self,
        EnvCls,
        env_args,
        horizon,
        n_parallel=1,
        envs_per=1,
        max_path_length=np.inf,
        mid_batch_reset=True,
        max_decorrelation_steps=2000,
        profile_pathname=None,
        ):
    save_args(vars(), underscore=False)
    self.common_kwargs = vars(self).copy()
    self.common_kwargs.pop("n_parallel")
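# Hedged illustration (names hypothetical) of why `common_kwargs` above is
# built from vars(self): save_args has already attached every constructor
# argument to the instance, so copying vars(self) and dropping the
# master-only entry gives a kwargs dict that can be forwarded when spawning
# per-process workers.
def example_spawn_workers(common_kwargs, n_parallel, WorkerCls):
    return [WorkerCls(rank=rank, **common_kwargs) for rank in range(n_parallel)]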
def __init__(
        self,
        algo,
        policy,
        sampler,
        n_steps,
        seed=None,
        affinities=None,
        use_gpu=True,
        ):
    n_steps = int(n_steps)
    save_args(vars(), underscore=False)
    if affinities is None:
        self.affinities = dict()
    if algo.optimizer.parallelism_tag != self.parallelism_tag:
        raise TypeError("Had mismatched parallelism between Runner ({}) "
            "and Optimizer: {}".format(
                self.parallelism_tag, algo.optimizer.parallelism_tag))