def make_parallel(self, n):
    """Build a network that evaluates ``n`` replicas of this network at once.

    Every replica has the same structure as this network but owns its own,
    independently initialized, parameters.

    A ``ParallelEncodingNetwork`` (PEN) is created from the saved
    constructor arguments when both of the following hold:

    * no ``input_preprocessors`` were saved in ``self.saved_args``;
    * the preprocessing combiner is ``math_ops.identity`` or an instance of
      ``alf.nest.utils.NestSum`` / ``alf.nest.utils.NestConcat``.

    Otherwise the parent implementation is used, which creates a
    ``NaiveParallelNetwork`` (NPN), and a one-time warning is logged.

    PEN is not always faster than NPN, especially for small ``n`` and large
    batch_size. See ``test_make_parallel()`` in critic_networks_test.py for
    detail.

    Args:
        n (int): number of replicas.

    Returns:
        Network: A parallel network
    """
    # The combiner is only inspected when no input preprocessors are saved,
    # mirroring the short-circuit evaluation of a single ``and`` condition.
    if self.saved_args.get('input_preprocessors') is None:
        combiner = self._preprocessing_combiner
        pen_supported = (combiner == math_ops.identity or isinstance(
            combiner, (alf.nest.utils.NestSum, alf.nest.utils.NestConcat)))
    else:
        pen_supported = False

    if not pen_supported:
        common.warning_once(
            " ``NaiveParallelNetwork`` is used by ``make_parallel()`` !")
        return super().make_parallel(n)

    # Re-use the constructor arguments of this network, adding the replica
    # count and a derived name.
    pen_kwargs = {
        **self.saved_args,
        "n": n,
        "name": "parallel_" + self.name,
    }
    return ParallelEncodingNetwork(**pen_kwargs)
def _prioritized_sample(self, batch_size, batch_length):
    """Draw a batch of sample locations in proportion to their weights.

    If ``batch_length`` differs from the currently configured mini-batch
    length, the buffer is switched to the new length first (with a one-time
    warning if the previous length was greater than 1).

    Random targets in ``[0, total_weight)`` are then looked up in the sum
    tree. Without replacement, the targets are stratified: the i-th target
    is drawn uniformly from the i-th of ``batch_size`` equal slices of the
    total weight.

    Args:
        batch_size (int): number of samples to draw.
        batch_length (int): length of each sampled sequence.

    Returns:
        BatchInfo: with ``env_ids``, ``positions`` and
        ``importance_weights`` filled in. The importance weight of a sample
        is its weight in the sum tree multiplied by ``nnz / total_weight``.
    """
    previous_length = self._mini_batch_length
    if batch_length != previous_length:
        if previous_length > 1:
            warning_once(
                "It is not advisable to use different batch_length "
                "for different calls to get_batch(). Previous batch_length=%d "
                "new batch_length=%d" % (previous_length, batch_length))
        self._change_mini_batch_length(batch_length)

    weight_sum = self._sum_tree.summary()
    assert weight_sum > 0, (
        "There is no data in the "
        "buffer or the data of all the environments are shorter than "
        "batch_length=%s" % batch_length)

    targets = torch.rand((batch_size, ))
    if not self._with_replacement:
        # One uniform draw per slice [i / batch_size, (i + 1) / batch_size).
        slice_starts = torch.arange(batch_size, dtype=torch.float32)
        targets = (targets + slice_starts) / batch_size
    targets = targets * weight_sum

    leaf_indices = self._sum_tree.find_sum_bound(targets)
    env_ids, idx = self._index_to_env_id_idx(leaf_indices)
    positions = self._pad(idx, env_ids)

    # nnz / total_weight is the reciprocal of the mean weight, so a sample
    # whose weight equals the mean gets an importance weight of 1.
    inverse_mean_weight = self._sum_tree.nnz / weight_sum
    return BatchInfo(
        env_ids=env_ids,
        positions=positions,
        importance_weights=self._sum_tree[leaf_indices] * inverse_mean_weight)
def _summarize_training_setting(self):
    """Record the training configuration and code provenance as summaries.

    The gin configuration is written to ``configured.gin`` under the root
    directory. Then, with summary recording forced on, text summaries are
    written for the gin config, the command line, the optimizer info, the
    git revision, the git diff, the random seed and, when
    ``self._config.code_snapshots`` is set, the content of each listed
    file. Snapshot paths are resolved relative to the parent of the
    directory containing this file; paths that are not regular files are
    skipped with a one-time warning.
    """
    # We need to wait for one iteration to get the operative args.
    # Right now, just give a fixed gin file name to store operative args.
    common.write_gin_configs(self._root_dir, "configured.gin")
    with alf.summary.record_if(lambda: True):

        def _markdownify(paragraph):
            """Turn ``paragraph`` into a Markdown indented code block.

            A leading line break is prepended and every following line is
            indented by four spaces, which Markdown renders verbatim
            (keeping line breaks and characters such as ``+``/``-``).
            """
            return "    ".join(
                (os.linesep + paragraph).splitlines(keepends=True))

        common.summarize_gin_config()
        alf.summary.text('commandline', ' '.join(sys.argv))
        alf.summary.text(
            'optimizers', _markdownify(self._algorithm.get_optimizer_info()))
        alf.summary.text('revision', git_utils.get_revision())
        alf.summary.text('diff', _markdownify(git_utils.get_diff()))
        alf.summary.text('seed', str(self._random_seed))

        if self._config.code_snapshots is not None:
            base_dir = os.path.abspath(os.path.dirname(__file__))
            for f in self._config.code_snapshots:
                path = os.path.join(base_dir, "..", f)
                if not os.path.isfile(path):
                    common.warning_once(
                        "The code file '%s' for summary is invalid" % path)
                    continue
                # Read with an explicit encoding so the result does not
                # depend on the process locale; undecodable bytes are
                # replaced instead of aborting the whole summary.
                with open(path, 'r', encoding='utf-8',
                          errors='replace') as fin:
                    code = fin.read()
                # adding "<pre>" will make TB show raw text instead of MD
                alf.summary.text('code/%s' % f, "<pre>" + code + "</pre>")
def _train(self, experience, num_updates, mini_batch_size,
           mini_batch_length):
    """Train using experience.

    The experience is transformed and preprocessed, cut into sequences of
    ``mini_batch_length`` steps, and then iterated over ``num_updates``
    times in mini-batches of ``mini_batch_size`` sequences. ``self._update``
    is called once per mini-batch.

    Args:
        experience (nest): experience whose tensors have the time dimension
            on axis 1 (``experience.step_type.shape[1]`` is used as the
            sequence length) -- presumably batch-major ``[B, T, ...]``;
            confirm against the caller.
        num_updates (int): number of passes over the whole experience.
        mini_batch_size (int|None): number of sequences per mini-batch. A
            falsy value means the whole batch is used at once.
        mini_batch_length (int|None): length of each sequence. A falsy
            value means the full length of ``experience`` is used. The
            experience length must be a multiple of it.

    Returns:
        The number of training steps, computed as
        ``batch_size * mini_batch_length * num_updates`` where
        ``batch_size`` is the number of sequences after reshaping.
    """
    experience = self.transform_timestep(experience)
    experience = self.preprocess_experience(experience)

    length = experience.step_type.shape[1]
    mini_batch_length = (mini_batch_length or length)
    assert length % mini_batch_length == 0, (
        "length=%s not a multiple of mini_batch_length=%s" %
        (length, mini_batch_length))

    # A non-empty train_state_spec means the algorithm carries state (the
    # messages below refer to this as the RNN case).
    if len(tf.nest.flatten(
            self.train_state_spec)) > 0 and not self._use_rollout_state:
        if mini_batch_length == 1:
            logging.fatal(
                "Should use TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN when minibatch_length==1.")
        else:
            common.warning_once(
                "Consider using TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN.")

    # Merge the two leading axes and split them again so that axis 1 has
    # size mini_batch_length; trailing axes are kept as they are.
    experience = tf.nest.map_structure(
        lambda x: tf.reshape(
            x, common.concat_shape([-1, mini_batch_length],
                                   tf.shape(x)[2:])), experience)

    batch_size = tf.shape(experience.step_type)[0]
    mini_batch_size = (mini_batch_size or batch_size)

    def _make_time_major(nest):
        """Put the time dim to axis=0."""
        return tf.nest.map_structure(lambda x: common.transpose2(x, 0, 1),
                                     nest)

    for u in tf.range(num_updates):
        # Reshuffle the sequences on every pass, but only when the batch is
        # actually split into several mini-batches.
        if mini_batch_size < batch_size:
            indices = tf.random.shuffle(
                tf.range(tf.shape(experience.step_type)[0]))
            experience = tf.nest.map_structure(
                lambda x: tf.gather(x, indices), experience)
        for b in tf.range(0, batch_size, mini_batch_size):
            # The last mini-batch may be shorter than mini_batch_size.
            batch = tf.nest.map_structure(
                lambda x: x[b:tf.minimum(batch_size, b + mini_batch_size)],
                experience)
            batch = _make_time_major(batch)
            # weight = (actual size of this mini-batch) / mini_batch_size;
            # after the transpose the batch dimension is on axis 1.
            training_info, loss_info, grads_and_vars = self._update(
                batch,
                weight=tf.cast(tf.shape(batch.step_type)[1], tf.float32) /
                float(mini_batch_size))
            # The global counter advances once per mini-batch.
            common.get_global_counter().assign_add(1)
            self.training_summary(training_info, loss_info, grads_and_vars)

    self.metric_summary()
    train_steps = batch_size * mini_batch_length * num_updates
    return train_steps
def capture_frame(self, pred_info=None):
    """Render ``self.env`` and append the rendered frame to the video.

    When ``pred_info`` is given it is additionally plotted onto the frame
    (this requires matplotlib and the ``rgb_array`` render mode). Nothing
    happens if the recorder is not functional.

    Args:
        pred_info (nested): a nest
    """
    if not self.functional:
        return

    logger.debug('Capturing video frame: path=%s', self.path)

    if pred_info is None:
        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    else:
        assert not self.ansi_mode, "Only supports rgb_array mode!"
        render_mode = 'rgb_array'

    frame = self.env.render(mode=render_mode)

    if frame is None:
        if not self._async:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn(
                'Env returned None on render(). Disabling further '
                'rendering for video recorder by marking as disabled: '
                'path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
        return

    if pred_info is not None:
        if plt is None:
            common.warning_once(
                "matplotlib is not installed; prediction info will not "
                "be plotted when rendering videos.")
        else:
            frame = self._plot_pred_info(frame, pred_info)

    self.last_frame = frame
    encode = (self._encode_ansi_frame
              if self.ansi_mode else self._encode_image_frame)
    encode(frame)

    assert not self.broken, (
        "The output file is broken! Check warning messages.")
def test_warning_once(self):
    """Each distinct ``common.warning_once`` call site should log once."""
    expected_messages = ["warning message 1", "warning message 2"]

    # omit non-customized logging messages
    logging._warn_preinit_stderr = 0

    with StringIO() as captured, redirect_stderr(captured):
        for _ in range(10):
            # Keep these as two separate call sites: the check below is
            # about warnings issued from different places.
            common.warning_once(expected_messages[0])
            common.warning_once(expected_messages[1])
        stderr_text = captured.getvalue()

    logged_lines = stderr_text.rstrip().split('\n')

    # Previously we only got one warning message here, although
    # warning_once had been called multiple times at different places.
    assert len(expected_messages) == len(logged_lines)
    for expected, logged in zip(expected_messages, logged_lines):
        assert expected in logged
def _train(self, experience, num_updates, mini_batch_size,
           mini_batch_length, update_counter_every_mini_batch,
           should_summarize):
    """Train using experience.

    The experience is transformed and preprocessed, cut into sequences of
    ``mini_batch_length`` steps, and then iterated over ``num_updates``
    times in mini-batches of ``mini_batch_size`` sequences. ``self._update``
    is called once per mini-batch.

    Args:
        experience (nest): experience whose tensors have the time dimension
            on axis 1 (``experience.step_type.shape[1]`` is used as the
            sequence length) -- presumably batch-major ``[B, T, ...]`` with
            distributions stored as parameters; confirm against the caller.
        num_updates (int): number of passes over the whole experience.
        mini_batch_size (int|None): number of sequences per mini-batch. A
            falsy value means the whole batch is used at once.
        mini_batch_length (int|None): length of each sequence. A falsy
            value means the full length of ``experience`` is used. The
            experience length must be a multiple of it.
        update_counter_every_mini_batch (bool): if True, the global counter
            is incremented before every mini-batch and summary is enabled
            for every mini-batch; otherwise summary is enabled only for the
            last mini-batch of the last update.
        should_summarize (bool): whether ``self.summarize_train`` may be
            called at all.

    Returns:
        The number of training steps, computed as
        ``batch_size * mini_batch_length * num_updates`` where
        ``batch_size`` is the number of sequences after reshaping.
    """
    # Convert to distributions for the transformations below, then convert
    # back to parameters afterwards.
    experience = nest_utils.params_to_distributions(
        experience, self.experience_spec)
    experience = self.transform_timestep(experience)
    experience = self.preprocess_experience(experience)
    experience = nest_utils.distributions_to_params(experience)

    length = experience.step_type.shape[1]
    mini_batch_length = (mini_batch_length or length)
    assert length % mini_batch_length == 0, (
        "length=%s not a multiple of mini_batch_length=%s" %
        (length, mini_batch_length))

    # A non-empty train_state_spec means the algorithm carries state (the
    # messages below refer to this as the RNN case).
    if len(tf.nest.flatten(
            self.train_state_spec)) > 0 and not self._use_rollout_state:
        if mini_batch_length == 1:
            logging.fatal(
                "Should use TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN when minibatch_length==1.")
        else:
            common.warning_once(
                "Consider using TrainerConfig.use_rollout_state=True "
                "for off-policy training of RNN.")

    # Merge the two leading axes and split them again so that axis 1 has
    # size mini_batch_length; trailing axes are kept as they are.
    experience = tf.nest.map_structure(
        lambda x: tf.reshape(
            x, common.concat_shape([-1, mini_batch_length],
                                   tf.shape(x)[2:])), experience)

    batch_size = tf.shape(experience.step_type)[0]
    mini_batch_size = (mini_batch_size or batch_size)

    def _make_time_major(nest):
        """Put the time dim to axis=0."""
        return tf.nest.map_structure(lambda x: common.transpose2(x, 0, 1),
                                     nest)

    # Remember the name scope so it can be restored inside the loops.
    scope = get_current_scope()

    for u in tf.range(num_updates):
        # Reshuffle the sequences on every pass, but only when the batch is
        # actually split into several mini-batches.
        if mini_batch_size < batch_size:
            indices = tf.random.shuffle(
                tf.range(tf.shape(experience.step_type)[0]))
            experience = tf.nest.map_structure(
                lambda x: tf.gather(x, indices), experience)
        for b in tf.range(0, batch_size, mini_batch_size):
            if update_counter_every_mini_batch:
                common.get_global_counter().assign_add(1)
            # True only for the final mini-batch of the final update.
            is_last_mini_batch = tf.logical_and(
                tf.equal(u, num_updates - 1),
                tf.greater_equal(b + mini_batch_size, batch_size))
            do_summary = tf.logical_or(is_last_mini_batch,
                                       update_counter_every_mini_batch)
            common.enable_summary(do_summary)
            # The last mini-batch may be shorter than mini_batch_size.
            batch = tf.nest.map_structure(
                lambda x: x[b:tf.minimum(batch_size, b + mini_batch_size)],
                experience)
            batch = _make_time_major(batch)
            # Tensorflow graph mode loses the original name scope here. We
            # need to restore the original name scope
            with tf.name_scope(scope):
                # weight = (actual size of this mini-batch) /
                # mini_batch_size; after the transpose the batch dimension
                # is on axis 1.
                training_info, loss_info, grads_and_vars = self._update(
                    batch,
                    weight=tf.cast(
                        tf.shape(batch.step_type)[1], tf.float32) /
                    float(mini_batch_size))
            if should_summarize:
                if do_summary:
                    # Putting `if do_summary` under the above `with` statement
                    # does not help. Somehow `if` statement will also lose
                    # the original name scope.
                    with tf.name_scope(scope):
                        self.summarize_train(training_info, loss_info,
                                             grads_and_vars)

    train_steps = batch_size * mini_batch_length * num_updates
    return train_steps
def __init__(self,
             input_tensor_spec,
             n,
             output_tensor_spec=None,
             input_preprocessors=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=None,
             activation=torch.relu_,
             kernel_initializer=None,
             use_fc_bn=False,
             last_layer_size=None,
             last_activation=None,
             last_kernel_initializer=None,
             last_use_fc_bn=False,
             name="ParallelEncodingNetwork"):
    """Create ``n`` encoders of identical structure that run in parallel.

    Args:
        input_tensor_spec (nested TensorSpec): spec of the (possibly
            nested) input. A nested spec requires
            ``preprocessing_combiner`` to be given.
        n (int): number of parallel networks.
        output_tensor_spec (None|TensorSpec): spec of the output without
            the parallel dimension ``n``. With None, the output spec is
            ``TensorSpec((n, output_size))`` where ``output_size`` is
            inferred from the layers. Otherwise the output spec is
            ``TensorSpec((n, *output_tensor_spec.shape))`` and the output
            is reshaped accordingly; its number of elements must match the
            size of the last layer. It only affects how the output is
            interpreted, not which layers are built.
        input_preprocessors (None): must be ``None``.
        preprocessing_combiner (NestCombiner): combiner applied to complex
            inputs. It must also accept ``input_tensor_spec`` so that the
            processed tensor spec can be computed, e.g.
            ``alf.nest.utils.NestConcat``. Useful for combining inputs
            through gin configuration only.
        conv_layer_params (tuple[tuple]): one tuple per conv layer in the
            form ``(filters, kernel_size, strides, padding)`` with
            ``padding`` being optional. Requires a processed input of
            shape ``(C, H, W)``.
        fc_layer_params (tuple[int]): sizes of the FC layers.
        activation (nn.functional): activation of every layer except the
            last one.
        kernel_initializer (Callable): initializer of every layer except
            the last one. None selects a variance-scaling initializer.
        use_fc_bn (bool): whether the FC layers use Batch Normalization.
        last_layer_size (int): size of an optional extra layer appended at
            the end. Must be given together with ``last_activation``.
        last_activation (nn.functional): activation of the extra layer.
            Must be given together with ``last_layer_size``.
        last_kernel_initializer (Callable): initializer of the extra
            layer. None falls back to ``kernel_initializer``. Unused when
            there is no extra layer.
        last_use_fc_bn (bool): whether the extra layer uses Batch
            Normalization.
        name (str):
    """
    super().__init__(input_tensor_spec,
                     input_preprocessors=None,
                     preprocessing_combiner=preprocessing_combiner,
                     name=name)

    # TODO: handle input_preprocessors
    assert input_preprocessors is None

    if kernel_initializer is None:
        kernel_initializer = functools.partial(
            variance_scaling_init,
            mode='fan_in',
            distribution='truncated_normal',
            nonlinearity=activation)

    processed_spec = self._processed_input_tensor_spec

    # Optional convolutional front end; its flattened output feeds the FC
    # stack.
    self._img_encoding_net = None
    if conv_layer_params:
        assert isinstance(conv_layer_params, tuple), \
            "The input params {} should be tuple".format(conv_layer_params)
        assert len(processed_spec.shape) == 3, \
            "The input shape {} should be like (C,H,W)!".format(
                processed_spec.shape)
        channels, img_height, img_width = processed_spec.shape
        self._img_encoding_net = ParallelImageEncodingNetwork(
            channels, (img_height, img_width),
            n,
            conv_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer,
            flatten_output=True)
        # Index 0 of the parallel encoder's output shape is ``n``.
        feature_size = self._img_encoding_net.output_spec.shape[1]
    else:
        assert processed_spec.ndim == 1, \
            "The input shape {} should be like (N,)!".format(
                processed_spec.shape)
        feature_size = processed_spec.shape[0]

    self._fc_layers = nn.ModuleList()
    if fc_layer_params is None:
        hidden_sizes = []
    else:
        assert isinstance(fc_layer_params, tuple)
        hidden_sizes = list(fc_layer_params)

    for hidden_size in hidden_sizes:
        fc = layers.ParallelFC(feature_size,
                               hidden_size,
                               n,
                               activation=activation,
                               use_bn=use_fc_bn,
                               kernel_initializer=kernel_initializer)
        self._fc_layers.append(fc)
        feature_size = hidden_size

    if not (last_layer_size is None and last_activation is None):
        assert last_layer_size is not None and last_activation is not None, \
            "Both last_layer_size and last_activation need to be specified!"

        if last_kernel_initializer is None:
            common.warning_once(
                "last_kernel_initializer is not specified "
                "for the last layer of size {}.".format(last_layer_size))
            last_kernel_initializer = kernel_initializer

        last_fc = layers.ParallelFC(
            feature_size,
            last_layer_size,
            n,
            activation=last_activation,
            use_bn=last_use_fc_bn,
            kernel_initializer=last_kernel_initializer)
        self._fc_layers.append(last_fc)
        feature_size = last_layer_size

    if output_tensor_spec is None:
        output_shape = (n, feature_size)
        reshape_output = False
    else:
        assert output_tensor_spec.numel == feature_size, (
            "network output "
            "size {a} is inconsisent with specified out_tensor_spec "
            "of size {b}".format(a=feature_size,
                                 b=output_tensor_spec.numel))
        output_shape = (n, *output_tensor_spec.shape)
        reshape_output = True

    self._output_spec = TensorSpec(
        output_shape, dtype=self._processed_input_tensor_spec.dtype)
    self._reshape_output = reshape_output
    self._n = n
def __init__(self,
             input_tensor_spec,
             input_preprocessors=None,
             preprocessing_combiner=None,
             conv_layer_params=None,
             fc_layer_params=None,
             activation=torch.relu_,
             kernel_initializer=None,
             use_fc_bn=False,
             last_layer_size=None,
             last_activation=None,
             last_kernel_initializer=None,
             last_use_fc_bn=False,
             name="EncodingNetwork"):
    """Create an encoder made of optional conv layers followed by FC layers.

    Args:
        input_tensor_spec (nested TensorSpec): spec of the (possibly
            nested) input. A nested spec requires
            ``preprocessing_combiner`` to be given.
        input_preprocessors (nested InputPreprocessor): a nest of
            ``InputPreprocessor``, each applied to the corresponding
            input. If given, it must have the same structure as
            ``input_tensor_spec``. Useful for configuring per-input
            preprocessing through gin only, for example embedding a
            discrete input before concatenating it with a continuous
            vector.
        preprocessing_combiner (NestCombiner): combiner applied to complex
            inputs. It must also accept ``input_tensor_spec`` so that the
            processed tensor spec can be computed, e.g.
            ``alf.nest.utils.NestConcat``. Useful for combining inputs
            through gin configuration only.
        conv_layer_params (tuple[tuple]): one tuple per conv layer in the
            form ``(filters, kernel_size, strides, padding)`` with
            ``padding`` being optional. Requires a processed input of
            shape ``(C, H, W)``.
        fc_layer_params (tuple[int]): sizes of the FC layers.
        activation (nn.functional): activation of every layer except the
            last one.
        kernel_initializer (Callable): initializer of every layer except
            the last one. None selects a variance-scaling initializer.
        use_fc_bn (bool): whether the FC layers use Batch Normalization.
        last_layer_size (int): size of an optional extra layer appended at
            the end. Must be given together with ``last_activation``.
        last_activation (nn.functional): activation of the extra layer.
            Must be given together with ``last_layer_size``.
        last_kernel_initializer (Callable): initializer of the extra
            layer. None falls back to ``kernel_initializer``. Unused when
            there is no extra layer.
        last_use_fc_bn (bool): whether the extra layer uses Batch
            Normalization.
        name (str):
    """
    super().__init__(input_tensor_spec,
                     input_preprocessors,
                     preprocessing_combiner,
                     name=name)

    if kernel_initializer is None:
        kernel_initializer = functools.partial(
            variance_scaling_init,
            mode='fan_in',
            distribution='truncated_normal',
            nonlinearity=activation)

    processed_spec = self._processed_input_tensor_spec

    # Optional convolutional front end; its flattened output feeds the FC
    # stack.
    self._img_encoding_net = None
    if conv_layer_params:
        assert isinstance(conv_layer_params, tuple), \
            "The input params {} should be tuple".format(conv_layer_params)
        assert len(processed_spec.shape) == 3, \
            "The input shape {} should be like (C,H,W)!".format(
                processed_spec.shape)
        channels, img_height, img_width = processed_spec.shape
        self._img_encoding_net = ImageEncodingNetwork(
            channels, (img_height, img_width),
            conv_layer_params,
            activation=activation,
            kernel_initializer=kernel_initializer,
            flatten_output=True)
        feature_size = self._img_encoding_net.output_spec.shape[0]
    else:
        assert processed_spec.ndim == 1, \
            "The input shape {} should be like (N,)!".format(
                processed_spec.shape)
        feature_size = processed_spec.shape[0]

    self._fc_layers = nn.ModuleList()
    if fc_layer_params is None:
        hidden_sizes = []
    else:
        assert isinstance(fc_layer_params, tuple)
        hidden_sizes = list(fc_layer_params)

    for hidden_size in hidden_sizes:
        fc = layers.FC(feature_size,
                       hidden_size,
                       activation=activation,
                       use_bn=use_fc_bn,
                       kernel_initializer=kernel_initializer)
        self._fc_layers.append(fc)
        feature_size = hidden_size

    if not (last_layer_size is None and last_activation is None):
        assert last_layer_size is not None and last_activation is not None, \
            "Both last_layer_size and last_activation need to be specified!"

        if last_kernel_initializer is None:
            common.warning_once(
                "last_kernel_initializer is not specified "
                "for the last layer of size {}.".format(last_layer_size))
            last_kernel_initializer = kernel_initializer

        last_fc = layers.FC(feature_size,
                            last_layer_size,
                            activation=last_activation,
                            use_bn=last_use_fc_bn,
                            kernel_initializer=last_kernel_initializer)
        self._fc_layers.append(last_fc)
        feature_size = last_layer_size

    self._output_spec = TensorSpec(
        (feature_size, ), dtype=self._processed_input_tensor_spec.dtype)
def capture_frame(self,
                  time_step=None,
                  policy_step=None,
                  is_last_step=False,
                  info_func=None):
    """Render ``self.env`` and append the rendered frame to the video.

    Depending on the rendering mode, information taken from the time step
    and the policy step is plotted as well. When ``future_steps`` > 0
    (defer mode), the frame together with observation, reward and action
    is cached in a recorder buffer, and encoding is postponed until
    ``future_steps`` future frames are available.

    Nothing happens if the recorder is not functional.

    Args:
        time_step (None|TimeStep): unused when future_steps <= 0; must not
            be None when future_steps > 0.
        policy_step (None|PolicyStep): policy step supplying:

            - info: displayed in the frame if not None
            - action: displayed when future_steps > 0
        is_last_step (bool): whether this is the last step of the episode,
            either because of game over or a time limit. In defer mode it
            makes all frames still held in the buffer get encoded, so the
            last few frames of the episode are handled properly.
        info_func (None|callable): computes customized information (e.g.
            predicted future reward) to plot, from the observation at each
            time step and the action sequence from that step up to the
            next ``future_steps`` steps (if available). Called as
            ``pred_info=info_func(current_observation, action_sequences)``.
            Only scalar predictive information is currently supported.
    """
    if not self.functional:
        return

    logger.debug('Capturing video frame: path=%s', self.path)

    defer_mode = bool(self._future_steps > 0)
    if defer_mode:
        assert time_step is not None and policy_step is not None, (
            "need to provide both time_step and policy_step "
            "when future_steps > 0")

    pred_info = policy_step.info if policy_step is not None else None

    if pred_info is None:
        render_mode = 'ansi' if self.ansi_mode else 'rgb_array'
    else:
        assert not self.ansi_mode, "Only supports rgb_array mode!"
        render_mode = 'rgb_array'

    frame = self.env.render(mode=render_mode)

    if frame is None:
        if not self._async:
            # Indicates a bug in the environment: don't want to raise
            # an error here.
            logger.warn(
                'Env returned None on render(). Disabling further '
                'rendering for video recorder by marking as disabled: '
                'path=%s metadata_path=%s', self.path, self.metadata_path)
            self.broken = True
        return

    if self._render_pred_info:
        if pred_info is None:
            common.warning_once(
                "You have choosen to render prediction info, but no "
                " prediction info is provided. Skipping this.")
        elif plt is None:
            common.warning_once(
                "matplotlib is not installed; prediction info will not "
                "be plotted when rendering videos.")
        else:
            frame = self._plot_pred_info(frame, pred_info)

    self._last_frame = frame
    if not defer_mode:
        self._encode_frame(frame)
    else:
        buffered_values = [
            frame, time_step.observation, time_step.reward,
            policy_step.output
        ]
        self._recorder_buffer.append_fields(self._fields, buffered_values)
        self._encode_with_future_info(info_func=info_func,
                                      encode_all=is_last_step)

    # Optionally pad the end of an episode with blank frames; the blank
    # frame is created lazily with the shape of the last rendered frame.
    if self._append_blank_frames > 0 and is_last_step:
        if self._blank_frame is None:
            self._blank_frame = np.zeros_like(self._last_frame)
        for _ in range(self._append_blank_frames):
            self._encode_frame(self._blank_frame)

    assert not self.broken, (
        "The output file is broken! Check warning messages.")