Ejemplo n.º 1
0
    def replace(self, episodes, length, rows=None):
        """Overwrite whole episodes stored in the memory.

        Args:
          episodes: Tuple of transition quantities with batch and time
            dimensions.
          length: Batch of sequence lengths.
          rows: Episodes to replace, defaults to all.

        Returns:
          Operation.
        """
        if rows is None:
            rows = tf.range(self._capacity)
        assert rows.shape.ndims == 1
        capacity_check = tf.assert_less(
            rows, self._capacity, message='capacity exceeded')
        # The length check is chained behind the capacity check, and the
        # buffer writes are chained behind the length check.
        with tf.control_dependencies([capacity_check]):
            length_check = tf.assert_less_equal(
                length, self._max_length, message='max length exceeded')
        with tf.control_dependencies([length_check]):
            buffer_updates = [
                tf.scatter_update(buffer_, rows, elements)
                for buffer_, elements in zip(self._buffers, episodes)
            ]
        # The stored lengths are written only after every buffer update.
        with tf.control_dependencies(buffer_updates):
            return tf.scatter_update(self._length, rows, length)
Ejemplo n.º 2
0
    def constraint_def(self):
        """Define the ops that enforce the configured embedding constraint.

        ``self.config.constraint`` selects the constraint: 'nonneg',
        'unitnorm', 'unitnorm_nonneg', 'maxnorm' or 'maxnorm_nonneg'. For any
        other value the method returns without defining anything.

        On success two lists of ops are stored on ``self``:
          constraint_all_embs_ops: re-assign the whole embedding variables.
          constraint_scatter_embs_ops: update only the rows addressed by
            ``self.h`` / ``self.t`` / ``self.r``.
        Which variables are covered is controlled by
        ``self.config.to_constrain`` ('ent' and/or 'rel').
        """
        kind = self.config.constraint
        if kind not in ('nonneg', 'unitnorm', 'unitnorm_nonneg', 'maxnorm',
                        'maxnorm_nonneg'):
            return

        def make_constraint(axis_attr):
            """Return the constraint function using config axis `axis_attr`."""

            def constrain(x):
                # The axis is read lazily, only when the constraint is
                # actually applied to a variable.
                if kind.startswith('unitnorm'):
                    norm = tf.keras.constraints.unit_norm(
                        axis=getattr(self.config, axis_attr))
                elif kind.startswith('maxnorm'):
                    norm = tf.keras.constraints.max_norm(
                        max_value=2, axis=getattr(self.config, axis_attr))
                else:
                    norm = None
                if kind.endswith('nonneg'):
                    x = tf.keras.constraints.non_neg()(x)
                return x if norm is None else norm(x)

            return constrain

        constraint_ent = make_constraint('constrain_axis_ent')
        constraint_rel = make_constraint('constrain_axis_rel')

        # assign and scatter_update seem to have no gradient; stop_gradient is
        # applied just to make sure.
        all_ops = self.constraint_all_embs_ops = []  # update all embs
        if 'ent' in self.config.to_constrain:
            all_ops.append(tf.stop_gradient(
                tf.assign(self.ent_embs, constraint_ent(self.ent_embs))))
        if 'rel' in self.config.to_constrain:
            all_ops.append(tf.stop_gradient(
                tf.assign(self.rel_embs, constraint_rel(self.rel_embs))))

        active_ops = self.constraint_scatter_embs_ops = []  # only active embs
        if 'ent' in self.config.to_constrain:
            for indices, embs in ((self.h, self.hem), (self.t, self.tem)):
                active_ops.append(tf.stop_gradient(
                    tf.scatter_update(self.ent_embs, indices,
                                      constraint_ent(embs))))
        if 'rel' in self.config.to_constrain:
            active_ops.append(tf.stop_gradient(
                tf.scatter_update(self.rel_embs, self.r,
                                  constraint_rel(self.rem))))
 def _reset_non_empty(self, indices):
   """Reset the wrapped environments and store their new observations.

   Args:
     indices: Batch indices forwarded to the wrapped environment's reset.

   Returns:
     Tensor with the new observations, produced after they have been
     written into `self._observ`.
   """
   fresh_observ = self._batch_env._reset_non_empty(indices)  # pylint: disable=protected-access
   store_op = tf.scatter_update(self._observ, indices, fresh_observ)
   with tf.control_dependencies([store_op]):
     return tf.identity(fresh_observ)
Ejemplo n.º 4
0
def I_sgaba(G, V):
    """Return the `sgaba` term summed over axis 1 of an (n_n, n_n) grid.

    Uses the module-level names `n_n`, `sgaba_mat`, `K_sgaba`, `E_sgaba` and
    `G_sgaba`.

    Args:
      G: Tensor with one value per entry of `sgaba_mat` equal to 1
        (presumably float64, matching the scatter target -- confirm).
      V: Tensor combined with `E_sgaba` by broadcasting.

    Returns:
      Tensor produced by `tf.reduce_sum(..., 1)`.
    """
    g_pow4 = tf.pow(G, 4)
    saturation = g_pow4 / (g_pow4 + K_sgaba)
    n_pairs = n_n**2
    # Flat positions of the entries of sgaba_mat that equal 1.
    active_idx = tf.boolean_mask(tf.range(n_pairs),
                                 sgaba_mat.reshape(-1) == 1)
    # Scatter the values into a zero vector, then fold it into a transposed
    # (n_n, n_n) matrix.
    flat = tf.Variable([0.0] * n_pairs, dtype=tf.float64)
    flat = tf.scatter_update(flat, active_idx, saturation)
    grid = tf.transpose(tf.reshape(flat, (n_n, n_n)))
    weighted = tf.transpose((grid * (V - E_sgaba)) * G_sgaba)
    return tf.reduce_sum(weighted, 1)
Ejemplo n.º 5
0
def add_val_to_col(var, col, val):
    """Add `val` to column `col` of `var`.

    A float32 row vector that is `val` at position `col` and zero elsewhere
    is built in a fresh variable and added to `var` by broadcasting.

    Args:
      var: Tensor or variable whose second dimension has a static size.
      col: Index of the column to modify.
      val: Value to add to that column.

    Returns:
      The sum of `var` and the one-row update.
    """
    var_shape = var.get_shape()
    column_delta = tf.Variable(tf.zeros(var_shape[1]), dtype=tf.float32)
    column_delta = tf.scatter_update(column_delta, [col], [val])
    row = tf.reshape(column_delta, [1, var_shape.as_list()[1]])
    return var + row
Ejemplo n.º 6
0
 def update_task_vector(self):
     """Rewrite `self.task_vector` so only this task's slot is set.

     The vector is first assigned all zeros, then the entry at
     `self._task_index` is set to `self._active`; the combined op is run in
     the session returned by `deeplift.util.get_session()`.
     """
     n_tasks = self.get_shape()[1]
     cleared = tf.assign(self.task_vector, np.zeros(n_tasks))
     # The scatter is applied to the output of the assign, so running it also
     # runs the zeroing.
     activated = tf.scatter_update(cleared, [self._task_index],
                                   [self._active])
     deeplift.util.get_session().run(activated)
Ejemplo n.º 7
0
    def perform(self, agent_indices, observ):
        """Build the ops that pick actions for a batch of observations.

        Args:
          agent_indices: Tensor containing current batch indices.
          observ: Tensor of a batch of observations for all algorithms.

        Returns:
          Tuple of action batch tensor and summary tensor.
        """
        with tf.name_scope('perform/'):
            observ = self._observ_filter.transform(observ)
            if self._last_state is not None:
                state = tf.contrib.framework.nest.map_structure(
                    lambda x: tf.gather(x, agent_indices), self._last_state)
            else:
                state = None
            # The network is fed a singleton axis 1 and a vector of ones with
            # one entry per batch row.
            batch_size = observ.shape[0]
            output = self._network(observ[:, None], tf.ones(batch_size),
                                   state)
            action = tf.cond(self._is_training, output.policy.sample,
                             lambda: output.mean)
            logprob = output.policy.log_prob(action)[:, 0]

            def _histograms():
                """Merge the histogram summaries of the current outputs."""
                return tf.summary.merge([
                    tf.summary.histogram('mean', output.mean[:, 0]),
                    tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
                    tf.summary.histogram('action', action[:, 0]),
                    tf.summary.histogram('logprob', logprob)
                ])

            # `str` yields an empty string when logging is disabled.
            summary = tf.cond(self._should_log, _histograms, str)
            # Remember current policy to append to memory in the experience
            # callback.
            if self._last_state is not None:
                assign_state = utility.assign_nested_vars(
                    self._last_state, output.state, agent_indices)
            else:
                assign_state = tf.no_op()
            remember_ops = [
                assign_state,
                tf.scatter_update(self._last_action, agent_indices,
                                  action[:, 0]),
                tf.scatter_update(self._last_mean, agent_indices,
                                  output.mean[:, 0]),
                tf.scatter_update(self._last_logstd, agent_indices,
                                  output.logstd[:, 0])
            ]
            with tf.control_dependencies(remember_ops):
                checked_action = tf.check_numerics(action[:, 0], 'action')
                return checked_action, tf.identity(summary)
Ejemplo n.º 8
0
 def add_transitions(self, transitions):
     """Write a batch of transitions into the buffer at the current index.

     Only as many rows as fit between `self._current_idx` and `self._size`
     are written; the remainder of the batch is not stored. Afterwards the
     size and index counters are advanced, and for a circular buffer the
     index is reset to 0 once it reaches `self._size`.

     The Python `if` on tensor comparisons presumably relies on eager
     execution -- confirm against the caller.

     Args:
       transitions: A `Transition` whose fields are batched along axis 0.
     """
     assert isinstance(transitions, Transition)
     n_incoming = transitions.s1.shape[0]
     n_free = self._size - self._current_idx
     n_written = tf.minimum(n_incoming, n_free)
     write_idx = self._current_idx + tf.range(n_written)
     for field in transitions._asdict():
         storage = getattr(self._data, field)
         incoming = getattr(transitions, field)
         tf.scatter_update(storage, write_idx, incoming[:n_written])
     # Update size and index.
     if tf.less(self._current_size, self._size):
         self._current_size.assign_add(n_written)
     self._current_idx.assign_add(n_written)
     if self._circular and tf.greater_equal(self._current_idx, self._size):
         self._current_idx.assign(0)
Ejemplo n.º 9
0
    def _update_history(self, hist_idx, hist_size, zs, Hzs, s_hists, y_hists):
        """Store a new (s, y) pair in the histories and advance the counters.

        Args:
          hist_idx: Variable holding the history slot to write.
          hist_size: Variable holding the number of stored entries.
          zs: Values written into `s_hists`, one per history.
          Hzs: Values written into `y_hists`, one per history.
          s_hists: History variables receiving `zs`.
          y_hists: History variables receiving `Hzs`.

        Returns:
          Grouped operation performing all writes and counter updates.
        """
        writes = []
        # Use the Hessian or the Gauss-Newton approximation instead of a
        # simple difference of gradients.
        for z, Hz, s_hist, y_hist in zip(zs, Hzs, s_hists, y_hists):
            writes.extend([
                tf.scatter_update(s_hist, hist_idx, z),
                tf.scatter_update(y_hist, hist_idx, Hz),
            ])

        capacity = self._conf['hist']
        next_idx = tf.mod(hist_idx + 1, capacity)
        next_size = tf.minimum(hist_size + 1, capacity)

        # The counters are assigned only after the history writes.
        with tf.control_dependencies(writes):
            counter_updates = [
                tf.assign(hist_idx, next_idx),
                tf.assign(hist_size, next_size),
            ]

        return tf.group(writes + counter_updates)
Ejemplo n.º 10
0
    def _compute_y(self, hist_idx, grads, grads_prev, s_hists, y_hists):
        """Write the regularised gradient difference into the y histories.

        For every parameter the value
        `g - g_prev + reg * s_hist[hist_idx]` is stored at slot `hist_idx`
        of the matching `y_hist`, with `reg` taken from `self._conf['reg']`.

        Returns:
          Grouped operation performing all writes.
        """
        writes = [
            tf.scatter_update(
                y_hist, hist_idx,
                g - g_prev + self._conf['reg'] * s_hist[hist_idx])
            for s_hist, y_hist, g, g_prev in zip(s_hists, y_hists, grads,
                                                 grads_prev)
        ]
        return tf.group(writes)
Ejemplo n.º 11
0
    def _define_begin_episode(agent_indices):
        """Reset environments, scores and durations for starting episodes.

        Args:
          agent_indices: Tensor containing batch indices starting an episode.

        Returns:
          Summary tensor.
        """
        assert agent_indices.shape.ndims == 1
        blank_scores = tf.zeros_like(agent_indices, tf.float32)
        blank_lengths = tf.zeros_like(agent_indices)
        env_reset = batch_env.reset(agent_indices)
        clear_score = tf.scatter_update(score, agent_indices, blank_scores)
        clear_length = tf.scatter_update(length, agent_indices, blank_lengths)
        # The algorithm is notified only after all three resets.
        with tf.control_dependencies([env_reset, clear_score, clear_length]):
            return algo.begin_episode(agent_indices)
Ejemplo n.º 12
0
    def reset(self, entries_to_reset):
        """Zero the selected entries of the memory.

        Args:
          entries_to_reset: a 1D tensor.
        Returns:
          the reset op.
        """
        n_entries = tf.size(entries_to_reset)

        # One zero block per entry for the stored values.
        zero_vals = tf.fill([self.memory_size, self.val_depth], .0)
        zero_vals = tf.tile(tf.expand_dims(zero_vals, 0), [n_entries, 1, 1])
        update_vals = tf.scatter_update(self.mem_vals, entries_to_reset,
                                        zero_vals)

        # One zero row per entry for the mean logits.
        zero_logits = tf.fill([self.memory_size], .0)
        zero_logits = tf.tile(tf.expand_dims(zero_logits, 0), [n_entries, 1])
        update_logits = tf.scatter_update(self.mean_logits, entries_to_reset,
                                          zero_logits)

        return tf.group([update_vals, update_logits])
Ejemplo n.º 13
0
  def reset(self, indices=None):
    """Reset the selected environments and their cached step results.

    Args:
      indices: The batch indices of the environments to reset; defaults to all.

    Returns:
      Batch tensor of the new observations.
    """
    if indices is None:
      indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    raw_observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(raw_observ, 'observ')
    # Rewards are cleared to 0.0 and done flags to False for the reset rows.
    cleared_reward = tf.zeros_like(indices, tf.float32)
    cleared_done = tf.zeros_like(indices, tf.bool)
    store_ops = [
        tf.scatter_update(self._observ, indices, observ),
        tf.scatter_update(self._reward, indices, cleared_reward),
        tf.scatter_update(self._done, indices, cleared_done),
    ]
    with tf.control_dependencies(store_ops):
      return tf.identity(observ)
Ejemplo n.º 14
0
    def _build_model(self):
        """Build the TF1 graph: placeholders, embeddings, pooling and MLP.

        Sets `self.graph_built`, the input placeholders, `self.concat_embed`
        and `self.output` (a flat tensor with one value per input row).
        """
        self.graph_built = True
        tf.set_random_seed(self.seed)
        # Input placeholders; the leading dimension is left unspecified.
        self.user_indices = tf.placeholder(tf.int32, shape=[None])
        self.item_indices = tf.placeholder(tf.int32, shape=[None])
        self.user_interacted_seq = tf.placeholder(
            tf.int32, shape=[None, self.interaction_num])
        self.user_interacted_len = tf.placeholder(tf.float32, shape=[None])
        self.labels = tf.placeholder(tf.float32, shape=[None])
        self.is_training = tf.placeholder_with_default(False, shape=[])
        self.concat_embed = []

        # Both tables have one extra row (index n_users / n_items).
        user_features = tf.get_variable(
            name="user_features",
            shape=[self.n_users + 1, self.embed_size],
            initializer=tf_truncated_normal(0.0, 0.01),
            regularizer=self.reg)
        item_features = tf.get_variable(
            name="item_features",
            shape=[self.n_items + 1, self.embed_size],
            initializer=tf_truncated_normal(0.0, 0.01),
            regularizer=self.reg)
        user_embed = tf.nn.embedding_lookup(user_features, self.user_indices)
        item_embed = tf.nn.embedding_lookup(item_features, self.item_indices)

        # unknown items are padded to 0-vector
        # Row `n_items` is overwritten with zeros before the sequence lookup
        # below; the `item_embed` lookup above does not depend on this op.
        zero_padding_op = tf.scatter_update(
            item_features, self.n_items,
            tf.zeros([self.embed_size], dtype=tf.float32))
        with tf.control_dependencies([zero_padding_op]):
            multi_item_embed = tf.nn.embedding_lookup(
                item_features, self.user_interacted_seq)  # B * seq * K
        # Sum over the sequence axis, divided by sqrt(length); div_no_nan
        # yields 0 where the length is 0.
        pooled_embed = tf.div_no_nan(
            tf.reduce_sum(multi_item_embed, axis=1),
            tf.expand_dims(tf.sqrt(self.user_interacted_len), axis=1))
        self.concat_embed.extend([user_embed, item_embed, pooled_embed])

        # Optional feature builders; presumably they append to
        # self.concat_embed -- confirm in their definitions.
        if self.sparse:
            self._build_sparse()
        if self.dense:
            self._build_dense()

        concat_embed = tf.concat(self.concat_embed, axis=1)
        mlp_layer = dense_nn(concat_embed,
                             self.hidden_units,
                             use_bn=self.use_bn,
                             dropout_rate=self.dropout_rate,
                             is_training=self.is_training)
        # Single-unit output layer, flattened to one value per row.
        self.output = tf.reshape(tf.layers.dense(inputs=mlp_layer, units=1),
                                 [-1])
        count_params()
Ejemplo n.º 15
0
def create_binary_mask_from_scores(score_tensor, f=None, n_zeros=None):
    """Return a 0-1 mask that zeroes the lowest-scoring entries.

    Given a numerical tensor with N elements, the result has the same shape
    and dtype, with ones at the positions of the largest scores and zeros at
    the remaining positions. The number of zeros is `floor(N * f)` when `f`
    is given, or `n_zeros` otherwise.

    Exactly one of `f` and `n_zeros` must be provided.

    Args:
      score_tensor: an arbitrary numerical tensor.
      f: fraction of zeros to be set: a number 0<f<1.
      n_zeros: int, number of zeros to be set: n_zeros<n_elements.
    Returns:
      a binary Tensor with the same shape and type as `score_tensor`.
    """
    # Exactly one of the two named arguments may be in use.
    assert (f is None) ^ (n_zeros is None)
    n_elements = tf.size(score_tensor).numpy()
    if f is None:
        assert isinstance(n_zeros, int)
        n_dropped = n_zeros
    else:
        assert 0 < f < 1
        n_dropped = int(math.floor(n_elements * f))
    n_ones = n_elements - n_dropped

    needs_reshape = len(score_tensor.shape) > 1
    flat_scores = (tf.reshape(score_tensor, [-1])
                   if needs_reshape else score_tensor)
    # Start from all zeros and switch on the top `n_ones` positions.
    mask = tf.Variable(tf.zeros_like(flat_scores))
    _, keep_idx = tf.nn.top_k(flat_scores, n_ones)
    tf.scatter_update(mask, keep_idx, 1)
    result = mask.read_value()
    if needs_reshape:
        # Restore the original shape.
        result = tf.reshape(result, score_tensor.shape)
    return result
  def _reset_non_empty(self, indices):
    """Reset the wrapped environments and refill the stored history.

    If the wrapped environment exposes `history_observations`, those frames
    are stored; otherwise the new observation is repeated `self.history`
    times along a new axis 1.

    Args:
      indices: Batch indices forwarded to the wrapped environment's reset.

    Returns:
      The rows of `self.observ` at `indices`, read after the update.
    """
    fresh_observ = self._batch_env._reset_non_empty(indices)  # pylint: disable=protected-access
    frames = getattr(self._batch_env, "history_observations", None)

    n_observ_dims = len(self.old_shape)

    if frames is None:
      multiples = [1, self.history] + [1] * n_observ_dims
      frames = tf.tile(tf.expand_dims(fresh_observ, axis=1), multiples)
    with tf.control_dependencies([fresh_observ]):
      store_op = tf.scatter_update(self._observ, indices, frames)
    with tf.control_dependencies([store_op]):
      return tf.gather(self.observ, indices)
Ejemplo n.º 17
0
    def clear(self, rows=None):
        """Mark episodes in the memory as empty.

        Only the stored lengths are set to zero; the buffer contents are left
        in place and will be overridden by future calls to append() or
        replace().

        Args:
          rows: Episodes to clear, defaults to all.

        Returns:
          Operation.
        """
        if rows is None:
            rows = tf.range(self._capacity)
        assert rows.shape.ndims == 1
        zero_lengths = tf.zeros_like(rows)
        return tf.scatter_update(self._length, rows, zero_lengths)
Ejemplo n.º 18
0
  def _reset_non_empty(self, indices):
    """Reset the selected environments and store their observations.

    Args:
      indices: The batch indices of the environments to reset.

    Returns:
      Batch tensor of the new observations.
    """
    observ = tf.py_func(
        self._batch_env.reset, [indices], self.observ_dtype, name="reset")
    # py_func drops static shape information, so it is restored here.
    static_shape = indices.get_shape().concatenate(self.observ_shape)
    observ.set_shape(static_shape)
    store_op = tf.scatter_update(self._observ, indices, observ)
    with tf.control_dependencies([store_op]):
      return tf.identity(observ)
Ejemplo n.º 19
0
def reinit_nested_vars(variables, indices=None):
    """Reset all variables in a nested tuple to zeros.

    Args:
      variables: Nested tuple or list of variables.
      indices: Indices along the first dimension to reset, defaults to all.

    Returns:
      Operation.
    """
    if isinstance(variables, (tuple, list)):
        nested_ops = [reinit_nested_vars(item, indices) for item in variables]
        return tf.group(*nested_ops)
    if indices is None:
        return variables.assign(tf.zeros_like(variables))
    # One zero row per index, with the variable's trailing dimensions.
    n_rows = tf.shape(indices)[0]
    row_shape = variables.shape[1:].as_list()
    return tf.scatter_update(variables, indices,
                             tf.zeros([n_rows] + row_shape))
Ejemplo n.º 20
0
    def _curvature_range(self):
        """Build the ops tracking the curvature range over a sliding window.

        The log of the squared gradient norm is written into a window of
        width `self.curvature_window_width`; the minimum and maximum over the
        filled part of the window are smoothed by `self._moving_averager` and
        exponentiated into `self._h_min` and `self._h_max`.

        Returns:
          A one-element list containing the moving-average update op.
        """
        self._curv_win = tf.get_variable("curv_win",
                                         dtype=tf.float32,
                                         trainable=False,
                                         shape=[
                                             self.curvature_window_width,
                                         ],
                                         initializer=tf.zeros_initializer)
        # We use log smoothing for curvature range
        self._curv_win = tf.scatter_update(
            self._curv_win, self._step % self.curvature_window_width,
            tf.log(self._grad_norm_squared))
        # Note here the iterations start from iteration 0
        # Only the first min(window width, step + 1) slots hold real values.
        # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
        valid_window = tf.slice(
            self._curv_win, tf.constant([
                0,
            ]),
            tf.expand_dims(tf.minimum(tf.constant(self.curvature_window_width),
                                      self._step + 1),
                           axis=0))
        self._h_min_t = tf.reduce_min(valid_window)
        self._h_max_t = tf.reduce_max(valid_window)

        curv_range_ops = []
        with tf.control_dependencies([self._h_min_t, self._h_max_t]):
            avg_op = self._moving_averager.apply(
                [self._h_min_t, self._h_max_t])
            # The averaged values are read only after the averager update.
            with tf.control_dependencies([avg_op]):
                self._h_min = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_min_t)))
                self._h_max = tf.exp(
                    tf.identity(self._moving_averager.average(self._h_max_t)))
                if self._sparsity_debias:
                    self._h_min *= self._sparsity_avg
                    self._h_max *= self._sparsity_avg
        curv_range_ops.append(avg_op)
        return curv_range_ops
Ejemplo n.º 21
0
    def _apply_sparse_shared(self, grad_values, grad_indices, var):
        """Apply a sparse gradient update to `var`.

        Args:
          grad_values: Gradient rows, one per entry of `grad_indices`.
          grad_indices: Indices along the first dimension of `var`.
          var: Variable to update.

        Returns:
          The `tf.scatter_sub` op that applies the scaled gradient to `var`.
        """
        shape = np.array(var.get_shape())
        var_rank = len(shape)
        # For sparse case, we only update the accumulator representing the sparse
        # dimension. In this case SM3 is similar to isotropic adagrad but with
        # better bound (due to the max operator).
        #
        # We do not use the column accumulator because it will updated for
        # every gradient step and will significantly overestimate the gradient
        # square. While, the row accumulator can take advantage of the sparsity
        # in the gradients. Even if one implements the column accumulator - it
        # will result in a no-op because the row accumulators will have lower
        # values.
        #
        # Note that: We do not run this code paths for our experiments in our paper
        # as on TPU all the sparse gradients are densified.
        if var_rank > 1:
            accumulator_var = self.get_slot(var, "accumulator_" + str(0))
            accumulator = tf.gather(accumulator_var, grad_indices)
            # Reshape to [num_indices, 1, ..., 1] so it broadcasts against
            # the gradient rows.
            shape_for_broadcasting = tf.concat(
                [[tf.shape(accumulator)[0]], [1] * (var_rank - 1)], 0)
            accumulator = tf.reshape(accumulator, shape_for_broadcasting)
            accumulator += grad_values * grad_values
        else:
            accumulator_var = self.get_slot(var, "accumulator")
            # scatter_add returns the whole accumulator; gather the entries
            # at grad_indices so the result lines up with grad_values.
            accumulator = tf.gather(
                tf.scatter_add(accumulator_var, grad_indices,
                               grad_values * grad_values), grad_indices)

        accumulator_inv_sqrt = tf.rsqrt(accumulator + 1e-30)
        scaled_g = (grad_values * accumulator_inv_sqrt)
        updates = []
        with tf.control_dependencies([scaled_g]):
            if var_rank > 1:
                # Store the max over the non-sparse axes as the new row
                # accumulator.
                axes = list(range(1, var_rank))
                new_accumulator = tf.reduce_max(accumulator, axis=axes)
                updates = [
                    tf.scatter_update(accumulator_var, grad_indices,
                                      new_accumulator)
                ]
        with tf.control_dependencies(updates):
            return tf.scatter_sub(var, grad_indices,
                                  self._learning_rate_tensor * scaled_g)
Ejemplo n.º 22
0
    def _sparse_moving_average(self, x_tm1, idxs, a_t_, name, beta=.9):
        """Update the rows `idxs` of a moving-average accumulator.

        A per-row step counter (accumulator `'<name>/tm1'`) is incremented
        for the rows in `idxs`. For `beta < 1` the effective decay is
        `beta * (1 - beta**tm1) / (1 - beta**t)`; otherwise it is `tm1 / t`.

        Args:
          x_tm1: Variable the accumulators belong to.
          idxs: Row indices being updated.
          a_t_: New values for those rows.
          name: Accumulator name.
          beta: Decay rate.

        Returns:
          Tuple of the updated accumulator and the updated step counter.
        """
        average = self.get_accumulator(x_tm1, '%s' % name)
        average_rows = tf.gather(average, idxs)
        shape = self.get_variable_shape(x_tm1)
        # The counter has the variable's rank, with singleton trailing dims.
        counter_shape = [shape[0]] + [1] * (len(shape) - 1)
        tm1 = self.get_accumulator(x_tm1, '%s/tm1' % name,
                                   shape=counter_shape)
        prev_steps = tf.gather(tm1, idxs)
        t = tf.scatter_add(tm1, idxs, tf.ones_like(prev_steps))
        cur_steps = tf.gather(t, idxs)
        if beta < 1:
            beta_t = tf.convert_to_tensor(beta, name='%s/decay' % name)
            decay_rows = (beta_t * (1 - beta_t**prev_steps) /
                          (1 - beta_t**cur_steps))
        else:
            decay_rows = prev_steps / cur_steps
        # First scale the old rows, then add the weighted new values.
        b_t = tf.scatter_update(average, idxs, decay_rows * average_rows)
        b_t = tf.scatter_add(b_t, idxs, (1 - decay_rows) * a_t_)
        return b_t, t
Ejemplo n.º 23
0
def assign_nested_vars(variables, tensors, indices=None):
    """Assign tensors to matching nested tuple of variables.

    Args:
      variables: Nested tuple or list of variables to update.
      tensors: Nested tuple or list of tensors to assign.
      indices: Batch indices to assign to; default to all.

    Returns:
      Operation.
    """
    if isinstance(variables, (tuple, list)):
        # `indices` must be forwarded, otherwise nested structures would
        # always be assigned in full regardless of the requested rows.
        return tf.group(*[
            assign_nested_vars(variable, tensor, indices)
            for variable, tensor in zip(variables, tensors)
        ])
    if indices is None:
        return variables.assign(tensors)
    return tf.scatter_update(variables, indices, tensors)
Ejemplo n.º 24
0
    def _update_local_control_variate(self,
                                      control_variate,
                                      idx,
                                      elbo_hmc,
                                      assign=True,
                                      decay=0.9):
        """
        Moving-average update of a per-datapoint control variate.

        While `self.global_step` is not greater than
        `self.control_var_independent_iters`, every entry is set to a single
        global moving average (based on `control_variate[0]` and the mean of
        `elbo_hmc`); afterwards only the entries at `idx` are updated.

        With `assign=True`, `control_variate` is treated as a variable and
        the assign op is returned; with `assign=False` it is treated as a
        tensor and the new value is returned without assigning it.
        """
        # Updated values for the entries selected by idx.
        local_update = decay * tf.gather(
            control_variate, idx) + (1. - decay) * elbo_hmc

        # Scalar update used while the global control variate is active.
        global_update = decay * control_variate[0] + (
            1. - decay) * tf.reduce_mean(elbo_hmc)

        if not assign:
            # control_variate is a tensor
            local_cv = tf.tensor_scatter_nd_update(
                control_variate, tf.expand_dims(idx, 1), local_update)
        else:
            # control_variate is a variable
            local_cv = tf.scatter_update(control_variate, idx, local_update)

        # Tile the scalar to the shape of the local control variate.
        global_cv = tf.fill(control_variate.get_shape(), global_update)
        use_local = self.global_step > self.control_var_independent_iters
        new_cv_value = tf.where(use_local, local_cv, global_cv)
        return control_variate.assign(new_cv_value) if assign else new_cv_value
Ejemplo n.º 25
0
 def dyn_setup(nodes):
     """Register the assignment ops that move nodes and their link ends.

     For a non-fixed network this stores the force assignment under
     `asg["NL_F"]`, computes `f_node_max` and the displacement `nodes.dp`,
     and registers the point update under `asg["N"]`. For a fixed network
     `f_node_max` is the constant 1e-4 and `asg["N"]` is empty. In both
     cases `asg["NL"]` copies node positions onto the link points listed in
     `nodes.link_ends`.
     """
     net = nodes.net
     if net.fixed:
         net.f_node_max = 1e-4
         net.asg["N"] = tuple()
     else:
         net.asg["NL_F"] = (net.f_node.assign(nodes.Force_End), )
         net.assignments_forces += net.asg["NL_F"]
         net.f_node_max = tf.reduce_max(tf.abs(net.f_node))
         # *tf.to_float(not nodes.net.fixed) +1e-4
         nodes.dp = net.f_mild(net.f_node * net.dt)
         net.asg["N"] = (nodes.points.assign_add(nodes.dp), )
     # Rows of (node index, link-point index).
     node_link_pairs = array([(node_idx, link_idx)
                              for node_idx in range(len(net.pts))
                              for link_idx in nodes.link_ends[node_idx]])
     node_positions = tf.gather(nodes.points, node_link_pairs[:, 0])
     net.asg["NL"] = (tf.scatter_update(net.links.points,
                                        node_link_pairs[:, 1],
                                        node_positions), )
     net.assignments_points += net.asg["N"] + net.asg["NL"]
Ejemplo n.º 26
0
 def reset(self, indices):
     """Restore the initial frames for the given rows of the history buffer.

     Args:
       indices: Rows of `self._history_buff` to overwrite.

     Returns:
       The value of the whole history buffer, read after the update.
     """
     restored_rows = tf.gather(self._initial_frames, indices)
     write_op = tf.scatter_update(self._history_buff, indices, restored_rows)
     with tf.control_dependencies([write_op]):
         return self._history_buff.read_value()
Ejemplo n.º 27
0
        def step(index, scores_sum, scores_num):
            """Single collection step.

            Runs the policy on the current observations if the `speculum`
            queue is empty, dequeues one transition, stores it in `memory`
            at `index`, accumulates rewards, and resets the environments
            flagged as done by the environment step.

            Args:
              index: Current position in the memory.
              scores_sum: Running sum of the scores of finished episodes.
              scores_num: Running count of finished episodes.

            Returns:
              List of the incremented index and the updated score sum and
              count.
            """
            index %= epoch_length  # Only needed in eval runs.
            # Note - the only way to ensure making a copy of tensor is to run simple
            # operation. We are waiting for tf.copy:
            # https://github.com/tensorflow/tensorflow/issues/11186
            obs_copy = batch_env.observ + 0
            value_fun_shape = (num_agents, )
            if distributional_size > 1:
                value_fun_shape = (num_agents, distributional_size)

            def env_step(arg1, arg2, arg3):  # pylint: disable=unused-argument
                """Step of the environment."""

                (logits, value_function) = get_policy(obs_copy, ppo_hparams,
                                                      batch_env.action_space,
                                                      distributional_size)
                action = common_layers.sample_with_temperature(
                    logits, sampling_temp)
                action = tf.cast(action, tf.int32)
                action = tf.reshape(action, shape=(num_agents, ))

                reward, done = batch_env.simulate(action)

                # Probability the policy assigned to the sampled action.
                pdf = tfp.distributions.Categorical(logits=logits).prob(action)
                pdf = tf.reshape(pdf, shape=(num_agents, ))
                value_function = tf.reshape(value_function,
                                            shape=value_fun_shape)
                done = tf.reshape(done, shape=(num_agents, ))

                # Outputs are released only after the simulation results.
                with tf.control_dependencies([reward, done]):
                    return tf.identity(pdf), tf.identity(value_function), \
                           tf.identity(done)

            # TODO(piotrmilos): while_body is executed at most once,
            # thus should be replaced with tf.cond
            # The loop body runs while the speculum queue is empty; the
            # initial loop values are placeholders of the right shapes.
            pdf, value_function, top_level_done = tf.while_loop(
                lambda _1, _2, _3: tf.equal(speculum.size(), 0),
                env_step,
                [
                    tf.constant(0.0, shape=(num_agents, )),
                    tf.constant(0.0, shape=value_fun_shape),
                    tf.constant(False, shape=(num_agents, ))
                ],
                parallel_iterations=1,
                back_prop=False,
            )

            # Dequeue and store one transition after the loop has produced
            # its outputs.
            with tf.control_dependencies([pdf, value_function]):
                obs, reward, done, action = speculum.dequeue()
                to_save = [obs, reward, done, action, pdf, value_function]
                save_ops = [
                    tf.scatter_update(memory_slot, index, value)
                    for memory_slot, value in zip(memory, to_save)
                ]
                cumulate_rewards_op = cumulative_rewards.assign_add(reward)

                agent_indices_to_reset = tf.where(top_level_done)[:, 0]
            # Score statistics are read after the rewards were accumulated.
            with tf.control_dependencies([cumulate_rewards_op]):
                # TODO(piotrmilos): possibly we need cumulative_rewards.read_value()
                scores_sum_delta = tf.reduce_sum(
                    tf.gather(cumulative_rewards.read_value(),
                              agent_indices_to_reset))
                scores_num_delta = tf.count_nonzero(done, dtype=tf.int32)
            # Resets happen only after saving and after the statistics were
            # computed from the not-yet-reset cumulative rewards.
            with tf.control_dependencies(save_ops +
                                         [scores_sum_delta, scores_num_delta]):
                reset_env_op = batch_env.reset(agent_indices_to_reset)
                reset_cumulative_rewards_op = tf.scatter_update(
                    cumulative_rewards, agent_indices_to_reset,
                    tf.gather(zeros_tensor, agent_indices_to_reset))
            with tf.control_dependencies(
                [reset_env_op, reset_cumulative_rewards_op]):
                return [
                    index + 1, scores_sum + scores_sum_delta,
                    scores_num + scores_num_delta
                ]
Ejemplo n.º 28
0
    def _compute_step(self, hist_idx, hist_size, grads, s_hists, y_hists):
        """Build the op computing the next step from the (s, y) histories.

        Starting from the negative gradients, the direction is refined in
        three stages over the last `self._conf['hist']` history slots, then
        scaled by `self._conf['lr']` and written into `s_hists` at
        `hist_idx`. Slots beyond `hist_size` contribute nothing.

        Args:
          hist_idx: Current history slot.
          hist_size: Number of valid history entries.
          grads: Gradients, one per parameter.
          s_hists: History variables of steps, one per parameter.
          y_hists: History variables of y vectors, one per parameter.

        Returns:
          Grouped operation that stores the new step in `s_hists`.
        """
        ops = []

        # Create tensors to compute dot products from the histories of s and y.
        sTy_hist = []
        yTy_hist = []
        # We construct the list so that sTy_history[-i] will compute the dot product
        # (s_{k-i}, y_{k-i}) etc.
        for i in reversed(range(1, self._conf['hist'] + 1)):
            sTy = tf.zeros([])
            yTy = tf.zeros([])
            idx = tf.mod(hist_idx - i, self._conf['hist'])
            # Dot products are summed over all parameters.
            for s_hist, y_hist in zip(s_hists, y_hists):
                sTy += tf.reduce_sum(s_hist[idx] * y_hist[idx])
                yTy += tf.reduce_sum(y_hist[idx] * y_hist[idx])
            sTy_hist.append(sTy)
            yTy_hist.append(yTy)

        # Start with the negative gradient.
        ps = []
        for g in grads:
            ps.append(-g)

        # First stage of the update (alg. 3, step 2 in the paper)
        alphas = []

        # Create a TensorFlow group that bundles all updates from an iteration
        # within the first stage.
        for i in range(1, self._conf['hist'] + 1):
            idx = tf.mod(hist_idx - i, self._conf['hist'])
            # Compute coefficient alpha (alg. 3, step 2a).
            sTp = tf.zeros(shape=[])
            for p, s_hist in zip(ps, s_hists):
                sTp += tf.reduce_sum(s_hist[idx] * p)
            # alpha is zero for slots that hold no history yet.
            alpha = tf.cond(i <= hist_size, lambda: sTp / sTy_hist[-i],
                            lambda: tf.zeros(shape=[]))
            alphas.append(alpha)
            # Update direction (alg. 3, step 2b).
            for j, y_hist in enumerate(y_hists):
                ps[j] -= alpha * y_hist[idx]

        # Second stage of the update (alg. 3, step 3 and eq. 14)
        # coeff is the mean of sTy / yTy over the valid slots, or
        # self._conf['eps'] when the history is empty.
        coeff = tf.zeros(shape=[])
        for i in range(1, self._conf['hist'] + 1):
            coeff = tf.cond(i <= hist_size,
                            lambda: coeff + sTy_hist[-i] / yTy_hist[-i],
                            lambda: coeff)
        coeff = tf.cond(tf.equal(hist_size, 0), lambda: self._conf['eps'],
                        lambda: coeff / tf.cast(hist_size, dtype=coeff.dtype))
        #coeff = tf.Print(coeff, [hist_size], message='hist_size = ')
        for j in range(len(ps)):
            ps[j] *= coeff

        # Third stage of the update (alg. 3, step 4)
        for i in reversed(range(1, self._conf['hist'] + 1)):
            idx = tf.mod(hist_idx - i, self._conf['hist'])
            yTp = tf.zeros(shape=[])
            for p, y_hist in zip(ps, y_hists):
                yTp += tf.reduce_sum(y_hist[idx] * p)
            beta = yTp / sTy_hist[-i]
            # beta is only used inside the branch taken for valid slots.
            alpha_minus_beta = tf.cond(i <= hist_size,
                                       lambda: alphas[i - 1] - beta,
                                       lambda: tf.zeros(shape=[]))
            for j, s_hist in enumerate(s_hists):
                ps[j] += alpha_minus_beta * s_hist[idx]

        # Save update.
        for p, s_hist in zip(ps, s_hists):
            s = self._conf['lr'] * p
            ops.append(tf.scatter_update(s_hist, hist_idx, s))

        return tf.group(ops)
Ejemplo n.º 29
0
def setup_dynamic_ops(n_y):
    """Set up ops to move / copy mixture component weights for dynamic expansion.

    Every parameter tensor is looked up by name in the current default graph,
    so the graph must already contain tensors with the names used below.

    Args:
      n_y: int, dimensionality of discrete latent variable y.

    Returns:
      A dict containing all of the ops required for dynamic updating:
        'ind_from_ph', 'ind_to_ph': int32 placeholders selecting the source
          and destination component.
        'latent_enc_tensors': name -> latent encoder weight / bias tensor.
        'latent_enc_phs': name -> placeholder with the matching dtype/shape.
        'latent_enc_assign_ops': name -> op assigning the placeholder value
          to the corresponding tensor.
        'cluster_w_update_op', 'cluster_b_update_op': copy column / entry
          `ind_from` of the cluster encoder weights / biases to `ind_to`.
        'mu_update_op', 'sigma_update_op': copy row `ind_from` of the latent
          prior mu / sigma weights to row `ind_to`.

    """
    # Set up graph ops to dynamically modify component params.
    graph = tf.get_default_graph()

    # 1) Ops to get and set latent encoder params (entire tensors)
    latent_enc_tensors = {}
    for k in range(n_y):
        latent_enc_tensors['latent_w_' + str(k)] = graph.get_tensor_by_name(
            'latent_encoder/mlp_latent_encoder_{}/w:0'.format(k))
        latent_enc_tensors['latent_b_' + str(k)] = graph.get_tensor_by_name(
            'latent_encoder/mlp_latent_encoder_{}/b:0'.format(k))

    latent_enc_assign_ops = {}
    latent_enc_phs = {}
    for key, tensor in latent_enc_tensors.items():
        latent_enc_phs[key] = tfc.placeholder(tensor.dtype,
                                              tensor.shape,
                                              name='latent_enc_phs')
        latent_enc_assign_ops[key] = tf.assign(tensor, latent_enc_phs[key])

    # 2) Ops to get and set cluster encoder params (columns of a tensor)
    # We will be copying column ind_from to column ind_to.
    cluster_w = graph.get_tensor_by_name(
        'cluster_encoder/mlp_cluster_encoder_final/w:0')
    cluster_b = graph.get_tensor_by_name(
        'cluster_encoder/mlp_cluster_encoder_final/b:0')

    ind_from = tfc.placeholder(dtype=tf.int32, name='ind_from')
    # NOTE(review): the graph name 'inf_to' looks like a typo for 'ind_to'. It
    # is kept unchanged because renaming it would break any code that feeds
    # this placeholder by name rather than through the returned dict.
    ind_to = tfc.placeholder(dtype=tf.int32, name='inf_to')

    # Determine indices of cluster encoder weights and biases to be updated:
    # one (row, ind_to) pair per row of cluster_w.
    n_rows = cluster_w.shape[0]
    w_indices = tf.transpose(
        tf.stack([
            tf.range(n_rows, dtype=tf.int32),
            ind_to * tf.ones(shape=(n_rows, ), dtype=tf.int32)
        ]))
    b_indices = ind_to
    # Determine updates themselves. Only the sliced (size-1) column axis is
    # squeezed, so the result keeps one entry per row even when n_rows == 1.
    cluster_w_updates = tf.squeeze(
        tf.slice(cluster_w, begin=(0, ind_from), size=(n_rows, 1)), axis=1)
    cluster_b_updates = cluster_b[ind_from]
    # Create update ops
    cluster_w_update_op = tf.scatter_nd_update(cluster_w, w_indices,
                                               cluster_w_updates)
    cluster_b_update_op = tf.scatter_update(cluster_b, b_indices,
                                            cluster_b_updates)

    # 3) Ops to get and set latent prior params (rows of a tensor)
    # We will be copying row ind_from to row ind_to.
    latent_prior_mu_w = graph.get_tensor_by_name(
        'latent_decoder/latent_prior_mu/w:0')
    latent_prior_sigma_w = graph.get_tensor_by_name(
        'latent_decoder/latent_prior_sigma/w:0')

    mu_update_op = _copy_row_op(latent_prior_mu_w, ind_from, ind_to)
    sigma_update_op = _copy_row_op(latent_prior_sigma_w, ind_from, ind_to)

    dynamic_ops = {
        'ind_from_ph': ind_from,
        'ind_to_ph': ind_to,
        'latent_enc_tensors': latent_enc_tensors,
        'latent_enc_assign_ops': latent_enc_assign_ops,
        'latent_enc_phs': latent_enc_phs,
        'cluster_w_update_op': cluster_w_update_op,
        'cluster_b_update_op': cluster_b_update_op,
        'mu_update_op': mu_update_op,
        'sigma_update_op': sigma_update_op
    }

    return dynamic_ops


def _copy_row_op(matrix, ind_from, ind_to):
    """Build an op that overwrites row `ind_to` of `matrix` with row `ind_from`.

    Args:
      matrix: 2-D tensor accepted by `tf.scatter_nd_update`, with a statically
        known second dimension.
      ind_from: int32 scalar tensor, index of the row to read.
      ind_to: int32 scalar tensor, index of the row to overwrite.

    Returns:
      The `tf.scatter_nd_update` op performing the copy.
    """
    n_cols = matrix.shape[1]
    # One (ind_to, col) pair per column of the destination row.
    indices = tf.transpose(
        tf.stack([
            ind_to * tf.ones(shape=(n_cols, ), dtype=tf.int32),
            tf.range(n_cols, dtype=tf.int32)
        ]))
    # Squeeze only the sliced (size-1) row axis so the updates keep one entry
    # per column even when n_cols == 1.
    updates = tf.squeeze(
        tf.slice(matrix, begin=(ind_from, 0), size=(1, n_cols)), axis=0)
    return tf.scatter_nd_update(matrix, indices, updates)
Ejemplo n.º 30
0
    def testLoss(self):
        """
        Tests the loss of the FasterRCNN

        Two prediction dicts sharing the same targets are built: a "random"
        one whose predictions are unrelated to (or the opposite of) the
        targets, and a "perfect" one whose predictions match the targets.
        Both are passed to `self._get_losses`; every loss of the perfect dict
        must equal 0 and every loss of the random dict must be at least the
        per-loss lower bound listed in `loss_random_compare`.
        """

        # Create prediction_dict's structure
        prediction_dict_random = {
            'rpn_prediction': {},
            'classification_prediction': {
                'rcnn': {
                    'cls_score': None,
                    'bbox_offsets': None
                },
                'target': {},
                '_debug': {
                    'losses': {}
                }
            }
        }
        prediction_dict_perf = {
            'rpn_prediction': {},
            'classification_prediction': {
                'rcnn': {
                    'cls_score': None,
                    'bbox_offsets': None
                },
                'target': {},
                '_debug': {
                    'losses': {}
                }
            }
        }

        # Set seeds for stable results
        # `target_seed` is used for every tensor that acts as a target (and
        # for the perfect predictions derived from them); `rand_seed` is used
        # for the random predictions.
        rand_seed = 13
        target_seed = 43
        image_size = (60, 80)
        num_anchors = 1000

        # Disable every regularization term configured here so the expected
        # 'regularization_loss' below is 0.
        config = EasyDict(self.config)
        config.model.rpn.l2_regularization_scale = 0.0
        config.model.rcnn.l2_regularization_scale = 0.0
        config.model.base_network.arg_scope.weight_decay = 0.0

        #   RPN

        # Random generation of cls_targets for rpn
        # where:
        #       {-1}:   Ignore
        #       { 0}:   Background
        #       { 1}:   Object
        # (floor of a uniform sample in [-1, 2) yields exactly these values)
        rpn_cls_target = tf.floor(
            tf.random_uniform([num_anchors],
                              minval=-1,
                              maxval=2,
                              dtype=tf.float32,
                              seed=target_seed,
                              name=None))

        # Creation of cls_scores with:
        #   a high score (10 for the opposite case, 100 for the perfect case)
        #   in the selected class
        #   score 0 in the other class
        # NOTE(review): targets equal to -1 fall outside [0, depth) in the
        # perfect case -- presumably one_hot yields an all-zero row for them;
        # confirm against the tf.one_hot documentation.

        # Generation of opposite cls_score for rpn
        # ((target + 1) mod 2 maps 0 -> 1 and 1 -> 0; -1 maps to 0)
        rpn_cls_score = tf.cast(
            tf.one_hot(tf.cast(tf.mod(tf.identity(rpn_cls_target) + 1, 2),
                               tf.int32),
                       depth=2,
                       on_value=10), tf.float32)
        # Generation of correct cls_score for rpn
        rpn_cls_perf_score = tf.cast(
            tf.one_hot(tf.cast(tf.identity(rpn_cls_target), tf.int32),
                       depth=2,
                       on_value=100), tf.float32)

        # Random generation of target bbox deltas
        # (floor of a uniform sample in [-1, 1) yields -1 or 0)
        rpn_bbox_target = tf.floor(
            tf.random_uniform([num_anchors, 4],
                              minval=-1,
                              maxval=1,
                              dtype=tf.float32,
                              seed=target_seed,
                              name=None))

        # Random generation of predicted bbox deltas
        rpn_bbox_predictions = tf.floor(
            tf.random_uniform([num_anchors, 4],
                              minval=-1,
                              maxval=1,
                              dtype=tf.float32,
                              seed=rand_seed,
                              name=None))

        # Random case: opposite class scores and independently drawn deltas.
        prediction_dict_random['rpn_prediction'][
            'rpn_cls_score'] = rpn_cls_score
        prediction_dict_random['rpn_prediction'][
            'rpn_cls_target'] = rpn_cls_target
        prediction_dict_random['rpn_prediction'][
            'rpn_bbox_target'] = rpn_bbox_target
        prediction_dict_random['rpn_prediction'][
            'rpn_bbox_pred'] = rpn_bbox_predictions

        # Perfect case: correct class scores and the bbox prediction is the
        # target tensor itself.
        prediction_dict_perf['rpn_prediction'][
            'rpn_cls_score'] = rpn_cls_perf_score
        prediction_dict_perf['rpn_prediction'][
            'rpn_cls_target'] = rpn_cls_target
        prediction_dict_perf['rpn_prediction'][
            'rpn_bbox_target'] = rpn_bbox_target
        prediction_dict_perf['rpn_prediction'][
            'rpn_bbox_pred'] = rpn_bbox_target

        #   RCNN

        # Set the number of classes
        num_classes = config.model.network.num_classes

        # Randomly generate the bbox_offsets for the correct class = 1
        prediction_dict_random['classification_prediction']['target'] = {
            'bbox_offsets':
            tf.random_uniform([1, 4],
                              minval=-1,
                              maxval=1,
                              dtype=tf.float32,
                              seed=target_seed,
                              name=None),
            'cls': [1]
        }

        # Set the same bbox_offsets and cls for the perfect prediction
        # (shallow copy: both dicts reference the same target tensors)
        prediction_dict_perf['classification_prediction'][
            'target'] = prediction_dict_random['classification_prediction'][
                'target'].copy()

        # Generate random scores for the num_classes + the background class
        rcnn_cls_score = tf.random_uniform([1, num_classes + 1],
                                           minval=-100,
                                           maxval=100,
                                           dtype=tf.float32,
                                           seed=rand_seed,
                                           name=None)

        # Generate a perfect prediction with the correct class score = 100
        # and the rest set to 0
        rcnn_cls_perf_score = tf.cast(
            tf.one_hot([1], depth=num_classes + 1, on_value=100), tf.float32)

        # Generate the random delta prediction for each class
        rcnn_bbox_offsets = tf.random_uniform([1, num_classes * 4],
                                              minval=-1,
                                              maxval=1,
                                              dtype=tf.float32,
                                              seed=rand_seed,
                                              name=None)

        # Copy the random prediction and set the correct class prediction
        # as the target one
        target_bbox_offsets = prediction_dict_random[
            'classification_prediction']['target']['bbox_offsets']
        initial_val = 1 * 4  # cls value * 4
        # The offsets are held flattened in a Variable so that the four
        # entries starting at `initial_val` can be overwritten with
        # scatter_update; the result is reshaped back to one row afterwards.
        rcnn_bbox_perf_offsets = tf.Variable(
            tf.reshape(
                tf.random_uniform([1, num_classes * 4],
                                  minval=-1,
                                  maxval=1,
                                  dtype=tf.float32,
                                  seed=target_seed,
                                  name=None), [-1]))
        rcnn_bbox_perf_offsets = tf.reshape(
            tf.scatter_update(rcnn_bbox_perf_offsets,
                              tf.range(initial_val, initial_val + 4),
                              tf.reshape(target_bbox_offsets, [-1])), [1, -1])

        prediction_dict_random['classification_prediction']['rcnn'][
            'cls_score'] = rcnn_cls_score
        prediction_dict_random['classification_prediction']['rcnn'][
            'bbox_offsets'] = rcnn_bbox_offsets

        prediction_dict_perf['classification_prediction']['rcnn'][
            'cls_score'] = rcnn_cls_perf_score
        prediction_dict_perf['classification_prediction']['rcnn'][
            'bbox_offsets'] = rcnn_bbox_perf_offsets

        loss_perfect = self._get_losses(config, prediction_dict_perf,
                                        image_size)
        loss_random = self._get_losses(config, prediction_dict_random,
                                       image_size)

        # Lower bounds that each loss of the random prediction must reach.
        loss_random_compare = {
            'rcnn_cls_loss': 5,
            'rcnn_reg_loss': 3,
            'rpn_cls_loss': 5,
            'rpn_reg_loss': 3,
            'no_reg_loss': 16,
            'regularization_loss': 0,
            'total_loss': 22,
        }
        # The loss name is passed as the assertion message so a failure
        # identifies which loss violated the expectation.
        for loss in loss_random:
            self.assertGreaterEqual(loss_random[loss],
                                    loss_random_compare[loss], loss)
            self.assertEqual(loss_perfect[loss], 0, loss)