def replace(self, episodes, length, rows=None):
    """Overwrite complete episodes in the memory.

    Args:
        episodes: Tuple of transition quantities with batch and time
            dimensions, one entry per internal buffer.
        length: Batch of sequence lengths.
        rows: Episodes to replace, defaults to all.

    Returns:
        Operation.
    """
    if rows is None:
        rows = tf.range(self._capacity)
    assert rows.shape.ndims == 1
    capacity_check = tf.assert_less(
        rows, self._capacity, message='capacity exceeded')
    with tf.control_dependencies([capacity_check]):
        length_check = tf.assert_less_equal(
            length, self._max_length, message='max length exceeded')
    # The buffers are only written once both checks have run.
    with tf.control_dependencies([length_check]):
        buffer_writes = [
            tf.scatter_update(target, rows, values)
            for target, values in zip(self._buffers, episodes)
        ]
    # The lengths are recorded last, after every buffer has been updated.
    with tf.control_dependencies(buffer_writes):
        return tf.scatter_update(self._length, rows, length)
def constraint_def(self):
    """Create the ops that re-apply the embedding constraint.

    ``self.config.constraint`` selects the projection: ``'nonneg'``,
    ``'unitnorm'``, ``'maxnorm'`` (``max_value=2``) or the ``'*_nonneg'``
    variants, which clamp to non-negative values before applying the norm.
    For any other value the method returns without creating anything.

    Two op lists are stored on ``self``:

    * ``constraint_all_embs_ops``: re-assign the whole ``ent_embs`` /
      ``rel_embs`` variables.
    * ``constraint_scatter_embs_ops``: write the constrained ``hem`` /
      ``tem`` / ``rem`` tensors back at indices ``h`` / ``t`` / ``r``.

    Ops are only created for the kinds ('ent', 'rel') listed in
    ``self.config.to_constrain``.
    """
    # Note that variable has raw shape, not including batchsize; constraint
    # on axis 1 is each embedding.
    # Also note that, for efficiency, the scatter ops only compute the norm
    # and update the embeddings of ent/rel modified by the gradient update.
    mode = self.config.constraint
    known_modes = ('nonneg', 'unitnorm', 'unitnorm_nonneg',
                   'maxnorm', 'maxnorm_nonneg')
    if mode not in known_modes:
        return

    clamp_first = mode.endswith('nonneg')
    norm_kind = mode.split('_')[0]

    def make_constraint(read_axis):
        """Return the projection; the axis is only read when a norm needs it."""
        def apply(x):
            if clamp_first:
                x = tf.keras.constraints.non_neg()(x)
            if norm_kind == 'unitnorm':
                x = tf.keras.constraints.unit_norm(axis=read_axis())(x)
            elif norm_kind == 'maxnorm':
                x = tf.keras.constraints.max_norm(
                    max_value=2, axis=read_axis())(x)
            return x
        return apply

    constraint_ent = make_constraint(lambda: self.config.constrain_axis_ent)
    constraint_rel = make_constraint(lambda: self.config.constrain_axis_rel)

    # assign and scatter_update seem to have no gradient, stop_gradient just
    # to make sure.
    self.constraint_all_embs_ops = []  # update all embs
    all_ops = self.constraint_all_embs_ops
    if 'ent' in self.config.to_constrain:
        all_ops.append(tf.stop_gradient(
            tf.assign(self.ent_embs, constraint_ent(self.ent_embs))))
    if 'rel' in self.config.to_constrain:
        all_ops.append(tf.stop_gradient(
            tf.assign(self.rel_embs, constraint_rel(self.rel_embs))))

    self.constraint_scatter_embs_ops = []  # only update active embs
    active_ops = self.constraint_scatter_embs_ops
    if 'ent' in self.config.to_constrain:
        for indices, embedded in ((self.h, self.hem), (self.t, self.tem)):
            active_ops.append(tf.stop_gradient(tf.scatter_update(
                self.ent_embs, indices, constraint_ent(embedded))))
    if 'rel' in self.config.to_constrain:
        active_ops.append(tf.stop_gradient(tf.scatter_update(
            self.rel_embs, self.r, constraint_rel(self.rem))))
def _reset_non_empty(self, indices):
    """Reset the wrapped environments and cache their new observations.

    Args:
        indices: Batch indices of the environments to reset.

    Returns:
        The values returned by the wrapped environment's reset, available
        only after they have been written into ``self._observ``.
    """
    fresh_observ = self._batch_env._reset_non_empty(  # pylint: disable=protected-access
        indices)
    store_op = tf.scatter_update(self._observ, indices, fresh_observ)
    with tf.control_dependencies([store_op]):
        return tf.identity(fresh_observ)
def I_sgaba(G, V):
    """Compute the per-row sum of the ``sgaba`` connection terms.

    ``G**4 / (G**4 + K_sgaba)`` is scattered into a flat ``n_n * n_n`` vector
    at the positions where the flattened ``sgaba_mat`` equals 1, reshaped to
    ``(n_n, n_n)`` and transposed.  The result is multiplied by
    ``(V - E_sgaba)`` and ``G_sgaba``, transposed back and summed over axis 1.

    NOTE(review): presumably a slow-GABA synaptic current with ``G`` holding
    one activation per existing connection -- confirm against the caller.
    Every call creates a new ``tf.Variable``.
    """
    n_pairs = n_n**2
    activation = tf.pow(G, 4) / (tf.pow(G, 4) + K_sgaba)
    dense_store = tf.Variable([0.0] * n_pairs, dtype=tf.float64)
    connected = tf.boolean_mask(tf.range(n_pairs), sgaba_mat.reshape(-1) == 1)
    dense_flat = tf.scatter_update(dense_store, connected, activation)
    dense = tf.transpose(tf.reshape(dense_flat, (n_n, n_n)))
    weighted = (dense * (V - E_sgaba)) * G_sgaba
    return tf.reduce_sum(tf.transpose(weighted), 1)
def add_val_to_col(var, col, val):
    """Return ``var`` with ``val`` added to every entry of column ``col``.

    A zero row vector as wide as ``var``'s second dimension is created as a
    new float32 variable, ``val`` is written at position ``col``, and the
    ``[1, width]`` row is added to ``var`` (broadcast over the rows).
    """
    shape = var.get_shape()
    row = tf.Variable(tf.zeros(shape[1]), dtype=tf.float32)
    row = tf.scatter_update(row, [col], [val])
    row = tf.reshape(row, [1, shape.as_list()[1]])
    return var + row
def update_task_vector(self):
    """Zero ``self.task_vector`` and write ``self._active`` at the task index.

    The update is run immediately in the session returned by
    ``deeplift.util.get_session()``.
    """
    cleared = tf.assign(self.task_vector, np.zeros(self.get_shape()[1]))
    # Scatter into the freshly zeroed variable so both writes run in one op chain.
    activated = tf.scatter_update(cleared, [self._task_index], [self._active])
    deeplift.util.get_session().run(activated)
def perform(self, agent_indices, observ):
    """Compute a batch of actions and a summary for a batch of observations.

    Args:
        agent_indices: Tensor containing current batch indices.
        observ: Tensor of a batch of observations for all algorithms.

    Returns:
        Tuple of action batch tensor and summary tensor.
    """
    with tf.name_scope('perform/'):
        observ = self._observ_filter.transform(observ)
        state = None
        if self._last_state is not None:
            state = tf.contrib.framework.nest.map_structure(
                lambda x: tf.gather(x, agent_indices), self._last_state)
        output = self._network(
            observ[:, None], tf.ones(observ.shape[0]), state)
        action = tf.cond(
            self._is_training, output.policy.sample, lambda: output.mean)
        logprob = output.policy.log_prob(action)[:, 0]

        def _histograms():
            return tf.summary.merge([
                tf.summary.histogram('mean', output.mean[:, 0]),
                tf.summary.histogram('std', tf.exp(output.logstd[:, 0])),
                tf.summary.histogram('action', action[:, 0]),
                tf.summary.histogram('logprob', logprob),
            ])

        # `str` yields an empty string when logging is disabled.
        summary = tf.cond(self._should_log, _histograms, str)

        # Remember current policy to append to memory in the experience
        # callback.
        if self._last_state is None:
            assign_state = tf.no_op()
        else:
            assign_state = utility.assign_nested_vars(
                self._last_state, output.state, agent_indices)
        remember_ops = [
            assign_state,
            tf.scatter_update(
                self._last_action, agent_indices, action[:, 0]),
            tf.scatter_update(
                self._last_mean, agent_indices, output.mean[:, 0]),
            tf.scatter_update(
                self._last_logstd, agent_indices, output.logstd[:, 0]),
        ]
        with tf.control_dependencies(remember_ops):
            checked_action = tf.check_numerics(action[:, 0], 'action')
            return checked_action, tf.identity(summary)
def add_transitions(self, transitions):
    """Write a batch of transitions into the buffer at the current index.

    Only as many leading rows of the batch as fit between the current index
    and the end of the buffer are stored.  Afterwards the stored size (while
    below capacity) and the write index are advanced; a circular buffer wraps
    the index back to 0 once it reaches the capacity.

    Args:
        transitions: A ``Transition`` whose fields have the batch as their
            first dimension.
    """
    assert isinstance(transitions, Transition)
    batch_size = transitions.s1.shape[0]
    free_slots = self._size - self._current_idx
    effective_batch_size = tf.minimum(batch_size, free_slots)
    indices = self._current_idx + tf.range(effective_batch_size)
    for field in transitions._asdict():
        storage = getattr(self._data, field)
        values = getattr(transitions, field)
        tf.scatter_update(storage, indices, values[:effective_batch_size])
    # Update size and index.
    if tf.less(self._current_size, self._size):
        self._current_size.assign_add(effective_batch_size)
    self._current_idx.assign_add(effective_batch_size)
    if self._circular and tf.greater_equal(self._current_idx, self._size):
        self._current_idx.assign(0)
def _update_history(self, hist_idx, hist_size, zs, Hzs, s_hists, y_hists):
    """Store ``zs`` / ``Hzs`` in the histories and advance the counters.

    Each ``z`` is written into its ``s_hist`` and each ``Hz`` into its
    ``y_hist`` at slot ``hist_idx``.  Once these writes are done, ``hist_idx``
    is advanced modulo ``self._conf['hist']`` and ``hist_size`` is increased,
    capped at ``self._conf['hist']``.

    Returns:
        A grouped op running all writes and both counter updates.
    """
    history_writes = []
    for z, Hz, s_hist, y_hist in zip(zs, Hzs, s_hists, y_hists):
        # Use the Hessian or the Gauss-Newton approximation instead of a
        # simple difference of gradients.
        history_writes.append(tf.scatter_update(s_hist, hist_idx, z))
        history_writes.append(tf.scatter_update(y_hist, hist_idx, Hz))
    next_idx = tf.mod(hist_idx + 1, self._conf['hist'])
    next_size = tf.minimum(hist_size + 1, self._conf['hist'])
    # The counters may only change after the history slots were written.
    with tf.control_dependencies(history_writes):
        counter_updates = [
            tf.assign(hist_idx, next_idx),
            tf.assign(hist_size, next_size),
        ]
    return tf.group(history_writes + counter_updates)
def _compute_y(self, hist_idx, grads, grads_prev, s_hists, y_hists):
    """Write the regularised gradient difference into slot ``hist_idx``.

    For every variable the value
    ``g - g_prev + self._conf['reg'] * s_hist[hist_idx]`` is stored in the
    matching ``y_hist``.

    Returns:
        A grouped op running all the writes.
    """
    # Compute the difference of gradients and add the regularization term.
    writes = [
        tf.scatter_update(
            y_hist, hist_idx,
            g - g_prev + self._conf['reg'] * s_hist[hist_idx])
        for s_hist, y_hist, g, g_prev in zip(
            s_hists, y_hists, grads, grads_prev)
    ]
    return tf.group(writes)
def _define_begin_episode(agent_indices):
    """Reset environments, intermediate scores and durations for new episodes.

    Args:
        agent_indices: Tensor containing batch indices starting an episode.

    Returns:
        Summary tensor.
    """
    assert agent_indices.shape.ndims == 1
    zero_scores = tf.zeros_like(agent_indices, tf.float32)
    zero_durations = tf.zeros_like(agent_indices)
    env_reset = batch_env.reset(agent_indices)
    score_reset = tf.scatter_update(score, agent_indices, zero_scores)
    length_reset = tf.scatter_update(length, agent_indices, zero_durations)
    # The algorithm is notified only after all three resets have run.
    with tf.control_dependencies([env_reset, score_reset, length_reset]):
        return algo.begin_episode(agent_indices)
def reset(self, entries_to_reset):
    """Reset the given entries of the memory to zero.

    Args:
        entries_to_reset: a 1D tensor.

    Returns:
        the reset op.
    """
    num_updates = tf.size(entries_to_reset)

    # One zero block of shape [memory_size, val_depth] per entry to reset.
    zero_vals = tf.fill([self.memory_size, self.val_depth], .0)
    tiled_vals = tf.tile(tf.expand_dims(zero_vals, 0), [num_updates, 1, 1])
    update_vals = tf.scatter_update(
        self.mem_vals, entries_to_reset, tiled_vals)

    # One zero vector of length memory_size per entry to reset.
    zero_logits = tf.fill([self.memory_size], .0)
    tiled_logits = tf.tile(tf.expand_dims(zero_logits, 0), [num_updates, 1])
    update_logits = tf.scatter_update(
        self.mean_logits, entries_to_reset, tiled_logits)

    return tf.group([update_vals, update_logits])
def reset(self, indices=None):
    """Reset the batch of environments.

    Args:
        indices: The batch indices of the environments to reset; defaults to
            all.

    Returns:
        Batch tensor of the new observations.
    """
    if indices is None:
        indices = tf.range(len(self._batch_env))
    observ_dtype = self._parse_dtype(self._batch_env.observation_space)
    observ = tf.py_func(
        self._batch_env.reset, [indices], observ_dtype, name='reset')
    observ = tf.check_numerics(observ, 'observ')
    zero_reward = tf.zeros_like(indices, tf.float32)
    not_done = tf.zeros_like(indices, tf.bool)
    cache_updates = [
        tf.scatter_update(self._observ, indices, observ),
        tf.scatter_update(self._reward, indices, zero_reward),
        tf.scatter_update(self._done, indices, not_done),
    ]
    # Return the observations only after the cached state was refreshed.
    with tf.control_dependencies(cache_updates):
        return tf.identity(observ)
def _build_model(self):
    """Build the graph: placeholders, embeddings, pooling and the MLP head.

    User, item and pooled interacted-item embeddings are concatenated
    (together with whatever ``_build_sparse`` / ``_build_dense`` contribute to
    ``self.concat_embed``), passed through ``dense_nn`` and a final 1-unit
    dense layer whose flattened result is stored in ``self.output``.
    """
    self.graph_built = True
    tf.set_random_seed(self.seed)

    def batch_vector(dtype):
        return tf.placeholder(dtype, shape=[None])

    self.user_indices = batch_vector(tf.int32)
    self.item_indices = batch_vector(tf.int32)
    self.user_interacted_seq = tf.placeholder(
        tf.int32, shape=[None, self.interaction_num])
    self.user_interacted_len = batch_vector(tf.float32)
    self.labels = batch_vector(tf.float32)
    self.is_training = tf.placeholder_with_default(False, shape=[])
    self.concat_embed = []

    def embedding_table(name, n_rows):
        return tf.get_variable(
            name=name,
            shape=[n_rows, self.embed_size],
            initializer=tf_truncated_normal(0.0, 0.01),
            regularizer=self.reg)

    # Each table has one extra row beyond the known users / items.
    user_features = embedding_table("user_features", self.n_users + 1)
    item_features = embedding_table("item_features", self.n_items + 1)
    user_embed = tf.nn.embedding_lookup(user_features, self.user_indices)
    item_embed = tf.nn.embedding_lookup(item_features, self.item_indices)

    # unknown items are padded to 0-vector
    zero_padding_op = tf.scatter_update(
        item_features,
        self.n_items,
        tf.zeros([self.embed_size], dtype=tf.float32))
    with tf.control_dependencies([zero_padding_op]):
        # B * seq * K
        multi_item_embed = tf.nn.embedding_lookup(
            item_features, self.user_interacted_seq)
    summed_embed = tf.reduce_sum(multi_item_embed, axis=1)
    sqrt_len = tf.expand_dims(tf.sqrt(self.user_interacted_len), axis=1)
    pooled_embed = tf.div_no_nan(summed_embed, sqrt_len)
    self.concat_embed.extend([user_embed, item_embed, pooled_embed])

    if self.sparse:
        self._build_sparse()
    if self.dense:
        self._build_dense()

    concat_embed = tf.concat(self.concat_embed, axis=1)
    mlp_layer = dense_nn(
        concat_embed,
        self.hidden_units,
        use_bn=self.use_bn,
        dropout_rate=self.dropout_rate,
        is_training=self.is_training)
    logits = tf.layers.dense(inputs=mlp_layer, units=1)
    self.output = tf.reshape(logits, [-1])
    count_params()
def create_binary_mask_from_scores(score_tensor, f=None, n_zeros=None):
    """Return a 0-1 tensor that keeps the entries with the largest scores.

    Given a numerical tensor with any shape and N elements, the result has the
    same shape; the positions of the ``n_ones`` largest values of
    `score_tensor` are set to one and every other position is zero.  With a
    fraction `f`, ``M = floor(N * f)`` positions are zeroed; with `n_zeros`,
    exactly that many.

    Exactly one of `f` and `n_zeros` must be given.

    Args:
        score_tensor: an arbitrary numerical tensor.
        f: fraction of zeros to be set: a number 0<f<1.
        n_zeros: int, number of zeros to be set: n_zeros<n_elements.

    Returns:
        a binary Tensor with the same shape and type as `score_tensor`.
    """
    # Assert only one of the named arguments is in use.
    assert (f is None) ^ (n_zeros is None)
    n_elements = tf.size(score_tensor).numpy()
    if f is None:
        assert isinstance(n_zeros, int)
        n_ones = n_elements - n_zeros
    else:
        assert 0 < f < 1
        n_ones = n_elements - int(math.floor(n_elements * f))

    needs_flatten = len(score_tensor.shape) > 1
    if needs_flatten:
        flat_scores = tf.reshape(score_tensor, [-1])
    else:
        flat_scores = score_tensor

    mask = tf.Variable(tf.zeros_like(flat_scores))
    top_indices = tf.nn.top_k(flat_scores, n_ones)[1]
    tf.scatter_update(mask, top_indices, 1)
    result = mask.read_value()
    # Reshaping back to the original shape.
    if needs_flatten:
        result = tf.reshape(result, score_tensor.shape)
    return result
def _reset_non_empty(self, indices):
    """Reset the wrapped environments and refill the observation history.

    If the wrapped environment exposes ``history_observations`` those frames
    are used; otherwise the freshly reset observation is repeated
    ``self.history`` times along a new axis 1.

    Args:
        indices: Batch indices of the environments to reset.

    Returns:
        The rows of ``self.observ`` at ``indices``, read after the update.
    """
    reset_observ = self._batch_env._reset_non_empty(  # pylint: disable=protected-access
        indices)
    history_frames = getattr(self._batch_env, "history_observations", None)
    observ_rank = len(self.old_shape)
    if history_frames is None:
        multiples = [1, self.history] + [1] * observ_rank
        history_frames = tf.tile(
            tf.expand_dims(reset_observ, axis=1), multiples)
    with tf.control_dependencies([reset_observ]):
        store_op = tf.scatter_update(self._observ, indices, history_frames)
    with tf.control_dependencies([store_op]):
        return tf.gather(self.observ, indices)
def clear(self, rows=None):
    """Reset episodes in the memory.

    Internally, this only sets their lengths to zero. The memory entries will
    be overridden by future calls to append() or replace().

    Args:
        rows: Episodes to clear, defaults to all.

    Returns:
        Operation.
    """
    if rows is None:
        rows = tf.range(self._capacity)
    assert rows.shape.ndims == 1
    zero_lengths = tf.zeros_like(rows)
    return tf.scatter_update(self._length, rows, zero_lengths)
def _reset_non_empty(self, indices):
    """Reset the selected environments of the batch.

    Args:
        indices: The batch indices of the environments to reset.

    Returns:
        Batch tensor of the new observations.
    """
    new_observ = tf.py_func(
        self._batch_env.reset, [indices], self.observ_dtype, name="reset")
    # py_func loses static shape information; restore it from the inputs.
    static_shape = indices.get_shape().concatenate(self.observ_shape)
    new_observ.set_shape(static_shape)
    store_op = tf.scatter_update(self._observ, indices, new_observ)
    with tf.control_dependencies([store_op]):
        return tf.identity(new_observ)
def reinit_nested_vars(variables, indices=None):
    """Reset all variables in a nested tuple to zeros.

    Args:
        variables: Nested tuple or list of variables.
        indices: Indices along the first dimension to reset, defaults to all.

    Returns:
        Operation.
    """
    if isinstance(variables, (tuple, list)):
        return tf.group(
            *[reinit_nested_vars(variable, indices) for variable in variables])
    if indices is None:
        return variables.assign(tf.zeros_like(variables))
    # Build the zeros in the variable's own dtype. tf.zeros defaults to
    # float32, which would make the scatter fail for any other variable
    # dtype, whereas the full reset above already preserves the dtype.
    zeros = tf.zeros(
        [tf.shape(indices)[0]] + variables.shape[1:].as_list(),
        dtype=variables.dtype.base_dtype)
    return tf.scatter_update(variables, indices, zeros)
def _curvature_range(self):
    """Build the ops tracking the curvature range.

    The log of the squared gradient norm is written into a sliding window of
    ``self.curvature_window_width`` entries.  The minimum / maximum over the
    filled part of the window are smoothed by the moving averager and
    exponentiated into ``self._h_min`` / ``self._h_max``.

    Returns:
        A list holding the moving-average update op.
    """
    self._curv_win = tf.get_variable(
        "curv_win",
        dtype=tf.float32,
        trainable=False,
        shape=[self.curvature_window_width],
        initializer=tf.zeros_initializer)
    # We use log smoothing for curvature range
    slot = self._step % self.curvature_window_width
    self._curv_win = tf.scatter_update(
        self._curv_win, slot, tf.log(self._grad_norm_squared))
    # Note here the iterations start from iteration 0
    window_begin = tf.constant([0])
    n_filled = tf.minimum(
        tf.constant(self.curvature_window_width), self._step + 1)
    valid_window = tf.slice(
        self._curv_win, window_begin, tf.expand_dims(n_filled, dim=0))
    self._h_min_t = tf.reduce_min(valid_window)
    self._h_max_t = tf.reduce_max(valid_window)

    with tf.control_dependencies([self._h_min_t, self._h_max_t]):
        avg_op = self._moving_averager.apply([self._h_min_t, self._h_max_t])
        with tf.control_dependencies([avg_op]):
            smoothed_min = self._moving_averager.average(self._h_min_t)
            self._h_min = tf.exp(tf.identity(smoothed_min))
            smoothed_max = self._moving_averager.average(self._h_max_t)
            self._h_max = tf.exp(tf.identity(smoothed_max))
            if self._sparsity_debias:
                self._h_min *= self._sparsity_avg
                self._h_max *= self._sparsity_avg
    return [avg_op]  # h_max_t, h_min_t
def _apply_sparse_shared(self, grad_values, grad_indices, var):
    """Apply a sparse gradient update to ``var``.

    Args:
        grad_values: Gradient values for the rows selected by
            ``grad_indices``.
        grad_indices: Indices along the first dimension of ``var``.
        var: The variable to update.

    Returns:
        The ``scatter_sub`` op that applies the scaled gradient to ``var``.
    """
    shape = np.array(var.get_shape())
    var_rank = len(shape)
    # For sparse case, we only update the accumulator representing the sparse
    # dimension. In this case SM3 is similar to isotropic adagrad but with
    # better bound (due to the max operator).
    #
    # We do not use the column accumulator because it will updated for
    # every gradient step and will significantly overestimate the gradient
    # square. While, the row accumulator can take advantage of the sparsity
    # in the gradients. Even if one implements the column accumulator - it
    # will result in a no-op because the row accumulators will have lower
    # values.
    #
    # Note that: We do not run this code paths for our experiments in our
    # paper as on TPU all the sparse gradients are densified.
    if var_rank > 1:
        accumulator_var = self.get_slot(var, "accumulator_" + str(0))
        accumulator = tf.gather(accumulator_var, grad_indices)
        shape_for_broadcasting = tf.concat(
            [[tf.shape(accumulator)[0]], [1] * (var_rank - 1)], 0)
        accumulator = tf.reshape(accumulator, shape_for_broadcasting)
        accumulator += grad_values * grad_values
    else:
        accumulator_var = self.get_slot(var, "accumulator")
        updated_accumulator = tf.scatter_add(
            accumulator_var, grad_indices, grad_values * grad_values)
        # scatter_add yields the whole accumulator variable. Pick the rows
        # addressed by grad_indices so the result lines up element-wise with
        # grad_values (and with the scatter_sub below).
        accumulator = tf.gather(updated_accumulator, grad_indices)

    accumulator_inv_sqrt = tf.rsqrt(accumulator + 1e-30)
    scaled_g = (grad_values * accumulator_inv_sqrt)
    updates = []
    with tf.control_dependencies([scaled_g]):
        if var_rank > 1:
            # Collapse everything but the sparse dimension with a max before
            # storing the accumulator back.
            axes = list(range(1, var_rank))
            new_accumulator = tf.reduce_max(accumulator, axis=axes)
            updates = [
                tf.scatter_update(
                    accumulator_var, grad_indices, new_accumulator)
            ]
    with tf.control_dependencies(updates):
        return tf.scatter_sub(
            var, grad_indices, self._learning_rate_tensor * scaled_g)
def _sparse_moving_average(self, x_tm1, idxs, a_t_, name, beta=.9):
    """Update the rows ``idxs`` of a per-row moving average of ``a_t_``.

    Two accumulators are used: the average itself (``name``) and a per-row
    update counter (``name + '/tm1'``).  For ``beta < 1`` the decay is
    ``beta * (1 - beta**tm1) / (1 - beta**t)`` with the row counters before
    (``tm1``) and after (``t``) the increment; otherwise it is ``tm1 / t``.

    Returns:
        Tuple of the updated average accumulator and the updated counter.
    """
    average = self.get_accumulator(x_tm1, '%s' % name)
    average_rows = tf.gather(average, idxs)

    var_shape = self.get_variable_shape(x_tm1)
    counter_shape = [var_shape[0]] + [1] * (len(var_shape) - 1)
    counter = self.get_accumulator(
        x_tm1, '%s/tm1' % name, shape=counter_shape)
    prev_count_rows = tf.gather(counter, idxs)
    new_counter = tf.scatter_add(
        counter, idxs, tf.ones_like(prev_count_rows))
    count_rows = tf.gather(new_counter, idxs)

    if beta < 1:
        decay = tf.convert_to_tensor(beta, name='%s/decay' % name)
        row_decay = (
            decay * (1 - decay**prev_count_rows) / (1 - decay**count_rows))
    else:
        row_decay = prev_count_rows / count_rows

    # First scale the old rows, then add the weighted new values.
    new_average = tf.scatter_update(average, idxs, row_decay * average_rows)
    new_average = tf.scatter_add(new_average, idxs, (1 - row_decay) * a_t_)
    return new_average, new_counter
def assign_nested_vars(variables, tensors, indices=None):
    """Assign tensors to matching nested tuple of variables.

    Args:
        variables: Nested tuple or list of variables to update.
        tensors: Nested tuple or list of tensors to assign.
        indices: Batch indices to assign to; default to all.

    Returns:
        Operation.
    """
    if isinstance(variables, (tuple, list)):
        # Forward `indices` so nested variables are scatter-updated too,
        # instead of silently falling back to a full assignment.
        return tf.group(*[
            assign_nested_vars(variable, tensor, indices)
            for variable, tensor in zip(variables, tensors)
        ])
    if indices is None:
        return variables.assign(tensors)
    return tf.scatter_update(variables, indices, tensors)
def _update_local_control_variate(self, control_variate, idx, elbo_hmc,
                                  assign=True, decay=0.9):
    """Moving-average update of a per-datapoint control variate.

    While ``self.global_step`` has not exceeded
    ``self.control_var_independent_iters`` a single global value (based on
    entry 0 and the mean of ``elbo_hmc``) fills the whole vector; afterwards
    only the entries at ``idx`` are updated with their own moving average.

    Args:
        control_variate: Variable (``assign=True``) or tensor
            (``assign=False``) holding the control variate vector.
        idx: Indices of the entries belonging to ``elbo_hmc``.
        elbo_hmc: New values entering the moving average.
        assign: Whether to write the result back into ``control_variate``.
        decay: Moving-average decay.

    Returns:
        The assign op if ``assign`` is true, else the new value tensor.
    """
    # Updated values for the relevant elements of the control variate vector.
    local_update = decay * tf.gather(
        control_variate, idx) + (1. - decay) * elbo_hmc
    # Updated value if the global control variate is used at this iteration.
    global_update = decay * control_variate[0] + (
        1. - decay) * tf.reduce_mean(elbo_hmc)

    if assign:
        # control_variate is a variable
        local_value = tf.scatter_update(control_variate, idx, local_update)
    else:
        # control_variate is a tensor
        local_value = tf.tensor_scatter_nd_update(
            control_variate, tf.expand_dims(idx, 1), local_update)

    # Tile scalar control variate to match shape of local control variate
    global_value = tf.fill(control_variate.get_shape(), global_update)
    use_local = self.global_step > self.control_var_independent_iters
    new_cv_value = tf.where(use_local, local_value, global_value)

    if not assign:
        return new_cv_value
    return control_variate.assign(new_cv_value)
def dyn_setup(nodes):
    """Register the force and point update ops of ``nodes`` on its network.

    For a non-fixed network the node forces are assigned from
    ``nodes.Force_End``, the displacement ``nodes.dp`` is derived from them
    and added to ``nodes.points``.  In every case the link points are
    overwritten with the points of the nodes they are attached to.
    """
    net = nodes.net
    if net.fixed:
        net.f_node_max = 1e-4
    else:
        net.asg["NL_F"] = (net.f_node.assign(nodes.Force_End), )
        net.assignments_forces += net.asg["NL_F"]
        # *tf.to_float(not nodes.net.fixed) +1e-4
        net.f_node_max = tf.reduce_max(tf.abs(net.f_node))
        nodes.dp = net.f_mild(net.f_node * net.dt)

    if net.fixed:
        net.asg["N"] = tuple()
    else:
        net.asg["N"] = (nodes.points.assign_add(nodes.dp), )

    # pairs of node, link indices
    pairs = []
    for node_index in range(len(net.pts)):
        links = nodes.link_ends[node_index]
        pairs += zip([node_index] * len(links), links)
    pairs = array(pairs)

    node_points = tf.gather(nodes.points, pairs[:, 0])
    net.asg["NL"] = (
        tf.scatter_update(net.links.points, pairs[:, 1], node_points), )
    net.assignments_points += net.asg["N"] + net.asg["NL"]
def reset(self, indices):
    """Restore the initial frames for the given rows of the history buffer.

    Args:
        indices: Rows of the history buffer to reset.

    Returns:
        The value of the whole history buffer, read after the update.
    """
    restored_frames = tf.gather(self._initial_frames, indices)
    write_op = tf.scatter_update(
        self._history_buff, indices, restored_frames)
    with tf.control_dependencies([write_op]):
        return self._history_buff.read_value()
def step(index, scores_sum, scores_num):
    """Single step.

    Obtains one transition (simulating the environments first when
    `speculum` is empty), stores it together with the action probability and
    value estimate in `memory` at position `index`, accumulates the rewards
    and resets the agents whose simulation step reported `done`.

    Args:
        index: Position in the memory slots to write to; wrapped modulo
            `epoch_length`.
        scores_sum: Running score sum; increased by the accumulated rewards
            of the agents that are about to be reset.
        scores_num: Running count; increased by the number of non-zero
            entries of the dequeued `done`.

    Returns:
        List `[index + 1, updated scores_sum, updated scores_num]`, produced
        only after the environment and cumulative-reward resets.
    """
    index %= epoch_length  # Only needed in eval runs.
    # Note - the only way to ensure making a copy of tensor is to run simple
    # operation. We are waiting for tf.copy:
    # https://github.com/tensorflow/tensorflow/issues/11186
    obs_copy = batch_env.observ + 0
    # The value function gets an extra axis in the distributional case.
    value_fun_shape = (num_agents, )
    if distributional_size > 1:
        value_fun_shape = (num_agents, distributional_size)

    def env_step(arg1, arg2, arg3):  # pylint: disable=unused-argument
        """Step of the environment.

        Samples an action from the policy evaluated on `obs_copy`, simulates
        it and returns the action probability, the value function and the
        done flags. The loop-variable arguments are ignored.
        """
        (logits, value_function) = get_policy(obs_copy, ppo_hparams, batch_env.action_space, distributional_size)
        action = common_layers.sample_with_temperature(
            logits, sampling_temp)
        action = tf.cast(action, tf.int32)
        action = tf.reshape(action, shape=(num_agents, ))
        reward, done = batch_env.simulate(action)
        pdf = tfp.distributions.Categorical(logits=logits).prob(action)
        pdf = tf.reshape(pdf, shape=(num_agents, ))
        value_function = tf.reshape(value_function, shape=value_fun_shape)
        done = tf.reshape(done, shape=(num_agents, ))
        # Make the outputs depend on the simulation having actually run.
        with tf.control_dependencies([reward, done]):
            return tf.identity(pdf), tf.identity(value_function), \
                tf.identity(done)

    # TODO(piotrmilos): while_body is executed at most once,
    # thus should be replaced with tf.cond
    # The loop body runs only while `speculum` is empty; otherwise the
    # constant initial values below are passed through unchanged.
    pdf, value_function, top_level_done = tf.while_loop(
        lambda _1, _2, _3: tf.equal(speculum.size(), 0),
        env_step,
        [
            tf.constant(0.0, shape=(num_agents, )),
            tf.constant(0.0, shape=value_fun_shape),
            tf.constant(False, shape=(num_agents, ))
        ],
        parallel_iterations=1,
        back_prop=False,
    )
    # Dequeue only after the (possible) environment step has finished.
    with tf.control_dependencies([pdf, value_function]):
        obs, reward, done, action = speculum.dequeue()
        to_save = [obs, reward, done, action, pdf, value_function]
        save_ops = [
            tf.scatter_update(memory_slot, index, value)
            for memory_slot, value in zip(memory, to_save)
        ]
        cumulate_rewards_op = cumulative_rewards.assign_add(reward)
        agent_indices_to_reset = tf.where(top_level_done)[:, 0]
    # Read the scores after the new reward has been accumulated.
    with tf.control_dependencies([cumulate_rewards_op]):
        # TODO(piotrmilos): possibly we need cumulative_rewards.read_value()
        scores_sum_delta = tf.reduce_sum(
            tf.gather(cumulative_rewards.read_value(), agent_indices_to_reset))
        scores_num_delta = tf.count_nonzero(done, dtype=tf.int32)
    # Reset only once the transition is saved and the deltas are computed.
    with tf.control_dependencies(save_ops + [scores_sum_delta, scores_num_delta]):
        reset_env_op = batch_env.reset(agent_indices_to_reset)
        reset_cumulative_rewards_op = tf.scatter_update(
            cumulative_rewards, agent_indices_to_reset,
            tf.gather(zeros_tensor, agent_indices_to_reset))
    with tf.control_dependencies(
        [reset_env_op, reset_cumulative_rewards_op]):
        return [
            index + 1, scores_sum + scores_sum_delta,
            scores_num + scores_num_delta
        ]
def _compute_step(self, hist_idx, hist_size, grads, s_hists, y_hists):
    """Compute the update step from the histories and store it in `s_hists`.

    Starting from the negative gradients, the direction is corrected in three
    stages using the last `self._conf['hist']` history slots; slots beyond
    `hist_size` contribute zero. The result, scaled by `self._conf['lr']`,
    is written into slot `hist_idx` of each `s_hist`.

    Args:
        hist_idx: Index of the history slot to write the new step to.
        hist_size: Number of valid history slots.
        grads: Gradients, one per variable.
        s_hists: History variables of the steps, one per variable.
        y_hists: History variables of the y vectors, one per variable.

    Returns:
        A grouped op running all the history writes.
    """
    ops = []
    # Create tensors to compute dot products from the histories of s and y.
    sTy_hist = []
    yTy_hist = []
    # We construct the list so that sTy_history[-i] will compute the dot product
    # (s_{k-i}, y_{k-i}) etc.
    for i in reversed(range(1, self._conf['hist'] + 1)):
        sTy = tf.zeros([])
        yTy = tf.zeros([])
        # Slot that lies i steps back in the circular history.
        idx = tf.mod(hist_idx - i, self._conf['hist'])
        for s_hist, y_hist in zip(s_hists, y_hists):
            sTy += tf.reduce_sum(s_hist[idx] * y_hist[idx])
            yTy += tf.reduce_sum(y_hist[idx] * y_hist[idx])
        sTy_hist.append(sTy)
        yTy_hist.append(yTy)
    # Start with the negative gradient.
    ps = []
    for g in grads:
        ps.append(-g)
    # First stage of the update (alg. 3, step 2 in the paper)
    alphas = []
    # Create a TensorFlow group that bundles all updates from an iteration
    # within the first stage.
    for i in range(1, self._conf['hist'] + 1):
        idx = tf.mod(hist_idx - i, self._conf['hist'])
        # Compute coefficient alpha (alg. 3, step 2a).
        sTp = tf.zeros(shape=[])
        for p, s_hist in zip(ps, s_hists):
            sTp += tf.reduce_sum(s_hist[idx] * p)
        # Slots that have not been filled yet get a zero coefficient.
        alpha = tf.cond(i <= hist_size, lambda: sTp / sTy_hist[-i], lambda: tf.zeros(shape=[]))
        alphas.append(alpha)
        # Update direction (alg. 3, step 2b).
        for j, y_hist in enumerate(y_hists):
            ps[j] -= alpha * y_hist[idx]
    # Second stage of the update (alg. 3, step 3 and eq. 14)
    coeff = tf.zeros(shape=[])
    # Sum sTy / yTy over the valid slots ...
    for i in range(1, self._conf['hist'] + 1):
        coeff = tf.cond(i <= hist_size, lambda: coeff + sTy_hist[-i] / yTy_hist[-i], lambda: coeff)
    # ... and average it; with an empty history fall back to conf['eps'].
    coeff = tf.cond(tf.equal(hist_size, 0), lambda: self._conf['eps'], lambda: coeff / tf.cast(hist_size, dtype=coeff.dtype))
    #coeff = tf.Print(coeff, [hist_size], message='hist_size = ')
    for j in range(len(ps)):
        ps[j] *= coeff
    # Third stage of the update (alg. 3, step 4)
    for i in reversed(range(1, self._conf['hist'] + 1)):
        idx = tf.mod(hist_idx - i, self._conf['hist'])
        yTp = tf.zeros(shape=[])
        for p, y_hist in zip(ps, y_hists):
            yTp += tf.reduce_sum(y_hist[idx] * p)
        beta = yTp / sTy_hist[-i]
        # alphas[i - 1] is the coefficient computed for the same i in stage one.
        alpha_minus_beta = tf.cond(i <= hist_size, lambda: alphas[i - 1] - beta, lambda: tf.zeros(shape=[]))
        for j, s_hist in enumerate(s_hists):
            ps[j] += alpha_minus_beta * s_hist[idx]
    # Save update.
    for p, s_hist in zip(ps, s_hists):
        s = self._conf['lr'] * p
        ops.append(tf.scatter_update(s_hist, hist_idx, s))
    return tf.group(ops)
def setup_dynamic_ops(n_y):
    """Set up ops to move / copy mixture component weights for dynamic expansion.

    Args:
        n_y: int, dimensionality of discrete latent variable y.

    Returns:
        A dict containing all of the ops required for dynamic updating.
    """
    # Set up graph ops to dynamically modify component params.
    graph = tf.get_default_graph()

    # 1) Ops to get and set latent encoder params (entire tensors)
    latent_enc_tensors = {}
    for k in range(n_y):
        for suffix in ('w', 'b'):
            tensor_name = 'latent_encoder/mlp_latent_encoder_{}/{}:0'.format(
                k, suffix)
            latent_enc_tensors['latent_{}_'.format(suffix) + str(k)] = (
                graph.get_tensor_by_name(tensor_name))

    latent_enc_assign_ops = {}
    latent_enc_phs = {}
    for key, tensor in latent_enc_tensors.items():
        placeholder = tfc.placeholder(
            tensor.dtype, tensor.shape, name='latent_enc_phs')
        latent_enc_phs[key] = placeholder
        latent_enc_assign_ops[key] = tf.assign(tensor, placeholder)

    # 2) Ops to get and set cluster encoder params (columns of a tensor)
    # We will be copying column ind_from to column ind_to.
    cluster_w = graph.get_tensor_by_name(
        'cluster_encoder/mlp_cluster_encoder_final/w:0')
    cluster_b = graph.get_tensor_by_name(
        'cluster_encoder/mlp_cluster_encoder_final/b:0')
    ind_from = tfc.placeholder(dtype=tf.int32, name='ind_from')
    ind_to = tfc.placeholder(dtype=tf.int32, name='inf_to')

    # Determine indices of cluster encoder weights and biases to be updated
    n_rows = cluster_w.shape[0]
    row_ids = tf.range(n_rows, dtype=tf.int32)
    target_col = ind_to * tf.ones(shape=(n_rows, ), dtype=tf.int32)
    w_indices = tf.transpose(tf.stack([row_ids, target_col]))
    b_indices = ind_to
    # Determine updates themselves
    cluster_w_updates = tf.squeeze(
        tf.slice(cluster_w, begin=(0, ind_from), size=(n_rows, 1)))
    cluster_b_updates = cluster_b[ind_from]
    # Create update ops
    cluster_w_update_op = tf.scatter_nd_update(
        cluster_w, w_indices, cluster_w_updates)
    cluster_b_update_op = tf.scatter_update(
        cluster_b, b_indices, cluster_b_updates)

    # 3) Ops to get and set latent prior params (rows of a tensor)
    # We will be copying row ind_from to row ind_to.
    latent_prior_mu_w = graph.get_tensor_by_name(
        'latent_decoder/latent_prior_mu/w:0')
    latent_prior_sigma_w = graph.get_tensor_by_name(
        'latent_decoder/latent_prior_sigma/w:0')

    def copy_row_op(weights):
        """Build the op copying row ind_from of `weights` onto row ind_to."""
        n_cols = weights.shape[1]
        target_row = ind_to * tf.ones(shape=(n_cols, ), dtype=tf.int32)
        col_ids = tf.range(n_cols, dtype=tf.int32)
        indices = tf.transpose(tf.stack([target_row, col_ids]))
        updates = tf.squeeze(
            tf.slice(weights, begin=(ind_from, 0), size=(1, n_cols)))
        return tf.scatter_nd_update(weights, indices, updates)

    mu_update_op = copy_row_op(latent_prior_mu_w)
    sigma_update_op = copy_row_op(latent_prior_sigma_w)

    return {
        'ind_from_ph': ind_from,
        'ind_to_ph': ind_to,
        'latent_enc_tensors': latent_enc_tensors,
        'latent_enc_assign_ops': latent_enc_assign_ops,
        'latent_enc_phs': latent_enc_phs,
        'cluster_w_update_op': cluster_w_update_op,
        'cluster_b_update_op': cluster_b_update_op,
        'mu_update_op': mu_update_op,
        'sigma_update_op': sigma_update_op
    }
def testLoss(self):
    """Tests the loss of the FasterRCNN.

    Two prediction dicts are built on the same targets: one with random
    predictions and one whose predictions match the targets.  Every loss of
    the matching one must be 0 and every loss of the random one must reach
    the lower bound listed in ``loss_random_compare``.
    """
    def empty_prediction_dict():
        # Skeleton of the prediction_dict structure.
        return {
            'rpn_prediction': {},
            'classification_prediction': {
                'rcnn': {
                    'cls_score': None,
                    'bbox_offsets': None
                },
                'target': {},
                '_debug': {
                    'losses': {}
                }
            }
        }

    def uniform(shape, seed, minval=-1, maxval=1):
        return tf.random_uniform(
            shape, minval=minval, maxval=maxval, dtype=tf.float32,
            seed=seed, name=None)

    prediction_dict_random = empty_prediction_dict()
    prediction_dict_perf = empty_prediction_dict()

    # Set seeds for stable results
    rand_seed = 13
    target_seed = 43
    image_size = (60, 80)
    num_anchors = 1000

    config = EasyDict(self.config)
    config.model.rpn.l2_regularization_scale = 0.0
    config.model.rcnn.l2_regularization_scale = 0.0
    config.model.base_network.arg_scope.weight_decay = 0.0

    # RPN
    # Random generation of cls_targets for rpn
    # where:
    #       {-1}:   Ignore
    #       { 0}:   Background
    #       { 1}:   Object
    rpn_cls_target = tf.floor(
        uniform([num_anchors], target_seed, minval=-1, maxval=2))

    # Generation of opposite cls_score for rpn: the on-value is placed in
    # the wrong class.
    wrong_class = tf.cast(
        tf.mod(tf.identity(rpn_cls_target) + 1, 2), tf.int32)
    rpn_cls_score = tf.cast(
        tf.one_hot(wrong_class, depth=2, on_value=10), tf.float32)
    # Generation of correct cls_score for rpn: the on-value is placed in
    # the target class.
    right_class = tf.cast(tf.identity(rpn_cls_target), tf.int32)
    rpn_cls_perf_score = tf.cast(
        tf.one_hot(right_class, depth=2, on_value=100), tf.float32)

    # Random generation of target bbox deltas
    rpn_bbox_target = tf.floor(uniform([num_anchors, 4], target_seed))
    # Random generation of predicted bbox deltas
    rpn_bbox_predictions = tf.floor(uniform([num_anchors, 4], rand_seed))

    prediction_dict_random['rpn_prediction'].update({
        'rpn_cls_score': rpn_cls_score,
        'rpn_cls_target': rpn_cls_target,
        'rpn_bbox_target': rpn_bbox_target,
        'rpn_bbox_pred': rpn_bbox_predictions,
    })
    # The perfect prediction predicts exactly the bbox targets.
    prediction_dict_perf['rpn_prediction'].update({
        'rpn_cls_score': rpn_cls_perf_score,
        'rpn_cls_target': rpn_cls_target,
        'rpn_bbox_target': rpn_bbox_target,
        'rpn_bbox_pred': rpn_bbox_target,
    })

    # RCNN
    # Set the number of classes
    num_classes = config.model.network.num_classes

    random_cls_pred = prediction_dict_random['classification_prediction']
    perf_cls_pred = prediction_dict_perf['classification_prediction']

    # Randomly generate the bbox_offsets for the correct class = 1
    random_cls_pred['target'] = {
        'bbox_offsets': uniform([1, 4], target_seed),
        'cls': [1]
    }
    # Set the same bbox_offsets and cls for the perfect prediction
    perf_cls_pred['target'] = random_cls_pred['target'].copy()

    # Generate random scores for the num_classes + the background class
    rcnn_cls_score = uniform(
        [1, num_classes + 1], rand_seed, minval=-100, maxval=100)
    # Generate a perfect prediction with the correct class score = 100
    # and the rest set to 0
    rcnn_cls_perf_score = tf.cast(
        tf.one_hot([1], depth=num_classes + 1, on_value=100), tf.float32)

    # Generate the random delta prediction for each class
    rcnn_bbox_offsets = uniform([1, num_classes * 4], rand_seed)

    # Copy the random prediction and set the correct class prediction
    # as the target one
    target_bbox_offsets = random_cls_pred['target']['bbox_offsets']
    initial_val = 1 * 4  # cls value * 4
    rcnn_bbox_perf_offsets = tf.Variable(
        tf.reshape(uniform([1, num_classes * 4], target_seed), [-1]))
    rcnn_bbox_perf_offsets = tf.reshape(
        tf.scatter_update(
            rcnn_bbox_perf_offsets,
            tf.range(initial_val, initial_val + 4),
            tf.reshape(target_bbox_offsets, [-1])),
        [1, -1])

    random_cls_pred['rcnn']['cls_score'] = rcnn_cls_score
    random_cls_pred['rcnn']['bbox_offsets'] = rcnn_bbox_offsets
    perf_cls_pred['rcnn']['cls_score'] = rcnn_cls_perf_score
    perf_cls_pred['rcnn']['bbox_offsets'] = rcnn_bbox_perf_offsets

    loss_perfect = self._get_losses(config, prediction_dict_perf, image_size)
    loss_random = self._get_losses(
        config, prediction_dict_random, image_size)

    loss_random_compare = {
        'rcnn_cls_loss': 5,
        'rcnn_reg_loss': 3,
        'rpn_cls_loss': 5,
        'rpn_reg_loss': 3,
        'no_reg_loss': 16,
        'regularization_loss': 0,
        'total_loss': 22,
    }
    for loss_name in loss_random:
        self.assertGreaterEqual(
            loss_random[loss_name], loss_random_compare[loss_name],
            loss_name)
        self.assertEqual(loss_perfect[loss_name], 0, loss_name)