def _variance(self):
    """Compute the variance of the observation emitted at each step.

    At every step the observation is treated as a mixture of the per-state
    observation distributions, weighted by the marginal hidden-state
    probabilities for that step. The mixture variance is evaluated on
    flattened batch/event axes and reshaped at the end.

    Returns:
        A tensor reshaped to
        `batch_shape + [num_steps] + observation_event_shape`.
    """
    with tf.control_dependencies(self._runtime_assertions):
        state_probs = self._marginal_hidden_probs()
        # state_probs :: num_steps batch_shape num_states

        state_means = self._observation_distribution.mean()
        # state_means :: observation_batch_shape[:-1] num_states
        #                observation_event_shape
        per_state_shape = tf.concat(
            [
                self.batch_shape_tensor(),
                [self._num_states],
                self._observation_distribution.event_shape_tensor(),
            ],
            axis=0)
        state_means = tf.broadcast_to(state_means, per_state_shape)
        # state_means :: batch_shape num_states observation_event_shape

        event_shape = self._observation_distribution.event_shape_tensor()
        flat_batch = tf.reduce_prod(self.batch_shape_tensor())
        # The singleton axis is what broadcasts against num_steps below.
        flat_state_shape = [
            flat_batch,
            1,
            self._num_states,
            tf.reduce_prod(event_shape),
        ]

        flat_probs = tf.reshape(
            state_probs, [self._num_steps, flat_batch, self._num_states])
        # flat_probs :: num_steps batch_size num_states
        flat_means = tf.reshape(state_means, flat_state_shape)
        # flat_means :: batch_size 1 num_states observation_event_size
        mixture_mean = tf.einsum("ijk,jmkl->jiml", flat_probs, flat_means)
        # mixture_mean :: batch_size num_steps 1 observation_event_size

        state_variances = self._observation_distribution.variance()
        state_variances = tf.broadcast_to(state_variances, per_state_shape)
        # state_variances :: batch_shape num_states observation_event_shape
        flat_state_variances = tf.reshape(state_variances, flat_state_shape)
        # flat_state_variances ::
        #     batch_size 1 num_states observation_event_size

        # For a mixture of n distributions with mixture probabilities p[i],
        # whose components have means mean[i] and variances var[i], the
        # variance of the mixture is
        #
        #   var = sum i=1..n p[i] * ((mean[i] - mean)**2 + var[i])
        #
        # (var[i] enters un-squared, exactly as in the expression below.)
        per_state_spread = (
            (flat_means - mixture_mean)**2 + flat_state_variances)
        mixture_variance = tf.einsum(
            "ijk,jikl->jil", flat_probs, per_state_spread)
        # mixture_variance :: batch_size num_steps observation_event_size

        result_shape = tf.concat(
            [self.batch_shape_tensor(), [self._num_steps], event_shape],
            axis=0)
        # returns :: batch_shape num_steps observation_event_shape
        return tf.reshape(mixture_variance, result_shape)
def _log_prob(self, value):
    """Evaluate the log-probability of sequences of observations.

    The hidden state is marginalised out by folding a log-space forward
    step over the sequence axis, then log-sum-exp'ing over the states.

    Args:
        value: tensor of observation sequences, laid out as
            `observation_batch_shape + [num_steps] + underlying_event_shape`.

    Returns:
        Log-probabilities of shape `working_shape`, i.e. the batch shape of
        `value` broadcast against this distribution's batch shape.
    """
    with tf.control_dependencies(self._runtime_assertions):
        event_rank = self._underlying_event_rank

        # Everything in `value` to the left of the sequence axis is batch;
        # broadcasting it against our own batch shape gives the shape of
        # the result.
        value_batch_shape = tf.shape(value)[:-1 - event_rank]
        # value :: value_batch_shape num_steps observation_event_shape
        working_shape = tf.broadcast_dynamic_shape(
            value_batch_shape, self.batch_shape_tensor())

        initial_shape = tf.concat([working_shape, [self._num_states]], axis=0)
        log_initial = tf.broadcast_to(self._log_init, initial_shape)
        # log_initial :: working_shape num_states

        log_trans = self._log_trans

        # The trailing axes of `value` (sequence axis plus the per-step
        # event axes) describe one whole emitted sequence.
        sequence_event_shape = tf.shape(value)[-1 - event_rank:]
        observations = tf.broadcast_to(
            value, tf.concat([working_shape, sequence_event_shape], axis=0))
        # observations :: working_shape observation_event_shape

        # tf.foldl iterates over the leading axis, so bring the sequence
        # index to the front; the new trailing axis broadcasts over states.
        observations = util.move_dimension(observations, -1 - event_rank, 0)
        observations = observations[..., tf.newaxis]
        # observations :: num_steps working_shape underlying_event_shape
        log_emissions = self._observation_distribution.log_prob(observations)

        def forward_step(log_prev_step, log_observation):
            propagated = _log_vector_matrix(log_prev_step, log_trans)
            return propagated + log_observation

        forward_log_probs = tf.foldl(
            forward_step, log_emissions, initializer=log_initial)
        # forward_log_probs :: working_shape num_states

        # returns :: working_shape
        return tf.reduce_logsumexp(forward_log_probs, axis=-1)
def _marginal_hidden_probs(self):
    """Compute the marginal hidden-state probabilities at every step.

    Starts from the initial log-probabilities and repeatedly applies the
    log transition matrix, stacking the result of every step.

    Returns:
        A tensor laid out as `[num_steps] + batch_shape + [num_states]`.
    """
    initial_shape = tf.concat(
        [self.batch_shape_tensor(), [self._num_states]], axis=0)
    log_initial = tf.broadcast_to(self._log_init, initial_shape)
    # log_initial :: batch_shape num_states

    if not self._num_steps > 1:
        # A single step: the only marginal is the initial distribution.
        # returns :: num_steps batch_shape num_states
        return tf.exp(log_initial[tf.newaxis, ...])

    log_trans = self._log_trans

    def forward_step(log_probs, _):
        return _log_vector_matrix(log_probs, log_trans)

    # tf.scan needs something to iterate over; the values are ignored and
    # only the length (num_steps - 1 transitions) matters.
    placeholder_steps = tf.zeros(self._num_steps - 1, dtype=tf.float32)
    later_log_probs = tf.scan(forward_step,
                              placeholder_steps,
                              initializer=log_initial,
                              name="forward_log_probs")

    all_log_probs = tf.concat([[log_initial], later_log_probs], axis=0)
    # returns :: num_steps batch_shape num_states
    return tf.exp(all_log_probs)
def _log_unnormalized_prob(self, x):
    """Return `-power * log(x)` on the support and `-inf` elsewhere.

    A point counts as supported when clamping it to at least 1 (after
    flooring, unless `interpolate_nondiscrete` is set) leaves it unchanged.
    """
    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    # Clamp first so that tf.log never sees a value below 1.
    clamped = tf.maximum(candidate, 1.)
    log_unnorm = -self.power * tf.log(clamped)

    in_support = tf.broadcast_to(
        tf.equal(x, clamped), tf.shape(log_unnorm))
    minus_inf = tf.fill(
        tf.shape(log_unnorm),
        value=np.array(-np.inf, dtype=log_unnorm.dtype.as_numpy_dtype))
    return tf.where(in_support, log_unnorm, minus_inf)
def _mean_image_subtraction(image, means, num_channels):
    """Centers an image by subtracting a per-channel mean.

    For example:
      means = [123.68, 116.779, 103.939]
      image = _mean_image_subtraction(image, means, num_channels=3)

    Note that the rank of `image` must be statically known.

    Args:
      image: a tensor of size [height, width, C].
      means: a C-vector of values to subtract from each channel.
      num_channels: number of color channels in the image; `means` must
        have exactly this many entries.

    Returns:
      the centered image.

    Raises:
      ValueError: If the static rank of `image` is not three (an unknown
        rank also fails this check) or if `len(means)` differs from
        `num_channels`.
    """
    static_rank = image.get_shape().ndims
    if static_rank != 3:
        raise ValueError('Input must be of size [height, width, C>0]')

    if len(means) != num_channels:
        raise ValueError('len(means) must match the number of channels')

    # Turn the 1-D vector of means into a full 3-D tensor.
    # Note(b/130245863): we explicitly call `broadcast` instead of simply
    # expanding dimensions for better performance.
    broadcast_means = tf.broadcast_to(means, tf.shape(image))
    return image - broadcast_means
def _log_unnormalized_prob(self, x):
    """Return `x * log_rate - lgamma(1 + x)` on the support, else `-inf`.

    A point counts as supported when clamping it to at least 0 (after
    flooring, unless `interpolate_nondiscrete` is set) leaves it unchanged.
    """
    # Negative points always have log-probability -inf. Evaluate the
    # formula at a clamped copy of x and patch those entries afterwards.
    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    clamped = tf.maximum(candidate, 0.)
    log_unnorm = clamped * self.log_rate - tf.lgamma(1. + clamped)

    in_support = tf.broadcast_to(
        tf.equal(x, clamped), tf.shape(log_unnorm))
    minus_inf = tf.fill(
        tf.shape(log_unnorm),
        value=np.array(-np.inf, dtype=log_unnorm.dtype.as_numpy_dtype))
    return tf.where(in_support, log_unnorm, minus_inf)
def _cdf(self, x):
    """Return the CDF at `x`, with negative inputs mapped to zero.

    Unless `interpolate_nondiscrete` is set, `x` is floored first, so a
    fractional input gets the CDF of the integer just below it.
    """
    # The CDF is the probability of a value less than or equal to x.
    # Per the original note, tf.igammac yields NaNs for negative x, so it
    # is evaluated at a clamped copy and those entries are zeroed after.
    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    clamped = tf.maximum(candidate, 0.)
    raw_cdf = tf.igammac(1. + clamped, self.rate)

    is_negative = tf.broadcast_to(x < 0., tf.shape(raw_cdf))
    return tf.where(is_negative, tf.zeros_like(raw_cdf), raw_cdf)
def test_coin_toss_batch(self):
    """Checks log_prob broadcasting for a batched fair-coin model.

    Both constant length-5 sequences should score log(0.5**5) at every
    position of the broadcast [7, 3, 2] batch.
    """
    fair_initial = tf.constant([0.5, 0.5], dtype=self.dtype)
    fair_transitions = tf.constant([[0.5, 0.5],
                                    [0.5, 0.5]], dtype=self.dtype)
    identity_emissions = tf.constant([[1.0, 0.0],
                                      [0.0, 1.0]], dtype=self.dtype)

    # Give the initial and transition parameters explicit batch axes.
    batched_initial = tf.broadcast_to(fair_initial, [3, 2, 2])
    batched_transitions = tf.broadcast_to(fair_transitions, [3, 2, 2, 2])

    hmm = tfd.HiddenMarkovModel(
        tfd.Categorical(probs=batched_initial),
        tfd.Categorical(probs=batched_transitions),
        tfd.Categorical(probs=identity_emissions),
        num_steps=5)

    all_zeros = tf.zeros(5, dtype=tf.int32)
    all_ones = tf.ones(5, dtype=tf.int32)
    sequences = tf.broadcast_to([all_zeros, all_ones], [7, 3, 2, 5])

    actual = hmm.log_prob(sequences)
    expected = tf.broadcast_to([np.log(.5**5)], [7, 3, 2])

    self.assertAllClose(actual, expected, rtol=1e-4, atol=0.0)
def _cdf(self, x):
    """Return the CDF at `x`, with inputs below 1 mapped to zero.

    At positive integer x the value is `1 - zeta(power, x + 1) / Z`, with
    `Z = zeta(power, 1)`. Unless `interpolate_nondiscrete` is set, `x` is
    floored first; otherwise the same formula is applied to the raw `x`,
    giving a continuous relaxation that agrees at integer points.
    """
    x = tf.cast(x, self.power.dtype)

    if self.interpolate_nondiscrete:
        candidate = x
    else:
        candidate = tf.floor(x)
    # Clamp so the zeta call always gets a second argument of at least 1.
    clamped = tf.maximum(candidate, 0.)

    normalizer = tf.zeta(self.power, 1.)
    raw_cdf = 1. - (tf.zeta(self.power, clamped + 1.) / normalizer)

    # For x < 1 the CDF is zero.
    below_support = tf.broadcast_to(tf.less(x, 1.), tf.shape(raw_cdf))
    return tf.where(below_support, tf.zeros_like(raw_cdf), raw_cdf)
def _mean(self):
    """Compute the mean of the observation emitted at each step.

    The per-state observation means are averaged with the marginal
    hidden-state probabilities of every step, on flattened batch/event
    axes, and the result is reshaped at the end.

    Returns:
        A tensor reshaped to
        `batch_shape + [num_steps] + observation_event_shape`.
    """
    with tf.control_dependencies(self._runtime_assertions):
        state_probs = self._marginal_hidden_probs()
        # state_probs :: num_steps batch_shape num_states

        state_means = self._observation_distribution.mean()
        # state_means :: observation_batch_shape[:-1] num_states
        #                observation_event_shape
        per_state_shape = tf.concat(
            [
                self.batch_shape_tensor(),
                [self._num_states],
                self._observation_distribution.event_shape_tensor(),
            ],
            axis=0)
        state_means = tf.broadcast_to(state_means, per_state_shape)
        # state_means :: batch_shape num_states observation_event_shape

        event_shape = self._observation_distribution.event_shape_tensor()
        flat_batch = tf.reduce_prod(self.batch_shape_tensor())
        flat_state_shape = [
            flat_batch,
            self._num_states,
            tf.reduce_prod(event_shape),
        ]

        flat_probs = tf.reshape(
            state_probs, [self._num_steps, flat_batch, self._num_states])
        # flat_probs :: num_steps batch_size num_states
        flat_means = tf.reshape(state_means, flat_state_shape)
        # flat_means :: batch_size num_states observation_event_size

        # Contract over the state axis k.
        mixture_mean = tf.einsum("ijk,jkl->jil", flat_probs, flat_means)
        # mixture_mean :: batch_size num_steps observation_event_size

        result_shape = tf.concat(
            [self.batch_shape_tensor(), [self._num_steps], event_shape],
            axis=0)
        # returns :: batch_shape num_steps observation_event_shape
        return tf.reshape(mixture_mean, result_shape)
def __init__(self, sess, prior, debug=0, summary=True,
             # RNN cell hyperparameters
             cell='lstm',
             num_layers=1,
             num_units=32,
             initializer='zeros',
             # Embedding hyperparameters
             embedding=False,
             embedding_size=4,
             # Optimizer hyperparameters
             optimizer='adam',
             learning_rate=0.001,
             # Observation space hyperparameters
             observe_action=True,
             observe_parent=True,
             observe_sibling=True,
             # Loss hyperparameters
             entropy_weight=0.0,
             # PPO hyperparameters
             ppo=False,
             ppo_clip_ratio=0.2,
             ppo_n_iters=10,
             ppo_n_mb=4,
             # PQT hyperparameters
             pqt=False,
             pqt_k=10,
             pqt_batch_size=1,
             pqt_weight=200.0,
             pqt_use_pg=False,
             # Other hyperparameters
             max_length=None):
    """Build the sampling graph, the losses and the training op.

    The constructor (1) resolves the maximum sequence length, (2) builds
    the recurrent cell and samples action sequences with `tf.nn.raw_rnn`,
    (3) builds placeholders plus a `tf.nn.dynamic_rnn` pass that re-scores
    given batches, (4) assembles the loss, optimizer and gradient norm, and
    (5) optionally registers summaries.

    Args:
        sess: stored as `self.sess`; not otherwise used in this method.
        prior: object exposing `priors` (iterable), `initial_prior()`,
            `requires_parents_siblings`, and callable as
            `prior(actions, parent, sibling, dangling)`.
        debug: when `>= 1`, prints each trainable variable's name, shape
            and parameter count.
        summary: when truthy, registers `tf.summary` ops and sets
            `self.summaries`; otherwise `self.summaries` is not set here.
        cell: `'lstm'` or `'gru'`.
        num_layers: number of layers when `num_units` is an int.
        num_units: an int (repeated `num_layers` times) or a sequence of
            per-layer sizes.
        initializer: `'zeros'` or `'var_scale'`.
        embedding: if truthy, observations are looked up in trainable
            embeddings instead of being one-hot encoded.
        embedding_size: width of each embedding table.
        optimizer: `'adam'`, `'rmsprop'` or `'sgd'`.
        learning_rate: passed to the chosen optimizer.
        observe_action, observe_parent, observe_sibling: which tokens are
            fed to the cell; at least one must be truthy.
        entropy_weight: the entropy loss is
            `-entropy_weight * mean(entropy)`.
        ppo: if truthy, use the clipped-ratio loss (incompatible with
            `pqt`).
        ppo_clip_ratio: clipping half-width around 1 for the ratio.
        ppo_n_iters, ppo_n_mb: stored on `self`; not used in this method.
        pqt: if truthy, add the `pqt_weight`-scaled loss on a separate
            "pqt_batch" placeholder set.
        pqt_k, pqt_batch_size: stored on `self`; not used in this method.
        pqt_weight: multiplier of the mean neglogp of the PQT batch.
        pqt_use_pg: when `pqt` is set, whether the policy-gradient loss is
            included as well.
        max_length: sequence length used when no `LengthConstraint` with a
            non-None `max` is found among `prior.priors`.

    Raises:
        AssertionError: if no length can be determined, if no observation
            is enabled, or if both `ppo` and `pqt` are set.
        ValueError: for an unrecognized initializer, cell type or
            optimizer name.
    """

    self.sess = sess
    self.prior = prior
    self.summary = summary
    self.rng = np.random.RandomState(0)  # Used for PPO minibatch sampling

    lib = Program.library

    # Find max_length from the LengthConstraint prior, if it exists
    prior_max_length = None
    for single_prior in self.prior.priors:
        if isinstance(single_prior, LengthConstraint):
            if single_prior.max is not None:
                prior_max_length = single_prior.max
                self.max_length = prior_max_length
            break

    if prior_max_length is None:
        assert max_length is not None, "max_length must be specified if "\
            "there is no LengthConstraint."
        self.max_length = max_length
        print("WARNING: Maximum length not constrained. Sequences will "
              "stop at {} and complete by repeating the first input "
              "variable.".format(self.max_length))
    elif max_length is not None and max_length != self.max_length:
        print("WARNING: max_length ({}) will be overridden by value from "
              "LengthConstraint ({}).".format(max_length, self.max_length))
    # From here on the local name always holds the resolved length.
    max_length = self.max_length

    # Hyperparameters
    self.observe_parent = observe_parent
    self.observe_sibling = observe_sibling
    self.entropy_weight = entropy_weight
    self.ppo = ppo
    self.ppo_n_iters = ppo_n_iters
    self.ppo_n_mb = ppo_n_mb
    self.pqt = pqt
    self.pqt_k = pqt_k
    self.pqt_batch_size = pqt_batch_size

    n_choices = lib.L

    # Placeholders, computed after instantiating expressions
    self.batch_size = tf.placeholder(dtype=tf.int32, shape=(), name="batch_size")
    self.baseline = tf.placeholder(dtype=tf.float32, shape=(), name="baseline")

    # Parameter assertions/warnings
    assert observe_action + observe_parent + observe_sibling > 0, "Must include at least one observation."

    # Parents/siblings are only computed when something consumes them.
    self.compute_parents_siblings = any([self.observe_parent,
                                         self.observe_sibling,
                                         self.prior.requires_parents_siblings])

    # Build controller RNN
    with tf.name_scope("controller"):

        def make_initializer(name):
            if name == "zeros":
                return tf.zeros_initializer()
            if name == "var_scale":
                return tf.contrib.layers.variance_scaling_initializer(
                    factor=0.5, mode='FAN_AVG', uniform=True, seed=0)
            raise ValueError("Did not recognize initializer '{}'".format(name))

        def make_cell(name, num_units, initializer):
            if name == 'lstm':
                return tf.nn.rnn_cell.LSTMCell(num_units, initializer=initializer)
            if name == 'gru':
                return tf.nn.rnn_cell.GRUCell(num_units, kernel_initializer=initializer, bias_initializer=initializer)
            raise ValueError("Did not recognize cell type '{}'".format(name))

        # Create recurrent cell
        if isinstance(num_units, int):
            num_units = [num_units] * num_layers
        initializer = make_initializer(initializer)
        cell = tf.contrib.rnn.MultiRNNCell(
            [make_cell(cell, n, initializer=initializer) for n in num_units])
        # LinearWrapper is defined elsewhere; it is asked for an output of
        # size n_choices, which is added to the prior as logits below.
        cell = LinearWrapper(cell=cell, output_size=n_choices)

        # Define input dimensions
        n_action_inputs = n_choices + 1  # lib tokens + empty token
        n_parent_inputs = n_choices + 1 - len(lib.terminal_tokens)  # Parent sub-lib tokens + empty token
        n_sibling_inputs = n_choices + 1  # lib tokens + empty tokens

        # Create embeddings
        if embedding:
            with tf.variable_scope("embeddings",
                                   initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0, seed=0)):
                if observe_action:
                    action_embeddings = tf.get_variable("action_embeddings", [n_action_inputs, embedding_size], trainable=True)
                if observe_parent:
                    parent_embeddings = tf.get_variable("parent_embeddings", [n_parent_inputs, embedding_size], trainable=True)
                if observe_sibling:
                    sibling_embeddings = tf.get_variable("sibling_embeddings", [n_sibling_inputs, embedding_size], trainable=True)

        # First observation is all empty tokens
        # (the empty token is the last index, n - 1, of each input space)
        initial_obs = tuple()
        for n in [n_action_inputs, n_parent_inputs, n_sibling_inputs]:
            obs = tf.constant(n - 1, dtype=np.int32)
            obs = tf.broadcast_to(obs, [self.batch_size])
            initial_obs += (obs,)

        # Get initial prior
        initial_prior = self.prior.initial_prior()
        initial_prior = tf.constant(initial_prior, dtype=tf.float32)
        prior_dims = tf.stack([self.batch_size, n_choices])
        initial_prior = tf.broadcast_to(initial_prior, prior_dims)
        # arities = np.array([Program.arities[i] for i in range(n_choices)])
        # prior = np.zeros(n_choices, dtype=np.float32)
        # if self.min_length is not None and self.min_length > 1:
        #     prior[arities == 0] = -np.inf
        # prior = tf.constant(prior, dtype=tf.float32)
        # prior_dims = tf.stack([self.batch_size, n_choices])
        # prior = tf.broadcast_to(prior, prior_dims)
        # initial_prior = prior

        # Returns concatenated one-hot or embeddings from observation tokens
        # Used for both raw_rnn and dynamic_rnn
        def get_input(obs):
            action, parent, sibling = obs
            observations = []
            if observe_action:
                if embedding:
                    obs = tf.nn.embedding_lookup(action_embeddings, action)
                else:
                    obs = tf.one_hot(action, depth=n_action_inputs)
                observations.append(obs)
            if observe_parent:
                if embedding:
                    obs = tf.nn.embedding_lookup(parent_embeddings, parent)
                else:
                    obs = tf.one_hot(parent, depth=n_parent_inputs)
                observations.append(obs)
            if observe_sibling:
                if embedding:
                    obs = tf.nn.embedding_lookup(sibling_embeddings, sibling)
                else:
                    obs = tf.one_hot(sibling, depth=n_sibling_inputs)
                observations.append(obs)
            input_ = tf.concat(observations, -1)
            return input_

        # Applies constraints
        # (plain-Python function: it is invoked through tf.py_func below)
        def get_action_parent_sibling_prior_dangling(actions, dangling):
            n = actions.shape[0]  # Batch size
            i = actions.shape[1] - 1  # Current index
            action = actions[:, -1]  # Current action

            # Depending on the constraints, may need to compute parents and siblings
            if self.compute_parents_siblings:
                parent, sibling = parents_siblings(actions, arities=lib.arities, parent_adjust=lib.parent_adjust)
            else:
                parent = np.zeros(n, dtype=np.int32)
                sibling = np.zeros(n, dtype=np.int32)

            # Update dangling with (arity - 1) for each element in action
            dangling += lib.arities[action] - 1

            prior = self.prior(actions, parent, sibling, dangling)

            return action, parent, sibling, prior, dangling

        # Given the actions chosen so far, return the observation, the prior, and the updated dangling
        # Uses py_func to retrieve action/parent/sibling/dangling
        def get_next_obs_prior_dangling(actions_ta, dangling):

            # Get current action batch
            actions = tf.transpose(actions_ta.stack())  # Shape: (?, time)

            # Compute parent, sibling, prior, and dangling
            action, parent, sibling, prior, dangling = tf.py_func(func=get_action_parent_sibling_prior_dangling,
                                                                  inp=[actions, dangling],
                                                                  Tout=[tf.int32, tf.int32, tf.int32, tf.float32, tf.int32])

            # Observe previous action, parent, and/or sibling
            obs = (action, parent, sibling)

            # Set the shapes for returned Tensors
            action.set_shape([None])
            parent.set_shape([None])
            sibling.set_shape([None])
            prior.set_shape([None, lib.L])
            dangling.set_shape([None])

            return obs, prior, dangling

        # Define loop function to be used by tf.nn.raw_rnn.
        initial_cell_input = get_input(initial_obs)

        def loop_fn(time, cell_output, cell_state, loop_state):

            if cell_output is None:  # time == 0
                finished = tf.zeros(shape=[self.batch_size], dtype=tf.bool)
                obs = initial_obs
                next_input = get_input(obs)
                next_cell_state = cell.zero_state(batch_size=self.batch_size, dtype=tf.float32)  # 2-tuple, each shape (?, num_units)
                emit_output = None
                actions_ta = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=False)  # Read twice
                obs_tas = (tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True),  # Action inputs
                           tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True),  # Parent inputs
                           tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True, clear_after_read=True))  # Sibling inputs
                priors_ta = tf.TensorArray(dtype=tf.float32, size=0, dynamic_size=True, clear_after_read=True)
                prior = initial_prior
                lengths = tf.ones(shape=[self.batch_size], dtype=tf.int32)
                dangling = tf.ones(shape=[self.batch_size], dtype=tf.int32)
                next_loop_state = (
                    actions_ta,
                    obs_tas,
                    priors_ta,
                    obs,
                    prior,
                    dangling,
                    lengths,  # Unused until implementing variable length
                    finished)
            else:
                actions_ta, obs_tas, priors_ta, obs, prior, dangling, lengths, finished = loop_state
                # The prior is added to the cell output to form the logits.
                logits = cell_output + prior
                next_cell_state = cell_state
                emit_output = logits
                action = tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32, seed=1)[:, 0]
                # When implementing variable length:
                # action = tf.where(
                #     tf.logical_not(finished),
                #     tf.multinomial(logits=logits, num_samples=1, output_dtype=tf.int32)[:, 0],
                #     tf.zeros(shape=[self.batch_size], dtype=tf.int32))
                next_actions_ta = actions_ta.write(time - 1, action)  # Write chosen actions
                next_obs, next_prior, next_dangling = get_next_obs_prior_dangling(next_actions_ta, dangling)
                next_input = get_input(next_obs)
                next_obs_tas = (  # Write OLD observation
                    obs_tas[0].write(time - 1, obs[0]),  # Action inputs
                    obs_tas[1].write(time - 1, obs[1]),  # Parent inputs
                    obs_tas[2].write(time - 1, obs[2]))  # Sibling inputs
                next_priors_ta = priors_ta.write(time - 1, prior)  # Write OLD prior
                finished = next_finished = tf.logical_or(
                    finished,
                    time >= max_length)
                # When implementing variable length:
                # finished = next_finished = tf.logical_or(tf.logical_or(
                #     finished, # Already finished
                #     next_dangling == 0), # Currently, this will be 0 not just the first time, but also at max_length
                #     time >= max_length)
                next_lengths = tf.where(
                    finished,  # Ever finished
                    lengths,
                    tf.tile(tf.expand_dims(time + 1, 0), [self.batch_size]))
                next_loop_state = (next_actions_ta,
                                   next_obs_tas,
                                   next_priors_ta,
                                   next_obs,
                                   next_prior,
                                   next_dangling,
                                   next_lengths,
                                   next_finished)

            return (finished, next_input, next_cell_state, emit_output, next_loop_state)

        # Returns RNN emit outputs TensorArray (i.e. logits), final cell state, and final loop state
        with tf.variable_scope('policy'):
            _, _, loop_state = tf.nn.raw_rnn(cell=cell, loop_fn=loop_fn)
            actions_ta, obs_tas, priors_ta, _, _, _, _, _ = loop_state

        self.actions = tf.transpose(actions_ta.stack(), perm=[1, 0])  # (?, max_length)
        self.obs = [tf.transpose(obs_ta.stack(), perm=[1, 0]) for obs_ta in obs_tas]  # [(?, max_length)] * 3
        self.priors = tf.transpose(priors_ta.stack(), perm=[1, 0, 2])  # (?, max_length, n_choices)

    # Generates dictionary containing placeholders needed for a batch of sequences
    def make_batch_ph(name):
        with tf.name_scope(name):
            batch_ph = {
                "actions" : tf.placeholder(tf.int32, [None, max_length]),
                "obs" : (tf.placeholder(tf.int32, [None, max_length]),
                         tf.placeholder(tf.int32, [None, max_length]),
                         tf.placeholder(tf.int32, [None, max_length])),
                "priors" : tf.placeholder(tf.float32, [None, max_length, n_choices]),
                "lengths" : tf.placeholder(tf.int32, [None,]),
                "rewards" : tf.placeholder(tf.float32, [None], name="r")
            }
            batch_ph = Batch(**batch_ph)

        return batch_ph

    # Cross-entropy -sum(p * logq) in which logq is replaced by 1 wherever
    # p == 0, so those terms contribute exactly 0 whatever logq holds.
    def safe_cross_entropy(p, logq, axis=-1):
        safe_logq = tf.where(tf.equal(p, 0.), tf.ones_like(logq), logq)
        return - tf.reduce_sum(p * safe_logq, axis)

    # Generates tensor for neglogp of a given batch
    def make_neglogp_and_entropy(B):
        # reuse=True: score the batch with the variables created under the
        # 'policy' scope by the raw_rnn call above.
        with tf.variable_scope('policy', reuse=True):
            logits, _ = tf.nn.dynamic_rnn(cell=cell,
                                          inputs=get_input(B.obs),
                                          sequence_length=B.lengths,  # Backpropagates only through sequence length
                                          dtype=tf.float32)
        logits += B.priors
        probs = tf.nn.softmax(logits)
        logprobs = tf.nn.log_softmax(logits)

        # Generate mask from sequence lengths
        # NOTE: Using this mask for neglogp and entropy actually does NOT
        # affect training because gradients are zero outside the lengths.
        # However, the mask makes tensorflow summaries accurate.
        mask = tf.sequence_mask(B.lengths, maxlen=max_length, dtype=tf.float32)

        # Negative log probabilities of sequences
        actions_one_hot = tf.one_hot(B.actions, depth=n_choices, axis=-1, dtype=tf.float32)
        neglogp_per_step = safe_cross_entropy(actions_one_hot, logprobs, axis=2)  # Sum over action dim
        neglogp = tf.reduce_sum(neglogp_per_step * mask, axis=1)  # Sum over time dim

        # NOTE 1: The above implementation is the same as the one below:
        # neglogp_per_step = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=actions)
        # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time
        # NOTE 2: The above implementation is also the same as the one below, with a few caveats:
        #   Exactly equivalent when removing priors.
        #   Equivalent up to precision when including clipped prior.
        #   Crashes when prior is not clipped due to multiplying zero by -inf.
        # neglogp_per_step = -tf.nn.log_softmax(logits + tf.clip_by_value(priors, -2.4e38, 0)) * actions_one_hot
        # neglogp_per_step = tf.reduce_sum(neglogp_per_step, axis=2)
        # neglogp = tf.reduce_sum(neglogp_per_step, axis=1) # Sum over time

        entropy_per_step = safe_cross_entropy(probs, logprobs, axis=2)  # Sum over action dim -> (batch_size, max_length)
        entropy = tf.reduce_sum(entropy_per_step * mask, axis=1)  # Sum over time dim -> (batch_size, )

        return neglogp, entropy

    # On policy batch
    self.sampled_batch_ph = make_batch_ph("sampled_batch")

    # Memory batch
    self.memory_batch_ph = make_batch_ph("memory_batch")
    memory_neglogp, _ = make_neglogp_and_entropy(self.memory_batch_ph)
    self.memory_probs = tf.exp(-memory_neglogp)
    self.memory_logps = -memory_neglogp

    # PQT batch
    if pqt:
        self.pqt_batch_ph = make_batch_ph("pqt_batch")

    # Setup losses
    with tf.name_scope("losses"):

        neglogp, entropy = make_neglogp_and_entropy(self.sampled_batch_ph)
        r = self.sampled_batch_ph.rewards

        # Entropy loss
        entropy_loss = -self.entropy_weight * tf.reduce_mean(entropy, name="entropy_loss")
        loss = entropy_loss

        # PPO loss
        if ppo:
            assert not pqt, "PPO is not compatible with PQT"

            self.old_neglogp_ph = tf.placeholder(dtype=tf.float32, shape=(None,), name="old_neglogp")
            # exp(old_neglogp - neglogp) == p_new / p_old
            ratio = tf.exp(self.old_neglogp_ph - neglogp)
            clipped_ratio = tf.clip_by_value(ratio, 1. - ppo_clip_ratio, 1. + ppo_clip_ratio)
            ppo_loss = -tf.reduce_mean(tf.minimum(ratio * (r - self.baseline), clipped_ratio * (r - self.baseline)))
            loss += ppo_loss

            # Define PPO diagnostics
            clipped = tf.logical_or(ratio < (1. - ppo_clip_ratio), ratio > 1. + ppo_clip_ratio)
            self.clip_fraction = tf.reduce_mean(tf.cast(clipped, tf.float32))
            self.sample_kl = tf.reduce_mean(neglogp - self.old_neglogp_ph)

        # Policy gradient loss
        else:
            if not pqt or (pqt and pqt_use_pg):
                pg_loss = tf.reduce_mean((r - self.baseline) * neglogp, name="pg_loss")
                loss += pg_loss

        # Priority queue training loss
        if pqt:
            pqt_neglogp, _ = make_neglogp_and_entropy(self.pqt_batch_ph)
            pqt_loss = pqt_weight * tf.reduce_mean(pqt_neglogp, name="pqt_loss")
            loss += pqt_loss

        self.loss = loss

    def make_optimizer(name, learning_rate):
        if name == "adam":
            return tf.train.AdamOptimizer(learning_rate=learning_rate)
        if name == "rmsprop":
            return tf.train.RMSPropOptimizer(learning_rate=learning_rate, decay=0.99)
        if name == "sgd":
            return tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        raise ValueError("Did not recognize optimizer '{}'".format(name))

    # Create training op
    optimizer = make_optimizer(name=optimizer, learning_rate=learning_rate)
    with tf.name_scope("train"):
        self.grads_and_vars = optimizer.compute_gradients(self.loss)
        self.train_op = optimizer.apply_gradients(self.grads_and_vars)
        # The two lines above are equivalent to:
        # self.train_op = optimizer.minimize(self.loss)
    with tf.name_scope("grad_norm"):
        self.grads, _ = list(zip(*self.grads_and_vars))
        self.norms = tf.global_norm(self.grads)

    if debug >= 1:
        total_parameters = 0
        print("")
        for variable in tf.trainable_variables():
            shape = variable.get_shape()
            n_parameters = np.product(shape)
            total_parameters += n_parameters
            print("Variable: ", variable.name)
            print(" Shape: ", shape)
            print(" Parameters:", n_parameters)
        print("Total parameters:", total_parameters)

    # Create summaries
    with tf.name_scope("summary"):
        if self.summary:
            # Only the loss terms that were actually built above are logged.
            if ppo:
                tf.summary.scalar("ppo_loss", ppo_loss)
            else:
                if not pqt or (pqt and pqt_use_pg):
                    tf.summary.scalar("pg_loss", pg_loss)
            if pqt:
                tf.summary.scalar("pqt_loss", pqt_loss)
            tf.summary.scalar("entropy_loss", entropy_loss)
            tf.summary.scalar("total_loss", self.loss)
            tf.summary.scalar("reward", tf.reduce_mean(r))
            tf.summary.scalar("baseline", self.baseline)
            tf.summary.histogram("reward", r)
            tf.summary.histogram("length", self.sampled_batch_ph.lengths)
            for g, v in self.grads_and_vars:
                tf.summary.histogram(v.name, v)
                tf.summary.scalar(v.name + '_norm', tf.norm(v))
                tf.summary.histogram(v.name + '_grad', g)
                tf.summary.scalar(v.name + '_grad_norm', tf.norm(g))
            tf.summary.scalar('gradient norm', self.norms)
            self.summaries = tf.summary.merge_all()
b4 = tf.Variable(tf.zeros([10]))

lr = 1e-3

for epoch in range(10):  # iterate over the dataset 10 times
    for step, (x, y) in enumerate(train_db):  # for every batch
        # x:[128, 28, 28]
        # y: [128]

        # [b, 28, 28] => [b, 28*28]
        x = tf.reshape(x, [-1, 28 * 28])

        with tf.GradientTape() as tape:  # tf.Variable
            # x: [b, 28*28]
            # Hidden layer 1 forward pass, [b, 28*28] => [b, 256]
            # (the bias is broadcast explicitly here; the later layers rely
            # on the implicit broadcasting of `+`)
            h1 = x @ w1 + tf.broadcast_to(b1, [x.shape[0], 256])
            h1 = tf.nn.relu(h1)
            # Hidden layer 2 forward pass, [b, 256] => [b, 128]
            h2 = h1 @ w2 + b2
            h2 = tf.nn.relu(h2)
            # Hidden layer 3 forward pass, [b, 128] => [b, 64]
            h3 = h2 @ w3 + b3
            h3 = tf.nn.relu(h3)
            # Output layer forward pass, [b, 64] => [b, 10]
            h4 = h3 @ w4 + b4
            out = h4

            # compute loss
            # out: [b, 10]
            # y: [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
def call(self, x, training=None):
    """Apply the layer: normalise the kernel with input statistics, then convolve.

    While training (and not frozen) the mean and an inverse-square-root
    covariance of sampled input patches are estimated and, if
    `track_running_stats` is set, blended into the running statistics.
    Otherwise the stored running statistics are used. The kernel is then
    multiplied by that matrix, the bias is corrected for the subtracted
    mean, and a regular `tf.nn.conv2d` is applied.

    Args:
        x: input tensor; its first four dynamic dimensions are read as
            N, H, W, C (the bias is added with `data_format="NHWC"`).
        training: truthy to estimate (and possibly update) statistics.

    Returns:
        The convolution output, passed through `self.activation` when one
        is set.
    """
    x_shape = tf.shape(x)
    N, H, W, C = x_shape[0], x_shape[1], x_shape[2], x_shape[3]
    block = self.block
    # Once frozen, statistics are no longer re-estimated from the input.
    frozen = self.freeze and (self.counter > self.freeze_iter)
    if training and self.track_running_stats:
        # The step counter wraps around every freeze_iter * 10 calls.
        counter = self.counter + 1
        counter = counter % (self.freeze_iter * 10)
        self.counter.assign(counter)

    if training and (not frozen):
        # 1. im2col: N x cols x pixels -> N*pixles x cols
        if self.kernel_size[0] > 1:
            # [N, L, L, C * K^2]
            X = tf.image.extract_patches(tf.cast(x, tf.float32),
                                         sizes=[1] + list(self.kernel_size) + [1],
                                         strides=[1, self.sampling_stride, self.sampling_stride, 1],
                                         rates=[1, self.dilation_rate[0], self.dilation_rate[1], 1],
                                         padding=str(self.padding).upper())
            X = tf.cast(X, x.dtype)
            X = tf.reshape(X, [N, -1, C * self.kernel_size[0] * self.kernel_size[1]])  # [N, L^2, C * K^2]
        else:
            # channel wise ([N, H, W, C] -> [N * H * W, C] -> [N * H / S * W / S, C]
            X = tf.reshape(x, [-1, C])[::self.sampling_stride ** 2, :]

        if self.groups == 1:
            # (C//B*N*pixels,k*k*B)
            X = tf.reshape(X, [-1, self.num_features, C // block])
            X = tf.transpose(X, [0, 2, 1])
            X = tf.reshape(X, [-1, self.num_features])
        else:
            X_shape_ = tf.shape(X)
            X = tf.reshape(X, [-1, X_shape_[-1]])  # [N, L^2, C * K^2] -> [N * L^2, C * K^2]

        # 2. subtract mean
        # X = tf.cast(X, tf.float32)
        X_mean = tf.reduce_mean(X, axis=0)
        X = X - tf.expand_dims(X_mean, axis=0)

        # 3. calculate COV, COV^(-0.5), then deconv
        if self.groups == 1:
            scale = tf.cast(tf.shape(X)[0], X.dtype)
            Id = tf.eye(X.shape[1], dtype=X.dtype)
            # addmm op
            # (eps * Id is added before the inverse square root is taken)
            Cov = self.eps * Id + (1. / scale) * tf.matmul(tf.transpose(X), X)
            deconv = isqrt_newton_schulz_autograd(Cov, self.n_iter)
        else:
            # The grouped path is computed in float32 and cast back below.
            X = tf.cast(X, tf.float32)
            X = tf.reshape(X, [-1, self.groups, self.num_features])
            X = tf.transpose(X, [1, 0, 2])  # [groups, -1, num_features]
            Id = tf.eye(self.num_features, dtype=X.dtype)
            Id = tf.broadcast_to(Id, [self.groups, self.num_features, self.num_features])
            scale = tf.cast(tf.shape(X)[1], X.dtype)
            Cov = self.eps * Id + (1. / scale) * tf.matmul(tf.transpose(X, [0, 2, 1]), X)
            deconv = isqrt_newton_schulz_autograd_batch(Cov, self.n_iter)
            deconv = tf.cast(deconv, x.dtype)

        if self.track_running_stats:
            # Blend the new estimates into the running ones with weight
            # `momentum` on the new value.
            running_mean = tf.cast(self.momentum * X_mean, self.running_mean.dtype) + (1. - self.momentum) * self.running_mean
            running_deconv = tf.cast(self.momentum * deconv, self.running_deconv.dtype) + (1. - self.momentum) * self.running_deconv
            # track stats for evaluation
            self.running_mean.assign(running_mean)
            self.running_deconv.assign(running_deconv)
    else:
        X_mean = tf.cast(self.running_mean, x.dtype)
        deconv = tf.cast(self.running_deconv, x.dtype)

    # 4. X * deconv * conv = X * (deconv * conv)
    if self.groups == 1:
        w = tf.reshape(self.kernel, [C // block, self.num_features, -1])
        w = tf.transpose(w, [0, 2, 1])
        w = tf.reshape(w, [-1, self.num_features])
        w = tf.matmul(w, tf.cast(deconv, w.dtype))
        if self.use_bias:
            # Subtract from the bias what removing the mean from the
            # input would have contributed.
            b_dash = tf.matmul(w, tf.cast(tf.expand_dims(X_mean, axis=-1), dtype=w.dtype))
            b_dash = tf.reshape(b_dash, [self.filters, -1])
            b_dash = tf.reduce_sum(b_dash, axis=1)
            b = self.bias - b_dash
        else:
            b = 0.

        w = tf.reshape(w, [C // block, -1, self.num_features])
        w = tf.transpose(w, [0, 2, 1])
    else:
        w = tf.reshape(self.kernel, [C // block, -1, self.num_features])
        w = tf.matmul(w, tf.cast(deconv, w.dtype))
        if self.use_bias:
            b_dash = tf.matmul(w, tf.cast(tf.reshape(X_mean, [-1, self.num_features, 1]), dtype=w.dtype))
            b_dash = tf.reshape(b_dash, self.bias.shape)
            b = self.bias - b_dash
        else:
            b = 0.

    # Restore the kernel's own layout before convolving.
    w = tf.reshape(w, self.kernel.shape)
    x = tf.nn.conv2d(x, w, self.strides, str(self.padding).upper(), dilations=self.dilation_rate)
    if self.use_bias:
        x = tf.nn.bias_add(x, b, data_format="NHWC")

    if self.activation is not None:
        return self.activation(x)
    else:
        return x
def uadd(a, *b):
    """Add `a` and every tensor in `b`, each broadcast to the shape of `a`.

    Workaround to support + with uint32 (not supported in TF).
    """
    # Note: Tensorflow's add_n doesn't support broadcasting, so every extra
    # operand is expanded to the shape of `a` first.
    operands = [a]
    for term in b:
        operands.append(tf.broadcast_to(term, tf.shape(a)))
    return tf.add_n(operands)
def matrix_from_intrinsics(
        focal: type_alias.TensorLike,
        principal_point: type_alias.TensorLike,
        skew: type_alias.TensorLike = (0.0,),
        name: str = "perspective_matrix_from_intrinsics") -> tf.Tensor:
    r"""Assembles a camera calibration matrix from intrinsic parameters.

    The returned matrix is

    $$
    \mathbf{C} =
    \begin{bmatrix}
    f_x & sc & c_x \\
    0  & f_y & c_y \\
    0  & 0  & 1 \\
    \end{bmatrix}
    $$

    where \\((f_x, f_y)\\) is the focal length, \\((c_x, c_y)\\) the principal
    point and \\(sc\\) the skew coefficient.

    Note:
      In the following, A1 to An are optional batch dimensions.

    Args:
      focal: A tensor of shape `[A1, ..., An, 2]`; the last dimension holds the
        camera focal length.
      principal_point: A tensor of shape `[A1, ..., An, 2]`; the last dimension
        holds the camera principal point.
      skew: A tensor of shape `[A1, ..., An, 1]`; the last dimension holds the
        skew coefficient. It is broadcast against the batch shape of `focal`.
      name: A name for this op that defaults to
        "perspective_matrix_from_intrinsics".

    Returns:
      A tensor of shape `[A1, ..., An, 3, 3]` whose last two dimensions form
      the calibration matrix.

    Raises:
      ValueError: If the shape of `focal`, `principal_point` or `skew` is not
        supported.
    """
    with tf.name_scope(name):
        focal = tf.convert_to_tensor(value=focal)
        principal_point = tf.convert_to_tensor(value=principal_point)
        skew = tf.convert_to_tensor(value=skew)

        # Expand skew to the batch shape it shares with focal; statically
        # unknown dimensions are replaced by 1.
        broadcast_dims = shape.get_broadcasted_shape(focal.shape[:-1],
                                                     skew.shape[:-1])
        batch_dims = []
        for dim in broadcast_dims:
            if dim is None:
                batch_dims.append(1)
            else:
                batch_dims.append(tf.compat.dimension_value(dim))
        skew = tf.broadcast_to(skew, batch_dims + [1])

        # Validate the size of the last axis of each input, in order.
        checked = (
            (focal, "focal", 2),
            (principal_point, "principal_point", 2),
            (skew, "skew", 1),
        )
        for tensor, tensor_name, last_dim in checked:
            shape.check_static(
                tensor=tensor,
                tensor_name=tensor_name,
                has_dim_equals=(-1, last_dim))
        shape.compare_batch_dimensions(
            tensors=(focal, principal_point, skew),
            tensor_names=("focal", "principal_point", "skew"),
            last_axes=-2,
            broadcast_compatible=False)

        fx, fy = tf.unstack(focal, axis=-1)
        cx, cy = tf.unstack(principal_point, axis=-1)
        zero = tf.zeros_like(fx)
        one = tf.ones_like(fx)
        skew = tf.reshape(skew, tf.shape(fx))

        # Row-major entries of the 3x3 matrix, stacked along a new last axis.
        entries = (fx, skew, cx,
                   zero, fy, cy,
                   zero, zero, one)
        flat_matrix = tf.stack(entries, axis=-1)
        flat_shape = tf.shape(input=flat_matrix)
        matrix_shape = tf.concat((flat_shape[:-1], (3, 3)), axis=-1)
        return tf.reshape(flat_matrix, shape=matrix_shape)
def test_feature_steered_convolution_only_self_edges(
        self, batch_size, num_vertices, in_channels, out_channels,
        num_weight_matrices):
    """Test convolution when the graph only has self edges."""
    data, neighbors = _random_data(
        batch_size,
        num_vertices,
        in_channels,
        padding=False,
        only_self_edges=True)
    u, v, c, w, b = _random_variables(in_channels, out_channels,
                                      num_weight_matrices)
    is_batched = batch_size > 0

    def convolve(signal, var_u, var_v, var_w):
        # All sub-tests share the graph, var_c and var_b.
        return gc.feature_steered_convolution(
            data=signal,
            neighbors=neighbors,
            sizes=None,
            var_u=var_u,
            var_v=var_v,
            var_c=c,
            var_w=var_w,
            var_b=b)

    with self.subTest(name="w=0_expect_output=b"):
        # With zero weights only the bias can reach the output.
        y = convolve(data, u, v, tf.zeros_like(w))
        y_expected = tf.broadcast_to(b, y.shape)

        self.assertAllEqual(y, y_expected)

    with self.subTest(name="translation_invariant_self_edges"):
        y = convolve(data, u, -u, w)
        exp_c = tf.exp(c)
        q = tf.reshape(exp_c / tf.reduce_sum(input_tensor=exp_c),
                       (num_weight_matrices, 1, 1))
        if is_batched:
            q_times_w = tf.reduce_sum(
                input_tensor=q * w, axis=0, keepdims=True)
            q_times_w = tf.tile(q_times_w, (batch_size, 1, 1))
        else:
            q_times_w = tf.reduce_sum(input_tensor=q * w, axis=0)
        bias_term = tf.broadcast_to(b, y.shape)
        y_expected = tf.matmul(data, q_times_w) + bias_term

        self.assertAllClose(y, y_expected)

    with self.subTest(name="constant_signal"):
        # One random feature vector per mesh, repeated for every vertex.
        if is_batched:
            sample_shape = (batch_size, 1, in_channels)
            vertex_reps = (1, num_vertices, 1)
        else:
            sample_shape = (1, in_channels)
            vertex_reps = (num_vertices, 1)
        sample = np.random.uniform(size=sample_shape).astype(np.float32)
        constant_data = np.tile(sample, vertex_reps)

        y = convolve(constant_data, u, v, w)
        # A constant signal must give the same output at every vertex.
        first_vertex = y[:, :1, :] if is_batched else y[:1, :]
        y_expected = tf.tile(first_vertex, vertex_reps)

        self.assertAllClose(y, y_expected)
def _setup_action_selection(self, state_ph):
    """Build the op that selects an action by random shooting.

    `self._num_random_action_selection` action sequences of length
    `self._horizon` are sampled uniformly between `self._action_space_low`
    and `self._action_space_high`. All sequences are unrolled in parallel
    from the current state with `self._dynamics_func`, their costs are
    accumulated with `self._cost_fn`, and the first action of the cheapest
    sequence is returned. Both functions are called `self._horizon` times
    in total (once before the loop, then once per remaining step).

    Args:
        state_ph: Current state. It is broadcast to
            [self._num_random_action_selection, self._state_dim], so it is
            assumed to have a batch size of 1.

    Returns:
        best_action: the first action of the sequence with the lowest
            accumulated cost (tensor with shape [self._action_dim]).
    """
    num_sequences = self._num_random_action_selection

    def sample_actions():
        # One uniformly random action for every candidate sequence.
        return tf.random_uniform(
            [num_sequences, self._action_dim],
            minval=self._action_space_low,
            maxval=self._action_space_high,
            dtype=tf.float32)

    # Every candidate sequence starts from the same observed state.
    states = tf.broadcast_to(state_ph, [num_sequences, self._state_dim])

    # First step: remember these actions, one of them is the result.
    first_actions = sample_actions()
    next_states = self._dynamics_func(states, first_actions, reuse=True)
    total_cost = self._cost_fn(states, first_actions, next_states)

    # Remaining steps: keep unrolling from the predicted states.
    for _ in range(self._horizon - 1):
        states = next_states
        actions = sample_actions()
        next_states = self._dynamics_func(states, actions, reuse=True)
        total_cost = total_cost + self._cost_fn(states, actions, next_states)

    best_action = first_actions[tf.argmin(total_cost), :]
    return best_action
def render_rays(ray_batch,
                network_fn,
                network_query_fn,
                N_samples,
                retraw=False,
                lindisp=False,
                perturb=0.,
                N_importance=0,
                network_fine=None,
                white_bkgd=False,
                raw_noise_std=0.,
                verbose=False):
    """Volumetric rendering of a batch of rays.

    Args:
      ray_batch: array of shape [batch_size, ...]. Per-ray data: origin in
        columns 0:3, direction in 3:6, near/far bounds in 6:8 and, when more
        than 8 columns are present, the viewing direction in the last 3.
      network_fn: function. Model predicting RGB and density at each point.
      network_query_fn: function used for passing queries to network_fn.
      N_samples: int. Number of samples taken along each ray.
      retraw: bool. If True, also return the model's raw predictions.
      lindisp: bool. If True, sample linearly in inverse depth instead of
        in depth.
      perturb: float, 0 or 1. If non-zero, samples are drawn at stratified
        random positions along each ray.
      N_importance: int. Number of additional samples per ray; the combined
        samples are evaluated with network_fine.
      network_fine: "fine" network with the same spec as network_fn; when
        None, network_fn is used for the second pass as well.
      white_bkgd: bool. If True, composite onto a white background.
      raw_noise_std: float. Standard deviation of the noise added to the raw
        density predictions; no noise is added unless it is positive.
      verbose: bool. Not used by this function.

    Returns:
      A dict with:
        rgb_map: [num_rays, 3]. Estimated RGB color of a ray (from the last
          pass that was run).
        disp_map: [num_rays]. Disparity map, 1 / depth.
        acc_map: [num_rays]. Accumulated opacity along each ray.
        raw: [num_rays, num_samples, 4]. Raw predictions (only if retraw).
        rgb0, disp0, acc0: coarse-pass counterparts of the maps above (only
          if N_importance > 0).
        z_std: [num_rays]. Standard deviation of the importance samples
          along each ray (only if N_importance > 0).
    """

    def integrate(raw, depths, directions):
        """Turn raw predictions into color, disparity, opacity and weights.

        Args:
          raw: [num_rays, num_samples along ray, 4]. Prediction from model.
          depths: [num_rays, num_samples along ray]. Integration time.
          directions: [num_rays, 3]. Direction of each ray.

        Returns:
          rgb, disparity, opacity ([num_rays, 3], [num_rays], [num_rays]),
          the per-sample weights [num_rays, num_samples] and the expected
          depth [num_rays].
        """
        # Spacing between consecutive samples; the last sample is given an
        # effectively infinite extent.
        steps = depths[..., 1:] - depths[..., :-1]
        far_pad = tf.broadcast_to([1e10], steps[..., :1].shape)
        steps = tf.concat([steps, far_pad], axis=-1)  # [N_rays, N_samples]

        # Scale by the direction norm so non-unit directions give
        # real-world distances.
        steps = steps * tf.linalg.norm(directions[..., None, :], axis=-1)

        colors = tf.math.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]

        # Optional regularising noise on the density prediction.
        density_noise = 0.
        if raw_noise_std > 0.:
            density_noise = tf.random.normal(raw[..., 3].shape) * raw_noise_std

        # Opacity of each sample from its (rectified) density and extent.
        density = tf.nn.relu(raw[..., 3] + density_noise)
        alpha = 1.0 - tf.exp(-density * steps)  # [N_rays, N_samples]

        # Exclusive cumprod: fraction of the ray that reaches each sample.
        transmittance = tf.math.cumprod(
            1. - alpha + 1e-10, axis=-1, exclusive=True)
        weights = alpha * transmittance  # [N_rays, N_samples]

        rgb = tf.reduce_sum(weights[..., None] * colors, axis=-2)  # [N_rays, 3]
        depth = tf.reduce_sum(weights * depths, axis=-1)
        opacity = tf.reduce_sum(weights, axis=-1)
        disparity = 1. / tf.maximum(1e-10, depth / opacity)

        if white_bkgd:
            # Whatever is not covered by the ray's opacity becomes white.
            rgb = rgb + (1. - opacity[..., None])

        return rgb, disparity, opacity, weights, depth

    ###############################
    num_rays = ray_batch.shape[0]

    origins = ray_batch[:, 0:3]  # [N_rays, 3]
    directions = ray_batch[:, 3:6]  # [N_rays, 3]

    viewdirs = None
    if ray_batch.shape[-1] > 8:
        viewdirs = ray_batch[:, -3:]

    bounds = tf.reshape(ray_batch[..., 6:8], [-1, 1, 2])
    near = bounds[..., 0]  # [-1,1]
    far = bounds[..., 1]  # [-1,1]

    # Same sampling fractions for every ray.
    fractions = tf.linspace(0., 1., N_samples)
    if lindisp:
        # Linear in inverse depth (disparity).
        z_vals = 1. / (1. / near * (1. - fractions) + 1. / far * (fractions))
    else:
        # Linear between 'near' and 'far'.
        z_vals = near * (1. - fractions) + far * (fractions)
    z_vals = tf.broadcast_to(z_vals, [num_rays, N_samples])

    if perturb > 0.:
        # Stratified sampling: one uniform draw inside each interval.
        mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        upper = tf.concat([mids, z_vals[..., -1:]], -1)
        lower = tf.concat([z_vals[..., :1], mids], -1)
        jitter = tf.random.uniform(z_vals.shape)
        z_vals = lower + (upper - lower) * jitter

    def sample_points(depths):
        # origin + direction * depth -> [N_rays, len(depths), 3]
        return (origins[..., None, :]
                + directions[..., None, :] * depths[..., :, None])

    # Coarse pass.
    raw = network_query_fn(sample_points(z_vals), viewdirs, network_fn)
    rgb_map, disp_map, acc_map, weights, depth_map = integrate(
        raw, z_vals, directions)

    if N_importance > 0:
        coarse_maps = (rgb_map, disp_map, acc_map)

        # Draw extra depths according to the coarse weights; no gradient
        # flows through the sampled positions.
        z_vals_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        z_samples = sample_pdf(
            z_vals_mid, weights[..., 1:-1], N_importance,
            det=(perturb == 0.))
        z_samples = tf.stop_gradient(z_samples)

        # Merge coarse and importance samples in depth order.
        z_vals = tf.sort(tf.concat([z_vals, z_samples], -1), -1)

        # Fine pass over [N_rays, N_samples + N_importance, 3] points.
        fine_fn = network_fn if network_fine is None else network_fine
        raw = network_query_fn(sample_points(z_vals), viewdirs, fine_fn)
        rgb_map, disp_map, acc_map, weights, depth_map = integrate(
            raw, z_vals, directions)

    ret = {'rgb_map': rgb_map, 'disp_map': disp_map, 'acc_map': acc_map}
    if retraw:
        ret['raw'] = raw
    if N_importance > 0:
        ret['rgb0'], ret['disp0'], ret['acc0'] = coarse_maps
        ret['z_std'] = tf.math.reduce_std(z_samples, -1)  # [N_rays]

    # Fail early on NaN/Inf in any returned tensor.
    for key in ret:
        tf.debugging.check_numerics(ret[key], 'output {}'.format(key))

    return ret
def __call__(self, inputs, *args, **kwargs):
    """Run the model on `inputs` and post-process its predictions.

    For every image and every class index from 1 upwards, priors whose
    softmax confidence exceeds `self.conf_thresh` are decoded, filtered with
    non-maximum suppression and zero-padded to `self.top_k` rows. Afterwards
    only the `self.top_k` highest-scoring rows of each image (across all
    classes) are kept; every other row is set to zero.

    Args:
        inputs: Batch passed to `self.model` with `training=False`. The
            model is expected to return
            loc_data of shape (batch_size, num_priors, 4) and
            conf_data of shape (batch_size, num_priors, num_classes).
        *args: Unused.
        **kwargs: Unused.

    Returns:
        A float32 tensor of shape (batch_size, num_classes, top_k, 6) whose
        rows are (conf, xmin, ymin, xmax, ymax, class_idx). Rows that do not
        hold a detection are all zeros, including the whole block of class
        index 0 (presumably the background class).
    """
    # loc_data: (batch_size, num_priors, 4)
    # conf_data: (batch_size, num_priors, num_classes)
    loc_data, conf_data = self.model(inputs, training=False)
    conf_data = tf.nn.softmax(conf_data)
    batch_size = loc_data.shape[0]

    # (batch_size, num_classes, num_priors)
    conf_preds = tf.transpose(a=conf_data, perm=[0, 2, 1])

    # Placeholder for a class without any detection. Using it for every
    # such class keeps the per-image tensors the same shape, so they can be
    # stacked across the batch.
    empty_block = tf.zeros(shape=(self.top_k, 6))

    # Decode
    output = []
    for i in range(batch_size):
        # (num_priors, 4) in (xmin, ymin, xmax, ymax) format
        decoded_boxes = InferenceProcedure._decode(
            loc_data[i], self.priors, self.variance)
        conf_scores = conf_preds[i]  # (num_classes, num_priors)

        per_class = [empty_block]  # class index 0 is never evaluated
        for cl in range(1, self.num_classes):
            # shape: (num_priors,) dtype: bool
            c_mask = tf.math.greater(conf_scores[cl], self.conf_thresh)
            scores = tf.boolean_mask(conf_scores[cl], c_mask)
            if scores.shape[0] == 0:
                per_class.append(empty_block)
                continue

            # Rows of decoded_boxes selected by the 1-D mask: (num_boxes, 4)
            boxes = tf.boolean_mask(decoded_boxes, c_mask)

            selected_indices = tf.image.non_max_suppression(
                boxes=boxes,
                scores=scores,
                max_output_size=self.top_k,
                iou_threshold=self.nms_thresh)
            selected_boxes = tf.gather(params=boxes, indices=selected_indices)
            num_boxes = selected_boxes.shape[0]
            row_padding = [[0, self.top_k - num_boxes], [0, 0]]

            # (self.top_k, 4)
            selected_boxes = tf.pad(tensor=selected_boxes,
                                    paddings=row_padding)
            # (self.top_k, 1)
            selected_scores = tf.expand_dims(
                tf.gather(params=scores, indices=selected_indices), axis=1)
            selected_scores = tf.pad(tensor=selected_scores,
                                     paddings=row_padding)
            # (self.top_k, 1)
            selected_classes = tf.fill(dims=[self.top_k, 1], value=cl)
            selected_classes = tf.cast(selected_classes, dtype=tf.float32)

            # (self.top_k, 6(conf, xmin, ymin, xmax, ymax, class_idx))
            per_class.append(tf.concat(
                values=[selected_scores, selected_boxes, selected_classes],
                axis=1))

        output.append(tf.stack(values=per_class, axis=0))

    # (batch_size, num_classes, self.top_k, 6) <dtype: 'float32'>
    output = tf.stack(values=output, axis=0)

    # (batch_size, num_classes * self.top_k, 6)
    flt = tf.reshape(output, shape=(batch_size, -1, 6))
    # idx[b, r] is the row with the r-th highest confidence; argsort of
    # that permutation yields the rank of every row.
    idx = tf.argsort(values=flt[:, :, 0], axis=1, direction="DESCENDING")
    rank = tf.argsort(values=idx, axis=1, direction="ASCENDING")

    # Zero out everything ranked below the top_k best rows. (The condition
    # must be `>=`: `rank < top_k` would erase the best detections.)
    drop = tf.expand_dims(rank >= self.top_k, axis=-1)
    drop = tf.broadcast_to(drop, shape=flt.shape)
    flt = tf.where(condition=drop, x=0, y=flt)

    return tf.reshape(flt, shape=(batch_size, -1, self.top_k, 6))
def _queue_push(queue, should_update, new_vecs):
    """Conditionally push new vectors into a batch of first-in-first-out queues.

    `queue` has shape `[k, ..., n]` and is treated as a batch of queues, each
    holding `k` n-D vectors. `new_vecs` has shape `[..., n]` and supplies one
    fresh n-D vector per batch member. `should_update` is a batch of Boolean
    scalars of shape `[...]`: where it is True, the matching vector of
    `new_vecs` is appended at the back of its queue and the vector at the
    front is dropped; where it is False, the queue is left untouched and the
    new vector is ignored.

    Note: `k` sits at dimension 0 because of the L-BFGS two-loop algorithm
    that consumes these queues. It iterates over the `k` correction pairs
    simultaneously across all batches with tf.scan, and tf.scan can only
    iterate over dimension 0.

    For example:

    ```python
      k, b, n = (3, 2, 5)
      queue = tf.reshape(tf.range(30), (k, b, n))
      # => [[[ 0,  1,  2,  3,  4],
      #      [ 5,  6,  7,  8,  9]],
      #
      #     [[10, 11, 12, 13, 14],
      #      [15, 16, 17, 18, 19]],
      #
      #     [[20, 21, 22, 23, 24],
      #      [25, 26, 27, 28, 29]]]

      element = tf.reshape(tf.range(30, 40), (b, n))
      # => [[30, 31, 32, 33, 34],
      #     [35, 36, 37, 38, 39]]

      should_update = tf.constant([True, False])  # Shape: (b,)

      _queue_push(queue, should_update, element)
      # => [[[10, 11, 12, 13, 14],
      #      [ 5,  6,  7,  8,  9]],
      #
      #     [[20, 21, 22, 23, 24],
      #      [15, 16, 17, 18, 19]],
      #
      #     [[30, 31, 32, 33, 34],
      #      [25, 26, 27, 28, 29]]]
    ```

    Args:
      queue: A `tf.Tensor` of shape `[k, ..., n]`; a batch of queues each with
        `k` n-D vectors.
      should_update: A Boolean `tf.Tensor` of shape `[...]` indicating batch
        members where new vectors should be added to their queues.
      new_vecs: A `tf.Tensor` of shape `[..., n]`; a batch of n-D vectors to
        add at the end of their respective queues, pushing out the first
        element from each.

    Returns:
      A new `tf.Tensor` of shape `[k, ..., n]`.
    """
    # Candidate result for every batch member: oldest entry dropped, new
    # vector appended at the back.
    shifted = tf.concat([queue[1:], [new_vecs]], axis=0)

    # Expand the per-member flags over the leading `k` and trailing `n` axes
    # so that they select whole queues.
    queue_shape = distribution_util.prefer_static_shape(queue)
    flags = should_update[tf.newaxis, ..., tf.newaxis]
    selector = tf.broadcast_to(flags, queue_shape)

    return tf.compat.v1.where(selector, shifted, queue)
def mask_layer(layer, mask):
    """Multiply `layer` element-wise by `mask` expanded over the last axis.

    Args:
        layer: Tensor to be masked.
        mask: Tensor with one dimension fewer than `layer`; a trailing axis
            is appended and the result is broadcast to the shape of `layer`.

    Returns:
        The element-wise product of the broadcast mask and `layer`.
    """
    mask_with_channel = tf.expand_dims(mask, -1)
    full_mask = tf.broadcast_to(mask_with_channel, tf.shape(layer))
    return tf.multiply(full_mask, layer)
def fetch_fn(idx):
    """Fetch the model inputs that belong to the observation indices `idx`.

    The getters of `input_data` are plain Python functions; they are wrapped
    with `tf.py_function` so that they can be used as TensorFlow operations.
    The static shape of a tensor is lost by that wrapping and is therefore
    restored with `set_shape`. Size factors are broadcast explicitly to
    [number of indices, number of features].

    `input_data`, `dtype` and `noise_model` are taken from the surrounding
    scope.

    Args:
        idx: Tensor of observation indices; a scalar is promoted to a vector
            of length one.

    Returns:
        A tuple `(idx, (X_tensor, design_loc_tensor, design_scale_tensor,
        size_factors_tensor))`. `X_tensor` is itself a tuple: the triple
        (indices, values, shape) for sparse data, or a one-element tuple
        holding the dense matrix.
    """
    # Catch dimension collapse error if idx is only one element long, ie. 0D:
    if len(idx.shape) == 0:
        idx = tf.expand_dims(idx, axis=0)

    idx_dims = idx.get_shape().as_list()

    def fetch_dense(getter, source_dtype, num_columns):
        # Wrap a Python getter, restore the static shape, cast to `dtype`.
        fetched = tf.py_function(func=getter, inp=[idx], Tout=source_dtype)
        fetched.set_shape(idx_dims + [num_columns])
        return tf.cast(fetched, dtype=dtype)

    if isinstance(input_data.x, scipy.sparse.csr_matrix):
        # Note on Tout: np.float64 for val seems to be required to avoid
        # crashing v1.12.
        sparse_idx, sparse_val, sparse_shape = tf.py_function(
            func=input_data.fetch_x_sparse,
            inp=[idx],
            Tout=[np.int64, np.float64, np.int64])
        sparse_idx = tf.cast(sparse_idx, dtype=tf.int64)
        sparse_shape = tf.cast(sparse_shape, dtype=tf.int64)
        sparse_val = tf.cast(sparse_val, dtype=dtype)
        X_tensor = (sparse_idx, sparse_val, sparse_shape)
    else:
        X_tensor = (fetch_dense(input_data.fetch_x_dense,
                                input_data.x.dtype,
                                input_data.num_features), )

    design_loc_tensor = fetch_dense(input_data.fetch_design_loc,
                                    input_data.design_loc.dtype,
                                    input_data.num_design_loc_params)
    design_scale_tensor = fetch_dense(input_data.fetch_design_scale,
                                      input_data.design_scale.dtype,
                                      input_data.num_design_scale_params)

    has_size_factors = (input_data.size_factors is not None
                        and noise_model in ["nb", "norm"])
    if has_size_factors:
        size_factors_tensor = tf.py_function(
            func=input_data.fetch_size_factors,
            inp=[idx],
            Tout=input_data.size_factors.dtype)
        size_factors_tensor.set_shape(idx.get_shape())
        # Column vector, so it broadcasts over the feature axis below.
        size_factors_tensor = tf.expand_dims(size_factors_tensor, axis=-1)
        size_factors_tensor = tf.cast(size_factors_tensor, dtype=dtype)
    else:
        # Neutral size factor of one.
        size_factors_tensor = tf.constant(1, shape=[1, 1], dtype=dtype)
    size_factors_tensor = tf.broadcast_to(
        size_factors_tensor,
        shape=[tf.size(idx), input_data.num_features])

    # return idx, data
    data = (X_tensor, design_loc_tensor, design_scale_tensor,
            size_factors_tensor)
    return idx, data
def render_rays(ray_batch,
                network_fn,
                network_query_fn,
                N_samples,
                retraw=False,
                lindisp=False,
                perturb=0.,
                N_importance=0,
                network_fine=None,
                white_bkgd=False,
                raw_noise_std=0.,
                verbose=False):
    """Render a batch of rays by sampling and compositing model predictions.

    Args:
        ray_batch: [N_rays, ...] array. Columns 0:3 hold the ray origin, 3:6
            the ray direction and 6:8 the near/far bounds; when there are
            more than 8 columns, the last 3 are used as viewing direction.
        network_fn: Model evaluated on the first (coarse) set of samples.
        network_query_fn: Callable `(pts, viewdirs, fn)` that runs `fn` on
            the sample points and returns raw predictions with 4 channels.
        N_samples: Number of samples per ray for the first pass.
        retraw: If True, include the raw predictions of the last pass.
        lindisp: If True, space the samples linearly in inverse depth
            instead of in depth.
        perturb: If greater than 0, jitter the samples uniformly inside
            their intervals; if equal to 0, importance sampling is
            deterministic.
        N_importance: Number of additional samples per ray drawn from the
            first-pass weights; 0 disables the second pass.
        network_fine: Model for the second pass; `network_fn` is reused when
            this is None.
        white_bkgd: If True, add `1 - acc_map` to the color.
        raw_noise_std: Standard deviation of the normal noise added to the
            raw density channel; no noise is added unless it is positive.
        verbose: Not used by this function.

    Returns:
        A dict with 'rgb_map' [N_rays, 3], 'disp_map' and 'acc_map'
        [N_rays]; plus 'raw' if `retraw`; plus 'rgb0', 'disp0', 'acc0'
        (first-pass maps) and 'z_std' [N_rays] if `N_importance > 0`. Every
        entry is passed through `tf.debugging.check_numerics`.
    """

    def raw2alpha(raw, dists, act_fn=tf.nn.relu):
        # Opacity of a sample from its raw density and its extent.
        return 1. - tf.exp(-act_fn(raw) * dists)

    def raw2outputs(raw, z_vals, rays_d):
        """Composite raw predictions along each ray.

        Returns rgb_map [N_rays, 3], disp_map, acc_map and depth_map
        [N_rays], and the per-sample weights [N_rays, N_samples].
        """
        # Distance between consecutive samples; the last sample gets a very
        # large extent.
        dists = z_vals[..., 1:] - z_vals[..., :-1]
        dists = tf.concat(
            [dists, tf.broadcast_to([1e10], dists[..., :1].shape)],
            -1)  # [N_rays, N_samples]

        # Account for ray directions that are not unit length.
        dists = dists * tf.linalg.norm(rays_d[..., None, :], axis=-1)

        rgb = tf.math.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]
        noise = 0.
        if raw_noise_std > 0.:
            noise = tf.random.normal(raw[..., 3].shape) * raw_noise_std
        alpha = raw2alpha(raw[..., 3] + noise, dists)  # [N_rays, N_samples]

        # Exclusive cumprod: each sample is weighted by the product of
        # (1 - alpha) of all samples in front of it.
        weights = alpha * tf.math.cumprod(
            1. - alpha + 1e-10, -1, exclusive=True)
        rgb_map = tf.reduce_sum(weights[..., None] * rgb, -2)  # [N_rays, 3]

        depth_map = tf.reduce_sum(weights * z_vals, -1)
        disp_map = 1. / tf.maximum(
            1e-10, depth_map / tf.reduce_sum(weights, -1))
        acc_map = tf.reduce_sum(weights, -1)

        if white_bkgd:
            rgb_map = rgb_map + (1. - acc_map[..., None])

        return rgb_map, disp_map, acc_map, weights, depth_map

    ###############################

    N_rays = ray_batch.shape[0]
    rays_o, rays_d = ray_batch[:, 0:3], ray_batch[:, 3:6]  # [N_rays, 3] each
    viewdirs = ray_batch[:, -3:] if ray_batch.shape[-1] > 8 else None
    bounds = tf.reshape(ray_batch[..., 6:8], [-1, 1, 2])
    near, far = bounds[..., 0], bounds[..., 1]  # [-1,1]

    # The same sample positions (as fractions of [near, far]) for all rays.
    t_vals = tf.linspace(0., 1., N_samples)
    if not lindisp:
        z_vals = near * (1. - t_vals) + far * (t_vals)
    else:
        z_vals = 1. / (1. / near * (1. - t_vals) + 1. / far * (t_vals))
    z_vals = tf.broadcast_to(z_vals, [N_rays, N_samples])

    if perturb > 0.:
        # get intervals between samples
        mids = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        upper = tf.concat([mids, z_vals[..., -1:]], -1)
        lower = tf.concat([z_vals[..., :1], mids], -1)
        # stratified samples in those intervals
        t_rand = tf.random.uniform(z_vals.shape)
        z_vals = lower + (upper - lower) * t_rand

    pts = rays_o[..., None, :] + rays_d[..., None, :] * \
        z_vals[..., :, None]  # [N_rays, N_samples, 3]

    raw = network_query_fn(pts, viewdirs, network_fn)
    rgb_map, disp_map, acc_map, weights, depth_map = raw2outputs(
        raw, z_vals, rays_d)

    if N_importance > 0:
        rgb_map_0, disp_map_0, acc_map_0 = rgb_map, disp_map, acc_map

        # Draw additional depths from the first-pass weights; gradients do
        # not flow through the sampled positions.
        z_vals_mid = .5 * (z_vals[..., 1:] + z_vals[..., :-1])
        z_samples = sample_pdf(
            z_vals_mid, weights[..., 1:-1], N_importance,
            det=(perturb == 0.))
        z_samples = tf.stop_gradient(z_samples)

        # Evaluate the union of both sample sets, in depth order.
        z_vals = tf.sort(tf.concat([z_vals, z_samples], -1), -1)
        pts = rays_o[..., None, :] + rays_d[..., None, :] * \
            z_vals[..., :, None]  # [N_rays, N_samples + N_importance, 3]

        run_fn = network_fn if network_fine is None else network_fine
        raw = network_query_fn(pts, viewdirs, run_fn)
        rgb_map, disp_map, acc_map, weights, depth_map = raw2outputs(
            raw, z_vals, rays_d)

    ret = {'rgb_map': rgb_map, 'disp_map': disp_map, 'acc_map': acc_map}
    if retraw:
        ret['raw'] = raw
    if N_importance > 0:
        ret['rgb0'] = rgb_map_0
        ret['disp0'] = disp_map_0
        ret['acc0'] = acc_map_0
        ret['z_std'] = tf.math.reduce_std(z_samples, -1)  # [N_rays]

    # Raise as soon as any output contains NaN or Inf.
    for k in ret:
        tf.debugging.check_numerics(ret[k], 'output {}'.format(k))

    return ret
def brdf(direction_incoming_light,
         direction_outgoing_light,
         surface_normal,
         shininess,
         albedo,
         brdf_normalization=True,
         name=None):
    """Evaluates the specular brdf of the Phong model.

    Note:
      In the following, A1 to An are optional batch dimensions, which must be
      broadcast compatible.

    Note:
      The gradient of this function is not smooth when the dot product of the
      normal with any light is 0.0.

    Args:
      direction_incoming_light: A tensor of shape `[A1, ..., An, 3]`; the last
        dimension is a normalized incoming light vector.
      direction_outgoing_light: A tensor of shape `[A1, ..., An, 3]`; the last
        dimension is a normalized outgoing light vector.
      surface_normal: A tensor of shape `[A1, ..., An, 3]`; the last dimension
        is a normalized surface normal.
      shininess: A tensor of shape `[A1, ..., An, 1]`; the last dimension is a
        non-negative shininess coefficient.
      albedo: A tensor of shape `[A1, ..., An, 3]`; the last dimension is the
        albedo with values in [0,1].
      brdf_normalization: A `bool`. When True, the result is multiplied by a
        shininess-dependent normalization factor that enforces energy
        conservation; set it to False to obtain the unnormalized specular
        term.
      name: A name for this op. Defaults to "phong_brdf".

    Returns:
      A tensor of shape `[A1, ..., An, 3]`; the last dimension is the amount
      of light reflected in the outgoing light direction. It is zero wherever
      the incoming or the outgoing light points behind the surface.

    Raises:
      ValueError: if the shape of `direction_incoming_light`,
        `direction_outgoing_light`, `surface_normal`, `shininess` or `albedo`
        is not supported.
      InvalidArgumentError: if not all of shininess values are non-negative,
        or if at least one element of `albedo` is outside of [0,1].
    """
    raw_inputs = (direction_incoming_light, direction_outgoing_light,
                  surface_normal, shininess, albedo)
    with tf.compat.v1.name_scope(name, "phong_brdf", list(raw_inputs)):
        (direction_incoming_light, direction_outgoing_light, surface_normal,
         shininess, albedo) = [
             tf.convert_to_tensor(value=item) for item in raw_inputs
         ]

        tensors = (direction_incoming_light, direction_outgoing_light,
                   surface_normal, shininess, albedo)
        tensor_names = ("direction_incoming_light",
                        "direction_outgoing_light", "surface_normal",
                        "shininess", "albedo")
        last_dims = (3, 3, 3, 1, 3)

        # Static shape validation: size of the last axis, then batch shapes.
        for tensor, tensor_name, last_dim in zip(tensors, tensor_names,
                                                 last_dims):
            shape.check_static(
                tensor=tensor,
                tensor_name=tensor_name,
                has_dim_equals=(-1, last_dim))
        shape.compare_batch_dimensions(
            tensors=tensors,
            tensor_names=tensor_names,
            last_axes=-2,
            broadcast_compatible=True)

        # Value validation.
        direction_incoming_light = asserts.assert_normalized(
            direction_incoming_light)
        direction_outgoing_light = asserts.assert_normalized(
            direction_outgoing_light)
        surface_normal = asserts.assert_normalized(surface_normal)
        albedo = asserts.assert_all_in_range(
            albedo, 0.0, 1.0, open_bounds=False)
        shininess = asserts.assert_all_above(shininess, 0.0, open_bound=False)

        # Checks whether the incoming or outgoing light point behind the
        # surface: the smaller of the two cosines is negative in that case.
        cos_incoming = vector.dot(-direction_incoming_light, surface_normal)
        cos_outgoing = vector.dot(direction_outgoing_light, surface_normal)
        min_dot = tf.minimum(cos_incoming, cos_outgoing)

        # Cosine between the mirror direction and the outgoing direction,
        # clamped at zero.
        mirror_direction = vector.reflect(direction_incoming_light,
                                          surface_normal)
        mirror_direction = tf.math.l2_normalize(mirror_direction, axis=-1)
        cos_alpha = vector.dot(
            mirror_direction, direction_outgoing_light, axis=-1)
        cos_alpha = tf.maximum(cos_alpha, tf.zeros_like(cos_alpha))

        phong_model = albedo * tf.pow(cos_alpha, shininess)
        if brdf_normalization:
            phong_model *= _brdf_normalization_factor(shininess)

        # Bring the visibility condition and the brdf to one common shape;
        # statically unknown dimensions are replaced by 1.
        broadcast_dims = shape.get_broadcasted_shape(min_dot.shape,
                                                     phong_model.shape)
        common_shape = [
            1 if dim is None else tf.compat.v1.dimension_value(dim)
            for dim in broadcast_dims
        ]
        is_visible = tf.broadcast_to(
            tf.greater_equal(min_dot, 0.0), common_shape)
        phong_model = tf.broadcast_to(phong_model, common_shape)
        return tf.where(is_visible, phong_model, tf.zeros_like(phong_model))
def G_style(
        latents_in,  # First input: Latent vectors (Z) [minibatch, latent_size].
        labels_in,  # Second input: Conditioning labels [minibatch, label_size].
        truncation_psi=0.7,  # Style strength multiplier for the truncation trick. None = disable.
        truncation_cutoff=8,  # Number of layers for which to apply the truncation trick. None = disable.
        truncation_psi_val=None,  # Value for truncation_psi to use during validation.
        truncation_cutoff_val=None,  # Value for truncation_cutoff to use during validation.
        dlatent_avg_beta=0.995,  # Decay for tracking the moving average of W during training. None = disable.
        style_mixing_prob=0.9,  # Probability of mixing styles during training. None = disable.
        is_training=False,  # Network is under training? Enables and disables specific features.
        is_validation=False,  # Network is under validation? Chooses which value to use for truncation_psi.
        is_template_graph=False,  # True = template graph constructed by the Network class, False = actual evaluation.
        components=dnnlib.EasyDict(),  # Container for sub-networks. Retained between calls.
        **kwargs):  # Arguments for sub-networks (G_mapping and G_synthesis).
    """Run the mapping network, post-process the dlatents, then synthesize images.

    The dlatents produced by the mapping network are optionally (a) used to
    update a moving average, (b) mixed with dlatents of a second random latent
    and (c) pulled towards the moving average (truncation) before they are fed
    to the synthesis network. Returns the synthesis output wrapped in an
    identity op named 'images_out'.
    """
    # Validate arguments.
    assert not (is_training and is_validation)
    assert isinstance(components, dnnlib.EasyDict)
    if is_validation:
        truncation_psi, truncation_cutoff = truncation_psi_val, truncation_cutoff_val

    def _is_static(value):
        # True for a concrete Python value (neither None nor a TF expression).
        return value is not None and not tflib.is_tf_expression(value)

    # Truncation is only used outside training; averaging and mixing only in training.
    if is_training or (_is_static(truncation_psi) and truncation_psi == 1):
        truncation_psi = None
    if is_training or (_is_static(truncation_cutoff) and truncation_cutoff <= 0):
        truncation_cutoff = None
    if not is_training or (_is_static(dlatent_avg_beta) and dlatent_avg_beta == 1):
        dlatent_avg_beta = None
    if not is_training or (_is_static(style_mixing_prob) and style_mixing_prob <= 0):
        style_mixing_prob = None

    # Setup components.
    if 'synthesis' not in components:
        components.synthesis = tflib.Network(
            'G_synthesis', func_name=G_synthesis, **kwargs)
    synthesis_input_shape = components.synthesis.input_shape
    num_layers = synthesis_input_shape[1]
    dlatent_size = synthesis_input_shape[2]
    if 'mapping' not in components:
        components.mapping = tflib.Network(
            'G_mapping', func_name=G_mapping, dlatent_broadcast=num_layers, **kwargs)

    # Setup variables.
    lod_in = tf.get_variable('lod', initializer=np.float32(0), trainable=False)
    dlatent_avg = tf.get_variable(
        'dlatent_avg',
        shape=[dlatent_size],
        initializer=tf.initializers.zeros(),
        trainable=False)

    # Evaluate mapping network.
    dlatents = components.mapping.get_output_for(latents_in, labels_in, **kwargs)

    # Update moving average of W.
    if dlatent_avg_beta is not None:
        with tf.variable_scope('DlatentAvg'):
            batch_avg = tf.reduce_mean(dlatents[:, 0], axis=0)
            new_avg = tflib.lerp(batch_avg, dlatent_avg, dlatent_avg_beta)
            update_op = tf.assign(dlatent_avg, new_avg)
            # Tie the update to the dlatents so it runs whenever they are used.
            with tf.control_dependencies([update_op]):
                dlatents = tf.identity(dlatents)

    # Perform style mixing regularization.
    if style_mixing_prob is not None:
        with tf.name_scope('StyleMix'):
            latents2 = tf.random_normal(tf.shape(latents_in))
            dlatents2 = components.mapping.get_output_for(
                latents2, labels_in, **kwargs)
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            cur_layers = num_layers - tf.cast(lod_in, tf.int32) * 2
            do_mix = tf.random_uniform([], 0.0, 1.0) < style_mixing_prob
            mixing_cutoff = tf.cond(
                do_mix,
                lambda: tf.random_uniform([], 1, cur_layers, dtype=tf.int32),
                lambda: cur_layers)
            # Layers below the cutoff keep the first style, the rest use the second.
            keep_first = tf.broadcast_to(
                layer_idx < mixing_cutoff, tf.shape(dlatents))
            dlatents = tf.where(keep_first, dlatents, dlatents2)

    # Apply truncation trick.
    if truncation_psi is not None and truncation_cutoff is not None:
        with tf.variable_scope('Truncation'):
            layer_idx = np.arange(num_layers)[np.newaxis, :, np.newaxis]
            ones = np.ones(layer_idx.shape, dtype=np.float32)
            coefs = tf.where(
                layer_idx < truncation_cutoff, truncation_psi * ones, ones)
            dlatents = tflib.lerp(dlatent_avg, dlatents, coefs)

    # Evaluate synthesis network.
    sync_lod = tf.assign(components.synthesis.find_var('lod'), lod_in)
    with tf.control_dependencies([sync_lod]):
        images_out = components.synthesis.get_output_for(
            dlatents, force_clean_graph=is_template_graph, **kwargs)
    return tf.identity(images_out, name='images_out')
def build_controller_model(num_node, hidden_size, controller_temperature,
                           controller_tanh_constant):
    """Build the Keras controller that samples one predecessor link per node.

    For every node j in 2..num_node the controller runs one LSTM step, scores
    all earlier steps with additive attention, samples a predecessor index
    from those scores and feeds the embedding of the sampled index into the
    next LSTM step.

    Args:
        num_node: Total number of nodes. Must be at least 2.
        hidden_size: Width of the LSTM, the link embedding and the attention
            projections.
        controller_temperature: Divisor applied to the raw attention logits.
        controller_tanh_constant: Multiplier applied to the tanh-squashed
            logits.

    Returns:
        A `tf.keras.Model` taking a float input of shape [B, 1] and producing
        three outputs:
          * links, [B, num_node, num_node]: one-hot predecessor per node; the
            first row is all zeros.
          * cross-entropy of each sampled link, [B, num_node - 1].
          * the attention logits of each step zero-padded to num_node,
            [B, num_node - 1, num_node]. These are logits, not normalized
            probabilities.

    Raises:
        ValueError: If `num_node` is smaller than 2.
    """
    if num_node < 2:
        raise ValueError(
            "num_node must be at least 2, got {}".format(num_node))

    input_tensor = tf.keras.Input(shape=[1],
                                  name="input_tensor",
                                  dtype=tf.float32)  # [B, 1]
    batch_size = tf.shape(input_tensor)[0]

    link_embedding_layer = tf.keras.layers.Embedding(
        input_dim=num_node - 1,
        output_dim=hidden_size,
        name="link_embedding_layer")
    link_lstm_layer = tf.keras.layers.LSTM(hidden_size,
                                           return_sequences=False,
                                           return_state=True,
                                           trainable=True,
                                           recurrent_activation=None,
                                           name="link_lstm")
    init_link_input = tf.keras.layers.Dense(hidden_size,
                                            use_bias=False,
                                            activation=None,
                                            trainable=True,
                                            name="init_link_inputs")
    # Additive attention layers.
    link_atten_w_1 = tf.keras.layers.Dense(hidden_size,
                                           use_bias=False,
                                           activation=None,
                                           trainable=True,
                                           name="w_1")
    link_atten_w_2 = tf.keras.layers.Dense(hidden_size,
                                           use_bias=False,
                                           activation=None,
                                           trainable=True,
                                           name="w_2")
    link_atten_w_a = tf.keras.layers.Dense(1,
                                           use_bias=False,
                                           activation=None,
                                           trainable=True,
                                           name="w_a")

    # Initial LSTM input derived from the model input.
    init_link_embedding = init_link_input(input_tensor)  # [B, hidden_size]

    # Attention keys of all previous steps; starts with a zero key for node 1.
    all_h_w = [
        tf.broadcast_to(tf.zeros(shape=[1, hidden_size]),
                        shape=[batch_size, hidden_size])
    ]
    # Sampled link vectors; the first node has no predecessor (all zeros).
    all_links = [
        tf.broadcast_to(tf.zeros(shape=[1, num_node]),
                        shape=[batch_size, num_node])
    ]
    all_ce_loss = []  # per-step loss, stacked to [B, num_node - 1]
    all_prob = []  # per-step padded logits, stacked to [B, num_node - 1, num_node]

    lstm_input = tf.expand_dims(init_link_embedding, 1)  # [B, 1, hidden_size]
    lstm_state = None
    for j in range(2, num_node + 1):
        # Keras returns (output, memory state h, carry state c) in this order.
        _, state_h, state_c = link_lstm_layer(lstm_input,
                                              initial_state=lstm_state)
        lstm_state = [state_h, state_c]

        # NOTE(review): attention is driven by the carry state `c`. The
        # original code unpacked the states as (c, h) and used its "h", which
        # is really `c`. Behavior is kept unchanged here; confirm whether the
        # memory state `h` was intended before switching.
        attended_state = state_c

        all_h_w.append(link_atten_w_1(attended_state))
        query = link_atten_w_2(attended_state)
        key = tf.transpose(tf.stack(all_h_w[:-1], axis=0),
                           perm=[1, 0, 2])  # [B, j-1, hidden_size]
        query = tf.reshape(query,
                           [batch_size, 1, hidden_size])  # [B, 1, hidden_size]
        query = tf.nn.tanh(query + key)  # [B, j-1, hidden_size]

        logits = link_atten_w_a(query)  # [B, j-1, 1]
        logits = logits / controller_temperature
        logits = controller_tanh_constant * tf.nn.tanh(logits)
        logits = tf.squeeze(logits, -1)  # [B, j-1] scores of candidate predecessors

        prob = tf.pad(logits, [[0, 0], [0, num_node - j + 1]])  # [B, num_node]
        all_prob.append(prob)

        # Sample the predecessor id and look up its embedding.
        input_node_id = tf.squeeze(tf.random.categorical(logits, 1),
                                   axis=[-1])  # [B]
        link = tf.one_hot(input_node_id, depth=num_node)  # [B, num_node]
        link_embedding = link_embedding_layer(
            tf.expand_dims(input_node_id, -1))  # [B, 1, hidden_size]

        # Cross-entropy of the sampled id under the current logits.
        ce_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.stop_gradient(input_node_id),
            name=f"controller_ce_{j}")  # [B]

        all_links.append(link)
        all_ce_loss.append(ce_loss)
        lstm_input = link_embedding  # [B, 1, hidden_size]

    all_prob = tf.stack(all_prob, 1)  # [B, num_node-1, num_node]
    all_links = tf.stack(all_links, 1)  # [B, num_node, num_node]
    all_ce_loss = tf.stack(all_ce_loss, axis=-1)  # [B, num_node-1]

    model = tf.keras.Model(inputs=[input_tensor],
                           outputs=[all_links, all_ce_loss, all_prob])
    return model
def build_graph(self, inputs, training=None):
    """Run the pretrained BERT encoder and attach the tagging head.

    Depending on the parameters, the result dict holds either the
    class/start/end outputs (`bet_tagging_`), CRF-decoded ids together with
    the transition matrix (`use_crf`), or plain argmax predictions.
    `training` is accepted but not forwarded by this method.
    """
    cfg = self._params

    seq_key = "word_seq_length" if cfg.wordwise_output_ else "seq_length"
    tar_seq_length = inputs[seq_key]

    encoded = self.pretrained_bert(inputs, return_dict=True)
    encoded = self._dropout(encoded.last_hidden_state)

    # Optionally collapse token-level outputs to word-level outputs.
    if cfg.wordwise_output_:
        if cfg.wwo_mode_ == "first":
            encoded = self.gather_batch(encoded, inputs["wwo_indexes"])
        elif cfg.wwo_mode_ in ["mean", "max"]:
            encoded = self.reduce_token_to_word(encoded, inputs["wwo_indexes"])

    if cfg.bet_tagging_:
        probs_cls = self._layer_cls(encoded)
        probs_start = self._layer_start(encoded)
        probs_end = self._layer_end(encoded)
        p_cse = tf.concat((probs_cls, probs_start, probs_end), axis=-1)
        return {
            "probabilities_cls": probs_cls,
            "probabilities_start": tf.squeeze(probs_start, axis=-1),
            "probabilities_end": tf.squeeze(probs_end, axis=-1),
            "probabilities_cse": p_cse,
        }

    final_output = self._last_layer(encoded)  # (batch_size, tar_seq_len, target_vocab_size)

    if not cfg.use_crf:
        pred_ids = tf.argmax(input=final_output, axis=2, output_type=tf.int32)
        return {
            "pred_ids": pred_ids,
            "logits": final_output,
            "probabilities": final_output,
        }

    trans_params = self._trans_params(tf.range(self.tag_vocab_size))
    # NOTE(review): the second flag is read via `self.params`, the first via
    # `self._params` -- presumably the same object; confirm.
    if cfg.crf_with_ner_rule or self.params.crf_with_ner_forb_trans:
        if cfg.crf_with_ner_rule:
            penalty_factor = self._penalty_factor(tf.range(1))[0][0]
            penalty_absolute = self._penalty_absolute(tf.range(1))[0][0]
        else:
            penalty_factor = self._penalty_factor
            penalty_absolute = self._penalty_absolute
        forbidden = self._forbidden_transitions
        factor = self._allowed_transitions + tf.math.scalar_mul(penalty_factor, forbidden)
        absolute = tf.math.scalar_mul(penalty_absolute, forbidden)
        trans_params = trans_params * factor - absolute

    # Decode once with the (penalized) transitions and once with forbidden
    # transitions pushed down by a large constant.
    seq_lens = tar_seq_length[:, 0]
    pred_ids, _ = tfa.text.crf_decode(final_output, trans_params, seq_lens)
    hard_trans_params = trans_params - 1000000 * self._forbidden_transitions
    pred_idsfp, _ = tfa.text.crf_decode(final_output, hard_trans_params, seq_lens)

    # broadcasting because of the lav engine: it needs netoutputs with the
    # first shape dimension of the batch size
    trans_shape = tf.shape(trans_params)
    trans_params = tf.broadcast_to(
        trans_params, [tf.shape(pred_ids)[0], trans_shape[0], trans_shape[1]])

    return {
        "pred_ids": pred_ids,
        "logits": final_output,
        "probabilities": final_output,
        "trans_params": trans_params,
        "pred_idsfp": pred_idsfp,
    }
def raw2outputs(raw, z_vals, rays_d):
    """Transforms model's predictions to semantically meaningful values.

    Shapes are read with `tf.shape`, so the function also works when the
    number of rays or samples is unknown at graph-construction time.

    NOTE(review): `raw_noise_std` and `white_bkgd` are not parameters; they
    are resolved from the enclosing scope -- confirm they are defined there.

    Args:
      raw: [num_rays, num_samples along ray, 4]. Prediction from model.
      z_vals: [num_rays, num_samples along ray]. Integration time.
      rays_d: [num_rays, 3]. Direction of each ray.

    Returns:
      rgb_map: [num_rays, 3]. Estimated RGB color of a ray.
      disp_map: [num_rays]. Disparity map. Inverse of depth map.
      acc_map: [num_rays]. Sum of weights along each ray.
      weights: [num_rays, num_samples]. Weights assigned to each sampled color.
      depth_map: [num_rays]. Estimated distance to object.
    """

    # Converts a raw density prediction into an opacity. With the default
    # relu activation and non-negative distances the result lies in [0, 1).
    def raw2alpha(raw, dists, act_fn=tf.nn.relu):
        return 1.0 - tf.exp(-act_fn(raw) * dists)

    # Compute 'distance' (in time) between each integration time along a ray.
    dists = z_vals[..., 1:] - z_vals[..., :-1]

    # The 'distance' from the last integration time is infinity.
    # tf.shape (instead of the static .shape) keeps this valid for dynamic dims.
    dists = tf.concat(
        [dists, tf.broadcast_to([1e10], tf.shape(dists[..., :1]))],
        axis=-1)  # [N_rays, N_samples]

    # Multiply each distance by the norm of its corresponding direction ray
    # to convert to real world distance (accounts for non-unit directions).
    dists = dists * tf.linalg.norm(rays_d[..., None, :], axis=-1)

    # Extract RGB of each sample position along each ray.
    rgb = tf.math.sigmoid(raw[..., :3])  # [N_rays, N_samples, 3]

    # Add noise to model's predictions for density. Can be used to
    # regularize network during training (prevents floater artifacts).
    noise = 0.
    if raw_noise_std > 0.:
        noise = tf.random.normal(tf.shape(raw[..., 3])) * raw_noise_std

    # Predict density of each sample along each ray. Higher values imply
    # higher likelihood of being absorbed at this point.
    alpha = raw2alpha(raw[..., 3] + noise, dists)  # [N_rays, N_samples]

    # Compute weight for RGB of each sample along each ray. A cumprod() is
    # used to express the idea of the ray not having reflected up to this
    # sample yet.
    # [N_rays, N_samples]
    weights = alpha * tf.math.cumprod(1. - alpha + 1e-10, axis=-1, exclusive=True)

    # Computed weighted color of each sample along each ray.
    rgb_map = tf.reduce_sum(weights[..., None] * rgb, axis=-2)  # [N_rays, 3]

    # Estimated depth map is expected distance.
    depth_map = tf.reduce_sum(weights * z_vals, axis=-1)

    # Sum of weights along each ray. This value is in [0, 1] up to numerical error.
    acc_map = tf.reduce_sum(weights, axis=-1)

    # Disparity map is inverse depth (depth normalized by the accumulated weight).
    disp_map = 1. / tf.maximum(1e-10, depth_map / acc_map)

    # To composite onto a white background, use the accumulated alpha map.
    if white_bkgd:
        rgb_map = rgb_map + (1. - acc_map[..., None])

    return rgb_map, disp_map, acc_map, weights, depth_map
def build_graph(self, inputs, training=None):
    """Encode the sentence with the pretrained BERT and attach the tagging head.

    Depending on the parameters, the result dict holds either the
    class/start/end outputs (`bet_tagging_`, plus raw start/end logits when
    `loss_se_mode == "logreg"`), CRF-decoded ids together with the transition
    matrix (`use_crf`), or plain argmax predictions.
    """
    cfg = self._params

    # Assemble the encoder inputs under the key names the encoder is called with.
    inp = {
        "text": inputs["sentence"],
        "seq_length": inputs["seq_length"],
    }
    if cfg.whole_word_attention_:
        inp["word_length_vector"] = inputs["word_length_vector"]
        # NOTE(review): source formatting was lost; segment_ids is assumed to
        # belong to the whole-word-attention branch -- confirm.
        inp["segment_ids"] = inputs["segment_ids"]

    encoded = self.pretrained_bert(inp, training=training)
    encoded = self._dropout(encoded["enc_output"])

    # Optionally collapse token-level outputs to word-level outputs and pick
    # the matching target sequence length.
    if cfg.wordwise_output_:
        wwo_indexes = inputs["wwo_indexes"]
        if cfg.wwo_mode_ == "first":
            encoded = self.gather_batch(encoded, wwo_indexes)
        elif cfg.wwo_mode_ in ["mean", "max"]:
            encoded = self.reduce_token_to_word(encoded, wwo_indexes)
        tar_seq_length = inputs["word_seq_length"]
    else:
        tar_seq_length = inputs["seq_length"]

    if cfg.bet_tagging_:
        use_logreg = cfg.loss_se_mode == "logreg"
        start_out = self._layer_start(encoded)
        end_out = self._layer_end(encoded)
        if use_logreg:
            # The layers emit logits here; probabilities come from a sigmoid.
            probs_start, probs_end = tf.sigmoid(start_out), tf.sigmoid(end_out)
        else:
            probs_start, probs_end = start_out, end_out
        probs_cls = self._layer_cls(encoded)
        p_cse = tf.concat((probs_cls, probs_start, probs_end), axis=-1)
        result = {
            "probabilities_cls": probs_cls,
            "probabilities_start": tf.squeeze(probs_start, axis=-1),
            "probabilities_end": tf.squeeze(probs_end, axis=-1),
            "probabilities_cse": p_cse,
        }
        if use_logreg:
            result["logits_start"] = tf.squeeze(start_out, axis=-1)
            result["logits_end"] = tf.squeeze(end_out, axis=-1)
        return result

    final_output = self._last_layer(encoded)  # (batch_size, tar_seq_len, target_vocab_size)

    if not cfg.use_crf:
        pred_ids = tf.argmax(input=final_output, axis=2, output_type=tf.int32)
        return {
            "pred_ids": pred_ids,
            "logits": final_output,
            "probabilities": final_output,
        }

    trans_params = self._trans_params(tf.range(self.tag_vocab_size))
    # NOTE(review): the second flag is read via `self.params`, the first via
    # `self._params` -- presumably the same object; confirm.
    if cfg.crf_with_ner_rule or self.params.crf_with_ner_forb_trans:
        if cfg.crf_with_ner_rule:
            penalty_factor = self._penalty_factor(tf.range(1))[0][0]
            penalty_absolute = self._penalty_absolute(tf.range(1))[0][0]
        else:
            penalty_factor = self._penalty_factor
            penalty_absolute = self._penalty_absolute
        forbidden = self._forbidden_transitions
        factor = self._allowed_transitions + tf.math.scalar_mul(penalty_factor, forbidden)
        absolute = tf.math.scalar_mul(penalty_absolute, forbidden)
        trans_params = trans_params * factor - absolute

    # Decode once with the (penalized) transitions and once with forbidden
    # transitions pushed down by a large constant.
    seq_lens = tar_seq_length[:, 0]
    pred_ids, _ = tfa.text.crf_decode(final_output, trans_params, seq_lens)
    hard_trans_params = trans_params - 1000000 * self._forbidden_transitions
    pred_idsfp, _ = tfa.text.crf_decode(final_output, hard_trans_params, seq_lens)

    # broadcasting because of the lav engine: it needs netoutputs with the
    # first shape dimension of the batch size
    trans_shape = tf.shape(trans_params)
    trans_params = tf.broadcast_to(
        trans_params, [tf.shape(pred_ids)[0], trans_shape[0], trans_shape[1]])

    return {
        "pred_ids": pred_ids,
        "logits": final_output,
        "probabilities": final_output,
        "trans_params": trans_params,
        "pred_idsfp": pred_idsfp,
    }
def model_fn(features, labels, mode, params):
    """
    Defines the model_fn to feed in to estimator

    :param features: dict containing the features in data
    :param labels: dict containing labels in data
    :param mode: running mode, in TRAIN/EVAL/PREDICT
    :param params: hparams used
    :return: tf.estimator.EstimatorSpec
    """
    mode_keys = tf.estimator.ModeKeys
    is_predict = mode == mode_keys.PREDICT

    def _fields_with_prefix(prefix):
        # All feature tensors whose name starts with `prefix`, or None if there are none.
        matched = [features[ftr_name] for ftr_name in features if ftr_name.startswith(prefix)]
        return matched or None

    query_field = features.get('query', None)
    uid = features.get('uid', None)
    weight = features.get('weight', None)
    wide_ftrs = features.get('wide_ftrs', None)
    wide_ftrs_sp_idx = features.get('wide_ftrs_sp_idx', None)
    wide_ftrs_sp_val = features.get('wide_ftrs_sp_val', None)

    doc_fields = _fields_with_prefix('doc_')
    usr_fields = _fields_with_prefix('usr_')
    doc_id_fields = _fields_with_prefix('docId_')
    usr_id_fields = _fields_with_prefix('usrId_')

    label_field = None if is_predict else labels['label']
    labels_passthrough = features['label']
    group_size_field = None if is_predict else features['group_size']

    # For multitask training
    task_id_field = features.get('task_id', None)  # shape=[batch_size,]

    # Update the weight with each task's weight such that weight per document = weight * task_weight
    if params.task_ids is not None:
        task_ids = params.task_ids  # e.g. [0, 1, 2]
        task_weights = params.task_weights  # e.g. [0.1, 0.3, 0.6]
        # Expand task_id_field with shape [batch_size, num_tasks]
        tiled_task_ids = tf.broadcast_to(
            task_id_field, [len(task_ids), tf.shape(task_id_field)[0]])
        expanded_task_id_field = tf.transpose(tiled_task_ids)
        task_mask = tf.cast(tf.equal(expanded_task_id_field, task_ids), dtype=tf.float32)
        weight *= tf.reduce_sum(task_mask * task_weights, 1)  # shape=[batch_size,]

    # build graph
    model = DeepMatch(query=query_field,
                      wide_ftrs=wide_ftrs,
                      doc_fields=doc_fields,
                      usr_fields=usr_fields,
                      doc_id_fields=doc_id_fields,
                      usr_id_fields=usr_id_fields,
                      hparams=params,
                      mode=mode,
                      wide_ftrs_sp_idx=wide_ftrs_sp_idx,
                      wide_ftrs_sp_val=wide_ftrs_sp_val,
                      task_id_field=task_id_field)

    def _build_metric(metric_name, topk):
        # Map a metric name onto the matching op from the metrics module.
        scores = model.scores
        if metric_name.startswith('ndcg'):
            return metrics.compute_ndcg_tfr(scores, label_field, features, topk)
        if metric_name.startswith('mrr'):
            return metrics.compute_mrr_tfr(scores, label_field, features)
        if metric_name.startswith('precision'):
            return metrics.compute_precision_tfr(scores, label_field, features, topk)
        if metric_name.startswith('traditional_ndcg'):
            return metrics.compute_ndcg(scores, label_field, group_size_field, topk)
        if metric_name.startswith('li_mrr'):
            return metrics.compute_mrr(scores, labels['label'], features['group_size'], topk)
        if metric_name == 'auc':
            return metrics.compute_auc(scores, label_field)
        if metric_name == 'accuracy':
            return metrics.compute_accuracy(scores, label_field)
        if metric_name == 'confusion_matrix':
            return metrics.compute_confusion_matrix(scores, label_field, params.num_classes)
        raise ValueError(f"Unsupported metrics: {metric_name}")

    if mode == mode_keys.TRAIN:
        loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
        train_op, _, _ = optimization.create_optimizer(params, loss)
        global_step = tf.train.get_global_step()
        train_tensors_log = {'loss': loss, 'global_step': global_step}
        logging_hook = tf.train.LoggingTensorHook(train_tensors_log, every_n_iter=10)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])

    if mode == mode_keys.EVAL:
        loss = compute_loss(params, model.scores, label_field, group_size_field, weight)
        eval_metric_ops = {}
        for metric_name in params.all_metrics:
            metric_op_name = 'metric/{}'.format(metric_name)
            # Default topk is 10 when the name carries no '@k' suffix.
            topk = int(metric_name.split('@')[1]) if '@' in metric_name else 10
            eval_metric_ops[metric_op_name] = _build_metric(metric_name, topk)
        return tf.estimator.EstimatorSpec(mode, loss=loss, eval_metric_ops=eval_metric_ops)

    if is_predict:
        # Prediction field for scoring models
        predictions = {
            'uid': uid,
            'scores': model.original_scores,
            'weight': weight,
            'label': labels_passthrough
        }
        # multiclass classification: export the probabilities across classes by applying softmax
        if params.num_classes > 1:
            predictions['multiclass_probabilities'] = tf.nn.softmax(model.scores)
        export_outputs = {
            'prediction': tf.estimator.export.PredictOutput(predictions)
        }
        # Provide an estimator spec for `ModeKeys.PREDICT` mode.
        return tf.estimator.EstimatorSpec(mode,
                                          predictions=predictions,
                                          export_outputs=export_outputs)

    raise ValueError("Only support mode as TRAIN/EVAL/PREDICT")
def _reshape_tensor(self, input_tensor, to_shape):
    """Flatten `input_tensor` into one column and broadcast it to `to_shape`."""
    as_column = tf.reshape(input_tensor, [-1, 1])
    broadcasted = tf.broadcast_to(as_column, to_shape)
    return broadcasted
def prepare_args(model_matrix,
                 response,
                 model_coefficients,
                 predicted_linear_response,
                 offset,
                 name=None):
    """Helper to `fit` which sanitizes input args.

    Every supplied argument is converted to a `Tensor` of one common dtype;
    missing optional arguments are replaced by their defaults.

    Args:
      model_matrix: (Batch of) `float`-like, matrix-shaped `Tensor` where each
        row represents a sample's features.
      response: (Batch of) vector-shaped `Tensor` where each element represents
        a sample's observed response (to the corresponding row of features).
        Must have same `dtype` as `model_matrix`.
      model_coefficients: Optional (batch of) vector-shaped `Tensor`
        representing the model coefficients, one for each column in
        `model_matrix`. Must have same `dtype` as `model_matrix`.
        Default value: zeros of shape `batch_shape + [num_columns]`.
      predicted_linear_response: Optional `Tensor` with `shape`, `dtype`
        matching `response`; represents `offset` shifted initial linear
        predictions based on current `model_coefficients`.
        Default value: zeros (or `offset` broadcast to the shape of `response`)
        if `model_coefficients is None`, and the result of
        `calculate_linear_predictor(model_matrix, model_coefficients, offset)`
        otherwise.
      offset: Optional `Tensor` with `shape`, `dtype` matching `response`;
        represents constant shift applied to `predicted_linear_response`.
        Default value: `None` (i.e., `tf.zeros_like(response)`).
      name: Python `str` used as name prefix to ops created by this function.
        Default value: `"prepare_args"`.

    Returns:
      model_matrix: The `model_matrix` argument as a `Tensor`.
      response: The `response` argument as a `Tensor`.
      model_coefficients: The `model_coefficients` argument as a `Tensor` if
        specified, the all-zeros default starting point otherwise.
      predicted_linear_response: The `predicted_linear_response` argument as a
        `Tensor` if specified, the default value otherwise.
      offset: The `offset` argument as a `Tensor` if specified or `None`
        otherwise.
    """
    graph_deps = [
        model_matrix, response, model_coefficients, predicted_linear_response,
        offset
    ]
    with tf.name_scope(name, 'prepare_args', graph_deps):
        dtype = dtype_util.common_dtype(graph_deps, np.float32)

        model_matrix = tf.convert_to_tensor(
            model_matrix, dtype=dtype, name='model_matrix')
        if offset is not None:
            offset = tf.convert_to_tensor(offset, dtype=dtype, name='offset')
        response = tf.convert_to_tensor(response, dtype=dtype, name='response')

        coefficients_supplied = model_coefficients is not None
        if coefficients_supplied:
            # Convert to Tensor in case the caller passed numpy or a literal.
            model_coefficients = tf.convert_to_tensor(
                model_coefficients, dtype=dtype, name='model_coefficients')
        else:
            # No coefficients supplied: start from all zeros.
            matrix_shape = tf.shape(model_matrix)
            batch_shape = matrix_shape[:-2]
            num_columns = matrix_shape[-1]
            model_coefficients = tf.zeros(
                shape=tf.concat([batch_shape, [num_columns]], axis=0),
                dtype=dtype,
                name='model_coefficients')

        if predicted_linear_response is not None:
            predicted_linear_response = tf.convert_to_tensor(
                predicted_linear_response,
                dtype=dtype,
                name='predicted_linear_response')
        elif coefficients_supplied:
            # Coefficients were given but the linear response was not.
            predicted_linear_response = calculate_linear_predictor(
                model_matrix, model_coefficients, offset)
        elif offset is None:
            # Zero coefficients and no offset: the response is all zeros.
            predicted_linear_response = tf.zeros_like(
                response, dtype, name='predicted_linear_response')
        else:
            # Zero coefficients: the response equals the offset.
            predicted_linear_response = tf.broadcast_to(
                offset, tf.shape(response), name='predicted_linear_response')

    return [
        model_matrix,
        response,
        model_coefficients,
        predicted_linear_response,
        offset,
    ]
def _broadcast_inputs(self, inputs):
    """Broadcast `inputs` against the shape returned by `batch_shape_tensor()`."""
    input_shape = tf.shape(inputs)
    target_shape = tf.broadcast_dynamic_shape(
        input_shape, self.batch_shape_tensor())
    return tf.broadcast_to(inputs, target_shape)
def create_and_check_gpt2_model_attention_mask_past(
        self, config, input_ids, input_mask, head_mask, token_type_ids, *args):
    """Check that decoding with `past` ignores changes to masked input tokens.

    The second half of the sequence is masked out, one masked position is
    overwritten with random tokens, and the output for an appended token is
    compared between a full forward pass and a pass that reuses `past`.
    """
    model = TFGPT2Model(config=config)

    # create attention mask: first half attended, second half masked
    half_seq_length = self.seq_length // 2
    visible_part = tf.ones((self.batch_size, half_seq_length), dtype=tf.int32)
    masked_part = tf.zeros(
        (self.batch_size, self.seq_length - half_seq_length), dtype=tf.int32)
    attn_mask = tf.concat([visible_part, masked_part], axis=1)

    # first forward pass
    _, past = model(input_ids, attention_mask=attn_mask)

    # create hypothetical next token and extent to next_input_ids
    next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

    # change a random masked slice from input_ids
    random_seq_idx_to_change = ids_tensor(
        (1, ), half_seq_length).numpy() + 1
    random_other_next_tokens = ids_tensor(
        (self.batch_size, self.seq_length), config.vocab_size)
    changed_position = self.seq_length - random_seq_idx_to_change
    vector_condition = tf.range(self.seq_length) == changed_position
    column_condition = tf.expand_dims(vector_condition, -1)
    condition = tf.transpose(
        tf.broadcast_to(column_condition,
                        (self.seq_length, self.batch_size)))
    input_ids = tf.where(condition, random_other_next_tokens, input_ids)

    # append to next input_ids and attn_mask
    next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
    extra_mask_column = tf.ones((shape_list(attn_mask)[0], 1), dtype=tf.int32)
    attn_mask = tf.concat([attn_mask, extra_mask_column], axis=1)

    # get two different outputs
    output_from_no_past, _ = model(next_input_ids, attention_mask=attn_mask)
    output_from_past, _ = model(next_tokens,
                                past=past,
                                attention_mask=attn_mask)

    # select random slice
    hidden_dim = shape_list(output_from_past)[-1]
    random_slice_idx = int(ids_tensor((1, ), hidden_dim))
    output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx]
    output_from_past_slice = output_from_past[:, 0, random_slice_idx]

    # test that outputs are equal for slice
    tf.debugging.assert_near(output_from_past_slice,
                             output_from_no_past_slice,
                             rtol=1e-12)
def __init__(self, config, features, dropout_keep_prob, init_embeddings=None):
    """Build the tagging graph: gated input embedding, BiLSTM, CRF and loss.

    Reads "input_ids", "input_dicts", "seq_length" and "label_ids" from
    `features`. Sets `self.prediction` (CRF decoding) and `self.loss`
    (CRF negative log-likelihood plus 0.01 times a noise-corrected dot loss).

    Raises:
        ValueError: If `config.multitag` is falsy.
    """
    # NOTE(review): one-argument `super(AttendedInputModel)` creates an unbound
    # super object, so this likely does not run the parent initializer --
    # confirm whether `super().__init__()` was intended and is safe here.
    super(AttendedInputModel).__init__()
    input_ids = features["input_ids"]
    input_dicts = features["input_dicts"]
    seq_length = features["seq_length"]
    label_ids = features["label_ids"]
    self.label_ids = label_ids
    self.dict = input_dicts
    self.seq_length = seq_length
    # input_dicts is required to have rank 3; its last axis is the dictionary
    # feature dimension.
    dict_shape = model_utils.get_shape_list(input_dicts, expected_rank=3)
    self.dict_dim = dict_shape[2]
    x, self.batch_size, feat_size = model_utils.input_embedding(
        input_ids, config, init_embeddings=init_embeddings)
    # with tf.variable_scope('dict'):
    #     self.dict = tf.cast(self.dict, dtype=tf.float32)
    #     (forward_output, backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
    #         cell_fw=model_utils.multi_lstm_cell(config.hidden_size, config.num_hidden_layers, dropout_keep_prob),
    #         cell_bw=model_utils.multi_lstm_cell(config.hidden_size, config.num_hidden_layers, dropout_keep_prob),
    #         inputs=self.dict,
    #         sequence_length=self.seq_length,
    #         dtype=tf.float32
    #     )
    #     dict_output = tf.concat([forward_output, backword_output], axis=2)
    # The dictionary features are used directly (cast to float) instead of
    # the BiLSTM encoding kept above for reference.
    dict_output = tf.cast(self.dict, dtype=tf.float32)
    with tf.variable_scope('input_attention'):
        # Two sigmoid projections of the dictionary features: a multiplicative
        # gate and an additive bias, one value per input feature.
        input_attention = layers.fully_connected(inputs=dict_output,
                                                 num_outputs=feat_size,
                                                 activation_fn=tf.sigmoid)
        input_bias = layers.fully_connected(inputs=dict_output,
                                            num_outputs=feat_size,
                                            activation_fn=tf.sigmoid)
        # [B, L, F] * [B, L, F, E] -> [B, L, F, E]
        input_attention = tf.expand_dims(input_attention, -1)
        attend_input = tf.multiply(x, input_attention) + tf.expand_dims(
            input_bias, axis=-1)
        # Merge the feature and embedding axes into one vector per position.
        attend_input = tf.reshape(
            attend_input,
            [self.batch_size, -1, feat_size * config.embedding_size])
        attend_input = tf.nn.dropout(attend_input, dropout_keep_prob)
    with tf.variable_scope('character'):
        (forward_output,
         backword_output), _ = tf.nn.bidirectional_dynamic_rnn(
             cell_fw=model_utils.multi_lstm_cell(config.hidden_size,
                                                 config.num_hidden_layers,
                                                 dropout_keep_prob),
             cell_bw=model_utils.multi_lstm_cell(config.hidden_size,
                                                 config.num_hidden_layers,
                                                 dropout_keep_prob),
             inputs=attend_input,
             sequence_length=self.seq_length,
             dtype=tf.float32)
        output = tf.concat([forward_output, backword_output], axis=2)
    with tf.variable_scope('output'):
        # The raw dictionary features are concatenated to the BiLSTM output
        # before the per-class scoring layer.
        output = tf.concat([dict_output, output], axis=2)
        scores = layers.fully_connected(inputs=output,
                                        num_outputs=config.num_classes,
                                        activation_fn=None)
        transition_param = tf.get_variable(
            "transitions", [config.num_classes, config.num_classes])
        self.prediction, _ = crf.crf_decode(scores, transition_param,
                                            self.seq_length)
    # with tf.variable_scope('loss'):
    #     # crf
    #     if config.multitag:
    #         self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
    #         self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
    #             scores, self.label_ids, self.seq_length, transition_param)
    #     else:
    #         self.log_likelihood, _ = crf.crf_log_likelihood(
    #             scores, self.label_ids, self.seq_length, transition_param)
    #     self.loss = tf.reduce_mean(-self.log_likelihood)
    with tf.variable_scope('noise_correct'):
        # The configured noise matrix is stored as a non-trainable variable.
        pure_noise_matrix = tf.Variable(config.noise_matrix,
                                        dtype=tf.float32,
                                        name='noise_matrix',
                                        trainable=False)
        tf.logging.info(f"\n{config.noise_matrix}")
        if config.fix_noise:
            noise_matrix = pure_noise_matrix
        else:
            # Row-wise blend of the configured noise matrix and the identity,
            # weighted by sigmoid(rate); `rate` is a variable with one entry
            # per row.
            # NOTE(review): the hard-coded 4 presumably equals
            # config.num_classes -- confirm.
            eye_matrix = tf.Variable(np.eye(4),
                                     dtype=tf.float32,
                                     name='eye_matrix',
                                     trainable=False)
            rate = tf.Variable(np.ones([4, 1]),
                               dtype=tf.float32,
                               name='rate')
            norm_rate = tf.sigmoid(rate)
            noise_matrix = tf.broadcast_to(norm_rate, [4, 4]) * pure_noise_matrix + \
                tf.broadcast_to((1 - norm_rate), [4, 4]) * eye_matrix
    with tf.variable_scope('loss'):
        # crf
        if config.multitag:
            prob = tf.nn.softmax(scores, axis=-1)
            # Number of candidate labels at each position.
            candidate_label_num = tf.reduce_sum(self.label_ids, axis=2)
            # True for sequences whose largest per-position candidate count is 1.
            full_label_data = tf.equal(
                tf.reduce_max(candidate_label_num, axis=-1), 1)
            self.label_ids = tf.cast(self.label_ids, dtype=tf.bool)
            # All other sequences are passed to the CRF likelihood with
            # length 0.
            full_label_seq_len = tf.where(full_label_data, self.seq_length,
                                          tf.zeros_like(self.seq_length))
            self.log_likelihood, _ = model_utils.crf_multitag_log_likelihood(
                scores, self.label_ids, full_label_seq_len, transition_param)
            gt = tf.cast(self.label_ids, dtype=tf.float32)
            nll_loss = -self.log_likelihood
            # Positions with more than one but fewer than num_classes candidates.
            part_label_mask = tf.cast(tf.logical_and(
                candidate_label_num > 1,
                candidate_label_num < config.num_classes),
                                      dtype=tf.float32)
            # Reciprocal of the number of such positions per sequence; the
            # epsilon avoids division by zero.
            j_l0_norm = 1.0 / (1e-12 +
                               tf.reduce_sum(part_label_mask, axis=-1))
            # In both branches the inner einsum multiplies the per-position
            # class probabilities with the noise matrix, and the outer einsum
            # takes the dot product of the result with the candidate-label
            # indicator vector.
            if config.log_dot_loss:
                ## log dot loss
                dot_loss = -j_l0_norm * tf.reduce_sum(
                    part_label_mask * tf.log(
                        tf.clip_by_value(tf.einsum(
                            "bld, bld->bl", gt,
                            tf.einsum("ji, blj->bli", noise_matrix, prob)),
                                         clip_value_min=1e-16,
                                         clip_value_max=1)),
                    axis=-1)
            else:
                ## dot loss
                dot_loss = j_l0_norm * tf.reduce_sum(
                    part_label_mask * (1 - tf.clip_by_value(tf.einsum(
                        "bld, bld->bl", gt,
                        tf.einsum("ji, blj->bli", noise_matrix, prob)),
                                                            clip_value_min=0,
                                                            clip_value_max=1)),
                    axis=-1)
        else:
            raise ValueError("PartLabelModel request multi-tag")
        self.loss = tf.reduce_mean(nll_loss + 0.01 * dot_loss)
def matrix_initializer(i=i):
    """Build the initial value for the ``i``-th matrix.

    ``i`` is bound as a default argument, so the value it holds in the
    enclosing scope at definition time is the one used when the function is
    later called without arguments.

    Returns:
        ``log_expm1(1 / scale / filters[i + 1])`` cast to ``self.dtype`` and
        broadcast to shape ``(channels, filters[i + 1], filters[i])``.
    """
    out_filters = filters[i + 1]
    fill_value = tf.cast(
        log_expm1(1 / scale / out_filters), dtype=self.dtype)
    return tf.broadcast_to(fill_value, (channels, out_filters, filters[i]))
def _broadcast_in_dim(operand, shape, broadcast_dimensions):
    """Reshape ``operand`` to the rank of ``shape``, then broadcast to ``shape``.

    Axes of ``shape`` whose index appears in ``broadcast_dimensions`` keep
    their target size in the intermediate reshape; every other axis is given
    size 1 so that ``tf.broadcast_to`` can expand it.

    Args:
        operand: Value to broadcast. It must be reshapeable to the
            intermediate shape described above.
        shape: Sequence of target dimension sizes.
        broadcast_dimensions: Container of indices into ``shape`` whose sizes
            are kept in the intermediate reshape.

    Returns:
        The result of ``tf.broadcast_to`` with target shape ``shape``.
    """
    expanded_shape = tuple(
        size if axis in broadcast_dimensions else 1
        for axis, size in enumerate(shape))
    reshaped = tf.reshape(operand, expanded_shape)
    return tf.broadcast_to(reshaped, shape)
def crop_image(frames: tf.Tensor,
               target_height: int,
               target_width: int,
               random: bool = False,
               num_crops: int = 1,
               seed: Optional[int] = None) -> tf.Tensor:
    """Spatially crop a sequence of frames to a target size.

    One of three modes is used:

    * ``random=True``: a single `tf.image.random_crop` call with size
      ``(timesteps, target_height, target_width, channels)``. ``num_crops``
      is not consulted in this mode.
    * ``random=False, num_crops=1``: `tf.image.resize_with_crop_or_pad`, i.e.
      a central crop, or padding when the requested size is bigger than the
      image.
    * ``random=False, num_crops=3``: three windows of the target size are
      sliced along the height axis when ``height >= width`` and along the
      width axis otherwise, at offsets ``0``, ``dim / 2 - target // 2`` and
      ``dim - target`` (rounded to integers), then concatenated along axis 0.

    Args:
        frames: A Tensor of dimension [timesteps, in_height, in_width,
            channels].
        target_height: Target cropped image height.
        target_width: Target cropped image width.
        random: A boolean indicating if crop should be randomized.
        num_crops: Number of crops (1 for central crop, 3 for 3-crop).
        seed: A deterministic seed to use when random cropping.

    Returns:
        The cropped frames. For the random and 1-crop modes there is one
        output frame per input frame; for the 3-crop mode the three crops are
        stacked along the first axis.

    Raises:
        NotImplementedError: If ``random`` is false and ``num_crops`` is
            neither 1 nor 3.
    """
    if random:
        # Random spatial crop.
        runtime_shape = tf.shape(frames)
        # Statically known dimensions are preferred so that the output tensor
        # keeps a static shape whenever the input has one.
        known_dims = frames.shape.as_list()
        num_frames = (
            known_dims[0] if known_dims[0] is not None else runtime_shape[0])
        num_channels = (
            known_dims[3] if known_dims[3] is not None else runtime_shape[3])
        return tf.image.random_crop(
            frames,
            (num_frames, target_height, target_width, num_channels),
            seed)

    if num_crops == 1:
        # Central crop or pad.
        return tf.image.resize_with_crop_or_pad(
            frames, target_height, target_width)

    if num_crops != 3:
        raise NotImplementedError(
            f"Only 1-crop and 3-crop are supported. Found {num_crops!r}.")

    # Three-crop evaluation.
    runtime_shape = tf.shape(frames)
    known_dims = frames.shape.as_list()
    num_frames, height, width, num_channels = [
        runtime_shape[axis] if known_dims[axis] is None else known_dims[axis]
        for axis in range(4)
    ]
    crop_size = tf.convert_to_tensor(
        (num_frames, target_height, target_width, num_channels))

    def _middle_along_height():
        start = tf.cast(height, tf.float32) / 2 - target_height // 2
        return tf.broadcast_to([0, start, 0, 0], [4])

    def _middle_along_width():
        start = tf.cast(width, tf.float32) / 2 - target_width // 2
        return tf.broadcast_to([0, 0, start, 0], [4])

    def _end_along_height():
        start = tf.cast(height, tf.float32) - target_height
        return tf.broadcast_to([0, start, 0, 0], [4])

    def _end_along_width():
        start = tf.cast(width, tf.float32) - target_width
        return tf.broadcast_to([0, 0, start, 0], [4])

    first_offset = tf.broadcast_to([0, 0, 0, 0], [4])
    # The crops slide along the height axis when height >= width, and along
    # the width axis otherwise.
    slide_on_height = tf.greater_equal(height, width)
    middle_offset = tf.cond(
        slide_on_height,
        true_fn=_middle_along_height,
        false_fn=_middle_along_width)
    last_offset = tf.cond(
        slide_on_height,
        true_fn=_end_along_height,
        false_fn=_end_along_width)

    # Offsets may be fractional floats; round them to int32 indices before
    # slicing.
    crops = [
        tf.slice(frames, tf.cast(tf.math.round(offset), tf.int32), crop_size)
        for offset in (first_offset, middle_offset, last_offset)
    ]
    return tf.concat(crops, axis=0)
def testSampleBatch(self):
    """Sample a batch with three sub-action groups and check the result.

    Verifies the output shapes, that every sampled value lies within its
    spec bounds, and the expected per-sample pattern of the discrete and
    continuous fields.
    """
    action_spec = {
        'continuous1': tensor_spec.BoundedTensorSpec(
            [_ACTION_SIZE_CONTINUOUS], tf.float32, 0.0, 1.0),
        'continuous2': tensor_spec.BoundedTensorSpec(
            [_ACTION_SIZE_CONTINUOUS], tf.float32, 0.0, 1.0),
        'discrete': tensor_spec.BoundedTensorSpec(
            [_ACTION_SIZE_DISCRETE], tf.int32, 0, 1),
    }
    sampler_cls = (
        cem_actions_sampler_continuous_and_one_hot.GaussianActionsSampler)
    sampler = sampler_cls(
        action_spec=action_spec,
        sample_clippers=[[], [], []],
        sub_actions_fields=[['continuous1'], ['discrete'], ['continuous2']],
        sample_rejecters=[None, None, dummy_sample_rejecter])

    # Op creation order is kept as: constants, mean entries, var entries.
    mean_values = tf.constant(_MEAN)
    var_values = tf.constant(_VAR)
    continuous_batch = [_BATCH, _ACTION_SIZE_CONTINUOUS]
    discrete_batch = [_BATCH, _ACTION_SIZE_DISCRETE]
    mean = {
        'continuous1': tf.broadcast_to(mean_values, continuous_batch),
        'continuous2': tf.broadcast_to(mean_values, continuous_batch),
        'discrete': tf.zeros(discrete_batch),
    }
    var = {
        'continuous1': tf.broadcast_to(var_values, continuous_batch),
        'continuous2': tf.broadcast_to(var_values, continuous_batch),
        'discrete': tf.zeros(discrete_batch),
    }

    actions = sampler.sample_batch_and_clip(_NUM_SAMPLES, mean, var)

    # Every field has shape (batch, num_samples, action_size).
    expected_sizes = (
        ('continuous1', _ACTION_SIZE_CONTINUOUS),
        ('continuous2', _ACTION_SIZE_CONTINUOUS),
        ('discrete', _ACTION_SIZE_DISCRETE),
    )
    for field, action_size in expected_sizes:
        self.assertEqual(
            (_BATCH, _NUM_SAMPLES, action_size), actions[field].shape)

    actions_ = self.evaluate(actions)

    # Every sampled value must respect the bounds of its spec.
    flat_actions = tf.nest.flatten(actions_)
    flat_action_spec = tf.nest.flatten(action_spec)
    for idx, spec in enumerate(flat_action_spec):
        sampled = flat_actions[idx]
        self.assertTrue((sampled <= spec.maximum).all())
        self.assertTrue((sampled >= spec.minimum).all())

    # The discrete field is expected to follow this fixed one-hot pattern
    # across the samples, for every batch entry.
    one_hot_pattern = tf.constant([[1., 0., 0.],
                                   [1., 0., 0.],
                                   [0., 1., 0.],
                                   [0., 0., 1.],
                                   [0., 0., 1.]])
    expected_actions_discrete = tf.broadcast_to(
        one_hot_pattern,
        tf.constant([_BATCH, _NUM_SAMPLES, _ACTION_SIZE_DISCRETE]))
    self.assertAllClose(expected_actions_discrete, actions_['discrete'])

    # continuous1: samples from index 3 on are zero, the first three lie in
    # [0, 1].
    continuous1 = actions_['continuous1']
    expected_actions_continuous1 = tf.zeros(
        [_BATCH, 2, _ACTION_SIZE_CONTINUOUS])
    self.assertAllClose(expected_actions_continuous1, continuous1[:, 3:, :])
    self.assertAllGreaterEqual(continuous1[:, 0:3, :], 0.0)
    self.assertAllLessEqual(continuous1[:, 0:3, :], 1.0)

    # continuous2: samples 1 and 2 are zero, samples from index 2 on lie in
    # [0, 1].
    # NOTE(review): the zero check (1:3) and the range check (2:) overlap at
    # index 2 -- confirm the slice bounds are the intended ones.
    continuous2 = actions_['continuous2']
    expected_actions_continuous2 = tf.zeros(
        [_BATCH, 2, _ACTION_SIZE_CONTINUOUS])
    self.assertAllClose(expected_actions_continuous2, continuous2[:, 1:3, :])
    self.assertAllGreaterEqual(continuous2[:, 2:, :], 0.0)
    self.assertAllLessEqual(continuous2[:, 2:, :], 1.0)
def _broadcast(operand, sizes):
    """Broadcast ``operand`` by prepending new leading dimensions.

    Args:
        operand: Value to broadcast.
        sizes: 1-D sequence (or integer tensor) with the sizes of the new
            leading dimensions. May be empty, in which case the result has
            the same shape as ``operand``.

    Returns:
        A tensor whose shape is ``sizes`` followed by the shape of
        ``operand``.
    """
    # `tf.shape` returns a tensor, so `sizes + tf.shape(operand)` adds the two
    # element-wise (via `__radd__`) instead of concatenating them. The target
    # shape is therefore assembled with an explicit concat.
    # The cast also gives an empty `sizes` an integer dtype; without it an
    # empty sequence would be converted to float32 and the concat would fail.
    leading_dims = tf.cast(sizes, tf.int32)
    target_shape = tf.concat([leading_dims, tf.shape(operand)], axis=0)
    return tf.broadcast_to(operand, target_shape)
def broadcast_to(input, shape):
    """Broadcast ``input`` to ``shape`` with either NumPy or TensorFlow.

    NumPy is used only when the ``SWITCH_ON`` flag is truthy and
    ``is_tensor(input)`` is falsy; in every other case the call is forwarded
    to TensorFlow. ``is_tensor`` is not called when ``SWITCH_ON`` is falsy.

    Args:
        input: Value to broadcast (the name shadows the builtin but is part
            of the signature).
        shape: Target shape.

    Returns:
        The result of ``np.broadcast_to`` or ``tf.broadcast_to``.
    """
    use_numpy = SWITCH_ON and not is_tensor(input)
    if use_numpy:
        return np.broadcast_to(input, shape)
    return tf.broadcast_to(input, shape)