def CombineArcAndRootPotentials(arcs, roots):
    """Combines arc and root potentials into a single set of potentials.

    Args:
        arcs: [B,N,N] tensor of batched arc potentials.
        roots: [B,N] matrix of batched root potentials.

    Returns:
        [B,N,N] tensor P of combined potentials where
            P_{b,s,t} = s == t ? roots[b,t] : arcs[b,s,t]
    """
    # All arguments must have statically-known rank.
    check.Eq(arcs.get_shape().ndims, 3, 'arcs must be rank 3')
    check.Eq(roots.get_shape().ndims, 2, 'roots must be a matrix')

    # All arguments must share the same type.
    dtype = arcs.dtype.base_dtype
    check.Same([dtype, roots.dtype.base_dtype], 'dtype mismatch')

    roots_shape = tf.shape(roots)
    arcs_shape = tf.shape(arcs)
    batch_size = roots_shape[0]
    num_tokens = roots_shape[1]
    with tf.control_dependencies([
            tf.assert_equal(batch_size, arcs_shape[0]),
            tf.assert_equal(num_tokens, arcs_shape[1]),
            tf.assert_equal(num_tokens, arcs_shape[2])]):
        return tf.matrix_set_diag(arcs, roots)
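# Usage sketch of the core operation above (toy values, TF1 graph mode assumed;
# not part of the original module): tf.matrix_set_diag overwrites the diagonal
# P[b, t, t] of the arc potentials with roots[b, t].
arcs_demo = tf.zeros([2, 3, 3])                            # [B=2, N=3, N=3]
roots_demo = tf.constant([[1., 2., 3.], [4., 5., 6.]])     # [B=2, N=3]
combined_demo = tf.matrix_set_diag(arcs_demo, roots_demo)  # combined_demo[b, t, t] == roots_demo[b, t]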
def logp(self, F, Y):
    with tf.control_dependencies(
            [
                tf.assert_equal(tf.shape(Y)[1], 1),
                tf.assert_equal(tf.cast(tf.shape(F)[1], settings.int_type),
                                tf.cast(self.num_classes, settings.int_type))
            ]):
        return -tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=F, labels=Y[:, 0])[:, None]
def square_error(estimated, target):
    with tf.name_scope('evaluation'):
        # Sanity check: estimated and target agree when cast to integers.
        # (The original asserted `target - target == 0`, which is trivially
        # true, and built the intended assert as a dangling op that never ran;
        # here the intended check is wired in via control_dependencies.)
        with tf.control_dependencies(
                [tf.assert_equal(count(tf.cast(target - estimated, tf.int32)), 0.)]):
            squared_difference = tf.pow(estimated - target, 2, name='squared_difference')
            square_error = tf.reduce_sum(squared_difference, name='summing_square_errors')
            square_error = tf.to_float(square_error)
            return square_error
def discretized_mix_logistic_loss(y_hat, y, num_classes=256,
                                  log_scale_min=-7.0, reduce=True):
    '''Discretized mixture of logistic distributions loss.

    Note that it is assumed that input is scaled to [-1, 1]

    Args:
        y_hat: Tensor [batch_size, channels, time_length], predicted output.
        y: Tensor [batch_size, time_length, 1], Target.
    Returns:
        Tensor loss
    '''
    with tf.control_dependencies([tf.assert_equal(tf.mod(tf.shape(y_hat)[1], 3), 0),
                                  tf.assert_equal(tf.rank(y_hat), 3)]):
        nr_mix = tf.shape(y_hat)[1] // 3

    # [batch_size, time_length, channels]
    y_hat = tf.transpose(y_hat, [0, 2, 1])

    # unpack parameters. [batch_size, time_length, num_mixtures] x 3
    logit_probs = y_hat[:, :, :nr_mix]
    means = y_hat[:, :, nr_mix:2 * nr_mix]
    log_scales = tf.maximum(y_hat[:, :, 2 * nr_mix:3 * nr_mix], log_scale_min)

    # [batch_size, time_length, 1] -> [batch_size, time_length, num_mixtures]
    y = y * tf.ones(shape=[1, 1, nr_mix], dtype=tf.float32)

    centered_y = y - means
    inv_stdv = tf.exp(-log_scales)
    plus_in = inv_stdv * (centered_y + 1. / (num_classes - 1))
    cdf_plus = tf.nn.sigmoid(plus_in)
    min_in = inv_stdv * (centered_y - 1. / (num_classes - 1))
    cdf_min = tf.nn.sigmoid(min_in)

    log_cdf_plus = plus_in - tf.nn.softplus(plus_in)  # log probability for edge case of 0 (before scaling)
    log_one_minus_cdf_min = -tf.nn.softplus(min_in)  # log probability for edge case of 255 (before scaling)

    # probability for all other cases
    cdf_delta = cdf_plus - cdf_min

    mid_in = inv_stdv * centered_y
    # log probability in the center of the bin, to be used in extreme cases
    # (not actually used in this code)
    log_pdf_mid = mid_in - log_scales - 2. * tf.nn.softplus(mid_in)

    log_probs = tf.where(y < -0.999, log_cdf_plus,
                         tf.where(y > 0.999, log_one_minus_cdf_min,
                                  tf.where(cdf_delta > 1e-5,
                                           tf.log(tf.maximum(cdf_delta, 1e-12)),
                                           log_pdf_mid - np.log((num_classes - 1) / 2))))

    # log_probs = log_probs + tf.nn.log_softmax(logit_probs, -1)
    log_probs = log_probs + log_prob_from_logits(logit_probs)

    if reduce:
        return -tf.reduce_sum(log_sum_exp(log_probs))
    else:
        return -tf.expand_dims(log_sum_exp(log_probs), [-1])
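# A small NumPy sketch (illustrative, not part of the original code) of the
# discretized-logistic bin probability computed above: the mass assigned to a
# target y is CDF(y + half_bin) - CDF(y - half_bin) for a bin of width
# 2 / (num_classes - 1); `_sigmoid` is a hypothetical helper.
def _sigmoid(z):
    return 1. / (1. + np.exp(-z))

mean_demo, log_scale_demo, num_classes_demo, y_val = 0.0, -2.0, 256, 0.1
inv_stdv_demo = np.exp(-log_scale_demo)
half_bin = 1. / (num_classes_demo - 1)
bin_prob = (_sigmoid(inv_stdv_demo * (y_val - mean_demo + half_bin)) -
            _sigmoid(inv_stdv_demo * (y_val - mean_demo - half_bin)))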
def prepare_serialized_examples(self, serialized_example,
                                max_quantized_value=2, min_quantized_value=-2):
    contexts, features = tf.parse_single_sequence_example(
        serialized_example,
        context_features={"id": tf.FixedLenFeature([], tf.string),
                          "labels": tf.VarLenFeature(tf.int64)},
        sequence_features={
            feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string)
            for feature_name in self.feature_names
        })

    # read ground truth labels
    labels = (tf.cast(
        tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1,
                           validate_indices=False),
        tf.bool))

    # loads (potentially) different types of features and concatenates them
    num_features = len(self.feature_names)
    assert num_features > 0, "No feature selected: feature_names is empty!"

    assert len(self.feature_names) == len(self.feature_sizes), \
        "length of feature_names (={}) != length of feature_sizes (={})".format(
            len(self.feature_names), len(self.feature_sizes))

    num_frames = -1  # the number of frames in the video
    feature_matrices = [None] * num_features  # an array of different features
    for feature_index in range(num_features):
        feature_matrix, num_frames_in_this_feature = self.get_video_matrix(
            features[self.feature_names[feature_index]],
            self.feature_sizes[feature_index],
            self.max_frames,
            max_quantized_value,
            min_quantized_value)
        if num_frames == -1:
            num_frames = num_frames_in_this_feature
        else:
            # Note: this assert op is never wired into the graph via
            # control_dependencies, so it has no effect in graph mode.
            tf.assert_equal(num_frames, num_frames_in_this_feature)

        feature_matrices[feature_index] = feature_matrix

    # cap the number of frames at self.max_frames
    num_frames = tf.minimum(num_frames, self.max_frames)

    # concatenate different features
    video_matrix = tf.concat(feature_matrices, 1)

    # convert to batch format.
    # TODO: Do proper batch reads to remove the IO bottleneck.
    batch_video_ids = tf.expand_dims(contexts["id"], 0)
    batch_video_matrix = tf.expand_dims(video_matrix, 0)
    batch_labels = tf.expand_dims(labels, 0)
    batch_frames = tf.expand_dims(num_frames, 0)

    return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def step(self, x, c=None, g=None, softmax=False):
    """Forward step

    Args:
        x: Tensor of shape [batch_size, channels, time_length], One-hot encoded audio signal.
        c: Tensor of shape [batch_size, cin_channels, time_length], Local conditioning features.
        g: Tensor of shape [batch_size, gin_channels, 1] or Ids of shape [batch_size, 1],
            Global conditioning features.
            Note: set hparams.use_speaker_embedding to False to disable embedding layer and
            use external One-hot encoded features.
        softmax: Boolean, Whether to apply softmax.

    Returns:
        a Tensor of shape [batch_size, out_channels, time_length]
    """
    # [batch_size, channels, time_length] -> [batch_size, time_length, channels]
    batch_size = tf.shape(x)[0]
    time_length = tf.shape(x)[-1]

    if g is not None:
        if self.embed_speakers is not None:
            # [batch_size, 1] ==> [batch_size, 1, gin_channels]
            g = self.embed_speakers(tf.reshape(g, [batch_size, -1]))
            # [batch_size, gin_channels, 1]
            with tf.control_dependencies([tf.assert_equal(tf.rank(g), 3)]):
                g = tf.transpose(g, [0, 2, 1])

    # Expand global conditioning features to all time steps
    g_bct = _expand_global_features(batch_size, time_length, g, data_format='BCT')

    if c is not None and self.upsample_conv is not None:
        # [batch_size, 1, cin_channels, time_length]
        c = tf.expand_dims(c, axis=1)
        for transposed_conv in self.upsample_conv:
            c = transposed_conv(c)

        # [batch_size, cin_channels, time_length]
        c = tf.squeeze(c, [1])

        with tf.control_dependencies([tf.assert_equal(tf.shape(c)[-1], tf.shape(x)[-1])]):
            c = tf.identity(c, name='control_c_and_x_shape')

    # Feed data to network
    x = self.first_conv(x)
    skips = None
    for conv in self.conv_layers:
        x, h = conv(x, c, g_bct)
        if skips is None:
            skips = h
        else:
            skips = skips + h
    x = skips

    for conv in self.last_conv_layers:
        x = conv(x)

    return tf.nn.softmax(x, axis=1) if softmax else x
def _get_window(window_length, dtype):
    if self._window == "hanning":
        window = tf.contrib.signal.hann_window(window_length, dtype=dtype)
    if self._window == "blackman":
        # Note: this assert op is not wired into the graph via
        # control_dependencies, so it is a no-op in graph mode;
        # frame_size comes from the enclosing scope.
        tf.assert_equal(frame_size, window_length)
        import scipy.signal
        window = tf.constant(scipy.signal.blackman(frame_size), dtype=tf.float32)
    if self._window == "None" or self._window == "ones":
        window = tf.ones((window_length,), dtype=dtype)
    return window
def __init__(self, l_overwrite=None, p_overwrite=None, q_overwrite=None,
             filter_input=None, parameters=None, noise_estimation=None,
             average_parameters=False, **kwargs):
    """
    :param float|None l_overwrite: if given overwrites the l value of the parametric wiener filter with the given constant
    :param float|None p_overwrite: if given overwrites the p value of the parametric wiener filter with the given constant
    :param float|None q_overwrite: if given overwrites the q value of the parametric wiener filter with the given constant
    :param LayerBase|None filter_input: name of layer containing input for wiener filter
    :param LayerBase|None parameters: name of layer containing parameters for wiener filter
    :param LayerBase|None noise_estimation: name of layer containing noise estimate for wiener filter
    :param bool average_parameters: if set to true the parameters l, p and q are averaged over the time axis
    """
    from tfSi6Proc.audioProcessing.enhancement.singleChannel import TfParametricWienerFilter
    super(ParametricWienerFilterLayer, self).__init__(**kwargs)

    class _NoiseEstimator(object):
        def __init__(self, noise_power_spectrum_tensor):
            self._noise_power_spectrum_tensor = noise_power_spectrum_tensor

        @classmethod
        def from_layer(cls, layer):
            return cls(layer.output.get_placeholder_as_batch_major())

        def getNoisePowerSpectrum(self):
            return self._noise_power_spectrum_tensor

    def _getParametersFromConstructorInputs(parameters, l_overwrite, p_overwrite,
                                            q_overwrite, average_parameters):
        parameter_vector = None
        if parameters is not None:
            parameter_vector = parameters.output.get_placeholder_as_batch_major()
            tf.assert_equal(parameter_vector.shape[-1], 3)
        if (l_overwrite is None) or (p_overwrite is None) or (q_overwrite is None):
            assert parameter_vector is not None
            if average_parameters:
                parameter_vector = tf.tile(
                    tf.reduce_mean(parameter_vector, axis=1, keep_dims=True),
                    [1, tf.shape(parameter_vector)[1], 1])
        if l_overwrite is not None:
            l = tf.constant(l_overwrite, dtype=tf.float32)
        else:
            l = tf.expand_dims(parameter_vector[:, :, 0], axis=-1)
        if p_overwrite is not None:
            p = tf.constant(p_overwrite, dtype=tf.float32)
        else:
            p = tf.expand_dims(parameter_vector[:, :, 1], axis=-1)
        if q_overwrite is not None:
            q = tf.constant(q_overwrite, dtype=tf.float32)
        else:
            q = tf.expand_dims(parameter_vector[:, :, 2], axis=-1)
        return l, p, q

    filter_input_placeholder = filter_input.output.get_placeholder_as_batch_major()
    if filter_input_placeholder.dtype != tf.complex64:
        filter_input_placeholder = tf.cast(filter_input_placeholder, dtype=tf.complex64)
    tf.assert_equal(
        noise_estimation.output.get_placeholder_as_batch_major().shape[-1],
        filter_input_placeholder.shape[-1])
    ne = _NoiseEstimator.from_layer(noise_estimation)
    l, p, q = _getParametersFromConstructorInputs(parameters, l_overwrite, p_overwrite,
                                                  q_overwrite, average_parameters)
    wiener = TfParametricWienerFilter(ne, [], l, p, q,
                                      inputTensorFreqDomain=filter_input_placeholder)
    self.output.placeholder = wiener.getFrequencyDomainOutputSignal()
def compute_loss(self, unreduced_loss):
    """Computes scaled loss based on mask out size."""
    # construct mask to identify zero padding that was introduced to
    # make the batch rectangular
    batch_duration = tf.shape(self.pianorolls)[1]
    indices = tf.to_float(tf.range(batch_duration))
    pad_mask = tf.to_float(
        indices[None, :, None, None] < self.lengths[:, None, None, None])

    # construct mask and its complement, respecting pad mask
    mask = pad_mask * self.masks
    unmask = pad_mask * (1. - self.masks)

    # Compute numbers of variables
    # #timesteps * #variables per timestep
    variable_axis = 3 if self.hparams.use_softmax_loss else 2
    dd = (
        self.lengths[:, None, None, None] *
        tf.to_float(tf.shape(self.pianorolls)[variable_axis]))
    reduced_dd = tf.reduce_sum(dd)

    # Compute numbers of variables to be predicted/conditioned on
    mask_size = tf.reduce_sum(mask, axis=[1, variable_axis], keep_dims=True)
    unmask_size = tf.reduce_sum(unmask, axis=[1, variable_axis], keep_dims=True)

    unreduced_loss *= pad_mask
    if self.hparams.rescale_loss:
        unreduced_loss *= dd / mask_size

    # Compute average loss over entire set of variables
    self.loss_total = tf.reduce_sum(unreduced_loss) / reduced_dd

    # Compute separate losses for masked/unmasked variables
    # NOTE: indexing the pitch dimension with 0 because the mask is constant
    # across pitch. Except in the sigmoid case, but then the pitch dimension
    # will have been reduced over.
    self.reduced_mask_size = tf.reduce_sum(mask_size[:, :, 0, :])
    self.reduced_unmask_size = tf.reduce_sum(unmask_size[:, :, 0, :])

    assert_partition_op = tf.group(
        tf.assert_equal(tf.reduce_sum(mask * unmask), 0.),
        tf.assert_equal(self.reduced_mask_size + self.reduced_unmask_size,
                        reduced_dd))
    with tf.control_dependencies([assert_partition_op]):
        self.loss_mask = (
            tf.reduce_sum(mask * unreduced_loss) / self.reduced_mask_size)
        self.loss_unmask = (
            tf.reduce_sum(unmask * unreduced_loss) / self.reduced_unmask_size)

    # Check which loss to use as objective function.
    self.loss = (
        self.loss_mask if self.hparams.optimize_mask_only else self.loss_total)
def _kl_independent(a, b, name="kl_independent"):
    """Batched KL divergence `KL(a || b)` for Independent distributions.

    We can leverage the fact that
    ```
    KL(Independent(a) || Independent(b)) = sum(KL(a || b))
    ```
    where the sum is over the `reinterpreted_batch_ndims`.

    Args:
        a: Instance of `Independent`.
        b: Instance of `Independent`.
        name: (optional) name to use for created ops. Default "kl_independent".

    Returns:
        Batchwise `KL(a || b)`.

    Raises:
        ValueError: If the event space for `a` and `b`, or their underlying
            distributions don't match.
    """
    p = a.distribution
    q = b.distribution

    # The KL between any two (non)-batched distributions is a scalar.
    # Given that the KL between two factored distributions is the sum, i.e.
    # KL(p1(x)p2(y) || q1(x)q2(y)) = KL(p1 || q1) + KL(p2 || q2), we compute
    # KL(p || q) and do a `reduce_sum` on the reinterpreted batch dimensions.
    if a.event_shape.is_fully_defined() and b.event_shape.is_fully_defined():
        if a.event_shape == b.event_shape:
            if p.event_shape == q.event_shape:
                num_reduce_dims = a.event_shape.ndims - p.event_shape.ndims
                reduce_dims = [-i - 1 for i in range(0, num_reduce_dims)]
                return tf.reduce_sum(
                    kullback_leibler.kl_divergence(p, q, name=name),
                    axis=reduce_dims)
            else:
                raise NotImplementedError("KL between Independents with different "
                                          "event shapes not supported.")
        else:
            raise ValueError("Event shapes do not match.")
    else:
        with tf.control_dependencies([
                tf.assert_equal(a.event_shape_tensor(), b.event_shape_tensor()),
                tf.assert_equal(p.event_shape_tensor(), q.event_shape_tensor())
        ]):
            num_reduce_dims = (
                tf.shape(a.event_shape_tensor())[0] -
                tf.shape(p.event_shape_tensor())[0])
            reduce_dims = tf.range(-num_reduce_dims - 1, -1, 1)
            return tf.reduce_sum(
                kullback_leibler.kl_divergence(p, q, name=name),
                axis=reduce_dims)
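# A hedged usage sketch (assumes tensorflow_probability is available as
# `tfd = tfp.distributions` and that the Independent KL above is registered;
# the variable names are illustrative only).
a_demo = tfd.Independent(tfd.Normal(loc=tf.zeros([4, 3]), scale=1.),
                         reinterpreted_batch_ndims=1)
b_demo = tfd.Independent(tfd.Normal(loc=tf.ones([4, 3]), scale=1.),
                         reinterpreted_batch_ndims=1)
# shape [4]: one KL per batch member, summed over the 3 reinterpreted dims
kl_demo = tfd.kl_divergence(a_demo, b_demo)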
def _build_clp_multiplication(self, clp_kernel):
    from TFUtil import safe_log
    input_placeholder = self.input_data.get_placeholder_as_batch_major()
    # Note: these assert ops only take effect if they are executed, e.g. via
    # tf.control_dependencies; as written they document the expected shapes.
    tf.assert_equal(tf.shape(clp_kernel)[1], tf.shape(input_placeholder)[2] // 2)
    tf.assert_equal(tf.shape(clp_kernel)[2], self._nr_of_filters)
    input_real = tf.strided_slice(input_placeholder, [0, 0, 0],
                                  tf.shape(input_placeholder), [1, 1, 2])
    input_imag = tf.strided_slice(input_placeholder, [0, 0, 1],
                                  tf.shape(input_placeholder), [1, 1, 2])
    kernel_real = self._clp_kernel[0, :, :]
    kernel_imag = self._clp_kernel[1, :, :]
    output_real = tf.einsum('btf,fp->btp', input_real, kernel_real) \
                  - tf.einsum('btf,fp->btp', input_imag, kernel_imag)
    output_imag = tf.einsum('btf,fp->btp', input_imag, kernel_real) \
                  + tf.einsum('btf,fp->btp', input_real, kernel_imag)
    output_uncompressed = tf.sqrt(tf.pow(output_real, 2) + tf.pow(output_imag, 2))
    output_compressed = safe_log(output_uncompressed)
    return output_compressed
def __init__(self, tensors: List[tf.Tensor], cluster_indexes: tf.Tensor,
             n_splits, seed, train_sampling=1.0, test_sampling=1.0):
    size = tensors[0].shape[0].value
    self.seed = seed
    clustered_index = self.cluster_pages(cluster_indexes)
    index_len = tf.shape(clustered_index)[0]
    assert_op = tf.assert_equal(index_len, size,
                                message='n_pages is not equal to size of clustered index')
    with tf.control_dependencies([assert_op]):
        split_nitems = int(round(size / n_splits))
        split_size = [split_nitems] * n_splits
        split_size[-1] = size - (n_splits - 1) * split_nitems
        splits = tf.split(clustered_index, split_size)
        complements = [tf.random_shuffle(tf.concat(splits[:i] + splits[i + 1:], axis=0), seed)
                       for i in range(n_splits)]
        splits = [tf.random_shuffle(split, seed) for split in splits]

    def mk_name(prefix, tensor):
        return prefix + '_' + tensor.name[:-2]

    def prepare_split(i):
        test_size = split_size[i]
        train_size = size - test_size
        test_sampled_size = int(round(test_size * test_sampling))
        train_sampled_size = int(round(train_size * train_sampling))
        test_idx = splits[i][:test_sampled_size]
        train_idx = complements[i][:train_sampled_size]
        test_set = [tf.gather(tensor, test_idx, name=mk_name('test', tensor))
                    for tensor in tensors]
        tran_set = [tf.gather(tensor, train_idx, name=mk_name('train', tensor))
                    for tensor in tensors]
        return Split(test_set, tran_set, test_sampled_size, train_sampled_size)

    self.splits = [prepare_split(i) for i in range(n_splits)]
def encode(self, sequence, sequence_length):
    if self._use_cudnn:
        with tf.control_dependencies(
                [tf.assert_equal(
                    sequence_length, tf.shape(sequence)[1],
                    message='`use_cudnn_enc` must be False if sequence lengths vary.')]):
            _, (states_h, _) = self._cudnn_enc_lstm(
                tf.transpose(sequence, [1, 0, 2]),
                training=self._is_training)

        # Note we access the outputs (h) from the states since the backward
        # outputs are reversed to the input order in the returned outputs.
        last_h_fw, last_h_bw = states_h[-2], states_h[-1]
    else:
        _, states_fw, states_bw = rnn.stack_bidirectional_dynamic_rnn(
            self._enc_cells_fw,
            self._enc_cells_bw,
            sequence,
            sequence_length=sequence_length,
            time_major=False,
            dtype=tf.float32,
            scope='encoder')
        # Note we access the outputs (h) from the states since the backward
        # outputs are reversed to the input order in the returned outputs.
        last_h_fw = states_fw[-1][-1].h
        last_h_bw = states_bw[-1][-1].h

    # Concatenate the final outputs for each direction.
    last_h = tf.concat([last_h_fw, last_h_bw], 1)

    return last_h
def zero_state(self, batch_size, dtype):
    with tf.name_scope(type(self).__name__ + "ZeroState", values=[batch_size]):
        if self._initial_cell_state is not None:
            cell_state = self._initial_cell_state
        else:
            cell_state = self._cell.zero_state(batch_size, dtype)
        error_message = (
            "zero_state of AttentionWrapper %s: " % self._base_name +
            "Non-matching batch sizes between the memory "
            "(encoder output) and the requested batch size.")
        with tf.control_dependencies(
                [tf.assert_equal(batch_size,
                                 self._attention_mechanism.batch_size,
                                 message=error_message)]):
            cell_state = nest.map_structure(
                lambda s: tf.identity(s, name="checked_cell_state"),
                cell_state)
        alignment_history = ()
        _zero_state_tensors = rnn_cell_impl._zero_state_tensors
        return AttentionWrapperState(
            cell_state=cell_state,
            time=tf.zeros([], dtype=tf.int32),
            attention=_zero_state_tensors(self._attention_size, batch_size, dtype),
            alignments=self._attention_mechanism.initial_alignments(batch_size, dtype),
            alignment_history=alignment_history)
def assert_shape_equal(shape_a, shape_b):
    """Asserts that shape_a and shape_b are equal.

    If the shapes are static, raises a ValueError when the shapes
    mismatch.

    If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
    mismatch.

    Args:
        shape_a: a list containing shape of the first tensor.
        shape_b: a list containing shape of the second tensor.

    Returns:
        Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
        when the shapes are dynamic.

    Raises:
        ValueError: When shapes are both static and unequal.
    """
    if (all(isinstance(dim, int) for dim in shape_a) and
            all(isinstance(dim, int) for dim in shape_b)):
        if shape_a != shape_b:
            raise ValueError('Unequal shapes {}, {}'.format(shape_a, shape_b))
        else:
            return tf.no_op()
    else:
        return tf.assert_equal(shape_a, shape_b)
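# Usage sketch (toy shapes, TF1 graph mode assumed; not from the original
# module). With plain ints the check happens at graph-construction time; with
# shape tensors it becomes a runtime op that must be executed, e.g. via
# tf.control_dependencies.
static_check = assert_shape_equal([2, 3], [2, 3])  # returns tf.no_op()
x_demo = tf.placeholder(tf.float32, [None, 3])
y_demo = tf.placeholder(tf.float32, [None, 3])
runtime_check = assert_shape_equal(tf.unstack(tf.shape(x_demo)),
                                   tf.unstack(tf.shape(y_demo)))
with tf.control_dependencies([runtime_check]):
    z_demo = x_demo + y_demo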
def _maybe_validate_perm(perm, validate_args, name=None):
    """Checks that `perm` is valid."""
    with tf.name_scope(name, 'maybe_validate_perm', [perm]):
        assertions = []
        if not perm.dtype.is_integer:
            raise TypeError('`perm` must be integer type')

        msg = '`perm` must be a vector.'
        if perm.shape.ndims is not None:
            if perm.shape.ndims != 1:
                raise ValueError(
                    msg[:-1] + ', saw rank: {}.'.format(perm.shape.ndims))
        elif validate_args:
            assertions += [tf.assert_rank(perm, 1, message=msg)]

        perm_ = tf.contrib.util.constant_value(perm)
        msg = '`perm` must be a valid permutation vector.'
        if perm_ is not None:
            if not np.all(np.arange(np.size(perm_)) == np.sort(perm_)):
                raise ValueError(msg[:-1] + ', saw: {}.'.format(perm_))
        elif validate_args:
            assertions += [tf.assert_equal(
                tf.contrib.framework.sort(perm),
                tf.range(tf.size(perm)),
                message=msg)]

        return assertions
def test_doesnt_raise_when_equal_and_broadcastable_shapes(self):
    with self.test_session():
        small = tf.constant([1, 2], name="small")
        small_2 = tf.constant([1, 2], name="small_2")
        with tf.control_dependencies([tf.assert_equal(small, small_2)]):
            out = tf.identity(small)
        out.eval()
def sample_from_discretized_mix_logistic(y, log_scale_min=-7.):
    '''
    Args:
        y: Tensor, [batch_size, channels, time_length]
    Returns:
        Tensor: sample in range of [-1, 1]
    '''
    with tf.control_dependencies([tf.assert_equal(tf.mod(tf.shape(y)[1], 3), 0)]):
        nr_mix = tf.shape(y)[1] // 3

    # [batch_size, time_length, channels]
    y = tf.transpose(y, [0, 2, 1])
    logit_probs = y[:, :, :nr_mix]

    # sample mixture indicator from softmax
    temp = tf.random_uniform(tf.shape(logit_probs), minval=1e-5, maxval=1. - 1e-5)
    temp = logit_probs - tf.log(-tf.log(temp))
    argmax = tf.argmax(temp, -1)

    # [batch_size, time_length] -> [batch_size, time_length, nr_mix]
    one_hot = tf.one_hot(argmax, depth=nr_mix, dtype=tf.float32)
    # select logistic parameters
    means = tf.reduce_sum(y[:, :, nr_mix:2 * nr_mix] * one_hot, axis=-1)
    log_scales = tf.maximum(tf.reduce_sum(
        y[:, :, 2 * nr_mix:3 * nr_mix] * one_hot, axis=-1), log_scale_min)

    # sample from logistic & clip to interval
    # we don't actually round to the nearest 8-bit value when sampling
    u = tf.random_uniform(tf.shape(means), minval=1e-5, maxval=1. - 1e-5)
    x = means + tf.exp(log_scales) * (tf.log(u) - tf.log(1. - u))

    return tf.minimum(tf.maximum(x, -1.), 1.)
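# The mixture indicator above is drawn with the Gumbel-max trick; a minimal
# NumPy sketch of the same idea (illustrative only): adding -log(-log(U)) noise
# to the logits and taking the argmax samples from Categorical(softmax(logits)).
logits_demo = np.array([0.5, 1.5, 0.2])
u_demo = np.random.uniform(1e-5, 1. - 1e-5, size=logits_demo.shape)
mixture_idx = np.argmax(logits_demo - np.log(-np.log(u_demo)))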
def _expectation(p, mean, none, kern, feat, nghp=None):
    """
    Compute the expectation:
    expectation[n] = <x_n K_{x_n, Z}>_p(x_n)
        - K_{.,.} :: RBF kernel

    :return: NxDxM
    """
    Xmu, Xcov = p.mu, p.cov

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(Xmu)[1], tf.constant(kern.input_dim, settings.tf_int),
            message="Currently cannot handle slicing in exKxz.")]):
        Xmu = tf.identity(Xmu)

    with params_as_tensors_for(kern), params_as_tensors_for(feat):
        D = tf.shape(Xmu)[1]
        lengthscales = kern.lengthscales if kern.ARD \
            else tf.zeros((D,), dtype=settings.float_type) + kern.lengthscales

        chol_L_plus_Xcov = tf.cholesky(tf.matrix_diag(lengthscales ** 2) + Xcov)  # NxDxD
        all_diffs = tf.transpose(feat.Z) - tf.expand_dims(Xmu, 2)  # NxDxM

        sqrt_det_L = tf.reduce_prod(lengthscales)
        sqrt_det_L_plus_Xcov = tf.exp(tf.reduce_sum(
            tf.log(tf.matrix_diag_part(chol_L_plus_Xcov)), axis=1))
        determinants = sqrt_det_L / sqrt_det_L_plus_Xcov  # N

        exponent_mahalanobis = tf.cholesky_solve(chol_L_plus_Xcov, all_diffs)  # NxDxM
        non_exponent_term = tf.matmul(Xcov, exponent_mahalanobis, transpose_a=True)
        non_exponent_term = tf.expand_dims(Xmu, 2) + non_exponent_term  # NxDxM

        exponent_mahalanobis = tf.reduce_sum(all_diffs * exponent_mahalanobis, 1)  # NxM
        exponent_mahalanobis = tf.exp(-0.5 * exponent_mahalanobis)  # NxM

        return kern.variance * (determinants[:, None] * exponent_mahalanobis)[:, None, :] * non_exponent_term
def maybe_check_quadrature_param(param, name, validate_args):
    """Helper which checks validity of `loc` and `scale` init args."""
    with tf.name_scope(name="check_" + name, values=[param]):
        assertions = []
        if param.shape.ndims is not None:
            if param.shape.ndims == 0:
                raise ValueError("Mixing params must be a (batch of) vector; "
                                 "{}.rank={} is not at least one.".format(
                                     name, param.shape.ndims))
        elif validate_args:
            assertions.append(
                tf.assert_rank_at_least(
                    param, 1,
                    message=("Mixing params must be a (batch of) vector; "
                             "{}.rank is not at least one.".format(name))))

        # TODO(jvdillon): Remove once we support k-mixtures.
        if param.shape.with_rank_at_least(1)[-1] is not None:
            if param.shape[-1].value != 1:
                raise NotImplementedError("Currently only bimixtures are supported; "
                                          "{}.shape[-1]={} is not 1.".format(
                                              name, param.shape[-1].value))
        elif validate_args:
            assertions.append(
                tf.assert_equal(
                    tf.shape(param)[-1], 1,
                    message=("Currently only bimixtures are supported; "
                             "{}.shape[-1] is not 1.".format(name))))

        if assertions:
            return control_flow_ops.with_dependencies(assertions, param)
        return param
def _filtering_step(self, current_times, current_values, state, predictions):
    """Update model state based on observations.

    Note that we don't do much here aside from computing a loss. In this case
    it's easier to update the RNN state in _prediction_step, since that covers
    running the RNN both on observations (from this method) and our own
    predictions. This distinction can be important for probabilistic models,
    where repeatedly predicting without filtering should lead to
    low-confidence predictions.

    Args:
        current_times: A [batch size] integer Tensor.
        current_values: A [batch size, self.num_features] floating point Tensor
            with new observations.
        state: The model's state tuple.
        predictions: The output of the previous `_prediction_step`.

    Returns:
        A tuple of new state and a predictions dictionary updated to include a
        loss (note that we could also return other measures of goodness of fit,
        although only "loss" will be optimized).
    """
    state_from_time, prediction, lstm_state = state
    with tf.control_dependencies(
            [tf.assert_equal(current_times, state_from_time)]):
        transformed_values = self._transform(current_values)
        # Use mean squared error across features for the loss.
        predictions["loss"] = tf.reduce_mean(
            (prediction - transformed_values) ** 2, axis=-1)
        # Keep track of the new observation in model state. It won't be run
        # through the LSTM until the next _imputation_step.
        new_state_tuple = (current_times, transformed_values, lstm_state)
    return (new_state_tuple, predictions)
def _training(self):
    """Perform multiple training iterations of both policy and value baseline.

    Training on the episodes collected in the memory. Reset the memory
    afterwards. Always returns a summary string.

    Returns:
        Summary tensor.
    """
    with tf.name_scope('training'):
        assert_full = tf.assert_equal(self._memory_index, self._config.update_every)
        with tf.control_dependencies([assert_full]):
            data = self._memory.data()
        (observ, action, old_mean, old_logstd, reward), length = data
        with tf.control_dependencies([tf.assert_greater(length, 0)]):
            length = tf.identity(length)
        observ = self._observ_filter.transform(observ)
        reward = self._reward_filter.transform(reward)
        update_summary = self._perform_update_steps(
            observ, action, old_mean, old_logstd, reward, length)
        with tf.control_dependencies([update_summary]):
            penalty_summary = self._adjust_penalty(observ, old_mean, old_logstd, length)
        with tf.control_dependencies([penalty_summary]):
            clear_memory = tf.group(self._memory.clear(),
                                    self._memory_index.assign(0))
        with tf.control_dependencies([clear_memory]):
            weight_summary = utility.variable_summaries(
                tf.trainable_variables(), self._config.weight_summaries)
            return tf.summary.merge([update_summary, penalty_summary, weight_summary])
def test_doesnt_raise_when_both_empty(self):
    with self.test_session():
        larry = tf.constant([])
        curly = tf.constant([])
        with tf.control_dependencies([tf.assert_equal(larry, curly)]):
            out = tf.identity(larry)
        out.eval()
def assert_shape_equal_along_first_dimension(shape_a, shape_b):
    """Asserts that shape_a and shape_b are the same along the 0th-dimension.

    If the shapes are static, raises a ValueError when the shapes
    mismatch.

    If the shapes are dynamic, raises a tf InvalidArgumentError when the shapes
    mismatch.

    Args:
        shape_a: a list containing shape of the first tensor.
        shape_b: a list containing shape of the second tensor.

    Returns:
        Either a tf.no_op() when shapes are all static and a tf.assert_equal() op
        when the shapes are dynamic.

    Raises:
        ValueError: When shapes are both static and unequal.
    """
    if isinstance(shape_a[0], int) and isinstance(shape_b[0], int):
        if shape_a[0] != shape_b[0]:
            raise ValueError('Unequal first dimension {}, {}'.format(
                shape_a[0], shape_b[0]))
        else:
            return tf.no_op()
    else:
        return tf.assert_equal(shape_a[0], shape_b[0])
def get_mu_tensor(self):
    const_fact = self._dist_to_opt_avg**2 * self._h_min**2 / 2 / self._grad_var
    coef = tf.Variable([-1.0, 3.0, 0.0, 1.0], dtype=tf.float32, name="cubic_solver_coef")
    coef = tf.scatter_update(coef, tf.constant(2), -(3 + const_fact))
    roots = tf.py_func(np.roots, [coef], Tout=tf.complex64, stateful=False)

    # filter out the correct root
    root_idx = tf.logical_and(
        tf.logical_and(tf.greater(tf.real(roots), tf.constant(0.0)),
                       tf.less(tf.real(roots), tf.constant(1.0))),
        tf.less(tf.abs(tf.imag(roots)), 1e-5))
    # in case there are two duplicated roots satisfying the above condition
    root = tf.reshape(tf.gather(tf.gather(roots, tf.where(root_idx)), tf.constant(0)),
                      shape=[])
    # Note: this assert op is not wired into the graph via control_dependencies,
    # so it is a no-op in graph mode.
    tf.assert_equal(tf.size(root), tf.constant(1))

    dr = self._h_max / self._h_min
    mu = tf.maximum(tf.real(root)**2, ((tf.sqrt(dr) - 1) / (tf.sqrt(dr) + 1))**2)
    return mu
def calculate_reshape(original_shape, new_shape, validate=False, name=None):
    """Calculates the reshaped dimensions (replacing up to one -1 in reshape)."""
    batch_shape_static = tensor_util.constant_value_as_shape(new_shape)
    if batch_shape_static.is_fully_defined():
        return np.int32(batch_shape_static.as_list()), batch_shape_static, []
    with tf.name_scope(name, "calculate_reshape", [original_shape, new_shape]):
        original_size = tf.reduce_prod(original_shape)
        implicit_dim = tf.equal(new_shape, -1)
        size_implicit_dim = (
            original_size // tf.maximum(1, -tf.reduce_prod(new_shape)))
        new_ndims = tf.shape(new_shape)
        expanded_new_shape = tf.where(  # Assumes exactly one `-1`.
            implicit_dim, tf.fill(new_ndims, size_implicit_dim), new_shape)
        validations = [] if not validate else [
            tf.assert_rank(
                original_shape, 1, message="Original shape must be a vector."),
            tf.assert_rank(new_shape, 1, message="New shape must be a vector."),
            tf.assert_less_equal(
                tf.count_nonzero(implicit_dim, dtype=tf.int32),
                1,
                message="At most one dimension can be unknown."),
            tf.assert_positive(
                expanded_new_shape, message="Shape elements must be >=-1."),
            tf.assert_equal(
                tf.reduce_prod(expanded_new_shape),
                original_size,
                message="Shape sizes do not match."),
        ]
        return expanded_new_shape, batch_shape_static, validations
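# Sketch of the implicit-dimension arithmetic above (toy numbers, NumPy only;
# not part of the original module): reshaping 24 elements to [-1, 4] resolves
# the -1 as 24 // max(1, -prod([-1, 4])) = 24 // 4 = 6.
original_size_demo = 24
new_shape_demo = np.array([-1, 4])
size_implicit_demo = original_size_demo // max(1, -np.prod(new_shape_demo))  # 6
expanded_demo = np.where(new_shape_demo == -1, size_implicit_demo, new_shape_demo)  # [6, 4]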
def _validate_dimension(self, x):
    x = tf.convert_to_tensor(x, name='x')
    if x.shape[-2:].is_fully_defined():
        if x.shape.dims[-2] == x.shape.dims[-1] == self.dimension:
            pass
        else:
            raise ValueError(
                'Input dimension mismatch: expected [..., {}, {}], got {}'.format(
                    self.dimension, self.dimension, x.shape.dims))
    elif self.validate_args:
        msg = 'Input dimension mismatch: expected [..., {}, {}], got {}'.format(
            self.dimension, self.dimension, tf.shape(x))
        with tf.control_dependencies(
                [tf.assert_equal(tf.shape(x)[-2], self.dimension, message=msg),
                 tf.assert_equal(tf.shape(x)[-1], self.dimension, message=msg)]):
            x = tf.identity(x)
    return x
def test_raises_when_less(self):
    with self.test_session():
        small = tf.constant([3, 1], name="small")
        big = tf.constant([4, 2], name="big")
        with tf.control_dependencies([tf.assert_equal(small, big)]):
            out = tf.identity(small)
        with self.assertRaisesOpError("small.*big"):
            out.eval()
def test_raises_when_equal_but_non_broadcastable_shapes(self):
    with self.test_session():
        small = tf.constant([1, 1, 1], name="small")
        small_2 = tf.constant([1, 1], name="small_2")
        with self.assertRaisesRegexp(ValueError, "broadcast"):
            with tf.control_dependencies([tf.assert_equal(small, small_2)]):
                out = tf.identity(small)
            out.eval()
def test_raises_when_greater(self):
    with self.test_session():
        small = tf.constant([1, 2], name="small")
        big = tf.constant([3, 4], name="big")
        with tf.control_dependencies([tf.assert_equal(big, small)]):
            out = tf.identity(small)
        with self.assertRaisesOpError("big.*small"):
            out.eval()
def load_examples():
    if a.input_dir is None or not os.path.exists(a.input_dir):
        raise Exception("input_dir does not exist")

    input_paths = glob.glob(os.path.join(a.input_dir, "*.jpg"))
    decode = tf.image.decode_jpeg
    if len(input_paths) == 0:
        input_paths = glob.glob(os.path.join(a.input_dir, "*.png"))
        decode = tf.image.decode_png

    if len(input_paths) == 0:
        raise Exception("input_dir contains no image files")

    def get_name(path):
        name, _ = os.path.splitext(os.path.basename(path))
        return name

    if all(get_name(path).isdigit() for path in input_paths):
        input_paths = sorted(input_paths, key=lambda path: int(get_name(path)))
    else:
        input_paths = sorted(input_paths)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=a.mode == "train")
        reader = tf.WholeFileReader()
        paths, contents = reader.read(path_queue)
        raw_input = decode(contents)
        raw_input = tf.image.convert_image_dtype(raw_input, dtype=tf.float32)

        assertion = tf.assert_equal(tf.shape(raw_input)[2], 3,
                                    message="image does not have 3 channels")
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)

        raw_input.set_shape([None, None, 3])

        width = tf.shape(raw_input)[1]  # [height, width, channels]
        a_images = preprocess(raw_input[:, :width // 2, :])
        b_images = preprocess(raw_input[:, width // 2:, :])

    inputs, targets = [b_images, a_images]

    # synchronize seed for image operations so that we do the same operations to both
    # input and output images
    seed = random.randint(0, 2**31 - 1)

    def transform(image):
        r = image
        if a.flip:
            r = tf.image.random_flip_left_right(r, seed=seed)

        #r = tf.image.resize_images(r, [a.img_h, a.img_w], method=tf.image.ResizeMethod.AREA)
        # area produces a nice downscaling, but does nearest neighbor for upscaling
        # assume we're going to be doing downscaling here
        #r = tf.image.resize_images(r, [a.scale_size, a.scale_size], method=tf.image.ResizeMethod.AREA)

        h, w, _ = r.get_shape()
        if h < a.img_h or w < a.img_w:
            r = tf.image.resize_images(r, [a.img_h, a.img_w], method=tf.image.ResizeMethod.AREA)

        if a.mode == "train":
            offsize1 = h - a.img_h
            offsize2 = w - a.img_w
            if offsize1 > 0 and offsize2 > 0:
                offsize = offsize1 if (offsize1 < offsize2) else offsize2
                offset = tf.cast(tf.floor(
                    tf.random_uniform([2], 0, offsize, seed=seed)), dtype=tf.int32)
                r = tf.image.crop_to_bounding_box(r, offset[0], offset[1], a.img_h, a.img_w)
            else:
                r = tf.image.resize_images(r, [a.img_h, a.img_w], method=tf.image.ResizeMethod.AREA)

        if a.mode == "test":
            #a.h_ratio = h/a.img_h
            #a.w_ratio = w/a.img_w
            #a.h_ratio=tf.divide(h, a.img_h, name=None)
            #a.w_ratio=tf.divide(w, a.img_w, name=None)
            r = tf.image.resize_images(r, [a.img_h, a.img_w], method=tf.image.ResizeMethod.AREA)
        return r

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)
    steps_per_epoch = int(math.ceil(len(input_paths) / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        steps_per_epoch=steps_per_epoch,
    )
def concat_distributions(dists: List[tfd.Distribution],
                         axis: Optional[int] = None,
                         validate_args: bool = False,
                         allow_nan_stats: bool = True,
                         name: Optional[Text] = None) -> tfd.Distribution:
    """This layer creates a new `Distribution` by concatenating the parameters
    of multiple distributions of the same type along the given `axis`.

    Note
    ----
    If your distribution is the output from `DistributionLambda`, this function
    will remove all the keras history
    """
    dists = as_tuple(dists)
    if len(dists) == 1:
        return dists[0]
    if len(dists) == 0:
        raise ValueError("No distributions were given")
    axis = _find_axis_for_stack(dists, given_axis=axis)

    # ====== get the proper distribution type ====== #
    dist_type = type(dists[0])
    # _TensorCoercible will mess up the parameters of the distribution
    if issubclass(dist_type, dtc._TensorCoercible):
        dist_type = type.mro(dist_type)[2]
    assert issubclass(dist_type, tfd.Distribution) and not issubclass(
        dist_type, dtc._TensorCoercible)

    # TODO: issues concatenating JointDistribution, use Batchwise.
    if issubclass(dist_type, JointDistribution):
        from odin.bay.distributions.batchwise import Batchwise
        return Batchwise(dists, axis=axis, validate_args=validate_args, name=name)

    # ====== special cases ====== #
    dist_func = None
    if dist_type == obd.MultivariateNormalDiag:
        dist_func = _MVNdiag
    elif dist_type == obd.MultivariateNormalTriL:
        dist_func = _MVNtril
    elif dist_type == obd.MultivariateNormalFullCovariance:
        dist_func = _MVNfull
    if dist_func is not None:
        kwargs = dict(validate_args=validate_args, allow_nan_stats=allow_nan_stats)
        if name is not None:
            kwargs['name'] = name
        return dist_func(dists, axis, kwargs)

    ### no more distribution, a tensor of parameters is returned during the
    # recursive operator
    if issubclass(dist_type, (tuple, list)) and all(
            isinstance(i, obd.Distribution) or tf.is_tensor(i)
            for i in tf.nest.flatten(dists)):
        return [
            tf.concat(x, axis=axis)
            if tf.is_tensor(x) else concat_distributions(x, axis=axis)
            for x in zip(*dists)
        ]
    elif issubclass(dist_type, tf.Tensor):
        shapes = [d.shape for d in dists]
        if shapes[0].ndims == 0 or all(i == 1 for i in shapes[0]):
            # make sure all the numbers are the same (we cannot concatenate numbers)
            for d in dists[1:]:
                tf.assert_equal(d, dists[0])
            return dists[0]
        return tf.concat(dists, axis=axis)
    elif issubclass(dist_type, obd.Distribution):
        pass  # continue with all distribution parameters
    else:
        return dists[0]

    # get all params for concatenation
    if dist_type not in dist_params:
        raise RuntimeError("Unknown distribution of type '%s' for concatenation" %
                           str(dist_type))
    params_name = dist_params[dist_type]
    # start concatenating the params
    params = {}
    for p in params_name:
        attrs = [getattr(d, p) for d in dists]
        is_method = False
        if inspect.ismethod(attrs[0]):
            attrs = [a() for a in attrs]
            is_method = True
        if is_method and '_parameter' == p[-10:]:
            p = p[:-10]
        params[p] = concat_distributions(attrs, axis=axis)

    # extra arguments
    if name is not None:
        params['name'] = name
    args = inspect.getfullargspec(dist_type.__init__).args
    if 'allow_nan_stats' in args:
        params['allow_nan_stats'] = allow_nan_stats
    if 'validate_args' in args:
        params['validate_args'] = validate_args
    dist = dist_type(**params)
    return dist
def __init__(self,
             cat,
             components,
             validate_args=False,
             allow_nan_stats=True,
             use_static_graph=False,
             name="Mixture"):
    """Initialize a Mixture distribution.

    A `Mixture` is defined by a `Categorical` (`cat`, representing the
    mixture probabilities) and a list of `Distribution` objects
    all having matching dtype, batch shape, event shape, and continuity
    properties (the components).

    The `num_classes` of `cat` must be possible to infer at graph construction
    time and match `len(components)`.

    Args:
        cat: A `Categorical` distribution instance, representing the probabilities
            of `distributions`.
        components: A list or tuple of `Distribution` instances.
            Each instance must have the same type, be defined on the same domain,
            and have matching `event_shape` and `batch_shape`.
        validate_args: Python `bool`, default `False`. If `True`, raise a runtime
            error if batch or event ranks are inconsistent between cat and any of
            the distributions. This is only checked if the ranks cannot be
            determined statically at graph construction time.
        allow_nan_stats: Boolean, default `True`. If `False`, raise an
            exception if a statistic (e.g. mean/mode/etc...) is undefined for any
            batch member. If `True`, batch members with valid parameters leading to
            undefined statistics will return NaN for this statistic.
        use_static_graph: Calls to `sample` will not rely on dynamic tensor
            indexing, allowing for some static graph compilation optimizations, but
            at the expense of sampling all underlying distributions in the mixture.
            (Possibly useful when running on TPUs).
            Default value: `False` (i.e., use dynamic indexing).
        name: A name for this distribution (optional).

    Raises:
        TypeError: If cat is not a `Categorical`, or `components` is not
            a list or tuple, or the elements of `components` are not
            instances of `Distribution`, or do not have matching `dtype`.
        ValueError: If `components` is an empty list or tuple, or its
            elements do not have a statically known event rank.
            If `cat.num_classes` cannot be inferred at graph creation time,
            or the constant value of `cat.num_classes` is not equal to
            `len(components)`, or all `components` and `cat` do not have
            matching static batch shapes, or all components do not
            have matching static event shapes.
    """
    parameters = dict(locals())
    if not isinstance(cat, tf.distributions.Categorical):
        raise TypeError("cat must be a Categorical distribution, but saw: %s" % cat)
    if not components:
        raise ValueError("components must be a non-empty list or tuple")
    if not isinstance(components, (list, tuple)):
        raise TypeError("components must be a list or tuple, but saw: %s" % components)
    if not all(isinstance(c, tf.distributions.Distribution) for c in components):
        raise TypeError(
            "all entries in components must be Distribution instances"
            " but saw: %s" % components)

    dtype = components[0].dtype
    if not all(d.dtype == dtype for d in components):
        raise TypeError("All components must have the same dtype, but saw "
                        "dtypes: %s" % [(d.name, d.dtype) for d in components])
    static_event_shape = components[0].event_shape
    static_batch_shape = cat.batch_shape
    for d in components:
        static_event_shape = static_event_shape.merge_with(d.event_shape)
        static_batch_shape = static_batch_shape.merge_with(d.batch_shape)
    if static_event_shape.ndims is None:
        raise ValueError(
            "Expected to know rank(event_shape) from components, but "
            "none of the components provide a static number of ndims")

    # Ensure that all batch and event ndims are consistent.
    with tf.name_scope(name, values=[cat.logits]) as name:
        num_components = cat.event_size
        static_num_components = tensor_util.constant_value(num_components)
        if static_num_components is None:
            raise ValueError(
                "Could not infer number of classes from cat and unable "
                "to compare this value to the number of components passed in.")
        # Possibly convert from numpy 0-D array.
        static_num_components = int(static_num_components)
        if static_num_components != len(components):
            raise ValueError("cat.num_classes != len(components): %d vs. %d" %
                             (static_num_components, len(components)))

        cat_batch_shape = cat.batch_shape_tensor()
        cat_batch_rank = tf.size(cat_batch_shape)
        if validate_args:
            batch_shapes = [d.batch_shape_tensor() for d in components]
            batch_ranks = [tf.size(bs) for bs in batch_shapes]
            check_message = ("components[%d] batch shape must match cat "
                             "batch shape")
            self._assertions = [
                tf.assert_equal(
                    cat_batch_rank, batch_ranks[di], message=check_message % di)
                for di in range(len(components))
            ]
            self._assertions += [
                tf.assert_equal(
                    cat_batch_shape, batch_shapes[di], message=check_message % di)
                for di in range(len(components))
            ]
        else:
            self._assertions = []

        self._cat = cat
        self._components = list(components)
        self._num_components = static_num_components
        self._static_event_shape = static_event_shape
        self._static_batch_shape = static_batch_shape

        self._use_static_graph = use_static_graph
        if use_static_graph and static_num_components is None:
            raise ValueError("Number of categories must be known statically when "
                             "`static_sample=True`.")

    # We let the Mixture distribution access _graph_parents since its arguably
    # more like a baseclass.
    graph_parents = self._cat._graph_parents  # pylint: disable=protected-access
    for c in self._components:
        graph_parents += c._graph_parents  # pylint: disable=protected-access

    super(Mixture, self).__init__(
        dtype=dtype,
        reparameterization_type=tf.distributions.NOT_REPARAMETERIZED,
        validate_args=validate_args,
        allow_nan_stats=allow_nan_stats,
        parameters=parameters,
        graph_parents=graph_parents,
        name=name)
def __init__(self, path='train', shuffle=True, reshape=None, batch_size=None,
             dtype=tf.float32, num_threads=1, min_queue_examples=1000):
    """
    :param path: image folder or image path lists
    :param shuffle:
    :param reshape: be a tuple or a list (width, height)
    :param dtype: convert input images to tf.uint8 (0-255) or tf.float32 (0-1)
    """
    # check if path is a dir/.tfrecords file or a image file list
    self._tfrecords = False
    if type(path) == str:
        if not os.path.isdir(path):
            if '.tfrecords' in path:
                self._tfrecords = True
            else:
                raise Exception("Directory '%s' does not exist" % path)
        else:
            input_paths = self._get_input_paths(path)
    else:
        input_paths = path

    # load images
    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=shuffle)
        if self._tfrecords:
            reader = tf.TFRecordReader()
            _, serialized_example = reader.read(path_queue)
            features = tf.parse_single_example(
                serialized_example,
                features={
                    'image/file_name': tf.FixedLenFeature([], tf.string),
                    'image/encoded_image': tf.FixedLenFeature([], tf.string),
                })
            image_buffer = features['image/encoded_image']
            raw_input = tf.image.decode_jpeg(image_buffer, channels=3)
        else:
            reader = tf.WholeFileReader()
            paths, contents = reader.read(path_queue)
            raw_input = self.__decode(contents)

        raw_input = tf.image.convert_image_dtype(raw_input, dtype=dtype)
        assertion = tf.assert_equal(
            tf.shape(raw_input)[2], 3, message="image does not have 3 channels")
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)
        raw_input.set_shape([None, None, 3])

        if reshape is not None:
            raw_input = tf.image.resize_images(
                raw_input, size=[reshape[1], reshape[0]],
                method=tf.image.ResizeMethod.BICUBIC)

    color_print("%d images loaded in '%s'" % (len(input_paths), path), 2)

    self.__images = raw_input
    self.__image_paths = input_paths
    self._size = len(input_paths)
    self._batch_size = batch_size
    self._shuffled = shuffle
    self._image_shape = reshape
    self._paths = paths  # note: only defined on the non-tfrecords branch
    self._num_threads = num_threads
    self._min_queue_examples = min_queue_examples
def losses(self):
    """
    Return the losses from a set of RPN predictions and their associated ground-truth.

    Returns:
        dict[loss name -> loss value]: A dict mapping from loss name to loss value.
            Loss names are: `loss_rpn_cls` for objectness classification and
            `loss_rpn_loc` for proposal localization.
    """

    def resample(label):
        """
        Randomly sample a subset of positive and negative examples by overwriting
        the label vector to the ignore value (-1) for all elements that are not
        included in the sample.
        """
        pos_idx, neg_idx = subsample_labels(label, self.batch_size_per_image,
                                            self.positive_fraction, 0)
        # Fill with the ignore label (-1), then set positive and negative labels
        all_idx = tf.range(tf.shape(label)[0])
        label = tf.dynamic_stitch(
            [all_idx, tf.cast(pos_idx, tf.int32), tf.cast(neg_idx, tf.int32)],
            [tf.zeros_like(label) - 1, tf.ones_like(pos_idx), tf.zeros_like(neg_idx)])
        return label

    gt_objectness_logits, gt_anchor_deltas = self._get_ground_truth()
    # resample: (N, num_anchors_per_image)
    gt_objectness_logits = tf.map_fn(resample, gt_objectness_logits)

    # Collect all objectness labels and delta targets over feature maps and images
    # The final ordering is L, N, H, W, A from slowest to fastest axis.
    num_anchors_per_map = [
        tf.reduce_prod(tf.shape(x)[1:]) for x in self.pred_objectness_logits
    ]
    num_anchors_per_image = tf.reduce_sum(num_anchors_per_map)

    # Log the number of positive/negative anchors per-image that's used in training
    num_pos_anchors = tf.count_nonzero(tf.equal(gt_objectness_logits, 1), dtype=tf.int32)
    num_neg_anchors = tf.count_nonzero(tf.equal(gt_objectness_logits, 0), dtype=tf.int32)
    tf.summary.scalar("rpn/num_pos_anchors", num_pos_anchors / self.num_images)
    tf.summary.scalar("rpn/num_neg_anchors", num_neg_anchors / self.num_images)

    with tf.control_dependencies([
            tf.assert_equal(tf.shape(gt_objectness_logits)[1], num_anchors_per_image)
    ]):
        # Split to tuple of L tensors, each with shape (N, num_anchors_per_map)
        gt_objectness_logits = tf.split(gt_objectness_logits, num_anchors_per_map, axis=1)
        # Concat from all feature maps
        gt_objectness_logits = tf.concat(
            [tf.reshape(x, [-1]) for x in gt_objectness_logits], axis=0)

    gt_anchor_deltas_shape = shape_utils.combined_static_and_dynamic_shape(gt_anchor_deltas)
    with tf.control_dependencies(
            [tf.assert_equal(gt_anchor_deltas_shape[1], num_anchors_per_image)]):
        B = gt_anchor_deltas_shape[2]  # box dimension (4 or 5)
        # Split to tuple of L tensors, each with shape (N, num_anchors_per_image)
        gt_anchor_deltas = tf.split(gt_anchor_deltas, num_anchors_per_map, axis=1)
        # Concat from all feature maps
        gt_anchor_deltas = tf.concat(
            [tf.reshape(x, [-1, B]) for x in gt_anchor_deltas], axis=0)

    # Collect all objectness logits and delta predictions over feature maps
    # and images to arrive at the same shape as the labels and targets
    # The final ordering is L, N, H, W, A from slowest to fastest axis.
    pred_objectness_logits = tf.concat(
        [
            # Reshape: (N, Hi, Wi, A) -> (N*Hi*Wi*A, )
            tf.reshape(x, [-1]) for x in self.pred_objectness_logits
        ],
        axis=0,
    )
    pred_anchor_deltas = tf.concat(
        [
            # Reshape: (N, Hi, Wi, A*B) -> (N*Hi*Wi*A, B)
            tf.reshape(x, [-1, B]) for x in self.pred_anchor_deltas
        ],
        axis=0,
    )

    objectness_loss, localization_loss = rpn_losses(
        gt_objectness_logits,
        gt_anchor_deltas,
        pred_objectness_logits,
        pred_anchor_deltas,
        self.smooth_l1_beta,
    )

    normalizer = 1.0 / tf.cast(self.batch_size_per_image * self.num_images, tf.float32)
    loss_cls = objectness_loss * normalizer  # cls: classification loss
    loss_loc = localization_loss * normalizer  # loc: localization loss
    losses = {"loss_rpn_cls": loss_cls, "loss_rpn_loc": loss_loc}

    return losses
def auto_encoder(x, x_org, opt, opt_t=None):
    # print x.get_shape()  # batch L
    if not opt_t:
        opt_t = opt
    x_emb, W_norm = embedding(x, opt)  # batch L emb
    x_emb = tf.expand_dims(x_emb, 3)  # batch L emb 1
    res = {}

    # cnn encoder
    if opt.layer == 4:
        H_enc = conv_model_4layer(x_emb, opt)
    elif opt.layer == 3:
        H_enc = conv_model_3layer(x_emb, opt)
    else:
        H_enc = conv_model(x_emb, opt)

    H_dec = H_enc

    # deconv decoder
    if opt.layer == 4:
        x_rec = deconv_model_4layer(H_dec, opt_t)  # batch L emb 1
    elif opt.layer == 3:
        x_rec = deconv_model_3layer(H_dec, opt_t)  # batch L emb 1
    else:
        x_rec = deconv_model(H_dec, opt_t)  # batch L emb 1

    print("Encoder len %d Decoder len %d Output len %d" % (
        x_emb.get_shape()[1], x_rec.get_shape()[1], x_org.get_shape()[1]))
    # Note: these assert ops are not wired into the graph via
    # control_dependencies, so they are no-ops in graph mode.
    tf.assert_equal(x_rec.get_shape(), x_emb.get_shape())
    tf.assert_equal(x_emb.get_shape()[1], x_org.get_shape()[1])

    x_rec_norm = normalizing(x_rec, 2)  # batch L emb
    if opt.fix_emb:
        # cosine sim
        loss = -tf.reduce_sum(x_rec_norm * x_emb)  # Batch L emb
        rec_sent = tf.argmax(tf.tensordot(tf.squeeze(x_rec_norm), W_norm, [[2], [1]]), 2)
        res['rec_sents'] = rec_sent
    else:
        x_temp = tf.reshape(x_org, [-1, ])
        prob_logits = tf.tensordot(tf.squeeze(x_rec_norm), W_norm, [[2], [1]])  # c_blv = sum_e x_ble W_ve
        prob = tf.nn.log_softmax(prob_logits * opt_t.L, dim=-1, name=None)
        rec_sent = tf.squeeze(tf.argmax(prob, 2))
        prob = tf.reshape(prob, [-1, opt_t.n_words])

        idx = tf.range(opt.batch_size * opt_t.sent_len)
        all_idx = tf.transpose(tf.stack(values=[idx, x_temp]))
        all_prob = tf.gather_nd(prob, all_idx)

        gen_temp = tf.cast(tf.reshape(rec_sent, [-1, ]), tf.int32)
        gen_idx = tf.transpose(tf.stack(values=[idx, gen_temp]))
        gen_prob = tf.gather_nd(prob, gen_idx)

        res['rec_sents'] = rec_sent
        res['gen_p'] = tf.exp(gen_prob[0:opt.sent_len])
        res['all_p'] = tf.exp(all_prob[0:opt.sent_len])

        if opt.discrimination:
            logits_real, _ = discriminator(x_org, W_norm, opt_t)
            prob_one_hot = tf.nn.log_softmax(prob_logits * opt_t.L * 100, dim=-1, name=None)
            logits_syn, _ = discriminator(tf.exp(prob_one_hot), W_norm, opt_t,
                                          is_prob=True, is_reuse=True)

            res['prob_r'] = tf.reduce_mean(tf.nn.sigmoid(logits_real))
            res['prob_f'] = tf.reduce_mean(tf.nn.sigmoid(logits_syn))

            loss = tf.reduce_mean(
                tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.ones_like(logits_real), logits=logits_real)) + \
                tf.reduce_mean(
                    tf.nn.sigmoid_cross_entropy_with_logits(
                        labels=tf.zeros_like(logits_syn), logits=logits_syn))
        else:
            loss = -tf.reduce_mean(all_prob)

    tf.summary.scalar('loss', loss)

    train_op = layers.optimize_loss(
        loss,
        framework.get_global_step(),
        optimizer='Adam',
        learning_rate=opt.lr)

    return res, loss, train_op
def create_network(features, labels, mode, params):
    """ Create the inference model """
    # we can't change the parameter names in the function declaration or TF gets angry,
    # so let's rename them here
    hyperparameters = params
    # this is the current slice of tokens (chars/words)
    tokens = features
    # this is the next slice of tokens, one ahead of `tokens`. Note that `next_tokens`
    # and tokens will overlap; they are only off by one token but they are `unroll_steps`
    # long. None if we are doing prediction
    next_tokens = labels

    unroll_steps = hyperparameters.unroll_steps
    num_classes = hyperparameters.language_model.num_classes

    # ... as opposed to prediction. during prediction, labels are not present
    # and there's no need to calculate loss.
    is_training = mode == ModeKeys.TRAIN or mode == ModeKeys.EVAL
    dropout = hyperparameters.dropout if mode == ModeKeys.TRAIN else 0

    with tf.control_dependencies([
        tf.assert_equal(tf.shape(tokens), [hyperparameters.batch_size, unroll_steps])
    ] + [
        tf.assert_equal(tf.shape(next_tokens), [hyperparameters.batch_size, unroll_steps])
    ] if is_training else []):
        embedding_size = hyperparameters.embedding_size
        if embedding_size:
            embeddings = tf.get_variable('input_embeddings', [num_classes, embedding_size])
            inputs = tf.nn.embedding_lookup(embeddings, tokens)
        else:
            # convert from tokens (e.g. "2") to one-hot representation (e.g. "[0, 0, 1, 0]")
            inputs = tf.one_hot(tokens, num_classes)

        # note: the batch size is 1 during prediction
        batch_size = hyperparameters.batch_size

        def create_lstm_cell(layer):
            if hyperparameters.layer_norm:
                if hyperparameters.num_proj:
                    raise Exception(
                        'No support for layer normalization together with projection layer.')
                cell = rnn.LayerNormBasicLSTMCell(
                    hyperparameters.lstm_state_size,
                    # here, we use the local variable dropout that is set to 0
                    # if we are evaluating.
                    dropout_keep_prob=1 - dropout,
                    layer_norm=hyperparameters.layer_norm)
            else:
                if hyperparameters.num_proj:
                    cell = rnn.LSTMCell(hyperparameters.lstm_state_size,
                                        num_proj=hyperparameters.num_proj)
                else:
                    cell = rnn.LSTMBlockCell(hyperparameters.lstm_state_size, forget_bias=0)
                if dropout > 0:
                    cell = rnn.DropoutWrapper(cell, output_keep_prob=1 - dropout)
            return cell

        rnn_cell = rnn.MultiRNNCell(
            [create_lstm_cell(layer) for layer in range(hyperparameters.layers)])
        initial_state = rnn_cell.zero_state(batch_size, TF_DATA_TYPE)

        # TODO: switch to dynamic_rnn?
        # `static_rnn` requires `inputs` to be a list of one-dimensional
        # (one-hot encoded) values
        inputs = tf.unstack(inputs, num=unroll_steps, axis=1)
        # outputs has shape [ unroll_steps, batch_size, lstm_state_size (or num_proj if set) ]
        outputs, states = rnn.static_rnn(rnn_cell, inputs,
                                         initial_state=initial_state, dtype=TF_DATA_TYPE)
        output_size = hyperparameters.lstm_state_size if not hyperparameters.num_proj \
            else hyperparameters.num_proj
        outputs = tf.reshape(outputs, [-1, output_size])
        # and now [ unroll_steps * batch_size, lstm_state_size ]

        # let output be a linear combination of the activation of the last layer of the RNN
        softmax_w = tf.get_variable("softmax_w", [output_size, num_classes], dtype=TF_DATA_TYPE)
        softmax_b = tf.get_variable("softmax_b", [num_classes], dtype=TF_DATA_TYPE)
        prediction_logits = tf.nn.xw_plus_b(outputs, softmax_w, softmax_b)
        # prediction_logits is now [ unroll_steps * batch_size, CLASSES ]
        prediction_logits = tf.reshape(prediction_logits,
                                       [unroll_steps, -1, num_classes], name='logits')
        # prediction_logits is now [ unroll_steps, batch_size, CLASSES ]
        prediction_logits = tf.transpose(prediction_logits, [1, 0, 2])
        # prediction_logits is now [ batch_size, unroll_steps, CLASSES ]

        # sample the probability distribution represented by the logits to arrive at a
        # predicted next character (only used during prediction, not training)
        # -1 means we take the output of the last step, i.e. the predicted last
        # character. On the other hand, we set `unroll_steps` to 1 during prediction,
        # so we could replace it with 0.
        prediction_class_id = tf.multinomial(prediction_logits[:, -1, :], num_samples=1)

        # store the various state tensors in collections
        # where the hooks can retrieve them later
        tf.add_to_collection(PREDICTION_LOGITS, prediction_logits)
        tf.add_to_collection(PREDICTION_CLASS_ID, prediction_class_id)
        for single_layer_initial_state in initial_state:
            tf.add_to_collection(INITIAL_LSTM_STATE_C, single_layer_initial_state.c)
            tf.add_to_collection(INITIAL_LSTM_STATE_H, single_layer_initial_state.h)
        for single_layer_state in states:
            tf.add_to_collection(LSTM_STATE_C, single_layer_state.c)
            tf.add_to_collection(LSTM_STATE_H, single_layer_state.h)

        if is_training:
            # the loss is the difference between the predicted distribution
            # and the actual next tokens.
            loss = tf.contrib.seq2seq.sequence_loss(
                prediction_logits, next_tokens,
                tf.ones(tf.shape(next_tokens), dtype=TF_DATA_TYPE),
                average_across_timesteps=True, average_across_batch=True)
            learning_rate = tf.train.exponential_decay(
                hyperparameters.learning_rate,
                tf.train.get_global_step(),
                decay_steps=hyperparameters.lr_decay_steps,
                decay_rate=hyperparameters.lr_decay_rate,
                staircase=True)
            train_op = tf.contrib.layers.optimize_loss(
                loss=loss,
                global_step=tf.train.get_global_step(),
                clip_gradients=hyperparameters.clip_gradients,
                learning_rate=learning_rate,
                optimizer=hyperparameters.optimizer)
            perplexity = tf.exp(loss)
            tf.summary.scalar('perplexity', perplexity)
            eval_metric_ops = {
                'eval_avg_loss': tf.metrics.mean(loss),
                'perplexity': tf.metrics.mean(perplexity)
            }
        else:
            loss = None
            train_op = None
            eval_metric_ops = None

        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                PREDICTION_LOGITS: prediction_logits,
                PREDICTION_CLASS_ID: prediction_class_id
            },
            loss=loss,
            train_op=train_op,
            eval_metric_ops=eval_metric_ops,
            export_outputs={
                "predicted": tf.estimator.export.PredictOutput(prediction_logits)
            })
def load_examples():
    if a.test_rain is None or not os.path.exists(a.test_rain):
        raise Exception("test_rain directory does not exist")

    input_paths = glob.glob(os.path.join(a.test_rain, "*.jpg"))
    decode = tf.image.decode_jpeg
    if len(input_paths) == 0:
        input_paths = glob.glob(os.path.join(a.test_rain, "*.png"))
        decode = tf.image.decode_png
    if len(input_paths) == 0:
        raise Exception("test_rain directory contains no image files")

    def get_name(path):
        name, _ = os.path.splitext(os.path.basename(path))
        return name

    # if the image names are numbers, sort by the value rather than asciibetically
    if all(get_name(path).isdigit() for path in input_paths):
        input_paths = sorted(input_paths, key=lambda path: int(get_name(path)))
    else:
        input_paths = sorted(input_paths)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths,
                                                    shuffle=a.mode == "train")
        reader = tf.WholeFileReader()
        paths, contents = reader.read(path_queue)
        raw_input = decode(contents)
        raw_input = tf.image.convert_image_dtype(raw_input, dtype=tf.float32)

        assertion = tf.assert_equal(tf.shape(raw_input)[2], 3,
                                    message="image does not have 3 channels")
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)

        raw_input.set_shape([None, None, 3])
        a_images = preprocess(raw_input)

    ########################
    if a.test_ground is None or not os.path.exists(a.test_ground):
        raise Exception("test_ground directory does not exist")

    input_paths2 = glob.glob(os.path.join(a.test_ground, "*.jpg"))
    decode = tf.image.decode_jpeg
    if len(input_paths2) == 0:
        input_paths2 = glob.glob(os.path.join(a.test_ground, "*.png"))
        decode = tf.image.decode_png
    if len(input_paths2) == 0:
        raise Exception("test_ground directory contains no image files")

    if all(get_name(path).isdigit() for path in input_paths2):
        input_paths2 = sorted(input_paths2, key=lambda path: int(get_name(path)))
    else:
        input_paths2 = sorted(input_paths2)

    with tf.name_scope("load_images"):
        path_queue2 = tf.train.string_input_producer(input_paths2,
                                                     shuffle=a.mode == "train")
        reader = tf.WholeFileReader()
        paths, contents = reader.read(path_queue2)
        raw_input2 = decode(contents)
        raw_input2 = tf.image.convert_image_dtype(raw_input2, dtype=tf.float32)

        assertion2 = tf.assert_equal(tf.shape(raw_input2)[2], 3,
                                     message="image does not have 3 channels")
        with tf.control_dependencies([assertion2]):
            raw_input2 = tf.identity(raw_input2)

        raw_input2.set_shape([None, None, 3])
        b_images = preprocess(raw_input2)

    inputs, targets = [a_images, b_images]

    #######################
    # synchronize seed for image operations so that we do the same operations
    # to both input and output images
    seed = random.randint(0, 2**31 - 1)

    def transform(image):
        r = image
        if a.flip:
            r = tf.image.random_flip_left_right(r, seed=seed)
        h, w, _ = r.get_shape()
        if h < a.img_h or w < a.img_w:
            r = tf.image.resize_images(r, [a.img_h, a.img_w],
                                       method=tf.image.ResizeMethod.AREA)
        if a.mode == "train":
            offsize1 = h - a.img_h
            offsize2 = w - a.img_w
            if offsize1 > 0 and offsize2 > 0:
                # random crop; use the smaller margin so the offset stays
                # valid in both dimensions
                offsize = offsize1 if (offsize1 < offsize2) else offsize2
                offset = tf.cast(tf.floor(
                    tf.random_uniform([2], 0, offsize, seed=seed)),
                    dtype=tf.int32)
                r = tf.image.crop_to_bounding_box(r, offset[0], offset[1],
                                                  a.img_h, a.img_w)
            else:
                r = tf.image.resize_images(r, [a.img_h, a.img_w],
                                           method=tf.image.ResizeMethod.AREA)
        if a.mode == "test":
            r = tf.image.resize_images(r, [a.img_h, a.img_w],
                                       method=tf.image.ResizeMethod.AREA)
        return r

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)
    steps_per_epoch = int(math.ceil(len(input_paths) / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        steps_per_epoch=steps_per_epoch,
    )
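# --- Consumption sketch (an assumption, not from the original file). The
# pipeline above is queue-based, so it needs a Coordinator and queue
# runners; assumes the global flags object `a` is already parsed.
import tensorflow as tf

def _sketch_run_pipeline():
    examples = load_examples()
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(),
                  tf.local_variables_initializer()])
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        rainy, clean = sess.run([examples.inputs, examples.targets])
        coord.request_stop()
        coord.join(threads)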
def prepare_reader(self, filename_queue, max_quantized_value=2, min_quantized_value=-2): """Creates a single reader thread for YouTube8M SequenceExamples. Args: filename_queue: A tensorflow queue of filename locations. max_quantized_value: the maximum of the quantized value. min_quantized_value: the minimum of the quantized value. Returns: A tuple of video indexes, video features, labels, and padding data. """ reader = tf.TFRecordReader() _, serialized_example = reader.read(filename_queue) contexts, features = tf.parse_single_sequence_example( serialized_example, context_features={ "video_id": tf.FixedLenFeature([], tf.string), "labels": tf.VarLenFeature(tf.int64) }, sequence_features={ feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names }) # read ground truth labels labels = (tf.cast( tf.sparse_to_dense(contexts["labels"].values, (self.num_classes, ), 1, validate_indices=False), tf.bool)) # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), \ "length of feature_names (={}) != length of feature_sizes (={})".format( \ len(self.feature_names), len(self.feature_sizes)) num_frames = -1 # the number of frames in the video feature_matrices = [None ] * num_features # an array of different features for feature_index in range(num_features): feature_matrix, num_frames_in_this_feature = self.get_video_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) if num_frames == -1: num_frames = num_frames_in_this_feature else: tf.assert_equal(num_frames, num_frames_in_this_feature) feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features video_matrix = tf.concat(feature_matrices, 1) return contexts["video_id"], video_matrix, labels, num_frames
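# --- Hedged driver sketch: how a reader like the one above is typically fed.
# `reader` is an instance of the enclosing class, and the shard pattern is a
# hypothetical example.
import tensorflow as tf

def _sketch_read_one_example(reader, file_pattern="train*.tfrecord"):
    filename_queue = tf.train.string_input_producer(
        tf.gfile.Glob(file_pattern), num_epochs=1, shuffle=False)
    return reader.prepare_reader(filename_queue)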
def oc_per_batch_element( beta, x, q_min, object_weights, # V x 1 !! truth_idx, is_spectator, payload_loss, S_B=1., noise_q_min=None, distance_scale=None, payload_weight_function=None, #receives betas as K x V x 1 as input, and a threshold val payload_weight_threshold=0.8, use_mean_x=0., cont_beta_loss=False, prob_repulsion=False, phase_transition=False, phase_transition_double_weight=False, payload_beta_gradient_damping_strength=0., kalpha_damping_strength=0., beta_gradient_damping=0., soft_q_scaling=True, weight_by_q=False, repulsion_q_min=-1., super_repulsion=False, super_attraction=False, div_repulsion=False, soft_att=True, dynamic_payload_scaling_onset=-0.03): ''' all inputs V x X , where X can be 1 ''' tf.assert_equal(True, is_spectator >= 0.) tf.assert_equal(True, beta >= 0.) if prob_repulsion: raise ValueError("prob_repulsion not implemented") if phase_transition_double_weight: raise ValueError("phase_transition_double_weight not implemented") if payload_weight_function is not None: raise ValueError("payload_weight_function not implemented") #set all spectators invalid here, everything scales with beta, so: if beta_gradient_damping > 0.: beta = beta_gradient_damping * tf.stop_gradient(beta) + ( 1. - beta_gradient_damping) * beta beta_in = beta beta = tf.clip_by_value(beta, 0., 1. - 1e-4) q_min *= (1. - is_spectator) qraw = tf.math.atanh(beta)**2 if soft_q_scaling: qraw = tf.math.atanh(beta_in / 1.002)**2 #beta_in**4 *20. is_noise = tf.where(truth_idx < 0, tf.zeros_like(truth_idx, dtype='float32') + 1., 0.) #V x 1 if noise_q_min is not None: q_min = (1. - is_noise) * q_min + is_noise * noise_q_min q_min = tf.where( q_min < 0, 0., q_min) #just safety in case there are some numerical effects q = qraw + q_min # V x 1 #q = tf.where(beta_in<1.-1e-4, q, tf.math.atanh(1.-1e-4)**2 + q_min + beta_in) #just give the rest above clip a gradient N = tf.cast(beta.shape[0], dtype='float32') Msel, M_not, N_per_obj = CreateMidx(truth_idx, calc_m_not=True) #use eager here if Msel is None: #V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen print( '>>> WARNING: Event has no objects, only noise! Will return zero loss. <<<' ) zero_tensor = tf.reduce_mean(q, axis=0) * 0. zero_payload = tf.reduce_mean(payload_loss, axis=0) * 0. return zero_tensor, zero_tensor, zero_tensor, zero_tensor, zero_payload, zero_tensor N_per_obj = tf.cast(N_per_obj, dtype='float32') # K x 1 K = tf.cast(Msel.shape[0], dtype='float32') ######################################################## #sanity check, use none of the following for the loss calculation truth_m = SelectWithDefault(Msel, truth_idx, -2) #K x V-obj x 1 truth_same = truth_m[:, 0:1] == truth_m truth_same = tf.where(truth_m == -2, True, truth_same) tf.assert_equal( tf.reduce_all(truth_same), True, message="truth indices do not match object selection, serious bug") #end sanity check ######################################################## padmask_m = SelectWithDefault(Msel, tf.zeros_like(beta_in) + 1., 0.) #K x V-obj x 1 x_m = SelectWithDefault(Msel, x, 0.) #K x V-obj x C beta_m = SelectWithDefault(Msel, beta, 0.) #K x V-obj x 1 is_spectator_m = SelectWithDefault(Msel, is_spectator, 0.) #K x V-obj x 1 q_m = SelectWithDefault(Msel, q, 0.) #K x V-obj x 1 object_weights_m = SelectWithDefault(Msel, object_weights, 0.) distance_scale += 1e-3 distance_scale_m = SelectWithDefault(Msel, distance_scale, 1.) tf.assert_greater(distance_scale_m, 0., message="predicted distances must be greater zero") kalpha_m = tf.argmax((1. 
- is_spectator_m) * beta_m, axis=1) # K x 1 x_kalpha_m = tf.gather_nd(x_m, kalpha_m, batch_dims=1) # K x C if use_mean_x > 0: x_kalpha_m_m = tf.reduce_sum(beta_m * q_m * x_m * padmask_m, axis=1) # K x C x_kalpha_m_m = tf.math.divide_no_nan( x_kalpha_m_m, tf.reduce_sum(beta_m * q_m * padmask_m, axis=1) + 1e-9) x_kalpha_m = use_mean_x * x_kalpha_m_m + (1. - use_mean_x) * x_kalpha_m if kalpha_damping_strength > 0: x_kalpha_m = kalpha_damping_strength * tf.stop_gradient(x_kalpha_m) + ( 1. - kalpha_damping_strength) * x_kalpha_m q_kalpha_m = tf.gather_nd(q_m, kalpha_m, batch_dims=1) # K x 1 beta_kalpha_m = tf.gather_nd(beta_m, kalpha_m, batch_dims=1) # K x 1 object_weights_kalpha_m = tf.gather_nd(object_weights_m, kalpha_m, batch_dims=1) # K x 1 #make the distance scale a beta weighted mean so that there is more than 1 impact per object distance_scale_kalpha_m = tf.math.divide_no_nan( tf.reduce_sum(distance_scale_m * beta_m * padmask_m, axis=1), tf.reduce_sum(beta_m * padmask_m, axis=1) + 1e-3) + 1e-3 #K x 1 #distance_scale_kalpha_m = tf.gather_nd(distance_scale_m,kalpha_m, batch_dims=1) # K x 1 distance_scale_kalpha_m_exp = tf.expand_dims(distance_scale_kalpha_m, axis=2) # K x 1 x 1 distancesq_m = tf.reduce_sum((tf.expand_dims(x_kalpha_m, axis=1) - x_m)**2, axis=-1, keepdims=True) #K x V-obj x 1 distancesq_m = tf.math.divide_no_nan( distancesq_m, 2. * distance_scale_kalpha_m_exp**2 + 1e-6) absdist = tf.sqrt(distancesq_m + 1e-6) huberdistsq = huber(absdist, d=4) #acts at 4 if super_attraction: huberdistsq += 1. - tf.math.exp(-100. * absdist) V_att = q_m * tf.expand_dims(q_kalpha_m, axis=1) * huberdistsq #K x V-obj x 1 if soft_att: V_att = q_m * tf.math.log(tf.math.exp(1.) * distancesq_m + 1.) V_att = V_att * tf.expand_dims(object_weights_kalpha_m, axis=1) #K x V-obj x 1 if weight_by_q: V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1), tf.reduce_sum(q_m, axis=1)) # K x 1 else: V_att = tf.math.divide_no_nan(tf.reduce_sum(padmask_m * V_att, axis=1), N_per_obj + 1e-9) # K x 1 # opt. used later in payload loss V_att_K = V_att V_att = tf.math.divide_no_nan(tf.reduce_sum(V_att, axis=0), K + 1e-9) # 1 #what if Vatt and Vrep are weighted by q, not scaled by it? q_rep = q if repulsion_q_min >= 0: raise ValueError("repulsion_q_min >= 0: spectators TBI") q_rep = (qraw + repulsion_q_min) * (1. - is_spectator) q_kalpha_m += repulsion_q_min - q_min #now the bit that needs Mnot Mnot_distances = tf.expand_dims(x_kalpha_m, axis=1) #K x 1 x C Mnot_distances = Mnot_distances - tf.expand_dims(x, axis=0) #K x V x C rep_distances = tf.reduce_sum(Mnot_distances**2, axis=-1, keepdims=True) #K x V x 1 rep_distances = tf.math.divide_no_nan( rep_distances, 2. * distance_scale_kalpha_m_exp**2 + 1e-6) V_rep = tf.math.exp( -rep_distances ) #1. / (V_rep + 0.1) #-2.*tf.math.log(1.-tf.math.exp(-V_rep/2.)+1e-5) if super_repulsion: V_rep += 10. * tf.math.exp(-100. * tf.sqrt(rep_distances + 1e-6)) if div_repulsion: V_rep = 1. 
/ (rep_distances + 0.1) #spec weights are in q V_rep *= M_not * tf.expand_dims(q_rep, axis=0) #K x V x 1 V_rep = tf.reduce_sum(V_rep, axis=1) #K x 1 V_rep *= object_weights_kalpha_m * q_kalpha_m #K x 1 if weight_by_q: sumq = tf.reduce_sum(M_not * tf.expand_dims(q_rep, axis=0), axis=1) V_rep = tf.math.divide_no_nan(V_rep, sumq) # K x 1 else: V_rep = tf.math.divide_no_nan( V_rep, tf.expand_dims(tf.expand_dims(N, axis=0), axis=0) - N_per_obj + 1e-9) # K x 1 # opt used later in payload loss V_rep_K = V_rep V_rep = tf.math.divide_no_nan(tf.reduce_sum(V_rep, axis=0), K + 1e-9) # 1 B_pen = None def bpenhelp(b_m, exponent: int): b_mes = tf.reduce_sum(b_m**exponent, axis=1) if not exponent == 1: b_mes = (b_mes + 1e-16)**(1. / float(exponent)) return tf.math.log((1. - b_mes)**2 + 1. + 1e-8) if phase_transition: ## beta terms B_pen = -tf.reduce_sum(padmask_m * 1. / (20. * distancesq_m + 1.), axis=1) # K x 1 B_pen += 1. #remove self-interaction term (just for offset) B_pen *= object_weights_kalpha_m * beta_kalpha_m B_pen = tf.math.divide_no_nan(B_pen, N_per_obj + 1e-9) # K x 1 #now 'standard' 1-beta B_pen -= 0.2 * object_weights_kalpha_m * ( tf.math.log(beta_kalpha_m + 1e-9) ) #tf.math.sqrt(beta_kalpha_m+1e-6) #another "-> 1, but slower" per object B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen, axis=0), K + 1e-9) # 1 else: B_pen_po = object_weights_kalpha_m * (1. - beta_kalpha_m) B_pen = tf.math.divide_no_nan(tf.reduce_sum(B_pen_po, axis=0), K + 1e-9) #1 #get out of random gradients in the beginning #introduces gradients on all betas of hits rather than just the max one B_up = tf.math.divide_no_nan( tf.reduce_sum((1. - is_noise) * (1. - beta_in)), N - tf.reduce_sum(is_noise)) B_pen += 0.01 * B_pen * B_up #if it's high try to elevate all betas if cont_beta_loss: B_pen = bpenhelp(beta_m, 2) + bpenhelp(beta_m, 4) B_pen = tf.math.divide_no_nan( tf.reduce_sum(object_weights_kalpha_m * B_pen, axis=0), K + 1e-9) too_much_B_pen = object_weights_kalpha_m * bpenhelp( beta_m, 1) #K x 1, don't make it steep too_much_B_pen = tf.math.divide_no_nan(tf.reduce_sum(too_much_B_pen), K + 1e-9) Noise_pen = S_B * tf.math.divide_no_nan(tf.reduce_sum(is_noise * beta_in), tf.reduce_sum(is_noise) + 1e-3) #explicit payload weight function here, the old one was odd #too aggressive scaling is bad for high learning rates. p_w = padmask_m * tf.math.atanh(beta_m / 1.002)**2 #this is well behaved if payload_beta_gradient_damping_strength > 0: p_w = payload_beta_gradient_damping_strength * tf.stop_gradient(p_w) + \ (1.- payload_beta_gradient_damping_strength)* p_w payload_loss_m = p_w * SelectWithDefault( Msel, (1. - is_noise) * payload_loss, 0.) #K x V_perobj x P payload_loss_m = object_weights_kalpha_m * tf.reduce_sum(payload_loss_m, axis=1) # K x P #here normalisation per object payload_loss_m = tf.math.divide_no_nan(payload_loss_m, tf.reduce_sum(p_w, axis=1)) #print('dynamic_payload_scaling_onset',dynamic_payload_scaling_onset) if dynamic_payload_scaling_onset > 0: #stop gradient V_scaler = tf.stop_gradient(V_rep_K + V_att_K) # K x 1 #print('N_per_obj[V_scaler=0]',N_per_obj[V_scaler==0]) #max of V_scaler is around 1 given the potentials scaling = tf.exp(-tf.math.log(2.) * V_scaler / (dynamic_payload_scaling_onset / 5.)) #print('affected fraction',tf.math.count_nonzero(scaling>0.5,dtype='float32')/K,'max',tf.reduce_max(V_scaler,axis=0,keepdims=True)) payload_loss_m *= scaling #basically the onset of the rise #pll = tf.math.divide_no_nan(payload_loss_m, N_per_obj+1e-9) # K x P #really? 
pll = tf.math.divide_no_nan(tf.reduce_sum(payload_loss_m, axis=0), K + 1e-3) # P return V_att, V_rep, Noise_pen, B_pen, pll, too_much_B_pen
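# --- Standalone illustration (assumption, not from the original file) of the
# per-vertex "charge" used above: q = atanh(beta)^2 + q_min. The soft variant
# divides beta by 1.002 so the gradient stays finite as beta -> 1.
import tensorflow as tf

beta = tf.constant([[0.1], [0.5], [0.999]])  # V x 1
q_min = 0.5
q_hard = tf.math.atanh(tf.clip_by_value(beta, 0., 1. - 1e-4))**2 + q_min
q_soft = tf.math.atanh(beta / 1.002)**2 + q_min  # soft_q_scaling branch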
def reward_func(sample_solution, decode_len=0.0, n_nodes=0.0, depot=None):
    """The reward for the VRP task is defined as the negative value of the route length

    Args:
        sample_solution: a list tensor of size decode_len of shape
            [batch_size x input_dim]
        depot: if not None, we are also aiming at decreasing the number of
            returns to the depot

    Returns:
        rewards: tensor of size [batch_size]

    Example:
        sample_solution = [[[1,1],[2,2]],[[3,3],[4,4]],[[5,5],[6,6]]]
        sourceL = 3
        batch_size = 2
        input_dim = 2
        sample_solution_tilted = [[[5,5],[6,6]],
                                  [[1,1],[2,2]],
                                  [[3,3],[4,4]]]
    """
    if depot is not None:
        counter = tf.zeros_like(depot)[:, 0]
        depot_visits = tf.cast(tf.equal(sample_solution[0], depot),
                               tf.float32)[:, 0]
        tf.assert_equal(depot_visits, tf.ones_like(depot_visits))
        for i in range(1, len(sample_solution)):
            interm_depot = tf.cast(tf.equal(sample_solution[i], depot),
                                   tf.float32)[:, 0]
            counter = tf.add(tf.multiply(counter, interm_depot), interm_depot)
            depot_visits = tf.add(
                depot_visits,
                tf.multiply(interm_depot,
                            tf.cast(tf.less(counter, 1.5), tf.float32)))
            # depot_visits = tf.add(depot_visits, tf.cast(tf.equal(sample_solution[i], depot), tf.float32)[:, 0])

    # make sample_solution of shape [sourceL x batch_size x input_dim]
    sample_solution = tf.stack(sample_solution, 0)

    if depot is not None:
        max_length = tf.stack([depot for d in range(decode_len)], 0)
        interm_max_lens = tf.multiply(
            (sample_solution[:, :, 1] - max_length[:, :, 1]),
            tf.cos(tf.scalar_mul(
                0.5, (sample_solution[:, :, 0] + max_length[:, :, 0]))))
        distance_decoded = tf.scalar_mul(
            6371,
            tf.sqrt(tf.square(interm_max_lens) +
                    tf.square(sample_solution[:, :, 0] - max_length[:, :, 0])))
        max_lens_decoded = tf.reduce_sum(distance_decoded, 0)

    # make sure that we only take x,y (and not b_tw and e_tw)
    sample_solution = sample_solution[:, :, :2]
    sample_solution_tilted = tf.concat(
        (tf.expand_dims(sample_solution[-1], 0), sample_solution[:-1]), 0)

    # get the reward based on the route lengths
    route_lens_decoded = tf.reduce_sum(tf.pow(tf.reduce_sum(tf.pow(
        (sample_solution_tilted - sample_solution), 2), 2), .5), 0)

    if depot is not None:
        # reward = tf.add(tf.scalar_mul(70.0, tf.scalar_mul(1.0 / n_nodes, depot_visits)), tf.scalar_mul(30.0, tf.divide(route_lens_decoded, max_lens_decoded)))
        reward = tf.add(tf.scalar_mul(100.0, depot_visits), route_lens_decoded)
        return reward
    else:
        return route_lens_decoded
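# --- Toy check of the plain route-length branch (depot=None), mirroring the
# docstring example: three decode steps, batch of two 2-D points.
import tensorflow as tf

sample_solution = [tf.constant([[1., 1.], [2., 2.]]),
                   tf.constant([[3., 3.], [4., 4.]]),
                   tf.constant([[5., 5.], [6., 6.]])]
route_lens = reward_func(sample_solution)  # -> [batch_size] route lengths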
def prepare_serialized_examples(self, serialized_example, max_quantized_value=2, min_quantized_value=-2): contexts, features = tf.parse_single_sequence_example( serialized_example, context_features={"video_id": tf.FixedLenFeature( [], tf.string), "labels": tf.VarLenFeature(tf.int64)}, sequence_features={ feature_name : tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names }) # read ground truth labels labels = (tf.cast( tf.sparse_to_dense(contexts["labels"].values, (self.num_classes,), 1, validate_indices=False), tf.bool)) # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), \ "length of feature_names (={}) != length of feature_sizes (={})".format( \ len(self.feature_names), len(self.feature_sizes)) num_frames = -1 # the number of frames in the video feature_matrices = [None] * num_features # an array of different features for feature_index in range(num_features): #print "feature : {}".format(tf.reshape(features[self.feature_names[0]], [-1, 16384])) feature_matrix, num_frames_in_this_feature = self.get_video_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) print ("feature_matrix : {}".format(feature_matrix.get_shape())) # maxframe x featuresize if num_frames == -1: num_frames = num_frames_in_this_feature else: tf.assert_equal(num_frames, num_frames_in_this_feature) feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features video_matrix = tf.concat(feature_matrices, 1) # convert to batch format. # TODO: Do proper batch reads to remove the IO bottleneck. batch_video_ids = tf.expand_dims(contexts["video_id"], 0) batch_video_matrix = tf.expand_dims(video_matrix, 0) batch_labels = tf.expand_dims(labels, 0) batch_frames = tf.expand_dims(num_frames, 0) return batch_video_ids, batch_video_matrix, batch_labels, batch_frames
def prepare_serialized_examples(self, serialized_example, max_quantized_value=2, min_quantized_value=-2): if self.num_classes == 1: contexts, features = tf.parse_single_sequence_example( serialized_example, context_features={ "video_id": tf.FixedLenFeature([], tf.string), "labels": tf.FixedLenFeature([], tf.int64) }, sequence_features={ feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names }) # read ground truth labels # For binary_crossentropy the labels shape should be [batch_size, 1] # The batch_size will be added later in the expand_dims, but the [,1] # should be set here. Hence the [] around the label value (since label # is a scalar value of either 1 or 0) labels = [tf.cast(contexts["labels"], tf.float32)] else: contexts, features = tf.parse_single_sequence_example( serialized_example, context_features={ "video_id": tf.FixedLenFeature([], tf.string), "labels": tf.VarLenFeature(tf.int64) }, sequence_features={ feature_name: tf.FixedLenSequenceFeature([], dtype=tf.string) for feature_name in self.feature_names }) # read ground truth labels labels = (tf.cast( tf.sparse_to_dense(contexts["labels"].values, (self.num_classes, ), 1, validate_indices=False), tf.float32)) # Keep this commented out, just wanted to see what the labels are # labels = tf.Print(labels, [labels], "labels tensor values: ") # loads (potentially) different types of features and concatenates them num_features = len(self.feature_names) assert num_features > 0, "No feature selected: feature_names is empty!" assert len(self.feature_names) == len(self.feature_sizes), \ "length of feature_names (={}) != length of feature_sizes (={})".format(len(self.feature_names), len(self.feature_sizes)) num_frames = -1 # the number of frames in the video feature_matrices = [None ] * num_features # an array of different features for feature_index in range(num_features): feature_matrix, num_frames_in_this_feature = self.get_audio_matrix( features[self.feature_names[feature_index]], self.feature_sizes[feature_index], self.max_frames, max_quantized_value, min_quantized_value) if num_frames == -1: num_frames = num_frames_in_this_feature else: tf.assert_equal(num_frames, num_frames_in_this_feature) feature_matrices[feature_index] = feature_matrix # cap the number of frames at self.max_frames num_frames = tf.minimum(num_frames, self.max_frames) # concatenate different features audio_matrix = tf.concat(feature_matrices, 1) # Normalize input features on feature dimensions axis, that is, in a [1][300][128] tensor feature_dim = len(audio_matrix.get_shape()) - 1 audio_matrix = tf.nn.l2_normalize(audio_matrix, feature_dim) # Keep this commented out, just wanted to see the audio_matrix tensors picked up properly # audio_matrix = tf.Print(audio_matrix, [audio_matrix], "video matrix tensor values") # convert to batch format. batch_video_ids = tf.expand_dims(contexts["video_id"], 0) batch_audio_matrix = tf.expand_dims(audio_matrix, 0) batch_labels = tf.expand_dims(labels, 0) batch_frames = tf.expand_dims(num_frames, 0) return batch_video_ids, batch_audio_matrix, batch_labels, batch_frames
def create_id3_embedding(videos):
    """Embeds the given videos using the Inflated 3D Convolution network.

    Downloads the graph of the I3D from tf.hub and adds it to the graph on
    the first call.

    Args:
        videos: <float32>[batch_size, num_frames, height=224, width=224, depth=3].
            Expected range is [-1, 1].

    Returns:
        embedding: <float32>[batch_size, embedding_size]. embedding_size
            depends on the model used.

    Raises:
        AssertionError: when the static batch size of `videos` is not supported.
    """
    batch_size = 16
    module_spec = "https://tfhub.dev/deepmind/i3d-kinetics-400/1"

    # Making sure that we import the graph separately for
    # each different input video tensor.
    module_name = "fvd_kinetics-400_id3_module_" + six.ensure_str(
        videos.name).replace(":", "_")

    assert_ops = [
        tf.Assert(
            tf.reduce_max(videos) <= 1.001,
            ["max value in frame is > 1", videos]),
        tf.Assert(
            tf.reduce_min(videos) >= -1.001,
            ["min value in frame is < -1", videos]),
        tf.assert_equal(
            tf.shape(videos)[0],
            batch_size,
            ["invalid frame batch size: ", tf.shape(videos)],
            summarize=6),
    ]
    with tf.control_dependencies(assert_ops):
        videos = tf.identity(videos)

    module_scope = "%s_apply_default/" % module_name

    # To check whether the module has already been loaded into the graph, we look
    # for a given tensor name. If this tensor name exists, we assume the function
    # has been called before and the graph was imported. Otherwise we import it.
    # Note: in theory, the tensor could exist, but have wrong shapes.
    # This will happen if create_id3_embedding is called with a frames_placeholder
    # of wrong size/batch size, because even though that will throw a tf.Assert
    # on graph-execution time, it will insert the tensor (with wrong shape) into
    # the graph. This is why we need the following assert.
    video_batch_size = int(videos.shape[0])
    assert video_batch_size in [batch_size, -1, None], "Invalid batch size"

    tensor_name = module_scope + "RGB/inception_i3d/Mean:0"
    if not _is_in_graph(tensor_name):
        i3d_model = hub.Module(module_spec, name=module_name)
        i3d_model(videos)

    # gets the kinetics-i3d-400-logits layer
    tensor_name = module_scope + "RGB/inception_i3d/Mean:0"
    tensor = tf.get_default_graph().get_tensor_by_name(tensor_name)
    return tensor
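# --- Hedged usage sketch: the function above asserts a fixed batch of 16
# clips with frames in [-1, 1] at 224x224x3; the frame count (64 here) is an
# assumption.
import tensorflow as tf

def _sketch_embed_videos():
    videos_ph = tf.placeholder(tf.float32, [16, 64, 224, 224, 3])
    return create_id3_embedding(videos_ph)  # <float32>[16, embedding_size]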
def map_fun(args, ctx, model_name="resnet_new", img_h=64, img_w=64, img_c=3): import numpy import time from train_mitoses import create_model, compute_data_loss, compute_metrics logging.info("Start the training worker at {}".format( socket.gethostname())) worker_num = ctx.worker_num job_name = ctx.job_name task_index = ctx.task_index # Delay PS node a bit, since workers seem to reserve GPUs quickly/reliably (w/o conflict) if job_name == "ps": time.sleep((worker_num + 1) * 5) # Parameters batch_size = args.batch_size # Get TF cluster and server instances cluster, server = ctx.start_cluster_server(1, args.rdma) def feed_dict(batch): # Convert from [(labels, images)] to two numpy arrays of the proper type images = [] labels = [] for item in batch: images.append(item[1]) labels.append(item[0]) img_batch = numpy.array(images, dtype=numpy.float32) label_batch = numpy.array(labels, dtype=numpy.float32).reshape(-1, 1) return img_batch, label_batch if job_name == "ps": server.join() elif job_name == "worker": # Assigns ops to the local worker by default with tf.device( tf.train.replica_device_setter( worker_device="/job:worker/task:{0}".format(task_index), cluster=cluster)): # Placeholders or QueueRunner/Readers for input data with tf.name_scope('inputs'): images_var = tf.placeholder(tf.float32, (None, img_h, img_w, img_c), name="train_img") labels_var = tf.placeholder(tf.float32, (None, 1), name="train_label") tf.summary.image("train_img", images_var) with tf.name_scope('model'): model_tower, model_base = create_model(model_name, (img_h, img_w, img_c), images_var) model = model_tower logits = model.output probs = tf.nn.sigmoid(logits, name="probs") preds = tf.round(probs, name="preds") global_step = tf.train.get_or_create_global_step() with tf.name_scope("loss"): with tf.control_dependencies([ tf.assert_equal( tf.shape(labels_var)[0], tf.shape(preds)[0]) ]): loss = compute_data_loss(labels_var, logits) tf.summary.scalar("loss", loss) with tf.name_scope("train"): train_op = tf.train.AdagradOptimizer(0.01).minimize( loss, global_step=global_step) with tf.name_scope("metrics"): num_thresholds = 8 mean_loss, acc, ppv, sens, f1, pr, f1s, metric_update_ops, metric_reset_ops \ = compute_metrics(loss, labels_var, preds, probs, num_thresholds) f1_max = tf.reduce_max(f1s) thresh_max = pr.thresholds[tf.argmax(f1s)] tf.summary.scalar("acc", acc) tf.summary.scalar("f1", f1) tf.summary.scalar("f1_max", f1_max) tf.summary.scalar("thresh_max", thresh_max) summary_op = tf.summary.merge_all() logdir = ctx.absolute_path(args.model) logging.info("tensorflow model path: {0}".format(logdir)) if job_name == "worker" and task_index == 0: summary_writer = tf.summary.FileWriter( logdir, graph=tf.get_default_graph()) # The MonitoredTrainingSession takes care of session initialization, restoring from # a checkpoint, and closing when done or an error occurs with tf.train.MonitoredTrainingSession( master=server.target, is_chief=(task_index == 0), checkpoint_dir=logdir, hooks=[tf.train.StopAtStepHook(last_step=args.steps)], chief_only_hooks=[ ExportHook(ctx.absolute_path(args.export_dir), images_var, preds) ]) as mon_sess: step = 0 tf_feed = ctx.get_data_feed(args.mode == "train") logging.info("Start the training on worker {} at {}".format( socket.gethostname(), datetime.now().isoformat())) start_time = time.time() while not mon_sess.should_stop() and not tf_feed.should_stop(): fetch = tf_feed.next_batch(batch_size) batch_imgs, batch_labels = feed_dict(fetch) feed = {images_var: batch_imgs, labels_var: batch_labels} if 
len(batch_imgs) > 0:
                    if args.mode == "train":
                        _, summary, step, metric_update, probs_output, preds_output, labels_output = \
                            mon_sess.run([train_op, summary_op, global_step,
                                          metric_update_ops, probs, preds,
                                          labels_var], feed_dict=feed)
                        # print accuracy and save model checkpoints to HDFS
                        # every 100 steps
                        if step % 100 == 0:
                            end_time = time.time()
                            logging.info("Step {} took {} ms".format(
                                step, (end_time - start_time) * 1000))
                            start_time = time.time()
                            # logging.info("{0} step: {1} accuracy: {2} probs: {3} preds: {4} labels: {5}".format(
                            #     datetime.now().isoformat(), step,
                            #     mon_sess.run(acc), probs_output,
                            #     preds_output, labels_output))
                            if task_index == 0:
                                summary_writer.add_summary(summary, step)
                    else:
                        labels_output, preds_output, acc_output = mon_sess.run(
                            [labels_var, preds, acc], feed_dict=feed)
                        results = [
                            "{0} Label: {1}, Prediction: {2}".format(
                                datetime.now().isoformat(), l, p)
                            for l, p in zip(labels_output, preds_output)
                        ]
                        tf_feed.batch_results(results)
                        print("results: {0}, acc: {1}".format(
                            results, acc_output))

            logging.info("Finish the training on worker {} at {}".format(
                socket.gethostname(), datetime.now().isoformat()))

            if mon_sess.should_stop() or step >= args.steps:
                logging.info(
                    "Trigger the termination of tf_feed: mon_sess.should_stop "
                    "{}, step >= args.steps {}".format(
                        mon_sess.should_stop(), step >= args.steps))
                tf_feed.terminate()

        # Ask for all the services to stop
        logging.info("{0} stopping MonitoredTrainingSession".format(
            datetime.now().isoformat()))

        if job_name == "worker" and task_index == 0:
            summary_writer.close()
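# --- Hedged launch sketch (TensorFlowOnSpark): how `map_fun` is usually
# handed to a cluster. `sc`, `args`, and the executor count are assumptions,
# not taken from this file.
from tensorflowonspark import TFCluster

def _sketch_launch(sc, args, num_executors):
    cluster = TFCluster.run(sc, map_fun, args, num_executors, num_ps=1,
                            tensorboard=False,
                            input_mode=TFCluster.InputMode.SPARK)
    # cluster.train(images_and_labels_rdd, num_epochs=1)
    cluster.shutdown()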
def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None, **params): """Assign classification and regression targets to each anchor. For a given set of anchors and groundtruth detections, match anchors to groundtruth_boxes and assign classification and regression targets to each anchor as well as weights based on the resulting match (specifying, e.g., which anchors should not contribute to training loss). Anchors that are not matched to anything are given a classification target of self._unmatched_cls_target which can be specified via the constructor. Args: anchors: a BoxList representing N anchors groundtruth_boxes: a BoxList representing M groundtruth boxes groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for each of the ground_truth boxes. The subshape [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set to None, groundtruth_labels assumes a binary problem where all ground_truth boxes get a positive label (of 1). **params: Additional keyword arguments for specific implementations of the Matcher. Returns: cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has shape [num_gt_boxes, d_1, d_2, ... d_k]. cls_weights: a float32 tensor with shape [num_anchors] reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension] reg_weights: a float32 tensor with shape [num_anchors] match: a matcher.Match object encoding the match between anchors and groundtruth boxes, with rows corresponding to groundtruth boxes and columns corresponding to anchors. Raises: ValueError: if anchors or groundtruth_boxes are not of type box_list.BoxList """ if not isinstance(anchors, box_list.BoxList): raise ValueError('anchors must be an BoxList') if not isinstance(groundtruth_boxes, box_list.BoxList): raise ValueError('groundtruth_boxes must be an BoxList') if groundtruth_labels is None: groundtruth_labels = tf.ones( tf.expand_dims(groundtruth_boxes.num_boxes(), 0)) groundtruth_labels = tf.expand_dims(groundtruth_labels, -1) unmatched_shape_assert = tf.assert_equal( tf.shape(groundtruth_labels)[1:], tf.shape(self._unmatched_cls_target), message='Unmatched class target shape incompatible ' 'with groundtruth labels shape!') labels_and_box_shapes_assert = tf.assert_equal( tf.shape(groundtruth_labels)[0], groundtruth_boxes.num_boxes(), message='Groundtruth boxes and labels have incompatible shapes!') with tf.control_dependencies( [unmatched_shape_assert, labels_and_box_shapes_assert]): match_quality_matrix = self._similarity_calc.compare( groundtruth_boxes, anchors) match = self._matcher.match(match_quality_matrix, **params) reg_targets = self._create_regression_targets( anchors, groundtruth_boxes, match) cls_targets = self._create_classification_targets( groundtruth_labels, match) reg_weights = self._create_regression_weights(match) cls_weights = self._create_classification_weights( match, self._positive_class_weight, self._negative_class_weight) num_anchors = anchors.num_boxes_static() if num_anchors is not None: reg_targets = self._reset_target_shape(reg_targets, num_anchors) cls_targets = self._reset_target_shape(cls_targets, num_anchors) reg_weights = self._reset_target_shape(reg_weights, num_anchors) cls_weights = self._reset_target_shape(cls_weights, num_anchors) return cls_targets, cls_weights, reg_targets, reg_weights, match
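# --- Hedged sketch (assumes the object_detection `box_list` module and an
# already-constructed target assigner instance): two anchors, one
# groundtruth box, labels defaulting to the binary positive label.
import tensorflow as tf

def _sketch_assign(assigner):
    anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
                                            [0.5, 0.5, 1.0, 1.0]]))
    gt_boxes = box_list.BoxList(tf.constant([[0.1, 0.1, 0.6, 0.6]]))
    return assigner.assign(anchors, gt_boxes)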
def get_loss_and_counters(self, batch_ph, cached_enc_state, is_train, eos_coeff=None, entropy_reg=0.0, loss_use_logp_any_ref=True): # encode with cached enc state enc_batch_size = tf.shape(cached_enc_state['out'])[0] with tf.control_dependencies([ tf.assert_equal(tf.shape(tensor)[0], enc_batch_size) for tensor in nested_flatten(cached_enc_state) ]): enc_reordered = { k: tf.gather(v, batch_ph['out_to_inp_indices']) for k, v in cached_enc_state.items() } logp = self.model.compute_action_logprobs(batch_ph, is_train=is_train, enc=enc_reordered) insert_logprobas = logp['insert'] # [batch, nout, voc_size] finish_logprobas = logp['finish'] # [batch] # get reference inserts is_ref_insert = inserts_coo_to_tensor(batch_ph['ref_inserts'], tf.shape(batch_ph['out']), len(self.model.out_voc)) is_chosen_insert = inserts_coo_to_tensor(batch_ph['chosen_inserts'], tf.shape(batch_ph['out']), len(self.model.out_voc)) # compute log-probability of any reference insert neg_inf_like_logp = tf.fill(tf.shape(insert_logprobas), -1e9) ref_logp = tf.where(is_ref_insert, insert_logprobas, neg_inf_like_logp) chosen_logp = tf.where(is_chosen_insert, insert_logprobas, neg_inf_like_logp) logp_ref_inserts = tf.reduce_logsumexp( ref_logp if loss_use_logp_any_ref else chosen_logp, axis=(1, 2)) # ^-- [batch_size] should_finish = tf.reduce_any(is_ref_insert[:, :, self.model.out_voc.eos], axis=-1) xent_values = -tf.where(should_finish, finish_logprobas, logp_ref_inserts) # ^-- [batch_size] # reweighting if eos_coeff is None: xent_numerator = tf.reduce_sum(xent_values) else: samples_per_line = tf.to_float(batch_ph['ref_len']) weights = tf.where(should_finish, eos_coeff * samples_per_line, (1.0 - eos_coeff) * samples_per_line / (samples_per_line - 1.0)) # ^-- [batch_size] xent_numerator = tf.reduce_sum(xent_values * weights) batch_size = tf.shape(insert_logprobas)[0] counters = dict( batch_size=tf.to_float(batch_size), xent_numerator=xent_numerator, ) # assemble loss (crossentropy with some extra steps) loss_numerator = xent_numerator if entropy_reg != 0.0: insert_probas = tf.exp( insert_logprobas) # [batch_size, nout, voc_size] insert_p_logp_sum = tf.reduce_sum(insert_probas * insert_logprobas, axis=2) # [batch_size, nout] mask = lib.ops.infer_mask(batch_ph['out'], self.model.out_voc.eos, dtype=tf.float32) # [batch_size, nout] insert_p_logp_sum = tf.reduce_sum(insert_p_logp_sum * mask, axis=1) # [batch_size] finish_p_logp_sum = finish_logprobas * tf.exp( finish_logprobas) # [batch_size] entropy_values = -finish_p_logp_sum - insert_p_logp_sum # [batch_size] entropy_numerator = tf.reduce_sum(entropy_values) loss_numerator -= entropy_reg * entropy_numerator counters.update(entropy_numerator=entropy_numerator) # metrics p_correct_numerator = tf.reduce_sum(tf.exp(-xent_values)) argmax_flat = tf.argmax(tf.reshape(insert_logprobas, [batch_size, -1]), axis=-1) is_argmax_correct = tf.gather_nd( tf.reshape(is_ref_insert, [batch_size, -1]), tf.stack([tf.range(batch_size), tf.to_int32(argmax_flat)], -1)) is_argmax_correct = tf.where(should_finish, tf.exp(finish_logprobas) >= 0.5, is_argmax_correct) acc_numerator = tf.reduce_sum(tf.to_float(is_argmax_correct)) counters.update( loss_numerator=loss_numerator, acc_numerator=acc_numerator, p_correct_numerator=p_correct_numerator, ) return loss_numerator, counters
def get_loss_and_counters(self, batch_ph, cached_enc_state, is_train,
                          loss_use_logp_chosen=False, eos_coeff=None,
                          **kwargs):
    # encode with cached enc state
    enc_batch_size = tf.shape(cached_enc_state['out'])[0]
    with tf.control_dependencies([
            tf.assert_equal(tf.shape(tensor)[0], enc_batch_size)
            for tensor in nested_flatten(cached_enc_state)
    ]):
        enc_reordered = {
            k: tf.gather(v, batch_ph['out_to_inp_indices'])
            for k, v in cached_enc_state.items()
        }

    logp = self.model.compute_action_logprobs(batch_ph, is_train=is_train,
                                              enc=enc_reordered)
    insert_logprobas = logp['insert']  # [batch, nout, voc_size]
    finish_logprobas = logp['finish']  # [batch]

    # get reference inserts
    is_ref_insert = inserts_coo_to_tensor(batch_ph['ref_inserts'],
                                          tf.shape(batch_ph['out']),
                                          len(self.model.out_voc))
    is_chosen_insert = inserts_coo_to_tensor(batch_ph['chosen_inserts'],
                                             tf.shape(batch_ph['out']),
                                             len(self.model.out_voc))
    mask_correct = is_chosen_insert if loss_use_logp_chosen else is_ref_insert

    # assumes that reference inserts for ended hypo are EOS tokens and
    # after-reference are NULL
    should_finish = tf.reduce_any(is_ref_insert[:, :, self.model.out_voc.eos],
                                  axis=-1)

    logp_ref = tf.einsum("btl,btl->b", insert_logprobas,
                         tf.to_float(mask_correct))
    # equivalent to tf.reduce_sum(insert_logprobas * mask_correct, (1, 2)),
    # but without a temporary tensor

    # logp_ref is divided by the number of correct labels to properly compute xent
    xent_values = logp_ref / (tf.reduce_sum(tf.to_float(mask_correct),
                                            (-2, -1)) + 1e-5)
    xent_values = -tf.where(should_finish, finish_logprobas, xent_values)
    # ^-- [batch_size]

    if eos_coeff is None:
        xent_numerator = tf.reduce_sum(xent_values)
    else:
        samples_per_line = tf.to_float(batch_ph['ref_len'])
        weights = tf.where(should_finish,
                           eos_coeff * samples_per_line,
                           (1.0 - eos_coeff) * samples_per_line /
                           (samples_per_line - 1.0))
        # ^-- [batch_size]
        xent_numerator = tf.reduce_sum(xent_values * weights)

    batch_size = tf.shape(insert_logprobas)[0]
    counters = dict(
        batch_size=tf.to_float(batch_size),
        xent_numerator=xent_numerator,
    )

    # assemble loss (crossentropy)
    loss_numerator = xent_numerator

    # metrics
    p_correct_numerator = tf.reduce_sum(tf.exp(logp_ref))
    argmax_flat = tf.argmax(tf.reshape(insert_logprobas, [batch_size, -1]),
                            axis=-1)
    is_argmax_correct = tf.gather_nd(
        tf.reshape(is_ref_insert, [batch_size, -1]),
        tf.stack([tf.range(batch_size), tf.to_int32(argmax_flat)], -1))
    is_argmax_correct = tf.where(should_finish,
                                 tf.exp(finish_logprobas) >= 0.5,
                                 is_argmax_correct)
    acc_numerator = tf.reduce_sum(tf.to_float(is_argmax_correct))

    counters.update(
        loss_numerator=loss_numerator,
        acc_numerator=acc_numerator,
        p_correct_numerator=p_correct_numerator,
    )
    return loss_numerator, counters
def call(self, inputs, states): norm_layer = ops.get_norm_layer(self.hparams.norm_layer) downsample_layer = ops.get_downsample_layer(self.hparams.downsample_layer) upsample_layer = ops.get_upsample_layer(self.hparams.upsample_layer) image_shape = inputs['images'].get_shape().as_list() batch_size, height, width, color_channels = image_shape conv_rnn_states = states['conv_rnn_states'] time = states['time'] with tf.control_dependencies([tf.assert_equal(time[1:], time[0])]): t = tf.to_int32(tf.identity(time[0])) image = tf.where(self.ground_truth[t], inputs['images'], states['gen_image']) # schedule sampling (if any) last_images = states['last_images'][1:] + [image] if 'pix_distribs' in inputs: pix_distrib = tf.where(self.ground_truth[t], inputs['pix_distribs'], states['gen_pix_distrib']) last_pix_distribs = states['last_pix_distribs'][1:] + [pix_distrib] if 'states' in inputs: state = tf.where(self.ground_truth[t], inputs['states'], states['gen_state']) state_action = [] state_action_z = [] if 'actions' in inputs: state_action.append(inputs['actions']) state_action_z.append(inputs['actions']) if 'states' in inputs: state_action.append(state) # don't backpropagate the convnet through the state dynamics state_action_z.append(tf.stop_gradient(state)) if 'zs' in inputs: if self.hparams.use_rnn_z: with tf.variable_scope('%s_z' % ('fc' if self.hparams.ablation_rnn else self.hparams.rnn)): if self.hparams.ablation_rnn: rnn_z = dense(inputs['zs'], self.hparams.nz) rnn_z = tf.nn.tanh(rnn_z) else: rnn_z, rnn_z_state = self._rnn_func(inputs['zs'], states['rnn_z_state'], self.hparams.nz) state_action_z.append(rnn_z) else: state_action_z.append(inputs['zs']) def concat(tensors, axis): if len(tensors) == 0: return tf.zeros([batch_size, 0]) elif len(tensors) == 1: return tensors[0] else: return tf.concat(tensors, axis=axis) state_action = concat(state_action, axis=-1) state_action_z = concat(state_action_z, axis=-1) if 'actions' in inputs: gen_input = tile_concat([image, inputs['actions'][:, None, None, :]], axis=-1) else: gen_input = image layers = [] new_conv_rnn_states = [] for i, (out_channels, use_conv_rnn) in enumerate(self.encoder_layer_specs): with tf.variable_scope('h%d' % i): if i == 0: h = tf.concat([image, self.inputs['images'][0]], axis=-1) kernel_size = (5, 5) else: h = layers[-1][-1] kernel_size = (3, 3) if self.hparams.where_add == 'all' or (self.hparams.where_add == 'input' and i == 0): h = tile_concat([h, state_action_z[:, None, None, :]], axis=-1) h = downsample_layer(h, out_channels, kernel_size=kernel_size, strides=(2, 2)) h = norm_layer(h) h = tf.nn.relu(h) if use_conv_rnn: with tf.variable_scope('%s_h%d' % ('conv' if self.hparams.ablation_rnn else self.hparams.conv_rnn, i)): if self.hparams.where_add == 'all': conv_rnn_h = tile_concat([h, state_action_z[:, None, None, :]], axis=-1) else: conv_rnn_h = h if self.hparams.ablation_rnn: conv_rnn_h = conv2d(conv_rnn_h, out_channels, kernel_size=(5, 5)) conv_rnn_h = norm_layer(conv_rnn_h) conv_rnn_h = tf.nn.relu(conv_rnn_h) else: conv_rnn_state = conv_rnn_states[len(new_conv_rnn_states)] conv_rnn_h, conv_rnn_state = self._conv_rnn_func(conv_rnn_h, conv_rnn_state, out_channels) new_conv_rnn_states.append(conv_rnn_state) layers.append((h, conv_rnn_h) if use_conv_rnn else (h,)) num_encoder_layers = len(layers) for i, (out_channels, use_conv_rnn) in enumerate(self.decoder_layer_specs): with tf.variable_scope('h%d' % len(layers)): if i == 0: h = layers[-1][-1] else: h = tf.concat([layers[-1][-1], layers[num_encoder_layers - i - 1][-1]], axis=-1) 
if self.hparams.where_add == 'all' or (self.hparams.where_add == 'middle' and i == 0): h = tile_concat([h, state_action_z[:, None, None, :]], axis=-1) h = upsample_layer(h, out_channels, kernel_size=(3, 3), strides=(2, 2)) h = norm_layer(h) h = tf.nn.relu(h) if use_conv_rnn: with tf.variable_scope('%s_h%d' % ('conv' if self.hparams.ablation_rnn else self.hparams.conv_rnn, len(layers))): if self.hparams.where_add == 'all': conv_rnn_h = tile_concat([h, state_action_z[:, None, None, :]], axis=-1) else: conv_rnn_h = h if self.hparams.ablation_rnn: conv_rnn_h = conv2d(conv_rnn_h, out_channels, kernel_size=(5, 5)) conv_rnn_h = norm_layer(conv_rnn_h) conv_rnn_h = tf.nn.relu(conv_rnn_h) else: conv_rnn_state = conv_rnn_states[len(new_conv_rnn_states)] conv_rnn_h, conv_rnn_state = self._conv_rnn_func(conv_rnn_h, conv_rnn_state, out_channels) new_conv_rnn_states.append(conv_rnn_state) layers.append((h, conv_rnn_h) if use_conv_rnn else (h,)) assert len(new_conv_rnn_states) == len(conv_rnn_states) if self.hparams.last_frames and self.hparams.num_transformed_images: if self.hparams.transformation == 'flow': with tf.variable_scope('h%d_flow' % len(layers)): h_flow = conv2d(layers[-1][-1], self.hparams.ngf, kernel_size=(3, 3), strides=(1, 1)) h_flow = norm_layer(h_flow) h_flow = tf.nn.relu(h_flow) with tf.variable_scope('flows'): flows = conv2d(h_flow, 2 * self.hparams.last_frames * self.hparams.num_transformed_images, kernel_size=(3, 3), strides=(1, 1)) flows = tf.reshape(flows, [batch_size, height, width, 2, self.hparams.last_frames * self.hparams.num_transformed_images]) else: assert len(self.hparams.kernel_size) == 2 kernel_shape = list(self.hparams.kernel_size) + [self.hparams.last_frames * self.hparams.num_transformed_images] if self.hparams.transformation == 'dna': with tf.variable_scope('h%d_dna_kernel' % len(layers)): h_dna_kernel = conv2d(layers[-1][-1], self.hparams.ngf, kernel_size=(3, 3), strides=(1, 1)) h_dna_kernel = norm_layer(h_dna_kernel) h_dna_kernel = tf.nn.relu(h_dna_kernel) # Using largest hidden state for predicting untied conv kernels. with tf.variable_scope('dna_kernels'): kernels = conv2d(h_dna_kernel, np.prod(kernel_shape), kernel_size=(3, 3), strides=(1, 1)) kernels = tf.reshape(kernels, [batch_size, height, width] + kernel_shape) kernels = kernels + identity_kernel(self.hparams.kernel_size)[None, None, None, :, :, None] kernel_spatial_axes = [3, 4] elif self.hparams.transformation == 'cdna': with tf.variable_scope('cdna_kernels'): smallest_layer = layers[num_encoder_layers - 1][-1] kernels = dense(flatten(smallest_layer), np.prod(kernel_shape)) kernels = tf.reshape(kernels, [batch_size] + kernel_shape) kernels = kernels + identity_kernel(self.hparams.kernel_size)[None, :, :, None] kernel_spatial_axes = [1, 2] else: raise ValueError('Invalid transformation %s' % self.hparams.transformation) if self.hparams.transformation != 'flow': with tf.name_scope('kernel_normalization'): kernels = tf.nn.relu(kernels - RELU_SHIFT) + RELU_SHIFT kernels /= tf.reduce_sum(kernels, axis=kernel_spatial_axes, keepdims=True) if self.hparams.generate_scratch_image: with tf.variable_scope('h%d_scratch' % len(layers)): h_scratch = conv2d(layers[-1][-1], self.hparams.ngf, kernel_size=(3, 3), strides=(1, 1)) h_scratch = norm_layer(h_scratch) h_scratch = tf.nn.relu(h_scratch) # Using largest hidden state for predicting a new image layer. # This allows the network to also generate one image from scratch, # which is useful when regions of the image become unoccluded. 
with tf.variable_scope('scratch_image'): scratch_image = conv2d(h_scratch, color_channels, kernel_size=(3, 3), strides=(1, 1)) scratch_image = tf.nn.sigmoid(scratch_image) with tf.name_scope('transformed_images'): transformed_images = [] if self.hparams.last_frames and self.hparams.num_transformed_images: if self.hparams.transformation == 'flow': transformed_images.extend(apply_flows(last_images, flows)) else: transformed_images.extend(apply_kernels(last_images, kernels, self.hparams.dilation_rate)) if self.hparams.prev_image_background: transformed_images.append(image) if self.hparams.first_image_background and not self.hparams.context_images_background: transformed_images.append(self.inputs['images'][0]) if self.hparams.context_images_background: transformed_images.extend(tf.unstack(self.inputs['images'][:self.hparams.context_frames])) if self.hparams.generate_scratch_image: transformed_images.append(scratch_image) if 'pix_distribs' in inputs: with tf.name_scope('transformed_pix_distribs'): transformed_pix_distribs = [] if self.hparams.last_frames and self.hparams.num_transformed_images: if self.hparams.transformation == 'flow': transformed_pix_distribs.extend(apply_flows(last_pix_distribs, flows)) else: transformed_pix_distribs.extend(apply_kernels(last_pix_distribs, kernels, self.hparams.dilation_rate)) if self.hparams.prev_image_background: transformed_pix_distribs.append(pix_distrib) if self.hparams.first_image_background and not self.hparams.context_images_background: transformed_pix_distribs.append(self.inputs['pix_distribs'][0]) if self.hparams.context_images_background: transformed_pix_distribs.extend(tf.unstack(self.inputs['pix_distribs'][:self.hparams.context_frames])) if self.hparams.generate_scratch_image: transformed_pix_distribs.append(pix_distrib) with tf.name_scope('masks'): if len(transformed_images) > 1: with tf.variable_scope('h%d_masks' % len(layers)): h_masks = conv2d(layers[-1][-1], self.hparams.ngf, kernel_size=(3, 3), strides=(1, 1)) h_masks = norm_layer(h_masks) h_masks = tf.nn.relu(h_masks) with tf.variable_scope('masks'): if self.hparams.dependent_mask: h_masks = tf.concat([h_masks] + transformed_images, axis=-1) masks = conv2d(h_masks, len(transformed_images), kernel_size=(3, 3), strides=(1, 1)) masks = tf.nn.softmax(masks) masks = tf.split(masks, len(transformed_images), axis=-1) elif len(transformed_images) == 1: masks = [tf.ones([batch_size, height, width, 1])] else: raise ValueError("Either one of the following should be true: " "last_frames and num_transformed_images, first_image_background, " "prev_image_background, generate_scratch_image") with tf.name_scope('gen_images'): assert len(transformed_images) == len(masks) gen_image = tf.add_n([transformed_image * mask for transformed_image, mask in zip(transformed_images, masks)]) if 'pix_distribs' in inputs: with tf.name_scope('gen_pix_distribs'): assert len(transformed_pix_distribs) == len(masks) gen_pix_distrib = tf.add_n([transformed_pix_distrib * mask for transformed_pix_distrib, mask in zip(transformed_pix_distribs, masks)]) gen_pix_distrib /= tf.reduce_sum(gen_pix_distrib, axis=(1, 2), keepdims=True) if 'states' in inputs: with tf.name_scope('gen_states'): with tf.variable_scope('state_pred'): gen_state = dense(state_action, inputs['states'].shape[-1].value) outputs = {'gen_images': gen_image, 'gen_inputs': gen_input, 'transformed_images': tf.stack(transformed_images, axis=-1), 'masks': tf.stack(masks, axis=-1)} if 'pix_distribs' in inputs: outputs['gen_pix_distribs'] = gen_pix_distrib 
outputs['transformed_pix_distribs'] = tf.stack(transformed_pix_distribs, axis=-1) if 'states' in inputs: outputs['gen_states'] = gen_state if self.hparams.transformation == 'flow': outputs['gen_flows'] = flows new_states = {'time': time + 1, 'gen_image': gen_image, 'last_images': last_images, 'conv_rnn_states': new_conv_rnn_states} if 'zs' in inputs and self.hparams.use_rnn_z and not self.hparams.ablation_rnn: new_states['rnn_z_state'] = rnn_z_state if 'pix_distribs' in inputs: new_states['gen_pix_distrib'] = gen_pix_distrib new_states['last_pix_distribs'] = last_pix_distribs if 'states' in inputs: new_states['gen_state'] = gen_state return outputs, new_states
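# --- Standalone illustration (assumption, not from the original file) of the
# masked compositing at the end of `call`: candidate images are blended with
# softmax masks that sum to one at every pixel.
import tensorflow as tf

candidates = [tf.random_uniform([4, 8, 8, 3]) for _ in range(3)]
mask_logits = tf.random_uniform([4, 8, 8, len(candidates)])
masks = tf.split(tf.nn.softmax(mask_logits), len(candidates), axis=-1)
composite = tf.add_n([c * m for c, m in zip(candidates, masks)])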
def _set_operations(self, embedding, task, params): self.is_train = params.is_train with tf.device("/cpu:0"): self.embedding = tf.get_variable(shape=embedding.shape, trainable=params.update_embedding, name="embedding_weights", dtype=tf.float32) turns_embedded = tf.nn.embedding_lookup(self.embedding, self.turns) turns_boW = tf.reduce_sum(turns_embedded, axis=2, name="BoW") # Bag of Words inputs = _sender_aware_encoding(turns_boW, self.senders) with tf.name_scope("shared_rnn"): def cell_fn(): return rnn_cell.DropoutWrapper(params.cell(params.hidden_size), output_keep_prob=1 - self.dropout, variational_recurrent=True, dtype=tf.float32) shared_fw_cells = [cell_fn() for _ in range(params.num_layers)] shared_bw_cells = [cell_fn() for _ in range(params.num_layers)] shared_output, _, _ = stack_bidirectional_dynamic_rnn( shared_fw_cells, shared_bw_cells, inputs, sequence_length=self.dialogue_lengths, dtype=tf.float32) # Quality task loss quality_dialogue_repr = tf.reduce_sum(shared_output, axis=1) quality_logits = [] for _ in data.QUALITY_MEASURES: quality_logits.append( tf.layers.dense(quality_dialogue_repr, len(data.QUALITY_SCALES))) quality_logits = tf.stack(quality_logits, axis=1) dist_loss = quality_loss(quality_logits, self.quality_labels) self.quality_prediction = (tf.nn.softmax(quality_logits, axis=-1)) left_prediction, _ = tf.split(self.quality_prediction, [4, 1], 2) _, right_prediction = tf.split(self.quality_prediction, [1, 4], 2) left_quality_labels, _ = tf.split(self.quality_labels, [4, 1], 2) _, right_quality_labels = tf.split(self.quality_labels, [1, 4], 2) diff_prediction = tf.subtract(left_prediction, right_prediction) diff_quality_labels = tf.subtract(left_quality_labels, right_quality_labels) diff_loss = tf.losses.mean_squared_error(diff_prediction, diff_quality_labels) alpha = tf.constant(params.alpha) self.quality_loss = alpha * dist_loss + (1 - alpha) * diff_loss # Nugget Task loss nugget_shared_repr = shared_output # not use max_time = tf.shape(nugget_shared_repr)[1] customer_index = tf.range(start=0, delta=2, limit=max_time) helpdesk_index = tf.range(start=1, delta=2, limit=max_time) customer_output = tf.gather(nugget_shared_repr, indices=customer_index, axis=1) helpdesk_output = tf.gather(nugget_shared_repr, indices=helpdesk_index, axis=1) assert_op = tf.assert_equal( tf.shape(customer_output)[1] + tf.shape(helpdesk_output)[1], max_time) with tf.control_dependencies([assert_op]): self.c_nuggets_logits = tf.layers.dense( customer_output, len(data.CUSTOMER_NUGGET_TYPES_WITH_PAD)) self.h_nuggets_logits = tf.layers.dense( helpdesk_output, len(data.HELPDESK_NUGGET_TYPES_WITH_PAD)) self.nugget_loss = nugget_loss(self.c_nuggets_logits, self.h_nuggets_logits, self.c_nuggets_labels, self.h_nuggets_labels, self.dialogue_lengths, tf.shape(self.turns)[1]) self.nugget_prediction = (tf.nn.softmax(self.c_nuggets_logits, axis=-1), tf.nn.softmax(self.h_nuggets_logits, axis=-1)) # Train operations self.loss = [] self.train_op = [] for task in ["quality", "nugget"]: task_loss = getattr(self, "%s_loss" % task) self.loss.append(task_loss) self.train_op.append( build_train_op(task_loss, tf.train.get_or_create_global_step(), lr=params.learning_rate, optimizer=params.optimizer))
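# --- Standalone sketch of the even/odd turn split used above: customer turns
# sit at even time indices, helpdesk turns at odd ones (toy shapes).
import tensorflow as tf

shared_output = tf.zeros([2, 7, 32])  # [batch, max_time, hidden]
max_time = tf.shape(shared_output)[1]
customer = tf.gather(shared_output, tf.range(0, max_time, 2), axis=1)
helpdesk = tf.gather(shared_output, tf.range(1, max_time, 2), axis=1)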
def apply_2site_schmidt_canonical_python(op, L0, G1, L1, G2, L2, max_bond_dim=None, auto_trunc_max_err=0.0): """Applies a two-site local operator to an MPS. Takes Lambda and Gamma tensors (Schmidt canonical form) and returns new ones, as well as the new norm of the state. """ if tf.executing_eagerly(): # FIXME: Not ideal, but these ops are very costly at compile time op_shp = tf.shape(op) tf.assert_equal( tf.shape(G1)[1], op_shp[2], message="Operator dimensions do not match MPS physical dimensions." ) tf.assert_equal( tf.shape(G2)[1], op_shp[3], message="Operator dimensions do not match MPS physical dimensions." ) # TODO(ash): Can we assume these are diagonal? L0_i = tf.matrix_inverse(L0) L2_i = tf.matrix_inverse(L2) net = tn.TensorNetwork() nL0_i = net.add_node(L0_i, axis_names=["L", "R"]) nL0 = net.add_node(L0, axis_names=["L", "R"]) nG1 = net.add_node(G1, axis_names=["L", "p", "R"]) nL1 = net.add_node(L1, axis_names=["L", "R"]) nG2 = net.add_node(G2, axis_names=["L", "p", "R"]) nL2 = net.add_node(L2, axis_names=["L", "R"]) nL2_i = net.add_node(L2_i, axis_names=["L", "R"]) nop = net.add_node(op, axis_names=["p_out_1", "p_out_2", "p_in_1", "p_in_2"]) b0 = net.connect(nL0_i["R"], nL0["L"]) b1 = net.connect(nL0["R"], nG1["L"]) b2 = net.connect(nG1["R"], nL1["L"]) b3 = net.connect(nL1["R"], nG2["L"]) b4 = net.connect(nG2["R"], nL2["L"]) b5 = net.connect(nL2["R"], nL2_i["L"]) net.connect(nG1["p"], nop["p_in_1"]) net.connect(nG2["p"], nop["p_in_2"]) output_order = [nL0["L"], nop["p_out_1"], nop["p_out_2"], nL2["R"]] net.contract(b1) net.contract(b2) net.contract(b3) n_mps = net.contract(b4) n_block = net.contract_between(nop, n_mps) nu, ns, nvh, s_rest = net.split_node_full_svd( n_block, output_order[:2], output_order[2:], max_singular_values=max_bond_dim, max_truncation_err=auto_trunc_max_err) trunc_err = tf.norm(s_rest) nrm = tf.norm(ns.tensor) ns.tensor = tf.divide(ns.tensor, nrm) L1_new = ns.tensor #output_order = [nL0_i["L"], nu["p_out_1"], es1] output_order = [nL0_i["L"], nu[1], ns[0]] nG1_new = net.contract(b0) nG1_new.reorder_edges(output_order) G1_new = nG1_new.tensor #output_order = [es2, nvh["p_out_2"], nL2_i["R"]] output_order = [ns[1], nvh[1], nL2_i["R"]] nG2_new = net.contract(b5) nG2_new.reorder_edges(output_order) G2_new = nG2_new.tensor return G1_new, L1_new, G2_new, nrm, trunc_err
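# --- Hedged usage sketch with random Schmidt-form tensors (bond dimension 4,
# physical dimension 2). Assumes eager execution and the `tn` (tensornetwork)
# import the function body relies on; the identity gate is a convenient no-op
# two-site operator.
import tensorflow as tf

def _sketch_apply_identity_gate():
    D, d = 4, 2
    L = tf.eye(D) / tf.sqrt(float(D))
    G1 = tf.random.normal([D, d, D])
    G2 = tf.random.normal([D, d, D])
    op = tf.reshape(tf.eye(d * d), [d, d, d, d])
    return apply_2site_schmidt_canonical_python(op, L, G1, L, G2, L,
                                                max_bond_dim=8)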
def load_examples():
    if a.input_dir is None or not os.path.exists(a.input_dir):
        raise Exception("input_dir does not exist")

    input_paths = glob.glob(os.path.join(a.input_dir, "*.jpg"))
    decode = tf.image.decode_jpeg
    if len(input_paths) == 0:
        input_paths = glob.glob(os.path.join(a.input_dir, "*.png"))
        decode = tf.image.decode_png

    if len(input_paths) == 0:
        raise Exception("input_dir contains no image files")

    def get_name(path):
        name, _ = os.path.splitext(os.path.basename(path))
        return name

    # if the image names are numbers, sort by the value rather than asciibetically
    # having sorted inputs means that the outputs are sorted in test mode
    if all(get_name(path).isdigit() for path in input_paths):
        input_paths = sorted(input_paths, key=lambda path: int(get_name(path)))
    else:
        input_paths = sorted(input_paths)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=a.mode == "train")
        reader = tf.WholeFileReader()
        paths, contents = reader.read(path_queue)
        raw_input = decode(contents)
        raw_input = tf.image.convert_image_dtype(raw_input, dtype=tf.float32)

        assertion = tf.assert_equal(tf.shape(raw_input)[2], 3, message="image does not have 3 channels")
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)

        raw_input.set_shape([None, None, 3])

        if a.lab_colorization:
            # load color and brightness from image, no B image exists here
            lab = rgb_to_lab(raw_input)
            L_chan, a_chan, b_chan = preprocess_lab(lab)
            a_images = tf.expand_dims(L_chan, axis=2)
            b_images = tf.stack([a_chan, b_chan], axis=2)
        else:
            # break apart image pair and move to range [-1, 1]
            width = tf.shape(raw_input)[1]  # [height, width, channels]
            a_images = preprocess(raw_input[:, :width // 2, :])
            b_images = preprocess(raw_input[:, width // 2:, :])

    if a.which_direction == "AtoB":
        inputs, targets = [a_images, b_images]
    elif a.which_direction == "BtoA":
        inputs, targets = [b_images, a_images]
    else:
        raise Exception("invalid direction")

    # synchronize seed for image operations so that we do the same operations to both
    # input and output images
    seed = random.randint(0, 2**31 - 1)

    def transform(image):
        r = image
        if a.flip:
            r = tf.image.random_flip_left_right(r, seed=seed)

        # area produces a nice downscaling, but does nearest neighbor for upscaling
        # assume we're going to be doing downscaling here
        r = tf.image.resize_images(r, [a.scale_size, a.scale_size], method=tf.image.ResizeMethod.AREA)

        offset = tf.cast(tf.floor(tf.random_uniform([2], 0, a.scale_size - CROP_SIZE + 1, seed=seed)), dtype=tf.int32)
        if a.scale_size > CROP_SIZE:
            r = tf.image.crop_to_bounding_box(r, offset[0], offset[1], CROP_SIZE, CROP_SIZE)
        elif a.scale_size < CROP_SIZE:
            raise Exception("scale size cannot be less than crop size")
        return r

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)
    steps_per_epoch = int(math.ceil(len(input_paths) / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        count=len(input_paths),
        steps_per_epoch=steps_per_epoch,
    )
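
# `preprocess` is referenced above but not defined in this snippet. The
# comment ("move to range [-1, 1]") implies a simple affine rescale of the
# [0, 1] output of convert_image_dtype; a minimal sketch under that assumption:
def preprocess(image):
    with tf.name_scope("preprocess"):
        # [0, 1] -> [-1, 1]
        return image * 2 - 1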
def load_examples():
    if a.input_dir is None or not os.path.exists(a.input_dir):
        raise Exception("input_dir does not exist")

    input_paths = glob.glob(os.path.join(a.input_dir, "*.jpg"))
    decode = tf.image.decode_jpeg
    if len(input_paths) == 0:
        input_paths = glob.glob(os.path.join(a.input_dir, "*.png"))
        decode = tf.image.decode_png

    if len(input_paths) == 0:
        raise Exception("input_dir contains no image files")

    def get_name(path):
        name, _ = os.path.splitext(os.path.basename(path))
        return name

    # if the image names are numbers, sort by the value rather than asciibetically
    # having sorted inputs means that the outputs are sorted in test mode
    if all(get_name(path).isdigit() for path in input_paths):
        input_paths = sorted(input_paths, key=lambda path: int(get_name(path)))
    else:
        input_paths = sorted(input_paths)

    with tf.name_scope("load_images"):
        path_queue = tf.train.string_input_producer(input_paths, shuffle=a.mode == "train")
        reader = tf.WholeFileReader()
        paths, contents = reader.read(path_queue)
        raw_input = decode(contents)
        raw_input = tf.image.convert_image_dtype(raw_input, dtype=tf.float32)

        assertion = tf.assert_equal(tf.shape(raw_input)[2], 3, message="image does not have 3 channels")
        with tf.control_dependencies([assertion]):
            raw_input = tf.identity(raw_input)

        raw_input.set_shape([None, None, 3])

        # break apart image pair and move to range [-1, 1]
        width = tf.shape(raw_input)[1]  # [height, width, channels]
        a_images = preprocess(raw_input[:, :width // 2, :])
        b_images = preprocess(raw_input[:, width // 2:, :])

    if a.which_direction == "AtoB":
        inputs, targets = [a_images, b_images]
    elif a.which_direction == "BtoA":
        inputs, targets = [b_images, a_images]
    else:
        raise Exception("invalid direction")

    # synchronize seed for image operations so that we do the same operations to both
    # input and output images
    seed = random.randint(0, 2**31 - 1)

    def transform(image):
        r = image
        if a.flip:
            r = tf.image.random_flip_left_right(r, seed=seed)
        r = tf.image.resize_images(r, [a.scale_size, a.scale_size], method=tf.image.ResizeMethod.AREA)
        return r

    with tf.name_scope("input_images"):
        input_images = transform(inputs)

    with tf.name_scope("target_images"):
        target_images = transform(targets)

    paths_batch, inputs_batch, targets_batch = tf.train.batch(
        [paths, input_images, target_images], batch_size=a.batch_size)
    steps_per_epoch = int(math.ceil(len(input_paths) / a.batch_size))

    return Examples(
        paths=paths_batch,
        inputs=inputs_batch,
        targets=targets_batch,
        count=len(input_paths),
        steps_per_epoch=steps_per_epoch,
    )
def get_score(self, x, y=None, n_z=None, mcmc_iteration=None,
              last_point_only=True):
    """
    Get the reconstruction probability for `x` and `y`.

    The larger the `reconstruction probability`, the less likely a point
    is an anomaly.  You may take the negative of the score, if you want
    something to directly indicate the severity of anomaly.

    Args:
        x (tf.Tensor): 2-D `float32` :class:`tf.Tensor`, the windows of
            KPI observations in a mini-batch.
        y (tf.Tensor): 2-D `int32` :class:`tf.Tensor`, the windows of
            missing point indicators in a mini-batch.
        n_z (int or None): Number of `z` samples to take for each `x`.
            (default :obj:`None`, one sample without explicit sampling
            dimension)
        mcmc_iteration (int or tf.Tensor): Iteration count for MCMC
            missing data imputation. (default :obj:`None`, no iteration)
        last_point_only (bool): Whether to obtain the reconstruction
            probability of only the last point in each window?
            (default :obj:`True`)

    Returns:
        tf.Tensor: The reconstruction probability, with the shape
            ``(len(x) - self.x_dims + 1,)`` if `last_point_only` is
            :obj:`True`, or ``(len(x) - self.x_dims + 1, self.x_dims)``
            if `last_point_only` is :obj:`False`.  This is because the
            first ``self.x_dims - 1`` points are not the last point of
            any window.
    """
    with tf.name_scope('Donut.get_score'):
        # MCMC missing data imputation
        if y is not None and mcmc_iteration:
            x_r = iterative_masked_reconstruct(
                reconstruct=self.vae.reconstruct,
                x=x,
                mask=y,
                iter_count=mcmc_iteration,
                back_prop=False,
            )
        else:
            x_r = x

        # get the reconstruction probability
        q_net = self.vae.variational(x=x_r, n_z=n_z)  # notice: x=x_r
        p_net = self.vae.model(z=q_net['z'], x=x, n_z=n_z)  # notice: x=x
        r_prob = p_net['x'].log_prob(group_ndims=0)

        if n_z is not None:
            n_z = validate_n_samples(n_z, 'n_z')
            assert_shape_op = tf.assert_equal(
                tf.shape(r_prob),
                tf.stack([n_z, tf.shape(x)[0], self.x_dims]),
                message='Unexpected shape of reconstruction prob')
            with tf.control_dependencies([assert_shape_op]):
                r_prob = tf.reduce_mean(r_prob, axis=0)

        if last_point_only:
            r_prob = r_prob[:, -1]
        return r_prob
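
# The returned shape in the docstring follows from sliding a window of length
# x_dims over a series: a series of length n yields n - x_dims + 1 windows,
# and the first x_dims - 1 points are never the last point of any window.
# A tiny NumPy illustration (hypothetical sizes):
import numpy as np

series = np.arange(10.0)          # n = 10
x_dims = 4
windows = np.stack([series[i:i + x_dims]
                    for i in range(len(series) - x_dims + 1)])
print(windows.shape)              # (7, 4) == (n - x_dims + 1, x_dims)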
def _batch_size_checks(self, batch_size, error_message):
    return [tf.assert_equal(batch_size,
                            attention_mechanism.batch_size,
                            message=error_message)
            for attention_mechanism in self._attention_mechanisms]
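
# Assert ops like the ones returned above do nothing until wired into the
# graph. A minimal sketch (hypothetical helper) of consuming such a list,
# following the same pattern as the other snippets in this collection: tying
# a tensor to the checks with tf.identity under control_dependencies.
def with_checks(tensor, check_ops):
    with tf.control_dependencies(check_ops):
        return tf.identity(tensor)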
def merge_boxes_with_multiple_labels(boxes,
                                     classes,
                                     confidences,
                                     num_classes,
                                     quantization_bins=10000):
  """Merges boxes with same coordinates and returns K-hot encoded classes.

  Args:
    boxes: A tf.float32 tensor with shape [N, 4] holding N boxes. Only
      normalized coordinates are allowed.
    classes: A tf.int32 tensor with shape [N] holding class indices.
      The class index starts at 0.
    confidences: A tf.float32 tensor with shape [N] holding class confidences.
    num_classes: total number of classes to use for K-hot encoding.
    quantization_bins: the number of bins used to quantize the box coordinate.

  Returns:
    merged_boxes: A tf.float32 tensor with shape [N', 4] holding boxes,
      where N' <= N.
    class_encodings: A tf.int32 tensor with shape [N', num_classes] holding
      K-hot encodings for the merged boxes.
    confidence_encodings: A tf.float32 tensor with shape [N', num_classes]
      holding encodings of confidences for the merged boxes.
    merged_box_indices: A tf.int32 tensor with shape [N'] holding original
      indices of the boxes.
  """
  boxes_shape = tf.shape(boxes)
  classes_shape = tf.shape(classes)
  confidences_shape = tf.shape(confidences)
  box_class_shape_assert = shape_utils.assert_shape_equal_along_first_dimension(
      boxes_shape, classes_shape)
  box_confidence_shape_assert = (
      shape_utils.assert_shape_equal_along_first_dimension(
          boxes_shape, confidences_shape))
  box_dimension_assert = tf.assert_equal(boxes_shape[1], 4)
  box_normalized_assert = shape_utils.assert_box_normalized(boxes)

  with tf.control_dependencies(
      [box_class_shape_assert, box_confidence_shape_assert,
       box_dimension_assert, box_normalized_assert]):
    quantized_boxes = tf.to_int64(boxes * (quantization_bins - 1))
    ymin, xmin, ymax, xmax = tf.unstack(quantized_boxes, axis=1)
    hashcodes = (
        ymin +
        xmin * quantization_bins +
        ymax * quantization_bins * quantization_bins +
        xmax * quantization_bins * quantization_bins * quantization_bins)
    unique_hashcodes, unique_indices = tf.unique(hashcodes)
    num_boxes = tf.shape(boxes)[0]
    num_unique_boxes = tf.shape(unique_hashcodes)[0]
    merged_box_indices = tf.unsorted_segment_min(
        tf.range(num_boxes), unique_indices, num_unique_boxes)
    merged_boxes = tf.gather(boxes, merged_box_indices)

    def map_box_encodings(i):
      """Produces box K-hot and score encodings for each class index."""
      box_mask = tf.equal(
          unique_indices, i * tf.ones(num_boxes, dtype=tf.int32))
      box_mask = tf.reshape(box_mask, [-1])
      box_indices = tf.boolean_mask(classes, box_mask)
      box_confidences = tf.boolean_mask(confidences, box_mask)
      box_class_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], 1, validate_indices=False)
      box_confidence_encodings = tf.sparse_to_dense(
          box_indices, [num_classes], box_confidences, validate_indices=False)
      return box_class_encodings, box_confidence_encodings

    class_encodings, confidence_encodings = tf.map_fn(
        map_box_encodings,
        tf.range(num_unique_boxes),
        back_prop=False,
        dtype=(tf.int32, tf.float32))

    merged_boxes = tf.reshape(merged_boxes, [-1, 4])
    class_encodings = tf.reshape(class_encodings, [-1, num_classes])
    confidence_encodings = tf.reshape(confidence_encodings, [-1, num_classes])
    merged_box_indices = tf.reshape(merged_box_indices, [-1])
  return (merged_boxes, class_encodings, confidence_encodings,
          merged_box_indices)
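
# The merge works by quantizing each coordinate into one of
# `quantization_bins` integer bins and packing the four bins into a single
# hash, so boxes with (nearly) identical coordinates collide. A NumPy sketch
# of the hashing step on hypothetical toy boxes:
import numpy as np

bins = 10000
boxes = np.array([[0.1, 0.2, 0.5, 0.6],
                  [0.1, 0.2, 0.5, 0.6],   # duplicate of row 0
                  [0.3, 0.3, 0.9, 0.9]])
q = (boxes * (bins - 1)).astype(np.int64)
ymin, xmin, ymax, xmax = q.T
hashcodes = ymin + xmin * bins + ymax * bins**2 + xmax * bins**3
print(np.unique(hashcodes).size)  # 2 unique boxes out of 3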
def _build(self, proposals, bbox_pred, cls_prob, im_shape):
    """
    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 5). Where num_proposals is less than
            POST_NMS_TOP_N (We don't know exactly beforehand)
        bbox_pred: Tensor with the RCNN delta predictions for each proposal
            for each class. Shape (num_proposals, 4 * num_classes)
        cls_prob: A softmax probability for each proposal where the idx = 0
            is the background class (which we should ignore).
            Shape (num_proposals, num_classes + 1)

    Returns:
        objects:
            Shape (final_num_proposals, 4)
            Where final_num_proposals is unknown before-hand (it depends on
            NMS). The 4-length Tensor for each corresponds to:
            (x_min, y_min, x_max, y_max).
        objects_label:
            Shape (final_num_proposals,)
        objects_label_prob:
            Shape (final_num_proposals,)
    """
    # remove batch_id from proposals
    # (tf.assert_equal, not tf.equal: a bare tf.equal would just produce an
    # unused boolean tensor instead of validating the shape)
    with tf.control_dependencies([tf.assert_equal(tf.shape(proposals)[-1], 5)]):
        proposals = proposals[:, 1:]

    # First we get the most probable label for each proposal.
    # We still have the background on idx 0 so we subtract 1 from the idxs.
    proposal_label = tf.argmax(cls_prob, axis=1) - 1
    # Get the probability for the selected label for each proposal.
    proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

    # We are going to use only the non-background proposals.
    non_background_filter = tf.greater_equal(proposal_label, 0)
    # Filter proposals with less than threshold probability.
    min_prob_filter = tf.greater_equal(proposal_label_prob,
                                       self._min_prob_threshold)
    proposal_filter = tf.logical_and(non_background_filter, min_prob_filter)

    total_proposals = tf.shape(proposals)[0]

    equal_shapes = tf.assert_equal(
        tf.shape(proposals)[0], tf.shape(bbox_pred)[0])
    with tf.control_dependencies([equal_shapes]):
        # Filter all tensors for getting all non-background proposals.
        proposals = tf.boolean_mask(proposals, proposal_filter)
        proposal_label = tf.boolean_mask(proposal_label, proposal_filter)
        proposal_label_prob = tf.boolean_mask(proposal_label_prob,
                                              proposal_filter)
        bbox_pred = tf.boolean_mask(bbox_pred, proposal_filter)

    filtered_proposals = tf.shape(proposals)[0]
    tf.summary.scalar('background_or_low_prob_proposals',
                      total_proposals - filtered_proposals, ['rcnn'])

    # Create one hot with labels for using it to filter bbox_predictions.
    label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
    # Flatten label_one_hot to get
    # (num_non_background_proposals * num_classes, 1) for filtering.
    label_one_hot_flatten = tf.cast(tf.reshape(label_one_hot, [-1]), tf.bool)
    # Flatten bbox_predictions getting
    # (num_non_background_proposals * num_classes, 4).
    bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

    equal_shapes = tf.assert_equal(
        tf.shape(bbox_pred_flatten)[0], tf.shape(label_one_hot_flatten)[0])
    with tf.control_dependencies([equal_shapes]):
        # Control same number of dimensions between bbox and mask.
        bbox_pred = tf.boolean_mask(bbox_pred_flatten, label_one_hot_flatten)

    # Using the bbox_pred and the proposals we generate the objects.
    raw_objects = decode(proposals, bbox_pred)
    # Clip boxes to image.
    clipped_objects = clip_boxes(raw_objects, im_shape)

    # Filter objects with an invalid (zero) area; a strict greater-than is
    # needed here, since every clipped box trivially has area >= 0.
    (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
    object_filter = tf.greater(
        tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
        0.0)

    total_raw_objects = tf.shape(raw_objects)[0]
    objects = tf.boolean_mask(clipped_objects, object_filter)
    proposal_label = tf.boolean_mask(proposal_label, object_filter)
    proposal_label_prob = tf.boolean_mask(proposal_label_prob, object_filter)

    total_objects = tf.shape(objects)[0]
    # Invalid proposals are the raw objects minus the ones kept by the filter.
    tf.summary.scalar('invalid_proposals',
                      total_raw_objects - total_objects, ['rcnn'])
    tf.summary.scalar(
        'valid_proposals_ratio',
        tf.cast(total_objects, tf.float32) /
        tf.cast(total_proposals, tf.float32), ['rcnn'])

    # We have to use the TensorFlow's bounding box convention to use the
    # included function for NMS.
    # After gathering results we should normalize it back.
    objects_tf = change_order(objects)

    selected_boxes = []
    selected_probs = []
    selected_labels = []
    # For each class we want to filter those objects and apply NMS to them.
    for class_id in range(self._num_classes):
        # Filter objects Tensors with class.
        class_filter = tf.equal(proposal_label, class_id)
        class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
        class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

        # Apply class NMS.
        class_selected_idx = tf.image.non_max_suppression(
            class_objects_tf, class_prob, self._class_max_detections,
            iou_threshold=self._class_nms_threshold)

        # Using NMS resulting indices, gather values from Tensors.
        class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
        class_prob = tf.gather(class_prob, class_selected_idx)

        # We append values to a regular list which will later be transformed
        # into a proper Tensor.
        selected_boxes.append(class_objects_tf)
        selected_probs.append(class_prob)
        # In the case of the class_id, since it is a loop on classes, we
        # already have a fixed class_id. We use `tf.tile` to create that
        # Tensor with the total number of indices returned by the NMS.
        selected_labels.append(
            tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))

    # We use concat (axis=0) to generate a Tensor where the rows are
    # stacked on top of each other.
    objects_tf = tf.concat(selected_boxes, axis=0)
    # Return to the original convention.
    objects = change_order(objects_tf)
    proposal_label = tf.concat(selected_labels, axis=0)
    proposal_label_prob = tf.concat(selected_probs, axis=0)

    # Get topK detections of all classes.
    k = tf.minimum(self._total_max_detections,
                   tf.shape(proposal_label_prob)[0])
    top_k = tf.nn.top_k(proposal_label_prob, k=k)
    top_k_proposal_label_prob = top_k.values
    top_k_objects = tf.gather(objects, top_k.indices)
    top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

    return {
        'raw_objects': raw_objects,
        'objects': top_k_objects,
        'proposal_label': top_k_proposal_label,
        'proposal_label_prob': top_k_proposal_label_prob,
        'selected_boxes': selected_boxes,
        'selected_probs': selected_probs,
        'selected_labels': selected_labels,
    }
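
# `change_order` is not shown in this snippet. tf.image.non_max_suppression
# expects boxes as [y_min, x_min, y_max, x_max], while this module keeps
# (x_min, y_min, x_max, y_max), so a plausible sketch is a symmetric x/y swap
# (applying it twice restores the original order, matching how it is used
# both before and after NMS above):
def change_order(boxes):
    first, second, third, fourth = tf.unstack(boxes, axis=1)
    return tf.stack([second, first, fourth, third], axis=1)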
def _forward_log_det_jacobian(self, x):
  # Let Y be a symmetric, positive definite matrix and write:
  #   Y = X X.T
  # where X is lower-triangular.
  #
  # Observe that,
  #   dY[i,j]/dX[a,b]
  #   = d/dX[a,b] { X[i,:] X[j,:] }
  #   = sum_{d=1}^p { I[i=a] I[d=b] X[j,d] + I[j=a] I[d=b] X[i,d] }
  #
  # To compute the Jacobian dY/dX we must represent X,Y as vectors. Since Y is
  # symmetric and X is lower-triangular, we need vectors of dimension:
  #   d = p (p + 1) / 2
  # where X, Y are p x p matrices, p > 0. We use a row-major mapping, i.e.,
  #   k = { i (i + 1) / 2 + j   i>=j
  #       { undef               i<j
  # and assume zero-based indexes. When k is undef, the element is dropped.
  # Example:
  #           j  k
  #        0 1 2 3  /
  #    0 [ 0 . . . ]
  #  i 1 [ 1 2 . . ]
  #    2 [ 3 4 5 . ]
  #    3 [ 6 7 8 9 ]
  # Write vec[.] to indicate transforming a matrix to vector via k(i,j). (With
  # slight abuse: k(i,j)=undef means the element is dropped.)
  #
  # We now show d vec[Y] / d vec[X] is lower triangular. Assuming both are
  # defined, observe that k(i,j) < k(a,b) iff (1) i<a or (2) i=a and j<b.
  # In both cases dvec[Y]/dvec[X]@[k(i,j),k(a,b)] = 0 since:
  # (1) j<=i<a thus i,j!=a.
  # (2) i=a>j thus i,j!=a.
  #
  # Since the Jacobian is lower-triangular, we need only compute the product
  # of diagonal elements:
  #   d vec[Y] / d vec[X] @[k(i,j), k(i,j)]
  #   = X[j,j] + I[i=j] X[i,j]
  #   = 2 X[j,j].
  # Since there is a 2 X[j,j] term for every lower-triangular element of X we
  # conclude:
  #   |Jac(d vec[Y]/d vec[X])| = 2^p prod_{j=0}^{p-1} X[j,j]^{p-j}.
  diag = tf.matrix_diag_part(x)

  # We now ensure diag is columnar. Eg, if `diag = [1, 2, 3]` then the output
  # is `[[1], [2], [3]]` and if `diag = [[1, 2, 3], [4, 5, 6]]` then the
  # output is unchanged.
  diag = self._make_columnar(diag)

  if self.validate_args:
    is_matrix = tf.assert_rank_at_least(
        x, 2, message="Input must be a (batch of) matrix.")
    shape = tf.shape(x)
    is_square = tf.assert_equal(
        shape[-2], shape[-1],
        message="Input must be a (batch of) square matrix.")
    # Assuming lower-triangular means we only need check diag>0.
    is_positive_definite = tf.assert_positive(
        diag, message="Input must be positive definite.")
    x = control_flow_ops.with_dependencies(
        [is_matrix, is_square, is_positive_definite], x)

  # Create a vector equal to: [p, p-1, ..., 2, 1].
  if x.get_shape().ndims is None or x.get_shape()[-1].value is None:
    p_int = tf.shape(x)[-1]
    p_float = tf.cast(p_int, dtype=x.dtype)
  else:
    p_int = x.get_shape()[-1].value
    p_float = np.array(p_int, dtype=x.dtype.as_numpy_dtype)
  exponents = tf.linspace(p_float, 1., p_int)

  sum_weighted_log_diag = tf.squeeze(
      tf.matmul(tf.log(diag), exponents[..., tf.newaxis]), axis=-1)
  fldj = p_float * np.log(2.) + sum_weighted_log_diag

  return fldj
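
# A quick numerical sanity check of the determinant formula above for p = 2
# (hypothetical values): the Jacobian of vec[Y] w.r.t. vec[X] has diagonal
# [2*X00, X00, 2*X11], so |det| = 4 * X00^2 * X11 = 2^p * X00^(p-0) * X11^(p-1).
import numpy as np

X = np.array([[1.5, 0.0],
              [0.7, 2.0]])
p = X.shape[0]
diag = np.diag(X)
analytic = 2 * X[0, 0] * X[0, 0] * 2 * X[1, 1]          # product of Jacobian diagonal
formula = 2.0**p * np.prod(diag ** (p - np.arange(p)))  # 2^p prod X[j,j]^(p-j)
assert np.isclose(analytic, formula)                    # 18.0 == 18.0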
def construct_network(self):
    self.word_ids = tf.placeholder(tf.int32, [None, None], name="word_ids")
    self.char_ids = tf.placeholder(tf.int32, [None, None, None], name="char_ids")
    self.additional_features = tf.placeholder(
        tf.float32,
        [None, None, self.config['num_additional_feature_vectors'],
         self.config['num_additional_features']],
        name="additional_features")
    self.categoricals = tf.placeholder(tf.int32, [None, None], name="categoricals")
    self.sentence_lengths = tf.placeholder(tf.int32, [None], name="sentence_lengths")
    self.word_lengths = tf.placeholder(tf.int32, [None, None], name="word_lengths")
    self.label_ids = tf.placeholder(tf.int32, [None, None], name="label_ids")
    self.learningrate = tf.placeholder(tf.float32, name="learningrate")
    self.is_training = tf.placeholder(tf.int32, name="is_training")

    self.loss = 0.0
    input_tensor = None

    self.initializer = None
    if self.config["initializer"] == "normal":
        self.initializer = tf.random_normal_initializer(mean=0.0, stddev=0.1)
    elif self.config["initializer"] == "glorot":
        self.initializer = tf.glorot_uniform_initializer()
    elif self.config["initializer"] == "xavier":
        self.initializer = tf.glorot_normal_initializer()
    else:
        raise ValueError("Unknown initializer")

    self.word_embeddings = tf.get_variable(
        "word_embeddings",
        shape=[len(self.word2id), self.config["word_embedding_size"]],
        initializer=(tf.zeros_initializer() if self.config["emb_initial_zero"]
                     else self.initializer),
        trainable=self.config["train_embeddings"])
    input_tensor = tf.nn.embedding_lookup(self.word_embeddings, self.word_ids)

    l2_regularizer = (tf.contrib.layers.l2_regularizer(scale=self.config['l2_lambda'])
                      if self.config.get('l2_lambda') else None)

    if self.config.get('num_additional_feature_vectors', 1) == 1:
        # Squeeze only the singleton feature-vector axis, so batch or feature
        # dimensions that happen to be 1 are left alone.
        additional_features = tf.squeeze(self.additional_features, axis=2)
        additional_features.set_shape((input_tensor.shape[0],
                                       input_tensor.shape[1],
                                       self.config['num_additional_features']))
    elif self.config.get('additional_feature_integration_method', 'attention') == 'layerwise-sum':
        W = tf.get_variable(
            'w_layerwise',
            shape=(self.config['num_additional_feature_vectors'],),
            initializer=tf.zeros_initializer,
            regularizer=l2_regularizer,
            trainable=True)
        normed_weights = tf.split(
            tf.nn.softmax(W + 1.0 / self.config['num_additional_feature_vectors']),
            self.config['num_additional_feature_vectors'])
        layers = tf.split(self.additional_features,
                          self.config['num_additional_feature_vectors'], axis=2)
        weighted_layers = [w * tf.squeeze(t, axis=2)
                           for w, t in zip(normed_weights, layers)]
        additional_features = tf.add_n(weighted_layers)
        if self.config.get('scale_additional_features', False):
            gamma = tf.get_variable('additional_features_gamma', shape=(1,),
                                    initializer=tf.ones_initializer,
                                    trainable=True)
            additional_features = additional_features * gamma

    if self.config['add_features_to_input']:
        input_tensor = tf.concat([input_tensor, additional_features], axis=2)

    if self.config["char_embedding_size"] > 0 and self.config["char_recurrent_size"] > 0:
        with tf.variable_scope("chars"), tf.control_dependencies(
                [tf.assert_equal(tf.shape(self.char_ids)[2],
                                 tf.reduce_max(self.word_lengths),
                                 message="Char dimensions don't match")]):
            self.char_embeddings = tf.get_variable(
                "char_embeddings",
                shape=[len(self.char2id), self.config["char_embedding_size"]],
                initializer=self.initializer,
                trainable=True)
            char_input_tensor = tf.nn.embedding_lookup(self.char_embeddings,
                                                       self.char_ids)

            s = tf.shape(char_input_tensor)
            char_input_tensor = tf.reshape(
                char_input_tensor,
                shape=[s[0] * s[1], s[2], self.config["char_embedding_size"]])
            _word_lengths = tf.reshape(self.word_lengths, shape=[s[0] * s[1]])

            char_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
                self.config["char_recurrent_size"],
                use_peepholes=self.config["lstm_use_peepholes"],
                state_is_tuple=True,
                initializer=self.initializer,
                reuse=False)
            char_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
                self.config["char_recurrent_size"],
                use_peepholes=self.config["lstm_use_peepholes"],
                state_is_tuple=True,
                initializer=self.initializer,
                reuse=False)

            char_lstm_outputs = tf.nn.bidirectional_dynamic_rnn(
                char_lstm_cell_fw, char_lstm_cell_bw, char_input_tensor,
                sequence_length=_word_lengths, dtype=tf.float32,
                time_major=False)
            _, ((_, char_output_fw), (_, char_output_bw)) = char_lstm_outputs
            char_output_tensor = tf.concat([char_output_fw, char_output_bw], axis=-1)
            char_output_tensor = tf.reshape(
                char_output_tensor,
                shape=[s[0], s[1], 2 * self.config["char_recurrent_size"]])
            char_output_vector_size = 2 * self.config["char_recurrent_size"]

        if self.config["lmcost_char_gamma"] > 0.0:
            self.loss += self.config["lmcost_char_gamma"] * self.construct_lmcost(
                char_output_tensor, char_output_tensor, self.sentence_lengths,
                self.word_ids, "separate", "lmcost_char_separate")
        if self.config["lmcost_joint_char_gamma"] > 0.0:
            self.loss += self.config["lmcost_joint_char_gamma"] * self.construct_lmcost(
                char_output_tensor, char_output_tensor, self.sentence_lengths,
                self.word_ids, "joint", "lmcost_char_joint")

        if self.config["char_hidden_layer_size"] > 0:
            char_hidden_layer_size = (
                self.config["word_embedding_size"]
                if self.config["char_integration_method"] == "attention"
                else self.config["char_hidden_layer_size"])
            char_output_tensor = tf.layers.dense(
                char_output_tensor, char_hidden_layer_size,
                activation=tf.tanh, kernel_initializer=self.initializer)
            # The input tensor is word_embedding_size + num_additional_features,
            # because we added the additional features tensor. Therefore add a
            # slice of zeroes to the character tensor so the dimensions match.
            if self.config['add_features_to_input']:
                char_output_tensor = tf.pad(
                    char_output_tensor,
                    tf.constant([[0, 0], [0, 0],
                                 [0, self.config['num_additional_features']]]))
            char_output_vector_size = char_hidden_layer_size

        if self.config["char_integration_method"] == "concat":
            input_tensor = tf.concat([input_tensor, char_output_tensor], axis=-1)
        elif self.config["char_integration_method"] == "attention":
            assert char_output_vector_size == self.config["word_embedding_size"], \
                "This method requires the char representation to have the same size as word embeddings"
            static_input_tensor = tf.stop_gradient(input_tensor)
            is_unk = tf.equal(self.word_ids, self.word2id[self.UNK])
            char_output_tensor_normalised = tf.nn.l2_normalize(char_output_tensor, 2)
            static_input_tensor_normalised = tf.nn.l2_normalize(static_input_tensor, 2)
            cosine_cost = 1.0 - tf.reduce_sum(
                tf.multiply(char_output_tensor_normalised,
                            static_input_tensor_normalised), axis=2)
            is_padding = tf.logical_not(
                tf.sequence_mask(self.sentence_lengths,
                                 maxlen=tf.shape(self.word_ids)[1]))
            cosine_cost_unk = tf.where(tf.logical_or(is_unk, is_padding),
                                       x=tf.zeros_like(cosine_cost),
                                       y=cosine_cost)
            self.loss += self.config["char_attention_cosine_cost"] * tf.reduce_sum(cosine_cost_unk)

            attention_evidence_tensor = tf.concat(
                [input_tensor, char_output_tensor], axis=2)
            # Attention layers should be word_embedding_size +
            # num_additional_features, because we added our arbitrary feature
            # vector to the embedding.
            d = self.config['word_embedding_size'] + (
                self.config['num_additional_features']
                if self.config['add_features_to_input'] else 0)
            attention_output = tf.layers.dense(
                attention_evidence_tensor, d,
                activation=tf.tanh, kernel_initializer=self.initializer)
            attention_output = tf.layers.dense(
                attention_output, d,
                activation=tf.sigmoid, kernel_initializer=self.initializer)
            input_tensor = (tf.multiply(input_tensor, attention_output)
                            + tf.multiply(char_output_tensor,
                                          (1.0 - attention_output)))
        elif self.config["char_integration_method"] == "none":
            input_tensor = input_tensor
        else:
            raise ValueError("Unknown char integration method")

    if self.config["dropout_input"] > 0:
        # Keep probability is dropout_input during training and 1.0 otherwise.
        dropout_input = (self.config["dropout_input"]
                         * tf.cast(self.is_training, tf.float32)
                         + (1.0 - tf.cast(self.is_training, tf.float32)))
        input_tensor = tf.nn.dropout(input_tensor, dropout_input,
                                     name="dropout_word")

    word_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
        self.config["word_recurrent_size"],
        use_peepholes=self.config["lstm_use_peepholes"],
        state_is_tuple=True,
        initializer=self.initializer,
        reuse=False)
    word_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
        self.config["word_recurrent_size"],
        use_peepholes=self.config["lstm_use_peepholes"],
        state_is_tuple=True,
        initializer=self.initializer,
        reuse=False)

    with tf.control_dependencies([tf.assert_equal(
            tf.shape(self.word_ids)[1],
            tf.reduce_max(self.sentence_lengths),
            message="Sentence dimensions don't match")]):
        (lstm_outputs_fw, lstm_outputs_bw), _ = tf.nn.bidirectional_dynamic_rnn(
            word_lstm_cell_fw, word_lstm_cell_bw, input_tensor,
            sequence_length=self.sentence_lengths, dtype=tf.float32,
            time_major=False)

    if self.config["dropout_word_lstm"]:
        dropout_word_lstm = (self.config["dropout_word_lstm"]
                             * tf.cast(self.is_training, tf.float32)
                             + (1.0 - tf.cast(self.is_training, tf.float32)))
        lstm_outputs_fw = tf.nn.dropout(lstm_outputs_fw, dropout_word_lstm)
        lstm_outputs_bw = tf.nn.dropout(lstm_outputs_bw, dropout_word_lstm)

    if self.config["lmcost_lstm_gamma"] > 0.0:
        self.loss += self.config["lmcost_lstm_gamma"] * self.construct_lmcost(
            lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths,
            self.word_ids, "separate", "lmcost_lstm_separate")
    if self.config["lmcost_joint_lstm_gamma"] > 0.0:
        self.loss += self.config["lmcost_joint_lstm_gamma"] * self.construct_lmcost(
            lstm_outputs_fw, lstm_outputs_bw, self.sentence_lengths,
            self.word_ids, "joint", "lmcost_lstm_joint")

    processed_tensor = tf.concat([lstm_outputs_fw, lstm_outputs_bw], 2)

    if self.config['add_features_to_output']:
        with tf.variable_scope("features"):
            processed_tensor = tf.concat([processed_tensor, additional_features], 2)
            if self.config.get('lstm_over_features'):
                if self.config.get("additional_features_projection", 0) > 0:
                    processed_tensor = tf.layers.dense(
                        processed_tensor,
                        self.config["additional_features_projection"],
                        activation=tf.tanh,
                        kernel_initializer=self.initializer)
                feature_lstm_cell_fw = tf.nn.rnn_cell.LSTMCell(
                    self.config["word_recurrent_size"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)
                feature_lstm_cell_bw = tf.nn.rnn_cell.LSTMCell(
                    self.config["word_recurrent_size"],
                    state_is_tuple=True,
                    initializer=self.initializer,
                    reuse=False)
                (feature_lstm_outputs_fw, feature_lstm_outputs_bw), _ = \
                    tf.nn.bidirectional_dynamic_rnn(
                        feature_lstm_cell_fw, feature_lstm_cell_bw,
                        processed_tensor,
                        sequence_length=self.sentence_lengths,
                        dtype=tf.float32, time_major=False)
                dropout_feature_lstm = (
                    self.config["dropout_word_lstm"]
                    * tf.cast(self.is_training, tf.float32)
                    + (1.0 - tf.cast(self.is_training, tf.float32)))
                feature_lstm_outputs_fw = tf.nn.dropout(feature_lstm_outputs_fw,
                                                        dropout_feature_lstm)
                feature_lstm_outputs_bw = tf.nn.dropout(feature_lstm_outputs_bw,
                                                        dropout_feature_lstm)
                processed_tensor = tf.concat(
                    [feature_lstm_outputs_fw, feature_lstm_outputs_bw], 2)

    if self.config["hidden_layer_size"] > 0:
        processed_tensor = tf.layers.dense(
            processed_tensor, self.config["hidden_layer_size"],
            activation=tf.tanh, kernel_initializer=self.initializer)

    self.scores = tf.layers.dense(
        processed_tensor, len(self.label2id), activation=None,
        kernel_initializer=self.initializer, name="output_ff")

    if self.config["crf_on_top"]:
        crf_num_tags = self.scores.get_shape()[2].value
        self.crf_transition_params = tf.get_variable(
            "output_crf_transitions", [crf_num_tags, crf_num_tags],
            initializer=self.initializer)
        log_likelihood, self.crf_transition_params = \
            tf.contrib.crf.crf_log_likelihood(
                self.scores, self.label_ids, self.sentence_lengths,
                transition_params=self.crf_transition_params)
        self.loss += self.config["main_cost"] * tf.reduce_sum(-log_likelihood)
    else:
        self.probabilities = tf.nn.softmax(self.scores)
        self.predictions = tf.argmax(self.probabilities, 2)
        loss_ = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.label_ids)
        mask = tf.sequence_mask(self.sentence_lengths,
                                maxlen=tf.shape(self.word_ids)[1])
        loss_ = tf.boolean_mask(loss_, mask)
        self.loss += self.config["main_cost"] * tf.reduce_sum(loss_)

    self.train_op = self.construct_optimizer(self.config["opt_strategy"],
                                             self.loss, self.learningrate,
                                             self.config["clip"])
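
# The expression `p * is_training + (1 - is_training)` used repeatedly above
# is a common TF1 trick: with is_training == 1 the keep probability is the
# configured value, and with is_training == 0 it collapses to 1.0, disabling
# dropout at test time without rebuilding the graph. Toy check (hypothetical
# keep probability p = 0.7):
for is_training in (1.0, 0.0):
    keep_prob = 0.7 * is_training + (1.0 - is_training)
    print(is_training, keep_prob)  # 1.0 -> 0.7, 0.0 -> 1.0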
def _reshape_helper(self, x, event_shape_in, event_shape_out):
  """Reshape only the event_shape of an input `Tensor`."""

  event_ndims_in_ = _static_ndims_from_shape(event_shape_in)
  event_ndims_in = _ndims_from_shape(event_shape_in)
  x_ndims_, x_ndims = x.shape.ndims, tf.rank(x)

  assertions = []

  # Ensure x.event_shape is compatible with event_shape_in.
  if (event_ndims_in_ is not None and x_ndims_ is not None and
      x.shape.with_rank_at_least(event_ndims_in_)[
          x_ndims_ - event_ndims_in_:].is_fully_defined()):
    x_event_shape_, x_event_shape = [  # pylint: disable=unbalanced-tuple-unpacking
        np.int32(x.shape[x_ndims_ - event_ndims_in_:])] * 2
  else:
    x_event_shape_, x_event_shape = (
        None, tf.shape(x)[x_ndims - event_ndims_in:])

  event_shape_in_ = tensor_util.constant_value(event_shape_in)

  if x_event_shape_ is not None and event_shape_in_ is not None:
    # Compare the shape dimensions that are fully specified in the
    # input (i.e., for which event_shape_in is not -1). If x_event_shape
    # matches along all of these dimensions, it is compatible with
    # the desired input shape and any further mismatches (i.e.,
    # incompatibility with the desired *output* shape) will be
    # caught inside of tf.reshape() below.
    x_event_shape_specified_ = x_event_shape_[event_shape_in_ >= 0]
    event_shape_in_specified_ = event_shape_in_[event_shape_in_ >= 0]
    if not np.equal(x_event_shape_specified_,
                    event_shape_in_specified_).all():
      raise ValueError(
          "Input `event_shape` does not match `event_shape_in` ({} vs {})."
          .format(x_event_shape_, event_shape_in_))
  elif self.validate_args:
    # Similarly to the static case, we compare the shape dimensions
    # that are fully specified in the input. We extract these
    # dimensions using boolean_mask(), which requires that the mask
    # have known ndims. We can assume that shape Tensors always have
    # ndims==1 (this assumption is verified inside of
    # _maybe_check_valid_shape), so the reshape operation is just a
    # no-op that formally encodes this fact to make boolean_mask()
    # happy.
    event_shape_mask = tf.reshape(event_shape_in >= 0, [-1])
    x_event_shape_specified = tf.boolean_mask(x_event_shape,
                                              event_shape_mask)
    event_shape_in_specified = tf.boolean_mask(event_shape_in,
                                               event_shape_mask)
    assertions.append(
        tf.assert_equal(
            x_event_shape_specified,
            event_shape_in_specified,
            message="Input `event_shape` does not match `event_shape_in`."))

  if assertions:
    x = control_flow_ops.with_dependencies(assertions, x)

  # get the parts of shape(x) that will not change
  sample_and_batch_shape = tf.shape(x)
  ndims = (x.shape.ndims if x.shape.ndims is not None else tf.rank(x))
  sample_and_batch_shape = sample_and_batch_shape[:(
      ndims - tf.abs(event_ndims_in))]

  if (event_ndims_in_ is not None and x_ndims_ is not None and
      event_ndims_in_ == x_ndims_):
    # Hack to allow forward/inverse_event_shape to do shape
    # inference by calling this helper method with a dummy Tensor of
    # shape event_shape_in. In this special case,
    # sample_and_batch_shape will be empty so we can preserve static
    # shape information by avoiding the concat operation below
    # (which would be a no-op).
    new_shape = event_shape_out
  else:
    new_shape = tf.concat([sample_and_batch_shape, event_shape_out], axis=0)

  return tf.reshape(x, new_shape)
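
# The partial-shape comparison above only checks dimensions where
# event_shape_in is not -1 (the reshape wildcard). A NumPy sketch of that
# masking logic on hypothetical shapes:
import numpy as np

event_shape_in = np.array([-1, 3, 4])   # -1 means "any size"
x_event_shape = np.array([7, 3, 4])     # actual trailing dims of x
mask = event_shape_in >= 0
compatible = np.equal(x_event_shape[mask], event_shape_in[mask]).all()
print(compatible)                       # True: only the 3 and 4 dims are checked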