def sign_magnitude_positive_definite(
    raw, off_diagonal_scale=0., overall_scale=0.):
  """Builds a positive definite matrix from an unconstrained square matrix.

  The whole matrix is kept on a log scale, yet off-diagonal entries must be
  able to go negative. To allow that, the sign of each off-diagonal entry is
  modeled separately from its magnitude: the lower triangle of `raw` supplies
  the sign (through a softsign) and the upper triangle supplies the
  log-magnitude.

  Specifically, for i < j:

    output_cholesky[i, j] = raw[j, i] / (abs(raw[j, i]) + 1) *
        exp((off_diagonal_scale + overall_scale + raw[i, j]) / 2)
    output_cholesky[i, i] = exp((raw[i, i] + overall_scale) / 2)
    output = output_cholesky^T * output_cholesky

  where raw, off_diagonal_scale, and overall_scale are unconstrained
  real-valued variables. The exponential keeps the values stable around zero,
  and the softsign keeps the function smooth.

  Args:
    raw: A [..., M, M] Tensor.
    off_diagonal_scale: A scalar or [...] shaped Tensor controlling the
      relative scale of off-diagonal values in the output matrix.
    overall_scale: A scalar or [...] shaped Tensor controlling the overall
      scale of the output matrix.

  Returns:
    The `output` matrix described above, a [..., M, M] positive definite
    matrix.
  """
  raw = ops.convert_to_tensor(raw)
  raw_diagonal = array_ops.matrix_diag_part(raw)

  def _right_pad_with_ones(tensor, target_rank):
    # Append size-1 dimensions so that scales carrying batch dimensions
    # still broadcast against the trailing matrix dimensions.
    tensor = ops.convert_to_tensor(tensor, dtype=raw.dtype.base_dtype)
    leading_shape = array_ops.shape(tensor)
    trailing_ones = array_ops.ones(
        [target_rank - array_ops.rank(tensor)], dtype=target_rank.dtype)
    return array_ops.reshape(
        tensor, array_ops.concat([leading_shape, trailing_ones], axis=0))

  # The log values are halved to compensate for the squaring that happens
  # when a Cholesky factor is turned into a positive definite matrix.
  log_magnitude = raw + _right_pad_with_ones(
      off_diagonal_scale, array_ops.rank(raw))
  log_magnitude = log_magnitude + _right_pad_with_ones(
      overall_scale, array_ops.rank(raw))
  magnitude = gen_math_ops.exp(log_magnitude / 2.)
  sign = nn.softsign(array_ops.matrix_transpose(raw))
  sign_magnitude = magnitude * sign
  sign_magnitude.set_shape(raw.get_shape())

  # Keep only the upper triangle, then overwrite the diagonal with its own
  # (sign-free) exponential.
  upper_triangle = array_ops.matrix_band_part(sign_magnitude, 0, -1)
  diagonal_values = gen_math_ops.exp(
      (raw_diagonal + _right_pad_with_ones(
          overall_scale, array_ops.rank(raw_diagonal))) / 2.)
  cholesky_factor = array_ops.matrix_set_diag(
      input=upper_triangle, diagonal=diagonal_values)
  return math_ops.matmul(cholesky_factor, cholesky_factor, transpose_a=True)
def GraphFn(self, x1, x2):
  """Builds three chains of unary ops and combines them as a * b / c.

  Args:
    x1: Input tensor feeding the first chain.
    x2: Input tensor feeding the third chain.

  Returns:
    The squeezed combination of the three chains, named "output_0".
  """
  # First chain, driven by x1.
  first = gen_math_ops.log(gen_math_ops.exp(math_ops.abs(x1) + 1.0))
  first = array_ops.squeeze(first, axis=-2)
  first = gen_math_ops.rsqrt(gen_math_ops.sqrt(math_ops.abs(first) + 2.2))
  first = array_ops.squeeze(math_ops.negative(first), axis=3)
  a = gen_math_ops.reciprocal(math_ops.abs(first) + 3.0)

  # this chain of operations has a batch size of 5, which is different from
  # the batch size for the other operations.
  const_input = constant_op.constant(
      np.random.randn(5, 8, 12), dtype=x1.dtype)
  second = gen_math_ops.log(
      gen_math_ops.exp(math_ops.abs(const_input) + 2.0))
  second = gen_math_ops.rsqrt(gen_math_ops.sqrt(math_ops.abs(second) + 2.1))
  b = gen_math_ops.reciprocal(
      math_ops.abs(math_ops.negative(second)) + 4.0)

  # TODO(jie): this one will break, broadcasting on batch.
  third = gen_math_ops.exp(math_ops.abs(x2) + 5.0)
  third = array_ops.squeeze(third, axis=[-1, -2, 3])
  third = math_ops.abs(gen_math_ops.log(third)) + 5.1
  third = gen_array_ops.reshape(third, [12, 5, 1, 1, 8, 1, 12])
  third = array_ops.squeeze(third, axis=[5, 2, 3])
  third = gen_math_ops.rsqrt(
      math_ops.abs(gen_math_ops.sqrt(third)) + 5.2)
  c = gen_math_ops.reciprocal(
      math_ops.abs(math_ops.negative(third)) + 5.3)

  combined = a * b / c
  return array_ops.squeeze(combined, name="output_0")
def __call__(self, inputs, state, scope=None):
  """Computes the window weights `phi` and the updated window positions.

  Inputs have shape [batch_size, output_size]; state has shape
  [batch_size, state_size]. Splitting of the input into meaningful
  parameters has to be performed here.

  Since there's no easy way to bring one-hot encodings of characters into
  the RNNCell *and* align them with batch numbers, this cell simply returns
  the RNN-ified parameters of the window cell.

  Args:
    inputs: Tensor that is projected by `self.weight` / `self.bias` and then
      split along axis 1 into alpha, beta and kappa parameters.
    state: Tensor added to exp(kappa) to form the new window positions.
    scope: Optional variable scope name; defaults to 'window_cell'.

  Returns:
    A `(phi, kappa_hats)` tuple: the sum over windows of
    alpha * exp(-beta * (kappa - u)^2) evaluated at u = 0..num_chars, and the
    updated kappa values (which serve as the next state).
  """
  dtype = tf.float32
  with vs.variable_scope(scope or 'window_cell'):
    resized_input = tf.matmul(inputs, self.weight) + self.bias
    alphas, betas, kappas = array_ops.split(
        resized_input, [self._state_size] * self.NUM_FREE_PARAMS, axis=1)

    # kappa accumulates on top of the previous state; alpha and beta are
    # kept positive by the exponential.
    kappa_hats = gen_math_ops.exp(kappas) + state
    alpha_hats = gen_math_ops.exp(alphas)
    beta_hats = gen_math_ops.exp(betas)

    # Integer values of 'u' in phi.
    num_positions = self.num_chars + 1
    u = tf.range(tf.cast(num_positions, dtype), dtype=dtype)

    one_column_each = [1] * self.num_windows
    kappa_hat_list = tf.split(kappa_hats, one_column_each, axis=1)
    beta_hat_list = tf.split(beta_hats, one_column_each, axis=1)
    alpha_hat_list = tf.split(alpha_hats, one_column_each, axis=1)

    phi = 0
    for kappa_hat, beta_hat, alpha_hat in zip(
        kappa_hat_list, beta_hat_list, alpha_hat_list):
      kappa_hat_tiled = tf.tile(kappa_hat, [1, num_positions])
      beta_hat_tiled = tf.tile(beta_hat, [1, num_positions])
      alpha_hat_tiled = tf.tile(alpha_hat, [1, num_positions])
      z = -1 * beta_hat_tiled * tf.square(kappa_hat_tiled - u)
      phi += alpha_hat_tiled * tf.exp(z)
  return phi, kappa_hats
def call(self, inputs):
  """Applies a cascade of four steep sigmoid gates to `inputs`.

  The first gate is sigmoid(50 * (inputs - threshold1)). Each later gate is
  sigmoid(50 * (inputs * previous_gate - threshold_k)), so it can only open
  once the previous one has.

  The fused sigmoid op is used instead of a hand-written
  `1 / (1 + exp(-z))`: with the steepness of 50, `exp` overflows to inf for
  inputs only moderately below a threshold, which yields NaN gradients.

  Args:
    inputs: Tensor of values to compare against the thresholds.

  Returns:
    `1 + gate1 + gate2 + gate3 + gate4`, with the same shape as the
    broadcast of `inputs` and the thresholds.
  """
  steepness = 5e1
  gate = gen_math_ops.sigmoid(steepness * (inputs - self.threshold1))
  output = 1 + gate
  for threshold in (self.threshold2, self.threshold3, self.threshold4):
    # Each gate is fed the input masked by the previous gate.
    gate = gen_math_ops.sigmoid(steepness * (inputs * gate - threshold))
    output = output + gate
  return output
def fucking_deep_gaze_logsumexp(input_tensor, axis=None, keepdims=False,
                                name=None):
  """Computes log(mean(exp(input_tensor))) along `axis`, stably.

  Adapted from `reduce_logsumexp` in
  https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/math_ops.py.
  It is the classic logsumexp except that the exponentials are averaged
  instead of summed, i.e. log(N) is subtracted, where N is the number of
  elements reduced over (with 10 readout nets, N=10). This modification comes
  from deep_gaze and is meant for the output only.

  Args:
    input_tensor: The tensor to reduce.
    axis: The dimensions to reduce; `None` reduces all dimensions.
    keepdims: If true, retains reduced dimensions with length 1. `None` is
      treated as `False`.
    name: Optional name for the operation scope.

  Returns:
    The reduced tensor.
  """
  keepdims = False if keepdims is None else keepdims
  input_tensor = ops.convert_to_tensor(input_tensor)
  with ops.name_scope(name, "ReduceLogSumExp", [input_tensor]) as name:
    # `keepdims` is the supported spelling; `keep_dims` is a deprecated alias
    # that newer TensorFlow reductions reject.
    raw_max = tf.reduce_max(input_tensor, axis=axis, keepdims=True)
    # Shift by the (finite) maximum so that exp() cannot overflow; no
    # gradient flows through the shift.
    my_max = array_ops.stop_gradient(
        array_ops.where(
            gen_math_ops.is_finite(raw_max), raw_max,
            array_ops.zeros_like(raw_max)))
    # A plain logsumexp would use reduce_sum here; the mean is the deep_gaze
    # modification.
    result = gen_math_ops.log(
        tf.reduce_mean(
            gen_math_ops.exp(tf.subtract(input_tensor, my_max)),
            axis,
            keepdims=keepdims))
    if not keepdims:
      my_max = array_ops.reshape(my_max, array_ops.shape(result))
    result = gen_math_ops.add(result, my_max)
    return result
def _create_anomaly_ops(self, times, values, prediction_ops_dict):
  """Adds the anomaly distribution parameter to `prediction_ops_dict`.

  A scalar variable "anomaly_log_param" (initialized to zero) is created and
  its exponential is stored under the "anomaly_params" key.

  Args:
    times: Unused in this method.
    values: Unused in this method.
    prediction_ops_dict: Dictionary that is updated in place.
  """
  zeros = init_ops.zeros_initializer()
  log_param = variable_scope.get_variable(
      "anomaly_log_param", shape=[], dtype=self.dtype, initializer=zeros)
  # Anomaly param is the variance for Gaussian and scale for Cauchy
  # distribution.
  prediction_ops_dict["anomaly_params"] = gen_math_ops.exp(log_param)
def call(self, input_window_features, output_window_features):
  """Compute predictions from input and output windows.

  The encoder is run over the input window and its two returned states seed
  the decoder, which is run over the output window.

  Args:
    input_window_features: Features passed to `self._encoder`.
    output_window_features: Features passed to `self._decoder`.

  Returns:
    A dictionary with keys "mean" and "covariance"; the covariance is the
    exponential of the covariance transform of the decoder output.
  """
  _, hidden_state, cell_state = self._encoder(input_window_features)
  decoded = self._decoder(
      output_window_features, initial_state=[hidden_state, cell_state])
  return {
      "mean": self._mean_transform(decoded),
      # Exponentiate so the predicted covariance is positive.
      "covariance": gen_math_ops.exp(self._covariance_transform(decoded)),
  }
def call(self, inputs):
  """Evaluates `C * x0**k3` for rank-2 inputs with columns (x0, x1).

  With `k = self.kernel`:
    gamma = k[1] / (1 + exp(k[0] * x1))
    C = k[2] / (1 - x1)**(k[3] * (1 - gamma))

  Args:
    inputs: A rank-2 tensor (or convertible value); column 0 is x0 and
      column 1 is x1.

  Returns:
    A [rows, 1] tensor.

  Raises:
    ValueError: If `inputs` is not rank 2.
  """
  inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
  if inputs.shape.rank != 2:
    raise ValueError('`WalkerModel` only takes "rank 2" inputs.')

  logistic = 1 / (1 + gen_math_ops.exp(self.kernel[0] * inputs[:, 1]))
  gamma = logistic * self.kernel[1]

  numerator = self.kernel[2]
  base = 1 - inputs[:, 1]
  exponent = self.kernel[3] * (1 - gamma)
  coefficient = numerator / base**exponent

  output = coefficient * inputs[:, 0]**self.kernel[3]
  # Return a column vector rather than a flat vector.
  return array_ops.reshape(output, (array_ops.shape(output)[0], 1))
def softmax_2d(input_tensor, axis=None, keepdims=False, name=None):
  """Computes a numerically stable softmax over the given axes.

  Adapted from https://gist.github.com/raingo/a5808fe356b8da031837

  Args:
    input_tensor: The tensor to normalize.
    axis: The dimensions to normalize over; `None` normalizes over all
      dimensions.
    keepdims: Accepted for signature compatibility; it has no effect because
      the result always has the shape of `input_tensor`.
    name: Optional name for the operation scope.

  Returns:
    A tensor with the shape of `input_tensor` whose entries sum to one over
    `axis`.
  """
  input_tensor = ops.convert_to_tensor(input_tensor)
  with ops.name_scope(name, "softmax_2d", [input_tensor]):
    # `keepdims` is the supported spelling; `keep_dims` is a deprecated alias
    # that newer TensorFlow reductions reject.
    raw_max = tf.reduce_max(input_tensor, axis=axis, keepdims=True)
    # Shift by the (finite) maximum so that exp() cannot overflow; no
    # gradient flows through the shift.
    my_max = array_ops.stop_gradient(
        array_ops.where(
            gen_math_ops.is_finite(raw_max), raw_max,
            array_ops.zeros_like(raw_max)))
    target_exp = gen_math_ops.exp(tf.subtract(input_tensor, my_max))
    # Reduced dimensions are kept so the division broadcasts.
    normalize = tf.reduce_sum(target_exp, axis, keepdims=True)
    softmax = target_exp / normalize
    return softmax
def call(self, input_window_features, output_window_features):
  """Compute predictions from input and output windows.

  Args:
    input_window_features: A floating point Tensor with shape [batch size,
      input window size, input features]. The batch dimension may not have
      static shape information, but the window size and number of input
      features are known at graph construction time and recorded in the
      static shape information for the `input_window_features` `Tensor`. Note
      that `input_window_size` may be zero.
    output_window_features: A floating point Tensor with shape [batch size,
      output window size, output features]. As with `input_window_features`,
      the last two dimensions have static shape information. If there are no
      output features, the size of the last dimension will be zero.

  Returns:
    A dictionary of predictions with keys "mean" and "covariance" (only
    diagonal covariances are currently supported). Each has shape
    [batch size, output window size, num_features], where num_features is the
    same as the constructor argument.
  """
  # Static sizes are read through `shape.dims[i].value`: indexing the shape
  # directly returns a plain int (no `.value`) under V2 TensorShape behavior.
  if input_window_features.shape.dims[1].value == 0:
    # TODO(allenl): Make reshape()'s static shape information work on
    # zero-size Tensors? Currently this special case is required because
    # otherwise the Dense layers get unknown last dimensions.
    activation = self._output_flatten(output_window_features)
  elif output_window_features.shape.dims[2].value == 0:
    activation = self._input_flatten(input_window_features)
  else:
    activation = array_ops.concat(
        [self._input_flatten(input_window_features),
         self._output_flatten(output_window_features)],
        axis=1)
  if self._hidden_layers:
    activation = self._hidden_layers(activation)
  predicted_mean = array_ops.reshape(
      self._mean_transform(activation),
      self._prediction_shape)
  # Exponentiate so the predicted (diagonal) covariance is positive.
  predicted_covariance = array_ops.reshape(
      gen_math_ops.exp(self._covariance_transform(activation)),
      self._prediction_shape)
  return {"mean": predicted_mean, "covariance": predicted_covariance}
def call(self, input_window_features, output_window_features):
  """Predicts mean and covariance from flattened input and output windows.

  Args:
    input_window_features: A floating point Tensor with shape [batch size,
      input window size, input features]. Only the batch dimension may lack
      static shape information; the window size and the number of input
      features are known at graph construction time. The input window size
      may be zero.
    output_window_features: A floating point Tensor with shape [batch size,
      output window size, output features]. Its last two dimensions also have
      static shape information. When there are no output features the last
      dimension has size zero.

  Returns:
    A dictionary with keys "mean" and "covariance" (only diagonal
    covariances are currently supported), each reshaped to
    `self._prediction_shape`, i.e. [batch size, output window size,
    num_features] with num_features as given to the constructor.
  """
  has_input_window = input_window_features.shape.dims[1].value != 0
  if not has_input_window:
    # TODO(allenl): Make reshape()'s static shape information work on
    # zero-size Tensors? Currently this special case is required because
    # otherwise the Dense layers get unknown last dimensions.
    features = self._output_flatten(output_window_features)
  elif output_window_features.shape.dims[2].value == 0:
    features = self._input_flatten(input_window_features)
  else:
    flat_inputs = self._input_flatten(input_window_features)
    flat_outputs = self._output_flatten(output_window_features)
    features = array_ops.concat([flat_inputs, flat_outputs], axis=1)

  if self._hidden_layers:
    features = self._hidden_layers(features)

  mean = array_ops.reshape(
      self._mean_transform(features), self._prediction_shape)
  # Exponentiate so the predicted covariance is positive.
  covariance = array_ops.reshape(
      gen_math_ops.exp(self._covariance_transform(features)),
      self._prediction_shape)
  return {"mean": mean, "covariance": covariance}
def _predicted_covariance_op(self, activations, num_values):
  """Creates the predicted covariance op from the last activation.

  Args:
    activations: Sequence of (activation, activation_size) pairs; only the
      last pair is used.
    num_values: Number of values predicted per output time step.

  Returns:
    A [batch, output_window_size, num_values] Tensor: the exponential of a
    learned "log_sigma_square" projection for the normal likelihood loss,
    and all ones for any other loss.
  """
  last_activation, last_activation_size = activations[-1]

  if self.loss == ARModel.NORMAL_LIKELIHOOD_LOSS:
    log_sigma_square = model_utils.fully_connected(
        last_activation,
        last_activation_size,
        self.output_window_size * num_values,
        name="log_sigma_square",
        activation=None)
    return array_ops.reshape(
        gen_math_ops.exp(log_sigma_square),
        [-1, self.output_window_size, num_values])

  # No learned covariance: fall back to a unit covariance whose batch
  # dimension is taken from the activation at run time.
  ones_shape = array_ops.stack([
      array_ops.shape(last_activation)[0],
      constant_op.constant(self.output_window_size),
      constant_op.constant(num_values)
  ])
  return array_ops.ones(shape=ones_shape, dtype=last_activation.dtype)
def call(self, input_window_features, output_window_features):
  """Compute predictions from input and output windows.

  Both windows are transposed to time-major layout, run through the encoder
  and the decoder (seeded with the encoder state), and the decoder output is
  transposed back to batch-major before the mean and covariance transforms.

  Args:
    input_window_features: Batch-major features for the encoder.
    output_window_features: Batch-major features for the decoder.

  Returns:
    A dictionary with keys "mean" and "covariance".
  """
  # Swapping the first two axes converts batch major <-> time major.
  swap_batch_and_time = [1, 0, 2]
  encoder_inputs = array_ops.transpose(
      input_window_features, swap_batch_and_time)
  decoder_inputs = array_ops.transpose(
      output_window_features, swap_batch_and_time)

  _, encoder_state = self._encoder(encoder_inputs, dtype=self.dtype)
  time_major_output, _ = self._decoder(
      decoder_inputs, dtype=self.dtype, initial_state=encoder_state)

  batch_major_output = array_ops.transpose(
      time_major_output, swap_batch_and_time)
  return {
      "mean": self._mean_transform(batch_major_output),
      # Exponentiate so the predicted covariance is positive.
      "covariance": gen_math_ops.exp(
          self._covariance_transform(batch_major_output)),
  }
def loss_op(self, targets, prediction_ops):
  """Creates the loss op for a normal/anomaly mixture likelihood.

  Args:
    targets: Tensor of observed values.
    prediction_ops: Dictionary containing at least "mean" and "covariance";
      it is also forwarded to `self._anomaly_log_prob`.

  Returns:
    The negative mixture log-probability summed over all entries and divided
    by the number of elements in `targets`.
  """
  mean = prediction_ops["mean"]
  covariance = prediction_ops["covariance"]

  # Log probability under the normal (non-anomalous) component, weighted by
  # its prior. The covariance is floored before taking the square root.
  sigma = math_ops.sqrt(gen_math_ops.maximum(covariance, 1e-5))
  normal_term = math_utils.normal_log_prob(targets, sigma, mean)
  normal_term += math_ops.log(1 - self._anomaly_prior_probability)

  # Log probability under the anomaly component, weighted by its prior.
  anomaly_term = self._anomaly_log_prob(targets, prediction_ops)
  anomaly_term += math_ops.log(self._anomaly_prior_probability)

  # log(exp(normal_term) + exp(anomaly_term)), rewritten for numerical
  # stability by factoring out the larger of the two terms.
  smaller = gen_math_ops.minimum(normal_term, anomaly_term)
  larger = gen_math_ops.maximum(normal_term, anomaly_term)
  mixture_log_prob = larger + math_ops.log(
      1 + gen_math_ops.exp(smaller - larger))

  negative_total = -math_ops.reduce_sum(mixture_log_prob)
  return negative_total / math_ops.cast(
      math_ops.reduce_prod(array_ops.shape(targets)), self.dtype)
def normal_pdf(x, mean, sigma):
  """Evaluates the normal density with the given mean and sigma at `x`.

  Computes exp(-0.5 * ((x - mean) / sigma)^2) / sqrt(2 * pi) / sigma.
  """
  standardized = (x - mean) / sigma
  unnormalized = gen_math_ops.exp(-0.5 * gen_math_ops.square(standardized))
  return unnormalized / (2 * pi)**0.5 / sigma
def GetParams(self):
  """Test for unary operations in TF-TRT.

  Builds a graph with three chains of unary ops (two fed by placeholders,
  one by a random constant), combines them as a * b / c, and names the
  squeezed result `self.output_name`.

  Returns:
    The `TfTrtIntegrationTestParams` describing the graph, its inputs and
    the expected engines, output dims and tolerances.
  """
  dtype = dtypes.float32
  input_name = "input"
  input_dims = [12, 5, 8, 1, 1, 12]
  input2_name = "input_2"
  input2_dims = [12, 5, 8, 1, 12, 1, 1]

  graph = ops.Graph()
  with graph.as_default():
    # First chain, driven by the first placeholder.
    first_input = array_ops.placeholder(
        dtype=dtype, shape=input_dims, name=input_name)
    first = gen_math_ops.log(
        gen_math_ops.exp(math_ops.abs(first_input) + 1.0))
    first = array_ops.squeeze(first, axis=-2)
    first = gen_math_ops.rsqrt(
        gen_math_ops.sqrt(math_ops.abs(first) + 2.2))
    first = array_ops.squeeze(math_ops.negative(first), axis=3)
    a = gen_math_ops.reciprocal(math_ops.abs(first) + 3.0)

    # Second chain, driven by a random constant.
    const_input = constant_op.constant(
        np.random.randn(5, 8, 12), dtype=dtype)
    second = gen_math_ops.log(
        gen_math_ops.exp(math_ops.abs(const_input) + 2.0))
    second = gen_math_ops.rsqrt(
        gen_math_ops.sqrt(math_ops.abs(second) + 2.1))
    b = gen_math_ops.reciprocal(
        math_ops.abs(math_ops.negative(second)) + 4.0)

    # TODO(jie): this one will break, broadcasting on batch.
    second_input = array_ops.placeholder(
        dtype=dtype, shape=input2_dims, name=input2_name)
    third = gen_math_ops.exp(math_ops.abs(second_input) + 5.0)
    third = array_ops.squeeze(third, axis=[-1, -2, 3])
    third = math_ops.abs(gen_math_ops.log(third)) + 5.1
    third = gen_array_ops.reshape(third, [12, 5, 1, 1, 8, 1, 12])
    third = array_ops.squeeze(third, axis=[5, 2, 3])
    third = gen_math_ops.rsqrt(
        math_ops.abs(gen_math_ops.sqrt(third)) + 5.2)
    c = gen_math_ops.reciprocal(
        math_ops.abs(math_ops.negative(third)) + 5.3)

    combined = a * b / c
    array_ops.squeeze(combined, name=self.output_name)

  return trt_test.TfTrtIntegrationTestParams(
      gdef=graph.as_graph_def(),
      input_names=[input_name, input2_name],
      input_dims=[input_dims, input2_dims],
      num_expected_engines=5,
      expected_output_dims=(12, 5, 8, 12),
      allclose_atol=1.e-03,
      allclose_rtol=1.e-03)
def GetParams(self):
  """Test for unary operations in TF-TRT.

  Builds a graph with three chains of unary ops (two fed by placeholders,
  one by a random constant), combines them as a * b / c, and names the
  squeezed result "output".

  Returns:
    The `TfTrtIntegrationTestParams` describing the graph, its input and
    output names, and the input and expected output dims.
  """
  dtype = dtypes.float32
  input_name = "input"
  input_dims = [12, 5, 8, 1, 1, 12]
  output_name = "output"
  input2_name = "input_2"
  input2_dims = [12, 5, 8, 1, 12, 1, 1]

  graph = ops.Graph()
  with graph.as_default():
    # First chain, driven by the first placeholder.
    first_input = array_ops.placeholder(
        dtype=dtype, shape=input_dims, name=input_name)
    first = gen_math_ops.log(
        gen_math_ops.exp(math_ops.abs(first_input) + 1.0))
    first = array_ops.squeeze(first, axis=-2)
    first = gen_math_ops.rsqrt(
        gen_math_ops.sqrt(math_ops.abs(first) + 2.2))
    first = array_ops.squeeze(math_ops.negative(first), axis=3)
    a = gen_math_ops.reciprocal(math_ops.abs(first) + 3.0)

    # Second chain, driven by a random constant.
    const_input = constant_op.constant(
        np.random.randn(5, 8, 12), dtype=dtype)
    second = gen_math_ops.log(
        gen_math_ops.exp(math_ops.abs(const_input) + 2.0))
    second = gen_math_ops.rsqrt(
        gen_math_ops.sqrt(math_ops.abs(second) + 2.1))
    b = gen_math_ops.reciprocal(
        math_ops.abs(math_ops.negative(second)) + 4.0)

    # TODO(jie): this one will break, broadcasting on batch.
    second_input = array_ops.placeholder(
        dtype=dtype, shape=input2_dims, name=input2_name)
    third = gen_math_ops.exp(math_ops.abs(second_input) + 5.0)
    third = array_ops.squeeze(third, axis=[-1, -2, 3])
    third = math_ops.abs(gen_math_ops.log(third)) + 5.1
    third = gen_array_ops.reshape(third, [12, 5, 1, 1, 8, 1, 12])
    third = array_ops.squeeze(third, axis=[5, 2, 3])
    third = gen_math_ops.rsqrt(
        math_ops.abs(gen_math_ops.sqrt(third)) + 5.2)
    c = gen_math_ops.reciprocal(
        math_ops.abs(math_ops.negative(third)) + 5.3)

    combined = a * b / c
    array_ops.squeeze(combined, name=output_name)

  return trt_test.TfTrtIntegrationTestParams(
      gdef=graph.as_graph_def(),
      input_names=[input_name, input2_name],
      input_dims=[input_dims, input2_dims],
      output_names=[output_name],
      expected_output_dims=[(12, 5, 8, 12)])