Exemple #1
 def GraphFn(self, x):
     """Chain broadcasting binary ops on `x` and flatten the combined result.

     Twelve elementwise results are produced by pairing `x` with freshly
     drawn random constants of shapes (3, 1, 1), (1, 3, 1), (3, 1), (3,) and
     (1,) through division, addition, multiplication, subtraction, maximum
     and minimum. The results are concatenated and reshaped to `[2, -1]`.

     Args:
       x: Input tensor; every constant is created with `x.dtype`.
         NOTE(review): the `axis=3` concat presumably expects a rank-4
         input -- confirm against the caller.

     Returns:
       The reshaped tensor, created with the op name "output_0".
     """
     elem_type = x.dtype

     def random_constant(*shape):
         # Each call draws new values, so the call order below is significant.
         return constant_op.constant(np.random.randn(*shape), dtype=elem_type)

     # scale
     first_group = [
         x / random_constant(3, 1, 1),
         random_constant(3, 1, 1) / x,
         random_constant(1, 3, 1) + x,
         x * random_constant(1, 3, 1),
         x - random_constant(3, 1, 1),
         random_constant(3, 1, 1) - x,
     ]
     second_group = [
         x - random_constant(3, 1),
         random_constant(3, 1) - x,
         gen_math_ops.maximum(x, random_constant(3, 1, 1)),
         gen_math_ops.minimum(random_constant(3, 1), x),
         x * random_constant(3),
         random_constant(1) * x,
     ]
     left = array_ops.concat(first_group, axis=-1)
     right = array_ops.concat(second_group, axis=3)
     merged = array_ops.concat([left, right], axis=-1)
     return gen_array_ops.reshape(merged, [2, -1], name="output_0")
Exemple #2
def gen_non_linearity(A, non_linearity):
    """Return the activation selected by `non_linearity`, applied to `A`.

    Args:
        A: Tensor the activation is applied to.
        non_linearity: One of "tanh", "sigmoid", "relu", "quantTanh" or
            "quantSigm". Any other value falls back to tanh.

    Returns:
        The activated tensor.
    """
    if non_linearity == "tanh":
        return math_ops.tanh(A)
    elif non_linearity == "sigmoid":
        return math_ops.sigmoid(A)
    elif non_linearity == "relu":
        return gen_math_ops.maximum(A, 0.0)
    elif non_linearity == "quantTanh":
        # Clip to [-1, 1].
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), -1.0)
    elif non_linearity == "quantSigm":
        # Shift and scale, then clip to [0, 1].
        A = (A + 1.0) / 2.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    else:
        # Unrecognised names fall back to tanh.
        return math_ops.tanh(A)
    # Creates/retrieves the input-statistics variables and, when a mini-batch
    # is available and updates are requested, builds the update op for them.
    #
    # NOTE(review): this listing has lost lines: the docstring is never closed,
    # several calls stop after a trailing comma, and the `else:` lines implied
    # by the duplicated assignments are absent. Code lines are left exactly as
    # found; restore the missing text from the upstream source before use.
    def initialize_graph(self, features, update_statistics=True):
        """Create any ops needed to provide input statistics.

    Should be called before statistics are requested.

      features: A dictionary, the output of a `TimeSeriesInputFn` (with keys
          TrainEvalFeatures.TIMES and TrainEvalFeatures.VALUES).
      update_statistics: Whether `features` should be used to update adaptive
          statistics. Typically True for training and false for evaluation.
      An InputStatistics object composed of Variables, which will be updated
      based on mini-batches of data if requested.
        if (TrainEvalFeatures.TIMES in features
                and TrainEvalFeatures.VALUES in features):
            times = features[TrainEvalFeatures.TIMES]
            values = features[TrainEvalFeatures.VALUES]
            # NOTE(review): an `else:` seems to be missing here; as listed, the
            # two `None` assignments below sit inside the `if` body.
            # times and values may not be available, for example during prediction. We
            # still need to retrieve our variables so that they can be read from, even
            # if we're not going to update them.
            times = None
            values = None
        # Create/retrieve variables representing input statistics, initialized
        # without data to avoid deadlocking if variables are initialized before
        # queue runners are started.
        # NOTE(review): both `variable_scope(...)` calls below are cut off after
        # their first argument.
        with variable_scope.variable_scope("input_statistics",
            statistics = self._create_variable_statistics_object()
        with variable_scope.variable_scope("input_statistics_auxiliary",
            # Secondary statistics, necessary for the incremental computation of the
            # primary statistics (e.g. counts and sums for computing a mean
            # incrementally).
            auxiliary_variables = self._AdaptiveInputAuxiliaryStatistics(
                num_features=self._num_features, dtype=self._dtype)
        if update_statistics and times is not None and values is not None:
            # If we have times and values from mini-batch input, create update ops to
            # take the new data into account.
            assign_op = self._update_statistics_from_mini_batch(
                statistics, auxiliary_variables, times, values)
            with ops.control_dependencies([assign_op]):
                # NOTE(review): the element expression of this list
                # comprehension and its closing brackets are missing.
                stat_variables = nest.pack_sequence_as(statistics, [
                    for tensor in nest.flatten(statistics)
                # Since start time updates have a race condition, ensure that the
                # reported start time is at least as low as the lowest time in this
                # mini-batch. The start time should converge on the correct value
                # eventually even with the race condition, but for example state space
                # models have an assertion which could fail without this
                # post-processing.
                return stat_variables._replace(start_time=gen_math_ops.minimum(
                    stat_variables.start_time, math_ops.reduce_min(times)))
            # NOTE(review): presumably the `else:` branch of the
            # `update_statistics` check above -- confirm.
            return statistics
 # Runs one prediction step, writes the predicted mean (and covariance) into
 # the output arrays and assembles the inputs for the following step. Besides
 # its parameters it reads `self`, `prediction_iterations`,
 # `exogenous_regressors` and `predict_times` from the enclosing scope.
 #
 # NOTE(review): this listing has lost lines (cut-off calls and slices, missing
 # `else:` lines, truncated return). Code lines are left exactly as found.
 def _while_body(iteration_number, input_times, input_values,
                 input_exogenous_regressors, mean_ta, covariance_ta):
   """Predict self.output_window_size values."""
   prediction_ops = self.prediction_ops(
       input_times, input_values, input_exogenous_regressors)
   predicted_mean = prediction_ops["mean"]
   predicted_covariance = prediction_ops["covariance"]
   # Offset of the next output window; the window index is capped at
   # `prediction_iterations - 1`.
   offset = self.output_window_size * gen_math_ops.minimum(
       iteration_number + 1, prediction_iterations - 1)
   if self.input_window_size > 0:
     if self.output_window_size < self.input_window_size:
       # Drop the oldest `output_window_size` values and append the new
       # predictions along axis 1.
       new_input_values = array_ops.concat(
           [input_values[:, self.output_window_size:, :], predicted_mean], 1)
       # NOTE(review): the second concat operand below is cut off.
       new_input_exogenous_regressors = array_ops.concat(
           [input_exogenous_regressors[:, -self.input_window_size:, :],
                 :, offset:offset + self.output_window_size, :]],
       new_input_times = array_ops.concat([
           input_times[:, -self.input_window_size:],
           predict_times[:, offset:offset + self.output_window_size]
       ], 1)
       # NOTE(review): an `else:` seems to be missing before the next three
       # assignments, and the two slices below are incomplete.
       new_input_values = predicted_mean[:, -self.input_window_size:, :]
       new_input_exogenous_regressors = exogenous_regressors[
           offset - self.input_window_size:offset + self.output_window_size,
       new_input_times = predict_times[
           offset - self.input_window_size:offset + self.output_window_size]
     # NOTE(review): presumably the `else:` branch of the
     # `input_window_size > 0` check -- confirm.
     new_input_values = input_values
     new_input_exogenous_regressors = exogenous_regressors[
         :, offset:offset + self.output_window_size, :]
     new_input_times = predict_times[:,
                                     offset:offset + self.output_window_size]
   new_mean_ta = mean_ta.write(iteration_number, predicted_mean)
   if isinstance(covariance_ta, tensor_array_ops.TensorArray):
     # NOTE(review): the `write(...)` call is cut off and the following
     # assignment looks like a lost `else:` branch.
     new_covariance_ta = covariance_ta.write(iteration_number,
     new_covariance_ta = covariance_ta
   # NOTE(review): the returned tuple is truncated after its first element.
   return (iteration_number + 1,
Exemple #5
def gen_non_linearity(A, non_linearity):
    """Apply a named or user-supplied activation to `A`.

    Args:
        A: Tensor the activation is applied to.
        non_linearity: One of "tanh", "sigmoid", "relu", "quantTanh",
            "quantSigm", "quantSigm4", or a callable invoked as
            `non_linearity(A)`.

    Returns:
        The activated tensor, or whatever the callable returns.

    Raises:
        ValueError: If `non_linearity` is neither a supported name nor a
            callable.
    """
    if non_linearity == "tanh":
        return math_ops.tanh(A)
    elif non_linearity == "sigmoid":
        return math_ops.sigmoid(A)
    elif non_linearity == "relu":
        return gen_math_ops.maximum(A, 0.0)
    elif non_linearity == "quantTanh":
        # Clip to [-1, 1].
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), -1.0)
    elif non_linearity == "quantSigm":
        # Shift and scale, then clip to [0, 1].
        A = (A + 1.0) / 2.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    elif non_linearity == "quantSigm4":
        A = (A + 2.0) / 4.0
        return gen_math_ops.maximum(gen_math_ops.minimum(A, 1.0), 0.0)
    else:
        # non_linearity is a user specified function
        if not callable(non_linearity):
            # Adjacent literals are joined by the parser; the previous
            # `"..." + + "..."` form applied unary plus to a str, which
            # raises TypeError instead of the intended ValueError.
            raise ValueError(
                "non_linearity is either a callable or a value "
                "['tanh', 'sigmoid', 'relu', 'quantTanh', "
                "'quantSigm', 'quantSigm4']")
        return non_linearity(A)
Exemple #6
    def get_updates(self, loss, params):
        """Build the update ops for one optimisation step.

        For every parameter the first and second moment accumulators are
        refreshed, the per-element step `lr_t * lr_v` is clipped to
        `[lower_bound, upper_bound]`, and the parameter is moved against the
        first moment scaled by `lr` times that clipped step.

        Args:
            loss: Loss handed to `self.get_gradients`.
            params: Parameters to update. When a parameter has a non-None
                `constraint` attribute it is applied to the new value.

        Returns:
            `self.updates`, the list of assignment ops created here.
        """
        grads = self.get_gradients(loss, params)
        self.updates = []

        lr = self.lr
        if self.initial_decay > 0:
            lr = lr * (1. / (1. + self.decay * math_ops.cast(
                self.iterations, K.dtype(self.decay))))

        # The cast is created under a control dependency on the increment.
        with ops.control_dependencies(
                [state_ops.assign_add(self.iterations, 1)]):
            t = math_ops.cast(self.iterations, K.floatx())
        lr_t = (gen_math_ops.sqrt(1. - math_ops.pow(self.beta_2, t)) /
                (1. - math_ops.pow(self.beta_1, t)))

        # Both bounds are multiples of `lr_boost` whose factors approach 1 as
        # `gamma * t` grows.
        lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t))
        if self.sgdcorr:
            m_rate = 1. - self.beta_1 / (self.gamma * t + 1.)
        else:
            m_rate = 1. - self.beta_1

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Placeholders so the zip below has one entry per parameter.
            vhats = [K.zeros(1) for _ in params]
        self.weights = [self.iterations] + ms + vs + vhats

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            m_t = (self.beta_1 * m) + m_rate * g
            v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                # Use the running element-wise maximum of the second moment.
                vhat_t = math_ops.maximum(vhat, v_t)
                lr_v = gen_math_ops.reciprocal(
                    gen_math_ops.sqrt(vhat_t) + self.epsilon)
                self.updates.append(state_ops.assign(vhat, vhat_t))
            else:
                lr_v = gen_math_ops.reciprocal(
                    gen_math_ops.sqrt(v_t) + self.epsilon)

            lr_bound = gen_math_ops.minimum(
                gen_math_ops.maximum(lr_t * lr_v, lower_bound), upper_bound)
            p_t = p - lr * lr_bound * m_t

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))
            new_p = p_t

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Exemple #7
    def __sample_w_rej(self, n, seed):
        """Prepare the constants `b`, `a`, `d` and run `self.__while_loop`.

        Stores `b` in `self.__b` and the two results of the loop in
        `self.__e` and `self.__w`.

        Args:
            n: Forwarded unchanged to `self.__while_loop`.
            seed: Forwarded unchanged to `self.__while_loop`.

        Returns:
            `self.__w`, the second element returned by `self.__while_loop`.
        """
        c = math_ops.sqrt((4 * (self.scale**2)) + (self.__mf - 1)**2)
        b_true = (-2 * self.scale + c) / (self.__mf - 1)

        # Use a Taylor approximation of `b` for large scale to avoid numerical
        # errors: `s` is 0 up to scale 10, 1 from scale 11, and blends the
        # exact and approximate values linearly in between.
        b_app = (self.__mf - 1) / (4 * self.scale)
        s = gen_math_ops.minimum(gen_math_ops.maximum(0., self.scale - 10), 1.)
        b = b_app * s + b_true * (1 - s)

        a = (self.__mf - 1 + 2 * self.scale + c) / 4
        # The listing was cut off inside the log call; the operand is completed
        # as `self.__mf - 1`, matching the `(m - 1) * log(m - 1)` term.
        d = (4 * a * b) / (1 + b) - (self.__mf - 1) * math_ops.log(
            self.__mf - 1)

        self.__b, (self.__e, self.__w) = b, self.__while_loop(b, a, d, n, seed)
        return self.__w
 # Builds a graph that combines a placeholder with random constants through
 # broadcasting binary ops, concatenates the results and reshapes them to
 # [2, -1] under `output_name`.
 #
 # NOTE(review): this listing has lost lines: the `placeholder(...)` call and
 # the final `TfTrtIntegrationTestParams(...)` call are both cut off. Code
 # lines are left exactly as found.
 def GetParams(self):
     """Testing Concatenation in TF-TRT conversion."""
     dtype = dtypes.float32
     input_name = "input"
     input_dims = [2, 3, 3, 1]
     output_name = "output"
     g = ops.Graph()
     with g.as_default():
         # NOTE(review): remaining placeholder arguments are missing.
         x = array_ops.placeholder(dtype=dtype,
         # scale
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r1 = x / a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r2 = a / x
         a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
         r3 = a + x
         a = constant_op.constant(np.random.randn(1, 3, 1), dtype=dtype)
         r4 = x * a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r5 = x - a
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r6 = a - x
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r7 = x - a
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r8 = a - x
         a = constant_op.constant(np.random.randn(3, 1, 1), dtype=dtype)
         r9 = gen_math_ops.maximum(x, a)
         a = constant_op.constant(np.random.randn(3, 1), dtype=dtype)
         r10 = gen_math_ops.minimum(a, x)
         a = constant_op.constant(np.random.randn(3), dtype=dtype)
         r11 = x * a
         a = constant_op.constant(np.random.randn(1), dtype=dtype)
         r12 = a * x
         # Two groups of six results are concatenated separately, then joined.
         concat1 = array_ops.concat([r1, r2, r3, r4, r5, r6], axis=-1)
         concat2 = array_ops.concat([r7, r8, r9, r10, r11, r12], axis=3)
         x = array_ops.concat([concat1, concat2], axis=-1)
         gen_array_ops.reshape(x, [2, -1], name=output_name)
     # NOTE(review): arguments between `gdef=` and `(2, 126)` are missing.
     return trt_test.TfTrtIntegrationTestParams(gdef=g.as_graph_def(),
                                                    (2, 126)
 def loss_op(self, targets, prediction_ops):
   """Return the mean negative log-likelihood of a normal/anomaly mixture.

   Args:
     targets: Observed values being scored.
     prediction_ops: Mapping holding the "mean" and "covariance" predictions;
       it is also handed to `self._anomaly_log_prob`.

   Returns:
     The negated sum of the mixture log-probabilities divided by the number
     of elements in `targets`.
   """
   mean = prediction_ops["mean"]
   variance = prediction_ops["covariance"]

   # Component 1: regular observations. The covariance is floored at 1e-5
   # before the square root is taken.
   std_dev = math_ops.sqrt(gen_math_ops.maximum(variance, 1e-5))
   normal_term = (
       math_utils.normal_log_prob(targets, std_dev, mean)
       + math_ops.log(1 - self._anomaly_prior_probability))

   # Component 2: anomalous observations.
   anomaly_term = (
       self._anomaly_log_prob(targets, prediction_ops)
       + math_ops.log(self._anomaly_prior_probability))

   # log(exp(a) + exp(b)) is evaluated as max + log(1 + exp(min - max)) so
   # that the exponent is never positive.
   smaller = gen_math_ops.minimum(normal_term, anomaly_term)
   larger = gen_math_ops.maximum(normal_term, anomaly_term)
   mixture_log_prob = larger + math_ops.log(
       1 + gen_math_ops.exp(smaller - larger))

   negative_total = -math_ops.reduce_sum(mixture_log_prob)
   element_count = math_ops.cast(
       math_ops.reduce_prod(array_ops.shape(targets)), self.dtype)
   return negative_total / element_count
Exemple #10
# Counts occurrences of integer values in dense, ragged or sparse input,
# optionally weighted, per-axis or as a binary indicator.
#
# NOTE(review): this listing has lost lines: the signature stops after `arr,`,
# the docstring is never closed (its "Args:"/"Returns:"/"Raises:" headings and
# some sentence endings are gone), many calls stop after a trailing comma, and
# several `else:` lines implied by duplicated assignments are absent. Code
# lines are left exactly as found; restore the missing text from the upstream
# source before use.
def bincount(arr,
    """Counts the number of occurrences of each value in an integer array.

  If `minlength` and `maxlength` are not given, returns a vector with length
  `tf.reduce_max(arr) + 1` if `arr` is non-empty, and length 0 otherwise.
  If `weights` are non-None, then index `i` of the output stores the sum of the
  value in `weights` at each index where the corresponding value in `arr` is

  values = tf.constant([1,1,2,3,2,4,4,5])
  tf.math.bincount(values) #[0 2 2 1 2 1]
  Vector length = Maximum element in vector `values` is 5. Adding 1, which is 6
                  will be the vector length.

  Each bin value in the output indicates number of occurrences of the particular
  index. Here, index 1 in output has a value 2. This indicates value 1 occurs
  two times in `values`.

  values = tf.constant([1,1,2,3,2,4,4,5])
  weights = tf.constant([1,5,0,1,0,5,4,5])
  tf.math.bincount(values, weights=weights) #[0 6 0 1 9 5]
  Bin will be incremented by the corresponding weight instead of 1.
  Here, index 1 in output has a value 6. This is the summation of weights
  corresponding to the value in `values`.

  **Bin-counting on a certain axis**

  This example takes a 2 dimensional input and returns a `Tensor` with
  bincounting on each sample.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [2, 1, 1, 0]], dtype=int32)>

  **Bin-counting with binary_output**

  This example gives binary output instead of counting the occurrence.

  >>> data = np.array([[1, 2, 3, 0], [0, 0, 1, 2]], dtype=np.int32)
  >>> tf.math.bincount(data, axis=-1, binary_output=True)
  <tf.Tensor: shape=(2, 4), dtype=int32, numpy=
    array([[1, 1, 1, 1],
           [1, 1, 1, 0]], dtype=int32)>

    arr: A Tensor, RaggedTensor, or SparseTensor whose values should be counted.
      These tensors must have a rank of 2 if `axis=-1`.
    weights: If non-None, must be the same shape as arr. For each value in
      `arr`, the bin will be incremented by the corresponding weight instead of
    minlength: If given, ensures the output has length at least `minlength`,
      padding with zeros at the end if necessary.
    maxlength: If given, skips values in `arr` that are equal or greater than
      `maxlength`, ensuring that the output has length at most `maxlength`.
    dtype: If `weights` is None, determines the type of the output bins.
    name: A name scope for the associated operations (optional).
    axis: The axis to slice over. Axes at and below `axis` will be flattened
      before bin counting. Currently, only `0`, and `-1` are supported. If None,
      all axes will be flattened (identical to passing `0`).
    binary_output: If True, this op will output 1 instead of the number of times
      a token appears (equivalent to one_hot + reduce_any instead of one_hot +
      reduce_add). Defaults to False.

    A vector with the same dtype as `weights` or the given `dtype`. The bin

    `InvalidArgumentError` if negative values are provided as an input.

    name = "bincount" if name is None else name
    with ops.name_scope(name):
        # Somehow forward compatible needs to be False.
        # Path taken when neither `binary_output` nor `axis` is given: the
        # input is converted to a dense int32 tensor.
        if not binary_output and axis is None:
            arr = ops.convert_to_tensor(arr, name="arr", dtype=dtypes.int32)
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
            # Zero for empty input, otherwise max(arr) + 1.
            output_size = math_ops.cast(array_is_nonempty, dtypes.int32) * (
                math_ops.reduce_max(arr) + 1)
            # NOTE(review): both `convert_to_tensor(...)` calls below are cut
            # off after their first argument.
            if minlength is not None:
                minlength = ops.convert_to_tensor(minlength,
                output_size = gen_math_ops.maximum(minlength, output_size)
            if maxlength is not None:
                maxlength = ops.convert_to_tensor(maxlength,
                output_size = gen_math_ops.minimum(maxlength, output_size)
            if weights is not None:
                weights = ops.convert_to_tensor(weights, name="weights")
                return gen_math_ops.unsorted_segment_sum(
                    weights, arr, output_size)
            # No weights: pass an empty weights tensor and a flattened `arr`.
            weights = constant_op.constant([], dtype)
            arr = array_ops.reshape(arr, [-1])
            return gen_math_ops.bincount(arr, output_size, weights)

        # NOTE(review): the conversion call below is cut off.
        if not isinstance(arr, sparse_tensor.SparseTensor):
            arr = ragged_tensor.convert_to_tensor_or_ragged_tensor(arr,
        if weights is not None:
            if not isinstance(weights, sparse_tensor.SparseTensor):
                weights = ragged_tensor.convert_to_tensor_or_ragged_tensor(
                    weights, name="weights")

        if weights is not None and binary_output:
            raise ValueError(
                "Arguments `binary_output` and `weights` are mutually "
                "exclusive. Please specify only one.")

        if not arr.dtype.is_integer:
            arr = math_ops.cast(arr, dtypes.int32)
        if axis is None:
            axis = 0

        if axis not in [0, -1]:
            raise ValueError(
                f"Unsupported value for argument axis={axis}. Only 0 and"
                " -1 are currently supported.")

        # NOTE(review): the second `array_is_nonempty` and the second
        # `output_size` assignment below look like lost `else:` branches.
        if isinstance(arr, ragged_tensor.RaggedTensor):
            array_is_nonempty = math_ops.reduce_prod(
                array_ops.shape(arr.values)) > 0
            array_is_nonempty = math_ops.reduce_prod(array_ops.shape(arr)) > 0
        if isinstance(arr, sparse_tensor.SparseTensor):
            output_size = math_ops.cast(array_is_nonempty, arr.dtype) * (
                math_ops.reduce_max(arr.values) + 1)
            output_size = math_ops.cast(
                array_is_nonempty, arr.dtype) * (math_ops.reduce_max(arr) + 1)
        # NOTE(review): both `convert_to_tensor(...)` calls below are cut off.
        if minlength is not None:
            minlength = ops.convert_to_tensor(minlength,
            output_size = gen_math_ops.maximum(minlength, output_size)
        if maxlength is not None:
            maxlength = ops.convert_to_tensor(maxlength,
            output_size = gen_math_ops.minimum(maxlength, output_size)

        # For axis 0 the input (and weights) are reduced to a flat list of
        # values before counting.
        if axis == 0:
            if isinstance(arr, sparse_tensor.SparseTensor):
                if weights is not None:
                    weights = validate_sparse_weights(arr, weights, dtype)
                arr = arr.values
            elif isinstance(arr, ragged_tensor.RaggedTensor):
                if weights is not None:
                    weights = validate_ragged_weights(arr, weights, dtype)
                arr = arr.values
                # NOTE(review): the dense reshape below presumably belongs to
                # a lost `else:` branch -- confirm.
                if weights is not None:
                    weights = array_ops.reshape(weights, [-1])
                arr = array_ops.reshape(arr, [-1])

        # Dispatch on the input kind.
        # NOTE(review): all three op calls are cut off after their first
        # argument, and the dense case has lost its `else:` line.
        if isinstance(arr, sparse_tensor.SparseTensor):
            weights = validate_sparse_weights(arr, weights, dtype)
            return gen_math_ops.sparse_bincount(indices=arr.indices,
        elif isinstance(arr, ragged_tensor.RaggedTensor):
            weights = validate_ragged_weights(arr, weights, dtype)
            return gen_math_ops.ragged_bincount(splits=arr.row_splits,
            weights = validate_dense_weights(arr, weights, dtype)
            return gen_math_ops.dense_bincount(input=arr,
    # Builds the ops that fold one mini-batch into the auxiliary counters and
    # then refresh the primary statistics from them.
    #
    # NOTE(review): this listing has lost lines: most `state_ops.assign*`,
    # `colocate_with`, `cond`, `moments` and `group` calls stop part-way
    # through their argument lists. Code lines are left exactly as found;
    # restore the missing text from the upstream source before use.
    def _update_statistics_from_mini_batch(self, statistics,
                                           auxiliary_variables, times, values):
        """Given mini-batch input, update `statistics` and `auxiliary_variables`."""
        values = math_ops.cast(values, self._dtype)
        # The density (measured in times per observation) that we see in each part
        # of the mini-batch.
        # NOTE(review): the opening of the numerator (presumably a cast) is
        # missing, so the parentheses below do not balance.
        batch_inter_observation_duration = (
                math_ops.reduce_max(times, axis=1) -
                math_ops.reduce_min(times, axis=1), self._dtype) /
            math_ops.cast(array_ops.shape(times)[1] - 1, self._dtype))
        # Co-locate updates with their variables to minimize race conditions when
        # updating statistics.
        with ops.colocate_with(auxiliary_variables.max_time_seen):
            # There is a race condition if this value is being updated from multiple
            # workers. However, it should eventually reach the correct value if the
            # last chunk is presented enough times.
            max_time_seen_assign = state_ops.assign(
        with ops.colocate_with(auxiliary_variables.chunk_count):
            # Incremented by the size of the first dimension of `times`.
            chunk_count_assign = state_ops.assign_add(
                array_ops.shape(times, out_type=dtypes.int64)[0])
        with ops.colocate_with(
            inter_observation_duration_assign = state_ops.assign_add(
        with ops.colocate_with(auxiliary_variables.example_count):
            # Incremented by the total number of elements in `times`.
            example_count_assign = state_ops.assign_add(
                array_ops.size(times, out_type=dtypes.int64))
        # Note: These mean/variance updates assume that all points are equally
        # likely, which is not true if _chunks_ are sampled uniformly from the space
        # of all possible contiguous chunks, since points at the start and end of
        # the series are then members of fewer chunks. For series which are much
        # longer than the chunk size (the usual/expected case), this effect becomes
        # irrelevant.
        with ops.colocate_with(auxiliary_variables.overall_feature_sum):
            overall_feature_sum_assign = state_ops.assign_add(
                math_ops.reduce_sum(values, axis=[0, 1]))
        with ops.colocate_with(
            overall_feature_sum_of_squares_assign = state_ops.assign_add(
                math_ops.reduce_sum(values**2, axis=[0, 1]))
        per_chunk_aux_updates = control_flow_ops.group(
            max_time_seen_assign, chunk_count_assign,
            inter_observation_duration_assign, example_count_assign,
            overall_feature_sum_assign, overall_feature_sum_of_squares_assign)
        # Everything below is created under a control dependency on the
        # auxiliary updates grouped above.
        with ops.control_dependencies([per_chunk_aux_updates]):
            example_count_float = math_ops.cast(
                auxiliary_variables.example_count, self._dtype)
            # NOTE(review): the divisor of the mean and the target variable of
            # the variance assignment are missing below.
            new_feature_mean = (auxiliary_variables.overall_feature_sum /
            overall_feature_mean_update = state_ops.assign(
                statistics.overall_feature_moments.mean, new_feature_mean)
            overall_feature_var_update = state_ops.assign(
                # De-biased n / (n - 1) variance correction
                example_count_float / (example_count_float - 1.) *
                (auxiliary_variables.overall_feature_sum_of_squares /
                 example_count_float - new_feature_mean**2))
            # TODO(b/35675805): Remove this cast
            # Index of the batch entry whose first time is lowest.
            min_time_batch = math_ops.cast(math_ops.argmin(times[:, 0]),

            def series_start_updates():
                # If this is the lowest-time chunk that we have seen so far, update
                # series start moments to reflect that. Note that these statistics are
                # "best effort", as there are race conditions in the update (however,
                # they should eventually converge if the start of the series is
                # presented enough times).
                mean, variance = nn.moments(values[
                    min_time_batch, :self._starting_variance_window_size],
                return control_flow_ops.group(

            with ops.colocate_with(statistics.start_time):
                series_start_update = control_flow_ops.cond(
                    # Update moments whenever we even match the lowest time seen so far,
                    # to ensure that series start statistics are eventually updated to
                    # their correct values, despite race conditions (i.e. eventually
                    # statistics.start_time will reflect the global lowest time, and
                    # given that we will eventually update the series start moments to
                    # their correct values).
                    math_ops.less_equal(times[min_time_batch, 0],
                with ops.control_dependencies([series_start_update]):
                    # There is a race condition if this update is performed in parallel on
                    # multiple workers. Since models may be sensitive to being presented
                    # with times before the putative start time, the value of this
                    # variable is post-processed above to guarantee that each worker is
                    # presented with a start time which is at least as low as the lowest
                    # time in its current mini-batch.
                    start_time_update = state_ops.assign(
            inter_observation_duration_estimate = (
                auxiliary_variables.inter_observation_duration_sum /
                math_ops.cast(auxiliary_variables.chunk_count, self._dtype))
            # Estimate the total number of observations as:
            #   (end time - start time + 1) * average intra-chunk time density
            # NOTE(review): the head of this assignment and the tail of the
            # final `group(...)` call are missing.
            total_observation_count_update = state_ops.assign(
                            auxiliary_variables.max_time_seen -
                            statistics.start_time + 1, self._dtype) /
                        inter_observation_duration_estimate), dtypes.int64))
            per_chunk_stat_updates = control_flow_ops.group(
                overall_feature_mean_update, overall_feature_var_update,
                series_start_update, start_time_update,
        return per_chunk_stat_updates
    # Splits `input_` into per-head chunks, then splits each chunk into read
    # and write pieces and applies an activation to each piece.
    #
    # NOTE(review): this listing has lost lines: the signature stops after
    # `input_,` (the body also uses `mem_size`, `shift_range`, `num_heads` and
    # `style`), three `array_ops.split(...)` calls are cut off, and the
    # tuple/dict construction at the end is missing its `append(`/`else:`
    # lines. Code lines are left exactly as found.
    def head_pieces(input_,
        N, M = mem_size
        S = shift_range
        center = int(S / 2.)
        # Bias of 2.5 on index `center + 1`, added to the shift logits before
        # the softmax below.
        shift_bias = np.zeros(S)
        shift_bias[center + 1] = 2.5

        heads = array_ops.split(input_, num_heads)
        write_heads = []
        read_heads = []
        for head in heads:
            # Sizes of the raw read head and raw write head, in that order.
            splits = [M + S + 3, 3 * M + S + 3]
            read_head_raw, write_head_raw = array_ops.split(head,

            write_pieces = array_ops.split(write_head_raw,
                                           [M, S, 1, 1, 1, M, M],
            read_pieces = array_ops.split(read_head_raw, [M, S, 1, 1, 1],

            key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

            shift_w = nn_ops.softmax(shift_w + shift_bias)
            # softplus(.) + 1, capped at 21.
            gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
            beta_w = nn_ops.softplus(beta_w)
            g_w = math_ops.sigmoid(g_w)
            add_w = math_ops.sigmoid(add_w)
            erase_w = math_ops.sigmoid(erase_w)

            key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

            shift_r = nn_ops.softmax(shift_r + shift_bias)
            gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
            beta_r = nn_ops.softplus(beta_r)
            g_r = math_ops.sigmoid(g_r)

            # NOTE(review): the statements that wrap the tuple and the two
            # dict bodies below are missing.
            if style == 'tuple':
                    (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w))
                read_heads.append((key_r, shift_r, gamma_r, beta_r, g_r))
                    'key' : key_w,
                    'shift' : shift_w,
                    'gamma' : gamma_w,
                    'beta' : beta_w,
                    'g' : g_w,
                    'add' : add_w,
                    'erase' : erase_w,

                    'key' : key_r,
                    'shift' : shift_r,
                    'gamma' : gamma_r,
                    'beta' : beta_r,
                    'g' : g_r,

        return write_heads, read_heads
  def head_pieces(head, mem_size, num_shifts=3, axis=1):
    """Split raw controller output into activated read/write head pieces.

    There are several activation functions applied to the output of the
    LSTM or FF controller, this method performs the necessary operations
    to produce the shift vector, interpolation, sharpening, key, and beta
    for the read/write operations. Also produces the add and erase vectors
    for modifying the memory matrix. This method is used outside of the
    class as well, which is why it's static.

    Args:
      head: Tensor of the raw output of the controller network.
      mem_size: Tuple of integers stating the size of the memory (NxM).
      num_shifts: Integer that is used to determine the magnitude and
        direction of possible shifts for the read and write heads.
      axis: The axis of 'head' where splitting should occur. This is used
        for instances when 'head' is a rank 3 or rank 2 tensor. The default
        value is 1.
        (This should be eliminated to perform splitting on the last axis
        of the tensor... can probably be changed to '-1' without problems)

    Returns:
      A `(write_head, read_head)` pair. `write_head` is the tuple
      `(key, shift, gamma, beta, g, add, erase)` and `read_head` is the
      tuple `(key, shift, gamma, beta, g)`.
    """
    # Only the second entry (M) of the memory size is needed here.
    _, num_bits = mem_size

    # The shift bias is all zeros: the 2.5 bias on the centre shift is
    # temporarily disabled for regression testing with the NP implementation.
    shift_bias = np.zeros(num_shifts)

    # Number of elements in the read/write heads, respectively.
    splits = [num_bits+num_shifts+3, 3*num_bits+num_shifts+3]
    # Split on the same axis as the per-piece splits below.
    read_head_raw, write_head_raw = array_ops.split(head, splits, axis=axis)

    write_splits = [num_bits, num_shifts, 1, 1, 1, num_bits, num_bits]
    read_splits = [num_bits, num_shifts, 1, 1, 1]
    write_pieces = array_ops.split(write_head_raw, write_splits, axis=axis)
    read_pieces = array_ops.split(read_head_raw, read_splits, axis=axis)

    key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

    # Multiple operations are applied to the pieces of the write head,
    # see the original paper or this project's writeup for the breakdown.
    shift_w = nn_ops.softmax(shift_w + shift_bias)
    # softplus(.) + 1, capped at 21.
    gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
    beta_w = nn_ops.softplus(beta_w)
    g_w = math_ops.sigmoid(g_w)
    add_w = math_ops.sigmoid(add_w)
    erase_w = math_ops.sigmoid(erase_w)

    key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

    # Operations applied to the pieces of the read head.
    shift_r = nn_ops.softmax(shift_r + shift_bias)
    gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
    beta_r = nn_ops.softplus(beta_r)
    g_r = math_ops.sigmoid(g_r)

    write_head = (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w)
    read_head = (key_r, shift_r, gamma_r, beta_r, g_r)

    return write_head, read_head
Exemple #14
    def get_updates(self, loss, params):
        """Build the update operations for one optimization step.

        The step combines first/second moment estimates with a warming
        momentum schedule, and clips the per-parameter adaptive step size
        between a lower and an upper bound that both depend on the
        iteration count. When `self.amsgrad` is set, the running maximum of
        the second-moment estimate is used instead of the raw estimate.

        Args:
            loss: Loss tensor forwarded to `self.get_gradients`.
            params: Sequence of variables to be updated.

        Returns:
            `self.updates`: the list of update operations (plus one
            `(variable, new_value)` tuple for the momentum schedule).

        Side effects:
            Overwrites `self.updates` and `self.weights`.
        """
        grads = self.get_gradients(loss, params)
        self.updates = [state_ops.assign_add(self.iterations, 1)]

        lr = self.lr
        if self.initial_decay > 0:
            decay_steps = math_ops.cast(self.iterations, K.dtype(self.decay))
            lr = lr * (1. / (1. + self.decay * decay_steps))

        # 1-based step counter as a float tensor.
        t = math_ops.cast(self.iterations, K.floatx()) + 1

        # Bounds applied to the adaptive step size further below.
        lower_bound = self.lr_boost * (1. - 1. / (self.gamma * t + 1.))
        upper_bound = self.lr_boost * (1. + 1. / (self.gamma * t))

        # Weight given to the current gradient in the first-moment update.
        if self.sgdcorr:
            m_rate = 1. - self.beta_1 / (self.gamma * t + 1.)
        else:
            m_rate = 1. - self.beta_1

        # Due to the recommendations in [2], i.e. warming momentum schedule
        momentum_cache_t = self.beta_1 * (
            1. - 0.5 *
            (math_ops.pow(K.cast_to_floatx(0.96), t * self.schedule_decay)))
        momentum_cache_t_1 = self.beta_1 * (
            1. - 0.5 *
            (math_ops.pow(K.cast_to_floatx(0.96),
                          (t + 1) * self.schedule_decay)))
        m_schedule_new = self.m_schedule * momentum_cache_t
        m_schedule_next = (self.m_schedule * momentum_cache_t *
                           momentum_cache_t_1)
        self.updates.append((self.m_schedule, m_schedule_new))

        ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        if self.amsgrad:
            vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
        else:
            # Placeholders so `self.weights` and the zip below keep the same
            # layout whether or not AMSGrad is enabled; they are never read.
            vhats = [K.zeros(1) for _ in params]

        self.weights = [self.iterations] + ms + vs + vhats

        # Bias-correction terms do not depend on the parameter, so they are
        # computed once instead of once per loop iteration.
        beta_1_reduce = 1. - math_ops.pow(self.beta_1, t)
        beta_2_reduce = 1. - math_ops.pow(self.beta_2, t)

        for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
            # the following equations given in [1]
            g_prime = g / (1. - m_schedule_new)
            m_t = self.beta_1 * m + m_rate * g
            m_t_prime = m_t / (1. - m_schedule_next)
            v_t = self.beta_2 * v + (1. - self.beta_2) * math_ops.square(g)
            if self.amsgrad:
                vhat_t = math_ops.maximum(vhat, v_t)
                self.updates.append(state_ops.assign(vhat, vhat_t))
                v_t_prime = vhat_t / beta_2_reduce
            else:
                v_t_prime = v_t / beta_2_reduce
            m_t_bar = ((m_rate / (1. - self.beta_1)) *
                       (1. - momentum_cache_t) * g_prime +
                       momentum_cache_t_1 * m_t_prime)
            lr_v = gen_math_ops.reciprocal(
                (gen_math_ops.sqrt(v_t_prime) + self.epsilon) * beta_1_reduce)

            self.updates.append(state_ops.assign(m, m_t))
            self.updates.append(state_ops.assign(v, v_t))

            # Clip the adaptive step size into [lower_bound, upper_bound].
            lr_bound = gen_math_ops.minimum(
                gen_math_ops.maximum(lr_v, lower_bound), upper_bound)
            new_p = p - lr * lr_bound * beta_1_reduce * m_t_bar

            # Apply constraints.
            if getattr(p, 'constraint', None) is not None:
                new_p = p.constraint(new_p)

            self.updates.append(state_ops.assign(p, new_p))
        return self.updates
Exemple #15
    def head_pieces(head, mem_size, shift_range, axis=1, style='tuple'):
        """Split the raw controller output into activated head components.

        There are several activation functions applied to the output of the
        LSTM or FF controller, this method performs the necessary operations
        to produce the shift vector, interpolation, sharpening, key, and beta
        for the read/write operations. Also produces the add and erase vectors
        for modifying the memory matrix. This method is used outside of the
        class as well, which is why it's static.

        Args:
          head - Tensor of the raw output of the controller network. Its size
            along `axis` must be `4*M + 2*S + 6` (read head followed by
            write head).
          mem_size - Tuple of integers stating the size of the memory (NxM).
            Only M is used here.
          shift_range - Integer that is used to determine the magnitude and
            direction of possible shifts for the read and write heads.
          axis - The axis of 'head' where splitting should occur. This is used
            for instances when 'head' is a rank 3 or rank 2 tensor. The default
            value is 1.
            (This should be eliminated to perform splitting on the last axis
            of the tensor... can probably be changed to '-1' without problems)
          style - How the head data should be reported, as a tuple or as a
            dictionary. The tuple formulation is used for the internal
            calculations of the NTMCell class; the dictionary form is used
            for troubleshooting.
            Possible values: "tuple" or "dict". Any value other than
            "tuple" yields the dictionary form.

        Returns:
          A pair `(write_head, read_head)`. In tuple style the write head is
          `(key, shift, gamma, beta, g, add, erase)` and the read head is
          `(key, shift, gamma, beta, g)`; in dict style the same tensors are
          keyed by those names.
        """
        _, M = mem_size  # N (number of memory rows) is not needed here.
        S = shift_range

        # Bias added to the shift logits before the softmax.
        # NOTE(review): index `center + 1` is out of range for
        # shift_range < 3 -- confirm the intended offset.
        center = int(S / 2.)
        shift_bias = np.zeros(S)
        shift_bias[center + 1] = 2.5

        # Number of elements in the read/write heads, respectively.
        splits = [M + S + 3, 3 * M + S + 3]
        read_head_raw, write_head_raw = array_ops.split(head, splits,
                                                        axis=axis)

        write_pieces = array_ops.split(write_head_raw, [M, S, 1, 1, 1, M, M],
                                       axis=axis)
        read_pieces = array_ops.split(read_head_raw, [M, S, 1, 1, 1],
                                      axis=axis)

        key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w = write_pieces

        # Multiple operations are applied to the pieces of the write head,
        # see the original paper or this project's writeup for the breakdown.
        shift_w = nn_ops.softmax(shift_w + shift_bias)
        # softplus(.) + 1 keeps gamma above 1; the minimum caps it at 21.
        gamma_w = gen_math_ops.minimum(nn_ops.softplus(gamma_w) + 1, 21.)
        beta_w = nn_ops.softplus(beta_w)
        g_w = math_ops.sigmoid(g_w)
        add_w = math_ops.sigmoid(add_w)
        erase_w = math_ops.sigmoid(erase_w)

        key_r, shift_r, gamma_r, beta_r, g_r = read_pieces

        # Operations applied to the pieces of the read head.
        shift_r = nn_ops.softmax(shift_r + shift_bias)
        gamma_r = gen_math_ops.minimum(nn_ops.softplus(gamma_r) + 1, 21.)
        beta_r = nn_ops.softplus(beta_r)
        g_r = math_ops.sigmoid(g_r)

        if style == 'tuple':
            write_head = (key_w, shift_w, gamma_w, beta_w, g_w, add_w, erase_w)
            read_head = (key_r, shift_r, gamma_r, beta_r, g_r)
        else:
            write_head = {
                'key': key_w,
                'shift': shift_w,
                'gamma': gamma_w,
                'beta': beta_w,
                'g': g_w,
                'add': add_w,
                'erase': erase_w,
            }
            read_head = {
                'key': key_r,
                'shift': shift_r,
                'gamma': gamma_r,
                'beta': beta_r,
                'g': g_r,
            }

        return write_head, read_head