Example #1
    def loss(self, y_true, y_pred):
        """ categorical crossentropy loss """

        if self.crop_indices is not None:
            y_true = utils.batch_gather(y_true, self.crop_indices)
            y_pred = utils.batch_gather(y_pred, self.crop_indices)

        if self.use_float16:
            y_true = K.cast(y_true, 'float16')
            y_pred = K.cast(y_pred, 'float16')

        # scale and clip probabilities
        # this should not be necessary for softmax output.
        y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
        y_pred = K.clip(y_pred, K.epsilon(), 1)

        # compute log probability
        log_post = K.log(y_pred)  # log probabilities

        # loss
        loss = - y_true * log_post

        # weighted loss
        if self.weights is not None:
            loss *= self.weights

        if self.vox_weights is not None:
            loss *= self.vox_weights

        # take the total loss
        # loss = K.batch_flatten(loss)
        mloss = K.mean(K.sum(K.cast(loss, 'float32'), -1))
        mloss = tf.verify_tensor_all_finite(mloss, 'Loss not finite')
        return mloss
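For reference, the reduction above computes the weighted categorical crossentropy

.. math:: \mathcal{L} = \textrm{mean}_b \left( \sum_c - w_c \, y_{b,c} \log \hat{y}_{b,c} \right)

i.e. the per-class log loss is summed over the label axis and averaged over everything else.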
Example #2
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
        x n where each row represents the mean of an n-dimensional
        Gaussian.
    scale_one : tf.Tensor
        A tensor of same shape as ``loc_one``, representing the
        standard deviation.
    loc_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        mean of another Gaussian.
    scale_two : tf.Tensor, optional
        A tensor of same shape as ``loc_one``, representing the
        standard deviation of another Gaussian.

    Returns
    -------
    tf.Tensor
        For 0-D or 1-D tensor inputs, outputs the 0-D tensor
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        For 2-D tensor inputs, outputs the 1-D tensor
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    loc_one = tf.cast(loc_one, tf.float32)
    scale_one = tf.cast(scale_one, tf.float32)

    if loc_two == 0.0 and scale_two == 1.0:
        # With default arguments, we can avoid some intermediate computation.
        # (This Python-level comparison only fires when the defaults were left
        # as the floats 0.0 and 1.0; a tf.Tensor never compares equal here.)
        out = tf.square(scale_one) + tf.square(loc_one) - \
              1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        loc_two = tf.cast(loc_two, tf.float32)
        scale_two = tf.cast(scale_two, tf.float32)
        out = tf.square(scale_one/scale_two) + \
              tf.square((loc_two - loc_one)/scale_two) - \
              1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1: # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else: # matrix
        return 0.5 * tf.reduce_sum(out, 1)
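For reference, the closed form implemented above, for diagonal Gaussians of dimension n:

.. math::

    KL\left( N(z; \mu_1, \sigma_1) \,\|\, N(z; \mu_2, \sigma_2) \right)
    = \frac{1}{2} \sum_{i=1}^{n} \left[ \frac{\sigma_{1,i}^2}{\sigma_{2,i}^2}
    + \frac{(\mu_{2,i} - \mu_{1,i})^2}{\sigma_{2,i}^2}
    - 1 + 2 \log \sigma_{2,i} - 2 \log \sigma_{1,i} \right]

With ``loc_two = 0`` and ``scale_two = 1`` the middle terms simplify, which is the shortcut the default branch takes.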
Example #3
def create_generative(parameters):
    print('Creating the neural network model.')
    
    tf.reset_default_graph()
    # tf Graph input
    x = tf.placeholder(tf.float32, shape=(1, parameters['n_input']), name='input')
    x = tf.verify_tensor_all_finite(x, "X not finite!")
    y = tf.placeholder(tf.float32, shape=(1, parameters['n_output']), name='expected_output')
    y = tf.verify_tensor_all_finite(y, "Y not finite!")
    x = tf.Print(x, [x], "X: ")
    y = tf.Print(y, [y], "Y: ")
    lstm_state_size = np.sum(parameters['lstm_layers']) * 2
    # Note: Batch size is the first dimension in istate.
    istate = tf.placeholder(tf.float32, shape=(None, lstm_state_size), name='internal_state')
    lr = tf.placeholder(tf.float32, name='learning_rate')

    # The target to track itself and its peers, each with x and y
    # (and velocity x and y).
    input_size = (parameters['n_peers'] + 1) * 2
    inputToRnn = parameters['input_layer']
    if parameters['input_layer'] is None:
        inputToRnn = parameters['n_input']

    cells = [rnn_cell.LSTMCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn,
                               num_proj=parameters['lstm_layers'][i],
                               cell_clip=parameters['lstm_clip'],
                               use_peepholes=True) for i,l in enumerate(parameters['lstm_layers'])] 
    # TODO: GRUCell support here.
    # cells = [rnn_cell.GRUCell(l, parameters['lstm_layers'][i-1] if (i > 0) else inputToRnn) for i,l in enumerate(parameters['lstm_layers'])]
    model = {
        'input_weights': tf.Variable(tf.random_normal(
            [input_size, parameters['input_layer']]), name='input_weights'),
        'input_bias': tf.Variable(tf.random_normal([parameters['input_layer']]), name='input_bias'),
        'output_weights': tf.Variable(tf.random_normal([parameters['lstm_layers'][-1],
                                                        # 6 = 2 sigma, 2 mean, weight, rho
                                                        parameters['n_mixtures'] * 6]),
                                      name='output_weights'),
        # The standard-deviation output biases need to start at about 5 to
        # prevent zeros and infinities (e.g. mean=5.0, stddev=3.0 in the
        # random_normal initializer below).
        'output_bias': tf.Variable(tf.random_normal([parameters['n_mixtures'] * 6]),
                                   name='output_bias'),
        'rnn_cell': rnn_cell.MultiRNNCell(cells),
        'lr': lr,
        'x': x,
        'y': y,
        'keep_prob': tf.placeholder(tf.float32),
        'istate': istate
    }

    # The next variables need to be remapped, because we don't have RNN context anymore:
    # RNN/MultiRNNCell/Cell0/LSTMCell/ -> MultiRNNCell/Cell0/LSTMCell/
    # B, W_F_diag, W_O_diag, W_I_diag, W_0
    with tf.variable_scope("RNN"):
        pred = RNN_generative(parameters, x, model, istate)
    
    model['pred'] = pred[0]
    model['last_state'] = pred[1]

    return model
Example #4
def hessian(y, xs):
    """Calculate Hessian of y with respect to each x in xs.

    Parameters
    ----------
    y : tf.Tensor
        Tensor to calculate Hessian of.
    xs : list of tf.Variable
        List of TensorFlow variables to calculate with respect to.
        The variables can have different shapes.

    Returns
    -------
    tf.Tensor
        A 2-D tensor where each row is
        .. math:: \partial_{xs} ( [ \partial_{xs} y ]_j ).

    Raises
    ------
    InvalidArgumentError
        If the inputs have Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(y, msg='')]
    dependencies.extend([tf.verify_tensor_all_finite(x, msg='') for x in xs])

    with tf.control_dependencies(dependencies):
        # Calculate flattened vector grad_{xs} y.
        grads = tf.gradients(y, xs)
        grads = [tf.reshape(grad, [-1]) for grad in grads]
        grads = tf.concat(0, grads)
        # Loop over each element in the vector.
        mat = []
        d = grads.get_shape()[0]
        if not isinstance(d, int):
            d = grads.eval().shape[0]

        for j in range(d):
            # Calculate grad_{xs} ( [ grad_{xs} y ]_j ).
            gradjgrads = tf.gradients(grads[j], xs)
            # Flatten into vector.
            hi = []
            for l in range(len(xs)):
                hij = gradjgrads[l]
                # return 0 if gradient doesn't exist; TensorFlow returns None
                if hij is None:
                    hij = tf.zeros(xs[l].get_shape(), dtype=tf.float32)

                hij = tf.reshape(hij, [-1])
                hi.append(hij)

            hi = tf.concat(0, hi)
            mat.append(hi)

        # Form matrix where each row is grad_{xs} ( [ grad_{xs} y ]_j ).
        return tf.pack(mat)
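A minimal usage sketch (my own illustration, not from the original source), assuming the same pre-1.0 TensorFlow API the snippet uses; for y = x^T A x with a constant A, the Hessian should come out as A + A^T:

import tensorflow as tf

# Keep a default session open, since hessian() may fall back to grads.eval()
# when the gradient shape is not statically known.
sess = tf.InteractiveSession()
A = tf.constant([[2.0, 1.0], [0.0, 3.0]])
x = tf.Variable(tf.ones([2]))
y = tf.reduce_sum(x * tf.reshape(tf.matmul(A, tf.expand_dims(x, 1)), [-1]))  # x^T A x
sess.run(tf.initialize_all_variables())

H = hessian(y, [x])
print(H.eval())  # expect A + A^T = [[4., 1.], [1., 6.]]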
Example #5
    def _validate(self):
        vops = [tf.assert_positive(self._scale),
                tf.assert_positive(self._high - self._low),
                tf.verify_tensor_all_finite(self._high,
                                            "Upper bound not finite"),
                tf.verify_tensor_all_finite(self._low,
                                            "Lower bound not finite"),
                tf.verify_tensor_all_finite(self._loc,
                                            "Loc not finite"),
                tf.verify_tensor_all_finite(self._scale,
                                            "Scale not finite"),
                ]
        return tf.group(*vops, name="ValidationOps")
Example #6
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
    diagonal covariances.

    Parameters
    ----------
    loc_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of an n-dimensional Gaussian
    scale_one : tf.Tensor
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of an n-dimensional Gaussian
    loc_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the mean of an n-dimensional Gaussian
    scale_two : tf.Tensor, optional
        n-dimensional vector, or M x n-dimensional matrix where each
        row represents the standard deviation of an n-dimensional Gaussian

    Returns
    -------
    tf.Tensor
        for scalar or vector inputs, outputs the scalar
        ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
        for matrix inputs, outputs the vector
        ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

    Raises
    ------
    InvalidArgumentError
        If the location variables have Inf or NaN values, or if the scale
        variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(loc_one, msg=''),
                    tf.verify_tensor_all_finite(loc_two, msg=''),
                    tf.assert_positive(scale_one),
                    tf.assert_positive(scale_two)]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)
    scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)

    if loc_two == 0.0 and scale_two == 1.0:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
              1.0 - 2.0 * tf.log(scale_one)
    else:
        out = tf.square(scale_one/scale_two) + \
              tf.square((loc_two - loc_one)/scale_two) - \
              1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    # Reduce over the last axis so matrix inputs yield one KL per row,
    # and vector inputs yield a scalar (as documented).
    if len(out.get_shape()) <= 1:  # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else:  # matrix
        return 0.5 * tf.reduce_sum(out, 1)
Example #7
def cumprod(xs):
    """Cumulative product of a tensor along its outer dimension.

    https://github.com/tensorflow/tensorflow/issues/813

    Parameters
    ----------
    xs : tf.Tensor
        A 1-D or higher tensor.

    Returns
    -------
    tf.Tensor
        A tensor with `cumprod` applied along its outer dimension.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(xs, msg='')]
    xs = control_flow_ops.with_dependencies(dependencies, xs)
    xs = tf.cast(xs, dtype=tf.float32)

    values = tf.unpack(xs)
    out = []
    prev = tf.ones_like(values[0])
    for val in values:
        s = prev * val
        out.append(s)
        prev = s

    result = tf.pack(out)
    return result
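A quick sanity check (my own, assuming the pre-1.0 API with tf.unpack / tf.pack that the snippet targets):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0, 4.0])
with tf.Session() as sess:
    print(sess.run(cumprod(x)))  # [ 1.  2.  6. 24.]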
Example #8
    def mean_dice(self, y_true, y_pred):
        """ weighted mean dice across all patches and labels """

        # compute dice, which will now be [batch_size, nb_labels]
        dice_metric = self.dice(y_true, y_pred)

        # weigh the entries in the dice matrix:
        if self.weights is not None:
            dice_metric *= self.weights
        if self.vox_weights is not None:
            dice_metric *= self.vox_weights

        # return one minus mean dice as loss
        mean_dice_metric = K.mean(dice_metric)
        mean_dice_metric = tf.verify_tensor_all_finite(mean_dice_metric, 'metric not finite')
        return mean_dice_metric
Example #9
    def testVerifyTensorAllFiniteSucceeds(self):
        x_shape = [5, 4]
        x = np.random.random_sample(x_shape).astype(np.float32)
        with self.test_session():
            t = tf.constant(x, shape=x_shape, dtype=tf.float32)
            t_verified = tf.verify_tensor_all_finite(t, "Input is not a number.")
            self.assertAllClose(x, t_verified.eval())
Example #10
    def __init__(self, rnn_states, type_embedder, name='DelexicalizedDynamicPredicateEmbedder'):
        """Construct DelexicalizedDynamicPredicateEmbedder.

        Args:
            rnn_states (SequenceBatch): of shape (num_contexts, seq_length, rnn_state_dim)
            type_embedder (TokenEmbedder)
            name (str)
        """
        self._type_embedder = type_embedder

        with tf.name_scope(name):
            # column indices of rnn_states (indexes time)
            self._col_indices = FeedSequenceBatch()  # (num_predicates, max_predicate_mentions)

            # row indices of rnn_states (indexes utterance)
            self._row_indices = tf.placeholder(dtype=tf.int32, shape=[None])  # (num_predicates,)
            row_indices_expanded = expand_dims_for_broadcast(self._row_indices, self._col_indices.values)

            # (num_predicates, max_predicate_mentions, rnn_state_dim)
            rnn_states_selected = SequenceBatch(
                gather_2d(rnn_states.values, row_indices_expanded, self._col_indices.values),
                self._col_indices.mask)

            # (num_predicates, rnn_state_dim)
            rnn_embeds = reduce_mean(rnn_states_selected, allow_empty=True)
            rnn_embeds = tf.verify_tensor_all_finite(rnn_embeds, "RNN-state-based embeddings")

            self._type_seq_embedder = MeanSequenceEmbedder(type_embedder.embeds, name='TypeEmbedder')
            self._embeds = tf.concat(1, [rnn_embeds, self._type_seq_embedder.embeds])
Example #11
def log_sum_exp(input_tensor, reduction_indices=None, keep_dims=False):
    """Compute the ``log_sum_exp`` of elements in a tensor, taking
    the sum across axes given by ``reduction_indices``.

    Parameters
    ----------
    input_tensor : tf.Tensor
        The tensor to reduce. Should have numeric type.
    reduction_indices : int or list of int, optional
        The dimensions to reduce. If `None` (the default), reduces all
        dimensions.
    keep_dims : bool, optional
        If true, retains reduced dimensions with length 1.

    Returns
    -------
    tf.Tensor
        The reduced tensor.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(input_tensor, msg='')]
    input_tensor = control_flow_ops.with_dependencies(dependencies, input_tensor)
    input_tensor = tf.cast(input_tensor, dtype=tf.float32)

    x_max = tf.reduce_max(input_tensor, reduction_indices, keep_dims=True)
    return tf.squeeze(x_max) + tf.log(tf.reduce_sum(
        tf.exp(input_tensor - x_max), reduction_indices, keep_dims))
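The numerically stable identity behind the max-shift above:

.. math:: \log \sum_i \exp(x_i) = \max_j x_j + \log \sum_i \exp\left(x_i - \max_j x_j\right)

so the largest shifted exponent is always exp(0) = 1 and overflow is avoided.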
Example #12
def log_sum_exp(x):
    """Compute the ``log_sum_exp`` of the elements in x.

    Parameters
    ----------
    x : tf.Tensor
        vector or matrix with second dimension 1
        shape=TensorShape([Dimension(N)])
        shape=TensorShape([Dimension(N), Dimension(1)])

    Returns
    -------
    tf.Tensor
        scalar if vector input, vector if matrix tensor input
    
    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(dependencies, x)

    x_max = tf.reduce_max(x)
    return tf.add(x_max, tf.log(tf.reduce_sum(tf.exp(tf.sub(x, x_max)))))
Example #13
    def loss(self, y_true, y_pred):
        """ the loss. Assumes y_pred is prob (in [0,1] and sum_row = 1) """

        # compute dice, which will now be [batch_size, nb_labels]
        dice_metric = self.dice(y_true, y_pred)

        # loss
        dice_loss = 1 - dice_metric

        # weigh the entries in the dice matrix:
        if self.weights is not None:
            dice_loss *= self.weights

        # return one minus mean dice as loss
        mean_dice_loss = K.mean(dice_loss)
        mean_dice_loss = tf.verify_tensor_all_finite(mean_dice_loss, 'Loss not finite')
        return mean_dice_loss
Example #14
    def init_target(self):
        with tf.name_scope('target') as scope:
            self.target = self.reduced_loss + self.reg * self.regularization
            self.checked_target = tf.verify_tensor_all_finite(
                self.target,
                msg='NaN or Inf in target value',
                name='target')
            tf.summary.scalar('target', self.checked_target)
Example #15
def multivariate_rbf(x, y=0.0, sigma=1.0, l=1.0):
    """Squared-exponential kernel

    .. math:: k(x, y) = \sigma^2 \exp{ -1/(2l^2) \sum_i (x_i - y_i)^2 }

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.
    y : tf.Tensor, optional
        A tensor of same shape as ``x``.
    sigma : tf.Tensor, optional
        A 0-D tensor, representing the standard deviation of radial
        basis function.
    l : tf.Tensor, optional
        A 0-D tensor, representing the lengthscale of radial basis
        function.

    Returns
    -------
    tf.Tensor
        A tensor of one less dimension than the input.

    Raises
    ------
    InvalidArgumentError
        If the mean variables have Inf or NaN values, or if the scale
        and length variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg=''),
                    tf.verify_tensor_all_finite(y, msg=''),
                    tf.assert_positive(sigma),
                    tf.assert_positive(l)]
    x = control_flow_ops.with_dependencies(dependencies, x)
    y = control_flow_ops.with_dependencies(dependencies, y)
    sigma = control_flow_ops.with_dependencies(dependencies, sigma)
    l = control_flow_ops.with_dependencies(dependencies, l)
    x = tf.cast(x, dtype=tf.float32)
    y = tf.cast(y, dtype=tf.float32)
    sigma = tf.cast(sigma, dtype=tf.float32)
    l = tf.cast(l, dtype=tf.float32)

    return tf.pow(sigma, 2.0) * \
           tf.exp(-1.0/(2.0*tf.pow(l, 2.0)) * \
           tf.reduce_sum(tf.pow(x - y , 2.0)))
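A small worked call (my own illustration, not from the source): with sigma = l = 1 and a squared distance of 4, the kernel value is exp(-2).

import tensorflow as tf

x = tf.constant([1.0, 2.0])
y = tf.constant([1.0, 0.0])
k = multivariate_rbf(x, y)  # sum_i (x_i - y_i)^2 = 4
with tf.Session() as sess:
    print(sess.run(k))  # exp(-4 / 2) ~ 0.1353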
Example #16
def l2_normalize(x, dim, name=None):
  """l2 normalizes x and caps the gradient of the Square Root.

  Args:
    x: The tensor to normalize.
    dim: The dimension to normalize along.
    name: Optional name for this op.
  Returns:
    x normalized along dim.
  """
  with tf.op_scope([x], name, 'l2_normalize') as scope:
    x = tf.convert_to_tensor(x, name='x')
    x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope)
    x_norm = capped_sqrt(tf.reduce_sum(tf.square(x), [dim], keep_dims=True))
    return tf.verify_tensor_all_finite(tf.div(x,
                                              x_norm,
                                              name=scope),
                                       'Error at %s' % scope)
Example #17
def l1_normalize(x, dim, name=None):
  """l1 normalizes x.

  Args:
    x: The tensor to normalize.
    dim: The dimension to normalize along.
    name: Optional name for this op.
  Returns:
    x normalized along dim.
  """
  with tf.op_scope([x], name, 'l1_normalize') as scope:
    x = tf.convert_to_tensor(x, name='x')
    x = tf.verify_tensor_all_finite(x, 'Error at input %s' % scope)
    x_norm = tf.reduce_sum(tf.abs(x), [dim], keep_dims=True)
    return tf.verify_tensor_all_finite(tf.div(x,
                                              x_norm,
                                              name=scope),
                                       'Error at %s' % scope)
Example #18
def embed(sequence_batch, embeds):
    mask = sequence_batch.mask
    embedded_values = tf.gather(embeds, sequence_batch.values)
    embedded_values = tf.verify_tensor_all_finite(embedded_values, 'embedded_values')

    # set all pad embeddings to zero
    broadcasted_mask = expand_dims_for_broadcast(mask, embedded_values)
    embedded_values *= broadcasted_mask

    return SequenceBatch(embedded_values, mask)
Example #19
def to_simplex(x):
    """Transform real vector of length ``(K-1)`` to a simplex of dimension ``K``
    using a backward stick breaking construction.

    Parameters
    ----------
    x : tf.Tensor
        A 1-D or 2-D tensor.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input but with last dimension of
        size ``K``.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.

    Notes
    -----
    x as a 3-D or higher tensor is not guaranteed to be supported.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(dependencies, x)
    x = tf.cast(x, dtype=tf.float32)

    if isinstance(x, tf.Tensor) or isinstance(x, tf.Variable):
        shape = get_dims(x)
    else:
        shape = x.shape

    if len(shape) == 1:
        n_rows = ()
        K_minus_one = shape[0]
        eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one),
                             dtype=tf.float32))
        z = tf.sigmoid(eq + x)
        pil = tf.concat(0, [z, tf.constant([1.0])])
        piu = tf.concat(0, [tf.constant([1.0]), 1.0 - z])
        S = cumprod(piu)
        return S * pil
    else:
        n_rows = shape[0]
        K_minus_one = shape[1]
        eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one),
                             dtype=tf.float32))
        z = tf.sigmoid(eq + x)
        pil = tf.concat(1, [z, tf.ones([n_rows, 1])])
        piu = tf.concat(1, [tf.ones([n_rows, 1]), 1.0 - z])
        # cumulative product along 1st axis
        S = tf.pack([cumprod(piu_x) for piu_x in tf.unpack(piu)])
        return S * pil
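In equation form, with z_k = sigmoid(x_k - log(K - k)) for k = 1, ..., K-1 as computed above, the backward stick-breaking output is

.. math::

    \pi_j = z_j \prod_{k=1}^{j-1} (1 - z_k) \quad (j < K), \qquad
    \pi_K = \prod_{k=1}^{K-1} (1 - z_k)

which is nonnegative and sums to one, i.e. a point on the K-simplex.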
Example #20
    def testVerifyTensorAllFiniteFails(self):
        x_shape = [5, 4]
        x = np.random.random_sample(x_shape).astype(np.float32)
        my_msg = "Input is not a number."

        # Test NaN.
        x[0] = np.nan
        with self.test_session():
            with self.assertRaisesOpError(my_msg):
                t = tf.constant(x, shape=x_shape, dtype=tf.float32)
                t_verified = tf.verify_tensor_all_finite(t, my_msg)
                t_verified.eval()

        # Test Inf.
        x[0] = np.inf
        with self.test_session():
            with self.assertRaisesOpError(my_msg):
                t = tf.constant(x, shape=x_shape, dtype=tf.float32)
                t_verified = tf.verify_tensor_all_finite(t, my_msg)
                t_verified.eval()
Example #21
    def init_learnable_params(self):
        self.w = [None] * self.order
        for i in range(1, self.order + 1):
            r = self.rank
            if i == 1:
                r = 1
            rnd_weights = tf.random_uniform([self.n_features, r], -self.init_std, self.init_std)
            self.w[i - 1] = tf.verify_tensor_all_finite(
                tf.Variable(rnd_weights, trainable=True, name='embedding_' + str(i)),
                msg='NaN or Inf in w[{}].'.format(i - 1))
        self.b = tf.Variable(self.init_std, trainable=True, name='bias')
        tf.summary.scalar('bias', self.b)
Example #22
def multivariate_rbf(x, y=0.0, sigma=1.0, l=1.0):
    """Squared-exponential kernel

    .. math:: k(x, y) = \sigma^2 \exp{ -1/(2l^2) \sum_i (x_i - y_i)^2 }

    Parameters
    ----------
    x : tf.Tensor
        scalar, vector, matrix, or n-Tensor
    y : Optional[tf.Tensor], default 0.0
        scalar, vector, matrix, or n-Tensor
    sigma : Optional[double], default 1.0
        standard deviation of radial basis function
    l : Optional[double], default 1.0
        lengthscale of radial basis function

    Returns
    -------
    tf.Tensor
        scalar if vector input, rank-(n-1) if n-Tensor input

    Raises
    ------
    InvalidArgumentError
        If the mean variables have Inf or NaN values, or if the scale
        and length variables are not positive.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg=''),
                    tf.verify_tensor_all_finite(y, msg=''),
                    tf.assert_positive(sigma),
                    tf.assert_positive(l)]
    x = control_flow_ops.with_dependencies(dependencies, x)
    y = control_flow_ops.with_dependencies(dependencies, y)
    sigma = control_flow_ops.with_dependencies(dependencies, sigma)
    l = control_flow_ops.with_dependencies(dependencies, l)

    return tf.pow(sigma, 2.0) * \
            tf.exp(-1.0/(2.0*tf.pow(l, 2.0)) * \
            tf.reduce_sum(tf.pow(x - y , 2.0)))
Example #23
def dot(x, y):
    """Compute dot product between a 2-D tensor and a 1-D tensor.

    If x is a ``[M x N]`` matrix, then y is a ``M``-vector.

    If x is a ``M``-vector, then y is a ``[M x N]`` matrix.

    Parameters
    ----------
    x : tf.Tensor
        A 1-D or 2-D tensor (see above).
    y : tf.Tensor
        A 1-D or 2-D tensor (see above).

    Returns
    -------
    tf.Tensor
        A 1-D tensor of length ``N``.

    Raises
    ------
    InvalidArgumentError
        If the inputs have Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg=''),
                    tf.verify_tensor_all_finite(y, msg='')]
    x = control_flow_ops.with_dependencies(dependencies, x)
    y = control_flow_ops.with_dependencies(dependencies, y)
    x = tf.cast(x, dtype=tf.float32)
    y = tf.cast(y, dtype=tf.float32)

    if len(x.get_shape()) == 1:
        vec = x
        mat = y
        # flatten the [1, N] matmul result to a 1-D tensor, as documented
        return tf.reshape(tf.matmul(tf.expand_dims(vec, 0), mat), [-1])
    else:
        mat = x
        vec = y
        # flatten the [M, 1] matmul result to a 1-D tensor, as documented
        return tf.reshape(tf.matmul(mat, tf.expand_dims(vec, 1)), [-1])
Example #24
def dot(x, y):
  """Compute dot product between a 2-D tensor and a 1-D tensor.

  If x is a `[M x N]` matrix, then y is a `M`-vector.

  If x is a `M`-vector, then y is a `[M x N]` matrix.

  Args:
    x: tf.Tensor.
      A 1-D or 2-D tensor (see above).
    y: tf.Tensor.
      A 1-D or 2-D tensor (see above).

  Returns:
    tf.Tensor.
    A 1-D tensor of length `N`.

  Raises:
    InvalidArgumentError.
    If the inputs have Inf or NaN values.
  """
  x = tf.convert_to_tensor(x)
  y = tf.convert_to_tensor(y)
  dependencies = [tf.verify_tensor_all_finite(x, msg=''),
                  tf.verify_tensor_all_finite(y, msg='')]
  x = control_flow_ops.with_dependencies(dependencies, x)
  y = control_flow_ops.with_dependencies(dependencies, y)

  if len(x.shape) == 1:
    vec = x
    mat = y
    return tf.reshape(tf.matmul(tf.expand_dims(vec, 0), mat), [-1])
  else:
    mat = x
    vec = y
    return tf.reshape(tf.matmul(mat, tf.expand_dims(vec, 1)), [-1])
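A short usage sketch (my own, under the snippet's TF 1.x API), covering both argument orders:

import tensorflow as tf

mat = tf.constant([[1.0, 2.0], [3.0, 4.0]])
vec = tf.constant([1.0, 1.0])
with tf.Session() as sess:
    print(sess.run(dot(vec, mat)))  # vec^T mat -> [4. 6.]
    print(sess.run(dot(mat, vec)))  # mat vec   -> [3. 7.]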
Example #25
def to_simplex(x):
  """Transform real vector of length `(K-1)` to a simplex of dimension `K`
  using a backward stick breaking construction.

  Args:
    x: tf.Tensor.
      A 1-D or 2-D tensor.

  Returns:
    tf.Tensor.
    A tensor of same shape as input but with last dimension of
    size `K`.

  Raises:
    InvalidArgumentError.
    If the input has Inf or NaN values.

  #### Notes

  x as a 3-D or higher tensor is not guaranteed to be supported.
  """
  x = tf.cast(x, dtype=tf.float32)
  dependencies = [tf.verify_tensor_all_finite(x, msg='')]
  x = control_flow_ops.with_dependencies(dependencies, x)

  if isinstance(x, (tf.Tensor, tf.Variable)):
    shape = x.get_shape().as_list()
  else:
    shape = x.shape

  if len(shape) == 1:
    K_minus_one = shape[0]
    eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one), dtype=tf.float32))
    z = tf.sigmoid(eq + x)
    pil = tf.concat([z, tf.constant([1.0])], 0)
    piu = tf.concat([tf.constant([1.0]), 1.0 - z], 0)
    S = tf.cumprod(piu)
    return S * pil
  else:
    n_rows = shape[0]
    K_minus_one = shape[1]
    eq = -tf.log(tf.cast(K_minus_one - tf.range(K_minus_one), dtype=tf.float32))
    z = tf.sigmoid(eq + x)
    pil = tf.concat([z, tf.ones([n_rows, 1])], 1)
    piu = tf.concat([tf.ones([n_rows, 1]), 1.0 - z], 1)
    S = tf.cumprod(piu, axis=1)
    return S * pil
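A quick check (my own illustration): a length-3 input should map to a point on the 4-simplex.

import tensorflow as tf

x = tf.constant([0.5, -1.0, 2.0])  # K - 1 = 3, so K = 4
with tf.Session() as sess:
    probs = sess.run(to_simplex(x))
    print(probs, probs.sum())  # four nonnegative entries summing to 1.0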
Example #26
def l1_normalize(x, dim, epsilon=1e-12, name=None):
    """l1 normalizes x.

    Args:
      x: The tensor to normalize.
      dim: The dimension to normalize along.
      epsilon: Lower bound on the norm, used to avoid exploding gradients as the
        norm approaches 0.
      name: Optional name for this op.

    Returns:
      x normalized along dim.
    """
    with tf.name_scope(name, "l1_normalize", [x]) as scope:
        x = tf.convert_to_tensor(x, name="x")
        x = tf.verify_tensor_all_finite(x, "Error at input %s" % scope)
        x_norm = tf.maximum(tf.reduce_sum(tf.abs(x), [dim], keep_dims=True), epsilon)
        return tf.div(x, x_norm, name=scope)
Example #27
def RNN(parameters, input, model, initial_state):
    # The model is:
    # 1. input
    # 2. linear layer
    # 3 - n. LSTM layers
    # n+1. linear layer
    # n+2. output
    input = tf.verify_tensor_all_finite(input, "Input not finite!")
    # input shape: (batch_size, n_steps, n_input)
    input = tf.transpose(input, [1, 0, 2])  # permute n_steps and batch_size
    input = tf.verify_tensor_all_finite(input, "Input not finite2!")
    
    # Reshape to prepare input to the linear layer
    input = tf.reshape(input, [-1, parameters['n_input']]) # (n_steps*batch_size, n_input)
    input = tf.verify_tensor_all_finite(input, "Input not finite3!")
    
    # 1. layer, linear activation for each batch and step.
    if 'input_weights' in model:
        input = tf.matmul(input, model['input_weights']) + model['input_bias']
        # input = tf.nn.dropout(input, model['keep_prob'])

    # Split data because rnn cell needs a list of inputs for the RNN inner loop,
    # that is, a n_steps length list of tensors shaped: (batch_size, n_inputs)
    # This is not well documented, but check for yourself here: https://goo.gl/NzA5pX
    input = tf.split(0, parameters['n_steps'], input) # n_steps * (batch_size, :)

    initial_state = tf.verify_tensor_all_finite(initial_state, "Initial state not finite!")
    # Note: States is shaped: batch_size x cell.state_size
    outputs, states = rnn.rnn(model['rnn_cell'], input, initial_state=initial_state)
    #outputs[-1] = tf.Print(outputs[-1], [outputs[-1]], "LSTM Output: ", summarize = 100)
    lastOutput = tf.verify_tensor_all_finite(outputs[-1], "LSTM Outputs not finite!")
    #lastOutput = tf.nn.dropout(lastOutput, model['keep_prob'])
    # Only the last output is interesting for error back propagation and prediction.
    # Note that all batches are handled together here.

    raw_output = tf.matmul(lastOutput, model['output_weights']) + model['output_bias']
    raw_output = tf.verify_tensor_all_finite(raw_output, "Raw output not finite!")
    
    n_mixtures = parameters['n_mixtures']
    batch_size = parameters['batch_size']
    # And now, instead of just outputting the expected value, we output mixture distributions.
    # The number of mixtures is intuitively the number of possible actions the target can take.
    # The output is divided into triplets of n_mixtures mixture parameters for the 2 absolute position coordinates.
    output = softmax_mixtures(raw_output, n_mixtures, batch_size)
    #output = tf.Print(output, [output], "Output: ", summarize = 100)
    output = tf.verify_tensor_all_finite(output, "Final output not finite!")

    return (output, states)
Example #28
def softplus(x):
    """Elementwise Softplus function

    .. math:: \log(1 + \exp(x))

    If input `x < -30`, returns `0.0` exactly.

    If input `x > 30`, returns `x` exactly.

    TensorFlow can't currently autodiff through ``tf.nn.softplus()``.

    Parameters
    ----------
    x : tf.Tensor
        An n-D tensor.

    Returns
    -------
    tf.Tensor
        A tensor of same shape as input.

    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(dependencies, x)
    x = tf.cast(x, dtype=tf.float32)

    result = tf.log(1.0 + tf.exp(x))

    less_than_thirty = tf.less(x, -30.0)
    result = tf.select(less_than_thirty, tf.zeros_like(x), result)

    greater_than_thirty = tf.greater(x, 30.0)
    result = tf.select(greater_than_thirty, x, result)

    return result
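A numeric check of the clamping behaviour (my own, assuming the pre-1.0 tf.select API used above):

import tensorflow as tf

x = tf.constant([-40.0, 0.0, 40.0])
with tf.Session() as sess:
    print(sess.run(softplus(x)))  # [ 0.  0.6931472  40. ] -- exact 0 and x outside [-30, 30]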
Example #29
def mixture_loss(pred, y, n_mixtures, batch_size):
    pred = tf.verify_tensor_all_finite(pred, "Pred not finite!")
    out_pi, out_sigma, out_mu, out_rho = splitMix(pred, n_mixtures, batch_size)
    result_binorm, result_delta = tf_bivariate_normal(y, out_mu, out_sigma, out_rho, n_mixtures, batch_size)
    
    result_binorm = tf.verify_tensor_all_finite(result_binorm, "Result not finite1!")
    result_weighted = tf.mul(result_binorm, out_pi)
    result_weighted = tf.verify_tensor_all_finite(result_weighted, "Result not finite2!")
    result_raw = tf.reduce_sum(result_weighted + epsilon, 1, keep_dims=True)
    result_raw = tf.Print(result_raw, [tf.reduce_sum(result_raw)], "Sum of weighted density. If zero, sigma is too small: ")
    result_raw = tf.Print(result_raw, [tf.reduce_max(result_raw)], "Max of weighted density. If zero, sigma is too small: ")
    result_raw = tf.verify_tensor_all_finite(result_raw, "Result not finite3!")
    result = -tf.log(result_raw + epsilon)  # epsilon guards against log(0)
    result = tf.verify_tensor_all_finite(result, "Result not finite4!")
    result = tf.reduce_sum(result)
    result = tf.verify_tensor_all_finite(result, "Result not finite5!")
    return result
Example #30
def softplus(x):
    """Elementwise Softplus function

    .. math:: \log(1 + \exp(x))

    If input `x < -30`, returns `0.0` exactly.

    If input `x > 30`, returns `x` exactly.

    TensorFlow can't currently autodiff through ``tf.nn.softplus()``.

    Parameters
    ----------
    x : tf.Tensor
        scalar, vector, matrix, or n-Tensor

    Returns
    -------
    tf.Tensor
        size corresponding to size of input
    
    Raises
    ------
    InvalidArgumentError
        If the input has Inf or NaN values.
    """
    dependencies = [tf.verify_tensor_all_finite(x, msg='')]
    x = control_flow_ops.with_dependencies(dependencies, x)

    result = tf.log(1.0 + tf.exp(x))

    less_than_thirty = tf.less(x, -30.0)
    result = tf.select(less_than_thirty, tf.zeros_like(x), result)

    greater_than_thirty = tf.greater(x, 30.0)
    result = tf.select(greater_than_thirty, x, result)

    return result
Example #31
    def buildModel(self, inputShape):
        if (self.vggFile):
            npWeights = loadWeights(self.vggFile)
        else:
            print "Must load from weights"
            assert (0)

        #Running on GPU
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                #Get convolution variables as placeholders
                self.inputImage = node_variable([
                    self.batchSize, inputShape[0], inputShape[1], inputShape[2]
                ], "inputImage")
                self.gt = node_variable([self.batchSize, self.numClasses],
                                        "gt")

            with tf.name_scope("Conv1Ops"):
                self.W_conv1_1 = weight_variable_fromnp(
                    npWeights["conv1_1_w"], "w_conv1_1")
                self.B_conv1_1 = weight_variable_fromnp(
                    npWeights["conv1_1_b"], "b_conv1_1")
                self.W_conv1_2 = weight_variable_fromnp(
                    npWeights["conv1_2_w"], "w_conv1_2")
                self.B_conv1_2 = weight_variable_fromnp(
                    npWeights["conv1_2_b"], "b_conv1_2")

                self.h_conv1_1 = tf.nn.relu(
                    conv2d(self.inputImage,
                           self.W_conv1_1,
                           "conv1_1",
                           stride=[1, 1, 1, 1]) + self.B_conv1_1)
                self.h_conv1_2 = tf.nn.relu(
                    conv2d(self.h_conv1_1,
                           self.W_conv1_2,
                           "conv1_1",
                           stride=[1, 1, 1, 1]) + self.B_conv1_2)
                self.h_pool1 = maxpool_2x2(self.h_conv1_2, "pool1")

            with tf.name_scope("Conv2Ops"):
                self.W_conv2_1 = weight_variable_fromnp(
                    npWeights["conv2_1_w"], "w_conv2_1")
                self.B_conv2_1 = weight_variable_fromnp(
                    npWeights["conv2_1_b"], "b_conv2_1")
                self.W_conv2_2 = weight_variable_fromnp(
                    npWeights["conv2_2_w"], "w_conv2_2")
                self.B_conv2_2 = weight_variable_fromnp(
                    npWeights["conv2_2_b"], "b_conv2_2")

                self.h_conv2_1 = tf.nn.relu(
                    conv2d(self.h_pool1, self.W_conv2_1, "conv2_1") +
                    self.B_conv2_1)
                self.h_conv2_2 = tf.nn.relu(
                    conv2d(self.h_conv2_1, self.W_conv2_2, "conv2_2") +
                    self.B_conv2_2)
                self.h_pool2 = maxpool_2x2(self.h_conv2_2, "pool2")

            with tf.name_scope("Conv3Ops"):
                self.W_conv3_1 = weight_variable_fromnp(
                    npWeights["conv3_1_w"], "w_conv3_1")
                self.B_conv3_1 = weight_variable_fromnp(
                    npWeights["conv3_1_b"], "b_conv3_1")
                self.W_conv3_2 = weight_variable_fromnp(
                    npWeights["conv3_2_w"], "w_conv3_2")
                self.B_conv3_2 = weight_variable_fromnp(
                    npWeights["conv3_2_b"], "b_conv3_2")
                self.W_conv3_3 = weight_variable_fromnp(
                    npWeights["conv3_3_w"], "w_conv3_3")
                self.B_conv3_3 = weight_variable_fromnp(
                    npWeights["conv3_3_b"], "b_conv3_3")

                self.h_conv3_1 = tf.nn.relu(
                    conv2d(self.h_pool2, self.W_conv3_1, "conv3_1") +
                    self.B_conv3_1)
                self.h_conv3_2 = tf.nn.relu(
                    conv2d(self.h_conv3_1, self.W_conv3_2, "conv3_2") +
                    self.B_conv3_2)
                self.h_conv3_3 = tf.nn.relu(
                    conv2d(self.h_conv3_2, self.W_conv3_3, "conv3_3") +
                    self.B_conv3_3)
                self.h_pool3 = maxpool_2x2(self.h_conv3_3, "pool3")

            with tf.name_scope("Conv4Ops"):
                self.W_conv4_1 = weight_variable_fromnp(
                    npWeights["conv4_1_w"], "w_conv4_1")
                self.B_conv4_1 = weight_variable_fromnp(
                    npWeights["conv4_1_b"], "b_conv4_1")
                self.W_conv4_2 = weight_variable_fromnp(
                    npWeights["conv4_2_w"], "w_conv4_2")
                self.B_conv4_2 = weight_variable_fromnp(
                    npWeights["conv4_2_b"], "b_conv4_2")
                self.W_conv4_3 = weight_variable_fromnp(
                    npWeights["conv4_3_w"], "w_conv4_3")
                self.B_conv4_3 = weight_variable_fromnp(
                    npWeights["conv4_3_b"], "b_conv4_3")

                self.h_conv4_1 = tf.nn.relu(
                    conv2d(self.h_pool3, self.W_conv4_1, "conv4_1") +
                    self.B_conv4_1)
                self.h_conv4_2 = tf.nn.relu(
                    conv2d(self.h_conv4_1, self.W_conv4_2, "conv4_2") +
                    self.B_conv4_2)
                self.h_conv4_3 = tf.nn.relu(
                    conv2d(self.h_conv4_2, self.W_conv4_3, "conv4_3") +
                    self.B_conv4_3)
                self.h_pool4 = maxpool_2x2(self.h_conv4_3, "pool4")

            with tf.name_scope("Conv5Ops"):
                self.W_conv5_1 = weight_variable_fromnp(
                    npWeights["conv5_1_w"], "w_conv5_1")
                self.B_conv5_1 = weight_variable_fromnp(
                    npWeights["conv5_1_b"], "b_conv5_1")
                self.W_conv5_2 = weight_variable_fromnp(
                    npWeights["conv5_2_w"], "w_conv5_2")
                self.B_conv5_2 = weight_variable_fromnp(
                    npWeights["conv5_2_b"], "b_conv5_2")
                self.W_conv5_3 = weight_variable_fromnp(
                    npWeights["conv5_3_w"], "w_conv5_3")
                self.B_conv5_3 = weight_variable_fromnp(
                    npWeights["conv5_3_b"], "b_conv5_3")

                self.h_conv5_1 = tf.nn.relu(
                    conv2d(self.h_pool4, self.W_conv5_1, "conv5_1") +
                    self.B_conv5_1)
                self.h_conv5_2 = tf.nn.relu(
                    conv2d(self.h_conv5_1, self.W_conv5_2, "conv5_2") +
                    self.B_conv5_2)
                self.h_conv5_3 = tf.nn.relu(
                    conv2d(self.h_conv5_2, self.W_conv5_3, "conv5_3") +
                    self.B_conv5_3)
                self.h_pool5 = maxpool_2x2(self.h_conv5_3, "pool5")

        with tf.device('cpu:0'):

            self.keep_prob = tf.placeholder(tf.float32)
            with tf.name_scope("FC6"):
                self.W_fc6 = weight_variable_fromnp(npWeights["fc6_w"],
                                                    "w_fc6")
                self.B_fc6 = weight_variable_fromnp(npWeights["fc6_b"],
                                                    "b_fc6")
                h_pool5_flat = tf.reshape(self.h_pool5,
                                          [self.batchSize, 7 * 7 * 512])
                self.h_fc6 = tf.nn.relu(
                    tf.matmul(h_pool5_flat, self.W_fc6, name="fc6") +
                    self.B_fc6, "fc6_relu")
                self.drop_h_fc6 = tf.nn.dropout(self.h_fc6, self.keep_prob)

        with tf.device(self.device):
            with tf.name_scope("FC7"):
                self.W_fc7 = weight_variable_fromnp(npWeights["fc7_w"],
                                                    "w_fc7")
                self.B_fc7 = weight_variable_fromnp(npWeights["fc7_b"],
                                                    "b_fc7")
                self.h_fc7 = tf.nn.relu(
                    tf.matmul(self.drop_h_fc6, self.W_fc7, name="fc7") +
                    self.B_fc7, "fc7_relu")
                self.drop_h_fc7 = tf.nn.dropout(self.h_fc7, self.keep_prob)

            with tf.name_scope("FC8"):
                self.W_fc8 = weight_variable_xavier([4096, 20], "w_fc8")
                self.B_fc8 = bias_variable([20], "b_fc8")
                self.est = tf.nn.softmax(
                    tf.matmul(self.drop_h_fc7, self.W_fc8, name="fc8") +
                    self.B_fc8, "fc8_relu")

            with tf.name_scope("Loss"):
                #Define loss
                self.loss = tf.reduce_mean(
                    -tf.reduce_sum(self.gt * tf.log(self.est + self.epsilon),
                                   reduction_indices=[1]))
                self.regLoss = self.loss + self.regStrength * tf.add_n(
                    [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
                self.nan_check_loss = tf.verify_tensor_all_finite(
                    self.loss, msg="check_nan")

            with tf.name_scope("Opt"):
                #Define optimizer
                self.optimizerAll = tf.train.AdamOptimizer(
                    self.learningRate,
                    beta1=self.beta1,
                    beta2=self.beta2,
                    epsilon=self.epsilon).minimize(self.loss)
                #self.optimizerAll = tf.train.MomentumOptimizer(self.learningRate, momentum=self.beta1).minimize(self.loss)
                self.optimizerPre = tf.train.AdamOptimizer(
                    self.learningRate,
                    beta1=self.beta1,
                    beta2=self.beta2,
                    epsilon=self.epsilon).minimize(self.loss,
                                                   var_list=[
                                                       self.W_fc6,
                                                       self.B_fc6,
                                                       self.W_fc7,
                                                       self.B_fc7,
                                                       self.W_fc8,
                                                       self.B_fc8,
                                                   ])

            with tf.name_scope("Metric"):
                self.correct = tf.equal(tf.argmax(self.gt, 1),
                                        tf.argmax(self.est, 1))
                self.accuracy = tf.reduce_mean(
                    tf.cast(self.correct, tf.float32))

        #Cannot be on GPU
        (self.eval_vals, self.eval_idx) = tf.nn.top_k(self.est, k=5)

        #Summaries
        tf.scalar_summary('loss', self.loss, name="lossSum")
        tf.scalar_summary('accuracy', self.accuracy, name="accSum")

        tf.histogram_summary('input', self.inputImage, name="image_vis")
        tf.histogram_summary('gt', self.gt, name="gt_vis")
        #Conv layer histograms
        tf.histogram_summary('conv1_1', self.h_conv1_1, name="conv1_1_vis")
        tf.histogram_summary('conv1_2', self.h_conv1_2, name="conv1_2_vis")
        tf.histogram_summary('conv2_1', self.h_conv2_1, name="conv2_1_vis")
        tf.histogram_summary('conv2_2', self.h_conv2_2, name="conv2_2_vis")
        tf.histogram_summary('conv3_1', self.h_conv3_1, name="conv3_1_vis")
        tf.histogram_summary('conv3_2', self.h_conv3_2, name="conv3_2_vis")
        tf.histogram_summary('conv3_3', self.h_conv3_3, name="conv3_3_vis")
        tf.histogram_summary('conv4_1', self.h_conv4_1, name="conv4_1_vis")
        tf.histogram_summary('conv4_2', self.h_conv4_2, name="conv4_2_vis")
        tf.histogram_summary('conv4_3', self.h_conv4_3, name="conv4_3_vis")
        tf.histogram_summary('conv5_1', self.h_conv5_1, name="conv5_1_vis")
        tf.histogram_summary('conv5_2', self.h_conv5_2, name="conv5_2_vis")
        tf.histogram_summary('conv5_3', self.h_conv5_3, name="conv5_3_vis")
        tf.histogram_summary('fc6', self.h_fc6, name="fc6_vis")
        tf.histogram_summary('fc7', self.h_fc7, name="fc7_vis")
        tf.histogram_summary('est', self.est, name="est_vis")
        #Weight and bias hists
        tf.histogram_summary('w_conv1_1', self.W_conv1_1, name="w_conv1_1_vis")
        tf.histogram_summary('b_conv1_1', self.B_conv1_1, name="b_conv1_1_vis")
        tf.histogram_summary('w_conv1_2', self.W_conv1_2, name="w_conv1_2_vis")
        tf.histogram_summary('b_conv1_2', self.B_conv1_2, name="b_conv1_2_vis")
        tf.histogram_summary('w_conv2_1', self.W_conv2_1, name="w_conv2_1_vis")
        tf.histogram_summary('b_conv2_1', self.B_conv2_1, name="b_conv2_1_vis")
        tf.histogram_summary('w_conv2_2', self.W_conv2_2, name="w_conv2_2_vis")
        tf.histogram_summary('b_conv2_2', self.B_conv2_2, name="b_conv2_2_vis")
        tf.histogram_summary('w_conv3_1', self.W_conv3_1, name="w_conv3_1_vis")
        tf.histogram_summary('b_conv3_1', self.B_conv3_1, name="b_conv3_1_vis")
        tf.histogram_summary('w_conv3_2', self.W_conv3_2, name="w_conv3_2_vis")
        tf.histogram_summary('b_conv3_2', self.B_conv3_2, name="b_conv3_2_vis")
        tf.histogram_summary('w_conv3_3', self.W_conv3_3, name="w_conv3_3_vis")
        tf.histogram_summary('b_conv3_3', self.B_conv3_3, name="b_conv3_3_vis")
        tf.histogram_summary('w_conv4_1', self.W_conv4_1, name="w_conv4_1_vis")
        tf.histogram_summary('b_conv4_1', self.B_conv4_1, name="b_conv4_1_vis")
        tf.histogram_summary('w_conv4_2', self.W_conv4_2, name="w_conv4_2_vis")
        tf.histogram_summary('b_conv4_2', self.B_conv4_2, name="b_conv4_2_vis")
        tf.histogram_summary('w_conv4_3', self.W_conv4_3, name="w_conv4_3_vis")
        tf.histogram_summary('b_conv4_3', self.B_conv4_3, name="b_conv4_3_vis")
        tf.histogram_summary('w_conv5_1', self.W_conv5_1, name="w_conv5_1_vis")
        tf.histogram_summary('b_conv5_1', self.B_conv5_1, name="b_conv5_1_vis")
        tf.histogram_summary('w_conv5_2', self.W_conv5_2, name="w_conv5_2_vis")
        tf.histogram_summary('b_conv5_2', self.B_conv5_2, name="b_conv5_2_vis")
        tf.histogram_summary('w_conv5_3', self.W_conv5_3, name="w_conv5_3_vis")
        tf.histogram_summary('b_conv5_3', self.B_conv5_3, name="b_conv5_3_vis")
        tf.histogram_summary('w_fc6', self.W_fc6, name="w_fc6_vis")
        tf.histogram_summary('b_fc6', self.B_fc6, name="b_fc6_vis")
        tf.histogram_summary('w_fc7', self.W_fc7, name="w_fc7_vis")
        tf.histogram_summary('b_fc7', self.B_fc7, name="b_fc7_vis")
        tf.histogram_summary('w_fc8', self.W_fc8, name="w_fc8_vis")
        tf.histogram_summary('b_fc8', self.B_fc8, name="b_fc8_vis")
Example #32
    def buildModel(self, inputShape):
        self.imageShape = (self.batchSize, inputShape[0], inputShape[1],
                           inputShape[2])
        if self.fc:
            self.WShape = (self.imageShape[1] * self.imageShape[2] *
                           self.imageShape[3], self.numV)
            self.VShape = (self.batchSize, self.numV)
        else:
            assert (self.imageShape[1] % self.VStrideY == 0)
            assert (self.imageShape[2] % self.VStrideX == 0)
            V_Y = int(self.imageShape[1] / self.VStrideY)
            V_X = int(self.imageShape[2] / self.VStrideX)
            self.WShape = (self.patchSizeY, self.patchSizeX,
                           self.imageShape[3], self.numV)
            self.VShape = (self.batchSize, V_Y, V_X, self.numV)

        #Running on GPU
        with tf.device(self.device):
            with tf.name_scope("inputOps"):
                #Get convolution variables as placeholders
                self.inputImage = node_variable(self.imageShape, "inputImage")
                defaultMask = tf.zeros(self.imageShape)
                self.inputMask = tf.placeholder_with_default(
                    defaultMask, self.imageShape)

                #Normalize image
                if (self.normalize):
                    n = tf.reduce_sum(1 - self.inputMask,
                                      axis=[1, 2],
                                      keepdims=True)
                    #Avoid divide by 0
                    n = tf.where(tf.equal(n, 0), tf.ones(n.shape), n)
                    self.data_mean = tf.reduce_sum(
                        self.inputImage, axis=[1, 2], keepdims=True) / n
                    self.data_std = tf.sqrt(
                        tf.reduce_sum(
                            tf.square(self.inputImage - self.data_mean),
                            axis=[1, 2],
                            keepdims=True) / n)
                    #Avoid divide by 0
                    self.data_std = tf.where(tf.equal(self.data_std, 0),
                                             tf.ones(self.data_std.shape),
                                             self.data_std)

                    self.scaled_inputImage = (self.inputImage -
                                              self.data_mean) / self.data_std

                #Scale inputImage
                if (self.fc):
                    #TODO is this necessary for fc?
                    #self.scaled_inputImage = self.inputImage/(np.sqrt(self.imageShape[1]*self.imageShape[2]*self.imageShape[3]))
                    self.scaled_inputImage = self.scaled_inputImage
                else:
                    self.patch_norm = np.sqrt(
                        self.patchSizeX * self.patchSizeY * self.imageShape[3])
                    self.scaled_inputImage = self.scaled_inputImage / self.patch_norm
                self.scaled_inputImage = self.scaled_inputImage * self.inputMult
                #self.checked_inputImage = tf.check_numerics(self.scaled_inputImage, "scaled_input error", name=None)

            with tf.name_scope("Dictionary"):
                self.V1_W = weight_variable(self.WShape, "V1_W", 1e-3)

            with tf.name_scope("weightNorm"):
                if (self.fc):
                    self.normVals = tf.sqrt(
                        tf.reduce_sum(tf.square(self.V1_W),
                                      axis=[0],
                                      keepdims=True))
                else:
                    self.normVals = tf.sqrt(
                        tf.reduce_sum(tf.square(self.V1_W),
                                      axis=[0, 1, 2],
                                      keepdims=True))
                self.normVals = tf.verify_tensor_all_finite(self.normVals,
                                                            'V1W error',
                                                            name=None)
                self.normalize_W = self.V1_W.assign(self.V1_W /
                                                    (self.normVals + 1e-8))

            with tf.name_scope("LCA_ADAM"):
                self.V1_init = tf.random_uniform(self.VShape,
                                                 0,
                                                 1.25 * self.thresh,
                                                 dtype=tf.float32)
                self.V1_U = uniform_weight_variable(self.VShape, "V1_U", 0.0,
                                                    1.25 * self.thresh)
                self.V1_A = weight_variable(self.VShape, "V1_A", 1e-3)

            with tf.name_scope("Recon"):
                if (self.fc):
                    flat_recon = tf.matmul(self.V1_A,
                                           self.V1_W,
                                           transpose_b=True,
                                           a_is_sparse=False)
                    #Reshape recon into image shape
                    self.recon = tf.reshape(flat_recon, self.imageShape)
                else:
                    assert (self.VStrideY >= 1)
                    assert (self.VStrideX >= 1)
                    self.recon = tf.nn.conv2d_transpose(
                        self.V1_A,
                        self.V1_W,
                        self.imageShape, [1, self.VStrideY, self.VStrideX, 1],
                        padding='SAME',
                        name="recon")

                #Unnormalize
                self.unscaled_recon = self.recon / self.inputMult

                if (self.fc):
                    pass
                else:
                    self.unscaled_recon = self.unscaled_recon * self.patch_norm

                if (self.normalize):
                    self.unscaled_recon = (self.unscaled_recon *
                                           self.data_std) + self.data_mean
                else:
                    # unscaled_recon is already in input units; nothing to undo
                    pass

                #self.recon = tf.check_numerics(self.recon, 'recon error', name=None)

            with tf.name_scope("Error"):
                self.error = self.scaled_inputImage - self.recon

            with tf.name_scope("Loss"):
                if (self.fc):
                    self.reconError = tf.reduce_mean(
                        tf.reduce_sum(tf.square(self.error), axis=[1]))
                    self.l1Sparsity = tf.reduce_mean(
                        tf.reduce_sum(tf.abs(self.V1_A), axis=[1]))
                else:
                    self.reconError = tf.reduce_mean(
                        tf.reduce_sum(tf.square(self.error), axis=[1, 2, 3]))
                    self.l1Sparsity = tf.reduce_mean(
                        tf.reduce_sum(tf.abs(self.V1_A), axis=[1, 2, 3]))
                #self.reconError = tf.reduce_mean(tf.square(self.error))
                #self.l1Sparsity = tf.reduce_mean(tf.abs(self.V1_A))
                #Define loss
                self.loss = self.reconError / 2 + self.thresh * self.l1Sparsity

            with tf.name_scope("Opt"):
                #Calculate A from U
                self.optimizerA0 = self.V1_A.assign(
                    tf.nn.relu(self.V1_U - self.thresh))
                self.v1Reset = self.V1_U.assign(self.V1_init)

                self.optimizerA1 = tf.train.AdamOptimizer(self.learningRateA)

                #Find gradient wrt A
                self.lossGrad = self.optimizerA1.compute_gradients(
                    self.reconError, [self.V1_A])
                #self.checkGrad = tf.check_numerics(self.lossGrad[0][0], "grad error", name=None)
                self.dU = [(self.lossGrad[0][0] - self.V1_A + self.V1_U,
                            self.V1_U)]

                #TODO add momentum or ADAM here
                self.optimizerA = self.optimizerA1.apply_gradients(self.dU)

                #self.optimizerW = tf.train.AdadeltaOptimizer(self.learningRateW, epsilon=1e-6).minimize(self.loss,
                self.optimizerW = tf.train.AdamOptimizer(
                    self.learningRateW,
                    epsilon=1e-6).minimize(self.loss, var_list=[self.V1_W])

            with tf.name_scope("stats"):
                self.nnz = tf.reduce_mean(
                    tf.cast(tf.not_equal(self.V1_A, 0), tf.float32))

                self.imageStd = tf.sqrt(
                    tf.reduce_mean(
                        tf.square(self.scaled_inputImage -
                                  tf.reduce_mean(self.scaled_inputImage))))
                self.errorStd = tf.sqrt(
                    tf.reduce_mean(
                        tf.square(self.error -
                                  tf.reduce_mean(self.error)))) / self.imageStd
                self.l1_mean = tf.reduce_mean(tf.abs(self.V1_A))
                if (self.fc):
                    flat_weightImages = tf.transpose(self.V1_W,
                                                     [1, 0])  #[numV, img]
                    self.weightImages = tf.reshape(flat_weightImages, [
                        self.numV, self.imageShape[1], self.imageShape[2],
                        self.imageShape[3]
                    ])
                else:
                    self.weightImages = tf.squeeze(
                        tf.transpose(self.V1_W, [3, 0, 1, 2]))

                #For log of activities
                self.log_V1_A = tf.log(tf.abs(self.V1_A) + 1e-13)

        #Summaries
        self.s_loss = tf.summary.scalar('loss', self.loss)
        self.s_recon = tf.summary.scalar('recon error', self.reconError)
        self.s_errorStd = tf.summary.scalar('errorStd', self.errorStd)
        self.s_l1 = tf.summary.scalar('l1_sparsity', self.l1Sparsity)
        self.s_l1_mean = tf.summary.scalar('l1_mean', self.l1_mean)
        self.s_nnz = tf.summary.scalar('nnz', self.nnz)

        self.h_input = tf.summary.histogram('input', self.inputImage)
        self.h_scaled_input = tf.summary.histogram('scaled_input',
                                                   self.scaled_inputImage)
        self.h_recon = tf.summary.histogram('recon', self.recon)
        self.h_v1_w = tf.summary.histogram('V1_W', self.V1_W)
        self.h_v1_u = tf.summary.histogram('V1_U', self.V1_U)
        self.h_v1_a = tf.summary.histogram('V1_A', self.V1_A)
        self.h_log_v1_a = tf.summary.histogram('Log_V1_A', self.log_V1_A)
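
The Opt scope above is the LCA update: activities A are a soft threshold of the membrane potentials U (optimizerA0), and U is pushed along dU = grad(reconError, A) - A + U by Adam. A minimal NumPy sketch of one such step, assuming a hypothetical flat dictionary W of shape [input_dim, num_units]:

import numpy as np

def lca_step(u, x, W, thresh, lr=0.01):
    # A = relu(U - thresh), as in optimizerA0
    a = np.maximum(u - thresh, 0.0)
    recon = a @ W.T                     # linear reconstruction of x
    grad_a = 2.0 * (recon - x) @ W      # d/dA of sum((x - recon)^2)
    du = grad_a - a + u                 # matches self.dU above
    return u - lr * du, a               # plain gradient step (Adam rescales this)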
Example #33
0
    def test1(self):

        # build the operator libs if needed
        cpulib = os.path.join(cache_directory, "libaddcpu.so")
        gpulib = os.path.join(cache_directory, "libaddgpu.so")
        if not os.path.exists(cpulib):
            this_file_path = os.path.abspath(__file__)
            this_directory = os.path.split(this_file_path)[0]

            cpp_path = os.path.join(this_directory, 'addcpu.cpp')
            subprocess.call([
                cxx, '-fPIC', '-Wall', '-std=c++11', '-Ofast', '-Wextra', '-g',
                '-pedantic', '-I' + this_directory + '/..', '-o', cpulib,
                '-shared', cpp_path
            ])

        if cuda_enabled:
            if not os.path.exists(gpulib):
                this_file_path = os.path.abspath(__file__)
                this_directory = os.path.split(this_file_path)[0]

                nvcc_path = os.path.join(cuda_directory, 'bin/nvcc')
                cuda_path = os.path.join(this_directory, 'addgpu.cu')
                cuda_o_path = os.path.join(cache_directory, 'addgpu.o')

                subprocess.call([
                    nvcc_path, '-O3', '--use_fast_math',
                    '--relocatable-device-code=true', '--compile',
                    '-Xcompiler', '-fPIC', '-std=c++11',
                    '-I' + this_directory + '/..', cuda_path, '-o', cuda_o_path
                ])
                subprocess.call(
                    [nvcc_path, '-shared', '-o', gpulib, cuda_o_path])
                # clean up .o files
                subprocess.call(['rm', cuda_o_path])

            devices = ['/cpu:0', '/gpu:0']
        else:
            devices = ['/cpu:0']
        for dev_string in devices:
            logger.debug('*** device: {dev}'.format(dev=dev_string))
            test_config = tf.ConfigProto(allow_soft_placement=False)
            # Don't perform optimizations for tests so we don't inadvertently run
            # gpu ops on cpu
            test_config.graph_options.optimizer_options.opt_level = -1
            with tf.Session(config=test_config):
                logger.debug('*** add2float')
                with tf.device(dev_string):
                    in0 = np.random.rand(3, 50).astype(np.float32)
                    in1 = np.random.rand(3, 50).astype(np.float32)
                    ones = np.ones((3, 50), dtype=np.float32)
                    output = _DynamicLibOp.module().dynamic_lib(
                        inputs=[in0, in1],
                        out_shapes=[[3, 50]],
                        out_types=['float'],
                        cpu_lib_path=cpulib,
                        cpu_func_name="add2float",
                        gpu_lib_path=gpulib,
                        gpu_func_name="add2float",
                        serialized_grad_dag='',
                        grad_dag_arg_index=[],
                        cuda_threads_per_block=_default_cuda_threads_per_block)

                    ref = np.add(in0, in1)
                    if (dev_string == '/gpu:0'):
                        ref = np.add(ref, ones)
                    assert np.allclose(output[0].eval(), ref)

                    in2 = np.random.rand(3, 50).astype(np.float64)
                    logger.debug('*** addFloatDoubleFloat')
                    output = _DynamicLibOp.module().dynamic_lib(
                        inputs=[in0, in2, in1],
                        out_shapes=[[3, 50]],
                        out_types=['float'],
                        cpu_lib_path=cpulib,
                        cpu_func_name="addFloatDoubleFloat",
                        gpu_lib_path=gpulib,
                        gpu_func_name="addFloatDoubleFloat",
                        serialized_grad_dag='',
                        grad_dag_arg_index=[],
                        cuda_threads_per_block=_default_cuda_threads_per_block)
                    ref = (in0 + in2 + in1).astype(np.float32)
                    if (dev_string == '/gpu:0'):
                        ref = ref + ones
                    assert np.allclose(output[0].eval(), ref)

                    logger.debug('*** sumAndSq')
                    output = _DynamicLibOp.module().dynamic_lib(
                        inputs=[in0, in2],
                        out_shapes=[[3, 50], [3, 50]],
                        out_types=['float', 'float'],
                        cpu_lib_path=cpulib,
                        cpu_func_name="sumAndSq",
                        gpu_lib_path=gpulib,
                        gpu_func_name="sumAndSq",
                        serialized_grad_dag='',
                        grad_dag_arg_index=[],
                        cuda_threads_per_block=_default_cuda_threads_per_block)

                    out0 = (in0 + in2).astype(np.float32)
                    if (dev_string == '/gpu:0'):
                        out0 = out0 + ones
                    out1 = np.multiply(out0, out0)
                    if (dev_string == '/gpu:0'):
                        out1 = out1 + ones
                    assert np.allclose(output[0].eval(), out0)
                    assert np.allclose(output[1].eval(), out1)

                    # make sure we can also use a standard TF gpu operator in the same session
                    logger.debug('*** TF numerics op')
                    x_shape = [5, 4]
                    x = np.random.random_sample(x_shape).astype(np.float32)
                    t = tf.constant(x, shape=x_shape, dtype=tf.float32)
                    t_verified = tf.verify_tensor_all_finite(
                        t, "Input is not a number.")
                    assert np.allclose(x, t_verified.eval())
Example #34
0
    def check_legal_inputs(self, tensor, name):
        # ensure that the given tensor is finite (no NaN or Inf values)
        return tf.verify_tensor_all_finite(tensor,
                                           "ERR: Tensor not finite - " + name,
                                           name=name)
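
Note that tf.verify_tensor_all_finite is itself a graph op: the assertion only fires if the tensor it returns is the one consumed downstream. A minimal sketch of wiring the check into a loss, with hypothetical placeholder names:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 10], name="x")
# use the returned tensor; the check only executes when it lies on the
# path to whatever session.run fetches
x_checked = tf.verify_tensor_all_finite(x, "ERR: Tensor not finite - x")
loss = tf.reduce_mean(tf.square(x_checked))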
Example #35
0
def kl_multivariate_normal(loc_one, scale_one, loc_two=0.0, scale_two=1.0):
    """Calculate the KL of multivariate normal distributions with
  diagonal covariances.

  Parameters
  ----------
  loc_one : tf.Tensor
    A 0-D tensor, 1-D tensor of length n, or 2-D tensor of shape M
    x n where each row represents the mean of a n-dimensional
    Gaussian.
  scale_one : tf.Tensor
    A tensor of same shape as ``loc_one``, representing the
    standard deviation.
  loc_two : tf.Tensor, optional
    A tensor of same shape as ``loc_one``, representing the
    mean of another Gaussian.
  scale_two : tf.Tensor, optional
    A tensor of same shape as ``loc_one``, representing the
    standard deviation of another Gaussian.

  Returns
  -------
  tf.Tensor
    For 0-D or 1-D tensor inputs, outputs the 0-D tensor
    ``KL( N(z; loc_one, scale_one) || N(z; loc_two, scale_two) )``
    For 2-D tensor inputs, outputs the 1-D tensor
    ``[KL( N(z; loc_one[m,:], scale_one[m,:]) || ``
    ``N(z; loc_two[m,:], scale_two[m,:]) )]_{m=1}^M``

  Raises
  ------
  InvalidArgumentError
    If the location variables have Inf or NaN values, or if the scale
    variables are not positive.
  """
    # Record whether the defaults were passed before converting to tensors;
    # Python ``==`` on tf.Tensor objects compares identity, not values.
    default_two = (loc_two == 0.0 and scale_two == 1.0)
    loc_one = tf.convert_to_tensor(loc_one)
    scale_one = tf.convert_to_tensor(scale_one)
    loc_two = tf.convert_to_tensor(loc_two)
    scale_two = tf.convert_to_tensor(scale_two)
    dependencies = [
        tf.verify_tensor_all_finite(loc_one, msg=''),
        tf.verify_tensor_all_finite(loc_two, msg=''),
        tf.assert_positive(scale_one),
        tf.assert_positive(scale_two)
    ]
    loc_one = control_flow_ops.with_dependencies(dependencies, loc_one)
    scale_one = control_flow_ops.with_dependencies(dependencies, scale_one)

    if default_two:
        # With default arguments, we can avoid some intermediate computation.
        out = tf.square(scale_one) + tf.square(loc_one) - \
            1.0 - 2.0 * tf.log(scale_one)
    else:
        loc_two = control_flow_ops.with_dependencies(dependencies, loc_two)
        scale_two = control_flow_ops.with_dependencies(dependencies, scale_two)
        out = tf.square(scale_one / scale_two) + \
            tf.square((loc_two - loc_one) / scale_two) - \
            1.0 + 2.0 * tf.log(scale_two) - 2.0 * tf.log(scale_one)

    if len(out.get_shape()) <= 1:  # scalar or vector
        return 0.5 * tf.reduce_sum(out)
    else:  # matrix
        return 0.5 * tf.reduce_sum(out, 1)
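
A quick usage sketch of kl_multivariate_normal, with hypothetical shapes, exercising both the default branch (reference N(0, I)) and the general one:

import tensorflow as tf

loc = tf.zeros([4, 3])    # M=4 Gaussians, each 3-dimensional
scale = tf.ones([4, 3])

kl_default = kl_multivariate_normal(loc, scale)  # against N(0, I): all zeros
kl_general = kl_multivariate_normal(loc, scale,
                                    loc_two=tf.ones([4, 3]),
                                    scale_two=2.0 * tf.ones([4, 3]))

with tf.Session() as sess:
    print(sess.run([kl_default, kl_general]))    # first is [0., 0., 0., 0.]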
Example #36
0
    def __init__(self, env, task, visualise, policy, learning_rate):
        """
An implementation of the A3C algorithm that is reasonably well-tuned for the VNC environments.
Below, we will have a modest amount of complexity due to the way TensorFlow handles data parallelism.
But overall, we'll define the model, specify its inputs, and describe how the policy gradients step
should be computed.
"""

        self.env = env
        self.task = task
        worker_device = "/job:worker/task:{}/cpu:0".format(task)
        with tf.device(tf.train.replica_device_setter(1, worker_device=worker_device)):
            with tf.variable_scope("global"):
                self.network = policy(env.observation_space, env.action_space)
                self.global_step = tf.get_variable("global_step", [], tf.int32,
                                                   initializer=tf.constant_initializer(0, dtype=tf.int32),
                                                   trainable=False)

        with tf.device(worker_device):
            with tf.variable_scope("local"):
                self.local_network = pi = policy(env.observation_space, env.action_space)
                pi.global_step = self.global_step

            self.ac = tf.placeholder(tf.float32, [None, env.action_space.dim()], name="ac")
            self.adv = tf.placeholder(tf.float32, [None], name="adv")
            self.r = tf.placeholder(tf.float32, [None], name="r")

            # the "policy gradients" loss:  its derivative is precisely the policy gradient
            # notice that self.ac is a placeholder that is provided externally.
            # adv will contain the advantages, as calculated in process_rollout
            pi_loss = - tf.reduce_sum(pi.log_prob(self.ac) * self.adv)

            # loss of value function
            vf_loss = tf.reduce_sum(tf.square(pi.vf - self.r))
            entropy = tf.reduce_sum(pi.dist.entropy())

            # the loss gets minimized: pi_loss goes down, vf_loss goes down, and entropy goes up.
            self.loss = pi_loss + 0.25 * vf_loss - entropy * 1e-4
            self.loss = tf.verify_tensor_all_finite(self.loss, 'loss')

            # 20 represents the number of "local steps":  the number of timesteps
            # we run the policy before we update the parameters.
            # The larger local steps is, the lower is the variance in our policy gradients estimate
            # on the one hand;  but on the other hand, we get less frequent parameter updates, which
            # slows down learning.  In this code, we found that making local steps be much
            # smaller than 20 makes the algorithm more difficult to tune and to get to work.
            self.runner = RunnerThread(env, pi, 20, visualise)

            grads = tf.gradients(self.loss, pi.var_list)

            # learning_rate = 1e-5  # / (tf.to_float(self.global_step) + 1e-6)
            # learning_rate /= (tf.to_float(self.global_step) + 1e-6)
            bs = tf.to_float(tf.shape(pi.x)[0])
            if USE_TF12_API:
                tf.summary.scalar("model/learning_rate", learning_rate)
                tf.summary.scalar("model/total_loss", self.loss / bs)
                tf.summary.scalar("model/policy_loss", pi_loss / bs)
                tf.summary.scalar("model/value_loss", vf_loss / bs)
                tf.summary.scalar("model/entropy", entropy / bs)
                if len(list(env.observation_space.shape)) > 1:
                    tf.summary.image("model/state", pi.x)
                tf.summary.scalar("model/grad_global_norm", tf.global_norm(grads))
                tf.summary.scalar("model/var_global_norm", tf.global_norm(pi.var_list))
                self.summary_op = tf.summary.merge_all()

            else:
                tf.scalar_summary("model/learning_rate", learning_rate)
                tf.scalar_summary("model/total_loss", self.loss / bs)
                tf.scalar_summary("model/policy_loss", pi_loss / bs)
                tf.scalar_summary("model/value_loss", vf_loss / bs)
                tf.scalar_summary("model/entropy", entropy / bs)
                if len(list(env.observation_space.shape)) > 1:
                    tf.image_summary("model/state", pi.x)
                tf.scalar_summary("model/grad_global_norm", tf.global_norm(grads))
                tf.scalar_summary("model/var_global_norm", tf.global_norm(pi.var_list))
                self.summary_op = tf.merge_all_summaries()

            grads, _ = tf.clip_by_global_norm(grads, 40.0)

            # copy weights from the parameter server to the local model
            self.sync = tf.group(*[v1.assign(v2) for v1, v2 in zip(pi.var_list, self.network.var_list)])

            grads_and_vars = list(zip(grads, self.network.var_list))
            inc_step = self.global_step.assign_add(tf.shape(pi.x)[0])

            opt = tf.train.AdamOptimizer(learning_rate)
            self.train_op = tf.group(opt.apply_gradients(grads_and_vars), inc_step)
            self.summary_writer = None
            self.local_steps = 0
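
Beyond verifying the scalar loss, the per-variable gradients can be guarded the same way before clipping. A minimal sketch using tf.check_numerics (equivalent finiteness semantics); the names below are only illustrative:

import tensorflow as tf

def check_grads(grads, names):
    # wrap each gradient so session.run fails fast on NaN/Inf
    return [tf.check_numerics(g, "NaN/Inf in grad of " + n)
            for g, n in zip(grads, names)]

# e.g. grads = check_grads(grads, [v.name for v in pi.var_list])
# before tf.clip_by_global_norm(grads, 40.0)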
Example #37
0
def tf_mean_l2(w, coefs, n_users):
    # squared L2 norm of each row of w, checked for NaN/Inf, then combined
    # as a coefs-weighted sum (a mean when coefs sum to 1); n_users is unused
    elementwise_sq_norm = tf.reduce_sum(tf.pow(w, 2), axis=1)
    checked_elwise_l2 = tf.verify_tensor_all_finite(
        elementwise_sq_norm, msg='NaN or Inf in norm', name='checked_elwise_l2')
    mean_l2 = tf.reduce_sum(tf.multiply(checked_elwise_l2, coefs))
    return mean_l2
Example #38
0
    def init_target(self):
        self.target = self.reduced_loss + self.reg * self.regularization
        self.checked_target = tf.verify_tensor_all_finite(
            self.target,
            msg='NaN or Inf in target value', name='target')
        tf.scalar_summary('target', self.checked_target)
Example #39
0
    def model(self, seq_length, img_ph, pnt_ph, aud_ph, partitions_ph, train_ph, prompts_ph, variable_scope,
              variable_scope2, var_img, var_pnt, var_aud, var_lstm, incep_reuse=True):
        def process_vars(seq, data_type):
            # cast inputs to the correct data type
            seq_inp = tf.cast(seq, tf.float32)
            return tf.reshape(seq_inp,
                              (self.__batch_size, -1, data_type["cmp_h"], data_type["cmp_w"], data_type["num_c"]))

        def convolve_data_inception(input_data, val, n, dtype):
            data = tf.reshape(input_data, [-1, 299, 299, 3])
            logits, end_points = inception_resnet_v2(data,
                                                     num_classes=output_sizes[-1] * output_sizes[-1] * layer_elements[
                                                         -2], is_training=False, reuse=incep_reuse)
            return logits

        def convolve_data_3layer_pnt(input_data, val, variables, n, dtype):
            def pad_tf(x, p):
                return tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], "CONSTANT")

            def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
                conv = tf.nn.conv2d(sequence, W, strides=[1, stride, stride, 1], padding=padding) + b
                return tf.nn.relu(conv)

            input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                    name=n + "_inp_reshape")

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out1_n: ")
            input_data = pad_tf(input_data, padding_size[0])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], stride_sizes[0],
                                              layer_elements[1], output_sizes[0], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv1")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv1_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out2_n: ")
            input_data = pad_tf(input_data, padding_size[1])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], stride_sizes[1],
                                              layer_elements[2], output_sizes[1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv2")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv2_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out3_n: ")
            input_data = pad_tf(input_data, padding_size[2])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], stride_sizes[-1],
                                              layer_elements[-2], output_sizes[-1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv3")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - ",
                name="conv3_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out4_n: ")

            return input_data

        def convolve_data_3layer_aud(input_data, val, variables, n, dtype):
            def pad_tf(x, padding):
                return tf.pad(x, [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]], "CONSTANT")

            def gen_convolved_output(sequence, W, b, stride, num_hidden, new_size, train_ph, padding='SAME'):
                conv = tf.nn.conv2d(sequence, W, strides=[1, stride[0], stride[1], 1], padding=padding) + b
                return tf.nn.relu(conv)

            input_data = tf.reshape(input_data, [-1, dtype["cmp_h"], dtype["cmp_w"], dtype["num_c"]],
                                    name=n + "_inp_reshape")

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out1_a: ")
            input_data = pad_tf(input_data, aud_padding_size[0])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W1"], variables["b1"], aud_stride_sizes[0],
                                              aud_layer_elements[1], aud_output_sizes[0], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv1")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv1_" + n,
                name="conv1_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out2_a: ")
            input_data = pad_tf(input_data, aud_padding_size[1])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W2"], variables["b2"], aud_stride_sizes[1],
                                              aud_layer_elements[2], aud_output_sizes[1], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv2")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv2_" + n,
                name="conv2_" + n
            )

            # input_data = tf.Print(input_data, [tf.shape(input_data)], message="out3_a: ")
            input_data = pad_tf(input_data, aud_padding_size[2])
            padding = "VALID"

            input_data = gen_convolved_output(input_data, variables["W3"], variables["b3"], aud_stride_sizes[2],
                                              aud_layer_elements[3], aud_output_sizes[2], train_ph, padding)
            self.variable_summaries(input_data, dtype["name"] + "_conv3")
            input_data = tf.verify_tensor_all_finite(
                input_data,
                "ERR: Tensor not finite - conv3_" + n,
                name="conv3_" + n
            )

            return input_data

        # pass different data types through conv networks
        inp_data = [0] * TOTAL_PARAMS
        conv_inp = [0] * TOTAL_PARAMS

        # with tf.device('/gpu:0'):
        with tf.device('/gpu:1'):
            if (self.graphbuild[0]):
                val = 0
                inp_data[val] = process_vars(img_ph, img_dtype)
                conv_inp[val] = convolve_data_inception(inp_data[val], val, "img", img_dtype)

            with variable_scope as scope:
                # with tf.device('/gpu:1'):

                if (self.graphbuild[1]):
                    val = 1
                    inp_data[val] = process_vars(pnt_ph, pnt_dtype)
                    conv_inp[val] = convolve_data_3layer_pnt(inp_data[val], val, var_pnt, "pnt", pnt_dtype)
                if (self.graphbuild[2]):
                    val = 2
                    inp_data[val] = process_vars(aud_ph, aud_dtype)
                    conv_inp[val] = convolve_data_3layer_aud(inp_data[val], val, var_aud, "aud", aud_dtype)

                # combine different inputs together
                combined_data = None
                for i in range(TOTAL_PARAMS):

                    if (self.graphbuild[i]):
                        # assign back so the shape print actually executes
                        conv_inp[i] = tf.Print(conv_inp[i], [tf.shape(conv_inp[i])])
                        if (i < 2):
                            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                                   output_sizes[-1] * output_sizes[-1] * layer_elements[
                                                                       -2]], name="combine_reshape")
                        else:
                            # print(">>", aud_output_sizes[-1][0]*aud_output_sizes[-1][0]*aud_layer_elements[-2])
                            conv_inp[i] = tf.reshape(conv_inp[i], [self.__batch_size, -1,
                                                                   aud_output_sizes[-1][0] * aud_output_sizes[-1][0] *
                                                                   aud_layer_elements[-2]], name="combine_reshape_aud")
                        # tf.Print(conv_inp[i], [tf.shape(conv_inp[i])])
                        if (combined_data is None):
                            combined_data = conv_inp[i]
                        else:
                            combined_data = tf.concat([combined_data, conv_inp[i]], 2)

                W_lstm = var_lstm["W_lstm"]
                b_lstm = var_lstm["b_lstm"]
                W_fc = var_lstm["W_fc"]
                b_fc = var_lstm["b_fc"]

                combined_data = tf.verify_tensor_all_finite(
                    combined_data,
                    "ERR: Tensor not finite - combined_data",
                    name="combined_data"
                )
            # combined_data = tf.Print(combined_data, [tf.shape(combined_data)], message="combined_data")

        with variable_scope2 as scope:
            # lstm_cell = BNLSTMCell(layer_elements[-2], is_training_tensor=train_ph, max_bn_steps=MAX_BN_LEN)

            lstm_cell = tf.contrib.rnn.LSTMCell(layer_elements[-2],
                                                use_peepholes=False,
                                                cell_clip=None,
                                                initializer=None,
                                                num_proj=None,
                                                proj_clip=None,
                                                forget_bias=1.0,
                                                state_is_tuple=True,
                                                activation=None,
                                                reuse=None
                                                )

            outputs, states = tf.nn.dynamic_rnn(
                cell=lstm_cell,
                inputs=combined_data,
                dtype=tf.float32,
                sequence_length=seq_length,
                time_major=False
            )

            outputs = tf.where(tf.is_nan(outputs), tf.zeros_like(outputs), outputs)
            # outputs = tf.Print(outputs, [outputs], message="outputs", summarize=100)
            # outputs = tf.Print(outputs, [tf.reduce_max(outputs)], message="outputs", summarize=100)
            outputs = tf.verify_tensor_all_finite(
                outputs,
                "ERR: Tensor not finite - outputs",
                name="outputs"
            )

            num_partitions = 2
            res_out = tf.dynamic_partition(outputs, partitions_ph, num_partitions)[1]
            # res_out = tf.Print(res_out, [res_out], message="res_out")

            # tf.where(tf.is_nan(res_out), tf.zeros_like(res_out), res_out)

            # res_out = tf.Print(res_out, [res_out], message="res_out", summarize=100)
            # res_out = tf.Print(res_out, [tf.reduce_max(res_out)], message="res_out", summarize=100)


            rnn_x = tf.matmul(res_out, W_lstm) + b_lstm

            self.variable_summaries(rnn_x, "lstm")

            rnn_x = tf.verify_tensor_all_finite(
                rnn_x,
                "ERR: Tensor not finite - fc1",
                name="fc1"
            )

            # prompts_ph = tf.reshape(prompts_ph, [-1, 1])
            x_tensor = rnn_x  # tf.concat([rnn_x, prompts_ph], 1)

            rnn_x = tf.matmul(x_tensor, W_fc) + b_fc
            self.variable_summaries(rnn_x, "fc")

            rnn_x = tf.verify_tensor_all_finite(
                rnn_x,
                "ERR: Tensor not finite - fc2",
                name="fc2"
            )

            return rnn_x
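
The conv helpers above implement custom padding by padding explicitly with tf.pad and then convolving with padding='VALID'. A standalone sketch of the same pattern with hypothetical sizes:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 28, 28, 1])
W = tf.get_variable("W", [5, 5, 1, 8])
b = tf.get_variable("b", [8])

p = 2  # symmetric spatial padding, as in pad_tf above
x_pad = tf.pad(x, [[0, 0], [p, p], [p, p], [0, 0]], "CONSTANT")
conv = tf.nn.relu(
    tf.nn.conv2d(x_pad, W, strides=[1, 1, 1, 1], padding="VALID") + b)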
Example #40
0
def tf_mean_l2(w):
    elementwise_sq_norm = tf.reduce_sum(tf.pow(w, 2), axis=1)
    checked_elwise_l2 = tf.verify_tensor_all_finite(elementwise_sq_norm, msg='NaN or Inf in norm', name='checked_elwise_l2')
    mean_l2 = tf.reduce_mean(checked_elwise_l2)
    return mean_l2
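
Unlike the coefs-weighted variant in Example #37, this version is a plain mean over rows. A quick sanity check with a small constant:

import tensorflow as tf

w = tf.constant([[3.0, 4.0], [0.0, 0.0]])  # squared row norms: 25 and 0
with tf.Session() as sess:
    print(sess.run(tf_mean_l2(w)))          # (25 + 0) / 2 = 12.5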
Example #41
0
def run():
    global task_name
    parser = argparse.ArgumentParser()
    parser.add_argument('-d',
                        '--model_directory',
                        type=str,
                        default=MODEL_DIRECTORY)
    parser.add_argument('-dd',
                        '--dataset_directory',
                        type=str,
                        default=DATASET_DIRECTORY)
    parser.add_argument('-bs', '--batch_size', type=int, default=BATCH_SIZE)
    parser.add_argument('-lr',
                        '--learning_rate',
                        type=float,
                        default=LEARNING_RATE)
    parser.add_argument('-g', '--gpu', type=int, default=GPU)
    parser.add_argument('-t', '--task_name', type=str, default=task_name)
    parser.add_argument('-n', '--hyper_net', type=str, default=HYPER_NET)
    parser.add_argument('-v', '--variance', type=float, default=RF.VARIANCE)
    parser.add_argument('-de', '--depth', type=int, default=52)
    parser.add_argument('-ep', '--epoch', type=int, default=100)

    args = parser.parse_args()
    epoch = args.epoch
    task_name = args.task_name
    RF.VARIANCE = args.variance
    directory_output = os.path.join(args.model_directory)
    depth = args.depth

    X_train, Y_train, X_test, Y_test = load_data.load()
    X_test_m = [0] * (10)
    Y_test_m = [0] * (10)
    for i in range(10):
        X_test_m[i] = X_test[i * 1000:(i + 1) * 1000]
        Y_test_m[i] = Y_test[i * 1000:(i + 1) * 1000]

    # Shrink the dataset (for quick experiments)
    #X_train, Y_train = X_train[0:5000], Y_train[0:5000]
    #X_test, Y_test = X_test[0:1000] , Y_test[0:1000]

    X = tf.placeholder("float", [None, 32, 32, 3])
    Y = tf.placeholder("float", [None, 10])
    time_list = tf.placeholder("float", [None])
    W_list = tf.placeholder("float", [None])
    learning_rate = tf.placeholder("float", [])
    hypernet = args.hyper_net  # tf.placeholder("string")
    task_name_tr = tf.placeholder("string")

    net = RF.SDE_model(X,
                       depth,
                       time_list,
                       W_list,
                       task_name,
                       hypernet,
                       test=False)
    test_net = RF.SDE_model(X,
                            depth,
                            time_list,
                            W_list,
                            task_name,
                            hypernet,
                            test=True)

    sess = tf.Session()
    beta = 1e-3

    cross_entropy = -tf.reduce_sum(
        Y * tf.log(tf.clip_by_value(net, 1e-10, 1.0)))
    suml2 = tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
    loss = cross_entropy + beta * suml2
    #opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
    var_name_list1 = ["W_conv", "b_conv"] + hypernet_variable[0]
    var_name_list2 = ["W_fc1", "b_fc1", "W_fc2", "b_fc2", "W_fc3", "b_fc3"]

    #train_op = None

    correct_prediction = tf.equal(tf.argmax(test_net, 1), tf.argmax(Y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    saver = tf.train.Saver()
    batch_size = args.batch_size
    num_data = X_train.shape[0]

    # with tf.variable_scope("scope", reuse=True ):
    #    var_list1 = [ tf.get_variable(name=x) for x in var_name_list1 ]
    #    var_list2 = [ tf.get_variable(name=x) for x in var_name_list2 ]

    #    if task_name == "ResNet" or task_name =="ResNet_test" or task_name =="Stochastic_Depth":
    #        learning_late = 1e-4
    #    else:
    #        learning_late = 1e-6
    #    train_op1 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list1 )  # tf.train.GradientDescentOptimizer(0.000001)
    #    train_op2 = tf.train.MomentumOptimizer( 1e-6 , 0.9 ).minimize(cross_entropy,var_list = var_list2 ) # tf.train.GradientDescentOptimizer(0.0001)
    # tf.group(train_op1, train_op2)  # tf.train.GradientDescentOptimizer( 1e-6 ).minimize(cross_entropy) #
    train_op = tf.train.MomentumOptimizer(args.learning_rate,
                                          0.9).minimize(loss)

    sess.run(tf.global_variables_initializer())

    print(tf.trainable_variables())
    late_ad = 1.0
    for j in range(epoch):
        sff_idx = np.random.permutation(num_data)
        if j < 20:
            late_ad = 1.0
        elif j < 40:
            late_ad = 0.1
        elif j < 60:
            late_ad = 0.01
        else:
            late_ad = 0.001

        for idx in range(0, num_data, batch_size):
            batch_x = X_train[sff_idx[idx:idx + batch_size if idx +
                                      batch_size < num_data else num_data]]
            batch_y = Y_train[sff_idx[idx:idx + batch_size if idx +
                                      batch_size < num_data else num_data]]

            t, W = RF.tW_def(depth, task_name)

            feed_dict_train = {
                X: batch_x,
                Y: batch_y,
                learning_rate: args.learning_rate * late_ad,
                time_list: t,
                W_list: W,
                task_name_tr: task_name
            }

            # print(sess.run(net,feed_dict=feed_dict_train))
            #print(sess.run(tf.argmax(net, 1),feed_dict=feed_dict_train))

            sess.run([train_op], feed_dict=feed_dict_train)
            # for z in (RF.Z_imagetest):
            #print(sess.run(net,feed_dict= feed_dict_train))
            #assert(not np.isnan(sess.run(z,feed_dict=feed_dict_train)).any())
            #count += 1
        elapsed = time.time() - start_time
        print("epoch %d end : %.3f seconds elapsed " % (j, elapsed))

        # if j % 512 == 0:
        #    a=1
        # test at the first epoch, every 10th epoch, or the last epoch
        # (the leading True currently forces a test every epoch)
        if True or j == 0 or j % 10 == 9 or j + 1 == EPOCH:
            t_test, W_test = RF.tW_def(depth, "test")
            if task_name == "ResNet" or task_name == "Stochastic_Depth":
                task_name_test = "ResNet_test"
            else:
                task_name_test = "test"
            feed_dict_test = {
                X: X_test,
                Y: Y_test,
                time_list: t_test,
                W_list: W_test,
                task_name_tr: task_name_test
            }
            if SAVE_ENABLE:
                print("saving checkpoint...")
                saver.save(
                    sess, "model/model.ckpt" + str(task_name) + "step" +
                    str(j) + datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
                print("saved!")
            acc = 0
            for i in range(10):
                feed_dict_test = {
                    X: X_test_m[i],
                    Y: Y_test_m[i],
                    time_list: t_test,
                    W_list: W_test,
                    task_name_tr: task_name_test
                }
                acc += sess.run(accuracy, feed_dict=feed_dict_test)
            acc = acc / 10.0
            print("accuracy after epoch %d : %.3f " % (j, acc), flush=True)
        # accuracy_summary = tf.scalar_summary("accuracy", accuracy)
    # From here on: count and enumerate the trainable parameters
    total_parameters = 0
    parameters_string = ""
    for variable in tf.trainable_variables():
        sess.run(
            tf.verify_tensor_all_finite(variable,
                                        "NaN in: %s" % variable.name))
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
        if len(shape) == 1:
            parameters_string += ("%s %d, " %
                                  (variable.name, variable_parameters))
        else:
            parameters_string += (
                "%s %s=%d, " %
                (variable.name, str(shape), variable_parameters))

    print(parameters_string)
    print("Total %d variables, %s params" %
          (len(tf.trainable_variables()), "{:,}".format(total_parameters)))
    sess.close()
Example #42
0
    def loss_layer(self, idx, predicts, labels):
        predict_classes = tf.reshape(
            predicts[:, :self.boundary1],
            [self.batch_size, self.cell_size, self.cell_size, self.num_class])
        predict_scales = tf.reshape(predicts[:, self.boundary1:self.boundary2],
                                    [
                                        self.batch_size, self.cell_size,
                                        self.cell_size, self.boxes_per_cell
                                    ])
        predict_boxes = tf.reshape(predicts[:, self.boundary2:], [
            self.batch_size, self.cell_size, self.cell_size,
            self.boxes_per_cell, 4
        ])
        response = tf.reshape(
            labels[:, :, :,
                   0], [self.batch_size, self.cell_size, self.cell_size, 1])
        boxes = tf.reshape(
            labels[:, :, :, 1:5],
            [self.batch_size, self.cell_size, self.cell_size, 1, 4])
        boxes = tf.tile(boxes,
                        [1, 1, 1, self.boxes_per_cell, 1]) / self.image_size
        classes = labels[:, :, :, 5:]

        offset = tf.constant(self.offset, dtype=tf.float32)
        offset = tf.reshape(
            offset, [1, self.cell_size, self.cell_size, self.boxes_per_cell])
        offset = tf.tile(offset, [self.batch_size, 1, 1, 1])
        predict_boxes_tran = tf.stack([
            (predict_boxes[:, :, :, :, 0] + offset) / self.cell_size,
            (predict_boxes[:, :, :, :, 1] + tf.transpose(offset,
                                                         (0, 2, 1, 3))) /
            self.cell_size,
            tf.square(predict_boxes[:, :, :, :, 2]),
            tf.square(predict_boxes[:, :, :, :, 3])
        ])
        predict_boxes_tran = tf.transpose(predict_boxes_tran, [1, 2, 3, 4, 0])

        # TODO remove
        # predict_boxes = tf.Print(predict_boxes, [predict_boxes], "predict_boxes = ", -1, 490)
        # boxes = tf.Print(boxes, [boxes], "boxes = ", -1, 490)

        iou_predict_truth = self.calculate_iou(predict_boxes_tran, boxes)

        # calculate I tensor [BATCH_SIZE, CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        object_mask = tf.reduce_max(iou_predict_truth, 3, keep_dims=True)
        object_mask = tf.cast(
            (iou_predict_truth >= object_mask), tf.float32) * response
        # mask = tf.tile(response, [1, 1, 1, self.boxes_per_cell])

        # calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
        noobject_mask = tf.ones_like(object_mask,
                                     dtype=tf.float32) - object_mask

        boxes_tran = tf.stack([
            boxes[:, :, :, :, 0] * self.cell_size - offset,
            boxes[:, :, :, :, 1] * self.cell_size -
            tf.transpose(offset, (0, 2, 1, 3)),
            tf.sqrt(boxes[:, :, :, :, 2]),
            tf.sqrt(boxes[:, :, :, :, 3])
        ])
        boxes_tran = tf.transpose(boxes_tran, [1, 2, 3, 4, 0])

        # class_loss
        class_loss = tf.reduce_mean(tf.reduce_sum(tf.square(
            response * (predict_classes - classes)),
                                                  reduction_indices=[1, 2, 3]),
                                    name='class_loss') * self.class_scale

        # object_loss
        object_loss = tf.reduce_mean(tf.reduce_sum(
            tf.square(object_mask * (predict_scales - iou_predict_truth)),
            reduction_indices=[1, 2, 3]),
                                     name='object_loss') * self.object_scale

        # noobject_loss
        noobject_loss = tf.reduce_mean(
            tf.reduce_sum(tf.square(noobject_mask * predict_scales),
                          reduction_indices=[1, 2, 3]),
            name='noobject_loss') * self.noobject_scale

        # coord_loss
        coord_mask = tf.expand_dims(object_mask, 4)

        # TODO remove
        # coord_mask = tf.Print(coord_mask, [coord_mask], "coord_mask = ", -1, 100000)
        # predict_boxes_without_negative = tf.nn.relu(predict_boxes, name=None)
        # boxes_tran_without_negative = tf.nn.relu(boxes_tran, name=None)

        # TODO remove
        # predict_boxes_without_negative = tf.Print(predict_boxes_without_negative, [predict_boxes_without_negative], "predict_boxes_without_negative = ", -1, 490)
        # boxes_tran_without_negative = tf.Print(boxes_tran_without_negative, [boxes_tran_without_negative], "boxes_tran_without_negative = ", -1, 490)

        boxes_delta = coord_mask * (predict_boxes - boxes_tran)

        coord_loss = tf.reduce_mean(tf.reduce_sum(
            tf.square(boxes_delta), reduction_indices=[1, 2, 3, 4]),
                                    name='coord_loss') * self.coord_scale

        # TODO remove
        # boxes_delta = tf.Print(boxes_delta, [boxes_delta[:, :, :, :, 0]],
        #                                       "boxes_delta_x = ", -1, 490)
        # boxes_delta = tf.Print(boxes_delta, [boxes_delta[:, :, :, :, 1]],
        #                                       "boxes_delta_y = ", -1, 490)
        # boxes_delta = tf.Print(boxes_delta, [boxes_delta[:, :, :, :, 2]],
        #                                       "boxes_delta_w = ", -1, 490)
        # boxes_delta= tf.Print(boxes_delta, [boxes_delta[:, :, :, :, 3]],
        #                                       "boxes_delta_h = ", -1, 490)

        # checks for NaN and Inf: verify_tensor_all_finite returns a checked
        # tensor, so reassign the results; otherwise the checks are dead
        # nodes that never execute
        class_loss = tf.verify_tensor_all_finite(class_loss, "class_loss")
        object_loss = tf.verify_tensor_all_finite(object_loss, "object_loss")
        noobject_loss = tf.verify_tensor_all_finite(noobject_loss,
                                                    "noobject_loss")
        coord_loss = tf.verify_tensor_all_finite(coord_loss, "coord_loss")

        boxes_delta = tf.verify_tensor_all_finite(boxes_delta, "boxes_delta")
        iou_predict_truth = tf.verify_tensor_all_finite(iou_predict_truth,
                                                        "iou")

        # TODO remove
        # prints values of loss
        # class_loss = tf.Print(class_loss, [class_loss], "class_loss = ", -1, 490)
        # object_loss = tf.Print(object_loss, [object_loss], "object_loss = ", -1, 490)
        # noobject_loss = tf.Print(noobject_loss, [noobject_loss], "noobject_loss = ", -1, 490)
        # coord_loss = tf.Print(coord_loss, [coord_loss], "coord_loss = ", -1, 490)

        # for summary in tensorboard
        tf.summary.scalar(self.phase + '/class_loss', class_loss)
        tf.summary.scalar(self.phase + '/object_loss', object_loss)
        tf.summary.scalar(self.phase + '/noobject_loss', noobject_loss)
        tf.summary.scalar(self.phase + '/coord_loss', coord_loss)

        tf.summary.histogram(self.phase + '/boxes_delta_x',
                             boxes_delta[:, :, :, :, 0])
        tf.summary.histogram(self.phase + '/boxes_delta_y',
                             boxes_delta[:, :, :, :, 1])
        tf.summary.histogram(self.phase + '/boxes_delta_w',
                             boxes_delta[:, :, :, :, 2])
        tf.summary.histogram(self.phase + '/boxes_delta_h',
                             boxes_delta[:, :, :, :, 3])
        tf.summary.histogram(self.phase + '/iou', iou_predict_truth)

        return class_loss + object_loss + noobject_loss + coord_loss
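
Instead of reassigning each checked tensor, the assertions can also be attached as control dependencies on the returned loss; a minimal sketch of that pattern (helper name is hypothetical):

import tensorflow as tf
from tensorflow.python.ops import control_flow_ops

def with_finite_checks(loss, named_tensors):
    # gate the loss on finiteness checks so they run whenever loss does
    checks = [tf.verify_tensor_all_finite(t, "NaN/Inf in " + name)
              for name, t in named_tensors]
    return control_flow_ops.with_dependencies(checks, loss)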
Example #43
0
    def check_legal_inputs(tensor, name):
        return tf.verify_tensor_all_finite(tensor,
                                           "ERR: Tensor not finite - " + name,
                                           name=name)