Example #1
0
        def fztloss( f, pVecs, nVecs ):
            """
            Tensorized cost function from Fast Zero-Shot Learning paper

            Every tag is scored by its dot product with the network output, and
            the cost is the sum of softplus(negative score - positive score)
            over all (negative tag, positive tag) pairs.

            Args:
                f: The output from the network, a tensor of shape (# images, word embedding size)
                pVecs: The vector embeddings of the ground truth tags, a tensor
                    of shape (# images, # positive tags, word embedding size)
                nVecs: The vector embeddings of negatively sampled tags, a tensor
                    of shape (# images, # negative samples, word embedding size)

            NOTE(review): the element-wise products below broadcast `f` against
            `pVecs`/`nVecs` on their trailing axes, which appears to require the
            tag tensors to be laid out as (# tags, # images, embedding size)
            rather than as documented above -- confirm against the caller.

            Returns:
                Scalar tensor representing the batch cost
            """
            # Dot product of every tag embedding with the network output.
            pos_scores = tf.reduce_sum(tf.mul(pVecs, f), reduction_indices=2)
            neg_scores = tf.reduce_sum(tf.mul(nVecs, f), reduction_indices=2)

            pos_scores = tf.transpose(pos_scores, [1,0])
            neg_scores = tf.transpose(neg_scores, [1,0])

            # Expand both score matrices onto a common (., negatives, positives)
            # grid so that every negative is compared with every positive.
            num_pos = tf.shape(pos_scores)[1]
            num_neg = tf.shape(neg_scores)[1]
            neg_grid = tf.tile( tf.expand_dims(neg_scores, -1), [1, 1, num_pos] )
            pos_grid = tf.tile( tf.transpose(tf.expand_dims(pos_scores, -1), [0,2,1]), [1, num_neg, 1])
            differences = tf.sub(neg_grid, pos_grid)

            # softplus(x) == log(1 + exp(x)), but it does not overflow to inf
            # when a negative tag outscores a positive one by a large margin.
            return tf.reduce_sum(tf.nn.softplus(differences))
Example #2
0
def bidiag_matmul(matrix, alpha, beta, adjoint_b=False, name="bidiag_matmul"):
  """Right-multiplies `matrix` by a lower bidiagonal matrix B (or its adjoint).

  B is the (K+1) x K matrix whose main diagonal is `alpha` and whose first
  lower subdiagonal is `beta` (both length-K vectors).

  With `adjoint_b` False the result is

    A * B = A[:, :-1] * diag(alpha) + A[:, 1:] * diag(beta)

  otherwise the result is

    A * B[:-1, :]' =
      A * diag(alpha) + [zeros(m,1), A[:, :-1] * diag(beta[:-1])]

  Args:
    matrix: A rank-2 `Tensor` representing matrix A.
    alpha: A rank-1 `Tensor` holding the diagonal of B.
    beta: A rank-1 `Tensor` holding the lower subdiagonal of B.
    adjoint_b: `bool` selecting which of the two products to build. Only the
      literal `False` selects the non-adjoint product.
    name: A name scope for the operation.

  Returns:
    `A * B` if `adjoint_b` is False, `A * B'` otherwise.
  """
  with tf.name_scope(name):
    diag_row = tf.expand_dims(alpha, 0)

    if adjoint_b is not False:
      # Adjoint case: the subdiagonal term is shifted one column to the right,
      # so a column of zeros is prepended to keep the widths aligned.
      subdiag_row = tf.expand_dims(beta[:-1], 0)
      num_rows = tf.shape(matrix)[:1]
      leading_zeros = tf.expand_dims(
          tf.zeros(num_rows, dtype=matrix.dtype), 1)
      shifted = tf.concat(1, [leading_zeros, matrix[:, :-1] * subdiag_row])
      return matrix * diag_row + shifted

    subdiag_row = tf.expand_dims(beta, 0)
    return matrix[:, :-1] * diag_row + matrix[:, 1:] * subdiag_row
Example #3
0
def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
  """Draws the boxes of a BoxList on top of an image.

  The drawing itself is delegated to tf.image.draw_bounding_boxes, which
  currently plots a 1 pixel thick red box and is essentially 1 indexed.

  Args:
    image: an image tensor with shape [height, width, 3]
    boxlist: a BoxList
    normalized: (boolean) whether the box corners are already normalized with
      respect to the image size. When False they are treated as absolute image
      coordinates and are rescaled by 1/height and 1/width first.
    scope: name scope.

  Returns:
    image_and_boxes: an image tensor with shape [height, width, 3]
  """
  with tf.name_scope(scope, 'VisualizeBoxesInImage'):
    boxes_to_draw = boxlist
    if not normalized:
      height, width, _ = tf.unstack(tf.shape(image))
      height_scale = 1.0 / tf.cast(height, tf.float32)
      width_scale = 1.0 / tf.cast(width, tf.float32)
      boxes_to_draw = scale(boxlist, height_scale, width_scale)

    # draw_bounding_boxes works on batches, so add and later strip a batch axis.
    batched_boxes = tf.expand_dims(boxes_to_draw.get(), 0)
    batched_image = tf.expand_dims(image, 0)
    drawn = tf.image.draw_bounding_boxes(batched_image, batched_boxes)
    return tf.squeeze(drawn, [0])
Example #4
0
                def copy_net_logit_function(state):
                    """Combine generation and copy scores for one decoder state.

                    Returns a pair: the log of the summed (exponentiated)
                    generate and copy scores over the vocabulary, and the raw
                    per-source-position copy scores.
                    """
                    dropped_state = tf.nn.dropout(
                        state, self.dropout_placeholder)

                    # ordinary projection of the state to vocabulary logits
                    vocab_logits = (
                        tf.matmul(dropped_state, decoding_w) + decoding_b)

                    # Equation 8 in the paper: one score per source position
                    # (batch x time)
                    position_scores = tf.reduce_sum(
                        projected_inputs * tf.expand_dims(dropped_state, 1),
                        [2])

                    # exponentiate (clipped at 80 before tf.exp) and zero out
                    # the padded positions
                    clipped_scores = tf.minimum([[80.0]], position_scores)
                    masked_position_exp = tf.exp(clipped_scores) * copy_mask

                    # spread the position scores over the vocabulary
                    # (batch x time x vocabulary) ...
                    scattered = (tf.expand_dims(masked_position_exp, 2)
                                 * vocabulary_shaped_indices)

                    # ... and sum over time: Equation 6 without normalization
                    copy_exp = tf.reduce_sum(scattered, [1])

                    generate_exp = tf.exp(
                        tf.minimum([[80.0]], vocab_logits))
                    total_exp = copy_exp + generate_exp

                    # floor at 1e-40 so the logarithm never sees zero
                    combined_logits = tf.log(
                        tf.maximum([[1e-40]], total_exp))
                    return (combined_logits, position_scores)
Example #5
0
  def encode_coordinates_alt(self, net):
    """An alternative implementation of the coordinate encoding.

    Builds one-hot row and column indicator planes and appends them to `net`
    as extra feature channels.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]; the
        shape must be statically known.

    Returns:
      a tensor of shape [batch_size, height, width,
      num_features + height + width]: `net` with the one-hot encoded image
      coordinates concatenated along the last axis.
    """
    batch_size, height, width, _ = net.shape.as_list()

    # One [height, width] plane per row index: ones along that row.
    row_planes = []
    for row in xrange(height):
      one_hot_row = tf.contrib.layers.one_hot_encoding(
          tf.constant([row]), num_classes=height)
      as_column = tf.reshape(one_hot_row, [height, 1])
      row_planes.append(tf.expand_dims(tf.tile(as_column, [1, width]), 2))
    h_loc = tf.concat(row_planes, 2)

    # One [height, width] plane per column index: ones along that column.
    column_planes = []
    for column in xrange(width):
      one_hot_column = tf.contrib.layers.one_hot_encoding(
          tf.constant([column]), num_classes=width)
      column_planes.append(
          tf.expand_dims(tf.tile(one_hot_column, [height, 1]), 2))
    w_loc = tf.concat(column_planes, 2)

    coordinates = tf.concat([h_loc, w_loc], 2)
    batched_coordinates = tf.tile(
        tf.expand_dims(coordinates, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, batched_coordinates], 3)
  def __call__(self, x, z_grads):
    """Build the per-example gradient graph for a MatMul op.

    The op is expected to have been called as MatMul(examples, weights), with
    one example per row of the first argument.

    Args:
      x: The Tensor to differentiate with respect to. It must be the second
         input of the op (the weights).
      z_grads: The list of gradients on the outputs of the op; exactly one
         entry is expected.

    Returns:
      x_grads: A 3-D Tensor holding, for every example, the outer product of
       that example's input row with its output gradient row. The first axis
       indexes examples and the remaining axes match the shape of `x`.
    """
    input_position = list(self.op.inputs).index(x)
    # list.index raises ValueError when `x` is missing, so this can never fire.
    assert input_position != -1
    assert len(z_grads) == len(self.op.outputs)
    # Per-example differentiation is only supported with respect to the
    # weights, which are argument 1.
    assert input_position == 1

    examples, _weights = self.op.inputs
    (output_grad,) = z_grads
    # [examples, in, 1] * [examples, 1, out] broadcasts to [examples, in, out].
    return tf.mul(tf.expand_dims(examples, 2), tf.expand_dims(output_grad, 1))
Example #7
0
def bond_conv_layer(activated_atoms, bv_params, layer):
    """Apply one bond-convolution layer to the current atom activations.

    For every degree 1..5 the activations are pushed through that degree's
    flow matrices (selected per atom pair via `type_adj_ph`), masked so that
    only atoms of that degree contribute, offset by the degree's bias and
    summed over axis 1. The per-degree results are accumulated and passed
    through a ReLU.

    Relies on the module-level `flow_layer_depths`, `N_atoms_ph`,
    `deg_list_ph` and `type_adj_ph`.

    Args:
        activated_atoms: activations produced by the previous layer.
        bv_params: mapping holding the 'A_flow<layer>_<deg>' and
            'b_flow<layer>_<deg>' parameters.
        layer: index of this layer; selects the output depth and parameters.

    Returns:
        Tensor of shape [N_atoms_ph, flow_layer_depths[layer]] with the new
        activations.
    """
    flow_depth = flow_layer_depths[layer]

    next_activated_atoms = tf.zeros(tf.pack([N_atoms_ph, flow_depth]))

    # None of the following depends on the degree, so build it once instead
    # of once per loop iteration.
    tiled_atoms = tf.tile(tf.expand_dims(activated_atoms, 0),
                          tf.pack([N_atoms_ph, 1, 1]))
    activated_atoms_dim = tf.expand_dims(tiled_atoms, 2)
    mask_multiples = tf.pack([1, N_atoms_ph, flow_depth])
    bias_multiples = tf.pack([N_atoms_ph, N_atoms_ph, 1])

    for deg in range(1, 6):
        param_suffix = str(layer) + '_' + str(deg)

        flow_map = tf.gather(bv_params['A_flow' + param_suffix], type_adj_ph)
        adj_mul = tf.squeeze(
            tf.batch_matmul(activated_atoms_dim, flow_map), [2])

        # 1.0 for atoms whose degree equals `deg`, 0.0 otherwise.
        deg_mask = tf.to_float(tf.equal(deg_list_ph, deg))
        deg_list_dim = tf.tile(
            tf.expand_dims(tf.expand_dims(deg_mask, 1), 1), mask_multiples)

        biases = tf.tile(bv_params['b_flow' + param_suffix], bias_multiples)
        filtered_atoms = tf.add(tf.mul(adj_mul, deg_list_dim), biases)

        next_activated_atoms = (
            next_activated_atoms + tf.reduce_sum(filtered_atoms, 1))

    return tf.nn.relu(next_activated_atoms)
def loss(logits, labels):
  """Computes the mean softmax cross-entropy between logits and labels.

  Args:
    logits: Logits tensor, float - [batch_size, NUM_CLASSES].
    labels: Labels tensor, int32 - [batch_size].

  Returns:
    loss: Loss tensor of type float.
  """
  # The sparse integer labels in [0, NUM_CLASSES) are turned into dense 1-hot
  # rows by scattering 1.0 at every (row index, label) coordinate of an
  # otherwise all-zero [batch_size, NUM_CLASSES] matrix.
  batch_size = tf.size(labels)
  label_column = tf.expand_dims(labels, 1)
  row_column = tf.expand_dims(tf.range(0, batch_size), 1)
  coordinates = tf.concat(1, [row_column, label_column])
  dense_shape = tf.pack([batch_size, NUM_CLASSES])
  onehot_labels = tf.sparse_to_dense(coordinates, dense_shape, 1.0, 0.0)

  per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
      logits, onehot_labels, name='xentropy')
  return tf.reduce_mean(per_example_loss, name='xentropy_mean')
Example #9
0
  def call(self, x):
    """Execute this layer on input tensors.

    Parameters
    ----------
    x: list of Tensor
      should be [atom_features(batch_size*max_n_atoms*n_embedding),
                 distance_matrix(batch_size*max_n_atoms*max_n_atoms*n_distance),
                 distance_matrix_mask(batch_size*max_n_atoms*max_n_atoms)]

    Returns
    -------
    tf.Tensor
      new embeddings for atoms, same shape as x[0]
    """
    self.build()
    atom_features, distance_matrix, distance_matrix_mask = x[0], x[1], x[2]

    # Push the pair distances and the atom embeddings through their own
    # hidden layers.
    distance_hidden = (
        tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df)
    atom_hidden = (
        tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf)

    # For atom i, combine the distance information of pair (i, j) with the
    # hidden embedding of atom j.
    pairwise = tf.multiply(distance_hidden, tf.expand_dims(atom_hidden, axis=1))
    pairwise = tf.tensordot(pairwise, self.W_fc, [[3], [0]])

    # Zero out the pairs (i, i) and every padded entry.
    pairwise = tf.multiply(pairwise,
                           tf.expand_dims(distance_matrix_mask, axis=3))
    pairwise = self.activation(pairwise)

    # Sum the influence of all atoms j on atom i and add the residual input.
    return tf.reduce_sum(pairwise, axis=2) + atom_features
Example #10
0
  def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    For every shard in `data` and every class, builds
    sqrt(diff * inverse(cov + min_var) * diff') with diff = shard - mean.
    The per-class results are stored in `self._all_scores`, and the entry
    belonging to each example's assigned class is stored in `self._scores`.

    Args:
      data: iterable of shard tensors.
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
      all_scores = []
      # Leading axis added so the shard broadcasts against the class mean.
      shard = tf.expand_dims(shard, 0)
      for c in xrange(self._num_classes):
        # NOTE(review): if the covariance type is neither FULL nor DIAG, `cov`
        # is never assigned here (or is left over from the previous class).
        if self._covariance_type == FULL_COVARIANCE:
          cov = self._covs[c, :, :]
        elif self._covariance_type == DIAG_COVARIANCE:
          cov = tf.diag(self._covs[c, :])
        inverse = tf.matrix_inverse(cov + self._min_var)
        # Replicate the inverse covariance once per example for batch_matmul.
        inv_cov = tf.tile(
            tf.expand_dims(inverse, 0),
            tf.pack([self._num_examples, 1, 1]))
        diff = tf.transpose(shard - self._means[c, :, :], perm=[1, 0, 2])
        m_left = tf.batch_matmul(diff, inv_cov)
        all_scores.append(tf.sqrt(tf.batch_matmul(
            m_left, tf.transpose(diff, perm=[0, 2, 1])
        )))
      # One column per class, one row per example.
      self._all_scores.append(tf.reshape(
          tf.concat(1, all_scores),
          tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    # (row, assigned class) pairs used to pick one score per example.
    indices = tf.concat(1, [tf.expand_dims(rows, 1),
                            tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
Example #11
0
def dot(x, y):
    """Multiply a Tensor vector with a Tensor matrix via ``tf.matmul``.

    Exactly one of the two arguments is expected to be rank 1. The vector is
    given an extra axis so that ``tf.matmul`` accepts it, and that axis is
    *not* removed again, so the result is always rank 2.

    Parameters
    ----------
    x : tf.Tensor
        Either a vector (it is then used as a row and ``y`` must be a matrix
        whose first dimension equals its length), or a matrix.
    y : tf.Tensor
        A matrix if ``x`` is a vector; otherwise a vector whose length equals
        the second dimension of ``x`` (it is used as a column).

    Returns
    -------
    tf.Tensor
        ``[1 x N]`` matrix when ``x`` is a vector and ``y`` is ``[M x N]``;
        ``[M x 1]`` matrix when ``x`` is ``[M x N]`` and ``y`` is a vector.
    """
    if len(x.get_shape()) != 1:
        # matrix * column vector
        return tf.matmul(x, tf.expand_dims(y, 1))
    # row vector * matrix
    return tf.matmul(tf.expand_dims(x, 0), y)
Example #12
0
    def __init__(self, memory_cells, query, project_query=False):
        """Build dot-product attention of a query over a set of memory cells.

        Stores the masked logits, the softmax probabilities and the
        probability-weighted sum of the cells on the instance.

        Args:
            memory_cells (SequenceBatch): a SequenceBatch containing a Tensor of shape (batch_size, num_cells, cell_dim)
            query (Tensor): a tensor of shape (batch_size, query_dim).
            project_query (bool): defaults to False. If True, the query goes through an extra projection layer to
                coerce it to cell_dim.
        """
        cell_values, cell_mask = memory_cells.values, memory_cells.mask
        cell_dim = cell_values.get_shape().as_list()[2]

        if project_query:
            # linear layer mapping the query from query_dim to cell_dim
            self._projection_layer = Dense(cell_dim, activation='linear')
            query = self._projection_layer(query)  # (batch_size, cell_dim)

        # score every cell against the query with one batched matrix product
        query_column = tf.expand_dims(query, 2)  # (batch_size, cell_dim, 1)
        scores = tf.squeeze(
            tf.batch_matmul(cell_values, query_column), [2])  # (batch_size, num_cells)

        # padded cells get a logit of negative infinity
        masked_scores = SequenceBatch(scores, cell_mask).with_pad_value(-float('inf'))

        probs = tf.nn.softmax(masked_scores.values)  # (batch_size, num_cells)

        # attention-weighted sum of the cells
        weighted_sum = tf.batch_matmul(
            tf.expand_dims(probs, 1), cell_values)  # (batch_size, 1, cell_dim)

        self._logits = masked_scores.values
        self._probs = probs
        self._retrieved = tf.squeeze(weighted_sum, [1])  # (batch_size, cell_dim)
Example #13
0
def softmax(x):
  """
  Compute the row-wise softmax of a 2-d tensor in tensorflow.

  The maximum of every row is subtracted before exponentiating, which keeps
  tf.exp from overflowing and leaves the result unchanged.

  Args:
    x:   tf.Tensor with shape (n_samples, n_features). Note feature vectors are
         represented by row-vectors. (1-d input is not handled.)
  Returns:
    out: tf.Tensor with shape (n_sample, n_features) whose rows each sum to 1.
  """

  ### YOUR CODE HERE
  row_max = tf.reduce_max(x, reduction_indices=[1])
  shifted = x - tf.expand_dims(row_max, 1)
  exponentials = tf.exp(shifted)
  row_total = tf.reduce_sum(exponentials, reduction_indices=[1])
  out = tf.div(exponentials, tf.expand_dims(row_total, 1))
  ### END YOUR CODE

  return out
Example #14
0
 def _build_predict(self, Xnew, full_cov=False):
     """
     Compute the mean and variance of the latent function at some new points
     Xnew. For a derivation of the terms in here, see the associated SGPR
     notebook.

     Args:
         Xnew: the new input points to predict at.
         full_cov: if True the variance is built from `kern.K(Xnew)` (full
             covariance), otherwise from `kern.Kdiag(Xnew)` (marginals only).

     Returns:
         Tuple `(mean, var)`. `mean` includes `self.mean_function(Xnew)`;
         `var` is tiled once per column of `self.Y` along a new trailing axis.
     """
     num_inducing = len(self.feature)
     # Residual of the training targets after removing the mean function.
     err = self.Y - self.mean_function(self.X)
     Kuf = self.feature.Kuf(self.kern, self.X)
     Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     Kus = self.feature.Kuf(self.kern, Xnew)
     sigma = tf.sqrt(self.likelihood.variance)
     L = tf.cholesky(Kuu)
     # A = L^-1 Kuf / sigma
     A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
     # B = A A^T + I
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(num_inducing, dtype=settings.float_type)
     LB = tf.cholesky(B)
     Aerr = tf.matmul(A, err)
     # c = LB^-1 A err / sigma
     c = tf.matrix_triangular_solve(LB, Aerr, lower=True) / sigma
     # tmp1 = L^-1 Kus, tmp2 = LB^-1 tmp1
     tmp1 = tf.matrix_triangular_solve(L, Kus, lower=True)
     tmp2 = tf.matrix_triangular_solve(LB, tmp1, lower=True)
     mean = tf.matmul(tmp2, c, transpose_a=True)
     if full_cov:
         var = self.kern.K(Xnew) + tf.matmul(tmp2, tmp2, transpose_a=True) \
               - tf.matmul(tmp1, tmp1, transpose_a=True)
         # Repeat the covariance once per output column of Y.
         shape = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), shape)
     else:
         var = self.kern.Kdiag(Xnew) + tf.reduce_sum(tf.square(tmp2), 0) \
               - tf.reduce_sum(tf.square(tmp1), 0)
         # Repeat the marginal variance once per output column of Y.
         shape = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), shape)
     return mean + self.mean_function(Xnew), var
Example #15
0
def _mean_image_subtraction(image, means):
  """Centers an image by subtracting a per-channel mean.

  For example:
    means = [123.68, 116.779, 103.939]
    image = _mean_image_subtraction(image, means)

  The rank of `image` has to be statically known.

  Args:
    image: a tensor of size [height, width, C].
    means: a C-vector of values to subtract from each channel.

  Returns:
    the centered image.

  Raises:
    ValueError: If the rank of `image` is unknown, if `image` has a rank other
      than three or if the number of channels in `image` doesn't match the
      number of values in `means`.
  """
  static_shape = image.get_shape()
  if static_shape.ndims != 3:
    raise ValueError('Input must be of size [height, width, C>0]')
  if len(means) != static_shape.as_list()[-1]:
    raise ValueError('len(means) must match the number of channels')

  # Lift the 1-D means to [1, 1, C] so they broadcast over height and width.
  broadcastable_means = tf.expand_dims(tf.expand_dims(means, 0), 0)
  return image - broadcastable_means
Example #16
0
def ValidArcAndTokenMasks(lengths, max_length, dtype=tf.float32):
  r"""Builds 0/1 masks marking the valid tokens and arcs of a padded batch.

  Args:
    lengths: [B] vector of input sequence lengths.
    max_length: Scalar maximum input sequence length, aka M.
    dtype: Data type for output mask.

  Returns:
    [B,M,M] tensor A with 0/1 indicators of valid arcs.  Specifically,
      A_{b,t,s} = t,s < lengths[b] ? 1 : 0
    [B,M] matrix T with 0/1 indicators of valid tokens.  Specifically,
      T_{b,t} = t < lengths[b] ? 1 : 0
  """
  # Token positions 0..M-1, in the same dtype as `lengths` so they compare.
  positions_m = tf.range(tf.cast(max_length, lengths.dtype.base_dtype))

  # Comparing [1,M] positions with [B,1] lengths broadcasts to [B,M].
  is_valid_bxm = tf.expand_dims(positions_m, 0) < tf.expand_dims(lengths, 1)
  valid_token_bxm = tf.cast(is_valid_bxm, dtype)

  # An arc is valid iff both of its endpoints are valid tokens: the outer
  # product of the token mask with itself, per batch element.
  token_column_bxmx1 = tf.expand_dims(valid_token_bxm, 2)
  token_row_bx1xm = tf.expand_dims(valid_token_bxm, 1)
  valid_arc_bxmxm = tf.matmul(token_column_bxmx1, token_row_bx1xm)

  return valid_arc_bxmxm, valid_token_bxm
Example #17
0
def build_psi_stats_rbf_plus_linear(Z, kern, mu, S):
    """Build the psi statistics for a kernel that is the sum of RBF and linear.

    psi0 and psi1 are the sums of the statistics of the two parts. psi2
    additionally receives an interaction term between the linear and the RBF
    part, added together with its transpose.

    Args:
        Z: inducing inputs (sliced to the kernel's active dimensions).
        kern: kernel exposing `linear` and `rbf` sub-kernels and `_slice`.
        mu: means of the inputs (sliced to the active dimensions).
        S: variances of the inputs (sliced to the active dimensions).

    Returns:
        Tuple `(psi0, psi1, psi2)`.
    """
    # TODO: make sure the active dimensions are overlapping completely

    # use only active dimensions
    mu, S = kern._slice(mu, S)  # only use the active dimensions.
    Z, _ = kern._slice(Z, None)

    psi0_lin, psi1_lin, psi2_lin = build_psi_stats_linear(Z, kern.linear, mu, S)
    psi0_rbf, psi1_rbf, psi2_rbf = build_psi_stats_rbf(Z, kern.rbf, mu, S)
    psi0, psi1, psi2 = psi0_lin + psi0_rbf, psi1_lin + psi1_rbf, psi2_lin + psi2_rbf

    # extra terms for the 'interaction' of linear and rbf
    l2 = tf.square(kern.rbf.lengthscales)
    A = tf.expand_dims(1./S + 1./l2, 1)  # N x 1 x Q
    m = (tf.expand_dims(mu/S, 1) + tf.expand_dims(Z/l2, 0)) / A  # N x M x Q
    mTAZ = tf.reduce_sum(tf.expand_dims(m * kern.linear.variance, 1) *
                         tf.expand_dims(tf.expand_dims(Z, 0), 0), 3)  # N x M x M
    Z2 = tf.reduce_sum(tf.square(Z) / l2, 1)  # M,
    mu2 = tf.reduce_sum(tf.square(mu) / S, 1)  # N
    mAm = tf.reduce_sum(tf.square(m) * A, 2)  # N x M
    exp_term = tf.exp(-(tf.reshape(Z2, (1, -1)) + tf.reshape(mu2, (-1, 1))-mAm) / 2.)  # N x M
    # Sum the per-data-point interaction terms over axis 0.
    psi2_extra = tf.reduce_sum(kern.rbf.variance *
                               tf.expand_dims(exp_term, 2) *
                               tf.expand_dims(tf.expand_dims(tf.reduce_prod(S, 1), 1), 2) *
                               tf.expand_dims(tf.reduce_prod(A, 2), 1) *
                               mTAZ, 0)

    # The interaction term is added together with its transpose.
    psi2 = psi2 + psi2_extra + tf.transpose(psi2_extra)
    return psi0, psi1, psi2
Example #18
0
  def _testGraphExtensionRestore(self):
    """Restores a saved meta graph, extends it with a loss and trains a step.

    Reads "metafile" and "saver0.ckpt" from the "graph_extension" directory
    under the test's temp dir.
    """
    test_dir = os.path.join(self.get_temp_dir(), "graph_extension")
    meta_graph_path = os.path.join(test_dir, "metafile")
    checkpoint_path = os.path.join(test_dir, "saver0.ckpt")
    with self.test_session(graph=tf.Graph()) as sess:
      # Rebuild the graph from the MetaGraphDef, check that it can be exported
      # again, then load the variable values from the checkpoint.
      restored_saver = tf.train.import_meta_graph(meta_graph_path)
      restored_saver.export_meta_graph()
      restored_saver.restore(sess, checkpoint_path)

      # Extend the restored graph with a cross-entropy loss on constant
      # labels (100 examples of class 0, 10 classes).
      labels = tf.constant(0, tf.int32, shape=[100], name="labels")
      num_labels = tf.size(labels)
      label_column = tf.expand_dims(labels, 1)
      row_column = tf.expand_dims(tf.range(0, num_labels), 1)
      coordinates = tf.concat(1, [row_column, label_column])
      onehot_labels = tf.sparse_to_dense(
          coordinates, tf.pack([num_labels, 10]), 1.0, 0.0)
      logits = tf.get_collection("logits")[0]
      per_example_loss = tf.nn.softmax_cross_entropy_with_logits(
          logits, onehot_labels, name="xentropy")
      loss = tf.reduce_mean(per_example_loss, name="xentropy_mean")

      tf.scalar_summary(loss.op.name, loss)

      # One step of plain gradient descent with learning rate 0.01.
      train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
      sess.run(train_op)
Example #19
0
  def body(self, features):
    """Runs resnet_v2 on features["inputs"] and adds two singleton axes.

    The block function, the layer sizes, the data layout and the training
    flag are all taken from `self.hparams`.
    """
    hparams = self.hparams
    available_blocks = {
        "residual": residual_block,
        "bottleneck": bottleneck_block,
    }
    assert hparams.block_fn in available_blocks

    net = features["inputs"]

    if hparams.use_nchw:
      # Convert from channels_last (NHWC) to channels_first (NCHW). This
      # provides a large performance boost on GPU.
      net = tf.transpose(net, [0, 3, 1, 2])
      layout = "channels_first"
    else:
      layout = "channels_last"

    training = hparams.mode == tf.estimator.ModeKeys.TRAIN
    net = resnet_v2(
        net,
        available_blocks[hparams.block_fn],
        hparams.layer_sizes,
        layout,
        is_training=training)

    # Insert two size-1 axes right after the first axis.
    return tf.expand_dims(tf.expand_dims(net, 1), 1)
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    Additive attention between two padded sequences a and b.

    The score of position i in a against position j in b is
    dot(v, tanh(W*a_i + W*b_j)), where v is a learnable vector and W is a
    learnable matrix shared by both sequences. Scores are exponentiated (after
    subtracting the row maximum for stability), masked with
    mask_attention_weights and normalized along the last axis.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Variable scope holding W and v.
        reuse: Whether to reuse the variables of `scope`.

    Returns:
        Attention weights, one [max_seq_len, max_seq_len] matrix per batch
        element (presumably [batch_size, max_seq_len, max_seq_len]; this
        assumes mask_attention_weights keeps the shape -- TODO confirm).

    """
    with tf.variable_scope(scope, reuse=reuse):
        # The second call reuses the weights created by the first one.
        a_projected = time_distributed_dense_layer(
            a, hidden_units, bias=False, scope='dense', reuse=False)
        b_projected = time_distributed_dense_layer(
            b, hidden_units, bias=False, scope='dense', reuse=True)

        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )

        # Broadcasting the two expanded projections yields every (i, j) pair.
        pair_sums = tf.expand_dims(a_projected, 2) + tf.expand_dims(b_projected, 1)
        scores = tf.einsum('ijkl,l->ijk', tf.nn.tanh(pair_sums), v)

        # Masked, numerically stable softmax over the last axis.
        row_max = tf.reduce_max(scores, axis=2)
        weights = tf.exp(scores - tf.expand_dims(row_max, 2))
        weights = mask_attention_weights(weights, a_lengths, b_lengths, max_seq_len)
        row_total = tf.reduce_sum(weights, axis=2) + 1e-10
        return weights / tf.expand_dims(row_total, 2)
def multiplicative_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                             scope='multiplicative-attention', reuse=False):
    """
    Multiplicative attention between two padded sequences a and b.

    The score of position i in a against position j in b is
    dot(W*a_i, W*b_j), where W is a learnable matrix shared by both sequences.
    Scores are exponentiated (after subtracting the row maximum for
    stability), masked with mask_attention_weights and normalized along the
    last axis.

    Args:
        a: Input sequence a.  Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b.  Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a.  Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b.  Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b.  Integer.
        hidden_units: Number of hidden units.  Integer.
        scope: Variable scope holding W.
        reuse: Whether to reuse the variables of `scope`.

    Returns:
        Attention weights, one [max_seq_len, max_seq_len] matrix per batch
        element (presumably [batch_size, max_seq_len, max_seq_len]; this
        assumes mask_attention_weights keeps the shape -- TODO confirm).

    """
    with tf.variable_scope(scope, reuse=reuse):
        # The second call reuses the weights created by the first one.
        a_projected = time_distributed_dense_layer(
            a, hidden_units, bias=False, scope='dense', reuse=False)
        b_projected = time_distributed_dense_layer(
            b, hidden_units, bias=False, scope='dense', reuse=True)

        # Batched a_projected * b_projected^T gives every (i, j) dot product.
        scores = tf.matmul(a_projected, tf.transpose(b_projected, (0, 2, 1)))

        # Masked, numerically stable softmax over the last axis.
        row_max = tf.reduce_max(scores, axis=2)
        weights = tf.exp(scores - tf.expand_dims(row_max, 2))
        weights = mask_attention_weights(weights, a_lengths, b_lengths, max_seq_len)
        row_total = tf.reduce_sum(weights, axis=2) + 1e-10
        return weights / tf.expand_dims(row_total, 2)
Example #22
0
  def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function

    Evaluates exp(-ita * (d - Rs)^2) * d_cutoff for `self.radial_length`
    centers Rs spaced evenly between 0 and `self.radial_cutoff`, and sums the
    result over axis 2. When `self.atomic_number_differentiated` is set, the
    sum is taken separately for every atom type in `self.atom_cases` and the
    per-type results are concatenated along the last axis.
    """
    one_hot_table = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(one_hot_table, atom_numbers)

    # Evenly spaced centers; the width depends on the spacing of the centers.
    centers = np.linspace(0., self.radial_cutoff, self.radial_length)
    widths = np.ones_like(centers) * 3 / (centers[1] - centers[0])**2
    Rs = tf.cast(np.reshape(centers, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(widths, (1, 1, 1, -1)), tf.float32)
    num_centers = ita.get_shape().as_list()[-1]

    # Repeat the inputs along a new last axis, once per center.
    cutoff_per_center = tf.stack([d_cutoff] * num_centers, axis=3)
    distance_per_center = tf.stack([d] * num_centers, axis=3)

    out = tf.exp(-ita * tf.square(distance_per_center - Rs)) * cutoff_per_center

    if not self.atomic_number_differentiated:
      return tf.reduce_sum(out, axis=2)

    per_type_sums = []
    for atom_type in self.atom_cases:
      # Indicator of the atoms of this type, shaped to broadcast against `out`.
      type_indicator = atom_numbers_embedded[:, :, atom_type]
      type_indicator = tf.expand_dims(
          tf.expand_dims(type_indicator, axis=1), axis=3)
      per_type_sums.append(tf.reduce_sum(out * type_indicator, axis=2))
    return tf.concat(per_type_sums, axis=2)
Example #23
0
  def reward_prediction_big(
      self, input_images, input_reward, action, latent, mid_outputs):
    """Builds a reward prediction network.

    Concatenates the input images along the channel axis, optionally injects
    the action, the input reward and the latent into the feature map, and
    applies strided convolutions with layer normalization.

    NOTE(review): as visible here the function ends after "reward_conv3"
    without a return statement, so it returns None -- the remainder of the
    body appears to be missing; confirm against the original source.

    Args:
      input_images: list of image tensors, concatenated along axis 3.
      input_reward: optional reward input, injected when not None.
      action: optional action input, injected when not None.
      latent: optional latent, flattened and injected when not None.
      mid_outputs: unused; deleted immediately.
    """
    del mid_outputs
    conv_size = self.tinyify([32, 32, 16, 8])

    with tf.variable_scope("reward_pred", reuse=tf.AUTO_REUSE):
      x = tf.concat(input_images, axis=3)
      x = tfcl.layer_norm(x)

      # The first strided convolution is skipped in small mode.
      if not self.hparams.small_mode:
        x = tfl.conv2d(x, conv_size[1], [3, 3], strides=(2, 2),
                       activation=tf.nn.relu, name="reward_conv1")
        x = tfcl.layer_norm(x)

      # Inject additional inputs
      if action is not None:
        x = common_video.inject_additional_input(
            x, action, "action_enc", self.hparams.action_injection)
      if input_reward is not None:
        x = common_video.inject_additional_input(x, input_reward, "reward_enc")
      if latent is not None:
        # Flatten, then add two singleton axes after the batch axis.
        latent = tfl.flatten(latent)
        latent = tf.expand_dims(latent, axis=1)
        latent = tf.expand_dims(latent, axis=1)
        x = common_video.inject_additional_input(x, latent, "latent_enc")

      x = tfl.conv2d(x, conv_size[2], [3, 3], strides=(2, 2),
                     activation=tf.nn.relu, name="reward_conv2")
      x = tfcl.layer_norm(x)
      x = tfl.conv2d(x, conv_size[3], [3, 3], strides=(2, 2),
                     activation=tf.nn.relu, name="reward_conv3")
  def testExpandAndSqueeze(self):
    """Checks that expand_dims and squeeze undo each other.

    Both the positional and the `dim=` / `squeeze_dims=` keyword spellings
    are exercised.
    """
    with self.cached_session():

      # TODO(aselle): sparse_split, sparse_reduce_sum,
      #  sparse_reduce_sum_sparse, reduce_join
      a = [[1, 2, 3]]
      round_trips = [
          tf.expand_dims(tf.squeeze(a, [0]), 0),
          tf.squeeze(tf.expand_dims(a, 1), [1]),
          tf.expand_dims(
              tf.squeeze([[1, 2, 3]], squeeze_dims=[0]), dim=0),
          tf.squeeze(
              tf.expand_dims([[1, 2, 3]], dim=1), squeeze_dims=[1]),
          tf.squeeze(
              tf.expand_dims([[1, 2, 3]], dim=1), squeeze_dims=[1]),
      ]
      # Every round trip must give back the original nested list.
      for round_trip in round_trips:
        self.assertAllEqual(round_trip.eval(), a)
Example #25
0
def roc_auc_score(y_pred, y_true):
    """ ROC AUC Score.

    Differentiable surrogate for the Area Under Curve score, based on an
    approximation to the Wilcoxon-Mann-Whitney U statistic.

    Yan, L., Dodier, R., Mozer, M. C., & Wolniewicz, R. (2003).
    Optimizing Classifier Performance via an Approximation to the Wilcoxon-Mann-Whitney Statistic.

    Every positive prediction is compared with every negative prediction;
    each pair whose margin (positive - negative) falls below `gamma`
    contributes (gamma - margin) ** p to the returned sum.

    Arguments:
        y_pred: `Tensor`. Predicted values.
        y_true: `Tensor` . Targets (labels), a probability distribution.

    """
    with tf.name_scope("RocAucScore"):

        is_positive = tf.cast(y_true, tf.bool)
        positives = tf.boolean_mask(y_pred, is_positive)
        negatives = tf.boolean_mask(y_pred, ~is_positive)

        # Row of positives against column of negatives -> all pairs.
        positives_row = tf.expand_dims(positives, 0)
        negatives_column = tf.expand_dims(negatives, 1)

        # original paper suggests performance is robust to exact parameter choice
        gamma = 0.2
        p = 3

        pair_margins = (tf.zeros_like(positives_row * negatives_column)
                        + positives_row - negatives_column - gamma)

        # Only pairs that violate the margin are penalized.
        violations = tf.boolean_mask(pair_margins, pair_margins < 0.0)

        return tf.reduce_sum(tf.pow(-violations, p))
Example #26
0
def train():
	"""Feed a two-image batch through a convolutional LSTM encoder 100 times.

	The same image, "lily.jpg", is read twice to form the batch, and the same
	placeholder is used for both time steps. The encoder outputs are printed
	on every iteration.
	"""
	image_name = tf.constant("lily.jpg", tf.string)
	frames = tuple(
		tf.expand_dims(uf.read_image(image_name, IMG_ROW, IMG_COL), 0)
		for _ in range(2))
	image = tf.concat(0, frames)

	clstm = crnn.con_lstm_cell(BATCH_SIZE, IMG_ROW, IMG_COL, 3, 3, CELL_C)
	input_ = tf.placeholder(tf.float32, (BATCH_SIZE, IMG_ROW, IMG_COL, 3))
	# Two time steps sharing one placeholder.
	inputs = [input_, input_]

	outputs, state = crnn.clstm_encode(clstm, inputs)

	sess = tf.Session()
	sess.run(tf.initialize_all_variables())

	for _ in xrange(100):
		image_v = sess.run(image)
		feed_data = {inputs[0]: image_v, inputs[1]: image_v}
		outputs_v = sess.run(outputs, feed_dict = feed_data)
		print(outputs_v)
Example #27
0
def dna_transformation(prev_image, dna_input, dna_kernel_size, relu_shift):
  """Apply dynamic neural advection to previous image.

  Every output pixel is a weighted sum of the pixels in a
  `dna_kernel_size` x `dna_kernel_size` neighbourhood of `prev_image`, with
  per-pixel weights taken from `dna_input`.

  Args:
    prev_image: previous image to be transformed. Rank 4; axes 1 and 2 are
      used as height and width and must be statically known.
    dna_input: hidden layer to be used for computing DNA transformation.
      It is normalised over axis 3, which is assumed to hold
      dna_kernel_size ** 2 weights per pixel -- TODO confirm with caller.
    dna_kernel_size: dna kernel size.
    relu_shift: shift for ReLU function; it also acts as the lower bound of
      every kernel weight before normalisation.
  Returns:
    A single tensor: the image transformed by the predicted DNA kernels,
    with the neighbourhood axis summed out.
  """
  # Pad by half the kernel size so the kernel_size shifted windows are
  # centred on each pixel and never slice past the padded border. The
  # previous fixed padding of 2 was only correct for a kernel size of 5.
  pad = dna_kernel_size // 2
  prev_image_pad = tf.pad(prev_image, [[0, 0], [pad, pad], [pad, pad], [0, 0]])
  image_height = int(prev_image.get_shape()[1])
  image_width = int(prev_image.get_shape()[2])

  # Construct translated images: one shifted copy per kernel position,
  # stacked along a new axis 3.
  inputs = []
  for xkern in range(dna_kernel_size):
    for ykern in range(dna_kernel_size):
      inputs.append(
          tf.expand_dims(
              tf.slice(prev_image_pad, [0, xkern, ykern, 0],
                       [-1, image_height, image_width, -1]), [3]))
  inputs = tf.concat(axis=3, values=inputs)

  # Normalize channels to 1.
  kernel = tf.nn.relu(dna_input - relu_shift) + relu_shift
  kernel = tf.expand_dims(
      kernel / tf.reduce_sum(kernel, [3], keep_dims=True), [4])
  return tf.reduce_sum(kernel * inputs, [3], keep_dims=False)
Example #28
0
 def build(self):
   """Create the TensorFlow graph that computes the transform.

   A float placeholder of shape (None, self.max_atoms, 4) is stored on
   `self.inputs`; column 0 is read as the atomic number and columns 1-3 as
   the coordinates. The concatenation (along axis 2) of the atomic numbers,
   the radial symmetry terms and the angular symmetry terms is stored on
   `self.outputs`.

   Returns:
     The `tf.Graph` holding the placeholder and the output tensor.
   """
   graph = tf.Graph()
   with graph.as_default():
     self.inputs = tf.placeholder(tf.float32, shape=(None, self.max_atoms, 4))

     atom_numbers = tf.cast(self.inputs[:, :, 0], tf.int32)

     # sign() of the atomic number gives a per-atom flag; the outer product
     # of that flag with itself gives a per-pair flag.
     atom_flags = tf.sign(atom_numbers)
     flags_axis1 = tf.expand_dims(atom_flags, 1)
     flags_axis2 = tf.expand_dims(atom_flags, 2)
     pair_flags = tf.cast(flags_axis1 * flags_axis2, tf.float32)

     coordinates = self.inputs[:, :, 1:]
     if self.coordinates_in_bohr:
       # 0.52917721092 is the Bohr radius in Angstrom; presumably this
       # converts the coordinates from Bohr to Angstrom.
       coordinates = coordinates * 0.52917721092

     distances = self.distance_matrix(coordinates, pair_flags)
     radial_cut = self.distance_cutoff(distances, self.radial_cutoff,
                                       pair_flags)
     angular_cut = self.distance_cutoff(distances, self.angular_cutoff,
                                        pair_flags)
     radial_sym = self.radial_symmetry(radial_cut, distances, atom_numbers)
     angular_sym = self.angular_symmetry(angular_cut, distances, atom_numbers,
                                         coordinates)

     atom_number_feature = tf.cast(tf.expand_dims(atom_numbers, 2), tf.float32)
     features = [atom_number_feature, radial_sym, angular_sym]
     self.outputs = tf.concat(features, axis=2)
   return graph
Example #29
0
    def _att(self, context, context_encode, h):
        """Compute the attention-weighted sum of `context`.

        The hidden state `h` is projected to the context dimension and added
        (with a bias) to `context_encode`; after a tanh each location is
        scored by a linear layer, the scores are softmax-normalised, and the
        resulting weights are used to sum `context` over axis 1.

        Assumes `context` and `context_encode` are laid out as
        (batch, location, dim_ctx) and `h` as (batch, dim_hidden) --
        TODO confirm against the caller.

        Returns:
            `context` summed over axis 1 with the attention weights applied.
        """
        with tf.variable_scope('att'):
            w_hidden = self._variable_trunc_normal(
                'hidden_att_W', [self.dim_hidden, self.dim_ctx])
            b_pre = self._variable_constant('pre_att_b', [self.dim_ctx])
            w_score = self._variable_trunc_normal('att_W', [self.dim_ctx, 1])
            b_score = self._variable_constant('att_b', [1])

            # evaluate context_encode (e_ti)
            projected_h = tf.expand_dims(tf.matmul(h, w_hidden), 1)
            energies = tf.nn.tanh(context_encode + projected_h + b_pre)

            # Flatten the first two axes so one matmul scores every location.
            num_locations = self.ctx_shape[0]
            flat_energies = tf.reshape(
                energies, [self.batch_size * num_locations, self.dim_ctx])
            flat_scores = tf.matmul(flat_energies, w_score) + b_score
            scores = tf.reshape(flat_scores, [self.batch_size, num_locations])

            alpha = tf.nn.softmax(scores)
            weighted_context = tf.reduce_sum(
                context * tf.expand_dims(alpha, 2), 1)
            return weighted_context
Example #30
0
    def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0):
        """Create the per-layer cells, initial states and dropout masks.

        For each layer a (forward, backward) pair is appended to each of
        `self.grus`, `self.inits` and `self.dropout_mask`.

        Args:
            num_layers: number of stacked layers.
            num_units: number of units of every GRU cell.
            batch_size: batch size the initial states and masks are built for.
            input_size: feature size fed to the first layer; later layers
                are sized for 2 * num_units features.
            keep_prob: keep probability used to draw the dropout masks.
        """

        def make_cell():
            # One cudnn-compatible GRU cell wrapped in a MultiRNNCell.
            return tf.nn.rnn_cell.MultiRNNCell([
                tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])

        def make_initial_state():
            # A [num_units] variable repeated over the batch and given a
            # leading axis: [1, batch_size, num_units].
            state = tf.Variable(tf.zeros([num_units]))
            per_batch = tf.tile(tf.expand_dims(state, axis=0), [batch_size, 1])
            return tf.expand_dims(per_batch, axis=0)

        def make_mask(feature_size):
            # Dropout applied to a tensor of ones gives a reusable mask.
            ones = tf.ones([1, batch_size, feature_size], dtype=tf.float32)
            return tf.nn.dropout(ones, keep_prob=keep_prob)

        self.num_layers = num_layers
        self.grus = []
        self.inits = []
        self.dropout_mask = []
        for layer in range(num_layers):
            if layer == 0:
                layer_input_size = input_size
            else:
                layer_input_size = 2 * num_units

            # Tuples are evaluated left to right, so the forward element of
            # each pair is always created before the backward one.
            cells = (make_cell(), make_cell())
            initial_states = (make_initial_state(), make_initial_state())
            masks = (make_mask(layer_input_size), make_mask(layer_input_size))

            self.grus.append(cells)
            self.inits.append(initial_states)
            self.dropout_mask.append(masks)
Example #31
0
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    do_return_2d_tensor=False,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Performs multi-headed attention from `from_tensor` to `to_tensor`.

    This is an implementation of multi-headed attention based on "Attention
    is all you Need". If `from_tensor` and `to_tensor` are the same, then
    this is self-attention. Each timestep in `from_tensor` attends to the
    corresponding sequence in `to_tensor`, and returns a fixed-width vector.

    `from_tensor` is projected into a "query" tensor and `to_tensor` into
    "key" and "value" tensors. Each of these is (effectively) a list of
    `num_attention_heads` tensors of shape
    [batch_size, seq_length, size_per_head].

    Query and key are dot-producted and scaled, the result is softmaxed into
    attention probabilities, and the value tensors are interpolated by those
    probabilities before being concatenated back into a single tensor.

    In practice the heads are handled with reshapes and transposes rather
    than with separate tensors.

    Args:
      from_tensor: float Tensor of shape [batch_size, from_seq_length,
        from_width].
      to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length, to_seq_length]. The values should be 1 or 0. The
        attention scores will effectively be set to -infinity for any
        positions in the mask that are 0, and will be unchanged for positions
        that are 1.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation function for the query transform.
      key_act: (optional) Activation function for the key transform.
      value_act: (optional) Activation function for the value transform.
      attention_probs_dropout_prob: (optional) float. Dropout probability of
        the attention probabilities.
      initializer_range: float. Range of the weight initializer.
      do_return_2d_tensor: bool. If True, the output will be of shape
        [batch_size * from_seq_length, num_attention_heads * size_per_head].
        If False, the output will be of shape [batch_size, from_seq_length,
        num_attention_heads * size_per_head].
      batch_size: (Optional) int. If the input is 2D, this might be the batch
        size of the 3D version of the `from_tensor` and `to_tensor`.
      from_seq_length: (Optional) If the input is 2D, this might be the seq
        length of the 3D version of the `from_tensor`.
      to_seq_length: (Optional) If the input is 2D, this might be the seq
        length of the 3D version of the `to_tensor`.

    Returns:
      float Tensor of shape [batch_size, from_seq_length,
        num_attention_heads * size_per_head]. (If `do_return_2d_tensor` is
        true, this will be of shape [batch_size * from_seq_length,
        num_attention_heads * size_per_head]).

    Raises:
      ValueError: Any of the arguments or tensor shapes are invalid.
    """
    # Width shared by the query, key and value projections (N*H).
    projection_width = num_attention_heads * size_per_head

    def project(inputs_2d, activation, name):
        """Dense projection of a 2D input to [rows, N*H]."""
        return tf.layers.dense(
            inputs_2d,
            projection_width,
            activation=activation,
            name=name,
            kernel_initializer=create_initializer(initializer_range))

    def split_heads(flat_tensor, seq_length):
        """[B*S, N*H] -> [B, S, N, H] -> [B, N, S, H]."""
        per_head = tf.reshape(
            flat_tensor,
            [batch_size, seq_length, num_attention_heads, size_per_head])
        return tf.transpose(per_head, [0, 2, 1, 3])

    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])

    input_rank = len(from_shape)
    if input_rank != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if input_rank == 3:
        # 3D inputs carry their own batch size and sequence lengths.
        batch_size = from_shape[0]
        from_seq_length = from_shape[1]
        to_seq_length = to_shape[1]
    elif input_rank == 2:
        required = (batch_size, from_seq_length, to_seq_length)
        if any(value is None for value in required):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    # Scalar dimensions referenced here:
    #   B = batch size (number of sequences)
    #   F = `from_tensor` sequence length
    #   T = `to_tensor` sequence length
    #   N = `num_attention_heads`
    #   H = `size_per_head`

    from_tensor_2d = reshape_to_matrix(from_tensor)
    to_tensor_2d = reshape_to_matrix(to_tensor)

    # `query_layer` = [B*F, N*H]; `key_layer` and `value_layer` = [B*T, N*H]
    query_layer = project(from_tensor_2d, query_act, "query")
    key_layer = project(to_tensor_2d, key_act, "key")
    value_layer = project(to_tensor_2d, value_act, "value")

    # `query_layer` = [B, N, F, H]
    query_layer = split_heads(query_layer, from_seq_length)

    # `key_layer` = [B, N, T, H]
    key_layer = split_heads(key_layer, to_seq_length)

    # Raw attention scores: scaled dot product between "query" and "key".
    # `attention_scores` = [B, N, F, T]
    scale = 1.0 / math.sqrt(float(size_per_head))
    attention_scores = tf.matmul(query_layer, key_layer, transpose_b=True)
    attention_scores = tf.multiply(attention_scores, scale)

    if attention_mask is not None:
        # `attention_mask` = [B, 1, F, T]
        attention_mask = tf.expand_dims(attention_mask, axis=[1])

        # The mask is 1.0 where attention is allowed and 0.0 elsewhere, so
        # this is 0.0 for allowed positions and -10000.0 for masked ones.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0

        # Added to the raw scores before the softmax, this is effectively
        # the same as removing the masked positions entirely.
        attention_scores += adder

    # Normalize the attention scores to probabilities.
    # `attention_probs` = [B, N, F, T]
    attention_probs = tf.nn.softmax(attention_scores)

    # This is actually dropping out entire tokens to attend to, which might
    # seem a bit unusual, but is taken from the original Transformer paper.
    attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

    # `value_layer` = [B, N, T, H]
    value_layer = split_heads(value_layer, to_seq_length)

    # `context_layer` = [B, N, F, H]
    context_layer = tf.matmul(attention_probs, value_layer)

    # `context_layer` = [B, F, N, H]
    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

    if do_return_2d_tensor:
        # `context_layer` = [B*F, N*H]
        output_shape = [batch_size * from_seq_length, projection_width]
    else:
        # `context_layer` = [B, F, N*H]
        output_shape = [batch_size, from_seq_length, projection_width]

    return tf.reshape(context_layer, output_shape)
Example #32
0
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
    """Creates a side-by-side image with detections and groundtruth.

    Bounding boxes (and instance masks, if available) are visualized on both
    subimages.

    Note that when `eval_dict` describes a single example (rank-1 detection
    classes), every entry except the original image is replaced in place by
    a version with a leading batch axis.

    Args:
      eval_dict: The evaluation dictionary returned by
        eval_util.result_dict_for_batched_example() or
        eval_util.result_dict_for_single_example().
      category_index: A category index (dictionary) produced from a labelmap.
      max_boxes_to_draw: The maximum number of boxes to draw for detections.
      min_score_thresh: The minimum score threshold for showing detections.
      use_normalized_coordinates: Whether to assume boxes and keypoints are in
        normalized coordinates (as opposed to absolute coordinates).
        Default is True.

    Returns:
      A list of [1, H, 2 * W, C] uint8 tensors, one per example. The subimage
        on the left corresponds to detections, while the subimage on the
        right corresponds to groundtruth.
    """
    detection_fields = fields.DetectionResultFields()
    input_data_fields = fields.InputDataFields()

    def single(key, indx):
        """Example `indx` of `eval_dict[key]` with a batch axis of size 1."""
        return tf.expand_dims(eval_dict[key][indx], axis=0)

    def single_uint8_or_none(key, indx):
        """Like `single`, cast to uint8; None when `key` is not available."""
        if key not in eval_dict:
            return None
        return tf.cast(single(key, indx), tf.uint8)

    side_by_side_images = []

    # Add the batch dimension if the eval_dict is for single example.
    if len(eval_dict[detection_fields.detection_classes].shape) == 1:
        for key in eval_dict:
            if key == input_data_fields.original_image:
                continue
            eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

    num_examples = eval_dict[input_data_fields.original_image].shape[0]
    for indx in range(num_examples):
        instance_masks = single_uint8_or_none(
            detection_fields.detection_masks, indx)
        if detection_fields.detection_keypoints in eval_dict:
            keypoints = single(detection_fields.detection_keypoints, indx)
        else:
            keypoints = None
        groundtruth_instance_masks = single_uint8_or_none(
            input_data_fields.groundtruth_instance_masks, indx)

        # Left half: the model's detections.
        images_with_detections = draw_bounding_boxes_on_image_tensors(
            single(input_data_fields.original_image, indx),
            single(detection_fields.detection_boxes, indx),
            single(detection_fields.detection_classes, indx),
            single(detection_fields.detection_scores, indx),
            category_index,
            original_image_spatial_shape=single(
                input_data_fields.original_image_spatial_shape, indx),
            true_image_shape=single(input_data_fields.true_image_shape, indx),
            instance_masks=instance_masks,
            keypoints=keypoints,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh,
            use_normalized_coordinates=use_normalized_coordinates)

        # Right half: the groundtruth, drawn with a score of 1.0 per box and
        # with no box limit or score threshold so that every box is shown.
        groundtruth_image = single(input_data_fields.original_image, indx)
        groundtruth_boxes = single(input_data_fields.groundtruth_boxes, indx)
        groundtruth_classes = single(
            input_data_fields.groundtruth_classes, indx)
        groundtruth_scores = tf.expand_dims(
            tf.ones_like(
                eval_dict[input_data_fields.groundtruth_classes][indx],
                dtype=tf.float32),
            axis=0)
        images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
            groundtruth_image,
            groundtruth_boxes,
            groundtruth_classes,
            groundtruth_scores,
            category_index,
            original_image_spatial_shape=single(
                input_data_fields.original_image_spatial_shape, indx),
            true_image_shape=single(input_data_fields.true_image_shape, indx),
            instance_masks=groundtruth_instance_masks,
            keypoints=None,
            max_boxes_to_draw=None,
            min_score_thresh=0.0,
            use_normalized_coordinates=use_normalized_coordinates)

        # Axis 2 is the width axis of the [1, H, W, C] images.
        side_by_side_images.append(
            tf.concat([images_with_detections, images_with_groundtruth],
                      axis=2))
    return side_by_side_images
    def model_fn(features, labels, mode, params):
        """The `model_fn` for TPUEstimator.

        Builds a multi-label classifier (independent sigmoid per class) on
        top of either a BERT encoder created here or sequence embeddings
        supplied by the caller.

        Args:
            features: dict of tensors. Must contain "input_mask" and either
                "embeddings" (sequence output computed elsewhere) or both
                "input_ids" and "segment_ids" (BERT is then built here).
            labels: dict with a "label_ids" entry, or None. Required for
                every mode except PREDICT.
            mode: a `tf.estimator.ModeKeys` value.
            params: dict read for "bert_config", "trainable_bert",
                "init_checkpoint", "class_based_attention", "shared_size",
                "num_classes", "dropout", "learning_rate",
                "num_train_steps" and "num_warmup_steps".

        Returns:
            A `TPUEstimatorSpec` when `use_tpu` (taken from the enclosing
            scope) is true, otherwise an `EstimatorSpec`. The predictions
            always hold "probabilities" and "pooled_output", hold
            "sequence_output" when BERT is built here, and hold "attention"
            only when class-based attention is enabled.
        """

        predictions = {}

        input_mask = features["input_mask"]
        batch_size = input_mask.shape[0]
        if labels is not None:
            label_ids = tf.cast(labels["label_ids"], tf.float32)

        # Defined up front: it is handed to TPUEstimatorSpec below even when
        # the caller supplies embeddings and no checkpoint is loaded here.
        scaffold_fn = None

        if "embeddings" not in features:
            input_ids = features["input_ids"]
            segment_ids = features["segment_ids"]

            # TODO: Check that is_training should equal trainable_bert.
            model = modeling.BertModel(config=params['bert_config'],
                                       is_training=params['trainable_bert'],
                                       input_ids=input_ids,
                                       input_mask=input_mask,
                                       token_type_ids=segment_ids,
                                       use_one_hot_embeddings=True)

            # TODO: Find correct place
            tvars = tf.trainable_variables()
            initialized_variable_names = {}

            if params["init_checkpoint"]:
                (assignment_map, initialized_variable_names
                 ) = modeling.get_assignment_map_from_checkpoint(
                     tvars, params["init_checkpoint"])
                if use_tpu:
                    # On TPU the checkpoint has to be restored from inside
                    # the scaffold function.
                    def tpu_scaffold():
                        tf.train.init_from_checkpoint(params["init_checkpoint"],
                                                      assignment_map)
                        return tf.train.Scaffold()
                    scaffold_fn = tpu_scaffold
                else:
                    tf.train.init_from_checkpoint(params["init_checkpoint"], assignment_map)

            tf.logging.info("**** Variables - INIT FROM CKPT ****")
            for var in tvars:
                if var.name in initialized_variable_names:
                    tf.logging.info("name: {}, shape: {}".format(var.name, var.shape))

            sequence_output = model.get_sequence_output()
            predictions["sequence_output"] = sequence_output

        else:
            sequence_output = features["embeddings"]

        hidden_size = sequence_output.shape[-1].value
        class_based_attention = params["class_based_attention"]
        # Attention weights only exist in the class-based branch.
        distribution = None
        if class_based_attention:
            # Every class query is the concatenation of a part shared by all
            # classes and a class-specific part.
            shared_query_embedding = tf.get_variable(
                'shared_query', [1, 1, params["shared_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            shared_query_embedding = tf.broadcast_to(
                shared_query_embedding,
                [1, params["num_classes"], params["shared_size"]])
            class_query_embedding = tf.get_variable(
                'class_query',
                [1, params["num_classes"], hidden_size - params["shared_size"]],
                initializer=tf.truncated_normal_initializer(stddev=0.02))
            query_embedding = tf.concat(
                [shared_query_embedding, class_query_embedding], axis=2)
            # Reimplement Attention layer to peek into weights.
            scores = tf.matmul(query_embedding,
                               sequence_output,
                               transpose_b=True)
            # input_bias is 1 where input_mask is 0; subtracting a large
            # constant there drives those scores to ~0 after the softmax.
            input_bias = tf.abs(input_mask - 1)
            scores -= 1.e9 * tf.expand_dims(tf.cast(input_bias, tf.float32),
                                            axis=1)
            distribution = tf.nn.softmax(scores)
            pooled_output = tf.matmul(distribution, sequence_output)
        else:
            # Pool with a tanh dense layer over the first token.
            first_token_tensor = tf.squeeze(sequence_output[:, 0:1, :], axis=1)
            pooled_output = tf.layers.dense(first_token_tensor,
                                            hidden_size,
                                            activation=tf.tanh)

        if mode == tf.estimator.ModeKeys.TRAIN:
            pooled_output = tf.nn.dropout(pooled_output, rate=params["dropout"])

        logits = tf.layers.dense(pooled_output, params["num_classes"])
        if class_based_attention:
            # There is one pooled vector per class here, so the dense layer
            # scores every class against every pooled vector; keep only the
            # entries where the two class indices match. Without class-based
            # attention there is a single pooled vector per example, the
            # dense output already has one logit per class, and taking its
            # diagonal would discard almost all of them.
            logits = tf.matrix_diag_part(logits)

        # probabilities = tf.nn.softmax(logits, axis=-1)  # single-label case
        probabilities = tf.nn.sigmoid(logits)  # multi-label case

        train_op, loss = None, None
        eval_metrics = None
        if mode != tf.estimator.ModeKeys.PREDICT:
            with tf.variable_scope("loss"):
                per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=label_ids, logits=logits)
                loss = tf.reduce_mean(per_example_loss)
        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(loss,
                                                     params["learning_rate"],
                                                     params["num_train_steps"],
                                                     params["num_warmup_steps"],
                                                     use_tpu,
                                                     trainable_bert=params['trainable_bert'])
        elif mode == tf.estimator.ModeKeys.EVAL:

            def _f1_score(labels, pred):
                """Computes F1 score, i.e. the harmonic mean of precision and recall."""
                precision = tf.metrics.precision(labels, pred)
                recall = tf.metrics.recall(labels, pred)
                # The 1e-5 keeps the ratio finite when both are zero.
                return (2 * precision[0] * recall[0] /
                        (precision[0] + recall[0] + 1e-5),
                        tf.group(precision[1], recall[1]))

            def metric_fn(per_example_loss, labels, probabilities):
                # A class counts as predicted when its probability exceeds
                # 0.4.
                pred = tf.where(probabilities > 0.4,
                                tf.ones_like(probabilities),
                                tf.zeros_like(probabilities))
                return {
                    'absolute/false_positives':
                        tf.metrics.false_positives(labels, pred),
                    'absolute/false_negatives':
                        tf.metrics.false_negatives(labels, pred),
                    'absolute/true_positives':
                        tf.metrics.true_positives(labels, pred),
                    'absolute/true_negatives':
                        tf.metrics.true_negatives(labels, pred),
                    # Counts batch_size "true positives" per batch, i.e. the
                    # number of examples seen.
                    'absolute/total':
                        tf.metrics.true_positives(tf.ones([batch_size]),
                                                  tf.ones([batch_size])),
                    'metric/acc':
                        tf.metrics.accuracy(labels, pred),
                    'metric/prec':
                        tf.metrics.precision(labels, pred),
                    'metric/recall':
                        tf.metrics.recall(labels, pred),
                    'metric/f1':
                        _f1_score(labels, pred),
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, probabilities])

        predictions["probabilities"] = probabilities
        if distribution is not None:
            predictions["attention"] = distribution
        predictions["pooled_output"] = pooled_output

        if use_tpu:
            return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                                   loss=loss,
                                                   train_op=train_op,
                                                   scaffold_fn=scaffold_fn,
                                                   eval_metrics=eval_metrics,
                                                   predictions=predictions)
        else:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              loss=loss,
                                              train_op=train_op,
                                              predictions=predictions)
Example #34
0
    epochs=100)

# MODEL PREDICTIONS --------------------------------------- #
# Predict over the whole test generator.
test_generator.reset(
)  # Resetting first keeps the outputs in the correct order; repeat it before every predict call on this generator.
img_model_preds = img_model.predict(test_generator,
                                    steps=step_size_test,
                                    verbose=1)

# Predict a single image: the i-th file listed in test_data_df.
# NOTE(review): the image directory is a hard-coded absolute path on one
# developer's machine -- this will not run elsewhere as written.
i = 0
path_to_img = 'C://Users//jbolton//Documents//naughty//deep_tagger//images//' + test_data_df.iloc[
    i]['filename']
# Load the image at the size the model expects and convert it to an array.
img = keras.preprocessing.image.load_img(path_to_img,
                                         target_size=img_size_for_model)
img_array = keras.preprocessing.image.img_to_array(img)
img_array = tf.expand_dims(img_array, 0)  # Add a leading batch axis of size 1
predictions = img_model.predict(img_array)


# HYPERPARAMETER TUNING ----------------------------------- #
def build_model(hp):
    inputs = keras.Input(
        shape=(224, 224, 3), name='image_input'
    )  # ResNet was trained on 224x224 TODO: validate this Joe
    preprocess_inputs = tf.keras.applications.resnet50.preprocess_input(
        inputs)  # preprocess input data as expected by ResNet50
    x = base_model(preprocess_inputs, training=False)
    # only need to use this if we use pooling='none' in ResNet50 model:
    #flat_x = keras.layers.Flatten( name='flatten_ResNet_output' )(x)
    dense1 = keras.layers.Dense(units=hp.Int("units",
                                             min_value=32,
Example #35
0
                    shape=[num_nodes[-1], 1],
                    initializer=tf.contrib.layers.xavier_initializer())
# Bias of the output layer applied after the LSTM (see xw_plus_b below),
# drawn uniformly from [-0.1, 0.1).
b = tf.get_variable('b', initializer=tf.random_uniform([1], -0.1, 0.1))

# Per-layer cell state (c) and hidden state (h), kept in non-trainable
# variables and paired into the initial state handed to dynamic_rnn.
c, h = [], []
initial_state = []
for li in range(n_layers):
    c.append(
        tf.Variable(tf.zeros([batch_size, num_nodes[li]]), trainable=False))
    h.append(
        tf.Variable(tf.zeros([batch_size, num_nodes[li]]), trainable=False))
    initial_state.append(tf.contrib.rnn.LSTMStateTuple(c[li], h[li]))

# Stack the tensors in train_inputs along a new leading axis, because
# dynamic_rnn takes its input as one tensor (time-major here, see
# time_major=True below).
# Read more at: https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn
all_inputs = tf.concat([tf.expand_dims(t, 0) for t in train_inputs], axis=0)

# all_lstm_outputs is time-major; presumably
# [num_unrollings, batch_size, num_nodes[-1]] given the reshape below.
all_lstm_outputs, state = tf.nn.dynamic_rnn(drop_multi_cell,
                                            all_inputs,
                                            initial_state=tuple(initial_state),
                                            time_major=True,
                                            dtype=tf.float32)

# Merge the time and batch axes so a single matmul scores every step.
all_lstm_outputs = tf.reshape(all_lstm_outputs,
                              [batch_size * num_unrollings, num_nodes[-1]])

all_outputs = tf.nn.xw_plus_b(all_lstm_outputs, w, b)

# Cut the merged outputs into num_unrollings equal pieces along axis 0.
split_outputs = tf.split(all_outputs, num_unrollings, axis=0)
Example #36
0
def darkeras_loss(net_out):
	"""Build the weighted squared-error loss for a grid-based detector output.

	Args:
		net_out: Network output tensor. It is compared element-wise with the
			concatenation [probs, confs, coord] along axis 1, and its columns
			from SS * (C + B) onward are read as box coordinates.

	Returns:
		A tuple (placeholders, loss). `placeholders` maps 'probs', 'confs',
		'coord', 'proid', 'areas', 'upleft' and 'botright' to the
		tf.placeholder tensors that have to be fed; `loss` is half the batch
		mean of the per-sample weighted sum of squared errors.
	"""
	# loss scales taken from the config
	class_scale = float(cfg.class_scale)
	object_scale = float(cfg.object_scale)
	noobject_scale = float(cfg.noobject_scale)
	coord_scale = float(cfg.coord_scale)

	S = cfg.cell_size
	B = cfg.boxes_per_cell
	C = cfg.num_classes
	num_cells = S * S  # number of grid cells

	class_shape = [None, num_cells, C]
	box_shape = [None, num_cells, B]

	# ground-truth inputs, keyed by the names used to feed them
	targets = {}
	targets['probs'] = tf.placeholder(tf.float32, class_shape)
	targets['confs'] = tf.placeholder(tf.float32, box_shape)
	targets['coord'] = tf.placeholder(tf.float32, box_shape + [4])
	# per-class weights for the squared-error term
	targets['proid'] = tf.placeholder(tf.float32, class_shape)
	# material for the IOU computation
	targets['areas'] = tf.placeholder(tf.float32, box_shape)
	targets['upleft'] = tf.placeholder(tf.float32, box_shape + [2])
	targets['botright'] = tf.placeholder(tf.float32, box_shape + [2])

	# predicted boxes: everything after the class and confidence columns
	box_pred = tf.reshape(net_out[:, num_cells * (C + B):],
	                      [-1, num_cells, B, 4])
	box_wh = tf.pow(box_pred[:, :, :, 2:4], 2) * S  # unit: grid cell
	pred_area = box_wh[:, :, :, 0] * box_wh[:, :, :, 1]  # unit: grid cell^2
	box_center = box_pred[:, :, :, 0:2]  # [batch, SS, B, 2]
	half_wh = box_wh * .5
	pred_upleft = box_center - half_wh  # [batch, SS, B, 2]
	pred_botright = box_center + half_wh  # [batch, SS, B, 2]

	# overlap between predicted and ground-truth boxes, clamped at zero
	overlap_upleft = tf.maximum(pred_upleft, targets['upleft'])
	overlap_botright = tf.minimum(pred_botright, targets['botright'])
	overlap_wh = tf.maximum(overlap_botright - overlap_upleft, 0.0)
	overlap_area = tf.multiply(overlap_wh[:, :, :, 0], overlap_wh[:, :, :, 1])

	# keep the confidence target only for the box(es) with the highest IOU
	# in each cell; every other box gets a 0.0 target
	union_area = targets['areas'] + pred_area - overlap_area
	iou = tf.truediv(overlap_area, union_area)
	is_best = tf.to_float(tf.equal(iou, tf.reduce_max(iou, [2], True)))
	confs = tf.multiply(is_best, targets['confs'])

	# per-element weights of the three loss parts
	conf_weight = noobject_scale * (1. - confs) + object_scale * confs
	confs_expanded = tf.expand_dims(confs, -1)
	coord_weight = coord_scale * tf.concat([confs_expanded] * 4, 3)
	class_weight = class_scale * targets['proid']

	# flatten everything to [batch, features]
	flat_probs = slim.flatten(targets['probs'])
	flat_class_weight = slim.flatten(class_weight)
	flat_confs = slim.flatten(confs)
	flat_conf_weight = slim.flatten(conf_weight)
	flat_coord = slim.flatten(targets['coord'])
	flat_coord_weight = slim.flatten(coord_weight)

	target = tf.concat([flat_probs, flat_confs, flat_coord], 1)
	weights = tf.concat(
		[flat_class_weight, flat_conf_weight, flat_coord_weight], 1)

	print('Building {} loss'.format(cfg.model_name))
	weighted_sq_err = tf.multiply(tf.pow(net_out - target, 2), weights)
	per_sample = tf.reduce_sum(weighted_sq_err, 1)
	return targets, .5 * tf.reduce_mean(per_sample)
Example #37
0
    def __call__(self, x, prev_state):
        """Advance the NTM cell by one step.

        The controller runs on `x` concatenated with the previous read
        vectors. Its output is projected to the head parameters, every head
        is addressed against the previous memory, the read heads read from
        the previous memory, and the write heads then erase/add to it.

        Args:
            x: Input tensor for this step; it is concatenated with the
                previous read vectors along axis 1.
            prev_state: Previous cell state providing `read_vector_list`,
                `controller_state`, `w_list` and `M`.

        Returns:
            A tuple `(NTM_output, state)`: the clipped output projection and
            an `NTMControllerState` holding the new controller state, read
            vectors, head weightings and memory.
        """
        prev_read_vector_list = prev_state.read_vector_list

        controller_input = tf.concat([x] + prev_read_vector_list, axis=1)
        with tf.compat.v1.variable_scope('controller', reuse=self.reuse):
            controller_output, controller_state = self.controller(controller_input, prev_state.controller_state)

        # Per head: key (memory_vector_dim), beta, g, shift distribution
        # (2 * shift_range + 1) and gamma. Write heads additionally get an
        # erase and an add vector each.
        num_parameters_per_head = self.memory_vector_dim + 1 + 1 + (self.shift_range * 2 + 1) + 1
        num_heads = self.read_head_num + self.write_head_num
        total_parameter_num = num_parameters_per_head * num_heads + self.memory_vector_dim * 2 * self.write_head_num
        with tf.compat.v1.variable_scope("o2p", reuse=(self.step > 0) or self.reuse):
            parameters = tf.compat.v1.layers.dense(
                controller_output, total_parameter_num, activation=None,
                kernel_initializer=self.o2p_initializer)
            parameters = tf.clip_by_value(parameters, -self.clip_value, self.clip_value)
        head_parameter_list = tf.split(parameters[:, :num_parameters_per_head * num_heads], num_heads, axis=1)
        erase_add_list = tf.split(parameters[:, num_parameters_per_head * num_heads:], 2 * self.write_head_num, axis=1)

        prev_w_list = prev_state.w_list
        prev_M = prev_state.M
        w_list = []
        for i, head_parameter in enumerate(head_parameter_list):
            k = tf.tanh(head_parameter[:, 0:self.memory_vector_dim])
            beta = tf.nn.softplus(head_parameter[:, self.memory_vector_dim])
            g = tf.sigmoid(head_parameter[:, self.memory_vector_dim + 1])
            s = tf.nn.softmax(
                head_parameter[:, self.memory_vector_dim + 2:self.memory_vector_dim + 2 + (self.shift_range * 2 + 1)]
            )
            # softplus(...) + 1 keeps gamma >= 1
            gamma = tf.nn.softplus(head_parameter[:, -1]) + 1
            with tf.compat.v1.variable_scope('addressing_head_%d' % i):
                w = self.addressing(k, beta, g, s, gamma, prev_M, prev_w_list[i])
            w_list.append(w)

        # Reading (Sec 3.1)
        # The first read_head_num weightings belong to the read heads; each
        # read vector is the weighting-weighted sum over axis 1 of the
        # previous memory.
        # assumes weightings are (batch, slots) and memory is
        # (batch, slots, memory_vector_dim) — TODO confirm

        read_w_list = w_list[:self.read_head_num]
        read_vector_list = []
        for i in range(self.read_head_num):
            read_vector = tf.reduce_sum(tf.expand_dims(read_w_list[i], axis=2) * prev_M, axis=1)
            read_vector_list.append(read_vector)

        # Writing (Sec 3.2)
        # The remaining weightings belong to the write heads, which are
        # applied to the memory one after another.

        write_w_list = w_list[self.read_head_num:]
        M = prev_M
        for i in range(self.write_head_num):
            w = tf.expand_dims(write_w_list[i], axis=2)
            erase_vector = tf.expand_dims(tf.sigmoid(erase_add_list[i * 2]), axis=1)
            add_vector = tf.expand_dims(tf.tanh(erase_add_list[i * 2 + 1]), axis=1)
            # `1.0 - ...` broadcasts against the matmul result, so the
            # memory's static shape does not have to be fully defined
            # (tf.ones(M.get_shape()) fails when a dimension such as the
            # batch size is unknown) and the constant follows M's dtype.
            M = M * (1.0 - tf.matmul(w, erase_vector)) + tf.matmul(w, add_vector)

        # Without an explicit output_dim the output has as many columns as x
        if not self.output_dim:
            output_dim = x.get_shape()[1]
        else:
            output_dim = self.output_dim
        with tf.compat.v1.variable_scope("o2o", reuse=(self.step > 0) or self.reuse):
            NTM_output = tf.compat.v1.layers.dense(
                tf.concat([controller_output] + read_vector_list, axis=1), output_dim, activation=None,
                kernel_initializer=self.o2o_initializer)
            NTM_output = tf.clip_by_value(NTM_output, -self.clip_value, self.clip_value)

        # After the first call the "o2p"/"o2o" scopes are entered with
        # reuse=True (see the `self.step > 0` conditions above).
        self.step += 1
        return NTM_output, NTMControllerState(
            controller_state=controller_state, read_vector_list=read_vector_list, w_list=w_list, M=M)
    def _set_up_input_pls(self):
        """Create the model's input placeholders.

        Each placeholder is created through self._add_placeholder under one
        of the self.PL_* keys. The BEV and image inputs are additionally
        expanded to a batch of one, run through their feature extractor's
        preprocess_input, and logged as image summaries.
        """
        float32 = tf.float32

        # BEV input shape: the BEV pixel size followed by the BEV depth
        bev_shape = np.append(self._bev_pixel_size, self._bev_depth)

        with tf.variable_scope('bev_input'):
            # BEV image input, to be filled in with feed_dict
            bev_pl = self._add_placeholder(float32, bev_shape,
                                           self.PL_BEV_INPUT)

            self._bev_input_batches = tf.expand_dims(bev_pl, axis=0)
            bev_extractor = self._bev_feature_extractor
            self._bev_preprocessed = bev_extractor.preprocess_input(
                self._bev_input_batches, self._bev_pixel_size)

            # One summary image per BEV map (split along the last axis)
            bev_maps = tf.split(bev_pl, self._bev_depth, axis=2)
            tf.summary.image("bev_maps", bev_maps,
                             max_outputs=self._bev_depth)

        with tf.variable_scope('img_input'):
            # Height and width are left open so images may vary in size
            img_shape = [None, None, self._img_depth]
            img_pl = self._add_placeholder(float32, img_shape,
                                           self.PL_IMG_INPUT)

            self._img_input_batches = tf.expand_dims(img_pl, axis=0)
            img_extractor = self._img_feature_extractor
            self._img_preprocessed = img_extractor.preprocess_input(
                self._img_input_batches, self._img_pixel_size)

            tf.summary.image("rgb_image", self._img_preprocessed,
                             max_outputs=2)

        with tf.variable_scope('pl_labels'):
            label_specs = (
                ([None, 6], self.PL_LABEL_ANCHORS),
                ([None, 7], self.PL_LABEL_BOXES_3D),
                ([None], self.PL_LABEL_CLASSES),
            )
            for shape, key in label_specs:
                self._add_placeholder(float32, shape, key)

        # Placeholders for anchors
        with tf.variable_scope('pl_anchors'):
            anchor_specs = (
                ([None, 6], self.PL_ANCHORS),
                ([None], self.PL_ANCHOR_IOUS),
                ([None, 6], self.PL_ANCHOR_OFFSETS),
                ([None], self.PL_ANCHOR_CLASSES),
            )
            for shape, key in anchor_specs:
                self._add_placeholder(float32, shape, key)

            projection_shape = [None, 4]

            with tf.variable_scope('bev_anchor_projections'):
                self._add_placeholder(float32, projection_shape,
                                      self.PL_BEV_ANCHORS)
                # The normalized projections are kept on the instance
                self._bev_anchors_norm_pl = self._add_placeholder(
                    float32, projection_shape, self.PL_BEV_ANCHORS_NORM)

            with tf.variable_scope('img_anchor_projections'):
                self._add_placeholder(float32, projection_shape,
                                      self.PL_IMG_ANCHORS)
                self._img_anchors_norm_pl = self._add_placeholder(
                    float32, projection_shape, self.PL_IMG_ANCHORS_NORM)

            with tf.variable_scope('sample_info'):
                # the calib matrix shape is (3 x 4)
                self._add_placeholder(float32, [3, 4], self.PL_CALIB_P2)
                self._add_placeholder(tf.int32,
                                      shape=[1],
                                      name=self.PL_IMG_IDX)
                self._add_placeholder(float32, [4], self.PL_GROUND_PLANE)
Example #39
0
    def __init__(self,
                 model_type,
                 sequence_length,
                 num_classes,
                 vocab_size,
                 embedding_size,
                 filter_sizes,
                 num_filters,
                 l2_reg_lambda=0):
        """Build the graph of a convolutional text model.

        Token ids are embedded, convolved with one filter bank per entry of
        `filter_sizes`, max-pooled over the sequence, concatenated, passed
        through dropout and projected to `num_classes` scores.

        Args:
            model_type: "clf" builds predictions, loss and accuracy for
                classification, "reg" for regression. For any other value
                `predictions`, `loss` and `accuracy` are not created.
            sequence_length: Number of token ids per input row.
            num_classes: Width of `input_y` and of the score layer.
            vocab_size: Number of rows of the embedding matrix.
            embedding_size: Number of columns of the embedding matrix.
            filter_sizes: Iterable of convolution heights (in tokens).
            num_filters: Number of filters per filter size.
            l2_reg_lambda: Factor applied to the L2 norm of the output
                layer's weights and bias in the loss.
        """
        wants_clf = model_type == "clf"
        wants_reg = model_type == "reg"

        # Inputs fed at run time
        self.input_x = tf.placeholder(
            tf.int32, [None, sequence_length], name="input_x")
        self.input_y = tf.placeholder(
            tf.float32, [None, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(
            tf.float32, name="dropout_keep_prob")
        self.learing_rate = tf.placeholder(tf.float32, name="learing_rate")

        # Accumulates the L2 penalty of the output layer
        l2_loss = tf.constant(0.0)

        # Embedding lookup, pinned to the CPU
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            initial_embeddings = tf.random_uniform(
                [vocab_size, embedding_size], -1, 1)
            self.W = tf.Variable(initial_embeddings, name="W", trainable=True)
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            # Trailing channel axis, as conv2d expects 4-D input
            self.embedded_chars_expanded = tf.expand_dims(
                self.embedded_chars, -1)

        # One convolution + max-pool branch per filter size
        unit_strides = [1, 1, 1, 1]
        pooled_outputs = []
        for filter_size in filter_sizes:
            with tf.name_scope("conv-maxpool-%s" % filter_size):
                kernel_shape = [filter_size, embedding_size, 1, num_filters]
                kernel = tf.Variable(
                    tf.truncated_normal(kernel_shape, stddev=0.1), name="W")
                bias = tf.Variable(
                    tf.constant(0.1, shape=[num_filters]), name="b")
                conv = tf.nn.conv2d(self.embedded_chars_expanded,
                                    kernel,
                                    strides=unit_strides,
                                    padding="VALID",
                                    name="conv")
                activated = tf.nn.relu(tf.nn.bias_add(conv, bias),
                                       name="relu")
                # The pooling window spans every position left by the
                # VALID convolution
                window = [1, sequence_length - filter_size + 1, 1, 1]
                pooled_outputs.append(
                    tf.nn.max_pool(activated,
                                   ksize=window,
                                   strides=unit_strides,
                                   padding='VALID',
                                   name="pool"))

        # Join all branches along the channel axis and flatten
        num_filters_total = num_filters * len(filter_sizes)
        self.h_pool = tf.concat(pooled_outputs, 3)
        self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

        with tf.name_scope("dropout"):
            self.h_drop = tf.nn.dropout(self.h_pool_flat,
                                        self.dropout_keep_prob)

        with tf.name_scope("output"):
            out_weights = tf.get_variable(
                "W",
                shape=[num_filters_total, num_classes],
                initializer=tf.contrib.layers.xavier_initializer())
            out_bias = tf.Variable(tf.constant(0.1, shape=[num_classes]),
                                   name="b")
            l2_loss = l2_loss + tf.nn.l2_loss(out_weights)
            l2_loss = l2_loss + tf.nn.l2_loss(out_bias)
            self.scores = tf.nn.xw_plus_b(self.h_drop, out_weights, out_bias,
                                          name="scores")
            if wants_clf:
                # Index of the highest score per row
                self.predictions = tf.argmax(self.scores, 1,
                                             name="predictions")
            elif wants_reg:
                # Highest score per row, kept as a column
                row_max = tf.reduce_max(self.scores, 1, name="predictions")
                self.predictions = row_max
                self.predictions = tf.expand_dims(self.predictions, -1)

        with tf.name_scope("loss"):
            losses = None
            if wants_clf:
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.scores, labels=self.input_y)
            elif wants_reg:
                # Root of the mean squared error
                losses = tf.sqrt(
                    tf.losses.mean_squared_error(predictions=self.predictions,
                                                 labels=self.input_y))
            if losses is not None:
                self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

        with tf.name_scope("accuracy"):
            if wants_clf:
                expected = tf.argmax(self.input_y, 1)
                correct_predictions = tf.equal(self.predictions, expected)
                self.accuracy = tf.reduce_mean(
                    tf.cast(correct_predictions, "float"), name="accuracy")
            elif wants_reg:
                # Regression reports a constant accuracy of 0.0
                self.accuracy = tf.constant(0.0, name="accuracy")
    def build(self):
        """Build the proposal network graph and return its output tensors.

        The method sets up the input placeholders and feature extractors,
        optionally applies path drop to the two bottleneck feature maps,
        crops both maps at the anchor projections, fuses the crops ('mean'
        or 'concat'), and runs the fused features through two conv branches
        that predict a 2-channel objectness and a 6-channel offset per
        anchor. The offsets are decoded into anchors, projected to BEV and
        filtered with non-max suppression. Summaries, the sampled mini batch
        and the matching ground-truth tensors are created along the way.

        Returns:
            dict: keyed by the self.PRED_* constants. For 'train' and 'val'
            it holds the anchors, the mini-batch mask, the masked
            predictions and ground truth, and the proposals after NMS; for
            any other mode only the top anchors and their objectness
            softmax scores.

        Raises:
            ValueError: if self._fusion_method is neither 'mean' nor
                'concat'.
        """

        # Setup input placeholders
        self._set_up_input_pls()

        # Setup feature extractors
        self._set_up_feature_extractors()

        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck

        # Divisor used by the 'mean' fusion when no path drop is applied
        fusion_mean_div_factor = 2.0

        # If both img and bev probabilities are set to 1.0, don't do
        # path drop.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):

                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1], random_values)

                img_proposal_input = tf.multiply(img_proposal_input, img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input, bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor with the sum of the two
                # masks (presumably the number of kept paths — confirm
                # against create_path_drop_masks)
                fusion_mean_div_factor = img_mask + bev_mask

        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):

                def get_box_indices(boxes):
                    """Return, flattened, the index along axis 0 of every box
                    in `boxes`."""
                    proposals_shape = boxes.get_shape().as_list()
                    # Fall back to the dynamic shape when the static one
                    # has unknown dimensions
                    if any(dim is None for dim in proposals_shape):
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input, self._bev_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)
            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input, self._img_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)

            # NOTE(review): leftover debug output; only this label is
            # printed, the prints of the tensor itself are commented out.
            print("img_proposal_rois shape")
            # print(img_proposal_rois.shape)
            # for i in range(img_proposal_rois.shape[0]):
            # print(img_proposal_rois[i])
        ####################################################################################
        # TODO PROJECT: insert code here to add mixture of experts
        # self._moe_model = MoeModel(img_proposal_input, bev_proposal_input)
        # self._moe_model._set_up_input_pls()
        # moe_prediction = self._moe_model.build()

        ####################################################################################
        with tf.variable_scope('proposal_roi_fusion'):
            rpn_fusion_out = None
            ####################################################################################
            # TODO PROJECT: weight the feature before average img and bev
            # weighted_img_proposal_rois = tf.multiply(moe_prediction['img_weight'],img_proposal_rois)
            # weighted_bev_proposal_rois = tf.multiply(moe_prediction['bev_weight'],bev_proposal_rois)
            ####################################################################################
            if self._fusion_method == 'mean':
                # Element-wise sum of both crops divided by the factor
                # chosen above
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                rpn_fusion_out = tf.divide(tf_features_sum,
                                           fusion_mean_div_factor)

                ####################################################################################
                # TODO PROJECT: weight the feature before average img and bev
                # tf_features_sum = tf.add(weighted_bev_proposal_rois, weighted_img_proposal_rois)
                # rpn_fusion_out = tf.divide(tf_features_sum, fusion_mean_div_factor)
                ####################################################################################
            elif self._fusion_method == 'concat':
                # Stack both crops along axis 3
                rpn_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)

                ####################################################################################
                # TODO PROJECT: weight the feature before concatenation
                # rpn_fusion_out = tf.concat(
                # [weighted_bev_proposal_rois, weighted_img_proposal_rois], axis=3)
                ####################################################################################
            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        # TODO: move this section into an separate AnchorPredictor class
        with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
            tensor_in = rpn_fusion_out

            # Parse rpn layers config
            layers_config = self._config.layers_config.rpn_config
            l2_weight_decay = layers_config.l2_weight_decay

            # Only regularize the conv weights for a positive weight decay
            if l2_weight_decay > 0:
                weights_regularizer = slim.l2_regularizer(l2_weight_decay)
            else:
                weights_regularizer = None

            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                # Classification branch.
                # Use conv2d instead of fully_connected layers: the first
                # kernel has the size of the ROI crop and uses VALID padding.
                cls_fc6 = slim.conv2d(tensor_in,
                                      layers_config.cls_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='cls_fc6')

                cls_fc6_drop = slim.dropout(cls_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc6_drop')

                cls_fc7 = slim.conv2d(cls_fc6_drop,
                                      layers_config.cls_fc7, [1, 1],
                                      scope='cls_fc7')

                cls_fc7_drop = slim.dropout(cls_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc7_drop')

                # Two output channels, no activation
                cls_fc8 = slim.conv2d(cls_fc7_drop,
                                      2, [1, 1],
                                      activation_fn=None,
                                      scope='cls_fc8')

                # Drop axes 1 and 2
                objectness = tf.squeeze(cls_fc8, [1, 2],
                                        name='cls_fc8/squeezed')

                # Regression branch, same layout as the classification one.
                # Use conv2d instead of fully_connected layers.
                reg_fc6 = slim.conv2d(tensor_in,
                                      layers_config.reg_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='reg_fc6')

                reg_fc6_drop = slim.dropout(reg_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc6_drop')

                reg_fc7 = slim.conv2d(reg_fc6_drop,
                                      layers_config.reg_fc7, [1, 1],
                                      scope='reg_fc7')

                reg_fc7_drop = slim.dropout(reg_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc7_drop')

                # Six output channels, no activation
                reg_fc8 = slim.conv2d(reg_fc7_drop,
                                      6, [1, 1],
                                      activation_fn=None,
                                      scope='reg_fc8')

                offsets = tf.squeeze(reg_fc8, [1, 2], name='reg_fc8/squeezed')

        # Histogram summaries
        with tf.variable_scope('histograms_feature_extractor'):
            with tf.variable_scope('bev_vgg'):
                for end_point in self.bev_end_points:
                    tf.summary.histogram(end_point,
                                         self.bev_end_points[end_point])

            with tf.variable_scope('img_vgg'):
                for end_point in self.img_end_points:
                    tf.summary.histogram(end_point,
                                         self.img_end_points[end_point])

        with tf.variable_scope('histograms_rpn'):
            with tf.variable_scope('anchor_predictor'):
                fc_layers = [
                    cls_fc6, cls_fc7, cls_fc8, objectness, reg_fc6, reg_fc7,
                    reg_fc8, offsets
                ]
                for fc_layer in fc_layers:
                    # fix the name to avoid tf warnings
                    tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                         fc_layer)

        # Return the proposals
        with tf.variable_scope('proposals'):
            anchors = self.placeholders[self.PL_ANCHORS]

            # Decode anchor regression offsets
            with tf.variable_scope('decoding'):
                regressed_anchors = anchor_encoder.offset_to_anchor(
                    anchors, offsets)

            with tf.variable_scope('bev_projection'):
                _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                    regressed_anchors, self._bev_extents)

            with tf.variable_scope('softmax'):
                objectness_softmax = tf.nn.softmax(objectness)

            with tf.variable_scope('nms'):
                # Column 1 of the softmax is used as the proposal score
                objectness_scores = objectness_softmax[:, 1]

                # Do NMS on regressed anchors
                top_indices = tf.image.non_max_suppression(
                    bev_proposal_boxes_norm,
                    objectness_scores,
                    max_output_size=self._nms_size,
                    iou_threshold=self._nms_iou_thresh)

                top_anchors = tf.gather(regressed_anchors, top_indices)
                top_objectness_softmax = tf.gather(objectness_scores,
                                                   top_indices)
                # top_offsets = tf.gather(offsets, top_indices)
                # top_objectness = tf.gather(objectness, top_indices)

        # Get mini batch
        all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
        all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
        all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

        with tf.variable_scope('mini_batch'):
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mini_batch_mask, _ = \
                mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

        # ROI summary images
        rpn_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
        with tf.variable_scope('bev_rpn_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                                  mini_batch_mask)
            # All-zero box indices, one per mini-batch anchor
            mb_bev_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(self._bev_preprocessed,
                                                      mb_bev_anchors_norm,
                                                      mb_bev_box_indices,
                                                      (32, 32))

            # Only the last channel of the split is written to the summary
            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_rpn_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=rpn_mini_batch_size)

        with tf.variable_scope('img_rpn_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_img_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(self._img_preprocessed,
                                                      mb_img_anchors_norm,
                                                      mb_img_box_indices,
                                                      (32, 32))

            tf.summary.image('img_rpn_rois',
                             img_input_rois,
                             max_outputs=rpn_mini_batch_size)

        # Ground Truth Tensors
        with tf.variable_scope('one_hot_classes'):

            # Anchor classification ground truth
            # Object / Not Object
            min_pos_iou = \
                self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

            # Class 1 for anchors whose IoU reaches min_pos_iou, else 0
            objectness_classes_gt = tf.cast(tf.greater_equal(
                all_ious_gt, min_pos_iou),
                                            dtype=tf.int32)
            # One-hot encoding with label smoothing applied to the on/off
            # values
            objectness_gt = tf.one_hot(
                objectness_classes_gt,
                depth=2,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=self._config.label_smoothing_epsilon)

        # Mask predictions for mini batch
        with tf.variable_scope('prediction_mini_batch'):
            objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
            offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

        with tf.variable_scope('ground_truth_mini_batch'):
            objectness_gt_masked = tf.boolean_mask(objectness_gt,
                                                   mini_batch_mask)
            offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                                mini_batch_mask)

        # Specify the tensors to evaluate
        predictions = dict()

        # Temporary predictions for debugging
        # predictions['anchor_ious'] = anchor_ious
        # predictions['anchor_offsets'] = all_offsets_gt

        if self._train_val_test in ['train', 'val']:
            # All anchors
            predictions[self.PRED_ANCHORS] = anchors

            # Mini-batch masks
            predictions[self.PRED_MB_MASK] = mini_batch_mask
            # Mini-batch predictions
            predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
            predictions[self.PRED_MB_OFFSETS] = offsets_masked

            # Mini batch ground truth
            predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
            predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

            # Proposals after nms
            predictions[self.PRED_TOP_INDICES] = top_indices
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        else:
            # self._train_val_test == 'test'
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        return predictions
Example #41
0
def expand_tile(value, size):
    """Prepend a new leading axis to `value` and repeat it `size` times.

    Args:
        value: Anything convertible to a tensor.
        size: Number of copies along the new first axis.

    Returns:
        A tensor with one more dimension than `value`; the existing axes
        are left unrepeated.
    """
    tensor = tf.convert_to_tensor(value, name='value')
    rank = tensor.shape.ndims
    with_leading_axis = tf.expand_dims(tensor, axis=0)
    # Repeat only along the new axis
    multiples = [size] + [1] * rank
    return tf.tile(with_leading_axis, multiples)
Example #42
0
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Refines selected boxes by score-weighted voting over the pool boxes.

    Box voting as described in 'Object detection via a multi-region &
    semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015.
    For each box 'B' in selected_boxes, the set 'S' of boxes in pool_boxes
    whose iou with B is strictly greater than iou_thresh is collected. The
    location of B becomes the score-weighted average location of the boxes in
    S, and the score of B becomes the average score of the boxes in S.

    Args:
      selected_boxes: BoxList containing a subset of boxes in pool_boxes,
        usually picked from pool_boxes with non max suppression.
      pool_boxes: BoxList containing a set of (possibly redundant) boxes. Must
        carry a 'scores' field.
      iou_thresh: (float scalar) iou threshold for matching boxes in
        selected_boxes and pool_boxes.

    Returns:
      BoxList with the averaged locations and scores for each box in
      selected_boxes; the extra fields of selected_boxes are copied over.

    Raises:
      ValueError: if
        a) iou_thresh is not in [0, 1].
        b) selected_boxes or pool_boxes is not a BoxList.
        c) pool_boxes does not have a scores field.
    """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError("pool_boxes must have a 'scores' field")

    # is_match[i, j] is 1.0 when pool box j overlaps selected box i enough.
    overlaps = iou(selected_boxes, pool_boxes)
    is_match = tf.cast(tf.greater(overlaps, iou_thresh), dtype=tf.float32)
    matches_per_box = tf.reduce_sum(is_match, 1)

    # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
    # match to any boxes in pool_boxes. For such boxes without any matches, we
    # should return the original boxes without voting.
    every_box_matched = tf.Assert(
        tf.reduce_all(tf.greater(matches_per_box, 0)),
        ['Each box in selected_boxes must match with at least one box '
         'in pool_boxes.'])

    # Column vector of pool scores so it can be used in the matmuls below.
    pool_scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_non_negative = tf.Assert(
        tf.reduce_all(tf.greater_equal(pool_scores, 0)),
        ['Scores must be non negative.'])

    # Both runtime checks gate the score sum every later result depends on.
    with tf.control_dependencies([scores_non_negative, every_box_matched]):
        score_totals = tf.matmul(is_match, pool_scores)
    mean_scores = tf.reshape(score_totals, [-1]) / matches_per_box

    weighted_corners = tf.matmul(is_match, pool_boxes.get() * pool_scores)
    voted = box_list.BoxList(weighted_corners / score_totals)
    _copy_extra_fields(voted, selected_boxes)
    voted.add_field('scores', mean_scores)
    return voted
def create_image_from_point_values_unbatched(
    pixel_locations,
    pixel_values,
    image_height,
    image_width,
    default_value=0,
    use_sparse_tensor=False):
  """Scatters per-point values into a [height, width, channels] image.

  Locations that fall outside the image are discarded before scattering.

  Args:
    pixel_locations: A tf.int32 tensor of shape [N, 2]. Column 0 is
      bounds-checked against image_height and column 1 against image_width.
    pixel_values: A tensor of shape [N, m] or [N,] with per pixel values; a
      rank-1 input is handled as m = 1.
    image_height: An int for the image height.
    image_width: An int for the image width.
    default_value: Fill value for the pixels that receive no point.
    use_sparse_tensor: If True, the image is built with tf.SparseTensor and
      tf.sparse.to_dense, otherwise with tf.scatter_nd.

  Returns:
    image: A tensor of shape [image_height, image_width, m] holding
      pixel_values at pixel_locations and default_value elsewhere.

  Raises:
    ValueError: if pixel_locations is not rank 2 or pixel_values is not rank 1
      or 2.
  """
  if len(pixel_locations.shape) != 2:
    raise ValueError('pixel_locations should be rank 2.')
  values_rank = len(pixel_values.shape)
  if values_rank not in [1, 2]:
    raise ValueError('pixel_values should have rank of 1 or 2')
  if values_rank == 1:
    pixel_values = tf.expand_dims(pixel_values, axis=1)

  # Keep only the points that land inside the image.
  rows = pixel_locations[:, 0]
  cols = pixel_locations[:, 1]
  rows_in_bounds = tf.logical_and(
      tf.greater_equal(rows, 0), tf.less(rows, image_height))
  cols_in_bounds = tf.logical_and(
      tf.greater_equal(cols, 0), tf.less(cols, image_width))
  in_bounds = tf.logical_and(rows_in_bounds, cols_in_bounds)
  pixel_locations = tf.boolean_mask(pixel_locations, in_bounds)
  pixel_values = tf.boolean_mask(pixel_values, in_bounds)

  num_points = tf.shape(pixel_locations)[0]
  num_channels = pixel_values.get_shape().as_list()[1]
  flat_size = num_points * num_channels
  image_shape = (image_height, image_width, num_channels)

  # Repeat every location once per channel: [N, 2] -> [N, C, 2].
  repeated_locations = tf.tile(
      tf.expand_dims(pixel_locations, axis=1), [1, num_channels, 1])
  # Channel index for every (point, channel) pair: [N, C, 1].
  channel_ids = tf.tile(
      tf.reshape(tf.range(num_channels, dtype=tf.int32),
                 [1, num_channels, 1]),
      [num_points, 1, 1])
  # One (row, col, channel) index per scattered value: [N * C, 3].
  scatter_indices = tf.cast(
      tf.reshape(
          tf.concat([repeated_locations, channel_ids], axis=2),
          [flat_size, 3]),
      dtype=tf.int64)

  if not use_sparse_tensor:
    # scatter_nd fills with zeros, so scatter the offset from default_value
    # and shift the whole image back afterwards.
    offsets = tf.reshape(pixel_values - default_value, [flat_size])
    dense = tf.scatter_nd(
        indices=scatter_indices, updates=offsets, shape=image_shape)
    return dense + default_value

  sparse_image = tf.SparseTensor(
      indices=scatter_indices,
      values=tf.reshape(pixel_values, [flat_size]),
      dense_shape=image_shape)
  return tf.sparse.to_dense(
      sp_input=sparse_image,
      default_value=default_value,
      validate_indices=False)
Example #44
0
    def call(self, x, mask=None):
        """Compute -0.5 * (a - mu)^T * P * (a - mu) for every batch entry.

        The matrix P is assembled from `L_flat` according to `self.mode`:
        in 'full' mode a lower-triangular L (with an exponentiated diagonal)
        is built and P = L * L^T; in 'diag' mode the entries of `L_flat` are
        placed directly on the diagonal of P.

        Args:
            x: Sequence of exactly three items `(L_flat, mu, a)`.
            mask: Not used by this method.

        Returns:
            A rank-2 tensor (asserted below) holding the batch-flattened
            1x1 products scaled by -0.5.

        Raises:
            RuntimeError: If the Keras backend is neither 'theano' nor
                'tensorflow'.
        """
        # TODO: validate input shape

        assert (len(x) == 3)
        L_flat = x[0]
        mu = x[1]
        a = x[2]

        if self.mode == 'full':
            # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
            L = None
            LT = None
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                # Scan step: write one flat row into the lower triangle and
                # exponentiate its diagonal; the accumulators are ignored.
                def fn(x, L_acc, LT_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)],
                                         x)
                    diag = K.exp(T.diag(x_)) + K.epsilon()
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         diag)
                    return x_, x_.T

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                results, _ = theano.scan(fn=fn,
                                         sequences=L_flat,
                                         outputs_info=outputs_info)
                L, LT = results
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Number of elements in a triangular matrix.
                nb_elems = (self.nb_actions * self.nb_actions +
                            self.nb_actions) // 2

                # Create mask for the diagonal elements in L_flat. This is used to exponentiate
                # only the diagonal elements, which is done before gathering.
                diag_indeces = [0]
                for row in range(1, self.nb_actions):
                    diag_indeces.append(diag_indeces[-1] + (row + 1))
                diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
                diag_mask[np.array(diag_indeces) + 1] = 1
                diag_mask = K.variable(diag_mask)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element when gathering L_flat into a lower triangular matrix L.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Create mask that can be used to gather elements from L_flat and put them
                # into a lower triangular matrix.
                # Entries left at 0 point at the leading zero element added above.
                tril_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                tril_mask[np.tril_indices(self.nb_actions)] = range(
                    1, nb_elems + 1)

                # Finally, process each element of the batch.
                init = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]

                # Scan step: `a` is the (ignored) accumulator, `x` one padded flat row.
                def fn(a, x):
                    # Exponentiate everything. This is much easier than only exponentiating
                    # the diagonal elements, and, usually, the action space is relatively low.
                    x_ = K.exp(x) + K.epsilon()
                    # Only keep the diagonal elements.
                    x_ *= diag_mask
                    # Add the original, non-diagonal elements.
                    x_ += x * (1. - diag_mask)
                    # Finally, gather everything into a lower triangular matrix.
                    L_ = tf.gather(x_, tril_mask)
                    return [L_, tf.transpose(L_)]

                tmp = tf.scan(fn, L_flat, initializer=init)
                if isinstance(tmp, (list, tuple)):
                    # TensorFlow 0.10 now returns a tuple of tensors.
                    L, LT = tmp
                else:
                    # Old TensorFlow < 0.10 returns a shared tensor.
                    L = tmp[:, 0, :, :]
                    LT = tmp[:, 1, :, :]
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K.backend()))
            assert L is not None
            assert LT is not None
            P = K.batch_dot(L, LT)
        elif self.mode == 'diag':
            if K.backend() == 'theano':
                import theano.tensor as T
                import theano

                # Scan step: place one flat row on the diagonal of a zero matrix.
                def fn(x, P_acc):
                    x_ = K.zeros((self.nb_actions, self.nb_actions))
                    x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)],
                                         x)
                    return x_

                outputs_info = [
                    K.zeros((self.nb_actions, self.nb_actions)),
                ]
                P, _ = theano.scan(fn=fn,
                                   sequences=L_flat,
                                   outputs_info=outputs_info)
            elif K.backend() == 'tensorflow':
                import tensorflow as tf

                # Create mask that can be used to gather elements from L_flat and put them
                # into a diagonal matrix.
                diag_mask = np.zeros((self.nb_actions, self.nb_actions),
                                     dtype='int32')
                diag_mask[np.diag_indices(self.nb_actions)] = range(
                    1, self.nb_actions + 1)

                # Add leading zero element to each element in the L_flat. We use this zero
                # element for the off-diagonal entries when gathering L_flat into a
                # diagonal matrix.
                nb_rows = tf.shape(L_flat)[0]
                zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
                try:
                    # Old TF behavior.
                    L_flat = tf.concat(1, [zeros, L_flat])
                except (TypeError, ValueError):
                    # New TF behavior
                    L_flat = tf.concat([zeros, L_flat], 1)

                # Finally, process each element of the batch.
                def fn(a, x):
                    x_ = tf.gather(x, diag_mask)
                    return x_

                P = tf.scan(fn,
                            L_flat,
                            initializer=K.zeros(
                                (self.nb_actions, self.nb_actions)))
            else:
                raise RuntimeError('Unknown Keras backend "{}".'.format(
                    K.backend()))
        # NOTE(review): P is only bound when self.mode is 'full' or 'diag'; any
        # other mode reaches this line with P undefined.
        assert P is not None
        assert K.ndim(P) == 3

        # Combine a, mu and P into a scalar (over the batches). What we compute here is
        # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
        # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
        # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
        # operations happen over the batch size, which is dimension 0.
        prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
        prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
        A = -.5 * K.batch_flatten(prod)
        assert K.ndim(A) == 2
        return A
Example #45
0
def get_3dmfv_tf(points, n_gaussians=9, sigma=0.0625, flatten=True, normalize=True, full_fv=True):
    """
    Compute the 3D modified Fisher vector (3DmFV) of point sets with TensorFlow.

    The Gaussian mixture is not an input: its centres lie on a uniform grid
    over [-1, 1]^D, all weights equal 1 / n_gaussians and every Gaussian has
    the same diagonal standard deviation `sigma`.

    Input:
         points: B x N x D tensor of point coordinates with statically known
                 B, N and D; D must be 2 or 3
         n_gaussians: number of Gaussians in the mixture.
                 NOTE(review): the grid holds grid_size**D centres, with
                 grid_size = int(sqrt(n_gaussians)) for D == 2 and
                 ceil(n_gaussians ** (1/3)) for D == 3, so n_gaussians
                 presumably has to be a perfect square / cube for the centre,
                 weight and sigma tensors to agree - confirm against callers.
         sigma: scalar standard deviation shared by all Gaussians and axes
         flatten: if True return one flat vector per batch entry
         normalize: if True apply signed power (alpha = 0.5) normalization
                 followed by L2 normalization over the Gaussian axis
         full_fv: if True pool the per-point derivatives with mean and max
                 (weights) / mean, max and min (mu, sigma); otherwise mean only
    Output:
        fv: B x (F * n_gaussians) tensor if flatten, else B x n_gaussians x F,
            where F = 2 + 6 * D if full_fv else 1 + 2 * D
    Raises:
        ValueError: if the last dimension of points is neither 2 nor 3
    """
    n_batches = points.shape[0].value
    n_points = points.shape[1].value
    D = points.shape[-1].value
    if D == 2:
        grid_size = int(np.sqrt(n_gaussians))
    elif D == 3:
        # Float literals keep this a true cube root under Python 2 as well.
        grid_size = int(np.ceil(np.power(n_gaussians, 1.0 / 3.0)))
    else:
        raise ValueError(
            'points must have 2 or 3 coordinates per point, got {}'.format(D))

    # Centres of grid_size equal cells per axis covering [-1, 1].
    l = np.linspace(-1, 1, grid_size, False) + (1.0 / grid_size)
    if D == 2:
        x, y = np.meshgrid(l, l)
        x = np.stack([x.flatten(), y.flatten()]).T
    else:
        x, y, z = np.meshgrid(l, l, l)
        x = np.stack([x.flatten(), y.flatten(), z.flatten()]).T
    w = tf.ones([n_gaussians]) / (n_gaussians)
    mu = tf.constant(x, tf.float32)
    sigma = sigma * tf.ones([n_gaussians, D])

    # Expand dimension for batch compatibility
    batch_sig = tf.tile(tf.expand_dims(sigma, 0), [n_points, 1, 1])  # n_points X n_gaussians X D
    batch_sig = tf.tile(tf.expand_dims(batch_sig, 0), [n_batches, 1, 1, 1])  # n_batches X n_points X n_gaussians X D
    batch_mu = tf.tile(tf.expand_dims(mu, 0), [n_points, 1, 1])  # n_points X n_gaussians X D
    batch_mu = tf.tile(tf.expand_dims(batch_mu, 0), [n_batches, 1, 1, 1])  # n_batches X n_points X n_gaussians X D
    batch_w = tf.tile(tf.expand_dims(tf.expand_dims(w, 0), 0), [n_batches, n_points, 1])  # n_batches X n_points X n_gaussians
    # One copy of every point per Gaussian: n_batches X n_points X n_gaussians X D
    batch_points = tf.tile(tf.expand_dims(points, -2), [1, 1, n_gaussians, 1])

    # Weights broadcast to the width of the pooled mu / sigma derivatives
    # (3 * D columns for mean, max and min; D columns for mean only).
    pooled_width = D * 3 if full_fv else D
    w_per_batch_per_d = tf.tile(tf.expand_dims(tf.expand_dims(w, 0), -1), [n_batches, 1, pooled_width])

    # Define multivariate normal distributions
    mvn = tf.contrib.distributions.MultivariateNormalDiag(loc=batch_mu, scale_diag=batch_sig)
    # Compute probability per point
    p_per_point = mvn.prob(batch_points)

    # Posterior of every Gaussian given every point.
    w_p = tf.multiply(p_per_point, batch_w)
    Q = w_p / tf.tile(tf.reduce_sum(w_p, axis=-1, keepdims=True), [1, 1, n_gaussians])
    Q_per_d = tf.tile(tf.expand_dims(Q, -1), [1, 1, 1, D])

    # Derivatives w.r.t. the weights, pooled over the points axis.
    d_pi_all = tf.expand_dims((Q - batch_w) / (tf.sqrt(batch_w) * n_points), -1)
    d_pi_max = tf.reduce_max(d_pi_all, axis=1)
    d_pi_mean = tf.reduce_mean(d_pi_all, axis=1)
    if full_fv:
        d_pi = tf.concat([d_pi_mean, d_pi_max], 2)
    else:
        d_pi = d_pi_mean

    # Derivatives w.r.t. the means.
    d_mu_all = Q_per_d * (batch_points - batch_mu) / batch_sig
    d_mu_all_max = tf.reduce_max(d_mu_all, axis=1)
    d_mu_all_min = tf.reduce_min(d_mu_all, axis=1)
    d_mu_all_mean = tf.reduce_mean(d_mu_all, axis=1)
    if full_fv:
        d_mu_all_full = tf.concat([d_mu_all_mean, d_mu_all_max, d_mu_all_min], 2)
    else:
        d_mu_all_full = d_mu_all_mean
    d_mu = (1 / (tf.sqrt(w_per_batch_per_d))) * d_mu_all_full

    # Derivatives w.r.t. the standard deviations.
    d_sig_all = Q_per_d * (tf.pow((batch_points - batch_mu) / batch_sig, 2) - 1)
    d_sig_all_max = tf.reduce_max(d_sig_all, axis=1)
    d_sig_all_min = tf.reduce_min(d_sig_all, axis=1)
    d_sig_all_mean = tf.reduce_mean(d_sig_all, axis=1)
    if full_fv:
        d_sig_all_full = tf.concat([d_sig_all_mean, d_sig_all_max, d_sig_all_min], 2)
    else:
        d_sig_all_full = d_sig_all_mean
    d_sigma = (1 / (tf.sqrt(2 * w_per_batch_per_d))) * d_sig_all_full

    # The caller's `normalize` choice is honoured here.
    if normalize:
        # Power normalization; the epsilon floor keeps the gradient of pow
        # finite at zero.
        alpha = 0.5
        epsilon = 1e-12
        d_pi = tf.sign(d_pi) * tf.pow(tf.maximum(tf.abs(d_pi), epsilon), alpha)
        d_mu = tf.sign(d_mu) * tf.pow(tf.maximum(tf.abs(d_mu), epsilon), alpha)
        d_sigma = tf.sign(d_sigma) * tf.pow(tf.maximum(tf.abs(d_sigma), epsilon), alpha)

        # L2 normalization over the Gaussian axis
        d_pi = tf.nn.l2_normalize(d_pi, dim=1)
        d_mu = tf.nn.l2_normalize(d_mu, dim=1)
        d_sigma = tf.nn.l2_normalize(d_sigma, dim=1)

    if flatten:
        # Flatten each part feature-major, then join them per batch entry.
        d_pi = tf.contrib.layers.flatten(tf.transpose(d_pi, perm=[0, 2, 1]))
        d_mu = tf.contrib.layers.flatten(tf.transpose(d_mu, perm=[0, 2, 1]))
        d_sigma = tf.contrib.layers.flatten(tf.transpose(d_sigma, perm=[0, 2, 1]))
        fv = tf.concat([d_pi, d_mu, d_sigma], axis=1)
    else:
        # Layout stays n_batches X n_gaussians X features.
        fv = tf.concat([d_pi, d_mu, d_sigma], axis=2)

    return fv
def project_points_with_depth_visibility_check(point_positions,
                                               camera_intrinsics,
                                               camera_rotation_matrix,
                                               camera_translation,
                                               image_width,
                                               image_height,
                                               depth_image,
                                               depth_intrinsics=None,
                                               depth_threshold=0.1):
  """Projects 3D points into an image and tests them against a depth map.

  A point counts as visible when it lies in front of the camera, projects
  inside both the color image and the depth image, and its camera-frame depth
  differs from the depth image value at its projection by at most
  depth_threshold.

  Args:
    point_positions: A tf.float32 tensor of shape [N, 3] containing N 3D point
      positions.
    camera_intrinsics: A tf.float32 tensor of shape [3, 3] with the intrinsic
      matrix of the color camera.
    camera_rotation_matrix: A tf.float32 tensor of size [3, 3].
    camera_translation: A tf.float32 tensor of size [3].
    image_width: Width of image.
    image_height: Height of image.
    depth_image: Depth image as 2D tensor.
    depth_intrinsics: A tf.float32 tensor of size [3, 3]. Falls back to
      camera_intrinsics when None.
    depth_threshold: Largest accepted absolute depth difference.

  Returns:
    points_in_image_frame: A tf.int32 tensor of size [N, 2] containing the x, y
      location of point projections in image.
    visibility: A tf.bool tensor of size [N] which denotes if a point is visible
      from the image.
  """
  if depth_intrinsics is None:
    depth_intrinsics = camera_intrinsics

  image_height = tf.convert_to_tensor(image_height, dtype=tf.int32)
  image_width = tf.convert_to_tensor(image_width, dtype=tf.int32)
  depth_image_height = tf.shape(depth_image)[0]
  depth_image_width = tf.shape(depth_image)[1]

  # World -> camera: rotate, then translate.
  rotated = tf.linalg.einsum('ij,nj->ni', camera_rotation_matrix,
                             point_positions)
  points_in_camera_frame = rotated + tf.expand_dims(
      camera_translation, axis=0)

  def _to_pixels(intrinsics):
    # Apply the intrinsics, divide by the third coordinate, cast to pixels.
    homogeneous = tf.linalg.einsum('ij,nj->ni', intrinsics,
                                   points_in_camera_frame)
    return tf.cast(homogeneous[:, :2] / homogeneous[:, 2:3], dtype=tf.int32)

  def _inside(pixels, width, height):
    # True for the rows of `pixels` with 0 <= x < width and 0 <= y < height.
    non_negative = tf.math.reduce_all(tf.greater_equal(pixels, 0), axis=1)
    upper_bound = tf.expand_dims(tf.stack([width, height]), axis=0)
    below_bound = tf.math.reduce_all(tf.less(pixels, upper_bound), axis=1)
    return non_negative & below_bound

  points_in_image_frame = _to_pixels(camera_intrinsics)
  points_in_depth_frame = _to_pixels(depth_intrinsics)

  # In front of the camera, inside the color image, inside the depth image.
  visibility = tf.greater(points_in_camera_frame[:, 2], 0.0)
  visibility = visibility & _inside(points_in_image_frame, image_width,
                                    image_height)
  visibility = visibility & _inside(points_in_depth_frame, depth_image_width,
                                    depth_image_height)

  # Look up the depth image at the projections of the candidates kept so far,
  # addressing the flattened image row-major.
  candidate_pixels = tf.boolean_mask(points_in_depth_frame, visibility)
  flat_indices = (
      candidate_pixels[:, 1] * depth_image_width + candidate_pixels[:, 0])
  candidate_points = tf.boolean_mask(points_in_camera_frame, visibility)
  expected_depth = candidate_points[:, 2]
  measured_depth = tf.gather(tf.reshape(depth_image, [-1]), flat_indices)
  depth_agrees = tf.less_equal(
      tf.abs(expected_depth - measured_depth), depth_threshold)

  # Scatter the per-candidate verdicts back to the full point list; points
  # that were not candidates stay at 0, i.e. False.
  candidate_rows = tf.cast(tf.where(visibility), dtype=tf.int32)
  depth_agrees_all = tf.scatter_nd(
      indices=candidate_rows,
      updates=tf.cast(depth_agrees, dtype=tf.int32),
      shape=tf.shape(visibility))
  visibility = visibility & tf.cast(depth_agrees_all, dtype=tf.bool)

  return points_in_image_frame, visibility
def pairwise_and(a, b):
    """Logical AND of `a` expanded at axis 2 with `b` expanded at axis 1.

    The two singleton axes let the inputs broadcast against each other, so
    every entry of `a` is combined with every entry of `b`.
    """
    return tf.logical_and(tf.expand_dims(a, 2), tf.expand_dims(b, 1))
Example #48
0
 def predict(self, image):
     """Run one image through the encoder and then the decoder.

     A leading axis is added before encoding and squeezed out of the decoder
     output again.
     """
     batched = tf.expand_dims(image, axis=0)
     decoded = self.decoder(self.encoder(batched))
     return tf.squeeze(decoded, axis=0)
Example #49
0
 def image_to_4d(image):
     """Return `image` with a new axis of size 1 inserted at position 0."""
     return tf.expand_dims(image, 0)
Example #50
0
# Example 2
# First, reset the computation graph and re-create the session
from tensorflow.python.framework import ops
ops.reset_default_graph()
sess = tf.Session()
# Create the data, target labels, placeholders and the variable to learn
x_vals = np.concatenate((np.random.normal(-1.,1.,50),np.random.normal(3.,1.,50)))  # array concatenation; axis=0 is the default
y_vals = np.concatenate((np.repeat(0.,50),np.repeat(1.,50)))
x_data = tf.placeholder(tf.float32,shape=[1])
y_target = tf.placeholder(tf.float32,shape=[1])
A = tf.Variable(tf.random_normal(mean=10,shape=[1]))  # A is the variable
# Add the translation operation
my_output = tf.add(x_data,A)
# Add an extra dimension
my_output_expanded = tf.expand_dims(my_output,0)
y_target_expanded = tf.expand_dims(y_target,0)
# Initialize the variable A
init = tf.initialize_all_variables()
sess.run(init)
# Declare the loss function
xentroy = tf.nn.sigmoid_cross_entropy_with_logits(logits=my_output_expanded,labels=y_target_expanded)
# Add an optimizer so that TensorFlow knows how to update the variable
my_opt= tf.train.GradientDescentOptimizer(0.05)
train_step = my_opt.minimize(xentroy)
# Iterate over randomly selected data points, updating the variable A
for i in range(1400):
    rand_index = np.random.choice(100)
    rand_x = [x_vals[rand_index]]
    rand_y = [y_vals[rand_index]]
    sess.run(train_step,feed_dict={x_data:rand_x,y_target:rand_y})
def generalized_dice_loss(pred, true, p=1, q=1, eps=1E-6):
    """Generalized Dice loss with inverse class and inverse loss weighting.

       pred and true are tensors of shape (b, w_0, w_1, ..., c) where
             b   ... batch size
             w_k ... width of input in k-th dimension
             c   ... number of segments/classes
       Both tensors are expected to hold values in [0, 1] only; this is not
       checked here. All dimensions but the batch size must be statically
       known and equal for pred and true.

       With p > 0 every class is weighted by the inverse of its cardinality
       in `true` raised to the power p. With q > 0 every sample of the batch
       is weighted by the inverse of its absolute loss raised to the power q.
       The remaining parameters are as follows:
             p   ... power of inverse class weighting (p=1 default, p=0 uniform)
             q   ... power of inverse loss weighting (q=1 default, q=0 none)
             eps ... regularization term if empty classes occur

       Returns a scalar tensor with the (weighted) mean loss of the batch."""
    # Local import: `reduce` is not a builtin on Python 3.
    from functools import reduce

    assert (p >= 0)
    assert (q >= 0)
    assert (eps >= 0)
    assert (pred.get_shape()[1:] == true.get_shape()[1:])

    shape_pred = pred.get_shape()
    shape_true = true.get_shape()
    prod_pred = reduce(lambda x, y: x * y, shape_pred[1:-1],
                       tf.Dimension(1))
    prod_true = reduce(lambda x, y: x * y, shape_true[1:-1],
                       tf.Dimension(1))

    # reshape to shape (b, W, c) where W is product of w_k
    pred = tf.reshape(pred, [-1, prod_pred, shape_pred[-1]])
    true = tf.reshape(true, [-1, prod_true, shape_true[-1]])

    # no class reweighting at all
    if p == 0:
        # unweighted intersection and union
        inter = tf.reduce_mean(pred * true, axis=[1, 2])
        union = tf.reduce_mean(pred + true, axis=[1, 2])
    else:
        # inverse L_p weighting for class cardinalities, shape (b, c)
        weights = tf.abs(tf.reduce_sum(true, axis=[1]))**p + eps
        weights = tf.expand_dims(tf.reduce_sum(weights, axis=[-1]), -1) \
                / weights

        # weighted intersection and union
        inter = tf.reduce_mean(weights *
                               tf.reduce_mean(pred * true, axis=[1]),
                               axis=[-1])
        union = tf.reduce_mean(weights *
                               tf.reduce_mean(pred + true, axis=[1]),
                               axis=[-1])

    # the traditional dice formula, one value per batch entry
    loss = 1.0 - 2.0 * (inter + eps) / (union + eps)

    # no reweighting of the batch
    if q == 0:
        return tf.reduce_mean(loss)

    # inverse L_q weighting for loss scores
    weights = tf.abs(loss)**q + eps
    weights = tf.reduce_sum(weights) / weights

    return tf.reduce_mean(loss * weights) / tf.reduce_mean(weights)
def pairwise_sub(a, b):
    """Subtract `b` expanded at axis 1 from `a` expanded at axis 2.

    The two singleton axes let the inputs broadcast against each other, so
    the result holds the difference of every entry of `a` with every entry
    of `b`.
    """
    return tf.subtract(tf.expand_dims(a, 2), tf.expand_dims(b, 1))
Example #53
0
def custom_v3(is_training, images, params, mode):
    """Build the custom_v3 embedding network (embeddings for triplet loss).

    Layout: input dropout, three blocks of
    (3x3 conv -> optional batch norm -> relu) x 2 -> 2x2 maxpool with 32, 64
    and 128 filters, a 1x1 conv down to 64 channels, a 12x12 average pool, a
    dense layer of size params.embedding_size, L2 normalization and a final
    scaling by params.alpha.

    Args:
        is_training: (bool) whether we are training or not
        images: input tensor of the graph; this can be a `tf.placeholder` or
                an output of `tf.data`
        params: (Params) hyperparameters; read here are input_dropout,
                image_size, num_filters, num_blocks, bn_momentum,
                use_batch_norm, embedding_size and alpha
        mode: not used by this function

    Returns:
        output: (tf.Tensor) the scaled, L2-normalized embeddings

    Raises:
        ValueError: if params.image_size is not 96
    """

    def _conv_bn_relu(tensor, n_filters):
        # 3x3 'same' convolution, batch norm when enabled, then relu.
        tensor = tf.layers.conv2d(tensor, n_filters, 3, padding='same')
        if params.use_batch_norm:
            tensor = tf.layers.batch_normalization(tensor,
                                                   momentum=bn_momentum,
                                                   training=is_training)
        return tf.nn.relu(tensor)

    # Dropout directly on the input
    input_dropout = tf.layers.dropout(images,
                                      rate=params.input_dropout,
                                      training=is_training,
                                      name='input_dropout')

    image_size_in = params.image_size

    # num_filters and num_blocks are read from params but not used below.
    num_filters = params.num_filters
    num_blocks = params.num_blocks
    bn_momentum = params.bn_momentum
    # One entry per conv block: the number of filters of that block.
    filters = [32, 64, 128]

    if params.image_size != 96:
        raise ValueError(
            "Image size should be equal to 96 if you want to use custom_v3.")

    out = input_dropout
    for block_number, n_filters in enumerate(filters, start=1):
        with tf.variable_scope('block_{}'.format(block_number)):
            out = _conv_bn_relu(out, n_filters)
            out = _conv_bn_relu(out, n_filters)
            out = tf.layers.max_pooling2d(out, 2, 2)

    # Each of the 3 maxpools halves the spatial size.
    image_size_out = int(image_size_in / 2**3)
    expected_shape = [image_size_out, image_size_out, filters[-1]]
    assert out.shape[1:] == expected_shape, (
        "filters: {}\nout shape: {}\nimage_size_out: {}".format(
            filters[-1], out.shape, image_size_out))
    # 12 x 12 x 128

    out = tf.layers.conv2d(out, 64, 1, padding='same')
    # 12 x 12 x 64

    out = tf.layers.average_pooling2d(out, 12, strides=1)
    # 1 x 1 x 64

    out = tf.reshape(out, [-1, 64])

    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)
        # Row-wise L2 normalization; the small constant guards against a
        # division by zero.
        row_norms = tf.norm(out, ord='euclidean', axis=1) + 1e-16
        out = tf.divide(out, tf.expand_dims(row_norms, 1))
        out = params.alpha * out

    return out
Example #54
0
def divergence3(x):
    """Forward-difference divergence of a three-component field.

    Component 0 of the last indexed axis is differenced along axis 3,
    component 1 along axis 2 and component 2 along axis 1. Every difference
    is cropped to one element less than `x` along axes 1 to 3 so the three
    terms line up.

    Returns:
        The sum of the three differences with a trailing axis of size 1.
    """
    # Common reference corner of all three differences.
    origin = x[:, :-1, :-1, :-1]
    du_dx = x[:, :-1, :-1, 1:, 0] - origin[:, :, :, :, 0]
    dv_dy = x[:, :-1, 1:, :-1, 1] - origin[:, :, :, :, 1]
    dw_dz = x[:, 1:, :-1, :-1, 2] - origin[:, :, :, :, 2]
    return tf.expand_dims(du_dx + dv_dy + dw_dz, axis=-1)
Example #55
0
                    help='The directory of tensorflow checkpoint.')

if __name__ == "__main__":
    # Expose only CUDA device 4 to this process (hard-coded).
    os.environ['CUDA_VISIBLE_DEVICES'] = '4'
    args = parser.parse_args()

    # Build the inpainting model from config.yml in the working directory.
    config_path = os.path.join('config.yml')
    config = Config(config_path)
    model = GDNInpainting(config)
    image = imread(args.image)
    mask = imread(args.mask)
    # Binarize the mask: values above 173 become 255, everything else 0.
    mask = (mask > 173).astype(np.uint8) * 255
    # NOTE(review): image and mask must have identical shapes here, yet only
    # the mask gets a trailing axis below - confirm the ranks imread returns.
    assert image.shape == mask.shape

    # Add a leading axis to both inputs and a trailing axis to the mask.
    image = tf.constant(image, dtype=tf.float32)
    image = tf.expand_dims(image, axis=0)
    mask = tf.constant(mask, dtype=tf.float32)
    mask = tf.expand_dims(mask, axis=0)
    mask = tf.expand_dims(mask, axis=-1)

    # Scale by 255; presumably maps 8-bit inputs to [0, 1] - TODO confirm.
    image /= 255
    mask /= 255

    # Keep the image where mask == 0 and fill with 1 where mask == 1.
    images_masked = (image * (1 - mask)) + mask
    # input of the model: masked image and mask joined along axis 3
    inputs = tf.concat([images_masked, mask], axis=3)

    # process outputs
    # NOTE(review): the meaning of the positional arguments 8, 64, 2 is
    # defined by GDNInpainting.inpaint_generator, which is not visible here.
    output = model.inpaint_generator(inputs, 8, 64, 2)

    # Use the generator output inside the mask and the input image outside.
    outputs_merged = (output * mask) + (image * (1 - mask))
Example #56
0
def miniception_v6(is_training, images, params, mode):
    """Compute outputs of the model (embeddings for triplet loss).

    Convolution / max-pooling stem, two miniception blocks, global average
    pooling, dropout and a dense embedding layer.  Each embedding row is
    L2-normalised and then multiplied by the fixed scalar `params.alpha`.
    (Per the original author's note this adds an L2-norm layer to
    miniception_v2; see the "L2-constrained softmax" paper for the scaling.)

    Args:
        is_training: (bool) whether we are training or not; enables both
                dropout layers
        images: input image tensor fed to the first layer
                this can be `tf.placeholder` or outputs of `tf.data`;
                the shape assertions below expect 448 x 448 images
        params: (Params) hyperparameters; this function reads `image_size`,
                `input_dropout`, `output_dropout`, `embedding_size`, `alpha`
        mode: not used by this function (presumably kept so that all model
                functions share one signature -- TODO confirm)

    Returns:
        output: (tf.Tensor) scaled, L2-normalised embeddings with
                `params.embedding_size` columns

    Raises:
        ValueError: if `params.image_size` is not 448
    """
    # Validate the configuration before touching the graph so that a bad
    # image size does not leave a dangling dropout op behind.
    if params.image_size != 448:
        raise ValueError(
            "Image size should be equal to 448 if you want to use miniception_v6."
        )

    # Apply dropout to the input layer
    input_dropout = tf.layers.dropout(images,
                                      rate=params.input_dropout,
                                      training=is_training,
                                      name='input_dropout')

    out = input_dropout
    # 448 x 448 x num_channels

    out = tf.layers.conv2d(out,
                           16,
                           7,
                           strides=2,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [224, 224,
                             16], "output has shape {}".format(out.shape)
    # 224 x 224 x 16

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [112, 112,
                             16], "output has shape {}".format(out.shape)
    # 112 x 112 x 16

    out = tf.layers.conv2d(out,
                           32,
                           3,
                           strides=1,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [112, 112,
                             32], "output has shape {}".format(out.shape)
    # 112 x 112 x 32

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [56, 56,
                             32], "output has shape {}".format(out.shape)
    # 56 x 56 x 32

    out = tf.layers.conv2d(out,
                           64,
                           3,
                           strides=1,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [56, 56,
                             64], "output has shape {}".format(out.shape)
    # 56 x 56 x 64

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [28, 28,
                             64], "output has shape {}".format(out.shape)
    # 28 x 28 x 64

    out = tf.nn.local_response_normalization(out)

    out = tf.layers.conv2d(out, 96, 3, padding='same', activation=tf.nn.relu)
    assert out.shape[1:] == [28, 28,
                             96], "output has shape {}".format(out.shape)
    # 28 x 28 x 96

    out = tf.nn.local_response_normalization(out)

    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [14, 14,
                             96], "output has shape {}".format(out.shape)
    # 14 x 14 x 96

    # Miniception module 1
    # ------------------
    # Four parallel branches concatenated on the channel axis:
    # 32 (1x1) + 16 (5x5) + 64 (3x3) + 16 (pool) = 128 channels.
    with tf.variable_scope('miniception_block1'):
        with tf.variable_scope('branch1x1'):
            branch1x1 = tf.layers.conv2d(out,
                                         32,
                                         1,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch5x5'):
            # 1x1 bottleneck before the 5x5 convolution
            branch5x5 = tf.layers.conv2d(out, 8, 1, activation=tf.nn.relu)
            branch5x5 = tf.layers.conv2d(branch5x5,
                                         16,
                                         5,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch3x3'):
            # 1x1 bottleneck before the 3x3 convolution
            branch3x3 = tf.layers.conv2d(out, 48, 1, activation=tf.nn.relu)
            branch3x3 = tf.layers.conv2d(branch3x3,
                                         64,
                                         3,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch_pool'):
            branch_pool = tf.layers.average_pooling2d(out,
                                                      3,
                                                      strides=1,
                                                      padding='same')
            branch_pool = tf.layers.conv2d(branch_pool,
                                           16,
                                           1,
                                           padding='same',
                                           activation=tf.nn.relu)
        out = tf.concat(axis=3,
                        values=[branch1x1, branch5x5, branch3x3, branch_pool])
        # 14 x 14 x 128

    # Transitional max pooling layer
    # ------------------------------
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [7, 7,
                             128], "output has shape {}".format(out.shape)
    # 7 x 7 x 128

    # Miniception module 2
    # ------------------
    # Same layout as module 1 with wider branches:
    # 64 (1x1) + 48 (5x5) + 96 (3x3) + 32 (pool) = 240 channels.
    with tf.variable_scope('miniception_block2'):
        with tf.variable_scope('branch1x1'):
            branch1x1 = tf.layers.conv2d(out,
                                         64,
                                         1,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch5x5'):
            branch5x5 = tf.layers.conv2d(out, 16, 1, activation=tf.nn.relu)
            branch5x5 = tf.layers.conv2d(branch5x5,
                                         48,
                                         5,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch3x3'):
            branch3x3 = tf.layers.conv2d(out, 64, 1, activation=tf.nn.relu)
            branch3x3 = tf.layers.conv2d(branch3x3,
                                         96,
                                         3,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch_pool'):
            branch_pool = tf.layers.average_pooling2d(out,
                                                      3,
                                                      strides=1,
                                                      padding='same')
            branch_pool = tf.layers.conv2d(branch_pool,
                                           32,
                                           1,
                                           padding='same',
                                           activation=tf.nn.relu)
        out = tf.concat(axis=3,
                        values=[branch1x1, branch5x5, branch3x3, branch_pool])
        # 7 x 7 x 240

    assert out.shape[1:] == [7, 7, 240], "out shape: {}".format(out.shape)

    # Average pooling reduction
    # -------------------------
    # The 7 x 7 window covers the whole feature map, i.e. global average pooling.
    out = tf.layers.average_pooling2d(out, 7, strides=1)
    # 1 x 1 x 240

    # Flatten layer with dropout
    # --------------------------
    out = tf.reshape(out, [-1, 1 * 1 * 240])
    out = tf.layers.dropout(out,
                            rate=params.output_dropout,
                            training=is_training,
                            name='output_dropout')

    # Final dense layer (embeddings) followed by L2 normalization
    # -----------------------------------------------------------
    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)
        # Divide every row by its Euclidean norm; the 1e-16 term keeps the
        # division finite for an all-zero row.
        out = tf.divide(
            out,
            tf.expand_dims(tf.norm(out, ord='euclidean', axis=1) + 1e-16, 1))
        # Rescale the unit-length embeddings by the fixed hyperparameter alpha.
        out = params.alpha * out

    return out
Example #57
0
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """
    Attention mechanism layer which reduces RNN/Bi-RNN outputs with Attention vector.

    The idea was proposed in the article by Z. Yang et al., "Hierarchical Attention Networks
    for Document Classification", 2016: http://www.aclweb.org/anthology/N16-1174.
    Variables notation is also inherited from the article.

    Args:
        inputs: The Attention inputs.
            Matches outputs of RNN/Bi-RNN layer (not final state):
                In case of RNN, this must be RNN outputs `Tensor`:
                    If time_major == False (default), this must be a tensor of shape:
                        `[batch_size, max_time, cell.output_size]`.
                    If time_major == True, this must be a tensor of shape:
                        `[max_time, batch_size, cell.output_size]`.
                In case of Bidirectional RNN, this must be a tuple (outputs_fw, outputs_bw) containing the forward and
                the backward RNN outputs `Tensor`; the two are concatenated along axis 2.
                    If time_major == False (default),
                        outputs_fw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_bw.output_size]`.
                    If time_major == True,
                        outputs_fw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_bw.output_size]`.
            The size of the last axis must be statically known, because it
            determines the shape of the trainable weights.
        attention_size: Linear size of the Attention weights.
        time_major: The shape format of the `inputs` Tensors.
            If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
            If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
            Time-major inputs are transposed to batch-major inside this function.
        return_alphas: Whether to return attention coefficients variable along with layer's output.
            Used for visualization purpose.

    Returns:
        The Attention output `Tensor`.
        In case of RNN, this will be a `Tensor` shaped:
            `[batch_size, cell.output_size]`.
        In case of Bidirectional RNN, this will be a `Tensor` shaped:
            `[batch_size, cell_fw.output_size + cell_bw.output_size]`.
        If `return_alphas` is true, a tuple `(output, alphas)` is returned
        instead, where `alphas` is the `[batch_size, max_time]` tensor of
        attention coefficients.
    """

    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        # Use the public `tf.transpose`; `tf.array_ops` is not a documented
        # attribute of the `tf` namespace.
        inputs = tf.transpose(inputs, [1, 0, 2])

    hidden_size = inputs.shape[
        2].value  # D value - hidden size of the RNN layer

    # Trainable parameters (creation order kept so auto-generated variable
    # names stay the same)
    w_omega = tf.Variable(
        tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    # Softmax over the last axis, i.e. over time steps
    alphas = tf.nn.softmax(vu, name='alphas')  # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output
Example #58
0
    def perform_greedy(self, features, predicted, states, swap_memory = False):
        """Greedy decoding: one decoder step per entry along axis 0 of the encoder output.

        `features` is encoded with `self.encoder_inference`; a `tf.while_loop`
        then walks over axis 0 of the encoding.  At every step the decoder is
        given the current encoded entry, the most recently stored token and
        the current states, and the arg-max of its output is stored.

        Args:
            features: input passed unchanged to `self.encoder_inference`.
            predicted: initial token, written to slot 0 of the prediction
                array (the array holds int32 scalars).
            states: initial decoder states.
            swap_memory: forwarded to `tf.while_loop`.

        Returns:
            A `Hypothesis` whose `index` is the last written slot, whose
            `prediction` holds slots 0..index of the prediction array and
            whose `states` are the final decoder states.
        """
        encoded = self.encoder_inference(features)
        # One slot per step plus slot 0 for the initial token.
        prediction = tf.TensorArray(
            dtype = tf.int32,
            size = (tf.shape(encoded)[0] + 1),
            dynamic_size = False,
            element_shape = tf.TensorShape([]),
            clear_after_read = False,
        )
        time = tf.constant(0, dtype = tf.int32)
        total = tf.shape(encoded)[0]

        hypothesis = Hypothesis(
            index = tf.constant(0, dtype = tf.int32),
            prediction = prediction.write(0, predicted),
            states = states,
        )

        def condition(time, total, encoded, hypothesis):
            # Keep looping until every entry along axis 0 has been consumed.
            return tf.less(time, total)

        def body(time, total, encoded, hypothesis):
            # Decode the entry at position `time`, conditioned on the token
            # stored at the current index.
            ytu, new_states = self.decoder_inference(
                encoded = tf.gather_nd(
                    encoded, tf.expand_dims(time, axis = -1)
                ),
                predicted = hypothesis.prediction.read(hypothesis.index),
                states = hypothesis.states,
            )
            char = tf.argmax(ytu, axis = -1, output_type = tf.int32)
            # Blank: store BLANK and keep the previous states.
            # Otherwise: store the token and adopt the new states.
            # NOTE(review): the index advances in both branches, so blanks are
            # stored and read back as the previous token on the next step --
            # confirm this is intended.
            index, char, new_states = tf.cond(
                tf.equal(char, BLANK),
                true_fn = lambda: (
                    hypothesis.index + 1,
                    BLANK,
                    hypothesis.states,
                ),
                false_fn = lambda: (hypothesis.index + 1, char, new_states),
            )
            hypothesis = Hypothesis(
                index = index,
                prediction = hypothesis.prediction.write(index, char),
                states = new_states,
            )
            return time + 1, total, encoded, hypothesis

        time, total, encoded, hypothesis = tf.while_loop(
            condition,
            body,
            loop_vars = (time, total, encoded, hypothesis),
            swap_memory = swap_memory,
        )
        # Convert the TensorArray into a dense tensor holding slots 0..index.
        hypothesis = Hypothesis(
            index = hypothesis.index,
            prediction = tf.gather_nd(
                params = hypothesis.prediction.stack(),
                indices = tf.expand_dims(
                    tf.range(hypothesis.index + 1), axis = -1
                ),
            ),
            states = hypothesis.states,
        )

        return hypothesis
Example #59
0
def main():
    """Receive frames over Spout, stylise them with the transform net and draw them.

    Opens a pygame/OpenGL window of size `args.window_size`, creates a Spout
    receiver named `args.spout_name` of size `args.spout_size`, builds the
    TensorFlow graph around `transform.Transform().net`, restores the weights
    from `args.style_model`, and then loops forever: receive a texture, run
    it through the network, upload the result to a second texture and draw a
    textured quad.  The loop ends when the window receives a QUIT event, at
    which point the receiver, the TensorFlow session and pygame are released
    and the process exits via `SystemExit`.
    """
    # Local import: only needed for the clean shutdown in the event loop.
    import sys

    # parse arguments
    args = parse_args()

    # window details
    width = args.window_size[0]
    height = args.window_size[1]
    display = (width, height)

    # window setup
    pygame.init()
    pygame.display.set_caption('Spout Neural Style Receiver')
    pygame.display.set_mode(display, DOUBLEBUF | OPENGL)

    # OpenGL init: orthographic projection with the origin in the top-left
    # corner (y grows downwards)
    glMatrixMode(GL_PROJECTION)
    glLoadIdentity()
    glOrtho(0, width, height, 0, 1, -1)
    glMatrixMode(GL_MODELVIEW)
    glDisable(GL_DEPTH_TEST)
    glClearColor(0.0, 0.0, 0.0, 0.0)
    glEnable(GL_TEXTURE_2D)

    # init spout receiver
    receiverName = args.spout_name
    spoutReceiverWidth = args.spout_size[0]
    spoutReceiverHeight = args.spout_size[1]
    # create spout receiver
    spoutReceiver = SpoutSDK.SpoutReceiver()

    # Its signature in c++ looks like this: bool pyCreateReceiver(const char* theName, unsigned int theWidth, unsigned int theHeight, bool bUseActive);
    spoutReceiver.pyCreateReceiver(receiverName, spoutReceiverWidth,
                                   spoutReceiverHeight, False)

    # create textures for the received frame and for the stylised output
    textureReceiveID = glGenTextures(1)
    textureStyleID = glGenTextures(1)

    # initialise receiver texture
    glBindTexture(GL_TEXTURE_2D, textureReceiveID)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)

    # allocate storage for the receiver texture (no pixel data yet)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, spoutReceiverWidth,
                 spoutReceiverHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, None)
    glBindTexture(GL_TEXTURE_2D, 0)

    # initialise the texture that will hold the stylised output
    glBindTexture(GL_TEXTURE_2D, textureStyleID)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glBindTexture(GL_TEXTURE_2D, 0)

    # open tf session
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True  # to deal with large image
    sess = tf.Session(config=soft_config)
    # build tf graph
    style = tf.placeholder(tf.float32,
                           shape=[spoutReceiverHeight, spoutReceiverWidth, 3],
                           name='input')
    styleI = tf.expand_dims(style, 0)  # add one dim for batch

    # result image from transform-net
    scaler = transform.Transform()
    y_hat = scaler.net(styleI / 255.0)
    y_hat = tf.squeeze(y_hat)  # remove one dim for batch
    y_hat = tf.clip_by_value(y_hat, 0., 255.)

    # initialize parameters
    sess.run(tf.global_variables_initializer())

    # load pre-trained model
    saver = tf.train.Saver()
    saver.restore(sess, args.style_model)

    # loop for graph frame by frame
    while True:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                spoutReceiver.ReleaseReceiver()
                # Release the TensorFlow session before leaving.
                sess.close()
                pygame.quit()
                # `sys.exit()` instead of the interactive helper `quit()`,
                # which the `site` module only installs for shell use.
                sys.exit()

        # receive texture
        # Its signature in c++ looks like this: bool pyReceiveTexture(const char* theName, unsigned int theWidth, unsigned int theHeight, GLuint TextureID, GLuint TextureTarget, bool bInvert, GLuint HostFBO);
        spoutReceiver.pyReceiveTexture(receiverName, spoutReceiverWidth,
                                       spoutReceiverHeight, textureReceiveID,
                                       GL_TEXTURE_2D, False, 0)

        glBindTexture(GL_TEXTURE_2D, textureReceiveID)
        # copy pixel byte array from received texture
        data = glGetTexImage(GL_TEXTURE_2D,
                             0,
                             GL_RGB,
                             GL_UNSIGNED_BYTE,
                             outputType=None)  #Using GL_RGB can use GL_RGBA
        glBindTexture(GL_TEXTURE_2D, 0)
        # swap width and height data around due to oddness with glGetTexImage. http://permalink.gmane.org/gmane.comp.python.opengl.user/2423
        data.shape = (data.shape[1], data.shape[0], data.shape[2])

        # start time of the loop for FPS counter (texture receive is not timed)
        start_time = time.time()
        #run the graph
        output = sess.run(y_hat, feed_dict={style: data})
        # fiddle back to an image we can display. I *think* this is correct
        # (the graph already clips to [0, 255]; this clip is a cheap safety net)
        output = np.clip(output, 0.0, 255.0)
        output = output.astype(np.uint8)

        # setup the texture so we can load the stylised output into it
        glBindTexture(GL_TEXTURE_2D, textureStyleID)
        # copy style output into texture
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, spoutReceiverWidth,
                     spoutReceiverHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, output)

        # setup window to draw to screen
        glActiveTexture(GL_TEXTURE0)

        # clean start
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # reset drawing perspective
        glLoadIdentity()

        # draw texture on screen
        glBegin(GL_QUADS)

        glTexCoord(0, 0)
        glVertex2f(0, 0)

        glTexCoord(1, 0)
        glVertex2f(spoutReceiverWidth, 0)

        glTexCoord(1, 1)
        glVertex2f(spoutReceiverWidth, spoutReceiverHeight)

        glTexCoord(0, 1)
        glVertex2f(0, spoutReceiverHeight)

        glEnd()

        # update window
        pygame.display.flip()

        # FPS = 1 / time to process loop.  `time.time()` can be coarse enough
        # that a fast frame measures as 0 s, so guard the division.
        elapsed = time.time() - start_time
        print("FPS: ", 1.0 / elapsed if elapsed > 0 else float('inf'))
# Smoke test: sample points on random triangles (area-weighted), then thin
# them out with farthest point sampling and print the result's shape/dtype.
if __name__ == '__main__':
    import numpy as np

    np.random.seed(100)
    num_candidates = 8192  # points drawn on the triangles
    num_kept = 1024        # points kept by farthest point sampling
    random_shape = (1, num_candidates)

    # One batch of five triangles, each with three 3-D vertices.
    triangle_data = np.random.rand(1, 5, 3, 3).astype('float32')

    with tf.device('/gpu:0'):
        triangle_tensor = tf.constant(triangle_data)
        vertex_a = triangle_tensor[:, :, 0, :]  # 1 x 5 x 3
        vertex_b = triangle_tensor[:, :, 1, :]  # 1 x 5 x 3
        vertex_c = triangle_tensor[:, :, 2, :]  # 1 x 5 x 3

        # Length of the edge cross product, used as the per-triangle sampling
        # weight; the 1e-9 keeps the square root away from an exact zero.
        edge_cross = tf.cross(vertex_b - vertex_a, vertex_c - vertex_a)
        triangle_weights = tf.sqrt(
            tf.reduce_sum(edge_cross ** 2, 2) + 1e-9)  # 1 x 5

        # Pick a triangle for every candidate point, weighted by area.
        picks = tf.random_uniform(random_shape)  # 1 x 8192
        triangle_ids = prob_sample(triangle_weights, picks)  # 1 x 8192
        a_picked = gather_point(vertex_a, triangle_ids)  # 1 x 8192 x 3
        b_picked = gather_point(vertex_b, triangle_ids)  # 1 x 8192 x 3
        c_picked = gather_point(vertex_c, triangle_ids)  # 1 x 8192 x 3

        # Draw (u, v) in the unit square and fold them so that u + v <= 1,
        # i.e. the point stays inside the triangle.
        raw_u = tf.random_uniform(random_shape)
        raw_v = tf.random_uniform(random_shape)
        folded_sum = 1 - tf.abs(raw_u + raw_v - 1)
        difference = raw_u - raw_v
        coord_u = (folded_sum + difference) * 0.5
        coord_v = (folded_sum - difference) * 0.5

        # a + (b - a) * u + (c - a) * v
        along_ab = (b_picked - a_picked) * tf.expand_dims(coord_u, -1)
        along_ac = (c_picked - a_picked) * tf.expand_dims(coord_v, -1)
        surface_points = a_picked + along_ab + along_ac
        print('pt_sample: ', surface_points)

        kept_ids = farthest_point_sample(num_kept, surface_points)
        thinned_points = gather_point(surface_points, kept_ids)
        print(thinned_points)

    with tf.Session('') as session:
        result = session.run(thinned_points)
    print(result.shape, result.dtype)