Beispiel #1
0
    def compute_loss(self, y_true, y_pred):
        """Weight the normalized prediction by a soft distance-from-center mask.

        Each pixel of `y_true` is described by its coordinates (`self.xs`,
        `self.ys`) concatenated with its channel values. The scaled squared
        difference between that description and the center point returned by
        `self.compute_center_coords` is turned into a weight
        `1 - exp(-0.5 * sum(d) / sigma_norm**2)`, so pixels that match the
        center get a weight near 0 and dissimilar pixels a weight near 1.

        Args:
            y_true: 4-D tensor indexed as [batch, row, column, channel].
            y_pred: prediction tensor, passed through to
                `self.compute_center_coords`.

        Returns:
            The [batch, h, w] weight mask multiplied by the normalized
            prediction returned by `self.compute_center_coords`.
        """
        batch_size = tf.shape(y_true)[0]
        h = tf.shape(y_true)[1]
        w = tf.shape(y_true)[2]
        n_chans = tf.shape(y_true)[3]
        n_pixels = h * w
        # Length of one center-point vector; presumably x, y plus three colour
        # channels, as the `xyrgb` name suggests -- TODO confirm.
        n_dims = 5

        y_pred_norm, center_point_xyrgb = self.compute_center_coords(y_true, y_pred)
        # Repeat the per-image center point once per pixel.
        center_point_xyrgb = tf.tile(
            tf.reshape(center_point_xyrgb, [batch_size, 1, n_dims]), (1, n_pixels, 1))
        true_rgbs = tf.reshape(y_true, [batch_size, n_pixels, n_chans])

        # Per-pixel feature vector: coordinate features followed by channel values.
        im_coords = tf.concat([
            tf.cast(tf.tile(self.xs, [batch_size, 1, 1]), tf.float32),
            tf.cast(tf.tile(self.ys, [batch_size, 1, 1]), tf.float32),
            true_rgbs
        ], axis=-1)

        # compute normalized distance, and weight using lambdas
        pixel_dists = ((im_coords - center_point_xyrgb) * self.lambdas_norm) ** 2 * self.lambdas
        soft_pixel_affinities = (1. - tf.exp(tf.reduce_sum(-0.5 * pixel_dists / self.sigma_norm ** 2, axis=-1)))
        soft_pixel_affinities = tf.reshape(soft_pixel_affinities, [batch_size, h, w])  # weight mask

        return soft_pixel_affinities * y_pred_norm
Beispiel #2
0
    def build_predict(self, Xnew, full_cov=False):
        """
        Xnew is a data matrix, point at which we want to predict

        This method computes

            p(F* | Y )

        where F* are points on the GP at Xnew, Y are noisy observations at X.

        Returns a tuple (fmean, fvar). With full_cov=True, fvar is the full
        covariance over Xnew repeated along a trailing axis, once per column
        of Y; otherwise it is the marginal variance repeated once per column
        of Y.
        """
        Kx = self.kern.K(self.X, Xnew)
        K = self.kern.K(self.X) + eye(self.num_data) * self.likelihood.variance
        L = tf.cholesky(K)
        A = tf.matrix_triangular_solve(L, Kx, lower=True)
        V = tf.matrix_triangular_solve(L, self.Y - self.mean_function(self.X))
        fmean = tf.matmul(tf.transpose(A), V) + self.mean_function(Xnew)
        # Use the dynamic shape in both branches: the static shape of Y may be
        # unknown at graph-construction time.
        num_columns = tf.shape(self.Y)[1]
        if full_cov:
            fvar = self.kern.K(Xnew) - tf.matmul(tf.transpose(A), A)
            shape = tf.pack([1, 1, num_columns])
            fvar = tf.tile(tf.expand_dims(fvar, 2), shape)
        else:
            fvar = self.kern.Kdiag(Xnew) - tf.reduce_sum(tf.square(A), 0)
            shape = tf.pack([1, num_columns])
            fvar = tf.tile(tf.reshape(fvar, (-1, 1)), shape)
        return fmean, fvar
Beispiel #3
0
def bond_conv_layer(activated_atoms, bv_params, layer):
    """Build one degree-aware convolution layer over atoms.

    For every degree in 1..5 the atom activations are multiplied by the
    flow matrices gathered through `type_adj_ph`, masked so that only atoms
    whose entry in `deg_list_ph` equals that degree contribute, offset by the
    degree's bias, and summed over axis 1. The per-degree results are
    accumulated and passed through a ReLU.

    Args:
        activated_atoms: 2-D tensor of atom activations (one row per atom --
            presumably N_atoms_ph rows; TODO confirm against caller).
        bv_params: dict holding the parameters 'A_flow<layer>_<deg>' and
            'b_flow<layer>_<deg>' for deg in 1..5.
        layer: index used to look up the output depth in `flow_layer_depths`
            and to build the parameter names.

    Returns:
        Tensor of shape [N_atoms_ph, flow_layer_depths[layer]].
    """
    flow_depth = flow_layer_depths[layer]

    next_activated_atoms = tf.zeros(tf.pack([N_atoms_ph, flow_depth]))

    # None of the following depends on the degree, so build it once instead of
    # once per loop iteration.
    atom_multiples = tf.pack([N_atoms_ph, 1, 1])
    activated_atoms_dim = tf.expand_dims(
        tf.tile(tf.expand_dims(activated_atoms, 0), atom_multiples), 2)
    mask_multiples = tf.pack([1, N_atoms_ph, flow_depth])
    bias_multiples = tf.pack([N_atoms_ph, N_atoms_ph, 1])

    for deg in range(1, 6):
        flow_param = bv_params['A_flow'+str(layer)+'_'+str(deg)]
        flow_map = tf.gather(flow_param, type_adj_ph)

        adj_mul = tf.batch_matmul(activated_atoms_dim, flow_map)
        adj_mul = tf.squeeze(adj_mul, [2])

        # 1.0 for atoms of the current degree, 0.0 otherwise.
        deg_mask = tf.to_float(tf.equal(deg_list_ph, deg))
        deg_list_dim = tf.tile(
            tf.expand_dims(tf.expand_dims(deg_mask, 1), 1), mask_multiples)

        biases = tf.tile(bv_params['b_flow'+str(layer)+'_'+str(deg)], bias_multiples)
        filtered_atoms = tf.add(tf.mul(adj_mul, deg_list_dim), biases)

        next_activated_atoms = next_activated_atoms + tf.reduce_sum(filtered_atoms, 1)

    next_activated_atoms = tf.nn.relu(next_activated_atoms)
    return next_activated_atoms
Beispiel #4
0
def tf_compute_distances(points, start_centers):
    """
    Build the ops that give, for every point, its squared Euclidean distance to
    every centroid, using the expansion |p|^2 + |c|^2 - 2 * p.c (no square root
    is taken).

    :param points: a 2d TF tensor of shape num_points x dim
    :param start_centers: a numpy array of shape num_centroid x dim
    :return: a TF tensor of shape num_points x num_centroids
    """
    with tf.variable_scope("distances"):
        # Static centroid count; the unpacking also insists on a 2-D array.
        num_centroids, _unused_dim = np.shape(start_centers)
        # The number of points is only known when the graph runs.
        n_points = tf.shape(points)[0]
        # Bake the centroids into the graph as a constant.
        centroid_const = tf.constant(start_centers)

        # Squared norms of every row, and the point/centroid inner products.
        point_sq_norms = tf.reduce_sum(tf.square(points), reduction_indices=1)
        centroid_sq_norms = tf.reduce_sum(tf.square(centroid_const), reduction_indices=1)
        cross_terms = tf.matmul(points, centroid_const, transpose_b=True)

        # Spread both norm vectors over the full num_points x num_centroids grid.
        centroid_grid = tf.tile(
            tf.expand_dims(centroid_sq_norms, 0), tf.stack([n_points, 1]))
        point_grid = tf.tile(
            tf.expand_dims(point_sq_norms, 1), tf.stack([1, num_centroids]))

        distances = centroid_grid + point_grid - 2 * cross_terms
    return distances
Beispiel #5
0
  def encode_coordinates_alt(self, net):
    """An alternative implementation of the coordinate encoding.

    Builds a one-hot encoding of every pixel's row index and column index and
    appends it to the feature axis of `net`.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]. The
        static shape is read with `as_list()`, so height and width must be
        known when the graph is built (batch_size presumably as well, since
        it is used as a tile multiple).

    Returns:
      a single tensor: `net` concatenated along axis 3 with `height` one-hot
      row channels followed by `width` one-hot column channels.
    """
    batch_size, h, w, _ = net.shape.as_list()
    # `range` instead of the Python-2-only `xrange`; h and w are small ints.
    h_loc = [
      tf.tile(
          tf.reshape(
              tf.contrib.layers.one_hot_encoding(
                  tf.constant([i]), num_classes=h), [h, 1]), [1, w])
      for i in range(h)
    ]
    h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
    w_loc = [
      tf.tile(
          tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
          [h, 1]) for i in range(w)
    ]
    w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
    loc = tf.concat([h_loc, w_loc], 2)
    # Same coordinate channels for every image in the batch.
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
Beispiel #6
0
def knn_point(k, xyz1, xyz2):
    '''
    Brute-force k-nearest-neighbour search between two point sets.

    Input:
        k: int32, number of k in k-nn search
        xyz1: (batch_size, ndataset, c) float32 array, input points
        xyz2: (batch_size, npoint, c) float32 array, query points
    Output:
        val: (batch_size, npoint, k) float32 array, the first k values
            returned by select_top_k for the pairwise distance matrix
            (the matrix itself holds squared L2 distances; no square
            root is taken here)
        idx: (batch_size, npoint, k) int32 array, indices to input points

    All of batch_size, ndataset, npoint and c are read from the static
    shapes, so they must be known when the graph is built.
    '''
    b = xyz1.get_shape()[0].value
    n = xyz1.get_shape()[1].value
    c = xyz1.get_shape()[2].value
    m = xyz2.get_shape()[1].value
    # Debug output. Written as a single formatted string so that it prints the
    # same text on Python 2 and Python 3.
    print("%s %s %s %s" % (b, n, c, m))
    print("%s %s" % (xyz1, (b, 1, n, c)))
    # Expand both sets to (b, m, n, c) so every query meets every input point.
    xyz1 = tf.tile(tf.reshape(xyz1, (b, 1, n, c)), [1, m, 1, 1])
    xyz2 = tf.tile(tf.reshape(xyz2, (b, m, 1, c)), [1, 1, n, 1])
    dist = tf.reduce_sum((xyz1 - xyz2) ** 2, -1)
    print("%s %s" % (dist, k))
    outi, out = select_top_k(k, dist)
    idx = tf.slice(outi, [0, 0, 0], [-1, -1, k])
    val = tf.slice(out, [0, 0, 0], [-1, -1, k])
    print("%s %s" % (idx, val))
    #val, idx = tf.nn.top_k(-dist, k=k) # ONLY SUPPORT CPU
    return val, idx
Beispiel #7
0
  def testShapeFunctionEdgeCases(self):
    """Checks tf.tile's static shape inference when shapes are only partly known."""
    # Fully known input, multiples whose shape is unknown.
    known_input = tf.constant(0.0, shape=[4, 4, 4, 4])
    result = tf.tile(known_input, tf.placeholder(tf.int32))
    self.assertEqual([None] * 4, result.get_shape().as_list())

    # Input of unknown shape, fully known multiples.
    unknown_input = tf.placeholder(tf.float32)
    result = tf.tile(unknown_input, [2, 2, 2, 2])
    self.assertEqual([None] * 4, result.get_shape().as_list())

    # Neither the input shape nor the multiples shape is known.
    unknown_input = tf.placeholder(tf.float32)
    result = tf.tile(unknown_input, tf.placeholder(tf.int32))
    self.assertIsNone(result.get_shape().ndims)

    # Known input, multiples only partially known.
    unit_input = tf.constant(0.0, shape=[1, 1])
    result = tf.tile(unit_input, [tf.placeholder(tf.int32), 7])
    self.assertEqual([None, 7], result.get_shape().as_list())

    # Rank-2 input against three multiples must be rejected.
    rank2_input = tf.placeholder(tf.float32, shape=[None, None])
    with self.assertRaises(ValueError):
      tf.tile(rank2_input, tf.placeholder(tf.int32, shape=[3]))
Beispiel #8
0
 def _build_predict(self, Xnew, full_cov=False):
     """
     Build the predictive mean and variance of the latent function at the
     new points Xnew. The derivation of the individual terms is given in the
     associated SGPR notebook.

     Returns (mean, var). With full_cov=True, var is the full covariance over
     Xnew repeated along a trailing axis once per column of Y; otherwise it is
     the marginal variance repeated once per column of Y.
     """
     n_inducing = len(self.feature)
     residual = self.Y - self.mean_function(self.X)
     K_uf = self.feature.Kuf(self.kern, self.X)
     K_uu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
     K_us = self.feature.Kuf(self.kern, Xnew)
     noise_std = tf.sqrt(self.likelihood.variance)

     chol_uu = tf.cholesky(K_uu)
     A = tf.matrix_triangular_solve(chol_uu, K_uf, lower=True) / noise_std
     B = tf.matmul(A, A, transpose_b=True) + tf.eye(n_inducing, dtype=settings.float_type)
     chol_B = tf.cholesky(B)
     c = tf.matrix_triangular_solve(
         chol_B, tf.matmul(A, residual), lower=True) / noise_std

     # Project the test-point cross-covariance through both Cholesky factors.
     proj_uu = tf.matrix_triangular_solve(chol_uu, K_us, lower=True)
     proj_B = tf.matrix_triangular_solve(chol_B, proj_uu, lower=True)
     mean = tf.matmul(proj_B, c, transpose_a=True)

     if full_cov:
         var = (self.kern.K(Xnew)
                + tf.matmul(proj_B, proj_B, transpose_a=True)
                - tf.matmul(proj_uu, proj_uu, transpose_a=True))
         multiples = tf.stack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), multiples)
     else:
         var = (self.kern.Kdiag(Xnew)
                + tf.reduce_sum(tf.square(proj_B), 0)
                - tf.reduce_sum(tf.square(proj_uu), 0))
         multiples = tf.stack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), multiples)
     return mean + self.mean_function(Xnew), var
def routing(input, b_IJ):
    ''' The routing algorithm.

    Args:
        input: A Tensor with [batch_size, num_caps_l=1152, 1, length(u_i)=8, 1]
            shape, num_caps_l meaning the number of capsule in the layer l.
        b_IJ: A Tensor of routing logits with shape [1, 1152, 10, 1, 1].
    Returns:
        A Tensor of shape [batch_size, 1, 10, 16, 1] (the shape asserted on
        `v_J` below), representing the vector output `v_j` in the layer l+1
        after the last routing iteration.
    Notes:
        u_i represents the vector output of capsule i in the layer l, and
        v_j the vector output of capsule j in the layer l+1.
        `cfg.iter_routing` must be at least 1, otherwise `v_J` is never
        computed and the final return fails.
    '''

    # W: [1, num_caps_i, num_caps_j, len_u_i, len_v_j]
    W = tf.get_variable('Weight', shape=(1, 1152, 10, 8, 16), dtype=tf.float32,
                        initializer=tf.random_normal_initializer(stddev=cfg.stddev))

    # Eq.2, calc u_hat
    # do tiling for input and W before matmul
    # input => [batch_size, 1152, 10, 8, 1]
    # W => [batch_size, 1152, 10, 8, 16]
    input = tf.tile(input, [1, 1, 10, 1, 1])
    W = tf.tile(W, [cfg.batch_size, 1, 1, 1, 1])
    assert input.get_shape() == [cfg.batch_size, 1152, 10, 8, 1]

    # in last 2 dims:
    # [8, 16].T x [8, 1] => [16, 1] => [batch_size, 1152, 10, 16, 1]
    u_hat = tf.matmul(W, input, transpose_a=True)
    assert u_hat.get_shape() == [cfg.batch_size, 1152, 10, 16, 1]

    # line 3,for r iterations do
    for r_iter in range(cfg.iter_routing):
        with tf.variable_scope('iter_' + str(r_iter)):
            # line 4:
            # => [1, 1152, 10, 1, 1]
            c_IJ = tf.nn.softmax(b_IJ, dim=2)
            c_IJ = tf.tile(c_IJ, [cfg.batch_size, 1, 1, 1, 1])
            assert c_IJ.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]

            # line 5:
            # weighting u_hat with c_IJ, element-wise in the last two dims
            # => [batch_size, 1152, 10, 16, 1]
            s_J = tf.multiply(c_IJ, u_hat)
            # then sum in the second dim, resulting in [batch_size, 1, 10, 16, 1]
            s_J = tf.reduce_sum(s_J, axis=1, keep_dims=True)
            assert s_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 6:
            # squash using Eq.1,
            v_J = squash(s_J)
            assert v_J.get_shape() == [cfg.batch_size, 1, 10, 16, 1]

            # line 7:
            # tile v_j from [batch_size, 1, 10, 16, 1] to [batch_size, 1152, 10, 16, 1],
            # then matmul in the last two dims: [16, 1].T x [16, 1] => [1, 1], and
            # sum over the batch_size dim, resulting in [1, 1152, 10, 1, 1]
            v_J_tiled = tf.tile(v_J, [1, 1152, 1, 1, 1])
            u_produce_v = tf.matmul(u_hat, v_J_tiled, transpose_a=True)
            assert u_produce_v.get_shape() == [cfg.batch_size, 1152, 10, 1, 1]
            b_IJ += tf.reduce_sum(u_produce_v, axis=0, keep_dims=True)

    return v_J
def w(input_data, cu, kappas_t_1, config):
    """Build the soft attention window over the character sequence `cu`.

    `input_data` is split along axis 1 into three equal parts that become the
    window parameters: alpha and beta are the exponentials of the first two
    parts, and kappa is the previous kappa plus the exponential of the third.
    For every position u in 1..speech_length the window weight is
    sum over mixtures of alpha * exp(-beta * (kappa - u)^2).

    Args:
        input_data: tensor whose axis 1 holds the three parameter groups
            (3 * mixture_size values per batch row).
        cu: 3-D tensor whose axis 1 is the sequence length; it is
            batch-multiplied by the window weights.
        kappas_t_1: kappa values from the previous step.
        config: object providing `batch_size` and `mixture_size`.

    Returns:
        Tuple (output, kappas_t, phi_t): the window-weighted sum over `cu`
        with axis 1 squeezed out, the updated kappa values, and the window
        weights with a singleton axis 1.
    """
    batch_size = config.batch_size
    mixture_size = config.mixture_size

    # split along dim of mixture size * 3
    hat_alphas_t, hat_betas_t, hat_kappas_t = tf.split(1, 3, input_data)

    alphas_t = tf.exp(hat_alphas_t)
    betas_t = tf.exp(hat_betas_t)
    kappas_t = tf.add(kappas_t_1, tf.exp(hat_kappas_t))

    speech_length = tf.shape(cu)[1]

    # Positions 1..speech_length, repeated for every batch row and mixture.
    u = tf.linspace(1.0, tf.cast(speech_length, tf.float32), speech_length)
    u = tf.expand_dims(u, 0)
    u = tf.expand_dims(u, 0)
    u = tf.tile(u, [batch_size, mixture_size, 1])

    # Repeat each window parameter along a new trailing position axis.
    alphas_t_expanded = tf.tile(tf.expand_dims(alphas_t, -1), [1, 1, speech_length])
    betas_t_expanded = tf.tile(tf.expand_dims(betas_t, -1), [1, 1, speech_length])
    kappas_t_expanded = tf.tile(tf.expand_dims(kappas_t, -1), [1, 1, speech_length])

    calc = tf.square(tf.sub(kappas_t_expanded, u))
    calc = tf.mul(calc, tf.neg(betas_t_expanded))
    calc = tf.exp(calc)
    calc = tf.mul(calc, alphas_t_expanded)

    # Sum over the mixture axis and keep it as a singleton for the matmul.
    phi_t = tf.expand_dims(tf.reduce_sum(calc, 1), 1)

    output = tf.squeeze(tf.batch_matmul(phi_t, cu), [1])

    return output, kappas_t, phi_t
Beispiel #11
0
    def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0):
        """Create the per-layer forward/backward GRU cells, initial states and dropout masks.

        For every layer a (forward, backward) pair is appended to each of
        `self.grus`, `self.inits` and `self.dropout_mask`.

        Args:
            num_layers: number of stacked layers to create.
            num_units: hidden size of every GRU cell.
            batch_size: batch size used to tile the initial states and to
                shape the dropout masks.
            input_size: feature size fed to the first layer; later layers use
                2 * num_units (presumably the concatenated forward/backward
                outputs -- TODO confirm).
            keep_prob: keep probability passed to `tf.nn.dropout`.
        """
        self.num_layers = num_layers
        self.grus = []
        self.inits = []
        self.dropout_mask = []
        for layer_idx in range(num_layers):
            layer_input_size = 2 * num_units if layer_idx else input_size

            cell_fw = tf.nn.rnn_cell.MultiRNNCell(
                [tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])
            cell_bw = tf.nn.rnn_cell.MultiRNNCell(
                [tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])

            # One trainable state vector per direction, broadcast to
            # [1, batch_size, num_units].
            state_fw = tf.Variable(tf.zeros([num_units]))
            state_fw = tf.expand_dims(
                tf.tile(tf.expand_dims(state_fw, axis=0), [batch_size, 1]), axis=0)
            state_bw = tf.Variable(tf.zeros([num_units]))
            state_bw = tf.expand_dims(
                tf.tile(tf.expand_dims(state_bw, axis=0), [batch_size, 1]), axis=0)

            # Dropout masks are built once here from a tensor of ones.
            mask_shape = [1, batch_size, layer_input_size]
            drop_fw = tf.nn.dropout(
                tf.ones(mask_shape, dtype=tf.float32), keep_prob=keep_prob)
            drop_bw = tf.nn.dropout(
                tf.ones(mask_shape, dtype=tf.float32), keep_prob=keep_prob)

            self.grus.append((cell_fw, cell_bw))
            self.inits.append((state_fw, state_bw))
            self.dropout_mask.append((drop_fw, drop_bw))
Beispiel #12
0
 def while_step(t, rnn_state, tas, accs):
   """Implements one timestep of FIVO computation.

   Reads `inputs_ta`, `mask_ta`, `cell`, `num_samples`, `batch_size`,
   `random_seed` and `resampling_criterion` from the enclosing scope.

   Args:
     t: current timestep index, used to read the input TensorArrays and to
       write the output ones.
     rnn_state: state passed to `cell` for this step.
     tas: TensorArrays that receive, in order, the accumulated log weights,
       the log effective sample size and the float resampling decision.
     accs: tuple (log_weights_acc, log_p_hat_acc, kl_acc) of accumulators.

   Returns:
     Tuple (t + 1, new_state, new_tas, new_accs): the next timestep, the cell
     state gathered at the chosen ancestor indices, the TensorArrays after
     this step's writes, and the updated accumulators.
   """
   log_weights_acc, log_p_hat_acc, kl_acc = accs
   cur_inputs, cur_mask = nested.read_tas([inputs_ta, mask_ta], t)
   # Run the cell for one step.
   log_q_z, log_p_z, log_p_x_given_z, kl, new_state = cell(
       cur_inputs,
       rnn_state,
       cur_mask,
   )
   # Compute the incremental weight and use it to update the current
   # accumulated weight.
   kl_acc += kl * cur_mask
   log_alpha = (log_p_x_given_z + log_p_z - log_q_z) * cur_mask
   log_alpha = tf.reshape(log_alpha, [num_samples, batch_size])
   log_weights_acc += log_alpha
   # Calculate the effective sample size.
   # In log space: log ESS = 2 * logsumexp(w) - logsumexp(2 * w).
   ess_num = 2 * tf.reduce_logsumexp(log_weights_acc, axis=0)
   ess_denom = tf.reduce_logsumexp(2 * log_weights_acc, axis=0)
   log_ess = ess_num - ess_denom
   # Calculate the ancestor indices via resampling. Because we maintain the
   # log unnormalized weights, we pass the weights in as logits, allowing
   # the distribution object to apply a softmax and normalize them.
   resampling_dist = tf.contrib.distributions.Categorical(
       logits=tf.transpose(log_weights_acc, perm=[1, 0]))
   ancestor_inds = tf.stop_gradient(
       resampling_dist.sample(sample_shape=num_samples, seed=random_seed))
   # Because the batch is flattened and laid out as discussed
   # above, we must modify ancestor_inds to index the proper samples.
   # The particles in the ith filter are distributed every batch_size rows
   # in the batch, and offset i rows from the top. So, to correct the indices
   # we multiply by the batch_size and add the proper offset. Crucially,
   # when ancestor_inds is flattened the layout of the batch is maintained.
   offset = tf.expand_dims(tf.range(batch_size), 0)
   ancestor_inds = tf.reshape(ancestor_inds * batch_size + offset, [-1])
   # Identity indices, used for the filters that do not resample.
   noresample_inds = tf.range(num_samples * batch_size)
   # Decide whether or not we should resample; don't resample if we are past
   # the end of a sequence.
   should_resample = resampling_criterion(num_samples, log_ess, t)
   should_resample = tf.logical_and(should_resample,
                                    cur_mask[:batch_size] > 0.)
   float_should_resample = tf.to_float(should_resample)
   # Repeat the per-filter decision so it lines up with the flattened
   # num_samples * batch_size index vectors.
   ancestor_inds = tf.where(
       tf.tile(should_resample, [num_samples]),
       ancestor_inds,
       noresample_inds)
   new_state = nested.gather_tensors(new_state, ancestor_inds)
   # Update the TensorArrays before we reset the weights so that we capture
   # the incremental weights and not zeros.
   ta_updates = [log_weights_acc, log_ess, float_should_resample]
   new_tas = [ta.write(t, x) for ta, x in zip(tas, ta_updates)]
   # For the particle filters that resampled, update log_p_hat and
   # reset weights to zero.
   log_p_hat_update = tf.reduce_logsumexp(
       log_weights_acc, axis=0) - tf.log(tf.to_float(num_samples))
   log_p_hat_acc += log_p_hat_update * float_should_resample
   log_weights_acc *= (1. - tf.tile(float_should_resample[tf.newaxis, :],
                                    [num_samples, 1]))
   new_accs = (log_weights_acc, log_p_hat_acc, kl_acc)
   return t + 1, new_state, new_tas, new_accs
Beispiel #13
0
def ae_latent_sample_beam(latents_dense_in, inputs, ed, embed, hparams):
  """Sample discrete latents from the autoencoder with beam search.

  Returns the ids of the first beam with a trailing singleton axis added and
  the leading all-zeros start position removed.
  """
  num_symbols = 2**hparams.z_size
  beam_size = 1  # TODO(lukaszkaiser): larger beam sizes seem to work bad.
  tiled_inputs = tf.tile(inputs, [beam_size, 1, 1])
  tiled_ed = tf.tile(ed, [beam_size, 1, 1, 1])

  def symbols_to_logits_fn(ids):
    """Map the ids decoded so far to logits for the current position."""
    ids_3d = tf.expand_dims(ids, axis=2)  # Ids start with added all-zeros.
    # Drop the leading start id and pad one position at the end.
    shifted_ids = tf.pad(ids_3d[:, 1:], [[0, 0], [0, 1], [0, 0]])

    with tf.variable_scope(tf.get_variable_scope(), reuse=False):
      dense_latents = embed(shifted_ids)
      predicted = decode_transformer(
          tiled_inputs, tiled_ed, dense_latents, hparams, "extra")
      all_logits = tf.layers.dense(predicted, num_symbols, name="extra_logits")
      last_position = common_layers.shape_list(ids_3d)[1] - 1
      step_logits = all_logits[:, last_position, :, :]
    return tf.squeeze(step_logits, axis=[1])

  latents_shape = tf.shape(latents_dense_in)
  initial_ids = tf.zeros([latents_shape[0]], dtype=tf.int32)
  length = latents_shape[1]
  ids, _ = beam_search.beam_search(
      symbols_to_logits_fn, initial_ids, beam_size, length,
      num_symbols, alpha=0.0, eos_id=-1, stop_early=False)

  first_beam = tf.expand_dims(ids[:, 0, :], axis=2)  # Pick first beam.
  return first_beam[:, 1:]  # Remove the added all-zeros from ids.
def tf_format_mnist_images(X, Y, Y_, n=100, lines=10):
    """Assemble a grid image of n digits with the misclassified ones first.

    Misclassified digits get a red background and have the correct digit (from
    Y_) and the computed digit (from Y) stencilled on them; correctly
    classified digits get a white background and no overlay. The n images are
    laid out as `lines` rows of n // lines images each and returned as one
    uint8 tensor.
    """
    is_correct = tf.equal(tf.argmax(Y, 1), tf.argmax(Y_, 1))
    right_idx = tf.squeeze(tf.where(is_correct), [1])
    wrong_idx = tf.squeeze(tf.where(tf.logical_not(is_correct)), [1])
    # Misclassified images come first; only the first n are kept for display.
    shown_idx = tf.slice(tf.concat([wrong_idx, right_idx], 0), [0], [n])

    shown_images = tf.gather(X, shown_idx)
    shown_computed = tf.gather(Y, shown_idx)
    shown_expected = tf.gather(Y_, shown_idx)
    shown_is_correct = tf.gather(is_correct, shown_idx)

    # Small digit glyphs: the correct digit on the left, the computed one on the right.
    left_glyphs = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_left())
    expected_tags = tf.gather(left_glyphs, tf.argmax(shown_expected, 1))
    right_glyphs = tf.image.grayscale_to_rgb(tensorflowvisu_digits.digits_right())
    computed_tags = tf.gather(right_glyphs, tf.argmax(shown_computed, 1))
    # Only misclassified images get the two digits printed on them.
    overlay = tf.where(
        shown_is_correct, tf.zeros_like(expected_tags), expected_tags + computed_tags)

    white_bkg = tf.reshape(tf.tile([1.3,1.3,1.3], [28*28]), [1, 28,28,3])
    red_bkg = tf.reshape(tf.tile([1.3,1.0,1.0], [28*28]), [1, 28,28,3])
    # Index 0 selects the red background, index 1 the white one.
    backgrounds = tf.gather(
        tf.concat([red_bkg, white_bkg], 0), tf.cast(shown_is_correct, tf.int32))

    canvas = tf.image.grayscale_to_rgb(shown_images)
    canvas = ((1-(canvas+overlay))*backgrounds)/1.3
    canvas = tf.image.convert_image_dtype(canvas, tf.uint8, saturate=True)

    # Each row is n // lines images joined side by side; rows are then stacked.
    per_row = n//lines
    rows = [
        tf.concat(
            tf.unstack(tf.slice(canvas, [row*n//lines,0,0,0], [per_row,28,28,3])), 1)
        for row in range(lines)
    ]
    return tf.concat(rows, 0)
Beispiel #15
0
        def fztloss( f, pVecs, nVecs ):
            """
            Tensorized cost function from Fast Zero-Shot Learning paper

            Sums softplus(negative score - positive score) over all pairs of
            positive and negative tags, where a score is the dot product of a
            tag embedding with the network output.

            Args:
                f: The output from the network, a tensor of shape (# images, word embedding size)
                pVecs: The vector embeddings of the ground truth tags, a tensor
                    of shape (# images, # positive tags, word embedding size)
                nVecs: The vector embeddings of negatively sampled tags, a tensor
                    of shape (# images, # negative samples, word embedding size)

            Returns:
                Scalar tensor representing the batch cost

            NOTE(review): `f` is multiplied with the rank-3 tag tensors by plain
            broadcasting; confirm that the caller passes shapes that broadcast
            as intended.
            """
            posmul = tf.mul(pVecs, f)
            negmul = tf.mul(nVecs, f)

            # Dot products along the embedding axis.
            tfpos = tf.reduce_sum(posmul, reduction_indices=2)
            tfneg = tf.reduce_sum(negmul, reduction_indices=2)

            tfpos = tf.transpose(tfpos, [1,0])
            tfneg = tf.transpose(tfneg, [1,0])

            # Expand both score matrices so that every negative score can be
            # compared with every positive score.
            negexpan = tf.tile( tf.expand_dims(tfneg, -1), [1, 1, tf.shape(tfpos)[1]] )
            posexpan = tf.tile( tf.transpose(tf.expand_dims(tfpos, -1), [0,2,1]), [1, tf.shape(tfneg)[1], 1])
            differences = tf.sub(negexpan, posexpan)

            # softplus(x) == log(1 + exp(x)), but does not overflow to inf when
            # x is large.
            pair_costs = tf.nn.softplus(differences)
            return tf.reduce_sum(tf.reduce_sum(pair_costs, reduction_indices=[1,2]))
Beispiel #16
0
 def compute_max_or_min(self, select, maxi=True):
   """Computes the argmax or argmin of each column under probabilistic row selection.

   Walks the entries of `self.full_processed_sorted_index_column` along its
   last axis (front to back when `maxi` is true, back to front otherwise). At
   every step the row named by the current sorted index receives its
   selection probability scaled by the probability mass not yet assigned to
   earlier steps.

   Args:
     select: row selection probabilities; expanded on axis 1 to broadcast
       over the columns.
     maxi: walk the sorted indices front to back if true, back to front
       otherwise.

   Returns:
     Tensor of shape [batch_size, num_cols + num_word_cols, max_elements].
   """
   total_cols = self.num_cols + self.num_word_cols
   answer = tf.zeros([self.batch_size, total_cols, self.max_elements],
                     self.data_type)
   sum_prob = tf.zeros([self.batch_size, total_cols], self.data_type)
   # Row positions 0..max_elements-1 repeated for every batch entry and
   # column. This does not depend on the loop variable, so it is built once.
   row_positions = tf.tile(
       tf.expand_dims(
           tf.tile(
               tf.expand_dims(tf.range(self.max_elements), 0),
               [self.batch_size, 1]), 1),
       [1, total_cols, 1])
   for j in range(self.max_elements):
     curr_pos = j if maxi else self.max_elements - 1 - j
     select_index = tf.slice(self.full_processed_sorted_index_column,
                             [0, 0, curr_pos], [self.batch_size, -1, 1])
     # True exactly at the row named by the current sorted index.
     select_mask = tf.equal(row_positions, select_index)
     curr_prob = tf.expand_dims(select, 1) * tf.cast(
         select_mask, self.data_type) * self.select_bad_number_mask
     # Scale by the probability mass that earlier steps left unassigned and
     # zero it out once no mass remains.
     curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
     curr_prob = curr_prob * tf.expand_dims(
         tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
     answer = tf.where(select_mask, curr_prob, answer)
     sum_prob += tf.reduce_sum(curr_prob, 2)
   return answer
Beispiel #17
0
 def build_predict(self, Xnew, full_cov=False):
     """
     Build the predictive mean and variance of the latent function at the
     new points Xnew. The derivation of the individual terms is given in the
     associated SGPR notebook.

     Returns (mean, var). With full_cov=True, var is the full covariance over
     Xnew repeated along a trailing axis once per column of Y; otherwise it is
     the marginal variance repeated once per column of Y.
     """
     n_inducing = tf.shape(self.Z)[0]
     residual = self.Y - self.mean_function(self.X)
     K_uf = self.kern.K(self.Z, self.X)
     # A small diagonal term is added to Kuu before the Cholesky factorisation.
     K_uu = self.kern.K(self.Z) + eye(n_inducing) * 1e-6
     K_us = self.kern.K(self.Z, Xnew)
     noise_std = tf.sqrt(self.likelihood.variance)

     chol_uu = tf.cholesky(K_uu)
     A = tf.matrix_triangular_solve(chol_uu, K_uf, lower=True) / noise_std
     B = tf.matmul(A, tf.transpose(A)) + eye(n_inducing)
     chol_B = tf.cholesky(B)
     c = tf.matrix_triangular_solve(
         chol_B, tf.matmul(A, residual), lower=True) / noise_std

     # Project the test-point cross-covariance through both Cholesky factors.
     proj_uu = tf.matrix_triangular_solve(chol_uu, K_us, lower=True)
     proj_B = tf.matrix_triangular_solve(chol_B, proj_uu, lower=True)
     mean = tf.matmul(tf.transpose(proj_B), c)

     if full_cov:
         var = (self.kern.K(Xnew)
                + tf.matmul(tf.transpose(proj_B), proj_B)
                - tf.matmul(tf.transpose(proj_uu), proj_uu))
         multiples = tf.pack([1, 1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 2), multiples)
     else:
         var = (self.kern.Kdiag(Xnew)
                + tf.reduce_sum(tf.square(proj_B), 0)
                - tf.reduce_sum(tf.square(proj_uu), 0))
         multiples = tf.pack([1, tf.shape(self.Y)[1]])
         var = tf.tile(tf.expand_dims(var, 1), multiples)
     return mean + self.mean_function(Xnew), var
def get_locs_cost(locs, mean, reward, base):
    """
    Build the baseline-corrected, reward-weighted cost of sampled locations.

    Relies on the names ``n_sample``, ``batch_size`` and ``stddev`` being
    defined in the enclosing scope.

    Args:
        locs: sampled locations; compared element-wise with the tiled mean.
        mean: location means; a new axis 1 is inserted and tiled
            ``n_sample`` times along it.
        reward: reward tensor from which the tiled baseline is subtracted.
        base: baseline; two leading axes are added and tiled to
            ``[batch_size, n_sample]``, then cast to float32.

    Returns:
        The sum of ``0.5 * ((locs - mean) / stddev) ** 2`` (reduced over
        axis 2) weighted by ``reward - base``, divided by
        ``n_sample * batch_size``.
    """
    mean_d = tf.tile(tf.expand_dims(mean, 1), [1, n_sample, 1])
    tiled_base = tf.cast(tf.tile(tf.expand_dims(tf.expand_dims(base, 0), 0), [batch_size, n_sample]), tf.float32)
    # Single pre-formatted string: valid on Python 3 and prints the same text
    # the former Python 2 print statement produced.
    print("TILED BASE SHAPE:  %s %s" % (tiled_base.get_shape(), tiled_base.dtype))
    red_reward = reward - tiled_base
    reward_cost_arr = tf.mul(tf.reduce_sum(0.5 * tf.square((locs-mean_d)/stddev), 2), red_reward)
    return tf.div(tf.reduce_sum(reward_cost_arr), (n_sample * batch_size))
Beispiel #19
0
 def compute_ans(op_embedding, comparison):
   """Build the [batch_size, 1] answer tensor for one operation embedding.

   Two scores are computed against op_embedding (one per ordinal question
   mask), concatenated along axis 1 and passed through a softmax. The result
   weights self.batch_question_number and self.batch_question_number_one,
   and the weighted sum over axis 1 is returned. In "test" mode the softmax
   is replaced by a 0/1 indicator of the row-wise maximum.

   `comparison` is accepted but not used in this function. `self` and
   `hidden_vectors` come from the enclosing scope.
   """
   op_embedding = tf.expand_dims(op_embedding, 0)
   embedding_dims = self.utility.FLAGS.embedding_dims

   def score_for(ordinal_mask):
     # dot product of operation embedding with hidden state to the left of
     # the number occurrence
     tiled_mask = tf.tile(
         tf.expand_dims(tf.transpose(ordinal_mask), 2),
         [1, 1, embedding_dims])
     selected_hidden = tf.reduce_sum(hidden_vectors * tiled_mask, 0)
     return tf.transpose(
         tf.matmul(op_embedding, tf.transpose(selected_hidden)))

   first = score_for(self.batch_ordinal_question)
   second = self.batch_question_number_one_mask + score_for(
       self.batch_ordinal_question_one)

   number_weights = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
   if self.mode == "test":
     # harden the distribution: 1.0 where a row attains its maximum, else 0.0
     row_max = tf.reshape(
         tf.reduce_max(number_weights, 1), [self.batch_size, 1])
     is_row_max = tf.equal(number_weights, row_max)
     number_weights = tf.where(
         is_row_max,
         tf.fill(tf.shape(number_weights), 1.0),
         tf.fill(tf.shape(number_weights), 0.0))
     number_weights = tf.cast(number_weights, self.data_type)

   candidates = tf.concat(
       axis=1,
       values=[self.batch_question_number, self.batch_question_number_one])
   weighted_sum = tf.reduce_sum(number_weights * candidates, 1)
   return tf.reshape(weighted_sum, [self.batch_size, 1])
  def _meshgrid(depth, height, width, z_near, z_far):
    """Build a flattened 4-row sampling grid for a depth x height x width volume.

    x and y are linspaces over [-1, 1] (width and height samples); z is a
    linspace from z_near to z_far with `depth` samples. The z samples are
    inverted, and x and y are divided by that inverse.

    Returns a tensor with four rows, in order: d (the reciprocal of the
    inverted z), y, x, and ones, each flattened to length
    depth * height * width.
    """
    with tf.variable_scope('_meshgrid'):
      x_line = tf.linspace(-1.0, 1.0, width)
      x_t = tf.reshape(tf.tile(x_line, [height * depth]),
                       [depth, height, width])

      y_line = tf.linspace(-1.0, 1.0, height)
      y_t = tf.reshape(tf.tile(y_line, [width * depth]),
                       [depth, width, height])
      y_t = tf.transpose(y_t, [0, 2, 1])

      z_line = tf.linspace(float(z_near), float(z_far), depth)
      z_t = tf.reshape(tf.tile(z_line, [width * height]),
                       [height, width, depth])
      z_t = tf.transpose(z_t, [2, 0, 1])

      # note: d_t is computed from the already inverted z values
      inv_z = 1 / z_t
      d_t = 1 / inv_z
      x_t = x_t / inv_z
      y_t = y_t / inv_z

      row_shape = (1, -1)
      x_row = tf.reshape(x_t, row_shape)
      y_row = tf.reshape(y_t, row_shape)
      d_row = tf.reshape(d_t, row_shape)
      ones_row = tf.ones_like(x_row)

      return tf.concat([d_row, y_row, x_row, ones_row], 0)
Beispiel #21
0
    def __init__(self, config):
        """Build the input placeholders and the mean / covariance of Psi.

        For every evidence in config.evidence this creates a placeholder, an
        existence mask and an encoding. Sets self.inputs, self.encodings,
        self.psi_mean and self.psi_covariance.
        """
        evidences = config.evidence
        self.inputs = [ev.placeholder(config) for ev in evidences]
        present = [ev.exists(inp) for ev, inp in zip(evidences, self.inputs)]
        zero_encoding = tf.zeros([config.batch_size, config.latent_size], dtype=tf.float32)

        # Compute the denominator used for mean and covariance
        for ev in evidences:
            ev.init_sigma(config)
        inverse_variances = []
        for ev, is_present in zip(evidences, present):
            # 1 / sigma^2 where the evidence exists, 0 elsewhere
            inverse_variances.append(
                tf.where(is_present,
                         tf.tile([1. / tf.square(ev.sigma)], [config.batch_size]),
                         tf.zeros(config.batch_size)))
        total = 1. + tf.reduce_sum(tf.stack(inverse_variances), axis=0)
        denom = tf.tile(tf.reshape(total, [-1, 1]), [1, config.latent_size])

        # Compute the mean of Psi
        with tf.variable_scope('mean'):
            # 1. compute encoding, scaled by 1 / sigma^2
            self.encodings = [ev.encode(inp, config)
                              for ev, inp in zip(evidences, self.inputs)]
            scaled = [enc / tf.square(ev.sigma)
                      for ev, enc in zip(evidences, self.encodings)]

            # 2. keep encodings of inputs that exist, use the zero encoding otherwise
            masked = [tf.where(is_present, enc, zero_encoding)
                      for is_present, enc in zip(present, scaled)]

            # 3. repeat each encoding ev.tile times
            repeated = []
            for ev, enc in zip(evidences, masked):
                repeated.extend([enc] * ev.tile)
            stacked = tf.stack(repeated)

            # 4. sum over evidences and normalise by the denominator
            self.psi_mean = tf.reduce_sum(stacked, axis=0) / denom

        # Compute the covariance of Psi
        with tf.variable_scope('covariance'):
            ones = tf.ones([config.batch_size, config.latent_size], dtype=tf.float32)
            self.psi_covariance = ones / denom
Beispiel #22
0
 def q_zt(self, unused_observation, prev_state, t):
   """Return the Normal distribution over z_t for timestep t.

   The location is self.mus[t]; the scale is the square root of
   max(softplus(self.sigmas[t]), self.sigma_min). Both get a leading axis
   and are tiled to the batch size read from prev_state. The observation
   argument is not used.
   """
   n_batch = tf.shape(prev_state)[0]
   loc = tf.tile(self.mus[t][tf.newaxis, :], [n_batch, 1])
   floored_sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min)
   floored_sigma = tf.tile(floored_sigma[tf.newaxis, :], [n_batch, 1])
   return tf.contrib.distributions.Normal(
       loc=loc, scale=tf.sqrt(floored_sigma))
 def build_loss(self, logits, labels, lambs):
     """
     Build the summed per-class loss and register the tensors on self.

     Args:
         logits: raw scores; passed through framwork.sig_func and transposed
             so that each row corresponds to one class
             (assumes [batch_size, image_classes] — TODO confirm).
         labels: integer class labels, compared against every class index
             (assumes shape [batch_size] — TODO confirm).
         lambs: per-class values handed to framwork.loss_func.

     Returns:
         None. The tensors 'labels_unique', 'template', 'logits_sig_trans',
         'loss' and 'indict_logic' are stored in self.net_tensors and their
         names appended to self.tensors_names.
     """
     # put a sigfunction on logits and then transpose
     logits = tf.transpose(framwork.sig_func(logits))
     # labels_unique enumerates every class index; list(...) because older
     # TensorFlow releases reject a Python 3 range object in tf.constant
     labels_unique = tf.constant(list(range(self.image_classes)), dtype=tf.int32)
     labels_num = self.image_classes
     logits = tf.gather(logits, indices=labels_unique)
     lambs = tf.gather(lambs, indices=labels_unique)
     # indict_logic[c, b] is True when labels[b] == c
     template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
     labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
     indict_logic = tf.equal(labels_expand, template)
     # split the tensors along rows, one piece per class
     logit_list = tf.split(0, labels_num, logits)
     indict_logic_list = tf.split(0, labels_num, indict_logic)
     lambda_list = tf.split(0, self.image_classes, lambs)
     # Build a real list: map() is a lazy iterator on Python 3, and
     # tf.add_n requires a list of tensors.
     loss_list = [
         framwork.loss_func(logit_row, indicator_row, lamb)
         for logit_row, indicator_row, lamb
         in zip(logit_list, indict_logic_list, lambda_list)
     ]
     loss = tf.add_n(loss_list)
     tensors_dict = {'labels_unique': labels_unique, 'template': template, 'logits_sig_trans': logits,
                     'loss': loss, 'indict_logic': indict_logic}
     self.tensors_names.extend(tensors_dict.keys())
     self.net_tensors.update(tensors_dict)
Beispiel #24
0
    def build_predict(self, Xnew, full_cov=False):
        """
        Compute the mean and variance of the latent function at some new points
        Xnew.

        Uses the third, fourth and seventh values returned by
        self.build_common_terms() together with kern.K(self.Z, Xnew).

        Returns a (mean, var) pair. The mean includes the mean function
        evaluated at Xnew. With full_cov=True the variance starts from
        kern.K(Xnew) and is tiled along a new third axis; otherwise it
        starts from kern.Kdiag(Xnew) and is tiled along a new second axis.
        The number of copies is tf.shape(self.Y)[1] in both cases.
        """
        _, _, lower_uu, lower_l, _, _, gamma = self.build_common_terms()
        Kus = self.kern.K(self.Z, Xnew)  # size  M x Xnew

        # size M x Xnew
        solved_uu = tf.matrix_triangular_solve(lower_uu, Kus, lower=True)

        back_solved = tf.matrix_triangular_solve(
            tf.transpose(lower_l), gamma, lower=False)
        mean = tf.matmul(tf.transpose(solved_uu), back_solved)
        mean = mean + self.mean_function(Xnew)
        solved_l = tf.matrix_triangular_solve(lower_l, solved_uu, lower=True)

        if full_cov:
            var = self.kern.K(Xnew)
            var = var - tf.matmul(tf.transpose(solved_uu), solved_uu)
            var = var + tf.matmul(tf.transpose(solved_l), solved_l)
            multiples = tf.pack([1, 1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 2), multiples)
        else:
            # size Xnew,
            var = self.kern.Kdiag(Xnew)
            var = var - tf.reduce_sum(tf.square(solved_uu), 0)
            var = var + tf.reduce_sum(tf.square(solved_l), 0)
            multiples = tf.pack([1, tf.shape(self.Y)[1]])
            var = tf.tile(tf.expand_dims(var, 1), multiples)

        return mean, var
    def build_network(self):
        """
        Build the placeholder-fed graph producing one lamb_func result per class.

        Inside self.net_graph on self.net_device, creates placeholders for
        logits ([batch_size, image_classes]), labels ([batch_size]) and lambs
        ([image_classes]), then calls framwork.lamb_func once per class.

        Returns:
            None. self.net_tensors is updated with 'left_right_tuples',
            'logits', 'labels' and 'lambs'. Note that 'logits' and 'lambs'
            refer to the transformed / gathered tensors, not to the
            placeholders created at the top.
        """
        net_tensors = self.net_tensors
        with self.net_graph.as_default(), tf.device(self.net_device):
            logits = tf.placeholder(dtype=tf.float32, shape=(self.batch_size, self.image_classes))
            labels = tf.placeholder(dtype=tf.int32, shape=(self.batch_size,))
            lambs = tf.placeholder(dtype=tf.float32, shape=(self.image_classes,))
            # put a sigfunction on logits and then transpose, one row per class
            logits = tf.transpose(framwork.sig_func(logits))

            # labels_unique enumerates every class index; list(...) because
            # older TensorFlow releases reject a Python 3 range object here
            labels_unique = tf.constant(list(range(self.image_classes)), dtype=tf.int32)
            labels_num = self.image_classes
            logits = tf.gather(logits, indices=labels_unique)
            lambs = tf.gather(lambs, indices=labels_unique)
            # indict_logic[c, b] is True when labels[b] == c
            template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, self.batch_size])
            labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
            indict_logic = tf.equal(labels_expand, template)
            # split the tensors along rows, one piece per class
            logit_list = tf.split(0, labels_num, logits)
            indict_logic_list = tf.split(0, labels_num, indict_logic)
            lamb_list = tf.split(0, self.image_classes, lambs)
            logit_list = [tf.squeeze(item) for item in logit_list]
            indict_logic_list = [tf.squeeze(item) for item in indict_logic_list]
            left_right_tuples = [
                framwork.lamb_func(logit_row, indicator_row, lamb=lamb)
                for logit_row, indicator_row, lamb
                in zip(logit_list, indict_logic_list, lamb_list)
            ]
            net_tensors.update({'left_right_tuples': left_right_tuples, 'logits': logits, 'labels': labels,
                                'lambs': lambs})
Beispiel #26
0
 def compute_attention(self, image, text):
     """Build two attention glimpses over the image features, conditioned on text.

     The text features are replicated over a 14x14 grid, concatenated with
     the image features along axis 3 and passed through two 1x1 convolutions
     (512 filters, then 2 filters). Each of the two resulting maps is
     softmax-normalised over the 196 positions and used to weight the image
     features, which are then averaged over positions.

     Returns the dropout of the concatenation of the flattened glimpses
     ([batch_size, 2048 * 2]) with `text`.
     """
     with tf.variable_scope("attention") as scope:
         if self.reuse:
             scope.reuse_variables()
         tiled_text = self._replicate_features(text, (1, 14, 14, 1),
                                               project=self.project)

         # Image and text features are concatenated along the depth axis
         joint = tf.concat([image, tiled_text], axis=3)
         joint = tf.nn.dropout(joint, keep_prob=self.dropout_prob)
         hidden = self.conv2d_layer(joint, filters=512, kernel_size=(1, 1),
                                    name="attention_conv1")
         hidden = tf.nn.dropout(hidden, keep_prob=self.dropout_prob)
         glimpse_logits = self.conv2d_layer(hidden, filters=2,
                                            kernel_size=(1, 1),
                                            name="attention_conv2")

         # Flatten each attention map so the softmax runs over positions
         attention_map = tf.reshape(glimpse_logits,
                                    (self.batch_size, 14 * 14, 2))
         attention_map = tf.nn.softmax(attention_map, axis=1,
                                       name="attention_map")
         image = tf.reshape(image, (self.batch_size, 196, 2048, 1))
         attention = tf.tile(tf.expand_dims(attention_map, 2),
                             (1, 1, 2048, 1))
         image = tf.tile(image, (1, 1, 1, 2))
         glimpses = tf.reduce_mean(image * attention, 1)

         # Flatten both glimpses into a single vector
         glimpses = tf.reshape(glimpses, (self.batch_size, 2048 * 2))
         attention_output = tf.nn.dropout(tf.concat([glimpses, text], 1),
                                          self.dropout_prob)
     return attention_output
Beispiel #27
0
 def _tf_sample_generator(self):
     """Build the top-down sampling ops and return their assign handles.

     Starting at the top layer (probabilities from the bias alone) and moving
     down to layer 0, each layer gets a probability tensor, a sample drawn
     with sampleInt and an importance weight from compute_importance_weight.
     The tensors are kept, ordered from layer 0 upwards, in
     self.samp_prob1_tfhl_list, self.sample_tfhl_list and
     self.samp_w_tfhl_list.

     Returns the list of assign ops that write those tensors into the
     matching samp_*_var_list variables.
     """
     archit = self.network_architecture
     top = len(archit) - 1

     # top layer is just the bias
     top_prob = tf.tile(self.transfer_fun(self.bias_list[top]),
                        [1, self.batch_size])
     self.samp_prob1_tfhl_list = [top_prob]
     self.sample_tfhl_list = [sampleInt(self.samp_prob1_tfhl_list[0])]
     self.samp_w_tfhl_list = [tf.ones([1, self.batch_size])]
     sample_handle = [
         self.samp_var_list[top].assign(self.sample_tfhl_list[0]),
         self.samp_w_var_list[top].assign(self.samp_w_tfhl_list[0]),
         self.samp_prob1_var_list[top].assign(self.samp_prob1_tfhl_list[0]),
     ]

     # sample from top to the bottom; the top layer itself is already done
     for layer in reversed(range(top)):
         n, m = archit[layer], archit[layer + 1]  # layer sizes; not used below
         # index 0 of the lists always holds the layer directly above
         pre_activation = tf.matmul(self.weights_list[layer],
                                    self.sample_tfhl_list[0]) + \
             tf.tile(self.bias_list[layer], [1, self.batch_size])
         layer_prob = self.transfer_fun(pre_activation)
         # we need to save the prob of sample
         layer_sample = sampleInt(layer_prob)
         assign_prob = self.samp_prob1_var_list[layer].assign(layer_prob)
         assign_sample = self.samp_var_list[layer].assign(layer_sample)
         # compute_importance_weight(Hi+1, Hi, H_wi+1, W, b)
         layer_weight = compute_importance_weight(self.sample_tfhl_list[0],
                                                  layer_sample,
                                                  self.samp_w_tfhl_list[0],
                                                  self.weights_list[layer],
                                                  self.bias_list[layer],
                                                  self.batch_size)
         assign_weight = self.samp_w_var_list[layer].assign(layer_weight)
         sample_handle.extend([assign_sample, assign_weight, assign_prob])
         self.samp_prob1_tfhl_list.insert(0, layer_prob)
         self.sample_tfhl_list.insert(0, layer_sample)
         self.samp_w_tfhl_list.insert(0, layer_weight)
     return sample_handle
Beispiel #28
0
    def call(self, inputs):
        """Pair every input vector with every KB entry and run the layer stack.

        self.kb_inputs is given leading axes and tiled to the leading shape
        of `inputs`; `inputs` gets a new second-to-last axis tiled to the
        number of KB entries. The two are concatenated on the last axis,
        multiplied by the expanded self.kb_mask and passed through
        self.layers. The last axis of the result is squeezed away and the
        result is concatenated after self.output_layer(inputs).
        """
        # TODO: check input dtype
        n_dims = inputs.shape.ndims

        # Tile kb_inputs over the leading axes of inputs
        kb_inputs = self.kb_inputs
        for _ in range(n_dims - 1):
            kb_inputs = tf.expand_dims(kb_inputs, 0)
        kb_multiples = tf.concat((tf.shape(inputs)[:-1], [1, 1]), 0)
        kb_inputs = tf.tile(kb_inputs, kb_multiples)

        # Expand kb_mask so it broadcasts against the concatenated tensor
        kb_mask = self.kb_mask
        for _ in range(n_dims - 2):
            kb_mask = tf.expand_dims(kb_mask, 1)
        kb_mask = tf.expand_dims(kb_mask, -1)

        # Tile inputs once per KB entry
        kb_size = tf.shape(self.kb_inputs)[0]
        input_multiples = tf.concat(([1] * (n_dims - 1), [kb_size], [1]), 0)
        cell_inputs = tf.tile(tf.expand_dims(inputs, -2), input_multiples)

        paired = tf.concat([kb_inputs, cell_inputs], -1)
        outputs = tf.multiply(paired, kb_mask)
        for layer in self.layers:
            outputs = layer.call(outputs)
        outputs = tf.squeeze(outputs, [-1])
        return tf.concat([self.output_layer(inputs), outputs], -1)
Beispiel #29
0
def loss(logits, labels, lambs):
    """
    Build the total loss: the summed per-class loss plus the 'losses' collection.

    Relies on the module-level names NUM_CLASSES and BATCH_SIZE.

    Args:
        logits: raw scores; passed through framwork.sig_func and transposed
            so that each row corresponds to one class
            (assumes [BATCH_SIZE, NUM_CLASSES] — TODO confirm).
        labels: integer class labels, compared against every class index
            (assumes shape [BATCH_SIZE] — TODO confirm).
        lambs: per-class values handed to framwork.loss_func.

    Returns:
        The sum of everything in the 'losses' collection (after this
        function's own loss has been added to it), named 'total_loss'.
    """
    # put a sigfunction on logits and then transpose
    logits = tf.transpose(framwork.sig_func(logits))
    # labels_unique enumerates every class index; list(...) because older
    # TensorFlow releases reject a Python 3 range object in tf.constant
    labels_unique = tf.constant(list(range(NUM_CLASSES)), dtype=tf.int32)
    labels_num = NUM_CLASSES
    # indict_logic[c, b] is True when labels[b] == c
    template = tf.tile(tf.expand_dims(labels_unique, dim=1), [1, BATCH_SIZE])
    labels_expand = tf.tile(tf.expand_dims(labels, dim=0), [labels_num, 1])
    indict_logic = tf.equal(labels_expand, template)
    # split the tensors along rows, one piece per class
    logit_list = tf.split(0, labels_num, logits)
    indict_logic_list = tf.split(0, labels_num, indict_logic)
    lambda_list = tf.split(0, NUM_CLASSES, lambs)
    # Build a real list: map() is a lazy iterator on Python 3, and
    # tf.add_n requires a list of tensors.
    loss_list = [
        framwork.loss_func(logit_row, indicator_row, lamb)
        for logit_row, indicator_row, lamb
        in zip(logit_list, indict_logic_list, lambda_list)
    ]
    losses = tf.add_n(loss_list)
    tf.add_to_collection('losses', losses)
    # The total loss is this loss plus whatever else has been added to the
    # 'losses' collection (e.g. weight decay terms).
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
Beispiel #30
0
  def r_xn(self, z_t, t):
    """Computes a distribution over the future observations given current latent
    state.

    The indexing in these messages is 1 indexed and inclusive. This is
    consistent with the latex documents.

    Args:
      z_t: [batch_size, state_size] Tensor
      t: Current timestep

    Returns:
      A Normal distribution whose loc is z_t repeated along a new axis 1 and
      whose scale is the square root of the floored softplus of
      self.sigmas[t].
    """
    first_obs = (self.first_future_obs_index(t) + 1) * self.steps_per_obs
    last_obs = self.num_timesteps - 1
    tf.logging.info(
        "r(x_{start}:{end} | z_{t}) ~ N(z_{t}, sigma_{t})".format(
            t=t, start=first_obs, end=last_obs))

    n_batch = tf.shape(z_t)[0]
    # the mean for all future observations is the same.
    # this tiling results in a [batch_size, num_future_obs, state_size] Tensor
    loc = tf.tile(z_t[:, tf.newaxis, :], [1, self.num_future_obs(t), 1])

    # sigma is floored at sigma_min after the softplus
    sigma = tf.maximum(tf.nn.softplus(self.sigmas[t]), self.sigma_min)
    # the same value is used for every state dimension, so sigma is tiled
    # over the batch and state axes
    # (presumably giving [batch_size, num_future_obs, state_size] — confirm)
    sigma = tf.tile(sigma[tf.newaxis, :, tf.newaxis],
                    [n_batch, 1, self.state_size])
    return tf.contrib.distributions.Normal(loc=loc, scale=tf.sqrt(sigma))
Beispiel #31
0
    # 1D features
    f1d_seq = msa1hot[0, :, :20]
    f1d_pssm = msa2pssm(msa1hot, w)

    f1d = tf.concat(values=[f1d_seq, f1d_pssm], axis=1)
    f1d = tf.expand_dims(f1d, axis=0)
    f1d = tf.reshape(f1d, [1, ncol, 42])

    # 2D features
    f2d_dca = tf.cond(nrow > 1, lambda: fast_dca(msa1hot, w),
                      lambda: tf.zeros([ncol, ncol, 442], tf.float32))
    f2d_dca = tf.expand_dims(f2d_dca, axis=0)

    f2d = tf.concat([
        tf.tile(f1d[:, :, None, :], [1, 1, ncol, 1]),
        tf.tile(f1d[:, None, :, :], [1, ncol, 1, 1]), f2d_dca
    ],
                    axis=-1)
    f2d = tf.reshape(f2d, [1, ncol, ncol, 442 + 2 * 42])

    #
    # 2D network
    #
    layers2d = [f2d]
    layers2d.append(conv2d(layers2d[-1], n2d_filters, 1, padding='SAME'))
    layers2d.append(tf.contrib.layers.instance_norm(layers2d[-1]))
    layers2d.append(activation(layers2d[-1]))

    # stack of residual blocks with dilations
    dilation = 1
    def decoding_layer(self):
        '''
        Build the decoder.

        The method takes no arguments; it reads its configuration from
        attributes instead:
        - self.data.word_letter_to_int: vocabulary mapping of the target data
        - self.args.decoding_embedding_size: size of the embedding vectors
        - self.args.num_layers: number of stacked RNN cells
        - self.args.rnn_size: number of hidden units per RNN cell
        - self.target_sequence_length: lengths of the target sequences
        - self.max_target_sequence_length: maximum target sequence length
        - self.encoder_state: state vector produced by the encoder
        - self.decoder_input: input fed to the decoder

        Sets self.training_decoder_output and self.training_logits. When
        self.args.mode is 'test' it also sets self.predict_decoder_outputs
        and self.predicts (beam search results).
        '''
        vocab = self.data.word_letter_to_int
        vocab_size = len(vocab)

        # 1. Embedding
        decoder_embeddings = tf.Variable(
            tf.random_uniform([vocab_size, self.args.decoding_embedding_size]))
        decoder_embed_input = tf.nn.embedding_lookup(decoder_embeddings,
                                                     self.decoder_input)

        # 2. Build the RNN cells used by the decoder
        def make_cell(num_units):
            lstm = tf.contrib.rnn.LSTMCell(
                num_units,
                initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=2))
            return tf.contrib.rnn.DropoutWrapper(
                lstm, output_keep_prob=self.drop_out)

        stacked_cells = [make_cell(self.args.rnn_size)
                         for _ in range(self.args.num_layers)]
        cell = tf.contrib.rnn.MultiRNNCell(stacked_cells)

        # 3. Fully connected output layer
        output_layer = Dense(
            vocab_size,
            kernel_initializer=tf.truncated_normal_initializer(mean=0.0,
                                                               stddev=0.1))

        # 4. Training decoder
        with tf.variable_scope("decode"):
            # helper that feeds the embedded decoder input
            training_helper = tf.contrib.seq2seq.TrainingHelper(
                inputs=decoder_embed_input,
                sequence_length=self.target_sequence_length,
                time_major=False)
            # build the decoder
            training_decoder = tf.contrib.seq2seq.BasicDecoder(
                cell, training_helper, self.encoder_state, output_layer)
            # dynamic_decode runs the decoder and returns
            # (final_outputs, final_state, final_sequence_lengths)
            self.training_decoder_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
                training_decoder,
                maximum_iterations=self.max_target_sequence_length)

            # tf.identity returns a new tensor with the same contents
            self.training_logits = tf.identity(
                self.training_decoder_output.rnn_output, 'logits')

        # 5. Predicting decoder (only built in test mode)
        if self.args.mode != 'test':
            return

        with tf.variable_scope("predict"):
            # Replicate encoder infos beam_width times
            decoder_initial_state = tf.contrib.seq2seq.tile_batch(
                self.encoder_state, multiplier=self.args.beam_size)
            go_token = tf.constant([vocab['<GO>']], dtype=tf.int32)
            start_tokens = tf.tile(go_token, [self.args.batch_size],
                                   name='start_tokens')
            # Define a beam-search decoder
            decoder = tf.contrib.seq2seq.BeamSearchDecoder(
                cell=cell,
                embedding=decoder_embeddings,
                start_tokens=start_tokens,
                end_token=vocab['<EOS>'],
                initial_state=decoder_initial_state,
                beam_width=self.args.beam_size,
                output_layer=output_layer,
                length_penalty_weight=0.0)

            # Dynamic decoding
            self.predict_decoder_outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder,
                maximum_iterations=self.max_target_sequence_length)

            predicted_ids = tf.transpose(
                self.predict_decoder_outputs.predicted_ids, perm=[0, 2, 1])
            self.predicts = tf.identity(predicted_ids, 'predictions')
Beispiel #33
0
    def __init__(self, session, learning_rate, data_size, static_data_size,
                 lstm_size):
        """Build an LSTM classifier graph with optional static side inputs.

        Args:
            session: stored as self.sess.
            learning_rate: learning rate handed to the Adam optimizer.
            data_size: size of the last axis of the sequence input.
            static_data_size: size of the static input; when greater than 0
                a static placeholder is created, tiled along the time axis
                and concatenated to the sequence input.
            lstm_size: number of LSTM units (the cell projects to 1 output).

        The prediction self.y is the sigmoid of the output at the last time
        step; self.overall_cost is the summed binary cross-entropy against
        self.gpu_labels. Training and saving are restricted to the trainable
        variables under the "lstm" scope.
        """
        self.sess = session
        self.data_size = data_size
        self.static_data_size = static_data_size

        self.gpu_inputs = tf.placeholder(tf.float32, [None, None, data_size])
        self.gpu_labels = tf.placeholder(tf.float32, [None])

        has_static = static_data_size > 0
        if has_static:
            self.gpu_static = tf.placeholder(tf.float32,
                                             [None, static_data_size])
            input_width = data_size + static_data_size
        else:
            input_width = data_size

        with tf.variable_scope("lstm"):
            total_time = tf.shape(self.gpu_inputs)[1]
            lstm = tf.contrib.rnn.LSTMCell(lstm_size,
                                           num_proj=1,
                                           forget_bias=1.0)
            initial_w = (np.random.rand(input_width, lstm_size) - 0.5) * 0.01
            self.W = tf.Variable(initial_w, dtype=tf.float32)
            self.b = tf.Variable(np.zeros((lstm_size)), dtype=tf.float32)
            self.stacked_lstm = tf.contrib.rnn.MultiRNNCell([lstm] * 1)

            if has_static:
                # repeat the static features at every time step
                tiled_static = tf.tile(
                    tf.reshape(self.gpu_static, [-1, 1, static_data_size]),
                    [1, total_time, 1])
                features = tf.concat([self.gpu_inputs, tiled_static], axis=2)
            else:
                features = self.gpu_inputs

            preLSTM = tf.tanh(
                linear_layer(features, self.W, self.b, input_width,
                             lstm_size))
            output, _ = tf.nn.dynamic_rnn(self.stacked_lstm,
                                          preLSTM,
                                          dtype=tf.float32,
                                          time_major=False,
                                          parallel_iterations=1,
                                          swap_memory=True)

        lstm_scope = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                       scope="lstm")

        # keep only the last time step and flatten it to one value per row
        last_step = tf.slice(output, [0, total_time - 1, 0], [-1, 1, -1])
        self.y = tf.sigmoid(tf.reshape(last_step, [-1]))

        positive_term = tf.multiply(self.gpu_labels, tf.log(self.y))
        negative_term = tf.multiply(1 - self.gpu_labels, tf.log(1 - self.y))
        self.overall_cost = tf.reduce_sum(-positive_term - negative_term)

        self.training_op = tf.train.AdamOptimizer(learning_rate).minimize(
            self.overall_cost, var_list=lstm_scope)
        self.saver = tf.train.Saver(var_list=lstm_scope,
                                    keep_checkpoint_every_n_hours=1)
def space_tiling(x):
    """Insert two axes after the first one and repeat x 4 times along each.

    Per the original note, this expands [None, 64] to [None, 4, 4, 64].
    """
    with_one_axis = tf.expand_dims(x, 1)
    with_two_axes = tf.expand_dims(with_one_axis, 1)
    return tf.tile(with_two_axes, [1, 4, 4, 1])
Beispiel #35
0
def dmnrun(fulldata, queask):
    # Loading saved meta graph
    sess = tf.Session()
    saver = tf.train.import_meta_graph("C:/Users/Mark/PycharmProjects/DMNTrain/weights/model.meta")
    saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Mark/PycharmProjects/DMNTrain/weights'))
    tf.reset_default_graph()

    def wideArray(x, weight):
        wide = np.zeros([len(x), weight])
        for i in range(0, len(x)):
            for j in range(0, len(x[i])):
                wide[i][j] = x[i][j]
        return wide

    def octalConv(x):
        ans = []
        rows = []
        words = []
        for line in x.split(' '):
            for word in line:
                number = ord(word)
                convNum = oct(number)
                convNum = int(convNum[2:])

            rows.append(ans)
            ans = []
            words.append(line)
        ans = wideArray(rows, 50)
        return ans, words

    def contextualize(data, quest):
        """
        Read in the input and question and build a context sets.
        Output is a list of data points, each of which is a 7-element tuple containing:
            The sentences in the context in vectorized form.
            The sentences in the context as a list of string tokens.
            The question in vectorized form.
            The question as a list of string tokens.
            The answer in vectorized form.
            The answer as a list of string tokens.
            A list of numbers for supporting statements, which is currently unused.
        """
        output = []
        context = []
        for entry in data:
            # Turn input into a word vector
            # TODO: Change to Octal Decimal encoding
            context.append(octalConv(entry[:-1]))
        # Wrap up object so DMN can use it
        comp_context = tuple(zip(*context))
        output.append(comp_context +
                      octalConv(quest) +
                      octalConv('Nothing') +
                      (0,))
        return output

    test_data = contextualize(fulldata, queask)

    final_train_data = []

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas

            lspt = [spt]

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Location markers for the beginnings of new sentences.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, lspt, context_words, cqas, avs, aws))
        return np.array(final_data)

    final_test_data = finalize(test_data)

    tf.reset_default_graph()

    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    weight_decay = 0.00000001
    # The strength of our regularization. Increase to encourage sparsity in episodic memory,
    # but makes training slower. Don't make this larger than leraning_rate.

    training_iterations_count = 400000
    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.

    display_step = 1
    # How many iterations of training occur before each validation check.

    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs

    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    #  that contains all of the questions.

    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
    # so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)

    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism: scores every fact against the memory and the question.

        The features built from c, mem and re_q (taken from the enclosing scope) are
        passed through a two-layer feed-forward network whose parameters w_1, b_1,
        w_2 and b_2 also come from the enclosing scope; the resulting scores are
        then soft-maxed over the non-zero entries of each context.

        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
            contains the current memory. It should be the same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.

        Returns: the densified sparse-softmax result with a trailing axis of size 1
            re-added. Entries whose normalized score is exactly 0 (e.g. masked-out
            facts) are left out of the softmax and come back as 0.
        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            #     Seven tensors are concatenated along axis 2, which matches the
            #     recurrent_cell_size * 7 input width of w_1.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward network.
            #     We tile the weights in order to manually broadcast, since tf.matmul does not
            #     automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
            #     The mask is applied both before and after the product, so rows that
            #     belong to non-existent facts are zero.
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            #     over only existing facts.

            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            #        choosing relu was a design choice intended to avoid issues with
            #        low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            #     Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            #     to help make sure the softmax nonlinearity doesn't saturate.
            # NOTE(review): the normalized (last) axis has size output_size, which the
            #     enclosing code sets to 1, so each score is divided by its own magnitude
            #     and every non-zero value appears to become +1 or -1 - confirm this
            #     is intended.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            #     We make norm_m2 a sparse tensor, then make it dense again after the operation.
            # softmax_idx: coordinates of the non-zero scores, with the trailing
            #     (size-1) coordinate dropped.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            # softmax_gather: the non-zero scores themselves, in the same order as softmax_idx.
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            # softmax_shape: dense shape of the sparse tensor, i.e. norm_m2's shape
            #     without its last axis (int64, as SparseTensor expects).
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            # Softmax over the stored entries only, densify, then restore the last axis.
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    #     whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        #   of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index: (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()

    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        #    we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind), [tf.reduce_max(ind) + 1,
                                                                        tf.shape(ind)[0]]), bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            #  each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            #  (starting from all 1). For each context in the batch, this will result in 1s
            #  up until the marker (the location of that last period) and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

        # We score each possible word inversely with their Euclidean distance to the regressed word.
        #  The highest score (lowest distance) will correspond to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims(
            tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1, name='logits')

    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        corrbool = tf.reduce_all(eq, -1, name='corrbool')
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score matches the
        #  maximum score of its context (logloc), i.e. the smallest distance
        #  to the regressed word. Several positions can tie for the maximum,
        #  so more than one entry per context may be true.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor with one entry per context that is true
        #   when a top-scoring position (locs) is also a position of the gold answer (corrbool).
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        #  with our distances as the relative probabilities. There are
        #  multiple correct labels, for each location of the answer word within the context.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

        # Add regularization losses, weighted by weight_decay.
        total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than
    #  just the learning rate, but it's not necessary to find a very good optimum.
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Once we have an optimizer, we ask it to minimize the loss
    #   in order to work towards the proper training.
    opt_op = optimizer.minimize(total_loss)

    # Initialize variables
    init = tf.global_variables_initializer()

    # Launch the TensorFlow session
    sess = tf.Session()
    sess.run(init)

    def prep_batch(batch_data, more_data=False):
        """
        Turn a batch of raw records into a feed dictionary of zero-padded arrays.

        batch_data: records of eight fields each; the context vectors, sentence
            ends, question vectors, context words, cqas and answer vectors are
            the fields used here.
        more_data: when true, the context words and cqas are returned as well.

        Returns the feed dictionary, or the tuple
        (feed dictionary, context words, cqas) when more_data is true.
        """
        (context_vecs, sentence_ends, question_vecs, _unused_spt,
         context_words, cqas, answer_vecs, _unused_last) = zip(*batch_data)

        # Sentence ends: shift every end by one and zero-pad each row to the
        # longest row of the batch.
        ends = list(sentence_ends)
        padded_ends = np.zeros((len(ends), max(map(len, ends))))
        for row, row_ends in enumerate(ends):
            padded_ends[row, :len(row_ends)] = [end - 1 for end in row_ends]

        # Pair every (padded) end with the index of the batch row it belongs to.
        indexed_ends = np.zeros(padded_ends.shape + (2,))
        for row in range(len(ends)):
            indexed_ends[row, :, 0] = row
            indexed_ends[row, :, 1] = padded_ends[row]

        # Contexts: zero-pad along the first axis up to the longest context.
        contexts = [np.array(vec) for vec in context_vecs]
        longest_context = max(len(vec) for vec in contexts)
        final_contexts = np.zeros(
            (len(contexts), longest_context) + contexts[0].shape[1:])
        for row, vec in enumerate(contexts):
            final_contexts[row, :len(vec), :] = vec

        # Questions: same padding scheme, plus (row, last position) pairs.
        questions = [np.array(vec) for vec in question_vecs]
        longest_question = max(len(vec) for vec in questions)
        queries = np.zeros(
            (len(questions), longest_question) + questions[0].shape[1:])
        querylengths = np.array(
            [(row, len(vec) - 1) for row, vec in enumerate(question_vecs)])
        for row, vec in enumerate(questions):
            queries[row, :len(vec), :] = vec

        feed = {
            context_placeholder: final_contexts,
            input_sentence_endings: indexed_ends,
            query: queries,
            input_query_lengths: querylengths,
            gold_standard: answer_vecs,
        }
        if more_data:
            return feed, context_words, cqas
        return feed

    # Use TQDM if installed
    tqdm_installed = False

    # Prepare validation set
    batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
    batch_data = final_test_data[batch]

    validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)

    holder = [corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs]
    
    print('Starting session')
    start_time = time.time()
    ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                    [query, cs, question_module_outputs], feed_dict=validation_set)
    elapsed_time = time.time() - start_time
    print(elapsed_time)
    a = ancr[0]
    n = ancr[1]
    cr = ancr[2]
    attenders = np.array(ancr[6:-3])
    faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

    limit = 1

    # Locations of responses within contexts
    indices = np.argmax(n, axis=1)

    # Locations of actual answers within contexts
    indicesc = np.argmax(a, axis=1)
    response = ""

    ans = 0
    inp = ''

    for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
        ccc = " ".join(cw)
        print("TEXT: ", ccc)
        inp = ccc
        print("QUESTION: ", " ".join(cqa[3]))
        print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
        ans = i
        print("EXPECTED: ", cw[e])
        print()
    # For safety, return this if nothing is found
    sess.close()
    
    print('--')
    tot_index = 0
    for line in fulldata:
        tot_index = tot_index + len(line)
        if tot_index >= ans:
            return line
    return response
Beispiel #36
0
    def _build_graph(self,
                     hidden_dim,
                     env_state_size,
                     action_space_dim,
                     learning_rate=0.01,
                     activation=tf.nn.elu,
                     scope_name='policy-network',
                     **kwargs):
        """Construct the recurrent policy network, its loss and its summaries.

        Everything is created under ``scope_name`` and stored on ``self``: a
        single-step path fed through the ``env_state`` / ``rnn_state``
        placeholders, a multi-episode path fed through ``env_states``,
        ``actions``, ``returns`` and ``batch_size``, and an RMSProp train op.

        Args:
            hidden_dim: number of units of the RNN cell.
            env_state_size: width of one environment-state vector.
            action_space_dim: number of columns of the output layer.
            learning_rate: learning rate handed to the RMSProp optimizer.
            activation: activation function of the RNN cell.
            scope_name: name of the enclosing variable scope.
            **kwargs: accepted but not used in this method.
        """
        new_initializer = tf.contrib.layers.variance_scaling_initializer

        with tf.variable_scope(scope_name):
            # Remember the layer sizes on the instance.
            with tf.variable_scope('dimensions'):
                self.hidden_dim, self.env_state_dim, self.action_space_dim = (
                    hidden_dim, env_state_size, action_space_dim)

            # Trainable parts: the cell, a learned start state and the
            # output projection.
            with tf.variable_scope('model-parameters'):
                cell = tf.contrib.rnn.BasicRNNCell(
                    hidden_dim, activation=activation)
                start_state = tf.get_variable(
                    'rnn_init_state', [1, hidden_dim],
                    initializer=new_initializer())
                out_weights = tf.get_variable(
                    'output_weights', [hidden_dim, action_space_dim],
                    initializer=new_initializer())
                out_bias = tf.get_variable(
                    'output_bias', [action_space_dim],
                    initializer=new_initializer())
                self.rnn_cell = cell
                self.initial_state = start_state
                self.output_weights = out_weights
                self.output_bias = out_bias

            # Single step: one environment state and one recurrent state.
            step_input = tf.placeholder(
                tf.float32, [1, env_state_size], name="state")
            step_state = tf.placeholder(tf.float32, [1, hidden_dim])
            self.env_state = step_input
            self.rnn_state = step_state

            with tf.variable_scope('single-step-rnn'):
                self.rnn_state_val = None
                self.step_rnn, _ = cell(step_input, step_state)
                # NOTE(review): the probabilities are computed from the fed-in
                # rnn_state, not from step_rnn, whereas the multi-step path
                # below uses the RNN outputs - confirm this is intended.
                step_logits = tf.matmul(step_state, out_weights) + out_bias
                self.action_probability = tf.nn.softmax(step_logits)

            # Multiple episodes.
            self.batch_size = tf.placeholder(tf.int32, name='max-episode-len')
            # returns ~ [n, max(epi_len)]
            self.returns = tf.placeholder(tf.float32, [None, None], 'returns')
            # env_states ~ [n, max(epi_len), env_state_size]
            episode_states = tf.placeholder(
                tf.float32, [None, None, env_state_size], 'states')
            self.env_states = episode_states
            # actions ~ [n, max(epi_len), 3]; used as gather_nd indices below
            self.actions = tf.placeholder(tf.int32, [None, None, 3], 'actions')
            # One copy of the learned start state per batch row.
            self.initial_states = tf.tile(
                start_state, multiples=[self.batch_size, 1])

            with tf.variable_scope('multi-step-rnn'):
                with tf.variable_scope('rnn'):
                    # rnn_states ~ [n, max(epi_len), hidden_dim]
                    self.rnn_states, _ = tf.nn.dynamic_rnn(
                        cell,
                        inputs=episode_states,
                        initial_state=self.initial_states,
                        dtype=tf.float32)

                with tf.variable_scope('action-p'):
                    # logits, action_probabilities
                    #   ~ [n, max(epi_len), action_space_dim]
                    projected = tf.tensordot(
                        self.rnn_states, out_weights, axes=[[2], [0]])
                    self.logits = projected + out_bias
                    self.action_probabilities = tf.nn.softmax(self.logits)
                    # obs_action_probabilities ~ [n, max(epi_len)]
                    self.obs_action_probabilities = tf.gather_nd(
                        self.action_probabilities, self.actions)

            with tf.variable_scope('train'):
                # Return-weighted negative log-probabilities, summed over
                # the steps of each episode (1e-10 keeps the log finite).
                neg_log_p = -tf.log(self.obs_action_probabilities + 1e-10)
                self.episodic_loss = tf.reduce_sum(
                    neg_log_p * self.returns, axis=1)
                # Average over episodes.
                self.loss = tf.reduce_mean(self.episodic_loss)
                self.optimizer = tf.train.RMSPropOptimizer(
                    learning_rate=learning_rate)
                self.train_op = self.optimizer.minimize(
                    self.loss, global_step=tf.train.get_global_step())

            # Summaries.
            with tf.variable_scope('summary'):
                tf.summary.tensor_summary('rnn-states', self.rnn_states)
                tf.summary.scalar('loss', self.loss)
                self.summary_op = tf.summary.merge_all()
def din_fcn_attention(query,
                      rnn_output,
                      keys_len,
                      scope_name,
                      stag='null',
                      mode='SUM',
                      softmax_stag=1,
                      time_major=False,
                      return_alphas=False,
                      for_cnn=False):
    """Attend over ``rnn_output`` with a small fully connected scoring network.

    The query is projected to the width of ``rnn_output``, passed through
    ``prelu``, repeated along the time axis and combined with every time step
    (concatenation, difference, product). Three dense layers (80, 40 and 1
    units) turn that into one score per time step.

    Args:
        query: query tensor fed to the first dense layer.
        rnn_output: sequence tensor, or a tuple of tensors that is
            concatenated on axis 2; a rank-2 tensor gets a time axis of
            size 1 inserted.
        keys_len: sequence lengths used to build the padding mask.
        scope_name: prefix of the dense-layer names and the ``prelu`` scope.
        stag: suffix of the dense-layer names.
        mode: ``'SUM'`` returns the score-weighted sum, anything else the
            per-step re-weighted sequence.
        softmax_stag: when truthy, the scores go through a softmax.
        time_major: when true, ``rnn_output`` is transposed (T,B,D) => (B,T,D).
        return_alphas: when true, the scores are returned as well.
        for_cnn: when true, padded positions are not masked out.

    Returns:
        ``output``, or ``(output, scores)`` when ``return_alphas`` is set.
    """
    if isinstance(rnn_output, tuple):
        # Bi-RNN case: join the forward and the backward outputs.
        rnn_output = tf.concat(rnn_output, 2)
    if len(rnn_output.get_shape().as_list()) == 2:
        rnn_output = tf.expand_dims(rnn_output, 1)
    if time_major:
        # (T,B,D) => (B,T,D)
        rnn_output = array_ops.transpose(rnn_output, [1, 0, 2])

    # D value - hidden size of the RNN layer.
    hidden_units = rnn_output.get_shape().as_list()[-1]
    projected = tf.layers.dense(query,
                                hidden_units,
                                activation=None,
                                name=scope_name + '_f1' + stag)
    projected = prelu(projected, scope=scope_name)

    # Repeat the projected query once per time step.
    tiled = tf.tile(projected, [1, tf.shape(rnn_output)[1]])
    tiled = tf.reshape(tiled, tf.shape(rnn_output))

    net = tf.concat(
        [tiled, rnn_output, tiled - rnn_output, tiled * rnn_output],
        axis=-1)
    layer_specs = (
        (80, tf.nn.sigmoid, 'f1_att'),
        (40, tf.nn.sigmoid, 'f2_att'),
        (1, None, 'f3_att'),
    )
    for units, layer_activation, tag in layer_specs:
        net = tf.layers.dense(net,
                              units,
                              activation=layer_activation,
                              name=scope_name + tag + stag)
    scores = tf.reshape(net, [-1, 1, tf.shape(rnn_output)[1]])

    # Padding mask: [B, T] expanded to [B, 1, T].
    valid_steps = tf.expand_dims(
        tf.sequence_mask(keys_len, tf.shape(rnn_output)[1]), 1)
    # Very negative filler for the padded positions.
    paddings = tf.ones_like(scores) * (-2**32 + 1)
    if not for_cnn:
        scores = tf.where(valid_steps, scores, paddings)  # [B, 1, T]

    if softmax_stag:
        scores = tf.nn.softmax(scores)  # [B, 1, T]

    if mode == 'SUM':
        # Weighted sum over the time axis: [B, 1, H]
        output = tf.matmul(scores, rnn_output)
    else:
        # Keep the sequence and scale every time step by its score.
        scores = tf.reshape(scores, [-1, tf.shape(rnn_output)[1]])
        output = rnn_output * tf.expand_dims(scores, -1)
        output = tf.reshape(output, tf.shape(rnn_output))

    return (output, scores) if return_alphas else output
Beispiel #38
0
def _model_fn(features, labels, mode, params, model, variable_filter_fn=None):
    """Model definition for the RetinaNet model based on ResNet-50.

    Args:
        features: the input image tensor with shape
            [batch_size, height, width, 3]. The height and width are fixed
            and equal.
        labels: the input labels in a dictionary. The labels include class
            targets and box targets which are dense label maps. The labels
            are generated from get_input_fn function in data/dataloader.py.
        mode: the mode of TPUEstimator including TRAIN, EVAL, and PREDICT.
        params: the dictionary that defines the hyperparameters of the model.
            The default settings are in default_hparams function in this file.
        model: the RetinaNet model; called here to obtain the per-level class
            logits and box regression outputs.
        variable_filter_fn: the filter function that takes
            trainable_variables and returns the variable list after applying
            the filter rule.

    Returns:
        A ``tf.estimator.EstimatorSpec`` holding the predictions in PREDICT
        mode; otherwise a ``TPUEstimatorSpec`` for training or evaluation.
    """
    modes = tf.estimator.ModeKeys

    model_kwargs = dict(
        min_level=params['min_level'],
        max_level=params['max_level'],
        num_classes=params['num_classes'],
        num_anchors=len(params['aspect_ratios'] * params['num_scales']),
        is_training_bn=params['is_training_bn'])
    cls_outputs, box_outputs = model(features, **model_kwargs)
    output_levels = cls_outputs.keys()

    # PREDICT mode needs nothing but the raw outputs, so finish early.
    if mode == modes.PREDICT:
        predictions = {'image': features}
        for lvl in output_levels:
            predictions['cls_outputs_%d' % lvl] = cls_outputs[lvl]
            predictions['box_outputs_%d' % lvl] = box_outputs[lvl]
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    def _init_from_resnet():
        """Loads pretrained model through scaffold function."""
        tf.train.init_from_checkpoint(params['resnet_checkpoint'], {
            '/': 'resnet50/',
        })
        return tf.train.Scaffold()

    # The pretrained checkpoint is only loaded when training.
    use_pretrained = params['resnet_checkpoint'] and mode == modes.TRAIN
    scaffold_fn = _init_from_resnet if use_pretrained else None

    # Learning rate schedule and losses.
    global_step = tf.train.get_global_step()
    learning_rate = _learning_rate_schedule(params['learning_rate'],
                                            params['lr_warmup_init'],
                                            params['lr_warmup_step'],
                                            params['lr_drop_step'],
                                            global_step)
    # cls_loss and box_loss are for logging. only total_loss is optimized.
    total_loss, cls_loss, box_loss = _detection_loss(cls_outputs, box_outputs,
                                                     labels, params)

    train_op = None
    if mode == modes.TRAIN:
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum=params['momentum'])
        if params['use_tpu']:
            optimizer = tpu_optimizer.CrossShardOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if variable_filter_fn:
            var_list = variable_filter_fn(tf.trainable_variables())
        else:
            var_list = None
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss,
                                          global_step,
                                          var_list=var_list)

    # Evaluation only works on GPU/CPU host and batch_size=1
    eval_metrics = None
    if mode == modes.EVAL:

        def metric_fn(**kwargs):
            """Evaluation metric fn. Performed on CPU, do not reference TPU ops."""
            eval_anchors = anchors.Anchors(params['min_level'],
                                           params['max_level'],
                                           params['num_scales'],
                                           params['aspect_ratios'],
                                           params['anchor_scale'],
                                           params['image_size'])
            anchor_labeler = anchors.AnchorLabeler(eval_anchors,
                                                   params['num_classes'])
            mean_cls_loss = tf.metrics.mean(kwargs['cls_loss_repeat'])
            mean_box_loss = tf.metrics.mean(kwargs['box_loss_repeat'])
            # Regroup the per-level outputs that arrive as flat kwargs.
            eval_levels = range(params['min_level'], params['max_level'] + 1)
            level_cls = {lvl: kwargs['cls_outputs_%d' % lvl]
                         for lvl in eval_levels}
            level_box = {lvl: kwargs['box_outputs_%d' % lvl]
                         for lvl in eval_levels}
            detections = anchor_labeler.generate_detections(
                level_cls, level_box, kwargs['source_ids'])
            eval_metric = coco_metric.EvaluationMetric(params['val_json_file'])
            coco_metrics = eval_metric.estimator_metric_fn(
                detections, kwargs['image_scales'])
            # Loss metrics first, then whatever the COCO metric reports.
            output_metrics = {
                'cls_loss': mean_cls_loss,
                'box_loss': mean_box_loss,
            }
            output_metrics.update(coco_metrics)
            return output_metrics

        batch_size = params['batch_size']

        def _repeat_per_example(loss_value):
            """Tile a loss tensor into a [batch_size, 1] tensor."""
            tiled = tf.tile(tf.expand_dims(loss_value, 0), [batch_size])
            return tf.reshape(tiled, [batch_size, 1])

        metric_fn_inputs = {
            'cls_loss_repeat': _repeat_per_example(cls_loss),
            'box_loss_repeat': _repeat_per_example(box_loss),
            'source_ids': labels['source_ids'],
            'image_scales': labels['image_scales'],
        }
        for lvl in range(params['min_level'], params['max_level'] + 1):
            metric_fn_inputs['cls_outputs_%d' % lvl] = cls_outputs[lvl]
            metric_fn_inputs['box_outputs_%d' % lvl] = box_outputs[lvl]
        eval_metrics = (metric_fn, metric_fn_inputs)

    return tpu_estimator.TPUEstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          train_op=train_op,
                                          eval_metrics=eval_metrics,
                                          scaffold_fn=scaffold_fn)
Beispiel #39
0
    def __init__(self, config, name):
        """Build the TensorFlow graph for one dataset split.

        The constructor creates the input batch, runs the network inside the
        shared ``full_conv`` variable scope, defines the loss and, depending on
        the split, either the training op or the evaluation statistics and
        their TensorBoard summaries. The resulting ops are published through
        ``self.op_dict``.

        Args:
            config: configuration object. The training graph reads
                ``learning_rate`` and ``batches_per_epoch`` from it; the
                validation/test graph reads ``file_names`` and ``batch_size``.
            name: one of ``'validation'``, ``'training'`` or ``'test'``. It is
                used as the enclosing name scope and decides whether the
                training or the evaluation branch of the graph is built.

        Attributes set:
            name, is_training, config, batch, op_dict, and (for non-training
            splits only) reinitializable_iter_for_dataset.

        ``self.op_dict`` contains ``training_op`` only for the training split;
        otherwise it contains ``tb_summary``, ``update_op_after_each_batch``
        and ``statistics_after_each_epoch``.
        """
        assert name in ('validation', 'training', 'test')
        self.name = name
        logging.debug('{} - model - initialize'.format(self.name))
        self.is_training = True if self.name == 'training' else False
        self.config = config

        if not self.is_training:
            # Created here as an empty slot; presumably filled in by
            # _gen_batch_fn for the evaluation splits -- TODO confirm.
            self.reinitializable_iter_for_dataset = None
        # The batch is used below as a dict with the keys 'num_frames',
        # 'label' and 'me_id'.
        self.batch = self._gen_batch_fn()  # generate mini-batch

        with tf.name_scope(self.name):
            # AUTO_REUSE lets the training/validation/test instances share the
            # same network variables.
            with tf.variable_scope('full_conv', reuse=tf.AUTO_REUSE):
                logits_stereo = self._nn_model_fn()

            # The logits hold two halves stacked along axis 0 (named left and
            # right below), hence the lengths are tiled twice before the padded
            # frames are dropped and the flat result is split in two again.
            logits_stereo_flattened = flatten_maybe_padded_sequences(
                maybe_padded_sequences=logits_stereo,
                lengths=tf.tile(input=self.batch['num_frames'], multiples=[2]))
            logits_left_flattened, logits_right_flattened = tf.split(
                value=logits_stereo_flattened, num_or_size_splits=2, axis=0)

            # Element-wise minimum / maximum of the two halves.
            logits_minor_flattened = tf.minimum(logits_left_flattened, logits_right_flattened)
            logits_larger_flattened = tf.maximum(logits_left_flattened, logits_right_flattened)

            # Labels without padding, as booleans, negated booleans and floats.
            labels_bool_flattened = flatten_maybe_padded_sequences(
                maybe_padded_sequences=self.batch['label'], lengths=self.batch['num_frames'])
            negated_labels_bool_flattened = tf.logical_not(labels_bool_flattened)
            labels_float_flattened = tf.cast(x=labels_bool_flattened, dtype=tf.float32)

            # When the label is True, choose the smaller logits; otherwise choose
            # the larger logits. The loss therefore always sees the half that
            # is further from the target.
            logits_mono_flattened = tf.where(
               tf.equal(labels_bool_flattened, True), logits_minor_flattened, logits_larger_flattened)

            # Alternative (disabled): plain cross-entropy
            #loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=labels_float_flattened, logits=logits_mono_flattened)

            # Weighted cross-entropy (the loss actually used).
            # A value `pos_weights > 1` decreases the false negative count, hence increasing the recall.
            # Conversely setting `pos_weights < 1` decreases the false positive count and increases the precision.
            loss = tf.nn.weighted_cross_entropy_with_logits(targets=labels_float_flattened, logits=logits_mono_flattened, pos_weight=1.1)

            # Alternative (disabled): focal loss
            #loss = MiscFns.focal_loss(labels=labels_float_flattened, logits=logits_mono_flattened)

            # Scalar loss: mean over all unpadded elements of the batch.
            loss = tf.reduce_mean(loss)

            if self.is_training:
                global_step = tf.train.get_or_create_global_step()
                # Staircase decay: multiply the learning rate by 0.7 every
                # 7 * batches_per_epoch global steps.
                learning_rate = tf.train.exponential_decay(self.config.learning_rate, global_step, \
                                                           self.config.batches_per_epoch * 7, 0.7, staircase=True)

                # Ops registered in UPDATE_OPS must run together with the
                # training step (typically batch-norm statistics -- assumption,
                # depends on what _nn_model_fn registers).
                _update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                if _update_ops:
                    with tf.control_dependencies(_update_ops):
                        training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)
                else:
                    training_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=global_step)

            # Prediction: positive when the sum of the left and right logits
            # is above zero.
            pred_labels_flattened = tf.greater(logits_left_flattened+logits_right_flattened, 0)
            negated_pred_labels_flattened = tf.logical_not(pred_labels_flattened)

            # individual and ensemble statistics for test and validation
            if not self.is_training:
                with tf.name_scope('individual_and_ensemble_stats'):
                    # Accumulators live in LOCAL_VARIABLES, so they are not
                    # part of the trainable/global variable collections.
                    with tf.variable_scope('{}_local_vars'.format(self.name), reuse=tf.AUTO_REUSE):
                        # One row per entry of config.file_names[name] (indexed
                        # by me_id below); columns are [tps, fps, tns, fns].
                        individual_tps_fps_tns_fns_var = tf.get_variable(
                            name='individual_tps_fps_tns_fns',
                            shape=[len(self.config.file_names[self.name]), 4],
                            dtype=tf.int32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )

                        # Running sum of the per-batch mean losses.
                        acc_loss_var = tf.get_variable(
                            name='acc_loss',
                            shape=[],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )

                        # Number of batches accumulated so far.
                        batch_counter_var = tf.get_variable(
                            name='batch_counter',
                            shape=[],
                            dtype=tf.int32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )

                    # Single structured loop variable carried through the
                    # tf.while_loop below; only sample_idx changes per step.
                    loop_var_proto = collections.namedtuple(
                        'loop_var_proto',
                        ['sample_idx', 'batch_size', 'preds', 'negated_preds',
                         'labels', 'negated_labels', 'lengths', 'me_ids'])

                    def cond_fn(loop_var):
                        # Continue while there are samples left in the batch.
                        return tf.less(loop_var.sample_idx, loop_var.batch_size)

                    def body_fn(loop_var):
                        # The flattened tensors concatenate all samples, so the
                        # current sample spans [start_pos, end_pos), where
                        # start_pos is the summed length of the earlier samples.
                        start_pos = tf.reduce_sum(loop_var.lengths[:loop_var.sample_idx])
                        end_pos = start_pos + loop_var.lengths[loop_var.sample_idx]
                        cur_preds = loop_var.preds
                        negated_cur_preds = loop_var.negated_preds
                        cur_labels = loop_var.labels
                        negated_cur_labels = loop_var.negated_labels
                        cur_preds, negated_cur_preds, cur_labels, negated_cur_labels = \
                            [value[start_pos:end_pos]
                             for value in [cur_preds, negated_cur_preds, cur_labels, negated_cur_labels]]
                        # Confusion-matrix masks for this sample ...
                        tps = tf.logical_and(cur_preds, cur_labels)
                        fps = tf.logical_and(cur_preds, negated_cur_labels)
                        tns = tf.logical_and(negated_cur_preds, negated_cur_labels)
                        fns = tf.logical_and(negated_cur_preds, cur_labels)
                        # ... reduced to integer counts.
                        tps, fps, tns, fns = \
                            [tf.reduce_sum(tf.cast(value, tf.int32)) for value in [tps, fps, tns, fns]]
                        # Add the counts to the row selected by the sample's me_id.
                        me_id = loop_var.me_ids[loop_var.sample_idx]
                        stats_var = individual_tps_fps_tns_fns_var
                        _new_value = stats_var[me_id] + tf.convert_to_tensor([tps, fps, tns, fns])
                        _update_stats = tf.scatter_update(
                            stats_var, me_id, _new_value, use_locking=True)
                        # Advancing the index depends on the update, so the
                        # update is guaranteed to run in every iteration.
                        with tf.control_dependencies([_update_stats]):
                            sample_idx = loop_var.sample_idx + 1
                        loop_var = loop_var_proto(
                            sample_idx=sample_idx,
                            batch_size=loop_var.batch_size,
                            preds=loop_var.preds,
                            negated_preds=loop_var.negated_preds,
                            labels=loop_var.labels,
                            negated_labels=loop_var.negated_labels,
                            lengths=loop_var.lengths,
                            me_ids=loop_var.me_ids
                        )

                        return [loop_var]

                    sample_idx = tf.constant(0, dtype=tf.int32)
                    # Dynamic batch size, taken from the number of length entries.
                    cur_batch_size = tf.shape(self.batch['num_frames'])[0]
                    loop_var = loop_var_proto(
                        sample_idx=sample_idx,
                        batch_size=cur_batch_size,
                        preds=pred_labels_flattened,
                        negated_preds=negated_pred_labels_flattened,
                        labels=labels_bool_flattened,
                        negated_labels=negated_labels_bool_flattened,
                        lengths=self.batch['num_frames'],
                        me_ids=self.batch['me_id']
                    )
                    # Evaluating final_sample_idx runs the whole loop and thus
                    # all per-sample statistic updates for the current batch.
                    final_sample_idx = tf.while_loop(
                        cond=cond_fn,
                        body=body_fn,
                        loop_vars=[loop_var],
                        parallel_iterations=self.config.batch_size,
                        back_prop=False,
                        return_same_structure=True
                    )[0].sample_idx

                    # Per-row precision / recall / F1; the 1e-7 terms avoid
                    # division by zero for rows without any counts.
                    individual_tps_fps_tns_fns_float = tf.cast(individual_tps_fps_tns_fns_var, tf.float32)
                    tps, fps, _, fns = tf.unstack(individual_tps_fps_tns_fns_float, axis=1)
                    me_wise_precisions = tps / (tps + fps + 1e-7)
                    me_wise_recalls = tps / (tps + fns + 1e-7)
                    me_wise_f1s = 2. * me_wise_precisions * me_wise_recalls / \
                                  (me_wise_precisions + me_wise_recalls + 1e-7)
                    me_wise_prfs = tf.stack([me_wise_precisions, me_wise_recalls, me_wise_f1s], axis=1)
                    # Static shape checks executed at graph-construction time.
                    assert me_wise_prfs.shape.as_list() == [len(self.config.file_names[self.name]), 3]
                    # Unweighted mean of the per-row metrics.
                    average_me_wise_prf = tf.reduce_mean(me_wise_prfs, axis=0)
                    assert average_me_wise_prf.shape.as_list() == [3]

                    # ensemble stats: counts summed over all rows before the
                    # metrics are computed
                    ensemble_tps_fps_tns_fns = tf.reduce_sum(individual_tps_fps_tns_fns_var, axis=0)
                    tps, fps, _, fns = tf.unstack(tf.cast(ensemble_tps_fps_tns_fns, tf.float32))
                    en_precision = tps / (tps + fps + 1e-7)
                    en_recall = tps / (tps + fns + 1e-7)
                    en_f1 = 2. * en_precision * en_recall / (en_precision + en_recall + 1e-7)
                    batch_counter_update_op = tf.assign_add(batch_counter_var, 1)
                    acc_loss_update_op = tf.assign_add(acc_loss_var, loss)
                    # Last entry is the accumulated loss divided by the batch
                    # counter. NOTE(review): the divisor has no epsilon, so
                    # reading this before any batch update divides by zero.
                    ensemble_prf_and_loss = tf.convert_to_tensor(
                        [en_precision, en_recall, en_f1, acc_loss_var / tf.cast(batch_counter_var, tf.float32)])

                    # One op that performs every per-batch accumulation.
                    update_op_after_each_batch = tf.group(
                        final_sample_idx, batch_counter_update_op, acc_loss_update_op,
                        name='grouped update ops to be run after each batch'.replace(' ', '_'))
                    # Tensors meant to be fetched once the accumulators hold a
                    # complete pass over the data.
                    stats_after_each_epoch = dict(
                        individual_tps_fps_tns_fns=individual_tps_fps_tns_fns_var,
                        individual_prfs=me_wise_prfs,
                        ensemble_tps_fps_tns_fns=ensemble_tps_fps_tns_fns,
                        ensemble_prf_and_loss=ensemble_prf_and_loss,
                        average_prf=average_me_wise_prf
                    )

            # The string literal below is disabled code (ensemble statistics
            # for the training split); it has no effect on the graph.
            '''
            # ensemble stats for training
            if self.is_training:
                with tf.name_scope('ensemble_stats'):
                    with tf.variable_scope('{}_local_vars'.format(self.name), reuse=tf.AUTO_REUSE):
                        ensemble_tps_fps_tns_fns_var = tf.get_variable(
                            name='ensemble_tps_fps_tns_fns',
                            shape=[4],
                            dtype=tf.int32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )
                        acc_loss_var = tf.get_variable(
                            name='acc_loss',
                            shape=[],
                            dtype=tf.float32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )
                        batch_counter_var = tf.get_variable(
                            name='batch_counter',
                            shape=[],
                            dtype=tf.int32,
                            initializer=tf.zeros_initializer,
                            trainable=False,
                            collections=[tf.GraphKeys.LOCAL_VARIABLES]
                        )

                    tps = tf.logical_and(pred_labels_flattened, labels_bool_flattened)
                    fps = tf.logical_and(pred_labels_flattened, negated_labels_bool_flattened)
                    tns = tf.logical_and(negated_pred_labels_flattened, negated_labels_bool_flattened)
                    fns = tf.logical_and(negated_pred_labels_flattened, labels_bool_flattened)
                    tps, fps, tns, fns = [tf.reduce_sum(tf.cast(value, tf.int32)) for value in [tps, fps, tns, fns]]

                    ensemble_tps_fps_tns_fns_update_op = tf.assign_add(
                        ensemble_tps_fps_tns_fns_var, tf.convert_to_tensor([tps, fps, tns, fns]))

                    acc_loss_update_op = tf.assign_add(acc_loss_var, loss)
                    batch_counter_update_op = tf.assign_add(batch_counter_var, 1)
                    ensemble_tps_fps_tns_fns_float = tf.cast(ensemble_tps_fps_tns_fns_var, tf.float32)
                    tps, fps, _, fns = tf.unstack(ensemble_tps_fps_tns_fns_float)
                    ensemble_precision = tps / (tps + fps + 1e-7)
                    ensemble_recall = tps / (tps + fns + 1e-7)
                    ensemble_f1 = 2. * ensemble_precision * ensemble_recall / \
                                  (ensemble_precision + ensemble_recall + 1e-7)
                    ensemble_loss = acc_loss_var / tf.cast(batch_counter_var, tf.float32)
                    ensemble_prf_and_loss = tf.convert_to_tensor(
                        [ensemble_precision, ensemble_recall, ensemble_f1, ensemble_loss])

                    update_op_after_each_batch = tf.group(
                        batch_counter_update_op, ensemble_tps_fps_tns_fns_update_op, acc_loss_update_op)
                    stats_after_each_epoch = dict(
                        ensemble_tps_fps_tns_fns=ensemble_tps_fps_tns_fns_var,
                        ensemble_prf_and_loss=ensemble_prf_and_loss
                    )

            '''


            # define tensorboard summaries
            with tf.name_scope('tensorboard_summary'):
                with tf.name_scope('statistics'):
                    # Scalar summaries exist only for validation/test, because
                    # stats_after_each_epoch is only defined for those splits.
                    if not self.is_training:
                        list_of_summaries = []
                        with tf.name_scope('ensemble'):
                            p, r, f, lo = tf.unstack(stats_after_each_epoch['ensemble_prf_and_loss'])
                            items_for_summary = dict(precision=p, recall=r, f1=f, average_loss=lo)
                            for item_name, item_value in items_for_summary.items():
                                tmp = tf.summary.scalar(item_name, item_value)
                                list_of_summaries.append(tmp)
                        with tf.name_scope('individual'):
                            p, r, f = tf.unstack(stats_after_each_epoch['average_prf'])
                            items_for_summary = dict(precision=p, recall=r, f1=f)
                            for item_name, item_value in items_for_summary.items():
                                tmp = tf.summary.scalar(item_name, item_value)
                                list_of_summaries.append(tmp)
                        statistical_summary = tf.summary.merge(list_of_summaries)
                    # Disabled counterpart for the training split (string
                    # literal, not executed).
                    '''
                    else:
                        list_of_summaries = []
                        with tf.name_scope('ensemble'):
                            p, r, f, lo = tf.unstack(stats_after_each_epoch['ensemble_prf_and_loss'])
                            items_for_summary = dict(precision=p, recall=r, f1=f, average_loss=lo)
                            for item_name, item_value in items_for_summary.items():
                                tmp = tf.summary.scalar(item_name, item_value)
                                list_of_summaries.append(tmp)
                    statistical_summary = tf.summary.merge(list_of_summaries)           
                    '''

                with tf.name_scope('images'):
                    # Only the first 187 frames (6 * 16000 // 512) are drawn;
                    # presumably 6 s of 16 kHz audio with hop size 512 -- TODO confirm.
                    image_summary_length = int(6 * 16000 // 512)
                    # Labels as a 0/255 image channel.
                    labels_uint8 = self.batch['label'][:, :image_summary_length, :]
                    labels_uint8 = tf.cast(labels_uint8, tf.uint8) * 255
                    #assert labels_uint8.dtype == tf.uint8
                    labels_uint8 = labels_uint8[..., None]

                    # Only the first (left) half of the stacked logits is shown.
                    _logits_left = tf.split(value=logits_stereo, num_or_size_splits=2, axis=0)[0]

                    # Sigmoid probabilities scaled to 0..255 as a second channel.
                    logits_prob_uint8 = tf.sigmoid(_logits_left[:, :image_summary_length, :])
                    logits_prob_uint8 = tf.cast(logits_prob_uint8 * 255., tf.uint8)
                    logits_prob_uint8 = logits_prob_uint8[..., None]

                    # Channel order: labels, probabilities, zeros.
                    images = tf.concat([labels_uint8, logits_prob_uint8, tf.zeros_like(labels_uint8)], axis=-1)
                    # Swap axes 1 and 2 so the frame axis becomes the image width.
                    images = tf.transpose(images, [0, 2, 1, 3])
                    # 88 is the size of the last label/logit axis; presumably
                    # the number of pitches -- TODO confirm.
                    images.set_shape([None, 88, image_summary_length, 3])
                    image_summary = tf.summary.image('images', images)

                if self.is_training:
                    with tf.name_scope('params'):
                        # One histogram per trainable variable, keyed by op name.
                        var_summary_dict = dict()
                        for var in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
                            var_summary_dict[var.op.name] = tf.summary.histogram(var.op.name, var)
                        param_summary = tf.summary.merge(list(var_summary_dict.values()))

        # Publish the ops. For training only the training op is exposed; the
        # image and parameter summaries built above are currently not exported.
        if self.is_training:
            op_dict = dict(
                training_op=training_op,
                #tb_summary=dict(statistics=statistical_summary, image=image_summary, parameter=param_summary),
                #tb_summary=dict(image=image_summary, parameter=param_summary),
                #update_op_after_each_batch=update_op_after_each_batch,
                #statistics_after_each_epoch=stats_after_each_epoch
            )
        else:
            op_dict = dict(
                tb_summary=dict(statistics=statistical_summary, image=image_summary),
                update_op_after_each_batch=update_op_after_each_batch,
                statistics_after_each_epoch=stats_after_each_epoch
            )

        self.op_dict = op_dict
Beispiel #40
0
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         additional_fields=None,
                                         scope=None,
                                         parallel_iterations=32):
    """Multi-class version of non maximum suppression that operates on a batch.

    This op is similar to `multiclass_non_max_suppression` but operates on a batch
    of boxes and scores. See documentation for `multiclass_non_max_suppression`
    for details.

    Args:
      boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
        detections. If `q` is 1 then same boxes are used for all classes
        otherwise, if `q` is equal to number of classes, class-specific boxes
        are used.
      scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
        the scores for each of the `num_anchors` detections.
      score_thresh: scalar threshold for score (low scoring boxes are removed).
      iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
        with previously selected boxes are removed).
      max_size_per_class: maximum number of retained boxes per class.
      max_total_size: maximum number of boxes retained over all classes. By
        default returns all boxes retained after capping boxes per class.
      clip_window: A float32 tensor of shape [batch_size, 4]  where each entry is
        of the form [y_min, x_min, y_max, x_max] representing the window to clip
        boxes to before performing non-max suppression. This argument can also be
        a tensor of shape [4] in which case, the same clip window is applied to
        all images in the batch. If clip_window is None, a window spanning the
        extreme coordinates of all boxes in the batch is used, so no box is
        cut by it.
      change_coordinate_frame: Whether to normalize coordinates after clipping
        relative to clip_window (this can only be set to True if a clip_window
        is provided)
      num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
        [batch_size] representing the number of valid boxes to be considered
        for each image in the batch.  This parameter allows for ignoring zero
        paddings.
      masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
        float32 tensor containing box masks. `q` can be either number of classes
        or 1 depending on whether a separate mask is predicted per class.
      additional_fields: (optional) If not None, a dictionary that maps keys to
        tensors whose dimensions are [batch_size, num_anchors, ...].
      scope: tf scope name.
      parallel_iterations: (optional) number of batch items to process in
        parallel.

    Returns:
      'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
        containing the non-max suppressed boxes.
      'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
        the scores for the boxes.
      'nmsed_classes': A [batch_size, max_detections] float32 tensor
        containing the class for boxes.
      'nmsed_masks': (optional) a
        [batch_size, max_detections, mask_height, mask_width] float32 tensor
        containing masks for each selected box. This is set to None if input
        `masks` is None.
      'nmsed_additional_fields': (optional) a dictionary of
        [batch_size, max_detections, ...] float32 tensors corresponding to the
        tensors specified in the input `additional_fields`. This is set to None
        if input `additional_fields` is None.
      'num_detections': A [batch_size] int32 tensor indicating the number of
        valid detections per batch item. Only the top num_detections[i] entries in
        nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
        entries are zero paddings.

    Raises:
      ValueError: if `q` in boxes.shape is not 1 or not equal to number of
        classes as inferred from scores.shape, or if `change_coordinate_frame`
        is True while `clip_window` is None.
    """
    q = boxes.shape[2].value
    num_classes = scores.shape[2].value
    if q != 1 and q != num_classes:
        raise ValueError('third dimension of boxes must be either 1 or equal '
                         'to the third dimension of scores')
    if change_coordinate_frame and clip_window is None:
        # The trailing space matters: the two literals are concatenated.
        raise ValueError(
            'if change_coordinate_frame is True, then a clip_window '
            'must be specified.')
    # Remember what the caller passed: dummies are substituted below and the
    # corresponding outputs must be reset to None at the end.
    original_masks = masks
    original_additional_fields = additional_fields
    with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
        boxes_shape = boxes.shape
        batch_size = boxes_shape[0].value
        num_anchors = boxes_shape[1].value

        # Fall back to dynamic shapes when the static ones are unknown.
        if batch_size is None:
            batch_size = tf.shape(boxes)[0]
        if num_anchors is None:
            num_anchors = tf.shape(boxes)[1]

        # If num valid boxes aren't provided, create one and mark all boxes as
        # valid.
        if num_valid_boxes is None:
            num_valid_boxes = tf.ones([batch_size],
                                      dtype=tf.int32) * num_anchors

        # If masks aren't provided, create dummy masks so we can only have one copy
        # of _single_image_nms_fn and discard the dummy masks after map_fn.
        if masks is None:
            masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
            masks = tf.zeros(masks_shape)

        # Without an explicit window, use the bounding window of all boxes in
        # the batch.
        if clip_window is None:
            clip_window = tf.stack([
                tf.reduce_min(boxes[:, :, :, 0]),
                tf.reduce_min(boxes[:, :, :, 1]),
                tf.reduce_max(boxes[:, :, :, 2]),
                tf.reduce_max(boxes[:, :, :, 3])
            ])
        # A single [4] window is replicated for every image in the batch.
        if clip_window.shape.ndims == 1:
            clip_window = tf.tile(tf.expand_dims(clip_window, 0),
                                  [batch_size, 1])

        # From here on additional_fields is always a dict (possibly empty).
        if additional_fields is None:
            additional_fields = {}

        def _single_image_nms_fn(args):
            """Runs NMS on a single image and returns padded output.

            Args:
              args: A list of tensors consisting of the following:
                per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
                  detections. If `q` is 1 then same boxes are used for all classes
                  otherwise, if `q` is equal to number of classes, class-specific
                  boxes are used.
                per_image_scores - A [num_anchors, num_classes] float32 tensor
                  containing the scores for each of the `num_anchors` detections.
                per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
                  tensor containing box masks. `q` can be either number of classes
                  or 1 depending on whether a separate mask is predicted per class.
                per_image_clip_window - A 1D float32 tensor of the form
                  [ymin, xmin, ymax, xmax] representing the window to clip the boxes
                  to.
                per_image_additional_fields - (optional) A variable number of float32
                  tensors each with size [num_anchors, ...].
                per_image_num_valid_boxes - A scalar `int32` tensor with the
                  number of valid boxes of this image; the remaining entries
                  are treated as padding and sliced off.

            Returns:
              'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
                non-max suppressed boxes.
              'nmsed_scores': A [max_detections] float32 tensor containing the scores
                for the boxes.
              'nmsed_classes': A [max_detections] float32 tensor containing the class
                for boxes.
              'nmsed_masks': a [max_detections, mask_height, mask_width]
                float32 tensor containing masks for each selected box.
              'nmsed_additional_fields':  (optional) A variable number of float32
                tensors each with size [max_detections, ...] corresponding to the
                input `per_image_additional_fields`.
              'num_detections': the number of boxes in the NMS result before
                padding/clipping, as returned by `num_boxes()`.
            """
            per_image_boxes = args[0]
            per_image_scores = args[1]
            per_image_masks = args[2]
            per_image_clip_window = args[3]
            # Everything between the four fixed inputs and the trailing
            # num_valid_boxes is an additional field, in dict order.
            per_image_additional_fields = {
                key: value
                for key, value in zip(additional_fields, args[4:-1])
            }
            per_image_num_valid_boxes = args[-1]
            # Keep only the valid (non-padding) leading entries of each input.
            per_image_boxes = tf.reshape(
                tf.slice(per_image_boxes, 3 * [0],
                         tf.stack([per_image_num_valid_boxes, -1, -1])),
                [-1, q, 4])
            per_image_scores = tf.reshape(
                tf.slice(per_image_scores, [0, 0],
                         tf.stack([per_image_num_valid_boxes, -1])),
                [-1, num_classes])
            per_image_masks = tf.reshape(
                tf.slice(per_image_masks, 4 * [0],
                         tf.stack([per_image_num_valid_boxes, -1, -1, -1])), [
                             -1, q, per_image_masks.shape[2].value,
                             per_image_masks.shape[3].value
                         ])
            # The dict always exists here, so no None check is needed. The
            # item list is materialised because values are reassigned inside
            # the loop.
            for key, tensor in list(per_image_additional_fields.items()):
                additional_field_shape = tensor.get_shape()
                additional_field_dim = len(additional_field_shape)
                per_image_additional_fields[key] = tf.reshape(
                    tf.slice(
                        tensor,
                        additional_field_dim * [0],
                        tf.stack([per_image_num_valid_boxes] +
                                 (additional_field_dim - 1) * [-1])),
                    [-1] +
                    [dim.value for dim in additional_field_shape[1:]])
            nmsed_boxlist = multiclass_non_max_suppression(
                per_image_boxes,
                per_image_scores,
                score_thresh,
                iou_thresh,
                max_size_per_class,
                max_total_size,
                clip_window=per_image_clip_window,
                change_coordinate_frame=change_coordinate_frame,
                masks=per_image_masks,
                additional_fields=per_image_additional_fields)
            # Pad/clip so every image yields tensors of the same size.
            # NOTE(review): with the default max_total_size=0 the result
            # depends on how pad_or_clip_box_list treats 0 -- confirm.
            padded_boxlist = box_list_ops.pad_or_clip_box_list(
                nmsed_boxlist, max_total_size)
            num_detections = nmsed_boxlist.num_boxes()
            nmsed_boxes = padded_boxlist.get()
            nmsed_scores = padded_boxlist.get_field(
                fields.BoxListFields.scores)
            nmsed_classes = padded_boxlist.get_field(
                fields.BoxListFields.classes)
            nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
            nmsed_additional_fields = [
                padded_boxlist.get_field(key)
                for key in per_image_additional_fields
            ]
            return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
                    nmsed_additional_fields + [num_detections])

        # Boxes, scores, classes and masks plus one output per additional
        # field are float32; the trailing num_detections is int32.
        num_nmsed_outputs = 4 + len(additional_fields)

        batch_outputs = shape_utils.static_or_dynamic_map_fn(
            _single_image_nms_fn,
            elems=([boxes, scores, masks, clip_window] +
                   list(additional_fields.values()) + [num_valid_boxes]),
            dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
            parallel_iterations=parallel_iterations)

        batch_nmsed_boxes = batch_outputs[0]
        batch_nmsed_scores = batch_outputs[1]
        batch_nmsed_classes = batch_outputs[2]
        batch_nmsed_masks = batch_outputs[3]
        batch_nmsed_additional_fields = {
            key: value
            for key, value in zip(additional_fields, batch_outputs[4:-1])
        }
        batch_num_detections = batch_outputs[-1]

        # Drop the outputs that only exist because of the substituted dummies.
        if original_masks is None:
            batch_nmsed_masks = None

        if original_additional_fields is None:
            batch_nmsed_additional_fields = None

        return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
                batch_nmsed_masks, batch_nmsed_additional_fields,
                batch_num_detections)
# Build the classifier inside its own variable scope.
with tf.variable_scope('test_model', reuse=False):
    if version == 1:
        model = resnet_v1(input_shape=input_shape, depth=depth)
    elif version == 2:
        model = resnet_v2(input_shape=input_shape, depth=depth)
    else:
        # Without this branch `model` would stay unbound and the script would
        # fail on the next statement with an unrelated NameError.
        raise ValueError('unsupported ResNet version: {!r}'.format(version))

# Saver restricted to the model's trainable weights; max_to_keep=None keeps
# every checkpoint.
var_cls = model.trainable_weights
saver_model = tf.train.Saver(var_cls, max_to_keep = None)

# Augmentation: zero-pad 4 pixels on each side, take a random crop of the
# original size, then randomly flip left/right.
aug_1 = tf.image.pad_to_bounding_box(x_train_tf, 4, 4, height + 8, width + 8)
aug_2 = tf.image.random_crop(aug_1, [batch_size, height, width, nch])
aug_3 = tf.image.random_flip_left_right(aug_2)

# Repeat every image k_macer times: tiling along the flattened feature axis
# and reshaping back places the k_macer copies of an image on consecutive rows.
x_train_tf_reshaped = tf.reshape(aug_3, [-1, height*width*nch])
repeated_x_train_tf = tf.tile(x_train_tf_reshaped, [1, k_macer])
repeated_x_train_tf = tf.reshape(repeated_x_train_tf, [-1, height*width*nch])
repeated_x_train_tf = tf.reshape(repeated_x_train_tf, [-1, height, width, nch])

# Independent Gaussian noise with standard deviation sigma_macer per copy.
noise = tf.random.normal(repeated_x_train_tf.shape) * sigma_macer

noisy_inputs = repeated_x_train_tf + noise

# Logits of the noisy copies, regrouped as [image, copy, class].
outputs = KerasModelWrapper(model).get_logits(noisy_inputs)
outputs = tf.reshape(outputs, [-1, k_macer, nclass])

# Logits on the test inputs (no noise added).
cls_test = KerasModelWrapper(model).get_logits(x_test_tf)

# Classification loss on smoothed predictions: softmax over classes, averaged
# over the k_macer noisy copies; the small constant keeps the log finite.
outputs_softmax = tf.reduce_mean(tf.nn.softmax(outputs, axis = 2), axis = 1)
log_softmax = tf.math.log(outputs_softmax + 1E-10)
Beispiel #42
0
    def __init__(self, is_training, config):
        """Build the memory-augmented LSTM graph, its prediction and its cost.

        Args:
            is_training: when truthy (and config.keep_prob < 1) each LSTM layer
                is wrapped in a DropoutWrapper.
            config: object providing batch_size, num_skills, num_steps and
                keep_prob.

        Besides `config`, this reads FLAGS.num_cluster, FLAGS.memory_size,
        FLAGS.memory_value_state_dim, FLAGS.hidden_layer_num and
        FLAGS.keep_prob.
        """
        self._batch_size = batch_size = config.batch_size
        self.num_skills = num_skills = config.num_skills
        self.num_steps = num_steps = config.num_steps

        # Depths of the one-hot encodings built below.
        label_size = (num_skills * 2)
        id_size = num_skills
        df_size = 11
        cluster_size = (FLAGS.num_cluster + 1)
        reuse_flag = False

        output_size = (cluster_size)

        # Integer inputs, one value per (batch element, time step).
        self.current_label = tf.placeholder(tf.int32, [batch_size, num_steps],
                                            name='current')
        self.next = tf.placeholder(tf.int32, [batch_size, num_steps],
                                   name='next')
        self.next_label = tf.placeholder(tf.int32, [batch_size, num_steps],
                                         name='next_label')
        self.ndf = tf.placeholder(tf.int32, [batch_size, num_steps], name='pd')
        self.cluster = tf.placeholder(tf.int32, [batch_size, num_steps],
                                      name='cluster')

        # Flat indices into the flattened logits, and the matching targets.
        self._target_id = target_id = tf.placeholder(tf.int32, [None])
        self._target_correctness = target_correctness = tf.placeholder(
            tf.float32, [None])
        #final_hidden_size = size

        #one-hot encoding
        # NOTE(review): one_hot_output is defined elsewhere; its result is
        # indexed by time step below and each element is squeezed on axis 1,
        # so it presumably returns per-step slices -- confirm.
        current_label = tf.reshape(self.current_label, [-1])
        slice_cl_data = one_hot_output(current_label, label_size, batch_size,
                                       num_steps)

        next_label = tf.reshape(self.next_label, [-1])
        slice_nl_data = one_hot_output(next_label, label_size, batch_size,
                                       num_steps)

        # NOTE(review): the local name `next` shadows the builtin.
        next = tf.reshape(self.next, [-1])
        slice_x_data = one_hot_output(next, id_size, batch_size, num_steps)

        ndf = tf.reshape(self.ndf, [-1])
        slice_ndf_data = one_hot_output(ndf, df_size, batch_size, num_steps)

        cluster = tf.reshape(self.cluster, [-1])
        slice_cluster_data = one_hot_output(cluster, cluster_size, batch_size,
                                            num_steps)

        # Trainable initial key/value matrices of the external memory.
        with tf.variable_scope('Memory'):
            init_memory_key = tf.get_variable(
                'key', [FLAGS.memory_size, (id_size + cluster_size)],
                initializer=tf.truncated_normal_initializer(stddev=0.1))
            init_memory_value = tf.get_variable(
                'value', [FLAGS.memory_size, FLAGS.memory_value_state_dim],
                initializer=tf.truncated_normal_initializer(stddev=0.1))

        # Give every batch element its own copy of the initial value matrix.
        init_memory_value = tf.tile(tf.expand_dims(init_memory_value, 0),
                                    tf.stack([batch_size, 1, 1]))

        memory = DSCMN(FLAGS.memory_size,
                       id_size,
                       FLAGS.memory_value_state_dim,
                       init_memory_key=init_memory_key,
                       init_memory_value=init_memory_value,
                       name='DSCMN')

        # Per step: address the memory with [next id, cluster], read from it,
        # build the RNN input, then write the next label back to the memory.
        input_l = []
        for i in range(num_steps):
            # The write call is told to reuse its variables after step 0.
            if i != 0:
                reuse_flag = True

            current_label = tf.squeeze(slice_cl_data[i], 1)
            next_label = tf.squeeze(slice_nl_data[i], 1)

            next_id = tf.squeeze(slice_x_data[i], 1)
            df = tf.squeeze(slice_ndf_data[i], 1)
            cu = tf.squeeze(slice_cluster_data[i], 1)

            m = tf.concat([next_id, cu], 1)
            correlation_weight = memory.attention(m)

            read_content = memory.read(correlation_weight)
            m1 = tf.concat([current_label, read_content, df], 1)
            input_l.append(m1)

            update = tf.concat([next_label], 1)
            # NOTE(review): new_memory_value is not used again in this method;
            # presumably memory.write updates the DSCMN object's own state --
            # confirm.
            new_memory_value = memory.write(correlation_weight,
                                            update,
                                            reuse=reuse_flag)

        # Stack the per-step inputs, then split them back into a list of
        # num_steps tensors, the input format static_rnn expects.
        input_ = tf.stack(input_l)
        input_size = int(input_[0].get_shape()[1])
        x_input = tf.reshape(input_, [-1, input_size])
        x_input = tf.split(x_input, num_steps, 0)

        # Every LSTM layer has as many units as the input has features.
        final_hidden_size = input_size
        hidden_layers = []
        for i in range(FLAGS.hidden_layer_num):
            # NOTE(review): self-assignment, has no effect.
            final_hidden_size = final_hidden_size
            hidden1 = tf.nn.rnn_cell.LSTMCell(final_hidden_size,
                                              state_is_tuple=True)
            # NOTE(review): the condition reads config.keep_prob but the
            # wrapper uses FLAGS.keep_prob -- confirm these always agree.
            if is_training and config.keep_prob < 1:
                hidden1 = tf.nn.rnn_cell.DropoutWrapper(
                    hidden1, output_keep_prob=FLAGS.keep_prob)
            hidden_layers.append(hidden1)

        cell = tf.nn.rnn_cell.MultiRNNCell(hidden_layers, state_is_tuple=True)

        outputs, state = rnn.static_rnn(cell, x_input, dtype=tf.float32)
        output = tf.reshape(tf.concat(outputs, 1),
                            [-1, int(final_hidden_size)])
        # Linear read-out to output_size logits per row, flattened so that
        # individual logits can be picked by flat index.
        sigmoid_w = tf.get_variable("sigmoid_w",
                                    [final_hidden_size, output_size])
        sigmoid_b = tf.get_variable("sigmoid_b", [output_size])
        logits = tf.matmul(output, sigmoid_w) + sigmoid_b
        logits = tf.reshape(logits, [-1])
        # NOTE(review): reads self.target_id, while only self._target_id is
        # assigned in this method -- presumably a property defined elsewhere
        # on the class; confirm.
        selected_logits = tf.gather(logits, self.target_id)
        self._all_logits = logits

        #make prediction
        self._pred = tf.sigmoid(selected_logits)

        # loss function
        # Summed (not averaged) sigmoid cross-entropy over the selected logits.
        loss = tf.reduce_sum(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=selected_logits,
                                                    labels=target_correctness))
        self._cost = cost = loss
Beispiel #43
0
  def sample(self, n, max_length=None, z=None, temperature=None,
             start_inputs=None, beam_width=None, end_token=None):
    """Samples sequences, optionally decoding with beam search.

    Without `beam_width` this defers to the parent class `sample`, passing an
    end-of-sequence predicate built from `end_token` (if any). With
    `beam_width` it runs a `BeamSearchDecoder` and returns the top beam.

    Args:
      n: Scalar number of samples to return.
      max_length: (Optional) Scalar maximum sample length to return. Required if
        data representation does not include end tokens.
      z: (Optional) Latent vectors to sample from, sized `[n, z_size]`.
        Required if the model is conditional.
      temperature: (Optional) Softmax temperature for non-beam sampling.
        Defaults to 1.0. Ignored (with a warning) when `beam_width` is given.
      start_inputs: (Optional) Initial inputs for the batch, sized
        `[n, output_depth]`.
      beam_width: (Optional) Beam width. Beam search is disabled when omitted.
      end_token: (Optional) Scalar token that marks the end of a sequence and
        enables early stopping.
    Returns:
      samples: Sampled sequences. Sized `[n, max_length, output_depth]`.
    Raises:
      ValueError: If `z` is provided and its first dimension does not equal `n`.
    """
    if beam_width is None:
      # Plain sampling path: only an end-of-sequence predicate is needed.
      if end_token is None:
        stop_fn = None
      else:
        def stop_fn(x):
          return tf.equal(tf.argmax(x, axis=-1), end_token)
      parent = super(CategoricalLstmDecoder, self)
      return parent.sample(
          n, max_length, z, temperature, start_inputs, stop_fn)

    # Without an `end_token`, fall back to an id no one-hot output can take.
    if end_token is None:
      end_token = self._output_depth

    if z is not None:
      z_batch = z.shape[0].value
      if z_batch != n:
        raise ValueError(
            '`z` must have a first dimension that equals `n` when given. '
            'Got: %d vs %d' % (z_batch, n))

    if temperature is not None:
      tf.logging.warning('`temperature` is ignored when using beam search.')

    # Unconditional models get a zero-width dummy latent.
    if z is None:
      z = tf.zeros((n, 0), tf.float32)

    # With no start inputs, begin from a dummy `-1` token; `embed_tokens`
    # below turns it into an all-zero input vector.
    if start_inputs is not None:
      start_tokens = tf.argmax(start_inputs, axis=-1, output_type=tf.int32)
    else:
      start_tokens = -1 * tf.ones([n], dtype=tf.int32)

    initial_state = initial_cell_state_from_embedding(
        self._dec_cell, z, n, name='decoder/z_to_initial_state')
    beam_initial_state = tf.contrib.seq2seq.tile_batch(
        initial_state, multiplier=beam_width)

    # One copy of `z` per beam: [n, beam_width, z_size].
    beam_z = tf.tile(tf.expand_dims(z, 1), [1, beam_width, 1])

    def embed_tokens(tokens):
      """Maps token ids to decoder inputs with `z` appended."""
      def zero_inputs():
        return tf.zeros([n, beam_width, self._output_depth])

      def one_hot_inputs():
        return tf.one_hot(tokens, self._output_depth)

      # A negative first token means these are the dummy start tokens.
      is_start = tf.less(tokens[0, 0], 0)
      token_inputs = tf.cond(is_start, zero_inputs, one_hot_inputs)
      return tf.concat([token_inputs, beam_z], axis=-1)

    decoder = tf.contrib.seq2seq.BeamSearchDecoder(
        self._dec_cell,
        embed_tokens,
        start_tokens,
        end_token,
        beam_initial_state,
        beam_width,
        output_layer=self._output_layer,
        length_penalty_weight=0.0)

    final_output, _, _ = tf.contrib.seq2seq.dynamic_decode(
        decoder,
        maximum_iterations=max_length,
        swap_memory=True,
        scope='decoder')

    # Keep only beam 0 and re-encode its ids as one-hot vectors.
    top_beam_ids = final_output.predicted_ids[:, :, 0]
    return tf.one_hot(top_beam_ids, self._output_depth)
Beispiel #44
0
 def _repeat(x, n_repeats):
     """Repeat each entry of `x` `n_repeats` times and return a flat tensor.

     A trailing axis is added to `x`, tiled `n_repeats` times, and the result
     is flattened, so the copies of one entry are consecutive. The two-element
     tile multiples mean `x` has to be rank 1.
     """
     with tf.variable_scope('_repeat'):
         as_column = tf.expand_dims(x, 1)
         tiled = tf.tile(as_column, [1, n_repeats])
         flat = tf.reshape(tiled, [-1])
     return flat
Beispiel #45
0
        #         tf.random.categorical(
        #             tf.ones((1, 36), dtype=tf.float32),
        #             32)[0]))
        #
        # h_xz = tf.reduce_sum(tf.square(z))
        # ts_xz = tf.reduce_sum(log_det)

        # loss_xz = tf.reduce_sum(tf.square(z)) - tf.reduce_sum(log_det)
        #
        # Draw a batch of 256 standard-normal latent samples of shape (6, 3).
        z = tf.random.normal((256, 6, 3), stddev=1)

        # Push the latents through the flow; `walk` is tiled 256 times along
        # axis 0 to match the batch.
        # NOTE(review): presumably f_zx is the latent-to-coordinate direction
        # and log_det its log-Jacobian determinant -- confirm.
        x_, log_det = graph_flow.f_zx(
            z,
            atoms,
            adjacency_map,
            tf.tile(walk, [256, 1]))

        # Energy terms of the generated coordinates x_.
        bond_energy, angle_energy, one_four_energy, nonbonded_energy = gin.deterministic.mm.alkane_energy.alkane_energy(
            atoms, adjacency_map, x_)

        # Only the bond and angle terms, each divided by kB * T, enter h_zx;
        # the one-four and nonbonded terms are computed but not used here.
        h_zx = tf.reduce_sum(bond_energy/(kB * T)) + tf.reduce_sum(angle_energy/(kB * T)) # + tf.reduce_sum(torsion_energy)# + tf.reduce_sum(one_four_energy)
        ts_zx = tf.reduce_sum(log_det)

        # Record both scalars as NumPy values.
        h_.append(h_zx.numpy())
        ts_.append(ts_zx.numpy())

        # # loss_zx = tf.reduce_sum(h_zx) - tf.reduce_sum(ts_zx)
        #
        # bond_energy, angle_energy, one_four_energy, nonbonded_energy = gin.deterministic.mm.alkane_energy.alkane_energy(
        #     mol[0], mol[1], x)
        #
def pointnet_sa_module_msg(xyz,
                           points,
                           npoint,
                           radius_list,
                           nsample_list,
                           mlp_list,
                           is_training,
                           bn_decay,
                           scope,
                           bn=True,
                           use_xyz=True,
                           use_nchw=False):
    r''' PointNet Set Abstraction (SA) module with Multi-Scale Grouping (MSG)

        For every radius in radius_list, the points around each sampled
        centroid are grouped, passed through that scale's stack of 1x1
        convolutions and max-pooled over the neighbourhood; the per-scale
        features are concatenated along the channel axis.

        Input:
            xyz: (batch_size, ndataset, 3) TF tensor
            points: (batch_size, ndataset, channel) TF tensor, or None to use
                only the centred xyz coordinates as features
            npoint: int32 -- #points sampled in farthest point sampling
            radius_list: list of float32 -- search radius in local region
            nsample_list: list of int32 -- how many points in each local
                region, one entry per radius
            mlp_list: list of list of int32 -- output size for MLP on each
                point, one inner list per radius
            is_training: forwarded to tf_util.conv2d
            bn_decay: forwarded to tf_util.conv2d
            scope: name of the variable scope wrapping the whole module
            bn: bool, forwarded to tf_util.conv2d
            use_xyz: bool, if True concat XYZ with local point features, otherwise just use point features
            use_nchw: bool, if True, the grouped features are transposed to
                NCHW before the convolutions and back to NHWC afterwards
        Return:
            new_xyz: (batch_size, npoint, 3) TF tensor
            new_points: (batch_size, npoint, \sum_k{mlp[k][-1]}) TF tensor
    '''
    # NOTE(review): the original built a 'NCHW'/'NHWC' data_format string here
    # but never passed it on, so tf_util.conv2d is not told about the NCHW
    # layout. Confirm whether conv2d should receive it when use_nchw is True.
    with tf.variable_scope(scope):
        # Centroids chosen by farthest point sampling.
        new_xyz = gather_point(xyz, farthest_point_sample(npoint, xyz))
        new_points_list = []
        for i, radius in enumerate(radius_list):
            nsample = nsample_list[i]
            idx, _ = query_ball_point(radius, nsample, xyz, new_xyz)
            grouped_xyz = group_point(xyz, idx)
            # Express neighbour coordinates relative to their centroid.
            grouped_xyz -= tf.tile(tf.expand_dims(new_xyz, 2),
                                   [1, 1, nsample, 1])
            if points is not None:
                grouped_points = group_point(points, idx)
                if use_xyz:
                    grouped_points = tf.concat([grouped_points, grouped_xyz],
                                               axis=-1)
            else:
                grouped_points = grouped_xyz
            if use_nchw:
                grouped_points = tf.transpose(grouped_points, [0, 3, 1, 2])
            for j, num_out_channel in enumerate(mlp_list[i]):
                grouped_points = tf_util.conv2d(grouped_points,
                                                num_out_channel, [1, 1],
                                                padding='VALID',
                                                stride=[1, 1],
                                                bn=bn,
                                                is_training=is_training,
                                                scope='conv%d_%d' % (i, j),
                                                bn_decay=bn_decay)
            if use_nchw:
                grouped_points = tf.transpose(grouped_points, [0, 2, 3, 1])
            # Max-pool over the neighbourhood (nsample) axis.
            new_points = tf.reduce_max(grouped_points, axis=[2])
            new_points_list.append(new_points)
        new_points_concat = tf.concat(new_points_list, axis=-1)
        return new_xyz, new_points_concat
def _selective_crop_and_resize(features,
                               boxes,
                               box_levels,
                               boundaries,
                               output_size=7,
                               sample_offset=0.5,
                               use_einsum_gather=False):
    """Crop and resize boxes on a set of feature maps.

    Given multiple features maps indexed by different levels, and a set of
    boxes where each box is mapped to a certain level, it selectively crops
    and resizes boxes from the corresponding feature maps to generate the box
    features.

    We follow the ROIAlign technique (see https://arxiv.org/pdf/1703.06870.pdf,
    figure 3 for reference). Specifically, for each feature map, we select an
    (output_size, output_size) set of pixels corresponding to the box
    location, and then use bilinear interpolation to select the feature value
    for each pixel.

    For performance, we perform the gather and interpolation on all layers as
    a single operation. In this op the multi-level features are first stacked
    and gathered into [2*output_size, 2*output_size] feature points. Then
    bilinear interpolation is performed on the gathered feature points to
    generate [output_size, output_size] RoIAlign feature map.

    Here is the step-by-step algorithm:
      1. The multi-level features are gathered into a
         [batch_size, num_boxes, output_size*2, output_size*2, num_filters]
         Tensor. The Tensor contains four neighboring feature points for each
         vertex in the output grid.
      2. Compute the interpolation kernel of shape
         [batch_size, num_boxes, output_size*2, output_size*2]. The last 2
         axis can be seen as stacking 2x2 interpolation kernels for all
         vertices in the output grid.
      3. Element-wise multiply the gathered features and interpolation
         kernel. Then apply 2x2 average pooling to reduce spatial dimension to
         output_size.

    Args:
      features: a 5-D tensor of shape [batch_size, num_levels, max_height,
        max_width, num_filters] where cropping and resizing are based.
      boxes: a 3-D tensor of shape [batch_size, num_boxes, 4] encoding the
        information of each box w.r.t. the corresponding feature map.
        boxes[:, :, 0:2] are the grid position in (y, x) (float) of the
        top-left corner of each box. boxes[:, :, 2:4] are the box sizes in
        (h, w) (float) in terms of the number of pixels of the corresponding
        feature map size.
      box_levels: a 3-D tensor of shape [batch_size, num_boxes, 1]
        representing the 0-based corresponding feature level index of each
        box.
      boundaries: a 3-D tensor of shape [batch_size, num_boxes, 2]
        representing the boundary (in (y, x)) of the corresponding feature map
        for each box. Any resampled grid points that go beyond the boundary
        will be clipped.
      output_size: a scalar indicating the output crop size.
      sample_offset: a float number in [0, 1] indicates the subpixel sample
        offset from grid point.
      use_einsum_gather: use einsum to replace gather or not. Replacing gather
        with einsum can improve performance when feature size is not large,
        einsum is friendly with model partition as well. Gather's performance
        is better when feature size is very large and there are multiple box
        levels.

    Returns:
      features_per_box: a 5-D tensor of shape
        [batch_size, num_boxes, output_size, output_size, num_filters]
        representing the cropped features.
    """
    # Static shapes are used where known; only the batch size falls back to a
    # dynamic value.
    (batch_size, num_levels, max_feature_height, max_feature_width,
     num_filters) = features.get_shape().as_list()
    if batch_size is None:
        batch_size = tf.shape(features)[0]
    _, num_boxes, _ = boxes.get_shape().as_list()

    kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions(
        boxes, boundaries, output_size, sample_offset)
    # Integer grid coordinates, 2 * output_size per box along each axis. These
    # are the indices used by the gather path; the einsum path below
    # recomputes them with a different shape.
    x_indices = tf.cast(tf.reshape(box_gridx0x1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)
    y_indices = tf.cast(tf.reshape(box_gridy0y1,
                                   [batch_size, num_boxes, output_size * 2]),
                        dtype=tf.int32)

    if use_einsum_gather:
        # Bilinear interpolation is done during the last two gathers:
        #        f(y, x) = [hy, ly] * [[f00, f01], * [hx, lx]^T
        #                              [f10, f11]]
        #        [[f00, f01],
        #         [f10, f11]] = tf.einsum(tf.einsum(features, y_one_hot), x_one_hot)
        #       where [hy, ly] and [hx, lx] are the bilinear interpolation kernel.
        y_indices = tf.cast(tf.reshape(
            box_gridy0y1, [batch_size, num_boxes, output_size, 2]),
                            dtype=tf.int32)
        x_indices = tf.cast(tf.reshape(
            box_gridx0x1, [batch_size, num_boxes, output_size, 2]),
                            dtype=tf.int32)

        # shape is [batch_size, num_boxes, output_size, 2, height]
        grid_y_one_hot = tf.one_hot(tf.cast(y_indices, tf.int32),
                                    max_feature_height,
                                    dtype=kernel_y.dtype)
        # shape is [batch_size, num_boxes, output_size, 2, width]
        grid_x_one_hot = tf.one_hot(tf.cast(x_indices, tf.int32),
                                    max_feature_width,
                                    dtype=kernel_x.dtype)

        # Weight each one-hot row by its interpolation coefficient and merge
        # the two neighbours of every output vertex.
        # shape is [batch_size, num_boxes, output_size, height]
        grid_y_weight = tf.reduce_sum(tf.multiply(grid_y_one_hot, kernel_y),
                                      axis=-2)
        # shape is [batch_size, num_boxes, output_size, width]
        grid_x_weight = tf.reduce_sum(tf.multiply(grid_x_one_hot, kernel_x),
                                      axis=-2)

        # NOTE(review): both einsums share the subscript `m` between their two
        # operands, so the axis-1 sizes must agree (num_levels of `features`
        # vs. num_boxes of the weights, up to broadcasting) -- confirm how
        # callers prepare `features` for this path.
        # Gather for y_axis.
        # shape is [batch_size, num_boxes, output_size, width, features]
        features_per_box = tf.einsum('bmhwf,bmoh->bmowf', features,
                                     tf.cast(grid_y_weight, features.dtype))
        # Gather for x_axis.
        # shape is [batch_size, num_boxes, output_size, output_size, features]
        features_per_box = tf.einsum('bmhwf,bmow->bmhof', features_per_box,
                                     tf.cast(grid_x_weight, features.dtype))
    else:
        # Strides of the feature tensor once it is flattened to
        # [batch * level * height * width, num_filters]: one row step, one
        # level step and one batch step respectively.
        height_dim_offset = max_feature_width
        level_dim_offset = max_feature_height * height_dim_offset
        batch_dim_offset = num_levels * level_dim_offset

        # Each term below is one contribution to the flat index, broadcast to
        # [batch_size, num_boxes, output_size * 2, output_size * 2].
        batch_size_offset = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_offset,
                [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2])
        box_levels_offset = tf.tile(
            tf.reshape(box_levels * level_dim_offset,
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, output_size * 2, output_size * 2])
        y_indices_offset = tf.tile(
            tf.reshape(y_indices * height_dim_offset,
                       [batch_size, num_boxes, output_size * 2, 1]),
            [1, 1, 1, output_size * 2])
        x_indices_offset = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1])

        # flat index = batch offset + level offset + y * width + x
        indices = tf.reshape(
            batch_size_offset + box_levels_offset + y_indices_offset +
            x_indices_offset, [-1])

        features = tf.reshape(features, [-1, num_filters])
        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        features_per_box = tf.reshape(tf.gather(features, indices), [
            batch_size, num_boxes, output_size * 2, output_size * 2,
            num_filters
        ])
        features_per_box = _feature_bilinear_interpolation(
            features_per_box, kernel_y, kernel_x)

    return features_per_box
Beispiel #48
0
    def _build_graph(self):
        """Build the training graph inside self.graph.

        Creates the inducing-point and noise variables, unrolls the latent
        state with a tf.while_loop over self._loop_body, and defines the
        likelihood, the KL regulariser, summary statistics, the loss and the
        Adam training op. Everything is read from self.config; results are
        stored on self (y_final, pred_mean, pred_var, mse, sde, loss, train,
        saver, init, ...).
        """
        dim_u = self.config['ds'].dim_u
        dim_x = self.config['dim_x']
        dim_y = self.config['ds'].dim_y
        ind_pnt_num = self.config['ind_pnt_num']
        samples = self.config['samples']
        loss_factors = self.config['loss_factors']

        with self.graph.as_default():

            # Variables
            # Inducing-point inputs live in the joint (state, input) space and
            # start uniformly in [-zeta_pos, zeta_pos].
            self.zeta_pos = tf.Variable(
                np.random.uniform(low=-self.config['zeta_pos'],
                                  high=self.config['zeta_pos'],
                                  size=(ind_pnt_num, dim_x + dim_u)))
            self.zeta_mean = tf.Variable(self.config['zeta_mean'] *
                                         np.random.rand(ind_pnt_num, dim_x))
            # NOTE(review): `backward`/`forward` are defined elsewhere;
            # presumably an unconstrained parametrisation of positive
            # variances and its inverse -- confirm.
            zeta_var_unc = tf.Variable(
                backward(self.config['zeta_var'] * np.ones(
                    (ind_pnt_num, dim_x))))
            self.zeta_var = forward(zeta_var_unc)
            var_x_unc = tf.Variable(backward(self.config['var_x']))
            self.var_x = forward(var_x_unc)
            var_y_unc = tf.Variable(backward(self.config['var_y']))
            self.var_y = forward(var_y_unc)
            self.kern = RBF(self.config['gp_var'], self.config['gp_len'])
            # Human-readable names for the quantities above.
            self.var_dict = {
                'process noise': self.var_x,
                'observation noise': self.var_y,
                'kernel lengthscales': self.kern.lengthscales,
                'kernel variance': self.kern.variance,
                'IP pos': self.zeta_pos,
                'IP mean': self.zeta_mean,
                'IP var': self.zeta_var
            }

            # Loop init
            # Slot 0 of the state array is the initial state produced by the
            # recognition model.
            x_array = tf.TensorArray(dtype=tf.float64,
                                     size=self.seq_len_tf,
                                     clear_after_read=False)
            x_0 = self._recog_model(self.sample_in, self.sample_out)
            x_array = x_array.write(0, x_0)

            # Inputs are moved to time-major order, given a sample axis
            # (axis 2) of size `samples`, and unstacked along time.
            u_array = tf.TensorArray(dtype=tf.float64,
                                     size=self.seq_len_tf,
                                     clear_after_read=False)
            u_dub = tf.transpose(self.sample_in, perm=[1, 0, 2])
            u_dub = tf.tile(tf.expand_dims(u_dub, axis=2), [1, 1, samples, 1])
            u_array = u_array.unstack(u_dub)

            # Loop
            # Runs self._loop_body sequentially while t < seq_len_tf - 1.
            u_final, x_final, t_final = tf.while_loop(
                lambda u, x, t: t < self.seq_len_tf - 1,
                self._loop_body, [u_array, x_array, 0],
                parallel_iterations=1)

            # Swap the first two axes back; the outputs are the first dim_y
            # components of the state.
            x_final = tf.transpose(x_final.stack(), perm=[1, 0, 2, 3])
            self.y_final = x_final[:, :, :, :dim_y]

            # Likelihood
            # Diagonal Gaussian around y_final with variance var_y; the
            # observations are repeated along the sample axis and all
            # log-probabilities are summed.
            var_y_exp = tf.expand_dims(
                tf.expand_dims(tf.expand_dims(self.var_y, 0), 0), 0)
            var_full = tf.tile(var_y_exp,
                               [self.batch_tf, self.seq_len_tf, samples, 1])
            y_dist = tf.contrib.distributions.MultivariateNormalDiag(
                loc=self.y_final, scale_diag=tf.sqrt(var_full))
            obs = tf.tile(tf.expand_dims(self.sample_out, 2),
                          [1, 1, samples, 1])
            log_probs = y_dist.log_prob(obs)
            loglik = tf.reduce_sum(log_probs)

            # KL-Regularizer
            # KL between the diagonal Gaussian over inducing outputs and a
            # zero-mean prior whose covariance is the kernel matrix at the
            # inducing inputs (shared by all dim_x state dimensions).
            k_prior = self.kern.K(self.zeta_pos, self.zeta_pos)
            scale_prior = tf.tile(tf.expand_dims(tf.cholesky(k_prior), 0),
                                  [dim_x, 1, 1])
            zeta_prior = tf.contrib.distributions.MultivariateNormalTriL(
                loc=tf.zeros((dim_x, ind_pnt_num), dtype=tf.float64),
                scale_tril=scale_prior)
            zeta_dist = tf.contrib.distributions.MultivariateNormalDiag(
                loc=tf.transpose(self.zeta_mean),
                scale_diag=tf.sqrt(tf.transpose(self.zeta_var)))
            kl_reg = tf.reduce_sum(
                tf.contrib.distributions.kl_divergence(zeta_dist, zeta_prior))

            # Statistics
            # Moments are taken over the sample axis (axis 2); the observation
            # noise variance is added to the predictive variance.
            self.pred_mean, self.pred_var = tf.nn.moments(self.y_final,
                                                          axes=[2])
            self.pred_var = tf.add(self.pred_var, self.var_y)
            self.internal_mean, self.internal_var = tf.nn.moments(x_final,
                                                                  axes=[2])
            self.mse = tf.losses.mean_squared_error(labels=self.sample_out,
                                                    predictions=self.pred_mean)
            # Absolute error measured in predictive standard deviations.
            self.sde = tf.abs(self.pred_mean - self.sample_out) / tf.sqrt(
                self.pred_var)

            # Training
            # Maximise the ELBO (likelihood scaled by loss_factors[0] minus
            # the KL term) by minimising its negative with Adam.
            elbo = loglik * loss_factors[0] - kl_reg
            self.loss = tf.negative(elbo)
            optimizer = tf.train.AdamOptimizer(
                learning_rate=self.config['learning_rate'])
            self.train = optimizer.minimize(self.loss)
            self.saver = tf.train.Saver()
            self.init = tf.global_variables_initializer()
Beispiel #49
0
def compute_eval_loss_and_metrics(logits,              # type: tf.Tensor
                                  softmax_logits,      # type: tf.Tensor
                                  duplicate_mask,      # type: tf.Tensor
                                  num_training_neg,    # type: int
                                  match_mlperf=False,  # type: bool
                                  use_tpu_spec=False   # type: bool
                                 ):
  # type: (...) -> tf.estimator.EstimatorSpec
  """Builds the evaluation spec with a weighted loss plus HR and NDCG.

  Protocol: each user's held-out (true) item is ranked against randomly chosen
  items the user has not interacted with (999 of them). The ranking is cut off
  at 10 for both metrics. Hit Ratio (HR) says whether the true item made the
  top-10 list; Normalized Discounted Cumulative Gain (NDCG) additionally
  rewards hits near the top. Both are computed per user and averaged.

  With `match_mlperf=True` the metrics follow the MLPerf reference
  implementation: an item that occurs several times among a user's evaluation
  negatives counts only once, which amounts to setting the score of all but
  one of the duplicates to -infinity.

  Example:
    logits_by_user:    [[ 2,  3,  3],
                        [ 5,  4,  4]]

    items_by_user:     [[10, 20, 20],
                        [30, 40, 40]]

    (items_by_user is never passed explicitly; the information it carries is
    encoded in `duplicate_mask`.)

    top_k: 2

    With match_mlperf=True the HR is 2/2 = 1.0, because each user only has two
    distinct items (10 and 20; 30 and 40), so the first item is always in the
    top 2. With match_mlperf=False the HR is 1/2 = 0.5: for the first user,
    item 20 outscores item 10 and appears twice, pushing the first item out of
    the top 2.

  Args:
    logits: Predicted logits for each user, shaped
      (num_users_per_batch * (1 + NUM_EVAL_NEGATIVES),). A user's logits are
      contiguous and the first one of each group belongs to the true item.

    softmax_logits: The same tensor, but with zeros left-appended.

    duplicate_mask: Same shape as `logits`; 1 where the item at that position
      has already appeared for that user.

    num_training_neg: Number of negatives per positive used during training.

    match_mlperf: Use the MLPerf reference convention for computing rank.

    use_tpu_spec: Return a TPUEstimatorSpec instead of an EstimatorSpec.
      Required for TPUs and if XLA is done on a GPU. Despite the name,
      TPUEstimatorSpecs work with GPUs.

  Returns:
    An EstimatorSpec for evaluation.
  """
  in_top_k, ndcg, metric_weights, logits_by_user = compute_top_k_and_ndcg(
      logits, duplicate_mask, match_mlperf)

  # The eval Dataset always puts the true item first in each user's group, so
  # the labels can be rebuilt here: a one-hot row with index 0 set, per user.
  num_users = logits_by_user.shape[0]
  items_per_user = logits_by_user.shape[1]
  true_item_index = tf.zeros(shape=(num_users,), dtype=tf.int32)
  one_hot_labels = tf.one_hot(true_item_index, items_per_user, dtype=tf.int32)
  eval_labels = tf.reshape(one_hot_labels, (-1,))

  eval_labels_float = tf.cast(eval_labels, tf.float32)

  # Evaluation sees far more negatives per positive than training does
  # (typically 999 to 1 vs. 4 to 1). Down-weighting the negatives yields a
  # loss that is comparable with the training loss.
  negative_scale_factor = num_training_neg / rconst.NUM_EVAL_NEGATIVES
  per_class_weights = (
      eval_labels_float + (1 - eval_labels_float) * negative_scale_factor)
  example_weights = (
      per_class_weights *
      (1 + rconst.NUM_EVAL_NEGATIVES) / (1 + num_training_neg))

  # Repeat each user's metric weight for every one of that user's logits.
  tiled_metric_weights = tf.tile(
      metric_weights[:, tf.newaxis], (1, rconst.NUM_EVAL_NEGATIVES + 1))
  expanded_metric_weights = tf.reshape(tiled_metric_weights, (-1,))

  # Padded examples carry zero metric weight and so drop out of the loss.
  example_weights = example_weights * tf.cast(
      expanded_metric_weights, tf.float32)

  cross_entropy = tf.losses.sparse_softmax_cross_entropy(
      logits=softmax_logits, labels=eval_labels, weights=example_weights)

  def metric_fn(top_k_tensor, ndcg_tensor, weight_tensor):
    hit_ratio = tf.metrics.mean(top_k_tensor, weights=weight_tensor)
    mean_ndcg = tf.metrics.mean(ndcg_tensor, weights=weight_tensor)
    return {
        rconst.HR_KEY: hit_ratio,
        rconst.NDCG_KEY: mean_ndcg,
    }

  if not use_tpu_spec:
    return tf.estimator.EstimatorSpec(
        mode=tf.estimator.ModeKeys.EVAL,
        loss=cross_entropy,
        eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights)
    )

  # The TPU spec takes the metric function and its arguments separately.
  return tf.contrib.tpu.TPUEstimatorSpec(
      mode=tf.estimator.ModeKeys.EVAL, loss=cross_entropy,
      eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))
def crop_mask_in_target_box(masks,
                            boxes,
                            target_boxes,
                            output_size,
                            sample_offset=0,
                            use_einsum=True):
    """Crop masks in target boxes.

    Each mask is defined relative to its own tight box (`boxes`). This
    function re-samples the mask inside `target_boxes` by expressing the
    target box in the pixel coordinates of the (zero-padded) mask and
    delegating to `_selective_crop_and_resize`.

    Args:
      masks: A tensor with a shape of [batch_size, num_masks, height, width].
      boxes: a float tensor representing box coordinates that tightly enclose
        masks with a shape of [batch_size, num_masks, 4] in un-normalized
        coordinates. A box is represented by [ymin, xmin, ymax, xmax].
      target_boxes: a float tensor representing target box coordinates for
        masks with a shape of [batch_size, num_masks, 4] in un-normalized
        coordinates. A box is represented by [ymin, xmin, ymax, xmax].
      output_size: A scalar to indicate the output crop size. It currently
        only supports square outputs.
      sample_offset: a float number in [0, 1] indicates the subpixel sample
        offset from grid point.
      use_einsum: Use einsum to replace gather in selective_crop_and_resize.

    Returns:
      A 4-D tensor representing feature crop of shape
      [batch_size, num_boxes, output_size, output_size].
    """
    with tf.name_scope('crop_mask_in_target_box'):
        # Cast to float32, as the y_transform and other transform variables may
        # overflow in float16
        masks = tf.cast(masks, tf.float32)
        boxes = tf.cast(boxes, tf.float32)
        target_boxes = tf.cast(target_boxes, tf.float32)

        batch_size, num_masks, height, width = masks.get_shape().as_list()
        if batch_size is None:
            # Fall back to the dynamic batch size when it is not known statically.
            batch_size = tf.shape(masks)[0]
        masks = tf.reshape(masks, [batch_size * num_masks, height, width, 1])
        # Pad a 2-pixel border of zeros around every mask; the "+ 2" offsets
        # below account for this border.
        masks = tf.image.pad_to_bounding_box(masks, 2, 2, height + 4,
                                             width + 4)
        masks = tf.reshape(masks,
                           [batch_size, num_masks, height + 4, width + 4, 1])

        # Projects target box locations and sizes to corresponding cropped
        # mask coordinates.
        gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(value=boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(value=target_boxes,
                                                          num_or_size_splits=4,
                                                          axis=2)
        gt_height = gt_y_max - gt_y_min + _EPSILON
        gt_width = gt_x_max - gt_x_min + _EPSILON
        # Vertical quantities scale with the mask height and horizontal ones
        # with the mask width. (The previous code mixed the two, which is only
        # harmless for square masks.)
        y_transform = (bb_y_min - gt_y_min) * height / gt_height + 2
        x_transform = (bb_x_min - gt_x_min) * width / gt_width + 2
        h_transform = (bb_y_max - bb_y_min) * height / gt_height
        w_transform = (bb_x_max - bb_x_min) * width / gt_width

        # Largest valid (y, x) index in the padded mask, per box.
        boundaries = tf.concat([
            tf.ones_like(y_transform) * ((height + 4) - 1),
            tf.ones_like(x_transform) * ((width + 4) - 1)
        ],
                               axis=-1)
        boundaries = tf.cast(boundaries, dtype=y_transform.dtype)

        # Reshape tensors to have the right shape for selective_crop_and_resize.
        transformed_boxes = tf.concat(
            [y_transform, x_transform, h_transform, w_transform], -1)
        # Every mask is treated as its own "level": box i samples from mask i.
        levels = tf.tile(tf.reshape(tf.range(num_masks), [1, num_masks]),
                         [batch_size, 1])

        cropped_masks = _selective_crop_and_resize(
            masks,
            transformed_boxes,
            levels,
            boundaries,
            output_size,
            sample_offset=sample_offset,
            use_einsum_gather=use_einsum)
        # Drop the singleton channel axis added before padding.
        cropped_masks = tf.squeeze(cropped_masks, axis=-1)

    return cropped_masks
Beispiel #51
0
def discriminative_loss_single(prediction, correct_label, feature_dim,
                               label_shape, delta_v, delta_d, param_var,
                               param_dist, param_reg):
    ''' Discriminative loss for a single prediction/label pair.

  The loss is the weighted sum of three terms computed from the per-instance
  mean embeddings: an intra-cluster variance term, an inter-cluster distance
  term and a regularization term on the norm of the means.

  :param prediction: inference of network
  :param correct_label: instance label
  :param feature_dim: feature dimension of prediction
  :param label_shape: shape of label
  :param delta_v: cutoff variance distance
  :param delta_d: cutoff cluster distance
  :param param_var: weight for intra cluster variance
  :param param_dist: weight for inter cluster distances
  :param param_reg: weight regularization
  :return: tuple ``(loss, l_var, l_dist, l_reg)`` where the three partial
      terms are already multiplied by their weights
  '''

    ### Reshape so pixels are aligned along a vector
    num_pixels = label_shape[1] * label_shape[0]
    correct_label = tf.reshape(correct_label, [num_pixels])
    reshaped_pred = tf.reshape(prediction, [num_pixels, feature_dim])

    ### Count instances
    unique_labels, unique_id, counts = tf.unique_with_counts(correct_label)
    counts = tf.cast(counts, tf.float32)
    num_instances = tf.size(unique_labels)

    # Per-instance mean embedding, then broadcast back to every pixel.
    segmented_sum = tf.math.unsorted_segment_sum(reshaped_pred, unique_id,
                                                 num_instances)

    mu = tf.divide(segmented_sum, tf.reshape(counts, (-1, 1)))
    mu_expand = tf.gather(mu, unique_id)

    ### Calculate l_var
    # Hinged squared distance of every pixel to its own cluster mean.
    distance = tf.norm(tf.subtract(mu_expand, reshaped_pred), axis=1)
    distance = tf.subtract(distance, delta_v)
    distance = tf.clip_by_value(distance, 0., distance)
    distance = tf.square(distance)

    l_var = tf.math.unsorted_segment_sum(distance, unique_id, num_instances)
    l_var = tf.divide(l_var, counts)
    l_var = tf.reduce_sum(l_var)
    l_var = tf.divide(l_var, tf.cast(num_instances, tf.float32))

    ### Calculate l_dist

    # Get distance for each pair of clusters like this:
    #   mu_1 - mu_1
    #   mu_2 - mu_1
    #   mu_3 - mu_1
    #   mu_1 - mu_2
    #   mu_2 - mu_2
    #   mu_3 - mu_2
    #   mu_1 - mu_3
    #   mu_2 - mu_3
    #   mu_3 - mu_3

    mu_interleaved_rep = tf.tile(mu, [num_instances, 1])
    mu_band_rep = tf.tile(mu, [1, num_instances])
    mu_band_rep = tf.reshape(mu_band_rep,
                             (num_instances * num_instances, feature_dim))

    mu_diff = tf.subtract(mu_band_rep, mu_interleaved_rep)

    # Filter out zeros from same cluster subtraction
    intermediate_tensor = tf.reduce_sum(tf.abs(mu_diff), axis=1)
    zero_vector = tf.zeros(1, dtype=tf.float32)
    bool_mask = tf.not_equal(intermediate_tensor, zero_vector)
    mu_diff_bool = tf.boolean_mask(mu_diff, bool_mask)

    mu_norm = tf.norm(mu_diff_bool, axis=1)
    mu_norm = tf.subtract(2. * delta_d, mu_norm)
    mu_norm = tf.clip_by_value(mu_norm, 0., mu_norm)
    mu_norm = tf.square(mu_norm)

    # When only one instance is present no pair survives the filter above and
    # mu_norm is empty. tf.reduce_mean of an empty tensor is NaN, which would
    # poison the total loss, so average manually with the count clamped to 1;
    # this yields l_dist == 0 when there are no pairs and the plain mean
    # otherwise.
    num_pairs = tf.cast(tf.size(mu_norm), tf.float32)
    l_dist = tf.reduce_sum(mu_norm) / tf.maximum(num_pairs, 1.)

    ### Calculate l_reg
    l_reg = tf.reduce_mean(tf.norm(mu, axis=1))

    param_scale = 1.
    l_var = param_var * l_var
    l_dist = param_dist * l_dist
    l_reg = param_reg * l_reg

    loss = param_scale * (l_var + l_dist + l_reg)

    return loss, l_var, l_dist, l_reg
def multilevel_crop_and_resize(features,
                               boxes,
                               output_size=7,
                               sample_offset=0.5):
    """Crop and resize on multilevel feature pyramid.

    Generate the (output_size, output_size) set of pixels for each input box
    by first locating the box into the correct feature level, and then cropping
    and resizing it using the corresponding feature map of that level.

    Args:
      features: A dictionary with key as pyramid level and value as features.
        Keys are looked up as ``str(level)``. The features are in shape of
        [batch_size, height_l, width_l, num_filters].
      boxes: A 3-D Tensor of shape [batch_size, num_boxes, 4]. Each row
        represents a box with [y1, x1, y2, x2] in un-normalized coordinates.
      output_size: A scalar to indicate the output crop size.
      sample_offset: a float number in [0, 1] indicates the subpixel sample
        offset from grid point.

    Returns:
      A 5-D tensor representing feature crop of shape
      [batch_size, num_boxes, output_size, output_size, num_filters].
    """

    with tf.name_scope('multilevel_crop_and_resize'):
        # Convert the keys to int before taking min/max: comparing the raw
        # string keys is lexicographic and would rank '10' below '2'.
        level_ids = [int(level) for level in features.keys()]
        min_level = min(level_ids)
        max_level = max(level_ids)
        features_shape = tf.shape(features[str(min_level)])
        batch_size, max_feature_height, max_feature_width, num_filters = (
            features_shape[0], features_shape[1], features_shape[2],
            features_shape[3])

        num_boxes = tf.shape(boxes)[1]

        # Stack feature pyramid into a features_all of shape
        # [batch_size, levels, height, width, num_filters].
        features_all = []
        feature_heights = []
        feature_widths = []
        for level in range(min_level, max_level + 1):
            shape = features[str(level)].get_shape().as_list()
            feature_heights.append(shape[1])
            feature_widths.append(shape[2])
            # Concat tensor of [batch_size, height_l * width_l, num_filters] for each
            # levels.
            features_all.append(
                tf.reshape(features[str(level)],
                           [batch_size, -1, num_filters]))
        features_r2 = tf.reshape(tf.concat(features_all, 1), [-1, num_filters])

        # Calculate height_l * width_l for each level.
        level_dim_sizes = [
            level_width * level_height
            for level_width, level_height in zip(feature_widths,
                                                 feature_heights)
        ]
        # level_dim_offsets is accumulated sum of level_dim_size.
        level_dim_offsets = [0]
        for level_size in level_dim_sizes[:-1]:
            level_dim_offsets.append(level_dim_offsets[-1] + level_size)
        batch_dim_size = level_dim_offsets[-1] + level_dim_sizes[-1]
        level_dim_offsets = tf.constant(level_dim_offsets, tf.int32)
        # Despite the name this holds the per-level *widths*: it is the number
        # of flattened elements per row, i.e. the stride that one step along
        # the y axis advances in features_r2.
        height_dim_sizes = tf.constant(feature_widths, tf.int32)

        # Assigns boxes to the right level.
        box_width = boxes[:, :, 3] - boxes[:, :, 1]
        box_height = boxes[:, :, 2] - boxes[:, :, 0]
        areas_sqrt = tf.sqrt(
            tf.cast(box_height, tf.float32) * tf.cast(box_width, tf.float32))

        # level = floor(log2(sqrt(area) / 224)) + 4
        levels = tf.cast(tf.math.floordiv(
            tf.math.log(tf.math.divide_no_nan(areas_sqrt, 224.0)),
            tf.math.log(2.0)) + 4.0,
                         dtype=tf.int32)
        # Maps levels between [min_level, max_level].
        levels = tf.minimum(max_level, tf.maximum(levels, min_level))

        # Projects box location and sizes to corresponding feature levels.
        scale_to_level = tf.cast(tf.pow(tf.constant(2.0),
                                        tf.cast(levels, tf.float32)),
                                 dtype=boxes.dtype)
        boxes /= tf.expand_dims(scale_to_level, axis=2)
        box_width /= scale_to_level
        box_height /= scale_to_level
        # From here on each box is [y1, x1, height, width] in level coordinates.
        boxes = tf.concat([
            boxes[:, :, 0:2],
            tf.expand_dims(box_height, -1),
            tf.expand_dims(box_width, -1)
        ],
                          axis=-1)

        # Maps levels to [0, max_level-min_level].
        levels -= min_level
        level_strides = tf.pow([[2.0]], tf.cast(levels, tf.float32))
        # Largest valid (y, x) index on each box's level, derived from the
        # min-level feature size divided by the relative stride.
        boundary = tf.cast(
            tf.concat([
                tf.expand_dims([[tf.cast(max_feature_height, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
                tf.expand_dims([[tf.cast(max_feature_width, tf.float32)]] /
                               level_strides - 1,
                               axis=-1),
            ],
                      axis=-1), boxes.dtype)

        # Compute grid positions.
        kernel_y, kernel_x, box_gridy0y1, box_gridx0x1 = _compute_grid_positions(
            boxes, boundary, output_size, sample_offset)

        x_indices = tf.cast(tf.reshape(
            box_gridx0x1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)
        y_indices = tf.cast(tf.reshape(
            box_gridy0y1, [batch_size, num_boxes, output_size * 2]),
                            dtype=tf.int32)

        # The flat index into features_r2 is the sum of four offsets:
        # batch offset + level offset + row offset + column offset.
        batch_size_offset = tf.tile(
            tf.reshape(
                tf.range(batch_size) * batch_dim_size, [batch_size, 1, 1, 1]),
            [1, num_boxes, output_size * 2, output_size * 2])
        # Get level offset for each box. Each box belongs to one level.
        levels_offset = tf.tile(
            tf.reshape(tf.gather(level_dim_offsets, levels),
                       [batch_size, num_boxes, 1, 1]),
            [1, 1, output_size * 2, output_size * 2])
        y_indices_offset = tf.tile(
            tf.reshape(
                y_indices *
                tf.expand_dims(tf.gather(height_dim_sizes, levels), -1),
                [batch_size, num_boxes, output_size * 2, 1]),
            [1, 1, 1, output_size * 2])
        x_indices_offset = tf.tile(
            tf.reshape(x_indices, [batch_size, num_boxes, 1, output_size * 2]),
            [1, 1, output_size * 2, 1])
        indices = tf.reshape(
            batch_size_offset + levels_offset + y_indices_offset +
            x_indices_offset, [-1])

        # TODO(wangtao): replace tf.gather with tf.gather_nd and try to get similar
        # performance.
        features_per_box = tf.reshape(tf.gather(features_r2, indices), [
            batch_size, num_boxes, output_size * 2, output_size * 2,
            num_filters
        ])

        # Bilinear interpolation.
        features_per_box = _feature_bilinear_interpolation(
            features_per_box, kernel_y, kernel_x)
        return features_per_box
Beispiel #53
0
    def decode(self, x, conv_inputs1, features):
        """Run the step-by-step decoding loop and return the stacked outputs.

        For every dilated layer a TensorArray "queue" is seeded with `dilation`
        encoder activations taken from `conv_inputs1`; a `tf.while_loop` then
        reads one queue entry per step, applies the already-created decoder
        variables (all scopes are opened with `reuse=True`) and writes the
        step output into an emit TensorArray.

        Args:
            x: input series; indexed as `x[batch, time, ...]` via `gather_nd`
                to fetch the value at `encode_len - 1`.
            conv_inputs1: per-layer tensors, zipped with `self.dilations`;
                indexed as `[batch, time, channels]`.
            features: per-step decoder features; transposed with `(1, 0, 2)`
                so that axis 1 becomes the leading (read) axis.

        Returns:
            The emit TensorArray stacked and transposed with `(1, 0, 2)`.
        """
        batch_size = tf.shape(x)[0]

        # initialize state tensor arrays
        state_queues = []
        # NOTE(review): the enumerate index `i` is unused in this loop.
        for i, (conv_input, dilation) in enumerate(zip(conv_inputs1, self.dilations)):
            # Build (batch, time) index pairs: each batch index repeated
            # `dilation` times ...
            batch_idx = tf.range(batch_size)
            batch_idx = tf.tile(tf.expand_dims(batch_idx, 1), (1, dilation))
            batch_idx = tf.reshape(batch_idx, [-1])

            # ... paired with the `dilation` consecutive time steps starting at
            # encode_len - dilation - 1 (i.e. ending at encode_len - 2).
            queue_begin_time = self.encode_len - dilation - 1
            temporal_idx = tf.expand_dims(queue_begin_time, 1) + tf.expand_dims(tf.range(dilation), 0)
            temporal_idx = tf.reshape(temporal_idx, [-1])

            idx = tf.stack([batch_idx, temporal_idx], axis=1)
            slices = tf.reshape(tf.gather_nd(conv_input, idx), (batch_size, dilation, shape(conv_input, 2)))

            # The queue holds the `dilation` seed entries plus one slot per
            # decode step; entries are stored with the time axis first.
            layer_ta = tf.TensorArray(dtype=tf.float32, size=dilation + self.decode_series_len)
            layer_ta = layer_ta.unstack(tf.transpose(slices, (1, 0, 2)))
            state_queues.append(layer_ta)

        # initialize feature tensor array
        features_ta = tf.TensorArray(dtype=tf.float32, size=self.decode_series_len)
        features_ta = features_ta.unstack(tf.transpose(features, (1, 0, 2)))

        # initialize output tensor array
        emit_ta = tf.TensorArray(size=self.decode_series_len, dtype=tf.float32)

        # initialize other loop vars
        elements_finished = 0 >= self.decode_len
        time = tf.constant(0, dtype=tf.int32)

        # get initial x input
        current_idx = tf.stack([tf.range(tf.shape(self.encode_len)[0]), self.encode_len - 1], axis=1)
        initial_input = tf.gather_nd(x, current_idx)

        def loop_fn(time1, current_input, queues):
            """Compute one decode step.

            Returns a tuple `(next_elements_finished, next_input,
            updated_queues)`.
            """

            current_features = features_ta.read(time1)
            current_input = tf.concat([current_input, current_features], axis=1)

            with tf.variable_scope('x-proj-decode', reuse=True):
                w_x_proj = tf.get_variable('weights')
                b_x_proj = tf.get_variable('biases')
                x_proj = tf.nn.tanh(tf.matmul(current_input, w_x_proj) + b_x_proj)

            skip_outputs, updated_queues = [], []
            # NOTE(review): `conv_input` is unused inside this loop.
            for i, (conv_input, queue, dilation) in enumerate(zip(conv_inputs1, queues, self.dilations)):
                # The entry read at `time1` was written `dilation` steps ago
                # (or is one of the seed entries).
                state = queue.read(time1)
                with tf.variable_scope('dilated-conv-decode-{}'.format(i), reuse=True):
                    # NOTE(review): `.format(i)` on these names is a no-op, the
                    # strings contain no placeholders.
                    w_conv = tf.get_variable('weights'.format(i))
                    b_conv = tf.get_variable('biases'.format(i))
                    # Two-tap convolution written as two matmuls: tap 0 on the
                    # queued state, tap 1 on the current projection.
                    dilated_conv = tf.matmul(state, w_conv[0, :, :]) + tf.matmul(x_proj, w_conv[1, :, :]) + b_conv
                # Gated activation: tanh(filter) * sigmoid(gate).
                conv_filter, conv_gate = tf.split(dilated_conv, 2, axis=1)
                dilated_conv = tf.nn.tanh(conv_filter) * tf.nn.sigmoid(conv_gate)

                with tf.variable_scope('dilated-conv-proj-decode-{}'.format(i), reuse=True):
                    w_proj = tf.get_variable('weights'.format(i))
                    b_proj = tf.get_variable('biases'.format(i))
                    concat_outputs = tf.matmul(dilated_conv, w_proj) + b_proj
                skips, residuals = tf.split(concat_outputs, [self.skip_channels, self.residual_channels], axis=1)

                x_proj += residuals
                skip_outputs.append(skips)
                # Store the updated projection so it is read `dilation` steps
                # from now.
                updated_queues.append(queue.write(time1 + dilation, x_proj))

            skip_outputs = tf.nn.relu(tf.concat(skip_outputs, axis=1))
            with tf.variable_scope('dense-decode-1', reuse=True):
                w_h = tf.get_variable('weights')
                b_h = tf.get_variable('biases')
                h = tf.nn.relu(tf.matmul(skip_outputs, w_h) + b_h)

            with tf.variable_scope('dense-decode-2', reuse=True):
                w_y = tf.get_variable('weights')
                b_y = tf.get_variable('biases')
                y_hat2 = tf.matmul(h, w_y) + b_y

            elements_finished2 = (time1 >= self.decode_len)
            finished = tf.reduce_all(elements_finished2)

            # Emit zeros once every element has reached its decode length.
            next_input = tf.cond(
                finished,
                lambda: tf.zeros([batch_size, 1], dtype=tf.float32),
                lambda: y_hat2
            )
            next_elements_finished = (time1 >= self.decode_len -1)

            return next_elements_finished, next_input, updated_queues

        def condition(unused_time, elements_finished1, *_):
            """Keep looping while at least one element is unfinished."""
            return tf.logical_not(tf.reduce_all(elements_finished1))

        def body(time1, elements_finished1, emit_ta1, *state_queues1):
            """While-loop body: run one step and record its (masked) output."""
            # NOTE(review): `initial_input` is passed on every iteration, so the
            # `next_input` returned by loop_fn is only emitted and never fed
            # back as the next step's input — confirm this is intended.
            (next_finished, emit_output, state_queues2) = loop_fn(time1, initial_input, state_queues1)

            # Zero the output of elements that were already finished.
            emit = tf.where(elements_finished1, tf.zeros_like(emit_output), emit_output)
            emit_ta2 = emit_ta1.write(time1, emit)

            #elements_finished2 = tf.logical_or(elements_finished1, next_finished)

            return [time1 + 1, next_finished, emit_ta2] + list(state_queues2)

        returned = tf.while_loop(
            cond=condition,
            body=body,
            loop_vars=[time, elements_finished, emit_ta] + state_queues
        )

        # Index 2 of the loop vars is the emit TensorArray.
        outputs_ta = returned[2]
        y_hat = tf.transpose(outputs_ta.stack(), (1, 0, 2))

        return y_hat
Beispiel #54
0
    def __init__(self):
        """Build the graph, losses, train ops and session for the agent.

        Sizes (`state`, `action`, `worker`, `hidden`, `mem_size`) are read from
        the module-level `env_set` mapping; the remaining hyper-parameters are
        fixed here. Two copies of the actor-critic network are created under
        the `main` and `target` variable scopes, the policy / value losses and
        their Adam train ops are defined, and finally all variables are
        initialized and `target_init` is run in `self.sess`.
        """
        self.sess = tf.Session()
        self.state_size = env_set['state']
        self.output_size = env_set['action']
        self.worker_size = env_set['worker']
        self.support_size = 8
        self.target_update_tau = 0.995
        self.gamma = 0.99
        self.hidden = env_set['hidden']
        self.batch_size = 64
        self.pi_lr = 1e-4
        self.q_lr = 1e-3
        self.action_limit = 1.0
        self.memory = replay_buffer(env_set['mem_size'])
        self.target_noise = 0.2
        self.noise_clip = 0.1
        self.alpha = 1e-5

        self.x_ph, self.a_ph, self.tau_ph,self.x2_ph, self.r_ph, self.d_ph = \
            cr.placeholders(self.state_size, self.output_size, self.support_size,self.state_size, None, None)

        with tf.variable_scope('main'):
            self.pi, self.logp_pi, self.q1, self.q2, self.q1_pi, self.q2_pi,  self.v = cr.dipg_sac_mlp_actor_critic(
                x=self.x_ph,
                a=self.a_ph,
                tau= self.tau_ph,
                hidden=self.hidden,
                activation=tf.nn.relu,
                output_activation=tf.tanh,
                output_size=self.output_size
            )

        with tf.variable_scope('target'):
            # Only the value head of the target network is used (fed with x2).
            _, _, _, _, _, _, self.v_targ = cr.dipg_sac_mlp_actor_critic(
                x=self.x2_ph,
                a=self.a_ph,
                tau=self.tau_ph,
                hidden=self.hidden,
                activation=tf.nn.relu,
                output_activation=tf.tanh,
                output_size=self.output_size
            )

        def per_support(column):
            # Repeat a rank-1 tensor along a new axis 1: [B] -> [B, support_size].
            return tf.tile(tf.expand_dims(column, axis=1), [1, self.support_size])

        self.pi_params = cr.get_vars('main/pi')
        self.value_params = cr.get_vars('main/q') + cr.get_vars('main/v')
        # Pick, as a whole, whichever critic has the smaller mean value.
        self.min_q = tf.where(tf.less(tf.reduce_mean(self.q1_pi),tf.reduce_mean(self.q2_pi)),self.q1_pi,self.q2_pi)
        # Targets are constants w.r.t. the optimizers (stop_gradient).
        self.q_backup = tf.stop_gradient(
            per_support(self.r_ph)
            + self.gamma * per_support(1 - self.d_ph) * self.v_targ)
        self.v_backup = tf.stop_gradient(
            self.min_q - self.alpha * per_support(self.logp_pi))
        self.pi_loss = tf.reduce_mean(self.alpha * self.logp_pi - tf.reduce_mean(self.q1_pi*tf.square(self.tau_ph)))

        # Quantile fractions broadcast to the pairwise [B, S, S] loss grid.
        tau = tf.tile(tf.expand_dims(self.tau_ph, axis=1), [1, self.support_size, 1])
        inv_tau = 1 - tau

        def quantile_loss(target, estimate):
            """Pairwise quantile-regression loss between two [B, S] tensors."""
            target_tile = tf.tile(tf.expand_dims(target, axis=1), [1, self.support_size, 1])
            estimate_tile = tf.tile(tf.expand_dims(estimate, axis=2), [1, 1, self.support_size])
            # Element-wise squared error (no reduction). The call is kept as
            # tf.losses.mean_squared_error so the loss is still registered in
            # the graph's losses collection as before.
            squared_error = tf.losses.mean_squared_error(
                target_tile, estimate_tile, reduction=tf.losses.Reduction.NONE)
            error = target_tile - estimate_tile
            # Asymmetric weighting |tau - 1{error < 0}|: over-estimates
            # (error < 0) are weighted by (1 - tau), under-estimates by tau.
            # Previously the (1 - tau) factor was computed but never applied.
            weighted = tf.where(tf.less(error, 0.0), inv_tau * squared_error, tau * squared_error)
            return 0.5*tf.reduce_mean(tf.reduce_sum(tf.reduce_mean(weighted, axis=2), axis=1))

        self.q1_loss = quantile_loss(self.q_backup, self.q1)
        self.q2_loss = quantile_loss(self.q_backup, self.q2)
        self.v_loss = quantile_loss(self.v_backup, self.v)
        self.value_loss = self.q1_loss + self.q2_loss + self.v_loss

        self.pi_optimizer = tf.train.AdamOptimizer(self.pi_lr)
        self.train_pi_op = self.pi_optimizer.minimize(self.pi_loss, var_list=self.pi_params)

        # Control dependencies force the order: policy step -> value step ->
        # target update.
        self.value_optimizer = tf.train.AdamOptimizer(self.q_lr)
        with tf.control_dependencies([self.train_pi_op]):
            self.train_value_op = self.value_optimizer.minimize(self.value_loss, var_list=self.value_params)

        with tf.control_dependencies([self.train_value_op]):
            # Polyak averaging of every 'target' variable towards 'main'.
            self.target_update = tf.group([tf.assign(v_targ, self.target_update_tau * v_targ + (1 - self.target_update_tau) * v_main)
                                           for v_main, v_targ in zip(cr.get_vars('main'), cr.get_vars('target'))])

        self.step_ops = [self.pi_loss, self.value_loss, self.train_pi_op, self.train_value_op, self.target_update]
        # Hard copy of the value-network weights only ('main/v' -> 'target/v').
        self.target_init = tf.group([tf.assign(v_targ, v_main)
                                    for v_main, v_targ in zip(cr.get_vars('main/v'), cr.get_vars('target/v'))])

        self.sess.run(tf.global_variables_initializer())

        self.sess.run(self.target_init)
Beispiel #55
0
def parse_obs(game_state):
    """Turn a raw game-state dict into a single spatial feature tensor.

    Building and projectile maps are one-hot encoded, the scalar player /
    opponent stats and building prices are broadcast to every map cell, and
    everything is concatenated along the channel axis.

    Args:
        game_state: dict with at least the keys 'gameMap' and 'gameDetails'
            ('mapHeight', 'mapWidth', 'buildingsStats').

    Returns:
        Tuple ``(spatial, rows, columns)``: the concatenated feature tensor
        and the map height / width read from the game details.
    """
    full_map = game_state['gameMap']
    details = game_state['gameDetails']
    rows = details['mapHeight']
    columns = details['mapWidth']

    player_buildings = getPlayerBuildings(full_map, rows, columns)
    opponent_buildings = getOpponentBuildings(full_map, rows, columns)
    projectiles = getProjectiles(full_map, rows, columns)

    player_info = getPlayerInfo('A', game_state)
    opponent_info = getPlayerInfo('B', game_state)

    # works for jar v1.1.2
    # Only the values are consumed below; the order of the building types
    # fixes the order of the price channels.
    building_stats = details['buildingsStats']
    prices = {
        building: building_stats[building]['price']
        for building in ("ATTACK", "DEFENSE", "ENERGY", "TESLA")
    }

    with tf.name_scope("shaping_inputs"):
        if debug:
            print("Shaping inputs...")
            s = Stopwatch()

        pb = tf.one_hot(indices=player_buildings,
                        depth=5,
                        axis=-1,
                        name="player_buildings")  # 20x20x5
        ob = tf.one_hot(indices=opponent_buildings,
                        depth=5,
                        axis=-1,
                        name="opp_buildings")  # 20x20x5
        proj = tf.one_hot(indices=projectiles,
                          depth=3,
                          axis=-1,
                          name='projectiles')  # 20x40x3
        k = proj.get_shape().as_list()
        map_rows = int(k[0])
        # The projectile map is twice as wide as the building maps; fold each
        # pair of columns into the channel axis.
        map_cols = int(k[1]) // 2
        # 20x20x6. Only works for single missiles
        proj = tf.reshape(proj, [map_rows, map_cols, 6])

        # The first entry of each info dict is skipped.
        # Presumably 4 + 4 + 4 = 12 scalars — verify against getPlayerInfo.
        non_spatial = list(player_info.values())[1:] + list(
            opponent_info.values())[1:] + list(prices.values())
        non_spatial = tf.cast(non_spatial, dtype=tf.float32)
        # broadcasting the non-spatial features to the channel dimension
        broadcast_stats = tf.tile(
            tf.expand_dims(tf.expand_dims(non_spatial, axis=0), axis=0),
            [map_rows, map_cols, 1])  # rows x cols x len(non_spatial)

        # adding all the inputs together via the channel dimension
        spatial = tf.concat([pb, ob, proj, broadcast_stats],
                            axis=-1)  # 20x20x(16 + len(non_spatial))

        if debug:
            print("Finished shaping inputs. Took " + s.delta +
                  "\nShape of inputs:" + str(spatial.shape))

        return spatial, rows, columns
Beispiel #56
0
# Trainable weights. Each matrix has one more input row than the layer below
# has units, because a constant bias column is appended to every activation.
w_1 = tf.Variable(tf.truncated_normal(shape=[785, middle], stddev=0.1))
w_2 = tf.Variable(tf.truncated_normal(shape=[middle + 1, 10], stddev=0.1))

# Zero-initialized variables with the same shapes as the weights above.
w_old_1 = tf.Variable(tf.zeros(shape=[785, middle]))
w_old_2 = tf.Variable(tf.zeros(shape=[middle + 1, 10]))

# Conceptors for used spaces (plain NumPy arrays, start empty)
A_0 = np.zeros((785, 785))
A_1 = np.zeros((middle + 1, middle + 1))

# Conceptors for free spaces (TF variables, start as identity)
F_0 = tf.Variable(tf.eye(num_rows=785))
F_1 = tf.Variable(tf.eye(num_rows=middle + 1))

# Forward pass. ab_i is the layer-i activation with a column of ones appended,
# one row per example, so the bias lives in the last row of w_{i+1}.
ab_0 = tf.concat(
    [a_0, tf.tile(tf.ones([1, 1]), multiples=[tf.shape(a_0)[0], 1])], axis=1)
z_1 = tf.matmul(ab_0, w_1)
a_1 = sigma(z_1)
ab_1 = tf.concat(
    [a_1, tf.tile(tf.ones([1, 1]), multiples=[tf.shape(a_1)[0], 1])], axis=1)
z_2 = tf.matmul(ab_1, w_2)
a_2 = sigma(z_2)

# Output error against the targets.
diff = a_2 - y

# Backward pass
reg2 = tf.Variable(0.001)
reg1 = tf.Variable(0.001)

# Output-layer delta, then the weight gradient computed from the bias-augmented
# hidden activations after projecting them through the free-space conceptor.
d_z_2 = diff * sigmaprime(z_2)
d_w_2 = tf.matmul(tf.matmul(ab_1, F_1), d_z_2, transpose_a=True)
Beispiel #57
0
Datei: jsa.py Projekt: yyht/JSA
x_dim = 784
eps = 1e-10
n_class = args.n_class
n_cv = args.n_cv
# The encoder emits n_class - 1 free logits per categorical variable; a fixed
# zero logit is prepended below, giving n_class logits per variable.
z_dim = n_cv * (n_class - 1)
z_concate_dim = n_cv * n_class

# Prior logits, one row per categorical variable, initialized to zeros.
prior_logit0 = tf.get_variable("p_b_logit",
                               dtype=tf.float32,
                               initializer=tf.zeros([n_cv, n_class]))

x = tf.placeholder(tf.float32, [None, x_dim])
# Binarize the input at 0.5. tf.cast replaces the deprecated tf.to_float and
# produces the same float32 tensor.
x_binary = tf.cast(x > .5, tf.float32)
# Repeat the whole batch `ntimes` times along axis 0.
ntimes = tf.placeholder(tf.int32)
x_binary = tf.tile(x_binary, (ntimes, 1))

# Effective batch size after tiling.
N = tf.shape(x_binary)[0]

#encoder q(z|x)
z0 = encoder(x_binary, z_dim)
z = tf.reshape(z0, [N, n_cv, n_class - 1])
zeros_logits = tf.zeros(shape=[N, n_cv, 1])
z_concate = tf.concat([zeros_logits, z], axis=2)
q_b = Categorical(logits=z_concate)

# Sample class indices from q and one-hot encode them over n_class.
b_sample = q_b.sample()
b_sample = tf.one_hot(b_sample, depth=n_class)
b_sample_out = tf.cast(b_sample, tf.float32)
# Placeholder for feeding one-hot samples back into the graph.
b_sample_in = tf.placeholder(tf.float32, [None, n_cv, n_class])
Beispiel #58
0
def beam_search(symbols_to_logits_fn,
                initial_ids,
                beam_size,
                decode_length,
                vocab_size,
                alpha,
                states=None,
                eos_id=EOS_ID,
                stop_early=True,
                return_states=False):
  """Beam search with length penalties.

  Requires a function that can take the currently decoded symbols and return
  the logits for the next symbol. The implementation is inspired by
  https://arxiv.org/abs/1609.08144.

  When running, the beam search steps can be visualized by using tfdbg to watch
  the operations generating the output ids for each beam step.  These operations
  have the pattern:
    (alive|finished)_topk_(seq,scores)

  Operations marked `alive` represent the new beam sequences that will be
  processed in the next step.  Operations marked `finished` represent the
  completed beam sequences, which may be padded with 0s if no beams finished.

  Operations marked `seq` store the full beam sequence for the time step.
  Operations marked `scores` store the sequence's final log scores.

  The beam search steps will be processed sequentially in order
  (the loop runs with parallel_iterations=1), so when capturing tensors
  observed from these operations, clients can make assumptions about which
  step is being recorded.

  WARNING: Assumes the 2nd dimension of tensors in `states` is not invariant;
  this means that the shape of the 2nd dimension of these tensors will not be
  available (i.e. set to None) inside symbols_to_logits_fn.

  Args:
    symbols_to_logits_fn: Interface to the model, to provide logits.
        When `states` is empty/None it is called as
        `symbols_to_logits_fn(ids)` with ids of shape
        [batch_size * beam_size, decoded_length] and must return logits that
        can be reshaped to [batch_size, beam_size, vocab_size].
        When `states` is non-empty it is called as
        `symbols_to_logits_fn(ids, i, states)` (i is the loop index, states
        have batch and beam merged) and must return `(logits, new_states)`.
    initial_ids: Ids to start off the decoding, this will be the first thing
        handed to symbols_to_logits_fn (after expanding to beam size)
        [batch_size]
    beam_size: Size of the beam.
    decode_length: Maximum number of steps to decode for.
    vocab_size: Size of the vocab, must equal the size of the logits returned by
        symbols_to_logits_fn
    alpha: alpha for length penalty.
    states: dict (possibly nested) of decoding states. A falsy value is
        replaced by an empty dict.
    eos_id: ID for end of sentence.
    stop_early: a boolean - stop once best sequence is provably determined.
    return_states: a boolean - return the updated states dictionary.
  Returns:
    Tuple of
    (decoded beams [batch_size, beam_size, 1 + number of steps run]; the
       leading position holds the initial id,
     scores [batch_size, beam_size]; length-penalised log probs for batch
       items with at least one finished beam, otherwise the raw log probs of
       the alive beams)
    and the decoding states if `return_states` is True.
  """
  batch_size = _shape_list(initial_ids)[0]

  # Assume initial_ids are prob 1.0. Only the first beam starts with log prob
  # 0; the others start at -inf so that the first step does not pick the same
  # continuation beam_size times.
  initial_log_probs = tf.constant([[0.] + [-float("inf")] * (beam_size - 1)])
  # Expand to beam_size (batch_size, beam_size)
  alive_log_probs = tf.tile(initial_log_probs, [batch_size, 1])

  # Expand each batch and state to beam_size
  alive_seq = _expand_to_beam_size(initial_ids, beam_size)
  alive_seq = tf.expand_dims(alive_seq, axis=2)  # (batch_size, beam_size, 1)
  if states:
    states = nest.map_structure(
        lambda state: _expand_to_beam_size(state, beam_size), states)
  else:
    states = {}

  # Finished will keep track of all the sequences that have finished so far
  # Finished log probs will be negative infinity in the beginning
  # finished_flags will keep track of booleans
  finished_seq = tf.zeros(_shape_list(alive_seq), tf.int32)
  # Setting the scores of the initial to negative infinity.
  # NOTE(review): INF is a module-level constant defined outside this
  # function. The masking arithmetic below (0. * -INF) only works if INF is a
  # large finite number rather than float("inf") - confirm.
  finished_scores = tf.ones([batch_size, beam_size]) * -INF
  finished_flags = tf.zeros([batch_size, beam_size], tf.bool)

  def grow_finished(finished_seq, finished_scores, finished_flags, curr_seq,
                    curr_scores, curr_finished):
    """Given sequences and scores, will gather the top k=beam size sequences.

    Merges the previously finished beams with the candidates from the current
    step and keeps the beam_size best by score. Candidates that have not
    emitted EOS are pushed down by -INF so they only survive while there are
    not yet enough truly finished sequences.

    Args:
      finished_seq: Current finished sequences.
        [batch_size, beam_size, current_decoded_length]
      finished_scores: scores for each of these sequences.
        [batch_size, beam_size]
      finished_flags: finished bools for each of these sequences.
        [batch_size, beam_size]
      curr_seq: current topk sequence that has been grown by one position.
        [batch_size, beam_size, current_decoded_length]
      curr_scores: scores for each of these sequences. [batch_size, beam_size]
      curr_finished: Finished flags for each of these sequences.
        [batch_size, beam_size]
    Returns:
      Whatever compute_topk_scores_and_seq returns; the caller unpacks four
      values:
        (Topk sequences based on scores,
         scores of these sequences (scores are passed in both the score and
           the log-prob slot, so no separate log probs come back),
         Finished flags of these sequences,
         an unused fourth value)
    """
    # First append a column of 0 ids to finished_seq so that it has the same
    # length as curr_seq (which was just grown by one position); the two are
    # concatenated along the beam axis below.
    finished_seq = tf.concat(
        [finished_seq,
         tf.zeros([batch_size, beam_size, 1], tf.int32)], axis=2)

    # Set the scores of the unfinished seq in curr_seq to large negative
    # values
    curr_scores += (1. - tf.to_float(curr_finished)) * -INF
    # concatenating the sequences and scores along beam axis
    curr_finished_seq = tf.concat([finished_seq, curr_seq], axis=1)
    curr_finished_scores = tf.concat([finished_scores, curr_scores], axis=1)
    curr_finished_flags = tf.concat([finished_flags, curr_finished], axis=1)
    return compute_topk_scores_and_seq(
        curr_finished_seq, curr_finished_scores, curr_finished_scores,
        curr_finished_flags, beam_size, batch_size, "grow_finished")

  def grow_alive(curr_seq, curr_scores, curr_log_probs, curr_finished, states):
    """Given sequences and scores, will gather the top k=beam size sequences.

    Keeps the beam_size best candidates that have NOT emitted EOS; finished
    candidates are pushed down by -INF before the top-k selection.

    Args:
      curr_seq: current topk sequence that has been grown by one position.
        [batch_size, beam_size, i+1]
      curr_scores: scores for each of these sequences. [batch_size, beam_size]
      curr_log_probs: log probs for each of these sequences.
        [batch_size, beam_size]
      curr_finished: Finished flags for each of these sequences.
        [batch_size, beam_size]
      states: dict (possibly nested) of decoding states.
    Returns:
      Whatever compute_topk_scores_and_seq returns; the caller unpacks four
      values:
        (Topk sequences based on scores,
         log probs of these sequences,
         Finished flags of these sequences,
         decoding states gathered for the surviving beams)
    """
    # Set the scores of the finished seq in curr_seq to large negative
    # values
    curr_scores += tf.to_float(curr_finished) * -INF
    return compute_topk_scores_and_seq(curr_seq, curr_scores, curr_log_probs,
                                       curr_finished, beam_size, batch_size,
                                       "grow_alive", states)

  def grow_topk(i, alive_seq, alive_log_probs, states):
    r"""Inner beam search loop.

    This function takes the current alive sequences, and grows them to topk
    sequences where k = 2*beam. We use 2*beam because, we could have beam_size
    number of sequences that might hit <EOS> and there will be no alive
    sequences to continue. With 2*beam_size, this will not happen. This relies
    on the assumption the vocab size is > beam size. If this is true, we'll
    have at least beam_size non <EOS> extensions if we extract the next top
    2*beam words.
    The length penalty is ((5 + (i + 1)) / 6) ^ alpha and the score of a
    candidate is its log prob divided by that penalty. Pls refer to
    https://arxiv.org/abs/1609.08144.

    Args:
      i: loop index
      alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1]
      alive_log_probs: probabilities of these sequences. [batch_size, beam_size]
      states: dict (possibly nested) of decoding states.
    Returns:
      Tuple of
        (Topk sequences extended by the next word,
         The log probs of these sequences,
         The scores with length penalty of these sequences,
         Flags indicating which of these sequences have finished decoding,
         dict of transformed decoding states)
    """
    # Get the logits for all the possible next symbols
    flat_ids = tf.reshape(alive_seq, [batch_size * beam_size, -1])

    # flat_ids: (batch_size * beam_size, decoded_length)
    # The model is called with a different signature depending on whether
    # decoding states are being threaded through the loop.
    if states:
      flat_states = nest.map_structure(_merge_beam_dim, states)
      flat_logits, flat_states = symbols_to_logits_fn(flat_ids, i, flat_states)
      states = nest.map_structure(
          lambda t: _unmerge_beam_dim(t, batch_size, beam_size), flat_states)
    else:
      flat_logits = symbols_to_logits_fn(flat_ids)

    logits = tf.reshape(flat_logits, [batch_size, beam_size, -1])

    # Convert logits to normalized log probs
    candidate_log_probs = _log_prob_from_logits(logits)

    # Multiply the probabilities by the current probabilities of the beam
    # (an addition in log space).
    # (batch_size, beam_size, vocab_size) + (batch_size, beam_size, 1)
    log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2)

    length_penalty = tf.pow(((5. + tf.to_float(i + 1)) / 6.), alpha)

    curr_scores = log_probs / length_penalty
    # Flatten out (beam_size, vocab_size) probs in to a list of possibilities
    flat_curr_scores = tf.reshape(curr_scores, [-1, beam_size * vocab_size])

    topk_scores, topk_ids = tf.nn.top_k(flat_curr_scores, k=beam_size * 2)

    # Recovering the log probs because we will need to send them back
    topk_log_probs = topk_scores * length_penalty

    # Work out what beam the top probs are in. The flat index is
    # beam * vocab_size + token, so integer division gives the beam and the
    # remainder gives the token id.
    topk_beam_index = topk_ids // vocab_size
    topk_ids %= vocab_size  # Unflatten the ids

    # The next three steps are to create coordinates for tf.gather_nd to pull
    # out the correct sequences from id's that we need to grow.
    # We will also use the coordinates to gather the booleans of the beam items
    # that survived.
    batch_pos = compute_batch_indices(batch_size, beam_size * 2)

    # top beams will give us the actual coordinates to do the gather.
    # stacking will create a tensor of shape [batch_size, beam_size * 2, 2],
    # where the last dimension contains the (batch, beam) gathering
    # coordinates.
    topk_coordinates = tf.stack([batch_pos, topk_beam_index], axis=2)

    # Gather up the most probable 2*beams both for the ids and finished_in_alive
    # bools
    topk_seq = tf.gather_nd(alive_seq, topk_coordinates)
    if states:
      states = nest.map_structure(
          lambda state: tf.gather_nd(state, topk_coordinates), states)

    # Append the most probable alive
    topk_seq = tf.concat([topk_seq, tf.expand_dims(topk_ids, axis=2)], axis=2)

    # A candidate is finished exactly when the token just appended is EOS.
    topk_finished = tf.equal(topk_ids, eos_id)

    return topk_seq, topk_log_probs, topk_scores, topk_finished, states

  def inner_loop(i, alive_seq, alive_log_probs, finished_seq, finished_scores,
                 finished_flags, states):
    """Inner beam search loop.

    There are three groups of tensors, alive, finished, and topk.
    The alive group contains information about the current alive sequences
    The topk group contains information about alive + topk current decoded words
    the finished group contains information about finished sentences, that is,
    the ones that have decoded to <EOS>. These are what we return.
    The general beam search algorithm is as follows:
    While we haven't terminated (pls look at termination condition)
      1. Grow the current alive to get beam*2 topk sequences
      2. Among the topk, keep the top beam_size ones that haven't reached EOS
      into alive
      3. Among the topk, keep the top beam_size ones have reached EOS into
      finished
    Repeat
    To make things simple with using fixed size tensors, we will end
    up inserting unfinished sequences into finished in the beginning. To stop
    that we add -ve INF to the score of the unfinished sequence so that when a
    true finished sequence does appear, it will have a higher score than all the
    unfinished ones.

    Args:
      i: loop index
      alive_seq: Topk sequences decoded so far [batch_size, beam_size, i+1]
      alive_log_probs: probabilities of the beams. [batch_size, beam_size]
      finished_seq: Current finished sequences.
        [batch_size, beam_size, i+1]
      finished_scores: scores for each of these sequences.
        [batch_size, beam_size]
      finished_flags: finished bools for each of these sequences.
        [batch_size, beam_size]
      states: dict (possibly nested) of decoding states.

    Returns:
      Tuple of
        (Incremented loop index
         New alive sequences,
         Log probs of the alive sequences,
         New finished sequences,
         Scores of the new finished sequences,
         Flags indicating which sequence in finished as reached EOS,
         dict of final decoding states)
    """

    # Each inner loop, we carry out three steps:
    # 1. Get the current topk items.
    # 2. Extract the ones that have finished and haven't finished
    # 3. Recompute the contents of finished based on scores.
    topk_seq, topk_log_probs, topk_scores, topk_finished, states = grow_topk(
        i, alive_seq, alive_log_probs, states)
    alive_seq, alive_log_probs, _, states = grow_alive(
        topk_seq, topk_scores, topk_log_probs, topk_finished, states)
    finished_seq, finished_scores, finished_flags, _ = grow_finished(
        finished_seq, finished_scores, finished_flags, topk_seq, topk_scores,
        topk_finished)

    return (i + 1, alive_seq, alive_log_probs, finished_seq, finished_scores,
            finished_flags, states)

  def _is_finished(i, unused_alive_seq, alive_log_probs, unused_finished_seq,
                   finished_scores, finished_in_finished, unused_states):
    """Checking termination condition.

    Despite its name, this is the `cond` of the tf.while_loop below, so it
    returns True while decoding should CONTINUE.

    We terminate when we decoded up to decode_length or the lowest scoring item
    in finished has a greater score than the highest prob item in alive divided
    by the max length penalty

    Args:
      i: loop index
      alive_log_probs: probabilities of the beams. [batch_size, beam_size]
      finished_scores: scores for each of these sequences.
        [batch_size, beam_size]
      finished_in_finished: finished bools for each of these sequences.
        [batch_size, beam_size]

    Returns:
      Scalar bool tensor: True if another decoding step should be run.
    """
    if not stop_early:
      return tf.less(i, decode_length)
    max_length_penalty = tf.pow(((5. + tf.to_float(decode_length)) / 6.), alpha)
    # The best possible score of the most likely alive sequence.
    # NOTE(review): taking column 0 assumes the alive beams are kept sorted
    # best-first by compute_topk_scores_and_seq - confirm.
    lower_bound_alive_scores = alive_log_probs[:, 0] / max_length_penalty

    # Now to compute the lowest score of a finished sequence in finished
    # If the sequence isn't finished, we multiply its score by 0. since
    # scores are all -ve, taking the min will give us the score of the lowest
    # finished item.
    lowest_score_of_finished_in_finished = tf.reduce_min(
        finished_scores * tf.to_float(finished_in_finished), axis=1)
    # If none of the sequences have finished, then the min will be 0 and
    # we have to replace it by -ve INF if it is. The score of any seq in alive
    # will be much higher than -ve INF and the termination condition will not
    # be met.
    lowest_score_of_finished_in_finished += (
        (1. - tf.to_float(tf.reduce_any(finished_in_finished, 1))) * -INF)

    # The bound must hold for every item in the batch before stopping early.
    bound_is_met = tf.reduce_all(
        tf.greater(lowest_score_of_finished_in_finished,
                   lower_bound_alive_scores))

    return tf.logical_and(
        tf.less(i, decode_length), tf.logical_not(bound_is_met))

  # alive_seq and finished_seq grow by one position along their last axis on
  # every iteration, so their shape invariants are left fully unknown; the
  # score / flag tensors keep their static shapes.
  (_, alive_seq, alive_log_probs, finished_seq, finished_scores,
   finished_flags, states) = tf.while_loop(
       _is_finished,
       inner_loop, [
           tf.constant(0), alive_seq, alive_log_probs, finished_seq,
           finished_scores, finished_flags, states
       ],
       shape_invariants=[
           tf.TensorShape([]),
           tf.TensorShape([None, None, None]),
           alive_log_probs.get_shape(),
           tf.TensorShape([None, None, None]),
           finished_scores.get_shape(),
           finished_flags.get_shape(),
           nest.map_structure(get_state_shape_invariants, states),
       ],
       parallel_iterations=1,
       back_prop=False)

  # Restore the statically known beam dimension that the loose shape
  # invariants above discarded.
  alive_seq.set_shape((None, beam_size, None))
  finished_seq.set_shape((None, beam_size, None))

  # Accounting for corner case: It's possible that no sequence in alive for a
  # particular batch item ever reached EOS. In that case, we should just copy
  # the contents of alive for that batch item. tf.reduce_any(finished_flags, 1)
  # if 0, means that no sequence for that batch index had reached EOS. We need
  # to do the same for the scores as well.
  # Note that the fallback scores are the raw alive log probs, i.e. they are
  # not length-penalised like finished_scores.
  finished_seq = tf.where(
      tf.reduce_any(finished_flags, 1), finished_seq, alive_seq)
  finished_scores = tf.where(
      tf.reduce_any(finished_flags, 1), finished_scores, alive_log_probs)
  if return_states:
    return finished_seq, finished_scores, states
  return finished_seq, finished_scores
def rcnn_proposals(proposals,
                   bbox_pred,
                   cls_prob,
                   im_shape,
                   num_classes,
                   min_prob_threshold=0.0,
                   class_max_detections=100):
    """Turn RPN proposals plus R-CNN outputs into the final detections.

    For every foreground class the proposals are refined with that class'
    regression deltas, clipped to the image, filtered by probability and
    area, and reduced with non-maximum suppression. The survivors of all
    classes are then merged and the globally best-scoring ones are returned
    (at most TOTAL_MAX_DETECTIONS, a module-level constant).

    Args:
        proposals: Tensor with the RPN proposals bounding boxes.
            Shape (num_proposals, 4). num_proposals is not known
            beforehand.
        bbox_pred: Tensor with the RCNN delta predictions for each proposal
            for each class. Shape (num_proposals, 4 * num_classes).
        cls_prob: A softmax probability for each proposal where index 0 is
            the background class (which is ignored).
            Shape (num_proposals, num_classes + 1).
        im_shape: Image shape, forwarded to `clip_boxes`.
        num_classes: Number of foreground classes to iterate over.
        min_prob_threshold: Detections with a class probability below this
            value are discarded.
        class_max_detections: Maximum number of boxes NMS may keep per
            class.

    Returns:
        Tuple of
            objects: Shape (final_num_proposals, 4), boxes as
                (x_min, y_min, x_max, y_max). final_num_proposals depends
                on NMS and is unknown beforehand.
            objects_label: Shape (final_num_proposals,), class id per box.
            objects_label_prob: Shape (final_num_proposals,), class
                probability per box, sorted in descending order.
    """
    # Scaling applied to the predicted deltas before decoding them.
    target_variances = np.array([0.1, 0.1, 0.2, 0.2])

    boxes_per_class = []
    probs_per_class = []
    labels_per_class = []

    for class_id in range(num_classes):
        # Column 0 of cls_prob is background, so class scores are offset by
        # one; the deltas of class c live in columns [4c, 4c + 4).
        scores = cls_prob[:, class_id + 1]
        first_col = 4 * class_id
        deltas = bbox_pred[:, first_col:(first_col + 4)]

        # Refine the proposals with this class' deltas and keep the result
        # inside the image.
        decoded = decode(proposals, deltas * target_variances)
        clipped = clip_boxes(decoded, im_shape)

        # Keep only boxes that are confident enough ...
        confident = tf.greater_equal(scores, min_prob_threshold)

        # ... and that still enclose a strictly positive area.
        x_min, y_min, x_max, y_max = tf.unstack(clipped, axis=1)
        width = tf.maximum(x_max - x_min, 0.0)
        height = tf.maximum(y_max - y_min, 0.0)
        non_empty = tf.greater(width * height, 0.0)

        keep = tf.logical_and(non_empty, confident)
        kept_boxes = tf.boolean_mask(clipped, keep)
        kept_scores = tf.boolean_mask(scores, keep)

        # TensorFlow's NMS expects its own coordinate ordering, so convert,
        # suppress, gather the winners and convert back.
        kept_boxes_tf = change_order(kept_boxes)
        nms_idx = tf.image.non_max_suppression(
            kept_boxes_tf,
            kept_scores,
            class_max_detections,
            iou_threshold=CLASS_NMS_THRESHOLD)

        nms_boxes_tf = tf.gather(kept_boxes_tf, nms_idx)
        nms_scores = tf.gather(kept_scores, nms_idx)

        boxes_per_class.append(change_order(nms_boxes_tf))
        probs_per_class.append(nms_scores)

        # Every box kept in this iteration carries the same label, so the
        # label vector is just class_id repeated once per NMS survivor.
        num_kept = tf.shape(nms_idx)[0]
        labels_per_class.append(tf.tile([class_id], [num_kept]))

    # Stack the per-class results row-wise into single tensors.
    all_boxes = tf.concat(boxes_per_class, axis=0)
    all_labels = tf.concat(labels_per_class, axis=0)
    all_probs = tf.concat(probs_per_class, axis=0)

    # Select the best detections across all classes; there may be fewer
    # candidates than the global cap.
    num_final = tf.minimum(TOTAL_MAX_DETECTIONS, tf.shape(all_probs)[0])
    best = tf.nn.top_k(all_probs, k=num_final)

    best_probs = best.values
    best_boxes = tf.gather(all_boxes, best.indices)
    best_labels = tf.gather(all_labels, best.indices)

    return (
        best_boxes,
        best_labels,
        best_probs,
    )
  def _remove_dilations(self):
    """Rearrange the input so that plain pooling emulates dilated pooling.

    For every sliding window (as defined by the kernel size, dilations and
    strides) the values it covers are gathered from the input and laid out
    contiguously. A pooling operation with strides = kernel_shape applied to
    the returned tensor then yields the dilated pooling result.

    As a side effect `self.output_shape` is set to the shape of the
    returned tensor.

    Example:
      Input:     [[  0,  1,  2,  3],
                  [  4,  5,  6,  7],
                  [  8,  9, 10, 11],
                  [ 12, 13, 14, 15]]

      Kernel:    [2, 2]
      Dilations: [2, 2]
      Strides:   [1, 1]

      Will return:
                 [[  0,  2,  1,  3],
                  [  8, 10,  9, 11],
                  [  4,  6,  5,  7],
                  [ 12, 14, 13, 15]]

      After max_pool2d with kernel_shape = strides = [2, 2]
      the result is:
                 [[ 10, 11],
                  [ 14, 15]]

    Returns:
      The gathered tensor, reshaped to `self.output_shape`.
    """
    n_spatial = self.spatial_size
    input_shape = tf_shape(self.input)
    in_spatial_shape = input_shape[1:n_spatial + 1]
    channels_count = input_shape[n_spatial + 1]

    # Start the index table with one row per channel (e.g. [[0], [1]]);
    # all index vectors in this method are kept as column vectors.
    indices = tf.expand_dims(tf.range(channels_count, dtype=tf.int64), 1)

    # The output shape is filled in step by step: batch size now, the
    # spatial sizes inside the loop, the channel count afterwards.
    self.output_shape = [0] * (n_spatial + 2)
    self.output_shape[0] = input_shape[0]

    # Walk the spatial axes from the innermost to the outermost. At every
    # step the output positions of that axis are mapped to input positions
    # with `_calc_input_ind` and combined (cartesian product) with the
    # indices collected so far.
    #
    # For the 4x4 example from the docstring (kernel [2, 2],
    # dilations [2, 2], strides [1, 1], a single channel):
    #
    #   start:            indices = [[0]]
    #
    #   step 0 (axis 1):  filter_size = 3, output_size = 4
    #                     axis_ind = [[0], [2], [1], [3]]
    #                     indices  = [[0 0], [2 0], [1 0], [3 0]]
    #
    #   step 1 (axis 0):  filter_size = 3, output_size = 4
    #                     axis_ind = [[0], [2], [1], [3]]
    #                     indices  = [[0 0 0], [0 2 0], [0 1 0], [0 3 0],
    #                                 [2 0 0], [2 2 0], [2 1 0], [2 3 0],
    #                                 [1 0 0], [1 2 0], [1 1 0], [1 3 0],
    #                                 [3 0 0], [3 2 0], [3 1 0], [3 3 0]]
    #
    # These rows are the coordinates handed to gather_nd below.
    for axis in reversed(range(n_spatial)):
      kernel = self.kernel_shape[axis]
      dilation = self.dilations[axis]
      stride = self.strides[axis]

      # Extent of one dilated window along this axis.
      filter_size = (kernel - 1) * dilation + 1
      # Number of windows times the kernel size: every window contributes
      # `kernel` gathered elements along this axis.
      n_windows = ((in_spatial_shape[axis] - filter_size) // stride) + 1
      output_size = n_windows * kernel
      self.output_shape[axis + 1] = output_size

      # Output positions 0 .. output_size - 1, translated to the input
      # positions they read from (e.g. [0, 1, 2, 3] -> [0, 2, 1, 3]).
      axis_ind = self._calc_input_ind(tf.range(output_size), kernel,
                                      dilation, stride)
      axis_ind = tf.expand_dims(axis_ind, 1)

      indices = tf_product(axis_ind, indices)

    # For 2D data `indices` now holds
    # [[y1, x1, c], [y2, x2, c], ..., [yn, xm, c]] where n is the height,
    # m is the width and c is the channel number.

    self.output_shape[n_spatial + 1] = channels_count

    # Add leading axes until the rank matches the input rank + 1, then
    # repeat the index table once per batch item.
    for _ in range(n_spatial):
      indices = tf.expand_dims(indices, 0)
    multiples = [input_shape[0]] + [1] * (n_spatial + 1)
    indices = tf.tile(indices, multiples)

    # Pull the selected values out of the input (the batch axis is handled
    # by batch_dims) and give the result its final shape.
    gathered = tf.gather_nd(self.input, indices, batch_dims=1)
    return tf.reshape(gathered, self.output_shape)