def fztloss(f, pVecs, nVecs):
    """Tensorized ranking cost from the Fast Zero-Shot Learning paper.

    For every (negative tag, positive tag) pair the cost is
    ``log(1 + exp(<f, n> - <f, p>))``; all pairwise costs are summed into a
    single scalar.

    Args:
        f: The output from the network, a tensor of shape
            (# images, word embedding size).
        pVecs: The vector embeddings of the ground truth tags, a tensor of
            shape (# images, # positive tags, word embedding size).
        nVecs: The vector embeddings of negatively sampled tags, a tensor of
            shape (# images, # negative samples, word embedding size).

    NOTE(review): the broadcast in ``tf.mul(pVecs, f)`` and the transposes
    below only line up if the tag axis comes first, i.e.
    (# tags, # images, embedding size) -- confirm the layout against callers.

    Returns:
        Scalar tensor representing the batch cost.
    """
    # Dot product of every tag embedding with the network output.
    pos_scores = tf.reduce_sum(tf.mul(pVecs, f), reduction_indices=2)
    neg_scores = tf.reduce_sum(tf.mul(nVecs, f), reduction_indices=2)
    pos_scores = tf.transpose(pos_scores, [1, 0])
    neg_scores = tf.transpose(neg_scores, [1, 0])

    # Expand both score matrices to a common (row, negative, positive) grid
    # so every negative score is compared with every positive score.
    negexpan = tf.tile(
        tf.expand_dims(neg_scores, -1),
        [1, 1, tf.shape(pos_scores)[1]])
    posexpan = tf.tile(
        tf.transpose(tf.expand_dims(pos_scores, -1), [0, 2, 1]),
        [1, tf.shape(neg_scores)[1], 1])
    differences = tf.sub(negexpan, posexpan)

    # softplus(x) == log(1 + exp(x)), but it does not overflow to inf (and
    # produce NaN gradients) when a margin is large.
    return tf.reduce_sum(tf.nn.softplus(differences))
def bidiag_matmul(matrix, alpha, beta, adjoint_b=False, name="bidiag_matmul"):
    """Multiplies a matrix by a bidiagonal matrix.

    `alpha` and `beta` are length-k vectors holding the diagonal and the first
    lower subdiagonal of a (K+1) x K matrix B.

    With `adjoint_b` False the product A * B is formed as
      A[:, :-1] * diag(alpha) + A[:, 1:] * diag(beta)

    Otherwise A * B[:-1, :]' is formed as
      A * diag(alpha) + [zeros(m, 1), A[:, :-1] * diag(beta[:-1])]

    Args:
      matrix: A rank-2 `Tensor` representing matrix A.
      alpha: A rank-1 `Tensor` representing the diagonal of B.
      beta: A rank-1 `Tensor` representing the lower subdiagonal of B.
      adjoint_b: `bool` selecting which product to compute. Only the literal
        `False` selects the plain product.
      name: A name scope for the operation.

    Returns:
      `A * B` when `adjoint_b` is False, `A * B'` otherwise.
    """
    with tf.name_scope(name):
        diag_row = tf.expand_dims(alpha, 0)

        if adjoint_b is not False:
            subdiag_row = tf.expand_dims(beta[:-1], 0)
            num_rows = tf.shape(matrix)[:1]
            # Column of zeros that shifts the subdiagonal product right by one.
            leading_zeros = tf.expand_dims(
                tf.zeros(num_rows, dtype=matrix.dtype), 1)
            shifted = tf.concat(
                1, [leading_zeros, matrix[:, :-1] * subdiag_row])
            return matrix * diag_row + shifted

        subdiag_row = tf.expand_dims(beta, 0)
        return matrix[:, :-1] * diag_row + matrix[:, 1:] * subdiag_row
def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
    """Draw the boxes of a BoxList on top of an image.

    The drawing itself is delegated to `tf.image.draw_bounding_boxes`, which
    works on a batch, so a batch axis is added and removed around the call.

    Args:
      image: an image tensor with shape [height, width, 3]
      boxlist: a BoxList
      normalized: (boolean) whether the corners are already normalized with
        respect to the image size. When False they are divided by the image
        height/width before drawing.
      scope: name scope.

    Returns:
      image_and_boxes: an image tensor with shape [height, width, 3]
    """
    with tf.name_scope(scope, 'VisualizeBoxesInImage'):
        if not normalized:
            height, width, _ = tf.unstack(tf.shape(image))
            y_scale = 1.0 / tf.cast(height, tf.float32)
            x_scale = 1.0 / tf.cast(width, tf.float32)
            boxlist = scale(boxlist, y_scale, x_scale)

        batched_corners = tf.expand_dims(boxlist.get(), 0)
        batched_image = tf.expand_dims(image, 0)
        drawn = tf.image.draw_bounding_boxes(batched_image, batched_corners)
        return tf.squeeze(drawn, [0])
def copy_net_logit_function(state):
    """Combine generation and copy scores into log-space output logits.

    Relies on names from the enclosing scope: `self`, `decoding_w`,
    `decoding_b`, `projected_inputs`, `copy_mask` and
    `vocabulary_shaped_indices`.

    Args:
        state: decoder state tensor; dropout is applied to it first.

    Returns:
        A pair `(logits, copy_logits_in_time)`: the log of the summed
        exponentiated generate/copy scores, and the raw per-source-position
        copy scores.
    """
    state = tf.nn.dropout(state, self.dropout_placeholder)

    # the logits for generating the next word are computed in
    # the standard way
    generate_logits = tf.matmul(state, decoding_w) + decoding_b

    # Equation 8 in the paper ... in shape of source sentence
    # (batch x time)
    state_per_step = tf.expand_dims(state, 1)
    copy_logits_in_time = tf.reduce_sum(
        projected_inputs * state_per_step, [2])

    # mask out the padding in exponential domain; the scores are clipped at
    # 80 before exponentiation
    clipped_copy = tf.minimum([[80.0]], copy_logits_in_time)
    masked_copy_exp = tf.exp(clipped_copy) * copy_mask

    # ... in shape of vocabulary (batch x time x vocabulary)
    copy_in_vocabulary = (
        tf.expand_dims(masked_copy_exp, 2) * vocabulary_shaped_indices)

    # Equation 6 without normalization
    copy_logits_exp = tf.reduce_sum(copy_in_vocabulary, [1])
    generate_exp = tf.exp(tf.minimum([[80.0]], generate_logits))
    logits_exp = copy_logits_exp + generate_exp

    # floor at 1e-40 so the log stays finite
    return (tf.log(tf.maximum([[1e-40]], logits_exp)), copy_logits_in_time)
def encode_coordinates_alt(self, net):
    """An alternative implementation of the coordinate encoding.

    Builds one-hot planes marking the row index and the column index of every
    spatial position and appends them to the feature axis of `net`.

    Args:
      net: a tensor of shape=[batch_size, height, width, num_features]; the
        shape is read statically via `net.shape.as_list()`.

    Returns:
      a single tensor: `net` concatenated along axis 3 with the `height`
      row-indicator planes followed by the `width` column-indicator planes.
    """
    batch_size, height, width, _ = net.shape.as_list()

    # One [height, width] plane per row index.
    row_planes = []
    for row in xrange(height):
        row_one_hot = tf.contrib.layers.one_hot_encoding(
            tf.constant([row]), num_classes=height)
        row_planes.append(
            tf.tile(tf.reshape(row_one_hot, [height, 1]), [1, width]))
    h_loc = tf.concat([tf.expand_dims(plane, 2) for plane in row_planes], 2)

    # One [height, width] plane per column index.
    col_planes = []
    for col in xrange(width):
        col_one_hot = tf.contrib.layers.one_hot_encoding(
            tf.constant([col]), num_classes=width)
        col_planes.append(tf.tile(col_one_hot, [height, 1]))
    w_loc = tf.concat([tf.expand_dims(plane, 2) for plane in col_planes], 2)

    loc = tf.concat([h_loc, w_loc], 2)
    # Same coordinate planes for every element of the batch.
    loc = tf.tile(tf.expand_dims(loc, 0), [batch_size, 1, 1, 1])
    return tf.concat([net, loc], 3)
def __call__(self, x, z_grads):
    """Build the graph for the per-example gradient through the op.

    Assumes that the MatMul was called with a design matrix with examples in
    rows as the first argument and parameters as the second argument.

    Args:
      x: The Tensor to differentiate with respect to. This tensor must
        represent the weights, i.e. be input 1 of the op.
      z_grads: The list of gradients on the output of the op.

    Returns:
      x_grads: A Tensor containing the gradient with respect to `x` for each
        example. This is a 3-D tensor, with the first axis corresponding to
        examples and the remaining axes matching the shape of x.
    """
    op_inputs = list(self.op.inputs)
    position = op_inputs.index(x)  # raises ValueError if x is not an input
    assert position != -1
    assert len(z_grads) == len(self.op.outputs)
    # We don't expect anyone to per-example differentiate with respect
    # to anything other than the weights, which we expect to be arg 1.
    assert position == 1

    # From here on we work with the design matrix (input 0), not `x`.
    examples, _weights = self.op.inputs
    (output_grad,) = z_grads

    # Outer product per example: [examples, in, 1] * [examples, 1, out].
    examples_col = tf.expand_dims(examples, 2)
    output_grad_row = tf.expand_dims(output_grad, 1)
    return tf.mul(examples_col, output_grad_row)
def bond_conv_layer(activated_atoms, bv_params, layer):
    """Apply one bond-convolution layer to the activated atoms.

    For each degree 1..5 the atoms are multiplied by the flow matrices
    gathered for the adjacency types, masked so that only atoms of that
    degree contribute, offset by the degree's bias and summed over axis 1.
    The per-degree results are accumulated and passed through a ReLU.

    Relies on names from the enclosing scope: `flow_layer_depths`,
    `N_atoms_ph`, `deg_list_ph` and `type_adj_ph`.

    Args:
        activated_atoms: atom activations fed into this layer.
        bv_params: dict holding the 'A_flow<layer>_<deg>' weights and
            'b_flow<layer>_<deg>' biases.
        layer: index of the layer, used to select parameters and depth.

    Returns:
        Tensor of shape [N_atoms_ph, flow_layer_depths[layer]].
    """
    flow_depth = flow_layer_depths[layer]
    next_activated_atoms = tf.zeros(tf.pack([N_atoms_ph, flow_depth]))

    # None of these depend on the degree, so build them once rather than on
    # every loop iteration.
    atom_multiples = tf.pack([N_atoms_ph, 1, 1])
    activated_atoms_dim = tf.expand_dims(
        tf.tile(tf.expand_dims(activated_atoms, 0), atom_multiples), 2)
    mask_multiples = tf.pack([1, N_atoms_ph, flow_depth])
    bias_multiples = tf.pack([N_atoms_ph, N_atoms_ph, 1])

    for deg in range(1, 6):
        suffix = str(layer) + '_' + str(deg)
        flow_map = tf.gather(bv_params['A_flow' + suffix], type_adj_ph)

        adj_mul = tf.batch_matmul(activated_atoms_dim, flow_map)
        adj_mul = tf.squeeze(adj_mul, [2])

        # 1.0 for atoms whose degree equals `deg`, 0.0 otherwise.
        deg_mask = tf.to_float(tf.equal(deg_list_ph, deg))
        deg_list_dim = tf.tile(
            tf.expand_dims(tf.expand_dims(deg_mask, 1), 1), mask_multiples)

        biases = tf.tile(bv_params['b_flow' + suffix], bias_multiples)
        filtered_atoms = tf.add(tf.mul(adj_mul, deg_list_dim), biases)
        next_activated_atoms = (
            next_activated_atoms + tf.reduce_sum(filtered_atoms, 1))

    return tf.nn.relu(next_activated_atoms)
def loss(logits, labels):
    """Mean softmax cross-entropy between logits and integer labels.

    Args:
      logits: Logits tensor, float - [batch_size, NUM_CLASSES].
      labels: Labels tensor, int32 - [batch_size].

    Returns:
      loss: Loss tensor of type float.
    """
    # The sparse integer labels in [0, NUM_CLASSES) are turned into dense
    # 1-hot float rows: a (row, label) coordinate is built for every example
    # and scattered into a [batch_size, NUM_CLASSES] matrix of zeros.
    num_examples = tf.size(labels)
    label_column = tf.expand_dims(labels, 1)
    row_column = tf.expand_dims(tf.range(0, num_examples), 1)
    coordinates = tf.concat(1, [row_column, label_column])
    dense_shape = tf.pack([num_examples, NUM_CLASSES])
    onehot_labels = tf.sparse_to_dense(coordinates, dense_shape, 1.0, 0.0)

    per_example = tf.nn.softmax_cross_entropy_with_logits(
        logits, onehot_labels, name='xentropy')
    return tf.reduce_mean(per_example, name='xentropy_mean')
def call(self, x):
    """Execute this layer on input tensors.

    Parameters
    ----------
    x: list of Tensor
      should be [atom_features(batch_size*max_n_atoms*n_embedding),
                 distance_matrix(batch_size*max_n_atoms*max_n_atoms*n_distance),
                 distance_matrix_mask(batch_size*max_n_atoms*max_n_atoms)]

    Returns
    -------
    tf.Tensor
      new embeddings for atoms, same shape as x[0]
    """
    self.build()
    atom_features = x[0]
    distance_matrix = x[1]
    distance_matrix_mask = x[2]

    # Both inputs go through their own hidden layer first.
    distance_hidden = (
        tf.tensordot(distance_matrix, self.W_df, [[3], [0]]) + self.b_df)
    atom_hidden = (
        tf.tensordot(atom_features, self.W_cf, [[2], [0]]) + self.b_cf)

    # For atom i in a molecule, combine the distance info of pair (i, j)
    # with the embedding of atom j.
    pairwise = tf.multiply(distance_hidden, tf.expand_dims(atom_hidden, axis=1))
    pairwise = tf.tensordot(pairwise, self.W_fc, [[3], [0]])

    # Mask out pair (i, i) and all paddings.
    pairwise = tf.multiply(
        pairwise, tf.expand_dims(distance_matrix_mask, axis=3))
    pairwise = self.activation(pairwise)

    # For atom i, sum the influence of every other atom j and add it to the
    # incoming features.
    return tf.reduce_sum(pairwise, axis=2) + atom_features
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    Stores the per-class distances of every example in `self._all_scores`
    and the distance to each example's assigned class in `self._scores`.

    Args:
      data: iterable of data shards (tensors).
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    tile_per_example = tf.pack([self._num_examples, 1, 1])
    for shard in data:
        shard_scores = []
        batched_shard = tf.expand_dims(shard, 0)
        for class_id in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[class_id, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[class_id, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            # One copy of the inverse covariance per example.
            inv_cov = tf.tile(tf.expand_dims(inverse, 0), tile_per_example)
            diff = tf.transpose(
                batched_shard - self._means[class_id, :, :], perm=[1, 0, 2])
            # sqrt(diff * cov^-1 * diff')
            m_left = tf.batch_matmul(diff, inv_cov)
            squared = tf.batch_matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))
            shard_scores.append(tf.sqrt(squared))
        self._all_scores.append(
            tf.reshape(
                tf.concat(1, shard_scores),
                tf.pack([self._num_examples, self._num_classes])))

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    row_class_pairs = tf.concat(
        1, [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, row_class_pairs)
def dot(x, y):
    """Compute the product of a Tensor matrix and a Tensor vector.

    Exactly one argument is treated as the vector, decided by the rank of
    ``x``: a rank-1 ``x`` is used as a row vector multiplying the matrix
    ``y``; otherwise ``x`` is the matrix and ``y`` is used as a column vector.

    Parameters
    ----------
    x : tf.Tensor
        ``M`` vector, or a matrix
    y : tf.Tensor
        ``M x N`` matrix when ``x`` is the vector; otherwise a vector whose
        length matches the number of columns of ``x``

    Returns
    -------
    tf.Tensor
        The rank-2 result of ``tf.matmul``: a single row when ``x`` is the
        vector, a single column when ``x`` is the matrix.
    """
    if len(x.get_shape()) != 1:
        # x is the matrix; y becomes a column.
        return tf.matmul(x, tf.expand_dims(y, 1))
    # x is the vector; it becomes a row.
    return tf.matmul(tf.expand_dims(x, 0), y)
def __init__(self, memory_cells, query, project_query=False):
    """Define Attention.

    Scores every memory cell against the query with a dot product, turns the
    scores into a softmax distribution (padding gets a logit of -inf) and
    retrieves the probability-weighted sum of the cells.

    Args:
        memory_cells (SequenceBatch): a SequenceBatch containing a Tensor of
            shape (batch_size, num_cells, cell_dim)
        query (Tensor): a tensor of shape (batch_size, query_dim).
        project_query (bool): defaults to False. If True, the query goes
            through an extra projection layer to coerce it to cell_dim.
    """
    memory_values = memory_cells.values
    memory_mask = memory_cells.mask
    cell_dim = memory_values.get_shape().as_list()[2]

    if project_query:
        # project the query up/down to cell_dim
        self._projection_layer = Dense(cell_dim, activation='linear')
        query = self._projection_layer(query)  # (batch_size, cand_dim)

    # batch matrix multiply to compute logit scores for all choices in all
    # batches
    query_column = tf.expand_dims(query, 2)  # (batch_size, cell_dim, 1)
    scores = tf.squeeze(
        tf.batch_matmul(memory_values, query_column),
        [2])  # (batch_size, num_cells)

    # set all pad logits to negative infinity
    padded_logits = SequenceBatch(scores, memory_mask).with_pad_value(
        -float('inf'))

    # normalize to get probs
    probs = tf.nn.softmax(padded_logits.values)  # (batch_size, num_cells)

    weighted = tf.batch_matmul(
        tf.expand_dims(probs, 1), memory_values)  # (batch_size, 1, cell_dim)
    retrieved = tf.squeeze(weighted, [1])  # (batch_size, cell_dim)

    self._logits = padded_logits.values
    self._probs = probs
    self._retrieved = retrieved
def softmax(x):
    """Compute a numerically stable row-wise softmax in tensorflow.

    The maximum of each row is subtracted before exponentiation so that
    `tf.exp` never sees a large positive argument.

    Args:
        x:   tf.Tensor with shape (n_samples, n_features). Note feature
             vectors are represented by row-vectors. (1-d input is not
             handled.)
    Returns:
        out: tf.Tensor with shape (n_sample, n_features).
    """
    ### YOUR CODE HERE
    row_max = tf.reduce_max(x, reduction_indices=[1], keep_dims=True)
    exps = tf.exp(x - row_max)
    normalizer = tf.reduce_sum(exps, reduction_indices=[1], keep_dims=True)
    out = tf.div(exps, normalizer)
    ### END YOUR CODE
    return out
def _build_predict(self, Xnew, full_cov=False):
    """
    Compute the mean and variance of the latent function at some new points
    Xnew. For a derivation of the terms in here, see the associated SGPR
    notebook.

    When `full_cov` is True the variance is the full covariance over Xnew,
    otherwise only its diagonal; in both cases it is tiled once per column
    of `self.Y`.
    """
    num_inducing = len(self.feature)
    residual = self.Y - self.mean_function(self.X)
    Kuf = self.feature.Kuf(self.kern, self.X)
    Kuu = self.feature.Kuu(self.kern, jitter=settings.numerics.jitter_level)
    Kus = self.feature.Kuf(self.kern, Xnew)
    sigma = tf.sqrt(self.likelihood.variance)

    L = tf.cholesky(Kuu)
    A = tf.matrix_triangular_solve(L, Kuf, lower=True) / sigma
    identity = tf.eye(num_inducing, dtype=settings.float_type)
    B = tf.matmul(A, A, transpose_b=True) + identity
    LB = tf.cholesky(B)
    c = tf.matrix_triangular_solve(
        LB, tf.matmul(A, residual), lower=True) / sigma

    L_inv_Kus = tf.matrix_triangular_solve(L, Kus, lower=True)
    LB_inv_L_inv_Kus = tf.matrix_triangular_solve(LB, L_inv_Kus, lower=True)
    mean = tf.matmul(LB_inv_L_inv_Kus, c, transpose_a=True)

    num_outputs = tf.shape(self.Y)[1]
    if not full_cov:
        var = (self.kern.Kdiag(Xnew)
               + tf.reduce_sum(tf.square(LB_inv_L_inv_Kus), 0)
               - tf.reduce_sum(tf.square(L_inv_Kus), 0))
        multiples = tf.stack([1, num_outputs])
        var = tf.tile(tf.expand_dims(var, 1), multiples)
    else:
        var = (self.kern.K(Xnew)
               + tf.matmul(LB_inv_L_inv_Kus, LB_inv_L_inv_Kus, transpose_a=True)
               - tf.matmul(L_inv_Kus, L_inv_Kus, transpose_a=True))
        multiples = tf.stack([1, 1, num_outputs])
        var = tf.tile(tf.expand_dims(var, 2), multiples)

    return mean + self.mean_function(Xnew), var
def _mean_image_subtraction(image, means):
    """Subtracts the given means from each image channel.

    For example:
      means = [123.68, 116.779, 103.939]
      image = _mean_image_subtraction(image, means)

    Note that the rank of `image` must be known.

    Args:
      image: a tensor of size [height, width, C].
      means: a C-vector of values to subtract from each channel.

    Returns:
      the centered image.

    Raises:
      ValueError: If the rank of `image` is unknown, if `image` has a rank
        other than three or if the number of channels in `image` doesn't
        match the number of values in `means`.
    """
    static_shape = image.get_shape()
    if static_shape.ndims != 3:
        raise ValueError('Input must be of size [height, width, C>0]')
    if len(means) != static_shape.as_list()[-1]:
        raise ValueError('len(means) must match the number of channels')

    # Lift the 1-D means to 3-D so they line up with the channel axis.
    means_3d = tf.expand_dims(tf.expand_dims(means, 0), 0)
    return image - means_3d
def ValidArcAndTokenMasks(lengths, max_length, dtype=tf.float32):
    r"""Returns 0/1 masks for valid arcs and tokens.

    Args:
      lengths: [B] vector of input sequence lengths.
      max_length: Scalar maximum input sequence length, aka M.
      dtype: Data type for output mask.

    Returns:
      [B,M,M] tensor A with 0/1 indicators of valid arcs.  Specifically,
        A_{b,t,s} = t,s < lengths[b] ? 1 : 0
      [B,M] matrix T with 0/1 indicators of valid tokens.  Specifically,
        T_{b,t} = t < lengths[b] ? 1 : 0
    """
    # Positions 0..M-1, in the same integer type as the lengths.
    positions_m = tf.range(tf.cast(max_length, lengths.dtype.base_dtype))

    # The comparison broadcasts [1,M] against [B,1] to [B,M]: a token is
    # valid when its position is below the length of its sequence.
    is_valid_bxm = tf.less(
        tf.expand_dims(positions_m, 0), tf.expand_dims(lengths, 1))
    valid_token_bxm = tf.cast(is_valid_bxm, dtype)

    # An arc is valid when both of its endpoints are: the outer product of
    # the token indicator vector with itself.
    token_column_bxmx1 = tf.expand_dims(valid_token_bxm, 2)
    token_row_bx1xm = tf.expand_dims(valid_token_bxm, 1)
    valid_arc_bxmxm = tf.matmul(token_column_bxmx1, token_row_bx1xm)

    return valid_arc_bxmxm, valid_token_bxm
def build_psi_stats_rbf_plus_linear(Z, kern, mu, S):
    """Build the psi statistics for a kernel that is the sum of a linear and an rbf part.

    psi0, psi1 and psi2 of the two parts (from `build_psi_stats_linear` and
    `build_psi_stats_rbf`) are added together; psi2 additionally receives a
    linear/rbf interaction term and its transpose.

    Args:
        Z: inducing inputs; sliced to the active dimensions of `kern`.
        kern: kernel exposing `.linear`, `.rbf` and `._slice`.
        mu: means of the input distribution.
        S: variances of the input distribution.
        (Shapes below follow the inline comments: N points, M inducing
        inputs, Q dimensions.)

    Returns:
        Tuple (psi0, psi1, psi2).
    """
    # TODO: make sure the active dimensions are overlapping completely

    # use only active dimensions
    mu, S = kern._slice(mu, S)  # only use the active dimensions.
    Z, _ = kern._slice(Z, None)

    psi0_lin, psi1_lin, psi2_lin = build_psi_stats_linear(Z, kern.linear, mu, S)
    psi0_rbf, psi1_rbf, psi2_rbf = build_psi_stats_rbf(Z, kern.rbf, mu, S)
    psi0, psi1, psi2 = psi0_lin + psi0_rbf, psi1_lin + psi1_rbf, psi2_lin + psi2_rbf

    # extra terms for the 'interaction' of linear and rbf
    l2 = tf.square(kern.rbf.lengthscales)
    A = tf.expand_dims(1./S + 1./l2, 1)  # N x 1 x Q
    m = (tf.expand_dims(mu/S, 1) + tf.expand_dims(Z/l2, 0)) / A  # N x M x Q
    mTAZ = tf.reduce_sum(tf.expand_dims(m * kern.linear.variance, 1) *
                         tf.expand_dims(tf.expand_dims(Z, 0), 0), 3)  # N x M x M
    Z2 = tf.reduce_sum(tf.square(Z) / l2, 1)  # M,
    mu2 = tf.reduce_sum(tf.square(mu) / S, 1)  # N
    mAm = tf.reduce_sum(tf.square(m) * A, 2)  # N x M
    exp_term = tf.exp(-(tf.reshape(Z2, (1, -1)) + tf.reshape(mu2, (-1, 1))-mAm) / 2.)  # N x M
    # Sum the interaction term over the N points (axis 0).
    psi2_extra = tf.reduce_sum(kern.rbf.variance * tf.expand_dims(exp_term, 2) *
                               tf.expand_dims(tf.expand_dims(tf.reduce_prod(S, 1), 1), 2) *
                               tf.expand_dims(tf.reduce_prod(A, 2), 1) *
                               mTAZ, 0)

    # Adding the transpose makes the interaction contribution symmetric.
    psi2 = psi2 + psi2_extra + tf.transpose(psi2_extra)
    return psi0, psi1, psi2
def _testGraphExtensionRestore(self):
    """Restore a graph from its MetaGraphDef, extend it with a loss and train one step."""
    test_dir = os.path.join(self.get_temp_dir(), "graph_extension")
    meta_path = os.path.join(test_dir, "metafile")
    checkpoint_path = os.path.join(test_dir, "saver0.ckpt")

    with self.test_session(graph=tf.Graph()) as sess:
        # Restores from MetaGraphDef.
        new_saver = tf.train.import_meta_graph(meta_path)
        # Generates a new MetaGraphDef.
        new_saver.export_meta_graph()
        # Restores from checkpoint.
        new_saver.restore(sess, checkpoint_path)

        # Adds loss and train: 100 all-zero labels are expanded into dense
        # 1-hot rows over 10 classes.
        labels = tf.constant(0, tf.int32, shape=[100], name="labels")
        num_examples = tf.size(labels)
        label_column = tf.expand_dims(labels, 1)
        row_column = tf.expand_dims(tf.range(0, num_examples), 1)
        coordinates = tf.concat(1, [row_column, label_column])
        onehot_labels = tf.sparse_to_dense(
            coordinates, tf.pack([num_examples, 10]), 1.0, 0.0)

        logits = tf.get_collection("logits")[0]
        per_example = tf.nn.softmax_cross_entropy_with_logits(
            logits, onehot_labels, name="xentropy")
        loss = tf.reduce_mean(per_example, name="xentropy_mean")
        tf.scalar_summary(loss.op.name, loss)

        # Creates the gradient descent optimizer with the given learning
        # rate and runs a single training step.
        train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
        sess.run(train_op)
def body(self, features):
    """Run the ResNet-v2 body selected by the hparams on `features["inputs"]`.

    The result of `resnet_v2` gets two extra axes of size 1 inserted at
    position 1 before it is returned.
    """
    hp = self.hparams
    block_fns = {
        "residual": residual_block,
        "bottleneck": bottleneck_block,
    }
    assert hp.block_fn in block_fns

    inputs = features["inputs"]
    if hp.use_nchw:
        # Convert from channels_last (NHWC) to channels_first (NCHW). This
        # provides a large performance boost on GPU.
        inputs = tf.transpose(inputs, [0, 3, 1, 2])
        data_format = "channels_first"
    else:
        data_format = "channels_last"

    is_training = hp.mode == tf.estimator.ModeKeys.TRAIN
    out = resnet_v2(
        inputs,
        block_fns[hp.block_fn],
        hp.layer_sizes,
        data_format,
        is_training=is_training)

    for _ in range(2):
        out = tf.expand_dims(out, 1)
    return out
def additive_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                       scope='additive-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an
    attention matrix attn, where attn(i, j) = dot(v, tanh(W*a_i + W*b_j)).
    v is a learnable vector and W is a learnable matrix shared by both
    sequences. The rows of attn are softmax normalized.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        hidden_units: Number of hidden units. Integer.
        scope: Variable scope name.
        reuse: Passed to the outer variable scope.

    Returns:
        Rank-3 attention tensor holding one attention matrix per batch
        element (presumably [batch_size, max_seq_len, max_seq_len]).
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Same dense weights for both sequences: the second call reuses them.
        projected_a = time_distributed_dense_layer(
            a, hidden_units, bias=False, scope='dense', reuse=False)
        projected_b = time_distributed_dense_layer(
            b, hidden_units, bias=False, scope='dense', reuse=True)

        # Broadcast to every (i, j) pair of positions.
        a_rows = tf.expand_dims(projected_a, 2)
        b_cols = tf.expand_dims(projected_b, 1)

        v = tf.get_variable(
            name='dot_weights',
            initializer=tf.variance_scaling_initializer(),
            shape=[hidden_units]
        )
        logits = tf.einsum('ijkl,l->ijk', tf.nn.tanh(a_rows + b_cols), v)

        # Softmax over the last axis, with the row maximum subtracted for
        # stability and padded positions masked before normalizing.
        row_max = tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits - row_max)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        # The epsilon keeps fully masked rows from dividing by zero.
        row_total = tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
        return attn / row_total
def multiplicative_attention(a, b, a_lengths, b_lengths, max_seq_len, hidden_units=150,
                             scope='multiplicative-attention', reuse=False):
    """
    For sequences a and b of lengths a_lengths and b_lengths, computes an
    attention matrix attn, where attn(i, j) = dot(W*a_i, W*b_j). W is a
    learnable matrix shared by both sequences. The rows of attn are softmax
    normalized.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        hidden_units: Number of hidden units. Integer.
        scope: Variable scope name.
        reuse: Passed to the outer variable scope.

    Returns:
        Rank-3 attention tensor holding one attention matrix per batch
        element (presumably [batch_size, max_seq_len, max_seq_len]).
    """
    with tf.variable_scope(scope, reuse=reuse):
        # Same dense weights for both sequences: the second call reuses them.
        projected_a = time_distributed_dense_layer(
            a, hidden_units, bias=False, scope='dense', reuse=False)
        projected_b = time_distributed_dense_layer(
            b, hidden_units, bias=False, scope='dense', reuse=True)

        # Dot product between every position of a and every position of b.
        projected_b_t = tf.transpose(projected_b, (0, 2, 1))
        logits = tf.matmul(projected_a, projected_b_t)

        # Softmax over the last axis, with the row maximum subtracted for
        # stability and padded positions masked before normalizing.
        row_max = tf.expand_dims(tf.reduce_max(logits, axis=2), 2)
        attn = tf.exp(logits - row_max)
        attn = mask_attention_weights(attn, a_lengths, b_lengths, max_seq_len)
        # The epsilon keeps fully masked rows from dividing by zero.
        row_total = tf.expand_dims(tf.reduce_sum(attn, axis=2) + 1e-10, 2)
        return attn / row_total
def radial_symmetry(self, d_cutoff, d, atom_numbers):
    """ Radial Symmetry Function

    Evaluates Gaussians of the distances `d`, centred on `radial_length`
    evenly spaced radii between 0 and `radial_cutoff`, weights them with
    `d_cutoff` and sums over axis 2. When `atomic_number_differentiated` is
    set, the sum is taken separately for every atom type in `atom_cases` and
    the results are concatenated along axis 2.
    """
    one_hot_table = tf.eye(np.max(self.atom_cases) + 1)
    atom_numbers_embedded = tf.nn.embedding_lookup(one_hot_table, atom_numbers)

    centers = np.linspace(0., self.radial_cutoff, self.radial_length)
    spacing = centers[1] - centers[0]
    widths = np.ones_like(centers) * 3 / spacing**2
    num_centers = len(centers)

    Rs = tf.cast(np.reshape(centers, (1, 1, 1, -1)), tf.float32)
    ita = tf.cast(np.reshape(widths, (1, 1, 1, -1)), tf.float32)

    # Repeat the pairwise quantities once per Gaussian centre.
    d_cutoff = tf.stack([d_cutoff] * num_centers, axis=3)
    d = tf.stack([d] * num_centers, axis=3)

    out = tf.exp(-ita * tf.square(d - Rs)) * d_cutoff

    if not self.atomic_number_differentiated:
        return tf.reduce_sum(out, axis=2)

    out_tensors = []
    for atom_type in self.atom_cases:
        # 0/1 indicator of the neighbours that have this atom type.
        type_indicator = atom_numbers_embedded[:, :, atom_type]
        selected_atoms = tf.expand_dims(
            tf.expand_dims(type_indicator, axis=1), axis=3)
        out_tensors.append(tf.reduce_sum(out * selected_atoms, axis=2))
    return tf.concat(out_tensors, axis=2)
def reward_prediction_big(
        self, input_images, input_reward, action, latent, mid_outputs):
    """Builds a reward prediction network.

    The input images are concatenated along axis 3, normalized, and passed
    through strided 3x3 convolutions; `action`, `input_reward` and `latent`
    are injected between the convolutions when they are not None.

    `mid_outputs` is accepted but unused.

    NOTE(review): the visible body ends after "reward_conv3" without a
    `return`; the remainder of the function appears to be missing here --
    confirm against the full source.
    """
    del mid_outputs
    # Filter counts for the convolutions, passed through self.tinyify.
    conv_size = self.tinyify([32, 32, 16, 8])

    with tf.variable_scope("reward_pred", reuse=tf.AUTO_REUSE):
        x = tf.concat(input_images, axis=3)
        x = tfcl.layer_norm(x)

        # The first strided convolution is skipped in small mode.
        if not self.hparams.small_mode:
            x = tfl.conv2d(x, conv_size[1], [3, 3], strides=(2, 2),
                           activation=tf.nn.relu, name="reward_conv1")
            x = tfcl.layer_norm(x)

        # Inject additional inputs
        if action is not None:
            x = common_video.inject_additional_input(
                x, action, "action_enc", self.hparams.action_injection)
        if input_reward is not None:
            x = common_video.inject_additional_input(x, input_reward, "reward_enc")
        if latent is not None:
            # Flatten the latent, then insert two axes of size 1 at
            # position 1 before injecting it.
            latent = tfl.flatten(latent)
            latent = tf.expand_dims(latent, axis=1)
            latent = tf.expand_dims(latent, axis=1)
            x = common_video.inject_additional_input(x, latent, "latent_enc")

        x = tfl.conv2d(x, conv_size[2], [3, 3], strides=(2, 2),
                       activation=tf.nn.relu, name="reward_conv2")
        x = tfcl.layer_norm(x)
        x = tfl.conv2d(x, conv_size[3], [3, 3], strides=(2, 2),
                       activation=tf.nn.relu, name="reward_conv3")
def testExpandAndSqueeze(self):
    """expand_dims and squeeze undo each other, with positional and keyword arguments."""
    with self.cached_session():
        # TODO(aselle): sparse_split, sparse_reduce_sum,
        #  sparse_reduce_sum_sparse, reduce_join
        a = [[1, 2, 3]]

        # Positional axis arguments.
        squeeze_then_expand = tf.expand_dims(tf.squeeze(a, [0]), 0)
        self.assertAllEqual(squeeze_then_expand.eval(), a)
        expand_then_squeeze = tf.squeeze(tf.expand_dims(a, 1), [1])
        self.assertAllEqual(expand_then_squeeze.eval(), a)

        # Keyword arguments `squeeze_dims` / `dim`.
        squeeze_then_expand_kw = tf.expand_dims(
            tf.squeeze([[1, 2, 3]], squeeze_dims=[0]), dim=0)
        self.assertAllEqual(squeeze_then_expand_kw.eval(), a)

        expand_then_squeeze_kw = tf.squeeze(
            tf.expand_dims([[1, 2, 3]], dim=1), squeeze_dims=[1])
        self.assertAllEqual(expand_then_squeeze_kw.eval(), a)

        # The same check is performed a second time on a fresh set of ops.
        expand_then_squeeze_kw_again = tf.squeeze(
            tf.expand_dims([[1, 2, 3]], dim=1), squeeze_dims=[1])
        self.assertAllEqual(expand_then_squeeze_kw_again.eval(), a)
def roc_auc_score(y_pred, y_true):
    """ ROC AUC Score.

    Approximates the Area Under Curve score, using approximation based on
    the Wilcoxon-Mann-Whitney U statistic.

    Yan, L., Dodier, R., Mozer, M. C., & Wolniewicz, R. (2003).
    Optimizing Classifier Performance via an Approximation to the
    Wilcoxon-Mann-Whitney Statistic.

    Measures overall performance for a full range of threshold levels.

    Every (positive, negative) prediction pair whose margin
    `pos - neg - gamma` is negative contributes `(-margin) ** p` to the
    returned sum.

    Arguments:
        y_pred: `Tensor`. Predicted values.
        y_true: `Tensor` . Targets (labels), a probability distribution.

    """
    with tf.name_scope("RocAucScore"):
        is_positive = tf.cast(y_true, tf.bool)
        pos = tf.boolean_mask(y_pred, is_positive)
        neg = tf.boolean_mask(y_pred, tf.logical_not(is_positive))

        # Row of positives against column of negatives: broadcasting yields
        # one entry per (negative, positive) pair.
        pos_row = tf.expand_dims(pos, 0)
        neg_col = tf.expand_dims(neg, 1)

        # original paper suggests performance is robust to exact parameter
        # choice
        gamma = 0.2
        p = 3

        pair_margin = tf.zeros_like(pos_row * neg_col) + pos_row - neg_col - gamma
        violations = tf.boolean_mask(pair_margin, pair_margin < 0.0)

        return tf.reduce_sum(tf.pow(-violations, p))
def train():
    """Feed the same image pair through the convolutional LSTM encoder 100 times.

    "lily.jpg" is read twice and stacked into a batch of two images, which is
    fed to both time steps of the encoder; the encoder outputs are printed on
    every iteration.

    NOTE(review): the batch built here has two images, so BATCH_SIZE is
    presumably 2 -- confirm.
    """
    image_name = tf.constant("lily.jpg", tf.string)
    image1 = tf.expand_dims(uf.read_image(image_name, IMG_ROW, IMG_COL), 0)
    image2 = tf.expand_dims(uf.read_image(image_name, IMG_ROW, IMG_COL), 0)
    image = tf.concat(0, (image1, image2))

    clstm = crnn.con_lstm_cell(BATCH_SIZE, IMG_ROW, IMG_COL, 3, 3, CELL_C)
    input_ = tf.placeholder(tf.float32, (BATCH_SIZE, IMG_ROW, IMG_COL, 3))
    # Both time steps read from the same placeholder.
    inputs = [input_, input_]
    outputs, _ = crnn.clstm_encode(clstm, inputs)

    init_op = tf.initialize_all_variables()
    # The context manager closes the session (and frees its resources) even
    # if a step raises.
    with tf.Session() as sess:
        sess.run(init_op)
        for _ in xrange(100):
            image_v = sess.run(image)
            feed_data = {placeholder: image_v for placeholder in inputs}
            outputs_v = sess.run(outputs, feed_dict=feed_data)
            print(outputs_v)
def dna_transformation(prev_image, dna_input, dna_kernel_size, relu_shift):
    """Apply dynamic neural advection to previous image.

    Every output pixel is a weighted sum of the pixels in the
    `dna_kernel_size` x `dna_kernel_size` neighbourhood of the previous
    image, with per-pixel weights taken from `dna_input`.

    Args:
      prev_image: previous image to be transformed; height and width (axes 1
        and 2) must be statically known.
      dna_input: hidden layer to be used for computing DNA transformation.
      dna_kernel_size: dna kernel size.
      relu_shift: shift for ReLU function.

    Returns:
      A single tensor: the image transformed by the predicted DNA kernels.
    """
    # Pad by half the kernel size so that every shifted window stays inside
    # the padded image and the neighbourhood is centred on the pixel (this
    # equals the previous fixed padding of 2 for a kernel size of 5).
    pad = dna_kernel_size // 2
    prev_image_pad = tf.pad(
        prev_image, [[0, 0], [pad, pad], [pad, pad], [0, 0]])
    image_height = int(prev_image.get_shape()[1])
    image_width = int(prev_image.get_shape()[2])

    # Construct translated images, one per kernel position.
    inputs = []
    for xkern in range(dna_kernel_size):
        for ykern in range(dna_kernel_size):
            shifted = tf.slice(
                prev_image_pad,
                [0, xkern, ykern, 0],
                [-1, image_height, image_width, -1])
            inputs.append(tf.expand_dims(shifted, [3]))
    inputs = tf.concat(axis=3, values=inputs)

    # Normalize channels to 1.
    kernel = tf.nn.relu(dna_input - relu_shift) + relu_shift
    kernel = tf.expand_dims(
        kernel / tf.reduce_sum(kernel, [3], keep_dims=True), [4])
    return tf.reduce_sum(kernel * inputs, [3], keep_dims=False)
def build(self):
    """ tensorflow computation graph for transform

    Creates a new graph with a placeholder `self.inputs` of shape
    (None, max_atoms, 4) -- atom number in channel 0, coordinates in channels
    1-3 -- and sets `self.outputs` to the atom numbers concatenated with the
    radial and angular symmetry features.

    Returns:
      the newly built `tf.Graph`.
    """
    graph = tf.Graph()
    with graph.as_default():
        self.inputs = tf.placeholder(
            tf.float32, shape=(None, self.max_atoms, 4))
        atom_numbers = tf.cast(self.inputs[:, :, 0], tf.int32)

        # Pairwise mask: 1 where both atoms of a pair have a non-zero atom
        # number, 0 otherwise.
        present = tf.sign(atom_numbers)
        pair_present = tf.expand_dims(present, 1) * tf.expand_dims(present, 2)
        flags = tf.cast(pair_present, tf.float32)

        coordinates = self.inputs[:, :, 1:]
        if self.coordinates_in_bohr:
            # Bohr -> Angstrom.
            coordinates = coordinates * 0.52917721092

        d = self.distance_matrix(coordinates, flags)
        d_radial_cutoff = self.distance_cutoff(d, self.radial_cutoff, flags)
        d_angular_cutoff = self.distance_cutoff(d, self.angular_cutoff, flags)

        radial_sym = self.radial_symmetry(d_radial_cutoff, d, atom_numbers)
        angular_sym = self.angular_symmetry(
            d_angular_cutoff, d, atom_numbers, coordinates)

        atom_number_feature = tf.cast(
            tf.expand_dims(atom_numbers, 2), tf.float32)
        self.outputs = tf.concat(
            [atom_number_feature, radial_sym, angular_sym], axis=2)

    return graph
def _att(self, context, context_encode, h):
    """Soft attention over the context, conditioned on the hidden state `h`.

    Returns the context summed over axis 1, weighted by the softmax attention
    weights.
    """
    with tf.variable_scope('att'):
        hidden_att_W = self._variable_trunc_normal(
            'hidden_att_W', [self.dim_hidden, self.dim_ctx])
        pre_att_b = self._variable_constant('pre_att_b', [self.dim_ctx])
        att_W = self._variable_trunc_normal('att_W', [self.dim_ctx, 1])
        att_b = self._variable_constant('att_b', [1])

        # evaluate context_encode (e_ti): add the projected hidden state to
        # every context location, then squash.
        projected_hidden = tf.expand_dims(tf.matmul(h, hidden_att_W), 1)
        context_encode = tf.nn.tanh(
            context_encode + projected_hidden + pre_att_b)

        # Score every location with a single linear unit.
        num_locations = self.ctx_shape[0]
        context_encode_flat = tf.reshape(
            context_encode, [self.batch_size * num_locations, self.dim_ctx])
        scores = tf.matmul(context_encode_flat, att_W) + att_b
        alpha = tf.nn.softmax(
            tf.reshape(scores, [self.batch_size, num_locations]))

        weighted_context = tf.reduce_sum(
            context * tf.expand_dims(alpha, 2), 1)
        return weighted_context
def __init__(self, num_layers, num_units, batch_size, input_size, keep_prob=1.0):
    """Create the cells, initial states and dropout masks of a bidirectional GRU stack.

    For every layer a (forward, backward) pair is appended to `self.grus`,
    `self.inits` and `self.dropout_mask`. Layers after the first take
    `2 * num_units` inputs, i.e. both directions of the previous layer.

    Args:
        num_layers: number of stacked bidirectional layers.
        num_units: number of GRU units per direction.
        batch_size: batch size the initial states and masks are built for.
        input_size: feature size of the input to the first layer.
        keep_prob: keep probability of the dropout masks.
    """
    self.num_layers = num_layers
    self.grus = []
    self.inits = []
    self.dropout_mask = []

    def new_cell():
        return tf.nn.rnn_cell.MultiRNNCell([
            tf.contrib.cudnn_rnn.CudnnCompatibleGRUCell(num_units=num_units)])

    def new_initial_state():
        # One learned vector, repeated for every batch element and wrapped
        # in a leading axis of size 1.
        learned = tf.Variable(tf.zeros([num_units]))
        per_example = tf.tile(tf.expand_dims(learned, axis=0), [batch_size, 1])
        return tf.expand_dims(per_example, axis=0)

    def new_dropout_mask(width):
        all_ones = tf.ones([1, batch_size, width], dtype=tf.float32)
        return tf.nn.dropout(all_ones, keep_prob=keep_prob)

    for layer in range(num_layers):
        layer_input_size = input_size if layer == 0 else 2 * num_units

        # Forward is always created before backward so variables and random
        # ops are created in a fixed order.
        gru_fw, gru_bw = new_cell(), new_cell()
        init_fw, init_bw = new_initial_state(), new_initial_state()
        mask_fw = new_dropout_mask(layer_input_size)
        mask_bw = new_dropout_mask(layer_input_size)

        self.grus.append((gru_fw, gru_bw))
        self.inits.append((init_fw, init_bw))
        self.dropout_mask.append((mask_fw, mask_bw))
def attention_layer(from_tensor,
                    to_tensor,
                    attention_mask=None,
                    num_attention_heads=1,
                    size_per_head=512,
                    query_act=None,
                    key_act=None,
                    value_act=None,
                    attention_probs_dropout_prob=0.0,
                    initializer_range=0.02,
                    do_return_2d_tensor=False,
                    batch_size=None,
                    from_seq_length=None,
                    to_seq_length=None):
    """Performs multi-headed attention from `from_tensor` to `to_tensor`.

    Implements the multi-headed attention of "Attention is all you Need".
    When `from_tensor` and `to_tensor` are the same tensor this is
    self-attention. `from_tensor` is projected to a "query" tensor and
    `to_tensor` to "key" and "value" tensors. Query and key are dot-producted
    and scaled by ``1 / sqrt(size_per_head)``, softmaxed into attention
    probabilities, and the value tensor is interpolated by those
    probabilities. The heads are handled with reshapes and transposes rather
    than separate tensors.

    Args:
      from_tensor: float Tensor of shape [batch_size, from_seq_length,
        from_width].
      to_tensor: float Tensor of shape [batch_size, to_seq_length, to_width].
      attention_mask: (optional) int32 Tensor of shape [batch_size,
        from_seq_length, to_seq_length] holding 1 or 0. Positions with 0 get
        -10000.0 added to their score before the softmax; positions with 1
        are left unchanged.
      num_attention_heads: int. Number of attention heads.
      size_per_head: int. Size of each attention head.
      query_act: (optional) Activation function for the query transform.
      key_act: (optional) Activation function for the key transform.
      value_act: (optional) Activation function for the value transform.
      attention_probs_dropout_prob: (optional) float. Dropout probability of
        the attention probabilities.
      initializer_range: float. Range of the weight initializer.
      do_return_2d_tensor: bool. Selects the 2D or the 3D output shape
        described under Returns.
      batch_size: (optional) int. Required when the inputs are 2D: batch size
        of the 3D version of `from_tensor` and `to_tensor`.
      from_seq_length: (optional) Required when the inputs are 2D: sequence
        length of the 3D version of `from_tensor`.
      to_seq_length: (optional) Required when the inputs are 2D: sequence
        length of the 3D version of `to_tensor`.

    Returns:
      float Tensor of shape [batch_size, from_seq_length,
      num_attention_heads * size_per_head], or of shape
      [batch_size * from_seq_length, num_attention_heads * size_per_head]
      when `do_return_2d_tensor` is true.

    Raises:
      ValueError: Any of the arguments or tensor shapes are invalid.
    """
    # Scalar dimensions referenced in the comments below:
    #   B = batch size (number of sequences)
    #   F = `from_tensor` sequence length
    #   T = `to_tensor` sequence length
    #   N = `num_attention_heads`
    #   H = `size_per_head`
    projection_width = num_attention_heads * size_per_head

    def _split_heads(flat_tensor, seq_length):
        """Reshape [B*S, N*H] to [B, S, N, H] and transpose to [B, N, S, H]."""
        per_head = tf.reshape(
            flat_tensor,
            [batch_size, seq_length, num_attention_heads, size_per_head])
        return tf.transpose(per_head, [0, 2, 1, 3])

    def _project(inputs_2d, activation, name):
        """Dense projection of a 2D tensor to width N*H."""
        return tf.layers.dense(
            inputs_2d,
            projection_width,
            activation=activation,
            name=name,
            kernel_initializer=create_initializer(initializer_range))

    from_shape = get_shape_list(from_tensor, expected_rank=[2, 3])
    to_shape = get_shape_list(to_tensor, expected_rank=[2, 3])
    rank = len(from_shape)

    if rank != len(to_shape):
        raise ValueError(
            "The rank of `from_tensor` must match the rank of `to_tensor`.")

    if rank == 3:
        # 3D inputs carry their own sizes; these override the arguments.
        batch_size = from_shape[0]
        from_seq_length = from_shape[1]
        to_seq_length = to_shape[1]
    elif rank == 2:
        required_sizes = (batch_size, from_seq_length, to_seq_length)
        if any(size is None for size in required_sizes):
            raise ValueError(
                "When passing in rank 2 tensors to attention_layer, the values "
                "for `batch_size`, `from_seq_length`, and `to_seq_length` "
                "must all be specified.")

    from_tensor_2d = reshape_to_matrix(from_tensor)
    to_tensor_2d = reshape_to_matrix(to_tensor)

    # `query_layer` = [B*F, N*H]; `key_layer`, `value_layer` = [B*T, N*H]
    query_layer = _project(from_tensor_2d, query_act, "query")
    key_layer = _project(to_tensor_2d, key_act, "key")
    value_layer = _project(to_tensor_2d, value_act, "value")

    # `query_layer` = [B, N, F, H]; `key_layer` = [B, N, T, H]
    query_layer = _split_heads(query_layer, from_seq_length)
    key_layer = _split_heads(key_layer, to_seq_length)

    # Raw attention scores: scaled dot product of "query" and "key".
    # `attention_scores` = [B, N, F, T]
    score_scale = 1.0 / math.sqrt(float(size_per_head))
    attention_scores = tf.multiply(
        tf.matmul(query_layer, key_layer, transpose_b=True), score_scale)

    if attention_mask is not None:
        # `attention_mask` = [B, 1, F, T]
        attention_mask = tf.expand_dims(attention_mask, axis=[1])

        # 0.0 for positions to attend to, -10000.0 for masked positions.
        # Adding it before the softmax is effectively the same as removing
        # the masked positions entirely.
        adder = (1.0 - tf.cast(attention_mask, tf.float32)) * -10000.0
        attention_scores = attention_scores + adder

    # Normalize the attention scores to probabilities.
    # `attention_probs` = [B, N, F, T]
    attention_probs = tf.nn.softmax(attention_scores)

    # This drops out entire tokens to attend to, which might seem unusual
    # but is taken from the original Transformer paper.
    attention_probs = dropout(attention_probs, attention_probs_dropout_prob)

    # `value_layer` = [B, N, T, H]
    value_layer = _split_heads(value_layer, to_seq_length)

    # `context_layer` = [B, N, F, H], then transposed to [B, F, N, H]
    context_layer = tf.matmul(attention_probs, value_layer)
    context_layer = tf.transpose(context_layer, [0, 2, 1, 3])

    if do_return_2d_tensor:
        # `context_layer` = [B*F, N*H]
        output_shape = [batch_size * from_seq_length, projection_width]
    else:
        # `context_layer` = [B, F, N*H]
        output_shape = [batch_size, from_seq_length, projection_width]
    return tf.reshape(context_layer, output_shape)
def draw_side_by_side_evaluation_image(eval_dict,
                                       category_index,
                                       max_boxes_to_draw=20,
                                       min_score_thresh=0.2,
                                       use_normalized_coordinates=True):
    """Creates a side-by-side image with detections and groundtruth.

    Bounding boxes (and instance masks, if available) are drawn on both
    subimages.

    Args:
      eval_dict: The evaluation dictionary returned by
        eval_util.result_dict_for_batched_example() or
        eval_util.result_dict_for_single_example(). For a single example,
        every entry except the original image is given a batch dimension in
        place, i.e. the caller's dictionary is modified.
      category_index: A category index (dictionary) produced from a labelmap.
      max_boxes_to_draw: The maximum number of boxes to draw for detections.
      min_score_thresh: The minimum score threshold for showing detections.
      use_normalized_coordinates: Whether to assume boxes and keypoints are
        in normalized coordinates (as opposed to absolute coordinates).
        Default is True.

    Returns:
      A list of [1, H, 2 * W, C] uint8 tensors, one per image. The left
      subimage shows detections and the right subimage shows groundtruth.
    """
    detection_fields = fields.DetectionResultFields()
    input_data_fields = fields.InputDataFields()
    side_by_side_images = []
    image_key = input_data_fields.original_image

    def _example(key, example_idx):
        """Slice one example out of eval_dict[key] as a batch of size one."""
        return tf.expand_dims(eval_dict[key][example_idx], axis=0)

    def _optional_masks(key, example_idx):
        """Return the example's masks as uint8, or None if the key is absent."""
        if key not in eval_dict:
            return None
        return tf.cast(_example(key, example_idx), tf.uint8)

    # Add the batch dimension if the eval_dict is for a single example.
    if len(eval_dict[detection_fields.detection_classes].shape) == 1:
        for key in eval_dict:
            if key == image_key:
                continue
            eval_dict[key] = tf.expand_dims(eval_dict[key], 0)

    num_images = eval_dict[image_key].shape[0]
    for indx in range(num_images):
        instance_masks = _optional_masks(
            detection_fields.detection_masks, indx)

        keypoints = None
        if detection_fields.detection_keypoints in eval_dict:
            keypoints = _example(detection_fields.detection_keypoints, indx)

        groundtruth_instance_masks = _optional_masks(
            input_data_fields.groundtruth_instance_masks, indx)

        images_with_detections = draw_bounding_boxes_on_image_tensors(
            _example(image_key, indx),
            _example(detection_fields.detection_boxes, indx),
            _example(detection_fields.detection_classes, indx),
            _example(detection_fields.detection_scores, indx),
            category_index,
            original_image_spatial_shape=_example(
                input_data_fields.original_image_spatial_shape, indx),
            true_image_shape=_example(
                input_data_fields.true_image_shape, indx),
            instance_masks=instance_masks,
            keypoints=keypoints,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh,
            use_normalized_coordinates=use_normalized_coordinates)

        # Groundtruth boxes get a score of 1.0 each and are drawn without a
        # box limit or a score threshold.
        images_with_groundtruth = draw_bounding_boxes_on_image_tensors(
            _example(image_key, indx),
            _example(input_data_fields.groundtruth_boxes, indx),
            _example(input_data_fields.groundtruth_classes, indx),
            tf.expand_dims(
                tf.ones_like(
                    eval_dict[input_data_fields.groundtruth_classes][indx],
                    dtype=tf.float32),
                axis=0),
            category_index,
            original_image_spatial_shape=_example(
                input_data_fields.original_image_spatial_shape, indx),
            true_image_shape=_example(
                input_data_fields.true_image_shape, indx),
            instance_masks=groundtruth_instance_masks,
            keypoints=None,
            max_boxes_to_draw=None,
            min_score_thresh=0.0,
            use_normalized_coordinates=use_normalized_coordinates)

        # Axis 2 is the width axis, so detections end up on the left.
        side_by_side_images.append(
            tf.concat([images_with_detections, images_with_groundtruth],
                      axis=2))
    return side_by_side_images
def model_fn(features, labels, mode, params):
    """The `model_fn` for TPUEstimator.

    Builds a multi-label classifier on top of either a BERT encoder or
    pre-computed embeddings.

    Args:
      features: dict of input tensors. Always reads "input_mask". Reads
        "embeddings" when present; otherwise reads "input_ids" and
        "segment_ids" and runs BERT on them.
      labels: dict holding "label_ids", or None. Needed for every mode other
        than PREDICT.
      mode: a `tf.estimator.ModeKeys` value.
      params: dict of hyper-parameters. Keys read here: "bert_config",
        "trainable_bert", "init_checkpoint", "class_based_attention",
        "shared_size", "num_classes", "dropout", "learning_rate",
        "num_train_steps" and "num_warmup_steps".

    Returns:
      A `tf.contrib.tpu.TPUEstimatorSpec` when `use_tpu` is truthy, else a
      `tf.estimator.EstimatorSpec`. The predictions always contain
      "probabilities" and "pooled_output", contain "sequence_output" when
      BERT was run, and contain "attention" only when
      `params["class_based_attention"]` is enabled.
    """
    # NOTE(review): `use_tpu` is not a parameter of this function; it is
    # presumably a module-level or enclosing-scope name - confirm.
    predictions = {}
    # Bound up front because the TPU spec below always reads it, while only
    # the BERT branch may replace it.
    scaffold_fn = None
    # Stays None unless class-based attention produces attention weights.
    distribution = None

    input_mask = features["input_mask"]
    batch_size = input_mask.shape[0]

    if labels is not None:
        label_ids = tf.cast(labels["label_ids"], tf.float32)

    if "embeddings" not in features:
        input_ids = features["input_ids"]
        segment_ids = features["segment_ids"]

        model = modeling.BertModel(config=params['bert_config'],
                                   is_training=params['trainable_bert'],
                                   input_ids=input_ids,
                                   input_mask=input_mask,
                                   token_type_ids=segment_ids,
                                   use_one_hot_embeddings=True)

        # In the demo, we are doing a simple classification task on the entire
        # TODO: Check is_training === trainable Bert j?
        # model = create_model(bert_config=params['bert_config'],
        #                      is_training=params['trainable_bert'],
        #                      num_labels=params['num_classes'],
        #                      labels=label_ids,
        #                      segment_ids=segment_ids,
        #                      input_ids=input_ids,
        #                      input_mask=input_mask,
        #                      use_one_hot_embeddings=True)

        # TODO: Find correct place
        tvars = tf.trainable_variables()
        initialized_variable_names = {}
        if params["init_checkpoint"]:
            (assignment_map, initialized_variable_names
             ) = modeling.get_assignment_map_from_checkpoint(
                 tvars, params["init_checkpoint"])
            if use_tpu:

                def tpu_scaffold():
                    tf.train.init_from_checkpoint(params["init_checkpoint"],
                                                  assignment_map)
                    return tf.train.Scaffold()

                scaffold_fn = tpu_scaffold
            else:
                tf.train.init_from_checkpoint(params["init_checkpoint"],
                                              assignment_map)

        tf.logging.info("**** Variables - INIT FROM CKPT ****")
        for var in tvars:
            if var.name in initialized_variable_names:
                tf.logging.info("name: {}, shape: {}".format(var.name,
                                                             var.shape))

        sequence_output = model.get_sequence_output()
        predictions["sequence_output"] = sequence_output
    else:
        sequence_output = features["embeddings"]

    hidden_size = sequence_output.shape[-1].value

    if params["class_based_attention"]:
        # One query vector per class: a part shared by all classes
        # concatenated with a class-specific part.
        shared_query_embedding = tf.get_variable(
            'shared_query', [1, 1, params["shared_size"]],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        shared_query_embedding = tf.broadcast_to(
            shared_query_embedding,
            [1, params["num_classes"], params["shared_size"]])
        class_query_embedding = tf.get_variable(
            'class_query',
            [1, params["num_classes"], hidden_size - params["shared_size"]],
            initializer=tf.truncated_normal_initializer(stddev=0.02))
        query_embedding = tf.concat(
            [shared_query_embedding, class_query_embedding], axis=2)

        # Reimplement Attention layer to peek into weights.
        scores = tf.matmul(query_embedding, sequence_output, transpose_b=True)
        # input_bias is 1 where input_mask is 0; those positions get a large
        # negative score so the softmax effectively ignores them.
        input_bias = tf.abs(input_mask - 1)
        scores -= 1.e9 * tf.expand_dims(tf.cast(input_bias, tf.float32),
                                        axis=1)
        distribution = tf.nn.softmax(scores)
        pooled_output = tf.matmul(distribution, sequence_output)
    else:
        first_token_tensor = tf.squeeze(sequence_output[:, 0:1, :], axis=1)
        pooled_output = tf.layers.dense(first_token_tensor,
                                        hidden_size,
                                        activation=tf.tanh)

    if mode == tf.estimator.ModeKeys.TRAIN:
        pooled_output = tf.nn.dropout(pooled_output, rate=params["dropout"])

    logits = tf.layers.dense(pooled_output, params["num_classes"])
    if params["class_based_attention"]:
        # pooled_output holds one row per class query here, so the dense
        # layer scores every class from every class row. Only the diagonal
        # (class i scored from class i's row) is wanted. In the first-token
        # branch the logits are already one row per example and taking the
        # diagonal would discard most of them.
        logits = tf.matrix_diag_part(logits)

    # probabilities = tf.nn.softmax(logits, axis=-1)  # single-label case
    probabilities = tf.nn.sigmoid(logits)  # multi-label case

    train_op, loss = None, None
    eval_metrics = None
    if mode != tf.estimator.ModeKeys.PREDICT:
        with tf.variable_scope("loss"):
            per_example_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                labels=label_ids, logits=logits)
            loss = tf.reduce_mean(per_example_loss)

        if mode == tf.estimator.ModeKeys.TRAIN:
            train_op = optimization.create_optimizer(
                loss,
                params["learning_rate"],
                params["num_train_steps"],
                params["num_warmup_steps"],
                use_tpu,
                trainable_bert=params['trainable_bert'])
        elif mode == tf.estimator.ModeKeys.EVAL:

            def _f1_score(labels, pred):
                """Computes F1 score, i.e. the harmonic mean of precision and recall."""
                precision = tf.metrics.precision(labels, pred)
                recall = tf.metrics.recall(labels, pred)
                # 1e-5 keeps the ratio finite when both terms are zero.
                return (2 * precision[0] * recall[0] /
                        (precision[0] + recall[0] + 1e-5),
                        tf.group(precision[1], recall[1]))

            def metric_fn(per_example_loss, labels, probabilities):
                # A label counts as predicted above a 0.4 probability.
                pred = tf.where(probabilities > 0.4,
                                tf.ones_like(probabilities),
                                tf.zeros_like(probabilities))
                return {
                    'absolute/false_positives':
                        tf.metrics.false_positives(labels, pred),
                    'absolute/false_negatives':
                        tf.metrics.false_negatives(labels, pred),
                    'absolute/true_positives':
                        tf.metrics.true_positives(labels, pred),
                    'absolute/true_negatives':
                        tf.metrics.true_negatives(labels, pred),
                    # Counting all-ones against all-ones adds batch_size per
                    # evaluated batch.
                    'absolute/total':
                        tf.metrics.true_positives(tf.ones([batch_size]),
                                                  tf.ones([batch_size])),
                    'metric/acc': tf.metrics.accuracy(labels, pred),
                    'metric/prec': tf.metrics.precision(labels, pred),
                    'metric/recall': tf.metrics.recall(labels, pred),
                    'metric/f1': _f1_score(labels, pred),
                }

            eval_metrics = (metric_fn,
                            [per_example_loss, label_ids, probabilities])

    predictions["probabilities"] = probabilities
    if distribution is not None:
        # Attention weights exist only with class-based attention.
        predictions["attention"] = distribution
    predictions["pooled_output"] = pooled_output

    if use_tpu:
        return tf.contrib.tpu.TPUEstimatorSpec(mode=mode,
                                               loss=loss,
                                               train_op=train_op,
                                               scaffold_fn=scaffold_fn,
                                               eval_metrics=eval_metrics,
                                               predictions=predictions)
    else:
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op,
                                          predictions=predictions)
epochs=100) # MODEL PREDICTIONS --------------------------------------- # test_generator.reset( ) # this ensures that outputs are in the correct order (need to do this every time we call predict_generator) img_model_preds = img_model.predict(test_generator, steps=step_size_test, verbose=1) i = 0 path_to_img = 'C://Users//jbolton//Documents//naughty//deep_tagger//images//' + test_data_df.iloc[ i]['filename'] img = keras.preprocessing.image.load_img(path_to_img, target_size=img_size_for_model) img_array = keras.preprocessing.image.img_to_array(img) img_array = tf.expand_dims(img_array, 0) # Create batch axis predictions = img_model.predict(img_array) # HYPERPARAMETER TUNING ----------------------------------- # def build_model(hp): inputs = keras.Input( shape=(224, 224, 3), name='image_input' ) # ResNet was trained on 224x224 TODO: validate this Joe preprocess_inputs = tf.keras.applications.resnet50.preprocess_input( inputs) # preprocess input data as expected by ResNet50 x = base_model(preprocess_inputs, training=False) # only need to use this if we use pooling='none' in ResNet50 model: #flat_x = keras.layers.Flatten( name='flatten_ResNet_output' )(x) dense1 = keras.layers.Dense(units=hp.Int("units", min_value=32,
shape=[num_nodes[-1], 1], initializer=tf.contrib.layers.xavier_initializer()) b = tf.get_variable('b', initializer=tf.random_uniform([1], -0.1, 0.1)) c, h = [], [] initial_state = [] for li in range(n_layers): c.append( tf.Variable(tf.zeros([batch_size, num_nodes[li]]), trainable=False)) h.append( tf.Variable(tf.zeros([batch_size, num_nodes[li]]), trainable=False)) initial_state.append(tf.contrib.rnn.LSTMStateTuple(c[li], h[li])) # Do several tensor transofmations, because the function dynamic_rnn requires the output to be of # a specific format. Read more at: https://www.tensorflow.org/api_docs/python/tf/nn/dynamic_rnn all_inputs = tf.concat([tf.expand_dims(t, 0) for t in train_inputs], axis=0) # all_outputs is [seq_length, batch_size, num_nodes] all_lstm_outputs, state = tf.nn.dynamic_rnn(drop_multi_cell, all_inputs, initial_state=tuple(initial_state), time_major=True, dtype=tf.float32) all_lstm_outputs = tf.reshape(all_lstm_outputs, [batch_size * num_unrollings, num_nodes[-1]]) all_outputs = tf.nn.xw_plus_b(all_lstm_outputs, w, b) split_outputs = tf.split(all_outputs, num_unrollings, axis=0)
def darkeras_loss(net_out):
    """Build the weighted squared-error detection loss for `net_out`.

    Creates the placeholders that carry the training targets, derives the
    per-element loss weights from them and from the box with the best IOU in
    each grid cell, and returns the weighted L2 loss.

    Args:
      net_out: network output. Columns from ``SS * (C + B)`` onward are read
        as box coordinates and reshaped to ``[-1, SS, B, 4]``; the whole
        tensor is compared against the flattened targets.

    Returns:
      A tuple ``(placeholders, loss)``. ``placeholders`` maps the names
      'probs', 'confs', 'coord', 'proid', 'areas', 'upleft' and 'botright'
      to the created placeholders; ``loss`` is half the batch mean of the
      per-example weighted squared error.
    """
    class_scale = float(cfg.class_scale)
    object_scale = float(cfg.object_scale)
    noobject_scale = float(cfg.noobject_scale)
    coord_scale = float(cfg.coord_scale)
    S, B, C = cfg.cell_size, cfg.boxes_per_cell, cfg.num_classes
    SS = S * S  # number of grid cells
    per_class_shape = [None, SS, C]
    per_box_shape = [None, SS, B]

    # Target placeholders; these are handed back to the caller.
    _probs = tf.placeholder(tf.float32, per_class_shape)
    _confs = tf.placeholder(tf.float32, per_box_shape)
    _coord = tf.placeholder(tf.float32, per_box_shape + [4])
    # weights term for L2 loss
    _proid = tf.placeholder(tf.float32, per_class_shape)
    # material for calculating IOU
    _areas = tf.placeholder(tf.float32, per_box_shape)
    _upleft = tf.placeholder(tf.float32, per_box_shape + [2])
    _botright = tf.placeholder(tf.float32, per_box_shape + [2])

    placeholders = {
        'probs': _probs,
        'confs': _confs,
        'coord': _coord,
        'proid': _proid,
        'areas': _areas,
        'upleft': _upleft,
        'botright': _botright,
    }

    # Extract the coordinate prediction from net_out.
    pred_coords = tf.reshape(net_out[:, SS * (C + B):], [-1, SS, B, 4])
    pred_wh = tf.pow(pred_coords[:, :, :, 2:4], 2) * S  # unit: grid cell
    pred_area = pred_wh[:, :, :, 0] * pred_wh[:, :, :, 1]  # grid cell^2
    pred_centers = pred_coords[:, :, :, 0:2]  # [batch, SS, B, 2]
    pred_upleft = pred_centers - (pred_wh * .5)  # [batch, SS, B, 2]
    pred_botright = pred_centers + (pred_wh * .5)  # [batch, SS, B, 2]

    # Intersection of the predicted and the target boxes, clamped at zero
    # for boxes that do not overlap.
    overlap_upleft = tf.maximum(pred_upleft, _upleft)
    overlap_botright = tf.minimum(pred_botright, _botright)
    overlap_wh = tf.maximum(overlap_botright - overlap_upleft, 0.0)
    overlap_area = tf.multiply(overlap_wh[:, :, :, 0],
                               overlap_wh[:, :, :, 1])

    # Keep the target confidence only for the box(es) with the best IOU in
    # each cell; every other box gets confidence 0.0.
    iou = tf.truediv(overlap_area, _areas + pred_area - overlap_area)
    is_best = tf.to_float(tf.equal(iou, tf.reduce_max(iou, [2], True)))
    confs = tf.multiply(is_best, _confs)

    # Weight terms for confidences, coordinates and class probabilities.
    conid = noobject_scale * (1. - confs) + object_scale * confs
    confs_column = tf.expand_dims(confs, -1)
    cooid = coord_scale * tf.concat(4 * [confs_column], 3)
    proid = class_scale * _proid

    # Flatten targets and weights to one row per example.
    probs = slim.flatten(_probs)
    proid = slim.flatten(proid)
    confs = slim.flatten(confs)
    conid = slim.flatten(conid)
    coord = slim.flatten(_coord)
    cooid = slim.flatten(cooid)

    true = tf.concat([probs, confs, coord], 1)
    wght = tf.concat([proid, conid, cooid], 1)

    print('Building {} loss'.format(cfg.model_name))
    squared_error = tf.pow(net_out - true, 2)
    per_example = tf.reduce_sum(tf.multiply(squared_error, wght), 1)
    return placeholders, .5 * tf.reduce_mean(per_example)
def __call__(self, x, prev_state):
    """Run one step of the cell.

    The controller consumes `x` concatenated with the previous read vectors.
    Its output is projected to the head parameters, which drive addressing,
    reading from the previous memory and writing the new memory.

    Args:
      x: input tensor for this step; concatenated with the previous read
        vectors along axis 1.
      prev_state: previous state exposing `read_vector_list`,
        `controller_state`, `w_list` and `M`.

    Returns:
      A tuple ``(NTM_output, next_state)`` where `next_state` is an
      `NTMControllerState` with the new controller state, read vectors,
      addressing weights and memory.
    """
    mem_dim = self.memory_vector_dim
    shift_width = self.shift_range * 2 + 1
    share_vars = (self.step > 0) or self.reuse

    controller_input = tf.concat(
        [x] + prev_state.read_vector_list, axis=1)
    with tf.compat.v1.variable_scope('controller', reuse=self.reuse):
        controller_output, controller_state = self.controller(
            controller_input, prev_state.controller_state)

    # Per head: key (mem_dim), beta, g, shift weights (shift_width), gamma.
    # Every write head additionally gets an erase and an add vector.
    num_parameters_per_head = mem_dim + 1 + 1 + shift_width + 1
    num_heads = self.read_head_num + self.write_head_num
    head_params_total = num_parameters_per_head * num_heads
    total_parameter_num = (
        head_params_total + mem_dim * 2 * self.write_head_num)

    with tf.compat.v1.variable_scope("o2p", reuse=share_vars):
        parameters = tf.compat.v1.layers.dense(
            controller_output, total_parameter_num, activation=None,
            kernel_initializer=self.o2p_initializer)
        parameters = tf.clip_by_value(
            parameters, -self.clip_value, self.clip_value)
    head_parameter_list = tf.split(
        parameters[:, :head_params_total], num_heads, axis=1)
    erase_add_list = tf.split(
        parameters[:, head_params_total:], 2 * self.write_head_num, axis=1)

    prev_w_list = prev_state.w_list
    prev_M = prev_state.M

    # Addressing: one weighting per head, read heads first.
    w_list = []
    for head_idx, head_parameter in enumerate(head_parameter_list):
        key = tf.tanh(head_parameter[:, 0:mem_dim])
        beta = tf.nn.softplus(head_parameter[:, mem_dim])
        gate = tf.sigmoid(head_parameter[:, mem_dim + 1])
        shift = tf.nn.softmax(
            head_parameter[:, mem_dim + 2:mem_dim + 2 + shift_width])
        gamma = tf.nn.softplus(head_parameter[:, -1]) + 1
        with tf.compat.v1.variable_scope('addressing_head_%d' % head_idx):
            head_w = self.addressing(
                key, beta, gate, shift, gamma, prev_M, prev_w_list[head_idx])
        w_list.append(head_w)

    # Reading (Sec 3.1): weighted sum over the rows of the previous memory.
    read_w_list = w_list[:self.read_head_num]
    read_vector_list = [
        tf.reduce_sum(
            tf.expand_dims(read_w_list[head_idx], axis=2) * prev_M, axis=1)
        for head_idx in range(self.read_head_num)
    ]

    # Writing (Sec 3.2): erase, then add, one write head after another.
    write_w_list = w_list[self.read_head_num:]
    M = prev_M
    for head_idx in range(self.write_head_num):
        write_w = tf.expand_dims(write_w_list[head_idx], axis=2)
        erase_vector = tf.expand_dims(
            tf.sigmoid(erase_add_list[head_idx * 2]), axis=1)
        add_vector = tf.expand_dims(
            tf.tanh(erase_add_list[head_idx * 2 + 1]), axis=1)
        keep = tf.ones(M.get_shape()) - tf.matmul(write_w, erase_vector)
        M = M * keep + tf.matmul(write_w, add_vector)

    # Fall back to the input width when no output size was configured.
    output_dim = self.output_dim if self.output_dim else x.get_shape()[1]

    with tf.compat.v1.variable_scope("o2o", reuse=share_vars):
        NTM_output = tf.compat.v1.layers.dense(
            tf.concat([controller_output] + read_vector_list, axis=1),
            output_dim, activation=None,
            kernel_initializer=self.o2o_initializer)
        NTM_output = tf.clip_by_value(
            NTM_output, -self.clip_value, self.clip_value)

    self.step += 1
    next_state = NTMControllerState(
        controller_state=controller_state,
        read_vector_list=read_vector_list,
        w_list=w_list,
        M=M)
    return NTM_output, next_state
def _set_up_input_pls(self):
    """Sets up input placeholders by adding them to self._placeholders.

    Keys are defined as self.PL_*. Besides registering the placeholders,
    this stores the batched and preprocessed BEV and image inputs and the
    normalized BEV / image anchor placeholders on the instance, and adds
    image summaries for both inputs.
    """
    # NOTE(review): the scope nesting below was reconstructed from flattened
    # source text; it only influences graph op names - confirm.

    def _add_float_placeholders(specs):
        """Register one float32 placeholder per (shape, key) pair, in order."""
        for shape, key in specs:
            self._add_placeholder(tf.float32, shape, key)

    # Combine config data
    bev_dims = np.append(self._bev_pixel_size, self._bev_depth)

    with tf.variable_scope('bev_input'):
        # Placeholder for BEV image input, to be filled in with feed_dict
        bev_pl = self._add_placeholder(
            tf.float32, bev_dims, self.PL_BEV_INPUT)
        self._bev_input_batches = tf.expand_dims(bev_pl, axis=0)
        self._bev_preprocessed = \
            self._bev_feature_extractor.preprocess_input(
                self._bev_input_batches, self._bev_pixel_size)

        # Summary images: the BEV input split into pieces along axis 2.
        bev_maps = tf.split(bev_pl, self._bev_depth, axis=2)
        tf.summary.image("bev_maps", bev_maps,
                         max_outputs=self._bev_depth)

    with tf.variable_scope('img_input'):
        # Take variable size input images
        img_pl = self._add_placeholder(
            tf.float32, [None, None, self._img_depth], self.PL_IMG_INPUT)
        self._img_input_batches = tf.expand_dims(img_pl, axis=0)
        self._img_preprocessed = \
            self._img_feature_extractor.preprocess_input(
                self._img_input_batches, self._img_pixel_size)

        # Summary Image
        tf.summary.image("rgb_image", self._img_preprocessed, max_outputs=2)

    with tf.variable_scope('pl_labels'):
        _add_float_placeholders([
            ([None, 6], self.PL_LABEL_ANCHORS),
            ([None, 7], self.PL_LABEL_BOXES_3D),
            ([None], self.PL_LABEL_CLASSES),
        ])

    # Placeholders for anchors
    with tf.variable_scope('pl_anchors'):
        _add_float_placeholders([
            ([None, 6], self.PL_ANCHORS),
            ([None], self.PL_ANCHOR_IOUS),
            ([None, 6], self.PL_ANCHOR_OFFSETS),
            ([None], self.PL_ANCHOR_CLASSES),
        ])

        with tf.variable_scope('bev_anchor_projections'):
            self._add_placeholder(
                tf.float32, [None, 4], self.PL_BEV_ANCHORS)
            self._bev_anchors_norm_pl = self._add_placeholder(
                tf.float32, [None, 4], self.PL_BEV_ANCHORS_NORM)

        with tf.variable_scope('img_anchor_projections'):
            self._add_placeholder(
                tf.float32, [None, 4], self.PL_IMG_ANCHORS)
            self._img_anchors_norm_pl = self._add_placeholder(
                tf.float32, [None, 4], self.PL_IMG_ANCHORS_NORM)

        with tf.variable_scope('sample_info'):
            # the calib matrix shape is (3 x 4)
            self._add_placeholder(tf.float32, [3, 4], self.PL_CALIB_P2)
            self._add_placeholder(tf.int32,
                                  shape=[1],
                                  name=self.PL_IMG_IDX)
            self._add_placeholder(tf.float32, [4], self.PL_GROUND_PLANE)
def __init__(self, model_type, sequence_length, num_classes, vocab_size,
             embedding_size, filter_sizes, num_filters, l2_reg_lambda=0):
    """Build a convolutional text model for classification or regression.

    The graph is: embedding lookup, one convolution + max-pool branch per
    filter size, concatenation, dropout and a final linear layer.

    Args:
      model_type: "clf" for classification or "reg" for regression. Any
        other value leaves predictions, loss and accuracy undefined.
      sequence_length: length of every input sequence.
      num_classes: width of the output layer and of `input_y`.
      vocab_size: number of rows of the embedding matrix.
      embedding_size: width of every embedding vector.
      filter_sizes: iterable with the height of each convolution filter.
      num_filters: number of filters per filter size.
      l2_reg_lambda: weight of the L2 penalty on the output layer.
    """
    # NOTE(review): name-scope membership below was reconstructed from
    # flattened source text; it only influences graph op names - confirm.
    is_clf = model_type == "clf"
    is_reg = model_type == "reg"

    # Inputs fed at run time.
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(
        tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")
    self.learing_rate = tf.placeholder(tf.float32, name="learing_rate")

    l2_loss = tf.constant(0.0)

    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        initial_embeddings = tf.random_uniform(
            [vocab_size, embedding_size], -1, 1)
        self.W = tf.Variable(initial_embeddings, name="W", trainable=True)
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
        # conv2d needs a channel axis, so one of size 1 is appended.
        self.embedded_chars_expanded = tf.expand_dims(
            self.embedded_chars, -1)

    # One convolution + max-pool branch per filter size.
    pooled_outputs = []
    for filter_size in filter_sizes:
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            conv_W = tf.Variable(
                tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            conv_b = tf.Variable(
                tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                self.embedded_chars_expanded,
                conv_W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            activated = tf.nn.relu(tf.nn.bias_add(conv, conv_b), name="relu")
            # The pooling window spans every position the VALID convolution
            # produced, leaving one value per filter.
            pool_height = sequence_length - filter_size + 1
            pooled_outputs.append(
                tf.nn.max_pool(
                    activated,
                    ksize=[1, pool_height, 1, 1],
                    strides=[1, 1, 1, 1],
                    padding='VALID',
                    name="pool"))

    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    with tf.name_scope("output"):
        out_W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        out_b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(out_W)
        l2_loss += tf.nn.l2_loss(out_b)
        self.scores = tf.nn.xw_plus_b(
            self.h_drop, out_W, out_b, name="scores")
        if is_clf:
            self.predictions = tf.argmax(self.scores, 1, name="predictions")
        elif is_reg:
            top_score = tf.reduce_max(self.scores, 1, name="predictions")
            self.predictions = tf.expand_dims(top_score, -1)

    with tf.name_scope("loss"):
        if is_clf:
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.scores, labels=self.input_y)
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss
        if is_reg:
            # Root of the mean squared error.
            losses = tf.sqrt(
                tf.losses.mean_squared_error(
                    predictions=self.predictions, labels=self.input_y))
            self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    with tf.name_scope("accuracy"):
        if is_clf:
            correct_predictions = tf.equal(
                self.predictions, tf.argmax(self.input_y, 1))
            self.accuracy = tf.reduce_mean(
                tf.cast(correct_predictions, "float"), name="accuracy")
        elif is_reg:
            # Accuracy is reported as a constant 0.0 for regression.
            self.accuracy = tf.constant(0.0, name="accuracy")
def build(self):
    """Build the RPN graph and return the tensors to evaluate.

    Steps, in order:
      1. Set up input placeholders and feature extractors.
      2. Unless both path-drop probabilities are 1.0, multiply the image
         and BEV bottleneck features by path-drop masks.
      3. Crop-and-resize both feature maps with the normalised anchors.
      4. Fuse the two crops ('mean' or 'concat').
      5. Run two conv heads: a 2-channel one (objectness) and a 6-channel
         one (offsets).
      6. Add histogram and image summaries.
      7. Decode offsets into anchors, project them to BEV, and run NMS on
         the softmax objectness scores.
      8. Sample a mini-batch mask and mask predictions / ground truth.

    Returns:
        dict of tensors. For 'train' / 'val' it holds the anchors, the
        mini-batch mask, the masked predictions and ground truth, and the
        post-NMS proposals; otherwise only the post-NMS anchors and their
        objectness softmax.

    Raises:
        ValueError: if ``self._fusion_method`` is neither 'mean' nor
            'concat'.
    """

    # Setup input placeholders
    self._set_up_input_pls()

    # Setup feature extractors
    self._set_up_feature_extractors()

    bev_proposal_input = self.bev_bottleneck
    img_proposal_input = self.img_bottleneck

    # Divisor for 'mean' fusion: two inputs by default.
    fusion_mean_div_factor = 2.0

    # If both img and bev probabilities are set to 1.0, don't do
    # path drop.
    if not (self._path_drop_probabilities[0] ==
            self._path_drop_probabilities[1] == 1.0):
        with tf.variable_scope('rpn_path_drop'):

            random_values = tf.random_uniform(shape=[3],
                                              minval=0.0,
                                              maxval=1.0)

            img_mask, bev_mask = self.create_path_drop_masks(
                self._path_drop_probabilities[0],
                self._path_drop_probabilities[1],
                random_values)

            img_proposal_input = tf.multiply(img_proposal_input,
                                             img_mask)

            bev_proposal_input = tf.multiply(bev_proposal_input,
                                             bev_mask)

            self.img_path_drop_mask = img_mask
            self.bev_path_drop_mask = bev_mask

            # Overwrite the division factor so the mean only counts the
            # paths that were kept
            fusion_mean_div_factor = img_mask + bev_mask

    with tf.variable_scope('proposal_roi_pooling'):

        with tf.variable_scope('box_indices'):
            def get_box_indices(boxes):
                # Builds, for every box, the index of the batch entry it
                # belongs to (row index repeated across the second axis),
                # flattened to 1-D.
                proposals_shape = boxes.get_shape().as_list()
                # Fall back to the dynamic shape when the static one is
                # not fully known.
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                self._bev_anchors_norm_pl, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

        # Do ROI Pooling on BEV
        bev_proposal_rois = tf.image.crop_and_resize(
            bev_proposal_input,
            self._bev_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # Do ROI Pooling on image
        img_proposal_rois = tf.image.crop_and_resize(
            img_proposal_input,
            self._img_anchors_norm_pl,
            tf_box_indices,
            self._proposal_roi_crop_size)
        # NOTE(review): leftover debug output; it only prints the label
        # because the statements that printed the values are disabled.
        print("img_proposal_rois shape")
        # print(img_proposal_rois.shape)
        # for i in range(img_proposal_rois.shape[0]):
        #     print(img_proposal_rois[i])

    # TODO PROJECT: insert code here to add mixture of experts
    # self._moe_model = MoeModel(img_proposal_input, bev_proposal_input)
    # self._moe_model._set_up_input_pls()
    # moe_prediction = self._moe_model.build()

    with tf.variable_scope('proposal_roi_fusion'):
        rpn_fusion_out = None

        # TODO PROJECT: weight the feature before average img and bev
        # weighted_img_proposal_rois = tf.multiply(moe_prediction['img_weight'],img_proposal_rois)
        # weighted_bev_proposal_rois = tf.multiply(moe_prediction['bev_weight'],bev_proposal_rois)

        if self._fusion_method == 'mean':
            tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
            rpn_fusion_out = tf.divide(tf_features_sum,
                                       fusion_mean_div_factor)

            # TODO PROJECT: weight the feature before average img and bev
            # tf_features_sum = tf.add(weighted_bev_proposal_rois, weighted_img_proposal_rois)
            # rpn_fusion_out = tf.divide(tf_features_sum, fusion_mean_div_factor)

        elif self._fusion_method == 'concat':
            rpn_fusion_out = tf.concat(
                [bev_proposal_rois, img_proposal_rois], axis=3)

            # TODO PROJECT: weight the feature before concatenation
            # rpn_fusion_out = tf.concat(
            #     [weighted_bev_proposal_rois, weighted_img_proposal_rois], axis=3)

        else:
            raise ValueError('Invalid fusion method', self._fusion_method)

    # TODO: move this section into an separate AnchorPredictor class
    with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
        tensor_in = rpn_fusion_out

        # Parse rpn layers config
        layers_config = self._config.layers_config.rpn_config
        l2_weight_decay = layers_config.l2_weight_decay

        # Only regularise the conv weights when a positive decay is set.
        if l2_weight_decay > 0:
            weights_regularizer = slim.l2_regularizer(l2_weight_decay)
        else:
            weights_regularizer = None

        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer):
            # Use conv2d instead of fully_connected layers.
            # The first conv uses the crop size as kernel with VALID
            # padding.
            cls_fc6 = slim.conv2d(tensor_in,
                                  layers_config.cls_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='cls_fc6')

            cls_fc6_drop = slim.dropout(cls_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc6_drop')

            cls_fc7 = slim.conv2d(cls_fc6_drop,
                                  layers_config.cls_fc7,
                                  [1, 1],
                                  scope='cls_fc7')

            cls_fc7_drop = slim.dropout(cls_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='cls_fc7_drop')

            # Two output channels, no activation.
            cls_fc8 = slim.conv2d(cls_fc7_drop,
                                  2,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='cls_fc8')

            objectness = tf.squeeze(
                cls_fc8, [1, 2],
                name='cls_fc8/squeezed')

            # Use conv2d instead of fully_connected layers.
            reg_fc6 = slim.conv2d(tensor_in,
                                  layers_config.reg_fc6,
                                  self._proposal_roi_crop_size,
                                  padding='VALID',
                                  scope='reg_fc6')

            reg_fc6_drop = slim.dropout(reg_fc6,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc6_drop')

            reg_fc7 = slim.conv2d(reg_fc6_drop,
                                  layers_config.reg_fc7,
                                  [1, 1],
                                  scope='reg_fc7')

            reg_fc7_drop = slim.dropout(reg_fc7,
                                        layers_config.keep_prob,
                                        is_training=self._is_training,
                                        scope='reg_fc7_drop')

            # Six output channels, no activation.
            reg_fc8 = slim.conv2d(reg_fc7_drop,
                                  6,
                                  [1, 1],
                                  activation_fn=None,
                                  scope='reg_fc8')

            offsets = tf.squeeze(
                reg_fc8, [1, 2],
                name='reg_fc8/squeezed')

    # Histogram summaries
    with tf.variable_scope('histograms_feature_extractor'):
        with tf.variable_scope('bev_vgg'):
            for end_point in self.bev_end_points:
                tf.summary.histogram(
                    end_point, self.bev_end_points[end_point])

        with tf.variable_scope('img_vgg'):
            for end_point in self.img_end_points:
                tf.summary.histogram(
                    end_point, self.img_end_points[end_point])

    with tf.variable_scope('histograms_rpn'):
        with tf.variable_scope('anchor_predictor'):
            fc_layers = [cls_fc6, cls_fc7, cls_fc8, objectness,
                         reg_fc6, reg_fc7, reg_fc8, offsets]
            for fc_layer in fc_layers:
                # fix the name to avoid tf warnings
                tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                     fc_layer)

    # Return the proposals
    with tf.variable_scope('proposals'):
        anchors = self.placeholders[self.PL_ANCHORS]

        # Decode anchor regression offsets
        with tf.variable_scope('decoding'):
            regressed_anchors = anchor_encoder.offset_to_anchor(
                anchors, offsets)

        with tf.variable_scope('bev_projection'):
            _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                regressed_anchors, self._bev_extents)

        with tf.variable_scope('softmax'):
            objectness_softmax = tf.nn.softmax(objectness)

        with tf.variable_scope('nms'):
            # Column 1 of the 2-way softmax is used as the score.
            objectness_scores = objectness_softmax[:, 1]

            # Do NMS on regressed anchors
            top_indices = tf.image.non_max_suppression(
                bev_proposal_boxes_norm, objectness_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_thresh)

            top_anchors = tf.gather(regressed_anchors, top_indices)
            top_objectness_softmax = tf.gather(objectness_scores,
                                               top_indices)
            # top_offsets = tf.gather(offsets, top_indices)
            # top_objectness = tf.gather(objectness, top_indices)

    # Get mini batch
    all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
    all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
    all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

    with tf.variable_scope('mini_batch'):
        mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
        mini_batch_mask, _ = \
            mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

    # ROI summary images
    rpn_mini_batch_size = \
        self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
    with tf.variable_scope('bev_rpn_rois'):
        mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                              mini_batch_mask)
        # All-zero box indices, one per mini-batch anchor.
        mb_bev_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Show the ROIs of the BEV input density map
        # for the mini batch anchors
        bev_input_rois = tf.image.crop_and_resize(
            self._bev_preprocessed,
            mb_bev_anchors_norm,
            mb_bev_box_indices,
            (32, 32))

        # Only the last slice along the channel axis is summarised.
        bev_input_roi_summary_images = tf.split(
            bev_input_rois, self._bev_depth, axis=3)
        tf.summary.image('bev_rpn_rois',
                         bev_input_roi_summary_images[-1],
                         max_outputs=rpn_mini_batch_size)

    with tf.variable_scope('img_rpn_rois'):
        # ROIs on image input
        mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                              mini_batch_mask)
        mb_img_box_indices = tf.zeros_like(
            tf.boolean_mask(all_classes_gt, mini_batch_mask),
            dtype=tf.int32)

        # Do test ROI pooling on mini batch
        img_input_rois = tf.image.crop_and_resize(
            self._img_preprocessed,
            mb_img_anchors_norm,
            mb_img_box_indices,
            (32, 32))

        tf.summary.image('img_rpn_rois',
                         img_input_rois,
                         max_outputs=rpn_mini_batch_size)

    # Ground Truth Tensors
    with tf.variable_scope('one_hot_classes'):

        # Anchor classification ground truth
        # Object / Not Object
        min_pos_iou = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

        # 1 where the ground-truth IoU reaches the positive threshold.
        objectness_classes_gt = tf.cast(
            tf.greater_equal(all_ious_gt, min_pos_iou),
            dtype=tf.int32)
        # One-hot targets softened by the label-smoothing epsilon.
        objectness_gt = tf.one_hot(
            objectness_classes_gt, depth=2,
            on_value=1.0 - self._config.label_smoothing_epsilon,
            off_value=self._config.label_smoothing_epsilon)

    # Mask predictions for mini batch
    with tf.variable_scope('prediction_mini_batch'):
        objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
        offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

    with tf.variable_scope('ground_truth_mini_batch'):
        objectness_gt_masked = tf.boolean_mask(
            objectness_gt, mini_batch_mask)
        offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                            mini_batch_mask)

    # Specify the tensors to evaluate
    predictions = dict()

    # Temporary predictions for debugging
    # predictions['anchor_ious'] = anchor_ious
    # predictions['anchor_offsets'] = all_offsets_gt

    if self._train_val_test in ['train', 'val']:
        # All anchors
        predictions[self.PRED_ANCHORS] = anchors

        # Mini-batch masks
        predictions[self.PRED_MB_MASK] = mini_batch_mask
        # Mini-batch predictions
        predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
        predictions[self.PRED_MB_OFFSETS] = offsets_masked

        # Mini batch ground truth
        predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
        predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

        # Proposals after nms
        predictions[self.PRED_TOP_INDICES] = top_indices
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    else:
        # self._train_val_test == 'test'
        predictions[self.PRED_TOP_ANCHORS] = top_anchors
        predictions[
            self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

    return predictions
def expand_tile(value, size):
    """Prepend a new leading axis to `value` and repeat it `size` times.

    Args:
        value: Anything convertible to a tensor.
        size: Number of copies along the new first axis.

    Returns:
        A tensor with one more dimension than `value`, whose first axis has
        length `size`.
    """
    tensor = tf.convert_to_tensor(value, name='value')
    rank = tensor.shape.ndims
    with_leading_axis = tf.expand_dims(tensor, axis=0)
    # Repeat only along the new axis; every original axis is tiled once.
    multiples = [size]
    multiples.extend([1] * rank)
    return tf.tile(with_leading_axis, multiples)
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
    """Refines selected boxes by score-weighted voting over a box pool.

    Box voting follows 'Object detection via a multi-region & semantic
    segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015.

    For every box B in `selected_boxes`, the matching set S consists of the
    boxes in `pool_boxes` whose IoU with B is strictly greater than
    `iou_thresh`. The coordinates of B become the score-weighted average of
    the coordinates in S, and the score of B becomes the plain average of
    the scores in S.

    Args:
      selected_boxes: BoxList, usually the survivors of non max suppression
        applied to `pool_boxes`.
      pool_boxes: BoxList of (possibly redundant) boxes with a 'scores'
        field.
      iou_thresh: (float scalar) IoU threshold used for matching.

    Returns:
      BoxList with the voted locations and scores, one entry per box in
      `selected_boxes`.

    Raises:
      ValueError: if iou_thresh is not in [0, 1], if selected_boxes or
        pool_boxes is not a BoxList, or if pool_boxes has no scores field.
    """
    if not 0.0 <= iou_thresh <= 1.0:
        raise ValueError('iou_thresh must be between 0 and 1')
    if not isinstance(selected_boxes, box_list.BoxList):
        raise ValueError('selected_boxes must be a BoxList')
    if not isinstance(pool_boxes, box_list.BoxList):
        raise ValueError('pool_boxes must be a BoxList')
    if not pool_boxes.has_field('scores'):
        raise ValueError('pool_boxes must have a \'scores\' field')

    # 1.0 where a pool box overlaps a selected box enough, else 0.0.
    overlaps = iou(selected_boxes, pool_boxes)
    is_match = tf.cast(tf.greater(overlaps, iou_thresh), dtype=tf.float32)
    matches_per_box = tf.reduce_sum(is_match, 1)

    # TODO(kbanoop): Handle the case where some boxes in selected_boxes do
    # not match to any boxes in pool_boxes. For such boxes without any
    # matches, we should return the original boxes without voting.
    every_box_matched = tf.reduce_all(tf.greater(matches_per_box, 0))
    match_assert = tf.Assert(every_box_matched, [
        'Each box in selected_boxes must match with at least one box '
        'in pool_boxes.'
    ])

    # Scores as a column so they can be used in the matmuls below.
    pool_scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
    scores_non_negative = tf.reduce_all(tf.greater_equal(pool_scores, 0))
    scores_assert = tf.Assert(scores_non_negative,
                              ['Scores must be non negative.'])

    # Everything downstream depends on this op, so the assertions gate the
    # whole computation.
    with tf.control_dependencies([scores_assert, match_assert]):
        score_totals = tf.matmul(is_match, pool_scores)

    voted_scores = tf.reshape(score_totals, [-1]) / matches_per_box
    weighted_coordinates = pool_boxes.get() * pool_scores
    voted_locations = tf.matmul(is_match, weighted_coordinates) / score_totals

    voted_boxes = box_list.BoxList(voted_locations)
    _copy_extra_fields(voted_boxes, selected_boxes)
    voted_boxes.add_field('scores', voted_scores)
    return voted_boxes
def create_image_from_point_values_unbatched(
        pixel_locations,
        pixel_values,
        image_height,
        image_width,
        default_value=0,
        use_sparse_tensor=False):
    """Scatters per-point values into an image (e.g. a depth image).

    Locations outside the image are dropped before scattering.

    Args:
      pixel_locations: A rank-2 integer tensor, one row per point. Column 0
        is checked against `image_height` and column 1 against
        `image_width`, i.e. rows are (y, x).
      pixel_values: A tensor of shape [N, m] or [N,] with per point values.
      image_height: An int for the image height.
      image_width: An int for the image width.
      default_value: Fill value for pixels that receive no point.
      use_sparse_tensor: If True, build the image through a SparseTensor;
        otherwise through `tf.scatter_nd`.

    Returns:
      A tensor of shape (image_height, image_width, m) holding the point
      values at their locations and `default_value` elsewhere.

    Raises:
      ValueError: if pixel_locations is not rank 2 or pixel_values is not
        rank 1 or 2.
    """
    if len(pixel_locations.shape) != 2:
        raise ValueError('pixel_locations should be rank 2.')
    values_rank = len(pixel_values.shape)
    if values_rank not in [1, 2]:
        raise ValueError('pixel_values should have rank of 1 or 2')
    if values_rank == 1:
        # Treat a flat value vector as a single-channel [N, 1] matrix.
        pixel_values = tf.expand_dims(pixel_values, axis=1)

    # Keep only the points that fall inside the image.
    rows = pixel_locations[:, 0]
    cols = pixel_locations[:, 1]
    row_in_range = tf.logical_and(
        tf.greater_equal(rows, 0), tf.less(rows, image_height))
    col_in_range = tf.logical_and(
        tf.greater_equal(cols, 0), tf.less(cols, image_width))
    in_bounds = tf.logical_and(row_in_range, col_in_range)
    kept_locations = tf.boolean_mask(pixel_locations, in_bounds)
    kept_values = tf.boolean_mask(pixel_values, in_bounds)

    num_kept = tf.shape(kept_locations)[0]
    num_channels = kept_values.get_shape().as_list()[1]

    # Expand every (y, x) into one (y, x, c) index per value channel.
    repeated_yx = tf.tile(
        tf.expand_dims(kept_locations, axis=1),
        [1, num_channels, 1])  # [N, num_channels, 2]
    channel_ids = tf.tile(
        tf.reshape(tf.range(num_channels, dtype=tf.int32),
                   [1, num_channels, 1]),
        [num_kept, 1, 1])  # [N, num_channels, 1]
    yxc = tf.concat([repeated_yx, channel_ids], axis=2)
    flat_indices = tf.reshape(yxc, [num_kept * num_channels, 3])

    if use_sparse_tensor:
        sparse_image = tf.SparseTensor(
            indices=tf.cast(flat_indices, dtype=tf.int64),
            values=tf.reshape(kept_values, [num_kept * num_channels]),
            dense_shape=(image_height, image_width, num_channels))
        return tf.sparse.to_dense(
            sp_input=sparse_image,
            default_value=default_value,
            validate_indices=False)

    # scatter_nd fills untouched pixels with zero, so scatter the values
    # relative to the default and shift the whole image back afterwards.
    shifted_values = tf.reshape(kept_values - default_value,
                                [num_kept * num_channels])
    image = tf.scatter_nd(
        indices=tf.cast(flat_indices, dtype=tf.int64),
        updates=shifted_values,
        shape=(image_height, image_width, num_channels))
    return image + default_value
def call(self, x, mask=None):
    """Compute A = -0.5 * (a - mu)^T P (a - mu) for every batch element.

    Args:
        x: Sequence of exactly three tensors ``(L_flat, mu, a)``.
            ``L_flat`` holds, per batch element, the entries used to build
            the matrix P; ``mu`` and ``a`` are the two vectors whose
            difference is plugged into the quadratic form.
        mask: Unused.

    Returns:
        A rank-2 tensor holding the quadratic form, one row per batch
        element.

    Raises:
        RuntimeError: if the Keras backend is neither 'theano' nor
            'tensorflow'.

    In 'full' mode ``L_flat`` fills a lower-triangular matrix L whose
    diagonal is exponentiated (plus epsilon), and P = L * L^T. In 'diag'
    mode ``L_flat`` is placed unchanged on the diagonal of P.
    """
    # TODO: validate input shape

    assert (len(x) == 3)
    L_flat = x[0]
    mu = x[1]
    a = x[2]

    # NOTE(review): if self.mode is neither 'full' nor 'diag', P is never
    # assigned and the `assert P is not None` below fails with an unbound
    # local variable instead of a descriptive error.
    if self.mode == 'full':
        # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
        L = None
        LT = None
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, L_acc, LT_acc):
                # Fill the lower triangle with x, then replace the diagonal
                # by its exponential (plus epsilon).
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.tril_indices(self.nb_actions)], x)
                diag = K.exp(T.diag(x_)) + K.epsilon()
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], diag)
                return x_, x_.T

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            results, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
            L, LT = results
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Number of elements in a triangular matrix.
            nb_elems = (self.nb_actions * self.nb_actions + self.nb_actions) // 2

            # Create mask for the diagonal elements in L_flat. This is used to exponentiate
            # only the diagonal elements, which is done before gathering.
            # The diagonal entry of row r sits (r + 1) positions after the
            # diagonal entry of row r - 1.
            diag_indeces = [0]
            for row in range(1, self.nb_actions):
                diag_indeces.append(diag_indeces[-1] + (row + 1))
            diag_mask = np.zeros(1 + nb_elems)  # +1 for the leading zero
            diag_mask[np.array(diag_indeces) + 1] = 1
            diag_mask = K.variable(diag_mask)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a lower triangular matrix L.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except (TypeError, ValueError):
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Create mask that can be used to gather elements from L_flat and put them
            # into a lower triangular matrix.
            # Entries above the diagonal stay 0 and therefore pick up the
            # leading zero element added above.
            tril_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            tril_mask[np.tril_indices(self.nb_actions)] = range(1, nb_elems + 1)

            # Finally, process each element of the batch.
            init = [
                K.zeros((self.nb_actions, self.nb_actions)),
                K.zeros((self.nb_actions, self.nb_actions)),
            ]

            def fn(a, x):
                # Exponentiate everything. This is much easier than only exponentiating
                # the diagonal elements, and, usually, the action space is relatively low.
                x_ = K.exp(x) + K.epsilon()
                # Only keep the diagonal elements.
                x_ *= diag_mask
                # Add the original, non-diagonal elements.
                x_ += x * (1. - diag_mask)
                # Finally, gather everything into a lower triangular matrix.
                L_ = tf.gather(x_, tril_mask)
                return [L_, tf.transpose(L_)]

            tmp = tf.scan(fn, L_flat, initializer=init)
            if isinstance(tmp, (list, tuple)):
                # TensorFlow 0.10 now returns a tuple of tensors.
                L, LT = tmp
            else:
                # Old TensorFlow < 0.10 returns a shared tensor.
                L = tmp[:, 0, :, :]
                LT = tmp[:, 1, :, :]
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
        assert L is not None
        assert LT is not None
        P = K.batch_dot(L, LT)
    elif self.mode == 'diag':
        if K.backend() == 'theano':
            import theano.tensor as T
            import theano

            def fn(x, P_acc):
                # Place x on the diagonal of an otherwise zero matrix.
                x_ = K.zeros((self.nb_actions, self.nb_actions))
                x_ = T.set_subtensor(x_[np.diag_indices(self.nb_actions)], x)
                return x_

            outputs_info = [
                K.zeros((self.nb_actions, self.nb_actions)),
            ]
            P, _ = theano.scan(fn=fn, sequences=L_flat, outputs_info=outputs_info)
        elif K.backend() == 'tensorflow':
            import tensorflow as tf

            # Create mask that can be used to gather elements from L_flat and put them
            # into a diagonal matrix.
            diag_mask = np.zeros((self.nb_actions, self.nb_actions), dtype='int32')
            diag_mask[np.diag_indices(self.nb_actions)] = range(1, self.nb_actions + 1)

            # Add leading zero element to each element in the L_flat. We use this zero
            # element when gathering L_flat into a diagonal matrix: every
            # off-diagonal index of diag_mask is 0 and selects it.
            nb_rows = tf.shape(L_flat)[0]
            zeros = tf.expand_dims(tf.tile(K.zeros((1, )), [nb_rows]), 1)
            try:
                # Old TF behavior.
                L_flat = tf.concat(1, [zeros, L_flat])
            except (TypeError, ValueError):
                # New TF behavior
                L_flat = tf.concat([zeros, L_flat], 1)

            # Finally, process each element of the batch.
            def fn(a, x):
                x_ = tf.gather(x, diag_mask)
                return x_

            P = tf.scan(fn, L_flat, initializer=K.zeros((self.nb_actions, self.nb_actions)))
        else:
            raise RuntimeError('Unknown Keras backend "{}".'.format(K.backend()))
    assert P is not None
    assert K.ndim(P) == 3

    # Combine a, mu and P into a scalar (over the batches). What we compute here is
    # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
    # TensorFlow handles vector * P slightly suboptimal, hence we convert the vectors to
    # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
    # operations happen over the batch size, which is dimension 0.
    prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
    prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
    A = -.5 * K.batch_flatten(prod)
    assert K.ndim(A) == 2
    return A
def get_3dmfv_tf(points, n_gaussians=9, sigma=0.0625, flatten=True,
                 normalize=True, full_fv=True):
    """Compute a Fisher-vector representation of point sets with TensorFlow.

    The Gaussian mixture is not an input: it is built here as a uniform
    grid of Gaussians in [-1, 1]^D with equal weights and an isotropic
    standard deviation of `sigma`.

    Args:
        points: Tensor of shape B x N x D with statically known B, N and D,
            where D is 2 or 3.
        n_gaussians: Total number of Gaussians. The grid has
            int(sqrt(n_gaussians)) cells per axis for D == 2 and
            ceil(n_gaussians ** (1/3)) for D == 3.
            NOTE(review): the weights and sigmas are sized by `n_gaussians`
            while the centres are sized by the grid, so `n_gaussians`
            presumably has to be a perfect square / cube - confirm.
        sigma: Standard deviation used for every Gaussian and every axis.
        flatten: If True, return a B x (C * n_gaussians) matrix; otherwise
            a B x n_gaussians x C tensor.
        normalize: Currently ignored, see the NOTE(review) in the body.
        full_fv: If True, use mean, max (and min where applicable) of the
            per-point derivatives, giving C = 2 + 6 * D channels per
            Gaussian; otherwise only the mean, giving C = 1 + 2 * D.

    Returns:
        The Fisher vector tensor, shaped as described under `flatten`.

    Raises:
        ValueError: If the last dimension of `points` is neither 2 nor 3.
    """
    n_batches = points.shape[0].value
    n_points = points.shape[1].value
    D = points.shape[-1].value

    # Previously an unsupported D surfaced later as a NameError on the grid.
    if D not in (2, 3):
        raise ValueError(
            'points must have 2 or 3 coordinates per point, got {}'.format(D))

    # Build the grid of Gaussian centres.
    if D == 2:
        grid_size = int(np.sqrt(n_gaussians))
    else:
        grid_size = int(np.ceil(np.power(n_gaussians, 1 / 3)))
    # Cell centres of a regular partition of [-1, 1].
    centers_1d = np.linspace(-1, 1, grid_size, False) + (1 / grid_size)
    if D == 2:
        x, y = np.meshgrid(centers_1d, centers_1d)
        x = np.stack([x.flatten(), y.flatten()]).T
    else:
        x, y, z = np.meshgrid(centers_1d, centers_1d, centers_1d)
        x = np.stack([x.flatten(), y.flatten(), z.flatten()]).T

    w = tf.ones([n_gaussians]) / (n_gaussians)
    mu = tf.constant(x, tf.float32)
    sigma = sigma * tf.ones([n_gaussians, D])

    # Expand dimension for batch compatibility:
    # n_batches X n_points X n_gaussians X D
    batch_sig = tf.tile(tf.expand_dims(sigma, 0), [n_points, 1, 1])
    batch_sig = tf.tile(tf.expand_dims(batch_sig, 0), [n_batches, 1, 1, 1])
    batch_mu = tf.tile(tf.expand_dims(mu, 0), [n_points, 1, 1])
    batch_mu = tf.tile(tf.expand_dims(batch_mu, 0), [n_batches, 1, 1, 1])
    # n_batches X n_points X n_gaussians
    batch_w = tf.tile(tf.expand_dims(tf.expand_dims(w, 0), 0),
                      [n_batches, n_points, 1])
    # n_batches X n_points X n_gaussians X D
    batch_points = tf.tile(tf.expand_dims(points, -2),
                           [1, 1, n_gaussians, 1])

    # Per-Gaussian weights broadcast to the channel count of d_mu / d_sigma:
    # D * 3 channels (mean, max, min) for the full vector, D otherwise.
    if full_fv:
        w_per_batch_per_d = tf.tile(
            tf.expand_dims(tf.expand_dims(w, 0), -1), [n_batches, 1, D * 3])
    else:
        w_per_batch_per_d = tf.tile(
            tf.expand_dims(tf.expand_dims(w, 0), -1), [n_batches, 1, D])

    # Define multivariate normal distributions
    mvn = tf.contrib.distributions.MultivariateNormalDiag(
        loc=batch_mu, scale_diag=batch_sig)
    # Compute probability per point and normalise over the Gaussians.
    p_per_point = mvn.prob(batch_points)
    w_p = tf.multiply(p_per_point, batch_w)
    Q = w_p / tf.tile(tf.reduce_sum(w_p, axis=-1, keepdims=True),
                      [1, 1, n_gaussians])
    Q_per_d = tf.tile(tf.expand_dims(Q, -1), [1, 1, 1, D])

    # Derivatives w.r.t. the mixture weights, reduced over the points.
    d_pi_all = tf.expand_dims(
        (Q - batch_w) / (tf.sqrt(batch_w) * n_points), -1)
    d_pi_max = tf.reduce_max(d_pi_all, axis=1)
    d_pi_mean = tf.reduce_mean(d_pi_all, axis=1)
    if full_fv:
        d_pi = tf.concat([d_pi_mean, d_pi_max], 2)
    else:
        d_pi = d_pi_mean

    # Derivatives w.r.t. the means.
    d_mu_all = Q_per_d * (batch_points - batch_mu) / batch_sig
    d_mu_all_max = tf.reduce_max(d_mu_all, axis=1)
    d_mu_all_min = tf.reduce_min(d_mu_all, axis=1)
    d_mu_all_mean = tf.reduce_mean(d_mu_all, axis=1)
    if full_fv:
        d_mu_all_full = tf.concat(
            [d_mu_all_mean, d_mu_all_max, d_mu_all_min], 2)
    else:
        d_mu_all_full = d_mu_all_mean
    d_mu = (1 / (tf.sqrt(w_per_batch_per_d))) * d_mu_all_full

    # Derivatives w.r.t. the standard deviations.
    d_sig_all = Q_per_d * (
        tf.pow((batch_points - batch_mu) / batch_sig, 2) - 1)
    d_sig_all_max = tf.reduce_max(d_sig_all, axis=1)
    d_sig_all_min = tf.reduce_min(d_sig_all, axis=1)
    d_sig_all_mean = tf.reduce_mean(d_sig_all, axis=1)
    if full_fv:
        d_sig_all_full = tf.concat(
            [d_sig_all_mean, d_sig_all_max, d_sig_all_min], 2)
    else:
        d_sig_all_full = d_sig_all_mean
    d_sigma = (1 / (tf.sqrt(2 * w_per_batch_per_d))) * d_sig_all_full

    # NOTE(review): normalisation is force-enabled here, so the `normalize`
    # argument has no effect. Kept as-is because honouring the argument
    # would change the features produced for existing callers that pass
    # normalize=False - confirm the intent before removing this override.
    normalize = True
    if normalize:
        # Power normalisation. The magnitude is clamped away from zero
        # before tf.pow.
        alpha = 0.5
        epsilon = 1e-12
        d_pi = tf.sign(d_pi) * tf.pow(
            tf.maximum(tf.abs(d_pi), epsilon), alpha)
        d_mu = tf.sign(d_mu) * tf.pow(
            tf.maximum(tf.abs(d_mu), epsilon), alpha)
        d_sigma = tf.sign(d_sigma) * tf.pow(
            tf.maximum(tf.abs(d_sigma), epsilon), alpha)

        # L2 normalisation
        d_pi = tf.nn.l2_normalize(d_pi, dim=1)
        d_mu = tf.nn.l2_normalize(d_mu, dim=1)
        d_sigma = tf.nn.l2_normalize(d_sigma, dim=1)

    if flatten:
        # Flatten each part channel-major, then concatenate.
        d_pi = tf.contrib.layers.flatten(tf.transpose(d_pi, perm=[0, 2, 1]))
        d_mu = tf.contrib.layers.flatten(tf.transpose(d_mu, perm=[0, 2, 1]))
        d_sigma = tf.contrib.layers.flatten(
            tf.transpose(d_sigma, perm=[0, 2, 1]))
        fv = tf.concat([d_pi, d_mu, d_sigma], axis=1)
    else:
        fv = tf.concat([d_pi, d_mu, d_sigma], axis=2)
        fv = tf.transpose(fv, perm=[0, 2, 1])
        # This second rank-3 transpose swaps the last two axes back, giving
        # B x n_gaussians x C. It stays in this branch because it cannot be
        # applied to the rank-2 tensor of the flatten path.
        fv = tf.transpose(fv, [0, 2, 1])

    return fv
def project_points_with_depth_visibility_check(point_positions,
                                               camera_intrinsics,
                                               camera_rotation_matrix,
                                               camera_translation,
                                               image_width,
                                               image_height,
                                               depth_image,
                                               depth_intrinsics=None,
                                               depth_threshold=0.1):
    """Projects 3D points into an image and flags the visible ones.

    A point counts as visible when it lies in front of the camera, projects
    inside both the color image and the depth image, and its camera-frame
    depth differs from the depth image value at its projection by at most
    `depth_threshold`.

    Args:
      point_positions: A tf.float32 tensor of shape [N, 3] with 3D point
        positions.
      camera_intrinsics: A tf.float32 tensor of shape [3, 3] with the
        intrinsic matrix.
      camera_rotation_matrix: A tf.float32 tensor of size [3, 3].
      camera_translation: A tf.float32 tensor of size [3].
      image_width: Width of image.
      image_height: Height of image.
      depth_image: Depth image as 2D tensor.
      depth_intrinsics: A tf.float32 tensor of size [3, 3]. Defaults to
        `camera_intrinsics` when None.
      depth_threshold: Largest accepted absolute depth difference.

    Returns:
      points_in_image_frame: A tf.int32 tensor of size [N, 2] with the x, y
        location of every point projection in the image.
      visibility: A tf.bool tensor of size [N], True for visible points.
    """
    if depth_intrinsics is None:
        depth_intrinsics = camera_intrinsics
    image_height = tf.convert_to_tensor(image_height, dtype=tf.int32)
    image_width = tf.convert_to_tensor(image_width, dtype=tf.int32)
    depth_image_shape = tf.shape(depth_image)
    depth_rows = depth_image_shape[0]
    depth_cols = depth_image_shape[1]

    def _project_to_pixels(intrinsics, camera_points):
        # Apply the intrinsics, divide by depth and truncate to int32.
        homogeneous = tf.linalg.einsum('ij,nj->ni', intrinsics, camera_points)
        return tf.cast(homogeneous[:, :2] / homogeneous[:, 2:3],
                       dtype=tf.int32)

    def _inside_frame(pixels, width, height):
        # True where 0 <= x < width and 0 <= y < height.
        upper_bound = tf.expand_dims(tf.stack([width, height]), axis=0)
        non_negative = tf.math.reduce_all(
            tf.greater_equal(pixels, 0), axis=1)
        below_bound = tf.math.reduce_all(
            tf.less(pixels, upper_bound), axis=1)
        return tf.logical_and(non_negative, below_bound)

    # Points in camera frame.
    rotated_points = tf.linalg.einsum('ij,nj->ni', camera_rotation_matrix,
                                      point_positions)
    camera_points = rotated_points + tf.expand_dims(camera_translation,
                                                    axis=0)

    # Projections into the color image and into the depth image.
    image_pixels = _project_to_pixels(camera_intrinsics, camera_points)
    depth_pixels = _project_to_pixels(depth_intrinsics, camera_points)

    # In front of the camera and inside both frames.
    visible = tf.greater(camera_points[:, 2], 0.0)
    visible = tf.logical_and(
        visible, _inside_frame(image_pixels, image_width, image_height))
    visible = tf.logical_and(
        visible, _inside_frame(depth_pixels, depth_cols, depth_rows))

    # Compare the depth of the surviving points with the depth image, which
    # is read through flat row-major indices.
    visible_depth_pixels = tf.boolean_mask(depth_pixels, visible)
    flat_indices = (visible_depth_pixels[:, 1] * depth_cols +
                    visible_depth_pixels[:, 0])
    point_depths = tf.boolean_mask(camera_points, visible)[:, 2]
    image_depths = tf.gather(tf.reshape(depth_image, [-1]), flat_indices)
    depth_agrees = tf.less_equal(
        tf.abs(point_depths - image_depths), depth_threshold)

    # Scatter the per-survivor result back to the full set of N points.
    visible_rows = tf.cast(tf.where(visible), dtype=tf.int32)
    depth_agrees_all = tf.scatter_nd(
        indices=visible_rows,
        updates=tf.cast(depth_agrees, dtype=tf.int32),
        shape=tf.shape(visible))
    visible = tf.logical_and(visible,
                             tf.cast(depth_agrees_all, dtype=tf.bool))
    return image_pixels, visible
def pairwise_and(a, b):
    """Logical AND of every entry of `a` with every entry of `b`.

    `a` gets a new axis at position 2 and `b` a new axis at position 1, so
    the two broadcast against each other.
    # assumes a is (batch, n) and b is (batch, m), giving (batch, n, m) - TODO confirm
    """
    return tf.logical_and(tf.expand_dims(a, 2), tf.expand_dims(b, 1))
def predict(self, image):
    """Run a single image through the encoder and the decoder.

    A leading axis of size one is added before the encoder and removed
    again from the decoder output.
    """
    batch_of_one = tf.expand_dims(image, axis=0)
    decoded = self.decoder(self.encoder(batch_of_one))
    return tf.squeeze(decoded, axis=0)
def image_to_4d(image):
    """Return `image` with a new leading axis of size one."""
    return tf.expand_dims(image, 0)
# Example 2
# First, reset the computational graph and start a fresh session.
from tensorflow.python.framework import ops
ops.reset_default_graph()
sess = tf.Session()

# Create the data, the target labels, the placeholders and the variable.
# np.concatenate joins along axis 0 by default.
x_vals = np.concatenate((np.random.normal(-1., 1., 50),
                         np.random.normal(3., 1., 50)))
y_vals = np.concatenate((np.repeat(0., 50), np.repeat(1., 50)))
x_data = tf.placeholder(tf.float32, shape=[1])
y_target = tf.placeholder(tf.float32, shape=[1])
A = tf.Variable(tf.random_normal(mean=10, shape=[1]))  # A is the variable being learned

# Add the translation operation.
my_output = tf.add(x_data, A)

# Add a leading dimension to the output and to the target.
my_output_expanded = tf.expand_dims(my_output, 0)
y_target_expanded = tf.expand_dims(y_target, 0)

# Initialize the variable A. tf.global_variables_initializer() replaces the
# deprecated tf.initialize_all_variables().
init = tf.global_variables_initializer()
sess.run(init)

# Declare the loss function.
xentroy = tf.nn.sigmoid_cross_entropy_with_logits(logits=my_output_expanded,
                                                  labels=y_target_expanded)

# Add an optimizer so TensorFlow knows how to update the variable.
my_opt = tf.train.GradientDescentOptimizer(0.05)
train_step = my_opt.minimize(xentroy)

# Iterate over randomly chosen data points, updating the variable A.
for i in range(1400):
    rand_index = np.random.choice(100)
    rand_x = [x_vals[rand_index]]
    rand_y = [y_vals[rand_index]]
    sess.run(train_step, feed_dict={x_data: rand_x, y_target: rand_y})
def generalized_dice_loss(pred, true, p=1, q=1, eps=1E-6):
    """Dice loss with optional class and per-sample reweighting.

    pred and true are tensors of shape (b, w_0, w_1, ..., c) where
        b   ... batch size
        w_k ... width of input in k-th dimension
        c   ... number of segments/classes
    Both tensors are expected to hold values in [0, 1]; this is not checked
    here. All dimensions except the batch dimension must agree.

    The remaining parameters are as follows:
        p   ... power of the inverse class weighting: each class is
                weighted by the inverse of (its summed `true` mass) ** p
                (p=1 default, p=0 uniform, i.e. no class weighting)
        q   ... power of the inverse loss weighting: each sample's loss is
                weighted by the inverse of |loss| ** q
                (q=1 default, q=0 none, i.e. plain mean over the batch)
        eps ... regularization term if empty classes occur

    Returns a scalar tensor with the loss of the batch.
    """
    assert (p >= 0)
    assert (q >= 0)
    assert (eps >= 0)
    assert (pred.get_shape()[1:] == true.get_shape()[1:])

    shape_pred = pred.get_shape()
    shape_true = true.get_shape()
    # Product of all spatial dimensions (everything between batch and class).
    prod_pred = reduce(lambda x, y: x * y, shape_pred[1:-1], tf.Dimension(1))
    prod_true = reduce(lambda x, y: x * y, shape_true[1:-1], tf.Dimension(1))

    # reshape to shape (b, W, c) where W is product of w_k
    pred = tf.reshape(pred, [-1, prod_pred, shape_pred[-1]])
    true = tf.reshape(true, [-1, prod_true, shape_true[-1]])

    # no class reweighting at all
    if p == 0:
        # unweighted intersection and union
        inter = tf.reduce_mean(pred * true, axis=[1, 2])
        union = tf.reduce_mean(pred + true, axis=[1, 2])
    else:
        # inverse L_p weighting for class cardinalities
        weights = tf.abs(tf.reduce_sum(true, axis=[1]))**p + eps
        weights = tf.expand_dims(tf.reduce_sum(weights, axis=[-1]), -1) \
            / weights

        # weighted intersection and union
        inter = tf.reduce_mean(weights * tf.reduce_mean(pred * true, axis=[1]),
                               axis=[-1])
        union = tf.reduce_mean(weights * tf.reduce_mean(pred + true, axis=[1]),
                               axis=[-1])

    # the traditional dice formula
    loss = 1.0 - 2.0 * (inter + eps) / (union + eps)

    # no reweighting of the batch
    if q == 0:
        return tf.reduce_mean(loss)

    # inverse L_q weighting for loss scores
    weights = tf.abs(loss)**q + eps
    weights = tf.reduce_sum(weights) / weights

    return tf.reduce_mean(loss * weights) / tf.reduce_mean(weights)
def pairwise_sub(a, b):
    """Return all pairwise differences between entries of `a` and `b`.

    `a` receives a new axis at position 2 and `b` a new axis at position 1,
    so the subtraction broadcasts one against the other.  For rank-2 inputs
    shaped (batch, n) and (batch, m) the result is shaped (batch, n, m) with
    result[:, i, j] = a[:, i] - b[:, j].
    """
    return tf.subtract(tf.expand_dims(a, 2), tf.expand_dims(b, 1))
def custom_v3(is_training, images, params, mode):
    """Compute outputs of the model (embeddings for triplet loss).

    Three blocks of (3x3 conv -> [batch norm] -> relu) x 2 -> 2x2 maxpool,
    followed by a 1x1 conv, global average pooling, a dense layer and an
    L2 normalisation scaled by `params.alpha`.

    Args:
        is_training: (bool) whether we are training or not
        images: input image tensor fed straight into the dropout layer;
                this can be a `tf.placeholder` or an output of `tf.data`
        params: (Params) hyperparameters. Reads `image_size` (must be 96),
                `input_dropout`, `bn_momentum`, `use_batch_norm`,
                `embedding_size` and `alpha`.
        mode: not used by this function (presumably kept so every model
              builder shares one signature).

    Returns:
        output: (tf.Tensor) embeddings of shape (batch, params.embedding_size)

    Raises:
        ValueError: if `params.image_size` is not 96.
    """
    # Validate before any op is added to the graph.
    if params.image_size != 96:
        raise ValueError(
            "Image size should be equal to 96 if you want to use custom_v3.")

    image_size_in = params.image_size
    bn_momentum = params.bn_momentum
    # each element in this list indicates the number of filters to use in a
    # new conv block
    filters = [32, 64, 128]

    # Apply dropout to the input layer
    out = tf.layers.dropout(images,
                            rate=params.input_dropout,
                            training=is_training,
                            name='input_dropout')

    # For each block, we do: (3x3 conv -> batch norm -> relu) twice, then a
    # 2x2 maxpool that halves the spatial size.
    for i, f in enumerate(filters):
        with tf.variable_scope('block_{}'.format(i + 1)):
            out = tf.layers.conv2d(out, f, 3, padding='same')
            if params.use_batch_norm:
                out = tf.layers.batch_normalization(out,
                                                    momentum=bn_momentum,
                                                    training=is_training)
            out = tf.nn.relu(out)
            out = tf.layers.conv2d(out, f, 3, padding='same')
            if params.use_batch_norm:
                out = tf.layers.batch_normalization(out,
                                                    momentum=bn_momentum,
                                                    training=is_training)
            out = tf.nn.relu(out)
            out = tf.layers.max_pooling2d(out, 2, 2)

    # one halving per block: 96 -> 12 for the three blocks above
    image_size_out = int(image_size_in / (2**len(filters)))
    assert out.shape[1:] == [
        image_size_out, image_size_out, filters[-1]
    ], "filters: {}\nout shape: {}\nimage_size_out: {}".format(
        filters[-1], out.shape, image_size_out)

    # 12 x 12 x 128
    out = tf.layers.conv2d(out, 64, 1, padding='same')
    # 12 x 12 x 64
    # Pooling window covers the whole feature map, i.e. global average pooling.
    out = tf.layers.average_pooling2d(out, image_size_out, strides=1)
    # 1 x 1 x 64
    out = tf.reshape(out, [-1, 1 * 1 * 64])

    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)

    # L2-normalise each embedding (1e-16 guards against division by zero),
    # then rescale by alpha.
    out = tf.divide(
        out, tf.expand_dims(tf.norm(out, ord='euclidean', axis=1) + 1e-16, 1))
    out = params.alpha * out
    # batch x embedding_size
    return out
def divergence3(x):
    """Forward-difference divergence of a 3-component field.

    `x` is indexed as a rank-5 tensor whose last axis holds the components
    (0, 1, 2).  Component 0 is differenced along axis 3, component 1 along
    axis 2 and component 2 along axis 1.  Each of those three axes shrinks
    by one in the result, which keeps a trailing axis of size 1.
    """
    # Samples at the "lower" corner of every cell, shared by all three terms.
    base = x[:, :-1, :-1, :-1]

    du_dx = x[:, :-1, :-1, 1:, 0] - base[..., 0]
    dv_dy = x[:, :-1, 1:, :-1, 1] - base[..., 1]
    dw_dz = x[:, 1:, :-1, :-1, 2] - base[..., 2]

    total = du_dx + dv_dy + dw_dz
    return tf.expand_dims(total, axis=-1)
help='The directory of tensorflow checkpoint.')

# Script entry point: build the inpainting graph for one image/mask pair.
if __name__ == "__main__":
    # Only expose CUDA device 4 to this process.
    os.environ['CUDA_VISIBLE_DEVICES'] = '4'
    args = parser.parse_args()

    # Single-argument join returns 'config.yml' unchanged, i.e. the config is
    # looked up relative to the current working directory.
    config_path = os.path.join('config.yml')
    config = Config(config_path)
    model = GDNInpainting(config)

    image = imread(args.image)
    mask = imread(args.mask)
    # Binarise the mask: values above 173 become 255, everything else 0.
    mask = (mask > 173).astype(np.uint8) * 255
    # NOTE(review): image and mask must have identical shapes here, yet only
    # the mask gets a trailing channel axis below - confirm the ranks that
    # `imread` returns for both inputs.
    assert image.shape == mask.shape

    # Add a leading batch axis to the image.
    image = tf.constant(image, dtype=tf.float32)
    image = tf.expand_dims(image, axis=0)

    # Add a leading batch axis and a trailing axis to the mask.
    mask = tf.constant(mask, dtype=tf.float32)
    mask = tf.expand_dims(mask, axis=0)
    mask = tf.expand_dims(mask, axis=-1)

    # Rescale both from [0, 255] to [0, 1].
    image /= 255
    mask /= 255

    # Pixels where mask == 1 are replaced by 1.0; the rest keep the image value.
    images_masked = (image * (1 - mask)) + mask
    # input of the model
    inputs = tf.concat([images_masked, mask], axis=3)
    # process outputs
    # NOTE(review): the meaning of the positional arguments 8, 64, 2 is not
    # visible from here - check GDNInpainting.inpaint_generator.
    output = model.inpaint_generator(inputs, 8, 64, 2)
    # Take the generator output inside the mask and the original image outside.
    outputs_merged = (output * mask) + (image * (1 - mask))
def miniception_v6(is_training, images, params, mode):
    """Compute outputs of the model (embeddings for triplet loss).

    Adding L2-norm layer to miniception_v2 (maybe add a learnable scaling
    parameter alpha, see paper L2-constraint softmax)

    Args:
        is_training: (bool) whether we are training or not
        images: input image tensor fed straight into the dropout layer;
                this can be a `tf.placeholder` or an output of `tf.data`.
                The shape assertions below expect 448 x 448 spatial size.
        params: (Params) hyperparameters. Reads `image_size` (must be 448),
                `input_dropout`, `output_dropout`, `embedding_size` and
                `alpha`.
        mode: not used by this function (presumably kept so every model
              builder shares one signature).

    Returns:
        output: (tf.Tensor) embeddings of shape (batch, params.embedding_size),
                L2-normalised and scaled by `params.alpha`

    Raises:
        ValueError: if `params.image_size` is not 448.
    """
    # Validate before any op is added to the graph.
    if params.image_size != 448:
        raise ValueError(
            "Image size should be equal to 448 if you want to use miniception_v6."
        )

    # Apply dropout to the input layer
    input_dropout = tf.layers.dropout(images,
                                      rate=params.input_dropout,
                                      training=is_training,
                                      name='input_dropout')
    out = input_dropout
    # 448 x 448 x num_channels

    out = tf.layers.conv2d(out,
                           16,
                           7,
                           strides=2,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [224, 224,
                             16], "output has shape {}".format(out.shape)
    # 224 x 224 x 16
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [112, 112,
                             16], "output has shape {}".format(out.shape)
    # 112 x 112 x 16
    out = tf.layers.conv2d(out,
                           32,
                           3,
                           strides=1,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [112, 112,
                             32], "output has shape {}".format(out.shape)
    # 112 x 112 x 32
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [56, 56,
                             32], "output has shape {}".format(out.shape)
    # 56 x 56 x 32
    out = tf.layers.conv2d(out,
                           64,
                           3,
                           strides=1,
                           padding='same',
                           activation=tf.nn.relu)
    assert out.shape[1:] == [56, 56,
                             64], "output has shape {}".format(out.shape)
    # 56 x 56 x 64
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [28, 28,
                             64], "output has shape {}".format(out.shape)
    # 28 x 28 x 64
    out = tf.nn.local_response_normalization(out)
    out = tf.layers.conv2d(out, 96, 3, padding='same', activation=tf.nn.relu)
    assert out.shape[1:] == [28, 28,
                             96], "output has shape {}".format(out.shape)
    # 28 x 28 x 96
    out = tf.nn.local_response_normalization(out)
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [14, 14,
                             96], "output has shape {}".format(out.shape)
    # 14 x 14 x 96

    # Miniception module 1
    # ------------------
    with tf.variable_scope('miniception_block1'):
        with tf.variable_scope('branch1x1'):
            branch1x1 = tf.layers.conv2d(out,
                                         32,
                                         1,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch5x5'):
            branch5x5 = tf.layers.conv2d(out, 8, 1, activation=tf.nn.relu)
            branch5x5 = tf.layers.conv2d(branch5x5,
                                         16,
                                         5,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch3x3'):
            branch3x3 = tf.layers.conv2d(out, 48, 1, activation=tf.nn.relu)
            branch3x3 = tf.layers.conv2d(branch3x3,
                                         64,
                                         3,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch_pool'):
            branch_pool = tf.layers.average_pooling2d(out,
                                                      3,
                                                      strides=1,
                                                      padding='same')
            branch_pool = tf.layers.conv2d(branch_pool,
                                           16,
                                           1,
                                           padding='same',
                                           activation=tf.nn.relu)
        # Concatenate the four branches along the channel axis:
        # 32 + 16 + 64 + 16 = 128 channels.
        out = tf.concat(axis=3,
                        values=[branch1x1, branch5x5, branch3x3, branch_pool])
    # 14 x 14 x 128

    # Transitional max pooling layer
    # ------------------------------
    out = tf.layers.max_pooling2d(out, 3, strides=2, padding='same')
    assert out.shape[1:] == [7, 7,
                             128], "output has shape {}".format(out.shape)
    # 7 x 7 x 128

    # Miniception module 2
    # ------------------
    with tf.variable_scope('miniception_block2'):
        with tf.variable_scope('branch1x1'):
            branch1x1 = tf.layers.conv2d(out,
                                         64,
                                         1,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch5x5'):
            branch5x5 = tf.layers.conv2d(out, 16, 1, activation=tf.nn.relu)
            branch5x5 = tf.layers.conv2d(branch5x5,
                                         48,
                                         5,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch3x3'):
            branch3x3 = tf.layers.conv2d(out, 64, 1, activation=tf.nn.relu)
            branch3x3 = tf.layers.conv2d(branch3x3,
                                         96,
                                         3,
                                         padding='same',
                                         activation=tf.nn.relu)
        with tf.variable_scope('branch_pool'):
            branch_pool = tf.layers.average_pooling2d(out,
                                                      3,
                                                      strides=1,
                                                      padding='same')
            branch_pool = tf.layers.conv2d(branch_pool,
                                           32,
                                           1,
                                           padding='same',
                                           activation=tf.nn.relu)
        # 64 + 48 + 96 + 32 = 240 channels.
        out = tf.concat(axis=3,
                        values=[branch1x1, branch5x5, branch3x3, branch_pool])
    # 7 x 7 x 240
    assert out.shape[1:] == [7, 7, 240], "out shape: {}".format(out.shape)

    # Average pooling reduction
    # -------------------------
    out = tf.layers.average_pooling2d(out, 7, strides=1)
    # 1 x 1 x 240

    # Flatten layer with dropout
    # --------------------------
    out = tf.reshape(out, [-1, 1 * 1 * 240])
    out = tf.layers.dropout(out,
                            rate=params.output_dropout,
                            training=is_training,
                            name='output_dropout')

    # Final dense layer (embeddings) followed by L2 normalization
    # -----------------------------------------------------------
    with tf.variable_scope('fc'):
        out = tf.layers.dense(out, params.embedding_size)

    # 1e-16 guards against division by zero for an all-zero embedding.
    out = tf.divide(
        out, tf.expand_dims(tf.norm(out, ord='euclidean', axis=1) + 1e-16, 1))
    out = params.alpha * out

    return out
def attention(inputs, attention_size, time_major=False, return_alphas=False):
    """
    Attention mechanism layer which reduces RNN/Bi-RNN outputs with Attention vector.

    The idea was proposed in the article by Z. Yang et al., "Hierarchical Attention Networks
     for Document Classification", 2016: http://www.aclweb.org/anthology/N16-1174.
    Variables notation is also inherited from the article

    Args:
        inputs: The Attention inputs.
            Matches outputs of RNN/Bi-RNN layer (not final state):
                In case of RNN, this must be RNN outputs `Tensor`:
                    If time_major == False (default), this must be a tensor of shape:
                        `[batch_size, max_time, cell.output_size]`.
                    If time_major == True, this must be a tensor of shape:
                        `[max_time, batch_size, cell.output_size]`.
                In case of Bidirectional RNN, this must be a tuple (outputs_fw, outputs_bw) containing the forward and
                the backward RNN outputs `Tensor`.
                    If time_major == False (default),
                        outputs_fw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[batch_size, max_time, cell_bw.output_size]`.
                    If time_major == True,
                        outputs_fw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_fw.output_size]`
                        and outputs_bw is a `Tensor` shaped:
                        `[max_time, batch_size, cell_bw.output_size]`.
        attention_size: Linear size of the Attention weights.
        time_major: The shape format of the `inputs` Tensors.
            If true, these `Tensors` must be shaped `[max_time, batch_size, depth]`.
            If false, these `Tensors` must be shaped `[batch_size, max_time, depth]`.
            Using `time_major = True` is a bit more efficient because it avoids
            transposes at the beginning and end of the RNN calculation.  However,
            most TensorFlow data is batch-major, so by default this function
            accepts input and emits output in batch-major form.
        return_alphas: Whether to return attention coefficients variable along with layer's output.
            Used for visualization purpose.
    Returns:
        The Attention output `Tensor`.
        In case of RNN, this will be a `Tensor` shaped:
            `[batch_size, cell.output_size]`.
        In case of Bidirectional RNN, this will be a `Tensor` shaped:
            `[batch_size, cell_fw.output_size + cell_bw.output_size]`.
        If `return_alphas` is true, a tuple `(output, alphas)` is returned
        instead, where `alphas` is the `[batch_size, max_time]` tensor of
        attention coefficients.
    """

    if isinstance(inputs, tuple):
        # In case of Bi-RNN, concatenate the forward and the backward RNN outputs.
        inputs = tf.concat(inputs, 2)

    if time_major:
        # (T,B,D) => (B,T,D)
        # Use the public tf.transpose; `tf.array_ops` is not part of the
        # public API and is absent from the `tf` namespace in many releases.
        inputs = tf.transpose(inputs, [1, 0, 2])

    hidden_size = inputs.shape[2].value  # D value - hidden size of the RNN layer

    # Trainable parameters
    w_omega = tf.Variable(
        tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    with tf.name_scope('v'):
        # Applying fully connected layer with non-linear activation to each of the B*T timestamps;
        #  the shape of `v` is (B,T,D)*(D,A)=(B,T,A), where A=attention_size
        v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)

    # For each of the timestamps its vector of size A from `v` is reduced with `u` vector
    vu = tf.tensordot(v, u_omega, axes=1, name='vu')  # (B,T) shape
    alphas = tf.nn.softmax(vu, name='alphas')  # (B,T) shape

    # Output of (Bi-)RNN is reduced with attention vector; the result has (B,D) shape
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)

    if return_alphas:
        return output, alphas
    return output
def perform_greedy(self, features, predicted, states, swap_memory = False):
    """Greedy decoding: one decoder step per encoder frame.

    `features` is run through `self.encoder_inference`; the loop below then
    walks the first axis of the encoded tensor, calling
    `self.decoder_inference` once per step and keeping the argmax symbol.

    Args:
        features: input passed unchanged to `self.encoder_inference`.
        predicted: initial symbol, stored at position 0 of the prediction
            array (assumed to be a scalar int32 tensor, matching the
            array's element shape - TODO confirm).
        states: initial decoder states handed to `self.decoder_inference`.
        swap_memory: forwarded to `tf.while_loop`.

    Returns:
        A `Hypothesis` whose `prediction` holds entries 0..index of the
        prediction array, together with the final `index` and `states`.
    """
    encoded = self.encoder_inference(features)
    # One slot for the initial symbol plus one per encoder step.
    prediction = tf.TensorArray(
        dtype = tf.int32,
        size = (tf.shape(encoded)[0] + 1),
        dynamic_size = False,
        element_shape = tf.TensorShape([]),
        clear_after_read = False,  # entries are read back on later steps
    )
    time = tf.constant(0, dtype = tf.int32)
    total = tf.shape(encoded)[0]  # number of loop iterations
    hypothesis = Hypothesis(
        index = tf.constant(0, dtype = tf.int32),
        prediction = prediction.write(0, predicted),
        states = states,
    )

    def condition(time, total, encoded, hypothesis):
        # Continue until every step along axis 0 of `encoded` is consumed.
        return tf.less(time, total)

    def body(time, total, encoded, hypothesis):
        # Decode frame `time` given the most recently written symbol.
        ytu, new_states = self.decoder_inference(
            encoded = tf.gather_nd(
                encoded, tf.expand_dims(time, axis = -1)
            ),
            predicted = hypothesis.prediction.read(hypothesis.index),
            states = hypothesis.states,
        )
        char = tf.argmax(ytu, axis = -1, output_type = tf.int32)
        # On BLANK the previous decoder states are kept; otherwise the new
        # states are adopted. The lambdas run while tf.cond builds the graph,
        # i.e. before the names on the left-hand side are rebound, so `char`
        # and `new_states` inside them are the values computed just above.
        # NOTE(review): the index advances in both branches, so BLANK symbols
        # are written into the prediction and fed back as the next
        # `predicted` - confirm this is intended.
        index, char, new_states = tf.cond(
            tf.equal(char, BLANK),
            true_fn = lambda: (
                hypothesis.index + 1,
                BLANK,
                hypothesis.states,
            ),
            false_fn = lambda: (hypothesis.index + 1, char, new_states),
        )
        hypothesis = Hypothesis(
            index = index,
            prediction = hypothesis.prediction.write(index, char),
            states = new_states,
        )
        return time + 1, total, encoded, hypothesis

    time, total, encoded, hypothesis = tf.while_loop(
        condition,
        body,
        loop_vars = (time, total, encoded, hypothesis),
        swap_memory = swap_memory,
    )
    # Convert the TensorArray into a dense tensor holding entries 0..index.
    hypothesis = Hypothesis(
        index = hypothesis.index,
        prediction = tf.gather_nd(
            params = hypothesis.prediction.stack(),
            indices = tf.expand_dims(
                tf.range(hypothesis.index + 1), axis = -1
            ),
        ),
        states = hypothesis.states,
    )
    return hypothesis
def main():
    """Receive frames over Spout, stylise them with a TF model and display them.

    Sets up a pygame/OpenGL window and a Spout receiver, builds the transform
    network graph, restores the checkpoint given by `args.style_model`, and
    then loops forever: receive a texture, read its pixels back, run them
    through the network, upload the result to a second texture and draw it.
    The loop only ends when a pygame QUIT event arrives.
    """
    # parse arguments
    args = parse_args()

    # window details
    width = args.window_size[0]
    height = args.window_size[1]
    display = (width, height)

    # window setup
    pygame.init()
    pygame.display.set_caption('Spout Neural Style Receiver')
    pygame.display.set_mode(display, DOUBLEBUF | OPENGL)

    # OpenGL init: orthographic projection in window pixels, origin top-left
    glMatrixMode(GL_PROJECTION)
    glLoadIdentity()
    glOrtho(0, width, height, 0, 1, -1)
    glMatrixMode(GL_MODELVIEW)
    glDisable(GL_DEPTH_TEST)
    glClearColor(0.0, 0.0, 0.0, 0.0)
    glEnable(GL_TEXTURE_2D)

    # init spout receiver
    receiverName = args.spout_name
    spoutReceiverWidth = args.spout_size[0]
    spoutReceiverHeight = args.spout_size[1]
    # create spout receiver
    spoutReceiver = SpoutSDK.SpoutReceiver()

    # Its signature in c++ looks like this: bool pyCreateReceiver(const char* theName, unsigned int theWidth, unsigned int theHeight, bool bUseActive);
    spoutReceiver.pyCreateReceiver(receiverName, spoutReceiverWidth, spoutReceiverHeight, False)

    # create textures: one for the received frame, one for the stylised output
    textureReceiveID = glGenTextures(1)
    textureStyleID = glGenTextures(1)

    # initialise receiver texture
    glBindTexture(GL_TEXTURE_2D, textureReceiveID)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)

    # allocate RGBA storage for the texture (no pixel data is supplied yet)
    glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, spoutReceiverWidth, spoutReceiverHeight, 0, GL_RGBA, GL_UNSIGNED_BYTE, None)
    glBindTexture(GL_TEXTURE_2D, 0)

    # initialise the texture that will hold the stylised output
    glBindTexture(GL_TEXTURE_2D, textureStyleID)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE)
    glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST)
    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST)
    glBindTexture(GL_TEXTURE_2D, 0)

    # open tf session
    soft_config = tf.ConfigProto(allow_soft_placement=True)
    soft_config.gpu_options.allow_growth = True  # to deal with large image
    sess = tf.Session(config=soft_config)
    # build tf graph: a single (height, width, 3) float image is fed per frame
    style = tf.placeholder(tf.float32, shape=[spoutReceiverHeight, spoutReceiverWidth, 3], name='input')
    styleI = tf.expand_dims(style, 0)  # add one dim for batch

    # result image from transform-net
    scaler = transform.Transform()
    y_hat = scaler.net(styleI / 255.0)
    y_hat = tf.squeeze(y_hat)  # remove one dim for batch
    y_hat = tf.clip_by_value(y_hat, 0., 255.)

    # initialize parameters
    sess.run(tf.global_variables_initializer())

    # load pre-trained model
    saver = tf.train.Saver()
    saver.restore(sess, args.style_model)

    # loop for graph frame by frame
    while (True):
        # On window close: release the Spout receiver, shut pygame down, exit.
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                spoutReceiver.ReleaseReceiver()
                pygame.quit()
                quit()

        # receive texture
        # Its signature in c++ looks like this: bool pyReceiveTexture(const char* theName, unsigned int theWidth, unsigned int theHeight, GLuint TextureID, GLuint TextureTarget, bool bInvert, GLuint HostFBO);
        spoutReceiver.pyReceiveTexture(receiverName, spoutReceiverWidth, spoutReceiverHeight, textureReceiveID, GL_TEXTURE_2D, False, 0)

        glBindTexture(GL_TEXTURE_2D, textureReceiveID)
        # copy pixel byte array from received texture
        data = glGetTexImage(GL_TEXTURE_2D, 0, GL_RGB, GL_UNSIGNED_BYTE, outputType=None)  # Using GL_RGB; GL_RGBA is also possible
        glBindTexture(GL_TEXTURE_2D, 0)
        # swap width and height data around due to oddness with glGetTexImage. http://permalink.gmane.org/gmane.comp.python.opengl.user/2423
        data.shape = (data.shape[1], data.shape[0], data.shape[2])

        # start time of the loop for FPS counter
        start_time = time.time()
        # run the graph
        output = sess.run(y_hat, feed_dict={style: data})
        # fiddle back to an image we can display. I *think* this is correct
        output = np.clip(output, 0.0, 255.0)
        output = output.astype(np.uint8)

        # setup the texture so we can load the stylised output into it
        glBindTexture(GL_TEXTURE_2D, textureStyleID)
        # copy style output into texture
        glTexImage2D(GL_TEXTURE_2D, 0, GL_RGB, spoutReceiverWidth, spoutReceiverHeight, 0, GL_RGB, GL_UNSIGNED_BYTE, output)

        # setup window to draw to screen
        glActiveTexture(GL_TEXTURE0)
        # clean start
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT)
        # reset drawing perspective
        glLoadIdentity()
        # draw texture on screen as one quad sized to the Spout frame
        # NOTE(review): the quad uses the Spout size while the projection uses
        # the window size - confirm the two are meant to match.
        glBegin(GL_QUADS)
        glTexCoord(0, 0)
        glVertex2f(0, 0)
        glTexCoord(1, 0)
        glVertex2f(spoutReceiverWidth, 0)
        glTexCoord(1, 1)
        glVertex2f(spoutReceiverWidth, spoutReceiverHeight)
        glTexCoord(0, 1)
        glVertex2f(0, spoutReceiverHeight)
        glEnd()

        # update window
        pygame.display.flip()

        # FPS = 1 / time to process loop (timed from just before sess.run)
        print("FPS: ", 1.0 / (time.time() - start_time))
if __name__ == '__main__':
    # Smoke test: sample points on five random triangles, weighted by
    # triangle area, then thin them with farthest point sampling.
    import numpy as np

    np.random.seed(100)
    triangles = np.random.rand(1, 5, 3, 3).astype('float32')

    with tf.device('/gpu:0'):
        inp = tf.constant(triangles)
        # The three corners of every triangle, each 1 x 5 x 3.
        tria = inp[:, :, 0, :]
        trib = inp[:, :, 1, :]
        tric = inp[:, :, 2, :]

        # Norm of the cross product of two edge vectors; 1e-9 keeps the
        # square root away from zero.  Result is 1 x 5.
        edge_ab = trib - tria
        edge_ac = tric - tria
        normals = tf.cross(edge_ab, edge_ac)
        areas = tf.sqrt(tf.reduce_sum(normals ** 2, 2) + 1e-9)

        # Pick a triangle id for each of the 8192 samples (1 x 8192).
        randomnumbers = tf.random_uniform((1, 8192))
        triids = prob_sample(areas, randomnumbers)

        # Corners of the chosen triangles, each 1 x 8192 x 3.
        tria_sample = gather_point(tria, triids)
        trib_sample = gather_point(trib, triids)
        tric_sample = gather_point(tric, triids)

        # Draw (u, v) in the unit square and fold it so that u + v <= 1.
        us = tf.random_uniform((1, 8192))
        vs = tf.random_uniform((1, 8192))
        uplusv = 1 - tf.abs(us + vs - 1)
        uminusv = us - vs
        us = (uplusv + uminusv) * 0.5
        vs = (uplusv - uminusv) * 0.5

        # Point = A + u * (B - A) + v * (C - A), built in the same left-to-right
        # order as a single expression would evaluate.
        offset_b = (trib_sample - tria_sample) * tf.expand_dims(us, -1)
        partial = tria_sample + offset_b
        offset_c = (tric_sample - tria_sample) * tf.expand_dims(vs, -1)
        pt_sample = partial + offset_c
        print('pt_sample: ', pt_sample)

        # Keep 1024 of the sampled points.
        keep_ids = farthest_point_sample(1024, pt_sample)
        reduced_sample = gather_point(pt_sample, keep_ids)
        print(reduced_sample)

    with tf.Session('') as sess:
        ret = sess.run(reduced_sample)
    print(ret.shape, ret.dtype)
    # To keep the result for offline inspection, pickle `ret` here
    # (for example into '1.pkl').