def conv3d_oneToMany(x, xShape, w, wShape, strideT, strideY, strideX, inName):
    """Emulate a strided transposed 3-D convolution with a stride-1 conv3d.

    The kernel ``w`` is regrouped (via ``tf.gather_nd``) into
    ``strideT * strideY * strideX`` sub-kernels stacked along the output
    feature axis, a stride-1 "SAME" ``tf.nn.conv3d`` is applied, and the
    resulting feature channels are interleaved back into the time / height /
    width axes with a second ``tf.gather_nd``.

    Args:
        x: Input tensor passed to ``tf.nn.conv3d``.
        xShape: Static shape of ``x`` as ``[nb, nt, ny, nx, nf]``.
        w: Weight tensor to be regrouped.
        wShape: Static shape of ``w`` as ``[ntp, nyp, nxp, nifp, nofp]``.
        strideT: Upsampling factor along the time axis.
        strideY: Upsampling factor along the height axis.
        strideX: Upsampling factor along the width axis.
        inName: Name given to the ``tf.nn.conv3d`` op.

    Returns:
        Tensor gathered into shape
        ``[nb, nt * strideT, ny * strideY, nx * strideX, nofp]``.

    Raises:
        AssertionError: If a kernel or input extent is not a multiple of the
            matching stride, or if ``nifp != nf``.
    """
    [ntp, nyp, nxp, nifp, nofp] = wShape
    [nb, nt, ny, nx, nf] = xShape

    # Kernel and input extents must both be multiples of the stride.
    assert ntp % strideT == 0
    assert nyp % strideY == 0
    assert nxp % strideX == 0
    assert nt % strideT == 0
    assert ny % strideY == 0
    assert nx % strideX == 0
    assert nifp == nf

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Building weight indices for conv3d")
    # Gather indices for the weights, laid out in the shape of the regrouped
    # kernel; the sub-kernel number is folded into the output-feature axis.
    weightIdxs = np.zeros(
        (ntp // strideT, nyp // strideY, nxp // strideX, nifp,
         nofp * strideT * strideX * strideY, 5),
        dtype=np.int32)
    for itp in range(ntp):
        for iyp in range(nyp):
            for ixp in range(nxp):
                # Output position is reversed because a plain convolution is
                # standing in for a transposed one.
                otp = (ntp - itp - 1) // strideT
                oyp = (nyp - iyp - 1) // strideY
                oxp = (nxp - ixp - 1) // strideX
                # Which sub-kernel this tap belongs to.
                kernelIdx = ((itp % strideT) * strideY * strideX
                             + (iyp % strideY) * strideX
                             + (ixp % strideX))
                for iifp in range(nifp):
                    # Input features stay the same.
                    for iofp in range(nofp):
                        # iofp is the offset inside the sub-kernel's feature
                        # group of size nofp.
                        oofp = iofp + nofp * kernelIdx
                        weightIdxs[otp, oyp, oxp, iifp, oofp, :] = [
                            itp, iyp, ixp, iifp, iofp]

    print("Building output indices for conv3d")
    # Gather indices for the output, laid out in the shape of the upsampled
    # result.
    dataIdxs = np.zeros(
        (nb, nt * strideT, ny * strideY, nx * strideX, nofp, 5),
        dtype=np.int32)
    for oob in range(nb):
        for oot in range(nt * strideT):
            for ooy in range(ny * strideY):
                for oox in range(nx * strideX):
                    # Source position in the stride-1 convolution output.
                    iit = oot // strideT
                    iiy = ooy // strideY
                    iix = oox // strideX
                    kernelIdx = ((oot % strideT) * strideY * strideX
                                 + (ooy % strideY) * strideX
                                 + (oox % strideX))
                    for oof in range(nofp):
                        iif = oof + nofp * kernelIdx
                        dataIdxs[oob, oot, ooy, oox, oof, :] = [
                            oob, iit, iiy, iix, iif]

    # Build convolution structure.
    w_reshape = tf.gather_nd(w, weightIdxs)
    o_reshape = tf.nn.conv3d(x, w_reshape, strides=[1, 1, 1, 1, 1],
                             padding="SAME", name=inName)
    o = tf.gather_nd(o_reshape, dataIdxs)
    return o
def testEmptyIndicesAndParamsOKButJustEmptyParamsFails(self):
    """Empty indices are fine; non-empty indices into empty params must fail."""
    with self.test_session(use_gpu=self.use_gpu):
        full_params = np.ones((3, 3), dtype=np.float32)
        params_empty = np.empty((0, 3), dtype=np.float32)

        # (params, indices shape, expected static shape, expected value shape)
        ok_cases = [
            (full_params, (0, 2), [0], (0,)),
            (full_params, (0, 1), [0, 3], (0, 3)),
            (params_empty, (0, 2), [0], (0,)),
        ]
        for params, idx_shape, static_shape, value_shape in ok_cases:
            indices_empty = np.empty(idx_shape, dtype=np.int32)
            gather_nd_ok_t = tf.gather_nd(params, indices_empty)
            gather_nd_ok_val = gather_nd_ok_t.eval()
            self.assertEqual(static_shape, gather_nd_ok_t.get_shape())
            self.assertAllEqual(
                np.empty(value_shape, dtype=np.float32), gather_nd_ok_val)

        # Asking for one entry out of an empty params tensor is an error.
        indices_nonempty = np.zeros((1, 2), dtype=np.int32)
        gather_nd_break_t = tf.gather_nd(params_empty, indices_nonempty)
        with self.assertRaisesOpError(
                r"Requested more than 0 entries, but params is empty."):
            gather_nd_break_t.eval()
        # Re-checks the value obtained from the last successful case.
        self.assertAllEqual(
            np.empty((0,), dtype=np.float32), gather_nd_ok_val)
def parse_sequence_to_pairs_batch(
        serialized_example, preprocess_fn, is_training, num_views, batch_size,
        window):
    """Parses a serialized sequence example into (anchor, positive) batches.

    Args:
        serialized_example: A serialized SequenceExample.
        preprocess_fn: A function with the signature
            (raw_images, is_training) -> preprocessed_images.
        is_training: Boolean, whether or not we're in training.
        num_views: Int, the number of simultaneous viewpoints at each
            timestep in the dataset.
        batch_size: Int, size of the batch to get; ``batch_size // 2`` pairs
            are drawn.
        window: Int, only take pairs from a maximum window of this size.

    Returns:
        A 7-tuple ``(anchor_prepro, positive_prepro, anchor_images,
        pos_images, anchor_labels, positive_labels, seq_len)``: the
        preprocessed anchor and positive images, the decoded (raw) anchor and
        positive images, the per-pair labels ``1..num_pairs`` for anchors and
        positives, and the sequence length returned by
        ``parse_sequence_example``.
    """
    _, views, seq_len = parse_sequence_example(serialized_example, num_views)

    # Draw random (anchor, positive) timestep and viewpoint indices.
    num_pairs = batch_size // 2
    time_idx, anchor_view_idx, pos_view_idx = get_tcn_anchor_pos_indices(
        seq_len, num_views, num_pairs, window)

    # Build (view, time) coordinates and gather the encoded image strings.
    anchor_coords = tf.concat(
        [tf.expand_dims(anchor_view_idx, 1), tf.expand_dims(time_idx, 1)], 1)
    pos_coords = tf.concat(
        [tf.expand_dims(pos_view_idx, 1), tf.expand_dims(time_idx, 1)], 1)
    anchor_images = tf.gather_nd(views, anchor_coords)
    pos_images = tf.gather_nd(views, pos_coords)

    # Decode images.
    anchor_images = tf.map_fn(
        preprocessing.decode_image, anchor_images, dtype=tf.float32)
    pos_images = tf.map_fn(
        preprocessing.decode_image, pos_images, dtype=tf.float32)

    # Preprocess anchors and positives together, then split them apart again.
    stacked = tf.concat([anchor_images, pos_images], 0)
    preprocessed = preprocess_fn(stacked, is_training)
    anchor_prepro, positive_prepro = tf.split(
        preprocessed, num_or_size_splits=2, axis=0)

    # Set static batch dimensions for all image tensors.
    anchor_prepro, positive_prepro, anchor_images, pos_images = [
        set_image_tensor_batch_dim(images, num_pairs)
        for images in (anchor_prepro, positive_prepro, anchor_images,
                       pos_images)
    ]

    # Each anchor shares its label with the matching positive.
    anchor_labels = tf.range(1, num_pairs+1)
    positive_labels = tf.range(1, num_pairs+1)

    return (anchor_prepro, positive_prepro, anchor_images, pos_images,
            anchor_labels, positive_labels, seq_len)
def _get_coordinatewise_learning_rate(self, grad, var):
    """Return the learning rate for `var`, updating its moment slots.

    Reads the 'first_moment' and 'second_moment' slots of `var`, builds the
    ops that update them from `grad`, and derives a preconditioner from the
    clipped second-moment slot.

    Args:
        grad: Gradient for `var`; either a `tf.Tensor` or `tf.IndexedSlices`.
        var: The variable whose slots are read and updated.

    Returns:
        `2 * batch_size / (total_num_examples * diag_preconditioner)`, where
        `diag_preconditioner` is the clipped second moment scaled by the
        batch size (and averaged to a scalar when
        `self._use_single_learning_rate` is set).

    Raises:
        tf.errors.InvalidArgumentError: If `grad` is neither a `tf.Tensor`
            nor a `tf.IndexedSlices`.
    """
    # Compute the learning rate using a moving average for the diagonal of BB^T
    avg_first = self.get_slot(var, 'first_moment')
    avg_second = self.get_slot(var, 'second_moment')
    decay_tensor = tf.cast(self._decay_tensor, var.dtype)
    batch_size = tf.cast(self._batch_size_tensor, var.dtype)

    # Create an estimator for the moving average of gradient mean and variance
    # via Welford's algorithm
    if isinstance(grad, tf.Tensor):
        delta = grad - avg_first
        # While the counter is below 1 the full delta is added; afterwards
        # it is scaled by (1 - decay).
        first_moment_update = avg_first.assign_add(
            delta * tf.where(self._counter < 1, tf.cast(1, var.dtype),
                             1. - decay_tensor))
        # The second-moment update is ordered after the first-moment update.
        with tf.control_dependencies([first_moment_update]):
            second_moment_update = avg_second.assign_add(
                tf.cast(self._counter < 1, var.dtype) * -(1. - decay_tensor) * (
                    avg_second - decay_tensor * tf.square(delta)))
        # Reading the clipped slot is ordered after the second-moment update.
        diag_preconditioner = control_flow_ops.with_dependencies(
            [second_moment_update], tf.clip_by_value(avg_second, 1e-12, 1e12))
    elif isinstance(grad, tf.IndexedSlices):
        # Sparse gradient: only the rows named by grad.indices are touched.
        delta = grad.values - tf.gather_nd(avg_first, grad.indices)
        first_moment_update = tf.scatter_add(
            avg_first, grad.indices,
            delta * tf.where(self._counter < 1, tf.cast(1., var.dtype),
                             1. - decay_tensor))
        with tf.control_dependencies([first_moment_update]):
            avg_second = tf.scatter_add(
                avg_second, grad.indices,
                tf.cast(self._counter < 1, var.dtype) * -(1. - decay_tensor) * (
                    tf.gather_nd(avg_second, grad.indices)
                    - decay_tensor * tf.square(delta)))
            # Keep only the updated rows so the result lines up with
            # grad.values.
            avg_second = tf.gather_nd(avg_second, grad.indices)
            # TODO(b/70783772): Needs dtype specific clipping.
            diag_preconditioner = tf.clip_by_value(avg_second, 1e-12, 1e12)
    else:
        raise tf.errors.InvalidArgumentError(
            None, None, 'grad must of type Tensor or IndexedSlice')

    diag_preconditioner *= batch_size

    # Optionally collapse to one scalar rate shared by every coordinate.
    if self._use_single_learning_rate:
        diag_preconditioner = tf.reduce_mean(diag_preconditioner)

    # From Theorem 2 Corollary 1 of Mandt et al. 2017
    return 2. * batch_size / (
        tf.cast(self._total_num_examples, var.dtype.base_dtype)
        * diag_preconditioner)
def get_valid_logits_and_labels(annotation_batch_tensor, logits_batch_tensor,
                                class_labels):
    """Select the labels and logits at the valid entries of an annotation batch.

    Labels are derived from the annotation batch with
    ``get_labels_from_annotation_batch`` and the positions to keep come from
    ``get_valid_entries_indices_from_annotation_batch``; the same indices are
    applied to both the labels and the logits.

    Args:
        annotation_batch_tensor: Batch of annotations.
        logits_batch_tensor: Batch of logits, indexed with the same
            coordinates as the derived labels.
        class_labels: Class labels forwarded to both helpers.

    Returns:
        A tuple ``(valid_labels, valid_logits)``. Note the order: labels
        first, logits second, despite the function name.
    """
    labels = get_labels_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor,
        class_labels=class_labels)
    keep = get_valid_entries_indices_from_annotation_batch(
        annotation_batch_tensor=annotation_batch_tensor,
        class_labels=class_labels)

    kept_labels = tf.gather_nd(params=labels, indices=keep)
    kept_logits = tf.gather_nd(params=logits_batch_tensor, indices=keep)
    return kept_labels, kept_logits
def fastrcnn_inference(self, image_shape2d, rcnn_boxes, rcnn_label_logits,
                       rcnn_box_logits):
    """Decode Fast R-CNN head outputs into final boxes and labels.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: classification logits, one row per proposal
        rcnn_box_logits (nx #class x 4): box regression outputs; the first
            entry along the class axis is dropped before decoding.

    Returns:
        boxes (mx4):
        labels (m): each >= 1
    """
    # Drop the first class slot and pin the static category dimension.
    box_logits = rcnn_box_logits[:, 1:, :]
    box_logits.set_shape([None, cfg.DATA.NUM_CATEGORY, None])

    # #proposal x #Class
    label_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')

    # Repeat every proposal once per category: #proposal x #Cat x 4
    anchors = tf.tile(
        tf.expand_dims(rcnn_boxes, 1), [1, cfg.DATA.NUM_CATEGORY, 1])
    reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS, dtype=tf.float32)
    decoded_boxes = clip_boxes(
        decode_bbox_target(box_logits / reg_weights, anchors),
        image_shape2d, name='fastrcnn_all_boxes')

    # indices: Nx2. Each index into (#proposal, #category)
    pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
    # Created only so the probabilities exist in the graph under a fixed name.
    tf.identity(final_probs, 'final_probs')
    final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
    # Category indices start at 0, labels at 1.
    final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
    return final_boxes, final_labels
def gather_flat(x: tf.Tensor,
                indices: tf.Tensor,
                batch_size: Union[int, tf.Tensor] = 1,
                beam_size: Union[int, tf.Tensor] = 1) -> tf.Tensor:
    """Gather values from a tensor flattened over batch and beam.

    The input is expected to have a first dimension of *batch x beam*
    elements. It is reshaped to ``(batch, beam, ...)``, indexed with
    ``indices`` through ``tf.gather_nd``, and the result is flattened back so
    that its trailing dimensions match those of the input.

    Arguments:
        x: A flattened ``Tensor`` from which to gather values. A scalar
            (rank 0) is returned unchanged.
        indices: Index tensor.
        batch_size: The size of the batch.
        beam_size: The size of the beam.

    Returns:
        The ``Tensor`` of gathered values.
    """
    if x.shape.ndims == 0:
        return x

    trailing_dims = get_shape_list(x)[1:]
    unflattened = tf.reshape(x, [batch_size, beam_size] + trailing_dims)
    picked = tf.gather_nd(unflattened, indices)
    return tf.reshape(picked, [-1] + trailing_dims)
def build(self, observations):
    """Builds the model to embed object detection observations.

    Args:
        observations: a tuple of (dets, det_num). dets is a tensor of BxTxLxE
            that has the detection boxes in all the images of the batch. B is
            the batch size, T is the maximum length of episode, L is the
            maximum number of detections per image in the batch and E is the
            size of each detection embedding. det_num is a tensor of BxT that
            contains the number of detected boxes each image of each sequence
            in the batch.

    Returns:
        For each image in the batch, returns the accumulative embedding of
        all the detection boxes in that image, reshaped to
        ``[-1, T, rnn_state_size]``. Images with zero detections get the
        LSTM output at step 0, which ``tf.nn.dynamic_rnn`` zero-fills for
        empty sequences.
    """
    with tf.variable_scope(self._scope, default_name=''):
        shape = observations[0].shape
        # Fold batch and time together so every image is one LSTM sequence.
        dets = tf.reshape(observations[0], [-1, shape[-2], shape[-1]])
        det_num = tf.reshape(observations[1], [-1])
        lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self._rnn_state_size)
        batch_size = tf.shape(dets)[0]
        lstm_outputs, _ = tf.nn.dynamic_rnn(
            cell=lstm_cell,
            inputs=dets,
            sequence_length=det_num,
            initial_state=lstm_cell.zero_state(batch_size, dtype=tf.float32),
            dtype=tf.float32)

        # Gathering the last state of each sequence in the batch. The step
        # index is clamped at 0: an image with no detections would otherwise
        # produce index -1, which is out of range for gather_nd.
        batch_range = tf.range(batch_size)
        last_step = tf.maximum(det_num - 1, 0)
        indices = tf.stack([batch_range, last_step], axis=1)
        last_lstm_outputs = tf.gather_nd(lstm_outputs, indices)
        # Restore the separate time axis.
        last_lstm_outputs = tf.reshape(
            last_lstm_outputs, [-1, shape[1], self._rnn_state_size])
        return last_lstm_outputs
def _define_distance_to_clusters(self, data):
    """Defines the Mahalanobis distance to the assigned Gaussian.

    For every shard in ``data`` a score against each class is built and
    stored in ``self._all_scores``; the score of the class each example is
    assigned to is then picked out and stored in ``self._scores``.
    """
    # TODO(xavigonzalvo): reuse (input - mean) * cov^-1 * (input -
    # mean) from log probability function.
    self._all_scores = []
    for shard in data:
        per_class = []
        expanded = tf.expand_dims(shard, 0)
        for class_id in xrange(self._num_classes):
            if self._covariance_type == FULL_COVARIANCE:
                cov = self._covs[class_id, :, :]
            elif self._covariance_type == DIAG_COVARIANCE:
                cov = tf.diag(self._covs[class_id, :])
            inverse = tf.matrix_inverse(cov + self._min_var)
            # One copy of the inverse covariance per example.
            inv_cov = tf.tile(
                tf.expand_dims(inverse, 0),
                tf.pack([self._num_examples, 1, 1]))
            diff = tf.transpose(
                expanded - self._means[class_id, :, :], perm=[1, 0, 2])
            m_left = tf.batch_matmul(diff, inv_cov)
            m_full = tf.batch_matmul(
                m_left, tf.transpose(diff, perm=[0, 2, 1]))
            per_class.append(tf.sqrt(m_full))
        shard_scores = tf.reshape(
            tf.concat(1, per_class),
            tf.pack([self._num_examples, self._num_classes]))
        self._all_scores.append(shard_scores)

    # Distance to the associated class.
    self._all_scores = tf.concat(0, self._all_scores)
    assignments = tf.concat(0, self.assignments())
    rows = tf.to_int64(tf.range(0, self._num_examples))
    # (row, assigned class) coordinates into the score matrix.
    indices = tf.concat(
        1, [tf.expand_dims(rows, 1), tf.expand_dims(assignments, 1)])
    self._scores = tf.gather_nd(self._all_scores, indices)
def fastrcnn_inference(self, image_shape2d, rcnn_boxes, rcnn_label_logits,
                       rcnn_box_logits):
    """Decode Fast R-CNN head outputs into final boxes and labels.

    Args:
        image_shape2d: h, w
        rcnn_boxes (nx4): the proposal boxes
        rcnn_label_logits: classification logits, one row per proposal
        rcnn_box_logits: box regression outputs, decoded against the
            proposals tiled ``config.NUM_CLASS - 1`` times.

    Returns:
        boxes (mx4):
        labels (m): each >= 1
    """
    # #proposal x #Class
    label_probs = tf.nn.softmax(rcnn_label_logits, name='fastrcnn_all_probs')

    # Repeat every proposal once per category: #proposal x #Cat x 4
    num_categories = config.NUM_CLASS - 1
    anchors = tf.tile(tf.expand_dims(rcnn_boxes, 1), [1, num_categories, 1])
    reg_weights = tf.constant(config.FASTRCNN_BBOX_REG_WEIGHTS)
    decoded_boxes = clip_boxes(
        decode_bbox_target(rcnn_box_logits / reg_weights, anchors),
        image_shape2d, name='fastrcnn_all_boxes')

    # indices: Nx2. Each index into (#proposal, #category)
    pred_indices, final_probs = fastrcnn_predictions(decoded_boxes, label_probs)
    # Created only so the probabilities exist in the graph under a fixed name.
    tf.identity(final_probs, 'final_probs')
    final_boxes = tf.gather_nd(decoded_boxes, pred_indices, name='final_boxes')
    # Category indices start at 0, labels at 1.
    final_labels = tf.add(pred_indices[:, 1], 1, name='final_labels')
    return final_boxes, final_labels
def transpose5dWeight(w, wShape, strideT, strideY, strideX):
    """Regroup a stacked 5-D kernel back into its full strided layout.

    ``wShape`` describes ``w`` in its already-strided form, i.e. with the
    ``strideT * strideY * strideX`` sub-kernels stacked along the output
    feature axis. The result interleaves those sub-kernels along the time /
    height / width axes.

    Args:
        w: Weight tensor to gather from.
        wShape: Static shape of ``w`` as ``[ntp, nyp, nxp, nifp, nofp]``.
        strideT: Stride along the time axis.
        strideY: Stride along the height axis.
        strideX: Stride along the width axis.

    Returns:
        ``tf.gather_nd(w, idx)`` with shape ``[ntp * strideT, nyp * strideY,
        nxp * strideX, nifp, nofp // (strideT * strideY * strideX)]``.
    """
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Building weight indices for conv3d")
    # These shapes are in terms of the already strided values.
    [ntp, nyp, nxp, nifp, nofp] = wShape
    # Translate to target output shape. Floor division keeps nofp an int so
    # it can be used as an array dimension and a range bound on Python 3.
    ntp *= strideT
    nyp *= strideY
    nxp *= strideX
    nofp = nofp // (strideT * strideX * strideY)

    # Gather indices for the weights, laid out in the target output shape.
    weightIdxs = np.zeros((ntp, nyp, nxp, nifp, nofp, 5), dtype=np.int32)
    for otp in range(ntp):
        for oyp in range(nyp):
            for oxp in range(nxp):
                # Positions are reversed because a plain convolution is
                # standing in for a transposed one.
                itp = (ntp - otp - 1) // strideT
                iyp = (nyp - oyp - 1) // strideY
                ixp = (nxp - oxp - 1) // strideX
                # Which sub-kernel this output tap belongs to.
                kernelIdx = ((otp % strideT) * strideY * strideX
                             + (oyp % strideY) * strideX
                             + (oxp % strideX))
                for oifp in range(nifp):
                    # Input features stay the same.
                    for oofp in range(nofp):
                        # oofp is the offset inside the sub-kernel's feature
                        # group of size nofp.
                        iofp = oofp + nofp * kernelIdx
                        weightIdxs[otp, oyp, oxp, oifp, oofp, :] = [
                            itp, iyp, ixp, oifp, iofp]
    return tf.gather_nd(w, weightIdxs)
def arg_closest_anchor(bboxes, anchors):
    """Find the closest anchor for every box.

    Box Format [ymin, xmin, ymax, xmax]

    Closeness is the sum of squared differences over the first four columns.
    Returns, for each row of ``bboxes``, the index of the anchor with the
    smallest such distance.
    """
    num_anchors = anchors.get_shape().as_list()[0]
    num_bboxes = tf.shape(bboxes)[0]

    # Repeat each box index num_anchors times so that boxes and anchors can
    # be compared row against row.
    row_ids = tf.reshape(tf.range(num_bboxes), shape=[-1, 1])
    row_ids = tf.reshape(
        tf.stack([row_ids] * num_anchors, axis=1), shape=[-1, 1])
    bboxes_m = tf.gather_nd(bboxes, row_ids)
    anchors_m = tf.tile(anchors, [num_bboxes, 1])

    # Accumulate the squared difference coordinate by coordinate.
    square_dist = tf.squared_difference(bboxes_m[:, 0], anchors_m[:, 0])
    for col in range(1, 4):
        square_dist = square_dist + tf.squared_difference(
            bboxes_m[:, col], anchors_m[:, col])

    square_dist = tf.reshape(square_dist, shape=[num_bboxes, num_anchors])
    return tf.arg_min(square_dist, dimension=1)
def calculate_outputs(self, x):
    """Build the LSTM -> dense -> dense graph and return per-step predictions.

    Besides returning ``y_hat``, this stores on ``self`` the hidden state and
    prediction at step ``history_length - 1`` of every sequence
    (``final_states``, ``final_predictions``) and a ``prediction_tensors``
    dict exposing them together with ``self.user_id``.
    """
    lstm_out = lstm_layer(
        x, self.history_length, self.lstm_size, scope='lstm-1')
    h_final = time_distributed_dense_layer(
        lstm_out, 50, activation=tf.nn.relu, scope='dense-1')
    per_step = time_distributed_dense_layer(h_final, 1, scope='dense2')
    y_hat = tf.squeeze(per_step, 2)

    # (sequence, last valid step) coordinates.
    sequence_ids = tf.range(tf.shape(self.history_length)[0])
    final_temporal_idx = tf.stack(
        [sequence_ids, self.history_length - 1], axis=1)
    self.final_states = tf.gather_nd(h_final, final_temporal_idx)
    self.final_predictions = tf.gather_nd(y_hat, final_temporal_idx)

    self.prediction_tensors = {
        'user_ids': self.user_id,
        'final_states': self.final_states,
        'predictions': self.final_predictions
    }
    return y_hat
def argmax_attentive_matching(a, b, a_lengths, b_lengths, max_seq_len,
                              attention_func=dot_attention,
                              attention_func_kwargs=None):
    """
    Matches each vector in a with the vector in b that receives the largest
    attention score. The attention matrix is computed using attention_func,
    and for every timestep of a the position in b with the highest score is
    selected.

    Args:
        a: Input sequence a. Tensor of shape [batch_size, max_seq_len, input_size].
        b: Input sequence b. Tensor of shape [batch_size, max_seq_len, input_size].
        a_lengths: Lengths of sequences in a. Tensor of shape [batch_size].
        b_lengths: Lengths of sequences in b. Tensor of shape [batch_size].
        max_seq_len: Length of padded sequences a and b. Integer.
        attention_func: Function used to calculate attention matrix. Can be
            one of the following: multiplicative_attention,
            additive_attention, concat_attention, dot_attention, or
            cosine_attention.
        attention_func_kwargs: Keyword arguments to pass to attention_func.
            ``None`` (the default) means no extra keyword arguments.

    Returns:
        Tensor of shape [batch_size, max_seq_len, input_size] consisting of
        the matching vectors for each timestep in a.
    """
    # A None default avoids sharing one mutable dict across calls.
    if attention_func_kwargs is None:
        attention_func_kwargs = {}

    attn = attention_func(a, b, a_lengths, b_lengths, max_seq_len,
                          **attention_func_kwargs)
    # Best-scoring position in b for each timestep of a.
    b_match_idx = tf.argmax(attn, axis=2)
    # Batch index repeated along the time axis; int64 to match tf.argmax.
    batch_index = tf.tile(
        tf.expand_dims(tf.range(shape(b, 0), dtype=tf.int64), 1),
        (1, max_seq_len))
    b_idx = tf.stack([batch_index, b_match_idx], axis=2)
    return tf.gather_nd(b, b_idx)
def train_speech_to_text_network():
    """Build the CTC training graph and run 16 epochs of training.

    Uses the module-level placeholders ``X`` / ``Y`` and ``sequence_len``,
    resets the module-level ``pointer`` at the start of every epoch, prints
    the loss after each batch, and saves a checkpoint on every fifth epoch.
    """
    global pointer

    logit = speech_to_text_network()

    # CTC loss: the non-zero entries of Y become the sparse target, shifted
    # down by one.
    nonzero_idx = tf.where(tf.not_equal(tf.cast(Y, tf.float32), 0.))
    target = tf.SparseTensor(
        indices=nonzero_idx,
        values=tf.gather_nd(Y, nonzero_idx) - 1,
        shape=tf.cast(tf.shape(Y), tf.int64))
    loss = tf.nn.ctc_loss(logit, target, sequence_len, time_major=False)

    # optimizer
    lr = tf.Variable(0.001, dtype=tf.float32, trainable=False)
    optimizer = MaxPropOptimizer(learning_rate=lr, beta2=0.99)
    var_list = list(tf.trainable_variables())
    gradient = optimizer.compute_gradients(loss, var_list=var_list)
    optimizer_op = optimizer.apply_gradients(gradient)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables())

        for epoch in range(16):
            # Exponentially decayed learning rate.
            sess.run(tf.assign(lr, 0.001 * (0.97 ** epoch)))

            pointer = 0
            for batch in range(n_batch):
                batches_wavs, batches_labels = get_next_batches(batch_size)
                feed = {X: batches_wavs, Y: batches_labels}
                train_loss, _ = sess.run([loss, optimizer_op], feed_dict=feed)
                print(epoch, batch, train_loss)
            if epoch % 5 == 0:
                saver.save(sess, 'speech.module', global_step=epoch)
def virtual_adversarial_loss_bidir(logits, embedded, inputs,
                                   logits_from_embedding_fn):
    """Virtual adversarial loss for bidirectional models.

    Starts from random, length-masked perturbations of every embedding,
    refines them for ``FLAGS.num_power_iteration`` steps using the gradient
    of the KL divergence, and returns the KL divergence between the
    (gradient-stopped) ``logits`` and the logits of the perturbed embeddings.
    """
    logits = tf.stop_gradient(logits)
    f_inputs, _ = inputs
    weights = f_inputs.eos_weights
    if FLAGS.single_label:
        # Keep only the weight at the last step of every sequence.
        last_step = tf.stack(
            [tf.range(FLAGS.batch_size), f_inputs.length - 1], 1)
        weights = tf.expand_dims(
            tf.gather_nd(f_inputs.eos_weights, last_step), 1)
    assert weights is not None

    def perturbed_logits(deltas):
        # Logits of the embeddings with the perturbations added on.
        return logits_from_embedding_fn(
            [emb + d for (emb, d) in zip(embedded, deltas)])

    perturbs = [
        _mask_by_length(tf.random_normal(shape=tf.shape(emb)), f_inputs.length)
        for emb in embedded
    ]
    for _ in xrange(FLAGS.num_power_iteration):
        perturbs = [
            _scale_l2(d, FLAGS.small_constant_for_finite_diff)
            for d in perturbs
        ]
        kl = _kl_divergence_with_logits(
            logits, perturbed_logits(perturbs), weights)
        grads = tf.gradients(
            kl, perturbs,
            aggregation_method=tf.AggregationMethod.EXPERIMENTAL_ACCUMULATE_N)
        perturbs = [tf.stop_gradient(g) for g in grads]

    perturbs = [_scale_l2(d, FLAGS.perturb_norm_length) for d in perturbs]
    vadv_logits = perturbed_logits(perturbs)
    return _kl_divergence_with_logits(logits, vadv_logits, weights)
def testUnknownIndices(self):
    """With a shapeless indices placeholder the result's shape is unknown."""
    gathered = tf.gather_nd(
        tf.constant([[0, 1, 2]]), tf.placeholder(tf.int32))
    static_shape = gathered.get_shape()
    self.assertEqual(static_shape.ndims, None)
    self.assertEqual(static_shape[0].value, None)
def _gather_beams(nested, beam_indices, batch_size, new_beam_size):
    """Gather beams from nested structure of tensors.

    Each tensor in nested represents a batch of beams, where beam refers to a
    single search state (beam search involves searching through multiple
    states in parallel).

    This function is used to gather the top beams, specified by
    beam_indices, from the nested tensors.

    Args:
        nested: Nested structure (tensor, list, tuple or dict) containing
            tensors with shape [batch_size, beam_size, ...].
        beam_indices: int32 tensor with shape [batch_size, new_beam_size].
            Each value in beam_indices must be between [0, beam_size), and
            are not necessarily unique.
        batch_size: int size of batch
        new_beam_size: int number of beams to be pulled from the nested
            tensors.

    Returns:
        Nested structure containing tensors with shape
        [batch_size, new_beam_size, ...]
    """
    # First gather coordinate: the batch index of every selected beam, i.e. a
    # tensor like [[0,0,0,0],[1,1,1,1],..].
    flat_batch_ids = tf.range(batch_size * new_beam_size) // new_beam_size
    batch_pos = tf.reshape(flat_batch_ids, [batch_size, new_beam_size])

    # Stacking yields shape [batch_size, new_beam_size, 2]; the last
    # dimension holds the (i, j) coordinates handed to tf.gather_nd.
    coordinates = tf.stack([batch_pos, beam_indices], axis=2)

    def pick(state):
        return tf.gather_nd(state, coordinates)

    return nest.map_structure(pick, nested)
def create_model(input_shape, num_actions, model_name, create_network_fn,
                 learning_rate):  # noqa: D103
    """Create the Q-network model.

    Builds, under the name scope ``model_name``, the input placeholder, the
    network returned by ``create_network_fn``, the mean of the per-row max
    Q-value, a gather of the Q-values at the fed (row, action) indices, and
    an RMSProp step minimizing the mean Huber loss against the fed targets.

    Returns:
        ``(model, network_parameters)`` where ``model`` is a dict of the
        tensors and ops needed to run and train the network.
    """
    with tf.name_scope(model_name):
        input_frames = tf.placeholder(
            tf.float32, [None, input_shape], name='input_frames')
        q_network, network_parameters = create_network_fn(
            input_frames, input_shape, num_actions)

        per_row_max = tf.reduce_max(q_network, axis=[1])
        mean_max_Q = tf.reduce_mean(per_row_max, name='mean_max_Q')

        # Each row is a (batch row, action) coordinate into q_network.
        Q_vector_indexes = tf.placeholder(
            tf.int32, [None, 2], name='Q_vector_indexes')
        gathered_outputs = tf.gather_nd(
            q_network, Q_vector_indexes, name='gathered_outputs')

        y_ph = tf.placeholder(tf.float32, name='y_ph')
        loss = mean_huber_loss(y_ph, gathered_outputs)
        optimizer = tf.train.RMSPropOptimizer(
            learning_rate, decay=RMSP_DECAY, momentum=RMSP_MOMENTUM,
            epsilon=RMSP_EPSILON)
        train_step = optimizer.minimize(loss)

    model = {
        'q_network': q_network,
        'input_frames': input_frames,
        'Q_vector_indexes': Q_vector_indexes,
        'y_ph': y_ph,
        'train_step': train_step,
        'mean_max_Q': mean_max_Q,
    }
    return model, network_parameters
def batch_gather(reference, indices):
    """
    C+P From Keras pull request https://github.com/keras-team/keras/pull/6377/files

    Batchwise gathering of row indices.

    The numpy equivalent is `reference[np.arange(batch_size), indices]`, where
    `batch_size` is the first dimension of the reference tensor.

    # Arguments
        reference: A tensor with ndim >= 2 of shape.
          (batch_size, dim1, dim2, ..., dimN)
        indices: A 1d integer tensor of shape (batch_size); element i selects
          a position along the second axis (dim1) of row i.

    # Returns
        The selected tensor, with the second axis of `reference` removed.

    # Examples
        1. If reference is `[[3, 5, 7], [11, 13, 17]]` and indices is `[2, 1]`
        then the result is `[7, 13]`.

        2. If reference is
        ```
          [[[2, 3], [4, 5], [6, 7]],
           [[10, 11], [12, 13], [16, 17]]]
        ```
        and indices is `[2, 1]` then the result is `[[6, 7], [12, 13]]`.
    """
    num_rows = K.shape(reference)[0]
    # Pair every row number with the index requested for that row.
    coords = tf.stack([tf.range(num_rows), indices], axis=1)
    return tf.gather_nd(reference, coords)
def rnn_discriminator(x, x_l, cell_type, n_hidden, num_layers, in_dp, out_dp,
                      batch_size, reuse=False):
    """Multi-layer RNN discriminator emitting one logit per sequence.

    Args:
        x: Input sequences fed to ``tf.nn.dynamic_rnn``.
        x_l: Sequence lengths; the RNN output at step ``x_l - 1`` is
            classified.
        cell_type: "lstm", "gru" or "lstm-layerNorm"; any other value falls
            back to a basic (vanilla) RNN cell.
        n_hidden: Number of units in every cell.
        num_layers: Number of stacked cells.
        in_dp: Unused by this function.
        out_dp: Output keep probability; dropout is applied only when it is
            not 1.0.
        batch_size: Batch size used for the zero initial state and the
            gather indices.
        reuse: Passed to the 'RNN_DIS' variable scope.

    Returns:
        ``(preds, logits)``: the sigmoid of the logit and the raw logit, both
        produced by a single-unit fully connected layer.
    """
    with tf.variable_scope('RNN_DIS', reuse=reuse) as rnn_dis_scope:
        # encode sketch
        temp_cells = []
        for _ in range(num_layers):
            if cell_type == "lstm":
                temp_cell = tf.contrib.rnn.BasicLSTMCell(n_hidden)
            elif cell_type == "gru":
                temp_cell = tf.nn.rnn_cell.GRUCell(n_hidden)
            elif cell_type == "lstm-layerNorm":
                temp_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(n_hidden)
            else:
                # RNNCell itself is the abstract base class and cannot be
                # run; BasicRNNCell is the concrete vanilla RNN.
                temp_cell = tf.nn.rnn_cell.BasicRNNCell(n_hidden)
            temp_cells.append(temp_cell)
        rnn_cell = tf.contrib.rnn.MultiRNNCell(temp_cells)

        if out_dp != 1.0:
            rnn_cell = tf.contrib.rnn.DropoutWrapper(
                rnn_cell, output_keep_prob=out_dp)

        init_state = rnn_cell.zero_state(
            batch_size=batch_size, dtype=tf.float32)
        outputs, stateFinal = tf.nn.dynamic_rnn(
            rnn_cell, x, sequence_length=x_l, initial_state=init_state,
            dtype=tf.float32, scope=rnn_dis_scope)

        # Pick the output at the last valid step of every sequence.
        batch_range = tf.range(batch_size)
        indices = tf.stack([batch_range, x_l-1], axis=1)
        last_output = tf.gather_nd(outputs, indices)

        # classifier
        logits = slim.fully_connected(
            last_output, 1, activation_fn=None, scope='clf')
        # With a single logit, softmax is constantly 1.0; sigmoid gives the
        # actual probability.
        preds = tf.nn.sigmoid(logits)
        return preds, logits
def get_pixel_value(img, x, y):
    """
    Utility function to look up pixel values at the coordinates given by x
    and y in a batched image tensor.

    Input
    -----
    - img: image tensor, indexed as (batch, y, x, ...)
    - x: tensor of column coordinates; its first three dimensions are read
      as (batch, height, width)
    - y: tensor of row coordinates, stacked alongside x

    Returns
    -------
    - output: the values of img gathered at (batch, y, x)
    """
    coord_shape = tf.shape(x)
    batch_size, height, width = coord_shape[0], coord_shape[1], coord_shape[2]

    # Batch index broadcast over every (height, width) position.
    batch_column = tf.reshape(tf.range(0, batch_size), (batch_size, 1, 1))
    batch_grid = tf.tile(batch_column, (1, height, width))

    coords = tf.stack([batch_grid, y, x], 3)
    return tf.gather_nd(img, coords)
def compute_saliency_maps(X, y, model):
    """
    Compute a class saliency map using the model for images X and labels y.

    Input:
    - X: Input images, numpy array of shape (N, H, W, 3)
    - y: Labels for X, numpy of shape (N,)
    - model: A SqueezeNet model that will be used to compute the saliency map.

    Returns:
    - saliency: A numpy array of shape (N, H, W) giving the saliency maps for
      the input images.

    Relies on a session named `sess` being available in the enclosing scope.
    """
    # Score of the correct class for each example, shape [N]; the TensorFlow
    # counterpart of scores[np.arange(N), y] in NumPy.
    example_ids = tf.range(X.shape[0])
    label_coords = tf.stack((example_ids, model.labels), axis=1)
    correct_scores = tf.gather_nd(model.classifier, label_coords)

    losses = tf.square(1 - correct_scores)
    grad_img = tf.gradients(losses, model.image)
    feed = {model.image: X, model.labels: y}
    grad_img_val = sess.run(grad_img, feed_dict=feed)[0]

    # Keep the positive part of the gradient and sum over the channel axis.
    return np.sum(np.maximum(grad_img_val, 0), axis=3)
def _build_net(self):
    """Build the evaluation net, target net, TD loss and training op."""
    # ------------------ all inputs ------------------------
    self.s = tf.placeholder(tf.float32, [None, self.n_features], name='s')  # input State
    self.s_ = tf.placeholder(tf.float32, [None, self.n_features], name='s_')  # input Next State
    self.r = tf.placeholder(tf.float32, [None, ], name='r')  # input Reward
    self.a = tf.placeholder(tf.int32, [None, ], name='a')  # input Action

    w_initializer = tf.random_normal_initializer(0., 0.3)
    b_initializer = tf.constant_initializer(0.1)
    # Initializers shared by every dense layer below.
    init_kwargs = dict(kernel_initializer=w_initializer,
                       bias_initializer=b_initializer)

    # ------------------ build evaluate_net ------------------
    with tf.variable_scope('eval_net'):
        e1 = tf.layers.dense(self.s, 20, tf.nn.relu, name='e1', **init_kwargs)
        self.q_eval = tf.layers.dense(
            e1, self.n_actions, name='q', **init_kwargs)

    # ------------------ build target_net ------------------
    with tf.variable_scope('target_net'):
        t1 = tf.layers.dense(self.s_, 20, tf.nn.relu, name='t1', **init_kwargs)
        self.q_next = tf.layers.dense(
            t1, self.n_actions, name='t2', **init_kwargs)

    with tf.variable_scope('q_target'):
        max_q_next = tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
        q_target = self.r + self.gamma * max_q_next  # shape=(None, )
        # No gradient flows through the target.
        self.q_target = tf.stop_gradient(q_target)
    with tf.variable_scope('q_eval'):
        # (row, chosen action) coordinates into q_eval.
        row_ids = tf.range(tf.shape(self.a)[0], dtype=tf.int32)
        a_indices = tf.stack([row_ids, self.a], axis=1)
        self.q_eval_wrt_a = tf.gather_nd(
            params=self.q_eval, indices=a_indices)  # shape=(None, )
    with tf.variable_scope('loss'):
        td_error = tf.squared_difference(
            self.q_target, self.q_eval_wrt_a, name='TD_error')
        self.loss = tf.reduce_mean(td_error)
    with tf.variable_scope('train'):
        self._train_op = tf.train.RMSPropOptimizer(self.lr).minimize(self.loss)
def killRegions(anchors, image_attr, axis=-1):
    """
    Prune the anchors so that only those entirely within the image remain

    This function is the RPN-training analog of clipRegions, just more
    murderous

    Args:
        anchors: Anchor boxes; reshaped to (-1, 4) and unstacked as
            (x1, y1, x2, y2) along `axis`.
        image_attr: Indexable image attributes. The bounds are computed as
            image_attr[1] * image_attr[2] - 1 for x and
            image_attr[0] * image_attr[2] - 1 for y
            (presumably height, width, scale -- confirm against the caller).
        axis: Axis of the flattened anchors along which the four coordinates
            are unstacked.

    Output:
        The anchors that survive the slaughter, along with their indices
    """
    with tf.device("/cpu:0"):
        # Assumes input of shape (numBaseAnchors, feature_h, feature_w, 4)
        # Or, was previously as above but then got flattened to (-1,4)
        anchors = tf.reshape(anchors, [-1, 4], name="flattened_anchors")
        x1, y1, x2, y2 = tf.unstack(anchors, num=4, axis=axis)

        zero = tf.constant([0.])
        # Kept as plain tensors: wrapping them in a Python list would add a
        # leading axis and broadcast the comparisons to the wrong shape.
        max_x = tf.subtract(image_attr[1] * image_attr[2],
                            tf.constant([1.]), name="murder_img_w")
        max_y = tf.subtract(image_attr[0] * image_attr[2],
                            tf.constant([1.]), name="murder_img_h")

        x1_valid = x1 >= zero
        x2_valid = x2 <= max_x
        y1_valid = y1 >= zero
        y2_valid = y2 <= max_y

        # Element-wise AND; Python's `and` would try to turn whole tensors
        # into a single bool.
        anchor_valid = tf.logical_and(
            tf.logical_and(x1_valid, x2_valid),
            tf.logical_and(y1_valid, y2_valid))
        valid_indices = tf.where(anchor_valid, name="surviving_indices")
        return (tf.gather_nd(anchors, valid_indices, name="surviving_anchors"),
                valid_indices)
def compute_center_coords(self, y_true, y_pred):
    """Locate the intensity-weighted center of y_pred and sample y_true there.

    y_pred is averaged over its last axis, normalized per example and clipped
    to [0, 1]; the rounded weighted mean of self.xs / self.ys under that map
    gives one (x, y) center per example.

    Returns:
        (pred_gray, center_point_xyrgb): the normalized map, and the
        concatenation of the center x, the center y and the values of y_true
        at that center.
    """
    batch_size = tf.shape(y_pred)[0]
    h = tf.shape(y_pred)[1]
    w = tf.shape(y_pred)[2]
    n_chans = tf.shape(y_pred)[3]
    spatial_axes = [1, 2]
    eps = 1e-8

    # Coordinate grids repeated for every example in the batch.
    x = tf.cast(
        tf.tile(tf.reshape(self.xs, [1, h, w]), [batch_size, 1, 1]),
        tf.float32)
    y = tf.cast(
        tf.tile(tf.reshape(self.ys, [1, h, w]), [batch_size, 1, 1]),
        tf.float32)

    # grayscale: batch_size x h x w
    pred_gray = tf.reduce_mean(y_pred, axis=-1)
    # normalize
    pred_gray = pred_gray - tf.reduce_min(
        pred_gray, axis=spatial_axes, keepdims=True)
    pred_gray = pred_gray / (
        eps + tf.reduce_max(pred_gray, axis=spatial_axes, keepdims=True))
    pred_gray = tf.clip_by_value(pred_gray, 0., 1.)

    def weighted_center(grid):
        # Rounded weighted mean of `grid`, shape (batch_size, 1).
        mean = tf.reduce_sum(grid * pred_gray, axis=spatial_axes) / (
            eps + tf.reduce_sum(pred_gray, axis=spatial_axes))
        return tf.round(tf.expand_dims(mean, axis=-1))

    weighted_x = weighted_center(x)
    weighted_y = weighted_center(y)

    # (batch, y, x) coordinates, built as floats and cast once.
    batch_indices = tf.reshape(
        tf.linspace(0., tf.cast(batch_size, tf.float32) - 1., batch_size),
        [batch_size, 1])
    indices = tf.cast(
        tf.concat([batch_indices, weighted_y, weighted_x], axis=-1), tf.int32)

    center_rgb = tf.gather_nd(y_true, indices)
    center_rgb = tf.reshape(center_rgb, [batch_size, n_chans])

    center_point_xyrgb = tf.concat(
        [weighted_x, weighted_y, center_rgb], axis=-1)
    return pred_gray, center_point_xyrgb
def _lookup_probabilities(predictions, probabilities):
    """Return, for every row, the probability at that row's predicted index.

    `predictions` is cast to int32 and paired with its row number to form
    (row, prediction) coordinates into `probabilities`.
    """
    predicted = tf.cast(predictions, tf.int32)
    row_ids = tf.range(tf.shape(predicted)[0])
    row_column = tf.expand_dims(row_ids, 1)
    predicted_column = tf.expand_dims(predicted, 1)
    coords = tf.concat([row_column, predicted_column], 1)
    return tf.gather_nd(probabilities, coords)
def build_net(self):
    """Create placeholders, the eval/target networks, the TD loss and the train op.

    Everything is stored on ``self``: inputs ``s``, ``s_``, ``r``, ``a``;
    network outputs ``q_eval`` and ``q_next``; the target ``q_target``; the
    Q-value of the taken action ``q_eval_a``; ``loss`` and ``train_op``.
    """
    state_shape = [None, self.n_features]
    self.s = tf.placeholder(tf.float32, state_shape)
    self.s_ = tf.placeholder(tf.float32, state_shape)
    self.r = tf.placeholder(tf.float32, [None])
    self.a = tf.placeholder(tf.int32, [None])

    w_initializer = tf.random_normal_initializer(0., 0.3)
    b_initializer = tf.constant_initializer(0.1)

    def _two_layer_net(inputs, hidden_name, output_name):
        # 20-unit ReLU hidden layer followed by a linear layer with one output per action.
        hidden = tf.layers.dense(inputs, 20, tf.nn.relu,
                                 kernel_initializer=w_initializer,
                                 bias_initializer=b_initializer,
                                 name=hidden_name)
        return tf.layers.dense(hidden, self.n_actions,
                               kernel_initializer=w_initializer,
                               bias_initializer=b_initializer,
                               name=output_name)

    # q_eval network: takes the state features as input and outputs one value
    # per action (the original comment mentions 4 actions).
    with tf.variable_scope('eval_net'):
        self.q_eval = _two_layer_net(self.s, 'eval_layer', 'output_layer1')

    with tf.variable_scope('target_net'):
        self.q_next = _two_layer_net(self.s_, 'target_layer', 'output_layer2')

    with tf.variable_scope('q_target'):
        # Compute the expected value and wrap it in stop_gradient so that no
        # gradient flows through it, i.e. it is treated as a constant.
        best_next_q = tf.reduce_max(self.q_next, axis=1)
        self.q_target = tf.stop_gradient(self.r + self.gamma * best_next_q)

    with tf.variable_scope('q_eval'):
        # Pair each row number with the action taken in that row.
        row_ids = tf.range(tf.shape(self.a)[0])
        a_indices = tf.stack([row_ids, self.a], axis=1)
        self.q_eval_a = tf.gather_nd(params=self.q_eval, indices=a_indices)

    with tf.variable_scope('loss'):
        td_error_sq = tf.squared_difference(self.q_target, self.q_eval_a)
        self.loss = tf.reduce_mean(td_error_sq)

    with tf.variable_scope('train'):
        optimizer = tf.train.RMSPropOptimizer(self.lr)
        self.train_op = optimizer.minimize(self.loss)
def tf_gather_object_pc(point_cloud, mask, npoints=512):
    ''' Sample a fixed number of masked points from every point cloud.

    Input:
        point_cloud: TF tensor in shape (B,N,C)
        mask: TF tensor in shape (B,N) of 0 (not pick) or 1 (pick)
        npoints: int scalar, number of points gathered per cloud (default: 512)
    Output:
        object_pc: TF tensor in shape (B,npoint,C)
        indices: TF int tensor in shape (B,npoint,2)
    '''
    def mask_to_indices(mask):
        # Runs in numpy via tf.py_func; builds (batch, point) index pairs.
        batch = mask.shape[0]
        indices = np.zeros((batch, npoints, 2), dtype=np.int32)
        for i in range(batch):
            pos_indices = np.where(mask[i, :] > 0.5)[0]
            n_pos = len(pos_indices)
            # An empty selection leaves the point indices at 0 for this cloud.
            if n_pos > 0:
                if n_pos > npoints:
                    # Too many picked points: subsample without repetition.
                    choice = np.random.choice(n_pos, npoints, replace=False)
                else:
                    # Too few: keep every picked point and pad with repeats.
                    padding = np.random.choice(n_pos, npoints - n_pos, replace=True)
                    choice = np.concatenate((np.arange(n_pos), padding))
                np.random.shuffle(choice)
                indices[i, :, 1] = pos_indices[choice]
            indices[i, :, 0] = i
        return indices

    indices = tf.py_func(mask_to_indices, [mask], tf.int32)
    object_pc = tf.gather_nd(point_cloud, indices)
    return object_pc, indices
def accuracy_instance(predictions, targets, n=[1, 2, 3, 4, 5, 10], nb_classes=5, nb_samples_per_class=10, batch_size=1):
    """Set up the per-instance accuracy accumulators and their update step.

    ``targets`` is cast to the dtype of ``predictions`` and two zero tensors
    are created: ``accuracy`` of shape (batch_size, nb_samples_per_class) and
    ``indices`` of shape (batch_size, nb_classes + 1).  The nested ``step_``
    consumes one (prediction, target) pair and returns the updated pair of
    accumulators.

    NOTE(review): ``n`` is not used in the visible code, and nothing follows
    the definition of ``step_`` here, so as written this function returns
    None.  The step signature looks like it is meant for ``tf.scan`` --
    confirm against the original source.
    """
    targets = tf.cast(targets, predictions.dtype)

    accuracy = tf.constant(value=0, shape=(batch_size, nb_samples_per_class), dtype=tf.float32)
    indices = tf.constant(value=0, shape=(batch_size, nb_classes+1), dtype=tf.float32)

    def step_(carry, elems):
        """Fold one (prediction, target) pair into (accuracy, indices)."""
        # Tuple parameters in the signature are Python-2-only syntax
        # (removed by PEP 3113); unpacking here works on both 2 and 3.
        accuracy, indices = carry
        p, t = elems

        p = tf.cast(p, tf.int32)
        t = tf.cast(t, tf.int32)

        ##Accuracy Update
        batch_range = tf.cast(tf.range(0, batch_size), dtype=tf.int32)
        # Current counter stored in `indices` for each row's target class;
        # it selects the accuracy column that receives this step's result.
        gather = tf.cast(tf.gather_nd(indices, tf.stack([tf.range(0, p.get_shape().as_list()[0]), t], axis=1)), tf.int32)
        index = tf.cast(tf.stack([batch_range, gather], axis=1), dtype=tf.int64)
        val = tf.cast(tf.equal(p, t), tf.float32)
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(accuracy.get_shape().as_list(), tf.int64))
        accuracy = accuracy + tf.sparse_tensor_to_dense(delta)

        ##Index Update
        # Add 1 to the counter of the target class in every row.
        index = tf.cast(tf.stack([batch_range, t], axis=1), dtype=tf.int64)
        val = tf.constant(1.0, shape=[batch_size])
        delta = tf.SparseTensor(indices=index, values=val, dense_shape=tf.cast(indices.get_shape().as_list(), dtype=tf.int64))
        indices = indices + tf.sparse_tensor_to_dense(delta)

        return [accuracy, indices]
def build_graph(self, *inputs):
    """Build the single-feature-map (C4 backbone) detection graph.

    ``inputs`` is unpacked as (image, anchor_labels, anchor_boxes, gt_boxes,
    gt_labels), with gt_masks appended when ``cfg.MODE_MASK`` is set.

    In training mode the summed loss (RPN, Fast R-CNN, optional mask loss and
    weight decay), scaled by 1 / cfg.TRAIN.NUM_GPUS, is returned.  In
    inference mode nothing is returned; the branch only adds ops to the
    graph, among them a sigmoid named 'final_masks' when masks are enabled.
    """
    is_training = get_current_tower_context().is_training
    if cfg.MODE_MASK:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels, gt_masks = inputs
    else:
        image, anchor_labels, anchor_boxes, gt_boxes, gt_labels = inputs
    image = self.preprocess(image)     # 1CHW

    # Backbone feature map and the RPN head on top of it.
    featuremap = resnet_c4_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK[:3])
    rpn_label_logits, rpn_box_logits = rpn_head('rpn', featuremap, cfg.RPN.HEAD_DIM, cfg.RPN.NUM_ANCHOR)

    anchors = RPNAnchors(get_all_anchors(), anchor_labels, anchor_boxes)
    anchors = anchors.narrow_to(featuremap)

    image_shape2d = tf.shape(image)[2:]     # h,w
    pred_boxes_decoded = anchors.decode_logits(rpn_box_logits)  # fHxfWxNAx4, floatbox
    # The pre-/post-NMS top-k limits differ between training and testing.
    proposal_boxes, proposal_scores = generate_rpn_proposals(
        tf.reshape(pred_boxes_decoded, [-1, 4]),
        tf.reshape(rpn_label_logits, [-1]),
        image_shape2d,
        cfg.RPN.TRAIN_PRE_NMS_TOPK if is_training else cfg.RPN.TEST_PRE_NMS_TOPK,
        cfg.RPN.TRAIN_POST_NMS_TOPK if is_training else cfg.RPN.TEST_POST_NMS_TOPK)

    if is_training:
        # sample proposal boxes in training
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        # Use all proposal boxes in inference
        rcnn_boxes = proposal_boxes

    # Rescale the boxes by the anchor stride before cropping the feature map.
    boxes_on_featuremap = rcnn_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE)
    roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

    feature_fastrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])    # nxcx7x7
    # Keep C5 feature to be shared with mask branch
    feature_gap = GlobalAvgPooling('gap', feature_fastrcnn, data_format='channels_first')
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_outputs('fastrcnn', feature_gap, cfg.DATA.NUM_CLASS)

    if is_training:
        # rpn loss
        rpn_label_loss, rpn_box_loss = rpn_losses(
            anchors.gt_labels, anchors.encoded_gt_boxes(), rpn_label_logits, rpn_box_logits)

        # fastrcnn loss
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        # Only samples with a label > 0 contribute boxes / box logits below.
        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            # In training, mask branch shares the same C5 feature.
            fg_feature = tf.gather(feature_fastrcnn, fg_inds_wrt_sample)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', fg_feature, cfg.DATA.NUM_CATEGORY, num_convs=0)   # #fg x #cat x 14x14

            # Crop the ground-truth masks to the sampled foreground boxes to
            # obtain the mask targets.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 14,
                pad_border=False)  # nfg x 1x14x14
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        # Weight decay on variables whose names match the regex below.
        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        total_cost = tf.add_n([
            rpn_label_loss, rpn_box_loss,
            fastrcnn_label_loss, fastrcnn_box_loss,
            mrcnn_loss, wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost * (1. / cfg.TRAIN.NUM_GPUS)
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # Run the mask head on features cropped with the final boxes.
            roi_resized = roi_align(featuremap, final_boxes * (1.0 / cfg.RPN.ANCHOR_STRIDE), 14)
            feature_maskrcnn = resnet_conv5(roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCK[-1])
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 0)   # #result x #cat x 14x14
            # For each result keep only the mask at index (label - 1).
            indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx14x14
            tf.sigmoid(final_mask_logits, name='final_masks')
def build_graph(self, *inputs):
    """Build the multi-level (FPN) detection graph.

    ``inputs[0]`` is the image, followed by two anchor tensors per FPN level;
    ground-truth boxes and labels are read from fixed positions 11 and 12,
    and the masks from the last position when ``cfg.MODE_MASK`` is set.

    In training mode the summed loss (RPN, Fast R-CNN, optional mask loss and
    weight decay), scaled by 1 / cfg.TRAIN.NUM_GPUS, is returned.  In
    inference mode nothing is returned; the branch only adds ops to the
    graph, among them a sigmoid named 'final_masks' when masks are enabled.
    """
    num_fpn_level = len(cfg.FPN.ANCHOR_STRIDES)
    assert len(cfg.RPN.ANCHOR_SIZES) == num_fpn_level
    is_training = get_current_tower_context().is_training
    image = inputs[0]
    # The per-level anchor inputs are interleaved: even slots and odd slots
    # are passed as the 2nd and 3rd RPNAnchors arguments respectively.
    input_anchors = inputs[1: 1 + 2 * num_fpn_level]
    multilevel_anchors = [RPNAnchors(*args) for args in
                          zip(get_all_anchors_fpn(), input_anchors[0::2], input_anchors[1::2])]
    # NOTE(review): indices 11/12 directly follow the anchor inputs only when
    # num_fpn_level == 5 -- confirm this is always the case.
    gt_boxes, gt_labels = inputs[11], inputs[12]
    if cfg.MODE_MASK:
        gt_masks = inputs[-1]

    image = self.preprocess(image)     # 1CHW
    image_shape2d = tf.shape(image)[2:]     # h,w

    c2345 = resnet_fpn_backbone(image, cfg.BACKBONE.RESNET_NUM_BLOCK)
    p23456 = fpn_model('fpn', c2345)
    self.slice_feature_and_anchors(image_shape2d, p23456, multilevel_anchors)

    # Multi-Level RPN Proposals
    rpn_outputs = [rpn_head('rpn', pi, cfg.FPN.NUM_CHANNEL, len(cfg.RPN.ANCHOR_RATIOS))
                   for pi in p23456]
    multilevel_label_logits = [k[0] for k in rpn_outputs]
    multilevel_box_logits = [k[1] for k in rpn_outputs]

    proposal_boxes, proposal_scores = generate_fpn_proposals(
        multilevel_anchors, multilevel_label_logits,
        multilevel_box_logits, image_shape2d)

    if is_training:
        rcnn_boxes, rcnn_labels, fg_inds_wrt_gt = sample_fast_rcnn_targets(
            proposal_boxes, gt_boxes, gt_labels)
    else:
        # The boxes to be used to crop RoIs.
        rcnn_boxes = proposal_boxes

    # Only the first four pyramid levels are used for RoI feature extraction.
    roi_feature_fastrcnn = multilevel_roi_align(p23456[:4], rcnn_boxes, 7)

    # The Fast R-CNN head is looked up by name from the config.
    fastrcnn_head_func = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
    fastrcnn_label_logits, fastrcnn_box_logits = fastrcnn_head_func(
        'fastrcnn', roi_feature_fastrcnn, cfg.DATA.NUM_CLASS)

    if is_training:
        # rpn loss:
        rpn_label_loss, rpn_box_loss = multilevel_rpn_losses(
            multilevel_anchors, multilevel_label_logits, multilevel_box_logits)

        # fastrcnn loss:
        matched_gt_boxes = tf.gather(gt_boxes, fg_inds_wrt_gt)

        # Only samples with a label > 0 contribute boxes / box logits below.
        fg_inds_wrt_sample = tf.reshape(tf.where(rcnn_labels > 0), [-1])   # fg inds w.r.t all samples
        fg_sampled_boxes = tf.gather(rcnn_boxes, fg_inds_wrt_sample)
        fg_fastrcnn_box_logits = tf.gather(fastrcnn_box_logits, fg_inds_wrt_sample)

        fastrcnn_label_loss, fastrcnn_box_loss = self.fastrcnn_training(
            image, rcnn_labels, fg_sampled_boxes,
            matched_gt_boxes, fastrcnn_label_logits, fg_fastrcnn_box_logits)

        if cfg.MODE_MASK:
            # maskrcnn loss
            fg_labels = tf.gather(rcnn_labels, fg_inds_wrt_sample)
            roi_feature_maskrcnn = multilevel_roi_align(
                p23456[:4], fg_sampled_boxes, 14,
                name_scope='multilevel_roi_align_mask')
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4)   # #fg x #cat x 28 x 28

            # Crop the ground-truth masks to the sampled foreground boxes to
            # obtain the mask targets.
            target_masks_for_fg = crop_and_resize(
                tf.expand_dims(gt_masks, 1),
                fg_sampled_boxes,
                fg_inds_wrt_gt, 28,
                pad_border=False)  # fg x 1x28x28
            target_masks_for_fg = tf.squeeze(target_masks_for_fg, 1, 'sampled_fg_mask_targets')
            mrcnn_loss = maskrcnn_loss(mask_logits, fg_labels, target_masks_for_fg)
        else:
            mrcnn_loss = 0.0

        # Weight decay on variables whose names match the regex below.
        wd_cost = regularize_cost(
            '(?:group1|group2|group3|rpn|fpn|fastrcnn|maskrcnn)/.*W',
            l2_regularizer(cfg.TRAIN.WEIGHT_DECAY), name='wd_cost')

        total_cost = tf.add_n([rpn_label_loss, rpn_box_loss,
                               fastrcnn_label_loss, fastrcnn_box_loss,
                               mrcnn_loss, wd_cost], 'total_cost')

        add_moving_summary(total_cost, wd_cost)
        return total_cost * (1. / cfg.TRAIN.NUM_GPUS)
    else:
        final_boxes, final_labels = self.fastrcnn_inference(
            image_shape2d, rcnn_boxes, fastrcnn_label_logits, fastrcnn_box_logits)
        if cfg.MODE_MASK:
            # Cascade inference needs roi transform with refined boxes.
            roi_feature_maskrcnn = multilevel_roi_align(p23456[:4], final_boxes, 14)
            mask_logits = maskrcnn_upXconv_head(
                'maskrcnn', roi_feature_maskrcnn, cfg.DATA.NUM_CATEGORY, 4)   # #fg x #cat x 28 x 28
            # For each result keep only the mask at index (label - 1).
            indices = tf.stack([tf.range(tf.size(final_labels)), tf.to_int32(final_labels) - 1], axis=1)
            final_mask_logits = tf.gather_nd(mask_logits, indices)   # #resultx28x28
            tf.sigmoid(final_mask_logits, name='final_masks')
def deconv_featuremap_with_data(layer_id, featuremap_id, tf_selected_dataset, guided_backprop=False):
    """Build forward and backward (deconvolution) ops for one feature map.

    The forward pass walks ``iconv_ops`` (conv -> bias + ReLU, max-pool with
    argmax) and ends with the 'fulcon_out' matmul.  The backward pass starts
    from the activations of ``layer_id``, keeps only the largest activation of
    feature map ``featuremap_id`` per sample, and walks the ops back towards
    the input using transposed convolutions and switch-based unpooling.

    Args:
        layer_id: name of the op in ``iconv_ops`` whose feature map is visualised.
        featuremap_id: channel index inside that layer's output.
        tf_selected_dataset: input tensor fed to the first op.
        guided_backprop: if True, record the sign of each conv output (before
            bias and ReLU) and use it to mask the backward signal after unpooling.

    Returns:
        (outputs_fwd, outputs_bckwd): lists of the tensors produced by the
        forward and the backward pass, in creation order.

    Relies on the module-level names ``weights``, ``biases``, ``hyparams``,
    ``iconv_ops``, ``logger`` and ``session``.
    """
    global weights, biases

    pool_switches = {}
    activation_masks = {}  # used for guided_backprop
    outputs_fwd = []
    logger.info('Current set of operations: %s' % iconv_ops)
    outputs_fwd.append(tf_selected_dataset)
    logger.debug('Received data for X(%s)...' % outputs_fwd[-1].get_shape().as_list())

    logger.info('Performing the forward pass ...')

    output_fwd_for_layer = None
    # need to calculate the output according to the layers we have
    for op in iconv_ops:
        if 'conv' in op:
            logger.debug('\tConvolving (%s) With Weights:%s Stride:%s' %
                         (op, hyparams[op]['weights'], hyparams[op]['stride']))
            logger.debug('\t\tX before convolution:%s' % (outputs_fwd[-1].get_shape().as_list()))
            logger.debug('\t\tWeights: %s', weights[op].get_shape().as_list())
            outputs_fwd.append(
                tf.nn.conv2d(outputs_fwd[-1], weights[op], hyparams[op]['stride'],
                             padding=hyparams[op]['padding']))
            logger.debug('\t\t Relu with x(%s) and b(%s)' %
                         (outputs_fwd[-1].get_shape().as_list(), biases[op].get_shape().as_list()))

            # should happen before RELU application
            if guided_backprop:
                # Mask of positive conv outputs (taken before bias and ReLU).
                activation_masks[op] = tf.greater(
                    outputs_fwd[-1], tf.constant(0, dtype=tf.float32))
                assert activation_masks[op].get_shape().as_list(
                ) == outputs_fwd[-1].get_shape().as_list()

            outputs_fwd[-1] = tf.nn.relu(outputs_fwd[-1] + biases[op])
            logger.debug('\t\tX after %s:%s' % (op, outputs_fwd[-1].get_shape().as_list()))

            # Remember the activations of the layer being visualised.
            if op == layer_id:
                output_fwd_for_layer = outputs_fwd[-1]

        if 'pool' in op:
            logger.debug('\tPooling (%s) with Kernel:%s Stride:%s' %
                         (op, hyparams[op]['kernel'], hyparams[op]['stride']))
            # The argmax "switches" are kept so the backward pass can unpool.
            pool_out, switch = tf.nn.max_pool_with_argmax(
                outputs_fwd[-1], ksize=hyparams[op]['kernel'],
                strides=hyparams[op]['stride'], padding=hyparams[op]['padding'])
            outputs_fwd.append(pool_out)
            pool_switches[op] = switch
            logger.debug('\t\tX after %s:%s' % (op, outputs_fwd[-1].get_shape().as_list()))

        if 'fulcon' in op:
            break

    shape = outputs_fwd[-1].get_shape().as_list()
    rows = shape[0]

    print('Unwrapping last convolution layer %s to %s hidden layer' % (shape, (rows, hyparams['fulcon_out']['in'])))
    reshaped_output = tf.reshape(outputs_fwd[-1], [rows, hyparams['fulcon_out']['in']])

    outputs_fwd.append(
        tf.matmul(reshaped_output, weights['fulcon_out']) + biases['fulcon_out'])

    logger.info('Performing the backward pass ...\n')
    logger.debug('\tInput Size (Non-Zeroed): %s', str(outputs_fwd[-1].get_shape().as_list()))

    # b h w d parameters of the required layer
    # b - batch size, h - height, w - width, d - number of filters
    b, h, w, d = output_fwd_for_layer.get_shape().as_list()

    # outputs[-1] will have the required activation
    # will be of size b x 1 x 1

    # we create a tensor from the activations of the layer which only has non-zeros
    # for the selected feature map (layer_activations_2)
    # Channels are moved to the front so that a single scatter index selects
    # the whole feature map; the layout is restored right after.
    layer_activations = tf.transpose(output_fwd_for_layer, [3, 0, 1, 2])
    layer_indices = tf.constant([[featuremap_id]])
    layer_updates = tf.expand_dims(layer_activations[featuremap_id, :, :, :], 0)
    layer_activations_2 = tf.scatter_nd(
        layer_indices, layer_updates,
        tf.constant(layer_activations.get_shape().as_list()))
    layer_activations_2 = tf.transpose(layer_activations_2, [1, 2, 3, 0])
    assert output_fwd_for_layer.get_shape().as_list(
    ) == layer_activations_2.get_shape().as_list()

    # single out only the maximally activated neuron and set the zeros
    argmax_indices = tf.argmax(tf.reshape(layer_activations_2, [b, h * w * d]), axis=1)
    batch_range = tf.range(b, dtype=tf.int32)
    nonzero_indices = tf.stack(
        [batch_range, tf.to_int32(argmax_indices)], axis=1)
    updates = tf.gather_nd(tf.reshape(layer_activations_2, [b, h * w * d]), nonzero_indices)

    # OBSOLETE At the Moment
    # this will be of size layer_id (some type of b x h x w x d)
    # (deriv_updates is not read again in this function; dOut_over_dh is only logged)
    dOut_over_dh = tf.gradients(outputs_fwd[-1], output_fwd_for_layer)[0]
    deriv_updates = tf.gather_nd(tf.reshape(dOut_over_dh, [b, h * w * d]), nonzero_indices)

    logger.debug('\tNon-zero indices shape: %s', nonzero_indices.get_shape().as_list())
    logger.debug('\tNon-zero updates shape: %s', updates.get_shape().as_list())
    logger.debug('\tdOut/dh shape: %s', dOut_over_dh.get_shape().as_list())

    # OBSOLETE
    # Creating the new gradient tensor (of size: b x w x h x d) for deconv
    # with only the gradient for highest activation of given feature map ID non-zero and rest set to zero
    # (as written, the scattered values are the activations in `updates`,
    # not the gradients gathered above)
    zeroed_derivatives = tf.scatter_nd(
        nonzero_indices, updates,
        tf.constant([b, h * w * d], dtype=tf.int32))

    zeroed_derivatives = tf.reshape(zeroed_derivatives, [b, h, w, d])

    outputs_bckwd = [
        zeroed_derivatives
    ]  # this will be the output of the previous layer to layer_id
    prev_op_index = iconv_ops.index(layer_id)

    logger.debug('Input Size (Zeroed): %s', str(outputs_bckwd[-1].get_shape().as_list()))

    # Walk the ops from layer_id back towards the input.
    for op in reversed(iconv_ops[:prev_op_index + 1]):
        if 'conv' in op:
            # Deconvolution
            logger.debug('\tDeConvolving (%s) With Weights:%s Stride:%s' %
                         (op, weights[op].get_shape().as_list(), hyparams[op]['stride']))
            logger.debug('\t\tX before deconvolution:%s' % (outputs_bckwd[-1].get_shape().as_list()))
            logger.debug('\t\tWeights: %s', weights[op].get_shape().as_list())

            # Spatial dims grow by the stride; the channel count becomes the
            # third entry of the op's weight shape.
            output_shape = outputs_bckwd[-1].get_shape().as_list()
            output_shape[1] *= hyparams[op]['stride'][1]
            output_shape[2] *= hyparams[op]['stride'][2]
            output_shape[3] = hyparams[op]['weights'][2]
            logger.debug('\t\tExpected output shape: %s', output_shape)
            outputs_bckwd.append(
                tf.nn.conv2d_transpose(outputs_bckwd[-1], filter=weights[op],
                                       strides=hyparams[op]['stride'],
                                       padding=hyparams[op]['padding'],
                                       output_shape=tf.constant(output_shape)))

            logger.debug('\t\tX after %s:%s' % (op, outputs_bckwd[-1].get_shape().as_list()))

        if 'pool' in op:
            # find previous conv_op
            previous_conv_op = None
            for before_op in reversed(iconv_ops[:iconv_ops.index(op) + 1]):
                if 'conv' in before_op:
                    previous_conv_op = before_op
                    break

            logger.debug('\tDetected previous conv op %s', previous_conv_op)

            # Unpooling operation and Rectification
            logger.debug('\tUnPooling (%s) with Kernel:%s Stride:%s' %
                         (op, hyparams[op]['kernel'], hyparams[op]['stride']))
            logger.debug('\t\tInput shape: %s', outputs_bckwd[-1].get_shape().as_list())

            output_shape = outputs_bckwd[-1].get_shape().as_list()
            output_shape[1] *= hyparams[op]['stride'][1]
            output_shape[2] *= hyparams[op]['stride'][2]

            logger.debug('\t\tExpected output shape: %s', output_shape)

            # Unpooling
            # Switch variable returns an array of size b x h x w x d. But only provide flattened indices
            # Meaning that if you have an output of size 4x4 it will flatten it to a 16 element long array

            # we're going to make a batch_range which is like [0,0,...,0,1,1,...,1,...]
            # so each unique number will have (h/stride * w/stride * d) elements
            # first it will be of shape b x h/stride x w/stride x d
            # but then we reshape it to b x (h/stride * w/stride * d)
            tf_switches = pool_switches[op]
            tf_batch_range = tf.reshape(tf.range(b, dtype=tf.int32), [b, 1, 1, 1])
            tf_ones_mask = tf.ones_like(tf_switches, dtype=tf.int32)
            tf_multi_batch_range = tf_ones_mask * tf_batch_range

            # here we have indices that looks like b*(h/stride)*(w/stride) x 2
            tf_indices = tf.stack([
                tf.reshape(tf_multi_batch_range, [-1]),
                tf.reshape(tf.to_int32(tf_switches), [-1])
            ], axis=1)

            updates = tf.reshape(outputs_bckwd[-1], [-1])

            # NOTE(review): `ref` is created and initialised but not read by
            # the scatter_nd below -- looks like a leftover; confirm before removing.
            ref = tf.Variable(tf.zeros(
                [b, output_shape[1] * output_shape[2] * output_shape[3]], dtype=tf.float32),
                dtype=tf.float32, name='ref_' + op, trainable=False)

            session.run(tf.variables_initializer([ref]))

            # Place every pooled value back at its recorded (batch, switch) position.
            updated_unpool = tf.scatter_nd(
                tf.to_int32(tf_indices), updates,
                tf.constant(
                    [b, output_shape[1] * output_shape[2] * output_shape[3]]),
                name='updated_unpool_' + op)

            outputs_bckwd.append(
                tf.reshape(
                    updated_unpool,
                    [b, output_shape[1], output_shape[2], output_shape[3]]))

            # should happen before RELU
            if guided_backprop and previous_conv_op is not None:
                logger.info('Output-bckwd: %s', outputs_bckwd[-1].get_shape().as_list())
                logger.info(
                    'Activation mask %s',
                    activation_masks[previous_conv_op].get_shape().as_list())
                assert outputs_bckwd[-1].get_shape().as_list(
                ) == activation_masks[previous_conv_op].get_shape().as_list()
                # Zero the signal wherever the forward conv output was not positive.
                outputs_bckwd[-1] = outputs_bckwd[-1] * tf.to_float(
                    activation_masks[previous_conv_op])

            outputs_bckwd[-1] = tf.nn.relu(outputs_bckwd[-1])

            logger.debug('\t\tX after %s:%s' % (op, outputs_bckwd[-1].get_shape().as_list()))

    return outputs_fwd, outputs_bckwd
[[5, 5, 5], [9, 9, 9], [0, 0, 0], [0, 0, 0]]])


def f(p1, p2, p3, p4, ph):
    """Print p1, p2 and p3, then return ("<p1>+<p2>", p2).

    p4 and ph are accepted but not used.
    """
    print("p1: {}".format(p1))
    print("p2: {}".format(p2))
    print("received p3:{}".format(p3))
    return "{}+{}".format(p1, p2), p2


# Scratch experiments with tf.gather_nd and tf.map_fn.
# `t` is expected to be defined above this fragment.
with tf.Session() as sess:
    # l = sess.run(le)
    # print(l)
    # print(ts)
    # print([1, 2] + [2, 3] + [4])
    # Each innermost pair in the index list addresses t[i, j].
    st = tf.gather_nd(t, [[[0, 2], [0, 3]], [[1, 1], [1, 2]], [[2, 0], [2, 1]]])
    print(st.get_shape())
    print(st.eval())
    print(t.get_shape())
    px1 = tf.placeholder(tf.string, [None, 3, 1])
    x1 = np.array([[['a'], ['b'], ['c']], [['1'], ['2'], ['3']]])
    x2 = np.array([5, 6])
    ph = tf.placeholder(tf.float32)
    # input = tf.Variable([[1.0, 2.0], [3.0, 4.0]])
    # `elems` is not used by the map_fn call below, which maps over (px1, x2).
    elems = (np.array([1, 2, 3]), np.array([-1, 1, -1]))
    r = tf.map_fn(lambda x: f(x[0], x[1], '3', '4', ph), (px1, x2))
    print(sess.run(r, feed_dict={px1: x1, ph: 0.5}))
    print("---------------------------")
    a = tf.constant([[1, 2, 3], [4, 5, 6]])
    b = tf.constant([True, False], dtype=tf.bool)
def get_final(sequence, sequence_length, time_major=True):
    """Gather the final item of every sequence in the batch.

    The gather indices come from ``_get_final_index(sequence_length, time_major)``.
    """
    last_item_index = _get_final_index(sequence_length, time_major)
    final_items = tf.gather_nd(sequence, last_item_index)
    return final_items
# Generated samples and inferred latents for the labelled / unlabelled batches.
gen_dat = generator(latent)
inf_lat_lab = inference(x_lab)
inf_lat_unl = inference(x_unl)
# The discriminator scores (data, latent) pairs; its first output is the pre-softmax logits.
output_before_softmax_lab, xx_lab = discriminator(x_lab,inf_lat_lab)
output_before_softmax_unl, xx_unl = discriminator(x_unl,inf_lat_unl)
output_before_softmax_gen, xx_gen = discriminator(gen_dat,latent)

def tf_log_sum_exp(xs):
    """Return log(sum(exp(xs))) over the last axis, shifting by the maximum first."""
    # reduce_max is called without an axis, so the shift is the maximum of the
    # whole tensor rather than of each row; the shift is added back below.
    maxes = tf.reduce_max(xs, keep_dims=True)
    xs -= maxes
    return tf.squeeze(maxes, [-1]) + tf.log(tf.reduce_sum(tf.exp(xs), -1))

# Logit of the true class for every labelled example: index pairs (row, label).
l_lab = tf.gather_nd(output_before_softmax_lab, tf.concat([
    tf.expand_dims(tf.range(batch_size), 1),
    tf.expand_dims(labels,1)
    ],1))
l_unl = tf_log_sum_exp(output_before_softmax_unl)
l_gen = tf_log_sum_exp(output_before_softmax_gen)
# Supervised loss: -(true-class logit) + log-sum-exp of the logits.
loss_lab_node = -tf.reduce_mean(l_lab) + tf.reduce_mean( tf_log_sum_exp(output_before_softmax_lab))
# Unsupervised loss built from the log-sum-exp of the unlabelled and generated logits.
loss_unl_node = -0.5*tf.reduce_mean(l_unl) + 0.5*tf.reduce_mean(tf.nn.softplus(l_unl)) + 0.5*tf.reduce_mean(tf.nn.softplus(l_gen))
# Fraction of labelled examples whose argmax class differs from the label.
train_err_node = tf.reduce_mean(tf.to_float(tf.not_equal(tf.to_int32(tf.argmax(output_before_softmax_lab, axis=1)),labels)))
# test error
# NOTE(review): computed from the same x_lab / inf_lat_lab tensors as the
# training error -- presumably different data is fed at test time; confirm.
output_before_softmax, xx_lab = discriminator(x_lab, inf_lat_lab)
test_err_node = tf.reduce_mean(tf.to_float(tf.not_equal(tf.to_int32(tf.argmax(output_before_softmax, axis=1)),labels)))
# training the disc net
params_disc = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, "Net_Dis")
def gen_adv_mal_graph(self, x_tensor, y_tensor, trials=0):
    """
    static graph for enhancing attack

    The batch is split into malicious rows (non-zero label) and benign rows
    (label 0); only the malicious rows are perturbed by the inner maximizer.

    :param x_tensor: batch of input data
    :param y_tensor: batch of ground truths
    :param trials: number of random restarts; 0 runs the inner maximizer once
        from the unperturbed malicious samples
    :return: (adv_x_tensor, rtn_x, rtn_y) -- perturbed malicious rows followed
        by the benign rows, and the original data / labels in that same order
    :raises ValueError: if trials is negative
    """
    mal_indices = tf.where(y_tensor)  # '1' denotes the malicious sample
    mal_x_tensor = tf.gather_nd(x_tensor, mal_indices)
    mal_y_tensor = tf.gather_nd(y_tensor, mal_indices)
    ben_indices = tf.where(tf.equal(y_tensor, 0))
    ben_x_tensor = tf.gather_nd(x_tensor, ben_indices)
    ben_y_tensor = tf.gather_nd(y_tensor, ben_indices)

    # Store the number of malicious samples (the sum of the labels) once the
    # malicious tensors above have been evaluated.
    with tf.control_dependencies([mal_x_tensor, mal_y_tensor]):
        self.batch_size_mal = tf.assign(self.batch_size_mal, tf.reduce_sum(y_tensor))

    # NOTE: this local helper shadows the builtin `filter` inside this method.
    def filter(adv_mal_x):
        """
        replace the perturbed case but not adversarial case of pristine data

        A perturbed row is kept only when the model's prediction for it
        differs from the true label; otherwise the original row is returned.
        """
        _1, _2, logits = self.nn(adv_mal_x, self.hidden_layers, self.output_dim, False, name=self.model_name, reuse=True)
        pred_y_adv = tf.argmax(logits, axis=1)
        # 1.0 where the perturbed sample is misclassified, 0.0 otherwise.
        incorrect_case = tf.reshape(
            tf.to_float(tf.logical_not(tf.equal(pred_y_adv, mal_y_tensor))), (-1, 1))
        return tf.stop_gradient((adv_mal_x - mal_x_tensor) * incorrect_case + mal_x_tensor)

    if trials == 0:
        adv_mal_x = filter(
            tf.stop_gradient(
                self.inner_maximizer.graph(mal_x_tensor, mal_y_tensor)))
        adv_x_tensor = tf.concat([adv_mal_x, ben_x_tensor], axis=0)
        rtn_x = tf.concat([mal_x_tensor, ben_x_tensor], axis=0)
        rtn_y = tf.concat([mal_y_tensor, ben_y_tensor], axis=0)
        return adv_x_tensor, rtn_x, rtn_y
    elif trials >= 1:
        # random start
        x_shape = mal_x_tensor.get_shape().as_list()
        # Repeat the malicious batch once per trial.
        mal_x_batch_ext = tf.tile(mal_x_tensor, [trials, 1])
        mal_y_batch_ext = tf.tile(mal_y_tensor, [
            trials,
        ])
        eta = tf.random_uniform([
            1,
        ], 0, self.hp_params.eta)
        # Initial perturbation in {-1, 0, +1}: the sign of a uniform draw from
        # [-1, 1), kept only where its magnitude exceeds 1 - eta.
        init_perturbations = tf.random_uniform(tf.shape(mal_x_batch_ext), minval=-1., maxval=1., dtype=tf.float32)
        init_perturbations = tf.multiply(
            tf.sign(init_perturbations),
            tf.to_float(tf.abs(init_perturbations) > 1. - eta),
        )
        init_x_batch_ext = self.inner_maximizer.project_perturbations(
            mal_x_batch_ext, init_perturbations)
        adv_x_batch_ext = tf.stop_gradient(
            self.inner_maximizer.graph(init_x_batch_ext, mal_y_batch_ext))

        def _loss_fn(x, y):
            # Negated cross-entropy: the smallest value marks the strongest attack.
            _1, _2, logits = self.nn(x, self.hidden_layers, self.output_dim, False, name=self.model_name, reuse=True)
            return -1 * tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=logits, labels=y)

        adv_losses = _loss_fn(adv_x_batch_ext, mal_y_batch_ext)
        adv_x_pool = tf.reshape(adv_x_batch_ext, [trials, -1, x_shape[1]])
        adv_losses = tf.reshape(adv_losses, [trials, -1])

        # For every malicious sample pick the trial with the lowest negated
        # loss; each index row is (best_trial, sample_number).
        idx_selected = tf.stack([
            tf.argmin(adv_losses, axis=0),
            tf.range(self.batch_size_mal, dtype=tf.int64)
        ], axis=1)

        adv_mal_x = filter(tf.gather_nd(adv_x_pool, idx_selected))
        adv_x_tensor = tf.concat([adv_mal_x, ben_x_tensor], axis=0)
        rtn_x = tf.concat([mal_x_tensor, ben_x_tensor], axis=0)
        rtn_y = tf.concat([mal_y_tensor, ben_y_tensor], axis=0)
        return adv_x_tensor, rtn_x, rtn_y
    else:
        raise ValueError("trials shall be a non-negative integer.")
def create_Q_network(self):
    """Build the Q-network graph and return (state placeholder, q_a tensor).

    "Model 1" is a convolutional stack mapping the state to one predicted
    next-state channel per action plus a per-action reward.  "Model 2" is a
    latent roll-out: for every action, k steps are unrolled with shared
    weights, each step predicting reward, discount (gamma), value and a
    mixing weight (lambda) and adding their combination to q_a.
    """
    state = tf.placeholder(dtype=tf.float32, shape=[None] + self.s_dim, name='state')

    # store Q(s,a) value
    # q_a = tf.placeholder(dtype=tf.float32, shape=[None]+self.s_dim)
    # gamma_init = tf.placeholder(dtype=tf.float32, shape=[None, 1])
    q_a = tf.Variable(0, dtype=tf.float32, name="q_a")

    # state feature extraction
    state_f = layers.convolution2d(state, num_outputs=16, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)
    state_f = layers.convolution2d(state_f, num_outputs=16, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)
    state_f = layers.batch_norm(state_f)
    # state_f = tf.nn.relu(state_f)

    # model 1 from state feature to action number of next state
    state_m1_h1 = layers.convolution2d(state_f, num_outputs=16, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)
    state_m1_h1 = layers.convolution2d(state_m1_h1, num_outputs=16, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)
    # NOTE(review): the result of this batch_norm is not read again in this
    # function -- possibly meant to normalise state_m1_h1; confirm.
    state_f = layers.batch_norm(state_f)
    # state_f = tf.nn.relu(state_f)
    state_m1_n = layers.convolution2d(state_m1_h1, num_outputs=16, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)
    state_m1_n = layers.convolution2d(state_m1_n, num_outputs=self.a_dim, kernel_size=3, stride=1, padding='SAME', activation_fn=tf.nn.relu)

    reward_m1_n = layers.fully_connected(layers.flatten(state_m1_h1), num_outputs=16, activation_fn=tf.nn.relu)
    reward_m1_n = layers.fully_connected(reward_m1_n, num_outputs=self.a_dim, activation_fn=tf.nn.relu)
    q_a += reward_m1_n

    # model 2 latent model
    ch_h = 16
    ch_latent_actions = 8
    k = 5  # number of unrolled steps per action
    # with tf.variable_scope("model2", reuse=reuse):
    # state transition function
    m2_w0 = tf.Variable(np.random.randn(3, 3, 1, ch_h) * 0.01, dtype=tf.float32)
    m2_b0 = tf.Variable(np.random.randn(1, 1, 1, ch_h) * 0.01, dtype=tf.float32)
    m2_w1 = tf.Variable(np.random.randn(3, 3, ch_h, ch_latent_actions) * 0.01, dtype=tf.float32)
    m2_b1 = tf.Variable(np.random.randn(1, 1, 1, ch_latent_actions) * 0.01, dtype=tf.float32)
    # reward function
    dim = self.s_dim[0] * self.s_dim[1] * ch_h  # size of a flattened ch_h-channel feature map
    reward_w0 = tf.Variable(np.random.randn(dim, ch_h) * 0.01, dtype=tf.float32)
    reward_b0 = tf.Variable(tf.zeros([ch_h]), dtype=tf.float32, name="reward_b")
    reward_w1 = tf.Variable(np.random.randn(ch_h, ch_latent_actions) * 0.01, dtype=tf.float32)
    reward_b1 = tf.Variable(tf.zeros([ch_latent_actions]), dtype=tf.float32, name="reward_b")
    # state value function
    # (unlike the reward weights, the weights below are not scaled by 0.01)
    value_w0 = tf.Variable(np.random.randn(dim, ch_h), dtype=tf.float32)
    value_b0 = tf.Variable(tf.zeros([ch_h]), dtype=tf.float32, name="value_b")
    value_w1 = tf.Variable(np.random.randn(ch_h, ch_latent_actions), dtype=tf.float32)
    value_b1 = tf.Variable(tf.zeros([ch_latent_actions]), dtype=tf.float32, name="value_b")
    # gamma(discount rate) function
    gamma_w0 = tf.Variable(np.random.randn(dim, ch_h), dtype=tf.float32)
    gamma_b0 = tf.Variable(tf.zeros([ch_h]), dtype=tf.float32, name="gamma_b")
    gamma_w1 = tf.Variable(np.random.randn(ch_h, ch_latent_actions), dtype=tf.float32)
    gamma_b1 = tf.Variable(tf.zeros([ch_latent_actions]), dtype=tf.float32, name="gamma_b")
    # lambda(discount rate) function
    lambda_w0 = tf.Variable(np.random.randn(dim, ch_h), dtype=tf.float32)
    lambda_b0 = tf.Variable(tf.zeros([ch_h]), dtype=tf.float32, name="lambda_b")
    lambda_w1 = tf.Variable(np.random.randn(ch_h, 1), dtype=tf.float32)
    lambda_b1 = tf.Variable(tf.zeros([1]), dtype=tf.float32, name="lamda_b")

    for i in range(self.a_dim):
        # Start the roll-out from the i-th channel of the model-1 prediction.
        # state_n = state_m1_n[:,:,:,i]
        state_n = tf.expand_dims(state_m1_n[:, :, :, i], 3)
        # state_n = tf.reshape(state_n, shape=[-1, self.s_dim[0], self.s_dim[1], 1])
        gamma = tf.Variable(1, dtype=tf.float32, name="gamma")
        for j in range(k):
            # state
            state_m2_h1 = tf.nn.relu(
                tf.nn.conv2d(
                    state_n, m2_w0, strides=(1, 1, 1, 1), padding='SAME') + m2_b0)
            state_m2_ns = tf.nn.relu(
                tf.nn.conv2d(state_m2_h1, m2_w1, strides=(1, 1, 1, 1), padding='SAME') + m2_b1)
            # NOTE(review): this overwrites the convolution result above with
            # a batch-normalised copy of state_m2_h1, so m2_w1 / m2_b1 do not
            # influence anything downstream -- confirm this is intended.
            state_m2_ns = layers.batch_norm(state_m2_h1)
            flat_state_m2_h1 = layers.flatten(state_m2_h1)
            flat_state_m2_ns = layers.flatten(state_m2_ns)
            # reward
            reward_n = tf.nn.relu(
                tf.matmul(flat_state_m2_h1, reward_w0) + reward_b0)
            reward_n = tf.nn.relu(
                tf.matmul(reward_n, reward_w1) + reward_b1)
            # gamma
            gamma_n = tf.nn.relu(
                tf.matmul(flat_state_m2_h1, gamma_w0) + gamma_b0)
            gamma_n = tf.nn.sigmoid(
                tf.matmul(gamma_n, gamma_w1) + gamma_b1)
            # value
            value_n = tf.nn.relu(
                tf.matmul(flat_state_m2_ns, value_w0) + value_b0)
            value_n = tf.nn.relu(tf.matmul(value_n, value_w1) + value_b1)
            # lambda
            lambda_n = tf.nn.relu(
                tf.matmul(flat_state_m2_h1, lambda_w0) + lambda_b0)
            lambda_n = tf.nn.sigmoid(
                tf.matmul(lambda_n, lambda_w1) + lambda_b1)
            # Accumulate the discount over the unrolled steps.
            gamma *= gamma_n

            # select action = argmaxQ(s,a)
            q_n = reward_n + gamma_n * value_n
            # Act = tf.argmax(q_n, axis=1)
            Act = tf.cast(tf.argmax(q_n, axis=1), tf.int32)
            # (row, chosen latent action) index pairs for gather_nd.
            idx = tf.stack([tf.range(0, tf.shape(Act)[0]), Act], axis=1)

            # select next state
            # state_nt = tf.transpose(state_m2_ns, [0,3,1,2])
            # state_nt = tf.gather_nd(tf.transpose(state_m2_ns, [0,3,1,2]), idx)
            # Channels are moved to axis 1 so idx picks one channel per sample.
            state_nt = tf.expand_dims(
                tf.gather_nd(tf.transpose(state_m2_ns, [0, 3, 1, 2]), idx), 3)
            # The next state is the average of the current and the selected state.
            state_n += state_nt
            state_n /= 2
            # state_n = state_nt

            discount_reward_n = gamma * reward_n
            discount_reward_n = tf.gather_nd(discount_reward_n, idx)
            # NOTE(review): four copies are stacked here and multiplied by a
            # one-hot of depth self.a_dim, which only lines up when
            # self.a_dim == 4 -- confirm.
            discount_reward_n = tf.stack([
                discount_reward_n, discount_reward_n, discount_reward_n,
                discount_reward_n
            ], axis=1)
            # Keep the contribution only in the column of action i.
            discount_reward_n = discount_reward_n * tf.one_hot(
                i, depth=self.a_dim)

            discount_value_n = gamma * value_n
            discount_value_n = tf.gather_nd(discount_value_n, idx)
            discount_value_n = tf.stack([
                discount_value_n, discount_value_n, discount_value_n,
                discount_value_n
            ], axis=1)
            discount_value_n = discount_value_n * tf.one_hot(
                i, depth=self.a_dim)

            # lambda-weighted mix of discounted reward and discounted value.
            q_a += (1 - lambda_n ) * discount_reward_n + lambda_n * discount_value_n

    return state, q_a
def xconv(pts, fts, qrs, tag, N, K, D, P, C, C_pts_fts, labels, cond_scale,
          cond_shift, cond_alpha, is_training, with_X_transformation,
          depth_multiplier, sorting_method=None, with_global=False):
    """Build one X-Conv layer around the query points and return its features.

    Args:
        pts: point coordinates the neighbours are gathered from.
        fts: features attached to ``pts``, or None to use only the features
            lifted from the local coordinates.
        qrs: query (representative) points the neighbourhoods are centred on.
        tag: string prefix used for every op / layer name created here.
        N, P, K: sizes used to reshape the X-transformation to (N, P, K, K).
            Presumably batch size, number of queries and neighbours per
            query -- TODO confirm against the caller.
        D: dilation rate; K * D neighbours are queried and every D-th kept.
        C: number of output channels of the final separable convolution.
        C_pts_fts: width of the two dense layers applied to local coordinates.
        labels, cond_scale, cond_shift, cond_alpha: forwarded untouched to the
            ``pf`` convolution helpers (their meaning is defined there).
        is_training: forwarded to every ``pf`` layer.
        with_X_transformation: when true, features are multiplied by a learned
            K x K matrix before the final convolution.
        depth_multiplier: forwarded to ``pf.separable_conv2d``.
        sorting_method: optional method handed to ``pf.sort_points``.
        with_global: when true, features derived from ``qrs`` (width C // 4)
            are concatenated in front of the convolution output.

    Returns:
        The squeezed convolution output, optionally prefixed by the global
        features along the last axis.
    """
    # Dilated neighbourhood: ask for K * D neighbours, keep every D-th one.
    _, dilated_knn = pf.knn_indices_general(qrs, pts, K * D, True)
    neighbor_idx = dilated_knn[:, :, ::D, :]
    if sorting_method is not None:
        neighbor_idx = pf.sort_points(pts, neighbor_idx, sorting_method)

    # Neighbour coordinates relative to their query point.
    neighbors = tf.gather_nd(pts, neighbor_idx, name=tag + 'nn_pts')  # (N, P, K, 3)
    centers = tf.expand_dims(qrs, axis=2, name=tag + 'nn_pts_center')  # (N, P, 1, 3)
    local_coords = tf.subtract(neighbors, centers, name=tag + 'nn_pts_local')  # (N, P, K, 3)

    # Lift the local coordinates into feature space with two dense layers.
    lifted_0 = pf.dense(local_coords, C_pts_fts, tag + 'nn_fts_from_pts_0', is_training)
    lifted = pf.dense(lifted_0, C_pts_fts, tag + 'nn_fts_from_pts', is_training)
    if fts is not None:
        previous = tf.gather_nd(fts, neighbor_idx, name=tag + 'nn_fts_from_prev')
        features = tf.concat([lifted, previous], axis=-1, name=tag + 'nn_fts_input')
    else:
        features = lifted

    if with_X_transformation:
        # Arguments shared by the three layers that produce the K x K matrix.
        shared_args = (labels, cond_scale, cond_shift, cond_alpha, is_training, (1, K))
        kk_shape = (N, P, K, K)

        x0 = pf.conv2d(local_coords, K * K, tag + 'X_0', *shared_args)
        x0_kk = tf.reshape(x0, kk_shape, name=tag + 'X_0_KK')
        x1 = pf.depthwise_conv2d(x0_kk, K, tag + 'X_1', *shared_args)
        x1_kk = tf.reshape(x1, kk_shape, name=tag + 'X_1_KK')
        # The last layer of the transformation has no activation.
        x2 = pf.depthwise_conv2d(x1_kk, K, tag + 'X_2', *shared_args, activation=None)
        x2_kk = tf.reshape(x2, kk_shape, name=tag + 'X_2_KK')
        transformed = tf.matmul(x2_kk, features, name=tag + 'fts_X')
    else:
        transformed = features

    conv_out = pf.separable_conv2d(transformed, C, tag + 'fts_conv', labels,
                                   cond_scale, cond_shift, cond_alpha,
                                   is_training, (1, K),
                                   depth_multiplier=depth_multiplier)
    conv_out_3d = tf.squeeze(conv_out, axis=2, name=tag + 'fts_conv_3d')

    if not with_global:
        return conv_out_3d

    global_0 = pf.dense(qrs, C // 4, tag + 'fts_global_0', is_training)
    global_fts = pf.dense(global_0, C // 4, tag + 'fts_global', is_training)
    return tf.concat([global_fts, conv_out_3d], axis=-1,
                     name=tag + 'fts_conv_3d_with_global')
def last_frame_layer(self, bottom, name):
    """Gather, for every sequence, the entry of ``bottom`` at its last step.

    Index pairs ``(self.seq_length[i] - 1, i)`` are built for every ``i`` in
    ``range(len(self.seq_length))`` and gathered from ``bottom``; the first
    two axes of ``bottom`` are therefore indexed as (step, sequence).

    Args:
        bottom: tensor to gather from.
        name: name given to the gather op.

    Returns:
        The gathered tensor, one slice per sequence.
    """
    sequence_ids = tf.range(0, tf.shape(self.seq_length)[0])
    indexs = tf.stack([self.seq_length - 1, sequence_ids], axis=1)
    # Pass ``name`` by keyword: the third positional parameter of
    # tf.gather_nd is ``name`` in the TF1 API but ``batch_dims`` in TF2.
    return tf.gather_nd(bottom, indexs, name=name)
def __call__(self, roi_features, class_indices, is_training=False):
    """Mask branch for the Mask-RCNN model.

    Runs four 3x3 convolutions, one stride-2 transposed convolution and a
    1x1 per-class logit convolution over every ROI, then keeps, for each
    ROI, only the mask of the class given by ``class_indices``.

    Args:
      roi_features: A ROI feature tensor of shape
        [batch_size, num_rois, height_l, width_l, num_filters]. All
        dimensions except the first must be statically known.
      class_indices: a Tensor of shape [batch_size, num_rois] with a fully
        static shape, indicating which class the ROI is. It is stacked with
        `tf.range` outputs, so it is presumably int32 -- TODO confirm.
      is_training: `boolean`, if True if model is in training mode.

    Returns:
      mask_outputs: a tensor with a shape of
        [batch_size, num_masks, mask_height, mask_width], holding for each
        ROI the mask logits of its selected class.
    """

    def _get_stddev_equivalent_to_msra_fill(kernel_size, fan_out):
        """Returns the stddev of random normal initialization as MSRAFill."""
        # Reference: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/filler_op.h#L445-L463  # pylint: disable=line-too-long
        # For example, kernel size is (3, 3) and fan out is 256, stddev is 0.029.
        # stddev = (2/(3*3*256))^0.5 = 0.029
        # Use a float numerator so the quotient is not truncated to 0 by
        # integer division on Python 2.
        return (2.0 / (kernel_size[0] * kernel_size[1] * fan_out))**0.5

    with tf.variable_scope('mask_head'):
        _, num_rois, height, width, filters = (
            roi_features.get_shape().as_list())
        # Fold the ROI axis into the batch axis so 2-D convolutions apply.
        net = tf.reshape(roi_features, [-1, height, width, filters])

        # Four identical 3x3 conv + batch-norm/ReLU stages.
        kernel_size = (3, 3)
        fan_out = 256
        init_stddev = _get_stddev_equivalent_to_msra_fill(
            kernel_size, fan_out)
        for i in range(4):
            net = tf.layers.conv2d(
                net,
                fan_out,
                kernel_size=kernel_size,
                strides=(1, 1),
                padding='same',
                dilation_rate=(1, 1),
                activation=None,
                kernel_initializer=tf.random_normal_initializer(
                    stddev=init_stddev),
                bias_initializer=tf.zeros_initializer(),
                name='mask-conv-l%d' % i)
            net = self._batch_norm_relu(net, is_training=is_training)

        # Stride-2 transposed convolution.
        kernel_size = (2, 2)
        fan_out = 256
        init_stddev = _get_stddev_equivalent_to_msra_fill(
            kernel_size, fan_out)
        net = tf.layers.conv2d_transpose(
            net,
            fan_out,
            kernel_size=kernel_size,
            strides=(2, 2),
            padding='valid',
            activation=None,
            kernel_initializer=tf.random_normal_initializer(
                stddev=init_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='conv5-mask')
        net = self._batch_norm_relu(net, is_training=is_training)

        # 1x1 convolution producing one logit map per class.
        kernel_size = (1, 1)
        fan_out = self._num_classes
        init_stddev = _get_stddev_equivalent_to_msra_fill(
            kernel_size, fan_out)
        mask_outputs = tf.layers.conv2d(
            net,
            fan_out,
            kernel_size=kernel_size,
            strides=(1, 1),
            padding='valid',
            kernel_initializer=tf.random_normal_initializer(
                stddev=init_stddev),
            bias_initializer=tf.zeros_initializer(),
            name='mask_fcn_logits')
        # Restore the ROI axis. NOTE(review): this assumes the upsampled
        # spatial size equals self._mrcnn_resolution -- confirm with config.
        mask_outputs = tf.reshape(mask_outputs, [
            -1, num_rois, self._mrcnn_resolution, self._mrcnn_resolution,
            self._num_classes
        ])

        with tf.name_scope('masks_post_processing'):
            # TODO(pengchong): Figure out the way not to use the static inferred
            # batch size.
            batch_size, num_masks = class_indices.get_shape().as_list()
            # Move the class axis forward: [batch, roi, class, h, w].
            mask_outputs = tf.transpose(mask_outputs, [0, 1, 4, 2, 3])
            # Constructs indices for gather: (batch, roi, class) triples.
            batch_indices = tf.tile(
                tf.expand_dims(tf.range(batch_size), axis=1), [1, num_masks])
            mask_indices = tf.tile(
                tf.expand_dims(tf.range(num_masks), axis=0), [batch_size, 1])
            gather_indices = tf.stack(
                [batch_indices, mask_indices, class_indices], axis=2)
            mask_outputs = tf.gather_nd(mask_outputs, gather_indices)
    return mask_outputs
def _add_seq2seq(self):
    """Add the whole sequence-to-sequence model to the graph.

    Builds, in order: the shared embedding, the encoder, the decoder, the
    output projection to vocabulary scores, the final word distributions,
    and then either the loss ops (modes 'train' / 'eval') or the top-k ops
    (mode 'decode').

    Attributes set on ``self``: ``rand_unif_init``, ``trunc_norm_init``,
    ``_enc_states``, ``_dec_in_state``, ``_dec_out_state``, ``attn_dists``,
    ``p_gens``, ``coverage``; in train/eval ``_loss`` and, only when
    ``hps.coverage.value`` is truthy, ``_coverage_loss`` and ``_total_loss``;
    in decode ``_topk_ids`` and ``_topk_log_probs``.
    """
    hps = self._hps
    vsize = self._vocab.size()  # size of the vocabulary

    with tf.variable_scope('seq2seq'):
        # Some initializers
        self.rand_unif_init = tf.random_uniform_initializer(
            -hps.rand_unif_init_mag.value,
            hps.rand_unif_init_mag.value,
            seed=123)
        self.trunc_norm_init = tf.truncated_normal_initializer(
            stddev=hps.trunc_norm_init_std.value)

        # Add embedding matrix (shared by the encoder and decoder inputs)
        with tf.variable_scope('embedding'):
            embedding = tf.get_variable('embedding',
                                        [vsize, hps.emb_dim.value],
                                        dtype=tf.float32,
                                        initializer=self.trunc_norm_init)
            if hps.mode.value == "train":
                self._add_emb_vis(embedding)  # add to tensorboard
            emb_enc_inputs = tf.nn.embedding_lookup(
                embedding, self._enc_batch
            )  # tensor with shape (batch_size, max_enc_steps, emb_size)
            emb_dec_inputs = [
                tf.nn.embedding_lookup(embedding, x)
                for x in tf.unstack(self._dec_batch, axis=1)
            ]  # list length max_dec_steps containing shape (batch_size, emb_size)

        # Add the encoder.
        enc_outputs, fw_st, bw_st = self._add_encoder(
            emb_enc_inputs, self._enc_lens)
        self._enc_states = enc_outputs

        # Our encoder is bidirectional and our decoder is unidirectional so
        # we need to reduce the final encoder hidden state to the right size
        # to be the initial decoder hidden state
        self._dec_in_state = self._reduce_states(fw_st, bw_st)

        # Add the decoder.
        with tf.variable_scope('decoder'):
            decoder_outputs, self._dec_out_state, self.attn_dists, self.p_gens, self.coverage = self._add_decoder(
                emb_dec_inputs)

        # Add the output projection to obtain the vocabulary distribution
        with tf.variable_scope('output_projection'):
            w = tf.get_variable('w', [hps.hidden_dim.value, vsize],
                                dtype=tf.float32,
                                initializer=self.trunc_norm_init)
            # NOTE(review): w_t is not read anywhere else in this method.
            w_t = tf.transpose(w)
            v = tf.get_variable('v', [vsize],
                                dtype=tf.float32,
                                initializer=self.trunc_norm_init)
            vocab_scores = [
            ]  # vocab_scores is the vocabulary distribution before applying softmax. Each entry on the list corresponds to one decoder step
            for i, output in enumerate(decoder_outputs):
                if i > 0:
                    tf.get_variable_scope().reuse_variables()
                vocab_scores.append(tf.nn.xw_plus_b(
                    output, w, v))  # apply the linear layer

            vocab_dists = [
                tf.nn.softmax(s) for s in vocab_scores
            ]  # The vocabulary distributions. List length max_dec_steps of (batch_size, vsize) arrays. The words are in the order they appear in the vocabulary file.

        # For pointer-generator model, calc final distribution from copy
        # distribution and vocabulary distribution
        if FLAGS.pointer_gen:
            final_dists = self._calc_final_dist(vocab_dists,
                                                self.attn_dists)
        else:  # final distribution is just vocabulary distribution
            final_dists = vocab_dists

        if hps.mode.value in ['train', 'eval']:
            # Calculate the loss
            with tf.variable_scope('loss'):
                if FLAGS.pointer_gen:
                    # Calculate the loss per step
                    # This is fiddly; we use tf.gather_nd to pick out the
                    # probabilities of the gold target words
                    loss_per_step = [
                    ]  # will be list length max_dec_steps containing shape (batch_size)
                    batch_nums = tf.range(
                        0, limit=hps.batch_size.value)  # shape (batch_size)
                    for dec_step, dist in enumerate(final_dists):
                        targets = self._target_batch[:, dec_step]  # The indices of the target words. shape (batch_size)
                        indices = tf.stack((batch_nums, targets),
                                           axis=1)  # shape (batch_size, 2)
                        gold_probs = tf.gather_nd(
                            dist, indices
                        )  # shape (batch_size). prob of correct words on this step
                        # NOTE(review): no epsilon is added before the log;
                        # a gold probability of exactly 0 would give inf.
                        losses = -tf.log(gold_probs)
                        loss_per_step.append(losses)

                    # Apply dec_padding_mask and get loss
                    self._loss = _mask_and_avg(loss_per_step,
                                               self._dec_padding_mask)

                else:  # baseline model
                    self._loss = tf.contrib.seq2seq.sequence_loss(
                        tf.stack(vocab_scores, axis=1), self._target_batch,
                        self._dec_padding_mask
                    )  # this applies softmax internally

                tf.summary.scalar('loss', self._loss)

                # Calculate coverage loss from the attention distributions
                if hps.coverage.value:
                    with tf.variable_scope('coverage_loss'):
                        self._coverage_loss = _coverage_loss(
                            self.attn_dists, self._dec_padding_mask)
                        tf.summary.scalar('coverage_loss',
                                          self._coverage_loss)
                    # Total loss = base loss + weighted coverage loss.
                    self._total_loss = self._loss + hps.cov_loss_wt.value * self._coverage_loss
                    tf.summary.scalar('total_loss', self._total_loss)

    if hps.mode.value == "decode":
        # We run decode beam search mode one decoder step at a time
        assert len(
            final_dists
        ) == 1  # final_dists is a singleton list containing shape (batch_size, extended_vsize)
        final_dists = final_dists[0]
        topk_probs, self._topk_ids = tf.nn.top_k(
            final_dists, hps.batch_size.value * 2
        )  # take the k largest probs. note batch_size=beam_size in decode mode
        self._topk_log_probs = tf.log(topk_probs)
def build_dqn(self):
    """Build the prediction and target Q-networks and all training ops.

    Creates, in separate variable scopes: the prediction network
    ('prediction'), the target network ('target'), placeholder/assign pairs
    for copying prediction weights into the target network
    ('pred_to_target'), the loss and RMSProp update ('optimizer') and the
    summary placeholders/ops ('summary'). It then initialises all variables
    in the default session, creates the saver, loads a saved model and
    synchronises the target network.

    Both networks are three convolutions followed either by a dueling
    value/advantage head (``self.dueling``) or a single 512-unit layer and
    a linear Q output.

    Requires a default session (``Operation.run`` is used) and the
    attributes ``self.sess`` and ``self.step_op`` to exist already.
    """
    self.w = {}
    self.t_w = {}

    initializer = tf.truncated_normal_initializer(0, 0.02)
    activation_fn = tf.nn.relu

    # training network
    with tf.variable_scope('prediction'):
        if self.cnn_format == 'NHWC':
            self.s_t = tf.placeholder('float32', [
                None, self.screen_height, self.screen_width,
                self.history_length
            ], name='s_t')
        else:
            self.s_t = tf.placeholder('float32', [
                None, self.history_length, self.screen_height,
                self.screen_width
            ], name='s_t')

        self.l1, self.w['l1_w'], self.w['l1_b'] = conv2d(
            self.s_t, 32, [8, 8], [4, 4], initializer, activation_fn,
            self.cnn_format, name='l1')
        self.l2, self.w['l2_w'], self.w['l2_b'] = conv2d(
            self.l1, 64, [4, 4], [2, 2], initializer, activation_fn,
            self.cnn_format, name='l2')
        self.l3, self.w['l3_w'], self.w['l3_b'] = conv2d(
            self.l2, 64, [3, 3], [1, 1], initializer, activation_fn,
            self.cnn_format, name='l3')

        # Flatten everything but the batch axis.
        shape = self.l3.get_shape().as_list()
        self.l3_flat = tf.reshape(
            self.l3, [-1, reduce(lambda x, y: x * y, shape[1:])])

        if self.dueling:
            self.value_hid, self.w['l4_val_w'], self.w['l4_val_b'] = \
                linear(self.l3_flat, 512, activation_fn=activation_fn, name='value_hid')

            self.adv_hid, self.w['l4_adv_w'], self.w['l4_adv_b'] = \
                linear(self.l3_flat, 512, activation_fn=activation_fn, name='adv_hid')

            self.value, self.w['val_w_out'], self.w['val_w_b'] = \
                linear(self.value_hid, 1, name='value_out')

            self.advantage, self.w['adv_w_out'], self.w['adv_w_b'] = \
                linear(self.adv_hid, self.env.action_size, name='adv_out')

            # Average Dueling
            self.q = self.value + (self.advantage - tf.reduce_mean(
                self.advantage, reduction_indices=1, keep_dims=True))
        else:
            self.l4, self.w['l4_w'], self.w['l4_b'] = linear(
                self.l3_flat, 512, activation_fn=activation_fn, name='l4')
            self.q, self.w['q_w'], self.w['q_b'] = linear(
                self.l4, self.env.action_size, name='q')

        # `axis` replaces the deprecated `dimension` argument.
        self.q_action = tf.argmax(self.q, axis=1)

        # One histogram per action of the batch-averaged Q value.
        q_summary = []
        avg_q = tf.reduce_mean(self.q, 0)
        for idx in range(self.env.action_size):
            q_summary.append(
                tf.summary.histogram('q/%s' % idx, avg_q[idx]))
        # The second positional argument of tf.summary.merge is
        # `collections`, so the op name has to be passed by keyword.
        self.q_summary = tf.summary.merge(q_summary, name='q_summary')

    # target network
    with tf.variable_scope('target'):
        if self.cnn_format == 'NHWC':
            self.target_s_t = tf.placeholder('float32', [
                None, self.screen_height, self.screen_width,
                self.history_length
            ], name='target_s_t')
        else:
            self.target_s_t = tf.placeholder('float32', [
                None, self.history_length, self.screen_height,
                self.screen_width
            ], name='target_s_t')

        self.target_l1, self.t_w['l1_w'], self.t_w['l1_b'] = conv2d(
            self.target_s_t, 32, [8, 8], [4, 4], initializer,
            activation_fn, self.cnn_format, name='target_l1')
        self.target_l2, self.t_w['l2_w'], self.t_w['l2_b'] = conv2d(
            self.target_l1, 64, [4, 4], [2, 2], initializer,
            activation_fn, self.cnn_format, name='target_l2')
        self.target_l3, self.t_w['l3_w'], self.t_w['l3_b'] = conv2d(
            self.target_l2, 64, [3, 3], [1, 1], initializer,
            activation_fn, self.cnn_format, name='target_l3')

        shape = self.target_l3.get_shape().as_list()
        self.target_l3_flat = tf.reshape(
            self.target_l3, [-1, reduce(lambda x, y: x * y, shape[1:])])

        if self.dueling:
            self.t_value_hid, self.t_w['l4_val_w'], self.t_w['l4_val_b'] = \
                linear(self.target_l3_flat, 512, activation_fn=activation_fn, name='target_value_hid')

            self.t_adv_hid, self.t_w['l4_adv_w'], self.t_w['l4_adv_b'] = \
                linear(self.target_l3_flat, 512, activation_fn=activation_fn, name='target_adv_hid')

            self.t_value, self.t_w['val_w_out'], self.t_w['val_w_b'] = \
                linear(self.t_value_hid, 1, name='target_value_out')

            self.t_advantage, self.t_w['adv_w_out'], self.t_w['adv_w_b'] = \
                linear(self.t_adv_hid, self.env.action_size, name='target_adv_out')

            # Average Dueling
            self.target_q = self.t_value + (
                self.t_advantage - tf.reduce_mean(
                    self.t_advantage, reduction_indices=1, keep_dims=True))
        else:
            self.target_l4, self.t_w['l4_w'], self.t_w['l4_b'] = \
                linear(self.target_l3_flat, 512, activation_fn=activation_fn, name='target_l4')
            self.target_q, self.t_w['q_w'], self.t_w['q_b'] = \
                linear(self.target_l4, self.env.action_size, name='target_q')

        # Lets the caller pick individual target Q entries by index.
        self.target_q_idx = tf.placeholder('int32', [None, None],
                                           'outputs_idx')
        self.target_q_with_idx = tf.gather_nd(self.target_q,
                                              self.target_q_idx)

    # One placeholder + assign op per prediction weight, used to copy the
    # prediction network into the target network.
    with tf.variable_scope('pred_to_target'):
        self.t_w_input = {}
        self.t_w_assign_op = {}

        for name in self.w.keys():
            self.t_w_input[name] = tf.placeholder(
                'float32', self.t_w[name].get_shape().as_list(), name=name)
            self.t_w_assign_op[name] = self.t_w[name].assign(
                self.t_w_input[name])

    # optimizer
    with tf.variable_scope('optimizer'):
        self.target_q_t = tf.placeholder('float32', [None],
                                         name='target_q_t')
        self.action = tf.placeholder('int64', [None], name='action')

        # Select the Q value of the action that was actually taken.
        action_one_hot = tf.one_hot(self.action, self.env.action_size,
                                    1.0, 0.0, name='action_one_hot')
        q_acted = tf.reduce_sum(self.q * action_one_hot,
                                reduction_indices=1, name='q_acted')

        self.delta = self.target_q_t - q_acted

        self.global_step = tf.Variable(0, trainable=False)

        self.loss = tf.reduce_mean(clipped_error(self.delta), name='loss')
        self.learning_rate_step = tf.placeholder(
            'int64', None, name='learning_rate_step')
        # Exponentially decayed learning rate, floored at the minimum.
        self.learning_rate_op = tf.maximum(
            self.learning_rate_minimum,
            tf.train.exponential_decay(self.learning_rate,
                                       self.learning_rate_step,
                                       self.learning_rate_decay_step,
                                       self.learning_rate_decay,
                                       staircase=True))
        self.optim = tf.train.RMSPropOptimizer(
            self.learning_rate_op, momentum=0.95,
            epsilon=0.01).minimize(self.loss)

    with tf.variable_scope('summary'):
        scalar_summary_tags = [
            'average.reward', 'average.loss', 'average.q',
            'episode.max reward', 'episode.min reward',
            'episode.avg reward', 'episode.num of game',
            'training.learning_rate'
        ]

        self.summary_placeholders = {}
        self.summary_ops = {}

        for tag in scalar_summary_tags:
            self.summary_placeholders[tag] = tf.placeholder(
                'float32', None, name=tag.replace(' ', '_'))
            self.summary_ops[tag] = tf.summary.scalar(
                "%s-%s/%s" % (self.env_name, self.env_type, tag),
                self.summary_placeholders[tag])

        histogram_summary_tags = ['episode.rewards', 'episode.actions']

        for tag in histogram_summary_tags:
            self.summary_placeholders[tag] = tf.placeholder(
                'float32', None, name=tag.replace(' ', '_'))
            self.summary_ops[tag] = tf.summary.histogram(
                tag, self.summary_placeholders[tag])

        self.writer = tf.summary.FileWriter('./logs/%s' % self.model_dir,
                                            self.sess.graph)

    # tf.initialize_all_variables() is deprecated in favour of this call.
    tf.global_variables_initializer().run()

    self._saver = tf.train.Saver(list(self.w.values()) + [self.step_op],
                                 max_to_keep=30)

    self.load_model()
    self.update_target_q_network()
def error(self):
    """Compute, store and return the summed negative log-likelihood.

    For every row ``i`` the entry ``self.prediction[i, self.target[i]]`` is
    gathered; the result is minus the sum of the logs of those entries. The
    tensor is also stored on ``self.cross_entropy``.
    """
    row_ids = tf.range(0, tf.shape(self.seq_length)[0])
    # (row, target) pairs selecting one prediction entry per row.
    picks = tf.stack([row_ids, self.target], axis=1)
    picked_probs = tf.gather_nd(self.prediction, picks)
    self.cross_entropy = -tf.reduce_sum(tf.log(picked_probs))
    return self.cross_entropy
def dmnrun(fulldata, queask):
    """Build a dynamic-memory-network graph and run it once on the input.

    The context lines in ``fulldata`` and the question ``queask`` are
    vectorised, a fresh graph (input module, question module, episodic
    memory, answer module, loss and optimizer) is built, its variables are
    initialised, and one batch sampled from the prepared data is evaluated.
    Diagnostic text is printed along the way.

    Args:
        fulldata: iterable of context strings; the last character of each
            entry is dropped before encoding.
        queask: the question string.

    Returns:
        The first entry of ``fulldata`` whose cumulative character count
        reaches the selected response index, or ``""`` if there is none.

    NOTE(review): the checkpoint restored at the top is loaded into a
    session and graph that are never used again: the default graph is reset
    and the later ``sess = tf.Session()`` runs
    ``tf.global_variables_initializer()``. The first session is also never
    closed. Confirm whether the restored weights were meant to be used.
    """
    # Loading saved meta graph
    sess = tf.Session()
    saver = tf.train.import_meta_graph("C:/Users/Mark/PycharmProjects/DMNTrain/weights/model.meta")
    saver.restore(sess, tf.train.latest_checkpoint('C:/Users/Mark/PycharmProjects/DMNTrain/weights'))
    tf.reset_default_graph()

    def wideArray(x, weight):
        """Copy the ragged rows of ``x`` into a zero-padded array of width ``weight``."""
        wide = np.zeros([len(x), weight])
        for i in range(0, len(x)):
            for j in range(0, len(x[i])):
                wide[i][j] = x[i][j]
        return wide

    def octalConv(x):
        """Split ``x`` on spaces and return (padded row array, list of words).

        NOTE(review): ``convNum`` is computed for every character but never
        stored, so each row appended to ``rows`` is empty and the returned
        array is all zeros. It looks like an ``ans.append(convNum)`` is
        missing -- confirm.
        """
        ans = []
        rows = []
        words = []
        for line in x.split(' '):
            for word in line:
                # Octal digits of the character's code point, read as a
                # decimal integer (the '0o' prefix is stripped).
                number = ord(word)
                convNum = oct(number)
                convNum = int(convNum[2:])
            rows.append(ans)
            ans = []
            words.append(line)
        ans = wideArray(rows, 50)
        return ans, words

    def contextualize(data, quest):
        """
        Read in the input and question and build a context sets.
        Output is a list of data points, each of which is a 7-element tuple containing:
            The sentences in the context in vectorized form.
            The sentences in the context as a list of string tokens.
            The question in vectorized form.
            The question as a list of string tokens.
            The answer in vectorized form.
            The answer as a list of string tokens.
            A list of numbers for supporting statements, which is currently unused.

        Here the answer is always the encoding of the literal 'Nothing' and
        the supporting-statement slot is always 0.
        """
        output = []
        context = []
        for entry in data:
            # Turn input into a word vector
            # TODO: Change to Octal Decimal encoding
            context.append(octalConv(entry[:-1]))
        # Wrap up object so DMN can use it
        comp_context = tuple(zip(*context))
        output.append(comp_context +
                      octalConv(quest) +
                      octalConv('Nothing') +
                      (0,))
        return output

    test_data = contextualize(fulldata, queask)
    final_train_data = []  # NOTE(review): never read below.

    def finalize(data):
        """
        Prepares data generated by contextualize() for use in the network.

        Sentence vectors are concatenated into one context array and the
        cumulative sentence lengths are kept as sentence-end markers.
        """
        final_data = []
        for cqas in data:
            contextvs, contextws, qvs, qws, avs, aws, spt = cqas
            lspt = [spt]

            lengths = itertools.accumulate(len(cvec) for cvec in contextvs)
            context_vec = np.concatenate(contextvs)
            context_words = sum(contextws, [])

            # Location markers for the beginnings of new sentences.
            sentence_ends = np.array(list(lengths))
            final_data.append((context_vec, sentence_ends, qvs, lspt, context_words, cqas, avs, aws))
        return np.array(final_data)

    final_test_data = finalize(test_data)

    tf.reset_default_graph()

    # Hyperparameters

    # The number of dimensions used to store data passed between recurrent layers in the network.
    recurrent_cell_size = 128

    # The number of dimensions in our word vectorizations.
    D = 50

    # How quickly the network learns. Too high, and we may run into numeric instability
    # or other issues.
    learning_rate = 0.005

    # Dropout probabilities. For a description of dropout and what these probabilities are,
    # see Entailment with TensorFlow.
    # NOTE(review): dropout stays active in this function even though it
    # only evaluates the network -- confirm that is intended.
    input_p, output_p = 0.5, 0.5

    # How many questions we train on at a time.
    batch_size = 128

    # Number of passes in episodic memory. We'll get to this later.
    passes = 4

    # Feed Forward layer sizes: the number of dimensions used to store data passed from feed-forward layers.
    ff_hidden_size = 256

    weight_decay = 0.00000001
    # The strength of our regularization. Increase to encourage sparsity in episodic memory,
    # but makes training slower. Don't make this larger than learning_rate.

    training_iterations_count = 400000
    # How many questions the network trains on each time it is trained.
    # Some questions are counted multiple times.

    display_step = 1
    # How many iterations of training occur before each validation check.

    # Input Module

    # Context: A [batch_size, maximum_context_length, word_vectorization_dimensions] tensor
    # that contains all the context information.
    context = tf.placeholder(tf.float32, [None, None, D], "context")
    context_placeholder = context  # I use context as a variable name later on

    # input_sentence_endings: A [batch_size, maximum_sentence_count, 2] tensor that
    # contains the locations of the ends of sentences.
    input_sentence_endings = tf.placeholder(tf.int32, [None, None, 2], "sentence")

    # recurrent_cell_size: the number of hidden units in recurrent layers.
    input_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

    # input_p: The probability of maintaining a specific hidden input unit.
    # Likewise, output_p is the probability of maintaining a specific hidden output unit.
    gru_drop = tf.contrib.rnn.DropoutWrapper(input_gru, input_p, output_p)

    # dynamic_rnn also returns the final internal state. We don't need that, and can
    # ignore the corresponding output (_).
    input_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, context, dtype=tf.float32, scope="input_module")

    # cs: the facts gathered from the context.
    cs = tf.gather_nd(input_module_outputs, input_sentence_endings)
    # to use every word as a fact, useful for tasks with one-sentence contexts
    s = input_module_outputs

    # Question Module

    # query: A [batch_size, maximum_question_length, word_vectorization_dimensions] tensor
    # that contains all of the questions.
    query = tf.placeholder(tf.float32, [None, None, D], "query")

    # input_query_lengths: A [batch_size, 2] tensor that contains question length information.
    # input_query_lengths[:,1] has the actual lengths; input_query_lengths[:,0] is a simple range()
    # so that it plays nice with gather_nd.
    input_query_lengths = tf.placeholder(tf.int32, [None, 2], "query_lengths")

    # The question is run through the same GRU, reusing the "input_module" scope.
    question_module_outputs, _ = tf.nn.dynamic_rnn(gru_drop, query, dtype=tf.float32,
                                                   scope=tf.VariableScope(True, "input_module"))

    # q: the question states. A [batch_size, recurrent_cell_size] tensor.
    q = tf.gather_nd(question_module_outputs, input_query_lengths)

    # Episodic Memory

    # make sure the current memory (i.e. the question vector) is broadcasted along the facts dimension
    size = tf.stack([tf.constant(1), tf.shape(cs)[1], tf.constant(1)])
    re_q = tf.tile(tf.reshape(q, [-1, 1, recurrent_cell_size]), size)

    # Final output for attention, needs to be 1 in order to create a mask
    output_size = 1

    # Weights and biases
    attend_init = tf.random_normal_initializer(stddev=0.1)
    w_1 = tf.get_variable("attend_w1", [1, recurrent_cell_size * 7, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    w_2 = tf.get_variable("attend_w2", [1, recurrent_cell_size, output_size],
                          tf.float32, initializer=attend_init)

    b_1 = tf.get_variable("attend_b1", [1, recurrent_cell_size],
                          tf.float32, initializer=attend_init)
    b_2 = tf.get_variable("attend_b2", [1, output_size],
                          tf.float32, initializer=attend_init)

    # Regulate all the weights and biases
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_1))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(w_2))
    tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, tf.nn.l2_loss(b_2))

    def attention(c, mem, existing_facts):
        """
        Custom attention mechanism.
        c: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor
            that contains all the facts from the contexts.
        mem: A [batch_size, maximum_sentence_count, recurrent_cell_size] tensor that
            contains the current memory. It should be the same memory for all facts for accurate results.
        existing_facts: A [batch_size, maximum_sentence_count, 1] tensor that
            acts as a binary mask for which facts exist and which do not.

        Returns the softmax attention weights with a trailing axis of size 1.
        """
        with tf.variable_scope("attending") as scope:
            # attending: The metrics by which we decide what to attend to.
            # Seven feature groups are concatenated, matching the
            # recurrent_cell_size * 7 input width of w_1.
            attending = tf.concat([c, mem, re_q, c * re_q, c * mem, (c - re_q) ** 2, (c - mem) ** 2], 2)

            # m1: First layer of multiplied weights for the feed-forward network.
            # We tile the weights in order to manually broadcast, since tf.matmul does not
            # automatically broadcast batch matrix multiplication as of TensorFlow 1.2.
            m1 = tf.matmul(attending * existing_facts,
                           tf.tile(w_1, tf.stack([tf.shape(attending)[0], 1, 1]))) * existing_facts
            # bias_1: A masked version of the first feed-forward layer's bias
            # over only existing facts.
            bias_1 = b_1 * existing_facts

            # tnhan: First nonlinearity. In the original paper, this is a tanh nonlinearity;
            # choosing relu was a design choice intended to avoid issues with
            # low gradient magnitude when the tanh returned values close to 1 or -1.
            tnhan = tf.nn.relu(m1 + bias_1)

            # m2: Second layer of multiplied weights for the feed-forward network.
            # Still tiling weights for the same reason described in m1's comments.
            m2 = tf.matmul(tnhan, tf.tile(w_2, tf.stack([tf.shape(attending)[0], 1, 1])))

            # bias_2: A masked version of the second feed-forward layer's bias.
            bias_2 = b_2 * existing_facts

            # norm_m2: A normalized version of the second layer of weights, which is used
            # to help make sure the softmax nonlinearity doesn't saturate.
            norm_m2 = tf.nn.l2_normalize(m2 + bias_2, -1)

            # softmaxable: A hack in order to use sparse_softmax on an otherwise dense tensor.
            # We make norm_m2 a sparse tensor, then make it dense again after the operation.
            softmax_idx = tf.where(tf.not_equal(norm_m2, 0))[:, :-1]
            softmax_gather = tf.gather_nd(norm_m2[..., 0], softmax_idx)
            softmax_shape = tf.shape(norm_m2, out_type=tf.int64)[:-1]
            softmaxable = tf.SparseTensor(softmax_idx, softmax_gather, softmax_shape)
            return tf.expand_dims(tf.sparse_tensor_to_dense(tf.sparse_softmax(softmaxable)), -1)

    # facts_0s: a [batch_size, max_facts_length, 1] tensor
    # whose values are 1 if the corresponding fact exists and 0 if not.
    facts_0s = tf.cast(tf.count_nonzero(input_sentence_endings[:, :, -1:], -1, keepdims=True), tf.float32)

    with tf.variable_scope("Episodes") as scope:
        attention_gru = tf.contrib.rnn.GRUCell(recurrent_cell_size)

        # memory: A list of all tensors that are the (current or past) memory state
        # of the attention mechanism.
        memory = [q]

        # attends: A list of all tensors that represent what the network attends to.
        attends = []
        for a in range(passes):
            # attention mask
            attend_to = attention(cs, tf.tile(tf.reshape(memory[-1], [-1, 1, recurrent_cell_size]), size),
                                  facts_0s)

            # Inverse attention mask, for what's retained in the state.
            retain = 1 - attend_to

            # GRU pass over the facts, according to the attention mask.
            while_valid_index = (lambda state, index: index < tf.shape(cs)[1])
            update_state = (lambda state, index: (attend_to[:, index, :] *
                                                  attention_gru(cs[:, index, :], state)[0] +
                                                  retain[:, index, :] * state))
            # start loop with most recent memory and at the first index
            memory.append(tuple(tf.while_loop(while_valid_index,
                                              (lambda state, index: (update_state(state, index), index + 1)),
                                              loop_vars=[memory[-1], 0]))[0])

            attends.append(attend_to)

            # Reuse variables so the GRU pass uses the same variables every pass.
            scope.reuse_variables()

    # Answer Module

    # a0: Final memory state. (Input to answer module)
    a0 = tf.concat([memory[-1], q], -1)

    # fc_init: Initializer for the final fully connected layer's weights.
    fc_init = tf.random_normal_initializer(stddev=0.1)

    with tf.variable_scope("answer"):
        # w_answer: The final fully connected layer's weights.
        w_answer = tf.get_variable("weight", [recurrent_cell_size * 2, D],
                                   tf.float32, initializer=fc_init)
        # Regulate the fully connected layer's weights
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                             tf.nn.l2_loss(w_answer))

        # The regressed word. This isn't an actual word yet;
        # we still have to find the closest match.
        logit = tf.expand_dims(tf.matmul(a0, w_answer), 1)

        # Make a mask over which words exist.
        with tf.variable_scope("ending"):
            all_ends = tf.reshape(input_sentence_endings, [-1, 2])
            range_ends = tf.range(tf.shape(all_ends)[0])
            ends_indices = tf.stack([all_ends[:, 0], range_ends], axis=1)
            # ind: per batch row, the largest sentence-end position.
            ind = tf.reduce_max(tf.scatter_nd(ends_indices, all_ends[:, 1],
                                              [tf.shape(q)[0], tf.shape(all_ends)[0]]),
                                axis=-1)
            range_ind = tf.range(tf.shape(ind)[0])
            mask_ends = tf.cast(tf.scatter_nd(tf.stack([ind, range_ind], axis=1),
                                              tf.ones_like(range_ind), [tf.reduce_max(ind) + 1,
                                                                        tf.shape(ind)[0]]), bool)
            # A bit of a trick. With the locations of the ends of the mask (the last periods in
            # each of the contexts) as 1 and the rest as 0, we can scan with exclusive or
            # (starting from all 1). For each context in the batch, this will result in 1s
            # up until the marker (the location of that last period) and 0s afterwards.
            mask = tf.scan(tf.logical_xor, mask_ends, tf.ones_like(range_ind, dtype=bool))

        # We score each possible word inversely with their Euclidean distance to the regressed word.
        # The highest score (lowest distance) will correspond to the selected word.
        logits = -tf.reduce_sum(tf.square(context * tf.transpose(tf.expand_dims(
            tf.cast(mask, tf.float32), -1), [1, 0, 2]) - logit), axis=-1, name='logits')

    # Training

    # gold_standard: The real answers.
    gold_standard = tf.placeholder(tf.float32, [None, 1, D], "answer")
    with tf.variable_scope('accuracy'):
        eq = tf.equal(context, gold_standard)
        # corrbool: true where a context word equals the answer on every dimension.
        corrbool = tf.reduce_all(eq, -1, name='corrbool')
        logloc = tf.reduce_max(logits, -1, keepdims=True)
        # locs: A boolean tensor that indicates where the score
        # matches the maximum score (logloc is a reduce_max). This happens
        # on multiple dimensions, so in the off chance there's one or two
        # indexes that match we make sure it matches in all indexes.
        locs = tf.equal(logits, logloc)

        # correctsbool: A boolean tensor that indicates for which
        # words in the context the score always matches the best score.
        correctsbool = tf.reduce_any(tf.logical_and(locs, corrbool), -1)
        # corrects: A tensor that is simply correctsbool cast to floats.
        corrects = tf.where(correctsbool, tf.ones_like(correctsbool, dtype=tf.float32),
                            tf.zeros_like(correctsbool, dtype=tf.float32))

        # corr: corrects, but for the right answer instead of our selected answer.
        corr = tf.where(corrbool, tf.ones_like(corrbool, dtype=tf.float32),
                        tf.zeros_like(corrbool, dtype=tf.float32))
    with tf.variable_scope("loss"):
        # Use sigmoid cross entropy as the base loss,
        # with our distances as the relative probabilities. There are
        # multiple correct labels, for each location of the answer word within the context.
        loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=tf.nn.l2_normalize(logits, -1),
                                                       labels=corr)

        # Add regularization losses, weighted by weight_decay.
        total_loss = tf.reduce_mean(loss) + weight_decay * tf.add_n(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))

    # TensorFlow's default implementation of the Adam optimizer works. We can adjust more than
    # just the learning rate, but it's not necessary to find a very good optimum.
    optimizer = tf.train.AdamOptimizer(learning_rate)

    # Once we have an optimizer, we ask it to minimize the loss
    # in order to work towards the proper training.
    # NOTE(review): opt_op is created but never run in this function.
    opt_op = optimizer.minimize(total_loss)

    # Initialize variables
    init = tf.global_variables_initializer()

    # Launch the TensorFlow session
    # (rebinding ``sess`` drops the reference to the first session)
    sess = tf.Session()
    sess.run(init)

    def prep_batch(batch_data, more_data=False):
        """
        Prepare all the preprocessing that needs to be done on a batch-by-batch basis.

        Pads sentence ends, contexts and questions to the longest item in
        the batch and returns a feed dict; when ``more_data`` is true a
        ``(feed_dict, context_words, cqas)`` tuple is returned instead.
        """
        context_vec, sentence_ends, questionvs, spt, context_words, cqas, answervs, _ = zip(*batch_data)
        ends = list(sentence_ends)
        maxend = max(map(len, ends))
        # Zero-padded matrix of 0-based sentence-end positions.
        aends = np.zeros((len(ends), maxend))
        for index, i in enumerate(ends):
            for indexj, x in enumerate(i):
                aends[index, indexj] = x - 1
        # Pair every end position with its batch row, as gather_nd expects.
        new_ends = np.zeros(aends.shape + (2,))

        for index, x in np.ndenumerate(aends):
            new_ends[index + (0,)] = index[0]
            new_ends[index + (1,)] = x

        contexts = list(context_vec)
        max_context_length = max([len(x) for x in contexts])
        contextsize = list(np.array(contexts[0]).shape)
        contextsize[0] = max_context_length
        final_contexts = np.zeros([len(contexts)] + contextsize)

        contexts = [np.array(x) for x in contexts]
        for i, context in enumerate(contexts):
            final_contexts[i, 0:len(context), :] = context
        max_query_length = max(len(x) for x in questionvs)
        querysize = list(np.array(questionvs[0]).shape)
        querysize[:1] = [len(questionvs), max_query_length]
        queries = np.zeros(querysize)
        # (row, index of last question token) pairs.
        querylengths = np.array(list(zip(range(len(questionvs)), [len(q) - 1 for q in questionvs])))
        questions = [np.array(q) for q in questionvs]
        for i, question in enumerate(questions):
            queries[i, 0:len(question), :] = question
        data = {context_placeholder: final_contexts, input_sentence_endings: new_ends,
                query: queries, input_query_lengths: querylengths, gold_standard: answervs}
        return (data, context_words, cqas) if more_data else data

    # Use TQDM if installed
    tqdm_installed = False  # NOTE(review): never read below.

    # Prepare validation set
    # Rows are sampled with replacement, batch_size * 10 of them.
    batch = np.random.randint(final_test_data.shape[0], size=batch_size * 10)
    batch_data = final_test_data[batch]

    validation_set, val_context_words, val_cqas = prep_batch(batch_data, True)
    # NOTE(review): holder duplicates the fetch list below and is never read.
    holder = [corrbool, locs, total_loss, logits, facts_0s, w_1] + attends + [query, cs, question_module_outputs]

    print('Starting session')
    start_time = time.time()
    ancr = sess.run([corrbool, locs, total_loss, logits, facts_0s, w_1] + attends +
                    [query, cs, question_module_outputs], feed_dict=validation_set)
    elapsed_time = time.time() - start_time
    print(elapsed_time)
    a = ancr[0]  # corrbool
    n = ancr[1]  # locs
    cr = ancr[2]  # total_loss
    attenders = np.array(ancr[6:-3])  # the per-pass attention tensors
    faq = np.sum(ancr[4], axis=(-1, -2))  # Number of facts in each context

    limit = 1  # only the first sampled example is reported

    # Locations of responses within contexts
    indices = np.argmax(n, axis=1)

    # Locations of actual answers within contexts
    indicesc = np.argmax(a, axis=1)
    response = ""

    ans = 0
    inp = ''

    for i, e, cw, cqa in list(zip(indices, indicesc, val_context_words, val_cqas))[:limit]:
        ccc = " ".join(cw)
        print("TEXT: ", ccc)
        inp = ccc
        print("QUESTION: ", " ".join(cqa[3]))
        print("RESPONSE: ", cw[i], ["Correct", "Incorrect"][i != e])
        ans = i
        print("EXPECTED: ", cw[e])
        print()
    # For safety, return this if nothing is found
    sess.close()

    print('--')
    # NOTE(review): ``ans`` is an index into the context word list while
    # ``tot_index`` accumulates len(line) of each entry (characters, if the
    # entries are strings) -- confirm the units are meant to match.
    tot_index = 0
    for line in fulldata:
        tot_index = tot_index + len(line)
        if tot_index >= ans:
            return line

    return response
def _build_net(self):
    """Assemble the eval/target Q-networks, the TD loss and the training op.

    Creates the placeholders ``self.s``, ``self.s_``, ``self.r`` and
    ``self.a`` and stores ``self.q_eval``, ``self.q_next``, ``self.q_target``,
    ``self.q_eval_wrt_a``, ``self.loss`` and ``self._train_op`` on the
    instance. Reads ``self.n_features``, ``self.n_actions``, ``self.gamma``
    and ``self.lr``.
    """
    # ------------------ placeholders ------------------------
    state_shape = [None, self.n_features]
    self.s = tf.placeholder(tf.float32, state_shape, name='s')    # state
    self.s_ = tf.placeholder(tf.float32, state_shape, name='s_')  # next state
    self.r = tf.placeholder(tf.float32, [None], name='r')         # reward
    self.a = tf.placeholder(tf.int32, [None], name='a')           # action

    # Both networks share the same initializer objects.
    init_kwargs = {
        'kernel_initializer': tf.random_normal_initializer(0., 0.3),
        'bias_initializer': tf.constant_initializer(0.1),
    }

    def q_network(inputs, scope, hidden_name, output_name):
        """Return Q-values: a 20-unit ReLU layer followed by a linear layer."""
        with tf.variable_scope(scope):
            hidden = tf.layers.dense(inputs, 20, tf.nn.relu,
                                     name=hidden_name, **init_kwargs)
            return tf.layers.dense(hidden, self.n_actions,
                                   name=output_name, **init_kwargs)

    # ------------------ evaluation and target networks ------------------
    self.q_eval = q_network(self.s, 'eval_net', 'e1', 'q')
    self.q_next = q_network(self.s_, 'target_net', 't1', 't2')

    with tf.variable_scope('q_target'):
        best_next_q = tf.reduce_max(self.q_next, axis=1, name='Qmax_s_')
        bootstrapped = self.r + self.gamma * best_next_q  # shape=(None, )
        # The target is treated as a constant during back-propagation.
        self.q_target = tf.stop_gradient(bootstrapped)

    with tf.variable_scope('q_eval'):
        # Pair every batch row with the action taken in that row.
        row_ids = tf.range(tf.shape(self.a)[0], dtype=tf.int32)
        a_indices = tf.stack([row_ids, self.a], axis=1)
        self.q_eval_wrt_a = tf.gather_nd(params=self.q_eval,
                                         indices=a_indices)  # shape=(None, )

    with tf.variable_scope('loss'):
        td_error = tf.squared_difference(self.q_target, self.q_eval_wrt_a,
                                         name='TD_error')
        self.loss = tf.reduce_mean(td_error)

    with tf.variable_scope('train'):
        optimizer = tf.train.RMSPropOptimizer(self.lr)
        self._train_op = optimizer.minimize(self.loss)
def _tower_func(self, gpu_id):
    """Build the model for one tower and return its loss.

    Args:
        gpu_id: index into the per-tower ``self.*_splits`` lists.

    Returns:
        Whatever ``self._compute_loss`` returns for this tower.

    NOTE(review): the source this was taken from had lost its indentation;
    the extent of each ``with tf.variable_scope`` block below is
    reconstructed -- confirm against the trained checkpoints' variable names.
    """
    tower_data = self.data_splits[gpu_id]
    tower_num = self.data_num_splits[gpu_id]
    tower_len = self.data_len_splits[gpu_id]
    tower_ab = self.data_ab_splits[gpu_id]
    y_weight = self.y_weight_splits[gpu_id]
    # Debug op: prints the shape of y_weight whenever it is evaluated.
    y_weight = tf.Print(y_weight, [tf.shape(y_weight)],
                        message="y_weight shape init: ")

    self.batch_size = tf.shape(tower_data)[0]
    self.sentence_len = tf.shape(tower_data)[2]
    self.sentence_num = tf.shape(tower_data)[1] - 1
    self.y_weight_len = tf.shape(y_weight)[1]

    # Encoder inputs: the first `sentence_num` entries along axis 1.
    x = tf.slice(tower_data, [0, 0, 0],
                 [self.batch_size, self.sentence_num, self.sentence_len])
    x_num = tower_num - 1
    x_len = tf.slice(tower_len, [0, 0],
                     [self.batch_size, self.sentence_num])
    x_ab = tf.slice(tower_ab, [0, 0],
                    [self.batch_size, self.sentence_num])

    # Decoder target: for each batch row, the sentence at position x_num.
    batch_ids = tf.expand_dims(tf.range(self.batch_size), axis=1)
    target_ids = tf.expand_dims(x_num, axis=-1)
    y_index = tf.concat([batch_ids, target_ids], axis=1)
    y_data = tf.gather_nd(tower_data, y_index)
    shifted_size = [self.batch_size, self.sentence_len - 1]
    y_in = tf.slice(y_data, [0, 0], shifted_size)
    y_out = tf.slice(y_data, [0, 1], shifted_size)
    y_weight = tf.slice(y_weight, [0, 1],
                        [self.batch_size, self.y_weight_len - 1])
    y_len = tf.gather_nd(tower_len - 1, y_index)

    def make_cell(kind):
        """Build the `<kind>` RNN cell; wrap it in dropout when training."""
        layer_size = getattr(model_config, kind + "_cell_layer_size")
        layer_num = getattr(model_config, kind + "_cell_layer_num")
        cell = build_rnn_cell(layer_size, layer_num)
        if self.mode != tf.contrib.learn.ModeKeys.TRAIN:
            return cell
        # The keep probability is looked up only in TRAIN mode.
        return tf.contrib.rnn.DropoutWrapper(
            cell=cell,
            state_keep_prob=getattr(model_config, kind + "_cell_keep_prob"),
            variational_recurrent=True,
            input_size=tf.TensorShape(layer_size),
            dtype=tf.float32)

    # Embedding table
    with tf.variable_scope("embedding"):
        self.embedding = tf.get_variable(
            "embedding", [self.vocab_size, model_config.embed_size])

    # Encoder
    with tf.variable_scope("encoder"):
        self.sentence_cell = make_cell("sentence")
        self.session_cell = make_cell("session")
        # get last hidden state from session cell
        session_state = self._build_encoder(x, x_num, x_len, x_ab)

    # select state a or b to decode
    intermediate_state = tf.reshape(
        session_state, [-1, model_config.session_cell_layer_size])
    projection_width = (model_config.decoder_cell_layer_size *
                        model_config.intent_num)
    with tf.variable_scope("intent_projection"):
        decoder_intent_state_middle = layers_core.dense(
            intermediate_state,
            projection_width,
            activation=tf.nn.relu,
            use_bias=True,
            name="intent_proj_middle")
        decoder_intent_state = layers_core.dense(
            decoder_intent_state_middle,
            projection_width,
            use_bias=True,
            name="intent_proj")
    decoder_intent_state = tf.reshape(
        decoder_intent_state, [-1, model_config.decoder_cell_layer_size])

    # Decoder
    with tf.variable_scope("decoder"):
        self.decoder_cell = make_cell("decoder")
        logits = self._build_decoder(decoder_intent_state, y_in, y_len)
        loss = self._compute_loss(logits, y_out, y_len, y_weight,
                                  decoder_intent_state)
    return loss
def VIN(state_input, state_dim, action_dim, config, weights=None):
    """Build a value-iteration-network policy graph.

    Args:
        state_input: 2-D tensor, one row per sample. Columns 0 and 1 are
            turned into grid positions, columns 2-4 feed the reward map, and
            the whole row (``state_dim`` columns) is fed to the output layers.
        state_dim: number of columns of ``state_input``.
        action_dim: number of outputs of the final layer.
        config: object providing ``numstates``, ``k``, ``width``, ``ch_q``,
            ``vel_hidden1`` and ``hidden1``.
        weights: optional list of the ten variables to reuse, in the order
            returned by this function. When ``None``, new variables are
            created.

    Returns:
        ``(state_input, output, weights)`` where ``output`` is the tanh
        activation of the last layer and ``weights`` is the list
        ``[reward_hidden1, reward_bias1, reward_output, reward_biasOut, w,
        w_fb, w_h1, bias1, w_o, bias_o]``.
    """
    numactions = action_dim
    numstates = config.numstates
    k = config.k
    width = config.width
    assert width % 2 == 1
    ch_q = config.ch_q
    batch_size = tf.shape(state_input)[0]

    # `is None` instead of `== None`: equality on lists/arrays is the wrong
    # test here.
    if weights is None:
        def small_random(*shape):
            """Return a float32 variable drawn from N(0, 1) scaled by 1e-3."""
            return tf.Variable(np.random.randn(*shape) * 0.001,
                               dtype=tf.float32)

        # Reward map
        reward_hidden1 = small_random(3, config.vel_hidden1)
        reward_bias1 = small_random(1, config.vel_hidden1)
        reward_output = small_random(numstates, numstates,
                                     config.vel_hidden1)
        reward_biasOut = small_random(1, numstates, numstates)
        w = small_random(width, width, 1, ch_q)
        # feedback weights from v layer into q layer
        # (~transition probabilities in Bellman equation)
        w_fb = small_random(width, width, 1, ch_q)
        # output weights
        w_h1 = small_random(ch_q + state_dim, config.hidden1)
        bias1 = small_random(1, config.hidden1)
        w_o = small_random(config.hidden1, numactions)
        bias_o = small_random(1, numactions)
    else:
        (reward_hidden1, reward_bias1, reward_output, reward_biasOut,
         w, w_fb, w_h1, bias1, w_o, bias_o) = weights[:10]

    # Make reward maps from columns 2..4 of the state.
    state_input_Transpose = tf.transpose(state_input, perm=[1, 0])
    velocities = tf.stack([state_input_Transpose[2],
                           state_input_Transpose[3],
                           state_input_Transpose[4]])
    velocities = tf.transpose(velocities, perm=[1, 0])
    r_hidden1 = tf.nn.relu(tf.matmul(velocities, reward_hidden1) +
                           reward_bias1)
    r = tf.nn.tanh(tf.tensordot(r_hidden1, reward_output,
                                axes=[[1], [2]]) + reward_biasOut)
    r = tf.reshape(r, [batch_size, numstates, numstates, 1])

    q = conv2d(r, w)
    v = tf.reduce_max(q, axis=3, keep_dims=True, name="v")
    wwfb = tf.concat([w, w_fb], 2)

    # value iterations
    for _ in range(k - 1):
        q = conv2d(tf.concat([r, v], 3), wwfb)
        v = tf.reduce_max(q, axis=3, keep_dims=True, name="v")
    q = conv2d(tf.concat([r, v], 3), wwfb)

    def grid_index(coord, offset, span):
        """Map a coordinate to a grid cell, clamped to [0, numstates - 1]."""
        cell = tf.cast(tf.floordiv(coord + offset, span / numstates),
                       tf.int32)
        return tf.minimum(tf.maximum(cell, 0), numstates - 1)

    # Calculate position. Both indices are clamped: an unclamped index (as
    # S2 previously was) can equal `numstates` for a coordinate on the upper
    # edge and then indexes outside `q` in gather_nd.
    S1 = grid_index(state_input_Transpose[0], 1.1, 2.2)
    S2 = grid_index(state_input_Transpose[1], .2, .4)

    # Select the conv-net channels at the state position
    ins1 = tf.range(batch_size)
    idx_in = tf.transpose(tf.stack([ins1, S1, S2]), [1, 0])

    # Output action
    q_out = tf.gather_nd(q, idx_in, name="q_out")
    inputs = tf.concat([state_input, q_out], axis=1)
    hiddenLayer1 = tf.nn.relu(tf.matmul(inputs, w_h1) + bias1)
    output = tf.nn.tanh(tf.matmul(hiddenLayer1, w_o) + bias_o)
    return state_input, output, [reward_hidden1, reward_bias1,
                                 reward_output, reward_biasOut, w, w_fb,
                                 w_h1, bias1, w_o, bias_o]
def refine_detections_graph(rois, probs, deltas, window, cfg):
    """Refine classified proposals, filter overlaps and return final detections.

    Inputs:
        rois: ROI boxes as (y1, x1, y2, x2) in normalized coordinates. A
            leading axis of size 1 is squeezed away before use.
        probs: class probabilities. Two size-1 axes at position 1 are
            squeezed away, leaving [N, num_classes].
        deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
            bounding box deltas.
        window: (y1, x1, y2, x2) in image coordinates. The part of the image
            that contains the image excluding the padding.
        cfg: configuration object; this function reads NMS_ROIS_TRAINING,
            BBOX_STD_DEV, DETECTION_MIN_CONFIDENCE, DETECTION_MAX_INSTANCES
            and DETECTION_NMS_THRESHOLD.

    Returns detections shaped:
        [cfg.DETECTION_MAX_INSTANCES, (y1, x1, y2, x2, class_id, score)],
        zero-padded when fewer detections survive.

    NOTE(review): statement nesting was reconstructed from a source that had
    lost its indentation -- confirm.
    """
    # Drop the singleton axes so probs is [N, num_classes] and rois is [N, 4].
    probs = tf.squeeze(probs, axis=1)
    probs = tf.squeeze(probs, axis=1)
    rois = tf.squeeze(rois, axis=0)
    # Class IDs per ROI
    class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
    # Class probability of the top class of each ROI
    class_scores = tf.reduce_max(probs, axis=1)
    # Class-specific bounding box deltas.
    # NOTE(review): the row index range is taken from cfg.NMS_ROIS_TRAINING
    # rather than from the actual number of ROIs -- confirm they always match.
    ix = tf.range(cfg.NMS_ROIS_TRAINING)
    idece = tf.stack([ix, class_ids], axis=1)
    deltas_specific = tf.gather_nd(deltas, idece)
    refined_rois = apply_box_deltas_graph(rois, deltas_specific * cfg.BBOX_STD_DEV)
    # Clip boxes to image window
    refined_rois = clip_boxes_graph(refined_rois, window)

    # TODO: Filter out boxes with zero area

    # Filter out background boxes
    keep = tf.where(class_ids > 0)[:, 0]
    # Filter out low confidence boxes
    if cfg.DETECTION_MIN_CONFIDENCE:
        conf_keep = tf.where(class_scores >= cfg.DETECTION_MIN_CONFIDENCE)[:, 0]
        keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0))
        keep = tf.sparse_tensor_to_dense(keep)[0]

    # Apply per-class NMS
    # 1. Prepare variables
    pre_nms_class_ids = tf.gather(class_ids, keep)
    pre_nms_scores = tf.gather(class_scores, keep)
    pre_nms_rois = tf.gather(refined_rois, keep)
    unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

    def nms_keep_map(class_id):
        """Apply Non-Maximum Suppression on ROIs of the given class."""
        # Indices of ROIs of the given class
        ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
        # Apply NMS
        class_keep = tf.image.non_max_suppression(
            tf.gather(pre_nms_rois, ixs),
            tf.gather(pre_nms_scores, ixs),
            max_output_size=cfg.DETECTION_MAX_INSTANCES,
            iou_threshold=cfg.DETECTION_NMS_THRESHOLD)
        # Map indices back into the un-filtered ROI list
        class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
        # Pad with -1 so returned tensors have the same shape
        gap = cfg.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
        class_keep = tf.pad(class_keep, [(0, gap)], mode='CONSTANT', constant_values=-1)
        # Set shape so map_fn() can infer result shape
        class_keep.set_shape([cfg.DETECTION_MAX_INSTANCES])
        return class_keep

    # 2. Map over class IDs
    nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, dtype=tf.int64)
    # 3. Merge results into one list, and remove -1 padding
    nms_keep = tf.reshape(nms_keep, [-1])
    nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
    # 4. Compute intersection between keep and nms_keep
    keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), tf.expand_dims(nms_keep, 0))
    keep = tf.sparse_tensor_to_dense(keep)[0]
    # Keep top detections, highest score first
    roi_count = cfg.DETECTION_MAX_INSTANCES
    class_scores_keep = tf.gather(class_scores, keep)
    num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
    top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
    keep = tf.gather(keep, top_ids)

    # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
    # Coordinates are normalized.
    detections = tf.concat([
        tf.gather(refined_rois, keep),
        tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis],
        tf.gather(class_scores, keep)[..., tf.newaxis]
    ], axis=1)

    # Pad with zeros if detections < DETECTION_MAX_INSTANCES
    gap = cfg.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
    detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")
    return detections
# NOTE(review): every call below is written without arguments, so this reads
# like an index of TensorFlow API names grouped by keyword (slice, gather,
# mask, gamma, ...) rather than code meant to be executed -- confirm before
# relying on it.
tf.random_shuffle()
tf.expm1()
tf.as_dtype()
tf.as_string()
# slice
tf.slice()
tf.sparse_slice()
tf.strided_slice()
tf.convert_to_tensor_or_indexed_slices()
tf.resource_strided_slice_assign()
tf.strided_slice_assign()
tf.strided_slice_grad()
tf.gather()
tf.gather_nd()
tf.gather_v2()
tf.get_summary_op()
tf.gradients()
tf.boolean_mask()
tf.sparse_mask()
tf.sequence_mask()
tf.random_gamma()
tf.digamma()
tf.igamma()
tf.lgamma()
tf.polygamma()
tf.igammac()
tf.tensor_shape.as_shape()
def _gather_logprob(logprob, target):
    """Gather ``logprob[i, target[i]]`` for every index ``i`` of the first axis.

    Args:
        logprob: tensor indexed along its first two axes.
        target: 1-D integer tensor with one index per row of ``logprob``.

    Returns:
        The result of ``tf.gather_nd`` on the ``(row, target)`` index pairs.
    """
    num_rows = shape_list(logprob)[0]
    row_ids = tf.range(num_rows)
    # One (row, column) pair per row.
    index_pairs = tf.stack([row_ids, target], axis=1)
    return tf.gather_nd(logprob, index_pairs)
def _build_train_op(self):
    """Builds the quantile-Huber training op from replay data.

    Returns:
        A 3-tuple:
          * the op produced by ``self.optimizer.minimize`` on the mean loss,
          * ``tf.squeeze`` of the chosen-action quantile values
            (batch_size x num_tau_samples x 1 before squeezing),
          * ``tf.squeeze`` of the replay quantiles at tau-prime index 0
            (batch_size x num_tau_samples x 1 before squeezing).

    NOTE(review): statement nesting was reconstructed from a source that had
    lost its indentation -- confirm.
    """
    batch_size = tf.shape(self._replay.rewards)[0]

    # No gradient flows into the target network values.
    target_quantile_values = tf.stop_gradient(
        self._build_target_quantile_values_op())
    # Reshape to self.num_tau_prime_samples x batch_size x 1 since this is
    # the manner in which the target_quantile_values are tiled.
    target_quantile_values = tf.reshape(
        target_quantile_values, [self.num_tau_prime_samples, batch_size, 1])
    # Transpose dimensions so that the dimensionality is batch_size x
    # self.num_tau_prime_samples x 1 to prepare for computation of
    # Bellman errors.
    # Final shape of target_quantile_values:
    # batch_size x num_tau_prime_samples x 1.
    target_quantile_values = tf.transpose(target_quantile_values, [1, 0, 2])

    # Shape of indices: (num_tau_samples x batch_size) x 1.
    # Expand dimension by one so that it can be used to index into all the
    # quantiles when using the tf.gather_nd function (see below).
    indices = tf.range(self.num_tau_samples * batch_size)[:, None]

    # Expand the dimension by one so that it can be used to index into all the
    # quantiles when using the tf.gather_nd function (see below).
    reshaped_actions = self._replay.actions[:, None]
    reshaped_actions = tf.tile(reshaped_actions, [self.num_tau_samples, 1])
    # Shape of reshaped_actions: (num_tau_samples x batch_size) x 2.
    reshaped_actions = tf.concat([indices, reshaped_actions], axis=1)
    chosen_action_quantile_values = tf.gather_nd(
        self._replay_net_quantile_values, reshaped_actions)
    # Reshape to self.num_tau_samples x batch_size x 1 since this is the manner
    # in which the quantile values are tiled.
    chosen_action_quantile_values = tf.reshape(
        chosen_action_quantile_values, [self.num_tau_samples, batch_size, 1])
    # Transpose dimensions so that the dimensionality is batch_size x
    # self.num_tau_samples x 1 to prepare for computation of
    # Bellman errors.
    # Final shape of chosen_action_quantile_values:
    # batch_size x num_tau_samples x 1.
    chosen_action_quantile_values = tf.transpose(
        chosen_action_quantile_values, [1, 0, 2])  # batch_size x quantiles x 1

    # Shape of bellman_errors and huber_loss:
    # batch_size x num_tau_prime_samples x num_tau_samples x 1.
    bellman_errors = target_quantile_values[:, :, None, :] - chosen_action_quantile_values[:, None, :, :]
    # The huber loss (see Section 2.3 of the paper) is defined via two cases:
    # case_one: |bellman_errors| <= kappa
    # case_two: |bellman_errors| > kappa
    huber_loss_case_one = tf.to_float(
        tf.abs(bellman_errors) <= self.kappa) * 0.5 * bellman_errors**2
    huber_loss_case_two = tf.to_float(
        tf.abs(bellman_errors) > self.kappa) * self.kappa * (
            tf.abs(bellman_errors) - 0.5 * self.kappa)
    huber_loss = huber_loss_case_one + huber_loss_case_two

    # Reshape replay_quantiles to batch_size x num_tau_samples x 1
    replay_quantiles = tf.reshape(self._replay_net_quantiles, [self.num_tau_samples, batch_size, 1])
    replay_quantiles = tf.transpose(replay_quantiles, [1, 0, 2])  # batch_size x quantiles x 1

    # Tile by num_tau_prime_samples along a new dimension. Shape is now
    # batch_size x num_tau_prime_samples x num_tau_samples x 1.
    # These quantiles will be used for computation of the quantile huber loss
    # below (see section 2.3 of the paper).
    replay_quantiles = tf.to_float(
        tf.tile(replay_quantiles[:, None, :, :], [1, self.num_tau_prime_samples, 1, 1]))
    # Shape: batch_size x num_tau_prime_samples x num_tau_samples x 1.
    quantile_huber_loss = (tf.abs(
        tf.stop_gradient(replay_quantiles) - tf.stop_gradient(tf.to_float(bellman_errors < 0))) * huber_loss) / self.kappa
    # Sum over current quantile value (num_tau_samples) dimension,
    # average over target quantile value (num_tau_prime_samples) dimension.
    # Shape: batch_size x num_tau_prime_samples x 1.
    loss = tf.reduce_sum(quantile_huber_loss, axis=2)
    # Shape: batch_size x 1.
    loss = tf.reduce_mean(loss, axis=1)

    # TODO(kumasaurabh): Add prioritized replay functionality here.
    update_priorities_op = tf.no_op()
    with tf.control_dependencies([update_priorities_op]):
        if self.summary_writer is not None:
            with tf.variable_scope('Losses'):
                tf.summary.scalar('QuantileLoss', tf.reduce_mean(loss))
        # Besides the train op, the per-sample quantile values and the
        # quantiles at tau-prime index 0 are returned to the caller.
        return self.optimizer.minimize(tf.reduce_mean(loss)),\
            tf.squeeze(chosen_action_quantile_values), \
            tf.squeeze(replay_quantiles[:,0,:,:])
def __init__(self):
    """Build the PPO graph (critic, discrete actor, losses) and initialise it.

    Creates a session, the placeholders ``self.tfs`` (state),
    ``self.tfdc_r`` (discounted return), ``self.tfa`` (action index) and
    ``self.tfadv`` (advantage), the critic and actor train ops, the
    ``self.update_oldpi_op`` assignment list, and finally runs the global
    variable initializer.

    Reads the module-level names ``S_DIM``, ``C_LR``, ``A_LR`` and ``METHOD``.

    NOTE(review): scope boundaries were reconstructed from a source that had
    lost its indentation -- confirm.
    """
    self.sess = tf.Session()
    self.tfs = tf.placeholder(tf.float32, [None, S_DIM], 'state')

    # critic
    with tf.variable_scope('critic'):
        # Within a variable_scope that enables sharing, tf.Variable() with
        # the same name creates distinct variables, while tf.get_variable()
        # with the same name returns the same variable.
        l1 = tf.layers.dense(self.tfs, 100, tf.nn.relu)
        self.v = tf.layers.dense(l1, 1)
        self.tfdc_r = tf.placeholder(tf.float32, [None, 1], 'discounted_r')
        self.advantage = self.tfdc_r - self.v
        self.closs = tf.reduce_mean(tf.square(self.advantage))
        self.ctrain_op = tf.train.AdamOptimizer(C_LR).minimize(self.closs)

    # actor (discrete-action networks; the continuous-action variant that
    # sampled from a distribution is not used here)
    self.pi, pi_params = self._build_anetDiscrete('pi', trainable=True)
    oldpi, oldpi_params = self._build_anetDiscrete('oldpi', trainable=False)

    with tf.variable_scope('update_oldpi'):
        # Copy every parameter of pi into the matching parameter of oldpi.
        self.update_oldpi_op = [
            oldp.assign(p) for p, oldp in zip(pi_params, oldpi_params)
        ]

    self.tfa = tf.placeholder(tf.int32, [None], 'action')  # one index per row
    self.tfadv = tf.placeholder(tf.float32, [None, 1], 'advantage')

    # Pair every batch row with the action taken in that row, then pick the
    # corresponding entry from each policy output.
    a_indices = tf.stack(
        [tf.range(tf.shape(self.tfa)[0], dtype=tf.int32), self.tfa], axis=1)
    pi_prob = tf.gather_nd(params=self.pi, indices=a_indices)  # (None, )
    oldpi_prob = tf.gather_nd(params=oldpi, indices=a_indices)  # (None, )
    # Bug fix: the ratio is (None, ) while the advantage is (None, 1).
    # Multiplying them directly broadcasts to an N x N matrix, so the
    # surrogate would be averaged over every (sample i, advantage j) pair.
    # Give the ratio the advantage's (None, 1) shape first.
    ratio = tf.expand_dims(pi_prob / oldpi_prob, axis=1)

    with tf.variable_scope('loss'):
        with tf.variable_scope('surrogate'):
            surr = ratio * self.tfadv
        if METHOD['name'] == 'kl_pen':
            self.tflam = tf.placeholder(tf.float32, None, 'lambda')
            # NOTE(review): kl_divergence is given `oldpi` and `self.pi`,
            # which are indexed as plain tensors by gather_nd above --
            # confirm this branch works with the discrete networks.
            kl = tf.distributions.kl_divergence(oldpi, self.pi)
            self.kl_mean = tf.reduce_mean(kl)
            self.aloss = -(tf.reduce_mean(surr - self.tflam * kl))
        else:
            # clipping method, find this is better
            clipped_ratio = tf.clip_by_value(ratio,
                                             1. - METHOD['epsilon'],
                                             1. + METHOD['epsilon'])
            self.aloss = -tf.reduce_mean(
                tf.minimum(surr, clipped_ratio * self.tfadv))

    with tf.variable_scope('atrain'):
        self.atrain_op = tf.train.AdamOptimizer(A_LR).minimize(self.aloss)

    # tf.summary.FileWriter("logs/", self.sess.graph)
    self.sess.run(tf.global_variables_initializer())
def roi_align(boxes, feature_maps, cfg):
    """Crop-and-resize every box from the feature-map level matching its size.

    Args:
        boxes: rank-3 tensor; axis 0 is the batch, the last axis holds
            (y1, x1, y2, x2).
        feature_maps: sequence of three feature maps, indexed by level 0..2.
        cfg: configuration providing ``image_size`` and ``pool_shape``.

    Returns:
        Pooled features, one entry per box, re-sorted by batch index and
        then by box index.
    """
    # Box heights and widths.
    top, left, bottom, right = tf.split(boxes, 4, axis=2)
    box_h = bottom - top
    box_w = right - left

    # Assign each ROI to one of the three levels based on its area
    # (Equation 1 in the Feature Pyramid Networks paper, adapted to
    # normalized coordinates), clamped to the range [0, 2].
    img_shape = cfg.image_size
    img_area = tf.cast(img_shape[0] * img_shape[1], tf.float32)
    raw_level = log2_graph(
        tf.sqrt(box_h * box_w) / (224.0 / tf.sqrt(img_area)))
    shifted_level = 1 + tf.cast(tf.round(raw_level), tf.int32)
    clamped_level = tf.minimum(2, tf.maximum(0, shifted_level))
    roi_level = tf.squeeze(clamped_level, 2)

    # Pool the boxes of each level from that level's feature map.
    crops_per_level = []
    positions_per_level = []
    for level in range(3):
        # (batch index, box index) of every box assigned to this level.
        hits = tf.where(tf.equal(roi_level, level))
        boxes_here = tf.gather_nd(boxes, hits)
        # crop_and_resize needs the batch index of each box.
        batch_ids = tf.cast(hits[:, 0], tf.int32)
        positions_per_level.append(hits)

        # Stop gradient propagation to ROI proposals.
        boxes_here = tf.stop_gradient(boxes_here)
        batch_ids = tf.stop_gradient(batch_ids)

        # A single bilinear sample per bin, as done by crop_and_resize.
        # Result: [num_boxes_at_level, pool_height, pool_width, channels]
        crop = tf.image.crop_and_resize(
            feature_maps[level], boxes_here, batch_ids,
            [cfg.pool_shape, cfg.pool_shape], method="bilinear")
        crops_per_level.append(crop)

    # Pack pooled features into one tensor.
    pooled = tf.concat(crops_per_level, axis=0)

    # Pack the (batch, box) positions into one array and add a third column
    # holding each row's position in `pooled`.
    positions = tf.concat(positions_per_level, axis=0)
    num_rows = tf.shape(positions)[0]
    pooled_order = tf.expand_dims(tf.range(num_rows), 1)
    positions = tf.concat(
        [tf.cast(positions, tf.int32), pooled_order], axis=1)

    # Restore the original box order: sort by batch then box index. The two
    # columns are merged into a single key because top_k sorts on one value.
    sort_key = positions[:, 0] * 100000 + positions[:, 1]
    descending = tf.nn.top_k(sort_key, k=tf.shape(positions)[0]).indices
    ascending = descending[::-1]
    gather_order = tf.gather(positions[:, 2], ascending)
    pooled = tf.gather(pooled, gather_order)

    # Re-add the batch dimension
    # pooled = tf.expand_dims(pooled, 0)
    return pooled
# this slice, gather, and reshape picks out the zeta vectors # along the "bi == bj" diagonal. slice_ndx = tf.constant([[i, j, k] for i in range(batch_size) for j in range(batch_size) for k in range(num_choice_col) if i == j]) if (False): print("These are the indices which need to be used") for f in sess.run(slice_ndx): print(f) # --------------------------------------------- #%% # select the large zeta-tensor into a plain vector zeta_v = tf.gather_nd(zeta_0, slice_ndx) # NOTE WELL: we do not "learn" on zeta. # reshape that vector into the desired vectors zeta = tf.reshape(zeta_v, [batch_size, num_choice_col]) print("After contraction-by-selection, shape of Zeta: %s" % (zeta.shape)) # --------------------------------------------- #%% ''' # Remove layer 3 (input was L0, so this is second hidden) A3 = tf.Variable(tf.random_normal(shape=[layer_2_width, layer_3_width], mean=0.0, stddev=a_stdv)) print("32 shape: %s" % str(A3.shape)) b3 = tf.Variable(tf.random_normal(shape=[layer_3_width], mean=0.0, stddev=b_stdv)) print("b3 shape: %s" % str(b3.shape)) out3 = tf.add(tf.matmul(out2, A3), b3)
where scatter_nd meshgrid """ """ Where(tensor) tensor = True """ a = tf.random.normal([3, 3]) # [[True, True, False], [False, True, False], [True, False, True]] mask = a > 0 a1 = tf.boolean_mask(a, mask=mask) # 1D l=5 array indices = tf.where(mask) a2 = tf.gather_nd(a, indices=indices) """ where(cond, A, B) T/F matrix choose A[i] or B[i] according to T/F matrix """ """ scatter_nd indices, updates, shape background is a zeros array """ b = tf.constant([[4], [3], [1], [7]]) updates = tf.constant([9, 10, 11,
def __init__(self, points, features, is_training, setting):
    """Build the X-conv feature hierarchy plus classification/segmentation heads.

    Args:
        points: point tensor; its first dimension is used as the batch size N.
        features: optional per-point features, or None.
        is_training: forwarded to the dense, xconv and dropout layers.
        setting: configuration object; this method reads xconv_params,
            fc_params_classification, fc_params_segmentation,
            with_X_transformation, sorting_method, sampling, data_dim,
            with_global and (optionally) xdconv_params.

    Populates self.layer_pts, self.layer_fts, self.fc_layers_classification
    and self.fc_layers_segmentation.

    NOTE(review): statement nesting was reconstructed from a source that had
    lost its indentation -- in particular, confirm that the segmentation FC
    stack sits outside the `hasattr(setting, 'xdconv_params')` block.
    """
    xconv_params = setting.xconv_params
    fc_params_classification = setting.fc_params_classification
    fc_params_segmentation = setting.fc_params_segmentation
    with_X_transformation = setting.with_X_transformation
    sorting_method = setting.sorting_method
    N = tf.shape(points)[0]

    # The farthest-point-sampling op is imported only when it is needed.
    if setting.sampling == 'fps':
        from sampling import tf_sampling

    self.layer_pts = [points]
    if features is None:
        self.layer_fts = [features]
    else:
        # Lift the raw features with a dense layer before the first X-conv.
        features = tf.reshape(features, (N, -1, setting.data_dim - 3), name='features_reshape')
        C_fts = xconv_params[0]['C'] // 2
        features_hd = pf.dense(features, C_fts, 'features_hd', is_training)
        self.layer_fts = [features_hd]

    for layer_idx, layer_param in enumerate(xconv_params):
        tag = 'xconv_' + str(layer_idx + 1) + '_'
        K = layer_param['K']
        D = layer_param['D']
        P = layer_param['P']
        C = layer_param['C']
        links = layer_param['links']
        if setting.sampling != 'random' and links:
            print(
                'Error: flexible links are supported only when random sampling is used!'
            )
            exit()

        # get k-nearest points
        pts = self.layer_pts[-1]
        fts = self.layer_fts[-1]
        # Reuse the previous points as queries when P is -1 or unchanged
        # from the previous layer; otherwise sample P query points.
        if P == -1 or (layer_idx > 0 and P == xconv_params[layer_idx - 1]['P']):
            qrs = self.layer_pts[-1]
        else:
            if setting.sampling == 'fps':
                fps_indices = tf_sampling.farthest_point_sample(P, pts)
                batch_indices = tf.tile(
                    tf.reshape(tf.range(N), (-1, 1, 1)), (1, P, 1))
                indices = tf.concat(
                    [batch_indices, tf.expand_dims(fps_indices, -1)], axis=-1)
                qrs = tf.gather_nd(pts, indices, name=tag + 'qrs')  # (N, P, 3)
            elif setting.sampling == 'ids':
                indices = pf.inverse_density_sampling(pts, K, P)
                qrs = tf.gather_nd(pts, indices)
            elif setting.sampling == 'random':
                # Takes the first P points along axis 1.
                qrs = tf.slice(pts, (0, 0, 0), (-1, P, -1), name=tag + 'qrs')  # (N, P, 3)
            else:
                print('Unknown sampling method!')
                exit()
        self.layer_pts.append(qrs)

        if layer_idx == 0:
            C_pts_fts = C // 2 if fts is None else C // 4
            depth_multiplier = 4
        else:
            C_prev = xconv_params[layer_idx - 1]['C']
            C_pts_fts = C_prev // 4
            depth_multiplier = math.ceil(C / C_prev)
        # The global flag is only passed on for the last X-conv layer.
        with_global = (setting.with_global and layer_idx == len(xconv_params) - 1)
        fts_xconv = xconv(pts, fts, qrs, tag, N, K, D, P, C, C_pts_fts,
                          is_training, with_X_transformation,
                          depth_multiplier, sorting_method, with_global)

        # Concatenate features from linked earlier layers, cut to the first
        # P entries along axis 1, with this layer's output.
        fts_list = []
        for link in links:
            fts_from_link = self.layer_fts[link]
            if fts_from_link is not None:
                fts_slice = tf.slice(fts_from_link, (0, 0, 0), (-1, P, -1), name=tag + 'fts_slice_' + str(-link))
                fts_list.append(fts_slice)
        if fts_list:
            fts_list.append(fts_xconv)
            self.layer_fts.append(
                tf.concat(fts_list, axis=-1, name=tag + 'fts_list_concat'))
        else:
            self.layer_fts.append(fts_xconv)

    ####### Classification Branch
    # Starts from the output of the last X-conv layer.
    self.fc_layers_classification = [self.layer_fts[-1]]
    for layer_idx, layer_param in enumerate(fc_params_classification):
        C = layer_param['C']
        dropout_rate = layer_param['dropout_rate']
        fc = pf.dense(self.fc_layers_classification[-1], C, 'fc_class_{:d}'.format(layer_idx), is_training)
        fc_drop = tf.layers.dropout(
            fc, dropout_rate, training=is_training,
            name='fc_class_{:d}_drop'.format(layer_idx))
        self.fc_layers_classification.append(fc_drop)

    ####### Segmentation Branch
    if hasattr(setting, 'xdconv_params'):
        for layer_idx, layer_param in enumerate(setting.xdconv_params):
            tag = 'xdconv_' + str(layer_idx + 1) + '_'
            K = layer_param['K']
            D = layer_param['D']
            pts_layer_idx = layer_param['pts_layer_idx']
            qrs_layer_idx = layer_param['qrs_layer_idx']

            # The +1 offset skips entry 0 of layer_pts/layer_fts, which
            # holds the network inputs.
            pts = self.layer_pts[pts_layer_idx + 1]
            fts = self.layer_fts[
                pts_layer_idx + 1] if layer_idx == 0 else self.layer_fts[-1]
            qrs = self.layer_pts[qrs_layer_idx + 1]
            fts_qrs = self.layer_fts[qrs_layer_idx + 1]
            P = xconv_params[qrs_layer_idx]['P']
            C = xconv_params[qrs_layer_idx]['C']
            C_prev = xconv_params[pts_layer_idx]['C']
            C_pts_fts = C_prev // 4
            depth_multiplier = 1
            fts_xdconv = xconv(pts, fts, qrs, tag, N, K, D, P, C, C_pts_fts,
                               is_training, with_X_transformation,
                               depth_multiplier, sorting_method)
            # Fuse the X-conv output with the features stored for the
            # query layer.
            fts_concat = tf.concat([fts_xdconv, fts_qrs], axis=-1, name=tag + 'fts_concat')
            fts_fuse = pf.dense(fts_concat, C, tag + 'fts_fuse', is_training)
            self.layer_pts.append(qrs)
            self.layer_fts.append(fts_fuse)

    self.fc_layers_segmentation = [self.layer_fts[-1]]
    for layer_idx, layer_param in enumerate(fc_params_segmentation):
        C = layer_param['C']
        dropout_rate = layer_param['dropout_rate']
        fc = pf.dense(self.fc_layers_segmentation[-1], C, 'fc_seg_{:d}'.format(layer_idx), is_training)
        fc_drop = tf.layers.dropout(
            fc, dropout_rate, training=is_training,
            name='fc_seg_{:d}_drop'.format(layer_idx))
        self.fc_layers_segmentation.append(fc_drop)
def getRefinementLoss():
    """Return the mean refinement loss over the zipped proposal sets.

    Closes over names from the enclosing scope: proposals, refBoxes,
    refClasses, totalBoxes, self, getPosLoss, getNegLoss and returnNullLoss.
    Appends one entry per processed item to totalBoxes as a side effect.

    NOTE(review): statement nesting was reconstructed from a source that had
    lost its indentation -- confirm.
    """
    with tf.name_scope("getRefinementLoss"):
        totalLoss = []
        # tf.Print(proposals, [tf.shape(proposals)])
        # Zipping with range(0, 2) caps the loop at two items.
        for pro, bx, cls, num in zip(proposals, refBoxes, refClasses, range(0, 2)):
            iou = BoxUtils.iou(pro, bx)
            # Best overlap per proposal and the index of the reference box
            # that achieves it.
            maxIou = tf.reduce_max(iou, axis=1)
            bestIou = tf.expand_dims(tf.cast(
                tf.argmax(iou, axis=1), tf.int32), axis=-1)

            # Find positive and negative indices based on their IOU
            posBoxIndices = tf.cast(
                tf.where(maxIou > self.posIouTheshold), tf.int32)
            negBoxIndices = tf.cast(
                tf.where(
                    tf.logical_and(
                        maxIou < self.negIouThesholdHi,
                        maxIou > self.negIouThesholdLo)), tf.int32)

            # Split the boxes and references
            posBoxes, posRefIndices = MultiGather.gather(
                [pro, bestIou], posBoxIndices)
            negBoxes = tf.gather_nd(pro, negBoxIndices)

            # Add GT boxes: every reference box is appended as a positive
            # that refers to itself.
            posBoxes = tf.concat([posBoxes, bx], 0)
            posRefIndices = tf.concat([
                posRefIndices,
                tf.reshape(tf.range(tf.shape(cls)[0]), [-1, 1])
            ], 0)

            # Call the loss if the box collection is not empty
            nPositive = tf.shape(posBoxes)[0]
            nNegative = tf.shape(negBoxes)[0]

            if self.hardMining:
                # Compute all per-box losses, then average the top
                # self.nTrainBoxes of them.
                posLoss, box = tf.cond(
                    nPositive > 0,
                    lambda: getPosLoss(posBoxes, posRefIndices, 0, cls, bx, num)[0],
                    lambda: [
                        tf.zeros((0, ), tf.float32),
                        tf.constant([[0, 0, 0, 0]])
                    ])
                negLoss = tf.cond(
                    nNegative > 0,
                    lambda: getNegLoss(negBoxes, 0, num),
                    lambda: tf.zeros((0, ), tf.float32))
                allLoss = tf.concat([posLoss, negLoss], 0)
                # NOTE(review): MultiGather is referenced both bare (above)
                # and as basic.MultiGather (here) -- confirm both resolve.
                totalLoss.append(
                    tf.cond(
                        tf.shape(allLoss)[0] > 0,
                        lambda: tf.reduce_mean(
                            basic.MultiGather.gatherTopK(
                                allLoss, self.nTrainBoxes)),
                        lambda: tf.constant(0.0)))
                totalBoxes.append(box)
            else:
                posLoss, box = tf.cond(
                    nPositive > 0,
                    lambda: getPosLoss(
                        posBoxes, posRefIndices, self.nTrainPositives, cls, bx, num),
                    lambda: returnNullLoss())
                # posLoss is a pair here: element 0 is the loss tensor,
                # element 1 the positive count used to size the negatives.
                posCount = posLoss[1]
                posLoss = posLoss[0]
                negLoss = tf.cond(
                    nNegative > 0,
                    lambda: getNegLoss(negBoxes, self.nTrainBoxes - posCount, num),
                    lambda: tf.constant(0.0))

                # Average of both losses, weighted by their element counts.
                nPositive = tf.cast(
                    tf.shape(posLoss)[0], tf.float32)
                nNegative = tf.cond(
                    nNegative > 0,
                    lambda: tf.cast(
                        tf.shape(negLoss)[0], tf.float32),
                    lambda: tf.constant(0.0))
                totalLoss.append(
                    (tf.reduce_mean(posLoss) * nPositive +
                     tf.reduce_mean(negLoss) * nNegative) /
                    (nNegative + nPositive))
                totalBoxes.append(box)
        return tf.reduce_mean(tf.stack(totalLoss, axis=0))
def __init__(self, config):
    """Build the TensorFlow graph for the model.

    The graph looks up embeddings for two token-id inputs, forms their
    dot-product interaction map, applies one convolution, pools it down
    to a fixed (psize1 x psize2) grid, and feeds two linear layers that
    produce ``self.pred``.  A pairwise hinge loss and an Adam training op
    are attached at the end.

    Args:
        config: mapping read for the keys ``'data1_maxlen'``,
            ``'data2_maxlen'``, ``'data1_psize'``, ``'data2_psize'``,
            ``'feat_size'`` and ``'embedding'``.

    Side effects:
        Resets the default TensorFlow graph, adds tensors to the
        ``'explain_input'`` / ``'explain_output'`` collections, and, when
        the requested pooled size cannot be reached with an integer
        stride, prints a message and calls ``exit()``.
    """
    self.config = config
    self.msize1 = config['data1_maxlen']
    self.msize2 = config['data2_maxlen']
    self.psize1 = config['data1_psize']
    self.psize2 = config['data2_psize']
    num_filters = 8  # output channels of the single convolution

    tf.reset_default_graph()

    # Input placeholders.  X1_len, X2_len, Y and F are declared here but
    # not consumed anywhere else in this constructor.
    self.X1 = tf.placeholder(tf.int32, name='X1',
                             shape=(None, self.msize1))
    self.X2 = tf.placeholder(tf.int32, name='X2',
                             shape=(None, self.msize2))
    self.X1_len = tf.placeholder(tf.int32, name='X1_len', shape=(None, ))
    self.X2_len = tf.placeholder(tf.int32, name='X2_len', shape=(None, ))
    self.Y = tf.placeholder(tf.int32, name='Y', shape=(None, ))
    self.F = tf.placeholder(tf.float32, name='F',
                            shape=(None, config['feat_size']))
    self.dpool_index = tf.placeholder(
        tf.int32, name='dpool_index',
        shape=(None, self.msize1, self.msize2, 3))
    self.batch_size = tf.shape(self.X1)[0]

    # Frozen embedding table shared by both inputs.
    self.embedding = tf.get_variable('embedding',
                                     initializer=config['embedding'],
                                     dtype=tf.float32,
                                     trainable=False)
    self.embed1 = tf.nn.embedding_lookup(self.embedding, self.X1)
    self.embed2 = tf.nn.embedding_lookup(self.embedding, self.X2)
    tf.add_to_collection('explain_input', self.embed1)
    tf.add_to_collection('explain_input', self.embed2)

    # batch_size * X1_maxlen * X2_maxlen
    self.cross = tf.einsum('abd,acd->abc', self.embed1, self.embed2)
    self.cross_img = tf.expand_dims(self.cross, 3)
    tf.add_to_collection('explain_input', self.cross)

    # convolution
    self.w1 = tf.get_variable(
        'w1',
        initializer=tf.truncated_normal_initializer(
            mean=0.0, stddev=0.2, dtype=tf.float32),
        dtype=tf.float32,
        shape=[2, 10, 1, num_filters])
    self.b1 = tf.get_variable('b1',
                              initializer=tf.constant_initializer(),
                              dtype=tf.float32,
                              shape=[num_filters])
    # batch_size * X1_maxlen * X2_maxlen * feat_out
    self.conv1 = tf.nn.relu(
        tf.nn.conv2d(self.cross_img, self.w1, [1, 1, 1, 1], "SAME") +
        self.b1)

    # dynamic pooling
    self.conv1_expand = tf.gather_nd(self.conv1, self.dpool_index)

    # Floor division keeps the strides integral on every Python version.
    # With true division the strides become floats, the size check below
    # can pass spuriously (e.g. 10 / 4 -> 2.5 and 10 / 2.5 == 4.0), and
    # float window sizes are handed to max_pool.
    stride1 = self.msize1 // self.psize1
    stride2 = self.msize2 // self.psize2
    suggestion1 = self.msize1 // stride1
    suggestion2 = self.msize2 // stride2

    if suggestion1 != self.psize1 or suggestion2 != self.psize2:
        # The requested pooled size is not reachable with integer strides.
        print("DynamicMaxPooling Layer can not "
              "generate ({} x {}) output feature map,"
              "please use ({} x {} instead.)".format(
                  self.psize1, self.psize2, suggestion1, suggestion2))
        exit()

    self.pool1 = tf.nn.max_pool(self.conv1_expand,
                                [1, stride1, stride2, 1],
                                [1, stride1, stride2, 1], "VALID")

    with tf.variable_scope('fc1'):
        flat_pool = tf.reshape(self.pool1, [
            -1, config['data1_psize'] * config['data2_psize'] * num_filters
        ])
        self.fc1 = tf.nn.relu(tf.contrib.layers.linear(flat_pool, 20))

    self.pred = tf.contrib.layers.linear(self.fc1, 1)
    tf.add_to_collection('explain_output', self.pred)

    # Even rows of the batch are used as the positive example and odd
    # rows as the negative one; the loss is a hinge with margin 1.
    pos = tf.strided_slice(self.pred, [0], [self.batch_size], [2])
    neg = tf.strided_slice(self.pred, [1], [self.batch_size], [2])
    self.loss = tf.reduce_mean(tf.maximum(1.0 + neg - pos, 0.0))

    self.train_model = tf.train.AdamOptimizer().minimize(self.loss)
    self.saver = tf.train.Saver(max_to_keep=20)
def _get_pixels(images, batch_x, batch_y, batch_indices):
    """Gather entries of ``images`` addressed by (batch index, y, x) triples.

    The three index tensors are stacked along axis 2 in the order
    ``batch_indices, batch_y, batch_x`` and the result is used as the
    index argument of ``tf.gather_nd``.
    """
    # Per the original note the stacked index tensor is [B, n, 3].
    coords = tf.stack([batch_indices, batch_y, batch_x], axis=2)
    return tf.gather_nd(images, coords)