def _call(self, _inp, output_size, is_training):
    batch_size = tf.shape(_inp)[0]
    H, W, B, A = tuple(int(i) for i in _inp.shape[1:])

    if self.embedding is None:
        self.embedding = tf.get_variable(
            "embedding", shape=(int(A / 2), self.n_objects), dtype=tf.float32)

    # Flatten the spatial/box dimensions into a single "object" axis, then split
    # each object's features into a key half and a value half.
    inp = tf.reshape(_inp, (batch_size, H * W * B, A))
    key, value = tf.split(inp, 2, axis=2)

    # Each embedding column induces its own attention distribution over objects.
    raw_attention = tf.tensordot(key, self.embedding, [[2], [0]])
    attention = tf.nn.softmax(raw_attention, axis=1)

    attention_t = tf.transpose(attention, (0, 2, 1))
    weighted_value = tf.matmul(attention_t, value)

    flat_weighted_value = tf.reshape(
        weighted_value, (batch_size, self.n_objects * int(A / 2)))

    if self.output_network is None:
        self.output_network = cfg.build_math_output(scope="math_output")

    return self.output_network(flat_weighted_value, output_size, is_training)
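# Standalone NumPy sketch (not part of the class) tracing the shape flow of the
# attention readout above, under assumed illustrative sizes: each of the H*W*B
# objects contributes a key and a value of size A/2, every column of the learned
# embedding induces a softmax over the object axis, and the n_objects weighted
# values are flattened before the output network. All names here are hypothetical.
import numpy as np

def attention_readout_sketch(x, embedding):
    batch, H, W, B, A = x.shape
    n_objects = embedding.shape[1]
    flat = x.reshape(batch, H * W * B, A)
    key, value = flat[..., :A // 2], flat[..., A // 2:]
    raw = np.einsum('boa,an->bon', key, embedding)     # analogue of tf.tensordot
    att = np.exp(raw - raw.max(axis=1, keepdims=True))
    att /= att.sum(axis=1, keepdims=True)              # softmax over the object axis
    weighted = np.einsum('bon,boa->bna', att, value)   # one weighted value per slot
    return weighted.reshape(batch, n_objects * (A // 2))

# batch=2, H=W=3, B=2, A=8, n_objects=5 -> output shape (2, 20)
print(attention_readout_sketch(np.random.rand(2, 3, 3, 2, 8), np.random.rand(4, 5)).shape)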
def _call(self, inp, output_size, is_training):
    if self.cell is None:
        self.cell = cfg.build_math_cell(scope="regression_cell")
    if self.output_network is None:
        self.output_network = cfg.build_math_output(scope="math_output")

    if self.use_mask:
        # The first feature is an "object on/off" mask; move the on objects to the
        # front of each sequence and record how many there are per example.
        final_dim = int(inp.shape[-1])
        mask, inp = tf.split(inp, (1, final_dim - 1), axis=-1)
        inp, n_on, _ = apply_mask_and_group_at_front(inp, mask)
    else:
        batch_size = tf.shape(inp)[0]
        n_objects = np.prod(inp.shape[1:-1])
        A = inp.shape[-1]
        inp = tf.reshape(inp, (batch_size, n_objects, A))

    batch_size = tf.shape(inp)[0]
    output, final_state = tf.nn.dynamic_rnn(
        self.cell, inp,
        initial_state=self.cell.zero_state(batch_size, tf.float32),
        parallel_iterations=1, swap_memory=False, time_major=False)

    if self.use_mask:
        # Get the output at the end of each (variable-length) sequence.
        indices = tf.stack([tf.range(batch_size), n_on - 1], axis=1)
        output = tf.gather_nd(output, indices)
    else:
        output = output[:, -1, :]

    return self.output_network(output, output_size, is_training)
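# Minimal NumPy sketch (illustrative values, not the class) of the gather_nd indexing
# used in the use_mask branch above: pairing each batch index with n_on - 1 selects the
# RNN output emitted at the last "on" object of that example's sequence.
import numpy as np

rnn_output = np.arange(2 * 4 * 3).reshape(2, 4, 3)   # (batch=2, max_objects=4, hidden=3)
n_on = np.array([2, 4])                              # number of "on" objects per example
last = rnn_output[np.arange(2), n_on - 1]            # same selection as tf.gather_nd(output, indices)
print(last.shape)                                    # (2, 3)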
def _call(self, _inp, output_size, is_training):
    if self.h_cell is None:
        self.h_cell = cfg.build_math_cell(scope="regression_h_cell")
        self.w_cell = cfg.build_math_cell(scope="regression_w_cell")
        self.b_cell = cfg.build_math_cell(scope="regression_b_cell")

    edge_state = self.h_cell.zero_state(tf.shape(_inp)[0], tf.float32)

    H, W, B = tuple(int(i) for i in _inp.shape[1:4])
    h_states = np.empty((H, W, B), dtype=object)
    w_states = np.empty((H, W, B), dtype=object)
    b_states = np.empty((H, W, B), dtype=object)

    for h in range(H):
        for w in range(W):
            for b in range(B):
                # Predecessor state along each axis; boundary positions use the
                # shared zero edge state.
                h_state = h_states[h - 1, w, b] if h > 0 else edge_state
                w_state = w_states[h, w - 1, b] if w > 0 else edge_state
                b_state = b_states[h, w, b - 1] if b > 0 else edge_state

                inp = _inp[:, h, w, b, :]

                # Each cell sees the local input plus the hidden states of the
                # other two directions, and is advanced along its own direction.
                h_inp = tf.concat([inp, w_state.h, b_state.h], axis=1)
                _, h_states[h, w, b] = self.h_cell(h_inp, h_state)

                w_inp = tf.concat([inp, h_state.h, b_state.h], axis=1)
                _, w_states[h, w, b] = self.w_cell(w_inp, w_state)

                b_inp = tf.concat([inp, h_state.h, w_state.h], axis=1)
                _, b_states[h, w, b] = self.b_cell(b_inp, b_state)

    if self.output_network is None:
        self.output_network = cfg.build_math_output(scope="math_output")

    final_layer_input = tf.concat(
        [h_states[-1, -1, -1].h,
         w_states[-1, -1, -1].h,
         b_states[-1, -1, -1].h],
        axis=1)

    return self.output_network(final_layer_input, output_size, is_training)
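# Toy 2D analogue (pure Python, hypothetical cells) of the three-directional grid
# recurrence above: each position takes the state of its predecessor along its own
# axis, plus the other axis' predecessor state as extra input, with a shared zero edge
# state on the boundary; the states at the final corner feed the output network.
def toy_cell(inp, state):
    new_state = inp + state          # stand-in for an RNN cell's (output, state) update
    return new_state, new_state

H, W = 2, 3
edge = 0.0
h_states = [[None] * W for _ in range(H)]
w_states = [[None] * W for _ in range(H)]
grid = [[float(h * W + w) for w in range(W)] for h in range(H)]

for h in range(H):
    for w in range(W):
        h_prev = h_states[h - 1][w] if h > 0 else edge
        w_prev = w_states[h][w - 1] if w > 0 else edge
        _, h_states[h][w] = toy_cell(grid[h][w] + w_prev, h_prev)
        _, w_states[h][w] = toy_cell(grid[h][w] + h_prev, w_prev)

print(h_states[-1][-1], w_states[-1][-1])   # corner states, analogous to final_layer_input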