def get_constants(self, inputs, training=None):
    """Build the dropout-mask constants used by the recurrent step.

    Args:
        inputs: Input tensor; it is indexed as ``inputs[:, 0, 0]``, so it
            must have at least three axes (presumably batch, time,
            features -- TODO confirm).
        training: Forwarded to ``K.in_train_phase`` to select between the
            dropped and the all-ones mask.

    Returns:
        A two-element list ``[input_mask, recurrent_mask]``.  Each entry is
        a ``(batch, width)`` mask when the matching rate lies strictly
        between 0 and 1, otherwise the scalar ``1.`` in the backend float
        type.
    """
    masks = []

    # Mask applied to the inputs: one column per input feature.
    if 0 < self.dropout < 1:
        feature_dim = K.int_shape(inputs)[-1]
        per_sample = K.reshape(inputs[:, 0, 0], (-1, 1))
        input_keep_all = K.tile(K.ones_like(per_sample),
                                (1, int(feature_dim)))
        input_mask = K.in_train_phase(
            lambda: K.dropout(input_keep_all, self.dropout),
            input_keep_all,
            training=training)
        masks.append(input_mask)
    else:
        masks.append(K.cast_to_floatx(1.))

    # Mask applied to the recurrent state: one column per unit.
    if 0 < self.recurrent_dropout < 1:
        per_sample = K.reshape(inputs[:, 0, 0], (-1, 1))
        state_keep_all = K.tile(K.ones_like(per_sample), (1, self.units))
        state_mask = K.in_train_phase(
            lambda: K.dropout(state_keep_all, self.recurrent_dropout),
            state_keep_all,
            training=training)
        masks.append(state_mask)
    else:
        masks.append(K.cast_to_floatx(1.))

    return masks
def call(self, inputs, training=None, **kwargs):
    """Build content and query attention masks from a position ordering.

    Args:
        inputs: A pair ``(inputs, memory)``.  Only the first two axes of
            ``inputs`` (batch, sequence length) and the first two axes of
            ``memory`` are used.
        training: Forwarded to ``K.in_train_phase``.

    Returns:
        ``[content_mask, query_mask]``; both are float tensors whose last
        two axes have been swapped after the memory columns were prepended.
    """
    tokens, memory = inputs
    batch_size = K.shape(tokens)[0]
    seq_len = K.shape(tokens)[1]

    # All-ones block for the memory positions, widened to seq_len.
    memory_ones = K.ones_like(memory[:, :, :1], dtype=K.floatx())
    mem_mask = K.tile(memory_ones, [1, 1, seq_len])

    # Build content mask with random permutation.
    # positions[i, b] == i for every batch column b.
    positions = K.tile(
        K.expand_dims(K.arange(0, seq_len), axis=-1), [1, batch_size])
    permuted = random_shuffle(positions) if self.enabled else positions

    if self.directional:
        order = K.in_train_phase(permuted, positions, training)
    elif self.enabled:
        # Outside training every position is shifted by seq_len, which
        # makes the `<=` comparison below true everywhere.
        order = K.in_train_phase(permuted, positions + seq_len, training)
    else:
        order = positions + seq_len

    row_index = K.expand_dims(
        K.permute_dimensions(positions, [1, 0]), axis=-1)
    col_order = K.expand_dims(K.permute_dimensions(order, [1, 0]), axis=1)
    content_mask = K.cast(row_index <= col_order, dtype=K.floatx())

    # Build query mask based on content mask: same mask, diagonal removed.
    index = K.arange(0, seq_len)
    diagonal = K.equal(
        K.expand_dims(index, axis=0), K.expand_dims(index, axis=-1))
    diagonal = K.expand_dims(K.cast(diagonal, dtype=K.floatx()), axis=0)
    query_mask = content_mask * (1.0 - diagonal)

    content_mask = K.concatenate([mem_mask, content_mask], axis=1)
    query_mask = K.concatenate([mem_mask, query_mask], axis=1)
    return [
        K.permute_dimensions(content_mask, [0, 2, 1]),
        K.permute_dimensions(query_mask, [0, 2, 1]),
    ]
def _compute_valid_seed_region(self):
    """Return the mask of positions lying at least half a block from the border.

    A position ``(row, col)`` is selected when both coordinates are
    ``>= block_size // 2`` and ``< size - block_size // 2`` along their
    axis.  Selected positions take their value from ``self.ones`` and the
    rest from ``self.zeros``.  The result gets a leading and a trailing
    singleton axis.
    """
    # (height, width) index planes for the row and column coordinates.
    row_plane = K.tile(
        K.expand_dims(K.arange(self.height), axis=1), [1, self.width])
    col_plane = K.tile(
        K.expand_dims(K.arange(self.width), axis=0), [self.height, 1])
    positions = K.concatenate(
        [K.expand_dims(row_plane, axis=-1),
         K.expand_dims(col_plane, axis=-1)],
        axis=-1)

    margin = self.block_size // 2
    rows = positions[:, :, 0]
    cols = positions[:, :, 1]
    bounds_ok = K.stack(
        [
            rows >= margin,
            cols >= margin,
            rows < self.height - margin,
            cols < self.width - margin,
        ],
        axis=-1,
    )
    inside = K.all(bounds_ok, axis=-1)

    region = K.switch(inside, self.ones, self.zeros)
    region = K.expand_dims(region, axis=0)
    return K.expand_dims(region, axis=-1)
def _calculate_features(self, xy, wh, objectiveness, classes, anchors): shape = K.shape(xy)[1:3] # width, height xy_sig = K.sigmoid(xy) # TODO rethink logic here, grid needs to be calculated just once after model initialization col = K.reshape(K.tile(K.arange(0, shape[0]), shape[0:1]), (-1, shape[0])) row = K.reshape(K.tile(K.arange(0, shape[1]), shape[1:2]), (-1, shape[1])) row = K.transpose(row) col = K.repeat_elements(K.reshape(col, (shape[0], shape[1], 1, 1)), rep=len(anchors), axis=-2) row = K.repeat_elements(K.reshape(row, (shape[0], shape[1], 1, 1)), rep=len(anchors), axis=-2) grid = K.concatenate((col, row), axis=-1) # TODO same thing for the anchors anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, len(anchors), 2]) box_xy = (xy_sig + K.cast(grid, K.dtype(xy_sig))) / (shape[0], shape[1]) box_wh = K.exp(wh) * anchors_tensor / K.cast(self.input_image_dims, K.dtype(wh)) obj_sig = K.sigmoid(objectiveness) class_sig = K.sigmoid(classes) return box_xy, box_wh, obj_sig, class_sig
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final-layer features to bounding box parameters.

    Args:
        feats: Feature tensor; axes 1 and 2 give the grid size and the
            tensor is reshaped to
            ``[-1, grid_h, grid_w, num_anchors, num_classes + 5]``.
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape; it is reversed before dividing the
            box sizes.
        calc_loss: When truthy, return the intermediate tensors needed by
            the loss instead of the decoded predictions.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic grid size; axis 1 is paired with y and axis 2 with x below.
    grid_shape = K.shape(feats)[1:3]
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Centres are offset by the cell index and divided by the (x, y) grid
    # size; sizes are scaled by the anchors and divided by the input size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    # Truthiness test instead of `== True`; the loss path does not need
    # the confidence / class tensors, so they are only built afterwards.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def cell_offset_table(scale_size, input_size=416):
    """Return the offset of every grid cell, scaled to input-image units.

    Args:
        scale_size: Number of cells along each side of the square grid.
        input_size: Side length of the network input that the grid is
            scaled to.  Defaults to 416, the value that was previously
            hard-coded.

    Returns:
        A float32 tensor of shape ``[1, scale_size, scale_size, 1, 2]``
        holding the cell index pairs multiplied by
        ``input_size / scale_size``.
    """
    # Dynamic implementation of conv dims for fully convolutional model.
    # In YOLO the height index is the innermost iteration.
    conv_height_index = K.arange(0, stop=scale_size)
    conv_width_index = K.arange(0, stop=scale_size)

    # tile repeats the whole range scale_size times:
    # 0..S-1, 0..S-1, ..., 0..S-1
    conv_height_index = K.tile(conv_height_index, [scale_size])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # Tiling with a 2-D multiple yields S rows of 0..S-1 ...
    conv_width_index = K.tile(
        K.expand_dims(conv_width_index, 0), [scale_size, 1])
    # ... and flattening the transpose gives each value S times in a row:
    # 0 (S times), 1 (S times), ...
    conv_width_index = K.flatten(K.transpose(conv_width_index))

    # Pairs [0, 0], [1, 0], [2, 0], ..., [S-1, S-1].
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    # One index pair per grid cell: shape [1, S, S, 1, 2].
    conv_index = K.reshape(conv_index, [1, scale_size, scale_size, 1, 2])
    conv_index = K.cast(conv_index, tf.float32)

    # Size of one cell in input-image units.
    cell_size = input_size / scale_size
    return conv_index * cell_size
def yolo_head(feats, anchors, input_shape, calc_loss=False, att_map=None):
    """Convert final layer features to bounding box parameters.

    This variant has no class outputs: ``feats`` is reshaped so that its
    last axis holds exactly five values per anchor.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        input_shape: Two-element shape tensor/array; reversed before
            dividing the box sizes.
        calc_loss: When truthy, return the tensors needed by the loss.
        att_map: Accepted for interface compatibility and currently
            ignored.  Blending the attention score into the confidence
            (``confidence * .8 + seg_map * .2``) was already disabled, so
            the tiled map was computed and never used.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_shape[0], grid_shape[1], num_anchors, 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    # Both former branches (with and without att_map) produced exactly
    # this value, so a single expression replaces them.
    box_confidence = K.sigmoid(feats[..., 4:5])
    return box_xy, box_wh, box_confidence
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert raw detection outputs to bounding box parameters.

    Example: ``(batch_size, 13, 13, 255) -> (batch_size, 13, 13, 3, 85)``.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape; reversed before dividing the box
            sizes.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # feature map height and width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    # Grid point coordinates of the feature map, e.g. for a (13, 13) map:
    # [[(0, 0) .. (0, 12)] .. [(12, 0) .. (12, 12)]].
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Grid point coordinate + predicted offset, divided by the grid size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    # Truthiness test instead of `== True`.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    # TODO: clarify what role the activation plays for the class scores.
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final predictions into bounding boxes.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape; reversed before dividing the box
            sizes.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # (batch, height, width, num_anchors, box_params)
    anchor_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # (height, width)
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Use the dynamic `grid_shape` tensor here.  The previous code read
    # `grid.shape[0]` / `grid.shape[1]`, i.e. the static shape of the grid
    # tensor, which is unknown (None) when the input size is not fixed.
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchor_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def monotonic_alignment(args):
    """Compute step-wise alignment probabilities and emission probabilities.

    ``args`` is unpacked as ``(h_enc, h_dec, T_x, T_y, Y, hidden_dim)``.

    Returns:
        The tuple ``(alignment_probs_, emission_probs)``.

    NOTE(review): tensor shapes are not stated here; the tiling below
    suggests ``h_enc`` is (batch, T_x, dim) and ``h_dec`` is
    (batch, T_y, dim) -- TODO confirm.
    NOTE(review): the ``Dense`` layers are constructed inside this function,
    so every call creates new layers.
    NOTE(review): the source had lost its indentation; the three statements
    following the einsum are placed inside the ``i > 0`` branch because they
    operate on its rank-3 result -- confirm against the original file.
    """
    h_enc, h_dec, T_x, T_y, Y, hidden_dim = args
    # Upper-triangular (T_x, T_x) matrix of ones with a leading axis added.
    struc_zeros = K.expand_dims(
        K.cast(np.triu(np.ones([T_x, T_x])), dtype='float32'), 0)
    # Dot product of projected encoder states with decoder states,
    # normalised with a softmax along axis -2.
    alignment_probs = K.softmax(
        dot([Dense(hidden_dim)(h_enc), h_dec], axes=-1, normalize=False), -2)
    # Pair every encoder state with every decoder state.
    h_enc_rep = K.tile(K.expand_dims(h_enc, -2), [1, 1, T_y, 1])
    h_dec_rep = K.tile(K.expand_dims(h_dec, -3), [1, T_x, 1, 1])
    h_rep = K.concatenate([h_enc_rep, h_dec_rep], -1)
    alignment_probs_ = []
    for i in range(T_y):
        if i == 0:
            # First step: take the alignment slice unchanged.
            align_prev_curr = tf.gather(alignment_probs, i, axis=-1)
        if i > 0:
            # Outer product of the current slice with the previous step's
            # result, masked by the triangular matrix.
            align_prev_curr = tf.einsum('nx,ny->nxy',
                                        tf.gather(alignment_probs, i, axis=-1),
                                        alignment_probs_[i - 1])
            align_prev_curr *= struc_zeros
            # Sum over axis 1, add a small constant, then renormalise.
            align_prev_curr = K.sum(align_prev_curr, 1) + 1e-6
            align_prev_curr /= K.sum(align_prev_curr, -1, keepdims=True)
        alignment_probs_.append(align_prev_curr)
    alignment_probs_ = K.stack(alignment_probs_, -1)
    emission_probs = Dense(hidden_dim * 3, activation='tanh')(h_rep)
    emission_probs = Dense(Y, activation='softmax')(emission_probs)
    #alphas = tf.expand_dims(alignment_probs_,-1)*emission_probs
    #return(tf.reduce_sum(alphas,-3))
    return (alignment_probs_, emission_probs)
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode final-layer features into box centres, sizes and scores.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape tensor/array; reversed before
            dividing the box sizes.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # [1, 1, 1, num_anchors, 2]
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the x/y grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    # e.g. (batch_size, 13, 13, 3, 85)
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Turn the raw predictions into box values:
    # box_xy is the box centre, box_wh the box width and height.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    # The loss only needs these four tensors; truthiness test replaces
    # `== True` and the unused sigmoids are no longer built on this path.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def call(self, x):
    """Apply a FiLM-style affine transform to the convolution output.

    Args:
        x: A list ``[conv_output, FiLM_gamma, FiLM_beta]``.

    Returns:
        ``(1 + gamma) * conv_output + beta`` where gamma and beta have been
        spread over ``(self.height, self.width)`` and repeated along the
        channel axis according to ``self.h``.

    Raises:
        ValueError: If ``self.h`` is not one of ``'C4'``, ``'D4'``, ``'Z2'``.
    """
    assert isinstance(x, list)
    conv_output, FiLM_gamma, FiLM_beta = x

    def spread_over_space(params):
        # Insert two singleton axes after the batch axis, then tile them to
        # the spatial size.
        params = K.expand_dims(params, axis=[1])
        params = K.expand_dims(params, axis=[1])
        return K.tile(params, [1, self.height, self.width, 1])

    def repeat_count():
        # Number of copies of each feature map for the configured `h`.
        for name, count in (('C4', 4), ('D4', 8), ('Z2', 1)):
            if self.h == name:
                return count
        raise ValueError('Wrong h: %s' % self.h)

    def repeat(w):
        n = repeat_count()
        copies = K.tile(K.expand_dims(w, -1), [1, 1, 1, 1, n])
        return K.reshape(
            copies,
            [-1, self.height, self.width, self.n_feature_maps * n],
        )

    gamma_map = spread_over_space(FiLM_gamma)
    beta_map = spread_over_space(FiLM_beta)
    repeated_gamma = repeat(gamma_map)
    repeated_beta = repeat(beta_map)

    # Apply affine transformation
    return (1 + repeated_gamma) * conv_output + repeated_beta
def yolo3_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Convert final layer features to bounding box parameters.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape; reversed before dividing the box
            sizes.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
        grid_shape[::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[::-1], K.dtype(feats))

    # Truthiness test instead of `== True`; confidence and class scores
    # are only built on the path that returns them.
    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def build_critic2(self):
    """Build the label-conditioned critic model.

    The label vector is reshaped to ``(-1, 1, 1, classes)``, tiled to the
    spatial size of each stage (28, 14, 7 and finally 4) and concatenated
    to the feature map on axis 3 before every convolution and before the
    final flatten.

    Returns:
        A ``Model`` named ``"critic"`` mapping ``[img, label]`` to a single
        unbounded validity score.

    NOTE(review): the tile sizes are hard-coded, so this presumably expects
    28x28 inputs in ``self.img_shape`` -- confirm.
    """
    img = Input(shape=self.img_shape)
    label = Input(shape=(self.classes, ))
    label_map = K.reshape(label, shape=(-1, 1, 1, self.classes))

    # (filters, spatial size of the stage input) for each conv stage.
    stages = ((64, 28), (128, 14), (256, 7))
    features = img
    for filters, size in stages:
        label_plane = K.tile(label_map, [1, size, size, 1])
        features = K.concatenate([features, label_plane], axis=3)
        features = Conv2D(
            filters, kernel_size=5, strides=2, padding="same")(features)
        features = LayerNormalization(epsilon=1e-6)(features)
        features = LeakyReLU(alpha=0.2)(features)

    # Condition once more on the label before the linear output.
    label_plane = K.tile(label_map, [1, 4, 4, 1])
    features = K.concatenate([features, label_plane], axis=3)
    flat = Flatten()(features)
    validity = Dense(1)(flat)

    return Model(inputs=[img, label], outputs=[validity], name="critic")
def call(self, x):
    """Encode masked observations into a ``self._K``-dimensional vector.

    Args:
        x: A list ``[observations, mask]``.

    Returns:
        ``relu`` of the mask-weighted sum, over axis 1, of the per-entry
        encodings.  Intermediate tensors are stored on ``self``
        (``x_flat``, ``x_aug``, ``encoded``, ``mask_on_hidden``) and several
        debug lines are printed.

    NOTE(review): ``Dense(self._K)`` is constructed inside ``call``, so a
    new layer is created on every invocation -- confirm this is intended.
    NOTE(review): the width 10 used when flattening ``self.F`` is
    hard-coded; presumably it is the last dimension of ``self.F`` -- verify.
    """
    assert isinstance(x, list)
    observations, mask = x
    self._batch_size = K.shape(observations)[0]
    # One row per observation entry.
    self.x_flat = K.reshape(observations, [-1, 1])
    # Repeat F and b for every sample in the batch, then flatten them so
    # their rows line up with x_flat.
    F_flat = K.tile(self.F, [self._batch_size, 1, 1])
    F_flat = K.reshape(F_flat, [-1, 10])
    b_flat = K.tile(self.b, [self._batch_size, 1, 1])
    b_flat = K.reshape(b_flat, [-1, 1])
    # self.x_aug = K.concatenate(
    #     [self.x_flat, self.x_flat * F_flat, b_flat], 1)
    self.x_aug = K.concatenate([self.x_flat * F_flat, b_flat], 1)
    print('x_aug', self.x_aug.shape)
    self.encoded = Dense(self._K)(
        self.x_aug)  #layers.fully_connected(self.x_aug, self._K)
    print('e1', self.encoded)
    # Regroup the rows per sample: (batch, obs_dim, K).
    self.encoded = K.reshape(self.encoded, [-1, self._obs_dim, self._K])
    print('e2', self.encoded)
    # Broadcast the mask over the K encoding channels.
    self.mask_on_hidden = K.reshape(mask, [-1, self._obs_dim, 1])
    print('make', self.mask_on_hidden)
    self.mask_on_hidden = K.tile(self.mask_on_hidden, [1, 1, self._K])
    print('mask2', self.mask_on_hidden)
    print(self.encoded.shape)
    # Sum the masked encodings over the observation axis.
    self.encoded = K.relu(K.sum(self.encoded * self.mask_on_hidden, 1))
    return self.encoded
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode YOLO features, with the backend for the loss and NumPy otherwise.

    :param feats: (N, 13, 13, 3 * (5+n_class)), ... ; a backend tensor when
        ``calc_loss`` is truthy, a NumPy array otherwise
    :param anchors: (3, 2)
    :param num_classes: 15
    :param input_shape: (416, 416); a tuple, list or array
    :param calc_loss: when truthy, build backend tensors for the loss
    :return: ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    if calc_loss:
        anchors_tensor = K.reshape(
            K.constant(anchors), [1, 1, 1, num_anchors, 2])
        grid_shape = K.shape(feats)[1:3]  # height, width
        grid_y = K.tile(
            K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
            [1, grid_shape[1], 1, 1])
        grid_x = K.tile(
            K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
            [grid_shape[0], 1, 1, 1])
        grid = K.concatenate([grid_x, grid_y])
        # Cast to the dtype of feats (as every other cast below does) so
        # the addition to sigmoid(feats) never mixes dtypes.
        grid = K.cast(grid, K.dtype(feats))
        feats = K.reshape(
            feats,
            [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
        box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(
            grid_shape[::-1], K.dtype(feats))
        box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
            input_shape[::-1], K.dtype(feats))
        return grid, feats, box_xy, box_wh

    anchors_tensor = np.reshape(
        np.array(anchors), [1, 1, 1, num_anchors, 2])
    grid_shape = np.asarray(feats.shape[1:3])  # height, width
    grid_y = np.tile(
        np.reshape(np.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = np.tile(
        np.reshape(np.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = np.concatenate([grid_x, grid_y], axis=-1)
    grid = grid.astype(feats.dtype)
    feats = np.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # `input_shape` is documented as a plain tuple; indexing a tuple with
    # `[..., ::-1]` raises TypeError, so convert it to an array first.
    # Arrays passed by existing callers go through unchanged.
    input_wh = np.asarray(input_shape)[::-1].astype(feats.dtype)
    grid_wh = grid_shape[::-1].astype(feats.dtype)

    box_xy = (utils.sigmoid(feats[..., :2]) + grid) / grid_wh
    box_wh = np.exp(feats[..., 2:4]) * anchors_tensor / input_wh
    box_confidence = utils.sigmoid(feats[..., 4:5])
    box_class_probs = utils.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def image_lpips(y_true, y_pred):
    """Return the LPIPS distance between the target and predicted images.

    Both tensors are passed through ``join_reim_mag_output`` and tiled three
    times along the last of their four axes before being handed to
    ``lpips_tf.lpips`` (``net-lin`` model, ``alex`` network).
    """
    true_img, pred_img = (
        join_reim_mag_output(tensor) for tensor in (y_true, y_pred))
    # Repeat the last axis three times (presumably single channel -> RGB;
    # TODO confirm the channel count produced by join_reim_mag_output).
    true_rgb, pred_rgb = (
        K.tile(image, [1, 1, 1, 3]) for image in (true_img, pred_img))
    return lpips_tf.lpips(true_rgb, pred_rgb, model='net-lin', net='alex')
def compute_kernel(x, y):
    """Return the pairwise kernel matrix between the rows of ``x`` and ``y``.

    Entry ``[i, j]`` is ``exp(-mean((x[i] - y[j]) ** 2) / dim)`` where
    ``dim`` is the size of axis 1 of ``x``.  Both inputs are reshaped as
    2-D ``(rows, dim)`` tensors.
    """
    n_x = K.shape(x)[0]
    n_y = K.shape(y)[0]
    n_features = K.shape(x)[1]

    # Broadcast both inputs to (n_x, n_y, n_features) so every row of x
    # meets every row of y.
    x_rows = K.reshape(x, [n_x, 1, n_features])
    y_rows = K.reshape(y, [1, n_y, n_features])
    x_grid = K.tile(x_rows, [1, n_y, 1])
    y_grid = K.tile(y_rows, [n_x, 1, 1])

    mean_sq_diff = K.mean(K.square(x_grid - y_grid), axis=2)
    return K.exp(-mean_sq_diff / K.cast(n_features, 'float32'))
def gen_emission_probs(args):
    """Compute emission probabilities for every encoder/decoder state pair.

    ``args`` is unpacked as ``(h_enc, h_dec, max_encoder_seq_length,
    max_decoder_seq_length, num_decoder_tokens, hidden_dim)``.

    Returns:
        The softmax output over ``num_decoder_tokens`` of a two-layer dense
        network applied to the concatenated pair features.
    """
    (h_enc, h_dec, max_encoder_seq_length, max_decoder_seq_length,
     num_decoder_tokens, hidden_dim) = args

    # Pair every encoder state with every decoder state.
    enc_grid = K.tile(
        K.expand_dims(h_enc, -2), [1, 1, max_decoder_seq_length, 1])
    dec_grid = K.tile(
        K.expand_dims(h_dec, -3), [1, max_encoder_seq_length, 1, 1])
    pair_features = K.concatenate([enc_grid, dec_grid], -1)

    # Emission probabilities.  The output layer is constructed before the
    # hidden layer so the layers are created in the same order as before.
    output_layer = Dense(num_decoder_tokens, activation='softmax')
    hidden = Dense(hidden_dim * 3, activation='tanh')(pair_features)
    return output_layer(hidden)
def call(self, inputs):
    """Concatenate every row of the first input with every row of the second.

    Args:
        inputs: A pair ``(a, b)`` of 2-D tensors with M and N rows.

    Returns:
        A tensor with leading shape ``(M, N)`` whose last axis holds the row
        of ``a`` followed by the row of ``b``.
    """
    first, second = inputs

    # Expand both arrays to (M, N, x) arrays.
    first_rows = K.expand_dims(first, 1)
    first_grid = K.tile(first_rows, [1, K.shape(second)[0], 1])
    second_rows = K.expand_dims(second, 0)
    second_grid = K.tile(second_rows, [K.shape(first)[0], 1, 1])

    return K.concatenate([first_grid, second_grid], axis=2)
def yolo4_decode(feats, anchors, num_classes, input_shape, scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape tensor/array; reversed before
            dividing the box sizes.
        scale_x_y: Optional scale for the sigmoid centre offsets; skipped
            when falsy.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when computing the loss, otherwise
        ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Build the reference grid, e.g. (13, 13, 1, 2).
    grid_shape = K.shape(feats)[1:3]  # height, width
    y_index = K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1])
    grid_y = K.tile(y_index, [1, grid_shape[1], 1, 1])
    x_index = K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1])
    grid_x = K.tile(x_index, [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Reshape to [batch_size, height, width, num_anchors, num_classes + 5].
    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Adjust predictions to each spatial grid point and anchor size.
    # box_xy is expressed relative to the grid (divided by the grid size);
    # box_wh is expressed relative to input_shape.
    xy_offset = K.sigmoid(feats[..., :2])
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        #     https://zhuanlan.zhihu.com/p/139724869
        xy_offset = xy_offset * scale_x_y - (scale_x_y - 1) / 2
    box_xy = (xy_offset + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    # Decode objectness scores.
    box_confidence = K.sigmoid(feats[..., 4:5])
    # Decode class probabilities.
    box_class_probs = K.sigmoid(feats[..., 5:])

    # Loss computation needs grid, feats, box_xy, box_wh; prediction needs
    # box_xy, box_wh, box_confidence, box_class_probs.
    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
def image_lpips(y_true, y_pred):
    """Return the LPIPS distance between target and prediction in image domain.

    Each tensor is converted with ``utils.convert_tensor_to_image_domain``,
    passed through ``join_reim_mag_output`` and tiled three times along the
    last of its four axes before ``lpips_tf.lpips`` is evaluated
    (``net-lin`` model, ``alex`` network).
    """
    true_image, pred_image = (
        utils.convert_tensor_to_image_domain(tensor)
        for tensor in (y_true, y_pred))
    true_joined, pred_joined = (
        join_reim_mag_output(image) for image in (true_image, pred_image))
    # Repeat the last axis three times (presumably single channel -> RGB;
    # TODO confirm the channel count produced by join_reim_mag_output).
    true_rgb, pred_rgb = (
        K.tile(image, [1, 1, 1, 3]) for image in (true_joined, pred_joined))
    return lpips_tf.lpips(true_rgb, pred_rgb, model='net-lin', net='alex')
def call(self, inputs, mask=None):
    """Compute the similarity between every row pair of two batched matrices.

    Args:
        inputs: A pair ``(matrix_1, matrix_2)`` of rank-3 tensors; axis 1 is
            the row axis of each.
        mask: Unused.

    Returns:
        The result of ``self.similarity_function.compute_similarity`` on the
        two matrices after each has been tiled to
        ``(batch, rows_1, rows_2, dim)``.
    """
    left, right = inputs
    left_rows = K.shape(left)[1]
    right_rows = K.shape(right)[1]

    # Tile multiples are assembled as tensors because the row counts are
    # only known dynamically.
    left_multiples = K.concatenate([[1, 1], [right_rows], [1]], 0)
    right_multiples = K.concatenate([[1], [left_rows], [1, 1]], 0)

    left_grid = K.tile(K.expand_dims(left, axis=2), left_multiples)
    right_grid = K.tile(K.expand_dims(right, axis=1), right_multiples)
    return self.similarity_function.compute_similarity(left_grid, right_grid)
def call(self, inputs):
    """Return ``self.similarity`` for every pair of positions in ``C`` and ``Q``.

    Args:
        inputs: A pair ``(C, Q)`` of rank-3 tensors; axis 1 is the sequence
            axis of each.

    Returns:
        ``self.similarity`` applied to ``C`` repeated along a new axis 2 and
        ``Q`` repeated along a new axis 1, so both have shape
        ``(batch, len(C), len(Q), dim)``.
    """
    C, Q = inputs
    c_steps = K.shape(C)[1]
    q_steps = K.shape(Q)[1]

    # How often each expanded tensor is repeated along every axis.
    c_multiples = K.concatenate([[1, 1], [q_steps], [1]], 0)
    q_multiples = K.concatenate([[1], [c_steps], [1, 1]], 0)

    c_expanded = K.expand_dims(C, axis=2)
    c_grid = K.tile(c_expanded, c_multiples)
    q_expanded = K.expand_dims(Q, axis=1)
    q_grid = K.tile(q_expanded, q_multiples)
    return self.similarity(c_grid, q_grid)
def yolo3_decode(feats, anchors, num_classes, input_shape, scale_x_y=None,
                 calc_loss=False):
    """Decode final layer features to bounding box parameters.

    Args:
        feats: Feature tensor; axes 1 and 2 are read as (height, width).
        anchors: Sequence of anchor (w, h) pairs.
        num_classes: Number of classes encoded after the five box values.
        input_shape: Two-element shape tensor/array; reversed before
            dividing the box sizes.
        scale_x_y: Optional scale for the sigmoid centre offsets; skipped
            when falsy.
        calc_loss: When truthy, return the tensors needed by the loss.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
        The returned ``feats`` is the reshaped but unclamped tensor.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    grid_shape = K.shape(feats)[1:3]  # height, width
    grid_y = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(
        K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), K.dtype(feats))

    feats = K.reshape(
        feats,
        [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Clamp the raw logits before the sigmoid / exp so extreme activations
    # cannot overflow.  The previous code computed these clamped values but
    # then decoded from the unclamped `feats`, so the clamp had no effect.
    raw_xy = K.clip(feats[..., :2], -10.0, 10.0)
    raw_wh = K.clip(feats[..., 2:4], -8.0, 8.0)

    # Adjust predictions to each spatial grid point and anchor size.
    xy_offset = K.sigmoid(raw_xy)
    if scale_x_y:
        # Eliminate grid sensitivity trick involved in YOLOv4
        #
        # Reference Paper & code:
        #     "YOLOv4: Optimal Speed and Accuracy of Object Detection"
        #     https://arxiv.org/abs/2004.10934
        #     https://github.com/opencv/opencv/issues/17148
        xy_offset = xy_offset * scale_x_y - (scale_x_y - 1) / 2
    box_xy = (xy_offset + grid) / K.cast(
        grid_shape[..., ::-1], K.dtype(feats))
    box_wh = K.exp(raw_wh) * anchors_tensor / K.cast(
        input_shape[..., ::-1], K.dtype(feats))

    if calc_loss:
        return grid, feats, box_xy, box_wh

    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])
    return box_xy, box_wh, box_confidence, box_class_probs
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features (channels last).
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_confidence : tensor
        Sigmoid of the objectness output of each box.
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_class_probs : tensor
        Softmax over the class outputs of each box.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    grid_h = conv_dims[0]
    grid_w = conv_dims[1]

    # In YOLO the height index is the inner most iteration.
    height_index = K.tile(K.arange(0, stop=grid_h), [grid_w])
    width_index = K.tile(
        K.expand_dims(K.arange(0, stop=grid_w), 0), [grid_h, 1])
    width_index = K.flatten(K.transpose(width_index))

    cell_index = K.transpose(K.stack([height_index, width_index]))
    cell_index = K.reshape(cell_index, [1, grid_h, grid_w, 1, 2])
    cell_index = K.cast(cell_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    box_confidence = K.sigmoid(feats[..., 4:5])
    xy_offset = K.sigmoid(feats[..., :2])
    wh_scale = K.exp(feats[..., 2:4])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (xy_offset + cell_index) / conv_dims
    box_wh = wh_scale * anchors_tensor / conv_dims

    return box_confidence, box_xy, box_wh, box_class_probs
def span_matrix_func(tensor):
    """Pair every position of a sequence with every other position.

    Args:
        tensor: Rank-3 embeddings tensor; axis 1 is presumably the sequence
            axis of length ``max_seq_length`` (module-level global) -- TODO
            confirm.

    Returns:
        A rank-4 tensor whose last axis is the embedding at the first index
        followed by the embedding at the second index.
    """
    # `max_seq_length` is read from module scope.
    repeats = max_seq_length
    start_grid = K.tile(K.expand_dims(tensor, 2), [1, 1, repeats, 1])
    end_grid = K.tile(K.expand_dims(tensor, 1), [1, repeats, 1, 1])
    return K.concatenate([start_grid, end_grid], 3)
def _combine_heatmaps_visual(inp):
    """Pool visual features with per-joint heatmaps over axes 2 and 3.

    Args:
        inp: A pair ``(heatmaps, features)`` of rank-5 tensors; the last
            axis of the first holds the joints and the last axis of the
            second holds the feature channels.

    Returns:
        The sum over axes 2 and 3 of ``heatmap * feature`` for every
        (joint, feature) combination.
    """
    heatmaps, features = inp[0], inp[1]
    num_joints = K.int_shape(heatmaps)[-1]
    num_features = K.int_shape(features)[-1]

    # Give both tensors trailing (joint, feature) axes of equal size.
    heatmaps = K.tile(
        K.expand_dims(heatmaps, axis=-1), (1, 1, 1, 1, 1, num_features))
    features = K.tile(
        K.expand_dims(features, axis=-2), (1, 1, 1, 1, num_joints, 1))

    weighted = heatmaps * features
    return K.sum(weighted, axis=(2, 3))
def construct_grid(rows, cols):
    """Return a ``(rows, cols, 1, 2)`` tensor of ``(x, y)`` cell indices.

    Entry ``[r, c, 0]`` is ``(c, r)``: the column index first, then the row
    index.
    """
    x_index = K.reshape(K.arange(0, stop=cols), [1, -1, 1, 1])
    x_plane = K.tile(x_index, [rows, 1, 1, 1])

    y_index = K.reshape(K.arange(0, stop=rows), [-1, 1, 1, 1])
    y_plane = K.tile(y_index, [1, cols, 1, 1])

    return K.concatenate([x_plane, y_plane])
def call(self, inputs):
    """Build the similarity matrix between context and query vectors.

    Args:
        inputs: A pair ``(context_vectors, query_vectors)`` of rank-3
            tensors; axis 1 is the word axis of each.

    Returns:
        ``self.compute_similarity`` evaluated on the context vectors
        repeated once per query word and the query vectors repeated once per
        context word.
    """
    context_vectors, query_vectors = inputs
    n_context = K.shape(context_vectors)[1]
    n_query = K.shape(query_vectors)[1]

    # Repeat counts per axis, assembled as tensors since the word counts
    # are only known dynamically.
    context_multiples = K.concatenate([[1, 1], [n_query], [1]], 0)
    query_multiples = K.concatenate([[1], [n_context], [1, 1]], 0)

    context_grid = K.tile(
        K.expand_dims(context_vectors, axis=2), context_multiples)
    query_grid = K.tile(
        K.expand_dims(query_vectors, axis=1), query_multiples)

    return self.compute_similarity(context_grid, query_grid)