def _infer_network_outputs(*, sess, restored_model, num_of_anchors, anchors,
                           orig_image_width, orig_image_height,
                           model_image_width, model_image_height, img_np,
                           verbose):
    start = time.time()
    boxes = []
    prob_class = []
    for yolo_head_idx in range(len(restored_model.output)):
        yolo_head = restored_model.output[yolo_head_idx]
        yolo_head_shape = K.shape(yolo_head)
        yolo_head_num_of_cols, yolo_head_num_of_rows = yolo_head_shape[2], yolo_head_shape[1]
        curr_yolo_head = K.reshape(yolo_head, [
            -1, yolo_head_num_of_cols, yolo_head_num_of_rows, num_of_anchors,
            NUM_OF_BOX_PARAMS + NUM_OF_CLASSES
        ])
        grid = construct_grid(yolo_head_shape[1], yolo_head_shape[2])
        grid = K.cast(grid, dtype=K.dtype(curr_yolo_head))
        grid_size = K.cast([yolo_head_num_of_cols, yolo_head_num_of_rows],
                           dtype=K.dtype(curr_yolo_head))
        curr_boxes_xy = (K.sigmoid(curr_yolo_head[..., :2]) + grid) / grid_size
        curr_boxes_wh = K.exp(curr_yolo_head[..., 2:4]) * anchors[yolo_head_idx]
        curr_prob_obj = K.sigmoid(curr_yolo_head[..., 4:5])
        curr_prob_class = K.sigmoid(curr_yolo_head[..., 5:])
        curr_prob_detected_class = curr_prob_obj * curr_prob_class
        boxes.append(
            get_corrected_boxes(
                box_width=curr_boxes_wh[..., 0:1],
                box_height=curr_boxes_wh[..., 1:2],
                box_x=curr_boxes_xy[..., 0:1],
                box_y=curr_boxes_xy[..., 1:2],
                orig_image_shape=(orig_image_width, orig_image_height),
                model_image_shape=(model_image_width, model_image_height)))
        curr_prob_detected_class = K.reshape(curr_prob_detected_class,
                                             [-1, NUM_OF_CLASSES])
        prob_class.append(curr_prob_detected_class)
    prob_class = K.concatenate(prob_class, axis=0)
    boxes = K.concatenate(boxes, axis=0)
    out_tensors = [boxes, prob_class]
    if verbose:
        print(f'Took {time.time() - start} seconds to construct network.')
    start = time.time()
    sess_out = sess.run(out_tensors,
                        feed_dict={
                            restored_model.input: img_np,
                            K.learning_phase(): 0
                        })
    if verbose:
        print(f'Took {time.time() - start} seconds to infer outputs in session.')
    boxes, out_boxes_classes = sess_out
    return boxes, out_boxes_classes
def custom_loss(y_true, y_pred):
    # define a grid of offsets
    # [[[ 0.  0.]]
    #  [[ 1.  0.]]
    #  [[ 0.  1.]]
    #  [[ 1.  1.]]]
    grid = np.array([[[float(x), float(y)]] * nb_boxes
                     for y in range(grid_h) for x in range(grid_w)])

    # The first three values are the classes: cat, rat, and none.
    # YOLO doesn't predict "none" as a class; "none" is everything else and is
    # simply not predicted, so it is not used in the loss.
    y_true_class = y_true[..., 0:2]
    y_pred_class = y_pred[..., 0:2]

    # reshape the arrays as a list of grid cells / boxes / 5 box elements
    pred_boxes = K.reshape(y_pred[..., 3:], (-1, grid_w * grid_h, nb_boxes, 5))
    true_boxes = K.reshape(y_true[..., 3:], (-1, grid_w * grid_h, nb_boxes, 5))

    # Sum the coordinates of the box centers with the cell offsets.
    # As predicted boxes are limited to the 0..1 range, pred x,y + offset is
    # limited to predicting centers inside a cell.
    y_pred_xy = pred_boxes[..., 0:2] + K.variable(grid)
    # predicted w and h are in 0..1, with 1 being the image size
    y_pred_wh = pred_boxes[..., 2:4]
    # probability that there is something to predict here
    y_pred_conf = pred_boxes[..., 4]

    # Same for the ground truth, except no offset is needed; the coordinates
    # are already between 0 and the cell count.
    y_true_xy = true_boxes[..., 0:2]
    # width and height
    y_true_wh = true_boxes[..., 2:4]
    # probability that there is something in that cell: 0 or 1, as it is certain
    y_true_conf = true_boxes[..., 4]

    clss_loss = K.sum(K.square(y_true_class - y_pred_class), axis=-1)
    xy_loss = K.sum(K.sum(K.square(y_true_xy - y_pred_xy), axis=-1) * y_true_conf,
                    axis=-1)
    wh_loss = K.sum(K.sum(K.square(K.sqrt(y_true_wh) - K.sqrt(y_pred_wh)),
                          axis=-1) * y_true_conf,
                    axis=-1)

    # When we add the confidence term, box prediction quality drops slightly,
    # but we gain an estimate of the quality of each box. Training is,
    # however, a bit unstable.

    # Compute the intersection of all boxes at once (an approximation of the
    # IOU: the exact 1-D overlap would use |x_pred - x_true|, not its square)
    intersect_wh = K.maximum(K.zeros_like(y_pred_wh),
                             (y_pred_wh + y_true_wh) / 2 -
                             K.square(y_pred_xy - y_true_xy))
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    true_area = y_true_wh[..., 0] * y_true_wh[..., 1]
    pred_area = y_pred_wh[..., 0] * y_pred_wh[..., 1]
    union_area = pred_area + true_area - intersect_area
    iou = intersect_area / union_area

    conf_loss = K.sum(K.square(y_true_conf * iou - y_pred_conf), axis=-1)

    # final loss function
    d = xy_loss + wh_loss + conf_loss + clss_loss

    if False:
        d = tf.Print(d, [d], "loss")
        d = tf.Print(d, [xy_loss], "xy_loss")
        d = tf.Print(d, [wh_loss], "wh_loss")
        d = tf.Print(d, [clss_loss], "clss_loss")
        d = tf.Print(d, [conf_loss], "conf_loss")
    return d
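# For reference: the loss above approximates box overlap with a squared center
# distance. A minimal NumPy sketch of the exact IoU of two axis-aligned boxes,
# illustrative only (`exact_iou` is not part of the code above):
import numpy as np

def exact_iou(xy1, wh1, xy2, wh2):
    # Exact IoU for axis-aligned boxes given centers (x, y) and sizes (w, h).
    mins1, maxes1 = xy1 - wh1 / 2, xy1 + wh1 / 2
    mins2, maxes2 = xy2 - wh2 / 2, xy2 + wh2 / 2
    inter_wh = np.maximum(0.0, np.minimum(maxes1, maxes2) - np.maximum(mins1, mins2))
    inter = inter_wh[..., 0] * inter_wh[..., 1]
    union = wh1[..., 0] * wh1[..., 1] + wh2[..., 0] * wh2[..., 1] - inter
    return inter / union

# Two unit boxes whose centers are half a unit apart in x:
print(exact_iou(np.array([0., 0.]), np.array([1., 1.]),
                np.array([0.5, 0.]), np.array([1., 1.])))  # 0.5/1.5 ~ 0.333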
def call(self, inputs, **kwargs):
    """
    Creates the layer as a Keras graph

    Notes:
        This does not add self loops to the adjacency matrix.

    Args:
        inputs (list): list of inputs with 2 items:
            node features (size b x N x F),
            sparse graph adjacency matrix (size N x N),
            where N is the number of nodes in the graph and
            F is the dimensionality of node features
    """
    X = inputs[0]  # Node features (1 x N x F)
    A_sparse = inputs[1]  # Adjacency matrix (1 x N x N)

    if not isinstance(A_sparse, tf.SparseTensor):
        raise TypeError("A is not sparse")

    # Get undirected graph edges (E x 2)
    A_indices = A_sparse.indices

    batch_dim, n_nodes, _ = K.int_shape(X)
    if batch_dim != 1:
        raise ValueError(
            "Currently full-batch methods only support a batch dimension of one"
        )
    else:
        # Remove singleton batch dimension
        X = K.squeeze(X, 0)

    outputs = []
    for head in range(self.attn_heads):
        kernel = self.kernels[head]  # W in the paper (F x F')
        attention_kernel = self.attn_kernels[head]  # Attention kernel a in the paper (2F' x 1)

        # Compute inputs to attention network
        features = K.dot(X, kernel)  # (N x F')

        # Compute feature combinations
        # Note: [[a_1], [a_2]]^T [[Wh_i], [Wh_j]] = [a_1]^T [Wh_i] + [a_2]^T [Wh_j]
        attn_for_self = K.dot(features, attention_kernel[0])  # (N x 1), [a_1]^T [Wh_i]
        attn_for_neighs = K.dot(features, attention_kernel[1])  # (N x 1), [a_2]^T [Wh_j]

        # Create sparse attention vector (all non-zero values of the matrix)
        sparse_attn_self = tf.gather(K.reshape(attn_for_self, [-1]),
                                     A_indices[:, 0], axis=0)
        sparse_attn_neighs = tf.gather(K.reshape(attn_for_neighs, [-1]),
                                       A_indices[:, 1], axis=0)
        attn_values = sparse_attn_self + sparse_attn_neighs

        # Add nonlinearity
        attn_values = LeakyReLU(alpha=0.2)(attn_values)

        # Apply dropout to features and attention coefficients
        dropout_feat = Dropout(self.in_dropout_rate)(features)  # (N x F')
        dropout_attn = Dropout(self.attn_dropout_rate)(attn_values)  # (N x N)

        # Convert to sparse matrix
        sparse_attn = tf.sparse.SparseTensor(A_indices, values=dropout_attn,
                                             dense_shape=[n_nodes, n_nodes])

        # Apply softmax to get attention coefficients
        sparse_attn = tf.sparse.softmax(sparse_attn)  # (N x N), Eq. 3 of the paper

        # Linear combination with neighbors' features [YT: see Eq. 4]
        node_features = tf.sparse.sparse_dense_matmul(sparse_attn, dropout_feat)  # (N x F')

        if self.use_bias:
            node_features = K.bias_add(node_features, self.biases[head])

        # Add output of attention head to final output
        outputs.append(node_features)

    # Aggregate the heads' output according to the reduction method
    if self.attn_heads_reduction == "concat":
        output = K.concatenate(outputs)  # (N x KF')
    else:
        output = K.mean(K.stack(outputs), axis=0)  # (N x F')

    output = self.activation(output)

    # Add batch dimension back if we removed it
    if batch_dim == 1:
        output = K.expand_dims(output, 0)

    return output
def __call__(self, shape, dtype=None, partition_info=None):
    w = self.base_initializer(shape=shape, dtype=dtype)
    u = K.random_uniform(shape=tuple([1, shape[-1]]), dtype=dtype)
    w_bar, _u, sigma = spectral_normalization(w, u, self.niter_spectral)
    w_bar = bjorck_normalization(w_bar, self.niter_bjorck)
    return K.reshape(w_bar, shape)
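# The helpers used above (`spectral_normalization`, `bjorck_normalization`)
# are not shown here. Presumably the spectral step estimates the largest
# singular value by power iteration; a minimal NumPy sketch of that idea,
# under that assumption (n_iter >= 1):
import numpy as np

def power_iteration_sigma(w, n_iter=20, eps=1e-12):
    # Estimate the largest singular value of a 2-D weight matrix.
    u = np.random.rand(1, w.shape[-1])
    for _ in range(n_iter):
        v = u @ w.T
        v /= np.linalg.norm(v) + eps
        u = v @ w
        u /= np.linalg.norm(u) + eps
    return (v @ w @ u.T).item()

w = np.random.randn(64, 32)
print(power_iteration_sigma(w), np.linalg.svd(w, compute_uv=False)[0])  # ~equal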
def call(self, x, mask=None):
    a = KB.permute_dimensions(x, (1, 0, 2))
    a = KB.reshape(a, (x.shape[1] * x.shape[0], x.shape[2]))
    a = ifft(a)
    a = KB.reshape(a, (x.shape[1], x.shape[0], x.shape[2]))
    return KB.permute_dimensions(a, (1, 0, 2))
def yolo_head(feats, anchors, num_classes):
    """Convert final layer features to bounding box parameters.

    Parameters
    ----------
    feats : tensor
        Final convolutional layer features.
    anchors : array-like
        Anchor box widths and heights.
    num_classes : int
        Number of target classes.

    Returns
    -------
    box_xy : tensor
        x, y box predictions adjusted by spatial location in conv layer.
    box_wh : tensor
        w, h box predictions adjusted by anchors and conv spatial resolution.
    box_conf : tensor
        Probability estimate for whether each box contains any object.
    box_class_pred : tensor
        Probability distribution estimate for each box over class labels.
    """
    num_anchors = len(anchors)
    # Reshape to batch, height, width, num_anchors, box_params.
    anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2])

    # Static implementation for fixed models.
    # TODO: Remove or add option for static implementation.
    # _, conv_height, conv_width, _ = K.int_shape(feats)
    # conv_dims = K.variable([conv_width, conv_height])

    # Dynamic implementation of conv dims for fully convolutional model.
    conv_dims = K.shape(feats)[1:3]  # assuming channels last
    # In YOLO the height index is the inner most iteration.
    conv_height_index = K.arange(0, stop=conv_dims[0])
    conv_width_index = K.arange(0, stop=conv_dims[1])
    conv_height_index = K.tile(conv_height_index, [conv_dims[1]])

    # TODO: Repeat_elements and tf.split doesn't support dynamic splits.
    # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0)
    conv_width_index = K.tile(K.expand_dims(conv_width_index, 0),
                              [conv_dims[0], 1])
    conv_width_index = K.flatten(K.transpose(conv_width_index))
    conv_index = K.transpose(K.stack([conv_height_index, conv_width_index]))
    conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2])
    conv_index = K.cast(conv_index, K.dtype(feats))

    feats = K.reshape(
        feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5])
    conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats))

    # Static generation of conv_index:
    # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)])
    # conv_index = conv_index[:, [1, 0]]  # swap columns for YOLO ordering.
    # conv_index = K.variable(
    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
    # feats = Reshape(
    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)

    box_xy = K.sigmoid(feats[..., :2])
    box_wh = K.exp(feats[..., 2:4])
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.softmax(feats[..., 5:])

    # Adjust predictions to each spatial grid point and anchor size.
    # Note: YOLO iterates over height index before width index.
    box_xy = (box_xy + conv_index) / conv_dims
    box_wh = box_wh * anchors_tensor / conv_dims

    return box_xy, box_wh, box_confidence, box_class_probs
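# To see what the dynamically built conv_index grid contains, here is a small
# NumPy equivalent (square grid, as in YOLO's 13 x 13 output):
import numpy as np

conv_h = conv_w = 2
height_index = np.tile(np.arange(conv_h), conv_w)                           # [0 1 0 1]
width_index = np.tile(np.arange(conv_w)[None, :], (conv_h, 1)).T.flatten()  # [0 0 1 1]
conv_index = np.stack([height_index, width_index], -1).reshape(conv_h, conv_w, 2)
print(conv_index)  # cell (row, col) holds its own (x, y) offset:
# [[[0 0]
#   [1 0]]
#  [[0 1]
#   [1 1]]]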
def gather_nd_reshape(t, indices, final_shape):
    h = tf.gather_nd(t, indices)
    return K.reshape(h, final_shape)
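# Usage sketch (TF2 eager, shapes chosen for illustration): gather rows of a
# (6, 4) tensor by index, then regroup them into a (2, 2, 4) block.
import tensorflow as tf

t = tf.reshape(tf.range(24.), (6, 4))     # 6 rows of 4 features
inds = tf.constant([[0], [2], [5], [1]])  # row indices, shape (4, 1)
out = gather_nd_reshape(t, inds, (2, 2, 4))
print(out.shape)  # (2, 2, 4)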
def yolo_loss(inputs, num_anchors):
    ignore_thresh = .5  # Threshold for the object-detection probability
    num_layers = num_anchors // 3  # Number of anchors per grid level
    y_pred = inputs[:num_layers]  # Values predicted by the model
    y_true = inputs[num_layers:]  # Ground-truth values
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]  # Anchor mask for each grid level

    # Input image size ((13 x 13) * 32 = (416 x 416)), cast to the dtype of y_true[0]
    input_shape = K.cast(K.shape(y_pred[0])[1:3] * 32, K.dtype(y_true[0]))
    # 2-D array with the grid dimensions ((13, 13), (26, 26), (52, 52))
    grid_shapes = [
        K.cast(K.shape(y_pred[l])[1:3], K.dtype(y_true[0]))
        for l in range(num_layers)
    ]

    loss = 0  # Loss value
    m = K.shape(y_pred[0])[0]  # Batch size
    batch_size = K.cast(m, K.dtype(y_pred[0]))  # Cast to the dtype of y_pred[0]

    for l in range(num_layers):  # Iterate over all three grid levels
        # Mask for the l-th grid level based on the object-detection probability
        # (the 5th entry of the parameter list). object_mask keeps only the
        # values corresponding to the detection probability.
        object_mask = y_true[l][..., 4:5]  # Returns data like ([0][0][0][0]...[1]...[0])
        # Analogous slice for the l-th grid level with the one-hot class
        # encoding (where the position of our class is recorded).
        true_class = y_true[l][..., 5:]  # Returns data like ([0][0][0][0]...[1]...[0])

        num_sub_anchors = len(anchors[anchor_mask[l]])  # Number of anchors per grid level (3)
        # Reshape this level's anchors into anchors_tensor
        anchors_tensor = K.reshape(K.constant(anchors[anchor_mask[l]]),
                                   [1, 1, 1, num_sub_anchors, 2])

        # Build a 2-D array grid with values [[[0, 0], [0, 1], ..., [0, k]],
        #                                     [[1, 0], [1, 1], ..., [1, k]],
        #                                     ...
        #                                     [[k, 0], [k, 1], ..., [k, k]]]
        # where k is the grid size. The array holds the grid cell indices.
        grid_shape = K.shape(y_pred[l])[1:3]  # Grid height and width
        grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
                        [1, grid_shape[1], 1, 1])  # Vertical line
        grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
                        [grid_shape[0], 1, 1, 1])  # Horizontal line
        grid = K.concatenate([grid_x, grid_y])  # Combine
        grid = K.cast(grid, K.dtype(y_pred[l]))  # Cast to the dtype of y_pred[l]

        # Reshape y_pred[l]
        feats = K.reshape(y_pred[l], [-1, grid_shape[0], grid_shape[1],
                                      num_sub_anchors, num_classes + 5])

        # Loss for the object-center coordinates.
        # Object-center coordinates recovered from the predicted values:
        pred_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1],
                                                              K.dtype(feats))
        # Inverse transform of the original y_true center coordinates:
        true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid  # Real bounding-box center
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]  # the larger the box, the smaller the loss
        # binary_crossentropy between the true and predicted values
        # (object_mask keeps only the required entries)
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            true_xy, feats[..., 0:2], from_logits=True)

        # Loss for the width and height.
        # Width and height recovered from the predicted values:
        pred_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(
            input_shape[::-1], K.dtype(feats))
        # Inverse transform of the original y_true width and height:
        true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]] * input_shape[::-1])
        # Keep width/height only for the entries where object_mask = 1
        true_wh = K.switch(object_mask, true_wh, K.zeros_like(true_wh))
        # Width/height loss
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(true_wh - feats[..., 2:4])

        # Combine the values into one array
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Loss for detecting any class at all. First we have to drop all
        # detections whose probability is below ignore_thresh.
        # Array that will hold the entries to ignore:
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1, dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')  # Cast object_mask to 'bool'

        # Function marking the data to ignore. For every batch element (b < m):
        # take the ground-truth bounding box of the current cell, compute the
        # IoU of the true and predicted boxes, and depending on
        # best_iou < ignore_thresh mark it as correctly or incorrectly detected.
        def loop_body(b, ignore_mask):
            # true_box gets the first 4 parameters (center, width and height)
            # of the elements whose object_mask_bool value is True
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            # IoU of the predicted bounding box (pred_box) and the original one (true_box)
            iou = calc_iou(pred_box[b], true_box)
            # Find the best bounding box
            best_iou = K.max(iou, axis=-1)
            # Write true/false into ignore_mask depending on (best_iou < ignore_thresh)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask  # Increment the counter and return ignore_mask

        # Loop over all elements up to m (m = batch size)
        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()  # Turn ignore_mask into a tensor
        ignore_mask = K.expand_dims(ignore_mask, -1)  # Append one more dimension to ignore_mask

        # Confidence loss:
        #   component 1 - for the correctly predicted values
        #   component 2 - for the incorrectly predicted values
        confidence_loss = (
            object_mask * K.binary_crossentropy(object_mask, feats[..., 4:5],
                                                from_logits=True)
            + (1 - object_mask) * K.binary_crossentropy(object_mask,
                                                        feats[..., 4:5],
                                                        from_logits=True)
            * ignore_mask)

        # Class-prediction loss
        class_loss = object_mask * K.binary_crossentropy(true_class,
                                                         feats[..., 5:],
                                                         from_logits=True)

        # Total loss
        xy_loss = K.sum(xy_loss) / batch_size
        wh_loss = K.sum(wh_loss) / batch_size
        confidence_loss = K.sum(confidence_loss) / batch_size
        class_loss = K.sum(class_loss) / batch_size
        loss += xy_loss + wh_loss + confidence_loss + class_loss

    return loss  # Return the loss value
def hw_flatten(x):
    # Input shape x: [BATCH, HEIGHT, WIDTH, CHANNELS]
    # Flatten the feature volume across the tensor width and height.
    # Note: K.int_shape returns None for a dynamic batch dimension, so the
    # batch size is taken from K.shape instead.
    shape = K.int_shape(x)
    return K.reshape(x, [K.shape(x)[0], -1, shape[-1]])  # [BATCH, W*H, CHANNELS]
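# Typical use in self-attention (TF2 eager sketch): flatten the spatial grid,
# then compute pairwise similarities between all H*W positions.
import tensorflow as tf

x = tf.random.normal((2, 8, 8, 32))             # [B, H, W, C]
flat = hw_flatten(x)                            # [B, 64, 32]
attn = tf.matmul(flat, flat, transpose_b=True)  # [B, 64, 64]
print(flat.shape, attn.shape)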
def call(self, x):
    mat = x[0]
    val = x[1]
    con = val[:, :, :self.keepconst]
    var = val[:, :, self.keepconst:]
    mata = K.constant(self.getmata(self.gs))
    matb = K.constant(self.getmatb(self.gs))
    for i in range(self.iterations):
        valp = K.permute_dimensions(val, (0, 2, 1))
        vA = K.dot(valp, mata)
        vB = K.dot(valp, matb)
        feat = K.permute_dimensions(vA, (0, 2, 1))
        diff = K.permute_dimensions(vB - vA, (0, 2, 1))
        premlp = K.concatenate((feat, diff), axis=-1)
        postmlp = premlp[:, :, :self.param - self.keepconst]
        # postmlp = self.mlp(premlp)
        ppmlp = K.permute_dimensions(postmlp, (0, 2, 1))
        res = K.reshape(ppmlp, (-1, self.param - self.keepconst, self.gs, self.gs))
        resp = K.permute_dimensions(res, (1, 0, 2, 3))
        presmat = resp * mat  # kinda wondering that this (resp*mat) actually works... tja, it actually does not
        resmat = K.permute_dimensions(presmat, (1, 0, 2, 3))
        # resmat = K.permute_dimensions(presmat, (1, 2, 0, 3))
        print("resmat", resmat.shape)
        summ = K.sum(resmat, axis=-1)  # /msumtra
        print("summ", summ.shape)
        summ /= self.k
        # var = K.permute_dimensions(summ, (2, 0, 1))
        var = K.permute_dimensions(summ, (0, 2, 1))
        # hopefully implemented this into the resmat permute; nope, not differentiable
        print("var", var.shape)
        if self.activate:
            var = self.advrelu(var, self.activation)
        val = K.concatenate((con, var), axis=-1)
    return val
def train_lstm(n_symbols, embedding_matrix, config):
    # (batch_size, max_group_nums, max_post_nums, max_seq_len)
    max_group_nums = config.max_group_nums
    max_post_nums = config.max_post_nums
    max_seq_len = config.max_seq_len

    main_input = Input(shape=(max_group_nums, max_post_nums, max_seq_len))
    sub_input = Input(shape=(max_group_nums, social_feature_nums))

    embedding_layer_main = Embedding(input_dim=n_symbols,
                                     output_dim=config.embeddingSize,
                                     weights=[embedding_matrix],
                                     input_length=max_seq_len,
                                     mask_zero=True)(main_input)
    dropout_layer_1 = Dropout(config.dropoutKeepProb)(embedding_layer_main)
    # shape == (batch_size, max_group_nums, max_post_nums, max_seq_len, embeddingSize)

    bid_GRU_layer_1 = Bidirectional(
        GRU(32, activation="tanh", recurrent_dropout=0.5, return_sequences=True),
        merge_mode='concat')(backend.reshape(
            dropout_layer_1, shape=[-1, max_seq_len, config.embeddingSize]))
    # shape == (batch_size*max_group_nums*max_post_nums, max_seq_len, 64)
    bn_layer_1 = BatchNormalization()(bid_GRU_layer_1)
    attention_layer_1 = AttentionLayer()(bn_layer_1)
    # (batch_size*max_group_nums*max_post_nums, 64)

    bid_GRU_layer_2 = Bidirectional(
        GRU(32, activation='tanh', dropout=0.5, recurrent_dropout=0.5,
            return_sequences=True),
        merge_mode='concat')(backend.reshape(
            attention_layer_1, shape=[-1, max_post_nums, 64]))
    # shape == (batch_size*max_group_nums, max_post_nums, 64)
    bn_layer_2 = BatchNormalization()(bid_GRU_layer_2)
    attention_layer_2 = AttentionLayer()(bn_layer_2)
    # (batch_size*max_group_nums, 64)

    bid_GRU_layer_3 = Bidirectional(
        GRU(32, activation='tanh', dropout=0.5, recurrent_dropout=0.5,
            return_sequences=True),
        merge_mode='concat')(backend.reshape(
            attention_layer_2, shape=[-1, max_group_nums, 64]))
    bn_layer_3 = BatchNormalization()(
        concatenate([bid_GRU_layer_3, sub_input], axis=2))
    attention_layer_3 = AttentionLayer()(bn_layer_3)
    # (batch_size, 64+social_feature_nums)

    dense_layer_1 = Dense(64, activation="tanh")(sub_input)
    # (batch_size, max_group_nums, 64)
    social_attention_layer = AttentionLayer()(dense_layer_1)  # (batch_size, 64)

    merge_layer = concatenate([attention_layer_3, social_attention_layer], axis=1)
    # (batch_size, 128+social_attention_layer)
    dropout_layer = Dropout(config.dropoutKeepProb)(merge_layer)
    output_layer = Dense(2, activation='softmax')(
        Dense(32, activation="tanh")(dropout_layer))

    model = Model([main_input, sub_input], output_layer)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
def sinkhorn_loss(y_true, y_pred):
    y_true = K.cast(y_true, y_pred.dtype)
    y_pred = K.reshape(y_pred, (-1, 48, 1))
    y_true = K.reshape(y_true, (-1, 48, 1))
    cc = tf.concat([y_true, y_pred], axis=2)
    return K.mean(tf.map_fn(myfunc, cc), axis=-1)
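# `myfunc` is not defined here; it presumably maps one (48, 2) tensor of
# paired (true, pred) values to a scalar transport cost. A hypothetical
# stand-in and usage, for illustration only:
def myfunc(pair):  # pair: (48, 2) -> scalar; placeholder, not a real Sinkhorn cost
    return K.sum(K.abs(pair[:, 0] - pair[:, 1]))

# model.compile(optimizer='adam', loss=sinkhorn_loss)  # assuming a (..., 48) output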
def display_yolo(image, yolo_model, score_threshold, iou_threshold,
                 train_batch_size=16, grid_h=8, grid_w=8, image_h=256, image_w=256,
                 anchors=[0.57273, 0.677385, 1.87446, 2.06253, 3.33843,
                          5.47434, 7.88282, 3.52778, 9.77052, 9.16828],
                 plot=False):
    '''
    Display predictions from YOLO model.

    Parameters
    ----------
    - image : image array to run the prediction on.
    - yolo_model : YOLO model.
    - score_threshold : threshold used for filtering predicted bounding boxes.
    - iou_threshold : threshold used for non max suppression.
    '''
    # load image
    input_image = image[:, :, ::-1]
    input_image = input_image / 255.
    input_image = np.expand_dims(input_image, 0)

    # prediction
    y_pred = yolo_model.predict_on_batch(input_image)

    # post prediction process
    # grid coords tensor
    coord_x = tf.cast(tf.reshape(tf.tile(tf.range(grid_w), [grid_h]),
                                 (1, grid_h, grid_w, 1, 1)), tf.float32)
    coord_y = tf.transpose(coord_x, (0, 2, 1, 3, 4))
    coords = tf.tile(tf.concat([coord_x, coord_y], -1),
                     [train_batch_size, 1, 1, 5, 1])
    dims = K.cast_to_floatx(K.int_shape(y_pred)[1:3])
    dims = K.reshape(dims, (1, 1, 1, 1, 2))
    # anchors tensor
    anchors = np.array(anchors)
    anchors = anchors.reshape(len(anchors) // 2, 2)

    # pred_xy and pred_wh shape (m, grid_w, grid_h, Anchors, 2)
    pred_xy = K.sigmoid(y_pred[:, :, :, :, 0:2])
    pred_xy = (pred_xy + coords)
    pred_xy = pred_xy / dims
    pred_wh = K.exp(y_pred[:, :, :, :, 2:4])
    pred_wh = (pred_wh * anchors)
    pred_wh = pred_wh / dims

    # pred_confidence
    box_conf = K.sigmoid(y_pred[:, :, :, :, 4:5])
    # pred_class
    box_class_prob = K.softmax(y_pred[:, :, :, :, 5:])

    # Reshape
    pred_xy = pred_xy[0, ...]
    pred_wh = pred_wh[0, ...]
    box_conf = box_conf[0, ...]
    box_class_prob = box_class_prob[0, ...]

    # Convert box coords from x,y,w,h to x1,y1,x2,y2
    box_xy1 = pred_xy - 0.5 * pred_wh
    box_xy2 = pred_xy + 0.5 * pred_wh
    boxes = K.concatenate((box_xy1, box_xy2), axis=-1)

    # Filter boxes
    box_scores = box_conf * box_class_prob
    box_classes = K.argmax(box_scores, axis=-1)  # best score index
    box_class_scores = K.max(box_scores, axis=-1)  # best score
    prediction_mask = box_class_scores >= score_threshold
    boxes = tf.boolean_mask(boxes, prediction_mask)
    scores = tf.boolean_mask(box_class_scores, prediction_mask)
    classes = tf.boolean_mask(box_classes, prediction_mask)

    # Scale box to image shape
    boxes = boxes * image_h

    # Non Max Suppression
    selected_idx = tf.image.non_max_suppression(boxes, scores, 50,
                                                iou_threshold=iou_threshold)
    boxes = K.gather(boxes, selected_idx)
    scores = K.gather(scores, selected_idx)
    classes = K.gather(classes, selected_idx)

    if plot:
        # Draw image
        plt.figure(figsize=(2, 2))
        f, (ax1) = plt.subplots(1, 1, figsize=(10, 10))
        ax1.imshow(image[:, :, ::-1])
        count_detected = boxes.shape[0]
        ax1.set_title('Detected objects count : {}'.format(count_detected))
        for i in range(count_detected):
            box = boxes[i, ...]
            x = box[0]
            y = box[1]
            w = box[2] - box[0]
            h = box[3] - box[1]
            classe = classes[i].numpy()
            if classe == 0:
                color = (0, 1, 0)
            else:
                color = (1, 0, 0)
            rect = patches.Rectangle((x.numpy(), y.numpy()), w.numpy(), h.numpy(),
                                     linewidth=3, edgecolor=color, facecolor='none')
            ax1.add_patch(rect)

    return boxes, scores, classes
def testBackEnd():
    x = tf.zeros([3, 4], tf.int32)
    print('x=', x)
    x = tf.zeros((3, 4), tf.int32)
    print('x=', x)
    return

    a = tf.constant([1, 2, 3, 4, 5, 6, 7, 8], dtype=tf.float32)
    a = K.reshape(a, (2, 4))  # 8 elements cannot fill a (4, 4) shape
    print('a=', a)

    a = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
    #a = K.abs(-1)
    print('a=', type(a), a)  # a= tf.Tensor(1, shape=(), dtype=int32)
    # a = a.numpy()
    # print('a=',a)
    # a = tf.zeros([0, 3])
    # a = tf.concat([a, [[1, 2, 3], [5, 6, 8]]], axis=0)
    # print('a=',type(a),a)

    b = tf.constant([[1, 8], [2, 3]], dtype=tf.float32)
    c = K.square(a - b)
    print('c=', c)
    d = K.sum(c, axis=0)
    print('d=', d)
    d = K.sum(c, axis=1)
    print('d=', d)
    d = K.sum(c, axis=[0, 1])
    print('d=', d)
    return

    a = K.abs([-1, 0, 9, -10])
    print('a=', a)  # a= tf.Tensor([ 1  0  9 10], shape=(4,), dtype=int32)
    a = K.abs(np.array([-1, 0, 9, -10]))
    print('a=', a)  # a= tf.Tensor([ 1  0  9 10], shape=(4,), dtype=int32)
    a = K.all(np.array([-1, 0, 9, -10]), axis=0)
    print('a=', a)  # a= tf.Tensor(False, shape=(), dtype=bool)
    a = K.all(np.array([[-1, -2, -1], [-1, 0, 9]]), axis=0)  # x axis
    print('a=', a)  # a= tf.Tensor([ True False  True], shape=(3,), dtype=bool)
    a = K.all(np.array([[-1, -2, -1], [-1, 0, 9]]), axis=1)  # y axis
    print('a=', a)  # a= tf.Tensor([ True False], shape=(2,), dtype=bool)

    a = K.arange(1, 100, 10)
    print('a=', a)  # a= tf.Tensor([ 1 11 21 31 41 51 61 71 81 91], shape=(10,), dtype=int32)
    a = K.sum(np.array([-1, 0, 9, -10]))
    print('a=', a)  # a= tf.Tensor(-2, shape=(), dtype=int32)
    a = K.square(np.array([-1, 0, 9, -10]))
    print('a=', a)  # a= tf.Tensor([  1   0  81 100], shape=(4,), dtype=int32)

    x = K.placeholder(shape=(2, 3))
    y = K.placeholder(shape=(3, 4))
    xy = K.dot(x, y)
    shape = K.int_shape(xy)
    print('xy=', xy)  # xy= Tensor("MatMul:0", shape=(2, 4), dtype=float32)
    print('xy shape=', shape)  # xy shape= (2, 4)

    kvar = K.eye(3)
    #K.eval(kvar)
    print('kvar=', kvar)
    '''
    array([[1., 0., 0.],
           [0., 1., 0.],
           [0., 0., 1.]], dtype=float32)>
    '''
    a = np.array([[1, 2], [3, 4]])
    a = K.transpose(a)
    print('a=', a)
    '''
    a= tf.Tensor(
    [[1 3]
     [2 4]], shape=(2, 2), dtype=int32)
    '''
    a = K.clip(np.array([-1, 0, 1, 2, 3, 4, 5]), min_value=0, max_value=3)
    print('a=', a)  # a= tf.Tensor([0 0 1 2 3 3 3], shape=(7,), dtype=int32)
def call(self, inputs, **kwargs):
    input_shape = K.int_shape(inputs)
    sequence_length, d_model = input_shape[-2:]
    # output of the "sigmoid halting unit" (not the probability yet)
    halting = K.sigmoid(
        K.reshape(
            K.bias_add(
                K.dot(K.reshape(inputs, [-1, d_model]), self.halting_kernel),
                self.halting_biases,
                data_format='channels_last'),
            [-1, sequence_length]))
    if self.zeros_like_halting is None:
        self.initialize_control_tensors(halting)
    # useful flags
    step_is_active = K.greater(self.halt_budget, 0)
    no_further_steps = K.less_equal(self.halt_budget - halting, 0)
    # halting probability is equal to
    # a. halting output if this isn't the last step (we have some budget)
    # b. to remainder if it is,
    # c. and zero for the steps that shouldn't be executed at all
    #    (out of budget for them)
    halting_prob = K.switch(
        step_is_active,
        K.switch(no_further_steps, self.remainder, halting),
        self.zeros_like_halting)
    self.active_steps += K.switch(
        step_is_active, self.ones_like_halting, self.zeros_like_halting)
    # We don't know which step is the last, so we keep updating
    # expression for the loss with each call of the layer
    self.ponder_cost = (
        self.time_penalty_t * K.mean(self.remainder + self.active_steps))
    # Updating "the remaining probability" and the halt budget
    self.remainder = K.switch(
        no_further_steps, self.remainder, self.remainder - halting)
    self.halt_budget -= halting  # OK to become negative

    # If none of the inputs are active at this step, then instead
    # of zeroing them out by multiplying to all-zeroes halting_prob,
    # we can simply use a constant tensor of zeroes, which means that
    # we won't even calculate the output of those steps, saving
    # some real computational time.
    if self.zeros_like_input is None:
        self.zeros_like_input = K.zeros_like(inputs, name='zeros_like_input')
    # just because K.any(step_is_active) doesn't work in PlaidML
    any_step_is_active = K.greater(K.sum(K.cast(step_is_active, 'int32')), 0)
    step_weighted_output = K.switch(
        any_step_is_active,
        K.expand_dims(halting_prob, -1) * inputs,
        self.zeros_like_input)
    if self.weighted_output is None:
        self.weighted_output = step_weighted_output
    else:
        self.weighted_output += step_weighted_output
    return [inputs, self.weighted_output]
def build_model(hidden_dim, max_seq_len, vocabulary_size):
    ## encoder Input and layers
    encoder_in = Input((max_seq_len, ), dtype='int32', name='encoder_in')
    ith_str = Input((1, ), dtype='int32', name='ith_str')
    word = Input((1, ), dtype='int32', name='word')
    OneHot = Lambda(lambda x: K.one_hot(x, vocabulary_size), name='OneHot')

    ## building encoder
    encoder_in_and_word = Concatenate()([ith_str, word, encoder_in])
    encoder_GRU = GRU(hidden_dim, return_state=True, return_sequences=True)
    encoder_out, state = encoder_GRU(OneHot(encoder_in_and_word))
    encoder_out_dup = RepeatVector(max_seq_len)(encoder_out[:, -1])

    ## decoder Input and layers
    decoder_in = Input((max_seq_len, ), dtype='int32', name='decoder_in')
    ith = Input((1, ), dtype='int32', name='ith')
    decoder_GRU = GRU(hidden_dim, return_sequences=True, return_state=True)
    decoder_Dense = Dense(vocabulary_size, activation='softmax', name='decoder_out')

    ## building decoder
    ith_dup = RepeatVector(max_seq_len)(K.cast(ith, 'float'))
    word_dup = K.reshape(RepeatVector(max_seq_len)(word), (-1, max_seq_len))
    x = Concatenate()(
        [ith_dup, OneHot(word_dup), OneHot(decoder_in), encoder_out_dup])
    x, _ = decoder_GRU(x, initial_state=state)
    decoder_out = decoder_Dense(x)

    ## get the specific word
    gather = K.concatenate(
        [K.reshape(tf.range(K.shape(decoder_out)[0]), (-1, 1)), ith])
    specific_word = tf.gather_nd(decoder_out, gather)
    specific_word = Lambda(tf.identity, name='word_out')(
        specific_word)  # Add this layer because the name of tf.gather_nd is too ugly

    model = Model([encoder_in, decoder_in, ith, ith_str, word],
                  [decoder_out, specific_word])

    ## building decoder model given encoder_out and states
    decoder_in_one_word = Input((1, ), dtype='int32', name='decoder_in_one_word')
    decoder_state_in = Input((hidden_dim, ), name='decoder_state_in')
    encoder_out = Input((hidden_dim, ), name='decoder_encoder_out')
    x = Concatenate()([
        K.cast(ith, 'float')[:, tf.newaxis],
        OneHot(word),
        OneHot(decoder_in_one_word),
        encoder_out[:, tf.newaxis]
    ])
    x, decoder_state = decoder_GRU(x, initial_state=decoder_state_in)
    decoder_out = decoder_Dense(x)
    decoder_model = Model(
        [decoder_in_one_word, encoder_out, decoder_state_in, ith, word],
        [decoder_out, decoder_state])

    encoder_in = Input((None, ), dtype='int32')
    encoder_in_and_word = Concatenate()([ith_str, word, encoder_in])
    encoder_out, state = encoder_GRU(OneHot(encoder_in_and_word))
    encoder_model = Model([encoder_in, ith_str, word], [encoder_out, state])

    return model, encoder_model, decoder_model
def yolo_loss(args, anchors, num_classes, rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    yolo_output : tensor
        Final convolutional layer features.
    true_boxes : tensor
        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
        containing box x_center, y_center, width, height, and class.
    detectors_mask : array
        0/1 mask for detector positions where there is a matching ground truth.
    matching_true_boxes : array
        Corresponding ground truth boxes for positive detector positions.
        Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box with
        the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    mean_loss : float
        mean localization loss across minibatch
    """
    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
    num_anchors = len(anchors)
    object_scale = 5
    no_object_scale = 1
    class_scale = 1
    coordinates_scale = 1
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo_head.
    yolo_output_shape = K.shape(yolo_output)
    feats = K.reshape(yolo_output, [
        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
        num_classes + 5
    ])
    pred_boxes = K.concatenate(
        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Expand pred x,y,w,h to allow comparison with ground truth.
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)

    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half

    true_boxes_shape = K.shape(true_boxes)

    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    true_boxes = K.reshape(true_boxes, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
    ])
    true_xy = true_boxes[..., 0:2]
    true_wh = true_boxes[..., 2:4]

    # Find IOU of each predicted box with each ground truth box.
    true_wh_half = true_wh / 2.
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]

    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]

    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas

    # Best IOUs for each location.
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Determine confidence weights from object and no_object weights.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (no_object_scale * (1 - object_detections) *
                         (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)

    if rescore_confidence:
        objects_loss = (object_scale * detectors_mask *
                        K.square(best_ious - pred_confidence))
    else:
        objects_loss = (object_scale * detectors_mask *
                        K.square(1 - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    matching_classes = K.cast(matching_true_boxes[..., 4], 'int32')
    matching_classes = K.one_hot(matching_classes, num_classes)
    classification_loss = (class_scale * detectors_mask *
                           K.square(matching_classes - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    matching_boxes = matching_true_boxes[..., 0:4]
    coordinates_loss = (coordinates_scale * detectors_mask *
                        K.square(matching_boxes - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)
    if print_loss:
        total_loss = tf.Print(
            total_loss, [
                total_loss, confidence_loss_sum, classification_loss_sum,
                coordinates_loss_sum
            ],
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def custom_loss(target, output):
    output **= (1 / hp.T)
    output /= K.reshape(K.sum(output, axis=1), (-1, 1))
    return (hp.T)**2 * K.categorical_crossentropy(target, output)
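# This is the distillation-style temperature trick: already-softmaxed
# probabilities are re-softened by exponentiating with 1/T and renormalizing.
# A small NumPy illustration (T = 4 is an assumed value of hp.T):
import numpy as np

T = 4.0
p = np.array([0.85, 0.10, 0.05])  # softmax output of a trained teacher
soft = p ** (1 / T)
soft /= soft.sum()
print(soft)  # ~[0.48, 0.28, 0.24]: much softer targets than p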
def echo_sample(inputs, clip=None, d_max=100, batch=100, multiplicative=False,
                echo_mc=False, replace=False, fx_clip=None, plus_sx=True,
                calc_log=True, return_noise=False, **kwargs):
    # kwargs unused

    # inputs should be specified as list:
    #   [ f(X), s(X) ] with s(X) in log space if calc_log = True
    # plus_sx =
    #   True if logsigmoid activation for s(X)
    #   False for softplus (equivalent)
    if isinstance(inputs, list):
        fx = inputs[0]
        sx = inputs[-1]
    else:
        fx = inputs

    # TO DO : CALC_LOG currently determines both whether to do log space
    # calculations AND whether sx is a log

    fx_shape = fx.get_shape()
    sx_shape = sx.get_shape()
    z_dim = K.int_shape(fx)[-1]
    batch_size = batch
    batch = K.shape(fx)[0]

    if clip is None:
        # clip is multiplied times s(x) to ensure that last sampled term:
        #   (clip^d_max)*f(x) < machine precision
        max_fx = fx_clip if fx_clip is not None else 1.0
        clip = (2**(-23) / max_fx)**(1.0 / d_max)

    # fx_clip can be used to restrict magnitude of f(x), not used in paper
    # defaults to no clipping and M = 1 (e.g. with tanh activation for f(x))
    if fx_clip is not None:
        fx = K.clip(fx, -fx_clip, fx_clip)

    if not calc_log:
        sx = tf.multiply(clip, sx)
        sx = tf.where(tf.abs(sx) < K.epsilon(), K.epsilon() * tf.sign(sx), sx)
        #raise ValueError('calc_log=False is not supported; sx has to be log_sigmoid')
    else:
        # plus_sx based on activation for sx = s(x):
        #   True for log_sigmoid
        #   False for softplus
        sx = tf.log(clip) + (-1 * sx if not plus_sx else sx)

    #if echo_mc is not None and echo_mc:
    #    # use mean centered fx for noise
    #    fx = fx - K.mean(fx, axis=0, keepdims=True)

    if replace:  # replace doesn't set batch size (using permute_neighbor_indices does)
        sx = K.batch_flatten(sx) if len(sx_shape) > 2 else sx
        fx = K.batch_flatten(fx) if len(fx_shape) > 2 else fx
        inds = K.reshape(random_indices(batch, d_max), (-1, 1))
        select_sx = gather_nd_reshape(sx, inds, (-1, d_max, z_dim))
        select_fx = gather_nd_reshape(fx, inds, (-1, d_max, z_dim))

        if len(sx_shape) > 2:
            select_sx = K.expand_dims(K.expand_dims(select_sx, 2), 2)
            sx = K.expand_dims(K.expand_dims(sx, 1), 1)
        if len(fx_shape) > 2:
            select_fx = K.expand_dims(K.expand_dims(select_fx, 2), 2)
            fx = K.expand_dims(K.expand_dims(fx, 1), 1)
    else:
        # batch x batch x z_dim
        # for all i, stack_sx[i, :, :] = sx
        repeat = tf.multiply(tf.ones_like(tf.expand_dims(fx, 0)),
                             tf.ones_like(tf.expand_dims(fx, 1)))
        stack_fx = tf.multiply(fx, repeat)
        stack_sx = tf.multiply(sx, repeat)

        # select a set of dmax examples from original fx / sx for each batch entry
        inds = indices_without_replacement(batch, d_max)

        # Alternative method: note that permute_neighbor_indices sets the
        # batch_size dimension != None. This necessitates the use of
        # fit_generator, e.g. in training, to avoid 'remainder' batches if
        # data_size % batch > 0.
        #inds = permute_neighbor_indices(batch_size, d_max, replace=replace)

        select_sx = tf.gather_nd(stack_sx, inds)
        select_fx = tf.gather_nd(stack_fx, inds)

    if calc_log:
        sx_echoes = tf.cumsum(select_sx, axis=1, exclusive=True)
    else:
        sx_echoes = tf.cumprod(select_sx, axis=1, exclusive=True)

    # calculates S(x0)S(x1)...S(x_l)*f(x_(l+1))
    sx_echoes = tf.exp(sx_echoes) if calc_log else sx_echoes
    fx_sx_echoes = tf.multiply(select_fx, sx_echoes)

    # performs the sum over dmax terms to calculate noise
    noise = tf.reduce_sum(fx_sx_echoes, axis=1)

    sx = sx if not calc_log else tf.exp(sx)

    if multiplicative:  # log z according to echo
        output = tf.exp(fx + tf.multiply(sx, noise))
    else:
        output = fx + tf.multiply(sx, noise)

    return output if not return_noise else noise
def cat_acc(y_true, y_pred):
    y_true = K.reshape(y_true, shape=(-1, 7, 37))
    y_pred = K.reshape(y_pred, shape=(-1, 7, 37))
    return K.mean(tf.keras.metrics.categorical_accuracy(y_true, y_pred))
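# Quick numeric check (TF2 eager): 7 positions with 37-way one-hot labels and
# one position predicted wrong gives 6/7 per-position accuracy.
import numpy as np
import tensorflow as tf

y_true = np.zeros((1, 7 * 37), np.float32)
y_pred = np.zeros((1, 7 * 37), np.float32)
for i in range(7):
    y_true[0, i * 37 + 5] = 1.0                    # true class 5 everywhere
    y_pred[0, i * 37 + (5 if i < 6 else 6)] = 1.0  # last position wrong
print(cat_acc(tf.constant(y_true), tf.constant(y_pred)).numpy())  # ~0.857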
def body(X, Kernel, n, f, c, kH, kW, w_out, i, j):
    # Dot product of one kH x kW patch of channel c with the kernel:
    # a single output element of a convolution.
    X_reshape = K.reshape(X[n, c, i:i + kH, j:j + kW], [1, -1])
    a = K.sum(Kernel * X_reshape)
    return a
def top_3_k(y_true, y_pred):
    # Reshape into 2-d
    y_true = K.reshape(y_true, (-1, 37))
    y_pred = K.reshape(y_pred, (-1, 37))
    return K.mean(
        tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k=3))
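# Both metrics can be attached at compile time; a sketch assuming `model`
# outputs flattened multiples of 37 classes, matching the reshapes above:
# model.compile(optimizer='adam',
#               loss='categorical_crossentropy',
#               metrics=[cat_acc, top_3_k])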
def call(self, obj_vecs, pred_vecs, edges):
    """
    Inputs:
    - obj_vecs: FloatTensor of shape (B, O, D) giving vectors for all objects
    - pred_vecs: FloatTensor of shape (B, T, D) giving vectors for all predicates
    - edges: LongTensor of shape (B, T, 2) where edges[k] = [i, j] indicates the
      presence of a triple [obj_vecs[i], pred_vecs[k], obj_vecs[j]]

    Outputs:
    - new_obj_vecs: FloatTensor of shape (B, O, D) giving new vectors for objects
    - new_pred_vecs: FloatTensor of shape (B, T, D) giving new vectors for predicates
    """
    O, T = K.int_shape(obj_vecs)[1], K.int_shape(pred_vecs)[1]
    Din, H, Dout = self.input_dim, self.hidden_dim, self.output_dim

    # Break apart indices for subjects and objects; these have shape (B, T,)
    s_idx, o_idx = tf.split(edges, 2, axis=2)  # shape = (B, T, 1)
    s_idx = K.reshape(s_idx, (-1, T))  # shape = (B, T)
    o_idx = K.reshape(o_idx, (-1, T))

    i = tf.meshgrid(tf.range(self.batch_size), indexing="ij")
    i = K.reshape(i, (self.batch_size, 1))
    i = tf.broadcast_to(i, (self.batch_size, T))
    idx_s = tf.stack([i, s_idx], axis=-1)
    idx_o = tf.stack([i, o_idx], axis=-1)

    cur_s_vecs = tf.gather_nd(obj_vecs, idx_s)
    cur_o_vecs = tf.gather_nd(obj_vecs, idx_o)

    # Get current vectors for triples; shape is (B, T, 3 * Din)
    # Pass through net1 to get new triple vecs; shape is (B, T, 2 * H + Dout)
    cur_t_vecs = K.concatenate([cur_s_vecs, pred_vecs, cur_o_vecs], axis=2)
    new_t_vecs = self.net1(cur_t_vecs)

    # Break apart into new s, p, and o vecs; s and o vecs have shape (B, T, H)
    # and p vecs have shape (B, T, Dout)
    new_s_vecs = new_t_vecs[:, :, :H]
    new_p_vecs = new_t_vecs[:, :, H:(H + Dout)]
    new_o_vecs = new_t_vecs[:, :, (H + Dout):(2 * H + Dout)]

    # Allocate space for pooled object vectors of shape (B, O, H)
    pooled_obj_vecs = tf.zeros(shape=(self.batch_size, O, H))
    shape = K.shape(pooled_obj_vecs)

    # Sum vectors for objects that appear in multiple triples. tf.scatter_nd
    # already sums duplicate indices within one call, and the subject and
    # object contributions are added together (two consecutive assignments
    # would discard the first scatter).
    pooled_obj_vecs = (tf.scatter_nd(idx_s, new_s_vecs, shape=shape) +
                       tf.scatter_nd(idx_o, new_o_vecs, shape=shape))  # shape (B, O, H)

    if self.pooling == 'avg':
        # Figure out how many times each object has appeared, again using
        # the same scatter trickery.
        ones = tf.ones(shape=(self.batch_size, T, H))
        obj_counts = (tf.scatter_nd(idx_s, ones, shape=shape) +
                      tf.scatter_nd(idx_o, ones, shape=shape))

        # Divide the new object vectors by the number of times they appeared,
        # but first clamp at 1 to avoid dividing by zero; objects that appear
        # in no triples will have output vector 0 so this will not affect them.
        obj_counts = K.clip(obj_counts, min_value=1, max_value=None)
        pooled_obj_vecs = pooled_obj_vecs / obj_counts

    # Send pooled object vectors through net2 to get output object vectors,
    # of shape (O, Dout)
    new_obj_vecs = self.net2(pooled_obj_vecs)

    return new_obj_vecs, new_p_vecs
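# The pooling above relies on tf.scatter_nd summing duplicate indices within
# a single call; a quick check (TF2 eager):
import tensorflow as tf

idx = tf.constant([[0], [1], [0]])  # index 0 appears twice
vals = tf.constant([[1., 1.], [2., 2.], [3., 3.]])
print(tf.scatter_nd(idx, vals, shape=(2, 2)).numpy())
# [[4. 4.]
#  [2. 2.]]  -> duplicates are accumulated, not overwritten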
def styleTransfer(cData, sData, tData):
    print("   Building transfer model.")
    contentTensor = K.variable(cData, dtype=tf.float64)
    styleTensor = K.variable(sData, dtype=tf.float64)
    genFlatten = K.placeholder(CONTENT_IMG_H * CONTENT_IMG_W * 3, dtype=tf.float64)
    genTensor = K.reshape(genFlatten, (1, CONTENT_IMG_H, CONTENT_IMG_W, 3))
    inputTensor = K.concatenate([contentTensor, styleTensor, genTensor], axis=0)
    model = vgg19.VGG19(include_top=False, weights='imagenet',
                        input_tensor=inputTensor)
    outputDict = dict([(layer.name, layer.output) for layer in model.layers])
    print("   VGG19 model loaded.")

    loss = 0.0
    styleLayerNames = ["block1_conv1", "block2_conv1", "block3_conv1",
                       "block4_conv1", "block5_conv1"]
    contentLayerName = "block5_conv2"

    print("   Calculating content loss.")
    contentLayer = outputDict[contentLayerName]
    contentOutput = contentLayer[0, :, :, :]
    genOutput = contentLayer[2, :, :, :]
    loss += CONTENT_WEIGHT * contentLoss(contentOutput, genOutput)

    print("   Calculating style loss.")
    styleLayerWeight = 1 / len(styleLayerNames)
    for layerName in styleLayerNames:
        styleLayer = outputDict[layerName]
        originalStyleOutput = styleLayer[1, :, :, :]
        genStyleOutput = styleLayer[2, :, :, :]
        loss += STYLE_WEIGHT * styleLayerWeight * styleLoss(originalStyleOutput,
                                                            genStyleOutput)

    loss = totalLoss(loss)
    gradients = K.gradients(loss, genFlatten)[0]
    loss_function = K.function([genFlatten], [loss, gradients])

    # fmin_l_bfgs_b expects a callable taking a flat float64 array and
    # returning (loss, gradient) as NumPy values, so wrap the Keras function.
    def eval_loss_and_grads(x):
        loss_val, grad_val = loss_function([x])
        return float(loss_val), np.asarray(grad_val, dtype='float64').flatten()

    gen_image_np = tData.flatten()
    print("   Beginning transfer.")
    for i in range(TRANSFER_ROUNDS):
        print("   Step %d." % i)
        gen_image_np, f, d = fmin_l_bfgs_b(func=eval_loss_and_grads,
                                           x0=gen_image_np,
                                           maxiter=100, maxls=1200,
                                           maxfun=5000000)
        print("      Loss: %f." % f)
        img = deprocessImage(gen_image_np)
        saveFile = "./transfer_" + str(i) + ".jpg"
        imageio.imwrite(saveFile, img)
        print("      Image saved to \"%s\"." % saveFile)
    print("   Transfer complete.")
def _process_sample(args):
    _hm, _reg, _wh, _kps, _hm_hp, _hp_offset = args
    _scores, _inds = tf.math.top_k(_hm, k=k, sorted=True)
    _classes = K.cast(_inds % cat, 'float32')
    _inds = K.cast(_inds / cat, 'int32')
    _xs = K.cast(_inds % width, 'float32')
    _ys = K.cast(K.cast(_inds / width, 'int32'), 'float32')
    _wh = K.gather(_wh, _inds)
    _reg = K.gather(_reg, _inds)
    _kps = K.gather(_kps, _inds)

    # shift keypoints by their center
    _kps_x = _kps[:, ::2]
    _kps_y = _kps[:, 1::2]
    _kps_x = _kps_x + K.expand_dims(_xs, -1)  # k x J
    _kps_y = _kps_y + K.expand_dims(_ys, -1)  # k x J
    _kps = K.stack([_kps_x, _kps_y], -1)  # k x J x 2

    _xs = _xs + _reg[..., 0]
    _ys = _ys + _reg[..., 1]

    _x1 = _xs - _wh[..., 0] / 2
    _y1 = _ys - _wh[..., 1] / 2
    _x2 = _xs + _wh[..., 0] / 2
    _y2 = _ys + _wh[..., 1] / 2

    # snap center keypoints to the closest heatmap keypoint
    def _process_channel(args):
        __kps, __hm_hp = args
        thresh = 0.1
        __hm_scores, __hm_inds = tf.math.top_k(__hm_hp, k=k, sorted=True)
        __hm_xs = K.cast(__hm_inds % width, 'float32')
        __hm_ys = K.cast(K.cast(__hm_inds / width, 'int32'), 'float32')
        __hp_offset = K.gather(_hp_offset, __hm_inds)
        __hm_xs = __hm_xs + __hp_offset[..., 0]
        __hm_ys = __hm_ys + __hp_offset[..., 1]
        mask = K.cast(__hm_scores > thresh, 'float32')
        __hm_scores = (1. - mask) * -1. + mask * __hm_scores
        __hm_xs = (1. - mask) * -10000. + mask * __hm_xs
        __hm_ys = (1. - mask) * -10000. + mask * __hm_ys
        __hm_kps = K.stack([__hm_xs, __hm_ys], -1)  # k x 2
        __broadcast_hm_kps = K.expand_dims(__hm_kps, 1)  # k x 1 x 2
        __broadcast_kps = K.expand_dims(__kps, 0)  # 1 x k x 2
        dist = K.sqrt(K.sum(K.pow(__broadcast_kps - __broadcast_hm_kps, 2), 2))  # k, k
        min_dist = K.min(dist, 0)
        min_ind = K.argmin(dist, 0)
        __hm_scores = K.gather(__hm_scores, min_ind)
        __hm_kps = K.gather(__hm_kps, min_ind)
        mask = (K.cast(__hm_kps[..., 0] < _x1, 'float32') +
                K.cast(__hm_kps[..., 0] > _x2, 'float32') +
                K.cast(__hm_kps[..., 1] < _y1, 'float32') +
                K.cast(__hm_kps[..., 1] > _y2, 'float32') +
                K.cast(__hm_scores < thresh, 'float32') +
                K.cast(min_dist > 0.3 * (K.maximum(_wh[..., 0], _wh[..., 1])),
                       'float32'))
        mask = K.expand_dims(mask, -1)
        mask = K.cast(mask > 0, 'float32')
        __kps = (1. - mask) * __hm_kps + mask * __kps
        return __kps

    _kps = K.permute_dimensions(_kps, (1, 0, 2))  # J x k x 2
    _hm_hp = K.permute_dimensions(_hm_hp, (1, 0))  # J x -1
    _kps = K.map_fn(_process_channel, [_kps, _hm_hp], dtype='float32')
    _kps = K.reshape(K.permute_dimensions(_kps, (1, 2, 0)), (k, -1))  # k x J * 2

    # rescale to image coordinates
    _x1 = output_stride * _x1
    _y1 = output_stride * _y1
    _x2 = output_stride * _x2
    _y2 = output_stride * _y2
    _kps = output_stride * _kps

    _boxes = K.stack([_x1, _y1, _x2, _y2], -1)
    _scores = K.expand_dims(_scores, -1)
    _classes = K.expand_dims(_classes, -1)
    _detection = K.concatenate([_boxes, _scores, _kps, _classes], -1)
    return _detection
def call(self, x):
    x = x[0]
    gs = self.gs
    k = self.k
    param = self.param
    C = self.numericalC
    t.print("calling onetopk", self.metrik, output_stream=sys.stdout)

    mata = K.constant(self.getmata(gs))
    matb = K.constant(self.getmatb(gs))
    xp = K.permute_dimensions(x, (0, 2, 1))
    xa = K.dot(xp, mata)
    xb = K.dot(xp, matb)

    isval = xa[:, self.flag, :] * xb[:, self.flag, :]

    ds = xa - xb
    dsp = K.permute_dimensions(ds, (0, 2, 1))
    dspsq = K.square(dsp)
    delt = K.dot(dspsq, self.metrik)
    delt = K.reshape(delt, (-1, self.gs * self.gs)) + (1 - isval) * self.emptyconst
    d = K.reshape(delt, (-1, gs, gs))

    ##### no self interactions
    if self.self_interaction == False:
        one = K.eye(gs)
        d += self.self_interaction_const * one
    ##### end no self interactions

    v, _ = t.math.top_k(-d, k=k)
    vb = v[:, :, -1]
    vbs = K.reshape(vb, (-1, gs, 1))
    su = d + vbs  # plus since top_k(-d)

    # map anything above 0 to 0 and anything below to 1, also map 0 to 1
    # p(-x) = C*d_C(-x)
    #       = d(-C*x)
    #       = 1 - r(Cx-1) + r(Cx)
    # experimentally:
    #   r(1-Cx) - r(-Cx)
    rel = K.relu(1 - C * su) - K.relu(-C * su)
    rel = K.relu(rel) - K.relu(rel - 1)

    dez1 = K.reshape(rel, (-1, self.gs * self.gs))
    dez2 = dez1 * isval
    rel = K.reshape(dez2, (-1, self.gs, self.gs))
    print("rel", rel.shape)

    numnei = K.sum(rel, axis=-1)
    print("numnei", numnei.shape)
    factor = self.k / (numnei + 0.00000000001)
    print("factor", factor.shape)
    refactor = K.repeat(factor, self.gs)
    print("refactor", refactor.shape)
    refactor = K.permute_dimensions(refactor, (0, 2, 1))
    rel = rel * refactor
    print("rel", rel.shape)

    if self.free == 0:
        return rel, x
    zero1 = K.zeros_like(x[:, :, 0])
    zero1 = K.reshape(zero1, (-1, x.shape[1], 1))
    zerolis = []
    for i in range(self.free):
        zerolis.append(zero1)
    zeros = K.concatenate(zerolis, axis=-1)
    return rel, K.concatenate((x, zeros), axis=-1)
def sparse_gather(y_pred, target_indices, task_name):
    clf_h = Lambda(lambda x: K.reshape(x, (-1, K.int_shape(x)[-1])),
                   name=task_name + '_flatten')(y_pred)
    return Lambda(lambda x: K.gather(x[0], K.cast(x[1], 'int32')),
                  name=task_name + '_gather')([clf_h, target_indices])
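# The same flatten-then-gather idea in plain TF (eager sketch): the flat row
# index of (batch b, position t) is b * seq_len + t.
import tensorflow as tf

h = tf.reshape(tf.range(24.), (2, 3, 4))  # (batch=2, seq=3, hidden=4)
flat = tf.reshape(h, (-1, 4))             # (6, 4)
picked = tf.gather(flat, [2, 5])          # h[0, 2] and h[1, 2]
print(picked.numpy())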
def call(self, x, **kwargs):
    assert isinstance(x, list), 'SliceLayer input is not a list'
    return x[0] * K.reshape(x[1], (-1, 1))
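# What the broadcasting does, in NumPy: each row of x[0] is scaled by the
# matching scalar in x[1].
import numpy as np

a = np.arange(6.).reshape(3, 2)  # x[0]
w = np.array([1., 0., 2.])       # x[1]
print(a * w.reshape(-1, 1))
# [[ 0.  1.]
#  [ 0.  0.]
#  [ 8. 10.]]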
def call(self, inputs, q_mask=None, v_mask=None, a_mask=None):
    """Multi-head attention.
    q_mask: mask for the input query sequence;
            mainly zeroes out the padding part of the output.
    v_mask: mask for the input value sequence;
            mainly prevents attention from reading padding information.
    a_mask: mask for the attention matrix;
            different attention masks correspond to different applications.
    """
    q, k, v = inputs[:3]
    if a_mask:
        if len(inputs) == 3:
            a_mask = 'history_only'
        else:
            a_mask = inputs[3]
    if q_mask is not None:
        if not hasattr(self, 'q_mask_layer'):
            self.q_mask_layer = search_layer(q, q_mask)
        q_mask = self.q_mask_layer.output_mask
    if v_mask is not None:
        if not hasattr(self, 'v_mask_layer'):
            self.v_mask_layer = search_layer(v, v_mask)
        v_mask = self.v_mask_layer.output_mask
    # Pooling
    if self.pool_size > 1:
        is_self_attention = (q is k is v)
        q_in_len = K.shape(q)[1]
        q = sequence_masking(q, q_mask, 0)
        q = divisible_temporal_padding(q, self.pool_size)
        q = pool1d(q, self.pool_size, self.pool_size, pool_mode='avg')
        if is_self_attention:
            k = v = q
        else:
            k = sequence_masking(k, v_mask, 0)
            k = divisible_temporal_padding(k, self.pool_size)
            k = pool1d(k, self.pool_size, self.pool_size, pool_mode='avg')
            v = sequence_masking(v, v_mask, 0)
            v = divisible_temporal_padding(v, self.pool_size)
            v = pool1d(v, self.pool_size, self.pool_size, pool_mode='avg')
        if v_mask is not None:
            v_mask = v_mask[:, ::self.pool_size]
        if a_mask is not None and not is_string(a_mask):
            a_mask = a_mask[..., ::self.pool_size, ::self.pool_size]
    # Linear transformations
    qw = self.q_dense(q)
    kw = self.k_dense(k)
    vw = self.v_dense(v)
    # Shape transformations
    qw = K.reshape(qw, (-1, K.shape(q)[1], self.heads, self.key_size))
    kw = K.reshape(kw, (-1, K.shape(k)[1], self.heads, self.key_size))
    vw = K.reshape(vw, (-1, K.shape(v)[1], self.heads, self.head_size))
    # Attention
    a = tf.einsum('bjhd,bkhd->bhjk', qw, kw)
    # Relative position encoding
    if self.max_relative_position is not None:
        q_idxs = K.arange(0, K.shape(q)[1], dtype='int32')
        q_idxs = K.expand_dims(q_idxs, 1)
        v_idxs = K.arange(0, K.shape(v)[1], dtype='int32')
        v_idxs = K.expand_dims(v_idxs, 0)
        pos_ids = v_idxs - q_idxs
        pos_ids = K.clip(pos_ids, -self.max_relative_position,
                         self.max_relative_position)
        pos_ids = pos_ids + self.max_relative_position
        pos_embeddings = K.gather(self.relative_embeddings, pos_ids)
        a = a + tf.einsum('bjhd,jkd->bhjk', qw, pos_embeddings)
    # Attention (continued)
    a = a / self.key_size**0.5
    a = sequence_masking(a, v_mask, 1, -1)
    if a_mask is not None:
        if is_string(a_mask):
            ones = K.ones_like(a[:1, :1])
            a_mask = (ones - tf.linalg.band_part(ones, -1, 0)) * 1e12
            a = a - a_mask
        else:
            a = a - (1 - a_mask) * 1e12
    a = K.softmax(a)
    # Assemble the output
    o = tf.einsum('bhjk,bkhd->bjhd', a, vw)
    if self.max_relative_position is not None:
        o = o + tf.einsum('bhjk,jkd->bjhd', a, pos_embeddings)
    o = K.reshape(o, (-1, K.shape(o)[1], self.out_dim))
    o = self.o_dense(o)
    # Restore the original length
    if self.pool_size > 1:
        o = K.repeat_elements(o, self.pool_size, 1)[:, :q_in_len]
    # Return the result
    o = sequence_masking(o, q_mask, 0)
    return o
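# How the relative-position ids are laid out, in NumPy (max_relative_position
# = 2, sequence length 4): each entry indexes a row of relative_embeddings.
import numpy as np

max_rel, q_len, v_len = 2, 4, 4
q_idxs = np.arange(q_len)[:, None]
v_idxs = np.arange(v_len)[None, :]
pos_ids = np.clip(v_idxs - q_idxs, -max_rel, max_rel) + max_rel
print(pos_ids)
# [[2 3 4 4]
#  [1 2 3 4]
#  [0 1 2 3]
#  [0 0 1 2]]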
def call(self, x, mask=None):
    assert (len(x) == 2)

    img = x[0]
    rois = x[1]

    input_shape = K.shape(img)

    outputs = []

    for roi_idx in range(self.num_rois):
        x = rois[0, roi_idx, 0]
        y = rois[0, roi_idx, 1]
        w = rois[0, roi_idx, 2]
        h = rois[0, roi_idx, 3]

        row_length = w / float(self.pool_size)
        col_length = h / float(self.pool_size)

        num_pool_regions = self.pool_size

        # NOTE: the RoiPooling implementation differs between theano and
        # tensorflow due to the lack of a resize op in theano. The theano
        # implementation is much less efficient and leads to long compile times.
        if self.dim_ordering == 'th':
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = x + ix * row_length
                    x2 = x1 + row_length
                    y1 = y + jy * col_length
                    y2 = y1 + col_length

                    x1 = K.cast(x1, 'int32')
                    x2 = K.cast(x2, 'int32')
                    y1 = K.cast(y1, 'int32')
                    y2 = K.cast(y2, 'int32')

                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]

                    x_crop = img[:, :, y1:y2, x1:x2]
                    xm = K.reshape(x_crop, new_shape)
                    pooled_val = K.max(xm, axis=(2, 3))
                    outputs.append(pooled_val)
        elif self.dim_ordering == 'tf':
            x = K.cast(x, 'int32')
            y = K.cast(y, 'int32')
            w = K.cast(w, 'int32')
            h = K.cast(h, 'int32')

            rs = tf.image.resize_images(img[:, y:y + h, x:x + w, :],
                                        (self.pool_size, self.pool_size))
            outputs.append(rs)

    final_output = K.concatenate(outputs, axis=0)
    final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size,
                                            self.pool_size, self.nb_channels))

    if self.dim_ordering == 'th':
        final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
    else:
        final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))

    return final_output
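# The 'tf' branch above is just crop-and-resize; the same idea with TF2's
# tf.image.resize (tf.image.resize_images is the TF1 name used above):
import tensorflow as tf

img = tf.random.normal((1, 64, 64, 256))  # feature map (batch, H, W, C)
x, y, w, h = 8, 12, 20, 16                # one ROI in feature-map coordinates
pooled = tf.image.resize(img[:, y:y + h, x:x + w, :], (7, 7))
print(pooled.shape)  # (1, 7, 7, 256)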