def add_input(self, x, condition=None):
    """Compute the output distribution (represented by its parameters) for one step.

    It works similarly to the `forward` method, but in a step-in-step-out fashion.

    Args:
        x (Variable): shape(B, T=1), dtype float32, a step of the input waveform.
        condition (Variable, optional): shape(B, C_cond, T=1), dtype float32,
            a step of the upsampled condition. Defaults to None.

    Returns:
        Variable: shape(B, T=1, C_output), dtype float32, the parameters of
            the output distributions.
    """
    # Causal conv
    if self.loss_type == "softmax":
        x = F.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, T, C), T=1
    else:
        x = F.unsqueeze(x, axes=[-1])  # (B, T, 1), T=1
        x = self.embed(x)  # (B, T, C)
    x = F.transpose(x, perm=[0, 2, 1])

    # Residual, skip-connection & linears
    z = self.resnet.add_input(x, condition)
    z = F.transpose(z, [0, 2, 1])
    z = F.relu(self.proj2(F.relu(self.proj1(z))))  # (B, T, C)

    # Output
    y = self.proj3(z)
    return y
def forward(self, x, condition=None):
    """Compute the output distribution (represented by its parameters).

    Args:
        x (Variable): shape(B, T), dtype float32, the input waveform.
        condition (Variable, optional): shape(B, C_cond, T), dtype float32,
            the upsampled condition. Defaults to None.

    Returns:
        Variable: shape(B, T, C_output), dtype float32, the parameters of
            the output distributions.
    """
    # Causal conv
    if self.loss_type == "softmax":
        x = F.clip(x, min=-1., max=0.99999)
        x = quantize(x, self.output_dim)
        x = self.embed(x)  # (B, T, C)
    else:
        x = F.unsqueeze(x, axes=[-1])  # (B, T, 1)
        x = self.embed(x)  # (B, T, C)
    x = F.transpose(x, perm=[0, 2, 1])  # (B, C, T)

    # Residual, skip-connection & linears
    z = self.resnet(x, condition)
    z = F.transpose(z, [0, 2, 1])
    z = F.relu(self.proj2(F.relu(self.proj1(z))))

    y = self.proj3(z)
    return y
def forward(self, x):
    if self.inplace:
        x.set_value(relu(x))
        return x
    else:
        y = relu(x)
        return y
def proto_net(x):
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                      name="proto_net.0.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                                     name="proto_net.0.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                      name="proto_net.2.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                                     name="proto_net.2.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                      name="proto_net.4.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                                     name="proto_net.4.bias"))
    x = P.relu(x)
    x = P.resize_bilinear(x, scale=float(2))
    x = P.relu(x)
    x = P.conv2d(x, 256, filter_size=(3, 3), stride=1, padding=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                      name="proto_net.8.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                                     name="proto_net.8.bias"))
    x = P.relu(x)
    x = P.conv2d(x, 32, filter_size=(1, 1), stride=1,
                 param_attr=ParamAttr(initializer=fluid.initializer.Normal(0.0, 0.01),
                                      name="proto_net.10.weight"),
                 bias_attr=ParamAttr(initializer=fluid.initializer.Constant(0.0),
                                     name="proto_net.10.bias"))
    return x
def forward(self, x):
    residual = x

    out = self.conv1(x)
    out = self.bn1(out)
    out = relu(out)
    out = self.conv2(out)
    out = self.bn2(out)

    if self.downsample is not None:
        residual = self.downsample(x)

    # frame-subtraction branch, concatenated onto the main path
    frame_sub_tensor = self.frameSubLayer(out)
    frame_sub_tensor = relu(frame_sub_tensor)
    out = fluid.layers.concat([out, frame_sub_tensor], axis=1)

    out += residual
    out = relu(out)
    return out
def forward(self, encoder_output):
    """Predict the duration of each character.

    Args:
        encoder_output (Variable): shape(B, T, C), dtype float32, the encoder output.

    Returns:
        out (Variable): shape(B, T), the output of the duration predictor.
    """
    # encoder_output.shape = (B, T, C)
    out = layers.transpose(encoder_output, [0, 2, 1])
    out = self.conv1(out)
    out = layers.transpose(out, [0, 2, 1])
    out = layers.dropout(layers.relu(self.layer_norm1(out)),
                         self.dropout,
                         dropout_implementation='upscale_in_train')
    out = layers.transpose(out, [0, 2, 1])
    out = self.conv2(out)
    out = layers.transpose(out, [0, 2, 1])
    out = layers.dropout(layers.relu(self.layer_norm2(out)),
                         self.dropout,
                         dropout_implementation='upscale_in_train')
    out = layers.relu(self.linear(out))
    out = layers.squeeze(out, axes=[-1])
    return out
def forward(self, input):
    if self.inplace:
        input.set_value(layers.relu(input))
        return input
    else:
        y = layers.relu(input)
        return y
def link_predictor(self, x, y):
    """Siamese network link predictor."""
    feat = x * y
    feat = L.fc(feat, size=self.hidden_size, name="link_predictor_1")
    feat = L.relu(feat)
    feat = L.fc(feat, size=self.hidden_size, name="link_predictor_2")
    feat = L.relu(feat)
    self.logits = L.fc(feat,
                       size=1,
                       act="sigmoid",
                       name="link_predictor_logits")
def train_forward(self):
    entity_embedding, relation_embedding, transfer_matrix = \
        self.create_share_variables()

    # look up (head, relation, tail) embeddings for positive and negative triples
    pos_head = self.lookup_table(self.train_pos_input[:, 0], entity_embedding)
    pos_tail = self.lookup_table(self.train_pos_input[:, 2], entity_embedding)
    pos_rel = self.lookup_table(self.train_pos_input[:, 1], relation_embedding)
    neg_head = self.lookup_table(self.train_neg_input[:, 0], entity_embedding)
    neg_tail = self.lookup_table(self.train_neg_input[:, 2], entity_embedding)
    neg_rel = self.lookup_table(self.train_neg_input[:, 1], relation_embedding)

    # project entities into the relation-specific space
    rel_matrix = layers.reshape(
        self.lookup_table(self.train_pos_input[:, 1], transfer_matrix),
        [-1, self.hidden_size, self.hidden_size])
    pos_head_trans = self.matmul_with_expend_dims(pos_head, rel_matrix)
    pos_tail_trans = self.matmul_with_expend_dims(pos_tail, rel_matrix)

    rel_matrix_neg = layers.reshape(
        self.lookup_table(self.train_neg_input[:, 1], transfer_matrix),
        [-1, self.hidden_size, self.hidden_size])
    neg_head_trans = self.matmul_with_expend_dims(neg_head, rel_matrix_neg)
    neg_tail_trans = self.matmul_with_expend_dims(neg_tail, rel_matrix_neg)

    pos_score = self._algorithm(pos_head_trans, pos_rel, pos_tail_trans)
    neg_score = self._algorithm(neg_head_trans, neg_rel, neg_tail_trans)

    # margin-based ranking loss: relu(pos - neg + margin)
    pos = layers.reduce_sum(layers.abs(pos_score), -1, keep_dim=False)
    neg = layers.reduce_sum(layers.abs(neg_score), -1, keep_dim=False)
    neg = layers.reshape(neg, shape=[-1, 1], inplace=True)
    loss = layers.reduce_mean(layers.relu(pos - neg + self.margin))
    return [loss]
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 2)
        self.feed_vars.append(
            fluid.data(name="data2", shape=[128, 128], dtype=dtype))

        # subgraph with 2 op nodes
        tmp_0 = self.feed_vars[0] * self.feed_vars[1]
        tmp_1 = layers.cast(tmp_0, dtype="float16")
        zero = layers.fill_constant(shape=[128], dtype="float16", value=0)
        # TODO(xreki): fix precision problem when using softmax of float16.
        # tmp_2 = layers.softmax(tmp_1)
        tmp_2 = layers.elementwise_add(tmp_1, zero)
        tmp_3 = layers.mul(tmp_0, self.feed_vars[2])
        # subgraph with 4 op nodes
        tmp_3 = layers.cast(tmp_2, dtype="float16")
        tmp_4 = layers.relu(tmp_1 + tmp_3)
        tmp_5 = layers.cast(tmp_4, dtype=dtype)
        tmp_3 = layers.cast(tmp_2, dtype=dtype)

        self.append_gradients(tmp_5)

        self.num_fused_ops = 4
        self.fetch_list = [tmp_5, self.grad(tmp_0)]
def __call__(self, input_tensor):
    x = self.conv1(input_tensor)
    x = self.conv2(x)
    x = self.conv3(x)
    x = L.elementwise_add(x=x, y=input_tensor, act=None)
    x = L.relu(x)
    return x
def forward(self, input): """ Compute feed forward network result. Args: input (Variable): shape(B, T, C), dtype float32, the input value. Returns: output (Variable): shape(B, T, C), the result after FFN. """ x = layers.transpose(input, [0, 2, 1]) #FFN Networt x = self.w_2(layers.relu(self.w_1(x))) # dropout x = layers.dropout(x, self.dropout, dropout_implementation='upscale_in_train') x = layers.transpose(x, [0, 2, 1]) # residual connection x = x + input #layer normalization output = self.layer_norm(x) return output
def forward(self, input, class_id, input_class_emb=False):
    if isinstance(input, list):
        codes = [input[0]]
        codes += [
            input[2 * i + 1:2 * i + 3] for i in range(len(input) // 2)
        ]
    else:
        codes = layers.split(input, self.num_split, 1)

    if not input_class_emb:
        class_emb = self.embed_y(class_id)  # 128
    else:
        class_emb = class_id

    out = self.noise_fc(codes[0])
    out = layers.transpose(
        layers.reshape(out, (out.shape[0], 4, 4, -1)), (0, 3, 1, 2))
    for i, (code, gblock) in enumerate(zip(codes[1:], self.blocks)):
        if isinstance(input, list):
            condition = [layers.concat([c, class_emb], 1) for c in code]
        else:
            condition = layers.concat([code, class_emb], 1)
        out = gblock(out, condition)

    out = self.output_layer_bn(out)
    out = layers.relu(out)
    out = self.output_layer_conv(out)
    return (layers.tanh(out) + 1) / 2
def intersect(box_a, box_b):
    """Compute the area of the intersection region.

    We resize both tensors to [A,B,2] without new malloc:
        [A,2] -> [A,1,2] -> [A,B,2]
        [B,2] -> [1,B,2] -> [A,B,2]
    Then we compute the area of intersect between box_a and box_b.

    Args:
        box_a: (tensor) bounding boxes, Shape: [n,A,4].
        box_b: (tensor) bounding boxes, Shape: [n,B,4].

    Return:
        (tensor) intersection area, Shape: [n,A,B].
    """
    n = P.shape(box_a)[0]
    A = P.shape(box_a)[1]
    B = P.shape(box_b)[1]
    box_a = P.reshape(box_a, (n, A, 1, 4))
    box_b = P.reshape(box_b, (n, 1, B, 4))
    expand_box_a = P.expand(box_a, [1, 1, B, 1])
    expand_box_b = P.expand(box_b, [1, A, 1, 1])

    # top-left and bottom-right corners of the intersection rectangle
    left_up = P.elementwise_max(expand_box_a[:, :, :, :2],
                                expand_box_b[:, :, :, :2])
    right_down = P.elementwise_min(expand_box_a[:, :, :, 2:],
                                   expand_box_b[:, :, :, 2:])

    inter_section = P.relu(right_down - left_up)
    return inter_section[:, :, :, 0] * inter_section[:, :, :, 1]
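# The P.relu on (right_down - left_up) is what clips negative overlaps to zero:
# disjoint boxes get zero width or height, hence zero intersection area.
# A minimal NumPy sketch of the same computation (hypothetical helper, single
# batch of (x0, y0, x1, y1) boxes assumed), not this module's actual API:
import numpy as np

def intersect_np(box_a, box_b):
    # box_a: (A, 4), box_b: (B, 4)
    lu = np.maximum(box_a[:, None, :2], box_b[None, :, :2])  # top-left, (A, B, 2)
    rd = np.minimum(box_a[:, None, 2:], box_b[None, :, 2:])  # bottom-right, (A, B, 2)
    wh = np.maximum(rd - lu, 0.0)                            # the relu step
    return wh[..., 0] * wh[..., 1]                           # (A, B)

print(intersect_np(np.array([[0., 0., 2., 2.]]),
                   np.array([[1., 1., 3., 3.], [5., 5., 6., 6.]])))
# [[1. 0.]] -- unit overlap for the first pair, zero for the disjoint pair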
def build_program(self, dtype):
    with fluid.program_guard(self.main_program, self.startup_program):
        self.feed_vars = self._prepare_feed_vars([32, 128], dtype, 3)
        self.feed_vars.append(
            fluid.data(name="data3", shape=[128, 32], dtype=dtype))

        # subgraph with 3 op nodes
        tmp_0 = self.feed_vars[0] + self.feed_vars[1]
        tmp_1 = layers.relu(self.feed_vars[2] * tmp_0)
        # subgraph with 2 op nodes
        tmp_2 = layers.relu(layers.sigmoid(self.feed_vars[3]))
        tmp_3 = layers.mul(tmp_1, tmp_2)

        self.append_gradients(tmp_3)

        self.num_fused_ops = 2
        self.fetch_list = [tmp_3, self.grad(tmp_1)]
def label_embed_input(self, feature):
    label = F.data(name="label", shape=[None, 1], dtype="int64")
    label_idx = F.data(name='label_idx', shape=[None], dtype="int64")
    label = L.reshape(label, shape=[-1])
    label = L.gather(label, label_idx, overwrite=False)

    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    feature = L.layer_norm(feature,
                           name='layer_norm_feature_input1',
                           param_attr=lay_norm_attr,
                           bias_attr=lay_norm_bias)

    embed_attr = F.ParamAttr(
        initializer=F.initializer.NormalInitializer(loc=0.0, scale=1.0))
    embed = F.embedding(input=label,
                        size=(self.out_size, self.embed_size),
                        param_attr=embed_attr)
    lay_norm_attr = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=1))
    lay_norm_bias = F.ParamAttr(
        initializer=F.initializer.ConstantInitializer(value=0))
    embed = L.layer_norm(embed,
                         name='layer_norm_feature_input2',
                         param_attr=lay_norm_attr,
                         bias_attr=lay_norm_bias)
    embed = L.relu(embed)

    # add the label embedding onto the features of the labeled nodes only
    feature_label = L.gather(feature, label_idx, overwrite=False)
    feature_label = feature_label + embed
    feature = L.scatter(feature, label_idx, feature_label, overwrite=True)
    return feature
def _compute_pc(self, x, mask):
    if mask is not None:
        # push masked positions to a large negative value so the max ignores them
        x -= (1 - mask) * 1e10
    x = layers.reduce_max(x, dim=1, keep_dim=True)
    x = layers.relu(self.pc_fc1(x))
    x = layers.sigmoid(self.pc_fc2(x))
    return x
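# The subtraction above is the standard masked-max trick. A small NumPy sketch
# of just that step (made-up shapes, not this module's API):
import numpy as np

x = np.array([[[3.0], [9.0], [7.0]]])     # (B=1, T=3, C=1)
mask = np.array([[[1.0], [1.0], [0.0]]])  # last timestep is padding

masked = x - (1.0 - mask) * 1e10          # padded entry drops to ~-1e10
print(masked.max(axis=1))                 # [[9.]] -- the padded 7.0 is ignored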
def forward(self, x):
    x = self.conv1(x)
    x = self.bn1(x)
    x = relu(x)
    if not self.no_max_pool:
        x = pool3d(x,
                   pool_size=3,
                   pool_type='max',
                   pool_stride=2,
                   pool_padding=1,
                   data_format="NCDHW")

    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)

    x = adaptive_pool3d(x, pool_size=[1, 1, 1], pool_type='avg')
    x = flatten(x)
    x = self.fc(x)
    return x
def mlp(self, features, name):
    h = features
    dim = features.shape[-1]
    dim_list = [dim * 2, dim]
    for i in range(2):
        h = L.fc(h, size=dim_list[i], name="%s_fc_%s" % (name, i), act=None)
        if self.args.norm_type == "layer_norm":
            log.info("norm_type is %s" % self.args.norm_type)
            h = L.layer_norm(
                h,
                begin_norm_axis=1,
                param_attr=F.ParamAttr(
                    name="norm_scale_%s_%s" % (name, i),
                    initializer=F.initializer.Constant(1.0)),
                bias_attr=F.ParamAttr(
                    name="norm_bias_%s_%s" % (name, i),
                    initializer=F.initializer.Constant(0.0)),
            )
        else:
            log.info("using batch_norm")
            h = L.batch_norm(h)
        h = pgl.layers.graph_norm(self.graph_wrapper, h)
        h = L.relu(h)
    return h
def forward(self): """forward""" features_list = [self.gw.node_feat["attr"]] for i in range(self.num_layers): h = gin(self.gw, features_list[i], hidden_size=self.hidden_size, activation="relu", name="gin_%s" % (i), init_eps=0.0, train_eps=self.train_eps) h = fl.batch_norm(h) h = fl.relu(h) features_list.append(h) output = 0 for i, h in enumerate(features_list): pooled_h = pgl.layers.graph_pooling(self.gw, h, self.pool_type) drop_h = fl.dropout(pooled_h, self.dropout_prob, dropout_implementation="upscale_in_train") output += fl.fc(drop_h, size=self.num_class, act=None, param_attr=fluid.ParamAttr(name="final_fc_%s" % (i))) # calculate loss self.loss = fl.softmax_with_cross_entropy(output, self.labels) self.loss = fl.reduce_mean(self.loss) self.acc = fl.accuracy(fl.softmax(output), self.labels)
def bbox_ciou(self, boxes1_x0y0x1y1, boxes2_x0y0x1y1):
    """Compute ciou = iou - p2/c2 - av.

    :param boxes1_x0y0x1y1: (batch_size, num_priors, 4) pred_x0y0x1y1
    :param boxes2_x0y0x1y1: (batch_size, num_priors, 4) label_x0y0x1y1
    :return: ciou
    """
    # convert to center coordinates and width/height
    boxes1 = P.concat(
        [(boxes1_x0y0x1y1[:, :, :2] + boxes1_x0y0x1y1[:, :, 2:]) * 0.5,
         boxes1_x0y0x1y1[:, :, 2:] - boxes1_x0y0x1y1[:, :, :2]],
        axis=-1)
    boxes2 = P.concat(
        [(boxes2_x0y0x1y1[:, :, :2] + boxes2_x0y0x1y1[:, :, 2:]) * 0.5,
         boxes2_x0y0x1y1[:, :, 2:] - boxes2_x0y0x1y1[:, :, :2]],
        axis=-1)

    # areas of the two boxes
    boxes1_area = (boxes1_x0y0x1y1[:, :, 2] - boxes1_x0y0x1y1[:, :, 0]) * (
        boxes1_x0y0x1y1[:, :, 3] - boxes1_x0y0x1y1[:, :, 1])
    boxes2_area = (boxes2_x0y0x1y1[:, :, 2] - boxes2_x0y0x1y1[:, :, 0]) * (
        boxes2_x0y0x1y1[:, :, 3] - boxes2_x0y0x1y1[:, :, 1])

    # top-left and bottom-right corners of the intersection rectangle
    left_up = P.elementwise_max(boxes1_x0y0x1y1[:, :, :2],
                                boxes2_x0y0x1y1[:, :, :2])
    right_down = P.elementwise_min(boxes1_x0y0x1y1[:, :, 2:],
                                   boxes2_x0y0x1y1[:, :, 2:])

    # intersection area inter_area, then iou
    inter_section = P.relu(right_down - left_up)
    inter_area = inter_section[:, :, 0] * inter_section[:, :, 1]
    union_area = boxes1_area + boxes2_area - inter_area
    iou = inter_area / union_area

    # top-left and bottom-right corners of the smallest enclosing rectangle
    enclose_left_up = P.elementwise_min(boxes1_x0y0x1y1[:, :, :2],
                                        boxes2_x0y0x1y1[:, :, :2])
    enclose_right_down = P.elementwise_max(boxes1_x0y0x1y1[:, :, 2:],
                                           boxes2_x0y0x1y1[:, :, 2:])

    # squared diagonal length of the enclosing rectangle
    enclose_wh = enclose_right_down - enclose_left_up
    enclose_c2 = P.pow(enclose_wh[:, :, 0], 2) + P.pow(
        enclose_wh[:, :, 1], 2)

    # squared distance between the two box centers
    p2 = P.pow(boxes1[:, :, 0] - boxes2[:, :, 0], 2) + P.pow(
        boxes1[:, :, 1] - boxes2[:, :, 1], 2)

    # add the av term; the denominator boxes2[:, :, 3] may be 0,
    # so a tiny constant is added to avoid NaN
    atan1 = P.atan(boxes1[:, :, 2] / (boxes1[:, :, 3] + 1e-9))
    atan2 = P.atan(boxes2[:, :, 2] / (boxes2[:, :, 3] + 1e-9))
    v = 4.0 * P.pow(atan1 - atan2, 2) / (math.pi**2)
    a = v / (1 - iou + v)

    ciou = iou - 1.0 * p2 / enclose_c2 - 1.0 * a * v
    return ciou
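# A hedged, Paddle-free check of the same arithmetic on one box pair,
# using plain floats (values chosen so the aspect-ratio term v is 0):
import math

b1 = [0., 0., 2., 2.]   # pred (x0, y0, x1, y1), a 2x2 box
b2 = [1., 1., 3., 3.]   # label, another 2x2 box

inter = max(0., min(b1[2], b2[2]) - max(b1[0], b2[0])) * \
        max(0., min(b1[3], b2[3]) - max(b1[1], b2[1]))           # 1.0
iou = inter / (4. + 4. - inter)                                  # 1/7
p2 = ((b1[0] + b1[2]) / 2 - (b2[0] + b2[2]) / 2) ** 2 + \
     ((b1[1] + b1[3]) / 2 - (b2[1] + b2[3]) / 2) ** 2            # 2.0, center distance^2
c2 = (max(b1[2], b2[2]) - min(b1[0], b2[0])) ** 2 + \
     (max(b1[3], b2[3]) - min(b1[1], b2[1])) ** 2                # 18.0, enclosing diagonal^2
v = 4.0 / math.pi ** 2 * (math.atan(2. / 2.) - math.atan(2. / 2.)) ** 2  # 0.0
a = v / (1. - iou + v)                                           # 0.0

print(iou - p2 / c2 - a * v)                                     # ~0.0317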
def __call__(self, x, residual=None):
    if residual is None:
        residual = x
    out = self.conv1(x)
    out = self.conv2(out)
    out = L.elementwise_add(x=out, y=residual, act=None)
    out = L.relu(out)
    return out
def __call__(self, *x):
    children = x
    x = L.concat(list(x), axis=1)
    x = self.conv(x)
    if self.residual:
        x += children[0]
    x = L.relu(x)
    return x
def __call__(self, user_feat, pos_item_feat, neg_item_feat):
    pos = L.reduce_sum(user_feat * pos_item_feat, -1, keep_dim=True)  # [B, 1]
    all_pos = all_gather(pos)  # [B * n, 1]
    all_neg_item_feat = all_gather(neg_item_feat)  # [B * n, 1]
    all_user_feat = all_gather(user_feat)  # [B * n, 1]
    neg1 = L.matmul(user_feat, all_neg_item_feat,
                    transpose_y=True)  # [B, B * n]
    neg2 = L.matmul(all_user_feat, neg_item_feat,
                    transpose_y=True)  # [B * n, B]
    loss1 = L.reduce_mean(L.relu(neg1 - pos + self.config.margin))
    loss2 = L.reduce_mean(L.relu(neg2 - all_pos + self.config.margin))
    loss = loss1 + loss2
    return loss
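# Both terms are the same hinge: relu(neg - pos + margin) penalizes any
# negative score that comes within `margin` of its positive; all_gather only
# widens the negative pool across workers. A single-worker NumPy sketch of
# the hinge itself (the margin value is illustrative):
import numpy as np

margin = 0.3
pos = np.array([[0.9], [0.4]])            # (B, 1) positive scores
neg = np.array([[0.2, 0.8], [0.5, 0.1]])  # (B, N) negative scores

loss = np.maximum(neg - pos + margin, 0.0).mean()
print(loss)  # 0.15 -- only negatives inside the margin contribute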
def forward(self, input_):
    """Convert linear spectrum to Mel spectrum.

    Args:
        input_ (Variable): shape(B, C, T), dtype float32, the input sequence.

    Returns:
        out (Variable): shape(B, C, T), the CBHG output.
    """
    conv_list = []
    conv_input = input_

    for i, (conv, batchnorm) in enumerate(
            zip(self.conv_list, self.batchnorm_list)):
        conv_input = self._conv_fit_dim(conv(conv_input), i + 1)
        conv_input = layers.relu(batchnorm(conv_input))
        conv_list.append(conv_input)

    conv_cat = layers.concat(conv_list, axis=1)
    conv_pool = self.max_pool(conv_cat)[:, :, :-1]

    conv_proj = layers.relu(
        self.batchnorm_proj_1(
            self._conv_fit_dim(self.conv_projection_1(conv_pool))))
    conv_proj = self.batchnorm_proj_2(
        self._conv_fit_dim(self.conv_projection_2(conv_proj))) + input_

    # conv_proj.shape = [B, C, T]
    highway = layers.transpose(conv_proj, [0, 2, 1])
    highway = self.highway(highway)

    # highway.shape = [B, T, C]
    fc_forward = self.fc_forward1(highway)
    fc_reverse = self.fc_reverse1(highway)
    out_forward = self.gru_forward1(fc_forward)
    out_reverse = self.gru_reverse1(fc_reverse)
    out = layers.concat([out_forward, out_reverse], axis=-1)

    fc_forward = self.fc_forward2(out)
    fc_reverse = self.fc_reverse2(out)
    out_forward = self.gru_forward2(fc_forward)
    out_reverse = self.gru_reverse2(fc_reverse)
    out = layers.concat([out_forward, out_reverse], axis=-1)

    out = layers.transpose(out, [0, 2, 1])
    return out
def forward(self, x): """ Prepare network input. Args: x (Variable): shape(B, T, C), dtype float32, the input value. Returns: output (Variable): shape(B, T, C), the result after pernet. """ x = layers.dropout(layers.relu(self.linear1(x)), self.dropout_rate, dropout_implementation='upscale_in_train') output = layers.dropout(layers.relu(self.linear2(x)), self.dropout_rate, dropout_implementation='upscale_in_train') return output
def bbox_iou(boxes1, boxes2):
    """
    boxes1: predicted boxes, shape (?, grid_h, grid_w, 3, 1, 4) -- the network
        outputs (tx, ty, tw, th) post-processed into (bx, by, bw, bh).
    boxes2: all ground truths in the image, shape (?, 1, 1, 1, 150, 4).
    Paddle does not support the ellipsis, so
    boxes1_area = boxes1[..., 2] * boxes1[..., 3] must be spelled out
    with full colons.
    """
    # areas of the 3 predicted boxes in every grid cell
    boxes1_area = boxes1[:, :, :, :, :, 2] * boxes1[:, :, :, :, :, 3]
    # areas of all ground truths
    boxes2_area = boxes2[:, :, :, :, :, 2] * boxes2[:, :, :, :, :, 3]

    # convert (x, y, w, h) to (x0, y0, x1, y1)
    boxes1 = P.concat([
        boxes1[:, :, :, :, :, :2] - boxes1[:, :, :, :, :, 2:] * 0.5,
        boxes1[:, :, :, :, :, :2] + boxes1[:, :, :, :, :, 2:] * 0.5
    ], axis=-1)
    boxes2 = P.concat([
        boxes2[:, :, :, :, :, :2] - boxes2[:, :, :, :, :, 2:] * 0.5,
        boxes2[:, :, :, :, :, :2] + boxes2[:, :, :, :, :, 2:] * 0.5
    ], axis=-1)

    # IoU between the 3 predicted boxes of every cell and the 150 ground
    # truths, so left_up and right_down have shape (?, grid_h, grid_w, 3, 150, 2).
    # Unlike PyTorch and TF, boxes1 and boxes2 must both be expanded to the
    # same shape first.
    expand_boxes1 = P.expand(boxes1, [1, 1, 1, 1, P.shape(boxes2)[4], 1])
    expand_boxes2 = P.expand(boxes2, [
        1, P.shape(boxes1)[1], P.shape(boxes1)[2], P.shape(boxes1)[3], 1, 1
    ])

    # top-left corner of the intersection rectangle
    left_up = P.elementwise_max(expand_boxes1[:, :, :, :, :, :2],
                                expand_boxes2[:, :, :, :, :, :2])
    # bottom-right corner of the intersection rectangle
    right_down = P.elementwise_min(expand_boxes1[:, :, :, :, :, 2:],
                                   expand_boxes2[:, :, :, :, :, 2:])

    # w and h of the intersection rectangle, clipped to 0 when negative;
    # shape (?, grid_h, grid_w, 3, 150, 2)
    inter_section = P.relu(right_down - left_up)
    # intersection area, shape (?, grid_h, grid_w, 3, 150)
    inter_area = inter_section[:, :, :, :, :, 0] * \
        inter_section[:, :, :, :, :, 1]

    expand_boxes1_area = P.expand(boxes1_area,
                                  [1, 1, 1, 1, P.shape(boxes2)[4]])
    expand_boxes2_area = P.expand(boxes2_area, [
        1, P.shape(expand_boxes1_area)[1], P.shape(expand_boxes1_area)[2],
        P.shape(expand_boxes1_area)[3], 1
    ])

    # union_area: (?, grid_h, grid_w, 3, 150)
    union_area = expand_boxes1_area + expand_boxes2_area - inter_area
    # iou: (?, grid_h, grid_w, 3, 150)
    iou = 1.0 * inter_area / union_area

    return iou
def neighbor_aggregator(self, feature):
    for i in range(3):
        feature = L.fc(feature,
                       size=self.hidden_size,
                       name="simple_mlp_{}".format(i))
        # feature = L.batch_norm(feature)
        feature = L.relu(feature)
        feature = L.dropout(feature, dropout_prob=self.drop_rate)
    return feature
def mlp(self, feat):
    for i in range(3):
        feat = L.fc(feat,
                    size=self.hidden_size,
                    name="simple_mlp_{}".format(i))
        feat = L.batch_norm(feat)
        feat = L.relu(feat)
        feat = L.dropout(feat, dropout_prob=0.5)
    return feat
def forward(self, features):
    pred_objectness_logits = []
    pred_anchor_deltas = []
    for x in features:
        t = L.relu(self.conv(x))
        pred_objectness_logits.append(self.objectness_logits(t))
        pred_anchor_deltas.append(self.anchor_deltas(t))
    return pred_objectness_logits, pred_anchor_deltas