def divergence(self, x, y):
        """Return div(p) = dx(x) + dy(y) using the fixed divergence convolutions."""
        # Shift-pad each dual component before its directional backward-difference conv.
        padded_x = layers.pad(x[:, :, :, :], (0, 0, 0, 0, 0, 0, 1, 0))
        padded_y = layers.pad(y[:, :, :, :], (0, 0, 0, 0, 1, 0, 0, 0))
        return self.conv4px(padded_x) + self.conv4py(padded_y)
    def forward_grad(self, x):
        """Forward differences of ``x`` in both spatial directions.

        Returns a (grad_x, grad_y) pair; the last slice along axis 2 of each
        is zeroed via ``t - t`` so it stays inside the autograd graph.
        """
        gx = self.conv4u(layers.pad(x, (0, 0, 0, 0, 0, 0, 0, 1)))
        slices = layers.unstack(gx, axis=2)
        slices[-1] = slices[-1] - slices[-1]  # zero the boundary slice
        gx = layers.stack(slices, axis=2)

        gy = self.conv4v(layers.pad(x, (0, 0, 0, 0, 0, 1, 0, 0)))
        slices = layers.unstack(gy, axis=2)
        slices[-1] = slices[-1] - slices[-1]  # zero the boundary slice
        gy = layers.stack(slices, axis=2)
        return gx, gy
Ejemplo n.º 3
0
def epoch_predict(env, args, model, loader):
    """Run the model over ``loader`` and collect per-sentence predictions.

    Returns (arcs, rels, probs) as plain Python lists; ``probs`` stays empty
    unless ``args.prob`` is set.
    """
    model.eval()

    arcs, rels, probs = [], [], []
    for words, feats in loader():
        # Re-pad after dropping the first token of each sentence so that the
        # leading position is always treated as padding in the mask.
        shifted = layers.pad(words[:, 1:],
                             paddings=[0, 0, 1, 0],
                             pad_value=args.pad_index)
        mask = shifted != args.pad_index
        lens = nn.reduce_sum(mask, -1)
        s_arc, s_rel = model(words, feats)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)
        # Split the flat masked predictions back into per-sentence pieces.
        seq_lens = lens.numpy().tolist()
        arcs.extend(layers.split(nn.masked_select(arc_preds, mask), seq_lens))
        rels.extend(layers.split(nn.masked_select(rel_preds, mask), seq_lens))
        if args.prob:
            # Probability of each predicted head, gathered from the softmax.
            arc_probs = nn.index_sample(layers.softmax(s_arc, -1),
                                        layers.unsqueeze(arc_preds, -1))
            flat_probs = nn.masked_select(
                layers.squeeze(arc_probs, axes=[-1]), mask)
            probs.extend(layers.split(flat_probs, seq_lens))
    arcs = [seq.numpy().tolist() for seq in arcs]
    rels = [env.REL.vocab[seq.numpy().tolist()] for seq in rels]
    probs = [[round(p, 3) for p in seq.numpy().tolist()] for seq in probs]

    return arcs, rels, probs
Ejemplo n.º 4
0
def epoch_evaluate(args, model, loader, puncts):
    """Evaluate in one epoch.

    Returns:
        (mean batch loss, Metric accumulated over all batches).
    """
    model.eval()

    total_loss, metric = 0, Metric()

    for words, feats, arcs, rels in loader():
        # ignore the first token of each sentence
        tmp_words = layers.pad(words[:, 1:],
                               paddings=[0, 0, 1, 0],
                               pad_value=args.pad_index)
        mask = tmp_words != args.pad_index

        s_arc, s_rel = model(words, feats)
        loss = loss_function(s_arc, s_rel, arcs, rels, mask)
        arc_preds, rel_preds = decode(args, s_arc, s_rel, mask)
        # ignore all punctuation if not specified
        if not args.punct:
            # Broadcast-compare every token id against every punctuation id:
            # both sides are expanded to [batch, seq_len, n_puncts]; a token
            # survives only if it differs from ALL punctuation ids (reduce_all
            # over the last axis).
            punct_mask = layers.reduce_all(
                layers.expand(layers.unsqueeze(words, -1),
                              (1, 1, puncts.shape[0])) != layers.expand(
                                  layers.reshape(puncts, (1, 1, -1)),
                                  (*words.shape, 1)),
                dim=-1)
            mask = layers.logical_and(mask, punct_mask)

        metric(arc_preds, rel_preds, arcs, rels, mask)
        total_loss += loss.numpy().item()

    total_loss /= len(loader)

    return total_loss, metric
Ejemplo n.º 5
0
def epoch_train(args, model, optimizer, loader, epoch):
    """Run one training epoch and return the mean per-batch loss."""
    model.train()
    total_loss = 0
    for batch, (words, feats, arcs, rels) in enumerate(loader(), start=1):
        model.clear_gradients()
        # Re-pad after dropping the first token of each sentence so that the
        # leading position is always treated as padding in the mask.
        shifted = layers.pad(words[:, 1:],
                             paddings=[0, 0, 1, 0],
                             pad_value=args.pad_index)
        mask = shifted != args.pad_index
        s_arc, s_rel = model(words, feats)
        loss = loss_function(s_arc, s_rel, arcs, rels, mask)
        # Under data parallelism the loss is scaled before backward and the
        # gradients are synchronized across replicas afterwards.
        if args.use_data_parallel:
            loss = model.scale_loss(loss)
        loss.backward()
        if args.use_data_parallel:
            model.apply_collective_grads()
        optimizer.minimize(loss)

        total_loss += loss.numpy().item()
        logging.info(
            f"epoch: {epoch}, batch: {batch}/{math.ceil(len(loader) / args.nranks)}, batch_size: {len(words)}, loss: {loss.numpy().item():.4f}"
        )
    total_loss /= len(loader)
    return total_loss
Ejemplo n.º 6
0
 def pad(self, input_ele):
     """Pad a list of tensors to a common length along dim 0 and stack them.

     Args:
         input_ele: non-empty list of tensors whose ``shape[0]`` may differ
             (remaining dims must match).

     Returns:
         A tensor of shape ``[len(input_ele), max_len, ...]``.
     """
     # Longest sequence in the batch; everything is padded up to this length.
     max_len = max(t.shape[0] for t in input_ele)
     out_list = []
     for t in input_ele:
         # Pad only at the end of dim 0 (paddings: [before0, after0, before1, after1]).
         one_batch_padded = layers.pad(t, [0, max_len - t.shape[0], 0, 0],
                                       pad_value=0.0)
         out_list.append(one_batch_padded)
     return layers.stack(out_list)
Ejemplo n.º 7
0
def prepare_encoder_decoder(src_word,
                            src_pos,
                            src_vocab_size,
                            src_emb_dim,
                            src_max_len,
                            dropout_rate=0.,
                            word_emb_param_name=None,
                            training=True,
                            pos_enc_param_name=None,
                            is_src=True,
                            params_type="normal"):
    """Add word embeddings and position encodings.

    The output tensor has a shape of
    [batch_size, max_src_length_in_batch, d_model].  This module is used at
    the bottom of the encoder stacks.
    """
    assert params_type in ("normal", "fixed", "new")
    # The parameter-name prefix selects which set of weights is addressed.
    pre_name = {
        "normal": "densedense",
        "fixed": "fixed_densefixed_dense",
        "new": "new_densenew_dense",
    }[params_type]

    src_word_emb = layers.embedding(
        src_word,
        size=[src_vocab_size, src_emb_dim],
        padding_idx=DenseModelHyperParams.bos_idx,  # set embedding of bos to 0
        param_attr=fluid.ParamAttr(
            name=pre_name + word_emb_param_name,
            initializer=fluid.initializer.Normal(
                0., src_emb_dim**-0.5)))  #, is_sparse=True)
    # Target-side input during training is shifted right by one position.
    if not is_src and training:
        src_word_emb = layers.pad(src_word_emb, [0, 0, 1, 0, 0, 0])
    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
    src_pos_enc = layers.embedding(
        src_pos,
        size=[src_max_len, src_emb_dim],
        param_attr=fluid.ParamAttr(trainable=False,
                                   name=pre_name + pos_enc_param_name))
    src_pos_enc.stop_gradient = True
    enc_input = src_word_emb + src_pos_enc
    if not dropout_rate:
        return enc_input
    return layers.dropout(enc_input,
                          dropout_prob=dropout_rate,
                          seed=DenseModelHyperParams.dropout_seed,
                          is_test=False,
                          dropout_implementation='upscale_in_train')
    def forward(self, x):
        """Estimate optical flow between consecutive frames of the clip.

        The dual variables (p11, p12, p21, p22) and the thresholding step on
        ``rho`` match the iterative duality-based TV-L1 optical-flow scheme —
        TODO confirm against the reference implementation.  The flow (u1, u2)
        is refined for ``self.n_iter`` iterations.
        """
        # Previously commented-out reshape experiment, kept for reference:
        # bt,c,w,h=x.shape
        # tmp=layers.reshape(x,shape=[48,-1,c,w,h])
        # res=layers.reshape(tmp[:,:-1],shape=[-1,c,w,h])
        x = self.bottleneck(x)
        inp = self.norm_img(x)
        bt, c, w, h = inp.shape
        # Recover the time axis: [batch, T, c, w, h].
        inp = layers.reshape(inp, shape=[self.batch_size, -1, c, w, h])

        # Consecutive frame pairs: x = frames 0..T-2, y = frames 1..T-1.
        x = inp[:, :-1]
        y = inp[:, 1:]

        x = layers.reshape(layers.transpose(x, perm=[0, 2, 1, 3, 4]),
                           shape=[-1, c, h, w])
        y = layers.reshape(layers.transpose(y, perm=[0, 2, 1, 3, 4]),
                           shape=[-1, c, h, w])
        # Flow components, initialised to zero.
        u1 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        u2 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')

        l_t = self.lamda * self.theta
        taut = self.tau / (self.theta + 1e-12)

        # Centered spatial gradient of y along the last axis; the two boundary
        # slices are replaced by one-sided differences computed from x.
        grad2_x = self.conv4Ix(layers.pad(y, (0, 0, 0, 0, 0, 0, 1, 1)))

        tmp = layers.unstack(grad2_x, axis=3)
        tmp[-1] = 0.5 * (x[:, :, :, -1] - x[:, :, :, -2])
        tmp[0] = 0.5 * (x[:, :, :, 1] - x[:, :, :, 0])
        grad2_x = layers.stack(tmp, axis=3)

        # Same treatment for the gradient along axis 2.
        grad2_y = self.conv4Iy(layers.pad(y, (0, 0, 0, 0, 1, 1, 0, 0)))
        tmp = layers.unstack(grad2_y, axis=2)
        tmp[-1] = 0.5 * (x[:, :, -1, :] - x[:, :, -2, :])
        tmp[0] = 0.5 * (x[:, :, 1, :] - x[:, :, 0, :])
        grad2_y = layers.stack(tmp, axis=2)

        # Dual variables, initialised to zero.
        p11 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p12 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p21 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')
        p22 = fluid.dygraph.to_variable(np.zeros(x.shape)).astype('float32')

        gsqx = grad2_x**2
        gsqy = grad2_y**2

        grad = gsqx + gsqy + 1e-12

        # Constant part of the brightness-constancy residual rho(u).
        rho_c = y - grad2_x * u1 - grad2_y * u2 - x
        for i in range(self.n_iter):
            rho = rho_c + grad2_x * u1 + grad2_y * u2 + 1e-12

            # Thresholding step: three mutually exclusive cases on rho,
            # implemented with float masks kept out of the gradient graph.
            mask1 = (rho < -l_t * grad).detach().astype('float32')
            mask1.stop_gradient = True
            tmp1 = l_t * grad2_x
            tmp2 = l_t * grad2_y
            v1 = tmp1 * mask1
            v2 = tmp2 * mask1

            mask2 = (rho > l_t * grad).detach().astype('float32')
            mask2.stop_gradient = True
            v1 = -tmp1 * mask2 + v1
            v2 = -tmp2 * mask2 + v2

            # Remaining case (|rho| small): projection step.
            mask3 = fluid.layers.ones(
                x.shape, dtype='float32') - (mask1 + mask2 - mask1 * mask2)
            mask3.stop_gradient = True
            tmp1 = (-rho / grad) * grad2_x
            tmp2 = (-rho / grad) * grad2_y

            v1 = tmp1 * mask3 + v1
            v2 = tmp2 * mask3 + v2

            del rho
            del mask1
            del mask2
            del mask3

            v1 += u1
            v2 += u2

            # Primal update via the divergence of the dual variables.
            u1 = v1 + self.theta * self.divergence(p11, p12)
            u2 = v2 + self.theta * self.divergence(p21, p22)

            del v1
            del v2
            u1 = u1  # no-op, kept from the original
            u2 = u2  # no-op, kept from the original

            u1x, u1y = self.forward_grad(u1)
            u2x, u2y = self.forward_grad(u2)

            # Dual update with reprojection (denominator keeps |p| bounded).
            p11 = (p11 + taut * u1x) / (
                1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p12 = (p12 + taut * u1y) / (
                1. + taut * layers.sqrt(u1x**2 + u1y**2 + 1e-12))
            p21 = (p21 + taut * u2x) / (
                1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            p22 = (p22 + taut * u2y) / (
                1. + taut * layers.sqrt(u2x**2 + u2y**2 + 1e-12))
            del u1x
            del u1y
            del u2x
            del u2y

        # Stack the two flow components along the channel axis.
        flow = layers.concat([u1, u2], axis=1)

        #  flow = layers.transpose(layers.reshape(flow,shape=[b,t,c*2,h,w]),perm=[0,2,1,3,4])
        flow = self.unbottleneck(flow)
        flow = self.bn(flow) if self.bn else flow
        return flow
Ejemplo n.º 9
0
    def forward(self, x, cls=None):
        """Video-classification forward pass with representation-flow blocks.

        Args:
            x: video batch of shape [B, T, C, H, W].
            cls: optional label tensor; when given, accuracy is also returned.

        Returns:
            Logits of shape [B, n_classes], or (logits, accuracy) when
            ``cls`` is not None.
        """
        # x is BxTxCxHxW — note: input layout differs from the 2p1d network
        # spatio-temporal video data
        b, t, c, h, w = x.shape
        # need to view it is B*TxCxHxW for 2D CNN
        # important to keep batch and time axis next to
        # eachother, so a simple view without tranposing is possible
        # NOTE(review): dubious — when the dataloader packs a batch, classes
        # get mixed together, and even same-class videos are awkward to mix,
        # since the representation-level flow must be computed.
        x = reshape(x, shape=[b * t, c, h, w])

        x = self.conv1(x)
        x = self.maxpool(x)
        x = self.layer1(x)
        x = self.layer2(x)

        # Insert the FCF (flow) layers here.

        # res = x  # F.avg_pool2d(x, (3, 1), 1, 0)  # x[:,:,1:-1].contiguous()  (F means torch.nn.functional)
        res = x
        x = self.flow_cmp(x)
        x = self.flow_layer.norm_img(x)

        # compute flow for 0,1,...,T-1
        #        and       1,2,...,T
        b_t, c, h, w = x.shape
        x = reshape(x, shape=[b, -1, c, h, w])  # unpack x into B,T,C,H,W; the T axis is manipulated below
        # depending on whether x = x + res is used, pick one of the two lines below
        x = pad(x, paddings=[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])
        # t -= 1  # after the Representation Flow op, t has one frame fewer
        u, v = self.flow_layer(reshape(x[:, :-1], shape=[-1, c, h, w]),
                               reshape(x[:, 1:], shape=[-1, c, h, w]))

        x = concat([u, v], axis=1)

        x = self.flow_conv(x)

        # Flow-of-flow
        x = self.flow_cmp2(x)
        x = self.flow_layer.norm_img(x)
        # compute flow for 0,1,...,T-1
        #        and       1,2,...,T
        b_t, c, h, w = x.shape
        x = reshape(x, shape=[b, -1, c, h, w])
        # depending on whether x = x + res is used, pick one of the two lines below
        x = pad(x, paddings=[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])
        # t -= 1  # after the Representation Flow op, t has one frame fewer
        u, v = self.flow_layer2(reshape(x[:, :-1], shape=[-1, c, h, w]),
                                reshape(x[:, 1:], shape=[-1, c, h, w]))
        x = concat([u, v], axis=1)

        x = self.flow_conv2(x)
        x = self.bnf(x)

        # Residual connection around the flow blocks.
        x = x + res
        x = leaky_relu(x)

        #

        x = self.layer3(x)
        x = self.layer4(x)

        #print(x.size())
        x = self.avgpool(x)

        x = reshape(x, shape=[x.shape[0], -1])
        x = self.dropout(x)

        # currently making dense, per-frame predictions
        x = self.fc(x)

        # so view as BxTxClass
        x = reshape(x, shape=[b, t, -1])
        # mean-pool over time
        x = reduce_mean(x, dim=1)  # collapse the temporal axis

        # return BxClass prediction
        if cls is not None:
            acc = float(accuracy(input=x, label=cls))
            return x, acc
        else:
            return x
    def get_grad_w(self, w, b, grad):
        """Compute dL/dW of a conv layer from the cached input/output.

        Args:
            w: conv weight, shape [out_C, in_C, kH, kW].
            b: conv bias (unused here).
            grad: upstream gradient w.r.t. the conv output — assumed shape
                [N, out_C, out_H, out_W]; TODO confirm against the caller.

        Returns:
            dL_dWeight with shape [out_C, in_C, kH, kW].
        """
        conv_in = self.x
        conv_out = self.y
        N, C, H, W = conv_in.shape
        N, out_C, out_H, out_W = conv_out.shape
        # w  [out_C, in_C, kH, kW]
        out_C, in_C, kH, kW = w.shape
        stride = self.stride
        padding = self.padding
        pad_H = H + padding * 2
        pad_W = W + padding * 2

        # Partial derivative of the loss with respect to w.
        conv_in = paddle.to_tensor(conv_in)
        pad_x = L.pad(
            conv_in,
            paddings=[0, 0, 0, 0, padding, padding, padding, padding],
            pad_value=0.0)  # [N, in_C, pad_H, pad_W]
        pad_x = L.transpose(pad_x, [2, 3, 0, 1])  # [pad_H, pad_W, N, in_C]
        if self.special_inds_dw is None:  # done only once (initialization)
            self.special_inds_dw = []
            # The kernel slides only along the H and W directions.
            for i in range(out_H):  # i is the row (y) coordinate
                for j in range(out_W):  # j is the column (x) coordinate
                    ori_x = j * stride  # kernel x-origin in pad_x; arithmetic sequence with common difference stride
                    ori_y = i * stride  # kernel y-origin in pad_x; arithmetic sequence with common difference stride
                    for i2 in range(kH):  # i2 is the row (y) coordinate
                        for j2 in range(kW):  # j2 is the column (x) coordinate
                            point_x = ori_x + j2
                            point_y = ori_y + i2
                            self.special_inds_dw.append([point_y, point_x])
            # self.special_inds_dw.shape == [out_H*out_W*kH*kW, 2]
        special_inds_dw = paddle.to_tensor(self.special_inds_dw)
        special_inds_dw = L.cast(special_inds_dw, 'int32')
        special_inds_dw.stop_gradient = True
        x_in = L.gather_nd(pad_x,
                           special_inds_dw)  # [out_H*out_W*kH*kW, N, in_C]
        x_in = L.reshape(x_in, (out_H, out_W, kH, kW, N, in_C))
        x_in = L.transpose(
            x_in, [4, 5, 0, 1, 2, 3])  # [N, in_C, out_H, out_W, kH, kW]
        x_in = L.reshape(
            x_in,
            (N, in_C, out_H * out_W, kH, kW))  # [N, in_C, out_H*out_W, kH, kW]
        x_in = L.unsqueeze(x_in, 1)  # [N, 1, in_C, out_H*out_W, kH, kW]
        grad_r = L.reshape(grad, (N, out_C, 1, out_H * out_W, 1,
                                  1))  # [N, out_C, 1, out_H*out_W, 1, 1]

        # Elementwise-multiply variant (kept for reference):
        # dw = x_in * grad_r                                           # [N, out_C, in_C, out_H*out_W, kH, kW]
        # dL_dWeight = L.reduce_sum(dw, dim=[0, 3])                    # [out_C, in_C, kH, kW]

        # Following trick "13" in 1x1conv.py of
        # https://github.com/miemie2013/Pure_Python_Deep_Learning, the
        # reduction above can be expressed as a 1x1 convolution:
        # treat x_in as the conv input image — batch size in_C, channel count
        # N*out_H*out_W; treat grad_r as the kernels — out_C kernels, each
        # with in_C = N*out_H*out_W.
        x_in = L.transpose(
            x_in, [2, 1, 0, 3, 4, 5])  # [in_C,  1, N, out_H*out_W, kH, kW]
        x_in = L.reshape(
            x_in,
            (in_C, N * out_H * out_W, kH, kW))  # [in_C, N*out_H*out_W, kH, kW]
        grad_r = L.transpose(
            grad_r, [1, 2, 0, 3, 4, 5])  # [out_C, 1, N, out_H*out_W, 1, 1]
        grad_r = L.reshape(
            grad_r,
            (out_C, N * out_H * out_W, 1, 1))  # [out_C, N*out_H*out_W, 1, 1]
        dw = F.conv2d(x_in, grad_r, None)  # [in_C, out_C, kH, kW]
        dL_dWeight = L.transpose(dw, [1, 0, 2, 3])  # [out_C, in_C, kH, kW]
        return dL_dWeight
 def build(self,
           boxNum=64,
           learning_rate=0.001,
           beta1=0.9,
           beta2=0.999,
           epsilon=1e-08,
           regularization=None,
           lazy_mode=False):
     dataInput = pfl.data(name='data_input',
                          shape=[3, 416, 416],
                          dtype='float32')
     gtbox = pfl.data(name='data_gtbox', shape=[boxNum, 4], dtype='float32')
     gtlabel = pfl.data(name='data_gtlabel', shape=[boxNum], dtype='int32')
     anchors = [10, 14, 23, 27, 37, 58, 81, 82, 135, 169, 344, 319]
     layer0_output = _DBL(input=dataInput,
                          num_filters=16,
                          filter_size=3,
                          name='layer0')
     layer1_output = pfl.pool2d(input=layer0_output,
                                pool_size=2,
                                pool_type='max',
                                pool_stride=2,
                                name='layer1_max')
     layer2_output = _DBL(input=layer1_output,
                          num_filters=32,
                          filter_size=3,
                          name='layer2')
     layer3_output = pfl.pool2d(input=layer2_output,
                                pool_size=2,
                                pool_type='max',
                                pool_stride=2,
                                name='layer3_max')
     layer4_output = _DBL(input=layer3_output,
                          num_filters=64,
                          filter_size=3,
                          name='layer4')
     layer5_output = pfl.pool2d(input=layer4_output,
                                pool_size=2,
                                pool_type='max',
                                pool_stride=2,
                                name='layer5_max')
     layer6_output = _DBL(input=layer5_output,
                          num_filters=128,
                          filter_size=3,
                          name='layer6')
     layer7_output = pfl.pool2d(input=layer6_output,
                                pool_size=2,
                                pool_type='max',
                                pool_stride=2,
                                name='layer7_max')
     layer8_output = _DBL(input=layer7_output,
                          num_filters=256,
                          filter_size=3,
                          name='layer8')
     layer9_output = pfl.pool2d(input=layer8_output,
                                pool_size=2,
                                pool_type='max',
                                pool_stride=2,
                                name='layer9_max')
     layer10_output = _DBL(input=layer9_output,
                           num_filters=512,
                           filter_size=3,
                           name='layer10')
     layer11_output = pfl.pool2d(input=pfl.pad(
         layer10_output, paddings=[0, 0, 0, 0, 0, 1, 0, 1]),
                                 pool_size=2,
                                 pool_type='max',
                                 pool_stride=1,
                                 name='layer11_max')
     layer12_output = _DBL(input=layer11_output,
                           num_filters=1024,
                           filter_size=3,
                           name='layer12')
     layer13_output = _DBL(input=layer12_output,
                           num_filters=256,
                           filter_size=1,
                           padding=0,
                           name='layer13')
     layer14_output = _DBL(input=layer13_output,
                           num_filters=512,
                           filter_size=3,
                           name='layer14')
     layer15_output = pfl.conv2d(input=layer14_output,
                                 num_filters=18,
                                 filter_size=1,
                                 name='layer15_conv')
     # layer16_yolo -> -1 x 18 x 13 x 13
     yolo1_loss = pfl.yolov3_loss(name='yolo1_loss',
                                  x=layer15_output,
                                  gtbox=gtbox,
                                  gtlabel=gtlabel,
                                  anchors=anchors,
                                  anchor_mask=[3, 4, 5],
                                  class_num=1,
                                  ignore_thresh=0.5,
                                  downsample_ratio=32)
     # layer17_route_13
     layer18_output = _DBL(input=layer13_output,
                           num_filters=128,
                           filter_size=1,
                           padding=0,
                           name='layer18')
     layer19_output = pfl.expand(layer18_output,
                                 expand_times=[1, 1, 2, 2],
                                 name='layer19_upsample')
     # layer20_route_19_8
     layer20_output = pfl.concat([layer19_output, layer8_output],
                                 axis=1,
                                 name='layer20_concat')
     layer21_output = _DBL(layer20_output,
                           num_filters=256,
                           filter_size=3,
                           name='layer21')
     layer22_output = pfl.conv2d(input=layer21_output,
                                 num_filters=18,
                                 filter_size=1,
                                 name='layer22_conv')
     # layer23_yolo -> -1 x 18 x 26 x 26
     yolo2_loss = pfl.yolov3_loss(name='yolo2_loss',
                                  x=layer22_output,
                                  gtbox=gtbox,
                                  gtlabel=gtlabel,
                                  anchors=anchors,
                                  anchor_mask=[0, 1, 2],
                                  class_num=1,
                                  ignore_thresh=0.5,
                                  downsample_ratio=16)
     loss = pfl.reduce_mean(pfl.elementwise_add(yolo1_loss, yolo2_loss),
                            name="loss_output")
     optimizer = fluid.optimizer.AdamOptimizer(
         learning_rate=learning_rate,
         beta1=beta1,
         beta2=beta2,
         epsilon=epsilon,
         regularization=regularization,
         lazy_mode=lazy_mode)
     optimizer.minimize(loss)
     self._netOutput1, self._netOutput2 = layer15_output, layer22_output
     self._loss = loss
     self._trainExe = fluid.Executor(
         fluid.CUDAPlace(0)) if self._USE_CUDA else fluid.Executor(
             fluid.CPUPlace())