Example 1
    def test_uniform_initializer(self, dtype="float32"):
        """
        In dygraph mode, we can use initializer directly to initialize a tensor.
        """
        paddle.disable_static()

        tensor = paddle.zeros([1024, 1024, 16])
        tensor.stop_gradient = False
        self.assertTrue(np.allclose(np.zeros((1024, 1024, 16)), tensor.numpy()))

        uniform_ = paddle.nn.initializer.Uniform()
        uniform_(tensor)

        self.assertEqual(tensor.stop_gradient,
                         False)  # stop_gradient is not changed

        hist, prob = output_hist(tensor.numpy())

        self.assertTrue(
            np.allclose(
                hist, prob, rtol=0, atol=1e-3), "hist: " + str(hist))

        paddle.enable_static()
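The pattern above generalizes to the other built-in initializers: in dynamic-graph mode an initializer instance is simply called on an existing tensor and fills it in place. A minimal sketch (the tensor shape and bounds below are arbitrary):

import paddle

paddle.disable_static()
t = paddle.zeros([4, 4])
paddle.nn.initializer.Uniform(low=-0.5, high=0.5)(t)  # fills t in place
print(float(t.min()), float(t.max()))  # both now lie in [-0.5, 0.5)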
Example 2
    def inv_transform(self, prob_map):
        if self._object_roi is None:
            self._prev_probs = prob_map.numpy()
            return prob_map

        assert prob_map.shape[0] == 1
        rmin, rmax, cmin, cmax = self._object_roi
        prob_map = paddle.nn.functional.interpolate(prob_map,
                                                    size=(rmax - rmin + 1,
                                                          cmax - cmin + 1),
                                                    mode='bilinear',
                                                    align_corners=True)

        if self._prev_probs is not None:
            new_prob_map = paddle.zeros(shape=self._prev_probs.shape,
                                        dtype=prob_map.dtype)
            new_prob_map[:, :, rmin:rmax + 1, cmin:cmax + 1] = prob_map
        else:
            new_prob_map = prob_map

        self._prev_probs = new_prob_map.numpy()

        return new_prob_map
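The paste-back step above can be reproduced in isolation: interpolate the ROI prediction to the ROI's size, then write it into a zero canvas of the full-map shape. A sketch with made-up sizes:

import paddle
import paddle.nn.functional as F

prob = paddle.rand([1, 1, 8, 8])                  # prediction on the cropped ROI
rmin, rmax, cmin, cmax = 10, 29, 5, 24            # ROI bounds in the full map (inclusive)
roi = F.interpolate(prob, size=(rmax - rmin + 1, cmax - cmin + 1),
                    mode='bilinear', align_corners=True)
canvas = paddle.zeros([1, 1, 64, 64], dtype=roi.dtype)
canvas[:, :, rmin:rmax + 1, cmin:cmax + 1] = roi  # paste the ROI back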
Example 3
def sample_bbox(matches,
                match_labels,
                gt_classes,
                batch_size_per_im,
                fg_fraction,
                num_classes,
                use_random=True,
                is_cascade=False):

    n_gt = gt_classes.shape[0]
    if n_gt == 0:
        # No truth, assign everything to background
        gt_classes = paddle.ones(matches.shape, dtype='int32') * num_classes
        #return matches, match_labels + num_classes
    else:
        gt_classes = paddle.gather(gt_classes, matches)
        gt_classes = paddle.where(match_labels == 0,
                                  paddle.ones_like(gt_classes) * num_classes,
                                  gt_classes)
        gt_classes = paddle.where(match_labels == -1,
                                  paddle.ones_like(gt_classes) * -1,
                                  gt_classes)
    if is_cascade:
        index = paddle.arange(matches.shape[0])
        return index, gt_classes
    rois_per_image = int(batch_size_per_im)

    fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image,
                                        fg_fraction, num_classes, use_random)
    if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0:
        # fake output labeled with -1 when all boxes are neither
        # foreground nor background
        sampled_inds = paddle.zeros([1], dtype='int32')
    else:
        sampled_inds = paddle.concat([fg_inds, bg_inds])
    sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
    return sampled_inds, sampled_gt_classes
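The two paddle.where calls are the core of the labeling logic: matcher label 0 maps a proposal to the background class num_classes, and label -1 marks it as ignored. A standalone sketch with toy values:

import paddle

num_classes = 80
match_labels = paddle.to_tensor([1, 0, -1, 1], dtype='int32')
gt_classes = paddle.to_tensor([3, 7, 12, 45], dtype='int32')

gt_classes = paddle.where(match_labels == 0,
                          paddle.ones_like(gt_classes) * num_classes, gt_classes)
gt_classes = paddle.where(match_labels == -1,
                          paddle.ones_like(gt_classes) * -1, gt_classes)
print(gt_classes.numpy())  # [ 3 80 -1 45]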
Example 4
    def __call__(self, bbox_head_out, rois, im_shape, scale_factor):
        bbox_pred, cls_prob = bbox_head_out
        roi, rois_num = rois
        origin_shape = im_shape / scale_factor
        scale_list = []
        origin_shape_list = []
        for idx in range(self.batch_size):
            scale = scale_factor[idx, :][0]
            rois_num_per_im = rois_num[idx]
            expand_scale = paddle.expand(scale, [rois_num_per_im, 1])
            scale_list.append(expand_scale)
            expand_im_shape = paddle.expand(origin_shape[idx, :],
                                            [rois_num_per_im, 2])
            origin_shape_list.append(expand_im_shape)

        scale = paddle.concat(scale_list)
        origin_shape = paddle.concat(origin_shape_list)

        bbox = roi / scale
        bbox = ops.box_coder(prior_box=bbox,
                             prior_box_var=self.prior_box_var,
                             target_box=bbox_pred,
                             code_type=self.code_type,
                             box_normalized=self.box_normalized,
                             axis=self.axis)
        # TODO: Update box_clip
        origin_h = paddle.unsqueeze(origin_shape[:, 0] - 1, axis=1)
        origin_w = paddle.unsqueeze(origin_shape[:, 1] - 1, axis=1)
        zeros = paddle.zeros(origin_h.shape, 'float32')
        x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
        y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
        x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
        y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
        bbox = paddle.stack([x1, y1, x2, y2], axis=-1)

        bboxes = (bbox, rois_num)
        return bboxes, cls_prob
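The four maximum/minimum lines are a box clip to the image bounds (the TODO above notes it should eventually move to a shared box_clip helper). The same computation in isolation, with one toy box:

import paddle

bbox = paddle.to_tensor([[[-5., 10., 700., 300.]]])  # [N, M, 4] as (x1, y1, x2, y2)
origin_w = paddle.to_tensor([[639.]])
origin_h = paddle.to_tensor([[479.]])
zeros = paddle.zeros_like(origin_w)

x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
print(paddle.stack([x1, y1, x2, y2], axis=-1).numpy())  # [[[0. 10. 639. 300.]]]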
Example 5
    def forward(self, input_ids, token_type_ids=None, position_ids=None):
        if position_ids is None:
            ones = paddle.ones_like(input_ids, dtype="int64")
            seq_length = paddle.cumsum(ones, axis=-1)

            content_len = paddle.shape(input_ids)[1] - self.cls_num
            position_ids = paddle.concat([
                paddle.zeros(shape=[self.cls_num], dtype="int64"),
                paddle.linspace(1, content_len, content_len, dtype="int64")
            ])
            position_ids.stop_gradient = True
        if token_type_ids is None:
            token_type_ids = paddle.zeros_like(input_ids, dtype="int64")

        input_embeddings = self.word_embeddings(input_ids)
        position_embeddings = self.position_embeddings(position_ids)
        token_type_embeddings = self.token_type_embeddings(token_type_ids)

        embeddings = input_embeddings + token_type_embeddings + position_embeddings

        embeddings = self.layer_norm(embeddings)
        embeddings = self.dropout(embeddings)

        return embeddings
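When position_ids is None, the ids built above are zeros for the cls_num leading tokens followed by 1..content_len. A quick check of that construction (cls_num=2 and a length-6 sequence assumed):

import paddle

cls_num, seq_len = 2, 6
content_len = seq_len - cls_num
position_ids = paddle.concat([
    paddle.zeros(shape=[cls_num], dtype="int64"),
    paddle.linspace(1, content_len, content_len, dtype="int64")
])
print(position_ids.numpy())  # [0 0 1 2 3 4]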
Example 6
    def forward(self, x):

        out = self.bn1(x)
        out = self.conv1(out)

        out = self.bn2(out)
        out = self.relu(out)
        out = self.conv2(out)

        out = self.bn3(out)
        out = self.relu(out)
        out = self.conv3(out)

        out = self.bn4(out)

        if self.downsample is not None:
            shortcut = self.downsample(x)
            featuremap_size = shortcut.shape[2:4]
        else:
            shortcut = x
            featuremap_size = out.shape[2:4]

        batch_size = out.shape[0]
        residual_channel = out.shape[1]
        shortcut_channel = shortcut.shape[1]

        if residual_channel != shortcut_channel:
            padding = paddle.zeros([
                batch_size, residual_channel - shortcut_channel,
                featuremap_size[0], featuremap_size[1]
            ])
            out += paddle.concat([shortcut, padding], 1)
        else:
            out += shortcut

        return out
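The padding branch widens the shortcut with zero channels so it can be added to a residual that has more channels. In isolation, with toy shapes:

import paddle

out = paddle.rand([1, 8, 4, 4])       # residual branch, 8 channels
shortcut = paddle.rand([1, 6, 4, 4])  # shortcut, 6 channels
padding = paddle.zeros([1, 8 - 6, 4, 4])
out = out + paddle.concat([shortcut, padding], 1)
print(out.shape)  # [1, 8, 4, 4]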
Example 7
    def forward(self, voxel_features, coords, batch_size):
        batch_canvas = []
        for batch_itt in range(batch_size):
            canvas = paddle.zeros((self.nchannels, self.nx * self.ny),
                                  dtype=voxel_features.dtype)

            batch_mask = coords[:, 0] == batch_itt
            if bool(batch_mask.any()):
                this_coords = mask_select(coords, batch_mask)
                indices = this_coords[:, 2] * self.nx + this_coords[:, 3]
                indices = indices.astype("int64")
                voxels = mask_select(voxel_features, batch_mask)
                voxels = voxels.t()

                canvas = select_change(canvas, voxels, indices)
            batch_canvas.append(canvas)

        batch_canvas = paddle.stack(batch_canvas, 0)
        batch_canvas = batch_canvas.reshape(
            (batch_size, self.nchannels, self.ny, self.nx))

        return batch_canvas
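mask_select and select_change are project-local helpers, but the core operation — writing per-pillar feature columns into a flat (C, nx*ny) canvas at computed flat indices — can be sketched with paddle.scatter on the transposed canvas (all shapes and indices below are illustrative):

import paddle

C, nx, ny = 4, 3, 3
canvas = paddle.zeros((C, nx * ny))
voxels = paddle.rand([C, 2])                       # features of 2 occupied pillars
indices = paddle.to_tensor([1, 7], dtype='int64')  # flat y * nx + x positions

flat = paddle.scatter(canvas.t(), indices, voxels.t())  # one row per canvas slot
canvas = flat.t()
print(canvas.reshape((C, ny, nx)).shape)  # [4, 3, 3]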
Example 8
def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False):
    """Make a grid of images.
    Args:
        tensor (Tensor or list): 4D mini-batch Tensor of shape (B x C x H x W)
            or a list of images all of the same size.
        nrow (int, optional): Number of images displayed in each row of the grid.
            The final grid size is ``(B / nrow, nrow)``. Default: ``8``.
        normalize (bool, optional): If True, shift the image to the range (0, 1),
            by the min and max values specified by :attr:`range`. Default: ``False``.
        range (tuple, optional): tuple (min, max) where min and max are numbers,
            then these numbers are used to normalize the image. By default, min and max
            are computed from the tensor.
        scale_each (bool, optional): If ``True``, scale each image in the batch of
            images separately rather than the (min, max) over all images. Default: ``False``.
    """
    if not (isinstance(tensor, paddle.Tensor) or
            (isinstance(tensor, list)
             and all(isinstance(t, paddle.Tensor) for t in tensor))):
        raise TypeError('tensor or list of tensors expected, got {}'.format(
            type(tensor)))

    # if list of tensors, convert to a 4D mini-batch Tensor
    if isinstance(tensor, list):
        tensor = paddle.stack(tensor, 0)

    if tensor.dim() == 2:  # single image H x W
        tensor = tensor.unsqueeze(0)
    if tensor.dim() == 3:  # single image
        if tensor.shape[0] == 1:  # if single-channel, convert to 3-channel
            tensor = paddle.concat([tensor, tensor, tensor], 0)
        tensor = tensor.unsqueeze(0)

    if tensor.dim() == 4 and tensor.shape[1] == 1:  # single-channel images
        tensor = paddle.concat([tensor, tensor, tensor], 1)

    if normalize is True:
        tensor = tensor.astype(tensor.dtype)  # avoid modifying tensor in-place
        if range is not None:
            assert isinstance(range, tuple), \
                "range has to be a tuple (min, max) if specified. min and max are numbers"

        def norm_ip(img, min, max):
            img[:] = img.clip(min=min, max=max)
            img[:] = (img - min) / (max - min + 1e-5)

        def norm_range(t, range):
            if range is not None:
                norm_ip(t, range[0], range[1])
            else:
                norm_ip(t, float(t.min()), float(t.max()))

        if scale_each is True:
            for t in tensor:  # loop over mini-batch dimension
                norm_range(t, range)
        else:
            norm_range(tensor, range)

    if tensor.shape[0] == 1:
        return tensor.squeeze(0)

    # make the mini-batch of images into a grid
    nmaps = tensor.shape[0]
    xmaps = min(nrow, nmaps)
    ymaps = int(math.ceil(float(nmaps) / xmaps))
    height, width = int(tensor.shape[2]), int(tensor.shape[3])
    num_channels = tensor.shape[1]
    canvas = paddle.zeros((num_channels, height * ymaps, width * xmaps),
                          dtype=tensor.dtype)
    k = 0
    for y in irange(ymaps):
        for x in irange(xmaps):
            if k >= nmaps:
                break
            canvas[:, y * height:(y + 1) * height,
                   x * width:(x + 1) * width] = tensor[k]
            k = k + 1
    return canvas
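A typical call, assuming make_grid and its helpers above are in scope: eight random RGB images tiled four per row yield a 2 x 4 grid.

import paddle

images = paddle.rand([8, 3, 32, 32])  # B x C x H x W
grid = make_grid(images, nrow=4, normalize=True)
print(grid.shape)                     # [3, 64, 128]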
Example 9
    def __init__(self,
                 img_size=224,
                 patch_size=4,
                 in_chans=3,
                 num_classes=1000,
                 embed_dim=96,
                 depths=[2, 2, 6, 2],
                 num_heads=[3, 6, 12, 24],
                 window_size=4,
                 mlp_ratio=4.,
                 qkv_bias=True,
                 qk_scale=None,
                 drop_rate=0.,
                 attn_drop_rate=0.,
                 drop_path_rate=0.1,
                 norm_layer=nn.LayerNorm,
                 ape=False,
                 patch_norm=True,
                 use_checkpoint=False,
                 **kwargs):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = len(depths)
        self.embed_dim = embed_dim
        self.ape = ape
        self.patch_norm = patch_norm
        self.num_features = int(embed_dim * 2**(self.num_layers - 1))
        self.mlp_ratio = mlp_ratio

        # split image into non-overlapping patches
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            patch_size=patch_size,
            in_chans=in_chans,
            embed_dim=embed_dim,
            norm_layer=norm_layer if self.patch_norm else None)
        num_patches = self.patch_embed.num_patches
        patches_resolution = self.patch_embed.patches_resolution
        self.patches_resolution = patches_resolution

        # absolute position embedding
        if self.ape:
            attr = ParamAttr(initializer=nn.initializer.Constant(0))
            self.absolute_pos_embed = self.create_parameter(shape=(1,
                                                                   num_patches,
                                                                   embed_dim),
                                                            attr=attr)
            paddle.assign(trunc_norm_(self.absolute_pos_embed.shape, std=0.02),
                          self.absolute_pos_embed)

        self.pos_drop = nn.Dropout(p=drop_rate)

        # stochastic depth
        dpr = list(np.linspace(0, drop_path_rate,
                               sum(depths)))  # stochastic depth decay rule

        # build layers
        self.layers = nn.LayerList()
        for i_layer in range(self.num_layers):
            layer = BasicLayer(
                dim=int(embed_dim * 2**i_layer),
                input_resolution=(patches_resolution[0] // (2**i_layer),
                                  patches_resolution[1] // (2**i_layer)),
                depth=depths[i_layer],
                num_heads=num_heads[i_layer],
                window_size=window_size,
                mlp_ratio=self.mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[sum(depths[:i_layer]):sum(depths[:i_layer + 1])],
                norm_layer=norm_layer,
                downsample=PatchMerging if
                (i_layer < self.num_layers - 1) else None,
                use_checkpoint=use_checkpoint)
            self.layers.append(layer)

        self.norm = norm_layer(self.num_features)
        self.avgpool = nn.AdaptiveAvgPool1D(1)
        self.head = nn.Linear(self.num_features,
                              num_classes) if num_classes > 0 else Identity()

        for m in self.sublayers():
            if isinstance(m, nn.LayerNorm):
                paddle.assign(paddle.zeros(m.bias.shape), m.bias)
                paddle.assign(paddle.ones(m.weight.shape), m.weight)
            if isinstance(m, nn.Linear):
                try:
                    paddle.assign(trunc_norm_(m.weight.shape, std=0.02),
                                  m.weight)
                except Exception:
                    print(m.weight.shape)
                if m.bias is not None:
                    paddle.assign(paddle.zeros(m.bias.shape), m.bias)
Example 10
    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        seg_masks = paddle.gather(seg_masks, index=sort_inds)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        sum_masks = paddle.gather(sum_masks, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)

        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        # inter.
        inter_matrix = paddle.mm(seg_masks,
                                 paddle.transpose(seg_masks, [1, 0]))
        n_samples = paddle.shape(cate_labels)
        # union.
        sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
        # iou.
        iou_matrix = (inter_matrix /
                      (sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) -
                       inter_matrix))
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = paddle.expand(cate_labels,
                                      shape=[n_samples, n_samples])
        label_matrix = paddle.cast(
            (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
            'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max((iou_matrix * label_matrix), axis=0)
        compensate_iou = paddle.expand(compensate_iou,
                                       shape=[n_samples, n_samples])
        compensate_iou = paddle.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = iou_matrix * label_matrix

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = paddle.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = paddle.exp(-1 * self.sigma *
                                           (compensate_iou**2))
            decay_coefficient = paddle.min(decay_matrix / compensate_matrix,
                                           axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = cate_scores * decay_coefficient
        y = paddle.zeros(shape=paddle.shape(cate_scores), dtype='float32')
        keep = paddle.where(cate_scores >= self.update_threshold, cate_scores,
                            y)
        keep = paddle.nonzero(keep)
        keep = paddle.squeeze(keep, axis=[1])
        # Prevent empty and increase fake data
        keep = paddle.concat(
            [keep,
             paddle.cast(paddle.shape(cate_scores)[0] - 1, 'int64')])

        seg_preds = paddle.gather(seg_preds, index=keep)
        cate_scores = paddle.gather(cate_scores, index=keep)
        cate_labels = paddle.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        seg_preds = paddle.gather(seg_preds, index=sort_inds)
        cate_scores = paddle.gather(cate_scores, index=sort_inds)
        cate_labels = paddle.gather(cate_labels, index=sort_inds)
        return seg_preds, cate_scores, cate_labels
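The two kernels are the decay functions of Matrix NMS: 'gaussian' decays a score by the ratio exp(-sigma * iou^2) / exp(-sigma * iou_compensate^2), 'linear' by (1 - iou) / (1 - iou_compensate). A scalar sketch of both (sigma = 2.0 and the IoU values are made up):

import math

decay_iou, compensate_iou, sigma = 0.6, 0.2, 2.0

gaussian = math.exp(-sigma * decay_iou ** 2) / math.exp(-sigma * compensate_iou ** 2)
linear = (1 - decay_iou) / (1 - compensate_iou)
print(round(gaussian, 4), round(linear, 4))  # 0.5273 0.5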
Example 11
        googLeNet_part3 = self.googLeNet_part3(googLeNet_part2)
        googLeNet_part3 = paddle.nn.functional.dropout(googLeNet_part3, p=0.6)

        out_final_2d = paddle.reshape(googLeNet_part3, [-1, googLeNet_part3.shape[1]])

        out_final_2d = paddle.reshape(out_final_2d, [-1, self.seg_num, out_final_2d.shape[1]])

        out_final_2d = paddle.mean(out_final_2d, axis=1)

        out_final = paddle.concat(x=[out_final_2d, out_final_3d], axis=1)

        out_final = self.out(out_final)

        out_final = paddle.nn.functional.softmax(out_final)
        
        if label is not None:
            acc = paddle.metric.accuracy(input=out_final, label=label)
            return out_final, acc
        else:
            return out_final


if __name__ == '__main__':
    network = GoogLeNet()
    img = paddle.zeros([1, 12, 3, 224, 224])
    outs = network(img)
    print(outs.shape)

Example 12
    def func_test_memory_reserved(self, device=None):
        if core.is_compiled_with_cuda():
            tensor = paddle.zeros(shape=[256])
            alloc_size = 4 * 256  # 256 float32 values, 4 bytes each
            memory_reserved_size = memory_reserved(device)
            self.assertEqual(memory_reserved_size, alloc_size)
Example 13
    def get_tensor(self, device="cpu"):
        self.device = device.lower()
        place = None
        tensor = paddle.zeros([5, 5], dtype="float32")
        return tensor
Example 14
    def forward(self, mol_batch, x_tree_vecs):
        """Tree decoding in training
        Args:
            mol_batch(list): mol objects in a batch.
            x_tree_vecs(tensor): tree latent representation.
        Returns:
            pred_loss: label prediction loss.
            stop_loss: topological prediction loss.
            pred_acc: label prediction accuracy.
            stop_acc: topological prediction accuracy.
        """
        pred_hiddens, pred_contexts, pred_targets = [], [], []
        stop_hiddens, stop_contexts, stop_targets = [], [], []
        traces = []

        for mol_tree in mol_batch:
            s = []
            dfs(s, mol_tree.nodes[0], -1)
            traces.append(s)
            for node in mol_tree.nodes:
                node.neighbors = []

        batch_size = len(mol_batch)

        pred_hiddens.append(paddle.zeros([len(mol_batch), self.hidden_size]))
        pred_targets.extend([mol_tree.nodes[0].wid for mol_tree in mol_batch])
        pred_contexts.append(paddle.to_tensor(list(range(batch_size))))

        max_iter = max([len(tr) for tr in traces])
        padding = paddle.zeros([self.hidden_size])
        padding.stop_gradient = False
        h = {}

        for t in range(max_iter):
            prop_list = []
            batch_list = []
            for i, plist in enumerate(traces):
                if t < len(plist):
                    prop_list.append(plist[t])
                    batch_list.append(i)

            cur_x = []
            cur_h_nei, cur_o_nei = [], []

            for node_x, real_y, _ in prop_list:
                cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors if node_y.idx != real_y.idx]
                pad_len = MAX_NB - len(cur_nei)
                cur_h_nei.extend(cur_nei)
                cur_h_nei.extend([padding] * pad_len)

                cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
                pad_len = MAX_NB - len(cur_nei)
                cur_o_nei.extend(cur_nei)
                cur_o_nei.extend([padding] * pad_len)

                cur_x.append(node_x.wid)

            cur_x = paddle.to_tensor(cur_x)
            cur_x = self.embedding(cur_x)

            cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[-1, MAX_NB, self.hidden_size])
            new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)

            cur_o_nei = paddle.reshape(paddle.stack(cur_o_nei, axis=0), shape=[-1, MAX_NB, self.hidden_size])
            cur_o = paddle.sum(cur_o_nei, axis=1)

            pred_target, pred_list = [], []
            stop_target = []
            for i, m in enumerate(prop_list):
                node_x, node_y, direction = m
                x, y = node_x.idx, node_y.idx
                h[(x, y)] = new_h[i]
                node_y.neighbors.append(node_x)
                if direction == 1:
                    pred_target.append(node_y.wid)
                    pred_list.append(i)
                stop_target.append(direction)

            cur_batch = paddle.to_tensor(batch_list)
            stop_hidden = paddle.concat([cur_x, cur_o], axis=1)
            stop_hiddens.append(stop_hidden)
            stop_contexts.append(cur_batch)
            stop_targets.extend(stop_target)

            if len(pred_list) > 0:
                batch_list = [batch_list[i] for i in pred_list]
                cur_batch = paddle.to_tensor(batch_list)
                pred_contexts.append(cur_batch)

                cur_pred = paddle.to_tensor(pred_list)
                pred_hiddens.append(paddle.index_select(axis=0, index=cur_pred, x=new_h))
                pred_targets.extend(pred_target)

        cur_x, cur_o_nei = [], []
        for mol_tree in mol_batch:
            node_x = mol_tree.nodes[0]
            cur_x.append(node_x.wid)
            cur_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
            pad_len = MAX_NB - len(cur_nei)
            cur_o_nei.extend(cur_nei)
            cur_o_nei.extend([padding] * pad_len)

        cur_x = paddle.to_tensor(cur_x)
        cur_x = self.embedding(cur_x)
        cur_o_nei = paddle.reshape(paddle.stack(cur_o_nei, axis=0), shape=[-1, MAX_NB, self.hidden_size])
        cur_o = paddle.sum(cur_o_nei, axis=1)

        stop_hidden = paddle.concat([cur_x, cur_o], axis=1)
        stop_hiddens.append(stop_hidden)
        stop_contexts.append(paddle.to_tensor(list(range(batch_size))))
        stop_targets.extend([0] * len(mol_batch))

        pred_contexts = paddle.concat(pred_contexts, axis=0)
        pred_hiddens = paddle.concat(pred_hiddens, axis=0)
        pred_scores = self.aggregate(pred_hiddens, pred_contexts, x_tree_vecs, 'word')
        pred_targets = paddle.to_tensor(pred_targets)

        pred_loss = self.pred_loss(pred_scores, pred_targets) / len(mol_batch)
        preds = paddle.argmax(pred_scores, axis=1)
        pred_acc = paddle.equal(preds, pred_targets).astype('float32')
        pred_acc = paddle.sum(pred_acc) / pred_targets.size

        stop_contexts = paddle.concat(stop_contexts, axis=0)
        stop_hiddens = paddle.concat(stop_hiddens, axis=0)
        stop_hiddens = F.relu(self.U_i(stop_hiddens))
        stop_scores = self.aggregate(stop_hiddens, stop_contexts, x_tree_vecs, 'stop')
        stop_scores = stop_scores.squeeze(-1)
        stop_targets = paddle.to_tensor(stop_targets).astype('float32')

        stop_loss = self.stop_loss(stop_scores, stop_targets) / len(mol_batch)
        stops = paddle.greater_equal(stop_scores, paddle.ones(shape=[1])).astype('float32')
        stop_acc = paddle.equal(stops, stop_targets).astype('float32')
        stop_acc = paddle.sum(stop_acc) / stop_targets.size
        return {'pred_loss': pred_loss,
                'stop_loss': stop_loss,
                'pred_acc': float(pred_acc.numpy()),
                'stop_acc': float(stop_acc.numpy())}
Example 15
def do_train(args):
    paddle.set_device(args.device)
    if paddle.distributed.get_world_size() > 1:
        paddle.distributed.init_parallel_env()

    set_seed(args)

    args.task_name = args.task_name.lower()
    metric_class = METRIC_CLASSES[args.task_name]
    args.model_type = args.model_type.lower()
    model_class, tokenizer_class = MODEL_CLASSES[args.model_type]

    train_ds = load_dataset('glue', args.task_name, splits="train")

    tokenizer = tokenizer_class.from_pretrained(args.model_name_or_path)
    trans_func = partial(convert_example,
                         tokenizer=tokenizer,
                         label_list=train_ds.label_list,
                         max_seq_length=args.max_seq_length)
    train_ds = train_ds.map(trans_func, lazy=True)
    train_batch_sampler = paddle.io.DistributedBatchSampler(
        train_ds, batch_size=args.batch_size, shuffle=True)
    batchify_fn = lambda samples, fn=Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment
        Stack(dtype="int64" if train_ds.label_list else "float32")  # label
    ): fn(samples)

    train_data_loader = DataLoader(dataset=train_ds,
                                   batch_sampler=train_batch_sampler,
                                   collate_fn=batchify_fn,
                                   num_workers=0,
                                   return_list=True)
    if args.task_name == "mnli":
        dev_ds_matched, dev_ds_mismatched = load_dataset(
            'glue', args.task_name, splits=["dev_matched", "dev_mismatched"])
        dev_ds_matched = dev_ds_matched.map(trans_func, lazy=True)
        dev_ds_mismatched = dev_ds_mismatched.map(trans_func, lazy=True)
        dev_batch_sampler_matched = paddle.io.BatchSampler(
            dev_ds_matched, batch_size=args.batch_size, shuffle=False)
        dev_data_loader_matched = DataLoader(
            dataset=dev_ds_matched,
            batch_sampler=dev_batch_sampler_matched,
            collate_fn=batchify_fn,
            num_workers=0,
            return_list=True)
        dev_batch_sampler_mismatched = paddle.io.BatchSampler(
            dev_ds_mismatched, batch_size=args.batch_size, shuffle=False)
        dev_data_loader_mismatched = DataLoader(
            dataset=dev_ds_mismatched,
            batch_sampler=dev_batch_sampler_mismatched,
            collate_fn=batchify_fn,
            num_workers=0,
            return_list=True)
    else:
        dev_ds = load_dataset('glue', args.task_name, splits='dev')
        dev_ds = dev_ds.map(trans_func, lazy=True)
        dev_batch_sampler = paddle.io.BatchSampler(dev_ds,
                                                   batch_size=args.batch_size,
                                                   shuffle=False)
        dev_data_loader = DataLoader(dataset=dev_ds,
                                     batch_sampler=dev_batch_sampler,
                                     collate_fn=batchify_fn,
                                     num_workers=0,
                                     return_list=True)

    num_labels = 1 if train_ds.label_list is None else len(train_ds.label_list)

    model = model_class.from_pretrained(args.model_name_or_path,
                                        num_classes=num_labels)

    # Step1: Initialize a dictionary to save the weights from the origin BERT model.
    origin_weights = model.state_dict()

    # Step2: Convert origin model to supernet.
    sp_config = supernet(expand_ratio=args.width_mult_list)
    model = Convert(sp_config).convert(model)
    # Use weights saved in the dictionary to initialize supernet.
    utils.set_state_dict(model, origin_weights)
    del origin_weights

    # Step3: Define teacher model.
    teacher_model = model_class.from_pretrained(args.model_name_or_path,
                                                num_classes=num_labels)

    # Step4: Config about distillation.
    mapping_layers = ['bert.embeddings']
    for idx in range(model.bert.config['num_hidden_layers']):
        mapping_layers.append('bert.encoder.layers.{}'.format(idx))

    default_distill_config = {
        'lambda_distill': 0.1,
        'teacher_model': teacher_model,
        'mapping_layers': mapping_layers,
    }
    distill_config = DistillConfig(**default_distill_config)

    # Step5: Config in supernet training.
    ofa_model = OFA(model,
                    distill_config=distill_config,
                    elastic_order=['width'])

    criterion = paddle.nn.loss.CrossEntropyLoss(
    ) if train_ds.label_list else paddle.nn.loss.MSELoss()

    metric = metric_class()

    if args.task_name == "mnli":
        dev_data_loader = (dev_data_loader_matched, dev_data_loader_mismatched)

    # Step6: Calculate the importance of neurons and head,
    # and then reorder them according to the importance.
    head_importance, neuron_importance = nlp_utils.compute_neuron_head_importance(
        args.task_name,
        ofa_model.model,
        dev_data_loader,
        loss_fct=criterion,
        num_layers=model.bert.config['num_hidden_layers'],
        num_heads=model.bert.config['num_attention_heads'])
    reorder_neuron_head(ofa_model.model, head_importance, neuron_importance)

    if paddle.distributed.get_world_size() > 1:
        ofa_model.model = paddle.DataParallel(ofa_model.model)

    if args.max_steps > 0:
        num_training_steps = args.max_steps
        num_train_epochs = math.ceil(num_training_steps /
                                     len(train_data_loader))
    else:
        num_training_steps = len(train_data_loader) * args.num_train_epochs
        num_train_epochs = args.num_train_epochs

    lr_scheduler = LinearDecayWithWarmup(args.learning_rate,
                                         num_training_steps, args.warmup_steps)

    # Generate parameter names needed to perform weight decay.
    # All bias and LayerNorm parameters are excluded.
    decay_params = [
        p.name for n, p in model.named_parameters()
        if not any(nd in n for nd in ["bias", "norm"])
    ]
    optimizer = paddle.optimizer.AdamW(
        learning_rate=lr_scheduler,
        epsilon=args.adam_epsilon,
        parameters=ofa_model.model.parameters(),
        weight_decay=args.weight_decay,
        apply_decay_param_fun=lambda x: x in decay_params)

    global_step = 0
    tic_train = time.time()
    for epoch in range(num_train_epochs):
        # Step7: Set current epoch and task.
        ofa_model.set_epoch(epoch)
        ofa_model.set_task('width')

        for step, batch in enumerate(train_data_loader):
            global_step += 1
            input_ids, segment_ids, labels = batch

            for width_mult in args.width_mult_list:
                # Step8: Broadcast supernet config from width_mult,
                # and use this config in supernet training.
                net_config = utils.dynabert_config(ofa_model, width_mult)
                ofa_model.set_net_config(net_config)
                logits, teacher_logits = ofa_model(input_ids,
                                                   segment_ids,
                                                   attention_mask=[None, None])
                rep_loss = ofa_model.calc_distill_loss()
                if args.task_name == 'sts-b':
                    logit_loss = paddle.zeros(shape=[1], dtype='float32')
                else:
                    logit_loss = soft_cross_entropy(logits,
                                                    teacher_logits.detach())
                loss = rep_loss + args.lambda_logit * logit_loss
                loss.backward()
            optimizer.step()
            lr_scheduler.step()
            optimizer.clear_grad()

            if global_step % args.logging_steps == 0:
                if paddle.distributed.get_rank() == 0:
                    logger.info(
                        "global step %d, epoch: %d, batch: %d, loss: %f, speed: %.2f step/s"
                        % (global_step, epoch, step, loss, args.logging_steps /
                           (time.time() - tic_train)))
                tic_train = time.time()

            if global_step % args.save_steps == 0:
                tic_eval = time.time()
                if args.task_name == "mnli":
                    evaluate(teacher_model,
                             criterion,
                             metric,
                             dev_data_loader_matched,
                             width_mult=100)
                    evaluate(teacher_model,
                             criterion,
                             metric,
                             dev_data_loader_mismatched,
                             width_mult=100)
                else:
                    evaluate(teacher_model,
                             criterion,
                             metric,
                             dev_data_loader,
                             width_mult=100)
                print("eval done total : %s s" % (time.time() - tic_eval))
                for idx, width_mult in enumerate(args.width_mult_list):
                    net_config = utils.dynabert_config(ofa_model, width_mult)
                    ofa_model.set_net_config(net_config)
                    tic_eval = time.time()
                    if args.task_name == "mnli":
                        acc = evaluate(ofa_model, criterion, metric,
                                       dev_data_loader_matched, width_mult)
                        evaluate(ofa_model, criterion, metric,
                                 dev_data_loader_mismatched, width_mult)
                        print("eval done total : %s s" %
                              (time.time() - tic_eval))
                    else:
                        acc = evaluate(ofa_model, criterion, metric,
                                       dev_data_loader, width_mult)
                        print("eval done total : %s s" %
                              (time.time() - tic_eval))

                    if paddle.distributed.get_rank() == 0:
                        output_dir = os.path.join(args.output_dir,
                                                  "model_%d" % global_step)
                        if not os.path.exists(output_dir):
                            os.makedirs(output_dir)
                        # need better way to get inner model of DataParallel
                        model_to_save = model._layers if isinstance(
                            model, paddle.DataParallel) else model
                        model_to_save.save_pretrained(output_dir)
                        tokenizer.save_pretrained(output_dir)
            if global_step >= num_training_steps:
                return
Example 16
    def forward(self, inputs, lengths):
        """
        Decode the highest scoring sequence of tags.

        Args:
            inputs:  The unary emission tensor with shape `[batch_size, sequence_length, num_tags]`.
            lengths: The input length tensor with shape `[batch_size]`, storing the real length of each sequence.
        Returns:
            scores: The scores tensor containing the score for the Viterbi sequence, with shape `[batch_size]`.
            paths: The paths tensor containing the highest scoring tag indices, with shape `[batch_size, sequence_length]`.
        """
        batch_size, seq_len, n_labels = inputs.shape
        inputs_t = inputs.transpose([1, 0, 2])
        trans_exp = self.transitions.unsqueeze(0).expand(
            [batch_size, n_labels, n_labels])

        all_alpha = []
        historys = []

        if self.with_start_stop_tag:
            alpha = self._initialize_alpha(batch_size)
        else:
            alpha = paddle.zeros((batch_size, self.num_tags), dtype='float32')

        for i, logit in enumerate(inputs_t):
            # If not with_start_stop_tag, the first label has no antecedent tag.
            if i == 0 and not self.with_start_stop_tag:
                alpha = logit
                all_alpha.append(alpha)
                continue
            alpha_exp = alpha.unsqueeze(2)
            # alpha_trn_sum: batch_size, n_labels, n_labels
            alpha_trn_sum = alpha_exp + trans_exp

            # alpha_max: batch_size, n_labels
            # We don't include the emission scores here because the max does not depend on them (we add them in below)
            alpha_max = alpha_trn_sum.max(1)
            # If with_start_stop_tag, the first antecedent tag must be START; otherwise the first label has no antecedent tag.
            # So we can record the path from i=1.
            if i >= 1:
                alpha_argmax = alpha_trn_sum.argmax(1)
                historys.append(alpha_argmax)
            # Now add the emission scores
            alpha = alpha_max + logit
            all_alpha.append(alpha)

        # Get the valid alpha
        all_alpha = paddle.stack(all_alpha).transpose([1, 0, 2])
        batch_index = self._get_batch_index(batch_size)
        last_index = lengths - 1
        idxs = paddle.stack([batch_index, last_index], axis=1)
        alpha = paddle.gather_nd(all_alpha, idxs)

        if self.with_start_stop_tag:
            # The last one step
            alpha += self.transitions[self.stop_idx].unsqueeze(0).expand_as(
                alpha)
        scores, last_ids = alpha.max(1), alpha.argmax(1).numpy().tolist()
        # Trace back the best path
        # historys: seq_len, batch_size, n_labels
        historys = paddle.stack(historys).numpy()
        lengths_np = lengths.numpy()
        batch_path = []
        max_len = 0
        for batch_id in range(batch_size):
            best_last_tag = last_ids[batch_id]
            path = [best_last_tag]
            for hist in reversed(historys[:lengths_np[batch_id]]):
                # hist: batch_size, n_labels
                best_last_tag = hist[batch_id][best_last_tag]
                path.append(best_last_tag)
            path.reverse()
            max_len = max(max_len, len(path))
            # Pad to the max sequence length, so that the ChunkEvaluator can compute it
            batch_path.append(path)
        batch_path = [
            path + [0] * (max_len - len(path)) for path in batch_path
        ]
        batch_path = paddle.to_tensor(batch_path)
        return scores, batch_path
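The backtrace at the end walks the argmax history backwards from the best final tag. The loop in isolation, with a hand-made history for one sequence of length 3 and 3 tags:

import numpy as np

historys = np.array([[[0, 0, 1]], [[2, 0, 1]]])  # (seq_len - 1, batch, n_labels)
best_last_tag = 2                                # argmax of the final alpha
path = [best_last_tag]
for hist in reversed(historys):
    best_last_tag = hist[0][best_last_tag]
    path.append(best_last_tag)
path.reverse()
print([int(p) for p in path])  # [0, 1, 2]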
Example 17
    def __call__(self, box_cls, box_pred, scale_factor_wh, img_whwh):
        """
        Arguments:
            box_cls (Tensor): tensor of shape (batch_size, num_proposals, K).
                The tensor predicts the classification probability for each proposal.
            box_pred (Tensor): tensors of shape (batch_size, num_proposals, 4).
                The tensor predicts 4-vector (x,y,w,h) box
                regression values for every proposal
            scale_factor_wh (Tensor): tensors of shape [batch_size, 2], the (w, h) scale factor of each image
            img_whwh (Tensor): tensors of shape [batch_size, 4]
        Returns:
            bbox_pred (Tensor): tensors of shape [num_boxes, 6]; each row has 6 values:
                [label, confidence, xmin, ymin, xmax, ymax]
            bbox_num (Tensor): tensors of shape [batch_size], the number of RoIs in each image.
        """
        assert len(box_cls) == len(scale_factor_wh) == len(img_whwh)

        img_wh = img_whwh[:, :2]

        scores = F.sigmoid(box_cls)
        labels = paddle.arange(0, self.num_classes). \
            unsqueeze(0).tile([self.num_proposals, 1]).flatten(start_axis=0, stop_axis=1)

        classes_all = []
        scores_all = []
        boxes_all = []
        for i, (scores_per_image,
                box_pred_per_image) in enumerate(zip(scores, box_pred)):

            scores_per_image, topk_indices = scores_per_image.flatten(
                0, 1).topk(
                    self.num_proposals, sorted=False)
            labels_per_image = paddle.gather(labels, topk_indices, axis=0)

            box_pred_per_image = box_pred_per_image.reshape([-1, 1, 4]).tile(
                [1, self.num_classes, 1]).reshape([-1, 4])
            box_pred_per_image = paddle.gather(
                box_pred_per_image, topk_indices, axis=0)

            classes_all.append(labels_per_image)
            scores_all.append(scores_per_image)
            boxes_all.append(box_pred_per_image)

        bbox_num = paddle.zeros([len(scale_factor_wh)], dtype="int32")
        boxes_final = []

        for i in range(len(scale_factor_wh)):
            classes = classes_all[i]
            boxes = boxes_all[i]
            scores = scores_all[i]

            boxes[:, 0::2] = paddle.clip(
                boxes[:, 0::2], min=0, max=img_wh[i][0]) / scale_factor_wh[i][0]
            boxes[:, 1::2] = paddle.clip(
                boxes[:, 1::2], min=0, max=img_wh[i][1]) / scale_factor_wh[i][1]
            boxes_w, boxes_h = (boxes[:, 2] - boxes[:, 0]).numpy(), (
                boxes[:, 3] - boxes[:, 1]).numpy()

            keep = (boxes_w > 1.) & (boxes_h > 1.)

            if (keep.sum() == 0):
                bboxes = paddle.zeros([1, 6]).astype("float32")
            else:
                boxes = paddle.to_tensor(boxes.numpy()[keep]).astype("float32")
                classes = paddle.to_tensor(classes.numpy()[keep]).astype(
                    "float32").unsqueeze(-1)
                scores = paddle.to_tensor(scores.numpy()[keep]).astype(
                    "float32").unsqueeze(-1)

                bboxes = paddle.concat([classes, scores, boxes], axis=-1)

            boxes_final.append(bboxes)
            bbox_num[i] = bboxes.shape[0]

        bbox_pred = paddle.concat(boxes_final)
        return bbox_pred, bbox_num
Example 18
    def __init__(self,
                 dim,
                 input_resolution,
                 num_heads,
                 window_size=7,
                 shift_size=0,
                 mlp_ratio=4.,
                 qkv_bias=True,
                 qk_scale=None,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_layer=nn.LayerNorm):
        super().__init__()
        self.dim = dim
        self.input_resolution = input_resolution
        self.num_heads = num_heads
        self.window_size = window_size
        self.shift_size = shift_size
        self.mlp_ratio = mlp_ratio
        if min(self.input_resolution) <= self.window_size:
            # if window size is larger than input resolution, we don't partition windows
            self.shift_size = 0
            self.window_size = min(self.input_resolution)
        assert 0 <= self.shift_size < self.window_size, "shift_size must be in [0, window_size)"

        self.norm1 = norm_layer(dim)
        self.attn = WindowAttention(dim,
                                    window_size=(self.window_size,
                                                 self.window_size),
                                    num_heads=num_heads,
                                    qkv_bias=qkv_bias,
                                    qk_scale=qk_scale,
                                    attn_drop=attn_drop,
                                    proj_drop=drop)

        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
        self.norm2 = norm_layer(dim)
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp = Mlp(in_features=dim,
                       hidden_features=mlp_hidden_dim,
                       act_layer=act_layer,
                       drop=drop)

        if self.shift_size > 0:
            # calculate attention mask for SW-MSA
            H, W = self.input_resolution
            img_mask = paddle.zeros((1, H, W, 1))  # 1 H W 1
            h_slices = (slice(0, -self.window_size),
                        slice(-self.window_size,
                              -self.shift_size), slice(-self.shift_size, None))
            w_slices = (slice(0, -self.window_size),
                        slice(-self.window_size,
                              -self.shift_size), slice(-self.shift_size, None))
            cnt = 0
            for h in h_slices:
                for w in w_slices:
                    img_mask[:, h, w, :] = cnt
                    cnt += 1

            mask_windows = window_partition(
                img_mask, self.window_size)  # nW, window_size, window_size, 1
            mask_windows = mask_windows.reshape(
                (-1, self.window_size * self.window_size))
            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
            _mask = (attn_mask != 0).astype('float32')
            attn_mask *= 0
            attn_mask += _mask * float(-100)
            # attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0))
        else:
            attn_mask = None

        self.register_buffer("attn_mask", attn_mask)
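The two lines marked as a masked_fill replacement build the additive attention mask arithmetically: positions whose window region ids differ get -100, matching ones stay 0. The arithmetic in isolation, for one window of three tokens from two regions:

import paddle

mask_windows = paddle.to_tensor([[0., 0., 1.]])  # region id per token
attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)
attn_mask = (attn_mask != 0).astype('float32') * float(-100)
print(attn_mask.numpy()[0])  # 0 where region ids match, -100 elsewhere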
Example 19
def test_assign_output(array):
    result1 = paddle.zeros(shape=[3, 2], dtype='float32')
    paddle.assign(array, result1)  # result1 = [[1, 1], [3, 4], [1, 3]]
    return result1
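Called with a concrete 3 x 2 array, the function overwrites the zero tensor in place via paddle.assign, e.g.:

import numpy as np
import paddle

array = np.array([[1, 1], [3, 4], [1, 3]], dtype='float32')
result = test_assign_output(array)
print(result.numpy())  # [[1. 1.] [3. 4.] [1. 3.]]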
Example 20
    def __init__(
        self,
        img_size=224,
        tokens_type="performer",
        in_chans=3,
        embed_dim=768,
        depth=12,
        num_heads=12,
        mlp_ratio=4.0,
        qkv_bias=False,
        qk_scale=None,
        drop_rate=0.0,
        attn_drop_rate=0.0,
        drop_path_rate=0.0,
        norm_layer=nn.LayerNorm,
        token_dim=64,
        class_dim=1000,
    ):
        super().__init__()
        self.class_dim = class_dim
        # num_features for consistency with other models
        self.num_features = self.embed_dim = embed_dim

        self.tokens_to_token = T2T_Layer(
            img_size=img_size,
            tokens_type=tokens_type,
            in_chans=in_chans,
            embed_dim=embed_dim,
            token_dim=token_dim,
        )

        num_patches = self.tokens_to_token.num_patches

        self.cls_token = add_parameter(self, paddle.zeros((1, 1, embed_dim)))
        self.pos_embed = add_parameter(
            self,
            get_sinusoid_encoding(n_position=num_patches + 1, d_hid=embed_dim))
        self.pos_drop = nn.Dropout(p=drop_rate)

        dpr = np.linspace(0, drop_path_rate,
                          depth)  # stochastic depth decay rule
        self.blocks = nn.LayerList([
            Block(
                dim=embed_dim,
                num_heads=num_heads,
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                attn_drop=attn_drop_rate,
                drop_path=dpr[i],
                norm_layer=norm_layer,
            ) for i in range(depth)
        ])
        self.norm = norm_layer(embed_dim)

        # Classifier head
        if class_dim > 0:
            self.head = nn.Linear(embed_dim, class_dim)

        trunc_normal_(self.cls_token)
        self.apply(self._init_weights)
Example 21
                    log_writer.add_scalar('eval/acc', acc, step=step)
                    log.debug('acc %.5f' % acc)
                    if args.save_dir is not None:
                        P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
if args.save_dir is not None:
    P.save(model.state_dict(), args.save_dir / 'ckpt.bin')
if args.inference_model_dir is not None:

    class InferenceModel(ErnieModelForSequenceClassification):
        def forward(self, ids, sids):
            _, logits = super(InferenceModel, self).forward(ids, sids)
            return logits

    model.__class__ = InferenceModel
    log.debug('saving inference model')
    src_placeholder = P.zeros([2, 2], dtype='int64')
    sent_placeholder = P.zeros([2, 2], dtype='int64')
    _, static = P.jit.TracedLayer.trace(
        model, inputs=[src_placeholder, sent_placeholder])
    static.save_inference_model(str(args.inference_model_dir))

    #class InferenceModel(ErnieModelForSequenceClassification):
    #    @P.jit.to_static
    #    def forward(self, ids, sids):
    #        _, logits =  super(InferenceModel, self).forward(ids, sids, labels=None)
    #        return logits
    #model.__class__ = InferenceModel
    #src_placeholder = P.zeros([2, 2], dtype='int64')
    #sent_placeholder = P.zeros([2, 2], dtype='int64')
    #P.jit.save(model, args.inference_model_dir, input_var=[src_placeholder, sent_placeholder])
    log.debug('done')
Example 22
    def __init__(self,
                 input_nc=3,
                 output_nc=3,
                 ngf=128,
                 n_blocks=6,
                 norm_layer=nn.InstanceNorm2D,
                 load_checkpoint=None):
        super(MSGNet, self).__init__()
        self.gram = GramMatrix()
        block = Bottleneck
        upblock = UpBottleneck
        expansion = 4

        model1 = [
            ConvLayer(input_nc, 64, kernel_size=7, stride=1),
            norm_layer(64),
            nn.ReLU(),
            block(64, 32, 2, 1, norm_layer),
            block(32 * expansion, ngf, 2, 1, norm_layer)
        ]

        self.model1 = nn.Sequential(*tuple(model1))

        model = []
        model += model1

        self.ins = Inspiration(ngf * expansion)
        model.append(self.ins)
        for i in range(n_blocks):
            model += [block(ngf * expansion, ngf, 1, None, norm_layer)]

        model += [
            upblock(ngf * expansion, 32, 2, norm_layer),
            upblock(32 * expansion, 16, 2, norm_layer),
            norm_layer(16 * expansion),
            nn.ReLU(),
            ConvLayer(16 * expansion, output_nc, kernel_size=7, stride=1)
        ]
        model = tuple(model)
        self.model = nn.Sequential(*model)

        if load_checkpoint is not None:
            self.model_dict = paddle.load(load_checkpoint)
            self.set_dict(self.model_dict)
            print("load custom checkpoint success")

        else:
            checkpoint = os.path.join(self.directory, 'style_paddle.pdparams')
            model_dict = paddle.load(checkpoint)
            model_dict_clone = model_dict.copy()
            for key, value in model_dict_clone.items():
                if key.endswith("scale"):
                    name = key.rsplit('.', 1)[0] + '.bias'
                    model_dict[name] = paddle.zeros(
                        shape=model_dict[name].shape, dtype='float32')
                    model_dict[key] = paddle.ones(shape=model_dict[key].shape,
                                                  dtype='float32')
            self.set_dict(model_dict)
            self.model_dict = model_dict
            print("load pretrained checkpoint success")

        self._vgg = None
Example 23
    def decode(self, x_tree_vecs, prob_decode):
        """
        Decode the tree structure from the tree latent space.
        Args:
            x_tree_vecs(tensor): tree latent representation.
            prob_decode(bool): if True, sample decode decisions from a Bernoulli distribution.
        Returns:
            root node and all nodes.
        """
        assert x_tree_vecs.shape[0] == 1
        stack = []
        init_hiddens = paddle.zeros([1, self.hidden_size])
        zero_pad = paddle.zeros([1, 1, self.hidden_size])
        contexts = paddle.zeros([1]).astype('int64')

        root_score = self.aggregate(init_hiddens, contexts, x_tree_vecs, 'word')
        root_wid = paddle.argmax(root_score, axis=1)
        root_wid = int(root_wid.numpy())

        root = MolTreeNode(self.vocab.get_smiles(root_wid))
        root.wid = root_wid
        root.idx = 0
        stack.append((root, self.vocab.get_slots(root.wid)))

        all_nodes = [root]
        h = {}
        for step in range(MAX_DECODE_LEN):
            node_x, fa_slot = stack[-1]
            cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors]
            if len(cur_h_nei) > 0:
                cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
            else:
                cur_h_nei = zero_pad

            cur_x = paddle.to_tensor([node_x.wid])
            cur_x = self.embedding(cur_x)
            cur_h = paddle.sum(cur_h_nei, axis=1)
            stop_hiddens = paddle.concat([cur_x, cur_h], axis=1)
            stop_hiddens = F.relu(self.U_i(stop_hiddens))
            stop_score = self.aggregate(stop_hiddens, contexts, x_tree_vecs, 'stop')

            if prob_decode:
                backtrack = (paddle.bernoulli(F.sigmoid(stop_score)).item() == 0)
            else:
                backtrack = (float(stop_score.numpy()) < 0)

            if not backtrack:
                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                pred_score = self.aggregate(new_h, contexts, x_tree_vecs, 'word')

                if prob_decode:
                    sort_wid = paddle.multinomial(F.softmax(pred_score, axis=1).squeeze(), 5)
                else:
                    sort_wid = paddle.argsort(
                        pred_score, axis=1, descending=True)
                    sort_wid = sort_wid.squeeze()

                next_wid = None
                for wid in sort_wid[:5]:
                    slots = self.vocab.get_slots(wid)
                    node_y = MolTreeNode(self.vocab.get_smiles(wid))
                    if have_slots(fa_slot, slots) and can_assemble(node_x, node_y):
                        next_wid = wid
                        next_slots = slots
                        break

                if next_wid is None:
                    backtrack = True
                else:
                    node_y = MolTreeNode(self.vocab.get_smiles(next_wid))
                    node_y.wid = int(next_wid.numpy())
                    node_y.idx = len(all_nodes)
                    node_y.neighbors.append(node_x)
                    h[(node_x.idx, node_y.idx)] = new_h[0]
                    stack.append((node_y, next_slots))
                    all_nodes.append(node_y)

            if backtrack:
                if len(stack) == 1:
                    break

                node_fa, _ = stack[-2]
                cur_h_nei = [h[(node_y.idx, node_x.idx)] for node_y in node_x.neighbors if node_y.idx != node_fa.idx]
                if len(cur_h_nei) > 0:
                    cur_h_nei = paddle.reshape(paddle.stack(cur_h_nei, axis=0), shape=[1, -1, self.hidden_size])
                else:
                    cur_h_nei = zero_pad

                new_h = GRU(cur_x, cur_h_nei, self.W_z, self.W_r, self.U_r, self.W_h)
                h[(node_x.idx, node_fa.idx)] = new_h[0]
                node_fa.neighbors.append(node_x)
                stack.pop()

        return root, all_nodes
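
The loop above is a depth-first generation: a positive stop score expands a new child, a negative one backtracks toward the root. A toy trace of that stack protocol (plain ints instead of MolTreeNode objects, decisions hard-coded for illustration):

# Toy sketch of the expand/backtrack stack discipline used by the decoder;
# the real code derives each decision from stop_score.
stack = [0]                                    # root
nodes = [0]
decisions = [True, True, False, False, False]  # expand a child this step?
for expand in decisions:
    if expand:                                 # "stop score > 0": grow a child
        child = len(nodes)
        nodes.append(child)
        stack.append(child)
    else:                                      # backtrack to the parent
        if len(stack) == 1:                    # back at the root: finished
            break
        stack.pop()
print(nodes)  # [0, 1, 2]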
Esempio n. 24
0
    def forward(
        self,
        input_values,
        attention_mask=None,
        output_attentions=None,
        output_hidden_states=None,
        return_dict=None,
    ):

        # Config-based fallbacks are disabled here: attentions and hidden
        # states are never collected, and a dict-like output is always used.
        output_attentions = False
        output_hidden_states = False
        return_dict = True

        hidden_states = self.feature_extractor(input_values)
        hidden_states = hidden_states.transpose((0, 2, 1))

        if attention_mask is not None:
            # compute real output lengths according to convolution formula
            output_lengths = self._get_feat_extract_output_lengths(
                attention_mask.sum(-1))

            attention_mask = paddle.zeros(hidden_states.shape[:2],
                                          dtype=hidden_states.dtype)
            # these two operations make sure that all values
            # before the output-length indices are attended to
            attention_mask[(paddle.arange(0, end=attention_mask.shape[0]),
                            output_lengths - 1)] = 1
            attention_mask = attention_mask.flip([-1]).cumsum(-1).flip(
                [-1]).astype('bool')
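            # Illustration: for a 5-frame sequence whose real length is 4,
            # the scatter above yields [0, 0, 0, 1, 0]; flip -> [0, 1, 0, 0, 0],
            # cumsum -> [0, 1, 1, 1, 1], flip -> [1, 1, 1, 1, 0], i.e. exactly
            # the first 4 frames are attended to.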

        hidden_states = self.feature_projection(hidden_states)

        if self.config.apply_spec_augment and self.training:
            batch_size, sequence_length, hidden_size = hidden_states.shape
            # SpecAugment is effectively disabled: remove this guard to
            # enable the masking below
            assert False
            # apply SpecAugment along time axis
            if self.config.mask_time_prob > 0:
                mask_time_indices = _compute_mask_indices(
                    (batch_size, sequence_length),
                    self.config.mask_time_prob,
                    self.config.mask_time_length,
                    attention_mask=attention_mask,
                    min_masks=2,
                )
                hidden_states[paddle.to_tensor(
                    mask_time_indices)] = self.masked_spec_embed.astype(
                        hidden_states.dtype)

            # apply SpecAugment along feature axis
            if self.config.mask_feature_prob > 0:
                mask_feature_indices = _compute_mask_indices(
                    (batch_size, hidden_size),
                    self.config.mask_feature_prob,
                    self.config.mask_feature_length,
                )
                mask_feature_indices = paddle.to_tensor(mask_feature_indices)
                hidden_states[mask_feature_indices[:, None].expand(
                    [-1, sequence_length, -1])] = 0

        encoder_outputs = self.encoder(
            hidden_states,
            attention_mask=attention_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        hidden_states = encoder_outputs[0]

        if not return_dict:
            return (hidden_states, ) + encoder_outputs[1:]

        return BaseModelOutput(
            last_hidden_state=hidden_states,
            hidden_states=encoder_outputs.hidden_states,
            attentions=encoder_outputs.attentions,
        )
Esempio n. 25
0
    def forward(
        self,
        input_ids=None,
        token_type_ids=None,
        attention_mask=None,
        mems=None,
        perm_mask=None,
        target_mapping=None,
        input_mask=None,
        head_mask=None,
        inputs_embeds=None,
        use_mems_train=False,
        use_mems_eval=False,
        output_attentions=False,
        output_hidden_states=False,
        return_dict=False,
    ):

        if self.training:
            use_mems = use_mems_train
        else:
            use_mems = use_mems_eval

        # The original code for XLNet uses shapes [len, bsz] with the batch dimension at the end
        # but we want a unified interface in the library with the batch size on the first dimension
        # so we move here the first dimension (batch) to the end
        if input_ids is not None and inputs_embeds is not None:
            raise ValueError(
                "You cannot specify both input_ids and inputs_embeds at the same time"
            )
        elif input_ids is not None:
            input_ids = paddle.transpose(input_ids, perm=[1, 0])
            qlen, bsz = input_ids.shape[0], input_ids.shape[1]
        elif inputs_embeds is not None:
            inputs_embeds = paddle.transpose(inputs_embeds, perm=[1, 0])
            qlen, bsz = inputs_embeds.shape[0], inputs_embeds.shape[1]
        else:
            raise ValueError(
                "You have to specify either input_ids or inputs_embeds")

        token_type_ids = token_type_ids.transpose(
            [1, 0]) if token_type_ids is not None else None
        input_mask = input_mask.transpose(
            [1, 0]) if input_mask is not None else None
        attention_mask = attention_mask.transpose(
            [1, 0]) if attention_mask is not None else None
        perm_mask = perm_mask.transpose(
            [1, 2, 0]) if perm_mask is not None else None
        target_mapping = target_mapping.transpose(
            [1, 2, 0]) if target_mapping is not None else None

        mlen = (mems[0].shape[0]
                if mems is not None and mems[0] is not None else 0)
        klen = mlen + qlen

        # Attention mask
        # Causal attention mask
        if self.attn_type == "uni":
            attn_mask = self.create_mask(qlen, mlen)
            attn_mask = paddle.unsqueeze(attn_mask, axis=[2, 3])
        elif self.attn_type == "bi":
            attn_mask = None
        else:
            raise ValueError("Unsupported attention type: {}".format(
                self.attn_type))

        # Data mask: input mask & perm mask
        assert input_mask is None or attention_mask is None, (
            "You can only use one of input_mask (uses 1 for padding) "
            "or attention_mask (uses 0 for padding, added for compatibility "
            "with BERT). Please choose one.")
        if input_mask is None and attention_mask is not None:
            input_mask = 1.0 - attention_mask
        if input_mask is not None and perm_mask is not None:
            data_mask = paddle.unsqueeze(input_mask, axis=0) + perm_mask
        elif input_mask is not None and perm_mask is None:
            data_mask = paddle.unsqueeze(input_mask, axis=0)
        elif input_mask is None and perm_mask is not None:
            data_mask = perm_mask
        else:
            data_mask = None

        if data_mask is not None:
            # All mems can be attended to
            if mlen > 0:
                mems_mask = paddle.cast(paddle.zeros(
                    [data_mask.shape[0], mlen, bsz]),
                                        dtype=dtype_float)
                data_mask = paddle.concat([mems_mask, data_mask], axis=1)
            if attn_mask is None:
                attn_mask = paddle.unsqueeze(data_mask, axis=-1)
            else:
                attn_mask += paddle.unsqueeze(data_mask, axis=-1)

        if attn_mask is not None:
            attn_mask = paddle.cast((attn_mask > 0), dtype=dtype_float)

        if attn_mask is not None:
            non_tgt_mask = paddle.cast(-paddle.eye(qlen), dtype=dtype_float)

            if mlen > 0:
                non_tgt_mask = paddle.concat([
                    paddle.cast(paddle.zeros([qlen, mlen]), dtype=dtype_float),
                    non_tgt_mask
                ],
                                             axis=-1)
            non_tgt_mask = paddle.cast((
                (attn_mask + paddle.unsqueeze(non_tgt_mask, axis=[2, 3])) > 0),
                                       dtype=dtype_float)
        else:
            non_tgt_mask = None

        # Word embeddings and prepare h & g hidden states
        if inputs_embeds is not None:
            word_emb_k = inputs_embeds
        else:
            word_emb_k = self.word_embedding(input_ids)

        output_h = self.dropout(word_emb_k)
        if target_mapping is not None:
            word_emb_q = self.mask_emb.expand(
                [target_mapping.shape[0], bsz, -1])
            output_g = self.dropout(word_emb_q)
        else:
            output_g = None

        # Segment embedding
        if token_type_ids is not None:
            # Convert `token_type_ids` to one-hot `seg_mat`
            if mlen > 0:
                mem_pad = paddle.zeros(shape=[mlen, bsz], dtype='int64')
                cat_ids = paddle.concat(x=[mem_pad, token_type_ids], axis=0)
            else:
                cat_ids = token_type_ids

            # `1` indicates not in the same segment [qlen x klen x bsz]
            seg_mat = paddle.cast(paddle.unsqueeze(token_type_ids, axis=1) !=
                                  paddle.unsqueeze(cat_ids, axis=0),
                                  dtype='int64')
            seg_mat = paddle.cast(F.one_hot(seg_mat, num_classes=2),
                                  dtype=dtype_float)
        else:
            seg_mat = None
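        # Example: with mlen = 0 and two tokens in segments [0, 1], seg_mat
        # one-hots to [1, 0] where tokens i and j share a segment and to
        # [0, 1] otherwise (XLNet's relative segment encoding).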

        # Positional encoding
        pos_emb = self.relative_positional_encoding(qlen, klen, bsz=bsz)
        pos_emb = self.dropout(pos_emb)

        # Prepare head mask if needed
        # 1.0 in head_mask indicate we keep the head
        # Attention_probs has shape bsz x n_heads x N x N
        # Input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] (a head_mask for each layer)
        # And head_mask is converted to shape [num_hidden_layers x qlen x klen x bsz x n_head]
        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(
                    0).unsqueeze(0)
                head_mask = head_mask.expand([self.n_layer, -1, -1, -1, -1])
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(1).unsqueeze(1)
        else:
            head_mask = [None] * self.n_layer

        new_mems = ()
        if mems is None:
            mems = [None] * len(self.layer)

        attentions = [] if output_attentions else None
        hidden_states = [] if output_hidden_states else None
        for i, layer_module in enumerate(self.layer):
            if use_mems:
                # Cache new mems
                new_mems = new_mems + (self.cache_mem(output_h, mems[i]), )
            if output_hidden_states:
                hidden_states.append((
                    output_h, output_g) if output_g is not None else output_h)

            outputs = layer_module(
                output_h,
                output_g,
                attn_mask_h=non_tgt_mask,
                attn_mask_g=attn_mask,
                r=pos_emb,
                seg_mat=seg_mat,
                mems=mems[i],
                target_mapping=target_mapping,
                head_mask=head_mask[i],
                output_attentions=output_attentions,
            )
            output_h, output_g = outputs[:2]

            if output_attentions:
                attentions.append(outputs[2])

        # Add last hidden state
        if output_hidden_states:
            hidden_states.append((
                output_h, output_g) if output_g is not None else output_h)

        output = self.dropout(output_g if output_g is not None else output_h)

        # Prepare outputs, we transpose back here to shape [bsz, len, hidden_dim] (cf. beginning of forward() method)
        output = paddle.transpose(output, perm=[1, 0, 2])

        if not use_mems:
            new_mems = None

        if output_hidden_states:
            if output_g is not None:
                hidden_states = tuple(
                    paddle.transpose(h, perm=[1, 0, 2]) for hs in hidden_states
                    for h in hs)
            else:
                hidden_states = tuple(
                    paddle.transpose(hs, perm=[1, 0, 2])
                    for hs in hidden_states)

        if output_attentions:
            if target_mapping is not None:
                # When target_mapping is provided, there are 2-tuple of attentions
                attentions = tuple(
                    tuple(
                        paddle.transpose(att_stream, perm=[2, 3, 0, 1])
                        for att_stream in t) for t in attentions)
            else:
                attentions = tuple(
                    paddle.transpose(t, perm=[2, 3, 0, 1]) for t in attentions)

        if not return_dict:
            return tuple(
                v for v in [output, new_mems, hidden_states, attentions]
                if v is not None)
        return {
            "last_hidden_state": output,
            "mems": new_mems,
            "hidden_states": hidden_states,
            "attentions": attentions,
        }
Esempio n. 26
0
    def forward(self,
                input_ids=None,
                bbox=None,
                image=None,
                token_type_ids=None,
                position_ids=None,
                attention_mask=None,
                head_mask=None,
                output_hidden_states=None,
                output_attentions=None):
        input_shape = input_ids.shape

        if bbox is None:
            # the default boxes must exist before bbox.dtype is read below
            bbox = paddle.zeros(input_shape + [4])

        visual_shape = list(input_shape)
        visual_shape[1] = self.config["image_feature_pool_shape"][
            0] * self.config["image_feature_pool_shape"][1]
        final_shape = list(input_shape)
        final_shape[1] += visual_shape[1]

        visual_bbox_x = (paddle.arange(
            0,
            1000 * (self.config["image_feature_pool_shape"][1] + 1),
            1000,
            dtype=bbox.dtype, ) // self.config["image_feature_pool_shape"][1])
        visual_bbox_y = (paddle.arange(
            0,
            1000 * (self.config["image_feature_pool_shape"][0] + 1),
            1000,
            dtype=bbox.dtype, ) // self.config["image_feature_pool_shape"][0])

        expand_shape = self.config["image_feature_pool_shape"][0:2]

        visual_bbox = paddle.stack(
            [
                visual_bbox_x[:-1].expand(expand_shape),
                visual_bbox_y[:-1].expand(expand_shape[::-1]).transpose([1, 0]),
                visual_bbox_x[1:].expand(expand_shape),
                visual_bbox_y[1:].expand(expand_shape[::-1]).transpose([1, 0]),
            ],
            axis=-1, ).reshape([-1, bbox.shape[-1]])
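        # E.g. for an image_feature_pool_shape of [7, 7] the grid edges are
        # paddle.arange(0, 8000, 1000) // 7 = [0, 142, 285, ..., 857, 1000],
        # so visual_bbox tiles the 0-1000 normalized page with 49 boxes.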
        visual_bbox = visual_bbox.expand([final_shape[0], -1, -1])
        final_bbox = paddle.concat([bbox, visual_bbox], axis=1)

        if attention_mask is None:
            attention_mask = paddle.ones(input_shape)

        visual_attention_mask = paddle.ones(visual_shape)

        attention_mask = attention_mask.astype(visual_attention_mask.dtype)

        final_attention_mask = paddle.concat(
            [attention_mask, visual_attention_mask], axis=1)

        if token_type_ids is None:
            token_type_ids = paddle.zeros(input_shape, dtype=paddle.int64)

        if position_ids is None:
            seq_length = input_shape[1]
            position_ids = self.embeddings.position_ids[:, :seq_length]
            position_ids = position_ids.expand_as(input_ids)

        visual_position_ids = paddle.arange(0, visual_shape[1]).expand(
            [input_shape[0], -1])
        final_position_ids = paddle.concat(
            [position_ids, visual_position_ids], axis=1)

        text_layout_emb = self._calc_text_embeddings(
            input_ids=input_ids,
            bbox=bbox,
            token_type_ids=token_type_ids,
            position_ids=position_ids, )

        visual_emb = self._calc_img_embeddings(
            image=image,
            bbox=visual_bbox,
            position_ids=visual_position_ids, )
        final_emb = paddle.concat([text_layout_emb, visual_emb], axis=1)

        extended_attention_mask = final_attention_mask.unsqueeze(1).unsqueeze(2)

        extended_attention_mask = (1.0 - extended_attention_mask) * -10000.0

        if head_mask is not None:
            if head_mask.dim() == 1:
                head_mask = head_mask.unsqueeze(0).unsqueeze(0).unsqueeze(
                    -1).unsqueeze(-1)
                head_mask = head_mask.expand(
                    [self.config["num_hidden_layers"], -1, -1, -1, -1])
            elif head_mask.dim() == 2:
                head_mask = head_mask.unsqueeze(1).unsqueeze(-1).unsqueeze(-1)
            head_mask = head_mask.astype(next(self.parameters()).dtype)
        else:
            head_mask = [None] * self.config["num_hidden_layers"]

        encoder_outputs = self.encoder(
            final_emb,
            extended_attention_mask,
            bbox=final_bbox,
            position_ids=final_position_ids,
            head_mask=head_mask,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states, )
        sequence_output = encoder_outputs[0]
        pooled_output = self.pooler(sequence_output)

        return sequence_output, pooled_output
Esempio n. 27
0
def init_memory(batch_size, memory_length, d_model, n_layers):
    return [
        paddle.zeros([batch_size, memory_length, d_model], dtype="float32")
        for _ in range(n_layers)
    ]
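
A quick usage sketch of this helper (sizes are hypothetical, in the style of an ERNIE-Doc-like recurrence over layers):

import paddle

# 4 sequences, 128 cached positions, 768-dim states, 12 layers (assumed)
mems = init_memory(batch_size=4, memory_length=128, d_model=768, n_layers=12)
print(len(mems), mems[0].shape)  # 12 [4, 128, 768]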
Esempio n. 28
0
def compute_alpha(beta, t):
    # prepend a zero so that index t + 1 holds prod_{s <= t} (1 - beta_s)
    beta = paddle.concat([paddle.zeros([1]), beta], 0)
    a = (1 - beta).cumprod(0).index_select(t + 1, 0).reshape([-1, 1, 1, 1])
    return a
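
A minimal check of this helper, assuming a DDPM-style linear beta schedule (the schedule itself is not part of the snippet):

import paddle

beta = paddle.linspace(1e-4, 0.02, 1000)  # assumed noise schedule
t = paddle.to_tensor([0, 999])
a = compute_alpha(beta, t)  # prod of (1 - beta_s) for s <= t, per entry of t
print(a.shape)  # [2, 1, 1, 1]; a[0] is close to 1 - 1e-4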
Esempio n. 29
0
    def __init__(self, num_classes=50, max_point=2048):
        super(PointNet_Seg, self).__init__()
        self.max_point = max_point
        self.input_transform_net = nn.Sequential(
            nn.Conv1D(3, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),
            nn.MaxPool1D(max_point)
        )
        self.input_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            # transform head starts as the identity: zero weights,
            # bias initialized to a flattened eye(3)
            nn.Linear(
                256, 9,
                weight_attr=paddle.framework.ParamAttr(
                    initializer=paddle.nn.initializer.Assign(
                        paddle.zeros((256, 9)))),
                bias_attr=paddle.framework.ParamAttr(
                    initializer=paddle.nn.initializer.Assign(
                        paddle.reshape(paddle.eye(3), [-1]))))
        )
        self.mlp_1 = nn.Sequential(
            nn.Conv1D(3, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
        )
        self.feature_transform_net = nn.Sequential(
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),

            nn.MaxPool1D(max_point)
        )
        self.feature_fc = nn.Sequential(
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 64*64)
        )
        self.mlp_2 = nn.Sequential(
            nn.Conv1D(64, 64, 1),
            nn.BatchNorm(64),
            nn.ReLU(),
            nn.Conv1D(64, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 1024, 1),
            nn.BatchNorm(1024),
            nn.ReLU(),
        )
        self.seg_net = nn.Sequential(
            nn.Conv1D(1024+64, 512, 1),
            nn.BatchNorm(512),
            nn.ReLU(),
            nn.Conv1D(512, 256, 1),
            nn.BatchNorm(256),
            nn.ReLU(),
            nn.Conv1D(256, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, 128, 1),
            nn.BatchNorm(128),
            nn.ReLU(),
            nn.Conv1D(128, num_classes, 1)
        )
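
Because of the Assign initializers above, the transform head starts out predicting the 3x3 identity. A minimal check, assuming the conventional [batch, 3, num_points] input layout:

import paddle

model = PointNet_Seg()
model.eval()                          # use fixed batch-norm statistics
x = paddle.randn([2, 3, 2048])        # 2 clouds of 2048 points
feat = model.input_transform_net(x)   # -> [2, 1024, 1]
t = model.input_fc(feat.squeeze(-1))  # -> [2, 9]
print(t.reshape([2, 3, 3])[0])        # identity matrix at initialization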
Esempio n. 30
0
    def _append_optimize_op(self, block, param_and_grad):
        assert isinstance(block, fluid.framework.Block)
        block.program._use_lamb = True

        m = self._get_accumulator(self._moment1_acc_str, param_and_grad[0])
        v = self._get_accumulator(self._moment2_acc_str, param_and_grad[0])
        beta_1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
                                               param_and_grad[0])
        beta_2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
                                               param_and_grad[0])

        beta_1 = layers.fill_constant(dtype='float32',
                                      shape=[1],
                                      value=self._beta1,
                                      name='lamb_beta_1')
        beta_2 = layers.fill_constant(dtype='float32',
                                      shape=[1],
                                      value=self._beta2,
                                      name='lamb_beta_2')
        epsilon = layers.fill_constant(dtype='float32',
                                       shape=[1],
                                       value=self._epsilon,
                                       name='epsilon')

        one = paddle.ones(shape=[1]).astype('float32')
        zero = paddle.zeros(shape=[1]).astype('float32')

        next_m = paddle.multiply(m, beta_1) + paddle.multiply(
            param_and_grad[1], one - beta_1)
        next_v = paddle.multiply(v, beta_2) + paddle.multiply(
            paddle.pow(param_and_grad[1], 2), one - beta_2)

        beta1_correction = one - beta_1_pow_acc
        beta2_correction = one - beta_2_pow_acc

        next_m_unbiased = next_m / beta1_correction
        next_v_unbiased = next_v / beta2_correction

        update = next_m_unbiased / (paddle.sqrt(next_v_unbiased) + epsilon)

        weight_decay = self._lamb_weight_decay
        if self._exclude_from_weight_decay_fn is not None and \
                self._exclude_from_weight_decay_fn(param_and_grad[0]):
            # keep the exclusion local so one excluded parameter does not
            # zero the decay for every parameter processed after it
            weight_decay = 0.0
        update += weight_decay * param_and_grad[0]

        w_norm = paddle.norm(param_and_grad[0], p=2)
        g_norm = paddle.norm(update, p=2)

        learning_rate = self._create_param_lr(param_and_grad)

        ratio = paddle.where(
            paddle.greater_than(w_norm, zero),
            paddle.where(paddle.greater_than(g_norm, zero), (w_norm / g_norm),
                         one), one)
        update_with_lr = ratio * learning_rate * update
        next_param = param_and_grad[0] - update_with_lr

        beta_1_pow_acc *= beta_1
        beta_2_pow_acc *= beta_2

        paddle.assign(next_m, m)
        paddle.assign(next_v, v)
        paddle.assign(next_param, param_and_grad[0])

        return None
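
For intuition, the trust-ratio scaling above in isolation (toy eager-mode tensors; the values are illustrative only):

import paddle

w = paddle.to_tensor([3.0, 4.0])  # parameter block, ||w||_2 = 5
u = paddle.to_tensor([0.6, 0.8])  # Adam-style update, ||u||_2 = 1
ratio = paddle.norm(w, p=2) / paddle.norm(u, p=2)  # layer-wise trust ratio = 5
step = ratio * 0.001 * u          # base learning rate rescaled per layer
print(float(ratio), step.numpy())  # 5.0 [0.003 0.004]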