Esempio n. 1
0
    def forward(self, inputs, targets):
        """Compute the batch-hard triplet ranking loss.

        For each row of ``inputs`` (the anchor) the largest distance to a
        sample carrying the same label and the smallest distance to a sample
        carrying a different label are selected; both distance vectors are
        then handed to ``self.ranking_loss``.

        Args:
            inputs (Tensor): feature matrix with shape (batch_size, feat_dim).
            targets (Tensor): ground truth labels with shape (batch_size),
                one label per row of ``inputs``.

        Returns:
            The value of ``self.ranking_loss(dist_an, dist_ap, y)`` where
            ``y`` is a tensor of ones shaped like ``dist_an``.
        """
        n = inputs.size(0)
        # Build the fill tensors on the device of the features instead of a
        # hard-coded "cuda", so the loss also works for CPU inputs.
        device = inputs.device

        # Compute pairwise distance, replace by the official when merged
        dist = flow.pow(inputs, 2).sum(dim=1).expand(n, n)
        dist = dist + flow.transpose(dist, dim0=1, dim1=0)
        temp1 = -2 * flow.matmul(inputs, flow.transpose(inputs, dim0=1,
                                                        dim1=0))
        dist = flow.add(dist, temp1)
        # Clamp before the square root so it never sees a non-positive value.
        dist = flow.sqrt(flow.clamp(dist, min=1e-12))

        # For each anchor, find the hardest positive and negative
        mask = targets.expand(n, n).eq(
            flow.transpose(targets.expand(n, n), dim0=1, dim1=0))
        # Complement of the same-label mask; loop-invariant, so build it once.
        mask_rev = 1 - mask
        dist_ap, dist_an = [], []
        # y1 / y2 replace the masked-out entries: zeros never win the max,
        # the huge value never wins the min.
        y1 = flow.zeros((1, n), dtype=flow.float32).to(device)
        y2 = flow.Tensor(np.exp(100 * np.ones((1, n)))).to(device)

        for i in range(n):
            row = [(i, i + 1, 1)]
            temp_dist = flow.slice(dist, row)
            temp_mask = flow.slice(mask, row)
            temp_mask_rev = flow.slice(mask_rev, row)
            dist_ap.append(temp_mask.where(temp_dist, y1).max().unsqueeze(0))
            dist_an.append(
                temp_mask_rev.where(temp_dist, y2).min().unsqueeze(0))
        dist_ap = flow.cat(dist_ap)
        dist_an = flow.cat(dist_an)

        # Compute ranking hinge loss
        y = flow.ones_like(dist_an)
        return self.ranking_loss(dist_an, dist_ap, y)
Esempio n. 2
0
 def convert(box_xywh):
     """Convert (x, y, w, h) boxes into (left-top, right-bottom) corners.

     The last axis of ``box_xywh`` is split into its first two entries
     (center) and the following two entries (size). The two returned blobs
     hold the element-wise minimum and maximum of the two opposite corners,
     so ``box_lt <= box_rb`` holds even when a width or height is negative.
     """
     box_xy = flow.slice(box_xywh,
                         begin=[None, None, None, None, None, 0],
                         size=[None, None, None, None, None, 2])
     box_wh = flow.slice(box_xywh,
                         begin=[None, None, None, None, None, 2],
                         size=[None, None, None, None, None, 2])
     corner_a = box_xy - box_wh * 0.5
     corner_b = box_xy + box_wh * 0.5
     # Both extrema must be taken from the untouched corners: overwriting
     # box_lt first would make the maximum always return corner_b.
     box_lt = flow.math.minimum(corner_a, corner_b)
     box_rb = flow.math.maximum(corner_a, corner_b)
     return box_lt, box_rb
Esempio n. 3
0
def SQuAD(
    input_ids_blob,
    input_mask_blob,
    token_type_ids_blob,
    vocab_size,
    seq_length=512,
    hidden_size=768,
    num_hidden_layers=12,
    num_attention_heads=12,
    intermediate_size=3072,
    hidden_act="gelu",
    hidden_dropout_prob=0.1,
    attention_probs_dropout_prob=0.1,
    max_position_embeddings=512,
    type_vocab_size=16,
    initializer_range=0.02,
):
    """Build a BERT backbone plus a two-unit head and return start/end logits.

    All arguments are forwarded unchanged to ``bert_util.BertBackbone``.
    The backbone's sequence output is flattened to ``[-1, hidden_size]``,
    projected to two units, reshaped to ``[-1, seq_length, 2]`` and split
    along the last axis.

    Returns:
        A ``(start_logits, end_logits)`` pair, each a width-1 slice of the
        last axis of the projected logits.
    """
    backbone_kwargs = dict(
        input_ids_blob=input_ids_blob,
        input_mask_blob=input_mask_blob,
        token_type_ids_blob=token_type_ids_blob,
        vocab_size=vocab_size,
        seq_length=seq_length,
        hidden_size=hidden_size,
        num_hidden_layers=num_hidden_layers,
        num_attention_heads=num_attention_heads,
        intermediate_size=intermediate_size,
        hidden_act=hidden_act,
        hidden_dropout_prob=hidden_dropout_prob,
        attention_probs_dropout_prob=attention_probs_dropout_prob,
        max_position_embeddings=max_position_embeddings,
        type_vocab_size=type_vocab_size,
        initializer_range=initializer_range,
    )
    backbone = bert_util.BertBackbone(**backbone_kwargs)

    with flow.scope.namespace("cls-squad"):
        # One row per token so a single dense layer scores every position.
        token_features = flow.reshape(backbone.sequence_output(),
                                      [-1, hidden_size])
        span_logits = bert_util._FullyConnected(
            token_features,
            hidden_size,
            units=2,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name="output",
        )
        span_logits = flow.reshape(span_logits, [-1, seq_length, 2])

        # Channel 0 scores the span start, channel 1 the span end.
        start_logits, end_logits = [
            flow.slice(span_logits, [None, None, channel], [None, None, 1])
            for channel in (0, 1)
        ]

    return start_logits, end_logits
Esempio n. 4
0
def yolo_train_job():
    """Decode a training batch, run ``YoloTrainNet`` and register its losses.

    The ground-truth blob is split along its last axis: the first four
    entries become the boxes, the fifth entry (cast to int32) the labels.
    The first three entries of the loss result are added as losses.

    Returns:
        The ``(yolo_loss_result, statistics_info_result)`` pair produced by
        ``YoloTrainNet``.
    """
    decoded = yolo_train_decoder(
        args.batch_size, args.image_height, args.image_width, args.classes,
        args.num_boxes, args.hue, args.jitter, args.saturation, args.exposure,
        args.dataset_dir, "yolo")
    images, ground_truth, gt_valid_num = decoded

    gt_boxes = flow.slice(ground_truth, [None, 0, 0], [None, -1, 4],
                          name='gt_box')
    label_column = flow.slice(ground_truth, [None, 0, 4], [None, -1, 1],
                              name='gt_label')
    gt_labels = flow.cast(label_column, dtype=flow.int32)

    yolo_loss_result, statistics_info_result = YoloTrainNet(
        images, gt_boxes, gt_labels, gt_valid_num, True)

    # Exactly the first three loss blobs are registered.
    for branch in range(3):
        flow.losses.add_loss(yolo_loss_result[branch])

    return yolo_loss_result, statistics_info_result
Esempio n. 5
0
    def __call__(self, x, training, mask):
        """Embed ``x``, add the positional encoding and run the encoder layers.

        :param x: The input; its second dimension is used as sequence length
        :param training: When truthy, dropout is applied after the positional
            encoding; the flag is also forwarded to every encoder layer
        :param mask: Forwarded unchanged to every encoder layer
        :return: The output of the last encoder layer
        """
        # Read the length before x is replaced by its embedding.
        num_positions = x.shape[1]

        with flow.scope.namespace("Encoder_Embedding"):
            x = EmbeddingLayer(x,
                               vocab_size=self.vocab_size,
                               embedding_size=self.d_model)
            # Scale the embedding by sqrt(d_model).
            scale = flow.math.sqrt(
                flow.constant_scalar(value=self.d_model,
                                     dtype=flow.float32,
                                     name="d_model_constant"))
            x *= scale

        with flow.scope.namespace("Encoder_Position_encoding"):
            # Same as self.pos_encoding[:, :num_positions, :]
            x += flow.slice(self.pos_encoding,
                            begin=[None, 0, None],
                            size=[None, num_positions, None])
            if training:
                x = flow.nn.dropout(x, rate=self.rate)

        with flow.scope.namespace("Encoder_Multi_encoder"):
            for layer_idx in range(self.num_layers):
                with flow.scope.namespace(f"encoder_{layer_idx}"):
                    x = self.enc_layers[layer_idx](x, training, mask)

        return x
Esempio n. 6
0
File: conv.py Progetto: zzk0/oneflow
def slice(x, begin, size):
    """Slice ``x`` using per-dimension ``begin`` offsets and ``size`` lengths.

    The (begin, size) pairs are translated into ``(start, stop, step)``
    tuples with ``step == 1`` and passed to ``flow.slice``.

    Args:
        x: Input tensor; only ``x.shape`` is read here.
        begin: List/tuple with one entry per dimension. Each entry is an int
            in ``[-dim_size, dim_size)`` or ``None`` (start of the dimension).
            Negative values count from the end of the dimension.
        size: List/tuple with one entry per dimension. Each entry is ``None``
            (no explicit stop), ``-1`` (stop at the end of the dimension) or
            an int in ``[1, dim_size]``. When ``begin + size`` reaches or
            exceeds the dimension, the slice simply runs to the end.

    Returns:
        The result of ``flow.slice(x, slice_tup_list)``.

    Raises:
        ValueError: If ``begin``/``size`` have the wrong type or length, or
            contain an out-of-range element.
    """
    ndim = len(x.shape)
    if not isinstance(begin, (list, tuple)) or len(begin) != ndim:
        raise ValueError(
            "begin must be a list/tuple with the same length as input tensor's number of dimensions"
        )
    if not all(isinstance(b, int) or b is None for b in begin):
        raise ValueError("element of begin must be a int or None")
    if not isinstance(size, (list, tuple)) or len(size) != ndim:
        raise ValueError(
            "size must be a list/tuple with the same length as input tensor's number of dimensions."
        )
    if not all(isinstance(s, int) or s is None for s in size):
        raise ValueError("element of size must be a int or None")

    slice_tup_list = []
    for b, s, dim_size in zip(begin, size, x.shape):
        start, stop, step = None, None, 1
        if b is not None:
            if b < -dim_size or b >= dim_size:
                raise ValueError("element of begin is out of range")
            start = b
        if s is not None:
            if s == -1:
                stop = dim_size
            else:
                if s <= 0 or s > dim_size:
                    raise ValueError("element of size is invalid")
                # Resolve begin to a non-negative offset first: a missing
                # begin means 0 and a negative begin counts from the end.
                # Adding the raw value would fail on None and would produce
                # a stop in front of the start for negative offsets.
                if b is None:
                    offset = 0
                elif b < 0:
                    offset = b + dim_size
                else:
                    offset = b
                if offset + s < dim_size:
                    stop = offset + s
        slice_tup_list.append((start, stop, step))
    return flow.slice(x, slice_tup_list)
Esempio n. 7
0
def _test_slice_4_dim(test_case, device):
    """Check a strided 4-D ``flow.slice`` against equivalent NumPy indexing."""
    source = np.random.randn(5, 3, 6, 9).astype(np.float32)
    tensor = flow.tensor(source, device=flow.device(device))
    # One [start, stop, step] triple per dimension; all-None keeps the axis.
    spec = [[0, 5, 2], [None, None, None], [0, 5, 2], [0, 6, 3]]
    sliced = flow.slice(tensor, slice_tup_list=spec)
    expected = source[0:5:2, 0:3:1, 0:5:2, 0:6:3]
    test_case.assertTrue(np.array_equal(sliced.numpy(), expected))
Esempio n. 8
0
def transformer_train_job(input: tp.Numpy.Placeholder(
    shape=(params.batch_size, params.max_length), dtype=flow.int64),
                          target: tp.Numpy.Placeholder(
                              shape=(params.batch_size, params.max_length),
                              dtype=flow.int64)) -> tp.Numpy:
    """
    The transformer training Job
    :param input: The input Sequence, we fix the shape to (_batch_size, _max_length)
    :param target: The target Sequence, we fix the shape to (_batch_size, _max_length)
    :return: Return the loss value.
    """
    # NOTE(review): both vocabulary sizes and both positional-encoding sizes
    # are set from params.TARGET_VOCAB_SIZE - confirm this is intended.
    sample_transformer = Transformer(
        num_layers=6,
        d_model=512,
        num_heads=8,
        dff=2048,
        input_vocab_size=params.TARGET_VOCAB_SIZE,
        target_vocab_size=params.TARGET_VOCAB_SIZE,
        pe_input=params.TARGET_VOCAB_SIZE,
        pe_target=params.TARGET_VOCAB_SIZE)

    # Teacher forcing: the decoder is fed target[:, :-1] and must predict the
    # sequence shifted one step ahead, target[:, 1:]. Feeding the shifted
    # sequence instead would let the decoder see the token it has to predict.
    tar_inp = flow.slice(target,
                         begin=[None, 0],
                         size=[None,
                               params.max_length - 1])  # (batch, seq_len - 1)
    tar_real = flow.slice(target,
                          begin=[None, 1],
                          size=[None,
                                params.max_length - 1])  # (batch, seq_len - 1)
    enc_padding_mask, combined_mask, dec_padding_mask = create_masks(
        input, tar_inp)

    # NOTE(review): training=False is passed inside a training job - confirm
    # this is deliberate.
    prediction, _ = sample_transformer(input,
                                       tar_inp,
                                       training=False,
                                       enc_padding_mask=enc_padding_mask,
                                       look_ahead_mask=combined_mask,
                                       dec_padding_mask=dec_padding_mask)

    loss = loss_function(tar_real, prediction)

    # Constant learning rate of 0.01 (no boundaries given).
    lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.01])
    flow.optimizer.Adam(lr_scheduler).minimize(loss)

    return loss
Esempio n. 9
0
def _test_slice_empty(test_case, placement, sbp):
    """Slice an empty row range (3:3) out of a 2-D global tensor and compare
    the OneFlow result with the PyTorch one, forward and backward."""
    dims = [random(1, 2) * 8 for _ in range(2)]
    local_tensor = random_tensor(2, *dims)
    global_tensor = local_tensor.to_global(placement=placement, sbp=sbp)

    # start == stop on the first axis selects nothing; second axis is kept.
    empty_rows = [[3, 3, 1], [None, None, None]]
    of_out = flow.slice(global_tensor.oneflow, slice_tup_list=empty_rows)
    torch_out = global_tensor.pytorch[3:3:1, :]

    _check_forward_and_backward(test_case, local_tensor, of_out, torch_out)
Esempio n. 10
0
def _test_slice_backward(test_case, device):
    """Check that the gradient of ``sum(flow.slice(x))`` is one exactly at the
    selected positions and zero everywhere else."""
    source = np.random.randn(3, 6, 9).astype(np.float32)
    tensor = flow.tensor(source,
                         device=flow.device(device),
                         requires_grad=True)
    spec = [[None, None, None], [0, 5, 2], [0, 6, 3]]
    flow.slice(tensor, slice_tup_list=spec).sum().backward()

    # Mark the same strided positions directly in the expected gradient.
    expected_grad = np.zeros((3, 6, 9))
    expected_grad[0:3:1, 0:5:2, 0:6:3] = 1
    test_case.assertTrue(np.array_equal(tensor.grad.numpy(), expected_grad))
Esempio n. 11
0
    def bbox_giou(self, boxes1, boxes2):
        ''' Generalized IoU of two sets of (x, y, w, h) boxes.

        giou = iou - (enclose_area - union_area) / enclose_area, where the
        enclosing box is the smallest box containing both inputs.

        :param boxes1: [N, H, W, 3, 4]  (x, y, w, h)
        :param boxes2:  [N, H, W, 3, 4]  (x, y, w, h)
        :return: [N, H, W, 3, 1]
        '''
        def convert(box_xywh):
            # (center, size) -> (left-top, right-bottom) corners.
            box_xy = flow.slice(box_xywh,
                                begin=[None, None, None, None, 0],
                                size=[None, None, None, None, 2])
            box_wh = flow.slice(box_xywh,
                                begin=[None, None, None, None, 2],
                                size=[None, None, None, None, 2])
            corner_a = box_xy - box_wh * 0.5
            corner_b = box_xy + box_wh * 0.5
            # Take both extrema from the untouched corners; overwriting
            # box_lt first would make the maximum always return corner_b.
            box_lt = flow.math.minimum(corner_a, corner_b)
            box_rb = flow.math.maximum(corner_a, corner_b)
            return box_lt, box_rb

        def area(wh):
            # Product of the two entries on the last axis (width * height).
            return flow.slice(wh, begin=[None, None, None, None, 0], size=[None, None, None, None, 1]) * \
                   flow.slice(wh, begin=[None, None, None, None, 1], size=[None, None, None, None, 1])

        boxes1_lt, boxes1_rb = convert(boxes1)
        boxes1_area = area(boxes1_rb - boxes1_lt)

        boxes2_lt, boxes2_rb = convert(boxes2)
        boxes2_area = area(boxes2_rb - boxes2_lt)

        # Intersection rectangle; clipped at 0 for non-overlapping boxes.
        left_up = flow.math.maximum(boxes1_lt, boxes2_lt)
        right_down = flow.math.minimum(boxes1_rb, boxes2_rb)
        inter_section_wh = flow.math.clip_by_value(right_down - left_up,
                                                   min_value=0.0)
        inter_area = area(inter_section_wh)
        union_area = boxes1_area + boxes2_area - inter_area
        # added 1e-6 in denominator to avoid generation of inf, which may cause nan loss
        iou = inter_area / (union_area + 1e-6)

        # Smallest box enclosing both inputs.
        enclose_left_up = flow.math.minimum(boxes1_lt, boxes2_lt)
        enclose_right_down = flow.math.maximum(boxes1_rb, boxes2_rb)
        enclose_wh = flow.math.clip_by_value(enclose_right_down -
                                             enclose_left_up,
                                             min_value=0.0)
        enclose_area = area(enclose_wh)
        # added 1e-6 in denominator to avoid generation of inf, which may cause nan loss
        giou = iou - 1.0 * (enclose_area - union_area) / (enclose_area + 1e-6)

        return giou
Esempio n. 12
0
def nonzero_op(input, as_tuple=False):
    """Return the indices of the non-zero elements of ``input``.

    Args:
        input: The input tensor. A 0-dim tensor is unsqueezed to 1-dim first
            when ``as_tuple`` is set.
        as_tuple: When False (default) return one 2-D tensor with one row per
            non-zero element. When True return a tuple holding one index
            tensor per dimension of ``input``.

    Returns:
        A tensor, or a tuple of tensors when ``as_tuple`` is True.
    """
    if as_tuple and not input.ndim:
        input = input.unsqueeze(0)
    res, size = flow._C.argwhere(input)
    # Keep only the first `size` rows of the argwhere result.
    res = flow.slice(res, slice_tup_list=[[0, int(size.numpy()), 1]])
    if not as_tuple:
        return res
    # Transpose once and index the result, instead of transposing again for
    # every column.
    columns = flow._C.transpose(res, [1, 0])
    return tuple(columns[axis] for axis in range(res.shape[1]))
Esempio n. 13
0
 def split(cls, x, axis, split_num):
     """Cut ``x`` into ``split_num`` consecutive pieces along ``axis``.

     Each piece has length ``x.shape[axis] // split_num`` on that axis and
     spans every other axis completely. Returns the pieces as a list.
     """
     rank = len(x.shape)
     chunk = x.shape[axis] // split_num

     # -1 selects a whole axis; only the split axis gets an explicit length.
     sizes = [-1] * rank
     sizes[axis] = chunk

     offsets = [0] * rank
     pieces = []
     for part in range(split_num):
         offsets[axis] = part * chunk
         pieces.append(flow.slice(x, offsets, sizes))
     return pieces
Esempio n. 14
0
def YoloTrainLayer(in_blob, gt_bbox_blob, gt_label_blob, gt_valid_num_blob, i):
    """Build the training loss ops of one YOLO output layer.

    ``in_blob`` is transposed with perm [0, 2, 3, 1] and reshaped to
    (batch, -1, 85). The last axis is split into 4 position entries (xy goes
    through ``logistic``) and 81 confidence entries (1 objectness + 80 class
    probabilities, all through ``logistic``). ``yolo_box_diff`` is configured
    from ``yolo_box_diff_conf[i]``.

    Returns:
        ``(bbox_loss_reduce_sum, statistics_info)``: the loss summed over
        axes 1 and 2, and the statistics returned by ``yolo_box_diff``.
    """
    global layer_number
    prefix = 'yolo-layer' + str(layer_number)

    def cut(blob, first, width, tag):
        # Slice `width` entries of the last axis starting at `first`.
        return flow.slice(blob, [None, 0, first], [None, -1, width],
                          name=prefix + tag)

    # placeholder for a reshape from (n,h,w,255)->(n,h,w*3,85)
    transposed = flow.transpose(in_blob,
                                name=prefix + '-yolo_transpose',
                                perm=[0, 2, 3, 1])
    reshape_blob = flow.reshape(transposed,
                                shape=(transposed.shape[0], -1, 85),
                                name=prefix + '-yolo_reshape')

    position = cut(reshape_blob, 0, 4, '-yolo_slice_pos')
    xy = cut(position, 0, 2, '-yolo_slice_xy')
    wh = cut(position, 2, 2, '-yolo_slice_wh')
    xy = logistic(xy, name=prefix + '-yolo_ligistic_xy')
    position = flow.concat([xy, wh], axis=2, name=prefix + '-yolo_concat')

    confidence = cut(reshape_blob, 4, 81, '-yolo_slice_prob')
    confidence = logistic(confidence, name=prefix + '-yolo_ligistic_prob')
    objness = cut(confidence, 0, 1, '-yolo_slice_objness')
    clsprob = cut(confidence, 1, 80, '-yolo_slice_clsprob')

    cfg = yolo_box_diff_conf[i]
    (bbox_loc_diff, pos_inds, pos_cls_label, neg_inds, valid_num,
     statistics_info) = yolo_box_diff(
         position,
         gt_bbox_blob,
         gt_label_blob,
         gt_valid_num_blob,
         image_height=cfg['image_height'],
         image_width=cfg['image_width'],
         layer_height=cfg['layer_height'],
         layer_width=cfg['layer_width'],
         ignore_thresh=cfg['ignore_thresh'],
         truth_thresh=cfg['truth_thresh'],
         box_mask=cfg['box_mask'],
         anchor_boxes_size=cfg['anchor_boxes_size'],
         name=prefix + '-yolo_box_loss')  # placeholder for yolobox layer

    bbox_objness_out, bbox_clsprob_out = yolo_prob_loss(
        objness,
        clsprob,
        pos_inds,
        pos_cls_label,
        neg_inds,
        valid_num,
        num_classes=80,
        name=prefix + '-yolo_prob_loss')

    bbox_loss = flow.concat(
        [bbox_loc_diff, bbox_objness_out, bbox_clsprob_out],
        axis=2,
        name=prefix + '-loss_concat')
    bbox_loss_reduce_sum = flow.math.reduce_sum(
        bbox_loss, axis=[1, 2], name=prefix + '-bbox_loss_reduce_sum')
    return bbox_loss_reduce_sum, statistics_info
Esempio n. 15
0
    def call(self, y_pred, target, target_weight):
        """Weighted per-joint MSE between predicted and ground-truth heatmaps.

        Both ``y_pred`` and ``target`` are reshaped to
        (batch_size, -1, num_of_joints), where the sizes come from the first
        and last dimension of ``y_pred``. For every joint, prediction and
        ground truth are multiplied by that joint's slice of
        ``target_weight`` and half of their mean squared error is
        accumulated.

        Returns:
            The accumulated loss divided by the number of joints.
        """
        batch_size = y_pred.shape[0]
        num_of_joints = y_pred.shape[-1]
        flat_shape = (batch_size, -1, num_of_joints)

        def per_joint(blob):
            # One width-1 slice of the last axis per joint.
            return [
                flow.slice(blob,
                           begin=[None, None, joint * 1],
                           size=[None, None, 1])
                for joint in range(num_of_joints)
            ]

        def joint_weight(joint):
            # Weight slice of one joint, reshaped to (batch_size, 1).
            return flow.reshape(
                flow.slice(target_weight,
                           begin=[None, joint * 1, None],
                           size=[None, 1, None]), [batch_size, 1])

        pred = flow.reshape(x=y_pred, shape=flat_shape)
        heatmap_pred_list = per_joint(pred)

        gt = flow.reshape(x=target, shape=flat_shape)
        heatmap_gt_list = per_joint(gt)

        loss = 0.0
        for joint in range(num_of_joints):
            heatmap_pred = flow.squeeze(heatmap_pred_list[joint])
            heatmap_gt = flow.squeeze(heatmap_gt_list[joint])

            weighted_pred = heatmap_pred * joint_weight(joint)
            weighted_gt = heatmap_gt * joint_weight(joint)

            loss += 0.5 * flow.nn.MSELoss(
                weighted_pred, weighted_gt, reduction="mean")

        return loss / num_of_joints
Esempio n. 16
0
    def total_variance_loss(self, images, weight):
        """Total-variation loss of a batch of generated images.

        Sums the squared differences between vertically and horizontally
        neighbouring pixels, normalises each sum by the element count of one
        shifted image, and scales the result by ``2 * weight / batch``.

        Args:
            images: Blob of shape (self.batch_size, 3, self.hr_size,
                self.hr_size); any other shape fails the assertion.
            weight: Scalar factor applied to the loss.

        Returns:
            The weighted total-variation loss.
        """
        assert images.shape == (
            self.batch_size, 3, self.hr_size,
            self.hr_size), "The shape of generated images is {}.".format(
                images.shape)

        def size_num(inputs):
            # Element count of one sample (all axes except the batch axis).
            return inputs.shape[1] * inputs.shape[2] * inputs.shape[3]

        full = self.hr_size
        # A view shifted by one pixel holds only hr_size - 1 rows/columns.
        # Requesting that size explicitly avoids depending on the slice op
        # clamping an over-long request.
        shifted = self.hr_size - 1

        rows_lower = flow.slice(images, [None, 0, 1, 0],
                                [None, 3, shifted, full])
        rows_upper = flow.slice(images, [None, 0, 0, 0],
                                [None, 3, shifted, full])
        cols_right = flow.slice(images, [None, 0, 0, 1],
                                [None, 3, full, shifted])
        cols_left = flow.slice(images, [None, 0, 0, 0],
                               [None, 3, full, shifted])

        # Reuse the shifted views for the counts instead of slicing again.
        count_h = size_num(rows_lower)
        count_w = size_num(cols_right)

        h_tv = flow.math.reduce_sum(
            flow.math.squared_difference(rows_lower, rows_upper))
        w_tv = flow.math.reduce_sum(
            flow.math.squared_difference(cols_right, cols_left))

        return weight * 2 * (h_tv / count_h + w_tv / count_w) / images.shape[0]
Esempio n. 17
0
def PooledOutput(sequence_output, hidden_size, initializer_range):
    """Pool a sequence output by projecting its first position.

    Position 0 of the second axis is sliced out, reshaped to
    ``[-1, hidden_size]``, passed through a dense layer with ``hidden_size``
    units and squashed with tanh.
    """
    with flow.scope.namespace("bert-pooler"):
        first_position = flow.slice(sequence_output, [None, 0, 0],
                                    [None, 1, -1])
        first_position = flow.reshape(first_position, [-1, hidden_size])
        projected = bert_util._FullyConnected(
            first_position,
            input_size=hidden_size,
            units=hidden_size,
            weight_initializer=bert_util.CreateInitializer(initializer_range),
            name="dense",
        )
        pooled_output = flow.math.tanh(projected)
    return pooled_output
Esempio n. 18
0
def YoloPredictLayer(in_blob, origin_image_info, i, trainable):
    """Build the detection ops of one YOLO output layer.

    ``in_blob`` is transposed with perm [0, 2, 3, 1] and reshaped to
    (batch, -1, 85). The last axis is split into 4 position entries (xy goes
    through a sigmoid) and 81 confidence entries (all through a sigmoid),
    which are fed to ``flow.yolo_detect`` configured from ``yolo_conf[i]``.
    ``trainable`` is accepted but not used here.

    Returns:
        ``(out_bbox, out_probs, valid_num)`` as produced by
        ``flow.yolo_detect``.
    """
    global layer_number
    prefix = 'yolo-layer' + str(layer_number)

    def cut(blob, first, width, tag):
        # Slice `width` entries of the last axis starting at `first`.
        return flow.slice(blob, [None, 0, first], [None, -1, width],
                          name=prefix + tag)

    # placeholder for a reshape from (n,h,w,255)->(n,h,w*3,85)
    transposed = flow.transpose(in_blob,
                                name=prefix + '-yolo_transpose',
                                perm=[0, 2, 3, 1])
    reshape_blob = flow.reshape(transposed,
                                shape=(transposed.shape[0], -1, 85),
                                name=prefix + '-yolo_reshape')

    position = cut(reshape_blob, 0, 4, '-yolo_slice_pos')
    xy = cut(position, 0, 2, '-yolo_slice_xy')
    wh = cut(position, 2, 2, '-yolo_slice_wh')
    xy = flow.math.sigmoid(xy, name=prefix + '-yolo_ligistic_xy')
    position = flow.concat([xy, wh], axis=2, name=prefix + '-yolo_concat')

    confidence = cut(reshape_blob, 4, 81, '-yolo_slice_prob')
    confidence = flow.math.sigmoid(confidence,
                                   name=prefix + '-yolo_ligistic_prob')

    layer_conf = yolo_conf[i]
    out_bbox, out_probs, valid_num = flow.yolo_detect(
        bbox=position,
        probs=confidence,
        origin_image_info=origin_image_info,
        image_height=608,
        image_width=608,
        layer_height=layer_conf['layer_height'],
        layer_width=layer_conf['layer_width'],
        prob_thresh=0.5,
        num_classes=80,
        max_out_boxes=max_out_boxes,
        anchor_boxes=layer_conf['anchor_boxes_size'],
        name=prefix + "yolo_detect")
    return out_bbox, out_probs, valid_num
Esempio n. 19
0
    def __call__(self, x, enc_output, training, look_ahead_mask, padding_mask):
        """
        Embed ``x``, add the positional encoding and run the decoder layers.

        :param x: The input X; its second dimension is the sequence length
        :param enc_output: The encoder output, forwarded to every layer
        :param training: Whether training; enables dropout after the
            positional encoding and is forwarded to every layer
        :param look_ahead_mask: The look ahead mask, forwarded to every layer
        :param padding_mask: The padding mask, forwarded to every layer
        :return: The output of the last layer and a dict with the two
            attention blocks of every layer, keyed
            'decoder_layer{n}_block1' / 'decoder_layer{n}_block2' (n from 1)
        """
        # Read the length before x is replaced by its embedding.
        num_positions = x.shape[1]
        attention_weights = {}

        with flow.scope.namespace("Decoder_Embedding"):
            x = EmbeddingLayer(x,
                               vocab_size=self.target_vocab_size,
                               embedding_size=self.d_model)
            # Scale the embedding by sqrt(d_model).
            scale = flow.math.sqrt(
                flow.constant(self.d_model, dtype=flow.float32, shape=(1,)))
            x *= scale

        with flow.scope.namespace("Decoder_Position_encoding"):
            # Same as self.pos_encoding[:, :num_positions, :]
            x += flow.slice(self.pos_encoding,
                            begin=[None, 0, None],
                            size=[None, num_positions, None])
            if training:
                x = flow.nn.dropout(x, rate=self.rate)

        with flow.scope.namespace("Decoder_Multi_decoder"):
            for layer_idx in range(self.num_layers):
                with flow.scope.namespace(f"decoder_{layer_idx}"):
                    x, block1, block2 = self.dec_layers[layer_idx](
                        x, enc_output, training, look_ahead_mask,
                        padding_mask)

                    layer_no = layer_idx + 1
                    attention_weights[
                        f"decoder_layer{layer_no}_block1"] = block1
                    attention_weights[
                        f"decoder_layer{layer_no}_block2"] = block2

        return x, attention_weights
Esempio n. 20
0
def _EmbeddingPostprocessor(
    input_blob,
    seq_length,
    embedding_size,
    use_token_type=False,
    token_type_ids_blob=None,
    token_type_vocab_size=16,
    token_type_embedding_name="token_type_embeddings",
    use_position_embeddings=True,
    position_embedding_name="position_embeddings",
    initializer_range=0.02,
    max_position_embeddings=512,
    dropout_prob=0.1,
):
    """Add optional token-type and position embeddings, then normalise.

    When ``use_token_type`` is set, rows gathered from a
    ``[token_type_vocab_size, embedding_size]`` variable by
    ``token_type_ids_blob`` are added to ``input_blob``. When
    ``use_position_embeddings`` is set, the first ``seq_length`` positions of
    a ``[1, max_position_embeddings, embedding_size]`` variable are added.
    The sum goes through ``_LayerNorm`` and ``_Dropout``.
    """
    embeddings = input_blob

    if use_token_type:
        assert token_type_ids_blob is not None
        type_table = flow.get_variable(
            name=token_type_embedding_name,
            shape=[token_type_vocab_size, embedding_size],
            dtype=input_blob.dtype,
            initializer=CreateInitializer(initializer_range),
        )
        embeddings = embeddings + flow.gather(
            params=type_table, indices=token_type_ids_blob, axis=0
        )

    if use_position_embeddings:
        pos_table = flow.get_variable(
            name=position_embedding_name,
            shape=[1, max_position_embeddings, embedding_size],
            dtype=input_blob.dtype,
            initializer=CreateInitializer(initializer_range),
        )
        assert seq_length <= max_position_embeddings
        # The full table is used as-is; otherwise keep the leading positions.
        if seq_length != max_position_embeddings:
            pos_table = flow.slice(
                pos_table, begin=[None, 0, 0], size=[None, seq_length, -1]
            )
        embeddings = embeddings + pos_table

    normalized = _LayerNorm(embeddings, embedding_size)
    return _Dropout(normalized, dropout_prob)
Esempio n. 21
0
    def self_attn_qk_v_fw_bw(h: flow.typing.Numpy.Placeholder(
        shape=(seq_len, batch_size, hidden_size),
        dtype=flow.float32)) -> typing.Tuple[flow.typing.Numpy,
                                             flow.typing.Numpy]:
        """Compute query*key and value either with the fused op or with the
        reshape/slice/transpose/matmul reference path, record the gradient of
        the scaled input and minimise ``sum(qmk @ v)`` with SGD.

        Returns the ``(qmk, v)`` pair.
        """
        scale = flow.get_variable(
            "var",
            shape=(1, ),
            dtype=flow.float32,
            initializer=flow.constant_initializer(1.0, dtype=flow.float32),
            trainable=True,
        )
        h = h * scale

        # save grad under a key that tells the two code paths apart
        grad_key = "h_grad_fused" if fused else "h_grad"
        flow.watch_diff(h, test_global_storage.Setter(grad_key))

        if fp16:
            h = flow.amp_white_identity(h)

        alpha = get_alpha(head_size)

        if not fused:
            # (s, b, H) -> (s, b, n, 3 * h) -> (s, b, n, h) -> (b, n, s, h)
            h = flow.reshape(h, (seq_len, batch_size, -1, 3 * head_size))
            q, k, v = [
                flow.transpose(
                    flow.slice(
                        h,
                        begin=[None, None, None, head_size * part],
                        size=[None, None, None, head_size],
                    ),
                    perm=[1, 2, 0, 3],
                ) for part in range(3)
            ]
            qmk = flow.matmul(q, k, transpose_b=True, alpha=alpha)
        else:
            qmk, v = flow.nn.fused_self_attention_query_mul_key_and_value(
                h, head_size=head_size, alpha=alpha)

        # calc loss for grad
        loss = flow.math.reduce_sum(flow.matmul(qmk, v))
        flow.optimizer.SGD(get_lr_scheduler(), momentum=0).minimize(loss)

        return qmk, v
Esempio n. 22
0
def slice_wrapper(tensor, slice_tuple: Tuple[int, int, int]):
    """Slice ``tensor`` along its first dimension without tracking gradients.

    ``slice_tuple`` is applied to dimension 0 and every other dimension is
    kept whole. Consistent tensors go through ``flow.logical_slice``, all
    others through ``flow.slice``. When the result has a leading dimension
    of size 1 and the input had more than one dimension, that leading
    dimension is removed.
    """
    with flow.no_grad():
        rank = tensor.ndim
        spec = [slice_tuple]
        spec.extend([None, None, None] for _ in range(rank - 1))

        # TODO(): a kind 'slice op' supports both local and consistent tensor
        # For consistent tensors: input s0 -> output p, b -> b, p -> p,
        # so 'to b' is not needed here.
        slicer = flow.logical_slice if tensor.is_consistent else flow.slice
        tensor = slicer(tensor, spec)

        # TODO(): flow.sequeeze will fail in some consistent tensor case
        if rank > 1 and tensor.shape[0] == 1:
            tensor = tensor.reshape(list(tensor.shape[1:]))
        return tensor
Esempio n. 23
0
def GPT(idx, config, target=None):
    """Build the GPT forward graph and, optionally, its training loss.

    Args:
        idx: 2-D blob of token ids; its second dimension (sequence length)
            must not exceed ``config.block_size``.
        config: Object providing ``block_size``, ``vocab_size``, ``n_embd``,
            ``embd_pdrop`` and ``n_layer``.
        target: Optional blob of target token ids. When given, logits are
            flattened to ``[-1, vocab_size]`` and a softmax cross-entropy
            against the one-hot targets is computed.

    Returns:
        ``(logits, loss)``; ``loss`` is ``None`` when ``target`` is ``None``.
    """
    _, t = idx.shape
    assert t <= config.block_size, "Cannot forward, model block size is exhausted."

    # token embedding
    word_embedding = flow.get_variable(
        'word_emb',
        initializer=flow.random_normal_initializer(),
        shape=(config.vocab_size, config.n_embd))
    token_embeddings = flow.gather(word_embedding, idx)

    # position embedding: each position maps to a (learnable) vector
    pos_emb = flow.get_variable(name='pos_emb',
                                shape=(1, config.block_size, config.n_embd),
                                dtype=flow.float32,
                                initializer=flow.zeros_initializer())
    # equal to pos_emb[:, :t, :]
    position_embeddings = flow.slice(pos_emb, [None, 0, None], [None, t, None])
    x = flow.nn.dropout((token_embeddings + position_embeddings),
                        config.embd_pdrop)

    # Blocks
    for block_id in range(config.n_layer):
        with flow.scope.namespace('Block' + str(block_id)):
            x = Block(x, config)

    x = flow.layers.layer_norm(x, name='output_layernorm')

    # The output projection is a plain linear layer. An initializer object
    # is not an activation function, so no `activation` argument is passed.
    logits = flow.layers.dense(x,
                               config.vocab_size,
                               use_bias=False,
                               name='output_logits')

    loss = None
    if target is not None:
        logits = flow.reshape(logits, [-1, config.vocab_size])
        target = flow.reshape(target, [-1])
        target = flow.one_hot(target,
                              depth=config.vocab_size,
                              dtype=flow.float32)
        loss = flow.nn.softmax_cross_entropy_with_logits(logits, target)
    return logits, loss
Esempio n. 24
0
def argwhere_op(input, dtype: Optional[flow.dtype] = flow.int32):
    """Return the coordinates of the non-zero elements of `input`.

    Each row of the result is the coordinate of one non-zero element.

    Args:
        input (oneflow.Tensor): The input Tensor.
        dtype (Optional[flow.dtype], optional): The data type of output. Defaults to ``flow.int32``.

    Returns:
        oneflow.Tensor: The result Tensor.

    Raises:
        ValueError: if `input` is a consistent tensor.
        NotImplementedError: if `input` is a lazy tensor.

    For example:

    .. code-block:: python

        >>> import numpy as np
        >>> import oneflow as flow
        >>> x = np.array([[0, 1, 0],
        ...            [2, 0, 2]]).astype(np.float32)
        
        >>> input = flow.Tensor(x)
        >>> output = flow.argwhere(input)
        >>> output
        tensor([[0, 1],
                [1, 0],
                [1, 2]], dtype=oneflow.int32)

    """

    if input.is_consistent:
        raise ValueError(
            "A consistent tensor can not be applied to argwhere, and use `tensor.to_local()` to convert it to local tensor first."
        )

    raw_indices, count = flow._C.argwhere(input, dtype=dtype)
    if input.is_lazy:
        # NOTE(review): lazy mode is unsupported here; it would presumably
        # need a dynamic resize of the raw result to `count` rows - confirm.
        raise NotImplementedError

    # Keep only the first `count` rows of the raw result.
    valid_rows = count.numpy().item()
    return flow.slice(raw_indices, slice_tup_list=[(0, valid_rows, 1)])
Esempio n. 25
0
    def bbox_iou(self, boxes1, boxes2):
        '''
        Compute the intersection-over-union of two sets of center-format boxes.

        Boxes are converted from (x, y, w, h) to (left-top, right-bottom)
        corners; the corners are ordered so that a box with negative width or
        height still yields left-top <= right-bottom.

        :param boxes1: [N, H, W, 3, 1, 4]  (x, y, w, h)
        :param boxes2:  [N, 1, 1, 1, V, 4]  (x, y, w, h)
        :return: [N, H, W, 3, V, 1]
        '''
        def last_axis(blob, start, length):
            # Take `length` entries starting at `start` on the last (6th) axis.
            return flow.slice(blob,
                              begin=[None, None, None, None, None, start],
                              size=[None, None, None, None, None, length])

        def convert(box_xywh):
            # (x, y, w, h) -> ordered (left-top, right-bottom) corners.
            box_xy = last_axis(box_xywh, 0, 2)
            box_wh = last_axis(box_xywh, 2, 2)
            corner_a = box_xy - box_wh * 0.5
            corner_b = box_xy + box_wh * 0.5
            # Both results must be derived from the raw corners; reusing an
            # already-minimised corner would make the maximum a no-op.
            box_lt = flow.math.minimum(corner_a, corner_b)
            box_rb = flow.math.maximum(corner_a, corner_b)
            return box_lt, box_rb

        def area(extent_wh):
            # width * height, keeping a trailing axis of size 1.
            return last_axis(extent_wh, 0, 1) * last_axis(extent_wh, 1, 1)

        boxes1_lt, boxes1_rb = convert(boxes1)
        boxes1_area = area(boxes1_rb - boxes1_lt)

        boxes2_lt, boxes2_rb = convert(boxes2)
        boxes2_area = area(boxes2_rb - boxes2_lt)

        left_up = flow.math.maximum(boxes1_lt, boxes2_lt)
        right_down = flow.math.minimum(boxes1_rb, boxes2_rb)

        # Non-overlapping boxes give a negative extent; clamp it to zero.
        inter_section_wh = flow.math.clip_by_value(right_down - left_up,
                                                   min_value=0.0)
        inter_area = area(inter_section_wh)
        union_area = boxes1_area + boxes2_area - inter_area
        # The small epsilon avoids division by zero for degenerate boxes.
        iou = 1.0 * inter_area / (union_area + 1e-6)

        return iou
Esempio n. 26
0
    def build_network(self,inputs):
        """Build the temporal neighbour-aggregation graph for ``inputs``.

        Stages (in order):
          1. Feature map registration: a 1x1x1 convolution plus L2
             normalisation gives per-frame features; their similarity to the
             features of temporally neighbouring frames is turned into
             softmax weights that re-sample the neighbouring frames.
          2. Contrastive attention (only if ``self.contrastive_att``): a
             sigmoid gate computed from the inputs and the re-sampled
             neighbours scales the neighbours.
          3. Integration: the original frames and the re-sampled neighbours
             are interleaved along the time axis and added to a zero
             initialised variable.

        Args:
            inputs: 5-D blob whose shape unpacks as ``(b, c, t, h, w)``.
                The reshapes below use ``c // 16`` as the registered feature
                width - assumes ``self.out_channels == c // 16``; TODO confirm.

        Returns:
            Blob of shape ``(b, c, N * t, h, w)`` with ``N = self.time_dim``.
        """
        b, c, t, h, w = inputs.shape
        N = self.time_dim
        time_pad = (self.time_dim - 1) // 2

        def take_frames(blob, frame_ids):
            # One length-1 slice along the time axis (axis 2) per frame id.
            return [
                flow.slice(blob,
                           begin=[None, None, int(index), None, None],
                           size=[None, None, 1, None, None])
                for index in frame_ids
            ]

        def tile_new_axis(blob):
            # Insert axis 3 and stack N - 1 copies of `blob` along it.
            single = flow.expand_dims(blob, axis=3)
            tiled = single
            for _ in range(N - 2):
                tiled = flow.concat(inputs=[tiled, single], axis=3)
            return tiled

        # Frame ids (into the time-padded tensors) of every neighbour of every
        # frame, frame-major. They are plain Python ints because they are
        # consumed with int(...) while the graph is being built.
        offsets = [i for i in range(N) if i != N // 2]
        neighbor_time_index = [
            frame + offset for frame in range(t) for offset in offsets
        ]

        # feature map registration
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')

        init = flow.kaiming_initializer(shape=inputs.shape, mode="fan_out",
                                        nonlinearity="relu")
        semantic = conv3d_layer("conv_semantic_" + tempname, inputs,
                                self.out_channels,
                                kernel_size=1, use_bias=False, padding="VALID",
                                trainable=self.trainable,
                                weight_initializer=init)

        inputs_norm = flow.math.l2_normalize(semantic, axis=1)

        inputs_norm_padding = flow.pad(
            inputs_norm,
            paddings=[(0, 0), (0, 0), (time_pad, time_pad), (0, 0), (0, 0)])

        inputs_norm_expand = tile_new_axis(inputs_norm)
        inputs_norm_expand = flow.transpose(inputs_norm_expand,
                                            perm=[0, 2, 3, 4, 5, 1])
        inputs_norm_expand = flow.reshape(inputs_norm_expand,
                                          shape=[-1, h * w, c // 16])

        neighbor_norm = flow.concat(
            take_frames(inputs_norm_padding, neighbor_time_index), axis=2)
        neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
        neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

        similarity = flow.matmul(inputs_norm_expand,
                                 neighbor_norm) * self.temperature
        similarity = nn.softmax(similarity, axis=-1)

        inputs_padding = flow.pad(
            inputs,
            paddings=[(0, 0), (0, 0), (time_pad, time_pad), (0, 0), (0, 0)])
        neighbor = flow.concat(
            take_frames(inputs_padding, neighbor_time_index), axis=2)
        neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
        neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

        neighbor_new = flow.matmul(similarity, neighbor)
        neighbor_new = flow.reshape(neighbor_new,
                                    shape=[b, t * (N - 1), h, w, c])
        neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

        # contrastive attention
        if self.contrastive_att:
            temp_input = tile_new_axis(inputs)
            temp_input = flow.reshape(temp_input,
                                      shape=[b, c, (N - 1) * t, h, w])
            input_att = conv3d_layer(
                "conv3d_inputmapping_" + tempname, temp_input,
                self.out_channels,
                kernel_size=1, use_bias=False, trainable=False,
                weight_initializer=flow.kaiming_initializer(
                    shape=temp_input.shape, mode="fan_out",
                    nonlinearity="relu"))

            n_att = conv3d_layer(
                "conv3d_nmapping_" + tempname, neighbor_new,
                self.out_channels,
                kernel_size=1, use_bias=False, trainable=False,
                weight_initializer=flow.kaiming_initializer(
                    shape=neighbor_new.shape, mode="fan_out",
                    nonlinearity="relu"))
            att_input = input_att * n_att
            # A single output channel so the gate broadcasts over channels.
            contrastive_att_net = conv3d_layer(
                "conv3d_att_net_" + tempname, att_input, 1,
                kernel_size=1, use_bias=False, trainable=self.trainable,
                weight_initializer=flow.kaiming_initializer(
                    shape=att_input.shape, mode="fan_out",
                    nonlinearity="relu"))
            contrastive_att_net = flow.math.sigmoid(contrastive_att_net)
            neighbor_new = flow.math.multiply(neighbor_new,
                                              contrastive_att_net)

        # integrating feature maps
        init = flow.zeros_initializer()
        tempname = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S.%f')

        input_offset = flow.get_variable(
            "input_offset_" + tempname,
            shape=(b, c, N * t, h, w),
            initializer=init,
            dtype=inputs.dtype,
            trainable=self.trainable)

        # Output time slots: within every group of N slots the middle one
        # receives the original frame, the others receive the neighbours.
        input_index = [i for i in range(t * N) if i % N == N // 2]
        neighbor_index = [i for i in range(t * N) if i % N != N // 2]

        input_offset_list = take_frames(input_offset,
                                        range(input_offset.shape[2]))
        inputs_list = take_frames(inputs, range(inputs.shape[2]))
        neighbor_new_list = take_frames(neighbor_new,
                                        range(neighbor_new.shape[2]))

        for position, index in enumerate(input_index):
            input_offset_list[index] += inputs_list[position]

        for position, index in enumerate(neighbor_index):
            input_offset_list[index] += neighbor_new_list[position]

        input_offset = flow.concat(input_offset_list, axis=2)

        return input_offset
Esempio n. 27
0
 def forward(self, x):
     """Return a strided slice of ``x``.

     Each entry of the slice list is a (start, stop, step) triple for one
     axis: axis 0 is taken whole, axis 1 uses 0:5:2 and axis 2 uses 0:6:3.
     """
     return flow.slice(
         x, slice_tup_list=[[None, None, None], [0, 5, 2], [0, 6, 3]])
Esempio n. 28
0
def lstm(input,
         units,
         return_sequence=False,
         initial_state=None,
         direction='forward',
         layer_index=0,
         is_train=True):
    '''
       Unroll one LSTM layer over the time axis of ``input``.

       input: sequence input tensor with shape [batch_size,sequence_length,embedding size]
       units: hidden units numbers
       return_sequence: if true return every step's output concatenated on
           axis 1, otherwise only the last step's output reshaped to [-1, units]
       initial_state: optional (hx, cx) pair; when falsy, two zero constants
           of shape [batch_size, units] are used
       direction: used as a variable namespace
       layer_index: used for the 'layer<index>' variable namespace
       is_train: forwarded as ``trainable`` to every variable
    '''
    batch_size, seq_len, input_size = (input.shape[0], input.shape[1],
                                       input.shape[2])

    dtype = flow.float32
    # (variable-name prefix, short tag used in the watch names) per gate.
    gate_tags = (('input', 'i'), ('forget', 'f'), ('cell', 'c'),
                 ('output', 'o'))

    def make_variable(name, shape, initializer):
        return flow.get_variable(name=name,
                                 shape=shape,
                                 dtype=dtype,
                                 trainable=is_train,
                                 initializer=initializer)

    x_weights, h_weights, biases = {}, {}, {}
    with flow.scope.namespace('layer' + str(layer_index)):
        with flow.scope.namespace(direction):
            for gate, tag in gate_tags:
                x_weights[tag] = make_variable(
                    gate + '-weight', [input_size, units],
                    flow.glorot_normal_initializer())
                h_weights[tag] = make_variable(
                    gate + '-h-weight', [units, units],
                    flow.glorot_normal_initializer())
                biases[tag] = make_variable(
                    gate + '-bias', [units],
                    flow.constant_initializer(0.0))

    # Register every parameter under the name the test storage expects.
    for _, tag in gate_tags:
        flow.watch(x_weights[tag],
                   test_global_storage.Setter("weight_blob_" + tag))
    for _, tag in gate_tags:
        flow.watch(h_weights[tag],
                   test_global_storage.Setter("weight_blob_" + tag + "h"))
    for _, tag in gate_tags:
        flow.watch(biases[tag],
                   test_global_storage.Setter("bias_blob_" + tag))

    def step_function(x_t, states):
        """Advance the cell by one time step; returns (hy, (hy, cy))."""
        hx = states[0]
        cx = states[1]

        # Input and recurrent projections for the i, f, c, o gates.
        from_input = [
            _FullyConnected(x_t, x_weights[tag], biases[tag])
            for _, tag in gate_tags
        ]
        from_hidden = [
            _FullyConnected(hx, h_weights[tag], None)
            for _, tag in gate_tags
        ]
        pre_i, pre_f, pre_c, pre_o = [
            x_part + h_part
            for x_part, h_part in zip(from_input, from_hidden)
        ]

        in_gate = flow.math.sigmoid(pre_i)
        forget_gate = flow.math.sigmoid(pre_f)
        candidate = flow.math.tanh(pre_c)
        out_gate = flow.math.sigmoid(pre_o)

        cy = forget_gate * cx + in_gate * candidate
        hy = out_gate * flow.math.tanh(cy)

        return hy, (hy, cy)

    if initial_state:
        states = initial_state
    else:
        states = [
            flow.constant(0, dtype=flow.float32, shape=[batch_size, units]),
            flow.constant(0, dtype=flow.float32, shape=[batch_size, units])
        ]

    successive_outputs = []
    for index in range(seq_len):
        # Take time step `index` and drop its length-1 time axis.
        step_input = flow.slice(input, [None, index, 0],
                                [None, 1, input_size])
        step_input = flow.reshape(step_input, [-1, input_size])
        step_output, states = step_function(step_input, states)
        successive_outputs.append(flow.reshape(step_output, [-1, 1, units]))

    last_output = successive_outputs[-1]
    outputs = flow.concat(successive_outputs, axis=1)

    if return_sequence:
        return outputs
    return flow.reshape(last_output, [-1, units])
Esempio n. 29
0
    def build_network(self, inputs):
        """Build the temporal neighbour-aggregation graph for ``inputs``.

        A 1x1x1 convolution plus L2 normalisation gives per-frame features;
        their similarity to the features of temporally neighbouring frames is
        turned into softmax weights that re-sample the neighbouring frames.
        If ``self.contrastive_att`` is set, a sigmoid gate computed from the
        inputs and the re-sampled neighbours scales the neighbours. Finally
        the original frames and the neighbours are interleaved along the time
        axis and added to a zero-initialised variable.

        Args:
            inputs: 5-D blob whose shape unpacks as ``(b, c, t, h, w)``.
                The reshapes below use ``c // 16`` as the registered feature
                width - assumes ``self.out_channels == c // 16``; TODO confirm.

        Returns:
            Blob of shape ``(b, c, N * t, h, w)`` with ``N = self.time_dim``.
        """
        b, c, t, h, w = inputs.shape
        N = self.time_dim
        time_pad = (self.time_dim - 1) // 2

        def take_frames(blob, frame_ids):
            # One length-1 slice along the time axis (axis 2) per frame id.
            return [
                flow.slice(blob,
                           begin=[None, None, int(index), None, None],
                           size=[None, None, 1, None, None])
                for index in frame_ids
            ]

        def tile_new_axis(blob):
            # Insert axis 3 and stack N - 1 copies of `blob` along it.
            single = flow.expand_dims(blob, axis=3)
            tiled = single
            for _ in range(N - 2):
                tiled = flow.concat(inputs=[tiled, single], axis=3)
            return tiled

        # Frame ids (into the time-padded tensors) of every neighbour of every
        # frame, frame-major, as plain Python ints.
        offsets = [i for i in range(N) if i != N // 2]
        neighbor_time_index = [
            frame + offset for frame in range(t) for offset in offsets
        ]

        semantic = conv3d_layer("conv_semantic_",
                                inputs,
                                self.out_channels,
                                kernel_size=1,
                                use_bias=False,
                                padding="SAME")

        inputs_norm = flow.math.l2_normalize(semantic, axis=1)

        inputs_norm_padding = flow.pad(
            inputs_norm,
            paddings=[(0, 0), (0, 0), (time_pad, time_pad), (0, 0), (0, 0)])
        inputs_norm_expand = tile_new_axis(inputs_norm)

        print("inputs_norm_expand", inputs_norm_expand.shape)
        # Move the channel axis last with a real transpose: reshaping to the
        # permuted shape would keep the memory order and mix channels with
        # positions.
        inputs_norm_expand = flow.transpose(inputs_norm_expand,
                                            perm=[0, 2, 3, 4, 5, 1])
        inputs_norm_expand = flow.reshape(inputs_norm_expand,
                                          shape=[-1, h * w, c // 16])

        # Concatenate once, after all neighbour slices have been collected.
        neighbor_norm = flow.concat(
            take_frames(inputs_norm_padding, neighbor_time_index), axis=2)
        neighbor_norm = flow.transpose(neighbor_norm, perm=[0, 2, 1, 3, 4])
        neighbor_norm = flow.reshape(neighbor_norm, shape=[-1, c // 16, h * w])

        similarity = flow.matmul(inputs_norm_expand,
                                 neighbor_norm) * self.temperature
        similarity = nn.softmax(similarity, axis=-1)

        inputs_padding = flow.pad(
            inputs,
            paddings=[(0, 0), (0, 0), (time_pad, time_pad), (0, 0), (0, 0)])
        # Equivalent to inputs_padding[:, :, neighbor_time_index, :, :].
        neighbor = flow.concat(
            take_frames(inputs_padding, neighbor_time_index), axis=2)
        neighbor = flow.transpose(neighbor, perm=[0, 2, 3, 4, 1])
        neighbor = flow.reshape(neighbor, shape=[-1, h * w, c])

        neighbor_new = flow.matmul(similarity, neighbor)
        neighbor_new = flow.reshape(neighbor_new,
                                    shape=[b, t * (N - 1), h, w, c])
        neighbor_new = flow.transpose(neighbor_new, perm=[0, 4, 1, 2, 3])

        if self.contrastive_att:
            # Tile the inputs N - 1 times so they line up with the neighbours.
            temp_input = tile_new_axis(inputs)
            temp_input = flow.reshape(temp_input,
                                      shape=[b, c, (N - 1) * t, h, w])
            input_att = conv3d_layer("conv3d_inputmapping",
                                     temp_input,
                                     self.out_channels,
                                     kernel_size=1,
                                     use_bias=False,
                                     trainable=False)

            n_att = conv3d_layer("conv3d_nmapping",
                                 neighbor_new,
                                 self.out_channels,
                                 kernel_size=1,
                                 use_bias=False,
                                 trainable=False)
            # A single output channel so the gate broadcasts over channels.
            contrastive_att_net = conv3d_layer("conv3d_att_net",
                                               input_att * n_att,
                                               1,
                                               kernel_size=1,
                                               use_bias=False)
            attention = flow.math.sigmoid(contrastive_att_net)
            # Gate the neighbours with the attention map itself, not with the
            # boolean switch that enables this branch.
            neighbor_new = neighbor_new * attention

        # NOTE: device placement is left unspecified for now.
        init = flow.zeros_initializer()
        input_offset = flow.get_variable("input_offset",
                                         shape=(b, c, N * t, h, w),
                                         initializer=init,
                                         dtype=inputs.dtype,
                                         trainable=True)

        # Output time slots: within every group of N slots the middle one
        # receives the original frame, the others receive the neighbours.
        input_index = [i for i in range(t * N) if i % N == N // 2]
        neighbor_index = [i for i in range(t * N) if i % N != N // 2]

        print("input_offset:", input_offset.shape)
        input_offset_list = take_frames(input_offset,
                                        range(input_offset.shape[2]))
        inputs_list = take_frames(inputs, range(inputs.shape[2]))
        neighbor_new_list = take_frames(neighbor_new,
                                        range(neighbor_new.shape[2]))

        for position, index in enumerate(input_index):
            input_offset_list[index] += inputs_list[position]

        for position, index in enumerate(neighbor_index):
            input_offset_list[index] += neighbor_new_list[position]

        input_offset = flow.concat(input_offset_list, axis=2)
        print("after", input_offset.shape)

        return input_offset
def _test_fused_self_attention(test_case, batch_size, seq_len, num_heads,
                               head_size):
    """Check ``fused_self_attention`` against an unfused reference.

    The same random input of shape (seq_len, batch_size,
    num_heads * 3 * head_size) is fed to the fused op and to a reference
    built from reshape/slice/permute/matmul. Both the attention outputs and
    the input gradients must agree within atol = rtol = 1e-4.
    """
    hidden_size = num_heads * 3 * head_size
    x = np.random.randn(seq_len, batch_size, hidden_size)

    # Fused path.
    fused_input = flow.Tensor(x).to("cuda")
    fused_input.requires_grad = True
    fused_qmk, fused_v = flow._C.fused_self_attention(
        fused_input,
        head_size=head_size,
        alpha=1.0,
    )
    fused_atten = flow.matmul(fused_qmk, fused_v)
    fused_atten_sum = fused_atten.sum()

    # Reference path: split the last axis into per-head (q, k, v) chunks.
    origin_input = flow.Tensor(x).to("cuda")
    origin_input.requires_grad = True
    reshape_input = flow.reshape(origin_input,
                                 (seq_len, batch_size, -1, 3 * head_size))

    def take_chunk(chunk):
        # chunk 0/1/2 selects the q/k/v third of the last axis.
        sliced = flow.slice(
            reshape_input,
            slice_tup_list=[
                [None, None, None],
                [None, None, None],
                [None, None, None],
                [chunk * head_size, (chunk + 1) * head_size, 1],
            ],
        )
        return sliced.permute(1, 2, 0, 3)

    origin_q = take_chunk(0)
    origin_k = take_chunk(1)
    origin_v = take_chunk(2)

    origin_qmk = flow.matmul(origin_q, origin_k.transpose(2, 3))
    origin_atten = flow.matmul(origin_qmk, origin_v)
    origin_atten_sum = origin_atten.sum()

    # One backward pass populates the gradients of both inputs.
    (fused_atten_sum + origin_atten_sum).backward()

    compared = (
        (fused_atten, origin_atten),
        (fused_input.grad, origin_input.grad),
    )
    for fused_value, origin_value in compared:
        test_case.assertTrue(
            np.allclose(fused_value.numpy(),
                        origin_value.numpy(),
                        atol=1e-4,
                        rtol=1e-4))