예제 #1
0
def nms(input_scores,
        input_boxes,
        threshold=0.7,
        numDetections=300,
        score_threshold=None,
        debugContext=''):
    """Run the ``ai.graphcore`` ``nms`` custom op on scores and boxes.

    Both inputs are cast to ``'FLOAT'``. Scores are shifted by ``1e-6``,
    multiplied by the mask from ``get_valid_area_mask`` and, when
    ``score_threshold`` is given, multiplied by a 0/1 mask of the entries
    strictly greater than that threshold. Scores and boxes are detached
    before being handed to the custom op.

    Args:
        input_scores: score tensor; a rank-1 tensor gets a leading axis added.
            (assumes one score per box -- TODO confirm against callers)
        input_boxes: box tensor; a rank-2 tensor gets a leading axis added.
            After that its first dimension must be 1.
        threshold: forwarded to the custom op as attribute ``"threshold"``.
        numDetections: forwarded to the custom op as ``"numDetections"``.
        score_threshold: optional ``float``; ``None`` disables the filter.
        debugContext: passed to the ``bF.identity`` calls.

    Returns:
        Tuple ``(output_boxes, output_keep, num_valids)``. ``output_boxes``
        is the op's second output multiplied by a flag that is 1 where
        ``output_keep > -1`` and 0 elsewhere; ``output_keep`` is the op's
        third output; ``num_valids`` is that flag summed over axis 1.

    Raises:
        AssertionError: if the box batch dimension is not 1, or if
            ``score_threshold`` is neither ``None`` nor a ``float``.
    """
    load_lib()
    scores = input_scores.cast('FLOAT')
    boxes = input_boxes.cast('FLOAT')

    area_mask = get_valid_area_mask(boxes)
    area_mask = bF.transpose(area_mask, [1, 0])  # original note: "1,n"

    # if score==0, proposals will be ignored
    scores = scores + 1e-6
    masked_scores = scores * area_mask
    nms_scores = bF.identity(masked_scores,
                             debugContext=debugContext).detach()
    nms_boxes = bF.identity(boxes, debugContext=debugContext).detach()

    # The op is fed batched tensors: add the leading axis when it is missing.
    if nms_scores.shape.ndims == 1:
        nms_scores = nms_scores.unsqueeze(0)
    if nms_boxes.shape.ndims == 2:
        nms_boxes = nms_boxes.unsqueeze(0)
    assert nms_boxes.pureShape[0] == 1, 'only implemented batch=1'

    if score_threshold is not None:
        assert isinstance(score_threshold, float)
        # Zero out every score that is not strictly above the threshold.
        above = bF.greater(
            nms_scores,
            bF.to_tensor(score_threshold, dtype=nms_scores.dtype))
        above = bF.cast(above, target_type=nms_scores.dtype)
        nms_scores = nms_scores * above

    with bF.name_scope("nms"):
        builder = bF.get_builder()
        op_inputs = [nms_scores.getIpuIndex(), nms_boxes.getIpuIndex()]
        op_attrs = {
            "threshold": threshold,
            "numDetections": numDetections
        }
        out = builder.customOp(opName="nms",
                               opVersion=1,
                               domain="ai.graphcore",
                               inputs=op_inputs,
                               attributes=op_attrs,
                               numOutputs=3,
                               name="nmsCustomOp")

        # The first output of the op is not used here.
        _unused = out[0]
        output_boxes = bF.TTensor(out[1])
        output_keep = bF.TTensor(out[2])
        score_dtype = scores.dtype

        # Flag is 1 where the keep value is greater than -1, else 0.
        minus_one = bF.constant(np.asarray(-1, dtype=np.int32))
        keep_flag = bF.greater(output_keep, minus_one)
        keep_flag = bF.cast(keep_flag, target_type='INT32')
        num_valids = bF.reduceSum(keep_flag, axes=[1])

        # Multiply the boxes by the flag so non-kept rows become zero.
        keep_flag = bF.cast(keep_flag, target_type=score_dtype)
        keep_flag = bF.unsqueeze(keep_flag, [-1])
        output_boxes = bF.mul([output_boxes, keep_flag])
    return output_boxes, output_keep, num_valids
예제 #2
0
def roi_align(bottom_data,
              bottom_rois,
              spatial_scale=1 / 16.0,
              num_rois=300,
              aligned_height=7,
              aligned_width=7,
              fp16_on=None):
    """Apply the ``ai.graphcore`` ``roiAlign`` custom op.

    Args:
        bottom_data: feature tensor whose ``pureShape`` unpacks into
            ``(batch_size, channels, height, width)``.
        bottom_rois: ROI tensor; cast to ``'FLOAT'`` (or ``'FLOAT16'`` on the
            half-precision path) before use.
        spatial_scale: forwarded to the op as ``"spatial_scale"``.
        num_rois: forwarded to the op as ``"num_rois"``.
        aligned_height: output height attribute; must be an ``int``.
        aligned_width: output width attribute; must be an ``int``.
        fp16_on: half-precision switch resolved through ``bF.deduce_half``.

    Returns:
        The op's single output wrapped in ``bF.TTensor``, cast to
        ``cast_flag`` when ``bF.deduce_half`` returned a truthy one.

    Raises:
        AssertionError: if ``aligned_height`` or ``aligned_width`` is not
            an ``int``.
        NotImplementedError: if half precision ends up enabled.
    """
    load_lib()

    sizes_are_int = isinstance(aligned_height, int) and isinstance(
        aligned_width, int)
    assert sizes_are_int, \
        'they should be int or IndexError: map::at will raised'

    cast_flag, bottom_data, fp16_on = bF.deduce_half(bottom_data, fp16_on)
    bottom_rois = bottom_rois.cast('FLOAT16' if fp16_on else 'FLOAT')

    # The half-precision path is rejected after the ROI cast, as before.
    if fp16_on:
        raise NotImplementedError('maybe not implemented')

    # same as detectron2 roi_align version2(aligned=True and sampling_ratio=1)
    batch_size, channels, height, width = bottom_data.pureShape
    with bF.name_scope("roiAlign"):
        builder = bF.get_builder()
        op_inputs = [bottom_data.getIpuIndex(), bottom_rois.getIpuIndex()]
        op_attrs = {
            "spatial_scale": spatial_scale,
            "batch_size": batch_size,
            "num_rois": num_rois,
            "height": height,
            "width": width,
            "channels": channels,
            "aligned_height": aligned_height,
            "aligned_width": aligned_width
        }
        out = builder.customOp(opName="roiAlign",
                               opVersion=1,
                               domain="ai.graphcore",
                               inputs=op_inputs,
                               attributes=op_attrs,
                               numOutputs=1)
    pooled = bF.TTensor(out[0])

    # Cast back only when deduce_half asked for it.
    return pooled.cast(cast_flag) if cast_flag else pooled
예제 #3
0
def conv2d(input,
           filters,
           ksize=3,
           bias=True,
           train=True,
           strides=None,
           dilations=None,
           group=1,
           filters_data=None,
           bias_data=None,
           fp16_on=None,
           weights_fp16_on=None,
           padding_mode='same',
           debugContext='conv',
           bias_training=None):
    """Build a square-kernel 2D convolution with optional bias.

    Args:
        input: tensor whose ``pureShape`` unpacks into
            ``(batch, c_in, height, width)``.
        filters: number of output channels.
        ksize: kernel size used for both spatial dimensions.
        bias: whether to add a bias; must be truthy if ``bias_data`` is given.
        train: passed as ``train`` when creating the weights (and the bias,
            unless ``bias_training`` overrides it).
        strides: convolution strides; ``None`` means ``[1, 1]``.
        dilations: convolution dilations; ``None`` means ``[1, 1]``.
        group: forwarded to ``bF._conv2d``.
        filters_data: optional initial weights of shape
            ``[filters, c_in, ksize, ksize]``; defaults to all ones.
        bias_data: optional initial bias of shape ``[filters]``; defaults to
            zeros when ``bias`` is truthy.
        fp16_on: half-precision switch resolved through ``bF.deduce_half``.
        weights_fp16_on: explicit override for the weights' precision; takes
            priority over ``bF.get_weight_fp16()`` and ``fp16_on``.
        padding_mode: ``'same'`` pads ``ksize // 2`` on all four sides,
            ``'valid'`` pads nothing.
        debugContext: name prefix; a trailing ``'/'`` is appended when it is
            not empty.
        bias_training: ``train`` flag for the bias; ``None`` means follow
            ``train``.

    Returns:
        The result of ``bF._conv2d``, cast to ``cast_flag`` when
        ``bF.deduce_half`` returned a truthy one.

    Raises:
        AssertionError: if ``filters_data`` / ``bias_data`` have the wrong
            shape, or ``bias_data`` is given while ``bias`` is falsy.
        RuntimeError: if fp16 weights are requested for an fp32 input.
        NotImplementedError: if ``padding_mode`` is not ``'same'`` or
            ``'valid'``.
    """
    # Build fresh lists on every call: list literals used as default values
    # are shared across calls and would leak any in-place modification made
    # downstream into later calls.
    strides = [1, 1] if strides is None else strides
    dilations = [1, 1] if dilations is None else dilations

    cast_flag, input, fp16_on = bF.deduce_half(input, fp16_on)

    batch, c_in, height, width = input.pureShape
    if debugContext != '':
        debugContext = debugContext + '/'
    weights_shape = [filters, c_in, ksize, ksize]
    if filters_data is not None:
        assert np.all(
            np.asarray(filters_data.shape) == np.asarray(weights_shape))
    else:
        filters_data = np.ones(weights_shape, bF.mappin_gc2npy[input.dtype])

    # Weight precision priority: explicit argument > global setting > fp16_on.
    local_weights_fp16_on = fp16_on
    if bF.get_weight_fp16() is not None:
        local_weights_fp16_on = bF.get_weight_fp16()
    if weights_fp16_on is not None:
        local_weights_fp16_on = weights_fp16_on
    if input.dtype.upper() in ['FLOAT', 'FLOAT32'] and local_weights_fp16_on:
        raise RuntimeError('weights cannnot be fp16 while input is fp32')
    weights = temporary_init_weights(filters_data,
                                     debugContext + "weight",
                                     fp16_on=local_weights_fp16_on,
                                     train=train)
    if fp16_on and local_weights_fp16_on is False:
        if isinstance(weights, bF.ConstantTensor):
            # casting 32 to 16 might be different between IPU and numpy
            weights = bF.TTensor(weights.getIpuIndex())
        weights = weights.cast('FLOAT16')

    # init bias
    bias_shape = [filters]
    if bias_data is not None:
        assert bias
        assert np.all(np.asarray(bias_data.shape) == np.asarray(bias_shape))
    elif bias:
        bias_data = np.zeros(bias_shape, bF.mappin_gc2npy[input.dtype])

    if bias_data is not None:
        bias_training = train if bias_training is None else bias_training
        bias = temporary_init_weights(bias_data,
                                      debugContext + "bias",
                                      fp16_on=fp16_on,
                                      train=bias_training)
    else:
        # bF._conv2d receives False when no bias is wanted.
        bias = False

    if padding_mode == 'same':
        pads = [ksize // 2] * 4
    elif padding_mode == 'valid':
        pads = [0] * 4
    else:
        raise NotImplementedError(
            'unsupported padding_mode: {!r}'.format(padding_mode))

    result = bF._conv2d(input,
                        weights,
                        bias,
                        strides=strides,
                        pads=pads,
                        dilations=dilations,
                        group=group,
                        debugContext=debugContext)
    if cast_flag:
        result = result.cast(cast_flag)
    return result