def nms(input_scores,
        input_boxes,
        threshold=0.7,
        numDetections=300,
        score_threshold=None,
        debugContext=''):
    """Run the ``ai.graphcore`` ``nms`` custom op over scores and boxes.

    Both inputs are cast to FLOAT. The scores are nudged by 1e-6, multiplied
    by the mask returned from ``get_valid_area_mask`` and, when
    ``score_threshold`` is given, zeroed wherever they are not strictly
    greater than it. Scores and boxes are detached before entering the op.

    Args:
        input_scores: score tensor; a rank-1 tensor gets a leading axis.
        input_boxes: box tensor; a rank-2 tensor gets a leading axis. After
            that its leading dimension must be 1.
        threshold: forwarded as the op's ``threshold`` attribute (presumably
            the overlap threshold -- confirm against the op implementation).
        numDetections: forwarded as the op's ``numDetections`` attribute.
        score_threshold: optional float; must be a ``float`` instance.
        debugContext: debug name handed to the identity ops.

    Returns:
        A tuple ``(output_boxes, output_keep, num_valids)``:
        the op's boxes multiplied by a 0/1 flag that is 1 where the keep
        entry is greater than -1; the op's keep tensor unchanged; and the sum
        of those flags over axis 1.
    """
    load_lib()

    scores_f32 = input_scores.cast('FLOAT')
    boxes_f32 = input_boxes.cast('FLOAT')

    # Per the original author's note the transposed mask is shaped (1, n).
    area_mask = bF.transpose(get_valid_area_mask(boxes_f32), [1, 0])

    # if score==0, proposals will be ignored
    nudged_scores = scores_f32 + 1e-6

    masked_scores = nudged_scores * area_mask
    op_scores = bF.identity(masked_scores, debugContext=debugContext).detach()
    op_boxes = bF.identity(boxes_f32, debugContext=debugContext).detach()

    # Give both op inputs an explicit leading (batch) axis.
    if op_scores.shape.ndims == 1:
        op_scores = op_scores.unsqueeze(0)
    if op_boxes.shape.ndims == 2:
        op_boxes = op_boxes.unsqueeze(0)
    assert op_boxes.pureShape[0] == 1, 'only implemented batch=1'

    if score_threshold is not None:
        assert isinstance(score_threshold, float)
        limit = bF.to_tensor(score_threshold, dtype=op_scores.dtype)
        above_limit = bF.greater(op_scores, limit)
        above_limit = bF.cast(above_limit, target_type=op_scores.dtype)
        # Multiplying by the 0/1 mask zeroes scores at or below the limit.
        op_scores = op_scores * above_limit

    with bF.name_scope("nms"):
        builder = bF.get_builder()
        op_inputs = [op_scores.getIpuIndex(), op_boxes.getIpuIndex()]
        op_attributes = {
            "threshold": threshold,
            "numDetections": numDetections,
        }
        out = builder.customOp(opName="nms",
                               opVersion=1,
                               domain="ai.graphcore",
                               inputs=op_inputs,
                               attributes=op_attributes,
                               numOutputs=3,
                               name="nmsCustomOp")

    # Only the second and third op outputs are consumed here.
    output_boxes = bF.TTensor(out[1])
    output_keep = bF.TTensor(out[2])

    flag_dtype = nudged_scores.dtype

    # Keep entries greater than -1 are counted as valid detections.
    minus_one = bF.constant(np.asarray(-1, dtype=np.int32))
    keep_flags = bF.cast(bF.greater(output_keep, minus_one),
                         target_type='INT32')
    num_valids = bF.reduceSum(keep_flags, axes=[1])

    # Zero out the boxes whose keep entry is not valid.
    keep_flags = bF.cast(keep_flags, target_type=flag_dtype)
    keep_flags = bF.unsqueeze(keep_flags, [-1])
    output_boxes = bF.mul([output_boxes, keep_flags])

    return output_boxes, output_keep, num_valids
def roi_align(bottom_data,
              bottom_rois,
              spatial_scale=1 / 16.0,
              num_rois=300,
              aligned_height=7,
              aligned_width=7,
              fp16_on=None):
    """Apply the ``ai.graphcore`` ``roiAlign`` custom op to a feature map.

    Args:
        bottom_data: feature tensor whose ``pureShape`` unpacks as
            (batch_size, channels, height, width).
        bottom_rois: ROI tensor; cast to FLOAT16 or FLOAT to follow the
            half-precision decision returned by ``bF.deduce_half``.
        spatial_scale: forwarded as the op's ``spatial_scale`` attribute.
        num_rois: forwarded as the op's ``num_rois`` attribute.
        aligned_height: output height attribute; must be an ``int``.
        aligned_width: output width attribute; must be an ``int``.
        fp16_on: half-precision switch passed to ``bF.deduce_half``.

    Returns:
        The op's single output wrapped in ``bF.TTensor``, cast with
        ``cast_flag`` when ``bF.deduce_half`` returned a truthy one.

    Raises:
        AssertionError: if either aligned size is not an ``int``.
        NotImplementedError: if half precision ends up enabled.
    """
    load_lib()

    sizes_are_int = all(
        isinstance(size, int) for size in (aligned_height, aligned_width))
    assert sizes_are_int, (
        'they should be int or IndexError: map::at will raised')

    cast_flag, bottom_data, fp16_on = bF.deduce_half(bottom_data, fp16_on)

    # The ROIs follow the precision chosen for the feature map; the cast is
    # emitted before the half-precision guard below.
    rois_dtype = 'FLOAT16' if fp16_on else 'FLOAT'
    bottom_rois = bottom_rois.cast(rois_dtype)

    if fp16_on:
        raise NotImplementedError('maybe not implemented')

    # same as detectron2 roi_align version2(aligned=True and sampling_ratio=1)
    batch_size, channels, height, width = bottom_data.pureShape

    with bF.name_scope("roiAlign"):
        builder = bF.get_builder()
        op_inputs = [bottom_data.getIpuIndex(), bottom_rois.getIpuIndex()]
        op_attributes = {
            "spatial_scale": spatial_scale,
            "batch_size": batch_size,
            "num_rois": num_rois,
            "height": height,
            "width": width,
            "channels": channels,
            "aligned_height": aligned_height,
            "aligned_width": aligned_width,
        }
        out = builder.customOp(opName="roiAlign",
                               opVersion=1,
                               domain="ai.graphcore",
                               inputs=op_inputs,
                               attributes=op_attributes,
                               numOutputs=1)

    result = bF.TTensor(out[0])
    # Undo any cast that deduce_half applied to the input.
    return result.cast(cast_flag) if cast_flag else result
def conv2d(input,
           filters,
           ksize=3,
           bias=True,
           train=True,
           strides=None,
           dilations=None,
           group=1,
           filters_data=None,
           bias_data=None,
           fp16_on=None,
           weights_fp16_on=None,
           padding_mode='same',
           debugContext='conv',
           bias_training=None):
    """Build a 2-D convolution with square kernels via ``bF._conv2d``.

    Args:
        input: input tensor whose ``pureShape`` unpacks into four values, the
            second being the input channel count.
        filters: number of output channels.
        ksize: kernel edge length; weights are ``[filters, c_in, ksize, ksize]``.
        bias: whether to add a bias. Must be truthy if ``bias_data`` is given.
        train: passed to ``temporary_init_weights`` for the weights, and for
            the bias when ``bias_training`` is ``None``.
        strides: convolution strides; ``None`` means ``[1, 1]``.
        dilations: convolution dilations; ``None`` means ``[1, 1]``.
        group: forwarded to ``bF._conv2d``.
        filters_data: optional initial weights; shape must equal the weight
            shape above. Defaults to ones.
        bias_data: optional initial bias of shape ``[filters]``. Defaults to
            zeros when ``bias`` is truthy.
        fp16_on: half-precision switch passed to ``bF.deduce_half``.
        weights_fp16_on: explicit override for weight precision; takes
            priority over ``bF.get_weight_fp16()``, which in turn takes
            priority over ``fp16_on``.
        padding_mode: ``'same'`` pads ``ksize // 2`` on every side,
            ``'valid'`` pads nothing.
        debugContext: name prefix; a ``'/'`` is appended when non-empty.
        bias_training: trainability of the bias; falls back to ``train``.

    Returns:
        The result of ``bF._conv2d``, cast with ``cast_flag`` when
        ``bF.deduce_half`` returned a truthy one.

    Raises:
        AssertionError: on a weight/bias shape mismatch, or if ``bias_data``
            is given while ``bias`` is falsy.
        RuntimeError: if fp16 weights are requested for an fp32 input.
        NotImplementedError: for an unknown ``padding_mode``.
    """
    # Fresh lists per call: list literals as defaults would be shared between
    # every call and are handed straight to bF._conv2d.
    strides = [1, 1] if strides is None else strides
    dilations = [1, 1] if dilations is None else dilations

    cast_flag, input, fp16_on = bF.deduce_half(input, fp16_on)
    # Only the channel count is needed; the 4-way unpack still enforces rank.
    _, c_in, _, _ = input.pureShape

    if debugContext != '':
        debugContext = debugContext + '/'

    # NOTE(review): c_in is used even when group > 1; ONNX-style grouped
    # convolutions expect c_in / group here -- confirm against bF._conv2d.
    weights_shape = [filters, c_in, ksize, ksize]
    if filters_data is not None:
        assert np.all(
            np.asarray(filters_data.shape) == np.asarray(weights_shape))
    else:
        filters_data = np.ones(weights_shape, bF.mappin_gc2npy[input.dtype])

    # Weight precision, lowest to highest priority: the activation precision,
    # the value from bF.get_weight_fp16(), then the explicit argument.
    local_weights_fp16_on = fp16_on
    if bF.get_weight_fp16() is not None:
        local_weights_fp16_on = bF.get_weight_fp16()
    if weights_fp16_on is not None:
        local_weights_fp16_on = weights_fp16_on

    if input.dtype.upper() in ['FLOAT', 'FLOAT32'] and local_weights_fp16_on:
        raise RuntimeError('weights cannnot be fp16 while input is fp32')

    weights = temporary_init_weights(filters_data,
                                     debugContext + "weight",
                                     fp16_on=local_weights_fp16_on,
                                     train=train)

    # fp16 activations with explicitly non-fp16 weights: cast the weights to
    # FLOAT16 before the convolution.
    if fp16_on and local_weights_fp16_on is False:
        if isinstance(weights, bF.ConstantTensor):
            # casting 32 to 16 might be different between IPU and numpy
            weights = bF.TTensor(weights.getIpuIndex())
        weights = weights.cast('FLOAT16')

    # init bias
    bias_shape = [filters]
    if bias_data is not None:
        assert bias
        assert np.all(np.asarray(bias_data.shape) == np.asarray(bias_shape))
    elif bias:
        bias_data = np.zeros(bias_shape, bF.mappin_gc2npy[input.dtype])

    if bias_data is not None:
        bias_training = train if bias_training is None else bias_training
        bias = temporary_init_weights(bias_data,
                                      debugContext + "bias",
                                      fp16_on=fp16_on,
                                      train=bias_training)
    else:
        bias = False

    if padding_mode == 'same':
        pads = [ksize // 2] * 4
    elif padding_mode == 'valid':
        pads = [0] * 4
    else:
        raise NotImplementedError(
            "unsupported padding_mode {!r}; expected 'same' or 'valid'".format(
                padding_mode))

    result = bF._conv2d(input,
                        weights,
                        bias,
                        strides=strides,
                        pads=pads,
                        dilations=dilations,
                        group=group,
                        debugContext=debugContext)
    if cast_flag:
        result = result.cast(cast_flag)
    return result