def infer(self, image_path):
    """Infers model on given image.

    Args:
        image_path (str): image to run object detection model on
    """
    # Load image into CPU
    img = self._load_img(image_path)
    # Copy it into appropriate place into memory
    # (self.inputs was returned earlier by allocate_buffers())
    np.copyto(self.inputs[0].host, img.ravel())

    # When inferring on a single image, we measure inference
    # time to output it to the user
    inference_start_time = time.time()

    # Fetch output from the model
    [detection_out, keep_count_out] = common.do_inference(
        self.context, bindings=self.bindings, inputs=self.inputs,
        outputs=self.outputs, stream=self.stream)

    cur_time = int(round((time.time() - inference_start_time) * 1000))
    # Output inference time
    print("TensorRT inference time: {} ms".format(cur_time))

    # And return results
    return cur_time, detection_out, keep_count_out
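# A minimal usage sketch for infer() above. TRTDetector is a hypothetical
# wrapper (an assumption, not defined in this file) that deserializes the
# engine and sets up self.context, self.bindings, self.inputs, self.outputs
# and self.stream via common.allocate_buffers().
def example_single_image_inference():
    detector = TRTDetector()  # hypothetical wrapper
    inference_ms, detection_out, keep_count_out = detector.infer("sample.jpg")
    print("inference took {} ms, kept {} detections".format(
        inference_ms, keep_count_out[0]))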
def rest(self):
    """Runs inference on the batch already staged in ``self.numpy_array``.

    Unlike ``infer_batch`` this variant only fetches the detection tensor
    and leaves the keep-count handling commented out.
    """
    np.copyto(self.inputs[0].host, self.numpy_array.ravel())

    detections_out = np.zeros((0), dtype=np.float32)
    # keep_counts_out = np.zeros((0), dtype=np.int32)

    max_batch_size = self.trt_engine.max_batch_size
    # Note: as written, range(0, max_batch_size, max_batch_size) yields a
    # single iteration (i == 0), i.e. exactly one engine-sized batch is run.
    for i in range(0, max_batch_size, max_batch_size):
        # batch detections
        [detection_out] = common.do_inference(
            self.context, bindings=self.bindings, inputs=self.inputs,
            outputs=self.outputs, stream=self.stream,
            batch_size=max_batch_size)
        # Image indices inside a batch are 0-based (0 to BATCH_SIZE-1), so the
        # absolute batch offset must be added to get the absolute image index.
        # Each image gets at most 200 detections of 7 floats each; the
        # image_id float sits at position 0, see TRT_PREDICTION_LAYOUT.
        # for f in range(max_batch_size):
        #     for c in range(keep_count_out[f]):
        #         detection_out[(f * 200 + c) * 7 + 0] += i
        detections_out = np.append(detections_out, detection_out, axis=0)
        # keep_counts_out = np.append(keep_counts_out, keep_count_out, axis=0)

    return detections_out  # , keep_counts_out
def infer_batch(self, img_np):
    """Infers model on a batch of same-sized images resized to fit the model.

    Args:
        img_np (numpy.ndarray): images that will be packed into batches
            and fed into the model
    """
    max_batch_size = self.trt_engine.max_batch_size
    numpy_array = img_np
    actual_batch_size = len(img_np)

    results = np.zeros(0, dtype=np.float32)
    batch_num = 0
    # Feed the images to the engine in chunks of at most max_batch_size
    for start_idx in range(0, actual_batch_size, max_batch_size):
        batch_num += 1
        print("Loop #{}".format(batch_num))
        end_idx = min(start_idx + max_batch_size, actual_batch_size)
        effective_batch_size = end_idx - start_idx

        self.inputs[0].host = numpy_array[start_idx:end_idx]
        [result] = common.do_inference(self.context, self.bindings,
                                       self.inputs, self.outputs,
                                       self.stream, effective_batch_size)
        results = np.append(results, result)

    # The engine always returns max_batch_size worth of output; trim the
    # padding when fewer images than that were actually submitted.
    if actual_batch_size < max_batch_size:
        results = results[:int(len(results) * (actual_batch_size / max_batch_size))]
    return results
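# A minimal usage sketch for infer_batch() above. `model`, `preprocess` and
# `image_paths` are hypothetical placeholders (assumptions, not defined in
# this file); each preprocessed image must already match the engine's input
# layout.
import numpy as np

def example_batched_inference():
    batch = np.stack([preprocess(p) for p in image_paths]).astype(np.float32)
    # flat float32 array, trimmed to the real batch size
    flat_results = model.infer_batch(batch)
    print(flat_results.shape)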
def main_yolov3tiny_test():
    anchors = [[(81, 82), (135, 169), (344, 319)],
               [(10, 14), (23, 27), (37, 58)]]
    yolo1 = YOLO_NP(anchors[0], 2, 416)
    yolo2 = YOLO_NP(anchors[1], 2, 416)

    trt_engine = './weights/yolov3-mytiny_98_0.96_warehouse_3.trt'  # 128, float16, 0.4 s
    engine = load_engine(trt_engine)
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)

    t1 = time.time()
    img, org_size = get_sample()
    with engine.create_execution_context() as context:
        # common.do_inference returns a list of outputs -- the tiny model has
        # two YOLO heads.
        np.copyto(inputs[0].host, img)
        res = common.do_inference(context, bindings=bindings, inputs=inputs,
                                  outputs=outputs, stream=stream)
    t2 = time.time()

    # Reshape the flat outputs into (batch, channels, grid, grid)
    o1 = res[0].reshape((1, 21, 13, 13))
    o2 = res[1].reshape((1, 21, 26, 26))
    yolo_output1 = yolo1(o1)
    yolo_output2 = yolo2(o2)
    detections = np.concatenate([yolo_output1, yolo_output2], 1)
    detections = non_max_suppression_np(detections, 0.5, 0.4)[0]
    detections = rescale_boxes(np.array(detections), 416, org_size)
    t3 = time.time()

    print('detect res ', len(detections))
    print('raw_forward', t2 - t1)
    print('with nms', t3 - t1)
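# The hard-coded 21 output channels reshaped above decode as
# 3 anchors * (4 box coords + 1 objectness + num_classes), with num_classes=2
# matching the YOLO_NP(anchors, 2, 416) constructor calls. Quick sanity check:
num_anchors, num_classes = 3, 2
assert num_anchors * (4 + 1 + num_classes) == 21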
def infer_batch(self, img_np):
    """Infers model on a batch of same-sized images resized to fit the model.

    Args:
        img_np (numpy.ndarray): images that will be packed into a batch
            and fed into the model
    """
    max_batch_size = self.trt_engine.max_batch_size
    actual_batch_size = len(img_np)

    # Load all images to CPU...
    self.numpy_array = img_np
    # ...copy them into appropriate place into memory...
    np.copyto(self.inputs[0].host, self.numpy_array.ravel())

    # ...fetch model outputs...
    # all detections
    detections_out = np.zeros((0), dtype=np.float32)
    keep_counts_out = np.zeros((0), dtype=np.int32)

    # Go through the input batch by batch. Note: as written,
    # range(0, max_batch_size, max_batch_size) yields a single iteration
    # (i == 0), since the host buffer only holds one engine-sized batch.
    for i in range(0, max_batch_size, max_batch_size):
        # batch detections
        [detection_out, keep_count_out] = common.do_inference(
            self.context, bindings=self.bindings, inputs=self.inputs,
            outputs=self.outputs, stream=self.stream,
            batch_size=max_batch_size)
        # Image indices inside a batch are 0-based (0 to BATCH_SIZE-1), so the
        # absolute batch offset must be added to get the absolute image index.
        # Each image gets at most 200 detections of 7 floats each; the
        # image_id float sits at position 0, see TRT_PREDICTION_LAYOUT.
        for f in range(max_batch_size):
            for c in range(keep_count_out[f]):
                detection_out[(f * 200 + c) * 7 + 0] += i
        detections_out = np.append(detections_out, detection_out, axis=0)
        keep_counts_out = np.append(keep_counts_out, keep_count_out, axis=0)

    return detections_out, keep_counts_out
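# A minimal sketch of unpacking the flat detection buffer returned above. The
# 7-float field order shown here (image_id, label, confidence, xmin, ymin,
# xmax, ymax) is an assumption based on the usual TensorRT SSD
# TRT_PREDICTION_LAYOUT; only the image_id-at-offset-0 part is stated in the
# comments above.
import numpy as np

def unpack_detections(detections_out, keep_counts_out, dets_per_image=200, fields=7):
    # Each image owns a fixed dets_per_image * fields slice; only the first
    # keep_counts_out[f] rows of that slice are valid detections.
    per_image = []
    for f, kept in enumerate(keep_counts_out):
        base = f * dets_per_image * fields
        rows = detections_out[base:base + int(kept) * fields].reshape(-1, fields)
        per_image.append(rows)
    return per_image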
def run(self, ori_im):
    image_raw, image = self.preprocessor.process(ori_im)
    shape_orig_WH = image_raw.size

    self.inputs[0].host = image
    trt_outputs = common.do_inference(self.context, bindings=self.bindings,
                                      inputs=self.inputs, outputs=self.outputs,
                                      stream=self.stream)
    # Reshape flat output arrays back into the network's output shapes
    trt_outputs = [
        output.reshape(shape)
        for output, shape in zip(trt_outputs, self.output_shapes)
    ]

    bbox_xywh, cls_ids, cls_conf = self.postprocessor.process(
        trt_outputs, shape_orig_WH)
    if bbox_xywh is not None:
        # select person class
        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]
        # widen the boxes slightly so the tracker sees some context
        bbox_xywh[:, 3:] *= 1.2
        cls_conf = cls_conf[mask]

        # do tracking
        outputs = self.deepsort.update(bbox_xywh, cls_conf, ori_im)

        # draw boxes for visualization
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            ori_im = draw_boxes(ori_im, bbox_xyxy, identities)
    return ori_im
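# A minimal usage sketch driving run() above from a video stream. `Tracker` is
# a hypothetical name for the class that owns run(); the input path is
# likewise an assumption.
import cv2

def example_tracking_loop():
    tracker = Tracker()  # hypothetical wrapper exposing run()
    cap = cv2.VideoCapture("input.mp4")  # assumed input video
    while cap.isOpened():
        ok, frame = cap.read()
        if not ok:
            break
        frame = tracker.run(frame)  # detect persons, update DeepSORT, draw boxes
        cv2.imshow("deepsort", frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    cv2.destroyAllWindows()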
def main():
    args = parse_args()
    cfg = CFG(args)
    # TensorRT dtype mapping, for reference:
    #   tensorrt.DataType.FLOAT -> tensorrt.float32
    #   tensorrt.DataType.HALF  -> tensorrt.float16
    #   tensorrt.DataType.INT32 -> tensorrt.int32
    #   tensorrt.DataType.INT8  -> tensorrt.int8
    # assert os.path.exists(args.model_path)
    output_shapes = (64, 21, 10, 16)

    input_img = cv2.imread('trump.jpg')  # BGR, HWC
    ori_shape = input_img.shape
    print(ori_shape)
    input_img = input_img[:, :, [2, 1, 0]]  # BGR -> RGB, HWC
    batch_img = list(np.tile(input_img, [64, 1, 1, 1]))

    # pre-processing
    print(1, 64, batch_img[0].shape)
    batch_img = batch_resize(batch_img)
    print(2, batch_img.shape)
    batch_img = normalize(batch_img)
    print(3, batch_img.shape)

    # TensorRT
    batch_img = np.array(batch_img, dtype=np.float32, order='C')
    with get_engine(args, cfg) as engine, \
            engine.create_execution_context() as context:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        inputs[0].host = batch_img
        trt_outputs = common.do_inference(context, bindings=bindings,
                                          inputs=inputs, outputs=outputs,
                                          stream=stream,
                                          batch_size=args.batch_size)
        print(trt_outputs)
        # common.do_inference returns flat arrays, so reshape before post-processing
        trt_outputs = trt_outputs[0].reshape(output_shapes)
        np.save('trt_outputs.npy', trt_outputs)
        print(trt_outputs.shape)

        rs = trt_outputs[0]
        print(rs.shape)
        # per-pixel argmax over the 21 class channels
        om = np.argmax(rs, axis=0)
        print(om.shape)
        rgb = decode_segmap(om)
        bgr = rgb[:, :, ::-1]  # RGB -> BGR
        print('rgb', bgr.shape)
        # cv2.resize expects (width, height); resizing directly to the original
        # resolution replaces the earlier resize-then-transpose workaround
        frame = cv2.resize(bgr, (ori_shape[1], ori_shape[0]),
                           interpolation=cv2.INTER_LINEAR)  # BGR, HWC
        cv2.imwrite("testing/test.jpg", frame)
        # import matplotlib.pyplot as plt
        # plt.imshow(rgb); plt.show()
    exit()  # everything below is scratch/exploration code
    # Scratch / exploration, unreachable because of the exit() above.
    # for trt_output in trt_outputs:
    #     print(trt_output)
    # om = np.argmax(trt_outputs)

    # with open('testing/colors.txt') as infile:
    #     classes = [line.split('\n')[0] for line in infile.readlines()]
    # classes = np.array([[int(x) for x in line.split(" ")] for line in classes])
    # print(classes.shape)

    # Per-class visualization experiment: colorize each class activation map,
    # resize it to the original resolution and dump it to disk. Commented out
    # because `classes` above is never actually loaded.
    # for idx, _class in enumerate(classes):
    #     frame = np.multiply(trt_outputs[idx], _class.reshape(3, 1, 1))  # RGB, CHW
    #     frame = np.transpose(frame, (1, 2, 0))  # RGB, HWC
    #     frame = cv2.resize(frame, (ori_shape[1], ori_shape[0]),
    #                        interpolation=cv2.INTER_LINEAR)
    #     frame = frame[..., ::-1]  # RGB -> BGR
    #     frame *= (255.0 / frame.max())  # normalise
    #     cv2.imwrite("testing/layer_{}.jpg".format(idx), frame)
    #     temp = cv2.resize(trt_outputs[idx], (ori_shape[1], ori_shape[0]),
    #                       interpolation=cv2.INTER_LINEAR)
    #     cv2.imwrite("testing/layer_{}.jpg".format(idx), temp)

    '''
    # Template: building and running a TensorRT engine by hand
    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(TRT_LOGGER)
    network = builder.create_network()
    dataLayer = network.add_input('data', trt.DataType.FLOAT, (c, h, w))
    # Add network layers here, then:
    network.mark_output(outputLayer.get_output(0))
    engine = builder.build_cuda_engine(network)
    context = engine.create_execution_context()
    context.execute_async(bindings=[d_input, d_output])
    '''
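# For reference, a minimal sketch of what a decode_segmap-style helper usually
# does. The fixed random palette is an assumption; the project's real color
# table (e.g. testing/colors.txt above) is not reproduced here.
import numpy as np

def decode_segmap_sketch(label_mask, num_classes=21):
    rng = np.random.RandomState(0)
    palette = rng.randint(0, 255, size=(num_classes, 3), dtype=np.uint8)
    # Fancy indexing maps every pixel's class id to an RGB color -> (H, W, 3)
    return palette[label_mask]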
def main_yolov3_test():
    anchors = [
        [(116, 90), (156, 198), (373, 326)],  # largest objects, predicted on the 13x13 grid
        [(30, 61), (62, 45), (59, 119)],      # mid-sized objects, predicted on the 26x26 grid
        [(10, 13), (16, 30), (33, 23)],       # smallest objects, predicted on the 52x52 grid
    ]
    yolo1 = YOLO_NP(anchors[0], 2, 416)
    yolo2 = YOLO_NP(anchors[1], 2, 416)
    yolo3 = YOLO_NP(anchors[2], 2, 416)

    img, org_size = get_sample()
    print(img.shape)

    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_2.trt'  # 128, 2.6 s per image
    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_3.trt'  # 256, 3.0 s
    # trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_4.trt'  # 64, 2.8 s
    trt_engine = './weights/yolov3-myyolov3_99_0.96_warehouse_5.trt'  # 128, float16, 0.4 s
    engine = load_engine(trt_engine)
    inputs, outputs, bindings, stream = common.allocate_buffers(engine)
    with engine.create_execution_context() as context:
        # common.do_inference returns a list of outputs -- the full model has
        # three YOLO heads.
        np.copyto(inputs[0].host, img)
        t1 = time.time()
        res = common.do_inference(context, bindings=bindings, inputs=inputs,
                                  outputs=outputs, stream=stream)
        t2 = time.time()

    # Reshape the flat outputs into (batch, channels, grid, grid)
    o1 = res[0].reshape((1, 21, 13, 13))
    o2 = res[1].reshape((1, 21, 26, 26))
    o3 = res[2].reshape((1, 21, 52, 52))
    yolo_output1 = yolo1(o1)
    yolo_output2 = yolo2(o2)
    yolo_output3 = yolo3(o3)
    detections = np.concatenate([yolo_output1, yolo_output2, yolo_output3], 1)
    detections = non_max_suppression_np(detections, 0.5, 0.4)[0]
    detections = rescale_boxes(np.array(detections), 416, org_size)
    t3 = time.time()

    # print('detect res ', len(detections))
    print('raw_forward', t2 - t1)
    print('with nms', t3 - t1)
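# A minimal sketch of the rescale_boxes step used above, assuming letterbox
# preprocessing to a square 416x416 input and org_size given as
# (height, width). The real rescale_boxes is imported elsewhere and may differ.
import numpy as np

def rescale_boxes_sketch(boxes, current_dim, original_shape):
    orig_h, orig_w = original_shape
    # Remove the padding that squared the image...
    pad_x = max(orig_h - orig_w, 0) * (current_dim / max(original_shape))
    pad_y = max(orig_w - orig_h, 0) * (current_dim / max(original_shape))
    unpad_w = current_dim - pad_x
    unpad_h = current_dim - pad_y
    # ...then scale (x1, y1, x2, y2) back to the original resolution
    boxes[:, [0, 2]] = (boxes[:, [0, 2]] - pad_x // 2) / unpad_w * orig_w
    boxes[:, [1, 3]] = (boxes[:, [1, 3]] - pad_y // 2) / unpad_h * orig_h
    return boxes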