def test_transforms_presets_yolo():
    """Smoke-test the YOLO preset transforms.

    First checks that ``yolo.load_test`` (from a file name) and
    ``yolo.transform_test`` (from an already decoded image) produce matching
    outputs for the same picture.  If ``~/.mxnet/datasets/voc`` is a
    directory, it then builds train/val data loaders on top of the YOLO3
    default transforms and pulls a few batches from each of them.
    """
    image_path = gcv.utils.download(
        'https://github.com/dmlc/web-data/blob/master/'
        'gluoncv/detection/biking.jpg?raw=true',
        path='biking.jpg')

    # Both entry points must agree on the tensor and on the returned image.
    from_file, from_file_img = yolo.load_test(image_path, short=512)
    from_array, from_array_img = yolo.transform_test(
        mx.image.imread(image_path), short=512)
    np.testing.assert_allclose(from_file.asnumpy(), from_array.asnumpy())
    np.testing.assert_allclose(from_file_img, from_array_img)

    # The loader checks need the VOC data on disk; return quietly otherwise.
    voc_root = osp.expanduser('~/.mxnet/datasets/voc')
    if not osp.isdir(voc_root):
        return

    train_dataset = gcv.data.VOCDetection(
        splits=((2007, 'trainval'), (2012, 'trainval')))
    val_dataset = gcv.data.VOCDetection(splits=[(2007, 'test')])
    width, height = 512, 512

    net = gcv.model_zoo.get_model(
        'yolo3_darknet53_voc', pretrained=False, pretrained_base=False)
    net.initialize()

    num_workers = 0
    batch_size = 4

    # Six stacked fields followed by one field padded with -1.
    batchify_fn = Tuple(
        *[Stack() for _ in range(6)], Pad(axis=0, pad_val=-1))
    train_loader = gluon.data.DataLoader(
        train_dataset.transform(
            yolo.YOLO3DefaultTrainTransform(width, height, net)),
        batch_size,
        True,
        batchify_fn=batchify_fn,
        last_batch='rollover',
        num_workers=num_workers)

    val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
    val_loader = gluon.data.DataLoader(
        val_dataset.transform(yolo.YOLO3DefaultValTransform(width, height)),
        batch_size,
        False,
        batchify_fn=val_batchify_fn,
        last_batch='keep',
        num_workers=num_workers)

    # Train transform without a network, batched like the validation data.
    train_loader2 = gluon.data.DataLoader(
        train_dataset.transform(
            yolo.YOLO3DefaultTrainTransform(width, height)),
        batch_size,
        True,
        batchify_fn=val_batchify_fn,
        last_batch='rollover',
        num_workers=num_workers)

    # Only a few batches per loader are needed to exercise the pipeline.
    for loader in (train_loader, val_loader, train_loader2):
        for step, _batch in enumerate(loader):
            if step >= 2:
                break
def test_transforms_presets_yolo():
    """Smoke-test the YOLO preset transforms against the tiny VOC dataset.

    Verifies that ``yolo.load_test`` and ``yolo.transform_test`` agree on the
    same image, then (only when ``~/.mxnet/datasets/voc`` is a directory)
    builds three data loaders over ``VOCDetectionTiny`` using the YOLO3
    default train/val transforms and reads a few batches from each.
    """
    url = ('https://github.com/dmlc/web-data/blob/master/'
           'gluoncv/detection/biking.jpg?raw=true')
    im_path = gcv.utils.download(url, path='biking.jpg')

    # Loading from disk and transforming a decoded image must be equivalent.
    loaded, loaded_img = yolo.load_test(im_path, short=512)
    transformed, transformed_img = yolo.transform_test(
        mx.image.imread(im_path), short=512)
    np.testing.assert_allclose(loaded.asnumpy(), transformed.asnumpy())
    np.testing.assert_allclose(loaded_img, transformed_img)

    # NOTE(review): the guard still looks at the full VOC directory although
    # the datasets below are the tiny variant - confirm this is intended.
    if not osp.isdir(osp.expanduser('~/.mxnet/datasets/voc')):
        return

    train_dataset = VOCDetectionTiny()
    val_dataset = VOCDetectionTiny(splits=[('tiny_motorbike', 'test')])
    width, height = 512, 512

    net = gcv.model_zoo.get_model(
        'yolo3_darknet53_voc', pretrained=False, pretrained_base=False)
    net.initialize()

    num_workers = 0
    batch_size = 4

    stackers = [Stack() for _ in range(6)]
    padders = [Pad(axis=0, pad_val=-1)]
    batchify_fn = Tuple(*(stackers + padders))
    train_with_net = train_dataset.transform(
        yolo.YOLO3DefaultTrainTransform(width, height, net))
    train_loader = gluon.data.DataLoader(
        train_with_net, batch_size, True,
        batchify_fn=batchify_fn, last_batch='rollover',
        num_workers=num_workers)

    val_batchify_fn = Tuple(Stack(), Pad(pad_val=-1))
    val_transformed = val_dataset.transform(
        yolo.YOLO3DefaultValTransform(width, height))
    val_loader = gluon.data.DataLoader(
        val_transformed, batch_size, False,
        batchify_fn=val_batchify_fn, last_batch='keep',
        num_workers=num_workers)

    train_without_net = train_dataset.transform(
        yolo.YOLO3DefaultTrainTransform(width, height))
    train_loader2 = gluon.data.DataLoader(
        train_without_net, batch_size, True,
        batchify_fn=val_batchify_fn, last_batch='rollover',
        num_workers=num_workers)

    # Reading a few batches is enough to prove each pipeline is iterable.
    for loader in [train_loader, val_loader, train_loader2]:
        for index, _ in enumerate(loader):
            if index > 1:
                break
def detectObjects(model_name, input_pic):
    """Detect objects in one picture and plot the resulting boxes.

    :param model_name: Name handed to ``get_model``; the model is loaded with
        ``pretrained=True``.
    :param input_pic: File name of the picture, read from ``images/``.
    :return: None. The result is drawn through ``gcv.utils.viz.plot_bbox``.
    """
    detector = get_model(model_name, pretrained=True)

    picture = image.imread("images/" + input_pic)

    # transform_test yields the network input plus an image used for display.
    net_input, display_img = transform_test(picture)
    outputs = detector(net_input)

    # Only a single image is in the batch, so keep entry 0 of every output.
    first_only = [output[0] for output in outputs]
    class_ind, prob, bounds = first_only

    gcv.utils.viz.plot_bbox(
        display_img, bounds, prob, class_ind, class_names=detector.classes)
def preprocess(self, batch):
    """Decode the first request in ``batch`` and prepare it for the network.

    :param batch: Sequence of requests; only ``batch[0]['body']`` is read.
        Presumably it holds encoded image bytes - verify against the caller.
    :return: The two values produced by ``transform_test`` with ``short=512``.
    """
    payload = batch[0]['body']
    decoded = mx.img.imdecode(payload)
    tensor, picture = transform_test([decoded], short=512)
    return tensor, picture
def detect_camera2(camera_id, output_path, thread):
    """Run helmet detection and tracking on a live camera stream.

    The first frame and every fifth frame afterwards go through the detector
    (``net``) and ``sel_track``, which returns fresh trackers and counts.
    The frames in between only update the existing trackers.  For every
    processed frame the function overlays the FPS, shows the frame in the
    ``MultiTracker`` window, emits the counts through ``thread._signal`` and
    appends them to the module-level ``camera_*_list`` lists.

    The loop stops when ``start_camera`` becomes falsy, the capture closes,
    a frame cannot be read, or Esc is pressed.  The capture device is always
    released on the way out.

    :param camera_id: Unused; the capture is always opened on device 0.
        NOTE(review): confirm whether this should select the device.
    :param output_path: Unused.
    :param thread: Object whose ``_signal.emit`` receives
        ``(summary, frame, total, with_hat, without_hat)``.
    :raises SystemExit: If the very first frame cannot be read.
    """
    global camera_time_list
    global camera_all_list
    global camera_no_list
    global camera_yes_list
    import cv2

    def run_detector(raw_frame):
        """Detect on one frame; return the resized image and sel_track's result."""
        tensor, resized = transform_test(
            nd.array(raw_frame, dtype='uint8'), short=416)
        tensor = tensor.as_in_context(ctx)
        box_ids, scores, bboxes = net(tensor)
        tracked = sel_track(resized, bboxes[0], scores[0], box_ids[0],
                            trackerType="MEDIANFLOW")
        return resized, tracked

    def draw_tracked(image, tracker, color, label):
        """Update ``tracker`` on ``image``, draw its boxes, return their count."""
        _, boxes = tracker.update(image)
        box_color = [channel * 255 for channel in color]
        count = 0
        for box in boxes:
            count += 1
            top_left = (int(box[0]), int(box[1]))
            bottom_right = (int(box[0] + box[2]), int(box[1] + box[3]))
            cv2.rectangle(image, top_left, bottom_right, box_color, 2)
            # Put the label above the box unless that would leave the image.
            text_y = (top_left[1] - 15 if top_left[1] - 15 > 15
                      else top_left[1] + 15)
            cv2.putText(image, label, (top_left[0], text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1 / 2, box_color, 1,
                        lineType=cv2.LINE_AA)
        return count

    def overlay_fps(image):
        """Advance the FPS counter and draw the current FPS text on ``image``."""
        nonlocal accum_time, curr_fps, fps, prev_time
        curr_time = timer()
        accum_time = accum_time + (curr_time - prev_time)
        prev_time = curr_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0
        cv2.putText(image, text=fps, org=(3, 15),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.50,
                    color=(255, 0, 0), thickness=2)

    def publish(image, with_hat, without_hat, raise_alert):
        """Print, emit and record the counts for one frame."""
        total = with_hat + without_hat
        # Total people, people wearing a helmet, people without a helmet.
        summary = "person:{}, hat: {}, person without hat: {}".format(
            total, with_hat, without_hat)
        print(summary)
        if raise_alert and total > 0 and without_hat > 0:
            AlertThread('报警').start()
        thread._signal.emit(summary, image, total, with_hat, without_hat)

        # System time of this measurement.
        now = datetime.datetime.now()
        print("system time: ", now)
        # Dropping the microseconds this way stays correct even when they are
        # exactly 0 (slicing at rfind(".") would then cut off the last digit).
        camera_time_list.append(str(now.replace(microsecond=0)))
        camera_all_list.append(total)
        camera_yes_list.append(with_hat)
        camera_no_list.append(without_hat)

        # Stream position as hours:minutes:seconds:milliseconds.
        milliseconds = cap.get(cv2.CAP_PROP_POS_MSEC)
        seconds = milliseconds // 1000
        milliseconds = milliseconds % 1000
        minutes = 0
        hours = 0
        if seconds >= 60:
            minutes = seconds // 60
            seconds = seconds % 60
        if minutes >= 60:
            hours = minutes // 60
            minutes = minutes % 60
        print("{}:{}:{}:{}".format(int(hours), int(minutes), int(seconds),
                                   int(milliseconds)))

    cap = cv2.VideoCapture(0)
    try:
        # Property ids 3 and 4 are the capture frame width and height.
        cap.set(3, 640)
        cap.set(4, 480)

        success, frame = cap.read()
        if not success:
            print('Failed to read video')
            sys.exit(1)

        accum_time = 0
        curr_fps = 0
        fps = "FPS: ??"
        prev_time = timer()

        # Detect on the first frame to create the initial trackers.
        img, tracked = run_detector(frame)
        frame, hat, person, colors, hat_num, person_num = tracked
        # Later raw frames are resized to this (width, height) before tracking.
        video_size = (img.shape[1], img.shape[0])

        overlay_fps(frame)
        cv2.imshow('MultiTracker', frame)
        publish(frame, hat_num, person_num, raise_alert=False)

        skip = 5  # run the detector on every skip-th frame
        cnt = 0
        while start_camera and cap.isOpened():
            success, frame = cap.read()
            if not success:
                break
            cnt += 1
            if cnt % skip == 0:
                _, tracked = run_detector(frame)
                frame, hat, person, colors, hat_num, person_num = tracked
            else:
                # NOTE(review): the source indentation was lost; the tracker
                # update is assumed to run only on non-detection frames
                # (otherwise detections would be counted twice) - confirm.
                frame = np.array(Image.fromarray(frame).resize(video_size))
                hat_num = draw_tracked(frame, hat, colors[0], 'hat')
                person_num = draw_tracked(frame, person, colors[1], 'person')

            overlay_fps(frame)
            cv2.imshow('MultiTracker', frame)
            publish(frame, hat_num, person_num, raise_alert=True)

            # Quit when Esc is pressed.
            if cv2.waitKey(1) & 0xFF == 27:
                break
    finally:
        # Free the camera no matter how the function is left.
        cap.release()
def transform_fn(model, raw_data, input_content_type, output_content_type):
    """
    Transform a request using the Gluon model. Called once per request.

    Accepted inputs are ``application/x-npy`` tensors (a 4-D batch, or a 3-D
    single image that gets a leading batch axis added) and encoded images
    (``application/x-image`` or any ``image/*`` type).  For image requests
    the predicted boxes are rescaled to the raw image and then normalized to
    the unit square; tensor requests are returned in network coordinates.

    :param model: Tuple of the Gluon model and inference config objects
    :param raw_data: The request payload.
    :param input_content_type: The request content type.
    :param output_content_type: The (desired) response content type.
    :return: response payload and content type. Each detection row is
        ``[class id, score, 4 box coordinates]``; padding rows are dropped.
    :raises ValueError: For unsupported content types or tensor ranks.
    :raises NotImplementedError: For ``text/csv`` output of a batch request.
    """
    # Unpack the neural net and the additional algo config:
    net, inference_config = model

    # First step: Read the input data:
    resized = False
    batch = False
    if input_content_type == "application/x-npy":
        logger.info(f"Got raw tensor request {input_content_type}")
        stream = BytesIO(raw_data)
        # The payload is untrusted request data: never unpickle objects.
        data = mx.nd.array(np.load(stream, allow_pickle=False))
        logger.info(f"Parsed tensor shape {data.shape}")
        n_data_dims = len(data.shape)
        if n_data_dims == 4:
            batch = True
        elif n_data_dims == 3:
            # Single image with the batch dimension omitted: add it back so
            # the network and the [0, ...] indexing below see a batch of one.
            data = data.expand_dims(axis=0)
        else:
            raise ValueError(
                "Expect a [ndata x nchannels x width x height] (YOLO-normalized) batch image array, "
                f"or a single image with batch dimension omitted... Got shape {data.shape}"
            )
    elif input_content_type == "application/x-image" or (
            input_content_type or "").startswith("image/"):
        logger.info(f"Got image request {input_content_type}")
        img_raw = mx.image.imdecode(bytearray(raw_data))
        img_raw_shape = img_raw.shape[:-1]  # Skip the channels dimension
        logger.info(f"Raw image shape {img_raw.shape}")
        data, _ = transform_test(img_raw, short=inference_config.image_size)
        img_transformed_shape = data.shape[2:]  # Channels dimension is leading now
        resized = True
        logger.info(
            f"Transformed image len {len(data)}, image shape {data.shape}")
        logger.info(f"Parsed input shape {data.shape}")
    else:
        logger.error(
            f"Got unexpected request content type {input_content_type}")
        raise ValueError(
            f"Unsupported request content type {input_content_type}")

    # Run the data through the network:
    # YOLOv3 expects [ndata x nchannels x width x height], normalized pixel values and outputs:
    # Class IDs [ndata x boxlimit x 1], floating point dtype but integer values
    # Confidence scores [ndata x boxlimit x 1], float 0-1
    # Bounding boxes [ndata x boxlimit x 4], float absolute pixel (xmin, ymin, xmax, ymax)
    #
    # Limit of 100 boxes by default, padded with -1s for null detections / no boxes.
    ids, scores, bboxes = net(data)
    logger.info(
        f"Model output: ids {ids.shape}[{ids.dtype}], scores {scores.shape}[{scores.dtype}], "
        f"bboxes {bboxes.shape}[{bboxes.dtype}]")

    # Resize and normalize the output bounding boxes
    if resized:
        bboxes = resize_bboxes(bboxes, in_size=img_transformed_shape,
                               out_size=img_raw_shape)
        bboxes = resize_bboxes(bboxes, in_size=img_raw_shape,
                               out_size=(1., 1.))
        logger.info("Normalized bounding boxes")
        # Annoyingly, gluoncv.data.transforms.bbox.resize() messes about with the bboxes dtype
        # by casting .astype(float) - converting from original 32 bit to 64. concat()ing the
        # results requires shared dtype, so we'll do the same wasteful transform on the others:
        ids = ids.astype(float)
        scores = scores.astype(float)

    # Convert to numpy after stacking, because mx.nd doesn't implement tolist or savetxt for our
    # output serialization:
    stacked_np = mx.nd.concat(ids, scores, bboxes, dim=2).asnumpy()  # [ndata x boxlimit x (1+1+4)]

    # Rather than a padded matrix, we'll return only what boxes we've found.
    # Note, this means that you won't be able to just interpret batch results as a 3D matrix!
    box_exists = stacked_np[:, :, 0] >= 0  # Boxes exist where class ID is not -1

    if output_content_type == "text/csv":
        if batch:
            raise NotImplementedError(
                "Haven't implemented CSV output for batch requests yet! Use SingleRecord splitting"
            )
        s = StringIO()
        np.savetxt(s, stacked_np[0, box_exists[0]], delimiter=",")
        return (s.getvalue(), output_content_type)

    # Anything else is answered as JSON; complain if that was not asked for.
    if output_content_type not in ("application/json", None):
        logger.error(
            f"Got unexpected output content type {output_content_type}: Returning JSON"
        )
    if batch:
        prediction = [
            stacked_np[ix_img, box_exists[ix_img]].tolist()
            for ix_img in range(stacked_np.shape[0])
        ]
    else:
        prediction = stacked_np[0, box_exists[0]].tolist()
    return ({"prediction": prediction}, "application/json")