def test():
    """Run the detector on one demo image and save the annotated result.

    Reads ``demo/00001.jpg``, restores the weights stored in
    ``./model/VGGnet_fast_rcnn_iter_70000.h5``, runs detection with a score
    threshold of 0.7, prints the elapsed time and writes the image with the
    detected boxes and ``class: score`` labels to ``demo/out.jpg``.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    im_file = 'demo/00001.jpg'
    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'

    image = cv2.imread(im_file)
    # cv2.imread reports an unreadable/missing file by returning None instead
    # of raising; fail here with a clear message rather than deep inside
    # the detector.
    if image is None:
        raise IOError('cannot read image: {}'.format(im_file))

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    # Draw on a copy so the array handed to the detector stays untouched.
    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # OpenCV drawing calls need integer pixel coordinates.
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Run the detector on ``demo/004545.jpg`` and save the annotated result.

    Restores the weights from
    ``/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5``, runs
    detection with a score threshold of 0.3, prints the elapsed time and
    writes the image with boxes and ``class: score`` labels to
    ``demo/out.jpg``.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    im_file = 'demo/004545.jpg'
    # Single source of truth for the threshold used both by the detector
    # call and by the drawing filter below.
    score_thresh = 0.3

    image = cv2.imread(im_file)
    # cv2.imread returns None (it does not raise) when the file is unreadable.
    if image is None:
        raise IOError('cannot read image: {}'.format(im_file))

    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    detector.cuda()
    # Switch to evaluation mode before inference; layers such as Dropout and
    # BatchNorm behave differently while the module is in training mode.
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, score_thresh)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # Defensive re-check of the threshold, kept from the original script.
        if scores[i] < score_thresh:
            continue
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def image_test(net, image_file, anno_file):
    """Detect objects in one image and save it with ground truth overlaid.

    The annotation XML is expected to hold ``size/width``, ``size/height``
    and, per ``object``, a ``bndbox`` with ``cx``, ``cy``, ``wid``, ``hei``
    and ``theta``.  Each rotated rectangle is converted to its enclosing
    axis-aligned box, clipped to the image, and drawn in red; detections
    returned by ``net.detect(image, 0.7)`` are drawn with their label.
    The result is written to ``demo/det_results/<image basename>``.

    Args:
        net: detector exposing ``detect(image, thresh)``.
        image_file: path of the image to process.
        anno_file: path of the XML annotation for that image.
    """
    root = ET.parse(anno_file)
    size_node = root.find('size')
    img_w = int(size_node.find('width').text)
    img_h = int(size_node.find('height').text)

    object_nodes = root.findall('object')
    boxes = np.zeros((len(object_nodes), 4), dtype=np.int32)
    for row, node in enumerate(object_nodes):
        bndbox = node.find('bndbox')
        cx, cy, wid, hei = (int(bndbox.find(tag).text)
                            for tag in ('cx', 'cy', 'wid', 'hei'))
        theta = float(bndbox.find('theta').text)

        # Build the rotated-rect description handed to cv2.boxPoints; for
        # non-positive angles the side lengths are swapped and the angle
        # is shifted by 90.
        if theta > 0:
            rect = ((cx, cy), (wid, hei), -theta)
        else:
            rect = ((cx, cy), (hei, wid), -90 - theta)

        corners = np.array(cv2.boxPoints(rect), np.int32)
        lower = np.min(corners, axis=0).tolist()
        upper = np.max(corners, axis=0).tolist()
        # Clip the enclosing box to the image bounds.
        boxes[row, :] = [
            max(0, lower[0]),
            max(0, lower[1]),
            min(img_w - 1, upper[0]),
            min(img_h - 1, upper[1]),
        ]

    image = cv2.imread(image_file)

    timer = Timer()
    timer.tic()
    dets, scores, classes = net.detect(image, 0.7)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    canvas = np.copy(image)

    # Ground-truth boxes first ...
    for gt in boxes:
        gt = tuple(int(v) for v in gt)
        cv2.rectangle(canvas, gt[0:2], gt[2:4], (0, 0, 255), 2)

    # ... then the detections with their "class: score" caption.
    for idx, found in enumerate(dets):
        found = tuple(int(v) for v in found)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(canvas, found[0:2], found[2:4], (255, 205, 51), 2)
        cv2.putText(canvas, caption, (found[0], found[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    out_path = os.path.join('demo/det_results', os.path.basename(image_file))
    print(out_path)
    cv2.imwrite(out_path, canvas)
def imdb_proposals(net, imdb):
    """Generate RPN proposals on all images in an imdb.

    Args:
        net: network passed through to ``im_proposals``.
        imdb: dataset exposing ``num_images`` and ``image_path_at(i)``.

    Returns:
        A list with one entry per image holding the boxes returned by
        ``im_proposals`` for that image.
    """
    # Debug visualisation of the first proposals; disabled, as in the
    # original code path.
    show_proposals = False

    timer = Timer()
    total = imdb.num_images
    imdb_boxes = [[] for _ in range(total)]

    for idx in range(total):
        im = cv2.imread(imdb.image_path_at(idx))

        timer.tic()
        boxes, scores = im_proposals(net, im)
        imdb_boxes[idx] = boxes
        timer.toc()

        progress = 'im_proposals: {:d}/{:d} {:.3f}s'.format(
            idx + 1, total, timer.average_time)
        print(progress)

        if show_proposals:
            dets = np.hstack((imdb_boxes[idx], scores))
            _vis_proposals(im, dets[:3, :], thresh=0.9)
            plt.show()

    return imdb_boxes
def test():
    """Detect objects in a demo image and show them in a matplotlib window.

    Reads ``demo/004545.jpg``, restores the weights from
    ``./models/saved_model_max/faster_rcnn_100000.h5``, runs detection with
    a score threshold of 0.7, prints the elapsed time and displays the image
    with a red rectangle and a ``class: score`` caption per detection.

    Raises:
        IOError: if the demo image cannot be read.
    """
    im_file = 'demo/004545.jpg'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'

    image = cv2.imread(im_file)
    # cv2.imread returns None (it does not raise) when the file is unreadable.
    if image is None:
        raise IOError('cannot read image: {}'.format(im_file))

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    # The display copy is loaded separately through matplotlib: cv2.imread
    # yields BGR channel order, while imshow expects RGB.
    img = mpimg.imread(im_file)
    fig, ax = plt.subplots(1)
    ax.imshow(img)

    for i, det in enumerate(dets):
        # Rectangle takes the top-left corner plus width/height, whereas a
        # detection is (x1, y1, x2, y2).
        w = det[2] - det[0]
        h = det[3] - det[1]
        rect = patches.Rectangle(det[0:2], w, h, linewidth=1,
                                 edgecolor='r', facecolor='none')
        ax.text(det[0], det[1], '%s: %.3f' % (classes[i], scores[i]))
        ax.add_patch(rect)

    plt.show()
    # Trailing marker kept from the original script output.
    print('aa')
def test(visualize=False):
    """Detect on one cervix training image and save the annotated result.

    The output goes to ``demo/<experiment>/<image name>``, where the
    experiment name is the model file name up to its first dot.  The
    directory is created when missing.

    Args:
        visualize: when true, also show a 0.15x down-scaled copy of the
            annotated image in an OpenCV window and wait for a key press.
    """
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    im_name = im_file.rsplit('/', 1)[-1]
    image = cv2.imread(im_file)

    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    expm = model_file.rsplit('/', 1)[-1].partition('.')[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # Evaluation mode affects Dropout and Batchnorm; train() switches back.
    detector.eval()
    print('load model successfully!')

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    annotated = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(annotated, box[0:2], box[2:4], (255, 205, 51), 4)
        cv2.putText(annotated, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', expm, im_name), annotated)

    if not visualize:
        return

    annotated = cv2.resize(annotated, None, None, fx=0.15, fy=0.15,
                           interpolation=cv2.INTER_LINEAR)
    cv2.imshow('demo', annotated)
    cv2.waitKey(0)
def test():
    """Detect on ``demo/images.jpeg`` with an imdb-configured detector.

    The class list comes from the ``coco_2017_train`` imdb and the backbone
    is picked from the underscore-separated tokens of the checkpoint name
    (``vgg16`` or ``resnet50``, defaulting to VGG).  The annotated image is
    written to ``demo/out.jpg`` and shown in an OpenCV window until a key
    is pressed.
    """
    import os

    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)

    imdb = get_imdb('coco_2017_train')

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name

    cfg_from_file(cfg_file)
    print(imdb.classes)

    # VGG wins whenever its token is present and is also the fallback;
    # ResNet is used only when 'resnet50' appears without 'vgg16'.
    name_tokens = pre_model_name.split('_')
    use_resnet = 'vgg16' not in name_tokens and 'resnet50' in name_tokens
    detector_cls = FasterRCNN_RES if use_resnet else FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, blob, thr=0.7, nms_thresh=0.3)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    annotated = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(annotated, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(annotated, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), annotated)
    cv2.imshow('demo', annotated)
    cv2.waitKey(0)
tp += float(net.tp) tf += float(net.tf) fg += net.fg_cnt bg += net.bg_cnt train_loss += loss.data[0] step_cnt += 1 # backward optimizer.zero_grad() loss.backward() network.clip_gradient(net, 10.) optimizer.step() if step % disp_interval == 0: duration = t.toc(average=False) fps = step_cnt / duration log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch)' % ( step, blobs['im_name'], train_loss / step_cnt, fps, 1./fps) log_print(log_text, color='green', attrs=['bold']) if _DEBUG: log_print('\tTP: %.2f%%, TF: %.2f%%, fg/bg=(%d/%d)' % (tp/fg*100., tf/bg*100., fg/step_cnt, bg/step_cnt)) log_print('\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box: %.4f' % ( net.rpn.cross_entropy.data.cpu().numpy()[0], net.rpn.loss_box.data.cpu().numpy()[0], net.cross_entropy.data.cpu().numpy()[0], net.loss_box.data.cpu().numpy()[0]) ) re_cnt = True if use_tensorboard and step % log_interval == 0:
def test():
    """Run the ImageNet-DET detector on a demo image and save the result.

    Builds the detector with the 200 WordNet-id class names (plus
    ``__background__``), restores the checkpoint, runs detection with a
    score threshold of 0, prints the detected classes/scores and the elapsed
    time, and writes the annotated image to ``demo/out.jpg``.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    im_file = 'demo/004545.jpg'
    model_file = '/disk2/data/pytorch_models/trained_models/resnet152_imgsize1000/saved_model3/faster_rcnn_200000.h5'

    image = cv2.imread(im_file)
    # cv2.imread returns None (it does not raise) when the file is unreadable.
    if image is None:
        raise IOError('cannot read image: {}'.format(im_file))

    classes = np.array([
        '__background__',
        'n02672831', 'n02691156', 'n02219486', 'n02419796', 'n07739125', 'n02454379',
        'n07718747', 'n02764044', 'n02766320', 'n02769748', 'n07693725', 'n02777292',
        'n07753592', 'n02786058', 'n02787622', 'n02799071', 'n02802426', 'n02807133',
        'n02815834', 'n02131653', 'n02206856', 'n07720875', 'n02828884', 'n02834778',
        'n02840245', 'n01503061', 'n02870880', 'n02879718', 'n02883205', 'n02880940',
        'n02892767', 'n07880968', 'n02924116', 'n02274259', 'n02437136', 'n02951585',
        'n02958343', 'n02970849', 'n02402425', 'n02992211', 'n01784675', 'n03000684',
        'n03001627', 'n03017168', 'n03062245', 'n03063338', 'n03085013', 'n03793489',
        'n03109150', 'n03128519', 'n03134739', 'n03141823', 'n07718472', 'n03797390',
        'n03188531', 'n03196217', 'n03207941', 'n02084071', 'n02121808', 'n02268443',
        'n03249569', 'n03255030', 'n03271574', 'n02503517', 'n03314780', 'n07753113',
        'n03337140', 'n03991062', 'n03372029', 'n02118333', 'n03394916', 'n01639765',
        'n03400231', 'n02510455', 'n01443537', 'n03445777', 'n03445924', 'n07583066',
        'n03467517', 'n03483316', 'n03476991', 'n07697100', 'n03481172', 'n02342885',
        'n03494278', 'n03495258', 'n03124170', 'n07714571', 'n03513137', 'n02398521',
        'n03535780', 'n02374451', 'n07697537', 'n03584254', 'n01990800', 'n01910747',
        'n01882714', 'n03633091', 'n02165456', 'n03636649', 'n03642806', 'n07749582',
        'n02129165', 'n03676483', 'n01674464', 'n01982650', 'n03710721', 'n03720891',
        'n03759954', 'n03761084', 'n03764736', 'n03770439', 'n02484322', 'n03790512',
        'n07734744', 'n03804744', 'n03814639', 'n03838899', 'n07747607', 'n02444819',
        'n03908618', 'n03908714', 'n03916031', 'n00007846', 'n03928116', 'n07753275',
        'n03942813', 'n03950228', 'n07873807', 'n03958227', 'n03961711', 'n07768694',
        'n07615774', 'n02346627', 'n03995372', 'n07695742', 'n04004767', 'n04019541',
        'n04023962', 'n04026417', 'n02324045', 'n04039381', 'n01495701', 'n02509815',
        'n04070727', 'n04074963', 'n04116512', 'n04118538', 'n04118776', 'n04131690',
        'n04141076', 'n01770393', 'n04154565', 'n02076196', 'n02411705', 'n04228054',
        'n02445715', 'n01944390', 'n01726692', 'n04252077', 'n04252225', 'n04254120',
        'n04254680', 'n04256520', 'n04270147', 'n02355227', 'n02317335', 'n04317175',
        'n04330267', 'n04332243', 'n07745940', 'n04336792', 'n04356056', 'n04371430',
        'n02395003', 'n04376876', 'n04379243', 'n04392985', 'n04409515', 'n01776313',
        'n04591157', 'n02129604', 'n04442312', 'n06874185', 'n04468005', 'n04487394',
        'n03110669', 'n01662784', 'n03211117', 'n04509417', 'n04517823', 'n04536866',
        'n04540053', 'n04542943', 'n04554684', 'n04557648', 'n04530566', 'n02062744',
        'n04591713', 'n02391049',
    ])

    detector = FasterRCNN(classes)
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    # Keep the per-detection labels under their own name so the class table
    # above is not overwritten.
    dets, scores, det_classes = detector.detect(image, 0.)
    # Stop the timer before printing so the reported time covers detection
    # only.
    runtime = t.toc()
    # Function-call form of print: the statement form is a SyntaxError on
    # Python 3, and every other print in this function already uses it.
    print("classes:{},scores:{}".format(det_classes, scores))
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (det_classes[i], scores[i]),
                    (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Run the pedestrian detector over a video and write an annotated copy.

    The detector class list comes from the ``CaltechPedestrians_test`` imdb
    and the backbone is chosen from the underscore-separated tokens of the
    checkpoint name.  Every frame read from ``video_file`` is passed to the
    detector, the detected boxes are drawn, the frame is shown briefly and
    then appended to ``output_file``.

    ``video_file``, ``output_file`` and ``fps`` are not defined here; they
    are read from the enclosing (module) scope.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()

    cap = cv2.VideoCapture(video_file)
    # The writer needs the frame size, so it is created lazily on the first
    # frame; keeping it as None until then lets the cleanup below cope with
    # a video that yields no frames at all.
    out = None
    n_frames = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            p = Timer()
            p.tic()

            if out is None:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))

            n_frames += 1
            try:
                dets, _scores, _classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)
                for det in dets:
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError:
                # NOTE(review): presumably raised by detect() when a frame
                # has no detections - confirm.  The frame is still written
                # (unannotated) by the finally clause.
                pass
            finally:
                print(n_frames, '-frame : {:.3f}s'.format(p.toc()))
                out.write(frame)

        runtime = t.toc()
        # n_frames counts frames actually processed (the previous counter
        # started at 1 and therefore reported one frame too many).
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            n_frames, int(runtime), n_frames / runtime))
    finally:
        # Release the capture and writer even when processing fails.
        cap.release()
        if out is not None:
            out.release()
gt_boxes = roidb[i]['boxes'].astype(np.float32) relu = True if 'relu' in name_blocks else False features.append( detector.extract_feature_vector(image, blob, gt_boxes, relu=relu).data.cpu().numpy()) ids.append(roidb[i]['ids'][0]) if BG_SHOW: bg_features.append( detector.extract_background_features(image, blob, gt_boxes, relu=relu).data.cpu().numpy()) if len(set(ids)) > id_limit: break print('{:3.2f}s feature extraction finished !'.format(t.toc(average=False))) features = np.asarray(features, dtype=np.float32) learning_rate = 100 model = TSNE(learning_rate=learning_rate) labels = np.array(ids) % 4 fig, ax = plt.subplots() font = { 'family': 'serif', 'color': 'blue', 'weight': 'normal', 'size': 7, } # Positive Anchors pos_data = model.fit_transform(features) xs = pos_data[:, 0] ys = pos_data[:, 1]
tp += float(net.tp) tf += float(net.tf) fg += net.fg_cnt bg += net.bg_cnt train_loss += loss.data[0] step_cnt += 1 # backward optimizer.zero_grad() loss.backward() network.clip_gradient(net, 10.) optimizer.step() if step % disp_interval == 0: duration = time.toc(average=False) fps = step_cnt / duration log_text = 'step %d, image: %s, loss: %.4f, fps: %.2f (%.2fs per batch)' % ( step, blobs['im_name'], train_loss / step_cnt, fps, 1. / fps) log_print(log_text) if _DEBUG: log_print('\tTP: %.2f%%, TF: %.2f%%, fg/bg=(%d/%d)' % (tp / float(fg) * 100., tf / float(bg) * 100., fg / float(step_cnt), bg / float(step_cnt))) log_print( '\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box: %.4f' % (net.rpn.cross_entropy.data.cpu().numpy(), net.rpn.loss_box.data.cpu().numpy(), net.cross_entropy.data.cpu().numpy(),
def test(): import os # im_file = 'demo/004545.jpg' # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg' # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg' matName = 'exp5bC9.mat' model_file = '/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/model/CLASP_m_rotation_withNoRot_More/faster_rcnn_20000.h5' # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5' # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5' #CLASP_class = np.asarray(['__background__', # always index 0 # 'person', 'bin']) UCF_class = np.asarray(['__background__', 'person', 'bin']) label = UCF_class[1:] #CLASP_class[1:] detector = FasterRCNN(UCF_class) #CLASP_class network.load_net(model_file, detector) detector.cuda() detector.eval() print('load model successfully!') #filename = "/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/video/07212017_EXPERIMENT_9A_Aug7/mp4s/Camera_9.mp4" #vid = imageio.get_reader(filename, 'ffmpeg') imgPath = "/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/exp5bC9/exp5bC9/" #"/home/dong/PycharmProjects/fasterRCNN/faster_rcnn_pytorch-master/CLASP/C11_50_selected/" imgType = '*.jpg' image_list = [] for filename in glob.glob(imgPath + imgType): # assuming jpg #im = Image.open(filename) image_list.append(filename) #im.close() spliter = 'Frame' #'Frame' result = {x: np.zeros([1, 5]) for x in label} for i, name in enumerate(image_list): ele = Image.open(name) image = np.asarray(ele) str = ele.filename str = str.split(spliter)[1].split('.')[0] ind = int(str) t = Timer() t.tic() dets, scores, classes = detector.detect(image, 0.7) runtime = t.toc() for j, label in enumerate(classes): tmp = np.empty([1, 5]) tmp[0][0:4] = dets[j] tmp[0][4] = ind if result[label].max() == 0: result[label][0] = tmp else: result[label] = np.append(result[label], tmp, axis=0) print('Progress: {a:8.2f}%'.format(a=i * 100.0 / image_list.__len__())) 
print('total spend: {}s'.format(runtime)) ele.close() sio.savemat(matName, result) #result_9AC11_selected.mat #for im in enumerate(vid): #image = np.asarray(im) # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector) # print('save model succ') # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255 '''
# x1, y1, x2, y2 = det # img_roi = img[y1:y2, x1:x2] # if img_size: # img = cv2.resize(img, img_size) # if i == 0: # img_name = filename[:-len(suffix)] + dest_type # path = os.path.join(dest_subdir, img_name) # cv2.imwrite(path, img_roi) # else: # img_name = filename[:-(len(suffix)+1)] + '_' + str(i) + '.' + dest_type # path = os.path.join(dest_subdir, img_name) # cv2.imwrite(path, img_roi) except: print('bad image: %s' % path) t.toc(average=True) print('{} imgs written to subdir: {}'.format(num_total_in_class, dest_subdir)) num_total += num_total_in_class print('{} imgs of type {} written to dir: {}'.format(num_total, dest_type, dest_dir)) print('average {}s per img'.format(t.toc(average=True))) return num_total, dest_dir if __name__ == '__main__': data_dir = 'data/cervix/test' # data_dir = 'data/cervix/train' # data_dir = 'data/cervix/additional' dest_dir = 'data/cervix_roi/test' cut_det(data_dir, dest_dir, img_size=None)
def track():
    """Detect pedestrians in a video and label them with window-level ids.

    Frames are processed in windows of ``tps`` frames:

    * first frame of a window: every detection gets its index as id and its
      feature vector is stored;
    * intermediate frames: detections are matched to the previous frame's
      boxes by overlap (> 0.7) and their features are stored;
    * last frame of a window: each current feature votes for the closest
      stored id and the winning id is drawn next to the box.

    Every frame, annotated or not, is appended to ``output_file``.

    ``video_file``, ``output_file``, ``fps`` and ``tps`` are not defined
    here; they are read from the enclosing (module) scope.
    """

    def id_track(dataset, features):
        """Return, for each feature, the most frequently nearest stored id.

        Args:
            dataset: list of frames, each a list of
                ``{'id': ..., 'feature': ...}`` dicts.
            features: feature vectors of the current frame.
        """
        from collections import Counter

        def dist(f1, f2):
            # sqrt of a square is the absolute value, so this sums the
            # element-wise absolute differences along dim 0.
            score = (torch.sqrt((f1 - f2) ** 2)).sum(0).data.cpu().numpy()[0]
            return score

        # One vote list per feature, kept in feature order so the returned
        # ids line up with ``features`` regardless of dict ordering.
        votes_per_feature = [[] for _ in features]
        for dataframe in dataset:
            for i, f in enumerate(features):
                init_val = 1e15
                for data in dataframe:
                    score = dist(f, data['feature'])
                    if score < init_val:
                        init_val = score
                        best_id = data['id']
                # NOTE(review): when ``dataframe`` is empty, ``best_id`` is
                # either stale (from a previous feature) or unbound; this
                # mirrors the original logic - confirm the intended
                # behaviour.
                votes_per_feature[i].append(best_id)

        id_list = []
        for votes in votes_per_feature:
            most_id = Counter(votes).most_common(1)[0][0]
            id_list.append(most_id)
        return id_list

    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_2_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    name_blocks = pre_model_name.split('_')
    if 'vgg16' in name_blocks:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'resnet50' in name_blocks:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    relu = 'relu' in name_blocks

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()

    cap = cv2.VideoCapture(video_file)
    # Created lazily on the first frame because the writer needs the frame
    # size; None until then so cleanup works for an empty/unreadable video.
    out = None
    n_frames = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            p = Timer()
            p.tic()

            if out is None:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))

            n_frames += 1
            try:
                # Position of this frame inside the current window.
                tid = (n_frames - 1) % tps
                dets, scores, classes = detector.detect(
                    frame, blob, thr=0.7, nms_thresh=0.3)
                frame = np.copy(frame)

                # Feature extraction (one vector per detection) and box
                # drawing.
                features = []
                for det in dets:
                    gt_box = det[np.newaxis, :]
                    features.append(detector.extract_feature_vector(
                        frame, blob, gt_box, relu=relu))
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)

                dataframe = []
                if tid == 0:
                    # Window start: reset the history, ids are the
                    # detection indices.  ``dataset`` and ``anchors`` are
                    # (re)bound here and reused by the following frames.
                    dataset = []
                    for i, f in enumerate(features):
                        dataframe.append({'id': i, 'feature': f})
                    dataset.append(dataframe)
                    anchors = dets
                elif 0 < tid < tps - 1:
                    # ``float`` is what the removed ``np.float`` alias
                    # pointed to, so the dtype is unchanged.
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray(anchors, dtype=float),
                        np.ascontiguousarray(dets, dtype=float))
                    # Zero out weak overlaps, then take, per detection, the
                    # index of the best-overlapping previous box.
                    overlaps = np.multiply(overlaps, overlaps > 0.7)
                    max_arg = overlaps.argmax(axis=0)
                    for arg in max_arg:
                        if arg >= len(features):
                            continue
                        # NOTE(review): the stored feature is indexed by
                        # the matched previous-box index, as in the
                        # original - confirm this is intended rather than
                        # the detection index.
                        dataframe.append({'id': arg, 'feature': features[arg]})
                    dataset.append(dataframe)
                    anchors = dets
                else:
                    # Window end: resolve ids by voting and draw them.
                    id_list = id_track(dataset, features)
                    for i, track_id in enumerate(id_list):
                        det = tuple(int(x) - 2 for x in dets[i])
                        cv2.putText(frame, 'id: ' + str(track_id), det[0:2],
                                    cv2.FONT_HERSHEY_PLAIN, 2.0, (0, 0, 255))
            except Exception as exc:
                # Best effort per frame: keep going, but report the problem
                # instead of hiding it, and let KeyboardInterrupt through.
                print('frame {}: tracking step failed: {!r}'.format(n_frames, exc))
            finally:
                if n_frames % 10 == 0:
                    print(n_frames, '-frame : {:.3f}s'.format(p.toc()))
                out.write(frame)

        runtime = t.toc()
        # n_frames counts frames actually processed (the previous counter
        # started at 1 and therefore reported one frame too many).
        print('{} frames / total spend: {}s / {:2.1f} fps'.format(
            n_frames, int(runtime), n_frames / runtime))
    finally:
        # Release the capture and writer even when processing fails.
        cap.release()
        if out is not None:
            out.release()