def test():
    """Run the detector on one demo image and save the annotated result.

    Reads ``demo/004545.jpg``, loads the weights into a ``FasterRCNN``
    instance, runs ``detect`` with a 0.3 score threshold, draws every kept
    box with its ``class: score`` label and writes ``demo/out.jpg``.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    score_thresh = 0.3
    im_file = 'demo/004545.jpg'
    image = cv2.imread(im_file)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise IOError('cannot read image: {}'.format(im_file))

    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    detector.cuda()
    # Inference only: put the model in evaluation mode before detecting.
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, score_thresh)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # Second guard on the score, kept from the original demo.
        if scores[i] < score_thresh:
            continue
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def main():
    """Load a saved RPN and print its recall on the 'small' test split.

    The network is built with ``args.use_kmeans_anchors`` and restored from
    ``./output/RPN/RPN_relationship_best_kmeans.h5``.
    """
    global args

    print("Loading testing set...")
    # train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    loader_options = dict(batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, **loader_options)

    net = RPN(args.use_kmeans_anchors)
    network.load_net('./output/RPN/RPN_relationship_best_kmeans.h5', net)
    net.cuda()

    # best_recall = np.array([0.0, 0.0])

    # Testing
    recall = test(test_loader, net)
    report = ('Recall: '
              'object: {recall[0]: .3f}%'
              'relationship: {recall[1]: .3f}%')
    print(report.format(recall=recall * 100))
def test():
    """Detect objects in ``demo/00001.jpg`` and write ``demo/out.jpg``.

    Loads ``./model/VGGnet_fast_rcnn_iter_70000.h5`` into a ``FasterRCNN``
    in evaluation mode on the GPU, times one ``detect`` call (threshold 0.7)
    and draws each returned box with its ``class: score`` label.
    """
    import os

    im_file = 'demo/00001.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    box_color = (255, 205, 51)
    text_color = (0, 0, 255)
    im2show = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        label = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(im2show, box[0:2], box[2:4], box_color, 2)
        cv2.putText(im2show, label, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, text_color, thickness=1)

    out_path = os.path.join('demo', 'out.jpg')
    cv2.imwrite(out_path, im2show)
def main():
    """Load the saved region RPN and print its recall on the 'small' test split.

    The network is built with ``args.use_normal_anchors`` and restored from
    ``./output/RPN/RPN_region_best.h5``. ``best_recall`` is never updated
    here, so the "Best" figures are always printed as zero.
    """
    global args

    print("Loading training set and testing set...")
    # train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False,
                                              num_workers=8, pin_memory=True)
    net = RPN(args.use_normal_anchors)
    network.load_net('./output/RPN/RPN_region_best.h5', net)
    # network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    best_recall = np.array([0.0, 0.0])

    # Testing
    recall = test(test_loader, net)

    # str.format does not treat '%' specially, so a single '%' is the
    # literal percent sign ('%%' would be printed verbatim as two characters).
    print('Recall: '
          'object: {recall[0]: .3f}% (Best: {best_recall[0]: .3f}%)'
          'relationship: {recall[1]: .3f}% (Best: {best_recall[1]: .3f}%)'.format(
              recall=recall * 100, best_recall=best_recall * 100))
def __init__(self, weights=None):
    """Build the Faster R-CNN model and load its weights onto the GPU.

    Args:
        weights: Path of the weights file to load. When ``None``, the
            released ``faster-rcnn.zip`` archive is fetched with ``wget``
            into ``weights/`` (unless already present), unpacked with
            ``unzip``, and the file named by the ``weights`` entry of its
            ``description.json`` is used.
    """
    if weights is None:
        if not os.path.exists('weights'):
            os.mkdir('weights')

        download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
        if not os.path.exists('weights/faster-rcnn'):
            print('Downloading weights for faster-rcnn')
            # Reuse an archive left behind by an earlier, interrupted run.
            if not os.path.exists('weights/faster-rcnn.zip'):
                check_output([
                    'wget', download_url, '-O', 'weights/faster-rcnn.zip'
                ])
            print('Unzipping...')
            check_output(
                ['unzip', 'weights/faster-rcnn.zip', '-d', 'weights'])

        # Close the description file deterministically instead of leaking it.
        with open('weights/faster-rcnn/description.json') as description_file:
            description = json.load(description_file)
        weights = os.path.join('weights/faster-rcnn', description['weights'])

    print('Building model...')
    self.model = FasterRCNNModel(classes=['__backround__', 'building'], debug=False)
    network.load_net(weights, self.model)
    self.model.cuda()
    self.model.eval()
def main():
    """Evaluate an RPN once per epoch, saving a snapshot and the best model.

    Builds the train/test loaders and the RPN, optionally restores
    ``args.resume_model``, then for each epoch runs ``test`` (the ``train``
    call is commented out), prints the metrics, adjusts the learning rate and
    saves ``<model_name>_epoch_<n>.h5`` plus ``<model_name>_best.h5`` whenever
    recall improves.

    Raises:
        Exception: if ``args.resume_training`` is set but
            ``args.resume_model`` is empty.
    """
    global args

    print("Loading training set and testing set...")
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True,
                                               num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False,
                                              num_workers=8, pin_memory=True)
    net = RPN(not args.use_normal_anchors)

    if args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer = torch.optim.SGD([
            {'params': list(net.parameters())[26:]},
        ], lr=args.lr, momentum=args.momentum, weight_decay=0.0005)
    else:
        print('Training from scratch...Initializing network...')
        optimizer = torch.optim.SGD(list(net.parameters())[26:], lr=args.lr,
                                    momentum=args.momentum, weight_decay=0.0005)

    network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    best_recall = 0.0

    for epoch in range(0, args.max_epoch):
        # Training
        # train(train_loader, net, optimizer, epoch)

        # Testing
        recall, RPN_precision, RPN_recall = test(test_loader, net)
        # str.format needs a single '%' for a literal percent sign.
        print('Epoch[{epoch:d}]: '
              'Recall: '
              'object: {recall: .3f}% (Best: {best_recall: .3f}%)'.format(
                  epoch=epoch, recall=recall * 100, best_recall=best_recall * 100))
        print('object: {precision: .3f}% '
              'object: {recall: .3f}% '.format(precision=RPN_precision * 100,
                                               recall=RPN_recall * 100))

        # update learning rate
        # NOTE(review): this condition is also true at epoch 0, so the rate is
        # divided by 10 before the first epoch - confirm that is intended.
        if epoch % args.step_size == 0:
            args.disable_clip_gradient = True
            args.lr /= 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        save_name = os.path.join(args.output_dir,
                                 '{}_epoch_{}.h5'.format(args.model_name, epoch))
        network.save_net(save_name, net)
        print('save model: {}'.format(save_name))

        if np.all(recall > best_recall):
            best_recall = recall
            save_name = os.path.join(args.output_dir,
                                     '{}_best.h5'.format(args.model_name))
            network.save_net(save_name, net)
def load_model(model_file_path):
    """Return a ``FasterRCNN`` with weights from *model_file_path*.

    The network is moved to the GPU and switched to evaluation mode before
    it is returned.
    """
    net = FasterRCNN()
    network.load_net(model_file_path, net)

    net.cuda()
    net.eval()

    print('load model successfully!')
    return net
def build_extractor(model_file, classes=None):
    """Create a ``FasterRCNN`` extractor and load *model_file* into it.

    Args:
        model_file: Weights file handed to ``network.load_net``.
        classes: Optional class list forwarded to ``FasterRCNN``; when
            ``None`` the constructor is called without arguments.

    Returns:
        The extractor, on the GPU and in evaluation mode.
    """
    extractor = FasterRCNN() if classes is None else FasterRCNN(classes)

    # Device and mode are set before the weights are loaded, as in the original.
    extractor.cuda()
    extractor.eval()
    network.load_net(model_file, extractor)

    print('load model successfully!')
    return extractor
def test():
    """Detect objects in ``demo/004545.jpg`` and show them with matplotlib.

    Loads ``./models/saved_model_max/faster_rcnn_100000.h5`` into a
    ``FasterRCNN`` in evaluation mode on the GPU, times one ``detect`` call
    (threshold 0.7) and overlays each returned box and its ``class: score``
    label on the image in a matplotlib window.
    """
    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/media/longc/Data/data/2DMOT2015/test/ETH-Crossing/img1/000100.jpg'
    image = cv2.imread(im_file)

    # model_file = './VGGnet_fast_rcnn_iter_70000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    # The image is read a second time with matplotlib for display; the cv2
    # copy above is only used as detector input.
    img = mpimg.imread(im_file)

    # Create figure and axes
    fig, ax = plt.subplots(1)

    # Display the image
    ax.imshow(img)

    for i, det in enumerate(dets):
        # Rectangle takes an anchor corner plus width/height.
        w = det[2] - det[0]
        h = det[3] - det[1]
        rect = patches.Rectangle(det[0:2], w, h, linewidth=1, edgecolor='r', facecolor='none')
        # text
        plt.text(det[0], det[1], '%s: %.3f' % (classes[i], scores[i]))
        # Add the patch to the Axes
        ax.add_patch(rect)

    plt.show()
    print('aa')
def test(visualize=False):
    """Detect objects in one cervix image and save the annotated copy.

    The output goes to ``demo/<experiment>/<image name>``, where the
    experiment name is the model file's base name without its extension.

    Args:
        visualize: When true, also show a 0.15x-scaled copy in an OpenCV
            window and wait for a key press.
    """
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    im_name = im_file.split('/')[-1]
    image = cv2.imread(im_file)

    # model_file = 'models/VGGnet_fast_rcnn_iter_70000.h5'
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    model_basename = model_file.split('/')[-1]
    expm = model_basename.split('.')[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # set model in evaluation mode, has effect on Dropout and Batchnorm.
    # Use train() to set train mode.
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    timer = Timer()
    timer.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    dets, scores, classes = detector.detect(image, 0.7)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        label = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 4)
        cv2.putText(im2show, label, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', expm, im_name), im2show)

    if not visualize:
        return
    im2show = cv2.resize(im2show, None, None, fx=0.15, fy=0.15,
                         interpolation=cv2.INTER_LINEAR)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
def test():
    """Detect objects in ``demo/images.jpeg``, then save and show the result.

    The detector class is picked from the pretrained model's file name: a
    ResNet variant only when the name has a ``resnet50`` token and no
    ``vgg16`` token, the VGG variant otherwise. Class names come from the
    ``coco_2017_train`` imdb.
    """
    import os

    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)

    # imdb_name = 'CaltechPedestrians_train'
    imdb_name = 'coco_2017_train'
    # imdb_name = 'voc_2007_trainval'
    imdb = get_imdb(imdb_name)

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    # pre_model_name = 'VGGnet_fast_rcnn_iter_70000.h5'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    # pre_model_name = 'CaltechPedestrians_train_1_vgg16_0.7_b1.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    print(imdb.classes)

    name_tokens = pre_model_name.split('_')
    wants_resnet = 'vgg16' not in name_tokens and 'resnet50' in name_tokens
    if wants_resnet:
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    timer = Timer()
    timer.tic()
    dets, scores, classes = detector.detect(image, blob, thr=0.7, nms_thresh=0.3)
    runtime = timer.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        label = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.rectangle(im2show, box[0:2], box[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, label, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
    cv2.imshow('demo', im2show)
    cv2.waitKey(0)
def analysis_video(self, result_dir):
    """Run the detector on every frame in ``self.image_list`` and save results.

    For each frame the detections (score threshold 0.8) are drawn on a copy
    that is written to *result_dir*, a row is pushed to ``self.view_table``,
    and a per-frame record is collected.

    Args:
        result_dir: Directory the annotated frames are written to.

    Returns:
        dict: ``{'pictures': [...]}`` with one entry per frame holding
        ``filename``, ``size`` (the frame's ``shape``) and ``objects``
        (``name``, ``accuracy`` and ``bbox`` of every detection).
    """
    self.statusbar_stringvar.set('Analysis..Please wait..')
    model_file = 'model.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    video_stem = os.path.basename(self.video_path).split('.')[0]
    info_dict = {'pictures': []}
    for index, frame in enumerate(self.image_list):
        accuracy = 0.
        pic_info = {'objects': []}
        dets, scores, classes = detector.detect(frame, 0.8)
        im2show = np.copy(frame)
        for i, det in enumerate(dets):
            det = tuple(int(x) for x in det)
            cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
            cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15),
                        cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
            accuracy += scores[i]
            # object info initial
            pic_info['objects'].append({
                'name': classes[i],
                'accuracy': scores[i],
                'bbox': det,
            })

        # pic_info initial
        pic_info['filename'] = video_stem + '_' + str(index + 1) + '.jpg'
        pic_info['size'] = im2show.shape
        info_dict['pictures'].append(pic_info)
        cv2.imwrite(os.path.join(result_dir, pic_info['filename']), im2show)

        # A frame without detections used to divide by zero here; report a
        # mean score of 0.0 for it instead.
        num_objects = len(pic_info['objects'])
        mean_accuracy = accuracy / num_objects if num_objects else 0.0
        self.view_table.update(index + 1, **{
            'name': pic_info['filename'],
            'accuracy': mean_accuracy,
            'state': 'yes'
        })

    self.statusbar_stringvar.set('Analysis done!')
    return info_dict
def main():
    """Load a saved Faster R-CNN and print its object recall on the test split.

    The class count comes from the test set's ``object_classes``; anchors and
    backbone are taken from ``args``.
    """
    global args

    print("Loading training set and testing set...")
    # train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    object_classes = test_set.object_classes
    print("Done.")

    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    loader_options = dict(batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, **loader_options)

    num_classes = len(object_classes)
    net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors,
                     n_classes=num_classes,
                     model=args.base_model)
    network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net)
    # network.load_net('./output/detection/RPN_object1_best.h5', net)
    # network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    # Testing
    recall = test(test_loader, net)
    report = 'Recall: ' 'object: {recall: .3f}%'
    print(report.format(recall=recall*100))
def test():
    """Follow one box through every sequence of the validation loader.

    For each sequence the first frame's segmentation gives the initial box.
    For every later frame the box is updated from region proposals that
    overlap the previous box (``merge_rois``), optionally refined by the
    detector's box-regression head, or, when nothing overlaps, re-acquired
    from a full ``detector.detect`` pass. Each frame's crop and an annotated
    copy are written under ``demo/``, and all boxes are saved to ``crop.npy``
    as rows ``[i, j, x1, y1, x2, y2]``.

    The U-Net segmentation stage is commented out throughout.
    """
    # Set up dataloader
    data_loader = DAVIS_seq_dataloader(split='val')

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('Load Faster R-CNN model successfully!')

    # unet_model = './model/vgg_unet_1e-4_500.h5'
    # unet = UNet()
    # network.load_net(unet_model, unet)
    # unet.cuda()
    # network.weights_normal_init(unet, dev=0.01)
    # unet.load_from_faster_rcnn_h5(h5py.File(model_file))
    # NOTE: criterion_bce and weight_decay are only referenced by the
    # commented-out U-Net code.
    criterion_bce = torch.nn.BCELoss().cuda()
    weight_decay = 5e-5
    # optimizer = torch.optim.SGD(list(unet.parameters())[26:], lr=1e-4, weight_decay=weight_decay)
    # print('Load U-Net model successfully!')

    crop_set = []
    # Iterate
    for i in range(data_loader.num_seq):
        # Get the first frame info
        seq = data_loader.seq_list[data_loader.out_pointer]
        seq_len = data_loader.seq_len[seq]
        img_blobs, seg_blobs = data_loader.get_next_minibatch()
        img = img_blobs[0,:,:,:]
        im_data, im_scales = detector.get_image_blob(img)
        # NOTE(review): im_info is built from the first frame only and reused
        # unchanged for every later frame of the sequence - confirm all frames
        # of a sequence share one size.
        im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]], dtype=np.float32)
        # Get the category of the object in the first frame
        rmin, rmax, cmin, cmax = bbox(seg_blobs[0,:,:,0])
        features, rois = detector(im_data, im_info, rpn_only=True)
        # presumably a RoI row is [batch_index, x1, y1, x2, y2] - TODO confirm
        new_rois_np = np.array([[0, cmin, rmin, cmax, rmax]], dtype=np.float32)
        new_rois_t = torch.from_numpy(new_rois_np).cuda()
        new_rois = Variable(new_rois_t, requires_grad=False)
        pooled_features = detector.roi_pool(features, new_rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = detector.fc6(x)
        x = detector.fc7(x)
        cls_score = detector.score_fc(x)
        cls_prob = F.softmax(cls_score)
        bbox_pred = detector.bbox_fc(x)
        cls_prob_np = cls_prob.cpu().data.numpy()
        bbox_pred_np = bbox_pred.cpu().data.numpy()
        # Class of the first-frame object; 0 is treated as "no class" below.
        cls_idx = cls_prob_np.argmax()
        cls_conf = cls_prob_np.max()

        # Overfit U-Net with the first frame
        # for i in range(100):
        #     unet.train()
        #     img_t = torch.from_numpy(img_blobs).permute(0,3,1,2).float().cuda()
        #     img_v = Variable(img_t, requires_grad=False)
        #     seg_t = torch.from_numpy(seg_blobs).permute(0,3,1,2).float().cuda()
        #     seg_v = Variable(seg_t, requires_grad=False)
        #     pred = unet(img_v)
        #     loss = criterion_bce(pred, seg_v)
        #     pred_view = pred.view(-1, 1)
        #     seg_view = seg_v.view(-1, 1)
        #     EPS = 1e-6
        #     loss = 0.6 * seg_view.mul(torch.log(pred_view+EPS)) + 0.4 * seg_view.mul(-1).add(1).mul(torch.log(1-pred+EPS))
        #     loss = -torch.mean(loss)
        #     loss_val = loss.data[0]
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     print('{}/100: {}'.format(i, loss_val))
        # unet.eval()

        # Merge region proposals overlapping with last frame proposal
        for j in range(1, seq_len):
            img_blobs, _ = data_loader.get_next_minibatch()
            img = img_blobs[0,:,:,:]
            im_data, im_scales = detector.get_image_blob(img)
            # 300 x 5, the first elements are useless here
            features, rois = detector(im_data, im_info, rpn_only=True)
            # x1 is None when no proposal overlaps the previous box (see the
            # two branches below).
            x1, y1, x2, y2 = merge_rois((rmin, rmax, cmin, cmax), rois.cpu().data.numpy(), thres=0.75)

            # Have overlapping proposals
            if x1 is not None:
                # Send to following layers to refine the bbox
                new_rois_np = np.array([[0, x1, y1, x2, y2]], dtype=np.float32)
                new_rois_t = torch.from_numpy(new_rois_np).cuda()
                new_rois = Variable(new_rois_t, requires_grad=False)
                pooled_features = detector.roi_pool(features, new_rois)
                x = pooled_features.view(pooled_features.size()[0], -1)
                x = detector.fc6(x)
                x = detector.fc7(x)
                cls_score = detector.score_fc(x)
                cls_prob = F.softmax(cls_score)
                bbox_pred = detector.bbox_fc(x)
                cls_prob_np = cls_prob.cpu().data.numpy()
                bbox_pred_np = bbox_pred.cpu().data.numpy()

                # Only regress bbox when confidence is greater than 0.8
                if cls_prob_np.max() > 0.8 and cls_prob_np.argmax() != 0:
                    keep = cls_prob_np.argmax()
                    # NOTE: pred_boxes/scores/classes from this call are not
                    # used; the deltas are applied by hand below.
                    pred_boxes, scores, classes = detector.interpret_faster_rcnn(cls_prob, bbox_pred, new_rois, im_info, im_data.shape, 0.8)

                    # Apply the (dx, dy, dw, dh) deltas of class `keep` to the
                    # merged box expressed as centre plus size.
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    width = x2 - x1 + 1
                    height = y2 - y1 + 1
                    dx = bbox_pred_np[0,keep*4+0]
                    dy = bbox_pred_np[0,keep*4+1]
                    dw = bbox_pred_np[0,keep*4+2]
                    dh = bbox_pred_np[0,keep*4+3]

                    pred_x = dx * width + cx
                    pred_y = dy * height + cy
                    pred_w = np.exp(dw) * width
                    pred_h = np.exp(dh) * height

                    x1 = pred_x - pred_w / 2
                    x2 = pred_x + pred_w / 2
                    y1 = pred_y - pred_h / 2
                    y2 = pred_y + pred_h / 2

            # No overlapping proposals
            if x1 is None:
                # Using Faster R-CNN again to find potential objects
                dets, scores, classes = detector.detect(img, 0.6)
                # Cannot find any salient object
                if dets.shape[0] == 0:
                    # Fall back to the previous frame's box.
                    x1, y1, x2, y2 = cmin, rmin, cmax, rmax
                else:
                    # From here x1..y2 are per-detection arrays until one
                    # detection is selected.
                    x1 = dets[:,0]
                    y1 = dets[:,1]
                    x2 = dets[:,2]
                    y2 = dets[:,3]
                    pred_area = (x2 - x1 + 1) * (y2 - y1 + 1)
                    init_area = (cmax - cmin + 1) * (rmax - rmin + 1)
                    xx1 = np.maximum(x1, cmin)
                    xx2 = np.minimum(x2, cmax)
                    yy1 = np.maximum(y1, rmin)
                    yy2 = np.minimum(y2, rmax)
                    # NOTE(review): the intersection width/height are not
                    # clamped at zero, so disjoint boxes can yield a positive
                    # "inter" - confirm.
                    inter = (xx2 - xx1 + 1) * (yy2 - yy1 + 1)
                    ovr = inter / (pred_area + init_area - inter)
                    # If there is overlapping, choose the largest IoU bbox
                    try:
                        # NOTE(review): ovr_idx indexes the filtered array but
                        # is then used to index dets - looks misaligned
                        # whenever the filter drops an earlier row; confirm.
                        ovr = ovr[ovr > 0.3]
                        ovr_idx = np.argsort(ovr)[-1]
                        x1 = dets[ovr_idx,0]
                        y1 = dets[ovr_idx,1]
                        x2 = dets[ovr_idx,2]
                        y2 = dets[ovr_idx,3]
                    # Else, choose the highest objectness score one
                    # (bare except: reached when the filtered array is empty,
                    # but it also hides any other error raised above)
                    except:
                        if cls_idx == 0:
                            temp_idx = scores.argmax()
                            x1 = dets[temp_idx,0]
                            y1 = dets[temp_idx,1]
                            x2 = dets[temp_idx,2]
                            y2 = dets[temp_idx,3]
                        else:
                            # Visit detections from nearest to farthest centre.
                            # There is no break, so the LAST class match in
                            # that order wins; with no match x1..y2 stay
                            # arrays.
                            cx = (x1 + x2) / 2
                            cy = (y1 + y2) / 2
                            cc = (cmin + cmax) / 2
                            cr = (rmin + rmax) / 2
                            dist = np.sqrt(np.square(cx-cc) + np.square(cy-cr))
                            dist_idx = np.argsort(dist)
                            for di in dist_idx:
                                if classes[di] == _CLASSES[cls_idx]:
                                    x1 = dets[di,0]
                                    y1 = dets[di,1]
                                    x2 = dets[di,2]
                                    y2 = dets[di,3]

            # Crop the region and send it to U-Net
            # Clamp the box to the image blob. The fallback presumably covers
            # the case where x1..y2 are still arrays and takes their first
            # element - TODO confirm.
            try:
                x1 = int(max(x1, 0))
                x2 = int(min(x2, im_data.shape[2]))
                y1 = int(max(y1, 0))
                y2 = int(min(y2, im_data.shape[1]))
            except:
                x1 = int(max(x1[0], 0))
                x2 = int(min(x2[0], im_data.shape[2]))
                y1 = int(max(y1[0], 0))
                y2 = int(min(y2[0], im_data.shape[1]))

            # MEAN_PIXEL = np.array([103.939, 116.779, 123.68])
            # crop = img_blobs[:, y1:y2+1, x1:x2+1, :] - MEAN_PIXEL
            # crop = img_blobs[:,:,:,:] - MEAN_PIXEL
            # crop_v = Variable(torch.from_numpy(crop).permute(0, 3, 1, 2).cuda(), requires_grad=False)
            # pred = unet(crop_v)
            # pred_np = pred.cpu().data.numpy()[0,0,:,:]
            # pred_np[pred_np < 0.5] = 0
            # pred_np[pred_np >= 0.5] = 1
            # pred_np = pred_np * 255
            # res = pred_np.astype(int)
            # cv2.imwrite('test.png', res)
            # NOTE(review): leftover interactive breakpoint; it stops the run
            # whenever the box collapses to one pixel or less.
            if y2 - y1 <= 1 or x2 - x1 <= 1:
                ipdb.set_trace()
            cv2.imwrite(os.path.join('demo', 'crop_{}_{}.png'.format(i, j)), img[y1:y2+1,x1:x2+1,:])

            # The current box becomes the reference for the next frame.
            rmin = y1
            rmax = y2
            cmin = x1
            cmax = x2

            im2show = np.copy(img)
            cv2.rectangle(im2show, (int(x1),int(y1)), (int(x2),int(y2)), (0, 255, 0), 2)
            cv2.imwrite(os.path.join('demo', '{}_{}.jpg'.format(i, j)), im2show)
            temp = [i, j, x1, y1, x2, y2]
            crop_set.append(temp)

    # Save
    crop_set = np.array(crop_set)
    np.save('crop', crop_set)
nms_time = _t['misc'].toc(average=False) print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \ .format(i + 1, num_images, detect_time, nms_time) df = pandas.DataFrame(all_boxes) df.columns = ['x1', 'y1', 'x2', 'y2', 'score', 'image_id'] df.to_csv('predictions.csv', index=False) print('Total time: %.4f, per image: %.4f' % (total_time, total_time / num_images)) if __name__ == '__main__': # load net net = FasterRCNN(classes=['__backround__', 'building'], debug=False) network.load_net(args.weights, net) print('load model successfully!') net.cuda() net.eval() val_data = json.load(open(args.test_boxes)) # evaluation test_net(net, val_data, max_per_image, thresh=thresh, vis=vis, data_dir='../data')
def test():
    """Run the 201-class detector on ``demo/004545.jpg`` and save the result.

    Builds a ``FasterRCNN`` over the background label plus 200 WordNet ids,
    loads the trained weights, times one ``detect`` call with threshold 0,
    prints the returned classes and scores, and writes the annotated image
    to ``demo/out.jpg``.
    """
    import os

    im_file = 'demo/004545.jpg'
    # im_file = 'data/VOCdevkit2007/VOC2007/JPEGImages/009036.jpg'
    # im_file = '/disk2/data/ILSVRC2015/DET/Data/DET/val/ILSVRC2013_val_00004599.JPEG'
    image = cv2.imread(im_file)

    model_file = '/disk2/data/pytorch_models/trained_models/resnet152_imgsize1000/saved_model3/faster_rcnn_200000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch3/faster_rcnn_100000.h5'
    # model_file = '/media/longc/Data/models/faster_rcnn_pytorch2/faster_rcnn_2000.h5'
    classes = np.array([
        '__background__',
        'n02672831', 'n02691156', 'n02219486', 'n02419796', 'n07739125', 'n02454379',
        'n07718747', 'n02764044', 'n02766320', 'n02769748', 'n07693725', 'n02777292',
        'n07753592', 'n02786058', 'n02787622', 'n02799071', 'n02802426', 'n02807133',
        'n02815834', 'n02131653', 'n02206856', 'n07720875', 'n02828884', 'n02834778',
        'n02840245', 'n01503061', 'n02870880', 'n02879718', 'n02883205', 'n02880940',
        'n02892767', 'n07880968', 'n02924116', 'n02274259', 'n02437136', 'n02951585',
        'n02958343', 'n02970849', 'n02402425', 'n02992211', 'n01784675', 'n03000684',
        'n03001627', 'n03017168', 'n03062245', 'n03063338', 'n03085013', 'n03793489',
        'n03109150', 'n03128519', 'n03134739', 'n03141823', 'n07718472', 'n03797390',
        'n03188531', 'n03196217', 'n03207941', 'n02084071', 'n02121808', 'n02268443',
        'n03249569', 'n03255030', 'n03271574', 'n02503517', 'n03314780', 'n07753113',
        'n03337140', 'n03991062', 'n03372029', 'n02118333', 'n03394916', 'n01639765',
        'n03400231', 'n02510455', 'n01443537', 'n03445777', 'n03445924', 'n07583066',
        'n03467517', 'n03483316', 'n03476991', 'n07697100', 'n03481172', 'n02342885',
        'n03494278', 'n03495258', 'n03124170', 'n07714571', 'n03513137', 'n02398521',
        'n03535780', 'n02374451', 'n07697537', 'n03584254', 'n01990800', 'n01910747',
        'n01882714', 'n03633091', 'n02165456', 'n03636649', 'n03642806', 'n07749582',
        'n02129165', 'n03676483', 'n01674464', 'n01982650', 'n03710721', 'n03720891',
        'n03759954', 'n03761084', 'n03764736', 'n03770439', 'n02484322', 'n03790512',
        'n07734744', 'n03804744', 'n03814639', 'n03838899', 'n07747607', 'n02444819',
        'n03908618', 'n03908714', 'n03916031', 'n00007846', 'n03928116', 'n07753275',
        'n03942813', 'n03950228', 'n07873807', 'n03958227', 'n03961711', 'n07768694',
        'n07615774', 'n02346627', 'n03995372', 'n07695742', 'n04004767', 'n04019541',
        'n04023962', 'n04026417', 'n02324045', 'n04039381', 'n01495701', 'n02509815',
        'n04070727', 'n04074963', 'n04116512', 'n04118538', 'n04118776', 'n04131690',
        'n04141076', 'n01770393', 'n04154565', 'n02076196', 'n02411705', 'n04228054',
        'n02445715', 'n01944390', 'n01726692', 'n04252077', 'n04252225', 'n04254120',
        'n04254680', 'n04256520', 'n04270147', 'n02355227', 'n02317335', 'n04317175',
        'n04330267', 'n04332243', 'n07745940', 'n04336792', 'n04356056', 'n04371430',
        'n02395003', 'n04376876', 'n04379243', 'n04392985', 'n04409515', 'n01776313',
        'n04591157', 'n02129604', 'n04442312', 'n06874185', 'n04468005', 'n04487394',
        'n03110669', 'n01662784', 'n03211117', 'n04509417', 'n04517823', 'n04536866',
        'n04540053', 'n04542943', 'n04554684', 'n04557648', 'n04530566', 'n02062744',
        'n04591713', 'n02391049',
    ])

    detector = FasterRCNN(classes)
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    # network.save_net(r'/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5', detector)
    # print('save model succ')

    t = Timer()
    t.tic()
    # image = np.zeros(shape=[600, 800, 3], dtype=np.uint8) + 255
    # Keep the per-detection labels separate from the full class list above.
    dets, scores, det_classes = detector.detect(image, 0.)
    # Stop the clock before printing so the reported time covers detection only.
    runtime = t.toc()
    print("classes:{},scores:{}".format(det_classes, scores))
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show, '%s: %.3f' % (det_classes[i], scores[i]), (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Run the pedestrian detector over a video and write an annotated copy.

    ``video_file``, ``output_file`` and ``fps`` are not defined in this
    function and must be provided by the enclosing scope. Every frame read
    from ``video_file`` is passed to ``detect``, the boxes are drawn, the
    frame is shown for one second and then appended to ``output_file``.
    A summary with the frame count and average frames per second is printed
    at the end.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    if 'vgg16' in pre_model_name.split('_'):
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    elif 'res' in pre_model_name.split('_'):
        detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
    else:
        detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    # Defined up front so the summary and the release below still work when
    # the video cannot be opened or yields no frame.
    out = None
    cnt = 1  # 1-based number of the frame being processed
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        p = Timer()
        p.tic()

        if out is None:
            # The writer needs the frame size, so it is created on the first frame.
            fourcc = cv2.VideoWriter_fourcc(*'XVID')
            out = cv2.VideoWriter(output_file, fourcc, fps, (frame.shape[1], frame.shape[0]))

        try:
            dets, scores, classes = detector.detect(frame, blob, thr=0.7, nms_thresh=0.3)
            frame = np.copy(frame)
            for i, det in enumerate(dets):
                det = tuple(int(x) for x in det)
                cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                # cv2.putText(frame, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), \
                #             cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
            cv2.imshow('demo', frame)
            cv2.waitKey(1000)
            cv2.destroyAllWindows()
        except IndexError:
            # Kept from the original: an IndexError from detection is ignored
            # and the frame is written as it stands.
            pass
        finally:
            print(cnt, '-frame : {:.3f}s'.format(p.toc()))
            cnt += 1
            out.write(frame)

    runtime = t.toc()
    # cnt is one past the last processed frame.
    frames_done = cnt - 1
    print('{} frames / total spend: {}s / {:2.1f} fps'.format(
        frames_done, int(runtime), frames_done / runtime))

    cap.release()
    if out is not None:
        out.release()
def main():
    """Build the hierarchical model, restore weights and extract features.

    Seeds the random number generators, wraps the images under
    ``args.total_image_path`` in a loader, constructs the model with fixed
    category and vocabulary sizes of 5, initialises or restores it according
    to the ``args`` flags, builds the optimizer and finally calls
    ``extract_features`` on the image loader.

    Raises:
        Exception: if a resume or finetune mode is selected but
            ``args.resume_model`` is empty.
    """
    global args, optimizer_select

    # To set the model name automatically
    print(args)
    lr = args.lr
    args = get_model_name(args)
    print('Model name: {}'.format(args.model_name))

    # To set the random seed
    seed = args.seed
    random.seed(seed)
    torch.manual_seed(seed + 1)
    torch.cuda.manual_seed(seed + 2)

    # print("Loading training set and testing set..."),
    # train_set = visual_genome(args.dataset_option, 'train')
    # test_set = visual_genome('small', 'test')
    # print("Done.")
    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    # test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
    image_set = prepare_image(datapath=args.total_image_path)
    image_loader = torch.utils.data.DataLoader(
        image_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)

    # Model declaration
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=5,
        n_predicate_cats=5,
        n_vocab=5,
        voc_sign=5,
        max_word_length=5,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=5,
        predicate_loss_weight=5,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)

    params = list(net.parameters())
    for tensor in params:
        print(tensor.size())
    print(net)

    # To group up the features
    feature_groups = group_features(net)
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = feature_groups

    # Setting the state of the training model
    net.cuda()
    net.train()
    logger_path = "log/logger/{}".format(args.model_name)
    if os.path.exists(logger_path):
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    # network.weights_normal_init(net, dev=0.01)

    if args.finetune_language_model:
        print('Only finetuning the language model from: {}'.format(
            args.resume_model))
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3
    elif args.load_RPN:
        print('Loading pretrained RPN: {}'.format(args.saved_model_path))
        args.train_all = False
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2
    else:
        print('Training from scratch.')
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    extract_features(image_loader, net)
rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net print "Creating net..." net = FasterRCNN(classes=imdb.classes, debug=_DEBUG) network.weights_normal_init(net, dev=0.01) print "Loading weight..." # pretrained_model = pytorchpath+'data/pretrained_model/VGG_imagenet.npy' # network.load_pretrained_npy(net, pretrained_model) # model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5' # model_file = 'models/saved_model3/faster_rcnn_60000.h5' model_file = '/home/jguerry/workspace/jg_dl/jg_pyt/models/sunrgbd_train_rgb_i_100_8bits/faster_rcnn_200000.h5' network.load_net(model_file, net) print "Configuring parameters..." # exp_name = 'vgg16_02-19_13-24' start_step = 200000 end_step = 400000 lr_decay_steps = {220000, 240000, 260000, 280000, 300000, 350000} lr_decay = 1. / 10 rand_seed = 1024 lr = 0.0001 # network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01) disp_interval = 1000 save_interval = 20000 if rand_seed is not None:
thickness=1) im_name = os.path.basename(image_file) print(os.path.join('demo/det_results', im_name)) cv2.imwrite(os.path.join('demo/det_results', im_name), im2show) #cv2.imshow('demo', im2show) #cv2.waitKey(0) def folder_test(net, folder): txt_file = folder + 'JPEGImages/file_name.txt' with open(txt_file) as f: for line in f: img_path = folder + 'JPEGImages/' + line.strip('\n') + '.JPG' anno_path = folder + 'Annotations/' + line.strip('\n') + '.xml' image_test(net, img_path, anno_path) if __name__ == '__main__': model_file = 'models/saved_model3/faster_rcnn_100000.h5' detector = FasterRCNN() network.load_net(model_file, detector) detector.cuda() detector.eval() print('load model successfully!') #image_file = 'demo/000001.JPG' #image_test(detector, image_file, None) folder = '/data/jmtian/PlateData/PVW_WRM_CUT/' folder_test(detector, folder)
# create VGG model for state featurization print("Loading image embedding model...") if args.image_embedding_model_type == "resnet": im_emb_model = ResNet50() elif args.image_embedding_model_type == "vgg": im_emb_model = VGG16() else: print("--image_embedding_model_type must be either resnet or vgg") sys.exit(0) print("Done!") # create Faster-RCNN model for state featurization print("Loading Fast-RCNN...") model_file = 'VGGnet_fast_rcnn_iter_70000.h5' model_frcnn = FasterRCNN() network.load_net(model_file, model_frcnn) model_frcnn.cuda() model_frcnn.eval() print("Done!") # create DQN's for the next object, predicates, and attributes print("Creating DQN models...") DQN_next_object_main = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_next_object_target = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1) DQN_predicate_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_predicate_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1) DQN_attribute_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) DQN_attribute_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1) print("Done!") # create shared optimizer
def main():
    """Train, or only evaluate, the hierarchical descriptive model.

    Reads the module-level ``args`` (rebound through ``get_model_name``) and
    sets the module-level ``optimizer_select``. The function seeds the random
    generators, builds the Visual Genome train/test loaders, constructs the
    network and chooses one of four initialisation paths (fine-tune the
    language model, load a pretrained RPN, resume, or start from scratch).

    If ``args.evaluate`` is set it runs a single test pass and prints the
    recall. Otherwise it trains for ``args.max_epoch`` epochs, saving a
    snapshot after every epoch and a ``*_best.h5`` file whenever the recall
    improves for every entry of ``top_Ns``.

    Raises:
        Exception: if fine-tuning or resuming is requested but
            ``args.resume_model`` is empty.
    """
    global args, optimizer_select
    # To set the model name automatically
    print(args)
    lr = args.lr
    args = get_model_name(args)
    print('Model name: {}'.format(args.model_name))

    # To set the random seed (a different offset for each generator)
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    # Trailing comma: Python 2 print idiom that suppresses the newline.
    print("Loading training set and testing set..."),
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=1, shuffle=True, num_workers=8,
        pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=1, shuffle=False, num_workers=8,
        pin_memory=True)

    # Model declaration
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        n_vocab=train_set.voc_size,
        voc_sign=train_set.voc_sign,
        max_word_length=train_set.max_size,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)

    # Dump every parameter shape and the module structure for the log.
    for param in net.parameters():
        print(param.size())
    print(net)

    # To group up the features
    (vgg_features_fix, vgg_features_var, rpn_features, hdn_features,
     language_features) = group_features(net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    logger_path = "log/logger/{}".format(args.model_name)
    if os.path.exists(logger_path):
        # Start from an empty log directory for this model name.
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    # network.weights_normal_init(net, dev=0.01)
    if args.finetune_language_model:
        print('Only finetuning the language model from: {}'.format(
            args.resume_model))
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3
    elif args.load_RPN:
        print('Loading pretrained RPN: {}'.format(args.saved_model_path))
        args.train_all = False
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1
    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2
    else:
        print('Training from scratch.')
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    optimizer = network.get_optimizer(
        lr, optimizer_select, args, vgg_features_var, rpn_features,
        hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        # makedirs also creates missing parents (os.mkdir would fail on a
        # nested output_dir).
        os.makedirs(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    if args.evaluate:
        recall = test(test_loader, net, top_Ns)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            # str.format does not treat '%' specially, so a single '%' is
            # correct here; the former '%%' printed two percent signs.
            print(
                '[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'
                .format(top_N=top_N,
                        recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100))
        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)

            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            # Testing
            # network.set_trainable(net, False) # Without backward(), requires_grad takes no effect
            recall = test(test_loader, net, top_Ns)

            # Keep a separate "best" checkpoint only when every Recall@N
            # improved at once.
            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(
                    args.output_dir, '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            # Trailing commas keep the per-epoch report on one line (Python 2).
            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    '\t[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'
                    .format(top_N=top_N,
                            recall=recall[idx] * 100,
                            best_recall=best_recall[idx] * 100)),

            # updating learning policy
            # NOTE(review): the nesting of this whole block was reconstructed
            # from a whitespace-collapsed source -- confirm against the
            # original file.
            if epoch % args.step_size == 0 and epoch > 0:
                lr /= 10
                args.lr = lr
                print('[learning rate: {}]'.format(lr))

                args.enable_clip_gradient = False
                if not args.finetune_language_model:
                    args.train_all = True
                    optimizer_select = 2
                # update optimizer and corresponding requires_grad state
                optimizer = network.get_optimizer(
                    lr, optimizer_select, args, vgg_features_var,
                    rpn_features, hdn_features, language_features)
# NOTE(review): the statements down to `imdb.evaluate_detections(...)` are the
# tail of a function whose header lies outside this excerpt. The code is
# unchanged; the indentation is reconstructed -- confirm against the full file.
        # Progress line: index of the current image and the two timings.
        print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time))

        if vis:
            # Show the annotated image; waitKey(1) lets the window refresh.
            cv2.imshow('test', im2show)
            cv2.waitKey(1)

    # Persist all detections to det_file.
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('=====Evaluating detections=====')
    # Evaluate the detection results and compute mAP.
    imdb.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # load data
    imdb = get_imdb(imdb_name)  # returns a pascal_voc object
    imdb.competition_mode(on=True)
    print("load data finished!")

    # load net
    net = FasterRCNN(classes=imdb.classes, debug=False)
    network.load_net(trained_model, net)  # load the trained model
    print('load model successfully!')

    # NOTE(review): the GPU transfer is commented out, so the network is not
    # moved by this script; `net.eval()` is assumed to be live code.
    # net.cuda()
    net.eval()

    # evaluation
    test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
def main():
    """Train, or only evaluate, the hierarchical descriptive model.

    Reads the module-level ``args`` (rebound through ``get_model_name``) and
    sets the module-level ``optimizer_select``. The function seeds the random
    generators, builds the Visual Genome train/test loaders, constructs the
    network and chooses one of four initialisation paths (resume a full
    model, load a pretrained RCNN, load a pretrained RPN, or start from
    scratch).

    If ``args.evaluate`` is set it runs a single test pass and prints the
    recall. Otherwise it trains for ``args.max_epoch`` epochs, saving a
    snapshot after every epoch and a ``*_best.h5`` file whenever the recall
    improves for every entry of ``top_Ns``.

    Raises:
        Exception: if ``args.resume_model`` is set but ``args.HDN_model`` is
            empty.
    """
    global args, optimizer_select
    # To set the model name automatically
    print(args)
    lr = args.lr
    args = get_model_name(args)
    print('Model name: {}'.format(args.model_name))

    # To set the random seed (a different offset for each generator)
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set...")
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome(args.dataset_option, 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(
        train_set, batch_size=1, shuffle=True, num_workers=8,
        pin_memory=True)
    # NOTE(review): the test loader shuffles as well -- confirm this is
    # intended.
    test_loader = torch.utils.data.DataLoader(
        test_set, batch_size=1, shuffle=True, num_workers=8,
        pin_memory=True)

    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        MPS_iter=args.MPS_iter,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=args.use_kmeans_anchors,
        base_model=args.base_model)

    print(net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    network.set_trainable(net, False)
    # network.weights_normal_init(net, dev=0.01)

    if args.resume_model:
        print('Resume training from: {}'.format(args.HDN_model))
        if len(args.HDN_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.HDN_model, net)
        # network.load_net(args.RPN_model, net.rpn)
        args.train_all = True
        optimizer_select = 3
    elif args.load_RCNN:
        print('Loading pretrained RCNN: {}'.format(args.RCNN_model))
        args.train_all = False
        network.load_net(args.RCNN_model, net.rcnn)
        optimizer_select = 2
    elif args.load_RPN:
        print('Loading pretrained RPN: {}'.format(args.RPN_model))
        args.train_all = False
        network.load_net(args.RPN_model, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1
    else:
        print('Training from scratch.')
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    # To group up the features
    basenet_features, rpn_features, rcnn_feature, hdn_features = \
        group_features(net)
    optimizer = network.get_optimizer(
        lr, optimizer_select, args, basenet_features, rpn_features,
        rcnn_feature, hdn_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        # makedirs also creates missing parents (os.mkdir would fail on a
        # nested output_dir).
        os.makedirs(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    if args.evaluate:
        recall = test(test_loader, target_net, top_Ns,
                      train_set.object_classes)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            # str.format does not treat '%' specially, so a single '%' is
            # correct here; the former '%%' printed two percent signs.
            print(
                '[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'
                .format(top_N=top_N,
                        recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100))
        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)

            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            recall = test(test_loader, target_net, top_Ns,
                          train_set.object_classes)

            # Keep a separate "best" checkpoint only when every Recall@N
            # improved at once.
            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(
                    args.output_dir, '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            # Trailing comma: Python 2 print idiom that suppresses the newline.
            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    '\t[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'
                    .format(top_N=top_N,
                            recall=recall[idx] * 100,
                            best_recall=best_recall[idx] * 100))

            # updating learning policy: decay when (epoch + 1) is a multiple
            # of step_size or of step_size + 2.
            # NOTE(review): the nesting of this whole block was reconstructed
            # from a whitespace-collapsed source -- confirm against the
            # original file.
            if (epoch + 1) % args.step_size == 0 or (epoch + 1) % (
                    args.step_size + 2) == 0:
                lr /= 10
                args.lr = lr
                print('[learning rate: {}]'.format(lr))

                args.enable_clip_gradient = False
                args.train_all = False
                optimizer_select = 2
                # update optimizer and corresponding requires_grad state
                optimizer = network.get_optimizer(
                    lr, optimizer_select, args, basenet_features,
                    rpn_features, rcnn_feature, hdn_features)
# load data imdb = get_imdb(imdb_name) rdl_roidb.prepare_roidb(imdb) roidb = imdb.roidb data_layer = RoIDataLayer(roidb, imdb.num_classes) # load net net = RFCN(classes=imdb.classes, debug=_DEBUG) #init_modules = [net.rpn.conv1, net.rpn.score_conv, net.rpn.bbox_conv, net.fc6, net.fc7, net.score_fc, net.bbox_fc] #network.weights_normal_init(init_modules, dev=0.01) network.weights_normal_init(net, dev=0.01) network.load_pretrained_npy(net, pretrained_model) if resume: pretrained_model_file = 'models/saved_model3/faster_rcnn_resnet101_20000.h5' network.load_net(pretrained_model_file, net) start_step = 20000 print 'Resume training...' net.cuda() net.train() params = list(net.parameters()) # optimizer = torch.optim.Adam(params[-8:], lr=lr) optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay) if not os.path.exists(output_dir): os.mkdir(output_dir)
# NOTE(review): the statements down to `imdb_0.evaluate_detections(...)` are
# the tail of a function whose header lies outside this excerpt. The code is
# unchanged; the indentation is reconstructed -- confirm against the full file.
        if sav:
            # Save the annotated image as <output_dir_detections><i>.png.
            cv2.imwrite(output_dir_detections + str(i) + '.png', im2show)

    # Persist all detections to det_file.
    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb_0.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # First dataset and its detector, switched to inference mode on the GPU.
    imdb_0 = get_imdb(imdb_test_name_0)
    imdb_0.competition_mode(on=True)
    net_0 = FasterRCNN(classes=imdb_0.classes, debug=False)
    network.load_net(trained_model_0, net_0)
    print('load model 0 successfully!')
    net_0.cuda()
    net_0.eval()

    # Second dataset and its detector, set up the same way.
    imdb_1 = get_imdb(imdb_test_name_1)
    imdb_1.competition_mode(on=True)
    net_1 = FasterRCNN(classes=imdb_1.classes, debug=False)
    network.load_net(trained_model_1, net_1)
    print('load model 1 successfully!')
    net_1.cuda()
    net_1.eval()

    # Combined model: built with the class list of imdb_0, then given the two
    # loaded detectors as its frcnn_0 / frcnn_1 attributes.
    net_x = FasterRCNN_x(classes=imdb_0.classes, debug=False)
    net_x.frcnn_0 = net_0
    net_x.frcnn_1 = net_1
# for dataset in whole_data:
#   for element in dataset:
#     data_comp.append(element)

# Encoder: reuse a stored one when a file is given, otherwise build it from
# the training set and store it under the output directory.
if args.encoding_file is not None:
    encoder = torch.load(args.encoding_file)
else:
    encoder = imSituVerbRoleLocalNounEncoder(train_set)
    torch.save(encoder, args.output_dir + "/encoder")

model = baseline_crf(encoder, cnn_type=args.cnn_type)

# Optional pretrained weights: 'faster_rcnn' checkpoints go through
# network.load_net, everything else is a regular state dict.
if args.weights_file is not None and args.cnn_type == 'faster_rcnn':
    network.load_net(args.weights_file, model)
elif args.weights_file is not None:
    model.load_state_dict(torch.load(args.weights_file))

dataset_train = imSituSituation(
    args.image_dir, train_set, encoder, model.train_preprocess())
dataset_dev = imSituSituation(
    args.image_dir, dev_set, encoder, model.dev_preprocess())

ngpus = 1
device_array = list(range(ngpus))
#batch_size = args.batch_size*ngpus
batch_size = 1

# Both loaders use the same settings: one sample per batch, shuffled, one
# worker.
train_loader = torch.utils.data.DataLoader(
    dataset_train, batch_size=batch_size, shuffle=True, num_workers=1)
dev_loader = torch.utils.data.DataLoader(
    dataset_dev, batch_size=batch_size, shuffle=True, num_workers=1)
# Collect every file in model_dir whose '_'-separated name contains db_only,
# and evaluate them in sorted order.
models = os.listdir(model_dir)
pretrained_model = [
    os.path.join(model_dir, model) for model in models
    if db_only in model.split('_')
]
pretrained_model.sort()

imdb = get_imdb(imdb_name)
prepare_roidb(imdb)
roidb = imdb.roidb

# Results are appended to performance.txt. The with-block guarantees the file
# is closed (and buffered results flushed) even if an evaluation raises.
with open(os.path.join(model_dir, 'performance.txt'), 'a') as f:
    for model in pretrained_model:
        # Skip text files that matched the name filter.
        if model.endswith('txt'):
            continue
        # ResNet variant only when a path component is exactly 'res'.
        is_resnet = 'res' in model.split('/')
        if not is_resnet:
            detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
        else:
            detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
        network.load_net(model, detector)
        # The identity-matching test only runs when triplet mode is enabled.
        match = id_match_test(model, detector, imdb, roidb) if cfg.TRIPLET.IS_TRUE else 0.
        prec, rec = test(model, detector, imdb, roidb)
        # pos, neg, bg = score_analysis(model, detector, imdb, roidb)
        # Drop the reference before the next detector is built.
        del detector
        # f.write(model+' -----pos: {:.4f} neg: {:.4f} bg: {:.4f}\n'.format(pos, neg, bg))
        f.write(model + ' ----[prec: {:.2f}%, rec: {:.2f}%] / {:.2f}%\n'.format(
            prec, rec, match))
# Load the configuration file, then read the training hyper-parameters and
# reporting intervals from it.
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = VisualGenome(split=0, num_im=50)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
# Initialise all weights first, then load the pretrained checkpoint on top.
network.weights_normal_init(net, dev=0.01)
network.load_net(pretrained_model, net)
# network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy')

net.cuda()
net.train()

params = list(net.parameters())

# Print one "name: size" line per named parameter.
print("Params are {}".format(
    '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()]))
)

# optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay)
# Only params[8:] are handed to SGD, so the first eight parameter tensors are
# never updated by this optimizer.
# NOTE(review): presumably these are the earliest backbone layers -- confirm.
optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay)

# Create the output directory on first use.
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
# Paths: configuration file, directory and file name of the pretrained
# checkpoint, and the output directory.
cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
model_dir = 'data/pretrained_model/'
output_dir = 'models/saved_model3'
pre_model_name = 'CaltechPedestrians_train_triplet_1_vgg16_cls_0.7_b3.h5'
pretrained_model = model_dir + pre_model_name

_DEBUG = False
BG_SHOW = True
# id_limit is 20 when BG_SHOW is on, 50 otherwise.
id_limit = 20 if BG_SHOW else 50

# load config
cfg_from_file(cfg_file)

# load data
imdb, roidb, ratio_list, ratio_index = extract_roidb(imdb_name)

# Build the detector, load the checkpoint, and switch to inference mode on
# the GPU.
detector = FasterRCNN_VGG(classes=imdb.classes, debug=_DEBUG)
network.load_net(pretrained_model, detector)
blob = init_data(is_cuda=True)
detector.cuda()
detector.eval()

# Parts of the checkpoint file name, split on '_'.
name_blocks = pre_model_name.split('_')
batch_size = imdb.num_triplet_test_images
test_num = len(roidb)
# NOTE(review): init_data is called a second time here and overwrites `blob`;
# the first call looks redundant -- confirm it has no required side effects.
blob = init_data(is_cuda=True)

# Accumulators filled by the extraction code that follows this excerpt.
features = []
bg_features = []
ids = []
print('Extracting features...')
t = Timer()
t.tic()