def test():
    """Run the Faster R-CNN demo on one image and save the annotated result.

    Loads weights from a hard-coded ``.h5`` path, runs ``detector.detect`` on
    ``demo/004545.jpg`` with a 0.3 threshold, draws every kept box together
    with its class label and score, and writes the picture to
    ``demo/out.jpg``.

    Raises:
        IOError: if the demo image cannot be read.
    """
    import os

    score_thresh = 0.3
    im_file = 'demo/004545.jpg'
    image = cv2.imread(im_file)
    # cv2.imread reports failure by returning None rather than raising, so
    # fail here with a clear message instead of deep inside the detector.
    if image is None:
        raise IOError('cannot read image: {}'.format(im_file))

    detector = FasterRCNN()
    network.load_net('/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5',
                     detector)
    detector.cuda()
    # Switch to evaluation mode before inference (affects Dropout/Batchnorm),
    # as the other demo entry points in this file do.
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, score_thresh)
    runtime = t.toc()
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        # Extra guard on top of the threshold already passed to detect().
        if scores[i] < score_thresh:
            continue
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show,
                    '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255),
                    thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
Exemple #2
0
def main():
    """Evaluate a saved RPN checkpoint on the 'small' test split and print recall.

    Reads ``args.use_kmeans_anchors`` from the module-level ``args``, loads
    ``./output/RPN/RPN_relationship_best_kmeans.h5`` into the network, moves
    it to the GPU and prints the two recall values returned by
    ``test(test_loader, net)``.
    """
    global args
    # print() with a single argument behaves the same on Python 2 and 3; the
    # original statement form is a syntax error on Python 3.
    print("Loading testing set...")
    test_set = visual_genome('small', 'test')
    print("Done.")

    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)
    net = RPN(args.use_kmeans_anchors)
    network.load_net('./output/RPN/RPN_relationship_best_kmeans.h5', net)
    net.cuda()

    # Testing
    # NOTE(review): expects a two-argument test(loader, net) that returns an
    # indexable pair supporting `* 100` -- confirm against its definition.
    recall = test(test_loader, net)

    print(
        'Recall: '
        'object: {recall[0]: .3f}%'
        'relationship: {recall[1]: .3f}%'.format(recall=recall * 100))
def test():
    """Detect objects in ``demo/00001.jpg`` and write the result to ``demo/out.jpg``.

    Weights are loaded from ``./model/VGGnet_fast_rcnn_iter_70000.h5``, the
    detector is put on the GPU in evaluation mode, and ``detect`` is called
    with a 0.7 threshold. The elapsed time reported by ``Timer`` is printed.
    """
    import os

    image_path = 'demo/00001.jpg'
    weights_path = './model/VGGnet_fast_rcnn_iter_70000.h5'
    box_color = (255, 205, 51)
    label_color = (0, 0, 255)

    image = cv2.imread(image_path)

    detector = FasterRCNN()
    network.load_net(weights_path, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    # Draw on a copy so the input image stays untouched.
    canvas = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        top_left, bottom_right = box[0:2], box[2:4]
        cv2.rectangle(canvas, top_left, bottom_right, box_color, 2)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        # Label sits 15 px below the box's top-left corner.
        cv2.putText(canvas, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, label_color, thickness=1)

    out_path = os.path.join('demo', 'out.jpg')
    cv2.imwrite(out_path, canvas)
Exemple #4
0
def main():
    """Evaluate a saved RPN checkpoint on the 'small' test split and print recall.

    Reads ``args.use_normal_anchors`` from the module-level ``args``, loads
    ``./output/RPN/RPN_region_best.h5`` and prints the two recall values
    returned by ``test(test_loader, net)`` next to ``best_recall``.
    """
    global args
    # print() with a single argument behaves the same on Python 2 and 3; the
    # original statement form is a syntax error on Python 3.
    print("Loading training set and testing set...")
    test_set = visual_genome('small', 'test')
    print("Done.")

    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)
    net = RPN(args.use_normal_anchors)
    network.load_net('./output/RPN/RPN_region_best.h5', net)
    net.cuda()

    # Never updated in this function, so the "Best" figures always print as 0.
    best_recall = np.array([0.0, 0.0])

    # Testing
    # NOTE(review): expects a two-argument test(loader, net) that returns an
    # indexable pair supporting `* 100` -- confirm against its definition.
    recall = test(test_loader, net)

    # str.format does not treat '%' specially, so the former '%%' printed two
    # percent signs; a single '%' is what is meant.
    print(
        'Recall: '
        'object: {recall[0]: .3f}% (Best: {best_recall[0]: .3f}%)'
        'relationship: {recall[1]: .3f}% (Best: {best_recall[1]: .3f}%)'.
        format(recall=recall * 100, best_recall=best_recall * 100))
    def __init__(self, weights=None):
        """Build the building-detection model and load its weights.

        Args:
            weights: path to a weights file accepted by ``network.load_net``.
                When ``None``, the released ``faster-rcnn.zip`` archive is
                downloaded with ``wget`` (unless already present), unzipped
                into ``weights/``, and the weights file named in
                ``weights/faster-rcnn/description.json`` is used.
        """
        if weights is None:
            if not os.path.exists('weights'):
                os.mkdir('weights')
            download_url = 'https://github.com/ArnholdInstitute/ColdSpots/releases/download/1.0/faster-rcnn.zip'
            archive_path = 'weights/faster-rcnn.zip'
            if not os.path.exists('weights/faster-rcnn'):
                print('Downloading weights for faster-rcnn')
                # Reuse an archive left over from an earlier download.
                if not os.path.exists(archive_path):
                    check_output(['wget', download_url, '-O', archive_path])
                print('Unzipping...')
                check_output(['unzip', archive_path, '-d', 'weights'])
            # Close the description file deterministically instead of
            # leaving the handle to the garbage collector.
            with open('weights/faster-rcnn/description.json') as desc_file:
                description = json.load(desc_file)
            weights = os.path.join('weights/faster-rcnn',
                                   description['weights'])
            print('Building model...')

        # The class-name spelling is kept exactly as the original passes it.
        self.model = FasterRCNNModel(classes=['__backround__', 'building'],
                                     debug=False)
        network.load_net(weights, self.model)

        self.model.cuda()
        self.model.eval()
Exemple #6
0
def main():
    """Evaluate the RPN once per epoch, checkpointing after every evaluation.

    Uses the module-level ``args`` for dataset, optimizer, resume and output
    settings. The training call inside the epoch loop is commented out, so
    each epoch only runs ``test(test_loader, net)``, prints the metrics,
    saves ``<model_name>_epoch_<n>.h5`` and, when recall improves, also
    ``<model_name>_best.h5``.

    Raises:
        Exception: if ``args.resume_training`` is set but
            ``args.resume_model`` is empty.
    """
    global args
    # print() with a single argument behaves the same on Python 2 and 3; the
    # original statement form is a syntax error on Python 3.
    print("Loading training set and testing set...")
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    # Unused while the train() call below stays commented out.
    train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
    net = RPN(not args.use_normal_anchors)
    if args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
    else:
        print('Training from scratch...Initializing network...')

    # Both branches used to build the same single-group optimizer over the
    # parameters from index 26 onwards; build it once.
    optimizer = torch.optim.SGD(list(net.parameters())[26:], lr=args.lr,
                                momentum=args.momentum, weight_decay=0.0005)

    network.set_trainable(net.features, requires_grad=False)
    net.cuda()

    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    best_recall = 0.0

    for epoch in range(0, args.max_epoch):

        # Training
        # train(train_loader, net, optimizer, epoch)

        # Testing
        recall, RPN_precision, RPN_recall = test(test_loader, net)
        # str.format does not treat '%' specially, so the former '%%'
        # printed two percent signs; a single '%' is what is meant.
        print('Epoch[{epoch:d}]: '
              'Recall: '
              'object: {recall: .3f}% (Best: {best_recall: .3f}%)'.format(
                  epoch=epoch, recall=recall * 100, best_recall=best_recall * 100))
        print('object: {precision: .3f}% '
              'object: {recall: .3f}% '.format(precision=RPN_precision * 100, recall=RPN_recall * 100))

        # update learning rate
        # NOTE(review): this also fires at epoch 0, so the rate is divided by
        # 10 before anything else happens -- confirm that is intended.
        if epoch % args.step_size == 0:
            args.disable_clip_gradient = True
            args.lr /= 10
            for param_group in optimizer.param_groups:
                param_group['lr'] = args.lr

        save_name = os.path.join(args.output_dir, '{}_epoch_{}.h5'.format(args.model_name, epoch))
        network.save_net(save_name, net)
        print('save model: {}'.format(save_name))

        if np.all(recall > best_recall):
            best_recall = recall
            save_name = os.path.join(args.output_dir, '{}_best.h5'.format(args.model_name))
            network.save_net(save_name, net)
Exemple #7
0
def load_model(model_file_path):
    """Create a ``FasterRCNN``, load weights into it and prepare it for inference.

    Args:
        model_file_path: weights file handed to ``network.load_net``.

    Returns:
        The detector, moved to the GPU and switched to evaluation mode.
    """
    model = FasterRCNN()
    network.load_net(model_file_path, model)

    # GPU placement first, then evaluation mode.
    model.cuda()
    model.eval()

    print('load model successfully!')
    return model
Exemple #8
0
def build_extractor(model_file, classes=None):
    """Create a GPU-resident ``FasterRCNN`` in evaluation mode and load its weights.

    Args:
        model_file: weights file handed to ``network.load_net``.
        classes: optional class list passed to the ``FasterRCNN`` constructor;
            when ``None`` the constructor is called without arguments.

    Returns:
        The prepared ``FasterRCNN`` instance.
    """
    model = FasterRCNN() if classes is None else FasterRCNN(classes)

    # The model is moved to the GPU and set to eval mode before the weights
    # are loaded.
    model.cuda()
    model.eval()
    network.load_net(model_file, model)

    print('load model successfully!')
    return model
Exemple #9
0
def test():
    """Detect objects in ``demo/004545.jpg`` and show them in a matplotlib figure.

    Weights are loaded from ``./models/saved_model_max/faster_rcnn_100000.h5``.
    Detection runs on the image as read by OpenCV with a 0.7 threshold; the
    figure displays the image as read by ``mpimg.imread`` with one red
    rectangle and one text label per detection.
    """
    import os

    im_file = 'demo/004545.jpg'
    model_file = './models/saved_model_max/faster_rcnn_100000.h5'

    image = cv2.imread(im_file)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    # Copy of the OpenCV image; not used by the matplotlib drawing below.
    im2show = np.copy(image)

    # The picture is read a second time through matplotlib for display.
    display_img = mpimg.imread(im_file)
    fig, ax = plt.subplots(1)
    ax.imshow(display_img)

    for idx, box in enumerate(dets):
        box_w = box[2] - box[0]
        box_h = box[3] - box[1]
        outline = patches.Rectangle(box[0:2], box_w, box_h,
                                    linewidth=1, edgecolor='r',
                                    facecolor='none')
        # Label is anchored at the box's first corner.
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        plt.text(box[0], box[1], caption)
        ax.add_patch(outline)

    plt.show()
    print('aa')
Exemple #10
0
def test(visualize=False):
    """Run detection on one cervix image and save the annotated result.

    The output goes to ``demo/<checkpoint name>/<image name>``; the directory
    is created when missing.

    Args:
        visualize: when true, additionally show the annotated image scaled to
            15% in an OpenCV window and wait for a key press.
    """
    import os

    im_file = 'data/cervix/train/Type_2/1381.jpg'
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'

    im_name = os.path.basename(im_file)
    image = cv2.imread(im_file)

    # Experiment name = checkpoint file name without its extension.
    expm = os.path.splitext(os.path.basename(model_file))[0]
    expm_dir = os.path.join('demo', expm)
    if not os.path.exists(expm_dir):
        os.makedirs(expm_dir)

    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    # Evaluation mode: has effect on Dropout and Batchnorm; train() switches
    # back to training mode.
    detector.eval()
    print('load model successfully!')

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, 0.7)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    canvas = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        cv2.rectangle(canvas, box[0:2], box[2:4], (255, 205, 51), 4)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.putText(canvas, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)
    cv2.imwrite(os.path.join(expm_dir, im_name), canvas)

    if not visualize:
        return

    preview = cv2.resize(canvas, None, None, fx=0.15, fy=0.15,
                         interpolation=cv2.INTER_LINEAR)
    cv2.imshow('demo', preview)
    cv2.waitKey(0)
def test():
    """Run a pretrained detector on ``demo/images.jpeg``, then save and show the result.

    The backbone class is picked from the underscore-separated tokens of the
    checkpoint file name: ``FasterRCNN_RES`` only when ``resnet50`` is present
    and ``vgg16`` is not, ``FasterRCNN_VGG`` in every other case. The
    annotated image is written to ``demo/out.jpg`` and displayed until a key
    is pressed.
    """
    import os

    img_file = 'demo/images.jpeg'
    image = cv2.imread(img_file)

    imdb_name = 'coco_2017_train'
    imdb = get_imdb(imdb_name)

    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'coco_2017_train_10_vgg16_0.7_b1.h5'
    pretrained_model = '{}{}'.format(model_dir, pre_model_name)
    cfg_from_file(cfg_file)
    print(imdb.classes)

    name_tokens = pre_model_name.split('_')
    wants_resnet = 'vgg16' not in name_tokens and 'resnet50' in name_tokens
    detector_cls = FasterRCNN_RES if wants_resnet else FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    blob = init_data(is_cuda=True)

    stopwatch = Timer()
    stopwatch.tic()
    dets, scores, classes = detector.detect(image, blob, thr=0.7,
                                            nms_thresh=0.3)
    elapsed = stopwatch.toc()
    print('total spend: {}s'.format(elapsed))

    canvas = np.copy(image)
    for idx, raw_box in enumerate(dets):
        box = tuple(int(coord) for coord in raw_box)
        cv2.rectangle(canvas, box[0:2], box[2:4], (255, 205, 51), 2)
        caption = '%s: %.3f' % (classes[idx], scores[idx])
        cv2.putText(canvas, caption, (box[0], box[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN, 1.0, (0, 0, 255), thickness=1)

    cv2.imwrite(os.path.join('demo', 'out.jpg'), canvas)
    cv2.imshow('demo', canvas)
    cv2.waitKey(0)
Exemple #12
0
 def analysis_video(self, result_dir):
     """Run the detector on every frame in ``self.image_list`` and save results.

     For each frame the detections are drawn on a copy, the copy is written
     to ``result_dir`` as ``<video name>_<frame number>.jpg`` and a row of
     ``self.view_table`` is updated with the file name and the mean
     detection score.

     Args:
         result_dir: directory the annotated frames are written to.

     Returns:
         dict with one key, ``'pictures'``: a list holding, per frame, a dict
         with ``'objects'`` (name / accuracy / bbox per detection),
         ``'filename'`` and ``'size'``.
     """
     self.statusbar_stringvar.set('Analysis..Please wait..')
     model_file = 'model.h5'
     detector = FasterRCNN()
     network.load_net(model_file, detector)
     detector.cuda()
     detector.eval()
     print('load model successfully!')

     info_dict = {'pictures': []}
     for index, frame in enumerate(self.image_list):
         score_sum = 0.
         pic_info = {'objects': []}
         dets, scores, classes = detector.detect(frame, 0.8)
         im2show = np.copy(frame)
         for i, det in enumerate(dets):
             det = tuple(int(x) for x in det)
             cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
             cv2.putText(im2show, '%s: %.3f' % (classes[i], scores[i]), (det[0], det[1] + 15), cv2.FONT_HERSHEY_PLAIN,
                     1.0, (0, 0, 255), thickness=1)
             score_sum += scores[i]
             pic_info['objects'].append({
                 'name': classes[i],
                 'accuracy': scores[i],
                 'bbox': det,
             })

         # Frames are numbered from 1 in file names and table rows.
         pic_info['filename'] = os.path.basename(self.video_path).split('.')[0] + '_' + str(index + 1) + '.jpg'
         pic_info['size'] = im2show.shape
         info_dict['pictures'].append(pic_info)

         cv2.imwrite(os.path.join(result_dir, pic_info['filename']), im2show)

         # A frame without detections used to divide by zero here; report a
         # mean score of 0 for it instead.
         num_found = len(classes)
         mean_score = score_sum / num_found if num_found else 0.
         self.view_table.update(index + 1,
                                name=pic_info['filename'],
                                accuracy=mean_score,
                                state='yes')
     self.statusbar_stringvar.set('Analysis done!')
     return info_dict
Exemple #13
0
def main():
	"""Evaluate a saved Faster R-CNN checkpoint on the 'small' test split.

	Reads ``args.use_kmeans_anchors`` and ``args.base_model`` from the
	module-level ``args``, sizes the network from the test set's
	``object_classes``, loads a fixed checkpoint and prints the recall
	returned by ``test(test_loader, net)``.
	"""
	global args
	# print() with a single argument behaves the same on Python 2 and 3; the
	# original statement form is a syntax error on Python 3.
	print("Loading training set and testing set...")
	test_set = visual_genome('small', 'test')
	object_classes = test_set.object_classes
	print("Done.")

	test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)
	net = FasterRCNN(use_kmeans_anchors=args.use_kmeans_anchors, n_classes=len(object_classes), model=args.base_model)
	network.load_net('./output/detection/Faster_RCNN_small_vgg_12epoch_epoch_11.h5', net)
	net.cuda()

	# Testing
	# NOTE(review): expects a two-argument test(loader, net) returning a
	# single number -- confirm against its definition.
	recall = test(test_loader, net)

	print('Recall: '
		'object: {recall: .3f}%'.format(recall=recall*100))
def test():
    """Track one object box through every DAVIS validation sequence.

    For each sequence the box is initialised from the first frame's
    segmentation mask. For every later frame the box is updated from the
    detector's region proposals (optionally refined by the box-regression
    head) or, when ``merge_rois`` yields nothing, from a full ``detect``
    pass. Each frame's crop and an annotated copy are written under
    ``demo/`` and all boxes are finally saved with ``np.save('crop', ...)``.

    NOTE(review): ``os``, ``ipdb``, ``bbox``, ``merge_rois`` and ``_CLASSES``
    are not defined in this function -- presumably module-level; confirm.
    """
    # Set up dataloader
    data_loader = DAVIS_seq_dataloader(split='val')

    model_file = './model/VGGnet_fast_rcnn_iter_70000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('Load Faster R-CNN model successfully!')

    # unet_model = './model/vgg_unet_1e-4_500.h5'
    # unet = UNet()
    # network.load_net(unet_model, unet)
    # unet.cuda()
    # network.weights_normal_init(unet, dev=0.01)
    # unet.load_from_faster_rcnn_h5(h5py.File(model_file))
    # criterion_bce and weight_decay are only referenced by the commented-out
    # U-Net code in this function.
    criterion_bce = torch.nn.BCELoss().cuda()
    weight_decay = 5e-5
    # optimizer = torch.optim.SGD(list(unet.parameters())[26:], lr=1e-4, weight_decay=weight_decay)
    # print('Load U-Net model successfully!')

    # One [sequence index, frame index, x1, y1, x2, y2] row per tracked frame.
    crop_set = []
    # Iterate
    for i in range(data_loader.num_seq):
        # Get the first frame info
        seq = data_loader.seq_list[data_loader.out_pointer]
        seq_len = data_loader.seq_len[seq]
        img_blobs, seg_blobs = data_loader.get_next_minibatch()
        img = img_blobs[0,:,:,:]
        im_data, im_scales = detector.get_image_blob(img)
        # Computed from the first frame only and reused for all later frames
        # of the sequence.
        im_info = np.array([[im_data.shape[1], im_data.shape[2], im_scales[0]]], dtype=np.float32)
        # Get the category of the object in the first frame
        rmin, rmax, cmin, cmax = bbox(seg_blobs[0,:,:,0])
        features, rois = detector(im_data, im_info, rpn_only=True)
        # ROI row layout used here: [0, x1, y1, x2, y2] with x = column, y = row.
        new_rois_np = np.array([[0, cmin, rmin, cmax, rmax]], dtype=np.float32)
        new_rois_t = torch.from_numpy(new_rois_np).cuda()
        new_rois = Variable(new_rois_t, requires_grad=False)
        # Push the single ROI through the detector's head layers by hand.
        pooled_features = detector.roi_pool(features, new_rois)
        x = pooled_features.view(pooled_features.size()[0], -1)
        x = detector.fc6(x)
        x = detector.fc7(x)
        cls_score = detector.score_fc(x)
        cls_prob = F.softmax(cls_score)
        bbox_pred = detector.bbox_fc(x)
        cls_prob_np = cls_prob.cpu().data.numpy()
        bbox_pred_np = bbox_pred.cpu().data.numpy()
        # Class of the first-frame object; index 0 is treated below as
        # "no specific class".
        cls_idx = cls_prob_np.argmax()
        cls_conf = cls_prob_np.max()

        # Overfit U-Net with the first frame
        # for i in range(100):
        #     unet.train()
        #     img_t = torch.from_numpy(img_blobs).permute(0,3,1,2).float().cuda()
        #     img_v = Variable(img_t, requires_grad=False)
        #     seg_t = torch.from_numpy(seg_blobs).permute(0,3,1,2).float().cuda()
        #     seg_v = Variable(seg_t, requires_grad=False)
        #     pred = unet(img_v)
            # loss = criterion_bce(pred, seg_v)
        #     pred_view = pred.view(-1, 1)
        #     seg_view = seg_v.view(-1, 1)
        #     EPS = 1e-6
        #     loss = 0.6 * seg_view.mul(torch.log(pred_view+EPS)) + 0.4 * seg_view.mul(-1).add(1).mul(torch.log(1-pred+EPS))
        #     loss = -torch.mean(loss)
        #     loss_val = loss.data[0]
        #     optimizer.zero_grad()
        #     loss.backward()
        #     optimizer.step()
        #     print('{}/100: {}'.format(i, loss_val))
        # unet.eval()

        # Merge region proposals overlapping with last frame proposal
        for j in range(1, seq_len):
            img_blobs, _ = data_loader.get_next_minibatch()
            img = img_blobs[0,:,:,:]
            im_data, im_scales = detector.get_image_blob(img)
            # 300 x 5, the first elements are useless here
            features, rois = detector(im_data, im_info, rpn_only=True)
            # x1 is tested against None below, so merge_rois is expected to
            # return None values when no proposal overlaps enough.
            x1, y1, x2, y2 = merge_rois((rmin, rmax, cmin, cmax), rois.cpu().data.numpy(), thres=0.75)

            # Have overlapping proposals
            if x1 is not None:
                # Send to following layers to refine the bbox
                new_rois_np = np.array([[0, x1, y1, x2, y2]], dtype=np.float32)
                new_rois_t = torch.from_numpy(new_rois_np).cuda()
                new_rois = Variable(new_rois_t, requires_grad=False)
                pooled_features = detector.roi_pool(features, new_rois)
                x = pooled_features.view(pooled_features.size()[0], -1)
                x = detector.fc6(x)
                x = detector.fc7(x)
                cls_score = detector.score_fc(x)
                cls_prob = F.softmax(cls_score)
                bbox_pred = detector.bbox_fc(x)
                cls_prob_np = cls_prob.cpu().data.numpy()
                bbox_pred_np = bbox_pred.cpu().data.numpy()

                # Only regress bbox when confidence is greater than 0.8
                if cls_prob_np.max() > 0.8 and cls_prob_np.argmax() != 0:
                    keep = cls_prob_np.argmax()
                    # NOTE(review): these three results are not read on this
                    # path; the box is decoded by hand just below.
                    pred_boxes, scores, classes = detector.interpret_faster_rcnn(cls_prob, bbox_pred, new_rois, im_info, im_data.shape, 0.8)

                    # Apply the winning class's four regression values
                    # (dx, dy, dw, dh) to the merged box in centre/size form.
                    cx = (x1 + x2) / 2
                    cy = (y1 + y2) / 2
                    width = x2 - x1 + 1
                    height = y2 - y1 + 1
                    dx = bbox_pred_np[0,keep*4+0]
                    dy = bbox_pred_np[0,keep*4+1]
                    dw = bbox_pred_np[0,keep*4+2]
                    dh = bbox_pred_np[0,keep*4+3]

                    pred_x = dx * width + cx
                    pred_y = dy * height + cy
                    pred_w = np.exp(dw) * width
                    pred_h = np.exp(dh) * height

                    x1 = pred_x - pred_w / 2
                    x2 = pred_x + pred_w / 2
                    y1 = pred_y - pred_h / 2
                    y2 = pred_y + pred_h / 2

            # No overlapping proposals
            if x1 is None:
                # Using Faster R-CNN again to find potential objects
                dets, scores, classes = detector.detect(img, 0.6)
                # Cannot find any salient object
                if dets.shape[0] == 0:
                    # Keep the previous frame's box.
                    x1, y1, x2, y2 = cmin, rmin, cmax, rmax
                else:
                    # From here x1..y2 are per-detection arrays until one
                    # detection is picked.
                    x1 = dets[:,0]
                    y1 = dets[:,1]
                    x2 = dets[:,2]
                    y2 = dets[:,3]
                    pred_area = (x2 - x1 + 1) * (y2 - y1 + 1)
                    init_area = (cmax - cmin + 1) * (rmax - rmin + 1)
                    xx1 = np.maximum(x1, cmin)
                    xx2 = np.minimum(x2, cmax)
                    yy1 = np.maximum(y1, rmin)
                    yy2 = np.minimum(y2, rmax)
                    # NOTE(review): width and height are not clamped at 0, so
                    # two negative extents multiply into a positive
                    # "intersection" -- verify.
                    inter = (xx2 - xx1 + 1) * (yy2 - yy1 + 1)
                    ovr = inter / (pred_area + init_area - inter)
                    # If there is overlapping, choose the largest IoU bbox
                    try:
                        # NOTE(review): after this filter, ovr_idx indexes the
                        # filtered array, not dets, so a different detection
                        # may be selected -- verify.
                        ovr = ovr[ovr > 0.3]
                        # Indexing [-1] on an empty argsort raises, which is
                        # what sends control to the except branch.
                        ovr_idx = np.argsort(ovr)[-1]
                        x1 = dets[ovr_idx,0]
                        y1 = dets[ovr_idx,1]
                        x2 = dets[ovr_idx,2]
                        y2 = dets[ovr_idx,3]
                    # Else, choose the highest objectness score one
                    # NOTE(review): bare except also hides unrelated errors.
                    except:
                        if cls_idx == 0:
                            temp_idx = scores.argmax()
                            x1 = dets[temp_idx,0]
                            y1 = dets[temp_idx,1]
                            x2 = dets[temp_idx,2]
                            y2 = dets[temp_idx,3]
                        else:
                            # Visit detections nearest-first by centre distance
                            # to the previous box, keeping those whose class
                            # matches the first-frame class.
                            cx = (x1 + x2) / 2
                            cy = (y1 + y2) / 2
                            cc = (cmin + cmax) / 2
                            cr = (rmin + rmax) / 2
                            dist = np.sqrt(np.square(cx-cc) + np.square(cy-cr))
                            dist_idx = np.argsort(dist)
                            # NOTE(review): no break, so the farthest matching
                            # detection wins; with no match x1..y2 stay
                            # arrays -- verify intent.
                            for di in dist_idx:
                                if classes[di] == _CLASSES[cls_idx]:
                                    x1 = dets[di,0]
                                    y1 = dets[di,1]
                                    x2 = dets[di,2]
                                    y2 = dets[di,3]

            # Crop the region and send it to U-Net
            # Clamp the box to [0, im_data.shape[2]] horizontally and
            # [0, im_data.shape[1]] vertically and make it integer.
            try:
                x1 = int(max(x1, 0))
                x2 = int(min(x2, im_data.shape[2]))
                y1 = int(max(y1, 0))
                y2 = int(min(y2, im_data.shape[1]))
            # Fallback that takes element 0 -- presumably for the case where
            # x1..y2 are still arrays; the bare except hides other errors too.
            except:
                x1 = int(max(x1[0], 0))
                x2 = int(min(x2[0], im_data.shape[2]))
                y1 = int(max(y1[0], 0))
                y2 = int(min(y2[0], im_data.shape[1]))

            # MEAN_PIXEL = np.array([103.939, 116.779, 123.68])
            # crop = img_blobs[:, y1:y2+1, x1:x2+1, :] - MEAN_PIXEL
            # crop = img_blobs[:,:,:,:] - MEAN_PIXEL
            # crop_v = Variable(torch.from_numpy(crop).permute(0, 3, 1, 2).cuda(), requires_grad=False)
            # pred = unet(crop_v)
            # pred_np = pred.cpu().data.numpy()[0,0,:,:]
            # pred_np[pred_np < 0.5] = 0
            # pred_np[pred_np >= 0.5] = 1
            # pred_np = pred_np * 255
            # res = pred_np.astype(int)
            # cv2.imwrite('test.png', res)

            # Debugging aid: drops into the ipdb debugger on a degenerate box,
            # which blocks unattended runs.
            if y2 - y1 <= 1 or x2 - x1 <= 1:
                ipdb.set_trace()
            cv2.imwrite(os.path.join('demo', 'crop_{}_{}.png'.format(i, j)), img[y1:y2+1,x1:x2+1,:])

            # The current box becomes the reference box for the next frame.
            rmin = y1
            rmax = y2
            cmin = x1
            cmax = x2

            im2show = np.copy(img)
            cv2.rectangle(im2show, (int(x1),int(y1)), (int(x2),int(y2)), (0, 255, 0), 2)
            cv2.imwrite(os.path.join('demo', '{}_{}.jpg'.format(i, j)), im2show)
            temp = [i, j, x1, y1, x2, y2]
            crop_set.append(temp)

    # Save
    crop_set = np.array(crop_set)
    np.save('crop', crop_set)
        nms_time = _t['misc'].toc(average=False)

        print 'im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time)
    df = pandas.DataFrame(all_boxes)
    df.columns = ['x1', 'y1', 'x2', 'y2', 'score', 'image_id']
    df.to_csv('predictions.csv', index=False)
    print('Total time: %.4f, per image: %.4f' %
          (total_time, total_time / num_images))


if __name__ == '__main__':
    # Script entry point: load the two-class detector, read the boxes file
    # named by args.test_boxes and run test_net on it.
    # NOTE(review): args, max_per_image, thresh and vis are not defined in
    # this block -- presumably set at module level; confirm.

    # load net
    # The class-name spelling is kept exactly as the original passes it.
    net = FasterRCNN(classes=['__backround__', 'building'], debug=False)
    network.load_net(args.weights, net)
    print('load model successfully!')

    net.cuda()
    net.eval()

    # Close the boxes file deterministically instead of leaving the handle
    # to the garbage collector.
    with open(args.test_boxes) as boxes_file:
        val_data = json.load(boxes_file)

    # evaluation
    test_net(net,
             val_data,
             max_per_image,
             thresh=thresh,
             vis=vis,
             data_dir='../data')
def test():
    """Run a 200-class detector on ``demo/004545.jpg`` and save ``demo/out.jpg``.

    The detector is built with the background entry plus 200 WordNet-style
    synset ids listed below, loaded from a hard-coded checkpoint, and called
    with a threshold of 0. Every returned box is drawn with its class label
    and score.
    """
    import os
    im_file = 'demo/004545.jpg'
    image = cv2.imread(im_file)

    model_file = '/disk2/data/pytorch_models/trained_models/resnet152_imgsize1000/saved_model3/faster_rcnn_200000.h5'

    # Entries sit inside brackets, so no line-continuation backslashes are
    # needed.
    class_names = np.array(['__background__',
                            'n02672831', 'n02691156', 'n02219486', 'n02419796', 'n07739125', 'n02454379',
                            'n07718747', 'n02764044', 'n02766320', 'n02769748', 'n07693725', 'n02777292',
                            'n07753592', 'n02786058', 'n02787622', 'n02799071', 'n02802426', 'n02807133',
                            'n02815834', 'n02131653', 'n02206856', 'n07720875', 'n02828884', 'n02834778',
                            'n02840245', 'n01503061', 'n02870880', 'n02879718', 'n02883205', 'n02880940',
                            'n02892767', 'n07880968', 'n02924116', 'n02274259', 'n02437136', 'n02951585',
                            'n02958343', 'n02970849', 'n02402425', 'n02992211', 'n01784675', 'n03000684',
                            'n03001627', 'n03017168', 'n03062245', 'n03063338', 'n03085013', 'n03793489',
                            'n03109150', 'n03128519', 'n03134739', 'n03141823', 'n07718472', 'n03797390',
                            'n03188531', 'n03196217', 'n03207941', 'n02084071', 'n02121808', 'n02268443',
                            'n03249569', 'n03255030', 'n03271574', 'n02503517', 'n03314780', 'n07753113',
                            'n03337140', 'n03991062', 'n03372029', 'n02118333', 'n03394916', 'n01639765',
                            'n03400231', 'n02510455', 'n01443537', 'n03445777', 'n03445924', 'n07583066',
                            'n03467517', 'n03483316', 'n03476991', 'n07697100', 'n03481172', 'n02342885',
                            'n03494278', 'n03495258', 'n03124170', 'n07714571', 'n03513137', 'n02398521',
                            'n03535780', 'n02374451', 'n07697537', 'n03584254', 'n01990800', 'n01910747',
                            'n01882714', 'n03633091', 'n02165456', 'n03636649', 'n03642806', 'n07749582',
                            'n02129165', 'n03676483', 'n01674464', 'n01982650', 'n03710721', 'n03720891',
                            'n03759954', 'n03761084', 'n03764736', 'n03770439', 'n02484322', 'n03790512',
                            'n07734744', 'n03804744', 'n03814639', 'n03838899', 'n07747607', 'n02444819',
                            'n03908618', 'n03908714', 'n03916031', 'n00007846', 'n03928116', 'n07753275',
                            'n03942813', 'n03950228', 'n07873807', 'n03958227', 'n03961711', 'n07768694',
                            'n07615774', 'n02346627', 'n03995372', 'n07695742', 'n04004767', 'n04019541',
                            'n04023962', 'n04026417', 'n02324045', 'n04039381', 'n01495701', 'n02509815',
                            'n04070727', 'n04074963', 'n04116512', 'n04118538', 'n04118776', 'n04131690',
                            'n04141076', 'n01770393', 'n04154565', 'n02076196', 'n02411705', 'n04228054',
                            'n02445715', 'n01944390', 'n01726692', 'n04252077', 'n04252225', 'n04254120',
                            'n04254680', 'n04256520', 'n04270147', 'n02355227', 'n02317335', 'n04317175',
                            'n04330267', 'n04332243', 'n07745940', 'n04336792', 'n04356056', 'n04371430',
                            'n02395003', 'n04376876', 'n04379243', 'n04392985', 'n04409515', 'n01776313',
                            'n04591157', 'n02129604', 'n04442312', 'n06874185', 'n04468005', 'n04487394',
                            'n03110669', 'n01662784', 'n03211117', 'n04509417', 'n04517823', 'n04536866',
                            'n04540053', 'n04542943', 'n04554684', 'n04557648', 'n04530566', 'n02062744',
                            'n04591713', 'n02391049'])

    detector = FasterRCNN(class_names)
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')

    t = Timer()
    t.tic()
    dets, scores, classes = detector.detect(image, 0.)
    runtime = t.toc()
    # Printed after toc() so console output is not counted in the timing.
    # The call form also works on Python 3, unlike the former statement form.
    print("classes:{},scores:{}".format(classes, scores))
    print('total spend: {}s'.format(runtime))

    im2show = np.copy(image)
    for i, det in enumerate(dets):
        det = tuple(int(x) for x in det)
        cv2.rectangle(im2show, det[0:2], det[2:4], (255, 205, 51), 2)
        cv2.putText(im2show,
                    '%s: %.3f' % (classes[i], scores[i]),
                    (det[0], det[1] + 15),
                    cv2.FONT_HERSHEY_PLAIN,
                    1.0, (0, 0, 255),
                    thickness=1)
    cv2.imwrite(os.path.join('demo', 'out.jpg'), im2show)
def test():
    """Run the pedestrian detector on a video and write an annotated copy.

    Loads the checkpoint named by ``pre_model_name`` into a Faster R-CNN
    detector, reads frames from the module-level ``video_file``, draws one
    rectangle per detection, shows each frame, and appends it to
    ``output_file`` (XVID, module-level ``fps``).  A per-frame timing line and
    a final throughput summary are printed.

    The capture and the writer are always released, even when a frame fails,
    and a video that yields no frames produces a "0 frames" summary instead
    of an ``UnboundLocalError``.
    """
    imdb_name = 'CaltechPedestrians_test'
    imdb = get_imdb(imdb_name)
    cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
    model_dir = 'data/pretrained_model/'
    pre_model_name = 'CaltechPedestrians_train_10_vgg16_0.7_b3.h5'
    pretrained_model = model_dir + pre_model_name
    cfg_from_file(cfg_file)

    # The backbone is chosen from the '_'-separated tokens of the checkpoint
    # name: a 'vgg16' token wins, then 'res'; anything else falls back to VGG.
    name_tokens = pre_model_name.split('_')
    use_resnet = 'res' in name_tokens and 'vgg16' not in name_tokens
    detector_cls = FasterRCNN_RES if use_resnet else FasterRCNN_VGG
    detector = detector_cls(classes=imdb.classes, debug=False)

    network.load_net(pretrained_model, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    blob = init_data(is_cuda=True)

    t = Timer()
    t.tic()
    cap = cv2.VideoCapture(video_file)
    out = None  # created lazily: the frame size is only known after the first read
    frame_count = 0
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            p = Timer()
            p.tic()
            if out is None:
                fourcc = cv2.VideoWriter_fourcc(*'XVID')
                out = cv2.VideoWriter(output_file, fourcc, fps,
                                      (frame.shape[1], frame.shape[0]))
            try:
                dets, _scores, _classes = detector.detect(frame,
                                                          blob,
                                                          thr=0.7,
                                                          nms_thresh=0.3)
                frame = np.copy(frame)
                for det in dets:
                    det = tuple(int(x) for x in det)
                    cv2.rectangle(frame, det[0:2], det[2:4], (255, 205, 51), 2)
                cv2.imshow('demo', frame)
                cv2.waitKey(1000)
                cv2.destroyAllWindows()
            except IndexError:
                # Deliberate best-effort: if detection/drawing raises
                # IndexError the frame is still written, just without
                # (complete) annotations.
                pass
            finally:
                # Count first so the per-frame label is 1-based while the
                # final total equals the number of frames actually written.
                frame_count += 1
                print(frame_count, '-frame : {:.3f}s'.format(p.toc()))
                out.write(frame)
        runtime = t.toc()
        print('{} frames  /  total spend: {}s  /  {:2.1f} fps'.format(
            frame_count, int(runtime), frame_count / runtime))
    finally:
        cap.release()
        if out is not None:
            out.release()
Exemple #18
0
def main():
    """Build the Hierarchical_Descriptive_Model, load weights, and extract features.

    Reads configuration from the module-level ``args`` and rebinds/mutates
    both ``args`` and ``optimizer_select``.  Unlike a training entry point,
    no dataset is loaded here: images come from ``args.total_image_path`` and
    the function ends by calling ``extract_features(image_loader, net)``.

    Raises:
        Exception: if fine-tuning or resuming is requested but
            ``args.resume_model`` is empty.
    """
    global args, optimizer_select
    # To set the model name automatically
    print args
    # Capture the learning rate before ``args`` is replaced below.
    lr = args.lr
    args = get_model_name(args)
    print 'Model name: {}'.format(args.model_name)

    # Seed the three RNGs with distinct offsets derived from one user seed.
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    # Visual Genome train/test loading is disabled in this variant; the
    # original (commented-out) code is kept for reference.
    # print("Loading training set and testing set..."),
    # train_set = visual_genome(args.dataset_option, 'train')
    # test_set = visual_genome('small', 'test')

    # print("Done.")

    # train_loader = torch.utils.data.DataLoader(train_set, batch_size=1, shuffle=True, num_workers=8, pin_memory=True)
    # test_loader = torch.utils.data.DataLoader(test_set, batch_size=1, shuffle=False, num_workers=8, pin_memory=True)

    # Images to process, one per batch and in a fixed order.
    image_set = prepare_image(datapath=args.total_image_path)
    image_loader = torch.utils.data.DataLoader(image_set,
                                               batch_size=1,
                                               shuffle=False,
                                               num_workers=8,
                                               pin_memory=True)

    # Model declaration
    # NOTE(review): the literal 5s below stand in for values that other
    # variants of this script take from the training set; presumably they are
    # placeholders that must match the checkpoint being loaded -- confirm.
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=5,
        n_predicate_cats=5,
        n_vocab=5,
        voc_sign=5,
        max_word_length=5,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=5,
        predicate_loss_weight=5,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)

    # Dump every parameter shape and the module structure for inspection.
    params = list(net.parameters())
    for param in params:
        print param.size()
    print net

    # To group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
        net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    # Start from a clean log directory for this model name.
    logger_path = "log/logger/{}".format(args.model_name)
    if os.path.exists(logger_path):
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    #  network.weights_normal_init(net, dev=0.01)
    # Exactly one initialisation mode is chosen, in this priority order; each
    # sets ``optimizer_select`` and ``args.train_all`` for get_optimizer below.
    if args.finetune_language_model:
        print 'Only finetuning the language model from: {}'.format(
            args.resume_model)
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3

    elif args.load_RPN:
        print 'Loading pretrained RPN: {}'.format(args.saved_model_path)
        args.train_all = False
        # Only the RPN sub-module receives pretrained weights here.
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1

    elif args.resume_training:
        print 'Resume training from: {}'.format(args.resume_model)
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2

    else:
        print 'Training from scratch.'
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    # NOTE(review): ``optimizer``, ``target_net``, ``top_Ns`` and
    # ``best_recall`` are assigned but not read again in this function.
    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    extract_features(image_loader, net)
Exemple #19
0
# Prepare the region-of-interest database of ``imdb`` and wrap it in the data
# layer that is built from the roidb and the number of classes.
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
print "Creating net..."
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
# Random normal init first; the checkpoint loaded below is applied afterwards.
network.weights_normal_init(net, dev=0.01)

print "Loading weight..."
# Alternative weight sources tried previously (kept for reference):
# pretrained_model = pytorchpath+'data/pretrained_model/VGG_imagenet.npy'
# network.load_pretrained_npy(net, pretrained_model)
# model_file = '/media/longc/Data/models/VGGnet_fast_rcnn_iter_70000.h5'
# model_file = 'models/saved_model3/faster_rcnn_60000.h5'
model_file = '/home/jguerry/workspace/jg_dl/jg_pyt/models/sunrgbd_train_rgb_i_100_8bits/faster_rcnn_200000.h5'
network.load_net(model_file, net)

print "Configuring parameters..."
# exp_name = 'vgg16_02-19_13-24'
# start_step matches the iteration number in the checkpoint file name above
# (faster_rcnn_200000.h5); presumably training resumes from there -- confirm.
start_step = 200000
end_step = 400000
# Steps at which the learning rate is meant to change; how ``lr_decay`` is
# applied is not visible in this fragment.
lr_decay_steps = {220000, 240000, 260000, 280000, 300000, 350000}
lr_decay = 1. / 10
rand_seed = 1024
lr = 0.0001
# network.weights_normal_init([net.bbox_fc, net.score_fc, net.fc6, net.fc7], dev=0.01)

# Reporting / checkpoint intervals, in steps (usage not visible here).
disp_interval = 1000
save_interval = 20000

if rand_seed is not None:
Exemple #20
0
                    thickness=1)
    im_name = os.path.basename(image_file)
    print(os.path.join('demo/det_results', im_name))
    cv2.imwrite(os.path.join('demo/det_results', im_name), im2show)
    #cv2.imshow('demo', im2show)
    #cv2.waitKey(0)


def folder_test(net, folder):
    """Run ``image_test`` on every image listed in a dataset folder.

    The list is read from ``<folder>JPEGImages/file_name.txt``, one base name
    per line.  For each name the image ``<folder>JPEGImages/<name>.JPG`` and
    the annotation ``<folder>Annotations/<name>.xml`` are passed to
    ``image_test``.

    Args:
        net: detector forwarded unchanged to ``image_test``.
        folder: dataset root.  It is concatenated directly with the relative
            paths, so it must already end with a path separator.
    """
    txt_file = folder + 'JPEGImages/file_name.txt'

    with open(txt_file) as f:
        for line in f:
            # Remove the line terminator, including the '\r' left behind by
            # CRLF files, which would otherwise end up inside the path.
            name = line.rstrip('\r\n')
            if not name:
                # Blank lines (e.g. a trailing empty line) are not images.
                continue
            img_path = folder + 'JPEGImages/' + name + '.JPG'
            anno_path = folder + 'Annotations/' + name + '.xml'
            image_test(net, img_path, anno_path)


if __name__ == '__main__':
    # Script entry point: load a saved Faster R-CNN checkpoint, move it to the
    # GPU, switch to eval mode, then run detection over a whole dataset folder.
    model_file = 'models/saved_model3/faster_rcnn_100000.h5'
    detector = FasterRCNN()
    network.load_net(model_file, detector)
    detector.cuda()
    detector.eval()
    print('load model successfully!')
    # Single-image smoke test, currently disabled:
    #image_file = 'demo/000001.JPG'
    #image_test(detector, image_file, None)

    # Hard-coded dataset root; the trailing '/' is required because
    # folder_test builds paths by plain string concatenation.
    folder = '/data/jmtian/PlateData/PVW_WRM_CUT/'
    folder_test(detector, folder)
	# create VGG model for state featurization
	print("Loading image embedding model...")
        if args.image_embedding_model_type == "resnet":
	    im_emb_model = ResNet50()
        elif args.image_embedding_model_type == "vgg":
            im_emb_model = VGG16()
        else:
            print("--image_embedding_model_type must be either resnet or vgg")
            sys.exit(0)
	print("Done!")

	# create Faster-RCNN model for state featurization
	print("Loading Fast-RCNN...")
	model_file = 'VGGnet_fast_rcnn_iter_70000.h5'
	model_frcnn = FasterRCNN()
	network.load_net(model_file, model_frcnn)
	model_frcnn.cuda()
	model_frcnn.eval()
	print("Done!")

	# create DQN's for the next object, predicates, and attributes
	print("Creating DQN models...")
	DQN_next_object_main = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1)
	DQN_next_object_target = DQN_MLP(2048*3+9600 + parameters["maximum_num_entities_per_image"], 1)
	DQN_predicate_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1)
	DQN_predicate_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.predicate_nodes), 1)
	DQN_attribute_main = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1)
	DQN_attribute_target = DQN_MLP(2048*3+9600 + len(semantic_action_graph.attribute_nodes), 1)
	print("Done!")

	# create shared optimizer
Exemple #22
0
def main():
    """Train, or only evaluate, the Hierarchical_Descriptive_Model on Visual Genome.

    Configuration is read from the module-level ``args``; both ``args`` and
    ``optimizer_select`` are rebound/mutated here.

    With ``args.evaluate`` set, one test pass is run and Recall@N is printed.
    Otherwise the model is trained for ``args.max_epoch`` epochs.  After every
    epoch a snapshot is saved, the test set is evaluated, a ``*_best.h5``
    checkpoint is written when every Recall@N improved, and the learning rate
    is divided by 10 every ``args.step_size`` epochs (epoch 0 excluded).

    Raises:
        Exception: if fine-tuning or resuming is requested but
            ``args.resume_model`` is empty.
    """
    global args, optimizer_select
    # To set the model name automatically
    print(args)
    # Capture the learning rate before ``args`` is replaced below.
    lr = args.lr
    args = get_model_name(args)
    print('Model name: {}'.format(args.model_name))

    # Seed the three RNGs with distinct offsets derived from one user seed.
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set..."),
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)

    # Model declaration: category counts, vocabulary and loss weights all come
    # from the training set.
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        n_vocab=train_set.voc_size,
        voc_sign=train_set.voc_sign,
        max_word_length=train_set.max_size,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)

    # Dump every parameter shape and the module structure for inspection.
    for param in net.parameters():
        print(param.size())
    print(net)

    # To group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
        net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    # Start from a clean log directory for this model name.
    logger_path = "log/logger/{}".format(args.model_name)
    if os.path.exists(logger_path):
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    #  network.weights_normal_init(net, dev=0.01)
    # Exactly one initialisation mode is chosen, in this priority order; each
    # sets ``optimizer_select`` and ``args.train_all`` for get_optimizer below.
    if args.finetune_language_model:
        print('Only finetuning the language model from: {}'.format(
            args.resume_model))
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3

    elif args.load_RPN:
        print('Loading pretrained RPN: {}'.format(args.saved_model_path))
        args.train_all = False
        # Only the RPN sub-module receives pretrained weights here.
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1

    elif args.resume_training:
        print('Resume training from: {}'.format(args.resume_model))
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2

    else:
        print('Training from scratch.')
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    # str.format does not treat '%' specially, so a single '%' is the correct
    # way to print a percent sign ('%%' would be emitted verbatim).
    recall_msg = '[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'

    if args.evaluate:
        recall = test(test_loader, net, top_Ns)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print(
                recall_msg.format(top_N=top_N,
                                  recall=recall[idx] * 100,
                                  best_recall=best_recall[idx] * 100))

        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)
            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            # Testing
            # network.set_trainable(net, False) # Without backward(), requires_grad takes no effect

            recall = test(test_loader, net, top_Ns)

            # A new "best" requires improvement at every N simultaneously.
            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(args.output_dir,
                                         '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    ('\t' + recall_msg).format(
                        top_N=top_N,
                        recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100)),

            # updating learning policy
            if epoch % args.step_size == 0 and epoch > 0:
                lr /= 10
                args.lr = lr
                print('[learning rate: {}]'.format(lr))

                args.enable_clip_gradient = False
                if not args.finetune_language_model:
                    args.train_all = True
                    optimizer_select = 2
                # update optimizer and corresponding requires_grad state
                optimizer = network.get_optimizer(lr, optimizer_select, args,
                                                  vgg_features_var,
                                                  rpn_features, hdn_features,
                                                  language_features)
        print ('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
            .format(i + 1, num_images, detect_time, nms_time))

        if vis:
            cv2.imshow('test', im2show)
            cv2.waitKey(1)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('=====Evaluating detections=====')  #评估检测结果,计算MAP。
    imdb.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # Script entry point: load the image database and a trained Faster R-CNN,
    # then run the full detection + evaluation pass via test_net.
    # load data
    imdb = get_imdb(imdb_name)  # per the original author's note: returns a pascal_voc object
    imdb.competition_mode(on=True)
    print("load data finished!")

    # load net
    net = FasterRCNN(classes=imdb.classes, debug=False)
    network.load_net(trained_model, net)  # load the trained model
    print('load model successfully!')

    # The GPU transfer is disabled here, so the network stays on whatever
    # device it was created on.
    # net.cuda()
    net.eval()

    # evaluation
    test_net(save_name, net, imdb, max_per_image, thresh=thresh, vis=vis)
Exemple #24
0
def main():
    """Train, or only evaluate, the Hierarchical_Descriptive_Model (RCNN/HDN variant).

    Configuration is read from the module-level ``args``; both ``args`` and
    ``optimizer_select`` are rebound/mutated here.

    Weights are initialised from, in priority order: a full HDN checkpoint
    (``args.resume_model`` / ``args.HDN_model``), a pretrained RCNN
    (``args.load_RCNN``), a pretrained RPN (``args.load_RPN``), or from
    scratch.  With ``args.evaluate`` set, one test pass is run and Recall@N is
    printed; otherwise the model is trained for ``args.max_epoch`` epochs with
    a snapshot, an evaluation and an optional ``*_best.h5`` save per epoch.

    Raises:
        Exception: if resuming is requested but ``args.HDN_model`` is empty.
    """
    global args, optimizer_select
    # To set the model name automatically
    print(args)
    # Capture the learning rate before ``args`` is replaced below.
    lr = args.lr
    args = get_model_name(args)
    print('Model name: {}'.format(args.model_name))

    # Seed the three RNGs with distinct offsets derived from one user seed.
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set...")
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome(args.dataset_option, 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=True,
                                              num_workers=8,
                                              pin_memory=True)

    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        MPS_iter=args.MPS_iter,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=args.use_kmeans_anchors,
        base_model=args.base_model)  #True

    # params = list(net.parameters())
    # for param in params:
    #     print param.size()
    print(net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    network.set_trainable(net, False)
    # network.weights_normal_init(net, dev=0.01)

    # Exactly one initialisation mode is chosen, in this priority order; each
    # sets ``optimizer_select`` and ``args.train_all`` for get_optimizer below.
    if args.resume_model:
        print('Resume training from: {}'.format(args.HDN_model))
        if len(args.HDN_model) == 0:
            # The checkpoint path checked here is HDN_model, so the message
            # names that option rather than the resume_model flag.
            raise Exception('[HDN_model] not specified')
        network.load_net(args.HDN_model, net)
        # network.load_net(args.RPN_model, net.rpn)
        args.train_all = True
        optimizer_select = 3

    elif args.load_RCNN:
        print('Loading pretrained RCNN: {}'.format(args.RCNN_model))
        args.train_all = False
        network.load_net(args.RCNN_model, net.rcnn)
        optimizer_select = 2

    elif args.load_RPN:
        print('Loading pretrained RPN: {}'.format(args.RPN_model))
        args.train_all = False
        network.load_net(args.RPN_model, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1

    else:
        print('Training from scratch.')
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    # To group up the features
    # vgg_features_fix, vgg_features_var, rpn_features, hdn_features = group_features(net)
    basenet_features, rpn_features, rcnn_feature, hdn_features = group_features(
        net)
    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      basenet_features, rpn_features,
                                      rcnn_feature, hdn_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    # str.format does not treat '%' specially, so a single '%' is the correct
    # way to print a percent sign ('%%' would be emitted verbatim).
    recall_msg = '[Recall@{top_N:d}] {recall:2.3f}% (best: {best_recall:2.3f}%)'

    if args.evaluate:
        recall = test(test_loader, target_net, top_Ns,
                      train_set.object_classes)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print(
                recall_msg.format(top_N=top_N,
                                  recall=recall[idx] * 100,
                                  best_recall=best_recall[idx] * 100))

        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)
            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            recall = test(test_loader, target_net, top_Ns,
                          train_set.object_classes)

            # A new "best" requires improvement at every N simultaneously.
            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(args.output_dir,
                                         '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    ('\t' + recall_msg).format(
                        top_N=top_N,
                        recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100))

            # updating learning policy
            # The rate drops whenever the 1-based epoch number is a multiple
            # of step_size or of step_size + 2.
            if (epoch + 1) % args.step_size == 0 or (epoch + 1) % (
                    args.step_size + 2) == 0:
                lr /= 10
                args.lr = lr
                print('[learning rate: {}]'.format(lr))

                args.enable_clip_gradient = False
                args.train_all = False
                optimizer_select = 2
                # update optimizer and corresponding requires_grad state
                optimizer = network.get_optimizer(lr, optimizer_select, args,
                                                  basenet_features,
                                                  rpn_features, rcnn_feature,
                                                  hdn_features)
Exemple #25
0
# Training setup for an RFCN detector: build the data layer, initialise and
# load weights, create the optimizer and make sure the output folder exists.
# load data
imdb = get_imdb(imdb_name)
rdl_roidb.prepare_roidb(imdb)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = RFCN(classes=imdb.classes, debug=_DEBUG)
#init_modules = [net.rpn.conv1, net.rpn.score_conv, net.rpn.bbox_conv, net.fc6, net.fc7, net.score_fc, net.bbox_fc]
#network.weights_normal_init(init_modules, dev=0.01)
# Random normal init for the whole net first, then the pretrained .npy weights
# are loaded on top of it.
network.weights_normal_init(net, dev=0.01)
network.load_pretrained_npy(net, pretrained_model)
if resume:
    # Resuming replaces the weights with a saved checkpoint and moves the
    # step counter to the iteration named in the checkpoint file.
    pretrained_model_file = 'models/saved_model3/faster_rcnn_resnet101_20000.h5'
    network.load_net(pretrained_model_file, net)
    start_step = 20000
    print 'Resume training...'

net.cuda()
net.train()

params = list(net.parameters())
# optimizer = torch.optim.Adam(params[-8:], lr=lr)
# The first 8 parameter tensors are not handed to the optimizer, so SGD never
# updates them (presumably the earliest backbone layers -- TODO confirm).
optimizer = torch.optim.SGD(params[8:],
                            lr=lr,
                            momentum=momentum,
                            weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
Exemple #26
0
        if sav:
            cv2.imwrite(output_dir_detections + str(i) + '.png', im2show)

    with open(det_file, 'wb') as f:
        cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

    print 'Evaluating detections'
    imdb_0.evaluate_detections(all_boxes, output_dir)


if __name__ == '__main__':
    # Script entry point: load two independently trained Faster R-CNN
    # detectors (each with its own image database) and attach both to a
    # combined FasterRCNN_x wrapper.

    # Detector 0 and its test database.
    imdb_0 = get_imdb(imdb_test_name_0)
    imdb_0.competition_mode(on=True)
    net_0 = FasterRCNN(classes=imdb_0.classes, debug=False)
    network.load_net(trained_model_0, net_0)
    print('load model 0 successfully!')
    net_0.cuda()
    net_0.eval()

    # Detector 1 and its test database.
    imdb_1 = get_imdb(imdb_test_name_1)
    imdb_1.competition_mode(on=True)
    net_1 = FasterRCNN(classes=imdb_1.classes, debug=False)
    network.load_net(trained_model_1, net_1)
    print('load model 1 successfully!')
    net_1.cuda()
    net_1.eval()

    # The combined model is built with the class list of database 0 and gets
    # the two loaded detectors assigned as its frcnn_0 / frcnn_1 attributes.
    net_x = FasterRCNN_x(classes=imdb_0.classes, debug=False)
    net_x.frcnn_0 = net_0
    net_x.frcnn_1 = net_1
    # for dataset in whole_data:
    #     for element in dataset:
    #         data_comp.append(element)

    if args.encoding_file is None: 
      encoder = imSituVerbRoleLocalNounEncoder(train_set)
      torch.save(encoder, args.output_dir + "/encoder")
    else:
      encoder = torch.load(args.encoding_file)
  
    model = baseline_crf(encoder, cnn_type = args.cnn_type)
    
    if args.weights_file is not None:
        if args.cnn_type == 'faster_rcnn':
            network.load_net(args.weights_file, model)

        else:
            model.load_state_dict(torch.load(args.weights_file))
    
    dataset_train = imSituSituation(args.image_dir, train_set, encoder, model.train_preprocess())
    dataset_dev = imSituSituation(args.image_dir, dev_set, encoder, model.dev_preprocess())

    ngpus = 1
    device_array = [i for i in range(0,ngpus)]
    #batch_size = args.batch_size*ngpus
    batch_size = 1

    train_loader  = torch.utils.data.DataLoader(dataset_train, batch_size = batch_size, shuffle = True, num_workers = 1)
    dev_loader  = torch.utils.data.DataLoader(dataset_dev, batch_size = batch_size, shuffle = True, num_workers = 1)
Exemple #28
0
    models = os.listdir(model_dir)
    pretrained_model = [
        os.path.join(model_dir, model) for model in models
        if db_only in model.split('_')
    ]
    pretrained_model.sort()

    imdb = get_imdb(imdb_name)
    prepare_roidb(imdb)
    roidb = imdb.roidb
    f = open(os.path.join(model_dir, 'performance.txt'), 'a')

    for model in pretrained_model:
        is_resnet = True if 'res' in model.split('/') else False
        if model.endswith('txt'):
            continue
        if not is_resnet:
            detector = FasterRCNN_VGG(classes=imdb.classes, debug=False)
        else:
            detector = FasterRCNN_RES(classes=imdb.classes, debug=False)
        network.load_net(model, detector)
        match = id_match_test(model, detector, imdb,
                              roidb) if cfg.TRIPLET.IS_TRUE else 0.
        prec, rec = test(model, detector, imdb, roidb)
        # pos, neg, bg = score_analysis(model, detector, imdb, roidb)
        del detector
        # f.write(model+'  -----pos: {:.4f} neg: {:.4f} bg: {:.4f}\n'.format(pos, neg, bg))
        f.write(model +
                '  ----[prec: {:.2f}%, rec: {:.2f}%] / {:.2f}%\n'.format(
                    prec, rec, match))
    f.close()
Exemple #29
0
# Training setup for Faster R-CNN on a Visual Genome subset: read the
# hyper-parameters from the config, build the data layer, load weights and
# create the optimizer.
cfg_from_file(cfg_file)
lr = cfg.TRAIN.LEARNING_RATE
momentum = cfg.TRAIN.MOMENTUM
weight_decay = cfg.TRAIN.WEIGHT_DECAY
disp_interval = cfg.TRAIN.DISPLAY
log_interval = cfg.TRAIN.LOG_IMAGE_ITERS

# load data
imdb = VisualGenome(split=0, num_im=50)
roidb = imdb.roidb
data_layer = RoIDataLayer(roidb, imdb.num_classes)

# load net
net = FasterRCNN(classes=imdb.classes, debug=_DEBUG)
# Random normal init first; the checkpoint loaded next is applied afterwards.
network.weights_normal_init(net, dev=0.01)
network.load_net(pretrained_model, net)
# network.load_pretrained_npy(net, 'checkpoints/VGG_imagenet.npy')
net.cuda()
net.train()

params = list(net.parameters())

# Print one "name: size" line per named parameter.
print("Params are {}".format(
    '\n'.join(['{}: {}'.format(n, p.size()) for n,p in net.named_parameters()]))
)
# optimizer = torch.optim.Adam(params, lr=0.001, eps=1e-4, weight_decay=weight_decay)
# The first 8 parameter tensors are not handed to the optimizer, so SGD never
# updates them (presumably the earliest backbone layers -- TODO confirm).
optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay)

if not os.path.exists(output_dir):
    os.mkdir(output_dir)
# Setup for extracting features with a trained VGG-based Faster R-CNN:
# fixed paths, config/data loading, detector creation and empty accumulators.
cfg_file = 'experiments/cfgs/faster_rcnn_end2end.yml'
model_dir = 'data/pretrained_model/'
output_dir = 'models/saved_model3'
pre_model_name = 'CaltechPedestrians_train_triplet_1_vgg16_cls_0.7_b3.h5'
pretrained_model = model_dir + pre_model_name

_DEBUG = False
BG_SHOW = True
# id_limit is 20 while BG_SHOW is on and 50 otherwise; how it is used is not
# visible in this fragment.
id_limit = 20 if BG_SHOW else 50
# load config
cfg_from_file(cfg_file)
# load data
imdb, roidb, ratio_list, ratio_index = extract_roidb(imdb_name)

detector = FasterRCNN_VGG(classes=imdb.classes, debug=_DEBUG)
network.load_net(pretrained_model, detector)

blob = init_data(is_cuda=True)

detector.cuda()
detector.eval()
# '_'-separated tokens of the checkpoint file name.
name_blocks = pre_model_name.split('_')
batch_size = imdb.num_triplet_test_images
test_num = len(roidb)
# NOTE(review): ``blob`` was already created by an identical call above; this
# second call rebinds it and looks redundant -- confirm init_data has no
# required side effect before removing either one.
blob = init_data(is_cuda=True)
# Accumulators; what gets appended to them is not visible in this fragment.
features = []
bg_features = []
ids = []
print('Extracting features...')
t = Timer()
t.tic()