def test_rpn_training(self):
        """Smoke-test a single short RPN training run on the VGG16 backbone.

        Loads the images under ``<cwd>/data`` in code-testing mode, resizes
        them, builds the VGG16 base and RPN models and calls ``train_rpn`` with
        the single phase ``[1, 0.001]``.  Nothing is asserted about the trained
        weights: the test passes as long as training finishes without raising.

        ``anchor_scales``, ``resize_min``, ``resize_max`` and ``optimizer`` are
        read from outside this function.
        """
        # setup
        anchors = get_anchors(anchor_scales)
        anchors_per_loc = len(anchors)
        data_dir = os.path.join(os.getcwd(), 'data')

        train_images = make_image_object(data_dir, codeTesting=True)
        # The resize ratios are not needed for training.
        processed_imgs, _ = resize_imgs(train_images, min_size=resize_min, max_size=resize_max)

        base_model = Models.vgg16_base(weight_regularizer=Models.WEIGHT_REGULARIZER,
                                       bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.vgg16_rpn(base_model, weight_regularizer=Models.WEIGHT_REGULARIZER,
                                     bias_regularizer=Models.BIAS_REGULARIZER, anchors_per_loc=anchors_per_loc)
        training_manager = RpnTrainingManager(Models.vgg_get_conv_rows_cols, Models.VGG_Stride,
                                              preprocess_func=Models.vgg_preprocess,
                                              anchor_dims=anchors)

        # action being tested
        train_rpn(rpn_model, processed_imgs, training_manager, optimizer,
                  phases=[[1, 0.001]])
        print("Testing Done")
Ejemplo n.º 2
0
    def test_resnet_frcnn_training_phase_2(self):
        """Train the ResNet-50 detector for one step-2 phase and diff its weights against a reference.

        The RPN is initialised from ``r50_rpn_step1.h5``, the detector is
        trained on VOC test image ``000005`` with the single phase
        ``[1, 0.0001]``, and the kernel of layer ``res5c_branch2c`` is written
        to ``tmp_r50_frcnn_weights.h5``.  That file is compared with
        ``reference_r50_frcnn_step2_weights.h5`` using the external ``h5diff``
        tool; the test passes when ``h5diff`` exits with status 0.

        The temporary weights file is left in ``test_data`` after the run.
        """
        # setup
        anchors = get_anchors(anchor_scales=[128, 256, 512])
        anchors_per_loc = len(anchors)
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        test_dir = os.path.join(cur_dir, os.pardir, 'test_data')
        base_dir = os.path.join(test_dir, 'VOC_test')
        ref_weights_path = os.path.join(test_dir, 'reference_r50_frcnn_step2_weights.h5')
        tmp_weights_path = os.path.join(test_dir, 'tmp_r50_frcnn_weights.h5')
        rpn_weights_path = os.path.join(test_dir, 'r50_rpn_step1.h5')
        img = extract_img_data(base_dir, '000005')
        # The resize ratios are not needed for training.
        training_imgs, _ = resize_imgs([img])

        # The RPN and the detector each get their own freshly built base network.
        model_rpn = resnet50_rpn(resnet50_base(), anchors_per_loc=anchors_per_loc)
        model_rpn.load_weights(filepath=rpn_weights_path)
        # NOTE(review): the classifier is built with num_rois=64 while the
        # training manager below uses NUM_ROIS -- confirm the two agree.
        model_frcnn = resnet50_classifier(num_rois=64, num_classes=21, base_model=resnet50_base())

        class_mapping = VOC_CLASS_MAPPING
        training_manager = DetTrainingManager(rpn_model=model_rpn, class_mapping=class_mapping, num_rois=NUM_ROIS,
                                              preprocess_func=resnet.preprocess, anchor_dims=anchors)
        optimizer = Adam(lr=0.001)

        # action being tested
        train_detector_step2(detector=model_frcnn, images=training_imgs, training_manager=training_manager,
                             optimizer=optimizer, phases=[[1, 0.0001]])

        # assertion
        last_layer_weights = model_frcnn.get_layer('res5c_branch2c').get_weights()[0]
        with h5py.File(tmp_weights_path, 'w') as weights_file:
            weights_file.create_dataset('last_layer_weights', data=last_layer_weights)
        process = Popen(['h5diff', ref_weights_path, tmp_weights_path], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        # Surface what h5diff printed so a failure shows which datasets differ
        # (or why the tool itself failed) instead of only "1 != 0".
        self.assertEqual(
            process.returncode, 0,
            msg='h5diff exited with {} comparing {} to {}\nstdout: {!r}\nstderr: {!r}'.format(
                process.returncode, ref_weights_path, tmp_weights_path, stdout, stderr))
Ejemplo n.º 3
0
    def test_rpn_training(self):
        """Train the VGG16 RPN for one phase and diff its weights against a reference.

        The RPN is trained on VOC test image ``000005`` with the single phase
        ``[1, 0.001]``.  The kernel of layer ``block5_conv3`` is then written
        to ``tmp_rpn_weights.h5`` and compared with
        ``reference_rpn_weights.h5`` using the external ``h5diff`` tool; the
        test passes when ``h5diff`` exits with status 0.

        The temporary weights file is left in ``test_data`` after the run.
        """
        # setup
        anchors = get_anchors(anchor_scales=[128, 256, 512])
        anchors_per_loc = len(anchors)
        model_rpn = vgg16_rpn(vgg16_base(), anchors_per_loc=anchors_per_loc)
        cur_dir = os.path.abspath(os.path.dirname(__file__))
        test_dir = os.path.join(cur_dir, os.pardir, 'test_data')
        base_dir = os.path.join(test_dir, 'VOC_test')
        ref_weights_path = os.path.join(test_dir, 'reference_rpn_weights.h5')
        tmp_weights_path = os.path.join(test_dir, 'tmp_rpn_weights.h5')
        image = extract_img_data(base_dir, '000005')
        training_manager = RpnTrainingManager(vgg.get_conv_rows_cols, vgg.STRIDE, preprocess_func=vgg.preprocess,
                                              anchor_dims=anchors)
        optimizer = Adam(lr=0.001)

        # action being tested
        train_rpn(model_rpn, [image], training_manager, optimizer, phases=[[1, 0.001]])

        # assertion
        last_layer_weights = model_rpn.get_layer('block5_conv3').get_weights()[0]
        with h5py.File(tmp_weights_path, 'w') as weights_file:
            weights_file.create_dataset('last_layer_weights', data=last_layer_weights)
        process = Popen(['h5diff', ref_weights_path, tmp_weights_path], stdout=PIPE, stderr=PIPE)
        stdout, stderr = process.communicate()
        # Surface what h5diff printed so a failure shows which datasets differ
        # (or why the tool itself failed) instead of only "1 != 0".
        self.assertEqual(
            process.returncode, 0,
            msg='h5diff exited with {} comparing {} to {}\nstdout: {!r}\nstderr: {!r}'.format(
                process.returncode, ref_weights_path, tmp_weights_path, stdout, stderr))
def train_rpn_step1():
    """Train the region proposal network (step 1) and save its weights and model.

    Loads the images under ``<cwd>/data``, resizes them, builds the RPN for
    the backbone named by ``network`` and trains it with ``train_rpn``.  The
    trained weights are written to ``models/rpn_weights_<network>_step1.h5``
    and the full model to ``models/rpn_model_<network>_step1.h5``; the same
    paths are handed to ``train_rpn`` together with ``save_frequency=2000``.

    ``network``, ``anchor_scales``, ``resize_min``, ``resize_max``,
    ``optimizer`` and ``phases`` are read from outside this function.

    Raises:
        ValueError: if ``network`` is neither ``"vgg16"`` nor ``"resnet50"``.
    """
    # Fail fast: without this check an unsupported backbone would only surface
    # after all images were loaded, as an obscure error on a ``None`` model.
    if network not in ("vgg16", "resnet50"):
        raise ValueError(
            'Unsupported network {!r}; expected "vgg16" or "resnet50"'.format(network))

    root_dir = os.getcwd()
    path = os.path.join(root_dir, 'data')
    train_images = make_image_object(path, codeTesting=False)
    print("Done making image Objects")

    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    # The resize ratios are not needed for training.
    processed_imgs, _ = resize_imgs(train_images,
                                    min_size=resize_min,
                                    max_size=resize_max)

    if network == "vgg16":
        base_model = Models.vgg16_base(
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.vgg16_rpn(
            base_model,
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER,
            anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.vgg_preprocess
        get_conv_rows_cols_func = Models.vgg_get_conv_rows_cols
        stride = Models.VGG_Stride
    else:  # network == "resnet50", guaranteed by the check above
        base_model = Models.resnet50_base(
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.resnet50_rpn(
            base_model,
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER,
            anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.resnet50_preprocess
        get_conv_rows_cols_func = Models.resnet50_get_conv_rows_cols
        stride = Models.ResNet_Stride

    save_weights_dest = "models/rpn_weights_{}_step1.h5".format(network)
    save_model_dest = "models/rpn_model_{}_step1.h5".format(network)
    training_manager = RpnTrainingManager(get_conv_rows_cols_func,
                                          stride,
                                          preprocess_func=preprocess_func,
                                          anchor_dims=anchors)
    rpn_model = train_rpn(rpn_model,
                          processed_imgs,
                          training_manager,
                          optimizer,
                          phases=phases,
                          save_frequency=2000,
                          save_weights_dest=save_weights_dest,
                          save_model_dest=save_model_dest)

    # Persist the final state returned by train_rpn.
    rpn_model.save_weights(save_weights_dest)
    print('Saved {} rpn weights to {}'.format(network, save_weights_dest))
    rpn_model.save(save_model_dest)
    print('Saved {} rpn model to {}'.format(network, save_model_dest))
Ejemplo n.º 5
0
 def build_proposal(self):
     """Generate the anchors, run the RPN layer and return the proposal layer's output.

     Stores the result of ``get_anchors`` on ``self.anchor_list`` before
     calling ``self.rpn_layer()``.
     """
     anchor_list = get_anchors(
         self.feature_input,
         self.im_info,
         self.anchor_ratio,
         self.base_anchors,
     )
     self.anchor_list = anchor_list
     self.rpn_layer()
     proposals = self.proposal_layer()
     return proposals
Ejemplo n.º 6
0
                        default='.')
    parser.add_argument(
        '--det_threshold',
        dest='det_threshold',
        help=
        'Minimum confidence level (from 0 to 1) needed to output a detection',
        default=DEFAULT_DET_THRESHOLD)

    args = parser.parse_args()
    det_threshold = float(args.det_threshold)

    test_imgs = base_paths_to_imgs(args.voc_path,
                                   img_set=args.img_set,
                                   do_flip=False)
    anchor_scales = anchor_scales_from_str(args.anchor_scales)
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    print("num test_imgs: ", len(test_imgs))
    class_mapping = KITTI_CLASS_MAPPING if args.kitti else VOC_CLASS_MAPPING
    num_classes = len(class_mapping)

    if args.network == 'vgg16':
        # don't need to worry about freezing/regularizing rpn because we're not training it
        model_rpn = vgg.rpn_from_h5(args.step3_model_path,
                                    anchors_per_loc=anchors_per_loc)
        model_det = vgg.det_from_h5(args.step4_model_path,
                                    num_classes=num_classes)
        stride = vgg.STRIDE
    else:
        model_rpn = resnet.rpn_from_h5(args.step3_model_path,
                                       anchors_per_loc=anchors_per_loc)
def train_rpn_det():
    """
    Jointly train the VGG16 RPN and detector, save both models and plot the losses.

        ## NOTE: Make NMS use 2k proposals at train time
        ## NOTE: DEBUGGING Script consisting of all the print statements

    Images are read from ``<cwd>/data`` and resized, a two-class
    (``Table`` / ``bg``) detector is built on top of VGG16, and both networks
    are trained through ``combined_rpn_det_trainer``.  Losses are recorded in
    a DataFrame that the trainer receives together with ``models/record.csv``.

    Only VGG16 is built here, although the output file names are still
    formatted with ``network``.  ``network``, ``anchor_scales``,
    ``resize_min``, ``resize_max``, ``optimizer``, ``phases`` and ``NUM_ROIS``
    are read from outside this function.
    """
    data_dir = os.path.join(os.getcwd(), 'data')
    images = make_image_object(data_dir, codeTesting=False)
    print("Done making image Objects")

    anchor_dims = get_anchors(anchor_scales)
    n_anchors = len(anchor_dims)
    # The resize ratios are returned but not used by the trainer.
    resized_images, _ratios = resize_imgs(images,
                                          min_size=resize_min,
                                          max_size=resize_max)

    n_classes = 2
    label_to_index = {'Table': 0, 'bg': 1}

    # Table that accumulates losses, accuracy and mAP for record.csv.
    loss_columns = [
        'mean_overlapping_bboxes',
        'class_acc',
        'loss_rpn_cls',
        'loss_rpn_regr',
        'loss_class_cls',
        'loss_class_regr',
        'curr_loss',
        'mAP',
    ]
    history = pd.DataFrame(columns=loss_columns)

    # VGG-specific hooks shared by both training managers.
    preprocess = Models.vgg_preprocess
    rows_cols_fn = Models.vgg_get_conv_rows_cols
    vgg_stride = Models.VGG_Stride

    # Every model below is built with the same regularizers.
    regularizers = dict(weight_regularizer=Models.WEIGHT_REGULARIZER,
                        bias_regularizer=Models.BIAS_REGULARIZER)

    # RPN model: input=Input(shape=(None, None, 3)), outputs=[x_class, x_regr, base_model.output]
    rpn_backbone = Models.vgg16_base(**regularizers)
    rpn = Models.vgg16_rpn(rpn_backbone,
                           include_conv=False,
                           anchors_per_loc=n_anchors,
                           **regularizers)

    # Detector model: inputs=[base_model.input, roi_input], outputs=[out_class, out_reg]
    det_backbone = Models.vgg16_base(**regularizers)
    detector = Models.vgg16_classifier(NUM_ROIS, n_classes, det_backbone,
                                       **regularizers)

    # Output locations; all of them live under models/.
    rpn_weights_file = "models/combinedTraining_rpn_weights_{}.h5".format(network)
    det_weights_file = "models/combinedTraining_detector_weights_{}.h5".format(network)
    rpn_model_file = "models/combinedTraining_rpn_model_{}.h5".format(network)
    det_model_file = "models/combinedTraining_detector_model_{}.h5".format(network)
    csv_file = "models/record.csv"

    rpn_manager = RpnTrainingManager(rows_cols_fn,
                                     vgg_stride,
                                     preprocess_func=preprocess,
                                     anchor_dims=anchor_dims)
    det_manager = DetTrainingManager(rpn_model=rpn,
                                     class_mapping=label_to_index,
                                     preprocess_func=preprocess,
                                     num_rois=NUM_ROIS,
                                     stride=vgg_stride,
                                     anchor_dims=anchor_dims)

    trained = combined_rpn_det_trainer(rpn,
                                       detector,
                                       resized_images,
                                       rpn_manager,
                                       det_manager,
                                       optimizer=optimizer,
                                       phases=phases,
                                       save_frequency=2000,
                                       rpn_save_weights_dest=rpn_weights_file,
                                       det_save_weights_dest=det_weights_file,
                                       recordCSV=history,
                                       record_path=csv_file)
    rpn, detector = trained

    # Weights are not re-saved here: per the original author's note, the
    # weights for the minimum loss are already saved by combined_rpn_det_trainer.
    rpn.save(rpn_model_file)
    print('Saved {} RPN model to {}'.format(network, rpn_model_file))
    detector.save(det_model_file)
    print('Saved {} DET model to {}'.format(network, det_model_file))
    print("\n Training Complete.")

    print("Plotting Losses")
    plotLosses(csv_file, r_epochs=40)
def test_rpn_step1():
    """Run the step-1 RPN on one image and print the proposed regions.

    ## NOTE: For evaluation, work with diff num of nms proposals at test time such as 100, 300, 1k

    Reads ``<cwd>/data/1_Page1.png``, resizes it to within 600/1000, builds
    the RPN for the backbone named by ``network``, loads
    ``models/rpn_weights_<network>_step1.h5`` by layer name and converts the
    RPN output into ROIs via ``RPNsToROIs``.  The resulting prediction dict is
    printed; nothing is returned.

    ``network``, ``anchor_scales`` and ``optimizer`` are read from outside
    this function.

    Raises:
        ValueError: if ``network`` is neither ``"vgg16"`` nor ``"resnet50"``.
        IOError: if the test image cannot be read.
    """
    # Without this check an unsupported backbone would leave rpn_model as None
    # and stride unbound, failing later with an unrelated-looking error.
    if network not in ("vgg16", "resnet50"):
        raise ValueError(
            'Unsupported network {!r}; expected "vgg16" or "resnet50"'.format(network))

    imgName = "1_Page1.png"
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    root_dir = os.getcwd()
    test_path = os.path.join(root_dir, 'data')
    image_path = os.path.join(test_path, imgName)
    img = cv2.imread(image_path)
    # cv2.imread signals a missing/unreadable file by returning None, not by raising.
    if img is None:
        raise IOError("Could not read image: {}".format(image_path))

    ## For testing on images with no GT (gt_boxes=0)
    img_obj = Image(name=imgName,
                    width=img.shape[1],
                    height=img.shape[0],
                    gt_boxes=[],
                    image_path=image_path)
    # The resize ratio is not needed because nothing is mapped back to the original image.
    resized_img, _ = img_obj.resize_within_bounds(min_size=600,
                                                  max_size=1000)

    if network == "vgg16":
        base_model = Models.vgg16_base(
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.vgg16_rpn(
            base_model,
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER,
            anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.vgg_preprocess
        stride = Models.VGG_Stride
    else:  # network == "resnet50", guaranteed by the check above
        base_model = Models.resnet50_base(
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER)
        rpn_model = Models.resnet50_rpn(
            base_model,
            weight_regularizer=Models.WEIGHT_REGULARIZER,
            bias_regularizer=Models.BIAS_REGULARIZER,
            anchors_per_loc=anchors_per_loc)
        preprocess_func = Models.resnet50_preprocess
        stride = Models.ResNet_Stride

    save_weights_dest = "models/rpn_weights_{}_step1.h5".format(network)
    rpn_model.load_weights(save_weights_dest, by_name=True)
    rpn_model.compile(optimizer=optimizer,
                      loss=[
                          cls_loss_rpn(anchors_per_loc=anchors_per_loc),
                          bbreg_loss_rpn(anchors_per_loc=anchors_per_loc)
                      ])

    # Add a leading batch axis of size 1 to the preprocessed image.
    batched_img = np.expand_dims(preprocess_func(resized_img.data), axis=0)

    # Using get_training_input() of train_detector_step2(), writing a custom PredictionToROIs
    rois, rois_prob = RPNsToROIs(rpn_model,
                                 batched_img,
                                 anchors,
                                 stride=stride)
    pred = create_Prediction_Dict(rois, rois_prob)
    print("Region Predictions: {}".format(pred))
def test_rpn_det():
    """Run the jointly trained VGG16 RPN + detector over every ``.png`` in ``<cwd>/data/test``.

    For each image the function:

    1. resizes it to within 600/1000 and preprocesses it,
    2. obtains proposals from the RPN via ``RPNsToROIs``,
    3. classifies the proposals in chunks of ``NUM_ROIS`` with the detector,
       keeping non-background detections whose top class probability is at
       least ``bbox_threshold`` (0.7),
    4. applies per-class NMS (``overlap_thresh=0.2``), maps the surviving
       boxes back to the original image and draws them with a label,
    5. prints the detections and shows the annotated image with matplotlib.

    Weights are loaded from ``models/combinedTraining_rpn_weights_<network>.h5``
    and ``models/combinedTraining_detector_weights_<network>.h5``.  Images
    that OpenCV cannot read are skipped with a message.  Nothing is returned.

    ``network``, ``anchor_scales`` and ``NUM_ROIS`` are read from outside this
    function.
    """
    anchors = get_anchors(anchor_scales)
    anchors_per_loc = len(anchors)
    root_dir = os.getcwd()
    test_path = os.path.join(root_dir, 'data', 'test')
    num_classes = 2
    bbox_threshold = 0.7

    # Invert the training-time mapping so predicted class indices map to names.
    class_mapping = {'Table': 0, 'bg': 1}
    class_mapping = {v: k for k, v in class_mapping.items()}
    # One random colour per class name, used when drawing boxes.
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    rpn_save_weights_dest = "models/combinedTraining_rpn_weights_{}.h5".format(
        network)
    det_save_weights_dest = "models/combinedTraining_detector_weights_{}.h5".format(
        network)
    preprocess_func = Models.vgg_preprocess
    stride = Models.VGG_Stride

    base_model = Models.vgg16_base(
        weight_regularizer=Models.WEIGHT_REGULARIZER,
        bias_regularizer=Models.BIAS_REGULARIZER)
    rpn_model = Models.vgg16_rpn(base_model,
                                 include_conv=False,
                                 weight_regularizer=Models.WEIGHT_REGULARIZER,
                                 bias_regularizer=Models.BIAS_REGULARIZER,
                                 anchors_per_loc=anchors_per_loc)
    detector_base = Models.vgg16_base(
        weight_regularizer=Models.WEIGHT_REGULARIZER,
        bias_regularizer=Models.BIAS_REGULARIZER)
    detector_model = Models.vgg16_classifier(
        NUM_ROIS,
        num_classes,
        detector_base,
        weight_regularizer=Models.WEIGHT_REGULARIZER,
        bias_regularizer=Models.BIAS_REGULARIZER)
    print("RPN model I/P shape {} \nand O/P shape {}".format(
        rpn_model.inputs, rpn_model.output))
    print("Detector model I/P shape {} \nand O/P shape {}".format(
        detector_model.inputs, detector_model.outputs))
    print("Loading weights")
    rpn_model.load_weights(rpn_save_weights_dest)
    detector_model.load_weights(det_save_weights_dest)
    # The models are only used for prediction here; optimizer and loss are placeholders.
    rpn_model.compile(optimizer='sgd', loss='mse')
    detector_model.compile(optimizer='sgd', loss='mse')

    print("----------------- RPN MODEL -----------------")
    # print(rpn_model.summary())
    # plot_model(rpn_model, to_file='rpnModel.png', show_shapes=True)

    print("----------------- DET MODEL -----------------")
    # print(detector_model.summary())
    # plot_model(detector_model, to_file="detModel.png", show_shapes=True)

    test_images = [
        image for image in os.listdir(test_path) if image.endswith(".png")
    ]
    for imgName in test_images:
        print(imgName)
        img_path = os.path.join(test_path, imgName)
        img = cv2.imread(img_path)
        # cv2.imread signals an unreadable file by returning None, not by raising.
        if img is None:
            print("Skipping unreadable image: {}".format(img_path))
            continue

        img_obj = Image(name=imgName,
                        width=img.shape[1],
                        height=img.shape[0],
                        gt_boxes=[],
                        image_path=img_path)
        resized_img_obj, resized_ratio = img_obj.resize_within_bounds(
            min_size=600, max_size=1000)
        batched_img = np.expand_dims(preprocess_func(resized_img_obj.data),
                                     axis=0)

        # Get bboxes by applying NMS
        # R.shape = (300, 4) according to the original author's note
        R, rois_prob = RPNsToROIs(rpn_model,
                                  batched_img,
                                  anchors,
                                  stride=stride)

        # convert from (x1,y1,x2,y2) to (x1,y1,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # Detections collected per class name across all ROI chunks.
        bboxes = {}
        probs = {}

        # Feed the proposals to the detector NUM_ROIS at a time; the "+ 1"
        # covers a final partial chunk.
        last_chunk = R.shape[0] // NUM_ROIS
        for jk in range(last_chunk + 1):

            ROIs = np.expand_dims(R[NUM_ROIS * jk:NUM_ROIS * (jk + 1), :],
                                  axis=0)

            if ROIs.shape[1] == 0:
                break  # For Images on which RPN returns zero ROIs

            if jk == last_chunk:
                # padding R if the last set ROIS is less than the required NUM_ROIS,
                # Reqd. since DET network uses fc layer with input size (64,7,7,512)
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], NUM_ROIS, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                # Fill the padding slots with copies of the chunk's first ROI.
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            print("ROIs: {} having shape = {}".format("ROIs", ROIs.shape))
            print("Batched Image: {}".format(batched_img.shape))

            # Original author's note: P_cls.shape = (1,64,2) and P_regr.shape = (1,64,4)
            [P_cls, P_regr] = detector_model.predict_on_batch(
                [batched_img, ROIs])

            # Calculate all classes' b-boxes coordinates on re-sized image
            # Drop 'bg' classes b-boxes
            for ii in range(P_cls.shape[1]):
                cls_num = np.argmax(P_cls[0, ii, :])
                cls_prob = np.max(P_cls[0, ii, :])

                # Skip low-confidence ROIs and ROIs whose best class is the
                # last index, which is 'bg' in class_mapping.
                if cls_prob < bbox_threshold or cls_num == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[cls_num]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                # Record EVERY qualifying ROI.  This code used to be nested in
                # the "first time we see this class" branch above, so only one
                # box per class was ever kept and NMS had nothing to merge.
                (x, y, w, h) = ROIs[0, ii, :]

                try:
                    (tx, ty, tw,
                     th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                    x, y, w, h = apply_regr(x, y, w, h, tx, ty, tw, th)
                except Exception as err:
                    # Best effort: keep the unrefined ROI when the regression
                    # cannot be applied, but say so instead of hiding it.
                    print("Box regression failed for ROI {} of {}: {}; "
                          "using the unrefined ROI".format(ii, imgName, err))

                # Scale by the network stride (presumably feature-map units
                # to resized-image pixels) and store as (x1, y1, x2, y2).
                bboxes[cls_name].append([
                    stride * x, stride * y, stride * (x + w),
                    stride * (y + h)
                ])
                probs[cls_name].append(cls_prob)

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = nms(bbox,
                                       np.array(probs[key]),
                                       overlap_thresh=0.2)

            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                # Calculate real coordinates on original image
                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(resized_ratio, x1, y1, x2, y2)

                cv2.rectangle(
                    img, (real_x1, real_y1), (real_x2, real_y2),
                    (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2])), 4)
                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                # Label background: outline first, then a filled white box, then the text.
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (0, 0, 0), 1)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

        print(all_dets)
        plt.figure(figsize=(10, 10))
        plt.grid()
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.show()