Example 1
                    )

            X, Y, img_data = next(data_gen_train)

            loss_rpn = model_rpn.train_on_batch(X, Y)

            P_rpn = model_rpn.predict_on_batch(X)

            # data_gen_train is an iterator.
            # It returns np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug
            # (assuming no horizontal flips or other augmentation here, x_img == img_data_aug); y_rpn_cls and y_rpn_regr are the targets for the RPN's two loss functions

            R = roi_helpers.rpn_to_roi(P_rpn[0],
                                       P_rpn[1],
                                       C,
                                       K.image_dim_ordering(),
                                       use_regr=True,
                                       overlap_thresh=0.7,
                                       max_boxes=300)
            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C,
                                                    class_mapping)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            neg_samples = np.where(Y1[0, :, -1] == 1)
            pos_samples = np.where(Y1[0, :, -1] == 0)
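A note on the two np.where calls above: they split the proposals using the last column of the one-hot labels Y1, which corresponds to the 'bg' class. A minimal, self-contained demonstration with made-up values (two ROIs, three classes):

import numpy as np

# Y1 has shape (1, num_rois, num_classes); the last column is 'bg'.
# Hypothetical labels: the first ROI is background, the second is an object.
Y1 = np.array([[[0, 0, 1],
                [1, 0, 0]]])

neg_samples = np.where(Y1[0, :, -1] == 1)  # -> (array([0]),): background ROIs
pos_samples = np.where(Y1[0, :, -1] == 0)  # -> (array([1]),): object ROIs
print(neg_samples[0], pos_samples[0])      # [0] [1]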
Example 2
def detect_img(img_name):

    use_horizontal_flips = False
    use_vertical_flips = False
    rot_90 = False
    im_size = 600
    anchor_box_scales = [64, 128, 256, 512]
    anchor_box_ratios = [[1, 1], [1, 2], [2, 1]]
    img_channel_mean = [103.939, 116.779, 123.68]
    img_scaling_factor = 1.0
    num_rois = 4
    rpn_stride = 16
    balanced_classes = False
    std_scaling = 4.0
    classifier_regr_std = [8.0, 8.0, 4.0, 4.0]
    rpn_min_overlap = 0.3
    rpn_max_overlap = 0.7
    classifier_min_overlap = 0.1
    classifier_max_overlap = 0.5
    class_mapping = {'MALIGNANT': 0, 'BENIGN': 1, 'bg': 2}

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    num_features = 1024

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(anchor_box_scales) * len(anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_path = "frcnn\\model_final_1.hdf5"
    # print('Loading weights from {}'.format(model_path))
    # model_rpn.load_weights(model_path, by_name=True)
    # model_classifier.load_weights(model_path, by_name=True)

    # model_rpn.compile(optimizer='sgd', loss='mse')
    # model_classifier.compile(optimizer='sgd', loss='mse')
    model_rpn, model_classifier = get_model(model_path, model_rpn,
                                            model_classifier)

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    print(img_name)
    st = time.time()
    # filepath = os.path.join(img_path,img_name)

    img = cv2.imread(img_name)

    X, ratio = format_img(img, im_size, img_channel_mean, img_scaling_factor)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               anchor_box_scales,
                               anchor_box_ratios,
                               std_scaling,
                               rpn_stride,
                               K.image_dim_ordering(),
                               overlap_thresh=0.5)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // num_rois + 1):
        ROIs = np.expand_dims(R[num_rois * jk:num_rois * (jk + 1), :], axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // num_rois:
            #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):

            if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= classifier_regr_std[0]
                ty /= classifier_regr_std[1]
                tw /= classifier_regr_std[2]
                th /= classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append([
                rpn_stride * x, rpn_stride * y, rpn_stride * (x + w),
                rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1)

            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    # print('Elapsed time = {}'.format(time.time() - st))
    # print(all_dets)
    # cv2.imshow('img', img)
    # cv2.waitKey(0)
    img_name = img_name.split('\\')[-1]
    # cv2.imwrite(f'./static/images/{img_name}.png', img)
    cv2.imwrite('./predict/kq.jpg', img)
    try:
        a = all_dets[0]
    except IndexError:
        # "khong phat hien" is Vietnamese for "no detection"
        a = ("khong phat hien", "khong phat hien")

    print(a)
    return img_name, a


# print("tp: {} \nfp: {}".format(tp, fp))

# img_name = r"D:\Desktop\thesis\Images\mass_crop_train\P_01981_RIGHT_MLO_FULL.jpg"

# a, b = detect_img(img_name)
# print(b[0], b[1])
# print(type(b[0]), type(b[1]))
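get_real_coordinates is called above but not shown. Since format_img returns the resize ratio, the helper presumably just maps detection coordinates back to the original image scale; a minimal sketch under that assumption (the exact rounding in the original may differ):

def get_real_coordinates(ratio, x1, y1, x2, y2):
    # Map box corners from the resized image back to the original image.
    real_x1 = int(round(x1 / ratio))
    real_y1 = int(round(y1 / ratio))
    real_x2 = int(round(x2 / ratio))
    real_y2 = int(round(y2 / ratio))
    return (real_x1, real_y1, real_x2, real_y2)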
Example 3
def train(im_size,
          train_mode,
          epoch_length=1000,
          num_epochs=10,
          val_steps=200,
          lr_mode='adaptive'):
    """ Train the model
    Args:
        im_size: user input. Input images are resized to this size.
        train_mode: 1 to train a new model with weights pre-trained on a generic dataset.
                    2 to keep training on the WashingtonOB Race dataset.
        epoch_length: number of steps per training epoch
        num_epochs: maximum number of training epochs
        val_steps: number of validation steps, at the end of each training epoch
        lr_mode: if 'adaptive', the learning rate follows an exponential decay; if 'constant', it is fixed at 1e-5.

    Returns:
       None - (trained model saved automatically)
    """

    C = config.Config()
    C.network = 'resnet50'
    C.im_size = im_size
    C.model_path = f'models/model_frcnn_{C.im_size}.hdf5'

    all_imgs, classes_count, class_mapping = get_data(
        'data/training/corners_training.txt')

    # add background class
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print(
        f'Total number of objects per class (across the whole dataset): {classes_count}'
    )

    config_output_filename = f'models/conf/config_frcnn_{C.im_size}.pickle'

    record_path = f'models/logs/frcnn_{C.im_size}.csv'  # Where to record data (used to save the losses, classification accuracy and mean average precision)
    if lr_mode == 'adaptive':
        record_path = f'models/logs/frcnn_{C.im_size}_adap.csv'

    pickle.dump(C, open(config_output_filename, 'wb'))

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'val']

    # Shuffle the images with seed 1
    random.seed(1)
    random.shuffle(train_imgs)
    random.shuffle(val_imgs)

    print(
        f'{len(train_imgs)} and {len(val_imgs)} training and validation samples (before augmentation), respectively.'
    )

    data_gen_train = regions_proposal_network.get_anchor_gt(
        train_imgs, C, nn.get_img_output_length, mode='train')
    data_gen_val = regions_proposal_network.get_anchor_gt(
        val_imgs, C, nn.get_img_output_length, mode='val')

    X, Y, image_data = next(data_gen_train)

    print('Original image: height=%d width=%d' %
          (image_data['height'], image_data['width']))
    print('Resized image:  height=%d width=%d' % (X.shape[1], X.shape[2]))
    print('Feature map size: height=%d width=%d C.rpn_stride=%d' %
          (Y[0].shape[1], Y[0].shape[2], C.rpn_stride))

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count))

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    model_rpn, model_classifier, record_df = load_weights(
        im_size, model_rpn, model_classifier, train_mode)

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # Training setting
    total_epochs = len(record_df)
    if len(record_df) == 0:
        best_loss = np.Inf
    else:
        best_loss = np.min(record_df['curr_loss_val'])
        print(
            f'Resuming training. Already trained for {len(record_df)} epochs.')

    validation_trend_hold = False

    total_epochs += num_epochs
    iter_num = 0

    loss = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []

    print('Starting training')
    start_time = time.time()

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                # use next to extract data since it is a generator
                X, Y, img_data = next(data_gen_train)

                # Train rpn model and get loss value [_, loss_rpn_cls, loss_rpn_regr]
                current_learning_rate = calculate_learning_rate(epoch_num,
                                                                mode=lr_mode)
                K.set_value(model_rpn.optimizer.lr, current_learning_rate)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # Get predicted rpn from rpn model [rpn_cls, rpn_regr]
                P_rpn = model_rpn.predict_on_batch(X)

                # R: bboxes (shape=(im_size,4))
                # Convert rpn layer to roi bboxes
                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           overlap_thresh=0.6,
                                           max_boxes=C.max_boxes)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # X2: bboxes (from the 20 boxes kept by non-max suppression) whose IoU with a gt bbox exceeds C.classifier_min_overlap
                # Y2: corresponding labels and corresponding ground truth bboxes
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:

                    # If there are at least C.num_rois // 2 positive samples, randomly choose that many; otherwise keep them all
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()

                    # Randomly choose (num_rois - num_pos) neg samples
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    # Save all the pos and neg samples in sel_samples
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # training_data: [X, X2[:, sel_samples, :]]
                # labels: [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]
                #  X                     => img_data resized image
                #  X2[:, sel_samples, :] => num_rois (4 in here) bboxes which contains selected neg and pos
                #  Y1[:, sel_samples, :] => one hot encode for num_rois bboxes which contains selected neg and pos
                #  Y2[:, sel_samples, :] => labels and gt bboxes for num_rois bboxes which contains selected neg and pos
                K.set_value(model_classifier.optimizer.lr,
                            current_learning_rate)
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                loss[iter_num, 0] = loss_rpn[1]
                loss[iter_num, 1] = loss_rpn[2]

                loss[iter_num, 2] = loss_class[1]
                loss[iter_num, 3] = loss_class[2]
                loss[iter_num, 4] = loss_class[3]

                progbar.update(
                    iter_num + 1,
                    [('RPN Classifier Loss', loss[iter_num, 0]),
                     ('RPN Regression Loss', loss[iter_num, 1]),
                     ('Detector Classifier Loss', loss[iter_num, 2]),
                     ('Detector Regression Loss', loss[iter_num, 3])])

                iter_num += 1

                # end of epoch check
                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(loss[:, 0])
                    loss_rpn_regr = np.mean(loss[:, 1])
                    loss_class_cls = np.mean(loss[:, 2])
                    loss_class_regr = np.mean(loss[:, 3])
                    class_acc = np.mean(loss[:, 4])

                    print("Performing validation.")
                    val_loss = validate(val_steps, data_gen_val, model_rpn, C,
                                        class_mapping, model_classifier)

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            f'Classifier accuracy for bounding boxes: {class_acc}'
                        )

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0

                    if val_loss['curr_loss'] <= best_loss:
                        if C.verbose:
                            print(
                                f'Total validation loss decreased from {best_loss} to {val_loss["curr_loss"]}, saving weights.'
                            )
                            print('')
                        best_loss = val_loss['curr_loss']
                        model_all.save_weights(C.model_path)
                        validation_trend_hold = False

                    elif not validation_trend_hold:
                        if C.verbose:
                            print(
                                f'Total validation loss increased for the first time, from {best_loss} to {val_loss["curr_loss"]}. Performing one more epoch to verify trend. Not saving weights for now.'
                            )
                            print('')
                        validation_trend_hold = True

                    else:
                        if C.verbose:
                            print(
                                f'Total validation loss increased for the second time, from {best_loss} to {val_loss["curr_loss"]}.'
                            )
                            print(
                                f'Terminating training now to prevent over-fitting. Keeping weights from epoch {epoch_num - 1}.'
                            )
                        exit()

                    new_row = {
                        'mean_overlapping_bboxes':
                        round(mean_overlapping_bboxes, 3),
                        'class_acc':
                        round(class_acc, 3),
                        'loss_rpn_cls':
                        round(loss_rpn_cls, 3),
                        'loss_rpn_regr':
                        round(loss_rpn_regr, 3),
                        'loss_class_cls':
                        round(loss_class_cls, 3),
                        'loss_class_regr':
                        round(loss_class_regr, 3),
                        'curr_loss':
                        round(curr_loss, 3),
                        'class_acc_val':
                        round(val_loss['class_acc'], 3),
                        'curr_loss_val':
                        round(val_loss['curr_loss'], 3),
                        'elapsed_time':
                        round(time.time() - start_time, 3)
                    }

                    start_time = time.time()
                    record_df = record_df.append(new_row, ignore_index=True)
                    record_df.to_csv(record_path, index=False)

                    break

            except Exception as e:
                print(f'Exception: {e}')
                continue

    print('Training complete.')
    return
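calculate_learning_rate is used in the loop above but not defined here. Based on the docstring (exponential decay for 'adaptive', a fixed 1e-5 for 'constant'), a plausible sketch; base_lr and decay are assumed values, not taken from the source:

def calculate_learning_rate(epoch_num, mode='adaptive',
                            base_lr=1e-4, decay=0.5):
    # 'constant' is pinned at 1e-5 by the docstring; the adaptive branch
    # below (base_lr * decay ** epoch_num) is an assumed parameterisation.
    if mode == 'constant':
        return 1e-5
    return base_lr * (decay ** epoch_num)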
Example 4
def validate(val_steps, data_gen_val, model_rpn, C, class_mapping,
             model_classifier):
    """ Perform validation by evaluating both netowrks on unseen data and calculate total loss
        (regression and classification for both networks)
    """

    val_loss = np.zeros((val_steps, 5))

    progbar2 = generic_utils.Progbar(val_steps)

    for idx in range(val_steps):

        # use next to extract data since it is a generator
        X, Y, img_data = next(data_gen_val)

        # evaluate model on validation data
        loss_rpn = model_rpn.evaluate(X, Y, verbose=0)

        # Get predicted rpn from rpn model [rpn_cls, rpn_regr]
        P_rpn = model_rpn.predict_on_batch(X)

        # R: bboxes (shape=(im_size,4))
        # Convert rpn layer to roi bboxes
        R = roi_helpers.rpn_to_roi(P_rpn[0],
                                   P_rpn[1],
                                   C,
                                   overlap_thresh=0.6,
                                   max_boxes=C.max_boxes)

        # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
        # X2: bboxes (from the 300 boxes kept by non-max suppression) whose IoU with a gt bbox exceeds C.classifier_min_overlap
        # Y2: corresponding labels and corresponding ground truth bboxes
        X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

        if X2 is None:
            continue

        neg_samples = np.where(Y1[0, :, -1] == 1)  # where the last class (bg) == 1
        pos_samples = np.where(Y1[0, :, -1] == 0)  # where another class == 1, i.e. bg == 0, since one-hot

        if len(neg_samples) > 0:
            neg_samples = neg_samples[0]
        else:
            neg_samples = []

        if len(pos_samples) > 0:
            pos_samples = pos_samples[0]
        else:
            pos_samples = []

        if C.num_rois > 1:

            # If there are at least C.num_rois // 2 positive samples, randomly choose that many; otherwise keep them all
            if len(pos_samples) < C.num_rois // 2:
                selected_pos_samples = pos_samples.tolist()
            else:
                selected_pos_samples = np.random.choice(
                    pos_samples, C.num_rois // 2, replace=False).tolist()

            # Randomly choose (num_rois - num_pos) neg samples
            try:
                selected_neg_samples = np.random.choice(
                    neg_samples,
                    C.num_rois - len(selected_pos_samples),
                    replace=False).tolist()
            except:
                selected_neg_samples = np.random.choice(
                    neg_samples,
                    C.num_rois - len(selected_pos_samples),
                    replace=True).tolist()

            # Save all the pos and neg samples in sel_samples
            sel_samples = selected_pos_samples + selected_neg_samples
        else:
            # in the extreme case where num_rois = 1, we pick a random pos or neg sample
            if np.random.randint(0, 2):
                sel_samples = random.choice(neg_samples)
            else:
                sel_samples = random.choice(pos_samples)

        # validation_data: [X, X2[:, sel_samples, :]]
        # labels: [Y1[:, sel_samples, :], Y2[:, sel_samples, :]]
        #  X                     => img_data resized image
        #  X2[:, sel_samples, :] => num_rois (4 in here) bboxes which contains selected neg and pos
        #  Y1[:, sel_samples, :] => one hot encode for num_rois bboxes which contains selected neg and pos
        #  Y2[:, sel_samples, :] => labels and gt bboxes for num_rois bboxes which contains selected neg and pos
        loss_class = model_classifier.evaluate(
            [X, X2[:, sel_samples, :]],
            [Y1[:, sel_samples, :], Y2[:, sel_samples, :]],
            verbose=0)

        val_loss[idx, 0] = loss_rpn[1]
        val_loss[idx, 1] = loss_rpn[2]
        val_loss[idx, 2] = loss_class[1]
        val_loss[idx, 3] = loss_class[2]
        val_loss[idx, 4] = loss_class[3]

        progbar2.update(idx + 1)

    val = {
        'loss_rpn_cls': np.mean(val_loss[:, 0]),
        'loss_rpn_regr': np.mean(val_loss[:, 1]),
        'loss_class_cls': np.mean(val_loss[:, 2]),
        'loss_class_regr': np.mean(val_loss[:, 3]),
        'class_acc': np.mean(val_loss[:, 4])
    }

    val['curr_loss'] = val['loss_rpn_cls'] + val['loss_rpn_regr'] + val[
        'loss_class_cls'] + val['loss_class_regr']

    return val
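The indices used when reading loss_rpn and loss_class (loss_rpn[1], loss_class[3], and so on) rely on Keras returning [total_loss, per-output losses..., metrics...] in compile order for multi-output models. When in doubt, the ordering can be inspected directly; the names printed below are illustrative, not guaranteed:

# Index 0 is the combined loss; per-output losses and any metrics follow
# in the order given at compile time, which is what val_loss[idx, 0..4] assumes.
print(model_rpn.metrics_names)         # e.g. ['loss', 'rpn_out_class_loss', 'rpn_out_regress_loss']
print(model_classifier.metrics_names)  # e.g. ['loss', '..._cls_loss', '..._regr_loss', 'dense_class_N_accuracy']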
Example 5
	print(img_name)
	st = time.time()
	filepath = os.path.join(img_path,img_name)

	img = cv2.imread(filepath)
	img = cv2.medianBlur(img, 1)
	X, ratio = format_img(img, C)

	if K.image_data_format() == 'channels_last':
		X = np.transpose(X, (0, 2, 3, 1))

	# get the feature maps and output from the RPN
	[Y1, Y2, F] = model_rpn.predict(X)
	

	R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_data_format(), overlap_thresh=0.7)

	# convert from (x1,y1,x2,y2) to (x,y,w,h)
	R[:, 2] -= R[:, 0]
	R[:, 3] -= R[:, 1]

	# apply the spatial pyramid pooling to the proposed regions
	bboxes = {}
	probs = {}

	for jk in range(R.shape[0]//C.num_rois + 1):
		ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
		if ROIs.shape[1] == 0:
			break

		if jk == R.shape[0]//C.num_rois:
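format_img(img, C) is used throughout these examples but never shown. A sketch of the usual preprocessing in this family of scripts, mirroring the per-channel mean subtraction and transpose that Example 6 below performs inline (the resize policy here, shorter side scaled to C.im_size, is an assumption):

import cv2
import numpy as np

def format_img(img, C):
    # Resize so the shorter side equals C.im_size, preserving aspect ratio.
    height, width = img.shape[:2]
    ratio = C.im_size / min(height, width)
    img = cv2.resize(img, (int(width * ratio), int(height * ratio)),
                     interpolation=cv2.INTER_CUBIC)

    # Same normalisation as in findBBox() (Example 6): channel swap,
    # per-channel mean subtraction, scaling.
    X = img[:, :, (2, 1, 0)].astype(np.float32)
    X[:, :, 0] -= C.img_channel_mean[0]
    X[:, :, 1] -= C.img_channel_mean[1]
    X[:, :, 2] -= C.img_channel_mean[2]
    X /= C.img_scaling_factor

    # Channels-first plus a batch axis; callers transpose to channels-last
    # for the TensorFlow backend, as seen above.
    X = np.transpose(X, (2, 0, 1))
    X = np.expand_dims(X, axis=0)
    return X, ratio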
Example 6
def findBBox(Q_frcnn,side,C,options,model_rpn,model_classifier_only,overlap=70):

	# correction applied to bbox co-ordinates if scanning the right side of the image
	if side == 'R':
		org_shift = 814-overlap/2
	elif side == 'L':
		org_shift = 0

	# obtaining classes used to train classifier
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}


	while not Q_frcnn.empty():
		try:
			# get image from queue
			img = Q_frcnn.get()

			# removing channel means
			X = img[:, :, (2, 1, 0)]
			X = X.astype(np.float32)

			X[:, :, 0] -= C.img_channel_mean[0]
			X[:, :, 1] -= C.img_channel_mean[1]
			X[:, :, 2] -= C.img_channel_mean[2]

			X = np.transpose(X, (2, 0, 1))
			X = np.expand_dims(X, axis=0)


			if K.image_dim_ordering() == 'tf':
				X = np.transpose(X, (0, 2, 3, 1))

			# get the feature maps and output from the RPN
			[Y1, Y2, F] = model_rpn.predict(X)

			# convert rpn output into co-ordinates of corners of bbox
			R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=options.non_maxima_suprresion_threshold-0.2)

			# convert from (x1,y1,x2,y2) to (x,y,w,h)
			R[:, 2] -= R[:, 0]
			R[:, 3] -= R[:, 1]

			# apply the spatial pyramid pooling to the proposed regions
			bboxes = {}
			probs = {}

			for jk in range(R.shape[0]//C.num_rois + 1):
				ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
				if ROIs.shape[1] == 0:
					break

				if jk == R.shape[0]//C.num_rois:
					# padding R
					curr_shape = ROIs.shape
					target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
					ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
					ROIs_padded[:, :curr_shape[1], :] = ROIs
					ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
					ROIs = ROIs_padded

				# passing proposed ROIs to classifier
				[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

				for ii in range(P_cls.shape[1]):

					if np.max(P_cls[0, ii, :]) < options.bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
						continue

					cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

					if cls_name not in bboxes:
						bboxes[cls_name] = []
						probs[cls_name] = []

					(x, y, w, h) = ROIs[0, ii, :]

					cls_num = np.argmax(P_cls[0, ii, :])

					try:
						(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
						tx /= C.classifier_regr_std[0]
						ty /= C.classifier_regr_std[1]
						tw /= C.classifier_regr_std[2]
						th /= C.classifier_regr_std[3]
						x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
					except:
						pass

					rpns = C.rpn_stride

					bboxes[cls_name].append([rpns*x, rpns*y, rpns*(x+w), rpns*(y+h)])
					probs[cls_name].append(np.max(P_cls[0, ii, :]))

			bboxForFrames = []

			for key in bboxes:
				bbox = np.array(bboxes[key])
				new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=options.non_maxima_suprresion_threshold-0.2)

				for jk in range(new_boxes.shape[0]):
					(x1, y1, x2, y2) = new_boxes[jk,:]

					x1 += org_shift  # adjusting for the change in origin when dividing the image into two halves
					x2 += org_shift  # the coordinate system of the right half is shifted
					y1 = 0 if y1<0 else y1		# negative co-ordinates clamped to zero
					y2 = 0 if y2<0 else y2
					bboxForFrames.append((x1,y1,x2,y2))

			Q_frcnn.task_done()
			yield bboxForFrames

		except StopIteration:
			return
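roi_helpers.apply_regr decodes the class-specific regression output (tx, ty, tw, th) into a refined box. A sketch of the standard Faster R-CNN parameterisation it implements: shift the proposal centre by (tx*w, ty*h) and rescale the width and height by exp(tw) and exp(th):

import math

def apply_regr(x, y, w, h, tx, ty, tw, th):
    # (x, y, w, h): proposal in feature-map coordinates;
    # (tx, ty, tw, th): predicted offsets for the chosen class.
    cx, cy = x + w / 2.0, y + h / 2.0            # proposal centre
    cx1, cy1 = tx * w + cx, ty * h + cy          # shifted centre
    w1, h1 = math.exp(tw) * w, math.exp(th) * h  # rescaled size
    x1, y1 = cx1 - w1 / 2.0, cy1 - h1 / 2.0      # back to the top-left corner
    return int(round(x1)), int(round(y1)), int(round(w1)), int(round(h1))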
Example 7
def train(data_gen_train, model_rpn, model_classifier, model_all, num_epochs,
          epoch_length):
    iter_num = 0
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    print('Starting training')

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, C.class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    # selected_pos_samples = pos_samples.tolist()
                    # selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue
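The positive/negative balancing that Examples 3, 4 and 7 all repeat inline (keep at most C.num_rois // 2 positives, fill the rest with negatives, falling back to sampling with replacement when negatives run short) can be factored into one helper; a sketch:

import numpy as np

def sample_rois(pos_samples, neg_samples, num_rois):
    # Keep at most num_rois // 2 positives, then top up with negatives.
    if len(pos_samples) < num_rois // 2:
        sel_pos = list(pos_samples)
    else:
        sel_pos = np.random.choice(pos_samples, num_rois // 2,
                                   replace=False).tolist()
    n_neg = num_rois - len(sel_pos)
    # Without replacement when enough negatives exist, with replacement
    # otherwise (this mirrors the try/except in the examples above).
    replace = len(neg_samples) < n_neg
    sel_neg = np.random.choice(neg_samples, n_neg, replace=replace).tolist()
    return sel_pos + sel_neg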
Example 8
    # st = time.time()
    filepath = img_name
    #filepath = os.path.join(img_path, img_name)
    img = cv2.imread(filepath)

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
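The snippet is cut off here; in Examples 2, 6 and 10 the loop continues by breaking on an empty batch and padding the final, partial batch up to C.num_rois with copies of the first ROI, i.e.:

            break

        if jk == R.shape[0] // C.num_rois:
            # pad the last batch by repeating the first ROI
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded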
Example 9
for idx, image_path in enumerate(image_paths):
    # for idx, image_file in enumerate(sorted(os.listdir(images_dir))):
    #     if not image_file.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    #         continue
    #     logger.debug('image_file={}'.format(image_file))
    logger.debug('image_path={}'.format(image_path))
    start = time.time()
    # image_path = os.path.join(images_dir, image_file)
    image = cv2.imread(image_path)
    image_input, ratio = format_image(image, C)

    # get the feature maps and output from the RPN
    # Y11: rpn_class Y12: rpn_regr
    rpn_class, rpn_regr = model_rpn.predict(image_input)

    rois = roi_helpers.rpn_to_roi(rpn_class, rpn_regr, C, overlap_thresh=0.7)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    rois[:, 2] -= rois[:, 0]
    rois[:, 3] -= rois[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    # split all the rois into batches of C.num_rois each
    for batch_idx in range(rois.shape[0] // C.num_rois + 1):
        # show_batch_rois(image, rois[C.num_rois * batch_idx:C.num_rois * (batch_idx + 1), :], ratio)
        batch_rois = np.expand_dims(rois[C.num_rois * batch_idx:C.num_rois *
                                         (batch_idx + 1), :],
                                    axis=0)
Example 10
def test(im_size=600,
         mode='normal',
         detect_threshold=0.9,
         overlap_threshold=0.5):
    """ Test the model
    Args:
        im_size: trained model available for 300, 400 or 600. Input images are resized to this size.
        mode: 'normal' means predictions will be saved. Any other mode means only a CSV corner file will be saved.
        detect_threshold: minimum class belonging probability for a proposal to be accepted
        overlap_threshold: maximum IoU between two proposals

    Returns:
        avg_time: average time per image after a warm-up period, measured from loading the image to saving the predictions.
    """

    conf_file = f'models/conf/config_frcnn_{im_size}.pickle'
    C = pickle.load(open(conf_file, 'rb'))

    img_path = 'data/testing/images'

    class_mapping = C.class_mapping

    class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    class_to_color = {
        class_mapping[v]: np.array([0, 128, 255])
        for v in class_mapping
    }

    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base resnet network
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping))

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn, model_classifier = load_weights(im_size, model_rpn,
                                               model_classifier)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    output_folder = f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    output_file = open(
        f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/'
        f'predicted_corners_size{int(im_size)}_p{int(detect_threshold * 100)}.csv',
        'w')
    writer = csv.writer(output_file)

    print(
        f'Predicting gates for im_size={im_size} and detection probability threshold of {int(detect_threshold * 100)}%.'
    )
    print(
        f'Output to be saved in directory "/output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/"'
    )
    progbar = Progbar(len(os.listdir(img_path)) - 1)
    time_list = []  # per-image timings, collected once outside the image loop

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(('.png', '.jpg')):
            continue

        filepath = os.path.join(img_path, img_name)

        start_time = time.time()
        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   overlap_thresh=overlap_threshold,
                                   max_boxes=C.max_boxes)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        time_list = []

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < detect_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                # only gate objects
                cls_name = 'gate'

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                # only gate objects, which is index 0
                cls_num = 0

                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass

                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=overlap_threshold)

            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                w = (x2 - x1)
                h = (y2 - y1)
                scale = 0.16
                x1 += w * scale
                x2 -= w * scale
                y1 += h * scale
                y2 -= h * scale

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                writer.writerow([
                    img_name, real_x1, real_y1, real_x2, real_y1, real_x2,
                    real_y2, real_x1, real_y2
                ])

                if mode == 'normal':
                    cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                                  (int(class_to_color[key][0]),
                                   int(class_to_color[key][1]),
                                   int(class_to_color[key][2])), 2)

                    textLabel = f'{key}: {int(100 * new_probs[jk])}%'

                    (retval,
                     baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_SIMPLEX, 0.5,
                                                 1)
                    textOrg = (real_x1, real_y1)

                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), 2)
                    cv2.rectangle(img, (textOrg[0], textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5), (0, 128, 255),
                                  -1)
                    cv2.putText(img, textLabel, (real_x1, real_y1 - 3),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255),
                                1)

        time_taken = time.time() - start_time
        progbar.update(idx)
        print(f'   -   Elapsed time: {time_taken:.3}s for {img_name}')
        if idx > 20:
            time_list.append(time_taken)

        if mode == 'normal':
            cv2.imwrite(
                f'output/predictions/frcnn_size{int(im_size)}_p{int(detect_threshold * 100)}/predict_{img_name}',
                img)
            plt.close()

    output_file.close()

    # return the average prediction time per image (the first 20 images are treated as warm-up)
    return sum(time_list) / len(time_list)
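roi_helpers.non_max_suppression_fast, used above and in Examples 2 and 6, is the standard greedy NMS: repeatedly keep the highest-probability box and discard the boxes whose IoU with it exceeds overlap_thresh. A compact sketch of that algorithm (the original's vectorisation details may differ):

import numpy as np

def non_max_suppression_fast(boxes, probs, overlap_thresh=0.5, max_boxes=300):
    # boxes: (N, 4) array of (x1, y1, x2, y2); probs: (N,) scores.
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    idxs = np.argsort(probs)  # ascending, so the best box is last
    picked = []
    while len(idxs) > 0 and len(picked) < max_boxes:
        last = idxs[-1]
        picked.append(last)
        # IoU of the picked box with every remaining box
        xx1 = np.maximum(x1[last], x1[idxs[:-1]])
        yy1 = np.maximum(y1[last], y1[idxs[:-1]])
        xx2 = np.minimum(x2[last], x2[idxs[:-1]])
        yy2 = np.minimum(y2[last], y2[idxs[:-1]])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        iou = inter / (areas[last] + areas[idxs[:-1]] - inter + 1e-6)
        # drop boxes that overlap the picked one too much
        idxs = idxs[:-1][iou <= overlap_thresh]
    return boxes[picked], probs[picked]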
Example 11
logger.info('Training starts...')
for epoch_idx in range(num_epochs):
    progbar = generic_utils.Progbar(num_steps)
    logger.info('Epoch {}/{}'.format(epoch_idx + 1, num_epochs))
    while True:
        try:
            X1, Y1, augmented_annotation = next(train_data_gen)
            # rpn_loss = [loss, rpn_out_class_loss, rpn_out_regress_loss]; the names are composed of the last layer's name + '_loss'
            # note that the label shapes may differ from the model output shapes; this depends on the loss function
            rpn_loss = model_rpn.train_on_batch(X1, Y1)
            # [(1,m,n,9),(1,m,n,36)]
            rpn_prediction = model_rpn.predict_on_batch(X1)
            # rois has shape (None, 4), in (x1, y1, x2, y2) format
            rois = roi_helpers.rpn_to_roi(rpn_prediction[0],
                                          rpn_prediction[1],
                                          C,
                                          overlap_thresh=0.7,
                                          max_rois=300)
            # NOTE: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            # X2: x_roi Y21: y_class Y22: y_regr
            X2, Y21, Y22, IoUs = roi_helpers.calc_iou(rois,
                                                      augmented_annotation, C,
                                                      class_name_idx_mapping)

            if X2 is None:
                num_pos_rois_per_epoch.append(0)
                continue
            # e.g. if Y21 is np.array([[[0,0,0,1],[0,0,0,0]]]), np.where(Y21[0,:,-1]==1) returns (array([0]),)
            # Y21[0,:,-1] is the value of the 'bg' class: 1 means negative, 0 means positive, since one-hot
            neg_roi_idxs = np.where(Y21[0, :, -1] == 1)[0]
            pos_roi_idxs = np.where(Y21[0, :, -1] == 0)[0]
Example 12
def trainModel(options):

    import random
    import pprint
    import sys
    import time
    import numpy as np
    import pickle
    import os

    from keras import backend as K
    from keras.optimizers import Adam, SGD, RMSprop
    from keras.layers import Input
    from keras.models import Model
    from frcnn import config, data_generators
    from frcnn import losses as losses
    from frcnn import resnet as nn
    import frcnn.roi_helpers as roi_helpers
    from keras.utils import generic_utils
    from frcnn.simple_parser import get_data
    from frcnn.simple_parser import load_data

    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    import tensorflow as tf
    configTF = tf.ConfigProto()
    configTF.gpu_options.allow_growth = True
    sess = tf.Session(config=configTF)

    sys.setrecursionlimit(40000)

    # Config class stores all relevant settings to be recalled during test/further training
    C = config.Config()
    C.im_size = options.im_size
    C.anchor_box_scales = options.anchor_box_scales
    C.anchor_box_ratios = options.anchor_box_ratios
    C.num_rois = int(options.num_rois)
    C.use_horizontal_flips = bool(options.horizontal_flips)
    C.use_vertical_flips = bool(options.vertical_flips)
    C.rot_90 = bool(options.rot_90)
    C.rpn_max_overlap = options.rpn_max_overlap_threshold
    C.model_path = options.output_weight_path
    C.balanced_classes = options.balanced_classes
    C.rpn_stride = options.rpn_stride
    C.cutImage = options.cutImage

    # loading old weights to continue training
    if options.load_weights:
        C.base_net_weights = options.input_weight_path

    # get_data() function returns image list along with info on bbox,
    # height, width in all_imgs,and class data in classes_count and class_mapping
    if options.load_data:
        all_imgs, classes_count, class_mapping = load_data(options.name)
    else:
        all_imgs, classes_count, class_mapping = get_data(
            options.train_path, C, options.name, options.num_frames)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    # making assigned value of class the key to index the dictionary
    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    print('\nUsing RPN Stride = %d' % C.rpn_stride)
    config_output_filename = options.config_filename

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']

    print('\nTraining on {} Frames'.format(len(train_imgs)))

    # ground truth boxes are obtained
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   K.image_dim_ordering(),
                                                   mode='train')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
        print('\nLoaded weights from {}'.format(C.base_net_weights))
    except:
        print('\nNo pretrained weights found in folder...')
        print('Proceeding to train from scratch\n\n')

    # setting learning rates and optimizers
    optimizer = Adam(lr=1e-4)
    optimizer_classifier = Adam(lr=1e-4)

    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses.rpn_loss_cls(num_anchors),
                          losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses.class_loss_cls,
            losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
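    # the metric key assumes the classifier's final softmax layer is named
    # 'dense_class_{num_classes}' inside nn.classifier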

    model_all.compile(optimizer='sgd', loss='mae')
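    # model_all is never trained directly; the dummy sgd/mae compile appears
    # to exist only so the combined model can be used for weight saving/loading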

    # if epoch_length was left as 'default', an epoch covers the entire training set
    if options.epoch_length == 'default':
        epoch_length = len(train_imgs)

    # else it uses the number of images given
    else:
        epoch_length = int(options.epoch_length)

    num_epochs = int(options.num_epochs)
    iter_num = 0

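    # note: this array shadows the `losses` module used in the compile calls
    # above; that is harmless only because both models are already compiled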
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training\n')

    prev_pos_samples = []
    prev_neg_samples = []

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []

                    if mean_overlapping_bboxes == 0:
                        print(
                            '\n\nRPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.\n'
                        )

                # data_gen_train is a generator defined in data_generators.py;
                # it reads the image at each filepath in the image list and yields the data needed for training
                X, Y, img_data = next(data_gen_train)

                # train_on_batch() is the Keras method that runs a single
                # gradient update on the given batch (one iteration, not an epoch)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # uses trained model to make prediction
                P_rpn = model_rpn.predict_on_batch(X)

                # converting RPN prediction to ROI
                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2 = roi_helpers.calc_iou(R, img_data, C,
                                                  class_mapping)

                # if no ROI is detected
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # the last column of Y1 is the one-hot 'bg' flag; np.where
                # returns a tuple of index arrays, so unwrap the single array
                neg_samples = np.where(Y1[0, :, -1] == 1)[0]
                pos_samples = np.where(Y1[0, :, -1] == 0)[0]

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    # Take half positive and half negative samples for classifier training

                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()

                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()

                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()

                    # np.random.choice raises ValueError when there are fewer
                    # negatives than requested without replacement, so fall
                    # back to sampling with replacement
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
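                    # e.g. with num_rois = 4 and enough of both classes this
                    # yields 2 positive + 2 negative ROI indices; when positives
                    # are scarce, negatives fill the remainder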

                else:
                    # in the extreme case where num_rois = 1, pick one random pos or neg
                    # sample; keep it in a list so the slicing below retains the roi axis
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()

                    if np.random.randint(0, 2):
                        sel_samples = [random.choice(neg_samples)]
                    else:
                        sel_samples = [random.choice(pos_samples)]

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

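                # losses columns: 0 = rpn_cls, 1 = rpn_regr, 2 = detector_cls,
                # 3 = detector_regr, 4 = detector accuracy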
                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('rpn_overlap', float(sum(rpn_accuracy_rpn_monitor)) /
                      len(rpn_accuracy_rpn_monitor))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            '\n---------------------------------------------------------------------------------------'
                        )
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:

                        if C.verbose:
                            print(
                                '\nTotal loss decreased from {} to {}'.format(
                                    best_loss, curr_loss))
                            print(
                                '---------------------------------------------------------------------------------------\n'
                            )
                        best_loss = curr_loss

                        # saving weights with smallest loss (overwrite)
                        model_all.save_weights(C.model_path)
                    else:
                        if C.verbose:
                            print('\nLoss did not improve')
                            print(
                                '---------------------------------------------------------------------------------------\n'
                            )

                    # also saving weights for each epoch
                    model_all.save_weights(C.model_path[:-5] + '_%03d' %
                                           (epoch_num + 1) +
                                           '_%2.2f' % mean_overlapping_bboxes +
                                           '.hdf5')
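                    # e.g. a (hypothetical) C.model_path of 'model_frcnn.hdf5'
                    # at epoch 3 with mean overlap 12.5 becomes
                    # 'model_frcnn_003_12.50.hdf5'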

                    break

            except Exception as e:
                print('\nException: {}\n'.format(e))
                continue

    print('\n-----------------\nTraining complete!\n')
Example n. 13
0
                print(
                    'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                    .format(mean_overlapping_bboxes, epoch_length))
                if mean_overlapping_bboxes == 0:
                    print(
                        'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                    )

            X, Y, img_data = next(data_gen_train)

            loss_rpn = model_rpn.train_on_batch(X, Y)

            P_rpn = model_rpn.predict_on_batch(X)

            R = roi_helpers.rpn_to_roi(P_rpn[0],
                                       P_rpn[1],
                                       C,
                                       K.image_data_format(),
                                       use_regr=True,
                                       overlap_thresh=0.7,
                                       max_boxes=300)
            # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
            X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C,
                                                    class_mapping)

            if X2 is None:
                rpn_accuracy_rpn_monitor.append(0)
                rpn_accuracy_for_epoch.append(0)
                continue

            neg_samples = np.where(Y1[0, :, -1] == 1)
            pos_samples = np.where(Y1[0, :, -1] == 0)