Example #1

if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
	input_shape_features = (num_features, None, None)
else:
	input_shape_img = (None, None, 3)
	input_shape_features = (None, None, num_features)


target_img_input = Input(shape=input_shape_img)
target_roi_input = Input(shape=(target_C.num_rois, 4))
target_feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
target_shared_layers = nn.nn_base(target_img_input, trainable=True)

# define the RPN, built on the base layers
target_num_anchors = len(target_C.anchor_box_scales) * len(target_C.anchor_box_ratios)
target_rpn_layers = nn.rpn(target_shared_layers, target_num_anchors)

target_classifier = nn.classifier(target_feature_map_input, target_roi_input, target_C.num_rois, nb_classes=len(target_class_mapping), trainable=True)

target_model_rpn = Model(target_img_input, target_rpn_layers)
target_model_classifier_only = Model([target_feature_map_input, target_roi_input], target_classifier)

target_model_classifier = Model([target_feature_map_input, target_roi_input], target_classifier)

print('Loading weights from {}'.format(target_C.model_path))
target_model_rpn.load_weights(target_C.model_path, by_name=True)
target_model_classifier.load_weights(target_C.model_path, by_name=True)
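
A minimal inference sketch for the models built above, not part of the original example. It assumes X is a preprocessed image batch and that nn.rpn was built test-style with three outputs [x_class, x_regr, base_layers], so the RPN model also returns the shared feature map, as in the later examples on this page:

# hedged sketch: run the RPN, convert its outputs to RoIs, classify one RoI batch
Y1, Y2, F = target_model_rpn.predict(X)
R = roi_helpers.rpn_to_roi(Y1, Y2, target_C, K.image_dim_ordering(),
                           overlap_thresh=0.7)
R[:, 2] -= R[:, 0]  # (x1, y1, x2, y2) -> (x, y, w, h)
R[:, 3] -= R[:, 1]
ROIs = np.expand_dims(R[:target_C.num_rois, :], axis=0)
P_cls, P_regr = target_model_classifier_only.predict([F, ROIs])

Example #2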
def train_kitti():
    # region config for data augmentation
    cfg = config.Config()
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False
    # todo
    cfg.num_rois = 32

    # TODO: the only file that should be changed to train on other data
    cfg.simple_label_file = './dataset/samples_V2_1019_two_shelves.txt'  # label file

    # endregion

    # region data preprocessing
    all_images, classes_count, class_mapping = get_data(
        cfg.simple_label_file)  # parsing the data

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
        # {'Car': 0, 'bg': 1}

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))
    # endregion

    # generate the anchors/proposals and label the proposals
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    # nn.get_img_output_length: computes the spatial size of the conv feature map
    # K.image_dim_ordering(): backend dim ordering, 'th' or 'tf'; here 'tf' (channels_last)
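    # A rough sketch of what get_img_output_length returns (assumption: the
    # ResNet base downsamples by an effective stride of 16):
    #   output_w, output_h ~ ceil(width / 16), ceil(height / 16)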

    # region define the model architecture
    # tf, channels_last
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    # Input() is used to instantiate a Keras tensor.
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
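    # img_input carries a variable-size image batch, (batch, H, W, 3) for
    # channels_last; roi_input carries a variable number of boxes per image,
    # shape (batch, None, 4)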

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(
        cfg.anchor_box_ratios)  # 3 * 3 = 9
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    # endregion
    # region plot the model
    ###################################################################
    '''
    model_rpn.summary()
    model_classifier.summary()
    model_all.summary()

    tb = TensorBoard(log_dir='./logs')
    tb.set_model(model_rpn)
    tb.set_model(model_classifier)
    tb.set_model(model_all)
    '''
    '''
    plot_model(model_rpn, to_file='./model/model_rpn.png', show_shapes=True, show_layer_names=True)
    plot_model(model_classifier, to_file='./model/model_classifier.png', show_shapes=True, show_layer_names=True)
    plot_model(model_all, to_file='./model/model_all.png', show_shapes=True, show_layer_names=True)
    '''
    # endregion
    #####################################################################
    # region load pretrained weights

    try:
        print('loading weights from {}'.format(cfg.last_model_path))
        model_rpn.load_weights(cfg.last_model_path,
                               by_name=True,
                               skip_mismatch=True)
        model_classifier.load_weights(cfg.last_model_path,
                                      by_name=True,
                                      skip_mismatch=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')
    # endregion

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)

    # the loss adapts to the number of classes automatically; no manual change needed
    model_rpn.compile(
        optimizer=optimizer,
        loss=[
            losses_fn.rpn_loss_cls(num_anchors),
            losses_fn.rpn_loss_regr(num_anchors)
        ])  # sum of two individual losses; the lambda weighting can be changed in losses.py
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 100
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(
            epoch_length)  # Displays a progress bar.
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(
                    X, Y
                )  # Runs a single gradient update on a single batch of data.

                P_rpn = model_rpn.predict_on_batch(
                    X)  # Returns predictions for a single batch of samples.

                # generate 300 proposals
                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
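                # e.g. a proposal (x1, y1, x2, y2) = (10, 20, 50, 80) becomes
                # (x, y, w, h) = (10, 20, 40, 60)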
                # generate gt
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist(
                        )  # Return the array as a (possibly nested) list.
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_save_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_save_path)
                continue
    print('Training complete, exiting.')
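Example #3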
def train_kitti():
    # config for data augmentation
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file that should be changed to train on other data
    cfg.model_path = './model/kitti_frcnn_last.hdf5'

    cfg.simple_label_file = 'kitti_simple_label.txt'
    # check the absolute path
    #t = os.path.abspath('kitti_simple_label.txt')

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)
    # img_input: the input image, three channels
    img_input = Input(shape=input_shape_img)
    # roi_input: the four bounding-box values for the input image
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    # shared_layers: the base network (e.g. ResNet, VGG) that extracts the
    # feature map from the raw image; these features feed both the RPN and
    # the RCNN head
    # 1. define the network input layers, the conv layers shared by Faster R-CNN
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers (2. define the RPN layers)
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    # The RPN generates region proposals: a sigmoid scores each anchor as
    # foreground or background, then bounding-box regression refines the
    # anchors into corrected RoIs.
    # rpn: classification and regression heads over 9 anchors per position on
    # the base feature map; returns [x_class, x_regr, base_layers]
    rpn = nn.rpn(shared_layers, num_anchors)
    # define the classifier head, with its inputs and outputs
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)
    # the RPN model: per-box binary classification (sigmoid rather than softmax) plus box regression
    model_rpn = Model(img_input, rpn[:2])
    # define the classifier model inputs and outputs
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        # TODO: on the first run model_path has no hdf5 file yet, so cfg.base_net_weights was loaded instead; this can be switched back now
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
        # model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        # model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # TODO: add TensorBoard logging
    log_path = './graph'
    callback = TensorBoard(log_path,
                           histogram_freq=0,
                           write_graph=True,
                           write_images=True)
    callback.set_model(model_all)
    epoch_length = len(train_imgs)

    #epoch_length = 47182
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    rpn_accuracy_rpn_monitor_val = []
    rpn_accuracy_for_epoch_val = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                print("img_data:")
                print(img_data)
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))

                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Example #4
def main():
	cleanup()
	sys.setrecursionlimit(40000)
	config_output_filename = 'config.pickle'

	with open(config_output_filename, 'rb') as f_in:
		C = pickle.load(f_in)

	# turn off any data augmentation at test time
	C.use_horizontal_flips = False
	C.use_vertical_flips = False
	C.rot_90 = False
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	print(class_mapping)
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
	C.num_rois = num_rois

	if K.image_dim_ordering() == 'th':
		input_shape_img = (3, None, None)
		input_shape_features = (1024, None, None)
	else:
		input_shape_img = (None, None, 3)
		input_shape_features = (None, None, 1024)


	img_input = Input(shape=input_shape_img)
	roi_input = Input(shape=(C.num_rois, 4))
	feature_map_input = Input(shape=input_shape_features)

	# define the base network (resnet here, can be VGG, Inception, etc)
	shared_layers = nn.nn_base(img_input, trainable=True)

	# define the RPN, built on the base layers
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn_layers = nn.rpn(shared_layers, num_anchors)

	classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

	model_rpn = Model(img_input, rpn_layers)
	model_classifier_only = Model([feature_map_input, roi_input], classifier)

	model_classifier = Model([feature_map_input, roi_input], classifier)

	model_rpn.load_weights(C.model_path, by_name=True)
	model_classifier.load_weights(C.model_path, by_name=True)

	model_rpn.compile(optimizer='sgd', loss='mse')
	model_classifier.compile(optimizer='sgd', loss='mse')

	all_imgs = []

	classes = {}

	bbox_threshold = 0.8

	visualise = True

	print("Converting video to images..")
	convert_to_images()
	print("anotating...")

	list_files = sorted(get_file_names(img_path), key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
	for img_name in list_files:
		if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
			continue
		print(img_name)
		st = time.time()
		filepath = os.path.join(img_path,img_name)
		img = cv2.imread(filepath)
		X = format_img(img, C)

		img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy()
		img_scaled[:, :, 0] += 123.68
		img_scaled[:, :, 1] += 116.779
		img_scaled[:, :, 2] += 103.939

		img_scaled = img_scaled.astype(np.uint8)

		if K.image_dim_ordering() == 'tf':
			X = np.transpose(X, (0, 2, 3, 1))

		# get the feature maps and output from the RPN
		[Y1, Y2, F] = model_rpn.predict(X)


		R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

		# convert from (x1,y1,x2,y2) to (x,y,w,h)
		R[:, 2] -= R[:, 0]
		R[:, 3] -= R[:, 1]

		# apply the spatial pyramid pooling to the proposed regions
		bboxes = {}
		probs = {}
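		# e.g. with 300 proposals and num_rois = 32: nine full batches of 32,
		# then a last partial batch of 12, padded below by repeating its first
		# RoI so the classifier always receives a fixed-size RoI tensor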

		for jk in range(R.shape[0]//C.num_rois + 1):
			ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
			if ROIs.shape[1] == 0:
				break

			if jk == R.shape[0]//C.num_rois:
				#pad R
				curr_shape = ROIs.shape
				target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
				ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
				ROIs_padded[:, :curr_shape[1], :] = ROIs
				ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
				ROIs = ROIs_padded

			[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

			for ii in range(P_cls.shape[1]):

				if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
					continue

				cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

				if cls_name not in bboxes:
					bboxes[cls_name] = []
					probs[cls_name] = []

				(x, y, w, h) = ROIs[0, ii, :]

				cls_num = np.argmax(P_cls[0, ii, :])
				try:
					(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
					tx /= C.classifier_regr_std[0]
					ty /= C.classifier_regr_std[1]
					tw /= C.classifier_regr_std[2]
					th /= C.classifier_regr_std[3]
					x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
				except:
					pass
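				# 16 is the feature-map stride of the ResNet base here; the later
				# examples use C.rpn_stride for this same conversion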
				bboxes[cls_name].append([16*x, 16*y, 16*(x+w), 16*(y+h)])
				probs[cls_name].append(np.max(P_cls[0, ii, :]))

		all_dets = []
		all_objects = []

		for key in bboxes:
			bbox = np.array(bboxes[key])

			new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
			for jk in range(new_boxes.shape[0]):
				(x1, y1, x2, y2) = new_boxes[jk,:]

				cv2.rectangle(img_scaled, (x1, y1), (x2, y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])), 2)

				textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
				all_dets.append((key,100*new_probs[jk]))
				all_objects.append((key, 1))

				(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
				textOrg = (x1, y1-0)

				cv2.rectangle(img_scaled, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
				cv2.rectangle(img_scaled, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
				cv2.putText(img_scaled, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
		print('Elapsed time = {}'.format(time.time() - st))
		height, width, channels = img_scaled.shape
		cv2.rectangle(img_scaled, (0,0), (width, 30), (0, 0, 0), -1)
		cv2.putText(img_scaled, "Obj count: " + str(list(accumulate(all_objects))), (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1)
		cv2.imwrite(os.path.join(output_path, img_name), img_scaled)
		print(all_dets)
	print("saving to video..")
	save_to_video()
Example #5
def model():

    sys.setrecursionlimit(40000)

    config_output_filename = "config.pickle"

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)
        K.clear_session()

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = "crops"

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):
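        # e.g. with ratio = 0.5, a box (100, 100, 200, 200) on the resized
        # image maps back to (200, 200, 400, 400) on the original image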

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    #class_mapping = C.class_mapping

    #if 'bg' not in class_mapping:
    #   class_mapping['bg'] = len(class_mapping)

    #class_mapping = {v: k for k, v in class_mapping.items()}
    #print(class_mapping)
    #class_to_color = {class_mapping[v]: np.random.randint(0,255,3) for v in class_mapping}

    C.num_rois = 32

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    #classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=3,
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    disease_bbx_list = []
    picture = None
    ## MULTIPLE MODELS
    cls_map_SPD = {'SPD': 0, 'OK': 1, 'bg': 2}
    cls_map_EPD = {'OK': 0, 'EPD': 1, 'bg': 2}
    cls_map_SS = {'N': 0, 'SS': 1, 'bg': 2}
    cls_map_ADB = {'N': 0, 'ADB': 1, 'bg': 2}

    sve_loc_list = [
        './SPD_result/', './SS_result/', './EPD_result/', './ADB_result/'
    ]

    wght_loc_list = [
        './Weights/SS/model_frcnn.hdf5', './Weights/SS/model_frcnn.hdf5',
        './Weights/SS/model_frcnn.hdf5', './Weights/SS/model_frcnn.hdf5'
    ]
    for w_path, sv_loc in zip(wght_loc_list, sve_loc_list):

        if 'SPD' in sv_loc:
            class_mapping = cls_map_SPD
        elif 'EPD' in sv_loc:
            class_mapping = cls_map_EPD
        elif 'SS' in sv_loc:
            class_mapping = cls_map_SS
        elif 'ADB' in sv_loc:
            class_mapping = cls_map_ADB
        else:
            print("Classes not found in config.pickle")
            break

        if 'bg' not in class_mapping:
            class_mapping['bg'] = len(class_mapping)

        class_mapping = {v: k for k, v in class_mapping.items()}
        print()
        print(class_mapping)
        #class_to_color = {class_mapping[v]: np.random.randint(0,255,3) for v in class_mapping}

        #print('Loading weights from {}'.format(C.model_path))
        print('Loading weights from {}'.format(w_path))
        print()

        #model_rpn.load_weights(C.model_path, by_name=True)
        model_rpn.load_weights(w_path, by_name=True)

        #model_classifier.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(w_path, by_name=True)

        model_rpn.compile(optimizer='sgd', loss='mse')
        model_classifier.compile(optimizer='sgd', loss='mse')

        all_imgs = []

        classes = {}

        bbox_threshold = 0.8

        visualise = True
        img_name_list = []
        lists = []
        thic = 2
        box_color = (0, 0, 255)
        label_color = (255, 255, 255)

        for idx, img_name in enumerate(sorted(os.listdir(img_path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            img_name_list.append(img_name)
            #print(img_name_list)
            st = time.time()
            filepath = os.path.join(img_path, img_name)

            img = cv2.imread(filepath)

            X, ratio = format_img(img, C)

            if K.image_dim_ordering() == 'tf':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       K.image_dim_ordering(),
                                       overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    #pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                for ii in range(P_cls.shape[1]):

                    if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                            P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append([
                        C.rpn_stride * x, C.rpn_stride * y,
                        C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                    ])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            #i = 0

            for key in bboxes:
                bbox = np.array(bboxes[key])

                new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                    bbox, np.array(probs[key]), overlap_thresh=0.5)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]

                    (real_x1, real_y1, real_x2,
                     real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)
                    #bbx_df = pd.DataFrame((real_x1, real_y1, real_x2, real_y2))

                    #print("X1 ",real_x1)
                    #print("Y1 ",real_y1)
                    #print("X2 ",real_x2)
                    #print("Y2 ",real_y2)

                    ##cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)

                    textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                    ##all_dets.append((key,100*new_probs[jk]))

                    lists.append([
                        real_x1, real_x2, real_y1, real_y2, img_name, idx, key,
                        filepath, textLabel
                    ])
                    if (key == 'OK') or (key == 'N') or (key == '0'):
                        continue
                    else:
                        disease_bbx_list.append([
                            real_x1, real_x2, real_y1, real_y2, key, img_name,
                            filepath, textLabel
                        ])
                        img = cv2.rectangle(img, (real_x1, real_y1),
                                            (real_x2, real_y2), box_color,
                                            thic)
                        img = cv2.putText(img, textLabel, (real_x1, real_y1),
                                          cv2.FONT_HERSHEY_DUPLEX, 0.5,
                                          label_color)

                    ##(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,0.4,1)
                    ##textOrg = (real_x1, real_y1-0)

                    #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), color = None)
                    ##cv2.rectangle(img, (textOrg[0] - 3,textOrg[1]+baseLine - 3), (textOrg[0]+retval[0], textOrg[1]-retval[1]), (255,255,255), -1)
                    ##cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.3, (0, 0, 255))

            print('Elapsed time = {}'.format(time.time() - st))
            print(all_dets)
            #cv2.imshow('img', img)
            #cv2.waitKey(0)
            cv2.imwrite(sv_loc + img_name, img)
            #cv2.imwrite('{}{}'.format(sv_loc,img_name),img)
        bbx_df = pd.DataFrame(lists,
                              columns=('x1', 'x2', 'y1', 'y2', 'img_name',
                                       'index', 'class', 'path', 'label'))
        bbx_df.to_csv(sv_loc + 'bbx_df.csv', index=None, sep=',')
    disease_df = pd.DataFrame(disease_bbx_list,
                              columns=('x1', 'x2', 'y1', 'y2', 'class',
                                       'img_name', 'path', 'label'))
    disease_df.to_csv('final_bbx.csv', index=None, sep=',')
    thic = 2
    box_color_SPD = (102, 102, 255)
    box_color_ADB = (102, 255, 255)
    box_color_SS = (102, 255, 102)
    box_color_EPD = (255, 255, 102)
    label_color = (255, 255, 255)
    for i, j in zip(disease_df.img_name.unique(), disease_df.path.unique()):
        img = cv2.imread(j)
        cv2.rectangle(img, (4, 4), (6, 10), box_color_SPD, 2)
        cv2.rectangle(img, (4, 17), (6, 23), box_color_ADB, 2)
        cv2.rectangle(img, (4, 29), (6, 35), box_color_SS, 2)
        cv2.rectangle(img, (4, 41), (6, 47), box_color_EPD, 2)
        cv2.putText(img, "SPD", (10, 11), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_SPD)
        cv2.putText(img, "ADB", (10, 24), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_ADB)
        cv2.putText(img, "SS", (10, 36), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_SS)
        cv2.putText(img, "EPD", (10, 48), cv2.FONT_HERSHEY_DUPLEX, 0.4,
                    box_color_EPD)
        for _, row in disease_df[disease_df.img_name == i].iterrows():
            if row['class'] == "SPD":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_SPD, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "ADB":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_ADB, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "SS":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_SS, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
            elif row['class'] == "EPD":
                img = cv2.rectangle(img, (row.x1, row.y1), (row.x2, row.y2),
                                    box_color_EPD, thic)
                #img = cv2.putText(img, row['class'], (row.x1,row.y1), cv2.FONT_HERSHEY_DUPLEX, 0.5, label_color)
        cv2.imwrite('results_imgs/{}'.format(i), img)
    for root, dirnames, filenames in os.walk('results_imgs/'):
        for filename in filenames:
            filename = filename
    return render_template("result.html", user_image=filename)
    #zipf = zipfile.ZipFile('RESULTS.zip','w', zipfile.ZIP_DEFLATED)
    #for root,dirs, files in os.walk('results_imgs/'):
    #   for file in files:
    #      zipf.write('results_imgs/'+file)
    #zipf.close()
    #return send_file('RESULTS.zip',
    #       mimetype = 'zip',
    #      attachment_filename= 'RESULTS.zip',
    #     as_attachment = True)
    #return send_from_directory(app.config['RESULT_FOLDER'],
    #                          filename=filename + '.jpg', as_attachment=True)
    #lIndex=req.rfind(".")
    #global res
    #global domain
    #domain=req[lIndex::]
    #print(domain)
    #res="0"+res[lIndex::]
    #print(res)
    K.clear_session()
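Example #6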
def operation():
    sys.setrecursionlimit(40000)

    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      default="images",
                      help="Path to test data.")
    parser.add_option(
        "-n",
        "--num_rois",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, args) = parser.parse_args()

    if not options.test_path:  # if the test path is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    def format_img_size(img, C):
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    visualise = True
    object = []
    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}
        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        item = []
        for key in bboxes:
            bbox = np.array(bboxes[key])
            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(
                    img, (real_x1, real_y1), (real_x2, real_y2),
                    (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2])), 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (0, 0, 0), 2)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)
                # print(textLabel)
                # print(real_x1, real_y1, real_x2, real_y2)
                if (100 * new_probs[jk]) > 95:
                    item.append([key, [real_x1, real_y1, real_x2, real_y2]])
        print(all_dets)
        detected_objects.append([[idx], [item]])

        # print(detected_objects)

        # print('Elapsed time = {}'.format(time.time() - st))

        # cv2.imshow('img', img)
        # cv2.waitKey(0)
        cv2.imwrite('./results_imgs/{}.png'.format(idx), img)
    print("=======================")

    return detected_objects
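# The function above returns one [[idx], [item]] entry per image, where item
# is a list of [class_name, [x1, y1, x2, y2]] detections kept at > 95%
# confidence. A minimal usage sketch for that structure (the helper below is
# illustrative, not part of the original code):
from collections import Counter

def count_detections(detected_objects):
    """Count high-confidence detections per class across all images."""
    counts = Counter()
    for entry in detected_objects:
        items = entry[1][0]  # unwrap [[idx], [item]] -> the item list
        for cls_name, _box in items:
            counts[cls_name] += 1
    return counts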
def train_kitti():
    # Set config for data augmentation
    cfg = config.Config()
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: this is the only file that needs to change to train on other data
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # Change K.image_dim_ordering() to K.image_data_format() due to TensorFlow 2.x
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_data_format(),
                                                   mode='train')
    # Change K.image_dim_ordering() to K.image_data_format() due to TensorFlow 2.x
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_data_format(),
                                                 mode='val')

    # -if K.image_dim_ordering() == 'th':
    if K.image_data_format() == 'channels_first':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # Define the base network (resnet50 here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # Define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # This is the model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        # -model_rpn.load_weights(cfg.model_path, by_name=True)
        model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        # -model_classifier.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    # Initially train the dataset with 10 epochs and change it to 100 or 1000 after a successful run.
    # -epoch_length = 1000
    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)
                # For TensorFlow 2.x, change K.image_dim_ordering() to K.image_data_format()
                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_data_format(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # In the extreme case where num_rois = 1, pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save the model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
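# The sampling block above keeps each classifier mini-batch balanced: at most
# num_rois // 2 positive ROIs, topped up with negatives, falling back to
# sampling negatives with replacement when too few exist. A standalone sketch
# of the same policy, assuming pos and neg are non-empty index arrays (the
# helper name is illustrative):
import numpy as np

def sample_rois(pos, neg, num_rois=32):
    n_pos = min(len(pos), num_rois // 2)
    sel_pos = np.random.choice(pos, n_pos, replace=False).tolist()
    n_neg = num_rois - n_pos
    # replace=True only when there are fewer negatives than slots to fill
    sel_neg = np.random.choice(neg, n_neg, replace=len(neg) < n_neg).tolist()
    return sel_pos + sel_neg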
def work(input_dir, output_dir, textlabel, piclabel, primpiclabel):
    textlabel.append("Detecting now:")
    test_path = input_dir + '/'
    output_path = output_dir + '/'
    #test_path = "../big_test_input/"
    sys.setrecursionlimit(40000)

    parser = OptionParser()

    parser.add_option("-p",
                      "--path",
                      dest="test_path",
                      help="Path to test data.",
                      default=test_path)
    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=32)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="./config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, args) = parser.parse_args()

    if not options.test_path:  # if filename is not given
        parser.error(
            'Error: path to test data must be specified. Pass --path to command line'
        )

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    textlabel.append('Loading weights from {}'.format(C.model_path))
    ######## These two load_weights calls should be re-enabled later ########
    #model_rpn.load_weights(C.model_path, by_name=True)
    #model_classifier.load_weights(C.model_path, by_name=True)
    ##########################################################################
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    visualise = True

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        ################ Update the GUI window ################
        textlabel.append(img_name)
        oldpath = test_path + img_name
        beforeimage = QtGui.QPixmap(oldpath)
        primpiclabel.setPixmap(beforeimage)
        primpiclabel.setScaledContents(True)
        ##########################################
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                color = [0, 0, 0]
                if key == "airbase": color = [0, 0, 255]
                if key == "harbour": color = [21, 159, 235]
                if key == "island": color = [59, 197, 184]

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                              color, 2)

                #textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
                #all_dets.append((key,100*new_probs[jk]))
                #textlabel.append('{}: {}'.format(key,100*new_probs[jk]))

                #(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
                #textOrg = (real_x1, real_y1-0)

                #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
                #cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
                #cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 0), 1)

        print('Elapsed time = {}'.format(time.time() - st))
        textlabel.append('Elapsed time = {}'.format(time.time() - st))
        print(all_dets)

        #cv2.imshow('img', img)
        #cv2.waitKey(0)

        cv2.imwrite(output_path + '{}.png'.format(idx), img)
        afterimage = QtGui.QPixmap(output_path + '{}.png'.format(idx))
        piclabel.setPixmap(afterimage)
        piclabel.setScaledContents(True)
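# In the detection loops above, ROIs live on the backbone feature map, so box
# corners are multiplied by C.rpn_stride (16 for the ResNet-50/VGG-16 bases
# used here) to get pixel coordinates in the resized input image. A quick
# worked check, with the stride value assumed to match the keras_frcnn default:
rpn_stride = 16
x, y, w, h = 10, 5, 4, 3                      # ROI in feature-map cells
box = [rpn_stride * x, rpn_stride * y,
       rpn_stride * (x + w), rpn_stride * (y + h)]
assert box == [160, 80, 224, 128]             # pixels in the resized image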
Beispiel #9
0
def uploadtest(request):
    if request.FILES:
        str_info = ''
        pic = request.FILES.get('pic')
        save_path = '%s/test/%s' % (settings.MEDIA_ROOT, pic.name)
        with open(save_path, 'wb') as f:
            for content in pic.chunks():
                f.write(content)
        print('--> Image uploaded successfully...')
        str_info += '--> Image uploaded successfully...\n'
        # The detection procedure follows:
        sys.setrecursionlimit(40000)

        config_output_filename = os.path.join(settings.CONFIG_BISHE, 'bishe/config.pickle')
        #  print(config_output_filename)
        print('--> Detecting...')
        str_info += '--> Detecting...\n'
        with open(config_output_filename, 'rb') as f_in:
            C = pickle.load(f_in)
            print('--> Config file found...')

        C.model_path = os.path.join(settings.CONFIG_BISHE, C.model_path)
        print('--> Model info found...')
        str_info += '--> Model info found...\n'
        print('--> Model path: ' + C.model_path)
        str_info += '--> Model path: ' + C.model_path + '\n'
        if C.network == 'resnet50':
            import keras_frcnn.resnet as nn
        elif C.network == 'vgg':
            import keras_frcnn.vgg as nn

        # turn off any data augmentation at test time
        C.use_horizontal_flips = False
        C.use_vertical_flips = False
        C.rot_90 = False
        C.num_rois = 10

        def format_img_size(img, C):
            """ formats the image size based on config """
            img_min_side = float(C.im_size)
            (height, width, _) = img.shape

            if width <= height:
                ratio = img_min_side / width
                new_height = int(ratio * height)
                new_width = int(img_min_side)
            else:
                ratio = img_min_side / height
                new_width = int(ratio * width)
                new_height = int(img_min_side)
            img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)
            return img, ratio

        def format_img_channels(img, C):
            """ formats the image channels based on config """
            img = img[:, :, (2, 1, 0)]
            img = img.astype(np.float32)
            img[:, :, 0] -= C.img_channel_mean[0]
            img[:, :, 1] -= C.img_channel_mean[1]
            img[:, :, 2] -= C.img_channel_mean[2]
            img /= C.img_scaling_factor
            img = np.transpose(img, (2, 0, 1))
            img = np.expand_dims(img, axis=0)
            return img

        def format_img(img, C):
            """ formats an image for model prediction based on config """
            img, ratio = format_img_size(img, C)
            img = format_img_channels(img, C)
            return img, ratio

        # Method to transform the coordinates of the bounding box to its original size
        def get_real_coordinates(ratio, x1, y1, x2, y2):

            real_x1 = int(round(x1 // ratio))
            real_y1 = int(round(y1 // ratio))
            real_x2 = int(round(x2 // ratio))
            real_y2 = int(round(y2 // ratio))

            return (real_x1, real_y1, real_x2, real_y2)

        class_mapping = C.class_mapping

        if 'bg' not in class_mapping:
            class_mapping['bg'] = len(class_mapping)

        class_mapping = {v: k for k, v in class_mapping.items()}

        print('--> Breast cancer lesion classes: ' + str(class_mapping))
        str_info += '--> Breast cancer lesion classes: ' + str(class_mapping) + '\n'

        class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}

        if C.network == 'resnet50':
            num_features = 1024
        elif C.network == 'vgg':
            num_features = 512

        if K.image_dim_ordering() == 'th':
            input_shape_img = (3, None, None)
            input_shape_features = (num_features, None, None)
        else:
            input_shape_img = (None, None, 3)
            input_shape_features = (None, None, num_features)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(C.num_rois, 4))
        feature_map_input = Input(shape=input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping),
                                   trainable=True)

        model_rpn = Model(img_input, rpn_layers)
        model_classifier_only = Model([feature_map_input, roi_input], classifier)

        model_classifier = Model([feature_map_input, roi_input], classifier)

        print('--> Loading weights from {}...'.format(C.model_path))
        str_info += '--> Loading weights from {}...'.format(C.model_path) + '\n'
        model_rpn.load_weights(C.model_path, by_name=True)
        model_classifier.load_weights(C.model_path, by_name=True)

        model_rpn.compile(optimizer='sgd', loss='mse')
        model_classifier.compile(optimizer='sgd', loss='mse')

        all_imgs = []

        classes = {}

        bbox_threshold = 0.8

        st = time.time()

        filepath = save_path

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append(
                    [C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w), C.rpn_stride * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]),
                                                                        overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2), (
                    int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])), 2)
                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                (retval, baseLine) = cv2.getTextSize(textLabel, cv2.FONT_HERSHEY_COMPLEX, 1, 1)
                textOrg = (real_x1, real_y1 - 0)
                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
                cv2.rectangle(img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                              (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5), (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
        timeused = (time.time() - st)
        print('--> Detection finished, elapsed time: {}...'.format(timeused))
        str_info += '--> Detection finished, elapsed time: {}'.format(timeused) + '\n'
        dets_str = str(all_dets)
        print('--> Detection results: ' + dets_str)
        str_info += '--> Detection results: ' + dets_str + '\n'
        result_path = '%s/result/%s' % (settings.MEDIA_ROOT, pic.name)
        cv2.imwrite(result_path, img)
        print(str_info)
        # Return the image paths, in preparation for saving to the database
        test_pic = '/static/media/test/%s' % (pic.name)
        result_pic = '/static/media/result/%s' % (pic.name)
        user_id = UserInfo.objects.get(username=request.session.get('username')).id
        return JsonResponse(
            {'res': 1, 'result_pic': result_pic, 'test_pic': test_pic, 'user_id': user_id, 'str_info': str_info})
    return JsonResponse({'res': 0})
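# A minimal client-side sketch for exercising the Django view above with the
# requests library; the URL is hypothetical, and the multipart field name
# 'pic' matches request.FILES.get('pic') in the view:
import requests

with open('sample.png', 'rb') as f:
    resp = requests.post('http://localhost:8000/uploadtest/', files={'pic': f})
data = resp.json()
if data.get('res') == 1:
    print(data['test_pic'], '->', data['result_pic'])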
Beispiel #10
0
    def start_training(self, num_images=None):
        """
        Start training Keras-FRCNN
        :return: void
        """
        self._config = frcnn_config.Config()
        self._config.num_rois = int(self._num_rois)
        self._config.model_path = generate_filename(self._weights_folder)
        self._config.class_mapping = self._class_mapping

        if self._initial_weights_path is not None:
            self._config.base_net_weights = self._initial_weights_path

        # Define the networks
        if keras_backend.image_dim_ordering() == 'th':
            input_shape_img = (3, None, None)
        else:
            input_shape_img = (None, None, 3)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(self._config.num_rois, 4))

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        # TODO: These parameters are never set, uses defaults from keras_frcnn.config
        num_anchors = len(self._config.anchor_box_scales) * len(
            self._config.anchor_box_ratios)
        rpn = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(shared_layers,
                                   roi_input,
                                   self._config.num_rois,
                                   nb_classes=len(self._class_mapping),
                                   trainable=True)

        self._model_rpn = Model(img_input, rpn[:2])
        self._model_classifier = Model([img_input, roi_input], classifier)

        # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
        self._model_all = Model([img_input, roi_input], rpn[:2] + classifier)

        # Load pre-trained weights from a file
        # Weights can be found at:
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5
        # https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
        if self._initial_weights_path is not None:
            self._model_rpn.load_weights(self._initial_weights_path,
                                         by_name=True)
            self._model_classifier.load_weights(self._initial_weights_path,
                                                by_name=True)

        optimizer = Adam(lr=1e-4)
        optimizer_classifier = Adam(lr=1e-4)
        self._model_rpn.compile(optimizer=optimizer,
                                loss=[
                                    frcnn_losses.rpn_loss_cls(num_anchors),
                                    frcnn_losses.rpn_loss_regr(num_anchors)
                                ])
        self._model_classifier.compile(
            optimizer=optimizer_classifier,
            loss=[
                frcnn_losses.class_loss_cls,
                frcnn_losses.class_loss_regr(len(self._class_mapping) - 1)
            ],
            metrics={
                'dense_class_{}'.format(len(self._class_mapping)): 'accuracy'
            })
        self._model_all.compile(optimizer='sgd', loss='mae')

        self._best_validation_loss = np.Inf
        if self._self_assessment_interval > 0:
            # If we want to measure the training loss, build a buffer for the losses, and remember the best
            self._training_losses = np.zeros(
                (self._self_assessment_interval, 4))
            self._best_training_loss = np.Inf
            self._sa_image_count = 0
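# The (interval, 4) buffer above stores one row per self-assessed image:
# rpn_cls, rpn_regr, detector_cls, detector_regr. A sketch of collapsing it
# into a single scalar in the same spirit as the training loops, which sum
# the four mean losses before comparing against the best loss so far (the
# buffer size of 10 is an assumption):
import numpy as np

training_losses = np.zeros((10, 4))
# ... one row filled in per trained image ...
curr_loss = float(np.mean(training_losses, axis=0).sum())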
Beispiel #11
0
def train_mscoco():
    # =========================== Model configuration and loading ===========================
    # config for data augmentation
    cfg = config.Config()
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    # weights for the first four convolutional blocks of ResNet
    cfg.base_net_weights = nn.get_weight_path()
    # path for saving the model weights
    cfg.model_path = './model/mscoco_frcnn.hdf5'
    #all_images, class_mapping = get_data()
    # load the training images
    train_imgs, class_mapping = get_data('train')

    cfg.class_mapping = class_mapping
    print('Num classes (including bg) = {}'.format(len(class_mapping)))
    # save the complete config
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))
    # randomly shuffle the images
    random.shuffle(train_imgs)
    print('Num train samples {}'.format(len(train_imgs)))
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   class_mapping,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    # ==============================================================================

    # =============================== Model definition ===============================
    # the Keras backend is TensorFlow
    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    # define the base resnet50 network
    shared_layers = nn.nn_base(img_input, trainable=False)
    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    # Model(inputs=..., outputs=...)
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # ==============================================================================

    # =========================== Load ImageNet weights into the base model ===========================
    try:
        print('loading base model weights from {}'.format(
            cfg.base_net_weights))
        model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print('Loading ImageNet weights into the base model failed: ', e)
        print('Could not load pretrained model weights on ImageNet.')
    # ==============================================================================

    # =============================== Model optimization ===============================
    # initialize an optimizer object before calling model.compile(), then pass it in
    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(class_mapping) - 1)
        ],
        metrics={'dense_class_{}'.format(len(class_mapping)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')
    # ==============================================================================

    # ================================ Training and output setup ================================
    epoch_length = len(train_imgs)
    num_epochs = int(cfg.num_epochs)
    iter_num = 0
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    logger = Logger(os.path.join('.', 'log.txt'))
    # ==============================================================================

    print('Starting training')
    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        logger.write('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )
                # image, ground-truth cls/regr targets, and box data
                X, Y, img_data = next(data_gen_train)

                # train the RPN
                loss_rpn = model_rpn.train_on_batch(X, Y)

                # feed the regions proposed by the RPN into the ROI classifier
                # x_class, x_regr, base_layers
                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # regions, cls targets, regr targets, IoUs
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # train the classifier
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        logger.write(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        logger.write(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        logger.write(
                            'Loss RPN classifier: {}'.format(loss_rpn_cls))
                        logger.write(
                            'Loss RPN regression: {}'.format(loss_rpn_regr))
                        logger.write('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        logger.write('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        logger.write('Elapsed time: {}'.format(time.time() -
                                                               start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            logger.write(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
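# nn.rpn returns [x_class, x_regr, base_layers], so rpn[:2] above keeps just
# the two RPN heads while the shared base is reused by the classifier. A
# minimal sketch of that wiring in plain Keras; the filter counts assume 9
# anchors and the layer structure is illustrative, not the library's own:
from keras.layers import Input, Conv2D
from keras.models import Model

feat = Input(shape=(None, None, 1024))
shared = Conv2D(512, (3, 3), padding='same', activation='relu')(feat)
x_class = Conv2D(9, (1, 1), activation='sigmoid')(shared)   # objectness
x_regr = Conv2D(36, (1, 1), activation='linear')(shared)    # 4 deltas per anchor
rpn = [x_class, x_regr, feat]
model_rpn = Model(feat, rpn[:2])   # predicts scores + deltas only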
Beispiel #12
0
def predict(args_):
    path = args_.path
    with open('config.pickle', 'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False
    if args_.model_path == 'None':
        model_path = cfg.model_path
    else:
        model_path = args_.model_path

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(model_path))
    model_rpn.load_weights(model_path, by_name=True)
    model_classifier.load_weights(model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    if os.path.isdir(path):
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping)


#==============================================================================
#         #the results of a tomogram can be output to a .txt file
#         tomo_name = path.split('\\')[-1]
#         tomo_coor = path + '\\' + tomo_name + '.txt'
#         coor = []
#         for idx, img_name in enumerate(sorted(os.listdir(path))):
#             if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
#                 continue
#             print(img_name)
#             res = predict_single_image(os.path.join(path, img_name), model_rpn,
#                                  model_classifier_only, cfg, class_mapping)
#             ind = img_name.split('.')[-2].split('_')[-1]
#             for box in res:
#                 coor.append([int(ind),float(box[0]),float(box[1]),float(box[2]),float(box[3]),float(box[-1])])
#         coor = np.array(coor)
#         coor_arg = np.argsort(coor[:,0])
#         coor = coor[coor_arg]
#         print(coor)
#         print(coor.shape)
#         np.savetxt(tomo_coor,coor,'%d,%d,%d,%d,%d,%.3f',delimiter=" ")
#==============================================================================

    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        predict_single_image(path, model_rpn, model_classifier_only, cfg,
                             class_mapping)
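# predict() above expects an args_ object carrying .path (and .model_path in
# the first variant). A minimal argparse sketch matching those attribute
# names; the flag spellings are assumptions:
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--path', default='images/')
parser.add_argument('--model_path', default='None')
args_ = parser.parse_args()
predict(args_)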
def main():
	cleanup()
	sys.setrecursionlimit(40000)
	config_output_filename = 'config.pickle'

	with open(config_output_filename, 'rb') as f_in:
		C = pickle.load(f_in)

	# turn off any data augmentation at test time
	C.use_horizontal_flips = False
	C.use_vertical_flips = False
	C.rot_90 = False
	class_mapping = C.class_mapping

	if 'bg' not in class_mapping:
		class_mapping['bg'] = len(class_mapping)

	class_mapping = {v: k for k, v in class_mapping.items()}
	print(class_mapping)
	class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
	C.num_rois = num_rois

	if K.image_dim_ordering() == 'th':
		input_shape_img = (3, None, None)
		input_shape_features = (1024, None, None)
	else:
		input_shape_img = (None, None, 3)
		input_shape_features = (None, None, 1024)


	img_input = Input(shape=input_shape_img)
	roi_input = Input(shape=(C.num_rois, 4))
	feature_map_input = Input(shape=input_shape_features)

	# define the base network (resnet here, can be VGG, Inception, etc)
	shared_layers = nn.nn_base(img_input, trainable=True)

	# define the RPN, built on the base layers
	num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
	rpn_layers = nn.rpn(shared_layers, num_anchors)

	classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

	model_rpn = Model(img_input, rpn_layers)
	model_classifier_only = Model([feature_map_input, roi_input], classifier)

	model_classifier = Model([feature_map_input, roi_input], classifier)

	model_rpn.load_weights(C.model_path, by_name=True)
	model_classifier.load_weights(C.model_path, by_name=True)

	model_rpn.compile(optimizer='sgd', loss='mse')
	model_classifier.compile(optimizer='sgd', loss='mse')

	all_imgs = []

	classes = {}

	bbox_threshold = 0.8

	visualise = True

	print("Converting video to images..")
	convert_to_images()
	print("anotating...")

	list_files = sorted(get_file_names(img_path), key=lambda var:[int(x) if x.isdigit() else x for x in re.findall(r'[^0-9]|[0-9]+', var)])
	for img_name in list_files:
		if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
			continue
		print(img_name)
		st = time.time()
		filepath = os.path.join(img_path,img_name)
		img = cv2.imread(filepath)
		X = format_img(img, C)

		img_scaled = np.transpose(X.copy()[0, (2, 1, 0), :, :], (1, 2, 0)).copy()
		img_scaled[:, :, 0] += 123.68
		img_scaled[:, :, 1] += 116.779
		img_scaled[:, :, 2] += 103.939

		img_scaled = img_scaled.astype(np.uint8)

		if K.image_dim_ordering() == 'tf':
			X = np.transpose(X, (0, 2, 3, 1))

		# get the feature maps and output from the RPN
		[Y1, Y2, F] = model_rpn.predict(X)


		R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

		# convert from (x1,y1,x2,y2) to (x,y,w,h)
		R[:, 2] -= R[:, 0]
		R[:, 3] -= R[:, 1]

		# apply the spatial pyramid pooling to the proposed regions
		bboxes = {}
		probs = {}

		for jk in range(R.shape[0]//C.num_rois + 1):
			ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
			if ROIs.shape[1] == 0:
				break

			if jk == R.shape[0]//C.num_rois:
				#pad R
				curr_shape = ROIs.shape
				target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
				ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
				ROIs_padded[:, :curr_shape[1], :] = ROIs
				ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
				ROIs = ROIs_padded

			[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

			for ii in range(P_cls.shape[1]):

				if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
					continue

				cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

				if cls_name not in bboxes:
					bboxes[cls_name] = []
					probs[cls_name] = []

				(x, y, w, h) = ROIs[0, ii, :]

				cls_num = np.argmax(P_cls[0, ii, :])
				try:
					(tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
					tx /= C.classifier_regr_std[0]
					ty /= C.classifier_regr_std[1]
					tw /= C.classifier_regr_std[2]
					th /= C.classifier_regr_std[3]
					x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
				except:
					pass
				bboxes[cls_name].append([16*x, 16*y, 16*(x+w), 16*(y+h)])
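				# NOTE: the hard-coded 16 is the ResNet50 feature-map stride;
				# other examples on this page use C.rpn_stride instead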
				probs[cls_name].append(np.max(P_cls[0, ii, :]))

		all_dets = []
		all_objects = []

		for key in bboxes:
			bbox = np.array(bboxes[key])

			new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
			for jk in range(new_boxes.shape[0]):
				(x1, y1, x2, y2) = new_boxes[jk,:]

				cv2.rectangle(img_scaled,(x1, y1), (x2, y2), class_to_color[key],2)

				textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
				all_dets.append((key,100*new_probs[jk]))
				all_objects.append((key, 1))

				(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
				textOrg = (x1, y1-0)

				cv2.rectangle(img_scaled, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
				cv2.rectangle(img_scaled, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
				cv2.putText(img_scaled, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
		print('Elapsed time = {}'.format(time.time() - st))
		height, width, channels = img_scaled.shape
		cv2.rectangle(img_scaled, (0,0), (width, 30), (0, 0, 0), -1)
		cv2.putText(img_scaled, "Obj count: " + str(list(accumulate(all_objects))), (5, 19), cv2.FONT_HERSHEY_TRIPLEX, 0.5, (255, 255, 255), 1)
		cv2.imwrite(os.path.join(output_path, img_name), img_scaled)
		print(all_dets)
	print("saving to video..")
	save_to_video()
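Note: format_img() is called above but not defined on this page. A minimal sketch of what it is assumed to do (resize so the shorter side matches C.im_size, subtract the ImageNet channel means, and add a batch axis; the variant in Beispiel #21 also returns the resize ratio):

import cv2
import numpy as np

def format_img(img, C):
    """Assumed preprocessing: resize, mean-centre, channels-first batch."""
    height, width, _ = img.shape
    # scale the shorter side to C.im_size (e.g. 600), keeping the aspect ratio
    if width <= height:
        ratio = C.im_size / float(width)
        new_w, new_h = C.im_size, int(ratio * height)
    else:
        ratio = C.im_size / float(height)
        new_w, new_h = int(ratio * width), C.im_size
    img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_CUBIC)
    img = img[:, :, (2, 1, 0)].astype(np.float32)  # BGR -> RGB
    img -= [123.68, 116.779, 103.939]              # ImageNet channel means
    img = np.transpose(img, (2, 0, 1))             # channels first
    return np.expand_dims(img, axis=0)             # add the batch dimension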
Beispiel #14
def predict(args_):
    path = args_.path
    with open('config.pickle', 'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(cfg.model_path))
    model_rpn.load_weights(cfg.model_path, by_name=True)
    model_classifier.load_weights(cfg.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    if type(path) == list:
        print("Doing custom batch prediction...")
        predictions = []
        count = 0
        for p in path:
            count += 1
            print("Starting image %d of %d" % (count, len(path)))
            predictions.append(
                predict_single_image(p, model_rpn, model_classifier_only, cfg,
                                     class_mapping))
        return predictions
    elif os.path.isdir(path):
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping)
    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        return predict_single_image(path, model_rpn, model_classifier_only,
                                    cfg, class_mapping)
def train_kitti():
    # config for data argument
    cfg = config.Config()
    cfg.balanced_classes = True
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 50  # for the Xingtu Cup optical remote-sensing aircraft detection, 50+ is recommended
    cfg.anchor_box_scales = [41, 70, 120, 20, 90]
    cfg.anchor_box_ratios = [[1, 1.4], [1, 0.84], [1, 1.17], [1, 0.64], [1, 1]]

    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'E:/Xingtubei/official_datas/OpticalAircraft/laptop_Chreoc_OpticalAircraft_bboxes.txt'  # '/media/liuhuaqing/Elements/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt'#'F:/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt' # 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(
        cfg.simple_label_file)  # parse the dataset; cv2.imread() cannot read paths containing Chinese characters

    if 'bg' not in classes_count:  # 'bg' denotes the background class
        classes_count['bg'] = 0  # 0 means the training data has no explicit background samples
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}  # inverse of class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images
                  if s['imageset'] == 'trainval']  # training set: a list of dicts
    val_imgs = [s for s in all_images
                if s['imageset'] == 'test']  # validation set: a list of dicts

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(
        train_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='train')  # augment the data, then generate the training inputs Faster R-CNN needs (images, RPN targets, etc.)
    data_gen_val = data_generators.get_anchor_gt(
        val_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='val')  # the same generator, for the validation data

    # pick the input tensor layout matching the active Keras backend,
    # since the two backends order the dimensions differently
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)  # Theano backend: channels first
    else:
        input_shape_img = (None, None, 3)  # TensorFlow backend: channels last

    img_input = Input(shape=input_shape_img)  # the input image
    roi_input = Input(shape=(None, 4))  # ROI coordinates; the 4 values are x1, y1, x2, y2

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(
        img_input,
        trainable=True)  # shared_layers are the shared bottom layers of the network (ResNet here), defined in nn

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(
        img_input,
        rpn[:2])  # the RPN is defined in keras_frcnn/resnet; rpn[:2] holds its classification and regression outputs
    model_classifier = Model([img_input, roi_input],
                             classifier)  # a Keras functional Model: a generalized model with inputs and outputs

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input],
                      rpn[:2] + classifier)  # rpn[:2] + classifier is list concatenation: model_all exposes both sets of outputs

    try:
        # try to load pretrained weights (note: the message prints
        # cfg.base_net_weights but the load below uses cfg.model_path)
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)  # Adam optimizer for the RPN
    optimizer_classifier = Adam(lr=1e-5)  # Adam optimizer for the classifier
    # num_anchors = len(scales) * len(ratios); 25 with the 5x5 configuration above
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 100  # every epoch_length iterations, consider saving the weights, then reset iter_num to 0
    num_epochs = int(cfg.num_epochs)
    iter_num = 0  # iteration counter

    losses = np.zeros((epoch_length, 5))  # per-iteration losses within the current epoch
    rpn_accuracy_rpn_monitor = []  # running record of RPN positives, for monitoring
    rpn_accuracy_for_epoch = []  # RPN positives per epoch
    start_time = time.time()  # training start time

    best_loss = np.Inf  # tracks the smallest total loss seen so far

    class_mapping_inv = {v: k
                         for k, v in class_mapping.items()
                         }  # inverse mapping: class id -> class name
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)  # progress bar
        print('Epoch {}/{}'.format(epoch_num + 1,
                                   num_epochs))  # current epoch / total epochs

        while True:  # the loop exits via the break below, once iter_num reaches epoch_length
            try:

                if len(
                        rpn_accuracy_rpn_monitor
                ) == epoch_length and cfg.verbose:  # every epoch_length iterations, report the mean RPN overlap count
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                # X is the image (for KITTI e.g. shape (1, 600, 1987, 3)); Y are the RPN targets;
                # img_data is a dict with filename, size, ground-truth ROIs and classes
                X, Y, img_data = next(data_gen_train)
                Y_1 = Y[0]
                Y_1 = Y_1[0, :, :, :]

                loss_rpn = model_rpn.train_on_batch(
                    X, Y)  # loss_rpn = [total, cls, regr], since the model was compiled with two losses

                P_rpn = model_rpn.predict_on_batch(
                    X)  # P_rpn shapes are e.g. (1, 124, 38, 9) and (1, 124, 38, 36)

                result = roi_helpers.rpn_to_roi(
                    P_rpn[0],
                    P_rpn[1],
                    cfg,
                    K.image_dim_ordering(),
                    use_regr=True,
                    overlap_thresh=0.7,
                    max_boxes=300)  # result has shape (300, 4)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # X2 is e.g. (100, 4); Y1 is (1, 100, 8), 8 being the number of classes; IouS has length 100
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping
                )  # Y2 holds 56 = 2*28 values per ROI (28 = 4*7): the first 28 are coords, the last 28 one-hot class flags

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(
                    Y1[0, :, -1] == 1)  # Y1 is one-hot; a 1 in the last position marks background
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)
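                    # NOTE: random.choice on a numpy array yields a scalar, but
                    # train_on_batch below indexes with sel_samples as a list --
                    # this num_rois == 1 branch is fragile as written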

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]
                     ])  # feed the ROIs produced by the RPN to the classifier

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:  # end of an epoch: log, maybe save weights, reset iter_num to 0
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
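A hypothetical command-line wrapper for the two entry points above (the --path flag name is an assumption inferred from how args_.path is used in predict()):

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--path', help='image file or directory to run prediction on')
    args = parser.parse_args()
    if args.path:
        predict(args)     # inference using the pickled config and saved weights
    else:
        train_kitti()     # otherwise train from cfg.simple_label_file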
    def __init__(self, modelPath, configPath, classificationPath=''):
        self.modelPath = modelPath
        self.configPath = configPath

        print('Model loading...')

        self.classification_model = load_model(classificationPath)

        with open(self.configPath, 'rb') as f_in:
            self.C = pickle.load(f_in)

        if self.C.network == 'resnet50':
            import keras_frcnn.resnet as nn
        elif self.C.network == 'vgg':
            import keras_frcnn.vgg as nn

        self.class_mapping = self.C.class_mapping

        if 'bg' not in self.class_mapping:
            self.class_mapping['bg'] = len(self.class_mapping)

        self.class_mapping = {v: k for k, v in self.class_mapping.items()}

        self.C.num_rois = 4

        if self.C.network == 'resnet50':
            num_features = 1024
        elif self.C.network == 'vgg':
            num_features = 512

        if K.image_data_format() == 'channels_first':
            input_shape_img = (3, None, None)
            input_shape_features = (num_features, None, None)
        else:
            input_shape_img = (None, None, 3)
            input_shape_features = (None, None, num_features)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(self.C.num_rois, 4))
        feature_map_input = Input(shape=input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(self.C.anchor_box_scales) * len(
            self.C.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(feature_map_input,
                                   roi_input,
                                   self.C.num_rois,
                                   nb_classes=len(self.class_mapping),
                                   trainable=True)

        self.model_rpn = Model(img_input, rpn_layers)
        self.model_classifier_only = Model([feature_map_input, roi_input],
                                           classifier)

        self.model_classifier = Model([feature_map_input, roi_input],
                                      classifier)

        print('Loading weights from {}'.format(self.C.model_path))
        self.model_rpn.load_weights(self.C.model_path, by_name=True)
        self.model_classifier.load_weights(self.C.model_path, by_name=True)

        self.model_rpn.compile(optimizer='sgd', loss='mse')
        self.model_classifier.compile(optimizer='sgd', loss='mse')
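The __init__ above belongs to a detector wrapper class whose name is not shown here; assuming it were called FRCNNDetector, construction would look like this (class name and paths are placeholders):

# hypothetical class name and file paths -- only __init__ is shown above
detector = FRCNNDetector(modelPath='model_frcnn.hdf5',
                         configPath='config.pickle',
                         classificationPath='classifier.h5')
# the instance now exposes detector.model_rpn and detector.model_classifier_only,
# ready for the prediction loop shown in the other examples on this page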
Beispiel #17
	num_features = 512

if K.image_dim_ordering() == 'th':
	input_shape_img = (3, None, None)
	input_shape_features = (num_features, None, None)
else:
	input_shape_img = (None, None, 3)
	input_shape_features = (None, None, num_features)


img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)

model_classifier = Model([feature_map_input, roi_input], classifier)

print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)
model_classifier.load_weights(C.model_path, by_name=True)
Beispiel #18
    def __init__(self):
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)

        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
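        # NOTE: two sessions are created and neither is handed to Keras via
        # K.set_session(), so these GPU options may not actually take effect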

        sys.setrecursionlimit(40000)

        self.num_features = 1024

        self.input_shape_img = (None, None, 3)
        self.input_shape_features = (None, None, self.num_features)
        self.C = None
        self.model_rpn = None
        self.model_classifier_only = None
        self.bbox_threshold = 0.5
        with open("config.pickle", 'rb') as f_in:
            self.C = pickle.load(f_in)

        self.class_mapping = self.C.class_mapping
        if 'bg' not in self.class_mapping:
            self.class_mapping['bg'] = len(self.class_mapping)

        self.class_mapping = {v: k for k, v in self.class_mapping.items()}
        #print(self.class_mapping)
        class_to_color = {
            self.class_mapping[v]: np.random.randint(0, 255, 3)
            for v in self.class_mapping
        }
        self.C.num_rois = 4  # number of ROIs per batch (the usual default is 32)

        img_input = Input(shape=self.input_shape_img)
        roi_input = Input(shape=(self.C.num_rois, 4))
        feature_map_input = Input(shape=self.input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(self.C.anchor_box_scales) * len(
            self.C.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)
        classifier = nn.classifier(feature_map_input,
                                   roi_input,
                                   self.C.num_rois,
                                   nb_classes=len(self.class_mapping),
                                   trainable=True)
        self.model_rpn = Model(img_input, rpn_layers)
        self.model_classifier_only = Model([feature_map_input, roi_input],
                                           classifier)

        model_classifier = Model([feature_map_input, roi_input], classifier)
        print('Loading weights from {}'.format(self.C.model_path))
        self.model_rpn.load_weights(self.C.model_path, by_name=True)
        model_classifier.load_weights(self.C.model_path, by_name=True)

        self.model_rpn.compile(optimizer='sgd', loss='mse')
        model_classifier.compile(optimizer='sgd', loss='mse')
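If the GPU options above are meant to apply to the Keras models, the configured session usually has to be registered explicitly under TF 1.x; a minimal sketch:

import tensorflow as tf
import keras.backend as K

config = tf.ConfigProto()
config.gpu_options.allow_growth = True    # grab GPU memory on demand
K.set_session(tf.Session(config=config))  # make this Keras' default session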
Beispiel #19
def predict(args_):
    path = args_.path
    with open('config.pickle', 'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, cfg.num_rois, nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(cfg.model_path))
    model_rpn.load_weights(cfg.model_path, by_name=True)
    model_classifier.load_weights(cfg.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    preprocess = False
    if args_.ios == "True":
        preprocess = True

    if os.path.isdir(path):
        total_acc = 0
        total_overlap = 0
        total_time = 0
        count = 0
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            boxes, class_mapping, elapsed_time, avg_overlap, avg_accurate = predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping, preprocess)
            count += 1
            total_acc += avg_accurate
            total_overlap += avg_overlap
            total_time += elapsed_time
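        # the +1e-6 below guards against division by zero when no image was processed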
        avg_acc = total_acc / (count + 1e-6)
        avg_overlap = total_overlap / (count + 1e-6)
        avg_elapsed_time = total_time / (count + 1e-6)
        print("For {} test images, ".format(count) + "the average elapsed time = {}, ".format(avg_elapsed_time) +
              "the average overlap rate = {}, ".format(avg_overlap) + "the average accuracy = {} ".format(avg_acc))
    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        predict_single_image(path, model_rpn, model_classifier_only, cfg, class_mapping, preprocess)
Beispiel #20
def train_net():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False
    cfg.num_rois = 32  # the config default is 4
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = 'samples.hdf5'

    cfg.simple_label_file = 'annotations_train.txt'  # labels generated for the training set

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # build the anchor ground-truth generators for the images
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')

    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    # What does classifier mean here?
    # classes_count: per-class counts, e.g. {'cow': 4, 'dog': 10, ...}
    # cfg.num_rois: regions of interest taken per pass (default 32)
    # roi_input = Input(shape=(None, 4)): the candidate boxes
    # classifier yields the two Faster R-CNN heads [out_class, out_regr]
    # shared_layers: the base network's output feature map
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True)
    # define model_rpn
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                # monitor the average number of positive anchors per epoch
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                            mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        # if no positives are ever found, the RPN is misconfigured
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                # pull the next batch from the generator and train the RPN:
                # X is the image, Y holds the anchor class and regression
                # targets (only anchors passing the selection rules train).
                # next(data_gen_train) yields
                # np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)], img_data_aug
                # (with no augmentation, x_img equals img_data_aug);
                # y_rpn_cls and y_rpn_regr feed the two RPN losses.
                X, Y, img_data = next(data_gen_train)


                # the classifier and the RPN are trained alternately
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                # result holds the region proposals.
                # with the proposals in hand, the next key step is ROI pooling,
                # which converts feature crops of varying shape to a fixed shape
                # for the fully connected layers that make the final prediction.
                # rpn_to_roi takes the per-image RPN output and returns R = [boxes, probs]
                # ---------------------
                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # Y1: which class each proposal belongs to
                # Y2: the regression targets for that class
                # X2: the boxes themselves
                """
                calc_iou() matches each remaining region to the ground-truth bbox
                with the highest overlap, producing the classifier's inputs and labels.
                X2 keeps all background and matched boxes; Y1 is the one-hot class
                encoding; Y2 holds the class labels and the regression coordinates
                to learn; IouS is only for debugging.
                """
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)

                if X2 is None:
                    # no valid proposals: skip this iteration
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # one-hot labels: a 1 in the last position marks background
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]  # flatten to a 1-D array
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    # choose num_rois boxes to train the classifier on.
                    # strategy: with num_rois > 1, aim for half positives and
                    # half negatives; with num_rois == 1, pick either at random.
                    if len(pos_samples) < cfg.num_rois // 2:
                        # take all positives if fewer than half the quota
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()
                    try:
                        # fill the rest with negatives (with replacement if too few)
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # train the classifier on the ROIs selected above
                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                             [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]  # RPN classification loss
                losses[iter_num, 1] = loss_rpn[2]  # RPN regression loss

                losses[iter_num, 2] = loss_class[1]  # detector classification loss
                losses[iter_num, 3] = loss_class[2]  # detector regression loss
                losses[iter_num, 4] = loss_class[3]  # detector accuracy

                iter_num += 1

                # update the progress bar
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])  # losses holds every iteration's losses for this epoch
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        # print the epoch-mean losses
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        # at the end of an epoch, save the weights only if this epoch's
                        # loss beats the best so far, then continue to the next epoch
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
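Both training examples compile the RPN with losses_fn.rpn_loss_cls(num_anchors) and losses_fn.rpn_loss_regr(num_anchors): loss factories that bind num_anchors and return Keras-compatible closures. A minimal sketch of the classification half (the actual keras_frcnn losses add a scaling constant, and the regression loss uses a smooth-L1 term):

from keras import backend as K

def rpn_loss_cls(num_anchors):
    # factory: binds num_anchors and returns a loss(y_true, y_pred) closure
    def rpn_loss_cls_fixed_num(y_true, y_pred):
        # the first num_anchors channels of y_true flag which anchors are
        # valid for training; the rest hold the objectness labels
        valid = y_true[:, :, :, :num_anchors]
        labels = y_true[:, :, :, num_anchors:]
        return K.sum(valid * K.binary_crossentropy(labels, y_pred)) / (
            K.sum(valid) + K.epsilon())
    return rpn_loss_cls_fixed_num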
Beispiel #21
def work(textedit,pic_label,input,model,output):
    #run test_frcnn.py -p ./testImages/
    sys.setrecursionlimit(40000)
    keras.backend.clear_session()
    test_path=input + "/"
    output = output + "/"


    parser = OptionParser()

    parser.add_option("-p", "--path", dest="test_path", help="Path to test data.", default=test_path)
    parser.add_option("-n", "--num_rois", type="int", dest="num_rois",
                    help="Number of ROIs per iteration. Higher means more memory use.", default=256)
    parser.add_option("--config_filename", dest="config_filename", help=
                    "Location to re ad the metadata related to the training (generated when training).",
                    default="config.pickle")
    parser.add_option("--network", dest="network", help="Base network to use. Supports vgg or resnet50.", default='resnet50')

    (options, args) = parser.parse_args()

    if not options.test_path:   # if filename is not given
        parser.error('Error: path to test data must be specified. Pass --path to command line')


    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = options.test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {class_mapping[v]: np.random.randint(0, 255, 3) for v in class_mapping}
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)


    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois, nb_classes=len(class_mapping), trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.8

    visualise = True

    strideN = 400  # sliding-window stride in pixels
    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        print("开始检测:")
        if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)

        filepath = os.path.join(img_path,img_name)

        image = QtGui.QPixmap(filepath)
        pic_label.setPixmap(image)
        pic_label.setScaledContents(True)

        imgO = cv2.imread(filepath)
        (height,width,_) = imgO.shape
        mH = int((height-strideN)/strideN)
        mW = int((width-strideN)/strideN)

        #### tile the image and scan it grid by grid
        object_key = []
        object_pro = []
        object_x1 = []
        object_y1 = []
        object_x2 = []
        object_y2 = []

        for m in range(mH):
            for n in range (mW):
                print(m*mW+n)

                imgCopy = imgO.copy()

                img = imgCopy[strideN*m:strideN*(m+2), strideN*n:strideN*(n+2)]  # rows (height), then cols (width)
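                # tiles are 2*strideN on a side with a step of strideN,
                # so neighbouring tiles overlap by 50%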

                st = time.time()

                X, ratio = format_img(img, C)
                if K.image_dim_ordering() == 'tf':
                    X = np.transpose(X, (0, 2, 3, 1))

                # get the feature maps and output from the RPN
                [Y1, Y2, F] = model_rpn.predict(X)

                R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

                # convert from (x1,y1,x2,y2) to (x,y,w,h)
                R[:, 2] -= R[:, 0]
                R[:, 3] -= R[:, 1]

                # apply the spatial pyramid pooling to the proposed regions
                bboxes = {}
                probs = {}

                for jk in range(R.shape[0]//C.num_rois + 1):
                    ROIs = np.expand_dims(R[C.num_rois*jk:C.num_rois*(jk+1), :], axis=0)
                    if ROIs.shape[1] == 0:
                        break

                    if jk == R.shape[0]//C.num_rois:
                        #pad R
                        curr_shape = ROIs.shape
                        target_shape = (curr_shape[0],C.num_rois,curr_shape[2])
                        ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                        ROIs_padded[:, :curr_shape[1], :] = ROIs
                        ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                        ROIs = ROIs_padded

                    [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                    for ii in range(P_cls.shape[1]):
                        if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                            continue
                        cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                        if cls_name not in bboxes:
                            bboxes[cls_name] = []
                            probs[cls_name] = []

                        (x, y, w, h) = ROIs[0, ii, :]
                        cls_num = np.argmax(P_cls[0, ii, :])

                        try:
                            (tx, ty, tw, th) = P_regr[0, ii, 4*cls_num:4*(cls_num+1)]
                            tx /= C.classifier_regr_std[0]
                            ty /= C.classifier_regr_std[1]
                            tw /= C.classifier_regr_std[2]
                            th /= C.classifier_regr_std[3]
                            x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
                        except:
                            pass
                        bboxes[cls_name].append([C.rpn_stride*x, C.rpn_stride*y, C.rpn_stride*(x+w), C.rpn_stride*(y+h)])
                        probs[cls_name].append(np.max(P_cls[0, ii, :]))

                all_dets = []

                for key in bboxes:
                    print(key)
                    bbox = np.array(bboxes[key])

                    new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
                    for jk in range(new_boxes.shape[0]):
                        print("test")
                        (x1, y1, x2, y2) = new_boxes[jk,:]
                        (real_x1, real_y1, real_x2, real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                        color = [0,0,255]
                        if key == "airbase": color= [0,0,255]
                        if key == "harbour": color = [0,159,255]
                        if key == "island": color = [0,255,0]


                        print(real_x1)
                        print(real_y1)
                        print(real_x2)
                        print(real_y2)

                        cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), color,2)

                        textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
                        print(textLabel)
                        all_dets.append((key,100*new_probs[jk]))

                        object_real_x1 = real_x1 + strideN*n
                        object_real_y1 = real_y1 + strideN*m
                        object_real_x2 = real_x2 + strideN*n
                        object_real_y2 = real_y2 + strideN*m

                        object_key.append(key)
                        object_pro.append(new_probs[jk])
                        object_x1.append(object_real_x1)
                        object_y1.append(object_real_y1)
                        object_x2.append(object_real_x2)
                        object_y2.append(object_real_y2)

                        #(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
                        #textOrg = (real_x1, real_y1-0)

                        #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
                        #cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
                        #cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 0), 1)

                print('Elapsed time = {}'.format(time.time() - st))
                cursor = textedit.textCursor()
                cursor.movePosition(QtGui.QTextCursor.End)
                cursor.insertText(str(m*mW+n))
                cursor.insertText("\r\n")
                cursor.insertText('Elapsed time = {}'.format(time.time() - st))
                cursor.insertText("\r\n")
                # textedit.append('Elapsed time = {}'.format(time.time() - st))
                textedit.setTextCursor(cursor)
                textedit.ensureCursorVisible()
                #print(all_dets)
                aaa=filepath.split('/')
                aab=aaa[-1].split('.')
                cv2.imwrite(output + aab[0]  + '_' + str(m*mW+n+1) + '.' + aab[1],img)
                image = QtGui.QPixmap(output + aab[0]  + '_' + str(m*mW+n+1) + '.' + aab[1])
                pic_label.setPixmap(image)
                pic_label.setScaledContents(True)
                #cv2.imwrite('./results_imgs/{}.jpg'.format(m*mW+n),img)

        ## non-maximum suppression over the merged tile detections
        imgCopy2 = imgO.copy()
        object_name = ["airbase","harbour","island"]
        for object_class in range (len(object_name)):
            x1 = []
            y1 = []
            x2 = []
            y2 = []
            prob=[]
            for numR in range (len(object_key)):
                if object_key[numR]==object_name[object_class]:
                    x1.append(object_x1[numR])
                    y1.append(object_y1[numR])
                    x2.append(object_x2[numR])
                    y2.append(object_y2[numR])
                    prob.append(object_pro[numR])
            if len(x1)>0:
                x1=np.array(x1)
                y1=np.array(y1)
                x2=np.array(x2)
                y2=np.array(y2)
                prob=np.array(prob)

                x1, y1, x2, y2, probs =  non_max_suppression(x1,y1,x2,y2, prob, overlap_thresh=0.5, max_boxes=300)

                for numLR in range (len(x1)):
                    real_x1 = x1[numLR]
                    real_y1 = y1[numLR]
                    real_x2 = x2[numLR]
                    real_y2 = y2[numLR]

                    color = [0,0,255]
                    if object_name[object_class] == "airbase": color= [0,0,255]
                    if object_name[object_class] == "harbour": color = [0,159,255]
                    if object_name[object_class] == "island": color = [0,255,0]
                    cv2.rectangle(imgCopy2,(real_x1, real_y1), (real_x2, real_y2), color,2)
        #cv2.imwrite('./results_imgs/{}.jpg'.format(9999),imgCopy2)
        cv2.imwrite(output + filepath.split('/')[-1],imgCopy2)
        image = QtGui.QPixmap(output + filepath.split('/')[-1])
        pic_label.setPixmap(image)
        pic_label.setScaledContents(True)

def handler(event, context):
    img_name = event['img_process']

    client.download_file('adaproject', img_name, '/tmp/' + img_name)
    X = np.load('/tmp/' + img_name)

    with open('config.pickle', 'rb') as f_in:
        C = pickle.load(f_in)
    class_mapping = C.class_mapping
    num_features = 1024
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)
    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    BUCKET_NAME = 'adaproject'  # replace with your bucket name
    KEY = 'model_frcnn.hdf5'  # replace with your object key

    s3 = boto3.resource('s3')

    try:
        s3.Bucket(BUCKET_NAME).download_file(KEY, '/tmp/model_frcnn.hdf5')
        print('File Found')
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == "404":
            print("The object does not exist.")
        else:
            raise
    model_rpn.load_weights('/tmp/model_frcnn.hdf5', by_name=True)
    model_classifier.load_weights('/tmp/model_frcnn.hdf5', by_name=True)
    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    # Starting RPN prediction
    [Y1, Y2, F] = model_rpn.predict(X)
    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.7)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]
    bboxes = {}
    probs = {}
    bbox_threshold = 0.8
    class_mapping = {v: k for k, v in class_mapping.items()}
    # print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:
            # pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded
        [P_cls, P_regr] = model_classifier.predict([F, ROIs])
        for ii in range(P_cls.shape[1]):

            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except:
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))
    final_data = []
    output = {}
    for key in bboxes:
        data = {}
        bbox = np.array(bboxes[key])
        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.5)
        data[key] = {}
        for i in range(new_boxes.shape[0]):
            data[key]['x'] = str(new_boxes[i][0])
            data[key]['y'] = str(new_boxes[i][1])
            data[key]['w'] = str(new_boxes[i][2])
            data[key]['z'] = str(new_boxes[i][3])
            data[key]['prob'] = str(new_probs[i])
            final_data.append(data)

    output['bboxes'] = bboxes
    output['rpn'] = final_data
    timestamp = int(time.time() * 1000)
    table = dynamodb.Table(os.environ['DYNAMODB_TABLE'])
    result = table.update_item(
        Key={'requestId': event['requestId']},
        ExpressionAttributeNames={
            '#status': 'status',
            '#result': 'result',
        },
        ExpressionAttributeValues={
            ':status': 'DONE',
            ':result': output,
            ':updatedAt': timestamp,
        },
        UpdateExpression='SET #status = :status, '
        '#result = :result, '
        'updatedAt = :updatedAt',
        ReturnValues='ALL_NEW',
    )
    response = {
        "statusCode": 200,
        "body": json.dumps(result['Attributes'], cls=DecimalEncoder)
    }

    return response
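The handler above serializes the DynamoDB attributes with json.dumps(..., cls=DecimalEncoder), but the encoder class itself is not part of this snippet. A minimal sketch of the usual helper (boto3 returns DynamoDB numbers as decimal.Decimal, which the json module cannot serialize on its own) might look like this:

import decimal
import json

class DecimalEncoder(json.JSONEncoder):
    # convert boto3's decimal.Decimal values into plain JSON numbers
    def default(self, o):
        if isinstance(o, decimal.Decimal):
            # keep integral values as ints, everything else as floats
            return int(o) if o % 1 == 0 else float(o)
        return super(DecimalEncoder, self).default(o)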
Example #23
print('VLAD docs loaded.')

#for CNN
print('Preparing CNN config...')
config_output_filename = options.config
with open(config_output_filename, 'rb') as f_in:
    C = pickle.load(f_in)

C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

input_shape_img = (None, None, 3)
# input_shape_img = (224, 224, 3)
img_input = Input(shape=input_shape_img)
shared_layers = nn.nn_base(img_input, trainable=False)
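# complete ResNet stage 5 on top of the shared base to build a global scene descriptor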
x = conv_block(shared_layers, 3, [512, 512, 2048], stage=5, block='a')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
x = AveragePooling2D((7, 7), name='avg_pool')(x)
scene_extractor = Model(img_input, x)
# x = Flatten()(x)
# x = Dense(256, activation='relu')(x)
# x = Dense(1000, activation='softmax', name='fc1000')(x)
# global_extractor = Model(img_input, x)

model_path = options.model_path
# pre_weight_path = "/users/sunjingxuan/desktop/frcnn-original-weights/model_frcnn.hdf5"
# pre_weight_path = '/users/sunjingxuan/desktop/second_res_more_epoch.h5'
pre_weight_path = '/users/sunjingxuan/desktop/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
if model_path != 'placeholder':
Example #24
class_to_color = {
    class_mapping[v]: np.random.randint(0, 255, 3)
    for v in class_mapping
}
C.num_rois = int(options.num_rois)

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
    input_shape_features = (1024, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

base_output = nn.nn_base(img_input, trainable=True)
model_base = Model(img_input, base_output)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(base_output, num_anchors)

classifier_only = nn.classifier(feature_map_input,
                                roi_input,
                                C.num_rois,
                                nb_classes=21,
                                trainable=True)
model_classifier_only = Model([feature_map_input, roi_input], classifier_only)

model_rpn = Model(img_input, rpn)
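# in keras_frcnn, nn.rpn returns [objectness, box regression, shared feature map],
# so rpn[2:] exposes only the feature-map output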
model_rpn_features = Model(img_input, rpn[2:])
Example #25
from keras.models import Model
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras_frcnn import losses

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
else:
    input_shape_img = (None, None, 3)

img_input = Input(shape=input_shape_img)

roi_input = Input(shape=(C.num_rois, 4))

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn = nn.rpn(shared_layers, num_anchors)

# the classifier is built on top of the base layers + the ROI pooling layer + extra layers
classifier = nn.classifier(shared_layers,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(classes_count))

# define the full model
model = Model([img_input, roi_input], rpn + classifier)

Example #26
def upload_file():
    print("request is ", request.files)
    st = time.time()
    content_length = request.content_length
    print(f"Content_length : {content_length}")
    print("data type is ", type(request))
    print("data type of request files  ", type(request.files))
    data_dict = request.form.to_dict()
    #print(type(data_dict))
    #print(data_dict['file'])
    #print('data from frontend',data_dict)
    data = (data_dict['file'].split(',')[1])
    l, b = (data_dict['imgDimensions'].split(','))
    l = int(l)
    b = int(b)
    print('width of image', l)
    print('type of l ', type(l))
    print('height of image', b)
    #print(data)
    #print(len(data_dict))
    #print(data)
    imgdata = base64.b64decode(data)
    print("imagedata type is", type(imgdata))
    img2 = Image.open(io.BytesIO(imgdata))
    print(type(img2))
    #img2.show()
    #img = cv2.imread(img2)
    #print('opencv type' , type(img))
    #print(type(img))
    a = np.array(img2.getdata()).astype(np.float64)
    #print('datatype of w ', w.dtype)
    #b = np.ones(172800,3)
    #a = np.concatenate((w,b), axis=None)
    print('type of data to model ', type(a))
    print('shape of data from frontend', a.shape)
    #r, c = a.shape
    #print('Value of r', r)
    """
	if a.shape == (480000, 3):
		data = a.reshape(600, 800, 3)
	else: data = a.reshape(480, 640, 3)
	"""
    data = a.reshape(b, l, 3)

    st = time.time()

    parser = OptionParser()

    parser.add_option(
        "-n",
        "--num_rois",
        type="int",
        dest="num_rois",
        help="Number of ROIs per iteration. Higher means more memory use.",
        default=64)
    parser.add_option(
        "--config_filename",
        dest="config_filename",
        help=
        "Location to read the metadata related to the training (generated when training).",
        default="config.pickle")
    parser.add_option("--network",
                      dest="network",
                      help="Base network to use. Supports vgg or resnet50.",
                      default='resnet50')

    (options, args) = parser.parse_args()

    config_output_filename = options.config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(options.num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.6

    visualise = True

    #if not img_name.lower().endswith(('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
    #	continue
    #print(img_name)
    #filepath = os.path.join(img_path,img_name)

    img = data

    #cv2.imshow('img', img)
    #cv2.waitKey(0)

    X, ratio = format_img(img, C)

    if K.image_dim_ordering() == 'tf':
        X = np.transpose(X, (0, 2, 3, 1))

    # get the feature maps and output from the RPN
    [Y1, Y2, F] = model_rpn.predict(X)

    R = roi_helpers.rpn_to_roi(Y1,
                               Y2,
                               C,
                               K.image_dim_ordering(),
                               overlap_thresh=0.6)

    # convert from (x1,y1,x2,y2) to (x,y,w,h)
    R[:, 2] -= R[:, 0]
    R[:, 3] -= R[:, 1]

    # apply the spatial pyramid pooling to the proposed regions
    bboxes = {}
    probs = {}

    for jk in range(R.shape[0] // C.num_rois + 1):
        ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                              axis=0)
        if ROIs.shape[1] == 0:
            break

        if jk == R.shape[0] // C.num_rois:
            #pad R
            curr_shape = ROIs.shape
            target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
            ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
            ROIs_padded[:, :curr_shape[1], :] = ROIs
            ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
            ROIs = ROIs_padded

        [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

        for ii in range(P_cls.shape[1]):

            if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                    P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                continue

            cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

            if cls_name not in bboxes:
                bboxes[cls_name] = []
                probs[cls_name] = []

            (x, y, w, h) = ROIs[0, ii, :]

            cls_num = np.argmax(P_cls[0, ii, :])
            try:
                (tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                tx /= C.classifier_regr_std[0]
                ty /= C.classifier_regr_std[1]
                tw /= C.classifier_regr_std[2]
                th /= C.classifier_regr_std[3]
                x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
            except Exception:
                # if applying the regression fails, fall back to the raw ROI
                pass
            bboxes[cls_name].append([
                C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                C.rpn_stride * (y + h)
            ])
            probs[cls_name].append(np.max(P_cls[0, ii, :]))

    all_dets = []

    for key in bboxes:
        bbox = np.array(bboxes[key])

        new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
            bbox, np.array(probs[key]), overlap_thresh=0.6)
        for jk in range(new_boxes.shape[0]):
            (x1, y1, x2, y2) = new_boxes[jk, :]

            (real_x1, real_y1, real_x2,
             real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

            cv2.rectangle(
                img, (real_x1, real_y1), (real_x2, real_y2),
                (int(class_to_color[key][0]), int(
                    class_to_color[key][1]), int(class_to_color[key][2])), 2)

            textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
            all_dets.append((key, 100 * new_probs[jk]))

            (retval, baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
            textOrg = (real_x1, real_y1 - 0)

            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (0, 0, 0), 2)
            cv2.rectangle(
                img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                (255, 255, 255), -1)
            cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1,
                        (0, 0, 0), 1)

    print('Elapsed time = {}'.format(time.time() - st))
    print('number of windows detected', len(all_dets))
    print(all_dets)
    r = len(all_dets)
    img3 = normalize(img)
    #plt.imshow(img)
    #cv2.imshow('img3', img3)
    #cv2.waitKey(0)

    K.clear_session()
    #data = process(data)
    #print('after reshape',data.shape)
    im2 = Image.fromarray(img.astype("uint8"), "RGB")
    print("im2 data type is ", type(im2))
    #to_frontend = (" ".join(str(x) for x in data))
    db = data.tobytes()
    print('type of data to database :', type(db))
    todb = insertBLOB('Image007', db)
    print('final data shape fed to model : ', data.shape)
    # ImageFile img = db.b64encode()
    # with open("t.png", "rb") as imageFile:
    # str = base64.b64encode(imageFile.read())
    #cv2.imshow('image', cv2.cvtColor(data, cv2.COLOR_BGR2GRAY))
    #cv2.waitKey()
    #str = base64.b64encode(data)
    # return jsonify(to_frontend, r)

    #img = Image.open( 'C:\Window Counter_Project\Flickr\Window_101 (131).jpg' )
    #img.load()

    #data = np.asarray( img, dtype="int32" )
    #im = Image.fromarray(data.astype("uint8"))
    #im.show()  # uncomment to look at the image
    rawBytes = io.BytesIO()
    print(rawBytes)
    im2.save(rawBytes, "jpeg")
    #im2.show()
    print('type of im2 is ', type(im2))
    rawBytes.seek(0)  # return to the start of the file
    response_obj = {
        'count': r,
        # .decode() avoids embedding Python's b'...' bytes repr in the URI
        'image':
        "data:image/jpeg;base64," +
        base64.b64encode(rawBytes.read()).decode('utf-8')
    }
    #print("response is", type(response_obj))
    return jsonify(Data=response_obj)
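The endpoint above returns the annotated image as a base64 data URI inside a JSON wrapper. A minimal client-side sketch that undoes this encoding (the 'Data', 'count' and 'image' field names are taken from response_obj above):

import base64
import io
from PIL import Image

def decode_response(payload):
    # payload is the parsed JSON body: {"Data": {"count": ..., "image": ...}}
    data_uri = payload['Data']['image']
    b64_part = data_uri.split(',', 1)[1]  # drop the "data:image/jpeg;base64," prefix
    img = Image.open(io.BytesIO(base64.b64decode(b64_part)))
    return payload['Data']['count'], img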
Example #27
def run_prediction(config_filename,
                   model_path,
                   test_path,
                   out_path,
                   network='resnet50',
                   num_rois=32):

    with open(config_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    #print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    C.model_path = model_path
    #print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    bbox_threshold = 0.9  # default is 0.8

    visualise = True

    preresults = []  # add this

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        #print(img_name)
        #st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)  # default is 0.7

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # if applying the regression fails, fall back to the raw ROI
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        bbox_results = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]),
                overlap_thresh=0.4)  # default is 0.5
            for jk in range(new_boxes.shape[0]):
                img_use = img.copy()
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                #cv2.rectangle(img,(real_x1, real_y1), (real_x2, real_y2), (int(class_to_color[key][0]), int(class_to_color[key][1]), int(class_to_color[key][2])),2)
                cv2.rectangle(img_use, (real_x1, real_y1), (real_x2, real_y2),
                              (0, 0, 255), 4)

                #textLabel = '{}: {}'.format(key,int(100*new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))
                bbox_results.append(
                    (key, (real_x1, real_y1, real_x2, real_y2)))

                #(retval,baseLine) = cv2.getTextSize(textLabel,cv2.FONT_HERSHEY_COMPLEX,1,1)
                #textOrg = (real_x1, real_y1-0)

                #cv2.rectangle(img, (textOrg[0] - 5, textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (0, 0, 0), 2)
                #cv2.rectangle(img, (textOrg[0] - 5,textOrg[1]+baseLine - 5), (textOrg[0]+retval[0] + 5, textOrg[1]-retval[1] - 5), (255, 255, 255), -1)
                #cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)
                if not os.path.exists(out_path):
                    os.makedirs(out_path)
                page_id = int(img_name.split('.')[0].split('_')[-1]) + 1
                page_name = 'page_' + str(page_id)
                cv2.imwrite(
                    os.path.join(
                        out_path,
                        '{}.jpg'.format(page_name + '_id_' + str(jk + 1))),
                    img_use)  #format(idx)

        #print('Elapsed time = {}'.format(time.time() - st))
        #print(all_dets)
        #print(bbox_results)
        preresults.append([img_name, bbox_results])  ## add this
        # cv2.imshow('img', img)
        # cv2.waitKey(0)
        #outpath = './results/result_scanned/'
        #if not os.path.exists(out_path):
        #os.makedirs(out_path)
        #cv2.imwrite(out_path + '{}.jpg'.format(img_name.split('.')[0]),img) #format(idx)

    preresults = pd.DataFrame(preresults)
    preresults.to_csv(os.path.join(out_path, 'preresults.txt'),
                      header=None,
                      index=None)
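A typical call to run_prediction, with illustrative paths (none of these values appear in the snippet itself):

run_prediction(config_filename='config.pickle',
               model_path='model_frcnn.hdf5',
               test_path='./test_images/',
               out_path='./results/',
               network='resnet50',
               num_rois=32)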
Example #28
def Test_frcnn(test_path,
               config_filename,
               num_rois=32,
               network="vgg",
               terminal_flag=False):
    """
    Test the object detection network
    
    test_path --str: Full Path to the folder containing the test images (No default)
    config_filename --str: Full path to the config_file.pickle, generated while training (No default)
    num_rois --int: number of ROIs to process at once (Default 32)
    network --str: The base network to use (One of 'vgg','resnet50') (Default 'vgg')
    terminal_flag --bool: Flag to test if accessing from terminal do not pass anything to it while calling this function
                          
    OUTPUT:
    When the script is called from terminal the images are displayed using opencv (images are in BGR format)
    When called as a function returns the images, dets as 2 lists (images are in RGB format)
    """

    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif network == 'vgg':
        import keras_frcnn.vgg as nn
    elif network == "mobilenet":
        import keras_frcnn.mobilenet as nn
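    # hard-coded override of the weights path stored in the config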
    C.model_path = 'epoch-176.hdf5'
    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    def format_img_size(img, C):  # utility function 1
        """ formats the image size based on config """
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            ratio = img_min_side / width
            new_height = int(ratio * height)
            new_width = int(img_min_side)
        else:
            ratio = img_min_side / height
            new_width = int(ratio * width)
            new_height = int(img_min_side)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        return img, ratio

    def format_img_channels(img, C):  #utility function 2
        """ formats the image channels based on config """
        img = img[:, :, (2, 1, 0)]
        img = img.astype(np.float32)
        img[:, :, 0] -= C.img_channel_mean[0]
        img[:, :, 1] -= C.img_channel_mean[1]
        img[:, :, 2] -= C.img_channel_mean[2]
        img /= C.img_scaling_factor
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, axis=0)
        return img

    def format_img(img, C):  # utility function 3
        """ formats an image for model prediction based on config """
        img, ratio = format_img_size(img, C)
        img = format_img_channels(img, C)
        return img, ratio

    # Method to transform the coordinates of the bounding box to its original size
    def get_real_coordinates(ratio, x1, y1, x2, y2):  #utility function 4

        real_x1 = int(round(x1 // ratio))
        real_y1 = int(round(y1 // ratio))
        real_x2 = int(round(x2 // ratio))
        real_y2 = int(round(y2 // ratio))

        return (real_x1, real_y1, real_x2, real_y2)

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512
    elif C.network == 'mobilenet':
        num_features = 512

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(C.model_path))
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    bbox_threshold = 0.8

    list_of_all_images = []
    list_of_all_dets = []

    for idx, img_name in enumerate(sorted(os.listdir(img_path))):
        if not img_name.lower().endswith(
            ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
            continue
        print(img_name)
        st = time.time()
        filepath = os.path.join(img_path, img_name)

        img = cv2.imread(filepath)

        X, ratio = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                #pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                        P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except Exception:
                    # if applying the regression fails, fall back to the raw ROI
                    pass
                bboxes[cls_name].append([
                    C.rpn_stride * x, C.rpn_stride * y, C.rpn_stride * (x + w),
                    C.rpn_stride * (y + h)
                ])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]

                (real_x1, real_y1, real_x2,
                 real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                cv2.rectangle(
                    img, (real_x1, real_y1), (real_x2, real_y2),
                    (int(class_to_color[key][0]), int(class_to_color[key][1]),
                     int(class_to_color[key][2])), 2)

                textLabel = '{}: {}'.format(key, int(100 * new_probs[jk]))
                all_dets.append((key, 100 * new_probs[jk]))

                (retval, baseLine) = cv2.getTextSize(textLabel,
                                                     cv2.FONT_HERSHEY_COMPLEX,
                                                     1, 1)
                textOrg = (real_x1, real_y1 - 0)

                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (0, 0, 0), 2)
                cv2.rectangle(
                    img, (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                    (textOrg[0] + retval[0] + 5, textOrg[1] - retval[1] - 5),
                    (255, 255, 255), -1)
                cv2.putText(img, textLabel, textOrg, cv2.FONT_HERSHEY_DUPLEX,
                            1, (0, 0, 0), 1)

        if terminal_flag:
            print('Elapsed time = {}'.format(time.time() - st))
            print(all_dets)
            if len(all_dets) > 0:
                cv2.imwrite(img_name + '_new.png', img)
        else:
            list_of_all_images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            list_of_all_dets.append(all_dets)

    if not terminal_flag:
        return (list_of_all_images, list_of_all_dets)
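When imported as a library (terminal_flag left at its default), Test_frcnn returns two parallel lists. A short usage sketch, with an illustrative test folder:

images, dets = Test_frcnn(test_path='./test_images/',
                          config_filename='config.pickle',
                          num_rois=32,
                          network='vgg')
for img, img_dets in zip(images, dets):
    # img is an RGB array; img_dets is a list of (class_name, probability * 100) tuples
    print(img_dets)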
Example #29
    def __init__(self, config_path, model_path):
        #init config param
        config_output_filename = config_path
        with open(config_output_filename, 'rb') as f_in:
            self.C = pickle.load(f_in)

        # turn off any data augmentation at test time
        self.C.use_horizontal_flips = False
        self.C.use_vertical_flips = False
        self.C.rot_90 = False

        #init class maps
        #e.g. {0: "class", 1: "another class"}
        self.class_mapping = self.C.class_mapping
        if 'bg' not in self.class_mapping:
            self.class_mapping['bg'] = len(self.class_mapping)
        self.class_mapping = {v: k for k, v in self.class_mapping.items()}

        #init viz
        self.class_to_color = {
            self.class_mapping[v]: np.random.randint(0, 255, 3)
            for v in self.class_mapping
        }
        self.bbox_threshold = 0.85

        #init nn params
        self.C.num_rois = 16
        num_features = 1024

        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

        img_input = Input(shape=input_shape_img)
        roi_input = Input(shape=(self.C.num_rois, 4))
        feature_map_input = Input(shape=input_shape_features)

        # define the base network (resnet here, can be VGG, Inception, etc)
        shared_layers = nn.nn_base(img_input, trainable=True)

        # define the RPN, built on the base layers
        num_anchors = len(self.C.anchor_box_scales) * len(
            self.C.anchor_box_ratios)
        rpn_layers = nn.rpn(shared_layers, num_anchors)

        classifier = nn.classifier(feature_map_input,
                                   roi_input,
                                   self.C.num_rois,
                                   nb_classes=len(self.class_mapping),
                                   trainable=True)

        #init models
        self.model_rpn = Model(img_input, rpn_layers)
        self.model_classifier_only = Model([feature_map_input, roi_input],
                                           classifier)

        self.model_classifier = Model([feature_map_input, roi_input],
                                      classifier)

        #load weights
        print('Loading weights from {}'.format(model_path))
        self.model_rpn.load_weights(model_path, by_name=True)
        self.model_classifier.load_weights(model_path, by_name=True)

        #compile
        self.model_rpn.compile(optimizer='adam', loss='mse')  # alternative: 'sgd'
        self.model_classifier.compile(optimizer='adam', loss='mse')
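Only __init__ is shown, so the enclosing class name is not part of the snippet. Assuming a hypothetical wrapper name such as FRCNNDetector, construction would look like:

# FRCNNDetector is a hypothetical name for the class this __init__ belongs to
detector = FRCNNDetector(config_path='config.pickle',
                         model_path='model_frcnn.hdf5')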
Example #30
def build_and_train(hype_space, save_best_weights=False):
    train_path = '/home/comp/e4252392/retraindata4frcnn.txt'
    config_output_filename = '/home/comp/e4252392/hyperopt/hyperopt_config.pickle'
    num_epochs = 20
    #for retrain best model only
    diagnose_path = '/home/comp/e4252392/hyperopt/models/hyperopt_loss_ap_plt.npy'
    real_model_path = '/home/comp/e4252392/hyperopt/models/hyperopt_model_plt_'

    print("Hyperspace:")
    print(hype_space)
    C = config.Config()
    C.num_rois = int(hype_space['num_rois'])  # hyperopt can sample floats, so cast to int
    # C.anchor_box_scales = hype_space['anchor_box_scales']
    # C.base_net_weights = '/home/comp/e4252392/second_res_more_epoch.h5'
    C.base_net_weights = 'model_frcnn.hdf5'

    #data
    all_imgs, classes_count, class_mapping = get_data(train_path)
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)
    train_imgs = [s for s in all_imgs]
    print('Num train samples {}'.format(len(train_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    #data

    # build_model
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    shared_layers = nn.nn_base(int(hype_space['kernel_size']),
                               img_input,
                               trainable=True)

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(int(hype_space['kernel_size']), shared_layers, num_anchors)

    classifier = nn.classifier(int(hype_space['kernel_size']),
                               shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except Exception:
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder \
			https://github.com/fchollet/keras/tree/master/keras/applications')

    # optimizer = Adam(lr=1e-5)
    # optimizer_classifier = Adam(lr=1e-5)
    optimizer = Adam(lr=hype_space['optimizer_lr'],
                     decay=hype_space['optimizer_decay'])
    optimizer_classifier = Adam(lr=hype_space['optimizer_lr'],
                                decay=hype_space['optimizer_decay'])
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          thelosses.rpn_loss_cls(num_anchors),
                          thelosses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            thelosses.class_loss_cls,
            thelosses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    sgd = SGD(lr=hype_space['sgd_lr'], decay=hype_space['sgd_decay'])
    model_all.compile(optimizer=sgd, loss='mae')
    # build_model

    #build_and_train
    epoch_length = 10
    iter_num = 0
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    print('Starting training')

    loss_array = []
    ap_array = []
    epoch_array = []
    epoch_array.append(0)

    result = {}
    model_name = ''

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                # train
                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue
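                # np.where returns a 1-tuple of index arrays; the checks below
                # unpack it into a flat array of sample indices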
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)
                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []
                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except ValueError:
                        # not enough negatives to draw without replacement
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
                # train

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    # result
                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss

                        if save_best_weights:
                            # build the path fresh each time; reassigning
                            # real_model_path itself would keep accumulating
                            # suffixes on every improved epoch
                            weights_path = real_model_path + str(
                                epoch_num + 1) + '.hdf5'
                            model_all.save_weights(weights_path,
                                                   overwrite=True)
                            print("Best weights so far saved to " +
                                  weights_path + ". best_loss = " +
                                  str(best_loss))
                            epoch_array.append(epoch_num + 1)
                            loss_array.append([
                                loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                                loss_class_regr, best_loss
                            ])
                            album_ap, logo_ap, mAP = measure_map.measure_map(
                                config_output_filename, weights_path)
                            ap_array.append([epoch_array, loss_array, ap_array][2][-1] if False else [album_ap, logo_ap, mAP])
                            np.save(diagnose_path,
                                    [epoch_array, loss_array, ap_array])
                        else:
                            album_ap = 'not applicable'
                            logo_ap = 'not applicable'
                            mAP = 'not applicable'
                        model_name = "model_{}_{}".format(
                            str(best_loss),
                            str(uuid.uuid4())[:5])
                        result = {
                            'loss': best_loss,
                            'loss_rpn_cls': loss_rpn_cls,
                            'loss_rpn_regr': loss_rpn_regr,
                            'loss_class_cls': loss_class_cls,
                            'loss_class_regr': loss_class_regr,
                            'album_ap': album_ap,
                            'logo_ap': logo_ap,
                            'mAP': mAP,
                            'model_name': model_name,
                            'space': hype_space,
                            'status': STATUS_OK
                        }
                        print("RESULT UPDATED.")
                        print("Model name: {}".format(model_name))
                    # result
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
    print("BEST MODEL: {}".format(model_name))
    print("FINAL RESULT:")
    print_json(result)
    save_json_result(model_name, result)
    try:
        K.clear_session()
        del model_all, model_rpn, model_classifier
    except Exception as err:
        try:
            K.clear_session()
        except Exception:
            pass
        err_str = str(err)
        print(err_str)
        traceback_str = str(traceback.format_exc())
        print(traceback_str)
        return {
            'status': STATUS_FAIL,
            'err': err_str,
            'traceback': traceback_str
        }
    print("\n\n")
    return model_name, result
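build_and_train is written as a hyperopt objective: on success it returns (model_name, result) where result carries 'loss' and 'status': STATUS_OK, and on failure it returns a STATUS_FAIL dict. A minimal driver sketch, with an illustrative search space (the real ranges are not shown in the snippet):

from hyperopt import Trials, fmin, hp, tpe

space = {
    'num_rois': hp.quniform('num_rois', 4, 64, 4),
    'kernel_size': hp.choice('kernel_size', [3, 5]),
    'optimizer_lr': hp.loguniform('optimizer_lr', -12, -7),
    'optimizer_decay': hp.loguniform('optimizer_decay', -12, -7),
    'sgd_lr': hp.loguniform('sgd_lr', -12, -7),
    'sgd_decay': hp.loguniform('sgd_decay', -12, -7),
}

def objective(hype_space):
    out = build_and_train(hype_space)
    # success returns (model_name, result); failure returns a status dict
    return out[1] if isinstance(out, tuple) else out

trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=10, trials=trials)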
Example #31
if C.network == 'resnet50':
    num_features = 1024
elif C.network == 'vgg':
    num_features = 512

if K.image_dim_ordering() == 'th':
    input_shape_img = (3, None, None)
    input_shape_features = (num_features, None, None)
else:
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, num_features)

img_input = Input(shape=input_shape_img)
roi_input = Input(shape=(C.num_rois, 4))
feature_map_input = Input(shape=input_shape_features)

# define the base network (resnet here, can be VGG, Inception, etc)
shared_layers = nn.nn_base(img_input, trainable=True)

# define the RPN, built on the base layers
num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
rpn_layers = nn.rpn(shared_layers, num_anchors)

classifier = nn.classifier(feature_map_input,
                           roi_input,
                           C.num_rois,
                           nb_classes=len(class_mapping),
                           trainable=True)

model_rpn = Model(img_input, rpn_layers)
model_classifier_only = Model([feature_map_input, roi_input], classifier)

model_classifier = Model([feature_map_input, roi_input], classifier)
Example #32
def predict(args_):
    path = args_.path
    print(path)

    with open('/content/gdrive/My Drive/SOP/keras_frcnn_updated/config.pickle',
              'rb') as f_in:
        cfg = pickle.load(f_in)
    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False

    class_mapping = cfg.class_mapping
    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    input_shape_img = (None, None, 3)
    input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(cfg.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    #Newly added for the sake of temporary run
    cfg.model_path = "/content/gdrive/My Drive/SOP/keras_frcnn_updated/model/50Epochs.hdf5"

    print('Loading weights from {}'.format(cfg.model_path))
    model_rpn.load_weights(cfg.model_path, by_name=True)
    model_classifier.load_weights(cfg.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    print("Proceeding to processing the image at")
    print(path)

    detected = 0
    total_images = 0
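    # note: the two counters above are plain ints; Python passes them by value,
    # so predict_single_image cannot update them in place and would need to
    # return updated counts instead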

    if os.path.isdir(path):
        for idx, img_name in enumerate(sorted(os.listdir(path))):
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            print(img_name)
            predict_single_image(os.path.join(path, img_name), model_rpn,
                                 model_classifier_only, cfg, class_mapping,
                                 detected)
        print('The accuracy rate =')
        print(detected)

    elif os.path.isfile(path):
        print('predict image from {}'.format(path))
        predict_single_image(path, model_rpn, model_classifier_only, cfg,
                             class_mapping, detected)