Ejemplo n.º 1
0
def generate_mean_pixel_file():
    C = Config()
    all_imgs, _, _ = get_data(ROI_BBOX_FILE)

    avg = [0, 0, 0]
    for img_data in all_imgs:
        print(img_data['filepath'])
        img_data_aug, x_img = augment(img_data, C, augment=False)

        (width, height) = (img_data_aug['width'], img_data_aug['height'])
        (rows, cols, _) = x_img.shape

        # get image dimensions for resizing
        (resized_width,
         resized_height) = get_new_img_size(width, height, C.im_size)

        # resize the image so that smalles side is length = 600px
        x_img = cv2.resize(x_img, (resized_width, resized_height),
                           interpolation=cv2.INTER_CUBIC)
        pixels = (resized_width * resized_height)
        avg[0] += np.sum(x_img[:, :, 0]) / pixels
        avg[1] += np.sum(x_img[:, :, 1]) / pixels
        avg[2] += np.sum(x_img[:, :, 2]) / pixels
    avg = [a / len(all_imgs) for a in list(avg)]
    np.savetxt(MEAN_PIXEL_FILE, avg, delimiter=',')
Ejemplo n.º 2
0
def train(model_name, epochs=60, batch_size=1, lr=0.0001, decay=0.001):
    t = time.time()
    all_imgs, classes_count, class_mapping = get_data(ROI_BBOX_FILE)
    print("Parsing annotation files took " + str((time.time() - t) / 1000) + "s")

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)

    C.model_name = model_name

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    if not os.path.isfile(ROI_CLASSES_FILE):
        with open(ROI_CLASSES_FILE, 'w') as class_data_json:
            json.dump(class_mapping, class_data_json)

    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    model = build_model(classes_count, num_anchors)

    optimizer = Adam(lr=lr, decay=decay)
    model.compile(optimizer=optimizer,
                  loss=[losses.rpn_loss_cls(num_anchors), losses.rpn_loss_regr(num_anchors),
                        losses.class_loss_cls,
                        losses.class_loss_regr(C.num_rois, len(classes_count) - 1)],
                  metrics={'dense_class_{}_loss'.format(len(classes_count)): 'accuracy'})

    data_gen_train = data_generators.get_anchor_gt(train_imgs, class_mapping, classes_count,
                                                   C, K.image_dim_ordering(), mode='train')

    data_gen_val = data_generators.get_anchor_gt(val_imgs, class_mapping, classes_count,
                                                 C, K.image_dim_ordering(), mode='val')

    callbacks = [EarlyStopping(monitor='val_loss', patience=20, verbose=0),
                 ModelCheckpoint(C.get_model_path(), monitor='val_loss', save_best_only=True, verbose=0),
                 ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5, min_lr=1e-7, verbose=1),
                 LoggingCallback(C)]

    print('Starting training')
    model.fit_generator(data_gen_train, steps_per_epoch=ceil(len(train_imgs) / batch_size),
                        epochs=epochs, validation_data=data_gen_val,
                        validation_steps=ceil(len(train_imgs) / batch_size),
                        callbacks=callbacks, max_q_size=1, workers=1)
Ejemplo n.º 3
0
def train_kitti():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())
    # cfg.base_net_weights=r''

    # TODO: the only file should to be change for other data to train
    cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam'

    now = datetime.datetime.now()
    day = now.strftime('%y-%m-%d')
    for i in range(10000):
        if not os.path.exists('%s-%s-%d' % (cfg.model_path, day, i)):
            cfg.model_path = '%s-%s-%d' % (cfg.model_path, day, i)
            break

    make_dir(cfg.model_path)
    make_dir(cfg.model_path + '/loss')
    make_dir(cfg.model_path + '/loss_rpn_cls')
    make_dir(cfg.model_path + '/loss_rpn_regr')
    make_dir(cfg.model_path + '/loss_class_cls')
    make_dir(cfg.model_path + '/loss_class_regr')

    cfg.simple_label_file = '/media/public/GEOWAY/plane/plane0817.csv'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    cfg.config_save_file = os.path.join(cfg.model_path, 'config.pickle')
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')
    Q = multiprocessing.Manager().Queue(maxsize=30)

    def fill_Q(n):
        while True:

            if not Q.full():
                Q.put(next(data_gen_train))
                #print(Q.qsize(),'put',n)
            else:
                time.sleep(0.00001)

    threads = []
    for i in range(4):
        thread = multiprocessing.Process(target=fill_Q, args=(i, ))
        threads.append(thread)
        thread.start()

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # model_all.summary()
    from keras.utils import plot_model
    # os.environ['PATH'] = os.environ['PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin;'

    plot_model(model_all,
               'model_all.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_classifier,
               'model_classifier.png',
               show_layer_names=True,
               show_shapes=True)
    plot_model(model_rpn,
               'model_rpn.png',
               show_layer_names=True,
               show_shapes=True)
    '''
    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')
    '''

    optimizer = adadelta()
    optimizer_classifier = adadelta()
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf
    best_rpn_cls = np.Inf
    best_rpn_regr = np.Inf
    best_class_cls = np.Inf
    best_class_regr = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

            #    X, Y, img_data = next(data_gen_train)
                while True:

                    if Q.empty():
                        time.sleep(0.00001)
                        continue

                    X, Y, img_data = Q.get()
                    #    print(Q.qsize(),'get')
                    break
            #  print(X.shape,Y.shape)
                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                               loss_class_regr))
                    if loss_rpn_cls < best_rpn_cls:
                        if cfg.verbose:
                            print(
                                'loss_rpn_cls decreased from {} to {}, saving weights'
                                .format(best_rpn_cls, loss_rpn_cls))
                            best_rpn_cls = loss_rpn_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_cls', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_rpn_regr < best_rpn_regr:
                        if cfg.verbose:
                            print(
                                'loss_rpn_regr decreased from {} to {}, saving weights'
                                .format(best_rpn_regr, loss_rpn_regr))
                            best_rpn_regr = loss_rpn_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_regr', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_class_cls < best_class_cls:
                        if cfg.verbose:
                            print(
                                'loss_class_cls decreased from {} to {}, saving weights'
                                .format(best_loss, loss_class_cls))
                            best_class_cls = loss_class_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_cls', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))
                    if loss_class_regr < best_class_regr:
                        if cfg.verbose:
                            print(
                                'loss_class_regr decreased from {} to {}, saving weights'
                                .format(best_loss, loss_class_regr))
                            best_class_regr = loss_class_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_regr', epoch_num,
                               curr_loss, loss_rpn_cls, loss_rpn_regr,
                               loss_class_cls, loss_class_regr))

                    break

            except Exception as e:
                #   print('Exception: {}'.format(e))
                # save model
                #    model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
def Measure_map(test_path,
                network_arch,
                config_filename,
                preprocessing_function=None,
                mAP_threshold=0.5):
    """Function to measure Mean Average prediction metric for object detection

    Keyword Arguments
    test_path --str: Path to the .txt file of testing train (No default)
    network_arc --object: the full faster rcnn network .py file passed as an object (no default)
    config_filename --str: Path to config file (No default)
    preprocessing_function --function: optional image preprocessing function (Default None)
    mAP threshold --float: (0,1) The min threshold to consider as a correct prediction (default 0.5)

    Output:
    prints the Map on the test dataset and returns a list of all Maps
    """
    nn = network_arch

    def get_map(pred, gt, f, threshold):
        T = {}
        P = {}
        fx, fy = f

        for bbox in gt:
            bbox['bbox_matched'] = False

        pred_probs = np.array([s['prob'] for s in pred])
        box_idx_sorted_by_prob = np.argsort(pred_probs)[::-1]

        for box_idx in box_idx_sorted_by_prob:
            pred_box = pred[box_idx]
            pred_class = pred_box['class']
            pred_x1 = pred_box['x1']
            pred_x2 = pred_box['x2']
            pred_y1 = pred_box['y1']
            pred_y2 = pred_box['y2']
            pred_prob = pred_box['prob']
            if pred_class not in P:
                P[pred_class] = []
                T[pred_class] = []
            P[pred_class].append(pred_prob)
            found_match = False

            for gt_box in gt:
                gt_class = gt_box['class']
                gt_x1 = gt_box['x1'] / fx
                gt_x2 = gt_box['x2'] / fx
                gt_y1 = gt_box['y1'] / fy
                gt_y2 = gt_box['y2'] / fy
                gt_seen = gt_box['bbox_matched']
                if gt_class != pred_class:
                    continue
                if gt_seen:
                    continue
                iou = data_generators.iou((pred_x1, pred_y1, pred_x2, pred_y2),
                                          (gt_x1, gt_y1, gt_x2, gt_y2))
                if iou >= threshold:  #0.5 default
                    found_match = True
                    gt_box['bbox_matched'] = True
                    break
                else:
                    continue

            T[pred_class].append(int(found_match))

        for gt_box in gt:
            if not gt_box['bbox_matched']:
                if gt_box['class'] not in P:
                    P[gt_box['class']] = []
                    T[gt_box['class']] = []

                T[gt_box['class']].append(1)
                P[gt_box['class']].append(0)

        #import pdb
        #pdb.set_trace()
        return T, P

    with open(config_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    # turn off any train augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    def format_img(img, C, preprocessing_function):
        img_min_side = float(C.im_size)
        (height, width, _) = img.shape

        if width <= height:
            f = img_min_side / width
            new_height = int(f * height)
            new_width = int(img_min_side)
        else:
            f = img_min_side / height
            new_width = int(f * width)
            new_height = int(img_min_side)
        fx = width / float(new_width)
        fy = height / float(new_height)
        img = cv2.resize(img, (new_width, new_height),
                         interpolation=cv2.INTER_CUBIC)
        img = img[:, :, (2, 1, 0)]  #bgr to RGB
        if preprocessing_function:
            img = preprocessing_function(img)
        # img = np.transpose(img, (2, 0, 1)) theano format
        img = np.expand_dims(img, axis=0)
        return img, fx, fy

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    print(class_mapping)

    # load the models
    input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    shared_layers = nn.nn_base(img_input)

    num_features = shared_layers.get_shape().as_list()[3]  #512 for vgg-16
    feature_map_input = Input(shape=(None, None, num_features))
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(feature_map_input, roi_input, C.num_rois,
                               len(class_mapping))
    # create a keras model
    model_rpn = Model(img_input, rpn)
    model_classifier = Model([feature_map_input, roi_input], classifier)

    #Note: The model_classifier in training and testing are different.
    # In training model_classifier and model_rpn both have the base_nn.
    # while testing only model_rpn has the base_nn it returns the FM of base_nn
    # Thus the model_classifier has the FM and ROI as input
    # This is done to increase the testing speed

    print('Loading weights from {}'.format(C.weights_all_path))
    model_rpn.load_weights(C.weights_all_path, by_name=True)
    model_classifier.load_weights(C.weights_all_path, by_name=True)

    test_imgs, _, _ = get_data(test_path)

    T = {}
    P = {}
    ALL_MAP_LIST = []
    for idx, img_data in enumerate(test_imgs):
        print('{}/{}'.format(idx, len(test_imgs)))
        st = time.time()
        filepath = img_data['filepath']

        img = cv2.imread(filepath)

        X, fx, fy = format_img(img, C, preprocessing_function)

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=C.rpn_nms_threshold,
                                   flag="test")

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append(
                    [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []

        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox,
                np.array(probs[key]),
                overlap_thresh=C.test_roi_nms_threshold,
                max_boxes=C.TEST_RPN_POST_NMS_TOP_N)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                det = {
                    'x1': x1,
                    'x2': x2,
                    'y1': y1,
                    'y2': y2,
                    'class': key,
                    'prob': new_probs[jk]
                }
                all_dets.append(det)

        print('Elapsed time = {}'.format(time.time() - st))
        t, p = get_map(all_dets, img_data['bboxes'], (fx, fy), mAP_threshold)
        for key in t.keys():
            if key not in T:
                T[key] = []
                P[key] = []
            T[key].extend(t[key])
            P[key].extend(p[key])
        all_aps = []
        for key in T.keys():
            ap = average_precision_score(T[key], P[key])
            print('{} AP: {}'.format(key, ap))
            all_aps.append(ap)
        print('mAP = {}'.format(np.mean(np.array(all_aps))))
        ALL_MAP_LIST.append(np.mean(np.array(all_aps)))
    return (ALL_MAP_LIST)
with open(config_output_filename, 'rb') as f_in:
    C = pickle.load(f_in)

if C.network == 'resnet50':
    import keras_frcnn.resnet as nn
elif C.network == 'vgg':
    import keras_frcnn.vgg as nn

# turn off any data augmentation at test time
C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

#img_path = options.test_path
img_path, _, _ = get_data(options.test_path)


def format_img_size(img, C):
    """ formats the image size based on config """
    img_min_side = float(C.im_size)
    (height, width, _) = img.shape

    if width <= height:
        ratio = img_min_side / width
        new_height = int(ratio * height)
        new_width = int(img_min_side)
    else:
        ratio = img_min_side / height
        new_width = int(ratio * width)
        new_height = int(img_min_side)
Ejemplo n.º 6
0
with open(config_output_filename, 'rb') as f_in:
    C = pickle.load(f_in)

if C.network == 'resnet50':
    import keras_frcnn.resnet as nn
elif C.network == 'vgg':
    import keras_frcnn.vgg as nn

# turn off any data augmentation at test time
C.use_horizontal_flips = False
C.use_vertical_flips = False
C.rot_90 = False

class_mapping = C.class_mapping

all_imgs, classes_count, class_mapping = get_data(options.test_path)

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

test_imgs = [
    s for s in all_imgs
    if s['imageset'] == 'trainval' or s['imageset'] == 'test'
]

print('Num train samples {}'.format(len(test_imgs)))

data_gen_test = data_generators.get_anchor_gt(test_imgs,
                                              classes_count,
                                              C,
Ejemplo n.º 7
0
if not os.path.isdir("models"):
  os.mkdir("models")

C.num_rois = int(options.num_rois)

# we will use resnet. may change to others
from keras_frcnn import vgg16 as nn

# check if weight path was passed via command line
if options.input_weight_path:
    C.model_path = options.input_weight_path
else:
    # set the path to weights based on backend and model
    C.model_path = nn.get_weight_path()

all_imgs, classes_count, class_mapping = get_data(options.train_path, options.cat)

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

C.class_mapping = class_mapping

inv_map = {v: k for k, v in class_mapping.items()}

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))

config_output_filename = options.config_filename
Ejemplo n.º 8
0
# load model
nn: Any = None
num_features = 1024
if C.network == "vgg":
    from keras_frcnn import vgg
    nn = vgg
    num_features = 512
elif C.network == "resnet":
    from keras_frcnn import resnet
    nn = resnet
else:
    print(f"Not a valid model: {C.network}")
    raise ValueError

all_imgs, classes_count, class_mapping = get_data(C.train_path)
train_imgs, val_imgs = train_test_split(all_imgs,
                                        test_size=0.2,
                                        random_state=C.seed)

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

C.class_mapping = class_mapping

print(class_mapping)

inv_map = {v: k for k, v in class_mapping.items()}

print('Training images per class:')
def train_fasterrcnn():
    # config for data argument
    cfg = config.Config()
    cfg.balanced_classes = True
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 50  #50# 对于星图杯的光学遥感飞机检测,应该改为50+
    cfg.anchor_box_scales = [10, 30, 50, 80, 100]  #[41,70,120,20,90]
    cfg.anchor_box_ratios = [[1, 1.2], [1, 1],
                             [1.2,
                              1]]  #[[1,1.4],[1,0.84],[1,1.17],[1,0.64],[1,1]]
    #cfg.rpn_stride = 8
    cfg.im_size = 512
    cfg.num_epochs = 100
    cfg.epoch_length = 150  #1462

    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'DOTA2018_OpticalAircraft_bboxes.txt'  #'kitti_simple_label.txt'#'E:/Xingtubei/official_datas/OpticalAircraft/laptop_Chreoc_OpticalAircraft_bboxes.txt' # '/media/liuhuaqing/Elements/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt'#'F:/Xingtubei/official_datas/OpticalAircraft/Chreoc_OpticalAircraft_bboxes.txt' # 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(
        cfg.simple_label_file)  #读取数据集,cv2.imread()要求数据里不能有中文路径

    if 'bg' not in classes_count:  #'bg'应该是代表背景
        classes_count['bg'] = 0  # =0表示训练数据中没有“背景”这一类别
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}  #class_mapping的逆向map

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images
                  if s['imageset'] == 'trainval']  #训练集,列表形式,列表中的元素是字典
    val_imgs = [s for s in all_images
                if s['imageset'] == 'test']  #验证集,列表形式,列表中的元素是字典

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(
        train_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='train')  #数据扩增,然后生成frcnn所需的训练数据(如:图片、rpn的梯度等等)
    data_gen_val = data_generators.get_anchor_gt(
        val_imgs,
        classes_count,
        cfg,
        nn.get_img_output_length,
        K.image_dim_ordering(),
        mode='val')  #数据扩增,然后生成frcnn所需的验证数据(如:图片、rpn的梯度等等)

    # 根据keras实际用的后端,定义相应的输入数据维度,因为两类后端的维度顺序不一样
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)  #当后端是thaneo
    else:
        input_shape_img = (None, None, 3)  #(None, None, 3)#当后端是tensorflow

    img_input = Input(shape=input_shape_img)  # 输入图片
    roi_input = Input(shape=(None, 4))  # 输入人工标注的roi坐标,4表示x1,y1,x2,y2

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layer, shared_layers_stage3, shared_layers_stage4 = nn.nn_base(
        img_input,
        trainable=True)  # shared_layers是frcnn网络底部那些共享的层,在这里是ResNet。由nn定义好

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn_stage3 = nn.rpn(shared_layers_stage3, num_anchors)
    print(rpn_stage3[1].shape)
    rpn_stage4 = nn.rpn(shared_layers_stage4,
                        num_anchors)  # [x_class, x_regr, base_layers]
    print(rpn_stage4[1].shape)
    # x_class的shape是(?,sharelayer的w/2,sharelayer的h/2,scale数*ratio数),x_regr的shape是(?,sharelayer的w/2,sharelayer的h/2,4*scale数*ratio数)

    rpn = nn.rpn(shared_layer, num_anchors)
    print(rpn[1].shape)

    # 在这里合并两个rpn分支

    classifier = nn.classifier(shared_layer,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(
        inputs=img_input, outputs=rpn[:2]
    )  #rpn网络由keras_frcnn/resnet定义。rpn[:2]的前两个元素分别表示rpn网络的分类输出和回归输出
    model_classifier = Model(
        inputs=[img_input, roi_input],
        outputs=classifier)  #Keras的函数式模型为Model,即广义的拥有输入和输出的模型

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model(inputs=[img_input, roi_input],
                      outputs=rpn[:2] +
                      classifier)  #rpn[:2]+classifier的含义是??????

    try:
        # 尝试载入已训练网络权值
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)  # 定义一个Adam求解器,学习率lr
    optimizer_classifier = Adam(lr=1e-5)  # 定义一个Adam求解器,学习率lr
    # num_anchors等于9
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')  #mae表示绝对值均差

    epoch_length = cfg.epoch_length  # epoch_length是一个周期的迭代次数(也等于训练数据量)。每迭代epoch_length次就检查一次是否要保存网络权值,然后重置iter_num = 0
    num_epochs = int(cfg.num_epochs)
    iter_num = 0  # 迭代次数的初值

    losses = np.zeros((epoch_length, 5))  # 初始化loss数组,记录每个周期的loss
    rpn_accuracy_rpn_monitor = []  # 初始化一个数组,记录rpn的训练过程中的精度变化
    rpn_accuracy_for_epoch = []  # 初始化一个数组,记录rpn的每个训练周期的的精度变化
    start_time = time.time()  # 开始训练的时间

    best_loss = np.Inf  # 训练以来最小的loss

    class_mapping_inv = {v: k
                         for k, v in class_mapping.items()
                         }  # class_mapping_inv是一个字典,key是目标类别编号,value是类别名称
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)  # 生成一个进度条对象
        print('Epoch {}/{}'.format(epoch_num + 1,
                                   num_epochs))  # 输出当前训练周期数/总周期数

        while True:  # 什么时候才结束这个循环?答:第247行的break(每迭代epoch_length次)
            try:

                if len(
                        rpn_accuracy_rpn_monitor
                ) == epoch_length and cfg.verbose:  # 每epoch_length次训练周期就在窗口显示一次RPN平均精度
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )

                # X是原图,如kitti尺寸是(1,600,1987,3)。
                # Y是label,是有两个元素的list,
                #   其中第一个元素是类别,具体:shape是(1,share_layer_h,share_layer_w,2*scale数*ratio数),前一个元素为1(0)则表示是(不是)正或负样本,后一个为1(0)则表示是(不是)正样本
                #   第二个元素是bbox,具体:shape是(1,share_layer_h,share_layer_w,8*scale数*ratio数),前四个元素表示是不是正样,后四个元素才是bbox#为什么来个repeat赋值给前面一半
                # img_data是字典,包含文件名、尺寸、人工标记的roi和类别等
                #
                X, Y, img_data = next(data_gen_train)
                #Y_1=Y[0]
                #Y_1=Y_1[0,:,:,:]

                loss_rpn = model_rpn.train_on_batch(
                    X, Y)  #为什么Y的尺寸与P_rpn的尺寸不同?为什么loss_rpn的尺寸是3,含义是什么,在哪里定义的?

                P_rpn = model_rpn.predict_on_batch(
                    X)  #P_rpn的尺寸是(1, 124, 38, 9) (1, 124, 38, 36)

                result = roi_helpers.rpn_to_roi(
                    P_rpn[0],
                    P_rpn[1],
                    cfg,
                    K.image_dim_ordering(),
                    use_regr=True,
                    overlap_thresh=0.7,
                    max_boxes=300)  #result的尺寸是300*4
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # X2的尺寸是100*4,Y1的尺寸是1*100*8(8=训练集中目标类别总数),IouS尺寸是100
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping
                )  #Y2的尺寸是1*1*56,56=28*2,(28=4*7)前28是coords,后28是labels(是该类别则标1)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(
                    Y1[0, :, -1] == 1)  #Y1的尺寸是1*1*8表示分类预测结果,最后一个元素为1表示是背景
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]
                     ])  #用rpn输出的roi输入给classifier

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:  # 每迭代epoch_length次就检查一次是否要保存网络权值,然后重置iter_num = 0
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Ejemplo n.º 10
0
# pass the settings from the command line, and persist them in the config object
C = config.Config()

# set data augmentation
C.model_path = options.output_weight_path
C.num_rois = int(options.num_rois)

# we will use resnet. may change to vgg
if options.network == 'resnet50':
	from keras_frcnn import resnet as nn
	C.network = 'resnet50'
else:
	print('Not a valid model')
	raise ValueError

all_imgs, classes_count, _ = get_data(options.train_path)

# add background class as 21st class
if 'bg' not in classes_count:
	classes_count['bg'] = 0

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))

with open(config_output_filename, 'wb') as config_f:
	pickle.dump(C,config_f)
	print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(config_output_filename))

random.shuffle(all_imgs)
num_imgs = len(all_imgs)
Ejemplo n.º 11
0
def measure_map(config_output_filename, real_model_path):
    with open(config_output_filename, 'r') as f_in:
        C = pickle.load(f_in)

    # img_path = options.test_path
    img_path = '/home/comp/e4252392/map4frcnn.txt'

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.iteritems()}
    print(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }
    # C.num_rois = int(options.num_rois)
    C.num_rois = 32

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
        input_shape_features = (1024, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, 1024)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    model_rpn.load_weights(real_model_path, by_name=True)
    model_classifier.load_weights(real_model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs, _, _ = get_data(img_path)
    # test_imgs = [s for s in all_imgs if s['imageset'] == 'test']
    test_imgs = [s for s in all_imgs]

    T = {}
    P = {}
    print('Calculating mAP')
    st = time.time()
    for idx, img_data in enumerate(test_imgs):
        # print('{}/{}'.format(idx,len(test_imgs)))
        # st = time.time()
        filepath = img_data['filepath']

        img = cv2.imread(filepath)

        X, fx, fy = format_img(img, C)

        if K.image_dim_ordering() == 'tf':
            X = np.transpose(X, (0, 2, 3, 1))

        # get the feature maps and output from the RPN
        [Y1, Y2, F] = model_rpn.predict(X)

        R = roi_helpers.rpn_to_roi(Y1,
                                   Y2,
                                   C,
                                   K.image_dim_ordering(),
                                   overlap_thresh=0.7)

        # convert from (x1,y1,x2,y2) to (x,y,w,h)
        R[:, 2] -= R[:, 0]
        R[:, 3] -= R[:, 1]

        # apply the spatial pyramid pooling to the proposed regions
        bboxes = {}
        probs = {}

        for jk in range(R.shape[0] // C.num_rois + 1):
            ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :],
                                  axis=0)
            if ROIs.shape[1] == 0:
                break

            if jk == R.shape[0] // C.num_rois:
                # pad R
                curr_shape = ROIs.shape
                target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                ROIs_padded[:, :curr_shape[1], :] = ROIs
                ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                ROIs = ROIs_padded

            [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

            for ii in range(P_cls.shape[1]):

                if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                    continue

                cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                if cls_name not in bboxes:
                    bboxes[cls_name] = []
                    probs[cls_name] = []

                (x, y, w, h) = ROIs[0, ii, :]

                cls_num = np.argmax(P_cls[0, ii, :])
                try:
                    (tx, ty, tw, th) = P_regr[0, ii,
                                              4 * cls_num:4 * (cls_num + 1)]
                    tx /= C.classifier_regr_std[0]
                    ty /= C.classifier_regr_std[1]
                    tw /= C.classifier_regr_std[2]
                    th /= C.classifier_regr_std[3]
                    x, y, w, h = roi_helpers.apply_regr(
                        x, y, w, h, tx, ty, tw, th)
                except:
                    pass
                bboxes[cls_name].append(
                    [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                probs[cls_name].append(np.max(P_cls[0, ii, :]))

        all_dets = []
        album_ap = 0.0
        logo_ap = 0.0
        for key in bboxes:
            bbox = np.array(bboxes[key])

            new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                bbox, np.array(probs[key]), overlap_thresh=0.5)
            for jk in range(new_boxes.shape[0]):
                (x1, y1, x2, y2) = new_boxes[jk, :]
                det = {
                    'x1': x1,
                    'x2': x2,
                    'y1': y1,
                    'y2': y2,
                    'class': key,
                    'prob': new_probs[jk]
                }
                all_dets.append(det)

        # print('Elapsed time = {}'.format(time.time() - st))
        t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
        for key in t.keys():
            if key not in T:
                T[key] = []
                P[key] = []
            T[key].extend(t[key])
            P[key].extend(p[key])
        all_aps = []
        for key in T.keys():
            ap = average_precision_score(T[key], P[key])
            # print('{} AP: {}'.format(key, ap))
            all_aps.append(ap)

            if idx == len(test_imgs) - 1:
                if key == 'album':
                    album_ap = ap
                if key == 'logo':
                    logo_ap = ap

        # print('mAP = {}'.format(np.mean(np.array(all_aps))))
        if idx == len(test_imgs) - 1:
            mAP = np.mean(np.array(all_aps))
            print('Elapsed time = {}'.format(time.time() - st))
            print('album ap = {}'.format(album_ap))
            print('logo ap = {}'.format(logo_ap))
            print('mAP = {}'.format(mAP))

    return [album_ap, logo_ap, mAP]
                           C.num_rois,
                           nb_classes=len(class_mapping))

model_rpn = Model(img_input, rpn_layers)
model_classifier = Model([feature_map_input, roi_input], classifier)

# model loading
print('Loading weights from {}'.format(options.load))
model_rpn.load_weights(options.load, by_name=True)
model_classifier.load_weights(options.load, by_name=True)

model_rpn.compile(optimizer='adam', loss='mse')
model_classifier.compile(optimizer='adam', loss='mse')

from keras_frcnn.simple_parser import get_data
all_imgs, classes_count, class_mapping_2 = get_data(options.test_path,
                                                    test_only=True)

print(f'{len(all_imgs)} images to test.')

with open('log/frcnn/frcnn_results.csv', 'w') as result_csv:
    log_writer = csv.writer(result_csv, delimiter=';')
    log_writer.writerow([
        'iou_threshold', 'precision', 'recall', 'f1_score', 'avg_precision',
        'mean_distance', 'precision_b_small', 'precision_b_medium',
        'precision_b_large', 'recall_b_small', 'recall_b_medium',
        'recall_b_large', 'mean_distance_b_small', 'mean_distance_b_medium',
        'mean_distance_b_high'
    ])

for iou_threshold_tp in [0.1, 0.2, 0.3, 0.4]:
    # Confusion Matrix initialization
Ejemplo n.º 13
0
def testModel(config_filename='config_ui.pickle'):

    st.markdown('## Starting validation of test data set')
    sys.setrecursionlimit(40000)

    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.log_device_placement = True
    sess = tf.compat.v1.Session(config=config)
    K.set_session(sess)

    test_path = 'test'
    num_rois = 4
    config_filename = config_filename
    network = 'resnet50'

    config_output_filename = config_filename

    with open(config_output_filename, 'rb') as f_in:
        C = pickle.load(f_in)

    if C.network == 'resnet50':
        import keras_frcnn.resnet as nn
    elif C.network == 'vgg':
        import keras_frcnn.vgg as nn

    # turn off any data augmentation at test time
    C.use_horizontal_flips = False
    C.use_vertical_flips = False
    C.rot_90 = False

    img_path = test_path

    class_mapping = C.class_mapping

    if 'bg' not in class_mapping:
        class_mapping['bg'] = len(class_mapping)

    class_mapping = {v: k for k, v in class_mapping.items()}
    st.write('Class Mapping', class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    C.num_rois = int(num_rois)

    if C.network == 'resnet50':
        num_features = 1024
    elif C.network == 'vgg':
        num_features = 512

    if K.image_data_format() == 'channels_first':
        input_shape_img = (3, None, None)
        input_shape_features = (num_features, None, None)
    else:
        input_shape_img = (None, None, 3)
        input_shape_features = (None, None, num_features)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(C.num_rois, 4))
    feature_map_input = Input(shape=input_shape_features)

    ## Defining Model

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn_layers = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(feature_map_input,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)

    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    st.write(f'Loading weights from {C.model_path}')
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []

    classes = {}

    ## few hyper parameters to select bbox
    bbox_threshold = 0.9

    visualise = True

    set_Overlap_threshold = 0.1
    progress_bar = st.progress(0.0)
    with st.spinner('Wait for sample test images...'):

        for idx, img_name in enumerate(sorted(os.listdir(img_path))):
            progress_bar.progress((idx + 0.1) / len(os.listdir(img_path)))
            if not img_name.lower().endswith(
                ('.bmp', '.jpeg', '.jpg', '.png', '.tif', '.tiff')):
                continue
            st.write(img_name)
            stTime = time.time()
            filepath = os.path.join(img_path, img_name)

            img = cv2.imread(filepath)

            X, ratio = format_img(img, C)

            if K.image_data_format() == 'channels_last':
                X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = roi_helpers.rpn_to_roi(Y1,
                                       Y2,
                                       C,
                                       K.image_data_format(),
                                       overlap_thresh=0.7)  #0.7

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    print("ROI Shape: ", ROIs.shape[1])
                    break

                if jk == R.shape[0] // C.num_rois:
                    #pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                for ii in range(P_cls.shape[1]):
                    #print("np max:",np.max(P_cls[0, ii, :]))
                    #print("np argmax:",np.argmax(P_cls[0, ii, :]))
                    #print(np.max(P_cls[0, ii, :]) < bbox_threshold)
                    if np.max(P_cls[0, ii, :]) < bbox_threshold or np.argmax(
                            P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]
                    #print('class name:',cls_name)
                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append([
                        C.rpn_stride * x, C.rpn_stride * y,
                        C.rpn_stride * (x + w), C.rpn_stride * (y + h)
                    ])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            for key in bboxes:
                bbox = np.array(bboxes[key])

                new_boxes, new_probs = roi_helpers.non_max_suppression_fast(
                    bbox,
                    np.array(probs[key]),
                    overlap_thresh=set_Overlap_threshold)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]

                    (real_x1, real_y1, real_x2,
                     real_y2) = get_real_coordinates(ratio, x1, y1, x2, y2)

                    cv2.rectangle(img, (real_x1, real_y1), (real_x2, real_y2),
                                  (int(class_to_color[key][0]),
                                   int(class_to_color[key][1]),
                                   int(class_to_color[key][2])), 2)

                    textLabel = f'{key}: {int(100*new_probs[jk])}'
                    all_dets.append((key, 100 * new_probs[jk]))

                    (retval,
                     baseLine) = cv2.getTextSize(textLabel,
                                                 cv2.FONT_HERSHEY_COMPLEX, 1,
                                                 1)
                    textOrg = (real_x1, real_y1 - 0)

                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5), (0, 0, 0), 2)
                    cv2.rectangle(img,
                                  (textOrg[0] - 5, textOrg[1] + baseLine - 5),
                                  (textOrg[0] + retval[0] + 5,
                                   textOrg[1] - retval[1] - 5),
                                  (255, 255, 255), -1)
                    cv2.putText(img, textLabel, textOrg,
                                cv2.FONT_HERSHEY_DUPLEX, 1, (0, 0, 0), 1)

            st.write(f'Elapsed time = {time.time() - stTime}')
            st.write(all_dets)
            plt.figure(figsize=(10, 10))
            plt.grid()
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            #plt.show()
            st.image(img, use_column_width=True, clamp=True)

            #cv2.imwrite('./results_imgs-fp-mappen-test/{}.png'.format(os.path.splitext(str(img_name))[0]),img)

    from keras_frcnn.simple_parser import get_data
    test_path = 'test_annotationAlt.txt'  # Test data (annotation file)

    startTime = time.time()
    test_imgs, classes_count, class_mapping = get_data(test_path)

    st.write('Spend %0.2f mins to load test data' %
             ((time.time() - startTime) / 60))

    class_mapping = C.class_mapping
    class_mapping = {v: k for k, v in class_mapping.items()}
    st.write(class_mapping)
    class_to_color = {
        class_mapping[v]: np.random.randint(0, 255, 3)
        for v in class_mapping
    }

    from sklearn.metrics import average_precision_score
    set_Overlap_threshold = 0.1

    T = {}
    P = {}
    mAPs = []
    iou_map = []

    progress_bar1 = st.progress(0.0)
    with st.spinner('Wait for test set evaluation...'):

        for idx, img_data in enumerate(test_imgs):
            progress_bar1.progress((idx + 0.1) / len(test_imgs))
            st.write('{}/{}'.format(idx, len(test_imgs)))
            startTime = time.time()
            filepath = img_data['filepath']

            img = cv2.imread(filepath)

            X, fx, fy = format_img_map(img, C)

            # Change X (img) shape from (1, channel, height, width) to (1, height, width, channel)
            X = np.transpose(X, (0, 2, 3, 1))

            # get the feature maps and output from the RPN
            [Y1, Y2, F] = model_rpn.predict(X)

            R = rpn_to_roi(Y1,
                           Y2,
                           C,
                           K.image_data_format(),
                           overlap_thresh=0.7)

            # convert from (x1,y1,x2,y2) to (x,y,w,h)
            R[:, 2] -= R[:, 0]
            R[:, 3] -= R[:, 1]

            # apply the spatial pyramid pooling to the proposed regions
            bboxes = {}
            probs = {}

            for jk in range(R.shape[0] // C.num_rois + 1):
                ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois *
                                        (jk + 1), :],
                                      axis=0)
                if ROIs.shape[1] == 0:
                    break

                if jk == R.shape[0] // C.num_rois:
                    # pad R
                    curr_shape = ROIs.shape
                    target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
                    ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
                    ROIs_padded[:, :curr_shape[1], :] = ROIs
                    ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
                    ROIs = ROIs_padded

                [P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

                # Calculate all classes' bboxes coordinates on resized image (300, 400)
                # Drop 'bg' classes bboxes
                for ii in range(P_cls.shape[1]):

                    # If class name is 'bg', continue

                    if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
                        continue

                    # Get class name
                    cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

                    if cls_name not in bboxes:
                        bboxes[cls_name] = []
                        probs[cls_name] = []

                    (x, y, w, h) = ROIs[0, ii, :]

                    cls_num = np.argmax(P_cls[0, ii, :])
                    try:
                        (tx, ty, tw,
                         th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
                        tx /= C.classifier_regr_std[0]
                        ty /= C.classifier_regr_std[1]
                        tw /= C.classifier_regr_std[2]
                        th /= C.classifier_regr_std[3]
                        x, y, w, h = roi_helpers.apply_regr(
                            x, y, w, h, tx, ty, tw, th)
                    except:
                        pass
                    bboxes[cls_name].append(
                        [16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
                    probs[cls_name].append(np.max(P_cls[0, ii, :]))

            all_dets = []

            for key in bboxes:
                bbox = np.array(bboxes[key])

                # Apply non-max-suppression on final bboxes to get the output bounding boxe
                new_boxes, new_probs = non_max_suppression_fast(
                    bbox,
                    np.array(probs[key]),
                    overlap_thresh=set_Overlap_threshold)
                for jk in range(new_boxes.shape[0]):
                    (x1, y1, x2, y2) = new_boxes[jk, :]
                    det = {
                        'x1': x1,
                        'x2': x2,
                        'y1': y1,
                        'y2': y2,
                        'class': key,
                        'prob': new_probs[jk]
                    }
                    all_dets.append(det)

            st.write('Elapsed time = {}'.format(time.time() - startTime))
            t, p, iouVal = get_map(all_dets, img_data['bboxes'], (fx, fy))
            for key in t.keys():
                if key not in T:
                    T[key] = []
                    P[key] = []
                T[key].extend(t[key])
                P[key].extend(p[key])
            all_aps = []
            for key in T.keys():
                ap = average_precision_score(T[key], P[key])
                st.write('{} AP: {}'.format(key, ap))
                all_aps.append(ap)
            st.write('mAP = {}'.format(np.mean(np.array(all_aps))))
            st.write('iou = {}'.format(np.mean(iouVal)))
            mAPs.append(np.mean(np.array(all_aps)))
            iou_map.append(iouVal)

    st.markdown('## Mean IOU:', Average(iou_map))
    st.markdown('## Mean average precision:', np.mean(np.array(mAPs)))
Ejemplo n.º 14
0
def train_mscoco():
    # ===========================模型的配置和加载======================================
    # config for data argument
    cfg = config.Config()
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    #resnet前四卷积部分的权值
    cfg.base_net_weights = nn.get_weight_path()
    #保存模型的权重值
    cfg.model_path = './model/mscoco_frcnn.hdf5'
    #all_images, class_mapping = get_data()
    #加载训练的图片
    train_imgs, class_mapping = get_data('train')

    cfg.class_mapping = class_mapping
    print('Num classes (including bg) = {}'.format(len(class_mapping)))
    #保存所有的配置文件
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))
    #图片随机洗牌
    random.shuffle(train_imgs)
    print('Num train samples {}'.format(len(train_imgs)))
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   class_mapping,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    # ==============================================================================

    # ===============================模型的定义======================================
    #keras内核为tensorflow
    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    # define the base resnet50 network
    shared_layers = nn.nn_base(img_input, trainable=False)
    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(class_mapping),
                               trainable=True)
    #model(input=,output=)
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # ==============================================================================

    # ===========================基本模型加载ImageNet权值=============================
    try:
        print('loading base model weights from {}'.format(
            cfg.base_net_weights))
        model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print('基本模型加载ImageNet权值: ', e)
        print('Could not load pretrained model weights on ImageNet.')
    # ==============================================================================

    # ===============================模型优化========================================
    #在调用model.compile()之前初始化一个优化器对象,然后传入该函数
    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(class_mapping) - 1)
        ],
        metrics={'dense_class_{}'.format(len(class_mapping)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')
    # ==============================================================================

    # ================================训练、输出设置==================================
    epoch_length = len(train_imgs)
    num_epochs = int(cfg.num_epochs)
    iter_num = 0
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    logger = Logger(os.path.join('.', 'log.txt'))
    # ==============================================================================

    print('Starting training')
    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        logger.write('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )
                #图片,标准的cls、rgr,盒子数据
                X, Y, img_data = next(data_gen_train)

                #训练rpn
                loss_rpn = model_rpn.train_on_batch(X, Y)

                #边训练rpn得到的区域送入roi
                #x_class, x_regr, base_layers
                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                #区域、cls、rgr、iou
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                #训练classifier
                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        logger.write(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        logger.write(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        logger.write(
                            'Loss RPN classifier: {}'.format(loss_rpn_cls))
                        logger.write(
                            'Loss RPN regression: {}'.format(loss_rpn_regr))
                        logger.write('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        logger.write('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        logger.write('Elapsed time: {}'.format(time.time() -
                                                               start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            logger.write(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Ejemplo n.º 15
0
# model loading
C.model_path = options.load
print('Loading weights from {}'.format(C.model_path))
model_rpn.load_weights(C.model_path, by_name=True)

optimizer = Adam(lr=1e-3, clipnorm=0.001)
model_rpn.compile(optimizer=optimizer,
                  loss=[
                      loss_func.rpn_loss_cls(num_anchors),
                      loss_func.rpn_loss_regr(num_anchors)
                  ])

#### load images here ####
from keras_frcnn.simple_parser import get_data
all_imgs, classes_count, class_mapping = get_data(options.test_path,
                                                  test_only=False)

train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
data_gen_val = data_generators.get_anchor_gt(train_imgs,
                                             classes_count,
                                             C,
                                             nn.get_img_output_length,
                                             K.common.image_dim_ordering(),
                                             mode='val')

with open('log/losses.csv', 'w') as log:
    log_writer = csv.writer(log, delimiter=';')
    log_writer.writerow(['loss', 'loss_rpn_cls', 'loss_rpn_regr', 'img'])
    for img in train_imgs:
        X_gen, Y_gen, imgdata = next(data_gen_val)
Ejemplo n.º 16
0
def train_net():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = False
    cfg.use_vertical_flips = False
    cfg.rot_90 = False
    cfg.num_rois = 32  # config中设置的是4
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = 'samples.hdf5'

    cfg.simple_label_file = 'annotations_train.txt' # 训练集产生的标签

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    # there图片
    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')

    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    # classifier是什么?
    # classes_count {} 每一个类的数量:{'cow': 4, 'dog': 10, ...}
    # C.num_rois每次取的感兴趣区域,默认为32
    # roi_input = Input(shape=(None, 4)) 框框
    # classifier是faster rcnn的两个损失函数[out_class, out_reg]
    # shared_layers是vgg的输出feature map
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True)
    # 定义model_rpn
    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:
                # 用来监督每一次epoch的平均正回归框的个数
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                            mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        # 每次都框不到正样本,说明rpn有问题
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                # 迭代器,取数据
                # 训练rpn网络,X是图片,Y是对应类别和回归梯度(不是所有的点都参加训练,符合条件才参加训练)
                # next(data_gen_train)是一个迭代器。
                # 返回的是 np.copy(x_img), [np.copy(y_rpn_cls), np.copy(y_rpn_regr)],
                # img_data_aug(我们这里假设数据没有进行水平翻转等操作。那么,x_img = img_data_aug),
                # y_rpn_cls和y_rpn_regr是RPN的两个损失函数。
                X, Y, img_data = next(data_gen_train)


                # classifer和rpn网络交叉训练
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                # result是得到的预选框
                # 得到了region proposals,接下来另一个重要的思想就是ROI pooling,
                # 可将不同shape的特征图转化为固定shape,送到全连接层进行最终的预测。
                # rpn_to_roi接收的是每张图片的预测输出,返回的R = [boxes, probs]
                # ---------------------
                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)

                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                # Y1根据预选框,得到这个预选框属于哪一类,
                # Y2这个类相应的回归梯度
                # X2是返回这个框
                """
                # 通过calc_iou()找出剩下的不多的region对应ground truth里重合度最高的bbox,从而获得model_classifier的数据和标签。
                # X2保留所有的背景和match bbox的框; Y1 是类别one-hot转码; Y2是对应类别的标签及回归要学习的坐标位置; IouS是debug用的。
                """
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)

                if X2 is None:
                    # 如果没有有效的预选框则结束本次循环
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                # 因为是one—hot,最后一位是1,则代表是背景
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0] # 将其变为1维的数组
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    # 选择num_rois个数的框,送入classifier网络进行训练。 分类网络一次要训练多少个框
                    # 思路:当num_rois大于1的时候正负样本尽量取到一半,小于1的时候正负样本随机取一个。
                    if len(pos_samples) < cfg.num_rois // 2:
                        # 挑选正样本
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()
                    try:
                        # 挑选负样本
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                # 训练classifier网络
                # 是从位置中挑选,
                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                             [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                #
                losses[iter_num, 0] = loss_rpn[1] # rpn_cls平均值
                losses[iter_num, 1] = loss_rpn[2] # rpn_regr平均值

                losses[iter_num, 2] = loss_class[1] # detector_cls平均值
                losses[iter_num, 3] = loss_class[2] # detector_regr平均值
                losses[iter_num, 4] = loss_class[3] # 4是准确率

                iter_num += 1

                # 进度条更新
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])  # loss中存放了每一次训练出的losses
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        # 打印出前n次loss的平均值
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        # 当结束一轮的epoch时,只有当这轮epoch的loss小于最优的时候才会存储这轮的训练数据,
                        # 并结束这轮epoch进入下一轮epoch。
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Ejemplo n.º 17
0
    model_rpn = Model(img_input, rpn_layers)
    model_classifier_only = Model([feature_map_input, roi_input], classifier)

    model_classifier = Model([feature_map_input, roi_input], classifier)

    print('Loading weights from {}'.format(
        C.model_path))  # model_path specified in config file
    model_rpn.load_weights(C.model_path, by_name=True)
    model_classifier.load_weights(C.model_path, by_name=True)

    model_rpn.compile(optimizer='sgd', loss='mse')
    model_classifier.compile(optimizer='sgd', loss='mse')

    all_imgs = []
    classes = {}
    all_imgs, _, _ = get_data(options.test_path)
    #test_imgs = [s for s in all_imgs if s['imageset'] == 'train']      # mAP
    print("DEBUGGING 218: test imgs: ", len(test_imgs))
    classification_threshold = 0.8  # threshold above which we classify as positive

    # for mAP
    T, P = {}, {}

    counter = 0
    for idx, img_data in enumerate(test_imgs):
        print('{}/{}'.format(idx, len(test_imgs)))
        img_name = img_data['filepath'].split('/')[-1]
        print("DEBUGGING 232 img_name:", img_name)
        print("img {}: {}".format(str(counter), img_name))
        counter += 1
        start_time = time.time()
Ejemplo n.º 18
0
def build_and_train(hype_space, save_best_weights=False):
    train_path = '/home/comp/e4252392/retraindata4frcnn.txt'
    config_output_filename = '/home/comp/e4252392/hyperopt/hyperopt_config.pickle'
    num_epochs = 20
    #for retrain best model only
    diagnose_path = '/home/comp/e4252392/hyperopt/models/hyperopt_loss_ap_plt.npy'
    real_model_path = '/home/comp/e4252392/hyperopt/models/hyperopt_model_plt_'

    print("Hyperspace:")
    print(hype_space)
    C = config.Config()
    C.num_rois = int(hype_space['num_rois'])  #why int?
    # C.anchor_box_scales = hype_space['anchor_box_scales']
    # C.base_net_weights = '/home/comp/e4252392/second_res_more_epoch.h5'
    C.base_net_weights = 'model_frcnn.hdf5'

    #data
    all_imgs, classes_count, class_mapping = get_data(train_path)
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)
    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)
    train_imgs = [s for s in all_imgs]
    print('Num train samples {}'.format(len(train_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    #data

    # build_model
    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    shared_layers = nn.nn_base(int(hype_space['kernel_size']),
                               img_input,
                               trainable=True)

    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(int(hype_space['kernel_size']), shared_layers, num_anchors)

    classifier = nn.classifier(int(hype_space['kernel_size']),
                               shared_layers,
                               roi_input,
                               C.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except:
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder \
			https://github.com/fchollet/keras/tree/master/keras/applications')

    # optimizer = Adam(lr=1e-5)
    # optimizer_classifier = Adam(lr=1e-5)
    optimizer = Adam(lr=hype_space['optimizer_lr'],
                     decay=hype_space['optimizer_decay'])
    optimizer_classifier = Adam(lr=hype_space['optimizer_lr'],
                                decay=hype_space['optimizer_decay'])
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          thelosses.rpn_loss_cls(num_anchors),
                          thelosses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            thelosses.class_loss_cls,
            thelosses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    sgd = SGD(lr=hype_space['sgd_lr'], decay=hype_space['sgd_decay'])
    model_all.compile(optimizer=sgd, loss='mae')
    # build_model

    #build_and_train
    epoch_length = 10
    iter_num = 0
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    print('Starting training')

    loss_array = []
    ap_array = []
    epoch_array = []
    epoch_array.append(0)

    result = {}
    model_name = ''

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                # train
                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=0.7,
                                           max_boxes=300)
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue
                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)
                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []
                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])
                # train

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    # result
                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss

                        if save_best_weights:
                            real_model_path = real_model_path + str(
                                epoch_num + 1) + '.hdf5'
                            model_all.save_weights(real_model_path,
                                                   overwrite=True)
                            print("Best weights so far saved to " +
                                  real_model_path + ". best_loss = " +
                                  str(best_loss))
                            epoch_array.append(epoch_num + 1)
                            loss_array.append([
                                loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                                loss_class_regr, best_loss
                            ])
                            album_ap, logo_ap, mAP = measure_map.measure_map(
                                config_output_filename, real_model_path)
                            ap_array.append([album_ap, logo_ap, mAP])
                            np.save(diagnose_path,
                                    [epoch_array, loss_array, ap_array])
                        else:
                            album_ap = 'not applicable'
                            logo_ap = 'not applicable'
                            mAP = 'not applicable'
                        model_name = "model_{}_{}".format(
                            str(best_loss),
                            str(uuid.uuid4())[:5])
                        result = {
                            'loss': best_loss,
                            'loss_rpn_cls': loss_rpn_cls,
                            'loss_rpn_regr': loss_rpn_regr,
                            'loss_class_cls': loss_class_cls,
                            'loss_class_regr': loss_class_regr,
                            'album_ap': album_ap,
                            'logo_ap': logo_ap,
                            'mAP': mAP,
                            'model_name': model_name,
                            'space': hype_space,
                            'status': STATUS_OK
                        }
                        print("RESULT UPDATED.")
                        print("Model name: {}".format(model_name))
                    # result
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
    print("BEST MODEL: {}".format(model_name))
    print("FINAL RESULT:")
    print_json(result)
    save_json_result(model_name, result)
    try:
        K.clear_session()
        del model_all, model_rpn, model_classifier
    except Exception as err:
        try:
            K.clear_session()
        except:
            pass
        err_str = str(err)
        print(err_str)
        traceback_str = str(traceback.format_exc())
        print(traceback_str)
        return {
            'status': STATUS_FAIL,
            'err': err_str,
            'traceback': traceback_str
        }
    print("\n\n")
    return model_name, result
Ejemplo n.º 19
0
C = config.Config()
C.num_rois = 6

C.use_vertical_flips = True
C.use_horizontal_flips = True
C.scale_augment = True
C.rot_90 = True
C.balanced_classes = False

C2 = config.Config()
C2.num_rois = 6

#from keras_frcnn.pascal_voc_parser import get_data
from keras_frcnn.simple_parser import get_data

all_imgs, classes_count, class_mapping = get_data('C:/Fraps/im2txt.txt')

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

with open('classes.json', 'w') as class_data_json:
    json.dump(class_mapping, class_data_json)

inv_map = {v: k for k, v in class_mapping.items()}

#pprint.pprint(classes_count)
print(('Num classes (including bg) = {}'.format(len(classes_count))))
random.shuffle(all_imgs)

num_imgs = len(all_imgs)
Ejemplo n.º 20
0
def Train_frcnn(
        train_path='./data/flickr_logos_27_dataset_training_set_annotation.txt',  # path to the text file containing the data
        network_arch='vgg',  # the type of the base faster rcnn network architecture
        num_epochs=50,  # num of epochs
        output_weight_path='./models/model_frcnn.hdf5',  # path to save the model_all.weights as hdf5
        preprocessing_function=None,
        config_filename="config.pickle",
        input_weights_path='./models/vgg16_weights_tf_dim_ordering_tf_kernels.h5',
        train_rpn=True,
        train_final_classifier=True,
        train_base_nn=True,
        losses_to_watch=['rpn_cls', 'rpn_reg', 'final_cls', 'final_reg'],
        tb_log_dir="log",
        num_rois=32,
        horizontal_flips=False,
        vertical_flips=False,
        rot_90=False,
        anchor_box_scales=[128, 256, 512],
        anchor_box_ratios=[[1, 1], [1. / math.sqrt(2), 2. / math.sqrt(2)],
                           [2. / math.sqrt(2), 1. / math.sqrt(2)]],
        im_size=600,
        rpn_stride=16,  # depends on network architecture
        visualize_model=None,
        verify_trainable=True,
        optimizer_rpn=Adam(lr=1e-5),
        optimizer_classifier=Adam(lr=1e-5),
        validation_interval=3,
        rpn_min_overlap=0.3,
        rpn_max_overlap=0.7,
        classifier_min_overlap=0.1,
        classifier_max_overlap=0.5,
        rpn_nms_threshold=0.7,  # original implementation
        seed=5000):
    """
    Trains a Faster RCNN for object detection in keras
    
    NOTE: This trains 2 models namely model_rpn and model_classifer with the same shared base_nn (fixed feature extractor)
          
    Keyword Arguments
    train_path -- str: path to the text file or pascal_voc (no Default)
    network_arch --object: the full faster rcnn network .py file passed as an object (no default)
    num_epochs -- int: number of epochs to train (no Default)
    output_weight_path --str: path to save the frcnn weights (no Default)
    preprocessing_function --function: Optional preprocessing function (must be defined like given in keras docs) (Default None)
    config_filename --str: Path to save the config file. Used when testing (Default "config.pickle")
    input_weight_path --str: Path to hdf5 file containing weights for the model (Default None)
                             you can pass path to both classification and detection checkpoints as long as the names dont' change
    train_rpn --bool: whether to train the rpn layer (Default True)
    train_final_classifier --bool:Whether to train the final_classifier (Fast Rcnn layer) (Default True)
    train_base_nn --bool:Whether to train the base_nn/fixed_feature_extractor (Default True)
    losses_to_watch --list: A list of losses to watch (Default ['rpn_cls','rpn_reg','final_cls','final_reg']).
                            The losses in this list are added and then weights are saved wrt to that.
                            The list can contain any combination of the above 4 only.
    tb_log_dir --str: path to log dir for tensorboard logging (Default 'log')
    num_rois --int: The number of rois to use at once (Default = 32)
    horizontal_flips --bool: augment training data by horizontal flips (Default False)
    vertical_flips --bool: augment training data by vertical flips (Default False)
    rot_90 --bool: augment training data by 90 deg rotations (Default False)
    anchor_box_scales --list: The list of anchor box scales to use (Default [128,256,512])
    anchor_box ratios --list of list: The list of anchorbox aspect ratios to use (Default [[1, 1], [1./math.sqrt(2), 2./math.sqrt(2)], [2./math.sqrt(2), 1./math.sqrt(2)]])
    im_size --int: The size to resize the image (Default 600). This is the smallest side of Pascal VOC format
    rpn_stride --int: The stride for rpn (Default = 16)
    visualize_model --str: Path to save the model as .png file
    verify_trainable --bool: print layer wise names and prints if it is trainable or not (Default True)
    optimizer_rpn --keras.optimizer: The optimizer for rpn (Default Adam(lr=1e-5))
    optimizer_classifier --keras.optimizer: The optimizer for classifier (Default Adam(lr=1e-5))
    validation_interval --int: The frequency (in epochs) to do validation. supply 0 if no validation
    rpn_min_overlap --float: (0,1) The Min IOU in rpn layer (Default 0.3) (original implementation)
    rpn_max_overlap --float: (0,1) The max IOU in rpn layer (Default 0.7) (original implementation)
    classifier_min_overlap --float: (0,1) same as above but in final classifier (Default 0.1) (original implementation)
    classifier_max_overlap --float: (0,1) same as above (Default 0.5) (original implementation)
    rpn_nms_threshold --float :(0,1) The threshold above which to supress the bbox using Non max supression in rpn (Default 0.7)(from original implementation)
    seed --int: To seed the random shuffling of training data (Default = 5000)
    
    Performing alternating training:
    - Use the train_rpn,train_final_classifier and train_base_nn boolean arguments to accomplish
    alternating training.
    - While using the above arguments change the members of losses_to_watch = ['rpn_cls','rpn_reg','final_cls','final_reg']
      accordingly else it will throw error
    - for eg if you are training only the base_nn and the rpn set:
         train_rpn = True
         train_base_nn = True
         train_final_classifier = False
         losses_to_watch = ['rpn_cls','rpn_reg'] (do not include 'final_cls', 'final_reg')
    
    OUTPUT:
    prints the training log. Does not return anything
    
    Save details:
    1.saves the weights of the full FRCNN model as .h5
    2.saves a tensorboard file
    3.saves the history of weights saved in ./saving_log.txt so that it can be known at which epoch the model is saved
    4.saves the model configuration as a .pickle file
    5.optionally saves the full FRCNN architecture as .png
    
    NOTE: 
    as of now the batch size = 1
    Prints loss = 0 for losses from model which is not being trained
    
    TODO: The training is a bit slow because of the data generation step. Generate_data in multiple threads and queue them for faster training
    
    """
    check_list = ['rpn_cls', 'rpn_reg', 'final_cls', 'final_reg']
    for n in losses_to_watch:
        if n not in check_list:
            raise ValueError(
                "unsupported loss the supported losses are: {}".format(
                    check_list))

    if not train_rpn:
        if "rpn_cls" in losses_to_watch or "rpn_reg" in losses_to_watch:
            raise ValueError(
                "Cannot watch rpn_cls and rpn_reg when train_rpn == False")
    if not train_final_classifier:
        if "final_cls" in losses_to_watch or "final_reg" in losses_to_watch:
            raise ValueError(
                "cannot watch final_cls and final_reg when train_final_classifier == False"
            )

    if network_arch == 'vgg':
        from keras_frcnn import nn_arch_vgg16 as nn
    elif network_arch == 'resnet50':
        from keras_frcnn import nn_arch_resnet50 as nn
    else:
        print('Not a valid model')
        raise ValueError

    random.seed(seed)
    np.random.seed(seed)

    # pass the settings from the function call, and persist them in the config object
    C = config.Config()
    C.rpn_max_overlap = rpn_max_overlap
    C.rpn_min_overlap = rpn_min_overlap
    C.classifier_min_overlap = classifier_min_overlap
    C.classifier_max_overlap = classifier_max_overlap
    C.anchor_box_scales = anchor_box_scales
    C.anchor_box_ratios = anchor_box_ratios
    C.im_size = im_size
    C.use_horizontal_flips = bool(horizontal_flips)
    C.use_vertical_flips = bool(vertical_flips)
    C.rot_90 = bool(rot_90)
    C.rpn_stride = rpn_stride
    C.rpn_nms_threshold = rpn_nms_threshold
    C.weights_all_path = output_weight_path
    C.num_rois = int(num_rois)

    # check if weight path was passed via command line
    if input_weights_path:
        C.initial_weights = input_weights_path

    all_imgs, classes_count, class_mapping = get_data(train_path)

    print("The class mapping is:")
    print(class_mapping)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    with open(config_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(config_filename))

    np.random.shuffle(all_imgs)

    train_imgs = [s for s in all_imgs if s['imageset'] == 'train']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    input_shape_img = (None, None, 3)
    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))
    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=train_base_nn)
    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors, trainable=train_rpn)
    # define the classifier, built on base layers
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               C.num_rois,
                               len(classes_count),
                               trainable=train_final_classifier)
    # create models
    model_base = Model(img_input,
                       shared_layers)  # for computing the output shape
    model_rpn = Model(img_input, rpn[:2])  # used for training
    model_classifier = Model([img_input, roi_input],
                             classifier)  # used for training
    # this is a model that holds both the RPN and the classifier, used to load/save and freeze/unfreeze weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # tensorboard
    tbCallBack = TensorBoard(log_dir=tb_log_dir,
                             histogram_freq=1,
                             write_graph=False,
                             write_images=False)
    tbCallBack.set_model(model_all)

    #NOTE: both model_rpn and model_classifer contains the base_nn

    try:
        print('loading weights from {}'.format(C.initial_weights))
        model_all.load_weights(C.initial_weights, by_name=True)
    except:
        print('Could not load pretrained model weights')

    # number of trainable parameters
    trainable_count = int(
        np.sum([K.count_params(p) for p in set(model_all.trainable_weights)]))
    non_trainable_count = int(
        np.sum(
            [K.count_params(p) for p in set(model_all.non_trainable_weights)]))

    print('Total params: {:,}'.format(trainable_count + non_trainable_count))
    print('Trainable params: {:,}'.format(trainable_count))
    print('Non-trainable params: {:,}'.format(non_trainable_count))

    if verify_trainable:
        for layer in model_all.layers:
            print(layer.name, layer.trainable)

    model_rpn.compile(optimizer=optimizer_rpn,
                      loss=[
                          Losses.rpn_loss_cls(num_anchors),
                          Losses.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            Losses.class_loss_cls,
            Losses.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mse')
    # save model_all as png for visualization
    if visualize_model != None:
        # from IPython.display import SVG
        # from keras.utils.vis_utils import model_to_dot
        # SVG(model_to_dot(model_all).create(prog='dot', format='svg'))
        plot_model(model=model_all,
                   to_file=visualize_model,
                   show_shapes=True,
                   show_layer_names=True)

    epoch_length = len(train_imgs)
    validation_epoch_length = len(val_imgs)
    num_epochs = int(num_epochs)
    iter_num = 0

    # train and valid data generator
    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   C,
                                                   model_base,
                                                   K.image_dim_ordering(),
                                                   preprocessing_function,
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 C,
                                                 model_base,
                                                 K.image_dim_ordering(),
                                                 preprocessing_function,
                                                 mode='val')

    losses_val = np.zeros((validation_epoch_length, 5))
    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf
    val_best_loss = np.Inf
    val_best_loss_epoch = 0

    print('Starting training')

    def write_log(callback, names, logs, batch_no):
        for name, value in zip(names, logs):
            summary = tf.Summary()
            summary_value = summary.value.add()
            summary_value.simple_value = value
            summary_value.tag = name
            callback.writer.add_summary(summary, batch_no)
            callback.writer.flush()

    train_names = [
        'train_loss_rpn_cls', 'train_loss_rpn_reg', 'train_loss_class_cls',
        'train_loss_class_reg', 'train_total_loss', 'train_acc'
    ]
    val_names = [
        'val_loss_rpn_cls', 'val_loss_rpn_reg', 'val_loss_class_cls',
        'val_loss_class_reg', 'val_total_loss', 'val_acc'
    ]

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
                        )

                X, Y, img_data = next(data_gen_train)

                if train_rpn:
                    loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0],
                                           P_rpn[1],
                                           C,
                                           K.image_dim_ordering(),
                                           use_regr=True,
                                           overlap_thresh=C.rpn_nms_threshold,
                                           flag="train")
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                    R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, C.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            C.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                if train_final_classifier:
                    loss_class = model_classifier.train_on_batch(
                        [X, X2[:, sel_samples, :]],
                        [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                # losses

                if train_rpn:
                    losses[iter_num, 0] = loss_rpn[1]
                    losses[iter_num, 1] = loss_rpn[2]
                else:
                    losses[iter_num, 0] = 0
                    losses[iter_num, 1] = 0

                if train_final_classifier:
                    losses[iter_num, 2] = loss_class[1]
                    losses[iter_num, 3] = loss_class[2]
                    losses[iter_num, 4] = loss_class[3]  # accuracy
                else:
                    losses[iter_num, 2] = 0
                    losses[iter_num, 3] = 0
                    losses[iter_num, 4] = 0

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    if train_rpn:
                        loss_rpn_cls = np.mean(losses[:, 0])
                        loss_rpn_regr = np.mean(losses[:, 1])
                    else:
                        loss_rpn_cls = 0
                        loss_rpn_regr = 0

                    if train_final_classifier:
                        loss_class_cls = np.mean(losses[:, 2])
                        loss_class_regr = np.mean(losses[:, 3])
                        class_acc = np.mean(losses[:, 4])
                    else:
                        loss_class_cls = 0
                        loss_class_regr = 0
                        class_acc = 0

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    loss_dict_train = {
                        "rpn_cls": loss_rpn_cls,
                        "rpn_reg": loss_rpn_regr,
                        "final_cls": loss_class_cls,
                        "final_reg": loss_class_regr
                    }

                    curr_loss = 0
                    for l in losses_to_watch:
                        curr_loss += loss_dict_train[l]

                    iter_num = 0
                    start_time = time.time()
                    write_log(tbCallBack, train_names, [
                        loss_rpn_cls, loss_rpn_regr, loss_class_cls,
                        loss_class_regr, curr_loss, class_acc
                    ], epoch_num)

                    if curr_loss < best_loss:
                        if C.verbose:
                            print(
                                'Total loss decreased from {} to {} in training, saving weights'
                                .format(best_loss, curr_loss))
                            save_log_data = '\nTotal loss decreased from {} to {} in epoch {}/{} in training, saving weights'.format(
                                best_loss, curr_loss, epoch_num + 1,
                                num_epochs)
                            with open("./saving_log.txt", "a") as f:
                                f.write(save_log_data)

                        best_loss = curr_loss
                        model_all.save_weights(C.weights_all_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

        if validation_interval > 0:
            # validation
            if (epoch_num + 1) % validation_interval == 0:
                progbar = generic_utils.Progbar(validation_epoch_length)
                print("Validation... \n")
                while True:
                    try:
                        X, Y, img_data = next(data_gen_val)

                        if train_rpn:
                            val_loss_rpn = model_rpn.test_on_batch(X, Y)

                        P_rpn = model_rpn.predict_on_batch(X)
                        R = roi_helpers.rpn_to_roi(
                            P_rpn[0],
                            P_rpn[1],
                            C,
                            K.image_dim_ordering(),
                            use_regr=True,
                            overlap_thresh=C.rpn_nms_threshold,
                            flag="train")
                        # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                        X2, Y1, Y2, IouS = roi_helpers.calc_iou(
                            R, img_data, C, class_mapping)

                        neg_samples = np.where(Y1[0, :, -1] == 1)
                        pos_samples = np.where(Y1[0, :, -1] == 0)

                        if len(neg_samples) > 0:
                            neg_samples = neg_samples[0]
                        else:
                            neg_samples = []

                        if len(pos_samples) > 0:
                            pos_samples = pos_samples[0]
                        else:
                            pos_samples = []

                        rpn_accuracy_rpn_monitor.append(len(pos_samples))
                        rpn_accuracy_for_epoch.append((len(pos_samples)))

                        if C.num_rois > 1:
                            if len(pos_samples) < C.num_rois // 2:
                                selected_pos_samples = pos_samples.tolist()
                            else:
                                selected_pos_samples = np.random.choice(
                                    pos_samples,
                                    C.num_rois // 2,
                                    replace=False).tolist()
                            try:
                                selected_neg_samples = np.random.choice(
                                    neg_samples,
                                    C.num_rois - len(selected_pos_samples),
                                    replace=False).tolist()
                            except:
                                selected_neg_samples = np.random.choice(
                                    neg_samples,
                                    C.num_rois - len(selected_pos_samples),
                                    replace=True).tolist()

                            sel_samples = selected_pos_samples + selected_neg_samples
                        else:
                            # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                            selected_pos_samples = pos_samples.tolist()
                            selected_neg_samples = neg_samples.tolist()
                            if np.random.randint(0, 2):
                                sel_samples = random.choice(neg_samples)
                            else:
                                sel_samples = random.choice(pos_samples)
                        if train_final_classifier:
                            val_loss_class = model_classifier.test_on_batch(
                                [X, X2[:, sel_samples, :]],
                                [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                        if train_rpn:
                            losses_val[iter_num, 0] = val_loss_rpn[1]
                            losses_val[iter_num, 1] = val_loss_rpn[2]
                        else:
                            losses_val[iter_num, 0] = 0
                            losses_val[iter_num, 1] = 0

                        if train_final_classifier:
                            losses_val[iter_num, 2] = val_loss_class[1]
                            losses_val[iter_num, 3] = val_loss_class[2]
                            losses_val[iter_num, 4] = val_loss_class[3]
                        else:
                            losses_val[iter_num, 2] = 0
                            losses_val[iter_num, 3] = 0
                            losses_val[iter_num, 4] = 0

                        iter_num += 1

                        progbar.update(
                            iter_num,
                            [('rpn_cls', np.mean(losses_val[:iter_num, 0])),
                             ('rpn_regr', np.mean(losses_val[:iter_num, 1])),
                             ('detector_cls', np.mean(losses_val[:iter_num,
                                                                 2])),
                             ('detector_regr', np.mean(losses_val[:iter_num,
                                                                  3]))])

                        if iter_num == validation_epoch_length:
                            if train_rpn:
                                val_loss_rpn_cls = np.mean(losses_val[:, 0])
                                val_loss_rpn_regr = np.mean(losses_val[:, 1])
                            else:
                                val_loss_rpn_cls = 0
                                val_loss_rpn_regr = 0
                            if train_final_classifier:
                                val_loss_class_cls = np.mean(losses_val[:, 2])
                                val_loss_class_regr = np.mean(losses_val[:, 3])
                                val_class_acc = np.mean(losses_val[:, 4])
                            else:
                                val_loss_class_cls = 0
                                val_loss_class_regr = 0
                                val_class_acc = 0

                            mean_overlapping_bboxes = float(
                                sum(rpn_accuracy_for_epoch)) / len(
                                    rpn_accuracy_for_epoch)
                            rpn_accuracy_for_epoch = []

                            loss_dict_valid = {
                                "rpn_cls": val_loss_rpn_cls,
                                "rpn_reg": val_loss_rpn_regr,
                                "final_cls": val_loss_class_cls,
                                "final_reg": val_loss_class_regr
                            }

                            val_curr_loss = 0
                            for l in losses_to_watch:
                                val_curr_loss += loss_dict_valid[l]

                            write_log(tbCallBack, val_names, [
                                val_loss_rpn_cls, val_loss_rpn_regr,
                                val_loss_class_cls, val_loss_class_regr,
                                val_curr_loss, val_class_acc
                            ], epoch_num)

                            if C.verbose:
                                print('[INFO VALIDATION]')
                                print(
                                    'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                                    .format(mean_overlapping_bboxes))
                                print(
                                    'Classifier accuracy for bounding boxes from RPN: {}'
                                    .format(val_class_acc))
                                print('Loss RPN classifier: {}'.format(
                                    val_loss_rpn_cls))
                                print('Loss RPN regression: {}'.format(
                                    val_loss_rpn_regr))
                                print('Loss Detector classifier: {}'.format(
                                    val_loss_class_cls))
                                print('Loss Detector regression: {}'.format(
                                    val_loss_class_regr))
                                print(
                                    "current loss: %.2f, best loss: %.2f at epoch: %d"
                                    % (val_curr_loss, val_best_loss,
                                       val_best_loss_epoch))
                                print('Elapsed time: {}'.format(time.time() -
                                                                start_time))

                            if val_curr_loss < val_best_loss:
                                if C.verbose:
                                    print(
                                        'Total loss decreased from {} to {}, saving weights'
                                        .format(val_best_loss, val_curr_loss))
                                    save_log_data = '\nTotal loss decreased from {} to {} in epoch {}/{} in validation, saving weights'.format(
                                        val_best_loss, val_curr_loss,
                                        epoch_num + 1, num_epochs)
                                    with open("./saving_log.txt", "a") as f:
                                        f.write(save_log_data)
                                val_best_loss = val_curr_loss
                                val_best_loss_epoch = epoch_num
                                model_all.save_weights(C.weights_all_path)
                            start_time = time.time()
                            iter_num = 0
                            break
                    except:
                        pass

    print('Training complete, exiting.')
Ejemplo n.º 21
0
import pprint
import sys
import json
from keras_frcnn import config

sys.setrecursionlimit(40000)

C = config.Config()
C.num_rois = 2

C.use_vertical_flips = True

#from keras_frcnn.pascal_voc_parser import get_data
from keras_frcnn.simple_parser import get_data

all_imgs, classes_count, class_mapping = get_data(sys.argv[1])

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

with open('classes.json', 'w') as class_data_json:
    json.dump(class_mapping, class_data_json)

inv_map = {v: k for k, v in class_mapping.items()}

#pprint.pprint(classes_count)
print(('Num classes (including bg) = {}'.format(len(classes_count))))
random.shuffle(all_imgs)

num_imgs = len(all_imgs)
def test_kitti():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = './model/kitti_frcnn_last.hdf5'

    cfg.simple_label_file = 'kitti_simple_label.txt'
    #查看绝对路径
    #t = os.path.abspath('kitti_simple_label.txt')

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)
    pedestrain_num = classes_count['Pedestrian']
    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print(
            'Config has been written to {}, and can be loaded when testing to ensure correct results'
            .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs,
                                                   classes_count,
                                                   cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(),
                                                   mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs,
                                                 classes_count,
                                                 cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(),
                                                 mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)
    #img_input:  三通道,为输入图片
    img_input = Input(shape=input_shape_img)
    #roi_input:为输入图片boudingbox的四维值
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    #shared_layers : 基础的网络结构(例如: resnet,vgg)通过该网络来提取原始图片的featuremap特征,最后将这些特征送入RPN网络和RCNN网络
    # 1.定义nn的输入层,faster-rcnn共享卷积层,
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers 2.定义RPN层
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    #RPN网络用于生成region proposals,该层通过sigmoid函数判断anchors属于foreground或者background, 再利用bounding box regression修正anchors获得修正后的RoI。
    # rpn: 在基础的网络结构使用9个bounding box产生了分类和回归的rpn网络。定义rpn层,return [x_class, x_regr, base_layers]
    rpn = nn.rpn(shared_layers, num_anchors)
    #定义分类器层,定义classifier的输入和输出
    classifier = nn.classifier(shared_layers,
                               roi_input,
                               cfg.num_rois,
                               nb_classes=len(classes_count),
                               trainable=True)
    #定义rpn模型的输入和输出一个框2分类(最后使用的sigmod而不是softmax)和框的回归
    model_rpn = Model(img_input, rpn[:2])
    #定义classifier的输入和输出
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        #TODO 第一次运行因为model_path没有hdf5文件,因此修改为cfg.base_net_weights,现在可以修改回来
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
        # model_rpn.load_weights(cfg.base_net_weights, by_name=True)
        # model_classifier.load_weights(cfg.base_net_weights, by_name=True)
    except Exception as e:
        print(e)
        print(
            'Could not load pretrained model weights. Weights can be found in the keras application folder '
            'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[
                          losses_fn.rpn_loss_cls(num_anchors),
                          losses_fn.rpn_loss_regr(num_anchors)
                      ])
    model_classifier.compile(
        optimizer=optimizer_classifier,
        loss=[
            losses_fn.class_loss_cls,
            losses_fn.class_loss_regr(len(classes_count) - 1)
        ],
        metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    #todo 增加tensorboard日志文件
    log_path = './graph'
    callback = TensorBoard(log_path,
                           histogram_freq=0,
                           write_graph=True,
                           write_images=True)
    callback.set_model(model_all)

    #todo epoch的大小为训练图片的个数
    # epoch_length = len(train_imgs)
    epoch_length = len(val_imgs)
    #epoch_length = 47182
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    #todo
    losses_val = np.zeros((epoch_length, 5))

    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    rpn_accuracy_rpn_monitor_val = []
    rpn_accuracy_for_epoch_val = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting testing')

    vis = True

    allbbox = 0

    #只有训练的,改成只有测试的
    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor
                       ) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(
                        sum(rpn_accuracy_rpn_monitor)) / len(
                            rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                        .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print(
                            'RPN is not producing bounding boxes that overlap'
                            ' the ground truth boxes. Check RPN settings or keep training.'
                        )
                #todo train修改为val
                # X, Y, img_data = next(data_gen_train)
                X, Y, img_data = next(data_gen_val)

                # loss_rpn = model_rpn.train_on_batch(X, Y)
                loss_rpn = model_rpn.test_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0],
                                                P_rpn[1],
                                                cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format

                #todo 增加 count
                # X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)
                X2, Y1, Y2, IouS, count = roi_helpers.calc_iou(
                    result, img_data, cfg, class_mapping)
                allbbox = allbbox + count
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(
                            pos_samples, cfg.num_rois // 2,
                            replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(
                            neg_samples,
                            cfg.num_rois - len(selected_pos_samples),
                            replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(
                    iter_num,
                    [('rpn_cls', np.mean(losses[:iter_num, 0])),
                     ('rpn_regr', np.mean(losses[:iter_num, 1])),
                     ('detector_cls', np.mean(losses[:iter_num, 2])),
                     ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(
                        rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print(
                            'Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                            .format(mean_overlapping_bboxes))
                        print(
                            'Classifier accuracy for bounding boxes from RPN: {}'
                            .format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(
                            loss_class_cls))
                        print('Loss Detector regression: {}'.format(
                            loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() -
                                                        start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print(
                                'Total loss decreased from {} to {}, saving weights'
                                .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print("检测准确率:")
    print(float(allbbox / pedestrain_num))
    print('testing complete, exiting.')
Ejemplo n.º 23
0
from keras.utils import generic_utils

from keras_frcnn.simple_parser import get_data

C = config.Config()

now = datetime.datetime.now()
otherStyleTime = now.strftime("%Y-%m-%d %H:%M:%S")

model_path = 'model/' + str(otherStyleTime) + 'stag_model_frcnn.hdf5'
config_path = 'model/' + str(otherStyleTime) + 'stag_config.pickle'

C.model_path = model_path
C.num_rois = 32

all_imgs, classes_count, class_mapping = get_data('managedata/stag_data_with_person.txt')

if 'bg' not in classes_count:
    classes_count['bg'] = 0
    class_mapping['bg'] = len(class_mapping)

C.class_mapping = class_mapping

inv_map = {v: k for k, v in class_mapping.iteritems()}

print('Training images per class:')
pprint.pprint(classes_count)
print('Num classes (including bg) = {}'.format(len(classes_count)))

config_output_filename = config_path
Ejemplo n.º 24
0
def train_kitti():
    # config for data argument
    cfg = config.Config()

    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())

    # TODO: the only file should to be change for other data to train
    cfg.model_path = './model/kitti_frcnn_last.hdf5'
    cfg.simple_label_file = 'kitti_simple_label.txt'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'.format(
            cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))
    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg, nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg, nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois, nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors), losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls, losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 1000
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()

    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')

    vis = True

    for epoch_num in range(num_epochs):

        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))

        while True:
            try:

                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print(
                        'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'.format(
                            mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)

                loss_rpn = model_rpn.train_on_batch(X, Y)

                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg, K.image_dim_ordering(), use_regr=True,
                                                overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append((len(pos_samples)))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2, replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples, cfg.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()

                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch([X, X2[:, sel_samples, :]],
                                                             [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]

                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1

                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])), ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(
                            mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'.format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(cfg.model_path)

                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                # save model
                model_all.save_weights(cfg.model_path)
                continue
    print('Training complete, exiting.')
Ejemplo n.º 25
0
def meature_map(test_path,length):
	test_imgs, _, _ = sp.get_data(test_path)
	T = {}
	P = {}
	mAPs = []
	for idx, img_data in enumerate(test_imgs):
		print('{}/{}'.format(idx, len(test_imgs)))
		st = time.time()
		filepath = img_data['filepath']

		img = cv2.imread(filepath)

		X, fx, fy = format_img_map(img, C)

		# Change X (img) shape from (1, channel, height, width) to (1, height, width, channel)
		X = np.transpose(X, (0, 2, 3, 1))

		# get the feature maps and output from the RPN
		[Y1, Y2, F] = model_rpn.predict(X)

		R = roi_helpers.rpn_to_roi(Y1, Y2, C, K.image_dim_ordering(), overlap_thresh=0.7)

		# convert from (x1,y1,x2,y2) to (x,y,w,h)
		R[:, 2] -= R[:, 0]
		R[:, 3] -= R[:, 1]

		# apply the spatial pyramid pooling to the proposed regions
		bboxes = {}
		probs = {}

		for jk in range(R.shape[0] // C.num_rois + 1):
			ROIs = np.expand_dims(R[C.num_rois * jk:C.num_rois * (jk + 1), :], axis=0)
			if ROIs.shape[1] == 0:
				break

			if jk == R.shape[0] // C.num_rois:
				# pad R
				curr_shape = ROIs.shape
				target_shape = (curr_shape[0], C.num_rois, curr_shape[2])
				ROIs_padded = np.zeros(target_shape).astype(ROIs.dtype)
				ROIs_padded[:, :curr_shape[1], :] = ROIs
				ROIs_padded[0, curr_shape[1]:, :] = ROIs[0, 0, :]
				ROIs = ROIs_padded

			[P_cls, P_regr] = model_classifier_only.predict([F, ROIs])

			# Calculate all classes' bboxes coordinates on resized image (300, 400)
			# Drop 'bg' classes bboxes
			for ii in range(P_cls.shape[1]):

				# If class name is 'bg', continue
				if np.argmax(P_cls[0, ii, :]) == (P_cls.shape[2] - 1):
					continue

				# Get class name
				cls_name = class_mapping[np.argmax(P_cls[0, ii, :])]

				if cls_name not in bboxes:
					bboxes[cls_name] = []
					probs[cls_name] = []

				(x, y, w, h) = ROIs[0, ii, :]

				cls_num = np.argmax(P_cls[0, ii, :])
				try:
					(tx, ty, tw, th) = P_regr[0, ii, 4 * cls_num:4 * (cls_num + 1)]
					tx /= C.classifier_regr_std[0]
					ty /= C.classifier_regr_std[1]
					tw /= C.classifier_regr_std[2]
					th /= C.classifier_regr_std[3]
					x, y, w, h = roi_helpers.apply_regr(x, y, w, h, tx, ty, tw, th)
				except:
					pass
				bboxes[cls_name].append([16 * x, 16 * y, 16 * (x + w), 16 * (y + h)])
				probs[cls_name].append(np.max(P_cls[0, ii, :]))

		all_dets = []

		for key in bboxes:
			bbox = np.array(bboxes[key])

			# Apply non-max-suppression on final bboxes to get the output bounding boxe
			new_boxes, new_probs = roi_helpers.non_max_suppression_fast(bbox, np.array(probs[key]), overlap_thresh=0.5)
			for jk in range(new_boxes.shape[0]):
				(x1, y1, x2, y2) = new_boxes[jk, :]
				det = {'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': key, 'prob': new_probs[jk]}
				all_dets.append(det)

		print('Elapsed time = {}'.format(time.time() - st))
		t, p = get_map(all_dets, img_data['bboxes'], (fx, fy))
		for key in t.keys():
			if key not in T:
				T[key] = []
				P[key] = []
			T[key].extend(t[key])
			P[key].extend(p[key])
		all_aps = []
		for key in T.keys():
			ap = average_precision_score(T[key], P[key])
			print('{} AP: {}'.format(key, ap))
			all_aps.append(ap)
		print('mAP = {}'.format(np.mean(np.array(all_aps))))
		mAPs.append(np.mean(np.array(all_aps)))
	# print(T)
	# print(P)

	print()
	print('mean average precision:', np.mean(np.array(mAPs)))
	mAP = [mAP for mAP in mAPs if str(mAP) != 'nan']
	mean_average_prec = round(np.mean(np.array(mAP)), 3)
	print('After training %dk batches, the mean average precision is %0.3f' % (length, mean_average_prec))