Ejemplo n.º 1
0
    def get_detector_mask(self, boxes, anchors):
        '''
        Precompute detectors_mask and matching_true_boxes for training.
        Detectors mask is 1 for each spatial position in the final conv layer and
        anchor that should be active for the given boxes and 0 otherwise.
        Matching true boxes gives the regression targets for the ground truth box
        that caused a detector to be active or 0 otherwise.
        Copied from YAD2K retrain_yolo.py
        '''
        detector_save_path = os.path.join(self.output_path, "KITTI-masks.npz")
        if os.path.exists(detector_save_path):
            self.__print("Loading detector masks from file...")
            data = np.load(detector_save_path)
            detectors_mask = data['detectors_mask']
            matching_true_boxes = data['matching_true_boxes']
        else:
            self.__print("Computing detector masks...")
            detectors_mask = [0 for i in range(len(boxes))]
            matching_true_boxes = [0 for i in range(len(boxes))]
            for i, box in enumerate(boxes):
                detectors_mask[i], matching_true_boxes[i] = \
                    preprocess_true_boxes(box, anchors, [self.image_data_size[1], self.image_data_size[0]])
            detectors_mask = np.array(detectors_mask)
            matching_true_boxes = np.array(matching_true_boxes)
            np.savez(detector_save_path, detectors_mask=detectors_mask, matching_true_boxes=matching_true_boxes)

        return np.array(detectors_mask, dtype=np.bool), np.array(matching_true_boxes)
Ejemplo n.º 2
0
    def get_detector_mask(boxes, anchors):
        detectors_mask = [0 for i in range(len(boxes))]
        matching_true_boxes = [0 for i in range(len(boxes))]
        for i, box in enumerate(boxes):
            box = box.reshape((-1, 5))
            detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(
                box, anchors, [416, 416])

        return np.array(detectors_mask), np.array(matching_true_boxes)
Ejemplo n.º 3
0
def get_all_detector_masks(data, anchors):
    #data is full dict from process_MOT_dataset
    for video in data:
        video['detector_mask'] = {}
        for frame_id in video['frame']:
            detectors_mask, matching_true_box = preprocess_true_boxes(
                video['frame'][frame_id], anchors, [608, 608])
            video['detector_mask'][frame_id] = [
                detectors_mask, matching_true_box
            ]
    return data
Ejemplo n.º 4
0
def get_detector_mask(boxes, anchors):
    '''
    Precompute detectors_mask and matching_true_boxes for training.
    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.
    '''
    detectors_mask = [0 for i in range(len(boxes))]
    matching_true_boxes = [0 for i in range(len(boxes))]
    for i, box in enumerate(boxes):
        detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(box, anchors, [224, 320])

    return np.array(detectors_mask), np.array(matching_true_boxes)
Ejemplo n.º 5
0
def get_detector_mask(boxes, anchors):
    '''
    Precompute detectors_mask and matching_true_boxes for training.
    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.
    '''
    detectors_mask = [0 for i in range(len(boxes))]
    matching_true_boxes = [0 for i in range(len(boxes))]
    for i, box in enumerate(boxes):
        detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(box, anchors, [416, 416])

    return np.array(detectors_mask), np.array(matching_true_boxes)
Ejemplo n.º 6
0
def get_detector_mask(boxes, anchors):
    """
        Precompute detectors_mask and matching_true_boxes methods for training.
        (Detectors mask is 1 for each spatial position in the final conv layer and anchor that should be active for the
        given boxes and 0 otherwise. Matching true boxes gives the regression targets for the ground truth box that
        caused a detector to be active or 0 otherwise).
        :param boxes: list of bounding box information from the annotation
        :param anchors: list of anchors from the annotation
        :return:
    """
    detectors_mask = [0 for i in range(len(boxes))]
    matching_true_boxes = [0 for i in range(len(boxes))]
    for i, box in enumerate(boxes):
        detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(
            box, anchors, [416, 416])
    return np.array(detectors_mask), np.array(matching_true_boxes)
Ejemplo n.º 7
0
def get_detector_mask(boxes, anchors):
    '''
    Precompute detectors_mask and matching_true_boxes for training.
    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.
    
    Remember: matching true boxes is Corresponding ground truth boxes for positive detector positions
              with shape [batch, num_true_boxes, 5] containing box x_center, y_center, width, height, and class.
  
    '''
    detectors_mask = [0 for i in range(len(boxes))]
    matching_true_boxes = [0 for i in range(len(boxes))]
    for i, box in enumerate(
            boxes):  #enumerate returns index and its corresponding value
        detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(
            box, anchors, [416, 416])

    return np.array(detectors_mask), np.array(matching_true_boxes)
Ejemplo n.º 8
0
def _main():
    voc_path = os.path.expanduser(
        '/Users/ss/Data/VOCdevkit/pascal_voc_07_07.hdf5')
    classes_path = os.path.expanduser('model_data/pascal_classes.txt')

    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    voc = h5py.File(voc_path, 'r')
    image = PIL.Image.open(io.BytesIO(voc['train/images'][28]))
    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    image_data = np.array(image, dtype=np.float)
    image_data /= 255.

    # Box preprocessing.
    # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
    boxes = voc['train/boxes'][28]
    boxes = boxes.reshape((-1, 5))
    # Get extents as y_min, x_min, y_max, x_max, class for comparision with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    anchors = COCO_ANCHORS
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(
        boxes, anchors, [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    print(boxes)
    print(boxes_extents)
    print(np.where(detectors_mask == 1)[:-1])
    print(matching_true_boxes[np.where(detectors_mask == 1)[:-1]])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)
    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(yolo_loss,
                            output_shape=(1, ),
                            name='yolo_loss',
                            arguments={
                                'anchors': anchors,
                                'num_classes': len(class_names)
                            })([
                                model_body.output, boxes_input,
                                detectors_mask_input, matching_boxes_input
                            ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input, matching_boxes_input],
        model_loss)
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })  # This is a hack to use the custom loss function in the last layer.

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 1000
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))

    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              batch_size=1,
              epochs=num_steps)
    model.save_weights('overfit_weights.h5')

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(yolo_outputs,
                                       input_image_shape,
                                       score_threshold=.3,
                                       iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [boxes, scores, classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
Ejemplo n.º 9
0
def _main(args):
    #訓練データセットに対する情報の読み込みfrom voc_to_hdf5
    voc_path = os.path.expanduser(args.data_path)
    #ラベル情報.txtへのpath
    classes_path = os.path.expanduser(args.classes_path)
    #アスペクト比の格納?
    anchors_path = os.path.expanduser(args.anchors_path)

    #クラス名のリストを作成
    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    if os.path.isfile(anchors_path):
        with open(anchors_path) as f:
            anchors = f.readline()
            anchors = [float(x) for x in anchors.split(',')]
            anchors = np.array(anchors).reshape(-1, 2)
    else:
        anchors = YOLO_ANCHORS

    voc = h5py.File(voc_path, 'r')
    image = PIL.Image.open(io.BytesIO(voc['test/images'][0]))
    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    image_data = np.array(image, dtype=np.float)
    image_data /= 255.

    # Box preprocessing.
    # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
    boxes = voc['test/boxes'][0]
    boxes = boxes.reshape((-1, 5))
    # Get extents as y_min, x_min, y_max, x_max, class for comparision with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    #annotation部分のxy座標、幅
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(
        boxes, anchors, [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    print('Boxes:')
    print(boxes)
    print('Box corners:')
    print(boxes_extents)
    print('Active detectors:')
    print(np.where(detectors_mask == 1)[:-1])
    print('Matching boxes for active detectors:')
    print(matching_true_boxes[np.where(detectors_mask == 1)[:-1]])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)
    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(yolo_loss,
                            output_shape=(1, ),
                            name='yolo_loss',
                            arguments={
                                'anchors': anchors,
                                'num_classes': len(class_names)
                            })([
                                model_body.output, boxes_input,
                                detectors_mask_input, matching_boxes_input
                            ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input, matching_boxes_input],
        model_loss)
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })  # This is a hack to use the custom loss function in the last layer.

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 50

    #checkpoint = ModelCheckpoint('model1.h5', monitor='val_loss', verbose=1,
    #save_best_only=True, save_weights_only=False, mode='auto', period=1)
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))
    model.fit(
        [image_data, boxes, detectors_mask, matching_true_boxes],
        np.zeros(len(image_data)),
        batch_size=1,
        epochs=num_steps,
        #callbacks=[checkpoint]
    )
    model.save('model.h5')
    model.save_weights('overfit_weights.h5')

    #model_json_str = model.to_json()
    #open('mlp_model.json', 'w').write(model_json_str)
    #model.save_weights('mlp_weights.h5');

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(yolo_outputs,
                                       input_image_shape,
                                       score_threshold=.3,
                                       iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [boxes, scores, classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
Ejemplo n.º 10
0
def train_yolo_model(numb_image=10,
                     learning_rate=0.00005,
                     num_epochs=100,
                     minibatch_size=5,
                     print_cost=True,
                     model_path="./model_data/yolo_trained_model.h5"):
    """ Create and train yolo model
    Parameters:
    -----------
    numb_image : int
        number of images used for training
    learning_rate : float
        learning rate of the optimization
    num_epochs : int
        number of epochs of the optimization loop
    minibatch_size : int
        size of a minibatch
    print_cost : boolean
        True to print the cost every 5 epochs
    model_path : string
        location to save trained model

    Returns:
    --------
    costs : list
        list of cost values
    """
    costs = []  # To keep track of the cost

    # Load data
    image_size = (608, 608)
    dataset = load_data(numb_image, "model_data/true_boxes.h5")

    # Create model
    X = Input(batch_shape=[None, 608, 608, 3], dtype=tf.float32)
    Y1 = tf.placeholder(shape=[None, 10, 5], dtype=tf.float32)  # true boxes
    Y2 = tf.placeholder(shape=[None, 19, 19, 5, 1],
                        dtype=tf.float32)  # detector mask
    Y3 = tf.placeholder(shape=[None, 19, 19, 5, 5],
                        dtype=tf.float32)  # matching true boxes
    yolo_m = yolo_body(inputs=X,
                       num_anchors=len(anchors),
                       num_classes=len(class_names))

    args = [yolo_m.output, Y1, Y2, Y3]
    cost = yolo_loss(args, anchors, len(class_names))

    # Backpropagation: Define the tensorflow optimizer. Use an AdamOptimizer that minimizes the cost.
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Initialize all the variables globally
    init = tf.global_variables_initializer()

    # Run session to train network
    with tf.Session() as sess:
        # Run initialization
        sess.run(init)

        # # Run training loop
        for epoch in range(num_epochs):
            minibatch_cost = 0.
            num_minibatches = int(
                numb_image / minibatch_size
            )  # number of minibatches of size minibatch_size in the train set
            minibatches = random_mini_batches(dataset['X'], dataset['Y'],
                                              minibatch_size)

            for minibatch in minibatches:
                # Select a minibatch
                (minibatch_X, minibatch_Y) = minibatch

                # Preprocess true boxes
                detectors_mask = []
                matching_true_boxes = []
                for i in range(0, minibatch_Y.shape[0]):
                    detectors_mask_tmp, matching_true_boxes_tmp = preprocess_true_boxes(
                        minibatch_Y[i, :, :], anchors, image_size)
                    if len(detectors_mask) == 0:
                        detectors_mask = np.expand_dims(detectors_mask_tmp,
                                                        axis=0)
                    else:
                        detectors_mask = np.append(detectors_mask,
                                                   np.expand_dims(
                                                       detectors_mask_tmp,
                                                       axis=0),
                                                   axis=0)

                    if len(matching_true_boxes) == 0:
                        matching_true_boxes = np.expand_dims(
                            matching_true_boxes_tmp, axis=0)
                    else:
                        matching_true_boxes = np.append(
                            matching_true_boxes,
                            np.expand_dims(matching_true_boxes_tmp, axis=0),
                            axis=0)

                # Run the session to execute the optimizer and the cost
                _, temp_cost = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        X: minibatch_X,
                        Y1: minibatch_Y,
                        Y2: detectors_mask,
                        Y3: matching_true_boxes
                    })
                minibatch_cost += temp_cost / num_minibatches

            # Print the cost every epoch
            if print_cost == True and epoch % 5 == 0:
                print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost == True and epoch % 1 == 0:
                costs.append(minibatch_cost)

        yolo_m.save_weights(model_path)

        return costs
Ejemplo n.º 11
0
def _main(args):
    voc_path = os.path.expanduser(args.data_path)
    classes_path = os.path.expanduser(args.classes_path)
    anchors_path = os.path.expanduser(args.anchors_path)

    with open(classes_path) as f:
        class_names = f.readlines()
    class_names = [c.strip() for c in class_names]

    if os.path.isfile(anchors_path):
        with open(anchors_path) as f:
            anchors = f.readline()
            anchors = [float(x) for x in anchors.split(',')]
            anchors = np.array(anchors).reshape(-1, 2)
    else:
        anchors = YOLO_ANCHORS

    voc = h5py.File(voc_path, 'r')
    image = PIL.Image.open(io.BytesIO(voc['train/images'][28]))
    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    image_data = np.array(image, dtype=np.float)
    image_data /= 255.

    # Box preprocessing.
    # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
    boxes = voc['train/boxes'][28]
    boxes = boxes.reshape((-1, 5))
    # Get extents as y_min, x_min, y_max, x_max, class for comparision with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(boxes, anchors,
                                                                [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    print('Boxes:')
    print(boxes)
    print('Box corners:')
    print(boxes_extents)
    print('Active detectors:')
    print(np.where(detectors_mask == 1)[:-1])
    print('Matching boxes for active detectors:')
    print(matching_true_boxes[np.where(detectors_mask == 1)[:-1]])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)
    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={'anchors': anchors,
                       'num_classes': len(class_names)})([
                           model_body.output, boxes_input,
                           detectors_mask_input, matching_boxes_input
                       ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input,
         matching_boxes_input], model_loss)
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })  # This is a hack to use the custom loss function in the last layer.

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 1000
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              batch_size=1,
              epochs=num_steps)
    model.save_weights('overfit_weights.h5')

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    boxes, scores, classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [boxes, scores, classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
Ejemplo n.º 12
0
true_boxes = np.array(
    [[0.16, 0.51355422, 0.296, 0.68975904, 14.],
     [0.428, 0.47590361, 0.276, 0.60240964, 14.],
     [0.664, 0.5813253, 0.432, 0.8373494, 14.],
     [0.863, 0.64457831, 0.274, 0.7108433, 14]])

image_size = (416, 416)
anchors = np.array([
    [0.738768, 0.874946],
    [2.42204, 2.65704],
    [4.30971, 7.04493],
    [10.246, 4.59428],
    [12.6868, 11.8741]
])
actual_dm, actual_mtb = preprocess_true_boxes(true_boxes, anchors, image_size)

def numpy_ptb():

    height, width = image_size
    num_anchors = len(anchors)
    # Downsampling factor of 5x 2-stride max_pools == 32.
    assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.'
    conv_height = height // 32
    conv_width = width // 32
    detectors_mask = np.zeros((conv_height, conv_width, num_anchors, 1))
    matching_true_boxes = np.zeros((conv_height, conv_width, num_anchors, 5))

    boxes_i = np.floor(true_boxes[:,1] * conv_height).astype('int')
    boxes_j = np.floor(true_boxes[:,0] * conv_width).astype('int')