def get_detector_mask(self, boxes, anchors):
    """Return detector masks and matching true boxes, cached on disk.

    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.

    Results are cached in ``KITTI-masks.npz`` under ``self.output_path``.
    When that file already exists it is loaded as-is and ``boxes`` and
    ``anchors`` are not consulted.

    Adapted from YAD2K retrain_yolo.py.

    Args:
        boxes: Iterable of per-image ground-truth boxes; each element is
            handed to ``preprocess_true_boxes`` unchanged.
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.

    Returns:
        Tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays. The
        mask is cast to boolean dtype.
    """
    detector_save_path = os.path.join(self.output_path, "KITTI-masks.npz")

    if os.path.exists(detector_save_path):
        self.__print("Loading detector masks from file...")
        # np.load returns a lazy NpzFile that keeps the archive open; the
        # context manager closes it once both arrays have been read.
        with np.load(detector_save_path) as data:
            detectors_mask = data['detectors_mask']
            matching_true_boxes = data['matching_true_boxes']
    else:
        self.__print("Computing detector masks...")
        # The two entries of image_data_size are swapped before the call --
        # presumably stored as (width, height) while preprocess_true_boxes
        # wants (height, width). TODO confirm.
        image_size = [self.image_data_size[1], self.image_data_size[0]]
        detectors_mask = []
        matching_true_boxes = []
        for box in boxes:
            mask, matches = preprocess_true_boxes(box, anchors, image_size)
            detectors_mask.append(mask)
            matching_true_boxes.append(matches)
        detectors_mask = np.array(detectors_mask)
        matching_true_boxes = np.array(matching_true_boxes)
        np.savez(detector_save_path,
                 detectors_mask=detectors_mask,
                 matching_true_boxes=matching_true_boxes)

    # The np.bool alias was deprecated in NumPy 1.20 and removed in 1.24
    # (it only came back with NumPy 2.0); builtin bool works on every version.
    return np.array(detectors_mask, dtype=bool), np.array(matching_true_boxes)
def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """Compute detector masks and matching true boxes for a set of images.

    Each element of ``boxes`` is reshaped to rows of five values and passed
    to ``preprocess_true_boxes``; the per-image results are stacked.

    Args:
        boxes: Iterable of per-image box arrays (anything with a
            ``reshape`` method whose size is a multiple of 5).
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.
        image_size: Two-element image size forwarded to
            ``preprocess_true_boxes``. Defaults to ``(416, 416)``, the value
            that used to be hard-coded here.

    Returns:
        Tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays with
        one leading entry per element of ``boxes``.
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matches = preprocess_true_boxes(
            box.reshape((-1, 5)), anchors, size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matches)
    return np.array(detectors_mask), np.array(matching_true_boxes)
def get_all_detector_masks(data, anchors, image_size=(608, 608)):
    """Attach detector masks to every frame of every video, in place.

    For each video dict, a new ``'detector_mask'`` dict is created that maps
    every frame id found in ``video['frame']`` to the two-element list
    ``[detectors_mask, matching_true_box]`` returned by
    ``preprocess_true_boxes`` for that frame's boxes.

    Args:
        data: Iterable of per-video dicts, each holding a ``'frame'`` mapping
            of frame id to boxes (per the original note, the structure
            produced by ``process_MOT_dataset``).
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.
        image_size: Two-element image size forwarded to
            ``preprocess_true_boxes``. Defaults to ``(608, 608)``, the value
            that used to be hard-coded here.

    Returns:
        The same ``data`` object, mutated. Any existing ``'detector_mask'``
        entry of a video is replaced.
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    for video in data:
        frame_masks = {}
        for frame_id, frame_boxes in video['frame'].items():
            detectors_mask, matching_true_box = preprocess_true_boxes(
                frame_boxes, anchors, size)
            frame_masks[frame_id] = [detectors_mask, matching_true_box]
        video['detector_mask'] = frame_masks
    return data
def get_detector_mask(boxes, anchors, image_size=(224, 320)):
    """Precompute detectors_mask and matching_true_boxes for training.

    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.

    Args:
        boxes: Iterable of per-image ground-truth boxes; each element is
            handed to ``preprocess_true_boxes`` unchanged.
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.
        image_size: Two-element image size forwarded to
            ``preprocess_true_boxes``. Defaults to ``(224, 320)``, the value
            that used to be hard-coded here.

    Returns:
        Tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays with
        one leading entry per element of ``boxes``.
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matches = preprocess_true_boxes(box, anchors, size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matches)
    return np.array(detectors_mask), np.array(matching_true_boxes)
def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """Precompute detectors_mask and matching_true_boxes for training.

    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.

    Args:
        boxes: Iterable of per-image ground-truth boxes; each element is
            handed to ``preprocess_true_boxes`` unchanged.
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.
        image_size: Two-element image size forwarded to
            ``preprocess_true_boxes``. Defaults to ``(416, 416)``, the value
            that used to be hard-coded here.

    Returns:
        Tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays with
        one leading entry per element of ``boxes``.
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    results = [preprocess_true_boxes(box, anchors, size) for box in boxes]
    detectors_mask = [mask for mask, _ in results]
    matching_true_boxes = [matches for _, matches in results]
    return np.array(detectors_mask), np.array(matching_true_boxes)
def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """
    Precompute the detectors mask and matching true boxes for training.

    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.

    :param boxes: list of bounding box information from the annotation, one
        entry per image; each entry is passed to ``preprocess_true_boxes``
    :param anchors: list of anchors, forwarded to ``preprocess_true_boxes``
    :param image_size: two-element image size forwarded to
        ``preprocess_true_boxes``; defaults to ``(416, 416)``, the value that
        used to be hard-coded here
    :return: tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays
        with one leading entry per element of ``boxes``
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matches = preprocess_true_boxes(box, anchors, size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matches)
    return np.array(detectors_mask), np.array(matching_true_boxes)
def get_detector_mask(boxes, anchors, image_size=(416, 416)):
    """Precompute detectors_mask and matching_true_boxes for training.

    Detectors mask is 1 for each spatial position in the final conv layer and
    anchor that should be active for the given boxes and 0 otherwise.
    Matching true boxes gives the regression targets for the ground truth box
    that caused a detector to be active or 0 otherwise.

    Per the original note, box rows hold x_center, y_center, width, height
    and class.
    NOTE(review): that note also gave the shape of matching true boxes as
    [batch, num_true_boxes, 5]; that looks like the shape of the raw true-box
    input rather than of the per-detector output of
    ``preprocess_true_boxes`` -- confirm against that function.

    Args:
        boxes: Iterable of per-image ground-truth boxes; each element is
            handed to ``preprocess_true_boxes`` unchanged.
        anchors: Anchor boxes forwarded to ``preprocess_true_boxes``.
        image_size: Two-element image size forwarded to
            ``preprocess_true_boxes``. Defaults to ``(416, 416)``, the value
            that used to be hard-coded here.

    Returns:
        Tuple ``(detectors_mask, matching_true_boxes)`` of NumPy arrays with
        one leading entry per element of ``boxes``.
    """
    # Pass a list, as the original hard-coded call did.
    size = list(image_size)
    detectors_mask = []
    matching_true_boxes = []
    for box in boxes:
        mask, matches = preprocess_true_boxes(box, anchors, size)
        detectors_mask.append(mask)
        matching_true_boxes.append(matches)
    return np.array(detectors_mask), np.array(matching_true_boxes)
def _main():
    """Overfit YOLO on a single Pascal VOC training image and plot the result.

    Reads training sample 28 from a hard-coded HDF5 dataset, builds the
    detector mask / matching-box targets for it, trains the model on that one
    sample for ``num_steps`` epochs, saves the weights to
    ``overfit_weights.h5``, then runs prediction on the same image and shows
    the detections with matplotlib.
    """
    voc_path = os.path.expanduser(
        '/Users/ss/Data/VOCdevkit/pascal_voc_07_07.hdf5')
    classes_path = os.path.expanduser('model_data/pascal_classes.txt')

    with open(classes_path) as f:
        class_names = [line.strip() for line in f]

    # Read the one sample we need, then release the HDF5 file handle.
    with h5py.File(voc_path, 'r') as voc:
        image = PIL.Image.open(io.BytesIO(voc['train/images'][28]))
        # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
        boxes = voc['train/boxes'][28]

    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    # np.float was removed in NumPy 1.24; it was an alias of builtin float.
    image_data = np.array(image, dtype=float)
    image_data /= 255.

    # Box preprocessing.
    boxes = boxes.reshape((-1, 5))

    # Get extents as y_min, x_min, y_max, x_max, class for comparison with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class,
    # normalized by the original image width and height.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    anchors = COCO_ANCHORS
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(
        boxes, anchors, [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    active_detectors = np.where(detectors_mask == 1)[:-1]
    print(boxes)
    print(boxes_extents)
    print(active_detectors)
    print(matching_true_boxes[active_detectors])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={
                'anchors': anchors,
                'num_classes': len(class_names)
            })([
                model_body.output, boxes_input, detectors_mask_input,
                matching_boxes_input
            ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input, matching_boxes_input],
        model_loss)
    # This is a hack to use the custom loss function in the last layer: the
    # Lambda layer already outputs the loss, so the Keras loss returns y_pred.
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 1000
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              batch_size=1,
              epochs=num_steps)
    model.save_weights('overfit_weights.h5')

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    pred_boxes, pred_scores, pred_classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [pred_boxes, pred_scores, pred_classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
def _main(args):
    """Overfit YOLO on the first test image of an HDF5 dataset and plot it.

    Loads class names and anchors, reads sample 0 of the ``test`` split,
    builds the detector mask / matching-box targets for it, trains on that
    one sample for ``num_steps`` epochs, saves the full model to ``model.h5``
    and the weights to ``overfit_weights.h5``, then runs prediction on the
    same image and shows the detections with matplotlib.

    Args:
        args: Object with ``data_path``, ``classes_path`` and
            ``anchors_path`` attributes. If ``anchors_path`` is not an
            existing file, ``YOLO_ANCHORS`` is used instead.
    """
    # Dataset to train on (original note: produced by voc_to_hdf5).
    voc_path = os.path.expanduser(args.data_path)
    # Path to the .txt file holding the label names.
    classes_path = os.path.expanduser(args.classes_path)
    # Original author's note: "stores the aspect ratios?"
    anchors_path = os.path.expanduser(args.anchors_path)

    # Build the list of class names.
    with open(classes_path) as f:
        class_names = [line.strip() for line in f]

    if os.path.isfile(anchors_path):
        # The first line holds comma-separated floats, read as pairs.
        with open(anchors_path) as f:
            anchors = [float(x) for x in f.readline().split(',')]
        anchors = np.array(anchors).reshape(-1, 2)
    else:
        anchors = YOLO_ANCHORS

    # Read the one sample we need, then release the HDF5 file handle.
    with h5py.File(voc_path, 'r') as voc:
        image = PIL.Image.open(io.BytesIO(voc['test/images'][0]))
        # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
        boxes = voc['test/boxes'][0]

    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    # np.float was removed in NumPy 1.24; it was an alias of builtin float.
    image_data = np.array(image, dtype=float)
    image_data /= 255.

    # Box preprocessing.
    boxes = boxes.reshape((-1, 5))

    # Get extents as y_min, x_min, y_max, x_max, class for comparison with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class,
    # normalized by the original image width and height.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    # Center xy coordinates and width/height of each annotated region.
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(
        boxes, anchors, [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    active_detectors = np.where(detectors_mask == 1)[:-1]
    print('Boxes:')
    print(boxes)
    print('Box corners:')
    print(boxes_extents)
    print('Active detectors:')
    print(active_detectors)
    print('Matching boxes for active detectors:')
    print(matching_true_boxes[active_detectors])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={
                'anchors': anchors,
                'num_classes': len(class_names)
            })([
                model_body.output, boxes_input, detectors_mask_input,
                matching_boxes_input
            ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input, matching_boxes_input],
        model_loss)
    # This is a hack to use the custom loss function in the last layer: the
    # Lambda layer already outputs the loss, so the Keras loss returns y_pred.
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 50
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))
    model.fit(
        [image_data, boxes, detectors_mask, matching_true_boxes],
        np.zeros(len(image_data)),
        batch_size=1,
        epochs=num_steps,
    )
    model.save('model.h5')
    model.save_weights('overfit_weights.h5')

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    pred_boxes, pred_scores, pred_classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [pred_boxes, pred_scores, pred_classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
def train_yolo_model(numb_image=10,
                     learning_rate=0.00005,
                     num_epochs=100,
                     minibatch_size=5,
                     print_cost=True,
                     model_path="./model_data/yolo_trained_model.h5"):
    """
    Create and train yolo model

    Relies on module-level ``anchors`` and ``class_names`` being defined.

    Parameters:
    -----------
    numb_image : int
        number of images used for training
    learning_rate : float
        learning rate of the optimization
    num_epochs : int
        number of epochs of the optimization loop
    minibatch_size : int
        size of a minibatch
    print_cost : boolean
        True to print the cost every 5 epochs and to record the cost of
        every epoch in the returned list
    model_path : string
        location to save trained model

    Returns:
    --------
    costs : list
        mean minibatch cost of each epoch (left empty when ``print_cost``
        is false, as before)
    """
    costs = []  # To keep track of the cost

    # Load data
    image_size = (608, 608)
    dataset = load_data(numb_image, "model_data/true_boxes.h5")

    # Create model
    X = Input(batch_shape=[None, 608, 608, 3], dtype=tf.float32)
    Y1 = tf.placeholder(shape=[None, 10, 5], dtype=tf.float32)  # true boxes
    Y2 = tf.placeholder(shape=[None, 19, 19, 5, 1],
                        dtype=tf.float32)  # detector mask
    Y3 = tf.placeholder(shape=[None, 19, 19, 5, 5],
                        dtype=tf.float32)  # matching true boxes

    yolo_m = yolo_body(inputs=X,
                       num_anchors=len(anchors),
                       num_classes=len(class_names))
    args = [yolo_m.output, Y1, Y2, Y3]
    cost = yolo_loss(args, anchors, len(class_names))

    # Backpropagation: an AdamOptimizer that minimizes the cost.
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

    # Initialize all the variables globally
    init = tf.global_variables_initializer()

    # Run session to train network
    with tf.Session() as sess:
        sess.run(init)

        for epoch in range(num_epochs):
            minibatches = list(
                random_mini_batches(dataset['X'], dataset['Y'],
                                    minibatch_size))
            # Average over the batches actually produced. The previous
            # int(numb_image / minibatch_size) was 0 (ZeroDivisionError)
            # when numb_image < minibatch_size and ignored a partial batch.
            num_minibatches = len(minibatches)
            minibatch_cost = 0.

            for minibatch_X, minibatch_Y in minibatches:
                # Preprocess true boxes: one (mask, matching boxes) pair per
                # sample, collected and stacked once instead of growing an
                # array with repeated np.append.
                masks = []
                matches = []
                for i in range(minibatch_Y.shape[0]):
                    mask, match = preprocess_true_boxes(
                        minibatch_Y[i, :, :], anchors, image_size)
                    masks.append(mask)
                    matches.append(match)
                detectors_mask = np.array(masks)
                matching_true_boxes = np.array(matches)

                # Run the session to execute the optimizer and the cost
                _, temp_cost = sess.run(
                    [optimizer, cost],
                    feed_dict={
                        X: minibatch_X,
                        Y1: minibatch_Y,
                        Y2: detectors_mask,
                        Y3: matching_true_boxes
                    })
                minibatch_cost += temp_cost / num_minibatches

            # Print the cost every 5 epochs; record it every epoch.
            if print_cost and epoch % 5 == 0:
                print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
            if print_cost:
                costs.append(minibatch_cost)

        # NOTE(review): saved while the training session is still open so the
        # trained variable values are the ones available; the original
        # nesting of this call was not recoverable -- confirm.
        yolo_m.save_weights(model_path)

    return costs
def _main(args):
    """Overfit YOLO on a single training image of an HDF5 dataset and plot it.

    Loads class names and anchors, reads sample 28 of the ``train`` split,
    builds the detector mask / matching-box targets for it, trains on that
    one sample for ``num_steps`` epochs, saves the weights to
    ``overfit_weights.h5``, then runs prediction on the same image and shows
    the detections with matplotlib.

    Args:
        args: Object with ``data_path``, ``classes_path`` and
            ``anchors_path`` attributes. If ``anchors_path`` is not an
            existing file, ``YOLO_ANCHORS`` is used instead.
    """
    voc_path = os.path.expanduser(args.data_path)
    classes_path = os.path.expanduser(args.classes_path)
    anchors_path = os.path.expanduser(args.anchors_path)

    with open(classes_path) as f:
        class_names = [line.strip() for line in f]

    if os.path.isfile(anchors_path):
        # The first line holds comma-separated floats, read as pairs.
        with open(anchors_path) as f:
            anchors = [float(x) for x in f.readline().split(',')]
        anchors = np.array(anchors).reshape(-1, 2)
    else:
        anchors = YOLO_ANCHORS

    # Read the one sample we need, then release the HDF5 file handle.
    with h5py.File(voc_path, 'r') as voc:
        image = PIL.Image.open(io.BytesIO(voc['train/images'][28]))
        # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
        boxes = voc['train/boxes'][28]

    orig_size = np.array([image.width, image.height])
    orig_size = np.expand_dims(orig_size, axis=0)

    # Image preprocessing.
    image = image.resize((416, 416), PIL.Image.BICUBIC)
    # np.float was removed in NumPy 1.24; it was an alias of builtin float.
    image_data = np.array(image, dtype=float)
    image_data /= 255.

    # Box preprocessing.
    boxes = boxes.reshape((-1, 5))

    # Get extents as y_min, x_min, y_max, x_max, class for comparison with
    # model output.
    boxes_extents = boxes[:, [2, 1, 4, 3, 0]]

    # Get box parameters as x_center, y_center, box_width, box_height, class,
    # normalized by the original image width and height.
    boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3])
    boxes_wh = boxes[:, 3:5] - boxes[:, 1:3]
    boxes_xy = boxes_xy / orig_size
    boxes_wh = boxes_wh / orig_size
    boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1)

    # Precompute detectors_mask and matching_true_boxes for training.
    # Detectors mask is 1 for each spatial position in the final conv layer and
    # anchor that should be active for the given boxes and 0 otherwise.
    # Matching true boxes gives the regression targets for the ground truth box
    # that caused a detector to be active or 0 otherwise.
    detectors_mask_shape = (13, 13, 5, 1)
    matching_boxes_shape = (13, 13, 5, 5)
    detectors_mask, matching_true_boxes = preprocess_true_boxes(
        boxes, anchors, [416, 416])

    # Create model input layers.
    image_input = Input(shape=(416, 416, 3))
    boxes_input = Input(shape=(None, 5))
    detectors_mask_input = Input(shape=detectors_mask_shape)
    matching_boxes_input = Input(shape=matching_boxes_shape)

    active_detectors = np.where(detectors_mask == 1)[:-1]
    print('Boxes:')
    print(boxes)
    print('Box corners:')
    print(boxes_extents)
    print('Active detectors:')
    print(active_detectors)
    print('Matching boxes for active detectors:')
    print(matching_true_boxes[active_detectors])

    # Create model body.
    model_body = yolo_body(image_input, len(anchors), len(class_names))
    model_body = Model(image_input, model_body.output)

    # Place model loss on CPU to reduce GPU memory usage.
    with tf.device('/cpu:0'):
        # TODO: Replace Lambda with custom Keras layer for loss.
        model_loss = Lambda(
            yolo_loss,
            output_shape=(1, ),
            name='yolo_loss',
            arguments={'anchors': anchors,
                       'num_classes': len(class_names)})([
                           model_body.output, boxes_input,
                           detectors_mask_input, matching_boxes_input
                       ])
    model = Model(
        [image_input, boxes_input, detectors_mask_input, matching_boxes_input],
        model_loss)
    # This is a hack to use the custom loss function in the last layer: the
    # Lambda layer already outputs the loss, so the Keras loss returns y_pred.
    model.compile(
        optimizer='adam', loss={
            'yolo_loss': lambda y_true, y_pred: y_pred
        })

    # Add batch dimension for training.
    image_data = np.expand_dims(image_data, axis=0)
    boxes = np.expand_dims(boxes, axis=0)
    detectors_mask = np.expand_dims(detectors_mask, axis=0)
    matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0)

    num_steps = 1000
    # TODO: For full training, put preprocessing inside training loop.
    # for i in range(num_steps):
    #     loss = model.train_on_batch(
    #         [image_data, boxes, detectors_mask, matching_true_boxes],
    #         np.zeros(len(image_data)))
    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
              np.zeros(len(image_data)),
              batch_size=1,
              epochs=num_steps)
    model.save_weights('overfit_weights.h5')

    # Create output variables for prediction.
    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
    input_image_shape = K.placeholder(shape=(2, ))
    pred_boxes, pred_scores, pred_classes = yolo_eval(
        yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9)

    # Run prediction on overfit image.
    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
    out_boxes, out_scores, out_classes = sess.run(
        [pred_boxes, pred_scores, pred_classes],
        feed_dict={
            model_body.input: image_data,
            input_image_shape: [image.size[1], image.size[0]],
            K.learning_phase(): 0
        })
    print('Found {} boxes for image.'.format(len(out_boxes)))
    print(out_boxes)

    # Plot image with predicted boxes.
    image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes,
                                  class_names, out_scores)
    plt.imshow(image_with_boxes, interpolation='nearest')
    plt.show()
true_boxes = np.array( [[0.16, 0.51355422, 0.296, 0.68975904, 14.], [0.428, 0.47590361, 0.276, 0.60240964, 14.], [0.664, 0.5813253, 0.432, 0.8373494, 14.], [0.863, 0.64457831, 0.274, 0.7108433, 14]]) image_size = (416, 416) anchors = np.array([ [0.738768, 0.874946], [2.42204, 2.65704], [4.30971, 7.04493], [10.246, 4.59428], [12.6868, 11.8741] ]) actual_dm, actual_mtb = preprocess_true_boxes(true_boxes, anchors, image_size) def numpy_ptb(): height, width = image_size num_anchors = len(anchors) # Downsampling factor of 5x 2-stride max_pools == 32. assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' conv_height = height // 32 conv_width = width // 32 detectors_mask = np.zeros((conv_height, conv_width, num_anchors, 1)) matching_true_boxes = np.zeros((conv_height, conv_width, num_anchors, 5)) boxes_i = np.floor(true_boxes[:,1] * conv_height).astype('int') boxes_j = np.floor(true_boxes[:,0] * conv_width).astype('int')