def get_model(weights_path):
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    convert_to_3_channels = ConvertTo3Channels()
    resize = Resize(height=img_height, width=img_width)
    K.clear_session()
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    # 2: Load some weights into the model.
    model.load_weights(weights_path, by_name=True)
    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)
    return model
def create_model(model_type='ssd300', dataset='voc2007', dtype='float32'): if model_type == 'ssd300': model = ssd_300(image_size=(300, 300, 3), n_classes=20 if dataset == 'voc2007' else 80, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] if dataset == 'voc2007' else [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05], aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.01, iou_threshold=0.45, top_k=200, nms_max_output_size=400, dtype=dtype) if dataset == 'voc2007':
def build_model(args: argparse.Namespace, weights_path: str) -> Model:
    K.clear_session()
    model = ssd_300(image_size=(args.img_height, args.img_width, args.img_channels),
                    n_classes=args.n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=args.scales,
                    aspect_ratios_per_layer=args.aspect_ratios,
                    two_boxes_for_ar1=args.two_boxes_for_ar1,
                    steps=args.steps,
                    offsets=args.offsets,
                    clip_boxes=args.clip_boxes,
                    variances=args.variances,
                    normalize_coords=args.normalize_coords,
                    subtract_mean=args.mean_color,
                    swap_channels=args.swap_channels)
    model.load_weights(weights_path, by_name=True)
    sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)
    return model
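# Illustrative only: a minimal sketch of the configuration namespace that
# build_model() above reads from. The values shown are the standard SSD300 /
# Pascal VOC settings used elsewhere in this file; the actual values (and
# n_classes in particular) depend on your dataset and are an assumption here.
import argparse

example_args = argparse.Namespace(
    img_height=300, img_width=300, img_channels=3,
    n_classes=20,
    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    aspect_ratios=[[1.0, 2.0, 0.5],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                   [1.0, 2.0, 0.5],
                   [1.0, 2.0, 0.5]],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 100, 300],
    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
    clip_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    normalize_coords=True,
    mean_color=[123, 117, 104],
    swap_channels=[2, 1, 0])
# model = build_model(example_args, 'VGG_ILSVRC_16_layers_fc_reduced.h5')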
def get_model(self, mode='inference', weights_path='', n_classes='', id2digit=''): # # n_classes, id2digit: for inference config = self.config if n_classes: # inference setting self.n_classes = n_classes if id2digit: self.id2digit = id2digit self.model = ssd_300( image_size=(config.img_height, config.img_width, config.img_channels), n_classes=self.n_classes, mode=mode, l2_regularization=0.0005, scales=config.scales, aspect_ratios_per_layer=config.aspect_ratios, two_boxes_for_ar1=config.two_boxes_for_ar1, steps=config.steps, offsets=config.offsets, clip_boxes=config.clip_boxes, variances=config.variances, normalize_coords=config.normalize_coords, subtract_mean=config.subtract_mean, # divide_by_stddev=None, # swap_channels=config.swap_channels, confidence_thresh=0.5, # iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False) if weights_path: print(f'Loading weights from {weights_path}') self.model.load_weights(weights_path, by_name=True) self.weights_path = weights_path #adam = Adam(lr=0.005, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0) self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
def __init__(self, confidence_threshold=0.5): self.confidence_th = confidence_threshold # 0: Set the image size. img_height = 300 img_width = 300 # 1: Build the Keras model self.loaded_model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # 2: Load the trained weights into the model. weights_path = 'models/VGG_VOC0712_SSD_300x300_iter_240000.h5' self.loaded_model.load_weights(weights_path, by_name=True) # 3: Compile the model so that Keras won't complain the next time you load it. adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) self.loaded_model.compile(optimizer=adam, loss=ssd_loss.compute_loss) # 4: make prediction (graph) self.loaded_model._make_predict_function()
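# Illustrative only: a hypothetical helper (not part of the original class
# above) showing how a single OpenCV BGR frame could be run through a model
# built as in __init__, following the same resize / predict /
# confidence-threshold pattern used by the other snippets in this file.
import cv2
import numpy as np

def detect_single_frame(model, frame_bgr, confidence_th=0.5, img_height=300, img_width=300):
    # The model is built with an RGB input convention (swap_channels handles the
    # BGR order of the original weights), so convert the OpenCV frame first.
    rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, (img_width, img_height))
    batch = np.expand_dims(resized.astype(np.float32), axis=0)
    y_pred = model.predict(batch)
    # In 'inference' mode each detection row is [class_id, conf, xmin, ymin, xmax, ymax].
    return y_pred[0][y_pred[0, :, 1] > confidence_th]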
def build_model_300(self): # 1: Build the Keras model K.clear_session() # Clear previous models from memory. self.model = ssd_300( image_size=(self.img_height, self.img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # 2: Load the trained weights into the model. # TODO: Set the path of the trained weights. weights_path = self.weights_path self.model.load_weights(weights_path, by_name=True) # 3: Compile the model so that Keras won't complain the next time you load it. adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
def load_ssd300(self): print('loading SSD 300 ... ') img_shape = self.conf['IMG_SHAPE'] classes = self.conf['CLASSES'] swap_channels = [ 2, 1, 0 ] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = len(classes) scales_pascal = [ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales = scales_pascal aspect_ratios = [ [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5] ] # The anchor box aspect ratios used in the original SSD300; the order matters steps = [ 8, 16, 32, 64, 100, 300 ] # The space between two adjacent anchor box center points for each predictor layer. two_boxes_for_ar1 = True mean_color = [123, 117, 104] #TODO : add this as a parameter offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] clip_boxes = False variances = [0.1, 0.1, 0.2, 0.2] normalize_coords = True batch_size = self.conf['BATCH_SIZE'] model = ssd_300(image_size=tuple(img_shape), n_classes=20, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) self.load_weights(model) return model
def init_model(weights_path='./ssdweights/rovio_v2.h5'):
    img_height = 300
    img_width = 300
    dirname = os.path.dirname(os.path.abspath(__file__))
    assert os.path.exists(weights_path), '%s not found...' % weights_path
    K.clear_session()  # Clear previous models from memory.
    model = ssd_300(image_size=(img_height, img_width, 3),
                    n_classes=2,
                    mode='inference_fast',
                    l2_regularization=0.0005,
                    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
                    aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
                                             [1.0, 2.0, 0.5],
                                             [1.0, 2.0, 0.5]],
                    two_boxes_for_ar1=True,
                    steps=[8, 16, 32, 64, 100, 300],
                    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
                    limit_boxes=False,
                    variances=[0.1, 0.1, 0.2, 0.2],
                    coords='centroids',
                    normalize_coords=True,
                    subtract_mean=[123, 117, 104],
                    swap_channels=True,
                    confidence_thresh=0.5,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400)
    model.load_weights(weights_path, by_name=True)
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
    return model, ['background', 'rovio', 'rovio']
def __init__(self,required_class=[2,6,7,14,15],weights_path='./VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5',img_height = 300,img_width = 300): self.img_height,self.img_width=img_height,img_width K.clear_session() # Clear previous models from memory. self.model = ssd_300(image_size=(self.img_height, self.img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) self.classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] self.required_class=required_class self.model.load_weights(weights_path, by_name=True)
def create_network():
    # 1: Build the Keras model.
    K.clear_session()  # Clear previous models from memory.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='training',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=mean_color,
                    swap_channels=swap_channels)
    # 2: Load some weights into the model.
    # TODO: Set the path to the weights you want to load.
    weights_path = 'D:/Develop/models/VOC0712/SSD_300x300/VGG_VOC0712_SSD_300x300_iter_120000.h5'
    model.load_weights(weights_path, by_name=True)
    freeze = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
              'conv2_1', 'conv2_2', 'pool2',
              'conv3_1', 'conv3_2', 'conv3_3', 'pool3']
              # 'conv4_1', 'conv4_2', 'conv4_3', 'pool4'
    for L in model.layers:
        if L.name in freeze:
            L.trainable = False
    return model
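# Illustrative only: create_network() above returns the partially frozen model
# without compiling it. A minimal sketch of compiling it for training, using
# the same SGD + SSDLoss settings as the other training snippets in this file;
# the optimizer choice and hyperparameters are assumptions, not part of the
# original function.
model = create_network()
sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(optimizer=sgd, loss=ssd_loss.compute_loss)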
def perimeter_detection(weights_path, video_path, result_path, threshold, perimeter_a, perimeter_b): img_height = 300 img_width = 300 K.clear_session() # Clear previous models from memory. model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.1, iou_threshold=0.45, top_k=200, nms_max_output_size=400) model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) original_images = [] process_images = [] cap = cv2.VideoCapture(video_path) num = 0 while (cap.isOpened()): ret, frame = cap.read() if ret == True: transposed_frame = cv2.transpose(frame) transposed_frame = cv2.flip(transposed_frame, 1) original_images.append(transposed_frame) subtracted_image = cv2.subtract(transposed_frame, original_images[0]) subtracted_image = subtracted_image[600:1000, 0:720] #subtracted_image = cv2.bitwise_not(subtracted_image) subtracted_image = enhance_image(subtracted_image) cv2.imwrite( 'perimeter_detection/sub_images/sub_' + str(num) + '.jpg', subtracted_image) resize_image = cv2.resize(subtracted_image, (img_height, img_width)) process_images.append(resize_image) num += 1 k = cv2.waitKey(20) if k & 0xff == ord('q'): break else: break print(len(original_images)) process_images = np.array(process_images) cap.release() # start_time = time.time() y_pred = model.predict(process_images, batch_size=8) # end_time = time.time() # print(end_time - start_time) confidence_threshold = 0.1 y_pred_thresh = [ y_pred[k][y_pred[k, :, 1] > confidence_threshold] for k in range(y_pred.shape[0]) ] np.set_printoptions(precision=2, suppress=True, linewidth=90) print(' class conf xmin ymin xmax ymax') fourcc = cv2.VideoWriter_fourcc(*'MJPG') result_video = cv2.VideoWriter( 'result.avi', fourcc, 25.0, (original_images[0].shape[0], original_images[0].shape[1])) for k in range(len(y_pred_thresh)): print(k) print(y_pred_thresh[k]) #colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist() classes = [ 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] #plt.figure(figsize=(12, 8)) #plt.imshow(original_images[k]) #plt.xticks([]) #plt.yticks([]) #current_axis = plt.gca() for box in y_pred_thresh[k]: if box[0] != 15: continue # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions. 
#xmin = box[2] * original_images[k].shape[1] / img_width #ymin = box[3] * original_images[k].shape[0] / img_height #xmax = box[4] * original_images[k].shape[1] / img_width #ymax = box[5] * original_images[k].shape[0] / img_height xmin = box[2] * 720 / 300 ymin = box[3] * 400 / 300 + 600 xmax = box[4] * 720 / 300 ymax = box[5] * 400 / 300 + 600 if xmin < 400: continue #color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) #current_axis.add_patch( # plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) #current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0}) cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 2) #plt.savefig(result_path + '/detection_' + str(k) + '.jpg', format='jpg') cv2.imwrite(result_path + '/detection_' + str(k) + '.jpg', original_images[k]) result_image = original_images[k] transposed_image = cv2.transpose(result_image) transposed_image = cv2.flip(transposed_image, 0) result_video.write(transposed_image) plt.close('all') result_video.release() cv2.destroyAllWindows() '''
def __init__(self): rospy.init_node('model_tester_keras') self.t_detect = 1.5 # minimum time between inferences self.t_last_detect = rospy.Time.now() self.img_pub = rospy.Publisher('image_detect', Image, queue_size=1) img_height = 300 # Height of the input images img_width = 300 # Width of the input images img_channels = 3 # Number of color channels of the input images subtract_mean = [123, 117, 104 ] # The per-channel mean of the images in the dataset swap_channels = [ 2, 1, 0 ] # The color channel order in the original SSD is BGR, so we should set this to `True`, but weirdly the results are better without swapping. n_classes = 8 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales = [ 0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05 ] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets. # scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets. aspect_ratios = [ [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5] ] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [ 8, 16, 32, 64, 100, 300 ] # The space between two adjacent anchor box center points for each predictor layer. offsets = [ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries variances = [ 0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are scaled as in the original implementation normalize_coords = True keras.backend.clear_session() self.model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='inference', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=subtract_mean, divide_by_stddev=None, swap_channels=swap_channels, confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False) self.model.load_weights(os.path.join(module_path, 'ssdx_wt.h5'), by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss) # keras voodoo global graph graph = tf.get_default_graph() self.model._make_predict_function() img_sub = rospy.Subscriber('image_color', Image, self.img_cb, queue_size=1) self.bridge = CvBridge() rospy.loginfo("READY") rospy.spin()
def detect_from_video(config: Dict): """Inference on a video with output a video showing all prediction Parameters ---------- config : Dict Config yaml/json containing all parameter """ video = config['inference']['video_input']['video_input_path'] vp = VideoProcessing(video=video) vp.generate_frames(export_path=config['inference']['video_input']['video_to_frames_export_path']) if config['inference']['video_input']['video_to_frames_export_path'] == config['inference']['predicted_frames_export_path']: print("[Warning]... You have given Video to frame path same as prediction output path /nPredicted output will overwrite video to frame") img_height = config['inference']['img_height'] img_width = config['inference']['img_width'] model = ssd_300(image_size=(img_height, img_width, 3), n_classes=config['inference']['n_classes'], mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # Load the trained weights into the model. weights_path = config['inference']['weights_path'] model.load_weights(weights_path, by_name=True) # Working with image all_images = glob.glob(f"{config['inference']['video_input']['video_to_frames_export_path']}/*/*") # Setting Up Prediction Threshold confidence_threshold = config['inference']['confidence_threshold'] # Setting Up Classes (Note Should be in same order as in training) classes = config['inference']['classes'] vp.existsFolder(f"{config['inference']['predicted_frames_export_path']}/{video.split('.')[0]}") # Working with image for current_img in tqdm(all_images): current_img_name = current_img.split('/')[-1] orig_image = cv2.imread(current_img) input_images = [] # Store resized versions of the images here img = image.load_img(current_img, target_size=(img_height, img_width)) img = image.img_to_array(img) input_images.append(img) input_images = np.array(input_images) # Prediction y_pred = model.predict(input_images) # Using threshold y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])] # Drawing Boxes for box in y_pred_thresh[0]: xmin = box[2] * orig_image.shape[1] / img_width ymin = box[3] * orig_image.shape[0] / img_height xmax = box[4] * orig_image.shape[1] / img_width ymax = box[5] * orig_image.shape[0] / img_height label = f"{classes[int(box[0])]}: {box[1]:.2f}" cv2.rectangle(orig_image, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (255, 0, 0), 2) cv2.putText(orig_image, label, (int(xmin), int(ymin)), cv2.FONT_HERSHEY_SIMPLEX, 2, (255, 255, 255), 2, cv2.LINE_AA) cv2.imwrite(f"{config['inference']['predicted_frames_export_path']}/{video.split('.')[0]}/{current_img_name}", orig_image) # Creating video vp.generate_video(import_path=config['inference']['predicted_frames_export_path'], export_path=config['inference']['video_input']['video_output_path'])
def perimeter_detection(weights_path, image_path, result_path, threshold, perimeter_a, perimeter_b): img_height = 300 img_width = 300 K.clear_session() # Clear previous models from memory. model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) original_images = [] process_images = [] file_names = [] for root, dirs, files in os.walk(image_path): for file in files: file_names.append(file) img_path = image_path + '/' + file original_images.append(imread(img_path)) resize_image = image.load_img(img_path, target_size=(img_height, img_width)) resize_image = image.img_to_array(resize_image) process_images.append(resize_image) process_images = np.array(process_images) #start_time = time.time() y_pred = model.predict(process_images, batch_size=8) #end_time = time.time() #print(end_time - start_time) confidence_threshold = 0.5 y_pred_thresh = [ y_pred[k][y_pred[k, :, 1] > confidence_threshold] for k in range(y_pred.shape[0]) ] np.set_printoptions(precision=2, suppress=True, linewidth=90) print(' class conf xmin ymin xmax ymax') ''' for k in range(len(y_pred_thresh)): print(file_names[k]) print(y_pred_thresh[k]) colors = plt.cm.hsv(np.linspace(0, 1, 21)).tolist() classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] plt.figure(figsize=(12, 8)) plt.imshow(original_images[k]) plt.xticks([]) plt.yticks([]) current_axis = plt.gca() for box in y_pred_thresh[k]: # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions. 
xmin = box[2] * original_images[k].shape[1] / img_width ymin = box[3] * original_images[k].shape[0] / img_height xmax = box[4] * original_images[k].shape[1] / img_width ymax = box[5] * original_images[k].shape[0] / img_height color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0}) plt.savefig(result_path + '/detection_' + file_names[k], format='jpg') plt.close('all') ''' #start_time = time.time() vector_a = np.array( [perimeter_a[0] - perimeter_b[0], perimeter_a[1] - perimeter_b[1]]) distance_a = np.linalg.norm(vector_a) for k in range(len(y_pred_thresh)): print(file_names[k]) print(y_pred_thresh[k]) plt.figure(figsize=(12, 8)) plt.imshow(original_images[k]) plt.xticks([]) plt.yticks([]) current_axis = plt.gca() flag = 0 for box in y_pred_thresh[k]: if box[0] != 15: continue # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions. xmin = box[2] * original_images[k].shape[1] / img_width ymin = box[3] * original_images[k].shape[0] / img_height xmax = box[4] * original_images[k].shape[1] / img_width ymax = box[5] * original_images[k].shape[0] / img_height vector_b = np.array([xmin - perimeter_a[0], ymin - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: flag = 1 current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='#FF0000', fill=False, linewidth=2)) continue vector_b = np.array([xmin - perimeter_a[0], ymax - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: flag = 1 current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='#FF0000', fill=False, linewidth=2)) continue vector_b = np.array([xmax - perimeter_a[0], ymin - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: flag = 1 current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='#FF0000', fill=False, linewidth=2)) continue vector_b = np.array([xmax - perimeter_a[0], ymax - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: flag = 1 current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='#FF0000', fill=False, linewidth=2)) continue current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='#00FF00', fill=False, linewidth=2)) print(flag) line = Line2D([perimeter_a[0], perimeter_b[0]], [perimeter_a[1], perimeter_b[1]], color='#000000') current_axis.add_line(line) #plt.plot([perimeter_a[0], perimeter_b[0]], [perimeter_a[1], perimeter_b[1]], 'k') plt.savefig(result_path + '/perimeter_' + file_names[k], format='jpg') plt.close('all')
def _main_(args): print('Hello World! This is {:s}'.format(args.desc)) # config_path = args.conf # with open(config_path) as config_buffer: # config = json.loads(config_buffer.read()) ############################################################# # Set model parameters ############################################################# img_height = 300 # Height of the model input images img_width = 300 # Width of the model input images img_channels = 3 # Number of color channels of the model input images mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights. swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets scales = scales_pascal aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer. offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True ############################################################# # Create the model ############################################################# # 1: Build the Keras model. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) # 2: Load some weights into the model. # 3: Instantiate an optimizer and the SSD loss function and compile the model. adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) ############################################################# # Prepare the data ############################################################# # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. 
VOC_2007_images_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/JPEGImages' VOC_2007_annotations_dir = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/Annotations' VOC_2007_train_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/train.txt' VOC_2007_val_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/val.txt' # VOC_2007_trainval_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtrainval_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/trainval.txt' # VOC_2007_test_image_set_filename = '/home/minhnc-lab/WORKSPACES/AI/data/VOC/VOCtest_06-Nov-2007/VOCdevkit/VOC2007/ImageSets/Main/test.txt' classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] train_dataset.parse_xml(images_dirs=[VOC_2007_images_dir], image_set_filenames=[VOC_2007_train_image_set_filename], annotations_dirs=[VOC_2007_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) val_dataset.parse_xml(images_dirs=[VOC_2007_images_dir], image_set_filenames=[VOC_2007_val_image_set_filename], annotations_dirs=[VOC_2007_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False) train_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07+12_trainval.h5', resize=False, variable_image_size=True, verbose=True) val_dataset.create_hdf5_dataset(file_path='dataset_pascal_voc_07_test.h5', resize=False, variable_image_size=True, verbose=True) # 3: Set the batch size. batch_size = 8 # Change the batch size if you like, or if you run into GPU memory issues. # 4: Set the image transformations for pre-processing and data augmentation options. ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. predictor_sizes = [model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3]] ssd_input_encoder = SSDInputEncoder(img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. 
train_generator = train_dataset.generate(batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate(batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format(train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format(val_dataset_size)) ############################################################# # Kick off the training ############################################################# # Define model callbacks. model_checkpoint = ModelCheckpoint( filepath='ssd300_pascal_07+12_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) csv_logger = CSVLogger(filename='ssd300_pascal_07+12_training_log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan] # Train initial_epoch = 0 final_epoch = 120 steps_per_epoch = 1000 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) ############################################################# # Run the evaluation ############################################################# # 1: Set the generator for the predictions. predict_generator = val_dataset.generate(batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels'}, keep_images_without_gt=False) # 2: Generate samples. batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( predict_generator) i = 0 # Which batch item to look at print("Image:", batch_filenames[i]) print() print("Ground truth boxes:\n") print(np.array(batch_original_labels[i])) # 3: Make predictions. y_pred = model.predict(batch_images) # 4: Decode the raw predictions in `y_pred`. y_pred_decoded = decode_detections(y_pred, confidence_thresh=0.5, iou_threshold=0.4, top_k=200, normalize_coords=normalize_coords, img_height=img_height, img_width=img_width) # 5: Convert the predictions for the original image. 
y_pred_decoded_inv = apply_inverse_transforms(y_pred_decoded, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded_inv[i]) # 6: Draw the predicted boxes onto the image # Set the colors for the bounding boxes colors = plt.cm.hsv(np.linspace(0, 1, n_classes + 1)).tolist() classes = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'] plt.figure(figsize=(20, 12)) plt.imshow(batch_original_images[i]) current_axis = plt.gca() for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': 'green', 'alpha': 1.0}) for box in y_pred_decoded_inv[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor': color, 'alpha': 1.0})
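# Illustrative only: the training snippet above passes an `lr_schedule` to
# LearningRateScheduler without showing its definition. A minimal sketch,
# assuming the usual three-step SSD300 Pascal VOC schedule (the same values
# appear in the commented-out schedule further below); adjust the epoch
# boundaries to your own training run.
def lr_schedule(epoch):
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001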
def main(job_dir, **args): ##Setting up the path for saving logs logs_dir = job_dir + 'logs/' data_dir = "gs://deeplearningteam11/data" print("Current Directory: " + os.path.dirname(__file__)) print("Lets copy the data to: " + os.path.dirname(__file__)) os.system("gsutil -m cp -r " + data_dir + " " + os.path.dirname(__file__) + " > /dev/null 2>&1 ") #exit(0) with tf.device('/device:GPU:0'): # 1: Build the Keras model. K.clear_session() # Clear previous models from memory. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) # Set the path to the `.h5` file of the model to be loaded. model_file = file_io.FileIO('gs://deeplearningteam11/vgg19BNmodel.h5', mode='rb') # Store model locally on instance model_path = 'model.h5' with open(model_path, 'wb') as f: f.write(model_file.read()) model_file.close() ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'DecodeDetections': DecodeDetections, 'compute_loss': ssd_loss.compute_loss }) for layer in model.layers: layer.trainable = True model.summary() # 1: Instantiate two `DataGenerator` objects: One for training, one for validation. train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. # VOC 2007 # The directories that contain the images. VOC_2007_train_images_dir = 'data/data/VOC2007/train/JPEGImages/' VOC_2007_test_images_dir = 'data/data/VOC2007/test/JPEGImages/' VOC_2007_train_anns_dir = 'data/data/VOC2007/train/Annotations/' VOC_2007_test_anns_dir = 'data/data/VOC2007/test/Annotations/' # The paths to the image sets. VOC_2007_trainval_image_set_dir = 'data/data/VOC2007/train/ImageSets/Main/' VOC_2007_test_image_set_dir = 'data/data/VOC2007/test/ImageSets/Main/' VOC_2007_train_images_dir = os.path.dirname( __file__) + "/" + VOC_2007_train_images_dir VOC_2007_test_images_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_images_dir VOC_2007_train_anns_dir = os.path.dirname( __file__) + "/" + VOC_2007_train_anns_dir VOC_2007_test_anns_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_anns_dir VOC_2007_trainval_image_set_dir = os.path.dirname( __file__) + "/" + VOC_2007_trainval_image_set_dir VOC_2007_test_image_set_dir = os.path.dirname( __file__) + "/" + VOC_2007_test_image_set_dir VOC_2007_trainval_image_set_filename = VOC_2007_trainval_image_set_dir + '/trainval.txt' VOC_2007_test_image_set_filename = VOC_2007_test_image_set_dir + '/test.txt' # The XML parser needs to now what object class names to look for and in which order to map them to integers. 
classes = [ 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] print("Parsing Training Data ...") train_dataset.parse_xml( images_dirs=[VOC_2007_train_images_dir], image_set_filenames=[VOC_2007_trainval_image_set_filename], annotations_dirs=[VOC_2007_train_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False, verbose=False) print("Done") print( "================================================================") print("Parsing Test Data ...") val_dataset.parse_xml( images_dirs=[VOC_2007_test_images_dir], image_set_filenames=[VOC_2007_test_image_set_filename], annotations_dirs=[VOC_2007_test_anns_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=True, ret=False, verbose=False) print("Done") print( "================================================================") # 3: Set the batch size. batch_size = 32 # Change the batch size if you like, or if you run into GPU memory issues. # 4: Set the image transformations for pre-processing and data augmentation options. # For the training generator: ssd_data_augmentation = SSDDataAugmentation(img_height=img_height, img_width=img_width, background=mean_color) # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_4_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3], model.get_layer('conv11_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) # Define a learning rate schedule. 
def lr_schedule(epoch): return 1e-6 # if epoch < 80: # return 0.001 # elif epoch < 100: # return 0.0001 # else: # return 0.00001 learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [learning_rate_scheduler, terminate_on_nan] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. initial_epoch = 120 final_epoch = 200 steps_per_epoch = 500 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) model_name = "vgg19BNmodel_cont.h5" model.save(model_name) with file_io.FileIO(model_name, mode='rb') as input_f: with file_io.FileIO("gs://deeplearningteam11/" + model_name, mode='w+') as output_f: output_f.write(input_f.read())
# 1: Build the Keras model
K.clear_session()  # Clear previous models from memory.
model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                # mode='inference',
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=subtract_mean,
                divide_by_stddev=None,
                swap_channels=swap_channels,
                confidence_thresh=0.5,
                iou_threshold=0.45,
                top_k=200,
                nms_max_output_size=400,
                return_predictor_sizes=False)
print("Model built.")
# 2: Load the sub-sampled weights into the model.
def main(): # create dataset dataset = DataGenerator() dataset.parse_xml(images_dirs=[dataset_images_dir], image_set_filenames=[test_image_set_filename], annotations_dirs=[dataset_annotations_dir], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) # create model model = ssd_300( image_size=(img_height, img_width, 3), n_classes=n_classes, mode=model_mode, l2_regularization=0.0005, scales=[ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=None, offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=1.0e-4, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # load weights and compile it model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) evaluator = Evaluator(model=model, n_classes=n_classes, data_generator=dataset, model_mode=model_mode) results = evaluator(img_height=img_height, img_width=img_width, batch_size=8, data_generator_mode='resize', round_confidences=False, matching_iou_threshold=0.5, border_pixels='include', sorting_algorithm='quicksort', average_precision_mode='sample', num_recall_points=11, ignore_neutral_boxes=True, return_precisions=True, return_recalls=True, return_average_precisions=True, verbose=True) mean_average_precision, average_precisions, precisions, recalls = results for i in range(1, len(average_precisions)): print("{:<14}{:<6}{}".format(classes[i], 'AP', round(average_precisions[i], 3))) print() print("{:<14}{:<6}{}".format('', 'mAP', round(mean_average_precision, 3))) m = max((n_classes + 1) // 2, 2) n = 2 fig, cells = plt.subplots(m, n, figsize=(n * 8, m * 8)) for i in range(m): for j in range(n): if n * i + j + 1 > n_classes: break cells[i, j].plot(recalls[n * i + j + 1], precisions[n * i + j + 1], color='blue', linewidth=1.0) cells[i, j].set_xlabel('recall', fontsize=14) cells[i, j].set_ylabel('precision', fontsize=14) cells[i, j].grid(True) cells[i, j].set_xticks(np.linspace(0, 1, 6)) cells[i, j].set_yticks(np.linspace(0, 1, 6)) cells[i, j].set_xlim(0.0, 1.0) cells[i, j].set_ylim(0.0, 1.0) cells[i, j].set_title("{}, AP: {:.3f}".format( classes[n * i + j + 1], average_precisions[n * i + j + 1]), fontsize=16) if not os.path.isdir("evaluate_result"): os.makedirs("evaluate_result") plt.savefig('evaluate_result/ssd300_face_detection.png')
def main(): model_mode = 'inference' K.clear_session() # Clear previous models from memory. model = ssd_300(image_size=(Config.img_height, Config.img_width, Config.img_channels), n_classes=Config.n_classes, mode=model_mode, l2_regularization=Config.l2_regularization, scales=Config.scales, aspect_ratios_per_layer=Config.aspect_ratios, two_boxes_for_ar1=True, steps=Config.steps, offsets=Config.offsets, clip_boxes=False, variances=Config.variances, normalize_coords=Config.normalize_coords, subtract_mean=Config.mean_color, swap_channels=[2, 1, 0], confidence_thresh=0.01, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # 2: Load the trained weights into the model. weights_path = os.getcwd() + '/weights/' + args.model_name + ".h5" model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) test_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_test.h5') test_dataset_size = test_dataset.get_dataset_size() print("Number of images in the test dataset:\t{:>6}".format( test_dataset_size)) classes = ['background', 'polyp'] generator = test_dataset.generate(batch_size=1, shuffle=True, transformations=[], returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) # Generate a batch and make predictions. i = 0 confidence_threshold = Config.confidence_threshold for val in range(test_dataset_size): batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( generator) print("Ground truth boxes:\n") print(np.array(batch_original_labels[i])) y_pred = model.predict(batch_images) # Perform confidence thresholding. y_pred_thresh = [ y_pred[k][y_pred[k, :, 1] > confidence_threshold] for k in range(y_pred.shape[0]) ] # Convert the predictions for the original image. 
# y_pred_thresh_inv = apply_inverse_transforms(y_pred_thresh, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_thresh[i]) plt.figure(figsize=(20, 12)) plt.imshow(batch_images[i]) current_axis = plt.gca() colors = plt.cm.hsv( np.linspace(0, 1, Config.n_classes + 1)).tolist() # Set the colors for the bounding boxes classes = [ 'background', 'polyps' ] # Just so we can print class names onto the image instead of IDs for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': 'green', 'alpha': 1.0 }) for box in y_pred_thresh[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': color, 'alpha': 1.0 }) image = plt.gcf() # plt.show() plt.draw() image.savefig(os.getcwd() + "/val_ssd300/val_" + str(val) + ".png", dpi=100) evaluator = Evaluator(model=model, n_classes=Config.n_classes, data_generator=test_dataset, model_mode=model_mode) results = evaluator(img_height=Config.img_height, img_width=Config.img_width, batch_size=args.batch_size, data_generator_mode='resize', round_confidences=False, matching_iou_threshold=0.3, border_pixels='include', sorting_algorithm='quicksort', average_precision_mode='sample', num_recall_points=11, ignore_neutral_boxes=True, return_precisions=True, return_recalls=True, return_average_precisions=True, verbose=True) mean_average_precision, average_precisions, precisions, recalls, tp_count, fp_count, fn_count, polyp_precision, polyp_recall = results print("TP : %d, FP : %d, FN : %d " % (tp_count, fp_count, fn_count)) print("{:<14}{:<6}{}".format('polyp', 'Precision ', round(polyp_precision, 3))) print("{:<14}{:<6}{}".format('polyp', 'Recall ', round(polyp_recall, 3))) # for i in range(1, len(average_precisions)): # print("{:<14}{:<6}{}".format(classes[i], 'AP', round(average_precisions[i], 3))) # # print("{:<14}{:<6}{}".format('', 'mAP', round(mean_average_precision, 3))) # print('Precisions', np.mean(precisions[1])) # print('Recalls', np.mean(recalls[1])) m = max((Config.n_classes + 1) // 2, 2) n = 2 fig, cells = plt.subplots(m, n, figsize=(n * 8, m * 8)) val = 0 for i in range(m): for j in range(n): if n * i + j + 1 > Config.n_classes: break cells[i, j].plot(recalls[n * i + j + 1], precisions[n * i + j + 1], color='blue', linewidth=1.0) cells[i, j].set_xlabel('recall', fontsize=14) cells[i, j].set_ylabel('precision', fontsize=14) cells[i, j].grid(True) cells[i, j].set_xticks(np.linspace(0, 1, 11)) cells[i, j].set_yticks(np.linspace(0, 1, 11)) cells[i, j].set_title("{}, AP: {:.3f}".format( classes[n * i + j + 1], average_precisions[n * i + j + 1]), fontsize=16) image = plt.gcf() # plt.show() plt.draw() image.savefig(os.getcwd() + "/test_out/test_" + str(val) + ".png", dpi=100) val += 1
def _main_(args): config_path = args.conf with open(config_path) as config_buffer: config = json.loads(config_buffer.read()) ############################### # Parse the annotations ############################### path_imgs_training = config['train']['train_image_folder'] path_anns_training = config['train']['train_annot_folder'] path_imgs_val = config['valid']['valid_image_folder'] path_anns_val = config['valid']['valid_annot_folder'] labels = config['model']['labels'] categories = {} #categories = {"Razor": 1, "Gun": 2, "Knife": 3, "Shuriken": 4} #la categorÃa 0 es la background for i in range(len(labels)): categories[labels[i]] = i + 1 print('\nTraining on: \t' + str(categories) + '\n') #################################### # Parameters ################################### #%% img_height = config['model']['input'] # Height of the model input images img_width = config['model']['input'] # Width of the model input images img_channels = 3 # Number of color channels of the model input images mean_color = [ 123, 117, 104 ] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights. swap_channels = [ 2, 1, 0 ] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images. n_classes = len( labels ) # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales_pascal = [ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets #scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets scales = scales_pascal aspect_ratios = [ [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0 ], [1.0, 2.0, 0.5 ], [1.0, 2.0, 0.5] ] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [ 8, 16, 32, 64, 100, 300 ] # The space between two adjacent anchor box center points for each predictor layer. offsets = [ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [ 0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True K.clear_session() # Clear previous models from memory. model_path = config['train']['saved_weights_name'] # 3: Instantiate an optimizer and the SSD loss function and compile the model. # If you want to follow the original Caffe implementation, use the preset SGD # optimizer, otherwise I'd recommend the commented-out Adam optimizer. if config['model']['backend'] == 'ssd512': aspect_ratios = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] steps = [ 8, 16, 32, 64, 100, 200, 300 ] # The space between two adjacent anchor box center points for each predictor layer. 
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05] elif config['model']['backend'] == 'ssd7': #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' scales = [ 0.08, 0.16, 0.32, 0.64, 0.96 ] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`. aspect_ratios = [0.5, 1.0, 2.0] # The list of aspect ratios for the anchor boxes two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1 steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended offsets = None if os.path.exists(model_path): print("\nLoading pretrained weights.\n") # We need to create an SSDLoss object in order to pass that to the model loader. ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'compute_loss': ssd_loss.compute_loss }) else: #################################### # Build the Keras model. ################################### if config['model']['backend'] == 'ssd300': #weights_path = 'VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5' from models.keras_ssd300 import ssd_300 as ssd model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) elif config['model']['backend'] == 'ssd512': #weights_path = 'VGG_VOC0712Plus_SSD_512x512_ft_iter_160000.h5' from models.keras_ssd512 import ssd_512 as ssd # 2: Load some weights into the model. model = ssd(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, swap_channels=swap_channels) elif config['model']['backend'] == 'ssd7': #weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' from models.keras_ssd7 import build_model as ssd scales = [ 0.08, 0.16, 0.32, 0.64, 0.96 ] # An explicit list of anchor box scaling factors. If this is passed, it will override `min_scale` and `max_scale`. aspect_ratios = [ 0.5, 1.0, 2.0 ] # The list of aspect ratios for the anchor boxes two_boxes_for_ar1 = True # Whether or not you want to generate two anchor boxes for aspect ratio 1 steps = None # In case you'd like to set the step sizes for the anchor box grids manually; not recommended offsets = None model = ssd(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_global=aspect_ratios, aspect_ratios_per_layer=None, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=None, divide_by_stddev=None) else: print('Wrong Backend') print('OK create model') #sgd = SGD(lr=config['train']['learning_rate'], momentum=0.9, decay=0.0, nesterov=False) # TODO: Set the path to the weights you want to load. 
# only for ssd300 or ssd512 weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' print("\nLoading pretrained weights VGG.\n") model.load_weights(weights_path, by_name=True) # 3: Instantiate an optimizer and the SSD loss function and compile the model. # If you want to follow the original Caffe implementation, use the preset SGD # optimizer, otherwise I'd recommend the commented-out Adam optimizer. #adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) #sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False) optimizer = Adam(lr=config['train']['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=optimizer, loss=ssd_loss.compute_loss) model.summary() ##################################################################### # Instantiate two `DataGenerator` objects: One for training, one for validation. ###################################################################### # Optional: If you have enough memory, consider loading the images into memory for the reasons explained above. train_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) val_dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None) # 2: Parse the image and label lists for the training and validation datasets. This can take a while. # The XML parser needs to know what object class names to look for and in which order to map them to integers. classes = ['background'] + labels train_dataset.parse_xml( images_dirs=[config['train']['train_image_folder']], image_set_filenames=[config['train']['train_image_set_filename']], annotations_dirs=[config['train']['train_annot_folder']], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) val_dataset.parse_xml( images_dirs=[config['valid']['valid_image_folder']], image_set_filenames=[config['valid']['valid_image_set_filename']], annotations_dirs=[config['valid']['valid_annot_folder']], classes=classes, include_classes='all', exclude_truncated=False, exclude_difficult=False, ret=False) ######################### # 3: Set the batch size. ######################### batch_size = config['train'][ 'batch_size'] # Change the batch size if you like, or if you run into GPU memory issues. ########################## # 4: Set the image transformations for pre-processing and data augmentation options. ########################## # For the training generator: # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=img_height, width=img_width) ###################################### # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. ######################################### # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes.
if config['model']['backend'] == 'ssd512': predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3], model.get_layer('conv10_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) elif config['model']['backend'] == 'ssd300': predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=normalize_coords) elif config['model']['backend'] == 'ssd7': predictor_sizes = [ model.get_layer('classes4').output_shape[1:3], model.get_layer('classes5').output_shape[1:3], model.get_layer('classes6').output_shape[1:3], model.get_layer('classes7').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=img_height, img_width=img_width, n_classes=n_classes, predictor_sizes=predictor_sizes, scales=scales, aspect_ratios_global=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.3, normalize_coords=normalize_coords) ####################### # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. ####################### train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ SSDDataAugmentation(img_height=img_height, img_width=img_width) ], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) # Get the number of samples in the training and validations datasets. train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) ########################## # Define model callbacks. ######################### # TODO: Set the filepath under which you want to save the model. 
model_checkpoint = ModelCheckpoint( filepath=config['train']['saved_weights_name'], monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=1) #model_checkpoint.best = csv_logger = CSVLogger(filename='log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule, verbose=1) terminate_on_nan = TerminateOnNaN() callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan ] #print(model.summary()) batch_images, batch_labels = next(train_generator) # i = 0 # Which batch item to look at # # print("Image:", batch_filenames[i]) # print() # print("Ground truth boxes:\n") # print(batch_labels[i]) initial_epoch = 0 final_epoch = config['train']['nb_epochs'] #final_epoch = 20 steps_per_epoch = 500 history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch, verbose=1 if config['train']['debug'] else 2)
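# The `LearningRateScheduler(schedule=lr_schedule, ...)` callbacks above and below
# refer to an `lr_schedule` function that is not shown in this excerpt. The sketch
# below is an assumed piecewise-constant schedule in the spirit of the original SSD
# training recipe; the real epoch boundaries and rates may differ.
def lr_schedule(epoch):
    """Illustrative step-wise learning-rate schedule (assumed values)."""
    if epoch < 80:
        return 0.001
    elif epoch < 100:
        return 0.0001
    else:
        return 0.00001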
steps_per_epoch = 1000 # checkpoint_path = './checkpoints/final_ssd.h5' checkpoint_path = './checkpoints/final_ssd' os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True) config = SSD300Config(pos_iou_threshold=0.5, neg_iou_limit=0.3) model, preprocess_input, predictor_sizes = ssd_300( weights='imagenet', image_size=config.input_shape, n_classes=num_classes, mode='training', l2_regularization=0.0005, scales=config.scales, aspect_ratios_per_layer=config.aspect_ratios, two_boxes_for_ar1=config.two_boxes_for_ar1, steps=config.strides, offsets=config.offsets, clip_boxes=config.clip_boxes, variances=config.variances, normalize_coords=config.normalize_coords, return_predictor_sizes=True) parser = Tfrpaser(config=config, predictor_sizes=predictor_sizes, num_classes=num_classes, batch_size=batch_size, preprocess_input=preprocess_input) dataset = parser.parse_tfrecords(
confidence_threshold = 0.7 K.clear_session() # Clear previous models from memory. model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5, 3.0, 1.0/3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=None, offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=confidence_threshold, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # 2: Load the trained weights into the model. # TODO: Set the path of the trained weights.
def perimeter_detection(weights_path, video_path, result_path, threshold, perimeter_a, perimeter_b): img_height = 300 img_width = 300 K.clear_session() # Clear previous models from memory. model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.1, iou_threshold=0.45, top_k=200, nms_max_output_size=400) model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) original_images = [] process_images = [] cap = cv2.VideoCapture(video_path) while (cap.isOpened()): ret, frame = cap.read() if ret == True: transposed_frame = cv2.transpose(frame) transposed_frame = cv2.flip(transposed_frame, 1) original_images.append(transposed_frame) k = cv2.waitKey(20) if k & 0xff == ord('q'): break else: break cap.release() cv2.destroyAllWindows() for k in range(8250): sub_image = cv2.imread('perimeter_detection/sub_images/sub_' + str(k) + '.jpg') resize_image = cv2.resize(sub_image, (img_height, img_width)) process_images.append(resize_image) print(len(original_images)) process_images = np.array(process_images) # start_time = time.time() y_pred = model.predict(process_images, batch_size=8) # end_time = time.time() # print(end_time - start_time) confidence_threshold = 0.1 y_pred_thresh = [ y_pred[k][y_pred[k, :, 1] > confidence_threshold] for k in range(y_pred.shape[0]) ] np.set_printoptions(precision=2, suppress=True, linewidth=90) print(' class conf xmin ymin xmax ymax') fourcc = cv2.VideoWriter_fourcc(*'MJPG') result_video = cv2.VideoWriter( 'result1.avi', fourcc, 25.0, (original_images[0].shape[0], original_images[0].shape[1])) vector_a = np.array( [perimeter_a[0] - perimeter_b[0], perimeter_a[1] - perimeter_b[1]]) distance_a = np.linalg.norm(vector_a) for k in range(len(y_pred_thresh)): print(k) print(y_pred_thresh[k]) classes = [ 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ] for box in y_pred_thresh[k]: if box[0] != 15: continue xmin = box[2] * 720 / 300 ymin = box[3] * 400 / 300 + 600 xmax = box[4] * 720 / 300 ymax = box[5] * 400 / 300 + 600 if xmin < 400: continue vector_b = np.array([xmin - perimeter_a[0], ymin - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2) continue vector_b = np.array([xmin - perimeter_a[0], ymax - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2) continue 
vector_b = np.array([xmax - perimeter_a[0], ymin - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2) continue vector_b = np.array([xmax - perimeter_a[0], ymax - perimeter_a[1]]) vector_cross = np.cross(vector_a, vector_b) distance = np.linalg.norm(vector_cross / distance_a) if vector_cross >= 0 or distance < threshold: cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255), 2) continue cv2.rectangle(original_images[k], (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 255, 0), 2) cv2.line(original_images[k], (int(perimeter_a[0]), int(perimeter_a[1])), (int(perimeter_b[0]), int(perimeter_b[1])), (0, 255, 255), 2) cv2.imwrite(result_path + '/detection_' + str(k) + '.jpg', original_images[k]) result_image = original_images[k] transposed_image = cv2.transpose(result_image) transposed_image = cv2.flip(transposed_image, 0) result_video.write(transposed_image) result_video.release() cv2.destroyAllWindows() '''
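# The perimeter test above uses the 2-D cross product: with line direction
# a = A - B and offset b = P - A, |a x b| / |a| is the perpendicular distance of
# point P from the line through A and B, and the sign of the cross product tells
# on which side of the line P lies. A standalone sketch of that geometry
# (illustrative, not part of the original script):
import numpy as np

def side_and_distance(point, perimeter_a, perimeter_b):
    """Return (cross, distance) of `point` relative to the perimeter line A-B."""
    a = np.array([perimeter_a[0] - perimeter_b[0], perimeter_a[1] - perimeter_b[1]], dtype=float)
    b = np.array([point[0] - perimeter_a[0], point[1] - perimeter_a[1]], dtype=float)
    cross = float(np.cross(a, b))              # scalar for 2-D inputs
    distance = abs(cross) / np.linalg.norm(a)  # point-to-line distance
    return cross, distance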
def init_model(model_file=None): K.clear_session() global img_height global img_width if not model_file: img_height = 300 img_width = 300 model = ssd_300( image_size=(img_height, img_width, 3), n_classes=20, mode='inference', l2_regularization=0.0005, scales=[ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) # TODO: Set the path of the trained weights. weights_path = './VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5' model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) else: img_height = 224 img_width = 224 ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model = keras.models.load_model(model_file, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'DecodeDetections': DecodeDetections, 'compute_loss': ssd_loss.compute_loss }) print(model.summary()) return model
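# A possible way to exercise init_model(): build the network once, then push a
# single resized image through it. The image path is a placeholder and the model
# is assumed to be in 'inference' mode, so predict() already returns decoded
# detections of the form (class, conf, xmin, ymin, xmax, ymax).
import cv2
import numpy as np

def detect_single_image(image_path, model_file=None, confidence_threshold=0.5):
    model = init_model(model_file)
    image = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)
    resized = cv2.resize(image, (img_width, img_height))  # globals set by init_model()
    y_pred = model.predict(np.expand_dims(resized, axis=0))
    # Keep only detections above the confidence threshold.
    return y_pred[0][y_pred[0, :, 1] > confidence_threshold]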
0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries variances = [ 0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True K.clear_session() # Clear previous models from memory. model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='inference', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, swap_channels=swap_channels) weights_path = './trained_model/ssd300_epoch-1177_loss-5.6914_val_loss-5.5798.h5' model.load_weights(weights_path, by_name=True) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) orig_images = [] # Store the images here. input_images = [] # Store resized versions of the images here.
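# A sketch of how these two lists are typically filled before calling predict():
# the original-resolution image is kept for plotting, while a copy resized to the
# model's input size goes through the network. The image path is a placeholder
# (an assumption, not the author's data).
import cv2
import numpy as np

img_path = 'examples/some_image.jpg'  # placeholder
orig = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
orig_images.append(orig)
input_images.append(cv2.resize(orig, (img_width, img_height)))

input_batch = np.array(input_images)   # shape: (batch, img_height, img_width, 3)
y_pred = model.predict(input_batch)    # decoded detections, since mode='inference'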
def main(): create_new_model = True if args.model_name == 'default' else False if create_new_model: K.clear_session() # Clear previous models from memory. model = ssd_300(image_size=(Config.img_height, Config.img_width, Config.img_channels), n_classes=Config.n_classes, mode='training', l2_regularization=Config.l2_regularization, scales=Config.scales, aspect_ratios_per_layer=Config.aspect_ratios, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, normalize_coords=Config.normalize_coords, subtract_mean=Config.mean_color, swap_channels=Config.swap_channels) adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) model.compile(optimizer=adam, loss=ssd_loss.compute_loss) else: model_path = "weights/" + args.model_name + ".h5" # We need to create an SSDLoss object in order to pass that to the model loader. ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. model = load_model(model_path, custom_objects={ 'AnchorBoxes': AnchorBoxes, 'L2Normalization': L2Normalization, 'compute_loss': ssd_loss.compute_loss }) # Load the data train_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_train.h5') val_dataset = DataGenerator(load_images_into_memory=True, hdf5_dataset_path=os.getcwd() + "/data/" + args.dataset + '/polyp_val.h5') train_dataset_size = train_dataset.get_dataset_size() val_dataset_size = val_dataset.get_dataset_size() print("Number of images in the training dataset:\t{:>6}".format( train_dataset_size)) print("Number of images in the validation dataset:\t{:>6}".format( val_dataset_size)) batch_size = args.batch_size # For the training generator: ssd_data_augmentation = SSDDataAugmentation(img_height=Config.img_height, img_width=Config.img_width, background=Config.mean_color) # For the validation generator: convert_to_3_channels = ConvertTo3Channels() resize = Resize(height=Config.img_height, width=Config.img_width) # 5: Instantiate an encoder that can encode ground truth labels into the format needed by the SSD loss function. # The encoder constructor needs the spatial dimensions of the model's predictor layers to create the anchor boxes. predictor_sizes = [ model.get_layer('conv4_3_norm_mbox_conf').output_shape[1:3], model.get_layer('fc7_mbox_conf').output_shape[1:3], model.get_layer('conv6_2_mbox_conf').output_shape[1:3], model.get_layer('conv7_2_mbox_conf').output_shape[1:3], model.get_layer('conv8_2_mbox_conf').output_shape[1:3], model.get_layer('conv9_2_mbox_conf').output_shape[1:3] ] ssd_input_encoder = SSDInputEncoder( img_height=Config.img_height, img_width=Config.img_width, n_classes=Config.n_classes, predictor_sizes=predictor_sizes, scales=Config.scales, aspect_ratios_per_layer=Config.aspect_ratios, two_boxes_for_ar1=Config.two_boxes_for_ar1, steps=Config.steps, offsets=Config.offsets, clip_boxes=Config.clip_boxes, variances=Config.variances, matching_type='multi', pos_iou_threshold=0.5, neg_iou_limit=0.5, normalize_coords=Config.normalize_coords) # 6: Create the generator handles that will be passed to Keras' `fit_generator()` function. 
train_generator = train_dataset.generate( batch_size=batch_size, shuffle=True, transformations=[ssd_data_augmentation], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) val_generator = val_dataset.generate( batch_size=batch_size, shuffle=False, transformations=[convert_to_3_channels, resize], label_encoder=ssd_input_encoder, returns={'processed_images', 'encoded_labels'}, keep_images_without_gt=False) model_checkpoint = ModelCheckpoint( filepath=os.getcwd() + '/weights/ssd300_epoch-{epoch:02d}_loss-{loss:.4f}_val_loss-{val_loss:.4f}.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=False, mode='auto', period=5) csv_logger = CSVLogger(filename='ssd300_training_log.csv', separator=',', append=True) learning_rate_scheduler = LearningRateScheduler(schedule=lr_schedule) terminate_on_nan = TerminateOnNaN() tf_log = keras.callbacks.TensorBoard(log_dir=TF_LOG_PATH + args.tf_logs, histogram_freq=0, batch_size=batch_size, write_graph=True, write_grads=False, write_images=False) callbacks = [ model_checkpoint, csv_logger, learning_rate_scheduler, terminate_on_nan, tf_log ] # If you're resuming a previous training, set `initial_epoch` and `final_epoch` accordingly. initial_epoch = 0 final_epoch = args.final_epoch steps_per_epoch = 500 # Train/Fit the model if args.predict_mode == 'train': history = model.fit_generator(generator=train_generator, steps_per_epoch=steps_per_epoch, epochs=final_epoch, callbacks=callbacks, validation_data=val_generator, validation_steps=ceil(val_dataset_size / batch_size), initial_epoch=initial_epoch) # Prediction Output predict_generator = val_dataset.generate( batch_size=1, shuffle=True, transformations=[convert_to_3_channels, resize], label_encoder=None, returns={ 'processed_images', 'filenames', 'inverse_transform', 'original_images', 'original_labels' }, keep_images_without_gt=False) i = 0 for val in range(val_dataset_size): batch_images, batch_filenames, batch_inverse_transforms, batch_original_images, batch_original_labels = next( predict_generator) y_pred = model.predict(batch_images) y_pred_decoded = decode_detections( y_pred, confidence_thresh=0.5, iou_threshold=0.4, top_k=200, normalize_coords=Config.normalize_coords, img_height=Config.img_height, img_width=Config.img_width) # 5: Convert the predictions for the original image. 
y_pred_decoded_inv = apply_inverse_transforms( y_pred_decoded, batch_inverse_transforms) np.set_printoptions(precision=2, suppress=True, linewidth=90) print("Predicted boxes:\n") print(' class conf xmin ymin xmax ymax') print(y_pred_decoded_inv[i]) plt.figure(figsize=(20, 12)) plt.imshow(batch_images[i]) current_axis = plt.gca() colors = plt.cm.hsv( np.linspace(0, 1, Config.n_classes + 1)).tolist() # Set the colors for the bounding boxes classes = [ 'background', 'polyps' ] # Just so we can print class names onto the image instead of IDs for box in batch_original_labels[i]: xmin = box[1] ymin = box[2] xmax = box[3] ymax = box[4] label = '{}'.format(classes[int(box[0])]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color='green', fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': 'green', 'alpha': 1.0 }) for box in y_pred_decoded_inv[i]: xmin = box[2] ymin = box[3] xmax = box[4] ymax = box[5] color = colors[int(box[0])] label = '{}: {:.2f}'.format(classes[int(box[0])], box[1]) current_axis.add_patch( plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, color=color, fill=False, linewidth=2)) current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={ 'facecolor': color, 'alpha': 1.0 }) image = plt.gcf() plt.draw() image.savefig(os.getcwd() + "/val_ssd300/val_" + str(val) + ".png", dpi=100)
def __init__(self, model_name, n_classes=1, mode='inference'): self.model_name = model_name self.n_classes = n_classes self.mode = mode if self.model_name == 'ssd_7': self.image_size = (300, 300, 3) self.intensity_mean = 127.5 self.intensity_range = 127.5 self.scales = [0.08, 0.16, 0.32, 0.64, 0.96] self.aspect_ratios_per_layer = None self.two_boxes_for_ar1 = True self.steps = None self.offsets = None self.clip_boxes = False self.variances = [1.0, 1.0, 1.0, 1.0] self.normalize_coords = True self.model = build_model( image_size=self.image_size, n_classes=self.n_classes, mode=self.mode, l2_regularization=0.0005, scales=self.scales, aspect_ratios_global=[0.5, 1.0, 2.0], aspect_ratios_per_layer=self.aspect_ratios_per_layer, two_boxes_for_ar1=self.two_boxes_for_ar1, steps=self.steps, offsets=self.offsets, clip_boxes=self.clip_boxes, variances=self.variances, normalize_coords=self.normalize_coords, subtract_mean=self.intensity_mean, divide_by_stddev=self.intensity_range) elif self.model_name == 'ssd_300': self.image_size = (300, 300, 3) self.mean_color = [123, 117, 104] self.swap_channels = [2, 1, 0] self.scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] self.aspect_ratios_per_layer = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] self.two_boxes_for_ar1 = True self.steps = [8, 16, 32, 64, 100, 300] self.offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] self.clip_boxes = False self.variances = [0.1, 0.1, 0.2, 0.2] self.normalize_coords = True self.model = ssd_300( image_size=self.image_size, n_classes=self.n_classes, mode=self.mode, l2_regularization=0.0005, scales=self.scales, aspect_ratios_per_layer=self.aspect_ratios_per_layer, two_boxes_for_ar1=self.two_boxes_for_ar1, steps=self.steps, offsets=self.offsets, clip_boxes=self.clip_boxes, variances=self.variances, normalize_coords=self.normalize_coords, subtract_mean=self.mean_color, swap_channels=self.swap_channels) elif self.model_name == 'ssd_512': self.image_size = (512, 512, 3) self.mean_color = [123, 117, 104] self.swap_channels = [2, 1, 0] self.scales = [0.07, 0.15, 0.3, 0.45, 0.6, 0.75, 0.9, 1.05] self.aspect_ratios_per_layer = [[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]] self.two_boxes_for_ar1 = True self.steps = [8, 16, 32, 64, 128, 256, 512] self.offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5] self.clip_boxes = False self.variances = [0.1, 0.1, 0.2, 0.2] self.normalize_coords = True self.model = ssd_512( image_size=self.image_size, n_classes=self.n_classes, mode=self.mode, l2_regularization=0.0005, scales=self.scales, aspect_ratios_per_layer=self.aspect_ratios_per_layer, two_boxes_for_ar1=self.two_boxes_for_ar1, steps=self.steps, offsets=self.offsets, clip_boxes=self.clip_boxes, variances=self.variances, normalize_coords=self.normalize_coords, subtract_mean=self.mean_color, swap_channels=self.swap_channels) else: print('creating ssd_7') self.model_name = 'ssd_7' self.image_size = (300, 300, 3) self.intensity_mean = 127.5 self.intensity_range = 127.5 self.scales = [0.08, 0.16, 0.32, 0.64, 0.96] self.aspect_ratios_per_layer = None self.two_boxes_for_ar1 = True self.steps = None self.offsets = None self.clip_boxes = False self.variances = [1.0, 1.0, 1.0, 1.0] self.normalize_coords = True self.model = build_model( image_size=self.image_size, n_classes=self.n_classes, mode=self.mode, l2_regularization=0.0005, 
scales=self.scales, aspect_ratios_global=[0.5, 1.0, 2.0], aspect_ratios_per_layer=self.aspect_ratios_per_layer, two_boxes_for_ar1=self.two_boxes_for_ar1, steps=self.steps, offsets=self.offsets, clip_boxes=self.clip_boxes, variances=self.variances, normalize_coords=self.normalize_coords, subtract_mean=self.intensity_mean, divide_by_stddev=self.intensity_range)
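# Hypothetical usage of the constructor above. The class name `SSDDetector` is
# assumed purely for illustration (the actual class name is not shown in this
# excerpt); note that any unrecognised model_name falls through to the final
# else branch and builds the default ssd_7 network.
if __name__ == '__main__':
    detector = SSDDetector(model_name='ssd_300', n_classes=20, mode='inference')
    print(detector.model_name, detector.image_size)
    detector.model.summary()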
def load_weight(self): self.weights_path = 'ssd300_pascal_07+12_epoch-08_loss-1.9471_val_loss-1.9156.h5' adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0) ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0) K.clear_session() # Clear previous models from memory. #limit memory: https://www.tensorflow.org/guide/gpu#limiting_gpu_memory_growth # TF 2.x options works for TF 1.15.5 gpus = tf.config.experimental.list_physical_devices('GPU') if gpus: try: # option1: fixed size tf.config.experimental.set_virtual_device_configuration( gpus[self.gpu_no], [ tf.config.experimental.VirtualDeviceConfiguration( memory_limit=1024) ]) # option2: dynamic #for gpu in gpus: # tf.config.experimental.set_memory_growth(gpu, True) # choose gpu device tf.config.experimental.set_visible_devices( gpus[self.gpu_no], 'GPU') except RuntimeError as e: print(e) self.session = tf.Session() self.graph = tf.get_default_graph() with self.graph.as_default(): with self.session.as_default(): self.model = ssd_300( image_size=(self.img_height, self.img_width, 3), n_classes=2, mode='inference', l2_regularization=0.0005, scales=[ 0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05 ], # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] aspect_ratios_per_layer=[[1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5], [1.0, 2.0, 0.5]], two_boxes_for_ar1=True, steps=[8, 16, 32, 64, 100, 300], offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5], clip_boxes=False, variances=[0.1, 0.1, 0.2, 0.2], normalize_coords=True, subtract_mean=[123, 117, 104], swap_channels=[2, 1, 0], confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400) self.model.load_weights(self.weights_path, by_name=True) self.model.compile(optimizer=adam, loss=ssd_loss.compute_loss)
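# The graph and session captured above are what later prediction calls must
# re-enter when running with TF 1.x-style sessions (e.g. from a request handler
# or a worker thread). A sketch of such a method, assuming the surrounding class
# keeps the model built by load_weight(); the confidence filtering mirrors the
# pattern used elsewhere in this document.
def predict(self, image_batch, confidence_threshold=0.5):
    """Run inference inside the stored graph/session and filter by confidence."""
    with self.graph.as_default():
        with self.session.as_default():
            y_pred = self.model.predict(image_batch)
    return [y_pred[k][y_pred[k, :, 1] > confidence_threshold]
            for k in range(y_pred.shape[0])]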
0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are divided as in the original implementation normalize_coords = True # 1: Build the Keras model. K.clear_session() # Clear previous models from memory. model, predictor_sizes = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='training', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=mean_color, confidence_thresh=0.5, iou_threshold=0.45, return_predictor_sizes=True) # `return_predictor_sizes=True` is required because the call unpacks (model, predictor_sizes). # 3: Instantiate an optimizer and the SSD loss function and compile the model. # If you want to follow the original Caffe implementation, use the preset SGD # optimizer, otherwise I'd recommend the commented-out Adam optimizer. print(model.summary()) weights_path = 'VGG_ILSVRC_16_layers_fc_reduced.h5' model.load_weights(weights_path, by_name=True)
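# The `predictor_sizes` returned above are exactly what the ground-truth encoder
# needs. A minimal sketch mirroring the SSDInputEncoder setup used in the other
# training scripts in this document (the matching thresholds are illustrative):
ssd_input_encoder = SSDInputEncoder(img_height=img_height,
                                    img_width=img_width,
                                    n_classes=n_classes,
                                    predictor_sizes=predictor_sizes,
                                    scales=scales,
                                    aspect_ratios_per_layer=aspect_ratios,
                                    two_boxes_for_ar1=two_boxes_for_ar1,
                                    steps=steps,
                                    offsets=offsets,
                                    clip_boxes=clip_boxes,
                                    variances=variances,
                                    matching_type='multi',
                                    pos_iou_threshold=0.5,
                                    neg_iou_limit=0.5,
                                    normalize_coords=normalize_coords)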
def model_detection(): img_height = 300 # Height of the input images img_width = 300 # Width of the input images img_channels = 3 # Number of color channels of the input images subtract_mean = [123, 117, 104] # The per-channel mean of the images in the dataset swap_channels = [ 2, 1, 0 ] # The color channel order in the original SSD is BGR, so we should set this to `True`, but weirdly the results are better without swapping. # TODO: Set the number of classes. n_classes = 8 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO scales = [ 0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05 ] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets. # scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets. aspect_ratios = [ [1.0, 2.0, 0.5], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0], [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0 ], [1.0, 2.0, 0.5 ], [1.0, 2.0, 0.5] ] # The anchor box aspect ratios used in the original SSD300; the order matters two_boxes_for_ar1 = True steps = [ 8, 16, 32, 64, 100, 300 ] # The space between two adjacent anchor box center points for each predictor layer. offsets = [ 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer. clip_boxes = False # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries variances = [ 0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are scaled as in the original implementation normalize_coords = True weights_path = 'C:\\Users\\lamin\\Desktop\\PFE\\traffic_objetct_detection\\weights\\VGG_coco_SSD_300x300_iter_400000_subsampled_8_classes.h5' model = ssd_300(image_size=(img_height, img_width, img_channels), n_classes=n_classes, mode='inference', l2_regularization=0.0005, scales=scales, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, steps=steps, offsets=offsets, clip_boxes=clip_boxes, variances=variances, normalize_coords=normalize_coords, subtract_mean=subtract_mean, divide_by_stddev=None, swap_channels=swap_channels, confidence_thresh=0.5, iou_threshold=0.45, top_k=200, nms_max_output_size=400, return_predictor_sizes=False) print("Model built.") # 2: Load the sub-sampled weights into the model. # Load the weights that we've just created via sub-sampling. model.load_weights(weights_path, by_name=True) print("Weights file loaded:", weights_path) return model
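# A possible end-to-end use of model_detection(): build the model once, run one
# resized frame through it, and keep detections above a confidence threshold.
# In 'inference' mode predict() already returns decoded boxes in the order
# (class, conf, xmin, ymin, xmax, ymax). The image path is a placeholder.
import cv2
import numpy as np

model = model_detection()
frame = cv2.cvtColor(cv2.imread('test_frame.jpg'), cv2.COLOR_BGR2RGB)  # placeholder
frame = cv2.resize(frame, (300, 300))
y_pred = model.predict(np.expand_dims(frame, axis=0))
confidence_threshold = 0.5
detections = y_pred[0][y_pred[0, :, 1] > confidence_threshold]
print(' class conf xmin ymin xmax ymax')
print(detections)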