def get_model(dataset):
    """
    Build an SSD300 network for ``dataset``, load its trained weights and compile it.

    :param dataset: either "VOC" or "COCO"; selects the number of classes,
        the anchor box scales and the weights file.
    :return: keras_ssd300.ssd300 model
    :raises ValueError: if ``dataset`` is neither "VOC" nor "COCO".
    """
    # Reject unknown datasets before touching the Keras session.
    if dataset not in ("VOC", "COCO"):
        raise ValueError("Unrecognised dataset: {}".format(dataset))

    # Per-dataset settings: (number of classes, anchor box scales, weights file).
    if dataset == "VOC":
        class_count, anchor_scales, weights_file = (
            20,
            [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
            VOC_WEIGHTS_FILENAME,
        )
    else:
        class_count, anchor_scales, weights_file = (
            80,
            [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
            COCO_WEIGHTS_FILENAME,
        )

    # 1: Clear previous models from memory, then build the network.
    K.clear_session()
    net = ssd_300(
        image_size=(IMAGE_HEIGHT, IMAGE_WIDTH, 3),
        n_classes=class_count,
        l2_regularization=0.0005,
        scales=anchor_scales,
        aspect_ratios_per_layer=[
            [1.0, 2.0, 0.5],
            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
            [1.0, 2.0, 0.5],
            [1.0, 2.0, 0.5],
        ],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        limit_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        coords='centroids',
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=True,
    )

    # 2: Load the trained weights into the model.
    net.load_weights(weights_file, by_name=True)

    # 3: Compile the model so that Keras won't complain the next time you load it.
    loss_fn = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0).compute_loss
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)
    net.compile(optimizer=optimizer, loss=loss_fn)

    return net
def __load_model(self):
    """
    Build and compile the SSD300 Keras model and create the ground-truth box encoder.

    The model geometry is read from instance attributes: ``img_train_height``,
    ``img_train_width``, ``img_channels``, ``n_classes``, ``scales``,
    ``aspect_ratios``, ``two_boxes_for_ar1``, ``limit_boxes``, ``variances``,
    ``coords`` and ``normalize_coords``.

    The encoder is configured with the ``predictor_sizes`` returned by
    ``ssd_300`` for the model built here. The previous code passed
    ``self.predictor_sizes``, which this method never assigns.

    NOTE(review): in the code visible here ``model`` and ``ssd_box_encoder``
    remain local and nothing is returned or stored on ``self`` -- confirm
    whether callers expect them to be kept.
    """
    # 2: Build the Keras model (and possibly load some trained weights)
    K.clear_session()  # Clear previous models from memory.

    # The output `predictor_sizes` is needed below to set up `SSDBoxEncoder`
    model, predictor_sizes = ssd_300(
        image_size=(self.img_train_height, self.img_train_width, self.img_channels),
        n_classes=self.n_classes,
        # A min scale and max scale could be passed instead of the `scales`
        # list, but that is not done here.
        min_scale=None,
        max_scale=None,
        scales=self.scales,
        aspect_ratios_global=None,
        aspect_ratios_per_layer=self.aspect_ratios,
        two_boxes_for_ar1=self.two_boxes_for_ar1,
        limit_boxes=self.limit_boxes,
        variances=self.variances,
        coords=self.coords,
        normalize_coords=self.normalize_coords)

    # NOTE: no weights are loaded here; the original left a reminder to set
    # the path to the VGG-16 weights at this point.

    ### Set up training

    # 3: Instantiate an Adam optimizer and the SSD loss function and compile the model
    adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=5e-04)
    ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=0.1)
    model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

    # 4: Instantiate an encoder that can encode ground truth labels into the
    #    format needed by the SSD loss function. It must use the predictor
    #    sizes of the model that was just built.
    ssd_box_encoder = SSDBoxEncoder(
        img_height=self.img_train_height,
        img_width=self.img_train_width,
        n_classes=self.n_classes,
        predictor_sizes=predictor_sizes,
        min_scale=None,
        max_scale=None,
        scales=self.scales,
        aspect_ratios_global=None,
        aspect_ratios_per_layer=self.aspect_ratios,
        two_boxes_for_ar1=self.two_boxes_for_ar1,
        limit_boxes=self.limit_boxes,
        variances=self.variances,
        pos_iou_threshold=0.5,
        neg_iou_threshold=0.2,
        coords=self.coords,
        normalize_coords=self.normalize_coords)
def __init__(self):
    """
    Build the SSD300 inference model, load its trained weights and compile it.

    Sets ``img_height``, ``img_width``, ``frame``, ``classes``, ``model``,
    ``graph`` and ``color`` on the instance.
    """
    self.img_height = 300
    self.img_width = 300
    self.frame = None

    # Label list: index 0 is 'background', followed by the 10 object classes.
    self.classes = [
        'background',
        'Christmas toy',
        'coffee machine',
        'potted plant',
        'tissue box',
        'robot',
        'soccer ball',
        'turtle bot',
        'uav',
        'fire alarm',
        'tennis racket',
    ]

    self.model = ssd_300(
        image_size=(self.img_height, self.img_width, 3),
        n_classes=10,
        mode='inference',
        l2_regularization=0.0005,
        # These are the anchor box scales used for MS COCO.
        scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
        aspect_ratios_per_layer=[
            [1.0, 2.0, 0.5],
            [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
            [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
            [1.0, 2.0, 0.5],
            [1.0, 2.0, 0.5],
        ],
        two_boxes_for_ar1=True,
        steps=[8, 16, 32, 64, 100, 300],
        offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
        clip_boxes=False,
        variances=[0.1, 0.1, 0.2, 0.2],
        normalize_coords=True,
        subtract_mean=[123, 117, 104],
        swap_channels=[2, 1, 0],
        confidence_thresh=0.5,
        iou_threshold=0.45,
        top_k=200,
        nms_max_output_size=400,
    )

    # Trained checkpoint to restore (machine-specific absolute path).
    checkpoint = '/home/saif/Documents/datasets/droneSet/weights/myTrain/droneset_ssd300_vgg16_original_augment/droneset_ssd300_vgg16_epoch-226_loss-3.6563_val_loss-4.0523.h5'
    self.model.load_weights(checkpoint, by_name=True)

    # NOTE: this Adam instance is constructed but never passed to compile();
    # the SGD optimizer below is the one actually used.
    adam_optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    sgd_optimizer = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)
    loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
    self.model.compile(optimizer=sgd_optimizer, loss=loss.compute_loss)

    # Keep a handle on the default TensorFlow graph at construction time.
    self.graph = tf.get_default_graph()

    # Three values drawn from {0, 50, 100, 150, 200, 250}.
    channel_values = np.random.choice(range(0, 256, 50), size=3)
    self.color = list(channel_values)
def main():
    """
    Run the SSD detection loop of the 'new_detect_pkg' ROS node.

    Sets up the ROS node and the image / point-cloud converters, builds an
    ``ssd_300`` model in inference mode for 272x480 inputs with 6 classes and
    loads its weights from a hard-coded path. It then loops until key code 113
    (``ord('q')``) is returned by ``cv2.waitKey`` or ROS shuts down. Each
    iteration:

    1. reads ``ic.zed_image``, resizes it to 480x272 and runs ``model.predict``;
    2. keeps predictions whose confidence (column 1) exceeds 0.5;
    3. for each kept box, draws a rectangle, writes the scaled box centre to
       ``icp.dict_1`` / ``icp.dict_2``, sleeps 0.04 s, takes the first point
       from ``icp.zed_image_pointcloud`` as (x, y, z) and computes its
       Euclidean norm as the distance;
    4. for class ids 6 (person) and 4 (car), calls ``frameFilter`` and, when it
       returns 1, ``objRecord2Ros``;
    5. shows the frame, calls ``talker()`` and moves the "current frame"
       records (row 1) into the "last frame" row (row 0).

    The function communicates through many module-level globals (``record_*``,
    ``objPerFrame``, ``total_objPerFrame``, ``*_list`` ...).

    NOTE(review): the block structure below was reconstructed from a
    whitespace-collapsed source; the nesting of a few statements (flagged
    inline) should be confirmed against the original file.
    """
    rospy.init_node('new_detect_pkg', anonymous=True)
    ic = image_converter()
    icp = image_converter_pointcloud()
    r = rospy.Rate(50)  # NOTE(review): `r` is not used again in this function
    rospy.sleep(0.5)
    publisher()
    print('---------initialization model...please wait----------')
    # ssd_entity = SSD_entity()
    img_height = 272  # Height of the input images
    img_width = 480  # Width of the input images
    img_channels = 3  # Number of color channels of the input images
    subtract_mean = [123, 117, 104]  # The per-channel mean of the images in the dataset
    # Channel permutation handed to ssd_300 as `swap_channels`.
    # NOTE(review): the original comment said swapping should be set to `True`
    # but that results were better without swapping; that no longer matches the
    # value [2, 1, 0] used here -- confirm the intended channel order.
    swap_channels = [2, 1, 0]
    # TODO: Set the number of classes.
    n_classes = 6  # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
    # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets.
    scales = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
    # scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets.
    # The anchor box aspect ratios used in the original SSD300; the order matters
    aspect_ratios = [
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5]
    ]
    two_boxes_for_ar1 = True
    # The space between two adjacent anchor box center points for each predictor layer.
    steps = [8, 16, 32, 64, 100, 300]
    # The offsets of the first anchor box center points from the top and left
    # borders of the image as a fraction of the step size for each predictor layer.
    offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
    clip_boxes = False  # Whether or not you want to limit the anchor boxes to lie entirely within the image boundaries
    # The variances by which the encoded target coordinates are scaled as in the original implementation
    variances = [0.1, 0.1, 0.2, 0.2]
    normalize_coords = True

    # 1: Build the Keras model
    K.clear_session()  # Clear previous models from memory.
    model = ssd_300(image_size=(img_height, img_width, img_channels),
                    n_classes=n_classes,
                    mode='inference',
                    l2_regularization=0.0005,
                    scales=scales,
                    aspect_ratios_per_layer=aspect_ratios,
                    two_boxes_for_ar1=two_boxes_for_ar1,
                    steps=steps,
                    offsets=offsets,
                    clip_boxes=clip_boxes,
                    variances=variances,
                    normalize_coords=normalize_coords,
                    subtract_mean=subtract_mean,
                    divide_by_stddev=None,
                    swap_channels=swap_channels,
                    confidence_thresh=0.5,
                    iou_threshold=0.45,
                    top_k=200,
                    nms_max_output_size=400,
                    return_predictor_sizes=False)
    print("Model built.")

    # 2: Load the sub-sampled weights into the model.
    # Load the weights that we've just created via sub-sampling.
    # NOTE: machine-specific absolute path.
    model.load_weights(
        '/home/ogai1234/catkin_ws/src/detect_pkg/bin/ssd300_weights_epoch-45_loss-4.3010_val_loss-4.3788.h5',
        by_name=True)
    print('---------model done----------')
    # model.load_weights(weights_path, by_name=True)
    # print("Weights file loaded:", weights_path)

    # Class names indexed by predicted class id (0 is background).
    classes = [
        "background", "dog", "umbrellaman", "cone", "car", "bicycle", "person"
    ]
    key = ''  # replaced by the int returned from cv2.waitKey at the end of each frame
    t0 = time.time()  # NOTE(review): not used again in this function
    frame_code = 1
    linewidth = 3
    figsize = (10, 10)  # NOTE(review): not used again in this function

    # Initialize the filter cache: for each record_* table, row 0 is moved-in
    # from row 1 at the end of every frame (see the loop at the bottom), each
    # row holding 11 slots.
    global record_xmin, record_xmax, record_ymin, record_ymax, record_cls_id, record_obj_id
    global record_obj_num_appeared, total_objPerFrame
    record_obj_num_appeared = 0
    record_xmin = [[0 for i in range(11)] for i in range(2)]
    record_xmax = [[0 for i in range(11)] for i in range(2)]
    record_ymin = [[0 for i in range(11)] for i in range(2)]
    record_ymax = [[0 for i in range(11)] for i in range(2)]
    record_cls_id = [[0 for i in range(11)] for i in range(2)]
    record_obj_id = [[0 for i in range(11)] for i in range(2)]
    global objPerFrame
    objPerFrame = 0
    total_objPerFrame = 0
    font = cv2.FONT_HERSHEY_TRIPLEX

    # Main loop: 113 == ord('q'), so pressing 'q' in the OpenCV window stops it.
    while (key != 113) and (not rospy.is_shutdown()):
        t1 = time.time()
        # one frame start
        frame_code = frame_code + 1
        print("_________________________Frame:", frame_code)

        # for ros topic publish
        global objPerlastFrame, type_code_list, confidence_list, distance_list, x_list, y_list, z_list
        objPerlastFrame = total_objPerFrame
        # calculate the num of object appeared in one frame
        # NOTE(review): objPerFrame is only reset here; it is presumably
        # incremented by a helper such as objRecord2Ros -- confirm.
        objPerFrame = 0
        total_objPerFrame = 0
        # Ros message content initial
        type_code_list = [0 for i in range(11)]
        confidence_list = [0 for i in range(11)]
        distance_list = [0 for i in range(11)]
        x_list = [0 for i in range(11)]
        y_list = [0 for i in range(11)]
        z_list = [0 for i in range(11)]

        # load zed image from ros topic
        image = ic.zed_image
        frame = image
        # frame = frame[:,:,0:3]
        frame = cv2.resize(frame, (480, 272))
        frame_np = np.array(frame)
        frame_np = frame_np[np.newaxis, :, :, :]  # add a leading batch axis

        # put frame into SSD network to do classification
        y_pred = model.predict(frame_np)
        confidence_threshold = 0.5
        # Per batch item, keep only the rows whose column 1 (confidence)
        # exceeds the threshold.
        y_pred_thresh = [
            y_pred[k][y_pred[k, :, 1] > confidence_threshold]
            for k in range(y_pred.shape[0])
        ]
        colors = dict()
        # Each box row is read as [class id, score, xmin, ymin, xmax, ymax].
        for box in y_pred_thresh[0]:
            if objPerFrame > 9:
                break
            cls_id = int(box[0])
            if cls_id not in colors:
                # NOTE(review): random.random() yields floats in [0, 1);
                # confirm this is the intended colour range for cv2.rectangle.
                colors[cls_id] = (random.random(), random.random(),
                                  random.random())
            score = box[1]
            # The frame was just resized to 480x272, matching img_width and
            # img_height, so these scale ratios evaluate to 1.
            xmin = int(box[2] * frame.shape[1] / img_width)
            ymin = int(box[3] * frame.shape[0] / img_height)
            xmax = int(box[4] * frame.shape[1] / img_width)
            ymax = int(box[5] * frame.shape[0] / img_height)
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                          color=colors[cls_id],
                          thickness=linewidth)
            # class_name = str(cls_id)

            # calculate distance and position
            # NOTE(review): the factors 1.38 and 1.4 presumably map the
            # 480x272 frame onto the point-cloud resolution -- confirm.
            x_center = round(((xmin + xmax) / 2) * 1.38)
            y_center = round(((ymin + ymax) / 2) * 1.4)
            # print('11111111111:icp.dict_1 = :',icp.dict_1)
            # print('11111111111:icp.dict_2 = :',icp.dict_2)
            icp.dict_1 = x_center
            icp.dict_2 = y_center
            # print('22222222222:icp.dict_1 = :',icp.dict_1)
            # print('22222222222:icp.dict_2 = :',icp.dict_2)
            # Pause before reading the point cloud; presumably gives the
            # point-cloud converter time to pick up the new centre -- confirm.
            rospy.sleep(0.04)
            # print('4444444444:icp.dict_1 = :',icp.dict_1)
            # print('4444444444:icp.dict_2 = :',icp.dict_2)
            point_cloud = icp.zed_image_pointcloud
            # print('5555555555:icp.dict_1 = :',icp.dict_1)
            # print('5555555555:icp.dict_2 = :',icp.dict_2)
            x, y, z = 0, 0, 0
            # x,y,z is the position obtained from pointcloud2; only the first
            # point is used, and (0, 0, 0) is kept if the cloud is empty.
            for p in point_cloud:
                x, y, z = p
                break
            distance = math.sqrt(x * x + y * y + z * z)
            type_code = cls_id
            class_name = classes[cls_id]

            # for person and car, collect its information and transfer to ROS
            if cls_id == 6:
                # Person: class id 6 is reported with type code 1
                type_code = 1
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                    # NOTE(review): nesting reconstructed -- confirm whether
                    # the label is meant to be drawn only when appeared == 1.
                    cv2.putText(frame,
                                '{:s} | {:.2f} |{:}'.format(
                                    class_name, distance,
                                    record_obj_id[1][total_objPerFrame]),
                                (xmin, ymin + 2),
                                font,
                                0.5, (255, 255, 255),
                                thickness=1)
            if cls_id == 4:
                # Car: class id 4 is reported with type code 4
                type_code = 4
                total_objPerFrame = total_objPerFrame + 1
                appeared = frameFilter(cls_id, xmin, xmax, ymin, ymax)
                if (appeared == 1):
                    objRecord2Ros(class_name, score, cls_id, type_code,
                                  distance, x, y, z)
                    # NOTE(review): nesting reconstructed -- same caveat as
                    # for the person branch above.
                    cv2.putText(frame,
                                '{:s} | {:.2f} |{:}'.format(
                                    class_name, distance,
                                    record_obj_id[1][total_objPerFrame]),
                                (xmin, ymin + 2),
                                font,
                                0.5, (255, 255, 255),
                                thickness=1)
            # if want show all classes, uncomment this line , and comment the upper 2 ifs
            # objRecord2Ros(class_name,score,cls_id,type_code,distance,x,y,z)

            # draw the bounding box
            font = cv2.FONT_HERSHEY_TRIPLEX  # same value as assigned before the loop
            # print format: class|conf|distance|x|y

        # show the image with bounding box
        cv2.imshow("image_back", frame)
        key = cv2.waitKey(1)
        t21 = time.time()
        # calculate fps
        # print('fps {:f}'.format( 1 / (t21 - t1)))
        talker()

        # last frame info move forward: copy slots 1..10 of row 1 into row 0,
        # then zero row 1 for the next frame (slot 0 is left untouched).
        for i in range(10):
            # print('record_xmin[0][i]:',record_xmin[0][i+1])
            # print('record_xmin[1][i]:',record_xmin[1][i+1])
            record_xmin[0][i + 1] = record_xmin[1][i + 1]
            record_xmax[0][i + 1] = record_xmax[1][i + 1]
            record_ymin[0][i + 1] = record_ymin[1][i + 1]
            record_ymax[0][i + 1] = record_ymax[1][i + 1]
            record_cls_id[0][i + 1] = record_cls_id[1][i + 1]
            record_obj_id[0][i + 1] = record_obj_id[1][i + 1]
            record_xmin[1][i + 1] = 0
            record_xmax[1][i + 1] = 0
            record_ymin[1][i + 1] = 0
            record_ymax[1][i + 1] = 0
            record_cls_id[1][i + 1] = 0
            record_obj_id[1][i + 1] = 0
0.1, 0.1, 0.2, 0.2 ] # The variances by which the encoded target coordinates are scaled as in the original implementation coords = 'centroids' # Whether the box coordinates to be used as targets for the model should be in the 'centroids' or 'minmax' format, see documentation normalize_coords = True # 2: Build the Keras model (and possibly load some trained weights) K.clear_session() # Clear previous models from memory. # The output `predictor_sizes` is needed below to set up `SSDBoxEncoder` model, predictor_sizes = ssd_300( image_size=(img_height, img_width, img_channels), n_classes=n_classes, min_scale=None, # You could pass a min scale and max scale instead of the `scales` list, but we're not doing that here max_scale=None, scales=scales, aspect_ratios_global=None, aspect_ratios_per_layer=aspect_ratios, two_boxes_for_ar1=two_boxes_for_ar1, limit_boxes=limit_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords) # model.load_weights('./ssd300_weights.h5', by_name=True) # You should load pre-trained weights for the modified VGG-16 base network here ### Make predictions # 1: Set the generator predict_generator = val_dataset.generate(batch_size=1, train=False, equalize=False,
normalize_coords = True
# The variances by which the encoded target coordinates are divided,
# as in the original implementation.
variances = [0.1, 0.1, 0.2, 0.2]

# + colab={"base_uri": "https://localhost:8080/", "height": 423} colab_type="code" id="iGkaLj0AT_je" outputId="f40bfd9e-6a49-47df-bff0-8bc1c98869bb"
# 1: Drop any previously built models from memory, then build the SSD300
#    network in training mode.
K.clear_session()

model = ssd_300(
    image_size=(img_height, img_width, img_channels),
    n_classes=n_classes,
    mode='training',
    l2_regularization=0.0005,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    clip_boxes=clip_boxes,
    variances=variances,
    normalize_coords=normalize_coords,
    subtract_mean=mean_color,
    swap_channels=swap_channels,
)

# 2: Load some weights into the model.
#
# TODO: Set the path to the weights you want to load, then uncomment:
# weights_path = 'path/to/VGG_ILSVRC_16_layers_fc_reduced.h5'
# model.load_weights(weights_path, by_name=True)

# 3: Instantiate an optimizer and the SSD loss function and compile the model.
# 1.1. Build a new SSD

# 1.1.1: Build the Keras model, after clearing previous models from memory.
K.clear_session()

model = ssd_300(
    image_size=(img_height, img_width, 3),
    n_classes=20,
    l2_regularization=0.0005,
    # For MS COCO the scales would instead be
    # [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05].
    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    aspect_ratios_per_layer=[
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5, 3.0, 1.0 / 3.0],
        [1.0, 2.0, 0.5],
        [1.0, 2.0, 0.5],
    ],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 100, 300],
    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
    limit_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    coords='centroids',
    normalize_coords=True,
    subtract_mean=[123, 117, 104],
    swap_channels=True,
)

# 1.1.2: Load the trained weights into the model.
# TODO: Set the path of the trained weights.
weights_path = 'path/to/trained/weights/VGG_VOC0712_SSD_300x300_iter_120000.h5'
# The variances by which the encoded target coordinates are scaled,
# as in the original implementation.
variances = [0.1, 0.1, 0.2, 0.2]
# Whether the box coordinates used as targets for the model are in the
# 'centroids', 'corners', or 'minmax' format; see the documentation.
coords = 'centroids'
normalize_coords = True

# 1: Build the Keras model, after clearing previous models from memory.
K.clear_session()

# NOTE: the input size is hard-coded to 1080x1920 here; the original author's
# comment says (img_height, img_width, img_channels) was used previously.
model = ssd_300(
    image_size=(1080, 1920, img_channels),
    n_classes=n_classes,
    l2_regularization=0.0005,
    scales=scales,
    aspect_ratios_per_layer=aspect_ratios,
    two_boxes_for_ar1=two_boxes_for_ar1,
    steps=steps,
    offsets=offsets,
    limit_boxes=limit_boxes,
    variances=variances,
    coords=coords,
    normalize_coords=normalize_coords,
    subtract_mean=subtract_mean,
    divide_by_stddev=None,
    swap_channels=swap_channels,
)
print("Model built.")

# 2: Load the sub-sampled weights into the model.
# These are the weights that were just created via sub-sampling.
weights_path = weights_destination_path