def __init__(self, class_names, model, input_shape):
    """Store the detector, set up FPS bookkeeping and build class colours.

    Arguments:
        class_names: Sequence of class-name strings; its length is the
            class count handed to ``BBoxUtility``.
        model: Detection model, stored unchanged on the instance.
        input_shape: Model input shape, stored unchanged on the instance.
    """
    self.class_names = class_names
    self.num_classes = len(class_names)
    self.model = model
    self.input_shape = input_shape
    self.bbox_util = BBoxUtility(self.num_classes)

    # Timing / FPS state; the timer is wired to ``self.timer_callback``.
    self.timer = Timer(1, self.timer_callback)
    self.current_time = 0
    self.current_fps = 0
    self.exec_time = None
    self.prev_extra_time = None
    self.extra_time = None
    self.fps_time_slot = list()
    self.is_finish = False

    # One bright BGR colour per class: spread the hue over the classes,
    # keep saturation/value fixed, and convert a single HSV pixel.
    self.class_colors = []
    for class_idx in range(self.num_classes):
        hsv_pixel = np.zeros((1, 1, 3)).astype("uint8")
        hsv_pixel[0, 0, 0] = 255*class_idx/self.num_classes  # Hue
        hsv_pixel[0, 0, 1] = 128  # Saturation
        hsv_pixel[0, 0, 2] = 255  # Value
        blue, green, red = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0]
        self.class_colors.append((int(blue), int(green), int(red)))
def __init__(self):
    """Load the SSD300 traffic-light detector from the working directory.

    Reads the prior boxes from ``prior_boxes_ssd300.pkl`` and the weights
    from ``checkpoints/weights.07-0.70.hdf5``, then runs one dummy
    prediction so the model is ready before the first real frame.
    """
    NUM_CLASSES = 3 + 1
    input_shape = (300, 300, 3)

    # "prior boxes" in the paper. Opened with a context manager so the
    # file handle is closed as soon as the pickle has been read.
    with open('prior_boxes_ssd300.pkl', 'rb') as priors_file:
        priors = pickle.load(priors_file)
    self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

    # Traffic Light Classifier model and its weights
    self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
    self.model.load_weights('checkpoints/weights.07-0.70.hdf5', by_name=True)

    # Warm-up prediction (original note: prevents a TensorFlow ValueError
    # later on). One call is enough; the original ran it twice.
    dummy = np.zeros((1, 300, 300, 3))
    self.model.predict(dummy, batch_size=1, verbose=0)

    self.is_in_progress = False
    self.last_result = TrafficLight.UNKNOWN
def predict_img(numpy_array, orig_numpy_array):
    """Run the SSD detector on one image and report what ``ssd_image`` finds.

    Arguments:
        numpy_array: Image array fed to the SSD network (a copy is
            preprocessed, the argument itself is left untouched).
        orig_numpy_array: Original image; saved to
            ``temp_cat_motion.jpg`` and passed on to ``ssd_image``.

    Returns:
        Whatever ``ssd_image`` returns for the single image.
    """
    # Save the original image for attachment
    scipy.misc.imsave('temp_cat_motion.jpg', np.uint8(orig_numpy_array))

    # Number of voc_classes + 1
    NUM_CLASSES = 3
    input_shape = (300, 300, 3)

    # SSD model
    model = SSD300(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('./model/weights.18-0.09.hdf5', by_name=True)
    bbox_util = BBoxUtility(NUM_CLASSES)

    # The original also loaded './model/model_v2.03-0.40.hdf5' here but
    # never used it; that per-call load has been dropped.

    inputs = preprocess_input(np.array([numpy_array.copy()]))
    preds = model.predict(inputs, batch_size=1, verbose=0)
    results = bbox_util.detection_out(preds)

    # Exactly one image is processed, so it is always batch index 0.
    return ssd_image(orig_numpy_array, results, 0)
def __init__(self, argv):
    """Read ``<script name>.ini`` and build the SSD300v2 detector from it.

    Arguments:
        argv: Command-line argument list; only ``argv[0]`` (the script
            path) is used, to derive the ini file name.

    The ini file must provide ``[WEIGHT_FILE] filename``,
    ``[SHOW_IMAGE] show`` (integer) and ``[DETECTION] confidence``
    (float).
    """
    app_name = os.path.basename(argv[0])
    # splitext copes with names that have several dots or none at all;
    # unpacking app_name.split(".") raised ValueError for those.
    name, _ = os.path.splitext(app_name)
    inifile = name + ".ini"
    print("inifile {}".format(inifile))

    parser = configparser.ConfigParser()
    parser.read(inifile)

    # For example, this may take a string C:/ssd_keras/weights_SSD300.hdf5
    self.weightfile = parser.get("WEIGHT_FILE", "filename")
    self.showimage = int(parser.get("SHOW_IMAGE", "show"))
    self.confidence = float(parser.get("DETECTION", "confidence"))

    self.classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
                    'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
                    'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
                    'Sheep', 'Sofa', 'Train', 'Tvmonitor']
    # One extra class on top of the named ones.
    self.n_classes = len(self.classes) + 1
    self.input_shape = (300, 300, 3)

    self.model = SSD300v2(self.input_shape, num_classes=self.n_classes)
    self.model.load_weights(self.weightfile, by_name=True)
    self.bbox_util = BBoxUtility(self.n_classes)
def feature_flow():
    """Compare a full SSD pass with a flow-warped feature-map pass.

    Runs the full SSD300v2 network on ``image_files``, dumps the
    ``conv4_2`` activation of the second image, shifts it with the
    optical flow between the two frames, feeds the shifted feature into
    the truncated network and finally compares the ``conv6_2`` outputs
    of both networks. Relies on module-level ``NUM_CLASSES``,
    ``image_files``, ``input_shape`` and ``sess``.
    """
    box_decoder = BBoxUtility(NUM_CLASSES)
    raw_inputs, images = load_inputs(image_files)
    inputs = preprocess_input(np.array(raw_inputs))

    dump_activation_layer = 'conv4_2'
    compare_layer_name = 'conv6_2'
    print('dump_activation_layer', dump_activation_layer)
    print('target_layer_name', compare_layer_name)

    # normal SSD network
    full_net = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    full_net.load_weights('weights_SSD300.hdf5', by_name=True)
    full_predictions = run_network(full_net, inputs)
    plot_detections(images, box_decoder.detection_out(full_predictions))

    # get dump layer's output (as input for flow network)
    second_input = inputs[1:2, :, :, :]
    layer_dump = get_layer_output(model=full_net,
                                  inputs=second_input,
                                  output_layer_name=dump_activation_layer)
    print('layer_dump.shape = ', layer_dump.shape)

    # flow (raw rgb)
    flow_rgb = compute_flow(image_files[1], image_files[0])
    print('flow.shape', flow_rgb.shape)
    flow_picture = cv2.cvtColor(draw_hsv(flow_rgb), cv2.COLOR_BGR2RGB)
    imshow_fig(flow_picture, title='flow_rgb')

    # flow (re-sized for feature map)
    flow_feature = get_flow_for_filter(flow_rgb)

    # warp image by flow_rgb
    first_frame = cv2.imread(image_files[0])
    warped_frame = warp_flow(first_frame, flow_rgb)
    imshow_fig(cv2.cvtColor(warped_frame, cv2.COLOR_BGR2RGB),
               title='frame_2_warp')

    # shift feature
    shifted_feature = shift_filter(layer_dump, flow_feature)

    # flow net
    flow_net = SSD300_conv4_3((128, 128, 512), num_classes=NUM_CLASSES)
    flow_net.load_weights('weights_SSD300.hdf5', by_name=True)
    flow_predictions = run_network(flow_net, shifted_feature)
    plot_detections(images[1:2], box_decoder.detection_out(flow_predictions))

    # get specific layer's output and compare them (for debugging)
    compare_model_layer(full_net, second_input, compare_layer_name,
                        flow_net, shifted_feature, compare_layer_name,
                        True)

    sess.close()
    plt.show()
def __init__(self):
    """Restore the face detection model and the age/gender detection model.

    Loads ``transfer_Xception_29.h5`` as the age detector, then builds a
    two-class SSD300 face detector with prior boxes from
    ``prior_boxes_ssd300.pkl`` and weights from ``weights.05-3.15.hdf5``.
    """
    self.age_detector = load_model("transfer_Xception_29.h5")

    NUM_CLASSES = 2
    input_shape = (300, 300, 3)

    # Context manager closes the pickle file once the priors are read;
    # the original left the handle open.
    with open('prior_boxes_ssd300.pkl', 'rb') as priors_file:
        priors = pickle.load(priors_file)
    self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

    self.face_detector = SSD300(input_shape, num_classes=NUM_CLASSES)
    self.face_detector.load_weights('weights.05-3.15.hdf5', by_name=True)
def __init__(self):
    """Start the ``ssd_keras`` ROS node, load the detector, wire up topics.

    Initialises the node, loads SSD weights from the package resources,
    builds one colour per class, then creates the image subscriber, the
    box publisher and the ``SSD_Detection`` service.
    """
    self.node_name = "ssd_keras"
    rospy.init_node(self.node_name)

    self.class_names = [
        "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
        "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
        "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
        "train", "tvmonitor",
    ]
    self.num_classes = len(self.class_names)
    self.input_shape = (300, 300, 3)

    weights_path = pkg_path + '/resources/ssd_keras/weights_SSD300.hdf5'
    self.model = SSD(self.input_shape, num_classes=self.num_classes)
    self.model.load_weights(weights_path)
    self.bbox_util = BBoxUtility(self.num_classes)
    self.conf_thresh = 0.25

    # Build the predict function now and remember the default graph so it
    # can be re-entered later through ``self.graph``.
    self.model._make_predict_function()
    self.graph = tf.get_default_graph()
    self.detection_index = DL_msgs_boxes()

    # One bright BGR colour per class: spread the hue over the classes,
    # keep saturation/value fixed, and convert a single HSV pixel.
    self.class_colors = []
    for class_idx in range(self.num_classes):
        hsv_pixel = np.zeros((1, 1, 3)).astype("uint8")
        hsv_pixel[0, 0, 0] = 255 * class_idx / self.num_classes  # Hue
        hsv_pixel[0, 0, 1] = 128  # Saturation
        hsv_pixel[0, 0, 2] = 255  # Value
        blue, green, red = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0]
        self.class_colors.append((int(blue), int(green), int(red)))

    self.bridge = CvBridge()  # Create the cv_bridge object
    self.Image_Status = "Not_Ready"
    self.StartImage = cv2.imread(pkg_path + '/resources/start.jpg')
    self.to_draw = cv2.resize(self.StartImage, (640, 480))

    # Subscriber, publisher and service with their callbacks
    self.image_sub = rospy.Subscriber(
        "/floating_sensor/camera/rgb/image_raw",
        Image,
        self.detect_image,
        queue_size=1)
    self.box_coordinate_pub = rospy.Publisher(
        "/ssd_detction/box",
        DL_msgs_boxes,
        queue_size=5)
    self.SSD_Serv = rospy.Service(
        'SSD_Detection', DL_box, self.SSD_Detection_Server)
def __init__(self):
    """Load the four-class SSD300 classifier and its prior boxes.

    Prior boxes come from ``prior_boxes_ssd300.pkl`` and the weights
    from ``weights.180314.hdf5``, both relative to the working directory.
    """
    NUM_CLASSES = 3 + 1
    input_shape = (300, 300, 3)

    # "prior boxes" in the paper. The context manager closes the file
    # handle, which the original left open.
    with open('prior_boxes_ssd300.pkl', 'rb') as priors_file:
        priors = pickle.load(priors_file)
    self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

    self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
    self.model.load_weights('weights.180314.hdf5', by_name=True)
def __init__(self, input_shape = (300, 300, 3)):
    """Create the placeholders, network graph, global step and saver.

    Arguments:
        input_shape: Three-element shape used for the image placeholder
            (after a leading batch axis) and passed to ``self.build``.
    """
    self.num_class = config.NUM_CLASSES

    # Image batch: a free batch axis followed by the three input dims.
    image_dims = [None] + [input_shape[axis] for axis in range(3)]
    self.input_tensor = tf.placeholder(tf.float32, image_dims)

    # Label batch: 7308 rows of (4 + NUM_CLASSES + 8) values each.
    label_dims = [None, 7308, 4 + config.NUM_CLASSES + 8]
    self.label_tensor = tf.placeholder(tf.float32, label_dims)

    self.predicts = self.build(input_shape, config.NUM_CLASSES)
    self.input_shape = input_shape
    self.global_step = tf.train.create_global_step()

    # Save every global variable whose name does not contain "Adam".
    saved_vars = [
        variable for variable in tf.global_variables()
        if "Adam" not in variable.name
    ]
    self.saver = tf.train.Saver(saved_vars, max_to_keep=1)
    self.bbox_util = BBoxUtility(self.num_class)
def __init__(self):
    """Build a 300x300 SSD300 detector for the listed VOC classes.

    Weights are loaded by name from ``weights_SSD300.hdf5`` in the
    working directory.
    """
    self.image_width = 300
    self.image_height = 300
    self.voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
        'Bus', 'Car', 'Cat', 'Chair', 'Cow',
        'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    # One extra class on top of the named ones.
    self.NUM_CLASSES = len(self.voc_classes) + 1

    network_input = (self.image_height, self.image_width, 3)
    self.model = SSD300(network_input, num_classes=self.NUM_CLASSES)
    self.model.load_weights('weights_SSD300.hdf5', by_name=True)
    self.bbox_util = BBoxUtility(self.NUM_CLASSES)
def __init__(self):
    """Build an SSD300 detector from a training checkpoint.

    Weights are loaded by name from
    ``./checkpoints/weights.10-2.85.hdf5``.
    """
    class_labels = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
        'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
        'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
        'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    # One extra class on top of the named ones.
    class_count = len(class_labels) + 1

    self.model = SSD300((300, 300, 3), num_classes=class_count)
    self.model.load_weights("./checkpoints/weights.10-2.85.hdf5",
                            by_name=True)
    self.bbox_util = BBoxUtility(class_count)
def __init__(self, modelfile, shape=(300, 300, 3), num_classes=21, conf_thresh=0.6):
    """Build an SSD model from a weights file and store detection settings.

    Arguments:
        modelfile: Path of the weights file handed to ``load_weights``.
        shape: Input shape passed to ``SSD``.
        num_classes: Class count for the model and the bbox utility.
        conf_thresh: Confidence threshold, stored on the instance.
    """
    self.input_shape = shape
    self.num_classes = num_classes
    self.conf_thresh = conf_thresh

    # Build the model
    self.model = SSD(shape, num_classes=num_classes)
    self.model.load_weights(modelfile)

    # Bounding-box utility
    self.bbox_util = BBoxUtility(self.num_classes)
def train(self):
    """Build the training ops and run the training loop.

    Creates the multibox loss, an Adam optimiser with exponentially
    decayed learning rate and the train/validation loss summaries, splits
    the ground truth 80/20 into train and validation keys, then iterates
    over ``gen.generate(True)``. A checkpoint is saved whenever
    ``step % config.save_step == config.save_step - 1`` and one
    validation batch is evaluated and painted whenever
    ``step % config.snapshot_step == 0``.
    """
    self.loss = MultiboxLoss(self.num_class, neg_pos_ratio=2.0).compute_loss(
        self.label_tensor, self.predicts)
    self.loss_avg = tf.reduce_mean(self.loss)
    learning_rate = tf.train.exponential_decay(
        config.lr, self.global_step, 10000, 0.9, True, name='learning_rate')
    self.train_op = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(
            self.loss, global_step=self.global_step)
    self.train_loss_summary = tf.summary.scalar("loss_train", self.loss_avg)
    self.val_loss_summary = tf.summary.scalar("loss_val", self.loss_avg)
    self.writer = tf.summary.FileWriter(FLAGS.checkpoint)

    # Both pickles are read through context managers so the file handles
    # are closed; the original left them open.
    with open('prior_boxes_ssd300.pkl', 'rb') as priors_file:
        priors = pickle.load(priors_file)
    self.bbox_util = BBoxUtility(self.num_class, priors)
    with open(FLAGS.label_file, 'rb') as label_file:
        gt = pickle.load(label_file)

    # First 80% of the sorted keys train, the remainder validates.
    keys = sorted(gt.keys())
    num_train = int(round(0.8 * len(keys)))
    train_keys = keys[:num_train]
    val_keys = keys[num_train:]
    gen = Generator(gt, self.bbox_util, config.BATCH_SIZE, FLAGS.images_dir,
                    train_keys, val_keys,
                    (self.input_shape[0], self.input_shape[1]))

    c = tf.ConfigProto()
    c.gpu_options.allow_growth = True
    with tf.Session(config=c) as sess:
        sess.run(tf.global_variables_initializer())
        self.writer.add_graph(sess.graph)
        self.restore(sess)
        for inputs, labels in gen.generate(True):
            _, lo, step, summary = sess.run(
                [self.train_op, self.loss_avg, self.global_step,
                 self.train_loss_summary],
                feed_dict={self.input_tensor: inputs,
                           self.label_tensor: labels})
            sys.stdout.write("train loss: %d %.3f \r" % (step, lo))
            sys.stdout.flush()
            self.writer.add_summary(summary, step)

            if step % config.save_step == config.save_step - 1:
                self.saver.save(sess, os.path.join(FLAGS.checkpoint, "ckpt"),
                                global_step=self.global_step)
                print("saved")

            if step % config.snapshot_step == 0:
                val_in, val_la = next(gen.generate(False))
                # Record the validation loss under "loss_val". The
                # original evaluated train_loss_summary here, so the
                # validation points landed on the "loss_train" curve.
                lo, s, preds = sess.run(
                    [self.loss_avg, self.val_loss_summary, self.predicts],
                    feed_dict={self.input_tensor: val_in,
                               self.label_tensor: val_la})
                self.writer.add_summary(s, step)
                print("val loss:", step, lo)
                images = [np.array(val_in[v]) for v in range(val_in.shape[0])]
                self.paint_imgs(preds, images)
    print("Train finished. Checkpoint saved in", FLAGS.checkpoint)
def __init__(self, conf_limit=0.6):
    """Install a TF session and load the SSD300 detector.

    Arguments:
        conf_limit: Confidence limit, stored on the instance.
    """
    self.conf_limit = conf_limit
    np.set_printoptions(suppress=True)

    # Default session configuration, installed as the Keras session.
    session_config = tf.ConfigProto()
    set_session(tf.Session(config=session_config))

    self.voc_classes = [
        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
        'bus', 'car', 'cat', 'chair', 'cow',
        'diningtable', 'dog', 'horse', 'motorbike', 'person',
        'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
    ]
    # One extra class on top of the named ones.
    class_count = len(self.voc_classes) + 1
    self.bbox_util = BBoxUtility(class_count)

    self.model = SSD300((300, 300, 3), num_classes=class_count)
    self.model.load_weights('weights_SSD300.hdf5', by_name=True)
class UseSSD:
    """SSD300 wrapper that answers "does this image contain category X?"."""

    def __init__(self):
        """Build the 300x300 SSD300 model and load ``weights_SSD300.hdf5``."""
        self.image_width = 300
        self.image_height = 300
        self.voc_classes = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse',
            'Motorbike', 'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train',
            'Tvmonitor'
        ]
        # One extra class on top of the named ones.
        self.NUM_CLASSES = len(self.voc_classes) + 1
        self.model = SSD300((self.image_height, self.image_width, 3),
                            num_classes=self.NUM_CLASSES)
        self.model.load_weights('weights_SSD300.hdf5', by_name=True)
        self.bbox_util = BBoxUtility(self.NUM_CLASSES)

    def normalize(self, img_array):
        """Rescale ``img_array`` to mean 64 and standard deviation 16.

        A constant array has zero standard deviation; it is mapped to an
        array of 64s instead of dividing by zero and producing NaN.
        """
        centered = img_array - np.mean(img_array)
        std = np.std(img_array)
        if std == 0:
            return np.zeros_like(centered) + 64
        return centered / std * 16 + 64

    def has_category(self, img_filepath, category_label_name, confidence):
        """Return True if the image has a detection of the given category.

        Arguments:
            img_filepath: Path of the image to analyse.
            category_label_name: Entry of ``self.voc_classes`` to look for.
            confidence: Minimum detection confidence to consider.

        Returns:
            True when a detection with confidence >= ``confidence`` has
            the requested label, otherwise False (including when the
            detector returns nothing).
        """
        # For analysis: load at the network's input size.
        with load_img(img_filepath,
                      target_size=(self.image_height, self.image_width)) as img:
            img_array = img_to_array(img)
        img_array = self.normalize(img_array)
        img_array = np.expand_dims(img_array, axis=0)
        img_array = preprocess_input(img_array)

        preds = self.model.predict(img_array, batch_size=1, verbose=1)
        results = self.bbox_util.detection_out(preds)
        # No detections at all: answer False rather than returning None or
        # failing on the column indexing below.
        if len(results) == 0 or len(results[0]) == 0:
            return False

        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        # Labels are offset by one relative to ``voc_classes``.
        return any(
            conf >= confidence
            and self.voc_classes[int(label) - 1] == category_label_name
            for label, conf in zip(det_label, det_conf)
        )
def __init__(
        self,
        class_number=21,
        input_shape=(300, 300, 3),
        priors_file='prior_boxes_ssd300.pkl',
        train_file='VOC2007.pkl',
        path_prefix='./VOCdevkit/VOC2007/JPEGImages/',
        model=None,
        weight_file='weights_SSD300.hdf5',
        freeze=('input_1', 'conv1_1', 'conv1_2', 'pool1', 'conv2_1',
                'conv2_2', 'pool2', 'conv3_1', 'conv3_2', 'conv3_3',
                'pool3'),
        save_weight_file='/src/resource/checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',  # noqa
        optim=None,
        batch_size=20,
        nb_worker=1):
    """Prepare data generators and compile the model for training.

    :param class_number(int): number of classes
    :param input_shape(tuple): model input shape; the first two entries
        are passed to the generator as the image size
    :param priors_file(str): pickle file holding the prior boxes
    :param train_file(str): pickle file holding the training annotations
    :param path_prefix(str): path prefix handed to the generator
    :param model(keras model): the Keras model to train, such as the SSD;
        required despite the ``None`` default
    :param weight_file(str): weights loaded into ``model`` by name
    :param freeze(tuple): names of the layers not to train
    :param save_weight_file(str): checkpoint file name pattern
    :param optim: optimizer passed to ``model.compile``
    :param batch_size(int): batch size handed to the generator
    :param nb_worker(int): worker count, stored on the instance
    :raises ValueError: if ``model`` is None
    """
    # Fail fast with a clear message; the original failed later with an
    # AttributeError on ``None.load_weights``.
    if model is None:
        raise ValueError("a Keras model must be supplied via `model`")

    self.input_shape = input_shape

    # Context managers close both pickle files after reading; the
    # original left the handles open.
    with open(priors_file, 'rb') as priors_handle:
        priors = pickle.load(priors_handle)
    self.bbox_utils = BBoxUtility(class_number, priors)
    with open(train_file, 'rb') as train_handle:
        self.train_data = pickle.load(train_handle)

    # First 80% of the sorted keys train, the remainder validates.
    keys = sorted(self.train_data.keys())
    num_train = int(round(0.8 * len(keys)))
    self.train_keys = keys[:num_train]
    self.val_keys = keys[num_train:]
    self.num_val = len(self.val_keys)
    self.batch_size = batch_size
    self.gen = Generator(self.train_data, self.bbox_utils, batch_size,
                         path_prefix, self.train_keys, self.val_keys,
                         (self.input_shape[0], self.input_shape[1]),
                         do_crop=True)

    self.model = model
    model.load_weights(weight_file, by_name=True)
    self.freeze = list(freeze)
    self.save_weight_file = save_weight_file
    self.optim = optim
    self.nb_worker = nb_worker
    self.model.compile(
        optimizer=optim,
        metrics=['accuracy'],
        loss=MultiboxLoss(class_number, neg_pos_ratio=2.0).compute_loss)
def __init__(
        self,
        path_weights="/home/francisco/git/ssd_keras/weights_SSD300.hdf5"):
    """Install a memory-capped TF session and load the SSD300 detector.

    Arguments:
        path_weights: Weights file loaded into the model by name.
    """
    # Limit this process to 45% of the GPU memory.
    session_config = tf.ConfigProto()
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
    set_session(tf.Session(config=session_config))

    self.labels = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
        'Bus', 'Car', 'Cat', 'Chair', 'Cow',
        'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    # One extra class on top of the named ones.
    class_count = len(self.labels) + 1

    # Get detections with confidence higher than 0.6.
    self.detection_confidence = 0.6

    self.model = SSD300((300, 300, 3), num_classes=class_count)
    self.model.load_weights(path_weights, by_name=True)
    self.bbox_util = BBoxUtility(class_count)
    self.detections = []
def predict_img(numpy_array, orig_numpy_array):
    """Detect with SSD, then score the detected crop with the CNN.

    Arguments:
        numpy_array: Image array fed to the SSD network (a copy is
            preprocessed).
        orig_numpy_array: Original image; saved to
            ``temp_cat_water.jpg`` and passed on to ``ssd_image``.

    Returns:
        The first CNN output rounded to three decimals, or ``0.00`` when
        the SSD result for the image is a plain list.
    """
    # Save the original image for attachment
    scipy.misc.imsave('temp_cat_water.jpg', np.uint8(orig_numpy_array))

    # Number of voc_classes + 1
    class_count = 3

    # SSD model
    detector = SSD300((300, 300, 3), num_classes=class_count)
    detector.load_weights('./model/weights.18-0.09.hdf5', by_name=True)
    box_decoder = BBoxUtility(class_count)

    # Inception v3 transfer learning model
    scorer = load_model(filepath='./model/model_v2.03-0.40.hdf5')
    scorer_size = 299

    batch = preprocess_input(np.array([numpy_array.copy()]))
    raw_preds = detector.predict(batch, batch_size=1, verbose=0)
    results = box_decoder.detection_out(raw_preds)

    # If the SSD model does not find an appropriate object, automatically
    # return 0.00. Only one image is processed, so its batch index is 0.
    if type(results[0]) is list:
        return 0.00

    crop = ssd_image(orig_numpy_array, results, 0)
    resized_crop = imresize(crop, (scorer_size, scorer_size))
    score = scorer.predict(np.expand_dims(resized_crop, axis=0))
    return round(score[0][0], 3)
def __init__(self, class_names, model, input_shape, confidence):
    """Store the detector and its settings, and build class colours.

    Arguments:
        class_names: Sequence of class-name strings; its length is the
            class count handed to ``BBoxUtility``.
        model: Detection model, stored unchanged on the instance.
        input_shape: Model input shape, stored unchanged on the instance.
        confidence: Confidence setting, stored unchanged on the instance.
    """
    self.class_names = class_names
    self.num_classes = len(class_names)
    self.model = model
    self.input_shape = input_shape
    self.confidence = confidence
    self.bbox_util = BBoxUtility(self.num_classes)
    self.next_ID = 0

    # One bright BGR colour per class: spread the hue over the classes,
    # keep saturation/value fixed, and convert a single HSV pixel.
    self.class_colors = []
    for class_idx in range(self.num_classes):
        hsv_pixel = np.zeros((1, 1, 3)).astype("uint8")
        hsv_pixel[0, 0, 0] = 255 * class_idx / self.num_classes  # Hue
        hsv_pixel[0, 0, 1] = 128  # Saturation
        hsv_pixel[0, 0, 2] = 255  # Value
        blue, green, red = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0]
        self.class_colors.append((int(blue), int(green), int(red)))
def main(img_paths):
    """
    Detect objects in images and write one overlay image per input.

    Parameters
    ----------
    img_paths : list of strings
        Paths of the images to process; each result is written to
        ``<path>-det.png``.
    """
    # Load the model
    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
        'Bus', 'Car', 'Cat', 'Chair', 'Cow',
        'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    class_count = len(voc_classes) + 1
    detector = SSD300((300, 300, 3), num_classes=class_count)
    detector.load_weights('weights_SSD300.hdf5', by_name=True)
    box_decoder = BBoxUtility(class_count)

    # Load the inputs: a 300x300 array for the network and the image as
    # read by ``imread`` for the overlay.
    network_inputs = []
    originals = []
    for path in img_paths:
        resized = image.img_to_array(
            image.load_img(path, target_size=(300, 300)))
        originals.append(imread(path))
        network_inputs.append(resized.copy())
    batch = preprocess_input(np.array(network_inputs))

    # Predict
    raw_preds = detector.predict(batch, batch_size=1, verbose=1)
    results = box_decoder.detection_out(raw_preds)

    # Visualize
    for index, original in enumerate(originals):
        output_path = "{}-det.png".format(img_paths[index])
        create_overlay(original, results[index], voc_classes, output_path)
def main(img_paths):
    """
    Detect objects in images and write one overlay image per input.

    Parameters
    ----------
    img_paths : list of strings
        Paths of the images to process; each result is written to
        ``<path>-det.png``.
    """
    # Load the model
    voc_classes = [
        'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
        'Bus', 'Car', 'Cat', 'Chair', 'Cow',
        'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person',
        'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor',
    ]
    class_count = len(voc_classes) + 1
    detector = SSD300((300, 300, 3), num_classes=class_count)
    detector.load_weights('weights_SSD300.hdf5', by_name=True)
    box_decoder = BBoxUtility(class_count)

    # Load the inputs: a 300x300 array for the network and the image as
    # read by ``imread`` for the overlay.
    network_inputs = []
    originals = []
    for path in img_paths:
        resized = image.img_to_array(
            image.load_img(path, target_size=(300, 300)))
        originals.append(imread(path))
        network_inputs.append(resized.copy())
    batch = preprocess_input(np.array(network_inputs))

    # Predict
    raw_preds = detector.predict(batch, batch_size=1, verbose=1)
    results = box_decoder.detection_out(raw_preds)

    # Visualize
    for index, original in enumerate(originals):
        output_path = "{}-det.png".format(img_paths[index])
        create_overlay(original, results[index], voc_classes, output_path)
def __init__(self):
    """Load the traffic-light SSD300 model described by the ROS config.

    Reads the ``/traffic_light_config`` parameter (a YAML string), loads
    the prior boxes and the weights named by
    ``classifier_weights_file`` from the ``tlc`` resource directory four
    levels above this file, prints the model summary and runs one dummy
    prediction so the model is ready before the first real frame.
    """
    NUM_CLASSES = 3 + 1
    input_shape = (300, 300, 3)

    config_string = rospy.get_param("/traffic_light_config")
    # safe_load only builds plain Python objects. A bare yaml.load()
    # can instantiate arbitrary objects and is rejected outright by
    # PyYAML 6, which requires an explicit Loader.
    self.config = yaml.safe_load(config_string)
    self.stop_line_positions = self.config['stop_line_positions']

    # get path to resources
    path_to_resources = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        '..', '..', '..', '..', 'tlc')

    # "prior boxes" in the paper. The context manager closes the file
    # handle, which the original left open.
    priors_path = os.path.join(path_to_resources, 'prior_boxes_ssd300.pkl')
    with open(priors_path, 'rb') as priors_file:
        priors = pickle.load(priors_file)
    self.bbox_util = BBoxUtility(NUM_CLASSES, priors)

    # Traffic Light Classifier model and its weights
    self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
    # summary() prints by itself; wrapping it in print() added a stray
    # "None" line.
    self.model.summary()
    self.model.load_weights(
        os.path.join(path_to_resources,
                     self.config['classifier_weights_file']),
        by_name=True)

    # Warm-up prediction (original note: prevents a TensorFlow ValueError
    # later on). One call is enough; the original ran it twice.
    dummy = np.zeros((1, 300, 300, 3))
    self.model.predict(dummy, batch_size=1, verbose=0)

    self.capture_images = False
    self.image_counts = {0: 0, 1: 0, 2: 0, 4: 0}
    self.last_classification = None
def init_model(weight_file='ssd_keras/SSD/weights_SSD300.hdf5'):
    """Create a memory-capped TF session and return the SSD300 detector.

    Arguments:
        weight_file: Weights file loaded into the model by name.

    Returns:
        A ``(model, bbox_util)`` tuple. The class count is taken from the
        module-level ``voc_classes`` plus one.
    """
    np.set_printoptions(suppress=True)

    # Limit this process to 45% of the GPU memory.
    session_config = tf.ConfigProto()
    session_config.gpu_options.per_process_gpu_memory_fraction = 0.45
    set_session(tf.Session(config=session_config))

    class_count = len(voc_classes) + 1
    detector = SSD300((300, 300, 3), num_classes=class_count)
    detector.load_weights(weight_file, by_name=True)
    return detector, BBoxUtility(class_count)
def __init__(self, class_names, model, input_shape):
    """Store the detector and build one display colour per class.

    Arguments:
        class_names: Sequence of class-name strings; its length is the
            class count handed to ``BBoxUtility``.
        model: Detection model, stored unchanged on the instance.
        input_shape: Model input shape, stored unchanged on the instance.
    """
    self.class_names = class_names
    self.num_classes = len(class_names)
    self.model = model
    self.input_shape = input_shape
    self.bbox_util = BBoxUtility(self.num_classes)

    # One bright BGR colour per class: spread the hue over the classes,
    # keep saturation/value fixed, and convert a single HSV pixel.
    self.class_colors = []
    for class_idx in range(self.num_classes):
        hsv_pixel = np.zeros((1, 1, 3)).astype("uint8")
        hsv_pixel[0, 0, 0] = 255*class_idx/self.num_classes  # Hue
        hsv_pixel[0, 0, 1] = 128  # Saturation
        hsv_pixel[0, 0, 2] = 255  # Value
        blue, green, red = cv2.cvtColor(hsv_pixel, cv2.COLOR_HSV2BGR)[0][0]
        self.class_colors.append((int(blue), int(green), int(red)))
class ssdKeras():
    """ROS image callback wrapper around an SSD Keras detector.

    Subscribes to ``/image_raw_converted2``, runs the detector on every
    frame, publishes box messages on ``/ssd_detction/box`` and shows the
    annotated frame in an OpenCV window.
    """

    def __init__(self):
        """Load the model, build class colours and create the ROS topics."""
        self.class_names = [
            "background", "aeroplane", "bicycle", "bird", "boat", "bottle",
            "bus", "car", "cat", "chair", "cow", "diningtable", "dog",
            "horse", "motorbike", "person", "pottedplant", "sheep", "sofa",
            "train", "tvmonitor"
        ]
        self.num_classes = len(self.class_names)
        self.input_shape = (300, 300, 3)
        self.model = SSD(self.input_shape, num_classes=self.num_classes)
        self.model.load_weights('/home/abdulrahman/catkin_ws/src/victim_localization/resources/ssd_keras/weights_SSD300.hdf5')
        self.bbox_util = BBoxUtility(self.num_classes)
        self.conf_thresh = 0.7

        # Build the predict function now and remember the default graph;
        # detect_image re-enters it around every predict call.
        self.model._make_predict_function()
        self.graph = tf.get_default_graph()
        self.detection_index = DL_msgs_boxes()

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            hue = 255*i/self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
            self.class_colors.append(col)

        self.bridge = CvBridge()  # Create the cv_bridge object
        self.image_sub = rospy.Subscriber(
            "/image_raw_converted2", Image, self.detect_image, queue_size=1)
        self.box_coordinate_pub = rospy.Publisher(
            "/ssd_detction/box", DL_msgs_boxes, queue_size=5)

    def detect_image(self, ros_image):
        """Detect objects in one ROS image, publish boxes, show the frame.

        # Arguments
        ros_image: Incoming ROS image message; converted to a BGR OpenCV
                   image via cv_bridge. Detections below
                   ``self.conf_thresh`` are ignored.
        """
        # Use cv_bridge() to convert the ROS image to OpenCV format.
        try:
            image_orig = self.bridge.imgmsg_to_cv2(ros_image, "bgr8")
        except CvBridgeError as e:
            print(e)
            # Without a converted frame there is nothing to process; the
            # original fell through and hit a NameError on image_orig.
            return

        im_size = (self.input_shape[0], self.input_shape[1])
        resized = cv2.resize(image_orig, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

        # The resized version is scaled back up for display, to visualize
        # what kind of resolution the network has to work with.
        to_draw = cv2.resize(resized, (1280, 720))

        # Use model to predict
        inputs = [image.img_to_array(rgb)]
        x = preprocess_input(np.array(inputs))
        with self.graph.as_default():
            y = self.model.predict(x)

        # This line creates a new TensorFlow device every time. Is there a
        # way to avoid that?
        results = self.bbox_util.detection_out(y)

        if len(results) > 0 and len(results[0]) > 0:
            # Interpret output, only one frame is used
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf)
                           if conf >= self.conf_thresh]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            # Initialize the detection msgs with an all-zero "Non" box; it
            # is what gets published when nothing passes the threshold.
            box_msg = DL_msgs_box()
            box_msg.xmin = 0
            box_msg.ymin = 0
            box_msg.xmax = 0
            box_msg.ymax = 0
            box_msg.Class = "Non"
            self.detection_index.boxes.append(box_msg)
            print(top_xmin)

            for i in range(top_conf.shape[0]):
                # NOTE(review): the list is cleared on every iteration, so
                # only the last detection survives to be published.
                # Confirm whether accumulating all boxes was intended.
                self.detection_index.boxes[:] = []
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))
                class_num = int(top_label_indices[i])

                # include the corner to be published
                box_msg = DL_msgs_box()
                box_msg.xmin = xmin
                box_msg.ymin = ymin
                box_msg.xmax = xmax
                box_msg.ymax = ymax
                box_msg.Class = self.class_names[class_num]
                self.detection_index.boxes.append(box_msg)

                # Draw the box on top of the to_draw image (persons only)
                if self.class_names[class_num] == "person":
                    cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                  self.class_colors[class_num], 2)
                    text = self.class_names[class_num] + " " + ('%.2f' % top_conf[i])
                    text_top = (xmin, ymin-10)
                    text_bot = (xmin + 80, ymin + 5)
                    text_pos = (xmin + 5, ymin)
                    cv2.rectangle(to_draw, text_top, text_bot,
                                  self.class_colors[class_num], -1)
                    cv2.putText(to_draw, text, text_pos,
                                cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

            # NOTE(review): the source this was reconstructed from had lost
            # its indentation; publishing once per frame after the loop is
            # the assumed nesting. Confirm against the original file.
            self.detection_index.header = std_msgs.msg.Header()
            self.detection_index.header.stamp = rospy.Time.now()
            print(self.detection_index)
            self.box_coordinate_pub.publish(self.detection_index)
            self.detection_index.boxes[:] = []

        cv2.imshow("SSD result", to_draw)
        cv2.waitKey(1)

    def main(self):
        """Block in ``rospy.spin()`` so the callbacks keep running."""
        rospy.spin()
for i in range(nb_epoch): fp.write("%d\t%f\t%f\t%f\t%f\n" % (epochs, loss[i], acc[i], val_loss[i], val_acc[i])) plt.rcParams['figure.figsize'] = (8, 8) plt.rcParams['image.interpolation'] = 'nearest' np.set_printoptions(suppress=True) # 21 NUM_CLASSES = 21 #4 input_shape = (300, 300, 3) priors = pickle.load(open('prior_boxes_ssd300.pkl', 'rb')) bbox_util = BBoxUtility(NUM_CLASSES, priors) # gt = pickle.load(open('gt_pascal.pkl', 'rb')) gt = pickle.load(open('VOC2007.pkl', 'rb')) keys = sorted(gt.keys()) num_train = int(round(0.8 * len(keys))) train_keys = keys[:num_train] val_keys = keys[num_train:] num_val = len(val_keys) class Generator(object): def __init__(self, gt, bbox_util, batch_size,
class VideoTest(object):
    """Run a trained SSD model on a video (or webcam) and show the result.

    One VideoTest object can be created for a model, and the same object
    can then be used on multiple videos and webcams.

    Arguments:
        class_names: A list of strings, each containing the name of a class.
            The first name should be that of the background class, which
            is not used.
        model: An SSD model. It should already be trained for images
            similar to the video to test on.
        input_shape: The shape that the model expects for its input,
            as a tuple, for example (300, 300, 3).

    A BBoxUtility is created internally with ``len(class_names)`` classes;
    it is not a constructor argument.
    """

    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes (one HSV hue step per class,
        # converted to a BGR tuple usable by the cv2 drawing calls).
        self.class_colors = []
        for i in range(0, self.num_classes):
            hue = 255 * i / self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

    @staticmethod
    def _capture_prop(name):
        """Return the VideoCapture property id for ``name``.

        OpenCV 3+ exposes ``cv2.CAP_PROP_<name>``; OpenCV 2.x only has
        ``cv2.cv.CV_CAP_PROP_<name>``. Try the modern name first.
        """
        modern = "CAP_PROP_" + name
        if hasattr(cv2, modern):
            return getattr(cv2, modern)
        return getattr(cv2.cv, "CV_CAP_PROP_" + name)

    def run(self, video_path=0, start_frame=0, conf_thresh=0.6):
        """Run the test on a video (or webcam) until the stream ends.

        # Arguments
        video_path: A file path to a video to be tested on. Can also be a
            number, in which case the webcam with the same number (i.e. 0)
            is used instead.
        start_frame: The number of the first frame of the video to be
            processed by the network.
        conf_thresh: Threshold of confidence. Any boxes with lower
            confidence are not visualized.

        # Raises
        IOError: if the video file or webcam cannot be opened.
        """
        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
                           "trying to open a webcam, make sure you video_path is an integer!"))

        try:
            # Compute aspect ratio of video
            vidw = vid.get(self._capture_prop("FRAME_WIDTH"))
            vidh = vid.get(self._capture_prop("FRAME_HEIGHT"))
            vidar = vidw / vidh

            # Skip frames until reaching start_frame. The seek is done in
            # frames (POS_FRAMES), matching the documented meaning of
            # start_frame; the previous POS_MSEC seek treated it as
            # milliseconds.
            if start_frame > 0:
                vid.set(self._capture_prop("POS_FRAMES"), start_frame)

            accum_time = 0
            curr_fps = 0
            fps = "FPS: ??"
            prev_time = timer()

            while True:
                retval, orig_image = vid.read()
                if not retval:
                    print("Done!")
                    return

                im_size = (self.input_shape[0], self.input_shape[1])
                resized = cv2.resize(orig_image, im_size)
                rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

                # Reshape to original aspect ratio for later visualization
                # The resized version is used, to visualize what kind of
                # resolution the network has to work with.
                to_draw = cv2.resize(
                    resized,
                    (int(self.input_shape[0] * vidar), self.input_shape[1]))

                # Use model to predict
                inputs = [image.img_to_array(rgb)]
                tmp_inp = np.array(inputs)
                x = preprocess_input(tmp_inp)
                y = self.model.predict(x)

                results = self.bbox_util.detection_out(y)

                # A frame without detections yields an empty entry that
                # cannot be sliced as a 2-D array, so guard before parsing.
                if len(results) > 0 and len(results[0]) > 0:
                    # Interpret output, only one frame is used
                    det_label = results[0][:, 0]
                    det_conf = results[0][:, 1]
                    det_xmin = results[0][:, 2]
                    det_ymin = results[0][:, 3]
                    det_xmax = results[0][:, 4]
                    det_ymax = results[0][:, 5]

                    top_indices = [
                        idx for idx, conf in enumerate(det_conf)
                        if conf >= conf_thresh
                    ]

                    top_conf = det_conf[top_indices]
                    top_label_indices = det_label[top_indices].tolist()
                    top_xmin = det_xmin[top_indices]
                    top_ymin = det_ymin[top_indices]
                    top_xmax = det_xmax[top_indices]
                    top_ymax = det_ymax[top_indices]

                    for i in range(top_conf.shape[0]):
                        # Box coordinates are relative; scale them to the
                        # pixel size of the image being drawn on.
                        xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                        ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                        xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                        ymax = int(round(top_ymax[i] * to_draw.shape[0]))

                        # Draw the box on top of the to_draw image
                        class_num = int(top_label_indices[i])
                        cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                                      self.class_colors[class_num], 2)
                        text = self.class_names[class_num] + " " + (
                            '%.2f' % top_conf[i])

                        text_top = (xmin, ymin - 10)
                        text_bot = (xmin + 80, ymin + 5)
                        text_pos = (xmin + 5, ymin)
                        cv2.rectangle(to_draw, text_top, text_bot,
                                      self.class_colors[class_num], -1)
                        cv2.putText(to_draw, text, text_pos,
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.35,
                                    (0, 0, 0), 1)

                # Calculate FPS
                # This computes FPS for everything, not just the model's
                # execution which may or may not be what you want
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    accum_time = accum_time - 1
                    fps = "FPS: " + str(curr_fps)
                    curr_fps = 0

                # Draw FPS in top left corner
                cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
                cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.35, (0, 0, 0), 1)

                cv2.imshow("SSD result", to_draw)
                cv2.waitKey(10)
        finally:
            # Free the file/camera handle on normal end and on errors.
            vid.release()
config = tf.ConfigProto() config.gpu_options.per_process_gpu_memory_fraction = 0.8 set_session(tf.Session(config=config)) voc_classes = [ 'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor' ] NUM_CLASSES = len(voc_classes) + 1 input_shape = (300, 300, 3) model = SSD300(input_shape, num_classes=NUM_CLASSES) model.load_weights('weights_SSD300.hdf5', by_name=True) bbox_util = BBoxUtility(NUM_CLASSES) from PIL import Image def get_rectangle(img_file, img_name, target_file, target_label): inputs = [] images = [] img_path = '{}/{}.jpg'.format(img_file, img_name) im = Image.open(img_path) img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) inputs = preprocess_input(np.array(inputs))
class VideoTest(object):
    """Run a trained SSD model on a video (or webcam) and show the result.

    One VideoTest object can be created for a model, and the same object
    can then be used on multiple videos and webcams.

    Arguments:
        class_names: A list of strings, each containing the name of a class.
            The first name should be that of the background class, which
            is not used.
        model: An SSD model. It should already be trained for images
            similar to the video to test on.
        input_shape: The shape that the model expects for its input,
            as a tuple, for example (300, 300, 3).

    The BBoxUtility used for decoding is built in the constructor from
    ``len(class_names)``; it is not passed in by the caller.
    """

    def __init__(self, class_names, model, input_shape):
        self.class_names = class_names
        self.num_classes = len(class_names)
        self.model = model
        self.input_shape = input_shape
        self.bbox_util = BBoxUtility(self.num_classes)

        # Create unique and somewhat visually distinguishable bright
        # colors for the different classes.
        self.class_colors = []
        for i in range(0, self.num_classes):
            hue = 255*i/self.num_classes
            col = np.zeros((1, 1, 3)).astype("uint8")
            col[0][0][0] = hue
            col[0][0][1] = 128  # Saturation
            col[0][0][2] = 255  # Value
            cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
            col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]),
                   int(cvcol[0][0][2]))
            self.class_colors.append(col)

    @staticmethod
    def _cap_prop(name):
        """Look up a VideoCapture property id on OpenCV 3+ or OpenCV 2.x."""
        try:
            return getattr(cv2, "CAP_PROP_" + name)
        except AttributeError:
            # OpenCV 2.x keeps these constants in the legacy cv2.cv module.
            return getattr(cv2.cv, "CV_CAP_PROP_" + name)

    def _draw_detections(self, to_draw, detections, conf_thresh):
        """Draw each detection scoring at least conf_thresh onto to_draw.

        Each row of ``detections`` is read as
        (label, confidence, xmin, ymin, xmax, ymax) with relative
        coordinates, which are scaled to the pixel size of ``to_draw``.
        """
        draw_h = to_draw.shape[0]
        draw_w = to_draw.shape[1]
        for det in detections:
            score = det[1]
            if not score >= conf_thresh:
                continue

            xmin = int(round(det[2] * draw_w))
            ymin = int(round(det[3] * draw_h))
            xmax = int(round(det[4] * draw_w))
            ymax = int(round(det[5] * draw_h))

            # Draw the box on top of the to_draw image
            class_num = int(det[0])
            color = self.class_colors[class_num]
            cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax), color, 2)

            # Filled label background followed by "<class> <score>" text.
            text = self.class_names[class_num] + " " + ('%.2f' % score)
            text_top = (xmin, ymin-10)
            text_bot = (xmin + 80, ymin + 5)
            text_pos = (xmin + 5, ymin)
            cv2.rectangle(to_draw, text_top, text_bot, color, -1)
            cv2.putText(to_draw, text, text_pos,
                        cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

    def run(self, video_path = 0, start_frame = 0, conf_thresh = 0.6):
        """Run the test on a video (or webcam) until the stream ends.

        # Arguments
        video_path: A file path to a video to be tested on. Can also be a
            number, in which case the webcam with the same number (i.e. 0)
            is used instead.
        start_frame: The number of the first frame of the video to be
            processed by the network.
        conf_thresh: Threshold of confidence. Any boxes with lower
            confidence are not visualized.

        # Raises
        IOError: if the video file or webcam cannot be opened.
        """
        vid = cv2.VideoCapture(video_path)
        if not vid.isOpened():
            raise IOError(("Couldn't open video file or webcam. If you're "
                           "trying to open a webcam, make sure you video_path is an integer!"))

        try:
            # Compute aspect ratio of video
            vidw = vid.get(self._cap_prop("FRAME_WIDTH"))
            vidh = vid.get(self._cap_prop("FRAME_HEIGHT"))
            vidar = vidw/vidh

            # Skip frames until reaching start_frame. start_frame is a
            # frame index, so seek with POS_FRAMES rather than POS_MSEC.
            if start_frame > 0:
                vid.set(self._cap_prop("POS_FRAMES"), start_frame)

            accum_time = 0
            curr_fps = 0
            fps = "FPS: ??"
            prev_time = timer()

            while True:
                retval, orig_image = vid.read()
                if not retval:
                    print("Done!")
                    return

                im_size = (self.input_shape[0], self.input_shape[1])
                resized = cv2.resize(orig_image, im_size)
                rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

                # Reshape to original aspect ratio for later visualization
                # The resized version is used, to visualize what kind of
                # resolution the network has to work with.
                to_draw = cv2.resize(
                    resized,
                    (int(self.input_shape[0]*vidar), self.input_shape[1]))

                # Use model to predict
                inputs = [image.img_to_array(rgb)]
                tmp_inp = np.array(inputs)
                x = preprocess_input(tmp_inp)
                y = self.model.predict(x)

                # Only one frame is predicted, so only results[0] is used;
                # it is skipped entirely when nothing was detected.
                results = self.bbox_util.detection_out(y)
                if len(results) > 0 and len(results[0]) > 0:
                    self._draw_detections(to_draw, results[0], conf_thresh)

                # Calculate FPS
                # This computes FPS for everything, not just the model's
                # execution which may or may not be what you want
                curr_time = timer()
                exec_time = curr_time - prev_time
                prev_time = curr_time
                accum_time = accum_time + exec_time
                curr_fps = curr_fps + 1
                if accum_time > 1:
                    accum_time = accum_time - 1
                    fps = "FPS: " + str(curr_fps)
                    curr_fps = 0

                # Draw FPS in top left corner
                cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
                cv2.putText(to_draw, fps, (3, 10), cv2.FONT_HERSHEY_SIMPLEX,
                            0.35, (0, 0, 0), 1)

                cv2.imshow("SSD result", to_draw)
                cv2.waitKey(10)
        finally:
            # Always give the capture device/file back, even on errors.
            vid.release()
def _predict_frames(model, frames):
    """Preprocess a list of resized frames and return the model's raw output."""
    batch = np.array(frames).astype(np.float32)
    batch = preprocess_input(batch)
    return model.predict_on_batch(batch)


def main(dataset, run, input_shape, seq_start, seq_stop, videopath, conf_thresh, i_seq, outname, batch_size):
    """Run the detector on frames [seq_start, seq_stop) of a video and save a CSV.

    Frames are masked, resized to the model resolution and predicted in
    batches of ``batch_size``. Frame indices past the end of the video are
    padded with the last frame that was read; their predictions are never
    turned into detections. The detections of all real frames are
    concatenated and written to ``outname``.

    Args:
        dataset: dataset identifier passed to get_classnames, Masker and
            get_model.
        run: run identifier passed to get_model.
        input_shape: resolution description accepted by parse_resolution;
            the first two entries of the parsed value are used as width
            and height.
        seq_start: index of the first frame to process.
        seq_stop: index one past the last frame to process.
        videopath: path handed to io.get_reader.
        conf_thresh: confidence threshold forwarded to process_results.
        i_seq: sequence number; 0 overwrites ``outname`` and writes the
            CSV header, any other value appends without a header.
        outname: path of the output CSV file.
        batch_size: number of frames per prediction batch.
    """
    print_flush("> Predicting...")

    classes = get_classnames(dataset)
    masker = Masker(dataset)

    input_shape = parse_resolution(input_shape)

    # One extra class is added on top of the named classes.
    num_classes = len(classes)+1
    model = get_model(dataset, run, input_shape, num_classes, verbose=False)
    priors = get_priors(model, input_shape)
    bbox_util = BBoxUtility(num_classes, priors)

    width = input_shape[0]
    height = input_shape[1]

    inputs = []
    outputs = []
    old_frame = None

    with io.get_reader(videopath) as vid:
        vlen = len(vid)
        for i_in_seq in range(seq_start, seq_stop):
            if i_in_seq < vlen:
                frame = vid.get_data(i_in_seq)
                frame = masker.mask(frame)
                old_frame = frame
            else:
                # Past the end of the video: pad with the last real frame.
                frame = old_frame

            resized = cv2.resize(frame, (width, height))
            inputs.append(resized)

            if len(inputs) == batch_size:
                outputs.append(_predict_frames(model, inputs))
                inputs = []

    # Flush the trailing partial batch. Without this, a sequence whose
    # length is not a multiple of batch_size silently lost its last frames
    # and the indexing of `preds` below ran past the end.
    if inputs:
        outputs.append(_predict_frames(model, inputs))
        inputs = []

    preds = np.vstack(outputs)

    print_flush("> Processing...")
    all_detections = []
    seq_len = seq_stop - seq_start

    for i in range(seq_len):
        frame_num = i + seq_start

        # Padding frames (frame_num >= vlen) carry no detections of their own.
        if frame_num < vlen:
            pred = preds[i, :]
            # detection_out is given a batch of one prediction.
            pred = pred.reshape(1, pred.shape[0], pred.shape[1])
            results = bbox_util.detection_out(pred, soft=False)

            detections = process_results(results, width, height, classes, conf_thresh, frame_num)
            all_detections.append(detections)

    dets = pd.concat(all_detections)

    # For the first line, we should open in write mode, and then in append mode
    # This way, we still overwrite the files if this script is run multiple times
    open_mode = 'a'
    include_header = False
    if i_seq == 0:
        open_mode = 'w'
        include_header = True

    print_flush("> Writing to {} ...".format(outname))
    with open(outname, open_mode) as f:
        dets.to_csv(f, header=include_header)
NUM_CLASSES = 21 args = parser.parse_args() with open(args.path_to_settings, 'r') as fp: sets = yaml.safe_load(fp) input_shape = (sets['img_height'], sets['img_width'], 3) batch_size = sets['batch_size'] priors = pickle.load( open( os.path.join(dir_path, 'priorFiles/prior_boxes_ssd300MobileNetV2.pkl'), 'rb')) bbox_util = BBoxUtility(NUM_CLASSES, priors) gt = pickle.load(open(os.path.join(dir_path, 'voc_2007.pkl'), 'rb')) keys = sorted(gt.keys()) num_train = int(round(0.8 * len(keys))) train_keys = keys[:num_train] val_keys = keys[num_train:] num_val = len(val_keys) path_prefix = os.path.join(sets['dataset_dir'], 'VOC2007/JPEGImages/') gen = Generator(gt, bbox_util, batch_size, path_prefix, train_keys, val_keys, (input_shape[0], input_shape[1]),
# In[2]: voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse','Motorbike', 'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'] NUM_CLASSES = len(voc_classes) + 1 # In[3]: input_shape = (300, 300, 3) model = SSD300v2(input_shape, num_classes=NUM_CLASSES) model.load_weights('weights_SSD300.hdf5', by_name=True) bbox_util = BBoxUtility(NUM_CLASSES) # In[4]: inputs = [] images = [] img_path = './pics/fish-bike.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path)) inputs.append(img.copy()) img_path = './pics/cat.jpg' img = image.load_img(img_path, target_size=(300, 300)) img = image.img_to_array(img) images.append(imread(img_path))
class DetectorSSD(Detector):
    """Detector backed by an SSD300 Keras model.

    The constructor configures the TensorFlow session, builds the SSD300
    network and loads its weights; ``detect`` runs it on a single image.
    """

    def __init__(
            self,
            path_weights="/home/francisco/git/ssd_keras/weights_SSD300.hdf5"):
        """Build the model and load weights from ``path_weights``."""
        # Cap this process at 45% of the GPU memory.
        config = tf.ConfigProto()
        config.gpu_options.per_process_gpu_memory_fraction = 0.45
        set_session(tf.Session(config=config))

        self.labels = [
            'Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle', 'Bus', 'Car',
            'Cat', 'Chair', 'Cow', 'Diningtable', 'Dog', 'Horse', 'Motorbike',
            'Person', 'Pottedplant', 'Sheep', 'Sofa', 'Train', 'Tvmonitor'
        ]
        # One extra class on top of the named labels.
        NUM_CLASSES = len(self.labels) + 1
        input_shape = (300, 300, 3)

        # Get detections with confidence higher than 0.6.
        self.detection_confidence = 0.6

        self.model = SSD300(input_shape, num_classes=NUM_CLASSES)
        self.model.load_weights(path_weights, by_name=True)
        self.bbox_util = BBoxUtility(NUM_CLASSES)
        self.detections = []

    def detect(self, cvImage):
        """Detect objects in one BGR image.

        Args:
            cvImage: image array in BGR channel order (it is converted
                with cv2.COLOR_BGR2RGB before prediction).

        Returns:
            A list of ``[label, score, xmin, ymin, xmax, ymax]`` entries,
            one per detection whose confidence is at least
            ``self.detection_confidence``. ``label`` is an int, ``score`` a
            float, and the coordinates are ints scaled to the size of
            ``cvImage``. The list is also stored in ``self.detections``
            and is empty when nothing was detected.
        """
        rgb = cv2.cvtColor(cvImage, cv2.COLOR_BGR2RGB)
        resized = cv2.resize(rgb, (300, 300))

        # np.float was only an alias of the builtin float (float64) and has
        # been removed from NumPy, so name the dtype explicitly.
        batch = preprocess_input(np.array([resized.astype(np.float64)]))

        preds = self.model.predict(batch, batch_size=1, verbose=0)
        results = self.bbox_util.detection_out(preds)

        self.detections = []

        # No detections for the frame: the entry is empty and cannot be
        # sliced as a 2-D array.
        if len(results) == 0 or len(results[0]) == 0:
            return self.detections

        frame_h = rgb.shape[0]
        frame_w = rgb.shape[1]

        # Each row is read as (label, conf, xmin, ymin, xmax, ymax) with
        # relative coordinates.
        for det in results[0]:
            score = det[1]
            if not score >= self.detection_confidence:
                continue
            xmin = int(round(det[2] * frame_w))
            ymin = int(round(det[3] * frame_h))
            xmax = int(round(det[4] * frame_w))
            ymax = int(round(det[5] * frame_h))
            self.detections.append(
                [int(det[0]), float(score), xmin, ymin, xmax, ymax])

        return self.detections
def train_SSD300_NAG(
        master_file,
        train_dir,
        test_dir,
        model_path,
        load_weights_path=r'C:\Users\shingo\jupyter_notebook\tfgpu_py36_work\AI_Edge_Contest\object_detection\SSD_classes_py\all_SSD_module\SSD\weights_SSD300.hdf5',
        epochs=20,
        batch_size=32,
        base_lr=1e-3,
        num_classes=6 + 1,
        callback=None):
    """Train an SSD300 model with SGD + Nesterov momentum.

    Function form of dtc_train.py with its parameters exposed as arguments.
    Bounding-box labels for the training images are loaded from a CSV file
    and an SSD model is built and trained on them.

    Notes carried over from the original author (not verifiable from this
    function alone): labels come from a single CSV instead of one XML file
    per image; images are resized to 300x300 (per ssd_vgg.py); the
    classifier is a fine-tuned VGG16.

    The optimizer is SGD with momentum 0.9, Nesterov updates and
    decay=1e-6. No LearningRateScheduler is installed; the only default
    callback is ModelCheckpoint.

    Args:
        master_file: path of the CSV listing the ground-truth boxes
            (file name, x, y, width, height, label id). Label id 0 must not
            be used because SSD reserves it for the background class.
        train_dir: directory containing the training images.
        test_dir: directory containing the evaluation images.
        model_path: path where the model weights are saved.
        load_weights_path: path of the weights file loaded before training.
        epochs: number of epochs.
        batch_size: batch size.
        base_lr: initial learning rate.
        num_classes: number of classes including the background class
            (class_id=0), i.e. number of target classes + 1.
        callback: optional list of extra callbacks appended after the
            default ModelCheckpoint. None or an empty list adds nothing.

    Returns:
        The history object returned by ``model.fit_generator``. The best
        weights are written to ``model_path`` by ModelCheckpoint.
    """
    # Optimizer
    optimizer = keras.optimizers.SGD(lr=base_lr,
                                     momentum=0.9,
                                     decay=1e-6,
                                     nesterov=True)

    # Build the lookup from image file name to its ground-truth boxes.
    correct_boxes = get_correct_boxes(master_file,
                                      train_dir,
                                      test_dir,
                                      num_classes=num_classes)

    # Collect the image file paths.
    train_path_list = glob.glob(os.path.join(train_dir, "*.*"))
    test_path_list = glob.glob(os.path.join(test_dir, "*.*"))

    # Build the model.
    model = create_model(num_classes=num_classes)
    print('create_model ok')
    model.load_weights(load_weights_path, by_name=True)
    print('load_weights ok')

    # Freeze the layers near the input.
    freeze_layers(model, depth_level=1)
    print('freeze_layers ok')

    model.compile(optimizer=optimizer,
                  loss=MultiboxLoss(num_classes).compute_loss)
    plot_model(model,
               os.path.join(os.path.dirname(model_path), "model_ssd.png"))

    # Create the default (prior) boxes.
    priors = create_prior_box()

    # Create the image data generator.
    # NOTE(review): `input_shape` is not a parameter of this function; it
    # is presumably a module-level constant - confirm it is defined.
    bbox_util = BBoxUtility(num_classes, priors)
    gen = Generator(correct_boxes, bbox_util, train_path_list, test_path_list,
                    (input_shape[0], input_shape[1]), batch_size)

    print("Train Items : {}".format(gen.train_batches))
    print("Test Items : {}".format(gen.val_batches))

    # Callbacks: checkpoint only the best weights, then any caller extras.
    callbacks = [
        ModelCheckpoint(model_path,
                        verbose=1,
                        save_weights_only=True,
                        save_best_only=True)
    ]
    if callback:
        callbacks.extend(callback)

    # Start training.
    start_time = time.time()
    history = model.fit_generator(gen.generate(True),
                                  gen.train_batches // batch_size,
                                  epochs=epochs,
                                  verbose=2,
                                  callbacks=callbacks,
                                  validation_data=gen.generate(False),
                                  validation_steps=gen.val_batches //
                                  batch_size)
    end_time = time.time()

    # Report the elapsed time.
    elapsed_time = end_time - start_time
    print("Elapsed Time : {0:d} hr {1:d} min {2:d} sec".format(
        int(elapsed_time // 3600), int((elapsed_time % 3600) // 60),
        int(elapsed_time % 60)))

    return history