def __init__(self, argv):
    # Derive the INI file name from the script name (e.g. app.py -> app.ini).
    # os.path.splitext is used instead of str.split(".") so names containing
    # extra dots do not raise a ValueError.
    app_name = os.path.basename(argv[0])
    name, _ = os.path.splitext(app_name)
    inifile = name + ".ini"
    print("inifile {}".format(inifile))

    parser = configparser.ConfigParser()
    parser.read(inifile)
    # For example, this may hold a path such as C:/ssd_keras/weights_SSD300.hdf5
    self.weightfile = parser.get("WEIGHT_FILE", "filename")
    self.showimage = int(parser.get("SHOW_IMAGE", "show"))
    self.confidence = float(parser.get("DETECTION", "confidence"))

    self.classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
                    'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
                    'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
                    'Sheep', 'Sofa', 'Train', 'Tvmonitor']
    self.n_classes = len(self.classes) + 1  # +1 for the background class
    self.input_shape = (300, 300, 3)
    self.model = SSD300v2(self.input_shape, num_classes=self.n_classes)
    self.model.load_weights(self.weightfile, by_name=True)
    self.bbox_util = BBoxUtility(self.n_classes)
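# A minimal example of the INI layout this constructor expects. The section
# and key names come directly from the parser.get() calls above; the values
# shown are illustrative assumptions:
#
#   [WEIGHT_FILE]
#   filename = C:/ssd_keras/weights_SSD300.hdf5
#
#   [SHOW_IMAGE]
#   show = 1
#
#   [DETECTION]
#   confidence = 0.6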
def feature_flow():
    bbox_util = BBoxUtility(NUM_CLASSES)
    raw_inputs, images = load_inputs(image_files)
    inputs = preprocess_input(np.array(raw_inputs))

    dump_activation_layer = 'conv4_2'
    compare_layer_name = 'conv6_2'
    print('dump_activation_layer', dump_activation_layer)
    print('target_layer_name', compare_layer_name)

    # Normal SSD network
    model1 = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model1.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model1, inputs)
    results = bbox_util.detection_out(predictions)
    plot_detections(images, results)

    # Get the dump layer's output (as input for the flow network)
    input_img2 = inputs[1:2, :, :, :]
    layer_dump = get_layer_output(model=model1, inputs=input_img2,
                                  output_layer_name=dump_activation_layer)
    print('layer_dump.shape = ', layer_dump.shape)

    # Flow (raw RGB)
    flow_rgb = compute_flow(image_files[1], image_files[0])
    print('flow.shape', flow_rgb.shape)
    imshow_fig(cv2.cvtColor(draw_hsv(flow_rgb), cv2.COLOR_BGR2RGB),
               title='flow_rgb')

    # Flow (resized for the feature map)
    flow_feature = get_flow_for_filter(flow_rgb)
    # imshow_fig(flow_feature[:, :, 0], title='flow_feature_y', cmap='gray')
    # imshow_fig(flow_feature[:, :, 1], title='flow_feature_x', cmap='gray')

    # Warp the image by flow_rgb
    iimg1 = cv2.imread(image_files[0])
    img_warp = warp_flow(iimg1, flow_rgb)
    imshow_fig(cv2.cvtColor(img_warp, cv2.COLOR_BGR2RGB), title='frame_2_warp')

    # Shift the feature
    shifted_feature = shift_filter(layer_dump, flow_feature)

    # Flow net
    model2 = SSD300_conv4_3((128, 128, 512), num_classes=NUM_CLASSES)
    model2.load_weights('weights_SSD300.hdf5', by_name=True)
    predictions = run_network(model2, shifted_feature)
    results = bbox_util.detection_out(predictions)
    plot_detections(images[1:2], results)

    # Get a specific layer's output from each model and compare them (for debugging)
    compare_model_layer(model1, input_img2, compare_layer_name,
                        model2, shifted_feature, compare_layer_name, True)

    sess.close()
    plt.show()
def initialize(weight_file_path):
    np.set_printoptions(suppress=True)

    # Limit TensorFlow to a fraction of the available GPU memory.
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.45
    set_session(tf.Session(config=config))

    input_shape = (300, 300, 3)
    model = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model.load_weights(weight_file_path, by_name=True)
    return model
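# Minimal usage sketch (assumes NUM_CLASSES is defined at module scope and
# that a compatible weight file exists; the file name and the dummy input
# below are illustrative):
model = initialize('weights_SSD300.hdf5')
dummy = np.zeros((1, 300, 300, 3), dtype='float32')  # one blank 300x300 RGB image
preds = model.predict(preprocess_input(dummy))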
            # ...tail of the Generator.generate() batch loop:
            tmp_targets = np.array(targets)
            inputs = []
            targets = []
            yield preprocess_input(tmp_inp), tmp_targets

path_prefix = './VOCdevkit/VOC2007/JPEGImages/'
gen = Generator(gt, bbox_util, 4, path_prefix,
                train_keys, val_keys,
                (input_shape[0], input_shape[1]), do_crop=False)

model = SSD300v2(input_shape, num_classes=NUM_CLASSES)
# model.load_weights('weights_SSD300.hdf5', by_name=True)
model.load_weights('./checkpoints/weights.01-4.35.hdf5', by_name=True)

freeze = ['input_1', 'conv1_1', 'conv1_2', 'pool1',
          'conv2_1', 'conv2_2', 'pool2',
          'conv3_1', 'conv3_2', 'conv3_3', 'pool3']
#         'conv4_1', 'conv4_2', 'conv4_3', 'pool4']

# Layer freezing is currently disabled:
"""
for L in model.layers:
    if L.name in freeze:
        L.trainable = False
"""
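# A sketch of the usual next steps in ssd_keras-style training code
# (MultiboxLoss comes from that project's ssd_training module; the learning
# rate mirrors the 3e-4 used elsewhere in these examples, and the Keras-2
# keyword names are assumptions about the installed Keras version):
from ssd_training import MultiboxLoss

model.compile(optimizer=keras.optimizers.Adam(lr=3e-4),
              loss=MultiboxLoss(NUM_CLASSES, neg_pos_ratio=2.0).compute_loss)
model.fit_generator(gen.generate(True),
                    steps_per_epoch=gen.train_batches,
                    epochs=1,
                    validation_data=gen.generate(False),
                    validation_steps=gen.val_batches)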
def main():
    rospy.init_node('detect_pkg', anonymous=True)
    rospy.sleep(0.5)
    publisher()

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    config = tf.ConfigProto(gpu_options=gpu_options)
    sess = tf.Session(config=config)
    K.set_session(sess)
    np.set_printoptions(suppress=True)

    voc_classes = ['Aeroplane', 'Bicycle', 'Bird', 'Boat', 'Bottle',
                   'Bus', 'Car', 'Cat', 'Chair', 'Cow', 'Diningtable',
                   'Dog', 'Horse', 'Motorbike', 'Person', 'Pottedplant',
                   'Sheep', 'Sofa', 'Train', 'Tvmonitor']
    NUM_CLASSES = len(voc_classes) + 1

    # Build one distinct BGR color per class by sweeping the hue channel.
    class_colors = []
    for i in range(len(voc_classes)):
        hue = 255 * i / len(voc_classes)
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    ic = image_converter()
    icp = image_converter_pointcloud()
    r = rospy.Rate(50)
    rospy.sleep(0.2)

    img_shape = (360, 640, 3)  # fixed size; ic.zed_image.shape is overridden here
    input_shape = img_shape

    print('---------initializing model...please wait----------')
    model = SSD300v2(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('/home/ogai1234/catkin_ws/src/detect_pkg/bin/weights_SSD300.hdf5',
                       by_name=True)
    print('---------model done----------')
    bbox_util = BBoxUtility(NUM_CLASSES)
    print('after loading')

    key = ''
    counter = 0
    t0 = time.time()
    frame_code = 1
    while (key != 113) and (not rospy.is_shutdown()):
        frame_code += 1
        print("_________________________Frame:", frame_code)
        counter += 1

        image = ic.zed_image
        frame = image[:, :, 0:3]
        res = cv2.resize(frame, (640, 360))
        img_old = res
        img = keras_image.img_to_array(res)
        img = img[np.newaxis, :, :, :]
        inputs = preprocess_input(img)

        t1 = time.time()
        preds = model.predict(inputs)
        results = bbox_util.detection_out(preds)

        # Each detection row is (label, confidence, xmin, ymin, xmax, ymax).
        det_label = results[0][:, 0]
        det_conf = results[0][:, 1]
        det_xmin = results[0][:, 2]
        det_ymin = results[0][:, 3]
        det_xmax = results[0][:, 4]
        det_ymax = results[0][:, 5]

        global x, y, z, top_conf, type_code, distance, confidence, objPerFrame
        top_indices = [i for i, conf in enumerate(det_conf) if conf >= 0.8]
        top_conf = det_conf[top_indices]
        top_label_indices = det_label[top_indices].tolist()
        top_xmin = det_xmin[top_indices]
        top_ymin = det_ymin[top_indices]
        top_xmax = det_xmax[top_indices]
        top_ymax = det_ymax[top_indices]

        objPerFrame = 0
        global type_code_list, confidence_list, distance_list, x_list, y_list, z_list
        # Lists store the information of each object in one frame (slot 0 unused).
        type_code_list = [0] * 11
        confidence_list = [0] * 11
        distance_list = [0] * 11
        x_list = [0] * 11
        y_list = [0] * 11
        z_list = [0] * 11

        for i in range(top_conf.shape[0]):
            xmin = int(round(top_xmin[i] * img_old.shape[1]))
            ymin = int(round(top_ymin[i] * img_old.shape[0]))
            xmax = int(round(top_xmax[i] * img_old.shape[1]))
            ymax = int(round(top_ymax[i] * img_old.shape[0]))
            class_num = int(top_label_indices[i])
            x_center = round((xmin + xmax) / 2)
            y_center = round((ymin + ymax) / 2)
            icp.dict_1 = x_center
            icp.dict_2 = y_center
            rospy.sleep(0.04)

            point_cloud = icp.zed_image_pointcloud
            # x, y, z is the position obtained from the PointCloud2 message.
            x, y, z = 0, 0, 0
            for p in point_cloud:
                x, y, z = p
                break
            distance = math.sqrt(x * x + y * y + z * z)

            # The original Person and Car branches were identical apart from
            # the type code, so they are merged here.
            if voc_classes[class_num - 1] in ("Person", "Car"):
                type_code = 1 if voc_classes[class_num - 1] == "Person" else 2
                confidence = round(top_conf[i], 2)  # keep 2 decimal places
                objPerFrame += 1
                # Stack into lists for the ROS transfer.
                type_code_list[objPerFrame] = type_code
                confidence_list[objPerFrame] = confidence
                distance_list[objPerFrame] = round(distance, 2)
                x_list[objPerFrame] = round(x, 2)
                y_list[objPerFrame] = round(y, 2)
                z_list[objPerFrame] = round(z, 2)

                cv2.rectangle(img_old, (xmin, ymin), (xmax, ymax),
                              class_colors[class_num - 1], 4)
                text = (voc_classes[class_num - 1] + " " + ('%.2f' % top_conf[i]) +
                        " " + ("distance: %.2f" % distance) +
                        " " + ("x: %.2f" % x) + " " + ("y: %.2f" % y))
                print(voc_classes[class_num - 1])
                print('confidence:', confidence)
                print('distance', distance_list[objPerFrame])
                print('x:', x_list[objPerFrame])
                print('y:', y_list[objPerFrame])
                print('z:', z_list[objPerFrame])
                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 280, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(img_old, text_top, text_bot,
                              class_colors[class_num - 1], -1)
                cv2.putText(img_old, text, text_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

        cv2.imshow("success!", img_old)
        key = cv2.waitKey(1)
        print("There are", objPerFrame, "objects in this frame")
        t21 = time.time()
        print('fps {:f}'.format(1 / (t21 - t1)))
        talker()
def main():
    parser = argparse.ArgumentParser(
        description="Training ssd model with keras")
    parser.add_argument("-c", "--class_number", metavar="class_number",
                        type=int, default=21, dest="class_number",
                        help="set the classify number")
    parser.add_argument("-b", "--prior_boxes_ssd300", metavar="prior_boxes_ssd300",
                        type=str, default='prior_boxes_ssd300.pkl',
                        dest="prior_boxes_ssd300",
                        help="set the prior boxes file")
    parser.add_argument("-t", "--train_file", metavar="train_file",
                        type=str, default='test_VOC.pkl', dest="train_file",
                        help="set the train file")
    parser.add_argument("-p", "--path_prefix", metavar="path_prefix",
                        type=str, default='./tests/test_data/JPEGImages/',
                        dest="path_prefix", help="set the path prefix")
    parser.add_argument("-w", "--weight_file", metavar="weight_file",
                        type=str, default='try_weights_SSD300.hdf5',
                        dest="weight_file", help="set the weight file")
    parser.add_argument("-s", "--save_weight_file", metavar="save_weight_file",
                        type=str,
                        default='./resource/checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5',
                        dest="save_weight_file", help="set the save weight file")
    parser.add_argument("-n", "--nb_epoch", metavar="nb_epoch",
                        type=int, default=1, dest="nb_epoch",
                        help="set the number of epochs")
    args = parser.parse_args()

    input_shape = (300, 300, 3)
    model = SSD300v2(input_shape, num_classes=args.class_number)
    base_lr = 3e-4
    trainer = Trainer(class_number=args.class_number,
                      input_shape=input_shape,
                      priors_file=args.prior_boxes_ssd300,
                      train_file=args.train_file,
                      path_prefix=args.path_prefix,
                      model=model,
                      weight_file=args.weight_file,
                      freeze=('input_1', 'conv1_1', 'conv1_2', 'pool1',
                              'conv2_1', 'conv2_2', 'pool2',
                              'conv3_1', 'conv3_2', 'conv3_3', 'pool3'),
                      save_weight_file=args.save_weight_file,
                      optim=keras.optimizers.Adam(lr=base_lr),
                      batch_size=1,
                      nb_worker=1)

    # Profile the training run and write the call graph to ssd_speed.png.
    graphviz = GraphvizOutput(output_file='ssd_speed.png')
    with PyCallGraph(output=graphviz):
        trainer.train(nb_epoch=args.nb_epoch)
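# Example invocation using the defaults defined above (the script name
# train_ssd.py is an assumption; substitute the actual file name):
#
#   python train_ssd.py -c 21 -t test_VOC.pkl \
#       -p ./tests/test_data/JPEGImages/ -w try_weights_SSD300.hdf5 -n 1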
def frames():
    video_path = 0
    start_frame = 0
    conf_thresh = 0.6
    input_shape = (480, 300, 3)
    class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
                   "bottle", "bus", "car", "cat", "chair", "cow",
                   "diningtable", "dog", "horse", "motorbike", "person",
                   "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
    NUM_CLASSES = len(class_names)
    num_classes = NUM_CLASSES

    # Build one distinct BGR color per class by sweeping the hue channel.
    class_colors = []
    for i in range(num_classes):
        hue = 255 * i / num_classes
        col = np.zeros((1, 1, 3)).astype("uint8")
        col[0][0][0] = hue
        col[0][0][1] = 128  # Saturation
        col[0][0][2] = 255  # Value
        cvcol = cv2.cvtColor(col, cv2.COLOR_HSV2BGR)
        col = (int(cvcol[0][0][0]), int(cvcol[0][0][1]), int(cvcol[0][0][2]))
        class_colors.append(col)

    bbox_util = BBoxUtility(num_classes)
    model = SSD(input_shape, num_classes=NUM_CLASSES)
    model.load_weights('weights_SSD300.hdf5')

    INTERVAL = 33       # wait time in ms
    FRAME_RATE = 20     # fps
    ORG_WINDOW_NAME = "org"
    # OUT_FILE_NAME = "real_SSD_result.mp4"

    vid = cv2.VideoCapture(Camera.video_source)
    width, height = input_shape[0], input_shape[1]
    """
    out = cv2.VideoWriter(OUT_FILE_NAME,
                          cv_fourcc('M', 'P', '4', 'V'),
                          FRAME_RATE,
                          (width, height),
                          True)
    """
    if not vid.isOpened():
        raise IOError(("Couldn't open video file or webcam. If you're "
                       "trying to open a webcam, make sure your video_path "
                       "is an integer!"))

    vidw = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
    vidh = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
    vidar = vidw / vidh
    """
    if start_frame > 0:
        vid.set(cv2.CAP_PROP_POS_MSEC, start_frame)
    """

    accum_time = 0
    curr_fps = 0
    fps = "FPS: ??"
    prev_time = timer()
    start_time = prev_time
    # cv2.namedWindow(ORG_WINDOW_NAME)

    while True:
        retval, orig_image = vid.read()
        if not retval:
            print("Done!")
            return

        im_size = (input_shape[1], input_shape[0])
        resized = cv2.resize(orig_image, im_size)
        rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
        to_draw = cv2.resize(resized,
                             (int(input_shape[1] * vidar), input_shape[0]))

        inputs = [image.img_to_array(rgb)]
        tmp_inp = np.array(inputs)
        x = preprocess_input(tmp_inp)
        y = model.predict(x)
        results = bbox_util.detection_out(y)

        if len(results) > 0 and len(results[0]) > 0:
            # Each detection row is (label, confidence, xmin, ymin, xmax, ymax).
            det_label = results[0][:, 0]
            det_conf = results[0][:, 1]
            det_xmin = results[0][:, 2]
            det_ymin = results[0][:, 3]
            det_xmax = results[0][:, 4]
            det_ymax = results[0][:, 5]

            top_indices = [i for i, conf in enumerate(det_conf)
                           if conf >= conf_thresh]
            top_conf = det_conf[top_indices]
            top_label_indices = det_label[top_indices].tolist()
            top_xmin = det_xmin[top_indices]
            top_ymin = det_ymin[top_indices]
            top_xmax = det_xmax[top_indices]
            top_ymax = det_ymax[top_indices]

            for i in range(top_conf.shape[0]):
                xmin = int(round(top_xmin[i] * to_draw.shape[1]))
                ymin = int(round(top_ymin[i] * to_draw.shape[0]))
                xmax = int(round(top_xmax[i] * to_draw.shape[1]))
                ymax = int(round(top_ymax[i] * to_draw.shape[0]))
                class_num = int(top_label_indices[i])
                cv2.rectangle(to_draw, (xmin, ymin), (xmax, ymax),
                              class_colors[class_num], 2)
                text = class_names[class_num] + " " + ('%.2f' % top_conf[i])
                text_top = (xmin, ymin - 10)
                text_bot = (xmin + 80, ymin + 5)
                text_pos = (xmin + 5, ymin)
                cv2.rectangle(to_draw, text_top, text_bot,
                              class_colors[class_num], -1)
                cv2.putText(to_draw, text, text_pos,
                            cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)
                print(text, " ")

        # Update the FPS counter once per second.
        curr_time = timer()
        exec_time = curr_time - prev_time
        prev_time = curr_time
        accum_time = accum_time + exec_time
        curr_fps = curr_fps + 1
        if accum_time > 1:
            accum_time = accum_time - 1
            fps = "FPS: " + str(curr_fps)
            curr_fps = 0

        cv2.rectangle(to_draw, (0, 0), (50, 17), (255, 255, 255), -1)
        cv2.putText(to_draw, fps, (3, 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.35, (0, 0, 0), 1)

        to_draw = cv2.resize(to_draw, (int(input_shape[0] * 1), input_shape[1]))
        # cv2.imshow(ORG_WINDOW_NAME, to_draw)
        # out.write(to_draw)
        if cv2.waitKey(INTERVAL) >= 0:
            break
        yield cv2.imencode('.jpg', to_draw)[1].tobytes()

    vid.release()
    # out.release()
    cv2.destroyAllWindows()
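# frames() yields JPEG-encoded bytes, which suggests it feeds an MJPEG HTTP
# stream (the Camera.video_source reference points the same way). A minimal
# hypothetical Flask route that wraps each frame in multipart boundaries;
# Flask and the route name are assumptions, not part of the original:
from flask import Flask, Response

app = Flask(__name__)

@app.route('/video_feed')
def video_feed():
    def gen():
        for frame in frames():
            yield (b'--frame\r\n'
                   b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')
    return Response(gen(),
                    mimetype='multipart/x-mixed-replace; boundary=frame')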
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.7
set_session(tf.Session(config=config))

# In[2]:

voc_classes = ['rod']
num_classes = len(voc_classes) + 1

# In[3]:

input_shape = (300, 300, 3)
model = SSD300v2(input_shape, num_classes=num_classes)
model.load_weights('weights.30-1.98.hdf5', by_name=True)
bbox_util = BBoxUtility(num_classes)

# In[4]:

inputs = []
images = []
img_path = './pics/000001.jpg'
img = image.load_img(img_path, target_size=(300, 300))
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())
img_path = './pics/000002.jpg'
img = image.load_img(img_path, target_size=(300, 300))
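# The cell breaks off while loading the second image. A sketch of how such a
# batch is usually completed and run in this style of code (not the original
# continuation):
img = image.img_to_array(img)
images.append(imread(img_path))
inputs.append(img.copy())

preds = model.predict(preprocess_input(np.array(inputs)), batch_size=1, verbose=1)
results = bbox_util.detection_out(preds)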
import sys
sys.path.append("..")
from ssd_v2 import SSD300v2 as SSD

input_shape = (640, 480, 3)

# Change this if you run with other classes than VOC
class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow",
               "diningtable", "dog", "horse", "motorbike", "person",
               "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
NUM_CLASSES = len(class_names)

model = SSD(input_shape, num_classes=NUM_CLASSES)

# Change this path if you want to use your own trained weights
model.load_weights('weights_SSD300.hdf5')

vid_test = VideoTest(class_names, model, input_shape)

# To test on webcam 0, remove the parameter (or change it to another number
# to test on that webcam)
# vid_test.run(0)
for i in range(1):
    print(i)
    # vid_test.run('1.png')
    # time.sleep(1.5)
    # vid_test.run(0)
from utils.videotest import VideoTest

import sys
sys.path.append("..")
# from ssd import SSD300 as SSD
from ssd_v2 import SSD300v2 as SSD

input_shape = (300, 300, 3)

# Change this if you run with other classes than VOC
class_names = ["background", "aeroplane", "bicycle", "bird", "boat",
               "bottle", "bus", "car", "cat", "chair", "cow",
               "diningtable", "dog", "horse", "motorbike", "person",
               "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
NUM_CLASSES = len(class_names)

model = SSD(input_shape, num_classes=NUM_CLASSES)

# Change this path if you want to use your own trained weights
model.load_weights('../data/weights_SSD300.hdf5')

vid_test = VideoTest(class_names, model, input_shape)

# To test on webcam 0, remove the parameter (or change it to another number
# to test on that webcam)
vid_test.run('path/to/your/video.mkv')
def setUp(self):
    self.class_number = 21
    self.input_shape = (300, 300, 3)
    self.model = SSD300v2(self.input_shape, num_classes=self.class_number)
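# A minimal companion test, as a sketch (it relies only on the standard Keras
# input_shape attribute, which carries a leading batch dimension of None; it
# is not taken from the original suite):
def test_input_shape(self):
    self.assertEqual(self.model.input_shape, (None, 300, 300, 3))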