def freeze_graph():
    """Rebuild the YOLOv3 inference graph, restore its weights and write a frozen graph.

    Reads the module-level settings ``new_size`` ([width, height]), ``num_class``,
    ``anchors``, ``meta_path`` (its last five characters are dropped to obtain the
    checkpoint prefix), ``output_node_names`` and ``output_graph`` (output file).
    """
    with tf.Session() as session:
        net_width, net_height = new_size[0], new_size[1]
        image_input = tf.placeholder(tf.float32,
                                     [1, net_height, net_width, 3],
                                     name='input_data')

        detector = yolov3(num_class, anchors)
        with tf.variable_scope('yolov3'):
            feature_maps = detector.forward(image_input, False)
        raw_boxes, raw_confs, raw_probs = detector.predict(feature_maps)
        raw_scores = raw_confs * raw_probs

        # The NMS results are not consumed here; the call is kept so the
        # post-processing nodes are part of the graph that gets serialized.
        boxes, scores, labels = gpu_nms(raw_boxes, raw_scores, num_class,
                                        max_boxes=200, score_thresh=0.3,
                                        nms_thresh=0.45)

        checkpoint_prefix = meta_path[:-5]
        tf.train.Saver().restore(session, checkpoint_prefix)

        frozen_graph_def = tf.graph_util.convert_variables_to_constants(
            session,
            session.graph.as_graph_def(),
            output_node_names,
        )

        with tf.gfile.GFile(output_graph, "wb") as pb_file:
            pb_file.write(frozen_graph_def.SerializeToString())
def __init__(self):
    """Initialise counters, the SORT tracker and a restored YOLOv3 detection session.

    Builds the detection graph (placeholder ``input_data`` -> forward -> predict ->
    ``gpu_nms``) and restores the checkpoint into ``self.sess``. The NMS output
    tensors are exposed as ``self.boxes``, ``self.scores`` and ``self.labels``.
    """
    self.counter = 0  # number of people
    self.violation = 0  # number of violations

    # The path of the anchor txt file.
    anchor_path = "./data/yolo_anchors.txt"
    # Resize the input image with `new_size`, size format: [width, height]
    self.new_size = [416, 416]
    # Whether to use the letterbox resize.
    self.letterbox_resizes = True
    # The path of the class names
    class_name_path = "./data/coco.names"
    # The path of the weights to restore
    restore_path = "./checkpoint/best_model_Epoch_75_step_29487_mAP_0.8609_loss_5.4903_lr_1e-05"
    # Whether to save the video detection results.
    self.save_video = True

    self.anchors = parse_anchors(anchor_path)
    self.classes = read_class_names(class_name_path)
    self.num_class = len(self.classes)
    self.color_table = get_color_table(self.num_class)  # color for each label

    self.tracker = Sort()
    self.memory = {}
    self.COLORS = np.random.randint(0, 255, size=(200, 3),
                                    dtype="uint8")  # tracker color

    # Every other graph-mode call below goes through tf.compat.v1, so the
    # session does too; a bare tf.Session is not available under TF 2.x.
    self.sess = tf.compat.v1.Session()

    self.input_data = tf.compat.v1.placeholder(
        tf.float32, [1, self.new_size[1], self.new_size[0], 3],
        name='input_data')
    yolo_model = yolov3(self.num_class, self.anchors)
    with tf.compat.v1.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(self.input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    pred_scores = pred_confs * pred_probs

    self.boxes, self.scores, self.labels = gpu_nms(pred_boxes,
                                                   pred_scores,
                                                   self.num_class,
                                                   max_boxes=200,
                                                   score_thresh=0.6,
                                                   nms_thresh=0.01)

    saver = tf.compat.v1.train.Saver()
    saver.restore(self.sess, restore_path)
def main():
    """Train only the YOLOv3 output layers across input scales and plot memory use.

    For each learning rate and each input scale the model is recompiled with
    anchors normalised by that scale and trained for one epoch of 83 steps.
    The host memory in use (GB) is sampled after every fit and plotted at the end.
    """
    # Dataset Info
    num_classes = len(config.voc_classes)

    # Create model
    model = yolov3((None, None, 3), num_classes=num_classes, training=True)
    model.summary()

    # Load Weights
    model.load_weights(config.yolo_weights, by_name=True)

    # Freeze everything except the three output convolutions.
    trainable_model(model, trainable=False)
    for layer_name in ('conv2d_last_layer1_20',
                       'conv2d_last_layer2_20',
                       'conv2d_last_layer3_20'):
        model.get_layer(layer_name).trainable = True

    step = 1
    batch_size = 30 if step == 1 else 8
    epoch_step = 1
    start_epoch = 0

    memory_used = []
    # The validation split returned alongside the training sets is not used here.
    train_data_dict, _ = create_multi_scale_dataset(batch_size)
    for lr in [1e-3, 1e-4]:
        for scale in [320, 352, 384, 416, 448, 480, 512, 544, 576, 608]:
            print('scale: {}, learning rate: {}'.format(scale, lr))
            anchors = config.yolo_anchors / scale

            # Training. `learning_rate` is the supported keyword; `lr` is a
            # deprecated alias.
            optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
            model.compile(optimizer=optimizer,
                          loss=[YoloLoss(anchors[mask], num_classes=num_classes)
                                for mask in anchor_masks],
                          run_eagerly=False)
            model.fit(train_data_dict[scale],
                      epochs=start_epoch + epoch_step,
                      steps_per_epoch=83,
                      initial_epoch=start_epoch)
            start_epoch += epoch_step
            memory_used.append(psutil.virtual_memory().used / 2 ** 30)

    plt.plot(memory_used)
    plt.title('Evolution of memory')
    plt.xlabel('iteration')
    plt.ylabel('memory used (GB)')
    plt.show()
def main():
    """Two-stage YOLOv3 training: output layers first, then the whole network.

    Stage 1 trains only the three output convolutions with every other layer
    frozen; stage 2 unfreezes all layers. Both stages share the same callbacks
    (TensorBoard, best-checkpoint saving on ``val_loss``, LR reduction on
    plateau, early stopping).
    """
    num_classes = len(config.voc_classes)

    input_shape = (config.size_h, config.size_w, 3)
    model = yolov3(input_shape, num_classes=num_classes, training=True)
    model.summary()
    model.load_weights(config.yolo_weights, by_name=True)

    # Output locations for logs and checkpoints.
    log_dir = 'logs_yolo'
    model_dir = log_dir + '/models'
    os.makedirs(model_dir, exist_ok=True)

    tensorboard_cb = tf.keras.callbacks.TensorBoard(log_dir=log_dir)
    checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
        model_dir + '/best_{epoch:03d}.h5',
        monitor='val_loss',  # TODO: mAP
        save_best_only=True,
        mode='min')
    early_stop_cb = tf.keras.callbacks.EarlyStopping(patience=15, verbose=1)
    reduce_lr_cb = tf.keras.callbacks.ReduceLROnPlateau(verbose=1)
    shared_callbacks = (tensorboard_cb, checkpoint_cb, reduce_lr_cb, early_stop_cb)

    # Stage 1: freeze all layers except the last layer of each output branch.
    trainable_model(model, trainable=False)
    for head_name in ('conv2d_last_layer1_20',
                      'conv2d_last_layer2_20',
                      'conv2d_last_layer3_20'):
        model.get_layer(head_name).trainable = True

    print("Start teraining Step1")
    training_model(model,
                   callbacks=list(shared_callbacks),
                   num_classes=num_classes,
                   step=1)

    # Stage 2: unfreeze everything and continue training.
    trainable_model(model, trainable=True)

    print("Start teraining Step2")
    training_model(model,
                   callbacks=list(shared_callbacks),
                   num_classes=num_classes,
                   step=2)
def main():
    """Build the inference model, load trained weights and show detections.

    Uses the module-level ``num_classes``, ``freeze`` and ``weight_file`` values.
    """
    input_shape = (config.size_h, config.size_w, 3)
    model = yolov3(input_shape, num_classes=num_classes, training=False)
    model.summary()

    if freeze:
        # Freeze all layers except the last layer of each output branch.
        trainable_model(model, trainable=False)
        for head_name in ('conv2d_last_layer1_20',
                          'conv2d_last_layer2_20',
                          'conv2d_last_layer3_20'):
            model.get_layer(head_name).trainable = True

    # Load weights
    model.load_weights(weight_file)

    # Detect objects on ten test samples.
    test_and_show_result(model, test_number=10)
def main():
    """Convert darknet YOLOv3 weights into a Keras ``.h5`` weights file.

    The module-level ``tiny_model`` flag selects the tiny or the full network.
    After loading the darknet weights, one forward pass on random input checks
    that the model runs before the weights are saved.
    """
    input_size = (416, 416, 3)

    if tiny_model:
        build_model = yolov3_tiny
        yolo_darknet_weights = 'model_data/yolov3_tiny.weights'
    else:
        build_model = yolov3
        yolo_darknet_weights = 'model_data/yolov3.weights'

    yolo = build_model(input_size)
    yolo.summary()
    print('model created')

    load_darknet_weights(yolo, yolo_darknet_weights)
    print('weights loaded')

    # Sanity check: a single forward pass on a random batch of one image.
    dummy_batch = np.random.random((1, 416, 416, 3)).astype(np.float32)
    yolo(dummy_batch)
    print('sanity check passed')

    yolo.save_weights('model_data/yolo_weights.h5')
    print('weights saved')
def pb_test_tf(img_ori, feature_map, classes, num_class, anchors, score_threshold, iou_threshold):
    '''
    Decode precomputed YOLOv3 feature maps into detections and display them.

    :param img_ori: original image; its height and width are read from ``shape[:2]``
    :param feature_map: sequence of three feature maps, wrapped as float32 constants
    :param classes: class names, forwarded to ``show_image``
    :param num_class: number of classes
    :param anchors: anchors passed to the ``yolov3`` constructor
    :param score_threshold: passed to ``gpu_nms`` as ``score_thresh``
    :param iou_threshold: passed to ``gpu_nms`` as ``iou_thresh``
    :return: None; the detections are handed to ``show_image``
    '''
    height_ori, width_ori = img_ori.shape[:2]
    img_size = [416, 416]

    # Define a dedicated computation graph in which the outputs are produced.
    graph = tf.Graph()
    with graph.as_default():
        fmap_tensors = [tf.constant(fmap, dtype=tf.float32)
                        for fmap in feature_map]
        feature_map_1, feature_map_2, feature_map_3 = fmap_tensors
        tf_img_size = tf.constant(value=img_size, dtype=tf.int32)
        print("img_size:{}".format(img_size))
        yolo_model = yolov3(num_class, anchors)

    with tf.Session(graph=graph) as sess:
        pred_boxes, pred_confs, pred_probs = yolo_model.predict2(
            feature_map_1, feature_map_2, feature_map_3, tf_img_size)
        pred_scores = pred_confs * pred_probs
        nms_outputs = gpu_nms(pred_boxes, pred_scores, num_class,
                              max_boxes=30,
                              score_thresh=score_threshold,
                              iou_thresh=iou_threshold)
        boxes_, scores_, labels_ = sess.run(list(nms_outputs))
        show_image(img_ori, boxes_, scores_, classes, num_class, labels_,
                   width_ori, height_ori, img_size)
def __init(self):
    """Load the input image and build a restored YOLOv3 detection session.

    Reads ``self.input_image`` (image path, which must be set before this is
    called) and populates the preprocessed batch ``self.img``, the placeholder
    ``self.input_data`` and the NMS output tensors ``self.boxes``,
    ``self.scores`` and ``self.labels``.
    """
    self.anchor_path = "./data/yolo_anchors.txt"
    self.new_size = [416, 416]
    self.class_name_path = "./data/my_data/data.names"
    self.restore_path = "./checkpoint/model-step_17500_loss_0.003654_lr_0.0004995866"

    self.anchors = parse_anchors(self.anchor_path)
    self.classes = read_class_names(self.class_name_path)
    self.num_class = len(self.classes)
    self.color_tabel = get_color_table(self.num_class)

    self.img_ori = cv2.imread(self.input_image)
    self.height_ori, self.width_ori = self.img_ori.shape[:2]
    # Backward-compatible alias: the height used to be stored under this
    # misspelled attribute name.
    self.weight_ori = self.height_ori

    # Resize the image that was just loaded (a bare `img_ori` is not defined).
    self.img = cv2.resize(self.img_ori, tuple(self.new_size))
    self.img = cv2.cvtColor(self.img, cv2.COLOR_BGR2RGB)
    self.img = np.asarray(self.img, np.float32)
    self.img = self.img[np.newaxis, :] / 255.

    self.__sess = tf.Session()

    # The placeholder has to exist before the network is built on top of it.
    self.input_data = tf.placeholder(
        tf.float32, [1, self.new_size[1], self.new_size[0], 3],
        name='input_data')

    yolo_model = yolov3(self.num_class, self.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(self.input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    pred_scores = pred_confs * pred_probs

    self.boxes, self.scores, self.labels = gpu_nms(pred_boxes,
                                                   pred_scores,
                                                   self.num_class,
                                                   max_boxes=100,
                                                   score_thresh=0.4,
                                                   iou_thresh=0.5)

    # No separate variable initialisation is run: the initializer used to be
    # executed before any variable had been created, and the restore below
    # assigns the saved values.
    self.__saver = tf.train.Saver()
    self.__saver.restore(self.__sess, self.restore_path)
def init_params(self):
    """Build the YOLOv3 detection graph on this instance and restore its weights.

    Expects ``self.new_size`` ([width, height]), ``self.num_class``, ``self.sess``
    and ``self.restore_path`` to be set already. Every intermediate tensor is
    kept as an attribute.
    """
    net_width, net_height = self.new_size[0], self.new_size[1]
    self.input_data = tf.placeholder(tf.float32,
                                     [1, net_height, net_width, 3],
                                     name='input_data')

    self.yolo_model = yolov3(self.num_class, cfg.anchors)
    with tf.variable_scope('yolov3'):
        self.pred_feature_maps = self.yolo_model.forward(self.input_data, False)

    predictions = self.yolo_model.predict(self.pred_feature_maps)
    self.pred_boxes, self.pred_confs, self.pred_probs = predictions
    self.pred_scores = self.pred_confs * self.pred_probs

    nms_outputs = gpu_nms(self.pred_boxes, self.pred_scores, self.num_class,
                          max_boxes=200, score_thresh=0.25, nms_thresh=0.45)
    self.boxes, self.scores, self.labels = nms_outputs

    self.saver = tf.train.Saver()
    self.saver.restore(self.sess, self.restore_path)
def estimatePose():
    """Score a lunge video against the trainer's reference pose using YOLO detections.

    Parses command-line options, loads the trainer key points from
    ``./data/ground_truth/output_right.csv``, runs the detector on
    ``./data/demo/lunge_03.mp4`` frame by frame and, once the user has stepped
    into the reference box and the countdown has finished, counts knee, waist
    and speed feedback events. The counters are appended, with the current date
    and time, to ``./data/score/result.csv``.

    NOTE(review): this function was reconstructed from whitespace-collapsed
    source; the nesting of the per-frame bookkeeping (in particular what belongs
    inside ``critical_point % 2 == 0``) is inferred — confirm against the
    original layout.
    """
    def str2bool(x):
        return str(x).lower() == 'true'

    parser = argparse.ArgumentParser(
        description="YOLO-V3 video test procedure.")
    parser.add_argument("--anchor_path", type=str,
                        default="./data/yolo_anchors.txt",
                        help="The path of the anchor txt file.")
    parser.add_argument(
        "--new_size", nargs='*', type=int, default=[416, 416],
        help="Resize the input image with `new_size`, size format: [width, height]")
    parser.add_argument("--letterbox_resize", type=str2bool, default=True,
                        help="Whether to use the letterbox resize.")
    parser.add_argument("--class_name_path", type=str,
                        default="./data/my_data/YOLOPose.names",
                        help="The path of the class names.")
    parser.add_argument("--restore_path", type=str,
                        default="./data/pose_weights/lunge_best",
                        help="The path of the weights to restore.")
    parser.add_argument("--save_video", type=str2bool, default=True,
                        help="Whether to save the video detection results.")
    args = parser.parse_args()

    args.anchors = parse_anchors(args.anchor_path)
    args.classes = read_class_names(args.class_name_path)
    args.num_class = len(args.classes)
    color_table = get_color_table(args.num_class)

    vid = cv2.VideoCapture('./data/demo/lunge_03.mp4')
    video_frame_cnt = int(vid.get(7))
    video_width = int(vid.get(3))
    video_height = int(vid.get(4))
    video_fps = int(vid.get(5))

    # Column 0 of the trainer CSV is the critical-point flag checked in the
    # frame loop; the remaining selected columns are the key-point coordinates.
    trainer_pose = pd.read_csv('./data/ground_truth/output_right.csv',
                               header=None)
    trainer_pose = trainer_pose.loc[:, [
        0, 1, 2, 3, 4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    ]]
    pca_df = trainer_pose.loc[:, [
        1, 2, 3, 4, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
    ]]
    # Odd-numbered columns are scaled by the video width and even-numbered ones
    # by the video height (presumably x and y on a 416x416 grid — TODO confirm).
    odd_cols = [c for c in pca_df.columns if c % 2 == 1]
    even_cols = [c for c in pca_df.columns if c % 2 == 0]
    pca_df.loc[:, odd_cols] = pca_df.loc[:, odd_cols] * video_width / 416
    pca_df.loc[:, even_cols] = pca_df.loc[:, even_cols] * video_height / 416
    pca_df = pca_df.astype(int)
    # Rows containing a zero coordinate are dropped.
    pca_df = pca_df.replace(0, np.nan)
    pca_df = pca_df.dropna()

    pca = PCA(n_components=1)
    pca.fit(pca_df)

    size = [video_width, video_height]
    list_p = []
    waist_err = 0
    critical_point = 0
    past_idx = 0
    startTrig = 0  # 0: waiting for the user in the box, 1: countdown, 2: scoring
    cntdown = 90
    t = 0
    TRLEN = len(trainer_pose)
    modify_ankle = pca_df.iloc[0, :].values
    base_rect = [(int(video_width / 4), int(video_height / 10)),
                 (int(video_width * 3 / 4), int(video_height * 19 / 20))]
    c_knee = c_waist = c_speed = 0

    if args.save_video:
        fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps,
                                      (video_width, video_height))

    with tf.Session() as sess:
        input_data = tf.placeholder(tf.float32,
                                    [1, args.new_size[1], args.new_size[0], 3],
                                    name='input_data')
        yolo_model = yolov3(args.num_class, args.anchors)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data, False)
        pred_boxes, pred_confs, pred_probs = yolo_model.predict(
            pred_feature_maps)

        pred_scores = pred_confs * pred_probs

        boxes, scores, labels = gpu_nms(pred_boxes, pred_scores,
                                        args.num_class, max_boxes=200,
                                        score_thresh=0.3, nms_thresh=0.45)

        saver = tf.train.Saver()
        saver.restore(sess, args.restore_path)

        for _ in range(video_frame_cnt):
            ret, img_ori = vid.read()
            if not ret:
                # The reported frame count can exceed the decodable frames;
                # stop instead of failing on a missing image.
                break

            if args.letterbox_resize:
                img, resize_ratio, dw, dh = letterbox_resize(
                    img_ori, args.new_size[0], args.new_size[1])
            else:
                height_ori, width_ori = img_ori.shape[:2]
                img = cv2.resize(img_ori, tuple(args.new_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = sess.run([boxes, scores, labels],
                                                feed_dict={input_data: img})

            # rescale the coordinates to the original image
            if args.letterbox_resize:
                boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
                boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
            else:
                boxes_[:, [0, 2]] *= (width_ori / float(args.new_size[0]))
                boxes_[:, [1, 3]] *= (height_ori / float(args.new_size[1]))

            people_pose = get_people_pose(boxes_, labels_,
                                          base_rect)  # list-dict
            people_pose = np.array([p[1] for p in people_pose[0]
                                    ]).flatten()  # dict-tuple -> list
            people_pose = people_pose[[
                0, 1, 2, 3, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
            ]]

            # Start trigger
            if startTrig == 2:
                pass
            elif startTrig == 0:
                # Draw the reference box the user has to step into.
                cv2.rectangle(img_ori, base_rect[0], base_rect[1],
                              (0, 0, 255), 2)
                if isInBox(people_pose, base_rect[0], base_rect[1]):
                    t_resize_pose = resize_pose(people_pose,
                                                pca_df.iloc[0, :].values)
                    img_ori = draw_ground_truth(img_ori, t_resize_pose)
                    startTrig = isStart(people_pose,
                                        trainer_pose.iloc[0, 1:].values, size)
                    cv2.imshow('image', img_ori)
                    if cv2.waitKey(1) & 0xFF == ord('q'):
                        break
                    continue
                else:
                    print("박스안에 들어와주세요!!")
                    continue
            elif startTrig == 1:
                # Countdown: `cntdown / 30` is shown and scoring starts at zero.
                img_ori = draw_ground_truth(img_ori, pca_df.iloc[0, :].values)
                cv2.putText(img_ori, str(int(cntdown / 30)), (100, 300),
                            cv2.FONT_HERSHEY_SIMPLEX, 10, (255, 0, 0), 10)
                cv2.imshow('image', img_ori)
                cntdown -= 1
                if cntdown == 0:
                    startTrig = 2
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
                continue

            # Check ankle: when the deviation is 40 or more, fall back to the
            # previous value.
            people_pose = check_ankle(list_p, people_pose, modify_ankle, size)

            # Draw the ground truth for the current trainer frame.
            list_p.append(people_pose)
            img_ori = draw_ground_truth(img_ori, pca_df.iloc[t, :].values)

            if check_waist(people_pose):
                waist_err += 1

            # Value comparison: `is` on ints tests object identity and is not
            # guaranteed to match equal numbers.
            if waist_err == 60:  # give waist feedback after 60 bad frames
                feedback_waist()
                c_waist += 1
                waist_err = 0

            if trainer_pose.iloc[t, 0] == 1:  # t: a specific time point + i frames
                critical_point += 1
                if critical_point % 2 == 0:
                    my_pose = makeMypose_df(list_p)
                    c_speed = check_speed(
                        my_pose, trainer_pose.iloc[past_idx:t + 1, 1:], pca,
                        c_speed)
                    c_knee = check_knee(people_pose, c_knee)
                    modify_ankle = list_p[-1]
                    list_p = []
                    past_idx = t
            t += 1
            if t == TRLEN:
                break

            end_time = time.time()
            cv2.putText(img_ori,
                        '{:.2f}ms'.format((end_time - start_time) * 1000),
                        (40, 40), 0, fontScale=1, color=(0, 255, 0),
                        thickness=2)
            cv2.imshow('image', img_ori)
            if args.save_video:
                videoWriter.write(img_ori)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break

        vid.release()
        cv2.destroyAllWindows()
        if args.save_video:
            videoWriter.release()

    # Append the session score. The context manager closes the file so the row
    # is flushed to disk.
    now = datetime.today()
    with open('./data/score/result.csv', 'a', encoding='utf-8',
              newline='') as f:
        wr = csv.writer(f)
        wr.writerow([now.strftime("%Y/%m/%d"), now.strftime("%H:%M:%S"),
                     c_knee, c_waist, c_speed])
# get an element from the choosed dataset iterator image, y_true_13, y_true_26, y_true_52 = dataset_iterator.get_next() y_true = [y_true_13, y_true_26, y_true_52] # tf.data pipeline will lose the data shape, so we need to set it manually image.set_shape([None, args.img_size[1], args.img_size[0], 3]) for y in y_true: y.set_shape([None, None, None, None, None]) ################## # Model definition ################## # define yolo-v3 model here yolo_model = yolov3(args.class_num, args.anchors) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(image, is_training=is_training) loss = yolo_model.compute_loss(pred_feature_maps, y_true) y_pred = yolo_model.predict(pred_feature_maps) ################ # register the gpu nms operation here for the following evaluation scheme pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None]) pred_scores_flag = tf.placeholder(tf.float32, [1, None, None]) gpu_nms_op = gpu_nms(pred_boxes_flag, pred_scores_flag, args.class_num) ################ if args.restore_part == ['None']: args.restore_part = [None] if args.update_part == ['None']:
def yolodet(anchor_path, image_path, new_size, letterbox, class_name_path, restore_path):
    """Run YOLOv3 on one image and return detections in original-image coordinates.

    Args:
        anchor_path: path of the anchor txt file.
        image_path: path of the image to read with ``cv2.imread``.
        new_size: network input size as [width, height].
        letterbox: use ``letterbox_resize`` when truthy, otherwise a plain resize.
        class_name_path: path of the class-names file.
        restore_path: checkpoint to restore.

    Returns:
        ``(boxes_, scores_, labels_)`` as produced by ``gpu_nms``, with the box
        coordinates rescaled to the original image.

    Raises:
        IOError: if the image cannot be read.
    """
    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)
    # Not used below (drawing is disabled); kept in case get_color_table has
    # side effects that callers rely on — TODO confirm and remove.
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(image_path)
    if img_ori is None:
        # cv2.imread signals failure by returning None; fail with a clear message.
        raise IOError("Could not read image: {}".format(image_path))

    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0], new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    try:
        with tf.Session() as sess:
            input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3], name='input_data')
            yolo_model = yolov3(num_class, anchors)
            with tf.variable_scope('yolov3'):
                pred_feature_maps = yolo_model.forward(input_data, False)
            pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

            pred_scores = pred_confs * pred_probs

            boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=200,
                                            score_thresh=0.3, nms_thresh=0.45)

            saver = tf.train.Saver()
            saver.restore(sess, restore_path)

            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})
    finally:
        # Always clear the default graph, even when inference fails, so the
        # next call does not build on top of leftover variables.
        tf.reset_default_graph()

    # rescale the coordinates to the original image
    if letterbox:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

    return boxes_, scores_, labels_
image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next() y_true = [y_true_13, y_true_26, y_true_52] # tf.data pipeline will lose the data `static` shape, so we need to set it manually image_ids.set_shape([None]) image.set_shape([None, None, None, 3]) for y in y_true: y.set_shape([None, None, None, None, None]) ################## # Model definition ################## yolo_model = yolov3(args.class_num, args.anchors, args.use_label_smooth, args.use_focal_loss, args.batch_norm_decay, args.weight_decay, use_static_shape=False) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(image, is_training=is_training) loss = yolo_model.compute_loss(pred_feature_maps, y_true) y_pred = yolo_model.predict(pred_feature_maps) l2_loss = tf.losses.get_regularization_loss() # setting restore parts and vars to update saver_to_restore = tf.train.Saver( var_list=tf.contrib.framework.get_variables_to_restore( include=args.restore_include, exclude=args.restore_exclude)) update_vars = tf.contrib.framework.get_variables_to_restore(
# main(_): fly a Tello drone through a fixed sequence of missions driven by
# YOLOv3 detections.
#
# Reads the module-level `args` (new_size, num_class, anchors, restore_path,
# classes) and `color_table`; the `_` parameter is not used.
#
# Setup: builds the YOLOv3 graph inside a fresh tf.Graph (placeholder
# 'input_data', forward/predict, gpu_nms with max_boxes=200, score_thresh=0.7,
# nms_thresh=0.7), restores the checkpoint, then takes off and sleeps 3 s.
#
# Loop (`while True`): fetch a frame with tellotrack.process_frame(), resize it
# to (width, height), scale it to [0, 1], run detection, rescale the boxes to
# the frame size, then act on `mission`:
#   0: look for label `find_object` (74, "clock" per the inline note). A box
#      wider than 200 triggers move_left() / move_down(30) and mission 1;
#      otherwise the drone advances with go().
#   1: look for label `bottle` (39). A box wider than 50 is centred with
#      track_x(); when that reports done the drone lands, takes off again and
#      continues with the next mission, or with mission 4 once `flag` is True.
#   2: follow the widest `find_object` box with track_mid(); once it is wider
#      than 150 and tracking reports done, go_fast() and switch to mission 3.
#   3: when a `find_object` box is wider than 200, rotate 90 degrees: clockwise
#      while `flag` is False (then set `flag` and return to mission 2),
#      counter-clockwise otherwise (then return to mission 1).
#   4: move forward 200, land and exit().
# After the mission step the frame is shown in the 'YOLO' window; Esc quits and
# 'p' pauses.
#
# NOTE(review): the body is collapsed onto a few physical lines, so the block
# nesting summarised above is inferred from statement order — confirm it
# against a properly indented copy before relying on it.
# NOTE(review): the placeholder is declared as [1, width, height, 3] while
# cv2.resize(img_ori, (width, height)) yields (height, width, 3) arrays; the
# two only agree when width == height.
# NOTE(review): left_count and right_count are initialised to 0 and never
# reassigned in this function, so the `left_count > 0` / `right_count > 0`
# branches cannot run unless track_x() changes them some other way — verify.
# `move_up` and `landing_flag` are assigned but never read here.
def main(_): tellotrack = TelloCV() mission = 0 find_object = 74 bottle = 39 left_count = 0 right_count = 0 tello_is_high = False height_count = 0 move_up = 0 with tf.Graph().as_default(): width, height = args.new_size[0], args.new_size[1] # print(tellotrack.takeoff_time) # tellotrack.take_off() # time.sleep(3) with tf.Session() as sess: input_data = tf.placeholder(tf.float32, [1, width, height, 3], name='input_data') yolo_model = yolov3(args.num_class, args.anchors) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(input_data, False) pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps) pred_scores = pred_confs * pred_probs boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.7, nms_thresh=0.7) saver = tf.train.Saver() saver.restore(sess, args.restore_path) tellotrack.take_off() time.sleep(3) # tellotrack.drone.move_up(50) # time.sleep(3) flag = False landing_flag = True while True: img_ori = tellotrack.process_frame() img = cv2.resize(img_ori, (width, height)) img = np.asarray(img, np.float32) img = img[np.newaxis, :] / 255. start = time.time() boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img}) print(boxes_) end = time.time() boxes_[:, [0, 2]] *= (img_ori.shape[1] / float(width)) boxes_[:, [1, 3]] *= (img_ori.shape[0] / float(height)) if mission == 0: # find clock and move left print(mission, " Start") for i in range(len(boxes_)): x0, y0, x1, y1 = boxes_[i] if labels_[i] == find_object: # 56 chair 11 stopsign 0 person 74 clock print("-------------------------------------FIND-----") plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]]) # move left and find mission pad, landing. 
if int(x1 - x0) > 200: tellotrack.move_left() time.sleep(5) tellotrack.drone.move_down(30) mission = 1 else: # getting closer to object tellotrack.go() time.sleep(5) if find_object not in labels_: tellotrack.go() time.sleep(5) elif mission == 1: # find bottle and landing print(mission, " Start") for i in range(len(boxes_)): x0, y0, x1, y1 = boxes_[i] if labels_[i] == bottle: print("-------------------------------------FIND Bottle-----") plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]]) # x1-x0 > 70 -> 120cm # x1-x0 > 50 -> 200cm if int(x1 - x0) > 50: mid_x = (x1 + x0) / 2 done = tellotrack.track_x(mid_x,left_count, right_count) time.sleep(5) print(done) if done: tellotrack.landing() time.sleep(5) tellotrack.take_off() time.sleep(5) # if landing_flag: # window high # tellotrack.drone.move_up(50) # window low # tellotrack.drone.move_down(50) if left_count > 0 : for i in range(0,left_count): tellotrack.drone.move_right(20) time.sleep(3) elif right_count > 0 : for i in range(0,right_count): tellotrack.drone.move_left(20) time.sleep(3) if flag is False: mission += 1 else: mission = 4 else: tellotrack.drone.move_forward(30) time.sleep(3) if bottle not in labels_: tellotrack.drone.move_forward(30) time.sleep(3) elif mission == 2: # find clock and tracking, go through the window print(mission, " Start") length = [] index = [] for i in range(len(boxes_)): x0, y0, x1, y1 = boxes_[i] if labels_[i] == find_object: # 56 chair 11 stopsign 0 person 74 clock print("-------------------------------------FIND-----") plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]]) length.append(x1 - x0) index.append(i) # move left and find mission pad, landing elif tello_is_high is False : tellotrack.drone.move_up(50) time.sleep(3) height_count += 1 tello_is_high = True if len(length) > 0: max_length = max(length) 
max_index = index[length.index(max_length)] x0, y0, x1, y1 = boxes_[max_index] if int(x1 - x0) > 150: center_x = (x0 + x1) / 2 center_y = (y0 + y1) / 2 done = tellotrack.track_mid(center_x, center_y) time.sleep(3) if done: mission = 3 tellotrack.go_fast() time.sleep(5) if flag is True : tellotrack.drone.move_up(50) time.sleep(3) else: tellotrack.drone.move_forward(30) time.sleep(3) else: tellotrack.drone.move_forward(30) time.sleep(3) elif mission == 3: # find clock and rotate clockwise or counter-clockwise print(mission, " Start") for i in range(len(boxes_)): x0, y0, x1, y1 = boxes_[i] if labels_[i] == find_object: # 56 chair 11 stopsign 0 person 74 clock print("-------------------------------------FIND-----") plot_one_box(img_ori, [x0, y0, x1, y1], label=args.classes[labels_[i]] + ', {:.2f}%'.format(scores_[i] * 100), color=color_table[labels_[i]]) # move left and find mission pad, landing. if int(x1 - x0) > 200: # clock size 180 : 160 cm if flag is False: tellotrack.drone.rotate_clockwise(90) time.sleep(5) # if window 2 low # tellotrack.drone.move_down(50) # time.sleep(5) # if window 2 high #tellotrack.drone.move_up(100) #time.sleep(5) if tello_is_high is True : for i in range (0,height_count): tellotrack.drone.move_down(50) time.sleep(3) tello_is_high = False mission = 2 flag = True break else: tellotrack.drone.rotate_counter_clockwise(90) time.sleep(5) # if window too high, change 50 -> others tellotrack.drone.move_down(50) # if window too low # tellotrack.drone.move_up(50) time.sleep(5) mission = 1 break else: tellotrack.drone.move_forward(60) time.sleep(3) if find_object not in labels_: tellotrack.drone.move_forward(60) time.sleep(3) if len(boxes_) == 0: if flag is False: tellotrack.drone.move_left(30) time.sleep(3) else: tellotrack.drone.move_forward(40) time.sleep(3) elif mission == 4: # finish print(mission, " Start") tellotrack.drone.move_forward(200) time.sleep(5) tellotrack.landing() time.sleep(3) exit() cv2.imshow('YOLO', img_ori) k = cv2.waitKey(1) 
if k == 1048603 or k == 27: break # esc to quit if k == 1048688: cv2.waitKey(0) # 'p' to pause if k == ord('h'): tellotrack.tracking = True # 'h' to use tracking tellotrack.track_cmd = "" print("Time: " + str(end - start)) del img del img_ori
def recognize(jpg_path, pb_file_path):
    """Detect objects in one image with a frozen YOLOv3 graph and time the inference.

    The frozen graph supplies the three feature maps; box decoding and NMS are
    added on top of them in the same graph. Detections are printed and drawn
    into ``pb_result.jpg``, then the average run time over 50 repeated
    inferences is reported.

    Args:
        jpg_path: path of the image to read.
        pb_file_path: path of the frozen ``.pb`` graph file.
    """
    anchors = parse_anchors("./data/yolo_anchors.txt")
    classes = read_class_names("./data/coco.names")
    num_class = len(classes)
    color_table = get_color_table(num_class)

    img_ori = cv2.imread(jpg_path)
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple([IMAGE_SIZE, IMAGE_SIZE]))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Graph().as_default():
        output_graph_def = tf.GraphDef()
        print("Load Frozen_Graph File ...")
        with open(pb_file_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
            tf.import_graph_def(output_graph_def, name="")
        print("Finished")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True

        with tf.Session(config=config) as sess:
            # Define Input and Outputs
            input_x = sess.graph.get_tensor_by_name("Placeholder:0")
            feature_map_1 = sess.graph.get_tensor_by_name(
                "yolov3/yolov3_head/feature_map_1:0")
            feature_map_2 = sess.graph.get_tensor_by_name(
                "yolov3/yolov3_head/feature_map_2:0")
            feature_map_3 = sess.graph.get_tensor_by_name(
                "yolov3/yolov3_head/feature_map_3:0")
            features = feature_map_1, feature_map_2, feature_map_3

            # yolo config
            yolo_model = yolov3(num_class, anchors)
            yolo_model.pb_forward(input_x)

            # Function-call form of print so the file also parses on Python 3,
            # matching the other print calls in this function.
            print("Predicting ...")
            pred_boxes, pred_confs, pred_probs = yolo_model.predict(features)
            pred_scores = pred_confs * pred_probs
            boxes, scores, labels = gpu_nms(pred_boxes,
                                            pred_scores,
                                            num_class,
                                            max_boxes=30,
                                            score_thresh=0.4,
                                            iou_thresh=0.5)

            # The feed is identical for every run, so build it once.
            fetches = [boxes, scores, labels]
            feed = {input_x: np.reshape(img, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])}

            t0 = time.time()
            boxes_, scores_, labels_ = sess.run(fetches, feed_dict=feed)
            t1 = time.time()
            print("Finished")

            # rescale the coordinates to the original image
            boxes_[:, 0] *= (width_ori / float(IMAGE_SIZE))
            boxes_[:, 2] *= (width_ori / float(IMAGE_SIZE))
            boxes_[:, 1] *= (height_ori / float(IMAGE_SIZE))
            boxes_[:, 3] *= (height_ori / float(IMAGE_SIZE))

            print("box coords:")
            print(boxes_)
            print('*' * 30)
            print("scores:")
            print(scores_)
            print('*' * 30)
            print("labels:")
            print(labels_)
            print("runtime:")
            print(t1 - t0)

            for box, label in zip(boxes_, labels_):
                x0, y0, x1, y1 = box
                plot_one_box(img_ori, [x0, y0, x1, y1],
                             label=classes[label],
                             color=color_table[label])
            cv2.imwrite('pb_result.jpg', img_ori)

            # Benchmark: average wall time over repeated identical inferences.
            num_samples = 50
            t0 = time.time()
            for _ in range(num_samples):
                boxes_, scores_, labels_ = sess.run(fetches, feed_dict=feed)
            t1 = time.time()
            print('Average runtime: %f seconds' % (float(t1 - t0) / num_samples))
# Convert darknet YOLOv3 weights into a TensorFlow checkpoint.

import os
import sys
import tensorflow as tf
import numpy as np

from model import yolov3
from utils.misc_utils import parse_anchors, load_weights

num_class = 1
img_size = 416
weight_path = './data/darknet_weights/yolov3_last.weights'
save_path = './data/darknet_weights/yolov3.ckpt'
anchors = parse_anchors('./data/yolo_anchors.txt')

# Build the model from `num_class` rather than a repeated literal, so changing
# the constant above actually changes the network that is converted.
model = yolov3(num_class, anchors)
with tf.Session() as sess:
    inputs = tf.placeholder(tf.float32, [1, img_size, img_size, 3])

    with tf.variable_scope('yolov3'):
        feature_map = model.forward(inputs)

    # Only variables under the 'yolov3' scope are loaded and saved.
    yolo_vars = tf.global_variables(scope='yolov3')
    saver = tf.train.Saver(var_list=yolo_vars)

    load_ops = load_weights(yolo_vars, weight_path)
    sess.run(load_ops)
    saver.save(sess, save_path=save_path)
    print('TensorFlow model checkpoint has been saved to {}'.format(save_path))
# Strip the optimizer parameters from a saved checkpoint file.
# They are not needed for the forward pass, and dropping them makes the
# checkpoint much smaller.

import sys
sys.path.append('..')

import os
import tensorflow as tf
from model import yolov3

# params
ckpt_path = ''
class_num = 20
save_dir = 'shrinked_ckpt'

if not os.path.exists(save_dir):
    os.makedirs(save_dir)

# Build only the forward graph. The savers below are created right after it,
# before any optimizer has been added to the graph.
image = tf.placeholder(tf.float32, [1, 416, 416, 3])
yolo_model = yolov3(class_num, None)
with tf.variable_scope('yolov3'):
    pred_feature_maps = yolo_model.forward(image)

saver_to_restore = tf.train.Saver()
saver_to_save = tf.train.Saver()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver_to_restore.restore(sess, ckpt_path)
    shrinked_prefix = save_dir + '/shrinked'
    saver_to_save.save(sess, shrinked_prefix)
def recognize(jpg_path, pb_file_path):
    """Detect objects in one image with a frozen graph and save the result.

    The three YOLOv3 head feature maps are taken from the imported graph,
    decoded and filtered with `gpu_nms`; the boxes are scaled back to the
    original image, printed, drawn, and written to 'detection_result.jpg'.

    Args:
        jpg_path: path of the image to run detection on.
        pb_file_path: path of the serialized GraphDef to import.
    """
    anchors = parse_anchors("./data/yolo_anchors.txt")
    class_names = read_class_names("./data/coco.names")
    class_count = len(class_names)
    palette = get_color_table(class_count)

    original = cv2.imread(jpg_path)
    orig_h, orig_w = original.shape[:2]
    resized = cv2.resize(original, (IMAGE_SIZE, IMAGE_SIZE))
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # The pixel values are fed as-is (no division by 255) with a leading
    # batch axis added.
    batch = np.asarray(rgb, np.float32)[None, ...]

    head_names = (
        "yolov3/yolov3_head/feature_map_1",
        "yolov3/yolov3_head/feature_map_2",
        "yolov3/yolov3_head/feature_map_3",
    )

    with tf.Graph().as_default():
        session_config = tf.ConfigProto()
        session_config.gpu_options.allow_growth = True
        with tf.Session(config=session_config) as session:
            print("Load TRT_Graph File ...")
            with open(pb_file_path, "rb") as pb_file:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(pb_file.read())
            print("Finished")

            yolo_model = yolov3(class_count, anchors)

            print("Import TRT Graph ...")
            # No `name` is given, so the imported nodes live under "import/".
            head_nodes = tf.import_graph_def(
                graph_def, return_elements=list(head_names))
            print("Finished")

            tf_input = session.graph.get_tensor_by_name(
                "import/Placeholder" + ':0')
            features = tuple(
                session.graph.get_tensor_by_name("import/" + name + ":0")
                for name in head_names)

            # First run of the imported heads; the result is not used.
            session.run(head_nodes, feed_dict={tf_input: batch})
            print("1111111")

            yolo_model.pb_forward(tf_input)
            pred_boxes, pred_confs, pred_probs = yolo_model.predict(features)
            pred_scores = pred_confs * pred_probs
            print("Detection ......")
            nms_outputs = gpu_nms(pred_boxes,
                                  pred_scores,
                                  class_count,
                                  max_boxes=30,
                                  score_thresh=0.4,
                                  iou_thresh=0.5)
            det_boxes, det_scores, det_labels = session.run(
                list(nms_outputs), feed_dict={tf_input: batch})

            # rescale the coordinates to the original image
            scale_x = orig_w / float(IMAGE_SIZE)
            scale_y = orig_h / float(IMAGE_SIZE)
            det_boxes[:, [0, 2]] *= scale_x
            det_boxes[:, [1, 3]] *= scale_y

            separator = '*' * 30
            print("box coords:")
            print(det_boxes)
            print(separator)
            print("scores:")
            print(det_scores)
            print(separator)
            print("labels:")
            print(det_labels)

            for box, label in zip(det_boxes, det_labels):
                x0, y0, x1, y1 = box
                plot_one_box(original, [x0, y0, x1, y1],
                             label=class_names[label],
                             color=palette[label])
            # cv2.imshow('Detection result', original)
            cv2.imwrite('detection_result.jpg', original)
def yolodet(image_path,
            anchor_path=rootpath + "/yolo/data/yolo_anchors.txt",
            new_size=(416, 416),
            letterbox=True,
            class_name_path=rootpath + "/yolo/data/coco.names",
            restore_path=rootpath + "/yolo/data/best_model"):
    """Run YOLOv3 on one image and return the detections as a flat list.

    Args:
        image_path: path of the image to detect on.
        anchor_path: path of the anchor txt file.
        new_size: network input size as (width, height).
        letterbox: resize with `letterbox_resize` when true, otherwise with a
            plain `cv2.resize`.
        class_name_path: path of the class names file.
        restore_path: checkpoint prefix to restore the weights from.

    Returns:
        A flat list with six entries per detection, in order: label, score,
        then the four box coordinates scaled back to the original image.

    Raises:
        IOError: if the image cannot be read.
    """
    anchors = parse_anchors(anchor_path)
    classes = read_class_names(class_name_path)
    num_class = len(classes)

    img_ori = cv2.imread(image_path)
    if img_ori is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError("Could not read image: {}".format(image_path))

    if letterbox:
        img, resize_ratio, dw, dh = letterbox_resize(img_ori, new_size[0],
                                                     new_size[1])
    else:
        height_ori, width_ori = img_ori.shape[:2]
        img = cv2.resize(img_ori, tuple(new_size))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    try:
        with tf.Session() as sess:
            input_data = tf.placeholder(
                tf.float32, [1, new_size[1], new_size[0], 3],
                name='input_data')
            yolo_model = yolov3(num_class, anchors)
            with tf.variable_scope('yolov3'):
                pred_feature_maps = yolo_model.forward(input_data, False)
            pred_boxes, pred_confs, pred_probs = yolo_model.predict(
                pred_feature_maps)

            pred_scores = pred_confs * pred_probs

            boxes, scores, labels = gpu_nms(pred_boxes,
                                            pred_scores,
                                            num_class,
                                            max_boxes=200,
                                            score_thresh=0.3,
                                            nms_thresh=0.45)

            saver = tf.train.Saver()
            saver.restore(sess, restore_path)

            boxes_, scores_, labels_ = sess.run(
                [boxes, scores, labels], feed_dict={input_data: img})
    finally:
        # The model is built in the default graph on every call. Always clear
        # it (after the session has closed) so a failed call does not leave
        # variables behind that would break the next one.
        tf.reset_default_graph()

    # rescale the coordinates to the original image
    if letterbox:
        boxes_[:, [0, 2]] = (boxes_[:, [0, 2]] - dw) / resize_ratio
        boxes_[:, [1, 3]] = (boxes_[:, [1, 3]] - dh) / resize_ratio
    else:
        boxes_[:, [0, 2]] *= (width_ori / float(new_size[0]))
        boxes_[:, [1, 3]] *= (height_ori / float(new_size[1]))

    # transform detections into 1 line
    # (#1class,#1conf,#1xmin,#1ymin,#1max,#1ymax,#2class,#2conf,...)
    detections = []
    for label, score, box in zip(labels_, scores_, boxes_):
        detections.append(label)
        detections.append(score)
        detections.extend(box)
    return detections
def freeze():
    """Rebuild the training-time graph, restore a checkpoint and freeze it.

    The graph is built on top of the same tf.data pipeline as training, the
    quantization eval rewrite is applied, the checkpoint named by
    `FLAGS.train_dir` / `FLAGS.ckpt` is restored, and the variables are
    folded into constants up to `FLAGS.output_node`. The binary GraphDef is
    written as `FLAGS.frozen_pb_name` under '/home/shihaobing/'.
    """
    sess = tf.InteractiveSession()
    try:
        # setting placeholders
        is_training = tf.placeholder(tf.bool, name="phase_train")
        handle_flag = tf.placeholder(tf.string, [],
                                     name='iterator_handle_flag')
        # register the gpu nms operation here for the following evaluation
        # scheme. These placeholders are not used below; they are kept so
        # the set of graph nodes stays as it was.
        pred_boxes_flag = tf.placeholder(tf.float32, [1, None, None])
        pred_scores_flag = tf.placeholder(tf.float32, [1, None, None])

        ##################
        # tf.data pipeline
        ##################
        train_dataset = tf.data.TextLineDataset(args.train_file)
        train_dataset = train_dataset.shuffle(args.train_img_cnt)
        train_dataset = train_dataset.batch(args.batch_size)
        train_dataset = train_dataset.map(
            lambda x: tf.py_func(
                get_batch_data,
                inp=[x, args.class_num, args.img_size, args.anchors, 'train',
                     args.multi_scale_train, args.use_mix_up,
                     args.letterbox_resize],
                Tout=[tf.int64, tf.float32, tf.float32, tf.float32,
                      tf.float32]),
            num_parallel_calls=args.num_threads
        )
        train_dataset = train_dataset.prefetch(args.prefetech_buffer)

        val_dataset = tf.data.TextLineDataset(args.val_file)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(
            lambda x: tf.py_func(
                get_batch_data,
                inp=[x, args.class_num, args.img_size, args.anchors, 'val',
                     False, False, args.letterbox_resize],
                Tout=[tf.int64, tf.float32, tf.float32, tf.float32,
                      tf.float32]),
            num_parallel_calls=args.num_threads
        )
        # Dataset transformations return a new dataset; the result has to be
        # kept or the prefetch has no effect.
        val_dataset = val_dataset.prefetch(args.prefetech_buffer)

        iterator = tf.data.Iterator.from_structure(
            train_dataset.output_types, train_dataset.output_shapes)
        train_init_op = iterator.make_initializer(train_dataset)
        val_init_op = iterator.make_initializer(val_dataset)

        # get an element from the chosen dataset iterator
        image_ids, image, y_true_13, y_true_26, y_true_52 = \
            iterator.get_next()
        y_true = [y_true_13, y_true_26, y_true_52]

        # tf.data pipeline will lose the data `static` shape, so we need to
        # set it manually
        image_ids.set_shape([None])
        image.set_shape([None, None, None, 3])
        for y in y_true:
            y.set_shape([None, None, None, None, None])

        ##################
        # Model definition
        ##################
        yolo_model = yolov3(args.class_num, args.anchors,
                            args.use_label_smooth, args.use_focal_loss,
                            args.batch_norm_decay, args.weight_decay,
                            use_static_shape=False)
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(image,
                                                   is_training=is_training)

        quantize.create_eval_graph()

        # write frozen graph
        saver = tf.train.Saver(tf.global_variables())
        saver.restore(sess, os.path.join(FLAGS.train_dir, FLAGS.ckpt))

        frozen_gd = tf.graph_util.convert_variables_to_constants(
            sess, sess.graph_def, [FLAGS.output_node])
        tf.train.write_graph(
            frozen_gd,
            '/home/shihaobing/',
            FLAGS.frozen_pb_name,
            as_text=False)
    finally:
        # An InteractiveSession is not closed automatically.
        sess.close()
def ParseVideo(session, videoWriter, vid, video_frame_cnt, video_width,
               video_height, video_fps, anchors, classes, num_class, args):
    """Detect, track and count objects crossing two lines in a video.

    Every frame is run through YOLOv3, the detections are passed to a `Sort`
    tracker, and each track whose centre moved across `line` / `line2` since
    the previous frame increments the matching counter. The annotated frame
    is written to `videoWriter`.

    Args:
        session: TensorFlow session used to restore and run the model.
        videoWriter: writer receiving the annotated frames; released on exit.
        vid: capture object the frames are read from; released on exit.
        video_frame_cnt: maximum number of frames to process.
        video_width: unused.
        video_height: unused.
        video_fps: frames per second; one per-second sample is recorded every
            `video_fps` frames.
        anchors: YOLO anchors passed to the model.
        classes: mapping from label index to class name.
        num_class: number of classes.
        args: unused.

    Returns:
        Tuple `(sec_x, sec_y_to, sec_y_from, frame_x, frame_y_to,
        frame_y_from)`: the second / frame indices and the values of both
        counters sampled per second and per frame.
    """
    tracker = Sort()
    memory = {}  # track id -> box seen in the previous frame
    counter = 0
    counter2 = 0

    sec_x, sec_y_to, sec_y_from = [], [], []
    frame_x, frame_y_to, frame_y_from = [], [], []

    input_data = tf.placeholder(tf.float32,
                                [1, image_size[1], image_size[0], 3],
                                name='input_data')
    yolo_model = yolov3(num_class, anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)
    pred_scores = pred_confs * pred_probs
    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class,
                                    max_boxes=1000, score_thresh=0.5,
                                    nms_thresh=0.5)
    saver = tf.train.Saver()
    saver.restore(session, restore_path)

    # Global NumPy print format; setting it once is enough.
    np.set_printoptions(formatter={'float': lambda x: "{0:0.3f}".format(x)})

    try:
        for i in range(video_frame_cnt):
            ret, frame = vid.read()
            if not ret:
                # The reported frame count can exceed the number of frames
                # that can actually be decoded; stop instead of crashing on
                # a missing frame.
                break

            height_ori, width_ori = frame.shape[:2]
            img = cv2.resize(frame, tuple(image_size))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.asarray(img, np.float32)
            img = img[np.newaxis, :] / 255.

            start_time = time.time()
            boxes_, scores_, labels_ = session.run(
                [boxes, scores, labels], feed_dict={input_data: img})
            end_time = time.time()

            # rescale the coordinates to the original frame
            boxes_[:, [0, 2]] *= (width_ori / float(image_size[0]))
            boxes_[:, [1, 3]] *= (height_ori / float(image_size[1]))

            # One row per detection: x0, y0, x1, y1, score. The reshape keeps
            # the array two-dimensional, (0, 5), when nothing was detected.
            objects = np.column_stack((boxes_, scores_)).reshape(-1, 5)
            tracks = tracker.update(objects)

            prev = memory
            memory = {}

            for k, track in enumerate(tracks):
                track_id = int(track[4])
                square = [track[0], track[1], track[2], track[3]]
                memory[track_id] = square

                # (x, y) is the first corner, (w, h) the opposite corner.
                (x, y) = (int(square[0]), int(square[1]))
                (w, h) = (int(square[2]), int(square[3]))
                cv2.rectangle(frame, (x, y), (w, h), COLOR, FONT_SIZE)

                if track_id in prev:
                    previous_box = prev[track_id]
                    (x2, y2) = (int(previous_box[0]), int(previous_box[1]))
                    (w2, h2) = (int(previous_box[2]), int(previous_box[3]))
                    # Box centres in this frame and in the previous one.
                    p0 = (int(x + (w - x) / 2), int(y + (h - y) / 2))
                    p1 = (int(x2 + (w2 - x2) / 2), int(y2 + (h2 - y2) / 2))
                    cv2.line(frame, p0, p1, COLOR, FONT_SIZE)

                    if Intersect(p0, p1, line[0], line[1]):
                        counter += 1
                    if Intersect(p0, p1, line2[0], line2[1]):
                        counter2 += 1

                # NOTE(review): `k` is the index of the track, while
                # `labels_` is ordered like the detections -- the label shown
                # may belong to another box unless the tracker keeps the
                # detection order; confirm against Sort.update.
                # NOTE(review): `FONT_SCALE // 2` is 0 for a scale below 2 --
                # confirm this is intended.
                cv2.putText(frame, "{}".format(classes[labels_[k]]),
                            (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX,
                            FONT_SCALE // 2, COLOR, FONT_SIZE // 2)

            cv2.line(frame, line[0], line[1], (0, 255, 255), FONT_SIZE)
            cv2.line(frame, line2[0], line2[1], (255, 0, 255), FONT_SIZE)

            cv2.putText(frame, str(counter), POS_YELLOW,
                        cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE, (0, 255, 255),
                        FONT_SIZE)
            cv2.putText(frame, str(counter2), POS_PINK,
                        cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE, (255, 0, 255),
                        FONT_SIZE)
            cv2.putText(frame,
                        '{:.2f}ms'.format((end_time - start_time) * 1000),
                        POS_TIME, cv2.FONT_HERSHEY_DUPLEX, FONT_SCALE,
                        COLOR_GREEN, FONT_SIZE)

            # Uncomment to get a live preview
            # cv2.imshow(' YOLO-v3 car detection using Tensorflow-GPU. ', frame)
            # Uncomment to dump the video frame by frame
            # cv2.imwrite("output/frame-{}.png".format(i), frame)
            videoWriter.write(frame)

            frame_x.append(i)
            frame_y_to.append(counter)
            frame_y_from.append(counter2)

            if i % video_fps == 0:
                sec_x.append(i // video_fps)
                sec_y_to.append(counter)
                sec_y_from.append(counter2)

            if i >= LIMIT:
                break
    finally:
        # Release the capture and the writer even if a frame fails.
        vid.release()
        videoWriter.release()

    return sec_x, sec_y_to, sec_y_from, frame_x, frame_y_to, frame_y_from
def _prune_second_stage(self):
    """Prune the input channels of the darknet53 convolutions in a checkpoint.

    The model is rebuilt and restored from a fixed checkpoint under
    `self._checkpoint_dir`. For every checkpoint variable whose name contains
    both "darknet53_body" and "weights", `self.run_pruning_for_conv2d_layer`
    selects the indices to drop. Only the input-channel pruning is written
    back to the variable; the output-filter and batch-norm pruning are
    computed for the printed diagnostics and the returned counts only. The
    result is saved as 'prue_channel_model.ckpt' in the same directory.

    Returns:
        Tuple `(yolo_model, pruning_dict)` where `pruning_dict` maps a layer
        name to the number of indices selected for it.
    """
    pruning_dict = dict()
    checkpoint_path = os.path.join(
        self._checkpoint_dir,
        "best_model_Epoch_2_step_2024.0_mAP_0.1784_loss_30.0785_lr_0.0001")
    reader = pywrap_tensorflow.NewCheckpointReader(checkpoint_path)
    var_to_shape_map = reader.get_variable_to_shape_map()
    graph = tf.get_default_graph()  # the default graph

    # Names of the batch-norm variables that sit next to a conv 'weights'.
    bn_params = [
        'BatchNorm/gamma', 'BatchNorm/beta', 'BatchNorm/moving_variance',
        'BatchNorm/moving_mean'
    ]

    with tf.Session(graph=graph) as sess:
        input_data = tf.placeholder(
            tf.float32, [1, self._img_size[1], self._img_size[0], 3],
            name='input_data')
        yolo_model = yolov3(num_class, anchors)
        sess.run([
            tf.global_variables_initializer(),
            tf.local_variables_initializer()
        ])
        with tf.variable_scope('yolov3'):
            pred_feature_maps = yolo_model.forward(input_data,
                                                   is_training=True)
        saver = tf.train.Saver()
        saver.restore(sess, checkpoint_path)

        for layer_name in var_to_shape_map:
            if not ("darknet53_body" in layer_name
                    and "weights" in layer_name):
                continue

            print('current layer is ', layer_name)
            tf_layer_weight = graph.get_tensor_by_name(layer_name + ":0")
            layer_weight = sess.run(tf_layer_weight)

            pruning_factor = self._pruning_factor
            # presumably (output-filter indices, input-channel indices) --
            # confirm against run_pruning_for_conv2d_layer
            filter_indices_to_prune, filter_indices_to_prune_input = \
                self.run_pruning_for_conv2d_layer(pruning_factor,
                                                  layer_weight)
            print('filter_indices_to_prune is ', filter_indices_to_prune)
            print('filter_indices_to_prune_input is ',
                  filter_indices_to_prune_input)

            ######prune output weight######
            # Diagnostic only: the pruned kernel is not assigned back.
            W, H, N, nb_channels = layer_weight.shape
            print("layer_weight.shape is ", layer_weight.shape)
            # Dropping the filters along the last axis keeps every remaining
            # filter intact.
            prune_weight = np.delete(layer_weight, filter_indices_to_prune,
                                     axis=3)
            print('prun_channel is ', prune_weight.shape[3])
            print('calc prune channel is ',
                  nb_channels - len(filter_indices_to_prune))
            print("prun weight shape is", prune_weight.shape)
            pruning_dict[layer_name] = len(filter_indices_to_prune)

            #######prune BN params########
            # Diagnostic only as well: the pruned values are not assigned.
            for bn_param in bn_params:
                bn_layer = layer_name.replace('weights', bn_param)
                tf_bn_param = graph.get_tensor_by_name(bn_layer + ":0")
                layer_bn_param = sess.run(tf_bn_param)
                prune_bn_param = np.delete(layer_bn_param,
                                           filter_indices_to_prune, axis=0)
                print('current layer is ', bn_layer)
                print("bn param.shape is ", layer_bn_param.shape)
                print("layer_bn_param.shape is ", layer_bn_param.shape)
                print('prune bn param shape is ', prune_bn_param.shape)

            ######prune input filter weight######
            ### cannot prune the first conv
            if 'yolov3/darknet53_body/Conv/weights' not in layer_name:
                W, H, input_channels, nb_channels_2 = layer_weight.shape
                print('first prune output shape is ', layer_weight.shape)
                # Delete along the input-channel axis directly. Flattening a
                # transposed copy and reshaping it straight back to
                # (W, H, C_in, C_out) would mix up the remaining weights.
                prune_weight_input = np.delete(
                    layer_weight, filter_indices_to_prune_input, axis=2)
                print('prun_channel input is ', prune_weight_input.shape[2])
                print('calc prune channel input is ',
                      input_channels - len(filter_indices_to_prune_input))
                print("prune_weight_input shape is",
                      prune_weight_input.shape)
                # validate_shape=False lets the variable take the smaller
                # kernel.
                sess.run(
                    tf.assign(tf_layer_weight,
                              prune_weight_input,
                              validate_shape=False))
                pruning_dict[layer_name] = len(
                    filter_indices_to_prune) + len(
                        filter_indices_to_prune_input)

        saver.save(
            sess,
            os.path.join(self._checkpoint_dir, 'prue_channel_model.ckpt'))
        # yolo_prune_model = self._reconstruction_model()
    return yolo_model, pruning_dict
num_parallel_calls=args.num_threads ) val_dataset.prefetch(args.prefetech_buffer) iterator = val_dataset.make_one_shot_iterator() image_ids, image, y_true_52 = iterator.get_next() image_ids.set_shape([None]) y_true = [y_true_52] image.set_shape([None, args.img_size[1], args.img_size[0], 3]) for y in y_true: y.set_shape([None, None, None, None]) ################## # Model definition ################## yolo_model = yolov3(args.class_num) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(image, is_training=is_training) loss = yolo_model.compute_loss(pred_feature_maps, y_true) y_pred = yolo_model.predict(pred_feature_maps) saver_to_restore = tf.train.Saver() with tf.Session() as sess: sess.run([tf.global_variables_initializer()]) saver_to_restore.restore(sess, args.restore_path) print('\n----------- start to eval -----------\n') val_loss_total, val_loss_xy, val_loss_conf, val_loss_class = \ AverageMeter(), AverageMeter(), AverageMeter(), AverageMeter()
# Load the input image and bring it to the network input size, either with
# the letterbox helper or with a plain resize.
img_ori = cv2.imread(args.input_image)
if args.letterbox_resize:
    # The extra return values are kept for mapping boxes back later.
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
else:
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(args.new_size))
# OpenCV loads images as BGR; convert to RGB, then to float32 in [0, 1] with
# a leading batch axis.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    # args.new_size is indexed as [width, height]; the placeholder is
    # [batch, height, width, channels].
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    # NOTE(review): `Args` (capital A) differs from the `args` used everywhere
    # else in this fragment -- confirm it is a separate object and not a typo.
    yolo_model = yolov3(args.num_class, args.anchors, Args.no_anchor_branch)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    # Per-class score = confidence * class probability.
    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=100, score_thresh=0.6, nms_thresh=0.3)

    # Print every trainable variable of the graph built above.
    for each in tf.trainable_variables():
        print(each)
color_table = get_color_table(num_class)

# Open the input video and read its properties. The numeric ids are OpenCV
# capture properties: 7 = frame count, 3 = frame width, 4 = frame height,
# 5 = frames per second.
vid = cv2.VideoCapture(input_video)
video_frame_cnt = int(vid.get(7))
video_width = int(vid.get(3))
video_height = int(vid.get(4))
video_fps = int(vid.get(5))

# The output video keeps the frame rate and frame size of the input.
fourcc = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
videoWriter = cv2.VideoWriter('video_result.mp4', fourcc, video_fps, (video_width, video_height))

with tf.Session() as sess:
    # new_size is indexed as [width, height]; the placeholder is
    # [batch, height, width, channels].
    input_data = tf.placeholder(tf.float32, [1, new_size[1], new_size[0], 3], name='input_data')
    yolo_model = yolov3(num_class, anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    # Per-class score = confidence * class probability.
    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=30, score_thresh=0.5, iou_thresh=0.5)

    saver = tf.train.Saver()
val_init_op = iterator.make_initializer(val_dataset) # get an element from the chosen dataset iterator image_ids, image, y_true_13, y_true_26, y_true_52 = iterator.get_next() y_true = [y_true_13, y_true_26, y_true_52] # tf.data pipeline will lose the data `static` shape, so we need to set it manually image_ids.set_shape([None]) image.set_shape([None, None, None, 3]) for y in y_true: y.set_shape([None, None, None, None, None]) ################## # Model definition ################## yolo_model = yolov3(args_b_es.class_num, args_b_es.anchors, args_b_es.use_label_smooth, args_b_es.use_focal_loss, args_b_es.batch_norm_decay, args_b_es.weight_decay) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(image, is_training=is_training) loss = yolo_model.compute_loss(pred_feature_maps, y_true) y_pred = yolo_model.predict(pred_feature_maps) l2_loss = tf.losses.get_regularization_loss() # setting restore parts and vars to update saver_to_restore = tf.train.Saver(var_list=tf.contrib.framework.get_variables_to_restore(include=args_b_es.restore_part)) update_vars = tf.contrib.framework.get_variables_to_restore(include=args_b_es.update_part) tf.summary.scalar('train_batch_statistics/total_loss', loss[0]) tf.summary.scalar('train_batch_statistics/loss_xy', loss[1]) tf.summary.scalar('train_batch_statistics/loss_wh', loss[2]) tf.summary.scalar('train_batch_statistics/loss_conf', loss[3])
# Convert Darknet YOLOv3 weights into a TensorFlow checkpoint.
#
# The script builds the YOLOv3 forward graph, loads the Darknet weight file
# into the variables under the 'yolov3' scope and saves them as a checkpoint.
import os
import sys
import tensorflow as tf
import numpy as np

from model import yolov3
from utils.misc_utils import parse_anchors, load_weights

# Number of classes the Darknet weights were trained with.
num_class = 80
# Side length of the (square) network input.
img_size = 416
weight_path = './data/darknet_weights/yolov3.weights'
save_path = './data/darknet_weights/yolov3.ckpt'
anchors = parse_anchors('./data/yolo_anchors.txt')

# Build the model from `num_class` instead of repeating the literal, so the
# class count only has to be changed in one place.
model = yolov3(num_class, anchors)
with tf.Session() as sess:
    inputs = tf.placeholder(tf.float32, [1, img_size, img_size, 3])

    with tf.variable_scope('yolov3'):
        feature_map = model.forward(inputs)

    # The same variable list is saved and filled from the weight file.
    yolo_vars = tf.global_variables(scope='yolov3')
    saver = tf.train.Saver(var_list=yolo_vars)

    # presumably a list of assign ops, one per variable -- confirm in
    # utils.misc_utils.load_weights
    load_ops = load_weights(yolo_vars, weight_path)
    sess.run(load_ops)
    saver.save(sess, save_path=save_path)
    print('TensorFlow model checkpoint has been saved to {}'.format(save_path))
def recognize(jpg_path, pb_file_path):
    """Convert a frozen graph with TensorRT and run detection on one image.

    The GraphDef at `pb_file_path` is imported, converted with
    `trt.create_inference_graph`, written to './data/yolov3_trt.pb' and
    imported again. The three YOLOv3 head feature maps are then decoded and
    filtered with `gpu_nms`; the boxes are scaled back to the original image,
    printed, drawn, and saved as 'detection_result.jpg'.

    Args:
        jpg_path: path of the image to run detection on.
        pb_file_path: path of the serialized frozen GraphDef.
    """
    anchors = parse_anchors("./data/yolo_anchors.txt")
    classes = read_class_names("./data/coco.names")
    num_class = len(classes)
    color_table = get_color_table(num_class)

    # Resize to the square network input, BGR -> RGB, float32 in [0, 1] with a
    # leading batch axis.
    img_ori = cv2.imread(jpg_path)
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple([IMAGE_SIZE, IMAGE_SIZE]))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.asarray(img, np.float32)
    img = img[np.newaxis, :] / 255.

    with tf.Graph().as_default():
        # First import: the original frozen graph, without a name prefix.
        output_graph_def = tf.GraphDef()
        with open(pb_file_path, "rb") as f:
            output_graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(output_graph_def, name="")

        tf_config = tf.ConfigProto()
        tf_config.gpu_options.allow_growth = True
        with tf.Session(config=tf_config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)

            input_name = "Placeholder"
            output_name1 = "yolov3/yolov3_head/feature_map_1"
            output_name2 = "yolov3/yolov3_head/feature_map_2"
            output_name3 = "yolov3/yolov3_head/feature_map_3"
            output_names = [output_name1, output_name2, output_name3]

            yolo_model = yolov3(num_class, anchors)
            input_data = tf.placeholder(tf.float32, [1, IMAGE_SIZE, IMAGE_SIZE, 3], name='input_data')

            # Convert the frozen graph with TensorRT (FP16, batch size 1) and
            # store the converted graph on disk.
            trt_graph = trt.create_inference_graph(
                input_graph_def=output_graph_def,
                outputs=output_names,
                max_batch_size=1,
                max_workspace_size_bytes=1 << 25,
                precision_mode='FP16',
                minimum_segment_size=5)

            with open('./data/yolov3_trt.pb', 'wb') as f:
                f.write(trt_graph.SerializeToString())

            # NOTE(review): this is the second import with an empty name into
            # a graph that already holds the nodes of the original frozen
            # graph -- confirm which copy the lookups below resolve to.
            tf.import_graph_def(trt_graph, name='')

            tf_input = sess.graph.get_tensor_by_name(input_name + ':0')
            feature_map_1 = sess.graph.get_tensor_by_name(output_name1 + ":0")
            feature_map_2 = sess.graph.get_tensor_by_name(output_name2 + ":0")
            feature_map_3 = sess.graph.get_tensor_by_name(output_name3 + ":0")
            features = feature_map_1, feature_map_2, feature_map_3
            # tf_scores = tf_sess.graph.get_tensor_by_name('detection_scores:0')
            # tf_boxes = tf_sess.graph.get_tensor_by_name('detection_boxes:0')
            # tf_classes = tf_sess.graph.get_tensor_by_name('detection_classes:0')
            # tf_num_detections = tf_sess.graph.get_tensor_by_name('num_detections:0')

            # features = sess.run(features, feed_dict={tf_input:np.reshape(img, [-1, IMAGE_SIZE, IMAGE_SIZE, 3])})
            # feature1, feature2, feature3 = features
            # feature1 = tf.convert_to_tensor(feature1)
            # feature2 = tf.convert_to_tensor(feature2)
            # feature3 = tf.convert_to_tensor(feature3)
            # features = feature1, feature2, feature3

            # NOTE(review): `features` come from the imported graph, whose
            # input is `tf_input`, yet the run below feeds `input_data` --
            # confirm that pb_forward connects the two placeholders.
            yolo_model.pb_forward(input_data)

            pred_boxes, pred_confs, pred_probs = yolo_model.predict(features)

            # Per-class score = confidence * class probability.
            pred_scores = pred_confs * pred_probs

            boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, num_class, max_boxes=30, score_thresh=0.4, iou_thresh=0.5)

            boxes_, scores_, labels_ = sess.run([boxes, scores, labels], feed_dict={input_data: img})

            # rescale the coordinates to the original image
            boxes_[:, 0] *= (width_ori / float(IMAGE_SIZE))
            boxes_[:, 2] *= (width_ori / float(IMAGE_SIZE))
            boxes_[:, 1] *= (height_ori / float(IMAGE_SIZE))
            boxes_[:, 3] *= (height_ori / float(IMAGE_SIZE))

            print("box coords:")
            print(boxes_)
            print('*' * 30)
            print("scores:")
            print(scores_)
            print('*' * 30)
            print("labels:")
            print(labels_)

            # Draw every detection on the original image and save the result.
            for i in range(len(boxes_)):
                x0, y0, x1, y1 = boxes_[i]
                plot_one_box(img_ori, [x0, y0, x1, y1], label=classes[labels_[i]], color=color_table[labels_[i]])
            # cv2.imshow('Detection result', img_ori)
            cv2.imwrite('detection_result.jpg', img_ori)
# Load the input image and bring it to the network input size, either with
# the letterbox helper or with a plain resize.
img_ori = cv2.imread(args.input_image)
if args.letterbox_resize:
    # The extra return values are kept for mapping boxes back later.
    img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1])
else:
    height_ori, width_ori = img_ori.shape[:2]
    img = cv2.resize(img_ori, tuple(args.new_size))
# OpenCV loads images as BGR; convert to RGB, then to float32 in [0, 1] with
# a leading batch axis.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = np.asarray(img, np.float32)
img = img[np.newaxis, :] / 255.

with tf.Session() as sess:
    # args.new_size is indexed as [width, height]; the placeholder is
    # [batch, height, width, channels].
    input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data')
    yolo_model = yolov3(args.num_class, args.anchors)
    with tf.variable_scope('yolov3'):
        pred_feature_maps = yolo_model.forward(input_data, False)
    pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps)

    # Per-class score = confidence * class probability.
    pred_scores = pred_confs * pred_probs

    boxes, scores, labels = gpu_nms(pred_boxes, pred_scores, args.num_class, max_boxes=200, score_thresh=0.3, nms_thresh=0.45)  #0.3,0.45

    # Restore the trained weights into the graph built above.
    saver = tf.train.Saver()
    saver.restore(sess, args.restore_path)
if args.letterbox_resize: img, resize_ratio, dw, dh = letterbox_resize(img_ori, args.new_size[0], args.new_size[1]) else: height_ori, width_ori = img_ori.shape[:2] img = cv2.resize(img_ori, tuple(args.new_size)) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img = np.asarray(img, np.float32) img = img[np.newaxis, :] / 255. with tf.Session() as sess: input_data = tf.placeholder(tf.float32, [1, args.new_size[1], args.new_size[0], 3], name='input_data') yolo_model = yolov3(args.num_class) with tf.variable_scope('yolov3'): pred_feature_maps = yolo_model.forward(input_data, False) pred_boxes, pred_confs, pred_probs = yolo_model.predict(pred_feature_maps) pred_scores = pred_confs * pred_probs boxes, scores, labels, _ = score_filter(pred_boxes, pred_scores, args.num_class, score_thresh=0.3) saver = tf.train.Saver() saver.restore(sess, args.restore_path) boxes_, scores_, labels_, pred_boxes_, pred_confs_, pred_probs_ = sess.run(