def main():
    parser = argparse.ArgumentParser(
        description='Pick out VOC object from COCO annotation dataset')
    parser.add_argument('--coco_annotation_file', type=str, required=True,
                        help='coco annotation txt file')
    parser.add_argument('--coco_classes_path', type=str,
                        default='../../configs/coco_classes.txt',
                        help='path to coco class definitions, default ../../configs/coco_classes.txt')
    parser.add_argument('--voc_classes_path', type=str,
                        default='../../configs/voc_classes.txt',
                        help='path to voc class definitions, default ../../configs/voc_classes.txt')
    parser.add_argument('--output_voc_annotation_file', type=str, required=True,
                        help='output voc classes annotation file')
    args = parser.parse_args()

    # param parse
    coco_class_names = get_classes(args.coco_classes_path)
    voc_class_names = get_classes(args.voc_classes_path)
    coco_annotation_lines = get_dataset(args.coco_annotation_file)

    output_file = open(args.output_voc_annotation_file, 'w')
    for coco_annotation_line in coco_annotation_lines:
        # parse annotation line
        coco_line = coco_annotation_line.split()
        image_name = coco_line[0]
        boxes = np.array([np.array(list(map(int, box.split(',')))) for box in coco_line[1:]])

        has_voc_object = False
        for box in boxes:
            coco_class_id = box[-1]
            # check if coco object is in voc class list;
            # if true, keep the image & box info
            if coco_class_names[coco_class_id] in voc_class_names:
                if not has_voc_object:
                    has_voc_object = True
                    output_file.write(image_name)
                # get VOC class ID of the COCO object
                voc_class_id = voc_class_names.index(coco_class_names[coco_class_id])
                # keep the 4 box coordinates and append the remapped class ID
                # (box[:-1] here; the original box[:-2] dropped the last coordinate)
                output_file.write(" " + ",".join([str(b) for b in box[:-1]]) + ',' + str(voc_class_id))
        if has_voc_object:
            output_file.write('\n')
    output_file.close()
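# Illustrative note (inferred from the parsing above, not stated in the source): each
# annotation line is expected to look like
#   path/to/image.jpg x_min,y_min,x_max,y_max,class_id x_min,y_min,...
# i.e. one comma-separated box per whitespace-separated token after the image path,
# with the class ID as the last field of every box.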
def main():
    # load some VOC2007 images for test
    #image = Image.open("000001.jpg")
    #boxes = np.array([[48,240,195,371,11],[8,12,352,498,14]])
    image = Image.open("000004.jpg")
    boxes = np.array([[13,311,84,362,6],[362,330,500,389,6],[235,328,334,375,6],[175,327,252,364,6],[139,320,189,359,6],[108,325,150,353,6],[84,323,121,350,6]])
    #image = Image.open("000010.jpg")
    #boxes = np.array([[87,97,258,427,12],[133,72,245,284,14]])

    classes = boxes[:, -1]
    scores = [1.0] * len(classes)
    class_names = get_classes('../../configs/voc_classes.txt')
    colors = get_colors(class_names)

    image_origin = draw_boxes(np.array(image, dtype='uint8'), boxes[:, :4], classes, scores, class_names, colors)

    image, boxes = random_rotate(image, boxes, prob=1.0)
    image = draw_boxes(np.array(image, dtype='uint8'), boxes[:, :4], classes, scores, class_names, colors)

    Image.fromarray(image_origin).show()
    Image.fromarray(image).show()
def __init__(self):
    self.log = logging.getLogger('EMSfIIoT')

    # Load TFLite model and allocate tensors.
    # Try the standalone tflite interpreter first, fall back to the one bundled with TF.
    try:
        self.interpreter = tflite.Interpreter(model_path="weights/emsfiiot_lite.h5")
    except Exception:
        self.interpreter = tf.lite.Interpreter(model_path="weights/emsfiiot_lite.h5")
    self.interpreter.allocate_tensors()

    # Get input and output tensors.
    self.input_details = self.interpreter.get_input_details()
    self.output_details = self.interpreter.get_output_details()

    # check the type of the input tensor
    self.floating_model = self.input_details[0]['dtype'] == np.float32

    height = self.input_details[0]['shape'][1]
    width = self.input_details[0]['shape'][2]
    self.model_image_size = (width, height)

    self.anchors = get_anchors("configs/yolo3_anchors.txt")
    self.class_names = get_classes("configs/emsfiiot_classes.txt")
    self.colors = get_colors(self.class_names)
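# Hedged usage sketch (not part of the original class): once tensors are allocated, a
# single inference follows the standard TFLite interpreter flow. `image_data` is a
# hypothetical preprocessed array matching input_details[0]['shape'] and dtype
# (float32 when self.floating_model is True):
#
#   self.interpreter.set_tensor(self.input_details[0]['index'], image_data)
#   self.interpreter.invoke()
#   outputs = [self.interpreter.get_tensor(d['index']) for d in self.output_details]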
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='evaluate Deeplab model (h5/pb/tflite/mnn) with test dataset')
    '''
    Command line options
    '''
    parser.add_argument('--dataset_file', type=str, required=True,
                        help='eval samples txt file')
    parser.add_argument('--gt_label_path', type=str, required=True,
                        help='path containing groundtruth label png file')
    parser.add_argument('--pred_label_path', type=str, required=True,
                        help='path containing model predict label png file')
    parser.add_argument('--classes_path', type=str, required=False,
                        default='configs/voc_classes.txt',
                        help='path to class definitions, default=%(default)s')
    parser.add_argument('--model_output_shape', type=str,
                        help='model mask output size as <height>x<width>, default=%(default)s',
                        default='512x512')
    parser.add_argument('--show_background', default=False, action="store_true",
                        help='Show background evaluation info')
    args = parser.parse_args()

    # param parse
    height, width = args.model_output_shape.split('x')
    model_output_shape = (int(height), int(width))

    # add background class
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names

    # get dataset list
    dataset = get_data_list(args.dataset_file)

    start = time.time()
    eval_mIOU(dataset, args.gt_label_path, args.pred_label_path, class_names, model_output_shape, args.show_background)
    end = time.time()
    print("Evaluation time cost: {:.6f}s".format(end - start))
def main():
    parser = argparse.ArgumentParser(description='validate YOLO model (h5/pb/onnx/tflite/mnn) with image')
    parser.add_argument('--model_path', help='model file to predict', type=str, required=True)
    parser.add_argument('--image_file', help='image file to predict', type=str, required=True)
    parser.add_argument('--anchors_path', help='path to anchor definitions', type=str, required=True)
    parser.add_argument('--classes_path', help='path to class definitions, default ../../configs/voc_classes.txt', type=str, default='../../configs/voc_classes.txt')
    parser.add_argument('--model_image_size', help='model image input size as <height>x<width>, default 416x416', type=str, default='416x416')
    parser.add_argument('--loop_count', help='loop inference for certain times', type=int, default=1)
    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))
    assert (model_image_size[0] % 32 == 0 and model_image_size[1] % 32 == 0), 'model_image_size should be multiples of 32'

    # support of tflite model
    if args.model_path.endswith('.tflite'):
        validate_yolo_model_tflite(args.model_path, args.image_file, anchors, class_names, args.loop_count)
    # support of MNN model
    elif args.model_path.endswith('.mnn'):
        validate_yolo_model_mnn(args.model_path, args.image_file, anchors, class_names, args.loop_count)
    # support of TF 1.x frozen pb model
    elif args.model_path.endswith('.pb'):
        validate_yolo_model_pb(args.model_path, args.image_file, anchors, class_names, model_image_size, args.loop_count)
    # support of ONNX model
    elif args.model_path.endswith('.onnx'):
        validate_yolo_model_onnx(args.model_path, args.image_file, anchors, class_names, args.loop_count)
    # normal keras h5 model
    elif args.model_path.endswith('.h5'):
        validate_yolo_model(args.model_path, args.image_file, anchors, class_names, model_image_size, args.loop_count)
    else:
        raise ValueError('invalid model file')
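# Hedged example invocation (the script filename is hypothetical; the flags are the ones
# defined above). The backend is dispatched purely on the model file extension:
#
#   python validate_yolo.py --model_path model.tflite --image_file test.jpg \
#       --anchors_path ../../configs/yolo3_anchors.txt --loop_count 5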
def __init__(self, **kwargs):
    super(DeepLab, self).__init__()
    self.__dict__.update(self._defaults)  # set up default values
    self.__dict__.update(kwargs)  # and update with user overrides
    self.class_names = get_classes(self.classes_path)
    K.set_learning_phase(0)
    self.deeplab_model = self._generate_model()
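# Hedged usage sketch (not from the source): since __init__ applies _defaults first and
# the user kwargs second, any default attribute can be overridden at construction time,
# e.g. pointing an instance at a different class file (this assumes 'classes_path' is
# among _defaults, as its use above suggests):
#
#   deeplab = DeepLab(classes_path='configs/voc_classes.txt')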
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='convert MSCOCO dataset to PascalVOC dataset')
    parser.add_argument('--coco_root_path', type=str, required=True,
                        help='path to MSCOCO dataset')
    parser.add_argument('--output_path', type=str, required=True,
                        help='output path for generated PascalVOC dataset')
    parser.add_argument('--classes_path', type=str, required=False,
                        help='path to a selected sub-classes definition, optional',
                        default=None)
    args = parser.parse_args()

    if args.classes_path:
        class_names = get_classes(args.classes_path)
    else:
        class_names = None

    coco_to_pascalvoc(args.coco_root_path, args.output_path, class_names)
def main():
    parser = argparse.ArgumentParser(
        description='check heatmap activation for CNN classifier model (h5) with test images')
    parser.add_argument('--model_path', type=str, required=True,
                        help='model file to predict')
    parser.add_argument('--image_path', type=str, required=True,
                        help='Image file or directory to predict')
    parser.add_argument('--classes_path', type=str, required=False, default=None,
                        help='path to class definition, optional')
    parser.add_argument('--heatmap_path', type=str, required=True,
                        help='output heatmap file or directory')
    args = parser.parse_args()

    if args.classes_path:
        class_names = get_classes(args.classes_path)
    else:
        class_names = None

    generate_heatmap(args.image_path, args.model_path, args.heatmap_path, class_names)
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='convert onboard gray PNG label to PascalVOC PNG label')
    parser.add_argument('--input_label_path', type=str, required=True,
                        help='input path of gray label files')
    parser.add_argument('--output_label_path', type=str, required=True,
                        help='output path of converted png label files')
    parser.add_argument('--classes_path', type=str, required=False,
                        help='path to class definitions, optional', default=None)
    args = parser.parse_args()

    if args.classes_path:
        # add background class to match model & GT
        class_names = get_classes(args.classes_path)
        assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
        class_names = ['background'] + class_names
    else:
        class_names = None

    label_convert(args.input_label_path, args.output_label_path, class_names)
def main():
    # class YOLO defines the default value, so suppress any default here
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS,
                                     description='evaluate YOLO model (h5/pb/tflite/mnn) with test dataset')
    '''
    Command line options
    '''
    parser.add_argument('--model_path', type=str, required=True,
                        help='path to model file')
    parser.add_argument('--custom_objects', type=str, required=False, default=None,
                        help="Custom objects in keras model (swish/tf). Separated with comma if more than one.")
    parser.add_argument('--anchors_path', type=str, required=True,
                        help='path to anchor definitions')
    parser.add_argument('--classes_path', type=str, required=False,
                        help='path to class definitions, default configs/voc_classes.txt',
                        default='configs/voc_classes.txt')
    parser.add_argument('--annotation_file', type=str, required=True,
                        help='test annotation txt file')
    parser.add_argument('--eval_type', type=str,
                        help='evaluation type (VOC/COCO), default=VOC', default='VOC')
    parser.add_argument('--iou_threshold', type=float,
                        help='IOU threshold for PascalVOC mAP, default=0.5', default=0.5)
    parser.add_argument('--conf_threshold', type=float,
                        help='confidence threshold for filtering box in postprocess, default=0.001',
                        default=0.001)
    parser.add_argument('--model_image_size', type=str,
                        help='model image input size as <num>x<num>, default 416x416',
                        default='416x416')
    parser.add_argument('--save_result', default=False, action="store_true",
                        help='Save the detection result image in result/detection dir')
    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))

    annotation_lines = get_dataset(args.annotation_file)
    model, model_format = load_eval_model(args.model_path, args.custom_objects)

    eval_AP(model, model_format, annotation_lines, anchors, class_names, model_image_size,
            args.eval_type, args.iou_threshold, args.conf_threshold, args.save_result)
def main():
    # class YOLO defines the default value, so suppress any default here
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS,
                                     description='evaluate YOLO model (h5/pb/onnx/tflite/mnn) with test dataset')
    '''
    Command line options
    '''
    parser.add_argument('--model_path', type=str, required=True,
                        help='path to model file')
    parser.add_argument('--anchors_path', type=str, required=True,
                        help='path to anchor definitions')
    parser.add_argument('--classes_path', type=str, required=False,
                        help='path to class definitions, default configs/voc_classes.txt',
                        default=os.path.join('configs', 'voc_classes.txt'))
    parser.add_argument('--annotation_file', type=str, required=True,
                        help='test annotation txt file')
    parser.add_argument('--eval_type', type=str,
                        help='evaluation type (VOC/COCO), default=VOC', default='VOC')
    parser.add_argument('--iou_threshold', type=float,
                        help='IOU threshold for PascalVOC mAP, default=0.5', default=0.5)
    parser.add_argument('--conf_threshold', type=float,
                        help='confidence threshold for filtering box in postprocess, default=0.001',
                        default=0.001)
    parser.add_argument('--model_image_size', type=str,
                        help='model image input size as <height>x<width>, default 416x416',
                        default='416x416')
    parser.add_argument('--save_result', default=False, action="store_true",
                        help='Save the detection result image in result/detection dir')
    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))
    assert (model_image_size[0] % 32 == 0 and model_image_size[1] % 32 == 0), 'model_image_size should be multiples of 32'

    annotation_lines = get_dataset(args.annotation_file, shuffle=False)
    model, model_format = load_eval_model(args.model_path)

    start = time.time()
    eval_AP(model, model_format, annotation_lines, anchors, class_names, model_image_size,
            args.eval_type, args.iou_threshold, args.conf_threshold, args.save_result)
    end = time.time()
    print("Evaluation time cost: {:.6f}s".format(end - start))
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='Test tool for mosaic data augment function')
    parser.add_argument('--annotation_file', type=str, required=True,
                        help='data annotation txt file')
    parser.add_argument('--classes_path', type=str, required=True,
                        help='path to class definitions')
    parser.add_argument('--output_path', type=str, required=False,
                        help='output path for augmented images, default is ./test',
                        default='./test')
    parser.add_argument('--batch_size', type=int, required=False,
                        help="batch size for test data, default=16", default=16)
    parser.add_argument('--model_image_size', type=str, required=False,
                        help='model image input size as <num>x<num>, default 416x416',
                        default='416x416')
    args = parser.parse_args()

    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))
    annotation_lines = get_dataset(args.annotation_file)

    os.makedirs(args.output_path, exist_ok=True)

    image_data = []
    boxes_data = []
    for i in range(args.batch_size):
        annotation_line = annotation_lines[i]
        image, boxes = get_ground_truth_data(annotation_line, input_shape=model_image_size, augment=True)
        # un-normalize image
        image = image * 255.0
        image = image.astype(np.uint8)
        image_data.append(image)
        boxes_data.append(boxes)
    image_data = np.array(image_data)
    boxes_data = np.array(boxes_data)

    image_data, boxes_data = random_mosaic_augment(image_data, boxes_data, jitter=1)
    draw_boxes(image_data, boxes_data, class_names, args.output_path)
def __init__(self, **kwargs):
    super(YOLO_np, self).__init__()
    self.__dict__.update(self._defaults)  # set up default values
    self.__dict__.update(kwargs)  # and update with user overrides
    self.class_names = get_classes(self.classes_path)
    self.anchors = get_anchors(self.anchors_path)
    self.colors = get_colors(self.class_names)
    K.set_learning_phase(0)
    self.yolo_model = self._generate_model()
def __init__(self, **kwargs):
    super(Hourglass, self).__init__()
    self.__dict__.update(self._defaults)  # set up default values
    self.__dict__.update(kwargs)  # and update with user overrides
    if self.skeleton_path:
        self.skeleton_lines = get_skeleton(self.skeleton_path)
    else:
        self.skeleton_lines = None
    self.class_names = get_classes(self.classes_path)
    self.hourglass_model = self._generate_model()
    K.set_learning_phase(0)
def main():
    parser = argparse.ArgumentParser(description='validate YOLO model (h5/pb/onnx/tflite/mnn) with image')
    parser.add_argument('--model_path', help='model file to predict', type=str, required=True)
    parser.add_argument('--image_path', help='image file or directory to predict', type=str, required=True)
    parser.add_argument('--anchors_path', help='path to anchor definitions', type=str, required=True)
    parser.add_argument('--classes_path', help='path to class definitions, default=%(default)s', type=str, default='../../configs/voc_classes.txt')
    parser.add_argument('--model_input_shape', help='model image input shape as <height>x<width>, default=%(default)s', type=str, default='416x416')
    parser.add_argument('--elim_grid_sense', help="Eliminate grid sensitivity", default=False, action="store_true")
    parser.add_argument('--v5_decode', help="Use YOLOv5 prediction decode", default=False, action="store_true")
    parser.add_argument('--loop_count', help='loop inference for certain times', type=int, default=1)
    parser.add_argument('--output_path', help='output path to save predict result, default=%(default)s', type=str, required=False, default=None)
    args = parser.parse_args()

    # param parse
    anchors = get_anchors(args.anchors_path)
    class_names = get_classes(args.classes_path)
    height, width = args.model_input_shape.split('x')
    model_input_shape = (int(height), int(width))
    assert (model_input_shape[0] % 32 == 0 and model_input_shape[1] % 32 == 0), 'model_input_shape should be multiples of 32'

    model = load_val_model(args.model_path)
    if args.model_path.endswith('.mnn'):
        # MNN inference engine need create session
        session = model.createSession()

    # get image file list or single image
    if os.path.isdir(args.image_path):
        image_files = glob.glob(os.path.join(args.image_path, '*'))
        assert args.output_path, 'need to specify output path if you use image directory as input.'
    else:
        image_files = [args.image_path]

    # loop the sample list to predict on each image
    for image_file in image_files:
        # support of tflite model
        if args.model_path.endswith('.tflite'):
            validate_yolo_model_tflite(model, image_file, anchors, class_names, args.elim_grid_sense, args.v5_decode, args.loop_count, args.output_path)
        # support of MNN model
        elif args.model_path.endswith('.mnn'):
            validate_yolo_model_mnn(model, session, image_file, anchors, class_names, args.elim_grid_sense, args.v5_decode, args.loop_count, args.output_path)
        # support of TF 1.x frozen pb model
        elif args.model_path.endswith('.pb'):
            validate_yolo_model_pb(model, image_file, anchors, class_names, model_input_shape, args.elim_grid_sense, args.v5_decode, args.loop_count, args.output_path)
        # support of ONNX model
        elif args.model_path.endswith('.onnx'):
            validate_yolo_model_onnx(model, image_file, anchors, class_names, args.elim_grid_sense, args.v5_decode, args.loop_count, args.output_path)
        # normal keras h5 model
        elif args.model_path.endswith('.h5'):
            validate_yolo_model(model, image_file, anchors, class_names, model_input_shape, args.elim_grid_sense, args.v5_decode, args.loop_count, args.output_path)
        else:
            raise ValueError('invalid model file')
def main():
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS,
                                     description='label statistic info of dataset')
    parser.add_argument('--label_path', required=True, type=str,
                        help='path to png label images')
    parser.add_argument('--classes_path', required=True, type=str,
                        help='path to class definitions')
    parser.add_argument('--dataset_file', required=False, type=str, default=None,
                        help='dataset txt file')
    args = parser.parse_args()

    # prepare class name list, add background class
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names

    label_stat(args.label_path, args.dataset_file, class_names)
def main():
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS,
                                     description='convert labelme json label to voc png label')
    parser.add_argument('--json_file_path', required=True, type=str,
                        help='path to labelme annotated json label files')
    parser.add_argument('--classes_path', type=str, required=False,
                        default='../../../configs/voc_classes.txt',
                        help='path to class definitions, default=%(default)s')
    parser.add_argument('--png_label_path', required=True, type=str,
                        help='output path of converted png label images')
    args = parser.parse_args()

    # add background class to match model & GT
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names

    label_convert(args.json_file_path, args.png_label_path, class_names)
def dataset_visualize(annotation_file, classes_path):
    annotation_lines = get_dataset(annotation_file, shuffle=False)
    # get class names and count class item number
    class_names = get_classes(classes_path)
    colors = get_colors(len(class_names))

    pbar = tqdm(total=len(annotation_lines), desc='Visualize dataset')
    for i, annotation_line in enumerate(annotation_lines):
        pbar.update(1)
        line = annotation_line.split()
        image = Image.open(line[0]).convert('RGB')
        image = np.array(image, dtype='uint8')
        boxes = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

        classes = boxes[:, -1]
        boxes = boxes[:, :-1]
        scores = np.array([1.0] * len(classes))

        image = draw_boxes(image, boxes, classes, scores, class_names, colors, show_score=False)

        # show image file info
        image_file_name = os.path.basename(line[0])
        cv2.putText(image, image_file_name + '({}/{})'.format(i + 1, len(annotation_lines)),
                    (3, 15),
                    cv2.FONT_HERSHEY_PLAIN,
                    fontScale=1,
                    color=(255, 0, 0),
                    thickness=1,
                    lineType=cv2.LINE_AA)

        # convert to BGR for cv2.imshow
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

        cv2.namedWindow("Image", 0)
        cv2.imshow("Image", image)
        keycode = cv2.waitKey(0) & 0xFF
        if keycode == ord('q') or keycode == 27:  # 27 is keycode for Esc
            break
    pbar.close()
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='convert MSCOCO 2017 segment annotation to .png label images')
    parser.add_argument('--annotation_path', type=str, required=True,
                        help='path to MSCOCO 2017 annotation file')
    parser.add_argument('--set', required=False, type=str, default='all',
                        choices=['all', 'train', 'val'],
                        help='convert dataset, default=%(default)s')
    parser.add_argument('--classes_path', type=str, required=False,
                        default='../../../configs/coco_classes.txt',
                        help='path to selected class definitions, default=%(default)s')
    parser.add_argument('--output_path', required=True, type=str,
                        help='output path containing converted png label image and dataset txt')
    args = parser.parse_args()

    # prepare class name list, add background class
    class_names = get_classes(args.classes_path)
    assert len(class_names) < 254, 'PNG image label only support less than 254 classes.'
    class_names = ['background'] + class_names

    # dataset list
    if args.set == 'all':
        datasets = ['train', 'val']
    else:
        datasets = [args.set]

    coco_label_convert(args.annotation_path, datasets, class_names, args.output_path)
def main():
    parser = argparse.ArgumentParser(
        description='check heatmap activation for CNN classifier model (pth) with test images')
    parser.add_argument('--image_path', type=str, required=True,
                        help='Image file or directory to predict')
    parser.add_argument('--model_path', type=str, required=True,
                        help='model file to predict')
    parser.add_argument('--model_input_shape', type=str, required=False,
                        help='model input image shape as <height>x<width>, default=%(default)s',
                        default='224x224')
    parser.add_argument('--heatmap_path', type=str, required=True,
                        help='output heatmap file or directory')
    parser.add_argument('--classes_path', type=str, required=False, default=None,
                        help='path to class definition, optional')
    args = parser.parse_args()

    height, width = args.model_input_shape.split('x')
    args.model_input_shape = (int(height), int(width))

    if args.classes_path:
        class_names = get_classes(args.classes_path)
    else:
        class_names = None

    generate_heatmap(args.image_path, args.model_path, args.model_input_shape, args.heatmap_path, class_names)
def main():
    parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS,
                                     description='Test tool for enhance mosaic data augment function')
    parser.add_argument('--annotation_file', type=str, required=True,
                        help='data annotation txt file')
    parser.add_argument('--classes_path', type=str, required=True,
                        help='path to class definitions')
    parser.add_argument('--output_path', type=str, required=False,
                        help='output path for augmented images, default is ./test',
                        default='./test')
    parser.add_argument('--batch_size', type=int, required=False,
                        help="batch size for test data, default=16", default=16)
    parser.add_argument('--model_image_size', type=str, required=False,
                        help='model image input size as <height>x<width>, default 416x416',
                        default='416x416')
    parser.add_argument('--augment_type', type=str, required=False,
                        help="enhance data augmentation type (mosaic/cutmix), default=mosaic",
                        default='mosaic')
    args = parser.parse_args()

    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))
    assert (model_image_size[0] % 32 == 0 and model_image_size[1] % 32 == 0), 'model_image_size should be multiples of 32'
    annotation_lines = get_dataset(args.annotation_file)

    os.makedirs(args.output_path, exist_ok=True)

    image_data = []
    boxes_data = []
    for i in range(args.batch_size):
        annotation_line = annotation_lines[i]
        image, boxes = get_ground_truth_data(annotation_line, input_shape=model_image_size, augment=True)
        # un-normalize image
        image = image * 255.0
        image = image.astype(np.uint8)
        image_data.append(image)
        boxes_data.append(boxes)
    image_data = np.array(image_data)
    boxes_data = np.array(boxes_data)

    if args.augment_type == 'mosaic':
        image_data, boxes_data = random_mosaic_augment(image_data, boxes_data, prob=1)
    elif args.augment_type == 'cutmix':
        image_data, boxes_data = random_cutmix_augment(image_data, boxes_data, prob=1)
    else:
        raise ValueError('Unsupported augment type')

    draw_boxes(image_data, boxes_data, class_names, args.output_path)
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='TF 2.x post training integer quantization converter for Hourglass keypoint detection model')
    parser.add_argument('--keras_model_file', required=True, type=str,
                        help='path to keras model file')
    parser.add_argument('--dataset_path', required=True, type=str,
                        help='dataset path containing images and annotation file')
    parser.add_argument('--classes_path', required=True, type=str,
                        help='path to keypoint class definition file')
    parser.add_argument('--sample_num', type=int,
                        help='image sample number to feed the converter, default 100',
                        default=100)
    parser.add_argument('--output_file', required=True, type=str,
                        help='output tflite model file')
    #parser.add_argument('--custom_objects', required=False, type=str, help="Custom objects in keras model. Separated with comma if more than one.", default=None)
    args = parser.parse_args()

    # param parse
    class_names = get_classes(args.classes_path)

    post_train_quant_convert(args.keras_model_file, args.dataset_path, class_names, args.sample_num, args.output_file)
    return
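# Context note (general TFLite post-training quantization behavior, not spelled out in
# this snippet): integer quantization needs a representative dataset to calibrate tensor
# ranges, which is presumably why the converter takes --dataset_path and --sample_num;
# post_train_quant_convert would feed sample_num preprocessed images from the dataset to
# the TFLite converter's representative_dataset hook.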
def main(args):
    annotation_file = args.annotation_file
    log_dir = os.path.join('logs', '000')
    classes_path = args.classes_path
    class_names = get_classes(classes_path)
    num_classes = len(class_names)
    print('classes_path =', classes_path)
    print('class_names = ', class_names)
    print('num_classes = ', num_classes)

    anchors = get_anchors(args.anchors_path)
    num_anchors = len(anchors)

    # get freeze level according to CLI option
    if args.weights_path:
        freeze_level = 0
    else:
        freeze_level = 1
    if args.freeze_level is not None:
        freeze_level = args.freeze_level

    # callbacks for training process
    logging = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch')
    checkpoint = ModelCheckpoint(os.path.join(log_dir, 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'),
                                 monitor='val_loss',
                                 verbose=1,
                                 save_weights_only=False,
                                 save_best_only=True,
                                 period=1)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1, cooldown=0, min_lr=1e-10)
    early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=50, verbose=1)
    terminate_on_nan = TerminateOnNaN()

    callbacks = [logging, checkpoint, reduce_lr, early_stopping, terminate_on_nan]

    # get train&val dataset
    dataset = get_dataset(annotation_file)
    if args.val_annotation_file:
        val_dataset = get_dataset(args.val_annotation_file)
        num_train = len(dataset)
        print('num_train = ', num_train)
        num_val = len(val_dataset)
        dataset.extend(val_dataset)
    else:
        val_split = args.val_split
        num_val = int(len(dataset) * val_split)
        num_train = len(dataset) - num_val

    # assign multiscale interval
    if args.multiscale:
        rescale_interval = args.rescale_interval
    else:
        rescale_interval = -1  # Doesn't rescale

    # model input shape check
    input_shape = args.model_image_size
    assert (input_shape[0] % 32 == 0 and input_shape[1] % 32 == 0), 'Multiples of 32 required'

    # get different model type & train&val data generator
    if num_anchors == 9:
        # YOLOv3 use 9 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    elif num_anchors == 6:
        # Tiny YOLOv3 use 6 anchors
        get_train_model = get_yolo3_train_model
        data_generator = yolo3_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo3DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo3DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = True
    elif num_anchors == 5:
        # YOLOv2 use 5 anchors
        get_train_model = get_yolo2_train_model
        data_generator = yolo2_data_generator_wrapper

        # tf.keras.Sequence style data generator
        #train_data_generator = Yolo2DataGenerator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval)
        #val_data_generator = Yolo2DataGenerator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes)

        tiny_version = False
    else:
        raise ValueError('Unsupported anchors number')

    # prepare online evaluation callback
    if args.eval_online:
        eval_callback = EvalCallBack(args.model_type, dataset[num_train:], anchors, class_names,
                                     args.model_image_size, args.model_pruning, log_dir,
                                     eval_epoch_interval=args.eval_epoch_interval,
                                     save_eval_checkpoint=args.save_eval_checkpoint)
        callbacks.append(eval_callback)

    # prepare train/val data shuffle callback
    if args.data_shuffle:
        shuffle_callback = DatasetShuffleCallBack(dataset)
        callbacks.append(shuffle_callback)

    # prepare model pruning config
    pruning_end_step = np.ceil(1.0 * num_train / args.batch_size).astype(np.int32) * args.total_epoch
    if args.model_pruning:
        pruning_callbacks = [sparsity.UpdatePruningStep(), sparsity.PruningSummaries(log_dir=log_dir, profile_batch=0)]
        callbacks = callbacks + pruning_callbacks

    # prepare optimizer
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # get train model
    model = get_train_model(args.model_type, anchors, num_classes,
                            weights_path=args.weights_path,
                            freeze_level=freeze_level,
                            optimizer=optimizer,
                            label_smoothing=args.label_smoothing,
                            model_pruning=args.model_pruning,
                            pruning_end_step=pruning_end_step)

    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)
        # recompile multi gpu model
        model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})
    model.summary()

    # Transfer training some epochs with frozen layers first if needed, to get a stable loss.
    initial_epoch = args.init_epoch
    epochs = initial_epoch + args.transfer_epoch
    print("Transfer training stage")
    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment),
                        steps_per_epoch=max(1, num_train // args.batch_size),
                        #validation_data=val_data_generator,
                        validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes),
                        validation_steps=max(1, num_val // args.batch_size),
                        epochs=epochs,
                        initial_epoch=initial_epoch,
                        #verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Wait 2 seconds for next stage
    time.sleep(2)

    if args.decay_type:
        # rebuild optimizer to apply learning rate decay, only after
        # unfreeze all layers
        callbacks.remove(reduce_lr)
        steps_per_epoch = max(1, num_train // args.batch_size)
        decay_steps = steps_per_epoch * (args.total_epoch - args.init_epoch - args.transfer_epoch)
        optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=args.decay_type, decay_steps=decay_steps)

    # Unfreeze the whole network for further tuning
    # NOTE: more GPU memory is required after unfreezing the body
    print("Unfreeze and continue training, to fine-tune.")
    for i in range(len(model.layers)):
        model.layers[i].trainable = True
    model.compile(optimizer=optimizer, loss={'yolo_loss': lambda y_true, y_pred: y_pred})  # recompile to apply the change

    print('Train on {} samples, val on {} samples, with batch size {}, input_shape {}.'.format(num_train, num_val, args.batch_size, input_shape))
    #model.fit_generator(train_data_generator,
    model.fit_generator(data_generator(dataset[:num_train], args.batch_size, input_shape, anchors, num_classes, args.enhance_augment, rescale_interval),
                        steps_per_epoch=max(1, num_train // args.batch_size),
                        #validation_data=val_data_generator,
                        validation_data=data_generator(dataset[num_train:], args.batch_size, input_shape, anchors, num_classes),
                        validation_steps=max(1, num_val // args.batch_size),
                        epochs=args.total_epoch,
                        initial_epoch=epochs,
                        #verbose=1,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    # Finally store model
    if args.model_pruning:
        if template_model is not None:
            template_model = sparsity.strip_pruning(template_model)
        else:
            model = sparsity.strip_pruning(model)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))
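# Reading note (summarizing the flow above, not new behavior): training runs in two
# stages. The first fit_generator call trains args.transfer_epoch epochs with layers
# frozen per freeze_level to get a stable loss; the second stage unfreezes every layer,
# optionally rebuilds the optimizer with learning rate decay, recompiles, and continues
# from epoch `epochs` up to args.total_epoch before saving trained_final.h5.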
def deepsort(yolo, args):
    #nms_max_overlap = 0.3  # nms threshold

    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC, (5. if args.input == '0' else video_fps), video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking if have, other class
        # from detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create deep_sort box encoder
    encoder = create_box_encoder(args.deepsort_model_path, batch_size=1)

    # create deep_sort tracker
    max_cosine_distance = 0.5  # threshold for cosine distance
    nn_budget = None
    metric = nn_matching.NearestNeighborDistanceMetric("cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)

    # alloc a set of queues to record motion trace for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects and convert bbox from (xmin,ymin,xmax,ymax) to (x,y,w,h)
        boxes, class_names, scores = get_tracking_object(out_boxes, out_classnames, out_scores, tracking_class_names)

        # get encoded features of bbox area image
        features = encoder(frame, boxes)

        # form up detection records
        detections = [Detection(bbox, score, feature, class_name) for bbox, score, class_name, feature in zip(boxes, scores, class_names, features)]

        # Run non-maximum suppression.
        #nms_boxes = np.array([d.tlwh for d in detections])
        #nms_scores = np.array([d.confidence for d in detections])
        #indices = preprocessing.non_max_suppression(nms_boxes, nms_max_overlap, nms_scores)
        #detections = [detections[i] for i in indices]

        # Call the tracker
        tracker.predict()
        tracker.update(detections)

        # show all detection result as white box
        for det in detections:
            bbox = det.to_tlbr()
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, str(det.class_name), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue

            # record tracking info and get bbox
            track_indexes.append(int(track.track_id))
            total_obj_counter.append(int(track.track_id))
            bbox = track.to_tlbr()

            # show all tracking result as color box
            color = [int(c) for c in COLORS[track_indexes[track_count] % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track.track_id), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            if track.class_name:
                cv2.putText(frame, str(track.class_name), (int(bbox[0] + 30), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track.track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion trace
            motion_trace = motion_traces[track.track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]), (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("DeepSORT", 0)
        cv2.resizeWindow('DeepSORT', 1024, 768)
        cv2.imshow('DeepSORT', frame)

        if save_output:
            # save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2

        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
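# Reading note (summary of the loop above, not new behavior): each frame follows the
# usual DeepSORT pipeline: detect with YOLO, encode every box crop into an appearance
# feature with the box encoder, wrap (bbox, score, feature, class) into Detection
# records, then tracker.predict() advances the Kalman filters and tracker.update()
# matches detections to tracks using motion plus cosine appearance distance.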
def sort(yolo, args):
    images_input = True if os.path.isdir(args.input) else False
    if images_input:
        # get images list
        jpeg_files = glob.glob(os.path.join(args.input, '*.jpeg'))
        jpg_files = glob.glob(os.path.join(args.input, '*.jpg'))
        frame_capture = jpeg_files + jpg_files
        frame_capture.sort()
    else:
        # create video capture stream
        frame_capture = cv2.VideoCapture(0 if args.input == '0' else args.input)
        if not frame_capture.isOpened():
            raise IOError("Couldn't open webcam or video")

    # create video save stream if needed
    save_output = True if args.output != "" else False
    if save_output:
        if images_input:
            raise ValueError("image folder input could be saved to video file")

        # here we encode the video to MPEG-4 for better compatibility, you can use ffmpeg later
        # to convert it to x264 to reduce file size:
        # ffmpeg -i test.mp4 -vcodec libx264 -f mp4 test_264.mp4
        #
        #video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else int(frame_capture.get(cv2.CAP_PROP_FOURCC))
        video_FourCC = cv2.VideoWriter_fourcc(*'XVID') if args.input == '0' else cv2.VideoWriter_fourcc(*"mp4v")
        video_fps = frame_capture.get(cv2.CAP_PROP_FPS)
        video_size = (int(frame_capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(frame_capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        out = cv2.VideoWriter(args.output, video_FourCC, (5. if args.input == '0' else video_fps), video_size)

    if args.tracking_classes_path:
        # load the object classes used in tracking if have, other class
        # from detector will be ignored
        tracking_class_names = get_classes(args.tracking_classes_path)
    else:
        tracking_class_names = None

    # create instance of the SORT tracker
    tracker = Sort(max_age=5, min_hits=3, iou_threshold=0.3)

    # alloc a set of queues to record motion trace for each track id
    motion_traces = [deque(maxlen=30) for _ in range(9999)]
    total_obj_counter = []

    # initialize a list of colors to represent each possible class label
    np.random.seed(100)
    COLORS = np.random.randint(0, 255, size=(200, 3), dtype="uint8")

    i = 0
    fps = 0.0
    while True:
        ret, frame = get_frame(frame_capture, i, images_input)
        if ret != True:
            break
        #time.sleep(0.2)
        i += 1

        start_time = time.time()
        image = Image.fromarray(frame[..., ::-1])  # bgr to rgb

        # detect object from image
        _, out_boxes, out_classnames, out_scores = yolo.detect_image(image)
        # get tracking objects
        boxes, class_names, scores = get_tracking_object(out_boxes, out_classnames, out_scores, tracking_class_names, convert_box=False)

        # form up detection records
        if len(boxes) != 0:
            detections = np.array([bbox + [score] for bbox, score, class_name in zip(boxes, scores, class_names)])
        else:
            detections = np.empty((0, 5))

        # Call the tracker
        tracks = tracker.update(detections)

        # show all detection result as white box
        for j, bbox in enumerate(boxes):
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255, 255, 255), 2)
            cv2.putText(frame, class_names[j], (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (255, 255, 255), 2)

        track_indexes = []
        track_count = 0
        for track in tracks:
            bbox = track[:4]
            track_id = int(track[4])

            # record tracking info and get bbox
            track_indexes.append(int(track_id))
            total_obj_counter.append(int(track_id))

            # show all tracking result as color box
            color = [int(c) for c in COLORS[track_id % len(COLORS)]]
            cv2.rectangle(frame, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (color), 3)
            cv2.putText(frame, str(track_id), (int(bbox[0]), int(bbox[1] - 20)), 0, 5e-3 * 150, (color), 2)
            #if track.class_name:
                #cv2.putText(frame, str(track.class_name), (int(bbox[0]+30), int(bbox[1]-20)), 0, 5e-3*150, (color), 2)
            track_count += 1

            # get center point (x,y) of current track bbox and record in queue
            center = (int(((bbox[0]) + (bbox[2])) / 2), int(((bbox[1]) + (bbox[3])) / 2))
            motion_traces[track_id].append(center)

            # draw current center point
            thickness = 5
            cv2.circle(frame, (center), 1, color, thickness)

            # draw motion trace
            motion_trace = motion_traces[track_id]
            for j in range(1, len(motion_trace)):
                if motion_trace[j - 1] is None or motion_trace[j] is None:
                    continue
                thickness = int(np.sqrt(64 / float(j + 1)) * 2)
                cv2.line(frame, (motion_trace[j - 1]), (motion_trace[j]), (color), thickness)

        # show tracking statistics
        total_obj_num = len(set(total_obj_counter))
        cv2.putText(frame, "Total Object Counter: " + str(total_obj_num), (int(20), int(120)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "Current Object Counter: " + str(track_count), (int(20), int(80)), 0, 5e-3 * 200, (0, 255, 0), 2)
        cv2.putText(frame, "FPS: %f" % (fps), (int(20), int(40)), 0, 5e-3 * 200, (0, 255, 0), 3)

        # refresh window
        cv2.namedWindow("SORT", 0)
        cv2.resizeWindow('SORT', 1024, 768)
        #cv2.imshow('SORT', frame)  # Xander commented out

        if save_output:
            # save a frame
            out.write(frame)

        end_time = time.time()
        fps = (fps + (1. / (end_time - start_time))) / 2

        # Press q to stop video
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    # Release everything if job is finished
    if not images_input:
        frame_capture.release()
    if save_output:
        out.release()
    cv2.destroyAllWindows()
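# Reading note (summary, not new behavior): unlike the DeepSORT variant above, SORT here
# uses no appearance features. Each detection row passed to tracker.update() is
# [x1, y1, x2, y2, score] (hence the np.empty((0, 5)) fallback for empty frames), and
# each returned track row is [x1, y1, x2, y2, track_id], associated by IoU with a
# Kalman motion model.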
def main(args):
    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)

    # load trained model for eval
    model = load_model(args.model_path, compile=False)

    # get input size, assume only 1 input
    input_size = tuple(model.input.shape.as_list()[1:3])

    # check & get output size
    output_tensor = model.output
    # check to handle multi-output model
    if isinstance(output_tensor, list):
        output_tensor = output_tensor[-1]
    output_size = tuple(output_tensor.shape.as_list()[1:3])

    # check for any invalid input & output size
    assert None not in input_size, 'Invalid input size.'
    assert None not in output_size, 'Invalid output size.'
    assert output_size[0] == input_size[0] // 4 and output_size[1] == input_size[1] // 4, 'output size should be 1/4 of input size.'

    # prepare validation dataset
    valdata = hourglass_dataset(args.dataset_path, class_names, input_size=input_size, is_train=False)
    print('validation data size', valdata.get_dataset_size())

    # form up the validation result matrix
    # (np.float32 here; the bare np.float alias is deprecated in modern NumPy)
    val_keypoints = np.zeros(shape=(valdata.get_dataset_size(), num_classes, 2), dtype=np.float32)

    count = 0
    batch_size = 8
    val_gen = valdata.generator(batch_size, num_hgstack=1, sigma=1, is_shuffle=False, with_meta=True)

    pbar = tqdm(total=valdata.get_dataset_size(), desc='Eval model')
    # fetch validation data from generator, which will crop out single person area,
    # resize to inres and normalize image
    for _img, _gthmap, _meta in val_gen:
        # get predicted heatmap
        prediction = model.predict(_img)
        if isinstance(prediction, list):
            prediction = prediction[-1]

        # transform predicted heatmap to final keypoint output,
        # and store it into result matrix
        get_final_pred_kps(val_keypoints, prediction, _meta, output_size)

        count += batch_size
        if count > valdata.get_dataset_size():
            break
        pbar.update(batch_size)
    pbar.close()

    # store result matrix, and use it to get PCKh metrics
    eval_pckh(args.model_path, val_keypoints, valdata.get_annotations(), class_names)
    return
def main():
    parser = argparse.ArgumentParser(
        argument_default=argparse.SUPPRESS,
        description='Test tool for enhance mosaic data augment function')
    parser.add_argument('--annotation_file', type=str, required=True,
                        help='data annotation txt file')
    parser.add_argument('--classes_path', type=str, required=True,
                        help='path to class definitions')
    parser.add_argument('--output_path', type=str, required=False,
                        help='output path for augmented images, default=%(default)s',
                        default='./test')
    parser.add_argument('--batch_size', type=int, required=False,
                        help="batch size for test data, default=%(default)s", default=16)
    parser.add_argument('--model_input_shape', type=str, required=False,
                        help='model image input shape as <height>x<width>, default=%(default)s',
                        default='416x416')
    parser.add_argument('--enhance_augment', type=str, required=False,
                        help="enhance data augmentation type, default=%(default)s",
                        default=None, choices=['mosaic', 'mosaic_v5', 'cutmix', None])
    args = parser.parse_args()

    class_names = get_classes(args.classes_path)
    height, width = args.model_input_shape.split('x')
    model_input_shape = (int(height), int(width))
    assert (model_input_shape[0] % 32 == 0 and model_input_shape[1] % 32 == 0), 'model_input_shape should be multiples of 32'
    annotation_lines = get_dataset(args.annotation_file)

    os.makedirs(args.output_path, exist_ok=True)

    image_data = []
    boxes_data = []
    pbar = tqdm(total=args.batch_size, desc='Generate augment image')
    for i in range(args.batch_size):
        pbar.update(1)
        annotation_line = annotation_lines[i]
        image, boxes = get_ground_truth_data(annotation_line, input_shape=model_input_shape, augment=True)
        # denormalize image
        image = denormalize_image(image)
        image_data.append(image)
        boxes_data.append(boxes)
    pbar.close()
    image_data = np.array(image_data)
    boxes_data = np.array(boxes_data)

    if args.enhance_augment == 'mosaic':
        image_data, boxes_data = random_mosaic_augment(image_data, boxes_data, prob=1)
    elif args.enhance_augment == 'mosaic_v5':
        image_data, boxes_data = random_mosaic_augment_v5(image_data, boxes_data, prob=1)
    elif args.enhance_augment == 'cutmix':
        image_data, boxes_data = random_cutmix_augment(image_data, boxes_data, prob=1)
    elif args.enhance_augment is None:
        print('No enhance augment type. Will only apply base augment')
    else:
        raise ValueError('Unsupported augment type')

    draw_boxes(image_data, boxes_data, class_names, args.output_path)
    print('Done. augment images have been saved in', args.output_path)
def main():
    parser = argparse.ArgumentParser(
        description='validate CNN classifier model (h5/pb/onnx/tflite/mnn) with image')
    parser.add_argument('--model_path', help='model file to predict', type=str, required=True)
    parser.add_argument('--image_path', help='image file or directory to predict', type=str, required=True)
    parser.add_argument('--model_input_shape', help='model image input shape as <height>x<width>, default=%(default)s', type=str, default='224x224')
    parser.add_argument('--classes_path', help='path to class name definitions', type=str, required=False)
    parser.add_argument('--loop_count', help='loop inference for certain times', type=int, default=1)
    parser.add_argument('--output_path', help='output path to save predict result, default=%(default)s', type=str, required=False, default=None)
    args = parser.parse_args()

    class_names = None
    if args.classes_path:
        class_names = get_classes(args.classes_path)

    # param parse
    height, width = args.model_input_shape.split('x')
    model_input_shape = (int(height), int(width))

    model = load_val_model(args.model_path)
    if args.model_path.endswith('.mnn'):
        # MNN inference engine need create session
        session = model.createSession()

    # get image file list or single image
    if os.path.isdir(args.image_path):
        image_files = glob.glob(os.path.join(args.image_path, '*'))
        assert args.output_path, 'need to specify output path if you use image directory as input.'
    else:
        image_files = [args.image_path]

    # loop the sample list to predict on each image
    for image_file in image_files:
        # support of tflite model
        if args.model_path.endswith('.tflite'):
            validate_classifier_model_tflite(model, image_file, class_names, args.loop_count, args.output_path)
        # support of MNN model
        elif args.model_path.endswith('.mnn'):
            validate_classifier_model_mnn(model, session, image_file, class_names, args.loop_count, args.output_path)
        # support of TF 1.x frozen pb model
        elif args.model_path.endswith('.pb'):
            validate_classifier_model_pb(model, image_file, class_names, args.loop_count, args.output_path)
        # support of ONNX model
        elif args.model_path.endswith('.onnx'):
            validate_classifier_model_onnx(model, image_file, class_names, args.loop_count, args.output_path)
        # normal keras h5 model
        elif args.model_path.endswith('.h5'):
            validate_classifier_model(model, image_file, class_names, model_input_shape, args.loop_count, args.output_path)
        else:
            raise ValueError('invalid model file')
def main():
    parser = argparse.ArgumentParser(
        description='validate Hourglass model (h5/pb/tflite/mnn) with image')
    parser.add_argument('--model_path', help='model file to predict', type=str, required=True)
    parser.add_argument('--image_file', help='image file to predict', type=str, required=True)
    parser.add_argument('--classes_path', help='path to class definitions, default ../configs/mpii_classes.txt', type=str, required=False, default='../configs/mpii_classes.txt')
    parser.add_argument('--skeleton_path', help='path to keypoint skeleton definitions, default None', type=str, required=False, default=None)
    parser.add_argument('--model_image_size', help='model image input size as <num>x<num>, default 256x256', type=str, default='256x256')
    parser.add_argument('--loop_count', help='loop inference for certain times', type=int, default=1)
    #parser.add_argument('--custom_objects', required=False, type=str, help="Custom objects in keras model (swish/tf). Separated with comma if more than one.", default=None)
    args = parser.parse_args()

    # param parse
    if args.skeleton_path:
        skeleton_lines = get_skeleton(args.skeleton_path)
    else:
        skeleton_lines = None

    class_names = get_classes(args.classes_path)
    height, width = args.model_image_size.split('x')
    model_image_size = (int(height), int(width))

    # support of tflite model
    if args.model_path.endswith('.tflite'):
        validate_hourglass_model_tflite(args.model_path, args.image_file, class_names, skeleton_lines, args.loop_count)
    # support of MNN model
    elif args.model_path.endswith('.mnn'):
        validate_hourglass_model_mnn(args.model_path, args.image_file, class_names, skeleton_lines, args.loop_count)
    # support of TF 1.x frozen pb model
    elif args.model_path.endswith('.pb'):
        validate_hourglass_model_pb(args.model_path, args.image_file, class_names, skeleton_lines, model_image_size, args.loop_count)
    # normal keras h5 model
    elif args.model_path.endswith('.h5'):
        validate_hourglass_model(args.model_path, args.image_file, class_names, skeleton_lines, model_image_size, args.loop_count)
    else:
        raise ValueError('invalid model file')
def main(args):
    log_dir = 'logs/'

    class_names = get_classes(args.classes_path)
    num_classes = len(class_names)
    if args.matchpoint_path:
        matchpoints = get_matchpoints(args.matchpoint_path)
    else:
        matchpoints = None

    # choose model type
    if args.tiny:
        num_channels = 128
        #input_size = (192, 192)
    else:
        num_channels = 256
        #input_size = (256, 256)
    input_size = args.model_image_size

    # get train/val dataset
    train_dataset = hourglass_dataset(args.dataset_path, class_names, input_size=input_size, is_train=True, matchpoints=matchpoints)
    val_dataset = hourglass_dataset(args.dataset_path, class_names, input_size=input_size, is_train=False)

    train_gen = train_dataset.generator(args.batch_size, args.num_stacks, sigma=1,
                                        is_shuffle=True,
                                        rot_flag=True,
                                        scale_flag=True,
                                        h_flip_flag=True,
                                        v_flip_flag=True)

    model_type = get_model_type(args.num_stacks, args.mobile, args.tiny, input_size)

    # callbacks for training process
    tensorboard = TensorBoard(log_dir=log_dir, histogram_freq=0, write_graph=False, write_grads=False, write_images=False, update_freq='batch')
    eval_callback = EvalCallBack(log_dir, val_dataset, class_names, input_size, model_type)
    terminate_on_nan = TerminateOnNaN()
    callbacks = [tensorboard, eval_callback, terminate_on_nan]

    # prepare optimizer
    #optimizer = RMSprop(lr=5e-4)
    optimizer = get_optimizer(args.optimizer, args.learning_rate, decay_type=None)

    # get train model, doesn't specify input size
    model = get_hourglass_model(num_classes, args.num_stacks, num_channels, mobile=args.mobile)
    print('Create {} Stacked Hourglass model with stack number {}, channel number {}. train input size {}'.format(
        'Mobile' if args.mobile else '', args.num_stacks, num_channels, input_size))
    model.summary()

    if args.weights_path:
        model.load_weights(args.weights_path, by_name=True)  #, skip_mismatch=True)
        print('Load weights {}.'.format(args.weights_path))

    # support multi-gpu training
    template_model = None
    if args.gpu_num >= 2:
        # keep the template model for saving result
        template_model = model
        model = multi_gpu_model(model, gpus=args.gpu_num)

    model.compile(optimizer=optimizer, loss=mean_squared_error)

    # start training
    model.fit_generator(generator=train_gen,
                        steps_per_epoch=train_dataset.get_dataset_size() // args.batch_size,
                        epochs=args.total_epoch,
                        initial_epoch=args.init_epoch,
                        workers=1,
                        use_multiprocessing=False,
                        max_queue_size=10,
                        callbacks=callbacks)

    if template_model is not None:
        template_model.save(os.path.join(log_dir, 'trained_final.h5'))
    else:
        model.save(os.path.join(log_dir, 'trained_final.h5'))
    return