def add_augmenters(self, augmenters):
    if not isinstance(augmenters, list):
        raise Exception(
            "augmenters should be a list in func add_augmenters()")
    transform_names = [type(x).__name__ for x in self.transforms]
    for aug in augmenters:
        if type(aug).__name__ in transform_names:
            logging.error(
                "{} is already in ComposedTransforms, please remove it from the augmenters passed to add_augmenters()."
                .format(type(aug).__name__))
    self.transforms = augmenters + self.transforms
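# Usage sketch (illustrative only, not part of the library): assuming a PaddleX
# 1.x composed-transforms object that provides add_augmenters(), extra data
# augmentation operators are prepended before the built-in transforms. The
# constructor arguments and operator names below are assumptions for the sake
# of the example; adapt them to the actual transforms module in use.
#
#   import paddlex.cls.transforms as T
#   train_transforms = T.ComposedClsTransforms(mode='train', crop_size=[224, 224])
#   # the new operators run before the composed pipeline
#   train_transforms.add_augmenters([T.RandomDistort(), T.RandomHorizontalFlip()])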
def split_coco_dataset(dataset_dir, val_percent, test_percent, save_dir):
    # matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
    # or matplotlib.backends is imported for the first time;
    # pycocotools imports matplotlib internally, so set the backend here.
    import matplotlib
    matplotlib.use('Agg')
    from pycocotools.coco import COCO

    if not osp.exists(osp.join(dataset_dir, "annotations.json")):
        logging.error(
            "'annotations.json' is not found in {}!".format(dataset_dir))

    annotation_file = osp.join(dataset_dir, "annotations.json")
    coco = COCO(annotation_file)
    img_ids = coco.getImgIds()
    cat_ids = coco.getCatIds()
    anno_ids = coco.getAnnIds()

    val_num = int(len(img_ids) * val_percent)
    test_num = int(len(img_ids) * test_percent)
    train_num = len(img_ids) - val_num - test_num
    random.shuffle(img_ids)
    train_files_ids = img_ids[:train_num]
    val_files_ids = img_ids[train_num:train_num + val_num]
    test_files_ids = img_ids[train_num + val_num:]

    for img_id_list in [train_files_ids, val_files_ids, test_files_ids]:
        img_anno_ids = coco.getAnnIds(imgIds=img_id_list, iscrowd=0)
        imgs = coco.loadImgs(img_id_list)
        instances = coco.loadAnns(img_anno_ids)
        categories = coco.loadCats(cat_ids)
        img_dict = {
            "annotations": instances,
            "images": imgs,
            "categories": categories
        }
        if img_id_list == train_files_ids:
            json_file = open(osp.join(save_dir, 'train.json'), 'w+')
            json.dump(img_dict, json_file, cls=MyEncoder)
        elif img_id_list == val_files_ids:
            json_file = open(osp.join(save_dir, 'val.json'), 'w+')
            json.dump(img_dict, json_file, cls=MyEncoder)
        elif img_id_list == test_files_ids and len(test_files_ids):
            json_file = open(osp.join(save_dir, 'test.json'), 'w+')
            json.dump(img_dict, json_file, cls=MyEncoder)

    return train_num, val_num, test_num
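# Example call (illustrative only): the dataset path below is hypothetical and
# must contain an 'annotations.json' in MSCOCO format; the split ratios are
# arbitrary. train.json and val.json (plus test.json when test_percent > 0)
# are written to save_dir.
if __name__ == '__main__':
    train_num, val_num, test_num = split_coco_dataset(
        dataset_dir='./coco_dataset',  # hypothetical dataset directory
        val_percent=0.2,
        test_percent=0.1,
        save_dir='./coco_dataset')
    print("train/val/test sizes: {}/{}/{}".format(train_num, val_num,
                                                  test_num))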
def main():
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    import paddlex as pdx

    if len(sys.argv) < 2:
        print("Use command 'paddlex -h' to print the help information\n")
        return
    parser = arg_parser()
    args = parser.parse_args()

    if args.version:
        print("PaddleX-{}".format(pdx.__version__))
        print("Repo: https://github.com/PaddlePaddle/PaddleX.git")
        print("Email: [email protected]")
        return

    if args.export_inference:
        assert args.model_dir is not None, "--model_dir should be defined while exporting inference model"
        assert args.save_dir is not None, "--save_dir should be defined to save inference model"
        fixed_input_shape = None
        if args.fixed_input_shape is not None:
            fixed_input_shape = eval(args.fixed_input_shape)
            assert len(
                fixed_input_shape
            ) == 2, "len of fixed_input_shape must be 2, such as [224,224]"
        else:
            fixed_input_shape = None
        model = pdx.load_model(args.model_dir, fixed_input_shape)
        model.export_inference_model(args.save_dir)

    if args.export_onnx:
        assert args.model_dir is not None, "--model_dir should be defined while exporting onnx model"
        assert args.save_dir is not None, "--save_dir should be defined to create onnx model"
        model = pdx.load_model(args.model_dir)
        if model.status == "Normal" or model.status == "Prune":
            logging.error(
                "Only the exported inference model is supported, try to export the model first as below,",
                exit=False)
            logging.error(
                "paddlex --export_inference --model_dir model_path --save_dir infer_model"
            )
        pdx.convertor.export_onnx_model(model, args.save_dir)
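# Command-line usage sketch for the two export branches above (the paths are
# hypothetical placeholders):
#
#   # export a trained model as an inference model, fixing the input shape to 224x224
#   paddlex --export_inference --model_dir ./output/best_model \
#           --save_dir ./inference_model --fixed_input_shape [224,224]
#
#   # convert an exported inference model to ONNX
#   paddlex --export_onnx --model_dir ./inference_model --save_dir ./onnx_model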
def build_program(self):
    if hasattr(paddlex, 'model_built') and paddlex.model_built:
        logging.error(
            "Function model.train() can only be called once in your code.")
    paddlex.model_built = True
    # Build the training network
    self.train_inputs, self.train_outputs = self.build_net(mode='train')
    self.train_prog = fluid.default_main_program()
    startup_prog = fluid.default_startup_program()
    # Build the prediction (test) network
    self.test_prog = fluid.Program()
    with fluid.program_guard(self.test_prog, startup_prog):
        with fluid.unique_name.guard():
            self.test_inputs, self.test_outputs = self.build_net(
                mode='test')
    self.test_prog = self.test_prog.clone(for_test=True)
def default_optimizer(self, learning_rate, warmup_steps, warmup_start_lr,
                      lr_decay_epochs, lr_decay_gamma, num_steps_each_epoch):
    if warmup_steps > lr_decay_epochs[0] * num_steps_each_epoch:
        logging.error(
            "In function train(), parameters should satisfy: warmup_steps <= lr_decay_epochs[0] * num_steps_each_epoch",
            exit=False)
        logging.error(
            "See this doc for more information: https://github.com/PaddlePaddle/PaddleX/blob/develop/docs/appendix/parameters.md#notice",
            exit=False)
        logging.error(
            "warmup_steps should be less than {} or lr_decay_epochs[0] should be greater than {}, please modify 'lr_decay_epochs' or 'warmup_steps' in the train() function"
            .format(lr_decay_epochs[0] * num_steps_each_epoch,
                    warmup_steps // num_steps_each_epoch))
    boundaries = [b * num_steps_each_epoch for b in lr_decay_epochs]
    values = [(lr_decay_gamma**i) * learning_rate
              for i in range(len(lr_decay_epochs) + 1)]
    lr_decay = fluid.layers.piecewise_decay(
        boundaries=boundaries, values=values)
    lr_warmup = fluid.layers.linear_lr_warmup(
        learning_rate=lr_decay,
        warmup_steps=warmup_steps,
        start_lr=warmup_start_lr,
        end_lr=learning_rate)
    optimizer = fluid.optimizer.Momentum(
        learning_rate=lr_warmup,
        momentum=0.9,
        regularization=fluid.regularizer.L2Decay(1e-04))
    return optimizer
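# Worked example of the schedule built above (the numbers are illustrative):
# with learning_rate=0.025, warmup_steps=1000, warmup_start_lr=0.0,
# lr_decay_epochs=[8, 11], lr_decay_gamma=0.1 and num_steps_each_epoch=500:
#   boundaries = [8 * 500, 11 * 500] = [4000, 5500]
#   values     = [0.025, 0.0025, 0.00025]
# so the learning rate warms up linearly from 0.0 to 0.025 over the first
# 1000 steps, then drops by 10x at step 4000 and again at step 5500.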
def export_onnx_model(model, save_file, opset_version=10):
    if model.__class__.__name__ == "FastSCNN" or (
            model.model_type == "detector" and
            model.__class__.__name__ != "YOLOv3"):
        logging.error(
            "Only image classification models, detection models (YOLOv3 only) and semantic segmentation models (except FastSCNN) can be exported to ONNX"
        )
    try:
        import paddle2onnx
    except:
        logging.error(
            "You need to install paddle2onnx first, pip install paddle2onnx==0.4"
        )
    import paddle2onnx as p2o
    if p2o.__version__ != '0.4':
        logging.error(
            "You need to install paddle2onnx==0.4, but the installed version of paddle2onnx is {}"
            .format(p2o.__version__))
    if opset_version == 10 and model.__class__.__name__ == "YOLOv3":
        logging.warning(
            "Exporting for OpenVINO by default; the multiclass_nms output exported to ONNX will contain the background class. If you need ONNX output completely consistent with Paddle, please use paddle2onnx to export directly"
        )
        p2o.register_op_mapper('multiclass_nms', MultiClassNMS4OpenVINO)
    p2o.program2onnx(
        model.test_prog,
        scope=model.scope,
        save_file=save_file,
        opset_version=opset_version)
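# Usage sketch (illustrative only): convert an already-exported inference model
# to ONNX. The './inference_model' and 'model.onnx' paths are hypothetical
# placeholders; load_model() is the loader defined elsewhere in this package.
#
#   import paddlex as pdx
#   model = pdx.load_model('./inference_model')
#   export_onnx_model(model, save_file='./onnx_model/model.onnx', opset_version=10)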
def export_onnx_model(model, save_dir):
    if model.model_type == "detector" or model.__class__.__name__ == "FastSCNN":
        logging.error(
            "Only image classification models and semantic segmentation models (except FastSCNN) can be exported to ONNX"
        )
    try:
        import x2paddle
        if x2paddle.__version__ < '0.7.4':
            logging.error("You need to upgrade x2paddle >= 0.7.4")
    except:
        logging.error(
            "You need to install x2paddle first, pip install x2paddle>=0.7.4")
    from x2paddle.op_mapper.paddle_op_mapper import PaddleOpMapper
    mapper = PaddleOpMapper()
    mapper.convert(model.test_prog, save_dir)
def main():
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ""
    import paddlex as pdx

    if len(sys.argv) < 2:
        print("Use command 'paddlex -h' to print the help information\n")
        return
    parser = arg_parser()
    args = parser.parse_args()

    if args.version:
        print("PaddleX-{}".format(pdx.__version__))
        print("Repo: https://github.com/PaddlePaddle/PaddleX.git")
        print("Email: [email protected]")
        return

    if args.export_inference:
        assert args.model_dir is not None, "--model_dir should be defined while exporting inference model"
        assert args.save_dir is not None, "--save_dir should be defined to save inference model"
        fixed_input_shape = None
        if args.fixed_input_shape is not None:
            fixed_input_shape = eval(args.fixed_input_shape)
            assert len(
                fixed_input_shape
            ) == 2, "len of fixed_input_shape must be 2, such as [224,224]"
        else:
            fixed_input_shape = None
        model = pdx.load_model(args.model_dir, fixed_input_shape)
        model.export_inference_model(args.save_dir)

    if args.export_onnx:
        assert args.model_dir is not None, "--model_dir should be defined while exporting onnx model"
        assert args.save_dir is not None, "--save_dir should be defined to create onnx model"
        model = pdx.load_model(args.model_dir)
        if model.status == "Normal" or model.status == "Prune":
            logging.error(
                "Only the exported inference model is supported, try to export the model first as below,",
                exit=False)
            logging.error(
                "paddlex --export_inference --model_dir model_path --save_dir infer_model"
            )
        save_file = os.path.join(args.save_dir, 'paddle2onnx_model.onnx')
        pdx.converter.export_onnx_model(model, save_file, args.onnx_opset)

    if args.data_conversion:
        assert args.source is not None, "--source should be defined while converting dataset"
        assert args.to is not None, "--to should be defined to confirm the target dataset format"
        assert args.pics is not None, "--pics should be defined to confirm the pictures path"
        assert args.annotations is not None, "--annotations should be defined to confirm the annotations path"
        assert args.save_dir is not None, "--save_dir should be defined to store the target dataset"
        if args.source not in ['labelme', 'jingling', 'easydata']:
            logging.error(
                "The source format {} is not one of labelme/jingling/easydata".
                format(args.source),
                exit=False)
        if args.to not in ['PascalVOC', 'MSCOCO', 'SEG', 'ImageNet']:
            logging.error(
                "The target format {} is not one of PascalVOC/MSCOCO/SEG/ImageNet".
                format(args.to),
                exit=False)
        if args.source == 'labelme' and args.to == 'ImageNet':
            logging.error(
                "The labelme dataset can not be converted to the ImageNet dataset.",
                exit=False)
        if args.source == 'jingling' and args.to == 'PascalVOC':
            logging.error(
                "The jingling dataset can not be converted to the PascalVOC dataset.",
                exit=False)
        if not osp.exists(args.save_dir):
            os.makedirs(args.save_dir)
        pdx.tools.convert.dataset_conversion(args.source, args.to, args.pics,
                                             args.annotations, args.save_dir)

    if args.split_dataset:
        assert args.dataset_dir is not None, "--dataset_dir should be defined while splitting dataset"
        assert args.format is not None, "--format should be defined while splitting dataset"
        assert args.val_value is not None, "--val_value should be defined while splitting dataset"
        dataset_dir = args.dataset_dir
        dataset_format = args.format.lower()
        val_value = float(args.val_value)
        test_value = float(args.test_value
                           if args.test_value is not None else 0)
        save_dir = dataset_dir
        if dataset_format not in ["coco", "imagenet", "voc", "seg"]:
            logging.error(
                "The dataset format is not correctly defined. (supported: COCO/ImageNet/VOC/Seg)"
            )
        if not osp.exists(dataset_dir):
            logging.error("The path of the dataset to be split doesn't exist.")
        if val_value <= 0 or val_value >= 1 or test_value < 0 or test_value >= 1 or val_value + test_value >= 1:
            logging.error(
                "The split values are invalid: val_value must be in (0, 1), test_value in [0, 1), and their sum must be less than 1."
            )
        pdx.tools.split.dataset_split(dataset_dir, dataset_format, val_value,
                                      test_value, save_dir)
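# Command-line usage sketch for the dataset tools above (the paths are
# hypothetical placeholders):
#
#   # convert a labelme-annotated dataset to MSCOCO format
#   paddlex --data_conversion --source labelme --to MSCOCO \
#           --pics ./pics --annotations ./annotations --save_dir ./converted_dataset
#
#   # split a VOC-format dataset into train/val/test subsets
#   paddlex --split_dataset --format VOC --dataset_dir ./voc_dataset \
#           --val_value 0.2 --test_value 0.1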
def load_model(model_dir, fixed_input_shape=None):
    model_scope = fluid.Scope()
    if not osp.exists(model_dir):
        logging.error("model_dir '{}' does not exist!".format(model_dir))
    if not osp.exists(osp.join(model_dir, "model.yml")):
        raise Exception("There is no model.yml in {}".format(model_dir))
    with open(osp.join(model_dir, "model.yml")) as f:
        info = yaml.load(f.read(), Loader=yaml.Loader)

    if 'status' in info:
        status = info['status']
    elif 'save_method' in info:
        # Compatible with older versions of PaddleX
        status = info['save_method']

    if not hasattr(paddlex.cv.models, info['Model']):
        raise Exception("There is no attribute {} in paddlex.cv.models".format(
            info['Model']))
    if 'model_name' in info['_init_params']:
        del info['_init_params']['model_name']
    model = getattr(paddlex.cv.models, info['Model'])(**info['_init_params'])

    model.fixed_input_shape = fixed_input_shape
    if '_Attributes' in info:
        if 'fixed_input_shape' in info['_Attributes']:
            fixed_input_shape = info['_Attributes']['fixed_input_shape']
            if fixed_input_shape is not None:
                logging.info(
                    "Model already has fixed_input_shape with {}".format(
                        fixed_input_shape))
                model.fixed_input_shape = fixed_input_shape
            else:
                info['_Attributes'][
                    'fixed_input_shape'] = model.fixed_input_shape

    if info['Model'].count('RCNN') > 0:
        if info['_init_params']['with_fpn']:
            if model.fixed_input_shape is not None:
                if model.fixed_input_shape[0] % 32 > 0:
                    raise Exception(
                        "The first value in fixed_input_shape must be a multiple of 32, but received {}."
                        .format(model.fixed_input_shape[0]))
                if model.fixed_input_shape[1] % 32 > 0:
                    raise Exception(
                        "The second value in fixed_input_shape must be a multiple of 32, but received {}."
                        .format(model.fixed_input_shape[1]))

    with fluid.scope_guard(model_scope):
        if status == "Normal" or \
                status == "Prune" or status == "fluid.save":
            startup_prog = fluid.Program()
            model.test_prog = fluid.Program()
            with fluid.program_guard(model.test_prog, startup_prog):
                with fluid.unique_name.guard():
                    model.test_inputs, model.test_outputs = model.build_net(
                        mode='test')
            model.test_prog = model.test_prog.clone(for_test=True)
            model.exe.run(startup_prog)
            if status == "Prune":
                from .slim.prune import update_program
                model.test_prog = update_program(
                    model.test_prog,
                    model_dir,
                    model.places[0],
                    scope=model_scope)
            import pickle
            with open(osp.join(model_dir, 'model.pdparams'), 'rb') as f:
                load_dict = pickle.load(f)
            fluid.io.set_program_state(model.test_prog, load_dict)
        elif status == "Infer" or \
                status == "Quant" or status == "fluid.save_inference_model":
            [prog, input_names, outputs] = fluid.io.load_inference_model(
                model_dir, model.exe, params_filename='__params__')
            model.test_prog = prog
            test_outputs_info = info['_ModelInputsOutputs']['test_outputs']
            model.test_inputs = OrderedDict()
            model.test_outputs = OrderedDict()
            for name in input_names:
                model.test_inputs[name] = model.test_prog.global_block().var(
                    name)
            for i, out in enumerate(outputs):
                var_desc = test_outputs_info[i]
                model.test_outputs[var_desc[0]] = out

    if 'Transforms' in info:
        transforms_mode = info.get('TransformsMode', 'RGB')
        # Fix the model's input shape
        fix_input_shape(info, fixed_input_shape=model.fixed_input_shape)
        if transforms_mode == 'RGB':
            to_rgb = True
        else:
            to_rgb = False
        if 'BatchTransforms' in info:
            # Compatible with models saved by older versions of PaddleX
            model.test_transforms = build_transforms_v1(
                model.model_type, info['Transforms'], info['BatchTransforms'])
            model.eval_transforms = copy.deepcopy(model.test_transforms)
        else:
            model.test_transforms = build_transforms(
                model.model_type, info['Transforms'], to_rgb)
            model.eval_transforms = copy.deepcopy(model.test_transforms)

    if '_Attributes' in info:
        for k, v in info['_Attributes'].items():
            if k in model.__dict__:
                model.__dict__[k] = v

    logging.info("Model[{}] loaded.".format(info['Model']))
    model.scope = model_scope
    model.trainable = False
    model.status = status
    return model
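# Usage sketch (illustrative only): load a saved model directory and reuse its
# transforms for prediction. The path is a hypothetical placeholder, and
# predict() is assumed to be provided by the concrete model class (it is not
# defined in this file).
#
#   import paddlex as pdx
#   model = pdx.load_model('./output/best_model')
#   result = model.predict('test.jpg')  # assumed to use model.test_transforms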
def split_voc_dataset(dataset_dir, val_percent, test_percent, save_dir):
    if not osp.exists(osp.join(dataset_dir, "JPEGImages")):
        logging.error("'JPEGImages' is not found in {}!".format(dataset_dir))
    if not osp.exists(osp.join(dataset_dir, "Annotations")):
        logging.error("'Annotations' is not found in {}!".format(dataset_dir))

    all_image_files = list_files(osp.join(dataset_dir, "JPEGImages"))

    image_anno_list = list()
    label_list = list()
    for image_file in all_image_files:
        if not is_pic(image_file):
            continue
        anno_name = replace_ext(image_file, "xml")
        if osp.exists(osp.join(dataset_dir, "Annotations", anno_name)):
            image_anno_list.append([image_file, anno_name])
            try:
                tree = ET.parse(
                    osp.join(dataset_dir, "Annotations", anno_name))
            except:
                raise Exception(
                    "The file {} is not a well-formed XML file, please check the annotation file"
                    .format(osp.join(dataset_dir, "Annotations", anno_name)))
            objs = tree.findall("object")
            for i, obj in enumerate(objs):
                cname = obj.find('name').text
                if cname not in label_list:
                    label_list.append(cname)
        else:
            logging.error("The annotation file {} doesn't exist!".format(
                anno_name))

    random.shuffle(image_anno_list)
    image_num = len(image_anno_list)
    val_num = int(image_num * val_percent)
    test_num = int(image_num * test_percent)
    train_num = image_num - val_num - test_num

    train_image_anno_list = image_anno_list[:train_num]
    val_image_anno_list = image_anno_list[train_num:train_num + val_num]
    test_image_anno_list = image_anno_list[train_num + val_num:]

    with open(
            osp.join(save_dir, 'train_list.txt'), mode='w',
            encoding='utf-8') as f:
        for x in train_image_anno_list:
            file = osp.join("JPEGImages", x[0])
            label = osp.join("Annotations", x[1])
            f.write('{} {}\n'.format(file, label))
    with open(
            osp.join(save_dir, 'val_list.txt'), mode='w',
            encoding='utf-8') as f:
        for x in val_image_anno_list:
            file = osp.join("JPEGImages", x[0])
            label = osp.join("Annotations", x[1])
            f.write('{} {}\n'.format(file, label))
    if len(test_image_anno_list):
        with open(
                osp.join(save_dir, 'test_list.txt'), mode='w',
                encoding='utf-8') as f:
            for x in test_image_anno_list:
                file = osp.join("JPEGImages", x[0])
                label = osp.join("Annotations", x[1])
                f.write('{} {}\n'.format(file, label))
    with open(
            osp.join(save_dir, 'labels.txt'), mode='w',
            encoding='utf-8') as f:
        for l in sorted(label_list):
            f.write('{}\n'.format(l))

    return train_num, val_num, test_num
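# Example call (illustrative only): the dataset path below is hypothetical and
# must contain 'JPEGImages' and 'Annotations' subdirectories in PascalVOC
# layout; the split ratios are arbitrary. train_list.txt, val_list.txt and
# labels.txt (plus test_list.txt when test_percent > 0) are written to save_dir.
if __name__ == '__main__':
    train_num, val_num, test_num = split_voc_dataset(
        dataset_dir='./voc_dataset',  # hypothetical dataset directory
        val_percent=0.2,
        test_percent=0.1,
        save_dir='./voc_dataset')
    print("train/val/test sizes: {}/{}/{}".format(train_num, val_num,
                                                  test_num))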
def get_pretrain_weights(flag, class_name, backbone, save_dir):
    if flag is None:
        return None
    elif osp.isdir(flag):
        return flag
    elif osp.isfile(flag):
        return flag
    warning_info = "{} cannot be fine-tuned with weights pretrained on the {} dataset, so pretrain_weights is forced to be set to {}"
    if flag == 'COCO':
        if class_name == 'DeepLabv3p' and backbone in [
                'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
                'MobileNetV2_x1.5', 'MobileNetV2_x2.0',
                'MobileNetV3_large_x1_0_ssld'
        ]:
            model_name = '{}_{}'.format(class_name, backbone)
            logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
            flag = 'IMAGENET'
        elif class_name == 'HRNet':
            logging.warning(warning_info.format(class_name, flag, 'IMAGENET'))
            flag = 'IMAGENET'
        elif class_name == 'FastSCNN':
            logging.warning(
                warning_info.format(class_name, flag, 'CITYSCAPES'))
            flag = 'CITYSCAPES'
    elif flag == 'CITYSCAPES':
        model_name = '{}_{}'.format(class_name, backbone)
        if class_name == 'UNet':
            logging.warning(warning_info.format(class_name, flag, 'COCO'))
            flag = 'COCO'
        if class_name == 'HRNet' and backbone.split('_')[-1] in [
                'W30', 'W32', 'W40', 'W48', 'W60', 'W64'
        ]:
            logging.warning(warning_info.format(backbone, flag, 'IMAGENET'))
            flag = 'IMAGENET'
        if class_name == 'DeepLabv3p' and backbone in [
                'Xception41', 'MobileNetV2_x0.25', 'MobileNetV2_x0.5',
                'MobileNetV2_x1.5', 'MobileNetV2_x2.0'
        ]:
            model_name = '{}_{}'.format(class_name, backbone)
            logging.warning(warning_info.format(model_name, flag, 'IMAGENET'))
            flag = 'IMAGENET'
    elif flag == 'IMAGENET':
        if class_name == 'UNet':
            logging.warning(warning_info.format(class_name, flag, 'COCO'))
            flag = 'COCO'
        elif class_name == 'FastSCNN':
            logging.warning(
                warning_info.format(class_name, flag, 'CITYSCAPES'))
            flag = 'CITYSCAPES'
    elif flag == 'BAIDU10W':
        if class_name not in ['ResNet50_vd']:
            raise Exception(
                "Only the classifier ResNet50_vd supports BAIDU10W pretrained weights"
            )

    if flag == 'IMAGENET':
        new_save_dir = save_dir
        if hasattr(paddlex, 'pretrain_dir'):
            new_save_dir = paddlex.pretrain_dir
        if backbone.startswith('Xception'):
            backbone = 'Seg{}'.format(backbone)
        elif backbone == 'MobileNetV2':
            backbone = 'MobileNetV2_x1.0'
        elif backbone == 'MobileNetV3_small_ssld':
            backbone = 'MobileNetV3_small_x1_0_ssld'
        elif backbone == 'MobileNetV3_large_ssld':
            backbone = 'MobileNetV3_large_x1_0_ssld'
        if class_name in ['YOLOv3', 'FasterRCNN', 'MaskRCNN']:
            if backbone == 'ResNet50':
                backbone = 'DetResNet50'
        assert backbone in image_pretrain, "There are no ImageNet pretrained weights for {}, you may try COCO.".format(
            backbone)

        if getattr(paddlex, 'gui_mode', False):
            url = image_pretrain[backbone]
            fname = osp.split(url)[-1].split('.')[0]
            paddlex.utils.download_and_decompress(url, path=new_save_dir)
            return osp.join(new_save_dir, fname)

        import paddlehub as hub
        try:
            logging.info(
                "Connecting PaddleHub server to get pretrain weights...")
            hub.download(backbone, save_path=new_save_dir)
        except Exception as e:
            logging.error(
                "Couldn't download pretrained weights, you can download them manually from {} (decompress the file if it is a compressed file), and set the pretrained weights yourself"
                .format(image_pretrain[backbone]),
                exit=False)
            if isinstance(e, hub.ResourceNotFoundError):
                raise Exception(
                    "Resource for backbone {} not found".format(backbone))
            elif isinstance(e, hub.ServerConnectionError):
                raise Exception(
                    "Cannot get resource for backbone {}, please check your internet connection"
                    .format(backbone))
            else:
                raise Exception(
                    "Unexpected error, please make sure paddlehub >= 1.6.2")
        return osp.join(new_save_dir, backbone)
    elif flag in ['COCO', 'CITYSCAPES']:
        new_save_dir = save_dir
        if hasattr(paddlex, 'pretrain_dir'):
            new_save_dir = paddlex.pretrain_dir
        if class_name in [
                'YOLOv3', 'FasterRCNN', 'MaskRCNN', 'DeepLabv3p', 'PPYOLO'
        ]:
            backbone = '{}_{}'.format(class_name, backbone)
        backbone = "{}_{}".format(backbone, flag)
        if flag == 'COCO':
            url = coco_pretrain[backbone]
        elif flag == 'CITYSCAPES':
            url = cityscapes_pretrain[backbone]
        fname = osp.split(url)[-1].split('.')[0]

        if getattr(paddlex, 'gui_mode', False):
            paddlex.utils.download_and_decompress(url, path=new_save_dir)
            return osp.join(new_save_dir, fname)

        import paddlehub as hub
        try:
            logging.info(
                "Connecting PaddleHub server to get pretrain weights...")
            hub.download(backbone, save_path=new_save_dir)
        except Exception as e:
            logging.error(
                "Couldn't download pretrained weights, you can download them manually from {} (decompress the file if it is a compressed file), and set the pretrained weights yourself"
                .format(url),
                exit=False)
            if isinstance(e, hub.ResourceNotFoundError):
                raise Exception(
                    "Resource for backbone {} not found".format(backbone))
            elif isinstance(e, hub.ServerConnectionError):
                raise Exception(
                    "Cannot get resource for backbone {}, please check your internet connection"
                    .format(backbone))
            else:
                raise Exception(
                    "Unexpected error, please make sure paddlehub >= 1.6.2")
        return osp.join(new_save_dir, backbone)
    elif flag == 'BAIDU10W':
        new_save_dir = save_dir
        if hasattr(paddlex, 'pretrain_dir'):
            new_save_dir = paddlex.pretrain_dir
        backbone = backbone + '_BAIDU10W'
        url = baidu10w_pretrain[backbone]
        fname = osp.split(url)[-1].split('.')[0]

        if getattr(paddlex, 'gui_mode', False):
            paddlex.utils.download_and_decompress(url, path=new_save_dir)
            return osp.join(new_save_dir, fname)

        import paddlehub as hub
        try:
            logging.info(
                "Connecting PaddleHub server to get pretrain weights...")
            hub.download(backbone, save_path=new_save_dir)
        except Exception as e:
            logging.error(
                "Couldn't download pretrained weights, you can download them manually from {} (decompress the file if it is a compressed file), and set the pretrained weights yourself"
                .format(url),
                exit=False)
            if isinstance(e, hub.ResourceNotFoundError):
                raise Exception(
                    "Resource for backbone {} not found".format(backbone))
            elif isinstance(e, hub.ServerConnectionError):
                raise Exception(
                    "Cannot get resource for backbone {}, please check your internet connection"
                    .format(backbone))
            else:
                raise Exception(
                    "Unexpected error, please make sure paddlehub >= 1.6.2")
        return osp.join(new_save_dir, backbone)
    else:
        logging.error(
            "Path of pretrained weights '{}' does not exist!".format(flag))
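# Usage sketch (illustrative only): resolve pretrained weights for a model
# before training. The save_dir is a hypothetical placeholder; 'IMAGENET',
# 'COCO', 'CITYSCAPES' and 'BAIDU10W' are the dataset flags handled above, and
# an existing local directory or file path is returned unchanged.
#
#   weights_path = get_pretrain_weights(
#       flag='IMAGENET', class_name='YOLOv3', backbone='DarkNet53',
#       save_dir='./pretrain_weights')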