def __init__(self): self.module = hub.Module(name="lac")
def get_task(args, schema_labels, id):
    # Load a PaddleHub pretrained model: ERNIE Tiny or RoBERTa large.
    # More pretrained models: https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    # model_name = "ernie_tiny"
    model_name = "chinese-roberta-wwm-ext-large"
    module = hub.Module(name=model_name)
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Load the dataset and read it with SequenceLabelReader.
    dataset = EEDataset(args.data_dir, schema_labels, model=args.do_model)
    reader = SequenceLabelReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Build the sequence labeling transfer network.
    # Use ERNIE's token-level output "sequence_output" as the input of the transfer network.
    sequence_output = outputs["sequence_output"]
    # sequence_output = fluid.layers.dropout(
    #     x=sequence_output,
    #     dropout_prob=args.dropout,
    #     dropout_implementation="upscale_in_train")

    # Set the feed_list of variables required by the model program.
    # The order below must be kept.
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name
    ]

    # Select the optimization strategy.
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Configure the run settings.
    config = hub.RunConfig(
        log_interval=100,
        eval_interval=args.eval_step,
        save_ckpt_interval=args.model_save_step,
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        # enable_memory_optim=True,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Define the sequence labeling transfer task.
    seq_label_task = hub.SequenceLabelTask(
        data_reader=reader,
        feature=sequence_output,
        feed_list=feed_list,
        max_seq_len=args.max_seq_len,
        num_classes=dataset.num_labels,
        config=config,
        add_crf=args.add_crf)
    seq_label_task.main_program.random_seed = args.random_seed
    add_hook(args, seq_label_task, id)
    return seq_label_task, reader
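# A minimal usage sketch for the helper above. Assumptions: `args`, `schema_labels`
# and the `add_hook` callback are defined elsewhere in this script, and the id value
# 0 is a placeholder.
seq_label_task, reader = get_task(args, schema_labels, id=0)
if args.do_train:
    seq_label_task.finetune_and_eval()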
import paddle.fluid as fluid
import paddlehub as hub

module = hub.Module(name="ernie")
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)
pooled_output = outputs["pooled_output"]
sequence_output = outputs["sequence_output"]

ds = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(
    dataset=ds, vocab_path=module.get_vocab_path(), max_seq_len=128)

for e in ds.get_train_examples():
    print(e.text_a, e.label)

strategy = hub.AdamWeightDecayStrategy(
    learning_rate=1e-4,
    lr_scheduler="linear_decay",
    warmup_proportion=0.0,
    weight_decay=0.01)
config = hub.RunConfig(
    use_cuda=False, num_epoch=3, batch_size=32, strategy=strategy)
feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") args = parser.parse_args() # yapf: enable. if __name__ == '__main__': # Load Paddlehub ERNIE 2.0 pretrained model module = hub.Module(name="ernie_v2_eng_base") inputs, outputs, program = module.context(trainable=True, max_seq_len=args.max_seq_len) # Use the appropriate tokenizer to preprocess the data set # For ernie_tiny, it will do word segmentation to get subword. More details: https://www.jiqizhixin.com/articles/2019-11-06-9 if module.name == "ernie_tiny": tokenizer = hub.ErnieTinyTokenizer( vocab_file=module.get_vocab_path(), spm_path=module.get_spm_path(), word_dict_path=module.get_word_dict_path()) else: tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path()) dataset = hub.dataset.GLUE("STS-B", tokenizer=tokenizer,
# yapf: enable.


class TestDataset(hub.dataset.GLUE):
    def get_train_examples(self):
        return self.train_examples[:800]

    def get_dev_examples(self):
        return self.dev_examples[:50]

    def get_test_examples(self):
        return self.test_examples[:50]


if __name__ == '__main__':
    module = hub.Module(name="bert_cased_L-24_H-1024_A-16")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # metric should be acc, f1 or matthews
    dataset = TestDataset()
    metrics_choices = ["acc"]

    reader = hub.reader.ClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct transfer learning network
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.") parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.") parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.") parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint") parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") args = parser.parse_args() # yapf: enable. if __name__ == '__main__': # Load Paddlehub ERNIE pretrained model module = hub.Module(name="ernie_tiny") inputs, outputs, program = module.context(trainable=True, max_seq_len=args.max_seq_len) # Download dataset and use SequenceLabelReader to read dataset dataset = hub.dataset.MSRA_NER() reader = hub.reader.SequenceLabelReader( dataset=dataset, vocab_path=module.get_vocab_path(), max_seq_len=args.max_seq_len, sp_model_path=module.get_spm_path(), word_dict_path=module.get_word_dict_path()) # Construct transfer learning network # Use "sequence_output" for token-level output. sequence_output = outputs["sequence_output"]
def main():
    # Load a PaddleHub pretrained model.
    # More pretrained models: https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    model_name = "ernie_tiny"
    # model_name = "chinese-roberta-wwm-ext-large"
    module = hub.Module(name=model_name)
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download the dataset and read it with SequenceLabelReader.
    dataset = EEDataset(args.data_dir, schema_labels, model=args.do_model)
    reader = hub.reader.SequenceLabelReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct the transfer learning network.
    # Use "sequence_output" for token-level output.
    sequence_output = outputs["sequence_output"]

    # Set up the feed list for the data feeder.
    # All tensors required by the module must be fed, in this order.
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name
    ]

    # Select a fine-tuning strategy.
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Set up the running config for the PaddleHub Finetune API.
    config = hub.RunConfig(
        eval_interval=args.eval_step,
        save_ckpt_interval=args.model_save_step,
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Define a sequence labeling fine-tune task with PaddleHub's API.
    # If add_crf is set, the network uses a CRF as the decoder.
    seq_label_task = hub.SequenceLabelTask(
        data_reader=reader,
        feature=sequence_output,
        feed_list=feed_list,
        max_seq_len=args.max_seq_len,
        num_classes=dataset.num_labels,
        config=config,
        add_crf=args.add_crf)

    # Fine-tune and evaluate the model with PaddleHub's API.
    # Training, evaluation, testing and model saving are handled automatically.
    if args.do_train:
        print("start finetune and eval process")
        seq_label_task.finetune_and_eval()

    if args.do_predict:
        print("start predict process")
        ret = []
        id2label = {val: key for key, val in reader.label_map.items()}
        input_data = [[d] for d in predict_data]
        run_states = seq_label_task.predict(data=input_data[1:])
        results = []
        for batch_states in run_states:
            batch_results = batch_states.run_results
            batch_infers = batch_results[0].reshape([-1]).astype(
                np.int32).tolist()
            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
            current_id = 0
            for length in seq_lens:
                seq_infers = batch_infers[current_id:current_id + length]
                seq_result = list(map(id2label.get, seq_infers[1:-1]))
                current_id += length if args.add_crf else args.max_seq_len
                results.append(seq_result)

        ret = []
        for sent, r_label in zip(predict_sents, results):
            sent["labels"] = r_label
            ret.append(json.dumps(sent, ensure_ascii=False))
        write_by_lines("{}.{}.pred".format(args.predict_data, args.do_model),
                       ret)
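# `write_by_lines` is called above but defined elsewhere in the project. A minimal
# sketch of what it is assumed to do: write one string per line, UTF-8 encoded.
def write_by_lines(path, lines):
    with open(path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")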
def setUpClass(self):
    """Prepare the environment once before execution of all tests."""
    self.classifier = hub.Module(name='se_resnet18_vd_imagenet')
parser.add_argument("--batch_size", type=int, default=1, help="Total examples' number in batch for training.") parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.") parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False") parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.") parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset") parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.") args = parser.parse_args() # yapf: enable. if __name__ == '__main__': dataset = None metrics_choices = [] # Download dataset and use ClassifyReader to read dataset if args.dataset.lower() == "chnsenticorp": dataset = hub.dataset.ChnSentiCorp() module = hub.Module(name="ernie_tiny") metrics_choices = ["acc"] elif args.dataset.lower() == "tnews": dataset = hub.dataset.TNews() module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16") metrics_choices = ["acc"] elif args.dataset.lower() == "nlpcc_dbqa": dataset = hub.dataset.NLPCC_DBQA() module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16") metrics_choices = ["acc"] elif args.dataset.lower() == "lcqmc": dataset = hub.dataset.LCQMC() module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16") metrics_choices = ["acc"] elif args.dataset.lower() == 'inews': dataset = hub.dataset.INews()
    type=int,
    default=16,
    help="Total examples' number in batch for training.")
parser.add_argument(
    "--checkpoint_dir", type=str, default='./checkpoint',
    help="Directory to model checkpoint.")
parser.add_argument(
    "--save_interval", type=int, default=10,
    help="Save checkpoint every n epochs.")
args = parser.parse_args()

if __name__ == "__main__":
    model = hub.Module(
        name='panns_cnn14', task='sound-cls', num_class=ESC50.num_class)

    train_dataset = ESC50(mode='train')
    dev_dataset = ESC50(mode='dev')

    optimizer = paddle.optimizer.AdamW(
        learning_rate=args.learning_rate, parameters=model.parameters())
    trainer = hub.Trainer(
        model, optimizer, checkpoint_dir=args.checkpoint_dir,
        use_gpu=args.use_gpu)
    trainer.train(
        train_dataset,
        epochs=args.num_epoch,
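        # (The original snippet is cut off here; the remaining arguments are an
        # assumed completion based on hub.Trainer.train's usual keyword arguments.
        # args.batch_size is assumed to come from the truncated argparse setup above.)
        batch_size=args.batch_size,
        eval_dataset=dev_dataset,
        save_interval=args.save_interval)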
import paddlehub as hub

# First, prepare the texts we want to segment.
raw_data = [["你觉得明天是个晴天吗", "我看还是下雨的可能性大"], ["中国哪家公司的人工智能最牛呢"],
            ["我在山上看见爱因斯坦"], ["我把车把一把把住了"]]

# Then call the ready-made lexical analysis model LAC from PaddleHub.
lac = hub.Module(name="lac")

for texts in raw_data:
    # Each iteration takes one element of the outer list, which is a list of strings.
    results = lac.lexical_analysis(texts=texts, use_gpu=False, batch_size=1)
    # lexical_analysis(texts=[], data={}, use_gpu=False, batch_size=1, user_dict=None, return_tag=True)
    # LAC prediction interface: segments the input sentences.
    # texts (list): data to predict; if texts is given, data is not needed (pass one or the other).
    # data (dict): data to predict; the key must be "text" and the value the data to predict.
    #     If data is given, texts is not needed. Prefer texts; data will be deprecated later.
    # use_gpu (bool): whether to use the GPU for prediction.
    # batch_size (int): batch size.
    # user_dict (None): not recommended; call set_user_dict() to register a custom
    #     dictionary before calling lexical_analysis().
    # return_tag (bool): whether to return POS tags along with the segmentation.
    # Return value: results (list), the segmentation results.
    for result in results:
        # Each element of the result list is a dict with two keys: the segmentation
        # result "word" and the POS tags "tag".
        print(result)
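# A minimal sketch of the custom-dictionary flow mentioned in the note above:
# register the dictionary with set_user_dict() before calling lexical_analysis().
# The file name "user.dict" is a placeholder.
lac.set_user_dict("user.dict")
print(lac.lexical_analysis(texts=["我在山上看见爱因斯坦"], return_tag=True))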
import paddlehub as hub
import cv2
import numpy as np
import math
import CVTools

face_landmark = hub.Module(name="face_landmark_localization")


def landmark_dec_fun(img_src):
    # img_gray = cv2.cvtColor(img_src, cv2.COLOR_BGR2GRAY)
    land_marks = []
    # rects = detector(img_gray, 0)
    # for i in range(len(rects)):
    #     land_marks.append(land_marks_node)
    results = face_landmark.keypoint_detection(
        images=[img_src],
        paths=None,
        batch_size=1,
        use_gpu=False,
        output_dir='face_landmark_output',
        visualization=False)
    # print('emoi baidu landmark', len(results), len(results[0]))
    for result in results:  # one result for one pic
        # print(len(result['data']))
        land_marks.append(result['data'])
    return land_marks[0]  # one pic for one element
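# Hypothetical usage of the helper above; the image file name is a placeholder.
img = cv2.imread("face.jpg")
landmarks = landmark_dec_fun(img)
print("detected %d face(s)" % len(landmarks))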
def setUpClass(self):
    """Prepare the environment once before execution of all tests."""
    self.human_seg = hub.Module(name="humanseg_lite")
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import paddlehub as hub

# Cut the people out of the images (human segmentation).
# Reference: https://mp.weixin.qq.com/s/0K1YiR_tCnfg65ZoN8QUqQ
huseg = hub.Module(name='deeplabv3p_xception65_humanseg')  # load the model

path = './imgs/'  # image directory
files = [path + i for i in os.listdir(path)]  # collect the file list
print("=====================================")
files.remove('./imgs/.DS_Store')
print(files)

# results = huseg.segmentation(data={'image': files})  # original call: nothing was
# saved, because visualization defaults to False in the API.
results = huseg.segmentation(data={'image': files}, visualization=True)  # segment
# or
# results = huseg.segmentation(paths=files, visualization=True)
import paddle
import paddlehub as hub

if __name__ == '__main__':
    model = hub.Module(
        name='ocrnet_hrnetw18_voc',
        num_classes=2,
        pretrained='/PATH/TO/CHECKPOINT')
    model.predict(images=["N0007.jpg"], visualization=True)
default="/home/aistudio/test/video", help="视频存放路径") parser.add_argument("-o", "--output", type=str, default="/home/aistudio/test/frame", help="结果帧存放路径") parser.add_argument("-m", "--model", type=str, default="/home/aistudio/plane/gear/output/yolov3/epoch_20", help="起落架检测模型路径") parser.add_argument("--itv", type=int, default=8, help="人进入起落架区域,抽帧间隔") args = parser.parse_args() people_det = hub.Module(name="yolov3_resnet50_vd_coco2017") flg_det = pdx.load_model(args.model) transforms = transforms.Compose([transforms.Resize(), transforms.Normalize()]) # 坐标的顺序是按照crop时下标的顺序,坐标第一个就是下标第一维,cv2里面的应该和这个是反的 def toint(l): return [int(x) for x in l] def crop(img, p, mode="max"): if mode == "max": return img[p[0]:p[2], p[1]:p[3], :] elif mode == "length": p = toint([p[0], p[1], p[0] + p[2], p[1] + p[3]])
def install_module(self,
                   module_name=None,
                   module_dir=None,
                   module_package=None,
                   module_version=None,
                   upgrade=False,
                   extra=None):
    md5_value = installed_module_version = None
    from_user_dir = True if module_dir else False
    with tmp_dir() as _dir:
        if module_name:
            self.all_modules(update=True)
            module_info = self.modules_dict.get(module_name, None)
            if module_info:
                if not module_version or module_version == self.modules_dict[
                        module_name][1]:
                    module_dir = self.modules_dict[module_name][0]
                    module_tag = module_name if not module_version else '%s-%s' % (
                        module_name, module_version)
                    tips = "Module %s already installed in %s" % (
                        module_tag, module_dir)
                    return True, tips, self.modules_dict[module_name]
            search_result = hub.HubServer().get_module_url(
                module_name, version=module_version, extra=extra)
            name = search_result.get('name', None)
            url = search_result.get('url', None)
            md5_value = search_result.get('md5', None)
            installed_module_version = search_result.get('version', None)
            if not url or (module_version is not None
                           and installed_module_version != module_version
                           ) or (name != module_name):
                if hub.HubServer()._server_check() is False:
                    tips = "Request Hub-Server unsuccessfully, please check your network."
                    return False, tips, None
                module_versions_info = hub.HubServer().search_module_info(
                    module_name)
                if module_versions_info is not None and len(
                        module_versions_info) > 0:
                    if utils.is_windows():
                        placeholders = [20, 8, 14, 14]
                    else:
                        placeholders = [30, 8, 16, 16]
                    tp = TablePrinter(
                        titles=[
                            "ResourceName", "Version", "PaddlePaddle",
                            "PaddleHub"
                        ],
                        placeholders=placeholders)
                    module_versions_info.sort(
                        key=cmp_to_key(utils.sort_version_key))
                    for resource_name, resource_version, paddle_version, \
                            hub_version in module_versions_info:
                        colors = ["yellow", None, None, None]
                        tp.add_line(
                            contents=[
                                resource_name, resource_version,
                                utils.strflist_version(paddle_version),
                                utils.strflist_version(hub_version)
                            ],
                            colors=colors)
                    tips = "The version of PaddlePaddle or PaddleHub " \
                           "can not match module, please upgrade your " \
                           "PaddlePaddle or PaddleHub according to the form " \
                           "below." + tp.get_text()
                else:
                    tips = "Can't find module %s" % module_name
                    if module_version:
                        tips += " with version %s" % module_version
                return False, tips, None

            result, tips, module_zip_file = default_downloader.download_file(
                url=url,
                save_path=_dir,
                save_name=module_name,
                replace=True,
                print_progress=True)
            result, tips, module_dir = default_downloader.uncompress(
                file=module_zip_file,
                dirname=MODULE_HOME,
                delete_file=True,
                print_progress=True)

        if module_package:
            with tarfile.open(module_package, "r:gz") as tar:
                file_names = tar.getnames()
                size = len(file_names) - 1
                module_dir = os.path.join(_dir, file_names[0])
                for index, file_name in enumerate(file_names):
                    tar.extract(file_name, _dir)

        if module_dir:
            if not module_name:
                module_name = hub.Module(directory=module_dir).name
            self.all_modules(update=False)
            module_info = self.modules_dict.get(module_name, None)
            if module_info:
                module_dir = self.modules_dict[module_name][0]
                module_tag = module_name if not module_version else '%s-%s' % (
                    module_name, module_version)
                tips = "Module %s already installed in %s" % (module_tag,
                                                              module_dir)
                return True, tips, self.modules_dict[module_name]

        if module_dir:
            if md5_value:
                with open(
                        os.path.join(MODULE_HOME, module_dir, "md5.txt"),
                        "w") as fp:
                    fp.write(md5_value)
            save_path = os.path.join(MODULE_HOME, module_name)
            if os.path.exists(save_path):
                shutil.rmtree(save_path)
            if from_user_dir:
                shutil.copytree(module_dir, save_path)
            else:
                shutil.move(module_dir, save_path)
            module_dir = save_path
            tips = "Successfully installed %s" % module_name
            if installed_module_version:
                tips += "-%s" % installed_module_version
            return True, tips, (module_dir, installed_module_version)

        tips = "Download %s-%s failed" % (module_name, module_version)
        return False, tips, module_dir
    for face_num in range(face_nums):
        # e.g. face_0
        dirname = 'face_{}'.format(face_num)
        # e.g. ./train_face/filuudleua_0/FAKE/face_0, ./train_face/filuudleua_0/FAKE/face_1
        facedirname = os.path.join(faceFileFullDir + '/', dirname)
        # Create the corresponding directory if it does not exist yet.
        if not os.path.isdir(facedirname):
            os.makedirs(facedirname)
        face = facelist[face_num]
        faceFullName = os.path.join(facedirname + '/', frameFile)
        print(faceFullName)
        cv2.imwrite(faceFullName, face)


if __name__ == '__main__':
    face_detector_big = hub.Module(
        name="ultra_light_fast_generic_face_detector_1mb_640")
    # frameImageDir = "/home/aistudio/work/Frame_data/filuudleua_0/FAKE/0_123.jpg"
    # face_list = DetectFace(face_detector_big, frameImageDir)
    # print(face_list[0].shape)
    # print(type(face_list[0]))
    frameImageDir = '/home/aistudio/work/Frame_data/'
    train_faceImageDir = '/home/aistudio/work/train_face/'
    validate_faceImageDir = '/home/aistudio/work/validate_face/'
    Saver(face_detector_big, frameImageDir, train_faceImageDir,
          validate_faceImageDir, threshold=0.9)
def __init__(self):
    self.module = hub.Module(
        name="ultra_light_fast_generic_face_detector_1mb_640")
    self.alpha = 0.75
    self.start_flag = 1
def setUpClass(self):
    """Prepare the environment once before execution of all tests."""
    self.yolov3_pedestrian_detect = hub.Module(
        name="yolov3_darknet53_pedestrian")
def file_ocr(Input_path):
    # Purpose: ship-name recognition.
    # Usage: file_ocr(path of the input folder)
    input_path = DeepCopy(Input_path)

    if 1 == 1:  # load a custom (self-trained) model
        ocr = PaddleOCR(
            det_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/ch_ppocr_server_v1.1_det_infer',
            rec_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/rec_crnn',
            rec_char_dict_path=os.path.abspath(os.path.dirname(__file__)) + '/modules/ppocr_keys_v1.txt',
            cls_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/ch_ppocr_mobile_v1.1_cls_infer',
            use_angle_cls=True)
        # Load the bundled (provided) model instead:
        # ocr = PaddleOCR(
        #     det_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/ch_ppocr_server_v1.1_det_infer',
        #     rec_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/ch_ppocr_server_v1.1_rec_infer',
        #     rec_char_dict_path=os.path.abspath(os.path.dirname(__file__)) + '/modules/ppocr_keys_v1.txt',
        #     cls_model_dir=os.path.abspath(os.path.dirname(__file__)) + '/modules/ch_ppocr_mobile_v1.1_cls_infer',
        #     use_angle_cls=True)
        ocr_result_list = []
        files = os.listdir(input_path)
        i = 0
        for file in files:  # iterate over the folder
            if (file[-3:] == 'jpg'):
                i = i + 1
                if (i > 0):
                    print("正在识别第" + str(i) + "张图片...")
                    results = ocr.ocr(input_path + file, cls=True)
                    for line in results:
                        print(line)
                    ocr_result = []
                    if len(results) != 0:  # check whether anything was recognized
                        rec_name = ''
                        confidence_sum = 0
                        for j in range(len(results)):  # concatenate all recognized boxes
                            rec_name = rec_name + results[j][1][0]
                            # the recognized text may be in reversed order
                            rec_name = reverse_name(rec_name)
                            confidence_sum = confidence_sum + results[j][1][1]
                        confidence = confidence_sum / len(results)  # average confidence
                        ocr_result.append(file[:-4])    # image id
                        ocr_result.append(rec_name)     # recognized name
                        ocr_result.append(confidence)   # confidence
                        ocr_result_list.append(ocr_result)
                    else:
                        ocr_result.append(file[:-4])
                        ocr_result.append('未能识别')
                        ocr_result.append(0)
                        ocr_result_list.append(ocr_result)

    if 1 == 0:  # load the off-the-shelf PaddleHub model instead
        ocr = hub.Module(name="chinese_ocr_db_crnn_server")
        ocr_result_list = []
        files = os.listdir(input_path)
        i = 0
        for file in files:  # iterate over the folder
            if (file[-3:] == 'jpg'):
                i = i + 1
                if (i > 0):
                    print("正在识别第" + str(i) + "张图片...")
                    results = ocr.recognize_text(
                        paths=[input_path + file], visualization=True)
                    ocr_result = []
                    if len(results[0]['data']) != 0:  # check whether anything was recognized
                        rec_name = ''
                        confidence_sum = 0
                        for j in range(len(results[0]['data'])):  # concatenate all recognized boxes
                            rec_name = rec_name + results[0]['data'][j]['text']
                            # the recognized text may be in reversed order
                            rec_name = reverse_name(rec_name)
                            confidence_sum = confidence_sum + results[0]['data'][j]['confidence']
                        confidence = confidence_sum / len(results[0]['data'])  # average confidence
                        ocr_result.append(file[:-4])    # image id
                        ocr_result.append(rec_name)     # recognized name
                        ocr_result.append(confidence)   # confidence
                        ocr_result_list.append(ocr_result)
                    else:
                        ocr_result.append(file[:-4])
                        ocr_result.append('未能识别')
                        ocr_result.append(0)
                        ocr_result_list.append(ocr_result)

    return ocr_result_list
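# Hypothetical usage: run ship-name OCR over a folder of .jpg images. The folder name
# is a placeholder; keep the trailing slash because file_ocr concatenates
# input_path + file above.
for image_id, ship_name, confidence in file_ocr('./ship_images/'):
    print(image_id, ship_name, confidence)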
parser = argparse.ArgumentParser(__doc__)
parser.add_argument(
    "--num_epoch", type=int, default=1,
    help="Number of epochs for fine-tuning.")
parser.add_argument(
    "--use_gpu", type=ast.literal_eval, default=True,
    help="Whether to use GPU for fine-tuning; input should be True or False.")
parser.add_argument(
    "--learning_rate", type=float, default=3e-5,
    help="Learning rate used to train with warmup.")
parser.add_argument(
    "--weight_decay", type=float, default=0.01,
    help="Weight decay rate for L2 regularizer.")
parser.add_argument(
    "--warmup_proportion", type=float, default=0.0,
    help="Warmup proportion params for warmup strategy.")
parser.add_argument(
    "--checkpoint_dir", type=str, default=None,
    help="Directory to model checkpoint.")
parser.add_argument(
    "--max_seq_len", type=int, default=384,
    help="Number of words of the longest sequence.")
parser.add_argument(
    "--batch_size", type=int, default=8,
    help="Total examples' number in batch for training.")
parser.add_argument(
    "--use_data_parallel", type=ast.literal_eval, default=True,
    help="Whether to use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    # Load the PaddleHub BERT pretrained model.
    module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download the dataset and read it with ReadingComprehensionReader.
    # To load SQuAD 2.0, set version_2_with_negative to True.
    dataset = hub.dataset.SQUAD(version_2_with_negative=False)
    # dataset = hub.dataset.SQUAD(version_2_with_negative=True)

    reader = hub.reader.ReadingComprehensionReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        doc_stride=128,
        max_query_length=64)
import cv2
import paddlehub as hub

word_img = cv2.imread("word3.jpg")
word_img2 = cv2.imread("word4.jpg")
ocr = hub.Module(name="chinese_ocr_db_crnn_server")
result = ocr.recognize_text(images=[word_img2], visualization=True)
print(result)
word_list = result[0]["data"]
print(len(word_list))
for i in range(len(word_list)):
    word = word_list[i]["text"]
    print(word)
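# Joining the recognized fragments into one string, using only the "text" field
# already shown above.
full_text = "".join(item["text"] for item in word_list)
print(full_text)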
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddlehub as hub

module = hub.Module(name="bert_chinese_L-12_H-768_A-12")
print(module.get_embedding(texts=[["床前明月光", "疑是地上霜"], ["举头望明月"]]))
import paddlehub as hub

module = hub.Module(name="ernie_gen_leave")
test_texts = ["理由"]
results = module.generate(texts=test_texts, use_gpu=False, beam_width=2)
for result in results:
    print(result)
    switch_main_program(program)

    fc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 4)
    lstm_h, c = fluid.layers.dynamic_lstm(
        input=fc0, size=hid_dim * 4, is_reverse=False)
    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
    fc = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')
    return fc


if __name__ == '__main__':
    # Step 1: load the PaddleHub ELMo pretrained model
    module = hub.Module(name="elmo")
    inputs, outputs, program = module.context(trainable=True)

    # Step 2: download the dataset and read it with LACClassifyReader
    dataset = hub.dataset.ChnSentiCorp()
    reader = hub.reader.LACClassifyReader(
        dataset=dataset, vocab_path=module.get_vocab_path())
    word_dict_len = len(reader.vocab)

    word_ids = inputs["word_ids"]
    elmo_embedding = outputs["elmo_embed"]

    # Step 3: switch program and build the network
    # Choose the net you would like: bow, cnn, gru, bilstm, lstm
    switch_main_program(program)
import paddlehub as hub
import cv2

video_capture = cv2.VideoCapture(0)
process_this_frame = True
module = hub.Module(name="pyramidbox_lite_server_mask")

while True:
    # Grab one frame of video.
    ret, frame = video_capture.read()

    # Resize the frame to 1/4 size for faster face detection.
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)

    # images (list[numpy.ndarray]): image data, ndarray.shape is [H, W, C], BGR format.
    imglist = [small_frame]

    # Convert the image from BGR (used by OpenCV) to RGB.
    rgb_small_frame = small_frame[:, :, ::-1]
    # imglist = [rgb_small_frame]

    # Only process every other frame to save time.
    if process_this_frame:
        # Passing an image object via `data`:
        # input_dict = {"data": [cv2.imread(rgb_small_frame)]}
        # results = module.face_detection(data=input_dict)
        results = module.face_detection(images=imglist)
        print(results)

    process_this_frame = not process_this_frame
def setUpClass(self):
    """Prepare the environment once before execution of all tests."""
    self.animal_classify = hub.Module(
        name="mobilenet_v3_large_imagenet_ssld")
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddlehub as hub

module = hub.Module(name="rbt3")
print(module.get_embedding(texts=[["床前明月光", "疑是地上霜"], ["举头望明月"]]))
    type=int,
    default=32,
    help="Total examples' number in batch for training.")
parser.add_argument(
    "--checkpoint_dir", type=str, default='./checkpoint',
    help="Directory to model checkpoint.")
parser.add_argument(
    "--save_interval", type=int, default=1,
    help="Save checkpoint every n epochs.")
args = parser.parse_args()

if __name__ == '__main__':
    model = hub.Module(name='ernie_tiny', version='2.0.1', task='seq-cls')

    train_dataset = ChnSentiCorp(
        tokenizer=model.get_tokenizer(), max_seq_len=args.max_seq_len,
        mode='train')
    dev_dataset = ChnSentiCorp(
        tokenizer=model.get_tokenizer(), max_seq_len=args.max_seq_len,
        mode='dev')
    test_dataset = ChnSentiCorp(
        tokenizer=model.get_tokenizer(), max_seq_len=args.max_seq_len,
        mode='test')

    optimizer = paddle.optimizer.AdamW(
        learning_rate=args.learning_rate, parameters=model.parameters())
    trainer = hub.Trainer(
        model, optimizer,
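        # (The original snippet is cut off here; the arguments below complete the
        # Trainer setup and add a typical train/evaluate call. They assume that
        # --checkpoint_dir, --use_gpu, --num_epoch and --batch_size were parsed in the
        # truncated argparse block above, which is not shown.)
        checkpoint_dir=args.checkpoint_dir,
        use_gpu=args.use_gpu)
    trainer.train(
        train_dataset,
        epochs=args.num_epoch,
        batch_size=args.batch_size,
        eval_dataset=dev_dataset,
        save_interval=args.save_interval)
    trainer.evaluate(test_dataset, batch_size=args.batch_size)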