예제 #1
0
 def __init__(self):
     # Load the PaddleHub LAC (Chinese lexical analysis) module once per instance.
     self.module = hub.Module(name="lac")
def get_task(args, schema_labels, id):
    """Build a PaddleHub sequence-labeling fine-tune task and its data reader.

    Args:
        args: parsed CLI namespace; fields used here: max_seq_len, data_dir,
            do_model, warmup_proportion, weight_decay, learning_rate,
            eval_step, model_save_step, use_data_parallel, use_gpu,
            num_epoch, batch_size, checkpoint_dir, random_seed, add_crf.
        schema_labels: label schema forwarded to EEDataset.
        id: identifier forwarded to add_hook().

    Returns:
        Tuple of (seq_label_task, reader).
    """
    # Load a PaddleHub pretrained model: ERNIE Tiny / RoBERTa large.
    # More pretrained models:
    # https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    # model_name = "ernie_tiny"
    model_name = "chinese-roberta-wwm-ext-large"
    module = hub.Module(name=model_name)
    inputs, outputs, program = module.context(trainable=True,
                                              max_seq_len=args.max_seq_len)

    # Load the dataset and read it through SequenceLabelReader.
    dataset = EEDataset(args.data_dir, schema_labels, model=args.do_model)
    reader = SequenceLabelReader(dataset=dataset,
                                 vocab_path=module.get_vocab_path(),
                                 max_seq_len=args.max_seq_len,
                                 sp_model_path=module.get_spm_path(),
                                 word_dict_path=module.get_word_dict_path())

    # Build the transfer network for sequence labeling.
    # The token-level "sequence_output" of ERNIE is the transfer network input.
    sequence_output = outputs["sequence_output"]
    # sequence_output  = fluid.layers.dropout(
    #     x=sequence_output ,
    #     dropout_prob=args.dropout,
    #     dropout_implementation="upscale_in_train")

    # Variables the model program must be fed (feed_list).
    # The order below is mandatory.
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name
    ]

    # Optimization strategy.
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Runtime configuration.
    config = hub.RunConfig(
        log_interval=100,
        eval_interval=args.eval_step,
        save_ckpt_interval=args.model_save_step,
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        # enable_memory_optim=True,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Assemble the sequence-labeling transfer task.
    seq_label_task = hub.SequenceLabelTask(data_reader=reader,
                                           feature=sequence_output,
                                           feed_list=feed_list,
                                           max_seq_len=args.max_seq_len,
                                           num_classes=dataset.num_labels,
                                           config=config,
                                           add_crf=args.add_crf)
    seq_label_task.main_program.random_seed = args.random_seed
    add_hook(args, seq_label_task, id)
    return seq_label_task, reader
예제 #3
0
import paddle.fluid as fluid
import paddlehub as hub

# Load the pretrained ERNIE module and build its program context.
module = hub.Module(name="ernie")

# BUG FIX: `trainable` was passed as the string "True" (always truthy but the
# wrong type); pass a real bool.  A duplicated module.context() call was also
# removed — one context build is sufficient.
inputs, outputs, program = module.context(trainable=True, max_seq_len=128)

# Sentence-level and token-level outputs of the encoder.
pooled_output = outputs["pooled_output"]
sequence_output = outputs["sequence_output"]

# Build the dataset once and reuse it (it was previously constructed twice).
ds = hub.dataset.ChnSentiCorp()
reader = hub.reader.ClassifyReader(dataset=ds,
                                   vocab_path=module.get_vocab_path(),
                                   max_seq_len=128)

# Peek at the training examples.
for e in ds.get_train_examples():
    print(e.text_a, e.label)

# AdamW-style fine-tuning strategy with linear decay and no warmup.
strategy = hub.AdamWeightDecayStrategy(learning_rate=1e-4,
                                       lr_scheduler="linear_decay",
                                       warmup_proportion=0.0,
                                       weight_decay=0.01)

config = hub.RunConfig(use_cuda=False,
                       num_epoch=3,
                       batch_size=32,
                       strategy=strategy)
feed_list = [
    inputs["input_ids"].name, inputs["position_ids"].name,
예제 #4
0
# Fine-tuning hyper-parameter flags (the parser object is created earlier in
# the file; boolean flags use ast.literal_eval so "True"/"False" parse safely).
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.1, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':

    # Load Paddlehub ERNIE 2.0 pretrained model
    module = hub.Module(name="ernie_v2_eng_base")
    inputs, outputs, program = module.context(trainable=True,
                                              max_seq_len=args.max_seq_len)

    # Use the appropriate tokenizer to preprocess the data set
    # For ernie_tiny, it will do word segmentation to get subword. More details: https://www.jiqizhixin.com/articles/2019-11-06-9
    if module.name == "ernie_tiny":
        tokenizer = hub.ErnieTinyTokenizer(
            vocab_file=module.get_vocab_path(),
            spm_path=module.get_spm_path(),
            word_dict_path=module.get_word_dict_path())
    else:
        # Every other module uses the standard BERT tokenizer.
        tokenizer = hub.BertTokenizer(vocab_file=module.get_vocab_path())

    dataset = hub.dataset.GLUE("STS-B",
                               tokenizer=tokenizer,
예제 #5
0
# yapf: enable.


class TestDataset(hub.dataset.GLUE):
    """GLUE dataset trimmed to small fixed-size splits for quick smoke tests."""

    # Split-size caps: 800 training examples, 50 each for dev/test.
    _TRAIN_LIMIT = 800
    _EVAL_LIMIT = 50

    def get_train_examples(self):
        return self.train_examples[:self._TRAIN_LIMIT]

    def get_dev_examples(self):
        return self.dev_examples[:self._EVAL_LIMIT]

    def get_test_examples(self):
        return self.test_examples[:self._EVAL_LIMIT]


if __name__ == '__main__':
    # Pretrained cased BERT-Large (24 layers, 1024 hidden, 16 heads).
    module = hub.Module(name="bert_cased_L-24_H-1024_A-16")
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # metric should be acc, f1 or matthews
    dataset = TestDataset()
    metrics_choices = ["acc"]

    # Reader that tokenizes and pads classification examples for the module.
    reader = hub.reader.ClassifyReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct transfer learning network
예제 #6
0
# CLI options for the sequence-labeling fine-tuning run (parser is created
# earlier in the file).
parser.add_argument("--num_epoch", type=int, default=3, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=32, help="Total examples' number in batch for training.")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    # Load Paddlehub ERNIE pretrained model
    module = hub.Module(name="ernie_tiny")
    inputs, outputs, program = module.context(trainable=True,
                                              max_seq_len=args.max_seq_len)

    # Download dataset and use SequenceLabelReader to read dataset
    # (MSRA_NER: Chinese named-entity recognition benchmark).
    dataset = hub.dataset.MSRA_NER()
    reader = hub.reader.SequenceLabelReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct transfer learning network
    # Use "sequence_output" for token-level output.
    sequence_output = outputs["sequence_output"]
예제 #7
0
def main():
    """Fine-tune and/or predict for the event-extraction sequence-labeling
    task, driven by args.do_train / args.do_predict.

    Relies on module-level names: args, schema_labels, predict_data,
    predict_sents, EEDataset, write_by_lines, hub, np, json.
    """
    # Load a PaddleHub pretrained model.  More pretrained models:
    # https://www.paddlepaddle.org.cn/hublist?filter=en_category&value=SemanticModel
    model_name = "ernie_tiny"
    #model_name = "chinese-roberta-wwm-ext-large"
    module = hub.Module(name=model_name)
    inputs, outputs, program = module.context(
        trainable=True, max_seq_len=args.max_seq_len)

    # Download dataset and use SequenceLabelReader to read dataset
    dataset = EEDataset(args.data_dir, schema_labels, model=args.do_model)
    reader = hub.reader.SequenceLabelReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        sp_model_path=module.get_spm_path(),
        word_dict_path=module.get_word_dict_path())

    # Construct transfer learning network
    # Use "sequence_output" for token-level output.
    sequence_output = outputs["sequence_output"]

    # Setup feed list for data feeder
    # Must feed all the tensor of module need
    feed_list = [
        inputs["input_ids"].name, inputs["position_ids"].name,
        inputs["segment_ids"].name, inputs["input_mask"].name
    ]

    # Select a finetune strategy
    strategy = hub.AdamWeightDecayStrategy(
        warmup_proportion=args.warmup_proportion,
        weight_decay=args.weight_decay,
        learning_rate=args.learning_rate)

    # Setup running config for PaddleHub Finetune API
    config = hub.RunConfig(
        eval_interval=args.eval_step,
        save_ckpt_interval=args.model_save_step,
        use_data_parallel=args.use_data_parallel,
        use_cuda=args.use_gpu,
        num_epoch=args.num_epoch,
        batch_size=args.batch_size,
        checkpoint_dir=args.checkpoint_dir,
        strategy=strategy)

    # Define a sequence labeling finetune task by PaddleHub's API.
    # If add_crf is set, the network uses a CRF as the decoder.
    seq_label_task = hub.SequenceLabelTask(
        data_reader=reader,
        feature=sequence_output,
        feed_list=feed_list,
        max_seq_len=args.max_seq_len,
        num_classes=dataset.num_labels,
        config=config,
        add_crf=args.add_crf)

    # Finetune and evaluate model by PaddleHub's API;
    # training, evaluation, testing and model saving happen automatically.
    if args.do_train:
        print("start finetune and eval process")
        seq_label_task.finetune_and_eval()

    if args.do_predict:
        print("start predict process")
        id2label = {val: key for key, val in reader.label_map.items()}
        input_data = [[d] for d in predict_data]
        # NOTE(review): the first element is skipped (input_data[1:]) —
        # presumably a header row in predict_data; confirm against the loader.
        run_states = seq_label_task.predict(data=input_data[1:])
        results = []
        for batch_states in run_states:
            batch_results = batch_states.run_results
            # batch_results[0]: predicted label ids; batch_results[1]: lengths.
            batch_infers = batch_results[0].reshape([-1]).astype(np.int32).tolist()
            seq_lens = batch_results[1].reshape([-1]).astype(np.int32).tolist()
            current_id = 0
            for length in seq_lens:
                seq_infers = batch_infers[current_id:current_id + length]
                # Drop the first/last positions (special tokens) before
                # mapping ids to labels.
                seq_result = list(map(id2label.get, seq_infers[1:-1]))
                # Without CRF, each sequence occupies max_seq_len slots.
                current_id += length if args.add_crf else args.max_seq_len
                results.append(seq_result)

        # BUG FIX: a dead `ret = []` assignment before the predict loop was
        # removed; ret is built here from the final results only.
        ret = []
        for sent, r_label in zip(predict_sents, results):
            sent["labels"] = r_label
            ret.append(json.dumps(sent, ensure_ascii=False))
        write_by_lines("{}.{}.pred".format(args.predict_data, args.do_model), ret)
 def setUpClass(self):
     """Prepare the environment once before execution of all tests."""
     # Shared SE-ResNet18 image classifier used by every test in this class.
     self.classifier = hub.Module(name='se_resnet18_vd_imagenet')
예제 #9
0
# CLI options for the classification demo (parser is created earlier in the
# file); --dataset selects which benchmark/module pair to use below.
parser.add_argument("--batch_size",     type=int,   default=1, help="Total examples' number in batch for training.")
parser.add_argument("--max_seq_len", type=int, default=512, help="Number of words of the longest seqence.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=False, help="Whether use GPU for finetuning, input should be True or False")
parser.add_argument("--use_pyreader", type=ast.literal_eval, default=False, help="Whether use pyreader to feed data.")
parser.add_argument("--dataset", type=str, default="chnsenticorp", help="The choice of dataset")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=False, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    dataset = None
    metrics_choices = []
    # Download dataset and use ClassifyReader to read dataset.
    # Each dataset name selects a matching pretrained module and metric list.
    if args.dataset.lower() == "chnsenticorp":
        dataset = hub.dataset.ChnSentiCorp()
        module = hub.Module(name="ernie_tiny")
        metrics_choices = ["acc"]
    elif args.dataset.lower() == "tnews":
        dataset = hub.dataset.TNews()
        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
        metrics_choices = ["acc"]
    elif args.dataset.lower() == "nlpcc_dbqa":
        dataset = hub.dataset.NLPCC_DBQA()
        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
        metrics_choices = ["acc"]
    elif args.dataset.lower() == "lcqmc":
        dataset = hub.dataset.LCQMC()
        module = hub.Module(name="roberta_wwm_ext_chinese_L-24_H-1024_A-16")
        metrics_choices = ["acc"]
    elif args.dataset.lower() == 'inews':
        dataset = hub.dataset.INews()
예제 #10
0
                    type=int,
                    default=16,
                    help="Total examples' number in batch for training.")
# Remaining CLI options for the sound-classification demo.
parser.add_argument("--checkpoint_dir",
                    type=str,
                    default='./checkpoint',
                    help="Directory to model checkpoint")
parser.add_argument("--save_interval",
                    type=int,
                    default=10,
                    help="Save checkpoint every n epoch.")
args = parser.parse_args()

if __name__ == "__main__":
    # PANNs CNN14 audio backbone wrapped for sound classification on ESC-50.
    model = hub.Module(name='panns_cnn14',
                       task='sound-cls',
                       num_class=ESC50.num_class)

    train_dataset = ESC50(mode='train')
    dev_dataset = ESC50(mode='dev')

    # AdamW optimizer over all model parameters.
    optimizer = paddle.optimizer.AdamW(learning_rate=args.learning_rate,
                                       parameters=model.parameters())

    trainer = hub.Trainer(model,
                          optimizer,
                          checkpoint_dir=args.checkpoint_dir,
                          use_gpu=args.use_gpu)
    trainer.train(
        train_dataset,
        epochs=args.num_epoch,
예제 #11
0
import paddlehub as hub

# Prepare the raw text we want to segment.
raw_data = [["你觉得明天是个晴天吗", "我看还是下雨的可能性大"], ["中国哪家公司的人工智能最牛呢"], ["我在山上看见爱因斯坦"],
            ["我把车把一把把住了"]]

# Call PaddleHub's ready-made word-segmentation model, LAC.
lac = hub.Module(name="lac")

for texts in raw_data:  # each element is itself a list of strings
    results = lac.lexical_analysis(texts=texts, use_gpu=False, batch_size=1)
    # lexical_analysis(texts=[], data={}, use_gpu=False, batch_size=1, user_dict=None, return_tag=True)
    # LAC prediction API: segments the input sentences.
    # texts(list): input data; mutually exclusive with `data` — pass one of them.
    # data(dict): input data keyed by "text"; mutually exclusive with `texts`.
    #   Prefer `texts`; `data` is slated for deprecation.
    # use_gpu(bool): whether to run prediction on GPU.
    # batch_size(int): batch size.
    # user_dict(None): deprecated — call set_user_dict() before
    #   lexical_analysis() to install a custom dictionary instead.
    # return_tag(bool): whether to include POS tags in the result.
    # Returns: results(list), one segmentation result per input sentence.

    for result in results:  # one element of the result list
        print(result)
        # Each result is a dict with two keys: "word" (the segmented tokens)
        # and "tag" (the POS tags).
예제 #12
0
import paddlehub as hub

import cv2
import numpy as np
import math
import CVTools
# Module-level face-landmark detector shared by the functions below.
face_landmark = hub.Module(name="face_landmark_localization")


def landmark_dec_fun(img_src):
    """Detect face landmarks in a single image.

    Args:
        img_src: image array (as produced by cv2.imread).

    Returns:
        The landmark data for the first detected result
        (``results[0]['data']``).  Raises IndexError if the detector
        returns no result, matching the original behavior.
    """
    # Dead commented-out dlib-based code was removed; the PaddleHub module
    # does the detection directly.
    results = face_landmark.keypoint_detection(
        images=[img_src],
        paths=None,
        batch_size=1,
        use_gpu=False,
        output_dir='face_landmark_output',
        visualization=False)
    # One result per input image; a single image was passed, so return its
    # data directly instead of accumulating a throwaway list.
    return results[0]['data']
예제 #13
0
 def setUpClass(self):
     """Prepare the environment once before execution of all tests."""
     # Shared lightweight human-segmentation module for all tests in this class.
     self.human_seg = hub.Module(name="humanseg_lite")
예제 #14
0
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import paddlehub as hub

# Cut the people out of images (portrait segmentation).
# Reference: https://mp.weixin.qq.com/s/0K1YiR_tCnfg65ZoN8QUqQ
huseg = hub.Module(name='deeplabv3p_xception65_humanseg')  # load the model
path = './imgs/'  # image directory
# Collect image files, skipping hidden entries such as .DS_Store.
# BUG FIX: the previous files.remove('./imgs/.DS_Store') raised ValueError
# whenever that file was absent; filtering dotfiles is robust on any platform.
files = [path + i for i in os.listdir(path) if not i.startswith('.')]
print("=====================================")
print(files)
# visualization=True is required so the segmented images actually get saved.
results = huseg.segmentation(data={'image': files}, visualization=True)

# Alternative call style:
# results = huseg.segmentation(paths=files, visualization=True)
예제 #15
0
import paddle
import paddlehub as hub

if __name__ == '__main__':
    # OCRNet segmentation model (HRNet-W18 backbone) with 2 classes,
    # restored from a local checkpoint (replace /PATH/TO/CHECKPOINT).
    model = hub.Module(name='ocrnet_hrnetw18_voc',
                       num_classes=2,
                       pretrained='/PATH/TO/CHECKPOINT')
    # Run inference on one image and save the visualized result.
    model.predict(images=["N0007.jpg"], visualization=True)
예제 #16
0
                    default="/home/aistudio/test/video",
                    help="视频存放路径")
# CLI options (the Chinese help strings are runtime text and kept as-is:
# output frame dir, landing-gear detection model path, frame-sampling interval).
parser.add_argument("-o",
                    "--output",
                    type=str,
                    default="/home/aistudio/test/frame",
                    help="结果帧存放路径")
parser.add_argument("-m",
                    "--model",
                    type=str,
                    default="/home/aistudio/plane/gear/output/yolov3/epoch_20",
                    help="起落架检测模型路径")
parser.add_argument("--itv", type=int, default=8, help="人进入起落架区域,抽帧间隔")
args = parser.parse_args()

# Pretrained COCO person detector plus the custom landing-gear model.
people_det = hub.Module(name="yolov3_resnet50_vd_coco2017")
flg_det = pdx.load_model(args.model)
# NOTE(review): this rebinds the imported `transforms` name to a Compose
# instance, shadowing the module from here on — confirm that is intended.
transforms = transforms.Compose([transforms.Resize(), transforms.Normalize()])

# Coordinate order follows crop()'s indexing (first coordinate = first array
# axis); cv2's (x, y) convention is presumably the reverse of this.


def toint(l):
    """Return a new list with every element of *l* converted via int()."""
    return list(map(int, l))


def crop(img, p, mode="max"):
    if mode == "max":
        return img[p[0]:p[2], p[1]:p[3], :]
    elif mode == "length":
        p = toint([p[0], p[1], p[0] + p[2], p[1] + p[3]])
예제 #17
0
    def install_module(self,
                       module_name=None,
                       module_dir=None,
                       module_package=None,
                       module_version=None,
                       upgrade=False,
                       extra=None):
        """Install a PaddleHub module from the hub server, a local directory,
        or a packaged tar.gz archive.

        Exactly one source is expected: ``module_name`` (download from the
        hub server), ``module_dir`` (install from a local directory), or
        ``module_package`` (extract a ``tar.gz`` archive).

        Args:
            module_name: name to look up on the hub server.
            module_dir: local directory containing an already-built module.
            module_package: path to a ``tar.gz`` module package.
            module_version: specific version to install (latest if None).
            upgrade: NOTE(review) — not referenced in this method body.
            extra: extra query info forwarded to the hub server.

        Returns:
            Tuple ``(success, tips, info)`` where ``tips`` is a human-readable
            message and ``info`` is the installed module's record on success
            (None or the module dir on failure).
        """
        md5_value = installed_module_version = None
        # Remember whether the module came from a user-supplied directory:
        # that decides copy vs. move at the end.
        from_user_dir = True if module_dir else False
        with tmp_dir() as _dir:
            if module_name:
                self.all_modules(update=True)
                module_info = self.modules_dict.get(module_name, None)
                if module_info:
                    # Already installed with a matching (or unspecified)
                    # version -> nothing to do.
                    if not module_version or module_version == self.modules_dict[
                            module_name][1]:
                        module_dir = self.modules_dict[module_name][0]
                        module_tag = module_name if not module_version else '%s-%s' % (
                            module_name, module_version)
                        tips = "Module %s already installed in %s" % (
                            module_tag, module_dir)
                        return True, tips, self.modules_dict[module_name]

                # Ask the hub server for a download URL matching the request.
                search_result = hub.HubServer().get_module_url(
                    module_name, version=module_version, extra=extra)
                name = search_result.get('name', None)
                url = search_result.get('url', None)
                md5_value = search_result.get('md5', None)
                installed_module_version = search_result.get('version', None)
                if not url or (module_version is not None
                               and installed_module_version != module_version
                               ) or (name != module_name):
                    # No usable URL: distinguish a network failure from a
                    # genuine version/compatibility mismatch.
                    if hub.HubServer()._server_check() is False:
                        tips = "Request Hub-Server unsuccessfully, please check your network."
                        return False, tips, None
                    module_versions_info = hub.HubServer().search_module_info(
                        module_name)
                    if module_versions_info is not None and len(
                            module_versions_info) > 0:

                        # Print a table of available versions so the user can
                        # pick a compatible PaddlePaddle/PaddleHub pairing.
                        if utils.is_windows():
                            placeholders = [20, 8, 14, 14]
                        else:
                            placeholders = [30, 8, 16, 16]
                        tp = TablePrinter(titles=[
                            "ResourceName", "Version", "PaddlePaddle",
                            "PaddleHub"
                        ],
                                          placeholders=placeholders)
                        module_versions_info.sort(
                            key=cmp_to_key(utils.sort_version_key))
                        for resource_name, resource_version, paddle_version, \
                            hub_version in module_versions_info:
                            colors = ["yellow", None, None, None]

                            tp.add_line(contents=[
                                resource_name, resource_version,
                                utils.strflist_version(paddle_version),
                                utils.strflist_version(hub_version)
                            ],
                                        colors=colors)
                        tips = "The version of PaddlePaddle or PaddleHub " \
                               "can not match module, please upgrade your " \
                               "PaddlePaddle or PaddleHub according to the form " \
                               "below." + tp.get_text()
                    else:
                        tips = "Can't find module %s" % module_name
                        if module_version:
                            tips += " with version %s" % module_version
                    return False, tips, None

                # Download the archive into the temp dir, then unpack it
                # into MODULE_HOME.
                result, tips, module_zip_file = default_downloader.download_file(
                    url=url,
                    save_path=_dir,
                    save_name=module_name,
                    replace=True,
                    print_progress=True)
                result, tips, module_dir = default_downloader.uncompress(
                    file=module_zip_file,
                    dirname=MODULE_HOME,
                    delete_file=True,
                    print_progress=True)

            if module_package:
                # Extract a local tar.gz package into the temp dir; the
                # archive's first entry names the module directory.
                with tarfile.open(module_package, "r:gz") as tar:
                    file_names = tar.getnames()
                    size = len(file_names) - 1
                    module_dir = os.path.join(_dir, file_names[0])
                    for index, file_name in enumerate(file_names):
                        tar.extract(file_name, _dir)

            if module_dir:
                if not module_name:
                    # Derive the module name from the directory contents.
                    module_name = hub.Module(directory=module_dir).name
                self.all_modules(update=False)
                module_info = self.modules_dict.get(module_name, None)
                if module_info:
                    module_dir = self.modules_dict[module_name][0]
                    module_tag = module_name if not module_version else '%s-%s' % (
                        module_name, module_version)
                    tips = "Module %s already installed in %s" % (module_tag,
                                                                  module_dir)
                    return True, tips, self.modules_dict[module_name]

            if module_dir:
                if md5_value:
                    # Record the server-reported md5 alongside the module.
                    with open(os.path.join(MODULE_HOME, module_dir, "md5.txt"),
                              "w") as fp:
                        fp.write(md5_value)

                save_path = os.path.join(MODULE_HOME, module_name)
                if os.path.exists(save_path):
                    shutil.rmtree(save_path)
                if from_user_dir:
                    # Copy rather than move: never relocate the user's own dir.
                    shutil.copytree(module_dir, save_path)
                else:
                    shutil.move(module_dir, save_path)
                module_dir = save_path
                tips = "Successfully installed %s" % module_name
                if installed_module_version:
                    tips += "-%s" % installed_module_version
                return True, tips, (module_dir, installed_module_version)
            tips = "Download %s-%s failed" % (module_name, module_version)
            return False, tips, module_dir
                for face_num in range(face_nums):
                    # face_0
                    dirname = 'face_{}'.format(face_num)
                    # ./train_face/filuudleua_0/FAKE/face_0 ./train_face/filuudleua_0/FAKE/face_1
                    facedirname = os.path.join(faceFileFullDir + '/', dirname)
                    # 创建对应的目录
                    if not os.path.isdir(facedirname):
                        # 如果不存在该目录,则创建目录
                        os.makedirs(facedirname)
                    face = facelist[face_num]
                    faceFullName = os.path.join(facedirname + '/', frameFile)
                    print(faceFullName)
                    cv2.imwrite(faceFullName, face)


if __name__ == '__main__':
    # Lightweight 640-px face detector used to locate faces in video frames.
    face_detector_big = hub.Module(
        name="ultra_light_fast_generic_face_detector_1mb_640")
    # frameImageDir = "/home/aistudio/work/Frame_data/filuudleua_0/FAKE/0_123.jpg"
    # face_list = DetectFace(face_detector_big, frameImageDir)

    # print(face_list[0].shape)
    # print(type(face_list[0]))
    frameImageDir = '/home/aistudio/work/Frame_data/'
    train_faceImageDir = '/home/aistudio/work/train_face/'
    validate_faceImageDir = '/home/aistudio/work/validate_face/'
    # Crop faces from every frame into train/validate folders; threshold is
    # presumably a detection-confidence cutoff — confirm in Saver's definition.
    Saver(face_detector_big,
          frameImageDir,
          train_faceImageDir,
          validate_faceImageDir,
          threshold=0.9)
 def __init__(self):
     # Ultra-light 640-px face-detection module.
     self.module = hub.Module(
         name="ultra_light_fast_generic_face_detector_1mb_640")
     # NOTE(review): alpha looks like a blending/smoothing weight and
     # start_flag like a first-frame marker — confirm at the call sites.
     self.alpha = 0.75
     self.start_flag = 1
예제 #20
0
 def setUpClass(self):
     """Prepare the environment once before execution of all tests."""
     # Shared pedestrian-detection module for all tests in this class.
     self.yolov3_pedestrian_detect = hub.Module(
         name="yolov3_darknet53_pedestrian")
    def file_ocr(Input_path):
        """Run ship-name OCR over every .jpg file in a folder.

        Args:
            Input_path: directory path containing the images.

        Returns:
            A list of [file_id, recognized_name, mean_confidence] triples;
            images with no recognition result yield [file_id, '未能识别', 0].
        """
        input_path = DeepCopy(Input_path)

        # Branch toggle: `if 1 == 1` runs the custom-trained model; flip the
        # constants to use the stock PaddleHub model in the `if 1==0` block.
        if 1 == 1:
            # Custom model (self-trained recognizer).
            ocr = PaddleOCR(det_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/ch_ppocr_server_v1.1_det_infer', rec_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/rec_crnn', rec_char_dict_path=os.path.abspath(os.path.dirname(__file__))+'/modules/ppocr_keys_v1.txt', cls_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/ch_ppocr_mobile_v1.1_cls_infer', use_angle_cls=True)
            # Provided (stock) model alternative:
            #ocr = PaddleOCR(det_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/ch_ppocr_server_v1.1_det_infer', rec_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/ch_ppocr_server_v1.1_rec_infer', rec_char_dict_path=os.path.abspath(os.path.dirname(__file__))+'/modules/ppocr_keys_v1.txt', cls_model_dir=os.path.abspath(os.path.dirname(__file__))+'/modules/ch_ppocr_mobile_v1.1_cls_infer', use_angle_cls=True)

            ocr_result_list = []
            files = os.listdir(input_path)
            i = 0
            for file in files:  # walk the folder
                if (file[-3:] == 'jpg'):
                    i = i + 1
                    if (i > 0):
                        print("正在识别第" + str(i) + "张图片...")
                        results = ocr.ocr(input_path + file, cls=True)
                        for line in results:
                            print(line)
                        ocr_result = []
                        if len(results) != 0:  # any recognition result?
                            rec_name = ''
                            confidence_sum = 0
                            for j in range(len(results)):  # concatenate all detected boxes
                                rec_name = rec_name + results[j][1][0]
                                # The recognized text may be reversed.
                                # NOTE(review): reverse_name is applied on
                                # every iteration rather than once after the
                                # loop — confirm this is intended.
                                rec_name = reverse_name(rec_name)
                                confidence_sum = confidence_sum + results[j][1][1]

                            confidence = confidence_sum / len(results)  # mean confidence

                            ocr_result.append(file[:-4])  # image id
                            ocr_result.append(rec_name)  # recognized text
                            ocr_result.append(confidence)  # confidence
                            ocr_result_list.append(ocr_result)
                        else:
                            ocr_result.append(file[:-4])
                            ocr_result.append('未能识别')
                            ocr_result.append(0)
                            ocr_result_list.append(ocr_result)

        if 1==0:
            # Stock PaddleHub OCR model.
            ocr = hub.Module(name="chinese_ocr_db_crnn_server")
            ocr_result_list = []
            files = os.listdir(input_path)
            i = 0
            for file in files:  # walk the folder
                if (file[-3:] == 'jpg'):
                    i = i + 1
                    if (i > 0):
                        print("正在识别第" + str(i) + "张图片...")
                        results = ocr.recognize_text(paths=[input_path + file], visualization=True)
                        ocr_result = []
                        if len(results[0]['data']) != 0:  # any recognition result?
                            rec_name = ''
                            confidence_sum = 0
                            for j in range(len(results[0]['data'])):  # concatenate all detected boxes
                                rec_name = rec_name + results[0]['data'][j]['text']
                                # The recognized text may be reversed.
                                rec_name = reverse_name(rec_name)
                                confidence_sum = confidence_sum + results[0]['data'][j]['confidence']

                            confidence = confidence_sum / len(results[0]['data'])  # mean confidence

                            ocr_result.append(file[:-4])  # image id
                            ocr_result.append(rec_name)  # recognized text
                            ocr_result.append(confidence)  # confidence
                            ocr_result_list.append(ocr_result)
                        else:
                            ocr_result.append(file[:-4])
                            ocr_result.append('未能识别')
                            ocr_result.append(0)
                            ocr_result_list.append(ocr_result)
        return ocr_result_list
예제 #22
0
# CLI options for the SQuAD reading-comprehension fine-tuning demo.
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--num_epoch", type=int, default=1, help="Number of epoches for fine-tuning.")
parser.add_argument("--use_gpu", type=ast.literal_eval, default=True, help="Whether use GPU for fine-tuning, input should be True or False")
parser.add_argument("--learning_rate", type=float, default=3e-5, help="Learning rate used to train with warmup.")
parser.add_argument("--weight_decay", type=float, default=0.01, help="Weight decay rate for L2 regularizer.")
parser.add_argument("--warmup_proportion", type=float, default=0.0, help="Warmup proportion params for warmup strategy")
parser.add_argument("--checkpoint_dir", type=str, default=None, help="Directory to model checkpoint")
parser.add_argument("--max_seq_len", type=int, default=384, help="Number of words of the longest seqence.")
parser.add_argument("--batch_size", type=int, default=8, help="Total examples' number in batch for training.")
parser.add_argument("--use_data_parallel", type=ast.literal_eval, default=True, help="Whether use data parallel.")
args = parser.parse_args()
# yapf: enable.

if __name__ == '__main__':
    # Load Paddlehub BERT pretrained model
    module = hub.Module(name="bert_uncased_L-12_H-768_A-12")
    inputs, outputs, program = module.context(trainable=True,
                                              max_seq_len=args.max_seq_len)

    # Download dataset and use ReadingComprehensionReader to read dataset
    # If you wanna load SQuAD 2.0 dataset, just set version_2_with_negative as True
    dataset = hub.dataset.SQUAD(version_2_with_negative=False)
    # dataset = hub.dataset.SQUAD(version_2_with_negative=True)

    # doc_stride is presumably the sliding-window stride for long documents
    # and max_query_length the question-truncation limit — confirm against
    # the ReadingComprehensionReader documentation.
    reader = hub.reader.ReadingComprehensionReader(
        dataset=dataset,
        vocab_path=module.get_vocab_path(),
        max_seq_len=args.max_seq_len,
        doc_stride=128,
        max_query_length=64)
예제 #23
0
import cv2
import paddlehub as hub

# Sample images; NOTE(review): word_img is loaded but never passed to the
# OCR module below — kept only to preserve the original file read.
word_img = cv2.imread("word3.jpg")

word_img2 = cv2.imread("word4.jpg")

# Server-side Chinese OCR model (DB text detection + CRNN recognition).
ocr = hub.Module(name="chinese_ocr_db_crnn_server")
result = ocr.recognize_text(images=[word_img2], visualization=True)
print(result)

# result[0]["data"] holds one entry per recognized text region.
word_list = result[0]["data"]
print(len(word_list))
# Iterate the entries directly instead of indexing via range(len(...)).
for entry in word_list:
    word = entry["text"]
    print(word)
예제 #24
0
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddlehub as hub

# Chinese BERT-base encoder served through PaddleHub.
module = hub.Module(name="bert_chinese_L-12_H-768_A-12")

# Each inner list is one text (or text pair) to embed.
sample_texts = [["床前明月光", "疑是地上霜"], ["举头望明月"]]
print(module.get_embedding(texts=sample_texts))
예제 #25
0
import paddlehub as hub

# ERNIE-GEN model fine-tuned to generate leave applications.
module = hub.Module(name="ernie_gen_leave")

# Generate on CPU with a beam width of 2 for the single prompt "理由".
results = module.generate(texts=["理由"], use_gpu=False, beam_width=2)
for generated in results:
    print(generated)
예제 #26
0
    switch_main_program(program)

    fc0 = fluid.layers.fc(input=input_feature, size=hid_dim * 4)
    lstm_h, c = fluid.layers.dynamic_lstm(input=fc0,
                                          size=hid_dim * 4,
                                          is_reverse=False)
    lstm_max = fluid.layers.sequence_pool(input=lstm_h, pool_type='max')
    lstm_max_tanh = fluid.layers.tanh(lstm_max)
    fc = fluid.layers.fc(input=lstm_max_tanh, size=hid_dim2, act='tanh')

    return fc


if __name__ == '__main__':
    # Step 1: load the PaddleHub ELMo pretrained model.
    module = hub.Module(name="elmo")
    inputs, outputs, program = module.context(trainable=True)

    # Step 2: read the ChnSentiCorp dataset through LACClassifyReader,
    # which tokenizes with the LAC lexical analyzer.
    dataset = hub.dataset.ChnSentiCorp()
    reader = hub.reader.LACClassifyReader(
        dataset=dataset, vocab_path=module.get_vocab_path())
    word_dict_len = len(reader.vocab)

    word_ids = inputs["word_ids"]
    elmo_embedding = outputs["elmo_embed"]

    # Step 3: switch to the pretrained program before attaching the
    # classification head (bow / cnn / gru / bilstm / lstm).
    switch_main_program(program)
예제 #27
0
import paddlehub as hub
import cv2

# Open the default webcam; every other frame is run through the detector.
video_capture = cv2.VideoCapture(0)
process_this_frame = True

module = hub.Module(name="pyramidbox_lite_server_mask")

while True:
    # Grab one frame of video. read() returns (success_flag, frame);
    # frame is None when the camera is disconnected or the stream ends.
    ret, frame = video_capture.read()
    if not ret:
        # Skip instead of crashing inside cv2.resize(None, ...).
        continue

    # Shrink to 1/4 size so face detection runs faster.
    # The module expects list[numpy.ndarray], shape [H, W, C], BGR order.
    small_frame = cv2.resize(frame, (0, 0), fx=0.25, fy=0.25)
    imglist = [small_frame]

    # Process only every other frame to save time.
    if process_this_frame:
        results = module.face_detection(images=imglist)
        print(results)

    process_this_frame = not process_this_frame
 @classmethod
 def setUpClass(cls):
     """Prepare the environment once before execution of all tests."""
     # unittest invokes this as cls.setUpClass(); without @classmethod the
     # plain function would be called with no arguments and raise TypeError.
     # The module handle is stored on the class so all tests share one load.
     cls.animal_classify = hub.Module(
         name="mobilenet_v3_large_imagenet_ssld")
예제 #29
0
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddlehub as hub

# Three-layer Chinese RoBERTa (rbt3) encoder from PaddleHub.
module = hub.Module(name="rbt3")

# Each inner list is one text (or text pair) to embed.
sample_texts = [["床前明月光", "疑是地上霜"], ["举头望明月"]]
print(module.get_embedding(texts=sample_texts))
예제 #30
0
File: train.py  Project: houj04/PaddleHub
                    type=int,
                    default=32,
                    help="Total examples' number in batch for training.")
# Remaining command-line options; args is parsed once at import time.
parser.add_argument(
    "--checkpoint_dir", type=str, default='./checkpoint',
    help="Directory to model checkpoint")
parser.add_argument(
    "--save_interval", type=int, default=1,
    help="Save checkpoint every n epoch.")

args = parser.parse_args()

if __name__ == '__main__':
    model = hub.Module(name='ernie_tiny', version='2.0.1', task='seq-cls')

    train_dataset = ChnSentiCorp(tokenizer=model.get_tokenizer(),
                                 max_seq_len=args.max_seq_len,
                                 mode='train')
    dev_dataset = ChnSentiCorp(tokenizer=model.get_tokenizer(),
                               max_seq_len=args.max_seq_len,
                               mode='dev')
    test_dataset = ChnSentiCorp(tokenizer=model.get_tokenizer(),
                                max_seq_len=args.max_seq_len,
                                mode='test')

    optimizer = paddle.optimizer.AdamW(learning_rate=args.learning_rate,
                                       parameters=model.parameters())
    trainer = hub.Trainer(model,
                          optimizer,