Esempio n. 1
0
    def __new__(cls,
                name=None,
                directory=None,
                module_dir=None,
                version=None,
                **kwargs):
        """Create a module object, dispatching on how the module is located.

        When called on the class literally named ``Module`` the instance is
        produced by ``init_with_name`` (if ``name`` is given) or by
        ``init_with_directory`` (if ``directory`` or the deprecated
        ``module_dir`` is given), and a ``CacheUpdater`` is started for it.

        When called on any other class, a plain instance is allocated if
        ``name`` or ``directory`` was supplied; otherwise the module is loaded
        from the directory containing the file that defines ``cls``.

        Args:
            name: Module name, forwarded to ``init_with_name``.
            directory: Path forwarded to ``init_with_directory``.
            module_dir: Deprecated alias of ``directory``. May be a path or a
                list/tuple whose first item is the path.
            version: Version forwarded to ``init_with_name``.
            **kwargs: Extra arguments forwarded to the ``init_with_*`` call.

        Returns:
            The created module object.

        Raises:
            ValueError: If ``Module`` is requested without ``name``,
                ``directory`` or ``module_dir``.
        """
        if cls.__name__ != "Module":
            if name or directory:
                return object.__new__(cls)
            # No location given: load from the directory of the file that
            # defines the subclass.
            directory = os.path.dirname(
                os.path.abspath(sys.modules[cls.__module__].__file__))
            return Module.init_with_directory(directory=directory, **kwargs)

        if name:
            module = cls.init_with_name(name=name, version=version, **kwargs)
        elif directory:
            module = cls.init_with_directory(directory=directory, **kwargs)
        elif module_dir:
            logger.warning(
                "Parameter module_dir is deprecated, please use directory to specify the path"
            )
            if isinstance(module_dir, (list, tuple)):
                directory = module_dir[0]
            else:
                directory = module_dir
            module = cls.init_with_directory(directory=directory, **kwargs)
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on ``module``; fail with a clear message.
            raise ValueError(
                "One of name, directory or module_dir must be specified")

        CacheUpdater("update_cache", module.name, module.version).start()
        return module
Esempio n. 2
0
 def preinstall_modules(modules):
     """Load every requested module and build its serving configuration.

     Each entry of ``modules`` is either ``"name"`` or ``"name==version"``.
     When the same module name appears more than once, only the latest
     entry is kept: the configuration produced for the earlier entry is
     removed before the new one is added.

     Args:
         modules: Iterable of module specifications, or ``None``.

     Returns:
         A list of dicts with the keys ``"module"``, ``"version"`` and
         ``"category"``. ``None`` is returned when ``modules`` is ``None``
         (kept from the original behaviour).

     Note:
         Any error while loading a module is printed and the process exits
         with status 1.
     """
     configs = []
     module_exist = {}
     if modules is not None:
         for module in modules:
             if "==" in module:
                 parts = module.split("==")
                 module_name, module_version = parts[0], parts[1]
             else:
                 module_name, module_version = module, None

             if module_exist.get(module_name, "") != "":
                 print(module_name, "==", module_exist.get(module_name),
                       " will be ignored cause new version is specified.")
                 # Drop the config of the superseded entry itself. Popping
                 # the last config would remove an unrelated module when
                 # other modules were listed in between.
                 configs = [
                     config for config in configs
                     if config["module"] != module_name
                 ]
             module_exist.update({module_name: module_version})
             try:
                 CacheUpdater("hub_serving_start",
                              module=module_name,
                              version=module_version).start()
                 m = hub.Module(name=module_name, version=module_version)
                 method_name = m.desc.attr.map.data['default_signature'].s
                 if method_name == "":
                     raise RuntimeError("{} cannot be use for "
                                        "predicting".format(module_name))
                 configs.append({
                     "module":
                     module_name,
                     "version":
                     m.version,
                     "category":
                     str(m.type).split("/")[0].upper()
                 })
             except Exception as err:
                 print(err, ", start PaddleHub Serving unsuccessfully.")
                 exit(1)
         return configs
Esempio n. 3
0
    def execute(self, argv):
        """Install the module named by the first command-line argument.

        The argument is treated as a package file when it ends with
        ``tar.gz`` or ``phm``, as a module directory when it is an existing
        directory, and otherwise as ``name`` or ``name==version``.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the install target.

        Returns:
            ``False`` when no argument is given, otherwise ``True`` after the
            tips returned by the module manager have been printed.
        """
        if not argv:
            print("ERROR: Please specify a module name.\n")
            self.help()
            return False

        target = argv[0]
        extra = {"command": "install"}

        if target.endswith("tar.gz") or target.endswith("phm"):
            install_kwargs = {"module_package": target}
        elif os.path.exists(target) and os.path.isdir(target):
            install_kwargs = {"module_dir": target}
        else:
            if "==" in target:
                pieces = target.split("==")
                module_name, module_version = pieces[0], pieces[1]
            else:
                module_name, module_version = target, None
            # Only installs by name are reported to the cache updater.
            CacheUpdater("hub_install", module_name, module_version).start()
            install_kwargs = {
                "module_name": module_name,
                "module_version": module_version,
            }

        _result, tips, _module_dir = default_module_manager.install_module(
            extra=extra, **install_kwargs)
        print(tips)

        return True
Esempio n. 4
0
    def execute(self, argv):
        """Show information about a model directory or a module.

        If ``<argv[0]>/info.yml`` exists it is shown as a model. Otherwise
        the module manager is searched, falling back to a directory of that
        name under the current working directory.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the module or model.

        Returns:
            ``False`` when no argument is given, otherwise ``True``.
        """
        if not argv:
            print("ERROR: Please specify a module or a model\n")
            self.help()
            return False

        module_name = argv[0]
        CacheUpdater("hub_show", module_name).start()

        # A directory carrying info.yml is handled as a model.
        model_info_file = os.path.join(module_name, "info.yml")
        if os.path.exists(model_info_file):
            self.show_model_info(model_info_file)
            return True

        cwd = os.getcwd()
        found = default_module_manager.search_module(module_name)
        if found:
            module_dir = found
        else:
            module_dir = (os.path.join(cwd, module_name), None)

        if os.path.exists(module_dir[0]):
            self.show_module_info(module_dir)
        else:
            print("%s is not existed!" % module_name)
        return True
Esempio n. 5
0
def download(name,
             save_path,
             version=None,
             decompress=True,
             resource_type='Model',
             extra=None):
    """Download a hub resource into ``save_path`` unless it already exists.

    Args:
        name: Resource name; also the name of the destination entry.
        save_path: Directory that will contain the resource.
        version: Optional resource version passed to the hub server.
        decompress: When true, a downloaded tar archive is uncompressed
            before being moved into place.
        resource_type: Resource type passed to the hub server.
        extra: Optional dict of extra request data. A fresh dict is used
            when omitted, so nothing is shared between calls.

    Returns:
        None. Returns immediately when ``save_path/name`` already exists.

    Raises:
        ServerConnectionError: If the hub server check fails.
        ResourceNotFoundError: If the server returns no result.
    """
    if extra is None:
        extra = {}

    target = os.path.realpath(os.path.join(save_path, name))
    if os.path.exists(target):
        return

    if not hub.HubServer()._server_check():
        raise ServerConnectionError

    search_result = hub.HubServer().get_resource_url(
        name, resource_type=resource_type, version=version, extra=extra)

    if not search_result:
        raise ResourceNotFoundError(name, version)
    CacheUpdater("x_download", name, version).start()
    url = search_result['url']

    # Download into a temporary directory and move the result into place.
    with tmp_dir() as _dir:
        if not os.path.exists(save_path):
            os.makedirs(save_path)
        _, _, savefile = default_downloader.download_file(url=url,
                                                          save_path=_dir,
                                                          print_progress=True)
        if tarfile.is_tarfile(savefile) and decompress:
            _, _, savefile = default_downloader.uncompress(file=savefile,
                                                           print_progress=True)
        shutil.move(savefile, target)
Esempio n. 6
0
    def execute(self, argv):
        """Search the hub server for modules and print them as a table.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the search pattern.
                ``'.*'`` is used when no argument is given.

        Returns:
            Always ``True``.
        """
        pattern = argv[0] if argv else '.*'
        CacheUpdater("hub_search", pattern).start()

        resource_list = hub.HubServer().search_resource(
            pattern, resource_type="Module", extra={"command": "search"})

        # Column widths differ between Windows and other platforms.
        placeholders = [20, 8, 8, 20] if utils.is_windows() else [30, 8, 8, 25]
        tp = TablePrinter(
            titles=["ResourceName", "Type", "Version", "Summary"],
            placeholders=placeholders)

        if len(resource_list) == 0 and hub.HubServer()._server_check(
        ) is False:
            print(
                "Request Hub-Server unsuccessfully, please check your network."
            )

        for res_name, res_type, res_version, res_summary in resource_list:
            name_color = "yellow" if res_type == "Module" else "light_red"
            tp.add_line(
                contents=[res_name, res_type, res_version, res_summary],
                colors=[name_color, None, None, None])

        print(tp.get_text())
        return True
Esempio n. 7
0
    def execute(self, argv):
        """Convert a model directory into a module package.

        Arguments are read through ``self.parser.parse_args()``; ``argv`` is
        not used by this method.

        Args:
            argv: Command-line arguments (unused here).

        Returns:
            ``False`` when the module name or model directory is missing, or
            when the model directory is not a directory; otherwise ``True``.
        """
        args = self.parser.parse_args()

        if not (args.module_name and args.model_dir):
            ConvertCommand.show_help()
            return False

        self.module = args.module_name
        if args.module_version is not None:
            self.version = args.module_version
        else:
            self.version = '1.0.0'
        self.src = args.model_dir

        if not os.path.isdir(self.src):
            print('`{}` is not exists or not a directory path'.format(self.src))
            return False

        if args.output_dir is not None:
            self.dest = args.output_dir
        else:
            # Default output directory: "<module>_<current timestamp>".
            self.dest = os.path.join('{}_{}'.format(self.module,
                                                    str(time.time())))

        CacheUpdater("hub_convert", self.module, self.version).start()
        os.makedirs(self.dest)

        # The create_* steps run while the temporary directory exists.
        with tmp_dir() as _dir:
            self._tmp_dir = _dir
            for build_step in (self.create_module_py, self.create_init_py,
                               self.create_serving_demo_py,
                               self.create_module_tar):
                build_step()

        print('The converted module is stored in `{}`.'.format(self.dest))

        return True
Esempio n. 8
0
    def __init__(self,
                 name=None,
                 module_dir=None,
                 signatures=None,
                 module_info=None,
                 assets=None,
                 processor=None,
                 extra_info=None,
                 version=None):
        """Initialise a module from a name, a module directory or signatures.

        Exactly one source is used, checked in this order: ``name``,
        ``module_dir``, ``signatures``. Initialisation runs while holding an
        exclusive ``lock.flock`` on ``config.json`` under ``CONF_HOME``; the
        lock is released and the file closed even when initialisation fails.

        Args:
            name: Module name, passed to ``_init_with_name``.
            module_dir: Indexable pair; item 0 is the module path and item 1
                is used as the version.
            signatures: Signatures passed to ``_init_with_signature``.
            module_info: Passed to ``_generate_module_info`` (signature
                path only).
            assets: Assets stored on the module (signature path only).
            processor: Optional ``BaseProcessor`` subclass (signature path
                only).
            extra_info: Optional dict of extra information.
            version: Module version, used together with ``name``.

        Raises:
            TypeError: If ``extra_info`` is not a dict, or ``processor`` is
                not a subclass of ``BaseProcessor``.
            ValueError: If none of ``name``, ``module_dir`` or ``signatures``
                is given.
        """
        self.desc = module_desc_pb2.ModuleDesc()
        self.program = None
        self.assets = []
        self.helper = None
        self.signatures = {}
        self.default_signature = None
        self.module_info = None
        self.processor = None
        self.extra_info = {} if extra_info is None else extra_info
        if not isinstance(self.extra_info, dict):
            raise TypeError(
                "The extra_info should be an instance of python dict")

        # cache data
        self.last_call_name = None
        self.cache_feed_dict = None
        self.cache_fetch_dict = None
        self.cache_program = None

        # ``with`` closes the lock file and ``finally`` releases the lock on
        # every path, including the error paths below.
        with open(os.path.join(CONF_HOME, 'config.json')) as fp_lock:
            lock.flock(fp_lock, lock.LOCK_EX)
            try:
                if name:
                    self._init_with_name(name=name, version=version)
                elif module_dir:
                    self._init_with_module_file(module_dir=module_dir[0])
                    name = module_dir[0].split("/")[-1]
                    version = module_dir[1]
                elif signatures:
                    if processor:
                        if not issubclass(processor, BaseProcessor):
                            raise TypeError(
                                "Processor shoule be an instance of paddlehub.BaseProcessor"
                            )
                    if assets:
                        self.assets = utils.to_list(assets)
                    self.processor = processor
                    self._generate_module_info(module_info)
                    self._init_with_signature(signatures=signatures)
                else:
                    raise ValueError("Module initialized parameter is empty")
            finally:
                lock.flock(fp_lock, lock.LOCK_UN)
        CacheUpdater(name, version).start()
Esempio n. 9
0
 def execute(self, argv):
     """Uninstall the module named by the first command-line argument.

     Args:
         argv: Command-line arguments; ``argv[0]`` is the module name.

     Returns:
         ``False`` when no argument is given, otherwise ``True`` after the
         tips returned by the module manager have been printed.
     """
     if argv:
         target = argv[0]
         CacheUpdater("hub_uninstall", target).start()
         _result, tips = default_module_manager.uninstall_module(
             module_name=target)
         print(tips)
         return True

     print("ERROR: Please specify a module\n")
     self.help()
     return False
Esempio n. 10
0
 def execute(self, argv):
     """Print a table of all modules known to the module manager.

     Args:
         argv: Command-line arguments (unused).

     Returns:
         Always ``True``.
     """
     CacheUpdater("hub_list").start()
     all_modules = default_module_manager.all_modules()

     # Column widths differ between Windows and other platforms.
     placeholders = [20, 40] if utils.is_windows() else [25, 50]
     tp = TablePrinter(
         titles=["ModuleName", "Path"], placeholders=placeholders)

     for module_name, module_dir in all_modules.items():
         # Item 0 of each entry is shown in the "Path" column.
         row = [module_name, module_dir[0]]
         tp.add_line(contents=row)

     print(tp.get_text())
     return True
Esempio n. 11
0
    def start_bert_serving(args):
        """Start a Bert service for the first module in ``args.modules``.

        Args:
            args: Parsed arguments providing ``port``, ``use_gpu``,
                ``modules`` and ``gpu``.

        Returns:
            ``False`` when the platform is not Linux or the port is occupied.
            Otherwise nothing is returned once ``BertServer.run`` returns.
        """
        if platform.system() != "Linux":
            print("Error. Bert Service only support linux.")
            return False

        port_busy = ServingCommand.is_port_occupied("127.0.0.1", args.port)
        if port_busy is True:
            print("Port %s is occupied, please change it." % args.port)
            return False

        # Imported here, after the platform and port checks have passed.
        from paddle_gpu_serving.run import BertServer
        bs = BertServer(with_gpu=args.use_gpu)
        bs.with_model(model_name=args.modules[0])
        updater = CacheUpdater("hub_bert_service",
                               module=args.modules[0],
                               version="0.0.0")
        updater.start()
        bs.run(gpu_index=args.gpu, port=int(args.port))
Esempio n. 12
0
    def execute(self, argv):
        """Print the usage text listing every command shown in help.

        Commands whose ``show_in_help`` or ``description`` is falsy are
        left out.

        Args:
            argv: Command-line arguments (unused).

        Returns:
            Always ``True``.
        """
        CacheUpdater("hub_help").start()
        hub_command = BaseCommand.command_dict["hub"]

        pieces = [
            "\n",
            "Usage:\n",
            "%s <command> [options]\n" % hub_command.name,
            "\n",
            "Commands:\n",
        ]
        for _command_name, command in self.get_all_commands().items():
            if command.show_in_help and command.description:
                pieces.append("  %-15s\t\t%s\n" % (command.name,
                                                   command.description))

        print("".join(pieces))
        return True
Esempio n. 13
0
    def __new__(cls, name=None, directory=None, module_dir=None, version=None):
        """Create a module object, dispatching on how the module is located.

        When called on the class literally named ``Module`` the instance is
        produced by ``init_with_name`` (if ``name`` is given) or by
        ``init_with_directory`` (if ``directory`` or the deprecated
        ``module_dir`` is given), and a ``CacheUpdater`` is started for it.
        For any other class a plain instance is allocated.

        Args:
            name: Module name, forwarded to ``init_with_name``.
            directory: Path forwarded to ``init_with_directory``.
            module_dir: Deprecated alias of ``directory``. May be a path or a
                list/tuple whose first item is the path.
            version: Version forwarded to ``init_with_name``.

        Returns:
            The created module object.

        Raises:
            ValueError: If ``Module`` is requested without ``name``,
                ``directory`` or ``module_dir``.
        """
        if cls.__name__ != "Module":
            return object.__new__(cls)

        if name:
            module = cls.init_with_name(name=name, version=version)
        elif directory:
            module = cls.init_with_directory(directory=directory)
        elif module_dir:
            logger.warning(
                "Parameter module_dir is deprecated, please use directory to specify the path"
            )
            if isinstance(module_dir, (list, tuple)):
                directory = module_dir[0]
            else:
                directory = module_dir
            module = cls.init_with_directory(directory=directory)
        else:
            # Previously this fell through and failed with an
            # UnboundLocalError on ``module``; fail with a clear message.
            raise ValueError(
                "One of name, directory or module_dir must be specified")

        CacheUpdater("update_cache", module.name, module.version).start()
        return module
Esempio n. 14
0
    def execute(self, argv):
        """Delete everything under the cache directory and report the result.

        The tree is walked bottom-up; files are deleted with ``os.remove``
        and directories with ``os.rmdir``. An entry that cannot be processed
        marks the run as failed, and the walk continues.

        Args:
            argv: Command-line arguments (unused).

        Returns:
            ``True`` when no entry failed, ``False`` otherwise.
        """
        CacheUpdater("hub_clear").start()
        result = True
        total_file_size = 0
        total_file_count = 0

        for rootdir, dirs, files in os.walk(self.cache_dir(), topdown=False):
            # Files first, then directories, within each visited directory.
            for entries, remove in ((files, os.remove), (dirs, os.rmdir)):
                for entry in entries:
                    path = os.path.join(rootdir, entry)
                    try:
                        size = os.path.getsize(path)
                        count = file_num_in_dir(path)
                        remove(path)
                        total_file_size += size
                        total_file_count += count
                    except Exception:
                        result = False

        if total_file_count != 0:
            print("Clear %d cached files." % total_file_count)
            print("Free disk space %s." %
                  file_size_in_human_format(total_file_size))
        elif result:
            print("No cache to release.")
        else:
            print("Clear cache failed!")

        return result
Esempio n. 15
0
    def preinstall_modules(self):
        """Load every module in ``self.modules_info`` and record its metadata.

        For each entry a ``hub.Module`` is created from its ``"init_args"``
        (the entry key is used as ``name`` unless ``"directory"`` is given).
        The entry is then updated with ``"method_name"``, ``"code_version"``,
        ``"version"``, ``"category"``, ``"module"`` and ``"name"``.

        Raises:
            RuntimeError: If a module's ``serving_func_name`` is ``None``.
        """
        for key, value in self.modules_info.items():
            init_args = value["init_args"]
            CacheUpdater("hub_serving_start",
                         module=key,
                         version=init_args.get("version", "0.0.0")).start()

            if "directory" not in init_args:
                init_args.update({"name": key})
            m = hub.Module(**init_args)
            method_name = m.serving_func_name
            if method_name is None:
                # The ``exit(1)`` that used to follow this raise could never
                # run and has been removed.
                raise RuntimeError("{} cannot be use for "
                                   "predicting".format(key))
            category = str(m.type).split("/")[0].upper()
            self.modules_info[key].update({
                "method_name": method_name,
                "code_version": m.code_version,
                "version": m.version,
                "category": category,
                "module": m,
                "name": m.name
            })
Esempio n. 16
0
    def stop_serving(self, port):
        """Stop the serving process recorded for ``port``.

        The record is read from ``serving_<port>.json`` under ``CONF_HOME``
        and the file is deleted. If the recorded process still exists it is
        sent ``SIGTERM`` (to the process on Windows, to the process group
        elsewhere).

        Args:
            port: Port used to locate the pid file.
        """
        filepath = os.path.join(CONF_HOME, "serving_%s.json" % str(port))
        info = self.load_pid_file(filepath, port)
        if info is False:
            return

        pid, module, start_time = (info["pid"], info["module"],
                                   info["start_time"])
        if os.path.exists(filepath):
            os.remove(filepath)

        if not pid_is_exist(pid):
            print("PaddleHub Serving has been stopped.")
            return

        print("PaddleHub Serving will stop.")
        period = {"period_time": time.time() - start_time}
        CacheUpdater("hub_serving_stop", module=module,
                     addition=period).start()

        # Only the selected function is looked up on ``os``.
        terminate = os.kill if platform.system() == "Windows" else os.killpg
        terminate(pid, signal.SIGTERM)
Esempio n. 17
0
 def execute(self, argv):
     """Print the hub version string.

     Args:
         argv: Command-line arguments (unused).

     Returns:
         Always ``True``.
     """
     CacheUpdater("hub_version").start()
     banner = "hub %s" % version.hub_version
     print(banner)
     return True
Esempio n. 18
0
    def execute(self, argv):
        """Run a module from the command line and print its results.

        Modules whose ``code_version`` is ``"v2"`` are run through their own
        ``run_func`` with the remaining arguments. Other modules have their
        config and input arguments added to ``self.parser``, the remaining
        arguments are parsed, and the module is called with its default
        signature.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the module name and
                the rest are passed to the module.

        Returns:
            ``False`` when no module name is given, the module cannot be
            found or is not runnable, no module arguments are given (non-v2
            modules), or the input data fails ``check_data``. Otherwise
            ``True``.
        """

        if not argv:
            print("ERROR: Please specify a module name.\n")
            self.help()
            return False

        module_name = argv[0]
        CacheUpdater("hub_run", module_name).start()
        self.parser.prog = '%s %s %s' % (ENTRY, self.name, module_name)
        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Data input to the module")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description=
            "Run configuration for controlling module behavior, not required")

        self.module = self.find_module(module_name)
        if not self.module:
            return False

        # If the module is not executable, give an alarm and exit
        if not self.module.is_runable:
            print("ERROR! Module %s is not executable." % module_name)
            return False

        if self.module.code_version == "v2":
            results = self.module.run_func(argv[1:])
        else:
            self.module.check_processor()
            self.add_module_config_arg()
            self.add_module_input_arg()

            if not argv[1:]:
                self.help()
                return False

            self.args = self.parser.parse_args(argv[1:])

            config = self.get_config()
            data = self.get_data()

            try:
                self.check_data(data)
            except DataFormatError:
                self.help()
                return False

            results = self.module(sign_name=self.module.default_signature,
                                  data=data,
                                  use_gpu=self.args.use_gpu,
                                  batch_size=self.args.batch_size,
                                  **config)

        if six.PY2:
            # Best effort: on Python 2 try to render the results as JSON and
            # fall back to the raw object when that fails. ``Exception`` is
            # caught instead of everything so that KeyboardInterrupt and
            # SystemExit still propagate.
            try:
                results = json.dumps(results,
                                     encoding="utf8",
                                     ensure_ascii=False)
            except Exception:
                pass

        print(results)

        return True
Esempio n. 19
0
    def execute(self, argv):
        """Download a model or module archive from the hub server.

        ``argv[0]`` is ``name`` or ``name==version``. When the requested type
        is neither ``"Module"`` nor ``"Model"``, a module is looked up first
        and a model second. A file already present at the destination is
        reused if its MD5 matches the server value, otherwise it is deleted
        and downloaded again.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the resource and the
                rest are parsed by ``self.parser``.

        Returns:
            ``False`` when no argument is given or the download fails;
            ``True`` otherwise, including when no URL was found.
        """
        if not argv:
            print("ERROR: Please provide the model/module name\n")
            self.help()
            return False

        spec = argv[0]
        if "==" in spec:
            pieces = spec.split("==")
            mod_name, mod_version = pieces[0], pieces[1]
        else:
            mod_name, mod_version = spec, None

        CacheUpdater("hub_download", mod_name, mod_version).start()
        self.args = self.parser.parse_args(argv[1:])
        self.args.type = self.check_type(self.args.type)

        extra = {"command": "download"}

        def lookup(resource_type):
            # One server query for the requested resource type.
            return hub.HubServer().get_resource_url(
                mod_name,
                resource_type=resource_type,
                version=mod_version,
                extra=extra)

        if self.args.type in ["Module", "Model"]:
            search_result = lookup(self.args.type)
        else:
            search_result = lookup("Module")
            self.args.type = "Module"
            if search_result == {}:
                search_result = lookup("Model")
                self.args.type = "Model"

        url = search_result.get('url', None)
        expected_md5 = search_result.get('md5', None)
        if not url:
            if hub.HubServer()._server_check() is False:
                tips = "Request Hub-Server unsuccessfully, please check your network."
            else:
                tips = "PaddleHub can't find model/module named %s" % mod_name
                if mod_version:
                    tips += " with version %s" % mod_version
                tips += ". Please use the 'hub search' command to find the correct model/module name."
            print(tips)
            return True

        file_name = os.path.basename(url)
        local_path = os.path.join(self.args.output_path, file_name)
        need_to_download_file = True
        if os.path.exists(local_path):
            print("File %s already existed\nWait to check the MD5 value" %
                  file_name)
            actual_md5 = utils.md5_of_file(local_path)
            if expected_md5 == actual_md5:
                print("MD5 check pass.")
                need_to_download_file = False
            else:
                print("MD5 check failed!\nDelete invalid file.")
                os.remove(local_path)

        if need_to_download_file:
            result, tips, local_path = default_downloader.download_file(
                url=url, save_path=self.args.output_path, print_progress=True)
            if not result:
                print(tips)
                return False

        if self.args.uncompress:
            result, tips, local_path = default_downloader.uncompress(
                file=local_path,
                dirname=self.args.output_path,
                delete_file=True,
                print_progress=True)
            print(tips)
            if self.args.type == "Model":
                os.rename(local_path, "./" + mod_name)
        return True
Esempio n. 20
0
    def execute(self, argv):
        """Run hyperparameter search for a finetune script and log the result.

        ``argv[0]`` is the script to be finetuned; the remaining arguments
        are parsed by ``self.parser``. An evaluator and a tuning strategy are
        built from the parsed options, the strategy is stepped for at most
        ``self.args.round`` rounds, and the best hyperparameters are printed
        and written to ``log_file.txt`` under the strategy's output directory.

        Args:
            argv: Command-line arguments; ``argv[0]`` is the script path.

        Returns:
            ``False`` when no script or no further arguments are given,
            otherwise ``True``.

        Raises:
            ValueError: If the evaluator or tuning strategy name is unknown.
        """
        CacheUpdater("hub_autofinetune").start()
        if not argv:
            print(
                "ERROR: Please specify a script to be finetuned in python.\n")
            self.help()
            return False

        self.fintunee_script = argv[0]

        self.parser.prog = '%s %s %s' % (ENTRY, self.name,
                                         self.fintunee_script)
        self.arg_params_to_be_searched_group = self.parser.add_argument_group(
            title="Input options",
            description="Hyperparameters to be searched.")
        self.arg_config_group = self.parser.add_argument_group(
            title="Autofinetune config options",
            description=
            "Autofintune configuration for controlling autofinetune behavior, not required"
        )
        self.arg_finetuned_task_group = self.parser.add_argument_group(
            title="Finetuned task config options",
            description=
            "Finetuned task configuration for controlling finetuned task behavior, not required"
        )

        self.add_params_file_arg()
        self.add_autoft_config_arg()

        # Options after the script name are required.
        if not argv[1:]:
            self.help()
            return False

        self.args = self.parser.parse_args(argv[1:])
        options_str = ""
        if self.args.opts is not None:
            options_str = self.convert_to_other_options(self.args.opts)

        # ``--gpu`` is a comma-separated list of integer device ids.
        device_ids = self.args.gpu.strip().split(",")
        device_ids = [int(device_id) for device_id in device_ids]

        # Select the evaluator by case-insensitive name.
        if self.args.evaluator.lower() == "fulltrail":
            evaluator = FullTrailEvaluator(self.args.param_file,
                                           self.fintunee_script,
                                           options_str=options_str)
        elif self.args.evaluator.lower() == "populationbased":
            evaluator = PopulationBasedEvaluator(self.args.param_file,
                                                 self.fintunee_script,
                                                 options_str=options_str)
        else:
            raise ValueError("The evaluate %s is not defined!" %
                             self.args.evaluator)

        # Select the tuning strategy by case-insensitive name.
        if self.args.tuning_strategy.lower() == "hazero":
            autoft = HAZero(evaluator,
                            cudas=device_ids,
                            popsize=self.args.popsize,
                            output_dir=self.args.output_dir)
        elif self.args.tuning_strategy.lower() == "pshe2":
            autoft = PSHE2(evaluator,
                           cudas=device_ids,
                           popsize=self.args.popsize,
                           output_dir=self.args.output_dir)
        else:
            raise ValueError("The tuning strategy %s is not defined!" %
                             self.args.tuning_strategy)

        run_round_cnt = 0
        # Accumulates the results returned by every ``autoft.step`` call.
        # It is later indexed by ``tuple(best_hparams_origin)`` and each value
        # is unpacked as (model directory, rank).
        solutions_modeldirs = {}
        print("PaddleHub Autofinetune starts.")
        # Step until the strategy reports it should stop or the round limit
        # is reached; each round gets its own "round<N>" output directory.
        while (not autoft.is_stop()) and run_round_cnt < self.args.round:
            print("PaddleHub Autofinetune starts round at %s." % run_round_cnt)
            output_dir = autoft._output_dir + "/round" + str(run_round_cnt)
            res = autoft.step(output_dir)
            solutions_modeldirs.update(res)
            evaluator.new_round()
            run_round_cnt = run_round_cnt + 1
        print("PaddleHub Autofinetune ends.")

        best_hparams_origin = autoft.get_best_hparams()
        # NOTE(review): presumably shares the best hyperparameters across MPI
        # ranks so that every rank uses the same value; confirm against
        # the ``mpi`` helper.
        best_hparams_origin = autoft.mpi.bcast(best_hparams_origin)

        with open(autoft._output_dir + "/log_file.txt", "w") as f:
            best_hparams = evaluator.convert_params(best_hparams_origin)
            print("The final best hyperparameters:")
            f.write("The final best hyperparameters:\n")
            for index, hparam_name in enumerate(autoft.hparams_name_list):
                print("%s=%s" % (hparam_name, best_hparams[index]))
                f.write(hparam_name + "\t:\t" + str(best_hparams[index]) +
                        "\n")

            # Look up where the best solution's model was saved.
            best_hparams_dir, best_hparams_rank = solutions_modeldirs[tuple(
                best_hparams_origin)]

            print("The final best eval score is %s." %
                  autoft.get_best_eval_value())

            if autoft.mpi.multi_machine:
                print("The final best model parameters are saved as " +
                      autoft._output_dir + "/best_model on rank " +
                      str(best_hparams_rank) + " .")
            else:
                print("The final best model parameters are saved as " +
                      autoft._output_dir + "/best_model .")
            f.write("The final best eval score is %s.\n" %
                    autoft.get_best_eval_value())

            best_model_dir = autoft._output_dir + "/best_model"

            # Only the rank recorded for the best solution copies the model.
            if autoft.mpi.rank == best_hparams_rank:
                shutil.copytree(best_hparams_dir, best_model_dir)

            # NOTE(review): the "saved as" sentence is written without a
            # trailing newline, so the table header that follows lands on the
            # same line of the log file; confirm whether this is intended.
            if autoft.mpi.multi_machine:
                f.write(
                    "The final best model parameters are saved as ./best_model on rank " \
                    + str(best_hparams_rank) + " .")
                f.write("\t".join(autoft.hparams_name_list) +
                        "\tsaved_params_dir\trank\n")
            else:
                f.write(
                    "The final best model parameters are saved as ./best_model ."
                )
                f.write("\t".join(autoft.hparams_name_list) +
                        "\tsaved_params_dir\n")

            print(
                "The related information about hyperparamemters searched are saved as %s/log_file.txt ."
                % autoft._output_dir)
            # One tab-separated row per evaluated solution: converted
            # hyperparameters, model directory, and (multi-machine only) rank.
            for solution, modeldir in solutions_modeldirs.items():
                param = evaluator.convert_params(solution)
                param = [str(p) for p in param]
                if autoft.mpi.multi_machine:
                    f.write("\t".join(param) + "\t" + modeldir[0] + "\t" +
                            str(modeldir[1]) + "\n")
                else:
                    f.write("\t".join(param) + "\t" + modeldir[0] + "\n")

        return True