Beispiel #1
0
    def step(self, output_dir):
        """Run one tuning round: evaluate every candidate hyperparameter
        solution on the free GPUs and feed results back to the strategy.

        Args:
            output_dir: directory under which per-solution model dirs and
                log files are created.

        Returns:
            dict mapping each solution (as a tuple) to the model directory
            its evaluation wrote to.
        """
        solutions = self.get_current_hparams()

        params_cudas_dirs = []
        solution_results = []
        cnt = 0
        solutions_modeldirs = {}
        mkdir(output_dir)

        for idx, solution in enumerate(solutions):
            # Take the first free GPU and mark it busy for this job.
            cuda = self.is_cuda_free["free"][0]
            modeldir = output_dir + "/model-" + str(idx) + "/"
            log_file = output_dir + "/log-" + str(idx) + ".info"
            params_cudas_dirs.append([solution, cuda, modeldir, log_file])
            solutions_modeldirs[tuple(solution)] = modeldir
            self.is_cuda_free["free"].remove(cuda)
            self.is_cuda_free["busy"].append(cuda)
            # Flush a batch once `self.thread` jobs are queued, or on the
            # last solution (possibly a partial batch).
            if len(params_cudas_dirs
                   ) == self.thread or idx == len(solutions) - 1:
                tp = ThreadPool(len(params_cudas_dirs))
                solution_results += tp.map(self.evaluator.run,
                                           params_cudas_dirs)
                cnt += 1
                tp.close()
                tp.join()
                # Return this batch's GPUs to the free pool.
                for param_cuda in params_cudas_dirs:
                    self.is_cuda_free["free"].append(param_cuda[1])
                    self.is_cuda_free["busy"].remove(param_cuda[1])
                params_cudas_dirs = []

        self.feedback(solutions, solution_results)

        return solutions_modeldirs
Beispiel #2
0
 def __init__(self, module_home=None):
     """Initialize the manager's local module directory.

     Falls back to MODULE_HOME when no explicit path is given; the
     directory is created on demand and must not be a regular file.

     Raises:
         ValueError: if the module home path exists but is a file.
     """
     self.local_modules_dir = module_home or MODULE_HOME
     self.modules_dict = {}
     if os.path.isfile(self.local_modules_dir):
         raise ValueError("Module home should be a folder, not a file")
     if not os.path.exists(self.local_modules_dir):
         utils.mkdir(self.local_modules_dir)
Beispiel #3
0
 def tb_writer(self):
     """Lazily create and return the SummaryWriter used for visualization.

     Ensures the checkpoint directory exists; event files go into its
     "visualization" subdirectory. The writer is cached after first use.
     """
     checkpoint_dir = self.config.checkpoint_dir
     if not os.path.exists(checkpoint_dir):
         mkdir(checkpoint_dir)
     if not self._tb_writer:
         self._tb_writer = SummaryWriter(
             os.path.join(checkpoint_dir, "visualization"))
     return self._tb_writer
Beispiel #4
0
def report_final_result(result):
    """Append a trial's final evaluation result to the shared tmp file.

    Args:
        result: numeric evaluation score of the current trial; coerced to
            float before being recorded.
    """
    # Default to "" so a missing env var records an empty id instead of
    # raising TypeError on the string concatenation below.
    trial_id = os.environ.get("PaddleHub_AutoDL_Trial_ID", "")
    # tmp.txt is to record the eval results for trials
    mkdir(TMP_HOME)
    tmp_file = os.path.join(TMP_HOME, "tmp.txt")
    with open(tmp_file, 'a') as fp:  # renamed from `file` (shadowed builtin)
        fp.write(trial_id + "\t" + str(float(result)) + "\n")
Beispiel #5
0
 def request(self):
     """Refresh the cached resource list from the hub server.

     First queries the server's search endpoint and caches the parsed
     resource list as YAML under CACHE_HOME; on failure, falls back to
     downloading the static resource list file.

     Returns:
         True on success, False if both attempts fail.
     """
     if not os.path.exists(CACHE_HOME):
         utils.mkdir(CACHE_HOME)
     try:
         r = requests.get(self.get_server_url() + '/' + 'search')
         data = json.loads(r.text)
         cache_path = os.path.join(CACHE_HOME, RESOURCE_LIST_FILE)
         with open(cache_path, 'w+') as fp:
             yaml.safe_dump({'resource_list': data['data']}, fp)
         return True
     # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
     # are no longer swallowed while probing the server.
     except Exception:
         if self.config.get('debug', False):
             raise
     try:
         file_url = self.config[
             'resource_storage_server_url'] + RESOURCE_LIST_FILE
         result, tips, self.resource_list_file = default_downloader.download_file(
             file_url, save_path=CACHE_HOME, replace=True)
         if not result:
             return False
     except Exception:  # was bare `except:`
         return False
     return True
Beispiel #6
0
 def __init__(self, module_home=None):
     """Set up the local module directory, creating it when absent.

     Args:
         module_home: optional override for the default MODULE_HOME path.
     """
     self.modules_dict = {}
     self.local_modules_dir = module_home or MODULE_HOME
     if not os.path.exists(self.local_modules_dir):
         utils.mkdir(self.local_modules_dir)
     elif os.path.isfile(self.local_modules_dir):
         # TODO(wuzewu): give warning
         pass
Beispiel #7
0
 def vdl_writer(self):
     """
     get vdl_writer for visualization.

     Lazily builds a LogWriter under <checkpoint_dir>/visualization,
     creating the checkpoint directory first when needed; the writer
     instance is cached after the first call.
     """
     ckpt_dir = self.config.checkpoint_dir
     if not os.path.exists(ckpt_dir):
         mkdir(ckpt_dir)
     if not self._vdl_writer:
         self._vdl_writer = LogWriter(os.path.join(ckpt_dir, "visualization"))
     return self._vdl_writer
Beispiel #8
0
    def step(self, output_dir):
        """Run one MPI-distributed tuning round over the current solutions.

        The solution list is broadcast to all ranks; each rank evaluates
        its slice in GPU-batched thread pools, results are gathered back
        to rank 0 for strategy feedback, and per-solution model dirs are
        allgathered so every rank returns the full mapping.

        Args:
            output_dir: directory for per-solution model dirs / log files.

        Returns:
            dict mapping each solution tuple to (model_dir, owning rank).
        """
        solutions = self.get_current_hparams()

        params_cudas_dirs = []
        solution_results = []
        cnt = 0
        solutions_modeldirs = {}
        mkdir(output_dir)

        # Ensure every rank works on the same solution list.
        solutions = self.mpi.bcast(solutions)

        # split solutions to "solutions for me"
        range_start, range_end = self.mpi.split_range(len(solutions))
        my_solutions = solutions[range_start:range_end]

        for idx, solution in enumerate(my_solutions):
            # Take the first free GPU and mark it busy for this job.
            cuda = self.is_cuda_free["free"][0]
            modeldir = output_dir + "/model-" + str(idx) + "/"
            log_file = output_dir + "/log-" + str(idx) + ".info"
            params_cudas_dirs.append([solution, cuda, modeldir, log_file])
            solutions_modeldirs[tuple(solution)] = (modeldir, self.mpi.rank)
            self.is_cuda_free["free"].remove(cuda)
            self.is_cuda_free["busy"].append(cuda)
            # Flush a batch once `self.thread` jobs are queued, or on the
            # last local solution (possibly a partial batch).
            if len(params_cudas_dirs
                   ) == self.thread or idx == len(my_solutions) - 1:
                tp = ThreadPool(len(params_cudas_dirs))
                solution_results += tp.map(self.evaluator.run,
                                           params_cudas_dirs)
                cnt += 1
                tp.close()
                tp.join()
                # Return this batch's GPUs to the free pool.
                for param_cuda in params_cudas_dirs:
                    self.is_cuda_free["free"].append(param_cuda[1])
                    self.is_cuda_free["busy"].remove(param_cuda[1])
                params_cudas_dirs = []

        all_solution_results = self.mpi.gather(solution_results)

        if self.mpi.rank == 0:
            # only rank 0 need to feedback
            all_solution_results = [y for x in all_solution_results for y in x]
            self.feedback(solutions, all_solution_results)

        # remove the tmp.txt which records the eval results for trials
        tmp_file = os.path.join(TMP_HOME, "tmp.txt")
        if os.path.exists(tmp_file):
            os.remove(tmp_file)

        # collect all solutions_modeldirs
        collected_solutions_modeldirs = self.mpi.allgather(solutions_modeldirs)
        return_dict = {}
        for i in collected_solutions_modeldirs:
            return_dict.update(i)

        return return_dict
Beispiel #9
0
    def download_file(self,
                      url,
                      save_path,
                      save_name=None,
                      retry_limit=3,
                      print_progress=False,
                      replace=False):
        """Download `url` into `save_path`, retrying up to `retry_limit` times.

        Args:
            url: source URL; its last path segment names the file unless
                `save_name` is given.
            save_path: destination directory (created if missing).
            save_name: optional explicit file name.
            retry_limit: maximum number of download attempts.
            print_progress: show a textual progress bar when the server
                reports a content length.
            replace: delete any pre-existing file first; otherwise an
                existing file short-circuits the download.

        Returns:
            (success, tips, file_path) — file_path is None on failure.
        """
        if not os.path.exists(save_path):
            utils.mkdir(save_path)
        save_name = url.split('/')[-1] if save_name is None else save_name
        file_name = os.path.join(save_path, save_name)
        retry_times = 0

        if replace and os.path.exists(file_name):
            os.remove(file_name)

        # Loop until the file exists on disk; each iteration is one attempt.
        # (A dead `if os.path.exists(file_name)` branch — unreachable given
        # the loop condition — was removed here.)
        while not (os.path.exists(file_name)):
            if retry_times < retry_limit:
                retry_times += 1
            else:
                tips = "Cannot download {0} within retry limit {1}".format(
                    url, retry_limit)
                return False, tips, None
            r = requests.get(url, stream=True)
            total_length = r.headers.get('content-length')

            if total_length is None:
                # Unknown size: stream the raw response straight to disk.
                with open(file_name, 'wb') as f:
                    shutil.copyfileobj(r.raw, f)
            else:
                #TODO(ZeyuChen) upgrade to tqdm process
                with open(file_name, 'wb') as f:
                    dl = 0
                    total_length = int(total_length)
                    if print_progress:
                        print("Downloading %s" % save_name)
                    for data in r.iter_content(chunk_size=4096):
                        dl += len(data)
                        f.write(data)
                        if print_progress:
                            done = int(50 * dl / total_length)
                            progress(
                                "[%-50s] %.2f%%" %
                                ('=' * done, float(dl / total_length * 100)))
                if print_progress:
                    progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)

        tips = "File %s download completed!" % (file_name)
        return True, tips, file_name
Beispiel #10
0
 def _dump_processor(self):
     """Serialize the processor's defining module into the module package.

     The full source of the module that defines ``self.processor`` is
     written to a .py file named by a time-salted md5 digest, and that
     name is recorded in the module descriptor for later re-import.
     """
     import inspect
     source_module = inspect.getmodule(self.processor)
     source_code = inspect.getsource(source_module)
     processor_path = self.helper.processor_path()
     # Salt the digest with the current time so repeated dumps produce
     # distinct file names.
     salted_digest = utils.md5(source_code) + str(time.time())
     processor_name = utils.md5(salted_digest)
     utils.mkdir(processor_path)
     output_file = os.path.join(processor_path, processor_name + ".py")
     with open(output_file, "w") as fp:
         fp.write(source_code)
     utils.from_pyobj_to_module_attr(
         processor_name, self.desc.attr.map.data['processor_info'])
Beispiel #11
0
    def __init__(self, config_file_path=None):
        """Load (creating if necessary) the hub server configuration.

        Ensures CONF_HOME and config.json exist, seeding the file with
        the default server config on first run, then validates the server
        URL and refreshes the cached resource list.

        Args:
            config_file_path: optional explicit path to config.json;
                defaults to CONF_HOME/config.json.
        """
        config_file_path = config_file_path or os.path.join(
            hub.CONF_HOME, 'config.json')
        if not os.path.exists(hub.CONF_HOME):
            utils.mkdir(hub.CONF_HOME)
        if not os.path.exists(config_file_path):
            with open(config_file_path, 'w+') as fp:
                fp.write(json.dumps(default_server_config))

        with open(config_file_path) as fp:
            self.config = json.load(fp)

        server_url = self.config['server_url']
        utils.check_url(server_url)
        self.server_url = server_url
        self.request()
        self._load_resource_list_file_if_valid()
Beispiel #12
0
    def run_cmd(self, argvs):
        """
        Run as a command: parse argv, synthesize wavs, save them to disk.

        Args:
            argvs: raw command-line argument list.

        Returns:
            A message naming the output directory, or None when the input
            data is malformed (the parser's help text is printed instead).
        """
        self.parser = argparse.ArgumentParser(
            description='Run the %s module.' % self.name,
            prog='hub run %s' % self.name,
            usage='%(prog)s',
            add_help=True)

        self.arg_input_group = self.parser.add_argument_group(
            title="Input options", description="Input data. Required")
        # NOTE(review): this rebinds self.arg_input_group to the output
        # group, discarding the input group created just above — the
        # add_module_*_arg helpers may depend on that; confirm before
        # renaming the attribute. ("Ouput" typo in help text fixed.)
        self.arg_input_group = self.parser.add_argument_group(
            title="Output options", description="Output path. Optional.")
        self.arg_config_group = self.parser.add_argument_group(
            title="Config options",
            description=
            "Run configuration for controlling module behavior, optional.")

        self.add_module_config_arg()
        self.add_module_input_arg()
        self.add_module_output_arg()

        args = self.parser.parse_args(argvs)

        try:
            input_data = self.check_input_data(args)
        except (DataFormatError, RuntimeError):
            # Was `except DataFormatError and RuntimeError:`, which only
            # caught RuntimeError (`and` evaluates to its right operand);
            # a tuple catches both exception types as intended.
            self.parser.print_help()
            return None

        mkdir(args.output_path)
        wavs, sample_rate = self.synthesize(texts=input_data,
                                            use_gpu=args.use_gpu,
                                            vocoder=args.vocoder)

        for index, wav in enumerate(wavs):
            sf.write(os.path.join(args.output_path, f"{index}.wav"), wav,
                     sample_rate)

        ret = f"The synthesized wav files have been saved in {args.output_path}"
        return ret
Beispiel #13
0
    def __init__(self, config_file_path=None):
        """Load the hub server configuration under file-lock protection.

        Seeds CONF_HOME/config.json with the default server config on
        first run, then validates the server URL and refreshes the cached
        resource list while holding an exclusive lock on the config file,
        so concurrent processes do not race on the cache.

        Args:
            config_file_path: optional explicit path to config.json;
                defaults to CONF_HOME/config.json.
        """
        if not config_file_path:
            config_file_path = os.path.join(CONF_HOME, 'config.json')
        if not os.path.exists(CONF_HOME):
            utils.mkdir(CONF_HOME)
        if not os.path.exists(config_file_path):
            # Hold an exclusive lock while seeding the defaults so two
            # processes can't interleave writes on first run.
            with open(config_file_path, 'w+') as fp:
                lock.flock(fp, lock.LOCK_EX)
                fp.write(json.dumps(default_server_config))
                lock.flock(fp, lock.LOCK_UN)

        with open(config_file_path, "r") as fp:
            self.config = json.load(fp)

        # Exclusive lock across the whole refresh sequence below.
        # NOTE(review): fp_lock is never closed; release relies on GC /
        # process exit.
        fp_lock = open(config_file_path)
        lock.flock(fp_lock, lock.LOCK_EX)

        utils.check_url(self.config['server_url'])
        self.server_url = self.config['server_url']
        self.request()
        self._load_resource_list_file_if_valid()
        lock.flock(fp_lock, lock.LOCK_UN)
Beispiel #14
0
    def __init__(self,
                 feed_list,
                 data_reader,
                 main_program=None,
                 startup_program=None,
                 config=None):
        """Set up the task's programs, executor, run config and VisualDL log.

        Args:
            feed_list: input variable names fed to the program.
            data_reader: reader that yields training/eval data.
            main_program: program to clone; defaults to fluid's default
                main program.
            startup_program: program to clone; defaults to fluid's default
                startup program.
            config: RunConfig; a fresh default RunConfig is used when None.
        """

        # base item
        self._base_data_reader = data_reader
        self._base_feed_list = feed_list
        # Clone so later graph edits do not mutate the caller's programs.
        if main_program is None:
            self._base_main_program = clone_program(
                fluid.default_main_program(), for_test=False)

        else:
            self._base_main_program = clone_program(main_program,
                                                    for_test=False)
        if startup_program is None:
            self._base_startup_program = clone_program(
                fluid.default_startup_program(), for_test=False)
        else:
            self._base_startup_program = clone_program(startup_program,
                                                       for_test=False)
        self.is_checkpoint_loaded = False
        self._base_compiled_program = None

        # run config
        self.config = config if config else RunConfig()
        self.place = self.places[0]
        self.device_count = len(self.places)

        if self.config.use_data_parallel:
            # Data-parallel feeding needs at least one sample per device;
            # bump the batch size up to the device count when it is smaller.
            if not self.config.use_pyreader and self.config.batch_size < self.device_count:
                logger.warning(
                    "Batch size({}) is less than the count of devices({}), which is not allowed in current Paddle versions"
                    .format(self.config.batch_size, self.device_count))
                logger.warning(
                    "Batch size automatically adjusted to {}".format(
                        self.device_count))
                self.config._batch_size = self.device_count

        self.exe = fluid.Executor(place=self.place)
        self.build_strategy = fluid.BuildStrategy()
        if self.config.enable_memory_optim:
            self.build_strategy.memory_optimize = True
        else:
            self.build_strategy.memory_optimize = False

        # log item
        if not os.path.exists(self.config.checkpoint_dir):
            mkdir(self.config.checkpoint_dir)
        vdl_log_dir = os.path.join(self.config.checkpoint_dir, "vdllog")
        self.log_writer = LogWriter(vdl_log_dir, sync_cycle=1)

        # run environment
        self._phases = []
        self._envs = {}
        self._predict_data = None

        # set default phase
        self.enter_phase("train")
Beispiel #15
0
    def __init__(self,
                 feed_list,
                 data_reader,
                 main_program=None,
                 startup_program=None,
                 config=None,
                 metrics_choices="default"):
        """Set up programs, metrics, run config and the TensorBoard writer.

        Args:
            feed_list: input variable names fed to the program.
            data_reader: reader that yields training/eval data.
            main_program: program to clone; defaults to fluid's default
                main program.
            startup_program: program to clone; defaults to fluid's default
                startup program.
            config: RunConfig; a fresh default RunConfig is used when None.
            metrics_choices: metric name or list of names; "default"
                selects ["acc"], None selects no metrics.
        """

        # base item
        self._base_data_reader = data_reader
        self._base_feed_list = feed_list

        # metrics item
        # Sentinel low score so the first evaluation always becomes "best".
        self.best_score = -999
        if metrics_choices == "default":
            metrics_choices = ["acc"]
        elif metrics_choices is None:  # was `== None`; identity check is correct
            metrics_choices = []
        if isinstance(metrics_choices, list):
            self.metrics_choices = metrics_choices
        else:
            self.metrics_choices = [metrics_choices]

        # Clone so later graph edits do not mutate the caller's programs.
        if main_program is None:
            self._base_main_program = clone_program(
                fluid.default_main_program(), for_test=False)

        else:
            self._base_main_program = clone_program(main_program,
                                                    for_test=False)
        if startup_program is None:
            self._base_startup_program = clone_program(
                fluid.default_startup_program(), for_test=False)
        else:
            self._base_startup_program = clone_program(startup_program,
                                                       for_test=False)
        self.is_checkpoint_loaded = False
        self._base_compiled_program = None

        # run config
        self.config = config if config else RunConfig()
        self.place = self.places[0]
        self.device_count = len(self.places)

        if self.config.use_data_parallel:
            # Data-parallel feeding needs at least one sample per device;
            # bump the batch size up to the device count when it is smaller.
            if not self.config.use_pyreader and self.config.batch_size < self.device_count:
                logger.warning(
                    "Batch size({}) is less than the count of devices({}), which is not allowed in current Paddle versions"
                    .format(self.config.batch_size, self.device_count))
                logger.warning(
                    "Batch size automatically adjusted to {}".format(
                        self.device_count))
                self.config._batch_size = self.device_count

        self.exe = fluid.Executor(place=self.place)
        self.build_strategy = fluid.BuildStrategy()

        # log item
        if not os.path.exists(self.config.checkpoint_dir):
            mkdir(self.config.checkpoint_dir)
        tb_log_dir = os.path.join(self.config.checkpoint_dir, "visualization")
        self.tb_writer = SummaryWriter(tb_log_dir)

        # run environment
        self._phases = []
        self._envs = {}
        self._predict_data = None

        # accelerate predict
        self.is_best_model_loaded = False

        # set default phase
        self.enter_phase("train")
Beispiel #16
0
    def serialize_to_path(self, path=None, exe=None):
        """Serialize this module (program, processor, assets, desc) to disk.

        Args:
            path: output directory; defaults to ./<module name>.
            exe: executor used to save the inference program; a CPU
                executor is created when not supplied.
        """
        self._check_signatures()
        self._generate_desc()
        # create module path for saving
        if path is None:
            path = os.path.join(".", self.name)
        self.helper = ModuleHelper(path)
        utils.mkdir(self.helper.module_dir)

        # create module pb
        module_desc = module_desc_pb2.ModuleDesc()
        logger.info("PaddleHub version = %s" % version.hub_version)
        logger.info("PaddleHub Module proto version = %s" %
                    version.module_proto_version)
        logger.info("Paddle version = %s" % paddle.__version__)

        # Collect the de-duplicated union of inputs/outputs over all
        # registered signatures.
        feeded_var_names = [
            input.name for key, sign in self.signatures.items()
            for input in sign.inputs
        ]
        target_vars = [
            output for key, sign in self.signatures.items()
            for output in sign.outputs
        ]
        feeded_var_names = list(set(feeded_var_names))
        target_vars = list(set(target_vars))

        # save inference program
        program = self.program.clone()

        # Strip op callstacks so host file paths don't leak into the module.
        for block in program.blocks:
            for op in block.ops:
                if "op_callstack" in op.all_attrs():
                    op._set_attr("op_callstack", [""])

        if not exe:
            place = fluid.CPUPlace()
            exe = fluid.Executor(place=place)
        utils.mkdir(self.helper.model_path())
        fluid.io.save_inference_model(
            self.helper.model_path(),
            feeded_var_names=list(feeded_var_names),
            target_vars=list(target_vars),
            main_program=program,
            executor=exe)

        # Re-read the saved program, prefix every variable name that is
        # not already namespaced, then rewrite __model__ in place.
        with open(os.path.join(self.helper.model_path(), "__model__"),
                  "rb") as file:
            program_desc_str = file.read()
            rename_program = fluid.framework.Program.parse_from_string(
                program_desc_str)
            varlist = {
                var: block
                for block in rename_program.blocks for var in block.vars
                if self.get_name_prefix() not in var
            }
            for var, block in varlist.items():
                old_name = var
                new_name = self.get_var_name_with_prefix(old_name)
                block._rename_var(old_name, new_name)
            utils.mkdir(self.helper.model_path())
            with open(
                    os.path.join(self.helper.model_path(), "__model__"),
                    "wb") as f:
                f.write(rename_program.desc.serialize_to_string())

            # Rename the parameter files on disk to match the new prefix.
            for file in os.listdir(self.helper.model_path()):
                if (file == "__model__" or self.get_name_prefix() in file):
                    continue
                os.rename(
                    os.path.join(self.helper.model_path(), file),
                    os.path.join(self.helper.model_path(),
                                 self.get_var_name_with_prefix(file)))

        # create processor file
        if self.processor:
            self._dump_processor()

        # create assets
        self._dump_assets()

        # create check info
        checker = ModuleChecker(self.helper.module_dir)
        checker.generate_check_info()

        # Serialize module_desc pb
        module_pb = self.desc.SerializeToString()
        with open(self.helper.module_desc_path(), "wb") as f:
            f.write(module_pb)
Beispiel #17
0
 def _dump_assets(self):
     """Copy every declared asset file into the module's assets directory."""
     assets_dir = self.helper.assets_path()
     utils.mkdir(assets_dir)
     for asset in self.assets:
         target = os.path.join(assets_dir, os.path.basename(asset))
         copyfile(asset, target)