コード例 #1
0
    def __setstate__(self, state):
        """Pickle protocol hook: rebuild the instance from ``state``.

        Every entry of ``state`` is copied into the instance ``__dict__``.
        If ``state`` carries a ``"recovery"`` entry, that entry is passed to
        ``save_temp_bin``, dropped from the instance again, and the value
        returned by ``save_temp_bin`` is given to ``self.pc.populate`` once
        ``self.init_params()`` has run.
        """
        attributes = self.__dict__
        attributes.update(state)

        if "recovery" not in state:
            return

        # presumably save_temp_bin stores the payload in a temp file and
        # returns its location -- confirm against the helper.
        recovery_file = save_temp_bin(state["recovery"])
        # The raw payload must not stay on the instance as an attribute.
        del attributes["recovery"]
        self.init_params()
        self.pc.populate(recovery_file)
コード例 #2
0
    def run_traslate(self, input_file, output_file, opt):
        """Invoke ``translate.py`` through ``run_param``.

        ``opt`` is modified in place: its ``model``, ``src`` and ``output``
        entries are set (and ``gpu`` is set to ``0`` when ``is_cuda`` is
        truthy) before it is forwarded to ``run_param``.

        Args:
            input_file: Value stored under ``opt["src"]``.
            output_file: Value stored under ``opt["output"]``.
            opt: Mutable mapping of options; updated by this call.
        """
        # TODO remove after EMNLP: instances lacking ``model_bin_path`` get
        # it created on first use from ``self.model_bin``.
        if getattr(self, "model_bin_path", None) is None:
            self.model_bin_path = save_temp_bin(self.model_bin)

        required = (
            ("model", self.model_bin_path),
            ("src", input_file),
            ("output", output_file),
        )
        for key, value in required:
            opt[key] = value

        if is_cuda:
            opt["gpu"] = 0

        run_param("translate.py", opt)
コード例 #3
0
    def train(self, save_data, opt):
        """Unpack the training data, run ``train.py`` and return the model location.

        ``save_data`` is handed to ``save_temp_bin`` and the result is
        unpacked as a ``gztar`` archive into a fresh ``temp_dir()``. ``opt``
        is modified in place with the ``data`` and ``save_model`` entries
        (plus ``world_size`` / ``gpu_ranks`` when ``is_cuda`` is truthy) and
        forwarded to ``run_param``.

        Returns:
            The value obtained from the second ``temp_dir()`` call, which was
            passed to the trainer as ``save_model``.
        """
        archive_path = save_temp_bin(save_data)

        data_dir = temp_dir()
        shutil.unpack_archive(
            filename=archive_path,
            extract_dir=data_dir,
            format="gztar",
        )

        model_dir = temp_dir()

        # NOTE(review): plain concatenation assumes temp_dir() returns a path
        # ending in a separator -- confirm against the helper.
        opt["data"] = data_dir + "data"
        opt["save_model"] = model_dir

        if is_cuda:
            for key, value in (("world_size", 1), ("gpu_ranks", 0)):
                opt[key] = value

        run_param("train.py", opt)

        return model_dir
コード例 #4
0
ファイル: open_nmt.py プロジェクト: dendisuhubdy/chimera
    def translate(self,
                  plans: List[str],
                  opts=None):  # Translate entire reader file using a model
        """Translate every plan and return one output string per plan.

        Each plan is split on ``"."``; every piece is stripped and passed
        through ``add_features``. The distinct pieces are written to a source
        file with ``save_temp``, translated in a single ``run_traslate`` call,
        and the outputs are mapped back onto the plans.

        Args:
            plans: Plans to translate. An empty string yields an empty
                sentence list (and therefore an empty output string).
            opts: Optional mapping with ``"beam_size"`` and ``"find_best"``
                entries. A falsy value selects
                ``{"beam_size": BEAM_SIZE, "find_best": True}``.

        Returns:
            A list aligned with ``plans`` whose items are the per-sentence
            results joined by a single space, or ``[]`` when no plan
            contains any sentence.
        """
        if not opts:
            opts = {"beam_size": BEAM_SIZE, "find_best": True}

        model_path = save_temp_bin(self.model_bin)

        # Per-plan sentence lists; distinct names for the outer and inner
        # loop variables avoid the shadowing the previous version relied on.
        o_lines = [
            [add_features(sentence.strip()) for sentence in plan.split(".")]
            if plan != "" else []
            for plan in plans
        ]
        # De-duplicate so that each distinct sentence is translated only once.
        n_lines = list(set(chain.from_iterable(o_lines)))

        if not n_lines:
            return []

        source_path = save_temp(n_lines)
        target_path = temp_name()

        # With "find_best" every beam hypothesis is requested, otherwise one.
        n_best = opts["beam_size"] if opts["find_best"] else 1

        self.run_traslate(
            model_path, source_path, target_path, {
                "replace_unk": None,
                "beam_size": opts["beam_size"],
                "n_best": n_best,
                "batch_size": 64
            })

        # The context manager closes the file even if reading fails.
        with open(target_path, "r", encoding="utf-8") as out_lines_f:
            out_lines = chunks(out_lines_f.read().splitlines(), n_best)

        # Output lines are grouped n_best at a time and paired with the source
        # sentences in order; find_best_out picks the result for each group.
        map_lines = {
            n: find_best_out(n, out)
            for n, out in zip(n_lines, out_lines)
        }

        return [" ".join([map_lines[s] for s in lines]) for lines in o_lines]
コード例 #5
0
    def __init__(self, model, features=True):
        """Store the model payload and materialise it through ``save_temp_bin``.

        Args:
            model: Stored as ``self.model_bin`` and passed to
                ``save_temp_bin``; the result is kept as
                ``self.model_bin_path``.
            features: Stored unchanged as ``self.features``.
        """
        self.features = features
        self.sentences_cache = {}  # starts empty

        self.model_bin = model
        self.model_bin_path = save_temp_bin(self.model_bin)