Beispiel #1
0
def main() -> None:
    """Decode a source file with a previously serialized model.

    Reads ``--src``, ``--hyp`` and ``--mod`` from the command line (in
    addition to whatever options ``utils.add_dynet_argparse`` registers),
    loads the serialized experiment stored at ``--mod``, and runs a
    ``tasks.DecodingEvalTask`` built from the source path, the hypothesis
    path, the experiment's model and that model's ``inference`` attribute.
    """
    parser = argparse.ArgumentParser()
    utils.add_dynet_argparse(parser)
    # Plain string literal: there is nothing to interpolate in the help text.
    parser.add_argument("--src",
                        help="Path of source file to read from.",
                        required=True)
    parser.add_argument("--hyp",
                        help="Path of file to write hypothesis to.",
                        required=True)
    parser.add_argument("--mod",
                        help="Path of model file to read.",
                        required=True)
    args = parser.parse_args()

    exp_dir = os.path.dirname(__file__)
    # Passed verbatim as the experiment name.
    # NOTE(review): presumably an intentional literal placeholder - confirm.
    exp = "{EXP}"

    param_collections.ParamManager.init_param_col()

    # TODO: can we avoid the LoadSerialized proxy and load stuff directly?
    load_experiment = LoadSerialized(filename=args.mod)

    uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                         exp_dir=exp_dir,
                                                         exp_name=exp)
    loaded_experiment = initialize_if_needed(uninitialized_experiment)
    model = loaded_experiment.model
    inference = model.inference
    # populate() is called only after the experiment has been initialized.
    param_collections.ParamManager.populate()

    decoding_task = tasks.DecodingEvalTask(args.src, args.hyp, model,
                                           inference)
    decoding_task.eval()
Beispiel #2
0
    def setUp(self):
        """Load the serialized test model and the sentences used by the tests.

        Sets ``self.model`` to the model of the experiment stored in
        ``test/data/tiny_jaen.model``, and ``self.src_data`` /
        ``self.trg_data`` to the lists of sentences that the model's source
        and target readers produce for ``test/data/head.ja`` and
        ``test/data/head.en``.
        """
        events.clear()
        ParamManager.init_param_col()

        # Load a pre-trained model, overwriting its "train" and "status"
        # entries with None.
        load_experiment = LoadSerialized(filename="test/data/tiny_jaen.model",
                                         overwrite=[
                                             {"path": "train", "val": None},
                                             {"path": "status", "val": None},
                                         ])
        exp_dir = "."
        exp_name = "decode"
        uninitialized_experiment = YamlPreloader.preload_obj(load_experiment,
                                                             exp_dir=exp_dir,
                                                             exp_name=exp_name)
        loaded_experiment = initialize_if_needed(uninitialized_experiment)
        ParamManager.populate()

        # Pull out the parts we need from the experiment
        self.model = loaded_experiment.model

        event_trigger.set_train(False)

        self.src_data = list(
            self.model.src_reader.read_sents("test/data/head.ja"))
        self.trg_data = list(
            self.model.trg_reader.read_sents("test/data/head.en"))
Beispiel #3
0
import xnmt.tee
from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file

# Directory containing this script. abspath() guards against a relative
# __file__ with no directory component: dirname() alone would then return ""
# and the f-strings below would resolve to "/models/..." and "/logs/..." at
# the filesystem root instead of next to the script.
EXP_DIR = os.path.dirname(os.path.abspath(__file__))
EXP = "programmatic-load"

model_file = f"{EXP_DIR}/models/{EXP}.mod"
log_file = f"{EXP_DIR}/logs/{EXP}.log"

xnmt.tee.set_out_file(log_file)

ParamManager.init_param_col()

# Proxy for the experiment serialized in programmatic.mod; its "train" entry
# is overwritten with None when the experiment is loaded.
load_experiment = LoadSerialized(
  filename=f"{EXP_DIR}/models/programmatic.mod",
  overwrite=[
    {"path" : "train", "val" : None}
  ]
)

uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir=EXP_DIR, exp_name=EXP)
loaded_experiment = initialize_if_needed(uninitialized_experiment)

# if we were to continue training, we would need to set a save model file like this:
# ParamManager.param_col.model_file = model_file
ParamManager.populate()

# run experiment; save_fct writes the loaded experiment to model_file when called
loaded_experiment(save_fct=lambda: save_to_file(model_file, loaded_experiment))
import sys

from xnmt.param_collection import ParamManager
from xnmt.persistence import initialize_if_needed, YamlPreloader, LoadSerialized, save_to_file


# Command line: a serialized model file to read, two output paths (vocabulary
# and embeddings), and which side's embedder to use ("src" by default).
parser = argparse.ArgumentParser()
parser.add_argument("filename")
parser.add_argument("output_vocab")
parser.add_argument("output_embed")
parser.add_argument("--embedding", choices=["src", "trg"], default="src")
args = parser.parse_args()

ParamManager.init_param_col()
load_experiment = LoadSerialized(
  filename=args.filename,
)

# exp_dir / exp_name are fixed dummy values here.
# NOTE(review): presumably they are irrelevant when only reading a model - confirm.
uninitialized_experiment = YamlPreloader.preload_obj(load_experiment, exp_dir="/tmp/dummy", exp_name="dummy")
experiment = initialize_if_needed(uninitialized_experiment)
# NOTE(review): no ParamManager.populate() call is visible in this part of the
# script before the embeddings are read below - confirm whether one is needed
# for `tensor` to hold the saved parameter values.


# Select the vocabulary and embedding parameters of the requested side.
# argparse restricts --embedding to "src" or "trg", so the else branch is "trg".
if args.embedding == "src":
  vocab = experiment.model.src_reader.vocab
  tensor = experiment.model.src_embedder.embeddings
else:
  vocab = experiment.model.trg_reader.vocab
  tensor = experiment.model.trg_embedder.embeddings

with open(args.output_vocab, mode="w") as fp:
  for word in vocab.i2w: