def prepare_model(args, vocabs):
    torchmodels.register_packages(models)
    mdl_cls = torchmodels.create_model_cls(models.jlu, args.model_path)
    mdl = mdl_cls(hidden_dim=args.hidden_dim,
                  word_dim=args.word_dim,
                  num_words=len(vocabs[0]),
                  num_slots=len(vocabs[1]),
                  num_intents=len(vocabs[2]))
    mdl.reset_parameters()
    ckpt = torch.load(args.ckpt_path)
    mdl.load_state_dict(ckpt["model"])
    if args.expand_vocab:
        # vocab expansion is currently disabled; falling through here would
        # leave `vocab` unbound at the return below
        raise NotImplementedError("vocab expansion is not supported")
        # mdl_vocab = vocabs[0]
        # mdl_emb = mdl.embeds[0].weight
        # emb = embeds.get_embeddings(args)
        # emb.preload()
        # emb = {w: v for w, v in emb}
        # for rword in [args.bos, args.eos, args.unk]:
        #     emb[rword] = mdl_emb[mdl_vocab.f2i.get(rword)].detach().numpy()
        # vocab = utils.Vocabulary()
        # utils.populate_vocab(emb.keys(), vocab)
        # mdl.embeds[0] = embedding.BasicEmbedding(
        #     vocab_size=len(vocab),
        #     dim=mdl.word_dim,
        #     allow_padding=True
        # )
        # embeds._load_embeddings(mdl.embeds[0], vocab, emb.items())
    else:
        vocab = vocabs[0]
    return mdl, vocab
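# A hedged usage sketch for prepare_model above: the vocabs triple is
# (word, slot, intent) vocabularies, per the len(vocabs[i]) calls. All paths
# and dimensions below, and the build_vocabs callable, are hypothetical.
def _example_prepare_model(build_vocabs):
    import argparse
    args = argparse.Namespace(
        model_path="configs/jlu.yml",  # hypothetical model spec path
        ckpt_path="exp/ckpt.pth",      # hypothetical checkpoint path
        hidden_dim=512,
        word_dim=300,
        expand_vocab=False,
    )
    vocabs = build_vocabs()  # -> (word_vocab, slot_vocab, intent_vocab)
    return prepare_model(args, vocabs)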
def test_create_rnn_from_yaml():
    torchmodels.register_packages(custom_modules)
    model_creator = torchmodels.create_model_cls(torchmodels.modules.rnn,
                                                 YAML_PATHS[0])
    model = model_creator(HIDDEN_DIM, HIDDEN_DIM)
    model.reset_parameters()
    ret = model(torch.randn(BATCH_SIZE, random.randint(3, 10), HIDDEN_DIM))
    ensure_correct(ret)
def test_create_default_nonlinear():
    torchmodels.register_packages(custom_modules)
    model_creator = torchmodels.create_model_cls(custom_modules.nonlinear)
    model = model_creator(HIDDEN_DIM, HIDDEN_DIM)
    model.reset_parameters()
    ret = model(torch.randn(BATCH_SIZE, HIDDEN_DIM))
    ensure_correct(ret)
def test_create_mlp_from_yaml():
    torchmodels.register_packages(custom_modules)
    model_creator = torchmodels.create_model_cls(mlp, YAML_PATHS[1])
    model = model_creator(HIDDEN_DIM, HIDDEN_DIM)
    model.reset_parameters()
    ret = model(torch.randn(BATCH_SIZE, HIDDEN_DIM))
    ensure_correct(ret)
def main(args=None):
    args = utils.parse_args(create_parser(), args)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    shell = utils.ShellUtils()
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("evaluate")
    utils.seed(args.seed)
    logger.info("loading data...")
    data_dir = pathlib.Path(args.data_dir)
    data = {
        split: list(map(
            Dialog.from_json,
            utils.load_json(data_dir.joinpath(f"{split}.json"))
        ))
        for split in (set(args.eval_splits) | {"train"})
    }
    processor: DialogProcessor = utils.load_pickle(args.processor_path)
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    model.load_state_dict(torch.load(args.ckpt_path))
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    for split in args.eval_splits:
        dialogs = data[split]
        logger.info(f"running evaluation on '{split}' split...")
        eval_args = EvaluateArugments(
            model=model,
            train_data=tuple(data["train"]),
            test_data=tuple(dialogs),
            processor=processor,
            embed_type=args.embed_type,
            embed_path=args.embed_path,
            device=device,
            batch_size=args.batch_size,
            beam_size=args.beam_size,
            max_conv_len=args.max_conv_len,
            max_sent_len=args.max_sent_len
        )
        utils.save_json(eval_args.to_json(),
                        save_dir.joinpath(f"eval-{split}-args.json"))
        with torch.no_grad():
            results = evaluate(eval_args)
        save_path = save_dir.joinpath(f"eval-{split}.json")
        logger.info(f"'{split}' results saved to {save_path}")
        utils.save_json(results, save_path)
    logger.info("done!")
def main():
    args = utils.parse_args(create_parser())
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("generate")
    utils.seed(args.seed)
    logger.info("loading data...")
    processor = utils.load_pickle(args.processor_path)
    data = None
    if args.data_path is not None:
        data = list(map(Dialog.from_json, utils.load_json(args.data_path)))
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    ckpt = torch.load(args.ckpt_path)
    model.load_state_dict(ckpt)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    gen_args = GenerateArguments(
        model=model,
        processor=processor,
        data=data,
        instances=args.instances,
        batch_size=args.batch_size,
        conv_scale=args.conv_scale,
        spkr_scale=args.spkr_scale,
        goal_scale=args.goal_scale,
        state_scale=args.state_scale,
        sent_scale=args.sent_scale,
        validate_dst=args.validate_dst,
        validate_unique=args.validate_unique,
        device=device
    )
    utils.save_json(gen_args.to_json(), save_dir.joinpath("args.json"))
    with torch.no_grad():
        samples = generate(gen_args)
    utils.save_json([sample.output.to_json() for sample in samples],
                    save_dir.joinpath("gen-out.json"))
    utils.save_json([sample.input.to_json() for sample in samples],
                    save_dir.joinpath("gen-in.json"))
    utils.save_lines([str(sample.log_prob) for sample in samples],
                     save_dir.joinpath("logprob.txt"))
    logger.info("done!")
def test_create_mlp_from_yaml():
    torchmodels.register_packages(custom_modules)
    model_cls = torchmodels.create_model_cls(mlp, YAML_PATHS[1])
    model = model_cls(10, 20)
    assert (model.hidden_dims[0] == 100
            and model.hidden_dims[1] == 200
            and model.hidden_dims[2] == 300)
    assert (model.activations[0].name == "relu"
            and model.activations[1].name == "tanh")
    tester = ModuleTester(model_cls, max_iter=300, pass_threshold=0.5)
    tester.test_backward()
    tester.test_forward()
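# For the assertions above to hold, the spec at YAML_PATHS[1] must pin
# hidden_dims to (100, 200, 300) and the first two activations to relu and
# tanh. A hypothetical spec is sketched below; the exact torchmodels schema
# keys are an assumption, not confirmed from this source:
#
#   type: mlp
#   vargs:
#     hidden_dims: [100, 200, 300]
#     activations: [relu, tanh, relu]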
def main():
    parser = create_parser()
    args = utils.parse_args(parser)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    logger = logging.getLogger("evaluate")
    logger.info("preparing dataset...")
    data_dir = pathlib.Path(args.data_dir)
    data = [
        datasets.DSTDialog.from_dialog(datasets.Dialog.from_json(d))
        for d in utils.load_json(data_dir.joinpath("test.json"))
    ]
    logger.info("verifying dataset...")
    for dialog in data:
        dialog.validate()
    processor = utils.load_pickle(args.processor_path)
    test_dataloader = TestDataloader(dialogs=data,
                                     processor=processor,
                                     max_batch_size=args.batch_size)
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    torchmodels.register_packages(dst_models)
    model_cls = torchmodels.create_model_cls(dst, args.model_path)
    model: dst.AbstractDialogStateTracker = model_cls(processor.vocabs)
    if args.gpu is None:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:{args.gpu}")
    model.load_state_dict(torch.load(args.ckpt_path))
    model = model.to(device)
    logger.info(f"number of parameters: {utils.count_parameters(model):,d}")
    logger.info("preparing evaluator...")
    runner = Runner(model=model,
                    processor=processor,
                    device=device,
                    asr_method=args.asr_method,
                    asr_sigmoid_sum_order=args.asr_sigmoid_sum_order,
                    asr_topk=args.asr_topk)
    logger.info("commencing evaluation...")
    with torch.no_grad():
        test_fn = runner.test_asr if args.test_asr else runner.test
        eval_results = test_fn(test_dataloader)
    logger.info("done!")
    pprint.pprint(eval_results)
    utils.save_json(eval_results, args.save_path)
def prepare_model(args, vocabs, resume_from=None):
    if resume_from is None:
        resume_from = dict()
    model_path = args.model_path
    if resume_from.get("model_args") is not None:
        # recreate the model from the exact spec recorded in the checkpoint
        temp_path = tempfile.mkstemp()[1]
        utils.dump_yaml(resume_from["model_args"], temp_path)
        model_path = temp_path
    torchmodels.register_packages(models)
    mdl_cls = torchmodels.create_model_cls(models.jlu, model_path)
    mdl = mdl_cls(
        hidden_dim=args.hidden_dim,
        word_dim=args.word_dim,
        num_words=len(vocabs[0]),
        num_slots=len(vocabs[1]),
        num_intents=len(vocabs[2])
    )
    mdl.reset_parameters()
    if resume_from.get("model") is not None:
        mdl.load_state_dict(resume_from["model"])
    else:
        embeds.load_embeddings(args, vocabs[0], mdl.embeddings())
    return mdl
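# The resume_from payload prepare_model expects, inferred from the .get()
# calls above: an optional "model_args" dict holding the YAML model spec and
# an optional "model" state dict. A hedged sketch of resuming from such a
# checkpoint; the path and the checkpoint's exact contents are assumptions.
def _example_resume(args, vocabs):
    ckpt = torch.load("exp/ckpt.pth")  # hypothetical path
    return prepare_model(args, vocabs, resume_from={
        "model_args": ckpt.get("model_args"),  # rebuilds the saved spec
        "model": ckpt.get("model"),            # restores saved weights
    })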
def main():
    args = utils.parse_args(create_parser())
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    engine = inflect.engine()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("train")
    utils.seed(args.seed)
    logger.info("loading data...")
    load_fn = utils.chain_func(lambda x: list(map(Dialog.from_json, x)),
                               utils.load_json)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    processor = datasets.DialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True,
            eos=True,
            lowercase=True,
            tokenizer="space",
            max_len=30
        ),
        boc=True,
        eoc=True,
        state_order="randomized",
        max_len=30
    )
    processor.prepare_vocabs(
        list(itertools.chain(train_data, valid_data, test_data)))
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    logger.info("preparing model...")
    utils.save_json(utils.load_yaml(args.gen_model_path),
                    save_dir.joinpath("model.json"))
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.gen_model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    utils.report_model(logger, model)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)

    def create_scheduler(s):
        # the schedule string is eval()'d into a list of (step, value) pairs
        return utils.PiecewiseScheduler(
            [utils.Coordinate(*t) for t in eval(s)])

    save_dir = pathlib.Path(args.save_dir)
    train_args = train.TrainArguments(
        model=model,
        train_data=tuple(train_data),
        valid_data=tuple(valid_data),
        processor=processor,
        device=device,
        save_dir=save_dir,
        report_every=args.report_every,
        batch_size=args.batch_size,
        valid_batch_size=args.valid_batch_size,
        optimizer=args.optimizer,
        gradient_clip=args.gradient_clip,
        l2norm_weight=args.l2norm_weight,
        learning_rate=args.learning_rate,
        num_epochs=args.epochs,
        kld_schedule=(utils.ConstantScheduler(1.0)
                      if args.kld_schedule is None
                      else create_scheduler(args.kld_schedule)),
        dropout_schedule=(utils.ConstantScheduler(1.0)
                          if args.dropout_schedule is None
                          else create_scheduler(args.dropout_schedule)),
        validate_every=args.validate_every,
        early_stop=args.early_stop,
        early_stop_criterion=args.early_stop_criterion,
        early_stop_patience=args.early_stop_patience,
        disable_kl=args.disable_kl,
        kl_mode=args.kl_mode)
    utils.save_json(train_args.to_json(), save_dir.joinpath("train-args.json"))
    record = train.train(train_args)
    utils.save_json(record.to_json(), save_dir.joinpath("final-summary.json"))
    eval_dir = save_dir.joinpath("eval")
    shell.mkdir(eval_dir, silent=True)
    eval_data = dict(list(filter(None, [
        ("train", train_data) if "train" in args.eval_splits else None,
        ("dev", valid_data) if "dev" in args.eval_splits else None,
        ("test", test_data) if "test" in args.eval_splits else None
    ])))
    for split, data in eval_data.items():
        eval_args = evaluate.EvaluateArugments(
            model=model,
            train_data=tuple(train_data),
            test_data=tuple(data),
            processor=processor,
            embed_type=args.embed_type,
            embed_path=args.embed_path,
            device=device,
            batch_size=args.valid_batch_size,
            beam_size=args.beam_size,
            max_conv_len=args.max_conv_len,
            max_sent_len=args.max_sent_len)
        utils.save_json(eval_args.to_json(),
                        eval_dir.joinpath(f"eval-{split}-args.json"))
        eval_results = evaluate.evaluate(eval_args)
        save_path = eval_dir.joinpath(f"eval-{split}.json")
        utils.save_json(eval_results, save_path)
        logger.info(f"'{split}' results saved to {save_path}")
    logger.info(f"will run {args.gen_runs} generation trials...")
    gen_summary = []
    dst_summary = []
    for gen_idx in range(1, args.gen_runs + 1):
        logger.info(f"running {engine.ordinal(gen_idx)} generation trial...")
        gen_dir = save_dir.joinpath(f"gen-{gen_idx:03d}")
        shell.mkdir(gen_dir, silent=True)
        gen_args = generate.GenerateArguments(
            model=model,
            processor=processor,
            data=train_data,
            instances=int(round(len(train_data) * args.multiplier)),
            batch_size=args.valid_batch_size,
            conv_scale=args.conv_scale,
            spkr_scale=args.spkr_scale,
            goal_scale=args.goal_scale,
            state_scale=args.state_scale,
            sent_scale=args.sent_scale,
            validate_dst=True,
            validate_unique=args.validate_unique,
            device=device)
        utils.save_json(gen_args.to_json(), gen_dir.joinpath("gen-args.json"))
        with torch.no_grad():
            samples = generate.generate(gen_args)
        utils.save_json([sample.output.to_json() for sample in samples],
                        gen_dir.joinpath("gen-out.json"))
        utils.save_json([sample.input.to_json() for sample in samples],
                        gen_dir.joinpath("gen-in.json"))
        utils.save_lines([str(sample.log_prob) for sample in samples],
                         gen_dir.joinpath("logprob.txt"))
        da_data = [sample.output for sample in samples]
        data = {"train": train_data, "dev": valid_data, "test": test_data}
        data["train"] += da_data
        # convert dialogs to dst dialogs
        data = {
            split: list(map(datasets.DSTDialog.from_dialog, dialogs))
            for split, dialogs in data.items()
        }
        for split, dialogs in data.items():
            logger.info(f"verifying '{split}' dataset...")
            for dialog in dialogs:
                dialog.compute_user_goals()
                dialog.validate()
        logger.info("preparing dst environment...")
        dst_processor = dst_datasets.DSTDialogProcessor(
            sent_processor=datasets.SentProcessor(
                bos=True, eos=True, lowercase=True, max_len=30))
        dst_processor.prepare_vocabs(list(itertools.chain(*data.values())))
        train_dataset = dst_datasets.DSTDialogDataset(
            dialogs=data["train"], processor=dst_processor)
        train_dataloader = dst_datasets.create_dataloader(
            train_dataset,
            batch_size=args.dst_batch_size,
            shuffle=True,
            pin_memory=True)
        dev_dataloader = dst_run.TestDataloader(
            dialogs=data["dev"],
            processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        test_dataloader = dst_run.TestDataloader(
            dialogs=data["test"],
            processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        logger.info("saving dst processor object...")
        utils.save_pickle(dst_processor, gen_dir.joinpath("processor.pkl"))
        torchmodels.register_packages(dst_models)
        dst_model_cls = torchmodels.create_model_cls(dst_pkg,
                                                     args.dst_model_path)
        dst_model = dst_model_cls(dst_processor.vocabs)
        dst_model = dst_model.to(device)
        logger.info(str(model))
        logger.info(f"number of parameters DST: "
                    f"{utils.count_parameters(dst_model):,d}")
        logger.info(f"running {args.dst_runs} trials...")
        all_results = []
        for idx in range(1, args.dst_runs + 1):
            logger.info(f"running {engine.ordinal(idx)} dst trial...")
            trial_dir = gen_dir.joinpath(f"dst-{idx:03d}")
            logger.info("resetting parameters...")
            dst_model.reset_parameters()
            logger.info("preparing trainer...")
            runner = dst_run.Runner(
                model=dst_model,
                processor=dst_processor,
                device=device,
                save_dir=trial_dir,
                epochs=int(round(args.dst_epochs / (1 + args.multiplier))),
                loss="sum",
                l2norm=args.dst_l2norm,
                gradient_clip=args.dst_gradient_clip,
                train_validate=False,
                early_stop=True,
                early_stop_criterion="joint-goal",
                early_stop_patience=None,
                asr_method="scaled",
                asr_sigmoid_sum_order="sigmoid-sum",
                asr_topk=5)
            logger.info("commencing training...")
            record = runner.train(train_dataloader=train_dataloader,
                                  dev_dataloader=dev_dataloader,
                                  test_fn=None)
            logger.info("final summary: ")
            logger.info(pprint.pformat(record.to_json()))
            utils.save_json(record.to_json(),
                            trial_dir.joinpath("summary.json"))
            if not args.dst_test_asr:
                logger.info("commencing testing...")
                with torch.no_grad():
                    eval_results = runner.test(test_dataloader)
                logger.info("test results: ")
                logger.info(pprint.pformat(eval_results))
            else:
                logger.info("commencing testing (asr)...")
                with torch.no_grad():
                    eval_results = runner.test_asr(test_dataloader)
                logger.info("test(asr) results: ")
                logger.info(pprint.pformat(eval_results))
            eval_results["epoch"] = int(record.epoch)
            logger.info("test evaluation: ")
            logger.info(pprint.pformat(eval_results))
            utils.save_json(eval_results, trial_dir.joinpath("eval.json"))
            all_results.append(eval_results)
            dst_summary.append(eval_results)
        logger.info("aggregating results...")
        summary = reduce_json(all_results)
        logger.info("aggregated results: ")
        # keep the aggregate as a dict so reduce_json can consume it later;
        # pformat is only applied when logging
        agg_summary = {k: v["stats"]["mean"] for k, v in summary.items()}
        logger.info(pprint.pformat(agg_summary))
        gen_summary.append(agg_summary)
        utils.save_json(summary, gen_dir.joinpath("summary.json"))
    gen_summary = reduce_json(gen_summary)
    dst_summary = reduce_json(dst_summary)
    logger.info(f"aggregating generation trials ({args.gen_runs})...")
    logger.info(pprint.pformat(
        {k: v["stats"]["mean"] for k, v in gen_summary.items()}))
    logger.info(f"aggregating dst trials ({args.gen_runs * args.dst_runs})...")
    logger.info(pprint.pformat(
        {k: v["stats"]["mean"] for k, v in dst_summary.items()}))
    utils.save_json(gen_summary, save_dir.joinpath("gen-summary.json"))
    utils.save_json(dst_summary, save_dir.joinpath("dst-summary.json"))
    logger.info("done!")
def main():
    parser = create_parser()
    args = utils.parse_args(parser)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    logger = logging.getLogger("multirun")
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    save_dir.mkdir(exist_ok=True, parents=True)
    utils.save_yaml(vars(args), save_dir.joinpath("args.yml"))
    logger.info("preparing dataset...")
    data_dir = pathlib.Path(args.data_dir)
    data = {
        split: utils.load_json(data_dir.joinpath(f"{split}.json"))
        for split in ("train", "dev", "test")
    }
    data = {
        split: [
            datasets.DSTDialog.from_dialog(datasets.Dialog.from_json(d))
            for d in dialogs
        ]
        for split, dialogs in data.items()
    }
    logger.info("verifying dataset...")
    for split, dialogs in data.items():
        for dialog in dialogs:
            dialog.validate()
    processor = dst_datasets.DSTDialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True, eos=True, lowercase=True, max_len=30))
    processor.prepare_vocabs(
        list(itertools.chain(*(data["train"], data["dev"], data["test"]))))
    logger.info("saving processor object...")
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    train_dataset = dst_datasets.DSTDialogDataset(dialogs=data["train"],
                                                  processor=processor)
    train_dataloader = dst_datasets.create_dataloader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        pin_memory=True)
    dev_dataloader = dst_run.TestDataloader(dialogs=data["dev"],
                                            processor=processor,
                                            max_batch_size=args.batch_size)
    test_dataloader = dst_run.TestDataloader(dialogs=data["test"],
                                             processor=processor,
                                             max_batch_size=args.batch_size)
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    torchmodels.register_packages(dst_models)
    model_cls = torchmodels.create_model_cls(dst, args.model_path)
    model: dst.AbstractDialogStateTracker = model_cls(processor.vocabs)
    if args.gpu is None:
        device = torch.device("cpu")
    else:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    logger.info(str(model))
    logger.info(f"number of parameters: {utils.count_parameters(model):,d}")
    logger.info(f"running {args.runs} trials...")
    all_results = []
    for idx in range(args.runs):
        logger.info(f"running trial-{idx + 1}...")
        run_save_dir = save_dir.joinpath(f"run-{idx + 1:03d}")
        logger.info("resetting parameters...")
        model.reset_parameters()
        logger.info("preparing trainer...")
        runner = dst_run.Runner(
            model=model,
            processor=processor,
            device=device,
            save_dir=run_save_dir,
            epochs=args.epochs,
            scheduler=(None if not args.scheduled_lr else functools.partial(
                getattr(op.lr_scheduler, args.scheduler_cls),
                **json.loads(args.scheduler_kwargs))),
            loss=args.loss,
            l2norm=args.l2norm,
            gradient_clip=args.gradient_clip,
            train_validate=args.train_validate,
            early_stop=args.early_stop,
            early_stop_criterion=args.early_stop_criterion,
            early_stop_patience=args.early_stop_patience,
            asr_method=args.asr_method,
            asr_sigmoid_sum_order=args.asr_sigmoid_sum_order,
            asr_topk=args.asr_topk)
        logger.info("commencing training...")
        record = runner.train(
            train_dataloader=train_dataloader,
            dev_dataloader=dev_dataloader,
            test_fn=runner.test_asr if args.validate_asr else None)
        logger.info("final summary: ")
        logger.info(pprint.pformat(record.to_json()))
        utils.save_json(record.to_json(),
                        run_save_dir.joinpath("summary-final.json"))
        logger.info("commencing testing...")
        with torch.no_grad():
            eval_results = runner.test(test_dataloader)
        logger.info("test results: ")
        logger.info(pprint.pformat(eval_results))
        if args.test_asr:
            logger.info("commencing testing (asr)...")
            with torch.no_grad():
                eval_results = runner.test_asr(test_dataloader)
            logger.info("test(asr) results: ")
            logger.info(pprint.pformat(eval_results))
        eval_results["epoch"] = int(record.epoch)
        eval_results["criterion"] = record.value
        logger.info("test evaluation: ")
        logger.info(pprint.pformat(eval_results))
        if args.save_ckpt:
            logger.info("saving checkpoint...")
            torch.save({k: v.cpu() for k, v in model.state_dict().items()},
                       run_save_dir.joinpath("ckpt.pth"))
        logger.info("done!")
        utils.save_json(eval_results, run_save_dir.joinpath("eval.json"))
        all_results.append(eval_results)
    logger.info("aggregating results...")
    summary = reduce_json(all_results)
    pprint.pprint({k: v["stats"]["mean"] for k, v in summary.items()})
    utils.save_json(summary, save_dir.joinpath("summary.json"))
    logger.info("done!")
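# reduce_json above aggregates a list of per-run result dicts into per-metric
# statistics; the callers read v["stats"]["mean"], so the output shape is at
# least {metric: {"stats": {"mean": ...}}}. A minimal sketch of that contract
# for illustration only; the real reduce_json may report further statistics.
import statistics

def _reduce_json_sketch(results):
    keys = set().union(*(r.keys() for r in results))
    return {
        k: {"stats": {"mean": statistics.mean(
            float(r[k]) for r in results if k in r)}}
        for k in keys
    }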
def main():
    args = utils.parse_args(create_parser())
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("train")
    utils.seed(args.seed)
    logger.info("loading data...")
    load_fn = utils.chain_func(lambda x: list(map(Dialog.from_json, x)),
                               utils.load_json)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    processor = datasets.DialogProcessor(
        sent_processor=datasets.SentProcessor(
            bos=True,
            eos=True,
            lowercase=True,
            tokenizer="space",
            max_len=30
        ),
        boc=True,
        eoc=True,
        state_order="randomized",
        max_len=30
    )
    processor.prepare_vocabs(
        list(itertools.chain(train_data, valid_data, test_data)))
    utils.save_pickle(processor, save_dir.joinpath("processor.pkl"))
    logger.info("preparing model...")
    utils.save_json(utils.load_yaml(args.model_path),
                    save_dir.joinpath("model.json"))
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    utils.report_model(logger, model)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)

    def create_scheduler(s):
        # the schedule string is eval()'d into a list of (step, value) pairs
        return utils.PiecewiseScheduler(
            [utils.Coordinate(*t) for t in eval(s)])

    save_dir = pathlib.Path(args.save_dir)
    train_args = train.TrainArguments(
        model=model,
        train_data=tuple(train_data),
        valid_data=tuple(valid_data),
        processor=processor,
        device=device,
        save_dir=save_dir,
        report_every=args.report_every,
        batch_size=args.batch_size,
        valid_batch_size=args.valid_batch_size,
        optimizer=args.optimizer,
        gradient_clip=args.gradient_clip,
        l2norm_weight=args.l2norm_weight,
        learning_rate=args.learning_rate,
        num_epochs=args.epochs,
        kld_schedule=(utils.ConstantScheduler(1.0)
                      if args.kld_schedule is None
                      else create_scheduler(args.kld_schedule)),
        dropout_schedule=(utils.ConstantScheduler(1.0)
                          if args.dropout_schedule is None
                          else create_scheduler(args.dropout_schedule)),
        validate_every=args.validate_every,
        early_stop=args.early_stop,
        early_stop_criterion=args.early_stop_criterion,
        early_stop_patience=args.early_stop_patience,
        disable_kl=args.disable_kl,
        kl_mode=args.kl_mode
    )
    utils.save_json(train_args.to_json(), save_dir.joinpath("train-args.json"))
    record = train.train(train_args)
    utils.save_json(record.to_json(), save_dir.joinpath("final-summary.json"))
    eval_dir = save_dir.joinpath("eval")
    shell.mkdir(eval_dir, silent=True)
    eval_data = dict(list(filter(None, [
        ("train", train_data) if "train" in args.eval_splits else None,
        ("dev", valid_data) if "dev" in args.eval_splits else None,
        ("test", test_data) if "test" in args.eval_splits else None
    ])))
    for split, data in eval_data.items():
        eval_args = evaluate.EvaluateArugments(
            model=model,
            train_data=tuple(train_data),
            test_data=tuple(data),
            processor=processor,
            embed_type=args.embed_type,
            embed_path=args.embed_path,
            device=device,
            batch_size=args.valid_batch_size,
            beam_size=args.beam_size,
            max_conv_len=args.max_conv_len,
            max_sent_len=args.max_sent_len
        )
        utils.save_json(eval_args.to_json(),
                        eval_dir.joinpath(f"eval-{split}-args.json"))
        with torch.no_grad():
            eval_results = evaluate.evaluate(eval_args)
        save_path = eval_dir.joinpath(f"eval-{split}.json")
        utils.save_json(eval_results, save_path)
        logger.info(f"'{split}' results saved to {save_path}")
    logger.info("done!")
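# create_scheduler above eval()s its argument into a list of (step, value)
# pairs, each wrapped in a utils.Coordinate. So the CLI would presumably take
# a schedule string such as the following (flag name and values hypothetical):
#
#   --kld-schedule "[(0, 0.0), (10000, 1.0)]"
#
# i.e. anneal the KLD weight linearly from 0 to 1 over the first 10k steps,
# with ConstantScheduler(1.0) as the fallback when no schedule is given.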
def main(args=None):
    args = utils.parse_args(create_parser(), args)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    engine = inflect.engine()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("gda")
    utils.seed(args.seed)
    logger.info("loading data...")
    load_fn = utils.chain_func(lambda data: list(map(Dialog.from_json, data)),
                               utils.load_json)
    processor = utils.load_pickle(args.processor_path)
    data_dir = pathlib.Path(args.data_dir)
    train_data = load_fn(str(data_dir.joinpath("train.json")))
    valid_data = load_fn(str(data_dir.joinpath("dev.json")))
    test_data = load_fn(str(data_dir.joinpath("test.json")))
    data = {"train": train_data, "dev": valid_data, "test": test_data}
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.gen_model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    ckpt = torch.load(args.ckpt_path)
    model.load_state_dict(ckpt)
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    logger.info(f"will run {args.gen_runs} generation trials...")
    gen_summary = []
    dst_summary = []
    for gen_idx in range(1, args.gen_runs + 1):
        logger.info(f"running {engine.ordinal(gen_idx)} generation trial...")
        gen_dir = save_dir.joinpath(f"gen-{gen_idx:03d}")
        shell.mkdir(gen_dir, silent=True)
        gen_args = generate.GenerateArguments(
            model=model,
            processor=processor,
            data=tuple(train_data),
            instances=int(round(len(train_data) * args.multiplier)),
            batch_size=args.gen_batch_size,
            conv_scale=args.conv_scale,
            spkr_scale=args.spkr_scale,
            goal_scale=args.goal_scale,
            state_scale=args.state_scale,
            sent_scale=args.sent_scale,
            validate_dst=True,
            validate_unique=args.validate_unique,
            device=device)
        utils.save_json(gen_args.to_json(), gen_dir.joinpath("args.json"))
        with torch.no_grad():
            samples = generate.generate(gen_args)
        utils.save_json([sample.output.to_json() for sample in samples],
                        gen_dir.joinpath("out.json"))
        utils.save_json([sample.input.to_json() for sample in samples],
                        gen_dir.joinpath("in.json"))
        utils.save_lines([str(sample.log_prob) for sample in samples],
                         gen_dir.joinpath("logprob.txt"))
        da_data = [sample.output for sample in samples]
        gen_data = {
            "train": data["train"] + da_data,
            "dev": data["dev"],
            "test": data["test"]
        }
        # convert dialogs to dst dialogs
        gen_data = {
            split: list(map(datasets.DSTDialog.from_dialog, dialogs))
            for split, dialogs in gen_data.items()
        }
        for split, dialogs in gen_data.items():
            logger.info(f"verifying '{split}' dataset...")
            for dialog in dialogs:
                dialog.compute_user_goals()
                dialog.validate()
        logger.info("preparing dst environment...")
        dst_processor = dst_datasets.DSTDialogProcessor(
            sent_processor=datasets.SentProcessor(
                bos=True, eos=True, lowercase=True, max_len=30))
        dst_processor.prepare_vocabs(list(itertools.chain(*gen_data.values())))
        train_dataset = dst_datasets.DSTDialogDataset(
            dialogs=gen_data["train"], processor=dst_processor)
        train_dataloader = dst_datasets.create_dataloader(
            train_dataset,
            batch_size=args.dst_batch_size,
            shuffle=True,
            pin_memory=True)
        dev_dataloader = dst_run.TestDataloader(
            dialogs=gen_data["dev"],
            processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        test_dataloader = dst_run.TestDataloader(
            dialogs=gen_data["test"],
            processor=dst_processor,
            max_batch_size=args.dst_batch_size)
        logger.info("saving dst processor object...")
        utils.save_pickle(dst_processor, gen_dir.joinpath("processor.pkl"))
        torchmodels.register_packages(dst_models)
        dst_model_cls = torchmodels.create_model_cls(dst_pkg,
                                                     args.dst_model_path)
        dst_model = dst_model_cls(dst_processor.vocabs)
        dst_model = dst_model.to(device)
        logger.info(str(model))
        logger.info(f"number of parameters DST: "
                    f"{utils.count_parameters(dst_model):,d}")
        logger.info(f"will run {args.dst_runs} trials...")
        all_results = []
        for idx in range(1, args.dst_runs + 1):
            logger.info(f"running {engine.ordinal(idx)} dst trial...")
            trial_dir = gen_dir.joinpath(f"dst-{idx:03d}")
            logger.info("resetting parameters...")
            dst_model.reset_parameters()
            logger.info("preparing trainer...")
            runner = dst_run.Runner(
                model=dst_model,
                processor=dst_processor,
                device=device,
                save_dir=trial_dir,
                epochs=int(round(args.epochs / (1 + args.multiplier))),
                loss="sum",
                l2norm=args.l2norm,
                gradient_clip=args.gradient_clip,
                train_validate=False,
                early_stop=True,
                early_stop_criterion="joint-goal",
                early_stop_patience=None,
                asr_method="scaled",
                asr_sigmoid_sum_order="sigmoid-sum",
                asr_topk=5)
            logger.info("commencing training...")
            record = runner.train(train_dataloader=train_dataloader,
                                  dev_dataloader=dev_dataloader,
                                  test_fn=None)
            logger.info("final summary: ")
            logger.info(pprint.pformat(record.to_json()))
            utils.save_json(record.to_json(),
                            trial_dir.joinpath("summary.json"))
            if not args.test_asr:
                logger.info("commencing testing...")
                with torch.no_grad():
                    eval_results = runner.test(test_dataloader)
                logger.info("test results: ")
                logger.info(pprint.pformat(eval_results))
            else:
                logger.info("commencing testing (asr)...")
                with torch.no_grad():
                    eval_results = runner.test_asr(test_dataloader)
                logger.info("test(asr) results: ")
                logger.info(pprint.pformat(eval_results))
            eval_results["epoch"] = int(record.epoch)
            eval_results["criterion"] = record.value
            logger.info("test evaluation: ")
            logger.info(pprint.pformat(eval_results))
            utils.save_json(eval_results, trial_dir.joinpath("eval.json"))
            all_results.append(eval_results)
            dst_summary.append(eval_results)
        logger.info("aggregating results...")
        summary = reduce_json(all_results)
        logger.info("aggregated results: ")
        agg_results = {k: v["stats"]["mean"] for k, v in summary.items()}
        gen_summary.append(agg_results)
        logger.info(pprint.pformat(agg_results))
        utils.save_json(summary, gen_dir.joinpath("summary.json"))
    gen_summary = reduce_json(gen_summary)
    dst_summary = reduce_json(dst_summary)
    logger.info(f"aggregating generation trials ({args.gen_runs})...")
    logger.info(pprint.pformat(
        {k: v["stats"]["mean"] for k, v in gen_summary.items()}))
    logger.info(f"aggregating dst trials ({args.gen_runs * args.dst_runs})...")
    logger.info(pprint.pformat(
        {k: v["stats"]["mean"] for k, v in dst_summary.items()}))
    utils.save_json(gen_summary, save_dir.joinpath("gen-summary.json"))
    utils.save_json(dst_summary, save_dir.joinpath("dst-summary.json"))
    logger.info("done!")
def main(args=None):
    args = utils.parse_args(create_parser(), args)
    if args.logging_config is not None:
        logging.config.dictConfig(utils.load_yaml(args.logging_config))
    save_dir = pathlib.Path(args.save_dir)
    if (not args.overwrite and save_dir.exists()
            and utils.has_element(save_dir.glob("*.json"))):
        raise FileExistsError(f"save directory ({save_dir}) is not empty")
    shell = utils.ShellUtils()
    shell.mkdir(save_dir, silent=True)
    logger = logging.getLogger("interpolate")
    data_dir = pathlib.Path(args.data_dir)
    data = {
        split: list(map(
            Dialog.from_json,
            utils.load_json(data_dir.joinpath(f"{split}.json"))
        ))
        for split in set(args.splits)
    }
    processor: DialogProcessor = utils.load_pickle(args.processor_path)
    logger.info("preparing model...")
    torchmodels.register_packages(models)
    model_cls = torchmodels.create_model_cls(models, args.model_path)
    model: models.AbstractTDA = model_cls(processor.vocabs)
    model.reset_parameters()
    model.load_state_dict(torch.load(args.ckpt_path))
    device = torch.device("cpu")
    if args.gpu is not None:
        device = torch.device(f"cuda:{args.gpu}")
    model = model.to(device)
    samples = (sample_data(data, args.anchor1),
               sample_data(data, args.anchor2))
    formatter = utils.DialogTableFormatter()
    logger.info(f"first sample: \n{formatter.format(samples[0])}")
    logger.info(f"second sample: \n{formatter.format(samples[1])}")
    logger.info("preparing environment...")
    dataloader = datasets.create_dataloader(
        dataset=datasets.DialogDataset(data=samples, processor=processor),
        batch_size=1,
        shuffle=False,
        pin_memory=False)
    inferencer = InterpolateInferencer(model=model,
                                       processor=processor,
                                       device=device)
    logger.info("interpolating...")
    with torch.no_grad():
        zconv_a, zconv_b = inferencer.encode(dataloader)
        zconv = torch.stack([
            zconv_a + (zconv_b - zconv_a) / args.steps * i
            for i in range(args.steps + 1)
        ])
        gen_samples = inferencer.generate(td.DataLoader(zconv, shuffle=False))
    # use original data points for the two extremes
    samples = [samples[0]] + list(gen_samples[1:-1]) + [samples[1]]
    logger.info("interpolation results: ")
    for i, sample in enumerate(samples):
        logger.info(f"interpolation step {i / args.steps:.2%}: \n"
                    f"{formatter.format(sample)}")
    logger.info("saving results...")
    json_dir = save_dir.joinpath("json")
    json_dir.mkdir(exist_ok=True)
    for i, sample in enumerate(samples, 1):
        utils.save_json(sample.to_json(), json_dir.joinpath(f"{i:02d}.json"))
    tbl_dir = save_dir.joinpath("table")
    tbl_dir.mkdir(exist_ok=True)
    for i, sample in enumerate(samples, 1):
        utils.save_lines([formatter.format(sample)],
                         tbl_dir.joinpath(f"{i:02d}.txt"))
    ltx_dir = save_dir.joinpath("latex")
    ltx_dir.mkdir(exist_ok=True)
    ltx_formatter = utils.DialogICMLLatexFormatter()
    for i, sample in enumerate(samples, 1):
        utils.save_lines([ltx_formatter.format(sample)],
                         ltx_dir.joinpath(f"{i:02d}.tex"))
    logger.info("done!")
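# The interpolation above walks the conversation latent in equal-sized steps
# from zconv_a to zconv_b. A self-contained sketch of the same arithmetic,
# for illustration (helper name is hypothetical):
import torch

def _lerp_latents(z_a: torch.Tensor, z_b: torch.Tensor,
                  steps: int) -> torch.Tensor:
    # returns steps + 1 latent points, with endpoints exactly z_a and z_b
    ts = torch.linspace(0.0, 1.0, steps + 1)
    return torch.stack([z_a + (z_b - z_a) * t for t in ts])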