def __init__(self):
    """Load config, intent vocabulary, dataloader and the trained BERT
    intent model, keeping everything on CPU for inference."""
    # load config (context manager so the handle is closed)
    config_file = os.path.join(get_config_path(),
                               IntentWithBertPredictor.default_model_config)
    with open(config_file) as f:
        config = json.load(f)
    device = config['DEVICE']
    # load intent vocabulary and dataloader
    intent_vocab_path = os.path.join(
        get_data_path(), 'crosswoz/nlu_intent_data/intent_vocab.json')
    with open(intent_vocab_path, encoding='utf-8') as f:
        intent_vocab = json.load(f)
    dataloader = Dataloader(
        intent_vocab=intent_vocab,
        pretrained_weights=config['model']['pretrained_weights'])
    # load best model, downloading it on first use
    best_model_path = os.path.join(DEFAULT_MODEL_PATH,
                                   IntentWithBertPredictor.default_model_name)
    if not os.path.exists(best_model_path):
        download_from_url(IntentWithBertPredictor.default_model_url,
                          best_model_path)
    model = IntentWithBert(config['model'], device, dataloader.intent_dim)
    # Fail loudly if the checkpoint cannot be loaded instead of silently
    # continuing with randomly initialized weights (the old broad
    # `except Exception: print(e)` hid load failures).
    model.load_state_dict(torch.load(best_model_path, map_location='cpu'))
    # cpu process
    model.to("cpu")
    model.eval()
    self.model = model
    self.dataloader = dataloader
    print(f"{best_model_path} loaded")
def __init__(self):
    """Build the TRADE dialogue state tracker: load configs, download model
    and data files, restore vocabularies and the trained model."""
    super(TradeDST, self).__init__()
    # load config (common config overrides the model config keys)
    common_config_path = os.path.join(get_config_path(),
                                      TradeDST.common_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    model_config_path = os.path.join(get_config_path(),
                                     TradeDST.model_config_name)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    self.model_config = model_config
    self.model_config['data_path'] = os.path.join(
        get_data_path(), 'crosswoz/dst_trade_data')
    self.model_config['n_gpus'] = 0 if self.model_config[
        'device'] == 'cpu' else torch.cuda.device_count()
    self.model_config['device'] = torch.device(self.model_config['device'])
    if model_config['load_embedding']:
        # pretrained embeddings are 300-d, hidden size must match
        model_config['hidden_size'] = 300
    # download model/data files
    for model_key, url in TradeDST.model_urls.items():
        dst = os.path.join(self.model_config['data_path'], model_key)
        if model_key.endswith('pth'):
            file_name = 'trained_model_path'
        elif model_key.endswith('pkl'):
            file_name = model_key.rsplit('-', maxsplit=1)[0]
        else:
            file_name = model_key.split('.')[0]  # ontology
        self.model_config[file_name] = dst
        if not os.path.exists(dst) or not self.model_config['use_cache']:
            download_from_url(url, dst)
    # load data & model
    with open(self.model_config['ontology'], 'r', encoding='utf8') as f:
        ontology = json.load(f)
    self.all_slots = get_slot_information(ontology)
    self.gate2id = {'ptr': 0, 'none': 1}
    self.id2gate = {id_: gate for gate, id_ in self.gate2id.items()}
    with open(self.model_config['lang'], 'rb') as f:
        self.lang = pickle.load(f)
    with open(self.model_config['mem-lang'], 'rb') as f:
        self.mem_lang = pickle.load(f)
    model = Trade(
        lang=self.lang,
        vocab_size=len(self.lang.index2word),
        hidden_size=self.model_config['hidden_size'],
        dropout=self.model_config['dropout'],
        num_encoder_layers=self.model_config['num_encoder_layers'],
        num_decoder_layers=self.model_config['num_decoder_layers'],
        pad_id=self.model_config['pad_id'],
        slots=self.all_slots,
        num_gates=len(self.gate2id),
        unk_mask=self.model_config['unk_mask'])
    # map_location so a GPU-trained checkpoint also loads on CPU-only hosts
    model.load_state_dict(
        torch.load(self.model_config['trained_model_path'],
                   map_location=self.model_config['device']))
    self.model = model.to(self.model_config['device']).eval()
    print(f'>>> {self.model_config["trained_model_path"]} loaded ...')
    self.state = default_state()
    print('>>> State initialized ...')
def __init__(self, is_user, mode="auto_manual"):
    """Template-based NLG.

    Args:
        is_user: True for the user side ('usr'), False for the system ('sys').
        mode: template mode; 'auto_manual' keeps both template sets loaded.
    """
    self.is_user = is_user
    self.mode = mode
    if is_user:
        self.role = 'usr'
    else:
        self.role = 'sys'
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    template_dir = os.path.join(cur_dir, '../../data/crosswoz/nlg_template_data')
    # Fixed: these entries previously pointed at the intent_*_data.json URLs,
    # so the NLG downloaded NLU intent data instead of the template files
    # (correct mapping taken from the double-quoted variant of this class).
    data_urls = {'auto_user_template_nlg.json':
                     'http://qiw2jpwfc.hn-bkt.clouddn.com/auto_user_template_nlg.json',
                 'auto_system_template_nlg.json':
                     'http://qiw2jpwfc.hn-bkt.clouddn.com/auto_system_template_nlg.json',
                 'manual_user_template_nlg.json':
                     'http://qiw2jpwfc.hn-bkt.clouddn.com/manual_user_template_nlg.json',
                 'manual_system_template_nlg.json':
                     'http://qiw2jpwfc.hn-bkt.clouddn.com/manual_system_template_nlg.json'}
    for data_key, url in data_urls.items():
        # single join is enough; the nested os.path.join was redundant
        nlg_download = os.path.join(template_dir, data_key)
        if not os.path.exists(nlg_download):
            download_from_url(url, nlg_download)
    # multi-intent templates
    self.auto_user_template = read_json(os.path.join(template_dir, 'auto_user_template_nlg.json'))
    self.auto_system_template = read_json(os.path.join(template_dir, 'auto_system_template_nlg.json'))
    # single-intent templates
    self.manual_user_template = read_json(os.path.join(template_dir, 'manual_user_template_nlg.json'))
    self.manual_system_template = read_json(os.path.join(template_dir, 'manual_system_template_nlg.json'))
def __init__(self):
    """Load config, vocabularies, dataloader and the trained joint
    intent+slot BERT model for inference."""
    root_path = get_root_path()
    config_file = os.path.join(
        root_path,
        'xbot/configs/{}'.format(JointWithBertPredictor.default_model_config))
    with open(config_file) as f:
        config = json.load(f)
    device = config['DEVICE']
    data_dir = os.path.join(root_path, config['data_dir'])
    with open(os.path.join(data_dir, 'intent_vocab.json')) as f:
        intent_vocab = json.load(f)
    with open(os.path.join(data_dir, 'tag_vocab.json')) as f:
        tag_vocab = json.load(f)
    dataloader = Dataloader(
        intent_vocab=intent_vocab, tag_vocab=tag_vocab,
        pretrained_weights=config['model']['pretrained_weights'])
    # load best model, downloading it on first use
    best_model_path = os.path.join(DEFAULT_MODEL_PATH,
                                   JointWithBertPredictor.default_model_name)
    if not os.path.exists(best_model_path):
        download_from_url(JointWithBertPredictor.default_model_url,
                          best_model_path)
    model = JointWithBert(config['model'], device, dataloader.tag_dim,
                          dataloader.intent_dim)
    # Fail loudly if the checkpoint cannot be loaded instead of silently
    # continuing with randomly initialized weights.
    model.load_state_dict(torch.load(best_model_path, map_location='cpu'))
    model.to(device)
    self.model = model
    self.dataloader = dataloader
    print(f"{best_model_path} loaded")
def main():
    """Train and then evaluate the BERT dialogue policy on the test set."""
    train_config_name = "policy/bert/train.json"
    common_config_name = "policy/bert/common.json"
    data_urls = {
        "config.json": "http://xbot.bslience.cn/bert-base-chinese/config.json",
        "pytorch_model.bin": "http://xbot.bslience.cn/bert-base-chinese/pytorch_model.bin",
        "vocab.txt": "http://xbot.bslience.cn/bert-base-chinese/vocab.txt",
        "act_ontology.json": "http://xbot.bslience.cn/act_ontology.json",
    }
    train_config = update_config(common_config_name, train_config_name,
                                 "crosswoz/policy_bert_data")
    train_config["raw_data_path"] = os.path.join(get_data_path(), "crosswoz/raw")
    # fetch supporting files that are not cached locally yet, recording each
    # destination in the config under the file's base name
    for filename, source_url in data_urls.items():
        target = os.path.join(train_config["data_path"], filename)
        train_config[filename.split(".")[0]] = target
        if not os.path.exists(target):
            download_from_url(source_url, target)
    pl.seed_everything(train_config["seed"])
    trainer = Trainer(train_config)
    trainer.train()
    trainer.eval_test()
def __init__(self):
    """Load config, vocabularies, dataloader and the trained BERT slot
    tagging model for inference."""
    # path
    root_path = get_root_path()
    config_file = os.path.join(
        root_path,
        'xbot/configs/{}'.format(SlotWithBertPredictor.default_model_config))
    # load config
    with open(config_file) as f:
        config = json.load(f)
    data_path = os.path.join(root_path, config['data_dir'])
    device = config['DEVICE']
    # load intent, tag vocabulary and dataloader
    with open(os.path.join(data_path, 'intent_vocab.json'),
              encoding='utf-8') as f:
        intent_vocab = json.load(f)
    with open(os.path.join(data_path, 'tag_vocab.json'),
              encoding="utf-8") as f:
        tag_vocab = json.load(f)
    dataloader = Dataloader(
        tag_vocab=tag_vocab, intent_vocab=intent_vocab,
        pretrained_weights=config['model']['pretrained_weights'])
    # load best model, downloading it on first use
    best_model_path = os.path.join(DEFAULT_MODEL_PATH,
                                   SlotWithBertPredictor.default_model_name)
    if not os.path.exists(best_model_path):
        download_from_url(SlotWithBertPredictor.default_model_url,
                          best_model_path)
    model = SlotWithBert(config['model'], device, dataloader.tag_dim)
    # Fail loudly if the checkpoint cannot be loaded instead of silently
    # continuing with randomly initialized weights.
    model.load_state_dict(torch.load(best_model_path, map_location='cpu'))
    model.to(device)
    self.model = model
    self.dataloader = dataloader
    print(f"{best_model_path} loaded")
def main():
    """Train and then evaluate the BERT dialogue policy on the test set."""
    train_config_name = 'policy/bert/train.json'
    common_config_name = 'policy/bert/common.json'
    data_urls = {
        'config.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/config.json',
        'pytorch_model.bin': 'http://qiw2jpwfc.hn-bkt.clouddn.com/pytorch_model.bin',
        'vocab.txt': 'http://qiw2jpwfc.hn-bkt.clouddn.com/vocab.txt',
        'act_ontology.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/act_ontology.json',
    }
    train_config = update_config(common_config_name, train_config_name,
                                 'crosswoz/policy_bert_data')
    train_config['raw_data_path'] = os.path.join(get_data_path(),
                                                 'crosswoz/raw')
    # download data files that are not cached locally yet
    for data_key, url in data_urls.items():
        dst = os.path.join(train_config['data_path'], data_key)
        file_name = data_key.split('.')[0]
        train_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
    pl.seed_everything(train_config['seed'])
    trainer = Trainer(train_config)
    trainer.train()
    # (removed stale commented-out override of trainer.best_model_path)
    trainer.eval_test()
def main():
    """Train the MLE dialogue policy via imitation learning, then report
    evaluation metrics."""
    model_config_name = 'policy/mle/train.json'
    common_config_name = 'policy/mle/common.json'
    # NOTE(review): both entries point at usr_da_voc.json; the sys_da_voc.json
    # entry looks like a copy-paste slip — confirm the mirror hosts a
    # sys_da_voc.json before changing the URL.
    data_urls = {
        'sys_da_voc.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/usr_da_voc.json',
        'usr_da_voc.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/usr_da_voc.json'
    }
    # load config
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    model_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    model_config['n_gpus'] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    model_config['batch_size'] = max(
        1, model_config['n_gpus']) * model_config['batch_size']
    model_config['device'] = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    model_config['data_path'] = os.path.join(get_data_path(),
                                             'crosswoz/policy_mle_data')
    model_config['raw_data_path'] = os.path.join(get_data_path(),
                                                 'crosswoz/raw')
    model_config['output_dir'] = os.path.join(root_path,
                                              model_config['output_dir'])
    if model_config['load_model_name']:
        model_config['model_path'] = os.path.join(
            model_config['output_dir'], model_config['load_model_name'])
    else:
        model_config['model_path'] = ''
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(model_config['data_path'], exist_ok=True)
    os.makedirs(model_config['output_dir'], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(model_config['data_path'], data_key)
        file_name = data_key.split('.')[0]
        model_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
    print('>>> Train configs:')  # plain string: there were no placeholders
    print('\t', model_config)
    set_seed(model_config['random_seed'])
    agent = Trainer(model_config)
    # imitation-learning training loop
    if model_config['do_train']:
        # resume from the epoch encoded in the checkpoint name, if any
        start_epoch = 0 if not model_config['model_path'] else int(
            model_config['model_path'].split('-')[2]) + 1
        best = float('inf')
        for epoch in tqdm(range(start_epoch, model_config['num_epochs']),
                          desc='Epoch'):
            agent.imitating(epoch)
            best = agent.imit_eval(epoch, best)
    agent.calc_metrics()
def main():
    """Train the BERT DST model, evaluate on the test set and report recall."""
    model_config_name = 'dst/bert/train.json'
    common_config_name = 'dst/bert/common.json'
    data_urls = {
        'train4bert_dst.json': 'http://xbot.bslience.cn/train4bert_dst.json',
        'dev4bert_dst.json': 'http://xbot.bslience.cn/dev4bert_dst.json',
        'test4bert_dst.json': 'http://xbot.bslience.cn/test4bert_dst.json',
        'cleaned_ontology.json': 'http://xbot.bslience.cn/cleaned_ontology.json',
        'config.json': 'http://xbot.bslience.cn/bert-base-chinese/config.json',
        'pytorch_model.bin': 'http://xbot.bslience.cn/bert-base-chinese/pytorch_model.bin',
        'vocab.txt': 'http://xbot.bslience.cn/bert-base-chinese/vocab.txt'
    }
    # load config (common config overrides the train config keys)
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    train_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(train_config_path) as f:
        train_config = json.load(f)
    train_config.update(common_config)
    train_config['n_gpus'] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    train_config['train_batch_size'] = max(
        1, train_config['n_gpus']) * train_config['train_batch_size']
    train_config['device'] = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    train_config['data_path'] = os.path.join(get_data_path(),
                                             'crosswoz/dst_bert_data')
    train_config['output_dir'] = os.path.join(root_path,
                                              train_config['output_dir'])
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(train_config['data_path'], exist_ok=True)
    os.makedirs(train_config['output_dir'], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(train_config['data_path'], data_key)
        file_name = data_key.split('.')[0]
        train_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
    # train
    trainer = Trainer(train_config)
    trainer.train()
    trainer.eval_test()
    get_recall(train_config['data_path'])
def main():
    """Train the BERT DST model, evaluate on the test set and report recall."""
    model_config_name = "dst/bert/train.json"
    common_config_name = "dst/bert/common.json"
    data_urls = {
        "train4bert_dst.json": "http://xbot.bslience.cn/train4bert_dst.json",
        "dev4bert_dst.json": "http://xbot.bslience.cn/dev4bert_dst.json",
        "test4bert_dst.json": "http://xbot.bslience.cn/test4bert_dst.json",
        "cleaned_ontology.json": "http://xbot.bslience.cn/cleaned_ontology.json",
        "config.json": "http://xbot.bslience.cn/bert-base-chinese/config.json",
        "pytorch_model.bin": "http://xbot.bslience.cn/bert-base-chinese/pytorch_model.bin",
        "vocab.txt": "http://xbot.bslience.cn/bert-base-chinese/vocab.txt",
    }
    # load config (common config overrides the train config keys)
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    train_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(train_config_path) as f:
        train_config = json.load(f)
    train_config.update(common_config)
    train_config["n_gpus"] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    train_config["train_batch_size"] = (
        max(1, train_config["n_gpus"]) * train_config["train_batch_size"]
    )
    train_config["device"] = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu"
    )
    train_config["data_path"] = os.path.join(get_data_path(), "crosswoz/dst_bert_data")
    train_config["output_dir"] = os.path.join(root_path, train_config["output_dir"])
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(train_config["data_path"], exist_ok=True)
    os.makedirs(train_config["output_dir"], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(train_config["data_path"], data_key)
        file_name = data_key.split(".")[0]
        train_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
    # train
    trainer = Trainer(train_config)
    trainer.train()
    trainer.eval_test()
    get_recall(train_config["data_path"])
def download_data(infer_config: dict, model_dir: str) -> None:
    """Download trained model files for inference.

    Args:
        infer_config: config used for inference; each file's destination path
            is recorded into it under the file's base name.
        model_dir: model save directory.
    """
    # Create the target directory once, up front (loop-invariant; exist_ok
    # makes the call idempotent instead of check-then-create).
    os.makedirs(model_dir, exist_ok=True)
    for data_key, url in BertPolicy.data_urls.items():
        dst = os.path.join(model_dir, data_key)
        # e.g. "pytorch_model.bin" -> infer_config["pytorch_model"]
        file_name = data_key.split(".")[0]
        infer_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
def download_data(infer_config: dict) -> None:
    """Download trained model and ontology file for inference.

    Args:
        infer_config: config used for inference; each file's destination path
            is recorded into it under the file's base name.
    """
    # Model files live in a dedicated sub-directory; set it up once instead
    # of re-deriving and re-checking it on every loop iteration.
    model_dir = os.path.join(infer_config["data_path"], "trained_model")
    infer_config["model_dir"] = model_dir
    os.makedirs(model_dir, exist_ok=True)
    for data_key, url in BertDST.data_urls.items():
        if "ontology" in data_key:
            # the ontology sits next to the data, not with the model weights
            dst = os.path.join(infer_config["data_path"], data_key)
        else:
            dst = os.path.join(model_dir, data_key)
        file_name = data_key.split(".")[0]
        infer_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
def __init__(self):
    """Load config, intent vocabulary, dataloader and the trained BERT
    intent model onto the configured device for inference."""
    # path
    root_path = get_root_path()
    config_file = os.path.join(
        get_config_path(), IntentWithBertPredictor.default_model_config)
    # load config (context manager so the handle is closed)
    with open(config_file) as f:
        config = json.load(f)
    self.device = config["DEVICE"]
    # load intent vocabulary and dataloader
    intent_vocab_path = os.path.join(
        get_data_path(), "crosswoz/nlu_intent_data/intent_vocab.json")
    with open(intent_vocab_path, encoding="utf-8") as f:
        intent_vocab = json.load(f)
    dataloader = Dataloader(
        intent_vocab=intent_vocab,
        pretrained_weights=config["model"]["pretrained_weights"],
    )
    # load best model, downloading it on first use
    best_model_path = os.path.join(
        os.path.join(root_path, DEFAULT_MODEL_PATH),
        IntentWithBertPredictor.default_model_name,
    )
    if not os.path.exists(best_model_path):
        download_from_url(IntentWithBertPredictor.default_model_url,
                          best_model_path)
    model = IntentWithBert(config["model"], self.device, dataloader.intent_dim)
    model.load_state_dict(
        torch.load(best_model_path, map_location=self.device))
    model.to(self.device)
    model.eval()
    self.model = model
    self.dataloader = dataloader
    print(f"{best_model_path} loaded")
def __init__(self):
    """Load configs, download data files, and restore the trained MLE
    dialogue policy onto the configured device."""
    super(MLEPolicy, self).__init__()
    # load config (common config overrides the model config keys)
    common_config_path = os.path.join(get_config_path(),
                                      MLEPolicy.common_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    model_config_path = os.path.join(get_config_path(),
                                     MLEPolicy.model_config_name)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    self.model_config = model_config
    self.model_config["data_path"] = os.path.join(
        get_data_path(), "crosswoz/policy_mle_data")
    self.model_config["n_gpus"] = (0 if self.model_config["device"] == "cpu"
                                   else torch.cuda.device_count())
    self.model_config["device"] = torch.device(self.model_config["device"])
    # download data
    for model_key, url in MLEPolicy.model_urls.items():
        dst = os.path.join(self.model_config["data_path"], model_key)
        # the .pth checkpoint gets a dedicated config key
        file_name = (model_key.split(".")[0]
                     if not model_key.endswith("pth") else "trained_model_path")
        self.model_config[file_name] = dst
        if not os.path.exists(dst) or not self.model_config["use_cache"]:
            download_from_url(url, dst)
    self.vector = CrossWozVector(
        sys_da_voc_json=self.model_config["sys_da_voc"],
        usr_da_voc_json=self.model_config["usr_da_voc"],
    )
    policy = MultiDiscretePolicy(self.vector.state_dim,
                                 model_config["hidden_size"],
                                 self.vector.sys_da_dim)
    # map_location so a GPU-trained checkpoint also loads on CPU-only hosts
    policy.load_state_dict(
        torch.load(self.model_config["trained_model_path"],
                   map_location=self.model_config["device"]))
    self.policy = policy.to(self.model_config["device"]).eval()
    print(f'>>> {self.model_config["trained_model_path"]} loaded ...')
def __init__(self, is_user, mode="auto_manual"):
    """Template-based NLG.

    Args:
        is_user: True for the user side ('usr'), False for the system ('sys').
        mode: template mode; "auto_manual" keeps both template sets loaded.
    """
    self.is_user = is_user
    self.mode = mode
    if is_user:
        self.role = "usr"
    else:
        self.role = "sys"
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    template_dir = os.path.join(cur_dir, "../../data/crosswoz/nlg_template_data")
    data_urls = {
        "auto_user_template_nlg.json":
        "http://qiw2jpwfc.hn-bkt.clouddn.com/auto_user_template_nlg.json",
        "auto_system_template_nlg.json":
        "http://qiw2jpwfc.hn-bkt.clouddn.com/auto_system_template_nlg.json",
        "manual_user_template_nlg.json":
        "http://qiw2jpwfc.hn-bkt.clouddn.com/manual_user_template_nlg.json",
        "manual_system_template_nlg.json":
        "http://qiw2jpwfc.hn-bkt.clouddn.com/manual_system_template_nlg.json",
    }
    for data_key, url in data_urls.items():
        # single join is enough; the nested os.path.join was redundant
        nlg_download = os.path.join(template_dir, data_key)
        if not os.path.exists(nlg_download):
            download_from_url(url, nlg_download)
    # multi-intent templates
    self.auto_user_template = read_json(
        os.path.join(template_dir, "auto_user_template_nlg.json"))
    self.auto_system_template = read_json(
        os.path.join(template_dir, "auto_system_template_nlg.json"))
    # single-intent templates
    self.manual_user_template = read_json(
        os.path.join(template_dir, "manual_user_template_nlg.json"))
    self.manual_system_template = read_json(
        os.path.join(template_dir, "manual_system_template_nlg.json"))
def main():
    """Train the MLE dialogue policy via imitation learning, then report
    evaluation metrics."""
    model_config_name = "policy/mle/train.json"
    common_config_name = "policy/mle/common.json"
    # NOTE(review): both entries point at usr_da_voc.json; the sys_da_voc.json
    # entry looks like a copy-paste slip — confirm the mirror hosts a
    # sys_da_voc.json before changing the URL.
    data_urls = {
        "sys_da_voc.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/usr_da_voc.json",
        "usr_da_voc.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/usr_da_voc.json",
    }
    # load config
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    model_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    model_config["n_gpus"] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    model_config["batch_size"] = (max(1, model_config["n_gpus"])
                                  * model_config["batch_size"])
    model_config["device"] = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    model_config["data_path"] = os.path.join(get_data_path(),
                                             "crosswoz/policy_mle_data")
    model_config["raw_data_path"] = os.path.join(get_data_path(),
                                                 "crosswoz/raw")
    model_config["output_dir"] = os.path.join(root_path,
                                              model_config["output_dir"])
    if model_config["load_model_name"]:
        model_config["model_path"] = os.path.join(
            model_config["output_dir"], model_config["load_model_name"])
    else:
        model_config["model_path"] = ""
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(model_config["data_path"], exist_ok=True)
    os.makedirs(model_config["output_dir"], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(model_config["data_path"], data_key)
        file_name = data_key.split(".")[0]
        model_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)
    print(">>> Train configs:")  # plain string: there were no placeholders
    print("\t", model_config)
    set_seed(model_config["random_seed"])
    agent = Trainer(model_config)
    # imitation-learning training loop
    if model_config["do_train"]:
        # resume from the epoch encoded in the checkpoint name, if any
        start_epoch = (0 if not model_config["model_path"]
                       else int(model_config["model_path"].split("-")[2]) + 1)
        best = float("inf")
        for epoch in tqdm(range(start_epoch, model_config["num_epochs"]),
                          desc="Epoch"):
            agent.imitating(epoch)
            best = agent.imit_eval(epoch, best)
    agent.calc_metrics()
root_path = get_root_path() config_file = os.path.join(get_config_path(), IntentWithBertPredictor.default_model_config) config = json.load(open(config_file)) data_dir = os.path.join(get_data_path(), 'crosswoz/nlu_intent_data/') output_dir = config['output_dir'] output_dir = os.path.join(root_path, output_dir) log_dir = config['log_dir'] log_dir = os.path.join(root_path, log_dir) device = config['DEVICE'] # download data for data_key, url in data_urls.items(): dst = os.path.join(os.path.join(data_dir, data_key)) if not os.path.exists(dst): download_from_url(url, dst) set_seed(config['seed']) intent_vocab = json.load(open(os.path.join(data_dir, 'intent_vocab.json'))) dataloader = Dataloader( intent_vocab=intent_vocab, pretrained_weights=config['model']['pretrained_weights']) for data_key in ['val', 'test']: dataloader.load_data(json.load( open(os.path.join(data_dir, 'intent_{}_data.json'.format(data_key)))), data_key, cut_sen_len=0, use_bert_tokenizer=config['use_bert_tokenizer']) print('{} set size: {}'.format(data_key,
def load_from_net(url):
    """Download the file at *url* to DEFAULT_MODEL_DST (the default local model destination)."""
    download_from_url(url, DEFAULT_MODEL_DST)
def main():
    """Preprocess the CrossWOZ DST data and train the TRADE model, with
    periodic evaluation and early stopping."""
    model_config_name = "dst/trade/train.json"
    common_config_name = "dst/trade/common.json"
    data_urls = {
        "train_dials.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/train_dials.json",
        "dev_dials.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/dev_dials.json",
        "test_dials.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/test_dials.json",
        "ontology.json": "http://qiw2jpwfc.hn-bkt.clouddn.com/ontology.json",
        "sgns.wiki.bigram.bz2": "http://qiw2jpwfc.hn-bkt.clouddn.com/sgns.wiki.bigram.bz2",
    }
    # load config (common config overrides the model config keys)
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    model_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    model_config["n_gpus"] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    model_config["batch_size"] = (max(1, model_config["n_gpus"])
                                  * model_config["batch_size"])
    model_config["device"] = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    if model_config["load_embedding"]:
        # pretrained embeddings are 300-d, hidden size must match
        model_config["hidden_size"] = 300
    model_config["data_path"] = os.path.join(get_data_path(),
                                             "crosswoz/dst_trade_data")
    # output_dir is where model checkpoints are saved
    model_config["output_dir"] = os.path.join(
        root_path, model_config["output_dir"])
    if model_config["load_model_name"]:
        model_config["model_path"] = os.path.join(
            model_config["output_dir"], model_config["load_model_name"])
    else:
        model_config["model_path"] = ""
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(model_config["data_path"], exist_ok=True)
    os.makedirs(model_config["output_dir"], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(model_config["data_path"], data_key)
        # The embedding archive gets a dedicated config key; everything else
        # is keyed by its base file name (the original's "_" branch and else
        # branch computed the same value, so they are merged here).
        if "wiki.bigram" in data_key:
            file_name = "orig_pretrained_embedding"
        else:
            file_name = data_key.split(".")[0]
        model_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)

    avg_best, cnt, acc = 0.0, 0, 0.0
    # data preprocessing
    train, dev, test, langs, slots, gating_dict = prepare_data_seq(
        model_config)
    lang = langs[0]
    model_config["pretrained_embedding_path"] = os.path.join(
        model_config["data_path"], f"emb{len(lang.index2word)}")
    print(">>> Train configs:")  # plain string: there were no placeholders
    print("\t", model_config)
    # set up the trainer
    trainer = Trainer(config=model_config, langs=langs,
                      gating_dict=gating_dict, slots=slots)
    # train, resuming from the epoch encoded in the checkpoint name if any
    start_epoch = (0 if not model_config["model_path"]
                   else int(model_config["model_path"].split("-")[2]) + 1)
    for epoch in tqdm(range(start_epoch, model_config["num_epochs"]),
                      desc="Epoch"):
        progress_bar = tqdm(enumerate(train), total=len(train))
        for i, data in progress_bar:
            trainer.train_batch(data, slots, reset=(i == 0))
            trainer.optimize(int(model_config["grad_clip"]))
            progress_bar.set_description(trainer.print_loss())
        if (epoch + 1) % int(model_config["eval_steps"]) == 0:
            acc = trainer.evaluate(dev, avg_best, slots, epoch,
                                   model_config["early_stop"])
            trainer.scheduler.step(acc)
            if acc >= avg_best:
                avg_best = acc
                cnt = 0
            else:
                cnt += 1
            if cnt == model_config["patience"] or (
                    acc == 1.0 and model_config["early_stop"] is None):
                print("Ran out of patient, early stop...")
                break
def main():
    """Preprocess the CrossWOZ DST data and train the TRADE model, with
    periodic evaluation and early stopping."""
    model_config_name = 'dst/trade/train.json'
    common_config_name = 'dst/trade/common.json'
    data_urls = {
        'train_dials.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/train_dials.json',
        'dev_dials.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/dev_dials.json',
        'test_dials.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/test_dials.json',
        'ontology.json': 'http://qiw2jpwfc.hn-bkt.clouddn.com/ontology.json',
        'sgns.wiki.bigram.bz2': 'http://qiw2jpwfc.hn-bkt.clouddn.com/sgns.wiki.bigram.bz2'
    }
    # load config (common config overrides the model config keys)
    root_path = get_root_path()
    common_config_path = os.path.join(get_config_path(), common_config_name)
    model_config_path = os.path.join(get_config_path(), model_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    model_config['n_gpus'] = torch.cuda.device_count()
    # scale batch size by the number of available GPUs (at least 1)
    model_config['batch_size'] = max(
        1, model_config['n_gpus']) * model_config['batch_size']
    model_config['device'] = torch.device(
        'cuda:0' if torch.cuda.is_available() else 'cpu')
    if model_config['load_embedding']:
        # pretrained embeddings are 300-d, hidden size must match
        model_config['hidden_size'] = 300
    model_config['data_path'] = os.path.join(get_data_path(),
                                             'crosswoz/dst_trade_data')
    # output_dir is where model checkpoints are saved
    model_config['output_dir'] = os.path.join(
        root_path, model_config['output_dir'])
    if model_config['load_model_name']:
        model_config['model_path'] = os.path.join(
            model_config['output_dir'], model_config['load_model_name'])
    else:
        model_config['model_path'] = ''
    # exist_ok avoids the check-then-create race of the old exists() guard
    os.makedirs(model_config['data_path'], exist_ok=True)
    os.makedirs(model_config['output_dir'], exist_ok=True)
    # download data
    for data_key, url in data_urls.items():
        dst = os.path.join(model_config['data_path'], data_key)
        # The embedding archive gets a dedicated config key; everything else
        # is keyed by its base file name (the original's '_' branch and else
        # branch computed the same value, so they are merged here).
        if 'wiki.bigram' in data_key:
            file_name = 'orig_pretrained_embedding'
        else:
            file_name = data_key.split('.')[0]
        model_config[file_name] = dst
        if not os.path.exists(dst):
            download_from_url(url, dst)

    avg_best, cnt, acc = 0.0, 0, 0.0
    # data preprocessing
    train, dev, test, langs, slots, gating_dict = prepare_data_seq(
        model_config)
    lang = langs[0]
    model_config['pretrained_embedding_path'] = os.path.join(
        model_config['data_path'], f'emb{len(lang.index2word)}')
    print('>>> Train configs:')  # plain string: there were no placeholders
    print('\t', model_config)
    # set up the trainer
    trainer = Trainer(config=model_config, langs=langs,
                      gating_dict=gating_dict, slots=slots)
    # train, resuming from the epoch encoded in the checkpoint name if any
    start_epoch = 0 if not model_config['model_path'] else int(
        model_config['model_path'].split('-')[2]) + 1
    for epoch in tqdm(range(start_epoch, model_config['num_epochs']),
                      desc='Epoch'):
        progress_bar = tqdm(enumerate(train), total=len(train))
        for i, data in progress_bar:
            trainer.train_batch(data, slots, reset=(i == 0))
            trainer.optimize(int(model_config['grad_clip']))
            progress_bar.set_description(trainer.print_loss())
        if (epoch + 1) % int(model_config['eval_steps']) == 0:
            acc = trainer.evaluate(dev, avg_best, slots, epoch,
                                   model_config['early_stop'])
            trainer.scheduler.step(acc)
            if acc >= avg_best:
                avg_best = acc
                cnt = 0
            else:
                cnt += 1
            if cnt == model_config['patience'] or (
                    acc == 1.0 and model_config['early_stop'] is None):
                print("Ran out of patient, early stop...")
                break
if slot_value != 'none': predict_belief.append((self.all_slots[i], slot_value)) self.update_belief_state(predict_belief) if __name__ == '__main__': import random dst_model = TradeDST() data_path = os.path.join(get_data_path(), 'crosswoz/dst_trade_data') dials_path = os.path.join(data_path, 'dev_dials.json') # download dials file if not os.path.exists(dials_path): download_from_url('http://qiw2jpwfc.hn-bkt.clouddn.com/dev_dials.json', dials_path) with open(os.path.join(data_path, 'dev_dials.json'), 'r', encoding='utf8') as f: dials = json.load(f) example = random.choice(dials) break_turn = 0 for ti, turn in enumerate(example['dialogue']): dst_model.state['history'].append(('sys', turn['system_transcript'])) dst_model.state['history'].append(('usr', turn['transcript'])) if random.random() < 0.5: break_turn = ti + 1 break if break_turn == len(example['dialogue']): print('对话已完成,请重新开始测试') print('对话状态更新前:') print(json.dumps(dst_model.state, indent=2, ensure_ascii=False)) dst_model.update('')
if slot_value != "none": predict_belief.append((self.all_slots[i], slot_value)) self.update_belief_state(predict_belief) if __name__ == "__main__": import random dst_model = TradeDST() data_path = os.path.join(get_data_path(), "crosswoz/dst_trade_data") dials_path = os.path.join(data_path, "dev_dials.json") # download dials file if not os.path.exists(dials_path): download_from_url("http://xbot.bslience.cn/dev_dials.json", dials_path) with open(os.path.join(data_path, "dev_dials.json"), "r", encoding="utf8") as f: dials = json.load(f) example = random.choice(dials) break_turn = 0 for ti, turn in enumerate(example["dialogue"]): dst_model.state["history"].append( ("sys", turn["system_transcript"])) dst_model.state["history"].append(("usr", turn["transcript"])) if random.random() < 0.5: break_turn = ti + 1 break if break_turn == len(example["dialogue"]): print("对话已完成,请重新开始测试")
root_path = get_root_path() config_path = os.path.join(get_config_path(), 'crosswoz_all_context_nlu_intent.json') config = json.load(open(config_path)) data_path = os.path.join(get_data_path(), 'crosswoz/nlu_intent_data/') output_dir = config['output_dir'] output_dir = os.path.join(root_path, output_dir) log_dir = config['log_dir'] log_dir = os.path.join(root_path, log_dir) device = config['DEVICE'] # download data for data_key, url in data_urls.items(): dst = os.path.join(os.path.join(data_path, data_key)) if not os.path.exists(dst): download_from_url(url, dst) # seed set_seed(config['seed']) # load intent vocabulary and dataloader intent_vocab = json.load( open(os.path.join(data_path, 'intent_vocab.json'), encoding="utf-8")) dataloader = Dataloader( intent_vocab=intent_vocab, pretrained_weights=config['model']['pretrained_weights']) # load data for data_key in ['train', 'val', 'test']: dataloader.load_data(json.load( open(os.path.join(data_path,
def __init__(self):
    """Build the TRADE dialogue state tracker: load configs, download model
    and data files, restore vocabularies and the trained model."""
    super(TradeDST, self).__init__()
    # load config (common config overrides the model config keys)
    common_config_path = os.path.join(get_config_path(),
                                      TradeDST.common_config_name)
    with open(common_config_path) as f:
        common_config = json.load(f)
    model_config_path = os.path.join(get_config_path(),
                                     TradeDST.model_config_name)
    with open(model_config_path) as f:
        model_config = json.load(f)
    model_config.update(common_config)
    self.model_config = model_config
    self.model_config["data_path"] = os.path.join(
        get_data_path(), "crosswoz/dst_trade_data")
    self.model_config["n_gpus"] = (0 if self.model_config["device"] == "cpu"
                                   else torch.cuda.device_count())
    self.model_config["device"] = torch.device(self.model_config["device"])
    if model_config["load_embedding"]:
        # pretrained embeddings are 300-d, hidden size must match
        model_config["hidden_size"] = 300
    # download model/data files
    for model_key, url in TradeDST.model_urls.items():
        dst = os.path.join(self.model_config["data_path"], model_key)
        if model_key.endswith("pth"):
            file_name = "trained_model_path"
        elif model_key.endswith("pkl"):
            file_name = model_key.rsplit("-", maxsplit=1)[0]
        else:
            file_name = model_key.split(".")[0]  # ontology
        self.model_config[file_name] = dst
        if not os.path.exists(dst) or not self.model_config["use_cache"]:
            download_from_url(url, dst)
    # load data & model
    with open(self.model_config["ontology"], "r", encoding="utf8") as f:
        ontology = json.load(f)
    self.all_slots = get_slot_information(ontology)
    self.gate2id = {"ptr": 0, "none": 1}
    self.id2gate = {id_: gate for gate, id_ in self.gate2id.items()}
    with open(self.model_config["lang"], "rb") as f:
        self.lang = pickle.load(f)
    with open(self.model_config["mem-lang"], "rb") as f:
        self.mem_lang = pickle.load(f)
    model = Trade(
        lang=self.lang,
        vocab_size=len(self.lang.index2word),
        hidden_size=self.model_config["hidden_size"],
        dropout=self.model_config["dropout"],
        num_encoder_layers=self.model_config["num_encoder_layers"],
        num_decoder_layers=self.model_config["num_decoder_layers"],
        pad_id=self.model_config["pad_id"],
        slots=self.all_slots,
        num_gates=len(self.gate2id),
        unk_mask=self.model_config["unk_mask"],
    )
    # map_location so a GPU-trained checkpoint also loads on CPU-only hosts
    model.load_state_dict(
        torch.load(self.model_config["trained_model_path"],
                   map_location=self.model_config["device"]))
    self.model = model.to(self.model_config["device"]).eval()
    print(f'>>> {self.model_config["trained_model_path"]} loaded ...')
    self.state = default_state()
    print(">>> State initialized ...")