def run_test(config):
    # tokenizers
    LOG_FILE_NAME = "debug.log"
    tokenizer = ModBertTokenizer('base', cache_dir=config.cache_dir)
    label_token_dict = {
        "pad_token": "<pad>",
        "bos_token": "<t>",
        "eos_token": "</t>",
    }
    label_token_dict.update({
        f"label_{label_idx}_token": label
        for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
    })
    label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

    # metrics calculator
    metrics = DAMetrics()

    model = SpeechTransformerLabeler(config, tokenizer, label_tokenizer,
                                     freeze=config.freeze)
    model.load_model(config.model_path)
    print(f"model path: {config.model_path}")
    model.eval()

    for set_name in ["dev", "test"]:
        data_source = SpeechXTSource(split=set_name,
                                     config=config,
                                     tokenizer=tokenizer,
                                     label_tokenizer=label_tokenizer)
        mlog(str(data_source.statistics), config, LOG_FILE_NAME)
        current_score, metrics_results, split_loss = eval_split(
            model, data_source, set_name, config, label_tokenizer, metrics,
            LOG_FILE_NAME, write_pred=True)
        print("Split loss: ", split_loss)
        diff = (metrics_results['Macro F1'] - metrics_results['DER']) * 100
        lazy_s = f"DSER, DER, F1, LWER:\n {100*metrics_results['DSER']}\t{100*metrics_results['DER']}\t{100*metrics_results['Macro F1']}\t{diff}\t{100*metrics_results['Macro LWER']}\n"
        mlog(lazy_s, config, LOG_FILE_NAME)
    mlog(str(train_data_source.statistics))

    mlog("----- Loading dev data -----")
    dev_data_source = DataSource(data=dataset["dev"],
                                 config=config,
                                 tokenizer=tokenizer,
                                 label_tokenizer=label_tokenizer)
    mlog(str(dev_data_source.statistics))

    mlog("----- Loading test data -----")
    test_data_source = DataSource(data=dataset["test"],
                                  config=config,
                                  tokenizer=tokenizer,
                                  label_tokenizer=label_tokenizer)
    mlog(str(test_data_source.statistics))

    # metrics calculator
    metrics = DAMetrics()

    # build model
    if config.model == "ed":
        Model = EDSeqLabeler
    elif config.model == "attn_ed":
        Model = AttnEDSeqLabeler
    model = Model(config, tokenizer, label_tokenizer)

    # model adaption
    if torch.cuda.is_available():
        mlog("----- Using GPU -----")
        model = model.cuda()
    if config.model_path:
        model.load_model(config.model_path)
        mlog("----- Model loaded -----")
def run_train(config):
    # tokenizers
    tokenizer = ModBertTokenizer('base', cache_dir=config.cache_dir)
    label_token_dict = {
        "pad_token": "<pad>",
        "bos_token": "<t>",
        "eos_token": "</t>",
    }
    label_token_dict.update({
        f"label_{label_idx}_token": label
        for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
    })
    label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

    # metrics calculator
    metrics = DAMetrics()

    # define logger
    MODEL_NAME = config.model
    LOG_FILE_NAME = "{}.seed_{}.{}".format(
        MODEL_NAME, config.seed,
        time.strftime("%Y%m%d-%H%M%S", time.localtime())[-6:])
    if config.filename_note:
        LOG_FILE_NAME += f".{config.filename_note}"
        experiment.set_name(config.filename_note)
    experiment.log_text(LOG_FILE_NAME)

    # data loaders & number reporters
    trn_reporter = StatisticsReporter()

    mlog("----- Loading dev data -----", config, LOG_FILE_NAME)
    dev_data_source = SpeechXTSource(split="dev",
                                     config=config,
                                     tokenizer=tokenizer,
                                     label_tokenizer=label_tokenizer)
    mlog(str(dev_data_source.statistics), config, LOG_FILE_NAME)

    mlog("----- Loading training data -----", config, LOG_FILE_NAME)
    if config.debug:
        train_data_source = dev_data_source
    else:
        train_data_source = SpeechXTSource(split="train",
                                           config=config,
                                           tokenizer=tokenizer,
                                           label_tokenizer=label_tokenizer)
    mlog(str(train_data_source.statistics), config, LOG_FILE_NAME)

    # build model
    if config.model == 'speech_xt':
        model = SpeechTransformerLabeler(config, tokenizer, label_tokenizer,
                                         freeze=config.freeze)
    elif config.model == 'speech_bl':
        model = SpeechBaselineLabeler(config, tokenizer, label_tokenizer,
                                      freeze=config.freeze)
    else:
        print("No model specified, exiting")
        exit(0)

    # model adaption
    if torch.cuda.is_available():
        mlog("----- Using GPU -----", config, LOG_FILE_NAME)
        model = model.cuda()
    if config.model_path:
        model.load_model(config.model_path)
        mlog("----- Model loaded -----", config, LOG_FILE_NAME)
        mlog(f"model path: {config.model_path}", config, LOG_FILE_NAME)
    this_model_path = f"{config.model_save_path}/model"

    # Build optimizer
    trainable_parameters = [
        param for param in model.named_parameters() if param[1].requires_grad
    ]
    total_params_count = sum([x[1].numel() for x in trainable_parameters])
    print("Total params count: ", total_params_count)
    warmup_steps = math.ceil(train_data_source.statistics['n_turns'] *
                             config.n_epochs / config.batch_size *
                             0.1)  # 10% of train data for warm-up
    # additional steps because of different loading schemes
    t_total = math.ceil(1.5 * train_data_source.statistics['n_turns'] *
                        config.n_epochs / config.batch_size)
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [{
        'params': [
            p for n, p in trainable_parameters
            if not any(nd in n for nd in no_decay)
        ],
        'weight_decay': config.lr_decay_rate
    }, {
        'params': [
            p for n, p in trainable_parameters
            if any(nd in n for nd in no_decay)
        ],
        'weight_decay': 0.0
    }]
    optimizer = transformers.AdamW(
        optimizer_grouped_parameters,
        lr=config.init_lr,
        weight_decay=config.lr_decay_rate,
        correct_bias=False,
    )

    # Build lr scheduler
    #lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #    optimizer=optimizer,
    #    mode="min",
    #    factor=config.lr_decay_rate,
    #    patience=2,
    #)
    print("warmup/total steps:", warmup_steps, t_total)
    lr_scheduler = transformers.get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)

    # log hyper parameters
    start_time = time.time()
    mlog("----- Hyper-parameters -----", config, LOG_FILE_NAME)
    for k, v in sorted(dict(config.__dict__).items()):
        mlog("{}: {}".format(k, v), config, LOG_FILE_NAME)
    for name, param in model.named_parameters():
        mlog(
            "{}: {}; Grad: {}".format(name, param.size(),
                                      param.requires_grad), config,
            LOG_FILE_NAME)

    # data stats for batching
    train_dialogs_keys = train_data_source.dialog_keys
    shuffle_dialogs = train_dialogs_keys[:]

    # TRAIN
    n_step = 0
    best_score = -9999
    best_loss = np.inf
    for epoch in range(1, config.n_epochs + 1):
        #lr = list(lr_scheduler.optimizer.param_groups)[0]["lr"]
        #if lr <= config.min_lr:
        #    break
        lr = lr_scheduler.get_last_lr()

        random.shuffle(shuffle_dialogs)
        n_batch = 0
        for dialog_idx in shuffle_dialogs:
            if config.frame_features:
                dialog_frames = train_data_source.load_frames(dialog_idx)
            else:
                dialog_frames = []
            dialog_length = train_data_source.get_dialog_length(dialog_idx)
            turn_keys = list(range(dialog_length))
            random.shuffle(turn_keys)
            #if config.debug and n_step > 30:
            #    break
            for offset in range(0, dialog_length, config.batch_size):
                model.zero_grad()
                model.train()
                turn_idx = turn_keys[offset:offset + config.batch_size]
                batch_data = train_data_source.get_batch_features(
                    dialog_idx, dialog_frames, turn_idx)

                # Forward
                ret_data, ret_stat = model.train_step(batch_data)

                # Backward
                loss = ret_data["loss"]
                loss.backward()
                if config.gradient_clip > 0.0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   config.gradient_clip)
                optimizer.step()
                optimizer.zero_grad()
                lr_scheduler.step()

                # update
                trn_reporter.update_data(ret_stat)

                # Check loss
                if n_step > 0 and n_step % config.check_loss_after_n_step == 0:
                    log_s = f"{time.time()-start_time:.2f}s Epoch {epoch} batch {n_batch} step {n_step} - Training loss on this batch: "
                    log_s += trn_reporter.to_string()
                    mlog(log_s, config, LOG_FILE_NAME)
                    trn_reporter.clear()

                # Evaluate on dev dataset
                if n_step > 0 and n_step % config.validate_after_n_step == 0:
                    model.eval()
                    log_s = f"<Dev> learning rate: {lr}\n"
                    mlog(log_s, config, LOG_FILE_NAME)

                    current_score, metrics_results, split_loss = eval_split(
                        model, dev_data_source, "dev", config,
                        label_tokenizer, metrics, LOG_FILE_NAME,
                        write_pred=False)
                    print("Split loss & best loss ", split_loss, best_loss)
                    print("Split score & best score ", current_score,
                          best_score)
                    if not config.debug:
                        experiment.log_metrics(metrics_results)
                    if current_score > best_score:
                        best_score = current_score

                    # Save model if it has better monitor measurement
                    if split_loss < best_loss:
                        best_loss = split_loss
                        if config.save_model:
                            this_model_path = f"{config.model_save_path}/model"
                            if not os.path.exists(this_model_path):
                                os.makedirs(this_model_path)
                            torch.save(
                                model.state_dict(),
                                f"{this_model_path}/{LOG_FILE_NAME}.model.pt")
                            torch.save(
                                config,
                                f"{this_model_path}/{LOG_FILE_NAME}.config")
                            mlog(
                                f"model saved to {this_model_path}/{LOG_FILE_NAME}.model.pt",
                                config, LOG_FILE_NAME)

                # Finished a step
                n_batch += 1
                n_step += 1

        mlog("----- EVALUATING at end of epoch -----", config, LOG_FILE_NAME)
        mlog(f"End of epoch: {epoch}", config, LOG_FILE_NAME)
        current_score, metrics_results, split_loss = eval_split(
            model, dev_data_source, "dev", config, label_tokenizer, metrics,
            LOG_FILE_NAME, write_pred=False)
        print("Split loss & best loss ", split_loss, best_loss)
        print("Split score & best score ", current_score, best_score)
        if not config.debug:
            experiment.log_metrics(metrics_results)
        if current_score > best_score:
            best_score = current_score
        if split_loss < best_loss:
            best_loss = split_loss
            if config.save_model:
                torch.save(model.state_dict(),
                           f"{this_model_path}/{LOG_FILE_NAME}.model.pt")
                torch.save(config,
                           f"{this_model_path}/{LOG_FILE_NAME}.config")
                mlog(
                    f"model saved to {this_model_path}/{LOG_FILE_NAME}.model.pt",
                    config, LOG_FILE_NAME)

        # Decay learning rate at end of epoch
        #lr_scheduler.step(best_loss)

    # Evaluate on test dataset at the end of training
    mlog("----- EVALUATING at end of training -----", config, LOG_FILE_NAME)
    mlog("----- Loading test data -----", config, LOG_FILE_NAME)
    test_data_source = SpeechXTSource(split='test',
                                      config=config,
                                      tokenizer=tokenizer,
                                      label_tokenizer=label_tokenizer)
    mlog(str(test_data_source.statistics), config, LOG_FILE_NAME)

    if config.save_model:
        model_path = f"{this_model_path}/{LOG_FILE_NAME}.model.pt"
        model.load_model(model_path)
        print(f"model path: {model_path}")

    model.eval()
    for set_name, data_source in [("DEV", dev_data_source),
                                  ("TEST", test_data_source)]:
        current_score, metrics_results, split_loss = eval_split(
            model, data_source, set_name, config, label_tokenizer, metrics,
            LOG_FILE_NAME, write_pred=True)
        print("Split loss: ", split_loss)
        diff = (metrics_results['Macro F1'] - metrics_results['DER']) * 100
        lazy_s = f"DSER, DER, F1, LWER:\n {100*metrics_results['DSER']}\t{100*metrics_results['DER']}\t{100*metrics_results['Macro F1']}\t{diff}\t{100*metrics_results['Macro LWER']}\n"
        mlog(lazy_s, config, LOG_FILE_NAME)
def run_train(config):
    # tokenizers
    tokenizer = ModBertTokenizer('base', cache_dir=config.cache_dir)
    label_token_dict = {
        f"label_{label_idx}_token": label
        for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
    }
    label_token_dict.update({
        "pad_token": "<pad>",
        "bos_token": "<t>",
        "eos_token": "</t>"
    })
    label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

    # metrics calculator
    metrics = DAMetrics()

    # define logger
    MODEL_NAME = config.model
    LOG_FILE_NAME = "{}.seed_{}.{}".format(
        MODEL_NAME, config.seed,
        time.strftime("%Y%m%d-%H%M%S", time.localtime())[-6:])
    if config.filename_note:
        LOG_FILE_NAME += f".{config.filename_note}"
        experiment.set_name(config.filename_note)
    experiment.log_text(LOG_FILE_NAME)

    # data loaders & number reporters
    trn_reporter = StatisticsReporter()
    dev_reporter = StatisticsReporter()

    mlog("----- Loading training data -----", config, LOG_FILE_NAME)
    train_data_source = SpeechDataSource(split="train",
                                         config=config,
                                         tokenizer=tokenizer,
                                         label_tokenizer=label_tokenizer)
    mlog(str(train_data_source.statistics), config, LOG_FILE_NAME)

    mlog("----- Loading dev data -----", config, LOG_FILE_NAME)
    dev_data_source = SpeechDataSource(split="dev",
                                       config=config,
                                       tokenizer=tokenizer,
                                       label_tokenizer=label_tokenizer)
    mlog(str(dev_data_source.statistics), config, LOG_FILE_NAME)

    # build model
    if config.model == "bert_attn_ed":
        Model = BertAttnEDSeqLabeler
    elif config.model == "speech_attn_ed":
        Model = SpeechAttnEDSeqLabeler
    else:
        print("no model specified")
        exit(0)
    model = Model(config, tokenizer, label_tokenizer, freeze=config.freeze)

    # model adaption
    if torch.cuda.is_available():
        mlog("----- Using GPU -----", config, LOG_FILE_NAME)
        model = model.cuda()
    if config.model_path:
        model.load_model(config.model_path)
        mlog("----- Model loaded -----", config, LOG_FILE_NAME)
        mlog(f"model path: {config.model_path}", config, LOG_FILE_NAME)

    trainable_parameters = [
        param for param in model.parameters() if param.requires_grad
    ]
    total_params_count = sum([x.numel() for x in trainable_parameters])
    print("Total params count: ", total_params_count)

    # Build optimizer
    optimizer = optim.AdamW(model.parameters(),
                            lr=config.init_lr,
                            weight_decay=config.l2_penalty)

    # Build lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        mode="min",
        factor=config.lr_decay_rate,
        patience=2,
    )

    # log hyper parameters
    start_time = time.time()
    mlog("----- Hyper-parameters -----", config, LOG_FILE_NAME)
    for k, v in sorted(dict(config.__dict__).items()):
        mlog("{}: {}".format(k, v), config, LOG_FILE_NAME)
    for name, param in model.named_parameters():
        mlog(
            "{}: {}; Grad: {}".format(name, param.size(),
                                      param.requires_grad), config,
            LOG_FILE_NAME)

    # data stats for batching
    train_dialogs_keys = train_data_source.dialog_keys
    shuffle_dialogs = train_dialogs_keys[:]

    # TRAIN
    n_step = 0
    best_score = -9999
    for epoch in range(1, config.n_epochs + 1):
        lr = list(lr_scheduler.optimizer.param_groups)[0]["lr"]
        if lr <= config.min_lr:
            break

        random.shuffle(shuffle_dialogs)
        n_batch = 0
        for dialog_idx in shuffle_dialogs:
            if config.frame_features:
                dialog_frames = train_data_source.load_frames(dialog_idx)
            else:
                dialog_frames = []
            dialog_length = train_data_source.get_dialog_length(dialog_idx)
            turn_keys = list(range(dialog_length))
            random.shuffle(turn_keys)
            if config.debug and n_step > 30:
                break
            for offset in range(0, dialog_length, config.batch_size):
                model.zero_grad()
                model.train()
                turn_idx = turn_keys[offset:offset + config.batch_size]
                batch_data = train_data_source.get_batch_features(
                    dialog_idx, dialog_frames, turn_idx)

                # Forward
                ret_data, ret_stat = model.train_step(batch_data)
                trn_reporter.update_data(ret_stat)

                # Backward
                loss = ret_data["loss"]
                loss.backward()
                if config.gradient_clip > 0.0:
                    torch.nn.utils.clip_grad_norm_(model.parameters(),
                                                   config.gradient_clip)
                optimizer.step()
                optimizer.zero_grad()

                # update
                trn_reporter.update_data(ret_stat)

                # Check loss
                if n_step > 0 and n_step % config.check_loss_after_n_step == 0:
                    log_s = f"{time.time()-start_time:.2f}s Epoch {epoch} batch {n_batch} - "
                    log_s += trn_reporter.to_string()
                    mlog(log_s, config, LOG_FILE_NAME)
                    trn_reporter.clear()

                # Evaluate on dev dataset
                if n_step > 0 and n_step % config.validate_after_n_step == 0:
                    model.eval()
                    log_s = f"<Dev> learning rate: {lr}\n"
                    mlog(log_s, config, LOG_FILE_NAME)

                    current_score, metrics_results, dev_reporter = eval_split(
                        model, dev_data_source, "dev", config,
                        label_tokenizer, metrics, LOG_FILE_NAME,
                        dev_reporter=dev_reporter, write_pred=False)
                    if not config.debug:
                        experiment.log_metrics(metrics_results)

                    # Save model if it has better monitor measurement
                    if current_score > best_score:
                        best_score = current_score
                        if config.save_model:
                            this_model_path = f"{config.model_save_path}/model"
                            if not os.path.exists(this_model_path):
                                os.makedirs(this_model_path)
                            torch.save(
                                model.state_dict(),
                                f"{this_model_path}/{LOG_FILE_NAME}.model.pt")
                            mlog(
                                f"model saved to {this_model_path}/{LOG_FILE_NAME}.model.pt",
                                config, LOG_FILE_NAME)
                            #if torch.cuda.is_available():
                            #    model = model.cuda()

                    # Decay learning rate
                    lr_scheduler.step(dev_reporter.get_value("monitor"))
                    dev_reporter.clear()

                # Finished a step
                n_batch += 1
                n_step += 1

    # Evaluate on test dataset at the end of training
    mlog("----- EVALUATING at end of training -----", config, LOG_FILE_NAME)
    mlog("----- Loading test data -----", config, LOG_FILE_NAME)
    test_data_source = SpeechDataSource(split='test',
                                        config=config,
                                        tokenizer=tokenizer,
                                        label_tokenizer=label_tokenizer)
    mlog(str(test_data_source.statistics), config, LOG_FILE_NAME)

    if config.save_model:
        model_path = f"{this_model_path}/{LOG_FILE_NAME}.model.pt"
        model.load_model(model_path)
        print(f"model path: {model_path}")

    model.eval()
    #if config.debug:
    #    exit(0)
    for set_name, data_source in [("DEV", dev_data_source),
                                  ("TEST", test_data_source)]:
        current_score, metrics_results, dev_reporter = eval_split(
            model, data_source, set_name, config, label_tokenizer, metrics,
            LOG_FILE_NAME, dev_reporter=None, write_pred=True)
        lazy_s = f"DSER, DER, F1, LWER:\n {100*metrics_results['DSER']}\t{100*metrics_results['DER']}\t{100*metrics_results['Macro F1']}\t\t{100*metrics_results['Macro LWER']}\n"
        mlog(lazy_s, config, LOG_FILE_NAME)
def run_pred(config):
    # tokenizers
    tokenizer = ModBertTokenizer('base', cache_dir=config.cache_dir)

    if config.model in ["bert_attn_ed", "speech_attn_ed"]:
        label_token_dict = {
            f"label_{label_idx}_token": label
            for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
        }
        label_token_dict.update({
            "pad_token": "<pad>",
            "bos_token": "<t>",
            "eos_token": "</t>"
        })
        label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

        # data loaders & number reporters
        print("----- Loading dev data -----")
        dev_data_source = SpeechDataSource(split="dev",
                                           config=config,
                                           tokenizer=tokenizer,
                                           label_tokenizer=label_tokenizer)
        print(str(dev_data_source.statistics))
        print("----- Loading test data -----")
        test_data_source = SpeechDataSource(split="test",
                                            config=config,
                                            tokenizer=tokenizer,
                                            label_tokenizer=label_tokenizer)
        print(str(test_data_source.statistics))
    elif config.model in ["speech_xt", "speech_bl"]:
        label_token_dict = {
            "pad_token": "<pad>",
            "bos_token": "<t>",
            "eos_token": "</t>"
        }
        label_token_dict.update({
            f"label_{label_idx}_token": label
            for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
        })
        label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

        # data loaders & number reporters
        print("----- Loading dev data -----")
        dev_data_source = SpeechXTSource(split="dev",
                                         config=config,
                                         tokenizer=tokenizer,
                                         label_tokenizer=label_tokenizer)
        print(str(dev_data_source.statistics))
        print("----- Loading test data -----")
        test_data_source = SpeechXTSource(split="test",
                                          config=config,
                                          tokenizer=tokenizer,
                                          label_tokenizer=label_tokenizer)
        print(str(test_data_source.statistics))
    else:
        print("Invalid model")
        exit(0)

    # metrics calculator
    metrics = DAMetrics()

    # set up model
    if config.model == "bert_attn_ed":
        Model = BertAttnEDSeqLabeler
    elif config.model == "speech_attn_ed":
        Model = SpeechAttnEDSeqLabeler
    elif config.model == "speech_xt":
        Model = SpeechTransformerLabeler
    elif config.model == "speech_bl":
        Model = SpeechBaselineLabeler
    else:
        print("no model specified")
        exit(0)
    model = Model(config, tokenizer, label_tokenizer, freeze=config.freeze)

    # model adaption
    if torch.cuda.is_available():
        print("----- Using GPU -----")
        model = model.cuda()
    model_name = config.model_name + ".model.pt"
    model_path = os.path.join(model_dir, model_name)
    model.load_model(model_path)
    print(f"model path: {model_path}")

    for set_name, data_source in [("DEV", dev_data_source),
                                  ("TEST", test_data_source)]:
        output = eval_split(model, data_source, set_name, config, tokenizer,
                            label_tokenizer)
        print(f"Written to {output}")
def run_train(config):
    # tokenizers
    special_token_dict = {
        "speaker1_token": "<speaker1>",
        "speaker2_token": "<speaker2>"
    }
    tokenizer = WhiteSpaceTokenizer(word_count_path=config.word_count_path,
                                    vocab_size=config.vocab_size,
                                    special_token_dict=special_token_dict)
    label_token_dict = {
        f"label_{label_idx}_token": label
        for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
    }
    label_token_dict.update({
        "pad_token": "<pad>",
        "bos_token": "<t>",
        "eos_token": "</t>"
    })
    label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

    # metrics calculator
    metrics = DAMetrics()

    # define logger
    MODEL_NAME = config.model
    LOG_FILE_NAME = "{}.seed_{}.{}".format(
        MODEL_NAME, config.seed,
        time.strftime("%Y%m%d-%H%M%S", time.localtime()))
    if config.filename_note:
        LOG_FILE_NAME += f".{config.filename_note}"

    # data loaders & number reporters
    trn_reporter = StatisticsReporter()
    dev_reporter = StatisticsReporter()
    with open(config.dataset_path, encoding="utf-8") as f:
        dataset = json.load(f)

    mlog("----- Loading training data -----", config, LOG_FILE_NAME)
    train_data_source = DataSource(data=dataset["train"],
                                   config=config,
                                   tokenizer=tokenizer,
                                   label_tokenizer=label_tokenizer)
    mlog(str(train_data_source.statistics), config, LOG_FILE_NAME)

    mlog("----- Loading dev data -----", config, LOG_FILE_NAME)
    dev_data_source = DataSource(data=dataset["dev"],
                                 config=config,
                                 tokenizer=tokenizer,
                                 label_tokenizer=label_tokenizer)
    mlog(str(dev_data_source.statistics), config, LOG_FILE_NAME)

    # build model
    if config.model == "ed":
        Model = EDSeqLabeler
    elif config.model == "attn_ed":
        Model = AttnEDSeqLabeler
    model = Model(config, tokenizer, label_tokenizer)

    # model adaption
    if torch.cuda.is_available():
        mlog("----- Using GPU -----", config, LOG_FILE_NAME)
        model = model.cuda()
    if config.model_path:
        model.load_model(config.model_path)
        mlog("----- Model loaded -----", config, LOG_FILE_NAME)
        mlog(f"model path: {config.model_path}", config, LOG_FILE_NAME)

    # Build optimizer
    optimizer = optim.AdamW(model.parameters(),
                            lr=config.init_lr,
                            weight_decay=config.l2_penalty)

    # Build lr scheduler
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        mode="min",
        factor=config.lr_decay_rate,
        patience=2,
    )

    # log hyper parameters
    start_time = time.time()
    mlog("----- Hyper-parameters -----", config, LOG_FILE_NAME)
    for k, v in sorted(dict(config.__dict__).items()):
        mlog("{}: {}".format(k, v), config, LOG_FILE_NAME)

    # here we go
    n_step = 0
    for epoch in range(1, config.n_epochs + 1):
        lr = list(lr_scheduler.optimizer.param_groups)[0]["lr"]
        if lr <= config.min_lr:
            break

        # Train
        n_batch = 0
        train_data_source.epoch_init(shuffle=True)
        while True:
            batch_data = train_data_source.next(config.batch_size)
            if batch_data is None:
                break

            # Forward
            model.train()
            ret_data, ret_stat = model.train_step(batch_data)
            trn_reporter.update_data(ret_stat)

            # Backward
            loss = ret_data["loss"]
            loss.backward()
            if config.gradient_clip > 0.0:
                torch.nn.utils.clip_grad_norm_(model.parameters(),
                                               config.gradient_clip)
            optimizer.step()
            optimizer.zero_grad()

            # update
            trn_reporter.update_data(ret_stat)

            # Check loss
            if n_step > 0 and n_step % config.check_loss_after_n_step == 0:
                log_s = f"{time.time()-start_time:.2f}s Epoch {epoch} batch {n_batch} - "
                log_s += trn_reporter.to_string()
                mlog(log_s, config, LOG_FILE_NAME)
                trn_reporter.clear()

            # Evaluate on dev dataset
            if n_step > 0 and n_step % config.validate_after_n_step == 0:
                model.eval()

                log_s = f"<Dev> learning rate: {lr}\n"
                mlog(log_s, config, LOG_FILE_NAME)

                pred_labels, true_labels = [], []
                dev_data_source.epoch_init(shuffle=False)
                while True:
                    batch_data = dev_data_source.next(config.eval_batch_size)
                    if batch_data is None:
                        break

                    ret_data, ret_stat = model.evaluate_step(batch_data)
                    dev_reporter.update_data(ret_stat)

                    ret_data, ret_stat = model.test_step(batch_data)

                    refs = batch_data["Y"][:, 1:].tolist()
                    hyps = ret_data["symbols"].tolist()
                    for true_label_ids, pred_label_ids in zip(refs, hyps):
                        end_idx = true_label_ids.index(
                            label_tokenizer.eos_token_id)
                        true_labels.append([
                            label_tokenizer.id2word[label_id]
                            for label_id in true_label_ids[:end_idx]
                        ])
                        pred_labels.append([
                            label_tokenizer.id2word[label_id]
                            for label_id in pred_label_ids[:end_idx]
                        ])

                log_s = f"\n<Dev> - {time.time()-start_time:.3f}s - "
                log_s += dev_reporter.to_string()
                mlog(log_s, config, LOG_FILE_NAME)

                metrics_results = metrics.batch_metrics(
                    true_labels, pred_labels)
                experiment.log_metrics(metrics_results)
                log_s = \
                    f"\tDSER: {100*metrics_results['DSER']:.2f}\n" \
                    f"\tseg WER: {100*metrics_results['strict segmentation error']:.2f}\n" \
                    f"\tDER: {100*metrics_results['DER']:.2f}\n" \
                    f"\tjoint WER: {100*metrics_results['strict joint error']:.2f}\n" \
                    f"\tMacro F1: {100*metrics_results['Macro F1']:.2f}\n" \
                    f"\tMicro F1: {100*metrics_results['Micro F1']:.2f}\n" \
                    f"\tMacro LWER: {100*metrics_results['Macro LWER']:.2f}\n" \
                    f"\tMicro LWER: {100*metrics_results['Micro LWER']:.2f}\n"
                mlog(log_s, config, LOG_FILE_NAME)

                # Save model if it has better monitor measurement
                if config.save_model:
                    if not os.path.exists(f"{config.task_data_dir}/model/"):
                        os.makedirs(f"{config.task_data_dir}/model/")
                    torch.save(
                        model.state_dict(),
                        f"{config.task_data_dir}/model/{LOG_FILE_NAME}.model.pt"
                    )
                    mlog(
                        f"model saved to {config.task_data_dir}/model/{LOG_FILE_NAME}.model.pt",
                        config, LOG_FILE_NAME)
                    if torch.cuda.is_available():
                        model = model.cuda()

                # Decay learning rate
                lr_scheduler.step(dev_reporter.get_value("monitor"))
                dev_reporter.clear()

            # Finished a step
            n_batch += 1
            n_step += 1

    # Evaluate on test dataset at the end of training
    mlog("----- EVALUATING at end of training -----", config, LOG_FILE_NAME)
    mlog("----- Loading test data -----", config, LOG_FILE_NAME)
    test_data_source = DataSource(data=dataset["test"],
                                  config=config,
                                  tokenizer=tokenizer,
                                  label_tokenizer=label_tokenizer)
    mlog(str(test_data_source.statistics), config, LOG_FILE_NAME)

    model.eval()
    for set_name, data_source in [("DEV", dev_data_source),
                                  ("TEST", test_data_source)]:
        pred_labels, true_labels = [], []
        data_source.epoch_init(shuffle=False)
        RES_FILE_NAME = set_name + "_" + LOG_FILE_NAME
        s = "LABELS\tPREDS"
        reslog(s, RES_FILE_NAME)
        while True:
            batch_data = data_source.next(config.eval_batch_size)
            if batch_data is None:
                break

            ret_data, ret_stat = model.test_step(batch_data)

            refs = batch_data["Y"][:, 1:].tolist()
            hyps = ret_data["symbols"].tolist()
            for true_label_ids, pred_label_ids in zip(refs, hyps):
                end_idx = true_label_ids.index(label_tokenizer.eos_token_id)
                true_syms = [
                    label_tokenizer.id2word[label_id]
                    for label_id in true_label_ids[:end_idx]
                ]
                pred_syms = [
                    label_tokenizer.id2word[label_id]
                    for label_id in pred_label_ids[:end_idx]
                ]
                s = " ".join(true_syms) + "\t" + " ".join(pred_syms)
                reslog(s, RES_FILE_NAME)
                true_labels.append(true_syms)
                pred_labels.append(pred_syms)

        log_s = f"\n<{set_name}> - {time.time()-start_time:.3f}s - "
        mlog(log_s, config, LOG_FILE_NAME)

        metrics_results = metrics.batch_metrics(true_labels, pred_labels)
        log_s = \
            f"\tDSER: {100*metrics_results['DSER']:.2f}\n" \
            f"\tseg WER: {100*metrics_results['strict segmentation error']:.2f}\n" \
            f"\tDER: {100*metrics_results['DER']:.2f}\n" \
            f"\tjoint WER: {100*metrics_results['strict joint error']:.2f}\n" \
            f"\tMacro F1: {100*metrics_results['Macro F1']:.2f}\n" \
            f"\tMicro F1: {100*metrics_results['Micro F1']:.2f}\n" \
            f"\tMacro LWER: {100*metrics_results['Macro LWER']:.2f}\n" \
            f"\tMicro LWER: {100*metrics_results['Micro LWER']:.2f}\n"
        mlog(log_s, config, LOG_FILE_NAME)
def run_test(config):
    # tokenizers
    special_token_dict = {
        "speaker1_token": "<speaker1>",
        "speaker2_token": "<speaker2>"
    }
    tokenizer = WhiteSpaceTokenizer(word_count_path=config.word_count_path,
                                    vocab_size=config.vocab_size,
                                    special_token_dict=special_token_dict)
    label_token_dict = {
        f"label_{label_idx}_token": label
        for label_idx, label in enumerate(config.joint_da_seg_recog_labels)
    }
    label_token_dict.update({
        "pad_token": "<pad>",
        "bos_token": "<t>",
        "eos_token": "</t>"
    })
    label_tokenizer = CustomizedTokenizer(token_dict=label_token_dict)

    # metrics calculator
    metrics = DAMetrics()

    # load the dataset the same way as in run_train; start_time feeds the
    # elapsed-time logs below
    start_time = time.time()
    with open(config.dataset_path, encoding="utf-8") as f:
        dataset = json.load(f)

    mlog("----- Loading dev data -----", config, config.LOG_FILE_NAME)
    dev_data_source = DataSource(data=dataset["dev"],
                                 config=config,
                                 tokenizer=tokenizer,
                                 label_tokenizer=label_tokenizer)
    mlog(str(dev_data_source.statistics), config, config.LOG_FILE_NAME)

    # build model
    if config.model == "ed":
        Model = EDSeqLabeler
    elif config.model == "attn_ed":
        Model = AttnEDSeqLabeler
    model = Model(config, tokenizer, label_tokenizer)

    # model adaption
    if torch.cuda.is_available():
        mlog("----- Using GPU -----", config, config.LOG_FILE_NAME)
        model = model.cuda()
    if not config.model_path:
        print("NEED TO PROVIDE PATH")
        exit(0)
    model.load_model(config.model_path)
    mlog("----- Model loaded -----", config, config.LOG_FILE_NAME)
    mlog(f"model path: {config.model_path}", config, config.LOG_FILE_NAME)

    mlog("----- Loading test data -----", config, config.LOG_FILE_NAME)
    test_data_source = DataSource(data=dataset["test"],
                                  config=config,
                                  tokenizer=tokenizer,
                                  label_tokenizer=label_tokenizer)
    mlog(str(test_data_source.statistics), config, config.LOG_FILE_NAME)

    model.eval()
    for set_name, data_source in [("DEV", dev_data_source),
                                  ("TEST", test_data_source)]:
        pred_labels, true_labels = [], []
        data_source.epoch_init(shuffle=False)
        RES_FILE_NAME = set_name + "_" + config.LOG_FILE_NAME
        s = "LABELS\tPREDS"
        reslog(s, RES_FILE_NAME)
        while True:
            batch_data = data_source.next(config.eval_batch_size)
            if batch_data is None:
                break

            ret_data, ret_stat = model.test_step(batch_data)

            refs = batch_data["Y"][:, 1:].tolist()
            hyps = ret_data["symbols"].tolist()
            for true_label_ids, pred_label_ids in zip(refs, hyps):
                end_idx = true_label_ids.index(label_tokenizer.eos_token_id)
                true_syms = [
                    label_tokenizer.id2word[label_id]
                    for label_id in true_label_ids[:end_idx]
                ]
                pred_syms = [
                    label_tokenizer.id2word[label_id]
                    for label_id in pred_label_ids[:end_idx]
                ]
                s = " ".join(true_syms) + "\t" + " ".join(pred_syms)
                reslog(s, RES_FILE_NAME)
                true_labels.append(true_syms)
                pred_labels.append(pred_syms)

        log_s = f"\n<{set_name}> - {time.time()-start_time:.3f}s - "
        mlog(log_s, config, config.LOG_FILE_NAME)

        metrics_results = metrics.batch_metrics(true_labels, pred_labels)
        log_s = \
            f"\tDSER: {100*metrics_results['DSER']:.2f}\n" \
            f"\tseg WER: {100*metrics_results['strict segmentation error']:.2f}\n" \
            f"\tDER: {100*metrics_results['DER']:.2f}\n" \
            f"\tjoint WER: {100*metrics_results['strict joint error']:.2f}\n" \
            f"\tMacro F1: {100*metrics_results['Macro F1']:.2f}\n" \
            f"\tMicro F1: {100*metrics_results['Micro F1']:.2f}\n"
        mlog(log_s, config, config.LOG_FILE_NAME)