import json
import math
import os

import torch
import tqdm

import speech
import speech.loader as loader
# The project-specific helpers referenced below (eval_loop, decode,
# match_filename, create_filename, compile_save, get_names, read_pickle,
# load_config, load_state_dict, CTC_train, native_loss, awni_loss,
# naren_loss) are assumed to be imported from the surrounding package.


def run(model_path, dataset_json, batch_size=8, tag="best", out_file=None):
    use_cuda = torch.cuda.is_available()

    model, preproc = speech.load(model_path, tag=tag)
    ldr = loader.make_loader(dataset_json, preproc, batch_size)

    if use_cuda:
        model.cuda()
    else:
        model.cpu()
    model.set_eval()

    results = eval_loop(model, ldr)
    results = [(preproc.decode(label), preproc.decode(pred))
               for label, pred in results]
    cer = speech.compute_cer(results)
    print("CER {:.3f}".format(cer))

    if out_file is not None:
        # write one JSON record per line
        with open(out_file, 'w') as fid:
            for label, pred in results:
                res = {'prediction': pred,
                       'label': label}
                json.dump(res, fid)
                fid.write("\n")
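# Example invocation (a sketch; the paths below are placeholders, not files
# that ship with this repo):
#
#   run("path/to/model_dir", "path/to/test.json",
#       batch_size=8, out_file="predictions.json")
#
# This prints the CER over the dataset and, since out_file is given, writes
# one {"prediction": ..., "label": ...} record per line to predictions.json.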
def json_save(results, dataset_json, out_file, add_filename):
    """Writes the results to a file in JSON-lines format: one record per line."""
    output_results = []
    for label, pred, conf in results:
        if add_filename:
            filename = match_filename(label, dataset_json)
            PER = speech.compute_cer([(label, pred)], verbose=False)
            res = {'filename': filename,
                   'prediction': pred,
                   'label': label,
                   'PER': round(PER, 3)}
        else:
            res = {'prediction': pred,
                   'label': label}
        output_results.append(res)

    # if including the filename, add the "pred-fn" suffix before the
    # extension and sort the examples from highest to lowest PER
    if add_filename:
        out_file = create_filename(out_file, "pred-fn", "json")
        output_results = sorted(output_results,
                                key=lambda x: x['PER'], reverse=True)
    else:
        out_file = create_filename(out_file, "pred", "json")
    print(f"file saved to: {out_file}")

    with open(out_file, 'w') as fid:
        for sample in output_results:
            json.dump(sample, fid)
            fid.write("\n")
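# The file written by json_save is JSON-lines, not a single JSON array. A
# minimal sketch for reading it back (load_predictions is a hypothetical
# helper, not part of this repo; it relies on the module-level json import):
def load_predictions(pred_file):
    """Return the list of per-example dicts written by json_save."""
    with open(pred_file) as fid:
        return [json.loads(line) for line in fid]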
def format_save(results, dataset_json, out_file):
    """Writes the results to a file in a human-readable format."""
    out_file = create_filename(out_file, "compare", "txt")
    print(f"file saved to: {out_file}")
    with open(out_file, 'w') as fid:
        write_list = list()
        lower_list = lambda x: list(map(str.lower, x))
        for label, pred, conf in results:
            label, pred = lower_list(label), lower_list(pred)
            filepath, order = match_filename(label, dataset_json, return_order=True)
            filename = os.path.splitext(os.path.split(filepath)[1])[0]
            PER, (dist, length) = speech.compute_cer(
                [(label, pred)], verbose=False, dist_len=True)
            write_list.append({"order": order,
                               "filename": filename,
                               "label": label,
                               "preds": pred,
                               "metrics": {"PER": round(PER, 3),
                                           "dist": dist,
                                           "len": length,
                                           "confidence": round(conf, 3)}})

        # write the examples in the order they appear in the dataset
        write_list = sorted(write_list, key=lambda x: x['order'])
        for write_dict in write_list:
            fid.write(f"{write_dict['filename']}\n")
            fid.write(f"label: {' '.join(write_dict['label'])}\n")
            fid.write(f"preds: {' '.join(write_dict['preds'])}\n")
            PER, dist = write_dict['metrics']['PER'], write_dict['metrics']['dist']
            length, conf = write_dict['metrics']['len'], write_dict['metrics']['confidence']
            fid.write(f"metrics: PER: {PER}, dist: {dist}, len: {length}, conf: {conf}\n")
            fid.write("\n")

        # append a summary of filename, PER pairs at the end of the file
        for write_dict in write_list:
            fid.write(f"{write_dict['filename']}, {write_dict['metrics']['PER']}\n")
def eval(data_loader, model, int_to_char, m48_39, beam_size=1):
    total_loss = []
    all_preds = []
    all_labels = []
    for batch in data_loader:
        inputs, labels = batch
        loss, preds = model.eval_model(inputs, labels, beam_size)
        total_loss.append(loss)
        all_preds.extend(preds)
        all_labels.extend(labels)

    avg_loss = sum(total_loss) / len(total_loss)
    results = [(decode(l, int_to_char), decode(p, int_to_char))
               for l, p in zip(all_labels, all_preds)]

    # TIMIT evaluation: map the 48-phoneme set down to the standard
    # 39-phoneme set before scoring
    results_mapped = []
    for label, predict in results:
        label_res = [m48_39[l] for l in label]
        predict_res = [m48_39[p] for p in predict]
        results_mapped.append((label_res, predict_res))
    cer = speech.compute_cer(results_mapped)
    return avg_loss, cer
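# The m48_39 dict maps TIMIT's 48-phoneme training set down to the 39-phoneme
# evaluation set. A minimal sketch of building it from the standard phone map
# shipped with Kaldi-style recipes (the path and the three-column,
# whitespace-separated format are assumptions, not part of this repo):
def load_48_to_39_map(map_path):
    """Build {48-phoneme: 39-phoneme} from a 60-48-39 phone-map file."""
    m48_39 = {}
    with open(map_path) as fid:
        for line in fid:
            parts = line.split()
            if len(parts) == 3:    # skip phones (e.g. 'q') with no 39-set mapping
                _, phn48, phn39 = parts
                m48_39[phn48] = phn39
    return m48_39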
def eval_dev(model, ldr, preproc, logger, loss_name):
    """Runs the dev-set evaluation loop."""
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()
    preproc.set_eval()    # turns off dataset augmentation
    use_log = (logger is not None)

    # no_grad saves time and memory: there is no backward pass, so gradients
    # are neither computed nor stored
    with torch.no_grad():
        for batch in tqdm.tqdm(ldr):
            batch = list(batch)
            preds = model.infer(batch)
            inputs, labels, input_lens, label_lens = model.collate(*batch)
            inputs = inputs.cuda(non_blocking=True)
            out, rnn_args = model(inputs, softmax=False)

            if loss_name == "native":
                loss = native_loss(out, labels, input_lens, label_lens, model.blank)
            elif loss_name == "awni":
                loss = awni_loss(out, labels, input_lens, label_lens, model.blank)
            elif loss_name == "naren":
                loss = naren_loss(out, labels, input_lens, label_lens, model.blank)
            else:
                raise ValueError(f"unknown loss_name: {loss_name}")

            losses.append(loss.item())
            all_preds.extend(preds)
            all_labels.extend(batch[1])    # the labels sit at index 1 of the batch

    loss = sum(losses) / len(losses)
    # decode from integer tokens back to phoneme labels
    results = [(preproc.decode(l), preproc.decode(p))
               for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)
    print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))

    if use_log:
        # `loss` is a Python float here, so no .item() call is needed
        logger.info(f"eval_dev: loss calculated as: {loss:0.3f}")
        logger.info(f"eval_dev: loss is nan: {math.isnan(loss)}")
        logger.info(f"eval_dev: results {results}")
        logger.info(f"CER: {cer}")

    # set the model and preproc back to training mode
    model.set_train()
    preproc.set_train()

    return loss, cer
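# "native" presumably names PyTorch's built-in CTC loss. A minimal sketch of
# what such a wrapper could look like, assuming `out` holds unnormalized
# scores of shape (batch, time, classes) and that labels/input_lens/label_lens
# are tensors in the layout torch.nn.functional.ctc_loss expects; this wrapper
# is an assumption, not this repo's native_loss:
import torch.nn.functional as F

def native_loss_sketch(out, labels, input_lens, label_lens, blank_idx):
    log_probs = F.log_softmax(out, dim=-1)    # ctc_loss expects log-probabilities
    log_probs = log_probs.permute(1, 0, 2)    # (N, T, C) -> (T, N, C)
    return F.ctc_loss(log_probs, labels, input_lens, label_lens,
                      blank=blank_idx, zero_infinity=True)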
def eval_dev(model, ldr, preproc, logger, use_augmentation):
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()
    if not use_augmentation:
        print("preproc set to eval")
        preproc.set_eval()
    logger.info("--------set_eval and entering loop---------")

    with torch.no_grad():
        for batch in tqdm.tqdm(ldr):
            temp_batch = list(batch)
            logger.info("temp_batch created as list")
            preds = model.infer(temp_batch)
            logger.info(f"model.infer called with {len(preds[0])}")
            loss = model.loss(temp_batch)
            logger.info(f"loss calculated as: {loss.item():0.3f}")
            logger.info(f"loss is nan: {math.isnan(loss.item())}")
            losses.append(loss.item())
            logger.info("loss appended")
            all_preds.extend(preds)
            logger.info("preds extended")
            all_labels.extend(temp_batch[1])    # the labels sit at index 1 of the batch
            logger.info("labels extended")

    model.set_train()
    preproc.set_train()
    logger.info("set to train")

    loss = sum(losses) / len(losses)
    logger.info(f"Avg loss: {loss}")
    # decode from integer tokens back to phoneme labels
    results = [(preproc.decode(l), preproc.decode(p))
               for l, p in zip(all_labels, all_preds)]
    logger.info(f"results {results}")
    cer = speech.compute_cer(results)
    logger.info(f"CER: {cer}")

    return loss, cer
def eval_dev(model, ldr, preproc):
    with torch.no_grad():
        losses = []
        all_preds = []
        all_labels = []

        model.eval()
        for batch in tqdm.tqdm(ldr):
            preds, labels, loss = model.infer_batch(batch, calculate_loss=True)
            losses.append(loss.item())
            all_preds.extend(preds)
            all_labels.extend(labels)

        loss = sum(losses) / len(losses)
        results = [(preproc.decode(l), preproc.decode(p))
                   for l, p in zip(all_labels, all_preds)]
        cer = speech.compute_cer(results)
        print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))
    return loss, cer
def eval(data_loader, model, int_to_char, beam_size=1):
    total_loss = []
    all_preds = []
    all_labels = []
    for batch in data_loader:
        inputs, labels = batch
        loss, preds = model.eval_model(inputs, labels, beam_size)
        total_loss.append(loss)
        all_preds.extend(preds)
        all_labels.extend(labels)

    avg_loss = sum(total_loss) / len(total_loss)
    results = [(decode(l, int_to_char), decode(p, int_to_char))
               for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)
    return avg_loss, cer
def eval_dev(model, ldr, preproc):
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()
    for batch in tqdm.tqdm(ldr):
        preds = model.infer(batch)
        loss = model.loss(batch)
        losses.append(loss.item())    # was loss.data[0], the pre-0.4 PyTorch idiom
        all_preds.extend(preds)
        all_labels.extend(batch[1])   # the labels sit at index 1 of the batch
    model.set_train()

    loss = sum(losses) / len(losses)
    results = [(preproc.decode(l), preproc.decode(p))
               for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)
    print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))
    return loss, cer
def run_eval(
        model_path,
        dataset_json,
        batch_size=8,
        tag="best",
        model_name="model_state_dict.pth",
        device=None,
        add_filename=False,
        add_maxdecode: bool = False,
        formatted=False,
        config_path=None,
        out_file=None) -> float:
    """Calculates the error rate between the predictions from the model in
    `model_path` and the labels in `dataset_json`.

    Args:
        model_path (str): path to the directory that contains the model
        dataset_json (str): path to the dataset json file
        batch_size (int): number of examples fed into the model at once
        tag (str): prefix of the model name; if "best", the "best_model" is used
        model_name (str): name of the model, likely either
            "model_state_dict.pth" or "model"
        device (torch.device): device the evaluation should run on
        add_filename (bool): if true, the filename is added to each example
            in `json_save`
        add_maxdecode (bool): if true, predictions from max decoding are added
            in addition to the predictions from the ctc_decoder
        formatted (bool): if true, `format_save` is used instead of `json_save`;
            `format_save` produces a more human-readable output file
        config_path (str): explicit path to the config file, if the one in
            `model_path` is not desired
        out_file (str): path where the output file will be saved
    Returns:
        (float): the computed error rate of the model on the dataset
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path, preproc_path, config_path = get_names(
        model_path, tag=tag, model_name=model_name, get_config=True)

    # load and update preproc
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load the config and copy `blank_idx` into the model section
    config = load_config(config_path)
    model_cfg = config['model']
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})

    # create the model and load the saved weights
    model = CTC_train(preproc.input_dim, preproc.vocab_size, model_cfg)
    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)

    ldr = loader.make_loader(dataset_json, preproc, batch_size)

    model.to(device)
    model.set_eval()
    print(f"preproc train_status before set_eval: {preproc.train_status}")
    preproc.set_eval()
    preproc.use_log = False
    print(f"preproc train_status after set_eval: {preproc.train_status}")

    results = eval_loop(model, ldr, device)
    print(f"number of examples: {len(results)}")
    results = [(preproc.decode(label), preproc.decode(pred), conf)
               for label, pred, conf in results]
    # TODO: max-decoding predictions (add_maxdecode) are not produced here yet

    cer = speech.compute_cer(results, verbose=True)
    print("PER {:.3f}".format(cer))

    if out_file is not None:
        compile_save(results, dataset_json, out_file, formatted, add_filename)

    return round(cer, 3)
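# A minimal command-line entry point for run_eval (a sketch; the original
# script may expose a different argparse interface):
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Evaluate a saved CTC model on a dataset.")
    parser.add_argument("model_path",
                        help="directory containing the saved model")
    parser.add_argument("dataset_json",
                        help="path to the dataset json file")
    parser.add_argument("--batch_size", type=int, default=8)
    parser.add_argument("--out_file", default=None,
                        help="where to save the predictions")
    parser.add_argument("--formatted", action="store_true",
                        help="use format_save instead of json_save")
    args = parser.parse_args()

    run_eval(args.model_path, args.dataset_json,
             batch_size=args.batch_size,
             formatted=args.formatted,
             out_file=args.out_file)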