Example 1
def run(model_path, dataset_json,
        batch_size=8, tag="best",
        out_file=None):

    use_cuda = torch.cuda.is_available()

    model, preproc = speech.load(model_path, tag=tag)
    ldr = loader.make_loader(dataset_json,
            preproc, batch_size)

    if use_cuda:
        model.cuda()
    else:
        model.cpu()
    model.set_eval()

    results = eval_loop(model, ldr)
    results = [(preproc.decode(label), preproc.decode(pred))
               for label, pred in results]
    cer = speech.compute_cer(results)
    print("CER {:.3f}".format(cer))

    if out_file is not None:
        with open(out_file, 'w') as fid:
            for label, pred in results:
                res = {'prediction' : pred,
                       'label' : label}
                json.dump(res, fid)
                fid.write("\n")
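A minimal sketch of how `run` might be invoked, assuming the script can import the project's `speech` and `loader` modules; the model directory and dataset manifest below are placeholder paths, not files from the original project:

if __name__ == "__main__":
    # hypothetical paths; point these at a real saved model directory and dataset json
    run("runs/ctc_best", "data/dev.json",
        batch_size=4, tag="best",
        out_file="predictions.json")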
Example 2
def json_save(results, dataset_json, out_file, add_filename):
    """This function writes the results into a json format.
    """
    output_results = []
    for label, pred, conf in results: 
        if add_filename:
            filename = match_filename(label, dataset_json)
            PER = speech.compute_cer([(label,pred)], verbose=False)
            res = {'filename': filename,
                   'prediction': pred,
                   'label': label,
                   'PER': round(PER, 3)}
        else:
            res = {'prediction': pred,
                   'label': label}
        output_results.append(res)

    # if including filename, add the suffix "_fn" before extension
    if add_filename: 
        out_file = create_filename(out_file, "pred-fn", "json")
        output_results = sorted(output_results, key=lambda x: x['PER'], reverse=True) 
    else: 
        out_file = create_filename(out_file, "pred", "json")
    print(f"file saved to: {out_file}") 
    with open(out_file, 'w') as fid:
        for sample in output_results:
            json.dump(sample, fid)
            fid.write("\n") 
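Because `json_save` dumps one JSON object per line, the output file is newline-delimited JSON rather than a single JSON document. A small sketch for reading it back, using only the standard library:

import json

def load_results(path):
    # one JSON object per line, e.g. {"prediction": [...], "label": [...], "PER": 0.123}
    with open(path, 'r') as fid:
        return [json.loads(line) for line in fid if line.strip()]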
Example 3
def format_save(results, dataset_json, out_file):
    """This function writes the results to a file in a human-readable format.
    """
    out_file = create_filename(out_file, "compare", "txt")
    print(f"file saved to: {out_file}")
    with open(out_file, 'w') as fid:
        write_list = list()
        lower_list = lambda x: list(map(str.lower, x))  # lowercase every token in a sequence
        for label, pred, conf in results:
            label, pred = lower_list(label), lower_list(pred)
            filepath, order = match_filename(label, dataset_json, return_order=True)
            filename = os.path.splitext(os.path.split(filepath)[1])[0]
            PER, (dist, length) = speech.compute_cer([(label,pred)], verbose=False, dist_len=True)
            write_list.append({"order": order, "filename": filename, "label": label, "preds": pred,
                               "metrics": {"PER": round(PER, 3), "dist": dist, "len": length,
                                           "confidence": round(conf, 3)}})
        write_list = sorted(write_list, key=lambda x: x['order'])
            
        for write_dict in write_list: 
            fid.write(f"{write_dict['filename']}\n") 
            fid.write(f"label: {' '.join(write_dict['label'])}\n") 
            fid.write(f"preds: {' '.join(write_dict['preds'])}\n")
            
            PER, dist = write_dict['metrics']['PER'], write_dict['metrics']['dist'] 
            length, conf = write_dict['metrics']['len'], write_dict['metrics']['confidence']
            fid.write(f"metrics: PER: {PER}, dist: {dist}, len: {length}, conf: {conf}\n")
            fid.write("\n")

        for write_dict in write_list:
            fid.write(f"{write_dict['filename']}, {write_dict['metrics']['PER']}\n")
Example 4
def eval(data_loader, model, int_to_char, m48_39, beam_size=1):
    total_loss = []
    all_preds = []
    all_labels = []

    for batch in data_loader:
        inputs, labels = batch
        loss, preds = model.eval_model(inputs, labels, beam_size)
        total_loss.append(loss)
        all_preds.extend(preds)
        all_labels.extend(labels)

    avg_loss = sum(total_loss) / len(total_loss)

    results = [(decode(l, int_to_char), decode(p, int_to_char))
               for l, p in zip(all_labels, all_preds)]

    # TIMIT evaluation: fold the 48-phoneme set down to 39 phonemes

    results_mapped = [([m48_39[l] for l in label], [m48_39[p] for p in predict])
                      for label, predict in results]

    cer = speech.compute_cer(results_mapped)

    return avg_loss, cer
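The `decode` helper used here (and again in Example 8) is not shown; a plausible minimal version, assuming `int_to_char` maps integer ids back to phoneme symbols:

def decode(seq, int_to_char):
    # map a sequence of integer ids back to their symbols
    return [int_to_char[i] for i in seq]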
Example 5
def eval_dev(model, ldr, preproc, logger, loss_name):
    """
    Runs the devset evaluation loop.
    """
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()
    preproc.set_eval()  # turns off dataset augmentation
    use_log = (logger is not None)

    # saves time by not computing and saving gradients as there is no backwards pass
    with torch.no_grad():
        for batch in tqdm.tqdm(ldr):
            batch = list(batch)
            preds = model.infer(batch)

            inputs, labels, input_lens, label_lens = model.collate(*batch)
            inputs = inputs.cuda(non_blocking=True)
            out, rnn_args = model(inputs, softmax=False)

            if loss_name == "native":
                loss = native_loss(out, labels, input_lens, label_lens,
                                   model.blank)
            elif loss_name == "awni":
                loss = awni_loss(out, labels, input_lens, label_lens,
                                 model.blank)
            elif loss_name == "naren":
                loss = naren_loss(out, labels, input_lens, label_lens,
                                  model.blank)

            losses.append(loss.item())
            all_preds.extend(preds)
            all_labels.extend(batch[1])  #add the labels in the batch object

    loss = sum(losses) / len(losses)

    # decodes from integer tokens back to phoneme labels
    results = [(preproc.decode(l), preproc.decode(p))
               for l, p in zip(all_labels, all_preds)]

    cer = speech.compute_cer(results)
    print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))

    if use_log:
        logger.info(f"eval_dev: loss calculated as: {loss.item():0.3f}")
        logger.info(f"eval_dev: loss is nan: {math.isnan(loss.item())}")
        logger.info(f"eval_dev: results {results}")
        logger.info(f"CER: {cer}")

    # set the model and preproc back to training mode
    model.set_train()
    preproc.set_train()

    return loss, cer
Example 6
def eval_dev(model, ldr, preproc, logger, use_augmentation):
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()
    if not use_augmentation:
        print("prepoc set to eval")
        preproc.set_eval()
    logger.info(f"--------set_eval and entering loop---------")

    with torch.no_grad():
        for batch in tqdm.tqdm(ldr):
            temp_batch = list(batch)
            logger.info(f"temp_batch created as list")
            preds = model.infer(temp_batch)
            logger.info(f"model.infer called with {len(preds[0])}")
            loss = model.loss(temp_batch)
            logger.info(f"loss calculated as: {loss.item():0.3f}")
            logger.info(f"loss is nan: {math.isnan(loss.item())}")
            losses.append(loss.item())
            #losses.append(loss.data[0])
            logger.info(f"loss appended")
            all_preds.extend(preds)
            logger.info(f"preds extended")
            all_labels.extend(
                temp_batch[1])  #add the labels in the batch object
            logger.info(f"labels extended")

    model.set_train()
    preproc.set_train()
    logger.info(f"set to train")

    loss = sum(losses) / len(losses)
    logger.info(f"Avg loss: {loss}")
    results = [
        (preproc.decode(l), preproc.decode(p)
         )  # decodes back to phoneme labels
        for l, p in zip(all_labels, all_preds)
    ]
    logger.info(f"results {results}")
    cer = speech.compute_cer(results)
    logger.info(f"CER: {cer}")

    return loss, cer
Example 7
def eval_dev(model, ldr, preproc):

    with torch.no_grad():
        losses = []
        all_preds = []
        all_labels = []

        model.eval()
        for batch in tqdm.tqdm(ldr):
            preds, labels, loss = model.infer_batch(batch, calculate_loss=True)
            losses.append(loss.item())
            all_preds.extend(preds)
            all_labels.extend(labels)
    loss = sum(losses) / len(losses)
    results = [(preproc.decode(l), preproc.decode(p)) for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)
    print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))
    return loss, cer
Example 8
def eval(data_loader, model, int_to_char, beam_size=1):
    total_loss = []
    all_preds = []
    all_labels = []

    for batch in data_loader:
        inputs, labels = batch
        loss, preds = model.eval_model(inputs, labels, beam_size)
        total_loss.append(loss)
        all_preds.extend(preds)
        all_labels.extend(labels)

    avg_loss = sum(total_loss)/len(total_loss)

    results = [(decode(l, int_to_char), decode(p, int_to_char))
               for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)

    return avg_loss, cer
Example 9
def eval_dev(model, ldr, preproc):
    losses = []
    all_preds = []
    all_labels = []

    model.set_eval()

    for batch in tqdm.tqdm(ldr):
        preds = model.infer(batch)
        loss = model.loss(batch)
        losses.append(loss.item())  # .item() replaces the older loss.data[0] indexing
        all_preds.extend(preds)
        all_labels.extend(batch[1])

    model.set_train()

    loss = sum(losses) / len(losses)
    results = [(preproc.decode(l), preproc.decode(p))
               for l, p in zip(all_labels, all_preds)]
    cer = speech.compute_cer(results)
    print("Dev: Loss {:.3f}, CER {:.3f}".format(loss, cer))
    return loss, cer
Example 10
def run_eval(
        model_path,
        dataset_json,
        batch_size=8,
        tag="best",
        model_name="model_state_dict.pth",
        device=None,
        add_filename=False,
        add_maxdecode: bool = False,
        formatted=False,
        config_path=None,
        out_file=None) -> float:
    """
    calculates the  distance between the predictions from
    the model in model_path and the labels in dataset_json

    Args:
        model_path (str): path to the directory that contains the model,
        dataset_json (str): path to the dataset json file
        batch_size (int): number of examples to be fed into the model at once
        tag (str): string that prefixes the model_name.  if best,  the "best_model" is used
        model_name (str): name of the model, likely either "model_state_dict.pth" or "model"
        device (torch.device): device that the evaluation should run on
        add_filename (bool): if true, the filename is added to each example in `save_json`
        add_maxdecode (bool): if true, the predictions using max decoding will be added in addition 
            to the predictions from the ctc_decoder
        formatted (bool): if true, the `format_save` will be used instead of `json_save` where 
            `format_save` outputs a more human-readable output file
        config_path (bool): specific path to the config file, if the one in `model_path` is not desired
        out_file (str): path where the output file will be saved
    
    Returns:
        (int): returns the computed error rate of the model on the dataset
    """

    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path, preproc_path, config_path = get_names(model_path, tag=tag, model_name=model_name, get_config=True)
    
    # load and update preproc
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load and assign config
    config = load_config(config_path)
    model_cfg = config['model']
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})  # create `blank_idx` in the model_cfg section


    # create model
    model = CTC_train(
        preproc.input_dim,
        preproc.vocab_size,
        model_cfg
    )

    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)
    
    ldr =  loader.make_loader(
        dataset_json,
        preproc, 
        batch_size
    )
    
    model.to(device)
    model.set_eval()
    print(f"preproc train_status before set_eval: {preproc.train_status}")
    preproc.set_eval()
    preproc.use_log = False
    print(f"preproc train_status after set_eval: {preproc.train_status}")


    results = eval_loop(model, ldr, device)
    print(f"number of examples: {len(results)}")
    #results_dist = [[(preproc.decode(pred[0]), preproc.decode(pred[1]), prob)] 
    #                for example_dist in results_dist
    #                for pred, prob in example_dist]
    results = [(preproc.decode(label), preproc.decode(pred), conf)
               for label, pred, conf in results]
    # maxdecode_results = [(preproc.decode(label), preproc.decode(pred))
    #           for label, pred in results]
    cer = speech.compute_cer(results, verbose=True)

    print("PER {:.3f}".format(cer))
    
    if out_file is not None:
        compile_save(results, dataset_json, out_file, formatted, add_filename)
    
    return round(cer, 3)
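A sketch of a command-line entry point for `run_eval`; the flag names are assumptions that simply mirror the function's parameters and defaults, not options from the original project:

if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Evaluate a CTC model on a dataset.")
    parser.add_argument("model_path", help="directory that contains the saved model")
    parser.add_argument("dataset_json", help="path to the dataset json file")
    parser.add_argument("--batch-size", type=int, default=8)
    parser.add_argument("--tag", default="best")
    parser.add_argument("--model-name", default="model_state_dict.pth")
    parser.add_argument("--add-filename", action="store_true")
    parser.add_argument("--formatted", action="store_true")
    parser.add_argument("--config-path", default=None)
    parser.add_argument("--out-file", default=None)
    args = parser.parse_args()

    per = run_eval(args.model_path, args.dataset_json,
                   batch_size=args.batch_size, tag=args.tag,
                   model_name=args.model_name,
                   add_filename=args.add_filename,
                   formatted=args.formatted,
                   config_path=args.config_path,
                   out_file=args.out_file)
    print(f"final PER: {per}")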