def test_main():
    """Run the preprocessor checks against the `cv` dev set.

    Loads "./ctc_config_ph6.yaml", builds a `Preprocessor` for the `cv`
    entry of the config's `dev_sets` (with `max_samples=100` and
    `start_and_end=False`), refreshes it with `update()`, and hands it to
    `check_empty_filename` and `check_run_from_AudioDataset`.
    """
    cfg = load_config("./ctc_config_ph6.yaml")
    data_section = cfg['data']
    print(cfg)

    log = get_logger('./test.log')
    cv_set = data_section['dev_sets']['cv']
    preproc = Preprocessor(
        cv_set,
        cfg['preproc'],
        log,
        max_samples=100,
        start_and_end=False,
    )
    preproc.update()
    print("preproc: \n", preproc)

    check_empty_filename(preproc)
    check_run_from_AudioDataset(preproc, cv_set)
def __init__(self, output_dir, dataset_name, config_path=None):
    """Initialize the parent object and collect record ids from the config.

    Args:
        output_dir: forwarded unchanged to the parent initializer
        dataset_name: forwarded unchanged to the parent initializer
        config_path: path handed to `load_config`; the loaded config must
            contain a `datasets` entry that can be iterated for data paths

    Properties:
        record_ids (list): the ids returned by `get_dataset_ids` for every
            entry of config['datasets'], concatenated in config order
    """
    super().__init__(output_dir, dataset_name)
    settings = load_config(config_path)

    # first gather the ids of every dataset, then flatten them into one list
    ids_per_dataset = [get_dataset_ids(path) for path in settings['datasets']]
    flattened_ids = []
    for dataset_ids in ids_per_dataset:
        flattened_ids.extend(dataset_ids)

    self.record_ids = flattened_ids
def __init__(self, output_dir, dataset_name, config_path=None):
    """Initialize the parent object and copy the download settings from the config.

    Args:
        output_dir: forwarded unchanged to the parent initializer
        dataset_name: forwarded unchanged to the parent initializer
        config_path: path handed to `load_config`

    Properties:
        num_examples (int): number of examples to be downloaded
        target_eq_guess (bool): if True, the target == guess criterion will
            filter the downloaded files
        check_constraints, constraints, days_from_today,
        disjoint_metadata_tsv, disjoint_id_names, disjoint_datasets:
            copied as-is from the config entries of the same name
    """
    super().__init__(output_dir, dataset_name)
    settings = load_config(config_path)

    # each key is read with plain indexing, so all of them must be present
    # in the config; the order matches the attribute assignment order
    copied_keys = (
        'num_examples',
        'target_eq_guess',
        'check_constraints',
        'constraints',
        'days_from_today',
        'disjoint_metadata_tsv',
        'disjoint_id_names',
        'disjoint_datasets',
    )
    for key in copied_keys:
        setattr(self, key, settings[key])
def torch_to_onnx(
        model_name:str,
        num_frames:int,
        use_state_dict:bool,
        return_models:bool=False)->None:
    """Load a CTC torch model from disk and export it to ONNX.

    Arg:
        model_name (str): filename of the model
        num_frames (int): number of feature frames that will fix the model's size
        use_state_dict (bool): accepted for the caller's sake but not read
            anywhere in this function
        return_models (bool, False): if true, the function will return the
            torch and onnx model objects

    Returns:
        None by default. When `return_models` is true a
        `(torch_model, onnx_model)` tuple is returned instead; the `None`
        annotation on the signature does not cover that case.
    """
    freq_dim = 257      # freq dimension out of log_spectrogram
    vocab_size = 39

    torch_path, config_path, onnx_path = pytorch_onnx_paths(model_name)
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    cfg = load_config(config_path)
    model_cfg = cfg['model']
    # copy `blank_idx` from the preproc section into the model section
    model_cfg['blank_idx'] = cfg['preproc']['blank_idx']

    # build the model and restore its weights
    torch_model = CTC_model(freq_dim, vocab_size, model_cfg)
    weights = load_state_dict(torch_path, device=device)
    torch_model.load_state_dict(weights)
    torch_model.to(device)
    print("model on cuda?: ", torch_model.is_cuda)
    torch_model.eval()

    # create the tracking inputs
    rnn_dim = model_cfg['encoder']['rnn']['dim']
    tracking_input = generate_test_input("pytorch", model_name, num_frames, rnn_dim)

    # export the models to onnx
    torch_onnx_export(torch_model, tracking_input, onnx_path)
    print(f"Torch model sucessfully converted to Onnx at {onnx_path}")

    if not return_models:
        return None
    return torch_model, onnx.load(onnx_path)
def torch_to_coreml(
        model_name:str,
        num_frames:int,
        use_state_dict:bool,
        return_models:bool=False)->None:
    """Load a CTC torch model, trace it with TorchScript and run the trace once.

    The model is traced on an input built with a size argument of 31 and the
    traced module is then called on an input built with a size argument of 46.
    Nothing is written to disk and nothing is returned by this function.

    Arg:
        model_name (str): filename of the model
        num_frames (int): not read in this function; the input sizes used
            for tracing (31) and for the follow-up call (46) are hard-coded
        use_state_dict (bool): not read in this function
        return_models (bool, False): currently has no effect, the branch
            that checks it is a placeholder
    """
    freq_dim = 257      # freq dimension out of log_spectrogram
    vocab_size = 39

    torch_path, config_path, _onnx_path = pytorch_onnx_paths(model_name)
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    device = torch.device(device_name)

    cfg = load_config(config_path)
    model_cfg = cfg['model']
    # NOTE(review): unlike torch_to_onnx, `blank_idx` is not copied from the
    # preproc section into model_cfg here -- confirm this is intended

    # build the model and restore its weights
    torch_model = CTC_model(freq_dim, vocab_size, model_cfg)
    weights = load_state_dict(torch_path, device=device)
    torch_model.load_state_dict(weights)
    torch_model.to(device)
    print("model on cuda?: ", torch_model.is_cuda)
    torch_model.eval()

    # create the tracking inputs
    rnn_dim = cfg['model']['encoder']['rnn']['dim']
    trace_x, (trace_h, trace_c) = generate_test_input("pytorch", model_name, 31, rnn_dim)
    traced_model = torch.jit.trace(torch_model, (trace_x, (trace_h, trace_c)))

    # call the traced module on an input built with a different size argument
    probe_x, (probe_h, probe_c) = generate_test_input("pytorch", model_name, 46, rnn_dim)
    _probe_out, (_probe_h_out, _probe_c_out) = traced_model(probe_x, (probe_h, probe_c))

    if return_models:
        # placeholder: no models are returned yet
        pass
def _load_model(model_params: dict, device) -> Tuple[torch.nn.Module, speech.loader.Preprocessor]:
    """
    This function will load the model, config, and preprocessing object and
    prepare the model and preproc for evaluation

    Args:
        model_params (dict): dict with the keys `path`, `tag`, and `filename`
            that are passed to `get_names` to locate the model, preproc,
            and config files
        device (torch.device): torch processing device
    Returns:
        torch.nn.Module: torch model, moved to `device` and set to eval mode
        preprocessing object (speech.loader.Preprocessor): preprocessing
            object, updated and set to eval mode
    """
    model_path, preproc_path, config_path = get_names(
        model_params['path'],
        tag=model_params['tag'],
        get_config=True,
        model_name=model_params['filename'],
    )

    # load and update preproc
    # NOTE(review): read_pickle presumably unpickles the file -- only point
    # this at preproc files from a trusted source
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load and assign config
    config = load_config(config_path)
    model_cfg = config['model']
    # create `blank_idx` in the model_cfg section from the preproc section
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})

    # create model and restore its weights
    model = CTC_train(preproc.input_dim, preproc.vocab_size, model_cfg)
    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)
    model.to(device)

    # turn model and preproc to eval_mode
    model.set_eval()
    preproc.set_eval()

    return model, preproc
def run_eval(
        model_path,
        dataset_json,
        batch_size=8,
        tag="best",
        model_name="model_state_dict.pth",
        device=None,
        add_filename=False,
        add_maxdecode:bool=False,
        formatted=False,
        config_path=None,
        out_file=None)->float:
    """
    calculates the distance between the predictions from the model in
    model_path and the labels in dataset_json

    Args:
        model_path (str): path to the directory that contains the model,
        dataset_json (str): path to the dataset json file
        batch_size (int): number of examples to be fed into the model at once
        tag (str): string that prefixes the model_name. if best, the "best_model" is used
        model_name (str): name of the model, likely either "model_state_dict.pth" or "model"
        device (torch.device): device that the evaluation should run on;
            when None, cuda is used if available, otherwise cpu
        add_filename (bool): if true, the filename is added to each example in `save_json`
        add_maxdecode (bool): if true, the predictions using max decoding will be added in
            addition to the predictions from the ctc_decoder.
            Currently accepted but not read anywhere in this function.
        formatted (bool): if true, the `format_save` will be used instead of `json_save`
            where `format_save` outputs a more human-readable output file
        config_path (str): specific path to the config file, if the one in `model_path`
            is not desired; when None, the config path returned by `get_names` is used
        out_file (str): path where the output file will be saved; nothing is saved when None
    Returns:
        (float): the computed error rate of the model on the dataset, rounded to 3 digits
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_path, preproc_path, default_config_path = get_names(
        model_path,
        tag=tag,
        model_name=model_name,
        get_config=True,
    )
    # honor an explicitly supplied config path; previously it was always
    # overwritten by the path derived from `model_path`
    if config_path is None:
        config_path = default_config_path

    # load and update preproc
    preproc = read_pickle(preproc_path)
    preproc.update()

    # load and assign config
    config = load_config(config_path)
    model_cfg = config['model']
    # create `blank_idx` in the model_cfg section from the preproc section
    model_cfg.update({'blank_idx': config['preproc']['blank_idx']})

    # create model and restore its weights
    model = CTC_train(
        preproc.input_dim,
        preproc.vocab_size,
        model_cfg
    )
    state_dict = load_state_dict(model_path, device=device)
    model.load_state_dict(state_dict)

    ldr = loader.make_loader(
        dataset_json,
        preproc,
        batch_size
    )

    model.to(device)
    model.set_eval()
    print(f"preproc train_status before set_eval: {preproc.train_status}")
    preproc.set_eval()
    preproc.use_log = False
    print(f"preproc train_status after set_eval: {preproc.train_status}")

    results = eval_loop(model, ldr, device)
    print(f"number of examples: {len(results)}")

    # decode the label and prediction of every example, keeping the confidence
    results = [
        (preproc.decode(label), preproc.decode(pred), conf)
        for label, pred, conf in results
    ]

    cer = speech.compute_cer(results, verbose=True)
    print("PER {:.3f}".format(cer))

    if out_file is not None:
        compile_save(results, dataset_json, out_file, formatted, add_filename)

    return round(cer, 3)
Args: dataset_paths (List[str]): a list of dataset paths (str) Returns: Dict[str, set]: a dict with the set of ids as values """ data_dict = dict() for data_path in dataset_paths: # _extract_id on the data path will return the dataset name data_name = _extract_id(data_path) dataset = read_data_json(data_path) # set comprehension what extracts the record-id from each audiopath in the dataset id_set = {_extract_id(xmpl['audio']) for xmpl in dataset} data_dict.update({data_name: (id_set, len(dataset))}) return data_dict def _extract_id(record_path: str) -> str: #returns the basename of the path without the extension return os.path.basename(os.path.splitext(record_path)[0]) if __name__ == "__main__": parser = argparse.ArgumentParser(description="Eval a speech model.") parser.add_argument( "--config", help="Path to config file containing the necessary inputs") args = parser.parse_args() config = load_config(args.config) verify_disjoint(config)