# python run_evaluate.py --input=<INPUT> --cat=<CATEGORY> --filter
#
# Evaluation driver: load the data file, optionally clean it, report
# sentence-level accuracy (only when no category was requested), then walk the
# per-type token data.
args = parse_args()
if args.language == 'en':
    # The clean-up helper is imported for English only.
    from nemo_text_processing.text_normalization.en.clean_eval_data import filter_loaded_data

file_path = args.input
normalizer = Normalizer(input_case=args.input_case, lang=args.language)

print("Loading training data: " + file_path)
training_data = load_files([file_path])

if args.filter:
    # `filter_loaded_data` is bound above only for English. Look the name up on
    # its own so that an unsupported --filter request ends with a clear message
    # instead of a bare NameError, without hiding a NameError raised inside the
    # filter itself and without overwriting a binding made outside this section.
    try:
        data_filter = filter_loaded_data
    except NameError:
        raise SystemExit("--filter is not available for language '" + str(args.language) + "'") from None
    training_data = data_filter(training_data)

if args.category is None:
    # Whole-sentence accuracy is only computed when no single category is selected.
    print("Sentence level evaluation...")
    sentences_un_normalized, sentences_normalized, _ = training_data_to_sentences(training_data)
    print("- Data: " + str(len(sentences_normalized)) + " sentences")
    sentences_prediction = normalizer.normalize_list(sentences_un_normalized)
    print("- Normalized. Evaluating...")
    sentences_accuracy = evaluate(
        preds=sentences_prediction, labels=sentences_normalized, input=sentences_un_normalized
    )
    print("- Accuracy: " + str(sentences_accuracy))

# Token-level pass: runs for every invocation, restricted by --cat when given.
print("Token level evaluation...")
tokens_per_type = training_data_to_tokens(training_data, category=args.category)
token_accuracy = {}
for token_type in tokens_per_type:
    print("- Token type: " + token_type)
    # Each entry unpacks into the un-normalized inputs and their normalized references.
    tokens_un_normalized, tokens_normalized = tokens_per_type[token_type]
    print(" - Data: " + str(len(tokens_normalized)) + " tokens")
# NOTE(review): the statements down to `return updates_instances` are the tail of a
# function whose `def` line lies above this section. They are only re-indented here;
# the nesting is inferred from the statements themselves — confirm against the full
# function.
            # A filter is applied only when its class_type equals the instance's
            # token_type and its own filter() check accepts the instance.
            if fil.class_type == instance.token_type and fil.filter(instance):
                instance = fil.process(instance)
                updated_instance = True
        # Instances flagged as updated are collected; `verbose` also prints them.
        if updated_instance:
            if verbose:
                print(instance)
            updates_instances.append(instance)
    return updates_instances


def parse_args():
    """Parse the command-line options of this script.

    Options:
        --input: input file path (str); defaults to
            './ar_with_types/output-00001-of-00100'.
        --verbose: flag; per its help text, prints filtered instances.

    Returns:
        The result of ``ArgumentParser.parse_args()``.
    """
    parser = ArgumentParser()
    parser.add_argument("--input", help="input file path", type=str, default='./ar_with_types/output-00001-of-00100')
    parser.add_argument("--verbose", help="print filtered instances", action='store_true')
    return parser.parse_args()


if __name__ == "__main__":
    # Script entry point: load one data file, run it through the filters, then
    # hand the result to training_data_to_sentences (its return value is discarded).
    args = parse_args()
    file_path = args.input

    print("Loading training data: " + file_path)
    instance_list = load_files([file_path])  # List of instances
    filtered_instance_list = filter_loaded_data(instance_list, args.verbose)
    training_data_to_sentences(filtered_instance_list)