Example #1
0
    # python run_evaluate.py --input=<INPUT> --cat=<CATEGORY> --filter
    args = parse_args()
    if args.language == 'en':
        from nemo_text_processing.text_normalization.en.clean_eval_data import filter_loaded_data
    file_path = args.input
    normalizer = Normalizer(input_case=args.input_case, lang=args.language)

    print("Loading training data: " + file_path)
    training_data = load_files([file_path])

    if args.filter:
        training_data = filter_loaded_data(training_data)

    if args.category is None:
        print("Sentence level evaluation...")
        sentences_un_normalized, sentences_normalized, _ = training_data_to_sentences(training_data)
        print("- Data: " + str(len(sentences_normalized)) + " sentences")
        sentences_prediction = normalizer.normalize_list(sentences_un_normalized)
        print("- Normalized. Evaluating...")
        sentences_accuracy = evaluate(
            preds=sentences_prediction, labels=sentences_normalized, input=sentences_un_normalized
        )
        print("- Accuracy: " + str(sentences_accuracy))

    print("Token level evaluation...")
    tokens_per_type = training_data_to_tokens(training_data, category=args.category)
    token_accuracy = {}
    for token_type in tokens_per_type:
        print("- Token type: " + token_type)
        tokens_un_normalized, tokens_normalized = tokens_per_type[token_type]
        print("  - Data: " + str(len(tokens_normalized)) + " tokens")
            if fil.class_type == instance.token_type and fil.filter(instance):
                instance = fil.process(instance)
                updated_instance = True
        if updated_instance:
            if verbose:
                print(instance)
            updates_instances.append(instance)
    return updates_instances


def parse_args():
    """Parse command-line options for the data-filtering script.

    Returns:
        argparse.Namespace with ``input`` (path to the data file to load)
        and ``verbose`` (True to print each filtered instance).
    """
    arg_parser = ArgumentParser()
    arg_parser.add_argument(
        "--input",
        type=str,
        default='./ar_with_types/output-00001-of-00100',
        help="input file path",
    )
    arg_parser.add_argument(
        "--verbose",
        action='store_true',
        help="print filtered instances",
    )
    return arg_parser.parse_args()


if __name__ == "__main__":
    # Entry point: load raw training instances from the input file, run the
    # configured filters over them, then convert the survivors to sentences.
    cli_args = parse_args()
    data_path = cli_args.input

    print("Loading training data: " + data_path)
    raw_instances = load_files([data_path])  # List of instances
    kept_instances = filter_loaded_data(raw_instances, cli_args.verbose)
    training_data_to_sentences(kept_instances)