Example #1
def main(args):
    # evaluate for all errors
    print(f"{args.fn_output}")
    print("Evaluate on all errors")
    gold_lines = read_lines(args.fn_gold)
    sub_lines = read_lines(args.fn_output)
    tmp_filename = args.fn_output.replace(".txt", "_tmp.txt")
    if args.fp_ratio:
        calculate_fp_ratio(sub_lines)
    else:
        if args.fn_gold.endswith(".m2"):
            evaluate_from_m2_file(args.fn_gold, sub_lines, tmp_filename)
            exit()
        else:
            evaluate_with_m2(gold_lines, sub_lines, tmp_filename)
    # evaluate on all categories

    eb = ErrorTypesBank()
    categories = eb.get_error_types_list("Patterns22")
    # Optionally restrict evaluation to a subset of categories, e.g.:
    # categories = ['Agreement', 'Pronoun', 'Punctuation',
    #               'Preposition', 'Determiner', 'VerbSVA']
    for category in categories:
        print(f"\nEvaluate on {category}")
        gold_filtered, n_errors_in_gold = filter_by_error_type(gold_lines,
                                                               category, "CLC")
        sub_filtered, n_errors_in_sub = filter_by_error_type(sub_lines, category)
        if not n_errors_in_sub:
            print(f"There is no errors of {category} in sub")
            continue
        if args.fp_ratio:
            calculate_fp_ratio(sub_filtered)
        else:
            evaluate_with_m2(gold_filtered, sub_filtered, tmp_filename)
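The calculate_fp_ratio helper called above is not part of this example. A minimal sketch of what it might look like, assuming the annotated-text format used in the other examples and mirroring the inline FP-rate computation of Example #6 (the helper itself is an assumption, not the original implementation):

def calculate_fp_ratio(sub_lines):
    # Hypothetical helper: count the annotations the system produced on
    # presumably clean text and report them relative to the number of
    # sentences, as done inline in Example #6.
    cnt_errors = sum(len(AnnotatedText(x).get_annotations()) for x in sub_lines)
    print(f"The number of errors is {cnt_errors}. "
          f"FP rate {round(100 * cnt_errors / len(sub_lines), 2)}%")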
Example #2
def get_lines_from_m2_file(m2_file):
    tmp_number = int(random.random() * 10**9)
    tmp_file = m2_file.replace(".m2", f"_tmp_{tmp_number}.txt")
    os.system(f'cat {m2_file} | grep "^S " | cut -c3- > {tmp_file}')
    lines = read_lines(tmp_file)
    remove_file(tmp_file)
    return lines
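The shell pipeline above assumes cat, grep and cut are available. A pure-Python sketch of the same extraction, relying only on the standard M2 convention that source sentences are the lines prefixed with "S " (the _py suffix is just to distinguish it from the original function):

def get_lines_from_m2_file_py(m2_file):
    # Keep only "S "-prefixed lines and drop the two-character prefix,
    # which is exactly what `grep "^S " | cut -c3-` does above.
    with open(m2_file, encoding="utf-8") as f:
        return [line.rstrip("\n")[2:] for line in f if line.startswith("S ")]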
Example #3
def main(args):
    # Read original texts
    test_orig = read_lines(args.test_orig)
    # Run checks in parallel and save result
    out_file = args.test_orig.replace('.txt', f'_{args.system_type}.txt')
    run_check_parallel(test_orig,
                       check_type=args.system_type,
                       error_type=args.error_type,
                       n_threads=args.n_threads,
                       fn_out=out_file)
    # Filter output
    unfiltered_data = read_lines(out_file)
    output = filter_by_error_type(unfiltered_data,
                                  error_type=args.error_type,
                                  system_type=args.system_type)
    # Save results
    out_filtered_file = out_file.replace('.txt', f'_by_{args.error_type}.txt')
    write_lines(out_filtered_file, output)
Example #4
def main(args):
    # Read unfiltered texts
    unfiltered_data = read_lines(args.unfiltered_file)
    # Filter text
    output, cnt = filter_by_error_type(unfiltered_data,
                                       error_type=args.error_type,
                                       system_type=args.system_type)
    # Save results
    out_file = args.unfiltered_file.replace('.txt',
                                            f'_by_{args.error_type}.txt')
    write_lines(out_file, output)
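These main(args) entry points are presumably driven by argparse. A minimal sketch of how Example #4 could be wired up; the argument names come from the attributes used above, while the required/help values are assumptions:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--unfiltered_file", required=True,
                        help="path to the unfiltered .txt file")
    parser.add_argument("--error_type", required=True,
                        help="error category to keep, e.g. Preposition")
    parser.add_argument("--system_type", required=True,
                        help="system whose output is being filtered, e.g. OPC or UPC")
    main(parser.parse_args())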
Example #5
def get_all_records(train_file, error_types=None, store_sents=True):
    all_sents = read_lines(train_file)
    print("All sents are loaded")
    all_records = []
    all_ann_sents = []
    for sent in tqdm(all_sents):
        ann_sent = AnnotatedTokens(AnnotatedText(sent))
        for ann in ann_sent.iter_annotations():
            # apply error_type features
            if error_types:
                ann_error_type = get_normalized_error_type(ann)
                if ann_error_type not in error_types:
                    ann_sent.remove(ann)
                    continue
            # check if ann has features (dict should be big)
            if len(ann.meta.keys()) > 5:
                all_records.append(ann.meta)
            # remove ann if it does not carry a big feature dict
            else:
                ann_sent.remove(ann)
        if store_sents:
            all_ann_sents.append(ann_sent)
    return all_records, all_ann_sents
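The returned all_records is simply a list of per-annotation meta dictionaries, so it can be inspected as tabular data. A hypothetical usage sketch (pandas, the file name and the error-type filter are illustrative assumptions, not shown in the source):

import pandas as pd

records, ann_sents = get_all_records("train.txt", error_types=["Preposition"])
df = pd.DataFrame(records)  # one row per kept annotation, one column per feature
print(df.shape)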
Example #6
def main(args):
    all_files = [x for x in os.listdir(args.input_dir) if "tmp" not in x]
    for fname in all_files:
        print(f"Start evaluation {args.system_type} on {fname}")
        if fname.endswith(".txt"):
            fp_ratio = True
        elif fname.endswith(".m2"):
            fp_ratio = False
        else:
            continue
        input_file = os.path.join(args.input_dir, fname)
        if fp_ratio:
            sentences = read_lines(input_file)
        else:
            sentences = get_lines_from_m2_file(input_file)

        # run through system
        if args.system_type == "OPC":
            s_types = ["OPC", "OPC-filtered"]
        elif args.system_type == "UPC":
            s_types = ["UPC"]
        else:
            raise Exception("Unknown system type")
        for system_type in s_types:
            print(f"{system_type} is evaluating")
            combined = [(x, system_type) for x in sentences]
            with ThreadPoolExecutor(args.n_threads) as pool:
                system_out = list(
                    tqdm(pool.map(wrap_check, combined), total=len(combined)))
            system_out = [x.get_annotated_text() for x in system_out]
            print(f"{system_type} system response was got")

            # run system through confidence scorer
            if system_type.endswith("filtered"):
                scorer_list = [None]
            else:
                scorer_list = [None, "LM", "CLF"]
                # scorer_list = [None, "CLF"]
            for scorer in scorer_list:
                print(f"Current scorer is {scorer}")
                if scorer == "CLF":
                    combined = [(x, args.server_path) for x in system_out]
                    with ThreadPoolExecutor(args.n_threads) as pool:
                        scorer_out = list(
                            tqdm(pool.map(wrap_confidence_scorer, combined),
                                 total=len(combined)))
                    # thresholds = [0.1, 0.2, 0.25, 0.3, 0.5]
                    thresholds = [
                        0.1, 0.2, 0.25, 0.3, 0.35, 0.36, 0.38, 0.4, 0.45, 0.5
                    ]
                elif scorer == "LM":
                    with ThreadPoolExecutor(args.n_threads) as pool:
                        scorer_out = list(
                            tqdm(pool.map(wrap_get_lm_scores, system_out),
                                 total=len(system_out)))
                    thresholds = [0]
                else:
                    scorer_out = system_out
                    thresholds = [None]
                print("Scores were got")

                # apply thresholds
                if args.error_types is not None:
                    error_types = args.error_types.split()
                else:
                    error_types = None
                for t in thresholds:
                    print(f"The current threshold is {t}")
                    t_out = []
                    for sent in scorer_out:
                        ann_sent = AnnotatedTokens(AnnotatedText(sent))
                        for ann in ann_sent.iter_annotations():
                            ann.meta['system_type'] = system_type
                            et = get_normalized_error_type(ann)
                            if error_types is not None and et not in error_types:
                                ann_sent.remove(ann)
                                continue
                            score = float(ann.meta.get('confidence', 1))
                            if t is not None and score < t:
                                ann_sent.remove(ann)
                        t_out.append(ann_sent.get_annotated_text())
                    if fp_ratio:
                        cnt_errors = sum([
                            len(AnnotatedText(x).get_annotations())
                            for x in t_out
                        ])
                        print(
                            f"\nThe number of errors are equal {cnt_errors}. "
                            f"FP rate {round(100*cnt_errors/len(t_out),2)}%")
                    else:
                        print(f"\nThreshold level is {t}")
                        tmp_filename = input_file.replace(
                            ".m2",
                            f"_{system_type}_{scorer}_above_{t}_tmp.txt")
                        evaluate_from_m2_file(input_file, t_out, tmp_filename)
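wrap_check and wrap_confidence_scorer are not shown in these examples. Since pool.map passes a single argument, they are presumably thin wrappers that unpack the tuples built above; a hypothetical sketch (the underlying check() call is an assumption, not the original helper):

def wrap_check(pair):
    # pool.map delivers one argument, so unpack the (sentence, system_type)
    # tuple before running the sentence through the chosen checker.
    sentence, system_type = pair
    return check(sentence, check_type=system_type)  # check() is assumed, not shown here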