def export_annotation_differences(input_file1, input_file2, output_file, command_header):
    """
    Export the rows on which the two annotators disagree on either the
    template or the command judgement. The description of an example group
    is written only once per group.
    """
    o_f = open(output_file, 'w')
    o_f.write('description,{},correct template A,correct command A,'
              'correct template B,correct command B\n'.format(command_header))
    with open(input_file1) as f1:
        with open(input_file2) as f2:
            reader1 = csv.DictReader(f1)
            reader2 = csv.DictReader(f2)
            current_desp = ''
            desp_written = False
            for row1, row2 in zip(reader1, reader2):
                # A non-empty description starts a new example group
                if row1['description']:
                    current_desp = row1['description']
                    desp_written = False
                if not row1[command_header]:
                    continue
                row1_template_eval = normalize_judgement(row1['correct template'].strip())
                row1_command_eval = normalize_judgement(row1['correct command'].strip())
                row2_template_eval = normalize_judgement(row2['correct template'].strip())
                row2_command_eval = normalize_judgement(row2['correct command'].strip())
                if (row1_template_eval != row2_template_eval) or \
                        (row1_command_eval != row2_command_eval):
                    if not desp_written:
                        o_f.write('"{}","{}",{},{},{},{}\n'.format(
                            current_desp.replace('"', '""'),
                            row1[command_header].replace('"', '""'),
                            row1_template_eval, row1_command_eval,
                            row2_template_eval, row2_command_eval))
                        desp_written = True
                    else:
                        o_f.write(',"{}",{},{},{},{}\n'.format(
                            row1[command_header].replace('"', '""'),
                            row1_template_eval, row1_command_eval,
                            row2_template_eval, row2_command_eval))
    o_f.close()
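# A minimal usage sketch (the file names and the command-column header below
# are hypothetical; both input CSVs must list the same examples in the same
# order, since the rows are paired with zip()):
#
#   export_annotation_differences(
#       'annotations.A.csv', 'annotations.B.csv',
#       'annotation.differences.csv', command_header='prediction')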
def combine_annotations_multi_files():
    """
    Combine multiple annotation files and discard the annotations that have
    a conflict.
    """
    input_dir = sys.argv[1]
    template_evals = {}
    command_evals = {}
    discarded_keys = set()
    for in_csv in os.listdir(input_dir):
        in_csv_path = os.path.join(input_dir, in_csv)
        with open(in_csv_path) as f:
            reader = csv.DictReader(f)
            current_description = ''
            for row in reader:
                template_eval = normalize_judgement(row['correct template'])
                command_eval = normalize_judgement(row['correct command'])
                description = get_example_nl_key(row['description'])
                if description.strip():
                    current_description = description
                else:
                    description = current_description
                prediction = row['prediction']
                example_key = '{}<NL_PREDICTION>{}'.format(description, prediction)
                # Conflicting judgements for the same (description, prediction)
                # pair are discarded rather than arbitrarily resolved
                if example_key in template_evals and \
                        template_evals[example_key] != template_eval:
                    discarded_keys.add(example_key)
                    continue
                if example_key in command_evals and \
                        command_evals[example_key] != command_eval:
                    discarded_keys.add(example_key)
                    continue
                template_evals[example_key] = template_eval
                command_evals[example_key] = command_eval
        print('{} read ({} manually annotated examples, {} discarded)'.format(
            in_csv_path, len(template_evals), len(discarded_keys)))
    # Write the de-conflicted annotations to a new file
    assert len(template_evals) == len(command_evals)
    with open('manual_annotations.additional', 'w') as o_f:
        o_f.write('description,prediction,template,correct template,correct command\n')
        for key in sorted(template_evals.keys()):
            if key in discarded_keys:
                continue
            description, prediction = key.split('<NL_PREDICTION>')
            template_eval = template_evals[key]
            command_eval = command_evals[key]
            pred_tree = data_tools.bash_parser(prediction)
            pred_temp = data_tools.ast2template(pred_tree, loose_constraints=True)
            o_f.write('"{}","{}","{}",{},{}\n'.format(
                description.replace('"', '""'),
                prediction.replace('"', '""'),
                pred_temp.replace('"', '""'),
                template_eval, command_eval))
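# Assumed invocation (the script name is hypothetical; the function reads the
# annotation directory from the command line):
#
#   python annotation_tools.py <annotation_dir>
#
# The de-conflicted annotations are written to 'manual_annotations.additional'
# in the working directory.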
def read_annotations(input_file):
    command_judgements, template_judgements = [], []
    with open(input_file) as f:
        reader = csv.DictReader(f)
        for row in reader:
            command_eval = normalize_judgement(row['correct command'].strip())
            template_eval = normalize_judgement(row['correct template'].strip())
            command_judgements.append(command_eval)
            template_judgements.append(template_eval)
    return command_judgements, template_judgements
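# A sketch of how the parallel judgement lists can be consumed, e.g. to
# compute raw inter-annotator agreement on the command-level judgements
# (file names are hypothetical):
#
#   cmd_a, temp_a = read_annotations('annotations.A.csv')
#   cmd_b, temp_b = read_annotations('annotations.B.csv')
#   agreement = sum(a == b for a, b in zip(cmd_a, cmd_b)) / len(cmd_a)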
def combine_annotations():
    """
    Combine the annotations input by three annotators.

    sys.argv[1]: main annotation file 1.
    sys.argv[2]: main annotation file 2 (should contain the same number of
        lines as annotation file 1).
    sys.argv[3]: supplementary annotation file which contains annotations of
        lines in the two main files that contain a disagreement.
    sys.argv[4]: file that contains the combined annotations.
    """
    input_file1 = sys.argv[1]
    input_file2 = sys.argv[2]
    input_file3 = sys.argv[3]
    output_file = sys.argv[4]

    o_f = open(output_file, 'w')
    o_f.write('description,prediction,template,correct template,correct command,'
              'correct template A,correct command A,'
              'correct template B,correct command B,'
              'correct template C,correct command C\n')

    sup_structure_eval, sup_command_eval = load_cached_evaluations_from_file(
        input_file3, treat_empty_as_correct=True)

    with open(input_file1) as f1:
        with open(input_file2) as f2:
            reader1 = csv.DictReader(f1)
            reader2 = csv.DictReader(f2)
            current_desp = ''
            for row1, row2 in zip(reader1, reader2):
                row1_template_eval = normalize_judgement(row1['correct template'].strip())
                row1_command_eval = normalize_judgement(row1['correct command'].strip())
                row2_template_eval = normalize_judgement(row2['correct template'].strip())
                row2_command_eval = normalize_judgement(row2['correct command'].strip())
                if row1['description']:
                    current_desp = row1['description'].strip()
                sc_key = get_example_nl_key(current_desp)
                pred_cmd = row1['prediction'].strip()
                # An empty prediction is judged incorrect on both criteria
                if not pred_cmd:
                    row1_template_eval, row1_command_eval = 'n', 'n'
                    row2_template_eval, row2_command_eval = 'n', 'n'
                pred_temp = data_tools.cmd2template(pred_cmd, loose_constraints=True)
                structure_example_key = '{}<NL_PREDICTION>{}'.format(sc_key, pred_temp)
                command_example_key = '{}<NL_PREDICTION>{}'.format(sc_key, pred_cmd)
                row3_template_eval, row3_command_eval = None, None
                if structure_example_key in sup_structure_eval:
                    row3_template_eval = sup_structure_eval[structure_example_key]
                if command_example_key in sup_command_eval:
                    row3_command_eval = sup_command_eval[command_example_key]
                # Resolve disagreements between annotators A and B with the
                # supplementary annotations from annotator C
                if row1_template_eval != row2_template_eval or \
                        row1_command_eval != row2_command_eval:
                    if row1_template_eval != row2_template_eval:
                        if row3_template_eval is None:
                            print(structure_example_key)
                        assert row3_template_eval is not None
                        template_eval = row3_template_eval
                    else:
                        template_eval = row1_template_eval
                    if row1_command_eval != row2_command_eval:
                        if row3_command_eval is None:
                            print(command_example_key)
                        assert row3_command_eval is not None
                        command_eval = row3_command_eval
                    else:
                        command_eval = row1_command_eval
                else:
                    template_eval = row1_template_eval
                    command_eval = row1_command_eval
                if row3_template_eval is None:
                    row3_template_eval = ''
                if row3_command_eval is None:
                    row3_command_eval = ''
                o_f.write('"{}","{}","{}",{},{},{},{},{},{},{},{}\n'.format(
                    current_desp.replace('"', '""'),
                    pred_cmd.replace('"', '""'),
                    pred_temp.replace('"', '""'),
                    template_eval, command_eval,
                    row1_template_eval, row1_command_eval,
                    row2_template_eval, row2_command_eval,
                    row3_template_eval, row3_command_eval))
    o_f.close()
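# Assumed invocation (the script name and file names are hypothetical):
#
#   python annotation_tools.py annotations.A.csv annotations.B.csv \
#       annotations.C.csv combined_annotations.csv
#
# where annotations.C.csv holds annotator C's judgements for the rows on
# which A and B disagree; an empty judgement in that file is treated as
# correct (treat_empty_as_correct=True).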
def print_error_analysis_sheet():
    """
    Print an error analysis sheet for a sample of the predictions of selected
    models, resolving A/B disagreements with annotator C's judgements.
    """
    input_file1 = sys.argv[1]
    input_file2 = sys.argv[2]
    input_file3 = sys.argv[3]
    output_file = sys.argv[4]

    o_f = open(output_file, 'w')
    o_f.write('description,model,prediction,correct template,correct command,'
              'correct template A,correct command A,'
              'correct template B,correct command B,'
              'correct template C,correct command C\n')

    sup_structure_eval, sup_command_eval = load_cached_evaluations_from_file(
        input_file3, treat_empty_as_correct=True)

    with open(input_file1) as f1:
        with open(input_file2) as f2:
            reader1 = csv.DictReader(f1)
            reader2 = csv.DictReader(f2)
            current_desp = ''
            for row_id, (row1, row2) in enumerate(zip(reader1, reader2)):
                if row1['description']:
                    current_desp = row1['description'].strip()
                model_name = row2['model']
                if model_name not in ['partial.token-copynet', 'tellina']:
                    continue
                # Sample every third row for the analysis sheet
                if row_id % 3 != 0:
                    continue
                row1_template_eval = normalize_judgement(row1['correct template'].strip())
                row1_command_eval = normalize_judgement(row1['correct command'].strip())
                row2_template_eval = normalize_judgement(row2['correct template'].strip())
                row2_command_eval = normalize_judgement(row2['correct command'].strip())
                sc_key = get_example_nl_key(current_desp)
                pred_cmd = row1['prediction'].strip()
                # An empty prediction is judged incorrect on both criteria
                if not pred_cmd:
                    row1_template_eval, row1_command_eval = 'n', 'n'
                    row2_template_eval, row2_command_eval = 'n', 'n'
                pred_temp = data_tools.cmd2template(pred_cmd, loose_constraints=True)
                structure_example_key = '{}<NL_PREDICTION>{}'.format(sc_key, pred_temp)
                command_example_key = '{}<NL_PREDICTION>{}'.format(sc_key, pred_cmd)
                row3_template_eval, row3_command_eval = None, None
                if structure_example_key in sup_structure_eval:
                    row3_template_eval = sup_structure_eval[structure_example_key]
                if command_example_key in sup_command_eval:
                    row3_command_eval = sup_command_eval[command_example_key]
                if row1_template_eval != row2_template_eval or \
                        row1_command_eval != row2_command_eval:
                    if row1_template_eval != row2_template_eval:
                        if row3_template_eval is None:
                            print(structure_example_key)
                        assert row3_template_eval is not None
                        template_eval = row3_template_eval
                    else:
                        template_eval = row1_template_eval
                    if row1_command_eval != row2_command_eval:
                        if row3_command_eval is None:
                            print(command_example_key)
                        assert row3_command_eval is not None
                        command_eval = row3_command_eval
                    else:
                        command_eval = row1_command_eval
                else:
                    template_eval = row1_template_eval
                    command_eval = row1_command_eval
                if row3_template_eval is None:
                    row3_template_eval = ''
                if row3_command_eval is None:
                    row3_command_eval = ''
                o_f.write('"{}","{}","{}",{},{},{},{},{},{},{},{}\n'.format(
                    current_desp.replace('"', '""'),
                    model_name,
                    pred_cmd.replace('"', '""'),
                    template_eval, command_eval,
                    row1_template_eval, row1_command_eval,
                    row2_template_eval, row2_command_eval,
                    row3_template_eval, row3_command_eval))
    o_f.close()
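# Like combine_annotations(), this reads its four file paths from sys.argv,
# but additionally filters to the selected models and samples every third
# row. Assumed invocation (script name and file names hypothetical):
#
#   python annotation_tools.py annotations.A.csv annotations.B.csv \
#       annotations.C.csv error_analysis_sheet.csv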