Exemple #1
0
def _char_level_output_name(input_file, ext):
    '''Return input_file with '.char' inserted in front of its trailing ext'''
    # Only the trailing extension is rewritten; str.replace() would also
    # rewrite earlier occurrences of ext (e.g. inside a directory name)
    return input_file[:len(input_file) - len(ext)] + '.char' + ext


def _sed_char_level(input_file, out_f):
    '''Write a char-level copy of input_file to out_f by running sed'''
    import subprocess

    # Argument list + file handles instead of a shell string: file names with
    # spaces or shell metacharacters are passed through untouched
    with open(input_file, 'rb') as in_stream, open(out_f, 'wb') as out_stream:
        # 1st expression: every space becomes '+'
        # 2nd expression: every character is followed by a space
        subprocess.run(['sed', '-e', r's/\ /+/g', '-e', r's/./&\ /g'],
                       stdin=in_stream, stdout=out_stream, check=True)


def char_level_file(input_file, amr_ext, sent_ext, pos, super_chars, coreference):
    '''Given an input file, put it in char-level format and write output

    The output file is the input file name with '.char' inserted in front of
    the matched extension. Files that end with neither amr_ext nor sent_ext
    are left alone and nothing is written.

    Arguments:
        input_file  -- path of the file to convert
        amr_ext     -- extension that marks an AMR file
        sent_ext    -- extension that marks a sentence file
        pos         -- if true, sentence files go through process_pos_tagged()
        super_chars -- if true, AMR files go through get_amr_lines() and
                       get_fixed_lines() instead of sed
        coreference -- passed on to get_fixed_lines()

    Raises:
        OSError if input_file cannot be read or the output cannot be created
        subprocess.CalledProcessError if sed exits with a non-zero status
    '''
    if input_file.endswith(amr_ext):
        # File ends with AMR extension, do AMR char-level processing
        out_f = _char_level_output_name(input_file, amr_ext)

        if super_chars:
            # Super characters get a different treatment
            print('AMR file, super characters')
            amr_lines = get_amr_lines(input_file)
            fixed_lines = get_fixed_lines(amr_lines, coreference)
            write_to_file(fixed_lines, out_f)
        else:
            # If there are no super characters we can just process with sed
            print('AMR file, no super characters')
            _sed_char_level(input_file, out_f)

    elif input_file.endswith(sent_ext):
        # File ends with sent ext, do sentence processing
        out_f = _char_level_output_name(input_file, sent_ext)

        if pos:
            # POS-tagged files get a different treatment
            print('Sentence file, POS-tagged')
            lines = process_pos_tagged(input_file)
            write_to_file(lines, out_f)
        else:
            # Not POS-tagged, so we can just use sed
            print('Sentence file, not POS-tagged')
            _sed_char_level(input_file, out_f)
Exemple #2
0
def restore_variables(input_file, filtered_amrs):
    '''Restore the removed variables for the pruned file

    Writes filtered_amrs to input_file + '.pruned_temp', runs
    restoreAMR/restore_amr.py on that file with input_file + '.pruned' as
    output, and deletes the temporary file afterwards.

    Raises:
        subprocess.CalledProcessError if the restore script exits with a
        non-zero status (the temporary file is still removed)
    '''
    import subprocess

    temp_file = input_file + '.pruned_temp'
    # Write variable-less AMRs to file
    write_to_file(filtered_amrs, temp_file)
    try:
        # Then restore the AMR; an argument list (no shell) keeps paths with
        # spaces or shell metacharacters intact
        subprocess.run(['python3', 'restoreAMR/restore_amr.py',
                        '-f', temp_file, '-o', input_file + '.pruned'],
                       check=True)
    finally:
        # Remove temp file again, also when the restore step failed
        if os.path.exists(temp_file):
            os.remove(temp_file)
Exemple #3
0
def create_coref_indexing(input_file, output_ext, keep_wiki):
    '''Go from full AMR to one-line AMR with coreference indexed

    Arguments:
        input_file -- path of the AMR file to read
        output_ext -- appended to input_file to form the output file name
        keep_wiki  -- if true, the file is read as-is (lines right-stripped);
                      otherwise it is read through delete_wiki()
    '''
    if keep_wiki:
        # Keep Wiki instances: read the raw lines, closing the file afterwards
        with open(input_file, 'r') as in_f:
            amr_lines = [x.rstrip() for x in in_f]
    else:
        # Remove all Wiki instances
        amr_lines = delete_wiki(input_file)
    # Put everything on a single line, sent_file is empty
    single_amrs, _ = single_line_convert(amr_lines, '')
    # Add the coreference index we want
    repl_amrs = coreference_index(single_amrs)
    # Write output to file
    write_to_file(repl_amrs, input_file + output_ext)
Exemple #4
0
def var_free_amrs(input_file, out_ext, keep_wiki):
    '''Create variable-free AMRs and write them to input_file + out_ext

    Arguments:
        input_file -- path of the AMR file to read
        out_ext    -- appended to input_file to form the output file name
        keep_wiki  -- if true, the file is read as-is (lines right-stripped);
                      otherwise it is read through delete_wiki()
    '''
    if keep_wiki:
        # Keep wiki links: read the raw lines, closing the file afterwards
        with open(input_file, 'r') as in_f:
            amr_lines = [x.rstrip() for x in in_f]
    else:
        # Delete wiki links
        amr_lines = delete_wiki(input_file)
    # Remove all variables by duplicating coreference nodes
    del_amrs = delete_amr_variables(amr_lines)
    # Put AMR on single line (sent_file is empty) and write output
    single_amrs, _ = single_line_convert(del_amrs, '')
    write_to_file(single_amrs, input_file + out_ext)
Exemple #5
0
def create_output(input_file, old_amrs, new_amrs, sent_amrs, double, amr_ext):
    '''Write every result list to the file name get_filenames() assigns it

    Always writes old_amrs, new_amrs and sent_amrs. When double is true,
    additionally writes old_amrs followed by new_amrs to one file and
    sent_amrs repeated twice to another.
    '''
    names = get_filenames(input_file, amr_ext)
    permuted_path, no_var_path, sent_path, double_sent_path, double_amr_path = names

    # Regular output: original AMRs, permuted AMRs, sentences (in that order)
    regular_output = (
        (old_amrs, no_var_path),
        (new_amrs, permuted_path),
        (sent_amrs, sent_path),
    )
    for content, path in regular_output:
        write_to_file(content, path)

    if not double:
        return

    # Doubled output keeps BOTH the original and the permuted AMR, so the
    # sentences have to be doubled as well
    write_to_file(old_amrs + new_amrs, double_amr_path)
    write_to_file(sent_amrs + sent_amrs, double_sent_path)
Exemple #6
0
        for idx in range(len(var_list) - 1):
            for y in range(idx + 1, len(var_list)):
                # Match - we see a concept (var-value) we already saw before
                if var_list[idx][1] == var_list[y][1]:
                    replace_item = var_list[y][0] + ' / ' + var_list[y][1]
                    # The part that needs to be replaced should be present
                    if replace_item in line:
                        # Do the actual replacing here, e.g. replace :ARG1 (var / value) by :ARG refvar
                        new_line_replaced = re.sub(
                            r'\({0} / [^\(]*?\)'.format(var_list[y][0]),
                            ' ' + var_list[idx][0], new_line)
                        # Only do replacing if resulting AMR is valid
                        if new_line_replaced != new_line and valid_amr(
                                new_line_replaced):
                            new_line = new_line_replaced
        # Perhaps fix some weird tokenization issues
        new_line = new_line.replace('_ (', '_(').replace(') "', ')"')
        coref_amrs.append(new_line.strip())

    # Sanity check
    assert len(coref_amrs) == indx + 1
    return coref_amrs


if __name__ == '__main__':
    # Read the command line options
    args = create_arg_parser()
    # Process the input and write the result to input file + output extension
    write_to_file(process_file(args.input_file),
                  args.input_file + args.output_ext)
Exemple #7
0
                try:
                    # Variable coming up
                    if tokenized_line[count+3] == '/':
                        amr_string.append('\n' + num_tabs * '\t' + part)
                    # Variable coming, add newline here
                    elif variable_match(tokenized_line[count+1]):
                        amr_string.append('\n' + num_tabs * '\t' + part)
                    else:
                        amr_string.append(part)
                except:
                    amr_string.append(part)
            else:
                amr_string.append(part)

        original_line = reverse_tokenize(" ".join(amr_string))
        original_line = original_line.replace('_ (', '_(').replace(') "', ')"')
        fixed_amrs.append(original_line + '\n\n')
    return fixed_amrs


if __name__ == "__main__":
    args = create_arg_parser()
    fixed_amrs = reformat_amr(args.input_file)
    # With validation switched on, stop at the first AMR that is not valid
    for candidate in (fixed_amrs if args.valid else []):
        if valid_amr(candidate):
            continue
        raise ValueError(candidate)
    # Output goes to input file + extension
    output_path = args.input_file + args.extension
    write_to_file(fixed_amrs, output_path, extra_newline=True)

Exemple #8
0
        # Extra step to make sure digits are not added to arguments
        line = add_space_when_digit(line)

        # Restore variables here, also fix problems afterwards if there are any
        line = convert(line)

        # The digit problem might reoccur again here
        line = add_space_when_digit(line)

        # We did some hacky rewrites to make sure convert() didn't mess anything up
        # restore them in this step (polarity +, polite, etc)
        line = restore_rewrites(line)

        # Finally restore the coreference
        if args.coreference == 'index':
             # Replace the 'coref-' nodes with the reference
            line = add_coref(line)
        elif args.coreference == 'abs':
            # Replace absolute paths with reference here
            line = replace_absolute_paths(line, ref_dict)

        # Save the final line
        restored_lines.append(" ".join(line.strip().split()))

    # Print detailed results for the coreference methods
    if args.print_stats:
        print_coref_stats(args.coreference, replace_types, index_dict)

    # Write final output to file
    write_to_file(restored_lines, args.output_file)