예제 #1
0
                       "--verbose",
                       help="switch verbose statistics mode on",
                       action="store_true")
args = argparser.parse_args()


def _load_pickle_and_close(fobj):
    """Unpickle a single object from the open file `fobj` and close the file.

    The file is closed even if unpickling raises, so a corrupt or truncated
    probability file does not leave its handle open.
    """
    try:
        # NOTE(review): unpickling can execute arbitrary code embedded in the
        # data -- only pass probability files that come from a trusted source
        return pickle.load(fobj)
    finally:
        fobj.close()


##################################################################
# Main
# load the pickled unigram and bigram probabilities given on the command line
unigram_prob = _load_pickle_and_close(args.unigram_prob_file)
bigram_prob = _load_pickle_and_close(args.bigram_prob_file)

# keep frequently used command line options in module-level names
esc_char = args.esc_char
skip_line = args.skip_line

# output goes through `foutput`; `finput` reports through the same object
# (via `foutput.fprint`) and is created with errors='replace'
foutput = AltFileOutput(encoding=args.encoding, flush=args.flush)
finput = AltFileInput(*args.files, print_func=foutput.fprint, errors='replace')
memory = Memory()

# unfortunately, rules for restoration of misspellings are currently hard-coded
# in the `misspellings.py` file
misspelling_restorer = MisspellingRestorer(unigram_prob, bigram_prob)

# iterate over input lines, skip empty and skip lines, pre-cache information
# about replacements
for line in finput:
    # print empty and skip lines unchanged
    # (`skip_line` holds the value of `args.skip_line` assigned above)
    if line == skip_line or not line:
        # check if memory is empty and print it otherwise
        # NOTE(review): `print_mem` is defined outside this excerpt; presumably
        # it outputs whatever is currently held in `memory` -- confirm
        print_mem()
        foutput.fprint(line)
예제 #2
0
        action="store_true")
    argparser.add_argument(
        "--hirschberg",
        help="use Hirschberg algorithm for alignment (default)",
        action='store_true',
        default=True)
    argparser.add_argument(
        "files",
        help="input files in which equal and odd strings should be aligned",
        nargs='*',
        type=argparse.FileType('r'),
        default=[sys.stdin])
    args = argparser.parse_args()

    # encoding used for both input and output
    enc = args.encoding

    # pick the alignment routine: Needleman-Wunsch only when explicitly
    # requested, Hirschberg in every other case
    alignfunc = nw_align if args.needleman_wunsch else hb_align

    # set up output first, since the input object reports through its
    # `fprint` method
    foutput = AltFileOutput(encoding=args.encoding)
    finput = AltFileInput(*args.files,
                          print_func=foutput.fprint,
                          errors="replace")

    # state used by the input loop below; everything starts empty or at zero
    line1 = ''
    line2 = ''
    oline1, oline2 = [], []
    alignment = []
    c_list = []
    c_i = 0
    c_len = 0
    fnr = 0

    # iterate over input lines
    for line in finput: