Ejemplo n.º 1
0
def preliminary_preps():
    """Set up the run logger and apply command-line options to the config.

    The log file is "run_<id>.log" inside tc.LOGDIR, where <id> is the value
    returned by prep.find_run_id().

    Command-line options that were actually supplied (i.e. are not None)
    override the matching attribute of the tnconfig module (tc); options
    left at None leave the configured value untouched.

    Returns a (lgr, lfh, clargs) tuple: the two values returned by
    prep.set_log (presumably the logger and its file handler -- confirm in
    preparation.set_log) and the object returned by set_option_parser().
    """
    # logger
    logfile_name = os.path.join(tc.LOGDIR, "run_%s.log" % prep.find_run_id())
    lgr, lfh = prep.set_log(__name__, logfile_name, False)
    # cl options
    clargs = set_option_parser()
    # Every option is checked on its own. These used to be a single
    # if/elif chain whose first two branches covered every case, so the
    # maxdista/distaw/lmw overrides below could never run.
    if clargs.tag is not None:
        tc.TAG = bool(clargs.tag)
    if clargs.maxdista is not None:
        tc.maxdista = clargs.maxdista
    if clargs.distaw is not None:
        tc.distaw = clargs.distaw
    if clargs.lmw is not None:
        tc.lmw = clargs.lmw
    return lgr, lfh, clargs
Ejemplo n.º 2
0
# -*- coding: utf-8 -*-
import codecs
from collections import defaultdict
import os
import re

import tnconfig as tc
import preparation as prep

# logging
# Runs at import time: the log file is "run_<id>.log" inside tc.LOGDIR,
# where <id> is whatever prep.find_run_id() returns.
logfile_name = os.path.join(tc.LOGDIR, "run_%s.log" % prep.find_run_id())
# presumably lgr is the logger and lfh its file handler -- confirm in
# preparation.set_log; the meaning of the trailing False is defined there too.
lgr, lfh = prep.set_log(__name__, logfile_name, False)

class EdScoreMatrix:
    """Methods to read cost matrix from module in arg cost_module
       and to find costs for individual character-edits."""

    def __init__(self, cost_module):
        """Keep a reference to the module that holds the cost matrix.

        cost_module: object exposing row_names, col_names and costs
        attributes; these are read by read_cost_matrix.
        """
        self.costm = cost_module

    # Class-level defaults, all None until filled in (the read_cost_matrix
    # docstring says instance values are set there).
    row_names = None
    col_names = None    
    matrix_conts = None
    # Lowercase accented characters, in order: a-acute, e-acute, i-acute,
    # n-tilde, o-acute, u-acute, u-diaeresis.
    accented_chars = [u'\xe1', u'\xe9', u'\xed', u'\xf1', u'\xf3', u'\xfa', u'\xfc']
    # NOTE: this dict lives on the class, so it is shared by every instance
    # unless an instance rebinds matrix_stats to its own dict.
    matrix_stats = {"max" : None, "min" : None, "ave" : None}

    def read_cost_matrix(self):
        """Read cost matrix into a hash. Set instance values for them"""
        row_names = self.costm.row_names.strip().split("\t")
        col_names = self.costm.col_names.strip().split("\t")
        costs = self.costm.costs