Esempio n. 1
0
def freq_file():
    """Count keyword hits per line of a fixed input file and save the counts.

    The keyword list is loaded with ``read_list`` and indexed in a ``Tries``
    instance. Every line of the input file is passed to
    ``tries.search_line``; each token it yields increments that token's
    counter. The counters (including keywords that never matched, which stay
    at 0) are finally handed to ``write_dict`` together with the output path
    ``<input path>_result``.

    NOTE(review): ``search_line`` is presumed to yield only tokens that were
    registered through ``put_list``; any other token raises ``KeyError`` here.
    """
    in_file_name = "realdata/freq_count/ivr_non_risk.csv"
    out_file_name = in_file_name + "_result"
    pattern_name = "realdata/freq_count/keyword_row"

    pattern_list = read_list(pattern_name)
    tries = Tries()
    tries.put_list(pattern_list)

    # Start every keyword at zero so unmatched keywords still get reported.
    pattern_dict = dict.fromkeys(pattern_list, 0)

    # Context manager guarantees the input file is closed, even on error.
    with open(in_file_name) as in_file:
        for line in in_file:
            for token in tries.search_line(line):
                pattern_dict[token] += 1

    write_dict(pattern_dict, out_file_name)
Esempio n. 2
0
def cal_chi(self_dict, other_dict, self_num, other_num, out_name=None):
    """Compute a chi-square score for every key of ``self_dict``.

    For each key a 2x2 table is built from the two groups:

        a = self_dict[key]            (count in "self")
        b = other_dict.get(key, 0)    (count in "other", 0 when missing)
        c = self_num - a              (remainder of "self")
        d = other_num - b             (remainder of "other")

    and the score is

        (a*d - b*c)**2 * (self_num + other_num)
        ---------------------------------------
            (a + b) * (c + d) * (a + c) * (b + d)

    Keys whose denominator is zero are skipped. Keys that appear only in
    ``other_dict`` are not scored.

    Args:
        self_dict: mapping key -> count for the first group.
        other_dict: mapping key -> count for the second group.
        self_num: total used to derive ``c`` for the first group.
        other_num: total used to derive ``d`` for the second group.
        out_name: when not ``None``, the finished result is passed once to
            ``write_dict(chi_dict, out_name)``.

    Returns:
        dict mapping each scored key to its chi-square value (float).
    """
    chi_dict = {}
    total = self_num + other_num

    for key, a in self_dict.items():
        b = other_dict.get(key, 0)
        c = self_num - a
        d = other_num - b

        denominator = (a + b) * (c + d) * (a + c) * (b + d)
        if denominator == 0:
            # Degenerate table (an empty row or column): score is undefined.
            continue

        nominator = (a * d - b * c) ** 2 * total
        # 1.0 * forces true division regardless of the interpreter version.
        chi_dict[key] = 1.0 * nominator / denominator

    # Write once, after all keys are scored, instead of once per key.
    if out_name is not None:
        write_dict(chi_dict, out_name)
    return chi_dict