def freq_file():
    """Count keyword hits in the IVR non-risk file and write the totals.

    Reads the keyword list from ``realdata/freq_count/keyword_row``, loads it
    into a ``Tries`` matcher, scans ``realdata/freq_count/ivr_non_risk.csv``
    line by line and increments a counter for every token that
    ``Tries.search_line`` reports for that line.  The resulting
    ``{keyword: count}`` mapping is handed to ``write_dict`` together with the
    output name (the input name with ``"_result"`` appended).
    """
    in_file_name = "realdata/freq_count/ivr_non_risk.csv"
    out_file_name = in_file_name + "_result"
    pattern_name = "realdata/freq_count/keyword_row"
    pattern_list = read_list(pattern_name)

    tries = Tries()
    tries.put_list(pattern_list)

    # Seed every keyword with zero so that keywords which never match still
    # show up in the written result.
    pattern_dict = dict.fromkeys(pattern_list, 0)

    # Use a context manager so the input file is closed even if matching
    # raises part-way through.
    with open(in_file_name) as in_file:
        for line in in_file:
            for token in tries.search_line(line):
                pattern_dict[token] += 1

    write_dict(pattern_dict, out_file_name)
 def __init__(self,key_in_name,doc_in_name,base_time):
     """Build the keyword matcher, read the document file and store counts.

     NOTE(review): this definition sits at a one-space indent outside any
     visible class and repeats, statement for statement, the constructor of
     the CountFreq class that follows it. It looks like a stray duplicate
     left by a bad paste -- confirm and remove it if so.

     key_in_name -- passed to get_word_set() to obtain the keywords.
     doc_in_name -- path of the file that is read line by line.
     base_time   -- stored unchanged on the instance as self.base_time.
     """
     # Both counters start empty.
     self.word_dict = {}
     self.freq_dict = {}
     self.tries = Tries()
     self.base_time = base_time
     # Load every keyword returned by get_word_set() into the matcher.
     key_list = list(get_word_set(key_in_name))
     self.tries.put_list(key_list)
     # Hand each line of the document file to self.read_line().
     # NOTE(review): the file object opened here is never explicitly closed.
     for line in open(doc_in_name):
         self.read_line(line)
     self.write_dict_to_db()
class CountFreq:
    """Count keyword occurrences per time index and store them in SQLite.

    Attributes:
        word_dict: ``{word: {index: count}}`` -- how often a keyword was
            counted at each time index.
        freq_dict: ``{index: count}`` -- number of lines that contributed at
            least one newly seen keyword at that index.
        tries: ``Tries`` matcher loaded with the keyword list.
        base_time: reference time passed to ``from_timestamp_to_index``; its
            first ten characters are used as the date column when writing to
            the database.
    """

    def __init__(self, key_in_name, doc_in_name, base_time):
        """Load keywords, process every line of the document, write results.

        Args:
            key_in_name: passed to ``get_word_set`` to obtain the keywords.
            doc_in_name: path of the document file; each line is given to
                :meth:`read_line`.
            base_time: reference time for index computation (stored as-is).
        """
        self.word_dict = {}
        self.freq_dict = {}
        self.tries = Tries()
        self.base_time = base_time
        key_list = list(get_word_set(key_in_name))
        self.tries.put_list(key_list)
        # Context manager guarantees the document file is closed, also when
        # read_line raises.
        with open(doc_in_name) as doc_file:
            for line in doc_file:
                self.read_line(line)
        self.write_dict_to_db()

    def read_line(self, line):
        """Update the counters from one line of the document.

        The line is split on ``"|"`` into chat records and each record on
        ``","``; the first field is the timestamp and the second the chat
        content.  Records whose timestamp makes ``from_timestamp_to_index``
        raise ``ValueError``, and records without a content field, are
        reported on stdout and skipped.

        Each keyword is counted at most once per line (at the index of the
        record where it first appears).  ``freq_dict`` is incremented for a
        record that contributes a new keyword, unless its index equals the
        index of the previous record that contained any keyword.
        """
        # Keywords already counted for this line, to prevent counting the
        # same term twice.
        key_set = set()
        previous_index = -1
        for chat_record in line.strip().split("|"):
            tokens = chat_record.strip().split(",")
            chat_time = tokens[0].strip()

            try:
                index = from_timestamp_to_index(chat_time, self.base_time)
            except ValueError as error:
                # str(error) replaces the deprecated ``error.message``; the
                # single-argument print form behaves the same on Python 2
                # and Python 3.
                print("%s %s" % (line, error))
                continue

            if len(tokens) < 2:
                # A timestamp without a content field: skip the record
                # instead of aborting the whole load with an IndexError.
                print("%s %s" % (line, "missing chat content"))
                continue

            chat_content = tokens[1].strip()
            result = self.tries.search_line(chat_content)
            if not result:
                # Note: previous_index is intentionally left untouched for
                # records without any keyword.
                continue

            contain_new_word = False
            for word in result:
                if word in key_set:
                    continue
                self.put_to_dict(word, index)
                key_set.add(word)
                contain_new_word = True

            # Do not count the line twice for the same index.
            if contain_new_word and previous_index != index:
                self.freq_dict[index] = self.freq_dict.get(index, 0) + 1
            previous_index = index

    def put_to_dict(self, word, index):
        """Increment the count of ``word`` at ``index`` in ``word_dict``."""
        per_index = self.word_dict.setdefault(word, {})
        per_index[index] = per_index.get(index, 0) + 1

    def write_dict_to_db(self):
        """Insert one ``word_freq`` row per keyword into the SQLite database.

        Each row holds the keyword, the first ten characters of
        ``base_time`` (presumably a ``YYYY-MM-DD`` date -- confirm against
        the caller) and the string form of the keyword's ``{index: count}``
        dictionary.  All rows are committed together; the connection is
        closed even if an insert fails.
        """
        conn = sqlite3.connect('realdata/application/time_series/time.db')
        try:
            conn.text_factory = str
            date = self.base_time[:10]
            for key in self.word_dict:
                conn.execute(
                    'insert into word_freq values(?,?,?)',
                    (key, date, str(self.word_dict[key])),
                )
            conn.commit()
        finally:
            conn.close()