Example #1
0
def process_file(path):
    """Count word frequencies over the last column of a CSV file.

    The first row is treated as a header and skipped. For every remaining
    non-empty row, the last field is passed through
    ``Clean.clean_data(field, Sign)`` and the result is handed to
    ``statistic_word_frequency_from_str``; the per-row counts are merged
    into one ``Counter``.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``Counter`` holding the accumulated counts. It is empty when the
        file contains no data rows.
    """
    # Read-only mode: the file is never written, so write permission on
    # the input must not be required.
    with open(path, 'r') as f:
        print(path)
        frequency = Counter()
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; no last field exists.
                continue
            line = Clean.clean_data(row[-1], Sign)
            # Merge in place instead of building a new Counter per row.
            frequency.update(statistic_word_frequency_from_str(line))
        return frequency
Example #2
0
def generate_str_file(Dir, file):
    """Write the cleaned last column of a CSV file to ``<Dir>/clean/str.txt``.

    The ``clean`` sub-directory is created when missing. If ``str.txt``
    already exists the user is asked on stdin whether to overwrite it; any
    answer other than ``y`` leaves the existing file untouched.

    The first CSV row is treated as a header and skipped. For every
    remaining non-empty row, the last field is passed through
    ``Clean.clean_data(field, Sign)`` and written as one line, unless the
    cleaned text is empty or whitespace-only.

    Args:
        Dir: Directory that contains the input file and receives ``clean/``.
        file: Name of the CSV file inside ``Dir``.

    Returns:
        Path of the ``str.txt`` file (freshly written or pre-existing).
    """
    clean_dir = os.path.join(Dir, 'clean')
    out_path = os.path.join(clean_dir, 'str.txt')
    if not os.path.isdir(clean_dir):
        os.mkdir(clean_dir)

    if os.path.isfile(out_path):
        # Prompt text (Chinese): "The generated file already exists,
        # overwrite? y/n".
        cmd = raw_input("生成的文件已经存在,是否覆盖 y/n : ")
        if cmd != 'y':
            return out_path

    src_path = os.path.join(Dir, file)
    # The input is only read. Opening the output with 'w' truncates any
    # previous content, so no separate delete step is needed and the old
    # file survives if the input cannot be opened.
    with open(src_path, 'r') as f:
        with open(out_path, 'w') as w:
            print(src_path)
            reader = csv.reader(f)
            # Skip the header; the default keeps an empty file from raising
            # StopIteration.
            next(reader, None)
            for row in reader:
                if not row:
                    # csv.reader yields [] for blank lines; nothing to clean.
                    continue
                line = Clean.clean_data(row[-1], Sign)
                if line.strip():
                    # write(), not writelines(): the latter would iterate
                    # the string one character at a time.
                    w.write(line + '\n')
    return out_path