def process_file(path):
    """Accumulate word frequencies over the last column of a CSV file.

    The first row is skipped (presumably a header row -- confirm against
    the data files). For every remaining non-empty row, the last field is
    passed through ``Clean.clean_data`` together with the module-level
    ``Sign``, and the result of ``statistic_word_frequency_from_str`` on
    that cleaned text is merged into a running ``Counter``.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A ``Counter`` with the combined counts of all processed rows; it is
        empty when the file contains no data rows.
    """
    # Read-only access is all that is needed; 'r+' would fail on files the
    # process may read but not write.
    with open(path, 'r') as f:
        print(path)
        frequency = Counter()
        reader = csv.reader(f)
        # The None default keeps a completely empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; row[-1] would raise
                # IndexError on them.
                continue
            line = Clean.clean_data(row[-1], Sign)
            # Merge in place instead of building a brand-new Counter per
            # row. NOTE(review): assumes the helper returns a Counter (or
            # mapping) of positive counts -- confirm against its definition.
            frequency.update(statistic_word_frequency_from_str(line))
    return frequency
def generate_str_file(Dir, file):
    """Write the cleaned last column of a CSV file to ``clean/str.txt``.

    The output lives in a ``clean`` sub-directory of ``Dir``, which is
    created when missing. If ``str.txt`` already exists the user is asked
    on the console (prompt text is Chinese: "the generated file already
    exists, overwrite? y/n") and any answer other than ``y`` leaves the
    existing file untouched.

    The first CSV row is skipped (presumably a header row -- confirm
    against the data files). For every remaining non-empty row, the last
    field is passed through ``Clean.clean_data`` together with the
    module-level ``Sign``; results that are not blank after stripping are
    written one per line.

    Args:
        Dir: Directory that contains the input file and receives the
            ``clean`` sub-directory.
        file: Name of the CSV file inside ``Dir``.

    Returns:
        The path of ``str.txt``, whether it was freshly written or an
        existing file was kept.
    """
    str_path = Dir + os.sep + 'clean/'
    out_path = str_path + 'str.txt'
    if not os.path.isdir(str_path):
        os.mkdir(str_path)

    if os.path.isfile(out_path):
        cmd = raw_input("生成的文件已经存在,是否覆盖 y/n : ")
        if cmd != 'y':
            return out_path
        os.remove(out_path)

    src_path = Dir + os.sep + file
    # The input is only read and the output never exists at this point
    # (it was absent or has just been removed), so plain 'r' and 'w' state
    # the intent.
    with open(src_path, 'r') as f, open(out_path, 'w') as w:
        print(src_path)
        reader = csv.reader(f)
        # The None default keeps a completely empty file from raising
        # StopIteration.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; row[-1] would raise
                # IndexError on them.
                continue
            line = Clean.clean_data(row[-1], Sign)
            if line.strip():
                # write(), not writelines(): the latter would iterate the
                # string one character at a time.
                w.write(line + '\n')
    return out_path