def combine(src_path, save_path):
    """Collapse the keys of every ".txt" dictionary under *src_path*.

    For each dictionary returned by ``util.get_objdict_list``, every key is
    cut at its first space and the values of keys sharing the same leading
    part are added together. The collapsed dictionary is then written with
    ``util.save_dict_list`` to ``save_path + file_name``.

    Args:
        src_path: Location handed to ``util.get_objdict_list``.
        save_path: Prefix joined (by plain string concatenation) with each
            file name to form the output location.
    """
    dicts_by_file, _names = util.get_objdict_list(src_path, ".txt")
    for file_name, counts in dicts_by_file.items():
        merged = {}
        for place, num in counts.items():
            # Only the text before the first space identifies the entry.
            head, _sep, _rest = place.partition(" ")
            merged[head] = merged.get(head, 0) + num
        util.save_dict_list(merged, save_path + file_name)
import re
import tool.util as util

# Merge the per-date dictionaries of every keyword into one dictionary per
# keyword, restricted to the dates that start with the prefixes listed in
# ``month_array``.
key_list = util.get_key_list()

dict_dir = r"D:\semantic analysis\2016-10-09结果\词频1//"
result_dir = r"D:\semantic analysis\2016-10-12结果\2010年频数统计//"

# Date prefixes to aggregate. The list is pinned to the year 2010; the
# former auto-discovery of "YYYY-MM" prefixes was overwritten by this value
# before use and produced prefixes the pattern below cannot match.
month_array = ["2010"]

for key in key_list:
    print(key)
    set_dict, file_name = util.get_objdict_list(dict_dir + key, ".txt")
    date_list = util.get_file_list(dict_dir + key, ".txt")
    # Joined once so that every prefix can be searched with a single regex.
    joined_dates = " ".join(date_list)

    util.create_directory(result_dir + key)

    # Loop over the requested date prefixes.
    for month in month_array:
        # Raw string: "\d" in a normal literal is an invalid escape sequence.
        pattern = re.compile(r"(" + month + r"-\d*-\d*)")
        date_array = pattern.findall(joined_dates)
        print(date_array)
        # Merge the dictionaries of all matching dates into one.
        r_dict = dict()
        for file_date in date_array:
            r_dict = util.union_dicts(set_dict[file_date + ".txt"], r_dict)
        # NOTE(review): the output name does not include the prefix, so with
        # more than one entry in month_array each pass overwrites the last.
        util.save_dict_list(r_dict, result_dir + key + ".txt")
import re
import tool.util as util

# Fold every dictionary of every keyword into a single running total and
# save that total once at the end.
dict_dir = r"D:\semantic analysis\2016-10-09结果\词频1//"
key_list = util.get_key_list2()

r_dict = {}
for key in key_list:
    print(key)
    # Each item yielded here is merged into the running total.
    for file_date in util.get_obj_list(dict_dir + key, ".txt"):
        r_dict = util.union_dicts(file_date, r_dict)

util.save_dict_list(r_dict, r"D:\百度指数截屏//常用词总频率统计" + ".txt")
"2011-10-16",
"2012-02-16",
"2012-07-09",
"2012-11-19"]
             ]
# NOTE(review): this assignment is overwritten by the single-entry list
# below before it is ever read.
py_list = ["zp","dd","sz","dr","ms","fh"]

# One list of dates per entry of py_list (matched by position).
date_list_list = [["2010-06-01",
"2011-01-21",
"2011-07-09",
"2011-11-21"]]
py_list = ["tc"]
root_path = r"D:\semantic analysis\用户信息\dict//"

# For every (py, date) pair, tally how often each place occurs among the
# user ids returned by extract_user_id and save the tally as
# <root_path><py>//<date>.txt.
for i, py in enumerate(py_list):
    date_list = date_list_list[i]
    for dd in date_list:
        user_id_list = extract_user_id(py, dd)
        place_list = []
        place_dict = dict()
        # Nothing is written when no user ids were found for this date.
        if user_id_list:
            for user_id in user_id_list:
                place = get_place(user_id)
                if place:
                    place_list.append(place)
                    # Running tally: one pass instead of calling
                    # list.count() once per collected place.
                    place_dict[place] = place_dict.get(place, 0) + 1
            util.create_directory(root_path + py)
            util.save_dict_list(place_dict, root_path + py + "//" + dd + ".txt")
Exemple #5
0
import tool.util as util

# Compute relative word frequencies from every per-file count dictionary.
dict_path = r"D:\semantic analysis\结果\去重频数//"
result_path = r"D:\semantic analysis\结果\去重频率//"

keyword_list = util.get_key_list2() + util.get_key_list()

for key in keyword_list:
    print(key)
    r_dict, file_name_list = util.get_objdict_list(dict_path + key, ".txt")
    for (k, word_dict) in r_dict.items():
        # The keyword itself is excluded from its own frequency table.
        word_dict.pop(key, None)
        # Convert each count once so the total and the ratios are computed
        # from the same integer values.
        counts = {word: int(value) for word, value in word_dict.items()}
        total = sum(counts.values())
        # An empty table never divides; a non-empty table whose counts are
        # all zero still raises ZeroDivisionError.
        r_f_dict = {word: count / total for word, count in counts.items()}
        util.create_directory(result_path + key + "//")
        util.save_dict_list(r_f_dict, result_path + key + "//" + k)
Exemple #6
0
# Count the number of files in which each keyword appears.


def count_word(word_set, word_dict):
    """Add one to the tally in *word_dict* for every item of *word_set*.

    Items not yet present in *word_dict* start at 1. The dictionary is
    updated in place and nothing is returned.

    Args:
        word_set: Iterable of hashable items to tally.
        word_dict: Mapping from item to its running count; mutated.
    """
    for word in word_set:
        word_dict[word] = word_dict.get(word, 0) + 1


def count_word_from_dict(new_dict, word_dict):
    """Add one to the tally in *word_dict* for every key of *new_dict*.

    The values stored in *new_dict* are ignored: each key contributes
    exactly 1 regardless of its value. *word_dict* is updated in place and
    nothing is returned.

    Args:
        new_dict: Dictionary whose keys are tallied.
        word_dict: Mapping from key to its running count; mutated.
    """
    for word, _unused in new_dict.items():
        word_dict[word] = word_dict.get(word, 0) + 1


# For every keyword, count in how many of its dictionaries each word occurs
# and save the result as one file per keyword.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because code outside this section may read it.
dir = r"D:\semantic analysis\2016-10-09结果\词频月//"
key_list = util.get_key_list()

for key in key_list:
    key_dir = dir + key
    os.chdir(key_dir)
    print(key)
    r_word_dict = {}
    # Every dictionary contributes 1 per word it contains.
    for file in util.get_obj_list(key_dir, ".txt"):
        count_word_from_dict(file, r_word_dict)
    util.save_dict_list(
        r_word_dict,
        r"D:\semantic analysis\2016-10-12结果\总频数月//" + key + ".txt",
    )