Esempio n. 1
0
def load_word_freq(project):
    """Load the cached frequency tables for one project and for "all".

    Each table is read with ``json.load`` from the path returned by
    ``freq_get_file_name``. Loading is best-effort: if opening or parsing
    a file raises, the exception is printed and an empty dict stands in
    for that table.

    Args:
        project: Project identifier; it is passed to ``get_project_name``
            to obtain the key of the per-project cache file.

    Returns:
        A ``(project_table, all_table)`` tuple.
    """
    # (cache key, message printed when that cache cannot be loaded)
    sources = (
        (get_project_name(project), "json file empty: project %s"),
        ("all", "json file empty: all %s"),
    )

    loaded = []
    for cache_key, failure_msg in sources:
        table = {}
        try:
            with open(freq_get_file_name(cache_key), "r") as handle:
                table = json.load(handle)
        except Exception as err:
            # Deliberately tolerant: report the problem, keep the empty dict.
            print(failure_msg % err)
        loaded.append(table)

    project_table, all_table = loaded
    return project_table, all_table
Esempio n. 2
0
def project_word_freq():
    """Sum the word counts of every project and write them to the "all" file.

    Every path produced by ``get_project()`` is printed and handed to
    ``get_word_frequency``; the per-project counts are added into a single
    table. That table is serialised with ``simplejson`` (4-space indent,
    entries sorted by descending count and then by word) and written to
    the path returned by ``freq_get_file_name("all")``.
    """
    totals = defaultdict(int)
    for project_path in get_project():
        print(project_path)
        for word, count in get_word_frequency(project_path).items():
            totals[word] += count

    def by_count_then_word(item):
        # item is a (word, count) pair: highest count first, ties by word.
        return (-item[1], item[0])

    serialized = simplejson.dumps(
        totals,
        indent=4,
        item_sort_key=by_count_then_word,
    )
    with open(freq_get_file_name("all"), "w") as out:
        out.write(serialized)
Esempio n. 3
0
def get_word_frequency(project=""):
    """Count lower-cased words in a project's files and save the counts.

    For every file yielded by ``walk_dir(project)`` the text is fetched
    with ``get_text(..., get_all=True)``, passed through ``clean_text``
    and split by ``parse_words``; each word is lower-cased before being
    counted. The counts are also written as JSON (4-space indent, sorted
    by descending count and then by word) to the path returned by
    ``freq_get_file_name`` for the project's name.

    Args:
        project: Project location handed to ``walk_dir`` and
            ``get_project_name``. Defaults to the empty string.

    Returns:
        The ``defaultdict(int)`` mapping each lower-cased word to its count.
    """
    counts = defaultdict(int)
    for source_path in walk_dir(project):
        cleaned = clean_text(get_text(source_path, get_all=True))
        for token in parse_words(cleaned):
            counts[token.lower()] += 1

    name = get_project_name(project)

    def by_count_then_word(item):
        # item is a (word, count) pair: highest count first, ties by word.
        return (-item[1], item[0])

    serialized = simplejson.dumps(
        counts,
        indent=4,
        item_sort_key=by_count_then_word,
    )
    with open(freq_get_file_name(name), "w") as out:
        out.write(serialized)
    return counts
Esempio n. 4
0
def project_word_freq():
    """Sum word counts over the projects listed in ``metadata/projects.txt``.

    The listing file is read completely, one project path per line;
    surrounding whitespace is stripped and blank lines are skipped. Each
    remaining path is printed and passed to ``get_word_frequency``, and
    the returned counts are added into one table. The table is written as
    JSON (4-space indent, sorted by descending count and then by word) to
    the path returned by ``freq_get_file_name("all")``.
    """
    totals = defaultdict(int)

    # Read the whole listing up front so the file is closed before the
    # per-project processing starts.
    with open("metadata/projects.txt", "r") as listing:
        entries = [raw.strip() for raw in listing.readlines()]

    for project_path in entries:
        if not project_path:
            continue
        print(project_path)
        for word, count in get_word_frequency(project_path).items():
            totals[word] += count

    def by_count_then_word(item):
        # item is a (word, count) pair: highest count first, ties by word.
        return (-item[1], item[0])

    serialized = simplejson.dumps(
        totals,
        indent=4,
        item_sort_key=by_count_then_word,
    )
    with open(freq_get_file_name("all"), "w") as out:
        out.write(serialized)