Beispiel #1
0
def query(word, dick, invdick, freqdick):
    """Return weighted synonym candidates for `word`.

    The synonyms of `word` are looked up with `synonyms.query_word` and
    passed through `reweight`.  Each resulting value is then combined with
    the synonym's entry in `freqdick` via `weight_func`, and the combined
    mapping is passed through `reweight` once more.

    Args:
        word: the word to find synonyms for.
        dick: forwarded unchanged to `synonyms.query_word`.
        invdick: forwarded unchanged to `synonyms.query_word`.
        freqdick: mapping queried with `.get(synonym, 0)`; presumably word
            frequencies (verify against the caller).  Synonyms missing
            from it are scored with a value of 0 rather than dropped.

    Returns:
        A list of `(syn, prob)` pairs where `syn` is a synonym of `word`
        and `prob` is its weight, i.e. how much we prefer it.  The list is
        NOT sorted; callers that need a ranking can use
        `sorted(result, key=lambda pair: pair[1], reverse=True)`.
    """
    # sim for similarities
    syn_sims = reweight(synonyms.query_word(word, dick, invdick))

    # We should give any word a chance: a synonym absent from `freqdick`
    # is still passed to `weight_func`, with 0 as its second argument.
    # `.items()` (rather than `.iteritems()`) works on Python 2 and 3.
    res_freqs = reweight({key: weight_func(val, freqdick.get(key, 0))
                          for key, val in syn_sims.items()})

    # Materialise as a list so the return type matches the docstring on
    # both Python 2 (already a list) and Python 3 (a dict view).
    return list(res_freqs.items())
Beispiel #2
0
    `prob.` is its weight, i.e. how much we prefer it."""
    global freq_offset

    # sim for similarities
    syn_sims = reweight(synonyms.query_word(word, dick, invdick))

    # we should give any word a chance
    res_freqs = reweight({key: weight_func(val, freqdick.get(key, 0))
                          for key, val in syn_sims.iteritems()})

    #return sorted(res_freqs.items(), lambda x: x[1], reverse=True)
    return res_freqs.items() # comment prev. line if we don't need to sort the result

if __name__ == '__main__':
    # Load the word bank; the returned pair is what synonyms.query_word
    # expects as its second and third arguments.
    (dick, invdick) = synonyms.read_wordbank('../data/wordbank.txt')
    # Convenience wrapper that binds the loaded word bank to query_word.
    syn_query_word = lambda word: synonyms.query_word(word, dick, invdick)

    # Build the relative path of every word-frequency file by prefixing
    # each name produced by enum_file_names() (defined outside this section).
    word_freq_prefix = '../wordfreq/'
    wordfreq_files = [word_freq_prefix + fname for fname in enum_file_names()]

    # Keys of `dick`; count() below keeps only frequencies for these words.
    words = dick.keys()

    def count(file_name, output_file = sys.stdout):
        freq_dick = word_freq.load_file(file_name)

        syn_freq_dick = {}
        for word in words:
            if word in freq_dick:
                syn_freq_dick[word] = freq_dick[word]

        print >>output_file, 'File name:', file_name