def query(word, dick, invdick, freqdick):
    """Return weighted synonym candidates for `word`.

    The similarity scores produced by
    `synonyms.query_word(word, dick, invdick)` are passed through
    `reweight`.  Each score is then combined with the candidate's entry in
    `freqdick` by `weight_func`, and the combined values are passed through
    `reweight` a second time.

    Args:
        word: the word whose synonyms are requested.
        dick: forwarded unchanged to `synonyms.query_word` (presumably the
            word bank mapping -- confirm against the caller).
        invdick: forwarded unchanged to `synonyms.query_word` (presumably
            the inverse of `dick` -- confirm against the caller).
        freqdick: mapping queried with `.get(candidate, 0)`; candidates
            that are missing from it contribute a value of 0.

    Returns:
        `[(syn, prob.)]` where `syn` is a synonym of `word` and `prob.` is
        its weight, i.e. how much we prefer it.  The pairs are NOT sorted;
        a caller that needs a ranking can use
        `sorted(result, key=lambda pair: pair[1], reverse=True)`.
    """
    # sim for similarities
    syn_sims = reweight(synonyms.query_word(word, dick, invdick))

    # we should give any word a chance (words absent from `freqdick` fall
    # back to 0 instead of raising KeyError or being dropped)
    res_freqs = reweight({
        key: weight_func(val, freqdick.get(key, 0))
        for key, val in syn_sims.iteritems()
    })

    return res_freqs.items()
`prob.` is its weight, i.e. how much we prefer it.""" global freq_offset # sim for similarities syn_sims = reweight(synonyms.query_word(word, dick, invdick)) # we should give any word a chance res_freqs = reweight({key: weight_func(val, freqdick.get(key, 0)) for key, val in syn_sims.iteritems()}) #return sorted(res_freqs.items(), lambda x: x[1], reverse=True) return res_freqs.items() # comment prev. line if we don't need to sort the result if __name__ == '__main__': (dick, invdick) = synonyms.read_wordbank('../data/wordbank.txt') syn_query_word = lambda word: synonyms.query_word(word, dick, invdick) word_freq_prefix = '../wordfreq/' wordfreq_files = [word_freq_prefix + fname for fname in enum_file_names()] words = dick.keys() def count(file_name, output_file = sys.stdout): freq_dick = word_freq.load_file(file_name) syn_freq_dick = {} for word in words: if word in freq_dick: syn_freq_dick[word] = freq_dick[word] print >>output_file, 'File name:', file_name