def save_tfidf_like(parl_counter, sort_tfidf_like, counter_list, tot_counter,
                    counter_list_parl):
    """Dump every word's score and its individual components to a CSV file.

    The file is created (or overwritten) at
    ``dir_out + "tfidf_like_parametros.csv"``; ``dir_out`` is a module-level
    name defined outside this function. Fields are separated by ``;``. A
    header row is written first, followed by one row per word obtained by
    iterating ``parl_counter``.

    Args:
        parl_counter: Iterated to obtain the words; also forwarded to
            ``TfIdf.tf`` and ``TfIdf.parl_prob``.
        sort_tfidf_like: Anything ``dict()`` accepts (e.g. a sequence of
            ``(word, score)`` pairs); supplies the "valor" column.
        counter_list: Sized collection; ``log2(len(counter_list))`` fills the
            "entropia maxima" column. Also forwarded to ``TfIdf.entropy`` and
            ``TfIdf.parl_prob``.
        tot_counter: Forwarded to ``TfIdf.entropy`` and
            ``TfIdf.parl_entropy``.
        counter_list_parl: Forwarded to ``TfIdf.parl_entropy``.

    Raises:
        KeyError: If a word from ``parl_counter`` has no entry in
            ``sort_tfidf_like``.
    """
    score_by_word = dict(sort_tfidf_like)
    # The header text is part of the file format and is kept exactly as it
    # was (including its spelling) so existing consumers keep working.
    header = ("palavra;valor;frequencia;entropia maxima;entropia da palvra;"
              "prob_politica;entropia entre deputados\n")

    # The context manager guarantees the handle is closed even when one of
    # the per-word computations below raises part-way through the loop.
    with open(dir_out + "tfidf_like_parametros.csv", 'w') as f:
        f.write(header)
        for word in parl_counter:
            f.write(
                word + ";" + str(score_by_word[word]) + ";"
                + '%.4f' % (TfIdf.tf(word, parl_counter)) + ";"
                + '%.4f' % (math.log2(len(counter_list))) + ";"
                + '%.4f' % (TfIdf.entropy(word, tot_counter, counter_list)) + ";"
                + '%.4f' % (TfIdf.parl_prob(word, parl_counter, counter_list)) + ";"
                + '%.4f' % (TfIdf.parl_entropy(word, tot_counter, counter_list_parl))
                + "\n"
            )
def idf_like(word, parl_counter, tot_counter, doc_counter, counter_list_parl):
    """Return the idf-like weight of ``word``.

    The weight is the product of three factors:

    * ``log2(len(doc_counter))`` minus ``TfIdf.entropy(word, tot_counter,
      doc_counter)``,
    * ``TfIdf.parl_prob(word, parl_counter, doc_counter)``,
    * ``TfIdf.parl_entropy(word, tot_counter, counter_list_parl)``.

    The ``TfIdf`` helpers are defined outside this function; their exact
    semantics are not restated here.

    Raises:
        ValueError: From ``math.log2`` when ``doc_counter`` is empty.
    """
    max_entropy = math.log2(len(doc_counter))
    word_entropy = TfIdf.entropy(word, tot_counter, doc_counter)
    entropy_gap = max_entropy - word_entropy

    parl_weight = TfIdf.parl_prob(word, parl_counter, doc_counter)
    deputy_entropy = TfIdf.parl_entropy(word, tot_counter, counter_list_parl)

    return entropy_gap * parl_weight * deputy_entropy
def idf_pow(word, parl_counter, tot_counter, doc_counter, counter_list_parl,
            b1, b2):
    """Return an idf-style weight of ``word`` built from two density values.

    The result is the product of:

    * ``expon.pdf`` evaluated at the word's entropy with ``scale=0.2``,
    * ``TfIdf.parl_prob(word, parl_counter, doc_counter)``,
    * ``beta.pdf`` evaluated at ``2**h_word / 2**h_max`` with shape
      arguments ``b1`` and ``b2``,

    where ``h_word`` is ``TfIdf.entropy(word, tot_counter, doc_counter)`` and
    ``h_max`` is ``log2(len(doc_counter))``.

    ``counter_list_parl`` is accepted but not used by this function.
    ``expon`` and ``beta`` come from the enclosing module (presumably
    ``scipy.stats`` -- confirm against the file's imports).

    Raises:
        ValueError: From ``math.log2`` when ``doc_counter`` is empty.
    """
    max_entropy = math.log2(len(doc_counter))
    word_entropy = TfIdf.entropy(word, tot_counter, doc_counter)

    # Ratio of the two powers of two; this is the point at which the beta
    # density is evaluated.
    power_ratio = math.pow(2, word_entropy) / math.pow(2, max_entropy)

    entropy_density = expon.pdf(word_entropy, scale=0.2)
    parl_weight = TfIdf.parl_prob(word, parl_counter, doc_counter)
    ratio_density = beta.pdf(power_ratio, b1, b2)

    return entropy_density * parl_weight * ratio_density