Exemple #1
0
def save_tfidf_like(parl_counter,sort_tfidf_like, counter_list,tot_counter,counter_list_parl):
    """Write the per-word parameters to ``tfidf_like_parametros.csv``.

    The file is created (or overwritten) at ``dir_out + "tfidf_like_parametros.csv"``,
    where ``dir_out`` is a module-level name defined outside this function.
    Fields are separated by ``;``. A header row is written first, then one
    row per word obtained by iterating ``parl_counter``.

    Args:
        parl_counter: Iterable of words (strings); also forwarded to
            ``TfIdf.tf`` and ``TfIdf.parl_prob``.
        sort_tfidf_like: Anything ``dict()`` accepts (mapping or iterable of
            ``(word, value)`` pairs); supplies the "valor" column.
        counter_list: Sized collection; its length gives the
            "entropia maxima" column (``log2(len(counter_list))``) and it is
            forwarded to ``TfIdf.entropy`` and ``TfIdf.parl_prob``.
        tot_counter: Forwarded to ``TfIdf.entropy`` and ``TfIdf.parl_entropy``.
        counter_list_parl: Forwarded to ``TfIdf.parl_entropy``.

    Raises:
        KeyError: If a word from ``parl_counter`` is missing from
            ``sort_tfidf_like``.
    """
    scores = dict(sort_tfidf_like)
    header = (
        "palavra",
        "valor",
        "frequencia",
        "entropia maxima",
        "entropia da palvra",
        "prob_politica",
        "entropia entre deputados",
    )
    # The context manager guarantees the handle is closed even when building
    # a row raises (e.g. KeyError for a word absent from ``scores``).
    with open(dir_out+"tfidf_like_parametros.csv", 'w') as f:
        f.write(";".join(header) + "\n")
        for word in parl_counter:
            fields = [
                word,
                str(scores[word]),
                '%.4f' % TfIdf.tf(word, parl_counter),
                '%.4f' % math.log2(len(counter_list)),
                '%.4f' % TfIdf.entropy(word, tot_counter, counter_list),
                '%.4f' % TfIdf.parl_prob(word, parl_counter, counter_list),
                '%.4f' % TfIdf.parl_entropy(word, tot_counter, counter_list_parl),
            ]
            f.write(";".join(fields) + "\n")
Exemple #2
0
def idf_like( word,parl_counter, tot_counter,doc_counter, counter_list_parl):
    """Return the idf-like weight of ``word``.

    The result is the product of three factors:

    * ``log2(len(doc_counter))`` minus ``TfIdf.entropy(word, tot_counter, doc_counter)``,
    * ``TfIdf.parl_prob(word, parl_counter, doc_counter)``,
    * ``TfIdf.parl_entropy(word, tot_counter, counter_list_parl)``.

    ``doc_counter`` must be a non-empty sized collection, otherwise
    ``math.log2`` raises ``ValueError``.
    """
    # NOTE(review): log2 of the collection size presumably serves as the
    # maximum attainable entropy -- confirm against TfIdf.entropy.
    max_entropy = math.log2(len(doc_counter))
    entropy_gap = max_entropy - TfIdf.entropy(word, tot_counter, doc_counter)
    prob_factor = TfIdf.parl_prob(word, parl_counter, doc_counter)
    parl_entropy_factor = TfIdf.parl_entropy(word, tot_counter, counter_list_parl)
    return entropy_gap * prob_factor * parl_entropy_factor
Exemple #3
0
def idf_pow( word,parl_counter, tot_counter,doc_counter, counter_list_parl,b1,b2):
    """Return a density-based weight for ``word``.

    The result is the product of:

    * ``expon.pdf`` evaluated at the word's entropy with ``scale=0.2``,
    * ``TfIdf.parl_prob(word, parl_counter, doc_counter)``,
    * ``beta.pdf`` with shape parameters ``b1`` and ``b2`` evaluated at
      ``2**entropy / 2**log2(len(doc_counter))``.

    ``counter_list_parl`` is accepted but not used by this function.
    ``doc_counter`` must be a non-empty sized collection, otherwise
    ``math.log2`` raises ``ValueError``.
    """
    max_entropy = math.log2(len(doc_counter))
    word_entropy = TfIdf.entropy(word, tot_counter, doc_counter)
    # Ratio of the two powers of two; kept as two math.pow calls so the
    # floating-point result is unchanged.
    ratio = math.pow(2, word_entropy) / math.pow(2, max_entropy)
    entropy_density = expon.pdf(word_entropy, scale=0.2)
    prob_factor = TfIdf.parl_prob(word, parl_counter, doc_counter)
    ratio_density = beta.pdf(ratio, b1, b2)
    return entropy_density * prob_factor * ratio_density