def good_turing_interpolation(corpus_path="Hindi.txt", weights=(0.5, 0.3, 0.2)):
    """Print an interpolated trigram/bigram/unigram score for every corpus line.

    For each n-gram order, a "count of counts" table is built from the
    module-level ``trigram_dict`` / ``bigram_dict`` / ``unigram_dict``: it maps
    each value found in the dict to the number of entries holding that value.
    Every line of the corpus is then tokenised with the matching
    ``*_generator`` helper, the ``calculate_prob`` results of its tokens are
    multiplied together per order, and the three products are combined as a
    weighted sum.  One ``line <n> : <score>`` row is printed per input line,
    after a ``Probabilities`` header.

    NOTE(review): ``calculate_prob`` is defined elsewhere; presumably it
    returns the Good-Turing smoothed probability of a single n-gram -- confirm.

    Args:
        corpus_path: Text file to score, one sentence per line.  Defaults to
            the previously hard-coded ``"Hindi.txt"``.
        weights: Interpolation weights for the (trigram, bigram, unigram)
            products, in that order.  Defaults to the original 0.5/0.3/0.2.

    Raises:
        ValueError: If ``weights`` does not contain exactly three values.
        IOError/OSError: If ``corpus_path`` cannot be opened.
    """
    from collections import Counter

    if len(weights) != 3:
        raise ValueError("weights must hold exactly three values "
                         "(trigram, bigram, unigram)")

    # One entry per n-gram order, highest order first:
    # (tokenizer, count-of-counts table, n-gram counts, n-gram total).
    # Counter is converted back to a plain dict so lookups of unseen counts
    # still raise KeyError exactly as the hand-built tables did.
    models = [
        (trigram_generator, dict(Counter(trigram_dict.values())),
         trigram_dict, trigram_len),
        (bigram_generator, dict(Counter(bigram_dict.values())),
         bigram_dict, bigram_len),
        (unigram_generator, dict(Counter(unigram_dict.values())),
         unigram_dict, unigram_len),
    ]

    # The context manager guarantees the corpus file is closed, even if a
    # helper raises part-way through.
    with open(corpus_path, "r") as corpus:
        print("Probabilities")
        for line_no, line in enumerate(corpus, 1):
            final_prob = 0.0
            for weight, (tokenize, bins, counts, total) in zip(weights, models):
                probability = 1.0
                tokens, _ = tokenize(line)
                for token in tokens:
                    probability *= float(
                        calculate_prob(token, bins, counts, total))
                final_prob += weight * probability
            # Single pre-formatted string: prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("line %d : %s" % (line_no, final_prob))
def good_turing_interpolation(corpus_path='Hindi.txt', weights=(0.5, 0.3, 0.2)):
    """Print an interpolated trigram/bigram/unigram score for every corpus line.

    For each n-gram order, a "count of counts" table is built from the
    module-level ``trigram_dict`` / ``bigram_dict`` / ``unigram_dict``: it maps
    each value found in the dict to the number of entries holding that value.
    Every line of the corpus is then tokenised with ``add_tri`` / ``add_bi`` /
    ``add_uni``, the ``calculate_good_next`` results of its tokens are
    multiplied together per order, and the three products are combined as a
    weighted sum.  The resulting score is printed on its own line, one per
    input line.

    NOTE(review): ``calculate_good_next`` is defined elsewhere; presumably it
    returns the Good-Turing smoothed probability of a single n-gram -- confirm.

    Args:
        corpus_path: Text file to score, one sentence per line.  Defaults to
            the previously hard-coded ``'Hindi.txt'``.
        weights: Interpolation weights for the (trigram, bigram, unigram)
            products, in that order.  Defaults to the original 0.5/0.3/0.2.

    Raises:
        ValueError: If ``weights`` does not contain exactly three values.
        IOError/OSError: If ``corpus_path`` cannot be opened.
    """
    from collections import Counter

    if len(weights) != 3:
        raise ValueError("weights must hold exactly three values "
                         "(trigram, bigram, unigram)")

    # One entry per n-gram order, highest order first:
    # (tokenizer, count-of-counts table, n-gram total, n-gram counts).
    # Counter is converted back to a plain dict so lookups of unseen counts
    # still raise KeyError exactly as the hand-built tables did.
    models = [
        (add_tri, dict(Counter(trigram_dict.values())),
         trigram_len, trigram_dict),
        (add_bi, dict(Counter(bigram_dict.values())),
         bigram_len, bigram_dict),
        (add_uni, dict(Counter(unigram_dict.values())),
         unigram_len, unigram_dict),
    ]

    # The context manager guarantees the corpus file is closed, even if a
    # helper raises part-way through.
    with open(corpus_path, 'r') as corpus:
        for line in corpus:
            prob_add = 0.0
            for weight, (tokenize, bins, total, counts) in zip(weights, models):
                prob = 1.0
                tokens, _ = tokenize(line)
                for token in tokens:
                    prob *= float(
                        calculate_good_next(bins, token, total, counts))
                prob_add += weight * prob
            # Parenthesised single value: prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print(prob_add)