Example #1
from math import log

# Perplexity-style uncertainty score for a candidate translation: combines the
# langout n-gram model probability of the translated text with the average
# probability of the individual n-gram translations. make_string,
# split_n_gramm, n_gramm_estimation and the Ngramm / Translation models come
# from the same project (not shown here).
def uncertainty(orig, langin, trans, langout, change):
    t = make_string(trans)
    words = split_n_gramm(t)
    sum_entropy = 0.0
    i = 0
    n = 5
    while i < len(words):
        try:
            if i + n < len(words):
                sum_entropy += log(
                    n_gramm_estimation(words[i:i + n], langout, n), 2)
            else:
                sum_entropy += log(
                    n_gramm_estimation(words[i:len(words)], langout,
                                       len(words) - i), 2)
        except Exception:
            # Unseen or zero-probability chunk: heavy penalty instead of -inf.
            sum_entropy += -99999
        # Advance outside the try block, otherwise an exception would leave i
        # unchanged and the loop would never terminate.
        i += n

    sum_max_prob = 0.0
    for (i, n_gramm) in enumerate(trans):
        try:
            # Raise DoesNotExist early if either n-gram is unknown.
            Ngramm.objects.get(n_gramm=n_gramm, lang=langout)
            Ngramm.objects.get(n_gramm=orig[i], lang=langout)
            if change:
                sum_max_prob += log(
                    Translation.objects.get(orig=n_gramm,
                                            lang_orig=langout,
                                            trans=orig[i],
                                            lang_trans=langin).probability, 2)
            else:
                sum_max_prob += log(
                    Translation.objects.get(orig=orig[i],
                                            lang_orig=langin,
                                            trans=n_gramm,
                                            lang_trans=langout).probability, 2)
        except Exception:
            # Missing n-gram or translation pair: same heavy penalty.
            sum_max_prob += -99999

    power = -1 * (sum_entropy + sum_max_prob / len(trans))
    if power > 10:
        # pow(2, power) would be astronomically large; return the exponent.
        return power
    return pow(2, power)
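Read as a formula, the score is 2 ** -(sum(log2 p_lm(chunk)) + mean(log2 p_tr(pair))), with the exponent itself returned once it exceeds 10. Below is a self-contained sketch of that final step only; the probabilities are made up and stand in for the n_gramm_estimation and Translation lookups, so treat it as an illustration, not part of the project.

from math import log

# Hypothetical values in place of the database / model lookups above.
chunk_probs = [0.02, 0.05]            # what n_gramm_estimation would return
pair_probs = [0.4, 0.1, 0.25]         # Translation.probability values

sum_entropy = sum(log(p, 2) for p in chunk_probs)
sum_max_prob = sum(log(p, 2) for p in pair_probs)
power = -1 * (sum_entropy + sum_max_prob / len(pair_probs))
score = power if power > 10 else pow(2, power)
print(round(score, 2))                # prints the exponent here, since power > 10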
Example #2
from math import log

# Average log2 probability of the text under the langout n-gram model:
# summed over chunks of `size` tokens, normalised by the number of tokens.
def cross_entropy(text, langout, size):
    total = 0.0
    text = make_string(text)
    words = split_n_gramm(text)
    # Split into higher-order n-grams.
    # words = join_by_n(text, size)
    i = 0
    while i < len(words):
        try:
            if i + size < len(words):
                total += log(n_gramm_estimation(words[i:i + size], langout, size), 2)
            else:
                total += log(n_gramm_estimation(words[i:len(words)], langout, len(words) - i), 2)
        except Exception:
            # Unseen or zero-probability chunk: heavy penalty instead of -inf.
            total += -99999
        i += size

    return total / len(words)
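The loop above walks the token list in fixed-size steps with a shorter final chunk. The following self-contained illustration reproduces just that slicing and averaging; chunk_log_prob and the constant-probability estimator are hypothetical stand-ins, not part of the project.

from math import log

def chunk_log_prob(words, size, estimate):
    # Same traversal as cross_entropy: fixed-size chunks, a shorter tail
    # chunk, and a large negative penalty when the estimate is unusable.
    total = 0.0
    i = 0
    while i < len(words):
        chunk = words[i:i + size] if i + size < len(words) else words[i:]
        try:
            total += log(estimate(chunk), 2)
        except (ValueError, ZeroDivisionError):
            total += -99999
        i += size
    return total / len(words)

# Hypothetical estimator: every chunk gets probability 1/8 (log2 = -3), so
# eight tokens in chunks of three give 3 * -3 / 8 = -1.125.
print(chunk_log_prob(list("abcdefgh"), 3, lambda chunk: 0.125))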
Example #3
File: models.py  Project: BergerV/denotat
    def get_n(self):
        # Order of the stored n-gram, i.e. its number of tokens.
        return len(split_n_gramm(self.n_gramm))
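split_n_gramm itself is defined elsewhere in the project. Assuming it is a plain whitespace split (an assumption, not confirmed by the snippet), get_n reports the n-gram's order, as in this minimal sketch:

# Hypothetical stand-in for the project's split_n_gramm helper.
def split_n_gramm(text):
    return text.split()

print(len(split_n_gramm("the quick fox")))  # 3, i.e. a trigram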