Ejemplo n.º 1
0
def TLM(question, answer, word_dict, translation):
    """Score ``answer`` for ``question`` with a translation language model.

    For every distinct question word, three quantities are mixed:

    * a translation part: the sum over answer words ``w`` of
      ``similarity(word, w) * count(w) / len(answer)``, counting only
      strictly positive similarities;
    * a direct part: ``count(word) / len(answer)`` when the question word
      itself occurs in the answer;
    * a collection part taken from ``word_dict``.

    Args:
        question: Question tokens, handed to ``tranfer_dict_vect``.
        answer: Answer tokens; ``len(answer)`` normalizes the term counts.
        word_dict: Mapping from word to a sequence whose element ``[1]`` is
            used, as-is, as the collection-level probability of the word.
        translation: Object exposing ``similarity(word_a, word_b)``
            (presumably a word-embedding model -- confirm against caller).

    Returns:
        The product, over the distinct question words, of
        ``(1 - lbd) * P_answer(word) + lbd * P_collection(word)``.
    """
    # NOTE(review): tranfer_dict_vect is assumed to map each token to its
    # occurrence count -- it is defined outside this block.
    q = tranfer_dict_vect(question)
    d = tranfer_dict_vect(answer)

    lbd = 0.2    # weight of the collection model in the final smoothing
    beta = 0.8   # weight of the translation part against the direct part
    # Collection probability used for words missing from word_dict
    # (presumably 1 / vocabulary size -- TODO confirm).
    unseen_prob = 1.0 / 80000.0
    answer_len = len(answer)

    s = 1.0
    for word in q:
        # Translation part; an earlier version of this code read the weights
        # from a nested dict (translation[word][w]) instead of similarity().
        p_answer = 0
        for w in d:
            t = translation.similarity(word, w)
            if t > 0:
                p_answer += t * float(d[w]) / answer_len

        # Blend in the direct (exact match) part when the word is present.
        if word in d:
            p_answer = p_answer * beta + (1 - beta) * float(
                d[word]) / answer_len
        else:
            p_answer = p_answer * beta

        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in word_dict:
            p_collection = word_dict[word][1]
        else:
            p_collection = unseen_prob

        s *= (1 - lbd) * p_answer + lbd * p_collection
    return s
Ejemplo n.º 2
0
def VSM(question, answer, word_dict, translation):
    """Return the cosine similarity between question and answer vectors.

    Question words are weighted by ``log(1 + Narticle / df)`` where ``df`` is
    ``word_dict[word][0]``; question words missing from ``word_dict`` are
    ignored.  Answer words are weighted by ``1 + log(count)``.

    Args:
        question: Question tokens, handed to ``tranfer_dict_vect``.
        answer: Answer tokens, handed to ``tranfer_dict_vect``.
        word_dict: Mapping from word to a sequence whose element ``[0]`` is
            used as the document frequency of the word.
        translation: Unused; kept so every scoring function shares one
            signature.

    Returns:
        The dot product of the two weight vectors divided by both Euclidean
        norms, or ``0.0`` when either vector is empty (the division would
        otherwise be 0 / 0 and yield NaN).
    """
    # NOTE(review): tranfer_dict_vect is assumed to map each token to its
    # occurrence count -- it is defined outside this block.
    q = tranfer_dict_vect(question)
    d = tranfer_dict_vect(answer)

    # Question side: IDF-style weights and their squared norm.
    wq = {}
    Wq = 0.0
    for word in q:
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in word_dict:
            wq[word] = numpy.log(1 + Narticle / float(word_dict[word][0]))
            Wq += wq[word] * wq[word]
    Wq = numpy.sqrt(Wq)

    # Answer side: log-scaled term frequencies and their squared norm.
    wd = {}
    Wd = 0.0
    for word in d:
        wd[word] = 1 + numpy.log(d[word])
        Wd += wd[word] * wd[word]
    Wd = numpy.sqrt(Wd)

    # A zero norm means the corresponding vector is empty, so there is no
    # overlap to measure; avoid the 0 / 0 division.
    if Wq == 0 or Wd == 0:
        return 0.0

    s = 0.0
    for word in wq:
        if word in wd:
            s += wq[word] * wd[word]

    return s / Wq / Wd
Ejemplo n.º 3
0
def LM(question, answer, word_dict, translation):
    """Score ``answer`` for ``question`` with a smoothed unigram model.

    Args:
        question: Question tokens, handed to ``tranfer_dict_vect``.
        answer: Answer tokens; ``len(answer)`` normalizes the term counts.
        word_dict: Mapping from word to a sequence whose element ``[1]`` is
            used, as-is, as the collection-level probability of the word.
        translation: Unused; kept so every scoring function shares one
            signature.

    Returns:
        The product, over the distinct question words, of
        ``(1 - lbd) * count(word) / len(answer) + lbd * P_collection(word)``.
    """
    # NOTE(review): tranfer_dict_vect is assumed to map each token to its
    # occurrence count -- it is defined outside this block.
    q = tranfer_dict_vect(question)
    d = tranfer_dict_vect(answer)

    lbd = 0.2    # weight of the collection model in the smoothing
    # Collection probability used for words missing from word_dict
    # (presumably 1 / vocabulary size -- TODO confirm).
    unseen_prob = 1.0 / 80000.0

    s = 1.0
    for word in q:
        # Maximum-likelihood estimate from the answer; zero when absent.
        if word in d:
            p_answer = float(d[word]) / len(answer)
        else:
            p_answer = 0

        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in word_dict:
            p_collection = word_dict[word][1]
        else:
            p_collection = unseen_prob

        s *= (1 - lbd) * p_answer + lbd * p_collection
    return s
Ejemplo n.º 4
0
def Okapi(question, answer, word_dict, translation):
    """Score ``answer`` for ``question`` with an Okapi BM25-style formula.

    Args:
        question: Question tokens, handed to ``tranfer_dict_vect``; its
            length also enters the normalization term ``Kd``.
        answer: Answer tokens, handed to ``tranfer_dict_vect``.
        word_dict: Mapping from word to a sequence whose element ``[0]`` is
            used as the document frequency of the word.
        translation: Unused; kept so every scoring function shares one
            signature.

    Returns:
        The sum, over words present in both the weighted question vector and
        the answer, of the question weight times the answer weight.  Question
        words missing from ``word_dict`` do not contribute.
    """
    # NOTE(review): tranfer_dict_vect is assumed to map each token to its
    # occurrence count -- it is defined outside this block.
    q = tranfer_dict_vect(question)
    d = tranfer_dict_vect(answer)
    k1 = 1.2
    b = 0.75

    # Question side: IDF term scaled by the word's count in the question.
    wq = {}
    for word in q:
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if word in word_dict:
            df = word_dict[word][0]
            wq[word] = numpy.log((Narticle - df + 0.5) /
                                 (df + 0.5)) * q[word]

    # NOTE(review): textbook BM25 normalizes with
    # document_length / average_document_length; this code uses
    # len(question) * 27 instead.  Left unchanged -- confirm the intent.
    Kd = k1 * ((1 - b) + b * len(question) * 27)

    # Answer side: saturating term-frequency weight.
    wd = {}
    for word in d:
        wd[word] = (k1 + 1) * d[word] / (Kd + d[word])

    s = 0.0
    for word in wq:
        if word in wd:
            s += wq[word] * wd[word]

    return s
Ejemplo n.º 5
0
# Build question-word -> answer-word co-occurrence counts from the QA pairs
# stored in a shelve database.
# NOTE(review): the shelf is not closed in the visible part of the script --
# confirm that db.close() is called further down.
db = shelve.open('database_30_50.dat')    
table = db['Table']
qa = db['qa']

# translation[question_word][answer_word] -> co-occurrence count (as float).
translation = {}
# Per-word counters; word_count2 is not touched in the visible excerpt.
word_count1 = {}
word_count2 = {}
n=0
for index in table:
    n+=1
    # Progress output: number of table entries visited so far.
    print n
    # Stop once more than 90000 entries have been visited.
    if n > 90000:
        break
    q,a = qa[index]
    # Keys of the per-text mappings returned by tranfer_dict_vect
    # (presumably the distinct tokens of each text -- defined elsewhere).
    qq = tranfer_dict_vect(q).keys()
    aa = tranfer_dict_vect(a).keys()
    # Count every (question word, answer word) pair of this QA entry once,
    # creating the nested dict / counter on first sight.
    for word0 in qq:
        for word1 in aa:
            if translation.has_key(word0):
                if translation[word0].has_key(word1):
                    translation[word0][word1] += 1.0
                else:
                    translation[word0][word1] = 1.0
            else:
                translation[word0] = {}
                translation[word0][word1] = 1.0
                
    # Increment the counter of question words already present in
    # word_count1.
    # NOTE(review): the excerpt ends here; no branch initializing unseen
    # words is visible, so confirm one follows.
    for word in qq:
        if word_count1.has_key(word):
            word_count1[word] += 1
Ejemplo n.º 6
0
    # Segment the answer text with jieba; if anything goes wrong the current
    # iteration of the enclosing loop (header not visible here) is skipped.
    # NOTE(review): the bare `except:` also swallows KeyboardInterrupt and
    # SystemExit -- consider narrowing it.
    try:
        answer = list(jieba.cut(article.answer[0][1]))
    except:
        continue
    # tanswer collects the values looked up in `model`, tanswer1 the UTF-8
    # encoded tokens for which that lookup succeeded.
    tanswer = []
    tanswer1 = []
    for j in range(len(answer)):
        # Tokens whose encoding or `model` lookup raises are silently dropped.
        # NOTE(review): bare `except:` hides every error type, not only a
        # missing-key failure.
        try:
            tanswer.append(model[answer[j].encode('utf-8')])
            tanswer1.append(answer[j].encode('utf-8'))
        except:
            pass
    # From here on the answer is the list of kept, encoded tokens.
    answer = tanswer1
    # Running total of answer lengths.
    la += len(answer)

    # Update corpus statistics from question and answer together.
    # word_dict[word] is [number of records containing the word, summed
    # per-record value of the word]; the per-record value comes from
    # tranfer_dict_vect (presumably an occurrence count -- defined elsewhere).
    word_vect = tranfer_dict_vect(question + answer)
    for word in word_vect.keys():
        if word_dict.has_key(word):
            word_dict[word][0] += 1
            word_dict[word][1] += word_vect[word]
        else:
            word_dict[word] = [1, word_vect[word]]
        nword += word_vect[word]

    # The filters below run after the statistics update, so records rejected
    # here have already contributed to word_dict and nword.
    # Reject records with an empty question or answer.
    if (len(question) == 0) or (len(answer) == 0):
        continue
    # Reject records with more than 30 question tokens or 50 answer tokens.
    if (len(question) > 30) or (len(answer) > 50):
        continue
    # Keep the record: remember its index, store the token lists and add the
    # question tokens to the running word list.
    table50.append(index)
    qa[index] = (question, answer)
    words.extend(question)