コード例 #1
0
ファイル: views.py プロジェクト: xzr12/IR
def test(request):
    """Smoke-test view for the English query pipeline.

    Runs a fixed, deliberately misspelled English sentence through
    ``splitWords`` -> ``skipStopWords`` -> ``corrector_EN``, hands the
    corrected terms to ``calcSimilarity`` and prints the first (at most
    ten) result entries plus the sizes of both returned collections to
    stdout.

    The mode argument ``1`` is the value used together with
    ``corrector_EN``; a Chinese check would instead go through
    ``corrector_CH`` / ``splitWordsAfter`` with mode ``0``.

    :param request: the incoming request object (not inspected).
    :returns: an ``HttpResponse`` with the fixed body ``'test query!'``.
    """
    dictionary, commonDic = loadDic()
    sample_query = "I cannot spelll a right arawee today good"
    tokens = splitWords(sample_query)
    filtered = skipStopWords(tokens, 1)
    output = corrector_EN(filtered, dictionary, commonDic)

    docu, results = calcSimilarity(output, 1)
    # Bound the loop by the number of results actually returned; indexing
    # a fixed range(10) raised IndexError for queries with fewer hits.
    for i in range(min(10, len(results))):
        print(results[i][0] + ' : ' + str(results[i][1]))
    print(len(docu))
    print(len(results))

    return HttpResponse('test query!')
コード例 #2
0
ファイル: views.py プロジェクト: xzr12/IR
def _snippet_parts(content, needle, word, chinese, sep):
    """Build the highlighted excerpt pieces for one matched query word.

    ``content`` is split on ``needle``; the result is a flat list made of
    a leading context piece followed, for every occurrence, by
    ``'<em>'``, ``word``, ``'</em>'`` and a trailing context piece.

    :param content: decoded document text.
    :param needle: ``word`` decoded from UTF-8, used for the split.
    :param word: the query word exactly as it should appear in the output.
    :param chinese: when true, context pieces are encoded to UTF-8 and the
        leading piece drops its last three characters instead of one.
    :param sep: number of context characters kept on each side.
    :returns: list of string fragments.
    """
    def emit(text):
        # Only the Chinese path re-encodes the context fragments.
        return text.encode('utf-8') if chinese else text

    lead_trim = 3 if chinese else 1
    pieces = content.split(needle)
    last = len(pieces) - 1
    parts = []
    for i, piece in enumerate(pieces):
        size = len(piece)
        if i == 0:
            # Text right before the first occurrence.
            parts.append('...' + emit(piece[(size - sep - 1):(size - lead_trim)]))
            continue
        parts.append('<em>')
        parts.append(word)
        parts.append('</em>')
        head = emit(piece[0:sep])
        if i == last:
            parts.append(head + '...')
        else:
            # Between two occurrences: keep the start and the end of the gap.
            parts.append(head + '...' + emit(piece[(size - sep - 1):(size - 1)]))
    return parts


def search(request):
    """Handle the search page: show the form on GET, run a query on POST.

    The POSTed ``search_input`` is tokenised with ``splitWords``. If the
    first token is alphanumeric the English pipeline is used (mode ``1``,
    ``corrector_EN``, documents under ``content_wiki/``); otherwise the
    Chinese pipeline is used (mode ``0``, ``corrector_CH`` followed by
    ``splitWordsAfter``, documents under ``content_sohu/``).

    For every entry returned by ``calcSimilarity`` the document file is
    read and an excerpt is built around the first query word found in it.

    :param request: request object providing ``method`` and ``POST``.
    :returns: the rendered ``indexQuery.html`` for GET, otherwise the
        rendered ``index.html`` with either an error message or the
        result list, corrected query and timing.
    """
    if request.method == 'GET':
        return render_to_response('indexQuery.html', {})

    query = request.POST.get('search_input', '')
    tokens = splitWords(query)
    if len(tokens) == 0:
        return render_to_response('index.html', {'input': query, 'error': '没有找到相关的结果!'})

    started = clock()
    if tokens[0].isalnum():
        mode = 1
        dictionary, commonDic = loadDic()
        filtered = skipStopWords(tokens, 1)
        output = corrector_EN(filtered, dictionary, commonDic)
        corrected = ' '.join(output)
    else:
        mode = 0
        filtered = skipStopWords(tokens, 0)
        corrected_words = corrector_CH(filtered)
        corrected = ''.join(corrected_words)
        retokenised = splitWordsAfter(corrected)
        output = skipStopWords(retokenised, 0)

    # change == 1 tells the template that the corrected query no longer
    # contains the text the user typed.
    if query.encode('utf-8') in corrected.encode('utf-8'):
        change = 0
    else:
        change = 1

    docu, results = calcSimilarity(output, mode)
    timeout = clock() - started

    chinese = (mode == 0)
    folder = 'content_sohu/' if chinese else 'content_wiki/'
    sep = 20  # context characters kept on each side of a match

    out = []
    for entry in results:
        documentName = entry[0]
        # Close the handle deterministically; one file is opened per result.
        with open(folder + documentName + '.txt') as handle:
            lines = handle.readlines()
        content = ' '.join(' '.join(lines).split('\n')).decode('utf-8')

        parts = []
        for word in output:
            needle = word.decode('utf-8')
            if needle in content:
                # Only the first query word present in the document is used.
                parts = _snippet_parts(content, needle, word, chinese, sep)
                break
        if len(parts) > 22:
            parts = parts[0:21]
        # NOTE(review): document text is placed next to literal <em> markup
        # without escaping; if the template renders 'content' unescaped,
        # confirm the document files are trusted.
        out.append({
            'docu': documentName,
            'href': docu[documentName]['href'],
            'title': docu[documentName]['title'],
            'content': parts,
        })
    return render_to_response('index.html', {
        'input': query,
        'output': corrected,
        'outList': output,
        'change': change,
        'times': len(results),
        'time': timeout,
        'results': out,
    })