Ejemplo n.º 1
0
def scoreQueryWithWordOverlap(query, sqlWiz, question, questionTopWords, answersTopWords):
    """Score a query by the word overlap of its snippets with question/answer words.

    Fetches the snippets for ``query`` from ``sqlWiz``, filters them through
    ``Utils.filterOutDuplicateSnippets``, keeps at most the first 10, turns
    each remaining snippet into a set of preprocessed tokens and hands those
    sets to the ``Utils`` intersection helpers.

    Args:
        query: Query string (Str). It is split on single spaces to build the
            token set passed as the third argument to the ``Utils`` helpers.
        sqlWiz: Object providing ``getNSnippetsForQuery(query, n)``; every
            returned item must expose a ``snippet`` attribute.
        question: Forwarded to ``Utils.filterOutDuplicateSnippets`` together
            with the fetched snippets.
        questionTopWords: set(Str) compared against the snippet tokens.
        answersTopWords: set(Str) compared against the snippet tokens.

    Returns:
        The 4-tuple ``(aveIntersectionWQuestion, aveIntersectionWAnswers,
        totalIntersectionWQuestion, totalIntersectionWAnswers)``, or
        ``(0, 0, 0, 0)`` when the lookup yields no snippets or none survive
        the duplicate filter.

    Side effects:
        Prints debugging output to stdout. When the lookup returns no
        snippets at all, appends the query as one line to
        ``missingQueries.txt`` in the current working directory.
    """
    # NOTE(review): -1 presumably means "no limit" -- confirm against sqlWiz.
    snippets = sqlWiz.getNSnippetsForQuery(query, -1)

    print('Found snippets for query :: %s' % query)
    print([s.snippet for s in snippets])
    if len(snippets) == 0:
        # Open the log only when there is something to record, and let the
        # context manager close the handle instead of leaking it.
        with open('missingQueries.txt', 'a') as missingQueriesFile:
            missingQueriesFile.write('%s\n' % query)
        return (0, 0, 0, 0)

    snippets = Utils.filterOutDuplicateSnippets(snippets, question)
    snippets = snippets[:10]
    if len(snippets) == 0:
        return (0, 0, 0, 0)

    for s in snippets:
        print(s.snippet)

    print('\n**************\n')
    snippetTokens = [set(Utils.preprocessText(s.snippet).split()) for s in snippets]
    print(snippetTokens)

    # NOTE(review): the helpers presumably discount these query tokens from
    # the overlap -- confirm in Utils.
    queryTokens = set(query.split(' '))

    aveIntersectionWQuestion = Utils.averageSnippetIntersection(snippetTokens, questionTopWords, queryTokens)
    aveIntersectionWAnswers = Utils.averageSnippetIntersection(snippetTokens, answersTopWords, queryTokens)

    totalIntersectionWQuestion = Utils.totalSnippetIntersection(snippetTokens, questionTopWords, queryTokens)
    totalIntersectionWAnswers = Utils.totalSnippetIntersection(snippetTokens, answersTopWords, queryTokens)

    scores = (
        aveIntersectionWQuestion,
        aveIntersectionWAnswers,
        totalIntersectionWQuestion,
        totalIntersectionWAnswers,
    )
    print(scores)
    return scores
Ejemplo n.º 2
0
def scoreQueryWithCharNGramOverlap(query, sqlWiz, question, questionNGrams, answersNGrams, N):
    """Score a query by the n-gram overlap of its snippets with question/answer n-grams.

    Fetches the snippets for ``query`` from ``sqlWiz``, filters them through
    ``Utils.filterOutDuplicateSnippets``, keeps at most the first 10, builds
    a set of n-grams per snippet with ``constructNGramsForText`` and hands
    those sets to the ``Utils`` intersection helpers.

    Args:
        query: Query string used for the snippet lookup.
        sqlWiz: Object providing ``getNSnippetsForQuery(query, n)``; every
            returned item must expose a ``snippet`` attribute.
        question: Forwarded to ``Utils.filterOutDuplicateSnippets`` together
            with the fetched snippets.
        questionNGrams: Collection of n-grams compared against each
            snippet's n-gram set.
        answersNGrams: Collection of n-grams compared against each
            snippet's n-gram set.
        N: Forwarded to ``constructNGramsForText`` (presumably the n-gram
            length -- confirm against that helper).

    Returns:
        The 4-tuple ``(aveIntersectionWQuestion, aveIntersectionWAnswers,
        totalIntersectionWQuestion, totalIntersectionWAnswers)``, or
        ``(0, 0, 0, 0)`` when the lookup yields no snippets or none survive
        the duplicate filter.

    Side effects:
        When the lookup returns no snippets at all, appends the query as one
        line to ``missingQueries.txt`` in the current working directory.
    """
    # NOTE(review): -1 presumably means "no limit" -- confirm against sqlWiz.
    snippets = sqlWiz.getNSnippetsForQuery(query, -1)
    if len(snippets) == 0:
        # Open the log only when there is something to record, and let the
        # context manager close the handle instead of leaking it.
        with open('missingQueries.txt', 'a') as missingQueriesFile:
            missingQueriesFile.write('%s\n' % query)
        return (0, 0, 0, 0)

    snippets = Utils.filterOutDuplicateSnippets(snippets, question)
    snippets = snippets[:10]
    if len(snippets) == 0:
        return (0, 0, 0, 0)

    snippetsNgrams = [set(constructNGramsForText(s.snippet, N)) for s in snippets]

    # An empty set is passed as the helpers' third argument here; the
    # word-overlap variant passes the query tokens in that position.
    noQueryTokens = set()

    aveIntersectionWQuestion = Utils.averageSnippetIntersection(snippetsNgrams, questionNGrams, noQueryTokens)
    aveIntersectionWAnswers = Utils.averageSnippetIntersection(snippetsNgrams, answersNGrams, noQueryTokens)
    totalIntersectionWQuestion = Utils.totalSnippetIntersection(snippetsNgrams, questionNGrams, noQueryTokens)
    totalIntersectionWAnswers = Utils.totalSnippetIntersection(snippetsNgrams, answersNGrams, noQueryTokens)

    return (aveIntersectionWQuestion, aveIntersectionWAnswers, totalIntersectionWQuestion, totalIntersectionWAnswers)