def scoreQueryWithWordOverlap(query, sqlWiz, question, questionTopWords, answersTopWords):
    """Score a query by word overlap between its snippets and the question/answers.

    Snippets for ``query`` are fetched through ``sqlWiz``, de-duplicated with
    ``Utils.filterOutDuplicateSnippets`` and truncated to the first 10. Each
    remaining snippet is preprocessed and split into a set of tokens, which is
    then compared against the question's and the answers' top words.

    Args:
        query: Query string (str).
        sqlWiz: Object exposing ``getNSnippetsForQuery(query, n)``; the items
            it returns must have a ``snippet`` attribute.
        question: Passed to ``Utils.filterOutDuplicateSnippets`` together with
            the snippets.
        questionTopWords: set(str) of top words of the question.
        answersTopWords: set(str) of top words of the answers.

    Returns:
        A 4-tuple ``(aveIntersectionWQuestion, aveIntersectionWAnswers,
        totalIntersectionWQuestion, totalIntersectionWAnswers)``. It is
        ``(0, 0, 0, 0)`` when no snippets are found for the query or none
        survive the duplicate filter.

    Side effects:
        Prints debugging output to stdout. When the query has no snippets at
        all, the query is appended to ``missingQueries.txt`` in the current
        working directory.
    """
    # NOTE(review): -1 presumably means "no limit" -- confirm against sqlWiz.
    snippets = sqlWiz.getNSnippetsForQuery(query, -1)
    print('Found snippets for query :: %s' % query)
    print([s.snippet for s in snippets])

    if not snippets:
        # Open the log only when there is something to record, and let the
        # context manager close it; the handle used to be opened on every
        # call and never closed.
        with open('missingQueries.txt', 'a') as missingQueriesFile:
            missingQueriesFile.write('%s\n' % query)
        return (0, 0, 0, 0)

    snippets = Utils.filterOutDuplicateSnippets(snippets, question)
    snippets = snippets[:10]
    if not snippets:
        return (0, 0, 0, 0)

    for s in snippets:
        print(s.snippet)
        print('\n**************\n')

    snippetTokens = [set(Utils.preprocessText(s.snippet).split()) for s in snippets]
    print(snippetTokens)

    # The query's own words are handed to the intersection helpers as a
    # separate set (presumably so they can be discounted -- see Utils).
    queryTokens = set(query.split(' '))

    scores = (
        Utils.averageSnippetIntersection(snippetTokens, questionTopWords, queryTokens),
        Utils.averageSnippetIntersection(snippetTokens, answersTopWords, queryTokens),
        Utils.totalSnippetIntersection(snippetTokens, questionTopWords, queryTokens),
        Utils.totalSnippetIntersection(snippetTokens, answersTopWords, queryTokens),
    )
    print(scores)
    return scores
def scoreQueryWithCharNGramOverlap(query, sqlWiz, question, questionNGrams, answersNGrams, N):
    """Score a query by n-gram overlap between its snippets and the question/answers.

    Snippets for ``query`` are fetched through ``sqlWiz``, de-duplicated with
    ``Utils.filterOutDuplicateSnippets`` and truncated to the first 10. Each
    remaining snippet is turned into a set of n-grams with
    ``constructNGramsForText(snippet, N)`` and compared against the question's
    and the answers' n-grams.

    Args:
        query: Query string.
        sqlWiz: Object exposing ``getNSnippetsForQuery(query, n)``; the items
            it returns must have a ``snippet`` attribute.
        question: Passed to ``Utils.filterOutDuplicateSnippets`` together with
            the snippets.
        questionNGrams: N-grams of the question (presumably a set -- confirm
            against the caller).
        answersNGrams: N-grams of the answers (presumably a set -- confirm
            against the caller).
        N: N-gram size forwarded to ``constructNGramsForText``.

    Returns:
        A 4-tuple ``(aveIntersectionWQuestion, aveIntersectionWAnswers,
        totalIntersectionWQuestion, totalIntersectionWAnswers)``. It is
        ``(0, 0, 0, 0)`` when no snippets are found for the query or none
        survive the duplicate filter.

    Side effects:
        When the query has no snippets at all, the query is appended to
        ``missingQueries.txt`` in the current working directory.
    """
    # NOTE(review): -1 presumably means "no limit" -- confirm against sqlWiz.
    snippets = sqlWiz.getNSnippetsForQuery(query, -1)

    if not snippets:
        # Open the log only when there is something to record, and let the
        # context manager close it; the handle used to be opened on every
        # call and never closed.
        with open('missingQueries.txt', 'a') as missingQueriesFile:
            missingQueriesFile.write('%s\n' % query)
        return (0, 0, 0, 0)

    snippets = Utils.filterOutDuplicateSnippets(snippets, question)
    snippets = snippets[:10]
    if not snippets:
        return (0, 0, 0, 0)

    snippetsNgrams = [set(constructNGramsForText(s.snippet, N)) for s in snippets]

    # Unlike the word-overlap scorer, no query tokens are supplied here: the
    # third argument to the intersection helpers is always an empty set.
    aveIntersectionWQuestion = Utils.averageSnippetIntersection(snippetsNgrams, questionNGrams, set())
    aveIntersectionWAnswers = Utils.averageSnippetIntersection(snippetsNgrams, answersNGrams, set())
    totalIntersectionWQuestion = Utils.totalSnippetIntersection(snippetsNgrams, questionNGrams, set())
    totalIntersectionWAnswers = Utils.totalSnippetIntersection(snippetsNgrams, answersNGrams, set())

    return (
        aveIntersectionWQuestion,
        aveIntersectionWAnswers,
        totalIntersectionWQuestion,
        totalIntersectionWAnswers,
    )