def render_prevset_url(self, context, data):
    """Build the URL of the previous page of search results.

    The search words are read from the ``words`` argument of ``context``
    (decoded as UTF-8), normalized with ``normalizeText`` and split with
    ``WORDS_RGX``. The current offset is read from the ``offset``
    argument (0 when absent). ``data`` is not used.

    Returns a ``search?words=...&offset=...`` string whose offset is 15
    lower than the current one, but never below 0.
    """
    words = WORDS_RGX.findall(
        normalizeText(unicode(context.arg('words'), 'utf-8')))
    offset = int(context.arg('offset', 0))
    # Clamp at 0: stepping back from an offset smaller than 15 (or from a
    # negative one) must not produce a negative offset in the URL.
    offset = max(offset - 15, 0)
    return 'search?words=%s&offset=%s' % ('+'.join(words), offset)
def _updateScores(self, cursor, db_document_id, text):
    """Insert or update rows of the document_score table for a document.

    Only the first occurrence of each word found by ``WORDS_RGX`` in the
    normalized ``text`` is considered. A word that already has a score
    entry gets its position updated when it differs; any other word gets
    a new ``DocumentScore`` with all scores set to 0.
    """
    known_scores = self._getScoresDict(cursor, db_document_id)
    seen_words = set()
    for match in WORDS_RGX.finditer(normalizeText(text)):
        word = match.group(0)
        if word in seen_words:
            # later occurrences of a word are ignored
            continue
        seen_words.add(word)
        position = match.start()

        if word not in known_scores:
            # insert a row in the Word table if required
            self._ensureWordInDatabase(cursor, word)
            new_score = DocumentScore(db_document_id=db_document_id,
                                      word=word,
                                      position=position,
                                      download_count=0.,
                                      relevance=0.,
                                      popularity=0.)
            new_score.commit(cursor, update=False)
            continue

        existing = known_scores[word]
        if existing.position != position:
            existing.position = position
            existing.commit(cursor, update=True)
def _updateScores(self, cursor, db_document_id, text):
    """Synchronise the document_score table with the words of ``text``.

    For every distinct word matched by ``WORDS_RGX`` in the normalized
    text, the position of its first occurrence is recorded: an existing
    score entry is committed as an update when its position changed, and
    a missing one is created with zeroed scores.
    """
    stored = self._getScoresDict(cursor, db_document_id)

    # Phase 1: first occurrence of each word, kept in document order.
    first_hits = []
    already_met = set()
    for found in WORDS_RGX.finditer(normalizeText(text)):
        token = found.group(0)
        if token not in already_met:
            already_met.add(token)
            first_hits.append((token, found.start()))

    # Phase 2: apply the changes to the database, in the same order.
    for token, start in first_hits:
        if token in stored:
            entry = stored[token]
            if entry.position != start:
                entry.position = start
                entry.commit(cursor, update=True)
        else:
            # insert a row in the Word table if required
            self._ensureWordInDatabase(cursor, token)
            entry = DocumentScore(db_document_id=db_document_id,
                                  word=token,
                                  position=start,
                                  download_count=0.,
                                  relevance=0.,
                                  popularity=0.)
            entry.commit(cursor, update=False)
def findDocuments(self, query):
    """Find all indexed documents matching the query.

    ``query`` is normalized and split into words with ``WORDS_RGX``; the
    words are passed to ``_updateQueryStatistics`` and then to
    ``Document.selectContaining``, whose result is returned. The cursor
    is closed before returning, even on error.
    """
    words = WORDS_RGX.findall(normalizeText(query))
    self._updateQueryStatistics(words)
    # Acquire the cursor before the try block: if cursor() itself raised
    # inside it, the finally clause would reference an unbound name and
    # mask the original error.
    cursor = self._cnx.cursor()
    try:
        return Document.selectContaining(cursor, words)
    finally:
        cursor.close()
def findDocuments(self, query):
    """Find all indexed documents matching the query.

    ``query`` is an object providing ``words``, ``filetype`` and
    ``offset`` attributes. The words are normalized and split with
    ``WORDS_RGX``, passed to ``_updateQueryStatistics``, and then passed
    with the other query attributes and ``self.searchInPrivate`` to
    ``Document.selectContaining``, whose result is returned. The cursor
    is closed before returning, even on error.
    """
    # TODO: order results using document_scores information
    words = WORDS_RGX.findall(normalizeText(unicode(query.words)))
    self._updateQueryStatistics(words)
    # Acquire the cursor before the try block: if cursor() itself raised
    # inside it, the finally clause would reference an unbound name and
    # mask the original error.
    cursor = self._cnx.cursor()
    try:
        return Document.selectContaining(cursor, words,
                                         query.filetype,
                                         query.offset,
                                         self.searchInPrivate)
    finally:
        cursor.close()
def notifyDownload(self, db_document_id, query):
    """Record a download of a document and return the document's URL.

    The first document returned by ``Document.selectWhere`` for
    ``db_document_id`` is looked up, ``_updateDownloadStatistics`` is
    called with it and the words extracted from ``query``, and its
    ``url`` is returned. Returns ``''`` when an ``IndexError`` is raised
    (for instance when the selection is empty).
    """
    words = WORDS_RGX.findall(normalizeText(query))
    try:
        # Acquire the cursor outside the inner try: if cursor() raised
        # inside it, the finally clause would reference an unbound name
        # and mask the original error.
        cursor = self._cnx.cursor()
        try:
            doc = Document.selectWhere(
                cursor, db_document_id=db_document_id)[0]
        finally:
            cursor.close()
        # NOTE(review): the IndexError handler below also covers this
        # call, as it did before this change - confirm that is intended.
        self._updateDownloadStatistics(doc, words)
        return doc.url
    except IndexError:
        return ''
def render_nextset_url(self, context, data):
    """Build the URL of the next page of search results.

    The ``words`` argument of ``context`` is decoded as UTF-8,
    normalized and split with ``WORDS_RGX``; the ``offset`` argument
    (0 when absent) is advanced by 15. ``data`` is not used.
    """
    raw_words = unicode(context.arg('words'), 'utf-8')
    query_words = WORDS_RGX.findall(normalizeText(raw_words))
    next_offset = int(context.arg('offset', 0)) + 15
    joined_words = '+'.join(query_words)
    return 'search?words=%s&offset=%s' % (joined_words, next_offset)