예제 #1
0
    def indexDocument(self, filename, title, text, fileSize, lastModifiedOn,
                      content_hash, mime_type, state, file_state):
        """Insert or update the database records describing one indexed file.

        Inserts or updates information in tables documents, file_info,
        document_score and word, then commits the connection.

        :param filename: name used to look up (or create) the FileInfo row
        :param title: forwarded to ``_createDocument`` for new documents
        :param text: forwarded to ``_createDocument`` and ``_updateScores``
        :param fileSize: forwarded to ``_createDocument`` for new documents
        :param lastModifiedOn: stored as the FileInfo ``file_time``; also
            used to push an existing Document's ``publication_time`` forward
        :param content_hash: compared against ``Document.document_id`` to
            detect whether the content changed or is already known
        :param mime_type: accepted for interface compatibility; currently
            not used by this method
        :param state: stored on both the FileInfo and the Document
        :param file_state: stored on the FileInfo

        If any database operation raises, the cursor is still closed and
        the exception propagates; ``self._cnx.commit()`` is not called in
        that case (no rollback is issued here either).
        """
        # XXX Decide if we can compute the content_hash and mime_type
        # ourselves or if the indexer should do it and pass the values as an argument
        cursor = self._cnx.cursor()
        try:
            # insert or update in table file_info
            fileinfo = FileInfo.selectWhere(cursor, file_name=filename)
            if fileinfo:
                fileinfo = fileinfo[0]
                fileinfo.file_time = lastModifiedOn
                fileinfo.state = state
                fileinfo.file_state = file_state
                doc = Document.selectWhere(
                    cursor, db_document_id=fileinfo.db_document_id)
                if not doc or doc[0].document_id != content_hash:
                    # no document was found or a document with another
                    # content: in both cases, we create a new Document in
                    # database (we don't want to modify the existing one,
                    # because it can be shared by several files)
                    doc = self._createDocument(cursor, content_hash, title,
                                               text, fileSize, lastModifiedOn,
                                               filename, state)
                    fileinfo.db_document_id = doc.db_document_id
                else:
                    # document content has not changed, but its state may
                    # have: keep it in sync with the file's state, as the
                    # "file unknown" branch below already does
                    doc = doc[0]
                    if doc.state != state:
                        doc.state = state
                        doc.commit(cursor, update=True)

                fileinfo.commit(cursor, update=True)

            else:
                # file unknown
                # try to find a Document with same hash value
                doc = Document.selectWhere(cursor, document_id=content_hash)
                if doc:
                    doc = doc[0]
                    doc.state = state
                    doc.publication_time = max(doc.publication_time,
                                               lastModifiedOn)
                    doc.commit(cursor, update=True)
                else:
                    self._createDocument(cursor, content_hash, title, text,
                                         fileSize, lastModifiedOn, filename,
                                         state)
                    # re-read the freshly created row before using its
                    # db_document_id (kept from the original code;
                    # NOTE(review): presumably needed to obtain the
                    # database-generated id -- confirm)
                    doc = Document.selectWhere(
                        cursor, document_id=content_hash)[0]

                fileinfo = FileInfo(db_document_id=doc.db_document_id,
                                    file_name=filename,
                                    file_time=lastModifiedOn,
                                    state=state,
                                    file_state=file_state)
                fileinfo.commit(cursor, update=False)

            self._updateScores(cursor, doc.db_document_id, text)
        finally:
            # always release the cursor, even when a query failed
            cursor.close()
        self._cnx.commit()
예제 #2
0
    def indexDocument(self, nodeId, futureDoc):
        """Insert or update the database records describing one indexed file.

        Inserts or updates information in tables documents, file_info,
        document_score and word, records that node ``nodeId`` provides the
        document, refreshes that node's ``last_seen_time`` and finally
        commits the connection.

        :param nodeId: identifier of the node providing the document; a
            matching Node row is expected to exist already (the lookup is
            indexed with ``[0]`` without a guard)
        :param futureDoc: object carrying ``filename``, ``title``, ``text``,
            ``lastModificationTime``, ``state``, ``file_state`` and
            ``content_hash``. Its ``text`` attribute is modified in place:
            the title is prepended to it.

        If any database operation raises, the cursor is still closed and
        the exception propagates; ``self._cnx.commit()`` is not called in
        that case (no rollback is issued here either).
        """
        # XXX Decide if we can compute the content_hash and mime_type
        # ourselves or if the indexer should do it and
        # pass the values as an argument
        cursor = self._cnx.cursor()
        try:
            # insert or update in table file_info
            fileinfo = FileInfo.selectWhere(cursor,
                                            file_name=futureDoc.filename)
            # insert title into text to be able to find documents according
            # to their title (e.g: searching 'foo' should find 'foo.pdf')
            # NOTE(review): this mutates the caller's object, so indexing the
            # same futureDoc twice prepends the title twice -- left as is
            # because _createDocument may rely on the modified text.
            futureDoc.text = '%s %s' % (futureDoc.title, futureDoc.text)
            if fileinfo:
                fileinfo = fileinfo[0]
                fileinfo.file_time = futureDoc.lastModificationTime
                fileinfo.state = futureDoc.state
                fileinfo.file_state = futureDoc.file_state
                doc = Document.selectWhere(
                    cursor, db_document_id=fileinfo.db_document_id)
                if not doc or doc[0].document_id != futureDoc.content_hash:
                    # no document was found or a document with another
                    # content: in both cases, we create a new Document in
                    # database (we don't want to modify the existing one,
                    # because it can be shared by several files)
                    doc = self._createDocument(cursor, futureDoc)
                    fileinfo.db_document_id = doc.db_document_id
                else:
                    # document has not changed; only sync its state
                    doc = doc[0]
                    if doc.state != futureDoc.state:
                        doc.state = futureDoc.state
                        doc.commit(cursor, update=True)

                fileinfo.commit(cursor, update=True)

            else:
                # file unknown
                # try to find a Document with same hash value
                doc = Document.selectWhere(
                    cursor, document_id=futureDoc.content_hash)
                if doc:
                    doc = doc[0]
                    doc.state = futureDoc.state
                    doc.publication_time = max(
                        doc.publication_time,
                        futureDoc.lastModificationTime)
                    doc.commit(cursor, update=True)
                else:
                    self._createDocument(cursor, futureDoc)
                    # re-read the freshly created row before using its
                    # db_document_id (kept from the original code;
                    # NOTE(review): presumably needed to obtain the
                    # database-generated id -- confirm)
                    doc = Document.selectWhere(
                        cursor, document_id=futureDoc.content_hash)[0]

                fileinfo = FileInfo(db_document_id=doc.db_document_id,
                                    file_name=futureDoc.filename,
                                    file_time=futureDoc.lastModificationTime,
                                    state=futureDoc.state,
                                    file_state=futureDoc.file_state)
                fileinfo.commit(cursor, update=False)

            self._updateScores(cursor, doc.db_document_id, futureDoc.text)

            # sample the clock once so that the provider and the node get
            # exactly the same timestamp
            now = int(time.time())
            provider = DocumentProvider.selectOrInsertWhere(
                cursor,
                db_document_id=doc.db_document_id,
                node_id=nodeId)[0]
            provider.last_providing_time = now
            provider.commit(cursor, update=True)
            node = Node.selectWhere(cursor, node_id=nodeId)[0]
            node.last_seen_time = now
            node.commit(cursor, update=True)
        finally:
            # always release the cursor, even when a query failed
            cursor.close()
        self._cnx.commit()