def testIndexDocument(self):
    """Check that indexing a document stores it under its content hash.

    No document may match the content hash before indexDocument() is
    called; exactly one must match afterwards, and its stored text must
    be the title, a single space, then the normalized text.
    """
    text = u"""Le tartuffe, de Jean-Baptiste Poquelin, dit Molière. Le petit chat est mort."""
    text = normalizeText(text)
    digest = sha.sha(text).hexdigest()
    cursor = self.cnx.cursor()

    # At this point, the database is expected to be empty, so no
    # document should match <digest>.
    title = 'Le Tartuffe'
    matchingDocs = Document.selectWhere(cursor, document_id=digest)
    self.assertEqual(len(matchingDocs), 0)

    # NOTE(review): '0'*40 looks like a placeholder identifier with the
    # same length as a SHA-1 hex digest -- confirm against the
    # signature of indexDocument().
    futureDoc = FutureDocument(
        filename='/tmp/Tartuffe.txt',
        title=title,
        text=text,
        fileSize=len(text),
        lastModificationTime=30000,
        content_hash=digest,
        mime_type='text',
        state=Document.PUBLISHED_STATE,
        file_state=FileInfo.CREATED_FILE_STATE)
    self.querier.indexDocument('0'*40, futureDoc)

    # The document must now be retrievable by its content hash, with
    # the title prepended to the indexed text.
    matchingDocs = Document.selectWhere(cursor, document_id=digest)
    self.assertEqual(len(matchingDocs), 1)
    self.assertEqual(matchingDocs[0].text, '%s %s' % (title, text))
def testIndexDocument(self):
    """Check that indexing a document stores it under its content hash.

    No document may match the content hash before indexDocument() is
    called; exactly one must match afterwards, and its stored text must
    equal the normalized text that was indexed.
    """
    text = u"""Le tartuffe, de Jean-Baptiste Poquelin, dit Molière. Le petit chat est mort."""
    text = normalizeText(text)
    digest = sha.sha(text).hexdigest()
    cursor = self.cnx.cursor()

    # At this point, the database is expected to be empty, so no
    # document should match <digest>.
    matchingDocs = Document.selectWhere(cursor, document_id=digest)
    self.assertEqual(len(matchingDocs), 0)

    self.querier.indexDocument(
        '/tmp/Tartuffe.txt',
        'Le Tartuffe',
        text,
        len(text),
        30000,
        digest,
        'text',
        Document.PUBLISHED_STATE,
        FileInfo.CREATED_FILE_STATE)

    # The document must now be retrievable by its content hash, with
    # its text stored unchanged.
    matchingDocs = Document.selectWhere(cursor, document_id=digest)
    self.assertEqual(len(matchingDocs), 1)
    self.assertEqual(matchingDocs[0].text, text)