def test_authentication(self):
    """Exercise API-key authentication against the index list endpoint.

    With ``AUTHENTICATION`` set to ``'test'``, a request with no key or a
    wrong key (query string or ``key`` header) must be answered with 401,
    and a request with the right key, by either transport, with 200 and
    the index listing.
    """
    Index.create(name='idx')

    # The setting lives on the shared application object. Put the previous
    # value back when the test ends so it cannot leak into a later test
    # that does not reset it on its own.
    self.addCleanup(
        app.config.__setitem__,
        'AUTHENTICATION',
        app.config.get('AUTHENTICATION'))
    app.config['AUTHENTICATION'] = 'test'

    # No key at all.
    resp = self.app.get('/')
    self.assertEqual(resp.status_code, 401)
    self.assertEqual(resp.data, 'Invalid API key')

    # Wrong key, once per transport.
    resp = self.app.get('/?key=tesss')
    self.assertEqual(resp.status_code, 401)

    resp = self.app.get('/', headers={'key': 'tesss'})
    self.assertEqual(resp.status_code, 401)

    expected_indexes = [{
        'id': 1,
        'name': 'idx',
        'document_count': 0,
        'documents': '/idx/',
    }]

    # Correct key, once per transport.
    resp = self.app.get('/?key=test')
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.data)['indexes'], expected_indexes)

    resp = self.app.get('/', headers={'key': 'test'})
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.data)['indexes'], expected_indexes)
def test_index_list(self):
    """List three freshly created, empty indexes through ``GET /``.

    Every entry is expected to carry its id, its name, a document count
    of zero and the URL of its document listing.
    """
    for number in range(3):
        Index.create(name='i%s' % number)

    payload = json.loads(self.app.get('/').data)

    expected = [
        {'id': 1, 'name': 'i0', 'document_count': 0, 'documents': '/i0/'},
        {'id': 2, 'name': 'i1', 'document_count': 0, 'documents': '/i1/'},
        {'id': 3, 'name': 'i2', 'document_count': 0, 'documents': '/i2/'},
    ]
    self.assertEqual(payload['indexes'], expected)
def setUp(self):
    """Create the test client and three indexes, and reset authentication.

    The 'default' index is kept on ``self.index``; the two 'unused-*'
    indexes are created but not retained. ``AUTHENTICATION`` is set to
    ``None`` on the application config.
    """
    super(TestSearch, self).setUp()
    self.app = app.test_client()

    self.index = Index.create(name='default')
    for spare_name in ('unused-1', 'unused-2'):
        Index.create(name=spare_name)

    app.config['AUTHENTICATION'] = None
def test_index_document(self):
    """Create documents through ``POST /documents/``.

    Covers the singular ``index`` key (with metadata) and the plural
    ``indexes`` key, checking the serialized document returned each time.
    """
    for index_name in ('idx-a', 'idx-b'):
        Index.create(name=index_name)

    # Single index, with metadata.
    first = self.post_json('/documents/', {
        'content': 'doc 1',
        'index': 'idx-a',
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    })
    expected_first = {
        'attachments': '/documents/1/attachments/',
        'content': 'doc 1',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    }
    self.assertEqual(first, expected_first)

    # Several indexes, no metadata.
    second = self.post_json('/documents/', {
        'content': 'doc 2',
        'indexes': ['idx-a', 'idx-b'],
    })
    expected_second = {
        'attachments': '/documents/2/attachments/',
        'content': 'doc 2',
        'id': 2,
        'identifier': None,
        'indexes': ['idx-a', 'idx-b'],
        'metadata': {},
    }
    self.assertEqual(second, expected_second)
def test_multi_index(self):
    """
    Test that documents can be stored in multiple indexes.
    """
    # Start from a clean slate: drop the index created by setUp.
    self.index.delete_instance()

    indexes = []
    for number in range(3):
        indexes.append(Index.create(name='idx-%s' % number))

    stored = Document.create(content='hueybear')
    for target in indexes:
        target.index(stored.content, stored)

    # One document, three indexes, one link row per index.
    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(Index.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 3)

    links = (IndexDocument
             .select(Index.name, IndexDocument.document)
             .join(Index)
             .order_by(Index.name)
             .dicts())
    rows = [row for row in links]

    expected = [
        {'document_id': stored.get_id(), 'name': index_name}
        for index_name in ('idx-0', 'idx-1', 'idx-2')]
    self.assertEqual(rows, expected)
def test_document_detail_delete(self):
    """Delete a document over HTTP and check what is removed with it.

    Deleting 'doc 2' must drop its metadata row and its attachment,
    answer 404 when repeated, and leave 'doc 1' in both indexes.
    """
    idx = Index.create(name='idx')
    alt_idx = Index.create(name='alt-idx')

    kept = idx.index('doc 1', k1='v1', k2='v2')
    doomed = idx.index('doc 2', k3='v3')
    doomed.attach('foo.jpg', 'bar')
    for member in (kept, doomed):
        alt_idx.add_to_index(member)

    self.assertEqual(Metadata.select().count(), 3)
    self.assertEqual(Attachment.select().count(), 1)

    url = '/documents/%s/' % doomed.get_id()
    outcome = json.loads(self.app.delete(url).data)
    self.assertEqual(outcome, {'success': True})
    self.assertEqual(Metadata.select().count(), 2)
    self.assertEqual(Attachment.select().count(), 0)

    # A second delete of the same URL finds nothing.
    repeat = self.app.delete('/documents/%s/' % doomed.get_id())
    self.assertEqual(repeat.status_code, 404)

    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(IndexDocument.select().count(), 2)
    for index in (idx, alt_idx):
        self.assertEqual(
            [d.get_id() for d in index.documents],
            [kept.get_id()])
def test_document_detail_delete(self):
    """Delete a document over HTTP and check the remaining rows.

    Deleting 'doc 2' must drop its metadata row, answer 404 when
    repeated, and leave 'doc 1' as the only member of both indexes.
    """
    idx = Index.create(name='idx')
    alt_idx = Index.create(name='alt-idx')

    survivor = idx.index('doc 1', k1='v1', k2='v2')
    victim = idx.index('doc 2', k3='v3')
    for member in (survivor, victim):
        alt_idx.add_to_index(member)

    self.assertEqual(Metadata.select().count(), 3)

    first_try = self.app.delete('/documents/%s/' % victim.get_id())
    self.assertEqual(json.loads(first_try.data), {'success': True})
    self.assertEqual(Metadata.select().count(), 2)

    # The document is gone, so deleting it again is a 404.
    second_try = self.app.delete('/documents/%s/' % victim.get_id())
    self.assertEqual(second_try.status_code, 404)

    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(IndexDocument.select().count(), 2)
    for index in (idx, alt_idx):
        self.assertEqual(
            [d.get_id() for d in index.documents],
            [survivor.get_id()])
def test_multi_index(self):
    """
    Test that documents can be stored in multiple indexes.
    """
    # Drop the index created by setUp so only the three below exist.
    self.index.delete_instance()

    indexes = []
    for number in range(3):
        indexes.append(Index.create(name='idx-%s' % number))

    stored = Document.create(content='hueybear')
    for target in indexes:
        target.index(stored.content, stored)

    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(Index.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 3)

    links = (IndexDocument
             .select(Index.name, IndexDocument.document)
             .join(Index)
             .order_by(Index.name)
             .dicts())
    rows = [row for row in links]

    # Rows come back ordered by index name, all pointing at one document.
    expected = [
        {'document': stored.rowid, 'name': index_name}
        for index_name in ('idx-0', 'idx-1', 'idx-2')]
    self.assertEqual(rows, expected)
def test_index_list(self):
    """List three freshly created indexes through ``GET /``.

    Every entry is expected to carry its id, its name and a ``documents``
    value of zero.
    """
    for number in range(3):
        Index.create(name='i%s' % number)

    payload = json.loads(self.app.get('/').data)

    expected = [
        {'id': 1, 'name': 'i0', 'documents': 0},
        {'id': 2, 'name': 'i1', 'documents': 0},
        {'id': 3, 'name': 'i2', 'documents': 0},
    ]
    self.assertEqual(payload['indexes'], expected)
def main(argv=None):
    """Parse arguments, configure logging, then build and register an index.

    Args:
        argv: Argument vector handed to ``parse_args``. When ``None`` (the
            default), the current value of ``sys.argv`` is used.
    """
    if argv is None:
        # Resolve sys.argv at call time. An ``argv=sys.argv`` default is
        # bound once when the module is imported and would not see a
        # later rebinding of ``sys.argv``.
        argv = sys.argv

    log.info("Environment setup in progress.")
    args = parse_args(argv)
    setup_logging(args.config_uri)

    # The slicing option arrives as a JSON-encoded string.
    slices = json.loads(args.slicing)

    log.info("Calling scout.Index, please hold on.")
    idx = Index(args.corpus, args.database, slices)

    log.info("Registering index, this may take some time.")
    idx.register()
    log.info(f"Index registration completed. (sqlite :{args.database})")
def test_index_detail(self):
    """Page through an index's documents via ``GET /<index>/``.

    'idx-a' holds twelve documents (eleven of its own plus one shared
    with 'idx-b'), expected as two pages of ten and two. 'idx-b' holds
    only the shared document.
    """
    idx_a = Index.create(name='idx-a')
    idx_b = Index.create(name='idx-b')

    for i in range(11):
        idx_a.index('document-%s' % i, foo='bar-%s' % i)

    # One document that belongs to both indexes.
    shared = idx_b.index('both-doc')
    idx_a.index(shared.content, shared)

    def fetch(url):
        return json.loads(self.app.get(url).data)

    def check_paging(listing, page, pages, size):
        self.assertEqual(listing['page'], page)
        self.assertEqual(listing['pages'], pages)
        self.assertEqual(len(listing['documents']), size)

    listing = fetch('/idx-a/')
    check_paging(listing, 1, 2, 10)
    self.assertEqual(listing['documents'][0], {
        'attachments': '/documents/1/attachments/',
        'content': 'document-0',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'foo': 'bar-0'},
    })

    listing = fetch('/idx-a/?page=2')
    check_paging(listing, 2, 2, 2)

    listing = fetch('/idx-b/')
    check_paging(listing, 1, 1, 1)
    self.assertEqual(listing['documents'][0], {
        'attachments': '/documents/12/attachments/',
        'content': 'both-doc',
        'id': 12,
        'identifier': None,
        'indexes': ['idx-b', 'idx-a'],
        'metadata': {},
    })
def test_search(self):
    """Search an index and check paging, result shape and scores.

    Seventeen documents are indexed (seven phrases, ten numbered). The
    expected scores depend on ``IS_FTS5``.
    """
    idx = Index.create(name='idx')
    for phrase in ['foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo',
                   'huey', 'zaizee']:
        idx.index('document %s' % phrase, special=True)
    for i in range(10):
        idx.index('document %s' % i, special=False)

    # Prefix query matches everything: two pages, ten on the first.
    found = self.search('idx', 'docum*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 2)
    self.assertEqual(len(found['documents']), 10)

    # Second page holds the remaining seven.
    found = self.search('idx', 'document', 2)
    self.assertEqual(len(found['documents']), 7)

    found = self.search('idx', 'doc* nug*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 1)
    self.assertEqual(len(found['documents']), 2)

    top, runner_up = found['documents']

    # id and score are echoed back from the result itself; the score is
    # pinned separately below.
    self.assertEqual(top, {
        'content': 'document nug nugs',
        'id': top['id'],
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'special': 'True'},
        'score': top['score'],
    })
    self.assertEqual(
        round(top['score'], 4),
        -2.2675 if IS_FTS5 else -0.)

    self.assertEqual(runner_up, {
        'content': 'document blah nuggie foo',
        'id': runner_up['id'],
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'special': 'True'},
        'score': runner_up['score'],
    })
    self.assertEqual(
        round(runner_up['score'], 4),
        -1.3588 if IS_FTS5 else -0.)

    found = self.search('idx', 'missing')
    self.assertEqual(len(found['documents']), 0)

    # Explicit bm25 ranking.
    found = self.search('idx', 'nug', ranking='bm25')
    best = found['documents'][0]
    self.assertEqual(best['content'], 'document nug nugs')
    self.assertEqual(
        round(best['score'], 3),
        -2.98 if IS_FTS5 else -2.891)
def test_search_filters(self):
    """Combine a search term with metadata filters.

    Each case passes the filters as keyword arguments to ``self.search``
    and compares the contents of the returned documents, in order.
    """
    idx = Index.create(name='idx')

    fixtures = (
        ('huey document', {'name': 'huey', 'kitty': 'yes'}),
        ('zaizee document', {'name': 'zaizee', 'kitty': 'yes'}),
        ('little huey bear', {'name': 'huey', 'kitty': 'yes'}),
        ('uncle huey', {'kitty': 'no'}),
        ('michael nuggie document', {'name': 'mickey', 'kitty': 'no'}),
    )
    for text, meta in fixtures:
        idx.index(text, **meta)

    # (query, metadata filters, expected contents)
    cases = (
        ('huey', {},
         ['huey document', 'little huey bear', 'uncle huey']),
        ('huey', {'kitty': 'yes'},
         ['huey document', 'little huey bear']),
        ('huey', {'kitty': 'yes', 'name': 'huey'},
         ['huey document', 'little huey bear']),
        ('docu*', {'kitty': 'yes'},
         ['huey document', 'zaizee document']),
    )
    for query, filters, expected in cases:
        found = self.search('idx', query, **filters)
        contents = [hit['content'] for hit in found['documents']]
        self.assertEqual(contents, expected)
def test_document_detail_update_attachments(self):
    """Update a document and upload files in the same POST.

    The document starts with one attachment, 'foo.jpg'. The request
    re-uploads 'foo.jpg' with new data and adds 'foo2.jpg'.
    """
    idx = Index.create(name='idx')
    doc = idx.index('test doc', foo='bar', nug='baze')
    doc.attach('foo.jpg', 'empty')

    target = '/documents/%s/' % doc.get_id()
    form = {
        'data': json.dumps({'content': 'test doc-edited'}),
        'file_0': (StringIO('xx'), 'foo.jpg'),
        'file_1': (StringIO('yy'), 'foo2.jpg'),
    }
    updated = json.loads(self.app.post(target, data=form).data)

    self.assertEqual(updated, {
        'attachments': '/documents/1/attachments/',
        'content': 'test doc-edited',
        'id': 1,
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar', 'nug': 'baze'},
    })

    self.assertEqual(Attachment.select().count(), 2)
    # Three blobs for two attachments -- presumably the replaced 'empty'
    # payload is still stored; confirm against the BlobData model.
    self.assertEqual(BlobData.select().count(), 3)

    # Existing file updated, new file added.
    by_name = Attachment.select().order_by(Attachment.filename)
    replaced, added = by_name
    self.assertEqual(replaced.blob.data, 'xx')
    self.assertEqual(added.blob.data, 'yy')
def test_index_document_validation(self):
    """Reject document creation when the target index is absent or unknown.

    Every request must come back with the matching ``error`` message, and
    no document may have been stored by the end.
    """
    Index.create(name='idx')

    unspecified = 'You must specify either an "index" or "indexes".'

    # (request payload, expected error message)
    cases = (
        ({'content': 'foo'},
         unspecified),
        ({'content': 'x', 'index': ''},
         unspecified),
        ({'content': 'foo', 'index': 'missing'},
         'The following indexes were not found: missing.'),
        ({'content': 'foo', 'indexes': ['missing', 'idx', 'blah']},
         'The following indexes were not found: missing, blah.'),
    )
    for payload, message in cases:
        reply = self.post_json('/documents/', payload)
        self.assertEqual(reply['error'], message)

    self.assertEqual(Document.select().count(), 0)
def test_document_detail_post(self):
    """Update a document field by field through ``POST /documents/<id>/``.

    After each request the document is reloaded and its content, metadata
    and index membership are compared with the expected state.
    """
    idx = Index.create(name='idx')
    Index.create(name='alt-idx')
    doc = idx.index('test doc', foo='bar', nug='baze')
    alt_doc = idx.index('alt doc')
    url = '/documents/%s/' % doc.rowid

    def verify(document, content, metadata=None, indexes=None):
        # Compare against a fresh copy, not the in-memory instance.
        fresh = self.refresh_doc(document)
        self.assertEqual(fresh.content, content)
        index_names = [index.name for index in fresh.get_indexes()]
        self.assertEqual(index_names, indexes or [])
        self.assertEqual(fresh.metadata, metadata or {})

    # Update the content.
    self.post_json(url, {'content': 'updated'})
    verify(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

    # Test updating metadata.
    new_metadata = dict(doc.metadata, nug='baz', herp='derp')
    self.post_json(url, {'metadata': new_metadata})
    verify(
        doc,
        'updated',
        {'foo': 'bar', 'nug': 'baz', 'herp': 'derp'},
        ['idx'])

    # Test clearing metadata.
    self.post_json(url, {'metadata': None})
    verify(doc, 'updated', {}, ['idx'])

    # Test updating indexes.
    self.post_json(url, {'indexes': ['idx', 'alt-idx']})
    verify(doc, 'updated', {}, ['alt-idx', 'idx'])

    # Test clearing indexes.
    self.post_json(url, {'indexes': []})
    verify(doc, 'updated', {}, [])

    # Ensure alt_doc has not been affected.
    verify(alt_doc, 'alt doc', {}, ['idx'])

    # Sanity check.
    self.assertEqual(Document.select().count(), 2)
def test_document_detail_post(self):
    """Update a document field by field through ``POST /documents/<id>/``.

    After each request the document is reloaded and its content, metadata
    and index membership are compared with the expected state.
    """
    idx = Index.create(name='idx')
    Index.create(name='alt-idx')
    doc = idx.index('test doc', foo='bar', nug='baze')
    alt_doc = idx.index('alt doc')
    url = '/documents/%s/' % doc.get_id()

    def verify(document, content, metadata=None, indexes=None):
        # Compare against a fresh copy, not the in-memory instance.
        fresh = self.refresh_doc(document)
        self.assertEqual(fresh.content, content)
        index_names = [index.name for index in fresh.get_indexes()]
        self.assertEqual(index_names, indexes or [])
        self.assertEqual(fresh.metadata, metadata or {})

    # Update the content.
    self.post_json(url, {'content': 'updated'})
    verify(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

    # Test updating metadata.
    new_metadata = dict(doc.metadata, nug='baz', herp='derp')
    self.post_json(url, {'metadata': new_metadata})
    verify(
        doc,
        'updated',
        {'foo': 'bar', 'nug': 'baz', 'herp': 'derp'},
        ['idx'])

    # Test clearing metadata.
    self.post_json(url, {'metadata': None})
    verify(doc, 'updated', {}, ['idx'])

    # Test updating indexes.
    self.post_json(url, {'indexes': ['idx', 'alt-idx']})
    verify(doc, 'updated', {}, ['alt-idx', 'idx'])

    # Test clearing indexes.
    self.post_json(url, {'indexes': []})
    verify(doc, 'updated', {}, [])

    # Ensure alt_doc has not been affected.
    verify(alt_doc, 'alt doc', {}, ['idx'])

    # Sanity check.
    self.assertEqual(Document.select().count(), 2)
def test_attachment_views(self):
    """Walk the attachment endpoints of one document.

    Covers the attachment list, a single attachment's detail, deleting an
    attachment, replacing an attachment's data and downloading it.
    """
    idx = Index.create(name='idx')
    doc = idx.index('doc 1')
    for filename in ('foo.jpg', 'bar.png'):
        doc.attach(filename, 'x')

    # Pin the timestamps so the serialized output is predictable.
    Attachment.update(timestamp='2016-01-02 03:04:05').execute()

    bar_png = {
        'mimetype': 'image/png',
        'timestamp': '2016-01-02 03:04:05',
        'data_length': 1,
        'filename': 'bar.png',
        'document': '/documents/1/',
        'data': '/documents/1/attachments/bar.png/download/',
    }
    foo_jpg = {
        'mimetype': 'image/jpeg',
        'timestamp': '2016-01-02 03:04:05',
        'data_length': 1,
        'filename': 'foo.jpg',
        'document': '/documents/1/',
        'data': '/documents/1/attachments/foo.jpg/download/',
    }

    # List view: 'bar.png' is expected ahead of 'foo.jpg'.
    listing = json.loads(self.app.get('/documents/1/attachments/').data)
    self.assertEqual(listing['attachments'], [bar_png, foo_jpg])

    # Detail view.
    detail = json.loads(
        self.app.get('/documents/1/attachments/foo.jpg/').data)
    self.assertEqual(detail, foo_jpg)

    # Delete one attachment; the other remains.
    self.app.delete('/documents/1/attachments/foo.jpg/')
    self.assertEqual(Attachment.select().count(), 1)

    # Replace the remaining attachment's data.
    upload = {'data': '', 'file_0': (StringIO('zz'), 'bar.png')}
    replaced = json.loads(
        self.app.post('/documents/1/attachments/bar.png/',
                      data=upload).data)
    self.assertEqual(replaced['data_length'], 2)

    download = self.app.get('/documents/1/attachments/bar.png/download/')
    self.assertEqual(download.data, 'zz')
def test_index_update_delete(self):
    """Rename an index, then delete it under its new name.

    After the delete, all three documents must still exist; only the
    index row and its link rows are expected to be gone.
    """
    idx = Index.create(name='idx')
    alt_idx = Index.create(name='alt-idx')

    # 'foo' lives in both indexes; the other two in one index each.
    shared = idx.index(content='foo')
    alt_idx.index(shared.content, shared)
    idx.index('idx only')
    alt_idx.index('alt only')

    renamed = self.post_json('/idx/', {'name': 'idx-updated'})
    self.assertEqual(renamed['id'], idx.id)
    self.assertEqual(renamed['name'], 'idx-updated')
    contents = [entry['content'] for entry in renamed['documents']]
    self.assertEqual(contents, ['foo', 'idx only'])

    outcome = json.loads(self.app.delete('/idx-updated/').data)
    self.assertEqual(outcome, {'success': True})

    self.assertEqual(Document.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 2)
    self.assertEqual(Index.select().count(), 1)
def test_document_detail_get(self):
    """Fetch one document by row id and check its serialized form."""
    idx = Index.create(name='idx')
    doc = idx.index('test doc', foo='bar')
    # A second document, so the lookup has something else to ignore.
    idx.index('alt doc')

    reply = self.app.get('/documents/%s/' % doc.rowid)
    fetched = json.loads(reply.data)

    expected = {
        'content': 'test doc',
        'id': doc.rowid,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar'},
    }
    self.assertEqual(fetched, expected)
def test_index_detail(self):
    """Page through an index's documents via ``GET /<index>/``.

    'idx-a' holds twelve documents (eleven of its own plus one shared
    with 'idx-b'), expected as two pages of ten and two. 'idx-b' holds
    only the shared document.
    """
    idx_a = Index.create(name='idx-a')
    idx_b = Index.create(name='idx-b')

    for i in range(11):
        idx_a.index('document-%s' % i, foo='bar-%s' % i)

    # One document that belongs to both indexes.
    shared = idx_b.index('both-doc')
    idx_a.index(shared.content, shared)

    def fetch(url):
        return json.loads(self.app.get(url).data)

    def check_paging(listing, page, pages, size):
        self.assertEqual(listing['page'], page)
        self.assertEqual(listing['pages'], pages)
        self.assertEqual(len(listing['documents']), size)

    listing = fetch('/idx-a/')
    check_paging(listing, 1, 2, 10)
    self.assertEqual(listing['documents'][0], {
        'content': 'document-0',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'foo': 'bar-0'},
    })

    listing = fetch('/idx-a/?page=2')
    check_paging(listing, 2, 2, 2)

    listing = fetch('/idx-b/')
    check_paging(listing, 1, 1, 1)
    self.assertEqual(listing['documents'][0], {
        'content': 'both-doc',
        'id': 12,
        'identifier': None,
        'indexes': ['idx-b', 'idx-a'],
        'metadata': {},
    })
def test_index_document(self):
    """Create documents through ``POST /documents/``.

    Covers the singular ``index`` key (with metadata) and the plural
    ``indexes`` key, checking the serialized document returned each time.
    """
    for index_name in ('idx-a', 'idx-b'):
        Index.create(name=index_name)

    # Single index, with metadata.
    first = self.post_json('/documents/', {
        'content': 'doc 1',
        'index': 'idx-a',
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    })
    expected_first = {
        'content': 'doc 1',
        'id': 1,
        'indexes': ['idx-a'],
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    }
    self.assertEqual(first, expected_first)

    # Several indexes, no metadata.
    second = self.post_json('/documents/', {
        'content': 'doc 2',
        'indexes': ['idx-a', 'idx-b'],
    })
    expected_second = {
        'content': 'doc 2',
        'id': 2,
        'indexes': ['idx-a', 'idx-b'],
        'metadata': {},
    }
    self.assertEqual(second, expected_second)
def test_index_update_delete(self):
    """Rename an index, then delete it under its new name.

    After the delete, all three documents must still exist; only the
    index row and its link rows are expected to be gone.
    """
    idx = Index.create(name='idx')
    alt_idx = Index.create(name='alt-idx')

    # 'foo' lives in both indexes; the other two in one index each.
    in_both = idx.index(content='foo')
    alt_idx.index(in_both.content, in_both)
    idx.index('idx only')
    alt_idx.index('alt only')

    # Rename via POST to the index URL.
    renamed = self.post_json('/idx/', {'name': 'idx-updated'})
    self.assertEqual(renamed['id'], idx.id)
    self.assertEqual(renamed['name'], 'idx-updated')
    listed = [item['content'] for item in renamed['documents']]
    self.assertEqual(listed, ['foo', 'idx only'])

    # Delete using the new name.
    reply = self.app.delete('/idx-updated/')
    self.assertEqual(json.loads(reply.data), {'success': True})

    self.assertEqual(Document.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 2)
    self.assertEqual(Index.select().count(), 1)
def test_query_count(self):
    """Pin the number of queries issued by the read endpoints.

    Each request runs inside ``assert_query_count`` with the expected
    number of queries for search, index detail, document list and index
    list.
    """
    idx_a = Index.create(name='idx-a')
    idx_b = Index.create(name='idx-b')

    # Every document is stored in both indexes.
    for word in ['foo', 'bar', 'baze', 'nug', 'nuggie']:
        text = 'document ' + word
        shared = idx_a.index(text)
        idx_b.index(text, shared, foo='bar', baze='nug')

    for index_name in ['idx-a', 'idx-b']:
        for term in ['nug', 'nug*', 'document', 'missing']:
            with assert_query_count(8):
                # 1. Get index.
                # 2. Get # of docs in index.
                # 3. Prefetch indexes.
                # 4. Prefetch index documents.
                # 5. Prefetch metadata
                # 6. Fetch documents (top of prefetch).
                # 7. COUNT(*) for pagination.
                # 8. COUNT(*) for pagination.
                self.search(index_name, term)

            # A metadata filter does not change the count.
            with assert_query_count(8):
                self.search(index_name, term, foo='bar')

    with assert_query_count(8):
        # Same as above.
        _ = self.app.get('/idx-a/').data

    with assert_query_count(7):
        # Same as above minus first query for index.
        self.app.get('/documents/')

    for i in range(10):
        Index.create(name='idx-%s' % i)

    with assert_query_count(2):
        # 2 queries, one for list, one for pagination.
        self.app.get('/')
def test_document_detail_by_identifier(self):
    """Fetch a document through its identifier instead of its id."""
    idx = Index.create(name='idx')
    doc = idx.index('test doc', identifier='td', foo='bar')
    # A second document with a different identifier.
    idx.index('alt doc', identifier='ad')

    reply = self.app.get('/documents/identifier/td/')
    fetched = json.loads(reply.data)

    expected = {
        'content': 'test doc',
        'id': doc.get_id(),
        'identifier': doc.identifier,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar'},
    }
    self.assertEqual(fetched, expected)
def test_authentication(self):
    """Exercise API-key authentication against the index list endpoint.

    With ``AUTHENTICATION`` set to ``'test'``, a request with no key or a
    wrong key (query string or ``key`` header) must be answered with 401,
    and a request with the right key, by either transport, with 200 and
    the index listing.
    """
    Index.create(name='idx')

    # The setting lives on the shared application object. Put the previous
    # value back when the test ends so it cannot leak into a later test
    # that does not reset it on its own.
    self.addCleanup(
        app.config.__setitem__,
        'AUTHENTICATION',
        app.config.get('AUTHENTICATION'))
    app.config['AUTHENTICATION'] = 'test'

    # No key at all.
    resp = self.app.get('/')
    self.assertEqual(resp.status_code, 401)
    self.assertEqual(resp.data, 'Invalid API key')

    # Wrong key, once per transport.
    resp = self.app.get('/?key=tesss')
    self.assertEqual(resp.status_code, 401)

    resp = self.app.get('/', headers={'key': 'tesss'})
    self.assertEqual(resp.status_code, 401)

    expected = {'indexes': [{'id': 1, 'name': 'idx', 'documents': 0}]}

    # Correct key, once per transport.
    resp = self.app.get('/?key=test')
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.data), expected)

    resp = self.app.get('/', headers={'key': 'test'})
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.data), expected)
def test_document_detail_get(self):
    """Fetch one document by id and check its serialized form."""
    idx = Index.create(name='idx')
    doc = idx.index('test doc', foo='bar')
    # A second document, so the lookup has something else to ignore.
    idx.index('alt doc')

    reply = self.app.get('/documents/%s/' % doc.get_id())
    fetched = json.loads(reply.data)

    expected = {
        'attachments': '/documents/%s/attachments/' % doc.get_id(),
        'content': 'test doc',
        'id': doc.get_id(),
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar'},
    }
    self.assertEqual(fetched, expected)
def test_index_document_attachments(self):
    """Create a document with two uploaded files in one POST.

    The response's ``attachments`` URL is then fetched within a budget of
    three queries, and the listing compared in full.
    """
    Index.create(name='idx-a')

    form = {
        'data': json.dumps({
            'content': 'doc a',
            'index': 'idx-a',
            'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
        }),
        'file_0': (StringIO('testfile1'), 'test1.txt'),
        'file_1': (StringIO('testfile2'), 'test2.jpg'),
    }
    created = json.loads(self.app.post('/documents/', data=form).data)
    self.assertEqual(created, {
        'attachments': '/documents/1/attachments/',
        'content': 'doc a',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
    })

    # Pin the timestamps so the serialized output is predictable.
    Attachment.update(timestamp='2016-02-01 01:02:03').execute()

    with assert_query_count(3):
        listing = self.app.get(created['attachments'])

    text_file = {
        'mimetype': 'text/plain',
        'timestamp': '2016-02-01 01:02:03',
        'data_length': 9,
        'filename': 'test1.txt',
        'document': '/documents/1/',
        'data': '/documents/1/attachments/test1.txt/download/',
    }
    image_file = {
        'mimetype': 'image/jpeg',
        'timestamp': '2016-02-01 01:02:03',
        'data_length': 9,
        'filename': 'test2.jpg',
        'document': '/documents/1/',
        'data': '/documents/1/attachments/test2.jpg/download/',
    }
    self.assertEqual(json.loads(listing.data), {
        'ordering': [],
        'pages': 1,
        'page': 1,
        'attachments': [text_file, image_file],
    })
def test_query_count(self):
    """Pin the number of queries issued by the read endpoints.

    Each request runs inside ``assert_query_count`` with the expected
    number of queries for search, index detail, document list and index
    list.
    """
    idx_a = Index.create(name='idx-a')
    idx_b = Index.create(name='idx-b')

    # Every document is stored in both indexes.
    for word in ['foo', 'bar', 'baze', 'nug', 'nuggie']:
        text = 'document ' + word
        shared = idx_a.index(text)
        idx_b.index(text, shared, foo='bar', baze='nug')

    for index_name in ['idx-a', 'idx-b']:
        for term in ['nug', 'nug*', 'document', 'missing']:
            with assert_query_count(6):
                # 1. Get index.
                # 2. Prefetch indexes.
                # 3. Prefetch index documents.
                # 4. Prefetch metadata
                # 5. Fetch documents (top of prefetch).
                # 6. COUNT(*) for pagination.
                self.search(index_name, term)

            # A metadata filter does not change the count.
            with assert_query_count(6):
                self.search(index_name, term, foo='bar')

    with assert_query_count(6):
        # Same as above.
        self.app.get('/idx-a/')

    with assert_query_count(5):
        # Same as above minus first query for index.
        self.app.get('/documents/')

    for i in range(10):
        Index.create(name='idx-%s' % i)

    # The index list is expected to take a single query.
    with assert_query_count(1):
        self.app.get('/')
def test_search_filters(self):
    """Combine a search term with metadata filters.

    Each case passes the filters as keyword arguments to ``self.search``
    and compares the contents of the returned documents, in order. The
    expected order of the unfiltered 'huey' search depends on
    ``IS_FTS5``.
    """
    idx = Index.create(name='idx')

    fixtures = (
        ('huey document', {'name': 'huey', 'kitty': 'yes'}),
        ('zaizee document', {'name': 'zaizee', 'kitty': 'yes'}),
        ('little huey bear', {'name': 'huey', 'kitty': 'yes'}),
        ('uncle huey', {'kitty': 'no'}),
        ('michael nuggie document', {'name': 'mickey', 'kitty': 'no'}),
    )
    for text, meta in fixtures:
        idx.index(text, **meta)

    def check(query, filters, expected):
        found = self.search('idx', query, **filters)
        contents = [hit['content'] for hit in found['documents']]
        self.assertEqual(contents, expected)

    unfiltered = (
        ['huey document', 'uncle huey', 'little huey bear']
        if IS_FTS5 else
        ['huey document', 'little huey bear', 'uncle huey'])
    check('huey', {}, unfiltered)

    check(
        'huey',
        {'kitty': 'yes'},
        ['huey document', 'little huey bear'])
    check(
        'huey',
        {'kitty': 'yes', 'name': 'huey'},
        ['huey document', 'little huey bear'])
    check(
        'docu*',
        {'kitty': 'yes'},
        ['huey document', 'zaizee document'])
def setUp(self):
    """Run the parent setUp, then create the 'default' index.

    The index is kept on ``self.index`` for the tests to use.
    """
    super(TestModelAPIs, self).setUp()

    default_index = Index.create(name='default')
    self.index = default_index
def test_create_index(self):
    """Create an index through ``POST /`` and check the response.

    The new index must be returned with its name and an empty document
    list, and must be the only index stored.
    """
    created = self.post_json('/', {'name': 'TestIndex'})

    for field, value in (('name', 'TestIndex'), ('documents', [])):
        self.assertEqual(created[field], value)

    self.assertEqual(Index.select().count(), 1)
def test_search(self):
    """Search an index and check paging, result shape and scores.

    Seventeen documents are indexed (seven phrases, ten numbered). The
    expected scores depend on ``IS_FTS5``.
    """
    idx = Index.create(name='idx')
    for phrase in ['foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo',
                   'huey', 'zaizee']:
        idx.index('document %s' % phrase, special=True)
    for i in range(10):
        idx.index('document %s' % i, special=False)

    # Prefix query matches everything: two pages, ten on the first.
    found = self.search('idx', 'docum*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 2)
    self.assertEqual(len(found['documents']), 10)

    # Second page holds the remaining seven.
    found = self.search('idx', 'document', 2)
    self.assertEqual(len(found['documents']), 7)

    found = self.search('idx', 'doc* nug*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 1)
    self.assertEqual(len(found['documents']), 2)

    top, runner_up = found['documents']

    # id and score are echoed back from the result itself; the score is
    # pinned separately below.
    self.assertEqual(top, {
        'attachments': '/documents/%s/attachments/' % top['id'],
        'content': 'document nug nugs',
        'id': top['id'],
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'special': 'True'},
        'score': top['score'],
    })
    self.assertEqual(
        round(top['score'], 4),
        -2.2675 if IS_FTS5 else -0.)

    self.assertEqual(runner_up, {
        'attachments': '/documents/%s/attachments/' % runner_up['id'],
        'content': 'document blah nuggie foo',
        'id': runner_up['id'],
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'special': 'True'},
        'score': runner_up['score'],
    })
    self.assertEqual(
        round(runner_up['score'], 4),
        -1.3588 if IS_FTS5 else -0.)

    found = self.search('idx', 'missing')
    self.assertEqual(len(found['documents']), 0)

    # Explicit bm25 ranking.
    found = self.search('idx', 'nug', ranking='bm25')
    best = found['documents'][0]
    self.assertEqual(best['content'], 'document nug nugs')
    self.assertEqual(
        round(best['score'], 3),
        -2.98 if IS_FTS5 else -2.891)