def test_index_list(self):
    """The root endpoint lists every created index with a zero doc count."""
    names = ['i%s' % n for n in range(3)]
    for name in names:
        Index.create(name=name)

    payload = json_load(self.app.get('/').data)

    # Ids are asserted as 1..3, i.e. in creation order.
    expected = [
        {
            'document_count': 0,
            'documents': '/%s/' % name,
            'id': position,
            'name': name,
        }
        for position, name in enumerate(names, 1)
    ]
    self.assertEqual(payload['indexes'], expected)
def setUp(self):
    """Create the test client, a 'default' index and two extra indexes."""
    super(TestSearch, self).setUp()
    self.app = app.test_client()

    # 'default' is kept on the instance; the two 'unused-N' indexes are
    # only created, never referenced directly here.
    self.index = Index.create(name='default')
    for suffix in (1, 2):
        Index.create(name='unused-%s' % suffix)

    # Reset the AUTHENTICATION setting to None before every test.
    app.config['AUTHENTICATION'] = None
def test_document_detail_delete(self):
    """DELETE on a document removes its metadata, attachment and links.

    A second DELETE on the same URL must answer 404, and the other
    document must remain in both indexes.
    """
    primary = Index.create(name='idx')
    secondary = Index.create(name='alt-idx')

    kept = primary.index('doc 1', k1='v1', k2='v2')
    doomed = primary.index('doc 2', k3='v3')
    doomed.attach('foo.jpg', 'bar')

    # Both documents are members of both indexes.
    for document in (kept, doomed):
        secondary.add_to_index(document)

    self.assertEqual(Metadata.select().count(), 3)
    self.assertEqual(Attachment.select().count(), 1)

    doomed_url = '/documents/%s/' % doomed.get_id()

    first_attempt = self.app.delete(doomed_url)
    self.assertEqual(json_load(first_attempt.data), {'success': True})

    # Only the metadata of the surviving document is left.
    self.assertEqual(Metadata.select().count(), 2)
    self.assertEqual(Attachment.select().count(), 0)

    # The document is gone, so deleting it again is a 404.
    second_attempt = self.app.delete(doomed_url)
    self.assertEqual(second_attempt.status_code, 404)

    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(IndexDocument.select().count(), 2)

    kept_id = kept.get_id()
    for index in (primary, secondary):
        self.assertEqual([d.get_id() for d in index.documents], [kept_id])
def test_authentication(self):
    """Requests get a 401 unless the configured API key is supplied.

    With ``AUTHENTICATION`` set to ``'test'`` the test checks that:

    * a request without a key is rejected with 'Invalid API key';
    * a wrong key is rejected, whether sent as a ``key`` query
      parameter or as a ``key`` header;
    * the right key is accepted through either channel and the index
      listing is returned.
    """
    Index.create(name='idx')

    # app.config is shared by the whole process: put the previous value
    # back when this test ends (pass or fail) so that the API key
    # requirement cannot leak into tests that run afterwards.
    previous = app.config.get('AUTHENTICATION')
    self.addCleanup(app.config.__setitem__, 'AUTHENTICATION', previous)
    app.config['AUTHENTICATION'] = 'test'

    # No key at all.
    resp = self.app.get('/')
    self.assertEqual(resp.status_code, 401)
    self.assertEqual(resp.data.decode('utf-8'), 'Invalid API key')

    # Wrong key, first in the query string and then as a header.
    resp = self.app.get('/?key=tesss')
    self.assertEqual(resp.status_code, 401)

    resp = self.app.get('/', headers={'key': 'tesss'})
    self.assertEqual(resp.status_code, 401)

    expected_indexes = [{
        'id': 1,
        'name': 'idx',
        'document_count': 0,
        'documents': '/idx/',
    }]

    # Correct key, first in the query string and then as a header.
    resp = self.app.get('/?key=test')
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json_load(resp.data)['indexes'], expected_indexes)

    resp = self.app.get('/', headers={'key': 'test'})
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json_load(resp.data)['indexes'], expected_indexes)
def test_index_detail(self):
    """The index detail view paginates and reports index membership."""
    first = Index.create(name='idx-a')
    second = Index.create(name='idx-b')

    for n in range(11):
        first.index('document-%s' % n, foo='bar-%s' % n)

    # One more document is stored in idx-b and then also added to idx-a.
    shared = second.index('both-doc')
    first.index(shared.content, shared)

    def fetch(url):
        return json_load(self.app.get(url).data)

    def assert_paging(payload, page, pages, count):
        self.assertEqual(payload['page'], page)
        self.assertEqual(payload['pages'], pages)
        self.assertEqual(len(payload['documents']), count)

    # idx-a holds 12 documents: ten on the first page, two on the second.
    listing = fetch('/idx-a/')
    assert_paging(listing, 1, 2, 10)
    self.assertEqual(
        listing['documents'][0],
        {
            'attachments': [],
            'content': 'document-0',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {'foo': 'bar-0'},
        })

    assert_paging(fetch('/idx-a/?page=2'), 2, 2, 2)

    # idx-b only holds the shared document, which names both indexes.
    listing = fetch('/idx-b/')
    assert_paging(listing, 1, 1, 1)
    self.assertEqual(
        listing['documents'][0],
        {
            'attachments': [],
            'content': 'both-doc',
            'id': 12,
            'identifier': None,
            'indexes': ['idx-b', 'idx-a'],
            'metadata': {},
        })
def test_multi_index(self):
    """
    Check that a single document can be stored in several indexes.
    """
    # Drop the index created in setUp so only the three below exist.
    self.index.delete_instance()

    created = [Index.create(name='idx-%s' % n) for n in range(3)]
    document = Document.create(content='hueybear')
    for target in created:
        target.index(document.content, document)

    # One document, three indexes, one link row per index.
    self.assertEqual(Document.select().count(), 1)
    self.assertEqual(Index.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 3)

    link_query = (IndexDocument
                  .select(Index.name, IndexDocument.document)
                  .join(Index)
                  .order_by(Index.name)
                  .dicts())
    link_rows = list(link_query)

    expected_rows = [
        {'document': document.get_id(), 'name': 'idx-%s' % n}
        for n in range(3)
    ]
    self.assertEqual(link_rows, expected_rows)
def test_index_document_validation(self):
    """Posting a document with a missing or unknown index is an error."""
    Index.create(name='idx')

    no_index = 'You must specify either an "index" or "indexes".'

    # (request payload, expected error message), checked in this order.
    cases = [
        ({'content': 'foo'}, no_index),
        ({'content': 'x', 'index': ''}, no_index),
        ({'content': 'foo', 'index': 'missing'},
         'The following indexes were not found: missing.'),
        ({'content': 'foo', 'indexes': ['missing', 'idx', 'blah']},
         'The following indexes were not found: missing, blah.'),
    ]
    for payload, message in cases:
        response = self.post_json('/documents/', payload)
        self.assertEqual(response['error'], message)

    # None of the rejected requests may have stored a document.
    self.assertEqual(Document.select().count(), 0)
def test_document_detail_post(self):
    """POSTing to a document updates content, metadata and indexes.

    Each request changes one aspect only; after every request the stored
    document is re-read and all three aspects are compared.
    """
    idx = Index.create(name='idx')
    Index.create(name='alt-idx')

    doc = idx.index('test doc', foo='bar', nug='baze')
    alt_doc = idx.index('alt doc')
    url = '/documents/%s/' % doc.get_id()

    def check(document, content, metadata=None, indexes=None):
        # Compare against a freshly loaded copy, not the local instance.
        stored = self.refresh_doc(document)
        self.assertEqual(stored.content, content)
        index_names = [index.name for index in stored.get_indexes()]
        self.assertEqual(index_names, indexes or [])
        self.assertEqual(stored.metadata, metadata or {})

    # Update the content.
    self.post_json(url, {'content': 'updated'})
    check(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

    # Update the metadata: one key changed, one key added.
    new_metadata = dict(doc.metadata, nug='baz', herp='derp')
    self.post_json(url, {'metadata': new_metadata})
    check(doc, 'updated',
          {'foo': 'bar', 'nug': 'baz', 'herp': 'derp'},
          ['idx'])

    # Clear the metadata.
    self.post_json(url, {'metadata': None})
    check(doc, 'updated', {}, ['idx'])

    # Update the indexes; 'alt-idx' is expected to be listed first.
    self.post_json(url, {'indexes': ['idx', 'alt-idx']})
    check(doc, 'updated', {}, ['alt-idx', 'idx'])

    # Clear the indexes.
    self.post_json(url, {'indexes': []})
    check(doc, 'updated', {}, [])

    # The other document must not have been affected.
    check(alt_doc, 'alt doc', {}, ['idx'])

    # Sanity check: no document was created or removed along the way.
    self.assertEqual(Document.select().count(), 2)
def test_index_update_delete(self):
    """An index can be renamed via POST and then removed via DELETE."""
    idx = Index.create(name='idx')
    alt_idx = Index.create(name='alt-idx')

    # 'foo' lives in both indexes; the other two documents in one each.
    shared = idx.index(content='foo')
    alt_idx.index(shared.content, shared)
    idx.index('idx only')
    alt_idx.index('alt only')

    renamed = self.post_json('/idx/', {'name': 'idx-updated'})
    self.assertEqual(renamed['id'], idx.id)
    self.assertEqual(renamed['name'], 'idx-updated')
    contents = [item['content'] for item in renamed['documents']]
    self.assertEqual(contents, ['foo', 'idx only'])

    # The index is now reachable under its new name.
    deleted = self.app.delete('/idx-updated/')
    self.assertEqual(json_load(deleted.data), {'success': True})

    # All three documents remain; only the links of the other index and
    # that index itself are left.
    self.assertEqual(Document.select().count(), 3)
    self.assertEqual(IndexDocument.select().count(), 2)
    self.assertEqual(Index.select().count(), 1)
def create(self):
    """Create an index named by the POSTed 'name' and return its detail.

    The insert runs inside ``database.atomic()``. An ``IntegrityError``
    from the insert is reported through ``error()`` with an
    '"<name>" already exists.' message.
    """
    payload = validator.parse_post(['name'])

    with database.atomic():
        try:
            index = Index.create(name=payload['name'])
        except IntegrityError:
            # NOTE(review): error() presumably aborts the request by
            # raising; otherwise `index` would be unbound below - confirm.
            error('"%s" already exists.' % payload['name'])
        else:
            logger.info('Created new index "%s"' % index.name)

    # Rendered after the atomic block has exited.
    return self.detail(index.name)
def test_attachment_views(self):
    """Exercise the attachment list, detail, delete, update and download."""
    idx = Index.create(name='idx')
    doc = idx.index('doc 1')
    doc.attach('foo.jpg', 'x')
    doc.attach('bar.png', 'x')

    # Pin the timestamps so the serialized form is predictable.
    Attachment.update(timestamp='2016-01-02 03:04:05').execute()

    def serialized(filename, mimetype):
        return {
            'mimetype': mimetype,
            'timestamp': '2016-01-02 03:04:05',
            'data_length': 1,
            'filename': filename,
            'document': '/documents/1/',
            'data': '/documents/1/attachments/%s/download/' % filename,
        }

    # List view: bar.png is expected ahead of foo.jpg.
    listing = json_load(self.app.get('/documents/1/attachments/').data)
    self.assertEqual(listing['attachments'], [
        serialized('bar.png', 'image/png'),
        serialized('foo.jpg', 'image/jpeg'),
    ])

    # Detail view of a single attachment.
    detail = json_load(
        self.app.get('/documents/1/attachments/foo.jpg/').data)
    self.assertEqual(detail, serialized('foo.jpg', 'image/jpeg'))

    # Deleting one attachment leaves the other one in place.
    self.app.delete('/documents/1/attachments/foo.jpg/')
    self.assertEqual(Attachment.select().count(), 1)

    # Posting a file to an existing attachment replaces its contents.
    upload = self.app.post('/documents/1/attachments/bar.png/', data={
        'data': '',
        'file_0': (BytesIO(b'zz'), 'bar.png'),
    })
    self.assertEqual(json_load(upload.data)['data_length'], 2)

    download = self.app.get('/documents/1/attachments/bar.png/download/')
    self.assertEqual(download.data, b'zz')
def test_query_count(self):
    """Pin the number of SQL queries issued by the main read endpoints."""
    idx_a = Index.create(name='idx-a')
    idx_b = Index.create(name='idx-b')

    # Every document is stored in both indexes.
    for word in ['foo', 'bar', 'baze', 'nug', 'nuggie']:
        content = 'document ' + word
        stored = idx_a.index(content)
        idx_b.index(content, stored, foo='bar', baze='nug')

    search_terms = ['nug', 'nug*', 'document', 'missing']
    for index_name in ['idx-a', 'idx-b']:
        for term in search_terms:
            # Queries itemised by the original author:
            #   1. Get index.
            #   2. Get # of docs in index.
            #   3. Prefetch indexes.
            #   4. Prefetch index documents.
            #   5. Prefetch metadata.
            #   6. Fetch documents (top of prefetch).
            #   7. COUNT(*) for pagination.
            #   8. COUNT(*) for pagination.
            # NOTE(review): eight are listed but nine are asserted;
            # the ninth query is not identified here - confirm.
            with assert_query_count(9):
                self.search(index_name, term)

            # A metadata filter does not change the count.
            with assert_query_count(9):
                self.search(index_name, term, foo='bar')

    with assert_query_count(9):
        # Same as above.
        body = self.app.get('/idx-a/').data

    with assert_query_count(8):
        # Same as above minus the first query for the index.
        self.app.get('/documents/')

    for n in range(10):
        Index.create(name='idx-%s' % n)

    with assert_query_count(2):
        # Two queries: one for the list, one for pagination.
        self.app.get('/')
def test_index_document(self):
    """Documents can be posted to one index or to several at once."""
    for name in ('idx-a', 'idx-b'):
        Index.create(name=name)

    # A single index plus metadata.
    single = self.post_json('/documents/', {
        'content': 'doc 1',
        'index': 'idx-a',
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    })
    self.assertEqual(single, {
        'attachments': [],
        'content': 'doc 1',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'k1': 'v1', 'k2': 'v2'},
    })

    # Several indexes, no metadata.
    multiple = self.post_json('/documents/', {
        'content': 'doc 2',
        'indexes': ['idx-a', 'idx-b'],
    })
    self.assertEqual(multiple, {
        'attachments': [],
        'content': 'doc 2',
        'id': 2,
        'identifier': None,
        'indexes': ['idx-a', 'idx-b'],
        'metadata': {},
    })
def test_document_detail_update_attachments(self):
    """Files posted with a document update replace or add attachments."""
    idx = Index.create(name='idx')
    doc = idx.index('test doc', foo='bar', nug='baze')
    doc.attach('foo.jpg', 'empty')

    # foo.jpg already exists on the document, foo2.jpg does not.
    url = '/documents/%s/' % doc.docid
    form = {
        'data': json.dumps({'content': 'test doc-edited'}),
        'file_0': (BytesIO(b'xx'), 'foo.jpg'),
        'file_1': (BytesIO(b'yy'), 'foo2.jpg'),
    }
    updated = json_load(self.app.post(url, data=form).data)

    def serialized(filename):
        # Expected form of one attachment inside the document payload;
        # the timestamp is taken from the stored row.
        row = Attachment.get(Attachment.filename == filename)
        return {
            'mimetype': 'image/jpeg',
            'data_length': 2,
            'data': '/documents/%s/attachments/%s/download/' % (
                doc.docid, filename),
            'timestamp': str(row.timestamp),
            'filename': filename,
        }

    self.assertEqual(updated, {
        'attachments': [serialized('foo.jpg'), serialized('foo2.jpg')],
        'content': 'test doc-edited',
        'id': 1,
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar', 'nug': 'baze'},
    })

    self.assertEqual(Attachment.select().count(), 2)
    self.assertEqual(BlobData.select().count(), 3)

    # Existing file updated, new file added.
    existing, added = Attachment.select().order_by(Attachment.filename)
    self.assertEqual(existing.blob.data, b'xx')
    self.assertEqual(added.blob.data, b'yy')
def test_document_detail_get(self):
    """GET on a document URL returns that document's serialized form."""
    idx = Index.create(name='idx')
    doc = idx.index('test doc', foo='bar')
    # A second document that must not show up in the response.
    idx.index('alt doc')

    payload = json_load(self.app.get('/documents/%s/' % doc.docid).data)

    expected = {
        'attachments': [],
        'content': 'test doc',
        'id': doc.get_id(),
        'identifier': None,
        'indexes': ['idx'],
        'metadata': {'foo': 'bar'},
    }
    self.assertEqual(payload, expected)
def test_search_filters(self):
    """Metadata keyword arguments narrow down the search results."""
    idx = Index.create(name='idx')

    # (content, metadata) pairs, indexed in this order.
    corpus = [
        ('huey document', {'name': 'huey', 'kitty': 'yes'}),
        ('zaizee document', {'name': 'zaizee', 'kitty': 'yes'}),
        ('little huey bear', {'name': 'huey', 'kitty': 'yes'}),
        ('uncle huey', {'kitty': 'no'}),
        ('michael nuggie document', {'name': 'mickey', 'kitty': 'no'}),
    ]
    for text, attributes in corpus:
        idx.index(text, **attributes)

    def check(query, filters, expected):
        found = self.search('idx', query, **filters)
        self.assertEqual(
            [hit['content'] for hit in found['documents']],
            expected)

    # No filter: every document that mentions huey.
    check('huey', {}, ['huey document', 'little huey bear', 'uncle huey'])

    # One filter, then two filters together.
    check('huey', {'kitty': 'yes'},
          ['huey document', 'little huey bear'])
    check('huey', {'kitty': 'yes', 'name': 'huey'},
          ['huey document', 'little huey bear'])

    # A prefix query combined with a filter.
    check('docu*', {'kitty': 'yes'},
          ['huey document', 'zaizee document'])
def test_search(self):
    """Search covers pagination, prefix queries, misses and bm25 scores."""
    idx = Index.create(name='idx')

    # 7 "special" documents followed by 10 numbered ones: 17 in total.
    words = [
        'foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo', 'huey',
        'zaizee',
    ]
    for word in words:
        idx.index('document %s' % word, special=True)
    for n in range(10):
        idx.index('document %s' % n, special=False)

    # All 17 match the prefix: ten on page one, seven on page two.
    found = self.search('idx', 'docum*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 2)
    self.assertEqual(len(found['documents']), 10)

    found = self.search('idx', 'document', 2)
    self.assertEqual(len(found['documents']), 7)

    # Two prefix terms together match exactly two documents.
    found = self.search('idx', 'doc* nug*')
    self.assertEqual(found['page'], 1)
    self.assertEqual(found['pages'], 1)
    self.assertEqual(len(found['documents']), 2)

    def expected_hit(hit, content):
        # 'id' and 'score' are echoed from the hit itself, so effectively
        # only the remaining fields are compared.
        return {
            'attachments': [],
            'content': content,
            'id': hit['id'],
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {'special': 'True'},
            'score': hit['score'],
        }

    first, second = found['documents']
    ranked = (
        (first, 'document nug nugs'),
        (second, 'document blah nuggie foo'),
    )
    for hit, content in ranked:
        self.assertEqual(hit, expected_hit(hit, content))
        self.assertEqual(round(hit['score'], 4), -0.)

    # A term that occurs nowhere yields no documents.
    found = self.search('idx', 'missing')
    self.assertEqual(len(found['documents']), 0)

    # Requesting bm25 ranking yields a non-zero score.
    found = self.search('idx', 'nug', ranking='bm25')
    top = found['documents'][0]
    self.assertEqual(top['content'], 'document nug nugs')
    self.assertEqual(round(top['score'], 3), -2.891)
def setUp(self):
    """Run the parent setUp, then create the 'default' index."""
    super(TestModelAPIs, self).setUp()
    default_index = Index.create(name='default')
    # Kept on the instance so individual tests can reach it.
    self.index = default_index
def test_index_document_attachments(self):
    """Files posted along with a new document are stored as attachments."""
    Index.create(name='idx-a')

    form = {
        'data': json.dumps({
            'content': 'doc a',
            'index': 'idx-a',
            'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
        }),
        'file_0': (BytesIO(b'testfile1'), 'test1.txt'),
        'file_1': (BytesIO(b'testfile2'), 'test2.jpg'),
    }
    response = self.app.post('/documents/', data=form)

    text_row = Attachment.get(Attachment.filename == 'test1.txt')
    image_row = Attachment.get(Attachment.filename == 'test2.jpg')

    def embedded(filename, mimetype, row):
        # Form of one attachment inside the document payload; the
        # timestamp is taken from the stored row.
        return {
            'data': '/documents/1/attachments/%s/download/' % filename,
            'data_length': 9,
            'mimetype': mimetype,
            'timestamp': str(row.timestamp),
            'filename': filename,
        }

    self.assertEqual(json_load(response.data), {
        'attachments': [
            embedded('test1.txt', 'text/plain', text_row),
            embedded('test2.jpg', 'image/jpeg', image_row),
        ],
        'content': 'doc a',
        'id': 1,
        'identifier': None,
        'indexes': ['idx-a'],
        'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
    })

    # Pin the timestamps so the list view can be compared literally.
    Attachment.update(timestamp='2016-02-01 01:02:03').execute()

    def listed(filename, mimetype):
        # Form of one attachment in the list view, which additionally
        # carries a link back to the document.
        return {
            'mimetype': mimetype,
            'timestamp': '2016-02-01 01:02:03',
            'data_length': 9,
            'filename': filename,
            'document': '/documents/1/',
            'data': '/documents/1/attachments/%s/download/' % filename,
        }

    # The list view is expected to issue exactly three queries.
    with assert_query_count(3):
        resp = self.app.get('/documents/1/attachments/')

    self.assertEqual(json_load(resp.data), {
        'ordering': [],
        'pages': 1,
        'page': 1,
        'attachments': [
            listed('test1.txt', 'text/plain'),
            listed('test2.jpg', 'image/jpeg'),
        ],
    })