Esempio n. 1
0
    def test_authentication(self):
        """
        Exercise API-key authentication on the index listing at ``/``.

        With ``AUTHENTICATION`` set to ``'test'`` the test expects a 401 for
        a missing or wrong key and a 200 plus the index listing for the
        right key, whether the key is sent as the ``key`` query parameter or
        as a ``key`` header.
        """
        Index.create(name='idx')

        # ``app`` is not created by this test, so the setting would outlive
        # it. Register the restore step before changing the value so it is
        # put back even when one of the assertions below fails.
        had_setting = 'AUTHENTICATION' in app.config
        previous = app.config.get('AUTHENTICATION')

        def restore_authentication():
            if had_setting:
                app.config['AUTHENTICATION'] = previous
            else:
                app.config.pop('AUTHENTICATION', None)

        self.addCleanup(restore_authentication)
        app.config['AUTHENTICATION'] = 'test'

        # No key supplied at all.
        resp = self.app.get('/')
        self.assertEqual(resp.status_code, 401)
        self.assertEqual(resp.data, 'Invalid API key')

        # Wrong key: first as a query parameter, then as a header.
        resp = self.app.get('/?key=tesss')
        self.assertEqual(resp.status_code, 401)

        resp = self.app.get('/', headers={'key': 'tesss'})
        self.assertEqual(resp.status_code, 401)

        expected_indexes = [{
            'id': 1, 'name': 'idx', 'document_count': 0, 'documents': '/idx/'
        }]

        # Correct key: first as a query parameter, then as a header.
        resp = self.app.get('/?key=test')
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json.loads(resp.data)['indexes'], expected_indexes)

        resp = self.app.get('/', headers={'key': 'test'})
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json.loads(resp.data)['indexes'], expected_indexes)
Esempio n. 2
0
    def test_index_list(self):
        """
        Create three indexes and check that ``/`` lists each of them with
        its id, name, document count and documents URL.
        """
        for n in range(3):
            Index.create(name='i%s' % n)

        payload = json.loads(self.app.get('/').data)

        # The expected ids start at 1 and follow creation order.
        expected = [
            {
                'document_count': 0,
                'documents': '/i%s/' % n,
                'id': n + 1,
                'name': 'i%s' % n,
            }
            for n in range(3)]
        self.assertEqual(payload['indexes'], expected)
Esempio n. 3
0
 def setUp(self):
     """
     Run the parent setUp, create a test client, create the 'default'
     index plus two unused ones, and set AUTHENTICATION to None.
     """
     super(TestSearch, self).setUp()
     self.app = app.test_client()
     self.index = Index.create(name='default')
     # Extra indexes that the tests are not expected to touch.
     for unused_name in ('unused-1', 'unused-2'):
         Index.create(name=unused_name)
     app.config['AUTHENTICATION'] = None
Esempio n. 4
0
    def test_index_document(self):
        """
        POST documents to ``/documents/`` using the single ``index`` key and
        the ``indexes`` list, and check the document returned each time.
        """
        for index_name in ('idx-a', 'idx-b'):
            Index.create(name=index_name)

        # One index, named through the "index" key, with metadata.
        first_request = {
            'content': 'doc 1',
            'index': 'idx-a',
            'metadata': {'k1': 'v1', 'k2': 'v2'},
        }
        first_expected = {
            'attachments': '/documents/1/attachments/',
            'content': 'doc 1',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {'k1': 'v1', 'k2': 'v2'},
        }
        self.assertEqual(
            self.post_json('/documents/', first_request),
            first_expected)

        # Two indexes, named through the "indexes" key, no metadata.
        second_request = {
            'content': 'doc 2',
            'indexes': ['idx-a', 'idx-b'],
        }
        second_expected = {
            'attachments': '/documents/2/attachments/',
            'content': 'doc 2',
            'id': 2,
            'identifier': None,
            'indexes': ['idx-a', 'idx-b'],
            'metadata': {},
        }
        self.assertEqual(
            self.post_json('/documents/', second_request),
            second_expected)
Esempio n. 5
0
    def test_multi_index(self):
        """
        Check that one document can be stored in several indexes.
        """
        self.index.delete_instance()

        created = []
        for n in range(3):
            created.append(Index.create(name='idx-%s' % n))

        doc = Document.create(content='hueybear')
        for idx in created:
            idx.index(doc.content, doc)

        # One document, three indexes, one link row per index.
        for model, expected_rows in (
                (Document, 1), (Index, 3), (IndexDocument, 3)):
            self.assertEqual(model.select().count(), expected_rows)

        links = (IndexDocument
                 .select(Index.name, IndexDocument.document)
                 .join(Index)
                 .order_by(Index.name)
                 .dicts())
        rows = list(links)

        doc_id = doc.get_id()
        expected = [
            {'document_id': doc_id, 'name': index_name}
            for index_name in ('idx-0', 'idx-1', 'idx-2')]
        self.assertEqual(rows, expected)
Esempio n. 6
0
    def test_document_detail_delete(self):
        """
        DELETE a document through its detail URL, then check the remaining
        metadata, attachment, document and index-link rows. A second DELETE
        of the same URL is expected to return 404.
        """
        idx = Index.create(name='idx')
        alt_idx = Index.create(name='alt-idx')

        kept = idx.index('doc 1', k1='v1', k2='v2')
        removed = idx.index('doc 2', k3='v3')
        removed.attach('foo.jpg', 'bar')

        for doc in (kept, removed):
            alt_idx.add_to_index(doc)

        self.assertEqual(Metadata.select().count(), 3)
        self.assertEqual(Attachment.select().count(), 1)

        detail_url = '/documents/%s/' % removed.get_id()
        outcome = json.loads(self.app.delete(detail_url).data)
        self.assertEqual(outcome, {'success': True})

        # Only the two metadata rows of the kept document are left.
        self.assertEqual(Metadata.select().count(), 2)
        self.assertEqual(Attachment.select().count(), 0)

        # The document is gone, so deleting it again is a 404.
        repeat = self.app.delete(detail_url)
        self.assertEqual(repeat.status_code, 404)

        self.assertEqual(Document.select().count(), 1)
        self.assertEqual(IndexDocument.select().count(), 2)
        for index in (idx, alt_idx):
            self.assertEqual(
                [member.get_id() for member in index.documents],
                [kept.get_id()])
Esempio n. 7
0
    def test_document_detail_delete(self):
        """
        DELETE a document through its detail URL, then check the remaining
        metadata, document and index-link rows. A second DELETE of the same
        URL is expected to return 404.
        """
        idx = Index.create(name='idx')
        alt_idx = Index.create(name='alt-idx')

        kept = idx.index('doc 1', k1='v1', k2='v2')
        removed = idx.index('doc 2', k3='v3')
        for doc in (kept, removed):
            alt_idx.add_to_index(doc)

        self.assertEqual(Metadata.select().count(), 3)

        detail_url = '/documents/%s/' % removed.get_id()
        outcome = json.loads(self.app.delete(detail_url).data)
        self.assertEqual(outcome, {'success': True})

        # Only the two metadata rows of the kept document are left.
        self.assertEqual(Metadata.select().count(), 2)

        # The document is gone, so deleting it again is a 404.
        repeat = self.app.delete(detail_url)
        self.assertEqual(repeat.status_code, 404)

        self.assertEqual(Document.select().count(), 1)
        self.assertEqual(IndexDocument.select().count(), 2)
        for index in (idx, alt_idx):
            self.assertEqual(
                [member.get_id() for member in index.documents],
                [kept.get_id()])
Esempio n. 8
0
 def setUp(self):
     """
     Run the parent setUp, create a test client, create the 'default'
     index plus two unused ones, and set AUTHENTICATION to None.
     """
     super(TestSearch, self).setUp()
     self.app = app.test_client()
     self.index = Index.create(name='default')
     # Extra indexes that the tests are not expected to touch.
     for unused_name in ('unused-1', 'unused-2'):
         Index.create(name=unused_name)
     app.config['AUTHENTICATION'] = None
Esempio n. 9
0
    def test_multi_index(self):
        """
        Check that one document can be stored in several indexes.
        """
        self.index.delete_instance()

        created = []
        for n in range(3):
            created.append(Index.create(name='idx-%s' % n))

        doc = Document.create(content='hueybear')
        for idx in created:
            idx.index(doc.content, doc)

        # One document, three indexes, one link row per index.
        for model, expected_rows in (
                (Document, 1), (Index, 3), (IndexDocument, 3)):
            self.assertEqual(model.select().count(), expected_rows)

        links = (IndexDocument
                 .select(Index.name, IndexDocument.document)
                 .join(Index)
                 .order_by(Index.name)
                 .dicts())
        rows = list(links)

        expected = [
            {'document': doc.rowid, 'name': index_name}
            for index_name in ('idx-0', 'idx-1', 'idx-2')]
        self.assertEqual(rows, expected)
Esempio n. 10
0
    def test_index_list(self):
        """
        Create three indexes and check that ``/`` lists each of them with
        its id, name and a document count of zero.
        """
        for n in range(3):
            Index.create(name='i%s' % n)

        payload = json.loads(self.app.get('/').data)

        # The expected ids start at 1 and follow creation order.
        expected = [
            {'documents': 0, 'id': n + 1, 'name': 'i%s' % n}
            for n in range(3)]
        self.assertEqual(payload['indexes'], expected)
Esempio n. 11
0
def main(argv=sys.argv):
    """
    Build an index from command-line arguments and register it.

    ``argv`` is handed to ``parse_args``. The parsed ``config_uri`` is passed
    to ``setup_logging``, ``slicing`` is decoded from JSON, and ``corpus``
    and ``database`` are passed to ``Index`` together with the decoded
    slices before ``register()`` is called. Progress is reported via ``log``.
    """
    log.info("Environment setup in progress.")
    options = parse_args(argv)
    setup_logging(options.config_uri)
    slice_spec = json.loads(options.slicing)

    log.info("Calling scout.Index, please hold on.")
    index = Index(options.corpus, options.database, slice_spec)

    log.info("Registering index, this may take some time.")
    index.register()

    log.info(f"Index registration completed. (sqlite :{options.database})")
Esempio n. 12
0
    def test_index_detail(self):
        """
        Check the paginated document listing on an index detail URL,
        including a document that was added to two indexes.
        """
        idx_a = Index.create(name='idx-a')
        idx_b = Index.create(name='idx-b')
        for n in range(11):
            idx_a.index('document-%s' % n, foo='bar-%s' % n)

        # Created in idx-b first, then added to idx-a as well.
        shared = idx_b.index('both-doc')
        idx_a.index(shared.content, shared)

        def fetch(url):
            return json.loads(self.app.get(url).data)

        def check_page(listing, page, pages, n_documents):
            self.assertEqual(listing['page'], page)
            self.assertEqual(listing['pages'], pages)
            self.assertEqual(len(listing['documents']), n_documents)

        # idx-a holds 12 documents: 10 on the first page, 2 on the second.
        listing = fetch('/idx-a/')
        check_page(listing, 1, 2, 10)
        self.assertEqual(listing['documents'][0], {
            'attachments': '/documents/1/attachments/',
            'content': 'document-0',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {'foo': 'bar-0'},
        })

        listing = fetch('/idx-a/?page=2')
        check_page(listing, 2, 2, 2)

        # idx-b holds only the shared document.
        listing = fetch('/idx-b/')
        check_page(listing, 1, 1, 1)
        self.assertEqual(listing['documents'][0], {
            'attachments': '/documents/12/attachments/',
            'content': 'both-doc',
            'id': 12,
            'identifier': None,
            'indexes': ['idx-b', 'idx-a'],
            'metadata': {},
        })
Esempio n. 13
0
    def test_search(self):
        """
        Index 17 documents, then check search pagination, the shape and
        score of individual hits, an empty result, and bm25 ranking.
        """
        idx = Index.create(name='idx')
        for phrase in ('foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo',
                       'huey', 'zaizee'):
            idx.index('document %s' % phrase, special=True)

        for n in range(10):
            idx.index('document %s' % n, special=False)

        def expected_hit(hit, content):
            # id and score are taken from the hit itself, so only the other
            # fields are really being compared.
            return {
                'content': content,
                'id': hit['id'],
                'identifier': None,
                'indexes': ['idx'],
                'metadata': {'special': 'True'},
                'score': hit['score'],
            }

        # All 17 documents match the prefix: 10 on page one, 7 on page two.
        results = self.search('idx', 'docum*')
        self.assertEqual(results['page'], 1)
        self.assertEqual(results['pages'], 2)
        self.assertEqual(len(results['documents']), 10)

        results = self.search('idx', 'document', 2)
        self.assertEqual(len(results['documents']), 7)

        results = self.search('idx', 'doc* nug*')
        self.assertEqual(results['page'], 1)
        self.assertEqual(results['pages'], 1)
        self.assertEqual(len(results['documents']), 2)
        first, second = results['documents']

        self.assertEqual(first, expected_hit(first, 'document nug nugs'))
        self.assertEqual(
            round(first['score'], 4),
            -2.2675 if IS_FTS5 else -0.)

        self.assertEqual(
            second, expected_hit(second, 'document blah nuggie foo'))
        self.assertEqual(
            round(second['score'], 4),
            -1.3588 if IS_FTS5 else -0.)

        results = self.search('idx', 'missing')
        self.assertEqual(len(results['documents']), 0)

        results = self.search('idx', 'nug', ranking='bm25')
        top = results['documents'][0]
        self.assertEqual(top['content'], 'document nug nugs')
        self.assertEqual(
            round(top['score'], 3),
            -2.98 if IS_FTS5 else -2.891)
Esempio n. 14
0
    def test_search_filters(self):
        """
        Index five documents with metadata and check that passing metadata
        filters to the search narrows the matched documents as expected.
        """
        idx = Index.create(name='idx')
        fixtures = [
            ('huey document', {'name': 'huey', 'kitty': 'yes'}),
            ('zaizee document', {'name': 'zaizee', 'kitty': 'yes'}),
            ('little huey bear', {'name': 'huey', 'kitty': 'yes'}),
            ('uncle huey', {'kitty': 'no'}),
            ('michael nuggie document', {'name': 'mickey', 'kitty': 'no'}),
        ]
        for text, meta in fixtures:
            idx.index(text, **meta)

        # (search query, metadata filters, contents expected in order)
        cases = (
            ('huey',
             {},
             ['huey document', 'little huey bear', 'uncle huey']),
            ('huey',
             {'kitty': 'yes'},
             ['huey document', 'little huey bear']),
            ('huey',
             {'kitty': 'yes', 'name': 'huey'},
             ['huey document', 'little huey bear']),
            ('docu*',
             {'kitty': 'yes'},
             ['huey document', 'zaizee document']),
        )
        for query, filters, expected in cases:
            results = self.search('idx', query, **filters)
            found = [hit['content'] for hit in results['documents']]
            self.assertEqual(found, expected)
Esempio n. 15
0
    def test_document_detail_update_attachments(self):
        """
        POST new content plus two files to a document that already has a
        ``foo.jpg`` attachment, then check the response and the stored
        attachment data.
        """
        idx = Index.create(name='idx')
        doc = idx.index('test doc', foo='bar', nug='baze')
        doc.attach('foo.jpg', 'empty')
        detail_url = '/documents/%s/' % doc.get_id()

        # The JSON payload travels in the "data" form field beside the files.
        form = {
            'data': json.dumps({'content': 'test doc-edited'}),
            'file_0': (StringIO('xx'), 'foo.jpg'),
            'file_1': (StringIO('yy'), 'foo2.jpg'),
        }
        resp_data = json.loads(self.app.post(detail_url, data=form).data)

        expected = {
            'attachments': '/documents/1/attachments/',
            'content': 'test doc-edited',
            'id': 1,
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {'foo': 'bar', 'nug': 'baze'},
        }
        self.assertEqual(resp_data, expected)

        self.assertEqual(Attachment.select().count(), 2)
        self.assertEqual(BlobData.select().count(), 3)

        # Existing file updated, new file added.
        by_filename = Attachment.select().order_by(Attachment.filename)
        updated, added = by_filename
        self.assertEqual(updated.blob.data, 'xx')
        self.assertEqual(added.blob.data, 'yy')
Esempio n. 16
0
    def test_index_document_validation(self):
        """
        POST invalid payloads to ``/documents/`` and check the error message
        returned for each; no document may exist afterwards.
        """
        Index.create(name='idx')

        no_index_error = 'You must specify either an "index" or "indexes".'
        # (request payload, expected value of the "error" key)
        cases = [
            ({'content': 'foo'}, no_index_error),
            ({'content': 'x', 'index': ''}, no_index_error),
            ({'content': 'foo', 'index': 'missing'},
             'The following indexes were not found: missing.'),
            ({'content': 'foo', 'indexes': ['missing', 'idx', 'blah']},
             'The following indexes were not found: missing, blah.'),
        ]
        for payload, message in cases:
            reply = self.post_json('/documents/', payload)
            self.assertEqual(reply['error'], message)

        self.assertEqual(Document.select().count(), 0)
Esempio n. 17
0
    def test_index_document_validation(self):
        """
        POST invalid payloads to ``/documents/`` and check the error message
        returned for each; no document may exist afterwards.
        """
        Index.create(name='idx')

        def error_for(payload):
            return self.post_json('/documents/', payload)['error']

        no_index_error = 'You must specify either an "index" or "indexes".'

        # Neither key present, then an empty "index" value.
        self.assertEqual(error_for({'content': 'foo'}), no_index_error)
        self.assertEqual(
            error_for({'content': 'x', 'index': ''}),
            no_index_error)

        # Unknown index names are reported back; the known 'idx' is not.
        self.assertEqual(
            error_for({'content': 'foo', 'index': 'missing'}),
            'The following indexes were not found: missing.')
        self.assertEqual(
            error_for({
                'content': 'foo',
                'indexes': ['missing', 'idx', 'blah']}),
            'The following indexes were not found: missing, blah.')

        self.assertEqual(Document.select().count(), 0)
Esempio n. 18
0
    def test_document_detail_post(self):
        """
        POST partial updates (content, metadata, indexes) to a document's
        detail URL and check the document after each one.
        """
        idx = Index.create(name='idx')
        Index.create(name='alt-idx')
        doc = idx.index('test doc', foo='bar', nug='baze')
        alt_doc = idx.index('alt doc')

        url = '/documents/%s/' % doc.rowid

        def check_stored(document, content, metadata=None, indexes=None):
            # refresh_doc is a helper on the test case; the checks run
            # against whatever it returns for the given document.
            stored = self.refresh_doc(document)
            self.assertEqual(stored.content, content)
            index_names = [index.name for index in stored.get_indexes()]
            self.assertEqual(index_names, indexes or [])
            self.assertEqual(stored.metadata, metadata or {})

        # Content only: metadata and indexes are expected to stay put.
        self.post_json(url, {'content': 'updated'})
        check_stored(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

        # Metadata: change one key and add another.
        new_metadata = dict(doc.metadata, nug='baz', herp='derp')
        self.post_json(url, {'metadata': new_metadata})
        check_stored(
            doc,
            'updated',
            {'foo': 'bar', 'nug': 'baz', 'herp': 'derp'},
            ['idx'])

        # Metadata set to None is expected to clear it.
        self.post_json(url, {'metadata': None})
        check_stored(doc, 'updated', {}, ['idx'])

        # Add the document to a second index.
        self.post_json(url, {'indexes': ['idx', 'alt-idx']})
        check_stored(doc, 'updated', {}, ['alt-idx', 'idx'])

        # An empty index list is expected to remove it from every index.
        self.post_json(url, {'indexes': []})
        check_stored(doc, 'updated', {}, [])

        # The other document must be untouched by all of the above.
        check_stored(alt_doc, 'alt doc', {}, ['idx'])

        # Sanity check: still exactly two documents.
        self.assertEqual(Document.select().count(), 2)
Esempio n. 19
0
    def test_document_detail_post(self):
        """
        POST partial updates (content, metadata, indexes) to a document's
        detail URL and check the document after each one.
        """
        idx = Index.create(name='idx')
        Index.create(name='alt-idx')
        doc = idx.index('test doc', foo='bar', nug='baze')
        alt_doc = idx.index('alt doc')

        url = '/documents/%s/' % doc.get_id()

        def check_stored(document, content, metadata=None, indexes=None):
            # refresh_doc is a helper on the test case; the checks run
            # against whatever it returns for the given document.
            stored = self.refresh_doc(document)
            self.assertEqual(stored.content, content)
            index_names = [index.name for index in stored.get_indexes()]
            self.assertEqual(index_names, indexes or [])
            self.assertEqual(stored.metadata, metadata or {})

        # Content only: metadata and indexes are expected to stay put.
        self.post_json(url, {'content': 'updated'})
        check_stored(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

        # Metadata: change one key and add another.
        new_metadata = dict(doc.metadata, nug='baz', herp='derp')
        self.post_json(url, {'metadata': new_metadata})
        check_stored(
            doc,
            'updated',
            {'foo': 'bar', 'nug': 'baz', 'herp': 'derp'},
            ['idx'])

        # Metadata set to None is expected to clear it.
        self.post_json(url, {'metadata': None})
        check_stored(doc, 'updated', {}, ['idx'])

        # Add the document to a second index.
        self.post_json(url, {'indexes': ['idx', 'alt-idx']})
        check_stored(doc, 'updated', {}, ['alt-idx', 'idx'])

        # An empty index list is expected to remove it from every index.
        self.post_json(url, {'indexes': []})
        check_stored(doc, 'updated', {}, [])

        # The other document must be untouched by all of the above.
        check_stored(alt_doc, 'alt doc', {}, ['idx'])

        # Sanity check: still exactly two documents.
        self.assertEqual(Document.select().count(), 2)
Esempio n. 20
0
    def test_attachment_views(self):
        """
        Walk through the attachment endpoints of one document: list,
        detail, delete, replace via POST, and download.
        """
        idx = Index.create(name='idx')
        doc = idx.index('doc 1')
        for filename in ('foo.jpg', 'bar.png'):
            doc.attach(filename, 'x')
        # Pin every timestamp so the serialized output is predictable.
        Attachment.update(timestamp='2016-01-02 03:04:05').execute()

        bar_json = {
            'mimetype': 'image/png',
            'timestamp': '2016-01-02 03:04:05',
            'data_length': 1,
            'filename': 'bar.png',
            'document': '/documents/1/',
            'data': '/documents/1/attachments/bar.png/download/',
        }
        foo_json = {
            'mimetype': 'image/jpeg',
            'timestamp': '2016-01-02 03:04:05',
            'data_length': 1,
            'filename': 'foo.jpg',
            'document': '/documents/1/',
            'data': '/documents/1/attachments/foo.jpg/download/',
        }

        # List view: bar.png is expected ahead of foo.jpg.
        listing = json.loads(self.app.get('/documents/1/attachments/').data)
        self.assertEqual(listing['attachments'], [bar_json, foo_json])

        # Detail view of a single attachment.
        detail = json.loads(
            self.app.get('/documents/1/attachments/foo.jpg/').data)
        self.assertEqual(detail, foo_json)

        # Deleting foo.jpg leaves one attachment.
        self.app.delete('/documents/1/attachments/foo.jpg/')
        self.assertEqual(Attachment.select().count(), 1)

        # Replace the content of bar.png with a two-byte file.
        upload = {
            'data': '',
            'file_0': (StringIO('zz'), 'bar.png'),
        }
        replaced = json.loads(
            self.app.post('/documents/1/attachments/bar.png/',
                          data=upload).data)
        self.assertEqual(replaced['data_length'], 2)

        download = self.app.get('/documents/1/attachments/bar.png/download/')
        self.assertEqual(download.data, 'zz')
Esempio n. 21
0
    def test_index_update_delete(self):
        """
        Rename an index with a POST, DELETE it under its new name, and
        check how many document, link and index rows remain.
        """
        idx = Index.create(name='idx')
        alt_idx = Index.create(name='alt-idx')
        shared = idx.index(content='foo')
        alt_idx.index(shared.content, shared)
        idx.index('idx only')
        alt_idx.index('alt only')

        renamed = self.post_json('/idx/', {'name': 'idx-updated'})
        self.assertEqual(renamed['id'], idx.id)
        self.assertEqual(renamed['name'], 'idx-updated')
        contents = [item['content'] for item in renamed['documents']]
        self.assertEqual(contents, ['foo', 'idx only'])

        deleted = json.loads(self.app.delete('/idx-updated/').data)
        self.assertEqual(deleted, {'success': True})

        # All three documents are expected to survive; two index links and
        # one index remain.
        for model, remaining in (
                (Document, 3), (IndexDocument, 2), (Index, 1)):
            self.assertEqual(model.select().count(), remaining)
Esempio n. 22
0
    def test_document_detail_get(self):
        """
        GET a document's detail URL and compare the JSON body with the
        document's content, id, indexes and metadata.
        """
        idx = Index.create(name='idx')
        target = idx.index('test doc', foo='bar')
        # A second document, so the index holds more than the one requested.
        idx.index('alt doc')

        detail_url = '/documents/%s/' % target.rowid
        payload = json.loads(self.app.get(detail_url).data)

        expected = {
            'content': 'test doc',
            'id': target.rowid,
            'indexes': ['idx'],
            'metadata': {'foo': 'bar'},
        }
        self.assertEqual(payload, expected)
Esempio n. 23
0
    def test_index_detail(self):
        """
        Check the paginated document listing on an index detail URL,
        including a document that was added to two indexes.
        """
        idx_a = Index.create(name='idx-a')
        idx_b = Index.create(name='idx-b')
        for n in range(11):
            idx_a.index('document-%s' % n, foo='bar-%s' % n)

        # Created in idx-b first, then added to idx-a as well.
        shared = idx_b.index('both-doc')
        idx_a.index(shared.content, shared)

        def fetch(url):
            return json.loads(self.app.get(url).data)

        def check_page(listing, page, pages, n_documents):
            self.assertEqual(listing['page'], page)
            self.assertEqual(listing['pages'], pages)
            self.assertEqual(len(listing['documents']), n_documents)

        # idx-a holds 12 documents: 10 on the first page, 2 on the second.
        listing = fetch('/idx-a/')
        check_page(listing, 1, 2, 10)
        self.assertEqual(listing['documents'][0], {
            'content': 'document-0',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {'foo': 'bar-0'},
        })

        listing = fetch('/idx-a/?page=2')
        check_page(listing, 2, 2, 2)

        # idx-b holds only the shared document.
        listing = fetch('/idx-b/')
        check_page(listing, 1, 1, 1)
        self.assertEqual(listing['documents'][0], {
            'content': 'both-doc',
            'id': 12,
            'identifier': None,
            'indexes': ['idx-b', 'idx-a'],
            'metadata': {},
        })
Esempio n. 24
0
    def test_index_document(self):
        """
        POST documents to ``/documents/`` using the single ``index`` key and
        the ``indexes`` list, and check the document returned each time.
        """
        for index_name in ('idx-a', 'idx-b'):
            Index.create(name=index_name)

        # One index, named through the "index" key, with metadata.
        first_request = {
            'content': 'doc 1',
            'index': 'idx-a',
            'metadata': {'k1': 'v1', 'k2': 'v2'},
        }
        first_expected = {
            'content': 'doc 1',
            'id': 1,
            'indexes': ['idx-a'],
            'metadata': {'k1': 'v1', 'k2': 'v2'},
        }
        self.assertEqual(
            self.post_json('/documents/', first_request),
            first_expected)

        # Two indexes, named through the "indexes" key, no metadata.
        second_request = {
            'content': 'doc 2',
            'indexes': ['idx-a', 'idx-b'],
        }
        second_expected = {
            'content': 'doc 2',
            'id': 2,
            'indexes': ['idx-a', 'idx-b'],
            'metadata': {},
        }
        self.assertEqual(
            self.post_json('/documents/', second_request),
            second_expected)
Esempio n. 25
0
    def test_index_update_delete(self):
        """
        Rename an index with a POST, DELETE it under its new name, and
        check how many document, link and index rows remain.
        """
        idx = Index.create(name='idx')
        alt_idx = Index.create(name='alt-idx')
        shared = idx.index(content='foo')
        alt_idx.index(shared.content, shared)
        idx.index('idx only')
        alt_idx.index('alt only')

        renamed = self.post_json('/idx/', {'name': 'idx-updated'})
        self.assertEqual(renamed['id'], idx.id)
        self.assertEqual(renamed['name'], 'idx-updated')
        contents = [item['content'] for item in renamed['documents']]
        self.assertEqual(contents, ['foo', 'idx only'])

        deleted = json.loads(self.app.delete('/idx-updated/').data)
        self.assertEqual(deleted, {'success': True})

        # All three documents are expected to survive; two index links and
        # one index remain.
        remaining_rows = ((Document, 3), (IndexDocument, 2), (Index, 1))
        for model, remaining in remaining_rows:
            self.assertEqual(model.select().count(), remaining)
Esempio n. 26
0
    def test_query_count(self):
        """
        Pin the number of queries issued by the search, index detail,
        document list and index list endpoints.
        """
        idx_a = Index.create(name='idx-a')
        idx_b = Index.create(name='idx-b')
        for word in ('foo', 'bar', 'baze', 'nug', 'nuggie'):
            text = 'document ' + word
            doc = idx_a.index(text)
            idx_b.index(text, doc, foo='bar', baze='nug')

        # Queries expected per search, as enumerated by the original author:
        # 1. Get index.
        # 2. Get # of docs in index.
        # 3. Prefetch indexes.
        # 4. Prefetch index documents.
        # 5. Prefetch metadata
        # 6. Fetch documents (top of prefetch).
        # 7. COUNT(*) for pagination.
        # 8. COUNT(*) for pagination.
        for index_name in ('idx-a', 'idx-b'):
            for term in ('nug', 'nug*', 'document', 'missing'):
                # First without filters, then with a metadata filter.
                for filters in ({}, {'foo': 'bar'}):
                    with assert_query_count(8):
                        self.search(index_name, term, **filters)

        with assert_query_count(8):
            # Same as above.
            body = self.app.get('/idx-a/').data

        with assert_query_count(7):
            # Same as above minus first query for index.
            self.app.get('/documents/')

        for n in range(10):
            Index.create(name='idx-%s' % n)

        with assert_query_count(2):
            # 2 queries, one for list, one for pagination.
            self.app.get('/')
Esempio n. 27
0
    def test_document_detail_by_identifier(self):
        """
        GET a document through ``/documents/identifier/<identifier>/`` and
        compare the JSON body with the document that has that identifier.
        """
        idx = Index.create(name='idx')
        target = idx.index('test doc', identifier='td', foo='bar')
        # A second document with a different identifier.
        idx.index('alt doc', identifier='ad')

        payload = json.loads(self.app.get('/documents/identifier/td/').data)

        expected = {
            'content': 'test doc',
            'id': target.get_id(),
            'identifier': target.identifier,
            'indexes': ['idx'],
            'metadata': {'foo': 'bar'},
        }
        self.assertEqual(payload, expected)
Esempio n. 28
0
    def test_authentication(self):
        """
        Exercise API-key authentication on the index listing at ``/``.

        With ``AUTHENTICATION`` set to ``'test'`` the test expects a 401 for
        a missing or wrong key and a 200 plus the index listing for the
        right key, whether the key is sent as the ``key`` query parameter or
        as a ``key`` header.
        """
        Index.create(name='idx')

        # ``app`` is not created by this test, so the setting would outlive
        # it. Register the restore step before changing the value so it is
        # put back even when one of the assertions below fails.
        had_setting = 'AUTHENTICATION' in app.config
        previous = app.config.get('AUTHENTICATION')

        def restore_authentication():
            if had_setting:
                app.config['AUTHENTICATION'] = previous
            else:
                app.config.pop('AUTHENTICATION', None)

        self.addCleanup(restore_authentication)
        app.config['AUTHENTICATION'] = 'test'

        # No key supplied at all.
        resp = self.app.get('/')
        self.assertEqual(resp.status_code, 401)
        self.assertEqual(resp.data, 'Invalid API key')

        # Wrong key: first as a query parameter, then as a header.
        resp = self.app.get('/?key=tesss')
        self.assertEqual(resp.status_code, 401)

        resp = self.app.get('/', headers={'key': 'tesss'})
        self.assertEqual(resp.status_code, 401)

        expected_body = {'indexes': [
            {'id': 1, 'name': 'idx', 'documents': 0}]}

        # Correct key: first as a query parameter, then as a header.
        resp = self.app.get('/?key=test')
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json.loads(resp.data), expected_body)

        resp = self.app.get('/', headers={'key': 'test'})
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json.loads(resp.data), expected_body)
Esempio n. 29
0
    def test_document_detail_get(self):
        """
        GET a document's detail URL and compare the JSON body with the
        document's content, id, attachments URL, indexes and metadata.
        """
        idx = Index.create(name='idx')
        target = idx.index('test doc', foo='bar')
        # A second document, so the index holds more than the one requested.
        idx.index('alt doc')

        detail_url = '/documents/%s/' % target.get_id()
        payload = json.loads(self.app.get(detail_url).data)

        expected = {
            'attachments': '/documents/%s/attachments/' % target.get_id(),
            'content': 'test doc',
            'id': target.get_id(),
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {'foo': 'bar'},
        }
        self.assertEqual(payload, expected)
Esempio n. 30
0
    def test_index_document_attachments(self):
        """
        POST a document together with two files, then fetch the attachment
        listing from the URL in the response and check its contents.
        """
        Index.create(name='idx-a')

        # The JSON payload travels in the "data" form field beside the files.
        form = {
            'data': json.dumps({
                'content': 'doc a',
                'index': 'idx-a',
                'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
            }),
            'file_0': (StringIO('testfile1'), 'test1.txt'),
            'file_1': (StringIO('testfile2'), 'test2.jpg'),
        }
        resp_data = json.loads(self.app.post('/documents/', data=form).data)
        self.assertEqual(resp_data, {
            'attachments': '/documents/1/attachments/',
            'content': 'doc a',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {'k1': 'v1-a', 'k2': 'v2-a'},
        })

        # Pin every timestamp so the serialized output is predictable.
        Attachment.update(timestamp='2016-02-01 01:02:03').execute()

        with assert_query_count(3):
            listing = self.app.get(resp_data['attachments'])

        text_file = {
            'mimetype': 'text/plain',
            'timestamp': '2016-02-01 01:02:03',
            'data_length': 9,
            'filename': 'test1.txt',
            'document': '/documents/1/',
            'data': '/documents/1/attachments/test1.txt/download/',
        }
        image_file = {
            'mimetype': 'image/jpeg',
            'timestamp': '2016-02-01 01:02:03',
            'data_length': 9,
            'filename': 'test2.jpg',
            'document': '/documents/1/',
            'data': '/documents/1/attachments/test2.jpg/download/',
        }
        self.assertEqual(json.loads(listing.data), {
            'ordering': [],
            'pages': 1,
            'page': 1,
            'attachments': [text_file, image_file],
        })
Esempio n. 31
0
    def test_query_count(self):
        # Assert, via assert_query_count, how many queries each view issues
        # for searches, document listings and the index listing.
        first = Index.create(name='idx-a')
        second = Index.create(name='idx-b')
        for word in ['foo', 'bar', 'baze', 'nug', 'nuggie']:
            text = 'document ' + word
            stored = first.index(text)
            # The second index receives the same content plus the document
            # returned by the first index() call, and two metadata values.
            second.index(text, stored, foo='bar', baze='nug')

        # Every (index, query) combination, in the same order a nested loop
        # over indexes then queries would visit them.
        searches = [
            (index_name, term)
            for index_name in ['idx-a', 'idx-b']
            for term in ['nug', 'nug*', 'document', 'missing']]

        for index_name, term in searches:
            # The six expected queries:
            # 1. Get index.
            # 2. Prefetch indexes.
            # 3. Prefetch index documents.
            # 4. Prefetch metadata
            # 5. Fetch documents (top of prefetch).
            # 6. COUNT(*) for pagination.
            with assert_query_count(6):
                self.search(index_name, term)

            # Adding a metadata filter is expected to cost no extra queries.
            with assert_query_count(6):
                self.search(index_name, term, foo='bar')

        # Listing one index's documents: same six queries as a search.
        with assert_query_count(6):
            self.app.get('/idx-a/')

        # Global document listing: as above, minus the index lookup.
        with assert_query_count(5):
            self.app.get('/documents/')

        # The index listing is expected to stay at a single query even
        # after ten more indexes are added.
        for number in range(10):
            Index.create(name='idx-%s' % number)

        with assert_query_count(1):
            self.app.get('/')
Esempio n. 32
0
    def test_search_filters(self):
        # Search the 'idx' index with and without metadata filters and
        # compare the content of the returned documents, in order.
        index = Index.create(name='idx')
        fixtures = [
            ('huey document', {'name': 'huey', 'kitty': 'yes'}),
            ('zaizee document', {'name': 'zaizee', 'kitty': 'yes'}),
            ('little huey bear', {'name': 'huey', 'kitty': 'yes'}),
            ('uncle huey', {'kitty': 'no'}),
            ('michael nuggie document', {'name': 'mickey', 'kitty': 'no'}),
        ]
        for text, meta in fixtures:
            index.index(text, **meta)

        # The expected order of the unfiltered 'huey' results depends on
        # the IS_FTS5 flag.
        unfiltered = (
            ['huey document', 'uncle huey', 'little huey bear']
            if IS_FTS5 else
            ['huey document', 'little huey bear', 'uncle huey'])

        # (query, metadata filters, expected document contents)
        cases = [
            ('huey', {}, unfiltered),
            ('huey', {'kitty': 'yes'},
             ['huey document', 'little huey bear']),
            ('huey', {'kitty': 'yes', 'name': 'huey'},
             ['huey document', 'little huey bear']),
            ('docu*', {'kitty': 'yes'},
             ['huey document', 'zaizee document']),
        ]
        for query, filters, expected in cases:
            documents = self.search('idx', query, **filters)['documents']
            self.assertEqual(
                [document['content'] for document in documents],
                expected)
Esempio n. 33
0
 def setUp(self):
     # Run the parent class's setUp first, then create an index named
     # 'default' and keep it on self.index.
     super(TestModelAPIs, self).setUp()
     default_index = Index.create(name='default')
     self.index = default_index
Esempio n. 34
0
 def setUp(self):
     # Run the parent class's setUp first, then create an index named
     # 'default' and keep it on self.index.
     super(TestModelAPIs, self).setUp()
     default_index = Index.create(name='default')
     self.index = default_index
Esempio n. 35
0
 def test_create_index(self):
     # Send {'name': 'TestIndex'} to '/' through self.post_json, then check
     # the returned fields and that exactly one Index row exists.
     created = self.post_json('/', {'name': 'TestIndex'})
     for field, expected in (('name', 'TestIndex'), ('documents', [])):
         self.assertEqual(created[field], expected)
     index_total = Index.select().count()
     self.assertEqual(index_total, 1)
Esempio n. 36
0
 def test_create_index(self):
     # Send {'name': 'TestIndex'} to '/' through self.post_json, then check
     # the returned fields and that exactly one Index row exists.
     created = self.post_json('/', {'name': 'TestIndex'})
     for field, expected in (('name', 'TestIndex'), ('documents', [])):
         self.assertEqual(created[field], expected)
     index_total = Index.select().count()
     self.assertEqual(index_total, 1)
Esempio n. 37
0
    def test_search(self):
        # Exercise self.search against one index: pagination, multi-term
        # prefix queries, the shape of a returned document, and scores.
        idx = Index.create(name='idx')
        phrases = ['foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo', 'huey',
                   'zaizee']
        # Seven phrase documents flagged special=True ...
        for phrase in phrases:
            idx.index('document %s' % phrase, special=True)

        # ... plus ten numbered documents flagged special=False, for a total
        # of 17 documents that all contain the word 'document'.
        for i in range(10):
            idx.index('document %s' % i, special=False)

        # The prefix query is expected to match all 17 documents, returned
        # as two pages with ten documents on the first.
        response = self.search('idx', 'docum*')
        self.assertEqual(response['page'], 1)
        self.assertEqual(response['pages'], 2)
        self.assertEqual(len(response['documents']), 10)

        # NOTE(review): the third positional argument is presumably the page
        # number -- the seven documents expected here match the remainder of
        # the 17; confirm against the self.search helper.
        response = self.search('idx', 'document', 2)
        self.assertEqual(len(response['documents']), 7)

        # Two prefix terms together: only the 'nug nugs' and
        # 'blah nuggie foo' documents are expected, on a single page.
        response = self.search('idx', 'doc* nug*')
        self.assertEqual(response['page'], 1)
        self.assertEqual(response['pages'], 1)
        self.assertEqual(len(response['documents']), 2)
        doc1, doc2 = response['documents']

        # 'id' and 'score' are echoed from the response itself, so this
        # comparison pins the remaining fields and the key set. The metadata
        # value is expected back as the string 'True' although the boolean
        # True was passed when indexing.
        self.assertEqual(doc1, {
            'attachments': '/documents/%s/attachments/' % doc1['id'],
            'content': 'document nug nugs',
            'id': doc1['id'],
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {'special': 'True'},
            'score': doc1['score']})

        # The expected score depends on IS_FTS5; on the other branch it must
        # round to zero (-0. compares equal to 0.0).
        if IS_FTS5:
            self.assertEqual(round(doc1['score'], 4), -2.2675)
        else:
            self.assertEqual(round(doc1['score'], 4), -0.)

        # Same structural check for the second hit.
        self.assertEqual(doc2, {
            'attachments': '/documents/%s/attachments/' % doc2['id'],
            'content': 'document blah nuggie foo',
            'id': doc2['id'],
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {'special': 'True'},
            'score': doc2['score']})

        if IS_FTS5:
            self.assertEqual(round(doc2['score'], 4), -1.3588)
        else:
            self.assertEqual(round(doc2['score'], 4), -0.)

        # A term that appears in no document yields an empty result list.
        response = self.search('idx', 'missing')
        self.assertEqual(len(response['documents']), 0)

        # Requesting ranking='bm25' explicitly: the top hit is still the
        # 'nug nugs' document, and the expected score again depends on
        # IS_FTS5.
        response = self.search('idx', 'nug', ranking='bm25')
        doc = response['documents'][0]
        self.assertEqual(doc['content'], 'document nug nugs')
        if IS_FTS5:
            self.assertEqual(round(doc['score'], 3), -2.98)
        else:
            self.assertEqual(round(doc['score'], 3), -2.891)