Beispiel #1
0
    def test_index_list(self):
        """The root endpoint lists every index, each with a zero document count."""
        for suffix in range(3):
            Index.create(name='i%s' % suffix)

        expected = [
            {'document_count': 0, 'documents': '/i0/', 'id': 1, 'name': 'i0'},
            {'document_count': 0, 'documents': '/i1/', 'id': 2, 'name': 'i1'},
            {'document_count': 0, 'documents': '/i2/', 'id': 3, 'name': 'i2'},
        ]

        payload = json_load(self.app.get('/').data)
        self.assertEqual(payload['indexes'], expected)
Beispiel #2
0
 def setUp(self):
     """Create a test client, the ``default`` index and two unused indexes."""
     super(TestSearch, self).setUp()
     self.app = app.test_client()
     self.index = Index.create(name='default')
     for spare_name in ('unused-1', 'unused-2'):
         Index.create(name=spare_name)
     # Clear the AUTHENTICATION setting before each test.
     app.config['AUTHENTICATION'] = None
Beispiel #3
0
    def test_document_detail_delete(self):
        """Deleting a document also removes its metadata, attachment and index links."""
        primary = Index.create(name='idx')
        secondary = Index.create(name='alt-idx')

        kept = primary.index('doc 1', k1='v1', k2='v2')
        removed = primary.index('doc 2', k3='v3')
        removed.attach('foo.jpg', 'bar')

        # Both documents are also linked to the second index.
        for document in (kept, removed):
            secondary.add_to_index(document)

        # Three metadata rows (k1, k2, k3) and one attachment exist up front.
        self.assertEqual(Metadata.select().count(), 3)
        self.assertEqual(Attachment.select().count(), 1)

        url = '/documents/%s/' % removed.get_id()
        first_attempt = self.app.delete(url)
        self.assertEqual(json_load(first_attempt.data), {'success': True})

        self.assertEqual(Metadata.select().count(), 2)
        self.assertEqual(Attachment.select().count(), 0)

        # Deleting the same document a second time yields a 404.
        second_attempt = self.app.delete(url)
        self.assertEqual(second_attempt.status_code, 404)

        self.assertEqual(Document.select().count(), 1)
        self.assertEqual(IndexDocument.select().count(), 2)
        survivors = [kept.get_id()]
        for index in (primary, secondary):
            self.assertEqual([doc.get_id() for doc in index.documents],
                             survivors)
Beispiel #4
0
    def test_authentication(self):
        """Requests are rejected unless they carry the configured API key.

        The key is accepted either as a ``key`` query-string argument or as a
        ``key`` request header; a missing or wrong key yields a 401.
        """
        Index.create(name='idx')

        # Put the previous setting back when the test finishes (even on
        # failure) so the API-key requirement cannot leak into other tests
        # that use the shared ``app`` object.
        self.addCleanup(app.config.__setitem__, 'AUTHENTICATION',
                        app.config.get('AUTHENTICATION'))
        app.config['AUTHENTICATION'] = 'test'

        # No key at all.
        resp = self.app.get('/')
        self.assertEqual(resp.status_code, 401)
        self.assertEqual(resp.data.decode('utf-8'), 'Invalid API key')

        # Wrong key, supplied via query string and then via header.
        resp = self.app.get('/?key=tesss')
        self.assertEqual(resp.status_code, 401)

        resp = self.app.get('/', headers={'key': 'tesss'})
        self.assertEqual(resp.status_code, 401)

        expected_indexes = [{
            'id': 1,
            'name': 'idx',
            'document_count': 0,
            'documents': '/idx/'
        }]

        # Correct key, supplied via query string and then via header.
        resp = self.app.get('/?key=test')
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json_load(resp.data)['indexes'], expected_indexes)

        resp = self.app.get('/', headers={'key': 'test'})
        self.assertEqual(resp.status_code, 200)
        self.assertEqual(json_load(resp.data)['indexes'], expected_indexes)
Beispiel #5
0
    def test_index_detail(self):
        """Index detail pages are paginated and report each document's indexes."""
        first = Index.create(name='idx-a')
        second = Index.create(name='idx-b')
        for n in range(11):
            first.index('document-%s' % n, foo='bar-%s' % n)

        # One extra document is stored in both indexes.
        shared = second.index('both-doc')
        first.index(shared.content, shared)

        def fetch(url):
            # GET the URL and decode the JSON body.
            return json_load(self.app.get(url).data)

        # idx-a holds 12 documents; the first page shows 10 of them.
        page_one = fetch('/idx-a/')
        self.assertEqual(page_one['page'], 1)
        self.assertEqual(page_one['pages'], 2)
        self.assertEqual(len(page_one['documents']), 10)
        self.assertEqual(
            page_one['documents'][0], {
                'attachments': [],
                'content': 'document-0',
                'id': 1,
                'identifier': None,
                'indexes': ['idx-a'],
                'metadata': {
                    'foo': 'bar-0'
                }
            })

        page_two = fetch('/idx-a/?page=2')
        self.assertEqual(page_two['page'], 2)
        self.assertEqual(page_two['pages'], 2)
        self.assertEqual(len(page_two['documents']), 2)

        # idx-b only contains the shared document.
        other = fetch('/idx-b/')
        self.assertEqual(other['page'], 1)
        self.assertEqual(other['pages'], 1)
        self.assertEqual(len(other['documents']), 1)
        self.assertEqual(
            other['documents'][0], {
                'attachments': [],
                'content': 'both-doc',
                'id': 12,
                'identifier': None,
                'indexes': ['idx-b', 'idx-a'],
                'metadata': {}
            })
Beispiel #6
0
    def test_multi_index(self):
        """
        A single document can be linked to several indexes at once.
        """
        # Remove the index created in setUp so only the three below remain.
        self.index.delete_instance()

        targets = [Index.create(name='idx-%s' % n) for n in range(3)]
        document = Document.create(content='hueybear')
        for target in targets:
            target.index(document.content, document)

        self.assertEqual(Document.select().count(), 1)
        self.assertEqual(Index.select().count(), 3)
        self.assertEqual(IndexDocument.select().count(), 3)

        links = (IndexDocument
                 .select(Index.name, IndexDocument.document)
                 .join(Index)
                 .order_by(Index.name)
                 .dicts())
        doc_id = document.get_id()
        expected = [
            {'document': doc_id, 'name': name}
            for name in ('idx-0', 'idx-1', 'idx-2')
        ]
        self.assertEqual(list(links), expected)
Beispiel #7
0
    def test_index_document_validation(self):
        """Posting a document without valid index names returns an error."""
        Index.create(name='idx')

        missing_index = 'You must specify either an "index" or "indexes".'
        cases = (
            # No index given at all.
            ({'content': 'foo'}, missing_index),
            # Empty index name.
            ({'content': 'x', 'index': ''}, missing_index),
            # Single unknown index.
            ({'content': 'foo', 'index': 'missing'},
             'The following indexes were not found: missing.'),
            # Mix of known and unknown indexes: only the unknown are named.
            ({'content': 'foo', 'indexes': ['missing', 'idx', 'blah']},
             'The following indexes were not found: missing, blah.'),
        )
        for payload, message in cases:
            reply = self.post_json('/documents/', payload)
            self.assertEqual(reply['error'], message)

        # None of the rejected requests stored a document.
        self.assertEqual(Document.select().count(), 0)
Beispiel #8
0
    def test_document_detail_post(self):
        """POSTing to a document updates its content, metadata and indexes."""
        idx = Index.create(name='idx')
        Index.create(name='alt-idx')
        doc = idx.index('test doc', foo='bar', nug='baze')
        untouched = idx.index('alt doc')

        url = '/documents/%s/' % doc.get_id()

        def check(document, content, metadata=None, indexes=None):
            # Reload the document and compare its stored state.
            stored = self.refresh_doc(document)
            self.assertEqual(stored.content, content)
            index_names = [index.name for index in stored.get_indexes()]
            self.assertEqual(index_names, indexes or [])
            self.assertEqual(stored.metadata, metadata or {})

        # Changing only the content leaves metadata and indexes alone.
        self.post_json(url, {'content': 'updated'})
        check(doc, 'updated', {'foo': 'bar', 'nug': 'baze'}, ['idx'])

        # Metadata can be overwritten and extended.
        new_metadata = dict(doc.metadata, nug='baz', herp='derp')
        self.post_json(url, {'metadata': new_metadata})
        check(doc, 'updated', {
            'foo': 'bar',
            'nug': 'baz',
            'herp': 'derp'
        }, ['idx'])

        # A null metadata value clears it.
        self.post_json(url, {'metadata': None})
        check(doc, 'updated', {}, ['idx'])

        # The index list can be replaced...
        self.post_json(url, {'indexes': ['idx', 'alt-idx']})
        check(doc, 'updated', {}, ['alt-idx', 'idx'])

        # ...and emptied.
        self.post_json(url, {'indexes': []})
        check(doc, 'updated', {}, [])

        # The second document was never touched by any of the updates.
        check(untouched, 'alt doc', {}, ['idx'])

        # Still exactly two documents.
        self.assertEqual(Document.select().count(), 2)
Beispiel #9
0
    def test_index_update_delete(self):
        """An index can be renamed, and deleting it leaves the documents in place."""
        idx = Index.create(name='idx')
        alt_idx = Index.create(name='alt-idx')
        shared = idx.index(content='foo')
        alt_idx.index(shared.content, shared)
        idx.index('idx only')
        alt_idx.index('alt only')

        # Rename the index; the response describes the renamed index.
        renamed = self.post_json('/idx/', {'name': 'idx-updated'})
        self.assertEqual(renamed['id'], idx.id)
        self.assertEqual(renamed['name'], 'idx-updated')
        contents = [entry['content'] for entry in renamed['documents']]
        self.assertEqual(contents, ['foo', 'idx only'])

        # Delete it under its new name.
        deleted = json_load(self.app.delete('/idx-updated/').data)
        self.assertEqual(deleted, {'success': True})

        # All three documents survive; only alt-idx and its two links remain.
        self.assertEqual(Document.select().count(), 3)
        self.assertEqual(IndexDocument.select().count(), 2)
        self.assertEqual(Index.select().count(), 1)
Beispiel #10
0
    def create(self):
        """Create an index named by the posted ``name`` and return its detail view.

        A name that violates a database constraint (``IntegrityError``) is
        reported through ``error()`` as already existing.
        """
        payload = validator.parse_post(['name'])

        with database.atomic():
            try:
                created = Index.create(name=payload['name'])
            except IntegrityError:
                # NOTE(review): presumably error() aborts the request;
                # otherwise ``created`` would be unbound below -- confirm.
                error('"%s" already exists.' % payload['name'])
            else:
                message = 'Created new index "%s"' % created.name
                logger.info(message)

        return self.detail(created.name)
Beispiel #11
0
    def test_attachment_views(self):
        """Exercise the attachment list, detail, delete, update and download views."""
        idx = Index.create(name='idx')
        doc = idx.index('doc 1')
        for filename in ('foo.jpg', 'bar.png'):
            doc.attach(filename, 'x')
        # Pin every timestamp so the serialized output is predictable.
        Attachment.update(timestamp='2016-01-02 03:04:05').execute()

        base_url = '/documents/1/attachments/'

        listing = json_load(self.app.get(base_url).data)
        self.assertEqual(listing['attachments'], [
            {
                'mimetype': 'image/png',
                'timestamp': '2016-01-02 03:04:05',
                'data_length': 1,
                'filename': 'bar.png',
                'document': '/documents/1/',
                'data': '/documents/1/attachments/bar.png/download/',
            },
            {
                'mimetype': 'image/jpeg',
                'timestamp': '2016-01-02 03:04:05',
                'data_length': 1,
                'filename': 'foo.jpg',
                'document': '/documents/1/',
                'data': '/documents/1/attachments/foo.jpg/download/',
            },
        ])

        detail = json_load(self.app.get(base_url + 'foo.jpg/').data)
        self.assertEqual(
            detail, {
                'mimetype': 'image/jpeg',
                'timestamp': '2016-01-02 03:04:05',
                'data_length': 1,
                'filename': 'foo.jpg',
                'document': '/documents/1/',
                'data': '/documents/1/attachments/foo.jpg/download/',
            })

        # Deleting foo.jpg leaves only bar.png.
        self.app.delete(base_url + 'foo.jpg/')
        self.assertEqual(Attachment.select().count(), 1)

        # Upload new two-byte contents for bar.png.
        upload = {
            'data': '',
            'file_0': (BytesIO(b'zz'), 'bar.png')
        }
        updated = json_load(
            self.app.post(base_url + 'bar.png/', data=upload).data)
        self.assertEqual(updated['data_length'], 2)

        download = self.app.get(base_url + 'bar.png/download/')
        self.assertEqual(download.data, b'zz')
Beispiel #12
0
    def test_query_count(self):
        """Pin the number of SQL queries issued by the search and listing views."""
        idx_a = Index.create(name='idx-a')
        idx_b = Index.create(name='idx-b')
        for word in ['foo', 'bar', 'baze', 'nug', 'nuggie']:
            content = 'document ' + word
            doc = idx_a.index(content)
            idx_b.index(content, doc, foo='bar', baze='nug')

        search_terms = ['nug', 'nug*', 'document', 'missing']
        for index_name in ['idx-a', 'idx-b']:
            for term in search_terms:
                # Queries, as listed by the original author:
                # 1. Get index.
                # 2. Get # of docs in index.
                # 3. Prefetch indexes.
                # 4. Prefetch index documents.
                # 5. Prefetch metadata
                # 6. Fetch documents (top of prefetch).
                # 7. COUNT(*) for pagination.
                # 8. COUNT(*) for pagination.
                # NOTE(review): eight queries are listed but nine are
                # asserted -- confirm what the ninth one is.
                with assert_query_count(9):
                    self.search(index_name, term)

                # Adding a metadata filter does not change the count.
                with assert_query_count(9):
                    self.search(index_name, term, foo='bar')

        with assert_query_count(9):
            # Same as above.
            _ = self.app.get('/idx-a/').data

        with assert_query_count(8):
            # Same as above minus first query for index.
            self.app.get('/documents/')

        for n in range(10):
            Index.create(name='idx-%s' % n)

        with assert_query_count(2):
            # 2 queries, one for list, one for pagination.
            self.app.get('/')
Beispiel #13
0
    def test_index_document(self):
        """Documents can be posted to one index (``index``) or many (``indexes``)."""
        Index.create(name='idx-a')
        Index.create(name='idx-b')

        # Single index, with metadata.
        single = self.post_json(
            '/documents/', {
                'content': 'doc 1',
                'index': 'idx-a',
                'metadata': {
                    'k1': 'v1',
                    'k2': 'v2'
                }
            })
        expected_single = {
            'attachments': [],
            'content': 'doc 1',
            'id': 1,
            'identifier': None,
            'indexes': ['idx-a'],
            'metadata': {
                'k1': 'v1',
                'k2': 'v2'
            }
        }
        self.assertEqual(single, expected_single)

        # Several indexes, no metadata.
        multi = self.post_json('/documents/', {
            'content': 'doc 2',
            'indexes': ['idx-a', 'idx-b']
        })
        expected_multi = {
            'attachments': [],
            'content': 'doc 2',
            'id': 2,
            'identifier': None,
            'indexes': ['idx-a', 'idx-b'],
            'metadata': {}
        }
        self.assertEqual(multi, expected_multi)
Beispiel #14
0
    def test_document_detail_update_attachments(self):
        """Posting files to a document replaces same-named attachments and adds new ones."""
        idx = Index.create(name='idx')
        doc = idx.index('test doc', foo='bar', nug='baze')
        doc.attach('foo.jpg', 'empty')
        url = '/documents/%s/' % doc.docid

        # foo.jpg already exists on the document; foo2.jpg is new.
        form = {
            'data': json.dumps({'content': 'test doc-edited'}),
            'file_0': (BytesIO(b'xx'), 'foo.jpg'),
            'file_1': (BytesIO(b'yy'), 'foo2.jpg')
        }
        response = self.app.post(url, data=form)

        resp_data = json_load(response.data)
        existing = Attachment.get(Attachment.filename == 'foo.jpg')
        added = Attachment.get(Attachment.filename == 'foo2.jpg')
        existing_data = {
            'mimetype': 'image/jpeg',
            'data_length': 2,
            'data': '/documents/%s/attachments/foo.jpg/download/' % doc.docid,
            'timestamp': str(existing.timestamp),
            'filename': 'foo.jpg'
        }
        added_data = {
            'mimetype': 'image/jpeg',
            'data_length': 2,
            'data': '/documents/%s/attachments/foo2.jpg/download/' % doc.docid,
            'timestamp': str(added.timestamp),
            'filename': 'foo2.jpg'
        }
        expected = {
            'attachments': [existing_data, added_data],
            'content': 'test doc-edited',
            'id': 1,
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {
                'foo': 'bar',
                'nug': 'baze'
            }
        }
        self.assertEqual(resp_data, expected)

        self.assertEqual(Attachment.select().count(), 2)
        self.assertEqual(BlobData.select().count(), 3)

        # Existing file updated, new file added.
        by_name = Attachment.select().order_by(Attachment.filename)
        foo, foo2 = by_name
        self.assertEqual(foo.blob.data, b'xx')
        self.assertEqual(foo2.blob.data, b'yy')
Beispiel #15
0
    def test_document_detail_get(self):
        """GET on a document URL returns that document's serialized form."""
        idx = Index.create(name='idx')
        doc = idx.index('test doc', foo='bar')
        # A second document is indexed; the response is for ``doc`` only.
        idx.index('alt doc')

        url = '/documents/%s/' % doc.docid
        payload = json_load(self.app.get(url).data)

        expected = {
            'attachments': [],
            'content': 'test doc',
            'id': doc.get_id(),
            'identifier': None,
            'indexes': ['idx'],
            'metadata': {
                'foo': 'bar'
            }
        }
        self.assertEqual(payload, expected)
Beispiel #16
0
    def test_search_filters(self):
        """Search results can be narrowed by metadata key/value filters."""
        idx = Index.create(name='idx')
        fixtures = (
            ('huey document', {
                'name': 'huey',
                'kitty': 'yes'
            }),
            ('zaizee document', {
                'name': 'zaizee',
                'kitty': 'yes'
            }),
            ('little huey bear', {
                'name': 'huey',
                'kitty': 'yes'
            }),
            ('uncle huey', {
                'kitty': 'no'
            }),
            ('michael nuggie document', {
                'name': 'mickey',
                'kitty': 'no'
            }),
        )
        for text, meta in fixtures:
            idx.index(text, **meta)

        def check(query, filters, expected):
            # Run the search and compare the returned contents, in order.
            found = self.search('idx', query, **filters)
            contents = [hit['content'] for hit in found['documents']]
            self.assertEqual(contents, expected)

        # No filter: every document mentioning "huey".
        all_huey = ['huey document', 'little huey bear', 'uncle huey']
        check('huey', {}, all_huey)

        # A single filter.
        check('huey', {'kitty': 'yes'},
              ['huey document', 'little huey bear'])

        # Two filters at once.
        check('huey', {
            'kitty': 'yes',
            'name': 'huey'
        }, ['huey document', 'little huey bear'])

        # A filter together with a prefix query.
        check('docu*', {'kitty': 'yes'},
              ['huey document', 'zaizee document'])
Beispiel #17
0
    def test_search(self):
        """Full-text search supports prefix terms, pagination and bm25 ranking."""
        idx = Index.create(name='idx')
        words = [
            'foo', 'bar', 'baz', 'nug nugs', 'blah nuggie foo', 'huey',
            'zaizee'
        ]
        for word in words:
            idx.index('document %s' % word, special=True)

        for n in range(10):
            idx.index('document %s' % n, special=False)

        # 17 documents match, so the first page is full.
        first_page = self.search('idx', 'docum*')
        self.assertEqual(first_page['page'], 1)
        self.assertEqual(first_page['pages'], 2)
        self.assertEqual(len(first_page['documents']), 10)

        second_page = self.search('idx', 'document', 2)
        self.assertEqual(len(second_page['documents']), 7)

        nug_hits = self.search('idx', 'doc* nug*')
        self.assertEqual(nug_hits['page'], 1)
        self.assertEqual(nug_hits['pages'], 1)
        self.assertEqual(len(nug_hits['documents']), 2)
        hit_a, hit_b = nug_hits['documents']

        def expected_hit(hit, content):
            # ``id`` and ``score`` are echoed from the hit itself, so only
            # the remaining fields are actually being checked.
            return {
                'attachments': [],
                'content': content,
                'id': hit['id'],
                'identifier': None,
                'indexes': ['idx'],
                'metadata': {
                    'special': 'True'
                },
                'score': hit['score']
            }

        self.assertEqual(hit_a, expected_hit(hit_a, 'document nug nugs'))
        self.assertEqual(round(hit_a['score'], 4), -0.)

        self.assertEqual(hit_b,
                         expected_hit(hit_b, 'document blah nuggie foo'))
        self.assertEqual(round(hit_b['score'], 4), -0.)

        no_hits = self.search('idx', 'missing')
        self.assertEqual(len(no_hits['documents']), 0)

        ranked = self.search('idx', 'nug', ranking='bm25')
        best = ranked['documents'][0]
        self.assertEqual(best['content'], 'document nug nugs')
        self.assertEqual(round(best['score'], 3), -2.891)
Beispiel #18
0
 def setUp(self):
     """Run the parent setUp, then create the ``default`` index for each test."""
     super(TestModelAPIs, self).setUp()
     default_index = Index.create(name='default')
     self.index = default_index
Beispiel #19
0
    def test_index_document_attachments(self):
        """A document can be created together with uploaded file attachments."""
        Index.create(name='idx-a')
        form = {
            'data': json.dumps({
                'content': 'doc a',
                'index': 'idx-a',
                'metadata': {
                    'k1': 'v1-a',
                    'k2': 'v2-a'
                },
            }),
            'file_0': (BytesIO(b'testfile1'), 'test1.txt'),
            'file_1': (BytesIO(b'testfile2'), 'test2.jpg')
        }
        response = self.app.post('/documents/', data=form)

        text_file = Attachment.get(Attachment.filename == 'test1.txt')
        image_file = Attachment.get(Attachment.filename == 'test2.jpg')
        text_data = {
            'data': '/documents/1/attachments/test1.txt/download/',
            'data_length': 9,
            'mimetype': 'text/plain',
            'timestamp': str(text_file.timestamp),
            'filename': 'test1.txt'
        }
        image_data = {
            'data': '/documents/1/attachments/test2.jpg/download/',
            'data_length': 9,
            'mimetype': 'image/jpeg',
            'timestamp': str(image_file.timestamp),
            'filename': 'test2.jpg'
        }

        created = json_load(response.data)
        self.assertEqual(
            created, {
                'attachments': [text_data, image_data],
                'content': 'doc a',
                'id': 1,
                'identifier': None,
                'indexes': ['idx-a'],
                'metadata': {
                    'k1': 'v1-a',
                    'k2': 'v2-a'
                }
            })

        # Pin every timestamp so the listing below is predictable.
        Attachment.update(timestamp='2016-02-01 01:02:03').execute()

        with assert_query_count(3):
            listing = self.app.get('/documents/1/attachments/')

        expected_listing = {
            'ordering': [],
            'pages': 1,
            'page': 1,
            'attachments': [
                {
                    'mimetype': 'text/plain',
                    'timestamp': '2016-02-01 01:02:03',
                    'data_length': 9,
                    'filename': 'test1.txt',
                    'document': '/documents/1/',
                    'data': '/documents/1/attachments/test1.txt/download/',
                },
                {
                    'mimetype': 'image/jpeg',
                    'timestamp': '2016-02-01 01:02:03',
                    'data_length': 9,
                    'filename': 'test2.jpg',
                    'document': '/documents/1/',
                    'data': '/documents/1/attachments/test2.jpg/download/',
                },
            ],
        }
        self.assertEqual(json_load(listing.data), expected_listing)