Beispiel #1
0
    def setUp(self):
        """Prepare a temporary database root, the part directories and a sample document.

        Sets ``self.data_root``, ``self.parts``, ``self.zapian`` and
        ``self.doc`` for use by the tests.
        """
        # mkdtemp() creates the directory on disk before returning its path,
        # so the root is guaranteed to exist and needs no existence check.
        self.data_root = tempfile.mkdtemp()
        self.parts = ['part01', 'part02']

        self.zapian = Zapian(self.data_root)

        # One sub-directory per part below the database root.
        for part_name in self.parts:
            database_path = os.path.join(self.data_root, part_name)
            if not os.path.isdir(database_path):
                os.makedirs(database_path)

        # Sample document shared by the tests.
        # NOTE(review): the last 'subjects' entry looks like a redacted
        # e-mail address -- confirm against the upstream test data.
        self.doc = {'+title':'we are firend', 
                    'subjects':['file','ktv','*****@*****.**'], 
                    'created':datetime(2000, 1, 1)}
Beispiel #2
0
class ZapianTest(unittest.TestCase):
    """Tests for schema handling, indexing and searching through ``Zapian``.

    Each test runs against a fresh temporary directory that contains one
    sub-directory per entry of ``self.parts``.
    """

    def setUp(self):
        """Create the temporary database root, register the parts and build the sample document."""
        # mkdtemp() creates the directory on disk before returning its path,
        # so the root is guaranteed to exist and needs no existence check.
        self.data_root = tempfile.mkdtemp()
        # Registered immediately: tearDown() is not run when setUp() fails,
        # so without this a failure below would leak the directory.
        self.addCleanup(shutil.rmtree, self.data_root, ignore_errors=True)
        self.parts = ['part01', 'part02']

        self.zapian = Zapian(self.data_root)
        for part_name in self.parts:
            database_path = os.path.join(self.data_root, part_name)
            if not os.path.isdir(database_path):
                os.makedirs(database_path)
            self.zapian.add_part(part_name)

        # Sample document shared by the tests.
        # NOTE(review): the last 'subjects' entry looks like a redacted
        # e-mail address, while the tests below expect the terms
        # 'what_gmail' and 'com' -- confirm against the upstream test data.
        self.doc = {'title':'we are firend', 
                    'subjects':['file','ktv','*****@*****.**'], 
                    'created':datetime(2000, 1, 1)}

    def tearDown(self):
        """Remove the temporary database root and reset the schema mappings."""
        if os.path.exists(self.data_root):
            shutil.rmtree(self.data_root)

        self.zapian.fields.clear()
        self.zapian.attributes.clear()

    # -- helpers -----------------------------------------------------------

    def _add_sample_document(self, part, uid):
        """Index ``self.doc`` under ``uid`` in ``part`` and flush."""
        self.zapian.add_document(part, uid=uid, 
                index=self.doc, data={'data': "测试内容"},
                flush=True)

    def _assert_slot_zero_value(self, doc):
        """Check the value stored in slot 0 of ``doc``, if such a value exists.

        NOTE(review): slot 0 presumably holds the 'created' attribute, and
        946656000 corresponds to 2000-01-01 00:00 at UTC+8, so this
        expectation looks timezone dependent -- confirm.
        """
        for value in doc.values():
            if value.num == 0:
                self.assertEqual(value.value, '946656000')

    def _assert_terms(self, doc, expected_terms):
        """Assert that the term list of ``doc`` is exactly ``expected_terms``."""
        actual_terms = [term.term for term in doc.termlist()]
        # Comparing lengths as well catches duplicates that the set
        # comparison alone would hide.
        self.assertEqual(len(expected_terms), len(actual_terms))
        self.assertEqual(set(expected_terms), set(actual_terms))

    # -- tests -------------------------------------------------------------

    def test_schema(self):
        """Prefixes and slots are generated, stored and reloaded from disk."""
        schema = self.zapian
        # test gen prefix and slot
        prefix = schema._gen_prefix() 
        self.assertEqual(prefix, 'XA')
        slot = schema._gen_slot()
        self.assertEqual(slot, 0)
        # test add field and attribute
        new_prefix = schema.add_field('new_field')
        self.assertEqual(schema.get_prefix('new_field'), new_prefix)
        self.assertEqual(new_prefix, prefix)
        new_slot = schema.add_attribute('new_attribute')
        self.assertEqual(schema.get_slot('new_attribute'), new_slot)
        self.assertEqual(new_slot, slot)
        # test load the old schema
        new_zapian = Zapian(self.data_root)
        self.assertEqual(new_zapian.fields, {'new_field':'XA'})
        self.assertEqual(new_zapian.attributes, {'new_attribute':0})

    def test_add_document(self):
        """A newly added document carries the expected value, terms and data."""
        part = self.parts[0]
        uid = "12345"
        self._add_sample_document(part, uid)
        # test value of the new document
        doc = self.zapian._get_document(uid, [part])
        self._assert_slot_zero_value(doc)
        # test term of the new document
        title_prefix = self.zapian.get_prefix('title', auto_add=False)
        subjects_prefix = self.zapian.get_prefix('subjects', auto_add=False)
        validate_terms = ['Q'+uid,
                            title_prefix + 'are', 
                            title_prefix + 'firend', 
                            title_prefix + 'we',
                            subjects_prefix + 'com', 
                            subjects_prefix + 'file', 
                            subjects_prefix + 'ktv', 
                            subjects_prefix + 'what_gmail', ]
        self._assert_terms(doc, validate_terms)
        # test data of the new document
        data = pickle.loads( doc.get_data() )['data']
        self.assertEqual(data, '测试内容')

    def test_update_document(self):
        """Updating replaces the title terms and keeps the stored data."""
        part = self.parts[0]
        uid = "12345"
        # add a document 
        self._add_sample_document(part, uid)
        new_doc = self.doc.copy()
        new_doc['title'] = "new title"

        self.zapian.update_document(part, uid=uid, index=new_doc, flush=True)
        # test value of the new document
        doc = self.zapian._get_document(uid, [part])
        self._assert_slot_zero_value(doc)
        # test term of the new document
        title_prefix = self.zapian.get_prefix('title', auto_add=False)
        subjects_prefix = self.zapian.get_prefix('subjects', auto_add=False)
        validate_terms = ['Q'+uid,
                            title_prefix + 'new', 
                            title_prefix + 'title', 
                            subjects_prefix + 'com', 
                            subjects_prefix + 'file', 
                            subjects_prefix + 'ktv', 
                            subjects_prefix + 'what_gmail', ]
        self._assert_terms(doc, validate_terms)
        # test data of the new document
        data = pickle.loads( doc.get_data() )['data']
        self.assertEqual(data, '测试内容')

    def test_replace_document(self):
        """Replacing rewrites the terms and leaves the document data empty."""
        part = self.parts[0]
        uid = "12345"
        # add a document 
        self._add_sample_document(part, uid)
        new_doc = self.doc.copy()
        new_doc['title'] = "new title"
        self.zapian.add_field('new-field')
        new_doc['new-field'] = 'last'

        self.zapian.replace_document(part, uid=uid, index=new_doc, flush=True)
        # test value of the new document
        doc = self.zapian._get_document(uid, [part])
        self._assert_slot_zero_value(doc)
        # test term of the new document
        title_prefix = self.zapian.get_prefix('title', auto_add=False)
        subjects_prefix = self.zapian.get_prefix('subjects', auto_add=False)
        new_field_prefix = self.zapian.get_prefix('new-field', auto_add=False)
        validate_terms = ['Q'+uid,
                            new_field_prefix + 'last',
                            title_prefix + 'new', 
                            title_prefix + 'title', 
                            subjects_prefix + 'com', 
                            subjects_prefix + 'file', 
                            subjects_prefix + 'ktv', 
                            subjects_prefix + 'what_gmail', ]
        self._assert_terms(doc, validate_terms)
        # test data of the new document
        self.assertEqual(doc.get_data(), '')

    def test_del_document(self):
        """Looking up a deleted document raises ``KeyError``."""
        part = self.parts[0]
        uid = "12345"
        # add a document 
        self._add_sample_document(part, uid)
        # delete the document
        self.zapian.delete_document(part, uids=[uid], flush=True)
        # fetching the deleted document must raise KeyError
        self.assertRaises(KeyError, self.zapian._get_document, uid, [part])

    def test_search_document(self):
        """A query matching title and subjects returns the document uid."""
        part = self.parts[0]
        uid = "12345"
        # add a document 
        self._add_sample_document(part, uid)
        # search
        query = [  
                    [[u'title'], u'we', u'parse'],
                    [u'subjects', u'file ktv', u'anyof'],
                ] 
        results = self.zapian.search([part], query)
        self.assertEqual([uid], results)

    def test_search_document_for_mulit_database(self):
        """Searching across several parts finds documents in each of them."""
        # add first document into first database
        first_uid = "12345"
        first_doc = {'title':'we are firend', 
                    'subjects':['file','ktv','*****@*****.**'] 
                    }
        self.zapian.add_document(self.parts[0], uid=first_uid, 
                index=first_doc, data={'data': "测试内容"},
                flush=True)
        # add second document into second database
        second_uid = '67890'
        second_doc = {'title':'Go to school', 
                    'subjects':['morning','walking','sport'] 
                    }
        self.zapian.add_document(self.parts[1], uid=second_uid, 
                index=second_doc, data={'data': "测试内容"},
                flush=True)
        # add third document into first database
        third_uid = "45678"
        third_doc = {'title': 'Big Data',
                    'subjects': ['big', 'expensive']}
        self.zapian.add_document(self.parts[0], uid=third_uid, 
                index=third_doc, data={'data': "测试内容"},
                flush=True)

        # search across multiple databases

        # search first document
        query = [  
                    [[u'title'], u'we', u'parse'],
                    [u'subjects', u'file ktv', u'anyof'],
                 ] 
        results = self.zapian.search(self.parts, query)
        self.assertEqual([first_uid], results)

        # search second document
        query = [  
                    [[u'title'], u'Go', u'parse'],
                    [u'subjects', u'walking sport', u'anyof'],
                ]
        results = self.zapian.search(self.parts, query)
        self.assertEqual([second_uid], results)

        # search for both documents at once
        query = [  
                    [u'title', u'Go we', u'anyof'],
                    [u'subjects', u'walking sport ktv', u'anyof'],
                ]
        results = self.zapian.search(self.parts, query)
        self.assertEqual(len(results), 2)
        self.assertEqual(set([first_uid, second_uid]), set(results))