コード例 #1
0
ファイル: datastore.py プロジェクト: mpessas/mneme
class DataStore(object):
    """Class to save/retrieve data from datastore.

    Uses xapian for indexing the data.  Every operation is delegated to
    the ``IndexStore`` instance held in ``self.index``.
    """

    def __init__(self, data_dir):
        """Create ``data_dir`` if it does not exist and open the index on it."""
        if not os.path.exists(data_dir):
            os.mkdir(data_dir)
        self.index = IndexStore(data_dir)

    def add(self, url, text, categories=None):
        """Add an entry to the datastore.

        ``categories`` defaults to a fresh empty list on every call, so
        separate calls never share (and cannot accidentally mutate) one
        default list object.
        """
        if categories is None:
            categories = []
        # The add is performed inside the context returned by connect().
        with self.index.connect():
            self.index.add(url, text, categories)

    def search(self, text, category=None):
        """Search for matching documents for a term
        among the indexed documents.

        Returns a generator that yields ``self.index.get_url(r)`` for each
        result ``r`` of the index search; the lookups happen lazily as the
        generator is consumed.
        """
        res = self.index.search(text, category)
        return (self.index.get_url(r) for r in res)

    def exists(self, url):
        """Check whether a URL has already been indexed.

        Returns whatever ``self.index.exists(url)`` reports.
        """
        # The lookup result must be returned; dropping it made this
        # method always answer None.
        return self.index.exists(url)

    def get_categories(self):
        """Get a list of the categories defined in the index."""
        return self.index.get_categories()

    def get_urls(self):
        """Get a list of the urls in the index."""
        return self.index.get_urls()

    def get_entries(self):
        """Get the entries in the index.

        Returns a generator of ``(url, category)`` tuples built from each
        document's ``data['url'][0]`` and ``data['category']``.
        """
        docs = self.index.get_documents()
        return ((d.data['url'][0], d.data['category']) for d in docs)
コード例 #2
0
ファイル: test_index.py プロジェクト: mpessas/mneme
class TestIndex(unittest.TestCase):

    def setUp(self):
        self.data_dir = tempfile.mkdtemp()
        self.index = IndexStore(self.data_dir)
        self.index._connect()

    def tearDown(self):
        self.index._disconnect()
        shutil.rmtree(self.data_dir)

    def test_add(self):
        self.assertEqual(self.index.add("url", "Body of text"), '0')
        self.assertEqual(self.index.add("url2", "Body of text",
                                        date='2010-01-10'), '1')
        self.assertEqual(self.index.add("url3", "Lots of text",
                                        categories=['general', ]), '2')

    def test_search(self):
        self.index.add("url", "Body of text")
        self.index._conn.flush()
        res = self.index.search("Body")
        self.assertEquals(len(res), 1)
        self.assertEquals(res[0].rank, 0)
        self.assertEquals(res[0].id, '0')
        
    def test_search_two(self):
        self.index.add("url", "Body of text")
        self.index.add("url2", "Lot of text", date='2010-02-10')
        self.index._conn.flush()
        res = self.index.search("text")
        self.assertEquals(len(res), 2)
        self.assertEquals(res[0].id, '0')
        self.assertEquals(res[1].id, '1')

    def test_search_category(self):
        self.index.add("url", "Body of text")
        self.index.add("url2", "Example text general")
        self.index.add("url3", "Lots of text", categories=['general', ])
        self.index._conn.flush()
        res = self.index.search('lots')
        self.assertEquals(len(res), 1)
        res = self.index.search('lots', category='general')
        self.assertEquals(len(res), 1)
        res = self.index.search('nothing', category='general')
        self.assertEquals(len(res), 0)

    def test_add_multiple_categories(self):
        self.index.add("url", "Lots of text and text",
                       categories=['general', 'example'])
        self.index._conn.flush()
        res = self.index.search('lots', category='general')
        self.assertEquals(len(res), 1)
        res = self.index.search('lots', category='example')
        self.assertEquals(len(res), 1)

    def test_get_categories(self):
        self.index.add("url", "Lots of text and text",
                       categories=['general', 'example'])
        self.index.add("url2", "Example text",
                       categories=['example'])
        self.index.add("url3", "Lorem ipsum", categories=['latin', ])
        self.index._conn.flush()
        self.assertEqual(
            [c for c in self.index.get_categories()],
            ['example', 'general', 'latin']
        )

    def test_get_url_by_id(self):
        self.index.add("url", "Lots of text and text",
                       categories=['general', 'example'])
        self.index.add("url2", "Example text",
                       categories=['example'])
        self.index._conn.flush()
        res = self.index.search('example')
        self.assertEqual(self.index.get_url(res[0]), "url2")

    def test_exists(self):
        self.index.add("url", "Lots of text and text",
                       categories=['general', 'example'])
        self.index._conn.flush()
        self.assertTrue(self.index.exists('url'))
        self.assertFalse(self.index.exists('url2'))

    def test_get_urls(self):
        self.index.add("url", "Lots of text and text",
                       categories=['general', 'example'])
        self.index.add("url2", "Example text",
                       categories=['example'])
        self.index._conn.flush()
        res = self.index.get_urls()
        self.assertEqual(
            [u for u in self.index.get_urls()],
            ['url', 'url2']
        )        
コード例 #3
0
ファイル: datastore.py プロジェクト: mpessas/mneme
 def __init__(self, data_dir):
     """Ensure ``data_dir`` exists, then open an ``IndexStore`` on it."""
     already_there = os.path.exists(data_dir)
     if not already_there:
         # Only the final path component is created (plain mkdir).
         os.mkdir(data_dir)
     index_store = IndexStore(data_dir)
     self.index = index_store
コード例 #4
0
ファイル: test_index.py プロジェクト: mpessas/mneme
 def setUp(self):
     """Build a connected ``IndexStore`` on a fresh temporary directory."""
     scratch_dir = tempfile.mkdtemp()
     self.data_dir = scratch_dir
     # Stored on the instance before connecting, as the original did.
     self.index = IndexStore(scratch_dir)
     self.index._connect()