Example #1
def setup_method(self, method):
    self.indexer = IndexUtil(MockDB(), MockSchema())
Example #2
class TestIndex:
    def setup_method(self, method):
        self.indexer = IndexUtil(MockDB(), MockSchema())

    def monkeypatch_indexer(self):
        self.indexer.get_thing_ids = lambda keys: dict(
            (k, "id:" + k) for k in keys)
        self.indexer.get_property_id = lambda type, name: "p:%s-%s" % (
            type.split("/")[-1], name)
        self.indexer.get_table = lambda type, datatype, name: "%s_%s" % (
            type.split("/")[-1], datatype)

    def test_monkeypatch(self):
        self.monkeypatch_indexer()
        assert self.indexer.get_thing_ids(["a", "b"]) == {
            "a": "id:a",
            "b": "id:b"
        }
        assert self.indexer.get_property_id("/type/book",
                                            "title") == "p:book-title"
        assert self.indexer.get_table("/type/book", "foo", "bar") == "book_foo"

    def process_index(self, index):
        """Process index to remove order in the values, so that it is easier to compare."""
        return {k: set(v) for k, v in iteritems(index)}

    def test_compute_index(self, testdata):
        index = self.indexer.compute_index(testdata['doc1'])
        assert self.process_index(index) == self.process_index(
            testdata['doc1.index'])

    def test_dict_difference(self):
        f = self.indexer._dict_difference
        d1 = {"w": 1, "x": 2, "y": 3}
        d2 = {"x": 2, "y": 4, "z": 5}

        assert f(d1, d2) == {"w": 1, "y": 3}
        assert f(d2, d1) == {"y": 4, "z": 5}

    def test_diff_index(self):
        doc1 = {
            "key": "/books/1",
            "type": {
                "key": "/type/book"
            },
            "title": "foo",
            "author": {
                "key": "/authors/1"
            }
        }
        doc2 = dict(doc1, title='bar')

        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {("/type/book", "/books/1", "str", "title"): ["foo"]}
        assert inserts == {("/type/book", "/books/1", "str", "title"): ["bar"]}

        deletes, inserts = self.indexer.diff_index(None, doc1)
        assert deletes == {}
        assert inserts == {
            ("/type/book", "/books/1", "ref", "author"): ["/authors/1"],
            ("/type/book", "/books/1", "str", "title"): ["foo"]
        }

        # when type is changed all the old properties must be deleted
        doc2 = dict(doc1, type={"key": "/type/object"})
        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {
            ("/type/book", "/books/1", "ref", None): [],
            ("/type/book", "/books/1", "str", None): [],
            ("/type/book", "/books/1", "int", None): [],
        }

    def test_diff_records(self):
        doc1 = {
            "key": "/books/1",
            "type": {
                "key": "/type/book"
            },
            "title": "foo",
            "author": {
                "key": "/authors/1"
            }
        }
        doc2 = dict(doc1, title='bar')
        record = web.storage(key='/books/1',
                             data=doc2,
                             prev=web.storage(data=doc1))

        deletes, inserts = self.indexer.diff_records([record])
        assert deletes == {("/type/book", "/books/1", "str", "title"): ["foo"]}
        assert inserts == {("/type/book", "/books/1", "str", "title"): ["bar"]}

    def test_compile_index(self):
        self.monkeypatch_indexer()

        index = {
            ("/type/book", "/books/1", "str", "name"):
            ["Getting started with py.test"],
            ("/type/book", "/books/2", "ref", "author"): ["/authors/1"],
        }
        assert self.indexer.compile_index(index) == {
            ("book_str", "id:/books/1", "p:book-name"):
            ["Getting started with py.test"],
            ("book_ref", "id:/books/2", "p:book-author"): ["id:/authors/1"],
        }

        # When the type is changed, property_name will be None to indicate that all the properties are to be removed.
        index = {("/type/books", "/books/1", "str", None): []}
        assert self.indexer.compile_index(index) == {
            ("book_str", "id:/books/1", None): []
        }

    def test_too_long(self):
        assert self.indexer._is_too_long("a" * 10000) is True
        assert self.indexer._is_too_long("a" * 2047) is False
        c = u'\u20AC'  # 3 bytes in utf-8  TODO: Why different in Python 2 vs. 3??
        assert self.indexer._is_too_long(c * 1000) is PY2
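
The TODO in test_too_long concerns the euro-sign case: u'\u20AC' * 1000 is 1000 characters but 3000 bytes when encoded as UTF-8. A plausible explanation, offered here as an assumption rather than something these snippets confirm, is that the length limit is applied to the UTF-8-encoded value under Python 2 but to the unicode string itself under Python 3. The standalone sketch below reproduces that discrepancy; the 2048 threshold is illustrative and not necessarily IndexUtil's actual limit.

# Standalone sketch, not IndexUtil's implementation: shows how a byte-based
# length check and a character-based one disagree for multi-byte text.
LIMIT = 2048  # illustrative threshold, not taken from IndexUtil

s = u'\u20ac' * 1000                    # 1000 characters, 3000 bytes in UTF-8
print(len(s) > LIMIT)                   # False: counting characters (Python 3 str)
print(len(s.encode('utf-8')) > LIMIT)   # True: counting UTF-8 bytes (Python 2 byte string)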
Example #4
class TestIndex:
    def setup_method(self, method):
        self.indexer = IndexUtil(MockDB(), MockSchema())
        
    def monkeypatch_indexer(self):
        self.indexer.get_thing_ids = lambda keys: dict((k, "id:" + k) for k in keys)
        self.indexer.get_property_id = lambda type, name: "p:%s-%s" % (type.split("/")[-1], name)
        self.indexer.get_table = lambda type, datatype, name: "%s_%s" % (type.split("/")[-1], datatype)
        
    def test_monkeypatch(self):
        self.monkeypatch_indexer()
        assert self.indexer.get_thing_ids(["a", "b"]) == {"a": "id:a", "b": "id:b"}
        assert self.indexer.get_property_id("/type/book", "title") == "p:book-title"
        assert self.indexer.get_table("/type/book", "foo", "bar") == "book_foo"
        
    def process_index(self, index):
        """Process index to remove order in the values, so that it is easier to compare."""
        return dict((k, set(v)) for k, v in index.iteritems())
                
    def test_compute_index(self, testdata):
        index = self.indexer.compute_index(testdata['doc1'])
        assert self.process_index(index) == self.process_index(testdata['doc1.index'])
        
    def test_dict_difference(self):
        f = self.indexer._dict_difference
        d1 = {"w": 1, "x": 2, "y": 3}
        d2 = {"x": 2, "y": 4, "z": 5}
        
        assert f(d1, d2) == {"w": 1, "y": 3}
        assert f(d2, d1) == {"y": 4, "z": 5}
    
    def test_diff_index(self):
        doc1 = {
            "key": "/books/1",
            "type": {"key": "/type/book"},
            "title": "foo",
            "author": {"key": "/authors/1"}
        }
        doc2 = dict(doc1, title='bar')
        
        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {
            ("/type/book", "/books/1", "str", "title"): ["foo"]
        }
        assert inserts == {
            ("/type/book", "/books/1", "str", "title"): ["bar"]
        }

        deletes, inserts = self.indexer.diff_index(None, doc1)
        assert deletes == {}
        assert inserts == {
            ("/type/book", "/books/1", "ref", "author"): ["/authors/1"],
            ("/type/book", "/books/1", "str", "title"): ["foo"]
        }
        
        # when type is changed all the old properties must be deleted
        doc2 = dict(doc1, type={"key": "/type/object"})
        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {
            ("/type/book", "/books/1", "ref", None): [],
            ("/type/book", "/books/1", "str", None): [],
            ("/type/book", "/books/1", "int", None): [],
        }
        
    def test_diff_records(self):
        doc1 = {
            "key": "/books/1",
            "type": {"key": "/type/book"},
            "title": "foo",
            "author": {"key": "/authors/1"}
        }
        doc2 = dict(doc1, title='bar')
        record = web.storage(key='/books/1', data=doc2, prev=web.storage(data=doc1))

        deletes, inserts = self.indexer.diff_records([record])
        assert deletes == {
            ("/type/book", "/books/1", "str", "title"): ["foo"]
        }
        assert inserts == {
            ("/type/book", "/books/1", "str", "title"): ["bar"]
        }
        
    def test_compile_index(self):
        self.monkeypatch_indexer()
        
        index = {
            ("/type/book", "/books/1", "str", "name"): ["Getting started with py.test"],
            ("/type/book", "/books/2", "ref", "author"): ["/authors/1"],
        }
        assert self.indexer.compile_index(index) == {
            ("book_str", "id:/books/1", "p:book-name"): ["Getting started with py.test"],
            ("book_ref", "id:/books/2", "p:book-author"): ["id:/authors/1"],
        }
        
        # When the type is changed, property_name will be None to indicate that all the properties are to be removed.
        index = {
            ("/type/books", "/books/1", "str", None): []
        }
        assert self.indexer.compile_index(index) == {
            ("book_str", "id:/books/1", None): []
        }
        
    def test_too_long(self):
        assert self.indexer._is_too_long("a" * 10000) == True
        assert self.indexer._is_too_long("a" * 2047) == False
        c = u'\u20AC' # 3 bytes in utf-8
        assert self.indexer._is_too_long(c * 1000) == True
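
test_compute_index takes a testdata argument, which pytest resolves as a fixture; the conftest that defines it is not part of these snippets. Below is a minimal, hypothetical sketch of such a fixture. The document and index contents are purely illustrative (the project ships its own sample data); they only mirror the (type, key, datatype, name) -> values index shape asserted in the tests above.

# conftest.py -- hypothetical fixture sketch; the real fixture loads the project's own sample data.
import pytest

@pytest.fixture
def testdata():
    return {
        # document to be indexed
        "doc1": {
            "key": "/books/1",
            "type": {"key": "/type/book"},
            "title": "foo",
        },
        # expected index for doc1
        "doc1.index": {
            ("/type/book", "/books/1", "str", "title"): ["foo"],
        },
    }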
Example #5
File: oldump.py  Project: yzou/openlibrary
def __init__(self, dirname):
    self.dirname = dirname
    self.index_engine = IndexUtil(db, schema.get_schema())
Example #6
File: oldump.py  Project: yzou/openlibrary
class RestoreEngine:
    """Engine to update an existing database with new changes from a dump.
    """
    def __init__(self, dirname):
        self.dirname = dirname
        self.index_engine = IndexUtil(db, schema.get_schema())
        
    def path(self, filename):
        return os.path.abspath(os.path.join(self.dirname, filename))
        
    def restore(self):
        self.restore_transactions()
        self.restore_tables()
        self.restore_sequences()
        
    def restore_sequences(self):
        d = simplejson.loads(open(self.path("sequences.txt")).read())
        
        for name, value in d.items():
            db.query("SELECT setval($name, $value)", vars=locals())
        
    def restore_tables(self):
        # Some tables can't be restored before certain other tables because of foreign-key constraints.
        # This dict specifies the order: tables with smaller numbers must be restored first.
        order = {
            "store": 1,
            "store_index": 2
        }
        
        tables = [f[len("table_"):-len(".txt")] for f in os.listdir(self.dirname) if f.startswith("table_")]
        tables.sort(key=lambda key: order.get(key, 0))
        
        for t in tables[::-1]:
            db.query("DELETE FROM %s" % t)

        for t in tables:
            filename = self.path("table_%s.txt" % t)
            db.query("COPY %s FROM $filename" % t, vars=locals())
            
    def get_doc(self, thing_id, revision):
        d = db.query("SELECT data FROM data WHERE thing_id=$thing_id AND revision=$revision", vars=locals())
        try:
            return simplejson.loads(d[0].data)
        except IndexError:
            return {}
            
    def restore_tx(self, row):
        data = row.pop("_versions")

        tx = db.transaction()
        try:
            old_docs = []
            new_docs = []
            for d in data:
                id = d['thing_id']

                doc = simplejson.loads(d['data'])
                key = doc['key']
                type_id = self.get_thing_id(doc['type']['key'])

                if d['revision'] == 1:
                    db.insert("thing", seqname=False, 
                        id=d['thing_id'], key=key, type=type_id,
                        latest_revision=d['revision'],
                        created=row['created'], last_modified=row['created'])
                else:
                    db.update('thing', where="id=$id", 
                        type=type_id,
                        latest_revision=d['revision'],
                        last_modified=row['created'], 
                        vars=locals())
                    old_docs.append(self.get_doc(d['thing_id'], d['revision']-1))
                new_docs.append(doc)

            db.insert("transaction", seqname=False, **row)

            values = [{"id": d['version_id'], "thing_id": d['thing_id'], "revision": d['revision'], "transaction_id": row['id']} for d in data]
            db.multiple_insert("version", values, seqname=False)

            values = [{"data": d['data'], "thing_id": d['thing_id'], "revision": d['revision']} for d in data]
            db.multiple_insert("data", values, seqname=False)
            
            self.delete_index(old_docs)
            self.insert_index(new_docs)
        except:
            tx.rollback()
            raise
        else:
            tx.commit()
        
    def restore_transactions(self):
        for line in open(self.path("transactions.txt")):
            row = simplejson.loads(line)
            if self.has_transaction(row['id']):
                print "ignoring tx", row['id']
                continue
            else:
                self.restore_tx(row)
                
    def has_transaction(self, txid):
        d = db.query("SELECT id FROM transaction WHERE id=$txid", vars=locals())
        return bool(d)

    def get_thing_id(self, key):
        return db.query("SELECT id FROM thing WHERE key=$key", vars=locals())[0].id
        
    def _process_key(self, key):
        # Some data in the database still has /b/ instead of /books/.
        # The transformation is still done in software.
        mapping = (
            "/l/", "/languages/",
            "/a/", "/authors/",
            "/b/", "/books/",
            "/user/", "/people/"
        )

        if "/" in key and key.split("/")[1] in ['a', 'b', 'l', 'user']:
            for old, new in web.group(mapping, 2):
                if key.startswith(old):
                    return new + key[len(old):]
        return key
            
    def delete_index(self, docs):
        all_deletes = {}
        for doc in docs:
            doc = dict(doc, _force_reindex=True)
            dummy_doc = {"key": self._process_key(doc['key']), "type": {"key": "/type/foo"}}
            deletes, _inserts = self.index_engine.diff_index(doc, dummy_doc)
            all_deletes.update(deletes)
            
        all_deletes = self.index_engine.compile_index(all_deletes)
        self.index_engine.delete_index(all_deletes)
        
    def insert_index(self, docs):
        all_inserts = {}
        for doc in docs:
            _deletes, inserts = self.index_engine.diff_index({}, doc)
            all_inserts.update(inserts)
            
        all_inserts = self.index_engine.compile_index(all_inserts)
        self.index_engine.insert_index(all_inserts)
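
For completeness, the whole restore flow is driven by restore(). A hypothetical invocation looks like the following; the dump directory path is illustrative and must contain the files the methods above read (transactions.txt, table_*.txt, sequences.txt).

# Hypothetical usage sketch; the directory path is illustrative.
engine = RestoreEngine("/path/to/dump")
engine.restore()  # replays transactions, reloads the tables, then resets the sequences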