import os

import simplejson
import web
from six import PY2, iteritems

# IndexUtil is the class under test; MockDB and MockSchema are test doubles
# assumed to be defined alongside this test suite.


class TestIndex:
    def setup_method(self, method):
        self.indexer = IndexUtil(MockDB(), MockSchema())

    def monkeypatch_indexer(self):
        self.indexer.get_thing_ids = lambda keys: dict((k, "id:" + k) for k in keys)
        self.indexer.get_property_id = lambda type, name: "p:%s-%s" % (type.split("/")[-1], name)
        self.indexer.get_table = lambda type, datatype, name: "%s_%s" % (type.split("/")[-1], datatype)

    def test_monkeypatch(self):
        self.monkeypatch_indexer()
        assert self.indexer.get_thing_ids(["a", "b"]) == {"a": "id:a", "b": "id:b"}
        assert self.indexer.get_property_id("/type/book", "title") == "p:book-title"
        assert self.indexer.get_table("/type/book", "foo", "bar") == "book_foo"

    def process_index(self, index):
        """Convert each value list in the index to a set, so that
        comparisons ignore the order of values."""
        return {k: set(v) for k, v in iteritems(index)}

    def test_compute_index(self, testdata):
        index = self.indexer.compute_index(testdata['doc1'])
        assert self.process_index(index) == self.process_index(testdata['doc1.index'])

    def test_dict_difference(self):
        f = self.indexer._dict_difference
        d1 = {"w": 1, "x": 2, "y": 3}
        d2 = {"x": 2, "y": 4, "z": 5}
        assert f(d1, d2) == {"w": 1, "y": 3}
        assert f(d2, d1) == {"y": 4, "z": 5}

    def test_diff_index(self):
        doc1 = {
            "key": "/books/1",
            "type": {"key": "/type/book"},
            "title": "foo",
            "author": {"key": "/authors/1"},
        }
        doc2 = dict(doc1, title='bar')

        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {("/type/book", "/books/1", "str", "title"): ["foo"]}
        assert inserts == {("/type/book", "/books/1", "str", "title"): ["bar"]}

        deletes, inserts = self.indexer.diff_index(None, doc1)
        assert deletes == {}
        assert inserts == {
            ("/type/book", "/books/1", "ref", "author"): ["/authors/1"],
            ("/type/book", "/books/1", "str", "title"): ["foo"],
        }

        # when the type is changed, all the old properties must be deleted
        doc2 = dict(doc1, type={"key": "/type/object"})
        deletes, inserts = self.indexer.diff_index(doc1, doc2)
        assert deletes == {
            ("/type/book", "/books/1", "ref", None): [],
            ("/type/book", "/books/1", "str", None): [],
            ("/type/book", "/books/1", "int", None): [],
        }

    def test_diff_records(self):
        doc1 = {
            "key": "/books/1",
            "type": {"key": "/type/book"},
            "title": "foo",
            "author": {"key": "/authors/1"},
        }
        doc2 = dict(doc1, title='bar')
        record = web.storage(key='/books/1', data=doc2, prev=web.storage(data=doc1))

        deletes, inserts = self.indexer.diff_records([record])
        assert deletes == {("/type/book", "/books/1", "str", "title"): ["foo"]}
        assert inserts == {("/type/book", "/books/1", "str", "title"): ["bar"]}

    def test_compile_index(self):
        self.monkeypatch_indexer()
        index = {
            ("/type/book", "/books/1", "str", "name"): ["Getting started with py.test"],
            ("/type/book", "/books/2", "ref", "author"): ["/authors/1"],
        }
        assert self.indexer.compile_index(index) == {
            ("book_str", "id:/books/1", "p:book-name"): ["Getting started with py.test"],
            ("book_ref", "id:/books/2", "p:book-author"): ["id:/authors/1"],
        }

        # When the type is changed, property_name is None to indicate
        # that all the properties are to be removed.
        index = {("/type/book", "/books/1", "str", None): []}
        assert self.indexer.compile_index(index) == {("book_str", "id:/books/1", None): []}

    def test_too_long(self):
        assert self.indexer._is_too_long("a" * 10000) is True
        assert self.indexer._is_too_long("a" * 2047) is False
        c = u'\u20AC'  # 3 bytes in utf-8  TODO: Why different in Python 2 vs. 3??
        assert self.indexer._is_too_long(c * 1000) is PY2
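
# For reference, a minimal sketch (an assumption, not the project's actual
# implementation) of the `_dict_difference` helper exercised above: it keeps
# the items of `d1` whose (key, value) pairs are missing from, or mapped to a
# different value in, `d2`.
def _dict_difference_sketch(d1, d2):
    # e.g. _dict_difference_sketch({"w": 1, "x": 2, "y": 3}, {"x": 2, "y": 4, "z": 5})
    #      -> {"w": 1, "y": 3}
    return {k: v for k, v in d1.items() if k not in d2 or d2[k] != v}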
# `db` (a web.py database handle) and `schema` are assumed to be provided by
# the enclosing module.
class RestoreEngine:
    """Engine to update an existing database with new changes from a dump."""

    def __init__(self, dirname):
        self.dirname = dirname
        self.index_engine = IndexUtil(db, schema.get_schema())

    def path(self, filename):
        return os.path.abspath(os.path.join(self.dirname, filename))

    def restore(self):
        self.restore_transactions()
        self.restore_tables()
        self.restore_sequences()

    def restore_sequences(self):
        d = simplejson.loads(open(self.path("sequences.txt")).read())
        for name, value in d.items():
            db.query("SELECT setval($name, $value)", vars=locals())

    def restore_tables(self):
        # Some tables can't be restored before certain other tables because of
        # foreign-key constraints. This dict specifies the order: tables with
        # smaller numbers must be restored first.
        order = {"store": 1, "store_index": 2}

        tables = [
            f[len("table_"):-len(".txt")]
            for f in os.listdir(self.dirname)
            if f.startswith("table_")
        ]
        tables.sort(key=lambda key: order.get(key, 0))

        # empty the tables in reverse dependency order, then reload them
        for t in tables[::-1]:
            db.query("DELETE FROM %s" % t)

        for t in tables:
            filename = self.path("table_%s.txt" % t)
            db.query("COPY %s FROM $filename" % t, vars=locals())

    def get_doc(self, thing_id, revision):
        d = db.query(
            "SELECT data FROM data WHERE thing_id=$thing_id AND revision=$revision",
            vars=locals())
        try:
            return simplejson.loads(d[0].data)
        except IndexError:
            return {}

    def restore_tx(self, row):
        data = row.pop("_versions")
        tx = db.transaction()
        try:
            old_docs = []
            new_docs = []
            for d in data:
                id = d['thing_id']  # referenced as $id in the update query below
                doc = simplejson.loads(d['data'])
                key = doc['key']
                type_id = self.get_thing_id(doc['type']['key'])
                if d['revision'] == 1:
                    db.insert("thing", seqname=False,
                              id=d['thing_id'], key=key, type=type_id,
                              latest_revision=d['revision'],
                              created=row['created'],
                              last_modified=row['created'])
                else:
                    db.update("thing", where="id=$id",
                              type=type_id,
                              latest_revision=d['revision'],
                              last_modified=row['created'],
                              vars=locals())
                old_docs.append(self.get_doc(d['thing_id'], d['revision'] - 1))
                new_docs.append(doc)

            db.insert("transaction", seqname=False, **row)

            values = [
                {"id": d['version_id'],
                 "thing_id": d['thing_id'],
                 "revision": d['revision'],
                 "transaction_id": row['id']}
                for d in data
            ]
            db.multiple_insert("version", values, seqname=False)

            values = [
                {"data": d['data'],
                 "thing_id": d['thing_id'],
                 "revision": d['revision']}
                for d in data
            ]
            db.multiple_insert("data", values, seqname=False)

            self.delete_index(old_docs)
            self.insert_index(new_docs)
        except:
            tx.rollback()
            raise
        else:
            tx.commit()

    def restore_transactions(self):
        for line in open(self.path("transactions.txt")):
            row = simplejson.loads(line)
            if self.has_transaction(row['id']):
                print("ignoring tx", row['id'])
            else:
                self.restore_tx(row)

    def has_transaction(self, txid):
        d = db.query("SELECT id FROM transaction WHERE id=$txid", vars=locals())
        return bool(d)

    def get_thing_id(self, key):
        return db.query("SELECT id FROM thing WHERE key=$key", vars=locals())[0].id

    def _process_key(self, key):
        # Some data in the database still has /b/ instead of /books/.
        # The transformation is still done in software.
        mapping = (
            "/l/", "/languages/",
            "/a/", "/authors/",
            "/b/", "/books/",
            "/user/", "/people/",
        )

        if "/" in key and key.split("/")[1] in ['a', 'b', 'l', 'user']:
            for old, new in web.group(mapping, 2):
                if key.startswith(old):
                    return new + key[len(old):]
        return key

    def delete_index(self, docs):
        all_deletes = {}
        for doc in docs:
            doc = dict(doc, _force_reindex=True)
            dummy_doc = {"key": self._process_key(doc['key']), "type": {"key": "/type/foo"}}
            deletes, _inserts = self.index_engine.diff_index(doc, dummy_doc)
            all_deletes.update(deletes)

        all_deletes = self.index_engine.compile_index(all_deletes)
        self.index_engine.delete_index(all_deletes)

    def insert_index(self, docs):
        all_inserts = {}
        for doc in docs:
            _deletes, inserts = self.index_engine.diff_index({}, doc)
            all_inserts.update(inserts)

        all_inserts = self.index_engine.compile_index(all_inserts)
        self.index_engine.insert_index(all_inserts)
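
# Examples of the key translation performed by _process_key:
#   /b/OL1M   -> /books/OL1M
#   /a/OL1A   -> /authors/OL1A
#   /user/joe -> /people/joe

# A hypothetical driver for the engine above. The command-line handling is an
# assumption for illustration, not necessarily the project's actual CLI; it
# presumes the argument points at a dump directory containing
# transactions.txt, sequences.txt and the table_*.txt files written by the
# matching dump step.
if __name__ == "__main__":
    import sys
    RestoreEngine(sys.argv[1]).restore()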