def test_prechunk(source, target, old_docs, new_docs):
    builder = CopyBuilder(source, target, delete_orphans=True)
    source.update(old_docs)
    source.update(new_docs)
    chunk_queries = list(builder.prechunk(2))
    assert len(chunk_queries) == 2
    assert chunk_queries[0] == {"query": {"k": {"$in": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]}}}

def test_query(source, target, old_docs, new_docs):
    builder = CopyBuilder(source, target)
    builder.query = {"k": {"$gt": 5}}
    source.update(old_docs)
    source.update(new_docs)
    builder.run()
    all_docs = list(target.query(criteria={}))
    assert len(all_docs) == 14
    assert min(d["k"] for d in all_docs) == 6

def test_run(source, target, old_docs, new_docs):
    source.update(old_docs)
    source.update(new_docs)
    target.update(old_docs)
    builder = CopyBuilder(source, target)
    builder.run()
    assert target.query_one(criteria={"k": 0})["v"] == "new"
    assert target.query_one(criteria={"k": 10})["v"] == "old"

def test_delete_orphans(source, target, old_docs, new_docs):
    builder = CopyBuilder(source, target, delete_orphans=True)
    source.update(old_docs)
    source.update(new_docs)
    target.update(old_docs)
    deletion_criteria = {"k": {"$in": list(range(5))}}
    source.collection.delete_many(deletion_criteria)
    builder.run()
    assert target.collection.count_documents(deletion_criteria) == 0
    assert target.query_one(criteria={"k": 5})["v"] == "new"
    assert target.query_one(criteria={"k": 10})["v"] == "old"

def test_delete_orphans(self):
    self.builder = CopyBuilder(self.source, self.target, delete_orphans=True)
    self.source.collection.insert_many(self.old_docs)
    self.source.update(self.new_docs, update_lu=False)
    self.target.collection.insert_many(self.old_docs)
    deletion_criteria = {"k": {"$in": list(range(5))}}
    self.source.collection.delete_many(deletion_criteria)
    runner = Runner([self.builder])
    runner.run()
    self.assertEqual(self.target.collection.count_documents(deletion_criteria), 0)
    self.assertEqual(self.target.query_one(criteria={"k": 5})["v"], "new")
    self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

def test_incremental_false(self):
    tic = datetime.now()
    toc = tic + timedelta(seconds=1)
    keys = list(range(20))
    earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
    later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
    self.source.collection.insert_many(earlier)
    self.target.collection.insert_many(later)
    query = {"k": {"$gt": 5}}
    self.builder = CopyBuilder(self.source, self.target, incremental=False, query=query)
    Runner([self.builder]).run()
    docs = sorted(self.target.query(), key=lambda d: d["k"])
    # Keys > 5 are rebuilt from the source (lu == tic); keys <= 5 keep the target value (lu == toc).
    self.assertTrue(all(d["lu"] == tic for d in docs[6:]))
    self.assertTrue(all(d["lu"] == toc for d in docs[:6]))

def test_reporting(mongostore, reporting_store):
    memorystore = MemoryStore("temp")
    builder = CopyBuilder(mongostore, memorystore)
    mongostore.update(
        [{mongostore.key: i, mongostore.last_updated_field: datetime.utcnow()} for i in range(10)]
    )

    runner = CliRunner()
    with runner.isolated_filesystem():
        dumpfn(builder, "test_builder.json")
        dumpfn(reporting_store, "test_reporting_store.json")
        result = runner.invoke(run, ["-v", "test_builder.json", "-r", "test_reporting_store.json"])
        assert result.exit_code == 0

        report_docs = list(reporting_store.query())
        assert len(report_docs) == 3

        start_doc = next(d for d in report_docs if d["event"] == "BUILD_STARTED")
        assert "sources" in start_doc
        assert "targets" in start_doc

        end_doc = next(d for d in report_docs if d["event"] == "BUILD_ENDED")
        assert "errors" in end_doc
        assert "warnings" in end_doc

        update_doc = next(d for d in report_docs if d["event"] == "UPDATE")
        assert "items" in update_doc

def test_run_builder(mongostore):
    memorystore = MemoryStore("temp")
    builder = CopyBuilder(mongostore, memorystore)
    mongostore.update(
        [{mongostore.key: i, mongostore.last_updated_field: datetime.utcnow()} for i in range(10)]
    )

    runner = CliRunner()
    with runner.isolated_filesystem():
        dumpfn(builder, "test_builder.json")

        result = runner.invoke(run, ["-v", "test_builder.json"])
        assert result.exit_code == 0
        assert "CopyBuilder" in result.output
        assert "SerialProcessor" in result.output

        result = runner.invoke(run, ["-vvv", "--no_bars", "test_builder.json"])
        assert result.exit_code == 0
        assert "Get" not in result.output
        assert "Update" not in result.output

        result = runner.invoke(run, ["-v", "-n", "2", "test_builder.json"])
        assert result.exit_code == 0
        assert "CopyBuilder" in result.output
        assert "MultiProcessor" in result.output

        result = runner.invoke(run, ["-vvv", "-n", "2", "--no_bars", "test_builder.json"])
        assert result.exit_code == 0
        assert "Get" not in result.output
        assert "Update" not in result.output

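# Imports assumed by the two CLI tests above. This is a sketch only: the exact
# module paths and the "mongostore"/"reporting_store" fixtures come from the
# surrounding conftest and may differ between maggma versions.
from datetime import datetime

from click.testing import CliRunner
from monty.serialization import dumpfn

from maggma.builders import CopyBuilder
from maggma.cli import run
from maggma.stores import MemoryStore
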
def test_simple_runner(self):
    builder = CopyBuilder(self.source, self.target)
    runner = Runner([builder])
    dumpfn(runner, self.runner_filename)
    p = subprocess.run(
        "python -m maggma.cli.mrun {}".format(self.runner_filename).split(),
        timeout=15,
    )
    self.assertEqual(p.returncode, 0)

def setUp(self):
    tic = datetime.now()
    toc = tic + timedelta(seconds=1)
    keys = list(range(20))
    self.old_docs = [{"lu": tic, "k": k, "v": "old"} for k in keys]
    self.new_docs = [{"lu": toc, "k": k, "v": "new"} for k in keys[:10]]
    kwargs = dict(key="k", lu_field="lu")
    self.source = MongoStore(self.dbname, "source", **kwargs)
    self.target = MongoStore(self.dbname, "target", **kwargs)
    self.builder = CopyBuilder(self.source, self.target)
    self.source.connect()
    self.source.ensure_index(self.source.key)
    self.source.ensure_index(self.source.lu_field)
    self.target.connect()
    self.target.ensure_index(self.target.key)
    self.target.ensure_index(self.target.lu_field)

def test_get_items(source, target, old_docs):
    builder = CopyBuilder(source, target)
    source.update(old_docs)
    assert len(list(builder.get_items())) == len(old_docs)
    target.update(old_docs)
    assert len(list(builder.get_items())) == 0

    builder = CopyBuilder(source, target, projection=["k"])
    target.remove_docs({})
    assert len(list(builder.get_items())) == len(old_docs)
    assert all("v" not in d for d in builder.get_items())

class TestCopyBuilder(TestCase):
    @classmethod
    def setUpClass(cls):
        cls.dbname = "test_" + uuid4().hex
        s = MongoStore(cls.dbname, "test")
        s.connect()
        cls.client = s.collection.database.client

    @classmethod
    def tearDownClass(cls):
        cls.client.drop_database(cls.dbname)

    def setUp(self):
        tic = datetime.now()
        toc = tic + timedelta(seconds=1)
        keys = list(range(20))
        self.old_docs = [{"lu": tic, "k": k, "v": "old"} for k in keys]
        self.new_docs = [{"lu": toc, "k": k, "v": "new"} for k in keys[:10]]
        kwargs = dict(key="k", lu_field="lu")
        self.source = MongoStore(self.dbname, "source", **kwargs)
        self.target = MongoStore(self.dbname, "target", **kwargs)
        self.builder = CopyBuilder(self.source, self.target)
        self.source.connect()
        self.source.ensure_index(self.source.key)
        self.source.ensure_index(self.source.lu_field)
        self.target.connect()
        self.target.ensure_index(self.target.key)
        self.target.ensure_index(self.target.lu_field)

    def tearDown(self):
        self.source.collection.drop()
        self.target.collection.drop()

    def test_get_items(self):
        self.source.collection.insert_many(self.old_docs)
        self.assertEqual(len(list(self.builder.get_items())), len(self.old_docs))
        self.target.collection.insert_many(self.old_docs)
        self.assertEqual(len(list(self.builder.get_items())), 0)
        self.source.update(self.new_docs, update_lu=False)
        self.assertEqual(len(list(self.builder.get_items())), len(self.new_docs))

    def test_process_item(self):
        self.source.collection.insert_many(self.old_docs)
        items = list(self.builder.get_items())
        self.assertCountEqual(items, map(self.builder.process_item, items))

    def test_update_targets(self):
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)
        items = list(map(self.builder.process_item, self.builder.get_items()))
        self.builder.update_targets(items)
        self.assertEqual(self.target.query_one(criteria={"k": 0})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    @unittest.skip("Have to refactor how we force read-only so a warning will get thrown")
    def test_index_warning(self):
        """Should log warning when recommended store indexes are not present."""
        self.source.collection.drop_index([(self.source.key, 1)])
        with self.assertLogs(level=logging.WARNING) as cm:
            list(self.builder.get_items())
        self.assertIn("Ensure indices", "\n".join(cm.output))

    def test_run(self):
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)
        self.builder.run()
        self.assertEqual(self.target.query_one(criteria={"k": 0})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    def test_query(self):
        self.builder.query = {"k": {"$gt": 5}}
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.builder.run()
        all_docs = list(self.target.query(criteria={}))
        self.assertEqual(len(all_docs), 14)
        self.assertEqual(min(d["k"] for d in all_docs), 6)

    def test_delete_orphans(self):
        self.builder = CopyBuilder(self.source, self.target, delete_orphans=True)
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)
        deletion_criteria = {"k": {"$in": list(range(5))}}
        self.source.collection.delete_many(deletion_criteria)
        self.builder.run()
        self.assertEqual(self.target.collection.count_documents(deletion_criteria), 0)
        self.assertEqual(self.target.query_one(criteria={"k": 5})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    def test_incremental_false(self):
        tic = datetime.now()
        toc = tic + timedelta(seconds=1)
        keys = list(range(20))
        earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
        later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
        self.source.collection.insert_many(earlier)
        self.target.collection.insert_many(later)
        query = {"k": {"$gt": 5}}
        self.builder = CopyBuilder(self.source, self.target, incremental=False, query=query)
        self.builder.run()
        docs = sorted(self.target.query(), key=lambda d: d["k"])
        # Keys > 5 are rebuilt from the source (lu == tic); keys <= 5 keep the target value (lu == toc).
        self.assertTrue(all(d["lu"] == tic for d in docs[6:]))
        self.assertTrue(all(d["lu"] == toc for d in docs[:6]))

def test_update_targets(source, target, old_docs, new_docs):
    builder = CopyBuilder(source, target)
    builder.update_targets(old_docs)
    builder.update_targets(new_docs)
    assert target.query_one(criteria={"k": 0})["v"] == "new"
    assert target.query_one(criteria={"k": 10})["v"] == "old"

def test_process_item(source, target, old_docs):
    builder = CopyBuilder(source, target)
    source.update(old_docs)
    items = list(builder.get_items())
    assert len(items) == len(list(map(builder.process_item, items)))

def test_get_items(source, target, old_docs, some_failed_old_docs):
    builder = CopyBuilder(source, target)
    source.update(old_docs)
    assert len(list(builder.get_items())) == len(old_docs)
    target.update(old_docs)
    assert len(list(builder.get_items())) == 0

    builder = CopyBuilder(source, target, projection=["k"])
    target.remove_docs({})
    assert len(list(builder.get_items())) == len(old_docs)
    assert all("v" not in d for d in builder.get_items())

    source.update(some_failed_old_docs)
    target.update(old_docs)
    target.update(some_failed_old_docs)
    builder = CopyBuilder(source, target)
    assert len(list(builder.get_items())) == 0
    builder = CopyBuilder(source, target, retry_failed=True)
    assert len(list(builder.get_items())) == len(some_failed_old_docs)

    builder = CopyBuilder(source, target, query={"k": {"$lt": 11}})
    assert len(list(builder.get_items())) == 0
    builder = CopyBuilder(source, target, retry_failed=True, query={"k": {"$lt": 11}})
    assert len(list(builder.get_items())) == 3

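# Hypothetical conftest-style fixtures assumed by the pytest-style tests above.
# Field names ("k", "lu", "v") and document counts mirror the unittest setUp in
# this file; MemoryStore, the utcnow timestamps, and the "state": "failed"
# marker used for some_failed_old_docs are assumptions, not the project's
# actual conftest.
from datetime import datetime, timedelta

import pytest

from maggma.stores import MemoryStore


@pytest.fixture
def source():
    store = MemoryStore("source", key="k", last_updated_field="lu")
    store.connect()
    return store


@pytest.fixture
def target():
    store = MemoryStore("target", key="k", last_updated_field="lu")
    store.connect()
    return store


@pytest.fixture
def old_docs():
    tic = datetime.utcnow()
    return [{"lu": tic, "k": k, "v": "old"} for k in range(20)]


@pytest.fixture
def new_docs():
    toc = datetime.utcnow() + timedelta(seconds=1)
    return [{"lu": toc, "k": k, "v": "new"} for k in range(10)]


@pytest.fixture
def some_failed_old_docs():
    # Keys are chosen only so the counts asserted in test_get_items work out
    # (three keys below 11); the real fixture may differ.
    toc = datetime.utcnow() + timedelta(seconds=2)
    return [{"lu": toc, "k": k, "v": "old", "state": "failed"} for k in (2, 5, 10, 13, 17)]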