Ejemplo n.º 1
0
def test_prechunk(source, target, old_docs, new_docs):
    """Check that ``CopyBuilder.prechunk(2)`` produces two chunk queries.

    The source store is loaded with ``old_docs`` and then ``new_docs``. The
    first chunk is expected to be an ``$in`` query on ``k`` selecting the
    keys 0 through 9.
    """
    copier = CopyBuilder(source, target, delete_orphans=True)
    for batch in (old_docs, new_docs):
        source.update(batch)

    chunks = list(copier.prechunk(2))
    expected_first_chunk = {"query": {"k": {"$in": list(range(10))}}}

    assert len(chunks) == 2
    assert chunks[0] == expected_first_chunk
Ejemplo n.º 2
0
def test_query(source, target, old_docs, new_docs):
    """Check that the builder's ``query`` restricts which documents are copied.

    With the filter ``k > 5`` the target is expected to end up with 14
    documents whose smallest key is 6.
    """
    copier = CopyBuilder(source, target)
    copier.query = {"k": {"$gt": 5}}
    for batch in (old_docs, new_docs):
        source.update(batch)
    copier.run()

    copied = list(target.query(criteria={}))
    assert len(copied) == 14

    smallest_key = min([doc["k"] for doc in copied])
    assert smallest_key == 6
Ejemplo n.º 3
0
def test_run(source, target, old_docs, new_docs):
    """Check that ``CopyBuilder.run`` brings the target up to date.

    The source holds ``old_docs`` followed by ``new_docs`` while the target
    starts with ``old_docs`` only. After the run, key 0 must carry the "new"
    value and key 10 must still carry the "old" value.
    """
    for batch in (old_docs, new_docs):
        source.update(batch)
    target.update(old_docs)

    copier = CopyBuilder(source, target)
    copier.run()

    for key, expected_value in ((0, "new"), (10, "old")):
        assert target.query_one(criteria={"k": key})["v"] == expected_value
Ejemplo n.º 4
0
def test_delete_orphans(source, target, old_docs, new_docs):
    """Check that ``delete_orphans=True`` removes target docs missing from source.

    Keys 0-4 are deleted from the source collection before the run; afterwards
    the target must contain none of them, while key 5 is "new" and key 10 is
    still "old".
    """
    copier = CopyBuilder(source, target, delete_orphans=True)
    for batch in (old_docs, new_docs):
        source.update(batch)
    target.update(old_docs)

    orphan_filter = {"k": {"$in": list(range(5))}}
    source.collection.delete_many(orphan_filter)
    copier.run()

    leftover_orphans = target.collection.count_documents(orphan_filter)
    assert leftover_orphans == 0
    for key, expected_value in ((5, "new"), (10, "old")):
        assert target.query_one(criteria={"k": key})["v"] == expected_value
Ejemplo n.º 5
0
    def test_delete_orphans(self):
        """Check orphan deletion when the builder is driven through a ``Runner``.

        Keys 0-4 are removed from the source collection before the run; the
        target must then hold none of them, with key 5 "new" and key 10 "old".
        """
        self.builder = CopyBuilder(self.source, self.target, delete_orphans=True)
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)

        orphan_filter = {"k": {"$in": list(range(5))}}
        self.source.collection.delete_many(orphan_filter)
        Runner([self.builder]).run()

        leftover_orphans = self.target.collection.count_documents(orphan_filter)
        self.assertEqual(leftover_orphans, 0)
        for key, expected_value in ((5, "new"), (10, "old")):
            found = self.target.query_one(criteria={"k": key})
            self.assertEqual(found["v"], expected_value)
Ejemplo n.º 6
0
 def test_incremental_false(self):
     """Check that ``incremental=False`` copies matching docs even if older.

     The source holds documents stamped ``tic`` and the target holds the same
     keys stamped one second later (``toc``). With the query ``k > 5`` the
     builder should overwrite exactly the target documents with keys 6-19,
     leaving keys 0-5 with their later timestamp.
     """
     tic = datetime.now()
     toc = tic + timedelta(seconds=1)
     keys = list(range(20))
     earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
     later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
     self.source.collection.insert_many(earlier)
     self.target.collection.insert_many(later)
     query = {"k": {"$gt": 5}}
     self.builder = CopyBuilder(self.source, self.target, incremental=False, query=query)
     Runner([self.builder]).run()

     docs = list(self.target.query())
     # Partition by key rather than by slice index: the query is k > 5, so
     # key 5 itself must NOT have been copied.
     copied = [d for d in docs if d["k"] > 5]
     untouched = [d for d in docs if d["k"] <= 5]
     # Guard against all() passing vacuously on an empty list.
     self.assertEqual(len(copied), 14)
     self.assertEqual(len(untouched), 6)
     # The all(...) call must wrap the whole generator; passing a bare
     # generator object to assertTrue is always truthy. Inequalities are used
     # because MongoDB keeps datetimes at millisecond precision, so a value
     # read back may be slightly earlier than the in-memory ``tic``/``toc``.
     self.assertTrue(all(d["lu"] <= tic for d in copied))
     self.assertTrue(all(d["lu"] > tic for d in untouched))
Ejemplo n.º 7
0
def test_reporting(mongostore, reporting_store):
    """Check that the ``run`` CLI writes build events to a reporting store.

    A ``CopyBuilder`` from ``mongostore`` into an in-memory store is dumped to
    JSON and run with ``-r``. The reporting store must then hold three
    documents: BUILD_STARTED, BUILD_ENDED and UPDATE, each with the expected
    fields.
    """

    def first_event(docs, event_name):
        # First report document whose "event" field matches ``event_name``.
        return next(d for d in docs if d["event"] == event_name)

    memorystore = MemoryStore("temp")
    builder = CopyBuilder(mongostore, memorystore)

    seed_docs = []
    for i in range(10):
        seed_docs.append({
            mongostore.key: i,
            mongostore.last_updated_field: datetime.utcnow(),
        })
    mongostore.update(seed_docs)

    cli = CliRunner()
    with cli.isolated_filesystem():
        dumpfn(builder, "test_builder.json")
        dumpfn(reporting_store, "test_reporting_store.json")
        cli_args = ["-v", "test_builder.json", "-r", "test_reporting_store.json"]
        result = cli.invoke(run, cli_args)
        assert result.exit_code == 0

        report_docs = list(reporting_store.query())
        assert len(report_docs) == 3

        started = first_event(report_docs, "BUILD_STARTED")
        for field in ("sources", "targets"):
            assert field in started

        ended = first_event(report_docs, "BUILD_ENDED")
        for field in ("errors", "warnings"):
            assert field in ended

        updated = first_event(report_docs, "UPDATE")
        assert "items" in updated
Ejemplo n.º 8
0
def test_run_builder(mongostore):
    """Check the ``run`` CLI output for serial and multi-process invocations.

    Each case lists the CLI arguments, the substrings that must appear in the
    output, and the substrings that must not appear. Every invocation must
    exit with code 0.
    """
    memorystore = MemoryStore("temp")
    builder = CopyBuilder(mongostore, memorystore)

    seed_docs = []
    for i in range(10):
        seed_docs.append({
            mongostore.key: i,
            mongostore.last_updated_field: datetime.utcnow(),
        })
    mongostore.update(seed_docs)

    # (cli arguments, substrings required in output, substrings forbidden)
    cases = [
        (["-v", "test_builder.json"],
         ("CopyBuilder", "SerialProcessor"), ()),
        (["-vvv", "--no_bars", "test_builder.json"],
         (), ("Get", "Update")),
        (["-v", "-n", "2", "test_builder.json"],
         ("CopyBuilder", "MultiProcessor"), ()),
        (["-vvv", "-n", "2", "--no_bars", "test_builder.json"],
         (), ("Get", "Update")),
    ]

    cli = CliRunner()
    with cli.isolated_filesystem():
        dumpfn(builder, "test_builder.json")
        for cli_args, required, forbidden in cases:
            result = cli.invoke(run, cli_args)
            assert result.exit_code == 0
            for text in required:
                assert text in result.output
            for text in forbidden:
                assert text not in result.output
 def test_simple_runner(self):
     """Check that a dumped ``Runner`` can be executed via ``maggma.cli.mrun``.

     The runner wrapping a single ``CopyBuilder`` is serialized to
     ``self.runner_filename`` and the CLI module is run in a subprocess, which
     must exit with return code 0 within 15 seconds.
     """
     builder = CopyBuilder(self.source, self.target)
     runner = Runner([builder])
     dumpfn(runner, self.runner_filename)
     # Build argv as a list instead of splitting a formatted string, so a
     # runner filename containing whitespace stays a single argument.
     command = ["python", "-m", "maggma.cli.mrun", str(self.runner_filename)]
     p = subprocess.run(command, timeout=15)
     self.assertEqual(p.returncode, 0)
Ejemplo n.º 10
0
    def setUp(self):
        """Create the document fixtures, the two stores and a default builder.

        ``old_docs`` covers keys 0-19 with value "old"; ``new_docs`` covers
        keys 0-9 with value "new" and a timestamp one second later. Both
        stores are connected and indexed on their key and last-updated fields.
        """
        earlier = datetime.now()
        one_second_later = earlier + timedelta(seconds=1)
        all_keys = list(range(20))
        self.old_docs = [{"lu": earlier, "k": key, "v": "old"} for key in all_keys]
        self.new_docs = [
            {"lu": one_second_later, "k": key, "v": "new"} for key in all_keys[:10]
        ]

        store_options = {"key": "k", "lu_field": "lu"}
        self.source = MongoStore(self.dbname, "source", **store_options)
        self.target = MongoStore(self.dbname, "target", **store_options)
        self.builder = CopyBuilder(self.source, self.target)

        # Same steps for each store, source first: connect, then index.
        for store in (self.source, self.target):
            store.connect()
            store.ensure_index(store.key)
            store.ensure_index(store.lu_field)
Ejemplo n.º 11
0
def test_get_items(source, target, old_docs):
    """Check how many items ``CopyBuilder.get_items`` yields in each state.

    All source docs are yielded while the target is empty, none once the
    target holds the same docs, and with ``projection=["k"]`` the yielded
    items must not contain the ``v`` field.
    """

    def pending_count(copier):
        # Number of items the builder currently wants to process.
        return len(list(copier.get_items()))

    total = len(old_docs)

    plain_builder = CopyBuilder(source, target)
    source.update(old_docs)
    assert pending_count(plain_builder) == total

    target.update(old_docs)
    assert pending_count(plain_builder) == 0

    projected_builder = CopyBuilder(source, target, projection=["k"])
    target.remove_docs({})
    assert pending_count(projected_builder) == total
    assert all("v" not in item for item in projected_builder.get_items())
Ejemplo n.º 12
0
class TestCopyBuilder(TestCase):
    """Tests for ``CopyBuilder`` against two MongoDB-backed stores.

    A uniquely named database is created per test class and dropped at the
    end; the ``source`` and ``target`` collections are dropped after each test.
    """

    @classmethod
    def setUpClass(cls):
        """Pick a unique database name and keep a client handle for cleanup."""
        cls.dbname = "test_" + uuid4().hex
        s = MongoStore(cls.dbname, "test")
        s.connect()
        cls.client = s.collection.database.client

    @classmethod
    def tearDownClass(cls):
        """Drop the per-class test database."""
        cls.client.drop_database(cls.dbname)

    def setUp(self):
        """Build the document fixtures, connect both stores and index them.

        ``old_docs`` covers keys 0-19 with value "old"; ``new_docs`` covers
        keys 0-9 with value "new" and a timestamp one second later.
        """
        tic = datetime.now()
        toc = tic + timedelta(seconds=1)
        keys = list(range(20))
        self.old_docs = [{"lu": tic, "k": k, "v": "old"} for k in keys]
        self.new_docs = [{"lu": toc, "k": k, "v": "new"} for k in keys[:10]]
        kwargs = dict(key="k", lu_field="lu")
        self.source = MongoStore(self.dbname, "source", **kwargs)
        self.target = MongoStore(self.dbname, "target", **kwargs)
        self.builder = CopyBuilder(self.source, self.target)

        self.source.connect()
        self.source.ensure_index(self.source.key)
        self.source.ensure_index(self.source.lu_field)

        self.target.connect()
        self.target.ensure_index(self.target.key)
        self.target.ensure_index(self.target.lu_field)

    def tearDown(self):
        """Drop both collections so each test starts from empty stores."""
        self.source.collection.drop()
        self.target.collection.drop()

    def test_get_items(self):
        """``get_items`` yields all, none, then only the updated documents."""
        self.source.collection.insert_many(self.old_docs)
        self.assertEqual(len(list(self.builder.get_items())),
                         len(self.old_docs))
        self.target.collection.insert_many(self.old_docs)
        self.assertEqual(len(list(self.builder.get_items())), 0)
        self.source.update(self.new_docs, update_lu=False)
        self.assertEqual(len(list(self.builder.get_items())),
                         len(self.new_docs))

    def test_process_item(self):
        """``process_item`` maps the items to an equal collection of items."""
        self.source.collection.insert_many(self.old_docs)
        items = list(self.builder.get_items())
        self.assertCountEqual(items, map(self.builder.process_item, items))

    def test_update_targets(self):
        """``update_targets`` writes processed items into the target store."""
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)
        items = list(map(self.builder.process_item, self.builder.get_items()))
        self.builder.update_targets(items)
        self.assertEqual(self.target.query_one(criteria={"k": 0})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    @unittest.skip(
        "Have to refactor how we force read-only so a warning will get thrown")
    def test_index_warning(self):
        """Should log warning when recommended store indexes are not present."""
        self.source.collection.drop_index([(self.source.key, 1)])
        with self.assertLogs(level=logging.WARNING) as cm:
            list(self.builder.get_items())
        self.assertIn("Ensure indices", "\n".join(cm.output))

    def test_run(self):
        """A full ``run`` updates keys 0-9 and leaves keys 10-19 unchanged."""
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)
        self.builder.run()
        self.assertEqual(self.target.query_one(criteria={"k": 0})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    def test_query(self):
        """The builder's ``query`` limits the copy to keys greater than 5."""
        self.builder.query = {"k": {"$gt": 5}}
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.builder.run()
        all_docs = list(self.target.query(criteria={}))
        self.assertEqual(len(all_docs), 14)
        # assertEqual, not assertTrue: with assertTrue the 6 would only be the
        # failure message and the minimum key would never be checked.
        self.assertEqual(min(d["k"] for d in all_docs), 6)

    def test_delete_orphans(self):
        """``delete_orphans=True`` removes target docs deleted from source."""
        self.builder = CopyBuilder(self.source,
                                   self.target,
                                   delete_orphans=True)
        self.source.collection.insert_many(self.old_docs)
        self.source.update(self.new_docs, update_lu=False)
        self.target.collection.insert_many(self.old_docs)

        deletion_criteria = {"k": {"$in": list(range(5))}}
        self.source.collection.delete_many(deletion_criteria)
        self.builder.run()

        self.assertEqual(
            self.target.collection.count_documents(deletion_criteria), 0)
        self.assertEqual(self.target.query_one(criteria={"k": 5})["v"], "new")
        self.assertEqual(self.target.query_one(criteria={"k": 10})["v"], "old")

    def test_incremental_false(self):
        """``incremental=False`` copies matching docs even when they are older.

        The source holds documents stamped ``tic`` and the target holds the
        same keys stamped one second later. With the query ``k > 5`` exactly
        keys 6-19 should be overwritten in the target.
        """
        tic = datetime.now()
        toc = tic + timedelta(seconds=1)
        keys = list(range(20))
        earlier = [{"lu": tic, "k": k, "v": "val"} for k in keys]
        later = [{"lu": toc, "k": k, "v": "val"} for k in keys]
        self.source.collection.insert_many(earlier)
        self.target.collection.insert_many(later)
        query = {"k": {"$gt": 5}}
        self.builder = CopyBuilder(self.source,
                                   self.target,
                                   incremental=False,
                                   query=query)
        self.builder.run()

        docs = list(self.target.query())
        # Partition by key rather than by slice index: the query is k > 5, so
        # key 5 itself must NOT have been copied.
        copied = [d for d in docs if d["k"] > 5]
        untouched = [d for d in docs if d["k"] <= 5]
        # Guard against all() passing vacuously on an empty list.
        self.assertEqual(len(copied), 14)
        self.assertEqual(len(untouched), 6)
        # all(...) must wrap the whole generator; a bare generator object
        # handed to assertTrue is always truthy. Inequalities are used because
        # MongoDB keeps datetimes at millisecond precision, so a value read
        # back may be slightly earlier than the in-memory ``tic``/``toc``.
        self.assertTrue(all(d["lu"] <= tic for d in copied))
        self.assertTrue(all(d["lu"] > tic for d in untouched))
Ejemplo n.º 13
0
def test_update_targets(source, target, old_docs, new_docs):
    """Check that ``update_targets`` writes documents straight to the target.

    After pushing ``old_docs`` and then ``new_docs`` through the builder,
    key 0 must hold the "new" value and key 10 the "old" value.
    """
    copier = CopyBuilder(source, target)
    for batch in (old_docs, new_docs):
        copier.update_targets(batch)

    for key, expected_value in ((0, "new"), (10, "old")):
        assert target.query_one(criteria={"k": key})["v"] == expected_value
Ejemplo n.º 14
0
def test_process_item(source, target, old_docs):
    """Check that ``process_item`` yields one result per item from ``get_items``."""
    copier = CopyBuilder(source, target)
    source.update(old_docs)

    fetched = list(copier.get_items())
    processed = [copier.process_item(item) for item in fetched]

    assert len(fetched) == len(processed)
Ejemplo n.º 15
0
def test_get_items(source, target, old_docs, some_failed_old_docs):
    """Check ``get_items`` counts, including the ``retry_failed`` option.

    Covers: an empty target, an up-to-date target, a projected builder, and
    then the combinations of ``retry_failed`` and a ``k < 11`` query once
    ``some_failed_old_docs`` has been written to both stores.
    """

    def pending_count(copier):
        # Number of items the builder currently wants to process.
        return len(list(copier.get_items()))

    builder = CopyBuilder(source, target)
    source.update(old_docs)
    assert pending_count(builder) == len(old_docs)

    target.update(old_docs)
    assert pending_count(builder) == 0

    builder = CopyBuilder(source, target, projection=["k"])
    target.remove_docs({})
    assert pending_count(builder) == len(old_docs)
    assert all("v" not in item for item in builder.get_items())

    source.update(some_failed_old_docs)
    target.update(old_docs)
    target.update(some_failed_old_docs)
    builder = CopyBuilder(source, target)
    assert pending_count(builder) == 0

    builder = CopyBuilder(source, target, retry_failed=True)
    assert pending_count(builder) == len(some_failed_old_docs)

    below_eleven = {"k": {"$lt": 11}}
    builder = CopyBuilder(source, target, query=below_eleven)
    assert pending_count(builder) == 0

    builder = CopyBuilder(source, target, retry_failed=True, query=below_eleven)
    assert pending_count(builder) == 3