def test_no_changes(self, graph):
     """The given graph should produce an empty plan at either retention level."""
     plan_keep_none = ChangePlan.create(graph, 0)
     plan_keep_two = ChangePlan.create(graph, 2)

     # Retaining zero or two intermediate structures must give the same plan...
     self.assertEqual(plan_keep_none, plan_keep_two)
     # ...and that plan must contain no work at all.
     for field_name in ("delete", "update_parents"):
         self.assertEqual(getattr(plan_keep_none, field_name), [])
    def test_details_output(self):
        """Check the human-readable details text written while planning."""
        # pylint: disable=line-too-long
        expected_details = textwrap.dedent("""
            == Summary ==
            Active Version Branches: 3
            Total Structures: 6
            Structures to Save: 5
            Structures to Delete: 1
            Structures to Rewrite Parent Link: 1

            == Active Versions ==
            Active Version A00000000000000000000001 [2012-05-02 00:00:00] draft-branch for course-v1:edx+splitmongo+1
            + 1 (active) (original)

            Active Version A00000000000000000000002 [2012-05-02 00:00:00] published-branch for course-v1:edx+splitmongo+2
            + 3 (active)
            + 2 (original)

            Active Version A00000000000000000000003 [2012-05-02 00:00:00] draft-branch for course-v1:edx+splitmongo+3
            + 6 (active) (re-link to original)
            - 5
            + 4 (original)

            """).lstrip()
        # pylint: enable=line-too-long

        # An in-memory stand-in for the details file; StringIO has no .name of
        # its own, so one is assigned here.
        details_file = StringIO()
        details_file.name = "test_file.txt"

        branch_graph = create_test_graph(["1"], ["2", "3"], ["4", "5", "6"])
        plan = ChangePlan.create(branch_graph, 0, details_file)

        # The details text is produced as a side effect of creating the plan.
        self.assertEqual(expected_details, details_file.getvalue())

        expected_plan = ChangePlan(delete=["5"], update_parents=[("6", "4")])
        self.assertEqual(plan, expected_plan)
Exemple #3
0
def make_plan(ctx, plan_file, details, retain, delay, batch_size):
    """
    Create a Change Plan JSON file describing the operations needed to prune the
    database. This command is read-only and does not alter the database.

    The Change Plan JSON is a dictionary with two keys:

    "delete" - A sorted array of Structure document IDs to delete. Since MongoDB
    object IDs are created in ascending order by timestamp, this means that the
    oldest documents come earlier in the list.

    "update_parents" - A list of [Structure ID, New Parent/Previous ID] pairs.
    This is used to re-link the oldest preserved Intermediate Structure back to
    the Original Structure, so that we don't leave the database in a state where
    a Structure's "previous_version" points to a deleted Structure.

    Specifying a --details file will generate a more verbose, human-readable
    text description of the Change Plan for verification purposes. The details
    file will only display Structures that are reachable from an Active Version,
    so any Structures that are "orphaned" as a result of partial runs of this
    script or Studio race conditions will not be reflected. That being said,
    orphaned Structures are detected and properly noted in the Change Plan JSON.
    """
    # NOTE(review): the docstring above is presumably the CLI --help text, so
    # it is kept verbatim -- confirm before rewording it.
    backend = ctx.obj['BACKEND']

    # The backend receives delay / 1000.0 (presumably ms -> seconds -- confirm).
    graph = backend.structures_graph(delay / 1000.0, batch_size)

    # Building the plan also writes the details file when one was supplied.
    ChangePlan.create(graph, retain, details).dump(plan_file)
 def test_update(self):
     """Apply a small ChangePlan and verify the resulting structures graph."""
     plan = ChangePlan(
         delete=[str_id(2), str_id(3)],
         update_parents=[(str_id(4), str_id(1))],
     )
     self.backend.update(plan, delay=0)

     graph = self.backend.structures_graph(0, 100)

     # (id, original id, previous id) for every Structure expected to remain.
     remaining = [
         (1, 1, None),
         (4, 1, 1),  # This one got its previous_id rewritten from 3 -> 1
         (10, 10, None),
         (11, 10, 10),
         (20, 20, None),
     ]

     surviving_ids = [str_id(num) for num, _, _ in remaining]
     self.assertEqual(list(graph.structures.keys()), surviving_ids)

     expected_structures = {
         str_id(num): Structure(
             id=str_id(num),
             original_id=str_id(original),
             previous_id=None if previous is None else str_id(previous),
         )
         for num, original, previous in remaining
     }
     self.assertEqual(graph.structures, expected_structures)
    def test_overlapping_shared_history(self):
        """Branches that share history must not prune what another one keeps."""
        branches = [
            ["1", "2", "3"],
            ["1", "2", "3", "4", "5"],
            ["1", "2", "3", "6"],
            ["1", "2", "7", "8", "9", "10"],
        ]
        plan = ChangePlan.create(create_test_graph(*branches), 1)

        # Each branch may keep just one intermediate structure, yet only "7"
        # and "8" end up deleted. Structure by structure:
        #   "1"  - the original structure; always kept.
        #   "2"  - the intermediate kept by the first branch, so it survives
        #          even though other branches would flag it for deletion.
        #   "3"  - the second branch would delete it, but it is Active in the
        #          first branch, and Active Structures are never deleted.
        #   "4"  - the intermediate kept by the second branch.
        #   "5"  - Active Structure of the second branch.
        #   "6"  - Active Structure of the third branch.
        #   "7"  - flagged for deletion by the fourth branch.
        #   "8"  - flagged for deletion by the fourth branch.
        #   "9"  - the intermediate kept by the fourth branch.
        #   "10" - Active Structure of the fourth branch.
        expected_deletes = ["7", "8"]
        expected_relinks = [("9", "1")]
        self.assertEqual(plan.delete, expected_deletes)
        self.assertEqual(plan.update_parents, expected_relinks)
    def test_non_overlapping_shared_history(self):
        """Shared history where the preserved intermediate sets don't overlap."""
        # Each scenario: (retain count, branches, expected deletes,
        # expected parent re-links).
        scenarios = [
            (
                0,
                [
                    ["1", "2", "3"],
                    ["1", "2", "3", "4", "5", "6"],
                ],
                ["2", "4", "5"],
                [("3", "1"), ("6", "1")],
            ),
            (
                1,
                [
                    ["1", "2", "3", "4"],
                    ["1", "2", "3", "4", "5", "6", "7"],
                ],
                ["2", "5"],
                [("3", "1"), ("6", "1")],
            ),
        ]
        for retain, branches, expected_deletes, expected_relinks in scenarios:
            plan = ChangePlan.create(create_test_graph(*branches), retain)
            self.assertEqual(plan.delete, expected_deletes)
            self.assertEqual(plan.update_parents, expected_relinks)
    def test_simple(self):
        """Happy-path ChangePlans for a single linear branch."""
        linear_graph = create_test_graph(["1", "2", "3", "4"])

        # (intermediates to preserve, expected deletes, expected re-links)
        expectations = [
            # Keep no intermediates: both middle structures are pruned.
            (0, ["2", "3"], [("4", "1")]),
            # Keep one intermediate.
            (1, ["2"], [("3", "1")]),
            # Keep two intermediates: nothing to do.
            (2, [], []),
        ]
        for retain, expected_deletes, expected_relinks in expectations:
            plan = ChangePlan.create(linear_graph, retain)
            self.assertEqual(plan.delete, expected_deletes)
            self.assertEqual(plan.update_parents, expected_relinks)
Exemple #8
0
def prune(ctx, plan_file, delay, batch_size, start):
    """
    Prune the MongoDB database according to a Change Plan file.

    This command tries to be as safe as possible. It executes parent updates
    before deletes, so an interruption at any point should be safe in that it
    won't leave the structure graphs in an inconsistent state. It should also
    be safe to resume pruning with the same Change Plan in the event of an
    interruption.

    It's also safe to run while Studio is still operating, though you should be
    careful to test and tweak the delay and batch_size options to throttle load
    on your database.
    """
    # NOTE(review): the docstring above is presumably the CLI --help text, so
    # it is kept verbatim -- confirm before rewording it.
    change_plan = ChangePlan.load(plan_file)

    # When --start is given it must be one of the IDs the plan deletes.
    start_is_unknown = start is not None and start not in change_plan.delete
    if start_is_unknown:
        message = "{} is not in the Change Plan {}".format(
            start, click.format_filename(plan_file.name))
        raise click.BadParameter(message, param_hint='--start')

    backend = ctx.obj['BACKEND']
    # The backend receives delay / 1000.0 (presumably ms -> seconds -- confirm).
    backend.update(change_plan, delay / 1000.0, batch_size, start)
    def test_race_condition(self):
        """Simulate new Structures being created during ChangePlan creation."""
        # Capture the unpatched method so the side effect can delegate to it.
        original_all_structures = SplitMongoBackend._all_structures  # pylint: disable=protected-access

        def read_then_mutate(backend, delay, batch_size):
            """Call the real _all_structures(), mutate the DB, return its result."""
            snapshot = original_all_structures(backend, delay, batch_size)

            # Add Structures 5, 6 and 7, each chained onto the one before it
            # (5 -> 4, 6 -> 5, 7 -> 6) and all sharing original version 1.
            for new_num in (5, 6, 7):
                self.structures.insert_one({
                    '_id': obj_id(new_num),
                    'original_version': obj_id(1),
                    'previous_version': obj_id(new_num - 1),
                })

            # Re-point the Draft branch of course-v1:edx+split_course+2017 at
            # one of the freshly created Structures.
            self.active_versions.update_one(
                {'_id': obj_id(100)},
                {'$set': {'versions.draft-branch': obj_id(5)}},
            )

            # Add a brand new ActiveVersion whose branch points at the newest
            # Structure.
            new_library = {
                '_id': obj_id(102),
                'edited_on': datetime(2012, 5, 3),
                'org': 'edx',
                'course': 'split_library_race',
                'run': 'library',
                'versions': {
                    'library': obj_id(7),
                },
            }
            self.active_versions.insert_one(new_library)

            return snapshot

        with patch.object(SplitMongoBackend, '_all_structures',
                          autospec=True) as patched_all_structures:
            patched_all_structures.side_effect = read_then_mutate

            graph = self.backend.structures_graph(0, 100)
            self.assertEqual(len(graph.structures), 10)
            self.assertEqual(len(graph.branches), 4)

            plan = ChangePlan.create(graph, 0)
            deletes = plan.delete

            # Active updated to this for our course.
            self.assertNotIn(str_id(5), deletes)
            # Active for our new Library.
            self.assertNotIn(str_id(7), deletes)
            # Was our Active before.
            self.assertIn(str_id(4), deletes)
            # Intermediate structure to new Library.
            self.assertIn(str_id(6), deletes)