Example #1
0
    def makedir(cls,
                labbook: LabBook,
                relative_path: str,
                make_parents: bool = True,
                create_activity_record: bool = False) -> None:
        """Make a new directory inside the labbook directory.

        Nothing is done if the target path already exists. When the first
        path component is reported as untracked by `in_untracked()`, the
        directory is created and a warning is logged, but no `.gitkeep` files,
        commit or activity record are produced. Otherwise a `.gitkeep` file is
        written and staged in every directory along `relative_path` that does
        not already contain one.

        Args:
            labbook: Subject LabBook
            relative_path(str): Path within the labbook to make directory
            make_parents(bool): If true, create intermediary directories
            create_activity_record(bool): If true, create commit and activity record

        Returns:
            None

        Raises:
            ValueError: If `relative_path` is None or empty
            FileNotFoundError: If `make_parents` is False and a parent
                directory of the target does not exist
        """
        if not relative_path:
            raise ValueError("relative_path argument cannot be None or empty")

        relative_path = LabBook.make_path_relative(relative_path)
        new_directory_path = os.path.join(labbook.root_dir, relative_path)
        section = relative_path.split(os.sep)[0]
        git_untracked = in_untracked(labbook.root_dir, section)
        if os.path.exists(new_directory_path):
            return

        logger.info(f"Making new directory in `{new_directory_path}`")
        if make_parents:
            os.makedirs(new_directory_path, exist_ok=True)
        else:
            # Honor the flag: os.mkdir() refuses to create missing parents,
            # whereas os.makedirs() would always create them.
            os.mkdir(new_directory_path)

        if git_untracked:
            logger.warning(
                f'New {str(labbook)} untracked directory `{new_directory_path}`'
            )
            return

        # Walk down the relative path so every directory level (not just the
        # leaf) gets a staged .gitkeep file.
        new_dir = ''
        for d in relative_path.split(os.sep):
            new_dir = os.path.join(new_dir, d)
            full_new_dir = os.path.join(labbook.root_dir, new_dir)

            gitkeep_path = os.path.join(full_new_dir, '.gitkeep')
            if not os.path.exists(gitkeep_path):
                with open(gitkeep_path, 'w') as gitkeep:
                    gitkeep.write(
                        "This file is necessary to keep this directory tracked by Git"
                        " and archivable by compression tools. Do not delete or modify!"
                    )
                labbook.git.add(gitkeep_path)

        if not create_activity_record:
            return

        # Create detail record
        activity_type, activity_detail_type, section_str = labbook.infer_section_from_relative_path(
            relative_path)
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)

        msg = f"Created new {section_str} directory `{relative_path}`"
        commit = labbook.git.commit(msg)
        adr.add_value('text/markdown', msg)

        # Create activity record linked to the commit made above
        ar = ActivityRecord(activity_type,
                            message=msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['directory-create'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Example #2
0
    def add_bundled_app(self,
                        port: int,
                        name: str,
                        description: str,
                        command: Optional[str] = None) -> Dict[str, Any]:
        """Add a "bundled app" configuration to this labbook

        The new configuration is written to `self.bundled_app_file`, committed,
        and recorded as an ENVIRONMENT activity record.

        Args:
            port(int): port number to expose from the container (will be routed to the browser)
            name(str): name of the bundled app (1 to 10 characters)
            description(str): description of the bundled app (at most 240 characters)
            command(str): command to run in the container if needed to start the app
                          (at most 1024 characters)

        Returns:
            dict: the bundled app configurations from `get_bundled_apps()` with
                  the new entry added under `name`

        Raises:
            ValueError: If the name or port is reserved, a length limit is
                        violated, or the port is used by another bundled app
        """
        # Check if a reserved application name, currently:
        if name.lower() in self.reserved_names:
            raise ValueError(
                f"{name} is a reserved application name. Try again.")

        if not name:
            # An empty name needs its own message; the length message below
            # would read as " must be 10 characters or less."
            raise ValueError("Bundled app name cannot be empty.")

        if len(name) > 10:
            raise ValueError(f"{name} must be 10 characters or less.")

        if len(description) > 240:
            raise ValueError(f"{description} must be 240 characters or less.")

        if command and len(command) > 1024:
            raise ValueError(f"{command} must be 1024 characters or less.")

        # Check if a reserved port currently
        if port in self.reserved_ports:
            raise ValueError(f"Port {port} is a reserved port. Try again.")

        data = self.get_bundled_apps()

        # Check for port already in use
        if any(app.get('port') == port for app in data.values()):
            raise ValueError(f"Port {port} is already in use. Try again.")

        data[name] = {
            'port': port,
            'description': description,
            'command': command
        }

        with open(self.bundled_app_file, 'wt') as baf:
            json.dump(data, baf)

        # Commit the changes
        self.labbook.git.add(self.bundled_app_file)
        commit = self.labbook.git.commit("Committing bundled app")

        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain',
                      f"Added bundled application: {json.dumps(data[name])}")
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=f"Added bundled app '{name}'",
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "bundled_app"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)

        return data
Example #3
0
    def test_multiple_cells(self, redis_client, mock_labbook, mock_kernel):
        """Run two cells through the monitor and check the single activity record that is stored"""
        labbook = mock_labbook[2]
        kernel = mock_kernel[0]

        # Placeholder notebook file, expected to show up as untracked below
        notebook_file = os.path.join(labbook.root_dir, 'code', 'Test.ipynb')
        with open(notebook_file, 'wt') as handle:
            handle.write("Dummy file")

        monitor_key = "dev_env_monitor:{}:{}:{}:{}:activity_monitor:{}".format(
            'test', 'test', 'labbook1', 'jupyterlab-ubuntu1604', uuid.uuid4())
        monitor = JupyterLabNotebookMonitor("test",
                                            "test",
                                            labbook.name,
                                            monitor_key,
                                            config_file=mock_labbook[0])

        # Monitoring metadata handed to store_record()
        metadata = {
            "kernel_id": "XXXX",
            "kernel_name": 'python',
            "kernel_type": 'notebook',
            "path": 'code/Test.ipynb'
        }

        # Execute two cells back to back
        kernel.execute("print('Hello, World')")
        kernel.execute("print('Cell number 2!')")

        # Only the placeholder notebook is untracked at this point
        repo_status = labbook.git.status()
        assert len(repo_status["untracked"]) == 1
        assert repo_status["untracked"][0] == 'code/Test.ipynb'

        # Pull the four iopub messages of each cell, first cell first
        busy_a, input_a, output_a, idle_a = [kernel.get_iopub_msg() for _ in range(4)]
        busy_b, input_b, output_b, idle_b = [kernel.get_iopub_msg() for _ in range(4)]

        # --- First cell ---
        # State change to busy
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is False
        monitor.handle_message(busy_a)
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_a)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Output
        monitor.handle_message(output_a)
        assert len(monitor.current_cell.result) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # State change back to idle
        monitor.handle_message(idle_a)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 1

        # --- Second cell ---
        # State change to busy
        assert monitor.kernel_status == 'idle'
        monitor.handle_message(busy_b)
        assert monitor.can_store_activity_record is False
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_b)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 1
        assert monitor.can_store_activity_record is False

        # Output
        monitor.handle_message(output_b)
        assert len(monitor.current_cell.result) > 0
        assert len(monitor.cell_data) == 1
        assert monitor.can_store_activity_record is False

        # State change back to idle
        monitor.handle_message(idle_b)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 2

        # Store the record manually for this test
        monitor.store_record(metadata)
        assert monitor.can_store_activity_record is False
        assert len(monitor.cell_data) == 0

        # Repository must be clean after the record is stored
        repo_status = labbook.git.status()
        for state in ("untracked", "staged", "unstaged"):
            assert len(repo_status[state]) == 0

        # Check activity entry
        history = labbook.git.log()
        assert len(history) == 4
        assert 'code/Test.ipynb' in history[0]['message']

        store = ActivityStore(labbook)
        record = store.get_activity_record(history[0]['commit'])
        assert record.type == ActivityType.CODE
        assert record.show is True
        assert record.importance == 0
        assert not record.tags
        assert record.message == 'Executed cell in notebook code/Test.ipynb'
        assert len(record._detail_objects) == 5

        # (show, detail type, importance) expected at each position
        expected_details = [
            (True, ActivityDetailType.RESULT.value, 155),
            (True, ActivityDetailType.RESULT.value, 154),
            (False, ActivityDetailType.CODE.value, 255),
            (False, ActivityDetailType.CODE_EXECUTED.value, 255),
            (False, ActivityDetailType.CODE_EXECUTED.value, 254),
        ]
        for position, (shown, detail_type, importance) in enumerate(expected_details):
            entry = record._detail_objects[position]
            assert entry[0] is shown
            assert entry[1] == detail_type
            assert entry[2] == importance
Example #4
0
    def insert_file(cls,
                    labbook: LabBook,
                    section: str,
                    src_file: str,
                    dst_path: str = '') -> Dict[str, Any]:
        """Insert the file at `src_file` into a labbook section and record it.

        Placement of the file is delegated to `put_file()`. For a tracked
        section the file is then added to git, committed, and an activity
        record is created. For an untracked section only a warning is logged.

        Args:
            labbook: Subject labbook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Relative path within labbook where `src_file`
                      should be copied to

        Returns:
            dict: The inserted file's info, as returned by `put_file()`

        Raises:
            FileOperationsException: If adding or committing the file fails
        """

        finfo = FileOperations.put_file(labbook=labbook,
                                        section=section,
                                        src_file=src_file,
                                        dst_path=dst_path)

        rel_path = os.path.join(section, finfo['key'])
        if in_untracked(labbook.root_dir, section):
            logger.warning(f"Inserted file {rel_path} ({finfo['size']} bytes)"
                           f" to untracked section {section}. This will not"
                           f" be tracked by commits or activity records.")
            return finfo

        # Section is tracked: commit the file and create an activity record
        activity_type, activity_detail_type, section_str = \
            labbook.get_activity_type_from_section(section)

        commit_msg = f"Added new {section_str} file {rel_path}"
        try:
            labbook.git.add(rel_path)
            commit = labbook.git.commit(commit_msg)
        except Exception as x:
            logger.error(x)
            # Roll back the insert. `dst_path` is only the caller's relative
            # destination (and defaults to ''), so it is not a removable path.
            # The inserted file is presumably at `rel_path` under the labbook
            # root, the same path handed to git above.
            inserted_file = os.path.join(labbook.root_dir, rel_path)
            if os.path.isfile(inserted_file):
                os.remove(inserted_file)
            raise FileOperationsException(x) from x

        # Create Activity record and detail. splitext() always returns a
        # 2-tuple, so the fallback has to apply to the extension itself.
        ext = os.path.splitext(rel_path)[1] or 'file'
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            show=True,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=[ext])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return finfo
Example #5
0
    def test_no_show(self, redis_client, mock_labbook, mock_kernel):
        """Process a cell that yields no important detail items and check the stored record is hidden"""
        labbook = mock_labbook[2]
        kernel = mock_kernel[0]

        monitor_key = "dev_env_monitor:{}:{}:{}:{}:activity_monitor:{}".format(
            'test', 'test', 'labbook1', 'jupyterlab-ubuntu1604', uuid.uuid4())
        monitor = JupyterLabNotebookMonitor("test",
                                            "test",
                                            labbook.name,
                                            monitor_key,
                                            config_file=mock_labbook[0])

        # Monitoring metadata handed to store_record()
        metadata = {
            "kernel_id": "XXXX",
            "kernel_name": 'python',
            "kernel_type": 'notebook',
            "path": 'code/Test.ipynb'
        }

        # Execute a cell that is a plain assignment
        kernel.execute("a=1")

        # Nothing untracked in the repository yet
        repo_status = labbook.git.status()
        assert len(repo_status["untracked"]) == 0

        # This test consumes three iopub messages for the cell
        busy_msg, input_msg, idle_msg = [kernel.get_iopub_msg() for _ in range(3)]

        # State change to busy
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is False
        monitor.handle_message(busy_msg)
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_msg)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Third message returns the kernel to idle
        monitor.handle_message(idle_msg)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 1

        # Store the record manually for this test
        monitor.store_record(metadata)
        assert monitor.can_store_activity_record is False
        assert len(monitor.cell_data) == 0

        # Check activity entry
        history = labbook.git.log()
        assert len(history) == 4
        assert 'code/Test.ipynb' in history[0]['message']

        store = ActivityStore(labbook)
        record = store.get_activity_record(history[0]['commit'])
        assert record.type == ActivityType.CODE
        assert record.show is False
        assert record.importance == 0
        assert not record.tags
        assert record.message == 'Executed cell in notebook code/Test.ipynb'
        assert len(record._detail_objects) == 1

        only_detail = record._detail_objects[0]
        assert only_detail[0] is False
        assert only_detail[1] == ActivityDetailType.CODE_EXECUTED.value
        assert only_detail[2] == 255
Example #6
0
    def test_add_many_files(self, redis_client, mock_labbook, mock_kernel):
        """Process a cell after 260 output files were created and check the stored detail objects"""
        labbook = mock_labbook[2]
        kernel = mock_kernel[0]

        # Create 260 small files in the output section
        output_dir = os.path.join(labbook.root_dir, 'output')
        for file_number in range(260):
            with open(os.path.join(output_dir, f"{file_number}.dat"), 'wt') as handle:
                handle.write("blah")

        monitor_key = "dev_env_monitor:{}:{}:{}:{}:activity_monitor:{}".format(
            'test', 'test', 'labbook1', 'jupyterlab-ubuntu1604', uuid.uuid4())
        monitor = JupyterLabNotebookMonitor("test",
                                            "test",
                                            labbook.name,
                                            monitor_key,
                                            config_file=mock_labbook[0])

        # Monitoring metadata handed to store_record()
        metadata = {
            "kernel_id": "XXXX",
            "kernel_name": 'python',
            "kernel_type": 'notebook',
            "path": 'code/Test.ipynb'
        }

        # Execute a cell
        kernel.execute("print('Generated 260 output files')")

        # All generated files are untracked before the record is stored
        repo_status = labbook.git.status()
        assert len(repo_status["untracked"]) == 260

        # Pull the four iopub messages of the cell
        busy_msg, input_msg, output_msg, idle_msg = [kernel.get_iopub_msg() for _ in range(4)]

        # State change to busy
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is False
        monitor.handle_message(busy_msg)
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_msg)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Output
        monitor.handle_message(output_msg)
        assert len(monitor.current_cell.result) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # State change back to idle
        monitor.handle_message(idle_msg)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 1

        # Store the record manually for this test
        monitor.store_record(metadata)
        assert monitor.can_store_activity_record is False
        assert len(monitor.cell_data) == 0

        # Repository must be clean after the record is stored
        repo_status = labbook.git.status()
        for state in ("untracked", "staged", "unstaged"):
            assert len(repo_status[state]) == 0

        # Check activity entry
        history = labbook.git.log()
        assert len(history) == 4
        assert 'code/Test.ipynb' in history[0]['message']

        store = ActivityStore(labbook)
        record = store.get_activity_record(history[0]['commit'])
        assert record.type == ActivityType.CODE
        assert record.show is True
        assert record.importance == 0
        assert not record.tags
        assert record.message == 'Executed cell in notebook code/Test.ipynb'
        assert len(record._detail_objects) == 256
        assert record.num_detail_objects == 256

        # Spot checks: position -> (show, detail type, importance)
        output_data = ActivityDetailType.OUTPUT_DATA.value
        expected_details = {
            0: (True, ActivityDetailType.NOTE.value, 0),
            1: (True, ActivityDetailType.RESULT.value, 155),
            2: (False, ActivityDetailType.CODE_EXECUTED.value, 255),
            3: (False, output_data, 255),
            4: (False, output_data, 254),
            48: (False, output_data, 210),
            254: (False, output_data, 4),
            255: (False, output_data, 3),
        }
        for position, (shown, detail_type, importance) in expected_details.items():
            entry = record._detail_objects[position]
            assert entry[0] is shown
            assert entry[1] == detail_type
            assert entry[2] == importance
Example #7
0
    def test_start_modify(self, redis_client, mock_labbook, mock_kernel):
        """Process two cell executions where the second modifies the notebook file and creates an output file"""
        labbook = mock_labbook[2]
        kernel = mock_kernel[0]

        notebook_file = os.path.join(labbook.root_dir, 'code', 'Test.ipynb')
        result_file = os.path.join(labbook.root_dir, 'output', 'result.bin')
        with open(notebook_file, 'wt') as handle:
            handle.write("Dummy file")

        monitor_key = "dev_env_monitor:{}:{}:{}:{}:activity_monitor:{}".format(
            'test', 'test', 'labbook1', 'jupyterlab-ubuntu1604', uuid.uuid4())
        monitor = JupyterLabNotebookMonitor("test",
                                            "test",
                                            labbook.name,
                                            monitor_key,
                                            config_file=mock_labbook[0])

        # Monitoring metadata handed to store_record()
        metadata = {
            "kernel_id": "XXXX",
            "kernel_name": 'python',
            "kernel_type": 'notebook',
            "path": 'code/Test.ipynb'
        }

        # ---------- First execution ----------
        kernel.execute("print('Hello, World')")

        # Only the placeholder notebook is untracked
        repo_status = labbook.git.status()
        assert len(repo_status["untracked"]) == 1
        assert repo_status["untracked"][0] == 'code/Test.ipynb'

        # Pull the four iopub messages of the cell
        busy_msg, input_msg, output_msg, idle_msg = [kernel.get_iopub_msg() for _ in range(4)]

        # State change to busy
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is False
        monitor.handle_message(busy_msg)
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_msg)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Output
        monitor.handle_message(output_msg)
        assert len(monitor.current_cell.result) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Repository is unchanged while the cell is still running
        repo_status = labbook.git.status()
        assert len(repo_status["untracked"]) == 1
        assert repo_status["untracked"][0] == 'code/Test.ipynb'

        # State change back to idle
        monitor.handle_message(idle_msg)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 1

        # Store the record manually for this test
        monitor.store_record(metadata)
        assert monitor.can_store_activity_record is False
        assert len(monitor.cell_data) == 0

        # Repository must be clean after the record is stored
        repo_status = labbook.git.status()
        for state in ("untracked", "staged", "unstaged"):
            assert len(repo_status[state]) == 0

        # Check activity entry
        history = labbook.git.log()
        assert len(history) == 4
        assert 'code/Test.ipynb' in history[0]['message']

        # ---------- Second execution ----------
        # Execute again, then fake an edit to the notebook and a newly generated output file
        kernel.execute("a=100\nprint('Hello, World 2')")
        with open(notebook_file, 'wt') as handle:
            handle.write("change the fake notebook")

        with open(result_file, 'wt') as handle:
            handle.write("some result data")

        # Pull the four iopub messages of the cell
        busy_msg, input_msg, output_msg, idle_msg = [kernel.get_iopub_msg() for _ in range(4)]

        # State change to busy
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is False
        monitor.handle_message(busy_msg)
        assert monitor.kernel_status == 'busy'

        # Input
        monitor.handle_message(input_msg)
        assert len(monitor.current_cell.code) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # Output
        monitor.handle_message(output_msg)
        assert len(monitor.current_cell.result) > 0
        assert len(monitor.cell_data) == 0
        assert monitor.can_store_activity_record is False

        # One untracked file and one unstaged modification to the notebook
        repo_status = labbook.git.status()
        assert len(repo_status["staged"]) == 0
        assert len(repo_status["untracked"]) == 1
        assert len(repo_status["unstaged"]) == 1
        changed_path, change_kind = repo_status["unstaged"][0][0], repo_status["unstaged"][0][1]
        assert changed_path == 'code/Test.ipynb'
        assert change_kind == 'modified'

        # State change back to idle
        monitor.handle_message(idle_msg)
        assert monitor.kernel_status == 'idle'
        assert monitor.can_store_activity_record is True
        assert len(monitor.cell_data) == 1

        # Store the record manually for this test
        monitor.store_record(metadata)
        assert monitor.can_store_activity_record is False
        assert len(monitor.cell_data) == 0

        # Repository must be clean after the record is stored
        repo_status = labbook.git.status()
        for state in ("untracked", "staged", "unstaged"):
            assert len(repo_status[state]) == 0

        # Check activity entry
        history = labbook.git.log()
        assert len(history) == 6
        assert 'code/Test.ipynb' in history[0]['message']

        store = ActivityStore(labbook)
        record = store.get_activity_record(history[0]['commit'])
        assert record.type == ActivityType.CODE
        assert record.show is True
        assert record.importance == 0
        assert not record.tags
        assert record.message == 'Executed cell in notebook code/Test.ipynb'
        assert len(record._detail_objects) == 4

        # (show, detail type, importance) expected at each position
        expected_details = [
            (True, ActivityDetailType.RESULT.value, 155),
            (False, ActivityDetailType.CODE.value, 255),
            (False, ActivityDetailType.CODE_EXECUTED.value, 255),
            (False, ActivityDetailType.OUTPUT_DATA.value, 255),
        ]
        for position, (shown, detail_type, importance) in enumerate(expected_details):
            entry = record._detail_objects[position]
            assert entry[0] is shown
            assert entry[1] == detail_type
            assert entry[2] == importance

        # Detail text for the created output file
        output_detail = store.get_detail_record(record._detail_objects[3][3].key)
        assert len(output_detail.data) == 1
        assert output_detail.data['text/markdown'] == 'Created new Output Data file `output/result.bin`'

        # Detail text for the modified notebook
        code_detail = store.get_detail_record(record._detail_objects[1][3].key)
        assert len(code_detail.data) == 1
        assert code_detail.data['text/markdown'] == 'Modified Code file `code/Test.ipynb`'
    def change_base(self, repository: str, base_id: str,
                    revision: int) -> None:
        """Swap the current base for a new one.

        Existing base configuration files (`*.yaml` in the `base` directory of
        `self.env_dir`) are removed. If anything was removed, the removal is
        committed and stored as an activity record. Every package listed with
        `from_base` set is then removed through `remove_packages()`, and
        finally `add_base()` is called with the given arguments.

        Zero or multiple base files are tolerated: a warning is logged, and
        when several are present all of them are removed.

        Args:
             repository: name of git repo for base images, e.g. 'gigantum_base-images'
             base_id: name of base image, e.g. 'python3-minimal'
             revision: The revision number specified INSIDE the YAML file for that base image
        """
        # Filled in by the remove_* helpers as they go
        detail_records: List[ActivityDetailRecord] = []

        base_files = list((Path(self.env_dir) / "base").glob('*.yaml'))
        base_count = len(base_files)

        short_message = ''
        if base_count == 1:
            # Properly configured Labbook - more detail is reported about the base
            short_message = self.remove_base(base_files[0], detail_records)
            logger.info(short_message)
        else:
            logger.warning(
                f"Project misconfigured. Found {base_count} base configuration files."
            )
            if base_count > 1:
                # Only brief details are provided regarding these files
                short_message = self.remove_all_bases(base_files,
                                                      detail_records)

        if short_message:
            # Something was removed above - commit and create an activity record
            commit = self.labbook.git.commit(short_message)

            activity_record = ActivityRecord(ActivityType.ENVIRONMENT,
                                             message=short_message,
                                             linked_commit=commit.hexsha,
                                             tags=["environment", "base"],
                                             show=True)
            for detail in detail_records:
                activity_record.add_detail_object(detail)

            ActivityStore(self.labbook).create_activity_record(activity_record)

        # Group the names of packages flagged `from_base` by their package manager
        from_base_packages: Dict[str, List[str]] = {}
        for package in self.get_component_list("package_manager"):
            if not package['from_base']:
                continue
            # The base is being removed - so the package isn't guaranteed
            manager = package['manager']
            if manager not in from_base_packages:
                from_base_packages[manager] = []
            from_base_packages[manager].append(package["package"])

        # Package removal will also create activity records
        for manager, names in from_base_packages.items():
            self.remove_packages(manager, names, remove_from_base=True)

        # add_base currently returns None, but this will incorporate any future changes
        return self.add_base(repository, base_id, revision)
    def add_base(self, repository: str, base_id: str, revision: int) -> None:
        """Add a base to a LabBook's environment.

        The base definition is written to `<env_dir>/base/<repository>_<base_id>.yaml`, the packages listed by the
        base are registered through `add_packages(..., from_base=True)`, and finally the base file is committed and
        an activity record describing the new base is stored.

        Note that if this is run after packages have been configured, it will leave user-specified packages alone (and
        not override them with the base-installed package) even if the base provides a newer version of the package.

        Args:
            repository: The Environment Component repository the component is in
            base_id: The name of the component
            revision: The revision to use, specified *inside* yaml file.

        Returns:
            None

        Raises:
            ValueError: If `repository` or `base_id` is empty, or if a base YAML file already exists in the
                project's base directory.
        """
        if not repository:
            raise ValueError('repository cannot be None or empty')

        if not base_id:
            raise ValueError('component cannot be None or empty')

        # Get the base
        base_data = self.bases.get_base(repository, base_id, revision)
        base_filename = f"{repository}_{base_id}.yaml"
        base_final_path = os.path.join(self.env_dir, 'base', base_filename)

        short_message = f"Added base: {base_id} r{revision}"
        # Count number of YAML files in our base dir - should be 0
        existing_bases = sum(
            1 for base_path in Path(self.env_dir, 'base').iterdir()
            if base_path.suffix == '.yaml')
        if existing_bases:
            # This shouldn't ever happen - but we don't trust the front-end
            raise ValueError(
                f"Found {existing_bases} base(s) already in this project")

        with open(base_final_path, 'wt') as cf:
            cf.write(yaml.safe_dump(base_data, default_flow_style=False))

        # We construct records of packages installed by the user grouped by package manager
        # This can happen, for example, when we're changing bases
        installed_packages: Dict[str, List[str]] = {}
        for package in self.get_component_list("package_manager"):
            if package['from_base']:
                # Packages from the base to be added are NOT yet installed, but there are package
                # files that are marked `from_base`. This should never happen!
                logger.warning(
                    'Residual packages remain that are listed as installed by base - converting to user'
                )
                self.add_packages(package_manager=package['manager'],
                                  packages=[package],
                                  force=True,
                                  from_base=False)

            # Build dictionary of packages
            installed_packages.setdefault(package['manager'],
                                          []).append(package["package"])

        for manager in base_data['package_managers']:
            for p_manager, base_packages in manager.items():
                if not base_packages:
                    # Nothing listed for this package manager
                    continue

                # Start from a fresh list for EACH package manager. Sharing one list across the keys of a
                # `manager` entry would re-submit packages collected for one manager under the next one.
                packages = []
                already_installed = installed_packages.get(p_manager, [])
                for pkg in base_packages:
                    pkg_name, pkg_version = strip_package_and_version(
                        p_manager, pkg)
                    if pkg_name in already_installed:
                        # The user already installed this package with the same package manager - leave the
                        # user-specified package alone (a package from a different manager doesn't count)
                        continue
                    packages.append({
                        "package": pkg_name,
                        "version": pkg_version,
                        "manager": p_manager
                    })

                # NOTE(review): `packages` may be empty here when every base package is already installed by the
                # user; the call is kept to preserve existing behavior - confirm whether it should be skipped.
                self.add_packages(package_manager=p_manager,
                                  packages=packages,
                                  force=True,
                                  from_base=True)

        self.labbook.git.add(base_final_path)
        commit = self.labbook.git.commit(short_message)
        logger.info(f"Added base from {repository}: {base_id} rev{revision}")

        # Create a ActivityRecord
        long_message = "\n".join(
            (f"Added base {base_id}\n", f"{base_data['description']}\n",
             f"  - repository: {repository}", f"  - component: {base_id}",
             f"  - revision: {revision}\n"))

        # Create detail record
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', long_message)

        # Create activity record
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            linked_commit=commit.hexsha,
                            tags=["environment", "base"],
                            show=True)
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
    def remove_packages(self,
                        package_manager: str,
                        package_names: List[str],
                        remove_from_base: bool = False) -> None:
        """Delete the yaml files describing the given packages from the labbook environment.

        Every package file is removed from disk and from git. One commit and one activity record (holding a
        detail entry per removed package) are created for the whole batch.

        Args:
            package_manager: The package manager (eg., "apt" or "pip3")
            package_names: A list of packages to uninstall
            remove_from_base: Usually we won't do this, specify `True` when you are changing out a base

        Raises:
            ValueError: If a package file does not exist, if it is marked `from_base` while `remove_from_base`
                is False, or if the file is still present after deletion.
            IOError: If a package file loads as empty.
        """
        # The record is filled in as we go; message and commit are set once everything is removed
        ar = ActivityRecord(
            ActivityType.ENVIRONMENT,
            message="",
            show=True,
            linked_commit="",
            tags=["environment", 'package_manager', package_manager])

        for name in package_names:
            yaml_path = os.path.join(self.env_dir, 'package_manager',
                                     f'{package_manager}_{name}.yaml')

            # The package must have been added before
            if not os.path.exists(yaml_path):
                raise ValueError(
                    f"{package_manager} installed package {name} does not exist."
                )

            # Load the description so we can tell whether the package came with the base
            with open(yaml_path, 'rt') as yaml_file:
                description = yaml.safe_load(yaml_file)

            if not description:
                raise IOError("Failed to load package description")

            provided_by_base = description['from_base'] is True
            if provided_by_base and not remove_from_base:
                raise ValueError(
                    "Won't remove a package installed in the Base, without `remove_from_base=True`"
                )

            # Without the yaml file, the next Dockerfile gen/rebuild drops the dependency
            os.remove(yaml_path)
            if os.path.exists(yaml_path):
                raise ValueError("Failed to remove package.")

            self.labbook.git.remove(yaml_path)

            # One detail record per removed package
            removal_note = f"Removed {package_manager} managed package: {name}"
            detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                          show=False,
                                          action=ActivityAction.DELETE)
            detail.add_value('text/plain', removal_note)
            ar.add_detail_object(detail)
            logger.info(removal_note)

        # Commit the removals and link the commit to the activity record
        short_message = f"Removed {len(package_names)} {package_manager} managed package(s)"
        ar.linked_commit = self.labbook.git.commit(short_message).hexsha
        ar.message = short_message

        # Store
        ActivityStore(self.labbook).create_activity_record(ar)
    def add_packages(self,
                     package_manager: str,
                     packages: List[dict],
                     force: bool = False,
                     from_base: bool = False) -> None:
        """Add a new yaml file describing each new package and its context to the labbook.

        One file named `<package_manager>_<package>.yaml` is written per package into the `package_manager`
        directory of the environment. All changes are then committed together and a single activity record
        (with one detail entry per package) is stored.

        Args:
            package_manager: The package manager (eg., "apt" or "pip3")
            packages: A list of package dictionaries to install (`package` & `version` are the keys needed;
                a falsy `version` is recorded as `latest`)
            force: Force overwriting a component if it already exists (e.g. you want to update the version)
            from_base: If a package in a base image, not deletable. Otherwise, can be deleted by LB user.

        Returns:
            None

        Raises:
            ValueError: If `package_manager` is empty, or if a package file already exists and `force` is False.
        """
        if not package_manager:
            raise ValueError(
                'Argument package_manager cannot be None or empty')

        # Create activity record - message and linked commit are filled in after the commit below
        ar = ActivityRecord(
            ActivityType.ENVIRONMENT,
            show=True,
            message="",
            linked_commit="",
            tags=["environment", 'package_manager', package_manager])

        update_cnt = 0
        add_cnt = 0
        for pkg in packages:
            version_str = f'"{pkg["version"]}"' if pkg["version"] else 'latest'
            pkg_name = pkg["package"]

            yaml_lines = [
                f'# Generated on: {datetime.datetime.now()}',
                f'manager: "{package_manager}"',
                f'package: "{pkg_name}"',
                f'version: {version_str}',
                f'from_base: {str(from_base).lower()}',
                f'schema: {CURRENT_SCHEMA}'
            ]
            yaml_filename = f'{package_manager}_{pkg_name}.yaml'
            package_yaml_path = os.path.join(self.env_dir, 'package_manager',
                                             yaml_filename)

            # Check if package already exists
            if os.path.exists(package_yaml_path):
                if not force:
                    raise ValueError(
                        f"The package {pkg_name} already exists in this LabBook."
                        " Use `force` to overwrite")

                # You are updating, since force is set and package already exists.
                logger.warning(f"Updating package file at {package_yaml_path}")
                detail_msg = f"Update {package_manager} managed package: {pkg_name} {version_str}"
                action = ActivityAction.EDIT
                update_cnt += 1
            else:
                detail_msg = f"Add {package_manager} managed package: {pkg_name} {version_str}"
                action = ActivityAction.CREATE
                add_cnt += 1

            adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                       show=False,
                                       action=action)

            # Write the YAML to the file. The file is opened in text mode, which already translates "\n" to the
            # platform line ending - joining with os.linesep would produce "\r\r\n" on Windows.
            with open(package_yaml_path, 'w') as package_yaml_file:
                package_yaml_file.write("\n".join(yaml_lines))

            # Attach the detail record for this package
            adr.add_value('text/plain', detail_msg)
            ar.add_detail_object(adr)
            logger.info(f"Added package {pkg_name} to labbook at {self.labbook.root_dir}")

        # Set activity message
        ar_msg = ""
        if add_cnt > 0:
            ar_msg = f"Added {add_cnt} {package_manager} package(s). "

        if update_cnt > 0:
            ar_msg = f"{ar_msg}Updated {update_cnt} {package_manager} package(s)"

        # Add to git
        self.labbook.git.add_all(self.env_dir)
        commit = self.labbook.git.commit(ar_msg)
        ar.linked_commit = commit.hexsha
        ar.message = ar_msg

        # Store
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)