Beispiel #1
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        username = get_logged_in_username()
        inv_manager = InventoryManager()
        if is_untracked:
            lb = inv_manager.create_labbook_disabled_lfs(
                username=username,
                owner=username,
                labbook_name=name,
                description=description,
                author=get_logged_in_author())
        else:
            lb = inv_manager.create_labbook(username=username,
                                            owner=username,
                                            labbook_name=name,
                                            description=description,
                                            author=get_logged_in_author())

        if is_untracked:
            FileOperations.set_untracked(lb, 'input')
            FileOperations.set_untracked(lb, 'output')
            input_set = FileOperations.is_set_untracked(lb, 'input')
            output_set = FileOperations.is_set_untracked(lb, 'output')
            if not (input_set and output_set):
                raise ValueError(
                    f'{str(lb)} untracking for input/output in malformed state'
                )
            if not lb.is_repo_clean:
                raise ValueError(
                    f'{str(lb)} should have clean Git state after setting for untracked'
                )

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=False,
                                   importance=0)
        adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

        # Create activity record
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message=f"Created new LabBook: {username}/{name}",
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(lb)
        store.create_activity_record(ar)

        cm = ComponentManager(lb)
        cm.add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
Beispiel #2
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               description,
                               client_mutation_id=None):
        username = get_logged_in_username()
        ds = InventoryManager().load_dataset(username,
                                             owner,
                                             dataset_name,
                                             author=get_logged_in_author())
        ds.description = description
        with ds.lock():
            ds.git.add(os.path.join(ds.root_dir, '.gigantum/gigantum.yaml'))
            commit = ds.git.commit('Updating description')

            adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
            adr.add_value('text/plain',
                          f"Updated Dataset description: {description}")
            ar = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated Dataset description",
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=False)
            ar.add_detail_object(adr)
            ars = ActivityStore(ds)
            ars.create_activity_record(ar)
        return SetDatasetDescription(
            updated_dataset=Dataset(owner=owner, name=dataset_name))
Beispiel #3
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               description_content,
                               client_mutation_id=None):
        username = get_logged_in_username()
        lb = InventoryManager().load_labbook(username,
                                             owner,
                                             labbook_name,
                                             author=get_logged_in_author())
        lb.description = description_content
        with lb.lock():
            lb.git.add(os.path.join(lb.config_path))
            commit = lb.git.commit('Updating description')

            adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
            adr.add_value('text/plain', "Updated description of Project")
            ar = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated description of Project",
                                linked_commit=commit.hexsha,
                                tags=["labbook"],
                                show=False)
            ar.add_detail_object(adr)
            ars = ActivityStore(lb)
            ars.create_activity_record(ar)
        return SetLabbookDescription(success=True)
def migrate_labbook_schema(labbook: LabBook) -> None:
    # Fallback point in case of a problem
    initial_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        call_subprocess(f'git reset --hard {initial_commit}'.split(),
                        cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    cmt = labbook.git.commit(msg,
                             author=labbook.author,
                             committer=labbook.author)
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                               show=True,
                               importance=100,
                               action=ActivityAction.EDIT)

    adr.add_value('text/plain', msg)
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=msg,
                        show=True,
                        importance=255,
                        linked_commit=cmt.hexsha,
                        tags=['schema', 'update', 'migration'])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
    def remove_docker_snippet(self, name: str) -> None:
        """Remove a custom docker snippet

        Args:
            name: Name or identifer of snippet to remove

        Returns:
            None
        """
        docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                  'docker')
        docker_file = os.path.join(docker_dir, f'{name}.yaml')

        if not os.path.exists(docker_file):
            raise ValueError(f'Docker snippet name `{name}` does not exist')

        self.labbook.git.remove(docker_file, keep_file=False)
        short_message = f"Removed custom Docker snippet `{name}`"
        logger.info(short_message)
        commit = self.labbook.git.commit(short_message)
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value('text/plain', short_message)
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            show=False,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Beispiel #6
0
    def backend_config(self, data: dict) -> None:
        """Save storage config data"""
        if self._backend:
            self._backend.configuration = {**self._backend.configuration, **data}

        # Remove defaults set at runtime that shouldn't be persisted
        if "username" in data:
            del data["username"]
        if "gigantum_bearer_token" in data:
            del data["gigantum_bearer_token"]
        if "gigantum_id_token" in data:
            del data["gigantum_id_token"]

        config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
        with open(config_file, 'wt') as sf:
            json.dump(data, sf, indent=2)

        self.git.add(config_file)
        cm = self.git.commit("Updating backend config")

        ar = ActivityRecord(ActivityType.DATASET,
                            message="Updated Dataset storage backend configuration",
                            show=True,
                            importance=255,
                            linked_commit=cm.hexsha,
                            tags=['config'])
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=255,
                                   action=ActivityAction.EDIT)
        d = json.dumps(data, indent=2)
        adr.add_value('text/markdown', f"Updated dataset storage backend configuration:\n\n ```{d}```")
        ar.add_detail_object(adr)
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
Beispiel #7
0
    def remove_bundled_app(self, name: str) -> None:
        """Remove a bundled app from this labbook

        Args:
            name(str): name of the bundled app

        Returns:
            None
        """
        data = self.get_bundled_apps()
        if name not in data:
            raise ValueError(f"App {name} does not exist. Cannot remove.")

        del data[name]

        with open(self.bundled_app_file, 'wt') as baf:
            json.dump(data, baf)

        # Commit the changes
        self.labbook.git.add(self.bundled_app_file)
        commit = self.labbook.git.commit(f"Committing bundled app")

        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', f"Removed bundled application: {name}")
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=f"Removed bundled application: {name}",
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "bundled_app"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Beispiel #8
0
    def insert_file(cls,
                    labbook: LabBook,
                    section: str,
                    src_file: str,
                    dst_path: str = '') -> Dict[str, Any]:
        """ Move the file at `src_file` into the `dst_dir`, overwriting
        if a file already exists there. This calls `copy_into_container()` under-
        the-hood, but will create an activity record.

        Args:
            labbook: Subject labbook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Relative path within labbook where `src_file`
                      should be copied to

        Returns:
            dict: The inserted file's info
        """

        finfo = FileOperations.put_file(labbook=labbook,
                                        section=section,
                                        src_file=src_file,
                                        dst_path=dst_path)

        rel_path = os.path.join(section, finfo['key'])

        # If we are setting this section to be untracked
        activity_type, activity_detail_type, section_str = \
            labbook.get_activity_type_from_section(section)

        commit_msg = f"Added new {section_str} file {rel_path}"
        try:
            labbook.git.add(rel_path)
            commit = labbook.git.commit(commit_msg)
        except Exception as x:
            logger.error(x)
            os.remove(dst_path)
            raise FileOperationsException(x)

        # Create Activity record and detail
        _, ext = os.path.splitext(rel_path) or 'file'
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            show=True,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=[ext])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return finfo
    def add_docker_snippet(self,
                           name: str,
                           docker_content: List[str],
                           description: Optional[str] = None) -> None:
        """ Add a custom docker snippet to the environment (replacing custom dependency).

        Args:
            name: Name or identifier of the custom docker snippet
            docker_content: Content of the docker material (May make this a list of strings instead)
            description: Human-readable verbose description of what the snippet is intended to accomplish.

        Returns:
            None
        """

        if not name:
            raise ValueError('Argument `name` cannot be None or empty')

        if not name.replace('-', '').replace('_', '').isalnum():
            raise ValueError(
                'Argument `name` must be alphanumeric string (- and _ accepted)'
            )

        if not docker_content:
            docker_content = []

        file_data = {
            'name': name,
            'timestamp_utc': datetime.datetime.utcnow().isoformat(),
            'description': description or "",
            'content': docker_content
        }

        docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                  'docker')
        docker_file = os.path.join(docker_dir, f'{name}.yaml')
        os.makedirs(docker_dir, exist_ok=True)
        yaml_dump = yaml.safe_dump(file_data, default_flow_style=False)
        with open(docker_file, 'w') as df:
            df.write(yaml_dump)

        logger.info(
            f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")
        short_message = f"Wrote custom Docker snippet `{name}`"
        self.labbook.git.add(docker_file)
        commit = self.labbook.git.commit(short_message)
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', '\n'.join(docker_content))
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Beispiel #10
0
    def create_directory(self, path: str) -> Dict[str, Any]:
        """Method to create an empty directory in a dataset

        Args:
            path: Relative path to the directory

        Returns:
            dict
        """
        relative_path = self.dataset.make_path_relative(path)
        new_directory_path = os.path.join(self.cache_mgr.cache_root, self.dataset_revision, relative_path)

        previous_revision = self.dataset_revision

        if os.path.exists(new_directory_path):
            raise ValueError(f"Directory already exists: `{relative_path}`")
        else:
            logger.info(f"Creating new empty directory in `{new_directory_path}`")

            if os.path.isdir(Path(new_directory_path).parent) is False:
                raise ValueError(f"Parent directory does not exist. Failed to create `{new_directory_path}` ")

            # create dir
            os.makedirs(new_directory_path)
            self.update()
            if relative_path not in self.manifest:
                raise ValueError("Failed to add directory to manifest")

            # Create detail record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0,
                                       action=ActivityAction.CREATE)

            msg = f"Created new empty directory `{relative_path}`"
            adr.add_value('text/markdown', msg)

            commit = self.dataset.git.commit(msg)

            # Create activity record
            ar = ActivityRecord(ActivityType.DATASET,
                                message=msg,
                                linked_commit=commit.hexsha,
                                show=True,
                                importance=255,
                                tags=['directory-create'])
            ar.add_detail_object(adr)

            # Store
            ars = ActivityStore(self.dataset)
            ars.create_activity_record(ar)

            # Relink after the commit
            self.link_revision()
            if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
                shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))

            return self.gen_file_info(relative_path)
Beispiel #11
0
    def write_readme(self, contents: str) -> None:
        """Method to write a string to the readme file within the repository. Must write ENTIRE document at once.

        Args:
            contents(str): entire readme document in markdown format

        Returns:
            None
        """
        # Validate readme data
        if len(contents) > (1000000 * 5):
            raise ValueError("Readme file is larger than the 5MB limit")

        if type(contents) is not str:
            raise TypeError("Invalid content. Must provide string")

        readme_file = os.path.join(self.root_dir, 'README.md')
        readme_exists = os.path.exists(readme_file)

        # Write file to disk
        with open(readme_file, 'wt') as rf:
            rf.write(contents)

        # Create commit
        if readme_exists:
            commit_msg = f"Updated README file"
            action = ActivityAction.EDIT
        else:
            commit_msg = f"Added README file"
            action = ActivityAction.CREATE

        self.git.add(readme_file)
        commit = self.git.commit(commit_msg)

        # Create detail record
        adr = ActivityDetailRecord(self._default_activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=action)
        adr.add_value('text/plain', commit_msg)

        # Create activity record
        ar = ActivityRecord(self._default_activity_type,
                            message=commit_msg,
                            show=False,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=['readme'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
Beispiel #12
0
    def unlink_dataset_from_labbook(self, dataset_namespace: str,
                                    dataset_name: str,
                                    labbook: LabBook) -> None:
        """Method to removed a dataset reference from a labbook

        Args:
            dataset_namespace:
            dataset_name:
            labbook:

        Returns:

        """
        submodule_dir = os.path.join('.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        call_subprocess(['git', 'rm', '-f', submodule_dir],
                        cwd=labbook.root_dir)

        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      f"{dataset_namespace}&{dataset_name}")
        if os.path.exists(git_module_dir):
            shutil.rmtree(git_module_dir)

        absolute_submodule_dir = os.path.join(labbook.root_dir, '.gigantum',
                                              'datasets', dataset_namespace,
                                              dataset_name)
        if os.path.exists(absolute_submodule_dir):
            shutil.rmtree(absolute_submodule_dir)

        labbook.git.add_all()
        commit = labbook.git.commit("removing submodule ref")

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value(
            'text/markdown',
            f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project"
        )
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Beispiel #13
0
    def update_linked_dataset_reference(self, dataset_namespace: str,
                                        dataset_name: str,
                                        labbook: LabBook) -> Dataset:
        """Method to update a linked dataset reference to the latest revision

        Args:
            dataset_namespace: owner (namespace) of the dateset
            dataset_name: name of the dataset
            labbook: labbook instance to which the dataset is linked

        Returns:
            none1
        """
        # Load dataset from inside Project directory
        submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        ds = self.load_dataset_from_directory(submodule_dir,
                                              author=labbook.author)
        ds.namespace = dataset_namespace

        # Update the submodule reference with the latest changes
        original_revision = ds.git.repo.head.object.hexsha
        ds.git.pull()
        revision = ds.git.repo.head.object.hexsha

        # If the submodule has changed, commit the changes.
        if original_revision != revision:
            labbook.git.add_all()
            commit = labbook.git.commit("Updating submodule ref")

            # Add Activity Record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       action=ActivityAction.DELETE)
            adr.add_value(
                'text/markdown',
                f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}"
            )
            msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
            ar = ActivityRecord(ActivityType.DATASET,
                                message=msg,
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=True)
            ar.add_detail_object(adr)
            ars = ActivityStore(labbook)
            ars.create_activity_record(ar)

        return ds
Beispiel #14
0
    def _update_branch_description(cls, lb: LabBook, description: str):
        # Update the description on branch creation
        lb.description = description
        lb.git.add(lb.config_path)
        commit = lb.git.commit('Updating description')

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        adr.add_value('text/plain', description)
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=commit.hexsha,
                            tags=["labbook"],
                            show=False)
        ar.add_detail_object(adr)
        ars = ActivityStore(lb)
        ars.create_activity_record(ar)
Beispiel #15
0
    def sweep_uncommitted_changes(self,
                                  upload: bool = False,
                                  extra_msg: Optional[str] = None,
                                  show: bool = False) -> None:
        """ Sweep all changes into a commit, and create activity record.
            NOTE: This method MUST be called inside a lock.

        Args:
            upload(bool): Flag indicating if this was from a batch upload
            extra_msg(str): Optional string used to augment the activity message
            show(bool): Optional flag indicating if the result of this sweep is important enough to be shown in the feed

        Returns:

        """
        result_status = self.git.status()
        if any([result_status[k] for k in result_status.keys()]):
            self.git.add_all()
            self.git.commit("Sweep of uncommitted changes")

            ar = ActivityRecord(self._default_activity_type,
                                message="--overwritten--",
                                show=show,
                                importance=255,
                                linked_commit=self.git.commit_hash,
                                tags=['save'])
            if upload:
                ar.tags.append('upload')
            ar, newcnt, modcnt, delcnt = self.process_sweep_status(
                ar, result_status)
            nmsg = f"{newcnt} new file(s). " if newcnt > 0 else ""
            mmsg = f"{modcnt} modified file(s). " if modcnt > 0 else ""
            dmsg = f"{delcnt} deleted file(s). " if delcnt > 0 else ""

            message = f"{extra_msg or ''}" \
                      f"{'Uploaded ' if upload else ''}" \
                      f"{nmsg}{mmsg}{dmsg}"

            # This is used to handle if you try to delete an empty directory. This shouldn't technically happen, but if
            # a user manages to create an empty dir outside the client, we should handle it gracefully
            ar.message = "No detected changes" if not message else message
            ars = ActivityStore(self)
            ars.create_activity_record(ar)
        else:
            logger.info(f"{str(self)} no changes to sweep.")
Beispiel #16
0
 def _record_remove_activity(cls, secret_store, filename, lb):
     """Make an activity record for the removal of the secret. """
     lb.git.add(secret_store.secret_path)
     lb.git.commit("Removed entry from secrets registry.")
     commit = lb.git.commit_hash
     adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                show=True,
                                action=ActivityAction.DELETE)
     adr.add_value('text/markdown',
                   f"Removed entry for secrets file {filename}")
     ar = ActivityRecord(
         ActivityType.LABBOOK,
         message=f"Removed entry for secrets file {filename}",
         linked_commit=commit,
         tags=["labbook", "secrets"],
         show=True)
     ar.add_detail_object(adr)
     ars = ActivityStore(lb)
     ars.create_activity_record(ar)
Beispiel #17
0
 def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
     """Make an activity record for the insertion of the secret. """
     lb.git.add(secret_store.secret_path)
     lb.git.commit("Updated secrets registry.")
     commit = lb.git.commit_hash
     adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                show=True,
                                action=ActivityAction.CREATE)
     adr.add_value(
         'text/markdown', f"Created new entry for secrets file {filename}"
         f"to map to {mount_path}")
     ar = ActivityRecord(
         ActivityType.LABBOOK,
         message=f"Created entry for secrets file {filename}",
         linked_commit=commit,
         tags=["labbook", "secrets"],
         show=True)
     ar.add_detail_object(adr)
     ars = ActivityStore(lb)
     ars.create_activity_record(ar)
Beispiel #18
0
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        username = get_logged_in_username()
        inv_manager = InventoryManager()
        lb = inv_manager.create_labbook(username=username,
                                        owner=username,
                                        labbook_name=name,
                                        description=description,
                                        author=get_logged_in_author())

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=False,
                                   importance=0)
        adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

        # Create activity record
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message=f"Created new LabBook: {username}/{name}",
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(lb)
        store.create_activity_record(ar)

        cm = ComponentManager(lb)
        cm.add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
Beispiel #19
0
    def _make_move_activity_record(cls, labbook: LabBook, section: str,
                                   dst_abs_path: str, commit_msg: str) -> None:
        if os.path.isdir(dst_abs_path):
            labbook.git.add_all(dst_abs_path)
        else:
            labbook.git.add(dst_abs_path)

        commit = labbook.git.commit(commit_msg)
        activity_type, activity_detail_type, section_str = labbook.get_activity_type_from_section(
            section)
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.EDIT)
        adr.add_value('text/markdown', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Beispiel #20
0
    def _create_user_note(cls, lb, title, body, tags):
        store = ActivityStore(lb)
        adr = ActivityDetailRecord(ActivityDetailType.NOTE,
                                   show=True,
                                   importance=255)
        if body:
            adr.add_value('text/markdown', body)

        ar = ActivityRecord(ActivityType.NOTE,
                            message=title,
                            linked_commit="no-linked-commit",
                            importance=255,
                            tags=tags)
        ar.add_detail_object(adr)
        ar = store.create_activity_record(ar)
        return ar
Beispiel #21
0
    def create_dataset(self,
                       username: str,
                       owner: str,
                       dataset_name: str,
                       storage_type: str,
                       description: Optional[str] = None,
                       author: Optional[GitAuthor] = None) -> Dataset:
        """Create a new Dataset in this Gigantum working directory.

        Args:
            username: Active username
            owner: Namespace in which to place this Dataset
            dataset_name: Name of the Dataset
            storage_type: String identifying the type of Dataset to instantiate
            description: Optional brief description of Dataset
            author: Optional Git Author

        Returns:
            Newly created LabBook instance

        """
        dataset = Dataset(config_file=self.config_file,
                          author=author,
                          namespace=owner)

        if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
            raise ValueError(
                f"Unsupported Dataset storage type: {storage_type}")

        try:
            build_info = Configuration(self.config_file).config['build_info']
        except KeyError:
            logger.warning("Could not obtain build_info from config")
            build_info = None

        # Build data file contents
        dataset._data = {
            "schema": DATASET_CURRENT_SCHEMA,
            "id": uuid.uuid4().hex,
            "name": dataset_name,
            "storage_type": storage_type,
            "description": description or '',
            "created_on": datetime.datetime.utcnow().isoformat(),
            "build_info": build_info
        }
        dataset._validate_gigantum_data()

        logger.info("Creating new Dataset on disk for {}/{}/{}".format(
            username, owner, dataset_name))
        # lock while creating initial directory
        with dataset.lock(
                lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"
        ):
            # Verify or Create user subdirectory
            # Make sure you expand a user dir string
            starting_dir = os.path.expanduser(
                dataset.client_config.config["git"]["working_directory"])
            user_dir = os.path.join(starting_dir, username)
            if not os.path.isdir(user_dir):
                os.makedirs(user_dir)

            # Create owner dir - store LabBooks in working dir > logged in user > owner
            owner_dir = os.path.join(user_dir, owner)
            if not os.path.isdir(owner_dir):
                os.makedirs(owner_dir)

                # Create `datasets` subdir in the owner dir
                owner_dir = os.path.join(owner_dir, "datasets")
            else:
                owner_dir = os.path.join(owner_dir, "datasets")

            # Verify name not already in use
            if os.path.isdir(os.path.join(owner_dir, dataset_name)):
                raise ValueError(
                    f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name"
                )

            # Create Dataset subdirectory
            new_root_dir = os.path.join(owner_dir, dataset_name)
            os.makedirs(new_root_dir)
            dataset._set_root_dir(new_root_dir)

            # Init repository
            dataset.git.initialize()

            # Create Directory Structure
            dirs = [
                'manifest', 'metadata', '.gigantum',
                os.path.join('.gigantum', 'activity'),
                os.path.join('.gigantum', 'activity', 'log')
            ]

            # Create .gitignore default file
            shutil.copyfile(
                os.path.join(resource_filename('gtmcore', 'dataset'),
                             'gitignore.default'),
                os.path.join(dataset.root_dir, ".gitignore"))

            for d in dirs:
                p = os.path.join(dataset.root_dir, d, '.gitkeep')
                os.makedirs(os.path.dirname(p), exist_ok=True)
                with open(p, 'w') as gk:
                    gk.write(
                        "This file is necessary to keep this directory tracked by Git"
                        " and archivable by compression tools. Do not delete or modify!"
                    )

            dataset._save_gigantum_data()

            # Create an empty storage.json file
            dataset.backend_config = {}

            # Commit
            dataset.git.add_all()

            # NOTE: this string is used to indicate there are no more activity records to get. Changing the string will
            # break activity paging.
            # TODO: Improve method for detecting the first activity record
            dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

            # Create Activity Record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       importance=0)
            adr.add_value('text/plain',
                          f"Created new Dataset: {username}/{dataset_name}")
            ar = ActivityRecord(
                ActivityType.DATASET,
                message=f"Created new Dataset: {username}/{dataset_name}",
                show=True,
                importance=255,
                linked_commit=dataset.git.commit_hash)
            ar.add_detail_object(adr)
            store = ActivityStore(dataset)
            store.create_activity_record(ar)

            # Initialize file cache and link revision
            m = Manifest(dataset, username)
            m.link_revision()

            return dataset
    def test_get_recent_activity(self, fixture_working_dir, snapshot,
                                 fixture_test_file):
        """Test paging through activity records"""
        im = InventoryManager(fixture_working_dir[0])
        lb = im.create_labbook("default",
                               "default",
                               "labbook11",
                               description="my test description",
                               author=GitAuthor(name="tester",
                                                email="*****@*****.**"))

        FileOperations.insert_file(lb, "code", fixture_test_file)

        # fake activity
        store = ActivityStore(lb)
        adr1 = ActivityDetailRecord(ActivityDetailType.CODE)
        adr1.show = False
        adr1.importance = 100
        adr1.add_value("text/plain", "first")

        ar = ActivityRecord(ActivityType.CODE,
                            show=False,
                            message="ran some code",
                            importance=50,
                            linked_commit="asdf")

        ar.add_detail_object(adr1)

        # Create Activity Record
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        open('/tmp/test_file.txt', 'w').write("xxx" * 50)
        FileOperations.insert_file(lb, "input", '/tmp/test_file.txt')
        FileOperations.makedir(lb, "input/test")
        open('/tmp/test_file.txt', 'w').write("xxx" * 50)
        FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test")
        FileOperations.makedir(lb, "input/test2")
        open('/tmp/test_file.txt', 'w').write("xxx" * 50)
        FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test2")
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        store.create_activity_record(ar)
        open('/tmp/test_file.txt', 'w').write("xxx" * 50)
        FileOperations.insert_file(lb, "output", '/tmp/test_file.txt')

        # Get all records at once with no pagination args and verify cursors look OK directly
        query = """
                    {
                      labbook(owner: "default", name: "labbook11") {
                        overview {
                          recentActivity {
                            message
                            type
                            show
                            importance
                            tags
                          }
                        }
                      }
                    }
                    """
        snapshot.assert_match(fixture_working_dir[2].execute(query))
    def remove_packages(self, package_manager: str,
                        package_names: List[str]) -> None:
        """Remove yaml files describing a package and its context to the labbook.

        Args:
            package_manager: The package manager (eg., "apt" or "pip3")
            package_names: A list of packages to uninstall

        Returns:
            None
        """
        # Create activity record
        ar = ActivityRecord(
            ActivityType.ENVIRONMENT,
            message="",
            show=True,
            linked_commit="",
            tags=["environment", 'package_manager', package_manager])

        for pkg in package_names:
            yaml_filename = '{}_{}.yaml'.format(package_manager, pkg)
            package_yaml_path = os.path.join(self.env_dir, 'package_manager',
                                             yaml_filename)

            # Check for package to exist
            if not os.path.exists(package_yaml_path):
                raise ValueError(
                    f"{package_manager} installed package {pkg} does not exist."
                )

            # Check to make sure package isn't from the base. You cannot remove packages from the base yet.
            with open(package_yaml_path, 'rt') as cf:
                package_data = yaml.safe_load(cf)

            if not package_data:
                raise IOError("Failed to load package description")

            if package_data['from_base'] is True:
                raise ValueError(
                    "Cannot remove a package installed in the Base")

            # Delete the yaml file, which on next Dockerfile gen/rebuild will remove the dependency
            os.remove(package_yaml_path)
            if os.path.exists(package_yaml_path):
                raise ValueError(f"Failed to remove package.")

            self.labbook.git.remove(package_yaml_path)

            # Create detail record
            adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                       show=False,
                                       action=ActivityAction.DELETE)
            adr.add_value('text/plain',
                          f"Removed {package_manager} managed package: {pkg}")
            ar.add_detail_object(adr)
            logger.info(f"Removed {package_manager} managed package: {pkg}")

        # Add to git
        short_message = f"Removed {len(package_names)} {package_manager} managed package(s)"
        commit = self.labbook.git.commit(short_message)
        ar.linked_commit = commit.hexsha
        ar.message = short_message

        # Store
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Beispiel #24
0
    def create_update_activity_record(self, status: StatusResult, upload: bool = False, extra_msg: str = None) -> None:
        """

        Args:
            status(StatusResult): a StatusResult object after updating the manifest
            upload(bool): flag indicating if this is a record for an upload
            extra_msg(str): any extra string to add to the activity record

        Returns:
            None
        """
        def _item_type(key):
            if key[-1] == os.path.sep:
                return 'directory'
            else:
                return 'file'

        if len(status.deleted) > 0 or len(status.created) > 0 or len(status.modified) > 0:
            # commit changed manifest file
            self.dataset.git.add_all()
            self.dataset.git.commit("Commit changes to manifest file.")

            ar = ActivityRecord(ActivityType.DATASET,
                                message="msg is set below after detail record processing...",
                                show=True,
                                importance=255,
                                linked_commit=self.dataset.git.commit_hash,
                                tags=[])

            for cnt, f in enumerate(status.created):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=max(255 - cnt, 0),
                                           action=ActivityAction.CREATE)

                msg = f"Created new {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            for cnt, f in enumerate(status.modified):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=max(255 - cnt, 0),
                                           action=ActivityAction.EDIT)

                msg = f"Modified {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            for cnt, f in enumerate(status.deleted):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=max(255 - cnt, 0),
                                           action=ActivityAction.DELETE)

                msg = f"Deleted {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            num_files_created = sum([_item_type(x) == "file" for x in status.created])
            num_files_modified = sum([_item_type(x) == "file" for x in status.modified])
            num_files_deleted = sum([_item_type(x) == "file" for x in status.deleted])
            upload_str = "Uploaded" if upload else ''
            nmsg = f"{upload_str} {num_files_created} new file(s). " if num_files_created > 0 else ""
            mmsg = f"{upload_str} {num_files_modified} modified file(s). " if num_files_modified > 0 else ""
            dmsg = f"{num_files_deleted} deleted file(s). " if num_files_deleted > 0 else ""

            if not nmsg and not mmsg and not dmsg:
                # You didn't edit any files, only an empty directory
                num_dirs_created = sum([_item_type(x) == "directory" for x in status.created])
                num_dirs_modified = sum([_item_type(x) == "directory" for x in status.modified])
                num_dirs_deleted = sum([_item_type(x) == "directory" for x in status.deleted])
                nmsg = f"{num_dirs_created} new folder(s). " if num_dirs_created > 0 else ""
                mmsg = f"{num_dirs_modified} modified folder(s). " if num_dirs_modified > 0 else ""
                dmsg = f"{num_dirs_deleted} deleted folder(s). " if num_dirs_deleted > 0 else ""

            ar.message = f"{extra_msg if extra_msg else ''}" \
                         f"{nmsg}{mmsg}{dmsg}"

            ars = ActivityStore(self.dataset)
            ars.create_activity_record(ar)
Beispiel #25
0
    def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                                dataset_name: str,
                                labbook: LabBook) -> Dataset:
        """

        Args:
            dataset_url:
            dataset_namespace:
            dataset_name:
            labbook:

        Returns:

        """
        # add submodule and init
        submodules_root = os.path.join(labbook.root_dir, '.gigantum',
                                       'datasets')
        submodule_dir = os.path.join(submodules_root, dataset_namespace,
                                     dataset_name)
        if not os.path.exists(os.path.join(submodules_root,
                                           dataset_namespace)):
            pathlib.Path(os.path.join(submodules_root,
                                      dataset_namespace)).mkdir(parents=True,
                                                                exist_ok=True)

        subprocess.run([
            'git', 'submodule', 'add', '--name',
            f"{dataset_namespace}&{dataset_name}", dataset_url,
            os.path.join('.gigantum', 'datasets', dataset_namespace,
                         dataset_name)
        ],
                       check=True,
                       cwd=labbook.root_dir,
                       stderr=subprocess.STDOUT)

        labbook.git.add_all()
        commit = labbook.git.commit(
            f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}"
        )
        labbook.git.update_submodules(init=True)

        ds = self.load_dataset_from_directory(submodule_dir)
        dataset_revision = ds.git.repo.head.commit.hexsha

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value(
            'text/markdown',
            f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
            f"project at revision `{dataset_revision}`")
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return ds
Beispiel #26
0
    def makedir(cls,
                labbook: LabBook,
                relative_path: str,
                make_parents: bool = True,
                create_activity_record: bool = False) -> None:
        """Make a new directory inside the labbook directory.

        Args:
            labbook: Subject LabBook
            relative_path(str): Path within the labbook to make directory
            make_parents(bool): If true, create intermediary directories
            create_activity_record(bool): If true, create commit and activity record

        Returns:
            str: Absolute path of new directory
        """
        if not relative_path:
            raise ValueError("relative_path argument cannot be None or empty")

        relative_path = LabBook.make_path_relative(relative_path)
        new_directory_path = os.path.join(labbook.root_dir, relative_path)
        if os.path.exists(new_directory_path):
            return
        else:
            logger.info(f"Making new directory in `{new_directory_path}`")
            os.makedirs(new_directory_path, exist_ok=make_parents)
            new_dir = ''
            for d in relative_path.split(os.sep):
                new_dir = os.path.join(new_dir, d)
                full_new_dir = os.path.join(labbook.root_dir, new_dir)
                gitkeep_path = os.path.join(full_new_dir, '.gitkeep')
                if not os.path.exists(gitkeep_path):
                    with open(gitkeep_path, 'w') as gitkeep:
                        gitkeep.write(
                            "This file is necessary to keep this directory tracked by Git"
                            " and archivable by compression tools. Do not delete or modify!"
                        )
                    labbook.git.add(gitkeep_path)

            if create_activity_record:
                # Create detail record
                activity_type, activity_detail_type, section_str = labbook.infer_section_from_relative_path(
                    relative_path)
                adr = ActivityDetailRecord(activity_detail_type,
                                           show=False,
                                           importance=0,
                                           action=ActivityAction.CREATE)

                msg = f"Created new {section_str} directory `{relative_path}`"
                commit = labbook.git.commit(msg)
                adr.add_value('text/markdown', msg)

                # Create activity record
                ar = ActivityRecord(activity_type,
                                    message=msg,
                                    linked_commit=commit.hexsha,
                                    show=True,
                                    importance=255,
                                    tags=['directory-create'])
                ar.add_detail_object(adr)

                # Store
                ars = ActivityStore(labbook)
                ars.create_activity_record(ar)
Beispiel #27
0
class ActivityMonitor(metaclass=abc.ABCMeta):
    """Class to monitor a kernel/IDE for activity to be processed."""
    def __init__(self,
                 user: str,
                 owner: str,
                 labbook_name: str,
                 monitor_key: str,
                 config_file: str = None,
                 author_name: Optional[str] = None,
                 author_email: Optional[str] = None) -> None:
        """Constructor requires info to load the lab book

        Args:
            user(str): current logged in user
            owner(str): owner of the lab book
            labbook_name(str): name of the lab book
            monitor_key(str): Unique key for the activity monitor in redis
            author_name(str): Name of the user starting this activity monitor
            author_email(str): Email of the user starting this activity monitor
        """
        self.monitor_key = monitor_key

        # List of processor classes that will be invoked in order
        self.processors: List[ActivityProcessor] = []

        # Populate GitAuthor instance if available
        if author_name:
            author: Optional[GitAuthor] = GitAuthor(name=author_name,
                                                    email=author_email)
        else:
            author = None

        # Load Lab Book instance
        im = InventoryManager(config_file)
        self.labbook = im.load_labbook(user,
                                       owner,
                                       labbook_name,
                                       author=author)
        self.user = user
        self.owner = owner
        self.labbook_name = labbook_name

        # Create ActivityStore instance
        self.activity_store = ActivityStore(self.labbook)

        # A flag indicating if the activity record is OK to store
        self.can_store_activity_record = False

    def add_processor(self, processor_instance: ActivityProcessor) -> None:
        """

        Args:
            processor_instance(ActivityProcessor): A processor class to add to the pipeline

        Returns:
            None
        """
        self.processors.append(processor_instance)

    def commit_file(self, filename: str) -> str:
        """Method to commit changes to a file

        Args:
            filename(str): file to commit

        Returns:
            str
        """
        self.labbook.git.add(filename)
        commit = self.labbook.git.commit(
            "Auto-commit from activity monitoring")
        return commit.hexsha

    def commit_labbook(self) -> str:
        """Method to commit changes to the entire labbook

        Returns:
            str
        """
        self.labbook.git.add_all()
        commit = self.labbook.git.commit(
            "Auto-commit from activity monitoring")
        return commit.hexsha

    def store_activity_record(
            self, linked_commit: str,
            activity_record: ActivityRecord) -> Optional[str]:
        """Method to commit changes to a file

        Args:
            linked_commit(str): Git commit this ActivityRecord is related to
            activity_record(ActivityNote): The populated ActivityNote object returned by the processing pipeline

        Returns:
            str
        """
        activity_record.linked_commit = linked_commit

        # Create a activity record
        record = self.activity_store.create_activity_record(activity_record)

        return record.commit

    def process(self, activity_type: ActivityType, data: List[ExecutionData],
                metadata: Dict[str, Any]) -> ActivityRecord:
        """Method to update the result ActivityRecord object based on code and result data

        Args:
            activity_type(ActivityType): A ActivityType object indicating the activity type
            data(list): A list of ExecutionData instances containing the data for this record
            metadata(str): A dictionary containing Dev Env specific or other developer defined data

        Returns:
            ActivityRecord
        """
        # Initialize empty record
        activity_record = ActivityRecord(activity_type=activity_type)

        # Get git status for tracking file changes
        status = self.labbook.git.status()

        # Run processors to populate the record
        for p in self.processors:
            activity_record = p.process(activity_record, data, status,
                                        metadata)

        return activity_record

    def get_container_ip(self) -> Optional[str]:
        """Method to get the monitored lab book container's IP address on the Docker bridge network

        Returns:
            str
        """
        client = get_docker_client()
        lb_key = infer_docker_image_name(self.labbook_name, self.owner,
                                         self.user)
        container = client.containers.get(lb_key)
        ip = container.attrs['NetworkSettings']['Networks']['bridge'][
            'IPAddress']
        logger.info("container {} IP: {}".format(container.name, ip))

        return ip

    def set_busy_state(self, is_busy: bool) -> None:
        """Method to set the busy state of the dev env being monitored. If busy, some actions (e.g. auto-save hooks)
        may be disabled depending on the dev env. This method sets or deletes a key in redis that other processes can
        check

        Args:
            is_busy(bool): True if busy, false if idle

        Returns:

        """
        try:
            client = redis.StrictRedis(db=1)
            key = f"{self.labbook.key}&is-busy&{self.monitor_key}"
            if is_busy:
                client.set(key, True)
            else:
                client.delete(key)
        except Exception as err:
            # This should never stop more important operations
            logger.warning(
                f"An error occurred while setting the monitor busy state for {str(self.labbook )}: {err}"
            )

    def start(self, data: Dict[str, Any]) -> None:
        """Method called in a long running scheduled async worker that should monitor for activity, committing files
        and creating notes as needed.

        Args:
            data(dict): A dictionary of data to start the activity monitor

        Returns:
            None
        """
        raise NotImplemented
Beispiel #28
0
    def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                                dataset_name: str,
                                labbook: LabBook) -> Dataset:
        """

        Args:
            dataset_url:
            dataset_namespace:
            dataset_name:
            labbook:

        Returns:

        """
        def _clean_submodule():
            """Helper method to clean a submodule reference from a repository"""
            if os.path.exists(absolute_submodule_dir):
                logger.warning(
                    f"Cleaning {relative_submodule_dir} from parent git repo")
                try:
                    call_subprocess([
                        'git', 'rm', '-f', '--cached', relative_submodule_dir
                    ],
                                    cwd=labbook.root_dir)
                except subprocess.CalledProcessError:
                    logger.warning(
                        f"git rm on {relative_submodule_dir} failed. Continuing..."
                    )
                    pass

            if os.path.exists(absolute_submodule_dir):
                logger.warning(f"Removing {absolute_submodule_dir} directory")
                shutil.rmtree(absolute_submodule_dir)

            if os.path.exists(git_module_dir):
                logger.warning(f"Removing {git_module_dir} directory")
                shutil.rmtree(git_module_dir)

        relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                              dataset_namespace, dataset_name)
        absolute_submodule_dir = os.path.join(labbook.root_dir,
                                              relative_submodule_dir)
        absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum',
                                               'datasets', dataset_namespace)
        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      f"{dataset_namespace}&{dataset_name}")

        if not os.path.exists(absolute_submodule_root):
            pathlib.Path(absolute_submodule_root).mkdir(parents=True,
                                                        exist_ok=True)

        if os.path.exists(absolute_submodule_dir) and os.path.exists(
                git_module_dir):
            # Seem to be trying to link a dataset after a reset removed the dataset. Clean up first.
            _clean_submodule()

        try:
            # Link dataset via submodule reference
            call_subprocess([
                'git', 'submodule', 'add', '--name',
                f"{dataset_namespace}&{dataset_name}", dataset_url,
                relative_submodule_dir
            ],
                            cwd=labbook.root_dir)

        except subprocess.CalledProcessError:
            logger.warning(
                "Failed to link dataset. Attempting to repair repository and link again."
            )
            _clean_submodule()

            # Try to add again 1 more time, allowing a failure to raise an exception
            call_subprocess([
                'git', 'submodule', 'add', '--name',
                f"{dataset_namespace}&{dataset_name}", dataset_url,
                relative_submodule_dir
            ],
                            cwd=labbook.root_dir)

            # If you got here, repair worked and link OK
            logger.info("Repository repair and linking retry successful.")

        labbook.git.add_all()
        commit = labbook.git.commit(
            f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}"
        )
        labbook.git.update_submodules(init=True)

        ds = self.load_dataset_from_directory(absolute_submodule_dir)
        dataset_revision = ds.git.repo.head.commit.hexsha

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value(
            'text/markdown',
            f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
            f"project at revision `{dataset_revision}`")
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return ds
    def add_base(self, repository: str, base_id: str, revision: int) -> None:
        """Method to add a base to a LabBook's environment

        Args:
            repository(str): The Environment Component repository the component is in
            base_id(str): The name of the component
            revision(int): The revision to use (r_<revision_) in yaml filename.

        Returns:
            None
        """
        if not repository:
            raise ValueError('repository cannot be None or empty')

        if not base_id:
            raise ValueError('component cannot be None or empty')

        # Get the base
        base_data = self.bases.get_base(repository, base_id, revision)
        base_filename = "{}_{}.yaml".format(repository, base_id, revision)
        base_final_path = os.path.join(self.env_dir, 'base', base_filename)

        short_message = "Added base: {}".format(base_id)
        if os.path.exists(base_final_path):
            raise ValueError("The base {} already exists in this project")

        with open(base_final_path, 'wt') as cf:
            cf.write(yaml.safe_dump(base_data, default_flow_style=False))

        for manager in base_data['package_managers']:
            packages = list()
            # Build dictionary of packages
            for p_manager in manager.keys():
                if manager[p_manager]:
                    for pkg in manager[p_manager]:
                        pkg_name, pkg_version = strip_package_and_version(
                            p_manager, pkg)
                        packages.append({
                            "package": pkg_name,
                            "version": pkg_version,
                            "manager": p_manager
                        })

                    self.add_packages(package_manager=p_manager,
                                      packages=packages,
                                      force=True,
                                      from_base=True)

        self.labbook.git.add(base_final_path)
        commit = self.labbook.git.commit(short_message)
        logger.info(f"Added base from {repository}: {base_id} rev{revision}")

        # Create a ActivityRecord
        long_message = "Added base {}\n".format(base_id)
        long_message = "{}\n{}\n\n".format(long_message,
                                           base_data['description'])
        long_message = "{}  - repository: {}\n".format(long_message,
                                                       repository)
        long_message = "{}  - component: {}\n".format(long_message, base_id)
        long_message = "{}  - revision: {}\n".format(long_message, revision)

        # Create detail record
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', long_message)

        # Create activity record
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            linked_commit=commit.hexsha,
                            tags=["environment", "base"],
                            show=True)
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Beispiel #30
0
    def sweep_all_changes(self,
                          upload: bool = False,
                          extra_msg: str = None) -> None:
        """

        Args:
            upload:
            extra_msg:

        Returns:

        """
        def _item_type(key):
            if key[-1] == os.path.sep:
                return 'directory'
            else:
                return 'file'

        previous_revision = self.dataset_revision

        # Update manifest
        status = self.update()

        if len(status.deleted) > 0 or len(status.created) > 0 or len(
                status.modified) > 0:
            # commit changed manifest file
            self.dataset.git.add_all()
            self.dataset.git.commit("Commit changes to manifest file.")

            ar = ActivityRecord(
                ActivityType.DATASET,
                message="msg is set below after detail record processing...",
                show=True,
                importance=255,
                linked_commit=self.dataset.git.commit_hash,
                tags=[])
            if upload:
                ar.tags.append('upload')

            for cnt, f in enumerate(status.created):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                           show=False,
                                           importance=max(255 - cnt, 0),
                                           action=ActivityAction.CREATE)

                msg = f"Created new {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            for cnt, f in enumerate(status.modified):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                           show=False,
                                           importance=max(255 - cnt, 0),
                                           action=ActivityAction.EDIT)

                msg = f"Modified {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            for cnt, f in enumerate(status.deleted):
                adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                           show=False,
                                           importance=max(255 - cnt, 0),
                                           action=ActivityAction.DELETE)

                msg = f"Deleted {_item_type(f)} `{f}`"
                adr.add_value('text/markdown', msg)
                ar.add_detail_object(adr)

            nmsg = f"{len(status.created)} new file(s). " if len(
                status.created) > 0 else ""
            mmsg = f"{len(status.modified)} modified file(s). " if len(
                status.modified) > 0 else ""
            dmsg = f"{len(status.deleted)} deleted file(s). " if len(
                status.deleted) > 0 else ""

            ar.message = f"{extra_msg if extra_msg else ''}" \
                         f"{'Uploaded ' if upload else ''}" \
                         f"{nmsg}{mmsg}{dmsg}"

            ars = ActivityStore(self.dataset)
            ars.create_activity_record(ar)

        # Re-link new revision, unlink old revision
        self.link_revision()
        if os.path.isdir(
                os.path.join(self.cache_mgr.cache_root, previous_revision)):
            shutil.rmtree(
                os.path.join(self.cache_mgr.cache_root, previous_revision))