Example #1
    def test_code_and_image(self, redis_client, mock_labbook):
        """Test reading a log and storing a record"""

        # create a server monitor
        server_monitor = RStudioServerMonitor("test",
                                              "test",
                                              mock_labbook[2].name,
                                              "foo:activity_monitor:52f5a3a9",
                                              config_file=mock_labbook[0])

        mitmlog = open(
            f"{os.path.dirname(os.path.realpath(__file__))}/52f5a3a9.rserver.dump",
            "rb")

        # Read activity and return an aggregated activity record
        server_monitor.process_activity(mitmlog)
        # call processor
        server_monitor.store_record()

        a_store = ActivityStore(mock_labbook[2])
        ars = a_store.get_activity_records()

        # _detail_objects[x][3] holds the key of the x-th detail object
        code_dict = a_store.get_detail_record(
            ars[0]._detail_objects[1][3].key).data

        # check the code results
        assert (code_dict['text/markdown'][101:109] == 'y("knitr')

        # check part of an image
        imgdata = a_store.get_detail_record(
            ars[1]._detail_objects[1][3].key).data['image/png'][0:20]
        assert (imgdata == '/9j/4AAQSkZJRgABAQAA')
Example #2
    def test_multiplecells(self, redis_client, mock_labbook):
        """Make sure that RStudio detects and splits cells"""

        server_monitor = RStudioServerMonitor("test",
                                              "test",
                                              mock_labbook[2].name,
                                              "foo:activity_monitor:73467b78",
                                              config_file=mock_labbook[0])

        mitmlog = open(
            f"{os.path.dirname(os.path.realpath(__file__))}/73467b78.rserver.dump",
            "rb")

        # Read activity and return an aggregated activity record
        server_monitor.process_activity(mitmlog)
        # call processor
        server_monitor.store_record()

        a_store = ActivityStore(mock_labbook[2])
        ars = a_store.get_activity_records()

        # _detail_objects[x][3] holds the key of the x-th detail object
        cell_1 = a_store.get_detail_record(
            ars[0]._detail_objects[2][3].key).data
        cell_2 = a_store.get_detail_record(
            ars[0]._detail_objects[3][3].key).data

        # if the cells were divided, there will be two records
        assert (cell_1['text/plain'][55:58] == 'pop')
        assert (cell_2['text/plain'][200:204] == 'stan')
Example #3
def migrate_labbook_schema(labbook: LabBook) -> None:
    # Fallback point in case of a problem
    initial_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        call_subprocess(f'git reset --hard {initial_commit}'.split(),
                        cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    cmt = labbook.git.commit(msg,
                             author=labbook.author,
                             committer=labbook.author)
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                               show=True,
                               importance=100,
                               action=ActivityAction.EDIT)

    adr.add_value('text/plain', msg)
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=msg,
                        show=True,
                        importance=255,
                        linked_commit=cmt.hexsha,
                        tags=['schema', 'update', 'migration'])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
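
For context, a caller would typically load the labbook first and rely on the rollback performed above if the migration fails. A minimal sketch, assuming a labbook loaded the way the later examples do (the names and values are illustrative):

# Sketch only: load a labbook and migrate its schema, relying on the
# rollback to `initial_commit` that migrate_labbook_schema performs on error.
lb = InventoryManager().load_labbook('default', 'default', 'labbook1')
try:
    migrate_labbook_schema(lb)
except Exception:
    # the repository has already been reset to its pre-migration commit
    logger.error("Schema migration failed; labbook left at its original commit")
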
Example #4
    def remove_docker_snippet(self, name: str) -> None:
        """Remove a custom docker snippet

        Args:
            name: Name or identifier of the snippet to remove

        Returns:
            None
        """
        docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                  'docker')
        docker_file = os.path.join(docker_dir, f'{name}.yaml')

        if not os.path.exists(docker_file):
            raise ValueError(f'Docker snippet name `{name}` does not exist')

        self.labbook.git.remove(docker_file, keep_file=False)
        short_message = f"Removed custom Docker snippet `{name}`"
        logger.info(short_message)
        commit = self.labbook.git.commit(short_message)
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value('text/plain', short_message)
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            show=False,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Example #5
    def _load_activity_record(self, info):
        """Private method to load an activity record if it has not been previously loaded and set"""
        if not self._activity_record:
            # Load record from database
            if not self.commit:
                raise ValueError(
                    "Must set `commit` on object creation to resolve detail record"
                )

            # Load store instance
            lb = self._get_loader(info).load(
                f"{get_logged_in_username()}&{self.owner}&{self.name}").get()
            store = ActivityStore(lb)

            # Retrieve record
            self._activity_record = store.get_activity_record(self.commit)

        # Set class properties
        self.linked_commit = self._activity_record.linked_commit
        self.message = self._activity_record.message
        self.type = ActivityRecordTypeEnum.get(
            self._activity_record.type.value).value
        self.show = self._activity_record.show
        self.tags = self._activity_record.tags
        self.timestamp = self._activity_record.timestamp
        self.importance = self._activity_record.importance
        self.username = self._activity_record.username
        self.email = self._activity_record.email
Example #6
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               labbook_name,
                               description_content,
                               client_mutation_id=None):
        username = get_logged_in_username()
        lb = InventoryManager().load_labbook(username,
                                             owner,
                                             labbook_name,
                                             author=get_logged_in_author())
        lb.description = description_content
        with lb.lock():
            lb.git.add(lb.config_path)
            commit = lb.git.commit('Updating description')

            adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
            adr.add_value('text/plain', "Updated description of Project")
            ar = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated description of Project",
                                linked_commit=commit.hexsha,
                                tags=["labbook"],
                                show=False)
            ar.add_detail_object(adr)
            ars = ActivityStore(lb)
            ars.create_activity_record(ar)
        return SetLabbookDescription(success=True)
Example #7
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               owner,
                               dataset_name,
                               description,
                               client_mutation_id=None):
        username = get_logged_in_username()
        ds = InventoryManager().load_dataset(username,
                                             owner,
                                             dataset_name,
                                             author=get_logged_in_author())
        ds.description = description
        with ds.lock():
            ds.git.add(os.path.join(ds.root_dir, '.gigantum/gigantum.yaml'))
            commit = ds.git.commit('Updating description')

            adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
            adr.add_value('text/plain',
                          f"Updated Dataset description: {description}")
            ar = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated Dataset description",
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=False)
            ar.add_detail_object(adr)
            ars = ActivityStore(ds)
            ars.create_activity_record(ar)
        return SetDatasetDescription(
            updated_dataset=Dataset(owner=owner, name=dataset_name))
Example #8
    def remove_bundled_app(self, name: str) -> None:
        """Remove a bundled app from this labbook

        Args:
            name(str): name of the bundled app

        Returns:
            None
        """
        data = self.get_bundled_apps()
        if name not in data:
            raise ValueError(f"App {name} does not exist. Cannot remove.")

        del data[name]

        with open(self.bundled_app_file, 'wt') as baf:
            json.dump(data, baf)

        # Commit the changes
        self.labbook.git.add(self.bundled_app_file)
        commit = self.labbook.git.commit("Committing bundled app")

        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value('text/plain', f"Removed bundled application: {name}")
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=f"Removed bundled application: {name}",
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "bundled_app"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
Example #9
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        username = get_logged_in_username()
        inv_manager = InventoryManager()
        if is_untracked:
            lb = inv_manager.create_labbook_disabled_lfs(
                username=username,
                owner=username,
                labbook_name=name,
                description=description,
                author=get_logged_in_author())
        else:
            lb = inv_manager.create_labbook(username=username,
                                            owner=username,
                                            labbook_name=name,
                                            description=description,
                                            author=get_logged_in_author())

        if is_untracked:
            FileOperations.set_untracked(lb, 'input')
            FileOperations.set_untracked(lb, 'output')
            input_set = FileOperations.is_set_untracked(lb, 'input')
            output_set = FileOperations.is_set_untracked(lb, 'output')
            if not (input_set and output_set):
                raise ValueError(
                    f'{str(lb)} untracking for input/output in malformed state'
                )
            if not lb.is_repo_clean:
                raise ValueError(
                    f'{str(lb)} should have clean Git state after setting for untracked'
                )

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=False,
                                   importance=0)
        adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

        # Create activity record
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message=f"Created new LabBook: {username}/{name}",
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(lb)
        store.create_activity_record(ar)

        cm = ComponentManager(lb)
        cm.add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
Example #10
    def help_resolve_recent_activity(self, labbook):
        """Method to create 4 activity records with show=True"""
        # Create instance of ActivityStore for this LabBook
        store = ActivityStore(labbook)

        records = list()
        # Get 4 records with show=True
        after = None
        while len(records) < 4:
            items = store.get_activity_records(first=4, after=after)

            if not items:
                # no more activity records, so stop paging
                break

            for item in items:
                if item.show is True and item.num_detail_objects > 0:
                    ar = ActivityRecordObject(
                        id=f"labbook&{self.owner}&{self.name}&{item.commit}",
                        owner=self.owner,
                        name=self.name,
                        _repository_type='labbook',
                        commit=item.commit,
                        _activity_record=item)
                    records.append(ar)
                    if len(records) >= 4:
                        break

                # Set after cursor to last commit
                after = item.commit

        return records
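
The paging pattern above (pass `first` and use the last record's `commit` as the `after` cursor) generalizes to walking the full activity log. A minimal sketch, assuming an ActivityStore instance built as in these examples; the helper name and page size are illustrative:

def iter_activity_records(store, page_size=10):
    """Yield every activity record by paging with the `after` cursor (sketch)."""
    after = None
    while True:
        items = store.get_activity_records(first=page_size, after=after)
        if not items:
            break  # no more records to page through
        for item in items:
            yield item
        # the last commit on this page becomes the cursor for the next page
        after = items[-1].commit
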
Example #11
    def _load_detail_record(self, info):
        """Private method to load a detail record if it has not been previously loaded and set"""
        if not self._detail_record:
            # Load record from database
            if not self.key:
                raise ValueError(
                    "Must set `key` on object creation to resolve detail record"
                )

            # Load store instance
            lb = self._get_loader(info).load(
                f"{get_logged_in_username()}&{self.owner}&{self.name}").get()
            store = ActivityStore(lb)

            # Retrieve record
            self._detail_record: ActivityDetailRecord = store.get_detail_record(
                self.key)

        # Set class properties
        self.type = ActivityDetailTypeEnum.get(
            self._detail_record.type.value).value
        self.show = self._detail_record.show
        self.tags = self._detail_record.tags
        self.importance = self._detail_record.importance
        self.action = ActivityActionTypeEnum.get(
            self._detail_record.action.value).value
Example #12
    def backend_config(self, data: dict) -> None:
        """Save storage config data"""
        if self._backend:
            self._backend.configuration = {**self._backend.configuration, **data}

        # Remove defaults set at runtime that shouldn't be persisted
        if "username" in data:
            del data["username"]
        if "gigantum_bearer_token" in data:
            del data["gigantum_bearer_token"]
        if "gigantum_id_token" in data:
            del data["gigantum_id_token"]

        config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
        with open(config_file, 'wt') as sf:
            json.dump(data, sf, indent=2)

        self.git.add(config_file)
        cm = self.git.commit("Updating backend config")

        ar = ActivityRecord(ActivityType.DATASET,
                            message="Updated Dataset storage backend configuration",
                            show=True,
                            importance=255,
                            linked_commit=cm.hexsha,
                            tags=['config'])
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=255,
                                   action=ActivityAction.EDIT)
        d = json.dumps(data, indent=2)
        adr.add_value('text/markdown', f"Updated dataset storage backend configuration:\n\n ```{d}```")
        ar.add_detail_object(adr)
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
Example #13
    def add_docker_snippet(self,
                           name: str,
                           docker_content: List[str],
                           description: Optional[str] = None) -> None:
        """ Add a custom docker snippet to the environment (replacing custom dependency).

        Args:
            name: Name or identifier of the custom docker snippet
            docker_content: Content of the docker material (May make this a list of strings instead)
            description: Human-readable verbose description of what the snippet is intended to accomplish.

        Returns:
            None
        """

        if not name:
            raise ValueError('Argument `name` cannot be None or empty')

        if not name.replace('-', '').replace('_', '').isalnum():
            raise ValueError(
                'Argument `name` must be alphanumeric string (- and _ accepted)'
            )

        if not docker_content:
            docker_content = []

        file_data = {
            'name': name,
            'timestamp_utc': datetime.datetime.utcnow().isoformat(),
            'description': description or "",
            'content': docker_content
        }

        docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env',
                                  'docker')
        docker_file = os.path.join(docker_dir, f'{name}.yaml')
        os.makedirs(docker_dir, exist_ok=True)
        yaml_dump = yaml.safe_dump(file_data, default_flow_style=False)
        with open(docker_file, 'w') as df:
            df.write(yaml_dump)

        logger.info(
            f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")
        short_message = f"Wrote custom Docker snippet `{name}`"
        self.labbook.git.add(docker_file)
        commit = self.labbook.git.commit(short_message)
        adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', '\n'.join(docker_content))
        ar = ActivityRecord(ActivityType.ENVIRONMENT,
                            message=short_message,
                            show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
        ar.add_detail_object(adr)
        ars = ActivityStore(self.labbook)
        ars.create_activity_record(ar)
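
A call sequence pairing this with `remove_docker_snippet` from Example #4 might look like the sketch below; `env_component` stands in for whichever object exposes these methods, and the snippet content is illustrative:

# Hypothetical usage of the methods shown in Examples #4 and #13
docker_lines = [
    "RUN apt-get update",
    "RUN apt-get install -y vim",
]
env_component.add_docker_snippet("install-vim",
                                 docker_content=docker_lines,
                                 description="Install the vim editor")
# ... later, remove the snippet (this also commits and records the change)
env_component.remove_docker_snippet("install-vim")
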
Example #14
    def insert_file(cls,
                    labbook: LabBook,
                    section: str,
                    src_file: str,
                    dst_path: str = '') -> Dict[str, Any]:
        """ Move the file at `src_file` into the `dst_dir`, overwriting
        if a file already exists there. This calls `copy_into_container()` under-
        the-hood, but will create an activity record.

        Args:
            labbook: Subject labbook
            section: Section name (code, input, output)
            src_file: Full path of file to insert into
            dst_path: Relative path within labbook where `src_file`
                      should be copied to

        Returns:
            dict: The inserted file's info
        """

        finfo = FileOperations.put_file(labbook=labbook,
                                        section=section,
                                        src_file=src_file,
                                        dst_path=dst_path)

        rel_path = os.path.join(section, finfo['key'])

        # Determine the activity type info for this section
        activity_type, activity_detail_type, section_str = \
            labbook.get_activity_type_from_section(section)

        commit_msg = f"Added new {section_str} file {rel_path}"
        try:
            labbook.git.add(rel_path)
            commit = labbook.git.commit(commit_msg)
        except Exception as x:
            logger.error(x)
            os.remove(dst_path)
            raise FileOperationsException(x)

        # Create Activity record and detail
        # Tag the record with the file extension, or 'file' if there is none
        ext = os.path.splitext(rel_path)[1] or 'file'
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.CREATE)
        adr.add_value('text/plain', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            show=True,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=[ext])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return finfo
Example #15
    def create_directory(self, path: str) -> Dict[str, Any]:
        """Method to create an empty directory in a dataset

        Args:
            path: Relative path to the directory

        Returns:
            dict
        """
        relative_path = self.dataset.make_path_relative(path)
        new_directory_path = os.path.join(self.cache_mgr.cache_root, self.dataset_revision, relative_path)

        previous_revision = self.dataset_revision

        if os.path.exists(new_directory_path):
            raise ValueError(f"Directory already exists: `{relative_path}`")
        else:
            logger.info(f"Creating new empty directory in `{new_directory_path}`")

            if os.path.isdir(Path(new_directory_path).parent) is False:
                raise ValueError(f"Parent directory does not exist. Failed to create `{new_directory_path}` ")

            # create dir
            os.makedirs(new_directory_path)
            self.update()
            if relative_path not in self.manifest:
                raise ValueError("Failed to add directory to manifest")

            # Create detail record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0,
                                       action=ActivityAction.CREATE)

            msg = f"Created new empty directory `{relative_path}`"
            adr.add_value('text/markdown', msg)

            commit = self.dataset.git.commit(msg)

            # Create activity record
            ar = ActivityRecord(ActivityType.DATASET,
                                message=msg,
                                linked_commit=commit.hexsha,
                                show=True,
                                importance=255,
                                tags=['directory-create'])
            ar.add_detail_object(adr)

            # Store
            ars = ActivityStore(self.dataset)
            ars.create_activity_record(ar)

            # Relink after the commit
            self.link_revision()
            if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
                shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))

            return self.gen_file_info(relative_path)
Example #16
    def helper_resolve_activity_records(self, dataset, kwargs):
        """Helper method to generate ActivityRecord objects and populate the connection"""
        # Create instance of ActivityStore for this dataset
        store = ActivityStore(dataset)

        if kwargs.get('before') or kwargs.get('last'):
            raise ValueError(
                "Only `after` and `first` arguments are supported when paging activity records"
            )

        # Get edges and cursors
        edges = store.get_activity_records(after=kwargs.get('after'),
                                           first=kwargs.get('first'))
        if edges:
            cursors = [x.commit for x in edges]
        else:
            cursors = []

        # Get ActivityRecordObject instances
        edge_objs = []
        for edge, cursor in zip(edges, cursors):
            edge_objs.append(
                ActivityConnection.Edge(node=ActivityRecordObject(
                    id=f"dataset&{self.owner}&{self.name}&{edge.commit}",
                    owner=self.owner,
                    name=self.name,
                    _repository_type='dataset',
                    commit=edge.commit,
                    _activity_record=edge),
                                        cursor=cursor))

        # Create page info based on first commit. Since only paging backwards right now, just check for commit
        if edges:
            has_next_page = True

            # Get the message of the linked commit and check if it is the non-activity record dataset creation commit
            if len(edges) > 1:
                if edges[-2].linked_commit != "no-linked-commit":
                    linked_msg = dataset.git.log_entry(
                        edges[-2].linked_commit)['message']
                    if linked_msg == f"Creating new empty Dataset: {dataset.name}" and "_GTM_ACTIVITY_" not in linked_msg:
                        # if you get here, this is the first activity record
                        has_next_page = False

            end_cursor = cursors[-1]
        else:
            has_next_page = False
            end_cursor = None

        page_info = graphene.relay.PageInfo(has_next_page=has_next_page,
                                            has_previous_page=False,
                                            end_cursor=end_cursor)

        return ActivityConnection(edges=edge_objs, page_info=page_info)
Example #17
    def write_readme(self, contents: str) -> None:
        """Method to write a string to the readme file within the repository. Must write ENTIRE document at once.

        Args:
            contents(str): entire readme document in markdown format

        Returns:
            None
        """
        # Validate readme data: check the type before measuring the length
        if not isinstance(contents, str):
            raise TypeError("Invalid content. Must provide string")

        if len(contents) > (1000000 * 5):
            raise ValueError("Readme file is larger than the 5MB limit")

        readme_file = os.path.join(self.root_dir, 'README.md')
        readme_exists = os.path.exists(readme_file)

        # Write file to disk
        with open(readme_file, 'wt') as rf:
            rf.write(contents)

        # Create commit
        if readme_exists:
            commit_msg = "Updated README file"
            action = ActivityAction.EDIT
        else:
            commit_msg = "Added README file"
            action = ActivityAction.CREATE

        self.git.add(readme_file)
        commit = self.git.commit(commit_msg)

        # Create detail record
        adr = ActivityDetailRecord(self._default_activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=action)
        adr.add_value('text/plain', commit_msg)

        # Create activity record
        ar = ActivityRecord(self._default_activity_type,
                            message=commit_msg,
                            show=False,
                            importance=255,
                            linked_commit=commit.hexsha,
                            tags=['readme'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self)
        ars.create_activity_record(ar)
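
Since `write_readme` expects the entire document at once, a caller simply passes the full markdown string. A short sketch; `repo` stands in for the LabBook or Dataset instance that defines the method, and the text is illustrative:

# Sketch: write or replace the repository README in one call
readme_text = "# My Project\n\nThis project analyzes population data.\n"
repo.write_readme(readme_text)
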
Example #18
    def unlink_dataset_from_labbook(self, dataset_namespace: str,
                                    dataset_name: str,
                                    labbook: LabBook) -> None:
        """Method to removed a dataset reference from a labbook

        Args:
            dataset_namespace:
            dataset_name:
            labbook:

        Returns:

        """
        submodule_dir = os.path.join('.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        call_subprocess(['git', 'rm', '-f', submodule_dir],
                        cwd=labbook.root_dir)

        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      f"{dataset_namespace}&{dataset_name}")
        if os.path.exists(git_module_dir):
            shutil.rmtree(git_module_dir)

        absolute_submodule_dir = os.path.join(labbook.root_dir, '.gigantum',
                                              'datasets', dataset_namespace,
                                              dataset_name)
        if os.path.exists(absolute_submodule_dir):
            shutil.rmtree(absolute_submodule_dir)

        labbook.git.add_all()
        commit = labbook.git.commit("removing submodule ref")

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.DELETE)
        adr.add_value(
            'text/markdown',
            f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project"
        )
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Example #19
    def update_linked_dataset_reference(self, dataset_namespace: str,
                                        dataset_name: str,
                                        labbook: LabBook) -> Dataset:
        """Method to update a linked dataset reference to the latest revision

        Args:
            dataset_namespace: owner (namespace) of the dataset
            dataset_name: name of the dataset
            labbook: labbook instance to which the dataset is linked

        Returns:
            Dataset: the updated dataset instance
        """
        # Load dataset from inside Project directory
        submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                     dataset_namespace, dataset_name)
        ds = self.load_dataset_from_directory(submodule_dir,
                                              author=labbook.author)
        ds.namespace = dataset_namespace

        # Update the submodule reference with the latest changes
        original_revision = ds.git.repo.head.object.hexsha
        ds.git.pull()
        revision = ds.git.repo.head.object.hexsha

        # If the submodule has changed, commit the changes.
        if original_revision != revision:
            labbook.git.add_all()
            commit = labbook.git.commit("Updating submodule ref")

            # Add Activity Record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       action=ActivityAction.EDIT)
            adr.add_value(
                'text/markdown',
                f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}"
            )
            msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
            ar = ActivityRecord(ActivityType.DATASET,
                                message=msg,
                                linked_commit=commit.hexsha,
                                tags=["dataset"],
                                show=True)
            ar.add_detail_object(adr)
            ars = ActivityStore(labbook)
            ars.create_activity_record(ar)

        return ds
Example #20
    def _create_user_note(cls, lb, title, body, tags):
        store = ActivityStore(lb)
        adr = ActivityDetailRecord(ActivityDetailType.NOTE,
                                   show=True,
                                   importance=255)
        if body:
            adr.add_value('text/markdown', body)

        ar = ActivityRecord(ActivityType.NOTE,
                            message=title,
                            linked_commit="no-linked-commit",
                            importance=255,
                            tags=tags)
        ar.add_detail_object(adr)
        ar = store.create_activity_record(ar)
        return ar
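
Invoking this helper might look like the sketch below. `CreateUserNote` is a hypothetical name for the class that defines `_create_user_note`, and `lb` is a loaded LabBook instance; the note text is illustrative:

# Hypothetical call to the classmethod above
record = CreateUserNote._create_user_note(
    lb,
    title="Tuned hyperparameters",
    body="Lowered the learning rate and re-ran training.",
    tags=["experiment", "notes"])
# the stored record is returned, as in Example #20
print(record)
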
Example #21
    def _update_branch_description(cls, lb: LabBook, description: str):
        # Update the description on branch creation
        lb.description = description
        lb.git.add(lb.config_path)
        commit = lb.git.commit('Updating description')

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        adr.add_value('text/plain', description)
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=commit.hexsha,
                            tags=["labbook"],
                            show=False)
        ar.add_detail_object(adr)
        ars = ActivityStore(lb)
        ars.create_activity_record(ar)
Example #22
    def __init__(self,
                 user: str,
                 owner: str,
                 labbook_name: str,
                 monitor_key: str,
                 config_file: str = None,
                 author_name: Optional[str] = None,
                 author_email: Optional[str] = None) -> None:
        """Constructor requires info to load the lab book

        Args:
            user(str): current logged in user
            owner(str): owner of the lab book
            labbook_name(str): name of the lab book
            monitor_key(str): Unique key for the activity monitor in redis
            config_file(str): Optional path to the configuration file to load
            author_name(str): Name of the user starting this activity monitor
            author_email(str): Email of the user starting this activity monitor
        """
        self.monitor_key = monitor_key

        # List of processor classes that will be invoked in order
        self.processors: List[ActivityProcessor] = []

        # Populate GitAuthor instance if available
        if author_name:
            author: Optional[GitAuthor] = GitAuthor(name=author_name,
                                                    email=author_email)
        else:
            author = None

        # Load Lab Book instance
        im = InventoryManager(config_file)
        self.labbook = im.load_labbook(user,
                                       owner,
                                       labbook_name,
                                       author=author)
        self.user = user
        self.owner = owner
        self.labbook_name = labbook_name

        # Create ActivityStore instance
        self.activity_store = ActivityStore(self.labbook)

        # A flag indicating if the activity record is OK to store
        self.can_store_activity_record = False
Example #23
    def sweep_uncommitted_changes(self,
                                  upload: bool = False,
                                  extra_msg: Optional[str] = None,
                                  show: bool = False) -> None:
        """ Sweep all changes into a commit, and create activity record.
            NOTE: This method MUST be called inside a lock.

        Args:
            upload(bool): Flag indicating if this was from a batch upload
            extra_msg(str): Optional string used to augment the activity message
            show(bool): Optional flag indicating if the result of this sweep is important enough to be shown in the feed

        Returns:

        """
        result_status = self.git.status()
        if any(result_status.values()):
            self.git.add_all()
            self.git.commit("Sweep of uncommitted changes")

            ar = ActivityRecord(self._default_activity_type,
                                message="--overwritten--",
                                show=show,
                                importance=255,
                                linked_commit=self.git.commit_hash,
                                tags=['save'])
            if upload:
                ar.tags.append('upload')
            ar, newcnt, modcnt, delcnt = self.process_sweep_status(
                ar, result_status)
            nmsg = f"{newcnt} new file(s). " if newcnt > 0 else ""
            mmsg = f"{modcnt} modified file(s). " if modcnt > 0 else ""
            dmsg = f"{delcnt} deleted file(s). " if delcnt > 0 else ""

            message = f"{extra_msg or ''}" \
                      f"{'Uploaded ' if upload else ''}" \
                      f"{nmsg}{mmsg}{dmsg}"

            # This is used to handle if you try to delete an empty directory. This shouldn't technically happen, but if
            # a user manages to create an empty dir outside the client, we should handle it gracefully
            ar.message = "No detected changes" if not message else message
            ars = ActivityStore(self)
            ars.create_activity_record(ar)
        else:
            logger.info(f"{str(self)} no changes to sweep.")
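
Because the method must run inside a lock, a caller would typically wrap it the same way the mutations in Examples #6 and #7 take the repository lock. A sketch, with an illustrative message:

# Sketch: sweep changes after a batch upload, inside the repository lock
with lb.lock():
    lb.sweep_uncommitted_changes(upload=True,
                                 extra_msg="Batch upload complete. ",
                                 show=True)
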
Example #24
    def _record_remove_activity(cls, secret_store, filename, lb):
        """Make an activity record for the removal of the secret. """
        lb.git.add(secret_store.secret_path)
        lb.git.commit("Removed entry from secrets registry.")
        commit = lb.git.commit_hash
        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=True,
                                   action=ActivityAction.DELETE)
        adr.add_value('text/markdown',
                      f"Removed entry for secrets file {filename}")
        ar = ActivityRecord(
            ActivityType.LABBOOK,
            message=f"Removed entry for secrets file {filename}",
            linked_commit=commit,
            tags=["labbook", "secrets"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(lb)
        ars.create_activity_record(ar)
Example #25
    def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
        """Make an activity record for the insertion of the secret. """
        lb.git.add(secret_store.secret_path)
        lb.git.commit("Updated secrets registry.")
        commit = lb.git.commit_hash
        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=True,
                                   action=ActivityAction.CREATE)
        adr.add_value(
            'text/markdown', f"Created new entry for secrets file {filename} "
            f"to map to {mount_path}")
        ar = ActivityRecord(
            ActivityType.LABBOOK,
            message=f"Created entry for secrets file {filename}",
            linked_commit=commit,
            tags=["labbook", "secrets"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(lb)
        ars.create_activity_record(ar)
Example #26
    def mutate_and_get_payload(cls,
                               root,
                               info,
                               name,
                               description,
                               repository,
                               base_id,
                               revision,
                               is_untracked=False,
                               client_mutation_id=None):
        username = get_logged_in_username()
        inv_manager = InventoryManager()
        lb = inv_manager.create_labbook(username=username,
                                        owner=username,
                                        labbook_name=name,
                                        description=description,
                                        author=get_logged_in_author())

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK,
                                   show=False,
                                   importance=0)
        adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

        # Create activity record
        ar = ActivityRecord(ActivityType.LABBOOK,
                            message=f"Created new LabBook: {username}/{name}",
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(lb)
        store.create_activity_record(ar)

        cm = ComponentManager(lb)
        cm.add_base(repository, base_id, revision)

        return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
Example #27
    def _make_move_activity_record(cls, labbook: LabBook, section: str,
                                   dst_abs_path: str, commit_msg: str) -> None:
        if os.path.isdir(dst_abs_path):
            labbook.git.add_all(dst_abs_path)
        else:
            labbook.git.add(dst_abs_path)

        commit = labbook.git.commit(commit_msg)
        activity_type, activity_detail_type, section_str = labbook.get_activity_type_from_section(
            section)
        adr = ActivityDetailRecord(activity_detail_type,
                                   show=False,
                                   importance=0,
                                   action=ActivityAction.EDIT)
        adr.add_value('text/markdown', commit_msg)
        ar = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)
Example #28
def mock_config_with_activitystore():
    """A pytest fixture that creates a ActivityStore (and labbook) and deletes directory after test"""
    # Create a temporary working directory
    conf_file, working_dir = _create_temp_work_dir()
    im = InventoryManager(conf_file)
    lb = im.create_labbook('default',
                           'default',
                           'labbook1',
                           description="my first labbook",
                           author=GitAuthor("default", "*****@*****.**"))
    store = ActivityStore(lb)

    yield store, lb

    # Remove the temp_dir
    shutil.rmtree(working_dir)
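
A test consuming this fixture might look like the sketch below, reusing the record-building pattern from Example #20; the note text and assertion are illustrative, and the activity classes are assumed to be imported as in the other examples:

def test_create_and_read_record(mock_config_with_activitystore):
    """Sketch of a test that writes a record through the fixture's store and reads it back."""
    store, lb = mock_config_with_activitystore

    adr = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=255)
    adr.add_value('text/markdown', "An example note body")

    ar = ActivityRecord(ActivityType.NOTE,
                        message="An example note",
                        linked_commit="no-linked-commit",
                        importance=255,
                        tags=["test"])
    ar.add_detail_object(adr)

    stored = store.create_activity_record(ar)
    fetched = store.get_activity_record(stored.commit)
    assert fetched.message == "An example note"
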
Example #29
    def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                                dataset_name: str,
                                labbook: LabBook) -> Dataset:
        """

        Args:
            dataset_url:
            dataset_namespace:
            dataset_name:
            labbook:

        Returns:

        """
        def _clean_submodule():
            """Helper method to clean a submodule reference from a repository"""
            if os.path.exists(absolute_submodule_dir):
                logger.warning(
                    f"Cleaning {relative_submodule_dir} from parent git repo")
                try:
                    call_subprocess([
                        'git', 'rm', '-f', '--cached', relative_submodule_dir
                    ],
                                    cwd=labbook.root_dir)
                except subprocess.CalledProcessError:
                    logger.warning(
                        f"git rm on {relative_submodule_dir} failed. Continuing..."
                    )
                    pass

            if os.path.exists(absolute_submodule_dir):
                logger.warning(f"Removing {absolute_submodule_dir} directory")
                shutil.rmtree(absolute_submodule_dir)

            if os.path.exists(git_module_dir):
                logger.warning(f"Removing {git_module_dir} directory")
                shutil.rmtree(git_module_dir)

        relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                              dataset_namespace, dataset_name)
        absolute_submodule_dir = os.path.join(labbook.root_dir,
                                              relative_submodule_dir)
        absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum',
                                               'datasets', dataset_namespace)
        git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                      f"{dataset_namespace}&{dataset_name}")

        if not os.path.exists(absolute_submodule_root):
            pathlib.Path(absolute_submodule_root).mkdir(parents=True,
                                                        exist_ok=True)

        if os.path.exists(absolute_submodule_dir) and os.path.exists(
                git_module_dir):
            # Seem to be trying to link a dataset after a reset removed the dataset. Clean up first.
            _clean_submodule()

        try:
            # Link dataset via submodule reference
            call_subprocess([
                'git', 'submodule', 'add', '--name',
                f"{dataset_namespace}&{dataset_name}", dataset_url,
                relative_submodule_dir
            ],
                            cwd=labbook.root_dir)

        except subprocess.CalledProcessError:
            logger.warning(
                "Failed to link dataset. Attempting to repair repository and link again."
            )
            _clean_submodule()

            # Try to add again 1 more time, allowing a failure to raise an exception
            call_subprocess([
                'git', 'submodule', 'add', '--name',
                f"{dataset_namespace}&{dataset_name}", dataset_url,
                relative_submodule_dir
            ],
                            cwd=labbook.root_dir)

            # If you got here, repair worked and link OK
            logger.info("Repository repair and linking retry successful.")

        labbook.git.add_all()
        commit = labbook.git.commit(
            f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}"
        )
        labbook.git.update_submodules(init=True)

        ds = self.load_dataset_from_directory(absolute_submodule_dir)
        dataset_revision = ds.git.repo.head.commit.hexsha

        # Add Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                   show=False,
                                   action=ActivityAction.CREATE)
        adr.add_value(
            'text/markdown',
            f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
            f"project at revision `{dataset_revision}`")
        ar = ActivityRecord(
            ActivityType.DATASET,
            message=
            f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
            linked_commit=commit.hexsha,
            tags=["dataset"],
            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

        return ds
Example #30
    def create_dataset(self,
                       username: str,
                       owner: str,
                       dataset_name: str,
                       storage_type: str,
                       description: Optional[str] = None,
                       author: Optional[GitAuthor] = None) -> Dataset:
        """Create a new Dataset in this Gigantum working directory.

        Args:
            username: Active username
            owner: Namespace in which to place this Dataset
            dataset_name: Name of the Dataset
            storage_type: String identifying the type of Dataset to instantiate
            description: Optional brief description of Dataset
            author: Optional Git Author

        Returns:
            Newly created Dataset instance

        """
        dataset = Dataset(config_file=self.config_file,
                          author=author,
                          namespace=owner)

        if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
            raise ValueError(
                f"Unsupported Dataset storage type: {storage_type}")

        try:
            build_info = Configuration(self.config_file).config['build_info']
        except KeyError:
            logger.warning("Could not obtain build_info from config")
            build_info = None

        # Build data file contents
        dataset._data = {
            "schema": DATASET_CURRENT_SCHEMA,
            "id": uuid.uuid4().hex,
            "name": dataset_name,
            "storage_type": storage_type,
            "description": description or '',
            "created_on": datetime.datetime.utcnow().isoformat(),
            "build_info": build_info
        }
        dataset._validate_gigantum_data()

        logger.info("Creating new Dataset on disk for {}/{}/{}".format(
            username, owner, dataset_name))
        # lock while creating initial directory
        with dataset.lock(
                lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"
        ):
            # Verify or Create user subdirectory
            # Make sure you expand a user dir string
            starting_dir = os.path.expanduser(
                dataset.client_config.config["git"]["working_directory"])
            user_dir = os.path.join(starting_dir, username)
            if not os.path.isdir(user_dir):
                os.makedirs(user_dir)

            # Create owner dir - store Datasets in working dir > logged in user > owner
            owner_dir = os.path.join(user_dir, owner)
            if not os.path.isdir(owner_dir):
                os.makedirs(owner_dir)

                # Create `datasets` subdir in the owner dir
                owner_dir = os.path.join(owner_dir, "datasets")
            else:
                owner_dir = os.path.join(owner_dir, "datasets")

            # Verify name not already in use
            if os.path.isdir(os.path.join(owner_dir, dataset_name)):
                raise ValueError(
                    f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name"
                )

            # Create Dataset subdirectory
            new_root_dir = os.path.join(owner_dir, dataset_name)
            os.makedirs(new_root_dir)
            dataset._set_root_dir(new_root_dir)

            # Init repository
            dataset.git.initialize()

            # Create Directory Structure
            dirs = [
                'manifest', 'metadata', '.gigantum',
                os.path.join('.gigantum', 'activity'),
                os.path.join('.gigantum', 'activity', 'log')
            ]

            # Create .gitignore default file
            shutil.copyfile(
                os.path.join(resource_filename('gtmcore', 'dataset'),
                             'gitignore.default'),
                os.path.join(dataset.root_dir, ".gitignore"))

            for d in dirs:
                p = os.path.join(dataset.root_dir, d, '.gitkeep')
                os.makedirs(os.path.dirname(p), exist_ok=True)
                with open(p, 'w') as gk:
                    gk.write(
                        "This file is necessary to keep this directory tracked by Git"
                        " and archivable by compression tools. Do not delete or modify!"
                    )

            dataset._save_gigantum_data()

            # Create an empty storage.json file
            dataset.backend_config = {}

            # Commit
            dataset.git.add_all()

            # NOTE: this string is used to indicate there are no more activity records to get. Changing the string will
            # break activity paging.
            # TODO: Improve method for detecting the first activity record
            dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

            # Create Activity Record
            adr = ActivityDetailRecord(ActivityDetailType.DATASET,
                                       show=False,
                                       importance=0)
            adr.add_value('text/plain',
                          f"Created new Dataset: {username}/{dataset_name}")
            ar = ActivityRecord(
                ActivityType.DATASET,
                message=f"Created new Dataset: {username}/{dataset_name}",
                show=True,
                importance=255,
                linked_commit=dataset.git.commit_hash)
            ar.add_detail_object(adr)
            store = ActivityStore(dataset)
            store.create_activity_record(ar)

            # Initialize file cache and link revision
            m = Manifest(dataset, username)
            m.link_revision()

            return dataset
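
Creating a dataset through this method might look like the sketch below, assuming `create_dataset` is exposed on `InventoryManager` the way `create_labbook` is in Example #9. The storage type must be one of `storage.SUPPORTED_STORAGE_BACKENDS`; the value shown here, like the author details, is an assumption:

# Sketch: create a new dataset via the InventoryManager (values are illustrative)
im = InventoryManager()
ds = im.create_dataset(username="default",
                       owner="default",
                       dataset_name="dataset1",
                       storage_type="gigantum_object_v1",  # assumed backend name
                       description="my first dataset",
                       author=GitAuthor("default", "noreply@example.com"))
print(ds.root_dir)
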