def mutate_and_get_payload(cls, root, info, name, description, repository, base_id, revision,
                           is_untracked=False, client_mutation_id=None):
    """Create a new LabBook (optionally untracked) with the given base.

    Merges the two previously duplicated `if is_untracked:` branches so the
    untracked setup happens in one place, right after the labbook is created.

    Returns:
        CreateLabbook payload referencing the new labbook
    """
    username = get_logged_in_username()
    inv_manager = InventoryManager()

    if is_untracked:
        # Untracked projects disable LFS and mark input/output as untracked
        lb = inv_manager.create_labbook_disabled_lfs(username=username, owner=username,
                                                     labbook_name=name, description=description,
                                                     author=get_logged_in_author())
        FileOperations.set_untracked(lb, 'input')
        FileOperations.set_untracked(lb, 'output')
        input_set = FileOperations.is_set_untracked(lb, 'input')
        output_set = FileOperations.is_set_untracked(lb, 'output')
        if not (input_set and output_set):
            raise ValueError(f'{str(lb)} untracking for input/output in malformed state')
        if not lb.is_repo_clean:
            raise ValueError(f'{str(lb)} should have clean Git state after setting for untracked')
    else:
        lb = inv_manager.create_labbook(username=username, owner=username,
                                        labbook_name=name, description=description,
                                        author=get_logged_in_author())

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

    # Create activity record
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created new LabBook: {username}/{name}",
                        show=True,
                        importance=255,
                        linked_commit=lb.git.commit_hash)
    ar.add_detail_object(adr)
    store = ActivityStore(lb)
    store.create_activity_record(ar)

    cm = ComponentManager(lb)
    cm.add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def mutate_and_get_payload(cls, root, info, owner, dataset_name, description, client_mutation_id=None):
    """Set the description of a Dataset and record the change in the activity feed.

    Returns:
        SetDatasetDescription payload referencing the updated dataset
    """
    username = get_logged_in_username()
    ds = InventoryManager().load_dataset(username, owner, dataset_name,
                                         author=get_logged_in_author())
    ds.description = description
    with ds.lock():
        ds.git.add(os.path.join(ds.root_dir, '.gigantum/gigantum.yaml'))
        commit = ds.git.commit('Updating description')

        # CONSISTENCY FIX: this operates on a Dataset, so use DATASET activity
        # types (the original used LABBOOK, unlike all other dataset code)
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False)
        adr.add_value('text/plain', f"Updated Dataset description: {description}")
        ar = ActivityRecord(ActivityType.DATASET,
                            message="Updated Dataset description",
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=False)
        ar.add_detail_object(adr)
        ars = ActivityStore(ds)
        ars.create_activity_record(ar)

    return SetDatasetDescription(updated_dataset=Dataset(owner=owner, name=dataset_name))
def mutate_and_get_payload(cls, root, info, owner, labbook_name, description_content, client_mutation_id=None):
    """Update a Project's description and log the change to the activity feed."""
    username = get_logged_in_username()
    lb = InventoryManager().load_labbook(username, owner, labbook_name,
                                         author=get_logged_in_author())
    lb.description = description_content
    with lb.lock():
        lb.git.add(os.path.join(lb.config_path))
        commit = lb.git.commit('Updating description')

        detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        detail.add_value('text/plain', "Updated description of Project")
        record = ActivityRecord(ActivityType.LABBOOK,
                                message="Updated description of Project",
                                linked_commit=commit.hexsha,
                                tags=["labbook"],
                                show=False)
        record.add_detail_object(detail)
        ActivityStore(lb).create_activity_record(record)

    return SetLabbookDescription(success=True)
def migrate_labbook_schema(labbook: LabBook) -> None:
    """Migrate a LabBook to the current schema, rolling back on failure."""
    # Fallback point in case of a problem
    initial_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        # Restore the pre-migration state before propagating the error
        call_subprocess(f'git reset --hard {initial_commit}'.split(), cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    commit = labbook.git.commit(msg, author=labbook.author, committer=labbook.author)

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True, importance=100,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/plain', msg)
    record = ActivityRecord(ActivityType.LABBOOK, message=msg, show=True, importance=255,
                            linked_commit=commit.hexsha,
                            tags=['schema', 'update', 'migration'])
    record.add_detail_object(detail)
    ActivityStore(labbook).create_activity_record(record)
def remove_docker_snippet(self, name: str) -> None:
    """Remove a custom docker snippet

    Args:
        name: Name or identifier of snippet to remove

    Returns:
        None
    """
    snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    snippet_path = os.path.join(snippet_dir, f'{name}.yaml')
    if not os.path.exists(snippet_path):
        raise ValueError(f'Docker snippet name `{name}` does not exist')

    self.labbook.git.remove(snippet_path, keep_file=False)
    short_message = f"Removed custom Docker snippet `{name}`"
    logger.info(short_message)
    commit = self.labbook.git.commit(short_message)

    detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                                  action=ActivityAction.DELETE)
    detail.add_value('text/plain', short_message)
    record = ActivityRecord(ActivityType.ENVIRONMENT, message=short_message, show=False,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
    record.add_detail_object(detail)
    ActivityStore(self.labbook).create_activity_record(record)
def backend_config(self, data: dict) -> None:
    """Save storage config data.

    Runtime-only credential fields are stripped before persisting.

    Args:
        data: backend configuration to persist; the caller's dict is not mutated
              (the original deleted keys from it as a side effect)
    """
    if self._backend:
        self._backend.configuration = {**self._backend.configuration, **data}

    # Remove defaults set at runtime that shouldn't be persisted.
    # Work on a filtered copy so the caller's dict is left untouched.
    data = {k: v for k, v in data.items()
            if k not in ("username", "gigantum_bearer_token", "gigantum_id_token")}

    config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
    with open(config_file, 'wt') as sf:
        json.dump(data, sf, indent=2)

    self.git.add(config_file)
    cm = self.git.commit("Updating backend config")

    ar = ActivityRecord(ActivityType.DATASET,
                        message="Updated Dataset storage backend configuration",
                        show=True,
                        importance=255,
                        linked_commit=cm.hexsha,
                        tags=['config'])
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=255,
                               action=ActivityAction.EDIT)
    d = json.dumps(data, indent=2)
    adr.add_value('text/markdown',
                  f"Updated dataset storage backend configuration:\n\n ```{d}```")
    ar.add_detail_object(adr)
    ars = ActivityStore(self)
    ars.create_activity_record(ar)
def remove_bundled_app(self, name: str) -> None:
    """Remove a bundled app from this labbook

    Args:
        name(str): name of the bundled app

    Raises:
        ValueError: if no bundled app named `name` exists

    Returns:
        None
    """
    data = self.get_bundled_apps()
    if name not in data:
        raise ValueError(f"App {name} does not exist. Cannot remove.")

    del data[name]
    with open(self.bundled_app_file, 'wt') as baf:
        json.dump(data, baf)

    # Commit the changes
    self.labbook.git.add(self.bundled_app_file)
    commit = self.labbook.git.commit("Committing bundled app")

    # BUGFIX: this is a removal, so record a DELETE action (was CREATE)
    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                               action=ActivityAction.DELETE)
    adr.add_value('text/plain', f"Removed bundled application: {name}")
    ar = ActivityRecord(ActivityType.ENVIRONMENT,
                        message=f"Removed bundled application: {name}",
                        show=True,
                        linked_commit=commit.hexsha,
                        tags=["environment", "docker", "bundled_app"])
    ar.add_detail_object(adr)
    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
def insert_file(cls, labbook: LabBook, section: str, src_file: str, dst_path: str = '') -> Dict[str, Any]:
    """ Move the file at `src_file` into the `dst_dir`, overwriting
    if a file already exists there. This calls `copy_into_container()`
    under-the-hood, but will create an activity record.

    Args:
        labbook: Subject labbook
        section: Section name (code, input, output)
        src_file: Full path of file to insert into
        dst_path: Relative path within labbook where `src_file`
                  should be copied to

    Returns:
        dict: The inserted file's info
    """
    finfo = FileOperations.put_file(labbook=labbook, section=section,
                                    src_file=src_file, dst_path=dst_path)

    rel_path = os.path.join(section, finfo['key'])

    activity_type, activity_detail_type, section_str = \
        labbook.get_activity_type_from_section(section)

    commit_msg = f"Added new {section_str} file {rel_path}"
    try:
        labbook.git.add(rel_path)
        commit = labbook.git.commit(commit_msg)
    except Exception as x:
        logger.error(x)
        os.remove(dst_path)
        raise FileOperationsException(x) from x

    # Create Activity record and detail.
    # BUGFIX: the original `_, ext = os.path.splitext(rel_path) or 'file'`
    # never applied the fallback (a 2-tuple is always truthy) and would have
    # crashed unpacking the string 'file' if it had. Apply the fallback to
    # the extension itself so extension-less files are tagged 'file'.
    _, ext = os.path.splitext(rel_path)
    ext = ext or 'file'
    adr = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                               action=ActivityAction.CREATE)
    adr.add_value('text/plain', commit_msg)

    ar = ActivityRecord(activity_type,
                        message=commit_msg,
                        show=True,
                        importance=255,
                        linked_commit=commit.hexsha,
                        tags=[ext])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
    return finfo
def add_docker_snippet(self, name: str, docker_content: List[str],
                       description: Optional[str] = None) -> None:
    """ Add a custom docker snippet to the environment (replacing custom dependency).

    Args:
        name: Name or identifier of the custom docker snippet
        docker_content: Content of the docker material (May make this a list of strings instead)
        description: Human-readable verbose description of what the snippet is intended to accomplish.

    Returns:
        None
    """
    if not name:
        raise ValueError('Argument `name` cannot be None or empty')
    if not name.replace('-', '').replace('_', '').isalnum():
        raise ValueError('Argument `name` must be alphanumeric string (- and _ accepted)')

    docker_content = docker_content or []

    file_data = {
        'name': name,
        'timestamp_utc': datetime.datetime.utcnow().isoformat(),
        'description': description or "",
        'content': docker_content
    }

    snippet_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    snippet_file = os.path.join(snippet_dir, f'{name}.yaml')
    os.makedirs(snippet_dir, exist_ok=True)
    with open(snippet_file, 'w') as df:
        df.write(yaml.safe_dump(file_data, default_flow_style=False))

    logger.info(f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")
    short_message = f"Wrote custom Docker snippet `{name}`"
    self.labbook.git.add(snippet_file)
    commit = self.labbook.git.commit(short_message)

    detail = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                                  action=ActivityAction.CREATE)
    detail.add_value('text/plain', '\n'.join(docker_content))
    record = ActivityRecord(ActivityType.ENVIRONMENT, message=short_message, show=True,
                            linked_commit=commit.hexsha,
                            tags=["environment", "docker", "snippet"])
    record.add_detail_object(detail)
    ActivityStore(self.labbook).create_activity_record(record)
def create_directory(self, path: str) -> Dict[str, Any]:
    """Method to create an empty directory in a dataset

    Args:
        path: Relative path to the directory

    Returns:
        dict: file info for the new directory (from gen_file_info)

    Raises:
        ValueError: if the directory already exists, the parent directory is
            missing, or the manifest fails to pick up the new directory
    """
    relative_path = self.dataset.make_path_relative(path)
    new_directory_path = os.path.join(self.cache_mgr.cache_root, self.dataset_revision,
                                      relative_path)
    # Remember the revision before the commit below advances it, so the old
    # cache directory can be cleaned up at the end
    previous_revision = self.dataset_revision

    if os.path.exists(new_directory_path):
        raise ValueError(f"Directory already exists: `{relative_path}`")
    else:
        logger.info(f"Creating new empty directory in `{new_directory_path}`")

    if os.path.isdir(Path(new_directory_path).parent) is False:
        raise ValueError(f"Parent directory does not exist. Failed to create `{new_directory_path}` ")

    # create dir
    os.makedirs(new_directory_path)
    # Re-scan so the manifest picks up the new directory
    self.update()

    if relative_path not in self.manifest:
        raise ValueError("Failed to add directory to manifest")

    # Create detail record
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0,
                               action=ActivityAction.CREATE)
    msg = f"Created new empty directory `{relative_path}`"
    adr.add_value('text/markdown', msg)
    # NOTE(review): no explicit git.add before this commit — presumably
    # self.update() stages the manifest changes; confirm
    commit = self.dataset.git.commit(msg)

    # Create activity record
    ar = ActivityRecord(ActivityType.DATASET,
                        message=msg,
                        linked_commit=commit.hexsha,
                        show=True,
                        importance=255,
                        tags=['directory-create'])
    ar.add_detail_object(adr)

    # Store
    ars = ActivityStore(self.dataset)
    ars.create_activity_record(ar)

    # Relink after the commit, then drop the now-stale previous revision cache
    self.link_revision()
    if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
        shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))

    return self.gen_file_info(relative_path)
def write_readme(self, contents: str) -> None:
    """Method to write a string to the readme file within the repository. Must write ENTIRE document at once.

    Args:
        contents(str): entire readme document in markdown format

    Raises:
        TypeError: if contents is not a string
        ValueError: if contents exceeds the 5MB limit

    Returns:
        None
    """
    # Validate readme data. BUGFIX: check the type first so non-string input
    # raises the intended TypeError instead of an incidental error from len().
    if not isinstance(contents, str):
        raise TypeError("Invalid content. Must provide string")
    if len(contents) > (1000000 * 5):
        raise ValueError("Readme file is larger than the 5MB limit")

    readme_file = os.path.join(self.root_dir, 'README.md')
    readme_exists = os.path.exists(readme_file)

    # Write file to disk
    with open(readme_file, 'wt') as rf:
        rf.write(contents)

    # Create commit
    if readme_exists:
        commit_msg = "Updated README file"
        action = ActivityAction.EDIT
    else:
        commit_msg = "Added README file"
        action = ActivityAction.CREATE

    self.git.add(readme_file)
    commit = self.git.commit(commit_msg)

    # Create detail record
    adr = ActivityDetailRecord(self._default_activity_detail_type, show=False,
                               importance=0, action=action)
    adr.add_value('text/plain', commit_msg)

    # Create activity record
    ar = ActivityRecord(self._default_activity_type,
                        message=commit_msg,
                        show=False,
                        importance=255,
                        linked_commit=commit.hexsha,
                        tags=['readme'])
    ar.add_detail_object(adr)

    # Store
    ars = ActivityStore(self)
    ars.create_activity_record(ar)
def unlink_dataset_from_labbook(self, dataset_namespace: str, dataset_name: str,
                                labbook: LabBook) -> None:
    """Remove a linked dataset submodule reference from a labbook.

    Args:
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook the dataset is linked to

    Returns:
        None
    """
    relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                          dataset_namespace, dataset_name)
    call_subprocess(['git', 'rm', '-f', relative_submodule_dir], cwd=labbook.root_dir)

    # Clean up git's internal module storage and any leftover working-tree files
    leftovers = (
        os.path.join(labbook.root_dir, '.git', 'modules',
                     f"{dataset_namespace}&{dataset_name}"),
        os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                     dataset_namespace, dataset_name),
    )
    for leftover in leftovers:
        if os.path.exists(leftover):
            shutil.rmtree(leftover)

    labbook.git.add_all()
    commit = labbook.git.commit("removing submodule ref")

    # Add Activity Record
    detail = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                  action=ActivityAction.DELETE)
    detail.add_value('text/markdown',
                     f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project")
    record = ActivityRecord(
        ActivityType.DATASET,
        message=f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
        linked_commit=commit.hexsha,
        tags=["dataset"],
        show=True)
    record.add_detail_object(detail)
    ActivityStore(labbook).create_activity_record(record)
def update_linked_dataset_reference(self, dataset_namespace: str, dataset_name: str, labbook: LabBook) -> Dataset:
    """Method to update a linked dataset reference to the latest revision

    Args:
        dataset_namespace: owner (namespace) of the dateset
        dataset_name: name of the dataset
        labbook: labbook instance to which the dataset is linked

    Returns:
        Dataset: the dataset instance loaded from the submodule directory
    """
    # Load dataset from inside Project directory
    submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                 dataset_namespace, dataset_name)
    ds = self.load_dataset_from_directory(submodule_dir, author=labbook.author)
    ds.namespace = dataset_namespace

    # Update the submodule reference with the latest changes
    original_revision = ds.git.repo.head.object.hexsha
    ds.git.pull()
    revision = ds.git.repo.head.object.hexsha

    # If the submodule has changed, commit the changes.
    if original_revision != revision:
        labbook.git.add_all()
        commit = labbook.git.commit("Updating submodule ref")

        # Add Activity Record. BUGFIX: this is an update of the link, not a
        # deletion, so record an EDIT action (was a copy-pasted DELETE).
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                   action=ActivityAction.EDIT)
        adr.add_value('text/markdown',
                      f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}")
        msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
        ar = ActivityRecord(ActivityType.DATASET,
                            message=msg,
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=True)
        ar.add_detail_object(adr)
        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

    return ds
def _update_branch_description(cls, lb: LabBook, description: str):
    """Set the labbook description at branch creation and record the change."""
    # Update the description on branch creation
    lb.description = description
    lb.git.add(lb.config_path)
    commit = lb.git.commit('Updating description')

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
    detail.add_value('text/plain', description)
    record = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=commit.hexsha,
                            tags=["labbook"],
                            show=False)
    record.add_detail_object(detail)
    ActivityStore(lb).create_activity_record(record)
def sweep_uncommitted_changes(self, upload: bool = False,
                              extra_msg: Optional[str] = None,
                              show: bool = False) -> None:
    """ Sweep all changes into a commit, and create activity record.
        NOTE: This method MUST be called inside a lock.

    Args:
        upload(bool): Flag indicating if this was from a batch upload
        extra_msg(str): Optional string used to augment the activity message
        show(bool): Optional flag indicating if the result of this sweep is important enough
                    to be shown in the feed

    Returns:
        None
    """
    result_status = self.git.status()
    # Only commit when at least one status category reports changes
    if any([result_status[k] for k in result_status.keys()]):
        self.git.add_all()
        self.git.commit("Sweep of uncommitted changes")

        # Message is replaced below once the file counts are known
        ar = ActivityRecord(self._default_activity_type,
                            message="--overwritten--",
                            show=show,
                            importance=255,
                            linked_commit=self.git.commit_hash,
                            tags=['save'])
        if upload:
            ar.tags.append('upload')

        # process_sweep_status attaches per-file detail records and returns
        # new/modified/deleted counts used to build the summary message
        ar, newcnt, modcnt, delcnt = self.process_sweep_status(ar, result_status)
        nmsg = f"{newcnt} new file(s). " if newcnt > 0 else ""
        mmsg = f"{modcnt} modified file(s). " if modcnt > 0 else ""
        dmsg = f"{delcnt} deleted file(s). " if delcnt > 0 else ""

        message = f"{extra_msg or ''}" \
                  f"{'Uploaded ' if upload else ''}" \
                  f"{nmsg}{mmsg}{dmsg}"

        # This is used to handle if you try to delete an empty directory. This shouldn't
        # technically happen, but if a user manages to create an empty dir outside the
        # client, we should handle it gracefully
        ar.message = "No detected changes" if not message else message

        ars = ActivityStore(self)
        ars.create_activity_record(ar)
    else:
        logger.info(f"{str(self)} no changes to sweep.")
def _record_remove_activity(cls, secret_store, filename, lb):
    """Make an activity record for the removal of the secret.

    Args:
        secret_store: store whose registry file was updated
        filename: name of the secrets file that was removed
        lb: labbook the secret belonged to
    """
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Removed entry from secrets registry.")
    commit = lb.git.commit_hash

    # BUGFIX: `filename` was unused and the record contained the literal
    # placeholder "(unknown)"; include the actual file name instead.
    message = f"Removed entry for secrets file {filename}"
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True,
                               action=ActivityAction.DELETE)
    adr.add_value('text/markdown', message)
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=message,
                        linked_commit=commit,
                        tags=["labbook", "secrets"],
                        show=True)
    ar.add_detail_object(adr)
    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
    """Make an activity record for the insertion of the secret.

    Args:
        secret_store: store whose registry file was updated
        filename: name of the secrets file that was added
        lb: labbook the secret belongs to
        mount_path: path the secrets file will be mounted at
    """
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Updated secrets registry.")
    commit = lb.git.commit_hash

    # BUGFIX: `filename` was unused ("(unknown)" placeholder) and the two
    # concatenated f-strings lacked a separating space ("(unknown)to map to").
    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True,
                               action=ActivityAction.CREATE)
    adr.add_value('text/markdown',
                  f"Created new entry for secrets file {filename} "
                  f"to map to {mount_path}")
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created entry for secrets file {filename}",
                        linked_commit=commit,
                        tags=["labbook", "secrets"],
                        show=True)
    ar.add_detail_object(adr)
    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def mutate_and_get_payload(cls, root, info, name, description, repository, base_id, revision,
                           is_untracked=False, client_mutation_id=None):
    """Create a new LabBook with the given base and return the creation payload."""
    username = get_logged_in_username()
    inv_manager = InventoryManager()
    lb = inv_manager.create_labbook(username=username, owner=username,
                                    labbook_name=name, description=description,
                                    author=get_logged_in_author())

    detail = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    detail.add_value('text/plain', f"Created new LabBook: {username}/{name}")

    # Create activity record
    record = ActivityRecord(ActivityType.LABBOOK,
                            message=f"Created new LabBook: {username}/{name}",
                            show=True,
                            importance=255,
                            linked_commit=lb.git.commit_hash)
    record.add_detail_object(detail)
    ActivityStore(lb).create_activity_record(record)

    ComponentManager(lb).add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def _make_move_activity_record(cls, labbook: LabBook, section: str, dst_abs_path: str,
                               commit_msg: str) -> None:
    """Commit a file/directory move and store a matching activity record."""
    if os.path.isdir(dst_abs_path):
        labbook.git.add_all(dst_abs_path)
    else:
        labbook.git.add(dst_abs_path)
    commit = labbook.git.commit(commit_msg)

    activity_type, activity_detail_type, section_str = \
        labbook.get_activity_type_from_section(section)

    detail = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                                  action=ActivityAction.EDIT)
    detail.add_value('text/markdown', commit_msg)
    record = ActivityRecord(activity_type,
                            message=commit_msg,
                            linked_commit=commit.hexsha,
                            show=True,
                            importance=255,
                            tags=['file-move'])
    record.add_detail_object(detail)
    ActivityStore(labbook).create_activity_record(record)
def _create_user_note(cls, lb, title, body, tags):
    """Create and store a user note activity record; returns the stored record."""
    store = ActivityStore(lb)

    detail = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=255)
    if body:
        detail.add_value('text/markdown', body)

    # User notes have no backing commit, hence the sentinel linked_commit value
    note = ActivityRecord(ActivityType.NOTE,
                          message=title,
                          linked_commit="no-linked-commit",
                          importance=255,
                          tags=tags)
    note.add_detail_object(detail)
    return store.create_activity_record(note)
def create_dataset(self, username: str, owner: str, dataset_name: str, storage_type: str,
                   description: Optional[str] = None, author: Optional[GitAuthor] = None) -> Dataset:
    """Create a new Dataset in this Gigantum working directory.

    Args:
        username: Active username
        owner: Namespace in which to place this Dataset
        dataset_name: Name of the Dataset
        storage_type: String identifying the type of Dataset to instantiate
        description: Optional brief description of Dataset
        author: Optional Git Author

    Returns:
        Newly created Dataset instance
    """
    dataset = Dataset(config_file=self.config_file, author=author, namespace=owner)
    if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
        raise ValueError(f"Unsupported Dataset storage type: {storage_type}")

    try:
        build_info = Configuration(self.config_file).config['build_info']
    except KeyError:
        logger.warning("Could not obtain build_info from config")
        build_info = None

    # Build data file contents
    dataset._data = {
        "schema": DATASET_CURRENT_SCHEMA,
        "id": uuid.uuid4().hex,
        "name": dataset_name,
        "storage_type": storage_type,
        "description": description or '',
        "created_on": datetime.datetime.utcnow().isoformat(),
        "build_info": build_info
    }
    dataset._validate_gigantum_data()

    logger.info("Creating new Dataset on disk for {}/{}/{}".format(username, owner, dataset_name))

    # lock while creating initial directory
    with dataset.lock(lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"):
        # Verify or Create user subdirectory
        # Make sure you expand a user dir string
        starting_dir = os.path.expanduser(dataset.client_config.config["git"]["working_directory"])
        user_dir = os.path.join(starting_dir, username)
        if not os.path.isdir(user_dir):
            os.makedirs(user_dir)

        # Create owner dir - store LabBooks in working dir > logged in user > owner
        owner_dir = os.path.join(user_dir, owner)
        if not os.path.isdir(owner_dir):
            os.makedirs(owner_dir)
            # Create `datasets` subdir in the owner dir
            owner_dir = os.path.join(owner_dir, "datasets")
        else:
            owner_dir = os.path.join(owner_dir, "datasets")

        # Verify name not already in use
        if os.path.isdir(os.path.join(owner_dir, dataset_name)):
            raise ValueError(f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name")

        # Create Dataset subdirectory
        new_root_dir = os.path.join(owner_dir, dataset_name)
        os.makedirs(new_root_dir)
        dataset._set_root_dir(new_root_dir)

        # Init repository
        dataset.git.initialize()

        # Create Directory Structure
        dirs = [
            'manifest', 'metadata', '.gigantum',
            os.path.join('.gigantum', 'activity'),
            os.path.join('.gigantum', 'activity', 'log')
        ]

        # Create .gitignore default file
        shutil.copyfile(os.path.join(resource_filename('gtmcore', 'dataset'), 'gitignore.default'),
                        os.path.join(dataset.root_dir, ".gitignore"))

        # Add a .gitkeep so git tracks each (initially empty) directory
        for d in dirs:
            p = os.path.join(dataset.root_dir, d, '.gitkeep')
            os.makedirs(os.path.dirname(p), exist_ok=True)
            with open(p, 'w') as gk:
                gk.write("This file is necessary to keep this directory tracked by Git"
                         " and archivable by compression tools. Do not delete or modify!")

        dataset._save_gigantum_data()

        # Create an empty storage.json file
        dataset.backend_config = {}

        # Commit
        dataset.git.add_all()

        # NOTE: this string is used to indicate there are no more activity records to get. Changing the string will
        # break activity paging.
        # TODO: Improve method for detecting the first activity record
        dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

        # Create Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0)
        adr.add_value('text/plain', f"Created new Dataset: {username}/{dataset_name}")
        ar = ActivityRecord(ActivityType.DATASET,
                            message=f"Created new Dataset: {username}/{dataset_name}",
                            show=True,
                            importance=255,
                            linked_commit=dataset.git.commit_hash)
        ar.add_detail_object(adr)
        store = ActivityStore(dataset)
        store.create_activity_record(ar)

        # Initialize file cache and link revision
        m = Manifest(dataset, username)
        m.link_revision()

    return dataset
def test_get_recent_activity(self, fixture_working_dir, snapshot, fixture_test_file):
    """Test paging through activity records"""
    im = InventoryManager(fixture_working_dir[0])
    lb = im.create_labbook("default", "default", "labbook11", description="my test description",
                           author=GitAuthor(name="tester", email="*****@*****.**"))
    FileOperations.insert_file(lb, "code", fixture_test_file)

    # fake activity
    store = ActivityStore(lb)
    adr1 = ActivityDetailRecord(ActivityDetailType.CODE)
    adr1.show = False
    adr1.importance = 100
    adr1.add_value("text/plain", "first")

    ar = ActivityRecord(ActivityType.CODE,
                        show=False,
                        message="ran some code",
                        importance=50,
                        linked_commit="asdf")
    ar.add_detail_object(adr1)

    # Create Activity Record — stored repeatedly to build a pageable history
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)

    # Interleave real file operations so the feed mixes fake and real records
    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt')
    FileOperations.makedir(lb, "input/test")
    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test")
    FileOperations.makedir(lb, "input/test2")
    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "input", '/tmp/test_file.txt', "test2")

    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)
    store.create_activity_record(ar)

    open('/tmp/test_file.txt', 'w').write("xxx" * 50)
    FileOperations.insert_file(lb, "output", '/tmp/test_file.txt')

    # Get all records at once with no pagination args and verify cursors look OK directly
    query = """
    {
      labbook(owner: "default", name: "labbook11") {
        overview {
          recentActivity {
            message
            type
            show
            importance
            tags
          }
        }
      }
    }
    """
    snapshot.assert_match(fixture_working_dir[2].execute(query))
def remove_packages(self, package_manager: str, package_names: List[str]) -> None: """Remove yaml files describing a package and its context to the labbook. Args: package_manager: The package manager (eg., "apt" or "pip3") package_names: A list of packages to uninstall Returns: None """ # Create activity record ar = ActivityRecord( ActivityType.ENVIRONMENT, message="", show=True, linked_commit="", tags=["environment", 'package_manager', package_manager]) for pkg in package_names: yaml_filename = '{}_{}.yaml'.format(package_manager, pkg) package_yaml_path = os.path.join(self.env_dir, 'package_manager', yaml_filename) # Check for package to exist if not os.path.exists(package_yaml_path): raise ValueError( f"{package_manager} installed package {pkg} does not exist." ) # Check to make sure package isn't from the base. You cannot remove packages from the base yet. with open(package_yaml_path, 'rt') as cf: package_data = yaml.safe_load(cf) if not package_data: raise IOError("Failed to load package description") if package_data['from_base'] is True: raise ValueError( "Cannot remove a package installed in the Base") # Delete the yaml file, which on next Dockerfile gen/rebuild will remove the dependency os.remove(package_yaml_path) if os.path.exists(package_yaml_path): raise ValueError(f"Failed to remove package.") self.labbook.git.remove(package_yaml_path) # Create detail record adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False, action=ActivityAction.DELETE) adr.add_value('text/plain', f"Removed {package_manager} managed package: {pkg}") ar.add_detail_object(adr) logger.info(f"Removed {package_manager} managed package: {pkg}") # Add to git short_message = f"Removed {len(package_names)} {package_manager} managed package(s)" commit = self.labbook.git.commit(short_message) ar.linked_commit = commit.hexsha ar.message = short_message # Store ars = ActivityStore(self.labbook) ars.create_activity_record(ar)
def create_update_activity_record(self, status: StatusResult, upload: bool = False,
                                  extra_msg: str = None) -> None:
    """Create an activity record summarizing manifest changes.

    Args:
        status(StatusResult): a StatusResult object after updating the manifest
        upload(bool): flag indicating if this is a record for an upload
        extra_msg(str): any extra string to add to the activity record

    Returns:
        None
    """
    def _item_type(key):
        # Manifest keys for directories end with the path separator
        if key[-1] == os.path.sep:
            return 'directory'
        else:
            return 'file'

    if len(status.deleted) > 0 or len(status.created) > 0 or len(status.modified) > 0:
        # commit changed manifest file
        self.dataset.git.add_all()
        self.dataset.git.commit("Commit changes to manifest file.")

        ar = ActivityRecord(ActivityType.DATASET,
                            message="msg is set below after detail record processing...",
                            show=True,
                            importance=255,
                            linked_commit=self.dataset.git.commit_hash,
                            tags=[])

        # One detail record per change; importance decays with position so
        # earlier items surface first in the feed
        for cnt, f in enumerate(status.created):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.CREATE)
            msg = f"Created new {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        for cnt, f in enumerate(status.modified):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.EDIT)
            msg = f"Modified {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        for cnt, f in enumerate(status.deleted):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.DELETE)
            msg = f"Deleted {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        num_files_created = sum([_item_type(x) == "file" for x in status.created])
        num_files_modified = sum([_item_type(x) == "file" for x in status.modified])
        num_files_deleted = sum([_item_type(x) == "file" for x in status.deleted])

        upload_str = "Uploaded" if upload else ''
        nmsg = f"{upload_str} {num_files_created} new file(s). " if num_files_created > 0 else ""
        mmsg = f"{upload_str} {num_files_modified} modified file(s). " if num_files_modified > 0 else ""
        dmsg = f"{num_files_deleted} deleted file(s). " if num_files_deleted > 0 else ""

        if not nmsg and not mmsg and not dmsg:
            # You didn't edit any files, only an empty directory
            num_dirs_created = sum([_item_type(x) == "directory" for x in status.created])
            num_dirs_modified = sum([_item_type(x) == "directory" for x in status.modified])
            num_dirs_deleted = sum([_item_type(x) == "directory" for x in status.deleted])
            nmsg = f"{num_dirs_created} new folder(s). " if num_dirs_created > 0 else ""
            mmsg = f"{num_dirs_modified} modified folder(s). " if num_dirs_modified > 0 else ""
            dmsg = f"{num_dirs_deleted} deleted folder(s). " if num_dirs_deleted > 0 else ""

        ar.message = f"{extra_msg if extra_msg else ''}" \
                     f"{nmsg}{mmsg}{dmsg}"

        ars = ActivityStore(self.dataset)
        ars.create_activity_record(ar)
def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                            dataset_name: str, labbook: LabBook) -> Dataset:
    """Link a dataset into a project (LabBook) via a git submodule reference.

    Adds the dataset repository as a submodule under
    ``.gigantum/datasets/<namespace>/<name>``, commits the reference, records
    the link in the activity feed, and returns the loaded dataset.

    Args:
        dataset_url: Remote git URL of the dataset to link
        dataset_namespace: Namespace (owner) of the dataset
        dataset_name: Name of the dataset
        labbook: LabBook instance the dataset is linked into

    Returns:
        Dataset: dataset instance loaded from the new submodule directory
    """
    relative_target = os.path.join('.gigantum', 'datasets', dataset_namespace, dataset_name)
    namespace_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets', dataset_namespace)
    submodule_target = os.path.join(namespace_dir, dataset_name)

    # Make sure the namespace directory exists before adding the submodule
    if not os.path.exists(namespace_dir):
        pathlib.Path(namespace_dir).mkdir(parents=True, exist_ok=True)

    # Register the submodule under the name "<namespace>&<name>"
    subprocess.run(['git', 'submodule', 'add', '--name',
                    f"{dataset_namespace}&{dataset_name}",
                    dataset_url, relative_target],
                   check=True, cwd=labbook.root_dir, stderr=subprocess.STDOUT)

    labbook.git.add_all()
    commit = labbook.git.commit(
        f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}")
    labbook.git.update_submodules(init=True)

    ds = self.load_dataset_from_directory(submodule_target)
    dataset_revision = ds.git.repo.head.commit.hexsha

    # Record the link operation in the project's activity feed
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                               action=ActivityAction.CREATE)
    adr.add_value('text/markdown',
                  f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
                  f"project at revision `{dataset_revision}`")

    ar = ActivityRecord(ActivityType.DATASET,
                        message=f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
                        linked_commit=commit.hexsha,
                        tags=["dataset"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return ds
def makedir(cls, labbook: LabBook, relative_path: str, make_parents: bool = True,
            create_activity_record: bool = False) -> None:
    """Make a new directory inside the labbook directory.

    A `.gitkeep` file is written into the new directory and into each path
    component along the way, so Git tracks the directories even while empty.
    If the directory already exists this is a silent no-op.

    Args:
        labbook: Subject LabBook
        relative_path(str): Path within the labbook to make directory
        make_parents(bool): If true, create intermediary directories
        create_activity_record(bool): If true, create commit and activity record

    Returns:
        None

    Raises:
        ValueError: if relative_path is None or empty
    """
    if not relative_path:
        raise ValueError("relative_path argument cannot be None or empty")

    relative_path = LabBook.make_path_relative(relative_path)
    new_directory_path = os.path.join(labbook.root_dir, relative_path)
    if os.path.exists(new_directory_path):
        # Directory already present -- nothing to do
        return
    else:
        logger.info(f"Making new directory in `{new_directory_path}`")
        # NOTE(review): os.makedirs always creates intermediate directories;
        # exist_ok only suppresses the error for an existing leaf, so
        # make_parents=False does not actually prevent parent creation here.
        # Confirm this is the intended semantics.
        os.makedirs(new_directory_path, exist_ok=make_parents)

        # Walk each path component, dropping a .gitkeep into any directory
        # missing one and staging it so empty directories stay tracked.
        new_dir = ''
        for d in relative_path.split(os.sep):
            new_dir = os.path.join(new_dir, d)
            full_new_dir = os.path.join(labbook.root_dir, new_dir)
            gitkeep_path = os.path.join(full_new_dir, '.gitkeep')
            if not os.path.exists(gitkeep_path):
                with open(gitkeep_path, 'w') as gitkeep:
                    gitkeep.write("This file is necessary to keep this directory tracked by Git"
                                  " and archivable by compression tools. Do not delete or modify!")
                labbook.git.add(gitkeep_path)

        if create_activity_record:
            # Create detail record
            activity_type, activity_detail_type, section_str = labbook.infer_section_from_relative_path(relative_path)

            adr = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                                       action=ActivityAction.CREATE)

            msg = f"Created new {section_str} directory `{relative_path}`"
            # Commit the staged .gitkeep files so the record can link to the commit
            commit = labbook.git.commit(msg)
            adr.add_value('text/markdown', msg)

            # Create activity record
            ar = ActivityRecord(activity_type,
                                message=msg,
                                linked_commit=commit.hexsha,
                                show=True,
                                importance=255,
                                tags=['directory-create'])
            ar.add_detail_object(adr)

            # Store
            ars = ActivityStore(labbook)
            ars.create_activity_record(ar)
class ActivityMonitor(metaclass=abc.ABCMeta):
    """Class to monitor a kernel/IDE for activity to be processed.

    Subclasses implement `start()` to run the long-lived monitoring loop;
    this base class provides labbook loading, commit helpers, the processor
    pipeline, and busy-state bookkeeping in redis.
    """

    def __init__(self, user: str, owner: str, labbook_name: str, monitor_key: str,
                 config_file: str = None, author_name: Optional[str] = None,
                 author_email: Optional[str] = None) -> None:
        """Constructor requires info to load the lab book

        Args:
            user(str): current logged in user
            owner(str): owner of the lab book
            labbook_name(str): name of the lab book
            monitor_key(str): Unique key for the activity monitor in redis
            config_file(str): Optional config file used when loading the labbook
            author_name(str): Name of the user starting this activity monitor
            author_email(str): Email of the user starting this activity monitor
        """
        self.monitor_key = monitor_key

        # List of processor classes that will be invoked in order
        self.processors: List[ActivityProcessor] = []

        # Populate GitAuthor instance if available
        if author_name:
            author: Optional[GitAuthor] = GitAuthor(name=author_name, email=author_email)
        else:
            author = None

        # Load Lab Book instance
        im = InventoryManager(config_file)
        self.labbook = im.load_labbook(user, owner, labbook_name, author=author)

        self.user = user
        self.owner = owner
        self.labbook_name = labbook_name

        # Create ActivityStore instance
        self.activity_store = ActivityStore(self.labbook)

        # A flag indicating if the activity record is OK to store
        self.can_store_activity_record = False

    def add_processor(self, processor_instance: ActivityProcessor) -> None:
        """Append a processor to the end of the processing pipeline.

        Args:
            processor_instance(ActivityProcessor): A processor class to add to the pipeline

        Returns:
            None
        """
        self.processors.append(processor_instance)

    def commit_file(self, filename: str) -> str:
        """Method to commit changes to a single file.

        Args:
            filename(str): file to commit

        Returns:
            str: hexsha of the new commit
        """
        self.labbook.git.add(filename)
        commit = self.labbook.git.commit("Auto-commit from activity monitoring")
        return commit.hexsha

    def commit_labbook(self) -> str:
        """Method to commit all changes in the labbook.

        Returns:
            str: hexsha of the new commit
        """
        self.labbook.git.add_all()
        commit = self.labbook.git.commit("Auto-commit from activity monitoring")
        return commit.hexsha

    def store_activity_record(self, linked_commit: str,
                              activity_record: ActivityRecord) -> Optional[str]:
        """Store an activity record, linking it to the given git commit.

        Args:
            linked_commit(str): Git commit this ActivityRecord is related to
            activity_record(ActivityRecord): The populated ActivityRecord object
                returned by the processing pipeline

        Returns:
            str: commit of the stored activity record
        """
        activity_record.linked_commit = linked_commit

        # Create a activity record
        record = self.activity_store.create_activity_record(activity_record)
        return record.commit

    def process(self, activity_type: ActivityType, data: List[ExecutionData],
                metadata: Dict[str, Any]) -> ActivityRecord:
        """Build an ActivityRecord by running the processor pipeline over data.

        Args:
            activity_type(ActivityType): A ActivityType object indicating the activity type
            data(list): A list of ExecutionData instances containing the data for this record
            metadata(dict): A dictionary containing Dev Env specific or other developer defined data

        Returns:
            ActivityRecord
        """
        # Initialize empty record
        activity_record = ActivityRecord(activity_type=activity_type)

        # Get git status for tracking file changes
        status = self.labbook.git.status()

        # Run processors to populate the record
        for p in self.processors:
            activity_record = p.process(activity_record, data, status, metadata)

        return activity_record

    def get_container_ip(self) -> Optional[str]:
        """Method to get the monitored lab book container's IP address on the
        Docker bridge network.

        Returns:
            str: the container's bridge-network IP address
        """
        client = get_docker_client()
        lb_key = infer_docker_image_name(self.labbook_name, self.owner, self.user)
        container = client.containers.get(lb_key)
        ip = container.attrs['NetworkSettings']['Networks']['bridge']['IPAddress']
        logger.info("container {} IP: {}".format(container.name, ip))
        return ip

    def set_busy_state(self, is_busy: bool) -> None:
        """Method to set the busy state of the dev env being monitored. If busy,
        some actions (e.g. auto-save hooks) may be disabled depending on the dev
        env. This method sets or deletes a key in redis that other processes can
        check.

        Args:
            is_busy(bool): True if busy, false if idle

        Returns:
            None
        """
        try:
            client = redis.StrictRedis(db=1)
            key = f"{self.labbook.key}&is-busy&{self.monitor_key}"
            if is_busy:
                client.set(key, True)
            else:
                client.delete(key)
        except Exception as err:
            # This should never stop more important operations
            logger.warning(
                f"An error occurred while setting the monitor busy state for {str(self.labbook)}: {err}")

    def start(self, data: Dict[str, Any]) -> None:
        """Method called in a long running scheduled async worker that should
        monitor for activity, committing files and creating notes as needed.

        Args:
            data(dict): A dictionary of data to start the activity monitor

        Returns:
            None
        """
        # Bug fix: `raise NotImplemented` raises a TypeError at runtime
        # (NotImplemented is not an exception); the correct sentinel is
        # the NotImplementedError exception class.
        raise NotImplementedError
def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                            dataset_name: str, labbook: LabBook) -> Dataset:
    """Link a dataset into a project via a git submodule reference, repairing
    any stale submodule state left behind (e.g. by a previous reset) and
    retrying once if the initial `git submodule add` fails.

    Args:
        dataset_url: Remote git URL of the dataset to link
        dataset_namespace: Namespace (owner) of the dataset
        dataset_name: Name of the dataset
        labbook: LabBook instance the dataset is linked into

    Returns:
        Dataset: dataset instance loaded from the new submodule directory
    """
    def _clean_submodule():
        """Helper method to clean a submodule reference from a repository"""
        if os.path.exists(absolute_submodule_dir):
            logger.warning(f"Cleaning {relative_submodule_dir} from parent git repo")
            try:
                call_subprocess(['git', 'rm', '-f', '--cached', relative_submodule_dir],
                                cwd=labbook.root_dir)
            except subprocess.CalledProcessError:
                # Best-effort: keep cleaning even if the ref was not cached
                logger.warning(f"git rm on {relative_submodule_dir} failed. Continuing...")
                pass

        if os.path.exists(absolute_submodule_dir):
            logger.warning(f"Removing {absolute_submodule_dir} directory")
            shutil.rmtree(absolute_submodule_dir)

        if os.path.exists(git_module_dir):
            logger.warning(f"Removing {git_module_dir} directory")
            shutil.rmtree(git_module_dir)

    relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                          dataset_namespace, dataset_name)
    absolute_submodule_dir = os.path.join(labbook.root_dir, relative_submodule_dir)
    absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                           dataset_namespace)
    # Git keeps submodule internals under .git/modules, keyed by the
    # submodule name "<namespace>&<name>" used at add time.
    git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                  f"{dataset_namespace}&{dataset_name}")

    if not os.path.exists(absolute_submodule_root):
        pathlib.Path(absolute_submodule_root).mkdir(parents=True, exist_ok=True)

    if os.path.exists(absolute_submodule_dir) and os.path.exists(git_module_dir):
        # Seem to be trying to link a dataset after a reset removed the dataset. Clean up first.
        _clean_submodule()

    try:
        # Link dataset via submodule reference
        call_subprocess(['git', 'submodule', 'add', '--name',
                         f"{dataset_namespace}&{dataset_name}",
                         dataset_url, relative_submodule_dir],
                        cwd=labbook.root_dir)
    except subprocess.CalledProcessError:
        logger.warning("Failed to link dataset. Attempting to repair repository and link again.")
        _clean_submodule()

        # Try to add again 1 more time, allowing a failure to raise an exception
        call_subprocess(['git', 'submodule', 'add', '--name',
                         f"{dataset_namespace}&{dataset_name}",
                         dataset_url, relative_submodule_dir],
                        cwd=labbook.root_dir)

        # If you got here, repair worked and link OK
        logger.info("Repository repair and linking retry successful.")

    labbook.git.add_all()
    commit = labbook.git.commit(
        f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}")
    labbook.git.update_submodules(init=True)

    ds = self.load_dataset_from_directory(absolute_submodule_dir)
    dataset_revision = ds.git.repo.head.commit.hexsha

    # Add Activity Record
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                               action=ActivityAction.CREATE)
    adr.add_value('text/markdown',
                  f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
                  f"project at revision `{dataset_revision}`")

    ar = ActivityRecord(ActivityType.DATASET,
                        message=f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
                        linked_commit=commit.hexsha,
                        tags=["dataset"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return ds
def add_base(self, repository: str, base_id: str, revision: int) -> None:
    """Method to add a base to a LabBook's environment

    Args:
        repository(str): The Environment Component repository the component is in
        base_id(str): The name of the component
        revision(int): The revision of the base to fetch from the repository

    Returns:
        None

    Raises:
        ValueError: if repository/base_id are empty or the base already exists
    """
    if not repository:
        raise ValueError('repository cannot be None or empty')
    if not base_id:
        raise ValueError('component cannot be None or empty')

    # Get the base
    base_data = self.bases.get_base(repository, base_id, revision)
    # Bug fix: a stray `revision` argument was previously passed to format()
    # with only two placeholders and silently ignored. The filename is
    # "<repository>_<base_id>.yaml"; the revision is carried in the yaml data.
    base_filename = "{}_{}.yaml".format(repository, base_id)
    base_final_path = os.path.join(self.env_dir, 'base', base_filename)

    short_message = "Added base: {}".format(base_id)
    if os.path.exists(base_final_path):
        # Bug fix: the placeholder was previously never formatted, producing
        # the literal message "The base {} already exists in this project".
        raise ValueError(f"The base {base_id} already exists in this project")

    # Persist the base definition into the environment directory
    with open(base_final_path, 'wt') as cf:
        cf.write(yaml.safe_dump(base_data, default_flow_style=False))

    # Install the packages declared by the base, per package manager
    for manager in base_data['package_managers']:
        packages = list()
        # Build dictionary of packages
        # NOTE(review): `packages` accumulates across keys of the same manager
        # dict, so with multiple keys earlier entries are re-sent -- confirm
        # each manager dict is expected to hold a single package manager key.
        for p_manager in manager.keys():
            if manager[p_manager]:
                for pkg in manager[p_manager]:
                    pkg_name, pkg_version = strip_package_and_version(p_manager, pkg)
                    packages.append({"package": pkg_name,
                                     "version": pkg_version,
                                     "manager": p_manager})

                self.add_packages(package_manager=p_manager, packages=packages,
                                  force=True, from_base=True)

    self.labbook.git.add(base_final_path)
    commit = self.labbook.git.commit(short_message)
    logger.info(f"Added base from {repository}: {base_id} rev{revision}")

    # Create a ActivityRecord
    long_message = "Added base {}\n".format(base_id)
    long_message = "{}\n{}\n\n".format(long_message, base_data['description'])
    long_message = "{} - repository: {}\n".format(long_message, repository)
    long_message = "{} - component: {}\n".format(long_message, base_id)
    long_message = "{} - revision: {}\n".format(long_message, revision)

    # Create detail record
    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                               action=ActivityAction.CREATE)
    adr.add_value('text/plain', long_message)

    # Create activity record
    ar = ActivityRecord(ActivityType.ENVIRONMENT,
                        message=short_message,
                        linked_commit=commit.hexsha,
                        tags=["environment", "base"],
                        show=True)
    ar.add_detail_object(adr)

    # Store
    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
def sweep_all_changes(self, upload: bool = False, extra_msg: str = None) -> None:
    """Update the manifest, commit any changes, and store a single activity
    record summarizing created/modified/deleted items, then re-link the file
    cache to the new dataset revision and drop the previous revision's cache.

    Args:
        upload(bool): if True, tag the record 'upload' and prefix the message
            with 'Uploaded '
        extra_msg(str): any extra string prepended to the activity record message

    Returns:
        None
    """
    def _item_type(key):
        # Manifest keys ending with the path separator represent directories
        if key[-1] == os.path.sep:
            return 'directory'
        else:
            return 'file'

    previous_revision = self.dataset_revision

    # Update manifest
    status = self.update()

    if len(status.deleted) > 0 or len(status.created) > 0 or len(status.modified) > 0:
        # commit changed manifest file
        self.dataset.git.add_all()
        self.dataset.git.commit("Commit changes to manifest file.")

        # Placeholder message; the real message is assembled below from counts
        ar = ActivityRecord(ActivityType.DATASET,
                            message="msg is set below after detail record processing...",
                            show=True,
                            importance=255,
                            linked_commit=self.dataset.git.commit_hash,
                            tags=[])
        if upload:
            ar.tags.append('upload')

        # One detail record per item; importance decreases with list position
        for cnt, f in enumerate(status.created):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.CREATE)
            msg = f"Created new {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        for cnt, f in enumerate(status.modified):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.EDIT)
            msg = f"Modified {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        for cnt, f in enumerate(status.deleted):
            adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                       importance=max(255 - cnt, 0),
                                       action=ActivityAction.DELETE)
            msg = f"Deleted {_item_type(f)} `{f}`"
            adr.add_value('text/markdown', msg)
            ar.add_detail_object(adr)

        # NOTE(review): these counts include directory keys although the text
        # says "file(s)" -- compare create_update_activity_record, which
        # separates files from folders. Confirm whether that matters here.
        nmsg = f"{len(status.created)} new file(s). " if len(status.created) > 0 else ""
        mmsg = f"{len(status.modified)} modified file(s). " if len(status.modified) > 0 else ""
        dmsg = f"{len(status.deleted)} deleted file(s). " if len(status.deleted) > 0 else ""

        ar.message = f"{extra_msg if extra_msg else ''}" \
                     f"{'Uploaded ' if upload else ''}" \
                     f"{nmsg}{mmsg}{dmsg}"

        ars = ActivityStore(self.dataset)
        ars.create_activity_record(ar)

    # Re-link new revision, unlink old revision
    self.link_revision()
    if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
        shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))