def test_code_and_image(self, redis_client, mock_labbook):
    """Test reading a log and storing a record"""
    # Create a server monitor
    server_monitor = RStudioServerMonitor("test", "test", mock_labbook[2].name,
                                          "foo:activity_monitor:52f5a3a9",
                                          config_file=mock_labbook[0])

    mitmlog = open(f"{os.path.dirname(os.path.realpath(__file__))}/52f5a3a9.rserver.dump", "rb")

    # Read activity and return an aggregated activity record
    server_monitor.process_activity(mitmlog)
    # Call processor
    server_monitor.store_record()

    a_store = ActivityStore(mock_labbook[2])
    ars = a_store.get_activity_records()

    # _detail_objects[x][3] gets the x-th detail object
    code_dict = a_store.get_detail_record(ars[0]._detail_objects[1][3].key).data

    # Check the code results
    assert code_dict['text/markdown'][101:109] == 'y("knitr'

    # Check part of an image
    imgdata = a_store.get_detail_record(ars[1]._detail_objects[1][3].key).data['image/png'][0:20]
    assert imgdata == '/9j/4AAQSkZJRgABAQAA'
def test_multiplecells(self, redis_client, mock_labbook):
    """Make sure that RStudio detects and splits cells"""
    server_monitor = RStudioServerMonitor("test", "test", mock_labbook[2].name,
                                          "foo:activity_monitor:73467b78",
                                          config_file=mock_labbook[0])

    mitmlog = open(f"{os.path.dirname(os.path.realpath(__file__))}/73467b78.rserver.dump", "rb")

    # Read activity and return an aggregated activity record
    server_monitor.process_activity(mitmlog)
    # Call processor
    server_monitor.store_record()

    a_store = ActivityStore(mock_labbook[2])
    ars = a_store.get_activity_records()

    # _detail_objects[x][3] gets the x-th detail object
    cell_1 = a_store.get_detail_record(ars[0]._detail_objects[2][3].key).data
    cell_2 = a_store.get_detail_record(ars[0]._detail_objects[3][3].key).data

    # If the cells were divided, there will be two records
    assert cell_1['text/plain'][55:58] == 'pop'
    assert cell_2['text/plain'][200:204] == 'stan'
def migrate_labbook_schema(labbook: LabBook) -> None:
    # Fallback point in case of a problem
    initial_commit = labbook.git.commit_hash

    try:
        migrate_schema_to_current(labbook.root_dir)
    except Exception as e:
        logger.exception(e)
        call_subprocess(f'git reset --hard {initial_commit}'.split(),
                        cwd=labbook.root_dir)
        raise

    msg = f"Migrate schema to {CURRENT_LABBOOK_SCHEMA}"
    labbook.git.add(labbook.config_path)
    cmt = labbook.git.commit(msg, author=labbook.author, committer=labbook.author)

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True,
                               importance=100, action=ActivityAction.EDIT)
    adr.add_value('text/plain', msg)

    ar = ActivityRecord(ActivityType.LABBOOK, message=msg, show=True,
                        importance=255, linked_commit=cmt.hexsha,
                        tags=['schema', 'update', 'migration'])
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
def remove_docker_snippet(self, name: str) -> None:
    """Remove a custom docker snippet

    Args:
        name: Name or identifier of snippet to remove

    Returns:
        None
    """
    docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    docker_file = os.path.join(docker_dir, f'{name}.yaml')

    if not os.path.exists(docker_file):
        raise ValueError(f'Docker snippet name `{name}` does not exist')

    self.labbook.git.remove(docker_file, keep_file=False)
    short_message = f"Removed custom Docker snippet `{name}`"
    logger.info(short_message)

    commit = self.labbook.git.commit(short_message)
    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                               action=ActivityAction.DELETE)
    adr.add_value('text/plain', short_message)

    ar = ActivityRecord(ActivityType.ENVIRONMENT, message=short_message,
                        show=False, linked_commit=commit.hexsha,
                        tags=["environment", "docker", "snippet"])
    ar.add_detail_object(adr)

    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
def _load_activity_record(self, info):
    """Private method to load an activity record if it has not been
    previously loaded and set"""
    if not self._activity_record:
        # Load record from database
        if not self.commit:
            raise ValueError("Must set `commit` on object creation to resolve detail record")

        # Load store instance
        lb = self._get_loader(info).load(
            f"{get_logged_in_username()}&{self.owner}&{self.name}").get()
        store = ActivityStore(lb)

        # Retrieve record
        self._activity_record = store.get_activity_record(self.commit)

        # Set class properties
        self.linked_commit = self._activity_record.linked_commit
        self.message = self._activity_record.message
        self.type = ActivityRecordTypeEnum.get(self._activity_record.type.value).value
        self.show = self._activity_record.show
        self.tags = self._activity_record.tags
        self.timestamp = self._activity_record.timestamp
        self.importance = self._activity_record.importance
        self.username = self._activity_record.username
        self.email = self._activity_record.email
def mutate_and_get_payload(cls, root, info, owner, labbook_name,
                           description_content, client_mutation_id=None):
    username = get_logged_in_username()
    lb = InventoryManager().load_labbook(username, owner, labbook_name,
                                         author=get_logged_in_author())
    lb.description = description_content

    with lb.lock():
        lb.git.add(lb.config_path)
        commit = lb.git.commit('Updating description')

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        adr.add_value('text/plain', "Updated description of Project")

        ar = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated description of Project",
                            linked_commit=commit.hexsha,
                            tags=["labbook"], show=False)
        ar.add_detail_object(adr)

        ars = ActivityStore(lb)
        ars.create_activity_record(ar)

    return SetLabbookDescription(success=True)
def mutate_and_get_payload(cls, root, info, owner, dataset_name, description,
                           client_mutation_id=None):
    username = get_logged_in_username()
    ds = InventoryManager().load_dataset(username, owner, dataset_name,
                                         author=get_logged_in_author())
    ds.description = description

    with ds.lock():
        ds.git.add(os.path.join(ds.root_dir, '.gigantum/gigantum.yaml'))
        commit = ds.git.commit('Updating description')

        adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
        adr.add_value('text/plain', f"Updated Dataset description: {description}")

        ar = ActivityRecord(ActivityType.LABBOOK,
                            message="Updated Dataset description",
                            linked_commit=commit.hexsha,
                            tags=["dataset"], show=False)
        ar.add_detail_object(adr)

        ars = ActivityStore(ds)
        ars.create_activity_record(ar)

    return SetDatasetDescription(
        updated_dataset=Dataset(owner=owner, name=dataset_name))
def remove_bundled_app(self, name: str) -> None:
    """Remove a bundled app from this labbook

    Args:
        name(str): name of the bundled app

    Returns:
        None
    """
    data = self.get_bundled_apps()
    if name not in data:
        raise ValueError(f"App {name} does not exist. Cannot remove.")

    del data[name]

    with open(self.bundled_app_file, 'wt') as baf:
        json.dump(data, baf)

    # Commit the changes with a message describing the removal
    short_message = f"Removed bundled application: {name}"
    self.labbook.git.add(self.bundled_app_file)
    commit = self.labbook.git.commit(short_message)

    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                               action=ActivityAction.DELETE)
    adr.add_value('text/plain', short_message)

    ar = ActivityRecord(ActivityType.ENVIRONMENT, message=short_message,
                        show=True, linked_commit=commit.hexsha,
                        tags=["environment", "docker", "bundled_app"])
    ar.add_detail_object(adr)

    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
def mutate_and_get_payload(cls, root, info, name, description, repository,
                           base_id, revision, is_untracked=False,
                           client_mutation_id=None):
    username = get_logged_in_username()
    inv_manager = InventoryManager()

    if is_untracked:
        lb = inv_manager.create_labbook_disabled_lfs(username=username,
                                                     owner=username,
                                                     labbook_name=name,
                                                     description=description,
                                                     author=get_logged_in_author())
    else:
        lb = inv_manager.create_labbook(username=username,
                                        owner=username,
                                        labbook_name=name,
                                        description=description,
                                        author=get_logged_in_author())

    if is_untracked:
        FileOperations.set_untracked(lb, 'input')
        FileOperations.set_untracked(lb, 'output')
        input_set = FileOperations.is_set_untracked(lb, 'input')
        output_set = FileOperations.is_set_untracked(lb, 'output')
        if not (input_set and output_set):
            raise ValueError(f'{str(lb)} untracking for input/output in malformed state')
        if not lb.is_repo_clean:
            raise ValueError(f'{str(lb)} should have clean Git state after setting for untracked')

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

    # Create activity record
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created new LabBook: {username}/{name}",
                        show=True, importance=255,
                        linked_commit=lb.git.commit_hash)
    ar.add_detail_object(adr)

    store = ActivityStore(lb)
    store.create_activity_record(ar)

    cm = ComponentManager(lb)
    cm.add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def help_resolve_recent_activity(self, labbook):
    """Method to fetch the 4 most recent activity records with show=True"""
    # Create instance of ActivityStore for this LabBook
    store = ActivityStore(labbook)

    records = list()

    # Page through the store until 4 records with show=True are collected
    after = None
    while len(records) < 4:
        items = store.get_activity_records(first=4, after=after)
        if not items:
            # No more activity records to page through
            break

        for item in items:
            if item.show is True and item.num_detail_objects > 0:
                ar = ActivityRecordObject(
                    id=f"labbook&{self.owner}&{self.name}&{item.commit}",
                    owner=self.owner,
                    name=self.name,
                    _repository_type='labbook',
                    commit=item.commit,
                    _activity_record=item)
                records.append(ar)

                if len(records) >= 4:
                    break

            # Set after cursor to the last commit seen
            after = item.commit

    return records
def _load_detail_record(self, info):
    """Private method to load a detail record if it has not been
    previously loaded and set"""
    if not self._detail_record:
        # Load record from database
        if not self.key:
            raise ValueError("Must set `key` on object creation to resolve detail record")

        # Load store instance
        lb = self._get_loader(info).load(
            f"{get_logged_in_username()}&{self.owner}&{self.name}").get()
        store = ActivityStore(lb)

        # Retrieve record
        self._detail_record: ActivityDetailRecord = store.get_detail_record(self.key)

        # Set class properties
        self.type = ActivityDetailTypeEnum.get(self._detail_record.type.value).value
        self.show = self._detail_record.show
        self.tags = self._detail_record.tags
        self.importance = self._detail_record.importance
        self.action = ActivityActionTypeEnum.get(self._detail_record.action.value).value
def backend_config(self, data: dict) -> None:
    """Save storage config data"""
    if self._backend:
        self._backend.configuration = {**self._backend.configuration, **data}

    # Remove defaults set at runtime that shouldn't be persisted
    if "username" in data:
        del data["username"]
    if "gigantum_bearer_token" in data:
        del data["gigantum_bearer_token"]
    if "gigantum_id_token" in data:
        del data["gigantum_id_token"]

    config_file = os.path.join(self.root_dir, ".gigantum", "backend.json")
    with open(config_file, 'wt') as sf:
        json.dump(data, sf, indent=2)

    self.git.add(config_file)
    cm = self.git.commit("Updating backend config")

    ar = ActivityRecord(ActivityType.DATASET,
                        message="Updated Dataset storage backend configuration",
                        show=True, importance=255,
                        linked_commit=cm.hexsha, tags=['config'])

    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                               importance=255, action=ActivityAction.EDIT)
    d = json.dumps(data, indent=2)
    adr.add_value('text/markdown',
                  f"Updated dataset storage backend configuration:\n\n ```{d}```")
    ar.add_detail_object(adr)

    ars = ActivityStore(self)
    ars.create_activity_record(ar)
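# A minimal usage sketch for the setter above, assuming `backend_config` is
# exposed as a property setter (its use in create_dataset below suggests so)
# and `ds` is a loaded Dataset. The config keys are hypothetical; runtime
# credentials ("username", bearer/id tokens) are stripped before persisting.
ds = InventoryManager().load_dataset('default', 'default', 'my-dataset')
ds.backend_config = {
    "endpoint": "https://example.com/storage",  # hypothetical backend key
    "username": "default"                       # removed before writing backend.json
}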
def add_docker_snippet(self, name: str, docker_content: List[str],
                       description: Optional[str] = None) -> None:
    """Add a custom docker snippet to the environment (replacing custom dependency).

    Args:
        name: Name or identifier of the custom docker snippet
        docker_content: Content of the docker material, one line per list entry
        description: Human-readable verbose description of what the
            snippet is intended to accomplish.

    Returns:
        None
    """
    if not name:
        raise ValueError('Argument `name` cannot be None or empty')

    if not name.replace('-', '').replace('_', '').isalnum():
        raise ValueError('Argument `name` must be alphanumeric string (- and _ accepted)')

    if not docker_content:
        docker_content = []

    file_data = {
        'name': name,
        'timestamp_utc': datetime.datetime.utcnow().isoformat(),
        'description': description or "",
        'content': docker_content
    }

    docker_dir = os.path.join(self.labbook.root_dir, '.gigantum', 'env', 'docker')
    docker_file = os.path.join(docker_dir, f'{name}.yaml')
    os.makedirs(docker_dir, exist_ok=True)

    yaml_dump = yaml.safe_dump(file_data, default_flow_style=False)
    with open(docker_file, 'w') as df:
        df.write(yaml_dump)

    logger.info(f"Wrote custom Docker snippet `{name}` to {str(self.labbook)}")
    short_message = f"Wrote custom Docker snippet `{name}`"
    self.labbook.git.add(docker_file)
    commit = self.labbook.git.commit(short_message)

    adr = ActivityDetailRecord(ActivityDetailType.ENVIRONMENT, show=False,
                               action=ActivityAction.CREATE)
    adr.add_value('text/plain', '\n'.join(docker_content))

    ar = ActivityRecord(ActivityType.ENVIRONMENT, message=short_message,
                        show=True, linked_commit=commit.hexsha,
                        tags=["environment", "docker", "snippet"])
    ar.add_detail_object(adr)

    ars = ActivityStore(self.labbook)
    ars.create_activity_record(ar)
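# Illustrative round trip for the two snippet methods above, assuming `env`
# is the environment/component manager instance that defines them. The
# snippet name and Docker lines are examples only.
env.add_docker_snippet(
    'install-curl',
    docker_content=['RUN apt-get update', 'RUN apt-get install -y curl'],
    description='Install curl into the project container')

# Later, remove it by the same name; a DELETE activity record is created.
env.remove_docker_snippet('install-curl')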
def insert_file(cls, labbook: LabBook, section: str, src_file: str,
                dst_path: str = '') -> Dict[str, Any]:
    """Move the file at `src_file` into the `dst_dir`, overwriting if a
    file already exists there. This calls `copy_into_container()`
    under-the-hood, but will create an activity record.

    Args:
        labbook: Subject labbook
        section: Section name (code, input, output)
        src_file: Full path of file to insert into
        dst_path: Relative path within labbook where `src_file` should be copied to

    Returns:
        dict: The inserted file's info
    """
    finfo = FileOperations.put_file(labbook=labbook, section=section,
                                    src_file=src_file, dst_path=dst_path)

    rel_path = os.path.join(section, finfo['key'])

    activity_type, activity_detail_type, section_str = \
        labbook.get_activity_type_from_section(section)

    commit_msg = f"Added new {section_str} file {rel_path}"
    try:
        labbook.git.add(rel_path)
        commit = labbook.git.commit(commit_msg)
    except Exception as x:
        logger.error(x)
        os.remove(dst_path)
        raise FileOperationsException(x)

    # Create Activity record and detail, tagging the record with the file
    # extension (or 'file' when there is none)
    ext = os.path.splitext(rel_path)[1] or 'file'
    adr = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                               action=ActivityAction.CREATE)
    adr.add_value('text/plain', commit_msg)
    ar = ActivityRecord(activity_type, message=commit_msg, show=True,
                        importance=255, linked_commit=commit.hexsha, tags=[ext])
    ar.add_detail_object(adr)
    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return finfo
def create_directory(self, path: str) -> Dict[str, Any]:
    """Method to create an empty directory in a dataset

    Args:
        path: Relative path to the directory

    Returns:
        dict
    """
    relative_path = self.dataset.make_path_relative(path)
    new_directory_path = os.path.join(self.cache_mgr.cache_root,
                                      self.dataset_revision, relative_path)
    previous_revision = self.dataset_revision

    if os.path.exists(new_directory_path):
        raise ValueError(f"Directory already exists: `{relative_path}`")
    else:
        logger.info(f"Creating new empty directory in `{new_directory_path}`")

        if not os.path.isdir(Path(new_directory_path).parent):
            raise ValueError(f"Parent directory does not exist. Failed to create `{new_directory_path}`")

        # Create the directory
        os.makedirs(new_directory_path)
        self.update()

        if relative_path not in self.manifest:
            raise ValueError("Failed to add directory to manifest")

        # Create detail record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                   importance=0, action=ActivityAction.CREATE)
        msg = f"Created new empty directory `{relative_path}`"
        adr.add_value('text/markdown', msg)
        commit = self.dataset.git.commit(msg)

        # Create activity record
        ar = ActivityRecord(ActivityType.DATASET, message=msg,
                            linked_commit=commit.hexsha, show=True,
                            importance=255, tags=['directory-create'])
        ar.add_detail_object(adr)

        # Store
        ars = ActivityStore(self.dataset)
        ars.create_activity_record(ar)

        # Relink after the commit
        self.link_revision()
        if os.path.isdir(os.path.join(self.cache_mgr.cache_root, previous_revision)):
            shutil.rmtree(os.path.join(self.cache_mgr.cache_root, previous_revision))

        return self.gen_file_info(relative_path)
def helper_resolve_activity_records(self, dataset, kwargs):
    """Helper method to generate ActivityRecord objects and populate the connection"""
    # Create instance of ActivityStore for this dataset
    store = ActivityStore(dataset)

    if kwargs.get('before') or kwargs.get('last'):
        raise ValueError("Only `after` and `first` arguments are supported when paging activity records")

    # Get edges and cursors
    edges = store.get_activity_records(after=kwargs.get('after'),
                                       first=kwargs.get('first'))
    if edges:
        cursors = [x.commit for x in edges]
    else:
        cursors = []

    # Get ActivityRecordObject instances
    edge_objs = []
    for edge, cursor in zip(edges, cursors):
        edge_objs.append(
            ActivityConnection.Edge(node=ActivityRecordObject(
                id=f"dataset&{self.owner}&{self.name}&{edge.commit}",
                owner=self.owner,
                name=self.name,
                _repository_type='dataset',
                commit=edge.commit,
                _activity_record=edge),
                cursor=cursor))

    # Create page info based on first commit. Since only paging backwards
    # right now, just check for the first commit
    if edges:
        has_next_page = True

        # Get the message of the linked commit and check if it is the
        # non-activity-record dataset creation commit
        if len(edges) > 1:
            if edges[-2].linked_commit != "no-linked-commit":
                linked_msg = dataset.git.log_entry(edges[-2].linked_commit)['message']
                if linked_msg == f"Creating new empty Dataset: {dataset.name}" and "_GTM_ACTIVITY_" not in linked_msg:
                    # If you get here, this is the first activity record
                    has_next_page = False

        end_cursor = cursors[-1]
    else:
        has_next_page = False
        end_cursor = None

    page_info = graphene.relay.PageInfo(has_next_page=has_next_page,
                                        has_previous_page=False,
                                        end_cursor=end_cursor)

    return ActivityConnection(edges=edge_objs, page_info=page_info)
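# Forward-only paging sketch matching the helper above, assuming `store` is
# an ActivityStore. Only `first`/`after` are supported; the commit hash of
# the last edge doubles as the cursor for the next page.
edges = store.get_activity_records(first=10)
while edges:
    for record in edges:
        print(record.commit, record.message)
    edges = store.get_activity_records(first=10, after=edges[-1].commit)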
def write_readme(self, contents: str) -> None:
    """Method to write a string to the readme file within the repository.
    Must write the ENTIRE document at once.

    Args:
        contents(str): entire readme document in markdown format

    Returns:
        None
    """
    # Validate readme data: check the type first so the size check below
    # cannot fail with a confusing TypeError
    if not isinstance(contents, str):
        raise TypeError("Invalid content. Must provide string")

    if len(contents) > (1000000 * 5):
        raise ValueError("Readme file is larger than the 5MB limit")

    readme_file = os.path.join(self.root_dir, 'README.md')
    readme_exists = os.path.exists(readme_file)

    # Write file to disk
    with open(readme_file, 'wt') as rf:
        rf.write(contents)

    # Create commit
    if readme_exists:
        commit_msg = "Updated README file"
        action = ActivityAction.EDIT
    else:
        commit_msg = "Added README file"
        action = ActivityAction.CREATE

    self.git.add(readme_file)
    commit = self.git.commit(commit_msg)

    # Create detail record
    adr = ActivityDetailRecord(self._default_activity_detail_type, show=False,
                               importance=0, action=action)
    adr.add_value('text/plain', commit_msg)

    # Create activity record
    ar = ActivityRecord(self._default_activity_type, message=commit_msg,
                        show=False, importance=255,
                        linked_commit=commit.hexsha, tags=['readme'])
    ar.add_detail_object(adr)

    # Store
    ars = ActivityStore(self)
    ars.create_activity_record(ar)
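# A short sketch of the readme flow above; `repo` is assumed to be a LabBook
# or Dataset instance exposing write_readme(). The first call creates the
# file (CREATE action), the second overwrites it in full (EDIT action).
repo.write_readme("# My Project\n\nInitial readme.")
repo.write_readme("# My Project\n\nRevised readme, written in full again.")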
def unlink_dataset_from_labbook(self, dataset_namespace: str, dataset_name: str,
                                labbook: LabBook) -> None:
    """Method to remove a dataset reference from a labbook

    Args:
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook instance from which the dataset is unlinked

    Returns:
        None
    """
    submodule_dir = os.path.join('.gigantum', 'datasets', dataset_namespace, dataset_name)
    call_subprocess(['git', 'rm', '-f', submodule_dir], cwd=labbook.root_dir)

    git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                  f"{dataset_namespace}&{dataset_name}")
    if os.path.exists(git_module_dir):
        shutil.rmtree(git_module_dir)

    absolute_submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                          dataset_namespace, dataset_name)
    if os.path.exists(absolute_submodule_dir):
        shutil.rmtree(absolute_submodule_dir)

    labbook.git.add_all()
    commit = labbook.git.commit("removing submodule ref")

    # Add Activity Record
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                               action=ActivityAction.DELETE)
    adr.add_value('text/markdown',
                  f"Unlinked Dataset `{dataset_namespace}/{dataset_name}` from project")

    ar = ActivityRecord(ActivityType.DATASET,
                        message=f"Unlinked Dataset {dataset_namespace}/{dataset_name} from project.",
                        linked_commit=commit.hexsha,
                        tags=["dataset"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
def update_linked_dataset_reference(self, dataset_namespace: str, dataset_name: str,
                                    labbook: LabBook) -> Dataset:
    """Method to update a linked dataset reference to the latest revision

    Args:
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook instance to which the dataset is linked

    Returns:
        Dataset: the updated dataset instance
    """
    # Load dataset from inside Project directory
    submodule_dir = os.path.join(labbook.root_dir, '.gigantum', 'datasets',
                                 dataset_namespace, dataset_name)
    ds = self.load_dataset_from_directory(submodule_dir, author=labbook.author)
    ds.namespace = dataset_namespace

    # Update the submodule reference with the latest changes
    original_revision = ds.git.repo.head.object.hexsha
    ds.git.pull()
    revision = ds.git.repo.head.object.hexsha

    # If the submodule has changed, commit the changes.
    if original_revision != revision:
        labbook.git.add_all()
        commit = labbook.git.commit("Updating submodule ref")

        # Add Activity Record (an update to the link, so an EDIT action)
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                                   action=ActivityAction.EDIT)
        adr.add_value('text/markdown',
                      f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to {revision}")

        msg = f"Updated Dataset `{dataset_namespace}/{dataset_name}` link to version {revision[0:8]}"
        ar = ActivityRecord(ActivityType.DATASET,
                            message=msg,
                            linked_commit=commit.hexsha,
                            tags=["dataset"],
                            show=True)
        ar.add_detail_object(adr)

        ars = ActivityStore(labbook)
        ars.create_activity_record(ar)

    return ds
def _create_user_note(cls, lb, title, body, tags):
    store = ActivityStore(lb)

    adr = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=255)
    if body:
        adr.add_value('text/markdown', body)

    ar = ActivityRecord(ActivityType.NOTE,
                        message=title,
                        linked_commit="no-linked-commit",
                        importance=255,
                        tags=tags)
    ar.add_detail_object(adr)
    ar = store.create_activity_record(ar)
    return ar
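# Hypothetical caller for the classmethod above, e.g. from inside a user-note
# mutation; `lb` is assumed to be a loaded LabBook and `cls` the enclosing
# mutation class. The title becomes the record message and the body an
# optional markdown detail.
record = cls._create_user_note(lb,
                               title="Ran parameter sweep",
                               body="Results stored in `output/sweep-1/`.",
                               tags=["experiment"])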
def _update_branch_description(cls, lb: LabBook, description: str):
    # Update the description on branch creation
    lb.description = description
    lb.git.add(lb.config_path)
    commit = lb.git.commit('Updating description')

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False)
    adr.add_value('text/plain', description)

    ar = ActivityRecord(ActivityType.LABBOOK,
                        message="Updated description of Project",
                        linked_commit=commit.hexsha,
                        tags=["labbook"], show=False)
    ar.add_detail_object(adr)

    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def __init__(self, user: str, owner: str, labbook_name: str, monitor_key: str,
             config_file: str = None, author_name: Optional[str] = None,
             author_email: Optional[str] = None) -> None:
    """Constructor requires info to load the lab book

    Args:
        user(str): current logged in user
        owner(str): owner of the lab book
        labbook_name(str): name of the lab book
        monitor_key(str): Unique key for the activity monitor in redis
        config_file(str): Optional path to a configuration file
        author_name(str): Name of the user starting this activity monitor
        author_email(str): Email of the user starting this activity monitor
    """
    self.monitor_key = monitor_key

    # List of processor classes that will be invoked in order
    self.processors: List[ActivityProcessor] = []

    # Populate GitAuthor instance if available
    if author_name:
        author: Optional[GitAuthor] = GitAuthor(name=author_name, email=author_email)
    else:
        author = None

    # Load Lab Book instance
    im = InventoryManager(config_file)
    self.labbook = im.load_labbook(user, owner, labbook_name, author=author)

    self.user = user
    self.owner = owner
    self.labbook_name = labbook_name

    # Create ActivityStore instance
    self.activity_store = ActivityStore(self.labbook)

    # A flag indicating if the activity record is OK to store
    self.can_store_activity_record = False
def sweep_uncommitted_changes(self, upload: bool = False,
                              extra_msg: Optional[str] = None,
                              show: bool = False) -> None:
    """Sweep all changes into a commit, and create an activity record.

    NOTE: This method MUST be called inside a lock.

    Args:
        upload(bool): Flag indicating if this was from a batch upload
        extra_msg(str): Optional string used to augment the activity message
        show(bool): Optional flag indicating if the result of this sweep is
            important enough to be shown in the feed

    Returns:
        None
    """
    result_status = self.git.status()
    if any(result_status.values()):
        self.git.add_all()
        self.git.commit("Sweep of uncommitted changes")

        ar = ActivityRecord(self._default_activity_type,
                            message="--overwritten--",
                            show=show,
                            importance=255,
                            linked_commit=self.git.commit_hash,
                            tags=['save'])
        if upload:
            ar.tags.append('upload')

        ar, newcnt, modcnt, delcnt = self.process_sweep_status(ar, result_status)
        nmsg = f"{newcnt} new file(s). " if newcnt > 0 else ""
        mmsg = f"{modcnt} modified file(s). " if modcnt > 0 else ""
        dmsg = f"{delcnt} deleted file(s). " if delcnt > 0 else ""

        message = f"{extra_msg or ''}" \
                  f"{'Uploaded ' if upload else ''}" \
                  f"{nmsg}{mmsg}{dmsg}"

        # Handles an attempted delete of an empty directory. This shouldn't
        # technically happen, but if a user manages to create an empty dir
        # outside the client, we should handle it gracefully.
        ar.message = "No detected changes" if not message else message

        ars = ActivityStore(self)
        ars.create_activity_record(ar)
    else:
        logger.info(f"{str(self)} no changes to sweep.")
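# Illustrative call pattern for the sweep above. It must run inside the
# repository lock; `lb` is assumed to be a loaded LabBook instance, and the
# extra message is an example only.
with lb.lock():
    lb.sweep_uncommitted_changes(upload=False,
                                 extra_msg="Saved work before stopping container. ",
                                 show=True)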
def _record_remove_activity(cls, secret_store, filename, lb):
    """Make an activity record for the removal of the secret."""
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Removed entry from secrets registry.")
    commit = lb.git.commit_hash

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True,
                               action=ActivityAction.DELETE)
    adr.add_value('text/markdown', f"Removed entry for secrets file {filename}")

    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Removed entry for secrets file {filename}",
                        linked_commit=commit,
                        tags=["labbook", "secrets"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def _record_insert_activity(cls, secret_store, filename, lb, mount_path):
    """Make an activity record for the insertion of the secret."""
    lb.git.add(secret_store.secret_path)
    lb.git.commit("Updated secrets registry.")
    commit = lb.git.commit_hash

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=True,
                               action=ActivityAction.CREATE)
    adr.add_value('text/markdown',
                  f"Created new entry for secrets file {filename} "
                  f"to map to {mount_path}")

    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created entry for secrets file {filename}",
                        linked_commit=commit,
                        tags=["labbook", "secrets"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(lb)
    ars.create_activity_record(ar)
def mutate_and_get_payload(cls, root, info, name, description, repository,
                           base_id, revision, is_untracked=False,
                           client_mutation_id=None):
    username = get_logged_in_username()
    inv_manager = InventoryManager()

    lb = inv_manager.create_labbook(username=username,
                                    owner=username,
                                    labbook_name=name,
                                    description=description,
                                    author=get_logged_in_author())

    adr = ActivityDetailRecord(ActivityDetailType.LABBOOK, show=False, importance=0)
    adr.add_value('text/plain', f"Created new LabBook: {username}/{name}")

    # Create activity record
    ar = ActivityRecord(ActivityType.LABBOOK,
                        message=f"Created new LabBook: {username}/{name}",
                        show=True,
                        importance=255,
                        linked_commit=lb.git.commit_hash)
    ar.add_detail_object(adr)

    store = ActivityStore(lb)
    store.create_activity_record(ar)

    cm = ComponentManager(lb)
    cm.add_base(repository, base_id, revision)

    return CreateLabbook(labbook=Labbook(owner=username, name=lb.name))
def _make_move_activity_record(cls, labbook: LabBook, section: str,
                               dst_abs_path: str, commit_msg: str) -> None:
    if os.path.isdir(dst_abs_path):
        labbook.git.add_all(dst_abs_path)
    else:
        labbook.git.add(dst_abs_path)
    commit = labbook.git.commit(commit_msg)

    activity_type, activity_detail_type, section_str = \
        labbook.get_activity_type_from_section(section)

    adr = ActivityDetailRecord(activity_detail_type, show=False, importance=0,
                               action=ActivityAction.EDIT)
    adr.add_value('text/markdown', commit_msg)

    ar = ActivityRecord(activity_type,
                        message=commit_msg,
                        linked_commit=commit.hexsha,
                        show=True,
                        importance=255,
                        tags=['file-move'])
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)
def mock_config_with_activitystore():
    """A pytest fixture that creates an ActivityStore (and labbook) and
    deletes the directory after the test"""
    # Create a temporary working directory
    conf_file, working_dir = _create_temp_work_dir()

    im = InventoryManager(conf_file)
    lb = im.create_labbook('default', 'default', 'labbook1',
                           description="my first labbook",
                           author=GitAuthor("default", "*****@*****.**"))
    store = ActivityStore(lb)

    yield store, lb

    # Remove the temp_dir
    shutil.rmtree(working_dir)
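# A sketch of how the fixture above might be consumed; the record contents
# are illustrative, and the round trip mirrors _create_user_note above.
def test_create_and_fetch_record(mock_config_with_activitystore):
    store, lb = mock_config_with_activitystore

    adr = ActivityDetailRecord(ActivityDetailType.NOTE, show=True, importance=255)
    adr.add_value('text/plain', 'hello from a test')

    ar = ActivityRecord(ActivityType.NOTE, message="test note",
                        linked_commit="no-linked-commit", importance=255)
    ar.add_detail_object(adr)

    # Stored records can be fetched back by their commit hash
    ar = store.create_activity_record(ar)
    fetched = store.get_activity_record(ar.commit)
    assert fetched.message == "test note"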
def link_dataset_to_labbook(self, dataset_url: str, dataset_namespace: str,
                            dataset_name: str, labbook: LabBook) -> Dataset:
    """Link a dataset to a labbook via a git submodule reference

    Args:
        dataset_url: remote URL of the dataset repository
        dataset_namespace: owner (namespace) of the dataset
        dataset_name: name of the dataset
        labbook: labbook instance to which the dataset is linked

    Returns:
        Dataset: the linked dataset instance
    """
    def _clean_submodule():
        """Helper method to clean a submodule reference from a repository"""
        if os.path.exists(absolute_submodule_dir):
            logger.warning(f"Cleaning {relative_submodule_dir} from parent git repo")
            try:
                call_subprocess(['git', 'rm', '-f', '--cached', relative_submodule_dir],
                                cwd=labbook.root_dir)
            except subprocess.CalledProcessError:
                logger.warning(f"git rm on {relative_submodule_dir} failed. Continuing...")

        if os.path.exists(absolute_submodule_dir):
            logger.warning(f"Removing {absolute_submodule_dir} directory")
            shutil.rmtree(absolute_submodule_dir)

        if os.path.exists(git_module_dir):
            logger.warning(f"Removing {git_module_dir} directory")
            shutil.rmtree(git_module_dir)

    relative_submodule_dir = os.path.join('.gigantum', 'datasets',
                                          dataset_namespace, dataset_name)
    absolute_submodule_dir = os.path.join(labbook.root_dir, relative_submodule_dir)
    absolute_submodule_root = os.path.join(labbook.root_dir, '.gigantum',
                                           'datasets', dataset_namespace)
    git_module_dir = os.path.join(labbook.root_dir, '.git', 'modules',
                                  f"{dataset_namespace}&{dataset_name}")

    if not os.path.exists(absolute_submodule_root):
        pathlib.Path(absolute_submodule_root).mkdir(parents=True, exist_ok=True)

    if os.path.exists(absolute_submodule_dir) and os.path.exists(git_module_dir):
        # Seem to be trying to link a dataset after a reset removed the
        # dataset. Clean up first.
        _clean_submodule()

    try:
        # Link dataset via submodule reference
        call_subprocess(['git', 'submodule', 'add', '--name',
                         f"{dataset_namespace}&{dataset_name}",
                         dataset_url, relative_submodule_dir],
                        cwd=labbook.root_dir)
    except subprocess.CalledProcessError:
        logger.warning("Failed to link dataset. Attempting to repair repository and link again.")
        _clean_submodule()

        # Try to add again one more time, allowing a failure to raise an exception
        call_subprocess(['git', 'submodule', 'add', '--name',
                         f"{dataset_namespace}&{dataset_name}",
                         dataset_url, relative_submodule_dir],
                        cwd=labbook.root_dir)

        # If you got here, repair worked and the link is OK
        logger.info("Repository repair and linking retry successful.")

    labbook.git.add_all()
    commit = labbook.git.commit(
        f"adding submodule ref to link dataset {dataset_namespace}/{dataset_name}")

    labbook.git.update_submodules(init=True)
    ds = self.load_dataset_from_directory(absolute_submodule_dir)
    dataset_revision = ds.git.repo.head.commit.hexsha

    # Add Activity Record
    adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False,
                               action=ActivityAction.CREATE)
    adr.add_value('text/markdown',
                  f"Linked Dataset `{dataset_namespace}/{dataset_name}` to "
                  f"project at revision `{dataset_revision}`")

    ar = ActivityRecord(ActivityType.DATASET,
                        message=f"Linked Dataset {dataset_namespace}/{dataset_name} to project.",
                        linked_commit=commit.hexsha,
                        tags=["dataset"],
                        show=True)
    ar.add_detail_object(adr)

    ars = ActivityStore(labbook)
    ars.create_activity_record(ar)

    return ds
def create_dataset(self, username: str, owner: str, dataset_name: str,
                   storage_type: str, description: Optional[str] = None,
                   author: Optional[GitAuthor] = None) -> Dataset:
    """Create a new Dataset in this Gigantum working directory.

    Args:
        username: Active username
        owner: Namespace in which to place this Dataset
        dataset_name: Name of the Dataset
        storage_type: String identifying the type of Dataset to instantiate
        description: Optional brief description of Dataset
        author: Optional Git Author

    Returns:
        Newly created Dataset instance
    """
    dataset = Dataset(config_file=self.config_file, author=author, namespace=owner)

    if storage_type not in storage.SUPPORTED_STORAGE_BACKENDS:
        raise ValueError(f"Unsupported Dataset storage type: {storage_type}")

    try:
        build_info = Configuration(self.config_file).config['build_info']
    except KeyError:
        logger.warning("Could not obtain build_info from config")
        build_info = None

    # Build data file contents
    dataset._data = {
        "schema": DATASET_CURRENT_SCHEMA,
        "id": uuid.uuid4().hex,
        "name": dataset_name,
        "storage_type": storage_type,
        "description": description or '',
        "created_on": datetime.datetime.utcnow().isoformat(),
        "build_info": build_info
    }
    dataset._validate_gigantum_data()

    logger.info("Creating new Dataset on disk for {}/{}/{}".format(
        username, owner, dataset_name))

    # Lock while creating initial directory
    with dataset.lock(lock_key=f"new_dataset_lock|{username}|{owner}|{dataset_name}"):
        # Verify or create user subdirectory; make sure you expand a user dir string
        starting_dir = os.path.expanduser(
            dataset.client_config.config["git"]["working_directory"])
        user_dir = os.path.join(starting_dir, username)
        if not os.path.isdir(user_dir):
            os.makedirs(user_dir)

        # Create owner dir - store LabBooks in working dir > logged in user > owner
        owner_dir = os.path.join(user_dir, owner)
        if not os.path.isdir(owner_dir):
            os.makedirs(owner_dir)

        # Create `datasets` subdir in the owner dir
        owner_dir = os.path.join(owner_dir, "datasets")

        # Verify name not already in use
        if os.path.isdir(os.path.join(owner_dir, dataset_name)):
            raise ValueError(f"Dataset `{dataset_name}` already exists locally. Choose a new Dataset name")

        # Create Dataset subdirectory
        new_root_dir = os.path.join(owner_dir, dataset_name)
        os.makedirs(new_root_dir)
        dataset._set_root_dir(new_root_dir)

        # Init repository
        dataset.git.initialize()

        # Create directory structure
        dirs = [
            'manifest', 'metadata', '.gigantum',
            os.path.join('.gigantum', 'activity'),
            os.path.join('.gigantum', 'activity', 'log')
        ]

        # Create .gitignore default file
        shutil.copyfile(
            os.path.join(resource_filename('gtmcore', 'dataset'), 'gitignore.default'),
            os.path.join(dataset.root_dir, ".gitignore"))

        for d in dirs:
            p = os.path.join(dataset.root_dir, d, '.gitkeep')
            os.makedirs(os.path.dirname(p), exist_ok=True)
            with open(p, 'w') as gk:
                gk.write("This file is necessary to keep this directory tracked by Git"
                         " and archivable by compression tools. Do not delete or modify!")

        dataset._save_gigantum_data()

        # Create an empty storage.json file
        dataset.backend_config = {}

        # Commit
        dataset.git.add_all()

        # NOTE: this string is used to indicate there are no more activity
        # records to get. Changing the string will break activity paging.
        # TODO: Improve method for detecting the first activity record
        dataset.git.commit(f"Creating new empty Dataset: {dataset_name}")

        # Create Activity Record
        adr = ActivityDetailRecord(ActivityDetailType.DATASET, show=False, importance=0)
        adr.add_value('text/plain', f"Created new Dataset: {username}/{dataset_name}")

        ar = ActivityRecord(ActivityType.DATASET,
                            message=f"Created new Dataset: {username}/{dataset_name}",
                            show=True,
                            importance=255,
                            linked_commit=dataset.git.commit_hash)
        ar.add_detail_object(adr)

        store = ActivityStore(dataset)
        store.create_activity_record(ar)

        # Initialize file cache and link revision
        m = Manifest(dataset, username)
        m.link_revision()

    return dataset
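# End-to-end sketch for create_dataset above, assuming `im` is an
# InventoryManager and that 'gigantum_object_v1' is a member of
# storage.SUPPORTED_STORAGE_BACKENDS (illustrative value).
im = InventoryManager()
ds = im.create_dataset('default', 'default', 'my-dataset',
                       storage_type='gigantum_object_v1',
                       description='Example dataset')
assert ds.name == 'my-dataset'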