def helper_resolve_visibility(labbook, info):
    """Resolve the remote visibility of a labbook ("public"/"private"), or "local".

    Looks up the admin service for the configured default git remote, extracts
    the caller's Bearer token from the request headers, and asks the remote
    GitLab service for the repository's visibility.

    Args:
        labbook: LabBook instance whose remote visibility is being resolved
        info: graphene resolver info object (carries the request headers)

    Returns:
        str: the remote 'visibility' value, or "local" if the repository
            does not exist on the remote (GitLabManager raises ValueError)

    Raises:
        ValueError: if no Authorization header is present in the request
        AssertionError: if the remote repo-details response lacks 'visibility'
    """
    # TODO: Future work will look up remote in LabBook data, allowing user to select remote.
    default_remote = labbook.client_config.config['git']['default_remote']
    admin_service = None
    for remote in labbook.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = labbook.client_config.config['git']['remotes'][remote]['admin_service']
            break

    # Extract valid Bearer token
    if "HTTP_AUTHORIZATION" in info.context.headers.environ:
        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    else:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")

    # Get collaborators from remote service
    mgr = GitLabManager(default_remote, admin_service, token)
    try:
        owner = InventoryManager().query_owner(labbook)
        d = mgr.repo_details(namespace=owner, repository_name=labbook.name)
        # Explicit check instead of `assert`: asserts are stripped when Python
        # runs with -O, which would silently hide a malformed remote response.
        # AssertionError is raised explicitly to preserve the original
        # exception type seen by callers.
        if 'visibility' not in d:
            raise AssertionError('Visibility is not in repo details response keys')
        return d.get('visibility')
    except ValueError:
        # GitLabManager signals "repo not found on remote" via ValueError
        return "local"
def mutate_and_get_payload(cls, root, info, owner, labbook_name, pull_only=False,
                           override_method="abort", client_mutation_id=None):
    """Dispatch a background job that syncs a LabBook with its default remote.

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace (owner username) of the LabBook
        labbook_name: name of the LabBook to sync
        pull_only: if True the job only pulls changes from the remote
        override_method: merge-conflict strategy name fed to MergeOverride
            (default "abort")
        client_mutation_id: relay mutation id (unused)

    Returns:
        SyncLabbook payload containing the dispatched background job key

    Raises:
        ValueError: if no valid Authorization header is present, or the
            admin service for the default remote cannot be found
    """
    # Load LabBook
    username = get_logged_in_username()
    lb = InventoryManager().load_labbook(username, owner, labbook_name,
                                         author=get_logged_in_author())

    # Extract valid Bearer token
    token = None
    if hasattr(info.context.headers, 'environ'):
        if "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    if not token:
        raise ValueError("Authorization header not provided. "
                         "Must have a valid session to query for collaborators")

    # Find the admin service that matches the configured default remote
    default_remote = lb.client_config.config['git']['default_remote']
    admin_service = None
    for remote in lb.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = lb.client_config.config['git']['remotes'][remote]['admin_service']
            break
    if not admin_service:
        raise ValueError('admin_service could not be found')

    # Configure git creds
    mgr = GitLabManager(default_remote, admin_service, access_token=token)
    mgr.configure_git_credentials(default_remote, username)

    override = MergeOverride(override_method)

    # Run the sync in a background worker so this request returns immediately
    job_metadata = {'method': 'sync_labbook', 'labbook': lb.key}
    job_kwargs = {'repository': lb,
                  'pull_only': pull_only,
                  'username': username,
                  'override': override}
    dispatcher = Dispatcher()
    job_key = dispatcher.dispatch_task(jobs.sync_repository, kwargs=job_kwargs,
                                       metadata=job_metadata)
    logger.info(f"Syncing LabBook {lb.root_dir} in background job with key {job_key.key_str}")

    return SyncLabbook(job_key=job_key.key_str)
def resolve_repository_name_is_available(self, info, name: str):
    """Resolver to check if a repository name is in use locally or remotely

    Args:
        info: graphene resolver info object
        name: desired name for the repository

    Returns:
        bool: True when the name is free both locally and on the remote
    """
    username = get_logged_in_username()

    # Name taken locally?
    if InventoryManager().repository_exists(username, username, name):
        return False

    # Name taken remotely? Pull remote + admin-service from config (may be None).
    remote_config = Configuration().get_remote_configuration()
    remote = remote_config.get('git_remote') if remote_config else None
    auth_service = remote_config.get('admin_service') if remote_config else None

    mgr = GitLabManager(remote, auth_service, flask.g.access_token)
    return not mgr.repository_exists(username, name)
def _configure_git(cls, lb, info) -> GitLabManager:
    """Build a GitLabManager for the default remote and configure git creds.

    Extracts the caller's Bearer token from the request, resolves the admin
    service for the configured default remote, and writes git credentials for
    the logged-in user.

    Raises:
        ValueError: on a missing Authorization header or missing admin service
    """
    # TODO - This code is duplicated all over the place, must be refactored.
    token = None
    environ = getattr(info.context.headers, 'environ', None)
    if environ is not None and "HTTP_AUTHORIZATION" in environ:
        token = parse_token(environ["HTTP_AUTHORIZATION"])
    if not token:
        raise ValueError("Authorization header not provided. "
                         "Must have a valid session to query for collaborators")

    git_config = lb.client_config.config['git']
    default_remote = git_config['default_remote']
    remote_entry = git_config['remotes'].get(default_remote)
    admin_service = remote_entry['admin_service'] if remote_entry else None
    if not admin_service:
        raise ValueError('admin_service could not be found')

    # Configure git creds
    mgr = GitLabManager(default_remote, admin_service, access_token=token)
    mgr.configure_git_credentials(default_remote, get_logged_in_username())
    return mgr
def _fetch_collaborators(self, labbook, info): """Helper method to fetch this labbook's collaborators Args: info: The graphene info object for this requests """ # TODO: Future work will look up remote in LabBook data, allowing user to select remote. default_remote = labbook.client_config.config['git']['default_remote'] admin_service = None for remote in labbook.client_config.config['git']['remotes']: if default_remote == remote: admin_service = labbook.client_config.config['git']['remotes'][remote]['admin_service'] break # Extract valid Bearer token if "HTTP_AUTHORIZATION" in info.context.headers.environ: token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"]) else: raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators") # Get collaborators from remote service mgr = GitLabManager(default_remote, admin_service, token) try: self._collaborators = [Collaborator(owner=self.owner, name=self.name, collaborator_username=c[1], permission=ProjectPermissions(c[2]).name) for c in mgr.get_collaborators(self.owner, self.name)] except ValueError: # If ValueError Raised, assume repo doesn't exist yet self._collaborators = []
def mutate_and_get_payload(cls, root, info, owner, labbook_name, username, permissions,
                           client_mutation_id=None):
    """Add a collaborator to a labbook's remote repo, or update their permission.

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace of the labbook
        labbook_name: repository name
        username: the collaborator to add (the recipient, not the caller)
        permissions: one of 'readonly', 'readwrite', 'owner'
        client_mutation_id: relay mutation id (unused)

    Returns:
        AddLabbookCollaborator payload wrapping the updated labbook

    Raises:
        ValueError: on a missing Authorization header or unknown permission
    """
    # TODO(billvb/dmk) - Here "username" refers to the intended recipient username.
    # it should probably be renamed here and in the frontend to "collaboratorUsername"
    logged_in_username = get_logged_in_username()
    lb = InventoryManager().load_labbook(logged_in_username, owner, labbook_name,
                                         author=get_logged_in_author())

    # TODO: Future work will look up remote in LabBook data, allowing user to select remote.
    default_remote = lb.client_config.config['git']['default_remote']
    admin_service = None
    for remote in lb.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = lb.client_config.config['git']['remotes'][remote]['admin_service']
            break

    # Extract valid Bearer token
    if "HTTP_AUTHORIZATION" in info.context.headers.environ:
        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    else:
        raise ValueError("Authorization header not provided. "
                         "Must have a valid session to query for collaborators")

    # Map the API-level permission string onto the project permission enum
    if permissions == 'readonly':
        perm = ProjectPermissions.READ_ONLY
    elif permissions == 'readwrite':
        perm = ProjectPermissions.READ_WRITE
    elif permissions == 'owner':
        perm = ProjectPermissions.OWNER
    else:
        raise ValueError(f"Unknown permission set: {permissions}")

    mgr = GitLabManager(default_remote, admin_service, token)

    existing_collabs = mgr.get_collaborators(owner, labbook_name)

    if username not in [n[1] for n in existing_collabs]:
        # Bug fix: the concatenated f-string fragments were missing a space
        # ("...labbook_name}with permission...")
        logger.info(f"Adding user {username} to {owner}/{labbook_name} "
                    f"with permission {perm}")
        mgr.add_collaborator(owner, labbook_name, username, perm)
    else:
        # Changing a permission is implemented as delete + re-add.
        # Bug fix: missing space in the concatenated log message here too.
        logger.warning(f"Changing permission of {username} on "
                       f"{owner}/{labbook_name} to {perm}")
        mgr.delete_collaborator(owner, labbook_name, username)
        mgr.add_collaborator(owner, labbook_name, username, perm)
    create_data = {"owner": owner,
                   "name": labbook_name}
    return AddLabbookCollaborator(updated_labbook=Labbook(**create_data))
def mutate_and_get_payload(cls, root, info, owner, labbook_name, remote_url,
                           client_mutation_id=None):
    """Dispatch a background job that imports a labbook from a remote git URL.

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace of the labbook (unused here beyond the schema contract)
        labbook_name: name of the labbook (unused here beyond the schema contract)
        remote_url: git URL of the remote labbook to import
        client_mutation_id: relay mutation id (unused)

    Returns:
        ImportRemoteLabbook payload containing the background job key

    Raises:
        ValueError: if no Authorization header is present
    """
    username = get_logged_in_username()
    logger.info(f"Importing remote labbook from {remote_url}")
    # A bare LabBook instance is created only to read client configuration
    lb = LabBook(author=get_logged_in_author())
    default_remote = lb.client_config.config['git']['default_remote']
    admin_service = None
    for remote in lb.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = lb.client_config.config['git']['remotes'][remote]['admin_service']
            break

    # Extract valid Bearer token
    if hasattr(info.context, 'headers') and "HTTP_AUTHORIZATION" in info.context.headers.environ:
        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    else:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")

    # Configure git credentials so the background job can clone the remote
    gl_mgr = GitLabManager(default_remote, admin_service=admin_service,
                           access_token=token)
    gl_mgr.configure_git_credentials(default_remote, username)

    # Run the import in a background worker so the request returns immediately
    job_metadata = {'method': 'import_labbook_from_remote'}
    job_kwargs = {'remote_url': remote_url, 'username': username}
    dispatcher = Dispatcher()
    job_key = dispatcher.dispatch_task(jobs.import_labbook_from_remote,
                                       metadata=job_metadata, kwargs=job_kwargs)
    logger.info(f"Dispatched import_labbook_from_remote({remote_url}) to Job {job_key}")

    return ImportRemoteLabbook(job_key=job_key.key_str)
def mutate_and_get_payload(cls, root, info, owner, dataset_name, remote_url,
                           client_mutation_id=None):
    """Import a dataset from a remote git URL (synchronously) and return its edge.

    Unlike the labbook import, this runs the clone in-request via
    DatasetWorkflow rather than dispatching a background job.

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace of the dataset (unused here beyond the schema contract)
        dataset_name: name of the dataset (unused here beyond the schema contract)
        remote_url: git URL of the remote dataset to import
        client_mutation_id: relay mutation id (unused)

    Returns:
        ImportRemoteDataset payload containing the new dataset edge

    Raises:
        ValueError: if no Authorization header is present
    """
    username = get_logged_in_username()
    logger.info(f"Importing remote dataset from {remote_url}")
    # A bare Dataset instance is created only to read client configuration
    ds = Dataset(author=get_logged_in_author())

    default_remote = ds.client_config.config['git']['default_remote']
    admin_service = None
    for remote in ds.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = ds.client_config.config['git']['remotes'][remote]['admin_service']
            break

    # Extract valid Bearer token
    if hasattr(info.context, 'headers') and "HTTP_AUTHORIZATION" in info.context.headers.environ:
        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    else:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")

    # Configure git credentials before cloning
    gl_mgr = GitLabManager(default_remote, admin_service=admin_service,
                           access_token=token)
    gl_mgr.configure_git_credentials(default_remote, username)

    wf = DatasetWorkflow.import_from_remote(remote_url, username=username)
    ds = wf.dataset

    import_owner = InventoryManager().query_owner(ds)
    # TODO: Fix cursor implementation, this currently doesn't make sense
    cursor = base64.b64encode(f"{0}".encode('utf-8'))
    dsedge = DatasetConnection.Edge(node=DatasetObject(owner=import_owner, name=ds.name),
                                    cursor=cursor)
    return ImportRemoteDataset(new_dataset_edge=dsedge)
def create_remote_gitlab_repo(repository: Repository, username: str, visibility: str,
                              access_token: Optional[str] = None) -> None:
    """Create a new repository in GitLab and attach it as the 'origin' remote.

    Note: It may make more sense to factor this out later on.

    Args:
        repository: local Repository whose remote counterpart should be created
        username: logged-in username used for git credentials and the remote URL
        visibility: visibility to create the remote repo with
        access_token: Bearer token; a placeholder is sent when not provided

    Raises:
        ValueError: if no admin service is configured for the default remote
        GitLabRemoteError: wrapping any failure during remote creation
    """
    default_remote = repository.client_config.config['git']['default_remote']
    admin_service = None
    for remote in repository.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = repository.client_config.config['git']['remotes'][remote]['admin_service']
            break
    if not admin_service:
        raise ValueError('admin_service could not be found')

    try:
        # Add collaborator to remote service
        mgr = GitLabManager(default_remote, admin_service,
                            access_token=access_token or 'invalid')
        mgr.configure_git_credentials(default_remote, username)
        mgr.create_labbook(namespace=InventoryManager().query_owner(repository),
                           labbook_name=repository.name,
                           visibility=visibility)
        # NOTE(review): the remote URL uses `username` while create_labbook used
        # the repository owner namespace -- confirm these always match here.
        repository.add_remote("origin",
                              f"https://{default_remote}/{username}/{repository.name}.git")
    except Exception as e:
        # Chain the cause so the underlying GitLab failure's traceback survives
        raise GitLabRemoteError(e) from e
def mutate_and_get_payload(cls, root, info, owner, dataset_name, username,
                           client_mutation_id=None):
    """Remove a collaborator from a dataset's remote repository.

    Raises:
        ValueError: if no Authorization header is present
    """
    logged_in_username = get_logged_in_username()
    ds = InventoryManager().load_dataset(logged_in_username, owner, dataset_name,
                                         author=get_logged_in_author())

    # TODO: Future work will look up remote in LabBook data, allowing user to select remote.
    git_config = ds.client_config.config['git']
    default_remote = git_config['default_remote']
    admin_service = None
    for remote_name in git_config['remotes']:
        if remote_name == default_remote:
            admin_service = git_config['remotes'][remote_name]['admin_service']
            break

    # Extract valid Bearer token
    environ = info.context.headers.environ
    if "HTTP_AUTHORIZATION" not in environ:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")
    token = parse_token(environ["HTTP_AUTHORIZATION"])

    # Remove the collaborator via the remote service
    mgr = GitLabManager(default_remote, admin_service, token)
    mgr.delete_collaborator(owner, dataset_name, username)

    updated = DatasetObject(owner=owner, name=dataset_name)
    return DeleteDatasetCollaborator(updated_dataset=updated)
def resolve_visibility(self, info):
    """Resolve this repository's remote visibility, or "unknown" if absent.

    Raises:
        ValueError: if no Authorization header is present
    """
    git_config = Configuration().config['git']
    default_remote = git_config['default_remote']
    remote_entry = git_config['remotes'].get(default_remote)
    admin_service = remote_entry['admin_service'] if remote_entry else None

    # Extract valid Bearer token
    environ = info.context.headers.environ
    if "HTTP_AUTHORIZATION" not in environ:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")
    token = parse_token(environ["HTTP_AUTHORIZATION"])

    # Ask the remote service for the repository details
    mgr = GitLabManager(default_remote, admin_service, token)
    try:
        details = mgr.repo_details(namespace=self.owner, repository_name=self.name)
        return details.get('visibility')
    except ValueError:
        # Repo not found on the remote
        return "unknown"
def mutate_and_get_payload(cls, root, info, owner, dataset_name, visibility,
                           client_mutation_id=None):
    """Set the remote visibility of a dataset ('public' or 'private').

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace of the dataset
        dataset_name: dataset repository name
        visibility: 'public' or 'private'
        client_mutation_id: relay mutation id (unused)

    Returns:
        SetDatasetVisibility payload containing the updated dataset edge

    Raises:
        ValueError: on an invalid visibility value, missing Authorization
            header, or missing admin service
    """
    # Validate up-front so a bad value fails fast, before any remote side
    # effects (git credential configuration). Also fixes the missing space
    # after ';' in the concatenated error message fragments.
    if visibility not in ['public', 'private']:
        raise ValueError(f'Visibility must be either "public" or "private"; '
                         f'("{visibility}" invalid)')

    # Load Dataset
    username = get_logged_in_username()
    ds = InventoryManager().load_dataset(username, owner, dataset_name,
                                         author=get_logged_in_author())

    # Extract valid Bearer token
    token = None
    if hasattr(info.context.headers, 'environ'):
        if "HTTP_AUTHORIZATION" in info.context.headers.environ:
            token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
    if not token:
        raise ValueError("Authorization header not provided. Must have a valid session to query for collaborators")

    default_remote = ds.client_config.config['git']['default_remote']
    admin_service = None
    for remote in ds.client_config.config['git']['remotes']:
        if default_remote == remote:
            admin_service = ds.client_config.config['git']['remotes'][remote]['admin_service']
            break
    if not admin_service:
        raise ValueError('admin_service could not be found')

    # Configure git creds
    mgr = GitLabManager(default_remote, admin_service, access_token=token)
    mgr.configure_git_credentials(default_remote, username)

    # Hold the dataset lock while mutating remote state
    with ds.lock():
        mgr.set_visibility(namespace=owner, repository_name=dataset_name,
                           visibility=visibility)

    cursor = base64.b64encode(f"{0}".encode('utf-8'))
    dsedge = DatasetConnection.Edge(node=DatasetObject(owner=owner, name=dataset_name),
                                    cursor=cursor)
    return SetDatasetVisibility(new_dataset_edge=dsedge)
def mutate_and_get_payload(cls, root, info, owner, labbook_name, confirm,
                           client_mutation_id=None):
    """Delete a labbook from the remote repository and cloud index.

    When ``confirm`` is not True this is a dry run and nothing is deleted.
    Index-service removal is best-effort: a failure is logged but the
    mutation still returns success=True.

    Args:
        root: graphene root object
        info: graphene resolver info object (carries request headers for auth)
        owner: namespace of the labbook
        labbook_name: repository name to delete remotely
        confirm: must be True to actually perform the delete
        client_mutation_id: relay mutation id (unused)

    Returns:
        DeleteLabbook payload; success=True only when the remote repo was deleted

    Raises:
        ValueError: on a missing Authorization header or missing admin service
    """
    if confirm is True:
        # Load config data
        configuration = Configuration().config

        # Extract valid Bearer token
        token = None
        if hasattr(info.context.headers, 'environ'):
            if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
        if not token:
            raise ValueError("Authorization header not provided. Cannot perform remote delete operation.")

        # Get remote server configuration
        default_remote = configuration['git']['default_remote']
        admin_service = None
        for remote in configuration['git']['remotes']:
            if default_remote == remote:
                admin_service = configuration['git']['remotes'][remote]['admin_service']
                # index_service is only bound when a matching remote exists;
                # the admin_service check below guards against it being unbound
                index_service = configuration['git']['remotes'][remote]['index_service']
                break
        if not admin_service:
            raise ValueError('admin_service could not be found')

        # Perform delete operation
        mgr = GitLabManager(default_remote, admin_service, access_token=token)
        mgr.remove_repository(owner, labbook_name)
        logger.info(f"Deleted {owner}/{labbook_name} from the remote repository {default_remote}")

        # Call Index service to remove project from cloud index and search
        # Don't raise an exception if the index delete fails, since this can be handled relatively gracefully
        # for now, but do return success=false
        success = True
        access_token = flask.g.get('access_token', None)
        id_token = flask.g.get('id_token', None)
        repo_id = mgr.get_repository_id(owner, labbook_name)
        response = requests.delete(f"https://{index_service}/index/{repo_id}",
                                   headers={"Authorization": f"Bearer {access_token}",
                                            "Identity": id_token}, timeout=10)

        if response.status_code != 204:
            logger.error(f"Failed to remove project from cloud index. "
                         f"Status Code: {response.status_code}")
            logger.error(response.json())
        else:
            logger.info(f"Deleted remote repository {owner}/{labbook_name} from cloud index")

        # Remove locally any references to that cloud repo that's just been deleted.
        try:
            username = get_logged_in_username()
            lb = InventoryManager().load_labbook(username, owner, labbook_name,
                                                 author=get_logged_in_author())
            lb.remove_remote()
            lb.remove_lfs_remotes()
        except GigantumException as e:
            # Best-effort cleanup: a missing local copy is not a failure
            logger.warning(e)

        return DeleteLabbook(success=True)
    else:
        logger.info(f"Dry run deleting {labbook_name} from remote repository -- not deleted.")
        return DeleteLabbook(success=False)
def check_and_import_dataset(logged_in_username: str, dataset_owner: str, dataset_name: str,
                             remote_url: str, access_token: Optional[str] = None,
                             config_file: Optional[str] = None) -> None:
    """Job to check if a dataset exists in the user's working directory, and if not import it. This is primarily
    used when importing, syncing, or switching branches on a project with linked datasets

    Args:
        logged_in_username: username for the currently logged in user
        dataset_owner: Owner of the labbook if this dataset is linked
        dataset_name: Name of the labbook if this dataset is linked
        remote_url: URL of the dataset to import if needed
        access_token: The current user's access token, needed to initialize git credentials in certain situations
        config_file: config file (used for test mocking)

    Returns:
        None
    """
    logger = LMLogger.get_logger()
    p = os.getpid()
    try:
        logger.info(f"(Job {p}) Starting check_and_import_dataset(logged_in_username={logged_in_username},"
                    f"dataset_owner={dataset_owner}, dataset_name={dataset_name}")

        im = InventoryManager(config_file=config_file)

        try:
            # Check for dataset already existing in the user's working directory
            im.load_dataset(logged_in_username, dataset_owner, dataset_name)
            logger.info(f"{logged_in_username}/{dataset_owner}/{dataset_name} exists. Skipping auto-import.")
            return
        except InventoryException:
            # Dataset not found, import it
            logger.info(f"{logged_in_username}/{dataset_owner}/{dataset_name} not found. "
                        f"Auto-importing remote dataset from {remote_url}")
            config_obj = Configuration(config_file=config_file)

            if access_token:
                # If the access token is set, git creds should be configured
                # before the clone so authenticated remotes work.
                remote_parts = urlsplit(remote_url)
                if remote_parts.netloc:
                    # Normalize to "scheme://host/" to match config remote keys
                    remote_target = f"{remote_parts.scheme}://{remote_parts.netloc}/"
                else:
                    # URL without a netloc (e.g. a plain path)
                    remote_target = remote_parts.path

                admin_service = None
                for remote in config_obj.config['git']['remotes']:
                    if remote == remote_target:
                        admin_service = config_obj.config['git']['remotes'][remote]['admin_service']
                        break
                if not admin_service:
                    raise ValueError(f"Failed to configure admin service URL based on target remote: {remote_target}")

                gl_mgr = GitLabManager(remote_target, admin_service=admin_service,
                                       access_token=access_token)
                gl_mgr.configure_git_credentials(remote_target, logged_in_username)

            gitworkflows_utils.clone_repo(remote_url=remote_url, username=logged_in_username,
                                          owner=dataset_owner,
                                          load_repository=im.load_dataset_from_directory,
                                          put_repository=im.put_dataset)
            logger.info(f"{logged_in_username}/{dataset_owner}/{dataset_name} auto-imported successfully")
    except Exception as err:
        # Log and re-raise so the job framework records the failure
        logger.error(f"(Job {p}) Error in check_and_import_dataset job")
        logger.exception(err)
        raise
def mutate_and_get_payload(cls, root, info, owner, dataset_name, local=False, remote=False,
                           client_mutation_id=None):
    """Delete a dataset locally and/or remotely.

    Remote deletion removes the dataset's managed file contents, the remote
    repository, and (best-effort) its cloud index entry. Local deletion
    removes the working copy and schedules a file-cache cleanup job.

    Args:
        root: graphene root object
        info: graphene resolver info object
        owner: namespace of the dataset
        dataset_name: dataset repository name
        local: if True delete the local working copy
        remote: if True delete the remote repository
        client_mutation_id: relay mutation id (unused)

    Returns:
        DeleteDataset payload with local_deleted / remote_deleted flags

    Raises:
        ValueError: if remote deletion is requested without a valid session
            or without a local copy of the dataset
    """
    logged_in_user = get_logged_in_username()
    local_deleted = False
    remote_deleted = False

    if remote:
        logger.info(f"Deleting remote Dataset {owner}/{dataset_name}")

        # Extract valid Bearer token
        access_token = flask.g.get('access_token', None)
        id_token = flask.g.get('id_token', None)
        if not access_token or not id_token:
            raise ValueError("Deleting a remote Dataset requires a valid session.")

        try:
            ds = InventoryManager().load_dataset(logged_in_user, owner, dataset_name,
                                                 author=get_logged_in_author())
        except InventoryException:
            raise ValueError("A dataset must exist locally to delete it in the remote.")

        # Delete the dataset's files if supported
        if ds.is_managed():
            ds.backend.set_default_configuration(logged_in_user, access_token, id_token)
            ds.backend.delete_contents(ds)

        # Get remote server configuration
        config = Configuration()
        remote_config = config.get_remote_configuration()

        # Delete the repository
        mgr = GitLabManager(remote_config['git_remote'],
                            remote_config['admin_service'],
                            access_token=access_token)
        mgr.remove_repository(owner, dataset_name)
        logger.info(f"Deleted {owner}/{dataset_name} repository from the"
                    f" remote repository {remote_config['git_remote']}")

        # Call Index service to remove project from cloud index and search
        # Don't raise an exception if the index delete fails, since this can be handled relatively gracefully
        repo_id = mgr.get_repository_id(owner, dataset_name)
        response = requests.delete(f"https://{remote_config['index_service']}/index/{repo_id}",
                                   headers={"Authorization": f"Bearer {access_token}",
                                            "Identity": id_token}, timeout=30)

        if response.status_code != 204:
            # Soft failure, still continue
            logger.error(f"Failed to remove {owner}/{dataset_name} from cloud index. "
                         f"Status Code: {response.status_code}")
            logger.error(response.json())
        else:
            logger.info(f"Deleted remote repository {owner}/{dataset_name} from cloud index")

        # Remove locally any references to that cloud repo that's just been deleted.
        try:
            ds.remove_remote()
        except GigantumException as e:
            logger.warning(e)

        remote_deleted = True

    if local:
        logger.info(f"Deleting local Dataset {owner}/{dataset_name}")

        # Delete the dataset
        dataset_delete_job = InventoryManager().delete_dataset(logged_in_user, owner, dataset_name)
        local_deleted = True

        # Schedule Job to clear file cache if dataset is no longer in use
        job_metadata = {'method': 'clean_dataset_file_cache'}
        job_kwargs = {
            'logged_in_username': logged_in_user,
            'dataset_owner': dataset_delete_job.namespace,
            'dataset_name': dataset_delete_job.name,
            'cache_location': dataset_delete_job.cache_root
        }
        dispatcher = Dispatcher()
        job_key = dispatcher.dispatch_task(jobs.clean_dataset_file_cache,
                                           metadata=job_metadata, kwargs=job_kwargs)
        logger.info(f"Dispatched clean_dataset_file_cache({owner}/{dataset_name}) to Job {job_key}")

    return DeleteDataset(local_deleted=local_deleted, remote_deleted=remote_deleted)
def mutate_and_get_payload(cls, root, info, labbook_owner, labbook_name, dataset_owner,
                           dataset_name, action, dataset_url=None, client_mutation_id=None):
    """Link, unlink, or update a dataset reference on a labbook.

    Args:
        root: graphene root object
        info: graphene resolver info object (headers + dataloaders)
        labbook_owner: namespace of the labbook being modified
        labbook_name: name of the labbook being modified
        dataset_owner: namespace of the dataset
        dataset_name: name of the dataset
        action: one of 'link', 'unlink', 'update'
        dataset_url: remote dataset URL; when omitted on 'link', the local
            dataset's git directory is used instead
        client_mutation_id: relay mutation id (unused)

    Returns:
        ModifyDatasetLink payload with the updated labbook edge

    Raises:
        ValueError: on a missing Authorization header (remote link) or an
            unsupported action
    """
    logged_in_username = get_logged_in_username()
    im = InventoryManager()
    lb = im.load_labbook(logged_in_username, labbook_owner, labbook_name,
                         author=get_logged_in_author())

    # All link modifications happen under the labbook lock
    with lb.lock():
        if action == 'link':
            if dataset_url:
                remote_domain = cls._get_remote_domain(dataset_url, dataset_owner, dataset_name)

                if remote_domain:
                    # Make sure git creds are configured for the remote
                    admin_service = None
                    for remote in lb.client_config.config['git']['remotes']:
                        if remote_domain == remote:
                            admin_service = lb.client_config.config['git']['remotes'][remote]['admin_service']
                            break

                    if "HTTP_AUTHORIZATION" in info.context.headers.environ:
                        token = parse_token(info.context.headers.environ["HTTP_AUTHORIZATION"])
                    else:
                        raise ValueError("Authorization header not provided."
                                         " Must have a valid session to query for collaborators")

                    mgr = GitLabManager(remote_domain, admin_service, token)
                    mgr.configure_git_credentials(remote_domain, logged_in_username)
            else:
                # Link to local dataset
                ds = im.load_dataset(logged_in_username, dataset_owner, dataset_name)
                dataset_url = f"{ds.root_dir}/.git"

            # Link the dataset to the labbook
            ds = im.link_dataset_to_labbook(dataset_url, dataset_owner, dataset_name, lb)
            ds.namespace = dataset_owner

            # Preload the dataloader
            info.context.dataset_loader.prime(f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}", ds)

            # Relink the revision
            m = Manifest(ds, logged_in_username)
            m.link_revision()
        elif action == 'unlink':
            im.unlink_dataset_from_labbook(dataset_owner, dataset_name, lb)
        elif action == 'update':
            ds = im.update_linked_dataset_reference(dataset_owner, dataset_name, lb)
            m = Manifest(ds, logged_in_username)
            m.force_reload()

            info.context.dataset_loader.prime(f"{get_logged_in_username()}&{dataset_owner}&{dataset_name}", ds)
        else:
            raise ValueError("Unsupported action. Use `link`, `unlink`, or `update`")

    info.context.labbook_loader.prime(f"{get_logged_in_username()}&{labbook_owner}&{labbook_name}", lb)
    edge = LabbookConnection.Edge(node=Labbook(owner=labbook_owner, name=labbook_name),
                                  cursor=base64.b64encode(f"{0}".encode('utf-8')))

    return ModifyDatasetLink(new_labbook_edge=edge)
def test_repository_id(self):
    """Verify repository IDs are URL-encoded as namespace%2Fname"""
    repo_id = GitLabManager.get_repository_id("tester", "test-lb-1")
    assert repo_id == "tester%2Ftest-lb-1"
def gitlab_mngr_fixture():
    """A pytest fixture that returns a GitLabRepositoryManager instance"""
    manager = GitLabManager("repo.gigantum.io",
                            "usersrv.gigantum.io",
                            "fakeaccesstoken")
    yield manager