import os

import pytest
from fair_research_login import ConfigParserTokenStorage, NativeClient


@pytest.fixture
def live_client_destructive():
    # Build a NativeClient backed by a throwaway config file so the test can
    # safely revoke tokens and delete local state during teardown.
    storage = ConfigParserTokenStorage(filename='integ_testing_destruct.cfg')
    client = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5',
                          token_storage=storage,
                          default_scopes=['openid'])
    yield client
    # Teardown: revoke tokens and remove the temporary token file.
    client.logout()
    os.unlink('integ_testing_destruct.cfg')
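# ---------------------------------------------------------------------------
# Not from the original source: a sketch of how a test might consume the
# fixture above. It performs a real login flow, so it is illustrative only.
def test_live_login_and_logout(live_client_destructive):
    tokens = live_client_destructive.login(requested_scopes=['openid'])
    # NativeClient.login() returns tokens keyed by resource server.
    assert 'auth.globus.org' in tokens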
def test_json_token_storage(mock_tokens, mock_revoke, monkeypatch):
    cli = NativeClient(client_id=str(uuid.uuid4()),
                       token_storage=JSONTokenStorage())
    # Mock the actual call to open(). Catch the data 'written' and use it in
    # the load function. This is a cheap and easy (and hacky) way to test that
    # the data read back is the same as the data written in.
    monkeypatch.setattr(os.path, 'exists', lambda x: True)
    mo = mock_open()
    with patch(BUILTIN_OPEN, mo):
        cli.save_tokens(mock_tokens)
    written = ''.join([c[1][0] for c in mo().write.mock_calls])
    with patch(BUILTIN_OPEN, mock_open(read_data=written)):
        tokens = cli.load_tokens()
    assert tokens == MOCK_TOKEN_SET

    mock_remove = Mock()
    with patch('os.remove', mock_remove):
        cli.logout()
    assert mock_remove.called
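# ---------------------------------------------------------------------------
# Not from the original source: the mock_open() write/read round-trip used in
# the test above, isolated into a self-contained sketch. The save()/load()
# helpers are hypothetical stand-ins for any code that writes then reads a file.
import json
from unittest.mock import mock_open, patch


def save(data, path):
    with open(path, 'w') as f:
        json.dump(data, f)


def load(path):
    with open(path) as f:
        return json.load(f)


def test_mock_open_round_trip():
    mo = mock_open()
    with patch('builtins.open', mo):
        save({'a': 1}, 'fake.json')
    # Stitch together everything "written", then replay it as read_data.
    written = ''.join(c[1][0] for c in mo().write.mock_calls)
    with patch('builtins.open', mock_open(read_data=written)):
        assert load('fake.json') == {'a': 1}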
class FuncXClient(throttling.ThrottledBaseClient):
    """Main class for interacting with the funcX service

    Holds helper operations for performing common tasks with the
    funcX service.
    """

    TOKEN_DIR = os.path.expanduser("~/.funcx/credentials")
    TOKEN_FILENAME = 'funcx_sdk_tokens.json'
    CLIENT_ID = '4cf29807-cf21-49ec-9443-ff9a3fb9f81c'

    def __init__(self, http_timeout=None,
                 funcx_home=os.path.join('~', '.funcx'),
                 force_login=False, fx_authorizer=None,
                 funcx_service_address='https://api.funcx.org/v1',
                 **kwargs):
        """Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout
        force_login: bool
            Whether to force a login to get new credentials.
        fx_authorizer : :class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.
        funcx_service_address: str
            The address of the funcX web service to communicate with.
            Default: https://api.funcx.org/v1

        Keyword arguments are the same as for BaseClient.
        """
        self.func_table = {}
        self.ep_registration_path = 'register_endpoint_2'
        self.funcx_home = os.path.expanduser(funcx_home)

        if not os.path.exists(self.TOKEN_DIR):
            os.makedirs(self.TOKEN_DIR)

        tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
        self.native_client = NativeClient(
            client_id=self.CLIENT_ID,
            app_name="FuncX SDK",
            token_storage=JSONTokenStorage(tokens_filename))

        # TODO: if fx_authorizer is given, we still need to get an authorizer
        # for Search
        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"
        search_scope = "urn:globus:auth:scope:search.api.globus.org:all"
        scopes = [fx_scope, search_scope, "openid"]

        search_authorizer = None

        if not fx_authorizer:
            self.native_client.login(
                requested_scopes=scopes,
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login)

            all_authorizers = self.native_client.get_authorizers_by_scope(
                requested_scopes=scopes)
            fx_authorizer = all_authorizers[fx_scope]
            search_authorizer = all_authorizers[search_scope]
            openid_authorizer = all_authorizers["openid"]

        super(FuncXClient, self).__init__("funcX",
                                          environment='funcx',
                                          authorizer=fx_authorizer,
                                          http_timeout=http_timeout,
                                          base_url=funcx_service_address,
                                          **kwargs)
        self.fx_serializer = FuncXSerializer()

        authclient = AuthClient(authorizer=openid_authorizer)
        user_info = authclient.oauth2_userinfo()
        self.searcher = SearchHelper(authorizer=search_authorizer,
                                     owner_uuid=user_info['sub'])
        self.funcx_service_address = funcx_service_address

    def version_check(self):
        """Check this client version meets the service's minimum supported
        version.
        """
        resp = self.get("version", params={"service": "all"})
        versions = resp.data

        if "min_ep_version" not in versions:
            raise VersionMismatch(
                "Failed to retrieve version information from funcX service.")

        min_ep_version = versions['min_ep_version']

        if ENDPOINT_VERSION is None:
            raise VersionMismatch(
                "You do not have the funcx endpoint installed. "
                "You can use 'pip install funcx-endpoint'.")

        # Compare as versions rather than lexicographically as strings,
        # so that e.g. '0.10.0' is not considered older than '0.9.0'.
        if LooseVersion(ENDPOINT_VERSION) < LooseVersion(min_ep_version):
            raise VersionMismatch(
                f"Your version={ENDPOINT_VERSION} is lower than the "
                f"minimum version for an endpoint: {min_ep_version}. "
                "Please update.")

    def logout(self):
        """Remove credentials from your local system"""
        self.native_client.logout()

    def update_table(self, return_msg, task_id):
        """Parses the return message from the service and updates the
        internal func_table

        Parameters
        ----------
        return_msg : str
            Return message received from the funcx service
        task_id : str
            task id string
        """
        if isinstance(return_msg, str):
            r_dict = json.loads(return_msg)
        else:
            r_dict = return_msg

        status = {'pending': True}
        if 'result' in r_dict:
            try:
                r_obj = self.fx_serializer.deserialize(r_dict['result'])
                completion_t = r_dict['completion_t']
            except Exception:
                raise SerializationError("Result Object Deserialization")
            else:
                status.update({'pending': False,
                               'result': r_obj,
                               'completion_t': completion_t})
                self.func_table[task_id] = status
        elif 'exception' in r_dict:
            try:
                r_exception = self.fx_serializer.deserialize(
                    r_dict['exception'])
                completion_t = r_dict['completion_t']
                logger.info(f"Exception : {r_exception}")
            except Exception:
                raise SerializationError(
                    "Task's exception object deserialization")
            else:
                status.update({'pending': False,
                               'exception': r_exception,
                               'completion_t': completion_t})
                self.func_table[task_id] = status
        return status

    def get_task(self, task_id):
        """Get a funcX task.

        Parameters
        ----------
        task_id : str
            UUID of the task

        Returns
        -------
        dict
            Task block containing "status" key.
        """
        if task_id in self.func_table:
            return self.func_table[task_id]

        r = self.get("tasks/{task_id}".format(task_id=task_id))
        logger.debug("Response string : {}".format(r))
        return self.update_table(r.text, task_id)

    def get_result(self, task_id):
        """Get the result of a funcX task

        Parameters
        ----------
        task_id: str
            UUID of the task

        Returns
        -------
        Result obj: If task completed

        Raises
        ------
        Exception obj: Exception due to which the task failed
        """
        task = self.get_task(task_id)
        if task['pending'] is True:
            raise Exception("Task pending")
        else:
            if 'result' in task:
                return task['result']
            else:
                logger.warning("We have an exception : {}".format(
                    task['exception']))
                task['exception'].reraise()

    def get_batch_status(self, task_id_list):
        """Request status for a batch of task_ids"""
        assert isinstance(task_id_list, list), \
            "get_batch_status expects a list of task ids"

        pending_task_ids = [t for t in task_id_list
                            if t not in self.func_table]

        results = {}

        if pending_task_ids:
            payload = {'task_ids': pending_task_ids}
            r = self.post("/batch_status", json_body=payload)
            logger.debug("Response string : {}".format(r))

        pending_task_ids = set(pending_task_ids)

        for task_id in task_id_list:
            if task_id in pending_task_ids:
                try:
                    data = r['results'][task_id]
                    rets = self.update_table(data, task_id)
                    results[task_id] = rets
                except KeyError:
                    logger.debug("Task {} info was not available in the "
                                 "batch status".format(task_id))
                except Exception:
                    logger.exception(
                        "Failure while unpacking results from get_batch_status")
            else:
                results[task_id] = self.func_table[task_id]

        return results

    def get_batch_result(self, task_id_list):
        """Request results for a batch of task_ids"""
        pass

    def run(self, *args, endpoint_id=None, function_id=None, **kwargs):
        """Initiate an invocation

        Parameters
        ----------
        *args : Any
            Args as specified by the function signature
        endpoint_id : uuid str
            Endpoint UUID string. Required
        function_id : uuid str
            Function UUID string. Required

        Returns
        -------
        task_id : str
            UUID string that identifies the task
        """
        assert endpoint_id is not None, "endpoint_id key-word argument must be set"
        assert function_id is not None, "function_id key-word argument must be set"

        batch = self.create_batch()
        batch.add(*args, endpoint_id=endpoint_id, function_id=function_id,
                  **kwargs)
        r = self.batch_run(batch)

        """
        Create a future to deal with the result
        funcx_future = FuncXFuture(self, task_id, async_poll)

        if not asynchronous:
            return funcx_future.result()

        # Return the result
        return funcx_future
        """

        return r[0]

    def create_batch(self):
        """Create a Batch instance to handle batch submission in funcX

        Returns
        -------
        Batch instance
            Status block containing "status" key.
        """
        batch = Batch()
        return batch

    def batch_run(self, batch):
        """Initiate a batch of tasks to funcX

        Parameters
        ----------
        batch: a Batch object

        Returns
        -------
        task_ids : a list of UUID strings that identify the tasks
        """
        servable_path = 'submit'
        assert isinstance(batch, Batch), "Requires a Batch object as input"
        assert len(batch.tasks) > 0, "Requires a non-empty batch"

        data = batch.prepare()

        # Send the data to funcX
        r = self.post(servable_path, json_body=data)
        if r.http_status != 200:
            raise HTTPError(r)
        if r.get("status", "Failure") == "Failure":
            raise MalformedResponse("FuncX Request failed: {}".format(
                r.get("reason", "Unknown")))
        return r['task_uuids']

    def map_run(self, *args, endpoint_id=None, function_id=None,
                asynchronous=False, **kwargs):
        """Initiate an invocation over an iterable of arguments

        Parameters
        ----------
        *args : Any
            Args as specified by the function signature
        endpoint_id : uuid str
            Endpoint UUID string. Required
        function_id : uuid str
            Function UUID string. Required
        asynchronous : bool
            Whether or not to run the function asynchronously

        Returns
        -------
        task_uuids : list
            UUID strings that identify the tasks
        """
        servable_path = 'submit_batch'
        assert endpoint_id is not None, "endpoint_id key-word argument must be set"
        assert function_id is not None, "function_id key-word argument must be set"

        ser_kwargs = self.fx_serializer.serialize(kwargs)

        batch_payload = []
        iterator = args[0]
        for arg in iterator:
            ser_args = self.fx_serializer.serialize((arg, ))
            payload = self.fx_serializer.pack_buffers([ser_args, ser_kwargs])
            batch_payload.append(payload)

        data = {'endpoints': [endpoint_id],
                'func': function_id,
                'payload': batch_payload,
                'is_async': asynchronous}

        # Send the data to funcX
        r = self.post(servable_path, json_body=data)
        if r.http_status != 200:
            raise Exception(r)
        if r.get("status", "Failure") == "Failure":
            raise MalformedResponse("FuncX Request failed: {}".format(
                r.get("reason", "Unknown")))
        return r['task_uuids']

    def register_endpoint(self, name, endpoint_uuid, metadata=None,
                          endpoint_version=None):
        """Register an endpoint with the funcX service.

        Parameters
        ----------
        name : str
            Name of the endpoint
        endpoint_uuid : str
            The uuid of the endpoint
        metadata : dict
            endpoint metadata, see default_config example
        endpoint_version: str
            Version string to be passed to the webService as a compatibility
            check

        Returns
        -------
        A dict
            {'endpoint_id' : <>, 'address' : <>, 'client_ports': <>}
        """
        self.version_check()

        data = {"endpoint_name": name,
                "endpoint_uuid": endpoint_uuid,
                "version": endpoint_version}
        if metadata:
            data['meta'] = metadata

        r = self.post(self.ep_registration_path, json_body=data)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r.data

    def get_containers(self, name, description=None):
        """Register a DLHub endpoint with the funcX service and get
        the containers to launch.

        Parameters
        ----------
        name : str
            Name of the endpoint
        description : str
            Description of the endpoint

        Returns
        -------
        int
            The port to connect to and a list of containers
        """
        registration_path = 'get_containers'

        data = {"endpoint_name": name, "description": description}

        r = self.post(registration_path, json_body=data)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r.data['endpoint_uuid'], r.data['endpoint_containers']

    def get_container(self, container_uuid, container_type):
        """Get the details of a container for staging it locally.

        Parameters
        ----------
        container_uuid : str
            UUID of the container in question
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker)

        Returns
        -------
        dict
            The details of the containers to deploy
        """
        container_path = f'containers/{container_uuid}/{container_type}'

        r = self.get(container_path)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r.data['container']

    def get_endpoint_status(self, endpoint_uuid):
        """Get the status reports for an endpoint.

        Parameters
        ----------
        endpoint_uuid : str
            UUID of the endpoint in question

        Returns
        -------
        dict
            The details of the endpoint's stats
        """
        stats_path = f'endpoints/{endpoint_uuid}/status'

        r = self.get(stats_path)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r.data

    def register_function(self, function, function_name=None,
                          container_uuid=None, description=None,
                          public=False, group=None, searchable=True):
        """Register a function code with the funcX service.

        Parameters
        ----------
        function : Python Function
            The function to be registered for remote execution
        function_name : str
            The entry point (function name) of the function. Default: None
        container_uuid : str
            Container UUID from registration with funcX
        description : str
            Description of the file
        public : bool
            Whether or not the function is publicly accessible.
            Default = False
        group : str
            A globus group uuid to share this function with
        searchable : bool
            If true, the function will be indexed into globus search with the
            appropriate permissions

        Returns
        -------
        function uuid : str
            UUID identifier for the registered function
        """
        registration_path = 'register_function'

        source_code = ""
        try:
            source_code = getsource(function)
        except OSError:
            logger.error(
                "Failed to find source code during function registration.")

        serialized_fn = self.fx_serializer.serialize(function)
        packed_code = self.fx_serializer.pack_buffers([serialized_fn])

        data = {"function_name": function.__name__,
                "function_code": packed_code,
                "function_source": source_code,
                "container_uuid": container_uuid,
                "entry_point": function_name if function_name else function.__name__,
                "description": description,
                "public": public,
                "group": group,
                "searchable": searchable}

        logger.info("Registering function : {}".format(data))

        r = self.post(registration_path, json_body=data)
        if r.http_status != 200:
            raise HTTPError(r)

        func_uuid = r.data['function_uuid']

        # Return the result
        return func_uuid

    def update_function(self, func_uuid, function):
        pass

    def search_function(self, q, offset=0, limit=10, advanced=False):
        """Search for function via the funcX service

        Parameters
        ----------
        q : str
            free-form query string
        offset : int
            offset into total results
        limit : int
            max number of results to return
        advanced : bool
            allows elastic-search like syntax in query string

        Returns
        -------
        FunctionSearchResults
        """
        return self.searcher.search_function(q, offset=offset, limit=limit,
                                             advanced=advanced)

    def search_endpoint(self, q, scope='all', owner_id=None):
        """Search for endpoints via the funcX service

        Parameters
        ----------
        q
            free-form query string
        scope : str
            Can be one of {'all', 'my-endpoints', 'shared-with-me'}
        owner_id
            should be urn like f"urn:globus:auth:identity:{owner_uuid}"

        Returns
        -------
        """
        return self.searcher.search_endpoint(q, scope=scope,
                                             owner_id=owner_id)

    def register_container(self, location, container_type, name='',
                           description=''):
        """Register a container with the funcX service.

        Parameters
        ----------
        location : str
            The location of the container (e.g., its docker url). Required
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker). Required
        name : str
            A name for the container. Default = ''
        description : str
            A description to associate with the container. Default = ''

        Returns
        -------
        str
            The id of the container
        """
        container_path = 'containers'

        payload = {'name': name,
                   'location': location,
                   'description': description,
                   'type': container_type}

        r = self.post(container_path, json_body=payload)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r.data['container_id']

    def add_to_whitelist(self, endpoint_id, function_ids):
        """Adds the function to the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint
        function_ids : list
            A list of function id's to be whitelisted

        Returns
        -------
        json
            The response of the request
        """
        req_path = f'endpoints/{endpoint_id}/whitelist'

        if not isinstance(function_ids, list):
            function_ids = [function_ids]

        payload = {'func': function_ids}

        r = self.post(req_path, json_body=payload)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r

    def get_whitelist(self, endpoint_id):
        """List the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint

        Returns
        -------
        json
            The response of the request
        """
        req_path = f'endpoints/{endpoint_id}/whitelist'

        r = self.get(req_path)
        if r.http_status != 200:
            raise HTTPError(r)

        # Return the result
        return r

    def delete_from_whitelist(self, endpoint_id, function_ids):
        """Remove functions from the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint
        function_ids : list
            A list of function id's to be removed from the whitelist

        Returns
        -------
        json
            The response of the request
        """
        if not isinstance(function_ids, list):
            function_ids = [function_ids]

        res = []
        for fid in function_ids:
            req_path = f'endpoints/{endpoint_id}/whitelist/{fid}'
            r = self.delete(req_path)
            if r.http_status != 200:
                raise HTTPError(r)
            res.append(r)

        # Return the result
        return res
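# ---------------------------------------------------------------------------
# Not from the original source: a minimal usage sketch for the client above.
# The endpoint UUID is a placeholder; substitute a real, running endpoint.
import time

fxc = FuncXClient()


def double(x):
    return x * 2


func_uuid = fxc.register_function(double, description="Double a number")
task_id = fxc.run(21, endpoint_id='<endpoint-uuid>', function_id=func_uuid)

# get_result() raises Exception("Task pending") until the task completes,
# so poll with a short delay, re-raising anything that isn't the pending case.
while True:
    try:
        print(fxc.get_result(task_id))  # 42
        break
    except Exception as e:
        if 'pending' not in str(e).lower():
            raise
        time.sleep(2)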
class FuncXClient:
    """Main class for interacting with the funcX service

    Holds helper operations for performing common tasks with the
    funcX service.
    """

    TOKEN_DIR = os.path.expanduser("~/.funcx/credentials")
    TOKEN_FILENAME = "funcx_sdk_tokens.json"
    FUNCX_SDK_CLIENT_ID = os.environ.get(
        "FUNCX_SDK_CLIENT_ID", "4cf29807-cf21-49ec-9443-ff9a3fb9f81c"
    )
    FUNCX_SCOPE = os.environ.get(
        "FUNCX_SCOPE",
        "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all",
    )

    def __init__(
        self,
        http_timeout=None,
        funcx_home=_FUNCX_HOME,
        force_login=False,
        fx_authorizer=None,
        search_authorizer=None,
        openid_authorizer=None,
        funcx_service_address=None,
        check_endpoint_version=False,
        asynchronous=False,
        loop=None,
        results_ws_uri=None,
        use_offprocess_checker=True,
        environment=None,
        **kwargs,
    ):
        """
        Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout
        force_login: bool
            Whether to force a login to get new credentials.
        fx_authorizer : :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.
        search_authorizer : :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with Globus Search.
            Default: ``None``, will be created.
        openid_authorizer : :class:`GlobusAuthorizer \
            <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with OpenID.
            Default: ``None``, will be created.
        funcx_service_address: str
            For internal use only. The address of the web service.
        results_ws_uri: str
            For internal use only. The address of the websocket service.
        environment: str
            For internal use only. The name of the environment to use.
        asynchronous: bool
            Should the API use asynchronous interactions with the web
            service? Currently only impacts the run method.
            Default: False
        loop: AbstractEventLoop
            If asynchronous mode is requested, then you can provide an
            optional event loop instance. If None, then we will access
            asyncio.get_event_loop()
            Default: None
        use_offprocess_checker: bool
            Use this option to disable the offprocess_checker in the
            FuncXSerializer used by the client.
            Default: True

        Keyword arguments are the same as for BaseClient.
        """
        # resolve URLs if not set
        if funcx_service_address is None:
            funcx_service_address = get_web_service_url(environment)
        if results_ws_uri is None:
            results_ws_uri = get_web_socket_url(environment)

        self.func_table = {}
        self.use_offprocess_checker = use_offprocess_checker
        self.funcx_home = os.path.expanduser(funcx_home)
        self.session_task_group_id = str(uuid.uuid4())

        if not os.path.exists(self.TOKEN_DIR):
            os.makedirs(self.TOKEN_DIR)

        tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
        self.native_client = NativeClient(
            client_id=self.FUNCX_SDK_CLIENT_ID,
            app_name="FuncX SDK",
            token_storage=JSONTokenStorage(tokens_filename),
        )

        # TODO: if fx_authorizer is given, we still need to get an authorizer
        # for Search
        search_scope = "urn:globus:auth:scope:search.api.globus.org:all"
        scopes = [self.FUNCX_SCOPE, search_scope, "openid"]

        if not fx_authorizer or not search_authorizer or not openid_authorizer:
            self.native_client.login(
                requested_scopes=scopes,
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login,
            )

            all_authorizers = self.native_client.get_authorizers_by_scope(
                requested_scopes=scopes
            )
            fx_authorizer = all_authorizers[self.FUNCX_SCOPE]
            search_authorizer = all_authorizers[search_scope]
            openid_authorizer = all_authorizers["openid"]

        self.web_client = FuncxWebClient(
            base_url=funcx_service_address, authorizer=fx_authorizer
        )
        self.fx_serializer = FuncXSerializer(
            use_offprocess_checker=self.use_offprocess_checker
        )

        authclient = AuthClient(authorizer=openid_authorizer)
        user_info = authclient.oauth2_userinfo()
        self.searcher = SearchHelper(
            authorizer=search_authorizer, owner_uuid=user_info["sub"]
        )
        self.funcx_service_address = funcx_service_address
        self.check_endpoint_version = check_endpoint_version

        self.version_check()

        self.results_ws_uri = results_ws_uri
        self.asynchronous = asynchronous
        if asynchronous:
            self.loop = loop if loop else asyncio.get_event_loop()

            # Start up an asynchronous polling loop in the background
            self.ws_polling_task = WebSocketPollingTask(
                self,
                self.loop,
                init_task_group_id=self.session_task_group_id,
                results_ws_uri=self.results_ws_uri,
            )
        else:
            self.loop = None

    def version_check(self):
        """Check this client version meets the service's minimum supported
        version."""
        resp = self.web_client.get_version()
        versions = resp.data

        if "min_ep_version" not in versions:
            raise VersionMismatch(
                "Failed to retrieve version information from funcX service."
            )

        min_ep_version = versions["min_ep_version"]
        min_sdk_version = versions["min_sdk_version"]

        if self.check_endpoint_version:
            if ENDPOINT_VERSION is None:
                raise VersionMismatch(
                    "You do not have the funcx endpoint installed. "
                    "You can use 'pip install funcx-endpoint'."
                )
            if LooseVersion(ENDPOINT_VERSION) < LooseVersion(min_ep_version):
                raise VersionMismatch(
                    f"Your version={ENDPOINT_VERSION} is lower than the "
                    f"minimum version for an endpoint: {min_ep_version}. "
                    "Please update. "
                    f"pip install funcx-endpoint>={min_ep_version}"
                )
        else:
            if LooseVersion(SDK_VERSION) < LooseVersion(min_sdk_version):
                raise VersionMismatch(
                    f"Your version={SDK_VERSION} is lower than the "
                    f"minimum version for funcx SDK: {min_sdk_version}. "
                    "Please update. "
                    f"pip install funcx>={min_sdk_version}"
                )

    def logout(self):
        """Remove credentials from your local system"""
        self.native_client.logout()

    def update_table(self, return_msg, task_id):
        """Parses the return message from the service and updates the
        internal func_table

        Parameters
        ----------
        return_msg : str
            Return message received from the funcx service
        task_id : str
            task id string
        """
        if isinstance(return_msg, str):
            r_dict = json.loads(return_msg)
        else:
            r_dict = return_msg

        r_status = r_dict.get("status", "unknown")
        status = {"pending": True, "status": r_status}

        if "result" in r_dict:
            try:
                r_obj = self.fx_serializer.deserialize(r_dict["result"])
                completion_t = r_dict["completion_t"]
            except Exception:
                raise SerializationError("Result Object Deserialization")
            else:
                status.update(
                    {"pending": False,
                     "result": r_obj,
                     "completion_t": completion_t}
                )
                self.func_table[task_id] = status
        elif "exception" in r_dict:
            try:
                r_exception = self.fx_serializer.deserialize(
                    r_dict["exception"])
                completion_t = r_dict["completion_t"]
                logger.info(f"Exception : {r_exception}")
            except Exception:
                raise SerializationError(
                    "Task's exception object deserialization")
            else:
                status.update(
                    {
                        "pending": False,
                        "exception": r_exception,
                        "completion_t": completion_t,
                    }
                )
                self.func_table[task_id] = status
        return status

    def get_task(self, task_id):
        """Get a funcX task.

        Parameters
        ----------
        task_id : str
            UUID of the task

        Returns
        -------
        dict
            Task block containing "status" key.
        """
        if task_id in self.func_table:
            return self.func_table[task_id]

        r = self.web_client.get_task(task_id)
        logger.debug(f"Response string : {r}")
        return self.update_table(r.text, task_id)

    def get_result(self, task_id):
        """Get the result of a funcX task

        Parameters
        ----------
        task_id: str
            UUID of the task

        Returns
        -------
        Result obj: If task completed

        Raises
        ------
        Exception obj: Exception due to which the task failed
        """
        task = self.get_task(task_id)
        if task["pending"] is True:
            raise TaskPending(task["status"])
        else:
            if "result" in task:
                return task["result"]
            else:
                logger.warning("We have an exception : {}".format(
                    task["exception"]))
                task["exception"].reraise()

    def get_batch_result(self, task_id_list):
        """Request status for a batch of task_ids"""
        assert isinstance(
            task_id_list, list
        ), "get_batch_result expects a list of task ids"

        pending_task_ids = [t for t in task_id_list
                            if t not in self.func_table]

        results = {}

        if pending_task_ids:
            r = self.web_client.get_batch_status(pending_task_ids)
            logger.debug(f"Response string : {r}")

        pending_task_ids = set(pending_task_ids)

        for task_id in task_id_list:
            if task_id in pending_task_ids:
                try:
                    data = r["results"][task_id]
                    rets = self.update_table(data, task_id)
                    results[task_id] = rets
                except KeyError:
                    logger.debug(
                        f"Task {task_id} info was not available in the "
                        "batch status"
                    )
                except Exception:
                    logger.exception(
                        "Failure while unpacking results from get_batch_result"
                    )
            else:
                results[task_id] = self.func_table[task_id]

        return results

    def run(self, *args, endpoint_id=None, function_id=None, **kwargs):
        """Initiate an invocation

        Parameters
        ----------
        *args : Any
            Args as specified by the function signature
        endpoint_id : uuid str
            Endpoint UUID string. Required
        function_id : uuid str
            Function UUID string. Required

        Returns
        -------
        task_id : str
            UUID string that identifies the task if the client was created
            with asynchronous=False
        funcX Task: asyncio.Task
            A future that will eventually resolve into the function's result
            if the client was created with asynchronous=True
        """
        assert endpoint_id is not None, "endpoint_id key-word argument must be set"
        assert function_id is not None, "function_id key-word argument must be set"

        batch = self.create_batch()
        batch.add(*args, endpoint_id=endpoint_id, function_id=function_id,
                  **kwargs)
        r = self.batch_run(batch)

        return r[0]

    def create_batch(self, task_group_id=None):
        """Create a Batch instance to handle batch submission in funcX

        Parameters
        ----------
        task_group_id : str
            Override the session wide session_task_group_id with a different
            task_group_id for this batch.
            If task_group_id is not specified, it will default to using the
            client's session_task_group_id

        Returns
        -------
        Batch instance
            Status block containing "status" key.
        """
        if not task_group_id:
            task_group_id = self.session_task_group_id

        batch = Batch(task_group_id=task_group_id)
        return batch

    def batch_run(self, batch):
        """Initiate a batch of tasks to funcX

        Parameters
        ----------
        batch: a Batch object

        Returns
        -------
        task_ids : a list of UUID strings that identify the tasks
        """
        assert isinstance(batch, Batch), "Requires a Batch object as input"
        assert len(batch.tasks) > 0, "Requires a non-empty batch"

        data = batch.prepare()

        # Send the data to funcX
        r = self.web_client.submit(data)

        task_uuids = []
        for result in r["results"]:
            task_id = result["task_uuid"]
            task_uuids.append(task_id)
            if result["http_status_code"] != 200:
                # this method of handling errors for a batch response is not
                # ideal, as it will raise any error in the multi-response,
                # but it will do until batch_run is deprecated in favor of
                # the Executor
                handle_response_errors(result)

        if self.asynchronous:
            task_group_id = r["task_group_id"]
            asyncio_tasks = []
            for task_id in task_uuids:
                funcx_task = FuncXTask(task_id)
                asyncio_task = self.loop.create_task(funcx_task.get_result())
                asyncio_tasks.append(asyncio_task)

                self.ws_polling_task.add_task(funcx_task)
            self.ws_polling_task.put_task_group_id(task_group_id)
            return asyncio_tasks

        return task_uuids

    def map_run(
        self, *args, endpoint_id=None, function_id=None, asynchronous=False,
        **kwargs
    ):
        """Initiate an invocation over an iterable of arguments

        Parameters
        ----------
        *args : Any
            Args as specified by the function signature
        endpoint_id : uuid str
            Endpoint UUID string. Required
        function_id : uuid str
            Function UUID string. Required
        asynchronous : bool
            Whether or not to run the function asynchronously

        Returns
        -------
        task_uuids : list
            UUID strings that identify the tasks
        """
        assert endpoint_id is not None, "endpoint_id key-word argument must be set"
        assert function_id is not None, "function_id key-word argument must be set"

        ser_kwargs = self.fx_serializer.serialize(kwargs)

        batch_payload = []
        iterator = args[0]
        for arg in iterator:
            ser_args = self.fx_serializer.serialize((arg,))
            payload = self.fx_serializer.pack_buffers([ser_args, ser_kwargs])
            batch_payload.append(payload)

        data = {
            "endpoints": [endpoint_id],
            "func": function_id,
            "payload": batch_payload,
            "is_async": asynchronous,
        }

        # Send the data to funcX
        r = self.web_client.submit_batch(data)
        return r["task_uuids"]

    def register_endpoint(
        self, name, endpoint_uuid, metadata=None, endpoint_version=None
    ):
        """Register an endpoint with the funcX service.

        Parameters
        ----------
        name : str
            Name of the endpoint
        endpoint_uuid : str
            The uuid of the endpoint
        metadata : dict
            endpoint metadata, see default_config example
        endpoint_version: str
            Version string to be passed to the webService as a compatibility
            check

        Returns
        -------
        A dict
            {'endpoint_id' : <>, 'address' : <>, 'client_ports': <>}
        """
        self.version_check()

        r = self.web_client.register_endpoint(
            endpoint_name=name,
            endpoint_id=endpoint_uuid,
            metadata=metadata,
            endpoint_version=endpoint_version,
        )
        return r.data

    def get_containers(self, name, description=None):
        """Register a DLHub endpoint with the funcX service and get
        the containers to launch.

        Parameters
        ----------
        name : str
            Name of the endpoint
        description : str
            Description of the endpoint

        Returns
        -------
        int
            The port to connect to and a list of containers
        """
        data = {"endpoint_name": name, "description": description}
        r = self.web_client.post("get_containers", data=data)
        return r.data["endpoint_uuid"], r.data["endpoint_containers"]

    def get_container(self, container_uuid, container_type):
        """Get the details of a container for staging it locally.

        Parameters
        ----------
        container_uuid : str
            UUID of the container in question
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker)

        Returns
        -------
        dict
            The details of the containers to deploy
        """
        self.version_check()

        r = self.web_client.get(f"containers/{container_uuid}/{container_type}")
        return r.data["container"]

    def get_endpoint_status(self, endpoint_uuid):
        """Get the status reports for an endpoint.

        Parameters
        ----------
        endpoint_uuid : str
            UUID of the endpoint in question

        Returns
        -------
        dict
            The details of the endpoint's stats
        """
        r = self.web_client.get_endpoint_status(endpoint_uuid)
        return r.data

    def register_function(
        self,
        function,
        function_name=None,
        container_uuid=None,
        description=None,
        public=False,
        group=None,
        searchable=True,
    ):
        """Register a function code with the funcX service.

        Parameters
        ----------
        function : Python Function
            The function to be registered for remote execution
        function_name : str
            The entry point (function name) of the function. Default: None
        container_uuid : str
            Container UUID from registration with funcX
        description : str
            Description of the file
        public : bool
            Whether or not the function is publicly accessible.
            Default = False
        group : str
            A globus group uuid to share this function with
        searchable : bool
            If true, the function will be indexed into globus search with the
            appropriate permissions

        Returns
        -------
        function uuid : str
            UUID identifier for the registered function
        """
        data = FunctionRegistrationData(
            function=function,
            failover_source="",
            container_uuid=container_uuid,
            entry_point=function_name,
            description=description,
            public=public,
            group=group,
            searchable=searchable,
            serializer=self.fx_serializer,
        )
        logger.info(f"Registering function : {data}")
        r = self.web_client.register_function(data)
        return r.data["function_uuid"]

    def search_function(self, q, offset=0, limit=10, advanced=False):
        """Search for function via the funcX service

        Parameters
        ----------
        q : str
            free-form query string
        offset : int
            offset into total results
        limit : int
            max number of results to return
        advanced : bool
            allows elastic-search like syntax in query string

        Returns
        -------
        FunctionSearchResults
        """
        return self.searcher.search_function(
            q, offset=offset, limit=limit, advanced=advanced
        )

    def search_endpoint(self, q, scope="all", owner_id=None):
        """Search for endpoints via the funcX service

        Parameters
        ----------
        q
            free-form query string
        scope : str
            Can be one of {'all', 'my-endpoints', 'shared-with-me'}
        owner_id
            should be urn like f"urn:globus:auth:identity:{owner_uuid}"

        Returns
        -------
        """
        return self.searcher.search_endpoint(q, scope=scope,
                                             owner_id=owner_id)

    def register_container(self, location, container_type, name="",
                           description=""):
        """Register a container with the funcX service.

        Parameters
        ----------
        location : str
            The location of the container (e.g., its docker url). Required
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker). Required
        name : str
            A name for the container. Default = ''
        description : str
            A description to associate with the container. Default = ''

        Returns
        -------
        str
            The id of the container
        """
        payload = {
            "name": name,
            "location": location,
            "description": description,
            "type": container_type,
        }

        r = self.web_client.post("containers", data=payload)
        return r.data["container_id"]

    def add_to_whitelist(self, endpoint_id, function_ids):
        """Adds the function to the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint
        function_ids : list
            A list of function id's to be whitelisted

        Returns
        -------
        json
            The response of the request
        """
        return self.web_client.whitelist_add(endpoint_id, function_ids)

    def get_whitelist(self, endpoint_id):
        """List the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint

        Returns
        -------
        json
            The response of the request
        """
        return self.web_client.get_whitelist(endpoint_id)

    def delete_from_whitelist(self, endpoint_id, function_ids):
        """Remove functions from the endpoint's whitelist

        Parameters
        ----------
        endpoint_id : str
            The uuid of the endpoint
        function_ids : list
            A list of function id's to be removed from the whitelist

        Returns
        -------
        json
            The response of the request
        """
        if not isinstance(function_ids, list):
            function_ids = [function_ids]

        res = []
        for fid in function_ids:
            res.append(self.web_client.whitelist_remove(endpoint_id, fid))

        return res
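# ---------------------------------------------------------------------------
# Not from the original source: a sketch of the asynchronous mode wired up in
# __init__ and batch_run above. With asynchronous=True, run() returns an
# asyncio.Task that is resolved by the background websocket polling task.
# The endpoint and function UUIDs are placeholders.
import asyncio

fxc = FuncXClient(asynchronous=True)


async def main():
    task = fxc.run(21, endpoint_id='<endpoint-uuid>',
                   function_id='<function-uuid>')
    print(await task)


fxc.loop.run_until_complete(main())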
class FuncXClient(throttling.ThrottledBaseClient):
    """Main class for interacting with the funcX service

    Holds helper operations for performing common tasks with the
    funcX service.
    """

    TOKEN_DIR = os.path.expanduser("~/.funcx/credentials")
    TOKEN_FILENAME = 'funcx_sdk_tokens.json'
    CLIENT_ID = '4cf29807-cf21-49ec-9443-ff9a3fb9f81c'

    def __init__(self, http_timeout=None,
                 funcx_home=os.path.join('~', '.funcx'),
                 force_login=False, fx_authorizer=None,
                 funcx_service_address='https://funcx.org/api/v1',
                 **kwargs):
        """Initialize the client

        Parameters
        ----------
        http_timeout: int
            Timeout for any call to service in seconds.
            Default is no timeout
        force_login: bool
            Whether to force a login to get new credentials.
        fx_authorizer : :class:`GlobusAuthorizer <globus_sdk.authorizers.base.GlobusAuthorizer>`
            A custom authorizer instance to communicate with funcX.
            Default: ``None``, will be created.
        funcx_service_address: str
            The address of the funcX web service to communicate with.
            Default: https://funcx.org/api/v1

        Keyword arguments are the same as for BaseClient.
        """
        self.ep_registration_path = 'register_endpoint_2'
        self.funcx_home = os.path.expanduser(funcx_home)

        if not os.path.exists(self.TOKEN_DIR):
            os.makedirs(self.TOKEN_DIR)

        tokens_filename = os.path.join(self.TOKEN_DIR, self.TOKEN_FILENAME)
        self.native_client = NativeClient(
            client_id=self.CLIENT_ID,
            app_name="FuncX SDK",
            token_storage=JSONTokenStorage(tokens_filename))

        fx_scope = "https://auth.globus.org/scopes/facd7ccc-c5f4-42aa-916b-a0e270e2c2a9/all"

        if not fx_authorizer:
            self.native_client.login(
                requested_scopes=[fx_scope],
                no_local_server=kwargs.get("no_local_server", True),
                no_browser=kwargs.get("no_browser", True),
                refresh_tokens=kwargs.get("refresh_tokens", True),
                force=force_login)

            all_authorizers = self.native_client.get_authorizers_by_scope(
                requested_scopes=[fx_scope])
            fx_authorizer = all_authorizers[fx_scope]

        super(FuncXClient, self).__init__("funcX",
                                          environment='funcx',
                                          authorizer=fx_authorizer,
                                          http_timeout=http_timeout,
                                          base_url=funcx_service_address,
                                          **kwargs)
        self.fx_serializer = FuncXSerializer()

    def logout(self):
        """Remove credentials from your local system"""
        self.native_client.logout()

    def get_task_status(self, task_id):
        """Get the status of a funcX task.

        Parameters
        ----------
        task_id : str
            UUID of the task

        Returns
        -------
        dict
            Status block containing "status" key.
        """
        r = self.get("{task_id}/status".format(task_id=task_id))
        return json.loads(r.text)

    def get_result(self, task_id):
        """Get the result of a funcX task

        Parameters
        ----------
        task_id: str
            UUID of the task

        Returns
        -------
        Result obj: If task completed

        Raises
        ------
        Exception obj: Exception due to which the task failed
        """
        r = self.get("{task_id}/status".format(task_id=task_id))
        logger.info(f"Got from globus : {r}")
        r_dict = json.loads(r.text)

        if 'result' in r_dict:
            try:
                r_obj = self.fx_serializer.deserialize(r_dict['result'])
            except Exception:
                raise Exception(
                    "Failure during deserialization of the result object")
            else:
                return r_obj
        elif 'exception' in r_dict:
            try:
                r_exception = self.fx_serializer.deserialize(
                    r_dict['exception'])
                logger.info(f"Exception : {r_exception}")
            except Exception:
                raise Exception(
                    "Failure during deserialization of the Task's exception "
                    "object")
            else:
                r_exception.reraise()
        else:
            raise Exception("Task pending")

    def run(self, *args, endpoint_id=None, function_id=None,
            asynchronous=False, **kwargs):
        """Initiate an invocation

        Parameters
        ----------
        *args : Any
            Args as specified by the function signature
        endpoint_id : uuid str
            Endpoint UUID string. Required
        function_id : uuid str
            Function UUID string. Required
        asynchronous : bool
            Whether or not to run the function asynchronously

        Returns
        -------
        task_id : str
            UUID string that identifies the task
        """
        servable_path = 'submit'
        assert endpoint_id is not None, "endpoint_id key-word argument must be set"
        assert function_id is not None, "function_id key-word argument must be set"

        ser_args = self.fx_serializer.serialize(args)
        ser_kwargs = self.fx_serializer.serialize(kwargs)
        payload = self.fx_serializer.pack_buffers([ser_args, ser_kwargs])

        data = {'endpoint': endpoint_id,
                'func': function_id,
                'payload': payload,
                'is_async': asynchronous}

        # Send the data to funcX
        r = self.post(servable_path, json_body=data)
        # Use != rather than `is not`: identity comparison against an int
        # literal is unreliable and raises a SyntaxWarning on Python 3.8+.
        if r.http_status != 200:
            raise Exception(r)
        if 'task_uuid' not in r:
            raise MalformedResponse(r)

        """
        Create a future to deal with the result
        funcx_future = FuncXFuture(self, task_id, async_poll)

        if not asynchronous:
            return funcx_future.result()

        # Return the result
        return funcx_future
        """

        return r['task_uuid']

    def register_endpoint(self, name, endpoint_uuid, description=None):
        """Register an endpoint with the funcX service.

        Parameters
        ----------
        name : str
            Name of the endpoint
        endpoint_uuid : str
            The uuid of the endpoint
        description : str
            Description of the endpoint

        Returns
        -------
        A dict
            {'endpoint_id' : <>, 'address' : <>, 'client_ports': <>}
        """
        data = {"endpoint_name": name,
                "endpoint_uuid": endpoint_uuid,
                "description": description}

        r = self.post(self.ep_registration_path, json_body=data)
        if r.http_status != 200:
            raise Exception(r)

        # Return the result
        return r.data

    def get_containers(self, name, description=None):
        """Register a DLHub endpoint with the funcX service and get
        the containers to launch.

        Parameters
        ----------
        name : str
            Name of the endpoint
        description : str
            Description of the endpoint

        Returns
        -------
        int
            The port to connect to and a list of containers
        """
        registration_path = 'get_containers'

        data = {"endpoint_name": name, "description": description}

        r = self.post(registration_path, json_body=data)
        if r.http_status != 200:
            raise Exception(r)

        # Return the result
        return r.data['endpoint_uuid'], r.data['endpoint_containers']

    def get_container(self, container_uuid, container_type):
        """Get the details of a container for staging it locally.

        Parameters
        ----------
        container_uuid : str
            UUID of the container in question
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker)

        Returns
        -------
        dict
            The details of the containers to deploy
        """
        container_path = f'containers/{container_uuid}/{container_type}'

        r = self.get(container_path)
        if r.http_status != 200:
            raise Exception(r)

        # Return the result
        return r.data['container']

    def register_function(self, function, function_name=None,
                          container_uuid=None, description=None):
        """Register a function code with the funcX service.

        Parameters
        ----------
        function : Python Function
            The function to be registered for remote execution
        function_name : str
            The entry point (function name) of the function. Default: None
        container_uuid : str
            Container UUID from registration with funcX
        description : str
            Description of the file

        Returns
        -------
        function uuid : str
            UUID identifier for the registered function
        """
        registration_path = 'register_function'

        serialized_fn = self.fx_serializer.serialize(function)
        packed_code = self.fx_serializer.pack_buffers([serialized_fn])

        data = {"function_name": function.__name__,
                "function_code": packed_code,
                "container_uuid": container_uuid,
                "entry_point": function_name if function_name else function.__name__,
                "description": description}

        logger.info("Registering function : {}".format(data))

        r = self.post(registration_path, json_body=data)
        if r.http_status != 200:
            raise Exception(r)

        # Return the result
        return r.data['function_uuid']

    def register_container(self, location, container_type, name='',
                           description=''):
        """Register a container with the funcX service.

        Parameters
        ----------
        location : str
            The location of the container (e.g., its docker url). Required
        container_type : str
            The type of containers that will be used (Singularity, Shifter,
            Docker). Required
        name : str
            A name for the container. Default = ''
        description : str
            A description to associate with the container. Default = ''

        Returns
        -------
        str
            The id of the container
        """
        container_path = 'containers'

        payload = {'name': name,
                   'location': location,
                   'description': description,
                   'type': container_type}

        r = self.post(container_path, json_body=payload)
        if r.http_status != 200:
            raise Exception(r)

        # Return the result
        return r.data['container_id']
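# ---------------------------------------------------------------------------
# Not from the original source: a sketch using get_task_status() above to
# wait for completion before fetching the result. The status dict mirrors
# what get_result() parses: it gains a 'result' or 'exception' key once the
# task finishes. UUIDs are placeholders.
import time

fxc = FuncXClient()
task_id = fxc.run(3, 4, endpoint_id='<endpoint-uuid>',
                  function_id='<function-uuid>')

status = fxc.get_task_status(task_id)
while 'result' not in status and 'exception' not in status:
    time.sleep(2)
    status = fxc.get_task_status(task_id)
print(fxc.get_result(task_id))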
""" from globus_sdk import AuthClient from fair_research_login import NativeClient # Register a Native App for a client_id at https://developers.globus.org client = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5') # Automatically saves tokens in ~/.globus-native-apps.cfg tokens = client.login( # Request any scopes you want to use here. requested_scopes=['openid', 'profile'], # You can turn off the local server if it cannot be used for some reason no_local_server=False, # You can also turn off automatically opening the Auth URL no_browser=False, # refresh tokens are fully supported, but optional refresh_tokens=True, ) # Authorizers automatically choose a refresh token authorizer if possible, # and will automatically save new refreshed tokens when they expire. ac_authorizer = client.get_authorizers()['auth.globus.org'] # Example client usage: auth_cli = AuthClient(authorizer=ac_authorizer) user_info = auth_cli.oauth2_userinfo() print('Hello {}! How are you today?'.format(user_info['name'])) # Revoke tokens now that we're done client.logout()
class CfdeClient():
    """The CfdeClient enables easily using the CFDE tools to ingest data."""

    client_id = "417301b1-5101-456a-8a27-423e71a2ae26"
    app_name = "CfdeClient"
    archive_format = "tgz"

    def __init__(self, **kwargs):
        """Create a CfdeClient.

        Keyword Arguments:
            no_browser (bool): Do not automatically open the browser for the
                    Globus Auth URL. Display the URL instead and let the user
                    navigate to that location manually.
                    **Default**: ``False``.
            refresh_tokens (bool): Use Globus Refresh Tokens to extend login
                    time. **Default**: ``True``.
            force (bool): Force a login flow, even if loaded tokens are valid.
                    **Default**: ``False``.
            service_instance (str): The instance of the Globus Automate Flow
                    and/or the DERIVA ingest Action Provider to use. Unless
                    directed otherwise, this should be left to the default.
                    **Default**: ``prod``.
        """
        self.__native_client = NativeClient(client_id=self.client_id,
                                            app_name=self.app_name)
        self.__native_client.login(
            requested_scopes=CONFIG["ALL_SCOPES"],
            no_browser=kwargs.get("no_browser", False),
            # A local redirect server is only useful when a browser can be
            # opened, so this deliberately follows the no_browser flag.
            no_local_server=kwargs.get("no_browser", False),
            refresh_tokens=kwargs.get("refresh_tokens", True),
            force=kwargs.get("force", False))
        tokens = self.__native_client.load_tokens_by_scope()
        flows_token_map = {scope: token["access_token"]
                           for scope, token in tokens.items()}
        automate_authorizer = self.__native_client.get_authorizer(
            tokens[globus_automate_client.flows_client.MANAGE_FLOWS_SCOPE])
        self.__https_authorizer = self.__native_client.get_authorizer(
            tokens[CONFIG["HTTPS_SCOPE"]])
        self.flow_client = globus_automate_client.FlowsClient(
            flows_token_map, self.client_id, "flows_client",
            app_name=self.app_name,
            base_url="https://flows.automate.globus.org",
            authorizer=automate_authorizer)
        self.last_flow_run = {}

        # Fetch dynamic config info
        self.service_instance = kwargs.get("service_instance") or "prod"
        try:
            dconf_res = requests.get(
                CONFIG["DYNAMIC_CONFIG_LINKS"][self.service_instance])
            if dconf_res.status_code >= 300:
                raise ValueError(
                    "Unable to download required configuration: Error {}: {}"
                    .format(dconf_res.status_code, dconf_res.content))
            dconf = dconf_res.json()
            self.catalogs = dconf["CATALOGS"]
            self.flow_info = dconf["FLOWS"][self.service_instance]
        except KeyError as e:
            raise ValueError("Flow configuration for service_instance '{}' "
                             "not found".format(self.service_instance)) from e
        except json.JSONDecodeError:
            if b"<!DOCTYPE html>" in dconf_res.content:
                raise ValueError("Unable to authenticate with Globus: "
                                 "HTML authentication flow detected")
            else:
                raise ValueError("Flow configuration not JSON: \n{}"
                                 .format(dconf_res.content))
        except Exception:
            # TODO: Are there other exceptions that need to be
            # handled/translated?
            raise

        # Verify client version is compatible with service
        if parse_version(dconf["MIN_VERSION"]) > parse_version(VERSION):
            raise RuntimeError(
                "This CFDE Client is not up to date and can no longer make "
                "submissions. Please update the client and try again.")

        # Verify user has permission to view Flow
        try:
            self.flow_client.get_flow(self.flow_info["flow_id"])
        except globus_sdk.GlobusAPIError as e:
            if e.http_status == 404:
                raise PermissionError(
                    "Unable to view ingest Flow. Are you in the CFDE DERIVA "
                    "Demo Globus Group? Check your membership or apply for "
                    "access here: https://app.globus.org/groups/a437abe3-"
                    "c9a4-11e9-b441-0efb3ba9a670/about")
            else:
                raise

    @property
    def version(self):
        return VERSION

    def logout(self):
        """Log out and revoke this client's tokens.

        This object will no longer be usable; to submit additional data or
        check the status of previous submissions, you must create a new
        CfdeClient.
        """
        self.__native_client.logout()

    def start_deriva_flow(self, data_path, dcc_id, catalog_id=None,
                          schema=None, server=None, dataset_acls=None,
                          output_dir=None, delete_dir=False,
                          handle_git_repos=True, dry_run=False,
                          test_sub=False, verbose=False, **kwargs):
        """Start the Globus Automate Flow to ingest CFDE data into DERIVA.

        Arguments:
            data_path (str): The path to the data to ingest into DERIVA.
                    The path can be:
                    1) A directory to be formatted into a BDBag
                    2) A Git repository to be copied into a BDBag
                    3) A premade BDBag directory
                    4) A premade BDBag in an archive file
            dcc_id (str): The CFDE-recognized DCC ID for this submission.
            catalog_id (int or str): The ID of the DERIVA catalog to ingest
                    into. Default None, to create a new catalog.
            schema (str): The named schema or schema file link to validate
                    data against. Default None, to only validate against the
                    declared TableSchema.
            server (str): The DERIVA server to ingest to.
                    Default None, to use the Action Provider-set default.
            dataset_acls (dict): The DERIVA ACL(s) to set on the final
                    dataset. Default None, to use the CFDE default ACLs.
            output_dir (str): The path to create an output directory in. The
                    resulting BDBag archive will be named after this
                    directory. If not set, the directory will be turned into
                    a BDBag in-place. For Git repositories, this is
                    automatically set, but can be overridden. If data_path is
                    a file, this has no effect. This dir MUST NOT be in the
                    `data_path` directory or any subdirectories.
                    Default None.
            delete_dir (bool): Should the output_dir be deleted after
                    submission? Has no effect if output_dir is not specified.
                    For Git repositories, this is always True.
                    Default False.
            handle_git_repos (bool): Should Git repositories be detected and
                    handled? When this is False, Git repositories are handled
                    as simple directories instead of Git repositories.
                    Default True.
            dry_run (bool): Should the data be validated and bagged without
                    starting the Flow? When True, does not ingest into DERIVA
                    or start the Globus Automate Flow, and the return value
                    will not have valid DERIVA Flow information.
                    Default False.
            test_sub (bool): Should the submission be run in "test mode"
                    where the submission will be ingested into DERIVA and
                    immediately deleted? When True, the data will not remain
                    in DERIVA to be viewed and the Flow will terminate before
                    any curation step. Default False.
            verbose (bool): Should intermediate status messages be printed
                    out? Default False.

        Keyword Arguments:
            force_http (bool): Should the data be sent using HTTP instead of
                    Globus Transfer, even if Globus Transfer is available?
                    Because Globus Transfer is more robust than HTTP, it is
                    highly recommended to leave this False.
                    Default False.

        Other keyword arguments are passed directly to the ``make_bag()``
        function of the BDBag API (see
        https://github.com/fair-research/bdbag for details).
        """
        if verbose:
            print("Startup: Validating input")
        data_path = os.path.abspath(data_path)
        if not os.path.exists(data_path):
            raise FileNotFoundError(
                "Path '{}' does not exist".format(data_path))

        if catalog_id in self.catalogs.keys():
            if schema:
                raise ValueError(
                    "You may not specify a schema ('{}') when ingesting to "
                    "a named catalog ('{}'). Retry without specifying "
                    "a schema.".format(schema, catalog_id))
            schema = self.catalogs[catalog_id]
        # Pull out known kwargs
        force_http = kwargs.pop("force_http", False)

        if handle_git_repos:
            if verbose:
                print("Checking for a Git repository")
            # If Git repo, set output_dir appropriately
            try:
                repo = git.Repo(data_path, search_parent_directories=True)
            # Not Git repo
            except git.InvalidGitRepositoryError:
                if verbose:
                    print("Not a Git repo")
            # Path not found, turn into standard FileNotFoundError
            except git.NoSuchPathError:
                raise FileNotFoundError(
                    "Path '{}' does not exist".format(data_path))
            # Is Git repo
            else:
                if verbose:
                    print("Git repo found, collecting metadata")
                # Needs to not have slash at end - is known Git repo already,
                # slash interferes with os.path.basename/dirname
                if data_path.endswith("/"):
                    data_path = data_path[:-1]
                # Set output_dir to new dir named with HEAD commit hash
                new_dir_name = "{}_{}".format(os.path.basename(data_path),
                                              str(repo.head.commit))
                output_dir = os.path.join(os.path.dirname(data_path),
                                          new_dir_name)
                # Delete temp dir after archival
                delete_dir = True

        # If dir and not already BDBag, make BDBag
        if os.path.isdir(data_path) and not bdbag_api.is_bag(data_path):
            if verbose:
                print("Creating BDBag out of directory '{}'"
                      .format(data_path))
            # If output_dir specified, copy data to output dir first
            if output_dir:
                if verbose:
                    print("Copying data to '{}' before creating BDBag"
                          .format(output_dir))
                output_dir = os.path.abspath(output_dir)
                # If shutil.copytree is called when the destination dir is
                # inside the source dir by more than one layer, it will
                # recurse infinitely. (e.g. /source => /source/dir/dest)
                # Exactly one layer is technically okay
                # (e.g. /source => /source/dest), but it's easier to forbid
                # all parent/child dir cases.
                # Check for this error condition by determining if output_dir
                # is a child of data_path.
                if os.path.commonpath([data_path]) == os.path.commonpath(
                        [data_path, output_dir]):
                    raise ValueError(
                        "The output_dir ('{}') must not be in data_path "
                        "('{}')".format(output_dir, data_path))
                try:
                    shutil.copytree(data_path, output_dir)
                except FileExistsError:
                    raise FileExistsError(
                        ("The output directory must not exist. "
                         "Delete '{}' to submit.\nYou can set delete_dir=True "
                         "to avoid this issue in the future."
                         ).format(output_dir))
                # Process new dir instead of old path
                data_path = output_dir
            # If output_dir not specified, never delete data dir
            else:
                delete_dir = False
            # Make bag
            bdbag_api.make_bag(data_path, **kwargs)
            if not bdbag_api.is_bag(data_path):
                raise ValueError(
                    "Failed to create BDBag from {}".format(data_path))
            elif verbose:
                print("BDBag created at '{}'".format(data_path))

        # If dir (must be BDBag at this point), archive
        if os.path.isdir(data_path):
            if verbose:
                print("Archiving BDBag at '{}' using '{}'".format(
                    data_path, CONFIG["ARCHIVE_FORMAT"]))
            new_data_path = bdbag_api.archive_bag(data_path,
                                                  CONFIG["ARCHIVE_FORMAT"])
            if verbose:
                print("BDBag archived to file '{}'".format(new_data_path))
            # If requested (e.g. Git repo copied dir), delete data dir
            if delete_dir:
                if verbose:
                    print("Removing old directory '{}'".format(data_path))
                shutil.rmtree(data_path)
            # Overwrite data_path - don't care about dir for uploading
            data_path = new_data_path

        # Validate TableSchema in BDBag
        if verbose:
            print("Validating TableSchema in BDBag '{}'".format(data_path))
        validation_res = ts_validate(data_path, schema=schema)
        if not validation_res["is_valid"]:
            return {
                "success": False,
                "error": ("TableSchema invalid due to the following errors: "
                          "\n{}\n".format(validation_res["error"]))
            }
        elif verbose:
            print("Validation successful")

        # Now BDBag is archived file
        # Set path on destination
        dest_path = "{}{}".format(self.flow_info["cfde_ep_path"],
                                  os.path.basename(data_path))
        # If doing dry run, stop here before making Flow input
        if dry_run:
            return {
                "success": True,
                "message": ("Dry run validated successfully. "
                            "No data was transferred.")
            }

        # Set up Flow
        if verbose:
            print("Creating input for Flow")
        # If local EP exists (and not force_http), can use Transfer
        # Local EP fetched now in case GCP started after Client creation
        local_endpoint = globus_sdk.LocalGlobusConnectPersonal().endpoint_id
        if local_endpoint and not force_http:
            if verbose:
                print("Using local Globus Connect Personal Endpoint '{}'"
                      .format(local_endpoint))
            # Populate Transfer fields in Flow
            flow_id = self.flow_info["flow_id"]
            flow_input = {
                "source_endpoint_id": local_endpoint,
                "source_path": data_path,
                "cfde_ep_id": self.flow_info["cfde_ep_id"],
                "cfde_ep_path": dest_path,
                "cfde_ep_url": self.flow_info["cfde_ep_url"],
                "is_directory": False,
                "test_sub": test_sub,
                "dcc_id": dcc_id
            }
            if catalog_id:
                flow_input["catalog_id"] = str(catalog_id)
            if server:
                flow_input["server"] = server
        # Otherwise, we must PUT the BDBag on the server
        else:
            if verbose:
                print("No Globus Endpoint detected; using HTTP upload "
                      "instead")
            headers = {}
            self.__https_authorizer.set_authorization_header(headers)
            data_url = "{}{}".format(self.flow_info["cfde_ep_url"], dest_path)

            with open(data_path, 'rb') as bag_file:
                bag_data = bag_file.read()

            put_res = requests.put(data_url, data=bag_data, headers=headers)

            # Regenerate headers on 401
            if put_res.status_code == 401:
                self.__https_authorizer.handle_missing_authorization()
                self.__https_authorizer.set_authorization_header(headers)
                put_res = requests.put(data_url, data=bag_data,
                                       headers=headers)

            # Error message on failed PUT or any unexpected response
            if put_res.status_code >= 300:
                return {
                    "success": False,
                    "error": ("Could not upload BDBag to server "
                              "(error {}):\n{}".format(put_res.status_code,
                                                       put_res.content))
                }
            elif put_res.status_code != 200:
                print("Warning: HTTP upload returned status code {}, "
                      "which was unexpected.".format(put_res.status_code))

            if verbose:
                print("Upload successful to '{}': {} {}".format(
                    data_url, put_res.status_code, put_res.content))

            flow_id = self.flow_info["flow_id"]
            flow_input = {
                "source_endpoint_id": False,
                "data_url": data_url,
                "test_sub": test_sub,
                "dcc_id": dcc_id
            }
            if catalog_id:
                flow_input["catalog_id"] = str(catalog_id)
            if server:
                flow_input["server"] = server

        if verbose:
            print("Flow input populated:\n{}".format(
                json.dumps(flow_input, indent=4, sort_keys=True)))
        # Get Flow scope
        flow_def = self.flow_client.get_flow(flow_id)
        flow_scope = flow_def["globus_auth_scope"]
        # Start Flow
        if verbose:
            print("Starting Flow - Submitting data")
        try:
            flow_res = self.flow_client.run_flow(flow_id, flow_scope,
                                                 flow_input)
        except globus_sdk.GlobusAPIError as e:
            if e.http_status == 404:
                return {
                    "success": False,
                    "error": ("Could not access ingest Flow. Are you in the "
                              "CFDE DERIVA Demo Globus Group? Check your "
                              "membership or apply for access here: "
                              "https://app.globus.org/groups/a437abe3-c9a4-"
                              "11e9-b441-0efb3ba9a670/about")
                }
            else:
                raise
        self.last_flow_run = {
            "flow_id": flow_id,
            "flow_instance_id": flow_res["action_id"]
        }
        if verbose:
            print("Flow started successfully.")

        return {
            "success": True,
            "message": ("Started DERIVA ingest Flow\nFlow ID: {}\n"
                        "Flow Instance ID: {}".format(
                            flow_id, flow_res["action_id"])),
            "flow_id": flow_id,
            "flow_instance_id": flow_res["action_id"],
            "cfde_dest_path": dest_path,
            "http_link": "{}{}".format(self.flow_info["cfde_ep_url"],
                                       dest_path),
            "globus_web_link": ("https://app.globus.org/file-manager?"
                                "origin_id={}&origin_path={}".format(
                                    self.flow_info["cfde_ep_id"],
                                    os.path.dirname(dest_path)))
        }

    def check_status(self, flow_id=None, flow_instance_id=None, raw=False):
        """Check the status of a Flow. By default, check the status of the
        last Flow run with this instantiation of the client.

        Arguments:
            flow_id (str): The ID of the Flow that was run.
                    Default: The last run Flow ID.
            flow_instance_id (str): The ID of the Flow instance to check.
                    Default: The last Flow instance run with this client.
            raw (bool): Should the status results be returned?
                    Default: False, to print the results instead.
        """
        if not flow_id:
            flow_id = self.last_flow_run.get("flow_id")
        if not flow_instance_id:
            flow_instance_id = self.last_flow_run.get("flow_instance_id")
        if not flow_id or not flow_instance_id:
            raise ValueError("Flow not started and IDs not specified.")

        # Get Flow scope and status
        flow_def = self.flow_client.get_flow(flow_id)
        flow_status = self.flow_client.flow_action_status(
            flow_id, flow_def["globus_auth_scope"], flow_instance_id).data

        clean_status = (
            "\nStatus of {} (Flow ID {})\nThis instance ID: {}\n\n".format(
                flow_def["title"], flow_id, flow_instance_id))
        # Flow overall status
        # NOTE: Automate Flows do NOT fail automatically if an Action fails.
        # Any "FAILED" Flow has an error in the Flow itself.
        # Therefore, "SUCCEEDED" Flows are not guaranteed to have actually
        # succeeded.
        if flow_status["status"] == "ACTIVE":
            clean_status += "This Flow is still in progress.\n"
        elif flow_status["status"] == "INACTIVE":
            clean_status += ("This Flow has stalled, and may need help "
                             "to resume.\n")
        elif flow_status["status"] == "SUCCEEDED":
            clean_status += "This Flow has completed.\n"
        elif flow_status["status"] == "FAILED":
            clean_status += "This Flow has failed.\n"
        # "Details"
        if flow_status["details"].get("details"):
            if flow_status["details"]["details"].get("state_name"):
                clean_status += ("Current Flow Step: {}".format(
                    flow_status["details"]["details"]["state_name"]))
            # "cause" indicates a failure mode
            if flow_status["details"]["details"].get("cause"):
                cause = flow_status["details"]["details"]["cause"]
                # Try to pretty-print massive blob of state
                try:
                    str_cause, dict_cause = cause.split(" '{")
                    dict_cause = "{" + dict_cause.strip("'")
                    dict_cause = json.loads(dict_cause)["UserState"]
                    dict_cause.pop("prevars", None)
                    dict_cause.pop("vars", None)
                    dict_cause = json.dumps(dict_cause, indent=4,
                                            sort_keys=True)
                    cause = str_cause + "\n" + dict_cause
                except Exception:
                    pass
                clean_status += "Error: {}\n".format(cause)
        # Too onerous to pull out results of each step (when even available),
        # also would defeat dynamic config and tie client to Flow.
        # Instead, print out whatever is provided in `details` if Flow FAILED,
        # or print out the appropriate field(s) for the "SUCCEEDED" Flow.
        if flow_status["status"] == "SUCCEEDED":
            flow_output = flow_status["details"]["output"]
            # Each Step is only present in exactly one "SUCCEEDED" Flow
            # result, and they are mutually exclusive
            success_step = self.flow_info["success_step"]
            failure_step = self.flow_info["failure_step"]
            error_step = self.flow_info["error_step"]
            if success_step in flow_output.keys():
                clean_status += flow_output[success_step]["details"]["message"]
            elif failure_step in flow_output.keys():
                clean_status += flow_output[failure_step]["details"]["error"]
            elif error_step in flow_output.keys():
                clean_status += flow_output[error_step]["details"]["error"]
            else:
                clean_status += (
                    "Submission errored: The Flow has finished, but no final "
                    "details are available.")
        elif flow_status["status"] == "FAILED":
            # Every Flow step can supply failure messages differently, so
            # unfortunately printing out the entire details block is the only
            # way to actually get the error message out.
            # "cause" is printed earlier when available, so avoid
            # double-printing it
            if flow_status["details"].get("details", {}).get("cause"):
                clean_status += "Submission Flow failed."
            else:
                details = flow_status.get("details", "No details available")
                # Try to pretty-print JSON blob
                try:
                    details = json.dumps(details, indent=4, sort_keys=True)
                except Exception:
                    pass
                clean_status += "Submission Flow failed: {}".format(details)
        # Extra newline for cleanliness
        clean_status += "\n"
        # Return or print status
        if raw:
            return {
                "success": True,
                "status": flow_status,
                "clean_status": clean_status
            }
        else:
            print(clean_status)
        globus_sdk.auth.token_response.OAuthTokenResponse.by_resource_server
        No need to check expiration, that's handled by NativeClient.
        """
        with open(self.FILENAME) as fh:
            return json.load(fh)

    def clear_tokens(self):
        """
        Delete tokens from where they are stored. Before this method is
        called, tokens will have been revoked. This is both for cleanup and
        to ensure inactive tokens are not accidentally loaded in the future.
        """
        os.remove(self.FILENAME)


# Provide an instance of your config object to Native Client. The only
# restrictions are your client MUST have the three methods above,
# or it will throw an AttributeError.
app = NativeClient(client_id='7414f0b4-7d05-4bb6-bb00-076fa3f17cf5',
                   token_storage=MyTokenStorage())

# Calls read_tokens() then write_tokens()
app.login()

# Calls read_tokens()
app.load_tokens()

# Calls clear_tokens()
app.logout()
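# ---------------------------------------------------------------------------
# Not from the original source: since the snippet above is cut off before the
# first two methods, here is a complete minimal sketch of a storage object
# with all three required methods. MyTokenStorage and the filename are
# illustrative; the required interface is write_tokens/read_tokens/
# clear_tokens, per the comments above.
import json
import os


class MyTokenStorage:
    """Minimal token storage: a JSON file keyed by resource server."""
    FILENAME = os.path.expanduser('~/my_tokens.json')

    def write_tokens(self, tokens):
        # Called with tokens grouped by resource server, like
        # OAuthTokenResponse.by_resource_server.
        with open(self.FILENAME, 'w') as fh:
            json.dump(tokens, fh, indent=2)

    def read_tokens(self):
        # Return the same structure write_tokens() received; NativeClient
        # handles expiration checks.
        with open(self.FILENAME) as fh:
            return json.load(fh)

    def clear_tokens(self):
        # Tokens are revoked before this is called; just delete local state.
        os.remove(self.FILENAME)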