def quick_transfer(transfer_client, source_ep, dest_ep, path_list, timeout=None):
    """Perform a Globus Transfer and monitor for success.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        source_ep (str): The source Globus Endpoint ID.
        dest_ep (str): The destination Globus Endpoint ID.
        path_list (list of tuple of 2 str): A list of tuples containing the paths to transfer
                as (source, destination). Directory paths must end in a slash,
                and file paths must not.
                Example: [("/source/files/file.dat", "/dest/mydocs/doc.dat"),
                          ("/source/all_reports/", "/dest/reports/")]
        timeout (int): Number of polling periods (each period is one ``task_wait`` call of
                INTERVAL_SEC = 10 seconds) to wait for the transfer to complete
                before cancelling it and erroring.
                Default None, which will wait until the transfer succeeds or fails
                (0 behaves the same way).
                If this argument is -1 (or any negative number), the transfer will
                submit but not wait at all. There is then no error checking.

    Returns:
        str: ID of the Globus Transfer.

    Raises:
        globus_sdk.GlobusError: If a path pair mixes a file with a directory, if the
                submission is not accepted, if an error event is seen while waiting,
                or if the timeout is reached. In the last two cases the task is
                cancelled before raising.
    """
    INTERVAL_SEC = 10
    tdata = globus_sdk.TransferData(transfer_client, source_ep, dest_ep, verify_checksum=True)
    for item in path_list:
        source_path, dest_path = item[0], item[1]
        # endswith() is safe on empty strings, unlike indexing with [-1]
        source_is_dir = source_path.endswith("/")
        dest_is_dir = dest_path.endswith("/")
        # Malformed: one side is a directory and the other is a file
        if source_is_dir != dest_is_dir:
            raise globus_sdk.GlobusError("Cannot transfer file to directory or vice-versa: "
                                         + str(item))
        if source_is_dir:
            tdata.add_item(source_path, dest_path, recursive=True)
        else:
            tdata.add_item(source_path, dest_path)

    res = transfer_client.submit_transfer(tdata)
    if res["code"] != "Accepted":
        raise globus_sdk.GlobusError("Failed to transfer files: Transfer " + res["code"])
    task_id = res["task_id"]

    # None (the default) must wait for completion, as documented; only a negative
    # timeout means "submit and return immediately".
    should_wait = timeout is None or timeout >= 0
    iterations = 0
    while should_wait and not transfer_client.task_wait(
            task_id, timeout=INTERVAL_SEC, polling_interval=INTERVAL_SEC):
        # One more polling period has elapsed without the task finishing.
        # Counting it before the timeout check keeps the reported time accurate.
        iterations += 1
        for event in transfer_client.task_event_list(task_id):
            if event["is_error"]:
                transfer_client.cancel_task(task_id)
                raise globus_sdk.GlobusError("Error transferring data: "
                                             + event["description"])
        # A falsy timeout (None or 0) never times out
        if timeout and iterations >= timeout:
            transfer_client.cancel_task(task_id)
            raise globus_sdk.GlobusError("Transfer timed out after "
                                         + str(iterations * INTERVAL_SEC) + " seconds.")
    return task_id
def get_local_ep(transfer_client):
    """Discover the local Globus Connect Personal endpoint's ID, if possible.

    The search covers the endpoints returned by ``endpoint_search`` with
    ``filter_scope="my-endpoints"``. Endpoints whose ``gcp_connected`` value is
    ``False`` (a Globus Connect Personal endpoint that is not running) are rejected;
    any other value is accepted. If more than one acceptable endpoint remains,
    the user is prompted on the console to choose one.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.

    Returns:
        str: The local GCP EP ID if it was discovered.
                If the ID is not discovered, an exception will be raised.
                (globus_sdk.GlobusError unless the user cancels the search)

    Raises:
        globus_sdk.GlobusError: If no endpoint is found or none is running.
        SystemExit: If the user enters -1 at the selection prompt.
    """
    pgr_res = transfer_client.endpoint_search(filter_scope="my-endpoints")
    ep_candidates = pgr_res.data

    # Nothing found
    if len(ep_candidates) < 1:
        raise globus_sdk.GlobusError("Error: No local endpoints found")

    # Exactly one candidate
    if len(ep_candidates) == 1:
        # Only an explicit False marks a GCP endpoint that is not on.
        # (A truthiness test would also reject None, contradicting the filter below.)
        if ep_candidates[0]["gcp_connected"] is False:
            raise globus_sdk.GlobusError("Error: Globus Connect is not running")
        # Is GCServer or GCP and connected
        return ep_candidates[0]["id"]

    # >1 found: filter out disconnected GCP
    ep_connections = [candidate for candidate in ep_candidates
                      if candidate["gcp_connected"] is not False]
    if len(ep_connections) < 1:
        raise globus_sdk.GlobusError("Error: No local endpoints running")
    if len(ep_connections) == 1:
        # The filter already removed disconnected GCP endpoints
        return ep_connections[0]["id"]

    # Still >1 found: prompt user
    print_("Multiple endpoints found:")
    for number, ep in enumerate(ep_connections, start=1):
        print_(number, ": ", ep["display_name"], "\t", ep["id"])
    print_("\nPlease choose the endpoint on this machine")
    ep_num = 0
    while ep_num == 0:
        usr_choice = input("Enter the number of the correct endpoint (-1 to cancel): ")
        try:
            ep_choice = int(usr_choice)
        except ValueError:
            print_("Invalid input")
            continue
        if ep_choice == -1:
            # User wants to quit
            ep_num = -1
        elif 1 <= ep_choice <= len(ep_connections):
            # Valid selection
            ep_num = ep_choice
        else:
            print_("Invalid selection")

    if ep_num == -1:
        print_("Cancelling")
        raise SystemExit
    return ep_connections[ep_num - 1]["id"]
def custom_transfer(transfer_client, source_ep, dest_ep, path_list, interval=DEFAULT_INTERVAL,
                    inactivity_time=DEFAULT_INACTIVITY_TIME, notify=True):
    """Perform a Globus Transfer.

    This is a generator: nothing is submitted until it is first advanced.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        source_ep (str): The source Globus Endpoint ID.
        dest_ep (str): The destination Globus Endpoint ID.
        path_list (list of tuple of 2 str): A list of tuples containing the paths to transfer as
                ``(source, destination)``. The list itself is not modified.

                **Example**::

                    [("/source/files/file.dat", "/dest/mydocs/doc.dat"),
                     ("/source/all_reports/", "/dest/reports/")]

        interval (int): Number of seconds to wait before polling Transfer status.
                Minimum ``1``. **Default**: ``DEFAULT_INTERVAL``.
        inactivity_time (int): Number of seconds a Transfer is allowed to go without progress
                before being cancelled. **Default**: ``DEFAULT_INACTIVITY_TIME``.
        notify (bool): When ``True``, trigger a notification email from Globus to the user when
                the Transfer succeeds or fails. When ``False``, disable the notification.
                **Default**: ``True``.

    Yields:
        dict: An error from the transfer, or (last) a success status.
                Every yielded dict has the keys ``success`` (bool) and ``finished`` (bool).

    Accepts via ``.send()``:
        *bool*: ``True``: Continue the Transfer
                ``False``: Cancel the Transfer
                **Default**: ``True``

    Raises:
        globus_sdk.GlobusError: If a path check fails, a directory would be transferred
                into a file, or the submission is not accepted.
    """
    # Local import keeps this block self-contained; Globus paths are always POSIX,
    # so they must not be joined with the host OS separator.
    import posixpath

    # Ensure paths are POSIX (built as a new list so the caller's list is left untouched)
    path_list = [(posixify_path(path[0]), posixify_path(path[1])) for path in path_list]

    # TODO: (LW) Handle transfers with huge number of files
    # If a TransferData object is too large, Globus might timeout
    #   before it can be completely uploaded.
    # So, we need to be able to check the size of the TD object and, if need be, send it early.
    if interval < 1:
        interval = 1
    deadline = datetime.utcfromtimestamp(int(time.time()) + inactivity_time)
    tdata = globus_sdk.TransferData(transfer_client, source_ep, dest_ep,
                                    deadline=deadline, verify_checksum=True,
                                    notify_on_succeeded=notify, notify_on_failed=notify,
                                    notify_on_inactive=notify)
    for item in path_list:
        # Check if source path is directory or missing
        source_res = globus_check_directory(transfer_client, source_ep, item[0],
                                            allow_missing=False)
        if not source_res["success"]:
            raise globus_sdk.GlobusError(source_res["error"])
        source_is_dir = source_res["is_dir"]

        # Check if dest path is directory
        dest_res = globus_check_directory(transfer_client, dest_ep, item[1],
                                          allow_missing=True)
        if not dest_res["success"]:
            raise globus_sdk.GlobusError(dest_res["error"])
        dest_exists = dest_res["exists"]
        dest_is_dir = dest_res["is_dir"]

        # Transfer dir
        if source_is_dir and (not dest_exists or dest_is_dir):
            tdata.add_item(item[0], item[1], recursive=True)
        # Transfer non-dir
        elif not source_is_dir and (not dest_exists or not dest_is_dir):
            tdata.add_item(item[0], item[1])
        # Transfer non-dir into dir
        # TODO: Is this logic user-friendly or is it surprising?
        # Take non-dir source filename, Transfer to dest dir+filename
        elif not source_is_dir and (dest_exists and dest_is_dir):
            new_dest = posixpath.join(item[1], posixpath.basename(item[0]))
            tdata.add_item(item[0], new_dest)
        # Malformed - Cannot transfer dir into non-dir
        else:
            raise globus_sdk.GlobusError(
                "Cannot transfer a directory into a file: " + str(item))

    res = transfer_client.submit_transfer(tdata)
    if res["code"] != "Accepted":
        raise globus_sdk.GlobusError("Failed to transfer files: Transfer " + res["code"])
    task_id = res["task_id"]

    error_timestamps = set()
    progress_timestamps = set()
    # while Transfer is active
    while not transfer_client.task_wait(task_id, timeout=interval, polling_interval=interval):
        deadline_extended = False
        for event in transfer_client.task_event_list(task_id):
            # Only process error events that have not been presented to the user
            # Events do not have UUIDs, so if there are multiple simultaneous errors
            #   only the last (chronologically) error will be processed
            if event["is_error"] and event["time"] not in error_timestamps:
                error_timestamps.add(event["time"])
                ret_event = deepcopy(event.data)
                # yield value should always have success: bool
                ret_event["success"] = False
                ret_event["finished"] = False
                # User can cancel Transfer with .send(False)
                cont = yield ret_event
                if cont is False:
                    transfer_client.cancel_task(task_id)
                    # Wait until Transfer is no longer active after cancellation
                    while not transfer_client.task_wait(task_id, timeout=1,
                                                        polling_interval=1):
                        pass
                    break
            # If NEW progress has been made, move deadline forward.
            # PROGRESS events already seen on an earlier poll must not count again,
            # otherwise a stalled Transfer would have its deadline extended forever.
            elif event["code"] == "PROGRESS" and event["time"] not in progress_timestamps:
                progress_timestamps.add(event["time"])
                # One deadline update per poll is enough
                if not deadline_extended:
                    new_deadline = datetime.utcfromtimestamp(
                        int(time.time()) + inactivity_time)
                    new_doc = {"DATA_TYPE": "task", "deadline": str(new_deadline)}
                    transfer_client.update_task(task_id, new_doc)
                    deadline_extended = True

    # Transfer is no longer active; now check if succeeded
    task = transfer_client.get_task(task_id).data
    task["success"] = (task["status"] == "SUCCEEDED")
    task["finished"] = True
    yield task
def globus_check_directory(transfer_client, endpoint, path, allow_missing=False):
    """Check if a path on a Globus Endpoint is a directory or file.

    The path is first listed directly with ``operation_ls``. If that fails with an
    error that does not settle the question, the parent directory is listed on the
    same endpoint and searched for the item's name.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        endpoint (str): The Endpoint ID.
        path (str): The path on the Endpoint to check.
        allow_missing (bool): When ``True``, the path not being found is not an error.
                When ``False``, the path must exist for the check to succeed.
                **Default**: ``False``.

    Returns:
        dict: Results of the check.
            success (bool): ``True`` if the check was able to be performed.
            error (str): The error encountered, if any.
            exists (bool): ``True`` iff the path exists on the endpoint.
                    If ``allow_missing`` is ``False``, ``exists`` being ``False`` is an error.
            is_dir (bool): ``True`` iff the path is confirmed to lead to a directory.
            is_file (bool): ``True`` iff the path is confirmed to lead to a file.

        Note: ``is_dir`` and ``is_file`` will both be ``False`` if ``allow_missing`` is ``True``
                and ``exists`` is ``False``.

    Raises:
        globus_sdk.GlobusError: If the parent listing contains more than one item
                with the requested name.
    """
    # is_dir has three states:
    #   True (dir confirmed), implies exists is True
    #   False (file confirmed), implies exists is True
    #   None (no information)
    is_dir = None
    # exists can be:
    #   True (exists, type unknown), also implied if is_dir is not None
    #   False (confirmed missing)
    #   None (no information)
    exists = None
    # error can either be None (no error) or a string (error)
    # The presence of an error implies success is False
    error = None

    # Try operation_ls on the path, which gives actionable info about the path
    try:
        transfer_client.operation_ls(endpoint, path=path)
        is_dir = True
    except globus_sdk.TransferAPIError as e:
        # If error indicates path exists but is not dir, is not dir
        if e.code == "ExternalError.DirListingFailed.NotDirectory":
            is_dir = False
        # Too many files in dir indicates is dir
        elif e.code == "ExternalError.DirListingFailed.SizeLimit":
            is_dir = True
        # Not found must be logged
        elif e.code == "ClientError.NotFound":
            exists = False
        # Else, retry on parent dir (some other error occurred)
        else:
            try:
                # Drop trailing slashes so a directory path still yields its own name;
                # fall back to the raw path when it consists only of slashes.
                parent, item_name = os.path.split(path.rstrip("/") or path)
                # The listing must target the same endpoint as the original check
                parent_ls = transfer_client.operation_ls(endpoint, path=parent)
                type_list = [x["type"] for x in parent_ls["DATA"] if x["name"] == item_name]
                # If item_name not found in list, other error occurred on missing path
                # Odd, but still a missing path
                if len(type_list) < 1:
                    exists = False
                # If multiple hits, panic (shouldn't occur, but...)
                # Technically possible in GDrive connector?
                elif len(type_list) > 1:
                    raise globus_sdk.GlobusError(
                        "Multiple items with name '{}' in path '{}' "
                        "on endpoint '{}'".format(item_name, parent, endpoint))
                else:
                    # Otherwise we have exactly one hit - the correct node
                    item_type = type_list[0]
                    if item_type == "dir":
                        is_dir = True
                    elif item_type == "file":
                        is_dir = False
                    # If not file or dir, but does exist, log an error
                    else:
                        exists = True
                        error = ("Path '{}' leads to a '{}', not a file or directory"
                                 .format(path, item_type))
            except globus_sdk.TransferAPIError as parent_err:
                # Size limit means we can't figure out this path
                if parent_err.code == "ExternalError.DirListingFailed.SizeLimit":
                    error = ("Unable to check type of path '{}': Parent directory too large"
                             .format(path))
                # Not found must be logged (not sure this branch is possible)
                elif parent_err.code == "ClientError.NotFound":
                    exists = False
                # Can't handle other error on parent
                else:
                    error = str(parent_err)

    # If path must exist but doesn't, set error
    if exists is False and allow_missing is False:
        error = "Path '{}' not found on endpoint '{}'".format(path, endpoint)

    return {
        "success": (error is None),
        "error": error,
        "exists": (exists or is_dir is not None),
        "is_dir": (is_dir is True),
        "is_file": (is_dir is False)
    }