Beispiel #1
0
def quick_transfer(transfer_client, source_ep, dest_ep, path_list, timeout=None):
    """Perform a Globus Transfer and monitor for success.

    Arguments:
    transfer_client (TransferClient): An authenticated Transfer client.
    source_ep (str): The source Globus Endpoint ID.
    dest_ep (str): The destination Globus Endpoint ID.
    path_list (list of tuple of 2 str): A list of tuples containing the paths to transfer as
                                        (source, destination).
        Directory paths must end in a slash, and file paths must not.
        Example: [("/source/files/file.dat", "/dest/mydocs/doc.dat"),
                  ("/source/all_reports/", "/dest/reports/")]
    timeout (int): Maximum number of polling intervals (10 seconds each) to wait for the
                   transfer to complete before cancelling it and erroring.
                   Default None, which will wait until a transfer succeeds or fails
                       (0 is treated the same way).
                   If this argument is negative (e.g. -1), the transfer will submit
                       but not wait at all. There is then no error checking.

    Returns:
    str: ID of the Globus Transfer.

    Raises:
    globus_sdk.GlobusError: If a path pair mixes a file and a directory, the submission is
                            not accepted, the transfer reports an error event, or the
                            timeout elapses. In the last two cases the task is cancelled.
    """
    INTERVAL_SEC = 10
    tdata = globus_sdk.TransferData(transfer_client, source_ep, dest_ep, verify_checksum=True)
    for item in path_list:
        # endswith() is safe on empty strings, unlike indexing with [-1]
        source_is_dir = item[0].endswith("/")
        dest_is_dir = item[1].endswith("/")
        # Malformed: one side is a directory and the other is a file
        if source_is_dir != dest_is_dir:
            raise globus_sdk.GlobusError("Cannot transfer file to directory or vice-versa: "
                                         + str(item))
        if source_is_dir:
            tdata.add_item(item[0], item[1], recursive=True)
        else:
            tdata.add_item(item[0], item[1])

    res = transfer_client.submit_transfer(tdata)
    if res["code"] != "Accepted":
        raise globus_sdk.GlobusError("Failed to transfer files: Transfer " + res["code"])
    task_id = res["task_id"]

    # Negative timeout means submit-and-forget: no monitoring at all
    if timeout is not None and timeout < 0:
        return task_id

    iterations = 0
    # task_wait returns True once the task is no longer active
    while not transfer_client.task_wait(task_id,
                                        timeout=INTERVAL_SEC,
                                        polling_interval=INTERVAL_SEC):
        for event in transfer_client.task_event_list(task_id):
            if event["is_error"]:
                transfer_client.cancel_task(task_id)
                raise globus_sdk.GlobusError("Error transferring data: " + event["description"])
        # Count elapsed polls (not events), so the timeout applies even with no events
        iterations += 1
        if timeout and iterations >= timeout:
            transfer_client.cancel_task(task_id)
            raise globus_sdk.GlobusError("Transfer timed out after "
                                         + str(iterations * INTERVAL_SEC)
                                         + " seconds.")

    return task_id
Beispiel #2
0
def get_local_ep(transfer_client):
    """Discover the local Globus Connect Personal endpoint's ID, if possible.

    Searches the user's own endpoints ("my-endpoints" scope). Endpoints whose
    ``gcp_connected`` value is exactly ``False`` (a disconnected Globus Connect Personal)
    are rejected; any other value (``True`` or ``None``) is accepted.
    If more than one acceptable endpoint remains, the user is prompted to choose.

    Arguments:
    transfer_client (TransferClient): An authenticated Transfer client.

    Returns:
    str: The local GCP EP ID if it was discovered.
    If the ID is not discovered, an exception will be raised.
        (globus_sdk.GlobusError unless the user cancels the search,
         in which case SystemExit is raised)
    """
    pgr_res = transfer_client.endpoint_search(filter_scope="my-endpoints")
    ep_candidates = pgr_res.data
    # Nothing found
    if len(ep_candidates) < 1:
        raise globus_sdk.GlobusError("Error: No local endpoints found")

    # Exactly one candidate
    if len(ep_candidates) == 1:
        # Only an explicit False means "is GCP, is not on";
        # None must not be mistaken for a disconnected GCP
        if ep_candidates[0]["gcp_connected"] is False:
            raise globus_sdk.GlobusError("Error: Globus Connect is not running")
        # Is GCServer or GCP and connected
        return ep_candidates[0]["id"]

    # >1 found: filter out disconnected GCP
    ep_connections = [candidate for candidate in ep_candidates
                      if candidate["gcp_connected"] is not False]
    if len(ep_connections) < 1:
        # Nothing left after filtering
        raise globus_sdk.GlobusError("Error: No local endpoints running")
    if len(ep_connections) == 1:
        # The filter above already guarantees this endpoint is usable
        return ep_connections[0]["id"]

    # Still >1 found: prompt user
    print_("Multiple endpoints found:")
    for number, ep in enumerate(ep_connections, start=1):
        print_(number, ": ", ep["display_name"], "\t", ep["id"])
    print_("\nPlease choose the endpoint on this machine")
    ep_num = 0
    while ep_num == 0:
        usr_choice = input("Enter the number of the correct endpoint (-1 to cancel): ")
        try:
            ep_choice = int(usr_choice)
        except ValueError:
            # Only non-numeric input is retried; other errors (e.g. EOF) propagate
            print_("Invalid input")
            continue
        if ep_choice == -1:
            # User wants to quit
            ep_num = -1
        elif 1 <= ep_choice <= len(ep_connections):
            # Valid selection
            ep_num = ep_choice
        else:
            # Invalid number
            print_("Invalid selection")

    if ep_num == -1:
        print_("Cancelling")
        raise SystemExit
    return ep_connections[ep_num-1]["id"]
Beispiel #3
0
def custom_transfer(transfer_client,
                    source_ep,
                    dest_ep,
                    path_list,
                    interval=DEFAULT_INTERVAL,
                    inactivity_time=DEFAULT_INACTIVITY_TIME,
                    notify=True):
    """Perform a Globus Transfer.

    This is a generator: nothing is submitted until iteration begins.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        source_ep (str): The source Globus Endpoint ID.
        dest_ep (str): The destination Globus Endpoint ID.
        path_list (list of tuple of 2 str): A list of tuples containing the paths to transfer as
                ``(source, destination)``. The list is not modified.

                **Example**::

                    [("/source/files/file.dat", "/dest/mydocs/doc.dat"),
                     ("/source/all_reports/", "/dest/reports/")]

        interval (int): Number of seconds to wait before polling Transfer status.
                Minimum ``1``. **Default**: ``DEFAULT_INTERVAL``.
        inactivity_time (int): Number of seconds a Transfer is allowed to go without progress
                before being cancelled. **Default**: ``DEFAULT_INACTIVITY_TIME``.
        notify (bool): When ``True``, trigger a notification email from Globus to the user when
                the Transfer succeeds or fails. When ``False``, disable the notification.
                **Default**: ``True``.

    Yields:
        dict: An error from the transfer, or (last) a success status.
                Every yielded dict has ``success`` (bool) and ``finished`` (bool) keys.

    Accepts via ``.send()``:
        *bool*: ``True``: Continue the Transfer
                ``False``: Cancel the Transfer
                **Default**: ``True``

    Raises:
        globus_sdk.GlobusError: If a path check fails, a directory would be transferred into
                a file, or the Transfer submission is not accepted.
    """
    # Local import: destination paths are POSIX regardless of the local OS
    import posixpath

    # Ensure paths are POSIX; build a new list so the caller's path_list is not mutated
    posix_paths = [(posixify_path(path[0]), posixify_path(path[1])) for path in path_list]

    # TODO: (LW) Handle transfers with huge number of files
    # If a TransferData object is too large, Globus might timeout
    #   before it can be completely uploaded.
    # So, we need to be able to check the size of the TD object and, if need be, send it early.
    if interval < 1:
        interval = 1
    deadline = datetime.utcfromtimestamp(int(time.time()) + inactivity_time)
    tdata = globus_sdk.TransferData(transfer_client,
                                    source_ep,
                                    dest_ep,
                                    deadline=deadline,
                                    verify_checksum=True,
                                    notify_on_succeeded=notify,
                                    notify_on_failed=notify,
                                    notify_on_inactive=notify)
    for item in posix_paths:
        # Check if source path is directory or missing
        source_res = globus_check_directory(transfer_client,
                                            source_ep,
                                            item[0],
                                            allow_missing=False)
        if not source_res["success"]:
            raise globus_sdk.GlobusError(source_res["error"])
        source_is_dir = source_res["is_dir"]

        # Check if dest path is directory
        dest_res = globus_check_directory(transfer_client,
                                          dest_ep,
                                          item[1],
                                          allow_missing=True)
        if not dest_res["success"]:
            raise globus_sdk.GlobusError(dest_res["error"])
        dest_exists = dest_res["exists"]
        dest_is_dir = dest_res["is_dir"]

        # Transfer dir
        if source_is_dir and (not dest_exists or dest_is_dir):
            tdata.add_item(item[0], item[1], recursive=True)
        # Transfer non-dir
        elif not source_is_dir and (not dest_exists or not dest_is_dir):
            tdata.add_item(item[0], item[1])
        # Transfer non-dir into dir
        # TODO: Is this logic user-friendly or is it surprising?
        # Take non-dir source filename, Transfer to dest dir+filename
        elif not source_is_dir and (dest_exists and dest_is_dir):
            # posixpath (not os.path) so the separator is "/" even on Windows
            new_dest = posixpath.join(item[1], posixpath.basename(item[0]))
            tdata.add_item(item[0], new_dest)
        # Malformed - Cannot transfer dir into non-dir
        else:
            raise globus_sdk.GlobusError(
                "Cannot transfer a directory into a file: " + str(item))

    res = transfer_client.submit_transfer(tdata)
    if res["code"] != "Accepted":
        raise globus_sdk.GlobusError("Failed to transfer files: Transfer " +
                                     res["code"])
    task_id = res["task_id"]

    error_timestamps = set()
    progress_timestamps = set()
    # while Transfer is active
    while not transfer_client.task_wait(
            task_id, timeout=interval, polling_interval=interval):
        made_progress = False
        cancelled = False
        for event in transfer_client.task_event_list(task_id):
            # Only process error events that have not been presented to the user
            # Events do not have UUIDs, so if there are multiple simultaneous errors
            #   only one of them will be processed
            if event["is_error"] and event["time"] not in error_timestamps:
                error_timestamps.add(event["time"])
                ret_event = deepcopy(event.data)
                # yield value should always have success: bool
                ret_event["success"] = False
                ret_event["finished"] = False
                # User can cancel Transfer with .send(False)
                cont = yield ret_event
                if cont is False:
                    transfer_client.cancel_task(task_id)
                    # Wait until Transfer is no longer active after cancellation
                    while not transfer_client.task_wait(
                            task_id, timeout=1, polling_interval=1):
                        pass
                    cancelled = True
                    break
            # The event list is re-read on every poll, so only PROGRESS events that
            #   have not been seen before count as new progress
            elif event["code"] == "PROGRESS" and event["time"] not in progress_timestamps:
                progress_timestamps.add(event["time"])
                made_progress = True
        # If new progress has been made, move deadline forward (once per poll)
        if made_progress and not cancelled:
            new_deadline = datetime.utcfromtimestamp(
                int(time.time()) + inactivity_time)
            new_doc = {"DATA_TYPE": "task", "deadline": str(new_deadline)}
            transfer_client.update_task(task_id, new_doc)
    # Transfer is no longer active; now check if succeeded
    task = transfer_client.get_task(task_id).data
    task["success"] = (task["status"] == "SUCCEEDED")
    task["finished"] = True
    yield task
Beispiel #4
0
def globus_check_directory(transfer_client,
                           endpoint,
                           path,
                           allow_missing=False):
    """Check if a path on a Globus Endpoint is a directory or file.

    The path itself is listed first. If that fails with an unrecognized error,
    the parent directory is listed on the same endpoint and searched for the item.

    Arguments:
        transfer_client (TransferClient): An authenticated Transfer client.
        endpoint (str): The Endpoint ID.
        path (str): The path on the Endpoint to check.
        allow_missing (bool): When ``True``, the path not being found is not an error.
                When ``False``, the path must exist for the check to succeed.
                **Default**: ``False``.

    Returns:
        dict: Results of the check.
            success (bool): ``True`` if the check was able to be performed.
            error (str): The error encountered, if any.
            exists (bool): ``True`` iff the path exists on the endpoint.
                    If ``allow_missing`` is ``False``, ``exists`` being ``False`` is an error.
            is_dir (bool): ``True`` iff the path is confirmed to lead to a directory.
            is_file (bool): ``True`` iff the path is confirmed to lead to a file.

    Raises:
        globus_sdk.GlobusError: If the parent directory listing contains more than one
                item with the requested name.

    Note: ``is_dir`` and ``is_file`` will both be ``False`` if ``allow_missing`` is ``True``
            and ``exists`` is ``False``.
    """
    # is_dir has three states:
    #   True (dir confirmed), implies exists is True
    #   False (file confirmed), implies exists is True
    #   None (no information)
    is_dir = None
    # exists can be:
    #   True (exists, type unknown), also implied if is_dir is not None
    #   False (confirmed missing)
    #   None (no information)
    exists = None
    # error can either be None (no error) or a string (error)
    # The presence of an error implies success is False
    error = None

    # Try operation_ls on the path, which gives actionable info about the path
    try:
        transfer_client.operation_ls(endpoint, path=path)
        is_dir = True
    except globus_sdk.TransferAPIError as e:
        # If error indicates path exists but is not dir, is not dir
        if e.code == "ExternalError.DirListingFailed.NotDirectory":
            is_dir = False
        # Too many files in dir indicates is dir
        elif e.code == "ExternalError.DirListingFailed.SizeLimit":
            is_dir = True
        # Not found must be logged
        elif e.code == "ClientError.NotFound":
            exists = False
        # Else, retry on parent dir (some other error occurred)
        else:
            try:
                # Drop trailing slashes so the split yields the item's own name
                #   (a bare "/" is left as-is)
                trimmed_path = path.rstrip("/") or path
                parent, item_name = os.path.split(trimmed_path)
                # List the parent on the same endpoint
                parent_ls = transfer_client.operation_ls(endpoint, path=parent)
                type_list = [
                    x["type"] for x in parent_ls["DATA"]
                    if x["name"] == item_name
                ]
                # If item_name not found in list, other error occurred on missing path
                # Odd, but still a missing path
                if len(type_list) < 1:
                    exists = False
                # If multiple hits, panic (shouldn't occur, but...)
                # Technically possible in GDrive connector?
                elif len(type_list) > 1:
                    raise globus_sdk.GlobusError(
                        "Multiple items with name '{}' in path '{}' "
                        "on endpoint '{}'".format(item_name, parent, endpoint))
                else:
                    # Otherwise we have exactly one hit - the correct node
                    item_type = type_list[0]
                    if item_type == "dir":
                        is_dir = True
                    elif item_type == "file":
                        is_dir = False
                    # If not file or dir, but does exist, log an error
                    else:
                        exists = True
                        error = (
                            "Path '{}' leads to a '{}', not a file or directory"
                            .format(path, item_type))
            except globus_sdk.TransferAPIError as e:
                # Size limit means we can't figure out this path
                if e.code == "ExternalError.DirListingFailed.SizeLimit":
                    error = (
                        "Unable to check type of path '{}': Parent directory too large"
                        .format(path))
                # Not found must be logged (not sure this branch is possible)
                elif e.code == "ClientError.NotFound":
                    exists = False
                # Can't handle other error on parent
                else:
                    error = str(e)

    # If path must exist but doesn't, set error
    if exists is False and allow_missing is False:
        error = "Path '{}' not found on endpoint '{}'".format(path, endpoint)

    return {
        "success": (error is None),
        "error": error,
        "exists": (exists or is_dir is not None),
        "is_dir": (is_dir is True),
        "is_file": (is_dir is False)
    }