Ejemplo n.º 1
0
def test_load_accepts_string_or_iterable_requested_scopes(
        mem_storage, mock_tokens):
    """Scopes given as one space-separated string must behave the same
    as an iterable of scopes for both load_tokens and get_authorizers."""
    client = NativeClient(client_id=str(uuid4()), token_storage=mem_storage)
    mem_storage.tokens = mock_tokens
    scope_string = ' '.join((
        'openid', 'profile', 'email', 'custom_scope',
        'urn:globus:auth:scope:transfer.api.globus.org:all',
    ))
    loaded = client.load_tokens(scope_string)
    assert len(loaded) == 3
    assert len(client.get_authorizers(scope_string)) == 3
Ejemplo n.º 2
0
def test_client_get_authorizers(mock_tokens, mock_refresh_token_authorizer,
                                mem_storage):
    """Resource servers holding a refresh token must get a
    RefreshTokenAuthorizer; every other server a plain
    AccessTokenAuthorizer."""
    mock_tokens['resource.server.org']['refresh_token'] = '<Refresh Token>'
    mem_storage.tokens = mock_tokens
    client = NativeClient(client_id=str(uuid4()), token_storage=mem_storage)
    for resource_server, authorizer in client.get_authorizers().items():
        expected_cls = (
            globus_sdk.RefreshTokenAuthorizer
            if resource_server == 'resource.server.org'
            else globus_sdk.AccessTokenAuthorizer
        )
        assert isinstance(authorizer, expected_cls)
Ejemplo n.º 3
0
 def __init__(self):
     """Lazily build the shared TransferClient on first construction.

     The authenticated client is cached on the ``Globus`` class so every
     later instantiation reuses the same TransferClient (and the same
     login session) instead of prompting again.
     """
     # Bug fix: compare against the None singleton with ``is``, not
     # ``==`` — equality can be overridden and is not the idiomatic test.
     if Globus.transfer_client is None:
         native_client = NativeClient(
             client_id=settings.globus.get("client_id"),
             app_name="Globus Endpoint Performance Dashboard",
             default_scopes=settings.globus.get("scopes"))
         # Headless login flow; refresh tokens keep the session usable
         # without re-prompting the user.
         native_client.login(no_local_server=True, refresh_tokens=True)
         transfer_authorizer = native_client.get_authorizers().get(
             "transfer.api.globus.org")
         Globus.transfer_client = TransferClient(transfer_authorizer)
Ejemplo n.º 4
0
def test_authorizer_refresh_hook(mock_tokens, mock_refresh_token_authorizer,
                                 mem_storage):
    """Forcing a token refresh must persist the refreshed tokens back to
    storage via the client's on-refresh hook."""
    mock_tokens['resource.server.org']['refresh_token'] = '<Refresh Token>'
    mem_storage.tokens = mock_tokens
    client = NativeClient(client_id=str(uuid4()), token_storage=mem_storage)
    authorizer = client.get_authorizers()['resource.server.org']
    # Expire the authorizer so the next expiration check triggers a refresh.
    authorizer.expires_at = 0
    authorizer.check_expiration_time()

    assert 'example.on.refresh.success' in client.load_tokens()
Ejemplo n.º 5
0
    def preactivate_globus(self):
        """
        Read the local globus endpoint UUID from ~/.zstash.ini.
        If the ini file does not exist, create an ini file with empty values,
        and try to find the local endpoint UUID based on the FQDN.

        Then log in to Globus, store a TransferClient on
        ``self.transfer_client``, and auto-activate both the HPSS and the
        local endpoints, failing the test if auto-activation does not work.
        Skips the test when no local endpoint UUID can be determined.
        """
        local_endpoint = None
        ini_path = os.path.expanduser("~/.zstash.ini")
        ini = configparser.ConfigParser()
        if ini.read(ini_path):
            # Existing config file: pick up the endpoint UUID if set.
            if "local" in ini.sections():
                local_endpoint = ini["local"].get("globus_endpoint_uuid")
        else:
            # No config yet: write a template with an empty UUID so the
            # user can fill it in later.
            ini["local"] = {"globus_endpoint_uuid": ""}
            try:
                with open(ini_path, "w") as f:
                    ini.write(f)
            except Exception as e:
                self.fail(e)
        if not local_endpoint:
            # Fall back to mapping the host's FQDN to a known endpoint.
            fqdn = socket.getfqdn()
            for pattern in regex_endpoint_map.keys():
                if re.fullmatch(pattern, fqdn):
                    local_endpoint = regex_endpoint_map.get(pattern)
                    break
        if not local_endpoint:
            # Skip (rather than fail) so the suite passes on hosts that
            # simply have no Globus endpoint configured.
            # self.fail("{} does not have the local Globus endpoint set".format(ini_path))
            self.skipTest(
                "{} does not have the local Globus endpoint set".format(
                    ini_path))

        # Headless Globus login with refresh tokens, then build the
        # TransferClient used by the tests.
        native_client = NativeClient(
            client_id="6c1629cf-446c-49e7-af95-323c6412397f",
            app_name="Zstash",
            default_scopes=
            "openid urn:globus:auth:scope:transfer.api.globus.org:all",
        )
        native_client.login(no_local_server=True, refresh_tokens=True)
        transfer_authorizer = native_client.get_authorizers().get(
            "transfer.api.globus.org")
        self.transfer_client = TransferClient(transfer_authorizer)

        # Both endpoints must be activated (with >= 600 s left) before any
        # transfer can run.
        for ep_id in [hpss_globus_endpoint, local_endpoint]:
            r = self.transfer_client.endpoint_autoactivate(ep_id,
                                                           if_expires_in=600)
            if r.get("code") == "AutoActivationFailed":
                self.fail(
                    "The {} endpoint is not activated or the current activation expires soon. Please go to https://app.globus.org/file-manager/collections/{} and (re)-activate the endpoint."
                    .format(ep_id, ep_id))
Ejemplo n.º 6
0
def globus_transfer(  # noqa: C901
        remote_endpoint,
        remote_path,
        name,
        transfer_type,
        non_blocking=False):
    """
    Transfer a single zstash file between the local machine and a remote
    (HPSS) Globus endpoint, exiting the process on any failure.

    Read the local globus endpoint UUID from ~/.zstash.ini.
    If the ini file does not exist, create an ini file with empty values,
    and try to find the local endpoint UUID based on the FQDN.

    :param remote_endpoint: Globus endpoint UUID, or a key of
        ``hpss_endpoint_map`` (looked up case-insensitively).
    :param remote_path: remote directory that holds (or receives) ``name``.
    :param name: file name to transfer.
    :param transfer_type: ``"get"`` downloads from the remote endpoint to
        the current working directory; any other value uploads.
    :param non_blocking: when True, return immediately after submitting
        the transfer instead of polling it to completion.
    """
    ini_path = os.path.expanduser("~/.zstash.ini")
    ini = configparser.ConfigParser()
    local_endpoint = None
    if ini.read(ini_path):
        # Existing config file: pick up the endpoint UUID if set.
        if "local" in ini.sections():
            local_endpoint = ini["local"].get("globus_endpoint_uuid")
    else:
        # No config yet: write a template with an empty UUID so the user
        # can fill it in later.
        ini["local"] = {"globus_endpoint_uuid": ""}
        try:
            with open(ini_path, "w") as f:
                ini.write(f)
        except Exception as e:
            logger.error(e)
            sys.exit(1)
    if not local_endpoint:
        # Fall back to mapping the host's FQDN to a known endpoint.
        fqdn = socket.getfqdn()
        for pattern in regex_endpoint_map.keys():
            if re.fullmatch(pattern, fqdn):
                local_endpoint = regex_endpoint_map.get(pattern)
                break
    if not local_endpoint:
        logger.error(
            "{} does not have the local Globus endpoint set".format(ini_path))
        sys.exit(1)

    # Allow a symbolic HPSS site name in place of an endpoint UUID.
    if remote_endpoint.upper() in hpss_endpoint_map.keys():
        remote_endpoint = hpss_endpoint_map.get(remote_endpoint.upper())

    # Direction of the transfer: "get" pulls to the current working
    # directory, anything else pushes from it.
    if transfer_type == "get":
        src_ep = remote_endpoint
        src_path = os.path.join(remote_path, name)
        dst_ep = local_endpoint
        dst_path = os.path.join(os.getcwd(), name)
    else:
        src_ep = local_endpoint
        src_path = os.path.join(os.getcwd(), name)
        dst_ep = remote_endpoint
        dst_path = os.path.join(remote_path, name)

    # Build a human-readable transfer label; Globus labels only accept a
    # restricted character set, so strip everything else.
    subdir = os.path.basename(os.path.normpath(remote_path))
    subdir_label = re.sub("[^A-Za-z0-9_ -]", "", subdir)
    filename = name.split(".")[0]
    label = subdir_label + " " + filename

    # Headless Globus login with refresh tokens.
    native_client = NativeClient(
        client_id="6c1629cf-446c-49e7-af95-323c6412397f",
        app_name="Zstash",
        default_scopes=
        "openid urn:globus:auth:scope:transfer.api.globus.org:all",
    )
    native_client.login(no_local_server=True, refresh_tokens=True)
    transfer_authorizer = native_client.get_authorizers().get(
        "transfer.api.globus.org")
    tc = TransferClient(transfer_authorizer)

    # Both endpoints must be activated (with >= 600 s left) before the
    # transfer can run.
    for ep_id in [src_ep, dst_ep]:
        r = tc.endpoint_autoactivate(ep_id, if_expires_in=600)
        if r.get("code") == "AutoActivationFailed":
            logger.error(
                "The {} endpoint is not activated or the current activation expires soon. Please go to https://app.globus.org/file-manager/collections/{} and (re)activate the endpoint."
                .format(ep_id, ep_id))
            sys.exit(1)

    td = TransferData(
        tc,
        src_ep,
        dst_ep,
        label=label,
        sync_level="checksum",
        verify_checksum=True,
        preserve_timestamp=True,
        fail_on_quota_errors=True,
    )
    td.add_item(src_path, dst_path)
    try:
        task = tc.submit_transfer(td)
    except TransferAPIError as e:
        if e.code == "NoCredException":
            logger.error(
                "{}. Please go to https://app.globus.org/endpoints and activate the endpoint."
                .format(e.message))
        else:
            logger.error(e)
        sys.exit(1)
    except Exception as e:
        logger.error("Exception: {}".format(e))
        sys.exit(1)

    # Fire-and-forget mode: caller does not want to wait for completion.
    if non_blocking:
        return

    try:
        task_id = task.get("task_id")
        """
        A Globus transfer job (task) can be in one of the three states:
        ACTIVE, SUCCEEDED, FAILED. The script every 20 seconds polls a
        status of the transfer job (task) from the Globus Transfer service,
        with 20 second timeout limit. If the task is ACTIVE after time runs
        out 'task_wait' returns False, and True otherwise.
        """
        while not tc.task_wait(task_id, 20, 20):
            pass
        """
        The Globus transfer job (task) has been finished (SUCCEEDED or FAILED).
        Check if the transfer SUCCEEDED or FAILED.
        """
        task = tc.get_task(task_id)
        if task["status"] == "SUCCEEDED":
            logger.info(
                "Globus transfer {}, from {}{} to {}{} succeeded".format(
                    task_id, src_ep, src_path, dst_ep, dst_path))
        else:
            logger.error("Transfer FAILED")
    except TransferAPIError as e:
        if e.code == "NoCredException":
            logger.error(
                "{}. Please go to https://app.globus.org/endpoints and activate the endpoint."
                .format(e.message))
        else:
            logger.error(e)
        sys.exit(1)
    except Exception as e:
        logger.error("Exception: {}".format(e))
        sys.exit(1)
Ejemplo n.º 7
0
def main(args):
    """Stage files from a zstash archive and transfer them via Globus.

    Steps: log in to Globus, resolve and auto-activate the source and
    destination endpoints, download the zstash index database, extract
    all files matching the requested data/restart/namelist patterns,
    write a JSON manifest, submit a Globus transfer of the extracted
    files, and (when ``args.block`` is set) poll it to completion.

    Exits the process (``sys.exit``) on configuration, extraction, or
    transfer errors.
    """

    # Obtain Globus tokens
    cli = NativeClient(client_id=client_id, app_name="Data Stager")
    cli.login(no_local_server=True,
              requested_scopes=scopes,
              refresh_tokens=True,
              force=args.login)
    authorizers = cli.get_authorizers()
    if args.login:
        # --login only (re)acquires credentials; nothing else to do.
        sys.exit(0)

    # Determine source and destination Globus endpoints and directories
    source_endpoint = args.source
    hostname = socket.gethostname()
    if not source_endpoint:
        # Infer the source endpoint from the host we are running on.
        source_endpoint = None
        for h, ep in hostname_endpoint.items():
            if hostname.startswith(h):
                source_endpoint = ep
                break
    if not source_endpoint:
        logger.error("The source Globus endpoint is required")
        sys.exit(1)

    try:
        destination_endpoint, destination_dir = args.destination.split(":", 1)
    except ValueError:
        logger.error("Globus destination endpoint and path are incorrect")
        sys.exit(1)
    # Allow a well-known endpoint name in place of a UUID.
    for name, ep in name_endpoint.items():
        if destination_endpoint == name:
            destination_endpoint = ep
            break

    # Try to activate source and destination Globus endpoints
    tc = globus_sdk.TransferClient(
        authorizer=authorizers["transfer.api.globus.org"])
    resp = tc.endpoint_autoactivate(source_endpoint, if_expires_in=36000)
    if resp["code"] == "AutoActivationFailed":
        logger.error(
            "The source endpoint is not active. Please go to https://app.globus.org/file-manager/collections/{} to activate the endpoint."
            .format(source_endpoint))
        sys.exit(1)
    logger.info("The source Globus endpoint has been activated")

    resp = tc.endpoint_autoactivate(destination_endpoint, if_expires_in=36000)
    if resp["code"] == "AutoActivationFailed":
        logger.error(
            "The destination endpoint is not active. Please go to https://app.globus.org/file-manager/collections/{} to activate the endpoint."
            .format(destination_endpoint))
        sys.exit(1)
    logger.info("The destination Globus endpoint has been activated")

    # Load pattern file if provided
    global patterns
    if args.pattern_file:
        with open(args.pattern_file, "r") as f:
            patterns = json.load(f)

    components = []
    if args.component:
        components = args.component.split(",")

    # Data file patterns
    file_patterns = []
    for c in components:
        p = patterns.get(c)
        if isinstance(p, str):
            file_patterns.append(p)
        elif isinstance(p, list):
            file_patterns = file_patterns + p
    file_patterns = file_patterns + args.files
    if not file_patterns:
        file_patterns = ["*"]
    logger.debug("File patterns: {}".format(file_patterns))

    # Restart file patterns
    p = patterns.get("restart")
    if isinstance(p, str):
        restart_patterns = [p]
    elif isinstance(p, list):
        restart_patterns = p
    else:
        # Bug fix: without this default, a missing or malformed "restart"
        # entry left restart_patterns unbound (UnboundLocalError below).
        restart_patterns = []
    logger.debug("Restart file patterns: {}".format(restart_patterns))

    # Namelist file patterns
    p = patterns.get("namelist")
    if isinstance(p, str):
        namelist_patterns = [p]
    elif isinstance(p, list):
        namelist_patterns = p
    else:
        # Bug fix: same unbound-variable hazard as restart_patterns.
        namelist_patterns = []
    logger.debug("Namelist file patterns: {}".format(namelist_patterns))

    # Create temporary directory for all zstash files, etc.
    tmp_directory = tempfile.mkdtemp(prefix="stager-", dir=".")
    os.chdir(tmp_directory)

    # Download and open database
    logger.info('Opening index database')
    config.hpss = args.zstash
    hpss_get(config.hpss, DB_FILENAME)
    con = sqlite3.connect(DB_FILENAME, detect_types=sqlite3.PARSE_DECLTYPES)
    cur = con.cursor()

    # Retrieve some configuration settings from database
    for attr in dir(config):
        value = getattr(config, attr)
        if not callable(value) and not attr.startswith("__"):
            cur.execute(u"select value from config where arg=?", (attr, ))
            value = cur.fetchone()[0]
            setattr(config, attr, value)
    config.maxsize = int(config.maxsize)
    config.keep = bool(int(config.keep))

    # The command line arg should always have precedence
    config.keep = True
    if args.zstash is not None:
        config.hpss = args.zstash

    logger.info("Local path: {}".format(config.path))
    logger.info("HPSS path: {}".format(config.hpss))
    logger.info("Max size: {}".format(config.maxsize))

    # Find matching files
    file_matches = []
    for p in file_patterns:
        cur.execute(u"select * from files where name GLOB ? or tar GLOB ?",
                    (p, p))
        file_matches = file_matches + cur.fetchall()

    # Only the first restart pattern that yields a hit is used.
    restart_matches = []
    for p in restart_patterns:
        cur.execute(
            u"select * from files where name GLOB ? or tar GLOB ? limit 1",
            (p, p))
        restart_matches = cur.fetchall()
        if restart_matches:
            break

    # Likewise, only the first matching namelist pattern is used.
    namelist_matches = []
    for p in namelist_patterns:
        cur.execute(
            u"select * from files where name GLOB ? or tar GLOB ? limit 1",
            (p, p))
        namelist_matches = cur.fetchall()
        if namelist_matches:
            break

    logger.debug("Matching files: {}".format(file_matches))
    logger.debug("Matching restart file: {}".format(restart_matches))
    logger.debug("Matching namelist file: {}".format(namelist_matches))

    matches = file_matches + restart_matches + namelist_matches

    # Sort by the filename, tape (so the tar archive), and order within tapes (offset).
    matches.sort(key=lambda x: (x[1], x[5], x[6]))
    """
    Based off the filenames, keep only the last instance of a file.
    This is because we may have different versions of the same file across many tars.
    """
    insert_idx, iter_idx = 0, 1
    for iter_idx in range(1, len(matches)):
        # If the filenames are unique, just increment insert_idx.
        # iter_idx will increment after this iteration.
        if matches[insert_idx][1] != matches[iter_idx][1]:
            insert_idx += 1
        # Always copy over the value at the correct location.
        matches[insert_idx] = matches[iter_idx]

    matches = matches[:insert_idx + 1]
    logger.info(
        "{} matching files including restart and namelist files".format(
            len(matches)))

    # Sort by tape and offset, so that we make sure that extract the files by tape order.
    matches.sort(key=lambda x: (x[5], x[6]))

    # Retrieve from tapes
    if args.workers > 1:
        logger.debug("Running zstash with multiprocessing")
        failures = multiprocess_extract(args.workers, matches, True)
    else:
        failures = extractFiles(matches, True)

    # Close database
    logger.debug('Closing index database')
    con.close()

    if failures:
        logger.error("Encountered an error for files:")
        for fail in failures:
            logger.error("{} in {}".format(fail[1], fail[5]))
        broken_tars = sorted(set([f[5] for f in failures]))
        logger.error("The following tar archives had errors:")
        for tar in broken_tars:
            logger.error(tar)
        sys.exit(1)

    # Create a manifest file
    manifest = []
    for m in matches:
        manifest.append({"filename": m[1], "length": m[2], "md5": m[4]})
    # Bug fix: manifest_name must be initialized even when no -m prefix
    # is given; previously the += below raised UnboundLocalError.
    manifest_name = ""
    if args.m:
        manifest_name = args.m + "-"
    manifest_name += "manifest.json"
    with open(manifest_name, "w+") as f:
        f.write(json.dumps(manifest))

    # Transfer the files downloaded from the zstash archive
    if args.t:
        label = args.t
    else:
        label = "E3SM Data Stager on {}".format(hostname)
    td = globus_sdk.TransferData(tc,
                                 source_endpoint,
                                 destination_endpoint,
                                 label=label)

    # The manifest goes first, followed by every extracted file.
    cwd = os.getcwd()
    source_path = os.path.join(cwd, manifest_name)
    destination_path = os.path.join(destination_dir, manifest_name)
    td.add_item(source_path, destination_path)
    for m in matches:
        source_path = os.path.join(cwd, m[1])
        destination_path = os.path.join(destination_dir, m[1])
        td.add_item(source_path, destination_path)

    try:
        task = tc.submit_transfer(td)
        task_id = task.get("task_id")
        logger.info("Submitted Globus transfer: {}".format(task_id))
    except Exception as e:
        logger.error("Globus transfer failed due to error: {}".format(e))
        sys.exit(1)

    if not args.block:
        logger.info(
            "You can monitor the status of the transfer at https://app.globus.org/activity/{}"
            .format(task_id))
        sys.exit(0)
    """
    A Globus transfer job (task) can be in one of the three states: ACTIVE, SUCCEEDED, FAILED.
    The Data Stager polls a status of the transfer job (task) from the Globus Transfer service
    every 15 seconds with 60 second timeout limit. If the task is ACTIVE after time runs out,
    'tc.task_wait()' returns False, and True otherwise.
    """
    last_event_time = None
    while not tc.task_wait(task_id, 60, 15):
        task = tc.get_task(task_id)
        # Get the last error Globus event
        events = tc.task_event_list(task_id,
                                    num_results=1,
                                    filter="is_error:1")
        try:
            event = next(events)
        except StopIteration:
            continue
        # Log the error event if it was not yet logged.
        # (logger.warn is a deprecated alias; use logger.warning.)
        if event["time"] != last_event_time:
            last_event_time = event["time"]
            logger.warning(
                "Non-critical Globus Transfer error event: {} at {}".format(
                    event["description"], event["time"]))
            logger.warning("Globus Transfer error details: {}".format(
                event["details"]))
    """
    The Globus transfer job (task) has been terminated (is not ACTIVE). Check if the transfer
    SUCCEEDED or FAILED.
    """
    task = tc.get_task(task_id)
    if task["status"] == "SUCCEEDED":
        logger.info("Globus transfer {} succeeded".format(task_id))
    else:
        logger.error("Globus Transfer task: {}".format(task_id))
        events = tc.task_event_list(task_id,
                                    num_results=1,
                                    filter="is_error:1")
        event = next(events)
        logger.error("Globus transfer {} failed due to error: {}".format(
            task_id, event["details"]))
        sys.exit(1)

    if args.e:
        logger.info("Deleting downloaded zstash archives and extracted files")
        os.chdir("..")
        shutil.rmtree(tmp_directory)