Beispiel #1
0
    def get(self, scope_name):
        """
        ---
        summary: Metalink redirect
        description: Get Metalink redirect.
        tags:
          - Redirect
        parameters:
        - name: scope_name
          in: path
          description: The data identifier (scope)/(name).
          schema:
            type: string
          style: simple
        - name: ip
          in: query
          description: The client ip.
          schema:
            type: string
          style: simple
          required: false
        - name: fqdn
          in: query
          description: The client fqdn.
          schema:
            type: string
          style: simple
          required: false
        - name: site
          in: query
          description: The client site.
          schema:
            type: string
          style: simple
          required: false
        - name: schemes
          in: query
          description: The schemes to list replicas for. A built-in list is used when omitted.
          schema:
            type: array
          style: simple
          required: false
        - name: select
          in: query
          description: The replica sorting selection. Overridden by sort when both are given.
          schema:
            type: string
          style: simple
          required: false
        - name: sort
          in: query
          description: The replica sorting selection.
          schema:
            type: string
          style: simple
          required: false
        responses:
          200:
            description: OK
            content:
              application/metalink4+xml:
                schema:
                  description: The metalink file.
                  type: string
          400:
            description: Cannot parse the scope and name
          401:
            description: Invalid Auth Token
          404:
            description: Rse or did not found
          406:
            description: Not acceptable
        """
        # Local import: keeps this handler self-contained for XML escaping.
        from xml.sax.saxutils import escape, quoteattr

        headers = self.get_headers()

        try:
            # the VO is needed both to parse the DID and to list its replicas
            vo = extract_vo(request.headers)
            scope, name = parse_scope_name(scope_name, vo)
        except ValueError as error:
            return generate_http_error_flask(400, error, headers=headers)

        # set the correct client IP
        # NOTE(review): the raw X-Forwarded-For value is used as-is; confirm
        # that downstream sorting copes with a comma-separated proxy chain.
        client_ip = request.headers.get('X-Forwarded-For',
                                        default=request.remote_addr)

        client_location = {
            'ip': request.args.get('ip', default=client_ip),
            'fqdn': request.args.get('fqdn', default=None),
            'site': request.args.get('site', default=None),
        }

        dids = [{'scope': scope, 'name': name}]
        schemes = request.args.getlist('schemes') or [
            'http', 'https', 'root', 'gsiftp', 'srm', 'davs'
        ]
        # 'sort' takes precedence over 'select'
        sortby = request.args.get('select', default=None)
        sortby = request.args.get('sort', default=sortby)

        try:
            replicas_iter = list_replicas(dids=dids,
                                          schemes=schemes,
                                          client_location=client_location,
                                          vo=vo)
            # Pull the first entry eagerly so that lookup errors and an empty
            # result surface before the streamed response has started.
            try:
                first = next(replicas_iter)
            except StopIteration:
                # send the same headers as every other response of this handler
                return 'no redirection possible - cannot find the DID', 404, headers

            def generate():
                """Stream the metalink4 document, one <file> element per replica entry."""
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                # iteratively stream the XML per file
                for rfile in itertools.chain((first, ), replicas_iter):
                    # map every replica URL to the RSE that serves it
                    dictreplica = {}
                    for rse in rfile['rses']:
                        for replica in rfile['rses'][rse]:
                            dictreplica[replica] = rse

                    did = rfile['scope'] + ':' + rfile['name']

                    # stream metadata; names and URLs are escaped because
                    # characters such as '&', '<' or '"' would otherwise
                    # produce malformed XML
                    yield ' <file name=' + quoteattr(rfile['name']) + '>\n'
                    yield '  <identity>' + escape(did) + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name=' + quoteattr('/atlas/rucio/' + did) + '>'
                    yield '</glfn>\n'

                    replicas = sort_replicas(dictreplica,
                                             client_location,
                                             selection=sortby)

                    # stream URLs, priority 1 being the preferred replica
                    for idx, replica in enumerate(replicas, start=1):
                        yield ('  <url location='
                               + quoteattr(str(dictreplica[replica]))
                               + ' priority="' + str(idx) + '">'
                               + escape(replica) + '</url>\n')

                    yield ' </file>\n'

                # don't forget to send the metalink footer
                yield '</metalink>\n'

            return try_stream(generate(),
                              content_type='application/metalink4+xml')
        except (DataIdentifierNotFound, ReplicaNotFound) as error:
            return generate_http_error_flask(404, error, headers=headers)
Beispiel #2
0
    def GET(self, scope, name):
        """
        Metalink redirect

        Streams a metalink4 document describing the replicas of one DID.

        Query parameters read from the request:
            schemes: schemes to list replicas for (a built-in list otherwise).
            select / sort: replica sorting ('geoip', 'closeness', 'dynamic',
                           'ranking'; anything else sorts randomly).
                           'sort' wins over 'select'.
            ip / fqdn / site: override the detected client location.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound
            406 Not Acceptable

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """
        # Local import: keeps this handler self-contained for XML escaping.
        from xml.sax.saxutils import escape, quoteattr

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids = [{'scope': scope, 'name': name}]
        schemes = ['http', 'https', 'root', 'gsiftp', 'srm', 'davs']
        select = None

        # set the correct client IP
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            # ctx.query starts with '?', which parse_qs must not see
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'sort' in params:
                select = params['sort'][0]

            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # get vo if given
        vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

        try:
            # materialise the result before the first yield so that lookup
            # errors are raised before any output has been produced
            tmp_replicas = list(list_replicas(dids=dids,
                                              schemes=schemes,
                                              client_location=client_location,
                                              vo=vo))

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the appropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                # map every replica URL to the RSE that serves it
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                did = rfile['scope'] + ':' + rfile['name']

                # stream metadata; names and URLs are escaped because
                # characters such as '&', '<' or '"' would otherwise produce
                # malformed XML
                yield ' <file name=' + quoteattr(rfile['name']) + '>\n'
                yield '  <identity>' + escape(did) + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name=' + quoteattr('/atlas/rucio/' + did) + '>'
                yield '</glfn>\n'

                # sort the actual replicas if necessary
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica,
                                          client_location['ip'],
                                          ignore_error=True)
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # stream URLs, priority 1 being the preferred replica
                for idx, replica in enumerate(replicas, 1):
                    yield ('  <url location='
                           + quoteattr(str(dictreplica[replica]))
                           + ' priority="' + str(idx) + '">'
                           + escape(replica) + '</url>\n')

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #3
0
    def get(self, scope_name):
        """
        ---
        summary: Header redirect
        description: Get the header redirect.
        tags:
          - Redirect
        parameters:
        - name: scope_name
          in: path
          description: The data identifier (scope)/(name).
          schema:
            type: string
          style: simple
        - name: ip
          in: query
          description: The client ip.
          schema:
            type: string
          style: simple
          required: false
        - name: fqdn
          in: query
          description: The client fqdn.
          schema:
            type: string
          style: simple
          required: false
        - name: site
          in: query
          description: The client site. Also used to select the replica when no rse is given.
          schema:
            type: string
          style: simple
          required: false
        - name: schemes
          in: query
          description: The schemes to list replicas for. A built-in list is used when omitted.
          schema:
            type: array
          style: simple
          required: false
        - name: select
          in: query
          description: The replica sorting selection. Overridden by sort when both are given.
          schema:
            type: string
          style: simple
          required: false
        - name: sort
          in: query
          description: The replica sorting selection.
          schema:
            type: string
          style: simple
          required: false
        - name: rse
          in: query
          description: Only redirect to a replica on this RSE.
          schema:
            type: string
          style: simple
          required: false
        responses:
          303:
            description: OK
            content:
              application/json:
                schema:
                  description: The redirect url.
                  type: string
          400:
            description: Cannot parse the scope and name
          401:
            description: Invalid Auth Token
          404:
            description: Rse or did not found
        """
        headers = self.get_headers()

        try:
            # the VO is needed both to parse the DID and to list its replicas
            vo = extract_vo(request.headers)
            scope, name = parse_scope_name(scope_name, vo)
        except ValueError as error:
            return generate_http_error_flask(400, error, headers=headers)

        no_valid_rse = 'no redirection possible - no valid RSE for HTTP redirection found'

        try:
            client_ip = request.headers.get('X-Forwarded-For',
                                            default=request.remote_addr)

            client_location = {
                'ip': request.args.get('ip', default=client_ip),
                'fqdn': request.args.get('fqdn', default=None),
                'site': request.args.get('site', default=None),
            }
            # use the default HTTP protocols if no scheme is given
            schemes = request.args.getlist('schemes') or [
                'davs', 'https', 's3'
            ]
            # 'sort' takes precedence over 'select'
            sortby = request.args.get('select', default='random')
            sortby = request.args.get('sort', default=sortby)
            rse = request.args.get('rse', default=None)
            site = client_location['site']

            # correctly forward the schemes and select to potential metalink followups
            # REQUEST_URI is not a standard WSGI key and is missing under some
            # servers; fall back to the path as seen by the framework instead
            # of failing on None.
            request_uri = request.environ.get('REQUEST_URI') or (
                request.script_root + request.path)
            cleaned_url = request_uri.split('?')[0]

            headers.set(
                'Link',
                f'<{cleaned_url}/metalink?schemes={",".join(schemes)}&select={sortby}>; rel=describedby; type="application/metalink+xml"'
            )

            replicas = list(
                list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                              schemes=schemes,
                              client_location=client_location,
                              vo=vo))

            selected_url = None
            for r in replicas:
                if not r['rses']:
                    continue

                if rse:
                    # an explicit RSE was requested: take its first replica or give up
                    if rse in r['rses'] and r['rses'][rse]:
                        selected_url = r['rses'][rse][0]
                        continue
                    return no_valid_rse, 404, headers

                dictreplica = {}
                for rep in r['rses']:
                    for replica in r['rses'][rep]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replace davs and s3 URLs with https
                        replica = replica.replace(
                            'davs://',
                            'https://').replace('s3://', 'https://')
                        dictreplica[replica] = rep

                if not dictreplica:
                    return no_valid_rse, 404, headers

                if site:
                    rep = site_selector(dictreplica, site, vo)
                    if not rep:
                        return no_valid_rse, 404, headers
                    selected_url = rep[0]
                else:
                    rep = sort_replicas(dictreplica,
                                        client_location,
                                        selection=sortby)
                    selected_url = rep[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

            return 'no redirection possible - file does not exist', 404, headers
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, error, headers=headers)
Beispiel #4
0
    def GET(self, scope, name):
        """
        Redirect download

        Query parameters read from the request:
            select: 'geoip' orders the candidate replicas with geoIP_order,
                    any other value (default 'random') uses random_order.
            rse:    only redirect to a replica listed under this RSE.
            site:   choose the replica with site_selector (when rse is absent).

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header("Access-Control-Allow-Origin", ctx.env.get("HTTP_ORIGIN"))
        header("Access-Control-Allow-Headers", ctx.env.get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS"))
        header("Access-Control-Allow-Methods", "*")
        header("Access-Control-Allow-Credentials", "true")

        not_found_msg = "Sorry, the replica you were looking for was not found."

        try:
            replicas = list(
                list_replicas(dids=[{"scope": scope, "name": name, "type": "FILE"}], schemes=["http", "https"])
            )

            select = "random"
            rse = None
            site = None
            if ctx.query:
                # ctx.query starts with '?', which parse_qs must not see
                params = parse_qs(ctx.query[1:])
                if "select" in params:
                    select = params["select"][0]
                if "rse" in params:
                    rse = params["rse"][0]
                if "site" in params:
                    site = params["site"][0]

            for r in replicas:
                if not r["rses"]:
                    continue

                if rse:
                    # an explicit RSE was requested: first replica there, or nothing
                    if rse in r["rses"] and r["rses"][rse]:
                        return found(r["rses"][rse][0])
                    return notfound(not_found_msg)

                # map every replica URL to the RSE that serves it
                replicadict = {}
                for rep in r["rses"]:
                    for replica in r["rses"][rep]:
                        replicadict[replica] = rep
                if not replicadict:
                    return notfound(not_found_msg)

                if site:
                    rep = site_selector(replicadict, site)
                    if rep:
                        return found(rep[0])
                    return notfound(not_found_msg)

                client_ip = ctx.get("ip")
                if select == "geoip":
                    rep = geoIP_order(replicadict, client_ip)
                else:
                    rep = random_order(replicadict, client_ip)
                return found(rep[0])

            return notfound(not_found_msg)

        # 'except X as e' is valid on Python 2.6+ and Python 3; the former
        # 'except X, e' spelling is a SyntaxError on Python 3.
        except RucioException as e:
            # NOTE(review): args[0][0] presumes args[0] is itself a sequence of
            # messages; with a plain string it yields only the first character.
            raise generate_http_error(500, e.__class__.__name__, e.args[0][0])
Beispiel #5
0
    def GET(self, scope, name):
        """
        List all replicas for data identifiers.

        The output format follows the Accept header: metalink4 if
        'application/metalink4+xml' is listed, else metalink3 if
        'application/metalink+xml' is listed, else a JSON stream.

        Query parameters read from the request:
            schemes: schemes to list replicas for.
            select:  'geoip' orders replicas with geoIP_order, anything else
                     uses random_order.
            limit:   maximum number of URLs emitted per file (metalink only).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 Not Found
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """
        # Local import: keeps this handler self-contained for XML escaping.
        from xml.sax.saxutils import escape, quoteattr

        metalink = None
        accept = ctx.env.get('HTTP_ACCEPT')
        if accept is not None:
            # Entries are usually separated by ', ' and may carry parameters
            # (';q=0.9'); compare the bare media types only. q-values are not
            # evaluated.
            accepted = [entry.split(';')[0].strip()
                        for entry in accept.split(',')]
            # first check if client accepts metalink
            if 'application/metalink+xml' in accepted:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in accepted:
                metalink = 4

        dids = [{'scope': scope, 'name': name}]
        schemes, select, limit = None, None, None
        if ctx.query:
            # ctx.query starts with '?', which parse_qs must not see
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

        try:
            # first, set the appropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # the client address does not change between files
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                # 'replicas' keeps the listing order and is the fallback when
                # the geoip lookup fails; 'dictreplica' maps URL -> RSE
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # unknown client address: keep the unsorted order
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)

                if metalink is None:
                    yield dumps(rfile) + '\n'
                    continue

                # To help support the FAX transition period, add the glfn to the metalink:
                # AGIS does not expose specific FAX redirectors per DDM Endpoint, so go through top-level redirector
                glfn = '%s%s' % (
                    'root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/',
                    '%s:%s' % (rfile['scope'], rfile['name']))

                # Names and URLs are escaped below because characters such as
                # '&', '<' or '"' would otherwise produce malformed XML.
                if metalink == 3:
                    yield ' <file name=' + quoteattr(rfile['name']) + '>\n'

                    yield '  <glfn name=' + quoteattr(glfn) + '>'
                    yield '</glfn>\n'

                    yield '  <resources>\n'
                    for idx, replica in enumerate(replicas):
                        yield '   <url type="http" preference="' + str(
                            idx) + '">' + escape(replica) + '</url>\n'
                        # stop once 'limit' URLs have been written
                        if limit and limit == idx + 1:
                            break
                    yield '  </resources>\n </file>\n'

                elif metalink == 4:
                    yield ' <file name=' + quoteattr(rfile['name']) + '>\n'
                    yield '  <identity>' + escape(
                        rfile['scope'] + ':' + rfile['name']) + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name=' + quoteattr(glfn) + '>'
                    yield '</glfn>\n'

                    for idx, replica in enumerate(replicas):
                        yield ('   <url location='
                               + quoteattr(str(dictreplica[replica]))
                               + ' priority="' + str(idx + 1) + '">'
                               + escape(replica) + '</url>\n')
                        # stop once 'limit' URLs have been written
                        if limit and limit == idx + 1:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink == 3:
                yield '</files>\n</metalink>\n'
            elif metalink == 4:
                yield '</metalink>\n'

        # 'except X as e' is valid on Python 2.6+ and Python 3; the former
        # 'except X, e' spelling is a SyntaxError on Python 3.
        except DataIdentifierNotFound as e:
            # NOTE(review): args[0][0] presumes args[0] is itself a sequence of
            # messages; with a plain string it yields only the first character.
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
Beispiel #6
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The JSON request body may carry: dids, schemes, unavailable,
        all_states, rse_expression, client_location, sort, domain,
        resolve_archives, resolve_parents and signature_lifetime.

        Query parameters read from the request:
            select / sort: replica sorting for metalink output ('geoip',
                           'closeness', 'dynamic', 'ranking', 'random');
                           these override the 'sort' value of the body.
            limit:         maximum number of URLs emitted per file (metalink only).

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            406 Not Acceptable
            500 InternalError

        :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
        """
        # Local import: attribute quoting for the XML output below.
        from xml.sax.saxutils import quoteattr

        metalink = False
        accept = ctx.env.get('HTTP_ACCEPT')
        if accept is not None:
            # Entries are usually separated by ', ' and may carry parameters
            # (';q=0.9'); compare the bare media types only. q-values are not
            # evaluated.
            accepted = [entry.split(';')[0].strip()
                        for entry in accept.split(',')]
            metalink = 'application/metalink4+xml' in accepted

        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        signature_lifetime, resolve_archives, resolve_parents = None, True, False
        client_location = {}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                # the mere presence of the key switches the availability check off
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location = params['client_location']
                # fall back to the detected address if the client sent none
                client_location.setdefault('ip', client_ip)
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
            if 'resolve_archives' in params:
                resolve_archives = params['resolve_archives']
            if 'resolve_parents' in params:
                resolve_parents = params['resolve_parents']
            if 'signature_lifetime' in params:
                signature_lifetime = params['signature_lifetime']
            else:
                # hardcoded default of 10 minutes if config is not parseable
                signature_lifetime = config_get('credentials',
                                                'signature_lifetime',
                                                raise_exception=False,
                                                default=600)
        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # ctx.query starts with '?', which parse_qs must not see
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # parse_qs yields strings; without the conversion the
                # 'limit == idx' comparison below could never be true
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError',
                                              'limit must be an integer')
            if 'sort' in params:
                # parse_qs yields lists; take the value, not the list
                select = params['sort'][0]

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # we need to call list_replicas before starting to reply
            # otherwise the exceptions won't be propagated correctly
            first = True

            # the policy schema is the same for every file of the reply
            policy_schema = None
            if metalink:
                policy_schema = config_get('policy',
                                           'schema',
                                           raise_exception=False,
                                           default='generic')

            # then, stream the replica information
            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       issuer=ctx.env.get('issuer')):

                # in first round, set the appropriate content type, and stream the header
                if first:
                    if not metalink:
                        header('Content-Type', 'application/x-json-stream')
                    else:
                        header('Content-Type', 'application/metalink4+xml')
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    first = False

                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                    continue

                # URL -> (domain, priority, rse, client_extract)
                dictreplica = {}
                for replica in rfile['pfns']:
                    pfn = rfile['pfns'][replica]
                    dictreplica[replica] = (pfn['domain'],
                                            pfn['priority'],
                                            pfn['rse'],
                                            pfn['client_extract'])

                # Names and DIDs are escaped because characters such as '&',
                # '<' or '"' would otherwise produce malformed XML.
                yield ' <file name=' + quoteattr(rfile['name']) + '>\n'

                if 'parents' in rfile and rfile['parents']:
                    yield '  <parents>\n'
                    for parent in rfile['parents']:
                        yield '   <did>' + escape(parent) + '</did>\n'
                    yield '  </parents>\n'

                yield '  <identity>' + escape(
                    rfile['scope'] + ':' + rfile['name']) + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name=' + quoteattr(
                    '/%s/rucio/%s:%s' % (policy_schema, rfile['scope'],
                                         rfile['name'])) + '></glfn>\n'

                # TODO: deprecate this
                if select == 'geoip':
                    # client_location is empty when the request body carried
                    # none; use the detected address instead of a KeyError
                    replicas = sort_geoip(dictreplica,
                                          client_location.get('ip', client_ip))
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                elif select == 'random':
                    replicas = sort_random(dictreplica)
                else:
                    # default: order by the (domain, priority, ...) tuples
                    replicas = sorted(dictreplica, key=dictreplica.get)

                for idx, replica in enumerate(replicas, 1):
                    replica_domain, priority, rse, client_extract = dictreplica[replica]
                    yield '  <url location=' + quoteattr(str(rse)) \
                        + ' domain="' + str(replica_domain) \
                        + '" priority="' + str(priority) \
                        + '" client_extract="' + str(client_extract).lower() \
                        + '">' + escape(replica) + '</url>\n'
                    # stop once 'limit' URLs have been written
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

            # ensure complete metalink
            if first and metalink:
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #7
0
    def GET(self, scope, name):
        """
        List all replicas for a data identifier.

        The reply is streamed: one JSON document per file by default, or a
        metalink4 XML document when the Accept header contains the entry
        'application/metalink4+xml'.

        Query parameters read by this handler:
            schemes: forwarded to list_replicas.
            select:  'geoip' orders the replicas with sort_geoip, anything
                     else (or nothing) uses sort_random.
            limit:   maximum number of <url> entries written per file
                     (only used for the metalink output).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 Not Found
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """
        # Imported locally: replica URLs are embedded in XML below and have to
        # be escaped (a query string containing '&' would break the document).
        from xml.sax.saxutils import escape

        metalink = False
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            if 'application/metalink4+xml' in tmp:
                metalink = True

        dids, schemes, select, limit = [{
            'scope': scope,
            'name': name
        }], None, None, None
        if ctx.query:
            # ctx.query[1:] drops the first character (presumably the
            # leading '?') before parsing
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

        try:
            # the client address is the same for every file, resolve it once
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # first, set the appropriate content type, and stream the header
            if not metalink:
                header('Content-Type', 'application/x-json-stream')
            else:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                # map every replica URL to the RSE it is listed under
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                if select == 'geoip':
                    try:
                        replicas = sort_geoip(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # deliberate best effort: keep the listing order
                        pass
                else:
                    replicas = sort_random(dictreplica)

                if not metalink:
                    yield dumps(rfile) + '\n'
                else:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    # priority is the 1-based position in the sorted list
                    for priority, replica in enumerate(replicas, 1):
                        yield '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                priority) + '">' + escape(replica) + '</url>\n'
                        if limit and limit == priority:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            # call form of print works on Python 2 and Python 3
            print(format_exc())
            raise InternalError(error)
Beispiel #8
0
    def GET(self, scope, name):
        """
        Header Redirect

        Picks one replica URL of the file and answers with a redirect to it.
        A 'Link' header pointing to the matching metalink resource is set as
        well, carrying the schemes and select values of this request.

        Query parameters read by this handler:
            select:  'geoip' uses geoIP_order, anything else random_order
                     (default 'random').
            rse:     redirect to the first replica listed under this RSE.
            site:    choose the replica with site_selector.
            schemes: protocol scheme to list replicas for (default
                     http, https and s3+rucio).

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'http', 'https', 's3+rucio'
            ]
            if ctx.query:
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                if 'schemes' in params:
                    schemes = params['schemes'][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                # defaults are still in place: join them for the header
                link_schemes = ','.join(schemes)
            else:
                # a single scheme string came from the query
                link_schemes = schemes
                schemes = [schemes]  # list_replicas needs a list
            header(
                'Link',
                '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                % (cleaned_url, link_schemes, select))

            replicas = list(
                list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                              schemes=schemes))

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    replicadict = {}

                    if rse:
                        # an explicit RSE was requested: it must hold a replica
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )
                    else:

                        # map every replica URL to the RSE it is listed under
                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                replicadict[replica] = rep

                        if not replicadict:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )

                        elif site:
                            rep = site_selector(replicadict, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                raise ReplicaNotFound(
                                    'no redirection possible - no valid RSE for HTTP redirection found'
                                )
                        else:
                            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                            if client_ip is None:
                                client_ip = ctx.ip
                            if select == 'geoip':
                                rep = geoIP_order(replicadict, client_ip)
                            else:
                                rep = random_order(replicadict, client_ip)

                            selected_url = rep[0]

                        # look up which RSE the chosen URL belongs to
                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    # s3+rucio URLs are exchanged for a signed URL first
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            # the redirect itself is raised as an exception: pass it through
            raise
        except ReplicaNotFound as e:
            # NOTE(review): args[0][0] presumably depends on how the exception
            # base class stores its arguments - confirm before changing
            raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])
Beispiel #9
0
    def GET(self, scope, name):
        """
        Metalink redirect

        Streams a metalink4 XML document describing all replicas of the
        data identifier.

        Query parameters read by this handler:
            schemes: forwarded to list_replicas (default http, https,
                     s3+rucio, s3+https, root, gsiftp and srm).
            select:  'geoip' orders the replicas with geoIP_order, anything
                     else (or nothing) uses random_order.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """
        # Imported locally: replica URLs are embedded in XML below and have to
        # be escaped (a query string containing '&' would break the document).
        from xml.sax.saxutils import escape

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids, schemes, select = [{
            'scope': scope,
            'name': name
        }], ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp',
             'srm'], None

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]

        try:
            # materialise the listing before anything is streamed, so that an
            # empty result can still be reported as an error
            tmp_replicas = list(list_replicas(dids=dids, schemes=schemes))

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the appropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # set the correct client IP
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                # map every replica URL to the RSE it is listed under
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                # sort the actual replicas if necessary
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # deliberate best effort: keep the listing order
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile[
                    'name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="%s%s">' % (
                    'root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/', '%s:%s' %
                    (rfile['scope'], rfile['name']))
                yield '</glfn>\n'

                # stream URLs, priority is the 1-based position in the list
                for idx, replica in enumerate(replicas, 1):
                    yield '  <url location="' + str(
                        dictreplica[replica]) + '" priority="' + str(
                            idx) + '">' + escape(replica) + '</url>\n'

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound as e:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
        except ReplicaNotFound as e:
            # raised above before any output was produced; report it as the
            # documented 404 instead of letting it escape unhandled
            raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])
Beispiel #10
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The request body is a JSON document. Keys read from it: dids,
        schemes, unavailable, all_states, rse_expression, client_location,
        sort, domain, resolve_archives, resolve_parents, signature_lifetime
        and updated_after (epoch number or '%Y-%m-%dT%H:%M:%S' string).

        Query parameters read afterwards (they override the body):
            select / sort: replica selection passed to sort_replicas.
            limit:         maximum number of <url> entries written per file
                           (only used for the metalink output).

        The reply is streamed: one JSON document per file by default, or a
        metalink4 XML document when the Accept header contains the entry
        'application/metalink4+xml'.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            406 Not Acceptable
            500 InternalError

        :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
        """

        metalink = False
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            if 'application/metalink4+xml' in tmp:
                metalink = True

        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        signature_lifetime, resolve_archives, resolve_parents = None, True, False
        updated_after = None

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                # the mere presence of the key switches this on
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location.update(params['client_location'])
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
            if 'resolve_archives' in params:
                resolve_archives = params['resolve_archives']
            if 'resolve_parents' in params:
                resolve_parents = params['resolve_parents']

            if 'signature_lifetime' in params:
                signature_lifetime = params['signature_lifetime']
            else:
                # hardcoded default of 10 minutes if config is not parseable
                signature_lifetime = config_get('credentials',
                                                'signature_lifetime',
                                                raise_exception=False,
                                                default=600)

            if 'updated_after' in params:
                if isinstance(params['updated_after'], (int, float)):
                    # convert from epoch time stamp to datetime object
                    updated_after = datetime.utcfromtimestamp(
                        params['updated_after'])
                else:
                    # attempt UTC format '%Y-%m-%dT%H:%M:%S' conversion
                    updated_after = datetime.strptime(params['updated_after'],
                                                      '%Y-%m-%dT%H:%M:%S')

        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # parse_qs maps every key to a LIST of strings
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # must be an int: it is compared against the integer url
                # counter below and a string would never match
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError',
                                              'limit must be an integer')
            if 'sort' in params:
                # take the value itself, not the list wrapping it
                select = params['sort'][0]

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # we need to call list_replicas before starting to reply
            # otherwise the exceptions won't be propagated correctly
            first = True

            header(
                'Content-Type', 'application/metalink4+xml'
                if metalink else 'application/x-json-stream')

            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       updated_after=updated_after,
                                       issuer=ctx.env.get('issuer'),
                                       vo=ctx.env.get('vo')):

                # in first round, set the appropriate content type, and stream the header
                if first:
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    first = False

                # ... then, stream the replica information
                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                else:

                    # replica URL -> (domain, priority, rse, client_extract)
                    dictreplica = {}
                    for replica, pfn in rfile['pfns'].items():
                        dictreplica[replica] = (pfn['domain'],
                                                pfn['priority'],
                                                pfn['rse'],
                                                pfn['client_extract'])

                    yield ' <file name="' + rfile['name'] + '">\n'

                    if 'parents' in rfile and rfile['parents']:
                        yield '  <parents>\n'
                        for parent in rfile['parents']:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                        config_get(
                            'policy',
                            'schema',
                            raise_exception=False,
                            default='generic'), rfile['scope'], rfile['name'])

                    lanreplicas = [
                        replica for replica, v in dictreplica.items()
                        if v[0] == 'lan'
                    ]
                    # sort lan by priority
                    lanreplicas.sort(key=lambda rep: dictreplica[rep][1])
                    # lan replicas always come first, the rest is ordered
                    # by sort_replicas
                    replicas = lanreplicas + sort_replicas(
                        {
                            k: v
                            for k, v in dictreplica.items() if v[0] != 'lan'
                        },
                        client_location,
                        selection=select)

                    # priority is the 1-based position in the final ordering
                    for idx, replica in enumerate(replicas, 1):
                        yield '  <url location="' + str(dictreplica[replica][2]) \
                            + '" domain="' + str(dictreplica[replica][0]) \
                            + '" priority="' + str(idx) \
                            + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                            + '">' + escape(replica) + '</url>\n'
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            if metalink:
                if first:
                    # ensure complete metalink on success without any content
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # if metalink start was already sent, always send the end
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #11
0
    def GET(self, scope, name):
        """
        List all replicas for a data identifier.

        The reply is streamed: one JSON document per file by default, or a
        metalink4 XML document when the Accept header contains the entry
        'application/metalink4+xml'.

        Query parameters read by this handler: schemes (forwarded to
        list_replicas), select (passed to sort_replicas as selection) and
        limit (maximum number of <url> entries per file in the metalink).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            406 Not Acceptable
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        accept = ctx.env.get('HTTP_ACCEPT')
        metalink = (accept is not None
                    and 'application/metalink4+xml' in accept.split(','))

        dids = [{'scope': scope, 'name': name}]
        schemes = select = limit = None
        if ctx.query:
            query = parse_qs(ctx.query[1:])
            if 'schemes' in query:
                schemes = query['schemes']
            if 'select' in query:
                select = query['select'][0]
            if 'limit' in query:
                limit = int(query['limit'][0])

        # prefer the forwarded address, fall back to the direct peer
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip
        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # list_replicas has to be called before the reply starts,
            # otherwise its exceptions are not propagated correctly
            header_pending = True

            content_type = 'application/x-json-stream'
            if metalink:
                content_type = 'application/metalink4+xml'
            header('Content-Type', content_type)

            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       vo=ctx.env.get('vo')):

                # the metalink prologue goes out together with the first file
                if header_pending:
                    header_pending = False
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                # replica URL -> RSE it is listed under
                dictreplica = {
                    pfn: rse
                    for rse in rfile['rses']
                    for pfn in rfile['rses'][rse]
                }

                ordered = sort_replicas(dictreplica,
                                        client_location,
                                        selection=select)

                if not metalink:
                    yield dumps(rfile) + '\n'
                    continue

                yield ''.join((' <file name="', rfile['name'], '">\n'))
                yield ''.join(('  <identity>', rfile['scope'], ':',
                               rfile['name'], '</identity>\n'))

                adler32 = rfile['adler32']
                if adler32 is not None:
                    yield ''.join(('  <hash type="adler32">', adler32,
                                   '</hash>\n'))
                md5 = rfile['md5']
                if md5 is not None:
                    yield ''.join(('  <hash type="md5">', md5, '</hash>\n'))

                yield ''.join(('  <size>', str(rfile['bytes']), '</size>\n'))

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                    rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                # priority is the 1-based position in the sorted list
                for position, pfn in enumerate(ordered, 1):
                    yield ''.join(('   <url location="',
                                   str(dictreplica[pfn]),
                                   '" priority="',
                                   str(position),
                                   '">',
                                   escape(pfn),
                                   '</url>\n'))
                    if limit and limit == position:
                        break
                yield ' </file>\n'

            if metalink:
                if header_pending:
                    # ensure complete metalink on success without any content
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # if metalink start was already sent, always send the end
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #12
0
def test_reaper_affect_other_vo_via_run(vo):
    """ MULTI VO (DAEMON): Test that reaper runs on the specified VO(s) and does not reap others

    The same RSE name and the same file names are registered for the given
    VO and for a freshly set-up second VO. The reaper is then run once,
    restricted to VO 'new', and the replica counts of both VOs are checked.
    """
    new_vo = __setup_new_vo()
    scope_name, [scope_tst, scope_new] = __setup_scopes_for_vos(vo, new_vo)
    rse_name = rse_name_generator()

    nb_files = 30
    file_size = 200
    names = ['lfn' + generate_uuid() for _ in range(nb_files)]

    def count_replicas(target_vo):
        # Number of entries list_replicas reports for our files on the RSE.
        listing = replica_api.list_replicas(
            [{'scope': scope_name, 'name': lfn} for lfn in names],
            rse_expression=rse_name,
            vo=target_vo)
        return len(list(listing))

    # Register the RSE and its replicas for each VO (given VO first).
    for target_vo, target_scope in ((vo, scope_tst), (new_vo, scope_new)):
        _, _rse_id, _ = __add_test_rse_and_replicas(vo=target_vo,
                                                    scope=target_scope,
                                                    rse_name=rse_name,
                                                    names=names,
                                                    file_size=file_size)

    # Report the RSE as full and configure its limits, once per VO.
    rse_limits = (('MinFreeSpace', 5 * file_size),
                  ('MaxBeingDeletedFiles', 10))
    for target_vo in (vo, new_vo):
        rse_api.set_rse_usage(rse=rse_name,
                              source='storage',
                              used=nb_files * file_size,
                              free=1,
                              issuer='root',
                              vo=target_vo)
        for limit_name, limit_value in rse_limits:
            rse_api.set_rse_limits(rse=rse_name,
                                   name=limit_name,
                                   value=limit_value,
                                   issuer='root',
                                   vo=target_vo)

    # Check we start of with the expected number of replicas
    assert count_replicas(vo) == nb_files
    assert count_replicas(new_vo) == nb_files

    # Check we don't affect a second VO that isn't specified
    REGION.invalidate()
    run_reaper(once=True, rses=[rse_name], vos=['new'])
    assert count_replicas(vo) == nb_files
    assert count_replicas(new_vo) == 25
Beispiel #13
0
    def get(self, scope, name):
        """
        Metalink redirect

        .. :quickref: MetaLinkRedirector; Metalink redirect.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        Query parameters read from the request: ``schemes`` (may be repeated),
        ``select`` / ``sort`` (``sort`` wins when both are given) and
        ``ip`` / ``fqdn`` / ``site`` describing the client location. When no
        ``ip`` parameter is sent, the X-Forwarded-For header (or, failing
        that, the remote address) is used as the client IP.

        :param scope: The scope name of the file.
        :param name: The name of the file.
        :resheader Content-Type: application/metalink4+xml'.
        :status 200: OK.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        :returns: Metalink file
        """
        # Function-scope import so the block does not depend on a file-level one.
        from xml.sax.saxutils import escape

        dids = [{'scope': scope, 'name': name}]

        # set the correct client IP
        client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = request.remote_addr

        # getlist() always returns a list, so list_replicas never receives a
        # bare string; fall back to the defaults when the parameter is absent.
        schemes = request.args.getlist('schemes') or [
            'http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm',
            'davs'
        ]
        select = request.args.get('select', None)
        if 'sort' in request.args:
            select = request.args['sort']

        # An explicit ?ip= overrides the detected address, but its absence
        # must not wipe the detected address out.
        client_location = {
            'ip': request.args.get('ip', client_ip),
            'fqdn': request.args.get('fqdn', None),
            'site': request.args.get('site', None),
        }

        try:
            tmp_replicas = list(
                list_replicas(dids=dids,
                              schemes=schemes,
                              client_location=client_location))

            if not tmp_replicas:
                return 'no redirection possible - cannot find the DID', 404

            # collect the document piecewise and join once at the end
            parts = [
                '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
            ]

            for rfile in tmp_replicas:
                # map every replica URL to the RSE it lives on
                dictreplica = {
                    replica: rse
                    for rse in rfile['rses']
                    for replica in rfile['rses'][rse]
                }

                # file metadata
                parts.append(' <file name="' + rfile['name'] + '">\n')
                parts.append('  <identity>' + rfile['scope'] + ':' +
                             rfile['name'] + '</identity>\n')

                if rfile['adler32'] is not None:
                    parts.append('  <hash type="adler32">' +
                                 rfile['adler32'] + '</hash>\n')
                if rfile['md5'] is not None:
                    parts.append('  <hash type="md5">' + rfile['md5'] +
                                 '</hash>\n')

                parts.append('  <size>' + str(rfile['bytes']) + '</size>\n')

                parts.append('  <glfn name="/atlas/rucio/%s:%s"></glfn>\n' %
                             (rfile['scope'], rfile['name']))

                # sort the actual replicas if necessary
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica,
                                          client_location['ip'],
                                          ignore_error=True)
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # URLs, in sorted order; escape them so that characters such
                # as '&' do not break the XML document
                for idx, replica in enumerate(replicas, start=1):
                    parts.append('  <url location="' +
                                 str(dictreplica[replica]) + '" priority="' +
                                 str(idx) + '">' + escape(replica) +
                                 '</url>\n')

                parts.append(' </file>\n')

            # don't forget the metalink footer
            parts.append('</metalink>\n')
            return Response(''.join(parts),
                            content_type='application/metalink4+xml')
        except DataIdentifierNotFound as error:
            return generate_http_error_flask(404, 'DataIdentifierNotFound',
                                             error.args[0])
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, 'ReplicaNotFound',
                                             error.args[0])
        except RucioException as error:
            return generate_http_error_flask(500, error.__class__.__name__,
                                             error.args[0])
        except Exception as error:
            print(format_exc())
            # an exception object is not a valid response body; send its text
            return str(error), 500
Beispiel #14
0
            def generate(request_id, issuer, vo):
                """
                Stream the result of list_replicas, one chunk at a time.

                Depending on the enclosing ``metalink`` flag, every file is
                emitted either as one JSON line or as a metalink ``<file>``
                element wrapped in a single ``<metalink>`` document.

                :param request_id: Forwarded to list_replicas.
                :param issuer: Forwarded to list_replicas.
                :param vo: Forwarded to list_replicas.
                :returns: Generator of response text chunks.
                """
                xml_head = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                # we need to call list_replicas before starting to reply
                # otherwise the exceptions won't be propagated correctly
                nothing_sent = metalink

                for rfile in list_replicas(
                        dids=dids,
                        schemes=schemes,
                        unavailable=unavailable,
                        request_id=request_id,
                        ignore_availability=ignore_availability,
                        all_states=all_states,
                        rse_expression=rse_expression,
                        client_location=client_location,
                        domain=domain,
                        signature_lifetime=signature_lifetime,
                        resolve_archives=resolve_archives,
                        resolve_parents=resolve_parents,
                        nrandom=nrandom,
                        updated_after=updated_after,
                        issuer=issuer,
                        vo=vo):

                    # plain mode: one JSON document per line, nothing else
                    if not metalink:
                        yield dumps(rfile, cls=APIEncoder) + '\n'
                        continue

                    # the document header goes out right before the first file
                    if nothing_sent:
                        yield xml_head
                        nothing_sent = False

                    # pfn -> (domain, priority, rse, client_extract)
                    pfn_info = {
                        pfn: (meta['domain'], meta['priority'], meta['rse'],
                              meta['client_extract'])
                        for pfn, meta in rfile['pfns'].items()
                    }

                    yield ' <file name="' + rfile['name'] + '">\n'

                    parents = rfile['parents'] if 'parents' in rfile else None
                    if parents:
                        yield '  <parents>\n'
                        for parent in parents:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    for hash_type in ('adler32', 'md5'):
                        if rfile[hash_type] is not None:
                            yield '  <hash type="' + hash_type + '">' + rfile[
                                hash_type] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    policy_schema = config_get('policy',
                                               'schema',
                                               raise_exception=False,
                                               default='generic')
                    yield f'  <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

                    # lan replicas come first, ordered by their priority ...
                    lan_pfns = sorted(
                        (pfn for pfn, info in pfn_info.items()
                         if info[0] == 'lan'),
                        key=lambda pfn: pfn_info[pfn][1])
                    # ... everything else is ordered by sort_replicas
                    other_info = {
                        pfn: info
                        for pfn, info in pfn_info.items() if info[0] != 'lan'
                    }
                    ordered = lan_pfns + sort_replicas(
                        other_info, client_location, selection=select)

                    for position, pfn in enumerate(ordered, start=1):
                        pfn_domain, _, pfn_rse, pfn_extract = pfn_info[pfn]
                        yield '  <url location="%s" domain="%s" priority="%s" client_extract="%s">%s</url>\n' % (
                            str(pfn_rse), str(pfn_domain), str(position),
                            str(pfn_extract).lower(), escape(pfn))
                        # stop once the requested number of URLs was written
                        if limit and limit == position:
                            break
                    yield ' </file>\n'

                if metalink:
                    if nothing_sent:
                        # no file was streamed: send a complete, empty document
                        yield xml_head + '</metalink>\n'
                    else:
                        # don't forget to send the metalink footer
                        yield '</metalink>\n'
Beispiel #15
0
    def get(self, scope_name):
        """
        Header Redirect

        .. :quickref: HeaderRedirector; Header redirect.

        Selects one replica URL of the requested file and answers with a 303
        redirect to it. A ``Link`` header that points to the ``/metalink``
        variant of the request URL (carrying the effective ``schemes`` and
        ``select`` values) is attached to the response headers.

        Query parameters read: ``select`` / ``sort`` (``sort`` wins),
        ``rse``, ``site``, ``schemes``, ``ip``, ``fqdn``.

        :param scope_name: data identifier (scope)/(name).
        :resheader Content-Type: application/metalink+xml'.
        :status 303: Redirect.
        :status 400: scope_name could not be parsed.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        """
        headers = Headers()
        headers.set('Access-Control-Allow-Origin', request.environ.get('HTTP_ORIGIN'))
        headers.set('Access-Control-Allow-Headers', request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        headers.set('Access-Control-Allow-Methods', '*')
        headers.set('Access-Control-Allow-Credentials', 'true')

        no_rse_found = 'no redirection possible - no valid RSE for HTTP redirection found'

        try:
            scope, name = parse_scope_name(scope_name)
        except ValueError as error:
            return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, ['davs', 'http', 'https']

            client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)

            client_location = {'ip': client_ip,
                               'fqdn': None,
                               'site': None}

            if request.query_string:
                params = parse_qs(request.query_string.decode(encoding='utf-8'))
                if 'select' in params:
                    select = params['select'][0]
                if 'sort' in params:
                    select = params['sort'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'schemes' in params:
                    schemes = params['schemes'][0]
                else:
                    # NOTE(review): this differs from the default used when
                    # there is no query string at all - kept as-is, confirm
                    # whether the difference is intended.
                    schemes = ['davs', 'https', 's3']

                if 'ip' in params:
                    client_location['ip'] = params['ip'][0]
                if 'fqdn' in params:
                    client_location['fqdn'] = params['fqdn'][0]
                if 'site' in params:
                    site = params['site'][0]
                    client_location['site'] = site

            # correctly forward the schemes and select to potential metalink followups
            # REQUEST_URI is not part of the WSGI specification and may be
            # missing; rebuild the request path from Flask in that case
            # instead of failing on None.
            request_uri = request.environ.get('REQUEST_URI')
            if request_uri is None:
                request_uri = request.script_root + request.path
            cleaned_url = request_uri.split('?')[0]

            schemes_is_list = isinstance(schemes, list)
            link_schemes = ','.join(schemes) if schemes_is_list else schemes
            headers.set('Link', '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (cleaned_url, link_schemes, select))
            if not schemes_is_list:
                schemes = [schemes]  # list_replicas needs a list

            # get vo if given
            vo = request.headers.get('X-Rucio-VO', 'def')

            replicas = list(list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                                          schemes=schemes, client_location=client_location, vo=vo))

            selected_url = None
            for r in replicas:
                if not r['rses']:
                    continue

                if rse:
                    # an explicit RSE was requested: take its first URL or give up
                    if rse in r['rses'] and r['rses'][rse]:
                        selected_url = r['rses'][rse][0]
                        continue
                    return no_rse_found, 404, headers

                dictreplica = {}
                for rep in r['rses']:
                    for replica in r['rses'][rep]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        replica = replica.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[replica] = rep

                if not dictreplica:
                    return no_rse_found, 404, headers

                if site:
                    rep = site_selector(dictreplica, site, vo)
                    if not rep:
                        return no_rse_found, 404, headers
                elif select == 'geoip':
                    rep = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    rep = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    rep = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    rep = sort_ranking(dictreplica, client_location)
                else:
                    rep = sort_random(dictreplica)

                selected_url = rep[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

            return 'no redirection possible - file does not exist', 404, headers
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
        except RucioException as error:
            return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers
Beispiel #16
0
            def generate(request_id, issuer, vo):
                """
                Stream the result of list_replicas, one chunk at a time.

                Depending on the enclosing ``metalink`` flag, every file is
                emitted either as one JSON line or as a metalink ``<file>``
                element. Once the metalink header has been sent, the closing
                ``</metalink>`` tag is sent as well, even if an error
                interrupts the listing - except when the consumer closed the
                generator, because nothing may be yielded after that.

                :param request_id: Forwarded to list_replicas.
                :param issuer: Forwarded to list_replicas.
                :param vo: Forwarded to list_replicas.
                :returns: Generator of response text chunks.
                """
                # we need to call list_replicas before starting to reply
                # otherwise the exceptions won't be propagated correctly
                first = metalink
                consumer_gone = False

                try:
                    for rfile in list_replicas(dids=dids, schemes=schemes,
                                               unavailable=unavailable,
                                               request_id=request_id,
                                               ignore_availability=ignore_availability,
                                               all_states=all_states,
                                               rse_expression=rse_expression,
                                               client_location=client_location,
                                               domain=domain, signature_lifetime=signature_lifetime,
                                               resolve_archives=resolve_archives,
                                               resolve_parents=resolve_parents,
                                               updated_after=updated_after,
                                               issuer=issuer,
                                               vo=vo):

                        # in first round, set the appropriate content type, and stream the header
                        if first and metalink:
                            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                        first = False

                        if not metalink:
                            yield dumps(rfile, cls=APIEncoder) + '\n'
                        else:
                            # pfn -> (domain, priority, rse, client_extract)
                            dictreplica = {}
                            for replica in rfile['pfns'].keys():
                                dictreplica[replica] = (rfile['pfns'][replica]['domain'],
                                                        rfile['pfns'][replica]['priority'],
                                                        rfile['pfns'][replica]['rse'],
                                                        rfile['pfns'][replica]['client_extract'])

                            yield ' <file name="' + rfile['name'] + '">\n'

                            if 'parents' in rfile and rfile['parents']:
                                yield '  <parents>\n'
                                for parent in rfile['parents']:
                                    yield '   <did>' + parent + '</did>\n'
                                yield '  </parents>\n'

                            yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                            if rfile['adler32'] is not None:
                                yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                            if rfile['md5'] is not None:
                                yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                            yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                            yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (config_get('policy', 'schema',
                                                                                           raise_exception=False,
                                                                                           default='generic'),
                                                                                rfile['scope'],
                                                                                rfile['name'])

                            # TODO: deprecate this
                            if select == 'geoip':
                                replicas = sort_geoip(dictreplica, client_location['ip'])
                            elif select == 'closeness':
                                replicas = sort_closeness(dictreplica, client_location)
                            elif select == 'dynamic':
                                replicas = sort_dynamic(dictreplica, client_location)
                            elif select == 'ranking':
                                replicas = sort_ranking(dictreplica, client_location)
                            elif select == 'random':
                                replicas = sort_random(dictreplica)
                            else:
                                # default: order by the stored tuple, i.e. domain first, then priority
                                replicas = sorted(dictreplica, key=dictreplica.get)

                            for idx, replica in enumerate(replicas, start=1):
                                yield '  <url location="' + str(dictreplica[replica][2]) \
                                    + '" domain="' + str(dictreplica[replica][0]) \
                                    + '" priority="' + str(dictreplica[replica][1]) \
                                    + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                                    + '">' + escape(replica) + '</url>\n'
                                # stop once the requested number of URLs was written
                                if limit and limit == idx:
                                    break
                            yield ' </file>\n'

                    if metalink and first:
                        # if still first output, i.e. there were no replicas
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                except GeneratorExit:
                    # The consumer closed the stream. Yielding anything from
                    # here on would raise "generator ignored GeneratorExit".
                    consumer_gone = True
                    raise
                finally:
                    # don't forget to send the metalink footer
                    if metalink and not first and not consumer_gone:
                        yield '</metalink>\n'
Beispiel #17
0
    def get(self, scope_name):
        """
        Metalink redirect

        .. :quickref: MetaLinkRedirector; Metalink redirect.

        Streams a metalink document listing the replica URLs of the
        requested file. The first result of list_replicas is fetched before
        streaming starts, so that lookup errors can still be turned into a
        proper HTTP error response.

        Query parameters read: ``schemes``, ``select`` / ``sort`` (``sort``
        wins), ``ip``, ``fqdn``, ``site``.

        :param scope_name: data identifier (scope)/(name).
        :resheader Content-Type: application/metalink4+xml'.
        :status 200: OK.
        :status 400: scope_name could not be parsed.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 406: Not Acceptable.
        :status 500: Internal Error.
        :returns: Metalink file
        """
        # Function-scope import so the block does not depend on a file-level one.
        from xml.sax.saxutils import escape

        headers = Headers()
        headers.set('Access-Control-Allow-Origin', request.environ.get('HTTP_ORIGIN'))
        headers.set('Access-Control-Allow-Headers', request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        headers.set('Access-Control-Allow-Methods', '*')
        headers.set('Access-Control-Allow-Credentials', 'true')

        try:
            scope, name = parse_scope_name(scope_name)
        except ValueError as error:
            return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers

        dids, schemes, select = [{'scope': scope, 'name': name}], ['http', 'https', 'root', 'gsiftp', 'srm', 'davs'], None

        # set the correct client IP
        client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)

        client_location = {'ip': client_ip,
                           'fqdn': None,
                           'site': None}

        if request.query_string:
            params = parse_qs(request.query_string.decode(encoding='utf-8'))
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'sort' in params:
                select = params['sort'][0]

            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # get vo if given
        vo = request.headers.get('X-Rucio-VO', 'def')

        try:
            replicas_iter = list_replicas(dids=dids, schemes=schemes, client_location=client_location, vo=vo)
            try:
                first = next(replicas_iter)
            except StopIteration:
                # same CORS headers as on every other return path
                return 'no redirection possible - cannot find the DID', 404, headers

            def generate():
                # first, set the appropriate content type, and stream the header
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                consumer_gone = False
                try:
                    # iteratively stream the XML per file
                    for rfile in itertools.chain((first, ), replicas_iter):
                        # map every replica URL to the RSE it lives on
                        dictreplica = {}
                        for rse in rfile['rses']:
                            for replica in rfile['rses'][rse]:
                                dictreplica[replica] = rse

                        # stream metadata
                        yield ' <file name="' + rfile['name'] + '">\n'
                        yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                        if rfile['adler32'] is not None:
                            yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                        if rfile['md5'] is not None:
                            yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                        yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                        yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                        yield '</glfn>\n'

                        # sort the actual replicas if necessary
                        if select == 'geoip':
                            replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
                        elif select == 'closeness':
                            replicas = sort_closeness(dictreplica, client_location)
                        elif select == 'dynamic':
                            replicas = sort_dynamic(dictreplica, client_location)
                        elif select == 'ranking':
                            replicas = sort_ranking(dictreplica, client_location)
                        else:
                            replicas = sort_random(dictreplica)

                        # stream URLs; escape them so that characters such
                        # as '&' do not break the XML document
                        for idx, replica in enumerate(replicas, start=1):
                            yield '  <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + escape(replica) + '</url>\n'

                        yield ' </file>\n'

                except GeneratorExit:
                    # The consumer closed the stream. Yielding anything from
                    # here on would raise "generator ignored GeneratorExit".
                    consumer_gone = True
                    raise
                finally:
                    # don't forget to send the metalink footer
                    if not consumer_gone:
                        yield '</metalink>\n'

            return try_stream(generate(), content_type='application/metalink4+xml')
        except DataIdentifierNotFound as error:
            return generate_http_error_flask(404, 'DataIdentifierNotFound', error.args[0], headers=headers)
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
        except RucioException as error:
            return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
        except Exception as error:
            print(format_exc())
            return str(error), 500, headers
Beispiel #18
0
    def make_replicas_available(self):
        """
        Marks available replicas for the dataset at rse if they are in PhEDEx

        The Rucio replicas of ``self.scope:self.block_name`` restricted to
        ``self.rse`` are listed in all states.  Every file whose state at
        the RSE is not ``'AVAILABLE'`` and whose name is also a key of
        ``self.replicas`` (logged here as the PhEDEx replicas) gets its
        replica state set to ``'A'`` through ``core_update_state``.

        The whole operation is timed under ``cms_sync.time_recover_replica``
        and the number of updated files is added to the
        ``cms_sync.files_made_available`` counter.

        :returns: None
        """

        with monitor.record_timer_block('cms_sync.time_recover_replica'):
            logging.info('Recovering unavailable replicas for %s:%s at %s',
                         self.scope, self.block_name, self.rse)

            replicas = list(
                list_replicas(dids=[{
                    'scope': self.scope,
                    'name': self.block_name
                }],
                              rse_expression='rse=%s' % self.rse,
                              all_states=True))
            logging.info('Recovery: Rucio replicas %s', len(replicas))
            ewv_rucio_repl = {repl['name'] for repl in replicas}

            import pprint
            logging.info(pprint.pformat(ewv_rucio_repl))

            try:
                unavailable_replicas = {
                    repl['name']
                    for repl in replicas
                    if repl['states'][self.rse] != 'AVAILABLE'
                }
            except TypeError:
                # A replica entry whose 'states' value cannot be indexed
                # aborts the whole comprehension; in that case nothing is
                # treated as unavailable.
                logging.warning(
                    'Got a type error, setting unavailable replicas to null')
                unavailable_replicas = set()
            logging.info('Recovery: Unavailable replicas %s',
                         len(unavailable_replicas))
            phedex_replicas = set(self.replicas.keys())
            logging.info('Recovery: PhEDEx replicas %s', len(phedex_replicas))

            logging.info('Recovery: PhEDEx %s',
                         pprint.pformat(phedex_replicas))
            logging.info('Recovery: Unavailable %s',
                         pprint.pformat(unavailable_replicas))

            # Only files known on both sides are recovered: present in PhEDEx
            # and not available in Rucio.
            missing = list(phedex_replicas & unavailable_replicas)
            logging.info('Recovery: Missing replicas %s', len(missing))

            logging.info(
                'Recovery for %s:%s at %s: PhEDEx has %s, Rucio unavailable %s. Missing: %s ',
                self.scope, self.block_name, self.rse, len(phedex_replicas),
                len(unavailable_replicas), len(missing))

            # Fix up things which are unavailable
            rse_details = get_rse(self.rse)
            rse_id = rse_details['id']
            scope = InternalScope(self.scope)
            state = 'A'

            for name in missing:
                logging.info('Setting available %s:%s at %s', self.scope, name,
                             self.rse)
                core_update_state(rse_id=rse_id,
                                  scope=scope,
                                  name=name,
                                  state=state)

            monitor.record_counter('cms_sync.files_made_available',
                                   delta=len(missing))
Beispiel #19
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The request body is a JSON document. The keys read from it are
        ``dids``, ``schemes``, ``unavailable``, ``all_states``,
        ``rse_expression``, ``client_location``, ``sort`` and ``domain``.
        The query string may additionally carry ``select``, ``sort`` and
        ``limit``; values given there override the ones from the body.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            500 InternalError

        :returns: A stream of JSON documents, one line per file, containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        # metalink output is only selected when this exact media type is one
        # of the comma-separated entries of the Accept header
        metalink = False
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            if 'application/metalink4+xml' in tmp:
                metalink = True

        # prefer the forwarded address over the address of the direct peer
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        client_location = {}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location = params['client_location']
                client_location['ip'] = params['client_location'].get(
                    'ip', client_ip)
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # parse_qs maps every key to a *list* of values, so the first
            # element has to be taken for each scalar parameter
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # the limit is compared against an integer counter below
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(
                        400, 'ValueError',
                        'Cannot parse limit parameter as integer')
            if 'sort' in params:
                select = params['sort'][0]

        try:
            # first, set the appropriate content type, and stream the header
            if not metalink:
                header('Content-Type', 'application/x-json-stream')
            else:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       issuer=ctx.env.get('issuer')):
                # map every replica URL back to the RSE it lives on
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                else:
                    # NOTE(review): values are concatenated into the XML
                    # without escaping - confirm names/URLs cannot contain
                    # XML special characters.
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    if select == 'geoip':
                        # client_location is empty when the body did not
                        # provide one, so fall back to the detected address
                        replicas = sort_geoip(
                            dictreplica,
                            client_location.get('ip', client_ip))
                    elif select == 'closeness':
                        replicas = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        replicas = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        replicas = sort_ranking(dictreplica, client_location)
                    else:
                        replicas = sort_random(dictreplica)

                    idx = 0
                    for replica in replicas:
                        # priorities are 1-based in the emitted document
                        yield '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #20
0
    def test_api_replica(self):
        """ REPLICA (API): Test external representation of replicas """

        did = did_name_generator('file')
        did_parent = did_name_generator('dataset')
        pfn = 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (
            self.scope_name, generate_uuid())

        # Register one file replica and attach the file to a fresh dataset.
        replica_file = {
            'scope': self.scope_name,
            'name': did,
            'bytes': 100,
            'pfn': pfn,
        }
        add_replicas(self.rse2_name, files=[replica_file], issuer='root', **self.vo)

        add_did(self.scope_name, did_parent, 'dataset',
                issuer='root', account=self.account_name, **self.vo)
        attachment = {
            'scope': self.scope_name,
            'name': did_parent,
            'dids': [{'scope': self.scope_name, 'name': did}],
        }
        attach_dids_to_dids([attachment], issuer='root', **self.vo)

        # The PFN lookup must report the file under its external scope name.
        pfn_results = list(get_did_from_pfns([pfn], self.rse2_name, **self.vo))
        assert len(pfn_results) != 0
        did_found = False
        for mapping in pfn_results:
            for pfn_key in mapping:
                if mapping[pfn_key]['name'] != did:
                    continue
                did_found = True
                assert self.scope_name == mapping[pfn_key]['scope']
        assert did_found

        # Listing replicas with resolved parents must expose external scopes
        # only; with multi-VO the internal representation must not leak.
        listed = list(
            list_replicas(dids=[{'scope': self.scope_name, 'name': did}],
                          resolve_parents=True,
                          **self.vo))
        assert len(listed) != 0
        parents_found = False
        for rep in listed:
            assert rep['scope'] == self.scope_name
            if 'parents' not in rep:
                continue
            parents_found = True
            for parent in rep['parents']:
                assert self.scope_name in parent
                if self.multi_vo:
                    assert self.scope.internal not in parent
        assert parents_found
Beispiel #21
0
    def get(self, scope, name):
        """
        Header Redirect

        Picks one replica URL of the file and answers with a 303 redirect
        to it.  A ``Link`` header pointing to the ``/metalink`` follow-up
        (carrying the requested schemes and selection) is attached to the
        redirect response.

        Query arguments read: ``schemes``, ``select``, ``sort`` (overrides
        ``select``), ``rse``, ``site``, ``ip`` and ``fqdn``.

        .. :quickref: HeaderRedirector; Header redirect.

        :param scope: The scope name of the file.
        :param name: The name of the file.
        :resheader Content-Type: application/metalink+xml'.
        :status 303: Redirect.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        """

        headers = {}
        try:

            # use the default HTTP protocols if no scheme is given

            client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = request.remote_addr

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            schemes = request.args.get('schemes', ['davs', 'https', 's3'])
            select = request.args.get('select', 'random')
            if 'sort' in request.args:
                select = request.args['sort']
            rse = request.args.get('rse', None)
            site = request.args.get('site', None)

            client_location['ip'] = request.args.get('ip', client_ip)
            client_location['fqdn'] = request.args.get('fqdn', None)
            client_location['site'] = request.args.get('site', None)

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                headers[
                    'Link'] = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (
                        cleaned_url, ','.join(schemes), select)
            else:
                headers[
                    'Link'] = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (
                        cleaned_url, schemes, select)
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes,
                                         client_location=client_location)
            ]

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    dictreplica = {}

                    if rse:
                        # an explicitly requested RSE must hold the file
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                                # forcibly replacement davs and s3 URLs to https
                                replica = replica.replace(
                                    'davs://',
                                    'https://').replace('s3://', 'https://')
                                dictreplica[replica] = rep

                        if not dictreplica:
                            return 'no redirection possible - no valid RSE for HTTP redirection found', 404

                        elif site:
                            rep = site_selector(dictreplica, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                        else:
                            if select == 'geoip':
                                rep = sort_geoip(dictreplica,
                                                 client_location['ip'])
                            elif select == 'closeness':
                                rep = sort_closeness(dictreplica,
                                                     client_location)
                            elif select == 'dynamic':
                                rep = sort_dynamic(dictreplica,
                                                   client_location)
                            elif select == 'ranking':
                                rep = sort_ranking(dictreplica,
                                                   client_location)
                            else:
                                rep = sort_random(dictreplica)

                            selected_url = rep[0]

                        # find the RSE of the chosen URL (needed for signing)
                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                target_url = selected_url
                if selected_url.startswith('s3+rucio://'):
                    # only these URLs are exchanged for a signed URL; every
                    # other URL is used for the redirect as it is
                    connect(selected_rse, selected_url)
                    signed_urls = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    target_url = signed_urls[selected_url]

                res = redirect(target_url, code=303)
                # the response headers live in `res.headers`; assigning to an
                # ad-hoc attribute would silently drop the Link header
                for header_name, header_value in headers.items():
                    res.headers[header_name] = header_value
                return res

            return 'no redirection possible - file does not exist', 404

        except ReplicaNotFound as e:
            # NOTE(review): args[0][0] is kept from the original; presumably
            # args[0] is a sequence whose first item is the message - confirm.
            return generate_http_error_flask(404, 'ReplicaNotFound',
                                             e.args[0][0])
Beispiel #22
0
    def GET(self, scope, name):
        """
        Header Redirect

        Selects one replica URL of the requested file and redirects the
        client to it.  A ``Link`` header describing the metalink follow-up
        is emitted before the replicas are looked up.

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        no_rse_message = 'no redirection possible - no valid RSE for HTTP redirection found'
        link_template = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'

        try:

            # defaults used when the request carries no query string at all
            select = 'random'
            rse = None
            site = None
            schemes = ['davs', 'http', 'https']

            forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
            client_ip = ctx.ip if forwarded_for is None else forwarded_for

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            if ctx.query:
                query = parse_qs(ctx.query[1:])
                if 'select' in query:
                    select = query['select'][0]
                # 'sort' takes precedence over 'select' when both are given
                if 'sort' in query:
                    select = query['sort'][0]
                if 'rse' in query:
                    rse = query['rse'][0]
                if 'site' in query:
                    site = query['site'][0]
                schemes = query['schemes'][0] if 'schemes' in query else ['davs', 'https', 's3']

                for location_key in ('ip', 'fqdn', 'site'):
                    if location_key in query:
                        client_location[location_key] = query[location_key][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                schemes_for_link = ','.join(schemes)
            else:
                schemes_for_link = schemes
                schemes = [schemes]  # list_replicas needs a list
            header('Link', link_template % (cleaned_url, schemes_for_link, select))

            # get vo if given
            vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

            replicas = list(
                list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                              schemes=schemes,
                              client_location=client_location,
                              vo=vo))

            selected_url = None
            for entry in replicas:
                if not entry['rses']:
                    continue

                if rse:
                    # an explicitly requested RSE must hold the file
                    if rse in entry['rses'] and entry['rses'][rse]:
                        selected_url = entry['rses'][rse][0]
                        continue
                    raise ReplicaNotFound(no_rse_message)

                dictreplica = {}
                for rse_name in entry['rses']:
                    for url in entry['rses'][rse_name]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        https_url = url.replace('davs://', 'https://')
                        https_url = https_url.replace('s3://', 'https://')
                        dictreplica[https_url] = rse_name

                if not dictreplica:
                    raise ReplicaNotFound(no_rse_message)

                if site:
                    candidates = site_selector(dictreplica, site, vo)
                    if not candidates:
                        raise ReplicaNotFound(no_rse_message)
                    selected_url = candidates[0]
                    continue

                if select == 'geoip':
                    ordered = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    ordered = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    ordered = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    ordered = sort_ranking(dictreplica, client_location)
                else:
                    ordered = sort_random(dictreplica)

                selected_url = ordered[0]

            if selected_url:
                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            # the redirect itself is signalled through an exception
            raise
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Beispiel #23
0
    def GET(self, scope, name):
        """
        List all replicas for data identifiers.

        The output format follows the Accept header: metalink 4 if
        ``application/metalink4+xml`` is listed, otherwise metalink 3 if
        ``application/metalink+xml`` is listed, otherwise a JSON stream.
        For both metalink formats the schemes are forced to http/https.

        The query string is first tried as a URL-quoted JSON document
        (only ``schemes`` is read from it); if that fails it is parsed as
        ordinary query parameters, reading ``select`` and ``limit``.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 Not Found
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = None
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            # first check if client accepts metalink
            if 'application/metalink+xml' in tmp:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in tmp:
                metalink = 4

        dids, schemes, select, limit = [{'scope': scope, 'name': name}], None, None, None
        if ctx.query:
            try:
                params = loads(unquote(ctx.query[1:]))
                if 'schemes' in params:
                    schemes = params['schemes']
            except ValueError:
                # not JSON: treat it as a regular query string
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'limit' in params:
                    limit = int(params['limit'][0])

        try:
            # first, set the appropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                schemes = ['http', 'https']
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                schemes = ['http', 'https']
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # the client address does not change between files, look it up once
            client_ip = ctx.get('ip')

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                # map every replica URL back to the RSE it lives on
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        # keep the unsorted order if the address is unknown
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)
                if metalink is None:
                    yield dumps(rfile) + '\n'
                elif metalink == 3:
                    idx = 0
                    yield ' <file name="' + rfile['name'] + '">\n  <resources>\n'
                    for replica in replicas:
                        yield '   <url type="http" preference="' + str(idx) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield '  </resources>\n </file>\n'
                elif metalink == 4:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'
                    idx = 0
                    for replica in replicas:
                        # priorities are 1-based in the emitted document
                        yield '   <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink == 3:
                yield '</files>\n</metalink>\n'
            elif metalink == 4:
                yield '</metalink>\n'

        except DataIdentifierNotFound as e:
            # NOTE(review): args[0][0] is kept from the original; presumably
            # args[0] is a sequence whose first item is the message - confirm.
            raise generate_http_error(404, 'DataIdentifierNotFound', e.args[0][0])
Beispiel #24
0
    def POST(self):
        """
        List all replicas for data identifiers.

        The request body is a JSON document. The keys read from it are
        ``dids``, ``schemes``, ``unavailable``, ``all_states`` and
        ``rse_expression``.  The query string may carry ``select`` and
        ``limit``.

        The output format follows the Accept header: metalink 4 if
        ``application/metalink4+xml`` is listed, otherwise metalink 3 if
        ``application/metalink+xml`` is listed, otherwise a JSON stream.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = None
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            # first check if client accepts metalink
            if 'application/metalink+xml' in tmp:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in tmp:
                metalink = 4

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states = False, None, False
        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']

        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            # parse_qs maps every key to a *list* of string values
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # the limit is compared against an integer counter below
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(
                        400, 'ValueError',
                        'Cannot parse limit parameter as integer')

        # the client address does not change between files, look it up once;
        # prefer the forwarded address over the address of the direct peer
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        try:
            # first, set the appropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression):
                # map every replica URL back to the RSE it lives on
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    replicas = geoIP_order(dictreplica, client_ip)
                else:
                    replicas = random_order(dictreplica, client_ip)
                if metalink is None:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                elif metalink == 3:
                    idx = 0
                    yield ' <file name="' + rfile[
                        'name'] + '">\n  <resources>\n'
                    for replica in replicas:
                        yield '   <url type="http" preference="' + str(
                            idx) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield '  </resources>\n </file>\n'
                elif metalink == 4:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'
                    idx = 0
                    for replica in replicas:
                        # priorities are 1-based in the emitted document
                        yield '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink == 3:
                yield '</files>\n</metalink>\n'
            elif metalink == 4:
                yield '</metalink>\n'

        except DataIdentifierNotFound as e:
            # NOTE(review): args[0][0] is kept from the original; presumably
            # args[0] is a sequence whose first item is the message - confirm.
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])