예제 #1
0
            def generate(vo):
                """
                Stream the replicas of the requested DIDs, one chunk at a time.

                Depending on the free variable ``metalink`` the output is either one
                JSON document per line or a single metalink XML document. ``dids``,
                ``schemes``, ``client_location``, ``select`` and ``limit`` are likewise
                taken from the surrounding scope.

                :param vo: The VO forwarded to ``list_replicas``.
                :returns: A generator yielding the response body as strings.
                """
                # The metalink header is only sent once the first result has arrived,
                # so that exceptions raised by list_replicas are propagated before
                # anything has been streamed.
                first = metalink

                for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
                    if first:
                        # ``first`` starts out as ``metalink``, so it can only be
                        # truthy in metalink mode.
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                        first = False

                    # Map every replica URL to the RSE it is listed under.
                    dictreplica = {}
                    for rse in rfile['rses']:
                        for replica in rfile['rses'][rse]:
                            dictreplica[replica] = rse

                    # Called in both output modes, exactly as before, even though
                    # the JSON mode does not use the result.
                    replicas = sort_replicas(dictreplica,
                                             client_location,
                                             selection=select)

                    if not metalink:
                        yield dumps(rfile) + '\n'
                        continue

                    # NOTE(review): scope and name are written unescaped; presumably
                    # the DID schema forbids XML-special characters - confirm.
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield f'  <glfn name="/atlas/rucio/{rfile["scope"]}:{rfile["name"]}">'
                    yield '</glfn>\n'

                    # The priority is the 1-based position in the sorted list.
                    for idx, replica in enumerate(replicas, start=1):
                        yield '   <url location="' + str(dictreplica[replica]) \
                            + '" priority="' + str(idx) \
                            + '">' + escape(replica) + '</url>\n'
                        # A falsy limit means "no limit".
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

                if metalink:
                    if first:
                        # if still first output, i.e. there were no replicas
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                    else:
                        # don't forget to send the metalink footer
                        yield '</metalink>\n'
예제 #2
0
            def generate():
                """
                Stream a metalink XML document describing every file's replicas.

                The files are taken from ``first`` followed by ``replicas_iter``;
                ``client_location`` and ``sortby`` are handed to ``sort_replicas``.
                All four names come from the surrounding scope.

                :returns: A generator yielding the XML document as string chunks.
                """
                # Imported locally so that this block does not depend on the
                # module-level imports, which are not visible from here.
                from xml.sax.saxutils import escape

                # first, set the appropriate content type, and stream the header
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

                # iteratively stream the XML per file
                for rfile in itertools.chain((first, ), replicas_iter):
                    # Map every replica URL to the RSE it is listed under.
                    dictreplica = {}
                    for rse in rfile['rses']:
                        for replica in rfile['rses'][rse]:
                            dictreplica[replica] = rse

                    # stream metadata
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    replicas = sort_replicas(dictreplica,
                                             client_location,
                                             selection=sortby)

                    # stream URLs; the priority is the 1-based position in the sorted list
                    for idx, replica in enumerate(replicas, start=1):
                        # URLs may carry query strings with '&', which must be
                        # escaped to keep the document well-formed.
                        yield '  <url location="' + str(dictreplica[replica]) \
                            + '" priority="' + str(idx) \
                            + '">' + escape(replica) + '</url>\n'

                    yield ' </file>\n'

                # don't forget to send the metalink footer
                yield '</metalink>\n'
예제 #3
0
파일: replicas.py 프로젝트: rak108/rucio
            def _list_and_sort_replicas(request_id, issuer, vo):
                """
                Yield every file returned by ``list_replicas`` with its PFNs re-ordered.

                LAN replicas come first, ordered by their priority; the remaining
                replicas follow in the order chosen by ``sort_replicas``. The ordered
                PFNs are passed through ``_sorted_with_priorities`` together with
                ``limit`` and written back to ``rfile['pfns']``. All query options
                other than the three parameters are read from the surrounding scope.

                :param request_id: Forwarded to ``list_replicas``.
                :param issuer: Forwarded to ``list_replicas``.
                :param vo: Forwarded to ``list_replicas``.
                :returns: A generator of the modified file dictionaries.
                """
                # list_replicas has to be called before the reply is started,
                # otherwise its exceptions are not propagated correctly.
                listing = list_replicas(
                    dids=dids,
                    schemes=schemes,
                    unavailable=unavailable,
                    request_id=request_id,
                    ignore_availability=ignore_availability,
                    all_states=all_states,
                    rse_expression=rse_expression,
                    client_location=client_location,
                    domain=domain,
                    signature_lifetime=signature_lifetime,
                    resolve_archives=resolve_archives,
                    resolve_parents=resolve_parents,
                    nrandom=nrandom,
                    updated_after=updated_after,
                    issuer=issuer,
                    vo=vo,
                )

                for rfile in listing:
                    # Split the PFNs by domain; each value is the tuple
                    # (domain, priority, rse, client_extract).
                    lan_pfns, wan_pfns = {}, {}
                    for pfn, details in rfile['pfns'].items():
                        summary = (details['domain'], details['priority'],
                                   details['rse'], details['client_extract'])
                        bucket = lan_pfns if summary[0] == 'lan' else wan_pfns
                        bucket[pfn] = summary

                    # LAN replicas by priority, then everything else by the selection criteria.
                    lan_order = sorted(lan_pfns, key=lambda pfn: lan_pfns[pfn][1])
                    wan_order = sort_replicas(wan_pfns,
                                              client_location,
                                              selection=select)

                    ranked = _sorted_with_priorities(
                        replicas=rfile['pfns'],
                        sorted_pfns=chain(lan_order, wan_order),
                        limit=limit)
                    rfile['pfns'] = dict(ranked)
                    yield rfile
예제 #4
0
파일: replicas.py 프로젝트: rak108/rucio
            def _list_and_sort_replicas(vo):
                """
                Yield every file returned by ``list_replicas`` with its PFNs re-ordered.

                The replica URLs found under ``rfile['rses']`` are ordered by
                ``sort_replicas``; that order is passed to ``_sorted_with_priorities``
                together with ``limit`` and the result replaces ``rfile['pfns']``.
                ``dids``, ``schemes``, ``client_location``, ``select`` and ``limit``
                are read from the surrounding scope.

                :param vo: The VO forwarded to ``list_replicas``.
                :returns: A generator of the modified file dictionaries.
                """
                # we need to call list_replicas before starting to reply
                # otherwise the exceptions won't be propagated correctly
                for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
                    # Map every replica URL to the RSE it is listed under.
                    dictreplica = {
                        replica: rse
                        for rse in rfile['rses']
                        for replica in rfile['rses'][rse]
                    }

                    sorted_pfns = sort_replicas(dictreplica,
                                                client_location,
                                                selection=select)
                    rfile['pfns'] = dict(
                        _sorted_with_priorities(rfile['pfns'],
                                                sorted_pfns,
                                                limit=limit))
                    yield rfile
예제 #5
0
파일: replicas.py 프로젝트: vokac/rucio
            def generate(request_id, issuer, vo):
                """
                Stream the replicas of the requested DIDs, one chunk at a time.

                Depending on the free variable ``metalink`` the output is either one
                JSON document per line or a single metalink XML document. In metalink
                mode the URLs of a file are listed with LAN replicas first (ordered by
                their priority), followed by the remaining replicas in the order
                returned by ``sort_replicas``; at most ``limit`` URLs are written per
                file when ``limit`` is truthy. All query options other than the three
                parameters are read from the surrounding scope.

                :param request_id: Forwarded to ``list_replicas``.
                :param issuer: Forwarded to ``list_replicas``.
                :param vo: Forwarded to ``list_replicas``.
                :returns: A generator yielding the response body as strings.
                """
                # we need to call list_replicas before starting to reply
                # otherwise the exceptions won't be propagated correctly
                first = metalink

                for rfile in list_replicas(dids=dids, schemes=schemes,
                                           unavailable=unavailable,
                                           request_id=request_id,
                                           ignore_availability=ignore_availability,
                                           all_states=all_states,
                                           rse_expression=rse_expression,
                                           client_location=client_location,
                                           domain=domain, signature_lifetime=signature_lifetime,
                                           resolve_archives=resolve_archives,
                                           resolve_parents=resolve_parents,
                                           updated_after=updated_after,
                                           issuer=issuer,
                                           vo=vo):

                    # in first round, set the appropriate content type, and stream the header
                    # (``first`` starts out as ``metalink``, so it is only truthy in metalink mode)
                    if first:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    first = False

                    if not metalink:
                        yield dumps(rfile, cls=APIEncoder) + '\n'
                        continue

                    # PFN -> (domain, priority, rse, client_extract)
                    dictreplica = {
                        pfn: (details['domain'],
                              details['priority'],
                              details['rse'],
                              details['client_extract'])
                        for pfn, details in rfile['pfns'].items()
                    }

                    yield ' <file name="' + rfile['name'] + '">\n'

                    if 'parents' in rfile and rfile['parents']:
                        yield '  <parents>\n'
                        for parent in rfile['parents']:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    policy_schema = config_get('policy', 'schema', raise_exception=False, default='generic')
                    yield f'  <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

                    # sort lan by priority
                    lanreplicas = sorted(
                        (pfn for pfn, info in dictreplica.items() if info[0] == 'lan'),
                        key=lambda pfn: dictreplica[pfn][1])
                    # everything that is not lan is ordered by the selection criteria
                    others = {pfn: info for pfn, info in dictreplica.items() if info[0] != 'lan'}
                    replicas = lanreplicas + sort_replicas(others, client_location, selection=select)

                    # The emitted priority is the 1-based position in the combined list.
                    for idx, replica in enumerate(replicas, start=1):
                        domain_name, _, rse_name, client_extract = dictreplica[replica]
                        yield '  <url location="' + str(rse_name) \
                            + '" domain="' + str(domain_name) \
                            + '" priority="' + str(idx) \
                            + '" client_extract="' + str(client_extract).lower() \
                            + '">' + escape(replica) + '</url>\n'
                        # A falsy limit means "no limit".
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

                if metalink:
                    if first:
                        # if still first output, i.e. there were no replicas
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                    else:
                        # don't forget to send the metalink footer
                        yield '</metalink>\n'
예제 #6
0
    def get(self, scope_name):
        """
        ---
        summary: Header redirect
        description: Get the header redirect.
        tags:
          - Redirect
        parameters:
        - name: scope_name
          in: path
          description: The data identifier (scope)/(name).
          schema:
            type: string
          style: simple
        - name: ip
          in: query
          description: The client ip.
          schema:
            type: string
          style: simple
          required: false
        - name: fqdn
          in: query
          schema:
            type: string
          style: simple
          required: false
        - name: site
          in: query
          schema:
            type: string
          style: simple
          required: false
        - name: schemes
          in: query
          schema:
            type: array
          style: simple
          required: false
        - name: select
          in: query
          schema:
            type: string
          style: simple
          required: false
        - name: sort
          in: query
          schema:
            type: string
          style: simple
          required: false
        - name: rse
          in: query
          schema:
            type: string
          style: simple
          required: false
        responses:
          303:
            description: OK
            content:
              application/json:
                schema:
                  description: The redirect url.
                  type: string
          401:
            description: Invalid Auth Token
          404:
            description: Rse or did not found
        """
        headers = self.get_headers()

        try:
            parsing_vo = extract_vo(request.headers)
            scope, name = parse_scope_name(scope_name, parsing_vo)
        except ValueError as error:
            return generate_http_error_flask(400, error, headers=headers)

        try:
            query = request.args
            forwarded_ip = request.headers.get('X-Forwarded-For',
                                               default=request.remote_addr)

            # An explicit ?ip= wins over the address taken from the request.
            client_location = {
                'ip': query.get('ip', default=forwarded_ip),
                'fqdn': query.get('fqdn', default=None),
                'site': query.get('site', default=None),
            }
            # use the default HTTP protocols if no scheme is given
            schemes = query.getlist('schemes') or ['davs', 'https', 's3']
            # "sort" takes precedence over "select"; "random" is the fallback
            sortby = query.get('sort', default=query.get('select', default='random'))
            rse = query.get('rse', default=None)
            site = query.get('site', default=None)

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]

            headers.set(
                'Link',
                f'<{cleaned_url}/metalink?schemes={",".join(schemes)}&select={sortby}>; rel=describedby; type="application/metalink+xml"'
            )

            # get vo if given
            vo = extract_vo(request.headers)

            did = {'scope': scope, 'name': name, 'type': 'FILE'}
            replicas = list(
                list_replicas(dids=[did],
                              schemes=schemes,
                              client_location=client_location,
                              vo=vo))

            # Reply shared by every "no usable replica" exit below.
            no_rse_reply = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

            selected_url = None
            for r in replicas:
                rses = r['rses']
                if not rses:
                    continue

                if rse:
                    # An explicitly requested RSE must hold at least one replica.
                    if rse in rses and rses[rse]:
                        selected_url = rses[rse][0]
                        continue
                    return no_rse_reply

                dictreplica = {}
                for rse_name in rses:
                    for pfn in rses[rse_name]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replace davs and s3 URLs with https
                        https_pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[https_pfn] = rse_name

                if not dictreplica:
                    return no_rse_reply

                if site:
                    ranked = site_selector(dictreplica, site, vo)
                    if not ranked:
                        return no_rse_reply
                else:
                    ranked = sort_replicas(dictreplica,
                                           client_location,
                                           selection=sortby)
                selected_url = ranked[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

            return 'no redirection possible - file does not exist', 404, headers
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, error, headers=headers)
예제 #7
0
    def get(self, scope_name):
        """
        Header Redirect

        .. :quickref: HeaderRedirector; Header redirect.

        Pick one replica URL of the given file and answer with a 303 redirect
        to it. A ``Link`` header pointing at the metalink endpoint is attached
        to the response headers.

        :param scope_name: data identifier (scope)/(name).
        :resheader Content-Type: application/metalink+xml.
        :status 303: Redirect.
        :status 400: The scope/name could not be parsed.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        :status 500: Internal Error.
        """
        headers = Headers()
        cors_headers = (
            ('Access-Control-Allow-Origin',
             request.environ.get('HTTP_ORIGIN')),
            ('Access-Control-Allow-Headers',
             request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS')),
            ('Access-Control-Allow-Methods', '*'),
            ('Access-Control-Allow-Credentials', 'true'),
        )
        for header_name, header_value in cors_headers:
            headers.set(header_name, header_value)

        try:
            parsing_vo = request.headers.get('X-Rucio-VO', default='def')
            scope, name = parse_scope_name(scope_name, parsing_vo)
        except ValueError as error:
            return generate_http_error_flask(400,
                                             'ValueError',
                                             error.args[0],
                                             headers=headers)
        except Exception as error:
            logging.exception("Internal Error")
            return str(error), 500, headers

        try:
            # use the default HTTP protocols if no scheme is given
            select = 'random'
            rse = None
            site = None
            schemes = ['davs', 'http', 'https']

            client_ip = request.headers.get('X-Forwarded-For',
                                            default=request.remote_addr)
            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            if request.query_string:
                params = parse_qs(request.query_string.decode(encoding='utf-8'))

                # "sort" is evaluated last and therefore wins over "select"
                for key in ('select', 'sort'):
                    if key in params:
                        select = params[key][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                    client_location['site'] = site

                # NOTE(review): with a query string the fallback scheme list
                # differs from the one used without any query string - confirm
                # that this is intended.
                if 'schemes' in params:
                    schemes = params['schemes'][0]
                else:
                    schemes = ['davs', 'https', 's3']

                for key in ('ip', 'fqdn'):
                    if key in params:
                        client_location[key] = params[key][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                schemes_param = ','.join(schemes)
            else:
                schemes_param = schemes
                schemes = [schemes]  # list_replicas needs a list
            headers.set(
                'Link',
                '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                % (cleaned_url, schemes_param, select))

            # get vo if given
            vo = request.headers.get('X-Rucio-VO', default='def')

            did = {'scope': scope, 'name': name, 'type': 'FILE'}
            replicas = list(
                list_replicas(dids=[did],
                              schemes=schemes,
                              client_location=client_location,
                              vo=vo))

            # Reply shared by every "no usable replica" exit below.
            no_rse_reply = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

            selected_url = None
            for r in replicas:
                rses = r['rses']
                if not rses:
                    continue

                if rse:
                    # An explicitly requested RSE must hold at least one replica.
                    if rse in rses and rses[rse]:
                        selected_url = rses[rse][0]
                        continue
                    return no_rse_reply

                dictreplica = {}
                for rse_name in rses:
                    for pfn in rses[rse_name]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replace davs and s3 URLs with https
                        https_pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[https_pfn] = rse_name

                if not dictreplica:
                    return no_rse_reply

                if site:
                    ranked = site_selector(dictreplica, site, vo)
                    if not ranked:
                        return no_rse_reply
                else:
                    ranked = sort_replicas(dictreplica,
                                           client_location,
                                           selection=select)
                selected_url = ranked[0]

            if selected_url:
                response = redirect(selected_url, code=303)
                response.headers.extend(headers)
                return response

            return 'no redirection possible - file does not exist', 404, headers
        except ReplicaNotFound as error:
            return generate_http_error_flask(404,
                                             'ReplicaNotFound',
                                             error.args[0],
                                             headers=headers)
        except RucioException as error:
            return generate_http_error_flask(500,
                                             error.__class__.__name__,
                                             error.args[0],
                                             headers=headers)
        except Exception as error:
            logging.exception("Internal Error")
            return str(error), 500, headers
예제 #8
0
            def generate(request_id, issuer, vo):
                """
                Stream the replicas of the requested DIDs, one chunk at a time.

                For every file the PFNs are re-ordered (LAN replicas by priority,
                then the rest as returned by ``sort_replicas``), passed through
                ``_sorted_with_priorities`` together with ``limit`` and written back
                to ``rfile['pfns']``. The file is then emitted either as one JSON line
                or, when the free variable ``metalink`` is truthy, as a ``<file>``
                element of a metalink XML document. All query options other than the
                three parameters are read from the surrounding scope.

                :param request_id: Forwarded to ``list_replicas``.
                :param issuer: Forwarded to ``list_replicas``.
                :param vo: Forwarded to ``list_replicas``.
                :returns: A generator yielding the response body as strings.
                """
                # list_replicas has to be called before the reply is started,
                # otherwise its exceptions are not propagated correctly.
                first = True

                listing = list_replicas(
                    dids=dids,
                    schemes=schemes,
                    unavailable=unavailable,
                    request_id=request_id,
                    ignore_availability=ignore_availability,
                    all_states=all_states,
                    rse_expression=rse_expression,
                    client_location=client_location,
                    domain=domain,
                    signature_lifetime=signature_lifetime,
                    resolve_archives=resolve_archives,
                    resolve_parents=resolve_parents,
                    nrandom=nrandom,
                    updated_after=updated_after,
                    issuer=issuer,
                    vo=vo,
                )

                for rfile in listing:
                    # Split the PFNs by domain; each value is the tuple
                    # (domain, priority, rse, client_extract).
                    lan_pfns, wan_pfns = {}, {}
                    for pfn, details in rfile['pfns'].items():
                        summary = (details['domain'], details['priority'],
                                   details['rse'], details['client_extract'])
                        bucket = lan_pfns if summary[0] == 'lan' else wan_pfns
                        bucket[pfn] = summary

                    # LAN replicas by priority, then everything else by the selection criteria.
                    lan_order = sorted(lan_pfns, key=lambda pfn: lan_pfns[pfn][1])
                    wan_order = sort_replicas(wan_pfns,
                                              client_location,
                                              selection=select)
                    ranked = _sorted_with_priorities(
                        replicas=rfile['pfns'],
                        sorted_pfns=chain(lan_order, wan_order),
                        limit=limit)
                    rfile['pfns'] = dict(ranked)

                    if not metalink:
                        yield dumps(rfile, cls=APIEncoder) + '\n'
                        continue

                    # The XML header precedes the very first file only.
                    if first:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    first = False

                    yield ' <file name="' + rfile['name'] + '">\n'

                    if 'parents' in rfile and rfile['parents']:
                        yield '  <parents>\n'
                        for parent in rfile['parents']:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    for hash_type in ('adler32', 'md5'):
                        if rfile[hash_type] is not None:
                            yield '  <hash type="' + hash_type + '">' + rfile[hash_type] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    policy_schema = config_get('policy',
                                               'schema',
                                               raise_exception=False,
                                               default='generic')
                    yield f'  <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

                    for pfn, details in rfile['pfns'].items():
                        yield ('  <url location="' + str(details['rse'])
                               + '" domain="' + str(details['domain'])
                               + '" priority="' + str(details['priority'])
                               + '" client_extract="' + str(details['client_extract']).lower()
                               + '">' + escape(pfn) + '</url>\n')
                    yield ' </file>\n'

                if metalink:
                    if first:
                        # nothing was streamed yet, i.e. there were no replicas
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                    else:
                        # close the document opened with the first file
                        yield '</metalink>\n'
예제 #9
0
파일: redirect.py 프로젝트: vokac/rucio
    def get(self, scope_name):
        """
        Header Redirect

        .. :quickref: HeaderRedirector; Header redirect.

        Looks up the replicas of one file and answers with a redirect to a
        single selected URL. The choice is driven by the ``rse``, ``site``
        and ``sort``/``select`` query arguments.

        :param scope_name: data identifier (scope)/(name).
        :resheader Content-Type: application/metalink+xml'.
        :status 303: Redirect.
        :status 400: scope_name could not be parsed.
        :status 401: Invalid Auth Token.
        :status 404: RSE Not Found.
        :status 404: DID Not Found.
        """
        headers = self.get_headers()

        # the VO header is needed both for parsing the DID and for the replica lookup
        vo = request.headers.get('X-Rucio-VO', default='def')
        no_rse_msg = 'no redirection possible - no valid RSE for HTTP redirection found'

        try:
            scope, name = parse_scope_name(scope_name, vo)
        except ValueError as error:
            return generate_http_error_flask(400, error, headers=headers)

        try:
            args = request.args
            forwarded_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
            site = args.get('site', default=None)

            client_location = {
                'ip': args.get('ip', default=forwarded_ip),
                'fqdn': args.get('fqdn', default=None),
                'site': site,
            }

            # fall back to the default HTTP protocols if no scheme is given
            schemes = args.getlist('schemes') or ['davs', 'https', 's3']
            # 'sort' takes precedence over 'select'; 'random' when neither is present
            sortby = args.get('sort', default=args.get('select', default='random'))
            rse = args.get('rse', default=None)

            # forward the schemes and the selection to potential metalink followups
            cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
            scheme_csv = ",".join(schemes)
            headers.set('Link', f'<{cleaned_url}/metalink?schemes={scheme_csv}&select={sortby}>; rel=describedby; type="application/metalink+xml"')

            did = {'scope': scope, 'name': name, 'type': 'FILE'}
            replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

            selected_url = None
            for r in replicas:
                if not r['rses']:
                    continue

                if rse:
                    # an explicit RSE was requested: take its first URL or give up
                    if rse not in r['rses'] or not r['rses'][rse]:
                        return no_rse_msg, 404, headers
                    selected_url = r['rses'][rse][0]
                    continue

                # since this is HTTP-only redirection, and to stay compatible with
                # as many http clients as possible, davs and s3 URLs are rewritten to https
                dictreplica = {}
                for rse_name in r['rses']:
                    for pfn in r['rses'][rse_name]:
                        pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[pfn] = rse_name

                if not dictreplica:
                    return no_rse_msg, 404, headers

                if site:
                    candidates = site_selector(dictreplica, site, vo)
                    if not candidates:
                        return no_rse_msg, 404, headers
                    selected_url = candidates[0]
                else:
                    candidates = sort_replicas(dictreplica, client_location, selection=sortby)
                    selected_url = candidates[0]

            if not selected_url:
                return 'no redirection possible - file does not exist', 404, headers

            response = redirect(selected_url, code=303)
            response.headers.extend(headers)
            return response
        except ReplicaNotFound as error:
            return generate_http_error_flask(404, error, headers=headers)
예제 #10
0
    def GET(self, scope, name):
        """
        Stream all replicas of one data identifier.

        The reply is a newline-delimited JSON stream, or a metalink4 document
        when the Accept header lists application/metalink4+xml. The query
        string may carry ``schemes``, ``select`` and ``limit`` (the latter
        caps the number of URLs per file in metalink mode).

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            404 Not Found
            406 Not Acceptable
            500 InternalError

        :param scope: The scope of the data identifier.
        :param name: The name of the data identifier.
        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        accept = ctx.env.get('HTTP_ACCEPT')
        metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

        dids = [{'scope': scope, 'name': name}]
        schemes = select = limit = None
        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

        # prefer the forwarded address, fall back to the direct peer
        forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
        client_location = {
            'ip': ctx.ip if forwarded_for is None else forwarded_for,
            'fqdn': None,
            'site': None,
        }

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # list_replicas is entered before any chunk is sent, so that its
            # exceptions can still be turned into a proper error reply
            nothing_sent = True

            content_type = 'application/metalink4+xml' if metalink else 'application/x-json-stream'
            header('Content-Type', content_type)

            for rfile in list_replicas(dids=dids, schemes=schemes, vo=ctx.env.get('vo')):

                # the very first file triggers the metalink preamble
                if nothing_sent:
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    nothing_sent = False

                # map every PFN back to the RSE that holds it
                pfn_to_rse = {}
                for rse in rfile['rses']:
                    for pfn in rfile['rses'][rse]:
                        pfn_to_rse[pfn] = rse

                ordered = sort_replicas(pfn_to_rse, client_location, selection=select)

                if not metalink:
                    yield dumps(rfile) + '\n'
                    continue

                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                # priorities are 1-based and follow the sorted order
                for priority, pfn in enumerate(ordered, start=1):
                    yield '   <url location="' + str(pfn_to_rse[pfn]) + '" priority="' + str(priority) + '">' + escape(pfn) + '</url>\n'
                    if limit and limit == priority:
                        break
                yield ' </file>\n'

            if metalink:
                if nothing_sent:
                    # no file at all: still emit a complete, empty metalink document
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # the preamble went out, so the closing tag has to follow
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__, error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
예제 #11
0
    def POST(self):
        """
        List all replicas for a set of data identifiers.

        The options are read from the JSON request body. ``select`` and
        ``sort`` may additionally be given in the query string, where they
        take precedence over the body; ``limit`` is only read from the query
        string and caps the number of URLs per file in metalink mode.

        HTTP Success:
            200 OK

        HTTP Error:
            400 Bad Request
            401 Unauthorized
            404 Not Found
            406 Not Acceptable
            500 InternalError

        :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
        """

        metalink = False
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            if 'application/metalink4+xml' in tmp:
                metalink = True

        # prefer the forwarded address, fall back to the direct peer
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states, domain = False, None, False, None
        signature_lifetime, resolve_archives, resolve_parents = None, True, False
        updated_after = None

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                # merely supplying 'unavailable' switches ignore_availability on
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']
            if 'client_location' in params:
                client_location.update(params['client_location'])
            if 'sort' in params:
                select = params['sort']
            if 'domain' in params:
                domain = params['domain']
            if 'resolve_archives' in params:
                resolve_archives = params['resolve_archives']
            if 'resolve_parents' in params:
                resolve_parents = params['resolve_parents']

            if 'signature_lifetime' in params:
                signature_lifetime = params['signature_lifetime']
            else:
                # hardcoded default of 10 minutes if config is not parseable
                signature_lifetime = config_get('credentials',
                                                'signature_lifetime',
                                                raise_exception=False,
                                                default=600)

            if 'updated_after' in params:
                if isinstance(params['updated_after'], (int, float)):
                    # convert from epoch time stamp to datetime object
                    updated_after = datetime.utcfromtimestamp(
                        params['updated_after'])
                else:
                    # attempt UTC format '%Y-%m-%dT%H:%M:%S' conversion
                    updated_after = datetime.strptime(params['updated_after'],
                                                      '%Y-%m-%dT%H:%M:%S')

        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                # parse_qs returns strings; the limit is compared against an
                # integer counter below, so it has to be converted here
                try:
                    limit = int(params['limit'][0])
                except ValueError:
                    raise generate_http_error(400, 'ValueError',
                                              'Cannot decode limit parameter')
            if 'sort' in params:
                # parse_qs returns a list per key; only the first value is used
                select = params['sort'][0]

        # Resolve all reasonable protocols when doing metalink for maximum access possibilities
        if metalink and schemes is None:
            schemes = SUPPORTED_PROTOCOLS

        try:

            # we need to call list_replicas before starting to reply
            # otherwise the exceptions won't be propagated correctly
            __first = True

            header(
                'Content-Type', 'application/metalink4+xml'
                if metalink else 'application/x-json-stream')

            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression,
                                       client_location=client_location,
                                       domain=domain,
                                       signature_lifetime=signature_lifetime,
                                       resolve_archives=resolve_archives,
                                       resolve_parents=resolve_parents,
                                       updated_after=updated_after,
                                       issuer=ctx.env.get('issuer'),
                                       vo=ctx.env.get('vo')):

                # in first round, stream the metalink header
                if __first:
                    if metalink:
                        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                    __first = False

                # ... then, stream the replica information
                if not metalink:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                else:

                    # per PFN: (domain, priority, rse, client_extract)
                    dictreplica = {}
                    for replica, info in rfile['pfns'].items():
                        dictreplica[replica] = (info['domain'],
                                                info['priority'],
                                                info['rse'],
                                                info['client_extract'])

                    yield ' <file name="' + rfile['name'] + '">\n'

                    if 'parents' in rfile and rfile['parents']:
                        yield '  <parents>\n'
                        for parent in rfile['parents']:
                            yield '   <did>' + parent + '</did>\n'
                        yield '  </parents>\n'

                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                        config_get(
                            'policy',
                            'schema',
                            raise_exception=False,
                            default='generic'), rfile['scope'], rfile['name'])

                    # LAN replicas always come first, in their listed order;
                    # everything else is ordered by sort_replicas
                    lanreplicas = [
                        replica for replica, v in dictreplica.items()
                        if v[0] == 'lan'
                    ]
                    replicas = lanreplicas + sort_replicas(
                        {
                            k: v
                            for k, v in dictreplica.items() if v[0] != 'lan'
                        },
                        client_location,
                        selection=select)

                    # the emitted priority is the 1-based position in the final order
                    for idx, replica in enumerate(replicas, start=1):
                        yield '  <url location="' + str(dictreplica[replica][2]) \
                            + '" domain="' + str(dictreplica[replica][0]) \
                            + '" priority="' + str(idx) \
                            + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                            + '">' + escape(replica) + '</url>\n'
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            if metalink:
                if __first:
                    # ensure complete metalink on success without any content
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
                else:
                    # if metalink start was already sent, always send the end
                    yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
예제 #12
0
    def GET(self, scope, name):
        """
        Metalink redirect

        Collects the replicas of one file and streams them as a metalink4
        document. The query string may carry ``schemes``, ``select``/``sort``
        (``sort`` wins) and the client location hints ``ip``, ``fqdn`` and
        ``site``.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound
            406 Not Acceptable

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """
        # imported locally so the block does not rely on a module-level import
        from xml.sax.saxutils import escape

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids, schemes, select = [{
            'scope': scope,
            'name': name
        }], ['http', 'https', 'root', 'gsiftp', 'srm', 'davs'], None

        # set the correct client IP
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'sort' in params:
                select = params['sort'][0]

            if 'ip' in params:
                client_location['ip'] = params['ip'][0]
            if 'fqdn' in params:
                client_location['fqdn'] = params['fqdn'][0]
            if 'site' in params:
                client_location['site'] = params['site'][0]

        # get vo if given
        vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

        try:
            # materialise the listing up front so that lookup errors surface
            # before any part of the document has been sent
            tmp_replicas = list(list_replicas(dids=dids,
                                              schemes=schemes,
                                              client_location=client_location,
                                              vo=vo))

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the appropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                # map every PFN back to the RSE that holds it
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile[
                    'name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'],
                                                              rfile['name'])
                yield '</glfn>\n'

                replicas = sort_replicas(dictreplica,
                                         client_location,
                                         selection=select)

                # stream URLs; the PFN is XML-escaped because characters such
                # as '&' in a URL would otherwise break the document
                for idx, replica in enumerate(replicas, start=1):
                    yield '  <url location="' + str(
                        dictreplica[replica]) + '" priority="' + str(
                            idx) + '">' + escape(replica) + '</url>\n'

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
예제 #13
0
    def GET(self, scope, name):
        """
        Header Redirect

        Looks up the replicas of one file and answers with a 303 redirect to
        a single selected URL. The choice is driven by the ``rse``, ``site``
        and ``sort``/``select`` query arguments (``sort`` wins).

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        no_rse_msg = 'no redirection possible - no valid RSE for HTTP redirection found'

        try:

            # defaults that apply when the request carries no query string
            select = 'random'
            rse = site = None
            schemes = ['davs', 'http', 'https']

            # prefer the forwarded address, fall back to the direct peer
            forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
            client_location = {
                'ip': ctx.ip if forwarded_for is None else forwarded_for,
                'fqdn': None,
                'site': None,
            }

            if ctx.query:
                params = parse_qs(ctx.query[1:])
                select = params.get('select', [select])[0]
                select = params.get('sort', [select])[0]
                rse = params.get('rse', [rse])[0]
                site = params.get('site', [site])[0]

                # NOTE: a query string without 'schemes' replaces the default
                # list above; a given 'schemes' is kept as a single string here
                if 'schemes' in params:
                    schemes = params['schemes'][0]
                else:
                    schemes = ['davs', 'https', 's3']

                for key in ('ip', 'fqdn', 'site'):
                    if key in params:
                        client_location[key] = params[key][0]

            # forward the schemes and the selection to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            single_scheme = not isinstance(schemes, list)
            header(
                'Link',
                '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                % (cleaned_url, schemes if single_scheme else ','.join(schemes), select))
            if single_scheme:
                schemes = [schemes]  # list_replicas needs a list

            # get vo if given
            vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

            did = {'scope': scope, 'name': name, 'type': 'FILE'}
            replicas = list(list_replicas(dids=[did],
                                          schemes=schemes,
                                          client_location=client_location,
                                          vo=vo))

            selected_url = None
            for r in replicas:
                if not r['rses']:
                    continue

                if rse:
                    # an explicit RSE was requested: take its first URL or give up
                    if rse not in r['rses'] or not r['rses'][rse]:
                        raise ReplicaNotFound(no_rse_msg)
                    selected_url = r['rses'][rse][0]
                    continue

                # since this is HTTP-only redirection, and to stay compatible with
                # as many http clients as possible, davs and s3 URLs are rewritten to https
                dictreplica = {}
                for rse_name in r['rses']:
                    for pfn in r['rses'][rse_name]:
                        pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[pfn] = rse_name

                if not dictreplica:
                    raise ReplicaNotFound(no_rse_msg)

                if site:
                    candidates = site_selector(dictreplica, site, vo)
                    if not candidates:
                        raise ReplicaNotFound(no_rse_msg)
                    selected_url = candidates[0]
                else:
                    candidates = sort_replicas(dictreplica, client_location, selection=select)
                    selected_url = candidates[0]

            if not selected_url:
                raise ReplicaNotFound('no redirection possible - file does not exist')

            raise seeother(selected_url)

        except seeother:
            # the redirect itself travels as an exception and must pass through
            raise
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__, error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)