Esempio n. 1
0
    def GET(self, scope, name):
        """
        List all replicas for data identifiers.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = None
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            # first check if client accepts metalink
            if 'application/metalink+xml' in tmp:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in tmp:
                metalink = 4

        dids, schemes, select, limit = [{'scope': scope, 'name': name}], None, None, None
        if ctx.query:
            try:
                params = loads(unquote(ctx.query[1:]))
                if 'schemes' in params:
                    schemes = params['schemes']
            except ValueError:
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'limit' in params:
                    limit = int(params['limit'][0])

        try:
            # first, set the appropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                schemes = ['http', 'https']
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                schemes = ['http', 'https']
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                client_ip = ctx.get('ip')
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)
                if metalink is None:
                    yield dumps(rfile) + '\n'
                elif metalink == 3:
                    idx = 0
                    yield ' <file name="' + rfile['name'] + '">\n  <resources>\n'
                    for replica in replicas:
                        yield '   <url type="http" preference="' + str(idx) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield '  </resources>\n </file>\n'
                elif metalink == 4:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'
                    idx = 0
                    for replica in replicas:
                        yield '   <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                if metalink == 3:
                    yield '</files>\n</metalink>\n'
                elif metalink == 4:
                    yield '</metalink>\n'

        except DataIdentifierNotFound, e:
            raise generate_http_error(404, 'DataIdentifierNotFound', e.args[0][0])
Esempio n. 2
0
    def GET(self, scope, name):
        """
        List all replicas for data identifiers.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = None
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            # first check if client accepts metalink
            if 'application/metalink+xml' in tmp:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in tmp:
                metalink = 4

        dids, schemes, select, limit = [{
            'scope': scope,
            'name': name
        }], None, None, None
        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

        try:
            # first, set the APPropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids, schemes=schemes):
                client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                if client_ip is None:
                    client_ip = ctx.ip

                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)
                if metalink is None:
                    yield dumps(rfile) + '\n'

                elif metalink == 3:
                    idx = 0
                    yield ' <file name="' + rfile['name'] + '">\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    yield '  <resources>\n'
                    for replica in replicas:
                        yield '   <url type="http" preference="' + str(
                            idx) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield '  </resources>\n </file>\n'

                elif metalink == 4:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                    yield '  <glfn name="/atlas/rucio/%s:%s">' % (
                        rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    idx = 0
                    for replica in replicas:
                        yield '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                if metalink == 3:
                    yield '</files>\n</metalink>\n'
                elif metalink == 4:
                    yield '</metalink>\n'

        except DataIdentifierNotFound, e:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
Esempio n. 3
0
    def POST(self):
        """
        List all replicas for data identifiers.

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError

        :returns: A dictionary containing all replicas information.
        :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
        """

        metalink = None
        if ctx.env.get('HTTP_ACCEPT') is not None:
            tmp = ctx.env.get('HTTP_ACCEPT').split(',')
            # first check if client accepts metalink
            if 'application/metalink+xml' in tmp:
                metalink = 3
            # but prefer metalink4 if the client has support for it
            # (clients can put both in their ACCEPT header!)
            if 'application/metalink4+xml' in tmp:
                metalink = 4

        dids, schemes, select, unavailable, limit = [], None, None, False, None
        ignore_availability, rse_expression, all_states = False, None, False
        json_data = data()
        try:
            params = parse_response(json_data)
            if 'dids' in params:
                dids = params['dids']
            if 'schemes' in params:
                schemes = params['schemes']
            if 'unavailable' in params:
                unavailable = params['unavailable']
                ignore_availability = True
            if 'all_states' in params:
                all_states = params['all_states']
            if 'rse_expression' in params:
                rse_expression = params['rse_expression']

        except ValueError:
            raise generate_http_error(400, 'ValueError',
                                      'Cannot decode json parameter list')

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = params['limit'][0]

        try:
            # first, set the APPropriate content type, and stream the header
            if metalink is None:
                header('Content-Type', 'application/x-json-stream')
            elif metalink == 3:
                header('Content-Type', 'application/metalink+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
            elif metalink == 4:
                header('Content-Type', 'application/metalink4+xml')
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # then, stream the replica information
            for rfile in list_replicas(dids=dids,
                                       schemes=schemes,
                                       unavailable=unavailable,
                                       request_id=ctx.env.get('request_id'),
                                       ignore_availability=ignore_availability,
                                       all_states=all_states,
                                       rse_expression=rse_expression):
                client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                if client_ip is None:
                    client_ip = ctx.ip
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse
                if select == 'geoip':
                    replicas = geoIP_order(dictreplica, client_ip)
                else:
                    replicas = random_order(dictreplica, client_ip)
                if metalink is None:
                    yield dumps(rfile, cls=APIEncoder) + '\n'
                elif metalink == 3:
                    idx = 0
                    yield ' <file name="' + rfile[
                        'name'] + '">\n  <resources>\n'
                    for replica in replicas:
                        yield '   <url type="http" preference="' + str(
                            idx) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield '  </resources>\n </file>\n'
                elif metalink == 4:
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield '  <identity>' + rfile['scope'] + ':' + rfile[
                        'name'] + '</identity>\n'
                    if rfile['adler32'] is not None:
                        yield '  <hash type="adler32">' + rfile[
                            'adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'
                    yield '  <size>' + str(rfile['bytes']) + '</size>\n'
                    idx = 0
                    for replica in replicas:
                        yield '   <url location="' + str(
                            dictreplica[replica]) + '" priority="' + str(
                                idx + 1) + '">' + replica + '</url>\n'
                        idx += 1
                        if limit and limit == idx:
                            break
                    yield ' </file>\n'

            # don't forget to send the metalink footer
            if metalink:
                if metalink == 3:
                    yield '</files>\n</metalink>\n'
                elif metalink == 4:
                    yield '</metalink>\n'

        except DataIdentifierNotFound, e:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
Esempio n. 4
0
    def GET(self, scope, name):
        """
        Redirect download

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header("Access-Control-Allow-Origin", ctx.env.get("HTTP_ORIGIN"))
        header("Access-Control-Allow-Headers", ctx.env.get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS"))
        header("Access-Control-Allow-Methods", "*")
        header("Access-Control-Allow-Credentials", "true")

        try:
            replicas = [
                r
                for r in list_replicas(dids=[{"scope": scope, "name": name, "type": "FILE"}], schemes=["http", "https"])
            ]

            select = "random"
            rse = None
            site = None
            if ctx.query:
                params = parse_qs(ctx.query[1:])
                if "select" in params:
                    select = params["select"][0]
                if "rse" in params:
                    rse = params["rse"][0]
                if "site" in params:
                    site = params["site"][0]

            for r in replicas:
                if r["rses"]:
                    replicadict = {}
                    if rse:
                        if rse in r["rses"] and r["rses"][rse]:
                            return found(r["rses"][rse][0])
                        return notfound("Sorry, the replica you were looking for was not found.")
                    else:
                        for rep in r["rses"]:
                            for replica in r["rses"][rep]:
                                replicadict[replica] = rep
                        if not replicadict:
                            return notfound("Sorry, the replica you were looking for was not found.")
                        elif site:
                            rep = site_selector(replicadict, site)
                            if rep:
                                return found(rep[0])
                            return notfound("Sorry, the replica you were looking for was not found.")
                        else:
                            client_ip = ctx.get("ip")
                            if select == "geoip":
                                rep = geoIP_order(replicadict, client_ip)
                            else:
                                rep = random_order(replicadict, client_ip)
                            return found(rep[0])

            return notfound("Sorry, the replica you were looking for was not found.")

        except RucioException, e:
            raise generate_http_error(500, e.__class__.__name__, e.args[0][0])
Esempio n. 5
0
    def GET(self, scope, name):
        """
        Metalink redirect

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids, schemes, select = [{
            'scope': scope,
            'name': name
        }], ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp',
             'srm'], None

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]

        try:
            tmp_replicas = [
                rep for rep in list_replicas(dids=dids, schemes=schemes)
            ]

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the APPropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # set the correct client IP
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                # sort the actual replicas if necessary
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile[
                    'name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="%s%s">' % (
                    'root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/', '%s:%s' %
                    (rfile['scope'], rfile['name']))
                yield '</glfn>\n'

                # stream URLs
                idx = 1
                for replica in replicas:
                    yield '  <url location="' + str(
                        dictreplica[replica]) + '" priority="' + str(
                            idx) + '">' + replica + '</url>\n'
                    idx += 1

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound, e:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
Esempio n. 6
0
    def GET(self, scope, name):
        """
        Header Redirect

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'http', 'https', 's3+rucio'
            ]
            if ctx.query:
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                if 'schemes' in params:
                    schemes = params['schemes'][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, ','.join(schemes), select))
            else:
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, schemes, select))
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes)
            ]

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    replicadict = {}

                    if rse:
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                replicadict[replica] = rep

                        if not replicadict:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )

                        elif site:
                            rep = site_selector(replicadict, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                raise ReplicaNotFound(
                                    'no redirection possible - no valid RSE for HTTP redirection found'
                                )
                        else:
                            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                            if client_ip is None:
                                client_ip = ctx.ip
                            if select == 'geoip':
                                rep = geoIP_order(replicadict, client_ip)
                            else:
                                rep = random_order(replicadict, client_ip)

                            selected_url = rep[0]

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            raise
        except ReplicaNotFound, e:
            raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])