Example #1
0
def __update_bulk_replicas(replicas, session=None, logger=logging.log):
    """
    Used by finisher to handle available and unavailable replicas blongs to same rule in bulk way.

    :param replicas:              List of replicas.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """
    try:
        replica_core.update_replicas_states(replicas,
                                            nowait=True,
                                            add_tombstone=True,
                                            session=session)
    except ReplicaNotFound as error:
        logger(logging.WARNING,
               'Failed to bulk update replicas, will do it one by one: %s',
               str(error))
        raise ReplicaNotFound(error)

    for replica in replicas:
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logger(logging.INFO, "HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s",
               replica['request_id'], replica['scope'], replica['name'],
               replica['rse_id'], str(replica['state']))
    return True
Example #2
0
def __update_bulk_replicas(replicas, req_type, rule_id, session=None):
    """
    Used by finisher to handle available and unavailable replicas blongs to same rule in bulk way.

    :param replicas:              List of replicas.
    :param req_type:              Request type: STAGEIN, STAGEOUT, TRANSFER.
    :param rule_id:               RULE id.
    :param session:               The database session to use.
    :returns commit_or_rollback:  Boolean.
    """
    try:
        replica_core.update_replicas_states(replicas,
                                            nowait=True,
                                            session=session)
    except ReplicaNotFound as error:
        logging.warn(
            'Failed to bulk update replicas, will do it one by one: %s' %
            str(error))
        raise ReplicaNotFound(error)

    for replica in replicas:
        if not replica['archived']:
            request_core.archive_request(replica['request_id'],
                                         session=session)
        logging.info("HANDLED REQUEST %s DID %s:%s AT RSE %s STATE %s" %
                     (replica['request_id'], replica['scope'], replica['name'],
                      replica['rse_id'], str(replica['state'])))
    return True
Example #3
0
    def GET(self, scope, name):
        """
        Metalink redirect

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound
            406 Not Acceptable

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids, schemes, select = [{
            'scope': scope,
            'name': name
        }], [
            'http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm',
            'davs'
        ], None

        # set the correct client IP
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]
            if 'sort' in params:
                select = params['sort'][0]

            if 'ip' in params:
                client_location['ip'] = params['ip'][0]
            if 'fqdn' in params:
                client_location['fqdn'] = params['fqdn'][0]
            if 'site' in params:
                client_location['site'] = params['site'][0]

        try:
            tmp_replicas = [
                rep for rep in list_replicas(dids=dids,
                                             schemes=schemes,
                                             client_location=client_location)
            ]

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the appropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile[
                    'name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'],
                                                              rfile['name'])
                yield '</glfn>\n'

                # sort the actual replicas if necessary
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica,
                                          client_location['ip'],
                                          ignore_error=True)
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                else:
                    replicas = sort_random(dictreplica)

                # stream URLs
                idx = 1
                for replica in replicas:
                    yield '  <url location="' + str(
                        dictreplica[replica]) + '" priority="' + str(
                            idx) + '">' + replica + '</url>\n'
                    idx += 1

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound as error:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      error.args[0])
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Example #4
0
    def GET(self, scope, name):
        """
        Header Redirect

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'davs', 'http', 'https', 's3+rucio'
            ]

            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

            if ctx.query:
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'sort' in params:
                    select = params['sort'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                if 'schemes' in params:
                    schemes = params['schemes'][0]
                else:
                    schemes = ['davs', 'https', 's3']

                if 'ip' in params:
                    client_location['ip'] = params['ip'][0]
                if 'fqdn' in params:
                    client_location['fqdn'] = params['fqdn'][0]
                if 'site' in params:
                    client_location['site'] = params['site'][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, ','.join(schemes), select))
            else:
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, schemes, select))
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes,
                                         client_location=client_location)
            ]

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    dictreplica = {}

                    if rse:
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                                # forcibly replacement davs and s3 URLs to https
                                replica = replica.replace(
                                    'davs://',
                                    'https://').replace('s3://', 'https://')
                                dictreplica[replica] = rep

                        if not dictreplica:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )

                        elif site:
                            rep = site_selector(dictreplica, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                raise ReplicaNotFound(
                                    'no redirection possible - no valid RSE for HTTP redirection found'
                                )
                        else:
                            if select == 'geoip':
                                rep = sort_geoip(dictreplica,
                                                 client_location['ip'])
                            elif select == 'closeness':
                                rep = sort_closeness(dictreplica,
                                                     client_location)
                            elif select == 'dynamic':
                                rep = sort_dynamic(dictreplica,
                                                   client_location)
                            elif select == 'ranking':
                                rep = sort_ranking(dictreplica,
                                                   client_location)
                            else:
                                rep = sort_random(dictreplica)

                            selected_url = rep[0]

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            raise
        except ReplicaNotFound as error:
            raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
        except RucioException as error:
            raise generate_http_error(500, error.__class__.__name__,
                                      error.args[0])
        except Exception as error:
            print(format_exc())
            raise InternalError(error)
Example #5
0
    def GET(self, scope, name):
        """
        Metalink redirect

        HTTP Success:
            200 OK

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        dids, schemes, select = [{
            'scope': scope,
            'name': name
        }], ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp',
             'srm'], None

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'schemes' in params:
                schemes = params['schemes']
            if 'select' in params:
                select = params['select'][0]

        try:
            tmp_replicas = [
                rep for rep in list_replicas(dids=dids, schemes=schemes)
            ]

            if not tmp_replicas:
                raise ReplicaNotFound(
                    'no redirection possible - cannot find the DID')

            # first, set the APPropriate content type, and stream the header
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # set the correct client IP
            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
            if client_ip is None:
                client_ip = ctx.ip

            # iteratively stream the XML per file
            for rfile in tmp_replicas:
                replicas = []
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        replicas.append(replica)
                        dictreplica[replica] = rse

                # sort the actual replicas if necessary
                if select == 'geoip':
                    try:
                        replicas = geoIP_order(dictreplica, client_ip)
                    except AddressNotFoundError:
                        pass
                else:
                    replicas = random_order(dictreplica, client_ip)

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield '  <identity>' + rfile['scope'] + ':' + rfile[
                    'name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield '  <hash type="adler32">' + rfile[
                        'adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield '  <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield '  <size>' + str(rfile['bytes']) + '</size>\n'

                yield '  <glfn name="%s%s">' % (
                    'root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/', '%s:%s' %
                    (rfile['scope'], rfile['name']))
                yield '</glfn>\n'

                # stream URLs
                idx = 1
                for replica in replicas:
                    yield '  <url location="' + str(
                        dictreplica[replica]) + '" priority="' + str(
                            idx) + '">' + replica + '</url>\n'
                    idx += 1

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        except DataIdentifierNotFound, e:
            raise generate_http_error(404, 'DataIdentifierNotFound',
                                      e.args[0][0])
Example #6
0
    def GET(self, scope, name):
        """
        Header Redirect

        HTTP Success:
            303 See Other

        HTTP Error:
            401 Unauthorized
            500 InternalError
            404 Notfound

        :param scope: The scope name of the file.
        :param name: The name of the file.
        """

        header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
        header('Access-Control-Allow-Headers',
               ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
        header('Access-Control-Allow-Methods', '*')
        header('Access-Control-Allow-Credentials', 'true')

        try:

            # use the default HTTP protocols if no scheme is given
            select, rse, site, schemes = 'random', None, None, [
                'http', 'https', 's3+rucio'
            ]
            if ctx.query:
                params = parse_qs(ctx.query[1:])
                if 'select' in params:
                    select = params['select'][0]
                if 'rse' in params:
                    rse = params['rse'][0]
                if 'site' in params:
                    site = params['site'][0]
                if 'schemes' in params:
                    schemes = params['schemes'][0]

            # correctly forward the schemes and select to potential metalink followups
            cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
            if isinstance(schemes, list):
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, ','.join(schemes), select))
            else:
                header(
                    'Link',
                    '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
                    % (cleaned_url, schemes, select))
                schemes = [schemes]  # list_replicas needs a list

            replicas = [
                r for r in list_replicas(dids=[{
                    'scope': scope,
                    'name': name,
                    'type': 'FILE'
                }],
                                         schemes=schemes)
            ]

            selected_url, selected_rse = None, None
            for r in replicas:
                if r['rses']:
                    replicadict = {}

                    if rse:
                        if rse in r['rses'] and r['rses'][rse]:
                            selected_url = r['rses'][rse][0]
                            selected_rse = rse
                        else:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )
                    else:

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                replicadict[replica] = rep

                        if not replicadict:
                            raise ReplicaNotFound(
                                'no redirection possible - no valid RSE for HTTP redirection found'
                            )

                        elif site:
                            rep = site_selector(replicadict, site)
                            if rep:
                                selected_url = rep[0]
                            else:
                                raise ReplicaNotFound(
                                    'no redirection possible - no valid RSE for HTTP redirection found'
                                )
                        else:
                            client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                            if client_ip is None:
                                client_ip = ctx.ip
                            if select == 'geoip':
                                rep = geoIP_order(replicadict, client_ip)
                            else:
                                rep = random_order(replicadict, client_ip)

                            selected_url = rep[0]

                        for rep in r['rses']:
                            for replica in r['rses'][rep]:
                                if selected_url == replica:
                                    selected_rse = rep

            if selected_url:
                if selected_url.startswith('s3+rucio://'):
                    connect(selected_rse, selected_url)
                    signed_URLS = get_signed_urls([selected_url],
                                                  rse=selected_rse,
                                                  operation='read')
                    raise seeother(signed_URLS[selected_url])

                raise seeother(selected_url)

            raise ReplicaNotFound(
                'no redirection possible - file does not exist')

        except seeother:
            raise
        except ReplicaNotFound, e:
            raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])