def get(self, scope_name):
    """
    ---
    summary: Metalink redirect
    description: Get Metalink redirect.
    tags:
      - Redirect
    parameters:
    - name: scope_name
      in: path
      description: The data identifier (scope)/(name).
      schema:
        type: string
      style: simple
    - name: ip
      in: query
      description: The client ip.
      schema:
        type: string
      style: simple
      required: false
    - name: fqdn
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: site
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: schemes
      in: query
      schema:
        type: array
      style: simple
      required: false
    - name: select
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: sort
      in: query
      schema:
        type: string
      style: simple
      required: false
    responses:
      200:
        description: OK
        content:
          application/metalink4+xml:
            schema:
              description: The metalink file.
              type: string
      400:
        description: Cannot parse the scope and name
      401:
        description: Invalid Auth Token
      404:
        description: Rse or did not found
      406:
        description: Not acceptable
    """
    headers = self.get_headers()

    try:
        # The VO is needed both to parse the DID and to list its replicas,
        # so resolve it once and reuse it below.
        vo = extract_vo(request.headers)
        scope, name = parse_scope_name(scope_name, vo)
    except ValueError as error:
        return generate_http_error_flask(400, error, headers=headers)

    # Use the forwarded address when present, otherwise the direct peer;
    # an explicit 'ip' query argument overrides both.
    client_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
    client_location = {
        'ip': request.args.get('ip', default=client_ip),
        'fqdn': request.args.get('fqdn', default=None),
        'site': request.args.get('site', default=None),
    }

    dids = [{'scope': scope, 'name': name}]
    schemes = request.args.getlist('schemes') or ['http', 'https', 'root', 'gsiftp', 'srm', 'davs']

    # 'sort' overrides 'select' when both are given.
    sortby = request.args.get('select', default=None)
    sortby = request.args.get('sort', default=sortby)

    try:
        replicas_iter = list_replicas(dids=dids, schemes=schemes, client_location=client_location, vo=vo)

        # Pull the first item before streaming starts so that an empty
        # result (or an exception) can still be turned into an HTTP error.
        try:
            first = next(replicas_iter)
        except StopIteration:
            # Pass the handler's headers like the other error paths do.
            return 'no redirection possible - cannot find the DID', 404, headers

        def generate():
            # stream the metalink header first
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # iteratively stream the XML per file
            for rfile in itertools.chain((first, ), replicas_iter):
                # map every replica URL to the RSE it lives on
                dictreplica = {}
                for rse in rfile['rses']:
                    for replica in rfile['rses'][rse]:
                        dictreplica[replica] = rse

                # stream metadata
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                yield f' <glfn name="/atlas/rucio/{rfile["scope"]}:{rfile["name"]}">'
                yield '</glfn>\n'

                replicas = sort_replicas(dictreplica, client_location, selection=sortby)

                # stream URLs, priority starts at 1
                for idx, replica in enumerate(replicas, start=1):
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + replica + '</url>\n'

                yield ' </file>\n'

            # don't forget to send the metalink footer
            yield '</metalink>\n'

        return try_stream(generate(), content_type='application/metalink4+xml')
    except (DataIdentifierNotFound, ReplicaNotFound) as error:
        return generate_http_error_flask(404, error, headers=headers)
def GET(self, scope, name):
    """
    Metalink redirect: stream a metalink4 document describing all replicas
    of the given file.

    Query arguments read: schemes, select, sort (overrides select), ip,
    fqdn, site.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound
        406 Not Acceptable

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    dids = [{'scope': scope, 'name': name}]
    schemes = ['http', 'https', 'root', 'gsiftp', 'srm', 'davs']
    select = None

    # set the correct client IP: forwarded address first, direct peer otherwise
    forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
    client_location = {
        'ip': ctx.ip if forwarded_for is None else forwarded_for,
        'fqdn': None,
        'site': None,
    }

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        # 'sort' is evaluated last and therefore wins over 'select'
        for key in ('select', 'sort'):
            if key in params:
                select = params[key][0]
        for key in ('ip', 'fqdn', 'site'):
            if key in params:
                client_location[key] = params[key][0]

    # get vo if given
    vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

    try:
        found_files = list(list_replicas(dids=dids, schemes=schemes, client_location=client_location, vo=vo))

        if not found_files:
            raise ReplicaNotFound('no redirection possible - cannot find the DID')

        # first, set the appropriate content type, and stream the header
        header('Content-Type', 'application/metalink4+xml')
        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # iteratively stream the XML per file
        for rfile in found_files:
            # replica URL -> RSE holding it
            dictreplica = {
                replica: rse
                for rse in rfile['rses']
                for replica in rfile['rses'][rse]
            }

            # stream metadata
            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            # order the replicas with the requested strategy, random by default
            if select == 'geoip':
                ordered = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
            elif select == 'closeness':
                ordered = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                ordered = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                ordered = sort_ranking(dictreplica, client_location)
            else:
                ordered = sort_random(dictreplica)

            # stream URLs, priority starts at 1
            for priority, replica in enumerate(ordered, start=1):
                yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + replica + '</url>\n'

            yield ' </file>\n'

        # don't forget to send the metalink footer
        yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def get(self, scope_name):
    """
    ---
    summary: Header redirect
    description: Get the header redirect.
    tags:
      - Redirect
    parameters:
    - name: scope_name
      in: path
      description: The data identifier (scope)/(name).
      schema:
        type: string
      style: simple
    - name: ip
      in: query
      description: The client ip.
      schema:
        type: string
      style: simple
      required: false
    - name: fqdn
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: site
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: schemes
      in: query
      schema:
        type: array
      style: simple
      required: false
    - name: select
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: sort
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: rse
      in: query
      schema:
        type: string
      style: simple
      required: false
    responses:
      303:
        description: OK
        content:
          application/json:
            schema:
              description: The redirect url.
              type: string
      400:
        description: Cannot parse the scope and name
      401:
        description: Invalid Auth Token
      404:
        description: Rse or did not found
    """
    headers = self.get_headers()

    try:
        # The VO is needed both to parse the DID and to list its replicas,
        # so resolve it once and reuse it below.
        vo = extract_vo(request.headers)
        scope, name = parse_scope_name(scope_name, vo)
    except ValueError as error:
        return generate_http_error_flask(400, error, headers=headers)

    try:
        # Use the forwarded address when present, otherwise the direct
        # peer; an explicit 'ip' query argument overrides both.
        client_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
        client_location = {
            'ip': request.args.get('ip', default=client_ip),
            'fqdn': request.args.get('fqdn', default=None),
            'site': request.args.get('site', default=None),
        }

        # use the default HTTP protocols if no scheme is given
        schemes = request.args.getlist('schemes') or ['davs', 'https', 's3']

        # 'sort' overrides 'select' when both are given.
        sortby = request.args.get('select', default='random')
        sortby = request.args.get('sort', default=sortby)
        rse = request.args.get('rse', default=None)
        site = request.args.get('site', default=None)

        # correctly forward the schemes and select to potential metalink followups
        # REQUEST_URI is not a standard WSGI key and is missing on some
        # servers; fall back to the standard SCRIPT_NAME + PATH_INFO pair
        # instead of failing on None.
        request_uri = request.environ.get('REQUEST_URI') or (
            request.environ.get('SCRIPT_NAME', '') + request.environ.get('PATH_INFO', '')
        )
        cleaned_url = request_uri.split('?')[0]

        headers.set(
            'Link',
            f'<{cleaned_url}/metalink?schemes={",".join(schemes)}&select={sortby}>; rel=describedby; type="application/metalink+xml"'
        )

        replicas = list(
            list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                          schemes=schemes,
                          client_location=client_location,
                          vo=vo)
        )

        selected_url = None
        for r in replicas:
            if r['rses']:
                dictreplica = {}

                if rse:
                    # NOTE(review): a URL picked through an explicit 'rse' is
                    # returned as-is, without the davs/s3 -> https rewrite
                    # applied in the branch below - confirm this is intended.
                    if rse in r['rses'] and r['rses'][rse]:
                        selected_url = r['rses'][rse][0]
                    else:
                        return 'no redirection possible - no valid RSE for HTTP redirection found', 404, headers
                else:
                    for rep in r['rses']:
                        for replica in r['rses'][rep]:
                            # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                            # forcibly replace davs and s3 URLs with https
                            replica = replica.replace('davs://', 'https://').replace('s3://', 'https://')
                            dictreplica[replica] = rep

                    if not dictreplica:
                        return 'no redirection possible - no valid RSE for HTTP redirection found', 404, headers

                    elif site:
                        rep = site_selector(dictreplica, site, vo)
                        if rep:
                            selected_url = rep[0]
                        else:
                            return 'no redirection possible - no valid RSE for HTTP redirection found', 404, headers
                    else:
                        rep = sort_replicas(dictreplica, client_location, selection=sortby)
                        selected_url = rep[0]

                if selected_url:
                    response = redirect(selected_url, code=303)
                    response.headers.extend(headers)
                    return response

        return 'no redirection possible - file does not exist', 404, headers

    except ReplicaNotFound as error:
        return generate_http_error_flask(404, error, headers=headers)
def GET(self, scope, name):
    """
    Redirect download: answer with a redirect to one http(s) replica of
    the given file.

    Query arguments read: select, rse, site.

    HTTP Success:
        303 See Other

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    header("Access-Control-Allow-Origin", ctx.env.get("HTTP_ORIGIN"))
    header("Access-Control-Allow-Headers", ctx.env.get("HTTP_ACCESS_CONTROL_REQUEST_HEADERS"))
    header("Access-Control-Allow-Methods", "*")
    header("Access-Control-Allow-Credentials", "true")

    try:
        replicas = list(
            list_replicas(dids=[{"scope": scope, "name": name, "type": "FILE"}],
                          schemes=["http", "https"])
        )

        select = "random"
        rse = None
        site = None
        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if "select" in params:
                select = params["select"][0]
            if "rse" in params:
                rse = params["rse"][0]
            if "site" in params:
                site = params["site"][0]

        for r in replicas:
            if r["rses"]:
                replicadict = {}

                if rse:
                    # an explicit RSE was requested: use its first replica or give up
                    if rse in r["rses"] and r["rses"][rse]:
                        return found(r["rses"][rse][0])
                    return notfound("Sorry, the replica you were looking for was not found.")

                else:
                    # replica URL -> RSE holding it
                    for rep in r["rses"]:
                        for replica in r["rses"][rep]:
                            replicadict[replica] = rep

                    if not replicadict:
                        return notfound("Sorry, the replica you were looking for was not found.")

                    elif site:
                        rep = site_selector(replicadict, site)
                        if rep:
                            return found(rep[0])
                        return notfound("Sorry, the replica you were looking for was not found.")

                    else:
                        client_ip = ctx.get("ip")
                        if select == "geoip":
                            rep = geoIP_order(replicadict, client_ip)
                        else:
                            rep = random_order(replicadict, client_ip)
                        return found(rep[0])

        return notfound("Sorry, the replica you were looking for was not found.")

    # 'except X as e' is valid on Python 2.6+ and Python 3; the former
    # 'except X, e' form is a syntax error on Python 3.
    except RucioException as e:
        raise generate_http_error(500, e.__class__.__name__, e.args[0][0])
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    Query arguments read: schemes, select, limit.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError

    :param scope: The scope of the data identifier.
    :param name: The name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    metalink = None
    if ctx.env.get('HTTP_ACCEPT') is not None:
        # NOTE(review): entries are compared verbatim after splitting on ',',
        # so values carrying surrounding whitespace or q-parameters do not match.
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        # first check if client accepts metalink
        if 'application/metalink+xml' in tmp:
            metalink = 3
        # but prefer metalink4 if the client has support for it
        # (clients can put both in their ACCEPT header!)
        if 'application/metalink4+xml' in tmp:
            metalink = 4

    dids = [{'scope': scope, 'name': name}]
    schemes, select, limit = None, None, None
    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            limit = int(params['limit'][0])

    try:
        # first, set the appropriate content type, and stream the header
        if metalink is None:
            header('Content-Type', 'application/x-json-stream')
        elif metalink == 3:
            header('Content-Type', 'application/metalink+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
        elif metalink == 4:
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # the client address does not change between files, resolve it once
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes):
            # 'replicas' keeps the listing order and is the fallback when
            # the geoip lookup cannot resolve the client address
            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = geoIP_order(dictreplica, client_ip)
                except AddressNotFoundError:
                    pass
            else:
                replicas = random_order(dictreplica, client_ip)

            if metalink is None:
                yield dumps(rfile) + '\n'

            elif metalink == 3:
                idx = 0
                yield ' <file name="' + rfile['name'] + '">\n'

                # To help support the FAX transition period, add the glfn to the metalink:
                # AGIS does not expose specific FAX redirectors per DDM Endpoint, so go through top-level redirector
                yield ' <glfn name="%s%s">' % ('root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/',
                                               '%s:%s' % (rfile['scope'], rfile['name']))
                yield '</glfn>\n'

                yield ' <resources>\n'
                for replica in replicas:
                    yield ' <url type="http" preference="' + str(idx) + '">' + replica + '</url>\n'
                    idx += 1
                    if limit and limit == idx:
                        break
                yield ' </resources>\n </file>\n'

            elif metalink == 4:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                # To help support the FAX transition period, add the glfn to the metalink:
                # AGIS does not expose specific FAX redirectors per DDM Endpoint, so go through top-level redirector
                yield ' <glfn name="%s%s">' % ('root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/',
                                               '%s:%s' % (rfile['scope'], rfile['name']))
                yield '</glfn>\n'

                idx = 0
                for replica in replicas:
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx + 1) + '">' + replica + '</url>\n'
                    idx += 1
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        # don't forget to send the metalink footer
        if metalink:
            if metalink == 3:
                yield '</files>\n</metalink>\n'
            elif metalink == 4:
                yield '</metalink>\n'

    # 'except X as e' is valid on Python 2.6+ and Python 3; the former
    # 'except X, e' form is a syntax error on Python 3.
    except DataIdentifierNotFound as e:
        raise generate_http_error(404, 'DataIdentifierNotFound', e.args[0][0])
def POST(self):
    """
    List all replicas for data identifiers.

    The request body is a JSON document; the keys read from it are dids,
    schemes, unavailable, all_states, rse_expression, client_location, sort,
    domain, resolve_archives, resolve_parents and signature_lifetime.
    Query arguments read: select, limit, sort.

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request (undecodable JSON body or non-integer limit)
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
    """
    metalink = False
    if ctx.env.get('HTTP_ACCEPT') is not None:
        # NOTE(review): entries are compared verbatim after splitting on ',',
        # so values carrying surrounding whitespace or q-parameters do not match.
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states, domain = False, None, False, None
    signature_lifetime, resolve_archives, resolve_parents = None, True, False
    # Always carry the keys the sorters read, so that e.g. select=geoip
    # does not fail with a KeyError when the body has no client_location.
    client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            # client supplied values win over the defaults
            client_location.update(params['client_location'])
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']
        if 'resolve_archives' in params:
            resolve_archives = params['resolve_archives']
        if 'resolve_parents' in params:
            resolve_parents = params['resolve_parents']
        if 'signature_lifetime' in params:
            signature_lifetime = params['signature_lifetime']
        else:
            # hardcoded default of 10 minutes if config is not parseable
            signature_lifetime = config_get('credentials', 'signature_lifetime', raise_exception=False, default=600)
    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            # parse_qs yields strings; the limit is compared against an int
            # counter below, so it has to be converted to take effect.
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'Cannot decode limit parameter')
        if 'sort' in params:
            # parse_qs yields a list per key; take the value itself so that it
            # can match the sorter names below.
            select = params['sort'][0]

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if metalink and schemes is None:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # we need to call list_replicas before starting to reply
        # otherwise the exceptions won't be propagated correctly
        header_pending = True

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain,
                                   signature_lifetime=signature_lifetime,
                                   resolve_archives=resolve_archives,
                                   resolve_parents=resolve_parents,
                                   issuer=ctx.env.get('issuer')):

            # in first round, set the appropriate content type, and stream the header
            if header_pending:
                if not metalink:
                    header('Content-Type', 'application/x-json-stream')
                else:
                    header('Content-Type', 'application/metalink4+xml')
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                header_pending = False

            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
            else:
                # replica URL -> (domain, priority, rse, client_extract)
                dictreplica = {}
                for replica in rfile['pfns']:
                    dictreplica[replica] = (rfile['pfns'][replica]['domain'],
                                            rfile['pfns'][replica]['priority'],
                                            rfile['pfns'][replica]['rse'],
                                            rfile['pfns'][replica]['client_extract'])

                yield ' <file name="' + rfile['name'] + '">\n'

                if 'parents' in rfile and rfile['parents']:
                    yield ' <parents>\n'
                    for parent in rfile['parents']:
                        yield ' <did>' + parent + '</did>\n'
                    yield ' </parents>\n'

                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                    config_get('policy', 'schema', raise_exception=False, default='generic'),
                    rfile['scope'],
                    rfile['name'])

                # TODO: deprecate this
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                elif select == 'random':
                    replicas = sort_random(dictreplica)
                else:
                    # no explicit strategy: order by the stored metadata tuple
                    replicas = sorted(dictreplica, key=dictreplica.get)

                idx = 0
                for replica in replicas:
                    yield ' <url location="' + str(dictreplica[replica][2]) \
                        + '" domain="' + str(dictreplica[replica][0]) \
                        + '" priority="' + str(dictreplica[replica][1]) \
                        + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                        + '">' + escape(replica) + '</url>\n'
                    idx += 1
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        # ensure complete metalink
        if header_pending and metalink:
            # nothing was listed: the content type has not been announced yet
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
        if metalink:
            yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    Query arguments read: schemes, select, limit.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError

    :param scope: The scope of the data identifier.
    :param name: The name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    metalink = False
    if ctx.env.get('HTTP_ACCEPT') is not None:
        # NOTE(review): entries are compared verbatim after splitting on ',',
        # so values carrying surrounding whitespace or q-parameters do not match.
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    dids = [{'scope': scope, 'name': name}]
    schemes, select, limit = None, None, None
    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            limit = int(params['limit'][0])

    try:
        # first, set the appropriate content type, and stream the header
        if not metalink:
            header('Content-Type', 'application/x-json-stream')
        else:
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # the client address does not change between files, resolve it once
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes):
            # 'replicas' keeps the listing order and is the fallback when
            # the geoip lookup cannot resolve the client address
            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = sort_geoip(dictreplica, client_ip)
                except AddressNotFoundError:
                    pass
            else:
                replicas = sort_random(dictreplica)

            if not metalink:
                yield dumps(rfile) + '\n'
            else:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                yield '</glfn>\n'

                idx = 0
                for replica in replicas:
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx + 1) + '">' + replica + '</url>\n'
                    idx += 1
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        # don't forget to send the metalink footer
        if metalink:
            yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        # call form of print: same output on Python 2 for a single
        # argument and valid syntax on Python 3
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    Header Redirect: answer with a redirect to one replica of the given
    file and advertise the matching metalink URL in a Link header.

    Query arguments read: select, rse, site, schemes.

    HTTP Success:
        303 See Other

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    try:
        # use the default HTTP protocols if no scheme is given
        select, rse, site = 'random', None, None
        schemes = ['http', 'https', 's3+rucio']

        if ctx.query:
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'rse' in params:
                rse = params['rse'][0]
            if 'site' in params:
                site = params['site'][0]
            if 'schemes' in params:
                schemes = params['schemes'][0]

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
        if isinstance(schemes, list):
            forwarded_schemes = ','.join(schemes)
        else:
            # a scheme taken from the query is a plain string: forward it
            # verbatim, but list_replicas needs a list
            forwarded_schemes = schemes
            schemes = [schemes]
        header('Link',
               '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
               % (cleaned_url, forwarded_schemes, select))

        replicas = list(
            list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                          schemes=schemes)
        )

        selected_url, selected_rse = None, None
        for r in replicas:
            if r['rses']:
                replicadict = {}

                if rse:
                    if rse in r['rses'] and r['rses'][rse]:
                        selected_url = r['rses'][rse][0]
                        selected_rse = rse
                    else:
                        raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')
                else:
                    # replica URL -> RSE holding it
                    for rep in r['rses']:
                        for replica in r['rses'][rep]:
                            replicadict[replica] = rep

                    if not replicadict:
                        raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')

                    elif site:
                        rep = site_selector(replicadict, site)
                        if rep:
                            selected_url = rep[0]
                        else:
                            raise ReplicaNotFound('no redirection possible - no valid RSE for HTTP redirection found')
                    else:
                        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
                        if client_ip is None:
                            client_ip = ctx.ip

                        if select == 'geoip':
                            rep = geoIP_order(replicadict, client_ip)
                        else:
                            rep = random_order(replicadict, client_ip)

                        selected_url = rep[0]

                    # the RSE of the chosen URL is exactly what replicadict
                    # recorded for it (None when the URL is unknown)
                    selected_rse = replicadict.get(selected_url)

                if selected_url:
                    if selected_url.startswith('s3+rucio://'):
                        # s3+rucio URLs are exchanged for a signed URL before redirecting
                        connect(selected_rse, selected_url)
                        signed_URLS = get_signed_urls([selected_url],
                                                      rse=selected_rse,
                                                      operation='read')
                        raise seeother(signed_URLS[selected_url])

                    raise seeother(selected_url)

        raise ReplicaNotFound('no redirection possible - file does not exist')

    except seeother:
        # the redirect itself travels as an exception: let it through
        raise
    # 'except X as e' is valid on Python 2.6+ and Python 3; the former
    # 'except X, e' form is a syntax error on Python 3.
    except ReplicaNotFound as e:
        raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])
def GET(self, scope, name):
    """
    Metalink redirect: stream a metalink4 document describing all replicas
    of the given file.

    Query arguments read: schemes, select.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    dids = [{'scope': scope, 'name': name}]
    schemes = ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm']
    select = None
    if ctx.query:
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]

    try:
        tmp_replicas = list(list_replicas(dids=dids, schemes=schemes))

        if not tmp_replicas:
            raise ReplicaNotFound('no redirection possible - cannot find the DID')

        # first, set the appropriate content type, and stream the header
        header('Content-Type', 'application/metalink4+xml')
        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # set the correct client IP
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        # iteratively stream the XML per file
        for rfile in tmp_replicas:
            # 'replicas' keeps the listing order and is the fallback when
            # the geoip lookup cannot resolve the client address
            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            # sort the actual replicas if necessary
            if select == 'geoip':
                try:
                    replicas = geoIP_order(dictreplica, client_ip)
                except AddressNotFoundError:
                    pass
            else:
                replicas = random_order(dictreplica, client_ip)

            # stream metadata
            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="%s%s">' % ('root://atlas-xrd-eu.cern.ch:1094//atlas/rucio/',
                                           '%s:%s' % (rfile['scope'], rfile['name']))
            yield '</glfn>\n'

            # stream URLs, priority starts at 1
            idx = 1
            for replica in replicas:
                yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + replica + '</url>\n'
                idx += 1

            yield ' </file>\n'

        # don't forget to send the metalink footer
        yield '</metalink>\n'

    # 'except X as e' is valid on Python 2.6+ and Python 3; the former
    # 'except X, e' form is a syntax error on Python 3.
    except DataIdentifierNotFound as e:
        raise generate_http_error(404, 'DataIdentifierNotFound', e.args[0][0])
    except ReplicaNotFound as e:
        # raised above when nothing was listed; report it as the documented
        # 404 instead of letting it escape unhandled
        raise generate_http_error(404, 'ReplicaNotFound', e.args[0][0])
def POST(self): """ List all replicas for data identifiers. HTTP Success: 200 OK HTTP Error: 401 Unauthorized 406 Not Acceptable 500 InternalError :returns: A dictionary containing all replicas information, either as JSON stream or metalink4. """ metalink = False if ctx.env.get('HTTP_ACCEPT') is not None: tmp = ctx.env.get('HTTP_ACCEPT').split(',') if 'application/metalink4+xml' in tmp: metalink = True client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR') if client_ip is None: client_ip = ctx.ip dids, schemes, select, unavailable, limit = [], None, None, False, None ignore_availability, rse_expression, all_states, domain = False, None, False, None signature_lifetime, resolve_archives, resolve_parents = None, True, False updated_after = None client_location = {'ip': client_ip, 'fqdn': None, 'site': None} json_data = data() try: params = parse_response(json_data) if 'dids' in params: dids = params['dids'] if 'schemes' in params: schemes = params['schemes'] if 'unavailable' in params: unavailable = params['unavailable'] ignore_availability = True if 'all_states' in params: all_states = params['all_states'] if 'rse_expression' in params: rse_expression = params['rse_expression'] if 'client_location' in params: client_location.update(params['client_location']) if 'sort' in params: select = params['sort'] if 'domain' in params: domain = params['domain'] if 'resolve_archives' in params: resolve_archives = params['resolve_archives'] if 'resolve_parents' in params: resolve_parents = params['resolve_parents'] if 'signature_lifetime' in params: signature_lifetime = params['signature_lifetime'] else: # hardcoded default of 10 minutes if config is not parseable signature_lifetime = config_get('credentials', 'signature_lifetime', raise_exception=False, default=600) if 'updated_after' in params: if isinstance(params['updated_after'], (int, float)): # convert from epoch time stamp to datetime object updated_after = datetime.utcfromtimestamp( params['updated_after']) else: # attempt UTC 
format '%Y-%m-%dT%H:%M:%S' conversion updated_after = datetime.strptime(params['updated_after'], '%Y-%m-%dT%H:%M:%S') except ValueError: raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list') if ctx.query: params = parse_qs(ctx.query[1:]) if 'select' in params: select = params['select'][0] if 'limit' in params: limit = params['limit'][0] if 'sort' in params: select = params['sort'] # Resolve all reasonable protocols when doing metalink for maximum access possibilities if metalink and schemes is None: schemes = SUPPORTED_PROTOCOLS try: # we need to call list_replicas before starting to reply # otherwise the exceptions won't be propagated correctly __first = True header( 'Content-Type', 'application/metalink4+xml' if metalink else 'application/x-json-stream') for rfile in list_replicas(dids=dids, schemes=schemes, unavailable=unavailable, request_id=ctx.env.get('request_id'), ignore_availability=ignore_availability, all_states=all_states, rse_expression=rse_expression, client_location=client_location, domain=domain, signature_lifetime=signature_lifetime, resolve_archives=resolve_archives, resolve_parents=resolve_parents, updated_after=updated_after, issuer=ctx.env.get('issuer'), vo=ctx.env.get('vo')): # in first round, set the appropriate content type, and stream the header if __first: if metalink: yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n' __first = False # ... 
then, stream the replica information if not metalink: yield dumps(rfile, cls=APIEncoder) + '\n' else: replicas = [] dictreplica = {} for replica in rfile['pfns'].keys(): replicas.append(replica) dictreplica[replica] = ( rfile['pfns'][replica]['domain'], rfile['pfns'][replica]['priority'], rfile['pfns'][replica]['rse'], rfile['pfns'][replica]['client_extract']) yield ' <file name="' + rfile['name'] + '">\n' if 'parents' in rfile and rfile['parents']: yield ' <parents>\n' for parent in rfile['parents']: yield ' <did>' + parent + '</did>\n' yield ' </parents>\n' yield ' <identity>' + rfile['scope'] + ':' + rfile[ 'name'] + '</identity>\n' if rfile['adler32'] is not None: yield ' <hash type="adler32">' + rfile[ 'adler32'] + '</hash>\n' if rfile['md5'] is not None: yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n' yield ' <size>' + str(rfile['bytes']) + '</size>\n' yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % ( config_get( 'policy', 'schema', raise_exception=False, default='generic'), rfile['scope'], rfile['name']) lanreplicas = [ replica for replica, v in dictreplica.items() if v[0] == 'lan' ] # sort lan by priority lanreplicas.sort(key=lambda rep: dictreplica[rep][1]) replicas = lanreplicas + sort_replicas( { k: v for k, v in dictreplica.items() if v[0] != 'lan' }, client_location, selection=select) idx = 1 for replica in replicas: yield ' <url location="' + str(dictreplica[replica][2]) \ + '" domain="' + str(dictreplica[replica][0]) \ + '" priority="' + str(idx) \ + '" client_extract="' + str(dictreplica[replica][3]).lower() \ + '">' + escape(replica) + '</url>\n' if limit and limit == idx: break idx += 1 yield ' </file>\n' if metalink: if __first: # ensure complete metalink on success without any content yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n' else: # if metalink start was already sent, always send the end yield '</metalink>\n' except DataIdentifierNotFound as error: raise 
generate_http_error(404, 'DataIdentifierNotFound', error.args[0]) except RucioException as error: raise generate_http_error(500, error.__class__.__name__, error.args[0]) except Exception as error: print(format_exc()) raise InternalError(error)
def GET(self, scope, name):
    """
    List all replicas of a single data identifier.

    The reply is a newline-delimited JSON stream, or a metalink4 XML document
    when ``application/metalink4+xml`` is listed in the Accept header.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    Query parameters: ``schemes``, ``select`` and ``limit`` (maximum number of
    URLs per file in the metalink output).

    :param scope: Scope of the data identifier.
    :param name: Name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    metalink_type = 'application/metalink4+xml'
    metalink_open = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
    metalink_close = '</metalink>\n'

    accept = ctx.env.get('HTTP_ACCEPT')
    metalink = accept is not None and metalink_type in accept.split(',')

    dids = [{'scope': scope, 'name': name}]
    schemes = select = limit = None
    if ctx.query:
        query = parse_qs(ctx.query[1:])
        schemes = query.get('schemes', schemes)
        if 'select' in query:
            select = query['select'][0]
        if 'limit' in query:
            limit = int(query['limit'][0])

    # prefer the proxy-supplied address, fall back to the connection address
    forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
    client_location = {'ip': ctx.ip if forwarded_for is None else forwarded_for,
                       'fqdn': None,
                       'site': None}

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if schemes is None and metalink:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # list_replicas has to be entered before the reply starts,
        # otherwise its exceptions would not be propagated correctly
        header('Content-Type', metalink_type if metalink else 'application/x-json-stream')
        nothing_sent = True

        for rfile in list_replicas(dids=dids, schemes=schemes, vo=ctx.env.get('vo')):
            # the XML preamble goes out once, right before the first file
            if nothing_sent and metalink:
                yield metalink_open
            nothing_sent = False

            # pfn -> rse, then order the pfns for this client
            rse_by_pfn = {pfn: rse for rse in rfile['rses'] for pfn in rfile['rses'][rse]}
            ordered = sort_replicas(rse_by_pfn, client_location, selection=select)

            if not metalink:
                yield dumps(rfile) + '\n'
                continue

            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            for priority, pfn in enumerate(ordered, 1):
                yield ' <url location="' + str(rse_by_pfn[pfn]) + '" priority="' + str(priority) + '">' + escape(pfn) + '</url>\n'
                if limit and limit == priority:
                    break

            yield ' </file>\n'

        if metalink:
            if nothing_sent:
                # ensure complete metalink on success without any content
                yield metalink_open + metalink_close
            else:
                # the preamble is already out, so close the document
                yield metalink_close

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def test_reaper_affect_other_vo_via_run(vo):
    """ MULTI VO (DAEMON): Test that reaper runs on the specified VO(s) and does not reap others

    The same RSE name and file names are registered for the given VO and for a
    freshly set up second VO; the reaper is then run for VO ``new`` only and
    the replica counts of both VOs are compared.
    """
    new_vo = __setup_new_vo()
    scope_name, [scope_tst, scope_new] = __setup_scopes_for_vos(vo, new_vo)
    rse_name = rse_name_generator()

    nb_files = 30
    file_size = 200
    names = ['lfn' + generate_uuid() for _ in range(nb_files)]

    def count_replicas(target_vo):
        # Build a fresh did list per call so every query gets its own dicts.
        wanted = [{'scope': scope_name, 'name': lfn} for lfn in names]
        found = replica_api.list_replicas(wanted, rse_expression=rse_name, vo=target_vo)
        return len(list(found))

    # register the RSE and its replicas for both VOs ...
    for target_vo, target_scope in ((vo, scope_tst), (new_vo, scope_new)):
        _, _rse_id, _ = __add_test_rse_and_replicas(vo=target_vo, scope=target_scope, rse_name=rse_name,
                                                    names=names, file_size=file_size)

    # ... then give both the same usage and limits
    for target_vo in (vo, new_vo):
        rse_api.set_rse_usage(rse=rse_name, source='storage', used=nb_files * file_size, free=1,
                              issuer='root', vo=target_vo)
        rse_api.set_rse_limits(rse=rse_name, name='MinFreeSpace', value=5 * 200, issuer='root', vo=target_vo)
        rse_api.set_rse_limits(rse=rse_name, name='MaxBeingDeletedFiles', value=10, issuer='root', vo=target_vo)

    # Check we start of with the expected number of replicas
    assert count_replicas(vo) == nb_files
    assert count_replicas(new_vo) == nb_files

    # Check we don't affect a second VO that isn't specified
    REGION.invalidate()
    run_reaper(once=True, rses=[rse_name], vos=['new'])

    assert count_replicas(vo) == nb_files
    assert count_replicas(new_vo) == 25
def get(self, scope, name):
    """
    Metalink redirect

    .. :quickref: MetaLinkRedirector; Metalink redirect.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    Query parameters:
        schemes: protocol schemes; may be repeated and/or comma separated.
        select / sort: replica ordering (``geoip``, ``closeness``, ``dynamic``,
            ``ranking``; anything else is random). ``sort`` wins over ``select``.
        ip / fqdn / site: override the client location used for sorting.

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :resheader Content-Type: application/metalink4+xml'.
    :status 200: OK.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    :returns: Metalink file
    """

    dids = [{'scope': scope, 'name': name}]

    # set the correct client IP
    client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = request.remote_addr

    # list_replicas needs a *list* of schemes: collect every occurrence of the
    # parameter and split comma-separated values
    schemes = [scheme
               for value in request.args.getlist('schemes')
               for scheme in value.split(',') if scheme]
    if not schemes:
        schemes = ['http', 'https', 's3+rucio', 's3+https', 'root', 'gsiftp', 'srm', 'davs']

    select = request.args.get('sort', request.args.get('select', None))

    # an explicit ?ip= overrides the detected address, but a missing one must
    # not wipe it out
    client_location = {'ip': request.args.get('ip', client_ip),
                       'fqdn': request.args.get('fqdn', None),
                       'site': request.args.get('site', None)}

    try:
        tmp_replicas = list(list_replicas(dids=dids, schemes=schemes, client_location=client_location))

        if not tmp_replicas:
            return 'no redirection possible - cannot find the DID', 404

        # collect the document in pieces and join once at the end
        parts = ['<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n']

        # iteratively build the XML per file
        for rfile in tmp_replicas:
            # pfn -> rse
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            # metadata
            parts.append(' <file name="' + rfile['name'] + '">\n')
            parts.append(' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n')

            if rfile['adler32'] is not None:
                parts.append(' <hash type="adler32">' + rfile['adler32'] + '</hash>\n')
            if rfile['md5'] is not None:
                parts.append(' <hash type="md5">' + rfile['md5'] + '</hash>\n')

            parts.append(' <size>' + str(rfile['bytes']) + '</size>\n')

            parts.append(' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name']))
            parts.append('</glfn>\n')

            # sort the actual replicas if necessary
            if select == 'geoip':
                replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
            elif select == 'closeness':
                replicas = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                replicas = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                replicas = sort_ranking(dictreplica, client_location)
            else:
                replicas = sort_random(dictreplica)

            # URLs; the priority is the 1-based position in the ordering
            for idx, replica in enumerate(replicas, 1):
                parts.append(' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + replica + '</url>\n')

            parts.append(' </file>\n')

        # don't forget the metalink footer
        parts.append('</metalink>\n')

        return Response(''.join(parts), content_type='application/metalink4+xml')
    except DataIdentifierNotFound as error:
        return generate_http_error_flask(404, 'DataIdentifierNotFound', error.args[0])
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        # a view must return text, not the exception object itself
        return str(error), 500
def generate(request_id, issuer, vo):
    """
    Stream the replica listing as JSON lines or as a metalink4 document.

    Besides its arguments, the generator reads the listing options
    (``metalink``, ``dids``, ``schemes``, ``select``, ``limit``, ...) from the
    enclosing scope.

    :param request_id: Passed through to ``list_replicas``.
    :param issuer: Passed through to ``list_replicas``.
    :param vo: Passed through to ``list_replicas``.
    :returns: Yields one text chunk at a time.
    """
    metalink_open = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
    metalink_close = '</metalink>\n'

    # list_replicas has to be entered before the reply starts,
    # otherwise its exceptions would not be propagated correctly
    listing = list_replicas(dids=dids, schemes=schemes,
                            unavailable=unavailable,
                            request_id=request_id,
                            ignore_availability=ignore_availability,
                            all_states=all_states,
                            rse_expression=rse_expression,
                            client_location=client_location,
                            domain=domain,
                            signature_lifetime=signature_lifetime,
                            resolve_archives=resolve_archives,
                            resolve_parents=resolve_parents,
                            nrandom=nrandom,
                            updated_after=updated_after,
                            issuer=issuer,
                            vo=vo)

    # True only while metalink output is requested and nothing was sent yet
    first = metalink
    for rfile in listing:
        if first and metalink:
            yield metalink_open
        first = False

        if not metalink:
            yield dumps(rfile, cls=APIEncoder) + '\n'
            continue

        # pfn -> (domain, priority, rse, client_extract)
        dictreplica = {pfn: (info['domain'], info['priority'], info['rse'], info['client_extract'])
                       for pfn, info in rfile['pfns'].items()}

        yield ' <file name="' + rfile['name'] + '">\n'

        if rfile.get('parents'):
            yield ' <parents>\n'
            for parent in rfile['parents']:
                yield ' <did>' + parent + '</did>\n'
            yield ' </parents>\n'

        yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
        if rfile['adler32'] is not None:
            yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
        if rfile['md5'] is not None:
            yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
        yield ' <size>' + str(rfile['bytes']) + '</size>\n'

        policy_schema = config_get('policy', 'schema', raise_exception=False, default='generic')
        yield f' <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

        # LAN replicas come first, ordered by priority; the remaining ones
        # are ordered by the requested selection
        lan = sorted((pfn for pfn, entry in dictreplica.items() if entry[0] == 'lan'),
                     key=lambda pfn: dictreplica[pfn][1])
        other = {pfn: entry for pfn, entry in dictreplica.items() if entry[0] != 'lan'}
        ordered = lan + sort_replicas(other, client_location, selection=select)

        for idx, pfn in enumerate(ordered, start=1):
            entry = dictreplica[pfn]
            yield ' <url location="' + str(entry[2]) \
                + '" domain="' + str(entry[0]) \
                + '" priority="' + str(idx) \
                + '" client_extract="' + str(entry[3]).lower() \
                + '">' + escape(pfn) + '</url>\n'
            if limit and limit == idx:
                break

        yield ' </file>\n'

    if metalink:
        if first:
            # nothing was streamed, i.e. there were no replicas
            yield metalink_open + metalink_close
        else:
            # the preamble is already out, so close the document
            yield metalink_close
def get(self, scope_name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    Picks one replica URL of the file and answers with a 303 redirect to it.
    A ``Link`` header pointing at the metalink description is set on the
    headers object used for the replies.

    :param scope_name: data identifier (scope)/(name).
    :resheader Content-Type: application/metalink+xml'.
    :status 303: Redirect.
    :status 400: scope_name could not be parsed.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    """
    environ = request.environ
    headers = Headers()
    headers.set('Access-Control-Allow-Origin', environ.get('HTTP_ORIGIN'))
    headers.set('Access-Control-Allow-Headers', environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    headers.set('Access-Control-Allow-Methods', '*')
    headers.set('Access-Control-Allow-Credentials', 'true')

    try:
        scope, name = parse_scope_name(scope_name)
    except ValueError as error:
        return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers

    try:
        # use the default HTTP protocols if no scheme is given
        select, rse, site = 'random', None, None
        schemes = ['davs', 'http', 'https']

        client_location = {'ip': request.headers.get('X-Forwarded-For', request.remote_addr),
                           'fqdn': None,
                           'site': None}

        if request.query_string:
            params = parse_qs(request.query_string.decode(encoding='utf-8'))
            # 'sort' takes precedence over 'select'
            select = params.get('sort', params.get('select', [select]))[0]
            rse = params.get('rse', [rse])[0]
            site = params.get('site', [site])[0]
            # a single scheme arrives as a string; without one the query
            # default list applies
            schemes = params['schemes'][0] if 'schemes' in params else ['davs', 'https', 's3']
            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = environ.get('REQUEST_URI').split('?')[0]
        single_scheme = not isinstance(schemes, list)
        link_schemes = schemes if single_scheme else ','.join(schemes)
        headers.set('Link', '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (cleaned_url, link_schemes, select))
        if single_scheme:
            schemes = [schemes]  # list_replicas needs a list

        # get vo if given
        vo = request.headers.get('X-Rucio-VO', 'def')

        replicas = list(list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                                      schemes=schemes, client_location=client_location, vo=vo))

        no_valid_rse = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

        selected_url = None
        for r in replicas:
            rses = r['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: it has to hold a replica
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                    continue
                return no_valid_rse

            dictreplica = {}
            for rep in rses:
                for replica in rses[rep]:
                    # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                    # forcibly replacement davs and s3 URLs to https
                    replica = replica.replace('davs://', 'https://').replace('s3://', 'https://')
                    dictreplica[replica] = rep

            if not dictreplica:
                return no_valid_rse

            if site:
                rep = site_selector(dictreplica, site, vo)
                if not rep:
                    return no_valid_rse
                selected_url = rep[0]
                continue

            if select == 'geoip':
                rep = sort_geoip(dictreplica, client_location['ip'])
            elif select == 'closeness':
                rep = sort_closeness(dictreplica, client_location)
            elif select == 'dynamic':
                rep = sort_dynamic(dictreplica, client_location)
            elif select == 'ranking':
                rep = sort_ranking(dictreplica, client_location)
            else:
                rep = sort_random(dictreplica)

            selected_url = rep[0]

        if selected_url:
            response = redirect(selected_url, code=303)
            response.headers.extend(headers)
            return response

        return 'no redirection possible - file does not exist', 404, headers
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers
def generate(request_id, issuer, vo):
    """
    Stream the replica listing as JSON lines or as a metalink4 document.

    Besides its arguments, the generator reads the listing options
    (``metalink``, ``dids``, ``schemes``, ``select``, ``limit``, ...) from the
    enclosing scope. Once the metalink preamble has been sent, the closing
    tag is sent as well, even if listing fails half-way, unless the
    consumer itself closed the stream.

    :param request_id: Passed through to ``list_replicas``.
    :param issuer: Passed through to ``list_replicas``.
    :param vo: Passed through to ``list_replicas``.
    :returns: Yields one text chunk at a time.
    """
    # we need to call list_replicas before starting to reply
    # otherwise the exceptions won't be propagated correctly
    first = metalink
    # set when the consumer closes the generator (e.g. client went away)
    consumer_closed = False
    try:
        for rfile in list_replicas(dids=dids, schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=request_id,
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain,
                                   signature_lifetime=signature_lifetime,
                                   resolve_archives=resolve_archives,
                                   resolve_parents=resolve_parents,
                                   updated_after=updated_after,
                                   issuer=issuer,
                                   vo=vo):

            # in first round, stream the metalink header
            if first and metalink:
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
            first = False

            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
            else:
                # pfn -> (domain, priority, rse, client_extract)
                dictreplica = {}
                for replica in rfile['pfns'].keys():
                    dictreplica[replica] = (rfile['pfns'][replica]['domain'],
                                            rfile['pfns'][replica]['priority'],
                                            rfile['pfns'][replica]['rse'],
                                            rfile['pfns'][replica]['client_extract'])

                yield ' <file name="' + rfile['name'] + '">\n'

                if 'parents' in rfile and rfile['parents']:
                    yield ' <parents>\n'
                    for parent in rfile['parents']:
                        yield ' <did>' + parent + '</did>\n'
                    yield ' </parents>\n'

                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (config_get('policy', 'schema',
                                                                              raise_exception=False,
                                                                              default='generic'),
                                                                   rfile['scope'],
                                                                   rfile['name'])

                # TODO: deprecate this
                if select == 'geoip':
                    replicas = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    replicas = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    replicas = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    replicas = sort_ranking(dictreplica, client_location)
                elif select == 'random':
                    replicas = sort_random(dictreplica)
                else:
                    # no selection: order by the (domain, priority, ...) tuple
                    replicas = sorted(dictreplica, key=dictreplica.get)

                for idx, replica in enumerate(replicas, 1):
                    yield ' <url location="' + str(dictreplica[replica][2]) \
                        + '" domain="' + str(dictreplica[replica][0]) \
                        + '" priority="' + str(dictreplica[replica][1]) \
                        + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                        + '">' + escape(replica) + '</url>\n'
                    if limit and limit == idx:
                        break

                yield ' </file>\n'

        if metalink and first:
            # if still first output, i.e. there were no replicas
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'

    except GeneratorExit:
        # Yielding while the generator is being closed raises RuntimeError,
        # so remember not to send the footer in that case.
        consumer_closed = True
        raise
    finally:
        # don't forget to send the metalink footer
        if metalink and not first and not consumer_closed:
            yield '</metalink>\n'
def get(self, scope_name):
    """
    Metalink redirect

    .. :quickref: MetaLinkRedirector; Metalink redirect.

    Streams a metalink4 XML document describing the replicas of one DID.

    Query parameters:
        schemes: protocol schemes handed to ``list_replicas``.
        select / sort: replica ordering (``geoip``, ``closeness``, ``dynamic``,
            ``ranking``; anything else is random). ``sort`` wins over ``select``.
        ip / fqdn / site: override the client location used for sorting.

    :param scope_name: data identifier (scope)/(name).
    :resheader Content-Type: application/metalink4+xml'.
    :status 200: OK.
    :status 400: scope_name could not be parsed.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 406: Not Acceptable.
    :status 500: Internal Error.
    :returns: Metalink file
    """
    headers = Headers()
    headers.set('Access-Control-Allow-Origin', request.environ.get('HTTP_ORIGIN'))
    headers.set('Access-Control-Allow-Headers', request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    headers.set('Access-Control-Allow-Methods', '*')
    headers.set('Access-Control-Allow-Credentials', 'true')

    try:
        scope, name = parse_scope_name(scope_name)
    except ValueError as error:
        return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers

    dids = [{'scope': scope, 'name': name}]
    schemes = ['http', 'https', 'root', 'gsiftp', 'srm', 'davs']
    select = None

    # set the correct client IP
    client_ip = request.headers.get('X-Forwarded-For', request.remote_addr)

    client_location = {'ip': client_ip,
                       'fqdn': None,
                       'site': None}

    if request.query_string:
        query_string = request.query_string.decode(encoding='utf-8')
        params = parse_qs(query_string)
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        if 'sort' in params:
            select = params['sort'][0]

        if 'ip' in params:
            client_location['ip'] = params['ip'][0]
        if 'fqdn' in params:
            client_location['fqdn'] = params['fqdn'][0]
        if 'site' in params:
            client_location['site'] = params['site'][0]

    # get vo if given
    vo = request.headers.get('X-Rucio-VO', 'def')

    try:
        replicas_iter = list_replicas(dids=dids, schemes=schemes, client_location=client_location, vo=vo)
        # Pull the first entry eagerly so that lookup errors and the
        # "nothing found" case are reported before streaming starts.
        try:
            first = next(replicas_iter)
        except StopIteration:
            # carry the CORS headers like every other reply of this view
            return 'no redirection possible - cannot find the DID', 404, headers

        def generate():
            # first, stream the metalink header
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

            # set when the consumer closes the generator (e.g. client went away)
            consumer_closed = False
            try:
                # iteratively stream the XML per file
                for rfile in itertools.chain((first, ), replicas_iter):
                    # pfn -> rse
                    dictreplica = {}
                    for rse in rfile['rses']:
                        for replica in rfile['rses'][rse]:
                            dictreplica[replica] = rse

                    # stream metadata
                    yield ' <file name="' + rfile['name'] + '">\n'
                    yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

                    if rfile['adler32'] is not None:
                        yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                    if rfile['md5'] is not None:
                        yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

                    yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                    yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
                    yield '</glfn>\n'

                    # sort the actual replicas if necessary
                    if select == 'geoip':
                        replicas = sort_geoip(dictreplica, client_location['ip'], ignore_error=True)
                    elif select == 'closeness':
                        replicas = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        replicas = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        replicas = sort_ranking(dictreplica, client_location)
                    else:
                        replicas = sort_random(dictreplica)

                    # stream URLs; the priority is the 1-based position in the ordering
                    for idx, replica in enumerate(replicas, 1):
                        yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(idx) + '">' + replica + '</url>\n'

                    yield ' </file>\n'
            except GeneratorExit:
                # Yielding while the generator is being closed raises
                # RuntimeError, so skip the footer in that case.
                consumer_closed = True
                raise
            finally:
                # don't forget to send the metalink footer
                if not consumer_closed:
                    yield '</metalink>\n'

        return try_stream(generate(), content_type='application/metalink4+xml')
    except DataIdentifierNotFound as error:
        return generate_http_error_flask(404, 'DataIdentifierNotFound', error.args[0], headers=headers)
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
    except Exception as error:
        print(format_exc())
        return str(error), 500, headers
def make_replicas_available(self):
    """
    Marks available replicas for the dataset at rse if they are in PhEDEx

    Lists the replicas of ``self.scope:self.block_name`` at ``self.rse`` in
    all states, intersects those not in state ``AVAILABLE`` with the names in
    ``self.replicas`` and updates each file in that intersection to state
    ``'A'``. The work is timed and counted through ``monitor``.
    """
    import pprint

    with monitor.record_timer_block('cms_sync.time_recover_replica'):
        logging.info('Recovering unavailable replicas for %s:%s at %s', self.scope, self.block_name, self.rse)

        replicas = list(list_replicas(dids=[{'scope': self.scope, 'name': self.block_name}],
                                      rse_expression='rse=%s' % self.rse, all_states=True))
        logging.info('Recovery: Rucio replicas %s', len(replicas))
        ewv_rucio_repl = {repl['name'] for repl in replicas}
        logging.info(pprint.pformat(ewv_rucio_repl))

        try:
            unavailable_replicas = {repl['name'] for repl in replicas
                                    if repl['states'][self.rse] != 'AVAILABLE'}
        except TypeError:
            # best effort: treat everything as available rather than abort
            logging.warning('Got a type error, setting unavailable replicas to null')
            unavailable_replicas = set()
        logging.info('Recovery: Unavailable replicas %s', len(unavailable_replicas))

        phedex_replicas = set(self.replicas.keys())
        logging.info('Recovery: PhEDEx replicas %s', len(phedex_replicas))

        logging.info('Recovery: PhEDEx %s', pprint.pformat(phedex_replicas))
        logging.info('Recovery: Unavailable %s', pprint.pformat(unavailable_replicas))

        # files listed in self.replicas that Rucio does not report as AVAILABLE
        missing = list(phedex_replicas & unavailable_replicas)
        logging.info('Recovery: Missing replicas %s', len(missing))

        logging.info('Recovery for %s:%s at %s: PhEDEx has %s, Rucio unavailable %s. Missing: %s ',
                     self.scope, self.block_name, self.rse, len(phedex_replicas),
                     len(unavailable_replicas), len(missing))

        # Fix up things which are unavailable
        rse_details = get_rse(self.rse)
        rse_id = rse_details['id']
        scope = InternalScope(self.scope)
        state = 'A'

        for name in missing:
            logging.info('Setting available %s:%s at %s', self.scope, name, self.rse)
            core_update_state(rse_id=rse_id, scope=scope, name=name, state=state)

        monitor.record_counter('cms_sync.files_made_available', delta=len(missing))

    return
def POST(self): """ List all replicas for data identifiers. HTTP Success: 200 OK HTTP Error: 401 Unauthorized 500 InternalError :returns: A dictionary containing all replicas information. :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept: """ metalink = False if ctx.env.get('HTTP_ACCEPT') is not None: tmp = ctx.env.get('HTTP_ACCEPT').split(',') if 'application/metalink4+xml' in tmp: metalink = True client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR') if client_ip is None: client_ip = ctx.ip dids, schemes, select, unavailable, limit = [], None, None, False, None ignore_availability, rse_expression, all_states, domain = False, None, False, None client_location = {} json_data = data() try: params = parse_response(json_data) if 'dids' in params: dids = params['dids'] if 'schemes' in params: schemes = params['schemes'] if 'unavailable' in params: unavailable = params['unavailable'] ignore_availability = True if 'all_states' in params: all_states = params['all_states'] if 'rse_expression' in params: rse_expression = params['rse_expression'] if 'client_location' in params: client_location = params['client_location'] client_location['ip'] = params['client_location'].get( 'ip', client_ip) if 'sort' in params: select = params['sort'] if 'domain' in params: domain = params['domain'] except ValueError: raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list') if ctx.query: params = parse_qs(ctx.query[1:]) if 'select' in params: select = params['select'][0] if 'limit' in params: limit = params['limit'][0] if 'sort' in params: select = params['sort'] try: # first, set the appropriate content type, and stream the header if not metalink: header('Content-Type', 'application/x-json-stream') else: header('Content-Type', 'application/metalink4+xml') yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n' # then, stream the replica information for rfile in list_replicas(dids=dids, 
schemes=schemes, unavailable=unavailable, request_id=ctx.env.get('request_id'), ignore_availability=ignore_availability, all_states=all_states, rse_expression=rse_expression, client_location=client_location, domain=domain, issuer=ctx.env.get('issuer')): replicas = [] dictreplica = {} for rse in rfile['rses']: for replica in rfile['rses'][rse]: replicas.append(replica) dictreplica[replica] = rse if not metalink: yield dumps(rfile, cls=APIEncoder) + '\n' else: yield ' <file name="' + rfile['name'] + '">\n' yield ' <identity>' + rfile['scope'] + ':' + rfile[ 'name'] + '</identity>\n' if rfile['adler32'] is not None: yield ' <hash type="adler32">' + rfile[ 'adler32'] + '</hash>\n' if rfile['md5'] is not None: yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n' yield ' <size>' + str(rfile['bytes']) + '</size>\n' yield ' <glfn name="/atlas/rucio/%s:%s">' % ( rfile['scope'], rfile['name']) yield '</glfn>\n' if select == 'geoip': replicas = sort_geoip(dictreplica, client_location['ip']) elif select == 'closeness': replicas = sort_closeness(dictreplica, client_location) elif select == 'dynamic': replicas = sort_dynamic(dictreplica, client_location) elif select == 'ranking': replicas = sort_ranking(dictreplica, client_location) else: replicas = sort_random(dictreplica) idx = 0 for replica in replicas: yield ' <url location="' + str( dictreplica[replica]) + '" priority="' + str( idx + 1) + '">' + replica + '</url>\n' idx += 1 if limit and limit == idx: break yield ' </file>\n' # don't forget to send the metalink footer if metalink: yield '</metalink>\n' except DataIdentifierNotFound as error: raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0]) except RucioException as error: raise generate_http_error(500, error.__class__.__name__, error.args[0]) except Exception as error: print format_exc() raise InternalError(error)
def test_api_replica(self):
    """ REPLICA (API): Test external representation of replicas """
    file_name = did_name_generator('file')
    dataset_name = did_name_generator('dataset')
    pfn = 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (self.scope_name, generate_uuid())

    # register one file replica and attach the file to a freshly created dataset
    file_entry = {'scope': self.scope_name, 'name': file_name, 'bytes': 100, 'pfn': pfn}
    add_replicas(self.rse2_name, files=[file_entry], issuer='root', **self.vo)
    add_did(self.scope_name, dataset_name, 'dataset', issuer='root', account=self.account_name, **self.vo)
    attachment = {
        'scope': self.scope_name,
        'name': dataset_name,
        'dids': [{'scope': self.scope_name, 'name': file_name}],
    }
    attach_dids_to_dids([attachment], issuer='root', **self.vo)

    # the PFN lookup must return the file, reported under the external scope name
    pfn_lookup = list(get_did_from_pfns([pfn], self.rse2_name, **self.vo))
    assert 0 != len(pfn_lookup)
    did_found = False
    for entry in pfn_lookup:
        for found in entry.values():
            if found['name'] == file_name:
                did_found = True
                assert self.scope_name == found['scope']
    assert did_found

    # listing replicas with parents resolved must expose external scope names only
    listing = list(list_replicas(dids=[{'scope': self.scope_name, 'name': file_name}],
                                 resolve_parents=True,
                                 **self.vo))
    assert 0 != len(listing)
    parents_found = False
    for replica in listing:
        assert replica['scope'] == self.scope_name
        if 'parents' not in replica:
            continue
        parents_found = True
        for parent in replica['parents']:
            assert self.scope_name in parent
            if self.multi_vo:
                assert self.scope.internal not in parent
    assert parents_found
def get(self, scope, name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :query schemes: Protocol scheme(s) to consider; defaults to davs, https and s3.
    :query select: Replica ordering (geoip, closeness, dynamic, ranking); anything else is random.
    :query sort: Same as ``select``; takes precedence when both are given.
    :query rse: Redirect to the first replica on this RSE only.
    :query site: Select a replica via ``site_selector`` for this site.
    :query ip: Client IP to use for sorting instead of the request address.
    :query fqdn: Client FQDN passed on in the client location.
    :resheader Link: Location of the metalink description of the file.
    :status 303: Redirect.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    """
    headers = {}
    try:
        # prefer the address forwarded by a proxy over the socket peer address
        client_ip = request.environ.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = request.remote_addr

        # use the default HTTP protocols if no scheme is given
        schemes = request.args.get('schemes', ['davs', 'https', 's3'])
        select = request.args.get('select', 'random')
        if 'sort' in request.args:
            select = request.args['sort']
        rse = request.args.get('rse', None)
        site = request.args.get('site', None)

        client_location = {
            'ip': request.args.get('ip', client_ip),
            'fqdn': request.args.get('fqdn', None),
            'site': request.args.get('site', None),
        }

        # correctly forward the schemes and select to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        if isinstance(schemes, list):
            schemes_param = ','.join(schemes)
        else:
            schemes_param = schemes
            schemes = [schemes]  # list_replicas needs a list
        headers['Link'] = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (
            cleaned_url, schemes_param, select)

        replicas = list(list_replicas(dids=[{'scope': scope, 'name': name, 'type': 'FILE'}],
                                      schemes=schemes,
                                      client_location=client_location))

        selected_url, selected_rse = None, None
        for r in replicas:
            if not r['rses']:
                continue

            if rse:
                if rse in r['rses'] and r['rses'][rse]:
                    selected_url = r['rses'][rse][0]
                    selected_rse = rse
                else:
                    return 'no redirection possible - no valid RSE for HTTP redirection found', 404
            else:
                dictreplica = {}
                for rep in r['rses']:
                    for replica in r['rses'][rep]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        replica = replica.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[replica] = rep

                if not dictreplica:
                    return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                elif site:
                    rep = site_selector(dictreplica, site)
                    if rep:
                        selected_url = rep[0]
                    else:
                        return 'no redirection possible - no valid RSE for HTTP redirection found', 404
                else:
                    if select == 'geoip':
                        rep = sort_geoip(dictreplica, client_location['ip'])
                    elif select == 'closeness':
                        rep = sort_closeness(dictreplica, client_location)
                    elif select == 'dynamic':
                        rep = sort_dynamic(dictreplica, client_location)
                    elif select == 'ranking':
                        rep = sort_ranking(dictreplica, client_location)
                    else:
                        rep = sort_random(dictreplica)
                    selected_url = rep[0]

                # remember which RSE hosts the chosen URL (needed for signing below)
                selected_rse = dictreplica.get(selected_url)

            if selected_url:
                target_url = selected_url
                if selected_url.startswith('s3+rucio://'):
                    # such URLs cannot be followed directly; hand out a signed URL instead
                    connect(selected_rse, selected_url)
                    signed_urls = get_signed_urls([selected_url], rse=selected_rse, operation='read')
                    target_url = signed_urls[selected_url]

                res = redirect(target_url, code=303)
                # the response object only emits what is stored in its
                # ``headers`` collection, so copy the extra headers there
                for key, value in headers.items():
                    res.headers[key] = value
                return res

        return 'no redirection possible - file does not exist', 404
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0][0])
def GET(self, scope, name):
    """
    Header Redirect

    Looks up the replicas of one file and answers with a 303 redirect to
    the selected replica URL.

    HTTP Success:
        303 See Other

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    cors_headers = (
        ('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN')),
        ('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS')),
        ('Access-Control-Allow-Methods', '*'),
        ('Access-Control-Allow-Credentials', 'true'),
    )
    for cors_name, cors_value in cors_headers:
        header(cors_name, cors_value)

    no_rse_message = 'no redirection possible - no valid RSE for HTTP redirection found'

    try:
        # use the default HTTP protocols if no scheme is given
        select = 'random'
        rse = None
        site = None
        schemes = ['davs', 'http', 'https']

        forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
        client_location = {
            'ip': ctx.ip if forwarded_for is None else forwarded_for,
            'fqdn': None,
            'site': None,
        }

        if ctx.query:
            query = parse_qs(ctx.query[1:])
            # 'sort' is evaluated last, so it wins over 'select'
            for selector_key in ('select', 'sort'):
                if selector_key in query:
                    select = query[selector_key][0]
            if 'rse' in query:
                rse = query['rse'][0]
            if 'site' in query:
                site = query['site'][0]
            schemes = query['schemes'][0] if 'schemes' in query else ['davs', 'https', 's3']
            for location_key in ('ip', 'fqdn', 'site'):
                if location_key in query:
                    client_location[location_key] = query[location_key][0]

        # correctly forward the schemes and select to potential metalink followups
        request_path = ctx.env.get('REQUEST_URI').split('?')[0]
        if isinstance(schemes, list):
            schemes_param = ','.join(schemes)
        else:
            schemes_param = schemes
            schemes = [schemes]  # list_replicas needs a list
        header('Link', '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"' % (request_path, schemes_param, select))

        # get vo if given
        vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

        file_did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[file_did],
                                      schemes=schemes,
                                      client_location=client_location,
                                      vo=vo))

        selected_url = None
        for entry in replicas:
            rses = entry['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: it must hold at least one replica
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                else:
                    raise ReplicaNotFound(no_rse_message)
            else:
                dictreplica = {}
                for rse_name in rses:
                    for pfn in rses[rse_name]:
                        # since this is HTTP-only redirection, and to ensure compatibility with as many http clients as possible
                        # forcibly replacement davs and s3 URLs to https
                        https_pfn = pfn.replace('davs://', 'https://').replace('s3://', 'https://')
                        dictreplica[https_pfn] = rse_name

                if not dictreplica:
                    raise ReplicaNotFound(no_rse_message)

                if site:
                    ranked = site_selector(dictreplica, site, vo)
                    if not ranked:
                        raise ReplicaNotFound(no_rse_message)
                elif select == 'geoip':
                    ranked = sort_geoip(dictreplica, client_location['ip'])
                elif select == 'closeness':
                    ranked = sort_closeness(dictreplica, client_location)
                elif select == 'dynamic':
                    ranked = sort_dynamic(dictreplica, client_location)
                elif select == 'ranking':
                    ranked = sort_ranking(dictreplica, client_location)
                else:
                    ranked = sort_random(dictreplica)
                selected_url = ranked[0]

            if selected_url:
                raise seeother(selected_url)

        raise ReplicaNotFound('no redirection possible - file does not exist')

    except seeother:
        # the redirect itself travels as an exception; let it through untouched
        raise
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    The query string is first tried as a URL-quoted JSON document (from
    which ``schemes`` is read); if it is not valid JSON it is parsed as a
    regular query string carrying ``select`` and ``limit``.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        404 DataIdentifierNotFound
        500 InternalError

    :param scope: The scope of the data identifier.
    :param name: The name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    metalink = None
    if ctx.env.get('HTTP_ACCEPT') is not None:
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        # first check if client accepts metalink
        if 'application/metalink+xml' in tmp:
            metalink = 3
        # but prefer metalink4 if the client has support for it
        # (clients can put both in their ACCEPT header!)
        if 'application/metalink4+xml' in tmp:
            metalink = 4

    dids, schemes, select, limit = [{'scope': scope, 'name': name}], None, None, None
    if ctx.query:
        try:
            params = loads(unquote(ctx.query[1:]))
            if 'schemes' in params:
                schemes = params['schemes']
        except ValueError:
            # not JSON: fall back to a plain key=value query string
            params = parse_qs(ctx.query[1:])
            if 'select' in params:
                select = params['select'][0]
            if 'limit' in params:
                limit = int(params['limit'][0])

    try:
        # first, set the appropriate content type, and stream the header
        if metalink is None:
            header('Content-Type', 'application/x-json-stream')
        elif metalink == 3:
            header('Content-Type', 'application/metalink+xml')
            schemes = ['http', 'https']
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
        elif metalink == 4:
            header('Content-Type', 'application/metalink4+xml')
            schemes = ['http', 'https']
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # the client address does not change between files; look it up once
        client_ip = ctx.get('ip')

        # then, stream the replica information
        for rfile in list_replicas(dids=dids, schemes=schemes):
            replicas = []
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    replicas.append(replica)
                    dictreplica[replica] = rse

            if select == 'geoip':
                try:
                    replicas = geoIP_order(dictreplica, client_ip)
                except AddressNotFoundError:
                    # unknown address: keep the replicas in listing order
                    pass
            else:
                replicas = random_order(dictreplica, client_ip)

            if metalink is None:
                yield dumps(rfile) + '\n'
            elif metalink == 3:
                yield ' <file name="' + rfile['name'] + '">\n <resources>\n'
                for idx, replica in enumerate(replicas):
                    yield ' <url type="http" preference="' + str(idx) + '">' + replica + '</url>\n'
                    if limit and limit == idx + 1:
                        break
                yield ' </resources>\n </file>\n'
            elif metalink == 4:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'
                for priority, replica in enumerate(replicas, 1):
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + replica + '</url>\n'
                    if limit and limit == priority:
                        break
                yield ' </file>\n'

        # don't forget to send the metalink footer
        if metalink == 3:
            yield '</files>\n</metalink>\n'
        elif metalink == 4:
            yield '</metalink>\n'
    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0][0])
def POST(self):
    """
    List all replicas for data identifiers.

    The JSON request body may carry ``dids``, ``schemes``, ``unavailable``,
    ``all_states`` and ``rse_expression``.  The query string may carry
    ``select`` (replica ordering) and ``limit`` (maximum number of URLs per
    file in the metalink output).

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request (undecodable JSON body or non-integer limit)
        401 Unauthorized
        404 DataIdentifierNotFound
        500 InternalError

    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    metalink = None
    if ctx.env.get('HTTP_ACCEPT') is not None:
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        # first check if client accepts metalink
        if 'application/metalink+xml' in tmp:
            metalink = 3
        # but prefer metalink4 if the client has support for it
        # (clients can put both in their ACCEPT header!)
        if 'application/metalink4+xml' in tmp:
            metalink = 4

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states = False, None, False

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        # parse_qs maps every key to a *list* of values; take the first one
        params = parse_qs(ctx.query[1:])
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            # must be an int, otherwise the limit checks below never match
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'limit must be an integer')

    try:
        # first, set the appropriate content type, and stream the header
        if metalink is None:
            header('Content-Type', 'application/x-json-stream')
        elif metalink == 3:
            header('Content-Type', 'application/metalink+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink version="3.0" xmlns="http://www.metalinker.org/">\n<files>\n'
        elif metalink == 4:
            header('Content-Type', 'application/metalink4+xml')
            yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # the client address does not change between files; resolve it once,
        # preferring the address forwarded by a proxy
        client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
        if client_ip is None:
            client_ip = ctx.ip

        # then, stream the replica information
        for rfile in list_replicas(dids=dids,
                                   schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression):
            # map every replica URL to the RSE that hosts it
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            if select == 'geoip':
                replicas = geoIP_order(dictreplica, client_ip)
            else:
                replicas = random_order(dictreplica, client_ip)

            if metalink is None:
                yield dumps(rfile, cls=APIEncoder) + '\n'
            elif metalink == 3:
                yield ' <file name="' + rfile['name'] + '">\n <resources>\n'
                for idx, replica in enumerate(replicas):
                    yield ' <url type="http" preference="' + str(idx) + '">' + replica + '</url>\n'
                    if limit and limit == idx + 1:
                        break
                yield ' </resources>\n </file>\n'
            elif metalink == 4:
                yield ' <file name="' + rfile['name'] + '">\n'
                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'
                for priority, replica in enumerate(replicas, 1):
                    yield ' <url location="' + str(dictreplica[replica]) + '" priority="' + str(priority) + '">' + replica + '</url>\n'
                    if limit and limit == priority:
                        break
                yield ' </file>\n'

        # don't forget to send the metalink footer
        if metalink == 3:
            yield '</files>\n</metalink>\n'
        elif metalink == 4:
            yield '</metalink>\n'
    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0][0])