def generate(vo):
    """Stream the replicas of the requested DIDs chunk by chunk.

    The names ``dids``, ``schemes``, ``select``, ``limit``, ``metalink`` and
    ``client_location`` are taken from the enclosing scope.

    :param vo: The VO passed through to ``list_replicas``.
    :returns: A generator of strings. Without ``metalink`` each chunk is one
              JSON document followed by a newline; with ``metalink`` the
              chunks concatenate to a metalink XML document.
    """
    xml_prolog = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # list_replicas has to be entered before anything is sent,
    # otherwise its exceptions would not be propagated correctly
    prolog_pending = metalink

    for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
        if prolog_pending:
            # very first file of a metalink reply: open the document
            yield xml_prolog
            prolog_pending = False

        # map every PFN to the RSE it lives on, then order the PFNs
        pfn_to_rse = {pfn: rse for rse, pfns in rfile['rses'].items() for pfn in pfns}
        ordered_pfns = sort_replicas(pfn_to_rse, client_location, selection=select)

        if not metalink:
            yield dumps(rfile) + '\n'
            continue

        yield ''.join((' <file name="', rfile['name'], '">\n'))
        yield ''.join((' <identity>', rfile['scope'], ':', rfile['name'], '</identity>\n'))
        for hash_type in ('adler32', 'md5'):
            if rfile[hash_type] is not None:
                yield ''.join((' <hash type="', hash_type, '">', rfile[hash_type], '</hash>\n'))
        yield ''.join((' <size>', str(rfile['bytes']), '</size>\n'))
        yield f' <glfn name="/atlas/rucio/{rfile["scope"]}:{rfile["name"]}">'
        yield '</glfn>\n'

        for rank, pfn in enumerate(ordered_pfns, start=1):
            yield ''.join((' <url location="', str(pfn_to_rse[pfn]),
                           '" priority="', str(rank), '">',
                           escape(pfn), '</url>\n'))
            # a falsy limit means "no limit"
            if limit and limit == rank:
                break
        yield ' </file>\n'

    if not metalink:
        return

    if prolog_pending:
        # nothing was streamed at all: still answer with a complete, empty document
        yield xml_prolog + '</metalink>\n'
    else:
        # close the document that was opened for the first file
        yield '</metalink>\n'
def generate():
    """Stream a metalink XML document describing the replicas of a file.

    The names ``first``, ``replicas_iter``, ``client_location`` and ``sortby``
    are taken from the enclosing scope; ``first`` is chained in front of
    ``replicas_iter`` so that it is streamed as the first file.

    :returns: A generator of strings that concatenate to the metalink document.
    """
    # imported locally so this block does not depend on a module-level import
    from xml.sax.saxutils import escape

    # first, stream the header
    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # iteratively stream the XML per file
    for rfile in itertools.chain((first, ), replicas_iter):
        # map every PFN to the RSE it is stored on
        dictreplica = {}
        for rse in rfile['rses']:
            for replica in rfile['rses'][rse]:
                dictreplica[replica] = rse

        # stream metadata
        yield ' <file name="' + rfile['name'] + '">\n'
        yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
        if rfile['adler32'] is not None:
            yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
        if rfile['md5'] is not None:
            yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
        yield ' <size>' + str(rfile['bytes']) + '</size>\n'
        yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
        yield '</glfn>\n'

        replicas = sort_replicas(dictreplica, client_location, selection=sortby)

        # stream URLs; the URL is XML-escaped because characters such as '&'
        # (e.g. in a query string) would otherwise produce a malformed document
        for idx, replica in enumerate(replicas, start=1):
            yield ' <url location="' + str(dictreplica[replica]) \
                + '" priority="' + str(idx) \
                + '">' + escape(replica) + '</url>\n'

        yield ' </file>\n'

    # don't forget to send the metalink footer
    yield '</metalink>\n'
def _list_and_sort_replicas(request_id, issuer, vo):
    """Yield the files from ``list_replicas`` with their ``pfns`` re-ordered.

    All query parameters other than the three arguments are taken from the
    enclosing scope.

    :param request_id: Passed through to ``list_replicas``.
    :param issuer: Passed through to ``list_replicas``.
    :param vo: Passed through to ``list_replicas``.
    :returns: A generator of the file dictionaries, each with ``rfile['pfns']``
              replaced by the output of ``_sorted_with_priorities``.
    """
    # list_replicas has to be entered before the reply starts,
    # otherwise its exceptions would not be propagated correctly
    listing = list_replicas(
        dids=dids,
        schemes=schemes,
        unavailable=unavailable,
        request_id=request_id,
        ignore_availability=ignore_availability,
        all_states=all_states,
        rse_expression=rse_expression,
        client_location=client_location,
        domain=domain,
        signature_lifetime=signature_lifetime,
        resolve_archives=resolve_archives,
        resolve_parents=resolve_parents,
        nrandom=nrandom,
        updated_after=updated_after,
        issuer=issuer,
        vo=vo)

    for rfile in listing:
        # split the PFNs into lan and non-lan ones, keeping the
        # (domain, priority, rse, client_extract) tuple for each
        lan, wan = {}, {}
        for pfn, info in rfile['pfns'].items():
            entry = (info['domain'], info['priority'], info['rse'], info['client_extract'])
            bucket = lan if entry[0] == 'lan' else wan
            bucket[pfn] = entry

        # lan replicas ordered by priority come first,
        # followed by the others ordered by the selection criteria
        lan_first = sorted(lan, key=lambda key: lan[key][1])
        wan_after = sort_replicas(wan, client_location, selection=select)

        ranked = _sorted_with_priorities(
            replicas=rfile['pfns'],
            sorted_pfns=chain(lan_first, wan_after),
            limit=limit)
        rfile['pfns'] = dict(ranked)
        yield rfile
def _list_and_sort_replicas(vo):
    """Yield the files from ``list_replicas`` with their ``pfns`` re-ordered.

    ``dids``, ``schemes``, ``client_location``, ``select`` and ``limit`` are
    taken from the enclosing scope.

    :param vo: Passed through to ``list_replicas``.
    :returns: A generator of the file dictionaries, each with ``rfile['pfns']``
              replaced by the output of ``_sorted_with_priorities``.
    """
    # list_replicas has to be entered before the reply starts,
    # otherwise its exceptions would not be propagated correctly
    for rfile in list_replicas(dids=dids, schemes=schemes, vo=vo):
        # PFN -> RSE lookup built from the per-RSE lists
        pfn_to_rse = {pfn: rse for rse, pfns in rfile['rses'].items() for pfn in pfns}
        ordered_pfns = sort_replicas(pfn_to_rse, client_location, selection=select)

        ranked = _sorted_with_priorities(rfile['pfns'], ordered_pfns, limit=limit)
        rfile['pfns'] = dict(ranked)
        yield rfile
def generate(request_id, issuer, vo):
    """Stream the listed replicas as JSON lines or as a metalink document.

    All query parameters other than the three arguments, as well as
    ``metalink``, ``select`` and ``limit``, are taken from the enclosing scope.

    :param request_id: Passed through to ``list_replicas``.
    :param issuer: Passed through to ``list_replicas``.
    :param vo: Passed through to ``list_replicas``.
    :returns: A generator of strings.
    """
    xml_prolog = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # list_replicas has to be entered before the reply starts,
    # otherwise its exceptions would not be propagated correctly
    prolog_pending = metalink
    listing = list_replicas(dids=dids,
                            schemes=schemes,
                            unavailable=unavailable,
                            request_id=request_id,
                            ignore_availability=ignore_availability,
                            all_states=all_states,
                            rse_expression=rse_expression,
                            client_location=client_location,
                            domain=domain,
                            signature_lifetime=signature_lifetime,
                            resolve_archives=resolve_archives,
                            resolve_parents=resolve_parents,
                            updated_after=updated_after,
                            issuer=issuer,
                            vo=vo)

    for rfile in listing:
        if prolog_pending:
            # first file of a metalink reply: open the document
            yield xml_prolog
            prolog_pending = False

        if not metalink:
            yield dumps(rfile, cls=APIEncoder) + '\n'
            continue

        # PFN -> (domain, priority, rse, client_extract)
        pfn_info = {
            pfn: (rfile['pfns'][pfn]['domain'],
                  rfile['pfns'][pfn]['priority'],
                  rfile['pfns'][pfn]['rse'],
                  rfile['pfns'][pfn]['client_extract'])
            for pfn in rfile['pfns']
        }

        yield ''.join((' <file name="', rfile['name'], '">\n'))

        if rfile.get('parents'):
            yield ' <parents>\n'
            for parent in rfile['parents']:
                yield ''.join((' <did>', parent, '</did>\n'))
            yield ' </parents>\n'

        yield ''.join((' <identity>', rfile['scope'], ':', rfile['name'], '</identity>\n'))
        for hash_type in ('adler32', 'md5'):
            if rfile[hash_type] is not None:
                yield ''.join((' <hash type="', hash_type, '">', rfile[hash_type], '</hash>\n'))
        yield ''.join((' <size>', str(rfile['bytes']), '</size>\n'))

        policy_schema = config_get('policy', 'schema', raise_exception=False, default='generic')
        yield f' <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

        # lan replicas ordered by priority go first, everything else follows
        # in the order decided by sort_replicas
        lan_pfns = sorted((pfn for pfn, info in pfn_info.items() if info[0] == 'lan'),
                          key=lambda pfn: pfn_info[pfn][1])
        other_info = {pfn: info for pfn, info in pfn_info.items() if info[0] != 'lan'}
        ordered_pfns = lan_pfns + sort_replicas(other_info, client_location, selection=select)

        for rank, pfn in enumerate(ordered_pfns, start=1):
            pfn_domain, _, pfn_rse, client_extract = pfn_info[pfn]
            yield ''.join((' <url location="', str(pfn_rse),
                           '" domain="', str(pfn_domain),
                           '" priority="', str(rank),
                           '" client_extract="', str(client_extract).lower(),
                           '">', escape(pfn), '</url>\n'))
            # a falsy limit means "no limit"
            if limit and limit == rank:
                break
        yield ' </file>\n'

    if not metalink:
        return

    if prolog_pending:
        # nothing was streamed: still answer with a complete, empty document
        yield xml_prolog + '</metalink>\n'
    else:
        # close the document that was opened for the first file
        yield '</metalink>\n'
def get(self, scope_name):
    """
    ---
    summary: Header redirect
    description: Get the header redirect.
    tags:
      - Redirect
    parameters:
    - name: scope_name
      in: path
      description: The data identifier (scope)/(name).
      schema:
        type: string
      style: simple
    - name: ip
      in: query
      description: The client ip.
      schema:
        type: string
      style: simple
      required: false
    - name: fqdn
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: site
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: schemes
      in: query
      schema:
        type: array
      style: simple
      required: false
    - name: select
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: sort
      in: query
      schema:
        type: string
      style: simple
      required: false
    - name: rse
      in: query
      schema:
        type: string
      style: simple
      required: false
    responses:
      303:
        description: OK
        content:
          application/json:
            schema:
              description: The redirect url.
              type: string
      401:
        description: Invalid Auth Token
      404:
        description: Rse or did not found
    """
    headers = self.get_headers()

    try:
        scope, name = parse_scope_name(scope_name, extract_vo(request.headers))
    except ValueError as error:
        return generate_http_error_flask(400, error, headers=headers)

    # shared reply for every "no usable replica" situation
    no_rse_reply = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

    try:
        args = request.args
        client_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
        client_location = {
            'ip': args.get('ip', default=client_ip),
            'fqdn': args.get('fqdn', default=None),
            'site': args.get('site', default=None),
        }

        # fall back to the default HTTP protocols if no scheme is given
        schemes = args.getlist('schemes') or ['davs', 'https', 's3']
        # 'sort' takes precedence over 'select'
        sortby = args.get('sort', default=args.get('select', default='random'))
        rse = args.get('rse', default=None)
        site = client_location['site']

        # forward the schemes and the selection to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        schemes_csv = ','.join(schemes)
        headers.set(
            'Link',
            f'<{cleaned_url}/metalink?schemes={schemes_csv}&select={sortby}>; rel=describedby; type="application/metalink+xml"'
        )

        # get vo if given
        vo = extract_vo(request.headers)

        did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

        selected_url = None
        for rfile in replicas:
            rses = rfile['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: take its first URL or give up
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                    continue
                return no_rse_reply

            # since this is HTTP-only redirection, and to ensure compatibility
            # with as many http clients as possible, davs and s3 URLs are
            # forcibly replaced by https
            https_pfns = {}
            for rse_name, pfns in rses.items():
                for pfn in pfns:
                    https_pfns[pfn.replace('davs://', 'https://').replace('s3://', 'https://')] = rse_name

            if not https_pfns:
                return no_rse_reply

            if site:
                candidates = site_selector(https_pfns, site, vo)
                if not candidates:
                    return no_rse_reply
            else:
                candidates = sort_replicas(https_pfns, client_location, selection=sortby)
            selected_url = candidates[0]

        if not selected_url:
            return 'no redirection possible - file does not exist', 404, headers

        response = redirect(selected_url, code=303)
        response.headers.extend(headers)
        return response
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, error, headers=headers)
def get(self, scope_name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    :param scope_name: data identifier (scope)/(name).
    :resheader Content-Type: application/metalink+xml'.
    :status 303: Redirect.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    :status 500: Internal Error.
    """
    headers = Headers()
    for header_name, header_value in (
            ('Access-Control-Allow-Origin', request.environ.get('HTTP_ORIGIN')),
            ('Access-Control-Allow-Headers', request.environ.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS')),
            ('Access-Control-Allow-Methods', '*'),
            ('Access-Control-Allow-Credentials', 'true')):
        headers.set(header_name, header_value)

    try:
        scope, name = parse_scope_name(scope_name, request.headers.get('X-Rucio-VO', default='def'))
    except ValueError as error:
        return generate_http_error_flask(400, 'ValueError', error.args[0], headers=headers)
    except Exception as error:
        logging.exception("Internal Error")
        return str(error), 500, headers

    # shared reply for every "no usable replica" situation
    no_rse_reply = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

    try:
        # defaults used when the request carries no query string at all
        select, rse, site = 'random', None, None
        schemes = ['davs', 'http', 'https']

        client_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if request.query_string:
            params = parse_qs(request.query_string.decode(encoding='utf-8'))
            # 'sort' takes precedence over 'select'
            select = params.get('sort', params.get('select', [select]))[0]
            rse = params.get('rse', [rse])[0]
            site = params.get('site', [site])[0]
            # NOTE: with a query string but without 'schemes' the default is
            # a different list than the one used without any query string
            schemes = params['schemes'][0] if 'schemes' in params else ['davs', 'https', 's3']
            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # forward the schemes and the selection to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        link_template = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
        if isinstance(schemes, list):
            schemes_param = ','.join(schemes)
        else:
            schemes_param = schemes
            schemes = [schemes]  # list_replicas needs a list
        headers.set('Link', link_template % (cleaned_url, schemes_param, select))

        # get vo if given
        vo = request.headers.get('X-Rucio-VO', default='def')

        did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

        selected_url = None
        for rfile in replicas:
            rses = rfile['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: take its first URL or give up
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                    continue
                return no_rse_reply

            # since this is HTTP-only redirection, and to ensure compatibility
            # with as many http clients as possible, davs and s3 URLs are
            # forcibly replaced by https
            https_pfns = {}
            for rse_name, pfns in rses.items():
                for pfn in pfns:
                    https_pfns[pfn.replace('davs://', 'https://').replace('s3://', 'https://')] = rse_name

            if not https_pfns:
                return no_rse_reply

            if site:
                candidates = site_selector(https_pfns, site, vo)
                if not candidates:
                    return no_rse_reply
            else:
                candidates = sort_replicas(https_pfns, client_location, selection=select)
            selected_url = candidates[0]

        if not selected_url:
            return 'no redirection possible - file does not exist', 404, headers

        response = redirect(selected_url, code=303)
        response.headers.extend(headers)
        return response
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, 'ReplicaNotFound', error.args[0], headers=headers)
    except RucioException as error:
        return generate_http_error_flask(500, error.__class__.__name__, error.args[0], headers=headers)
    except Exception as error:
        logging.exception("Internal Error")
        return str(error), 500, headers
def generate(request_id, issuer, vo):
    """Stream the listed replicas as JSON lines or as a metalink document.

    All query parameters other than the three arguments, as well as
    ``metalink``, ``select`` and ``limit``, are taken from the enclosing scope.
    ``rfile['pfns']`` is re-ordered and limited before the file is emitted in
    either format.

    :param request_id: Passed through to ``list_replicas``.
    :param issuer: Passed through to ``list_replicas``.
    :param vo: Passed through to ``list_replicas``.
    :returns: A generator of strings.
    """
    xml_prolog = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # list_replicas has to be entered before the reply starts,
    # otherwise its exceptions would not be propagated correctly
    prolog_pending = True
    listing = list_replicas(
        dids=dids,
        schemes=schemes,
        unavailable=unavailable,
        request_id=request_id,
        ignore_availability=ignore_availability,
        all_states=all_states,
        rse_expression=rse_expression,
        client_location=client_location,
        domain=domain,
        signature_lifetime=signature_lifetime,
        resolve_archives=resolve_archives,
        resolve_parents=resolve_parents,
        nrandom=nrandom,
        updated_after=updated_after,
        issuer=issuer,
        vo=vo)

    for rfile in listing:
        # split the PFNs into lan and non-lan ones, keeping the
        # (domain, priority, rse, client_extract) tuple for each
        lan, wan = {}, {}
        for pfn, info in rfile['pfns'].items():
            entry = (info['domain'], info['priority'], info['rse'], info['client_extract'])
            bucket = lan if entry[0] == 'lan' else wan
            bucket[pfn] = entry

        # lan replicas ordered by priority come first,
        # followed by the others ordered by the selection criteria
        lan_first = sorted(lan, key=lambda key: lan[key][1])
        wan_after = sort_replicas(wan, client_location, selection=select)
        rfile['pfns'] = dict(
            _sorted_with_priorities(
                replicas=rfile['pfns'],
                sorted_pfns=chain(lan_first, wan_after),
                limit=limit))

        if not metalink:
            yield dumps(rfile, cls=APIEncoder) + '\n'
            continue

        if prolog_pending:
            # first file of a metalink reply: open the document
            yield xml_prolog
            prolog_pending = False

        yield ''.join((' <file name="', rfile['name'], '">\n'))

        if rfile.get('parents'):
            yield ' <parents>\n'
            for parent in rfile['parents']:
                yield ''.join((' <did>', parent, '</did>\n'))
            yield ' </parents>\n'

        yield ''.join((' <identity>', rfile['scope'], ':', rfile['name'], '</identity>\n'))
        for hash_type in ('adler32', 'md5'):
            if rfile[hash_type] is not None:
                yield ''.join((' <hash type="', hash_type, '">', rfile[hash_type], '</hash>\n'))
        yield ''.join((' <size>', str(rfile['bytes']), '</size>\n'))

        policy_schema = config_get('policy', 'schema', raise_exception=False, default='generic')
        yield f' <glfn name="/{policy_schema}/rucio/{rfile["scope"]}:{rfile["name"]}"></glfn>\n'

        # the pfns were already ordered above, so they are written as they come
        for pfn, info in rfile['pfns'].items():
            yield ''.join((' <url location="', str(info['rse']),
                           '" domain="', str(info['domain']),
                           '" priority="', str(info['priority']),
                           '" client_extract="', str(info['client_extract']).lower(),
                           '">', escape(pfn), '</url>\n'))
        yield ' </file>\n'

    if not metalink:
        return

    if prolog_pending:
        # nothing was streamed: still answer with a complete, empty document
        yield xml_prolog + '</metalink>\n'
    else:
        # close the document that was opened for the first file
        yield '</metalink>\n'
def get(self, scope_name):
    """
    Header Redirect

    .. :quickref: HeaderRedirector; Header redirect.

    :param scope_name: data identifier (scope)/(name).
    :resheader Content-Type: application/metalink+xml'.
    :status 303: Redirect.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: DID Not Found.
    """
    headers = self.get_headers()

    try:
        scope, name = parse_scope_name(scope_name, request.headers.get('X-Rucio-VO', default='def'))
    except ValueError as error:
        return generate_http_error_flask(400, error, headers=headers)

    # shared reply for every "no usable replica" situation
    no_rse_reply = ('no redirection possible - no valid RSE for HTTP redirection found', 404, headers)

    try:
        args = request.args
        client_ip = request.headers.get('X-Forwarded-For', default=request.remote_addr)
        client_location = {
            'ip': args.get('ip', default=client_ip),
            'fqdn': args.get('fqdn', default=None),
            'site': args.get('site', default=None),
        }

        # fall back to the default HTTP protocols if no scheme is given
        schemes = args.getlist('schemes') or ['davs', 'https', 's3']
        # 'sort' takes precedence over 'select'
        sortby = args.get('sort', default=args.get('select', default='random'))
        rse = args.get('rse', default=None)
        site = client_location['site']

        # forward the schemes and the selection to potential metalink followups
        cleaned_url = request.environ.get('REQUEST_URI').split('?')[0]
        schemes_csv = ','.join(schemes)
        headers.set(
            'Link',
            f'<{cleaned_url}/metalink?schemes={schemes_csv}&select={sortby}>; rel=describedby; type="application/metalink+xml"'
        )

        # get vo if given
        vo = request.headers.get('X-Rucio-VO', default='def')

        did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

        selected_url = None
        for rfile in replicas:
            rses = rfile['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: take its first URL or give up
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                    continue
                return no_rse_reply

            # since this is HTTP-only redirection, and to ensure compatibility
            # with as many http clients as possible, davs and s3 URLs are
            # forcibly replaced by https
            https_pfns = {}
            for rse_name, pfns in rses.items():
                for pfn in pfns:
                    https_pfns[pfn.replace('davs://', 'https://').replace('s3://', 'https://')] = rse_name

            if not https_pfns:
                return no_rse_reply

            if site:
                candidates = site_selector(https_pfns, site, vo)
                if not candidates:
                    return no_rse_reply
            else:
                candidates = sort_replicas(https_pfns, client_location, selection=sortby)
            selected_url = candidates[0]

        if not selected_url:
            return 'no redirection possible - file does not exist', 404, headers

        response = redirect(selected_url, code=303)
        response.headers.extend(headers)
        return response
    except ReplicaNotFound as error:
        return generate_http_error_flask(404, error, headers=headers)
def GET(self, scope, name):
    """
    List all replicas for data identifiers.

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :param scope: The scope of the data identifier.
    :param name: The name of the data identifier.
    :returns: A dictionary containing all replicas information.
    :returns: A metalink description of replicas if metalink(4)+xml is specified in Accept:
    """
    xml_prolog = '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

    # metalink is only produced when the exact media type is listed in Accept
    accept = ctx.env.get('HTTP_ACCEPT')
    metalink = accept is not None and 'application/metalink4+xml' in accept.split(',')

    dids = [{'scope': scope, 'name': name}]
    schemes = select = limit = None
    if ctx.query:
        # ctx.query starts with '?', which parse_qs must not see
        params = parse_qs(ctx.query[1:])
        schemes = params.get('schemes', schemes)
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            limit = int(params['limit'][0])

    forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
    client_ip = ctx.ip if forwarded_for is None else forwarded_for
    client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if metalink and schemes is None:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # list_replicas has to be entered before the reply starts,
        # otherwise its exceptions would not be propagated correctly
        nothing_streamed = True

        header('Content-Type', 'application/metalink4+xml' if metalink else 'application/x-json-stream')

        for rfile in list_replicas(dids=dids, schemes=schemes, vo=ctx.env.get('vo')):
            if nothing_streamed:
                nothing_streamed = False
                # first file of a metalink reply: open the document
                if metalink:
                    yield xml_prolog

            # map every PFN to the RSE it lives on, then order the PFNs
            pfn_to_rse = {pfn: rse for rse, pfns in rfile['rses'].items() for pfn in pfns}
            ordered_pfns = sort_replicas(pfn_to_rse, client_location, selection=select)

            if not metalink:
                yield dumps(rfile) + '\n'
                continue

            yield ''.join((' <file name="', rfile['name'], '">\n'))
            yield ''.join((' <identity>', rfile['scope'], ':', rfile['name'], '</identity>\n'))
            for hash_type in ('adler32', 'md5'):
                if rfile[hash_type] is not None:
                    yield ''.join((' <hash type="', hash_type, '">', rfile[hash_type], '</hash>\n'))
            yield ''.join((' <size>', str(rfile['bytes']), '</size>\n'))
            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            for rank, pfn in enumerate(ordered_pfns, start=1):
                yield ''.join((' <url location="', str(pfn_to_rse[pfn]),
                               '" priority="', str(rank), '">',
                               escape(pfn), '</url>\n'))
                # a falsy limit means "no limit"
                if limit and limit == rank:
                    break
            yield ' </file>\n'

        if metalink:
            if nothing_streamed:
                # ensure complete metalink on success without any content
                yield xml_prolog + '</metalink>\n'
            else:
                # if metalink start was already sent, always send the end
                yield '</metalink>\n'
    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def POST(self):
    """
    List all replicas for data identifiers.

    The selection is read from the JSON body; ``select``, ``sort`` and
    ``limit`` may additionally be given in the query string, where ``sort``
    takes precedence over ``select``.

    HTTP Success:
        200 OK

    HTTP Error:
        400 Bad Request
        401 Unauthorized
        406 Not Acceptable
        500 InternalError

    :returns: A dictionary containing all replicas information, either as JSON stream or metalink4.
    """
    # metalink is only produced when the exact media type is listed in Accept
    metalink = False
    if ctx.env.get('HTTP_ACCEPT') is not None:
        tmp = ctx.env.get('HTTP_ACCEPT').split(',')
        if 'application/metalink4+xml' in tmp:
            metalink = True

    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    dids, schemes, select, unavailable, limit = [], None, None, False, None
    ignore_availability, rse_expression, all_states, domain = False, None, False, None
    signature_lifetime, resolve_archives, resolve_parents = None, True, False
    updated_after = None
    client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

    json_data = data()
    try:
        params = parse_response(json_data)
        if 'dids' in params:
            dids = params['dids']
        if 'schemes' in params:
            schemes = params['schemes']
        if 'unavailable' in params:
            unavailable = params['unavailable']
            ignore_availability = True
        if 'all_states' in params:
            all_states = params['all_states']
        if 'rse_expression' in params:
            rse_expression = params['rse_expression']
        if 'client_location' in params:
            client_location.update(params['client_location'])
        if 'sort' in params:
            select = params['sort']
        if 'domain' in params:
            domain = params['domain']
        if 'resolve_archives' in params:
            resolve_archives = params['resolve_archives']
        if 'resolve_parents' in params:
            resolve_parents = params['resolve_parents']

        if 'signature_lifetime' in params:
            signature_lifetime = params['signature_lifetime']
        else:
            # hardcoded default of 10 minutes if config is not parseable
            signature_lifetime = config_get('credentials', 'signature_lifetime', raise_exception=False, default=600)

        if 'updated_after' in params:
            if isinstance(params['updated_after'], (int, float)):
                # convert from epoch time stamp to datetime object
                updated_after = datetime.utcfromtimestamp(params['updated_after'])
            else:
                # attempt UTC format '%Y-%m-%dT%H:%M:%S' conversion
                updated_after = datetime.strptime(params['updated_after'], '%Y-%m-%dT%H:%M:%S')
    except ValueError:
        raise generate_http_error(400, 'ValueError', 'Cannot decode json parameter list')

    if ctx.query:
        # ctx.query starts with '?', which parse_qs must not see;
        # parse_qs returns a list of strings for every key
        params = parse_qs(ctx.query[1:])
        if 'select' in params:
            select = params['select'][0]
        if 'limit' in params:
            # the limit is compared against an integer counter below, so it
            # has to be converted; as a string it would never match
            try:
                limit = int(params['limit'][0])
            except ValueError:
                raise generate_http_error(400, 'ValueError', 'limit must be an integer')
        if 'sort' in params:
            # take the first value, not the whole list
            select = params['sort'][0]

    # Resolve all reasonable protocols when doing metalink for maximum access possibilities
    if metalink and schemes is None:
        schemes = SUPPORTED_PROTOCOLS

    try:
        # we need to call list_replicas before starting to reply
        # otherwise the exceptions won't be propagated correctly
        __first = True

        header('Content-Type', 'application/metalink4+xml' if metalink else 'application/x-json-stream')

        for rfile in list_replicas(dids=dids,
                                   schemes=schemes,
                                   unavailable=unavailable,
                                   request_id=ctx.env.get('request_id'),
                                   ignore_availability=ignore_availability,
                                   all_states=all_states,
                                   rse_expression=rse_expression,
                                   client_location=client_location,
                                   domain=domain,
                                   signature_lifetime=signature_lifetime,
                                   resolve_archives=resolve_archives,
                                   resolve_parents=resolve_parents,
                                   updated_after=updated_after,
                                   issuer=ctx.env.get('issuer'),
                                   vo=ctx.env.get('vo')):

            # in first round, stream the metalink header
            if __first:
                if metalink:
                    yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'
                __first = False

            # ... then, stream the replica information
            if not metalink:
                yield dumps(rfile, cls=APIEncoder) + '\n'
            else:
                # PFN -> (domain, priority, rse, client_extract)
                dictreplica = {}
                for replica in rfile['pfns']:
                    dictreplica[replica] = (rfile['pfns'][replica]['domain'],
                                            rfile['pfns'][replica]['priority'],
                                            rfile['pfns'][replica]['rse'],
                                            rfile['pfns'][replica]['client_extract'])

                yield ' <file name="' + rfile['name'] + '">\n'

                if 'parents' in rfile and rfile['parents']:
                    yield ' <parents>\n'
                    for parent in rfile['parents']:
                        yield ' <did>' + parent + '</did>\n'
                    yield ' </parents>\n'

                yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'
                if rfile['adler32'] is not None:
                    yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
                if rfile['md5'] is not None:
                    yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'
                yield ' <size>' + str(rfile['bytes']) + '</size>\n'

                yield ' <glfn name="/%s/rucio/%s:%s"></glfn>\n' % (
                    config_get('policy', 'schema', raise_exception=False, default='generic'),
                    rfile['scope'],
                    rfile['name'])

                # lan replicas go first, the others follow in the order
                # decided by sort_replicas
                lanreplicas = [replica for replica, v in dictreplica.items() if v[0] == 'lan']
                replicas = lanreplicas + sort_replicas({k: v for k, v in dictreplica.items() if v[0] != 'lan'},
                                                       client_location, selection=select)

                for idx, replica in enumerate(replicas, start=1):
                    yield ' <url location="' + str(dictreplica[replica][2]) \
                        + '" domain="' + str(dictreplica[replica][0]) \
                        + '" priority="' + str(idx) \
                        + '" client_extract="' + str(dictreplica[replica][3]).lower() \
                        + '">' + escape(replica) + '</url>\n'
                    # a falsy limit means "no limit"
                    if limit and limit == idx:
                        break
                yield ' </file>\n'

        if metalink:
            if __first:
                # ensure complete metalink on success without any content
                yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n</metalink>\n'
            else:
                # if metalink start was already sent, always send the end
                yield '</metalink>\n'
    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    Metalink redirect

    HTTP Success:
        200 OK

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound
        406 Not Acceptable

    :param scope: The scope name of the file.
    :param name: The name of the file.
    :returns: A generator of strings that concatenate to a metalink4 XML document.
    """
    # imported locally so this block does not depend on a module-level import
    from xml.sax.saxutils import escape

    header('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN'))
    header('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS'))
    header('Access-Control-Allow-Methods', '*')
    header('Access-Control-Allow-Credentials', 'true')

    dids = [{'scope': scope, 'name': name}]
    schemes = ['http', 'https', 'root', 'gsiftp', 'srm', 'davs']
    select = None

    # set the correct client IP
    client_ip = ctx.env.get('HTTP_X_FORWARDED_FOR')
    if client_ip is None:
        client_ip = ctx.ip

    client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

    if ctx.query:
        # ctx.query starts with '?', which parse_qs must not see
        params = parse_qs(ctx.query[1:])
        if 'schemes' in params:
            schemes = params['schemes']
        if 'select' in params:
            select = params['select'][0]
        # 'sort' takes precedence over 'select'
        if 'sort' in params:
            select = params['sort'][0]

        for key in ('ip', 'fqdn', 'site'):
            if key in params:
                client_location[key] = params[key][0]

    # get vo if given
    vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

    try:
        # materialise the listing before anything is sent, so that lookup
        # errors still reach the handlers below
        tmp_replicas = list(list_replicas(dids=dids, schemes=schemes, client_location=client_location, vo=vo))

        if not tmp_replicas:
            raise ReplicaNotFound('no redirection possible - cannot find the DID')

        # first, set the appropriate content type, and stream the header
        header('Content-Type', 'application/metalink4+xml')
        yield '<?xml version="1.0" encoding="UTF-8"?>\n<metalink xmlns="urn:ietf:params:xml:ns:metalink">\n'

        # iteratively stream the XML per file
        for rfile in tmp_replicas:
            # map every PFN to the RSE it is stored on
            dictreplica = {}
            for rse in rfile['rses']:
                for replica in rfile['rses'][rse]:
                    dictreplica[replica] = rse

            # stream metadata
            yield ' <file name="' + rfile['name'] + '">\n'
            yield ' <identity>' + rfile['scope'] + ':' + rfile['name'] + '</identity>\n'

            if rfile['adler32'] is not None:
                yield ' <hash type="adler32">' + rfile['adler32'] + '</hash>\n'
            if rfile['md5'] is not None:
                yield ' <hash type="md5">' + rfile['md5'] + '</hash>\n'

            yield ' <size>' + str(rfile['bytes']) + '</size>\n'

            yield ' <glfn name="/atlas/rucio/%s:%s">' % (rfile['scope'], rfile['name'])
            yield '</glfn>\n'

            replicas = sort_replicas(dictreplica, client_location, selection=select)

            # stream URLs; the URL is XML-escaped because characters such as
            # '&' (e.g. in a query string) would otherwise produce a
            # malformed document
            for idx, replica in enumerate(replicas, start=1):
                yield ' <url location="' + str(dictreplica[replica]) \
                    + '" priority="' + str(idx) \
                    + '">' + escape(replica) + '</url>\n'

            yield ' </file>\n'

        # don't forget to send the metalink footer
        yield '</metalink>\n'

    except DataIdentifierNotFound as error:
        raise generate_http_error(404, 'DataIdentifierNotFound', error.args[0])
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)
def GET(self, scope, name):
    """
    Header Redirect

    HTTP Success:
        303 See Other

    HTTP Error:
        401 Unauthorized
        500 InternalError
        404 Notfound

    :param scope: The scope name of the file.
    :param name: The name of the file.
    """
    for header_name, header_value in (
            ('Access-Control-Allow-Origin', ctx.env.get('HTTP_ORIGIN')),
            ('Access-Control-Allow-Headers', ctx.env.get('HTTP_ACCESS_CONTROL_REQUEST_HEADERS')),
            ('Access-Control-Allow-Methods', '*'),
            ('Access-Control-Allow-Credentials', 'true')):
        header(header_name, header_value)

    no_rse_message = 'no redirection possible - no valid RSE for HTTP redirection found'

    try:
        # defaults used when the request carries no query string at all
        select, rse, site = 'random', None, None
        schemes = ['davs', 'http', 'https']

        forwarded_for = ctx.env.get('HTTP_X_FORWARDED_FOR')
        client_ip = ctx.ip if forwarded_for is None else forwarded_for
        client_location = {'ip': client_ip, 'fqdn': None, 'site': None}

        if ctx.query:
            # ctx.query starts with '?', which parse_qs must not see
            params = parse_qs(ctx.query[1:])
            # 'sort' takes precedence over 'select'
            select = params.get('sort', params.get('select', [select]))[0]
            rse = params.get('rse', [rse])[0]
            site = params.get('site', [site])[0]
            # NOTE: with a query string but without 'schemes' the default is
            # a different list than the one used without any query string
            schemes = params['schemes'][0] if 'schemes' in params else ['davs', 'https', 's3']
            for key in ('ip', 'fqdn', 'site'):
                if key in params:
                    client_location[key] = params[key][0]

        # forward the schemes and the selection to potential metalink followups
        cleaned_url = ctx.env.get('REQUEST_URI').split('?')[0]
        link_template = '<%s/metalink?schemes=%s&select=%s>; rel=describedby; type="application/metalink+xml"'
        if isinstance(schemes, list):
            schemes_param = ','.join(schemes)
        else:
            schemes_param = schemes
            schemes = [schemes]  # list_replicas needs a list
        header('Link', link_template % (cleaned_url, schemes_param, select))

        # get vo if given
        vo = ctx.env.get('HTTP_X_RUCIO_VO', 'def')

        did = {'scope': scope, 'name': name, 'type': 'FILE'}
        replicas = list(list_replicas(dids=[did], schemes=schemes, client_location=client_location, vo=vo))

        selected_url = None
        for rfile in replicas:
            rses = rfile['rses']
            if not rses:
                continue

            if rse:
                # an explicit RSE was requested: take its first URL or give up
                if rse in rses and rses[rse]:
                    selected_url = rses[rse][0]
                    continue
                raise ReplicaNotFound(no_rse_message)

            # since this is HTTP-only redirection, and to ensure compatibility
            # with as many http clients as possible, davs and s3 URLs are
            # forcibly replaced by https
            https_pfns = {}
            for rse_name, pfns in rses.items():
                for pfn in pfns:
                    https_pfns[pfn.replace('davs://', 'https://').replace('s3://', 'https://')] = rse_name

            if not https_pfns:
                raise ReplicaNotFound(no_rse_message)

            if site:
                candidates = site_selector(https_pfns, site, vo)
                if not candidates:
                    raise ReplicaNotFound(no_rse_message)
            else:
                candidates = sort_replicas(https_pfns, client_location, selection=select)
            selected_url = candidates[0]

        if selected_url:
            raise seeother(selected_url)

        raise ReplicaNotFound('no redirection possible - file does not exist')

    except seeother:
        # the redirect itself is signalled by an exception and must pass through
        raise
    except ReplicaNotFound as error:
        raise generate_http_error(404, 'ReplicaNotFound', error.args[0])
    except RucioException as error:
        raise generate_http_error(500, error.__class__.__name__, error.args[0])
    except Exception as error:
        print(format_exc())
        raise InternalError(error)