def test_exists_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Success)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '3_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': pfn_a},
                                                     {'name': pfn_b}])
    if not (status and details['user.%s:1_rse_remote_get.raw' % self.user]
            and details['user.%s:2_rse_remote_get.raw' % self.user]
            and details[pfn_a] and details[pfn_b]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

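# NOTE: a minimal, hypothetical sketch of the (status, details) convention the
# tests above and below rely on. 'MOCK' and the 'user.jdoe' scope are placeholder
# names, not values from this suite: mgr.exists returns an overall boolean plus a
# per-file dict keyed by 'scope:name' for LFN inputs and by the raw PFN string
# for PFN-only inputs.
from rucio.rse import rsemanager as mgr

rse_settings = mgr.get_rse_info('MOCK')  # placeholder RSE
status, details = mgr.exists(rse_settings, [
    {'name': 'data.raw', 'scope': 'user.jdoe'},          # result under details['user.jdoe:data.raw']
    {'name': 'mock://host/prefix/user/jdoe/data.raw'},   # result under the PFN string itself
])
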
def test_rename_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Rename multiple files on storage (SourceNotFound)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '12_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_created.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.rename(self.rse_settings, [{'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user, 'new_name': '1_rse_new_not_created.raw'},
                                                     {'name': pfn_a, 'new_name': pfn_b}])
    if not status and isinstance(details['user.%s:1_rse_not_existing.raw' % self.user], type(details[pfn_a])):
        raise details['user.%s:1_rse_not_existing.raw' % self.user]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_exists_mgr_false_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Fail)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': pfn_a},
                                                     {'name': pfn_b}])
    if (status or not details['user.%s:1_rse_remote_get.raw' % self.user]
            or details['user.%s:not_existing_data.raw' % self.user]
            or not details[pfn_a] or details[pfn_b]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_rename_mgr_SourceNotFound_multi(self):
    """(RSE/PROTOCOLS): Rename multiple files on storage (SourceNotFound)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '12_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_created.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.rename(self.rse_settings, [{'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user, 'new_name': '1_rse_new_not_created.raw'},
                                                     {'name': pfn_a, 'new_name': pfn_b}])
    if not status and isinstance(details['user.%s:1_rse_not_existing.raw' % self.user], type(details[pfn_a])):
        raise details['user.%s:1_rse_not_existing.raw' % self.user]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_exists_mgr_false_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Fail)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': pfn_a},
                                                     {'name': pfn_b}])
    if (status or not details['user.%s:1_rse_remote_get.raw' % self.user]
            or details['user.%s:not_existing_data.raw' % self.user]
            or not details[pfn_a] or details[pfn_b]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_exists_mgr_ok_multi(self):
    """(RSE/PROTOCOLS): Check multiple files on storage (Success)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '3_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.exists(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                     {'name': pfn_a},
                                                     {'name': pfn_b}])
    if not (status and details['user.%s:1_rse_remote_get.raw' % self.user]
            and details['user.%s:2_rse_remote_get.raw' % self.user]
            and details[pfn_a] and details[pfn_b]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_rename_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Rename a single file on storage using PFN (Success)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '9_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '9_rse_new.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

def test_rename_mgr_FileReplicaAlreadyExists_single_pfn(self):
    """(RSE/PROTOCOLS): Rename a single file on storage using PFN (FileReplicaAlreadyExists)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '12_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

def test_rename_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Rename a single file on storage using PFN (SourceNotFound)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_new_not_created.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

def test_rename_mgr_FileReplicaAlreadyExists_multi(self):
    """(RSE/PROTOCOLS): Rename multiple files on storage (FileReplicaAlreadyExists)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '10_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}, impl=self.impl).values())[0]
    pfn_a_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}, impl=self.impl).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '11_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}, impl=self.impl).values())[0]
    pfn_b_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '11_rse_new_rename.raw', 'scope': 'user.%s' % self.user}, impl=self.impl).values())[0]
    status, details = mgr.rename(self.rse_settings, [{'name': '4_rse_remote_rename.raw', 'scope': 'user.%s' % self.user, 'new_name': '1_rse_remote_get.raw', 'new_scope': 'user.%s' % self.user},
                                                     {'name': '5_rse_remote_rename.raw', 'scope': 'user.%s' % self.user, 'new_name': '5_rse_new.raw'},
                                                     {'name': pfn_a, 'new_name': pfn_a_new},
                                                     {'name': pfn_b, 'new_name': pfn_b_new}], impl=self.impl)
    if (not status and details['user.%s:5_rse_remote_rename.raw' % self.user] and details[pfn_b]) \
            and isinstance(details['user.%s:4_rse_remote_rename.raw' % self.user], type(details[pfn_a])):
        raise details['user.%s:4_rse_remote_rename.raw' % self.user]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_rename_mgr_FileReplicaAlreadyExists_multi(self):
    """(RSE/PROTOCOLS): Rename multiple files on storage (FileReplicaAlreadyExists)"""
    pfn_a = list(mgr.lfns2pfns(self.rse_settings, {'name': '10_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_a_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '11_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_b_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '11_rse_new_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.rename(self.rse_settings, [{'name': '4_rse_remote_rename.raw', 'scope': 'user.%s' % self.user, 'new_name': '1_rse_remote_get.raw', 'new_scope': 'user.%s' % self.user},
                                                     {'name': '5_rse_remote_rename.raw', 'scope': 'user.%s' % self.user, 'new_name': '5_rse_new.raw'},
                                                     {'name': pfn_a, 'new_name': pfn_a_new},
                                                     {'name': pfn_b, 'new_name': pfn_b_new}])
    if (not status and details['user.%s:5_rse_remote_rename.raw' % self.user] and details[pfn_b]) \
            and isinstance(details['user.%s:4_rse_remote_rename.raw' % self.user], type(details[pfn_a])):
        raise details['user.%s:4_rse_remote_rename.raw' % self.user]
    else:
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_change_scope_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Change the scope of a single file on storage using PFN (Success)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_change_scope.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_change_scope.raw', 'scope': 'group.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

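# NOTE: a hedged restatement of the pattern in test_change_scope_mgr_ok_single_pfn
# above: a scope change is expressed as a rename from the PFN the old scope maps
# to, to the PFN the new scope maps to. 'MOCK', 'user.jdoe' and 'group.jdoe' are
# placeholder names for illustration only.
from rucio.rse import rsemanager as mgr

rse_settings = mgr.get_rse_info('MOCK')
old_pfn = list(mgr.lfns2pfns(rse_settings, {'name': 'f.raw', 'scope': 'user.jdoe'}).values())[0]
new_pfn = list(mgr.lfns2pfns(rse_settings, {'name': 'f.raw', 'scope': 'group.jdoe'}).values())[0]
mgr.rename(rse_settings, {'name': old_pfn, 'new_name': new_pfn})
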
def setUpClass(cls):
    """S3 (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    # cls.user = '******'  # use again when latency issue with S3 storage is resolved

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    rse_tag = 'BNL-BOTO'
    rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        print('No credentials found for this RSE.')

    for protocol in rse_settings['protocols']:
        if protocol['scheme'] == 's3':
            break
    conn = boto.connect_s3(host=protocol['hostname'],
                           port=int(protocol.get('port', 80)),
                           aws_access_key_id=rse_settings['credentials']['access_key'],
                           aws_secret_access_key=rse_settings['credentials']['secret_key'],
                           is_secure=rse_settings['credentials'].get('is_secure', False),
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())

    cls.static_file = list(mgr.lfns2pfns(rse_settings, {'name': 'data.raw', 'scope': 'user.%s' % cls.user}).values())[0]
    key = get_bucket_key(cls.static_file, conn, create=True)
    key.set_contents_from_filename("%s/data.raw" % cls.tmpdir)
    for f in MgrTestCases.files_remote:
        pfn = list(mgr.lfns2pfns(rse_settings, {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
        bucket_name, key_name = get_bucket_key_name(pfn)
        key.copy(bucket_name, key_name)

def setUpClass(cls):
    """POSIX (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    prefix = data['MOCK-POSIX']['protocols']['supported']['file']['prefix']
    try:
        os.mkdir(prefix)
    except Exception as e:
        print(e)
    os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % prefix)
    cls.static_file = '%s/data.raw' % prefix
    for f in MgrTestCases.files_remote:
        protocol = mgr.create_protocol(mgr.get_rse_info('MOCK-POSIX'), 'write')
        pfn = list(mgr.lfns2pfns(mgr.get_rse_info('MOCK-POSIX'), {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
        path = protocol.pfn2path(pfn)
        dirs = os.path.dirname(path)
        if not os.path.exists(dirs):
            os.makedirs(dirs)
        shutil.copy('%s/data.raw' % prefix, path)

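# NOTE: a minimal sketch of the LFN -> PFN -> local-path round trip used in the
# POSIX setUpClass above, assuming the MOCK-POSIX RSE definition is available;
# 'user.jdoe' is a placeholder scope.
from rucio.rse import rsemanager as mgr

rse_settings = mgr.get_rse_info('MOCK-POSIX')
protocol = mgr.create_protocol(rse_settings, 'write')
pfn = list(mgr.lfns2pfns(rse_settings, {'name': 'data.raw', 'scope': 'user.jdoe'}).values())[0]
path = protocol.pfn2path(pfn)  # the filesystem path the POSIX protocol writes to
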
def test_exists_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Success)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.exists(self.rse_settings, {'name': pfn})

def test_multi_get_mgr_ok(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (Success)"""
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.download(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                       {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                       {'name': '3_rse_remote_get.raw', 'scope': 'user.%s' % self.user, 'pfn': self.static_file},
                                                       {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user, 'pfn': pfn_b}],
                                   self.gettmpdir)
    if not (status and details['user.%s:1_rse_remote_get.raw' % self.user]
            and details['user.%s:2_rse_remote_get.raw' % self.user]
            and details['user.%s:3_rse_remote_get.raw' % self.user]
            and details['user.%s:4_rse_remote_get.raw' % self.user]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

def test_exists_mgr_false_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Fail)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    assert not mgr.exists(self.rse_settings, {'name': pfn})

def get(self, rse, scheme=None):
    """
    Return PFNs for a set of LFNs. Formatted as a JSON object where the key is a LFN and the value is the corresponding PFN.

    .. :quickref: Attributes; Translate LFNs to PFNs.

    :param rse: The RSE name.
    :param scheme: The protocol identifier.
    :query lfn: One or more LFNs to translate.
    :query scheme: Optional argument to help with the protocol selection (e.g., http / gsiftp / srm)
    :query domain: Optional argument used to select the protocol for wan or lan use cases.
    :query operation: Optional query argument to select the protocol for reads vs. writes.
    :resheader Content-Type: application/json
    :status 200: OK.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: RSE Protocol Not Supported.
    :status 404: RSE Protocol Domain Not Supported.
    :status 500: Internal Error.
    :returns: A list with detailed PFN information.
    """
    lfns = []
    scheme = request.args.get('scheme', None)
    domain = request.args.get('domain', 'wan')
    operation = request.args.get('operation', 'write')
    p_lfns = request.args.get('lfn', None)
    if p_lfns:
        info = p_lfns.split(":", 1)
        if len(info) != 2:
            return generate_http_error_flask(400, 'InvalidPath', 'LFN in invalid format')
        lfn_dict = {'scope': info[0], 'name': info[1]}
        lfns.append(lfn_dict)

    rse_settings = None
    try:
        rse_settings = get_rse_protocols(rse, issuer=request.environ.get('issuer'))
    except RSENotFound as error:
        return generate_http_error_flask(404, 'RSENotFound', error.args[0])
    except RSEProtocolNotSupported as error:
        return generate_http_error_flask(404, 'RSEProtocolNotSupported', error.args[0])
    except RSEProtocolDomainNotSupported as error:
        return generate_http_error_flask(404, 'RSEProtocolDomainNotSupported', error.args[0])
    except Exception as error:
        print(error)
        print(format_exc())
        return str(error), 500

    pfns = rsemanager.lfns2pfns(rse_settings, lfns, operation=operation, scheme=scheme, domain=domain)
    return Response(dumps(pfns), content_type="application/json")

def get(self, rse, scheme=None):
    """
    Return PFNs for a set of LFNs. Formatted as a JSON object where the key is a LFN and the value is the corresponding PFN.

    .. :quickref: Attributes; Translate LFNs to PFNs.

    :param rse: The RSE name.
    :param scheme: The protocol identifier.
    :query lfn: One or more LFNs to translate.
    :query scheme: Optional argument to help with the protocol selection (e.g., http / gsiftp / srm)
    :query domain: Optional argument used to select the protocol for wan or lan use cases.
    :query operation: Optional query argument to select the protocol for reads vs. writes.
    :resheader Content-Type: application/json
    :status 200: OK.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: RSE Protocol Not Supported.
    :status 404: RSE Protocol Domain Not Supported.
    :status 406: Not Acceptable.
    :status 500: Internal Error.
    :returns: A list with detailed PFN information.
    """
    lfns = []
    scheme = None
    domain = 'wan'
    operation = 'write'
    if request.query_string:
        for key, val in request.args.items():
            if key == 'lfn':
                info = val.split(":", 1)
                if len(info) != 2:
                    return generate_http_error_flask(400, 'InvalidPath', 'LFN in invalid format')
                lfn_dict = {'scope': info[0], 'name': info[1]}
                lfns.append(lfn_dict)
            elif key == 'scheme':
                scheme = val
            elif key == 'domain':
                domain = val
            elif key == 'operation':
                operation = val

    rse_settings = None
    try:
        rse_settings = get_rse_protocols(rse, issuer=request.environ.get('issuer'), vo=request.environ.get('vo'))
    except RSENotFound as error:
        return generate_http_error_flask(404, 'RSENotFound', error.args[0])
    except RSEProtocolNotSupported as error:
        return generate_http_error_flask(404, 'RSEProtocolNotSupported', error.args[0])
    except RSEProtocolDomainNotSupported as error:
        return generate_http_error_flask(404, 'RSEProtocolDomainNotSupported', error.args[0])
    except Exception as error:
        logging.exception("Internal Error")
        return str(error), 500

    pfns = rsemanager.lfns2pfns(rse_settings, lfns, operation=operation, scheme=scheme, domain=domain)
    return jsonify(pfns)

def setUpClass(cls):
    """SRM (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    prefix = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['prefix']
    hostname = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['hostname']
    if hostname.count("://"):
        hostname = hostname.split("://")[1]
    if 'port' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'].keys():
        port = int(data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['port'])
    else:
        port = 0
    if 'extended_attributes' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'].keys() and 'web_service_path' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['extended_attributes'].keys():
        web_service_path = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['extended_attributes']['web_service_path']
    else:
        web_service_path = ''

    os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % cls.tmpdir)
    if port > 0:
        cls.static_file = 'srm://%s:%s%s%s/data.raw' % (hostname, port, web_service_path, prefix)
    else:
        cls.static_file = 'srm://%s%s%s/data.raw' % (hostname, web_service_path, prefix)
    cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, cls.static_file)
    execute(cmd)

    for f in MgrTestCases.files_remote:
        tmp = list(mgr.lfns2pfns(mgr.get_rse_info('FZK-LCG2_SCRATCHDISK'), {'name': f, 'scope': 'user.%s' % cls.user}, scheme='srm').values())[0]
        cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, tmp)
        execute(cmd)

def get(self, rse, scheme=None):
    """
    Return PFNs for a set of LFNs. Formatted as a JSON object where the key is a LFN and the value is the corresponding PFN.

    .. :quickref: Attributes; Translate LFNs to PFNs.

    :param rse: The RSE name.
    :param scheme: The protocol identifier.
    :query lfn: One or more LFNs to translate.
    :query scheme: Optional argument to help with the protocol selection (e.g., http / gsiftp / srm)
    :query domain: Optional argument used to select the protocol for wan or lan use cases.
    :query operation: Optional query argument to select the protocol for reads vs. writes.
    :resheader Content-Type: application/json
    :status 200: OK.
    :status 401: Invalid Auth Token.
    :status 404: RSE Not Found.
    :status 404: RSE Protocol Not Supported.
    :status 404: RSE Protocol Domain Not Supported.
    :status 406: Not Acceptable.
    :returns: A list with detailed PFN information.
    """
    lfns = request.args.getlist('lfn')
    lfns = list(map(lambda lfn: lfn.split(":", 1), lfns))
    if any(filter(lambda info: len(info) != 2, lfns)):
        invalid_lfns = ', '.join(':'.join(info) for info in lfns if len(info) != 2)
        return generate_http_error_flask(400, InvalidPath.__name__, 'LFN(s) in invalid format: ' + invalid_lfns)
    lfns = list(map(lambda info: {'scope': info[0], 'name': info[1]}, lfns))

    scheme = request.args.get('scheme', default=None)
    domain = request.args.get('domain', default='wan')
    operation = request.args.get('operation', default='write')

    try:
        rse_settings = get_rse_protocols(rse, issuer=request.environ.get('issuer'), vo=request.environ.get('vo'))
    except (RSENotFound, RSEProtocolNotSupported, RSEProtocolDomainNotSupported) as error:
        return generate_http_error_flask(404, error)

    pfns = rsemanager.lfns2pfns(rse_settings, lfns, operation=operation, scheme=scheme, domain=domain)
    if not pfns:
        return generate_http_error_flask(404, ReplicaNotFound.__name__, 'No replicas found')
    return jsonify(pfns)

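# NOTE: a hypothetical client-side call against the endpoint above; the host,
# the auth token, and the '/rses/<rse>/lfns2pfns' route are assumptions made
# for illustration only, not values confirmed by this code.
import requests

response = requests.get(
    'https://rucio.example.com/rses/MOCK/lfns2pfns',
    headers={'X-Rucio-Auth-Token': 'TOKEN'},    # placeholder auth token
    params={'lfn': 'user.jdoe:data.raw',        # one or more scope:name pairs
            'operation': 'read',                # select the protocol used for reads
            'domain': 'wan',
            'scheme': 'srm'},
)
print(response.json())  # e.g. {'user.jdoe:data.raw': 'srm://host:8443/...'}
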
def test_multi_get_mgr_ok(self):
    """(RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (Success)"""
    pfn_b = list(mgr.lfns2pfns(self.rse_settings, {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    status, details = mgr.download(self.rse_settings, [{'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                       {'name': '2_rse_remote_get.raw', 'scope': 'user.%s' % self.user},
                                                       {'name': '3_rse_remote_get.raw', 'scope': 'user.%s' % self.user, 'pfn': self.static_file},
                                                       {'name': '4_rse_remote_get.raw', 'scope': 'user.%s' % self.user, 'pfn': pfn_b}],
                                   self.tmpdir)
    if not (status and details['user.%s:1_rse_remote_get.raw' % self.user]
            and details['user.%s:2_rse_remote_get.raw' % self.user]
            and details['user.%s:3_rse_remote_get.raw' % self.user]
            and details['user.%s:4_rse_remote_get.raw' % self.user]):
        raise Exception('Return not as expected: %s, %s' % (status, details))

def setUpClass(cls):
    """S3 (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    # cls.user = '******'  # use again when latency issue with S3 storage is resolved

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    rse_tag = 'BNL-BOTO'
    rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        print('No credentials found for this RSE.')

    for protocol in rse_settings['protocols']:
        if protocol['scheme'] == 's3':
            break
    conn = boto.connect_s3(host=protocol['hostname'],
                           port=int(protocol.get('port', 80)),
                           aws_access_key_id=rse_settings['credentials']['access_key'],
                           aws_secret_access_key=rse_settings['credentials']['secret_key'],
                           is_secure=rse_settings['credentials'].get('is_secure', False),
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())

    cls.static_file = list(mgr.lfns2pfns(rse_settings, {'name': 'data.raw', 'scope': 'user.%s' % cls.user}).values())[0]
    key = get_bucket_key(cls.static_file, conn, create=True)
    key.set_contents_from_filename("%s/data.raw" % cls.tmpdir)
    for f in MgrTestCases.files_remote:
        pfn = list(mgr.lfns2pfns(rse_settings, {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
        bucket_name, key_name = get_bucket_key_name(pfn)
        key.copy(bucket_name, key_name)

def test_get_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.download(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user, 'pfn': pfn}, self.gettmpdir)

def setUpClass(cls):
    """WebDAV (RSE/PROTOCOLS): Creating necessary directories and files """
    session = requests.Session()
    session.cert = os.getenv('X509_USER_PROXY')
    session.verify = False
    cls.site = 'FZK-LCG2_SCRATCHDISK'

    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = '******'
    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    scheme = data[cls.site]['protocols']['supported']['https']['scheme']
    prefix = data[cls.site]['protocols']['supported']['https']['prefix']
    hostname = data[cls.site]['protocols']['supported']['https']['hostname']
    port = data[cls.site]['protocols']['supported']['https']['port']

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek(1024 - 1)  # 1 kB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    cls.static_file = '%s://%s:%s%sdata.raw' % (scheme, hostname, port, prefix)

    rse_settings = rsemanager.get_rse_info(cls.site)
    storage = rsemanager.create_protocol(rse_settings, operation='write', scheme='https')
    storage.connect()
    for f in MgrTestCases.files_remote:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
        destfile = list(rsemanager.lfns2pfns(rse_settings, [{'name': f, 'scope': 'user.%s' % (cls.user)}, ], operation='write', scheme='https').values())[0]
        try:
            storage.put('%s/%s' % (cls.tmpdir, f), destfile)
        except FileReplicaAlreadyExists as e:
            print(e)
    with open('%s/data.raw' % cls.tmpdir, 'rb') as f_file:
        session.put(cls.static_file, data=f_file.read(), verify=False, allow_redirects=True)

def setUpClass(cls):
    """S3ES (RSE/PROTOCOLS): Creating necessary directories and files """
    cls.site = 'BNL-OSG2_ES'

    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = '******'
    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    scheme = data[cls.site]['protocols']['supported']['s3+https']['scheme']
    prefix = data[cls.site]['protocols']['supported']['s3+https']['prefix']
    hostname = data[cls.site]['protocols']['supported']['s3+https']['hostname']
    port = data[cls.site]['protocols']['supported']['s3+https']['port']

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek(1024 - 1)  # 1 kB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    cls.static_file = '%s://%s:%s/%s/user.%s/data.raw' % (scheme, hostname, port, prefix, cls.user)

    rse_settings = rsemanager.get_rse_info(cls.site)
    storage = rsemanager.create_protocol(rse_settings, operation='write', scheme='s3+https')
    storage.connect()
    for f in MgrTestCases.files_remote:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
        destfile = list(rsemanager.lfns2pfns(rse_settings, [{'name': f, 'scope': 'user.%s' % (cls.user)}, ], operation='write', scheme='s3+https').values())[0]
        try:
            storage.put('%s/%s' % (cls.tmpdir, f), destfile)
        except FileReplicaAlreadyExists as e:
            print(e)
    f = 'data.raw'
    destfile = list(rsemanager.lfns2pfns(rse_settings, [{'name': f, 'scope': 'user.%s' % (cls.user)}, ], operation='write', scheme='s3+https').values())[0]
    try:
        storage.put('%s/%s' % (cls.tmpdir, f), destfile)
    except FileReplicaAlreadyExists as e:
        print(e)

def setUpClass(cls):
    """S3 (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    # cls.user = '******'  # use again when latency issue with S3 storage is resolved

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    fnull = open(os.devnull, 'w')
    # Create test files on storage
    try:
        subprocess.call(["s3cmd", "mb", "s3://USER"], stdout=fnull, stderr=fnull, shell=False)
        subprocess.call(["s3cmd", "mb", "s3://GROUP"], stdout=fnull, stderr=fnull, shell=False)
        subprocess.call(["s3cmd", "mb", "s3://NONDETERMINISTIC"], stdout=fnull, stderr=fnull, shell=False)
    except S3Error:
        pass
    cls.static_file = 's3://NONDETERMINISTIC/data.raw'
    subprocess.call(["s3cmd", "put", "%s/data.raw" % cls.tmpdir, cls.static_file, "--no-progress"], stdout=fnull, stderr=fnull)
    for f in MgrTestCases.files_remote:
        subprocess.call(["s3cmd", "cp", cls.static_file,
                         list(mgr.lfns2pfns(mgr.get_rse_info('SWIFT'), {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]],
                        stdout=fnull, stderr=fnull)
    fnull.close()

def setUpClass(cls):
    """SRM (RSE/PROTOCOLS): Creating necessary directories and files """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()

    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')
    for f in MgrTestCases.files_local:
        shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))

    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    prefix = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['prefix']
    hostname = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['hostname']
    if hostname.count("://"):
        hostname = hostname.split("://")[1]
    if 'port' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'].keys():
        port = int(data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['port'])
    else:
        port = 0
    if 'extended_attributes' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'].keys() and 'web_service_path' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['extended_attributes'].keys():
        web_service_path = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']['extended_attributes']['web_service_path']
    else:
        web_service_path = ''

    os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % cls.tmpdir)
    if port > 0:
        cls.static_file = 'srm://%s:%s%s%s/data.raw' % (hostname, port, web_service_path, prefix)
    else:
        cls.static_file = 'srm://%s%s%s/data.raw' % (hostname, web_service_path, prefix)
    cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, cls.static_file)
    execute(cmd)

    for f in MgrTestCases.files_remote:
        tmp = list(mgr.lfns2pfns(mgr.get_rse_info('FZK-LCG2_SCRATCHDISK'), {'name': f, 'scope': 'user.%s' % cls.user}, scheme='srm').values())[0]
        cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, tmp)
        execute(cmd)

def test_get_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.download(self.rse_settings, {'name': 'not_existing_data.raw', 'scope': 'user.%s' % self.user, 'pfn': pfn}, self.tmpdir)

def test_exists_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Success)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.exists(self.rse_settings, {'name': pfn})

def _downloader(self, pfn, protocol, human, input_queue, output_queue, user_agent, threadnb,
                total_threads, trace_endpoint, trace_pattern, transfer_timeout=None):
    rse_dict = {}
    thread_prefix = 'Thread %s/%s' % (threadnb, total_threads)
    while True:
        try:
            file = input_queue.get_nowait()
        except Empty:
            return
        dest_dir = file['dest_dir']
        file_scope = file['scope']
        file_name = file['name']
        file_didstr = '%s:%s' % (file_scope, file_name)

        # arguments for rsemgr.download already known
        dlfile = {}
        dlfile['name'] = file_name
        dlfile['scope'] = file_scope
        dlfile['adler32'] = file['adler32']
        ignore_checksum = True if pfn else False
        if pfn:
            dlfile['pfn'] = pfn

        logger.info('%s : Starting the download of %s' % (thread_prefix, file_didstr))
        trace = deepcopy(trace_pattern)
        trace.update({'scope': file_scope,
                      'filename': file_name,
                      'datasetScope': file['dataset_scope'],
                      'dataset': file['dataset_name'],
                      'filesize': file['bytes']})

        rses = list(file['rses'].keys())
        if rses == []:
            logger.warning('%s : File %s has no available replicas. Cannot be downloaded.' % (thread_prefix, file_didstr))
            trace['clientState'] = 'FILE_NOT_FOUND'
            self.send_trace(trace, trace_endpoint, user_agent)
            input_queue.task_done()
            continue
        random.shuffle(rses)
        logger.debug('%s : Potential sources : %s' % (thread_prefix, str(rses)))

        success = False
        while not success and len(rses):
            rse_name = rses.pop()
            if rse_name not in rse_dict:
                try:
                    rse_dict[rse_name] = rsemgr.get_rse_info(rse_name)
                except RSENotFound:
                    logger.warning('%s : Could not get info of RSE %s' % (thread_prefix, rse_name))
                    continue
            rse = rse_dict[rse_name]

            if not rse['availability_read']:
                logger.info('%s : %s is blacklisted for reading' % (thread_prefix, rse_name))
                continue

            try:
                if pfn:
                    protocols = [rsemgr.select_protocol(rse, operation='read', scheme=pfn.split(':')[0])]
                else:
                    protocols = rsemgr.get_protocols_ordered(rse, operation='read', scheme=protocol)
                    protocols.reverse()
            except RSEProtocolNotSupported as error:
                logger.info('%s : The protocol specified (%s) is not supported by %s' % (thread_prefix, protocol, rse_name))
                logger.debug(error)
                continue
            logger.debug('%s : %d possible protocol(s) for read' % (thread_prefix, len(protocols)))
            trace['remoteSite'] = rse_name
            trace['clientState'] = 'DOWNLOAD_ATTEMPT'

            while not success and len(protocols):
                protocol_retry = protocols.pop()
                logger.debug('%s : Trying protocol %s at %s' % (thread_prefix, protocol_retry['scheme'], rse_name))
                trace['protocol'] = protocol_retry['scheme']
                out = {}
                out['dataset_scope'] = file['dataset_scope']
                out['dataset_name'] = file['dataset_name']
                out['scope'] = file_scope
                out['name'] = file_name

                attempt = 0
                retries = 2
                while not success and attempt < retries:
                    attempt += 1
                    out['attemptnr'] = attempt
                    logger.info('%s : File %s trying from %s' % (thread_prefix, file_didstr, rse_name))
                    try:
                        trace['transferStart'] = time.time()
                        rsemgr.download(rse, files=[dlfile], dest_dir=dest_dir,
                                        force_scheme=protocol_retry['scheme'],
                                        ignore_checksum=ignore_checksum,
                                        transfer_timeout=transfer_timeout)
                        trace['transferEnd'] = time.time()
                        trace['clientState'] = 'DONE'
                        out['clientState'] = 'DONE'
                        success = True
                        output_queue.put(out)
                        logger.info('%s : File %s successfully downloaded from %s' % (thread_prefix, file_didstr, rse_name))
                    except KeyboardInterrupt:
                        logger.warning('You pressed Ctrl+C! Exiting gracefully')
                        os.killpg(os.getpgid(0), signal.SIGINT)  # signal the whole process group
                        return
                    except FileConsistencyMismatch as error:
                        logger.warning(str(error))
                        try:
                            pfns_dict = rsemgr.lfns2pfns(rse, lfns=[{'name': file_name, 'scope': file_scope}],
                                                         operation='read', scheme=protocol)
                            pfn = pfns_dict[file_didstr]
                            out['clientState'] = 'CORRUPTED'
                            out['pfn'] = pfn
                            output_queue.put(out)
                        except Exception as error:
                            logger.debug('%s : %s' % (thread_prefix, str(error)))
                        trace['clientState'] = 'FAIL_VALIDATE'
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))
                    except Exception as error:
                        logger.warning(str(error))
                        trace['clientState'] = str(type(error).__name__)
                        logger.debug('%s : Failed attempt %s/%s' % (thread_prefix, attempt, retries))

                self.send_trace(trace, trace_endpoint, user_agent, threadnb=threadnb, total_threads=total_threads)

        if success:
            duration = round(trace['transferEnd'] - trace['transferStart'], 2)
            if pfn:
                logger.info('%s : File %s successfully downloaded in %s seconds' % (thread_prefix, file_didstr, duration))
            else:
                logger.info('%s : File %s successfully downloaded. %s in %s seconds = %s MBps' % (thread_prefix, file_didstr, sizefmt(file['bytes'], human), duration, round((file['bytes'] / duration) * 1e-6, 2)))
        else:
            logger.error('%s : Cannot download file %s' % (thread_prefix, file_didstr))
        input_queue.task_done()

def test_exists_mgr_false_single_pfn(self):
    """(RSE/PROTOCOLS): Check a single file on storage using PFN (Fail)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    assert not mgr.exists(self.rse_settings, {'name': pfn})

def test_rename_mgr_FileReplicaAlreadyExists_single_pfn(self):
    """(RSE/PROTOCOLS): Rename a single file on storage using PFN (FileReplicaAlreadyExists)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '12_rse_remote_rename.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_remote_get.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

def test_rename_mgr_SourceNotFound_single_pfn(self):
    """(RSE/PROTOCOLS): Rename a single file on storage using PFN (SourceNotFound)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_not_existing.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '1_rse_new_not_created.raw', 'scope': 'user.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})

def get_destinations(rse_info, scheme, req, sources):
    dsn = 'other'
    pfn = {}
    paths = {}
    if not rse_info['deterministic']:
        ts = time.time()

        # get rule scope and name
        if req['attributes']:
            if type(req['attributes']) is dict:
                req_attributes = json.loads(json.dumps(req['attributes']))
            else:
                req_attributes = json.loads(str(req['attributes']))
            if 'ds_name' in req_attributes:
                dsn = req_attributes["ds_name"]
        if dsn == 'other':
            # select a containing dataset
            for parent in did.list_parent_dids(req['scope'], req['name']):
                if parent['type'] == DIDType.DATASET:
                    dsn = parent['name']
                    break
        record_timer('daemons.conveyor.submitter.list_parent_dids', (time.time() - ts) * 1000)

        # always use SRM
        ts = time.time()
        nondet = rsemgr.create_protocol(rse_info, 'write', scheme='srm')
        record_timer('daemons.conveyor.submitter.create_protocol', (time.time() - ts) * 1000)

        # if there exists a prefix for SRM, use it
        prefix = ''
        for s in rse_info['protocols']:
            if s['scheme'] == 'srm':
                prefix = s['prefix']

        # DQ2 path always starts with /, but prefix might not end with /
        path = construct_surl_DQ2(dsn, req['name'])

        # retrial transfers to tape need a new filename - add timestamp
        if req['request_type'] == RequestType.TRANSFER\
           and 'previous_attempt_id' in req\
           and req['previous_attempt_id']\
           and rse_info['rse_type'] == 'TAPE':  # TODO: RUCIO-809 - rsemanager: get_rse_info -> rse_type is string instead of RSEType
            path = '%s_%i' % (path, int(time.time()))
            logging.debug('Retrial transfer request %s DID %s:%s to tape %s renamed to %s' % (req['request_id'], req['scope'], req['name'], rse_info['rse'], path))

        tmp_path = '%s%s' % (prefix[:-1], path)
        if prefix[-1] != '/':
            tmp_path = '%s%s' % (prefix, path)
        paths[req['scope'], req['name']] = path

        # add the hostname
        pfn['%s:%s' % (req['scope'], req['name'])] = nondet.path2pfn(tmp_path)
        if req['request_type'] == RequestType.STAGEIN:
            if len(sources) == 1:
                pfn['%s:%s' % (req['scope'], req['name'])] = sources[0][1]
            else:
                # TODO: need to check
                return None, None

        # we must set the destination path for nondeterministic replicas explicitly
        replica.update_replicas_paths([{'scope': req['scope'],
                                        'name': req['name'],
                                        'rse_id': req['dest_rse_id'],
                                        'path': path}])
    else:
        ts = time.time()
        try:
            pfn = rsemgr.lfns2pfns(rse_info,
                                   lfns=[{'scope': req['scope'], 'name': req['name']}],
                                   scheme=scheme)
        except RSEProtocolNotSupported:
            logging.warning('%s not supported by %s' % (scheme, rse_info['rse']))
            return None, None
        record_timer('daemons.conveyor.submitter.lfns2pfns', (time.time() - ts) * 1000)

    destinations = []
    for k in pfn:
        if isinstance(pfn[k], str):
            destinations.append(pfn[k])
        elif isinstance(pfn[k], (tuple, list)):
            for url in pfn[k]:
                destinations.append(url)

    protocols = None
    try:
        protocols = rsemgr.select_protocol(rse_info, 'write', scheme=scheme)
    except RSEProtocolNotSupported:
        logging.warning('%s not supported by %s' % (scheme, rse_info['rse']))
        return None, None

    # we need to set the spacetoken if we use SRM
    dest_spacetoken = None
    if scheme == 'srm':
        dest_spacetoken = protocols['extended_attributes']['space_token']
    return destinations, dest_spacetoken

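# NOTE: a hedged summary of the two destination modes in get_destinations above:
# deterministic RSEs derive the PFN from scope and name alone via lfns2pfns,
# while non-deterministic ones join an explicitly constructed path onto the
# protocol prefix with path2pfn. 'MOCK' and the literal path are placeholders.
from rucio.rse import rsemanager as rsemgr

rse_info = rsemgr.get_rse_info('MOCK')
if rse_info['deterministic']:
    pfns = rsemgr.lfns2pfns(rse_info, lfns=[{'scope': 'user.jdoe', 'name': 'data.raw'}])
    pfn = pfns['user.jdoe:data.raw']
else:
    nondet = rsemgr.create_protocol(rse_info, 'write', scheme='srm')
    pfn = nondet.path2pfn('/prefix/user/jdoe/data.raw')  # caller-constructed path
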
def reaper(rses, worker_number=1, child_number=1, total_children=1, chunk_size=100,
           once=False, greedy=False, scheme=None, exclude_rses=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param child_number: The child number.
    :param total_children: The total number of children created per worker.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    """
    logging.info('Starting reaper: worker %(worker_number)s, child %(child_number)s' % locals())
    while not graceful_stop.is_set():
        try:
            max_deleting_rate = 0
            for rse in rses:
                deleting_rate = 0
                rse_info = rsemgr.get_rse_info(rse['rse'])
                rse_protocol = rse_core.get_rse_protocols(rse['rse'])

                if not rse_protocol['availability_delete']:
                    logging.info('Reaper %s-%s: RSE %s is not available for deletion' % (worker_number, child_number, rse_info['rse']))
                    continue

                # Temporary hack to force gfal for deletion
                for protocol in rse_info['protocols']:
                    if protocol['impl'] == 'rucio.rse.protocols.srm.Default' or protocol['impl'] == 'rucio.rse.protocols.gsiftp.Default':
                        protocol['impl'] = 'rucio.rse.protocols.gfal.Default'

                logging.info('Reaper %s-%s: Running on RSE %s' % (worker_number, child_number, rse_info['rse']))
                try:
                    needed_free_space, max_being_deleted_files = None, 10000
                    if not greedy:
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse=rse['rse'], rse_id=rse['id'])
                        logging.info('Reaper %(worker_number)s-%(child_number)s: Space usage for RSE %(rse)s - max_being_deleted_files: %(max_being_deleted_files)s, needed_free_space: %(needed_free_space)s, used: %(used)s, free: %(free)s' % locals())
                        if needed_free_space <= 0:
                            needed_free_space = 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s' % (worker_number, child_number, rse['rse']))

                    s = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse=rse['rse'], bytes=needed_free_space, limit=max_being_deleted_files, worker_number=child_number, total_workers=total_children, delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas %s %s %s' % (worker_number, child_number, rse['rse'], time.time() - s, len(replicas)))

                    if not replicas:
                        logging.info('Reaper %s-%s: nothing to do for %s' % (worker_number, child_number, rse['rse']))
                        continue

                    p = rsemgr.create_protocol(rse_info, 'delete', scheme=None)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s' % (worker_number, child_number, str(files)))
                        try:
                            s = time.time()
                            update_replicas_states(replicas=[dict(list(replica.items()) + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files])

                            for replica in files:
                                try:
                                    replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info, lfns=[{'scope': replica['scope'], 'name': replica['name']}, ], operation='delete').values())[0])
                                except ReplicaUnAvailable as e:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(e))
                                    logging.warning('Reaper %s-%s: %s' % (worker_number, child_number, err_msg))
                                    replica['pfn'] = None
                                add_message('deletion-planned', {'scope': replica['scope'],
                                                                 'name': replica['name'],
                                                                 'file-size': replica['bytes'],
                                                                 'url': replica['pfn'],
                                                                 'rse': rse_info['rse']})

                            # logging.debug('update_replicas_states %s' % (time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.being_deleted', delta=len(files))

                            if not scheme:
                                try:
                                    deleted_files = []
                                    p.connect()
                                    for replica in files:
                                        try:
                                            logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            s = time.time()
                                            if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                                logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            else:
                                                if replica['pfn']:
                                                    p.delete(replica['pfn'])
                                                else:
                                                    logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            monitor.record_timer('daemons.reaper.delete.%s.%s' % (p.attributes['scheme'], rse['rse']), (time.time() - s) * 1000)
                                            duration = time.time() - s
                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                            add_message('deletion-done', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'duration': duration})
                                            logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        except SourceNotFound:
                                            err_msg = 'Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                            logging.warning(err_msg)
                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': err_msg})
                                        except (ServiceUnavailable, RSEAccessDenied) as e:
                                            logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                        except Exception as e:
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                        except:
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                except (ServiceUnavailable, RSEAccessDenied) as e:
                                    for replica in files:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                        add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                finally:
                                    p.close()

                            s = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                delete_replicas(rse=rse['rse'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s' % (worker_number, child_number, rse['rse'], len(deleted_files), time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                            deleting_rate += len(deleted_files)
                        except:
                            logging.critical(traceback.format_exc())

                    deleting_rate = deleting_rate * 1.0 / max_being_deleted_files
                    if deleting_rate > max_deleting_rate:
                        max_deleting_rate = deleting_rate
                except:
                    logging.critical(traceback.format_exc())

            if once:
                break

            logging.info(" Reaper %s-%s: max_deleting_rate: %s " % (worker_number, child_number, max_deleting_rate))
            sleep_time = int((1 - max_deleting_rate) * 60 + 1)
            time.sleep(sleep_time)
        except:
            logging.critical(traceback.format_exc())

    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')

def reaper(rses, worker_number=0, child_number=0, total_children=1, chunk_size=100,
           once=False, greedy=False, scheme=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param child_number: The child number.
    :param total_children: The total number of children created per worker.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    """
    logging.info('Starting Reaper: Worker %(worker_number)s, '
                 'child %(child_number)s will work on RSEs: ' % locals() + ', '.join([rse['rse'] for rse in rses]))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    # Generate a hash just for the subset of RSEs
    rse_names = [rse['rse'] for rse in rses]
    hash_executable = hashlib.sha256((sys.argv[0] + ''.join(rse_names)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    nothing_to_do = {}
    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
            checkpoint_time = datetime.datetime.now()
            # logging.info('Reaper({0[worker_number]}/{0[child_number]}): Live gives {0[heartbeat]}'.format(locals()))

            max_deleting_rate = 0
            for rse in sort_rses(rses):
                try:
                    if checkpoint_time + datetime.timedelta(minutes=1) < datetime.datetime.now():
                        heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
                        # logging.info('Reaper({0[worker_number]}/{0[child_number]}): Live gives {0[heartbeat]}'.format(locals()))
                        checkpoint_time = datetime.datetime.now()

                    if rse['id'] in nothing_to_do and nothing_to_do[rse['id']] > datetime.datetime.now():
                        continue
                    logging.info('Reaper %s-%s: Running on RSE %s %s', worker_number, child_number, rse['rse'], nothing_to_do.get(rse['id']))

                    rse_info = rsemgr.get_rse_info(rse_id=rse['id'])
                    rse_protocol = rse_core.get_rse_protocols(rse_id=rse['id'])
                    if not rse_protocol['availability_delete']:
                        logging.info('Reaper %s-%s: RSE %s is not available for deletion', worker_number, child_number, rse_info['rse'])
                        nothing_to_do[rse['id']] = datetime.datetime.now() + datetime.timedelta(minutes=30)
                        continue

                    # Temporary hack to force gfal for deletion
                    for protocol in rse_info['protocols']:
                        if protocol['impl'] == 'rucio.rse.protocols.srm.Default' or protocol['impl'] == 'rucio.rse.protocols.gsiftp.Default':
                            protocol['impl'] = 'rucio.rse.protocols.gfal.Default'

                    needed_free_space, max_being_deleted_files = None, 100
                    needed_free_space_per_child = None
                    if not greedy:
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse_id=rse['id'])
                        logging.info('Reaper %(worker_number)s-%(child_number)s: Space usage for RSE %(rse)s - max_being_deleted_files: %(max_being_deleted_files)s, needed_free_space: %(needed_free_space)s, used: %(used)s, free: %(free)s' % locals())
                        if needed_free_space <= 0:
                            needed_free_space, needed_free_space_per_child = 0, 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s', worker_number, child_number, rse['rse'])
                        else:
                            if total_children and total_children > 0:
                                needed_free_space_per_child = needed_free_space / float(total_children)

                    start = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse_id=rse['id'], bytes=needed_free_space_per_child, limit=max_being_deleted_files, worker_number=child_number, total_workers=total_children, delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', worker_number, child_number, rse['rse'], needed_free_space_per_child, time.time() - start, len(replicas))

                    if not replicas:
                        nothing_to_do[rse['id']] = datetime.datetime.now() + datetime.timedelta(minutes=30)
                        logging.info('Reaper %s-%s: No replicas to delete %s. The next check will occur at %s', worker_number, child_number, rse['rse'], nothing_to_do[rse['id']])
                        continue

                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s', worker_number, child_number, str(files))
                        try:
                            update_replicas_states(replicas=[dict(list(replica.items()) + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files], nowait=True)

                            for replica in files:
                                try:
                                    replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                               lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                                               operation='delete', scheme=scheme).values())[0])
                                except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(error))
                                    logging.warning('Reaper %s-%s: %s', worker_number, child_number, err_msg)
                                    replica['pfn'] = None

                            monitor.record_counter(counters='reaper.deletion.being_deleted', delta=len(files))

                            try:
                                deleted_files = []
                                prot.connect()
                                for replica in files:
                                    try:
                                        deletion_dict = {'scope': replica['scope'].external,
                                                         'name': replica['name'],
                                                         'rse': rse_info['rse'],
                                                         'rse_id': rse_info['id'],
                                                         'file-size': replica['bytes'],
                                                         'bytes': replica['bytes'],
                                                         'url': replica['pfn'],
                                                         'protocol': prot.attributes['scheme']}
                                        if replica['scope'].vo != 'def':
                                            deletion_dict['vo'] = replica['scope'].vo
                                        logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        start = time.time()
                                        if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                            logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        else:
                                            if replica['pfn']:
                                                pfn = replica['pfn']
                                                # sign the URL if necessary
                                                if prot.attributes['scheme'] == 'https' and rse_info['sign_url'] is not None:
                                                    pfn = get_signed_url(rse['id'], rse_info['sign_url'], 'delete', pfn)
                                                prot.delete(pfn)
                                            else:
                                                logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        monitor.record_timer('daemons.reaper.delete.%s.%s' % (prot.attributes['scheme'], rse['rse']), (time.time() - start) * 1000)
                                        duration = time.time() - start
                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                        deletion_dict['duration'] = duration
                                        add_message('deletion-done', deletion_dict)
                                        logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], duration)
                                    except SourceNotFound:
                                        err_msg = 'Deletion NOTFOUND of %s:%s as %s on %s' % (replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                        logging.warning(err_msg)
                                        deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                        if replica['state'] == ReplicaState.AVAILABLE:
                                            deletion_dict['reason'] = str(err_msg)
                                            add_message('deletion-failed', deletion_dict)
                                    except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(error))
                                        deletion_dict['reason'] = str(error)
                                        add_message('deletion-failed', deletion_dict)
                                    except Exception as error:
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc()))
                                        deletion_dict['reason'] = str(error)
                                        add_message('deletion-failed', deletion_dict)
                                    except:
                                        logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc()))
                            except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                                for replica in files:
                                    logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s', worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(error))
                                    payload = {'scope': replica['scope'].external, 'name': replica['name'], 'rse': rse_info['rse'], 'rse_id': rse_info['id'], 'file-size': replica['bytes'], 'bytes': replica['bytes'], 'url': replica['pfn'], 'reason': str(error), 'protocol': prot.attributes['scheme']}
                                    if replica['scope'].vo != 'def':
                                        payload['vo'] = replica['scope'].vo
                                    add_message('deletion-failed', payload)
                                break
                            finally:
                                prot.close()

                            start = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                delete_replicas(rse_id=rse['id'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s', worker_number, child_number, rse['rse'], len(deleted_files), time.time() - start)
                            monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                        except DatabaseException as error:
                            logging.warning('Reaper %s-%s: DatabaseException %s', worker_number, child_number, str(error))
                        except UnsupportedOperation as error:
                            logging.warning('Reaper %s-%s: UnsupportedOperation %s', worker_number, child_number, str(error))
                        except:
                            logging.critical(traceback.format_exc())
                except RSENotFound as error:
                    logging.warning('Reaper %s-%s: RSE not found %s', worker_number, child_number, str(error))
                except:
                    logging.critical(traceback.format_exc())

            if once:
                break

            time.sleep(1)
        except DatabaseException as error:
            logging.warning('Reaper: %s', str(error))
        except:
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return

def request_transfer(loop=1, src=None, dst=None, upload=False, same_src=False, same_dst=False):
    """
    Main loop to request a new transfer.
    """
    logging.info('request: starting')

    session = get_session()
    src_rse = generate_rse(src, ''.join(random.sample(string.ascii_letters.upper(), 8)))
    dst_rse = generate_rse(dst, ''.join(random.sample(string.ascii_letters.upper(), 8)))

    logging.info('request: started')

    i = 0
    while not graceful_stop.is_set():
        if i >= loop:
            return
        try:
            if not same_src:
                src_rse = generate_rse(src, ''.join(random.sample(string.ascii_letters.upper(), 8)))
            if not same_dst:
                dst_rse = generate_rse(dst, ''.join(random.sample(string.ascii_letters.upper(), 8)))

            tmp_name = generate_uuid()

            # add a new dataset
            scope = InternalScope('mock')
            account = InternalAccount('root')
            did.add_did(scope=scope, name='dataset-%s' % tmp_name, type=DIDType.DATASET, account=account, session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(src_rse, lfns=[{'scope': scope.external, 'name': 'file-%s' % tmp_name}])['%s:file-%s' % (scope.external, tmp_name)]

            if upload:
                # create the directories if needed
                p = rsemanager.create_protocol(src_rse, operation='write', scheme='srm')
                p.connect()
                try:
                    p.mkdir(pfn)
                except Exception:
                    # the directory may already exist
                    pass

                # upload the test file
                try:
                    fp = os.path.dirname(config_get('injector', 'file'))
                    fn = os.path.basename(config_get('injector', 'file'))
                    p.put(fn, pfn, source_dir=fp)
                except Exception:
                    logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                    did.delete_dids([{'scope': scope, 'name': 'dataset-%s' % tmp_name}], account=account, session=session)
                    break

            # add the replica
            replica.add_replica(rse_id=src_rse['id'], scope=scope, name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account=account, session=session)
            logging.info('added replica on %s for DID mock:%s' % (src_rse['rse'], tmp_name))

            # attach the file to the dataset
            did.attach_dids(scope=scope, name='dataset-%s' % tmp_name,
                            dids=[{'scope': scope, 'name': 'file-%s' % tmp_name,
                                   'bytes': config_get_int('injector', 'bytes')}],
                            account=account, session=session)

            # add rule for the dataset
            rule.add_rule(dids=[{'scope': scope, 'name': 'dataset-%s' % tmp_name}],
                          account=account, copies=1, rse_expression=dst_rse['rse'],
                          grouping='ALL', weight=None, lifetime=None, locked=False,
                          subscription_id=None, activity='mock-injector', session=session)
            logging.info('added rule for %s for DID %s:%s' % (dst_rse['rse'], scope, tmp_name))

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())
        i += 1

    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')
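# The PFN lookup above relies on lfns2pfns() returning a dict keyed by
# 'scope:name'. A self-contained stand-in that mimics that keying convention
# (the 'mock://host/prefix' URL is a made-up example, not a real RSE setting):
def fake_lfns2pfns(lfns, prefix='mock://host/prefix'):
    return {'%s:%s' % (lfn['scope'], lfn['name']): '%s/%s/%s' % (prefix, lfn['scope'], lfn['name'])
            for lfn in lfns}

pfns = fake_lfns2pfns([{'scope': 'mock', 'name': 'file-abc'}])
assert pfns['mock:file-abc'].endswith('/mock/file-abc')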
def GET(self, rse):
    """
    Return PFNs for a set of LFNs, formatted as a JSON object where the key
    is an LFN and the value is the corresponding PFN.

    - One or more LFNs should be passed via the `lfn` query argument.
    - A URL scheme (e.g., http / gsiftp / srm) can be passed via the `scheme`
      query argument to help with protocol selection.
    - The `domain` query argument is used to select the protocol for wan or lan use cases.
    - The `operation` query argument is used to select the protocol for reads versus writes.

    The `scheme`, `domain`, and `operation` options help with the selection
    of the protocol, in case that affects the PFN generation.

    HTTP Success:
        200 OK

    HTTP Error:
        400 LFN parameter(s) malformed
        404 Resource not Found
        406 Not Acceptable
        500 InternalError

    :returns: A JSON object mapping each LFN to its PFN.
    """
    header('Content-Type', 'application/json')
    lfns = []
    scheme = None
    domain = 'wan'
    operation = 'write'
    if ctx.query:
        params = parse_qsl(ctx.query[1:])
        for key, val in params:
            if key == 'lfn':
                info = val.split(":", 1)
                if len(info) != 2:
                    raise generate_http_error(400, 'InvalidPath', 'LFN in invalid format')
                lfn_dict = {'scope': info[0], 'name': info[1]}
                lfns.append(lfn_dict)
            elif key == 'scheme':
                scheme = val
            elif key == 'domain':
                domain = val
            elif key == 'operation':
                operation = val

    rse_settings = None
    try:
        rse_settings = get_rse_protocols(rse, issuer=ctx.env.get('issuer'), vo=ctx.env.get('vo'))
    except RSENotFound as error:
        raise generate_http_error(404, 'RSENotFound', error.args[0])
    except RSEProtocolNotSupported as error:
        raise generate_http_error(404, 'RSEProtocolNotSupported', error.args[0])
    except RSEProtocolDomainNotSupported as error:
        raise generate_http_error(404, 'RSEProtocolDomainNotSupported', error.args[0])
    except Exception as error:
        print(error)
        print(format_exc())
        raise InternalError(error)

    pfns = rsemanager.lfns2pfns(rse_settings, lfns, operation=operation, scheme=scheme, domain=domain)
    return dumps(pfns)
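# A hedged example of how a client might call the handler above. The host and
# the /rses/<rse>/lfns2pfns mount point are assumptions for illustration; the
# query arguments match the ones parsed by the handler:
import requests

params = [('lfn', 'user.jdoe:file.raw'), ('scheme', 'srm'),
          ('domain', 'wan'), ('operation', 'write')]
resp = requests.get('https://rucio-server.example.org/rses/MOCK/lfns2pfns', params=params)
print(resp.json())   # e.g. {'user.jdoe:file.raw': 'srm://...'}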
def reaper(rses, chunk_size=100, once=False, greedy=False, scheme=None, delay_seconds=0, sleep_time=60):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param chunk_size: The size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param sleep_time: Time between two cycles.
    """
    try:
        max_deletion_thread = get('reaper', 'nb_workers_by_hostname')
    except ConfigNotFound:
        max_deletion_thread = 5
    hostname = socket.getfqdn()
    executable = sys.argv[0]
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
    logging.info('%s Reaper starting', prepend_str)

    time.sleep(10)  # To prevent running on the same partition if all the reapers restart at the same time
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
    logging.info('%s Reaper started', prepend_str)

    while not GRACEFUL_STOP.is_set():
        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'Thread [%i/%i] : ' % (heart_beat['assign_thread'] + 1, heart_beat['nr_threads'])
            tot_needed_free_space = 0
            for rse in rses:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blacklisted
                if rse['availability'] % 2 == 0:
                    logging.debug('%s RSE %s is blacklisted for delete', prepend_str, rse['rse'])
                    continue
                max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse['rse'], rse['id'], prepend_str)
                # Check if greedy mode
                if greedy:
                    dict_rses[(rse['rse'], rse['id'])] = [1000000000000, max_being_deleted_files]
                    tot_needed_free_space += 1000000000000
                else:
                    if needed_free_space:
                        dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, max_being_deleted_files]
                        tot_needed_free_space += needed_free_space
                    else:
                        logging.debug('%s Nothing to delete on %s', prepend_str, rse['rse'])

            # Order the RSEs by needed free space, descending
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=itemgetter(1), reverse=True))
            logging.debug('%s List of RSEs to process ordered by needed space desc : %s', prepend_str, str(sorted_dict_rses))

            # Get the mapping between the RSEs and the hostnames used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logging.debug('%s Mapping RSEs to hostnames used for deletion : %s', prepend_str, str(rses_hostname_mapping))

            list_rses_mult = []
            # Loop over the RSEs. rse_key = (rse, rse_id). Fill list_rses_mult, which contains all RSEs to process, each with a different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of RSEs with a small number of files to delete
                max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            skip_until_next_run = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                if rse_id in skip_until_next_run:
                    continue
                logging.debug('%s Working on %s. Percentage of the total space needed %.2f', prepend_str, rse_name, needed_free_space / tot_needed_free_space * 100)
                rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logging.debug('%s Payload count : %s', prepend_str, str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]

                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logging.debug('%s Too many deletion threads for %s on RSE %s. Back off', prepend_str, rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue

                logging.info('%s Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s', prepend_str, rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logging.debug('%s Total deletion workers for %s : %i', prepend_str, rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   session=None)
                    logging.debug('%s list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', prepend_str, rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logging.info('%s Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle', prepend_str, rse_name, chunk_size, len(replicas))
                        skip_until_next_run.append(rse_id)
                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logging.error('%s %s', prepend_str, str(error))
                    continue
                except Exception:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                    for file_replicas in chunks(replicas, 100):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                           lfns=[{'scope': replica['scope'], 'name': replica['name'], 'path': replica['path']}],
                                                                           operation='delete', scheme=scheme).values())[0])
                                time.sleep(random.uniform(0, 0.01))
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logging.warning('%s Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s', prepend_str, replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, prepend_str)
                        logging.info('%s %i files processed in %s seconds', prepend_str, len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logging.debug('%s delete_replicas succeeded on %s : %s replicas in %s seconds', prepend_str, rse_name, len(deleted_files), time.time() - del_start)
                        monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                except Exception:
                    logging.critical('%s %s', prepend_str, str(traceback.format_exc()))

            if once:
                break

            tottime = time.time() - start_time
            if tottime < sleep_time:
                logging.info('%s Will sleep for %s seconds', prepend_str, sleep_time - tottime)
                time.sleep(sleep_time - tottime)

        except DatabaseException as error:
            logging.warning('%s Reaper: %s', prepend_str, str(error))
        except Exception:
            logging.critical('%s %s', prepend_str, str(traceback.format_exc()))
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logging.info('%s Graceful stop requested', prepend_str)
    logging.info('%s Graceful stop done', prepend_str)
    return
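# The RSE weighting above gives each RSE a multiplicity in the work list that
# is proportional to its share of the total space to free. A self-contained
# sketch of that computation with made-up numbers:
from math import ceil
import random

tot_needed = 300          # total bytes to free across all RSEs (example value)
nr_threads = 4            # number of reaper threads (example value)
dict_rses = {('MOCK1', 'id1'): [200, 100], ('MOCK2', 'id2'): [100, 100]}

list_rses_mult = []
for (rse_name, rse_id), (needed, max_files) in dict_rses.items():
    # ceil() keeps RSEs with little to delete from rounding down to zero workers
    max_workers = ceil(needed / tot_needed * 1000 / nr_threads)
    list_rses_mult.extend([(rse_name, rse_id, needed, max_files)
                           for _ in range(int(max_workers))])
random.shuffle(list_rses_mult)
# MOCK1 now appears roughly twice as often as MOCK2 in the shuffled work list.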
def test_change_scope_mgr_ok_single_pfn(self):
    """(RSE/PROTOCOLS): Change the scope of a single file on storage using PFN (Success)"""
    pfn = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_change_scope.raw', 'scope': 'user.%s' % self.user}).values())[0]
    pfn_new = list(mgr.lfns2pfns(self.rse_settings, {'name': '2_rse_remote_change_scope.raw', 'scope': 'group.%s' % self.user}).values())[0]
    mgr.rename(self.rse_settings, {'name': pfn, 'new_name': pfn_new})
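# The list(...)[0] pattern used in the fixed test is the Python 3 way to take
# a single value out of the dict returned by lfns2pfns(); dict views are not
# subscriptable. next(iter(...)) is an equivalent that avoids building the
# intermediate list:
d = {'user.jdoe:file.raw': 'mock://host/path/file.raw'}
first = next(iter(d.values()))
assert first == list(d.values())[0]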
def reaper(rses, include_rses, exclude_rses, vos=None, chunk_size=100, once=False, greedy=False,
           scheme=None, delay_seconds=0, sleep_time=60, auto_exclude_threshold=100, auto_exclude_timeout=600):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param include_rses: RSE expression to include RSEs.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    :param vos: VOs on which to look for RSEs. Only used in multi-VO mode. If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param chunk_size: The size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param delay_seconds: The delay to query replicas in BEING_DELETED state.
    :param sleep_time: Time between two cycles.
    :param auto_exclude_threshold: Number of service unavailable exceptions after which the RSE gets temporarily excluded.
    :param auto_exclude_timeout: Timeout for temporarily excluded RSEs.
    """
    hostname = socket.getfqdn()
    executable = 'reaper'
    pid = os.getpid()
    hb_thread = threading.current_thread()
    sanity_check(executable=executable, hostname=hostname)
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.INFO, 'Reaper starting')

    if not once:
        GRACEFUL_STOP.wait(10)  # To prevent running on the same partition if all the reapers restart at the same time
    heart_beat = live(executable, hostname, pid, hb_thread)
    prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
    logger = formatted_logger(logging.log, prepend_str + '%s')
    logger(logging.INFO, 'Reaper started')

    while not GRACEFUL_STOP.is_set():
        # Try to get the auto exclude parameters from the config table. Otherwise use the CLI parameters.
        try:
            auto_exclude_threshold = config_get('reaper', 'auto_exclude_threshold', default=auto_exclude_threshold)
            auto_exclude_timeout = config_get('reaper', 'auto_exclude_timeout', default=auto_exclude_timeout)
        except (NoOptionError, NoSectionError, RuntimeError):
            pass

        # Check if there is a Judge Evaluator backlog
        try:
            max_evaluator_backlog_count = config_get('reaper', 'max_evaluator_backlog_count')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_count = None
        try:
            max_evaluator_backlog_duration = config_get('reaper', 'max_evaluator_backlog_duration')
        except (NoOptionError, NoSectionError, RuntimeError):
            max_evaluator_backlog_duration = None
        if max_evaluator_backlog_count or max_evaluator_backlog_duration:
            backlog = get_evaluation_backlog()
            if max_evaluator_backlog_count and \
               backlog[0] and \
               max_evaluator_backlog_duration and \
               backlog[1] and \
               backlog[0] > max_evaluator_backlog_count and \
               backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count and duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_count and backlog[0] and backlog[0] > max_evaluator_backlog_count:
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog count hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue
            elif max_evaluator_backlog_duration and backlog[1] and backlog[1] < datetime.utcnow() - timedelta(minutes=max_evaluator_backlog_duration):
                logger(logging.ERROR, 'Reaper: Judge evaluator backlog duration hit, stopping operation')
                GRACEFUL_STOP.wait(30)
                continue

        rses_to_process = get_rses_to_process(rses, include_rses, exclude_rses, vos)
        if not rses_to_process:
            logger(logging.ERROR, 'Reaper: No RSEs found. Will sleep for 30 seconds')
            GRACEFUL_STOP.wait(30)
            continue
        start_time = time.time()
        try:
            staging_areas = []
            dict_rses = {}
            heart_beat = live(executable, hostname, pid, hb_thread, older_than=3600)
            prepend_str = 'reaper[%i/%i] ' % (heart_beat['assign_thread'], heart_beat['nr_threads'])
            logger = formatted_logger(logging.log, prepend_str + '%s')
            tot_needed_free_space = 0
            for rse in rses_to_process:
                # Check if the RSE is a staging area
                if rse['staging_area']:
                    staging_areas.append(rse['rse'])
                # Check if RSE is blocklisted
                if rse['availability'] % 2 == 0:
                    logger(logging.DEBUG, 'RSE %s is blocklisted for delete', rse['rse'])
                    continue
                needed_free_space, only_delete_obsolete = __check_rse_usage(rse['rse'], rse['id'], greedy=greedy, logger=logger)
                if needed_free_space:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                    tot_needed_free_space += needed_free_space
                elif only_delete_obsolete:
                    dict_rses[(rse['rse'], rse['id'])] = [needed_free_space, only_delete_obsolete]
                else:
                    logger(logging.DEBUG, 'Nothing to delete on %s', rse['rse'])

            # Order the RSEs by needed free space, descending
            sorted_dict_rses = OrderedDict(sorted(dict_rses.items(), key=lambda x: x[1][0], reverse=True))
            logger(logging.DEBUG, 'List of RSEs to process ordered by needed space desc: %s', str(sorted_dict_rses))

            # Get the mapping between the RSEs and the hostnames used for deletion. The dictionary has RSE as key and (hostname, rse_info) as value
            rses_hostname_mapping = get_rses_to_hostname_mapping()
            # logger(logging.DEBUG, 'Mapping RSEs to hostnames used for deletion : %s', str(rses_hostname_mapping))

            list_rses_mult = []
            # Loop over the RSEs. rse_key = (rse, rse_id). Fill list_rses_mult, which contains all RSEs to process, each with a different multiplicity
            for rse_key in dict_rses:
                rse_name, rse_id = rse_key
                # The length of the deletion queue scales inversely with the number of workers
                # The ceil increases the weight of RSEs with a small number of files to delete
                if tot_needed_free_space:
                    max_workers = ceil(dict_rses[rse_key][0] / tot_needed_free_space * 1000 / heart_beat['nr_threads'])
                else:
                    max_workers = 1
                list_rses_mult.extend([(rse_name, rse_id, dict_rses[rse_key][0], dict_rses[rse_key][1]) for _ in range(int(max_workers))])
            random.shuffle(list_rses_mult)

            paused_rses = []
            for rse_name, rse_id, needed_free_space, max_being_deleted_files in list_rses_mult:
                result = REGION.get('pause_deletion_%s' % rse_id, expiration_time=120)
                if result is not NO_VALUE:
                    paused_rses.append(rse_name)
                    logger(logging.DEBUG, 'Not enough replicas to delete on %s during the previous cycle. Deletion paused for a while', rse_name)
                    continue
                result = REGION.get('temporary_exclude_%s' % rse_id, expiration_time=auto_exclude_timeout)
                if result is not NO_VALUE:
                    logger(logging.WARNING, 'Too many failed attempts for %s in last cycle. RSE is temporarily excluded.', rse_name)
                    labels = {'rse': rse_name}
                    EXCLUDED_RSE_GAUGE.labels(**labels).set(1)
                    continue
                labels = {'rse': rse_name}
                EXCLUDED_RSE_GAUGE.labels(**labels).set(0)

                percent = 0
                if tot_needed_free_space:
                    percent = needed_free_space / tot_needed_free_space * 100
                logger(logging.DEBUG, 'Working on %s. Percentage of the total space needed %.2f', rse_name, percent)
                try:
                    rse_hostname, rse_info = rses_hostname_mapping[rse_id]
                except KeyError:
                    logger(logging.DEBUG, "Hostname lookup for %s failed.", rse_name)
                    REGION.set('pause_deletion_%s' % rse_id, True)
                    continue
                rse_hostname_key = '%s,%s' % (rse_id, rse_hostname)
                payload_cnt = list_payload_counts(executable, older_than=600, hash_executable=None, session=None)
                # logger(logging.DEBUG, 'Payload count : %s', str(payload_cnt))
                tot_threads_for_hostname = 0
                tot_threads_for_rse = 0
                for key in payload_cnt:
                    if key and key.find(',') > -1:
                        if key.split(',')[1] == rse_hostname:
                            tot_threads_for_hostname += payload_cnt[key]
                        if key.split(',')[0] == str(rse_id):
                            tot_threads_for_rse += payload_cnt[key]
                max_deletion_thread = get_max_deletion_threads_by_hostname(rse_hostname)
                if rse_hostname_key in payload_cnt and tot_threads_for_hostname >= max_deletion_thread:
                    logger(logging.DEBUG, 'Too many deletion threads for %s on RSE %s. Back off', rse_hostname, rse_name)
                    # Might need to reschedule a try on this RSE later in the same cycle
                    continue

                logger(logging.INFO, 'Nb workers on %s smaller than the limit (current %i vs max %i). Starting new worker on RSE %s', rse_hostname, tot_threads_for_hostname, max_deletion_thread, rse_name)
                live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                logger(logging.DEBUG, 'Total deletion workers for %s : %i', rse_hostname, tot_threads_for_hostname + 1)

                # List and mark BEING_DELETED the files to delete
                del_start_time = time.time()
                only_delete_obsolete = dict_rses[(rse_name, rse_id)][1]
                try:
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        if only_delete_obsolete:
                            logger(logging.DEBUG, 'Will run list_and_mark_unlocked_replicas on %s. No space needed, will only delete EPOCH tombstoned replicas', rse_name)
                        replicas = list_and_mark_unlocked_replicas(limit=chunk_size,
                                                                   bytes_=needed_free_space,
                                                                   rse_id=rse_id,
                                                                   delay_seconds=delay_seconds,
                                                                   only_delete_obsolete=only_delete_obsolete,
                                                                   session=None)
                    logger(logging.DEBUG, 'list_and_mark_unlocked_replicas on %s for %s bytes in %s seconds: %s replicas', rse_name, needed_free_space, time.time() - del_start_time, len(replicas))
                    if len(replicas) < chunk_size:
                        logger(logging.DEBUG, 'Not enough replicas to delete on %s (%s requested vs %s returned). Will skip any new attempts on this RSE until next cycle', rse_name, chunk_size, len(replicas))
                        REGION.set('pause_deletion_%s' % rse_id, True)
                except (DatabaseException, IntegrityError, DatabaseError) as error:
                    logger(logging.ERROR, '%s', str(error))
                    continue
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

                # Physical deletion will take place there
                try:
                    prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme, logger=logger)
                    for file_replicas in chunks(replicas, chunk_size):
                        # Refresh heartbeat
                        live(executable, hostname, pid, hb_thread, older_than=600, hash_executable=None, payload=rse_hostname_key, session=None)
                        del_start_time = time.time()
                        for replica in file_replicas:
                            try:
                                replica['pfn'] = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                                           lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                                           operation='delete', scheme=scheme).values())[0])
                            except (ReplicaUnAvailable, ReplicaNotFound) as error:
                                logger(logging.WARNING, 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s', replica['scope'], replica['name'], rse_name, str(error))
                                replica['pfn'] = None
                            except Exception:
                                logger(logging.CRITICAL, 'Exception', exc_info=True)

                        deleted_files = delete_from_storage(file_replicas, prot, rse_info, staging_areas, auto_exclude_threshold, logger=logger)
                        logger(logging.INFO, '%i files processed in %s seconds', len(file_replicas), time.time() - del_start_time)

                        # Then finally delete the replicas
                        del_start = time.time()
                        with monitor.record_timer_block('reaper.delete_replicas'):
                            delete_replicas(rse_id=rse_id, files=deleted_files)
                        logger(logging.DEBUG, 'delete_replicas succeeded on %s : %s replicas in %s seconds', rse_name, len(deleted_files), time.time() - del_start)
                        DELETION_COUNTER.inc(len(deleted_files))
                except Exception:
                    logger(logging.CRITICAL, 'Exception', exc_info=True)

            if paused_rses:
                logger(logging.INFO, 'Deletion paused for a while for following RSEs: %s', ', '.join(paused_rses))

            if once:
                break

            daemon_sleep(start_time=start_time, sleep_time=sleep_time, graceful_stop=GRACEFUL_STOP, logger=logger)

        except DatabaseException as error:
            logger(logging.WARNING, 'Reaper: %s', str(error))
        except Exception:
            logger(logging.CRITICAL, 'Exception', exc_info=True)
        finally:
            if once:
                break

    die(executable=executable, hostname=hostname, pid=pid, thread=hb_thread)
    logger(logging.INFO, 'Graceful stop requested')
    logger(logging.INFO, 'Graceful stop done')
    return
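# The pause/exclude logic above uses a dogpile.cache region (REGION) as a TTL
# flag store, with NO_VALUE signalling an absent or expired key. A minimal,
# self-contained sketch of that pattern; the in-memory backend and the key
# name are assumptions for illustration (production deployments may use a
# shared backend such as memcached):
from dogpile.cache import make_region
from dogpile.cache.api import NO_VALUE

REGION = make_region().configure('dogpile.cache.memory', expiration_time=120)

REGION.set('pause_deletion_some-rse-id', True)
if REGION.get('pause_deletion_some-rse-id', expiration_time=120) is not NO_VALUE:
    print('deletion paused for this RSE')   # flag expires after 120 seconds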
    return generate_http_error_flask(404, 'RSENotFound', error.args[0])
except RSEProtocolNotSupported as error:
    return generate_http_error_flask(404, 'RSEProtocolNotSupported', error.args[0])
except RSEProtocolDomainNotSupported as error:
    return generate_http_error_flask(404, 'RSEProtocolDomainNotSupported', error.args[0])
except Exception as error:
    print(error)
    print(format_exc())
    return str(error), 500

pfns = rsemanager.lfns2pfns(rse_settings, lfns, operation=operation, scheme=scheme, domain=domain)
return Response(dumps(pfns), content_type="application/json")


class Protocol(MethodView):
    """ Create, Update, Read and delete a specific protocol. """

    def post(self, rse, scheme):
        """
        Create a protocol for a given RSE.

        .. :quickref: Protocol; Create an RSE protocol.

        :param rse: The RSE name.
        :param scheme: The protocol identifier.
def process_dark_files(path, scope, rse, latest_run, max_dark_fraction, max_files_at_site, old_enough_run, force_proceed):
    """
    Process the Dark Files.
    """
    prefix = 'storage-consistency-actions (process_dark_files())'
    logger = formatted_logger(logging.log, prefix + '%s')

    # Create a cc_dark section in the stats file
    t0 = time.time()
    stats_key = "cc_dark"
    stats = Stats(latest_run)
    cc_stats = {
        "start_time": t0,
        "end_time": None,
        "initial_dark_files": 0,
        "confirmed_dark_files": 0,
        "x-check_run": old_enough_run,
        "status": "started"
    }
    stats[stats_key] = cc_stats

    # Compare the two lists, and take only the dark files that are in both
    latest_dark = re.sub('_stats.json$', '_D.list', latest_run)
    old_enough_dark = re.sub('_stats.json$', '_D.list', old_enough_run)
    logger(logging.INFO, 'latest_dark = %s' % latest_dark)
    logger(logging.INFO, 'old_enough_dark = %s' % old_enough_dark)
    confirmed_dark = re.sub('_stats.json$', '_DeletionList.csv', latest_run)
    cmp2dark(new_list=latest_dark, old_list=old_enough_dark, comm_list=confirmed_dark, stats_file=latest_run)

    ###
    # SAFEGUARD
    # If a large fraction (larger than 'max_dark_fraction') of the files at a site
    # are reported as 'dark', do NOT proceed with the deletion.
    # Instead, put a warning in the _stats.json file, so that an operator can have a look.
    ###

    # Get the number of files recorded by the scanner
    with open(latest_dark) as dark_list:
        dark_files = sum(1 for _ in dark_list)
    with open(confirmed_dark) as confirmed_list:
        confirmed_dark_files = sum(1 for _ in confirmed_list)
    logger(logging.INFO, 'dark_files %d' % dark_files)
    logger(logging.INFO, 'confirmed_dark_files %d' % confirmed_dark_files)
    logger(logging.INFO, 'confirmed_dark_files/max_files_at_site = %f' % (confirmed_dark_files / max_files_at_site))
    logger(logging.INFO, 'max_dark_fraction configured for this RSE: %f' % max_dark_fraction)

    # Labels for the Prometheus counters/gauges
    labels = {'rse': rse}
    record_gauge('storage.consistency.actions_dark_files_found', dark_files, labels=labels)
    record_gauge('storage.consistency.actions_dark_files_confirmed', confirmed_dark_files, labels=labels)

    deleted_files = 0
    if confirmed_dark_files / max_files_at_site < max_dark_fraction or force_proceed is True:
        logger(logging.INFO, 'Can proceed with dark files deletion')

        # Then, do the real deletion (code from DeleteReplicas.py)
        issuer = InternalAccount('root')
        with open(confirmed_dark, 'r') as csvfile:
            reader = csv.reader(csvfile)
            for name, in reader:
                logger(logging.INFO, 'Processing a dark file:\n RSE %s Scope: %s Name: %s' % (rse, scope, name))
                rse_id = get_rse_id(rse=rse)
                Intscope = InternalScope(scope=scope, vo=issuer.vo)
                lfns = [{'scope': scope, 'name': name}]
                attributes = get_rse_info(rse=rse)
                pfns = lfns2pfns(rse_settings=attributes, lfns=lfns, operation='delete')
                pfn_key = scope + ':' + name
                url = pfns[pfn_key]
                urls = [url]
                paths = parse_pfns(attributes, urls, operation='delete')
                replicas = [{'scope': Intscope, 'rse_id': rse_id, 'name': name,
                             'path': paths[url]['path'] + paths[url]['name']}]
                add_quarantined_replicas(rse_id, replicas, session=None)
                deleted_files += 1
                labels = {'rse': rse}
                record_counter('storage.consistency.actions_dark_files_deleted_counter', delta=1, labels=labels)

        # Update the stats
        t1 = time.time()
        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": deleted_files,
            "status": "done"
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted', deleted_files, labels=labels)
    else:
        darkperc = 100. * confirmed_dark_files / max_files_at_site
        logger(logging.WARNING, '\n ATTENTION: Too many DARK files! (%3.2f%%) \n Stopping and asking for operators help.' % darkperc)

        # Update the stats
        t1 = time.time()
        cc_stats.update({
            "end_time": t1,
            "initial_dark_files": dark_files,
            "confirmed_dark_files": 0,
            "status": "ABORTED",
            "aborted_reason": "%3.2f%% dark" % darkperc,
        })
        stats[stats_key] = cc_stats
        record_gauge('storage.consistency.actions_dark_files_deleted', 0, labels=labels)
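# The safeguard above only proceeds when the confirmed dark files stay below a
# configured fraction of the files known at the site. A self-contained check
# with made-up numbers:
confirmed_dark_files = 120
max_files_at_site = 10000
max_dark_fraction = 0.05
force_proceed = False

dark_fraction = confirmed_dark_files / max_files_at_site   # 0.012
if dark_fraction < max_dark_fraction or force_proceed:
    print('Can proceed with dark files deletion (%.2f%% dark)' % (100. * dark_fraction))
else:
    print('ATTENTION: Too many DARK files! (%.2f%%)' % (100. * dark_fraction))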
def reaper(rses=[], worker_number=1, total_workers=1, chunk_size=100, once=False, scheme=None):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param total_workers: The total number of workers.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    """
    logging.info('Starting Dark Reaper %s-%s: Will work on RSEs: %s', worker_number, total_workers, str(rses))

    pid = os.getpid()
    thread = threading.current_thread()
    hostname = socket.gethostname()
    executable = ' '.join(sys.argv)
    hash_executable = hashlib.sha256((sys.argv[0] + ''.join(rses)).encode()).hexdigest()
    sanity_check(executable=None, hostname=hostname)

    while not GRACEFUL_STOP.is_set():
        try:
            # heartbeat
            heartbeat = live(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
            logging.info('Dark Reaper({0[worker_number]}/{0[total_workers]}): Live gives {0[heartbeat]}'.format(locals()))
            nothing_to_do = True

            random.shuffle(rses)
            for rse_id in rses:
                rse = rse_core.get_rse_name(rse_id=rse_id)
                replicas = list_quarantined_replicas(rse_id=rse_id, limit=chunk_size,
                                                     worker_number=worker_number, total_workers=total_workers)
                rse_info = rsemgr.get_rse_info(rse_id=rse_id)
                prot = rsemgr.create_protocol(rse_info, 'delete', scheme=scheme)
                deleted_replicas = []
                try:
                    prot.connect()
                    for replica in replicas:
                        nothing_to_do = False
                        try:
                            pfn = str(list(rsemgr.lfns2pfns(rse_settings=rse_info,
                                                            lfns=[{'scope': replica['scope'].external, 'name': replica['name'], 'path': replica['path']}],
                                                            operation='delete', scheme=scheme).values())[0])
                            logging.info('Dark Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s',
                                         worker_number, total_workers, replica['scope'], replica['name'], pfn, rse)
                            start = time.time()
                            prot.delete(pfn)
                            duration = time.time() - start
                            logging.info('Dark Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s in %s seconds',
                                         worker_number, total_workers, replica['scope'], replica['name'], pfn, rse, duration)
                            add_message('deletion-done', {'scope': replica['scope'].external,
                                                          'name': replica['name'],
                                                          'rse': rse,
                                                          'rse_id': rse_id,
                                                          'file-size': replica.get('bytes') or 0,
                                                          'bytes': replica.get('bytes') or 0,
                                                          'url': pfn,
                                                          'duration': duration,
                                                          'protocol': prot.attributes['scheme']})
                            deleted_replicas.append(replica)
                        except SourceNotFound:
                            err_msg = 'Dark Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (worker_number, total_workers, replica['scope'], replica['name'], pfn, rse)
                            logging.warning(err_msg)
                            deleted_replicas.append(replica)
                        except (ServiceUnavailable, RSEAccessDenied, ResourceTemporaryUnavailable) as error:
                            err_msg = 'Dark Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, total_workers, replica['scope'], replica['name'], pfn, rse, str(error))
                            logging.warning(err_msg)
                            add_message('deletion-failed', {'scope': replica['scope'].external,
                                                            'name': replica['name'],
                                                            'rse': rse,
                                                            'rse_id': rse_id,
                                                            'file-size': replica.get('bytes') or 0,
                                                            'bytes': replica.get('bytes') or 0,
                                                            'url': pfn,
                                                            'reason': str(error),
                                                            'protocol': prot.attributes['scheme']})
                        except Exception:
                            logging.critical(traceback.format_exc())
                finally:
                    prot.close()

                delete_quarantined_replicas(rse_id=rse_id, replicas=deleted_replicas)

                if once:
                    break

            if once:
                break

            if nothing_to_do:
                logging.info('Dark Reaper %s-%s: Nothing to do. I will sleep for 60s', worker_number, total_workers)
                time.sleep(60)

        except DatabaseException as error:
            logging.warning('Reaper: %s', str(error))
        except Exception:
            logging.critical(traceback.format_exc())

    die(executable=executable, hostname=hostname, pid=pid, thread=thread, hash_executable=hash_executable)
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
    return
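# hashlib in Python 3 only accepts bytes, hence the .encode() added to the
# hash_executable computation in the daemon above. A standalone illustration
# of building the same per-worker hash key (values are placeholders):
import hashlib

executable = 'reaper'
rses = ['rse-id-1', 'rse-id-2']
hash_executable = hashlib.sha256((executable + ''.join(rses)).encode()).hexdigest()
print(hash_executable)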
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.
    """
    logging.info('request: starting')

    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()

    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]

    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    rse.add_rse(site_a)
    tmp_proto['hostname'] = src.split(':')[1][2:]
    tmp_proto['port'] = src.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + src.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': srctoken, 'web_service_path': ''}
    rse.add_protocol(site_a, tmp_proto)

    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}

    rse.add_rse(site_b)
    tmp_proto['hostname'] = dst.split(':')[1][2:]
    tmp_proto['port'] = dst.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + dst.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': dsttoken, 'web_service_path': ''}
    rse.add_protocol(site_b, tmp_proto)

    si = rsemanager.get_rse_info(site_a)
    session = get_session()

    logging.info('request: started')

    while not graceful_stop.is_set():
        try:
            ts = time.time()
            tmp_name = generate_uuid()

            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name, type=DIDType.DATASET, account='root', session=session)

            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]

            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except Exception:
                # the directory may already exist
                pass

            # upload the test file
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except Exception:
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                break

            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name='file-%s' % tmp_name,
                                bytes=config_get_int('injector', 'bytes'),
                                adler32=config_get('injector', 'adler32'),
                                md5=config_get('injector', 'md5'),
                                account='root', session=session)

            # attach the file to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name,
                            dids=[{'scope': 'mock', 'name': 'file-%s' % tmp_name,
                                   'bytes': config_get_int('injector', 'bytes')}],
                            account='root', session=session)

            # add rule for the dataset
            ts = time.time()
            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}],
                          account='root', copies=1, rse_expression=site_b,
                          grouping='ALL', weight=None, lifetime=None, locked=False,
                          subscription_id=None, activity='mock-injector', session=session)
            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time() - ts) * 1000)

            record_counter('daemons.mock.conveyorinjector.request_transfer')

            session.commit()
        except Exception:
            session.rollback()
            logging.critical(traceback.format_exc())

        if once:
            return

    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')
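# The split(':') gymnastics above can be expressed more robustly with
# urllib.parse. A hedged, self-contained equivalent for pulling hostname,
# port and prefix out of an endpoint URL (the URL itself is a made-up
# example, not a real endpoint):
from urllib.parse import urlparse

src = 'srm://srm.example.org:8443/srm/managerv2?SFN=/pnfs/rucio'
parsed = urlparse(src)
hostname = parsed.hostname   # 'srm.example.org'
port = parsed.port           # 8443
prefix = parsed.path         # '/srm/managerv2'
print(hostname, port, prefix)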