def setupClass(cls):
    """POSIX (RSE/PROTOCOLS): Creating necessary directories and files """
    # Create a sparse 1 MB local source file and copy it for each local test file.
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB
        out.write(b'\0')  # bytes literal: the file is opened in binary mode
    for f in MgrTestCases.files_local:
        shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
    # Resolve the storage prefix of the MOCK-POSIX RSE from the repository file.
    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    prefix = data['MOCK-POSIX']['protocols']['supported']['file']['prefix']
    try:
        os.mkdir(prefix)
    except Exception as e:
        # Best effort: the directory may already exist.
        print(e)
    os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % prefix)
    cls.static_file = '%s/data.raw' % prefix
    # RSE info and protocol are loop-invariant: resolve them once, not per file.
    rse_info = mgr.get_rse_info('MOCK-POSIX')
    protocol = mgr.create_protocol(rse_info, 'write')
    for f in MgrTestCases.files_remote:
        # list(...) wrapper: dict views are not indexable on Python 3.
        pfn = list(mgr.lfns2pfns(rse_info, {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
        path = protocol.pfn2path(pfn)
        dirs = os.path.dirname(path)
        if not os.path.exists(dirs):
            os.makedirs(dirs)
        shutil.copy('%s/data.raw' % prefix, path)
def generate_rse(endpoint, token):
    """
    Create a throwaway RSE for the given endpoint and return its settings.

    The protocol is webdav for https:// endpoints and srm otherwise; an FTS
    attribute and an unlimited root account quota are attached.

    :param endpoint: URL of the storage endpoint ('<scheme>://<host>:<port>/<path>').
    :param token: space token, used only for srm endpoints.
    :returns: the RSE info dictionary of the newly created RSE.
    """
    rse_name = 'RSE%s' % generate_uuid().upper()
    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not endpoint.startswith('https://'):
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {'lan': {'read': 1, 'write': 1, 'delete': 1},
                    'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    rse_id = rse.add_rse(rse_name)
    # endpoint looks like '<scheme>://<host>:<port>/<path>'
    tmp_proto['hostname'] = endpoint.split(':')[1][2:]
    tmp_proto['port'] = endpoint.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + endpoint.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': token,
                                            'web_service_path': '/srm/managerv2?SFN='}
    rse.add_protocol(rse_id=rse_id, parameter=tmp_proto)
    rse.add_rse_attribute(rse_id=rse_id, key='fts', value='https://fts3-pilot.cern.ch:8446')
    # Resolve the RSE info once and reuse it (the original resolved it twice).
    rse_info = rsemanager.get_rse_info(rse_name)
    account_limit.set_account_limit(account='root', rse_id=rse_info['id'], bytes=-1)
    return rse_info
def test_add_list_bad_replicas(self):
    """ REPLICA (CORE): Add bad replicas and list them"""
    tmp_scope = 'mock'
    nbfiles = 5
    # Adding replicas to deterministic RSE
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
              'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
    rse_info = rsemgr.get_rse_info('MOCK')
    rse_id1 = rse_info['id']
    add_replicas(rse='MOCK', files=files, account='root', ignore_availability=True)
    # Listing replicas on deterministic RSE
    replicas = []
    list_rep = []
    for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm']):
        replicas.extend(replica['rses']['MOCK'])
        list_rep.append(replica)
    # Declare all listed PFNs bad; an empty dict means no PFN was rejected.
    r = declare_bad_file_replicas(replicas, 'This is a good reason', 'root')
    assert_equal(r, {})
    bad_replicas = list_bad_replicas()
    # Count how many declared replicas show up in the bad-replica listing for this RSE.
    nbbadrep = 0
    for rep in list_rep:
        for badrep in bad_replicas:
            if badrep['rse_id'] == rse_id1:
                if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                    nbbadrep += 1
    assert_equal(len(replicas), nbbadrep)
    # Adding replicas to non-deterministic RSE
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
              'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
              'meta': {'events': 10}} for i in range(nbfiles)]
    rse_info = rsemgr.get_rse_info('MOCK2')
    rse_id2 = rse_info['id']
    add_replicas(rse='MOCK2', files=files, account='root', ignore_availability=True)
    # Listing replicas on non-deterministic RSE
    replicas = []
    list_rep = []
    for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm']):
        replicas.extend(replica['rses']['MOCK2'])
        list_rep.append(replica)
    r = declare_bad_file_replicas(replicas, 'This is a good reason', 'root')
    assert_equal(r, {})
    bad_replicas = list_bad_replicas()
    # Same count check as above, this time against the non-deterministic RSE id.
    nbbadrep = 0
    for rep in list_rep:
        for badrep in bad_replicas:
            if badrep['rse_id'] == rse_id2:
                if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                    nbbadrep += 1
    assert_equal(len(replicas), nbbadrep)
    # Now adding non-existing bad replicas
    files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
    r = declare_bad_file_replicas(files, 'This is a good reason', 'root')
    # Unknown PFNs are reported back, grouped per RSE.
    output = ['%s Unknown replica' % rep for rep in files]
    assert_equal(r, {'MOCK2': output})
def get_did_from_pfns(pfns, rse, session=None):
    """
    Get the DIDs associated to a PFN on one given RSE

    :param pfns: The list of PFNs.
    :param rse: The RSE name.
    :param session: The database session in use.
    :returns: A dictionary {pfn: {'scope': scope, 'name': name}}
    """
    rse_info = rsemgr.get_rse_info(rse, session=session)
    rse_id = rse_info['id']
    pfndict = {}
    p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    if rse_info['deterministic']:
        # Deterministic RSE: scope and name can be derived from the parsed path alone.
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = parsed_pfn[pfn]['path']
            if path.startswith('user') or path.startswith('group'):
                # user/group scopes span two path components, e.g. 'user/jdoe' -> 'user.jdoe'.
                scope = '%s.%s' % (path.split('/')[0], path.split('/')[1])
                name = parsed_pfn[pfn]['name']
            else:
                scope = path.split('/')[0]
                name = parsed_pfn[pfn]['name']
            yield {pfn: {'scope': scope, 'name': name}}
    else:
        # Non-deterministic RSE: the path carries no DID information,
        # so look each path up in the replica table instead.
        condition = []
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
            # Remember which PFN each path came from so results can be mapped back.
            pfndict[path] = pfn
            condition.append(and_(models.RSEFileAssociation.path == path, models.RSEFileAssociation.rse_id == rse_id))
        for scope, name, pfn in session.query(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.path).filter(or_(*condition)):
            yield {pfndict[pfn]: {'scope': scope, 'name': name}}
def test_pfn_filename_in_dataset(self):
    """ PFN (CORE): Test the splitting of PFNs cornercase: filename in prefix"""
    rse_info = rsemgr.get_rse_info('MOCK', **self.vo)
    # mock scheme: the file name repeats the last component of the prefix.
    protocol = rsemgr.create_protocol(rse_info, 'read', scheme='mock')
    pfn = 'mock://localhost/tmp/rucio_rse/rucio_rse'
    parsed = protocol.parse_pfns([pfn])[pfn]
    assert parsed['scheme'] == 'mock'
    assert parsed['hostname'] == 'localhost'
    assert parsed['port'] == 0
    assert parsed['prefix'] == '/tmp/rucio_rse/'
    assert parsed['path'] == '/'
    assert parsed['name'] == 'rucio_rse'
    # srm scheme: deeply nested dataset path after the web-service marker.
    protocol = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    pfn = 'srm://mock.com/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/group/phys-fake/mc15_13TeV/group.phys-fake.mc15_13TeV/mc15c.MGHwpp_tHjb125_yt_minus1.MxAODFlavorSys.p2908.h015.totape_20170825.root'
    parsed = protocol.parse_pfns([pfn])[pfn]
    assert parsed['scheme'] == 'srm'
    assert parsed['hostname'] == 'mock.com'
    assert parsed['port'] == 8443
    assert parsed['prefix'] == '/rucio/tmpdisk/rucio_tests/'
    assert parsed['path'] == '/group/phys-fake/mc15_13TeV/group.phys-fake.mc15_13TeV/'
    assert parsed['name'] == 'mc15c.MGHwpp_tHjb125_yt_minus1.MxAODFlavorSys.p2908.h015.totape_20170825.root'
def tearDownClass(cls):
    """S3 (RSE/PROTOCOLS): Removing created directories and files """
    # Remove test files from storage
    # rse_tag = 'AMAZON-BOTO'
    rse_tag = 'BNL-BOTO'
    rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        # Best effort: carry on without credentials (the connection will fail loudly).
        print('No credentials found for this RSE.')
    # Find the first s3 protocol entry; `protocol` stays bound after the loop.
    for protocol in rse_settings['protocols']:
        if protocol['scheme'] == 's3':
            break
    conn = boto.connect_s3(host=protocol['hostname'],
                           port=int(protocol.get('port', 80)),
                           aws_access_key_id=rse_settings['credentials']['access_key'],
                           aws_secret_access_key=rse_settings['credentials']['secret_key'],
                           is_secure=rse_settings['credentials'].get('is_secure', False),
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())
    # The original repeated the identical protocol scan a second time here;
    # `protocol` already points at the s3 entry, so the rescan was redundant.
    bucket_name = protocol['prefix']
    bucket = conn.get_bucket(bucket_name)
    # Delete every object left in the test bucket.
    for key in bucket.list():
        key.delete()
def tearDownClass(cls):
    """S3 (RSE/PROTOCOLS): Removing created directories and files """
    # Remove test files from storage
    # rse_tag = 'AMAZON-BOTO'
    rse_tag = 'BNL-BOTO'
    rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        # print as a function: valid on both Python 2 and 3
        # (the original used a Python-2-only print statement).
        print('No credentials found for this RSE.')
    # Find the first s3 protocol entry; `protocol` stays bound after the loop.
    for protocol in rse_settings['protocols']:
        if protocol['scheme'] == 's3':
            break
    conn = boto.connect_s3(host=protocol['hostname'],
                           port=int(protocol.get('port', 80)),
                           aws_access_key_id=rse_settings['credentials']['access_key'],
                           aws_secret_access_key=rse_settings['credentials']['secret_key'],
                           is_secure=rse_settings['credentials'].get('is_secure', False),
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())
    # The original repeated the identical protocol scan a second time here;
    # `protocol` already points at the s3 entry, so the rescan was redundant.
    bucket_name = protocol['prefix']
    bucket = conn.get_bucket(bucket_name)
    # Delete every object left in the test bucket.
    for key in bucket.list():
        key.delete()
def load_info(self, session=None):
    """Return the RSE info for this object, resolving and caching it on first use."""
    if self.info is None:
        rse_name = self.load_name(session=session)
        vo = rse_core.get_rse_vo(rse_id=self.id, session=session)
        self.info = rsemgr.get_rse_info(rse=rse_name, vo=vo, session=session)
    return self.info
def test_get_did_from_pfns_deterministic(self):
    """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
    tmp_scope = 'mock'
    rse = 'MOCK3'
    nbfiles = 3
    pfns = []
    expected = {}  # renamed from `input` to avoid shadowing the builtin
    rse_info = rsemgr.get_rse_info(rse)
    # MOCK3 must be deterministic for PFN -> DID derivation to be possible.
    assert_equal(rse_info['deterministic'], True)
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
              'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}}
             for _ in range(nbfiles)]
    p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    for f in files:
        # list(...) wrapper: dict views are not indexable on Python 3.
        pfn = list(p.lfns2pfns(lfns={'scope': f['scope'], 'name': f['name']}).values())[0]
        pfns.append(pfn)
        expected[pfn] = {'scope': f['scope'], 'name': f['name']}
    add_replicas(rse=rse, files=files, account='root', ignore_availability=True)
    # Each resolved mapping must match what lfns2pfns produced above.
    for result in self.replica_client.get_did_from_pfns(pfns, rse):
        pfn = list(result.keys())[0]
        assert_equal(expected[pfn], list(result.values())[0])
def test_update_replicas_paths(self):
    """ REPLICA (CORE): Force update the replica path """
    tmp_scope = 'mock'
    nbfiles = 5
    rse_info = rsemgr.get_rse_info('MOCK')
    pfn = 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/does/not/really/matter/where'
    files = []
    for _ in range(nbfiles):
        files.append({'scope': tmp_scope,
                      'name': 'file_%s' % generate_uuid(),
                      'pfn': pfn,
                      'bytes': 1,
                      'adler32': '0cc737eb',
                      'meta': {'events': 10},
                      'rse_id': rse_info['id'],
                      'path': '/does/not/really/matter/where'})
    add_replicas(rse='MOCK2', files=files, account='root', ignore_availability=True)
    update_replicas_paths(files)
    dids = [{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files]
    for replica in list_replicas(dids=dids, schemes=['srm']):
        # force the changed string - if we look it up from the DB, then we're not testing anything :-D
        assert_equal(replica['rses']['MOCK2'][0], pfn)
def test_download_succeeds_md5only(self):
    """CLIENT(USER): Rucio download succeeds MD5 only"""
    # user has a file to upload
    filename = file_generator()
    file_md5 = md5(filename)
    filesize = stat(filename).st_size
    lfn = {'name': filename[5:], 'scope': self.user, 'bytes': filesize, 'md5': file_md5}
    # user uploads file
    self.replica_client.add_replicas(files=[lfn], rse=self.def_rse)
    rse_settings = rsemgr.get_rse_info(self.def_rse)
    protocol = rsemgr.create_protocol(rse_settings, 'write')
    protocol.connect()
    # list(...) wrapper: dict views are not indexable on Python 3.
    pfn = list(protocol.lfns2pfns(lfn).values())[0]
    protocol.put(filename[5:], pfn, filename[:5])
    protocol.close()
    remove(filename)
    # download files
    cmd = 'rucio -v download --dir /tmp {0}:{1}'.format(self.user, filename[5:])
    print(self.marker + cmd)
    exitcode, out, err = execute(cmd)
    print(out, err)
    # search for the files with ls
    cmd = 'ls /tmp/{0}'.format(self.user)  # search in /tmp/
    print(self.marker + cmd)
    exitcode, out, err = execute(cmd)
    print(out, err)
    nose.tools.assert_not_equal(re.search(filename[5:], out), None)
    # Best-effort cleanup of a leftover dataset directory from other tests.
    try:
        for i in listdir('data13_hip'):
            unlink('data13_hip/%s' % i)
        rmdir('data13_hip')
    except Exception:
        pass
def __init__(self, tmpdir, rse_tag, user, static_file, vo='def', impl=None):
    """Load RSE settings (plus credentials when available) and stash test fixtures."""
    self.rse_settings = mgr.get_rse_info(rse=rse_tag, vo=vo)
    try:
        data = load_test_conf_file('rse-accounts.cfg.template')
        self.rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        print('No credentials found for this RSE.')
    self.tmpdir = tmpdir
    self.gettmpdir = tempfile.mkdtemp()
    self.user = user
    self.static_file = static_file
    # Expand a bare protocol name into its fully qualified class path.
    self.impl = None
    if impl and self.rse_settings['protocols']:
        qualified = impl if '.' in impl else impl + '.Default'
        self.impl = 'rucio.rse.protocols.' + qualified
    self.vo = vo
def setupClass(cls): """SFTP (RSE/PROTOCOLS): Creating necessary directories and files """ # Creating local files cls.tmpdir = tempfile.mkdtemp() cls.user = uuid() with open("%s/data.raw" % cls.tmpdir, "wb") as out: out.seek((1024 * 1024) - 1) # 1 MB out.write('\0') for f in MgrTestCases.files_local: os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f)) # Load local credentials from file with open('etc/rse-accounts.cfg') as f: data = json.load(f) credentials = data['LXPLUS'] lxplus = pysftp.Connection(**credentials) with open('etc/rse_repository.json') as f: prefix = json.load(f)['LXPLUS']['protocols']['supported']['sftp']['prefix'] lxplus.execute('mkdir %s' % prefix) lxplus.execute('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % prefix) cls.static_file = 'sftp://lxplus.cern.ch:22%sdata.raw' % prefix protocol = mgr.create_protocol(mgr.get_rse_info('LXPLUS'), 'write') for f in MgrTestCases.files_remote: tmp = protocol.parse_pfns(protocol.lfns2pfns({'name': f, 'scope': 'user.%s' % cls.user}).values()[0]).values()[0] for cmd in ['mkdir -p %s' % ''.join([tmp['prefix'], tmp['path']]), 'ln -s %sdata.raw %s' % (prefix, ''.join([tmp['prefix'], tmp['path'], tmp['name']]))]: lxplus.execute(cmd) lxplus.close()
def test_get_did_from_pfns_nondeterministic(self):
    """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites"""
    rse = 'MOCK2'
    tmp_scope = 'mock'
    nbfiles = 3
    pfns = []
    input = {}  # NOTE(review): shadows the builtin `input`
    rse_info = rsemgr.get_rse_info(rse)
    # MOCK2 must be non-deterministic for the DB-lookup path to be exercised.
    assert_equal(rse_info['deterministic'], False)
    # Python 2 literals (1L, xrange) — this snippet predates the Python 3 port.
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb',
              'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
              'meta': {'events': 10}} for i in xrange(nbfiles)]
def get_rse_url(self):
    """
    Return the base path of the rucio url

    Builds '<scheme>://<host>:<port><prefix>' from the first protocol entry of
    the RSE. If a working folder is configured, that subdirectory is created
    (recursively) and returned instead.

    :returns: the URL string, or 'Wrong url parameters' if a component is missing.
    """
    rse_settings = rsemgr.get_rse_info(self.orgRse)
    protocol = rse_settings['protocols'][0]
    schema = protocol['scheme']
    prefix = protocol['prefix']
    port = protocol['port']
    rucioserver = protocol['hostname']
    rse_url = list()
    # Check each raw component; the original checked a concatenation that could
    # never be None (and would raise before the check if hostname/port were None).
    if None not in (schema, rucioserver, port, prefix):
        rse_url.extend([schema, rucioserver + ':' + str(port), prefix, '', ''])
        if self.working_folder is not None:
            # Check if our test folder exists
            path = os.path.join(urlunsplit(rse_url), self.working_folder)
            # NOTE(review): 775 is decimal here; mode 0o775 may be intended — confirm.
            self.gfal.mkdir_rec(path, 775)
            return path
        else:
            return urlunsplit(rse_url)
    else:
        return 'Wrong url parameters'
def get_conveyor_rses(rses=None, include_rses=None, exclude_rses=None, vos=None, logger=logging.log):
    """
    Get a list of rses for conveyor

    :param rses: List of rses (Single-VO only)
    :param include_rses: RSEs to include
    :param exclude_rses: RSEs to exclude
    :param vos: VOs on which to look for RSEs. Only used in multi-VO mode.
                If None, we either use all VOs if run from "def", or the current VO otherwise.
    :param logger: Optional decorated logger that can be passed from the calling daemons or servers.
    :return: List of working rses
    """
    multi_vo = config_get_bool('common', 'multi_vo', raise_exception=False, default=False)
    if not multi_vo:
        # Single-VO: the vos argument is meaningless; operate on 'def' only.
        if vos:
            logger(logging.WARNING, 'Ignoring argument vos, this is only applicable in a multi-VO setup.')
        vos = ['def']
    else:
        if vos:
            # Validate the requested VOs against the known ones.
            invalid = set(vos) - set([v['vo'] for v in list_vos()])
            if invalid:
                msg = 'VO{} {} cannot be found'.format('s' if len(invalid) > 1 else '', ', '.join([repr(v) for v in invalid]))
                raise VONotFound(msg)
        else:
            vos = [v['vo'] for v in list_vos()]
        logger(logging.INFO, 'This instance will work on VO%s: %s' % ('s' if len(vos) > 1 else '', ', '.join([v for v in vos])))
    working_rses = []
    rses_list = []
    for vo in vos:
        rses_list.extend(list_rses(filters={'vo': vo}))
    if rses:
        # Explicit RSE names: keep only the matching entries.
        working_rses = [rse for rse in rses_list if rse['rse'] in rses]
    if include_rses:
        for vo in vos:
            try:
                parsed_rses = parse_expression(include_rses, filter={'vo': vo}, session=None)
            except InvalidRSEExpression:
                # A bad include expression is logged and skipped, not fatal.
                logger(logging.ERROR, "Invalid RSE exception %s to include RSEs", include_rses)
            else:
                for rse in parsed_rses:
                    if rse not in working_rses:
                        working_rses.append(rse)
    if not (rses or include_rses):
        # No explicit selection: start from all RSEs of the chosen VOs.
        working_rses = rses_list
    if exclude_rses:
        try:
            parsed_rses = parse_expression(exclude_rses, session=None)
        except InvalidRSEExpression as error:
            # A bad exclude expression is logged and skipped, not fatal.
            logger(logging.ERROR, "Invalid RSE exception %s to exclude RSEs: %s", exclude_rses, error)
        else:
            working_rses = [rse for rse in working_rses if rse not in parsed_rses]
    # Resolve each remaining RSE to its full settings dictionary.
    working_rses = [rsemgr.get_rse_info(rse_id=rse['id']) for rse in working_rses]
    return working_rses
def setUpClass(cls): """SRM (RSE/PROTOCOLS): Creating necessary directories and files """ # Creating local files cls.tmpdir = tempfile.mkdtemp() cls.user = uuid() with open("%s/data.raw" % cls.tmpdir, "wb") as out: out.seek((1024 * 1024) - 1) # 1 MB out.write('\0') for f in MgrTestCases.files_local: shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f)) with open('etc/rse_repository.json') as f: data = json.load(f) prefix = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'][ 'prefix'] hostname = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported'][ 'srm']['hostname'] if hostname.count("://"): hostname = hostname.split("://")[1] if 'port' in data['FZK-LCG2_SCRATCHDISK']['protocols']['supported'][ 'srm'].keys(): port = int(data['FZK-LCG2_SCRATCHDISK']['protocols']['supported'] ['srm']['port']) else: port = 0 if 'extended_attributes' in data['FZK-LCG2_SCRATCHDISK']['protocols'][ 'supported']['srm'].keys() and 'web_service_path' in data[ 'FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm'][ 'extended_attributes'].keys(): web_service_path = data['FZK-LCG2_SCRATCHDISK']['protocols'][ 'supported']['srm']['extended_attributes']['web_service_path'] else: web_service_path = '' os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % cls.tmpdir) if port > 0: cls.static_file = 'srm://%s:%s%s%s/data.raw' % ( hostname, port, web_service_path, prefix) else: cls.static_file = 'srm://%s%s%s/data.raw' % ( hostname, web_service_path, prefix) cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % ( cls.tmpdir, cls.static_file) execute(cmd) for f in MgrTestCases.files_remote: tmp = mgr.lfns2pfns(mgr.get_rse_info('FZK-LCG2_SCRATCHDISK'), { 'name': f, 'scope': 'user.%s' % cls.user }, scheme='srm').values()[0] cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % ( cls.tmpdir, tmp) execute(cmd)
def test_get_mgr_SourceNotFound_single_pfn(self):
    """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    # Upload all remote test files first, then delegate to the shared test case.
    for fichier in MgrTestCases.files_remote:
        lfn = {'name': fichier, 'scope': 'user.%s' % self.user}
        mgr.upload(mgr.get_rse_info(self.rse_id), [lfn])
    self.mtc.test_get_mgr_SourceNotFound_single_pfn()
def verify_stage_out(fspec):
    """
    Checks that the uploaded file is physically at the destination.
    :param fspec: file specifications
    """
    from rucio.rse import rsemanager as rsemgr
    settings = rsemgr.get_rse_info(fspec.ddmendpoint)
    target = {'name': fspec.lfn, 'scope': fspec.scope}
    logger.info('Checking file: %s' % str(fspec.lfn))
    return rsemgr.exists(settings, [target])
def test_get_did_from_pfns_deterministic(self):
    """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
    tmp_scope = 'mock'
    rse = 'MOCK3'
    nbfiles = 3
    pfns = []
    input = {}  # NOTE(review): shadows the builtin `input`
    rse_info = rsemgr.get_rse_info(rse)
    # MOCK3 must be deterministic for PFN -> DID derivation to be possible.
    assert_equal(rse_info['deterministic'], True)
    # Python 2 literals (1L, xrange) — this snippet predates the Python 3 port.
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L,
              'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)]
def test_delete_mgr_ok_multi(self):
    """MOCK (RSE/PROTOCOLS): Delete multiple files from storage (Success)"""
    # Upload all remote test files first, then delegate to the shared test case.
    for fichier in MgrTestCases.files_remote:
        lfn = {'name': fichier, 'scope': 'user.%s' % self.user}
        mgr.upload(mgr.get_rse_info(self.rse_id), [lfn])
    self.mtc.test_delete_mgr_ok_multi()
def setUpClass(cls): """SSH (RSE/PROTOCOLS): Creating necessary directories and files """ # Getting info for the test environment rse_id, prefix, hostname, port, sshuser = cls.get_rse_info() try: os.mkdir(prefix) except Exception as e: print(e) # Creating local files cls.tmpdir = tempfile.mkdtemp() cls.user = uuid() set_preferred_checksum('md5') cmd = 'ssh-keygen -R %s' % (cls.hostname) execute(cmd) cmd = 'ssh-keyscan %s >> /root/.ssh/known_hosts' % (cls.hostname) execute(cmd) with open("%s/data.raw" % cls.tmpdir, "wb") as out: out.seek((1024 * 1024) - 1) # 1 MB out.write(b'\0') for f in MgrTestCases.files_local: shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f)) protocol = rsemanager.create_protocol(rsemanager.get_rse_info(rse_id), 'write') protocol.connect() os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % prefix) cls.static_file = '%s@%s:/%s/data.raw' % (sshuser, hostname, prefix) pathdir = os.path.dirname(prefix) cmd = 'ssh %s@%s "mkdir -p %s" && scp %s/data.raw %s' % ( sshuser, hostname, str(pathdir), prefix, cls.static_file) execute(cmd) for f in MgrTestCases.files_remote: path = str(prefix + protocol._get_path('user.%s' % cls.user, f)) pathdir = os.path.dirname(path) cmd = 'ssh %s@%s "mkdir -p %s" && scp %s/data.raw %s@%s:%s' % ( sshuser, hostname, str(pathdir), prefix, sshuser, hostname, path) execute(cmd) for f in MgrTestCases.files_local_and_remote: shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f)) path = str(prefix + protocol._get_path('user.%s' % cls.user, f)) pathdir = os.path.dirname(path) cmd = 'ssh {0}@{1} "mkdir -p {2}" && scp {3}/{4} {5}@{6}:{7}'.format( sshuser, hostname, str(pathdir), str(cls.tmpdir), str(f), sshuser, hostname, path) execute(cmd)
def test_get_mgr_SourceNotFound_multi(self):
    """MOCK (RSE/PROTOCOLS): Get multiple files from storage providing LFNs and PFNs (SourceNotFound)"""
    # Upload all remote test files, then expect the shared test case to raise.
    for fichier in MgrTestCases.files_remote:
        lfn = {'name': fichier, 'scope': 'user.%s' % self.user}
        mgr.upload(mgr.get_rse_info(self.rse_id), [lfn])
    with pytest.raises(exception.SourceNotFound):
        self.mtc.test_get_mgr_SourceNotFound_multi()
def test_get_did_from_pfns_nondeterministic(self):
    """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites"""
    rse = 'MOCK2'
    tmp_scope = 'mock'
    nbfiles = 3
    pfns = []
    input = {}  # NOTE(review): shadows the builtin `input`
    rse_info = rsemgr.get_rse_info(rse)
    # MOCK2 must be non-deterministic for the DB-lookup path to be exercised.
    assert_equal(rse_info['deterministic'], False)
    # Python 2 literals (1L, xrange) — this snippet predates the Python 3 port.
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb',
              'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
              'meta': {'events': 10}} for i in xrange(nbfiles)]
def __init__(self, tmpdir, rse_tag, user, static_file, vo='def'):
    """Load RSE settings (plus credentials when available) and stash test fixtures."""
    self.rse_settings = mgr.get_rse_info(rse=rse_tag, vo=vo)
    try:
        credentials = load_test_conf_file('rse-accounts.cfg')[rse_tag]
        self.rse_settings['credentials'] = credentials
    except KeyError:
        print('No credentials found for this RSE.')
    self.tmpdir = tmpdir
    self.gettmpdir = tempfile.mkdtemp()
    self.user = user
    self.static_file = static_file
def test_pfn_mock(self):
    """ PFN (CORE): Test the splitting of PFNs with mock"""
    rse_info = rsemgr.get_rse_info('MOCK', **self.vo)
    protocol = rsemgr.create_protocol(rse_info, 'read', scheme='mock')
    pfn = 'mock://localhost/tmp/rucio_rse/whatever'
    parsed = protocol.parse_pfns([pfn])[pfn]
    # Every component of the PFN must be split out correctly.
    assert parsed['scheme'] == 'mock'
    assert parsed['hostname'] == 'localhost'
    assert parsed['port'] == 0
    assert parsed['prefix'] == '/tmp/rucio_rse/'
    assert parsed['path'] == '/'
    assert parsed['name'] == 'whatever'
def VerifyStageOut(self, dst, fspec):
    """
    Checks that the uploaded file is physically at the destination.
    :param dst: destination rse
    :param fspec: file specifications
    """
    from rucio.rse import rsemanager as rsemgr
    rse_settings = rsemgr.get_rse_info(dst)
    target = {'name': fspec.lfn, 'scope': fspec.scope}
    tolog('Checking file: %s' % str(fspec.lfn))
    return rsemgr.exists(rse_settings, [target])
def __init__(self, tmpdir, rse_tag, user, static_file):
    """Load RSE settings (plus credentials when available) and stash test fixtures."""
    self.rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        self.rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        # print as a function: valid on both Python 2 and 3
        # (the original used a Python-2-only print statement).
        print('No credentials found for this RSE.')
    self.tmpdir = tmpdir
    self.user = user
    self.static_file = static_file
def VerifyStageOut(self, dst, fspec):
    """
    Checks that the uploaded file is physically at the destination.
    :param dst: destination rse
    :param fspec: file specifications
    """
    from rucio.rse import rsemanager as rsemgr
    rse_settings = rsemgr.get_rse_info(dst)
    uploaded_file = {'name': fspec.lfn, 'scope': fspec.scope}
    tolog('Checking file: %s' % str(fspec.lfn))
    return rsemgr.exists(rse_settings, [uploaded_file])
def test_update_replicas_paths(self):
    """ REPLICA (CORE): Force update the replica path """
    tmp_scope = 'mock'
    nbfiles = 5
    rse_info = rsemgr.get_rse_info('MOCK')
    # Python 2 literals (1L, xrange) — this snippet predates the Python 3 port.
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
              'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/does/not/really/matter/where',
              'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10},
              'rse_id': rse_info['id'], 'path': '/does/not/really/matter/where'} for i in xrange(nbfiles)]
def __init__(self, tmpdir, rse_tag, user, static_file):
    """Load RSE settings (plus credentials when available) and stash test fixtures."""
    self.rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            self.rse_settings['credentials'] = json.load(f)[rse_tag]
    except KeyError:
        print('No credentials found for this RSE.')
    self.tmpdir = tmpdir
    self.gettmpdir = tempfile.mkdtemp()
    self.user = user
    self.static_file = static_file
def test_pfn_https(self):
    """ PFN (CORE): Test the splitting of PFNs with https"""
    rse_info = rsemgr.get_rse_info('MOCK', **self.vo)
    protocol = rsemgr.create_protocol(rse_info, 'read', scheme='https')
    pfn = 'https://mock.com:2880/pnfs/rucio/disk-only/scratchdisk/whatever'
    parsed = protocol.parse_pfns([pfn])[pfn]
    # Every component of the PFN must be split out correctly.
    assert parsed['scheme'] == 'https'
    assert parsed['hostname'] == 'mock.com'
    assert parsed['port'] == 2880
    assert parsed['prefix'] == '/pnfs/rucio/disk-only/scratchdisk/'
    assert parsed['path'] == '/'
    assert parsed['name'] == 'whatever'
def test_update_replicas_paths(self):
    """ REPLICA (CORE): Force update the replica path """
    tmp_scope = 'mock'
    nbfiles = 5
    rse_info = rsemgr.get_rse_info('MOCK')
    # Python 2 literals (1L, xrange) — this snippet predates the Python 3 port.
    # NOTE(review): the PFN below contains a double slash ('tests//does') — confirm intended.
    files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(),
              'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests//does/not/really/matter/where',
              'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10},
              'rse_id': rse_info['id'], 'path': '/does/not/really/matter/where'} for i in xrange(nbfiles)]
def inject(rse, older_than):
    """
    Scan an object-store RSE and register objects older than `older_than` days
    as temporary DIDs so they can later be deleted.

    :param rse: RSE name to scan.
    :param older_than: age threshold in days.
    """
    logging.info('Starting to inject objects for RSE: %s' % rse)
    num_of_queued_dids = get_count_of_expired_temporary_dids(rse)
    rse_id = rse_core.get_rse_id(rse)
    if num_of_queued_dids < 1000:
        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse=rse, rse_id=rse_id)
        logging.info("needed_free_space: %s" % needed_free_space)
        if needed_free_space is None or needed_free_space > 0:
            rse_info = rsemgr.get_rse_info(rse)
            # Force the s3boto implementation for listing the object store.
            for protocol in rse_info['protocols']:
                protocol['impl'] = 'rucio.rse.protocols.s3boto.Default'
            prot = rsemgr.create_protocol(rse_info, 'delete')
            try:
                prot.connect()
                dids = []
                older_than_time = datetime.datetime.utcnow() - datetime.timedelta(days=older_than)
                older_than_time = older_than_time.replace(tzinfo=pytz.utc)
                for key in prot.list():
                    d = dateutil.parser.parse(key.last_modified)
                    if d < older_than_time:
                        did = {'scope': 'transient', 'name': key.name.encode('utf-8'), 'rse': rse, 'rse_id': rse_id, 'bytes': key.size, 'created_at': d}
                        dids.append(did)
                        if len(dids) == 1000:
                            # Flush in batches of 1000.
                            # NOTE(review): a final partial batch (< 1000) is never
                            # flushed before break/return below — confirm intended.
                            add_temporary_dids(dids=dids, account='root')
                            logging.info('Adding 1000 dids to temp dids.')
                            dids = []
                    else:
                        # Listing is assumed time-ordered: stop at the first too-recent object.
                        logging.info('Found objects newer than %s days, quit to list(normally objects in os are returned with order by time)' % older_than)
                        break
                    if GRACEFUL_STOP.is_set():
                        logging.info('GRACEFUL_STOP is set. quit')
                        break
            except Exception:
                # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt
                # still propagate; everything else is logged and swallowed.
                logging.critical(traceback.format_exc())
    else:
        logging.info("Number of queued deletion for %s is %s, which is bigger than 1000. quit." % (rse, num_of_queued_dids))
def setupClass(cls):
    """WebDAV (RSE/PROTOCOLS): Creating necessary directories and files """
    # HTTPS session authenticated with the user's X509 proxy.
    session = requests.Session()
    session.cert = os.getenv('X509_USER_PROXY')
    session.verify = False  # test endpoint: certificate verification disabled
    cls.site = 'FZK-LCG2_SCRATCHDISK'
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = '******'
    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    scheme = data[cls.site]['protocols']['supported']['https']['scheme']
    prefix = data[cls.site]['protocols']['supported']['https']['prefix']
    hostname = data[cls.site]['protocols']['supported']['https']['hostname']
    port = data[cls.site]['protocols']['supported']['https']['port']
    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024) - 1)  # 1 kB
        # NOTE(review): str written to a binary-mode file — Python 2 only.
        out.write('\0')
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
    cls.static_file = '%s://%s:%s%sdata.raw' % (scheme, hostname, port, prefix)
    rse_settings = rsemanager.get_rse_info(cls.site)
    storage = rsemanager.create_protocol(rse_settings, operation='write', scheme='https')
    storage.connect()
    for f in MgrTestCases.files_remote:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
        # NOTE(review): indexing a dict view (.values()[0]) — Python 2 only.
        destfile = rsemanager.lfns2pfns(rse_settings, [{'name': f, 'scope': 'user.%s' % (cls.user)}, ], operation='write', scheme='https').values()[0]
        try:
            storage.put('%s/%s' % (cls.tmpdir, f), destfile)
        except FileReplicaAlreadyExists as e:
            # The file may be left over from a previous run.
            print(e)
    # Upload the static source file directly over HTTPS.
    with open('%s/data.raw' % cls.tmpdir, 'rb') as f_file:
        session.put(cls.static_file, data=f_file.read(), verify=False, allow_redirects=True)
def setUpClass(cls): """S3 (RSE/PROTOCOLS): Creating necessary directories and files """ # Creating local files cls.tmpdir = tempfile.mkdtemp() cls.user = uuid() # cls.user = '******' # use again when latency issue with S3 storage is resolved with open("%s/data.raw" % cls.tmpdir, "wb") as out: out.seek((1024 * 1024) - 1) # 1 MB out.write('\0') for f in MgrTestCases.files_local: os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f)) fnull = open(os.devnull, 'w') # Create test files on storage try: subprocess.call(["s3cmd", "mb", "s3://USER"], stdout=fnull, stderr=fnull, shell=False) subprocess.call(["s3cmd", "mb", "s3://GROUP"], stdout=fnull, stderr=fnull, shell=False) subprocess.call(["s3cmd", "mb", "s3://NONDETERMINISTIC"], stdout=fnull, stderr=fnull, shell=False) except S3Error: pass cls.static_file = 's3://NONDETERMINISTIC/data.raw' subprocess.call([ "s3cmd", "put", "%s/data.raw" % cls.tmpdir, cls.static_file, "--no-progress" ], stdout=fnull, stderr=fnull) for f in MgrTestCases.files_remote: subprocess.call([ "s3cmd", "cp", cls.static_file, mgr.lfns2pfns(mgr.get_rse_info('SWIFT'), { 'name': f, 'scope': 'user.%s' % cls.user }).values()[0] ], stdout=fnull, stderr=fnull) fnull.close()
def delete_replicas(client, dest_rse, replicas):
    """Best-effort removal of the given replicas from dest_rse; missing files are ignored."""
    rse_settings = rsemanager.get_rse_info(dest_rse)
    # we would expect "delete" operation but tape sites have that disabled for safety
    protocol_delete = rsemanager.create_protocol(rse_settings, operation="read", domain="wan", logger=logger.log)
    lfns = ["{}:{}".format(replica["scope"], replica["name"]) for replica in replicas]
    pfns = client.lfns2pfns(dest_rse, lfns, operation="read")
    protocol_delete.connect()
    for pfn in pfns.values():
        logger.debug(f"Deleting PFN {pfn} from destination RSE {dest_rse}")
        try:
            protocol_delete.delete(pfn)
        except SourceNotFound:
            # Already gone — nothing to do.
            pass
def add_replicas(rse, files, account, rse_id=None, ignore_availability=True, session=None):
    """
    Bulk add file replicas.

    :param rse: The rse name.
    :param files: The list of files.
    :param account: The account owner.
    :param rse_id: The RSE id. To be used if rse parameter is None.
    :param ignore_availability: Ignore the RSE blacklisting.
    :param session: The database session in use.

    :returns: True is successful.
    """
    if rse:
        replica_rse = get_rse(rse=rse, session=session)
    else:
        replica_rse = get_rse(rse=None, rse_id=rse_id, session=session)

    # Bit 2 of the availability mask is "write".
    writable = bool(replica_rse.availability & 2)
    if not writable and not ignore_availability:
        raise exception.RessourceTemporaryUnavailable('%s is temporary unavailable for writing' % rse)

    replicas = __bulk_add_file_dids(files=files, account=account, session=session)

    if not replica_rse.deterministic:
        # Non-deterministic RSEs cannot derive paths from LFNs: every file
        # must carry an explicit PFN, which we parse into a storage path.
        scheme = None
        pfns = []
        for f in files:
            if 'pfn' not in f:
                raise exception.UnsupportedOperation('PFN needed for this (non deterministic) RSE %(rse)s ' % locals())
            scheme = f['pfn'].split(':')[0]
            pfns.append(f['pfn'])
        protocol = rsemgr.create_protocol(rse_settings=rsemgr.get_rse_info(rse, session=session),
                                          operation='write', scheme=scheme)
        parsed = protocol.parse_pfns(pfns=pfns)
        for f in files:
            parts = parsed[f['pfn']]
            f['path'] = parts['path'] + parts['name']

    nbfiles, total_bytes = __bulk_add_replicas(rse_id=replica_rse.id, files=files,
                                               account=account, session=session)
    increase(rse_id=replica_rse.id, files=nbfiles, bytes=total_bytes, session=session)
    return replicas
def setupClass(cls):
    """S3 (RSE/PROTOCOLS): Creating necessary directories and files

    Creates local test files, connects to the BNL-BOTO S3 endpoint with the
    credentials from etc/rse-accounts.cfg, uploads the static file and copies
    it to every remote test key.
    """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    # cls.user = '******' # use again when latency issue with S3 storage is resolved
    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB sparse file
        out.write(b'\0')  # binary-mode file requires bytes, not str
    for f in MgrTestCases.files_local:
        os.symlink('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
    rse_tag = 'BNL-BOTO'
    rse_settings = mgr.get_rse_info(rse_tag)
    try:
        with open('etc/rse-accounts.cfg') as f:
            data = json.load(f)
        rse_settings['credentials'] = data[rse_tag]
    except KeyError:
        # py3-compatible print function (was a py2 print statement)
        print('No credentials found for this RSE.')
    # Find the s3 protocol entry. NOTE(review): if no entry has scheme 's3',
    # 'protocol' is simply the last entry of the list — confirm this is intended.
    for protocol in rse_settings['protocols']:
        if protocol['scheme'] == 's3':
            break
    conn = boto.connect_s3(host=protocol['hostname'],
                           port=int(protocol.get('port', 80)),
                           aws_access_key_id=rse_settings['credentials']['access_key'],
                           aws_secret_access_key=rse_settings['credentials']['secret_key'],
                           is_secure=rse_settings['credentials'].get('is_secure', False),
                           calling_format=boto.s3.connection.OrdinaryCallingFormat())
    # dict views are not indexable on Python 3: materialize before taking [0]
    cls.static_file = list(mgr.lfns2pfns(rse_settings,
                                         {'name': 'data.raw', 'scope': 'user.%s' % cls.user}).values())[0]
    key = get_bucket_key(cls.static_file, conn, create=True)
    key.set_contents_from_filename("%s/data.raw" % cls.tmpdir)
    for f in MgrTestCases.files_remote:
        pfn = list(mgr.lfns2pfns(rse_settings,
                                 {'name': f, 'scope': 'user.%s' % cls.user}).values())[0]
        bucket_name, key_name = get_bucket_key_name(pfn)
        # server-side copy of the already-uploaded static key
        key.copy(bucket_name, key_name)
def upload(files, scope, metadata, rse, account, source_dir, worker_number, total_workers, dataset_lifetime, did=None):
    """Physically upload a list of files from *source_dir* to *rse*.

    Retries the bulk upload up to 3 times with exponential backoff; returns
    False on failure (leftovers are removed), otherwise falls through.

    :param files: list of file names found in source_dir.
    :param scope: scope for the uploaded DIDs.
    :param rse: destination RSE name.
    :param did: optional 'scope:name' of a dataset to attach to.
    """
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}
    client = Client()
    list_files = []
    lfns = []
    logging.debug('Thread [%i/%i] : Looping over the files' % (worker_number, total_workers))
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info('Thread [%i/%i] : File %s : Size %s , adler32 %s' % (worker_number, total_workers, fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size, 'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size, 'adler32': checksum})
    # Physical upload
    logging.info('Thread [%i/%i] : Uploading physically the files %s on %s' % (worker_number, total_workers, str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        for i in range(0, 3):  # range (not py2 xrange): up to 3 attempts
            gs, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info('Returned global status : %s, Returned : %s' % (str(gs), str(ret)))
            if not gs:
                for x in ret:
                    # A pre-existing replica counts as success; anything else
                    # triggers an exponential backoff and a retry.
                    if (not isinstance(ret[x], FileReplicaAlreadyExists)) and ret[x] is not True:
                        sleep(exp(i))
                        success_upload = False
                        logging.error('Problem to upload file %s with error %s' % (x, str(ret[x])))
                        break
            else:
                break
        if not success_upload:
            logging.error('Thread [%i/%i] : Upload operation to %s failed, removing leftovers' % (worker_number, total_workers, rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception as e:
        # Was a bare py2 'except Exception, e: return False' that swallowed
        # the error silently — log it before giving up.
        logging.error('Thread [%i/%i] : Upload to %s raised: %s' % (worker_number, total_workers, rse, str(e)))
        return False
def declare_bad_file_replicas(pfns, rse, session=None):
    """
    Declare a list of bad replicas.

    :param pfns: The list of PFNs.
    :param rse: The RSE name.
    :param session: The database session in use.
    :raises ReplicaNotFound: if one or several replicas don't exist.
    """
    rse_info = rsemgr.get_rse_info(rse, session=session)
    rse_id = rse_info['id']
    replicas = []
    p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
    if rse_info['deterministic']:
        # Deterministic RSE: scope/name can be derived from the parsed path.
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = parsed_pfn[pfn]['path']
            # user/group scopes are dotted: 'user/jdoe/...' -> scope 'user.jdoe'
            if path.startswith('user') or path.startswith('group'):
                scope = '%s.%s' % (path.split('/')[0], path.split('/')[1])
            else:
                scope = path.split('/')[0]
            name = parsed_pfn[pfn]['name']
            replicas.append({'scope': scope, 'name': name, 'rse_id': rse_id, 'state': ReplicaState.BAD})
        try:
            update_replicas_states(replicas, session=session)
        except exception.UnsupportedOperation:
            raise exception.ReplicaNotFound("One or several replicas don't exist.")
    else:
        # Non-deterministic RSE: look replicas up by their stored path.
        path_clause = []
        parsed_pfn = p.parse_pfns(pfns=pfns)
        for pfn in parsed_pfn:
            path = '%s%s' % (parsed_pfn[pfn]['path'], parsed_pfn[pfn]['name'])
            path_clause.append(models.RSEFileAssociation.path == path)
        # BUGFIX: the Oracle hint string was missing its closing parenthesis
        # ("+ index(replicas REPLICAS_PATH_IDX"), producing a malformed hint.
        query = session.query(models.RSEFileAssociation.path,
                              models.RSEFileAssociation.scope,
                              models.RSEFileAssociation.name,
                              models.RSEFileAssociation.rse_id).\
            with_hint(models.RSEFileAssociation, "+ index(replicas REPLICAS_PATH_IDX)", 'oracle').\
            filter(models.RSEFileAssociation.rse_id == rse_id).filter(or_(*path_clause))
        rowcount = query.update({'state': ReplicaState.BAD})
        if rowcount != len(parsed_pfn):
            raise exception.ReplicaNotFound("One or several replicas don't exist.")
def setupClass(cls):
    """SRM (RSE/PROTOCOLS): Creating necessary directories and files

    Builds the static SRM URL from the repository configuration, uploads a
    random 1 MB file to it, and copies it to every remote test PFN via srmcp.
    """
    # Creating local files
    cls.tmpdir = tempfile.mkdtemp()
    cls.user = uuid()
    with open("%s/data.raw" % cls.tmpdir, "wb") as out:
        out.seek((1024 * 1024) - 1)  # 1 MB sparse file
        out.write(b'\0')  # binary-mode file requires bytes, not str
    for f in MgrTestCases.files_local:
        shutil.copy('%s/data.raw' % cls.tmpdir, '%s/%s' % (cls.tmpdir, f))
    with open('etc/rse_repository.json') as f:
        data = json.load(f)
    # Hoist the deeply-nested protocol section instead of repeating the lookup.
    srm_conf = data['FZK-LCG2_SCRATCHDISK']['protocols']['supported']['srm']
    prefix = srm_conf['prefix']
    hostname = srm_conf['hostname']
    if hostname.count("://"):
        hostname = hostname.split("://")[1]
    # 'in dict' instead of 'in dict.keys()'
    port = int(srm_conf['port']) if 'port' in srm_conf else 0
    if 'extended_attributes' in srm_conf and 'web_service_path' in srm_conf['extended_attributes']:
        web_service_path = srm_conf['extended_attributes']['web_service_path']
    else:
        web_service_path = ''
    os.system('dd if=/dev/urandom of=%s/data.raw bs=1024 count=1024' % cls.tmpdir)
    if port > 0:
        cls.static_file = 'srm://%s:%s%s%s/data.raw' % (hostname, port, web_service_path, prefix)
    else:
        cls.static_file = 'srm://%s%s%s/data.raw' % (hostname, web_service_path, prefix)
    cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, cls.static_file)
    execute(cmd)
    for f in MgrTestCases.files_remote:
        # dict views are not indexable on Python 3: materialize before [0]
        tmp = list(mgr.lfns2pfns(mgr.get_rse_info('FZK-LCG2_SCRATCHDISK'),
                                 {'name': f, 'scope': 'user.%s' % cls.user},
                                 scheme='srm').values())[0]
        cmd = 'srmcp --debug=false -retry_num=0 file:///%s/data.raw %s' % (cls.tmpdir, tmp)
        execute(cmd)
def test_get_mgr_SourceNotFound_single_pfn(self):
    """MOCK (RSE/PROTOCOLS): Get a single file from storage providing PFN (SourceNotFound)"""
    # Stage every remote test file, then delegate to the shared test-case body.
    for fname in MgrTestCases.files_remote:
        lfn = {'name': fname, 'scope': 'user.%s' % self.user}
        mgr.upload(mgr.get_rse_info(self.rse_id), [lfn, ])
    self.mtc.test_get_mgr_SourceNotFound_single_pfn()
def test_delete_mgr_ok_single(self):
    """MOCK (RSE/PROTOCOLS): Delete a single file from storage (Success)"""
    # Stage every remote test file, then delegate to the shared test-case body.
    for fname in MgrTestCases.files_remote:
        lfn = {'name': fname, 'scope': 'user.%s' % self.user}
        mgr.upload(mgr.get_rse_info(self.rse_id), [lfn, ])
    self.mtc.test_delete_mgr_ok_single()
def submitter(once=False, rses=[], process=0, total_processes=1, thread=0, total_threads=1, mock=False, bulk=100, activities=None):
    """
    Main loop to submit a new transfer primitive to a transfertool.

    :param once: Unused in the visible body; presumably terminates after one pass — confirm against caller.
    :param rses: List of RSE dicts to work on; [] iterates nothing, None switches to FIFO mode.
    :param process/total_processes, thread/total_threads: sharding coordinates for get_requests.
    :param mock: Pass-through flag to get_requests/get_transfer.
    :param bulk: Maximum number of requests fetched per query.
    :param activities: Optional list of activities to serve; None means "any".
    """
    # NOTE(review): mutable default rses=[] — harmless here since rses is only
    # rebound (never mutated), but it means the default serves NO RSEs, while
    # rses=None enables FIFO mode. Confirm that asymmetry is intended.
    logging.info('submitter starting - process (%i/%i) thread (%i/%i)' % (process, total_processes, thread, total_threads))
    try:
        scheme = config_get('conveyor', 'scheme')
    except NoOptionError:
        # no configured scheme: default to srm
        scheme = 'srm'
    logging.info('submitter started - process (%i/%i) thread (%i/%i)' % (process, total_processes, thread, total_threads))
    while not graceful_stop.is_set():
        try:
            if activities is None:
                activities = [None]
            for activity in activities:
                if rses is None:
                    rses = [None]
                for rse in rses:
                    if rse:
                        # run in rse list mode
                        rse_info = rsemgr.get_rse_info(rse['rse'])
                        logging.info("Working on RSE: %s" % rse['rse'])
                        ts = time.time()
                        reqs = get_requests(rse_id=rse['id'], process=process, total_processes=total_processes, thread=thread, total_threads=total_threads, mock=mock, bulk=bulk, activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)
                    else:
                        # no rse list, run FIFO mode
                        rse_info = None
                        ts = time.time()
                        reqs = get_requests(process=process, total_processes=total_processes, thread=thread, total_threads=total_threads, mock=mock, bulk=bulk, activity=activity)
                        record_timer('daemons.conveyor.submitter.get_requests', (time.time() - ts) * 1000)
                    if reqs:
                        logging.debug('%i:%i - submitting %i requests' % (process, thread, len(reqs)))
                    if not reqs or reqs == []:
                        # nothing queued: back off briefly before the next poll
                        time.sleep(1)
                        continue
                    for req in reqs:
                        try:
                            if not rse:
                                # no rse list, in FIFO mode: resolve destination per request
                                dest_rse = rse_core.get_rse(rse=None, rse_id=req['dest_rse_id'])
                                rse_info = rsemgr.get_rse_info(dest_rse['rse'])
                            ts = time.time()
                            transfer = get_transfer(rse_info, req, scheme, mock)
                            record_timer('daemons.conveyor.submitter.get_transfer', (time.time() - ts) * 1000)
                            logging.debug('Transfer for request %s: %s' % (req['request_id'], transfer))
                            if transfer is None:
                                # no usable source/destination: mark the request LOST and archive it
                                logging.warn("Request %s DID %s:%s RSE %s failed to get transfer" % (req['request_id'], req['scope'], req['name'], rse_info['rse']))
                                # TODO: Merge these two calls
                                request.set_request_state(req['request_id'], RequestState.LOST)  # if the DID does not exist anymore
                                request.archive_request(req['request_id'])
                                continue
                            ts = time.time()
                            tmp_metadata = transfer['file_metadata']
                            # hand the transfer over to FTS3
                            eids = request.submit_transfers(transfers=[transfer, ], transfertool='fts3', job_metadata=tmp_metadata)
                            record_timer('daemons.conveyor.submitter.submit_transfer', (time.time() - ts) * 1000)
                            ts = time.time()
                            if req['previous_attempt_id']:
                                logging.info('COPYING RETRY %s REQUEST %s PREVIOUS %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['retry_count'], req['request_id'], req['previous_attempt_id'], req['scope'], req['name'], transfer['src_urls'], transfer['dest_urls'], eids[req['request_id']]['external_host'], eids[req['request_id']]['external_id']))
                            else:
                                logging.info('COPYING REQUEST %s DID %s:%s FROM %s TO %s USING %s with eid: %s' % (req['request_id'], req['scope'], req['name'], transfer['src_urls'], transfer['dest_urls'], eids[req['request_id']]['external_host'], eids[req['request_id']]['external_id']))
                            record_counter('daemons.conveyor.submitter.submit_request')
                        except UnsupportedOperation, e:
                            # The replica doesn't exist, need to cancel the request
                            # (py2 'except X, e' syntax kept byte-identical)
                            logging.warning(e)
                            logging.info('Cancelling transfer request %s' % req['request_id'])
                            try:
                                # TODO: for now, there is only ever one destination
                                request.cancel_request_did(req['scope'], req['name'], transfer['dest_urls'][0])
                            except Exception, e:
                                logging.warning('Cannot cancel request: %s' % str(e))
def request_transfer(once=False, src=None, dst=None):
    """
    Main loop to request a new transfer.

    Creates two throwaway RSEs wrapping the *src* and *dst* endpoints, then
    repeatedly: makes a mock dataset, uploads a test file to site_a, registers
    the replica, attaches it to the dataset, and adds a replication rule to
    site_b (which triggers the conveyor to transfer it).

    :param once: If True, return after the first successful iteration.
    :param src: Source endpoint URL ('https://...' or an srm-style endpoint).
    :param dst: Destination endpoint URL.
    """
    logging.info('request: starting')
    site_a = 'RSE%s' % generate_uuid().upper()
    site_b = 'RSE%s' % generate_uuid().upper()
    scheme = 'https'
    impl = 'rucio.rse.protocols.webdav.Default'
    if not src.startswith('https://'):
        # non-https endpoints are treated as SRM; the leading token of the URL
        # is the space token (cf. generate_rse elsewhere in this file)
        scheme = 'srm'
        impl = 'rucio.rse.protocols.srm.Default'
        srctoken = src.split(':')[0]
        dsttoken = dst.split(':')[0]
    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    rse.add_rse(site_a)
    # endpoint shape assumed: scheme://host:port/path — host after '://',
    # port before the first '/', remainder is the prefix
    tmp_proto['hostname'] = src.split(':')[1][2:]
    tmp_proto['port'] = src.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + src.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': srctoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_a, tmp_proto)
    tmp_proto = {
        'impl': impl,
        'scheme': scheme,
        'domains': {
            'lan': {'read': 1, 'write': 1, 'delete': 1},
            'wan': {'read': 1, 'write': 1, 'delete': 1}}}
    rse.add_rse(site_b)
    tmp_proto['hostname'] = dst.split(':')[1][2:]
    tmp_proto['port'] = dst.split(':')[2].split('/')[0]
    tmp_proto['prefix'] = '/'.join([''] + dst.split(':')[2].split('/')[1:])
    if scheme == 'srm':
        tmp_proto['extended_attributes'] = {'space_token': dsttoken,
                                            'web_service_path': ''}
    rse.add_protocol(site_b, tmp_proto)
    si = rsemanager.get_rse_info(site_a)
    session = get_session()
    logging.info('request: started')
    while not graceful_stop.is_set():
        try:
            ts = time.time()
            tmp_name = generate_uuid()
            # add a new dataset
            did.add_did(scope='mock', name='dataset-%s' % tmp_name, type=DIDType.DATASET, account='root', session=session)
            # construct PFN
            pfn = rsemanager.lfns2pfns(si, lfns=[{'scope': 'mock', 'name': 'file-%s' % tmp_name}])['mock:file-%s' % tmp_name]
            # create the directories if needed
            p = rsemanager.create_protocol(si, operation='write', scheme=scheme)
            p.connect()
            try:
                p.mkdir(pfn)
            except:
                # best effort: directory may already exist
                pass
            # upload the test file
            try:
                fp = os.path.dirname(config_get('injector', 'file'))
                fn = os.path.basename(config_get('injector', 'file'))
                p.put(fn, pfn, source_dir=fp)
            except:
                # upload failed: drop the dataset we just created and stop
                logging.critical('Could not upload, removing temporary DID: %s' % str(sys.exc_info()))
                did.delete_dids([{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', session=session)
                break
            # add the replica
            replica.add_replica(rse=site_a, scope='mock', name='file-%s' % tmp_name, bytes=config_get_int('injector', 'bytes'), adler32=config_get('injector', 'adler32'), md5=config_get('injector', 'md5'), account='root', session=session)
            # to the dataset
            did.attach_dids(scope='mock', name='dataset-%s' % tmp_name, dids=[{'scope': 'mock', 'name': 'file-%s' % tmp_name, 'bytes': config_get('injector', 'bytes')}], account='root', session=session)
            # add rule for the dataset
            ts = time.time()
            rule.add_rule(dids=[{'scope': 'mock', 'name': 'dataset-%s' % tmp_name}], account='root', copies=1, rse_expression=site_b, grouping='ALL', weight=None, lifetime=None, locked=False, subscription_id=None, activity='mock-injector', session=session)
            logging.info('added rule for %s for DID mock:%s' % (site_b, tmp_name))
            record_timer('daemons.mock.conveyorinjector.add_rule', (time.time()-ts)*1000)
            record_counter('daemons.mock.conveyorinjector.request_transfer')
            session.commit()
        except:
            # any failure rolls back the whole iteration's DB work
            session.rollback()
            logging.critical(traceback.format_exc())
        if once:
            return
    logging.info('request: graceful stop requested')
    logging.info('request: graceful stop done')
def reaper(rses, worker_number=1, child_number=1, total_children=1, chunk_size=100, once=False, greedy=False, scheme=None, exclude_rses=None, delay_seconds=0):
    """
    Main loop to select and delete files.

    :param rses: List of RSEs the reaper should work against. If empty, it considers all RSEs.
    :param worker_number: The worker number.
    :param child_number: The child number.
    :param total_children: The total number of children created per worker.
    :param chunk_size: the size of chunk for deletion.
    :param once: If True, only runs one iteration of the main loop.
    :param greedy: If True, delete right away replicas with tombstone.
    :param scheme: Force the reaper to use a particular protocol, e.g., mock.
    :param exclude_rses: RSE expression to exclude RSEs from the Reaper.
    """
    logging.info('Starting reaper: worker %(worker_number)s, child %(child_number)s' % locals())
    while not graceful_stop.is_set():
        try:
            max_deleting_rate = 0
            for rse in rses:
                deleting_rate = 0
                rse_info = rsemgr.get_rse_info(rse['rse'])
                rse_protocol = rse_core.get_rse_protocols(rse['rse'])
                if not rse_protocol['availability_delete']:
                    # deletion disabled on this RSE: skip it entirely
                    logging.info('Reaper %s-%s: RSE %s is not available for deletion' % (worker_number, child_number, rse_info['rse']))
                    continue
                # Temporary hack to force gfal for deletion
                for protocol in rse_info['protocols']:
                    if protocol['impl'] == 'rucio.rse.protocols.srm.Default' or protocol['impl'] == 'rucio.rse.protocols.gsiftp.Default':
                        protocol['impl'] = 'rucio.rse.protocols.gfal.Default'
                logging.info('Reaper %s-%s: Running on RSE %s' % (worker_number, child_number, rse_info['rse']))
                try:
                    needed_free_space, max_being_deleted_files = None, 10000
                    if not greedy:
                        # non-greedy mode: only free as much space as the RSE needs
                        max_being_deleted_files, needed_free_space, used, free = __check_rse_usage(rse=rse['rse'], rse_id=rse['id'])
                        logging.info('Reaper %(worker_number)s-%(child_number)s: Space usage for RSE %(rse)s - max_being_deleted_files: %(max_being_deleted_files)s, needed_free_space: %(needed_free_space)s, used: %(used)s, free: %(free)s' % locals())
                        if needed_free_space <= 0:
                            needed_free_space = 0
                            logging.info('Reaper %s-%s: free space is above minimum limit for %s' % (worker_number, child_number, rse['rse']))
                    s = time.time()
                    with monitor.record_timer_block('reaper.list_unlocked_replicas'):
                        replicas = list_unlocked_replicas(rse=rse['rse'], bytes=needed_free_space, limit=max_being_deleted_files, worker_number=child_number, total_workers=total_children, delay_seconds=delay_seconds)
                    logging.debug('Reaper %s-%s: list_unlocked_replicas %s %s %s' % (worker_number, child_number, rse['rse'], time.time() - s, len(replicas)))
                    if not replicas:
                        logging.info('Reaper %s-%s: nothing to do for %s' % (worker_number, child_number, rse['rse']))
                        continue
                    p = rsemgr.create_protocol(rse_info, 'delete', scheme=None)
                    for files in chunks(replicas, chunk_size):
                        logging.debug('Reaper %s-%s: Running on : %s' % (worker_number, child_number, str(files)))
                        try:
                            s = time.time()
                            # mark the whole chunk BEING_DELETED before touching storage
                            # NOTE(review): dict.items() + list concat is py2-only; py3 would need list(replica.items())
                            update_replicas_states(replicas=[dict(replica.items() + [('state', ReplicaState.BEING_DELETED), ('rse_id', rse['id'])]) for replica in files])
                            for replica in files:
                                try:
                                    # NOTE(review): .values()[0] is py2-only; py3 would need list(...)[0]
                                    replica['pfn'] = str(rsemgr.lfns2pfns(rse_settings=rse_info, lfns=[{'scope': replica['scope'], 'name': replica['name']}, ], operation='delete').values()[0])
                                except ReplicaUnAvailable as e:
                                    err_msg = 'Failed to get pfn UNAVAILABLE replica %s:%s on %s with error %s' % (replica['scope'], replica['name'], rse['rse'], str(e))
                                    logging.warning('Reaper %s-%s: %s' % (worker_number, child_number, err_msg))
                                    replica['pfn'] = None
                                add_message('deletion-planned', {'scope': replica['scope'], 'name': replica['name'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'rse': rse_info['rse']})
                            # logging.debug('update_replicas_states %s' % (time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.being_deleted', delta=len(files))
                            if not scheme:
                                try:
                                    deleted_files = []
                                    p.connect()
                                    for replica in files:
                                        try:
                                            logging.info('Reaper %s-%s: Deletion ATTEMPT of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            s = time.time()
                                            if rse['staging_area'] or rse['rse'].endswith("STAGING"):
                                                # staging areas: catalog-only deletion, no physical removal
                                                logging.warning('Reaper %s-%s: Deletion STAGING of %s:%s as %s on %s, will only delete the catalog and not do physical deletion' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            else:
                                                if replica['pfn']:
                                                    p.delete(replica['pfn'])
                                                else:
                                                    logging.warning('Reaper %s-%s: Deletion UNAVAILABLE of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                            monitor.record_timer('daemons.reaper.delete.%s.%s' % (p.attributes['scheme'], rse['rse']), (time.time()-s)*1000)
                                            duration = time.time() - s
                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                            add_message('deletion-done', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'duration': duration})
                                            logging.info('Reaper %s-%s: Deletion SUCCESS of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse']))
                                        except SourceNotFound:
                                            # already gone from storage: still remove it from the catalog
                                            err_msg = 'Reaper %s-%s: Deletion NOTFOUND of %s:%s as %s on %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'])
                                            logging.warning(err_msg)
                                            deleted_files.append({'scope': replica['scope'], 'name': replica['name']})
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': err_msg})
                                        except (ServiceUnavailable, RSEAccessDenied) as e:
                                            logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                        except Exception as e:
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                            add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                        except:
                                            # NOTE(review): unreachable after 'except Exception' above (only exits not derived from Exception would land here)
                                            logging.critical('Reaper %s-%s: Deletion CRITICAL of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(traceback.format_exc())))
                                except (ServiceUnavailable, RSEAccessDenied) as e:
                                    # connect() itself failed: report every replica of the chunk as failed
                                    for replica in files:
                                        logging.warning('Reaper %s-%s: Deletion NOACCESS of %s:%s as %s on %s: %s' % (worker_number, child_number, replica['scope'], replica['name'], replica['pfn'], rse['rse'], str(e)))
                                        add_message('deletion-failed', {'scope': replica['scope'], 'name': replica['name'], 'rse': rse_info['rse'], 'file-size': replica['bytes'], 'url': replica['pfn'], 'reason': str(e)})
                                finally:
                                    p.close()
                            s = time.time()
                            with monitor.record_timer_block('reaper.delete_replicas'):
                                # remove the successfully-deleted files from the catalog
                                delete_replicas(rse=rse['rse'], files=deleted_files)
                            logging.debug('Reaper %s-%s: delete_replicas successes %s %s %s' % (worker_number, child_number, rse['rse'], len(deleted_files), time.time() - s))
                            monitor.record_counter(counters='reaper.deletion.done', delta=len(deleted_files))
                            deleting_rate += len(deleted_files)
                        except:
                            logging.critical(traceback.format_exc())
                    # fraction of the allowed deletion budget actually used
                    deleting_rate = deleting_rate * 1.0 / max_being_deleted_files
                    if deleting_rate > max_deleting_rate:
                        max_deleting_rate = deleting_rate
                except:
                    logging.critical(traceback.format_exc())
            if once:
                break
            logging.info(" Reaper %s-%s: max_deleting_rate: %s " % (worker_number, child_number, max_deleting_rate))
            # sleep longer the less work was done (rate 0 -> ~61s, rate 1 -> 1s)
            sleep_time = int((1 - max_deleting_rate) * 60 + 1)
            time.sleep(sleep_time)
        except:
            logging.critical(traceback.format_exc())
    logging.info('Graceful stop requested')
    logging.info('Graceful stop done')
for rse in parsed_rses: if rse not in working_rses: working_rses.append(rse) if not (rses or include_rses): working_rses = rses_list if exclude_rses: try: parsed_rses = parse_expression(exclude_rses, session=None) except InvalidRSEExpression, e: logging.error("Invalid RSE exception %s to exclude RSEs: %s" % (exclude_rses, e)) else: working_rses = [rse for rse in working_rses if rse not in parsed_rses] working_rses = [rsemgr.get_rse_info(rse['rse']) for rse in working_rses] return working_rses def get_requests(rse_id=None, process=0, total_processes=1, thread=0, total_threads=1, mock=False, bulk=100, activity=None): ts = time.time() reqs = request.get_next(request_type=[RequestType.TRANSFER, RequestType.STAGEIN, RequestType.STAGEOUT], state=RequestState.QUEUED, limit=bulk, rse=rse_id, activity=activity, process=process, total_processes=total_processes, thread=thread,
add_replicas(rse='MOCK2', files=files, account='root', ignore_availability=True) update_replicas_paths(files) for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm']): # force the changed string - if we look it up from the DB, then we're not testing anything :-D assert_equal(replica['rses']['MOCK2'][0], 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests//does/not/really/matter/where') def test_add_list_bad_replicas(self): """ REPLICA (CORE): Add bad replicas and list them""" tmp_scope = 'mock' nbfiles = 5 # Adding replicas to deterministic RSE files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1L, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in xrange(nbfiles)] rse_info = rsemgr.get_rse_info('MOCK') rse_id1 = rse_info['id'] add_replicas(rse='MOCK', files=files, account='root', ignore_availability=True) # Listing replicas on deterministic RSE replicas = [] list_rep = [] for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files], schemes=['srm']): replicas.extend(replica['rses']['MOCK']) list_rep.append(replica) declare_bad_file_replicas(replicas, 'MOCK') bad_replicas = list_bad_replicas() nbbadrep = 0 for rep in list_rep: for badrep in bad_replicas: if badrep['rse_id'] == rse_id1:
def list_replicas(dids, schemes=None, unavailable=False, request_id=None, ignore_availability=True, all_states=False, session=None): """ List file replicas for a list of data identifiers (DIDs). :param dids: The list of data identifiers (DIDs). :param schemes: A list of schemes to filter the replicas. (e.g. file, http, ...) :param unavailable: Also include unavailable replicas in the list. :param request_id: ID associated with the request for debugging. :param ignore_availability: Ignore the RSE blacklisting. :param all_states: Return all replicas whatever state they are in. Adds an extra 'states' entry in the result dictionary. :param session: The database session in use. """ # Get the list of files rseinfo = {} replicas = {} replica_conditions, did_conditions = [], [] # remove duplicate did from the list for did in [dict(tupleized) for tupleized in set(tuple(item.items()) for item in dids)]: if 'type' in did and did['type'] in (DIDType.FILE, DIDType.FILE.value) or 'did_type' in did and did['did_type'] in (DIDType.FILE, DIDType.FILE.value): if all_states: condition = and_(models.RSEFileAssociation.scope == did['scope'], models.RSEFileAssociation.name == did['name']) else: if not unavailable: condition = and_(models.RSEFileAssociation.scope == did['scope'], models.RSEFileAssociation.name == did['name'], models.RSEFileAssociation.state == ReplicaState.AVAILABLE) else: condition = and_(models.RSEFileAssociation.scope == did['scope'], models.RSEFileAssociation.name == did['name'], or_(models.RSEFileAssociation.state == ReplicaState.AVAILABLE, models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE, models.RSEFileAssociation.state == ReplicaState.COPYING)) replicas['%s:%s' % (did['scope'], did['name'])] = {'scope': did['scope'], 'name': did['name'], 'rses': {}} replica_conditions.append(condition) else: did_conditions.append(and_(models.DataIdentifier.scope == did['scope'], models.DataIdentifier.name == did['name'])) if did_conditions: # Get files for scope, name, 
did_type in session.query(models.DataIdentifier.scope, models.DataIdentifier.name, models.DataIdentifier.did_type).filter(or_(*did_conditions)): if did_type == DIDType.FILE: replicas['%s:%s' % (scope, name)] = {'scope': scope, 'name': name, 'rses': {}} if all_states: condition = and_(models.RSEFileAssociation.scope == scope, models.RSEFileAssociation.name == name) else: if not unavailable: condition = and_(models.RSEFileAssociation.scope == scope, models.RSEFileAssociation.name == name, models.RSEFileAssociation.state == ReplicaState.AVAILABLE) else: condition = and_(models.RSEFileAssociation.scope == scope, models.RSEFileAssociation.name == name, or_(models.RSEFileAssociation.state == ReplicaState.AVAILABLE, models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE, models.RSEFileAssociation.state == ReplicaState.COPYING)) replica_conditions.append(condition) else: # for dataset/container content_query = session.query(models.DataIdentifierAssociation.child_scope, models.DataIdentifierAssociation.child_name, models.DataIdentifierAssociation.child_type) content_query = content_query.with_hint(models.DataIdentifierAssociation, "INDEX(CONTENTS CONTENTS_PK)", 'oracle') child_dids = [(scope, name)] while child_dids: s, n = child_dids.pop() for tmp_did in content_query.filter_by(scope=s, name=n): if tmp_did.child_type == DIDType.FILE: replicas['%s:%s' % (tmp_did.child_scope, tmp_did.child_name)] = {'scope': tmp_did.child_scope, 'name': tmp_did.child_name, 'rses': {}} if all_states: condition = and_(models.RSEFileAssociation.scope == tmp_did.child_scope, models.RSEFileAssociation.name == tmp_did.child_name) else: if not unavailable: condition = and_(models.RSEFileAssociation.scope == tmp_did.child_scope, models.RSEFileAssociation.name == tmp_did.child_name, models.RSEFileAssociation.state == ReplicaState.AVAILABLE) else: condition = and_(models.RSEFileAssociation.scope == tmp_did.child_scope, models.RSEFileAssociation.name == tmp_did.child_name, 
or_(models.RSEFileAssociation.state == ReplicaState.AVAILABLE, models.RSEFileAssociation.state == ReplicaState.UNAVAILABLE, models.RSEFileAssociation.state == ReplicaState.COPYING)) replica_conditions.append(condition) else: child_dids.append((tmp_did.child_scope, tmp_did.child_name)) # Get the list of replicas is_false = False tmp_protocols = {} key = None for replica_condition in chunks(replica_conditions, 50): replica_query = select(columns=(models.RSEFileAssociation.scope, models.RSEFileAssociation.name, models.RSEFileAssociation.bytes, models.RSEFileAssociation.md5, models.RSEFileAssociation.adler32, models.RSEFileAssociation.path, models.RSEFileAssociation.state, models.RSE.rse), whereclause=and_(models.RSEFileAssociation.rse_id == models.RSE.id, models.RSE.deleted == is_false, or_(*replica_condition)), order_by=(models.RSEFileAssociation.scope, models.RSEFileAssociation.name)).\ with_hint(models.RSEFileAssociation.scope, text="INDEX(REPLICAS REPLICAS_PK)", dialect_name='oracle').\ compile() # models.RSE.availability.op(avail_op)(0x100) != 0 for scope, name, bytes, md5, adler32, path, state, rse in session.execute(replica_query.statement, replica_query.params).fetchall(): if rse not in rseinfo: rseinfo[rse] = rsemgr.get_rse_info(rse, session=session) if not rseinfo[rse]['staging_area']: if not key: key = '%s:%s' % (scope, name) elif key != '%s:%s' % (scope, name): yield replicas[key] del replicas[key] key = '%s:%s' % (scope, name) if 'bytes' not in replicas[key]: replicas[key]['bytes'] = bytes replicas[key]['md5'] = md5 replicas[key]['adler32'] = adler32 if rse not in replicas[key]['rses']: replicas[key]['rses'][rse] = [] if all_states: if 'states' not in replicas[key]: replicas[key]['states'] = {} replicas[key]['states'][rse] = state # get protocols if rse not in tmp_protocols: protocols = list() if not schemes: try: protocols.append(rsemgr.create_protocol(rseinfo[rse], 'read')) except exception.RSEProtocolNotSupported: pass # no need to be verbose except: 
print format_exc() else: for s in schemes: try: protocols.append(rsemgr.create_protocol(rse_settings=rseinfo[rse], operation='read', scheme=s)) except exception.RSEProtocolNotSupported: pass # no need to be verbose except: print format_exc() tmp_protocols[rse] = protocols # get pfns pfns_cache = dict() for protocol in tmp_protocols[rse]: if 'determinism_type' in protocol.attributes: # PFN is cachable try: path = pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], scope, name)] except KeyError: # No cache entry scope:name found for this protocol path = protocol._get_path(scope, name) pfns_cache['%s:%s:%s' % (protocol.attributes['determinism_type'], scope, name)] = path if not schemes or protocol.attributes['scheme'] in schemes: try: replicas[key]['rses'][rse].append(protocol.lfns2pfns(lfns={'scope': scope, 'name': name, 'path': path}).values()[0]) except: # temporary protection print format_exc() if protocol.attributes['scheme'] == 'srm': try: replicas[key]['space_token'] = protocol.attributes['extended_attributes']['space_token'] except KeyError: replicas[key]['space_token'] = None if key: yield replicas[key] # Files with no replicas for replica in replicas: if not replicas[replica]['rses']: yield replicas[replica]